From dc49f93b9907ed0df00303f2d376da97c6630541 Mon Sep 17 00:00:00 2001
From: BayoNet <bayonet@virtUbuntu16.04>
Date: Mon, 26 Jun 2017 10:14:13 +0300
Subject: [PATCH 001/281] New part "Server configuration parameters" of the
 document is added. Other changes are made related to this new part.

---
 docs/ru/dicts/external_dicts.rst              |  13 +-
 docs/ru/operations/configuration_files.rst    |   2 +-
 docs/ru/operations/server_settings/index.rst  |  17 +
 .../operations/server_settings/settings.rst   | 804 ++++++++++++++++++
 docs/ru/operations/settings/index.rst         |   2 +
 docs/ru/operations/settings/settings.rst      |  16 +
 docs/ru/query_language/queries.rst            |   2 +
 .../system.asynchronous_metrics.rst           |   2 +
 docs/ru/system_tables/system.events.rst       |   2 +
 docs/ru/system_tables/system.metrics.rst      |   2 +
 docs/ru/table_engines/distributed.rst         |   2 +
 docs/ru/table_engines/graphitemergetree.rst   |  30 +-
 docs/ru/table_engines/replication.rst         |   2 +
 docs/ru/table_engines/resharding.rst          |   2 +
 14 files changed, 875 insertions(+), 23 deletions(-)
 create mode 100644 docs/ru/operations/server_settings/index.rst
 create mode 100644 docs/ru/operations/server_settings/settings.rst

diff --git a/docs/ru/dicts/external_dicts.rst b/docs/ru/dicts/external_dicts.rst
index f8dc1a5fcb6..4001eeff87f 100644
--- a/docs/ru/dicts/external_dicts.rst
+++ b/docs/ru/dicts/external_dicts.rst
@@ -1,3 +1,5 @@
+.. _dicts-external_dicts:
+
 Внешние словари
 ===============
 
@@ -5,12 +7,12 @@
 Источником данных для словаря может быть файл на локальной файловой системе, сервер ClickHouse, сервер MySQL, MongoDB или любой ODBC источник.
 Словарь может полностью храниться в оперативке и периодически обновляться, или быть частично закэшированным в оперативке и динамически подгружать отсутствующие значения.
 
-Конфигурация внешних словарей находится в отдельном файле или файлах, указанных в конфигурационном параметре dictionaries_config.
-Этот параметр содержит абсолютный или относительный путь к файлу с конфигурацией словарей. Относительный путь - относительно директории с конфигурационным файлом сервера. Путь может содержать wildcard-ы \* и ? - тогда рассматриваются все подходящие файлы. Пример: dictionaries/\*.xml.
+Конфигурация внешних словарей находится в отдельном файле или файлах, указанных в конфигурационном параметре :ref:`dictionaries_config <server_settings-dictionaries_config>`.
+Этот параметр содержит абсолютный или относительный путь к файлу с конфигурацией словарей. Относительный путь - относительно директории с конфигурационным файлом сервера. Путь может содержать wildcard-ы \* и ? - тогда рассматриваются все подходящие файлы. Пример: ``dictionaries/*.xml``.
 
 Конфигурация словарей, а также множество файлов с конфигурацией, может обновляться без перезапуска сервера. Сервер проверяет обновления каждые 5 секунд. То есть, словари могут подключаться динамически.
 
-Создание словарей может производиться при старте сервера или при первом использовании. Это определяется конфигурационном параметром dictionaries_lazy_load (в основном конфигурационном файле сервера). Параметр не обязателен, по умолчанию - true. Если true, то каждый словарь создаётся при первом использовании;  если словарь не удалось создать - вызов функции, использующей словарь, кидает исключение. Если false, то все словари создаются при старте сервера, и в случае ошибки, сервер завершает работу.
+Создание словарей может производиться при старте сервера или при первом использовании. Это определяется конфигурационным параметром :ref:`dictionaries_lazy_load <server_settings-dictionaries_lazy_load>` (в основном конфигурационном файле сервера). Параметр не обязателен, по умолчанию - ``true``. Если ``true``, то каждый словарь создаётся при первом использовании; если словарь не удалось создать - вызов функции, использующей словарь, кидает исключение. Если ``false``, то все словари создаются при старте сервера, и в случае ошибки, сервер завершает работу.
 
 Конфигурационный файл словарей имеет вид:
 
@@ -170,7 +172,7 @@
 Существует шесть способов размещения словаря в памяти.
 
 flat
-----
+-----
 В виде плоских массивов. Самый эффективный способ. Он подходит, если все ключи меньше 500 000. Если при создании словаря обнаружен ключ больше, то кидается исключение и словарь не создаётся. Словарь загружается в оперативку целиком. Словарь использует количество оперативки, пропорциональное максимальному значению ключа. Ввиду ограничения на 500 000, потребление оперативки вряд ли может быть большим.
 Поддерживаются все виды источников. При обновлении, данные (из файла, из таблицы) читаются целиком.
 
@@ -189,9 +191,6 @@ range_hashed
 
 
 Пример: таблица содержит скидки для каждого рекламодателя в виде:
-.. code-block:: text
-
-.. code-block:: text
 
   +------------------+-----------------------------+------------+----------+
   | id рекламодателя | дата начала действия скидки | дата конца | величина |
diff --git a/docs/ru/operations/configuration_files.rst b/docs/ru/operations/configuration_files.rst
index efe396a89fa..2ef120f7e19 100644
--- a/docs/ru/operations/configuration_files.rst
+++ b/docs/ru/operations/configuration_files.rst
@@ -15,7 +15,7 @@
 
 Если указано ``remove`` - удалить элемент.
 
-Также в конфиге могут быть указаны "подстановки". Если у элемента присутствует атрибут ``incl``, то в качестве значения будет использована соответствующая подстановка из файла. По умолчанию, путь к файлу с подстановками - ``/etc/metrika.xml``. Он может быть изменён в конфиге в элементе ``include_from``. Значения подстановок указываются в элементах ``/yandex/имя_подстановки`` этого файла.
+Также в конфиге могут быть указаны "подстановки". Если у элемента присутствует атрибут ``incl``, то в качестве значения будет использована соответствующая подстановка из файла. По умолчанию, путь к файлу с подстановками - ``/etc/metrika.xml``. Он может быть изменён в конфигурации сервера в элементе :ref:`server_settings-include_from`. Значения подстановок указываются в элементах ``/yandex/имя_подстановки`` этого файла. Если подстановка, заданная в ``incl``, отсутствует, то в лог попадает соответствующая запись. Чтобы ClickHouse не писал в лог об отсутствии подстановки, необходимо указать атрибут ``optional="true"`` (например, настройка :ref:`server_settings-macros`).
 
 Подстановки могут также выполняться из ZooKeeper. Для этого укажите у элемента атрибут ``from_zk="/path/to/node"``. Значение элемента заменится на содержимое узла ``/path/to/node`` в ZooKeeper. В ZooKeeper-узел также можно положить целое XML-поддерево, оно будет целиком вставлено в исходный элемент.
 
diff --git a/docs/ru/operations/server_settings/index.rst b/docs/ru/operations/server_settings/index.rst
new file mode 100644
index 00000000000..490af776729
--- /dev/null
+++ b/docs/ru/operations/server_settings/index.rst
@@ -0,0 +1,17 @@
+.. _server_settings:
+
+Конфигурационные параметры сервера
+==================================
+
+Раздел содержит описания настроек сервера, которые не могут изменяться на уровне сессии или запроса.
+
+Рассмотренные настройки хранятся в файле ``config.xml`` сервера ClickHouse.
+
+Прочие настройки описаны в разделе :ref:`settings`.
+
+Перед изучением настроек ознакомьтесь с разделом :ref:`configuration_files`, обратите внимание на использование подстановок (атрибуты ``incl`` и ``optional``).
+
+.. toctree::
+    :glob:
+
+    *
diff --git a/docs/ru/operations/server_settings/settings.rst b/docs/ru/operations/server_settings/settings.rst
new file mode 100644
index 00000000000..c99054a18fa
--- /dev/null
+++ b/docs/ru/operations/server_settings/settings.rst
@@ -0,0 +1,804 @@
+.. _server_settings-builtin_dictionaries_reload_interval:
+
+builtin_dictionaries_reload_interval
+------------------------------------
+Интервал (в секундах) перезагрузки встроенных словарей.
+
+ClickHouse перезагружает встроенные словари с заданным интервалом. Это позволяет править словари "на лету" без перезапуска сервера.
+
+Значение по умолчанию - 3600.
+
+**Пример**
+
+.. code-block:: xml
+
+    <builtin_dictionaries_reload_interval>3600</builtin_dictionaries_reload_interval>
+
+.. _server_settings-compression:
+
+compression
+-----------
+Настройки компрессии данных.
+
+.. warning:: Не используйте, если вы только начали работать с ClickHouse.
+
+Общий вид конфигурации:
+
+.. code-block:: xml
+
+    <compression>
+        <case>
+          <parameters/>
+        </case>
+        ...
+    </compression>
+
+
+Можно сконфигурировать несколько разделов ``<case>``.
+
+Поля блока ``<case>``:
+
++---------------------+--------------------------------------------------------------------------+
+| Параметр            | Описание                                                                 |
++=====================+==========================================================================+
+| min_part_size       | Минимальный размер части таблицы.                                        |
++---------------------+--------------------------------------------------------------------------+
+| min_part_size_ratio | Отношение размера минимальной части таблицы к полному размеру таблицы.   |
++---------------------+--------------------------------------------------------------------------+
+| method              | Метод сжатия. Возможные значения: ``lz4``, ``zstd`` (экспериментальный). |
++---------------------+--------------------------------------------------------------------------+
+
+ClickHouse проверит условия ``min_part_size`` и ``min_part_size_ratio`` и выполнит те блоки ``case``, для которых условия совпали. Если ни один ``<case>`` не подходит, то ClickHouse применит алгоритм сжатия ``lz4``.
+
+**Пример**
+
+.. code-block:: xml
+
+    <compression incl="clickhouse_compression">
+        <case>
+            <min_part_size>10000000000</min_part_size>
+            <min_part_size_ratio>0.01</min_part_size_ratio>
+            <method>zstd</method>
+        </case>
+    </compression>
+
+
+.. _server_settings-default_database:
+
+default_database
+----------------
+База данных по умолчанию.
+
+Перечень баз данных можно получить запросом :ref:`query_language_queries_show_databases`.
+
+**Пример**
+
+.. code-block:: xml
+
+    <default_database>default</default_database>
+
+
+
+.. _server_settings-default_profile:
+
+default_profile
+---------------
+Профиль настроек по умолчанию.
+
+Профили настроек находятся в файле, указанном в параметре :ref:`server_settings-users_config`.
+
+**Пример**
+
+.. code-block:: xml
+
+    <default_profile>default</default_profile>
+
+
+.. _server_settings-dictionaries_config:
+
+dictionaries_config
+-------------------
+Конфигурация внешних словарей.
+
+Смотрите раздел :ref:`dicts-external_dicts`.
+
+**Пример**
+
+.. code-block:: xml
+
+    <dictionaries_config>*_dictionary.xml</dictionaries_config>
+
+
+.. _server_settings-dictionaries_lazy_load:
+
+dictionaries_lazy_load
+----------------------
+
+Отложенная загрузка словарей.
+
+С установленным параметром словари подгружаются не при запуске сервера, а при первом обращении.
+
+**Пример**
+
+.. code-block:: xml
+
+    <dictionaries_lazy_load>true</dictionaries_lazy_load>
+
+
+.. _server_settings-graphite:
+
+graphite
+--------
+Отправка данных в `Graphite <https://github.com/graphite-project>`_.
+
+Настройки:
+
++----------------------+------------------------------------------------------------------------------+
+| Настройка            | Описание                                                                     |
++======================+==============================================================================+
+| host                 | Сервер Graphite.                                                             |
++----------------------+------------------------------------------------------------------------------+
+| port                 | Порт сервера Graphite.                                                       |
++----------------------+------------------------------------------------------------------------------+
+| interval             | Период отправки в секундах.                                                  |
++----------------------+------------------------------------------------------------------------------+
+| timeout              | Таймаут отправки данных в секундах.                                          |
++----------------------+------------------------------------------------------------------------------+
+| root_path            | Префикс для ключей.                                                          |
++----------------------+------------------------------------------------------------------------------+
+| metrics              | Отправка данных из таблицы :ref:`system_tables-system.metrics`.              |
++----------------------+------------------------------------------------------------------------------+
+| events               | Отправка данных из таблицы :ref:`system_tables-system.events`.               |
++----------------------+------------------------------------------------------------------------------+
+| asynchronous_metrics | Отправка данных из таблицы :ref:`system_tables-system.asynchronous_metrics`. |
++----------------------+------------------------------------------------------------------------------+
+ 
+
+Можно определить несколько секций ``<graphite>``, например, для передачи различных данных с различной частотой.
+
+**Пример**
+
+.. code-block:: xml
+
+    <graphite>
+        <host>localhost</host>
+        <port>42000</port>
+        <timeout>0.1</timeout>
+        <interval>60</interval>
+        <root_path>one_min</root_path>
+        <metrics>true</metrics>
+        <events>true</events>
+        <asynchronous_metrics>true</asynchronous_metrics>
+    </graphite>
+
+
+.. _server_settings-graphite_rollup:
+
+graphite_rollup
+---------------
+
+Настройка прореживания данных для Graphite.
+
+Подробнее читайте в разделе :ref:`table_engines-graphitemergetree`.
+
+**Пример**
+
+.. code-block:: xml
+
+    <graphite_rollup_example>
+        <default>
+            <function>max</function>
+            <retention>
+                <age>0</age>
+                <precision>60</precision>
+            </retention>
+            <retention>
+                <age>3600</age>
+                <precision>300</precision>
+            </retention>
+            <retention>
+                <age>86400</age>
+                <precision>3600</precision>
+            </retention>
+        </default>
+    </graphite_rollup_example>
+
+
+.. _server_settings-http_port:
+
+http_port/https_port
+--------------------
+Порт для обращений к серверу по протоколу HTTP(s).
+
+Если указан ``https_port``, то требуется конфигурирование :ref:`server_settings-openSSL`.
+
+Если указан ``http_port``, то настройка :ref:`server_settings-openSSL` игнорируется, даже если она задана.
+
+**Пример**
+
+.. code-block:: xml
+
+    <https_port>0000</https_port>
+
+
+.. _server_settings-http_server_default_response:
+
+http_server_default_response
+----------------------------
+Страница, показываемая по умолчанию, при обращении к HTTP(s) серверу ClickHouse.
+
+**Пример**
+
+Показывает ``https://tabix.io/`` при обращении к ``http://localhost:http_port``.
+
+.. code-block:: xml
+
+  <http_server_default_response>
+    <![CDATA[<html ng-app="SMI2"><head><base href="http://ui.tabix.io/"></head><body><div ui-view="" class="content-ui"></div><script src="http://loader.tabix.io/master.js"></script></body></html>]]>
+  </http_server_default_response>
+
+.. _server_settings-include_from:
+
+include_from
+------------
+Путь к файлу с подстановками.
+
+Подробности смотрите в разделе :ref:`configuration_files`.
+
+**Пример**
+
+.. code-block:: xml
+
+    <include_from>/etc/metrika.xml</include_from>
+
+
+.. _server_settings-interserver_http_port:
+
+interserver_http_port
+---------------------
+
+Порт для обмена между серверами ClickHouse.
+
+**Пример**
+
+.. code-block:: xml
+
+  <interserver_http_port>9009</interserver_http_port>
+
+
+.. _server_settings-interserver_http_host:
+
+interserver_http_host
+---------------------
+
+Имя хоста, которое могут использовать другие серверы для обращения к этому.
+
+Если не указано, то определяется аналогично команде ``hostname -f``.
+
+Удобно использовать, чтобы отвязаться от конкретного сетевого интерфейса.
+
+
+**Пример**
+
+.. code-block:: xml
+
+    <interserver_http_host>example.yandex.ru</interserver_http_host>
+
+
+.. _server_settings-keep_alive_timeout:
+
+keep_alive_timeout
+------------------
+
+Время в секундах, в течение которого ClickHouse ожидает входящих запросов прежде, чем закрыть соединение.
+
+**Пример**
+
+.. code-block:: xml
+
+   <keep_alive_timeout>3</keep_alive_timeout>
+
+
+.. _server_settings-listen_host:
+
+listen_host
+-----------
+
+Ограничение по хостам, с которых может прийти запрос. Если необходимо, чтобы сервер отвечал всем, то надо указать ``::``.
+
+Примеры:
+
+.. code-block:: xml
+
+    <listen_host>::1</listen_host>
+    <listen_host>127.0.0.1</listen_host>
+
+
+.. _server_settings-logger:
+
+logger
+------
+Настройки логирования.
+
+Ключи:
+
++----------+-----------------------------------------------------------------------------------------------------------+
+| Ключ     | Описание                                                                                                  |
++==========+===========================================================================================================+
+| level    | Уровень логирования. Допустимые значения: ``trace``, ``debug``, ``information``, ``warning``, ``error``.  |
++----------+-----------------------------------------------------------------------------------------------------------+
+| log      | Файл лога. Содержит все записи согласно ``level``.                                                        |
++----------+-----------------------------------------------------------------------------------------------------------+
+| errorlog | Файл лога ошибок.                                                                                         |
++----------+-----------------------------------------------------------------------------------------------------------+
+| size     | Размер файла. Действует для ``log`` и ``errorlog``. Как только файл достиг размера ``size``,              |
+|          | ClickHouse архивирует и переименовывает его, а на его месте создает новый файл лога.                      |
++----------+-----------------------------------------------------------------------------------------------------------+
+| count    | Количество заархивированных файлов логов, которые сохраняет ClickHouse.                                   |
++----------+-----------------------------------------------------------------------------------------------------------+
+
+
+**Пример**
+
+.. code-block:: xml
+
+    <logger>
+        <level>trace</level>
+        <log>/var/log/clickhouse-server/clickhouse-server.log</log>
+        <errorlog>/var/log/clickhouse-server/clickhouse-server.err.log</errorlog>
+        <size>1000M</size>
+        <count>10</count>
+    </logger>
+
+.. _server_settings-macros:
+
+macros
+------
+Подстановки параметров реплицируемых таблиц.
+
+Можно не указывать, если реплицируемые таблицы не используются.
+
+Подробнее смотрите в разделе :ref:`table_engines-replication-creation_of_rep_tables`.
+
+**Пример**
+
+.. code-block:: xml
+
+    <macros incl="macros" optional="true" />
+
+
+.. _server_settings-mark_cache_size:
+
+mark_cache_size
+---------------
+Приблизительный размер (в байтах) кеша "засечек", используемых движками таблиц семейства :ref:`table_engines-mergetree`. 
+
+Кеш общий для сервера, память выделяется по мере необходимости. Кеш не может быть меньше, чем 5368709120.
+
+**Пример**
+
+.. code-block:: xml
+
+  <mark_cache_size>5368709120</mark_cache_size>
+
+
+.. _server_settings-max_concurrent_queries:
+
+max_concurrent_queries
+----------------------
+
+Максимальное количество одновременно обрабатываемых запросов.
+
+**Пример**
+
+.. code-block:: xml
+
+  <max_concurrent_queries>100</max_concurrent_queries>
+
+
+.. _server_settings-max_connections:
+
+max_connections
+---------------
+
+Максимальное количество входящих соединений.
+
+**Пример**
+
+.. code-block:: xml
+
+  <max_connections>4096</max_connections>
+
+.. _server_settings-max_open_files:
+
+max_open_files
+--------------
+
+Максимальное количество открытых файлов.
+
+По умолчанию - ``maximum``.
+
+Рекомендуется использовать в Mac OS X, поскольку функция ``getrlimit()`` возвращает некорректное значение.
+
+**Пример**
+
+.. code-block:: xml
+
+    <max_open_files>262144</max_open_files>
+
+
+.. _server_settings-max_table_size_to_drop:
+
+max_table_size_to_drop
+----------------------
+
+Ограничение на удаление таблиц.
+
+Если размер таблицы семейства :ref:`table_engines-mergetree` превышает ``max_table_size_to_drop`` (в байтах), то ее нельзя удалить запросом DROP.
+
+Если таблицу все же необходимо удалить, не перезапуская при этом сервер ClickHouse, то необходимо создать файл ``<clickhouse-path>/flags/force_drop_table`` и выполнить запрос DROP.
+
+Значение по умолчанию - 50GB.
+
+Значение 0 означает, что можно удалять все таблицы без ограничений.
+
+**Пример**
+
+.. code-block:: xml
+
+    <max_table_size_to_drop>0</max_table_size_to_drop>
+
+
+.. _server_settings-merge_tree:
+
+merge_tree
+----------
+Тонкая настройка таблиц семейства :ref:`table_engines-mergetree`.
+
+Подробнее смотрите в заголовочном файле MergeTreeSettings.h.
+
+**Пример**
+
+.. code-block:: xml
+
+    <merge_tree>
+        <max_suspicious_broken_parts>5</max_suspicious_broken_parts>
+    </merge_tree>
+
+
+
+.. _server_settings-openSSL:
+
+openSSL
+-------
+
+Настройки клиента/сервера SSL.
+
+Поддержку SSL обеспечивает библиотека ``libpoco``. Описание интерфейса находится в файле `SSLManager.h <https://github.com/yandex/ClickHouse/blob/master/contrib/libpoco/NetSSL_OpenSSL/include/Poco/Net/SSLManager.h>`_
+
+Ключи настроек сервера/клиента:
+
++-----------------------------+--------------------------------------------------------------------------------------------------------------------------+
+| Ключ                        | Описание                                                                                                                 |
++=============================+==========================================================================================================================+
+| privateKeyFile              | Путь к файлу с секретным ключом сертификата в формате PEM. Файл может содержать ключ и сертификат одновременно.          |
++-----------------------------+--------------------------------------------------------------------------------------------------------------------------+
+| certificateFile             | Путь к файлу сертификата клиента/сервера в формате PEM. Можно не указывать, если ``privateKeyFile`` содержит сертификат. |
++-----------------------------+--------------------------------------------------------------------------------------------------------------------------+
+| caConfig                    | Путь к файлу или каталогу, которые содержат доверенные корневые сертификаты.                                             |
++-----------------------------+--------------------------------------------------------------------------------------------------------------------------+
+| verificationMode            | Способ проверки сертификатов узла. Подробности находятся в описании класса                                               |
+|                             | `Context <https://github.com/yandex/ClickHouse/blob/master/contrib/libpoco/NetSSL_OpenSSL/include/Poco/Net/Context.h>`_. |
+|                             | Допустимые значения: ``none``, ``relaxed``, ``strict``, ``once``.                                                        |
++-----------------------------+--------------------------------------------------------------------------------------------------------------------------+
+| verificationDepth           | Максимальная длина верификационной цепи.                                                                                 |
+|                             | Верификация завершится ошибкой, если длина цепи сертификатов превысит установленное значение.                            |
++-----------------------------+--------------------------------------------------------------------------------------------------------------------------+
+| loadDefaultCAFile           | Признак того, что будут использоваться встроенные CA-сертификаты для OpenSSL. Допустимые значения: ``true``, ``false``.  |
++-----------------------------+--------------------------------------------------------------------------------------------------------------------------+
+| cipherList                  | Поддерживаемые OpenSSL-шифры. Например, ``ALL:!ADH:!LOW:!EXP:!MD5:@STRENGTH``.                                           |
++-----------------------------+--------------------------------------------------------------------------------------------------------------------------+
+| cacheSessions               | Включение/выключение кеширования сессии.                                                                                 |
+|                             | Использовать обязательно вместе с ``sessionIdContext``. Допустимые значения: ``true``, ``false``.                        |
++-----------------------------+--------------------------------------------------------------------------------------------------------------------------+
+| sessionIdContext            | Уникальный набор произвольных символов, которые сервер добавляет к каждому сгенерированному идентификатору.              |
+|                             | Длина строки не должна превышать ``SSL_MAX_SSL_SESSION_ID_LENGTH``. Рекомендуется к использованию всегда,                |
+|                             | поскольку позволяет избежать проблем как в случае, если сервер кеширует сессию,                                          |
+|                             | так и если клиент затребовал кеширование. По умолчанию ``${application.name}``.                                          |
++-----------------------------+--------------------------------------------------------------------------------------------------------------------------+
+| sessionCacheSize            | Максимальное количество сессий, которые кэширует сервер.                                                                 |
+|                             | По умолчанию - 1024\*20. 0 - неограниченное количество сессий.                                                           |
++-----------------------------+--------------------------------------------------------------------------------------------------------------------------+
+| sessionTimeout              | Время кеширования сессии на сервере.                                                                                     |
++-----------------------------+--------------------------------------------------------------------------------------------------------------------------+
+| extendedVerification        | Автоматическая расширенная проверка сертификатов после завершения сессии.                                                |
+|                             | Допустимые значения: ``true``, ``false``.                                                                                |
++-----------------------------+--------------------------------------------------------------------------------------------------------------------------+
+| requireTLSv1                | Требование соединения TLSv1. Допустимые значения: ``true``, ``false``.                                                   |
++-----------------------------+--------------------------------------------------------------------------------------------------------------------------+
+| requireTLSv1_1              | Требование соединения TLSv1.1. Допустимые значения: ``true``, ``false``.                                                 |
++-----------------------------+--------------------------------------------------------------------------------------------------------------------------+
+| requireTLSv1_2              | Требование соединения TLSv1.2. Допустимые значения: ``true``, ``false``.                                                 |
++-----------------------------+--------------------------------------------------------------------------------------------------------------------------+
+| fips                        | Активация режима OpenSSL FIPS. Поддерживается, если версия OpenSSL, с которой собрана библиотека, поддерживает fips.     |
++-----------------------------+--------------------------------------------------------------------------------------------------------------------------+
+| privateKeyPassphraseHandler | Класс (подкласс PrivateKeyPassphraseHandler), запрашивающий кодовую фразу доступа к                                      |
+|                             | секретному ключу. Например, ``<privateKeyPassphraseHandler>`` ``<name>KeyFileHandler</name>``                            |
+|                             | ``<options><password>test</password></options>`` ``</privateKeyPassphraseHandler>``.                                     |
++-----------------------------+--------------------------------------------------------------------------------------------------------------------------+
+| invalidCertificateHandler   | Класс (подкласс CertificateHandler) для подтверждения невалидных сертификатов.                                           |
+|                             | Например,  ``<invalidCertificateHandler> <name>ConsoleCertificateHandler</name> </invalidCertificateHandler>``.          |
++-----------------------------+--------------------------------------------------------------------------------------------------------------------------+
+| disableProtocols            | Запрещенные к использованию протоколы.                                                                                   |
++-----------------------------+--------------------------------------------------------------------------------------------------------------------------+
+| preferServerCiphers         | Предпочтение серверных шифров на клиенте.                                                                                |
++-----------------------------+--------------------------------------------------------------------------------------------------------------------------+
+
+  
+  
+**Пример настройки:**
+
+.. code-block:: xml
+
+    <openSSL>
+        <server>
+            <!-- openssl req -subj "/CN=localhost" -new -newkey rsa:2048 -days 365 -nodes -x509 -keyout /etc/clickhouse-server/server.key -out /etc/clickhouse-server/server.crt -->
+            <certificateFile>/etc/clickhouse-server/server.crt</certificateFile>
+            <privateKeyFile>/etc/clickhouse-server/server.key</privateKeyFile>
+            <!-- openssl dhparam -out /etc/clickhouse-server/dhparam.pem 4096 -->
+            <dhParamsFile>/etc/clickhouse-server/dhparam.pem</dhParamsFile>
+            <verificationMode>none</verificationMode>
+            <loadDefaultCAFile>true</loadDefaultCAFile>
+            <cacheSessions>true</cacheSessions>
+            <disableProtocols>sslv2,sslv3</disableProtocols>
+            <preferServerCiphers>true</preferServerCiphers>
+        </server>
+        <client>
+            <loadDefaultCAFile>true</loadDefaultCAFile>
+            <cacheSessions>true</cacheSessions>
+            <disableProtocols>sslv2,sslv3</disableProtocols>
+            <preferServerCiphers>true</preferServerCiphers>
+            <!-- Use for self-signed: <verificationMode>none</verificationMode> -->
+            <invalidCertificateHandler>
+                <!-- Use for self-signed: <name>AcceptCertificateHandler</name> -->
+                <name>RejectCertificateHandler</name>
+            </invalidCertificateHandler>
+        </client>
+    </openSSL>
+
+.. _server_settings-part_log:
+
+part_log
+--------
+
+Логгирование событий, связанных с данными типа :ref:`table_engines-mergetree`. Например, события добавления или мержа данных. Лог можно использовать для симуляции алгоритмов слияния, чтобы сравнивать их характеристики. Также можно визуализировать процесс слияния.
+
+События логгируются не в отдельный файл, а в таблицу ClickHouse.
+
+Столбцы лога:
+
++---------------+-----------------------------------------------------------------------------------------------------------------------+
+| Столбец       | Описание                                                                                                              |
++===============+=======================================================================================================================+
+| event_time    | Дата события.                                                                                                         |
++---------------+-----------------------------------------------------------------------------------------------------------------------+
+| duration_ms   | Время события.                                                                                                        |
++---------------+-----------------------------------------------------------------------------------------------------------------------+
+| event_type    | Тип события. 1 - кусок новый, 2 - результат мержа, 3 - кусок скачан с реплики, 4 - кусок удаляется.                   |
++---------------+-----------------------------------------------------------------------------------------------------------------------+
+| database_name | Имя базы данных.                                                                                                      |
++---------------+-----------------------------------------------------------------------------------------------------------------------+
+| table_name    | Имя таблицы.                                                                                                          |
++---------------+-----------------------------------------------------------------------------------------------------------------------+
+| part_name     | Имя куска данных.                                                                                                     |
++---------------+-----------------------------------------------------------------------------------------------------------------------+
+| size_in_bytes | Размер куска данных в байтах.                                                                                         |
++---------------+-----------------------------------------------------------------------------------------------------------------------+
+| merged_from   | Массив имён кусков, из которых он образован при мерже (также заполняется в случае скачивания уже смерженного куска).  |
++---------------+-----------------------------------------------------------------------------------------------------------------------+
+| merge_time_ms | Время, потраченное на мерж.                                                                                           |
++---------------+-----------------------------------------------------------------------------------------------------------------------+
+
+При настройке логгирования используются следующие параметры:
+
++-----------------------------+-----------------------------------------------------+
+| Параметр                    | Описание                                            |
++=============================+=====================================================+
+| database                    | Имя базы данных.                                    |
++-----------------------------+-----------------------------------------------------+
+| table                       | Имя таблицы.                                        |
++-----------------------------+-----------------------------------------------------+
+| flush_interval_milliseconds | Период сброса данных из оперативной памяти на диск. |
++-----------------------------+-----------------------------------------------------+
+
+
+**Пример**
+
+.. code-block:: xml
+
+    <part_log>
+        <database>system</database>
+        <table>part_log</table>
+        <flush_interval_milliseconds>7500</flush_interval_milliseconds>
+    </part_log>
+
+
+.. _server_settings-path:
+
+path
+----
+Путь к каталогу с данными. 
+
+.. warning:: Завершающий слеш обязателен.
+
+**Пример**
+
+.. code-block:: xml
+
+  <path>/var/lib/clickhouse/</path>
+
+.. _server_settings-query_log:
+
+query_log
+---------
+
+Настройка логгирования запросов, принятых с настройкой :ref:`log_queries=1 <settings-log_queries>`.
+
+Запросы логгируются не в отдельный файл, а в таблицу ClickHouse.
+
+При настройке логгирования используются следующие параметры:
+
++-----------------------------+-----------------------------------------------------+
+| Параметр                    | Описание                                            |
++=============================+=====================================================+
+| database                    | Имя базы данных.                                    |
++-----------------------------+-----------------------------------------------------+
+| table                       | Имя таблицы.                                        |
++-----------------------------+-----------------------------------------------------+
+| flush_interval_milliseconds | Период сброса данных из оперативной памяти на диск. |
++-----------------------------+-----------------------------------------------------+
+
+Если таблица не существует, то ClickHouse создаст её. Если структура журнала запросов изменилась при обновлении сервера ClickHouse, то таблица со старой структурой переименовывается, а новая таблица создается автоматически. 
+
+**Пример**
+
+.. code-block:: xml
+
+    <query_log>
+        <database>system</database>
+        <table>query_log</table>
+        <flush_interval_milliseconds>7500</flush_interval_milliseconds>
+    </query_log>
+
+
+.. _server_settings-remote_servers:
+
+remote_servers
+--------------
+Конфигурация кластеров, которые использует движок таблиц Distributed.
+
+Пример настройки смотрите в разделе :ref:`Движки таблиц/Distributed <table_engines-distributed>`.
+
+**Пример**
+
+.. code-block:: xml
+
+    <remote_servers incl="clickhouse_remote_servers" />
+
+Значение атрибута ``incl`` смотрите в разделе :ref:`configuration_files`.
+
+.. _server_settings-resharding:
+
+resharding
+----------
+
+Путь в ZooKeeper к очереди задач.
+
+Подробнее читайте в разделе :ref:`table_engines-resharding`.
+
+**Пример**
+
+.. code-block:: xml
+
+    <resharding>
+        <task_queue_path>/clickhouse/task_queue</task_queue_path>
+    </resharding>
+
+
+.. _server_settings-timezone:
+
+timezone
+--------
+Временная зона сервера.
+
+Указывается идентификатором IANA в виде часового пояса UTC или географического положения (например, Africa/Abidjan).
+
+Временная зона необходима при преобразованиях между форматами String и DateTime, которые возникают при выводе полей DateTime в текстовый формат (на экран или в файл) и при получении DateTime из строки. Также временная зона используется в функциях, которые работают со временем и датой, если они не получили временную зону в параметрах вызова.
+
+**Пример**
+
+.. code-block:: xml
+
+    <timezone>Europe/Moscow</timezone>
+
+
+.. _server_settings-tcp_port:
+
+tcp_port
+--------
+
+Порт для взаимодействия с клиентами по протоколу TCP.
+
+**Пример**
+
+.. code-block:: xml
+
+    <tcp_port>9000</tcp_port>
+
+
+.. _server_settings-tmp_path:
+
+tmp_path
+--------
+Путь ко временным данным для обработки больших запросов.
+
+.. warning:: Завершающий слеш обязателен.
+
+**Пример**
+
+.. code-block:: xml
+
+    <tmp_path>/var/lib/clickhouse/tmp/</tmp_path>
+
+.. _server_settings-uncompressed_cache_size:
+
+uncompressed_cache_size
+-----------------------
+Размер кеша (в байтах) для несжатых данных, используемых движками таблиц семейства :ref:`table_engines-mergetree`.
+
+Кеш единый для сервера. Память выделяется по требованию. Кеш используется в том случае, если включена опция :ref:`settings-use_uncompressed_cache`.
+
+Несжатый кеш выгодно использовать для очень коротких запросов в отдельных случаях.
+
+**Пример**
+
+.. code-block:: xml
+
+    <uncompressed_cache_size>8589934592</uncompressed_cache_size>
+
+
+.. _server_settings-users_config:
+
+users_config
+------------
+
+Путь к файлу, который содержит:
+  - Конфигурации пользователей.
+  - Права доступа.
+  - Профили настроек.
+  - Настройки квот.
+
+**Пример**
+
+.. code-block:: xml
+
+    <users_config>users.xml</users_config>
+
+
+.. _server_settings-zookeeper:
+
+zookeeper
+---------
+Конфигурация серверов ZooKeeper.
+
+ClickHouse использует ZooKeeper для хранения метаданных о репликах при использовании реплицированных таблиц.
+
+Параметр можно не указывать, если реплицированные таблицы не используются.
+
+Подробно о репликации читайте в разделе :ref:`table_engines-replication`.
+
+**Пример**
+
+.. code-block:: xml
+
+    <zookeeper incl="zookeeper-servers" optional="true" />
\ No newline at end of file
diff --git a/docs/ru/operations/settings/index.rst b/docs/ru/operations/settings/index.rst
index 8e90e22a3a8..e1e23052f35 100644
--- a/docs/ru/operations/settings/index.rst
+++ b/docs/ru/operations/settings/index.rst
@@ -1,3 +1,5 @@
+.. _settings:
+
 Настройки
 =========
 
diff --git a/docs/ru/operations/settings/settings.rst b/docs/ru/operations/settings/settings.rst
index 2110064713c..ed6f78ff00d 100644
--- a/docs/ru/operations/settings/settings.rst
+++ b/docs/ru/operations/settings/settings.rst
@@ -85,6 +85,19 @@ preferred_block_size_bytes
 При этом размер блока не может быть более ``max_block_size`` строк.
 По-умолчанию выключен (равен 0), работает только при чтении из MergeTree-движков.
 
+.. _settings-log_queries:
+
+log_queries
+------------
+
+Установка логгирования запроса.
+
+Запросы, переданные в ClickHouse с этой установкой, логгируются согласно правилам конфигурационного параметра сервера :ref:`server_settings-query_log`.
+
+**Пример** ::
+
+  log_queries=1
+
 max_insert_block_size
 ---------------------
 Формировать блоки указанного размера, при вставке в таблицу.
@@ -157,6 +170,7 @@ max_query_size
 interactive_delay
 -----------------
 Интервал в микросекундах для проверки, не запрошена ли остановка выполнения запроса, и отправки прогресса.
+
 По умолчанию - 100 000 (проверять остановку запроса и отправлять прогресс десять раз в секунду).
 
 connect_timeout
@@ -209,6 +223,8 @@ extremes
 Считать ли экстремальные значения (минимумы и максимумы по столбцам результата запроса). Принимает 0 или 1. По умолчанию - 0 (выключено).
 Подробнее смотрите раздел "Экстремальные значения".
 
+.. _settings-use_uncompressed_cache:
+
 use_uncompressed_cache
 ----------------------
 Использовать ли кэш разжатых блоков. Принимает 0 или 1. По умолчанию - 0 (выключено).
diff --git a/docs/ru/query_language/queries.rst b/docs/ru/query_language/queries.rst
index 0b550f57395..90fbbf45d22 100644
--- a/docs/ru/query_language/queries.rst
+++ b/docs/ru/query_language/queries.rst
@@ -420,6 +420,8 @@ ALTER
 Для запросов ``ALTER ... ATTACH|DETACH|DROP`` можно настроить ожидание, с помощью настройки ``replication_alter_partitions_sync``.
 Возможные значения: ``0`` - не ждать, ``1`` - ждать выполнения только у себя (по умолчанию), ``2`` - ждать всех.
 
+.. _query_language_queries_show_databases:
+
 SHOW DATABASES
 ~~~~~~~~~~~~~~
 
diff --git a/docs/ru/system_tables/system.asynchronous_metrics.rst b/docs/ru/system_tables/system.asynchronous_metrics.rst
index d12131acbd7..38da983aa4b 100644
--- a/docs/ru/system_tables/system.asynchronous_metrics.rst
+++ b/docs/ru/system_tables/system.asynchronous_metrics.rst
@@ -1,3 +1,5 @@
+.. _system_tables-system.asynchronous_metrics:
+
 system.asynchronous_metrics
 ---------------------------
 
diff --git a/docs/ru/system_tables/system.events.rst b/docs/ru/system_tables/system.events.rst
index 3f4ab0c90f5..8168559d25b 100644
--- a/docs/ru/system_tables/system.events.rst
+++ b/docs/ru/system_tables/system.events.rst
@@ -1,3 +1,5 @@
+.. _system_tables-system.events:
+
 system.events
 -------------
 
diff --git a/docs/ru/system_tables/system.metrics.rst b/docs/ru/system_tables/system.metrics.rst
index dee53b399e6..25038d3e92e 100644
--- a/docs/ru/system_tables/system.metrics.rst
+++ b/docs/ru/system_tables/system.metrics.rst
@@ -1,2 +1,4 @@
+.. _system_tables-system.metrics:
+
 system.metrics
 --------------
diff --git a/docs/ru/table_engines/distributed.rst b/docs/ru/table_engines/distributed.rst
index 75b73737b10..4ab76d1e8c1 100644
--- a/docs/ru/table_engines/distributed.rst
+++ b/docs/ru/table_engines/distributed.rst
@@ -1,3 +1,5 @@
+.. _table_engines-distributed:
+
 Distributed
 -----------
 
diff --git a/docs/ru/table_engines/graphitemergetree.rst b/docs/ru/table_engines/graphitemergetree.rst
index d036f267ec6..5b741522061 100644
--- a/docs/ru/table_engines/graphitemergetree.rst
+++ b/docs/ru/table_engines/graphitemergetree.rst
@@ -1,3 +1,5 @@
+.. _table_engines-graphitemergetree:
+
 GraphiteMergeTree
 -----------------
 
@@ -13,7 +15,7 @@ Graphite хранит в ClickHouse полные данные, а получат
   
   Используется движок ``GraphiteMergeTree``.
 
-Движок наследует свойства `MergeTree`. Настройки прореживания данных размещаются в :ref:`общей конфигурации <configuration_files>` ClickHouse (config.xml).
+Движок наследует свойства `MergeTree`. Настройки прореживания данных задаются параметром :ref:`server_settings-graphite_rollup` в конфигурации сервера.
 
 Использование движка
 ^^^^^^^^^^^^^^^^^^^^
@@ -29,7 +31,7 @@ Graphite хранит в ClickHouse полные данные, а получат
 
 .. code-block:: text
 
-    pattern
+    pattern
         regexp
         function
         age -> precision
@@ -45,19 +47,17 @@ Graphite хранит в ClickHouse полные данные, а получат
    
 Поля шаблона правил.
 
-.. code-block:: text
-
-    +---------------+----------------------------------------------------------------------------------------------------------------------------+
-    | Поле          | Описание                                                                                                                   |
-    +===============+============================================================================================================================+
-    | ``age``       | Минимальный возраст данных в секундах.                                                                                     |
-    +---------------+----------------------------------------------------------------------------------------------------------------------------+
-    | ``function``  | Имя агрегирующей функции, которую следует применить к данным, чей возраст оказался в интервале ``[age, age + precision]``. |
-    +---------------+----------------------------------------------------------------------------------------------------------------------------+
-    | ``precision`` | Точность определения возраста данных в секундах.                                                                           |
-    +---------------+----------------------------------------------------------------------------------------------------------------------------+
-    | ``regexp``    | Шаблон имени метрики.                                                                                                      |
-    +---------------+----------------------------------------------------------------------------------------------------------------------------+
++---------------+----------------------------------------------------------------------------------------------------------------------------+
+| Поле          | Описание                                                                                                                   |
++===============+============================================================================================================================+
+| ``age``       | Минимальный возраст данных в секундах.                                                                                     |
++---------------+----------------------------------------------------------------------------------------------------------------------------+
+| ``function``  | Имя агрегирующей функции, которую следует применить к данным, чей возраст оказался в интервале ``[age, age + precision]``. |
++---------------+----------------------------------------------------------------------------------------------------------------------------+
+| ``precision`` | Точность определения возраста данных в секундах.                                                                           |
++---------------+----------------------------------------------------------------------------------------------------------------------------+
+| ``regexp``    | Шаблон имени метрики.                                                                                                      |
++---------------+----------------------------------------------------------------------------------------------------------------------------+
 
 
 Пример настройки: 
diff --git a/docs/ru/table_engines/replication.rst b/docs/ru/table_engines/replication.rst
index c7d8e84dc68..83f97928dea 100644
--- a/docs/ru/table_engines/replication.rst
+++ b/docs/ru/table_engines/replication.rst
@@ -67,6 +67,8 @@ ReplicatedSummingMergeTree
 
 Система следит за синхронностью данных на репликах и умеет восстанавливаться после сбоя. Восстановление после сбоя автоматическое (в случае небольших различий в данных) или полуавтоматическое (когда данные отличаются слишком сильно, что может свидетельствовать об ошибке конфигурации).
 
+.. _table_engines-replication-creation_of_rep_tables:
+
 Создание реплицируемых таблиц
 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
 
diff --git a/docs/ru/table_engines/resharding.rst b/docs/ru/table_engines/resharding.rst
index af92fc9b127..06ac38967a2 100644
--- a/docs/ru/table_engines/resharding.rst
+++ b/docs/ru/table_engines/resharding.rst
@@ -1,3 +1,5 @@
+.. _table_engines-resharding:
+
 Перешардирование
 ----------------
 

From b3157aebb21f5ae6e44ec12d415b2abba76f2b88 Mon Sep 17 00:00:00 2001
From: Nikolai Kochetov <nik-kochetov@yandex-team.ru>
Date: Tue, 25 Jul 2017 22:42:36 +0300
Subject: [PATCH 002/281] added synchronous insert into distributed table
 [#CLICKHOUSE-3033]

---
 dbms/src/Interpreters/Settings.h              | 10 ++-
 .../Storages/Distributed/DirectoryMonitor.h   |  2 +
 .../DistributedBlockOutputStream.cpp          | 75 ++++++++++++++++++-
 .../DistributedBlockOutputStream.h            | 23 +++++-
 dbms/src/Storages/StorageDistributed.cpp      | 14 ++--
 dbms/src/Storages/StorageDistributed.h        |  6 +-
 6 files changed, 114 insertions(+), 16 deletions(-)

diff --git a/dbms/src/Interpreters/Settings.h b/dbms/src/Interpreters/Settings.h
index a3bf110aa36..e41d6ec1375 100644
--- a/dbms/src/Interpreters/Settings.h
+++ b/dbms/src/Interpreters/Settings.h
@@ -286,7 +286,15 @@ struct Settings
     M(SettingBool, distributed_ddl_allow_replicated_alter, 0) \
     /** Limit on max column size in block while reading. Helps to decrease cache misses count. \
       * Should be close to L2 cache size. */ \
-    M(SettingUInt64, preferred_max_column_in_block_size_bytes, 250000)
+    M(SettingUInt64, preferred_max_column_in_block_size_bytes, 250000) \
+    \
+    /** If setting is enabled, insert query into distributed table waits until data is sent to all nodes in cluster. \
+     */ \
+    M(SettingBool, insert_distributed_sync, 0) \
+    /** Timeout (in seconds) for insert query into distributed table. Setting is used only with insert_distributed_sync enabled. \
+     *  Zero value means no timeout. \
+     */ \
+    M(SettingUInt64, insert_distributed_timeout, 0)
 
 
     /// Possible limits for query execution.
diff --git a/dbms/src/Storages/Distributed/DirectoryMonitor.h b/dbms/src/Storages/Distributed/DirectoryMonitor.h
index fc34198cdbe..80f6f6ad16d 100644
--- a/dbms/src/Storages/Distributed/DirectoryMonitor.h
+++ b/dbms/src/Storages/Distributed/DirectoryMonitor.h
@@ -19,6 +19,8 @@ public:
     StorageDistributedDirectoryMonitor(StorageDistributed & storage, const std::string & name);
     ~StorageDistributedDirectoryMonitor();
 
+    const ConnectionPoolPtr & getPool() const { return pool; }
+
 private:
     void run();
     ConnectionPoolPtr createPool(const std::string & name);
diff --git a/dbms/src/Storages/Distributed/DistributedBlockOutputStream.cpp b/dbms/src/Storages/Distributed/DistributedBlockOutputStream.cpp
index 4eb68848531..e3ce6cf689a 100644
--- a/dbms/src/Storages/Distributed/DistributedBlockOutputStream.cpp
+++ b/dbms/src/Storages/Distributed/DistributedBlockOutputStream.cpp
@@ -1,4 +1,5 @@
 #include <Storages/Distributed/DistributedBlockOutputStream.h>
+#include <Storages/Distributed/DirectoryMonitor.h>
 #include <Storages/StorageDistributed.h>
 
 #include <Parsers/formatAST.h>
@@ -7,25 +8,42 @@
 #include <IO/WriteBufferFromFile.h>
 #include <IO/CompressedWriteBuffer.h>
 #include <DataStreams/NativeBlockOutputStream.h>
+#include <DataStreams/RemoteBlockOutputStream.h>
 #include <Interpreters/InterpreterInsertQuery.h>
 #include <Interpreters/Cluster.h>
 #include <Interpreters/createBlockSelector.h>
 
 #include <DataTypes/DataTypesNumber.h>
 #include <Common/ClickHouseRevision.h>
+#include <Common/CurrentMetrics.h>
 #include <Common/typeid_cast.h>
+#include <Common/Exception.h>
+#include <common/logger_useful.h>
 
 #include <Poco/DirectoryIterator.h>
 
-#include <memory>
 #include <iostream>
+#include <future>
+
+namespace CurrentMetrics
+{
+    extern const Metric DistributedSend;
+}
 
 
 namespace DB
 {
 
-DistributedBlockOutputStream::DistributedBlockOutputStream(StorageDistributed & storage, const ASTPtr & query_ast, const ClusterPtr & cluster_)
-    : storage(storage), query_ast(query_ast), cluster(cluster_)
+namespace ErrorCodes
+{
+    extern const int TIMEOUT_EXCEEDED;
+}
+
+DistributedBlockOutputStream::DistributedBlockOutputStream(StorageDistributed & storage, const ASTPtr & query_ast,
+                                                           const ClusterPtr & cluster_, bool insert_sync_, UInt64 insert_timeout_)
+    : storage(storage), query_ast(query_ast), cluster(cluster_), insert_sync(insert_sync_), insert_timeout(insert_timeout_),
+      deadline(std::chrono::system_clock::now() + std::chrono::seconds(insert_timeout)),
+      log(&Poco::Logger::get("DistributedBlockOutputStream"))
 {
 }
 
@@ -36,6 +54,7 @@ void DistributedBlockOutputStream::write(const Block & block)
         return writeSplit(block);
 
     writeImpl(block);
+    ++blocks_inserted;
 }
 
 
@@ -94,6 +113,8 @@ void DistributedBlockOutputStream::writeSplit(const Block & block)
     for (size_t shard_idx = 0; shard_idx < num_shards; ++shard_idx)
         if (splitted_blocks[shard_idx].rows())
             writeImpl(splitted_blocks[shard_idx], shard_idx);
+
+    ++blocks_inserted;
 }
 
 
@@ -105,7 +126,19 @@ void DistributedBlockOutputStream::writeImpl(const Block & block, const size_t s
 
     /// dir_names is empty if shard has only local addresses
     if (!shard_info.dir_names.empty())
-        writeToShard(block, shard_info.dir_names);
+    {
+        if (!insert_sync)
+            writeToShard(block, shard_info.dir_names);
+        else
+        {
+            std::atomic<bool> timeout_exceeded(false);
+            auto result = std::async(std::launch::async, &DistributedBlockOutputStream::writeToShardDirect,
+                                     this, std::cref(block), std::cref(shard_info.dir_names), std::ref(timeout_exceeded));
+            if (insert_timeout && result.wait_until(deadline) == std::future_status::timeout)
+                timeout_exceeded = true;
+            result.get();
+        }
+    }
 }
 
 
@@ -123,6 +156,40 @@ void DistributedBlockOutputStream::writeToLocal(const Block & block, const size_
 }
 
 
+void DistributedBlockOutputStream::writeToShardDirect(const Block & block, const std::vector<std::string> & dir_names, std::atomic<bool> & timeout_exceeded)
+{
+    const auto & query_string = queryToString(query_ast);
+    for (const auto & dir_name : dir_names)
+    {
+        auto & monitor = storage.requireDirectoryMonitor(dir_name);
+        auto & pool = monitor.getPool();
+        auto connection = pool->get();
+
+        CurrentMetrics::Increment metric_increment{CurrentMetrics::DistributedSend};
+
+        if (timeout_exceeded)
+            throw Exception("Timeout exceeded. Inserted blocks: " + std::to_string(blocks_inserted), ErrorCodes::TIMEOUT_EXCEEDED);
+
+        try
+        {
+            RemoteBlockOutputStream remote{*connection, query_string};
+
+            remote.writePrefix();
+            remote.write(block);
+            remote.writeSuffix();
+        }
+        catch (Exception & exception)
+        {
+            std::string message = "\nWhile insertion to ";
+            message += connection->getDescription();
+            message += " Inserted blocks: " + std::to_string(blocks_inserted);
+            exception.addMessage(message);
+            LOG_ERROR(log, message);
+            exception.rethrow();
+        }
+    }
+}
+
 void DistributedBlockOutputStream::writeToShard(const Block & block, const std::vector<std::string> & dir_names)
 {
     /** tmp directory is used to ensure atomicity of transactions
diff --git a/dbms/src/Storages/Distributed/DistributedBlockOutputStream.h b/dbms/src/Storages/Distributed/DistributedBlockOutputStream.h
index 9b5cf3cbbcc..82cfcc09f76 100644
--- a/dbms/src/Storages/Distributed/DistributedBlockOutputStream.h
+++ b/dbms/src/Storages/Distributed/DistributedBlockOutputStream.h
@@ -3,12 +3,21 @@
 #include <Parsers/formatAST.h>
 #include <DataStreams/IBlockOutputStream.h>
 #include <Core/Block.h>
-#include <Interpreters/Cluster.h>
+#include <atomic>
+#include <memory>
+#include <chrono>
+
+namespace Poco
+{
+    class Logger;
+}
 
 namespace DB
 {
 
 class StorageDistributed;
+class Cluster;
+using ClusterPtr = std::shared_ptr<Cluster>;
 
 /** The write is asynchronous - the data is first written to the local filesystem, and then sent to the remote servers.
  *  If the Distributed table uses more than one shard, then in order to support the write,
@@ -21,10 +30,12 @@ class StorageDistributed;
 class DistributedBlockOutputStream : public IBlockOutputStream
 {
 public:
-    DistributedBlockOutputStream(StorageDistributed & storage, const ASTPtr & query_ast, const ClusterPtr & cluster_);
+    DistributedBlockOutputStream(StorageDistributed & storage, const ASTPtr & query_ast, const ClusterPtr & cluster_, bool insert_sync_, UInt64 insert_timeout_ = 0);
 
     void write(const Block & block) override;
 
+    void writePrefix() override { deadline = std::chrono::system_clock::now() + std::chrono::seconds(insert_timeout); }
+
 private:
     IColumn::Selector createSelector(Block block);
 
@@ -36,10 +47,18 @@ private:
 
     void writeToShard(const Block & block, const std::vector<std::string> & dir_names);
 
+    void writeToShardDirect(const Block & block, const std::vector<std::string> & dir_names, std::atomic<bool> & timeout_exceeded);
+
 private:
     StorageDistributed & storage;
     ASTPtr query_ast;
     ClusterPtr cluster;
+    bool insert_sync = true;
+    UInt64 insert_timeout = 1;
+    size_t blocks_inserted = 0;
+    std::chrono::system_clock::time_point deadline;
+
+    Poco::Logger * log;
 };
 
 }
diff --git a/dbms/src/Storages/StorageDistributed.cpp b/dbms/src/Storages/StorageDistributed.cpp
index 02684f706a0..a3a891bad8c 100644
--- a/dbms/src/Storages/StorageDistributed.cpp
+++ b/dbms/src/Storages/StorageDistributed.cpp
@@ -245,7 +245,7 @@ BlockOutputStreamPtr StorageDistributed::write(const ASTPtr & query, const Setti
 
     /// DistributedBlockOutputStream will not own cluster, but will own ConnectionPools of the cluster
     return std::make_shared<DistributedBlockOutputStream>(
-        *this, rewriteInsertQuery(query, remote_database, remote_table), cluster);
+        *this, rewriteInsertQuery(query, remote_database, remote_table), cluster, settings.insert_distributed_sync, settings.insert_distributed_timeout);
 }
 
 
@@ -456,9 +456,9 @@ bool StorageDistributed::hasColumn(const String & column_name) const
 }
 
 
-void StorageDistributed::createDirectoryMonitor(const std::string & name)
+StorageDistributedDirectoryMonitor & StorageDistributed::createDirectoryMonitor(const std::string & name)
 {
-    directory_monitors.emplace(name, std::make_unique<StorageDistributedDirectoryMonitor>(*this, name));
+    return *(directory_monitors.emplace(name, std::make_unique<StorageDistributedDirectoryMonitor>(*this, name)).first->second);
 }
 
 
@@ -477,10 +477,12 @@ void StorageDistributed::createDirectoryMonitors()
 }
 
 
-void StorageDistributed::requireDirectoryMonitor(const std::string & name)
+StorageDistributedDirectoryMonitor & StorageDistributed::requireDirectoryMonitor(const std::string & name)
 {
-    if (!directory_monitors.count(name))
-        createDirectoryMonitor(name);
+    auto it = directory_monitors.find(name);
+    if (it == directory_monitors.end())
+        return createDirectoryMonitor(name);
+    return *it->second;
 }
 
 size_t StorageDistributed::getShardCount() const
diff --git a/dbms/src/Storages/StorageDistributed.h b/dbms/src/Storages/StorageDistributed.h
index 128f0da1efb..09bec125048 100644
--- a/dbms/src/Storages/StorageDistributed.h
+++ b/dbms/src/Storages/StorageDistributed.h
@@ -118,11 +118,11 @@ private:
 
 
     /// create directory monitor thread by subdirectory name
-    void createDirectoryMonitor(const std::string & name);
+    StorageDistributedDirectoryMonitor & createDirectoryMonitor(const std::string & name);
     /// create directory monitors for each existing subdirectory
     void createDirectoryMonitors();
-    /// ensure directory monitor creation
-    void requireDirectoryMonitor(const std::string & name);
+    /// ensure directory monitor creation and return it
+    StorageDistributedDirectoryMonitor & requireDirectoryMonitor(const std::string & name);
 
     ClusterPtr getCluster() const;
 

From 19d3c36871b7520afdc56e58c8d22afa6152cd1b Mon Sep 17 00:00:00 2001
From: Nikolai Kochetov <nik-kochetov@yandex-team.ru>
Date: Wed, 26 Jul 2017 17:41:21 +0300
Subject: [PATCH 003/281] added tests for synchronous insert into distributed
 table [#CLICKHOUSE-3033]

---
 dbms/tests/integration/helpers/client.py      | 16 +++-
 .../__init__.py                               |  0
 .../configs/remote_servers.xml                | 16 ++++
 .../test_sync_insert_into_distributed/test.py | 84 +++++++++++++++++++
 4 files changed, 112 insertions(+), 4 deletions(-)
 create mode 100644 dbms/tests/integration/test_sync_insert_into_distributed/__init__.py
 create mode 100644 dbms/tests/integration/test_sync_insert_into_distributed/configs/remote_servers.xml
 create mode 100644 dbms/tests/integration/test_sync_insert_into_distributed/test.py

diff --git a/dbms/tests/integration/helpers/client.py b/dbms/tests/integration/helpers/client.py
index a7479efde36..919807919e3 100644
--- a/dbms/tests/integration/helpers/client.py
+++ b/dbms/tests/integration/helpers/client.py
@@ -19,6 +19,14 @@ class Client:
         return QueryRequest(self, sql, stdin, timeout)
 
 
+class QueryTimeoutExceedException(Exception):
+    pass
+
+
+class QueryRuntimeException(Exception):
+    pass
+
+
 class QueryRequest:
     def __init__(self, client, sql, stdin=None, timeout=None):
         self.client = client
@@ -61,11 +69,11 @@ class QueryRequest:
         stdout = self.stdout_file.read()
         stderr = self.stderr_file.read()
 
-        if self.process.returncode != 0 or stderr:
-            raise Exception('Client failed! Return code: {}, stderr: {}'.format(self.process.returncode, stderr))
-
         if self.timer is not None and not self.process_finished_before_timeout:
-            raise Exception('Client timed out!')
+            raise QueryTimeoutExceedException('Client timed out!')
+
+        if self.process.returncode != 0 or stderr:
+            raise QueryRuntimeException('Client failed! Return code: {}, stderr: {}'.format(self.process.returncode, stderr))
 
         return stdout
 
diff --git a/dbms/tests/integration/test_sync_insert_into_distributed/__init__.py b/dbms/tests/integration/test_sync_insert_into_distributed/__init__.py
new file mode 100644
index 00000000000..e69de29bb2d
diff --git a/dbms/tests/integration/test_sync_insert_into_distributed/configs/remote_servers.xml b/dbms/tests/integration/test_sync_insert_into_distributed/configs/remote_servers.xml
new file mode 100644
index 00000000000..3593cbd7f36
--- /dev/null
+++ b/dbms/tests/integration/test_sync_insert_into_distributed/configs/remote_servers.xml
@@ -0,0 +1,16 @@
+<yandex>
+    <remote_servers>
+        <test_cluster>
+            <shard>
+                <replica>
+                    <host>node1</host>
+                    <port>9000</port>
+                </replica>
+                <replica>
+                    <host>node2</host>
+                    <port>9000</port>
+                </replica>
+            </shard>
+        </test_cluster>
+    </remote_servers>
+</yandex>
diff --git a/dbms/tests/integration/test_sync_insert_into_distributed/test.py b/dbms/tests/integration/test_sync_insert_into_distributed/test.py
new file mode 100644
index 00000000000..3af21683e57
--- /dev/null
+++ b/dbms/tests/integration/test_sync_insert_into_distributed/test.py
@@ -0,0 +1,84 @@
+from contextlib import contextmanager
+from helpers.network import PartitionManager
+
+import pytest
+
+from helpers.cluster import ClickHouseCluster
+from helpers.client import QueryRuntimeException, QueryTimeoutExceedException
+
+cluster = ClickHouseCluster(__file__)
+
+node1 = cluster.add_instance('node1', main_configs=['configs/remote_servers.xml'])
+node2 = cluster.add_instance('node2', main_configs=['configs/remote_servers.xml'])
+
+@pytest.fixture(scope="module")
+def started_cluster():
+    try:
+        cluster.start()
+
+        for node in (node1, node2):
+            node.query('''
+CREATE TABLE local_table(date Date, val UInt64) ENGINE = MergeTree(date, (date, val), 8192);
+''')
+
+
+        node1.query('''
+CREATE TABLE distributed_table(date Date, val UInt64) ENGINE = Distributed(test_cluster, default, local_table)
+''')
+
+        yield cluster
+
+    finally:
+        cluster.shutdown()
+
+
+def test_insertion_sync(started_cluster):
+
+    node1.query('''SET insert_distributed_sync = 1, insert_distributed_timeout = 0;
+    INSERT INTO distributed_table SELECT today() as date, number as val FROM system.numbers LIMIT 10000''')
+
+    assert node2.query("SELECT count() FROM local_table").rstrip() == '10000'
+
+    node1.query('''
+    SET insert_distributed_sync = 1, insert_distributed_timeout = 1;
+    INSERT INTO distributed_table SELECT today() - 1 as date, number as val FROM system.numbers LIMIT 10000''')
+
+    assert node2.query("SELECT count() FROM local_table").rstrip() == '20000'
+
+"""
+def test_insertion_sync_fails_on_error(started_cluster):
+    with PartitionManager() as pm:
+        pm.partition_instances(node2, node1, action='REJECT --reject-with tcp-reset')
+        with pytest.raises(QueryRuntimeException):
+            node1.query('''
+            SET insert_distributed_sync = 1, insert_distributed_timeout = 0;
+            INSERT INTO distributed_table SELECT today() as date, number as val FROM system.numbers''', timeout=2)
+"""
+
+
+def test_insertion_sync_fails_with_timeout(started_cluster):
+    with pytest.raises(QueryRuntimeException):
+        node1.query('''
+        SET insert_distributed_sync = 1, insert_distributed_timeout = 1;
+        INSERT INTO distributed_table SELECT today() as date, number as val FROM system.numbers''', timeout=1.5)
+
+
+def test_insertion_without_sync_ignores_timeout(started_cluster):
+    with pytest.raises(QueryTimeoutExceedException):
+        node1.query('''
+        SET insert_distributed_sync = 0, insert_distributed_timeout = 1;
+        INSERT INTO distributed_table SELECT today() as date, number as val FROM system.numbers''', timeout=1.5)
+
+
+def test_insertion_sync_with_disabled_timeout(started_cluster):
+    with pytest.raises(QueryTimeoutExceedException):
+        node1.query('''
+        SET insert_distributed_sync = 1, insert_distributed_timeout = 0;
+        INSERT INTO distributed_table SELECT today() as date, number as val FROM system.numbers''', timeout=1)
+
+
+if __name__ == '__main__':
+    with contextmanager(started_cluster)() as cluster:
+        for name, instance in cluster.instances.items():
+            print name, instance.ip_address
+        raw_input("Cluster created, press any key to destroy...")

From 2f8f199d08c14402bee6e0403749d3edac51dfd2 Mon Sep 17 00:00:00 2001
From: Nikolai Kochetov <nik-kochetov@yandex-team.ru>
Date: Thu, 27 Jul 2017 18:24:39 +0300
Subject: [PATCH 004/281] separated connection pool creation from
 StorageDistributedDirectoryMonitor in StorageDistributed; fixed bugs from
 review [#CLICKHOUSE-3033]

---
 dbms/src/Common/ProfileEvents.cpp             |  2 +
 .../Storages/Distributed/DirectoryMonitor.cpp | 12 ++---
 .../Storages/Distributed/DirectoryMonitor.h   |  5 +-
 .../DistributedBlockOutputStream.cpp          | 46 +++++++++++++------
 .../DistributedBlockOutputStream.h            | 19 ++++----
 dbms/src/Storages/StorageDistributed.cpp      | 38 +++++++++------
 dbms/src/Storages/StorageDistributed.h        | 21 ++++++---
 7 files changed, 92 insertions(+), 51 deletions(-)

diff --git a/dbms/src/Common/ProfileEvents.cpp b/dbms/src/Common/ProfileEvents.cpp
index 12dfecf576c..06d1cd59e0b 100644
--- a/dbms/src/Common/ProfileEvents.cpp
+++ b/dbms/src/Common/ProfileEvents.cpp
@@ -122,6 +122,8 @@
     M(DictCacheRequests) \
     M(DictCacheLockWriteNs) \
     M(DictCacheLockReadNs) \
+    \
+    M(DistributedSyncInsertionTimeoutExceeded) \
 
 
 namespace ProfileEvents
diff --git a/dbms/src/Storages/Distributed/DirectoryMonitor.cpp b/dbms/src/Storages/Distributed/DirectoryMonitor.cpp
index 4732e0659b2..07bed4a6cf4 100644
--- a/dbms/src/Storages/Distributed/DirectoryMonitor.cpp
+++ b/dbms/src/Storages/Distributed/DirectoryMonitor.cpp
@@ -86,8 +86,8 @@ namespace
 }
 
 
-StorageDistributedDirectoryMonitor::StorageDistributedDirectoryMonitor(StorageDistributed & storage, const std::string & name)
-    : storage(storage), pool{createPool(name)}, path{storage.path + name + '/'}
+StorageDistributedDirectoryMonitor::StorageDistributedDirectoryMonitor(StorageDistributed & storage, const std::string & name, ConnectionPoolPtr pool)
+    : storage(storage), pool{pool}, path{storage.path + name + '/'}
     , current_batch_file_path{path + "current_batch.txt"}
     , default_sleep_time{storage.context.getSettingsRef().distributed_directory_monitor_sleep_time_ms.totalMilliseconds()}
     , sleep_time{default_sleep_time}
@@ -150,11 +150,11 @@ void StorageDistributedDirectoryMonitor::run()
 }
 
 
-ConnectionPoolPtr StorageDistributedDirectoryMonitor::createPool(const std::string & name)
+ConnectionPoolPtr StorageDistributedDirectoryMonitor::createPool(const std::string & name, const StorageDistributed & storage)
 {
-    const auto pool_factory = [this, &name] (const std::string & host, const UInt16 port,
-                                             const std::string & user, const std::string & password,
-                                             const std::string & default_database)
+    const auto pool_factory = [&storage, &name] (const std::string & host, const UInt16 port,
+                                                 const std::string & user, const std::string & password,
+                                                 const std::string & default_database)
     {
         return std::make_shared<ConnectionPool>(
             1, host, port, default_database,
diff --git a/dbms/src/Storages/Distributed/DirectoryMonitor.h b/dbms/src/Storages/Distributed/DirectoryMonitor.h
index 80f6f6ad16d..0b556fdbbfd 100644
--- a/dbms/src/Storages/Distributed/DirectoryMonitor.h
+++ b/dbms/src/Storages/Distributed/DirectoryMonitor.h
@@ -16,14 +16,13 @@ namespace DB
 class StorageDistributedDirectoryMonitor
 {
 public:
-    StorageDistributedDirectoryMonitor(StorageDistributed & storage, const std::string & name);
+    StorageDistributedDirectoryMonitor(StorageDistributed & storage, const std::string & name, ConnectionPoolPtr pool);
     ~StorageDistributedDirectoryMonitor();
 
-    const ConnectionPoolPtr & getPool() const { return pool; }
+    static ConnectionPoolPtr createPool(const std::string & name, const StorageDistributed & storage);
 
 private:
     void run();
-    ConnectionPoolPtr createPool(const std::string & name);
     bool findFiles();
     void processFile(const std::string & file_path);
     void processFilesWithBatching(const std::map<UInt64, std::string> & files);
diff --git a/dbms/src/Storages/Distributed/DistributedBlockOutputStream.cpp b/dbms/src/Storages/Distributed/DistributedBlockOutputStream.cpp
index e3ce6cf689a..3d6200c0620 100644
--- a/dbms/src/Storages/Distributed/DistributedBlockOutputStream.cpp
+++ b/dbms/src/Storages/Distributed/DistributedBlockOutputStream.cpp
@@ -7,6 +7,8 @@
 
 #include <IO/WriteBufferFromFile.h>
 #include <IO/CompressedWriteBuffer.h>
+#include <IO/Operators.h>
+#include <IO/WriteBufferFromString.h>
 #include <DataStreams/NativeBlockOutputStream.h>
 #include <DataStreams/RemoteBlockOutputStream.h>
 #include <Interpreters/InterpreterInsertQuery.h>
@@ -18,6 +20,7 @@
 #include <Common/CurrentMetrics.h>
 #include <Common/typeid_cast.h>
 #include <Common/Exception.h>
+#include <Common/ProfileEvents.h>
 #include <common/logger_useful.h>
 
 #include <Poco/DirectoryIterator.h>
@@ -30,6 +33,10 @@ namespace CurrentMetrics
     extern const Metric DistributedSend;
 }
 
+namespace ProfileEvents
+{
+    extern const Event DistributedSyncInsertionTimeoutExceeded;
+}
 
 namespace DB
 {
@@ -41,9 +48,7 @@ namespace ErrorCodes
 
 DistributedBlockOutputStream::DistributedBlockOutputStream(StorageDistributed & storage, const ASTPtr & query_ast,
                                                            const ClusterPtr & cluster_, bool insert_sync_, UInt64 insert_timeout_)
-    : storage(storage), query_ast(query_ast), cluster(cluster_), insert_sync(insert_sync_), insert_timeout(insert_timeout_),
-      deadline(std::chrono::system_clock::now() + std::chrono::seconds(insert_timeout)),
-      log(&Poco::Logger::get("DistributedBlockOutputStream"))
+    : storage(storage), query_ast(query_ast), cluster(cluster_), insert_sync(insert_sync_), insert_timeout(insert_timeout_)
 {
 }
 
@@ -132,8 +137,9 @@ void DistributedBlockOutputStream::writeImpl(const Block & block, const size_t s
         else
         {
             std::atomic<bool> timeout_exceeded(false);
-            auto result = std::async(std::launch::async, &DistributedBlockOutputStream::writeToShardDirect,
-                                     this, std::cref(block), std::cref(shard_info.dir_names), std::ref(timeout_exceeded));
+            auto launch = insert_timeout ? std::launch::async : std::launch::deferred;
+            auto result = std::async(launch, &DistributedBlockOutputStream::writeToShardSync, this, std::cref(block),
+                                     std::cref(shard_info.dir_names), shard_id, std::ref(timeout_exceeded));
             if (insert_timeout && result.wait_until(deadline) == std::future_status::timeout)
                 timeout_exceeded = true;
             result.get();
@@ -156,19 +162,34 @@ void DistributedBlockOutputStream::writeToLocal(const Block & block, const size_
 }
 
 
-void DistributedBlockOutputStream::writeToShardDirect(const Block & block, const std::vector<std::string> & dir_names, std::atomic<bool> & timeout_exceeded)
+void DistributedBlockOutputStream::writeToShardSync(const Block & block, const std::vector<std::string> & dir_names,
+                                                    size_t shard_id, const std::atomic<bool> & timeout_exceeded)
 {
+    auto & blocks_inserted = this->blocks_inserted;
+    auto writeNodeDescription = [shard_id, & blocks_inserted](WriteBufferFromString & out, const Connection & connection)
+    {
+        out << " (While insertion to " << connection.getDescription() << " shard " << shard_id;
+        out << " Inserted blocks: " << blocks_inserted << ")";
+    };
+
     const auto & query_string = queryToString(query_ast);
     for (const auto & dir_name : dir_names)
     {
-        auto & monitor = storage.requireDirectoryMonitor(dir_name);
-        auto & pool = monitor.getPool();
+        auto pool = storage.requireConnectionPool(dir_name);
         auto connection = pool->get();
 
         CurrentMetrics::Increment metric_increment{CurrentMetrics::DistributedSend};
 
         if (timeout_exceeded)
-            throw Exception("Timeout exceeded. Inserted blocks: " + std::to_string(blocks_inserted), ErrorCodes::TIMEOUT_EXCEEDED);
+        {
+            ProfileEvents::increment(ProfileEvents::DistributedSyncInsertionTimeoutExceeded);
+
+            String message;
+            WriteBufferFromString out(message);
+            out << "Timeout exceeded.";
+            writeNodeDescription(out, *connection);
+            throw Exception(message, ErrorCodes::TIMEOUT_EXCEEDED);
+        }
 
         try
         {
@@ -180,11 +201,10 @@ void DistributedBlockOutputStream::writeToShardDirect(const Block & block, const
         }
         catch (Exception & exception)
         {
-            std::string message = "\nWhile insertion to ";
-            message += connection->getDescription();
-            message += " Inserted blocks: " + std::to_string(blocks_inserted);
+            String message;
+            WriteBufferFromString out(message);
+            writeNodeDescription(out, *connection);
             exception.addMessage(message);
-            LOG_ERROR(log, message);
             exception.rethrow();
         }
     }
diff --git a/dbms/src/Storages/Distributed/DistributedBlockOutputStream.h b/dbms/src/Storages/Distributed/DistributedBlockOutputStream.h
index 82cfcc09f76..469134afdea 100644
--- a/dbms/src/Storages/Distributed/DistributedBlockOutputStream.h
+++ b/dbms/src/Storages/Distributed/DistributedBlockOutputStream.h
@@ -19,7 +19,8 @@ class StorageDistributed;
 class Cluster;
 using ClusterPtr = std::shared_ptr<Cluster>;
 
-/** The write is asynchronous - the data is first written to the local filesystem, and then sent to the remote servers.
+/** If insert_sync_ is true, the write is synchronous and, when insert_timeout_ is not zero, bounded by a deadline of insert_timeout_ seconds.
+ *  Otherwise, the write is asynchronous - the data is first written to the local filesystem, and then sent to the remote servers.
  *  If the Distributed table uses more than one shard, then in order to support the write,
  *  when creating the table, an additional parameter must be specified for ENGINE - the sharding key.
  *  Sharding key is an arbitrary expression from the columns. For example, rand() or UserID.
@@ -30,11 +31,11 @@ using ClusterPtr = std::shared_ptr<Cluster>;
 class DistributedBlockOutputStream : public IBlockOutputStream
 {
 public:
-    DistributedBlockOutputStream(StorageDistributed & storage, const ASTPtr & query_ast, const ClusterPtr & cluster_, bool insert_sync_, UInt64 insert_timeout_ = 0);
+    DistributedBlockOutputStream(StorageDistributed & storage, const ASTPtr & query_ast, const ClusterPtr & cluster_, bool insert_sync_, UInt64 insert_timeout_);
 
     void write(const Block & block) override;
 
-    void writePrefix() override { deadline = std::chrono::system_clock::now() + std::chrono::seconds(insert_timeout); }
+    void writePrefix() override { deadline = std::chrono::steady_clock::now() + std::chrono::seconds(insert_timeout); }
 
 private:
     IColumn::Selector createSelector(Block block);
@@ -47,18 +48,18 @@ private:
 
     void writeToShard(const Block & block, const std::vector<std::string> & dir_names);
 
-    void writeToShardDirect(const Block & block, const std::vector<std::string> & dir_names, std::atomic<bool> & timeout_exceeded);
+    /// Performs synchronous insertion to remote nodes. If timeout_exceeded flag was set, throws.
+    void writeToShardSync(const Block & block, const std::vector<std::string> & dir_names,
+                          size_t shard_id, const std::atomic<bool> & timeout_exceeded);
 
 private:
     StorageDistributed & storage;
     ASTPtr query_ast;
     ClusterPtr cluster;
-    bool insert_sync = true;
-    UInt64 insert_timeout = 1;
+    bool insert_sync;
+    UInt64 insert_timeout;
     size_t blocks_inserted = 0;
-    std::chrono::system_clock::time_point deadline;
-
-    Poco::Logger * log;
+    std::chrono::steady_clock::time_point deadline;
 };
 
 }
diff --git a/dbms/src/Storages/StorageDistributed.cpp b/dbms/src/Storages/StorageDistributed.cpp
index a3a891bad8c..160d50e304b 100644
--- a/dbms/src/Storages/StorageDistributed.cpp
+++ b/dbms/src/Storages/StorageDistributed.cpp
@@ -273,7 +273,7 @@ void StorageDistributed::startup()
 
 void StorageDistributed::shutdown()
 {
-    directory_monitors.clear();
+    cluster_nodes_data.clear();
 }
 
 
@@ -455,13 +455,6 @@ bool StorageDistributed::hasColumn(const String & column_name) const
     return VirtualColumnFactory::hasColumn(column_name) || IStorage::hasColumn(column_name);
 }
 
-
-StorageDistributedDirectoryMonitor & StorageDistributed::createDirectoryMonitor(const std::string & name)
-{
-    return *(directory_monitors.emplace(name, std::make_unique<StorageDistributedDirectoryMonitor>(*this, name)).first->second);
-}
-
-
 void StorageDistributed::createDirectoryMonitors()
 {
     if (path.empty())
@@ -473,16 +466,20 @@ void StorageDistributed::createDirectoryMonitors()
     boost::filesystem::directory_iterator end;
     for (auto it = begin; it != end; ++it)
         if (it->status().type() == boost::filesystem::directory_file)
-            createDirectoryMonitor(it->path().filename().string());
+            requireDirectoryMonitor(it->path().filename().string());
 }
 
 
-StorageDistributedDirectoryMonitor & StorageDistributed::requireDirectoryMonitor(const std::string & name)
+void StorageDistributed::requireDirectoryMonitor(const std::string & name)
 {
-    auto it = directory_monitors.find(name);
-    if (it == directory_monitors.end())
-        return createDirectoryMonitor(name);
-    return *it->second;
+    cluster_nodes_data[name].requireDirectoryMonitor(name, *this);
+}
+
+ConnectionPoolPtr StorageDistributed::requireConnectionPool(const std::string & name)
+{
+    auto & node_data = cluster_nodes_data[name];
+    node_data.requireConnectionPool(name, *this);
+    return node_data.conneciton_pool;
 }
 
 size_t StorageDistributed::getShardCount() const
@@ -496,4 +493,17 @@ ClusterPtr StorageDistributed::getCluster() const
     return (owned_cluster) ? owned_cluster : context.getCluster(cluster_name);
 }
 
+void StorageDistributed::ClusterNodeData::requireConnectionPool(const std::string & name, const StorageDistributed & storage)
+{
+    if (!conneciton_pool)
+        conneciton_pool = StorageDistributedDirectoryMonitor::createPool(name, storage);
+}
+
+void StorageDistributed::ClusterNodeData::requireDirectoryMonitor(const std::string & name, StorageDistributed & storage)
+{
+    requireConnectionPool(name, storage);
+    if (!directory_monitor)
+        directory_monitor = std::make_unique<StorageDistributedDirectoryMonitor>(storage, name, conneciton_pool);
+}
+
 }
diff --git a/dbms/src/Storages/StorageDistributed.h b/dbms/src/Storages/StorageDistributed.h
index 09bec125048..de79c4ed1da 100644
--- a/dbms/src/Storages/StorageDistributed.h
+++ b/dbms/src/Storages/StorageDistributed.h
@@ -116,13 +116,12 @@ private:
         const ASTPtr & sharding_key_ = nullptr,
         const String & data_path_ = String{});
 
-
-    /// create directory monitor thread by subdirectory name
-    StorageDistributedDirectoryMonitor & createDirectoryMonitor(const std::string & name);
     /// create directory monitors for each existing subdirectory
     void createDirectoryMonitors();
-    /// ensure directory monitor creation and return it
-    StorageDistributedDirectoryMonitor & requireDirectoryMonitor(const std::string & name);
+    /// ensure that the directory monitor thread for the given subdirectory name is created
+    void requireDirectoryMonitor(const std::string & name);
+    /// ensure connection pool creation and return it
+    ConnectionPoolPtr requireConnectionPool(const std::string & name);
 
     ClusterPtr getCluster() const;
 
@@ -146,7 +145,17 @@ private:
     String sharding_key_column_name;
     String path;    /// Can be empty if data_path_ is empty. In this case, a directory for the data to be sent is not created.
 
-    std::unordered_map<std::string, std::unique_ptr<StorageDistributedDirectoryMonitor>> directory_monitors;
+    struct ClusterNodeData
+    {
+        std::unique_ptr<StorageDistributedDirectoryMonitor> directory_monitor;
+        ConnectionPoolPtr conneciton_pool;
+
+        /// Creates the connection pool if it does not exist yet.
+        void requireConnectionPool(const std::string & name, const StorageDistributed & storage);
+        /// Creates directory_monitor (and the connection pool it uses) if it does not exist yet.
+        void requireDirectoryMonitor(const std::string & name, StorageDistributed & storage);
+    };
+    std::unordered_map<std::string, ClusterNodeData> cluster_nodes_data;
 
     /// Used for global monotonic ordering of files to send.
     SimpleIncrement file_names_increment;

From 1f6229d279e3b48d3bdffc0f6676868251ee2faa Mon Sep 17 00:00:00 2001
From: BayoNet <bayonet@virtUbuntu16.04>
Date: Mon, 31 Jul 2017 11:31:30 +0300
Subject: [PATCH 005/281] External dictionaries topic is restructured and
 updated.

---
 docs/ru/dicts/external_dicts.rst              | 343 +---------------
 docs/ru/dicts/external_dicts_dict.rst         |  35 ++
 docs/ru/dicts/external_dicts_dict_layout.rst  | 250 +++++++++++
 .../ru/dicts/external_dicts_dict_lifetime.rst |  38 ++
 docs/ru/dicts/external_dicts_dict_sources.rst | 388 ++++++++++++++++++
 .../dicts/external_dicts_dict_structure.rst   | 125 ++++++
 docs/ru/dicts/index.rst                       |   4 +-
 docs/ru/formats/index.rst                     |   2 +
 docs/ru/functions/ext_dict_functions.rst      |   4 +-
 docs/ru/functions/other_functions.rst         |   2 +
 .../operations/server_settings/settings.rst   |  14 +-
 11 files changed, 877 insertions(+), 328 deletions(-)
 create mode 100644 docs/ru/dicts/external_dicts_dict.rst
 create mode 100644 docs/ru/dicts/external_dicts_dict_layout.rst
 create mode 100644 docs/ru/dicts/external_dicts_dict_lifetime.rst
 create mode 100644 docs/ru/dicts/external_dicts_dict_sources.rst
 create mode 100644 docs/ru/dicts/external_dicts_dict_structure.rst

diff --git a/docs/ru/dicts/external_dicts.rst b/docs/ru/dicts/external_dicts.rst
index 4001eeff87f..00374b5a91e 100644
--- a/docs/ru/dicts/external_dicts.rst
+++ b/docs/ru/dicts/external_dicts.rst
@@ -1,345 +1,44 @@
 .. _dicts-external_dicts:
 
+***************
 Внешние словари
-===============
+***************
 
-Существует возможность подключать свои собственные словари из различных источников данных.
-Источником данных для словаря может быть файл на локальной файловой системе, сервер ClickHouse, сервер MySQL, MongoDB или любой ODBC источник.
-Словарь может полностью храниться в оперативке и периодически обновляться, или быть частично закэшированным в оперативке и динамически подгружать отсутствующие значения.
+Существует возможность подключать собственные словари из различных источников данных. Источником данных для словаря может быть локальный текстовый/исполняемый файл, HTTP(s) ресурс или другая СУБД. Подробнее смотрите в разделе ":ref:`dicts-external_dicts_dict_sources`".
 
-Конфигурация внешних словарей находится в отдельном файле или файлах, указанных в конфигурационном параметре :ref:`dictionaries_config <server_settings-dictionaries_config>`.
-Этот параметр содержит абсолютный или относительный путь к файлу с конфигурацией словарей. Относительный путь - относительно директории с конфигурационным файлом сервера. Путь может содержать wildcard-ы \* и ? - тогда рассматриваются все подходящие файлы. Пример: ``dictionaries/*.xml``.
+ClickHouse может полностью или частично хранить словари в оперативной памяти, периодически обновлять их и динамически подгружать отсутствующие значения.
 
-Конфигурация словарей, а также множество файлов с конфигурацией, может обновляться без перезапуска сервера. Сервер проверяет обновления каждые 5 секунд. То есть, словари могут подключаться динамически.
+Конфигурация внешних словарей находится в одном или нескольких файлах. Путь к конфигурации указывается в параметре :ref:`server_settings-dictionaries_config`.
 
-Создание словарей может производиться при старте сервера или при первом использовании. Это определяется конфигурационном параметром :ref:`dictionaries_lazy_load <server_settings-dictionaries_lazy_load>` (в основном конфигурационном файле сервера). Параметр не обязателен, по умолчанию - ``true``. Если true, то каждый словарь создаётся при первом использовании;  если словарь не удалось создать - вызов функции, использующей словарь, кидает исключение. Если ``false``, то все словари создаются при старте сервера, и в случае ошибки, сервер завершает работу.
+Периодически ClickHouse обновляет конфигурацию словарей и словари. Таким образом, словари можно подгружать динамически.
+
+Словари могут загружаться при старте сервера или при первом использовании, в зависимости от настройки :ref:`server_settings-dictionaries_lazy_load`.
 
 Конфигурационный файл словарей имеет вид:
 
 .. code-block:: xml
 
   <dictionaries>
-      <comment>Не обязательный элемент с любым содержимым; полностью игнорируется.</comment>
+      <comment>Необязательный элемент с любым содержимым. Полностью игнорируется.</comment>
   
-      <!-- Можно задать произвольное количество разных словарей. -->
       <dictionary>
-          <!-- Имя словаря. Под этим именем словарь будет доступен для использования. -->
-          <name>os</name>
-  
-          <!-- Источник данных. -->
-          <source>
-  
-              <!-- Источник - файл на локальной файловой системе. -->
-              <file>
-                  <!-- Путь на локальной файловой системе. -->
-                  <path>/opt/dictionaries/os.tsv</path>
-                  <!-- С помощью какого формата понимать файл. -->
-                  <format>TabSeparated</format>
-              </file>
-  
-              <!-- или источник - таблица на сервере MySQL.
-              <mysql>
-                  <!- - Эти параметры могут быть указаны как снаружи (общие для всех реплик), так и внутри конкретной реплики - ->
-                  <port>3306</port>
-                  <user>clickhouse</user>
-                  <password>qwerty</password>
-                  <!- - Можно указать от одной до произвольного количества реплик для отказоустойчивости. - ->
-                  <replica>
-                      <host>example01-1</host>
-                      <priority>1</priority> <!- - Меньше значение - больше приоритет. - ->
-                  </replica>
-                  <replica>
-                      <host>example01-2</host>
-                      <priority>1</priority>
-                  </replica>
-                  <db>conv_main</db>
-                  <table>counters</table>
-              </mysql>
-              -->
-  
-              <!-- или источник - таблица на сервере ClickHouse.
-              <clickhouse>
-                  <host>example01-01-1</host>
-                  <port>9000</port>
-                  <user>default</user>
-                  <password></password>
-                  <db>default</db>
-                  <table>counters</table>
-              </clickhouse>
-              <!- - Если адрес похож на localhost, то запрос будет идти без сетевого взаимодействия.
-                    Для отказоустойчивости, вы можете создать Distributed таблицу на localhost и прописать её. - ->
-              -->
-  
-              <!-- Для <mysql> и <clickhouse> доступен атрибут <where>, позволяющий задать условие выбора
-              <clickhouse>
-                  <host>example01-01-1</host>
-                  <port>9000</port>
-                  <user>default</user>
-                  <password></password>
-                  <db>default</db>
-                  <table>ids</table>
-                  <where>id=10</where>
-              </clickhouse>
-              -->
+          <!-- Конфигурация словаря -->
+      </dictionary>
 
-              <!-- или источник - исполняемый файл. Если layout.cache - список нужных ключей будет записан в поток STDIN программы -->
-              <executable>
-                  <!-- Путь или имя программы (если директория есть в переменной окружения PATH) и параметры -->
-                  <command>cat /opt/dictionaries/os.tsv</command>
-                  <!-- С помощью какого формата понимать вывод и формировать список ключей. -->
-                  <format>TabSeparated</format>
-              </executable>
-  
-              <!-- или источник - http сервер. Если layout.cache - список нужных ключей будет послан как POST запрос -->
-              <http>
-                  <url>http://[::1]/os.tsv</url>
-                  <!-- С помощью какого формата понимать ответ и формировать список ключей. -->
-                  <format>TabSeparated</format>
-              </http>
-  
-          </source>
-  
-          <!-- Периодичность обновления для полностью загружаемых словарей. 0 - никогда не обновлять. -->
-          <lifetime>
-              <min>300</min>
-              <max>360</max>
-              <!-- Периодичность обновления выбирается равномерно-случайно между min и max,
-                   чтобы размазать по времени нагрузку при обновлении словарей на большом количестве серверов. -->
-          </lifetime>
-  
-          <!-- или
-          <!- - Периодичность обновления для полностью загружаемых словарей или время инвалидации для кэшируемых словарей.
-                0 - никогда не обновлять. - ->
-          <lifetime>300</lifetime>
-          -->
-  
-          <layout>   <!-- Способ размещения в памяти. -->
-              <flat />
-              <!-- или
-              <hashed />
-              или
-              <cache>
-                  <!- - Размер кэша в количестве ячеек; округляется вверх до степени двух. - ->
-                  <size_in_cells>1000000000</size_in_cells>
-              </cache>
-              -->
-          </layout>
-  
-          <!-- Структура. -->
-          <structure>
-              <!-- Описание столбца, являющегося идентификатором (ключом) словаря. -->
-              <id>
-                  <!-- Имя столбца с идентификатором. -->
-                  <name>Id</name>
-              </id>
-  
-              <attribute>    <!-- id уже входит в атрибуты и дополнительно указывать его здесь не нужно. -->
-                  <!-- Имя столбца. -->
-                  <name>Name</name>
-                  <!-- Тип столбца. (Как столбец понимается при загрузке.
-                       В случае MySQL, в таблице может быть TEXT, VARCHAR, BLOB, но загружается всё как String) -->
-                  <type>String</type>
-                  <!-- Какое значение использовать для несуществующего элемента. В примере - пустая строка. -->
-                  <null_value></null_value>
-              </attribute>
-  
-              <!-- Может быть указано произвольное количество атрибутов. -->
-              <attribute>
-                  <name>ParentID</name>
-                  <type>UInt64</type>
-                  <null_value>0</null_value>
-                  <!-- Определяет ли иерархию - отображение в идентификатор родителя (по умолчанию, false). -->
-                  <hierarchical>true</hierarchical>
-                  <!-- Можно считать отображение id -> attribute инъективным, чтобы оптимизировать GROUP BY. (по умолчанию, false) -->
-                  <injective>true</injective>
-              </attribute>
+      ...
 
-              <!-- Атрибут может быть выражением -->
-              <attribute>
-                  <name>expr</name>
-                  <type>UInt64</type>
-                  <expression>rand64()</expression>
-                  <null_value>0</null_value>
-              </attribute>
-          </structure>
+      <dictionary>
+          <!-- Конфигурация словаря -->
       </dictionary>
   </dictionaries>
 
-Идентификатор (ключевой атрибут) словаря должен быть числом, помещающимся в UInt64.
-Также есть возможность задавать произвольные составные ключи (см. раздел "Словари с составными ключами"). Замечание: составной ключ может состоять и из одного элемента, что даёт возможность использовать в качестве ключа, например, строку.
+В одном файле можно :ref:`сконфигурировать <dicts-external_dicts_dict>` произвольное количество словарей. Формат файла сохраняется, даже если словарь один (т.е. ``<dictionaries><dictionary> <!--configuration--> </dictionary></dictionaries>``).
 
+Смотрите также ":ref:`ext_dict_functions`".
 
-Существует шесть способов размещения словаря в памяти.
+.. attention:: Вы можете преобразовать значения по небольшому словарю, описав его в запросе ``SELECT`` (см. функцию ":ref:`other_functions-transform`"). Эта функциональность не связана с внешними словарями.
 
-flat
------
-В виде плоских массивов. Самый эффективный способ. Он подходит, если все ключи меньше 500 000. Если при создании словаря обнаружен ключ больше, то кидается исключение и словарь не создаётся. Словарь загружается в оперативку целиком. Словарь использует количество оперативки, пропорциональное максимальному значению ключа. Ввиду ограничения на 500 000, потребление оперативки вряд ли может быть большим.
-Поддерживаются все виды источников. При обновлении, данные (из файла, из таблицы) читаются целиком.
-
-hashed
-------
-В виде хэш-таблиц. Слегка менее эффективный способ. Словарь тоже загружается в оперативку целиком, и может содержать произвольное количество элементов с произвольными идентификаторами. На практике, имеет смысл использовать до десятков миллионов элементов, пока хватает оперативки.
-Поддерживаются все виды источников. При обновлении, данные (из файла, из таблицы) читаются целиком.
-
-cache
------
-Наименее эффективный способ. Подходит, если словарь не помещается в оперативку. Представляет собой кэш из фиксированного количества ячеек, в которых могут быть расположены часто используемые данные. Поддерживается источник MySQL, ClickHouse, executable, http; источник-файл не поддерживается. При поиске в словаре, сначала просматривается кэш. На каждый блок данных, все не найденные в кэше ключи (или устаревшие ключи) собираются в пачку, и с этой пачкой делается запрос к источнику вида SELECT attrs... FROM db.table WHERE id IN (k1, k2, ...). Затем полученные данные записываются в кэш.
-
-range_hashed
-------------
-В таблице прописаны какие-то данные для диапазонов дат, для каждого ключа. Дать возможность доставать эти данные для заданного ключа, для заданной даты.
-
-
-Пример: таблица содержит скидки для каждого рекламодателя в виде:
-
-  +------------------+-----------------------------+------------+----------+
-  | id рекламодателя | дата начала действия скидки | дата конца | величина |
-  +==================+=============================+============+==========+
-  | 123              | 2015-01-01                  | 2015-01-15 | 0.15     |
-  +------------------+-----------------------------+------------+----------+
-  | 123              | 2015-01-16                  | 2015-01-31 | 0.25     |
-  +------------------+-----------------------------+------------+----------+
-  | 456              | 2015-01-01                  | 2015-01-15 | 0.05     |
-  +------------------+-----------------------------+------------+----------+
-
-Добавляем ``layout = range_hashed``.
-При использовании такого layout, в structure должны быть элементы ``range_min``, ``range_max``.
-
-Пример:
-
-.. code-block:: xml
-
-  <structure>
-      <id>
-          <name>Id</name>
-      </id>
-      <range_min>
-          <name>first</name>
-      </range_min>
-      <range_max>
-          <name>last</name>
-      </range_max>
-      ...
-      
-Эти столбцы должны иметь тип Date. Другие типы пока не поддерживаем.
-Столбцы обозначают закрытый диапазон дат.
-
-Для работы с такими словарями, функции dictGetT должны принимать ещё один аргумент - дату:
-
-``dictGetT('dict_name', 'attr_name', id, date)``
-
-Функция достаёт значение для данного id и для диапазона дат, в который входит переданная дата. Если не найден id или для найденного id не найден диапазон, то возвращается значение по умолчанию для словаря.
-
-Если есть перекрывающиеся диапазоны, то можно использовать любой подходящий.
-
-Если граница диапазона является NULL или является некорректной датой (1900-01-01, 2039-01-01), то диапазон следует считать открытым. Диапазон может быть открытым с обеих сторон.
-
-В оперативке данные представлены в виде хэш-таблицы со значением в виде упорядоченного массива диапазонов и соответствующих им значений.
-
-Пример словаря по диапазонам:
-
-.. code-block:: xml
-
-  <dictionaries>
-          <dictionary>
-                  <name>xxx</name>
-                  <source>
-                          <mysql>
-                                  <password>xxx</password>
-                                  <port>3306</port>
-                                  <user>xxx</user>
-                                  <replica>
-                                          <host>xxx</host>
-                                          <priority>1</priority>
-                                  </replica>
-                                  <db>dicts</db>
-                                  <table>xxx</table>
-                          </mysql>
-                  </source>
-                  <lifetime>
-                          <min>300</min>
-                          <max>360</max>
-                  </lifetime>
-                  <layout>
-                          <range_hashed />
-                  </layout>
-                  <structure>
-                          <id>
-                                  <name>Abcdef</name>
-                          </id>
-                          <range_min>
-                                  <name>StartDate</name>
-                          </range_min>
-                          <range_max>
-                                  <name>EndDate</name>
-                          </range_max>
-                          <attribute>
-                                  <name>XXXType</name>
-                                  <type>String</type>
-                                  <null_value />
-                          </attribute>
-                  </structure>
-          </dictionary>
-  </dictionaries>
-
-complex_key_hashed
-------------------
-
-Для использования с составными ключами. Аналогичен hashed.
-
-complex_key_cache
------------------
-
-Для использования с составными ключами. Аналогичен cache.
-
-Примечания
-----------
-
-Рекомендуется использовать способ ``flat``, если возможно, или ``hashed``, ``complex_key_hashed``. Скорость работы словарей с таким размещением в памяти является безупречной.
-
-Способы ``cache`` и ``complex_key_cache`` следует использовать лишь если это неизбежно. Скорость работы кэша очень сильно зависит от правильности настройки и сценария использования. Словарь типа cache нормально работает лишь при достаточно больших hit rate-ах (рекомендуется 99% и выше). Посмотреть средний hit rate можно в таблице system.dictionaries. Укажите достаточно большой размер кэша. Количество ячеек следует подобрать экспериментальным путём - выставить некоторое значение, с помощью запроса добиться полной заполненности кэша, посмотреть на потребление оперативки (эта информация находится в таблице system.dictionaries); затем пропорционально увеличить количество ячеек так, чтобы расходовалось разумное количество оперативки. В качестве источника для кэша рекомендуется MySQL, MongoDB, так как ClickHouse плохо обрабатывает запросы со случайными чтениями.
-
-Во всех случаях, производительность будет выше, если вызывать функцию для работы со словарём после ``GROUP BY``, или если доставаемый атрибут помечен как инъективный. Для cache словарей, производительность будет лучше, если вызывать функцию после LIMIT-а - для этого можно использовать подзапрос с LIMIT-ом, и снаружи вызывать функцию со словарём.
-
-Атрибут называется инъективным, если разным ключам соответствуют разные значения атрибута. Тогда при использовании в ``GROUP BY`` функции, достающей значение атрибута по ключу, эта функция автоматически выносится из GROUP BY.
-
-При обновлении словарей из файла, сначала проверяется время модификации файла, и загрузка производится только если файл изменился.
-При обновлении из MySQL, для flat и hashed словарей, сначала делается запрос ``SHOW TABLE STATUS`` и смотрится время обновления таблицы. И если оно не NULL, то оно сравнивается с запомненным временем. Это работает для MyISAM таблиц, а для InnoDB таблиц время обновления неизвестно, поэтому загрузка из InnoDB делается при каждом обновлении.
-
-Для cache-словарей может быть задано время устаревания (``lifetime``) данных в кэше. Если от загрузки данных в ячейке прошло больше времени, чем lifetime, то значение не используется, и будет запрошено заново при следующей необходимости его использовать.
-
-Если словарь не удалось ни разу загрузить, то при попытке его использования, будет брошено исключение.
-Если при запросе к источнику cached словаря возникла ошибка, то будет брошено исключение.
-Обновление словарей (кроме загрузки при первом использовании) не блокирует запросы - во время обновления используется старая версия словаря. Если при обновлении возникнет ошибка, то ошибка пишется в лог сервера, а запросы продолжат использовать старую версию словарей.
-
-Список внешних словарей и их статус можно посмотреть в таблице ``system.dictionaries``.
-
-Для использования внешних словарей, смотрите раздел "Функции для работы с внешними словарями".
-
-Обратите внимание, что вы можете преобразовать значения по небольшому словарю, указав всё содержимое словаря прямо в запросе SELECT - смотрите раздел "Функция transform". Эта функциональность никак не связана с внешними словарями.
-
-Словари с составными ключами
-----------------------------
-
-В качестве ключа может выступать кортеж (tuple) из полей произвольных типов. Параметр layout в этом случае должен быть равен complex_key_hashed или complex_key_cache.
-
-Структура ключа задаётся не в элементе ``<id>``, а в элементе ``<key>``. Поля ключа задаются в том же формате, что и атрибуты словаря. Пример:
-
-.. code-block:: xml
-
-  <structure>
-      <key>
-          <attribute>
-              <name>field1</name>
-              <type>String</type>
-          </attribute>
-          <attribute>
-              <name>field2</name>
-              <type>UInt32</type>
-          </attribute>
-          ...
-      </key>
-  ...
-
-
-При использовании такого словаря, в функции dictGet* в качестве ключа передаётся Tuple со значениями полей. Пример: ``dictGetString('dict_name', 'attr_name', tuple('field1', 123))``.
+.. toctree::
+    :glob:
+   
+    external_dicts_dict*
diff --git a/docs/ru/dicts/external_dicts_dict.rst b/docs/ru/dicts/external_dicts_dict.rst
new file mode 100644
index 00000000000..b400261f0e5
--- /dev/null
+++ b/docs/ru/dicts/external_dicts_dict.rst
@@ -0,0 +1,35 @@
+.. _dicts-external_dicts_dict:
+
+**************************
+Настройка внешнего словаря
+**************************
+
+Конфигурация словаря имеет следующую структуру:
+
+.. code-block:: xml
+
+  <dictionary>
+      <name>dict_name</name>
+
+      <source>
+        <!-- Source configuration -->
+      </source>
+
+      <layout>
+        <!-- Memory layout configuration -->
+      </layout>
+
+      <structure>
+        <!-- Complex key configuration -->
+      </structure>
+
+      <lifetime>
+        <!-- Lifetime of dictionary in memory -->
+      </lifetime>
+  </dictionary>
+
+* name - Идентификатор, под которым словарь будет доступен для использования. Используйте символы ``[a-zA-Z0-9_\-]``.
+* :ref:`source <dicts-external_dicts_dict_sources>` - Источник словаря.
+* :ref:`layout <dicts-external_dicts_dict_layout>` - Размещение словаря в памяти.
+* :ref:`structure <dicts-external_dicts_dict_structure>` - Ключ словаря.
+* :ref:`lifetime <dicts-external_dicts_dict_lifetime>` - Периодичность обновления словарей.
diff --git a/docs/ru/dicts/external_dicts_dict_layout.rst b/docs/ru/dicts/external_dicts_dict_layout.rst
new file mode 100644
index 00000000000..4ee4cc6fe05
--- /dev/null
+++ b/docs/ru/dicts/external_dicts_dict_layout.rst
@@ -0,0 +1,250 @@
+.. _dicts-external_dicts_dict_layout:
+
+**************************
+Хранение словарей в памяти
+**************************
+
+Словари можно размещать в памяти :ref:`множеством способов <dicts-external_dicts_dict_layout-manner>`.
+
+Рекомендуем :ref:`dicts-external_dicts_dict_layout-flat`, :ref:`dicts-external_dicts_dict_layout-hashed` и :ref:`dicts-external_dicts_dict_layout-complex_key_hashed`. Скорость обработки словарей при этом максимальна.
+
+Размещение с кэшированием не рекомендуется использовать из-за потенциально низкой производительности и сложностей в подборе оптимальных параметров. Читайте об этом подробнее в разделе ":ref:`dicts-external_dicts_dict_layout-cache`".
+
+Повысить производительность словарей можно следующими способами:
+
+* Вызывать функцию для работы со словарём после ``GROUP BY``.
+* Помечать извлекаемые атрибуты как инъективные. Атрибут называется инъективным, если разным ключам соответствуют разные значения атрибута. Тогда при использовании в ``GROUP BY`` функции, достающей значение атрибута по ключу, эта функция автоматически выносится из ``GROUP BY``.
+
+ClickHouse периодически обновляет словари. Сначала проверяется время модификации файла/таблицы, затем, если файл/таблица обновились, обновляется словарь. Если словарь хранится в таблице типа MyISAM, то время модификации проверяется запросом ``SHOW TABLE STATUS``. Для таблиц InnoDB нельзя получить время модификации, поэтому словарь обновляется каждый раз.
+
+Обновление словарей (кроме загрузки при первом использовании) не блокирует запросы - во время обновления используется старая версия словаря. Если при обновлении возникнет ошибка, то ошибка пишется в лог сервера, а запросы продолжат использовать старую версию словарей.
+
+При ошибках работы со словарями ClickHouse генерирует исключения. Например, в следующих ситуациях:
+
+* При обращении к словарю, который не удалось загрузить.
+* При ошибке запроса к ``cache``-словарю.
+
+
+Список внешних словарей и их статус можно посмотреть в таблице ``system.dictionaries``.
+
+Общий вид конфигурации:
+
+.. code-block:: xml
+
+  <dictionaries>
+      <dictionary>
+          ...
+          <layout>
+              <layout_type> 
+                  <!-- layout settings -->
+              </layout_type>
+          </layout>
+          ...
+      </dictionary>
+  </dictionaries>
+
+
+.. _dicts-external_dicts_dict_layout-manner:
+
+Способы размещения словарей в памяти
+====================================
+
+* :ref:`dicts-external_dicts_dict_layout-flat`
+* :ref:`dicts-external_dicts_dict_layout-hashed`
+* :ref:`dicts-external_dicts_dict_layout-cache`
+* :ref:`dicts-external_dicts_dict_layout-range_hashed`
+* :ref:`dicts-external_dicts_dict_layout-complex_key_hashed`
+* :ref:`dicts-external_dicts_dict_layout-complex_key_cache`
+
+
+.. _dicts-external_dicts_dict_layout-flat:
+
+flat
+----
+
+Словарь полностью хранится в оперативной памяти в виде плоских массивов. Объем памяти, занимаемой словарем, пропорционален максимальному значению ключа.
+
+Ключ словаря имеет тип ``UInt64`` и его величина ограничена 500 000. Если при создании словаря обнаружен ключ больше, то ClickHouse бросает исключение и не создает словарь.
+
+Поддерживаются все виды источников. При обновлении, данные (из файла, из таблицы) читаются целиком.
+
+Этот метод обеспечивает максимальную производительность среди всех доступных способов размещения словаря.
+
+Пример конфигурации:
+
+.. code-block:: xml
+
+  <layout>
+    <flat />
+  </layout>
+
+
+.. _dicts-external_dicts_dict_layout-hashed:
+
+hashed
+------
+
+Словарь полностью хранится в оперативной памяти в виде хэш-таблиц. Словарь может содержать произвольное количество элементов с произвольными идентификаторами. На практике, количество ключей может достигать десятков миллионов элементов.
+
+Поддерживаются все виды источников. При обновлении, данные (из файла, из таблицы) читаются целиком.
+
+Пример конфигурации:
+
+.. code-block:: xml
+
+  <layout>
+    <hashed />
+  </layout>
+
+
+.. _dicts-external_dicts_dict_layout-complex_key_hashed:
+
+complex_key_hashed
+------------------
+
+Тип размещения предназначен для использования с составными :ref:`ключами <dicts-external_dicts_dict_structure>`. Аналогичен hashed.
+
+Пример конфигурации:
+
+.. code-block:: xml
+
+  <layout>
+    <complex_key_hashed />
+  </layout>
+
+
+.. _dicts-external_dicts_dict_layout-range_hashed:
+
+range_hashed
+------------
+
+Словарь хранится в оперативной памяти в виде хэш-таблицы с упорядоченным массивом диапазонов и соответствующих им значений.
+
+Этот способ размещения работает так же, как и hashed, и позволяет дополнительно к ключу использовать диапазоны по дате/времени, если они указаны в словаре.
+
+Пример: таблица содержит скидки для каждого рекламодателя в виде:
+
+  +------------------+-----------------------------+------------+----------+
+  | id рекламодателя | дата начала действия скидки | дата конца | величина |
+  +==================+=============================+============+==========+
+  | 123              | 2015-01-01                  | 2015-01-15 | 0.15     |
+  +------------------+-----------------------------+------------+----------+
+  | 123              | 2015-01-16                  | 2015-01-31 | 0.25     |
+  +------------------+-----------------------------+------------+----------+
+  | 456              | 2015-01-01                  | 2015-01-15 | 0.05     |
+  +------------------+-----------------------------+------------+----------+
+
+Столбцы с датами в словаре должны иметь тип ``Date``.
+
+Чтобы использовать выборку по диапазонам дат, необходимо в :ref:`structure <dicts-external_dicts_dict_structure>` определить элементы ``range_min``, ``range_max``.
+
+Пример:
+
+.. code-block:: xml
+
+  <structure>
+      <id>
+          <name>Id</name>
+      </id>
+      <range_min>
+          <name>first</name>
+      </range_min>
+      <range_max>
+          <name>last</name>
+      </range_max>
+      ...
+      
+
+
+Для работы с такими словарями в функцию ``dictGetT`` необходимо передавать дополнительный аргумент - дату: ::
+
+  dictGetT('dict_name', 'attr_name', id, date)
+
+Функция возвращает значение для заданных ``id`` и диапазона дат, в который входит переданная дата.
+
+Особенности алгоритма:
+
+* Если не найден ``id`` или для найденного ``id`` не найден диапазон, то возвращается значение по умолчанию для словаря.
+* Если есть перекрывающиеся диапазоны, то можно использовать любой подходящий.
+* Если граница диапазона ``NULL`` или некорректная дата (1900-01-01, 2039-01-01), то диапазон считается открытым. Диапазон может быть открытым с обеих сторон.
+
+
+Пример конфигурации:
+
+.. code-block:: xml
+
+  <dictionaries>
+          <dictionary>
+                  
+                  ...
+                  
+                  <layout>
+                          <range_hashed />
+                  </layout>
+                  
+                  <structure>
+                          <id>
+                                  <name>Abcdef</name>
+                          </id>
+                          <range_min>
+                                  <name>StartDate</name>
+                          </range_min>
+                          <range_max>
+                                  <name>EndDate</name>
+                          </range_max>
+                          <attribute>
+                                  <name>XXXType</name>
+                                  <type>String</type>
+                                  <null_value />
+                          </attribute>
+                  </structure>
+
+          </dictionary>
+  </dictionaries>
+
+
+.. _dicts-external_dicts_dict_layout-cache:
+
+cache
+-----
+
+Словарь хранится в кэше, состоящем из фиксированного количества ячеек. Ячейки содержат часто используемые элементы.
+
+При поиске в словаре сначала просматривается кэш. На каждый блок данных, все не найденные в кэше или устаревшие ключи запрашиваются у источника с помощью ``SELECT attrs... FROM db.table WHERE id IN (k1, k2, ...)``. Затем полученные данные записываются в кэш.
+
+Для cache-словарей может быть задано время устаревания (:ref:`lifetime <dicts-external_dicts_dict_lifetime>`) данных в кэше. Если от загрузки данных в ячейке прошло больше времени, чем ``lifetime``, то значение не используется, и будет запрошено заново при следующей необходимости его использовать.
+
+Это наименее эффективный из всех способов размещения словарей. Скорость работы кэша очень сильно зависит от правильности настройки и сценария использования. Словарь типа cache показывает высокую производительность лишь при достаточно больших hit rate-ах (рекомендуется 99% и выше). Посмотреть средний hit rate можно в таблице ``system.dictionaries``.
+
+Чтобы увеличить производительность кэша, используйте подзапрос с ``LIMIT``, а снаружи вызывайте функцию со словарём.
+
+Поддерживаются :ref:`источники <dicts-external_dicts_dict_sources>`: MySQL, ClickHouse, executable, HTTP.
+
+Пример настройки:
+
+.. code-block:: xml
+             
+    <layout>
+        <cache>
+            <!-- Размер кэша в количестве ячеек. Округляется вверх до степени двух. -->
+            <size_in_cells>1000000000</size_in_cells>
+        </cache>
+    </layout>
+
+Укажите достаточно большой размер кэша. Количество ячеек следует подобрать экспериментальным путём:
+
+1. Выставить некоторое значение.
+2. Запросами добиться полной заполненности кэша.
+3. Оценить потребление оперативной памяти с помощью таблицы ``system.dictionaries``.
+4. Увеличивать/уменьшать количество ячеек до получения требуемого расхода оперативной памяти.
+
+.. warning:: Не используйте в качестве источника ClickHouse, поскольку он медленно обрабатывает запросы со случайным чтением.
+
+
+.. _dicts-external_dicts_dict_layout-complex_key_cache:
+
+complex_key_cache
+-----------------
+
+Тип размещения предназначен для использования с составными :ref:`ключами <dicts-external_dicts_dict_structure>`. Аналогичен ``cache``.
+
diff --git a/docs/ru/dicts/external_dicts_dict_lifetime.rst b/docs/ru/dicts/external_dicts_dict_lifetime.rst
new file mode 100644
index 00000000000..d3de506b800
--- /dev/null
+++ b/docs/ru/dicts/external_dicts_dict_lifetime.rst
@@ -0,0 +1,38 @@
+.. _dicts-external_dicts_dict_lifetime:
+
+*******************
+Обновление словарей
+*******************
+
+ClickHouse периодически обновляет словари. Интервал обновления для полностью загружаемых словарей и интервал инвалидации для кэшируемых словарей определяется в теге ``<lifetime>`` в секундах.
+
+Обновление словарей (кроме загрузки при первом использовании) не блокирует запросы, во время обновления используется старая версия словаря. Если при обновлении возникнет ошибка, то ошибка пишется в лог сервера, а запросы продолжат использовать старую версию словарей.
+
+Пример настройки:
+
+.. code-block:: xml
+
+  <dictionary>
+      ...
+      <lifetime>300</lifetime>
+      ...
+  </dictionary>
+
+
+Настройка ``<lifetime>0</lifetime>`` запрещает обновление словарей.
+
+
+Можно задать интервал, внутри которого ClickHouse равномерно-случайно выберет время для обновления. Это необходимо для распределения нагрузки на источник словаря при обновлении на большом количестве серверов.
+
+Пример настройки:
+
+.. code-block:: xml
+
+  <dictionary>
+      ...
+      <lifetime>
+          <min>300</min>
+          <max>360</max>
+      </lifetime>
+      ...
+  </dictionary>
diff --git a/docs/ru/dicts/external_dicts_dict_sources.rst b/docs/ru/dicts/external_dicts_dict_sources.rst
new file mode 100644
index 00000000000..035287b1a52
--- /dev/null
+++ b/docs/ru/dicts/external_dicts_dict_sources.rst
@@ -0,0 +1,388 @@
+.. _dicts-external_dicts_dict_sources:
+
+**************************
+Источники внешних словарей
+**************************
+
+Внешний словарь можно подключить из множества источников.
+
+Общий вид конфигурации:
+
+.. code-block:: xml
+
+  <dictionaries>
+    <dictionary>
+      ...
+      <source>
+        <source_type>
+          <!-- Source configuration -->
+        </source_type>
+      </source>
+      ...
+    </dictionary>
+    ...
+  </dictionaries>
+
+Источник настраивается в разделе ``source``. 
+
+Типы источников (``source_type``):
+
+ * :ref:`dicts-external_dicts_dict_sources-local_file`
+ * :ref:`dicts-external_dicts_dict_sources-executable`
+ * :ref:`dicts-external_dicts_dict_sources-http`
+ * :ref:`dicts-external_dicts_dict_sources-odbc`
+ * СУБД:
+
+   * :ref:`dicts-external_dicts_dict_sources-mysql`
+   * :ref:`dicts-external_dicts_dict_sources-clickhouse`
+   * :ref:`dicts-external_dicts_dict_sources-mongodb`
+
+
+.. _dicts-external_dicts_dict_sources-local_file:
+
+Локальный файл
+==============
+
+Пример настройки:
+
+.. code-block:: xml
+
+  <source>
+    <file>
+      <path>/opt/dictionaries/os.tsv</path>
+      <format>TabSeparated</format>
+    </file>
+  </source>
+
+Поля настройки:
+
+* ``path`` - Абсолютный путь к файлу.
+* ``format`` - Формат файла. Поддерживаются все форматы, описанные в разделе ":ref:`formats`".
+
+
+.. _dicts-external_dicts_dict_sources-executable:
+
+Исполняемый файл
+================
+
+Работа с исполняемым файлом зависит от :ref:`размещения словаря в памяти <dicts-external_dicts_dict_layout>`. Если тип размещения словаря ``cache`` или ``complex_key_cache``, то ClickHouse запрашивает необходимые ключи, отправляя запрос в ``STDIN`` исполняемого файла.
+
+Пример настройки:
+
+.. code-block:: xml
+
+  <source>
+      <executable>
+          <command>cat /opt/dictionaries/os.tsv</command>
+          <format>TabSeparated</format>
+      </executable>
+  </source>
+
+Поля настройки:
+
+* ``command`` - Абсолютный путь к исполняемому файлу или имя файла (если каталог программы прописан в ``PATH``).
+* ``format`` - Формат файла. Поддерживаются все форматы, описанные в разделе ":ref:`formats`".
+
+
+.. _dicts-external_dicts_dict_sources-http:
+
+HTTP(s)
+=======
+
+Работа с HTTP(s) сервером зависит от :ref:`размещения словаря в памяти <dicts-external_dicts_dict_layout>`. Если тип размещения словаря ``cache`` или ``complex_key_cache``, то ClickHouse запрашивает необходимые ключи, отправляя запрос методом ``POST``.
+
+Пример настройки:
+
+.. code-block:: xml
+
+  <source>
+      <http>
+          <url>http://[::1]/os.tsv</url>
+          <format>TabSeparated</format>
+      </http>
+  </source>
+
+Чтобы ClickHouse смог обратиться к HTTPS-ресурсу, необходимо прописать :ref:`настройки openSSL <server_settings-openSSL>` в конфигурации сервера.
+
+Поля настройки:
+
+* ``url`` - URL источника.
+* ``format`` - Формат файла. Поддерживаются все форматы, описанные в разделе ":ref:`formats`".
+
+
+.. _dicts-external_dicts_dict_sources-odbc:
+
+ODBC
+====
+
+Этим способом можно подключить любую базу данных, имеющую ODBC драйвер.
+
+Пример настройки:
+
+.. code-block:: xml
+
+  <odbc>
+      <db>DatabaseName</db>
+      <table>TableName</table>
+      <connection_string>DSN=some_parameters</connection_string>
+  </odbc>
+
+Поля настройки:
+
+* ``db`` - Имя базы данных. Не указывать, если имя базы задано в параметрах ``<connection_string>``.
+* ``table`` - Имя таблицы.
+* ``connection_string`` - Строка соединения.
+  
+
+Пример подключения PostgreSQL
+-----------------------------
+
+ОС Ubuntu.
+
+Установка unixODBC и ODBC-драйвера для PostgreSQL: ::
+
+  sudo apt-get install -y unixodbc odbcinst odbc-postgresql
+
+
+Настройка ``/etc/odbc.ini`` (или ``~/.odbc.ini``): ::
+
+  [DEFAULT]
+  Driver = myconnection
+
+  [myconnection]
+  Description         = PostgreSQL connection to my_db
+  Driver              = PostgreSQL Unicode
+  Database            = my_db
+  Servername          = 127.0.0.1
+  UserName            = username
+  Password            = password
+  Port                = 5432
+  Protocol            = 9.3
+  ReadOnly            = No
+  RowVersioning       = No
+  ShowSystemTables    = No
+  ConnSettings        =
+
+
+Конфигурация словаря в ClickHouse:
+
+.. code-block:: xml
+
+  <dictionary>
+      <name>table_name</name>
+      <source>
+          <odbc>
+              <!-- в connection_string можно указывать следующие параметры: -->
+              <!-- DSN=myconnection;UID=username;PWD=password;HOST=127.0.0.1;PORT=5432;DATABASE=my_db -->
+              <connection_string>DSN=myconnection</connection_string>
+              <table>postgresql_table</table>
+          </odbc>
+      </source>
+      <lifetime>
+          <min>300</min>
+          <max>360</max>
+      </lifetime>
+      <layout>
+          <hashed/>
+      </layout>
+      <structure>
+          <id>
+              <name>id</name>
+          </id>
+          <attribute>
+              <name>some_column</name>
+              <type>UInt64</type>
+              <null_value>0</null_value>
+          </attribute>
+      </structure>
+  </dictionary>
+
+Может понадобиться в ``odbc.ini`` указать полный путь до библиотеки с драйвером ``DRIVER=/usr/local/lib/psqlodbcw.so``.
+
+Пример подключения MS SQL Server
+--------------------------------
+
+ОС Ubuntu.
+
+Установка драйвера: ::
+  
+  sudo apt-get install tdsodbc freetds-bin sqsh
+
+Настройка драйвера: ::
+
+  $ cat /etc/freetds/freetds.conf 
+  ...
+
+  [MSSQL]
+  host = 192.168.56.101
+  port = 1433
+  tds version = 7.0
+  client charset = UTF-8
+
+  $ cat /etc/odbcinst.ini 
+  ...
+
+  [FreeTDS]
+  Description     = FreeTDS
+  Driver          = /usr/lib/x86_64-linux-gnu/odbc/libtdsodbc.so
+  Setup           = /usr/lib/x86_64-linux-gnu/odbc/libtdsS.so
+  FileUsage       = 1
+  UsageCount      = 5
+
+  $ cat ~/.odbc.ini 
+  ...
+
+  [MSSQL]
+  Description     = FreeTDS
+  Driver          = FreeTDS
+  Servername      = MSSQL
+  Database        = test
+  UID             = test
+  PWD             = test
+  Port            = 1433
+
+
+Настройка словаря в ClickHouse:
+
+.. code-block:: xml
+
+  <dictionaries>
+      <dictionary>
+          <name>test</name>
+          <source>
+              <odbc>
+                  <table>dict</table>
+                  <connection_string>DSN=MSSQL;UID=test;PWD=test</connection_string>
+              </odbc>
+          </source>
+
+          <lifetime>
+              <min>300</min>
+              <max>360</max>
+          </lifetime>
+
+          <layout>
+              <flat />
+          </layout>
+
+          <structure>
+              <id>
+                  <name>k</name>
+              </id>
+              <attribute>
+                  <name>s</name>
+                  <type>String</type>
+                  <null_value></null_value>
+              </attribute>
+          </structure>
+      </dictionary>
+  </dictionaries>
+
+
+
+СУБД
+====
+
+.. _dicts-external_dicts_dict_sources-mysql:
+
+MySQL
+-----
+
+Пример настройки:
+
+.. code-block:: xml
+
+  <source>
+    <mysql>
+        <port>3306</port>
+        <user>clickhouse</user>
+        <password>qwerty</password>
+        <replica>
+            <host>example01-1</host>
+            <priority>1</priority>
+        </replica>
+        <replica>
+            <host>example01-2</host>
+            <priority>1</priority>
+        </replica>
+        <db>conv_main</db>
+        <table>counters</table>
+        <where>id=10</where>
+    </mysql>
+  </source>
+
+
+Поля настройки:
+
+* ``port`` - порт сервера MySQL. Можно указать для всех реплик или для каждой в отдельности (внутри ``<replica>``).
+* ``user`` - имя пользователя MySQL. Можно указать для всех реплик или для каждой в отдельности (внутри ``<replica>``).
+* ``password`` - пароль пользователя MySQL. Можно указать для всех реплик или для каждой в отдельности (внутри ``<replica>``).
+* ``replica`` - блок конфигурации реплики. Блоков может быть несколько.
+  
+  * ``replica/host`` - хост MySQL.
+  * ``replica/priority`` - приоритет реплики. При попытке соединения ClickHouse обходит реплики в соответствии с приоритетом. Чем меньше число, тем выше приоритет.
+* ``db`` - имя базы данных.
+* ``table`` - имя таблицы.
+* ``where`` - условие выбора. Может отсутствовать.
+
+.. _dicts-external_dicts_dict_sources-clickhouse:
+
+ClickHouse
+----------
+
+Пример настройки:
+
+.. code-block:: xml
+  
+  <source>
+      <clickhouse>
+          <host>example01-01-1</host>
+          <port>9000</port>
+          <user>default</user>
+          <password></password>
+          <db>default</db>
+          <table>ids</table>
+          <where>id=10</where>
+      </clickhouse>
+  </source>
+
+Поля настройки:
+
+* ``host`` - хост ClickHouse. Если host локальный, то запрос выполняется без сетевого взаимодействия. Чтобы повысить отказоустойчивость решения, можно создать таблицу типа :ref:`Distributed <table_engines-distributed>` и прописать её в дальнейших настройках.
+* ``port`` - порт сервера ClickHouse.
+* ``user`` - имя пользователя ClickHouse.
+* ``password`` - пароль пользователя ClickHouse.
+* ``db`` - имя базы данных.
+* ``table`` - имя таблицы.
+* ``where`` - условие выбора. Может отсутствовать.
+
+
+.. _dicts-external_dicts_dict_sources-mongodb:
+
+MongoDB
+-------
+
+Пример настройки:
+
+.. code-block:: xml
+
+  <source>
+      <mongodb>
+          <host>localhost</host>
+          <port>27017</port>
+          <user></user>
+          <password></password>
+          <db>test</db>
+          <collection>dictionary_source</collection>
+      </mongodb>
+  </source>
+
+
+Поля настройки:
+
+* ``host`` - хост MongoDB.
+* ``port`` - порт сервера MongoDB.
+* ``user`` - имя пользователя MongoDB.
+* ``password`` - пароль пользователя MongoDB.
+* ``db`` - имя базы данных.
+* ``collection`` - имя коллекции.
diff --git a/docs/ru/dicts/external_dicts_dict_structure.rst b/docs/ru/dicts/external_dicts_dict_structure.rst
new file mode 100644
index 00000000000..ec0ae0be370
--- /dev/null
+++ b/docs/ru/dicts/external_dicts_dict_structure.rst
@@ -0,0 +1,125 @@
+.. _dicts-external_dicts_dict_structure:
+
+*******************
+Ключ и поля словаря
+*******************
+
+Секция ``<structure>`` описывает ключ словаря и поля, доступные для запросов.
+
+
+Общий вид структуры:
+
+.. code-block:: xml
+
+      <dictionary>
+          <structure>
+              <id>
+                  <name>Id</name>
+              </id>
+  
+              <attribute>
+                  <!-- Attribute parameters -->
+              </attribute>
+              
+              ...
+
+          </structure>
+      </dictionary>
+
+В структуре описываются столбцы:
+
+* ``<id>`` - :ref:`ключевой столбец <dicts-external_dicts_dict_structure-key>`.
+* ``<attribute>`` - :ref:`столбец данных <dicts-external_dicts_dict_structure-attributes>`. Столбцов может быть много.
+
+.. _dicts-external_dicts_dict_structure-key:
+
+Ключ
+====
+
+ClickHouse поддерживает следующие виды ключей:
+
+* Числовой ключ. Формат UInt64. Описывается в теге ``<id>``.
+* Составной ключ. Набор значений разного типа. Описывается в теге ``<key>``.
+  
+Структура может содержать либо ``<id>``, либо ``<key>``.
+
+
+.. attention:: Ключ не надо дополнительно описывать в атрибутах.
+
+Числовой ключ
+--------------
+
+Формат: ``UInt64``.
+
+Пример конфигурации:
+
+.. code-block:: xml
+
+    <id>
+        <name>Id</name>
+    </id>
+
+
+Поля конфигурации:
+
+* ``name`` - имя столбца с ключами.
+  
+
+Составной ключ
+---------------
+
+Ключом может быть кортеж (``tuple``) из полей произвольных типов. :ref:`layout <dicts-external_dicts_dict_layout>` в этом случае должен быть ``complex_key_hashed`` или ``complex_key_cache``.
+
+.. tip:: Составной ключ может состоять и из одного элемента, что даёт возможность использовать в качестве ключа, например, строку.
+
+Структура ключа задаётся в элементе ``<key>``. Поля ключа задаются в том же формате, что и :ref:`атрибуты <dicts-external_dicts_dict_structure-attributes>` словаря. Пример:
+
+.. code-block:: xml
+
+  <structure>
+      <key>
+          <attribute>
+              <name>field1</name>
+              <type>String</type>
+          </attribute>
+          <attribute>
+              <name>field2</name>
+              <type>UInt32</type>
+          </attribute>
+          ...
+      </key>
+  ...
+
+
+При запросе в функции ``dictGet*`` в качестве ключа передаётся кортеж. Пример: ``dictGetString('dict_name', 'attr_name', tuple('string for field1', num_for_field2))``.
+
+
+.. _dicts-external_dicts_dict_structure-attributes:
+
+Атрибуты
+========
+
+Пример конфигурации:
+
+.. code-block:: xml
+
+    <structure>
+        ...
+        <attribute>
+            <name>Name</name>
+            <type>Type</type>
+            <null_value></null_value>
+            <expression>rand64()</expression>
+            <hierarchical>true</hierarchical>
+            <injective>true</injective>
+        </attribute>
+    </structure>
+
+Поля конфигурации:
+
+* ``name`` - Имя столбца.
+* ``type`` - Тип столбца. Задает способ интерпретации данных в источнике. Например, в случае MySQL, в таблице-источнике поле может быть ``TEXT``, ``VARCHAR``, ``BLOB``, но загружено может быть как ``String``.
+* ``null_value`` - Значение по умолчанию для несуществующего элемента. В примере - пустая строка.
+* ``expression`` - Атрибут может быть выражением. Тег необязательный.
+* ``hierarchical`` - Поддержка иерархии. Отображение в идентификатор родителя. По умолчанию - ``false``.
+* ``injective`` - Признак инъективности отображения ``id -> attribute``. Если ``true``, то можно оптимизировать ``GROUP BY``. По умолчанию - ``false``.
diff --git a/docs/ru/dicts/index.rst b/docs/ru/dicts/index.rst
index a8ad75e9cd2..8cd65b643e7 100644
--- a/docs/ru/dicts/index.rst
+++ b/docs/ru/dicts/index.rst
@@ -7,6 +7,6 @@
 Существуют встроенные и подключаемые (внешние) словари.
 
 .. toctree::
-    :glob:
 
-    *
+   external_dicts
+   internal_dicts
diff --git a/docs/ru/formats/index.rst b/docs/ru/formats/index.rst
index 6db2890830f..b7510f79d3b 100644
--- a/docs/ru/formats/index.rst
+++ b/docs/ru/formats/index.rst
@@ -1,3 +1,5 @@
+.. _formats:
+
 Форматы
 =======
 
diff --git a/docs/ru/functions/ext_dict_functions.rst b/docs/ru/functions/ext_dict_functions.rst
index 0e27687efe3..963c670c5af 100644
--- a/docs/ru/functions/ext_dict_functions.rst
+++ b/docs/ru/functions/ext_dict_functions.rst
@@ -1,6 +1,8 @@
+.. _ext_dict_functions:
+
 Функции для работы с внешними словарями
 ---------------------------------------
-Подробнее смотрите в разделе "Внешние словари".
+Информацию о подключении и настройке внешних словарей смотрите в разделе :ref:`dicts-external_dicts`.
 
 dictGetUInt8, dictGetUInt16, dictGetUInt32, dictGetUInt64
 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
diff --git a/docs/ru/functions/other_functions.rst b/docs/ru/functions/other_functions.rst
index 32a9c0a3ed3..b9b284e4293 100644
--- a/docs/ru/functions/other_functions.rst
+++ b/docs/ru/functions/other_functions.rst
@@ -108,6 +108,8 @@ bar
   │ 23 │ 400397 │ █████████████▎     │
   └────┴────────┴────────────────────┘
 
+.. _other_functions-transform:
+
 transform
 ~~~~~~~~~
 Преобразовать значение согласно явно указанному отображению одних элементов на другие.
diff --git a/docs/ru/operations/server_settings/settings.rst b/docs/ru/operations/server_settings/settings.rst
index c99054a18fa..9d7370e895a 100644
--- a/docs/ru/operations/server_settings/settings.rst
+++ b/docs/ru/operations/server_settings/settings.rst
@@ -98,9 +98,13 @@ default_profile
 
 dictionaries_config
 -------------------
-Конфигурация внешних словарей.
+Путь к конфигурации внешних словарей.
 
-Смотрите раздел :ref:`dicts-external_dicts`.
+Путь:
+  * Указывается абсолютным или относительно конфигурационного файла сервера.
+  * Может содержать wildcard-ы \* и ?.
+
+Про внешние словари читайте в разделе :ref:`dicts-external_dicts`.
 
 **Пример**
 
@@ -116,7 +120,11 @@ dictionaries_lazy_load
 
 Отложенная загрузка словарей.
 
-С установленным параметром словари подгружаются не при запуске сервера, а при первом обращении.
+Если ``true``, то каждый словарь создаётся при первом использовании. Если словарь не удалось создать, то вызов функции, использующей словарь, сгенерирует исключение. 
+
+Если ``false``, то все словари создаются при старте сервера, и в случае ошибки сервер завершает работу.
+
+По умолчанию - ``true``.
 
 **Пример**
 

From 0d876de370e72b24e2ceca411b7c58ed94973f40 Mon Sep 17 00:00:00 2001
From: BayoNet <bayonet@virtUbuntu16.04>
Date: Mon, 31 Jul 2017 12:17:48 +0300
Subject: [PATCH 006/281] Default `make html` language changed to russian.

---
 docs/Makefile | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/docs/Makefile b/docs/Makefile
index 95802e63d01..67c6263a5cd 100644
--- a/docs/Makefile
+++ b/docs/Makefile
@@ -15,7 +15,7 @@ endif
 # Internal variables.
 PAPEROPT_a4     = -D latex_paper_size=a4
 PAPEROPT_letter = -D latex_paper_size=letter
-ALLSPHINXOPTS   = $(SPHINXOPTS) en
+ALLSPHINXOPTS   = $(SPHINXOPTS) ru
 # the i18n builder cannot share the environment and doctrees with the others
 I18NSPHINXOPTS  = $(PAPEROPT_$(PAPER)) $(SPHINXOPTS) ru
 

From a0cc54457e9e0f5d82fe01592d3c6cb17b9b3d63 Mon Sep 17 00:00:00 2001
From: Pawel Rog <prog88@gmail.com>
Date: Fri, 28 Jul 2017 16:14:07 +0200
Subject: [PATCH 007/281] Added compression NONE

---
 dbms/src/IO/CompressedReadBufferBase.cpp      | 12 +++++++++--
 dbms/src/IO/CompressedStream.h                |  4 +++-
 dbms/src/IO/CompressedWriteBuffer.cpp         | 20 +++++++++++++++++++
 dbms/src/Storages/CompressionMethodSelector.h |  2 ++
 dbms/src/Storages/MergeTree/MergeTreeData.cpp |  2 +-
 .../Storages/MergeTree/MergeTreeDataPart.cpp  |  2 +-
 .../MergeTree/MergeTreeDataWriter.cpp         |  2 +-
 utils/compressor/main.cpp                     |  4 ++++
 8 files changed, 42 insertions(+), 6 deletions(-)

diff --git a/dbms/src/IO/CompressedReadBufferBase.cpp b/dbms/src/IO/CompressedReadBufferBase.cpp
index 77b7907e403..487c4b6d77d 100644
--- a/dbms/src/IO/CompressedReadBufferBase.cpp
+++ b/dbms/src/IO/CompressedReadBufferBase.cpp
@@ -2,6 +2,7 @@
 
 #include <vector>
 
+#include <string.h>
 #include <city.h>
 
 #ifdef USE_QUICKLZ
@@ -57,7 +58,11 @@ size_t CompressedReadBufferBase::readCompressedData(size_t & size_decompressed,
 
     size_t & size_compressed = size_compressed_without_checksum;
 
-    if (method < 0x80)
+    if (method == static_cast<UInt8> (CompressionMethodByte::NONE)) {
+      size_compressed = unalignedLoad<UInt32>(&own_compressed_buffer[1]);
+      size_decompressed = unalignedLoad<UInt32>(&own_compressed_buffer[5]);
+    }
+    else if (method < 0x80)
     {
     #ifdef USE_QUICKLZ
         size_compressed = qlz_size_compressed(&own_compressed_buffer[0]);
@@ -108,7 +113,10 @@ void CompressedReadBufferBase::decompress(char * to, size_t size_decompressed, s
 
     UInt8 method = compressed_buffer[0];    /// See CompressedWriteBuffer.h
 
-    if (method < 0x80)
+    if (method == static_cast<UInt8> (CompressionMethodByte::NONE)) {
+      memcpy(to, &compressed_buffer[COMPRESSED_BLOCK_HEADER_SIZE], size_decompressed);
+    }
+    else if (method < 0x80)
     {
     #ifdef USE_QUICKLZ
         if (!qlz_state)
diff --git a/dbms/src/IO/CompressedStream.h b/dbms/src/IO/CompressedStream.h
index 45b7a0b308c..585a6cb34ad 100644
--- a/dbms/src/IO/CompressedStream.h
+++ b/dbms/src/IO/CompressedStream.h
@@ -20,6 +20,7 @@ enum class CompressionMethod
     LZ4 = 1,
     LZ4HC = 2,        /// The format is the same as for LZ4. The difference is only in compression.
     ZSTD = 3,         /// Experimental algorithm: https://github.com/Cyan4973/zstd
+    NONE = 4,         /// No compression
 };
 
 /** The compressed block format is as follows:
@@ -53,7 +54,8 @@ enum class CompressionMethod
 
 enum class CompressionMethodByte : uint8_t
 {
-    LZ4     = 0x82,
+    NONE     = 0x02,
+    LZ4      = 0x82,
     ZSTD     = 0x90,
 };
 
diff --git a/dbms/src/IO/CompressedWriteBuffer.cpp b/dbms/src/IO/CompressedWriteBuffer.cpp
index eb00b400196..429cf825d0d 100644
--- a/dbms/src/IO/CompressedWriteBuffer.cpp
+++ b/dbms/src/IO/CompressedWriteBuffer.cpp
@@ -8,6 +8,7 @@
 #include <lz4.h>
 #include <lz4hc.h>
 #include <zstd.h>
+#include <string.h>
 
 #include <Common/unaligned.h>
 #include <Core/Types.h>
@@ -121,6 +122,25 @@ void CompressedWriteBuffer::nextImpl()
             compressed_buffer_ptr = &compressed_buffer[0];
             break;
         }
+        case CompressionMethod::NONE:
+        {
+            static constexpr size_t header_size = 1 + sizeof (UInt32) + sizeof (UInt32);
+
+            compressed_size = header_size + uncompressed_size;
+            UInt32 uncompressed_size_32 = uncompressed_size;
+            UInt32 compressed_size_32 = compressed_size;
+
+            compressed_buffer.resize(compressed_size);
+
+            compressed_buffer[0] = static_cast<UInt8> (CompressionMethodByte::NONE);
+
+            unalignedStore(&compressed_buffer[1], compressed_size_32);
+            unalignedStore(&compressed_buffer[5], uncompressed_size_32);
+            memcpy(&compressed_buffer[9], working_buffer.begin(), uncompressed_size);
+
+            compressed_buffer_ptr = &compressed_buffer[0];
+            break;
+        }
         default:
             throw Exception("Unknown compression method", ErrorCodes::UNKNOWN_COMPRESSION_METHOD);
     }
diff --git a/dbms/src/Storages/CompressionMethodSelector.h b/dbms/src/Storages/CompressionMethodSelector.h
index 5113a662b16..c762bae1513 100644
--- a/dbms/src/Storages/CompressionMethodSelector.h
+++ b/dbms/src/Storages/CompressionMethodSelector.h
@@ -51,6 +51,8 @@ private:
                 method = CompressionMethod::LZ4;
             else if (name == "zstd")
                 method = CompressionMethod::ZSTD;
+            else if (name == "none")
+              method = CompressionMethod::NONE;
             else
                 throw Exception("Unknown compression method " + name, ErrorCodes::UNKNOWN_COMPRESSION_METHOD);
         }
diff --git a/dbms/src/Storages/MergeTree/MergeTreeData.cpp b/dbms/src/Storages/MergeTree/MergeTreeData.cpp
index af1b8256e7f..71001947913 100644
--- a/dbms/src/Storages/MergeTree/MergeTreeData.cpp
+++ b/dbms/src/Storages/MergeTree/MergeTreeData.cpp
@@ -1027,7 +1027,7 @@ MergeTreeData::AlterDataPartTransactionPtr MergeTreeData::alterDataPart(
             false, nullptr, "", false, 0, DBMS_DEFAULT_BUFFER_SIZE, false);
 
         ExpressionBlockInputStream in(part_in, expression);
-        MergedColumnOnlyOutputStream out(*this, full_path + part->name + '/', true, CompressionMethod::LZ4, false);
+        MergedColumnOnlyOutputStream out(*this, full_path + part->name + '/', true, CompressionMethod::NONE, false);
         in.readPrefix();
         out.writePrefix();
 
diff --git a/dbms/src/Storages/MergeTree/MergeTreeDataPart.cpp b/dbms/src/Storages/MergeTree/MergeTreeDataPart.cpp
index 1baebb13c86..863cd209632 100644
--- a/dbms/src/Storages/MergeTree/MergeTreeDataPart.cpp
+++ b/dbms/src/Storages/MergeTree/MergeTreeDataPart.cpp
@@ -200,7 +200,7 @@ void MergeTreeDataPartChecksums::write(WriteBuffer & to) const
 {
     writeString("checksums format version: 4\n", to);
 
-    CompressedWriteBuffer out{to, CompressionMethod::LZ4, 1 << 16};
+    CompressedWriteBuffer out{to, CompressionMethod::NONE, 1 << 16};
     writeVarUInt(files.size(), out);
 
     for (const auto & it : files)
diff --git a/dbms/src/Storages/MergeTree/MergeTreeDataWriter.cpp b/dbms/src/Storages/MergeTree/MergeTreeDataWriter.cpp
index c548ee560ee..f2b1b2228cc 100644
--- a/dbms/src/Storages/MergeTree/MergeTreeDataWriter.cpp
+++ b/dbms/src/Storages/MergeTree/MergeTreeDataWriter.cpp
@@ -138,7 +138,7 @@ MergeTreeData::MutableDataPartPtr MergeTreeDataWriter::writeTempPart(BlockWithDa
     }
 
     NamesAndTypesList columns = data.getColumnsList().filter(block.getColumnsList().getNames());
-    MergedBlockOutputStream out(data, new_data_part->getFullPath(), columns, CompressionMethod::LZ4);
+    MergedBlockOutputStream out(data, new_data_part->getFullPath(), columns, CompressionMethod::NONE);
 
     out.writePrefix();
     out.writeWithPermutation(block, perm_ptr);
diff --git a/utils/compressor/main.cpp b/utils/compressor/main.cpp
index e244561ebec..18263741d64 100644
--- a/utils/compressor/main.cpp
+++ b/utils/compressor/main.cpp
@@ -59,6 +59,7 @@ int main(int argc, char ** argv)
         ("qlz", "use QuickLZ (level 1) instead of LZ4")
     #endif
         ("zstd", "use ZSTD instead of LZ4")
+        ("none", "use no compression instead of LZ4")
         ("stat", "print block statistics of compressed data")
     ;
 
@@ -85,6 +86,7 @@ int main(int argc, char ** argv)
         bool use_lz4hc = options.count("hc");
         bool use_zstd = options.count("zstd");
         bool stat_mode = options.count("stat");
+        bool use_none = options.count("none");
         unsigned block_size = options["block-size"].as<unsigned>();
 
         DB::CompressionMethod method = DB::CompressionMethod::LZ4;
@@ -95,6 +97,8 @@ int main(int argc, char ** argv)
             method = DB::CompressionMethod::LZ4HC;
         else if (use_zstd)
             method = DB::CompressionMethod::ZSTD;
+        else if (use_none)
+            method = DB::CompressionMethod::NONE;
 
         DB::ReadBufferFromFileDescriptor rb(STDIN_FILENO);
         DB::WriteBufferFromFileDescriptor wb(STDOUT_FILENO);

From e1ab721d3db6fe6887e20052651dd444328e2c22 Mon Sep 17 00:00:00 2001
From: Pawel Rog <prog88@gmail.com>
Date: Mon, 31 Jul 2017 13:05:49 +0200
Subject: [PATCH 008/281] Use compression method from configs

---
 dbms/src/Storages/MergeTree/MergeTreeData.cpp       | 5 ++++-
 dbms/src/Storages/MergeTree/MergeTreeDataPart.cpp   | 2 +-
 dbms/src/Storages/MergeTree/MergeTreeDataWriter.cpp | 6 +++++-
 3 files changed, 10 insertions(+), 3 deletions(-)

diff --git a/dbms/src/Storages/MergeTree/MergeTreeData.cpp b/dbms/src/Storages/MergeTree/MergeTreeData.cpp
index 71001947913..8b2c006c642 100644
--- a/dbms/src/Storages/MergeTree/MergeTreeData.cpp
+++ b/dbms/src/Storages/MergeTree/MergeTreeData.cpp
@@ -1026,8 +1026,11 @@ MergeTreeData::AlterDataPartTransactionPtr MergeTreeData::alterDataPart(
             *this, part, DEFAULT_MERGE_BLOCK_SIZE, 0, expression->getRequiredColumns(), ranges,
             false, nullptr, "", false, 0, DBMS_DEFAULT_BUFFER_SIZE, false);
 
+        auto compression_method = this->context.chooseCompressionMethod(
+            this->getTotalActiveSizeInBytes(),
+            static_cast<double> (this->getTotalCompressedSize()) / this->getTotalActiveSizeInBytes());
         ExpressionBlockInputStream in(part_in, expression);
-        MergedColumnOnlyOutputStream out(*this, full_path + part->name + '/', true, CompressionMethod::NONE, false);
+        MergedColumnOnlyOutputStream out(*this, full_path + part->name + '/', true, compression_method, false);
         in.readPrefix();
         out.writePrefix();
 
diff --git a/dbms/src/Storages/MergeTree/MergeTreeDataPart.cpp b/dbms/src/Storages/MergeTree/MergeTreeDataPart.cpp
index 863cd209632..1baebb13c86 100644
--- a/dbms/src/Storages/MergeTree/MergeTreeDataPart.cpp
+++ b/dbms/src/Storages/MergeTree/MergeTreeDataPart.cpp
@@ -200,7 +200,7 @@ void MergeTreeDataPartChecksums::write(WriteBuffer & to) const
 {
     writeString("checksums format version: 4\n", to);
 
-    CompressedWriteBuffer out{to, CompressionMethod::NONE, 1 << 16};
+    CompressedWriteBuffer out{to, CompressionMethod::LZ4, 1 << 16};
     writeVarUInt(files.size(), out);
 
     for (const auto & it : files)
diff --git a/dbms/src/Storages/MergeTree/MergeTreeDataWriter.cpp b/dbms/src/Storages/MergeTree/MergeTreeDataWriter.cpp
index f2b1b2228cc..62b50062252 100644
--- a/dbms/src/Storages/MergeTree/MergeTreeDataWriter.cpp
+++ b/dbms/src/Storages/MergeTree/MergeTreeDataWriter.cpp
@@ -137,8 +137,12 @@ MergeTreeData::MutableDataPartPtr MergeTreeDataWriter::writeTempPart(BlockWithDa
             ProfileEvents::increment(ProfileEvents::MergeTreeDataWriterBlocksAlreadySorted);
     }
 
+    auto compression_method = data.context.chooseCompressionMethod(
+        data.getTotalActiveSizeInBytes(),
+        static_cast<double> (data.getTotalCompressedSize()) / data.getTotalActiveSizeInBytes());
+
     NamesAndTypesList columns = data.getColumnsList().filter(block.getColumnsList().getNames());
-    MergedBlockOutputStream out(data, new_data_part->getFullPath(), columns, CompressionMethod::NONE);
+    MergedBlockOutputStream out(data, new_data_part->getFullPath(), columns, compression_method);
 
     out.writePrefix();
     out.writeWithPermutation(block, perm_ptr);

From 3d2ec763ed650ceccb75641fbb1aaca98b89db50 Mon Sep 17 00:00:00 2001
From: alexey-milovidov <milovidov@yandex-team.ru>
Date: Mon, 31 Jul 2017 20:40:56 +0300
Subject: [PATCH 009/281] Update CompressedReadBufferBase.cpp

---
 dbms/src/IO/CompressedReadBufferBase.cpp | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/dbms/src/IO/CompressedReadBufferBase.cpp b/dbms/src/IO/CompressedReadBufferBase.cpp
index 346b844e0dc..33208f95175 100644
--- a/dbms/src/IO/CompressedReadBufferBase.cpp
+++ b/dbms/src/IO/CompressedReadBufferBase.cpp
@@ -59,7 +59,7 @@ size_t CompressedReadBufferBase::readCompressedData(size_t & size_decompressed,
         size_compressed = unalignedLoad<UInt32>(&own_compressed_buffer[1]);
         size_decompressed = unalignedLoad<UInt32>(&own_compressed_buffer[5]);
     }
-    else if (method == static_cast<UInt8> (CompressionMethodByte::NONE))
+    else if (method == static_cast<UInt8>(CompressionMethodByte::NONE))
     {
         size_compressed = unalignedLoad<UInt32>(&own_compressed_buffer[1]);
         size_decompressed = unalignedLoad<UInt32>(&own_compressed_buffer[5]);

From 50244880ccf2028a734c26e614fcf80dda967e8b Mon Sep 17 00:00:00 2001
From: alexey-milovidov <milovidov@yandex-team.ru>
Date: Mon, 31 Jul 2017 20:42:23 +0300
Subject: [PATCH 010/281] Update CompressedReadBufferBase.cpp

---
 dbms/src/IO/CompressedReadBufferBase.cpp | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/dbms/src/IO/CompressedReadBufferBase.cpp b/dbms/src/IO/CompressedReadBufferBase.cpp
index 33208f95175..abcfc35702b 100644
--- a/dbms/src/IO/CompressedReadBufferBase.cpp
+++ b/dbms/src/IO/CompressedReadBufferBase.cpp
@@ -115,7 +115,7 @@ void CompressedReadBufferBase::decompress(char * to, size_t size_decompressed, s
         if (ZSTD_isError(res))
             throw Exception("Cannot ZSTD_decompress: " + std::string(ZSTD_getErrorName(res)), ErrorCodes::CANNOT_DECOMPRESS);
     }
-    else if (method == static_cast<UInt8> (CompressionMethodByte::NONE))
+    else if (method == static_cast<UInt8>(CompressionMethodByte::NONE))
     {
         memcpy(to, &compressed_buffer[COMPRESSED_BLOCK_HEADER_SIZE], size_decompressed);
     }

From fdb7e080bed2ddd0f1a092c37843e4cd0d1947ec Mon Sep 17 00:00:00 2001
From: alexey-milovidov <milovidov@yandex-team.ru>
Date: Mon, 31 Jul 2017 20:43:33 +0300
Subject: [PATCH 011/281] Update CompressedWriteBuffer.cpp

---
 dbms/src/IO/CompressedWriteBuffer.cpp | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/dbms/src/IO/CompressedWriteBuffer.cpp b/dbms/src/IO/CompressedWriteBuffer.cpp
index 93eee532754..4575278b12c 100644
--- a/dbms/src/IO/CompressedWriteBuffer.cpp
+++ b/dbms/src/IO/CompressedWriteBuffer.cpp
@@ -109,7 +109,7 @@ void CompressedWriteBuffer::nextImpl()
 
             compressed_buffer.resize(compressed_size);
 
-            compressed_buffer[0] = static_cast<UInt8> (CompressionMethodByte::NONE);
+            compressed_buffer[0] = static_cast<UInt8>(CompressionMethodByte::NONE);
 
             unalignedStore(&compressed_buffer[1], compressed_size_32);
             unalignedStore(&compressed_buffer[5], uncompressed_size_32);

From 4877aa394c30932114dc370759096f8bd1d85d87 Mon Sep 17 00:00:00 2001
From: alexey-milovidov <milovidov@yandex-team.ru>
Date: Mon, 31 Jul 2017 20:44:41 +0300
Subject: [PATCH 012/281] Update MergeTreeData.cpp

---
 dbms/src/Storages/MergeTree/MergeTreeData.cpp | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/dbms/src/Storages/MergeTree/MergeTreeData.cpp b/dbms/src/Storages/MergeTree/MergeTreeData.cpp
index 5dbf55236ed..0c22298dd1d 100644
--- a/dbms/src/Storages/MergeTree/MergeTreeData.cpp
+++ b/dbms/src/Storages/MergeTree/MergeTreeData.cpp
@@ -1032,7 +1032,7 @@ MergeTreeData::AlterDataPartTransactionPtr MergeTreeData::alterDataPart(
 
         auto compression_method = this->context.chooseCompressionMethod(
             this->getTotalActiveSizeInBytes(),
-            static_cast<double> (this->getTotalCompressedSize()) / this->getTotalActiveSizeInBytes());
+            static_cast<double>(this->getTotalCompressedSize()) / this->getTotalActiveSizeInBytes());
         ExpressionBlockInputStream in(part_in, expression);
         MergedColumnOnlyOutputStream out(*this, full_path + part->name + '/', true, compression_method, false);
         in.readPrefix();

From d174ebc5c902f14634271963ba62a71de9369f54 Mon Sep 17 00:00:00 2001
From: alexey-milovidov <milovidov@yandex-team.ru>
Date: Mon, 31 Jul 2017 20:44:58 +0300
Subject: [PATCH 013/281] Update MergeTreeDataWriter.cpp

---
 dbms/src/Storages/MergeTree/MergeTreeDataWriter.cpp | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/dbms/src/Storages/MergeTree/MergeTreeDataWriter.cpp b/dbms/src/Storages/MergeTree/MergeTreeDataWriter.cpp
index 2ae318468c2..2b3d869967e 100644
--- a/dbms/src/Storages/MergeTree/MergeTreeDataWriter.cpp
+++ b/dbms/src/Storages/MergeTree/MergeTreeDataWriter.cpp
@@ -148,7 +148,7 @@ MergeTreeData::MutableDataPartPtr MergeTreeDataWriter::writeTempPart(BlockWithDa
 
     auto compression_method = data.context.chooseCompressionMethod(
         data.getTotalActiveSizeInBytes(),
-        static_cast<double> (data.getTotalCompressedSize()) / data.getTotalActiveSizeInBytes());
+        static_cast<double>(data.getTotalCompressedSize()) / data.getTotalActiveSizeInBytes());
 
     NamesAndTypesList columns = data.getColumnsList().filter(block.getColumnsList().getNames());
     MergedBlockOutputStream out(data, new_data_part->getFullPath(), columns, compression_method);

From 6578fb264563fce36081928860cd3edeb52651ce Mon Sep 17 00:00:00 2001
From: Alexey Milovidov <milovidov@yandex-team.ru>
Date: Tue, 1 Aug 2017 00:39:24 +0300
Subject: [PATCH 014/281] Fixed error [#CLICKHOUSE-3191].

---
 dbms/src/Columns/ColumnAggregateFunction.cpp  |   9 +-
 dbms/src/Columns/ColumnConst.cpp              |  14 +-
 dbms/src/Common/Exception.cpp                 |   9 +-
 dbms/src/Common/FileChecker.cpp               |  23 +-
 dbms/src/Common/formatReadable.cpp            |  15 +-
 dbms/src/Common/tests/hash_table.cpp          |  27 +-
 dbms/src/Common/tests/small_table.cpp         |  27 +-
 dbms/src/Core/Block.cpp                       |  38 +-
 dbms/src/Core/ColumnWithTypeAndName.cpp       |  25 +-
 dbms/src/Core/Field.cpp                       |   2 -
 dbms/src/Core/FieldVisitors.cpp               |  53 ++-
 dbms/src/Core/NamesAndTypes.cpp               |  10 +-
 dbms/src/Core/SortDescription.cpp             |  13 +-
 dbms/src/Core/tests/string_ref_hash.cpp       |   1 -
 dbms/src/DataStreams/CSVRowInputStream.cpp    |  11 +-
 dbms/src/DataStreams/JSONRowOutputStream.cpp  |  10 +-
 .../DataStreams/PrettyBlockOutputStream.cpp   |   1 +
 dbms/src/DataStreams/TSKVRowOutputStream.cpp  |  12 +-
 .../TabSeparatedRowInputStream.cpp            |  12 +-
 .../DataTypes/DataTypeAggregateFunction.cpp   |   5 +-
 dbms/src/DataTypes/DataTypeArray.cpp          |  10 +-
 dbms/src/DataTypes/DataTypeEnum.cpp           |  34 +-
 dbms/src/DataTypes/DataTypeNested.cpp         |   6 +-
 dbms/src/Databases/DatabaseCloud.cpp          |  27 +-
 .../src/Dictionaries/ExternalQueryBuilder.cpp | 384 +++++++++---------
 .../Dictionaries/MySQLDictionarySource.cpp    |  11 +-
 dbms/src/Functions/Conditional/ArgsInfo.cpp   |   8 +-
 .../Functions/Conditional/getArrayType.cpp    |   8 +-
 dbms/src/Functions/FunctionsConversion.cpp    |  29 +-
 dbms/src/Functions/FunctionsFormatting.h      |   1 -
 dbms/src/IO/ReadHelpers.cpp                   |  38 +-
 .../IO/WriteBufferFromHTTPServerResponse.cpp  |   9 +-
 dbms/src/IO/WriteBufferFromString.h           |  19 +
 dbms/src/IO/WriteHelpers.h                    |  11 +-
 .../gtest_cascade_and_memory_write_buffer.cpp |   1 -
 dbms/src/IO/tests/parse_int_perf.cpp          |   5 -
 dbms/src/Interpreters/Compiler.cpp            |  11 +-
 dbms/src/Interpreters/DDLWorker.cpp           |  17 +-
 .../Interpreters/InterpreterCreateQuery.cpp   |   1 -
 dbms/src/Interpreters/InterserverIOHandler.h  |   6 +-
 dbms/src/Parsers/ASTFunction.cpp              |   6 +-
 dbms/src/Parsers/IAST.cpp                     |  11 +-
 dbms/src/Parsers/parseQuery.cpp               |  49 +--
 dbms/src/Server/Client.cpp                    |   1 -
 dbms/src/Server/HTTPHandler.cpp               |   1 -
 dbms/src/Server/LocalServer.cpp               |   6 +-
 dbms/src/Storages/ColumnsDescription.cpp      |   5 +-
 .../Storages/MergeTree/ActiveDataPartSet.cpp  |  30 +-
 dbms/src/Storages/MergeTree/MergeTreeData.cpp |  24 +-
 .../Storages/MergeTree/MergeTreeDataPart.cpp  |   9 +-
 .../MergeTree/ReplicatedMergeTreeAddress.h    |   9 +-
 .../MergeTree/ReplicatedMergeTreeLogEntry.cpp |  10 +-
 .../ReplicatedMergeTreeQuorumEntry.h          |   9 +-
 dbms/src/Storages/MergeTree/ReshardingJob.cpp |   6 +-
 .../Storages/MergeTree/ReshardingWorker.cpp   |  65 ++-
 .../Storages/StorageReplicatedMergeTree.cpp   |   5 +-
 56 files changed, 516 insertions(+), 683 deletions(-)

diff --git a/dbms/src/Columns/ColumnAggregateFunction.cpp b/dbms/src/Columns/ColumnAggregateFunction.cpp
index b54d68eefb1..57a83a1fda8 100644
--- a/dbms/src/Columns/ColumnAggregateFunction.cpp
+++ b/dbms/src/Columns/ColumnAggregateFunction.cpp
@@ -165,12 +165,9 @@ ColumnPtr ColumnAggregateFunction::permute(const Permutation & perm, size_t limi
 /// Is required to support operations with Set
 void ColumnAggregateFunction::updateHashWithValue(size_t n, SipHash & hash) const
 {
-    String buf;
-    {
-        WriteBufferFromString wbuf(buf);
-        func->serialize(getData()[n], wbuf);
-    }
-    hash.update(buf.c_str(), buf.size());
+    WriteBufferFromOwnString wbuf;
+    func->serialize(getData()[n], wbuf);
+    hash.update(wbuf.str().c_str(), wbuf.str().size());
 }
 
 /// NOTE: Highly overestimates size of a column if it was produced in AggregatingBlockInputStream (it contains size of other columns)
diff --git a/dbms/src/Columns/ColumnConst.cpp b/dbms/src/Columns/ColumnConst.cpp
index caa05fd759c..a28dcd5b956 100644
--- a/dbms/src/Columns/ColumnConst.cpp
+++ b/dbms/src/Columns/ColumnConst.cpp
@@ -33,16 +33,12 @@ ColumnPtr ColumnConst::convertToFullColumn() const
 
 String ColumnConst::dump() const
 {
-    String res;
-    WriteBufferFromString out(res);
+    WriteBufferFromOwnString out;
+    out << "ColumnConst, size: " << s << ", nested column: " << data->getName() << ", nested size: " << data->size();
+    if (data->size())
+        out << ", value: " << applyVisitor(FieldVisitorDump(), (*data)[0]);
 
-    {
-        out << "ColumnConst, size: " << s << ", nested column: " << data->getName() << ", nested size: " << data->size();
-        if (data->size())
-            out << ", value: " << applyVisitor(FieldVisitorDump(), (*data)[0]);
-    }
-
-    return res;
+    return out.str();
 }
 
 }
diff --git a/dbms/src/Common/Exception.cpp b/dbms/src/Common/Exception.cpp
index 6301e2b9b30..afca8873335 100644
--- a/dbms/src/Common/Exception.cpp
+++ b/dbms/src/Common/Exception.cpp
@@ -231,12 +231,9 @@ std::string getExceptionMessage(std::exception_ptr e, bool with_stacktrace)
 
 std::string ExecutionStatus::serializeText() const
 {
-    std::string res;
-    {
-        WriteBufferFromString wb(res);
-        wb << code << "\n" << escape << message;
-    }
-    return res;
+    WriteBufferFromOwnString wb;
+    wb << code << "\n" << escape << message;
+    return wb.str();
 }
 
 void ExecutionStatus::deserializeText(const std::string & data)
diff --git a/dbms/src/Common/FileChecker.cpp b/dbms/src/Common/FileChecker.cpp
index 66e315fd754..e3b1db745ca 100644
--- a/dbms/src/Common/FileChecker.cpp
+++ b/dbms/src/Common/FileChecker.cpp
@@ -131,21 +131,18 @@ void FileChecker::load(Map & map) const
     if (!Poco::File(files_info_path).exists())
         return;
 
-    std::string content;
-    {
-        ReadBufferFromFile in(files_info_path);
-        WriteBufferFromString out(content);
+    ReadBufferFromFile in(files_info_path);
+    WriteBufferFromOwnString out;
 
-        /// The JSON library does not support whitespace. We delete them. Inefficient.
-        while (!in.eof())
-        {
-            char c;
-            readChar(c, in);
-            if (!isspace(c))
-                writeChar(c, out);
-        }
+    /// The JSON library does not support whitespace. We delete them. Inefficient.
+    while (!in.eof())
+    {
+        char c;
+        readChar(c, in);
+        if (!isspace(c))
+            writeChar(c, out);
     }
-    JSON json(content);
+    JSON json(out.str());
 
     JSON files = json["yandex"];
     for (const auto & name_value : files)
diff --git a/dbms/src/Common/formatReadable.cpp b/dbms/src/Common/formatReadable.cpp
index f78fd9ce9f5..1741197344f 100644
--- a/dbms/src/Common/formatReadable.cpp
+++ b/dbms/src/Common/formatReadable.cpp
@@ -35,10 +35,9 @@ void formatReadableSizeWithBinarySuffix(double value, DB::WriteBuffer & out, int
 
 std::string formatReadableSizeWithBinarySuffix(double value, int precision)
 {
-    std::string res;
-    DB::WriteBufferFromString out(res);
+    DB::WriteBufferFromOwnString out;
     formatReadableSizeWithBinarySuffix(value, out, precision);
-    return res;
+    return out.str();
 }
 
 
@@ -50,10 +49,9 @@ void formatReadableSizeWithDecimalSuffix(double value, DB::WriteBuffer & out, in
 
 std::string formatReadableSizeWithDecimalSuffix(double value, int precision)
 {
-    std::string res;
-    DB::WriteBufferFromString out(res);
+    DB::WriteBufferFromOwnString out;
     formatReadableSizeWithDecimalSuffix(value, out, precision);
-    return res;
+    return out.str();
 }
 
 
@@ -65,8 +63,7 @@ void formatReadableQuantity(double value, DB::WriteBuffer & out, int precision)
 
 std::string formatReadableQuantity(double value, int precision)
 {
-    std::string res;
-    DB::WriteBufferFromString out(res);
+    DB::WriteBufferFromOwnString out;
     formatReadableQuantity(value, out, precision);
-    return res;
+    return out.str();
 }
diff --git a/dbms/src/Common/tests/hash_table.cpp b/dbms/src/Common/tests/hash_table.cpp
index f99a9d72ab3..5093b9ebc48 100644
--- a/dbms/src/Common/tests/hash_table.cpp
+++ b/dbms/src/Common/tests/hash_table.cpp
@@ -29,13 +29,10 @@ int main(int argc, char ** argv)
         for (auto x : cont)
             std::cerr << x << std::endl;
 
-        std::string dump;
-        {
-            DB::WriteBufferFromString wb(dump);
-            cont.writeText(wb);
-        }
+        DB::WriteBufferFromOwnString wb;
+        cont.writeText(wb);
 
-        std::cerr << "dump: " << dump << std::endl;
+        std::cerr << "dump: " << wb.str() << std::endl;
     }
 
     {
@@ -48,13 +45,10 @@ int main(int argc, char ** argv)
         for (auto x : cont)
             std::cerr << x.first << " -> " << x.second << std::endl;
 
-        std::string dump;
-        {
-            DB::WriteBufferFromString wb(dump);
-            cont.writeText(wb);
-        }
+        DB::WriteBufferFromOwnString wb;
+        cont.writeText(wb);
 
-        std::cerr << "dump: " << dump << std::endl;
+        std::cerr << "dump: " << wb.str() << std::endl;
     }
 
     {
@@ -63,13 +57,10 @@ int main(int argc, char ** argv)
             DB::UInt128TrivialHash>;
         Cont cont;
 
-        std::string dump;
-        {
-            DB::WriteBufferFromString wb(dump);
-            cont.write(wb);
-        }
+        DB::WriteBufferFromOwnString wb;
+        cont.write(wb);
 
-        std::cerr << "dump: " << dump << std::endl;
+        std::cerr << "dump: " << wb.str() << std::endl;
     }
 
     return 0;
diff --git a/dbms/src/Common/tests/small_table.cpp b/dbms/src/Common/tests/small_table.cpp
index b542f8d60f2..d11570f6d0f 100644
--- a/dbms/src/Common/tests/small_table.cpp
+++ b/dbms/src/Common/tests/small_table.cpp
@@ -28,13 +28,10 @@ int main(int argc, char ** argv)
         for (auto x : cont)
             std::cerr << x << std::endl;
 
-        std::string dump;
-        {
-            DB::WriteBufferFromString wb(dump);
-            cont.writeText(wb);
-        }
+        DB::WriteBufferFromOwnString wb;
+        cont.writeText(wb);
 
-        std::cerr << "dump: " << dump << std::endl;
+        std::cerr << "dump: " << wb.str() << std::endl;
     }
 
     {
@@ -47,26 +44,20 @@ int main(int argc, char ** argv)
         for (auto x : cont)
             std::cerr << x.first << " -> " << x.second << std::endl;
 
-        std::string dump;
-        {
-            DB::WriteBufferFromString wb(dump);
-            cont.writeText(wb);
-        }
+        DB::WriteBufferFromOwnString wb;
+        cont.writeText(wb);
 
-        std::cerr << "dump: " << dump << std::endl;
+        std::cerr << "dump: " << wb.str() << std::endl;
     }
 
     {
         using Cont = SmallSet<DB::UInt128, 16>;
         Cont cont;
 
-        std::string dump;
-        {
-            DB::WriteBufferFromString wb(dump);
-            cont.write(wb);
-        }
+        DB::WriteBufferFromOwnString wb;
+        cont.write(wb);
 
-        std::cerr << "dump: " << dump << std::endl;
+        std::cerr << "dump: " << wb.str() << std::endl;
     }
 
     return 0;
diff --git a/dbms/src/Core/Block.cpp b/dbms/src/Core/Block.cpp
index a1ef634905d..31a3bccb7dd 100644
--- a/dbms/src/Core/Block.cpp
+++ b/dbms/src/Core/Block.cpp
@@ -317,39 +317,33 @@ size_t Block::allocatedBytes() const
 
 std::string Block::dumpNames() const
 {
-    std::string res;
+    WriteBufferFromOwnString out;
+    for (auto it = data.begin(); it != data.end(); ++it)
     {
-        WriteBufferFromString out(res);
-        for (auto it = data.begin(); it != data.end(); ++it)
-        {
-            if (it != data.begin())
-                out << ", ";
-            out << it->name;
-        }
+        if (it != data.begin())
+            out << ", ";
+        out << it->name;
     }
-    return res;
+    return out.str();
 }
 
 
 std::string Block::dumpStructure() const
 {
-    std::string res;
+    WriteBufferFromOwnString out;
+    for (auto it = data.begin(); it != data.end(); ++it)
     {
-        WriteBufferFromString out(res);
-        for (auto it = data.begin(); it != data.end(); ++it)
-        {
-            if (it != data.begin())
-                out << ", ";
+        if (it != data.begin())
+            out << ", ";
 
-            out << it->name << ' ' << it->type->getName();
+        out << it->name << ' ' << it->type->getName();
 
-            if (it->column)
-                out << ' ' << it->column->getName() << ' ' << it->column->size();
-            else
-                out << " nullptr";
-        }
+        if (it->column)
+            out << ' ' << it->column->getName() << ' ' << it->column->size();
+        else
+            out << " nullptr";
     }
-    return res;
+    return out.str();
 }
 
 
diff --git a/dbms/src/Core/ColumnWithTypeAndName.cpp b/dbms/src/Core/ColumnWithTypeAndName.cpp
index 9e0f31e43c3..045ba60dbff 100644
--- a/dbms/src/Core/ColumnWithTypeAndName.cpp
+++ b/dbms/src/Core/ColumnWithTypeAndName.cpp
@@ -29,22 +29,19 @@ bool ColumnWithTypeAndName::operator== (const ColumnWithTypeAndName & other) con
 
 String ColumnWithTypeAndName::prettyPrint() const
 {
-    String res;
+    WriteBufferFromOwnString out;
+    writeString(name, out);
+    if (type)
     {
-        WriteBufferFromString out(res);
-        writeString(name, out);
-        if (type)
-        {
-            writeChar(' ', out);
-            writeString(type->getName(), out);
-        }
-        if (column)
-        {
-            writeChar(' ', out);
-            writeString(column->getName(), out);
-        }
+        writeChar(' ', out);
+        writeString(type->getName(), out);
     }
-    return res;
+    if (column)
+    {
+        writeChar(' ', out);
+        writeString(column->getName(), out);
+    }
+    return out.str();
 }
 
 }
diff --git a/dbms/src/Core/Field.cpp b/dbms/src/Core/Field.cpp
index 1e54768d63c..74bc9e389a5 100644
--- a/dbms/src/Core/Field.cpp
+++ b/dbms/src/Core/Field.cpp
@@ -2,8 +2,6 @@
 #include <IO/WriteBuffer.h>
 #include <IO/ReadHelpers.h>
 #include <IO/WriteHelpers.h>
-#include <IO/ReadBufferFromString.h>
-#include <IO/WriteBufferFromString.h>
 
 #include <Core/Field.h>
 #include <Core/FieldVisitors.h>
diff --git a/dbms/src/Core/FieldVisitors.cpp b/dbms/src/Core/FieldVisitors.cpp
index 47c9ae8cf69..6589b8a76eb 100644
--- a/dbms/src/Core/FieldVisitors.cpp
+++ b/dbms/src/Core/FieldVisitors.cpp
@@ -15,41 +15,37 @@ namespace DB
 template <typename T>
 static inline String formatQuoted(T x)
 {
-    String res;
-    WriteBufferFromString wb(res);
+    WriteBufferFromOwnString wb;
     writeQuoted(x, wb);
-    return res;
+    return wb.str();
 }
 
 template <typename T>
 static inline String formatQuotedWithPrefix(T x, const char * prefix)
 {
-    String res;
-    WriteBufferFromString wb(res);
+    WriteBufferFromOwnString wb;
     wb.write(prefix, strlen(prefix));
     writeQuoted(x, wb);
-    return res;
+    return wb.str();
 }
 
 
-String FieldVisitorDump::operator() (const Null     & x) const { return "NULL"; }
-String FieldVisitorDump::operator() (const UInt64     & x) const { return formatQuotedWithPrefix(x, "UInt64_"); }
-String FieldVisitorDump::operator() (const Int64     & x) const { return formatQuotedWithPrefix(x, "Int64_"); }
-String FieldVisitorDump::operator() (const Float64     & x) const { return formatQuotedWithPrefix(x, "Float64_"); }
+String FieldVisitorDump::operator() (const Null & x) const { return "NULL"; }
+String FieldVisitorDump::operator() (const UInt64 & x) const { return formatQuotedWithPrefix(x, "UInt64_"); }
+String FieldVisitorDump::operator() (const Int64 & x) const { return formatQuotedWithPrefix(x, "Int64_"); }
+String FieldVisitorDump::operator() (const Float64 & x) const { return formatQuotedWithPrefix(x, "Float64_"); }
 
 
 String FieldVisitorDump::operator() (const String & x) const
 {
-    String res;
-    WriteBufferFromString wb(res);
+    WriteBufferFromOwnString wb;
     writeQuoted(x, wb);
-    return res;
+    return wb.str();
 }
 
 String FieldVisitorDump::operator() (const Array & x) const
 {
-    String res;
-    WriteBufferFromString wb(res);
+    WriteBufferFromOwnString wb;
 
     wb.write("Array_[", 7);
     for (auto it = x.begin(); it != x.end(); ++it)
@@ -60,14 +56,13 @@ String FieldVisitorDump::operator() (const Array & x) const
     }
     writeChar(']', wb);
 
-    return res;
+    return wb.str();
 }
 
 String FieldVisitorDump::operator() (const Tuple & x_def) const
 {
     auto & x = x_def.t;
-    String res;
-    WriteBufferFromString wb(res);
+    WriteBufferFromOwnString wb;
 
     wb.write("Tuple_(", 7);
     for (auto it = x.begin(); it != x.end(); ++it)
@@ -78,7 +73,7 @@ String FieldVisitorDump::operator() (const Tuple & x_def) const
     }
     writeChar(')', wb);
 
-    return res;
+    return wb.str();
 }
 
 
@@ -104,17 +99,16 @@ static String formatFloat(const Float64 x)
 }
 
 
-String FieldVisitorToString::operator() (const Null     & x) const { return "NULL"; }
-String FieldVisitorToString::operator() (const UInt64     & x) const { return formatQuoted(x); }
-String FieldVisitorToString::operator() (const Int64     & x) const { return formatQuoted(x); }
-String FieldVisitorToString::operator() (const Float64     & x) const { return formatFloat(x); }
-String FieldVisitorToString::operator() (const String     & x) const { return formatQuoted(x); }
+String FieldVisitorToString::operator() (const Null & x) const { return "NULL"; }
+String FieldVisitorToString::operator() (const UInt64 & x) const { return formatQuoted(x); }
+String FieldVisitorToString::operator() (const Int64 & x) const { return formatQuoted(x); }
+String FieldVisitorToString::operator() (const Float64 & x) const { return formatFloat(x); }
+String FieldVisitorToString::operator() (const String & x) const { return formatQuoted(x); }
 
 
 String FieldVisitorToString::operator() (const Array & x) const
 {
-    String res;
-    WriteBufferFromString wb(res);
+    WriteBufferFromOwnString wb;
 
     writeChar('[', wb);
     for (Array::const_iterator it = x.begin(); it != x.end(); ++it)
@@ -125,14 +119,13 @@ String FieldVisitorToString::operator() (const Array & x) const
     }
     writeChar(']', wb);
 
-    return res;
+    return wb.str();
 }
 
 String FieldVisitorToString::operator() (const Tuple & x_def) const
 {
     auto & x = x_def.t;
-    String res;
-    WriteBufferFromString wb(res);
+    WriteBufferFromOwnString wb;
 
     writeChar('(', wb);
     for (auto it = x.begin(); it != x.end(); ++it)
@@ -143,7 +136,7 @@ String FieldVisitorToString::operator() (const Tuple & x_def) const
     }
     writeChar(')', wb);
 
-    return res;
+    return wb.str();
 }
 
 
diff --git a/dbms/src/Core/NamesAndTypes.cpp b/dbms/src/Core/NamesAndTypes.cpp
index 8a8f1078e57..1baa3fbee25 100644
--- a/dbms/src/Core/NamesAndTypes.cpp
+++ b/dbms/src/Core/NamesAndTypes.cpp
@@ -5,6 +5,7 @@
 #include <IO/ReadHelpers.h>
 #include <IO/WriteHelpers.h>
 #include <IO/ReadBufferFromString.h>
+#include <IO/WriteBufferFromString.h>
 #include <sparsehash/dense_hash_map>
 
 
@@ -53,12 +54,9 @@ void NamesAndTypesList::writeText(WriteBuffer & buf) const
 
 String NamesAndTypesList::toString() const
 {
-    String s;
-    {
-        WriteBufferFromString out(s);
-        writeText(out);
-    }
-    return s;
+    WriteBufferFromOwnString out;
+    writeText(out);
+    return out.str();
 }
 
 NamesAndTypesList NamesAndTypesList::parse(const String & s)
diff --git a/dbms/src/Core/SortDescription.cpp b/dbms/src/Core/SortDescription.cpp
index f8e69cbd96c..2a23679d39e 100644
--- a/dbms/src/Core/SortDescription.cpp
+++ b/dbms/src/Core/SortDescription.cpp
@@ -10,14 +10,11 @@ namespace DB
 
 std::string SortColumnDescription::getID() const
 {
-    std::string res;
-    {
-        WriteBufferFromString out(res);
-        out << column_name << ", " << column_number << ", " << direction << ", " << nulls_direction;
-        if (collator)
-            out << ", collation locale: " << collator->getLocale();
-    }
-    return res;
+    WriteBufferFromOwnString out;
+    out << column_name << ", " << column_number << ", " << direction << ", " << nulls_direction;
+    if (collator)
+        out << ", collation locale: " << collator->getLocale();
+    return out.str();
 }
 
 }
diff --git a/dbms/src/Core/tests/string_ref_hash.cpp b/dbms/src/Core/tests/string_ref_hash.cpp
index cca25bd6996..786d103600a 100644
--- a/dbms/src/Core/tests/string_ref_hash.cpp
+++ b/dbms/src/Core/tests/string_ref_hash.cpp
@@ -1,6 +1,5 @@
 #include <IO/ReadBufferFromFileDescriptor.h>
 #include <IO/WriteBufferFromFileDescriptor.h>
-#include <IO/WriteBufferFromString.h>
 #include <IO/Operators.h>
 #include <common/StringRef.h>
 
diff --git a/dbms/src/DataStreams/CSVRowInputStream.cpp b/dbms/src/DataStreams/CSVRowInputStream.cpp
index efdfc8a9ca7..ae4397fb3b7 100644
--- a/dbms/src/DataStreams/CSVRowInputStream.cpp
+++ b/dbms/src/DataStreams/CSVRowInputStream.cpp
@@ -140,8 +140,7 @@ String CSVRowInputStream::getDiagnosticInfo()
     if (istr.eof())        /// Buffer has gone, cannot extract information about what has been parsed.
         return {};
 
-    String res;
-    WriteBufferFromString out(res);
+    WriteBufferFromOwnString out;
     Block block = sample.cloneEmpty();
 
     /// It is possible to display detailed diagnostics only if the last and next to last rows are still in the read buffer.
@@ -149,7 +148,7 @@ String CSVRowInputStream::getDiagnosticInfo()
     if (bytes_read_at_start_of_buffer != bytes_read_at_start_of_buffer_on_prev_row)
     {
         out << "Could not print diagnostic info because two last rows aren't in buffer (rare case)\n";
-        return res;
+        return out.str();
     }
 
     size_t max_length_of_column_name = 0;
@@ -170,14 +169,14 @@ String CSVRowInputStream::getDiagnosticInfo()
 
         out << "\nRow " << (row_num - 1) << ":\n";
         if (!parseRowAndPrintDiagnosticInfo(block, out, max_length_of_column_name, max_length_of_data_type_name))
-            return res;
+            return out.str();
     }
     else
     {
         if (!pos_of_current_row)
         {
             out << "Could not print diagnostic info because parsing of data hasn't started.\n";
-            return res;
+            return out.str();
         }
 
         istr.position() = pos_of_current_row;
@@ -187,7 +186,7 @@ String CSVRowInputStream::getDiagnosticInfo()
     parseRowAndPrintDiagnosticInfo(block, out, max_length_of_column_name, max_length_of_data_type_name);
     out << "\n";
 
-    return res;
+    return out.str();
 }
 
 
diff --git a/dbms/src/DataStreams/JSONRowOutputStream.cpp b/dbms/src/DataStreams/JSONRowOutputStream.cpp
index 98b5a9eb111..5d2f78e4aa8 100644
--- a/dbms/src/DataStreams/JSONRowOutputStream.cpp
+++ b/dbms/src/DataStreams/JSONRowOutputStream.cpp
@@ -18,12 +18,10 @@ JSONRowOutputStream::JSONRowOutputStream(WriteBuffer & ostr_, const Block & samp
         if (!sample_.getByPosition(i).type->isNumeric())
             have_non_numeric_columns = true;
 
-        String field_name_quoted;
-        {
-            WriteBufferFromString out(field_name_quoted);
-            writeJSONString(fields[i].name, out);
-        }
-        fields[i].name = field_name_quoted;
+        WriteBufferFromOwnString out;
+        writeJSONString(fields[i].name, out);
+
+        fields[i].name = out.str();
     }
 
     if (have_non_numeric_columns)
diff --git a/dbms/src/DataStreams/PrettyBlockOutputStream.cpp b/dbms/src/DataStreams/PrettyBlockOutputStream.cpp
index 20376557b6d..1c75cf227f8 100644
--- a/dbms/src/DataStreams/PrettyBlockOutputStream.cpp
+++ b/dbms/src/DataStreams/PrettyBlockOutputStream.cpp
@@ -7,6 +7,7 @@
 #include <Columns/ColumnsNumber.h>
 #include <IO/WriteBuffer.h>
 #include <IO/WriteHelpers.h>
+#include <IO/WriteBufferFromString.h>
 #include <Common/UTF8Helpers.h>
 
 
diff --git a/dbms/src/DataStreams/TSKVRowOutputStream.cpp b/dbms/src/DataStreams/TSKVRowOutputStream.cpp
index 43a27d4dddc..87e2b166208 100644
--- a/dbms/src/DataStreams/TSKVRowOutputStream.cpp
+++ b/dbms/src/DataStreams/TSKVRowOutputStream.cpp
@@ -1,4 +1,5 @@
 #include <IO/WriteHelpers.h>
+#include <IO/WriteBufferFromString.h>
 #include <DataStreams/TSKVRowOutputStream.h>
 
 
@@ -13,13 +14,10 @@ TSKVRowOutputStream::TSKVRowOutputStream(WriteBuffer & ostr_, const Block & samp
 
     for (auto & field : fields)
     {
-        String prepared_field_name;
-        {
-            WriteBufferFromString wb(prepared_field_name);
-            writeAnyEscapedString<'='>(field.name.data(), field.name.data() + field.name.size(), wb);
-            writeCString("=", wb);
-        }
-        field.name = prepared_field_name;
+        WriteBufferFromOwnString wb;
+        writeAnyEscapedString<'='>(field.name.data(), field.name.data() + field.name.size(), wb);
+        writeCString("=", wb);
+        field.name = wb.str();
     }
 }
 
diff --git a/dbms/src/DataStreams/TabSeparatedRowInputStream.cpp b/dbms/src/DataStreams/TabSeparatedRowInputStream.cpp
index 513c9ca2069..fdbed86c550 100644
--- a/dbms/src/DataStreams/TabSeparatedRowInputStream.cpp
+++ b/dbms/src/DataStreams/TabSeparatedRowInputStream.cpp
@@ -1,4 +1,5 @@
 #include <IO/ReadHelpers.h>
+#include <IO/WriteBufferFromString.h>
 #include <IO/Operators.h>
 
 #include <DataStreams/TabSeparatedRowInputStream.h>
@@ -109,8 +110,7 @@ String TabSeparatedRowInputStream::getDiagnosticInfo()
     if (istr.eof())        /// Buffer has gone, cannot extract information about what has been parsed.
         return {};
 
-    String res;
-    WriteBufferFromString out(res);
+    WriteBufferFromOwnString out;
     Block block = sample.cloneEmpty();
 
     /// It is possible to display detailed diagnostics only if the last and next to last lines are still in the read buffer.
@@ -118,7 +118,7 @@ String TabSeparatedRowInputStream::getDiagnosticInfo()
     if (bytes_read_at_start_of_buffer != bytes_read_at_start_of_buffer_on_prev_row)
     {
         out << "Could not print diagnostic info because two last rows aren't in buffer (rare case)\n";
-        return res;
+        return out.str();
     }
 
     size_t max_length_of_column_name = 0;
@@ -139,14 +139,14 @@ String TabSeparatedRowInputStream::getDiagnosticInfo()
 
         out << "\nRow " << (row_num - 1) << ":\n";
         if (!parseRowAndPrintDiagnosticInfo(block, out, max_length_of_column_name, max_length_of_data_type_name))
-            return res;
+            return out.str();
     }
     else
     {
         if (!pos_of_current_row)
         {
             out << "Could not print diagnostic info because parsing of data hasn't started.\n";
-            return res;
+            return out.str();
         }
 
         istr.position() = pos_of_current_row;
@@ -156,7 +156,7 @@ String TabSeparatedRowInputStream::getDiagnosticInfo()
     parseRowAndPrintDiagnosticInfo(block, out, max_length_of_column_name, max_length_of_data_type_name);
     out << "\n";
 
-    return res;
+    return out.str();
 }
 
 
diff --git a/dbms/src/DataTypes/DataTypeAggregateFunction.cpp b/dbms/src/DataTypes/DataTypeAggregateFunction.cpp
index d8c8057b348..5da0fd620a0 100644
--- a/dbms/src/DataTypes/DataTypeAggregateFunction.cpp
+++ b/dbms/src/DataTypes/DataTypeAggregateFunction.cpp
@@ -146,10 +146,9 @@ void DataTypeAggregateFunction::deserializeBinaryBulk(IColumn & column, ReadBuff
 
 static String serializeToString(const AggregateFunctionPtr & function, const IColumn & column, size_t row_num)
 {
-    String res;
-    WriteBufferFromString buffer(res);
+    WriteBufferFromOwnString buffer;
     function.get()->serialize(static_cast<const ColumnAggregateFunction &>(column).getData()[row_num], buffer);
-    return res;
+    return buffer.str();
 }
 
 static void deserializeFromString(const AggregateFunctionPtr & function, IColumn & column, const String & s)
diff --git a/dbms/src/DataTypes/DataTypeArray.cpp b/dbms/src/DataTypes/DataTypeArray.cpp
index 06224e59708..a73f31787be 100644
--- a/dbms/src/DataTypes/DataTypeArray.cpp
+++ b/dbms/src/DataTypes/DataTypeArray.cpp
@@ -5,6 +5,7 @@
 #include <IO/ReadHelpers.h>
 #include <IO/WriteHelpers.h>
 #include <IO/ReadBufferFromString.h>
+#include <IO/WriteBufferFromString.h>
 
 #include <DataTypes/DataTypesNumber.h>
 #include <DataTypes/DataTypeArray.h>
@@ -364,12 +365,9 @@ void DataTypeArray::serializeTextXML(const IColumn & column, size_t row_num, Wri
 void DataTypeArray::serializeTextCSV(const IColumn & column, size_t row_num, WriteBuffer & ostr) const
 {
     /// There is no good way to serialize an array in CSV. Therefore, we serialize it into a string, and then write the resulting string in CSV.
-    String s;
-    {
-        WriteBufferFromString wb(s);
-        serializeText(column, row_num, wb);
-    }
-    writeCSV(s, ostr);
+    WriteBufferFromOwnString wb;
+    serializeText(column, row_num, wb);
+    writeCSV(wb.str(), ostr);
 }
 
 
diff --git a/dbms/src/DataTypes/DataTypeEnum.cpp b/dbms/src/DataTypes/DataTypeEnum.cpp
index 0dd97254b47..a4a5b5b3924 100644
--- a/dbms/src/DataTypes/DataTypeEnum.cpp
+++ b/dbms/src/DataTypes/DataTypeEnum.cpp
@@ -36,31 +36,27 @@ const char * DataTypeEnum<Type>::getFamilyName() const
 template <typename Type>
 std::string DataTypeEnum<Type>::generateName(const Values & values)
 {
-    std::string name;
+    WriteBufferFromOwnString out;
 
+    writeString(EnumName<FieldType>::value, out);
+    writeChar('(', out);
+
+    auto first = true;
+    for (const auto & name_and_value : values)
     {
-        WriteBufferFromString out{name};
+        if (!first)
+            writeString(", ", out);
 
-        writeString(EnumName<FieldType>::value, out);
-        writeChar('(', out);
+        first = false;
 
-        auto first = true;
-        for (const auto & name_and_value : values)
-        {
-            if (!first)
-                writeString(", ", out);
-
-            first = false;
-
-            writeQuotedString(name_and_value.first, out);
-            writeString(" = ", out);
-            writeText(name_and_value.second, out);
-        }
-
-        writeChar(')', out);
+        writeQuotedString(name_and_value.first, out);
+        writeString(" = ", out);
+        writeText(name_and_value.second, out);
     }
 
-    return name;
+    writeChar(')', out);
+
+    return out.str();
 }
 
 template <typename Type>
diff --git a/dbms/src/DataTypes/DataTypeNested.cpp b/dbms/src/DataTypes/DataTypeNested.cpp
index 06320329a3c..bf81432198b 100644
--- a/dbms/src/DataTypes/DataTypeNested.cpp
+++ b/dbms/src/DataTypes/DataTypeNested.cpp
@@ -56,8 +56,7 @@ std::string DataTypeNested::extractNestedColumnName(const std::string & nested_n
 
 std::string DataTypeNested::getName() const
 {
-    std::string res;
-    WriteBufferFromString out(res);
+    WriteBufferFromOwnString out;
 
     writeCString("Nested(", out);
 
@@ -71,8 +70,7 @@ std::string DataTypeNested::getName() const
     }
 
     writeChar(')', out);
-
-    return res;
+    return out.str();
 }
 
 
diff --git a/dbms/src/Databases/DatabaseCloud.cpp b/dbms/src/Databases/DatabaseCloud.cpp
index 4f0351e2ac6..62c7914b0b2 100644
--- a/dbms/src/Databases/DatabaseCloud.cpp
+++ b/dbms/src/Databases/DatabaseCloud.cpp
@@ -115,12 +115,9 @@ Hash DatabaseCloud::getTableHash(const String & table_name) const
 String DatabaseCloud::getNameOfNodeWithTables(const String & table_name) const
 {
     Hash hash = getTableHash(table_name);
-    String res;
-    {
-        WriteBufferFromString out(res);
-        writeText(hash.first % TABLE_TO_NODE_DIVISOR, out);
-    }
-    return res;
+    WriteBufferFromOwnString out;
+    writeText(hash.first % TABLE_TO_NODE_DIVISOR, out);
+    return out.str();
 }
 
 
@@ -187,12 +184,9 @@ struct TableSet
 
     String toString() const
     {
-        String res;
-        {
-            WriteBufferFromString out(res);
-            write(out);
-        }
-        return res;
+        WriteBufferFromOwnString out;
+        write(out);
+        return out.str();
     }
 
     void write(WriteBuffer & buf) const
@@ -241,12 +235,9 @@ struct LocalTableSet
 
     String toString() const
     {
-        String res;
-        {
-            WriteBufferFromString out(res);
-            write(out);
-        }
-        return res;
+        WriteBufferFromOwnString out;
+        write(out);
+        return out.str();
     }
 
     void write(WriteBuffer & buf) const
diff --git a/dbms/src/Dictionaries/ExternalQueryBuilder.cpp b/dbms/src/Dictionaries/ExternalQueryBuilder.cpp
index 3ee27bae44c..862621327fe 100644
--- a/dbms/src/Dictionaries/ExternalQueryBuilder.cpp
+++ b/dbms/src/Dictionaries/ExternalQueryBuilder.cpp
@@ -49,110 +49,11 @@ void ExternalQueryBuilder::writeQuoted(const std::string & s, WriteBuffer & out)
 
 std::string ExternalQueryBuilder::composeLoadAllQuery() const
 {
-    std::string query;
+    WriteBufferFromOwnString out;
+    writeString("SELECT ", out);
 
+    if (dict_struct.id)
     {
-        WriteBufferFromString out{query};
-        writeString("SELECT ", out);
-
-        if (dict_struct.id)
-        {
-            if (!dict_struct.id.value().expression.empty())
-            {
-                writeParenthesisedString(dict_struct.id.value().expression, out);
-                writeString(" AS ", out);
-            }
-
-            writeQuoted(dict_struct.id.value().name, out);
-
-            if (dict_struct.range_min && dict_struct.range_max)
-            {
-                writeString(", ", out);
-
-                if (!dict_struct.range_min.value().expression.empty())
-                {
-                    writeParenthesisedString(dict_struct.range_min.value().expression, out);
-                    writeString(" AS ", out);
-                }
-
-                writeQuoted(dict_struct.range_min.value().name, out);
-
-                writeString(", ", out);
-
-                if (!dict_struct.range_max.value().expression.empty())
-                {
-                    writeParenthesisedString(dict_struct.range_max.value().expression, out);
-                    writeString(" AS ", out);
-                }
-
-                writeQuoted(dict_struct.range_max.value().name, out);
-            }
-        }
-        else if (dict_struct.key)
-        {
-            auto first = true;
-            for (const auto & key : *dict_struct.key)
-            {
-                if (!first)
-                    writeString(", ", out);
-
-                first = false;
-
-                if (!key.expression.empty())
-                {
-                    writeParenthesisedString(key.expression, out);
-                    writeString(" AS ", out);
-                }
-
-                writeQuoted(key.name, out);
-            }
-        }
-
-        for (const auto & attr : dict_struct.attributes)
-        {
-            writeString(", ", out);
-
-            if (!attr.expression.empty())
-            {
-                writeParenthesisedString(attr.expression, out);
-                writeString(" AS ", out);
-            }
-
-            writeQuoted(attr.name, out);
-        }
-
-        writeString(" FROM ", out);
-        if (!db.empty())
-        {
-            writeQuoted(db, out);
-            writeChar('.', out);
-        }
-        writeQuoted(table, out);
-
-        if (!where.empty())
-        {
-            writeString(" WHERE ", out);
-            writeString(where, out);
-        }
-
-        writeChar(';', out);
-    }
-
-    return query;
-}
-
-
-std::string ExternalQueryBuilder::composeLoadIdsQuery(const std::vector<UInt64> & ids)
-{
-    if (!dict_struct.id)
-        throw Exception{"Simple key required for method", ErrorCodes::UNSUPPORTED_METHOD};
-
-    std::string query;
-
-    {
-        WriteBufferFromString out{query};
-        writeString("SELECT ", out);
-
         if (!dict_struct.id.value().expression.empty())
         {
             writeParenthesisedString(dict_struct.id.value().expression, out);
@@ -161,52 +62,143 @@ std::string ExternalQueryBuilder::composeLoadIdsQuery(const std::vector<UInt64>
 
         writeQuoted(dict_struct.id.value().name, out);
 
-        for (const auto & attr : dict_struct.attributes)
+        if (dict_struct.range_min && dict_struct.range_max)
         {
             writeString(", ", out);
 
-            if (!attr.expression.empty())
+            if (!dict_struct.range_min.value().expression.empty())
             {
-                writeParenthesisedString(attr.expression, out);
+                writeParenthesisedString(dict_struct.range_min.value().expression, out);
                 writeString(" AS ", out);
             }
 
-            writeQuoted(attr.name, out);
+            writeQuoted(dict_struct.range_min.value().name, out);
+
+            writeString(", ", out);
+
+            if (!dict_struct.range_max.value().expression.empty())
+            {
+                writeParenthesisedString(dict_struct.range_max.value().expression, out);
+                writeString(" AS ", out);
+            }
+
+            writeQuoted(dict_struct.range_max.value().name, out);
         }
-
-        writeString(" FROM ", out);
-        if (!db.empty())
-        {
-            writeQuoted(db, out);
-            writeChar('.', out);
-        }
-        writeQuoted(table, out);
-
-        writeString(" WHERE ", out);
-
-        if (!where.empty())
-        {
-            writeString(where, out);
-            writeString(" AND ", out);
-        }
-
-        writeQuoted(dict_struct.id.value().name, out);
-        writeString(" IN (", out);
-
+    }
+    else if (dict_struct.key)
+    {
         auto first = true;
-        for (const auto id : ids)
+        for (const auto & key : *dict_struct.key)
         {
             if (!first)
                 writeString(", ", out);
 
             first = false;
-            writeString(DB::toString(id), out);
-        }
 
-        writeString(");", out);
+            if (!key.expression.empty())
+            {
+                writeParenthesisedString(key.expression, out);
+                writeString(" AS ", out);
+            }
+
+            writeQuoted(key.name, out);
+        }
     }
 
-    return query;
+    for (const auto & attr : dict_struct.attributes)
+    {
+        writeString(", ", out);
+
+        if (!attr.expression.empty())
+        {
+            writeParenthesisedString(attr.expression, out);
+            writeString(" AS ", out);
+        }
+
+        writeQuoted(attr.name, out);
+    }
+
+    writeString(" FROM ", out);
+    if (!db.empty())
+    {
+        writeQuoted(db, out);
+        writeChar('.', out);
+    }
+    writeQuoted(table, out);
+
+    if (!where.empty())
+    {
+        writeString(" WHERE ", out);
+        writeString(where, out);
+    }
+
+    writeChar(';', out);
+
+    return out.str();
+}
+
+
+std::string ExternalQueryBuilder::composeLoadIdsQuery(const std::vector<UInt64> & ids)
+{
+    if (!dict_struct.id)
+        throw Exception{"Simple key required for method", ErrorCodes::UNSUPPORTED_METHOD};
+
+    WriteBufferFromOwnString out;
+    writeString("SELECT ", out);
+
+    if (!dict_struct.id.value().expression.empty())
+    {
+        writeParenthesisedString(dict_struct.id.value().expression, out);
+        writeString(" AS ", out);
+    }
+
+    writeQuoted(dict_struct.id.value().name, out);
+
+    for (const auto & attr : dict_struct.attributes)
+    {
+        writeString(", ", out);
+
+        if (!attr.expression.empty())
+        {
+            writeParenthesisedString(attr.expression, out);
+            writeString(" AS ", out);
+        }
+
+        writeQuoted(attr.name, out);
+    }
+
+    writeString(" FROM ", out);
+    if (!db.empty())
+    {
+        writeQuoted(db, out);
+        writeChar('.', out);
+    }
+    writeQuoted(table, out);
+
+    writeString(" WHERE ", out);
+
+    if (!where.empty())
+    {
+        writeString(where, out);
+        writeString(" AND ", out);
+    }
+
+    writeQuoted(dict_struct.id.value().name, out);
+    writeString(" IN (", out);
+
+    auto first = true;
+    for (const auto id : ids)
+    {
+        if (!first)
+            writeString(", ", out);
+
+        first = false;
+        writeString(DB::toString(id), out);
+    }
+
+    writeString(");", out);
+
+    return out.str();
 }
 
 
@@ -218,85 +210,81 @@ std::string ExternalQueryBuilder::composeLoadKeysQuery(
     if (!dict_struct.key)
         throw Exception{"Composite key required for method", ErrorCodes::UNSUPPORTED_METHOD};
 
-    std::string query;
+    WriteBufferFromOwnString out;
+    writeString("SELECT ", out);
 
+    auto first = true;
+    for (const auto & key_or_attribute : boost::join(*dict_struct.key, dict_struct.attributes))
     {
-        WriteBufferFromString out{query};
-        writeString("SELECT ", out);
+        if (!first)
+            writeString(", ", out);
 
-        auto first = true;
-        for (const auto & key_or_attribute : boost::join(*dict_struct.key, dict_struct.attributes))
+        first = false;
+
+        if (!key_or_attribute.expression.empty())
+        {
+            writeParenthesisedString(key_or_attribute.expression, out);
+            writeString(" AS ", out);
+        }
+
+        writeQuoted(key_or_attribute.name, out);
+    }
+
+    writeString(" FROM ", out);
+    if (!db.empty())
+    {
+        writeQuoted(db, out);
+        writeChar('.', out);
+    }
+    writeQuoted(table, out);
+
+    writeString(" WHERE ", out);
+
+    if (!where.empty())
+    {
+        writeString("(", out);
+        writeString(where, out);
+        writeString(") AND (", out);
+    }
+
+    if (method == AND_OR_CHAIN)
+    {
+        first = true;
+        for (const auto row : requested_rows)
+        {
+            if (!first)
+                writeString(" OR ", out);
+
+            first = false;
+            composeKeyCondition(key_columns, row, out);
+        }
+    }
+    else if (method == IN_WITH_TUPLES)
+    {
+        writeString(composeKeyTupleDefinition(), out);
+        writeString(" IN (", out);
+
+        first = true;
+        for (const auto row : requested_rows)
         {
             if (!first)
                 writeString(", ", out);
 
             first = false;
-
-            if (!key_or_attribute.expression.empty())
-            {
-                writeParenthesisedString(key_or_attribute.expression, out);
-                writeString(" AS ", out);
-            }
-
-            writeQuoted(key_or_attribute.name, out);
+            composeKeyTuple(key_columns, row, out);
         }
 
-        writeString(" FROM ", out);
-        if (!db.empty())
-        {
-            writeQuoted(db, out);
-            writeChar('.', out);
-        }
-        writeQuoted(table, out);
-
-        writeString(" WHERE ", out);
-
-        if (!where.empty())
-        {
-            writeString("(", out);
-            writeString(where, out);
-            writeString(") AND (", out);
-        }
-
-        if (method == AND_OR_CHAIN)
-        {
-            first = true;
-            for (const auto row : requested_rows)
-            {
-                if (!first)
-                    writeString(" OR ", out);
-
-                first = false;
-                composeKeyCondition(key_columns, row, out);
-            }
-        }
-        else if (method == IN_WITH_TUPLES)
-        {
-            writeString(composeKeyTupleDefinition(), out);
-            writeString(" IN (", out);
-
-            first = true;
-            for (const auto row : requested_rows)
-            {
-                if (!first)
-                    writeString(", ", out);
-
-                first = false;
-                composeKeyTuple(key_columns, row, out);
-            }
-
-            writeString(")", out);
-        }
-
-        if (!where.empty())
-        {
-            writeString(")", out);
-        }
-
-        writeString(";", out);
+        writeString(")", out);
     }
 
-    return query;
+    if (!where.empty())
+    {
+        writeString(")", out);
+    }
+
+    writeString(";", out);
+
+    return out.str();
 }
 
 
diff --git a/dbms/src/Dictionaries/MySQLDictionarySource.cpp b/dbms/src/Dictionaries/MySQLDictionarySource.cpp
index 97a323edc96..05290f9b5c6 100644
--- a/dbms/src/Dictionaries/MySQLDictionarySource.cpp
+++ b/dbms/src/Dictionaries/MySQLDictionarySource.cpp
@@ -1,6 +1,8 @@
+#include <IO/WriteBufferFromString.h>
 #include <DataTypes/DataTypeString.h>
 #include <Columns/ColumnString.h>
 #include <Common/config.h>
+
 #if USE_MYSQL
 
 #include <common/logger_useful.h>
@@ -119,12 +121,9 @@ std::string MySQLDictionarySource::quoteForLike(const std::string s)
         tmp.push_back(c);
     }
 
-    std::string res;
-    {
-        WriteBufferFromString out(res);
-        writeQuoted(tmp, out);
-    }
-    return res;
+    WriteBufferFromOwnString out;
+    writeQuoted(tmp, out);
+    return out.str();
 }
 
 LocalDateTime MySQLDictionarySource::getLastModification() const
diff --git a/dbms/src/Functions/Conditional/ArgsInfo.cpp b/dbms/src/Functions/Conditional/ArgsInfo.cpp
index 37f0322322d..780749e756f 100644
--- a/dbms/src/Functions/Conditional/ArgsInfo.cpp
+++ b/dbms/src/Functions/Conditional/ArgsInfo.cpp
@@ -22,8 +22,7 @@ namespace
 
 std::string dumpArgTypes(const DataTypes & args)
 {
-    std::string out;
-    WriteBufferFromString buf{out};
+    WriteBufferFromOwnString buf;
 
     size_t else_arg = elseArg(args);
 
@@ -40,10 +39,7 @@ std::string dumpArgTypes(const DataTypes & args)
 
     writeString("; ", buf);
     writeString(args[else_arg]->getName(), buf);
-
-    buf.next();
-
-    return out;
+    return buf.str();
 }
 
 /// Forward declarations.
diff --git a/dbms/src/Functions/Conditional/getArrayType.cpp b/dbms/src/Functions/Conditional/getArrayType.cpp
index 3e18cc7b5a0..c55430af557 100644
--- a/dbms/src/Functions/Conditional/getArrayType.cpp
+++ b/dbms/src/Functions/Conditional/getArrayType.cpp
@@ -17,8 +17,7 @@ namespace
 
 std::string dumpArgTypes(const DataTypes & args)
 {
-    std::string out;
-    WriteBufferFromString buf{out};
+    WriteBufferFromOwnString buf;
 
     bool is_first = true;
     for (size_t i = 0; i < args.size(); ++i)
@@ -30,10 +29,7 @@ std::string dumpArgTypes(const DataTypes & args)
 
         writeString(args[i]->getName(), buf);
     }
-
-    buf.next();
-
-    return out;
+    return buf.str();
 }
 
 /// Forward declarations.
diff --git a/dbms/src/Functions/FunctionsConversion.cpp b/dbms/src/Functions/FunctionsConversion.cpp
index ff20db6215f..54fb0ccfd5a 100644
--- a/dbms/src/Functions/FunctionsConversion.cpp
+++ b/dbms/src/Functions/FunctionsConversion.cpp
@@ -25,26 +25,23 @@ const DateLUTImpl * extractTimeZoneFromFunctionArguments(Block & block, const Co
 void throwExceptionForIncompletelyParsedValue(
     ReadBuffer & read_buffer, Block & block, const ColumnNumbers & arguments, size_t result)
 {
-    std::string message;
-    {
-        const IDataType & to_type = *block.getByPosition(result).type;
+    const IDataType & to_type = *block.getByPosition(result).type;
 
-        WriteBufferFromString message_buf(message);
-        message_buf << "Cannot parse string " << quote << String(read_buffer.buffer().begin(), read_buffer.buffer().size())
-            << " as " << to_type.getName()
-            << ": syntax error";
+    WriteBufferFromOwnString message_buf;
+    message_buf << "Cannot parse string " << quote << String(read_buffer.buffer().begin(), read_buffer.buffer().size())
+        << " as " << to_type.getName()
+        << ": syntax error";
 
-        if (read_buffer.offset())
-            message_buf << " at position " << read_buffer.offset()
-                << " (parsed just " << quote << String(read_buffer.buffer().begin(), read_buffer.offset()) << ")";
-        else
-            message_buf << " at begin of string";
+    if (read_buffer.offset())
+        message_buf << " at position " << read_buffer.offset()
+            << " (parsed just " << quote << String(read_buffer.buffer().begin(), read_buffer.offset()) << ")";
+    else
+        message_buf << " at begin of string";
 
-        if (to_type.behavesAsNumber())
-            message_buf << ". Note: there are to" << to_type.getName() << "OrZero function, which returns zero instead of throwing exception.";
-    }
+    if (to_type.behavesAsNumber())
+        message_buf << ". Note: there are to" << to_type.getName() << "OrZero function, which returns zero instead of throwing exception.";
 
-    throw Exception(message, ErrorCodes::CANNOT_PARSE_TEXT);
+    throw Exception(message_buf.str(), ErrorCodes::CANNOT_PARSE_TEXT);
 }
 
 
diff --git a/dbms/src/Functions/FunctionsFormatting.h b/dbms/src/Functions/FunctionsFormatting.h
index 147346b3c47..466f1b0ec71 100644
--- a/dbms/src/Functions/FunctionsFormatting.h
+++ b/dbms/src/Functions/FunctionsFormatting.h
@@ -6,7 +6,6 @@
 #include <DataTypes/DataTypeString.h>
 #include <DataTypes/DataTypesNumber.h>
 #include <IO/WriteBufferFromVector.h>
-#include <IO/WriteBufferFromString.h>
 #include <IO/WriteHelpers.h>
 #include <Common/formatReadable.h>
 #include <Common/typeid_cast.h>
diff --git a/dbms/src/IO/ReadHelpers.cpp b/dbms/src/IO/ReadHelpers.cpp
index f0aed1c356d..507d429acab 100644
--- a/dbms/src/IO/ReadHelpers.cpp
+++ b/dbms/src/IO/ReadHelpers.cpp
@@ -58,18 +58,15 @@ void parseUUID(const UInt8 * src36, std::reverse_iterator<UInt8 *> dst16)
 
 static void __attribute__((__noinline__)) throwAtAssertionFailed(const char * s, ReadBuffer & buf)
 {
-    std::string message;
-    {
-        WriteBufferFromString out(message);
-        out <<  "Cannot parse input: expected " << escape << s;
+    WriteBufferFromOwnString out;
+    out <<  "Cannot parse input: expected " << escape << s;
 
-        if (buf.eof())
-            out << " at end of stream.";
-        else
-            out << " before: " << escape << String(buf.position(), std::min(SHOW_CHARS_ON_SYNTAX_ERROR, buf.buffer().end() - buf.position()));
-    }
+    if (buf.eof())
+        out << " at end of stream.";
+    else
+        out << " before: " << escape << String(buf.position(), std::min(SHOW_CHARS_ON_SYNTAX_ERROR, buf.buffer().end() - buf.position()));
 
-    throw Exception(message, ErrorCodes::CANNOT_PARSE_INPUT_ASSERTION_FAILED);
+    throw Exception(out.str(), ErrorCodes::CANNOT_PARSE_INPUT_ASSERTION_FAILED);
 }
 
 
@@ -753,28 +750,25 @@ void readException(Exception & e, ReadBuffer & buf, const String & additional_me
     readBinary(stack_trace, buf);
     readBinary(has_nested, buf);
 
-    std::string new_message;
-    {
-        WriteBufferFromString out(new_message);
+    WriteBufferFromOwnString out;
 
-        if (!additional_message.empty())
-            out << additional_message << ". ";
+    if (!additional_message.empty())
+        out << additional_message << ". ";
 
-        if (name != "DB::Exception")
-            out << name << ". ";
+    if (name != "DB::Exception")
+        out << name << ". ";
 
-        out << message
-            << ". Stack trace:\n\n" << stack_trace;
-    }
+    out << message
+        << ". Stack trace:\n\n" << stack_trace;
 
     if (has_nested)
     {
         Exception nested;
         readException(nested, buf);
-        e = Exception(new_message, nested, code);
+        e = Exception(out.str(), nested, code);
     }
     else
-        e = Exception(new_message, code);
+        e = Exception(out.str(), code);
 }
 
 void readAndThrowException(ReadBuffer & buf, const String & additional_message)
diff --git a/dbms/src/IO/WriteBufferFromHTTPServerResponse.cpp b/dbms/src/IO/WriteBufferFromHTTPServerResponse.cpp
index 25351ff1a95..105d3787075 100644
--- a/dbms/src/IO/WriteBufferFromHTTPServerResponse.cpp
+++ b/dbms/src/IO/WriteBufferFromHTTPServerResponse.cpp
@@ -144,14 +144,11 @@ void WriteBufferFromHTTPServerResponse::onProgress(const Progress & progress)
         /// Send all common headers before our special progress headers.
         startSendHeaders();
 
-        std::string progress_string;
-        {
-            WriteBufferFromString progress_string_writer(progress_string);
-            accumulated_progress.writeJSON(progress_string_writer);
-        }
+        WriteBufferFromOwnString progress_string_writer;
+        accumulated_progress.writeJSON(progress_string_writer);
 
 #if POCO_CLICKHOUSE_PATCH
-        *response_header_ostr << "X-ClickHouse-Progress: " << progress_string << "\r\n" << std::flush;
+        *response_header_ostr << "X-ClickHouse-Progress: " << progress_string_writer.str() << "\r\n" << std::flush;
 #endif
     }
 }
diff --git a/dbms/src/IO/WriteBufferFromString.h b/dbms/src/IO/WriteBufferFromString.h
index ec50571d2d3..9303a5b817f 100644
--- a/dbms/src/IO/WriteBufferFromString.h
+++ b/dbms/src/IO/WriteBufferFromString.h
@@ -6,6 +6,7 @@
 
 #define WRITE_BUFFER_FROM_STRING_INITIAL_SIZE_IF_EMPTY 32
 
+
 namespace DB
 {
 
@@ -25,6 +26,7 @@ private:
         working_buffer = internal_buffer;
     }
 
+protected:
     void finish()
     {
         s.resize(count());
@@ -47,4 +49,21 @@ public:
     }
 };
 
+
+/// Base listed before WriteBufferFromString so that the string is constructed first and destroyed last.
+struct WriteBufferFromOwnStringHolder { std::string value; };
+
+/// A write buffer that creates its destination string by itself and exposes it through str().
+class WriteBufferFromOwnString : private WriteBufferFromOwnStringHolder, public WriteBufferFromString
+{
+public:
+    WriteBufferFromOwnString() : WriteBufferFromString(value) {}
+
+    std::string & str()    /// finish() resizes the string to count(); the same string is returned.
+    {
+        finish();
+        return value;
+    }
+};
+
 }
diff --git a/dbms/src/IO/WriteHelpers.h b/dbms/src/IO/WriteHelpers.h
index 295817302a6..64801cd896a 100644
--- a/dbms/src/IO/WriteHelpers.h
+++ b/dbms/src/IO/WriteHelpers.h
@@ -21,8 +21,8 @@
 #include <IO/WriteBuffer.h>
 #include <IO/WriteIntText.h>
 #include <IO/VarInt.h>
-#include <IO/WriteBufferFromString.h>
 #include <IO/DoubleConverter.h>
+#include <IO/WriteBufferFromString.h>
 
 
 namespace DB
@@ -754,12 +754,9 @@ void writeException(const Exception & e, WriteBuffer & buf);
 template <typename T>
 inline String toString(const T & x)
 {
-    String res;
-    {
-        WriteBufferFromString buf(res);
-        writeText(x, buf);
-    }
-    return res;
+    WriteBufferFromOwnString buf;
+    writeText(x, buf);
+    return buf.str();
 }
 
 }
diff --git a/dbms/src/IO/tests/gtest_cascade_and_memory_write_buffer.cpp b/dbms/src/IO/tests/gtest_cascade_and_memory_write_buffer.cpp
index 6132831800c..67f76450255 100644
--- a/dbms/src/IO/tests/gtest_cascade_and_memory_write_buffer.cpp
+++ b/dbms/src/IO/tests/gtest_cascade_and_memory_write_buffer.cpp
@@ -11,7 +11,6 @@
 #include <IO/WriteBufferFromString.h>
 
 #include <IO/ConcatReadBuffer.h>
-#include <IO/WriteBufferFromString.h>
 #include <IO/copyData.h>
 
 #include <Common/typeid_cast.h>
diff --git a/dbms/src/IO/tests/parse_int_perf.cpp b/dbms/src/IO/tests/parse_int_perf.cpp
index 7f75309def6..c21c08987a5 100644
--- a/dbms/src/IO/tests/parse_int_perf.cpp
+++ b/dbms/src/IO/tests/parse_int_perf.cpp
@@ -1144,11 +1144,6 @@ UInt64 rdtsc()
 
 int main(int argc, char ** argv)
 {
-/*    std::string s(' ', 20);
-    DB::WriteBufferFromString wb(s);
-    DB::Faster::writeIntText(DB::parse<UInt64>(argv[1]), wb);
-    std::cerr << s << std::endl;*/
-
     try
     {
         using T = UInt8;
diff --git a/dbms/src/Interpreters/Compiler.cpp b/dbms/src/Interpreters/Compiler.cpp
index 37e1910a146..8ca50c4de72 100644
--- a/dbms/src/Interpreters/Compiler.cpp
+++ b/dbms/src/Interpreters/Compiler.cpp
@@ -67,14 +67,9 @@ static Compiler::HashedKey getHash(const std::string & key)
 /// Without .so extension.
 static std::string hashedKeyToFileName(Compiler::HashedKey hashed_key)
 {
-    std::string file_name;
-
-    {
-        WriteBufferFromString out(file_name);
-        out << hashed_key.low << '_' << hashed_key.high;
-    }
-
-    return file_name;
+    WriteBufferFromOwnString out;
+    out << hashed_key.low << '_' << hashed_key.high;
+    return out.str();
 }
 
 
diff --git a/dbms/src/Interpreters/DDLWorker.cpp b/dbms/src/Interpreters/DDLWorker.cpp
index cb457d3b659..1dc88d8192c 100644
--- a/dbms/src/Interpreters/DDLWorker.cpp
+++ b/dbms/src/Interpreters/DDLWorker.cpp
@@ -64,18 +64,15 @@ struct DDLLogEntry
 
     String toString()
     {
-        String res;
-        {
-            WriteBufferFromString wb(res);
+        WriteBufferFromOwnString wb;
 
-            auto version = CURRENT_VERSION;
-            wb << "version: " << version << "\n";
-            wb << "query: " << escape << query << "\n";
-            wb << "hosts: " << hosts << "\n";
-            wb << "initiator: " << initiator << "\n";
-        }
+        auto version = CURRENT_VERSION;
+        wb << "version: " << version << "\n";
+        wb << "query: " << escape << query << "\n";
+        wb << "hosts: " << hosts << "\n";
+        wb << "initiator: " << initiator << "\n";
 
-        return res;
+        return wb.str();
     }
 
     void parse(const String & data)
diff --git a/dbms/src/Interpreters/InterpreterCreateQuery.cpp b/dbms/src/Interpreters/InterpreterCreateQuery.cpp
index 6ea6e6600b2..b17ba8dd66f 100644
--- a/dbms/src/Interpreters/InterpreterCreateQuery.cpp
+++ b/dbms/src/Interpreters/InterpreterCreateQuery.cpp
@@ -5,7 +5,6 @@
 
 #include <Common/escapeForFileName.h>
 
-#include <IO/WriteBufferFromString.h>
 #include <IO/WriteBufferFromFile.h>
 #include <IO/WriteHelpers.h>
 
diff --git a/dbms/src/Interpreters/InterserverIOHandler.h b/dbms/src/Interpreters/InterserverIOHandler.h
index 29717951253..d899ded4de7 100644
--- a/dbms/src/Interpreters/InterserverIOHandler.h
+++ b/dbms/src/Interpreters/InterserverIOHandler.h
@@ -45,13 +45,11 @@ public:
     /// Serializes the location.
     std::string toString() const
     {
-        std::string serialized_location;
-        WriteBufferFromString buf(serialized_location);
+        WriteBufferFromOwnString buf;
         writeBinary(name, buf);
         writeBinary(host, buf);
         writeBinary(port, buf);
-        buf.next();
-        return serialized_location;
+        return buf.str();
     }
 
 public:
diff --git a/dbms/src/Parsers/ASTFunction.cpp b/dbms/src/Parsers/ASTFunction.cpp
index 416eccbcb0b..dc3605bd94c 100644
--- a/dbms/src/Parsers/ASTFunction.cpp
+++ b/dbms/src/Parsers/ASTFunction.cpp
@@ -10,8 +10,7 @@ namespace DB
 
 String ASTFunction::getColumnName() const
 {
-    String res;
-    WriteBufferFromString wb(res);
+    WriteBufferFromOwnString wb;
     writeString(name, wb);
 
     if (parameters)
@@ -34,8 +33,7 @@ String ASTFunction::getColumnName() const
         writeString((*it)->getColumnName(), wb);
     }
     writeChar(')', wb);
-
-    return res;
+    return wb.str();
 }
 
 /** Get the text that identifies this element. */
diff --git a/dbms/src/Parsers/IAST.cpp b/dbms/src/Parsers/IAST.cpp
index 90758a1a96c..b951affd648 100644
--- a/dbms/src/Parsers/IAST.cpp
+++ b/dbms/src/Parsers/IAST.cpp
@@ -24,7 +24,7 @@ const char * IAST::hilite_alias      = "\033[0;32m";
 const char * IAST::hilite_none       = "\033[0m";
 
 
-/// Quota the identifier with backquotes, if required.
+/// Quote the identifier with backquotes, if required.
 String backQuoteIfNeed(const String & x)
 {
     String res(x.size(), '\0');
@@ -63,12 +63,9 @@ size_t IAST::checkSize(size_t max_size) const
 
 String IAST::getTreeID() const
 {
-    String res;
-    {
-        WriteBufferFromString out(res);
-        getTreeIDImpl(out);
-    }
-    return res;
+    WriteBufferFromOwnString out;
+    getTreeIDImpl(out);
+    return out.str();
 }
 
 
diff --git a/dbms/src/Parsers/parseQuery.cpp b/dbms/src/Parsers/parseQuery.cpp
index 54a66f2c79c..93bc39f75c3 100644
--- a/dbms/src/Parsers/parseQuery.cpp
+++ b/dbms/src/Parsers/parseQuery.cpp
@@ -7,6 +7,7 @@
 #include <Common/typeid_cast.h>
 #include <Common/UTF8Helpers.h>
 #include <IO/WriteHelpers.h>
+#include <IO/WriteBufferFromString.h>
 #include <IO/Operators.h>
 
 
@@ -156,18 +157,14 @@ std::string getSyntaxErrorMessage(
     bool hilite,
     const std::string & query_description)
 {
-    String message;
+    WriteBufferFromOwnString out;
+    writeCommonErrorMessage(out, begin, end, last_token, query_description);
+    writeQueryAroundTheError(out, begin, end, hilite, &last_token, 1);
 
-    {
-        WriteBufferFromString out(message);
-        writeCommonErrorMessage(out, begin, end, last_token, query_description);
-        writeQueryAroundTheError(out, begin, end, hilite, &last_token, 1);
+    if (!expected.variants.empty())
+        out << "Expected " << expected;
 
-        if (!expected.variants.empty())
-            out << "Expected " << expected;
-    }
-
-    return message;
+    return out.str();
 }
 
 
@@ -178,17 +175,13 @@ std::string getLexicalErrorMessage(
     bool hilite,
     const std::string & query_description)
 {
-    String message;
+    WriteBufferFromOwnString out;
+    writeCommonErrorMessage(out, begin, end, last_token, query_description);
+    writeQueryAroundTheError(out, begin, end, hilite, &last_token, 1);
 
-    {
-        WriteBufferFromString out(message);
-        writeCommonErrorMessage(out, begin, end, last_token, query_description);
-        writeQueryAroundTheError(out, begin, end, hilite, &last_token, 1);
+    out << getErrorTokenDescription(last_token.type);
 
-        out << getErrorTokenDescription(last_token.type);
-    }
-
-    return message;
+    return out.str();
 }
 
 
@@ -199,19 +192,15 @@ std::string getUnmatchedParenthesesErrorMessage(
     bool hilite,
     const std::string & query_description)
 {
-    String message;
+    WriteBufferFromOwnString out;
+    writeCommonErrorMessage(out, begin, end, unmatched_parens[0], query_description);
+    writeQueryAroundTheError(out, begin, end, hilite, unmatched_parens.data(), unmatched_parens.size());
 
-    {
-        WriteBufferFromString out(message);
-        writeCommonErrorMessage(out, begin, end, unmatched_parens[0], query_description);
-        writeQueryAroundTheError(out, begin, end, hilite, unmatched_parens.data(), unmatched_parens.size());
+    out << "Unmatched parentheses: ";
+    for (const Token & paren : unmatched_parens)
+        out << *paren.begin;
 
-        out << "Unmatched parentheses: ";
-        for (const Token & paren : unmatched_parens)
-            out << *paren.begin;
-    }
-
-    return message;
+    return out.str();
 }
 
 }
diff --git a/dbms/src/Server/Client.cpp b/dbms/src/Server/Client.cpp
index 05dd03a202f..8ef9241550d 100644
--- a/dbms/src/Server/Client.cpp
+++ b/dbms/src/Server/Client.cpp
@@ -29,7 +29,6 @@
 #include <IO/ReadBufferFromFileDescriptor.h>
 #include <IO/WriteBufferFromFileDescriptor.h>
 #include <IO/WriteBufferFromFile.h>
-#include <IO/WriteBufferFromString.h>
 #include <IO/ReadBufferFromMemory.h>
 #include <IO/ReadHelpers.h>
 #include <IO/WriteHelpers.h>
diff --git a/dbms/src/Server/HTTPHandler.cpp b/dbms/src/Server/HTTPHandler.cpp
index 1e027176015..1babb58a743 100644
--- a/dbms/src/Server/HTTPHandler.cpp
+++ b/dbms/src/Server/HTTPHandler.cpp
@@ -16,7 +16,6 @@
 #include <IO/ConcatReadBuffer.h>
 #include <IO/CompressedReadBuffer.h>
 #include <IO/CompressedWriteBuffer.h>
-#include <IO/WriteBufferFromString.h>
 #include <IO/WriteBufferFromHTTPServerResponse.h>
 #include <IO/WriteBufferFromFile.h>
 #include <IO/WriteHelpers.h>
diff --git a/dbms/src/Server/LocalServer.cpp b/dbms/src/Server/LocalServer.cpp
index a7616d4d865..17bc099921c 100644
--- a/dbms/src/Server/LocalServer.cpp
+++ b/dbms/src/Server/LocalServer.cpp
@@ -15,6 +15,7 @@
 #include <Common/ConfigProcessor.h>
 #include <Common/escapeForFileName.h>
 #include <IO/ReadBufferFromString.h>
+#include <IO/WriteBufferFromString.h>
 #include <IO/WriteBufferFromFileDescriptor.h>
 #include <Parsers/parseQuery.h>
 #include <Parsers/IAST.h>
@@ -352,10 +353,9 @@ catch (const Exception & e)
 
 inline String getQuotedString(const String & s)
 {
-    String res;
-    WriteBufferFromString buf(res);
+    WriteBufferFromOwnString buf;
     writeQuotedString(s, buf);
-    return res;
+    return buf.str();
 }
 
 
diff --git a/dbms/src/Storages/ColumnsDescription.cpp b/dbms/src/Storages/ColumnsDescription.cpp
index c586d13e32f..c0bfabea727 100644
--- a/dbms/src/Storages/ColumnsDescription.cpp
+++ b/dbms/src/Storages/ColumnsDescription.cpp
@@ -23,8 +23,7 @@ namespace ErrorCodes
 template <bool store>
 String ColumnsDescription<store>::toString() const
 {
-    String s;
-    WriteBufferFromString buf{s};
+    WriteBufferFromOwnString buf;
 
     writeString("columns format version: 1\n", buf);
     writeText(columns.size() + materialized.size() + alias.size(), buf);
@@ -58,7 +57,7 @@ String ColumnsDescription<store>::toString() const
     write_columns(materialized);
     write_columns(alias);
 
-    return s;
+    return buf.str();
 }
 
 
diff --git a/dbms/src/Storages/MergeTree/ActiveDataPartSet.cpp b/dbms/src/Storages/MergeTree/ActiveDataPartSet.cpp
index e62731de771..106fabba78b 100644
--- a/dbms/src/Storages/MergeTree/ActiveDataPartSet.cpp
+++ b/dbms/src/Storages/MergeTree/ActiveDataPartSet.cpp
@@ -120,25 +120,23 @@ String ActiveDataPartSet::getPartName(DayNum_t left_date, DayNum_t right_date, I
     const auto & date_lut = DateLUT::instance();
 
     /// Directory name for the part has form: `YYYYMMDD_YYYYMMDD_N_N_L`.
-    String res;
-    {
-        unsigned left_date_id = date_lut.toNumYYYYMMDD(left_date);
-        unsigned right_date_id = date_lut.toNumYYYYMMDD(right_date);
 
-        WriteBufferFromString wb(res);
+    unsigned left_date_id = date_lut.toNumYYYYMMDD(left_date);
+    unsigned right_date_id = date_lut.toNumYYYYMMDD(right_date);
 
-        writeIntText(left_date_id, wb);
-        writeChar('_', wb);
-        writeIntText(right_date_id, wb);
-        writeChar('_', wb);
-        writeIntText(left_id, wb);
-        writeChar('_', wb);
-        writeIntText(right_id, wb);
-        writeChar('_', wb);
-        writeIntText(level, wb);
-    }
+    WriteBufferFromOwnString wb;
 
-    return res;
+    writeIntText(left_date_id, wb);
+    writeChar('_', wb);
+    writeIntText(right_date_id, wb);
+    writeChar('_', wb);
+    writeIntText(left_id, wb);
+    writeChar('_', wb);
+    writeIntText(right_id, wb);
+    writeChar('_', wb);
+    writeIntText(level, wb);
+
+    return wb.str();
 }
 
 
diff --git a/dbms/src/Storages/MergeTree/MergeTreeData.cpp b/dbms/src/Storages/MergeTree/MergeTreeData.cpp
index 4bda89fe020..f90004fca5c 100644
--- a/dbms/src/Storages/MergeTree/MergeTreeData.cpp
+++ b/dbms/src/Storages/MergeTree/MergeTreeData.cpp
@@ -9,6 +9,7 @@
 #include <DataStreams/ExpressionBlockInputStream.h>
 #include <DataStreams/copyData.h>
 #include <IO/WriteBufferFromFile.h>
+#include <IO/WriteBufferFromString.h>
 #include <IO/CompressedReadBuffer.h>
 #include <IO/HexWriteBuffer.h>
 #include <DataTypes/DataTypeDate.h>
@@ -879,21 +880,18 @@ void MergeTreeData::createConvertExpression(const DataPartPtr & part, const Name
 
     if (part && !out_rename_map.empty())
     {
-        std::string message;
+        WriteBufferFromOwnString out;
+        out << "Will rename ";
+        bool first = true;
+        for (const auto & from_to : out_rename_map)
         {
-            WriteBufferFromString out(message);
-            out << "Will rename ";
-            bool first = true;
-            for (const auto & from_to : out_rename_map)
-            {
-                if (!first)
-                    out << ", ";
-                first = false;
-                out << from_to.first << " to " << from_to.second;
-            }
-            out << " in part " << part->name;
+            if (!first)
+                out << ", ";
+            first = false;
+            out << from_to.first << " to " << from_to.second;
         }
-        LOG_DEBUG(log, message);
+        out << " in part " << part->name;
+        LOG_DEBUG(log, out.str());
     }
 }
 
diff --git a/dbms/src/Storages/MergeTree/MergeTreeDataPart.cpp b/dbms/src/Storages/MergeTree/MergeTreeDataPart.cpp
index 4eac19e2a48..9e7585ed8be 100644
--- a/dbms/src/Storages/MergeTree/MergeTreeDataPart.cpp
+++ b/dbms/src/Storages/MergeTree/MergeTreeDataPart.cpp
@@ -256,12 +256,9 @@ void MergeTreeDataPartChecksums::summaryDataChecksum(SipHash & hash) const
 
 String MergeTreeDataPartChecksums::toString() const
 {
-    String s;
-    {
-        WriteBufferFromString out(s);
-        write(out);
-    }
-    return s;
+    WriteBufferFromOwnString out;
+    write(out);
+    return out.str();
 }
 
 MergeTreeDataPartChecksums MergeTreeDataPartChecksums::parse(const String & s)
diff --git a/dbms/src/Storages/MergeTree/ReplicatedMergeTreeAddress.h b/dbms/src/Storages/MergeTree/ReplicatedMergeTreeAddress.h
index e1447b33f39..325b2dc617b 100644
--- a/dbms/src/Storages/MergeTree/ReplicatedMergeTreeAddress.h
+++ b/dbms/src/Storages/MergeTree/ReplicatedMergeTreeAddress.h
@@ -46,12 +46,9 @@ struct ReplicatedMergeTreeAddress
 
     String toString() const
     {
-        String res;
-        {
-            WriteBufferFromString out(res);
-            writeText(out);
-        }
-        return res;
+        WriteBufferFromOwnString out;
+        writeText(out);
+        return out.str();
     }
 
     void fromString(const String & str)
diff --git a/dbms/src/Storages/MergeTree/ReplicatedMergeTreeLogEntry.cpp b/dbms/src/Storages/MergeTree/ReplicatedMergeTreeLogEntry.cpp
index 94687df0dd2..fb1a5b4ed86 100644
--- a/dbms/src/Storages/MergeTree/ReplicatedMergeTreeLogEntry.cpp
+++ b/dbms/src/Storages/MergeTree/ReplicatedMergeTreeLogEntry.cpp
@@ -3,6 +3,7 @@
 #include <Storages/MergeTree/ReplicatedMergeTreeLogEntry.h>
 #include <IO/Operators.h>
 #include <IO/ReadBufferFromString.h>
+#include <IO/WriteBufferFromString.h>
 
 
 namespace DB
@@ -133,12 +134,9 @@ void ReplicatedMergeTreeLogEntryData::readText(ReadBuffer & in)
 
 String ReplicatedMergeTreeLogEntryData::toString() const
 {
-    String s;
-    {
-        WriteBufferFromString out(s);
-        writeText(out);
-    }
-    return s;
+    WriteBufferFromOwnString out;
+    writeText(out);
+    return out.str();
 }
 
 ReplicatedMergeTreeLogEntry::Ptr ReplicatedMergeTreeLogEntry::parse(const String & s, const zkutil::Stat & stat)
diff --git a/dbms/src/Storages/MergeTree/ReplicatedMergeTreeQuorumEntry.h b/dbms/src/Storages/MergeTree/ReplicatedMergeTreeQuorumEntry.h
index d1cdba5ac9d..c44f0b78f84 100644
--- a/dbms/src/Storages/MergeTree/ReplicatedMergeTreeQuorumEntry.h
+++ b/dbms/src/Storages/MergeTree/ReplicatedMergeTreeQuorumEntry.h
@@ -61,12 +61,9 @@ struct ReplicatedMergeTreeQuorumEntry
 
     String toString() const
     {
-        String res;
-        {
-            WriteBufferFromString out(res);
-            writeText(out);
-        }
-        return res;
+        WriteBufferFromOwnString out;
+        writeText(out);
+        return out.str();
     }
 
     void fromString(const String & str)
diff --git a/dbms/src/Storages/MergeTree/ReshardingJob.cpp b/dbms/src/Storages/MergeTree/ReshardingJob.cpp
index ebf570c5696..2e2f718d68b 100644
--- a/dbms/src/Storages/MergeTree/ReshardingJob.cpp
+++ b/dbms/src/Storages/MergeTree/ReshardingJob.cpp
@@ -72,8 +72,7 @@ ReshardingJob::operator bool() const
 
 std::string ReshardingJob::toString() const
 {
-    std::string serialized_job;
-    WriteBufferFromString buf{serialized_job};
+    WriteBufferFromOwnString buf;
 
     writeBinary(database_name, buf);
     writeBinary(table_name, buf);
@@ -89,9 +88,8 @@ std::string ReshardingJob::toString() const
         writeBinary(path.first, buf);
         writeVarUInt(path.second, buf);
     }
-    buf.next();
 
-    return serialized_job;
+    return buf.str();
 }
 
 bool ReshardingJob::isCoordinated() const
diff --git a/dbms/src/Storages/MergeTree/ReshardingWorker.cpp b/dbms/src/Storages/MergeTree/ReshardingWorker.cpp
index df3dbadde90..6ef968975c7 100644
--- a/dbms/src/Storages/MergeTree/ReshardingWorker.cpp
+++ b/dbms/src/Storages/MergeTree/ReshardingWorker.cpp
@@ -801,8 +801,7 @@ void ReshardingWorker::storeTargetShardsInfo()
 
     zookeeper->tryRemove(getLocalJobPath() + "/shards");
 
-    std::string out;
-    WriteBufferFromString buf{out};
+    WriteBufferFromOwnString buf;
 
     size_t entries_count = 0;
     for (const auto & entry : per_shard_data_parts)
@@ -830,9 +829,7 @@ void ReshardingWorker::storeTargetShardsInfo()
         writeBinary(hash, buf);
     }
 
-    buf.next();
-
-    (void) zookeeper->create(getLocalJobPath() + "/shards", out,
+    zookeeper->create(getLocalJobPath() + "/shards", buf.str(),
         zkutil::CreateMode::Persistent);
 }
 
@@ -2111,45 +2108,36 @@ std::string ReshardingWorker::dumpCoordinatorState(const std::string & coordinat
 
     auto current_host = getFQDNOrHostName();
 
-    try
+    WriteBufferFromOwnString buf;
+
+    writeString("Coordinator dump: ", buf);
+    writeString("ID: {", buf);
+    writeString(coordinator_id + "}; ", buf);
+
+    auto zookeeper = context.getZooKeeper();
+
+    Status status(zookeeper->get(getCoordinatorPath(coordinator_id) + "/status"));
+
+    if (status.getCode() != STATUS_OK)
     {
-        WriteBufferFromString buf{out};
+        writeString("Global status: {", buf);
+        writeString(status.getMessage() + "}; ", buf);
+    }
 
-        writeString("Coordinator dump: ", buf);
-        writeString("ID: {", buf);
-        writeString(coordinator_id + "}; ", buf);
-
-        auto zookeeper = context.getZooKeeper();
-
-        Status status(zookeeper->get(getCoordinatorPath(coordinator_id) + "/status"));
+    auto hosts = zookeeper->getChildren(getCoordinatorPath(coordinator_id) + "/status");
+    for (const auto & host : hosts)
+    {
+        Status status(zookeeper->get(getCoordinatorPath(coordinator_id) + "/status/" + host));
 
         if (status.getCode() != STATUS_OK)
         {
-            writeString("Global status: {", buf);
+            writeString("NODE ", buf);
+            writeString(((host == current_host) ? "localhost" : host) + ": {", buf);
             writeString(status.getMessage() + "}; ", buf);
         }
-
-        auto hosts = zookeeper->getChildren(getCoordinatorPath(coordinator_id) + "/status");
-        for (const auto & host : hosts)
-        {
-            Status status(zookeeper->get(getCoordinatorPath(coordinator_id) + "/status/" + host));
-
-            if (status.getCode() != STATUS_OK)
-            {
-                writeString("NODE ", buf);
-                writeString(((host == current_host) ? "localhost" : host) + ": {", buf);
-                writeString(status.getMessage() + "}; ", buf);
-            }
-        }
-
-        buf.next();
-    }
-    catch (...)
-    {
-        tryLogCurrentException(__PRETTY_FUNCTION__);
     }
 
-    return out;
+    return buf.str();
 }
 
 /// Compute the hash function from the checksum files of a given part.
@@ -2716,8 +2704,7 @@ void ReshardingWorker::LogRecord::writeBack()
 
 std::string ReshardingWorker::LogRecord::toString()
 {
-    std::string out;
-    WriteBufferFromString buf{out};
+    WriteBufferFromOwnString buf;
 
     writeVarUInt(static_cast<unsigned int>(operation), buf);
     writeVarUInt(static_cast<unsigned int>(state), buf);
@@ -2732,9 +2719,7 @@ std::string ReshardingWorker::LogRecord::toString()
         writeBinary(entry.second, buf);
     }
 
-    buf.next();
-
-    return out;
+    return buf.str();
 }
 
 }
diff --git a/dbms/src/Storages/StorageReplicatedMergeTree.cpp b/dbms/src/Storages/StorageReplicatedMergeTree.cpp
index 5aa1866c272..c320db083cd 100644
--- a/dbms/src/Storages/StorageReplicatedMergeTree.cpp
+++ b/dbms/src/Storages/StorageReplicatedMergeTree.cpp
@@ -395,10 +395,9 @@ namespace
 
         String toString() const
         {
-            String res;
-            WriteBufferFromString out(res);
+            WriteBufferFromOwnString out;
             write(out);
-            return res;
+            return out.str();
         }
 
         void check(ReadBuffer & in) const

From f8a761fbd9b728567a977838ba217a6e6d4ea79c Mon Sep 17 00:00:00 2001
From: Alexey Milovidov <milovidov@yandex-team.ru>
Date: Tue, 1 Aug 2017 00:44:57 +0300
Subject: [PATCH 015/281] Fixed error [#CLICKHOUSE-3191].

---
 dbms/src/Common/tests/small_table.cpp | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/dbms/src/Common/tests/small_table.cpp b/dbms/src/Common/tests/small_table.cpp
index d11570f6d0f..825047bf53f 100644
--- a/dbms/src/Common/tests/small_table.cpp
+++ b/dbms/src/Common/tests/small_table.cpp
@@ -47,7 +47,7 @@ int main(int argc, char ** argv)
         DB::WriteBufferFromOwnString wb;
         cont.writeText(wb);
 
-        std::cerr << "dump: " << wb.str(); << std::endl;
+        std::cerr << "dump: " << wb.str() << std::endl;
     }
 
     {

From 844990db2784637bf5d6f42245ad3c28833145d8 Mon Sep 17 00:00:00 2001
From: Alexey Milovidov <milovidov@yandex-team.ru>
Date: Tue, 1 Aug 2017 00:50:21 +0300
Subject: [PATCH 016/281] Fixed build with gcc-7.1 [#CLICKHOUSE-2].

---
 dbms/src/Common/tests/lru_cache.cpp | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/dbms/src/Common/tests/lru_cache.cpp b/dbms/src/Common/tests/lru_cache.cpp
index a3d58c4fa6b..dea6bb6e14a 100644
--- a/dbms/src/Common/tests/lru_cache.cpp
+++ b/dbms/src/Common/tests/lru_cache.cpp
@@ -5,6 +5,8 @@
 #include <string>
 #include <thread>
 #include <chrono>
+#include <functional>
+
 
 namespace
 {

From a6cb383c4f53aa342e74c7e28e09be5ced863f73 Mon Sep 17 00:00:00 2001
From: Alexey Milovidov <milovidov@yandex-team.ru>
Date: Tue, 1 Aug 2017 04:25:03 +0300
Subject: [PATCH 017/281] Fixed build with gcc-7.1 [#CLICKHOUSE-2].

---
 dbms/src/Core/NamesAndTypes.h               | 4 ++++
 dbms/src/DataStreams/tests/glue_streams.cpp | 4 ++--
 dbms/src/Functions/FunctionsMath.h          | 8 ++++----
 3 files changed, 10 insertions(+), 6 deletions(-)

diff --git a/dbms/src/Core/NamesAndTypes.h b/dbms/src/Core/NamesAndTypes.h
index 86c0b17b66c..e8b9bf258ad 100644
--- a/dbms/src/Core/NamesAndTypes.h
+++ b/dbms/src/Core/NamesAndTypes.h
@@ -38,6 +38,10 @@ class NamesAndTypesList : public std::list<NameAndTypePair>
 public:
     using std::list<NameAndTypePair>::list;
 
+    /// Without this constructor, gcc 7.1.0 get confused.
+    template <typename Iterator>
+    NamesAndTypesList(Iterator begin, Iterator end) : std::list<NameAndTypePair>(begin, end) {}
+
     void readText(ReadBuffer & buf);
     void writeText(WriteBuffer & buf) const;
 
diff --git a/dbms/src/DataStreams/tests/glue_streams.cpp b/dbms/src/DataStreams/tests/glue_streams.cpp
index e2ebe2c1e37..78205b9c0ce 100644
--- a/dbms/src/DataStreams/tests/glue_streams.cpp
+++ b/dbms/src/DataStreams/tests/glue_streams.cpp
@@ -52,13 +52,13 @@ try
             " GROUP BY SearchPhrase"
             " ORDER BY count() DESC"
             " LIMIT 10",
-        context, QueryProcessingStage::Complete);
+        context, false, QueryProcessingStage::Complete);
 
     BlockIO io2 = executeQuery(
         "SELECT count()"
             " FROM hits"
             " WHERE SearchPhrase != ''",
-        context, QueryProcessingStage::Complete);
+        context, false, QueryProcessingStage::Complete);
 
     WriteBufferFromFileDescriptor wb(STDOUT_FILENO);
 
diff --git a/dbms/src/Functions/FunctionsMath.h b/dbms/src/Functions/FunctionsMath.h
index 7fda2a03fa4..fea7266e379 100644
--- a/dbms/src/Functions/FunctionsMath.h
+++ b/dbms/src/Functions/FunctionsMath.h
@@ -165,7 +165,7 @@ private:
 };
 
 
-template <typename Name, Float64(&Function)(Float64)>
+template <typename Name, Float64(Function)(Float64)>
 struct UnaryFunctionPlain
 {
     static constexpr auto name = Name::name;
@@ -180,7 +180,7 @@ struct UnaryFunctionPlain
 
 #if USE_VECTORCLASS
 
-template <typename Name, Vec2d(&Function)(const Vec2d &)>
+template <typename Name, Vec2d(Function)(const Vec2d &)>
 struct UnaryFunctionVectorized
 {
     static constexpr auto name = Name::name;
@@ -437,7 +437,7 @@ private:
 };
 
 
-template <typename Name, Float64(&Function)(Float64, Float64)>
+template <typename Name, Float64(Function)(Float64, Float64)>
 struct BinaryFunctionPlain
 {
     static constexpr auto name = Name::name;
@@ -452,7 +452,7 @@ struct BinaryFunctionPlain
 
 #if USE_VECTORCLASS
 
-template <typename Name, Vec2d(&Function)(const Vec2d &, const Vec2d &)>
+template <typename Name, Vec2d(Function)(const Vec2d &, const Vec2d &)>
 struct BinaryFunctionVectorized
 {
     static constexpr auto name = Name::name;

From 9a546b54142fc7b91fb3787cddfe1895f15933f4 Mon Sep 17 00:00:00 2001
From: Alexey Milovidov <milovidov@yandex-team.ru>
Date: Tue, 1 Aug 2017 04:58:18 +0300
Subject: [PATCH 018/281] Fixed build with gcc 6 [#CLICKHOUSE-2].

---
 dbms/src/Core/NamesAndTypes.h | 7 +++++--
 1 file changed, 5 insertions(+), 2 deletions(-)

diff --git a/dbms/src/Core/NamesAndTypes.h b/dbms/src/Core/NamesAndTypes.h
index e8b9bf258ad..32387a889b1 100644
--- a/dbms/src/Core/NamesAndTypes.h
+++ b/dbms/src/Core/NamesAndTypes.h
@@ -4,6 +4,7 @@
 #include <list>
 #include <string>
 #include <set>
+#include <initializer_list>
 
 #include <DataTypes/IDataType.h>
 #include <Core/Names.h>
@@ -36,12 +37,14 @@ using NamesAndTypes = std::vector<NameAndTypePair>;
 class NamesAndTypesList : public std::list<NameAndTypePair>
 {
 public:
-    using std::list<NameAndTypePair>::list;
+    NamesAndTypesList() {}
+
+    NamesAndTypesList(std::initializer_list<NameAndTypePair> init) : std::list<NameAndTypePair>(init) {}
 
-    /// Without this constructor, gcc 7.1.0 get confused.
     template <typename Iterator>
     NamesAndTypesList(Iterator begin, Iterator end) : std::list<NameAndTypePair>(begin, end) {}
 
+
     void readText(ReadBuffer & buf);
     void writeText(WriteBuffer & buf) const;
 

From 9f799820d9e62c73792cf4907d0b6a7640e3d752 Mon Sep 17 00:00:00 2001
From: Pawel Rog <prog88@gmail.com>
Date: Tue, 1 Aug 2017 10:12:15 +0200
Subject: [PATCH 019/281] Applied changes requested by Alexey

---
 dbms/src/IO/CompressedReadBufferBase.cpp            | 9 +++------
 dbms/src/Storages/MergeTree/MergeTreeData.cpp       | 2 +-
 dbms/src/Storages/MergeTree/MergeTreeDataWriter.cpp | 2 +-
 3 files changed, 5 insertions(+), 8 deletions(-)

diff --git a/dbms/src/IO/CompressedReadBufferBase.cpp b/dbms/src/IO/CompressedReadBufferBase.cpp
index abcfc35702b..55c5b94f13d 100644
--- a/dbms/src/IO/CompressedReadBufferBase.cpp
+++ b/dbms/src/IO/CompressedReadBufferBase.cpp
@@ -54,12 +54,9 @@ size_t CompressedReadBufferBase::readCompressedData(size_t & size_decompressed,
     size_t & size_compressed = size_compressed_without_checksum;
 
 
-    if (method == static_cast<UInt8>(CompressionMethodByte::LZ4) || method == static_cast<UInt8>(CompressionMethodByte::ZSTD))
-    {
-        size_compressed = unalignedLoad<UInt32>(&own_compressed_buffer[1]);
-        size_decompressed = unalignedLoad<UInt32>(&own_compressed_buffer[5]);
-    }
-    else if (method == static_cast<UInt8>(CompressionMethodByte::NONE))
+    if (method == static_cast<UInt8>(CompressionMethodByte::LZ4) ||
+            method == static_cast<UInt8>(CompressionMethodByte::ZSTD) ||
+            method == static_cast<UInt8>(CompressionMethodByte::NONE))
     {
         size_compressed = unalignedLoad<UInt32>(&own_compressed_buffer[1]);
         size_decompressed = unalignedLoad<UInt32>(&own_compressed_buffer[5]);
diff --git a/dbms/src/Storages/MergeTree/MergeTreeData.cpp b/dbms/src/Storages/MergeTree/MergeTreeData.cpp
index 0c22298dd1d..70e763c9dd3 100644
--- a/dbms/src/Storages/MergeTree/MergeTreeData.cpp
+++ b/dbms/src/Storages/MergeTree/MergeTreeData.cpp
@@ -1032,7 +1032,7 @@ MergeTreeData::AlterDataPartTransactionPtr MergeTreeData::alterDataPart(
 
         auto compression_method = this->context.chooseCompressionMethod(
             this->getTotalActiveSizeInBytes(),
-            static_cast<double>(this->getTotalCompressedSize()) / this->getTotalActiveSizeInBytes());
+            static_cast<double>(this->getTotalActiveSizeInBytes()) / this->getTotalCompressedSize());
         ExpressionBlockInputStream in(part_in, expression);
         MergedColumnOnlyOutputStream out(*this, full_path + part->name + '/', true, compression_method, false);
         in.readPrefix();
diff --git a/dbms/src/Storages/MergeTree/MergeTreeDataWriter.cpp b/dbms/src/Storages/MergeTree/MergeTreeDataWriter.cpp
index 2b3d869967e..67de3bcf30e 100644
--- a/dbms/src/Storages/MergeTree/MergeTreeDataWriter.cpp
+++ b/dbms/src/Storages/MergeTree/MergeTreeDataWriter.cpp
@@ -148,7 +148,7 @@ MergeTreeData::MutableDataPartPtr MergeTreeDataWriter::writeTempPart(BlockWithDa
 
     auto compression_method = data.context.chooseCompressionMethod(
         data.getTotalActiveSizeInBytes(),
-        static_cast<double>(data.getTotalCompressedSize()) / data.getTotalActiveSizeInBytes());
+        static_cast<double>(data.getTotalActiveSizeInBytes()) / data.getTotalCompressedSize());
 
     NamesAndTypesList columns = data.getColumnsList().filter(block.getColumnsList().getNames());
     MergedBlockOutputStream out(data, new_data_part->getFullPath(), columns, compression_method);

From db984d92a8eeb4ca862d037d96462159d02e5461 Mon Sep 17 00:00:00 2001
From: proller <proller@github.com>
Date: Tue, 1 Aug 2017 16:31:38 +0300
Subject: [PATCH 020/281] Fix crash on start

---
 dbms/src/IO/WriteBufferFromString.h | 13 ++++++++-----
 1 file changed, 8 insertions(+), 5 deletions(-)

diff --git a/dbms/src/IO/WriteBufferFromString.h b/dbms/src/IO/WriteBufferFromString.h
index 9303a5b817f..4f9748c1740 100644
--- a/dbms/src/IO/WriteBufferFromString.h
+++ b/dbms/src/IO/WriteBufferFromString.h
@@ -50,19 +50,22 @@ public:
 };
 
 
+class StringHolder {
+protected:
+    std::string ss;
+};
+
 /// Creates the string by itself and allows to get it.
-class WriteBufferFromOwnString : public WriteBufferFromString
+class WriteBufferFromOwnString : public StringHolder, public WriteBufferFromString
 {
-private:
-    std::string s;
 
 public:
-    WriteBufferFromOwnString() : WriteBufferFromString(s) {}
+    WriteBufferFromOwnString() : WriteBufferFromString(ss) {}
 
     std::string & str()
     {
         finish();
-        return s;
+        return ss;
     }
 };
 

From fd5ac57b8e45387945e8c38ce2dba6fb62422a0c Mon Sep 17 00:00:00 2001
From: Alexey Milovidov <milovidov@yandex-team.ru>
Date: Tue, 1 Aug 2017 17:33:38 +0300
Subject: [PATCH 021/281] Addition to prev. revision [#CLICKHOUSE-2].

---
 dbms/src/IO/WriteBufferFromString.h | 19 ++++++++++++-------
 1 file changed, 12 insertions(+), 7 deletions(-)

diff --git a/dbms/src/IO/WriteBufferFromString.h b/dbms/src/IO/WriteBufferFromString.h
index 4f9748c1740..9545562669f 100644
--- a/dbms/src/IO/WriteBufferFromString.h
+++ b/dbms/src/IO/WriteBufferFromString.h
@@ -50,22 +50,27 @@ public:
 };
 
 
-class StringHolder {
-protected:
-    std::string ss;
-};
+namespace detail
+{
+    /// For correct order of initialization.
+    class StringHolder
+    {
+    protected:
+        std::string value;
+    };
+}
 
 /// Creates the string by itself and allows to get it.
-class WriteBufferFromOwnString : public StringHolder, public WriteBufferFromString
+class WriteBufferFromOwnString : public detail::StringHolder, public WriteBufferFromString
 {
 
 public:
-    WriteBufferFromOwnString() : WriteBufferFromString(ss) {}
+    WriteBufferFromOwnString() : WriteBufferFromString(value) {}
 
     std::string & str()
     {
         finish();
-        return ss;
+        return value;
     }
 };
 

From 34ac4fa3b83e4772319509559ded03c1c19977f6 Mon Sep 17 00:00:00 2001
From: Alexey Milovidov <milovidov@yandex-team.ru>
Date: Tue, 1 Aug 2017 17:34:06 +0300
Subject: [PATCH 022/281] Added metric with revision number for better
 monitoring of deployment [#CLICKHOUSE-2].

---
 dbms/src/Common/CurrentMetrics.cpp |  1 +
 dbms/src/Server/Server.cpp         | 12 +++++++++++-
 2 files changed, 12 insertions(+), 1 deletion(-)

diff --git a/dbms/src/Common/CurrentMetrics.cpp b/dbms/src/Common/CurrentMetrics.cpp
index 5f23ded3e1b..f664acd7ca3 100644
--- a/dbms/src/Common/CurrentMetrics.cpp
+++ b/dbms/src/Common/CurrentMetrics.cpp
@@ -34,6 +34,7 @@
     M(StorageBufferRows) \
     M(StorageBufferBytes) \
     M(DictCacheRequests) \
+    M(Revision) \
 
 
 namespace CurrentMetrics
diff --git a/dbms/src/Server/Server.cpp b/dbms/src/Server/Server.cpp
index 61a45c535ec..ebce924f76d 100644
--- a/dbms/src/Server/Server.cpp
+++ b/dbms/src/Server/Server.cpp
@@ -18,6 +18,8 @@
 #include <Common/getMultipleKeysFromConfig.h>
 #include <common/getMemoryAmount.h>
 #include <Common/getNumberOfPhysicalCPUCores.h>
+#include <Common/CurrentMetrics.h>
+#include <Common/ClickHouseRevision.h>
 #include <IO/HTTPCommon.h>
 #include <Interpreters/AsynchronousMetrics.h>
 #include <Interpreters/ProcessList.h>
@@ -46,9 +48,15 @@
 #include <TableFunctions/registerTableFunctions.h>
 
 
+namespace CurrentMetrics
+{
+    extern const Metric Revision;
+}
+
 namespace DB
 {
-namespace ErrorCodes
+
+    namespace ErrorCodes
 {
     extern const int NO_ELEMENTS_IN_CONFIG;
     extern const int SUPPORT_IS_DISABLED;
@@ -221,6 +229,8 @@ int Server::main(const std::vector<std::string> & args)
     registerAggregateFunctions();
     registerTableFunctions();
 
+    CurrentMetrics::set(CurrentMetrics::Revision, ClickHouseRevision::get());
+
     /** Context contains all that query execution is dependent:
       *  settings, available functions, data types, aggregate functions, databases...
       */

From d6a2056851bc89dfbf273537b58bce9b37c01a10 Mon Sep 17 00:00:00 2001
From: Pawel Rog <prog88@gmail.com>
Date: Tue, 1 Aug 2017 16:48:24 +0200
Subject: [PATCH 023/281] Applied changes requested by Alexey

---
 dbms/src/Storages/MergeTree/MergeTreeData.cpp       | 4 ++--
 dbms/src/Storages/MergeTree/MergeTreeDataMerger.cpp | 4 ++--
 2 files changed, 4 insertions(+), 4 deletions(-)

diff --git a/dbms/src/Storages/MergeTree/MergeTreeData.cpp b/dbms/src/Storages/MergeTree/MergeTreeData.cpp
index 70e763c9dd3..c9f1bac787b 100644
--- a/dbms/src/Storages/MergeTree/MergeTreeData.cpp
+++ b/dbms/src/Storages/MergeTree/MergeTreeData.cpp
@@ -1031,8 +1031,8 @@ MergeTreeData::AlterDataPartTransactionPtr MergeTreeData::alterDataPart(
             false, nullptr, "", false, 0, DBMS_DEFAULT_BUFFER_SIZE, false);
 
         auto compression_method = this->context.chooseCompressionMethod(
-            this->getTotalActiveSizeInBytes(),
-            static_cast<double>(this->getTotalActiveSizeInBytes()) / this->getTotalCompressedSize());
+            part->size_in_bytes,
+            static_cast<double>(part->size_in_bytes) / this->getTotalActiveSizeInBytes());
         ExpressionBlockInputStream in(part_in, expression);
         MergedColumnOnlyOutputStream out(*this, full_path + part->name + '/', true, compression_method, false);
         in.readPrefix();
diff --git a/dbms/src/Storages/MergeTree/MergeTreeDataMerger.cpp b/dbms/src/Storages/MergeTree/MergeTreeDataMerger.cpp
index 82076becfbe..e7b9ad90c5f 100644
--- a/dbms/src/Storages/MergeTree/MergeTreeDataMerger.cpp
+++ b/dbms/src/Storages/MergeTree/MergeTreeDataMerger.cpp
@@ -620,8 +620,8 @@ MergeTreeData::MutableDataPartPtr MergeTreeDataMerger::mergePartsToTemporaryPart
         merged_stream = std::make_shared<DistinctSortedBlockInputStream>(merged_stream, Limits(), 0 /*limit_hint*/, Names());
 
     auto compression_method = data.context.chooseCompressionMethod(
-        merge_entry->total_size_bytes_compressed,
-        static_cast<double>(merge_entry->total_size_bytes_compressed) / data.getTotalActiveSizeInBytes());
+            merge_entry->total_size_bytes_compressed,
+            static_cast<double> (merge_entry->total_size_bytes_compressed) / data.getTotalActiveSizeInBytes());
 
     MergedBlockOutputStream to{
         data, new_part_tmp_path, merging_columns, compression_method, merged_column_to_size, aio_threshold};

From 8f6c2d4e476fa87d2a21e16ea74a88d0c5ed5c99 Mon Sep 17 00:00:00 2001
From: proller <proller@github.com>
Date: Tue, 1 Aug 2017 16:26:19 +0300
Subject: [PATCH 024/281] Fix .h compile

---
 .../src/AggregateFunctions/AggregateFunctionQuantileTiming.h | 5 +++++
 dbms/src/Functions/FunctionsCoding.h                         | 1 +
 2 files changed, 6 insertions(+)

diff --git a/dbms/src/AggregateFunctions/AggregateFunctionQuantileTiming.h b/dbms/src/AggregateFunctions/AggregateFunctionQuantileTiming.h
index bcf9273b2c9..9f4bb9eeda2 100644
--- a/dbms/src/AggregateFunctions/AggregateFunctionQuantileTiming.h
+++ b/dbms/src/AggregateFunctions/AggregateFunctionQuantileTiming.h
@@ -21,6 +21,11 @@
 #include <ext/range.h>
 
 
+namespace ErrorCodes
+{
+    extern const int LOGICAL_ERROR;
+}
+
 namespace DB
 {
 
diff --git a/dbms/src/Functions/FunctionsCoding.h b/dbms/src/Functions/FunctionsCoding.h
index 6ec2a1b269d..33e335219e3 100644
--- a/dbms/src/Functions/FunctionsCoding.h
+++ b/dbms/src/Functions/FunctionsCoding.h
@@ -32,6 +32,7 @@ namespace DB
 namespace ErrorCodes
 {
     extern const int TOO_LESS_ARGUMENTS_FOR_FUNCTION;
+    extern const int LOGICAL_ERROR;
 }
 
 

From aaff26336df1cedfd4d21f76121e47a27b2344f0 Mon Sep 17 00:00:00 2001
From: felixoid <felixoid@yandex-team.ru>
Date: Tue, 1 Aug 2017 18:55:40 +0300
Subject: [PATCH 025/281] remove login mode from su in debian init-script,
 CLICKHOUSE-3195

---
 debian/clickhouse-server.init | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/debian/clickhouse-server.init b/debian/clickhouse-server.init
index 7d867db8b46..1eaa433a6c2 100755
--- a/debian/clickhouse-server.init
+++ b/debian/clickhouse-server.init
@@ -88,7 +88,7 @@ die()
 check_config()
 {
     if [ -x "$BINDIR/$GENERIC_PROGRAM" ]; then
-        su -l $CLICKHOUSE_USER -s $SHELL -c "$BINDIR/$GENERIC_PROGRAM --extract-from-config --config-file=\"$CLICKHOUSE_CONFIG\" --key=path" >/dev/null || die "Configuration file ${CLICKHOUSE_CONFIG} doesn't parse successfully. Won't restart server. You may use forcerestart if you are sure.";
+        su -s $SHELL ${CLICKHOUSE_USER} -c "$BINDIR/$GENERIC_PROGRAM --extract-from-config --config-file=\"$CLICKHOUSE_CONFIG\" --key=path" >/dev/null || die "Configuration file ${CLICKHOUSE_CONFIG} doesn't parse successfully. Won't restart server. You may use forcerestart if you are sure.";
     fi
 }
 
@@ -96,7 +96,7 @@ check_config()
 initdb()
 {
     if [ -x "$BINDIR/$GENERIC_PROGRAM" ]; then
-        CLICKHOUSE_DATADIR_FROM_CONFIG=$(su -l $CLICKHOUSE_USER -s $SHELL -c "$BINDIR/$GENERIC_PROGRAM --extract-from-config --config-file=\"$CLICKHOUSE_CONFIG\" --key=path")
+        CLICKHOUSE_DATADIR_FROM_CONFIG=$(su -s $SHELL ${CLICKHOUSE_USER} -c "$BINDIR/$GENERIC_PROGRAM --extract-from-config --config-file=\"$CLICKHOUSE_CONFIG\" --key=path")
         if [ "(" "$?" -ne "0" ")" -o "(" -z "${CLICKHOUSE_DATADIR_FROM_CONFIG}" ")" ]; then
             die "Cannot obtain value of path from config file: ${CLICKHOUSE_CONFIG}";
         fi
@@ -167,7 +167,7 @@ start()
             rm -f $CLICKHOUSE_PIDFILE
             # Lock should not be held while running child process, so we release the lock. Note: obviously, there is race condition.
             # But clickhouse-server has protection from simultaneous runs with same data directory.
-            su -l $CLICKHOUSE_USER -s $SHELL -c "flock -u 9; exec -a \"$PROGRAM\" \"$BINDIR/$PROGRAM\" --daemon --pid-file=\"$CLICKHOUSE_PIDFILE\" --config-file=\"$CLICKHOUSE_CONFIG\""
+            su -s $SHELL ${CLICKHOUSE_USER} -c "flock -u 9; exec -a \"$PROGRAM\" \"$BINDIR/$PROGRAM\" --daemon --pid-file=\"$CLICKHOUSE_PIDFILE\" --config-file=\"$CLICKHOUSE_CONFIG\""
             EXIT_STATUS=$?
             if [ $EXIT_STATUS -ne 0 ]; then
                 break

From fd9b8c6fbe661205c69dfe010569d65bf7b2c796 Mon Sep 17 00:00:00 2001
From: Alexey Milovidov <milovidov@yandex-team.ru>
Date: Tue, 1 Aug 2017 20:02:41 +0300
Subject: [PATCH 026/281] Fixed error [#METR-25976].

---
 dbms/src/DataStreams/CSVRowInputStream.cpp    |  6 ++---
 .../TabSeparatedRowInputStream.cpp            | 24 ++++++++++---------
 dbms/src/Parsers/ParserInsertQuery.cpp        |  5 +++-
 3 files changed, 20 insertions(+), 15 deletions(-)

diff --git a/dbms/src/DataStreams/CSVRowInputStream.cpp b/dbms/src/DataStreams/CSVRowInputStream.cpp
index ae4397fb3b7..7521abd690e 100644
--- a/dbms/src/DataStreams/CSVRowInputStream.cpp
+++ b/dbms/src/DataStreams/CSVRowInputStream.cpp
@@ -115,13 +115,13 @@ void CSVRowInputStream::readPrefix()
 
 bool CSVRowInputStream::read(Block & block)
 {
+    if (istr.eof())
+        return false;
+
     updateDiagnosticInfo();
 
     size_t size = data_types.size();
 
-    if (istr.eof())
-        return false;
-
     for (size_t i = 0; i < size; ++i)
     {
         skipWhitespacesAndTabs(istr);
diff --git a/dbms/src/DataStreams/TabSeparatedRowInputStream.cpp b/dbms/src/DataStreams/TabSeparatedRowInputStream.cpp
index fdbed86c550..35bc8ac89a1 100644
--- a/dbms/src/DataStreams/TabSeparatedRowInputStream.cpp
+++ b/dbms/src/DataStreams/TabSeparatedRowInputStream.cpp
@@ -60,7 +60,7 @@ void TabSeparatedRowInputStream::readPrefix()
 }
 
 
- /** Check for a common error case - usage of Windows line feed.
+/** Check for a common error case - usage of Windows line feed.
   */
 static void checkForCarriageReturn(ReadBuffer & istr)
 {
@@ -75,13 +75,13 @@ static void checkForCarriageReturn(ReadBuffer & istr)
 
 bool TabSeparatedRowInputStream::read(Block & block)
 {
+    if (istr.eof())
+        return false;
+
     updateDiagnosticInfo();
 
     size_t size = data_types.size();
 
-    if (istr.eof())
-        return false;
-
     for (size_t i = 0; i < size; ++i)
     {
         data_types[i].get()->deserializeTextEscaped(*block.getByPosition(i).column.get(), istr);
@@ -307,15 +307,17 @@ void TabSeparatedRowInputStream::syncAfterError()
 {
     skipToUnescapedNextLineOrEOF(istr);
 }
+
+
 void TabSeparatedRowInputStream::updateDiagnosticInfo()
-    {
-        ++row_num;
+{
+    ++row_num;
 
-        bytes_read_at_start_of_buffer_on_prev_row = bytes_read_at_start_of_buffer_on_current_row;
-        bytes_read_at_start_of_buffer_on_current_row = istr.count() - istr.offset();
+    bytes_read_at_start_of_buffer_on_prev_row = bytes_read_at_start_of_buffer_on_current_row;
+    bytes_read_at_start_of_buffer_on_current_row = istr.count() - istr.offset();
 
-        pos_of_prev_row = pos_of_current_row;
-        pos_of_current_row = istr.position();
-    }
+    pos_of_prev_row = pos_of_current_row;
+    pos_of_current_row = istr.position();
+}
 
 }
diff --git a/dbms/src/Parsers/ParserInsertQuery.cpp b/dbms/src/Parsers/ParserInsertQuery.cpp
index 9222c00116f..1bb43d47f3d 100644
--- a/dbms/src/Parsers/ParserInsertQuery.cpp
+++ b/dbms/src/Parsers/ParserInsertQuery.cpp
@@ -75,6 +75,8 @@ bool ParserInsertQuery::parseImpl(Pos & pos, ASTPtr & node, Expected & expected)
     }
     else if (s_format.ignore(pos, expected))
     {
+        auto name_pos = pos;
+
         if (!name_p.parse(pos, format, expected))
             return false;
 
@@ -88,7 +90,8 @@ bool ParserInsertQuery::parseImpl(Pos & pos, ASTPtr & node, Expected & expected)
                 "Note that there is no ';' in first line.", ErrorCodes::SYNTAX_ERROR);
 
         /// Data starts after the first newline, if there is one, or after all the whitespace characters, otherwise.
-        data = pos->begin;
+
+        data = name_pos->end;
 
         while (data < end && (*data == ' ' || *data == '\t' || *data == '\f'))
             ++data;

From b5913042652d03644a081b06ac7eabd188bda311 Mon Sep 17 00:00:00 2001
From: robot-metrika-test <robot-metrika-test@yandex-team.ru>
Date: Tue, 1 Aug 2017 20:14:45 +0300
Subject: [PATCH 027/281] Auto version update to [54266]

---
 dbms/cmake/version.cmake | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/dbms/cmake/version.cmake b/dbms/cmake/version.cmake
index f7e5f0a1e65..07461982e61 100644
--- a/dbms/cmake/version.cmake
+++ b/dbms/cmake/version.cmake
@@ -1,6 +1,6 @@
 # This strings autochanged from release_lib.sh:
-set(VERSION_DESCRIBE v1.1.54265-testing)
-set(VERSION_REVISION 54265)
+set(VERSION_DESCRIBE v1.1.54266-testing)
+set(VERSION_REVISION 54266)
 # end of autochange
 
 set (VERSION_MAJOR 1)

From 35ff7ab9f2943e215e9f7d63767eb9a4d8dc5221 Mon Sep 17 00:00:00 2001
From: Alexey Milovidov <milovidov@yandex-team.ru>
Date: Tue, 1 Aug 2017 20:26:36 +0300
Subject: [PATCH 028/281] Added test [#METR-25976].

---
 .../0_stateless/00485_http_insert_format.reference |  6 ++++++
 .../0_stateless/00485_http_insert_format.sh        | 14 ++++++++++++++
 2 files changed, 20 insertions(+)
 create mode 100644 dbms/tests/queries/0_stateless/00485_http_insert_format.reference
 create mode 100755 dbms/tests/queries/0_stateless/00485_http_insert_format.sh

diff --git a/dbms/tests/queries/0_stateless/00485_http_insert_format.reference b/dbms/tests/queries/0_stateless/00485_http_insert_format.reference
new file mode 100644
index 00000000000..08737b88a9d
--- /dev/null
+++ b/dbms/tests/queries/0_stateless/00485_http_insert_format.reference
@@ -0,0 +1,6 @@
+{"s":"","x":"ABC"}
+{"s":"","x":"DEF"}
+{"s":"","x":"JKL"}
+{"s":"","x":"MNO"}
+{"s":"hello","x":"GHI"}
+{"s":"hello","x":"PQR"}
diff --git a/dbms/tests/queries/0_stateless/00485_http_insert_format.sh b/dbms/tests/queries/0_stateless/00485_http_insert_format.sh
new file mode 100755
index 00000000000..1ff3702e435
--- /dev/null
+++ b/dbms/tests/queries/0_stateless/00485_http_insert_format.sh
@@ -0,0 +1,14 @@
+#!/usr/bin/env bash
+
+clickhouse-client --query="DROP TABLE IF EXISTS test.format"
+clickhouse-client --query="CREATE TABLE test.format (s String, x FixedString(3)) ENGINE = Memory"
+
+echo -ne '\tABC\n' | curl -sS "http://localhost:8123/?query=INSERT+INTO+test.format+FORMAT+TabSeparated" --data-binary @-
+echo -ne 'INSERT INTO test.format FORMAT TabSeparated\n\tDEF\n' | curl -sS "http://localhost:8123/" --data-binary @-
+echo -ne 'INSERT INTO test.format FORMAT TabSeparated hello\tGHI\n' | curl -sS "http://localhost:8123/" --data-binary @-
+echo -ne 'INSERT INTO test.format FORMAT TabSeparated\r\n\tJKL\n' | curl -sS "http://localhost:8123/" --data-binary @-
+echo -ne 'INSERT INTO test.format FORMAT TabSeparated   \t\r\n\tMNO\n' | curl -sS "http://localhost:8123/" --data-binary @-
+echo -ne 'INSERT INTO test.format FORMAT TabSeparated\t\t\thello\tPQR\n' | curl -sS "http://localhost:8123/" --data-binary @-
+
+clickhouse-client --query="SELECT * FROM test.format ORDER BY s, x FORMAT JSONEachRow"
+clickhouse-client --query="DROP TABLE test.format"

From 2ae6f1ebdb9007c8664a0bc25f04405e17fe7011 Mon Sep 17 00:00:00 2001
From: alexey-milovidov <milovidov@yandex-team.ru>
Date: Tue, 1 Aug 2017 23:07:16 +0300
Subject: [PATCH 029/281] Update MergeTreeDataWriter.cpp

---
 dbms/src/Storages/MergeTree/MergeTreeDataWriter.cpp | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/dbms/src/Storages/MergeTree/MergeTreeDataWriter.cpp b/dbms/src/Storages/MergeTree/MergeTreeDataWriter.cpp
index 67de3bcf30e..dff3d22f168 100644
--- a/dbms/src/Storages/MergeTree/MergeTreeDataWriter.cpp
+++ b/dbms/src/Storages/MergeTree/MergeTreeDataWriter.cpp
@@ -146,9 +146,9 @@ MergeTreeData::MutableDataPartPtr MergeTreeDataWriter::writeTempPart(BlockWithDa
             ProfileEvents::increment(ProfileEvents::MergeTreeDataWriterBlocksAlreadySorted);
     }
 
-    auto compression_method = data.context.chooseCompressionMethod(
-        data.getTotalActiveSizeInBytes(),
-        static_cast<double>(data.getTotalActiveSizeInBytes()) / data.getTotalCompressedSize());
+    /// This effectively chooses minimal compression method:
+    ///  either default lz4 or compression method with zero thresholds on absolute and relative part size.
+    auto compression_method = data.context.chooseCompressionMethod(0, 0);
 
     NamesAndTypesList columns = data.getColumnsList().filter(block.getColumnsList().getNames());
     MergedBlockOutputStream out(data, new_data_part->getFullPath(), columns, compression_method);

From afc4b1bef28cfeb48334a70463b4f7349c0cfd55 Mon Sep 17 00:00:00 2001
From: robot-metrika-test <robot-metrika-test@yandex-team.ru>
Date: Wed, 2 Aug 2017 17:13:14 +0300
Subject: [PATCH 030/281] Auto version update to [54267]

---
 dbms/cmake/version.cmake | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/dbms/cmake/version.cmake b/dbms/cmake/version.cmake
index 07461982e61..9b572b85bae 100644
--- a/dbms/cmake/version.cmake
+++ b/dbms/cmake/version.cmake
@@ -1,6 +1,6 @@
 # This strings autochanged from release_lib.sh:
-set(VERSION_DESCRIBE v1.1.54266-testing)
-set(VERSION_REVISION 54266)
+set(VERSION_DESCRIBE v1.1.54267-testing)
+set(VERSION_REVISION 54267)
 # end of autochange
 
 set (VERSION_MAJOR 1)

From 003d2a94788351b08d757645285505a4b693b339 Mon Sep 17 00:00:00 2001
From: Vitaliy Lyudvichenko <vludv@yandex-team.ru>
Date: Thu, 3 Aug 2017 03:21:50 +0300
Subject: [PATCH 031/281] Add comprehensive exception message for
 ZooKeeper::multi(). [#CLICKHOUSE-2]

---
 dbms/src/Common/ZooKeeper/ZooKeeper.cpp       | 15 +++++-
 .../src/Common/ZooKeeper/tests/CMakeLists.txt |  3 ++
 .../tests/zkutil_test_multi_exception.cpp     | 49 +++++++++++++++++++
 3 files changed, 66 insertions(+), 1 deletion(-)
 create mode 100644 dbms/src/Common/ZooKeeper/tests/zkutil_test_multi_exception.cpp

diff --git a/dbms/src/Common/ZooKeeper/ZooKeeper.cpp b/dbms/src/Common/ZooKeeper/ZooKeeper.cpp
index c1fcb86a86f..11bf5cc45bd 100644
--- a/dbms/src/Common/ZooKeeper/ZooKeeper.cpp
+++ b/dbms/src/Common/ZooKeeper/ZooKeeper.cpp
@@ -568,7 +568,20 @@ int32_t ZooKeeper::multiImpl(const Ops & ops_, OpResultsPtr * out_results_)
 OpResultsPtr ZooKeeper::multi(const Ops & ops)
 {
     OpResultsPtr results;
-    check(tryMulti(ops, &results));
+    int code = tryMulti(ops, &results);    /// Overall result code of the whole multi-op request.
+    if (code != ZOK)
+    {
+        if (results && results->size() == ops.size())    /// Use per-op results only when there is exactly one per op.
+        {
+            for (size_t i = 0; i < ops.size(); ++i)
+            {
+                if (results->at(i).err == code)    /// Report the first op whose own error equals the overall code.
+                    throw KeeperException("multi() failed at op #" + std::to_string(i) + ", " + ops[i]->describe(), code);
+            }
+        }
+        /// No per-op result matched the overall code: throw with the bare error code only.
+        throw KeeperException(code);
+    }
     return results;
 }
 
diff --git a/dbms/src/Common/ZooKeeper/tests/CMakeLists.txt b/dbms/src/Common/ZooKeeper/tests/CMakeLists.txt
index 82da63e8ab3..93409ca6f53 100644
--- a/dbms/src/Common/ZooKeeper/tests/CMakeLists.txt
+++ b/dbms/src/Common/ZooKeeper/tests/CMakeLists.txt
@@ -15,3 +15,6 @@ target_link_libraries(zkutil_zookeeper_holder dbms)
 
 add_executable (zk_many_watches_reconnect zk_many_watches_reconnect.cpp)
 target_link_libraries (zk_many_watches_reconnect dbms)
+
+add_executable (zkutil_test_multi_exception zkutil_test_multi_exception.cpp)
+target_link_libraries (zkutil_test_multi_exception dbms)
diff --git a/dbms/src/Common/ZooKeeper/tests/zkutil_test_multi_exception.cpp b/dbms/src/Common/ZooKeeper/tests/zkutil_test_multi_exception.cpp
new file mode 100644
index 00000000000..bcf5d6614fd
--- /dev/null
+++ b/dbms/src/Common/ZooKeeper/tests/zkutil_test_multi_exception.cpp
@@ -0,0 +1,49 @@
+#include <iostream>
+#include <Common/ZooKeeper/ZooKeeper.h>
+#include <Common/Exception.h>
+
+using namespace DB;
+
+int main()    /// Manual check: the exception from a failed ZooKeeper::multi() must name the failing op index and its path.
+{
+    auto zookeeper = std::make_unique<zkutil::ZooKeeper>("localhost:2181");
+
+    try
+    {
+        auto acl = zookeeper->getDefaultACL();
+        zkutil::Ops ops;
+
+        zookeeper->tryRemoveRecursive("/clickhouse_test_zkutil_multi");
+        /// Create the root node and node "a", so that creating "a" again below has to fail.
+        ops.emplace_back(new zkutil::Op::Create("/clickhouse_test_zkutil_multi", "_", acl, zkutil::CreateMode::Persistent));
+        ops.emplace_back(new zkutil::Op::Create("/clickhouse_test_zkutil_multi/a", "_", acl, zkutil::CreateMode::Persistent));
+        zookeeper->multi(ops);
+
+        ops.clear();
+        ops.emplace_back(new zkutil::Op::Create("/clickhouse_test_zkutil_multi/c", "_", acl, zkutil::CreateMode::Persistent));
+        ops.emplace_back(new zkutil::Op::Remove("/clickhouse_test_zkutil_multi/c", -1));
+        ops.emplace_back(new zkutil::Op::Create("/clickhouse_test_zkutil_multi/a", "BadBoy", acl, zkutil::CreateMode::Persistent));
+        ops.emplace_back(new zkutil::Op::Create("/clickhouse_test_zkutil_multi/b", "_", acl, zkutil::CreateMode::Persistent));
+        ops.emplace_back(new zkutil::Op::Create("/clickhouse_test_zkutil_multi/a", "_", acl, zkutil::CreateMode::Persistent));
+        /// Expected to throw: op #2 (zero-based) tries to create node "a", which already exists.
+        zookeeper->multi(ops);
+    }
+    catch (...)
+    {
+        zookeeper->tryRemoveRecursive("/clickhouse_test_zkutil_multi");
+
+        String msg = getCurrentExceptionMessage(false);
+        /// The message must contain both the path of the failing op and its index.
+        if (msg.find("/clickhouse_test_zkutil_multi/a") == std::string::npos || msg.find("#2") == std::string::npos)
+        {
+            std::cerr << "Wrong: " << msg << "\n";
+            return -1;
+        }
+
+        std::cout << "Ok: " << msg << "\n";
+        return 0;
+    }
+
+    std::cerr << "Unexpected\n";
+    return -1;
+}

From d6833a0d5569a9dbd4d1cdf67d95015712f13cb6 Mon Sep 17 00:00:00 2001
From: proller <proller@users.noreply.github.com>
Date: Thu, 3 Aug 2017 15:44:39 +0300
Subject: [PATCH 032/281] Cmake: remove global include_directories (#1055)

* Fix .h compile

* Cmake: remove global include_directories

* boost include hide

* fix cctz

* add \n
---
 cmake/find_boost.cmake                       |  4 --
 contrib/libboost/CMakeLists.txt              |  5 ++
 contrib/libbtrie/CMakeLists.txt              |  4 +-
 contrib/libcctz/CMakeLists.txt               | 13 ++--
 contrib/libcityhash/CMakeLists.txt           |  5 +-
 contrib/libfarmhash/CMakeLists.txt           |  4 +-
 contrib/liblz4/CMakeLists.txt                |  3 +-
 contrib/libmetrohash/CMakeLists.txt          |  4 +-
 contrib/libre2/CMakeLists.txt                |  6 +-
 contrib/libtcmalloc/CMakeLists.txt           | 73 ++++++++++----------
 contrib/libunwind/CMakeLists.txt             | 10 ++-
 contrib/libzookeeper/CMakeLists.txt          | 17 ++---
 dbms/cmake/find_vectorclass.cmake            | 13 ++--
 dbms/src/Functions/CMakeLists.txt            |  4 ++
 dbms/tests/CMakeLists.txt                    |  2 +-
 libs/libcommon/CMakeLists.txt                |  4 +-
 libs/libcommon/cmake/find_cctz.cmake         |  2 -
 libs/libcommon/cmake/find_gperftools.cmake   |  4 --
 libs/libcommon/cmake/find_jemalloc.cmake     |  3 +-
 libs/libdaemon/CMakeLists.txt                |  8 +--
 libs/libdaemon/cmake/find_unwind.cmake       |  2 -
 libs/libmysqlxx/CMakeLists.txt               |  4 +-
 libs/libmysqlxx/cmake/find_mysqlclient.cmake |  1 -
 libs/libmysqlxx/src/tests/CMakeLists.txt     |  1 -
 libs/libpocoext/CMakeLists.txt               |  6 +-
 utils/corrector_utf8/CMakeLists.txt          |  1 -
 utils/iotest/CMakeLists.txt                  |  2 -
 27 files changed, 100 insertions(+), 105 deletions(-)

diff --git a/cmake/find_boost.cmake b/cmake/find_boost.cmake
index 12dc9ec809c..a7f72d8c68e 100644
--- a/cmake/find_boost.cmake
+++ b/cmake/find_boost.cmake
@@ -16,9 +16,6 @@ if (NOT USE_INTERNAL_BOOST_LIBRARY)
         set (Boost_SYSTEM_LIBRARY "")
     endif ()
 
-    if (Boost_INCLUDE_DIRS)
-        include_directories (${Boost_INCLUDE_DIRS})
-    endif ()
 endif ()
 
 if (NOT Boost_SYSTEM_LIBRARY)
@@ -28,7 +25,6 @@ if (NOT Boost_SYSTEM_LIBRARY)
     set (Boost_SYSTEM_LIBRARY boost_system_internal)
     set (Boost_FILESYSTEM_LIBRARY boost_filesystem_internal)
     set (Boost_INCLUDE_DIRS "${ClickHouse_SOURCE_DIR}/contrib/libboost/boost_1_62_0/")
-    include_directories (BEFORE ${Boost_INCLUDE_DIRS})
 endif ()
 
 message (STATUS "Using Boost: ${Boost_INCLUDE_DIRS} : ${Boost_PROGRAM_OPTIONS_LIBRARY},${Boost_SYSTEM_LIBRARY},${Boost_FILESYSTEM_LIBRARY}")
diff --git a/contrib/libboost/CMakeLists.txt b/contrib/libboost/CMakeLists.txt
index 45f01045b31..8c630ee1dba 100644
--- a/contrib/libboost/CMakeLists.txt
+++ b/contrib/libboost/CMakeLists.txt
@@ -48,3 +48,8 @@ boost_1_62_0/libs/test/src/unit_test_monitor.cpp
 boost_1_62_0/libs/test/src/unit_test_parameters.cpp
 boost_1_62_0/libs/test/src/xml_log_formatter.cpp
 boost_1_62_0/libs/test/src/xml_report_formatter.cpp)
+
+target_include_directories (boost_program_options_internal BEFORE PUBLIC ${Boost_INCLUDE_DIRS})
+target_include_directories (boost_filesystem_internal BEFORE PUBLIC ${Boost_INCLUDE_DIRS})
+target_include_directories (boost_system_internal BEFORE PUBLIC ${Boost_INCLUDE_DIRS})
+target_include_directories (boost_test_internal BEFORE PUBLIC ${Boost_INCLUDE_DIRS})
diff --git a/contrib/libbtrie/CMakeLists.txt b/contrib/libbtrie/CMakeLists.txt
index 8d91eb1c316..7423fc57bea 100644
--- a/contrib/libbtrie/CMakeLists.txt
+++ b/contrib/libbtrie/CMakeLists.txt
@@ -1,6 +1,6 @@
-include_directories (BEFORE include)
-
 add_library (btrie
     src/btrie.c
     include/btrie.h
 )
+
+target_include_directories (btrie PUBLIC include)
diff --git a/contrib/libcctz/CMakeLists.txt b/contrib/libcctz/CMakeLists.txt
index 14767e6c7f7..e87a7da4c44 100644
--- a/contrib/libcctz/CMakeLists.txt
+++ b/contrib/libcctz/CMakeLists.txt
@@ -1,9 +1,3 @@
-include_directories (include)
-
-if (CMAKE_SYSTEM MATCHES "FreeBSD")
-	# yes, need linux, because bsd check inside linux in time_zone_libc.cc:24
-	add_definitions (-D__USE_BSD -Dlinux -D_XOPEN_SOURCE=600)
-endif ()
 
 add_library(cctz
     src/time_zone_libc.cc
@@ -24,3 +18,10 @@ add_library(cctz
     include/time_zone.h
     include/civil_time_detail.h
     include/civil_time.h)
+
+if (CMAKE_SYSTEM MATCHES "FreeBSD")
+    # Yes, `linux` must be defined too: the BSD check in time_zone_libc.cc:24 is nested inside the linux check.
+    target_compile_definitions (cctz PRIVATE __USE_BSD linux _XOPEN_SOURCE=600)
+endif ()
+
+target_include_directories (cctz PUBLIC include)
diff --git a/contrib/libcityhash/CMakeLists.txt b/contrib/libcityhash/CMakeLists.txt
index 3bc7fe8a041..8c3716d8526 100644
--- a/contrib/libcityhash/CMakeLists.txt
+++ b/contrib/libcityhash/CMakeLists.txt
@@ -1,8 +1,9 @@
-include_directories (include src)
-
 add_library(cityhash
 	src/city.cc
 
 	include/citycrc.h
 	include/city.h
 	src/config.h)
+
+target_include_directories (cityhash PUBLIC include)
+target_include_directories (cityhash PRIVATE src)
diff --git a/contrib/libfarmhash/CMakeLists.txt b/contrib/libfarmhash/CMakeLists.txt
index 170fb17ac53..7781028094d 100644
--- a/contrib/libfarmhash/CMakeLists.txt
+++ b/contrib/libfarmhash/CMakeLists.txt
@@ -1,5 +1,5 @@
-include_directories (${CMAKE_CURRENT_BINARY_DIR})
-
 add_library(farmhash
 	farmhash.cc
 	farmhash.h)
+
+target_include_directories (farmhash PUBLIC ${CMAKE_CURRENT_BINARY_DIR})
diff --git a/contrib/liblz4/CMakeLists.txt b/contrib/liblz4/CMakeLists.txt
index 033b923e8e3..98f13476887 100644
--- a/contrib/liblz4/CMakeLists.txt
+++ b/contrib/liblz4/CMakeLists.txt
@@ -1,5 +1,3 @@
-include_directories (BEFORE include)
-
 add_library (lz4
     src/lz4.c
     src/lz4hc.c
@@ -8,3 +6,4 @@ add_library (lz4
     include/lz4/lz4hc.h
     include/lz4/lz4opt.h)
 
+target_include_directories(lz4 PUBLIC include)
diff --git a/contrib/libmetrohash/CMakeLists.txt b/contrib/libmetrohash/CMakeLists.txt
index f9a2d147e22..6947b92e054 100644
--- a/contrib/libmetrohash/CMakeLists.txt
+++ b/contrib/libmetrohash/CMakeLists.txt
@@ -1,5 +1,3 @@
-include_directories (${CMAKE_CURRENT_BINARY_DIR})
-
 if (HAVE_SSE42) # Not used. Pretty easy to port.
     set (SOURCES_SSE42_ONLY src/metrohash128crc.cpp)
 endif ()
@@ -11,3 +9,5 @@ add_library(metrohash
     src/metrohash64.cpp
     src/metrohash128.cpp
     ${SOURCES_SSE42_ONLY})
+
+target_include_directories(metrohash PRIVATE ${CMAKE_CURRENT_BINARY_DIR})
diff --git a/contrib/libre2/CMakeLists.txt b/contrib/libre2/CMakeLists.txt
index c76162fa371..111c60f3f49 100644
--- a/contrib/libre2/CMakeLists.txt
+++ b/contrib/libre2/CMakeLists.txt
@@ -34,14 +34,12 @@ set (re2_sources
 # re2 changes its state during matching of regular expression, e.g. creates temporary DFA.
 # It uses RWLock to process the same regular expression object from different threads.
 # In order to avoid redundant locks in some cases, we use not thread-safe version of the library (re2_st).
-add_definitions (-DNDEBUG)
-
-include_directories (BEFORE .)
 
 add_library (re2 ${re2_sources})
 add_library (re2_st ${re2_sources})
 
-set_target_properties (re2_st PROPERTIES COMPILE_DEFINITIONS "NO_THREADS;re2=re2_st")
+target_compile_definitions (re2 PRIVATE NDEBUG)
+target_compile_definitions (re2_st PRIVATE NDEBUG NO_THREADS re2=re2_st)
 
 file (MAKE_DIRECTORY ${CMAKE_CURRENT_BINARY_DIR}/re2_st)
 foreach (FILENAME filtered_re2.h re2.h set.h stringpiece.h variadic_function.h)
diff --git a/contrib/libtcmalloc/CMakeLists.txt b/contrib/libtcmalloc/CMakeLists.txt
index d07ed17a0b2..57b7cd9c553 100644
--- a/contrib/libtcmalloc/CMakeLists.txt
+++ b/contrib/libtcmalloc/CMakeLists.txt
@@ -1,39 +1,5 @@
-add_definitions(
-    -DNO_TCMALLOC_SAMPLES
-    -DNDEBUG
-    -DNO_FRAME_POINTER
-    -Wwrite-strings
-    -Wno-sign-compare
-    -Wno-unused-result
-    -Wno-deprecated-declarations
-    -Wno-unused-function
-    -Wno-unused-private-field
-    -fno-builtin-malloc
-    -fno-builtin-free
-    -fno-builtin-realloc
-    -fno-builtin-calloc
-    -fno-builtin-cfree
-    -fno-builtin-memalign
-    -fno-builtin-posix_memalign
-    -fno-builtin-valloc
-    -fno-builtin-pvalloc
-)
 
-if(CMAKE_CXX_COMPILER_ID STREQUAL "Clang" AND CMAKE_CXX_COMPILER_VERSION VERSION_GREATER 3.9)
-    add_definitions( -Wno-dynamic-exception-spec )
-endif()
-
-if(CMAKE_SYSTEM MATCHES "FreeBSD" AND CMAKE_CXX_COMPILER_ID STREQUAL "GNU")
-    add_definitions(-Wno-unused-but-set-variable)
-endif()
-
-if(CMAKE_SYSTEM MATCHES "FreeBSD")
-    add_definitions(-D_GNU_SOURCE)
-endif()
-
-include_directories (include src)
-
-message(STATUS "Building: tcmalloc_minimal_internal")
+message (STATUS "Building: tcmalloc_minimal_internal")
 
 add_library (tcmalloc_minimal_internal
 ./src/malloc_hook.cc
@@ -71,3 +37,40 @@ add_library (tcmalloc_minimal_internal
 ./src/raw_printer.cc
 ./src/memory_region_map.cc
 )
+
+target_compile_options (tcmalloc_minimal_internal PUBLIC
+    -DNO_TCMALLOC_SAMPLES
+    -DNDEBUG
+    -DNO_FRAME_POINTER
+    -Wwrite-strings
+    -Wno-sign-compare
+    -Wno-unused-result
+    -Wno-deprecated-declarations
+    -Wno-unused-function
+    -Wno-unused-private-field
+    -fno-builtin-malloc
+    -fno-builtin-free
+    -fno-builtin-realloc
+    -fno-builtin-calloc
+    -fno-builtin-cfree
+    -fno-builtin-memalign
+    -fno-builtin-posix_memalign
+    -fno-builtin-valloc
+    -fno-builtin-pvalloc
+)
+
+if (CMAKE_CXX_COMPILER_ID STREQUAL "Clang" AND CMAKE_CXX_COMPILER_VERSION VERSION_GREATER 3.9)
+    target_compile_options(tcmalloc_minimal_internal PUBLIC -Wno-dynamic-exception-spec )
+endif ()
+
+if (CMAKE_SYSTEM MATCHES "FreeBSD" AND CMAKE_CXX_COMPILER_ID STREQUAL "GNU")
+    target_compile_options(tcmalloc_minimal_internal PUBLIC -Wno-unused-but-set-variable)
+endif ()
+
+if (CMAKE_SYSTEM MATCHES "FreeBSD")
+    target_compile_definitions(tcmalloc_minimal_internal PUBLIC _GNU_SOURCE)
+endif ()
+
+target_include_directories (tcmalloc_minimal_internal PUBLIC include)
+target_include_directories (tcmalloc_minimal_internal PRIVATE src)
+
diff --git a/contrib/libunwind/CMakeLists.txt b/contrib/libunwind/CMakeLists.txt
index 69f67c52a39..1a1b1e79bc3 100644
--- a/contrib/libunwind/CMakeLists.txt
+++ b/contrib/libunwind/CMakeLists.txt
@@ -1,6 +1,3 @@
-add_definitions(-DHAVE_CONFIG_H=1 -D_XOPEN_SOURCE -D_GNU_SOURCE -Wno-visibility -Wno-header-guard)
-
-include_directories(include include/tdep src)
 
 enable_language(ASM)
 
@@ -55,3 +52,10 @@ src/elf64.c
 src/os-linux.c
 src/x86_64/Los-linux.c
 )
+
+target_compile_definitions (unwind PRIVATE HAVE_CONFIG_H=1 _XOPEN_SOURCE _GNU_SOURCE)
+target_compile_options (unwind PRIVATE -Wno-visibility -Wno-header-guard)
+
+target_include_directories (unwind PUBLIC include)
+target_include_directories (unwind PRIVATE include/tdep)
+target_include_directories (unwind PRIVATE src)
diff --git a/contrib/libzookeeper/CMakeLists.txt b/contrib/libzookeeper/CMakeLists.txt
index db13694527b..1d9eb050c9c 100644
--- a/contrib/libzookeeper/CMakeLists.txt
+++ b/contrib/libzookeeper/CMakeLists.txt
@@ -1,8 +1,3 @@
-add_definitions(-DHAVE_CONFIG_H -DTHREADED)
-
-if("${CMAKE_CXX_COMPILER_ID}" STREQUAL "GNU")
-	add_definitions(-Wno-unused-but-set-variable)
-endif()
 
 add_library (zookeeper_mt
 src/zookeeper.c
@@ -15,8 +10,14 @@ src/hashtable/hashtable.c
 src/hashtable/hashtable_itr.c
 )
 
+target_compile_definitions (zookeeper_mt PRIVATE HAVE_CONFIG_H THREADED)
+
+if(CMAKE_CXX_COMPILER_ID STREQUAL "GNU")
+    target_compile_options (zookeeper_mt PRIVATE -Wno-unused-but-set-variable)
+endif()
+
 target_include_directories (zookeeper_mt
-	PRIVATE include/zookeeper
-	PRIVATE src
-	INTERFACE include
+    PRIVATE include/zookeeper
+    PRIVATE src
+    INTERFACE include
 )
diff --git a/dbms/cmake/find_vectorclass.cmake b/dbms/cmake/find_vectorclass.cmake
index c94bb4640e3..021929a4090 100644
--- a/dbms/cmake/find_vectorclass.cmake
+++ b/dbms/cmake/find_vectorclass.cmake
@@ -3,14 +3,13 @@ option (ENABLE_VECTORCLASS "Faster math functions with vectorclass lib" OFF)
 
 if (ENABLE_VECTORCLASS)
 
-set (VECTORCLASS_INCLUDE_PATHS "${ClickHouse_SOURCE_DIR}/contrib/vectorclass" CACHE STRING "Path of vectorclass library")
-find_path (VECTORCLASS_INCLUDE_DIR NAMES vectorf128.h PATHS ${VECTORCLASS_INCLUDE_PATHS})
+    set (VECTORCLASS_INCLUDE_PATHS "${ClickHouse_SOURCE_DIR}/contrib/vectorclass" CACHE STRING "Path of vectorclass library")
+    find_path (VECTORCLASS_INCLUDE_DIR NAMES vectorf128.h PATHS ${VECTORCLASS_INCLUDE_PATHS})
 
-if (VECTORCLASS_INCLUDE_DIR)
-    set (USE_VECTORCLASS 1)
-    include_directories (BEFORE ${VECTORCLASS_INCLUDE_DIR})
-endif ()
+    if (VECTORCLASS_INCLUDE_DIR)
+        set (USE_VECTORCLASS 1)
+    endif ()
 
-message (STATUS "Using vectorclass=${ENABLE_VECTORCLASS}: ${VECTORCLASS_INCLUDE_DIR}")
+    message (STATUS "Using vectorclass=${USE_VECTORCLASS}: ${VECTORCLASS_INCLUDE_DIR}")
 
 endif ()
diff --git a/dbms/src/Functions/CMakeLists.txt b/dbms/src/Functions/CMakeLists.txt
index 36017fd0d18..d92e2a4603f 100644
--- a/dbms/src/Functions/CMakeLists.txt
+++ b/dbms/src/Functions/CMakeLists.txt
@@ -68,6 +68,10 @@ target_include_directories (clickhouse_functions BEFORE PUBLIC ${ClickHouse_SOUR
 target_include_directories (clickhouse_functions BEFORE PUBLIC ${ClickHouse_SOURCE_DIR}/contrib/libmetrohash/src)
 target_include_directories (clickhouse_functions BEFORE PUBLIC ${CITYHASH_INCLUDE_DIR})
 
+if (USE_VECTORCLASS)
+    target_include_directories (clickhouse_functions BEFORE PUBLIC ${VECTORCLASS_INCLUDE_DIR})
+endif ()
+
 if (ENABLE_TESTS)
     add_subdirectory (tests)
 endif ()
diff --git a/dbms/tests/CMakeLists.txt b/dbms/tests/CMakeLists.txt
index 7ec702449f2..3eff0f02cc9 100644
--- a/dbms/tests/CMakeLists.txt
+++ b/dbms/tests/CMakeLists.txt
@@ -11,7 +11,7 @@ endif ()
 # Google Test from sources
 add_subdirectory(${ClickHouse_SOURCE_DIR}/contrib/googletest ${CMAKE_CURRENT_BINARY_DIR}/googletest)
 # avoid problems with <regexp.h>
-target_compile_definitions (gtest INTERFACE -DGTEST_HAS_POSIX_RE=0)
+target_compile_definitions (gtest INTERFACE GTEST_HAS_POSIX_RE=0)
 target_include_directories (gtest INTERFACE ${ClickHouse_SOURCE_DIR}/contrib/googletest/include)
 
 
diff --git a/libs/libcommon/CMakeLists.txt b/libs/libcommon/CMakeLists.txt
index 534d728e49b..0c50a44089d 100644
--- a/libs/libcommon/CMakeLists.txt
+++ b/libs/libcommon/CMakeLists.txt
@@ -1,5 +1,3 @@
-include_directories (include)
-
 if (APPLE)
     if ("${CMAKE_SYSTEM_NAME}" STREQUAL "Darwin" AND NOT "${CMAKE_SYSTEM_VERSION}" VERSION_LESS "16.1.0")
         set (APPLE_SIERRA_OR_NEWER 1)
@@ -80,7 +78,7 @@ find_package (Threads)
 
 target_include_directories (common BEFORE PRIVATE ${CCTZ_INCLUDE_DIR})
 target_include_directories (common BEFORE PUBLIC ${CITYHASH_INCLUDE_DIR})
-target_include_directories (common BEFORE PUBLIC ${COMMON_INCLUDE_DIR})
+target_include_directories (common PUBLIC ${COMMON_INCLUDE_DIR})
 
 target_link_libraries (
     common
diff --git a/libs/libcommon/cmake/find_cctz.cmake b/libs/libcommon/cmake/find_cctz.cmake
index 807cb9eb56c..a8a4d99521d 100644
--- a/libs/libcommon/cmake/find_cctz.cmake
+++ b/libs/libcommon/cmake/find_cctz.cmake
@@ -6,11 +6,9 @@ if (NOT USE_INTERNAL_CCTZ_LIBRARY)
 endif ()
 
 if (CCTZ_LIBRARY AND CCTZ_INCLUDE_DIR)
-    #include_directories (${CCTZ_INCLUDE_DIR})
 else ()
     set (USE_INTERNAL_CCTZ_LIBRARY 1)
     set (CCTZ_INCLUDE_DIR "${ClickHouse_SOURCE_DIR}/contrib/libcctz/include")
-    #include_directories (BEFORE ${CCTZ_INCLUDE_DIR})
     set (CCTZ_LIBRARY cctz)
 endif ()
 
diff --git a/libs/libcommon/cmake/find_gperftools.cmake b/libs/libcommon/cmake/find_gperftools.cmake
index 052755c9d51..ff95e2a480a 100644
--- a/libs/libcommon/cmake/find_gperftools.cmake
+++ b/libs/libcommon/cmake/find_gperftools.cmake
@@ -10,16 +10,12 @@ if (ENABLE_LIBTCMALLOC)
     #contrib/libtcmalloc doesnt build debug version, try find in system
     if (DEBUG_LIBTCMALLOC OR NOT USE_INTERNAL_GPERFTOOLS_LIBRARY)
         find_package (Gperftools)
-        if (GPERFTOOLS_FOUND)
-            include_directories (${GPERFTOOLS_INCLUDE_DIR})
-        endif ()
     endif ()
 
     if (NOT (GPERFTOOLS_FOUND AND GPERFTOOLS_INCLUDE_DIR AND GPERFTOOLS_TCMALLOC_MINIMAL))
         set (USE_INTERNAL_GPERFTOOLS_LIBRARY 1)
         set (GPERFTOOLS_INCLUDE_DIR "${ClickHouse_SOURCE_DIR}/contrib/libtcmalloc/include")
         set (GPERFTOOLS_TCMALLOC_MINIMAL tcmalloc_minimal_internal)
-        include_directories (BEFORE ${GPERFTOOLS_INCLUDE_DIR})
     endif ()
 
     set (USE_TCMALLOC 1)
diff --git a/libs/libcommon/cmake/find_jemalloc.cmake b/libs/libcommon/cmake/find_jemalloc.cmake
index bac53073be9..f8fb20fe9b6 100644
--- a/libs/libcommon/cmake/find_jemalloc.cmake
+++ b/libs/libcommon/cmake/find_jemalloc.cmake
@@ -1,10 +1,9 @@
 option (ENABLE_JEMALLOC "Set to TRUE to use jemalloc instead of tcmalloc" OFF)
 
 if (ENABLE_JEMALLOC)
-    find_package(JeMalloc)
+    find_package (JeMalloc)
 
     if (JEMALLOC_INCLUDE_DIR AND JEMALLOC_LIBRARIES)
-        include_directories (${JEMALLOC_INCLUDE_DIR})
         set (USE_JEMALLOC 1)
         if (USE_TCMALLOC)
             message (WARNING "Disabling tcmalloc")
diff --git a/libs/libdaemon/CMakeLists.txt b/libs/libdaemon/CMakeLists.txt
index e20a99a78cc..6a2fde64588 100644
--- a/libs/libdaemon/CMakeLists.txt
+++ b/libs/libdaemon/CMakeLists.txt
@@ -1,5 +1,3 @@
-include_directories (include)
-
 add_library (daemon
     src/BaseDaemon.cpp
     src/GraphiteWriter.cpp
@@ -11,10 +9,12 @@ add_library (daemon
 )
 
 if (USE_UNWIND)
+    target_compile_definitions (daemon PRIVATE USE_UNWIND=1)
     target_include_directories (daemon BEFORE PRIVATE ${UNWIND_INCLUDE_DIR})
-    add_definitions(-DUSE_UNWIND=1)
     target_link_libraries (daemon ${UNWIND_LIBRARY})
 endif ()
 
-target_link_libraries (daemon dbms)
+target_include_directories (daemon PUBLIC include)
 target_include_directories (daemon PRIVATE ${ClickHouse_SOURCE_DIR}/libs/libpocoext/include)
+
+target_link_libraries (daemon dbms)
diff --git a/libs/libdaemon/cmake/find_unwind.cmake b/libs/libdaemon/cmake/find_unwind.cmake
index 007240ad88b..0c55715b60c 100644
--- a/libs/libdaemon/cmake/find_unwind.cmake
+++ b/libs/libdaemon/cmake/find_unwind.cmake
@@ -33,12 +33,10 @@ if (NOT USE_INTERNAL_UNWIND_LIBRARY)
 endif ()
 
 if (UNWIND_LIBRARY AND UNWIND_INCLUDE_DIR)
-    #include_directories (${UNWIND_INCLUDE_DIR})
     set (USE_UNWIND 1)
 elseif (CMAKE_SYSTEM MATCHES "Linux" AND NOT ARM)
     set (USE_INTERNAL_UNWIND_LIBRARY 1)
     set (UNWIND_INCLUDE_DIR "${ClickHouse_SOURCE_DIR}/contrib/libunwind/include")
-    #include_directories (BEFORE ${UNWIND_INCLUDE_DIR})
     set (UNWIND_LIBRARY unwind)
     set (USE_UNWIND 1)
 endif ()
diff --git a/libs/libmysqlxx/CMakeLists.txt b/libs/libmysqlxx/CMakeLists.txt
index f3455f780e1..df732d24d02 100644
--- a/libs/libmysqlxx/CMakeLists.txt
+++ b/libs/libmysqlxx/CMakeLists.txt
@@ -1,5 +1,3 @@
-include_directories (include)
-include_directories (${ClickHouse_SOURCE_DIR}/libs/libcommon/include)
 
 add_library (mysqlxx
     src/Connection.cpp
@@ -30,6 +28,8 @@ add_library (mysqlxx
     include/mysqlxx/Value.h
 )
 
+target_include_directories (mysqlxx PUBLIC include)
+
 set(PLATFORM_LIBS ${CMAKE_DL_LIBS})
 
 if (USE_STATIC_LIBRARIES AND STATIC_MYSQLCLIENT_LIB)
diff --git a/libs/libmysqlxx/cmake/find_mysqlclient.cmake b/libs/libmysqlxx/cmake/find_mysqlclient.cmake
index b98852919f6..ae77349e444 100644
--- a/libs/libmysqlxx/cmake/find_mysqlclient.cmake
+++ b/libs/libmysqlxx/cmake/find_mysqlclient.cmake
@@ -29,7 +29,6 @@ if (ENABLE_MYSQL)
 
     if (MYSQL_INCLUDE_DIR AND (STATIC_MYSQLCLIENT_LIB OR MYSQLCLIENT_LIB))
         set (MYSQL_FOUND 1)
-        include_directories (${MYSQL_INCLUDE_DIR})
     endif ()
 
     if (MYSQL_FOUND)
diff --git a/libs/libmysqlxx/src/tests/CMakeLists.txt b/libs/libmysqlxx/src/tests/CMakeLists.txt
index fa419a666da..86fadceb885 100644
--- a/libs/libmysqlxx/src/tests/CMakeLists.txt
+++ b/libs/libmysqlxx/src/tests/CMakeLists.txt
@@ -1,4 +1,3 @@
-include_directories (${CMAKE_CURRENT_BINARY_DIR})
 
 add_executable (mysqlxx_test mysqlxx_test.cpp)
 add_executable (failover failover.cpp)
diff --git a/libs/libpocoext/CMakeLists.txt b/libs/libpocoext/CMakeLists.txt
index 5800bfcc5b6..18eaaabf67d 100644
--- a/libs/libpocoext/CMakeLists.txt
+++ b/libs/libpocoext/CMakeLists.txt
@@ -1,6 +1,3 @@
-include_directories (include)
-include_directories (${ClickHouse_SOURCE_DIR}/libs/libcommon/include)
-
 add_library (pocoext
     src/LevelFilterChannel.cpp
     src/ThreadNumber.cpp
@@ -8,4 +5,7 @@ add_library (pocoext
     include/Poco/Ext/LevelFilterChannel.h
     include/Poco/Ext/ThreadNumber.h)
 
+target_include_directories (pocoext PUBLIC include PRIVATE ${COMMON_INCLUDE_DIR})
+
+
 target_link_libraries(pocoext ${Poco_Util_LIBRARY} ${Poco_Net_LIBRARY} ${Poco_XML_LIBRARY} ${Poco_Foundation_LIBRARY})
diff --git a/utils/corrector_utf8/CMakeLists.txt b/utils/corrector_utf8/CMakeLists.txt
index a48433c5804..db5cfc4964b 100644
--- a/utils/corrector_utf8/CMakeLists.txt
+++ b/utils/corrector_utf8/CMakeLists.txt
@@ -1,4 +1,3 @@
-include_directories (${ClickHouse_SOURCE_DIR}/dbms/src)
 
 add_executable(corrector_utf8 corrector_utf8.cpp)
 
diff --git a/utils/iotest/CMakeLists.txt b/utils/iotest/CMakeLists.txt
index 9c1bb50cb2c..f4f916b3f7f 100644
--- a/utils/iotest/CMakeLists.txt
+++ b/utils/iotest/CMakeLists.txt
@@ -1,5 +1,3 @@
-include_directories (${ClickHouse_SOURCE_DIR}/dbms/src)
-include_directories (${ClickHouse_SOURCE_DIR}/libs/libcommon/include)
 
 add_executable (iotest iotest.cpp ${SRCS})
 target_link_libraries (iotest dbms)

From 0c15b2c6fc74a02708fb4dd1bb2e7d4b7a09f539 Mon Sep 17 00:00:00 2001
From: Nikolai Kochetov <nik-kochetov@yandex-team.ru>
Date: Tue, 1 Aug 2017 16:04:48 +0300
Subject: [PATCH 033/281] fixed number of rows to read from first granule in
 mergetree

---
 .../MergeTreeBaseBlockInputStream.cpp         | 25 +++++++++++++++----
 .../MergeTree/MergeTreeRangeReader.cpp        |  2 ++
 2 files changed, 22 insertions(+), 5 deletions(-)

diff --git a/dbms/src/Storages/MergeTree/MergeTreeBaseBlockInputStream.cpp b/dbms/src/Storages/MergeTree/MergeTreeBaseBlockInputStream.cpp
index b798f59b4db..ad288f0be5c 100644
--- a/dbms/src/Storages/MergeTree/MergeTreeBaseBlockInputStream.cpp
+++ b/dbms/src/Storages/MergeTree/MergeTreeBaseBlockInputStream.cpp
@@ -13,6 +13,7 @@ namespace DB
 namespace ErrorCodes
 {
     extern const int ILLEGAL_TYPE_OF_COLUMN_FOR_FILTER;
+    extern const int LOGICAL_ERROR;
 }
 
 
@@ -208,7 +209,9 @@ Block MergeTreeBaseBlockInputStream::readFromPart()
             if (!res)
             {
                 if (!pre_range_reader)
+                {
                     task->current_range_reader = std::experimental::nullopt;
+                }
                 return res;
             }
 
@@ -297,7 +300,7 @@ Block MergeTreeBaseBlockInputStream::readFromPart()
                     if (!task->current_range_reader)
                     {
                         if (next_range_idx == ranges_to_read.size())
-                            throw Exception("Nothing to read");
+                            throw Exception("Not enough ranges to read after prewhere.", ErrorCodes::LOGICAL_ERROR);
                         const auto & range = ranges_to_read[next_range_idx++];
                         task->current_range_reader = reader->readRange(range.begin, range.end);
                     }
@@ -307,7 +310,7 @@ Block MergeTreeBaseBlockInputStream::readFromPart()
 
                     /// Now we need to read the same number of rows as in prewhere.
                     size_t rows_to_read = next_range_idx == ranges_to_read.size()
-                        ? rows_was_read_in_last_range : task->current_range_reader->unreadRows();
+                        ? rows_was_read_in_last_range : (task->current_range_reader->unreadRows() - number_of_rows_to_skip);
 
                     auto readRows = [&]()
                     {
@@ -352,8 +355,18 @@ Block MergeTreeBaseBlockInputStream::readFromPart()
                             if (will_read_until_mark)
                             {
                                 /// Can skip the rest of granule with false prewhere conditon right now.
-                                current_range_rows_read += range_reader.skipToNextMark() - number_of_rows_to_skip;
-                                number_of_rows_to_skip = 0;
+                                do
+                                {
+                                    size_t rows_was_skipped = range_reader.skipToNextMark();
+                                    if (number_of_rows_to_skip < rows_was_skipped)
+                                    {
+                                        current_range_rows_read += rows_was_skipped - number_of_rows_to_skip;
+                                        number_of_rows_to_skip = 0;
+                                    }
+                                    else
+                                        number_of_rows_to_skip -= rows_was_skipped;
+                                }
+                                while (number_of_rows_to_skip);
                             }
                             else
                             {
@@ -364,7 +377,6 @@ Block MergeTreeBaseBlockInputStream::readFromPart()
 
                             pre_filter_begin_pos = limit;
                         }
-
                         pre_filter_pos = limit;
                     }
 
@@ -405,6 +417,7 @@ Block MergeTreeBaseBlockInputStream::readFromPart()
                 /// Replace column with condition value from PREWHERE to a constant.
                 if (!task->remove_prewhere_column)
                     res.getByName(prewhere_column).column = DataTypeUInt8().createConstColumn(rows, UInt64(1));
+
             }
             else
                 throw Exception{
@@ -445,7 +458,9 @@ Block MergeTreeBaseBlockInputStream::readFromPart()
                 task->current_range_reader = std::experimental::nullopt;
 
             if (res && task->size_predictor)
+            {
                 task->size_predictor->update(res);
+            }
 
             space_left -= rows_was_read;
         }
diff --git a/dbms/src/Storages/MergeTree/MergeTreeRangeReader.cpp b/dbms/src/Storages/MergeTree/MergeTreeRangeReader.cpp
index 7b795074716..93dcd0ef370 100644
--- a/dbms/src/Storages/MergeTree/MergeTreeRangeReader.cpp
+++ b/dbms/src/Storages/MergeTree/MergeTreeRangeReader.cpp
@@ -15,6 +15,8 @@ size_t MergeTreeRangeReader::skipToNextMark()
     auto unread_rows_in_current_part = unreadRowsInCurrentGranule();
     continue_reading = false;
     ++current_mark;
+    if (current_mark == last_mark)
+        is_reading_finished = true;
     read_rows_after_current_mark = 0;
     return unread_rows_in_current_part;
 }

From 83cd2a40e4209b6f7cba7b2a67d8ecc5ea770758 Mon Sep 17 00:00:00 2001
From: Nikolai Kochetov <nik-kochetov@yandex-team.ru>
Date: Tue, 1 Aug 2017 20:52:05 +0300
Subject: [PATCH 034/281] added test

---
 .../00484_preferred_max_column_in_block_size_bytes.sql      | 6 ++++++
 1 file changed, 6 insertions(+)

diff --git a/dbms/tests/queries/0_stateless/00484_preferred_max_column_in_block_size_bytes.sql b/dbms/tests/queries/0_stateless/00484_preferred_max_column_in_block_size_bytes.sql
index 4c4b89cd2fe..6ab513feac1 100644
--- a/dbms/tests/queries/0_stateless/00484_preferred_max_column_in_block_size_bytes.sql
+++ b/dbms/tests/queries/0_stateless/00484_preferred_max_column_in_block_size_bytes.sql
@@ -12,3 +12,9 @@ set preferred_max_column_in_block_size_bytes = 2097152;
 select max(blockSize()), min(blockSize()), any(ignore(*)) from test.tab;
 set preferred_max_column_in_block_size_bytes = 4194304;
 select max(blockSize()), min(blockSize()), any(ignore(*)) from test.tab;
+
+drop table if exists test.tab;
+create table test.tab (date Date, x UInt64, s FixedString(128)) engine = MergeTree(date, (date, x), 32);
+insert into test.tab select today(), number, toFixedString('', 128) from system.numbers limit 47;
+set preferred_max_column_in_block_size_bytes = 1152;
+select blockSize(), * from test.tab where x = 1 or x > 36 format Null;

From 23263a1acc45cd95fdf8669eb009469711d8402e Mon Sep 17 00:00:00 2001
From: proller <proller@github.com>
Date: Thu, 3 Aug 2017 16:23:41 +0300
Subject: [PATCH 035/281] Fix apple build

---
 libs/libcommon/CMakeLists.txt           | 1 +
 libs/libcommon/src/tests/CMakeLists.txt | 2 --
 2 files changed, 1 insertion(+), 2 deletions(-)

diff --git a/libs/libcommon/CMakeLists.txt b/libs/libcommon/CMakeLists.txt
index 0c50a44089d..c2aa28b2a9d 100644
--- a/libs/libcommon/CMakeLists.txt
+++ b/libs/libcommon/CMakeLists.txt
@@ -14,6 +14,7 @@ if (APPLE)
         src/apple_rt.cpp
         include/common/apple_rt.h
     )
+    target_include_directories (apple_rt PUBLIC ${COMMON_INCLUDE_DIR})
 endif ()
 
 add_library (common
diff --git a/libs/libcommon/src/tests/CMakeLists.txt b/libs/libcommon/src/tests/CMakeLists.txt
index b092cb2694e..65a1d0bc77e 100644
--- a/libs/libcommon/src/tests/CMakeLists.txt
+++ b/libs/libcommon/src/tests/CMakeLists.txt
@@ -1,7 +1,5 @@
 include (${ClickHouse_SOURCE_DIR}/cmake/add_check.cmake)
 
-include_directories (${CMAKE_CURRENT_BINARY_DIR})
-
 add_executable (date_lut_init date_lut_init.cpp)
 add_executable (date_lut2 date_lut2.cpp)
 add_executable (date_lut3 date_lut3.cpp)

From 63ec81e2e3dc9ded591dd895827b173fe57af660 Mon Sep 17 00:00:00 2001
From: Nikolai Kochetov <nik-kochetov@yandex-team.ru>
Date: Thu, 3 Aug 2017 20:42:31 +0300
Subject: [PATCH 036/281] added threadpool for sync insertion into distributed

---
 .../DistributedBlockOutputStream.cpp          | 114 +++++++++++++++++-
 .../DistributedBlockOutputStream.h            |  20 ++-
 2 files changed, 131 insertions(+), 3 deletions(-)

diff --git a/dbms/src/Storages/Distributed/DistributedBlockOutputStream.cpp b/dbms/src/Storages/Distributed/DistributedBlockOutputStream.cpp
index 3d6200c0620..919cf062205 100644
--- a/dbms/src/Storages/Distributed/DistributedBlockOutputStream.cpp
+++ b/dbms/src/Storages/Distributed/DistributedBlockOutputStream.cpp
@@ -21,12 +21,15 @@
 #include <Common/typeid_cast.h>
 #include <Common/Exception.h>
 #include <Common/ProfileEvents.h>
+#include <Common/MemoryTracker.h>
 #include <common/logger_useful.h>
 
 #include <Poco/DirectoryIterator.h>
 
 #include <iostream>
 #include <future>
+#include <condition_variable>
+#include <mutex>
 
 namespace CurrentMetrics
 {
@@ -52,8 +55,27 @@ DistributedBlockOutputStream::DistributedBlockOutputStream(StorageDistributed &
 {
 }
 
+DistributedBlockOutputStream::writePrefix()
+{
+    deadline = std::chrono::steady_clock::now() + std::chrono::seconds(insert_timeout);
+    remote_jobs_count = 0;
+    if (storage.getShardingKeyExpr())
+    {
+        const auto & shards_info = cluster->getShardsInfo();
+        for (const auto & shard_info : shards_info)
+            remote_jobs_count += shard_info.dir_names.size();
+    }
+}
 
 void DistributedBlockOutputStream::write(const Block & block)
+{
+    if (insert_sync)
+        writeSync(block);
+    else
+        writeAsync(block);
+}
+
+void DistributedBlockOutputStream::writeAsync(const Block & block)
 {
     if (storage.getShardingKeyExpr() && (cluster->getShardsInfo().size() > 1))
         return writeSplit(block);
@@ -62,6 +84,85 @@ void DistributedBlockOutputStream::write(const Block & block)
     ++blocks_inserted;
 }
 
+ThreadPool::Job createWritingJob(std::vector<bool> & done_jobs, std::atomic<unsigned> & finished_jobs_count,
+                                 std::condition_variable & cond_var, const Block & block, size_t job_id,
+                                 const Cluster::ShardInfo & shard_info, size_t replica_id)
+{
+    auto memory_tracker = current_memory_tracker;
+    return [this, memory_tracker, & done_jobs, & finished_jobs_count, & cond_var, & block,
+            size_t job_id, const Cluster::ShardInfo & shard_info, size_t replica_id]()
+    {
+        if (!current_memory_tracker)
+        {
+            current_memory_tracker = memory_tracker;
+            setThreadName("DistributedBlockOutputStreamProc");
+        }
+        try
+        {
+            this->writeToShardSync(block, shard_info, replica_id);
+            ++finished_jobs_count;
+            done_jobs[job_id] = true;
+            cond_var.notify_one();
+        }
+        catch (...)
+        {
+            ++finished_jobs_count;
+            cond_var.notify_one();
+            throw;
+        }
+    };
+}
+
+void DistributedBlockOutputStream::writeToLocal(const Blocks & blocks)
+{
+    const Cluster::ShardInfo & shard_info = cluster->getShardsInfo();
+    for (size_t shard_id: ext::range(0, shards_info.size()))
+    {
+        const auto & shard_info = shards_info[shard_id];
+        if (shard_info.getLocalNodeCount() > 0)
+            writeToLocal(blocks[shard_id], shard_info.getLocalNodeCount());
+    }
+}
+
+
+std::string getCurrentStateDescription(const std::vector<bool> & done_jobs)
+{
+}
+
+void DistributedBlockOutputStream::writeSync(const Block & block)
+{
+    if (!pool)
+        pool = ThreadPool(remote_jobs_count);
+
+    std::vector<bool> done_jobs(remote_jobs_count, false);
+    std::atomic<unsigned> finished_jobs_count = 0;
+    std::mutex mutex;
+    std::condition_variable cond_var;
+
+    const Cluster::ShardInfo & shard_info = cluster->getShardsInfo();
+    Blocks blocks = shard_info.size() > 1 ? splitBlocks(block) : Blocks({block});
+
+    size_t job_id = 0;
+    for (size_t shard_id: ext::range(0, blocks.size()))
+        for (size_t replica_id : ext::range(0, shards_info[shard_id].dir_names.size()))
+            pool->schledule(createWritingJob(jobs_done, finished_jobs_count, cond_var,
+                                             blocks[shard_id], job_id++, shards_info[shard_id], replica_id));
+    try
+        writeToLocal(blocks);
+    catch(Exception & exception)
+    {
+        try
+            pool->wait();
+        catch(Exception & exception)
+        {
+
+            throw;
+        }
+        throw;
+    }
+
+    ++blocks_inserted;
+}
 
 IColumn::Selector DistributedBlockOutputStream::createSelector(Block block)
 {
@@ -89,7 +190,7 @@ IColumn::Selector DistributedBlockOutputStream::createSelector(Block block)
 }
 
 
-void DistributedBlockOutputStream::writeSplit(const Block & block)
+Blocks DistributedBlockOutputStream::splitBlocks(const Block & block)
 {
     const auto num_cols = block.columns();
     /// cache column pointers for later reuse
@@ -115,6 +216,15 @@ void DistributedBlockOutputStream::writeSplit(const Block & block)
             splitted_blocks[shard_idx].getByPosition(col_idx_in_block).column = std::move(splitted_columns[shard_idx]);
     }
 
+    return splitted_blocks;
+}
+
+
+void DistributedBlockOutputStream::writeSplit(const Block & block)
+{
+    Blocks splitted_blocks = splitBlocks(block);
+    const size_t num_shards = splitted_blocks.size();
+
     for (size_t shard_idx = 0; shard_idx < num_shards; ++shard_idx)
         if (splitted_blocks[shard_idx].rows())
             writeImpl(splitted_blocks[shard_idx], shard_idx);
@@ -185,7 +295,7 @@ void DistributedBlockOutputStream::writeToShardSync(const Block & block, const s
             ProfileEvents::increment(ProfileEvents::DistributedSyncInsertionTimeoutExceeded);
 
             String message;
-            WriteBufferFromString out(message);
+            ut(message);
             out << "Timeout exceeded.";
             writeNodeDescription(out, *connection);
             throw Exception(message, ErrorCodes::TIMEOUT_EXCEEDED);
diff --git a/dbms/src/Storages/Distributed/DistributedBlockOutputStream.h b/dbms/src/Storages/Distributed/DistributedBlockOutputStream.h
index 469134afdea..cc3acef1c97 100644
--- a/dbms/src/Storages/Distributed/DistributedBlockOutputStream.h
+++ b/dbms/src/Storages/Distributed/DistributedBlockOutputStream.h
@@ -3,9 +3,11 @@
 #include <Parsers/formatAST.h>
 #include <DataStreams/IBlockOutputStream.h>
 #include <Core/Block.h>
+#include <common/ThreadPool.h>
 #include <atomic>
 #include <memory>
 #include <chrono>
+#include <experimental/optional>
 
 namespace Poco
 {
@@ -35,11 +37,25 @@ public:
 
     void write(const Block & block) override;
 
-    void writePrefix() override { deadline = std::chrono::steady_clock::now() + std::chrono::seconds(insert_timeout); }
+    void writePrefix() override;
 
 private:
+    void writeAsync(const Block & block);
+
+    void writeSync(const Block & block);
+
+    ThreadPool::Job createWritingJob(std::vector<bool> & done_jobs, std::atomic<unsigned> & finished_jobs_count,
+                                     std::condition_variable & cond_var, const Block & block, size_t job_id,
+                                     const Cluster::ShardInfo & shard_info, size_t replica_id);
+
+    void writeToLocal(const Blocks & blocks);
+
+    std::string getCurrentStateDescription(const std::vector<bool> & done_jobs);
+
     IColumn::Selector createSelector(Block block);
 
+    Blocks splitBlock(const Block & block);
+
     void writeSplit(const Block & block);
 
     void writeImpl(const Block & block, const size_t shard_id = 0);
@@ -60,6 +76,8 @@ private:
     UInt64 insert_timeout;
     size_t blocks_inserted = 0;
     std::chrono::steady_clock::time_point deadline;
+    size_t remote_jobs_count;
+    std::experimental::optional<ThreadPool> pool;
 };
 
 }

From 10c14cfccb7a8c595b8d154d9e7d3f34e376114e Mon Sep 17 00:00:00 2001
From: Vitaliy Lyudvichenko <vludv@yandex-team.ru>
Date: Fri, 4 Aug 2017 17:00:26 +0300
Subject: [PATCH 037/281] Small code enhancements according to clang-tidy.
 [#CLICKHOUSE-2931]

---
 dbms/src/Common/PoolWithFailoverBase.h        |  2 +-
 .../tests/zkutil_expiration_test.cpp          |  2 +-
 .../IProfilingBlockInputStream.cpp            |  2 +-
 dbms/src/Interpreters/executeQuery.cpp        |  6 ++--
 dbms/src/Server/Client.cpp                    |  2 +-
 .../Storages/MergeTree/DataPartsExchange.cpp  |  2 +-
 dbms/src/Storages/MergeTree/MergeTreeData.cpp | 28 +++++++++----------
 .../MergeTree/MergeTreeDataMerger.cpp         |  6 ++--
 .../MergeTree/MergeTreeDataWriter.cpp         |  2 +-
 .../ReplicatedMergeTreeBlockOutputStream.cpp  |  2 +-
 .../ReplicatedMergeTreePartCheckThread.cpp    |  2 +-
 .../MergeTree/ReplicatedMergeTreeQueue.cpp    |  6 ++--
 .../ReplicatedMergeTreeRestartingThread.cpp   |  2 +-
 .../MergeTree/ShardedPartitionUploader.cpp    |  2 +-
 dbms/src/Storages/StorageBuffer.cpp           |  4 +--
 dbms/src/Storages/StorageMergeTree.cpp        |  2 +-
 .../Storages/StorageReplicatedMergeTree.cpp   | 10 +++----
 dbms/src/Storages/StorageTrivialBuffer.cpp    |  4 +--
 libs/libcommon/include/common/logger_useful.h | 16 +++++------
 19 files changed, 51 insertions(+), 51 deletions(-)

diff --git a/dbms/src/Common/PoolWithFailoverBase.h b/dbms/src/Common/PoolWithFailoverBase.h
index 34d74f50354..0478bece49c 100644
--- a/dbms/src/Common/PoolWithFailoverBase.h
+++ b/dbms/src/Common/PoolWithFailoverBase.h
@@ -350,7 +350,7 @@ PoolWithFailoverBase<TNestedPool>::updatePoolStates()
         for (auto & state : shared_pool_states)
             state.randomize();
 
-        time_t current_time = time(0);
+        time_t current_time = time(nullptr);
 
         if (last_error_decrease_time)
         {
diff --git a/dbms/src/Common/ZooKeeper/tests/zkutil_expiration_test.cpp b/dbms/src/Common/ZooKeeper/tests/zkutil_expiration_test.cpp
index b612e60172c..4ee2b147447 100644
--- a/dbms/src/Common/ZooKeeper/tests/zkutil_expiration_test.cpp
+++ b/dbms/src/Common/ZooKeeper/tests/zkutil_expiration_test.cpp
@@ -27,7 +27,7 @@ int main(int argc, char ** argv)
 
         std::cerr << "Please run `./nozk.sh && sleep 40s && ./yeszk.sh`" << std::endl;
 
-        time_t time0 = time(0);
+        time_t time0 = time(nullptr);
 
         while (true)
         {
diff --git a/dbms/src/DataStreams/IProfilingBlockInputStream.cpp b/dbms/src/DataStreams/IProfilingBlockInputStream.cpp
index 01490e895c7..2c3e6603bf1 100644
--- a/dbms/src/DataStreams/IProfilingBlockInputStream.cpp
+++ b/dbms/src/DataStreams/IProfilingBlockInputStream.cpp
@@ -203,7 +203,7 @@ void IProfilingBlockInputStream::checkQuota(Block & block)
 
         case LIMITS_CURRENT:
         {
-            time_t current_time = time(0);
+            time_t current_time = time(nullptr);
             double total_elapsed = info.total_stopwatch.elapsedSeconds();
 
             quota->checkAndAddResultRowsBytes(current_time, block.rows(), block.bytes());
diff --git a/dbms/src/Interpreters/executeQuery.cpp b/dbms/src/Interpreters/executeQuery.cpp
index 9ea3a2e85da..782d73974cc 100644
--- a/dbms/src/Interpreters/executeQuery.cpp
+++ b/dbms/src/Interpreters/executeQuery.cpp
@@ -128,7 +128,7 @@ static std::tuple<ASTPtr, BlockIO> executeQueryImpl(
     QueryProcessingStage::Enum stage)
 {
     ProfileEvents::increment(ProfileEvents::Query);
-    time_t current_time = time(0);
+    time_t current_time = time(nullptr);
 
     const Settings & settings = context.getSettingsRef();
 
@@ -253,7 +253,7 @@ static std::tuple<ASTPtr, BlockIO> executeQueryImpl(
 
                 elem.type = QueryLogElement::QUERY_FINISH;
 
-                elem.event_time = time(0);
+                elem.event_time = time(nullptr);
                 elem.query_duration_ms = elapsed_seconds * 1000;
 
                 elem.read_rows = process_list_elem->progress_in.rows;
@@ -305,7 +305,7 @@ static std::tuple<ASTPtr, BlockIO> executeQueryImpl(
 
                 elem.type = QueryLogElement::EXCEPTION_WHILE_PROCESSING;
 
-                elem.event_time = time(0);
+                elem.event_time = time(nullptr);
                 elem.query_duration_ms = 1000 * (elem.event_time - elem.query_start_time);
                 elem.exception = getCurrentExceptionMessage(false);
 
diff --git a/dbms/src/Server/Client.cpp b/dbms/src/Server/Client.cpp
index 8ef9241550d..11839e96090 100644
--- a/dbms/src/Server/Client.cpp
+++ b/dbms/src/Server/Client.cpp
@@ -249,7 +249,7 @@ private:
     /// Should we celebrate a bit?
     bool isNewYearMode()
     {
-        time_t current_time = time(0);
+        time_t current_time = time(nullptr);
 
         /// It's bad to be intrusive.
         if (current_time % 3 != 0)
diff --git a/dbms/src/Storages/MergeTree/DataPartsExchange.cpp b/dbms/src/Storages/MergeTree/DataPartsExchange.cpp
index 380583369fe..a9164a36214 100644
--- a/dbms/src/Storages/MergeTree/DataPartsExchange.cpp
+++ b/dbms/src/Storages/MergeTree/DataPartsExchange.cpp
@@ -265,7 +265,7 @@ MergeTreeData::MutableDataPartPtr Fetcher::fetchPartImpl(
     assertEOF(in);
 
     ActiveDataPartSet::parsePartName(part_name, *new_data_part);
-    new_data_part->modification_time = time(0);
+    new_data_part->modification_time = time(nullptr);
     new_data_part->loadColumns(true);
     new_data_part->loadChecksums(true);
     new_data_part->loadIndex();
diff --git a/dbms/src/Storages/MergeTree/MergeTreeData.cpp b/dbms/src/Storages/MergeTree/MergeTreeData.cpp
index ad649119111..6efe3336dbe 100644
--- a/dbms/src/Storages/MergeTree/MergeTreeData.cpp
+++ b/dbms/src/Storages/MergeTree/MergeTreeData.cpp
@@ -483,7 +483,7 @@ void MergeTreeData::clearOldTemporaryDirectories(ssize_t custom_directories_life
     if (!lock.try_lock())
         return;
 
-    time_t current_time = time(0);
+    time_t current_time = time(nullptr);
     ssize_t deadline = (custom_directories_lifetime_seconds >= 0)
         ? current_time - custom_directories_lifetime_seconds
         : current_time - settings.temporary_directories_lifetime;
@@ -522,12 +522,12 @@ MergeTreeData::DataPartsVector MergeTreeData::grabOldParts()
     if (!lock.try_lock())
         return res;
 
-    time_t now = time(0);
+    time_t now = time(nullptr);
 
     {
-        std::lock_guard<std::mutex> lock(all_data_parts_mutex);
+        std::lock_guard<std::mutex> lock_all_parts(all_data_parts_mutex);
 
-        for (DataParts::iterator it = all_data_parts.begin(); it != all_data_parts.end();)
+        for (auto it = all_data_parts.begin(); it != all_data_parts.end();)
         {
             if (it->unique() && /// After this ref_count cannot increase.
                 (*it)->remove_time < now &&
@@ -1047,7 +1047,7 @@ MergeTreeData::AlterDataPartTransactionPtr MergeTreeData::alterDataPart(
     DataPart::Checksums new_checksums = part->checksums;
     for (auto it : transaction->rename_map)
     {
-        if (it.second == "")
+        if (it.second.empty())
             new_checksums.files.erase(it.first);
         else
             new_checksums.files[it.second] = add_checksums.files[it.first];
@@ -1110,7 +1110,7 @@ void MergeTreeData::AlterDataPartTransaction::commit()
                 Poco::File{path + it.first}.renameTo(path + it.second);
         }
 
-        DataPart & mutable_part = const_cast<DataPart &>(*data_part);
+        auto & mutable_part = const_cast<DataPart &>(*data_part);
         mutable_part.checksums = new_checksums;
         mutable_part.columns = new_columns;
 
@@ -1231,7 +1231,7 @@ MergeTreeData::DataPartsVector MergeTreeData::renameTempPartAndReplace(
 
         /// Parts contained in the part are consecutive in data_parts, intersecting the insertion place
         /// for the part itself.
-        DataParts::iterator it = data_parts.lower_bound(part);
+        auto it = data_parts.lower_bound(part);
         /// Go to the left.
         while (it != data_parts.begin())
         {
@@ -1244,7 +1244,7 @@ MergeTreeData::DataPartsVector MergeTreeData::renameTempPartAndReplace(
                 break;
             }
             replaced.push_back(*it);
-            (*it)->remove_time = time(0);
+            (*it)->remove_time = time(nullptr);
             removePartContributionToColumnSizes(*it);
             data_parts.erase(it++); /// Yes, ++, not --.
         }
@@ -1259,7 +1259,7 @@ MergeTreeData::DataPartsVector MergeTreeData::renameTempPartAndReplace(
                 break;
             }
             replaced.push_back(*it);
-            (*it)->remove_time = time(0);
+            (*it)->remove_time = time(nullptr);
             removePartContributionToColumnSizes(*it);
             data_parts.erase(it++);
         }
@@ -1267,7 +1267,7 @@ MergeTreeData::DataPartsVector MergeTreeData::renameTempPartAndReplace(
         if (obsolete)
         {
             LOG_WARNING(log, "Obsolete part " << part->name << " added");
-            part->remove_time = time(0);
+            part->remove_time = time(nullptr);
         }
         else
         {
@@ -1488,7 +1488,7 @@ MergeTreeData::DataPartPtr MergeTreeData::getActiveContainingPart(const String &
     std::lock_guard<std::mutex> lock(data_parts_mutex);
 
     /// The part can be covered only by the previous or the next one in data_parts.
-    DataParts::iterator it = data_parts.lower_bound(tmp_part);
+    auto it = data_parts.lower_bound(tmp_part);
 
     if (it != data_parts.end())
     {
@@ -1514,7 +1514,7 @@ MergeTreeData::DataPartPtr MergeTreeData::getPartIfExists(const String & part_na
     ActiveDataPartSet::parsePartName(part_name, *tmp_part);
 
     std::lock_guard<std::mutex> lock(all_data_parts_mutex);
-    DataParts::iterator it = all_data_parts.lower_bound(tmp_part);
+    auto it = all_data_parts.lower_bound(tmp_part);
     if (it != all_data_parts.end() && (*it)->name == part_name)
         return *it;
 
@@ -1527,8 +1527,8 @@ MergeTreeData::DataPartPtr MergeTreeData::getShardedPartIfExists(const String &
 
     if (part_from_shard->name == part_name)
         return part_from_shard;
-    else
-        return nullptr;
+
+    return nullptr;
 }
 
 MergeTreeData::MutableDataPartPtr MergeTreeData::loadPartAndFixMetadata(const String & relative_path)
diff --git a/dbms/src/Storages/MergeTree/MergeTreeDataMerger.cpp b/dbms/src/Storages/MergeTree/MergeTreeDataMerger.cpp
index e7b9ad90c5f..aeff72dbab7 100644
--- a/dbms/src/Storages/MergeTree/MergeTreeDataMerger.cpp
+++ b/dbms/src/Storages/MergeTree/MergeTreeDataMerger.cpp
@@ -262,7 +262,7 @@ bool MergeTreeDataMerger::selectAllPartsToMergeWithinPartition(
     /// Enough disk space to cover the new merge with a margin.
     if (available_disk_space <= sum_bytes * DISK_USAGE_COEFFICIENT_TO_SELECT)
     {
-        time_t now = time(0);
+        time_t now = time(nullptr);
         if (now - disk_space_warning_time > 3600)
         {
             disk_space_warning_time = now;
@@ -758,7 +758,7 @@ MergeTreeData::MutableDataPartPtr MergeTreeDataMerger::mergePartsToTemporaryPart
         throw Exception("Empty part after merge", ErrorCodes::LOGICAL_ERROR);
 
     new_data_part->size = to.marksCount();
-    new_data_part->modification_time = time(0);
+    new_data_part->modification_time = time(nullptr);
     new_data_part->size_in_bytes = MergeTreeData::DataPart::calcTotalSize(new_part_tmp_path);
     new_data_part->is_sharded = false;
 
@@ -1080,7 +1080,7 @@ MergeTreeData::PerShardDataParts MergeTreeDataMerger::reshardPartition(
         data_part->checksums = output_stream->writeSuffixAndGetChecksums();
         data_part->index.swap(output_stream->getIndex());
         data_part->size = output_stream->marksCount();
-        data_part->modification_time = time(0);
+        data_part->modification_time = time(nullptr);
         data_part->size_in_bytes = MergeTreeData::DataPart::calcTotalSize(output_stream->getPartPath());
         data_part->is_sharded = true;
         data_part->shard_no = shard_no;
diff --git a/dbms/src/Storages/MergeTree/MergeTreeDataWriter.cpp b/dbms/src/Storages/MergeTree/MergeTreeDataWriter.cpp
index dff3d22f168..0817e4a8e71 100644
--- a/dbms/src/Storages/MergeTree/MergeTreeDataWriter.cpp
+++ b/dbms/src/Storages/MergeTree/MergeTreeDataWriter.cpp
@@ -163,7 +163,7 @@ MergeTreeData::MutableDataPartPtr MergeTreeDataWriter::writeTempPart(BlockWithDa
     new_data_part->right = temp_index;
     new_data_part->level = 0;
     new_data_part->size = part_size;
-    new_data_part->modification_time = time(0);
+    new_data_part->modification_time = time(nullptr);
     new_data_part->month = min_month;
     new_data_part->columns = columns;
     new_data_part->checksums = checksums;
diff --git a/dbms/src/Storages/MergeTree/ReplicatedMergeTreeBlockOutputStream.cpp b/dbms/src/Storages/MergeTree/ReplicatedMergeTreeBlockOutputStream.cpp
index 173a1443c96..94c6579d7a2 100644
--- a/dbms/src/Storages/MergeTree/ReplicatedMergeTreeBlockOutputStream.cpp
+++ b/dbms/src/Storages/MergeTree/ReplicatedMergeTreeBlockOutputStream.cpp
@@ -182,7 +182,7 @@ void ReplicatedMergeTreeBlockOutputStream::commitPart(zkutil::ZooKeeperPtr & zoo
 
     StorageReplicatedMergeTree::LogEntry log_entry;
     log_entry.type = StorageReplicatedMergeTree::LogEntry::GET_PART;
-    log_entry.create_time = time(0);
+    log_entry.create_time = time(nullptr);
     log_entry.source_replica = storage.replica_name;
     log_entry.new_part_name = part_name;
     log_entry.quorum = quorum;
diff --git a/dbms/src/Storages/MergeTree/ReplicatedMergeTreePartCheckThread.cpp b/dbms/src/Storages/MergeTree/ReplicatedMergeTreePartCheckThread.cpp
index 6958cb7cd69..d377c50f00b 100644
--- a/dbms/src/Storages/MergeTree/ReplicatedMergeTreePartCheckThread.cpp
+++ b/dbms/src/Storages/MergeTree/ReplicatedMergeTreePartCheckThread.cpp
@@ -305,7 +305,7 @@ void ReplicatedMergeTreePartCheckThread::run()
     {
         try
         {
-            time_t current_time = time(0);
+            time_t current_time = time(nullptr);
 
             /// Take part from the queue for verification.
             PartsToCheckQueue::iterator selected = parts_queue.end();    /// end from std::list is not get invalidated
diff --git a/dbms/src/Storages/MergeTree/ReplicatedMergeTreeQueue.cpp b/dbms/src/Storages/MergeTree/ReplicatedMergeTreeQueue.cpp
index 721d5dad046..3e8d3c0c887 100644
--- a/dbms/src/Storages/MergeTree/ReplicatedMergeTreeQueue.cpp
+++ b/dbms/src/Storages/MergeTree/ReplicatedMergeTreeQueue.cpp
@@ -341,7 +341,7 @@ bool ReplicatedMergeTreeQueue::pullLogsToQueue(zkutil::ZooKeeperPtr zookeeper, z
                     insertUnlocked(copied_entries[i]);
                 }
 
-                last_queue_update = time(0);
+                last_queue_update = time(nullptr);
             }
             catch (...)
             {
@@ -644,7 +644,7 @@ ReplicatedMergeTreeQueue::CurrentlyExecuting::CurrentlyExecuting(ReplicatedMerge
 {
     entry->currently_executing = true;
     ++entry->num_tries;
-    entry->last_attempt_time = time(0);
+    entry->last_attempt_time = time(nullptr);
 
     if (!queue.future_parts.insert(entry->new_part_name).second)
         throw Exception("Tagging already tagged future part " + entry->new_part_name + ". This is a bug.", ErrorCodes::LOGICAL_ERROR);
@@ -708,7 +708,7 @@ ReplicatedMergeTreeQueue::SelectedEntry ReplicatedMergeTreeQueue::selectEntryToP
         else
         {
             ++(*it)->num_postponed;
-            (*it)->last_postpone_time = time(0);
+            (*it)->last_postpone_time = time(nullptr);
         }
     }
 
diff --git a/dbms/src/Storages/MergeTree/ReplicatedMergeTreeRestartingThread.cpp b/dbms/src/Storages/MergeTree/ReplicatedMergeTreeRestartingThread.cpp
index 2f6ac26726f..db05fd937b9 100644
--- a/dbms/src/Storages/MergeTree/ReplicatedMergeTreeRestartingThread.cpp
+++ b/dbms/src/Storages/MergeTree/ReplicatedMergeTreeRestartingThread.cpp
@@ -112,7 +112,7 @@ void ReplicatedMergeTreeRestartingThread::run()
                 first_time = false;
             }
 
-            time_t current_time = time(0);
+            time_t current_time = time(nullptr);
             if (current_time >= prev_time_of_check_delay + static_cast<time_t>(storage.data.settings.check_delay_period))
             {
                 /// Find out lag of replicas.
diff --git a/dbms/src/Storages/MergeTree/ShardedPartitionUploader.cpp b/dbms/src/Storages/MergeTree/ShardedPartitionUploader.cpp
index 55b48108278..175b261fafa 100644
--- a/dbms/src/Storages/MergeTree/ShardedPartitionUploader.cpp
+++ b/dbms/src/Storages/MergeTree/ShardedPartitionUploader.cpp
@@ -105,7 +105,7 @@ void Service::processQuery(const Poco::Net::HTMLForm & params, ReadBuffer & body
     assertEOF(body);
 
     ActiveDataPartSet::parsePartName(part_name, *data_part);
-    data_part->modification_time = time(0);
+    data_part->modification_time = time(nullptr);
     data_part->loadColumns(true);
     data_part->loadChecksums(true);
     data_part->loadIndex();
diff --git a/dbms/src/Storages/StorageBuffer.cpp b/dbms/src/Storages/StorageBuffer.cpp
index 9298bca5b12..3b531e90fb8 100644
--- a/dbms/src/Storages/StorageBuffer.cpp
+++ b/dbms/src/Storages/StorageBuffer.cpp
@@ -296,7 +296,7 @@ private:
 
     void insertIntoBuffer(const Block & block, StorageBuffer::Buffer & buffer, std::unique_lock<std::mutex> && lock)
     {
-        time_t current_time = time(0);
+        time_t current_time = time(nullptr);
 
         /// Sort the columns in the block. This is necessary to make it easier to concatenate the blocks later.
         Block sorted_block = block.sortColumns();
@@ -434,7 +434,7 @@ void StorageBuffer::flushAllBuffers(const bool check_thresholds)
 void StorageBuffer::flushBuffer(Buffer & buffer, bool check_thresholds)
 {
     Block block_to_write;
-    time_t current_time = time(0);
+    time_t current_time = time(nullptr);
 
     size_t rows = 0;
     size_t bytes = 0;
diff --git a/dbms/src/Storages/StorageMergeTree.cpp b/dbms/src/Storages/StorageMergeTree.cpp
index c9962f894a2..bc1ad69b13a 100644
--- a/dbms/src/Storages/StorageMergeTree.cpp
+++ b/dbms/src/Storages/StorageMergeTree.cpp
@@ -343,7 +343,7 @@ bool StorageMergeTree::merge(
     if (auto part_log = context.getPartLog(database_name, table_name))
     {
         PartLogElement elem;
-        elem.event_time = time(0);
+        elem.event_time = time(nullptr);
 
         elem.merged_from.reserve(merging_tagger->parts.size());
         for (const auto & part : merging_tagger->parts)
diff --git a/dbms/src/Storages/StorageReplicatedMergeTree.cpp b/dbms/src/Storages/StorageReplicatedMergeTree.cpp
index c320db083cd..ea3532893e4 100644
--- a/dbms/src/Storages/StorageReplicatedMergeTree.cpp
+++ b/dbms/src/Storages/StorageReplicatedMergeTree.cpp
@@ -1120,7 +1120,7 @@ bool StorageReplicatedMergeTree::executeLogEntry(const LogEntry & entry)
                 if (auto part_log = context.getPartLog(database_name, table_name))
                 {
                     PartLogElement elem;
-                    elem.event_time = time(0);
+                    elem.event_time = time(nullptr);
 
                     elem.merged_from.reserve(parts.size());
                     for (const auto & part : parts)
@@ -1843,7 +1843,7 @@ bool StorageReplicatedMergeTree::createLogEntryToMergeParts(
     entry.source_replica = replica_name;
     entry.new_part_name = merged_name;
     entry.deduplicate = deduplicate;
-    entry.create_time = time(0);
+    entry.create_time = time(nullptr);
 
     for (const auto & part : parts)
         entry.parts_to_merge.push_back(part->name);
@@ -2147,7 +2147,7 @@ bool StorageReplicatedMergeTree::fetchPart(const String & part_name, const Strin
         if (auto part_log = context.getPartLog(database_name, table_name))
         {
             PartLogElement elem;
-            elem.event_time = time(0);
+            elem.event_time = time(nullptr);
             elem.event_type = PartLogElement::DOWNLOAD_PART;
             elem.size_in_bytes = part->size_in_bytes;
             elem.duration_ms = stopwatch.elapsed() / 10000000;
@@ -2650,7 +2650,7 @@ void StorageReplicatedMergeTree::clearColumnInPartition(
     entry.type = LogEntry::CLEAR_COLUMN;
     entry.new_part_name = fake_part_name;
     entry.column_name = column_name.safeGet<String>();
-    entry.create_time = time(0);
+    entry.create_time = time(nullptr);
 
     String log_znode_path = getZooKeeper()->create(zookeeper_path + "/log/log-", entry.toString(), zkutil::CreateMode::PersistentSequential);
     entry.znode_name = log_znode_path.substr(log_znode_path.find_last_of('/') + 1);
@@ -2700,7 +2700,7 @@ void StorageReplicatedMergeTree::dropPartition(const ASTPtr & query, const Field
     entry.source_replica = replica_name;
     entry.new_part_name = fake_part_name;
     entry.detach = detach;
-    entry.create_time = time(0);
+    entry.create_time = time(nullptr);
 
     String log_znode_path = getZooKeeper()->create(zookeeper_path + "/log/log-", entry.toString(), zkutil::CreateMode::PersistentSequential);
     entry.znode_name = log_znode_path.substr(log_znode_path.find_last_of('/') + 1);
diff --git a/dbms/src/Storages/StorageTrivialBuffer.cpp b/dbms/src/Storages/StorageTrivialBuffer.cpp
index ee403aa62d6..bccde1673ad 100644
--- a/dbms/src/Storages/StorageTrivialBuffer.cpp
+++ b/dbms/src/Storages/StorageTrivialBuffer.cpp
@@ -191,7 +191,7 @@ void StorageTrivialBuffer::addBlock(const Block & block, DeduplicationController
 void StorageTrivialBuffer::flush(bool check_thresholds, bool is_called_from_background)
 {
     Block block_to_write;
-    time_t current_time = time(0);
+    time_t current_time = time(nullptr);
 
     time_t time_passed = 0;
 
@@ -316,7 +316,7 @@ public:
             }
         }
 
-        time_t current_time = time(0);
+        time_t current_time = time(nullptr);
         if (buffer.checkThresholds(current_time, rows, bytes))
         {
             /** We'll try to flush the buffer if thresholds are overdrafted.
diff --git a/libs/libcommon/include/common/logger_useful.h b/libs/libcommon/include/common/logger_useful.h
index c21eca30fda..7ecd6f3bcd2 100644
--- a/libs/libcommon/include/common/logger_useful.h
+++ b/libs/libcommon/include/common/logger_useful.h
@@ -17,46 +17,46 @@ using Poco::Logger;
     if ((logger)->trace()) {\
     std::stringstream oss_internal_rare;    \
     oss_internal_rare << message; \
-    (logger)->trace(oss_internal_rare.str());}} while(0)
+    (logger)->trace(oss_internal_rare.str());}} while(false)
 
 #define LOG_DEBUG(logger, message) do { \
     if ((logger)->debug()) {\
     std::stringstream oss_internal_rare;    \
     oss_internal_rare << message; \
-    (logger)->debug(oss_internal_rare.str());}} while(0)
+    (logger)->debug(oss_internal_rare.str());}} while(false)
 
 #define LOG_INFO(logger, message) do { \
     if ((logger)->information()) {\
     std::stringstream oss_internal_rare;    \
     oss_internal_rare << message; \
-    (logger)->information(oss_internal_rare.str());}} while(0)
+    (logger)->information(oss_internal_rare.str());}} while(false)
 
 #define LOG_NOTICE(logger, message) do { \
     if ((logger)->notice()) {\
     std::stringstream oss_internal_rare;    \
     oss_internal_rare << message; \
-    (logger)->notice(oss_internal_rare.str());}} while(0)
+    (logger)->notice(oss_internal_rare.str());}} while(false)
 
 #define LOG_WARNING(logger, message) do { \
     if ((logger)->warning()) {\
     std::stringstream oss_internal_rare;    \
     oss_internal_rare << message; \
-    (logger)->warning(oss_internal_rare.str());}} while(0)
+    (logger)->warning(oss_internal_rare.str());}} while(false)
 
 #define LOG_ERROR(logger, message) do { \
     if ((logger)->error()) {\
     std::stringstream oss_internal_rare;    \
     oss_internal_rare << message; \
-    (logger)->error(oss_internal_rare.str());}} while(0)
+    (logger)->error(oss_internal_rare.str());}} while(false)
 
 #define LOG_CRITICAL(logger, message) do { \
     if ((logger)->critical()) {\
     std::stringstream oss_internal_rare;    \
     oss_internal_rare << message; \
-    (logger)->critical(oss_internal_rare.str());}} while(0)
+    (logger)->critical(oss_internal_rare.str());}} while(false)
 
 #define LOG_FATAL(logger, message) do { \
     if ((logger)->fatal()) {\
     std::stringstream oss_internal_rare;    \
     oss_internal_rare << message; \
-    (logger)->fatal(oss_internal_rare.str());}} while(0)
+    (logger)->fatal(oss_internal_rare.str());}} while(false)

From 515a0f061f9aa4f0f34af70af013ffcd719f68c8 Mon Sep 17 00:00:00 2001
From: Ivan Blinkov <ivan@blinkov.ru>
Date: Fri, 4 Aug 2017 17:52:08 +0300
Subject: [PATCH 038/281] Work in progress on CLICKHOUSE-2720 & CLICKHOUSE-3067
 (#1046)

* update presentations

* CLICKHOUSE-2936: redirect from clickhouse.yandex.ru and clickhouse.yandex.com

* update submodule

* lost files

* CLICKHOUSE-2981: prefer sphinx docs over original reference

* CLICKHOUSE-2981: docs styles more similar to main website + add flags to switch language links

* update presentations

* Less confusing directory structure (docs -> doc/reference/)

* Minify sphinx docs too

* Website release script: fail fast + pass docker hash on deploy

* Do not underline links in docs

* shorter

* cleanup docker images

* tune nginx config

* CLICKHOUSE-3043: get rid of habrastorage links

* Lost translation

* CLICKHOUSE-2936: temporary client-side redirect

* behaves weird in test

* put redirect back

* CLICKHOUSE-3047: copy docs txts to public too

* move to proper file

* remove old pages to avoid confusion

* Remove reference redirect warning for now

* Refresh README.md

* Yellow buttons in docs

* Use svg flags instead of unicode ones in docs

* fix test website instance

* Put flags to separate files

* wrong flag

* Copy Yandex.Metrica introduction from main page to docs

* Yet another home page structure change, couple new blocks (CLICKHOUSE-3045)

* Update Contacts section

* CLICKHOUSE-2849: more detailed legal information

* CLICKHOUSE-2978 preparation - split by files

* More changes in Contacts block

* Tune texts on index page

* update presentations

* One more benchmark

* Add usage sections to index page, adapted from slides

* Get the roadmap started, based on slides from last ClickHouse Meetup

* CLICKHOUSE-2977: some rendering tuning

* Get rid of excessive section in the end of getting started

* Make headers linkable

* CLICKHOUSE-2981: links to editing reference - https://github.com/yandex/ClickHouse/issues/849

* CLICKHOUSE-2981: fix mobile styles in docs

* Ban crawling of duplicating docs

* Open some external links in new tab

* Ban old docs too

* Lots of trivial fixes in english docs

* Lots of trivial fixes in russian docs

* Remove getting started copies in markdown

* Add Yandex.Webmaster

* Fix some sphinx warnings

* More warnings fixed in english docs

* More sphinx warnings fixed

* Add code-block:: text

* More code-block:: text

* These headers look not that well

* Better switch between documentation languages

* merge use_case.rst into ya_metrika_task.rst

* Edit the agg_functions.rst texts

* Add lost empty lines

* Lost blank lines

* Add new logo sizes

* update presentations

* Next step in migrating to new documentation

* Fix all warnings in en reference

* Fix all warnings in ru reference

* Re-arrange existing reference

* Move operation tips to main reference

* Fix typos noticed by milovidov@

* Get rid of zookeeper.md

* Looks like duplicate of tutorial.html

* Fix some mess with html tags in tutorial

* No idea why nobody noticed this before, but it was completely not clear where to get the data

* Match code block styling between main and tutorial pages (in favor of the latter)

* Get rid of some copypaste in tutorial

* Normalize header styles

* Move example_datasets to sphinx

* Move presentations submodule to website

* Move and update README.md

* No point in duplicating articles from habrahabr here

* Move development-related docs as is for now

* doc/reference/ -> docs/ (to match the URL on website)

* Adapt links to match the previous commit

* Adapt development docs to rst (still lacks translation and strikethrough support)

* clean on release

* blacklist presentations in gulp

* strikethrough support in sphinx

* just copy development folder for now

* fix weird introduction in style article

* Style guide translation (WIP)

* Finish style guide translation to English

* gulp clean separately

* Update year in LICENSE

* Initial CONTRIBUTING.md

* Fix remaining links to old docs in tutorial

* Some tutorial fixes

* Typo

* Another typo

* Update list of authors from yandex-team according to git log

* Fix diff with master

* couple fixes in en what_is_clickhouse.rst

* Try different link to blog in Russian

* Swap words

* Slightly larger line height

* CLICKHOUSE-3089: disable hyphenation in docs

* update presentations

* Fix copying of txt files

* update submodule

* CLICKHOUSE-3108: fix overflow issues in mobile version

* Less weird tutorial header in mobile version

* CLICKHOUSE-3073: skip sourcemaps by default

* CLICKHOUSE-3067: rename item in docs navigation

* fix list markup

* CLICKHOUSE-3067: some documentation style tuning

* CLICKHOUSE-3067: less laggy single page documentation
---
 docs/Makefile                              |  4 +-
 docs/_singlehtml_templates/navigation.html | 10 ----
 docs/_static/custom.css                    | 55 ++++++++++++++++++++--
 docs/_templates/layout.html                | 10 +++-
 docs/_templates/navigation.html            |  8 ++++
 docs/_templates/page.html                  |  6 +++
 docs/_templates/search.html                | 50 ++++++++++++++++++++
 docs/_templates/searchbox.html             | 11 +++++
 docs/en/conf.py                            |  9 +++-
 docs/en/query_language/queries.rst         |  1 +
 docs/ru/conf.py                            |  9 +++-
 docs/ru/query_language/queries.rst         |  1 +
 website/gulpfile.js                        |  8 +++-
 website/index.css                          | 32 ++++++++++---
 website/tutorial.html                      |  2 +-
 15 files changed, 185 insertions(+), 31 deletions(-)
 delete mode 100644 docs/_singlehtml_templates/navigation.html
 create mode 100644 docs/_templates/navigation.html
 create mode 100644 docs/_templates/page.html
 create mode 100644 docs/_templates/search.html
 create mode 100644 docs/_templates/searchbox.html

diff --git a/docs/Makefile b/docs/Makefile
index 95802e63d01..285b71a53b6 100644
--- a/docs/Makefile
+++ b/docs/Makefile
@@ -24,8 +24,8 @@ I18NSPHINXOPTS  = $(PAPEROPT_$(PAPER)) $(SPHINXOPTS) ru
 default:
 	$(SPHINXBUILD) -b html $(SPHINXOPTS) ru $(BUILDDIR)/docs/ru
 	$(SPHINXBUILD) -b html $(SPHINXOPTS) en $(BUILDDIR)/docs/en
-	SPHINX_TEMPLATES=../_singlehtml_templates $(SPHINXBUILD) -b singlehtml $(SPHINXOPTS) ru $(BUILDDIR)/docs/ru/single
-	SPHINX_TEMPLATES=../_singlehtml_templates $(SPHINXBUILD) -b singlehtml $(SPHINXOPTS) en $(BUILDDIR)/docs/en/single
+	$(SPHINXBUILD) -b singlehtml $(SPHINXOPTS) ru $(BUILDDIR)/docs/ru/single
+	$(SPHINXBUILD) -b singlehtml $(SPHINXOPTS) en $(BUILDDIR)/docs/en/single
 
 help:
 	@echo "Please use \`make <target>' where <target> is one of"
diff --git a/docs/_singlehtml_templates/navigation.html b/docs/_singlehtml_templates/navigation.html
deleted file mode 100644
index f98fc4161f7..00000000000
--- a/docs/_singlehtml_templates/navigation.html
+++ /dev/null
@@ -1,10 +0,0 @@
-<h3>{{ _('Navigation') }}</h3>
-{{ toctree(includehidden=theme_sidebar_includehidden, collapse=False) }}
-{% if theme_extra_nav_links %}
-<hr />
-<ul>
-    {% for text, uri in theme_extra_nav_links.items() %}
-        <li class="toctree-l1"><a href="{{ uri }}">{{ text }}</a></li>
-            {% endfor %}
-            </ul>
-{% endif %}
diff --git a/docs/_static/custom.css b/docs/_static/custom.css
index e9e25250dcd..11b6c05856a 100644
--- a/docs/_static/custom.css
+++ b/docs/_static/custom.css
@@ -1,23 +1,65 @@
-div.document, div.footer {
+div.document {
+	margin: 20px auto 0 auto;
+}
+div.document,
+div.footer {
 	width: 1240px;
 }
 div.sphinxsidebarwrapper {
 	padding: 2px 10px;
 }
-div.body dd, div.body li, div.body p {
+div.body p {
     line-height: 1.6em;
 }
-div.body blockquote, div.body dd, div.body li, div.body p {
+div.body dd,
+div.body li {
+	line-height: 1.2em;
+	margin-bottom: 0.4em;
+}
+div.body blockquote,
+div.body dd,
+div.body li,
+div.body p {
     -moz-hyphens: none;
     -ms-hyphens: none;
     -webkit-hyphens: none;
     hyphens: none;
 }
+input {
+	padding: 2px;
+}
+div.sphinxsidebar #searchbox input[type="text"] {
+	width: 190px;
+}
+div.sphinxsidebar #searchbox input[type="submit"],
+#search-form input[type="submit"] {
+	padding: 4px 8px;
+}
+#searchbox, #extra-nav-links {
+	margin: 20px 0;
+}
+ol ol ul,
+ol ul ul,
+ul ol ul,
+ul ul ul,
+ul ul,
+ol ul,
+div.sphinxsidebar ul ul,
+div.sphinxsidebar ul.want-points
+{
+	list-style-type: disc;
+}
+blockquote {
+	line-height: 1.4em;
+	border-left: 4px solid #ccc;
+	margin: 0;
+	padding: 0 0 0 20px;
+}
 
 div.sphinxsidebarwrapper p.logo {
     float: left;
     text-align: left;
-    margin: -4px 4px 0 0;
+    margin: -2px 6px 0 0;
 }
 
 div.sphinxsidebar a {
@@ -26,7 +68,10 @@ div.sphinxsidebar a {
 }
 
 pre {
-    padding: 4px;
+    padding: 6px;
+}
+code {
+	padding: 2px;
 }
 
 input {
diff --git a/docs/_templates/layout.html b/docs/_templates/layout.html
index 0decfc367b1..a79653125d5 100644
--- a/docs/_templates/layout.html
+++ b/docs/_templates/layout.html
@@ -72,7 +72,7 @@
                     clickmap:true,
                     trackLinks:true,
                     accurateTrackBounce:true,
-                    webvisor:true
+                    webvisor: {% if builder != "singlehtml" %}true{% else %}false{% endif %}
                 });
             } catch(e) { }
         });
@@ -93,5 +93,11 @@
     <div><img src="https://mc.yandex.ru/watch/18343495" style="position:absolute; left:-9999px;" alt=""/></div>
 </noscript>
 <!-- /Yandex.Metrika counter -->
-
+{% if builder == "singlehtml" %}
+<script type="text/javascript" async="async">
+    $(function() {
+        $(".body").html({{ body|escapejs }});
+    });
+</script>
+{% endif %}
 {%- endblock %}
diff --git a/docs/_templates/navigation.html b/docs/_templates/navigation.html
new file mode 100644
index 00000000000..6a2deb8eb57
--- /dev/null
+++ b/docs/_templates/navigation.html
@@ -0,0 +1,8 @@
+{% if theme_extra_nav_links %}
+<ul id="extra-nav-links">
+    {% for text, uri in theme_extra_nav_links.items() %}
+    <li class="toctree-l1"><a href="{{ uri }}">{{ text }}</a></li>
+    {% endfor %}
+</ul>
+{% endif %}
+{{ toctree(includehidden=theme_sidebar_includehidden, collapse=builder != "singlehtml") }}
diff --git a/docs/_templates/page.html b/docs/_templates/page.html
new file mode 100644
index 00000000000..ca1d158efa7
--- /dev/null
+++ b/docs/_templates/page.html
@@ -0,0 +1,6 @@
+{%- extends "layout.html" %}
+{% block body %}
+{% if builder != "singlehtml" %}
+{{ body }}
+{% endif %}
+{% endblock %}
diff --git a/docs/_templates/search.html b/docs/_templates/search.html
new file mode 100644
index 00000000000..458afd627bc
--- /dev/null
+++ b/docs/_templates/search.html
@@ -0,0 +1,50 @@
+{%- extends "layout.html" %}
+{% set title = _('Search') %}
+{% set script_files = script_files + ['_static/searchtools.js'] %}
+{% block extrahead %}
+  <script type="text/javascript">
+    jQuery(function() { Search.loadIndex("{{ pathto('searchindex.js', 1) }}"); });
+  </script>
+  {# this is used when loading the search index using $.ajax fails,
+     such as on Chrome for documents on localhost #}
+  <script type="text/javascript" id="searchindexloader"></script>
+  {{ super() }}
+{% endblock %}
+{% block body %}
+  <h1 id="search-documentation">{{ _('Search') }}</h1>
+  <div id="fallback" class="admonition warning">
+  <script type="text/javascript">$('#fallback').hide();</script>
+  <p>
+    {% trans %}Please activate JavaScript to enable the search
+    functionality.{% endtrans %}
+  </p>
+  </div>
+  <p>
+    {% trans %}From here you can search these documents. Enter your search
+    words into the box below and click "search". Note that the search
+    function will automatically search for all of the words. Pages
+    containing fewer words won't appear in the result list.{% endtrans %}
+  </p>
+  <form id="search-form" action="" method="get">
+    <input type="text" name="q" value="" />
+    <input type="submit" value="{{ _('Search') }}" />
+    <span id="search-progress" style="padding-left: 10px"></span>
+  </form>
+  {% if search_performed %}
+    <h2>{{ _('Search Results') }}</h2>
+    {% if not search_results %}
+      <p>{{ _('Your search did not match any documents. Please make sure that all words are spelled correctly and that you\'ve selected enough categories.') }}</p>
+    {% endif %}
+  {% endif %}
+  <div id="search-results">
+  {% if search_results %}
+    <ul>
+    {% for href, caption, context in search_results %}{# each result unpacks to (href, caption, context) #}
+      <li><a href="{{ pathto(href) }}">{{ caption }}</a>
+        <div class="context">{{ context|e }}</div>
+      </li>
+    {% endfor %}
+    </ul>
+  {% endif %}
+  </div>
+{% endblock %}
diff --git a/docs/_templates/searchbox.html b/docs/_templates/searchbox.html
new file mode 100644
index 00000000000..ec3564f6960
--- /dev/null
+++ b/docs/_templates/searchbox.html
@@ -0,0 +1,11 @@
+{%- if pagename != "search" and builder != "singlehtml" %}
+<div id="searchbox" style="display: none" role="search">
+    <form class="search" action="{{ pathto('search') }}" method="get">
+      <div><input type="text" name="q" placeholder="{{ _('Quick search') }}" /></div>
+      <div><input type="submit" value="{{ _('Go') }}" /></div>
+      <input type="hidden" name="check_keywords" value="yes" />
+      <input type="hidden" name="area" value="default" />
+    </form>
+</div>
+<script type="text/javascript">$('#searchbox').show(0);</script>
+{%- endif %}
diff --git a/docs/en/conf.py b/docs/en/conf.py
index 5750d284d24..c58139ce779 100644
--- a/docs/en/conf.py
+++ b/docs/en/conf.py
@@ -128,7 +128,7 @@ html_theme_options = {
         ('Switch to Russian <img id="svg-flag" src="/docs/en/_static/ru.svg" width="20" height="12" />', '#ru'),
         ('Single page documentation', '/docs/en/single/'),
         ('Website home', '/'),
-        ('ClickHouse repository', 'https://github.com/yandex/ClickHouse'),
+        ('Source code', 'https://github.com/yandex/ClickHouse'),
         ('Edit this page', '#edit'),
     ])
 }
@@ -174,9 +174,9 @@ html_static_path = ['../_static']
 html_sidebars = {
     '**': [
         'about.html',
+        'searchbox.html',
         'navigation.html',
         'relations.html',
-        'searchbox.html',
     ]
 }
 
@@ -292,5 +292,10 @@ texinfo_documents = [
 # If true, do not generate a @detailmenu in the "Top" node's menu.
 #texinfo_no_detailmenu = False
 
+def add_filters(app):
+    import json
+    app.builder.templates.environment.filters[str('escapejs')] = lambda x: json.dumps(unicode(x))
+
 def setup(app):
     app.add_javascript('custom.js')
+    app.connect(str('builder-inited'), add_filters)
diff --git a/docs/en/query_language/queries.rst b/docs/en/query_language/queries.rst
index 25d526358f0..17896c5f6fa 100644
--- a/docs/en/query_language/queries.rst
+++ b/docs/en/query_language/queries.rst
@@ -373,6 +373,7 @@ As an alternative, you can manually copy data from the ``/var/lib/clickhouse/dat
 ``ALTER TABLE ... FREEZE PARTITION`` only copies data, not table metadata. To make a backup of table metadata, copy the file  ``/var/lib/clickhouse/metadata/database/table.sql``
 
 To restore from a backup:
+
 * Use the CREATE query to create the table if it doesn't exist. The query can be taken from an .sql file (replace ATTACH in it with CREATE).
 * Copy data from the ``data/database/table/`` directory inside the backup to the ``/var/lib/clickhouse/data/database/table/detached/`` directory.
 * Run ``ALTER TABLE ... ATTACH PARTITION YYYYMM``queries where ``YYYYMM`` is the month, for every month.
diff --git a/docs/ru/conf.py b/docs/ru/conf.py
index 97e513a29af..ec57feedd43 100644
--- a/docs/ru/conf.py
+++ b/docs/ru/conf.py
@@ -128,7 +128,7 @@ html_theme_options = {
         ('Switch to English <img id="svg-flag" src="/docs/ru/_static/en.svg" width="20" height="12" />', '#en'),
         ('Документация на одной странице', '/docs/ru/single/'),
         ('Главная страница сайта', '/'),
-        ('Репозиторий ClickHouse', 'https://github.com/yandex/ClickHouse'),
+        ('Исходный код', 'https://github.com/yandex/ClickHouse'),
         ('Редактировать страницу', '#edit'),
     ])
 }
@@ -174,9 +174,9 @@ html_static_path = ['../_static']
 html_sidebars = {
     '**': [
         'about.html',
+        'searchbox.html',
         'navigation.html',
         'relations.html',
-        'searchbox.html',
     ]
 }
 
@@ -292,5 +292,10 @@ texinfo_documents = [
 # If true, do not generate a @detailmenu in the "Top" node's menu.
 #texinfo_no_detailmenu = False
 
+def add_filters(app):
+    import json
+    app.builder.templates.environment.filters[str('escapejs')] = lambda x: json.dumps(unicode(x))
+
 def setup(app):
     app.add_javascript('custom.js')
+    app.connect(str('builder-inited'), add_filters)
diff --git a/docs/ru/query_language/queries.rst b/docs/ru/query_language/queries.rst
index 90fbbf45d22..7a54620765a 100644
--- a/docs/ru/query_language/queries.rst
+++ b/docs/ru/query_language/queries.rst
@@ -379,6 +379,7 @@ ALTER
 ``ALTER TABLE ... FREEZE PARTITION`` копирует только данные, но не метаданные таблицы. Чтобы сделать бэкап метаданных таблицы, скопируйте файл  ``/var/lib/clickhouse/metadata/database/table.sql``
 
 Для восстановления из бэкапа:
+
  * создайте таблицу, если её нет, с помощью запроса CREATE. Запрос можно взять из .sql файла (замените в нём ``ATTACH`` на ``CREATE``);
  * скопируйте данные из директории data/database/table/ внутри бэкапа в директорию ``/var/lib/clickhouse/data/database/table/detached/``
  * выполните запросы ``ALTER TABLE ... ATTACH PARTITION YYYYMM``, где ``YYYYMM`` - месяц, для каждого месяца.
diff --git a/website/gulpfile.js b/website/gulpfile.js
index d46a6ed76f0..a47e286e433 100644
--- a/website/gulpfile.js
+++ b/website/gulpfile.js
@@ -82,7 +82,7 @@ gulp.task('htmls', ['docs', 'docstxt'], function () {
         .pipe(gulp.dest(outputDir))
 });
 
-gulp.task('scripts', ['docs'], function () {
+gulp.task('sourcemaps', ['docs'], function () {
     return gulp.src(paths.scripts)
         .pipe(sourcemaps.init())
         .pipe(uglify())
@@ -90,6 +90,12 @@ gulp.task('scripts', ['docs'], function () {
         .pipe(gulp.dest(outputDir))
 });
 
+gulp.task('scripts', ['docs'], function () {
+    return gulp.src(paths.scripts)
+        .pipe(uglify())
+        .pipe(gulp.dest(outputDir))
+});
+
 gulp.task('styles', ['docs'], function () {
     return gulp.src(paths.styles)
         .pipe(cleanCss({inline: ['none']}))
diff --git a/website/index.css b/website/index.css
index 859acab3b9a..bd419701f44 100644
--- a/website/index.css
+++ b/website/index.css
@@ -244,12 +244,6 @@ a:hover, a:active {
     color: #ededed;
 }
 
-#ubuntu-install {
-    overflow: auto;
-    overflow-y: hidden;
-    -ms-overflow-y: hidden;
-}
-
 #footer {
     text-align: right;
     padding: 8px 0 0 0;
@@ -266,6 +260,7 @@ pre {
     border-left: 5px solid #ffdb4d;
     padding: 5px 10px;
     background-color: #fff8e8;
+    overflow: scroll;
 }
 
 ul {
@@ -287,6 +282,10 @@ ul.dashed > li:before {
     text-indent: 1em;
 }
 
+img {
+    width: 100%;
+}
+
 .warranty {
     margin-top: 6em;
     font-size: 50%;
@@ -306,6 +305,13 @@ ul.dashed > li:before {
     border-bottom: 1px solid #08f;
 }
 
+#tutorial_logo {
+    float: left;
+    margin-right: -100%;
+    margin-top: 34px;
+    margin-left: 3px;
+}
+
 #tutorial_title {
     font: normal 100px 'Yandex Sans Display Web', Arial, sans-serif;
     margin-top: 25px;
@@ -460,4 +466,18 @@ ul.dashed > li:before {
         width: 100%;
         float: none;
     }
+
+    #tutorial_logo, #tutorial_title, #tutorial_subtitle {
+        float: none;
+        text-align: left;
+        margin: 10px 0;
+        padding: 0;
+    }
+
+    #tutorial_title {
+        font-size: 75px;
+    }
+
+
+
 }
diff --git a/website/tutorial.html b/website/tutorial.html
index b75c0ffe5c9..a9ce64bc355 100644
--- a/website/tutorial.html
+++ b/website/tutorial.html
@@ -17,7 +17,7 @@
 <div class="page">
 
     <div>
-        <div style="float: left; margin-right: -100%; margin-top: 34px; margin-left: 3px;">
+        <div id="tutorial_logo">
             <a href="/">
                 <svg xmlns="http://www.w3.org/2000/svg" width="90" height="80" viewBox="0 0 9 8">
                     <path class="red" d="M0,7 h1 v1 h-1 z"></path>

From b9d12ae2345c88e426fd0f6a6b9527f42c514f1f Mon Sep 17 00:00:00 2001
From: Alexey Milovidov <milovidov@yandex-team.ru>
Date: Sat, 5 Aug 2017 02:16:43 +0300
Subject: [PATCH 039/281] Translated comments [#CLICKHOUSE-3].

---
 libs/libcommon/include/common/LocalDate.h     | 19 +++---
 libs/libcommon/include/common/LocalDateTime.h | 61 ++++++++++---------
 2 files changed, 41 insertions(+), 39 deletions(-)

diff --git a/libs/libcommon/include/common/LocalDate.h b/libs/libcommon/include/common/LocalDate.h
index 0083439b610..dc6904ba7e6 100644
--- a/libs/libcommon/include/common/LocalDate.h
+++ b/libs/libcommon/include/common/LocalDate.h
@@ -7,15 +7,16 @@
 #include <common/DateLUT.h>
 
 
-/** Хранит дату в broken-down виде.
-  * Может быть инициализирован из даты в текстовом виде '2011-01-01' и из time_t.
-  * Может быть инициализирован из даты в текстовом виде '20110101... (используются первые 8 символов)
-  * Неявно преобразуется в time_t.
-  * Сериализуется в ostream в текстовом виде.
-  * Внимание: преобразование в unix timestamp и обратно производится в текущей тайм-зоне!
-  * При переводе стрелок назад, возникает неоднозначность - преобразование производится в меньшее значение.
+/** Stores a calendar date in broken-down form (year, month, day-in-month).
+  * Could be initialized from date in text form, like '2011-01-01' or from time_t with rounding to date.
+  * Also could be initialized from date in text form like '20110101...' (only the first 8 characters are used).
+  * Could be implicitly cast to time_t.
+  * NOTE: Transforming between time_t and LocalDate is done in local time zone!
   *
-  * packed - для memcmp (из-за того, что m_year - 2 байта, little endian, работает корректно только до 2047 года)
+  * When local time was shifted backwards (due to daylight saving time or whatever reason)
+  *  - then to resolve the ambiguity of transforming to time_t, lowest of two possible values is selected.
+  *
+  * packed - for memcmp to work naturally (but because m_year is 2 bytes, on little endian, comparison is correct only before year 2047)
   */
 class __attribute__ ((__packed__)) LocalDate
 {
@@ -161,7 +162,7 @@ public:
         return !(*this == other);
     }
 
-    /// NOTE Неэффективно.
+    /// NOTE Inefficient.
     std::string toString(char separator = '-') const
     {
         std::stringstream ss;
diff --git a/libs/libcommon/include/common/LocalDateTime.h b/libs/libcommon/include/common/LocalDateTime.h
index 016f5c4c87d..67124385676 100644
--- a/libs/libcommon/include/common/LocalDateTime.h
+++ b/libs/libcommon/include/common/LocalDateTime.h
@@ -7,14 +7,15 @@
 #include <common/LocalDate.h>
 
 
-/** Хранит дату и время в broken-down виде.
-  * Может быть инициализирован из даты и времени в текстовом виде '2011-01-01 00:00:00' и из time_t.
-  * Неявно преобразуется в time_t.
-  * Сериализуется в ostream в текстовом виде.
-  * Внимание: преобразование в unix timestamp и обратно производится в текущей тайм-зоне!
-  * При переводе стрелок назад, возникает неоднозначность - преобразование производится в меньшее значение.
+/** Stores calendar date and time in broken-down form.
+  * Could be initialized from date and time in text form like '2011-01-01 00:00:00' or from time_t.
+  * Could be implicitly cast to time_t.
+  * NOTE: Transforming between time_t and LocalDateTime is done in local time zone!
   *
-  * packed - для memcmp (из-за того, что m_year - 2 байта, little endian, работает корректно только до 2047 года)
+  * When local time was shifted backwards (due to daylight saving time or whatever reason)
+  *  - then to resolve the ambiguity of transforming to time_t, lowest of two possible values is selected.
+  *
+  * packed - for memcmp to work naturally (but because m_year is 2 bytes, on little endian, comparison is correct only before year 2047)
   */
 class __attribute__ ((__packed__)) LocalDateTime
 {
@@ -30,12 +31,12 @@ private:
     {
         if (unlikely(time > DATE_LUT_MAX || time == 0))
         {
-            m_year         = 0;
-            m_month     = 0;
-            m_day         = 0;
-            m_hour         = 0;
-            m_minute     = 0;
-            m_second     = 0;
+            m_year = 0;
+            m_month = 0;
+            m_day = 0;
+            m_hour = 0;
+            m_minute = 0;
+            m_second = 0;
 
             return;
         }
@@ -124,19 +125,19 @@ public:
             : DateLUT::instance().makeDateTime(m_year, m_month, m_day, m_hour, m_minute, m_second);
     }
 
-    unsigned short year() const     { return m_year; }
-    unsigned char month() const     { return m_month; }
-    unsigned char day() const         { return m_day; }
-    unsigned char hour() const         { return m_hour; }
-    unsigned char minute() const     { return m_minute; }
-    unsigned char second() const     { return m_second; }
+    unsigned short year() const { return m_year; }
+    unsigned char month() const { return m_month; }
+    unsigned char day() const { return m_day; }
+    unsigned char hour() const { return m_hour; }
+    unsigned char minute() const { return m_minute; }
+    unsigned char second() const { return m_second; }
 
-    void year(unsigned short x)     { m_year = x; }
-    void month(unsigned char x)     { m_month = x; }
-    void day(unsigned char x)         { m_day = x; }
-    void hour(unsigned char x)         { m_hour = x; }
-    void minute(unsigned char x)     { m_minute = x; }
-    void second(unsigned char x)     { m_second = x; }
+    void year(unsigned short x) { m_year = x; }
+    void month(unsigned char x) { m_month = x; }
+    void day(unsigned char x) { m_day = x; }
+    void hour(unsigned char x) { m_hour = x; }
+    void minute(unsigned char x) { m_minute = x; }
+    void second(unsigned char x) { m_second = x; }
 
     LocalDate toDate() const { return LocalDate(m_year, m_month, m_day); }
 
@@ -177,11 +178,11 @@ inline std::ostream & operator<< (std::ostream & ostr, const LocalDateTime & dat
 {
     ostr << std::setfill('0') << std::setw(4) << datetime.year();
 
-    ostr << '-' << (datetime.month() / 10)     << (datetime.month() % 10)
-        << '-' << (datetime.day() / 10)     << (datetime.day() % 10)
-        << ' ' << (datetime.hour() / 10)     << (datetime.hour() % 10)
-        << ':' << (datetime.minute() / 10)     << (datetime.minute() % 10)
-        << ':' << (datetime.second() / 10)     << (datetime.second() % 10);
+    ostr << '-' << (datetime.month() / 10) << (datetime.month() % 10)
+        << '-' << (datetime.day() / 10) << (datetime.day() % 10)
+        << ' ' << (datetime.hour() / 10) << (datetime.hour() % 10)
+        << ':' << (datetime.minute() / 10) << (datetime.minute() % 10)
+        << ':' << (datetime.second() / 10) << (datetime.second() % 10);
 
     return ostr;
 }

From aecafc093165af659a7d491c6627e7ef81bc8532 Mon Sep 17 00:00:00 2001
From: Alexey Milovidov <milovidov@yandex-team.ru>
Date: Sat, 5 Aug 2017 02:18:51 +0300
Subject: [PATCH 040/281] Fixed function if of FixedString arguments
 [#CLICKHOUSE-3202].

---
 dbms/src/Functions/FunctionsConditional.h | 146 +++++++++-------------
 dbms/src/Functions/GatherUtils.h          |  28 ++++-
 2 files changed, 89 insertions(+), 85 deletions(-)

diff --git a/dbms/src/Functions/FunctionsConditional.h b/dbms/src/Functions/FunctionsConditional.h
index 80cd1b4a867..678d88a3b79 100644
--- a/dbms/src/Functions/FunctionsConditional.h
+++ b/dbms/src/Functions/FunctionsConditional.h
@@ -17,6 +17,8 @@
 #include <Functions/FunctionHelpers.h>
 #include <DataTypes/NumberTraits.h>
 #include <DataTypes/DataTypeTraits.h>
+#include <Functions/GatherUtils.h>
+
 
 namespace DB
 {
@@ -1114,92 +1116,68 @@ private:
         const ColumnString * col_else = checkAndGetColumn<ColumnString>(col_else_untyped);
         const ColumnFixedString * col_then_fixed = checkAndGetColumn<ColumnFixedString>(col_then_untyped);
         const ColumnFixedString * col_else_fixed = checkAndGetColumn<ColumnFixedString>(col_else_untyped);
-        const ColumnConst * col_then_const = checkAndGetColumnConstStringOrFixedString(col_then_untyped);
-        const ColumnConst * col_else_const = checkAndGetColumnConstStringOrFixedString(col_else_untyped);
+        const ColumnConst * col_then_const = checkAndGetColumnConst<ColumnString>(col_then_untyped);
+        const ColumnConst * col_else_const = checkAndGetColumnConst<ColumnString>(col_else_untyped);
+        const ColumnConst * col_then_const_fixed = checkAndGetColumnConst<ColumnFixedString>(col_then_untyped);
+        const ColumnConst * col_else_const_fixed = checkAndGetColumnConst<ColumnFixedString>(col_else_untyped);
 
-        if ((col_then || col_then_const || col_then_fixed) && (col_else || col_else_const || col_else_fixed))
+        const PaddedPODArray<UInt8> & cond_data = cond_col->getData();
+        size_t rows = cond_data.size();
+
+        if ((col_then_fixed || col_then_const_fixed)
+            && (col_else_fixed || col_else_const_fixed))
         {
+            /// The result is FixedString.
+
+            auto col_res_untyped = col_then_untyped->cloneEmpty();
+            block.getByPosition(result).column = col_res_untyped;
+            ColumnFixedString * col_res = static_cast<ColumnFixedString *>(col_res_untyped.get());
+            auto sink = FixedStringSink(*col_res, rows);
+
             if (col_then_fixed && col_else_fixed)
-            {
-                /// The result is FixedString.
+                conditional(FixedStringSource(*col_then_fixed), FixedStringSource(*col_else_fixed), sink, cond_data);
+            else if (col_then_fixed && col_else_const_fixed)
+                conditional(FixedStringSource(*col_then_fixed), ConstSource<FixedStringSource>(*col_else_const_fixed), sink, cond_data);
+            else if (col_then_const_fixed && col_else_fixed)
+                conditional(ConstSource<FixedStringSource>(*col_then_const_fixed), FixedStringSource(*col_else_fixed), sink, cond_data);
+            else if (col_then_const_fixed && col_else_const_fixed)
+                conditional(ConstSource<FixedStringSource>(*col_then_const_fixed), ConstSource<FixedStringSource>(*col_else_const_fixed), sink, cond_data);
 
-                if (col_then_fixed->getN() != col_else_fixed->getN())
-                    throw Exception("FixedString columns as 'then' and 'else' arguments of function 'if' has different sizes", ErrorCodes::ILLEGAL_COLUMN);
+            return true;
+        }
 
-                size_t N = col_then_fixed->getN();
+        if ((col_then || col_then_const || col_then_fixed || col_then_const_fixed)
+            && (col_else || col_else_const || col_else_fixed || col_else_const_fixed))
+        {
+            /// The result is String.
+            std::shared_ptr<ColumnString> col_res = std::make_shared<ColumnString>();
+            block.getByPosition(result).column = col_res;
+            auto sink = StringSink(*col_res, rows);
 
-                auto col_res = std::make_shared<ColumnFixedString>(N);
-                block.getByPosition(result).column = col_res;
-
-                ColumnFixedString::Chars_t & res_vec = col_res->getChars();
-
-                StringIfImpl::vector_fixed_vector_fixed(
-                    cond_col->getData(),
-                    col_then_fixed->getChars(),
-                    col_else_fixed->getChars(),
-                    N,
-                    res_vec);
-            }
-            else
-            {
-                /// The result is String.
-                std::shared_ptr<ColumnString> col_res = std::make_shared<ColumnString>();
-                block.getByPosition(result).column = col_res;
-
-                ColumnString::Chars_t & res_vec = col_res->getChars();
-                ColumnString::Offsets_t & res_offsets = col_res->getOffsets();
-
-                if (col_then && col_else)
-                    StringIfImpl::vector_vector(
-                        cond_col->getData(),
-                        col_then->getChars(), col_then->getOffsets(),
-                        col_else->getChars(), col_else->getOffsets(),
-                        res_vec, res_offsets);
-                else if (col_then && col_else_const)
-                    StringIfImpl::vector_constant(
-                        cond_col->getData(),
-                        col_then->getChars(), col_then->getOffsets(),
-                        col_else_const->getValue<String>(),
-                        res_vec, res_offsets);
-                else if (col_then_const && col_else)
-                    StringIfImpl::constant_vector(
-                        cond_col->getData(),
-                        col_then_const->getValue<String>(),
-                        col_else->getChars(), col_else->getOffsets(),
-                        res_vec, res_offsets);
-                else if (col_then_const && col_else_const)
-                    StringIfImpl::constant_constant(
-                        cond_col->getData(),
-                        col_then_const->getValue<String>(),
-                        col_else_const->getValue<String>(),
-                        res_vec, res_offsets);
-                else if (col_then && col_else_fixed)
-                    StringIfImpl::vector_vector_fixed(
-                        cond_col->getData(),
-                        col_then->getChars(), col_then->getOffsets(),
-                        col_else_fixed->getChars(), col_else_fixed->getN(),
-                        res_vec, res_offsets);
-                else if (col_then_fixed && col_else)
-                    StringIfImpl::vector_fixed_vector(
-                        cond_col->getData(),
-                        col_then_fixed->getChars(), col_then_fixed->getN(),
-                        col_else->getChars(), col_else->getOffsets(),
-                        res_vec, res_offsets);
-                else if (col_then_const && col_else_fixed)
-                    StringIfImpl::constant_vector_fixed(
-                        cond_col->getData(),
-                        col_then_const->getValue<String>(),
-                        col_else_fixed->getChars(), col_else_fixed->getN(),
-                        res_vec, res_offsets);
-                else if (col_then_fixed && col_else_const)
-                    StringIfImpl::vector_fixed_constant(
-                        cond_col->getData(),
-                        col_then_fixed->getChars(), col_then_fixed->getN(),
-                        col_else_const->getValue<String>(),
-                        res_vec, res_offsets);
-                else
-                    return false;
-            }
+            if (col_then && col_else)
+                conditional(StringSource(*col_then), StringSource(*col_else), sink, cond_data);
+            else if (col_then && col_else_const)
+                conditional(StringSource(*col_then), ConstSource<StringSource>(*col_else_const), sink, cond_data);
+            else if (col_then_const && col_else)
+                conditional(ConstSource<StringSource>(*col_then_const), StringSource(*col_else), sink, cond_data);
+            else if (col_then_const && col_else_const)
+                conditional(ConstSource<StringSource>(*col_then_const), ConstSource<StringSource>(*col_else_const), sink, cond_data);
+            else if (col_then && col_else_fixed)
+                conditional(StringSource(*col_then), FixedStringSource(*col_else_fixed), sink, cond_data);
+            else if (col_then_fixed && col_else)
+                conditional(FixedStringSource(*col_then_fixed), StringSource(*col_else), sink, cond_data);
+            else if (col_then_const && col_else_fixed)
+                conditional(ConstSource<StringSource>(*col_then_const), FixedStringSource(*col_else_fixed), sink, cond_data);
+            else if (col_then_fixed && col_else_const)
+                conditional(FixedStringSource(*col_then_fixed), ConstSource<StringSource>(*col_else_const), sink, cond_data);
+            else if (col_then && col_else_const_fixed)
+                conditional(StringSource(*col_then), ConstSource<FixedStringSource>(*col_else_const_fixed), sink, cond_data);
+            else if (col_then_const_fixed && col_else)
+                conditional(ConstSource<FixedStringSource>(*col_then_const_fixed), StringSource(*col_else), sink, cond_data);
+            else if (col_then_const && col_else_const_fixed)
+                conditional(ConstSource<StringSource>(*col_then_const), ConstSource<FixedStringSource>(*col_else_const_fixed), sink, cond_data);
+            else if (col_then_const_fixed && col_else_const)
+                conditional(ConstSource<FixedStringSource>(*col_then_const_fixed), ConstSource<StringSource>(*col_else_const), sink, cond_data);
 
             return true;
         }
@@ -1224,25 +1202,25 @@ private:
 
             if (col_then_elements && col_else_elements)
                 StringArrayIfImpl::vector_vector(
-                    cond_col->getData(),
+                    cond_data,
                     col_then_elements->getChars(), col_then_elements->getOffsets(), col_arr_then->getOffsets(),
                     col_else_elements->getChars(), col_else_elements->getOffsets(), col_arr_else->getOffsets(),
                     res_chars, res_string_offsets, res_array_offsets);
             else if (col_then_elements && col_arr_else_const)
                 StringArrayIfImpl::vector_constant(
-                    cond_col->getData(),
+                    cond_data,
                     col_then_elements->getChars(), col_then_elements->getOffsets(), col_arr_then->getOffsets(),
                     col_arr_else_const->getValue<Array>(),
                     res_chars, res_string_offsets, res_array_offsets);
             else if (col_arr_then_const && col_else_elements)
                 StringArrayIfImpl::constant_vector(
-                    cond_col->getData(),
+                    cond_data,
                     col_arr_then_const->getValue<Array>(),
                     col_else_elements->getChars(), col_else_elements->getOffsets(), col_arr_else->getOffsets(),
                     res_chars, res_string_offsets, res_array_offsets);
             else if (col_arr_then_const && col_arr_else_const)
                 StringArrayIfImpl::constant_constant(
-                    cond_col->getData(),
+                    cond_data,
                     col_arr_then_const->getValue<Array>(),
                     col_arr_else_const->getValue<Array>(),
                     res_chars, res_string_offsets, res_array_offsets);
diff --git a/dbms/src/Functions/GatherUtils.h b/dbms/src/Functions/GatherUtils.h
index eff7b1e3eed..b4c9e6c0069 100644
--- a/dbms/src/Functions/GatherUtils.h
+++ b/dbms/src/Functions/GatherUtils.h
@@ -390,7 +390,7 @@ struct FixedStringSink
     ColumnString::Offset_t current_offset = 0;
 
     FixedStringSink(ColumnFixedString & col, size_t column_size)
-        : elements(col.getChars()), total_rows(column_size)
+        : elements(col.getChars()), string_size(col.getN()), total_rows(column_size)
     {
         elements.resize(column_size * string_size);
     }
@@ -589,6 +589,11 @@ inline void writeSlice(const StringSource::Slice & slice, StringSink & sink)
     sink.current_offset += slice.size;
 }
 
+inline void writeSlice(const StringSource::Slice & slice, FixedStringSink & sink)
+{
+    memcpySmallAllowReadWriteOverflow15(&sink.elements[sink.current_offset], slice.data, slice.size);
+}
+
 /// Assuming same types of underlying columns for slice and sink.
 inline void writeSlice(const GenericArraySlice & slice, GenericArraySink & sink)
 {
@@ -723,4 +728,25 @@ void sliceDynamicOffsetBounded(Source && src, Sink && sink, IColumn & offset_col
     }
 }
 
+
+template <typename SourceA, typename SourceB, typename Sink>
+void conditional(SourceA && src_a, SourceB && src_b, Sink && sink, const PaddedPODArray<UInt8> & condition)
+{
+    const UInt8 * cond_pos = &condition[0];
+    const UInt8 * cond_end = cond_pos + condition.size();
+
+    while (cond_pos < cond_end)
+    {
+        if (*cond_pos)
+            writeSlice(src_a.getWhole(), sink);
+        else
+            writeSlice(src_b.getWhole(), sink);
+
+        ++cond_pos;
+        src_a.next();
+        src_b.next();
+        sink.next();
+    }
+}
+
 }

From 0c0c8da4f830a266d720ffef17c1c19927302039 Mon Sep 17 00:00:00 2001
From: Alexey Milovidov <milovidov@yandex-team.ru>
Date: Sat, 5 Aug 2017 04:43:29 +0300
Subject: [PATCH 041/281] Fixed function if of FixedString arguments
 [#CLICKHOUSE-3202].

---
 dbms/src/Functions/FunctionsConditional.h |  2 +-
 dbms/src/Functions/GatherUtils.h          | 34 +++++++++++------------
 2 files changed, 18 insertions(+), 18 deletions(-)

diff --git a/dbms/src/Functions/FunctionsConditional.h b/dbms/src/Functions/FunctionsConditional.h
index 678d88a3b79..8955215b0a5 100644
--- a/dbms/src/Functions/FunctionsConditional.h
+++ b/dbms/src/Functions/FunctionsConditional.h
@@ -1129,7 +1129,7 @@ private:
         {
             /// The result is FixedString.
 
-            auto col_res_untyped = col_then_untyped->cloneEmpty();
+            auto col_res_untyped = block.getByPosition(result).type->createColumn();
             block.getByPosition(result).column = col_res_untyped;
             ColumnFixedString * col_res = static_cast<ColumnFixedString *>(col_res_untyped.get());
             auto sink = FixedStringSink(*col_res, rows);
diff --git a/dbms/src/Functions/GatherUtils.h b/dbms/src/Functions/GatherUtils.h
index b4c9e6c0069..91805713e29 100644
--- a/dbms/src/Functions/GatherUtils.h
+++ b/dbms/src/Functions/GatherUtils.h
@@ -316,28 +316,28 @@ struct FixedStringSource
     using Slice = NumericArraySlice<UInt8>;
     using Column = ColumnFixedString;
 
-    const typename ColumnString::Chars_t & elements;
+    const UInt8 * pos;
+    const UInt8 * end;
     size_t string_size;
-
     size_t row_num = 0;
-    size_t total_rows;
-
-    ColumnString::Offset_t prev_offset = 0;
 
     FixedStringSource(const ColumnFixedString & col)
-        : elements(col.getChars()), string_size(col.getN()), total_rows(col.size())
+        : string_size(col.getN())
     {
+        const auto & chars = col.getChars();
+        pos = chars.data();
+        end = pos + col.size();
     }
 
     void next()
     {
-        prev_offset += string_size;
+        pos += string_size;
         ++row_num;
     }
 
     bool isEnd() const
     {
-        return row_num == total_rows;
+        return pos == end;
     }
 
     size_t rowNum() const
@@ -347,35 +347,35 @@ struct FixedStringSource
 
     Slice getWhole() const
     {
-        return {&elements[prev_offset], string_size};
+        return {pos, string_size};
     }
 
     Slice getSliceFromLeft(size_t offset) const
     {
         if (offset >= string_size)
-            return {&elements[prev_offset], 0};
-        return {&elements[prev_offset + offset], string_size - offset};
+            return {pos, 0};
+        return {pos + offset, string_size - offset};
     }
 
     Slice getSliceFromLeft(size_t offset, size_t length) const
     {
         if (offset >= string_size)
-            return {&elements[prev_offset], 0};
-        return {&elements[prev_offset + offset], std::min(length, string_size - offset)};
+            return {pos, 0};
+        return {pos + offset, std::min(length, string_size - offset)};
     }
 
     Slice getSliceFromRight(size_t offset) const
     {
         if (offset >= string_size)
-            return {&elements[prev_offset], 0};
-        return {&elements[prev_offset + string_size - offset], offset};
+            return {pos, 0};
+        return {pos + string_size - offset, offset};
     }
 
     Slice getSliceFromRight(size_t offset, size_t length) const
     {
         if (offset >= string_size)
-            return {&elements[prev_offset], 0};
-        return {&elements[prev_offset + string_size - offset], std::min(length, offset)};
+            return {pos, 0};
+        return {pos + string_size - offset, std::min(length, offset)};
     }
 };
 

From 46486a5cba5bdaa6f7db7a26358df0b9a833ead4 Mon Sep 17 00:00:00 2001
From: Alexey Milovidov <milovidov@yandex-team.ru>
Date: Sat, 5 Aug 2017 04:54:38 +0300
Subject: [PATCH 042/281] Fixed function if of FixedString arguments
 [#CLICKHOUSE-3202].

---
 dbms/src/Functions/FunctionsConditional.h | 257 ----------------------
 dbms/src/Functions/GatherUtils.h          |  12 +-
 2 files changed, 6 insertions(+), 263 deletions(-)

diff --git a/dbms/src/Functions/FunctionsConditional.h b/dbms/src/Functions/FunctionsConditional.h
index 8955215b0a5..5391cb053f3 100644
--- a/dbms/src/Functions/FunctionsConditional.h
+++ b/dbms/src/Functions/FunctionsConditional.h
@@ -140,263 +140,6 @@ public:
 };
 
 
-struct StringIfImpl
-{
-    static void vector_vector(
-        const PaddedPODArray<UInt8> & cond,
-        const ColumnString::Chars_t & a_data, const ColumnString::Offsets_t & a_offsets,
-        const ColumnString::Chars_t & b_data, const ColumnString::Offsets_t & b_offsets,
-        ColumnString::Chars_t & c_data, ColumnString::Offsets_t & c_offsets)
-    {
-        size_t size = cond.size();
-        c_offsets.resize(size);
-        c_data.reserve(std::max(a_data.size(), b_data.size()));
-
-        ColumnString::Offset_t a_prev_offset = 0;
-        ColumnString::Offset_t b_prev_offset = 0;
-        ColumnString::Offset_t c_prev_offset = 0;
-
-        for (size_t i = 0; i < size; ++i)
-        {
-            if (cond[i])
-            {
-                size_t size_to_write = a_offsets[i] - a_prev_offset;
-                c_data.resize(c_data.size() + size_to_write);
-                memcpySmallAllowReadWriteOverflow15(&c_data[c_prev_offset], &a_data[a_prev_offset], size_to_write);
-                c_prev_offset += size_to_write;
-                c_offsets[i] = c_prev_offset;
-            }
-            else
-            {
-                size_t size_to_write = b_offsets[i] - b_prev_offset;
-                c_data.resize(c_data.size() + size_to_write);
-                memcpySmallAllowReadWriteOverflow15(&c_data[c_prev_offset], &b_data[b_prev_offset], size_to_write);
-                c_prev_offset += size_to_write;
-                c_offsets[i] = c_prev_offset;
-            }
-
-            a_prev_offset = a_offsets[i];
-            b_prev_offset = b_offsets[i];
-        }
-    }
-
-    static void vector_fixed_vector_fixed(
-        const PaddedPODArray<UInt8> & cond,
-        const ColumnFixedString::Chars_t & a_data,
-        const ColumnFixedString::Chars_t & b_data,
-        const size_t N,
-        ColumnFixedString::Chars_t & c_data)
-    {
-        size_t size = cond.size();
-        c_data.resize(a_data.size());
-
-        for (size_t i = 0; i < size; ++i)
-        {
-            if (cond[i])
-                memcpySmallAllowReadWriteOverflow15(&c_data[i * N], &a_data[i * N], N);
-            else
-                memcpySmallAllowReadWriteOverflow15(&c_data[i * N], &b_data[i * N], N);
-        }
-    }
-
-    template <bool negative>
-    static void vector_vector_fixed_impl(
-        const PaddedPODArray<UInt8> & cond,
-        const ColumnString::Chars_t & a_data, const ColumnString::Offsets_t & a_offsets,
-        const ColumnFixedString::Chars_t & b_data, const size_t b_N,
-        ColumnString::Chars_t & c_data, ColumnString::Offsets_t & c_offsets)
-    {
-        size_t size = cond.size();
-        c_offsets.resize(size);
-        c_data.reserve(std::max(a_data.size(), b_data.size() + size));
-
-        ColumnString::Offset_t a_prev_offset = 0;
-        ColumnString::Offset_t c_prev_offset = 0;
-
-        for (size_t i = 0; i < size; ++i)
-        {
-            if (negative != cond[i])
-            {
-                size_t size_to_write = a_offsets[i] - a_prev_offset;
-                c_data.resize(c_data.size() + size_to_write);
-                memcpySmallAllowReadWriteOverflow15(&c_data[c_prev_offset], &a_data[a_prev_offset], size_to_write);
-                c_prev_offset += size_to_write;
-                c_offsets[i] = c_prev_offset;
-            }
-            else
-            {
-                size_t size_to_write = b_N;
-                c_data.resize(c_data.size() + size_to_write + 1);
-                memcpySmallAllowReadWriteOverflow15(&c_data[c_prev_offset], &b_data[i * b_N], size_to_write);
-                c_data.back() = 0;
-                c_prev_offset += size_to_write + 1;
-                c_offsets[i] = c_prev_offset;
-            }
-
-            a_prev_offset = a_offsets[i];
-        }
-    }
-
-    static void vector_vector_fixed(
-        const PaddedPODArray<UInt8> & cond,
-        const ColumnString::Chars_t & a_data, const ColumnString::Offsets_t & a_offsets,
-        const ColumnFixedString::Chars_t & b_data, const size_t b_N,
-        ColumnString::Chars_t & c_data, ColumnString::Offsets_t & c_offsets)
-    {
-        vector_vector_fixed_impl<false>(cond, a_data, a_offsets, b_data, b_N, c_data, c_offsets);
-    }
-
-    static void vector_fixed_vector(
-        const PaddedPODArray<UInt8> & cond,
-        const ColumnFixedString::Chars_t & a_data, const size_t a_N,
-        const ColumnString::Chars_t & b_data, const ColumnString::Offsets_t & b_offsets,
-        ColumnString::Chars_t & c_data, ColumnString::Offsets_t & c_offsets)
-    {
-        vector_vector_fixed_impl<true>(cond, b_data, b_offsets, a_data, a_N, c_data, c_offsets);
-    }
-
-    template <bool negative>
-    static void vector_constant_impl(
-        const PaddedPODArray<UInt8> & cond,
-        const ColumnString::Chars_t & a_data, const ColumnString::Offsets_t & a_offsets,
-        const String & b,
-        ColumnString::Chars_t & c_data, ColumnString::Offsets_t & c_offsets)
-    {
-        size_t size = cond.size();
-        c_offsets.resize(size);
-        c_data.reserve(a_data.size());
-
-        ColumnString::Offset_t a_prev_offset = 0;
-        ColumnString::Offset_t c_prev_offset = 0;
-
-        for (size_t i = 0; i < size; ++i)
-        {
-            if (negative != cond[i])
-            {
-                size_t size_to_write = a_offsets[i] - a_prev_offset;
-                c_data.resize(c_data.size() + size_to_write);
-                memcpySmallAllowReadWriteOverflow15(&c_data[c_prev_offset], &a_data[a_prev_offset], size_to_write);
-                c_prev_offset += size_to_write;
-                c_offsets[i] = c_prev_offset;
-            }
-            else
-            {
-                size_t size_to_write = b.size() + 1;
-                c_data.resize(c_data.size() + size_to_write);
-                memcpy(&c_data[c_prev_offset], b.data(), size_to_write);
-                c_prev_offset += size_to_write;
-                c_offsets[i] = c_prev_offset;
-            }
-
-            a_prev_offset = a_offsets[i];
-        }
-    }
-
-    static void vector_constant(
-        const PaddedPODArray<UInt8> & cond,
-        const ColumnString::Chars_t & a_data, const ColumnString::Offsets_t & a_offsets,
-        const String & b,
-        ColumnString::Chars_t & c_data, ColumnString::Offsets_t & c_offsets)
-    {
-        return vector_constant_impl<false>(cond, a_data, a_offsets, b, c_data, c_offsets);
-    }
-
-    static void constant_vector(
-        const PaddedPODArray<UInt8> & cond,
-        const String & a,
-        const ColumnString::Chars_t & b_data, const ColumnString::Offsets_t & b_offsets,
-        ColumnString::Chars_t & c_data, ColumnString::Offsets_t & c_offsets)
-    {
-        return vector_constant_impl<true>(cond, b_data, b_offsets, a, c_data, c_offsets);
-    }
-
-    template <bool negative>
-    static void vector_fixed_constant_impl(
-        const PaddedPODArray<UInt8> & cond,
-        const ColumnFixedString::Chars_t & a_data, const size_t a_N,
-        const String & b,
-        ColumnString::Chars_t & c_data, ColumnString::Offsets_t & c_offsets)
-    {
-        size_t size = cond.size();
-        c_offsets.resize(size);
-        c_data.reserve(a_data.size());
-
-        ColumnString::Offset_t c_prev_offset = 0;
-
-        for (size_t i = 0; i < size; ++i)
-        {
-            if (negative != cond[i])
-            {
-                size_t size_to_write = a_N;
-                c_data.resize(c_data.size() + size_to_write + 1);
-                memcpySmallAllowReadWriteOverflow15(&c_data[c_prev_offset], &a_data[i * a_N], size_to_write);
-                c_data.back() = 0;
-                c_prev_offset += size_to_write + 1;
-                c_offsets[i] = c_prev_offset;
-            }
-            else
-            {
-                size_t size_to_write = b.size() + 1;
-                c_data.resize(c_data.size() + size_to_write);
-                memcpy(&c_data[c_prev_offset], b.data(), size_to_write);
-                c_prev_offset += size_to_write;
-                c_offsets[i] = c_prev_offset;
-            }
-        }
-    }
-
-    static void vector_fixed_constant(
-        const PaddedPODArray<UInt8> & cond,
-        const ColumnFixedString::Chars_t & a_data, const size_t N,
-        const String & b,
-        ColumnString::Chars_t & c_data, ColumnString::Offsets_t & c_offsets)
-    {
-        vector_fixed_constant_impl<false>(cond, a_data, N, b, c_data, c_offsets);
-    }
-
-    static void constant_vector_fixed(
-        const PaddedPODArray<UInt8> & cond,
-        const String & a,
-        const ColumnFixedString::Chars_t & b_data, const size_t N,
-        ColumnString::Chars_t & c_data, ColumnString::Offsets_t & c_offsets)
-    {
-        vector_fixed_constant_impl<true>(cond, b_data, N, a, c_data, c_offsets);
-    }
-
-    static void constant_constant(
-        const PaddedPODArray<UInt8> & cond,
-        const String & a, const String & b,
-        ColumnString::Chars_t & c_data, ColumnString::Offsets_t & c_offsets)
-    {
-        size_t size = cond.size();
-        c_offsets.resize(size);
-        c_data.reserve((std::max(a.size(), b.size()) + 1) * size);
-
-        ColumnString::Offset_t c_prev_offset = 0;
-
-        for (size_t i = 0; i < size; ++i)
-        {
-            if (cond[i])
-            {
-                size_t size_to_write = a.size() + 1;
-                c_data.resize(c_data.size() + size_to_write);
-                memcpy(&c_data[c_prev_offset], a.data(), size_to_write);
-                c_prev_offset += size_to_write;
-                c_offsets[i] = c_prev_offset;
-            }
-            else
-            {
-                size_t size_to_write = b.size() + 1;
-                c_data.resize(c_data.size() + size_to_write);
-                memcpy(&c_data[c_prev_offset], b.data(), size_to_write);
-                c_prev_offset += size_to_write;
-                c_offsets[i] = c_prev_offset;
-            }
-        }
-    }
-};
-
-
 template <typename A, typename B, typename ResultType>
 struct NumArrayIfImpl
 {
diff --git a/dbms/src/Functions/GatherUtils.h b/dbms/src/Functions/GatherUtils.h
index 91805713e29..22329fe3bd4 100644
--- a/dbms/src/Functions/GatherUtils.h
+++ b/dbms/src/Functions/GatherUtils.h
@@ -424,7 +424,7 @@ struct IStringSource
 };
 
 template <typename Impl>
-struct DynamicStringSource : IStringSource
+struct DynamicStringSource final : IStringSource
 {
     Impl impl;
 
@@ -564,7 +564,7 @@ struct GenericArraySink
 /// Methods to copy Slice to Sink, overloaded for various combinations of types.
 
 template <typename T>
-void writeSlice(const NumericArraySlice<T> & slice, NumericArraySink<T> & sink)
+void ALWAYS_INLINE writeSlice(const NumericArraySlice<T> & slice, NumericArraySink<T> & sink)
 {
     sink.elements.resize(sink.current_offset + slice.size);
     memcpySmallAllowReadWriteOverflow15(&sink.elements[sink.current_offset], slice.data, slice.size * sizeof(T));
@@ -572,7 +572,7 @@ void writeSlice(const NumericArraySlice<T> & slice, NumericArraySink<T> & sink)
 }
 
 template <typename T, typename U>
-void writeSlice(const NumericArraySlice<T> & slice, NumericArraySink<U> & sink)
+void ALWAYS_INLINE writeSlice(const NumericArraySlice<T> & slice, NumericArraySink<U> & sink)
 {
     sink.elements.resize(sink.current_offset + slice.size);
     for (size_t i = 0; i < slice.size; ++i)
@@ -582,20 +582,20 @@ void writeSlice(const NumericArraySlice<T> & slice, NumericArraySink<U> & sink)
     }
 }
 
-inline void writeSlice(const StringSource::Slice & slice, StringSink & sink)
+inline ALWAYS_INLINE void writeSlice(const StringSource::Slice & slice, StringSink & sink)
 {
     sink.elements.resize(sink.current_offset + slice.size);
     memcpySmallAllowReadWriteOverflow15(&sink.elements[sink.current_offset], slice.data, slice.size);
     sink.current_offset += slice.size;
 }
 
-inline void writeSlice(const StringSource::Slice & slice, FixedStringSink & sink)
+inline ALWAYS_INLINE void writeSlice(const StringSource::Slice & slice, FixedStringSink & sink)
 {
     memcpySmallAllowReadWriteOverflow15(&sink.elements[sink.current_offset], slice.data, slice.size);
 }
 
 /// Assuming same types of underlying columns for slice and sink.
-inline void writeSlice(const GenericArraySlice & slice, GenericArraySink & sink)
+inline ALWAYS_INLINE void writeSlice(const GenericArraySlice & slice, GenericArraySink & sink)
 {
     sink.elements.insertRangeFrom(*slice.elements, slice.begin, slice.size);
     sink.current_offset += slice.size;

From 39392a29617311ca399eac00b1f6ef32e057d1d4 Mon Sep 17 00:00:00 2001
From: Alexey Milovidov <milovidov@yandex-team.ru>
Date: Sat, 5 Aug 2017 05:03:49 +0300
Subject: [PATCH 043/281] Fixed function if of FixedString arguments
 [#CLICKHOUSE-3202].

---
 dbms/src/Functions/GatherUtils.h | 31 +++++++++++++++++++++----------
 1 file changed, 21 insertions(+), 10 deletions(-)

diff --git a/dbms/src/Functions/GatherUtils.h b/dbms/src/Functions/GatherUtils.h
index 22329fe3bd4..9f898e24e30 100644
--- a/dbms/src/Functions/GatherUtils.h
+++ b/dbms/src/Functions/GatherUtils.h
@@ -76,6 +76,12 @@ struct NumericArraySource
         return row_num;
     }
 
+    /// Get size for corresponding call or Sink::reserve to reserve memory for elements.
+    size_t getSizeForReserve() const
+    {
+        return elements.size();
+    }
+
     Slice getWhole() const
     {
         return {&elements[prev_offset], offsets[row_num] - prev_offset};
@@ -146,6 +152,11 @@ struct ConstSource
         return row_num;
     }
 
+    size_t getSizeForReserve() const
+    {
+        return total_rows * base.getSizeForReserve();
+    }
+
     Slice getWhole() const
     {
         return base.getWhole();
@@ -291,7 +302,7 @@ struct StringSink
         offsets.resize(column_size);
     }
 
-    void next()
+    void ALWAYS_INLINE next()
     {
         elements.push_back(0);
         ++current_offset;
@@ -605,7 +616,7 @@ inline ALWAYS_INLINE void writeSlice(const GenericArraySlice & slice, GenericArr
 /// Algorithms
 
 template <typename SourceA, typename SourceB, typename Sink>
-void concat(SourceA && src_a, SourceB && src_b, Sink && sink)
+void NO_INLINE concat(SourceA && src_a, SourceB && src_b, Sink && sink)
 {
     while (!src_a.isEnd())
     {
@@ -619,7 +630,7 @@ void concat(SourceA && src_a, SourceB && src_b, Sink && sink)
 }
 
 template <typename Sink>
-void concat(StringSources & sources, Sink && sink)
+void NO_INLINE concat(StringSources & sources, Sink && sink)
 {
     while (!sink.isEnd())
     {
@@ -634,7 +645,7 @@ void concat(StringSources & sources, Sink && sink)
 
 
 template <typename Source, typename Sink>
-void sliceFromLeftConstantOffsetUnbounded(Source && src, Sink && sink, size_t offset)
+void NO_INLINE sliceFromLeftConstantOffsetUnbounded(Source && src, Sink && sink, size_t offset)
 {
     while (!src.isEnd())
     {
@@ -645,7 +656,7 @@ void sliceFromLeftConstantOffsetUnbounded(Source && src, Sink && sink, size_t of
 }
 
 template <typename Source, typename Sink>
-void sliceFromLeftConstantOffsetBounded(Source && src, Sink && sink, size_t offset, size_t length)
+void NO_INLINE sliceFromLeftConstantOffsetBounded(Source && src, Sink && sink, size_t offset, size_t length)
 {
     while (!src.isEnd())
     {
@@ -656,7 +667,7 @@ void sliceFromLeftConstantOffsetBounded(Source && src, Sink && sink, size_t offs
 }
 
 template <typename Source, typename Sink>
-void sliceFromRightConstantOffsetUnbounded(Source && src, Sink && sink, size_t offset)
+void NO_INLINE sliceFromRightConstantOffsetUnbounded(Source && src, Sink && sink, size_t offset)
 {
     while (!src.isEnd())
     {
@@ -667,7 +678,7 @@ void sliceFromRightConstantOffsetUnbounded(Source && src, Sink && sink, size_t o
 }
 
 template <typename Source, typename Sink>
-void sliceFromRightConstantOffsetBounded(Source && src, Sink && sink, size_t offset, size_t length)
+void NO_INLINE sliceFromRightConstantOffsetBounded(Source && src, Sink && sink, size_t offset, size_t length)
 {
     while (!src.isEnd())
     {
@@ -679,7 +690,7 @@ void sliceFromRightConstantOffsetBounded(Source && src, Sink && sink, size_t off
 
 
 template <typename Source, typename Sink>
-void sliceDynamicOffsetUnbounded(Source && src, Sink && sink, IColumn & offset_column)
+void NO_INLINE sliceDynamicOffsetUnbounded(Source && src, Sink && sink, IColumn & offset_column)
 {
     while (!src.isEnd())
     {
@@ -703,7 +714,7 @@ void sliceDynamicOffsetUnbounded(Source && src, Sink && sink, IColumn & offset_c
 }
 
 template <typename Source, typename Sink>
-void sliceDynamicOffsetBounded(Source && src, Sink && sink, IColumn & offset_column, IColumn & length_column)
+void NO_INLINE sliceDynamicOffsetBounded(Source && src, Sink && sink, IColumn & offset_column, IColumn & length_column)
 {
     while (!src.isEnd())
     {
@@ -730,7 +741,7 @@ void sliceDynamicOffsetBounded(Source && src, Sink && sink, IColumn & offset_col
 
 
 template <typename SourceA, typename SourceB, typename Sink>
-void conditional(SourceA && src_a, SourceB && src_b, Sink && sink, const PaddedPODArray<UInt8> & condition)
+void NO_INLINE conditional(SourceA && src_a, SourceB && src_b, Sink && sink, const PaddedPODArray<UInt8> & condition)
 {
     const UInt8 * cond_pos = &condition[0];
     const UInt8 * cond_end = cond_pos + condition.size();

From 11da533de1c03f956086a26f4e0cea8655428d7e Mon Sep 17 00:00:00 2001
From: Alexey Milovidov <milovidov@yandex-team.ru>
Date: Sat, 5 Aug 2017 05:13:38 +0300
Subject: [PATCH 044/281] Fixed function 'if' for FixedString arguments
 [#CLICKHOUSE-3202].

---
 dbms/src/Functions/GatherUtils.h | 181 +++++++++++++++++++------------
 1 file changed, 111 insertions(+), 70 deletions(-)

diff --git a/dbms/src/Functions/GatherUtils.h b/dbms/src/Functions/GatherUtils.h
index 9f898e24e30..78d74f7828d 100644
--- a/dbms/src/Functions/GatherUtils.h
+++ b/dbms/src/Functions/GatherUtils.h
@@ -184,39 +184,6 @@ struct ConstSource
 };
 
 
-template <typename T>
-struct NumericArraySink
-{
-    typename ColumnVector<T>::Container_t & elements;
-    typename ColumnArray::Offsets_t & offsets;
-
-    size_t row_num = 0;
-    ColumnArray::Offset_t current_offset = 0;
-
-    NumericArraySink(ColumnArray & arr, size_t column_size)
-        : elements(typeid_cast<ColumnVector<T> &>(arr.getData()).getData()), offsets(arr.getOffsets())
-    {
-        offsets.resize(column_size);
-    }
-
-    void next()
-    {
-        offsets[row_num] = current_offset;
-        ++row_num;
-    }
-
-    bool isEnd() const
-    {
-        return row_num == offsets.size();
-    }
-
-    size_t rowNum() const
-    {
-        return row_num;
-    }
-};
-
-
 struct StringSource
 {
     using Slice = NumericArraySlice<UInt8>;
@@ -249,6 +216,11 @@ struct StringSource
         return row_num;
     }
 
+    size_t getSizeForReserve() const
+    {
+        return elements.size();
+    }
+
     Slice getWhole() const
     {
         return {&elements[prev_offset], offsets[row_num] - prev_offset - 1};
@@ -288,40 +260,6 @@ struct StringSource
 };
 
 
-struct StringSink
-{
-    typename ColumnString::Chars_t & elements;
-    typename ColumnString::Offsets_t & offsets;
-
-    size_t row_num = 0;
-    ColumnString::Offset_t current_offset = 0;
-
-    StringSink(ColumnString & col, size_t column_size)
-        : elements(col.getChars()), offsets(col.getOffsets())
-    {
-        offsets.resize(column_size);
-    }
-
-    void ALWAYS_INLINE next()
-    {
-        elements.push_back(0);
-        ++current_offset;
-        offsets[row_num] = current_offset;
-        ++row_num;
-    }
-
-    bool isEnd() const
-    {
-        return row_num == offsets.size();
-    }
-
-    size_t rowNum() const
-    {
-        return row_num;
-    }
-};
-
-
 struct FixedStringSource
 {
     using Slice = NumericArraySlice<UInt8>;
@@ -356,6 +294,11 @@ struct FixedStringSource
         return row_num;
     }
 
+    size_t getSizeForReserve() const
+    {
+        return end - pos;
+    }
+
     Slice getWhole() const
     {
         return {pos, string_size};
@@ -391,6 +334,83 @@ struct FixedStringSource
 };
 
 
+template <typename T>
+struct NumericArraySink
+{
+    typename ColumnVector<T>::Container_t & elements;
+    typename ColumnArray::Offsets_t & offsets;
+
+    size_t row_num = 0;
+    ColumnArray::Offset_t current_offset = 0;
+
+    NumericArraySink(ColumnArray & arr, size_t column_size)
+        : elements(typeid_cast<ColumnVector<T> &>(arr.getData()).getData()), offsets(arr.getOffsets())
+    {
+        offsets.resize(column_size);
+    }
+
+    void next()
+    {
+        offsets[row_num] = current_offset;
+        ++row_num;
+    }
+
+    bool isEnd() const
+    {
+        return row_num == offsets.size();
+    }
+
+    size_t rowNum() const
+    {
+        return row_num;
+    }
+
+    void reserve(size_t num_elements)
+    {
+        elements.reserve(num_elements);
+    }
+};
+
+
+struct StringSink
+{
+    typename ColumnString::Chars_t & elements;
+    typename ColumnString::Offsets_t & offsets;
+
+    size_t row_num = 0;
+    ColumnString::Offset_t current_offset = 0;
+
+    StringSink(ColumnString & col, size_t column_size)
+        : elements(col.getChars()), offsets(col.getOffsets())
+    {
+        offsets.resize(column_size);
+    }
+
+    void ALWAYS_INLINE next()
+    {
+        elements.push_back(0);
+        ++current_offset;
+        offsets[row_num] = current_offset;
+        ++row_num;
+    }
+
+    bool isEnd() const
+    {
+        return row_num == offsets.size();
+    }
+
+    size_t rowNum() const
+    {
+        return row_num;
+    }
+
+    void reserve(size_t num_elements)
+    {
+        elements.reserve(num_elements);
+    }
+};
+
+
 struct FixedStringSink
 {
     typename ColumnString::Chars_t & elements;
@@ -421,6 +441,11 @@ struct FixedStringSink
     {
         return row_num;
     }
+
+    void reserve(size_t num_elements)
+    {
+        elements.reserve(num_elements);
+    }
 };
 
 
@@ -430,6 +455,7 @@ struct IStringSource
 
     virtual void next() = 0;
     virtual bool isEnd() const = 0;
+    virtual size_t getSizeForReserve() const = 0;
     virtual Slice getWhole() const = 0;
     virtual ~IStringSource() {}
 };
@@ -441,9 +467,10 @@ struct DynamicStringSource final : IStringSource
 
     DynamicStringSource(const IColumn & col) : impl(static_cast<const typename Impl::Column &>(col)) {}
 
-    void next() override { impl.next(); };
-    bool isEnd() const override { return impl.isEnd(); };
-    Slice getWhole() const override { return impl.getWhole(); };
+    void next() override { impl.next(); }
+    bool isEnd() const override { return impl.isEnd(); }
+    size_t getSizeForReserve() const override { return impl.getSizeForReserve(); }
+    Slice getWhole() const override { return impl.getWhole(); }
 };
 
 inline std::unique_ptr<IStringSource> createDynamicStringSource(const IColumn & col)
@@ -502,6 +529,11 @@ struct GenericArraySource
         return row_num;
     }
 
+    size_t getSizeForReserve() const
+    {
+        return elements.size();
+    }
+
     Slice getWhole() const
     {
         return {&elements, prev_offset, offsets[row_num] - prev_offset};
@@ -569,6 +601,11 @@ struct GenericArraySink
     {
         return row_num;
     }
+
+    void reserve(size_t num_elements)
+    {
+        elements.reserve(num_elements);
+    }
 };
 
 
@@ -618,6 +655,8 @@ inline ALWAYS_INLINE void writeSlice(const GenericArraySlice & slice, GenericArr
 template <typename SourceA, typename SourceB, typename Sink>
 void NO_INLINE concat(SourceA && src_a, SourceB && src_b, Sink && sink)
 {
+    sink.reserve(src_a.getSizeForReserve() + src_b.getSizeForReserve());
+
     while (!src_a.isEnd())
     {
         writeSlice(src_a.getWhole(), sink);
@@ -743,6 +782,8 @@ void NO_INLINE sliceDynamicOffsetBounded(Source && src, Sink && sink, IColumn &
 template <typename SourceA, typename SourceB, typename Sink>
 void NO_INLINE conditional(SourceA && src_a, SourceB && src_b, Sink && sink, const PaddedPODArray<UInt8> & condition)
 {
+    sink.reserve(std::max(src_a.getSizeForReserve(), src_b.getSizeForReserve()));
+
     const UInt8 * cond_pos = &condition[0];
     const UInt8 * cond_end = cond_pos + condition.size();
 

From 57b2dbfa93cc33830af8eaa9df4239d966c5f518 Mon Sep 17 00:00:00 2001
From: Alexey Milovidov <milovidov@yandex-team.ru>
Date: Sat, 5 Aug 2017 05:24:13 +0300
Subject: [PATCH 045/281] Added test [#CLICKHOUSE-3202].

---
 .../00486_if_fixed_string.reference           | 80 +++++++++++++++++++
 .../0_stateless/00486_if_fixed_string.sql     | 19 +++++
 2 files changed, 99 insertions(+)
 create mode 100644 dbms/tests/queries/0_stateless/00486_if_fixed_string.reference
 create mode 100644 dbms/tests/queries/0_stateless/00486_if_fixed_string.sql

diff --git a/dbms/tests/queries/0_stateless/00486_if_fixed_string.reference b/dbms/tests/queries/0_stateless/00486_if_fixed_string.reference
new file mode 100644
index 00000000000..34cd1f1cf8c
--- /dev/null
+++ b/dbms/tests/queries/0_stateless/00486_if_fixed_string.reference
@@ -0,0 +1,80 @@
+world
+hello
+world
+hello
+world
+world
+hello
+world
+hello
+world
+world
+hello
+world
+hello
+world
+world
+hello
+world
+hello
+world
+world
+hello
+world
+hello
+world
+world
+hello
+world
+hello
+world
+world
+hello
+world
+hello
+world
+world
+hello
+world
+hello
+world
+world
+hello
+world
+hello
+world
+world
+hello
+world
+hello
+world
+world
+hello
+world
+hello
+world
+world
+hello
+world
+hello
+world
+world
+hello
+world
+hello
+world
+world
+hello
+world
+hello
+world
+world
+hello
+world
+hello
+world
+world
+hello
+world
+hello
+world
diff --git a/dbms/tests/queries/0_stateless/00486_if_fixed_string.sql b/dbms/tests/queries/0_stateless/00486_if_fixed_string.sql
new file mode 100644
index 00000000000..f1ef7c441a3
--- /dev/null
+++ b/dbms/tests/queries/0_stateless/00486_if_fixed_string.sql
@@ -0,0 +1,19 @@
+SELECT number % 2 ? 'hello' : 'world' FROM system.numbers LIMIT 5;
+SELECT number % 2 ? materialize('hello') : 'world' FROM system.numbers LIMIT 5;
+SELECT number % 2 ? 'hello' : materialize('world') FROM system.numbers LIMIT 5;
+SELECT number % 2 ? materialize('hello') : materialize('world') FROM system.numbers LIMIT 5;
+
+SELECT number % 2 ? toFixedString('hello', 5) : 'world' FROM system.numbers LIMIT 5;
+SELECT number % 2 ? materialize(toFixedString('hello', 5)) : 'world' FROM system.numbers LIMIT 5;
+SELECT number % 2 ? toFixedString('hello', 5) : materialize('world') FROM system.numbers LIMIT 5;
+SELECT number % 2 ? materialize(toFixedString('hello', 5)) : materialize('world') FROM system.numbers LIMIT 5;
+
+SELECT number % 2 ? 'hello' : toFixedString('world', 5) FROM system.numbers LIMIT 5;
+SELECT number % 2 ? materialize('hello') : toFixedString('world', 5) FROM system.numbers LIMIT 5;
+SELECT number % 2 ? 'hello' : materialize(toFixedString('world', 5)) FROM system.numbers LIMIT 5;
+SELECT number % 2 ? materialize('hello') : materialize(toFixedString('world', 5)) FROM system.numbers LIMIT 5;
+
+SELECT number % 2 ? toFixedString('hello', 5) : toFixedString('world', 5) FROM system.numbers LIMIT 5;
+SELECT number % 2 ? materialize(toFixedString('hello', 5)) : toFixedString('world', 5) FROM system.numbers LIMIT 5;
+SELECT number % 2 ? toFixedString('hello', 5) : materialize(toFixedString('world', 5)) FROM system.numbers LIMIT 5;
+SELECT number % 2 ? materialize(toFixedString('hello', 5)) : materialize(toFixedString('world', 5)) FROM system.numbers LIMIT 5;

From 767d025fb35b29344b40e6e9a0e6080dfc274e00 Mon Sep 17 00:00:00 2001
From: Alexey Milovidov <milovidov@yandex-team.ru>
Date: Sat, 5 Aug 2017 02:18:51 +0300
Subject: [PATCH 046/281] Fixed function 'if' for FixedString arguments
 [#CLICKHOUSE-3202].

---
 dbms/src/Functions/FunctionsConditional.h | 146 +++++++++-------------
 dbms/src/Functions/GatherUtils.h          |  28 ++++-
 2 files changed, 89 insertions(+), 85 deletions(-)

diff --git a/dbms/src/Functions/FunctionsConditional.h b/dbms/src/Functions/FunctionsConditional.h
index 80cd1b4a867..678d88a3b79 100644
--- a/dbms/src/Functions/FunctionsConditional.h
+++ b/dbms/src/Functions/FunctionsConditional.h
@@ -17,6 +17,8 @@
 #include <Functions/FunctionHelpers.h>
 #include <DataTypes/NumberTraits.h>
 #include <DataTypes/DataTypeTraits.h>
+#include <Functions/GatherUtils.h>
+
 
 namespace DB
 {
@@ -1114,92 +1116,68 @@ private:
         const ColumnString * col_else = checkAndGetColumn<ColumnString>(col_else_untyped);
         const ColumnFixedString * col_then_fixed = checkAndGetColumn<ColumnFixedString>(col_then_untyped);
         const ColumnFixedString * col_else_fixed = checkAndGetColumn<ColumnFixedString>(col_else_untyped);
-        const ColumnConst * col_then_const = checkAndGetColumnConstStringOrFixedString(col_then_untyped);
-        const ColumnConst * col_else_const = checkAndGetColumnConstStringOrFixedString(col_else_untyped);
+        const ColumnConst * col_then_const = checkAndGetColumnConst<ColumnString>(col_then_untyped);
+        const ColumnConst * col_else_const = checkAndGetColumnConst<ColumnString>(col_else_untyped);
+        const ColumnConst * col_then_const_fixed = checkAndGetColumnConst<ColumnFixedString>(col_then_untyped);
+        const ColumnConst * col_else_const_fixed = checkAndGetColumnConst<ColumnFixedString>(col_else_untyped);
 
-        if ((col_then || col_then_const || col_then_fixed) && (col_else || col_else_const || col_else_fixed))
+        const PaddedPODArray<UInt8> & cond_data = cond_col->getData();
+        size_t rows = cond_data.size();
+
+        if ((col_then_fixed || col_then_const_fixed)
+            && (col_else_fixed || col_else_const_fixed))
         {
+            /// The result is FixedString.
+
+            auto col_res_untyped = col_then_untyped->cloneEmpty();
+            block.getByPosition(result).column = col_res_untyped;
+            ColumnFixedString * col_res = static_cast<ColumnFixedString *>(col_res_untyped.get());
+            auto sink = FixedStringSink(*col_res, rows);
+
             if (col_then_fixed && col_else_fixed)
-            {
-                /// The result is FixedString.
+                conditional(FixedStringSource(*col_then_fixed), FixedStringSource(*col_else_fixed), sink, cond_data);
+            else if (col_then_fixed && col_else_const_fixed)
+                conditional(FixedStringSource(*col_then_fixed), ConstSource<FixedStringSource>(*col_else_const_fixed), sink, cond_data);
+            else if (col_then_const_fixed && col_else_fixed)
+                conditional(ConstSource<FixedStringSource>(*col_then_const_fixed), FixedStringSource(*col_else_fixed), sink, cond_data);
+            else if (col_then_const_fixed && col_else_const_fixed)
+                conditional(ConstSource<FixedStringSource>(*col_then_const_fixed), ConstSource<FixedStringSource>(*col_else_const_fixed), sink, cond_data);
 
-                if (col_then_fixed->getN() != col_else_fixed->getN())
-                    throw Exception("FixedString columns as 'then' and 'else' arguments of function 'if' has different sizes", ErrorCodes::ILLEGAL_COLUMN);
+            return true;
+        }
 
-                size_t N = col_then_fixed->getN();
+        if ((col_then || col_then_const || col_then_fixed || col_then_const_fixed)
+            && (col_else || col_else_const || col_else_fixed || col_else_const_fixed))
+        {
+            /// The result is String.
+            std::shared_ptr<ColumnString> col_res = std::make_shared<ColumnString>();
+            block.getByPosition(result).column = col_res;
+            auto sink = StringSink(*col_res, rows);
 
-                auto col_res = std::make_shared<ColumnFixedString>(N);
-                block.getByPosition(result).column = col_res;
-
-                ColumnFixedString::Chars_t & res_vec = col_res->getChars();
-
-                StringIfImpl::vector_fixed_vector_fixed(
-                    cond_col->getData(),
-                    col_then_fixed->getChars(),
-                    col_else_fixed->getChars(),
-                    N,
-                    res_vec);
-            }
-            else
-            {
-                /// The result is String.
-                std::shared_ptr<ColumnString> col_res = std::make_shared<ColumnString>();
-                block.getByPosition(result).column = col_res;
-
-                ColumnString::Chars_t & res_vec = col_res->getChars();
-                ColumnString::Offsets_t & res_offsets = col_res->getOffsets();
-
-                if (col_then && col_else)
-                    StringIfImpl::vector_vector(
-                        cond_col->getData(),
-                        col_then->getChars(), col_then->getOffsets(),
-                        col_else->getChars(), col_else->getOffsets(),
-                        res_vec, res_offsets);
-                else if (col_then && col_else_const)
-                    StringIfImpl::vector_constant(
-                        cond_col->getData(),
-                        col_then->getChars(), col_then->getOffsets(),
-                        col_else_const->getValue<String>(),
-                        res_vec, res_offsets);
-                else if (col_then_const && col_else)
-                    StringIfImpl::constant_vector(
-                        cond_col->getData(),
-                        col_then_const->getValue<String>(),
-                        col_else->getChars(), col_else->getOffsets(),
-                        res_vec, res_offsets);
-                else if (col_then_const && col_else_const)
-                    StringIfImpl::constant_constant(
-                        cond_col->getData(),
-                        col_then_const->getValue<String>(),
-                        col_else_const->getValue<String>(),
-                        res_vec, res_offsets);
-                else if (col_then && col_else_fixed)
-                    StringIfImpl::vector_vector_fixed(
-                        cond_col->getData(),
-                        col_then->getChars(), col_then->getOffsets(),
-                        col_else_fixed->getChars(), col_else_fixed->getN(),
-                        res_vec, res_offsets);
-                else if (col_then_fixed && col_else)
-                    StringIfImpl::vector_fixed_vector(
-                        cond_col->getData(),
-                        col_then_fixed->getChars(), col_then_fixed->getN(),
-                        col_else->getChars(), col_else->getOffsets(),
-                        res_vec, res_offsets);
-                else if (col_then_const && col_else_fixed)
-                    StringIfImpl::constant_vector_fixed(
-                        cond_col->getData(),
-                        col_then_const->getValue<String>(),
-                        col_else_fixed->getChars(), col_else_fixed->getN(),
-                        res_vec, res_offsets);
-                else if (col_then_fixed && col_else_const)
-                    StringIfImpl::vector_fixed_constant(
-                        cond_col->getData(),
-                        col_then_fixed->getChars(), col_then_fixed->getN(),
-                        col_else_const->getValue<String>(),
-                        res_vec, res_offsets);
-                else
-                    return false;
-            }
+            if (col_then && col_else)
+                conditional(StringSource(*col_then), StringSource(*col_else), sink, cond_data);
+            else if (col_then && col_else_const)
+                conditional(StringSource(*col_then), ConstSource<StringSource>(*col_else_const), sink, cond_data);
+            else if (col_then_const && col_else)
+                conditional(ConstSource<StringSource>(*col_then_const), StringSource(*col_else), sink, cond_data);
+            else if (col_then_const && col_else_const)
+                conditional(ConstSource<StringSource>(*col_then_const), ConstSource<StringSource>(*col_else_const), sink, cond_data);
+            else if (col_then && col_else_fixed)
+                conditional(StringSource(*col_then), FixedStringSource(*col_else_fixed), sink, cond_data);
+            else if (col_then_fixed && col_else)
+                conditional(FixedStringSource(*col_then_fixed), StringSource(*col_else), sink, cond_data);
+            else if (col_then_const && col_else_fixed)
+                conditional(ConstSource<StringSource>(*col_then_const), FixedStringSource(*col_else_fixed), sink, cond_data);
+            else if (col_then_fixed && col_else_const)
+                conditional(FixedStringSource(*col_then_fixed), ConstSource<StringSource>(*col_else_const), sink, cond_data);
+            else if (col_then && col_else_const_fixed)
+                conditional(StringSource(*col_then), ConstSource<FixedStringSource>(*col_else_const_fixed), sink, cond_data);
+            else if (col_then_const_fixed && col_else)
+                conditional(ConstSource<FixedStringSource>(*col_then_const_fixed), StringSource(*col_else), sink, cond_data);
+            else if (col_then_const && col_else_const_fixed)
+                conditional(ConstSource<StringSource>(*col_then_const), ConstSource<FixedStringSource>(*col_else_const_fixed), sink, cond_data);
+            else if (col_then_const_fixed && col_else_const)
+                conditional(ConstSource<FixedStringSource>(*col_then_const_fixed), ConstSource<StringSource>(*col_else_const), sink, cond_data);
 
             return true;
         }
@@ -1224,25 +1202,25 @@ private:
 
             if (col_then_elements && col_else_elements)
                 StringArrayIfImpl::vector_vector(
-                    cond_col->getData(),
+                    cond_data,
                     col_then_elements->getChars(), col_then_elements->getOffsets(), col_arr_then->getOffsets(),
                     col_else_elements->getChars(), col_else_elements->getOffsets(), col_arr_else->getOffsets(),
                     res_chars, res_string_offsets, res_array_offsets);
             else if (col_then_elements && col_arr_else_const)
                 StringArrayIfImpl::vector_constant(
-                    cond_col->getData(),
+                    cond_data,
                     col_then_elements->getChars(), col_then_elements->getOffsets(), col_arr_then->getOffsets(),
                     col_arr_else_const->getValue<Array>(),
                     res_chars, res_string_offsets, res_array_offsets);
             else if (col_arr_then_const && col_else_elements)
                 StringArrayIfImpl::constant_vector(
-                    cond_col->getData(),
+                    cond_data,
                     col_arr_then_const->getValue<Array>(),
                     col_else_elements->getChars(), col_else_elements->getOffsets(), col_arr_else->getOffsets(),
                     res_chars, res_string_offsets, res_array_offsets);
             else if (col_arr_then_const && col_arr_else_const)
                 StringArrayIfImpl::constant_constant(
-                    cond_col->getData(),
+                    cond_data,
                     col_arr_then_const->getValue<Array>(),
                     col_arr_else_const->getValue<Array>(),
                     res_chars, res_string_offsets, res_array_offsets);
diff --git a/dbms/src/Functions/GatherUtils.h b/dbms/src/Functions/GatherUtils.h
index eff7b1e3eed..b4c9e6c0069 100644
--- a/dbms/src/Functions/GatherUtils.h
+++ b/dbms/src/Functions/GatherUtils.h
@@ -390,7 +390,7 @@ struct FixedStringSink
     ColumnString::Offset_t current_offset = 0;
 
     FixedStringSink(ColumnFixedString & col, size_t column_size)
-        : elements(col.getChars()), total_rows(column_size)
+        : elements(col.getChars()), string_size(col.getN()), total_rows(column_size)
     {
         elements.resize(column_size * string_size);
     }
@@ -589,6 +589,11 @@ inline void writeSlice(const StringSource::Slice & slice, StringSink & sink)
     sink.current_offset += slice.size;
 }
 
+inline void writeSlice(const StringSource::Slice & slice, FixedStringSink & sink)
+{
+    memcpySmallAllowReadWriteOverflow15(&sink.elements[sink.current_offset], slice.data, slice.size);
+}
+
 /// Assuming same types of underlying columns for slice and sink.
 inline void writeSlice(const GenericArraySlice & slice, GenericArraySink & sink)
 {
@@ -723,4 +728,25 @@ void sliceDynamicOffsetBounded(Source && src, Sink && sink, IColumn & offset_col
     }
 }
 
+
+template <typename SourceA, typename SourceB, typename Sink>
+void conditional(SourceA && src_a, SourceB && src_b, Sink && sink, const PaddedPODArray<UInt8> & condition)
+{
+    const UInt8 * cond_pos = &condition[0];
+    const UInt8 * cond_end = cond_pos + condition.size();
+
+    while (cond_pos < cond_end)
+    {
+        if (*cond_pos)
+            writeSlice(src_a.getWhole(), sink);
+        else
+            writeSlice(src_b.getWhole(), sink);
+
+        ++cond_pos;
+        src_a.next();
+        src_b.next();
+        sink.next();
+    }
+}
+
 }

From 0bb8b32cb92ac3d34f950a5b841f2f723bdae427 Mon Sep 17 00:00:00 2001
From: Alexey Milovidov <milovidov@yandex-team.ru>
Date: Sat, 5 Aug 2017 04:43:29 +0300
Subject: [PATCH 047/281] Fixed function 'if' for FixedString arguments
 [#CLICKHOUSE-3202].

---
 dbms/src/Functions/FunctionsConditional.h |  2 +-
 dbms/src/Functions/GatherUtils.h          | 34 +++++++++++------------
 2 files changed, 18 insertions(+), 18 deletions(-)

diff --git a/dbms/src/Functions/FunctionsConditional.h b/dbms/src/Functions/FunctionsConditional.h
index 678d88a3b79..8955215b0a5 100644
--- a/dbms/src/Functions/FunctionsConditional.h
+++ b/dbms/src/Functions/FunctionsConditional.h
@@ -1129,7 +1129,7 @@ private:
         {
             /// The result is FixedString.
 
-            auto col_res_untyped = col_then_untyped->cloneEmpty();
+            auto col_res_untyped = block.getByPosition(result).type->createColumn();
             block.getByPosition(result).column = col_res_untyped;
             ColumnFixedString * col_res = static_cast<ColumnFixedString *>(col_res_untyped.get());
             auto sink = FixedStringSink(*col_res, rows);
diff --git a/dbms/src/Functions/GatherUtils.h b/dbms/src/Functions/GatherUtils.h
index b4c9e6c0069..91805713e29 100644
--- a/dbms/src/Functions/GatherUtils.h
+++ b/dbms/src/Functions/GatherUtils.h
@@ -316,28 +316,28 @@ struct FixedStringSource
     using Slice = NumericArraySlice<UInt8>;
     using Column = ColumnFixedString;
 
-    const typename ColumnString::Chars_t & elements;
+    const UInt8 * pos;
+    const UInt8 * end;
     size_t string_size;
-
     size_t row_num = 0;
-    size_t total_rows;
-
-    ColumnString::Offset_t prev_offset = 0;
 
     FixedStringSource(const ColumnFixedString & col)
-        : elements(col.getChars()), string_size(col.getN()), total_rows(col.size())
+        : string_size(col.getN())
     {
+        const auto & chars = col.getChars();
+        pos = chars.data();
+        end = pos + col.size();
     }
 
     void next()
     {
-        prev_offset += string_size;
+        pos += string_size;
         ++row_num;
     }
 
     bool isEnd() const
     {
-        return row_num == total_rows;
+        return pos == end;
     }
 
     size_t rowNum() const
@@ -347,35 +347,35 @@ struct FixedStringSource
 
     Slice getWhole() const
     {
-        return {&elements[prev_offset], string_size};
+        return {pos, string_size};
     }
 
     Slice getSliceFromLeft(size_t offset) const
     {
         if (offset >= string_size)
-            return {&elements[prev_offset], 0};
-        return {&elements[prev_offset + offset], string_size - offset};
+            return {pos, 0};
+        return {pos + offset, string_size - offset};
     }
 
     Slice getSliceFromLeft(size_t offset, size_t length) const
     {
         if (offset >= string_size)
-            return {&elements[prev_offset], 0};
-        return {&elements[prev_offset + offset], std::min(length, string_size - offset)};
+            return {pos, 0};
+        return {pos + offset, std::min(length, string_size - offset)};
     }
 
     Slice getSliceFromRight(size_t offset) const
     {
         if (offset >= string_size)
-            return {&elements[prev_offset], 0};
-        return {&elements[prev_offset + string_size - offset], offset};
+            return {pos, 0};
+        return {pos + string_size - offset, offset};
     }
 
     Slice getSliceFromRight(size_t offset, size_t length) const
     {
         if (offset >= string_size)
-            return {&elements[prev_offset], 0};
-        return {&elements[prev_offset + string_size - offset], std::min(length, offset)};
+            return {pos, 0};
+        return {pos + string_size - offset, std::min(length, offset)};
     }
 };
 

From dadf5ac4005e7116cdfe69f1c4b8415ceae222b3 Mon Sep 17 00:00:00 2001
From: Alexey Milovidov <milovidov@yandex-team.ru>
Date: Sat, 5 Aug 2017 04:54:38 +0300
Subject: [PATCH 048/281] Fixed function 'if' for FixedString arguments
 [#CLICKHOUSE-3202].

---
 dbms/src/Functions/FunctionsConditional.h | 257 ----------------------
 dbms/src/Functions/GatherUtils.h          |  12 +-
 2 files changed, 6 insertions(+), 263 deletions(-)

diff --git a/dbms/src/Functions/FunctionsConditional.h b/dbms/src/Functions/FunctionsConditional.h
index 8955215b0a5..5391cb053f3 100644
--- a/dbms/src/Functions/FunctionsConditional.h
+++ b/dbms/src/Functions/FunctionsConditional.h
@@ -140,263 +140,6 @@ public:
 };
 
 
-struct StringIfImpl
-{
-    static void vector_vector(
-        const PaddedPODArray<UInt8> & cond,
-        const ColumnString::Chars_t & a_data, const ColumnString::Offsets_t & a_offsets,
-        const ColumnString::Chars_t & b_data, const ColumnString::Offsets_t & b_offsets,
-        ColumnString::Chars_t & c_data, ColumnString::Offsets_t & c_offsets)
-    {
-        size_t size = cond.size();
-        c_offsets.resize(size);
-        c_data.reserve(std::max(a_data.size(), b_data.size()));
-
-        ColumnString::Offset_t a_prev_offset = 0;
-        ColumnString::Offset_t b_prev_offset = 0;
-        ColumnString::Offset_t c_prev_offset = 0;
-
-        for (size_t i = 0; i < size; ++i)
-        {
-            if (cond[i])
-            {
-                size_t size_to_write = a_offsets[i] - a_prev_offset;
-                c_data.resize(c_data.size() + size_to_write);
-                memcpySmallAllowReadWriteOverflow15(&c_data[c_prev_offset], &a_data[a_prev_offset], size_to_write);
-                c_prev_offset += size_to_write;
-                c_offsets[i] = c_prev_offset;
-            }
-            else
-            {
-                size_t size_to_write = b_offsets[i] - b_prev_offset;
-                c_data.resize(c_data.size() + size_to_write);
-                memcpySmallAllowReadWriteOverflow15(&c_data[c_prev_offset], &b_data[b_prev_offset], size_to_write);
-                c_prev_offset += size_to_write;
-                c_offsets[i] = c_prev_offset;
-            }
-
-            a_prev_offset = a_offsets[i];
-            b_prev_offset = b_offsets[i];
-        }
-    }
-
-    static void vector_fixed_vector_fixed(
-        const PaddedPODArray<UInt8> & cond,
-        const ColumnFixedString::Chars_t & a_data,
-        const ColumnFixedString::Chars_t & b_data,
-        const size_t N,
-        ColumnFixedString::Chars_t & c_data)
-    {
-        size_t size = cond.size();
-        c_data.resize(a_data.size());
-
-        for (size_t i = 0; i < size; ++i)
-        {
-            if (cond[i])
-                memcpySmallAllowReadWriteOverflow15(&c_data[i * N], &a_data[i * N], N);
-            else
-                memcpySmallAllowReadWriteOverflow15(&c_data[i * N], &b_data[i * N], N);
-        }
-    }
-
-    template <bool negative>
-    static void vector_vector_fixed_impl(
-        const PaddedPODArray<UInt8> & cond,
-        const ColumnString::Chars_t & a_data, const ColumnString::Offsets_t & a_offsets,
-        const ColumnFixedString::Chars_t & b_data, const size_t b_N,
-        ColumnString::Chars_t & c_data, ColumnString::Offsets_t & c_offsets)
-    {
-        size_t size = cond.size();
-        c_offsets.resize(size);
-        c_data.reserve(std::max(a_data.size(), b_data.size() + size));
-
-        ColumnString::Offset_t a_prev_offset = 0;
-        ColumnString::Offset_t c_prev_offset = 0;
-
-        for (size_t i = 0; i < size; ++i)
-        {
-            if (negative != cond[i])
-            {
-                size_t size_to_write = a_offsets[i] - a_prev_offset;
-                c_data.resize(c_data.size() + size_to_write);
-                memcpySmallAllowReadWriteOverflow15(&c_data[c_prev_offset], &a_data[a_prev_offset], size_to_write);
-                c_prev_offset += size_to_write;
-                c_offsets[i] = c_prev_offset;
-            }
-            else
-            {
-                size_t size_to_write = b_N;
-                c_data.resize(c_data.size() + size_to_write + 1);
-                memcpySmallAllowReadWriteOverflow15(&c_data[c_prev_offset], &b_data[i * b_N], size_to_write);
-                c_data.back() = 0;
-                c_prev_offset += size_to_write + 1;
-                c_offsets[i] = c_prev_offset;
-            }
-
-            a_prev_offset = a_offsets[i];
-        }
-    }
-
-    static void vector_vector_fixed(
-        const PaddedPODArray<UInt8> & cond,
-        const ColumnString::Chars_t & a_data, const ColumnString::Offsets_t & a_offsets,
-        const ColumnFixedString::Chars_t & b_data, const size_t b_N,
-        ColumnString::Chars_t & c_data, ColumnString::Offsets_t & c_offsets)
-    {
-        vector_vector_fixed_impl<false>(cond, a_data, a_offsets, b_data, b_N, c_data, c_offsets);
-    }
-
-    static void vector_fixed_vector(
-        const PaddedPODArray<UInt8> & cond,
-        const ColumnFixedString::Chars_t & a_data, const size_t a_N,
-        const ColumnString::Chars_t & b_data, const ColumnString::Offsets_t & b_offsets,
-        ColumnString::Chars_t & c_data, ColumnString::Offsets_t & c_offsets)
-    {
-        vector_vector_fixed_impl<true>(cond, b_data, b_offsets, a_data, a_N, c_data, c_offsets);
-    }
-
-    template <bool negative>
-    static void vector_constant_impl(
-        const PaddedPODArray<UInt8> & cond,
-        const ColumnString::Chars_t & a_data, const ColumnString::Offsets_t & a_offsets,
-        const String & b,
-        ColumnString::Chars_t & c_data, ColumnString::Offsets_t & c_offsets)
-    {
-        size_t size = cond.size();
-        c_offsets.resize(size);
-        c_data.reserve(a_data.size());
-
-        ColumnString::Offset_t a_prev_offset = 0;
-        ColumnString::Offset_t c_prev_offset = 0;
-
-        for (size_t i = 0; i < size; ++i)
-        {
-            if (negative != cond[i])
-            {
-                size_t size_to_write = a_offsets[i] - a_prev_offset;
-                c_data.resize(c_data.size() + size_to_write);
-                memcpySmallAllowReadWriteOverflow15(&c_data[c_prev_offset], &a_data[a_prev_offset], size_to_write);
-                c_prev_offset += size_to_write;
-                c_offsets[i] = c_prev_offset;
-            }
-            else
-            {
-                size_t size_to_write = b.size() + 1;
-                c_data.resize(c_data.size() + size_to_write);
-                memcpy(&c_data[c_prev_offset], b.data(), size_to_write);
-                c_prev_offset += size_to_write;
-                c_offsets[i] = c_prev_offset;
-            }
-
-            a_prev_offset = a_offsets[i];
-        }
-    }
-
-    static void vector_constant(
-        const PaddedPODArray<UInt8> & cond,
-        const ColumnString::Chars_t & a_data, const ColumnString::Offsets_t & a_offsets,
-        const String & b,
-        ColumnString::Chars_t & c_data, ColumnString::Offsets_t & c_offsets)
-    {
-        return vector_constant_impl<false>(cond, a_data, a_offsets, b, c_data, c_offsets);
-    }
-
-    static void constant_vector(
-        const PaddedPODArray<UInt8> & cond,
-        const String & a,
-        const ColumnString::Chars_t & b_data, const ColumnString::Offsets_t & b_offsets,
-        ColumnString::Chars_t & c_data, ColumnString::Offsets_t & c_offsets)
-    {
-        return vector_constant_impl<true>(cond, b_data, b_offsets, a, c_data, c_offsets);
-    }
-
-    template <bool negative>
-    static void vector_fixed_constant_impl(
-        const PaddedPODArray<UInt8> & cond,
-        const ColumnFixedString::Chars_t & a_data, const size_t a_N,
-        const String & b,
-        ColumnString::Chars_t & c_data, ColumnString::Offsets_t & c_offsets)
-    {
-        size_t size = cond.size();
-        c_offsets.resize(size);
-        c_data.reserve(a_data.size());
-
-        ColumnString::Offset_t c_prev_offset = 0;
-
-        for (size_t i = 0; i < size; ++i)
-        {
-            if (negative != cond[i])
-            {
-                size_t size_to_write = a_N;
-                c_data.resize(c_data.size() + size_to_write + 1);
-                memcpySmallAllowReadWriteOverflow15(&c_data[c_prev_offset], &a_data[i * a_N], size_to_write);
-                c_data.back() = 0;
-                c_prev_offset += size_to_write + 1;
-                c_offsets[i] = c_prev_offset;
-            }
-            else
-            {
-                size_t size_to_write = b.size() + 1;
-                c_data.resize(c_data.size() + size_to_write);
-                memcpy(&c_data[c_prev_offset], b.data(), size_to_write);
-                c_prev_offset += size_to_write;
-                c_offsets[i] = c_prev_offset;
-            }
-        }
-    }
-
-    static void vector_fixed_constant(
-        const PaddedPODArray<UInt8> & cond,
-        const ColumnFixedString::Chars_t & a_data, const size_t N,
-        const String & b,
-        ColumnString::Chars_t & c_data, ColumnString::Offsets_t & c_offsets)
-    {
-        vector_fixed_constant_impl<false>(cond, a_data, N, b, c_data, c_offsets);
-    }
-
-    static void constant_vector_fixed(
-        const PaddedPODArray<UInt8> & cond,
-        const String & a,
-        const ColumnFixedString::Chars_t & b_data, const size_t N,
-        ColumnString::Chars_t & c_data, ColumnString::Offsets_t & c_offsets)
-    {
-        vector_fixed_constant_impl<true>(cond, b_data, N, a, c_data, c_offsets);
-    }
-
-    static void constant_constant(
-        const PaddedPODArray<UInt8> & cond,
-        const String & a, const String & b,
-        ColumnString::Chars_t & c_data, ColumnString::Offsets_t & c_offsets)
-    {
-        size_t size = cond.size();
-        c_offsets.resize(size);
-        c_data.reserve((std::max(a.size(), b.size()) + 1) * size);
-
-        ColumnString::Offset_t c_prev_offset = 0;
-
-        for (size_t i = 0; i < size; ++i)
-        {
-            if (cond[i])
-            {
-                size_t size_to_write = a.size() + 1;
-                c_data.resize(c_data.size() + size_to_write);
-                memcpy(&c_data[c_prev_offset], a.data(), size_to_write);
-                c_prev_offset += size_to_write;
-                c_offsets[i] = c_prev_offset;
-            }
-            else
-            {
-                size_t size_to_write = b.size() + 1;
-                c_data.resize(c_data.size() + size_to_write);
-                memcpy(&c_data[c_prev_offset], b.data(), size_to_write);
-                c_prev_offset += size_to_write;
-                c_offsets[i] = c_prev_offset;
-            }
-        }
-    }
-};
-
-
 template <typename A, typename B, typename ResultType>
 struct NumArrayIfImpl
 {
diff --git a/dbms/src/Functions/GatherUtils.h b/dbms/src/Functions/GatherUtils.h
index 91805713e29..22329fe3bd4 100644
--- a/dbms/src/Functions/GatherUtils.h
+++ b/dbms/src/Functions/GatherUtils.h
@@ -424,7 +424,7 @@ struct IStringSource
 };
 
 template <typename Impl>
-struct DynamicStringSource : IStringSource
+struct DynamicStringSource final : IStringSource
 {
     Impl impl;
 
@@ -564,7 +564,7 @@ struct GenericArraySink
 /// Methods to copy Slice to Sink, overloaded for various combinations of types.
 
 template <typename T>
-void writeSlice(const NumericArraySlice<T> & slice, NumericArraySink<T> & sink)
+void ALWAYS_INLINE writeSlice(const NumericArraySlice<T> & slice, NumericArraySink<T> & sink)
 {
     sink.elements.resize(sink.current_offset + slice.size);
     memcpySmallAllowReadWriteOverflow15(&sink.elements[sink.current_offset], slice.data, slice.size * sizeof(T));
@@ -572,7 +572,7 @@ void writeSlice(const NumericArraySlice<T> & slice, NumericArraySink<T> & sink)
 }
 
 template <typename T, typename U>
-void writeSlice(const NumericArraySlice<T> & slice, NumericArraySink<U> & sink)
+void ALWAYS_INLINE writeSlice(const NumericArraySlice<T> & slice, NumericArraySink<U> & sink)
 {
     sink.elements.resize(sink.current_offset + slice.size);
     for (size_t i = 0; i < slice.size; ++i)
@@ -582,20 +582,20 @@ void writeSlice(const NumericArraySlice<T> & slice, NumericArraySink<U> & sink)
     }
 }
 
-inline void writeSlice(const StringSource::Slice & slice, StringSink & sink)
+inline ALWAYS_INLINE void writeSlice(const StringSource::Slice & slice, StringSink & sink)
 {
     sink.elements.resize(sink.current_offset + slice.size);
     memcpySmallAllowReadWriteOverflow15(&sink.elements[sink.current_offset], slice.data, slice.size);
     sink.current_offset += slice.size;
 }
 
-inline void writeSlice(const StringSource::Slice & slice, FixedStringSink & sink)
+inline ALWAYS_INLINE void writeSlice(const StringSource::Slice & slice, FixedStringSink & sink)
 {
     memcpySmallAllowReadWriteOverflow15(&sink.elements[sink.current_offset], slice.data, slice.size);
 }
 
 /// Assuming same types of underlying columns for slice and sink.
-inline void writeSlice(const GenericArraySlice & slice, GenericArraySink & sink)
+inline ALWAYS_INLINE void writeSlice(const GenericArraySlice & slice, GenericArraySink & sink)
 {
     sink.elements.insertRangeFrom(*slice.elements, slice.begin, slice.size);
     sink.current_offset += slice.size;

From 03662e3d77cc986457479b251c697b61d6dbb12b Mon Sep 17 00:00:00 2001
From: Alexey Milovidov <milovidov@yandex-team.ru>
Date: Sat, 5 Aug 2017 05:03:49 +0300
Subject: [PATCH 049/281] Fixed function if of FixedString arguments
 [#CLICKHOUSE-3202].

---
 dbms/src/Functions/GatherUtils.h | 31 +++++++++++++++++++++----------
 1 file changed, 21 insertions(+), 10 deletions(-)

diff --git a/dbms/src/Functions/GatherUtils.h b/dbms/src/Functions/GatherUtils.h
index 22329fe3bd4..9f898e24e30 100644
--- a/dbms/src/Functions/GatherUtils.h
+++ b/dbms/src/Functions/GatherUtils.h
@@ -76,6 +76,12 @@ struct NumericArraySource
         return row_num;
     }
 
+    /// Get size for corresponding call of Sink::reserve to reserve memory for elements.
+    size_t getSizeForReserve() const
+    {
+        return elements.size();
+    }
+
     Slice getWhole() const
     {
         return {&elements[prev_offset], offsets[row_num] - prev_offset};
@@ -146,6 +152,11 @@ struct ConstSource
         return row_num;
     }
 
+    size_t getSizeForReserve() const
+    {
+        return total_rows * base.getSizeForReserve();
+    }
+
     Slice getWhole() const
     {
         return base.getWhole();
@@ -291,7 +302,7 @@ struct StringSink
         offsets.resize(column_size);
     }
 
-    void next()
+    void ALWAYS_INLINE next()
     {
         elements.push_back(0);
         ++current_offset;
@@ -605,7 +616,7 @@ inline ALWAYS_INLINE void writeSlice(const GenericArraySlice & slice, GenericArr
 /// Algorithms
 
 template <typename SourceA, typename SourceB, typename Sink>
-void concat(SourceA && src_a, SourceB && src_b, Sink && sink)
+void NO_INLINE concat(SourceA && src_a, SourceB && src_b, Sink && sink)
 {
     while (!src_a.isEnd())
     {
@@ -619,7 +630,7 @@ void concat(SourceA && src_a, SourceB && src_b, Sink && sink)
 }
 
 template <typename Sink>
-void concat(StringSources & sources, Sink && sink)
+void NO_INLINE concat(StringSources & sources, Sink && sink)
 {
     while (!sink.isEnd())
     {
@@ -634,7 +645,7 @@ void concat(StringSources & sources, Sink && sink)
 
 
 template <typename Source, typename Sink>
-void sliceFromLeftConstantOffsetUnbounded(Source && src, Sink && sink, size_t offset)
+void NO_INLINE sliceFromLeftConstantOffsetUnbounded(Source && src, Sink && sink, size_t offset)
 {
     while (!src.isEnd())
     {
@@ -645,7 +656,7 @@ void sliceFromLeftConstantOffsetUnbounded(Source && src, Sink && sink, size_t of
 }
 
 template <typename Source, typename Sink>
-void sliceFromLeftConstantOffsetBounded(Source && src, Sink && sink, size_t offset, size_t length)
+void NO_INLINE sliceFromLeftConstantOffsetBounded(Source && src, Sink && sink, size_t offset, size_t length)
 {
     while (!src.isEnd())
     {
@@ -656,7 +667,7 @@ void sliceFromLeftConstantOffsetBounded(Source && src, Sink && sink, size_t offs
 }
 
 template <typename Source, typename Sink>
-void sliceFromRightConstantOffsetUnbounded(Source && src, Sink && sink, size_t offset)
+void NO_INLINE sliceFromRightConstantOffsetUnbounded(Source && src, Sink && sink, size_t offset)
 {
     while (!src.isEnd())
     {
@@ -667,7 +678,7 @@ void sliceFromRightConstantOffsetUnbounded(Source && src, Sink && sink, size_t o
 }
 
 template <typename Source, typename Sink>
-void sliceFromRightConstantOffsetBounded(Source && src, Sink && sink, size_t offset, size_t length)
+void NO_INLINE sliceFromRightConstantOffsetBounded(Source && src, Sink && sink, size_t offset, size_t length)
 {
     while (!src.isEnd())
     {
@@ -679,7 +690,7 @@ void sliceFromRightConstantOffsetBounded(Source && src, Sink && sink, size_t off
 
 
 template <typename Source, typename Sink>
-void sliceDynamicOffsetUnbounded(Source && src, Sink && sink, IColumn & offset_column)
+void NO_INLINE sliceDynamicOffsetUnbounded(Source && src, Sink && sink, IColumn & offset_column)
 {
     while (!src.isEnd())
     {
@@ -703,7 +714,7 @@ void sliceDynamicOffsetUnbounded(Source && src, Sink && sink, IColumn & offset_c
 }
 
 template <typename Source, typename Sink>
-void sliceDynamicOffsetBounded(Source && src, Sink && sink, IColumn & offset_column, IColumn & length_column)
+void NO_INLINE sliceDynamicOffsetBounded(Source && src, Sink && sink, IColumn & offset_column, IColumn & length_column)
 {
     while (!src.isEnd())
     {
@@ -730,7 +741,7 @@ void sliceDynamicOffsetBounded(Source && src, Sink && sink, IColumn & offset_col
 
 
 template <typename SourceA, typename SourceB, typename Sink>
-void conditional(SourceA && src_a, SourceB && src_b, Sink && sink, const PaddedPODArray<UInt8> & condition)
+void NO_INLINE conditional(SourceA && src_a, SourceB && src_b, Sink && sink, const PaddedPODArray<UInt8> & condition)
 {
     const UInt8 * cond_pos = &condition[0];
     const UInt8 * cond_end = cond_pos + condition.size();

From 49bdf266b01429601c695e6731010e81282d3cfc Mon Sep 17 00:00:00 2001
From: Alexey Milovidov <milovidov@yandex-team.ru>
Date: Sat, 5 Aug 2017 05:13:38 +0300
Subject: [PATCH 050/281] Fixed function if of FixedString arguments
 [#CLICKHOUSE-3202].

---
 dbms/src/Functions/GatherUtils.h | 181 +++++++++++++++++++------------
 1 file changed, 111 insertions(+), 70 deletions(-)

diff --git a/dbms/src/Functions/GatherUtils.h b/dbms/src/Functions/GatherUtils.h
index 9f898e24e30..78d74f7828d 100644
--- a/dbms/src/Functions/GatherUtils.h
+++ b/dbms/src/Functions/GatherUtils.h
@@ -184,39 +184,6 @@ struct ConstSource
 };
 
 
-template <typename T>
-struct NumericArraySink
-{
-    typename ColumnVector<T>::Container_t & elements;
-    typename ColumnArray::Offsets_t & offsets;
-
-    size_t row_num = 0;
-    ColumnArray::Offset_t current_offset = 0;
-
-    NumericArraySink(ColumnArray & arr, size_t column_size)
-        : elements(typeid_cast<ColumnVector<T> &>(arr.getData()).getData()), offsets(arr.getOffsets())
-    {
-        offsets.resize(column_size);
-    }
-
-    void next()
-    {
-        offsets[row_num] = current_offset;
-        ++row_num;
-    }
-
-    bool isEnd() const
-    {
-        return row_num == offsets.size();
-    }
-
-    size_t rowNum() const
-    {
-        return row_num;
-    }
-};
-
-
 struct StringSource
 {
     using Slice = NumericArraySlice<UInt8>;
@@ -249,6 +216,11 @@ struct StringSource
         return row_num;
     }
 
+    size_t getSizeForReserve() const
+    {
+        return elements.size();
+    }
+
     Slice getWhole() const
     {
         return {&elements[prev_offset], offsets[row_num] - prev_offset - 1};
@@ -288,40 +260,6 @@ struct StringSource
 };
 
 
-struct StringSink
-{
-    typename ColumnString::Chars_t & elements;
-    typename ColumnString::Offsets_t & offsets;
-
-    size_t row_num = 0;
-    ColumnString::Offset_t current_offset = 0;
-
-    StringSink(ColumnString & col, size_t column_size)
-        : elements(col.getChars()), offsets(col.getOffsets())
-    {
-        offsets.resize(column_size);
-    }
-
-    void ALWAYS_INLINE next()
-    {
-        elements.push_back(0);
-        ++current_offset;
-        offsets[row_num] = current_offset;
-        ++row_num;
-    }
-
-    bool isEnd() const
-    {
-        return row_num == offsets.size();
-    }
-
-    size_t rowNum() const
-    {
-        return row_num;
-    }
-};
-
-
 struct FixedStringSource
 {
     using Slice = NumericArraySlice<UInt8>;
@@ -356,6 +294,11 @@ struct FixedStringSource
         return row_num;
     }
 
+    size_t getSizeForReserve() const
+    {
+        return end - pos;
+    }
+
     Slice getWhole() const
     {
         return {pos, string_size};
@@ -391,6 +334,83 @@ struct FixedStringSource
 };
 
 
+template <typename T>
+struct NumericArraySink
+{
+    typename ColumnVector<T>::Container_t & elements;
+    typename ColumnArray::Offsets_t & offsets;
+
+    size_t row_num = 0;
+    ColumnArray::Offset_t current_offset = 0;
+
+    NumericArraySink(ColumnArray & arr, size_t column_size)
+        : elements(typeid_cast<ColumnVector<T> &>(arr.getData()).getData()), offsets(arr.getOffsets())
+    {
+        offsets.resize(column_size);
+    }
+
+    void next()
+    {
+        offsets[row_num] = current_offset;
+        ++row_num;
+    }
+
+    bool isEnd() const
+    {
+        return row_num == offsets.size();
+    }
+
+    size_t rowNum() const
+    {
+        return row_num;
+    }
+
+    void reserve(size_t num_elements)
+    {
+        elements.reserve(num_elements);
+    }
+};
+
+
+struct StringSink
+{
+    typename ColumnString::Chars_t & elements;
+    typename ColumnString::Offsets_t & offsets;
+
+    size_t row_num = 0;
+    ColumnString::Offset_t current_offset = 0;
+
+    StringSink(ColumnString & col, size_t column_size)
+        : elements(col.getChars()), offsets(col.getOffsets())
+    {
+        offsets.resize(column_size);
+    }
+
+    void ALWAYS_INLINE next()
+    {
+        elements.push_back(0);
+        ++current_offset;
+        offsets[row_num] = current_offset;
+        ++row_num;
+    }
+
+    bool isEnd() const
+    {
+        return row_num == offsets.size();
+    }
+
+    size_t rowNum() const
+    {
+        return row_num;
+    }
+
+    void reserve(size_t num_elements)
+    {
+        elements.reserve(num_elements);
+    }
+};
+
+
 struct FixedStringSink
 {
     typename ColumnString::Chars_t & elements;
@@ -421,6 +441,11 @@ struct FixedStringSink
     {
         return row_num;
     }
+
+    void reserve(size_t num_elements)
+    {
+        elements.reserve(num_elements);
+    }
 };
 
 
@@ -430,6 +455,7 @@ struct IStringSource
 
     virtual void next() = 0;
     virtual bool isEnd() const = 0;
+    virtual size_t getSizeForReserve() const = 0;
     virtual Slice getWhole() const = 0;
     virtual ~IStringSource() {}
 };
@@ -441,9 +467,10 @@ struct DynamicStringSource final : IStringSource
 
     DynamicStringSource(const IColumn & col) : impl(static_cast<const typename Impl::Column &>(col)) {}
 
-    void next() override { impl.next(); };
-    bool isEnd() const override { return impl.isEnd(); };
-    Slice getWhole() const override { return impl.getWhole(); };
+    void next() override { impl.next(); }
+    bool isEnd() const override { return impl.isEnd(); }
+    size_t getSizeForReserve() const override { return impl.getSizeForReserve(); }
+    Slice getWhole() const override { return impl.getWhole(); }
 };
 
 inline std::unique_ptr<IStringSource> createDynamicStringSource(const IColumn & col)
@@ -502,6 +529,11 @@ struct GenericArraySource
         return row_num;
     }
 
+    size_t getSizeForReserve() const
+    {
+        return elements.size();
+    }
+
     Slice getWhole() const
     {
         return {&elements, prev_offset, offsets[row_num] - prev_offset};
@@ -569,6 +601,11 @@ struct GenericArraySink
     {
         return row_num;
     }
+
+    void reserve(size_t num_elements)
+    {
+        elements.reserve(num_elements);
+    }
 };
 
 
@@ -618,6 +655,8 @@ inline ALWAYS_INLINE void writeSlice(const GenericArraySlice & slice, GenericArr
 template <typename SourceA, typename SourceB, typename Sink>
 void NO_INLINE concat(SourceA && src_a, SourceB && src_b, Sink && sink)
 {
+    sink.reserve(src_a.getSizeForReserve() + src_b.getSizeForReserve());
+
     while (!src_a.isEnd())
     {
         writeSlice(src_a.getWhole(), sink);
@@ -743,6 +782,8 @@ void NO_INLINE sliceDynamicOffsetBounded(Source && src, Sink && sink, IColumn &
 template <typename SourceA, typename SourceB, typename Sink>
 void NO_INLINE conditional(SourceA && src_a, SourceB && src_b, Sink && sink, const PaddedPODArray<UInt8> & condition)
 {
+    sink.reserve(std::max(src_a.getSizeForReserve(), src_b.getSizeForReserve()));
+
     const UInt8 * cond_pos = &condition[0];
     const UInt8 * cond_end = cond_pos + condition.size();
 

From 07b24dd64607151363e761f467ac480c0a2d87b9 Mon Sep 17 00:00:00 2001
From: Alexey Milovidov <milovidov@yandex-team.ru>
Date: Sat, 5 Aug 2017 05:24:13 +0300
Subject: [PATCH 051/281] Added test [#CLICKHOUSE-3202].

---
 .../00486_if_fixed_string.reference           | 80 +++++++++++++++++++
 .../0_stateless/00486_if_fixed_string.sql     | 19 +++++
 2 files changed, 99 insertions(+)
 create mode 100644 dbms/tests/queries/0_stateless/00486_if_fixed_string.reference
 create mode 100644 dbms/tests/queries/0_stateless/00486_if_fixed_string.sql

diff --git a/dbms/tests/queries/0_stateless/00486_if_fixed_string.reference b/dbms/tests/queries/0_stateless/00486_if_fixed_string.reference
new file mode 100644
index 00000000000..34cd1f1cf8c
--- /dev/null
+++ b/dbms/tests/queries/0_stateless/00486_if_fixed_string.reference
@@ -0,0 +1,80 @@
+world
+hello
+world
+hello
+world
+world
+hello
+world
+hello
+world
+world
+hello
+world
+hello
+world
+world
+hello
+world
+hello
+world
+world
+hello
+world
+hello
+world
+world
+hello
+world
+hello
+world
+world
+hello
+world
+hello
+world
+world
+hello
+world
+hello
+world
+world
+hello
+world
+hello
+world
+world
+hello
+world
+hello
+world
+world
+hello
+world
+hello
+world
+world
+hello
+world
+hello
+world
+world
+hello
+world
+hello
+world
+world
+hello
+world
+hello
+world
+world
+hello
+world
+hello
+world
+world
+hello
+world
+hello
+world
diff --git a/dbms/tests/queries/0_stateless/00486_if_fixed_string.sql b/dbms/tests/queries/0_stateless/00486_if_fixed_string.sql
new file mode 100644
index 00000000000..f1ef7c441a3
--- /dev/null
+++ b/dbms/tests/queries/0_stateless/00486_if_fixed_string.sql
@@ -0,0 +1,19 @@
+SELECT number % 2 ? 'hello' : 'world' FROM system.numbers LIMIT 5;
+SELECT number % 2 ? materialize('hello') : 'world' FROM system.numbers LIMIT 5;
+SELECT number % 2 ? 'hello' : materialize('world') FROM system.numbers LIMIT 5;
+SELECT number % 2 ? materialize('hello') : materialize('world') FROM system.numbers LIMIT 5;
+
+SELECT number % 2 ? toFixedString('hello', 5) : 'world' FROM system.numbers LIMIT 5;
+SELECT number % 2 ? materialize(toFixedString('hello', 5)) : 'world' FROM system.numbers LIMIT 5;
+SELECT number % 2 ? toFixedString('hello', 5) : materialize('world') FROM system.numbers LIMIT 5;
+SELECT number % 2 ? materialize(toFixedString('hello', 5)) : materialize('world') FROM system.numbers LIMIT 5;
+
+SELECT number % 2 ? 'hello' : toFixedString('world', 5) FROM system.numbers LIMIT 5;
+SELECT number % 2 ? materialize('hello') : toFixedString('world', 5) FROM system.numbers LIMIT 5;
+SELECT number % 2 ? 'hello' : materialize(toFixedString('world', 5)) FROM system.numbers LIMIT 5;
+SELECT number % 2 ? materialize('hello') : materialize(toFixedString('world', 5)) FROM system.numbers LIMIT 5;
+
+SELECT number % 2 ? toFixedString('hello', 5) : toFixedString('world', 5) FROM system.numbers LIMIT 5;
+SELECT number % 2 ? materialize(toFixedString('hello', 5)) : toFixedString('world', 5) FROM system.numbers LIMIT 5;
+SELECT number % 2 ? toFixedString('hello', 5) : materialize(toFixedString('world', 5)) FROM system.numbers LIMIT 5;
+SELECT number % 2 ? materialize(toFixedString('hello', 5)) : materialize(toFixedString('world', 5)) FROM system.numbers LIMIT 5;

From 8d7214e554d33992fe095f0bf07abcda7281cf41 Mon Sep 17 00:00:00 2001
From: Alexey Milovidov <milovidov@yandex-team.ru>
Date: Sat, 5 Aug 2017 05:29:38 +0300
Subject: [PATCH 052/281] Added performance test [#CLICKHOUSE-3202].

---
 .../performance/gather/if_string_const.xml    | 24 ++++++++++++++++
 .../performance/gather/if_string_hits.xml     | 28 +++++++++++++++++++
 2 files changed, 52 insertions(+)
 create mode 100644 dbms/tests/performance/gather/if_string_const.xml
 create mode 100644 dbms/tests/performance/gather/if_string_hits.xml

diff --git a/dbms/tests/performance/gather/if_string_const.xml b/dbms/tests/performance/gather/if_string_const.xml
new file mode 100644
index 00000000000..aba635c8e83
--- /dev/null
+++ b/dbms/tests/performance/gather/if_string_const.xml
@@ -0,0 +1,24 @@
+<test>
+    <name>if_string_const</name>
+    <type>once</type>
+
+    <stop_conditions>
+        <any_of>
+            <average_speed_not_changing_for_ms>10000</average_speed_not_changing_for_ms>
+            <total_time_ms>1000</total_time_ms>
+        </any_of>
+    </stop_conditions>
+
+    <metrics>
+        <max_bytes_per_second />
+    </metrics>
+
+    <main_metric>
+        <max_bytes_per_second />
+    </main_metric>
+
+    <query>SELECT count() FROM system.numbers WHERE NOT ignore(rand() % 2 ? 'hello' : 'world')</query>
+    <query>SELECT count() FROM system.numbers WHERE NOT ignore(rand() % 2 ? 'hello' : '')</query>
+    <query>SELECT count() FROM system.numbers WHERE NOT ignore(rand() % 2 ? toFixedString('hello', 5) : toFixedString('world', 5))</query>
+    <query>SELECT count() FROM system.numbers WHERE NOT ignore(rand() % 2 ? '' : toFixedString('world', 5))</query>
+</test>
diff --git a/dbms/tests/performance/gather/if_string_hits.xml b/dbms/tests/performance/gather/if_string_hits.xml
new file mode 100644
index 00000000000..ddf50934320
--- /dev/null
+++ b/dbms/tests/performance/gather/if_string_hits.xml
@@ -0,0 +1,28 @@
+<test>
+    <name>if_string_hits</name>
+    <type>loop</type>
+
+    <stop_conditions>
+        <all_of>
+            <iterations>5</iterations>
+            <min_time_not_changing_for_ms>10000</min_time_not_changing_for_ms>
+        </all_of>
+        <any_of>
+            <iterations>50</iterations>
+            <total_time_ms>60000</total_time_ms>
+        </any_of>
+    </stop_conditions>
+
+    <main_metric>
+        <min_time/>
+    </main_metric>
+
+    <preconditions>
+        <table_exists>test.hits</table_exists>
+    </preconditions>
+
+    <query>SELECT count() FROM test.hits WHERE NOT ignore(rand() % 2 ? URL : Referer)</query>
+    <query>SELECT count() FROM test.hits WHERE NOT ignore(rand() % 2 ? URL : '')</query>
+    <query>SELECT count() FROM test.hits WHERE NOT ignore(rand() % 2 ? SearchPhrase : MobilePhoneModel)</query>
+    <query>SELECT count() FROM test.hits WHERE NOT ignore(rand() % 2 ? '' : PageCharset)</query>
+</test>

From 6648d6dee4bab47ba57690089c49188ab817c74e Mon Sep 17 00:00:00 2001
From: Alexey Milovidov <milovidov@yandex-team.ru>
Date: Sat, 5 Aug 2017 06:15:47 +0300
Subject: [PATCH 053/281] Using GatherUtils for if on numeric arrays
 [#CLICKHOUSE-2].

---
 dbms/src/Functions/FunctionsConditional.h | 286 ++++------------------
 dbms/src/Functions/GatherUtils.h          |  92 ++++++-
 2 files changed, 138 insertions(+), 240 deletions(-)

diff --git a/dbms/src/Functions/FunctionsConditional.h b/dbms/src/Functions/FunctionsConditional.h
index 5391cb053f3..797484c3579 100644
--- a/dbms/src/Functions/FunctionsConditional.h
+++ b/dbms/src/Functions/FunctionsConditional.h
@@ -140,223 +140,6 @@ public:
 };
 
 
-template <typename A, typename B, typename ResultType>
-struct NumArrayIfImpl
-{
-    template <typename FromT>
-    static ALWAYS_INLINE void copy_from_vector(
-        size_t i,
-        const PaddedPODArray<FromT> & from_data, const ColumnArray::Offsets_t & from_offsets, ColumnArray::Offset_t from_prev_offset,
-        PaddedPODArray<ResultType> & to_data, ColumnArray::Offsets_t & to_offsets, ColumnArray::Offset_t & to_prev_offset)
-    {
-        size_t size_to_write = from_offsets[i] - from_prev_offset;
-        to_data.resize(to_data.size() + size_to_write);
-
-        for (size_t i = 0; i < size_to_write; ++i)
-            to_data[to_prev_offset + i] = static_cast<ResultType>(from_data[from_prev_offset + i]);
-
-        to_prev_offset += size_to_write;
-        to_offsets[i] = to_prev_offset;
-    }
-
-    static ALWAYS_INLINE void copy_from_constant(
-        size_t i,
-        const PaddedPODArray<ResultType> & from_data,
-        PaddedPODArray<ResultType> & to_data, ColumnArray::Offsets_t & to_offsets, ColumnArray::Offset_t & to_prev_offset)
-    {
-        size_t size_to_write = from_data.size();
-        to_data.resize(to_data.size() + size_to_write);
-        memcpy(&to_data[to_prev_offset], from_data.data(), size_to_write * sizeof(from_data[0]));
-        to_prev_offset += size_to_write;
-        to_offsets[i] = to_prev_offset;
-    }
-
-    static void create_result_column(
-        Block & block, size_t result,
-        PaddedPODArray<ResultType> ** c_data, ColumnArray::Offsets_t ** c_offsets)
-    {
-        auto col_res_vec = std::make_shared<ColumnVector<ResultType>>();
-        auto col_res_array = std::make_shared<ColumnArray>(col_res_vec);
-        block.getByPosition(result).column = col_res_array;
-
-        *c_data = &col_res_vec->getData();
-        *c_offsets = &col_res_array->getOffsets();
-    }
-
-
-    static void vector_vector(
-        const PaddedPODArray<UInt8> & cond,
-        const PaddedPODArray<A> & a_data, const ColumnArray::Offsets_t & a_offsets,
-        const PaddedPODArray<B> & b_data, const ColumnArray::Offsets_t & b_offsets,
-        Block & block, size_t result)
-    {
-        PaddedPODArray<ResultType> * c_data = nullptr;
-        ColumnArray::Offsets_t * c_offsets = nullptr;
-        create_result_column(block, result, &c_data, &c_offsets);
-
-        size_t size = cond.size();
-        c_offsets->resize(size);
-        c_data->reserve(std::max(a_data.size(), b_data.size()));
-
-        ColumnArray::Offset_t a_prev_offset = 0;
-        ColumnArray::Offset_t b_prev_offset = 0;
-        ColumnArray::Offset_t c_prev_offset = 0;
-
-        for (size_t i = 0; i < size; ++i)
-        {
-            if (cond[i])
-                copy_from_vector(i, a_data, a_offsets, a_prev_offset, *c_data, *c_offsets, c_prev_offset);
-            else
-                copy_from_vector(i, b_data, b_offsets, b_prev_offset, *c_data, *c_offsets, c_prev_offset);
-
-            a_prev_offset = a_offsets[i];
-            b_prev_offset = b_offsets[i];
-        }
-    }
-
-    static void vector_constant(
-        const PaddedPODArray<UInt8> & cond,
-        const PaddedPODArray<A> & a_data, const ColumnArray::Offsets_t & a_offsets,
-        const Array & b,
-        Block & block, size_t result)
-    {
-        PaddedPODArray<ResultType> * c_data = nullptr;
-        ColumnArray::Offsets_t * c_offsets = nullptr;
-        create_result_column(block, result, &c_data, &c_offsets);
-
-        PaddedPODArray<ResultType> b_converted(b.size());
-        for (size_t i = 0, size = b.size(); i < size; ++i)
-            b_converted[i] = b[i].get<typename NearestFieldType<B>::Type>();
-
-        size_t size = cond.size();
-        c_offsets->resize(size);
-        c_data->reserve(a_data.size());
-
-        ColumnArray::Offset_t a_prev_offset = 0;
-        ColumnArray::Offset_t c_prev_offset = 0;
-
-        for (size_t i = 0; i < size; ++i)
-        {
-            if (cond[i])
-                copy_from_vector(i, a_data, a_offsets, a_prev_offset, *c_data, *c_offsets, c_prev_offset);
-            else
-                copy_from_constant(i, b_converted, *c_data, *c_offsets, c_prev_offset);
-
-            a_prev_offset = a_offsets[i];
-        }
-    }
-
-    static void constant_vector(
-        const PaddedPODArray<UInt8> & cond,
-        const Array & a,
-        const PaddedPODArray<B> & b_data, const ColumnArray::Offsets_t & b_offsets,
-        Block & block, size_t result)
-    {
-        PaddedPODArray<ResultType> * c_data = nullptr;
-        ColumnArray::Offsets_t * c_offsets = nullptr;
-        create_result_column(block, result, &c_data, &c_offsets);
-
-        PaddedPODArray<ResultType> a_converted(a.size());
-        for (size_t i = 0, size = a.size(); i < size; ++i)
-            a_converted[i] = a[i].get<typename NearestFieldType<A>::Type>();
-
-        size_t size = cond.size();
-        c_offsets->resize(size);
-        c_data->reserve(b_data.size());
-
-        ColumnArray::Offset_t b_prev_offset = 0;
-        ColumnArray::Offset_t c_prev_offset = 0;
-
-        for (size_t i = 0; i < size; ++i)
-        {
-            if (cond[i])
-                copy_from_constant(i, a_converted, *c_data, *c_offsets, c_prev_offset);
-            else
-                copy_from_vector(i, b_data, b_offsets, b_prev_offset, *c_data, *c_offsets, c_prev_offset);
-
-            b_prev_offset = b_offsets[i];
-        }
-    }
-
-    static void constant_constant(
-        const PaddedPODArray<UInt8> & cond,
-        const Array & a, const Array & b,
-        Block & block, size_t result)
-    {
-        PaddedPODArray<ResultType> * c_data = nullptr;
-        ColumnArray::Offsets_t * c_offsets = nullptr;
-        create_result_column(block, result, &c_data, &c_offsets);
-
-        PaddedPODArray<ResultType> a_converted(a.size());
-        for (size_t i = 0, size = a.size(); i < size; ++i)
-            a_converted[i] = a[i].get<typename NearestFieldType<A>::Type>();
-
-        PaddedPODArray<ResultType> b_converted(b.size());
-        for (size_t i = 0, size = b.size(); i < size; ++i)
-            b_converted[i] = b[i].get<typename NearestFieldType<B>::Type>();
-
-        size_t size = cond.size();
-        c_offsets->resize(size);
-        c_data->reserve((std::max(a.size(), b.size())) * size);
-
-        ColumnArray::Offset_t c_prev_offset = 0;
-
-        for (size_t i = 0; i < size; ++i)
-        {
-            if (cond[i])
-                copy_from_constant(i, a_converted, *c_data, *c_offsets, c_prev_offset);
-            else
-                copy_from_constant(i, b_converted, *c_data, *c_offsets, c_prev_offset);
-        }
-    }
-};
-
-template <typename A, typename B>
-struct NumArrayIfImpl<A, B, NumberTraits::Error>
-{
-private:
-    static void throw_error()
-    {
-        throw Exception("Internal logic error: invalid types of arguments 2 and 3 of if", ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT);
-    }
-public:
-    static void vector_vector(
-        const PaddedPODArray<UInt8> & cond,
-        const PaddedPODArray<A> & a_data, const ColumnArray::Offsets_t & a_offsets,
-        const PaddedPODArray<B> & b_data, const ColumnArray::Offsets_t & b_offsets,
-        Block & block, size_t result)
-    {
-        throw_error();
-    }
-
-    static void vector_constant(
-        const PaddedPODArray<UInt8> & cond,
-        const PaddedPODArray<A> & a_data, const ColumnArray::Offsets_t & a_offsets,
-        const Array & b,
-        Block & block, size_t result)
-    {
-        throw_error();
-    }
-
-    static void constant_vector(
-        const PaddedPODArray<UInt8> & cond,
-        const Array & a,
-        const PaddedPODArray<B> & b_data, const ColumnArray::Offsets_t & b_offsets,
-        Block & block, size_t result)
-    {
-        throw_error();
-    }
-
-    static void constant_constant(
-        const PaddedPODArray<UInt8> & cond,
-        const Array & a, const Array & b,
-        Block & block, size_t result)
-    {
-        throw_error();
-    }
-};
-
-
 /** Implementation for string arrays.
   * NOTE: The code is too complex because it works with the internals of the arrays of strings.
   * NOTE: Arrays of FixedString are not supported.
@@ -654,7 +437,8 @@ private:
     }
 
     template <typename T0, typename T1>
-    bool executeRightTypeArray(
+    typename std::enable_if<!std::is_same<NumberTraits::Error, typename NumberTraits::ResultOfIf<T0, T1>::Type>::value, bool>::type
+    executeRightTypeArray(
         const ColumnUInt8 * cond_col,
         Block & block,
         const ColumnNumbers & arguments,
@@ -679,11 +463,13 @@ private:
             if (!col_right_vec)
                 return false;
 
-            NumArrayIfImpl<T0, T1, ResultType>::vector_vector(
-                cond_col->getData(),
-                col_left->getData(), col_left_array->getOffsets(),
-                col_right_vec->getData(), col_right_array->getOffsets(),
-                block, result);
+            block.getByPosition(result).column = block.getByPosition(result).type->createColumn();
+
+            conditional(
+                NumericArraySource<T0>(*col_left_array),
+                NumericArraySource<T1>(*col_right_array),
+                NumericArraySink<ResultType>(static_cast<ColumnArray &>(*block.getByPosition(result).column), block.rows()),
+                cond_col->getData());
         }
         else
         {
@@ -691,18 +477,21 @@ private:
             if (!checkColumn<ColumnVector<T1>>(&col_right_const_array_data->getData()))
                 return false;
 
-            NumArrayIfImpl<T0, T1, ResultType>::vector_constant(
-                cond_col->getData(),
-                col_left->getData(), col_left_array->getOffsets(),
-                col_right_const_array->getValue<Array>(),
-                block, result);
+            block.getByPosition(result).column = block.getByPosition(result).type->createColumn();
+
+            conditional(
+                NumericArraySource<T0>(*col_left_array),
+                ConstSource<NumericArraySource<T1>>(*col_right_const_array),
+                NumericArraySink<ResultType>(static_cast<ColumnArray &>(*block.getByPosition(result).column), block.rows()),
+                cond_col->getData());
         }
 
         return true;
     }
 
     template <typename T0, typename T1>
-    bool executeConstRightTypeArray(
+    typename std::enable_if<!std::is_same<NumberTraits::Error, typename NumberTraits::ResultOfIf<T0, T1>::Type>::value, bool>::type
+    executeConstRightTypeArray(
         const ColumnUInt8 * cond_col,
         Block & block,
         const ColumnNumbers & arguments,
@@ -726,11 +515,13 @@ private:
             if (!col_right_vec)
                 return false;
 
-            NumArrayIfImpl<T0, T1, ResultType>::constant_vector(
-                cond_col->getData(),
-                col_left_const_array->getValue<Array>(),
-                col_right_vec->getData(), col_right_array->getOffsets(),
-                block, result);
+            block.getByPosition(result).column = block.getByPosition(result).type->createColumn();
+
+            conditional(
+                ConstSource<NumericArraySource<T0>>(*col_left_const_array),
+                NumericArraySource<T1>(*col_right_array),
+                NumericArraySink<ResultType>(static_cast<ColumnArray &>(*block.getByPosition(result).column), block.rows()),
+                cond_col->getData());
         }
         else
         {
@@ -738,16 +529,33 @@ private:
             if (!checkColumn<ColumnVector<T1>>(&col_right_const_array_data->getData()))
                 return false;
 
-            NumArrayIfImpl<T0, T1, ResultType>::constant_constant(
-                cond_col->getData(),
-                col_left_const_array->getValue<Array>(),
-                col_right_const_array->getValue<Array>(),
-                block, result);
+            block.getByPosition(result).column = block.getByPosition(result).type->createColumn();
+
+            conditional(
+                ConstSource<NumericArraySource<T0>>(*col_left_const_array),
+                ConstSource<NumericArraySource<T1>>(*col_right_const_array),
+                NumericArraySink<ResultType>(static_cast<ColumnArray &>(*block.getByPosition(result).column), block.rows()),
+                cond_col->getData());
         }
 
         return true;
     }
 
+    /// Overloads (selected via enable_if, not template specializations) for incompatible data types, i.e. when NumberTraits::ResultOfIf<T0, T1>::Type is NumberTraits::Error; they accept any arguments and only return false. Example: if(cond, Int64, UInt64) cannot be executed, because Int64 and UInt64 are incompatible.
+    template <typename T0, typename T1, typename... Args>
+    typename std::enable_if<std::is_same<NumberTraits::Error, typename NumberTraits::ResultOfIf<T0, T1>::Type>::value, bool>::type
+    executeRightTypeArray(Args &&... args)
+    {
+        return false;   /// Nothing is executed for an incompatible T0/T1 pair; the arguments are ignored.
+    }
+
+    template <typename T0, typename T1, typename... Args>
+    typename std::enable_if<std::is_same<NumberTraits::Error, typename NumberTraits::ResultOfIf<T0, T1>::Type>::value, bool>::type
+    executeConstRightTypeArray(Args &&... args)   /// Chosen only when ResultOfIf<T0, T1>::Type is NumberTraits::Error.
+    {
+        return false;   /// Nothing is executed for an incompatible T0/T1 pair; the arguments are ignored.
+    }
+
     template <typename T0>
     bool executeLeftType(const ColumnUInt8 * cond_col, Block & block, const ColumnNumbers & arguments, size_t result)
     {
diff --git a/dbms/src/Functions/GatherUtils.h b/dbms/src/Functions/GatherUtils.h
index 78d74f7828d..5bfbf71d896 100644
--- a/dbms/src/Functions/GatherUtils.h
+++ b/dbms/src/Functions/GatherUtils.h
@@ -9,6 +9,7 @@
 #include <Columns/ColumnConst.h>
 
 #include <Functions/FunctionHelpers.h>
+#include <DataTypes/NumberTraits.h>
 
 #include <Common/typeid_cast.h>
 #include <Common/memcpySmall.h>
@@ -609,6 +610,90 @@ struct GenericArraySink
 };
 
 
+template <typename T>
+using NumericSlice = const T *;   /// Slice of a plain numeric column: a pointer to a single value of type T.
+
+template <typename T>
+struct NumericSource   /// Read cursor over a ColumnVector<T>: one value per row, exposed as a NumericSlice<T> pointer.
+{
+    using Slice = NumericSlice<T>;
+    using Column = ColumnVector<T>;
+
+    const T * begin;   /// First element of the column data.
+    const T * pos;   /// Element of the current row.
+    const T * end;   /// One past the last element.
+
+    NumericSource(const Column & col)   /// Keeps raw pointers into col's data. NOTE(review): presumably col must outlive the source and stay unmodified - confirm.
+    {
+        const auto & container = col.getData();
+        begin = container.data();
+        pos = begin;
+        end = begin + container.size();
+    }
+
+    void next()   /// Move to the next row.
+    {
+        ++pos;
+    }
+
+    bool isEnd() const   /// True once pos has reached the end of the data.
+    {
+        return pos == end;
+    }
+
+    size_t rowNum() const   /// Zero-based index of the current row.
+    {
+        return pos - begin;
+    }
+
+    size_t getSizeForReserve() const
+    {
+        return 0;   /// Simple numeric columns are resized before fill, no need to reserve.
+    }
+
+    Slice getWhole() const   /// The whole current row: a pointer to its single value.
+    {
+        return pos;
+    }
+};
+
+template <typename T>
+struct NumericSink   /// Write cursor over a ColumnVector<T>: one value of type T per row.
+{
+    T * begin;   /// First element of the (already resized) column data.
+    T * pos;   /// Element of the current row.
+    T * end;   /// One past the last element.
+
+    NumericSink(ColumnVector<T> & col, size_t column_size)   /// Resizes col to column_size rows up front, then keeps raw pointers into its data.
+    {
+        auto & container = col.getData();
+        container.resize(column_size);
+        begin = container.data();
+        pos = begin;
+        end = begin + container.size();
+    }
+
+    void next()   /// Move to the next row.
+    {
+        ++pos;
+    }
+
+    bool isEnd() const   /// True once pos has reached the end of the data.
+    {
+        return pos == end;
+    }
+
+    size_t rowNum() const   /// Zero-based index of the current row.
+    {
+        return pos - begin;
+    }
+
+    void reserve(size_t num_elements)   /// No-op: the constructor has already resized the column, so num_elements is ignored.
+    {
+    }
+};
+
+
 /// Methods to copy Slice to Sink, overloaded for various combinations of types.
 
 template <typename T>
@@ -649,6 +734,12 @@ inline ALWAYS_INLINE void writeSlice(const GenericArraySlice & slice, GenericArr
     sink.current_offset += slice.size;
 }
 
+template <typename T, typename U>
+void ALWAYS_INLINE writeSlice(const NumericSlice<T> & slice, NumericSink<U> & sink)
+{
+    *sink.pos = *slice;   /// Stores the single source value at the sink's current row, converting T to U by assignment; the sink is not advanced here.
+}
+
 
 /// Algorithms
 
@@ -727,7 +818,6 @@ void NO_INLINE sliceFromRightConstantOffsetBounded(Source && src, Sink && sink,
     }
 }
 
-
 template <typename Source, typename Sink>
 void NO_INLINE sliceDynamicOffsetUnbounded(Source && src, Sink && sink, IColumn & offset_column)
 {

From 19ff0db9f1b6d03725857c7796bca6d2382d1d71 Mon Sep 17 00:00:00 2001
From: Alexey Milovidov <milovidov@yandex-team.ru>
Date: Sat, 5 Aug 2017 06:19:55 +0300
Subject: [PATCH 054/281] Using GatherUtils for if on numeric arrays
 [#CLICKHOUSE-2].

---
 dbms/src/Functions/GatherUtils.h | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/dbms/src/Functions/GatherUtils.h b/dbms/src/Functions/GatherUtils.h
index 5bfbf71d896..b33ecc442b9 100644
--- a/dbms/src/Functions/GatherUtils.h
+++ b/dbms/src/Functions/GatherUtils.h
@@ -697,7 +697,7 @@ struct NumericSink
 /// Methods to copy Slice to Sink, overloaded for various combinations of types.
 
 template <typename T>
-void ALWAYS_INLINE writeSlice(const NumericArraySlice<T> & slice, NumericArraySink<T> & sink)
+void writeSlice(const NumericArraySlice<T> & slice, NumericArraySink<T> & sink)
 {
     sink.elements.resize(sink.current_offset + slice.size);
     memcpySmallAllowReadWriteOverflow15(&sink.elements[sink.current_offset], slice.data, slice.size * sizeof(T));
@@ -705,7 +705,7 @@ void ALWAYS_INLINE writeSlice(const NumericArraySlice<T> & slice, NumericArraySi
 }
 
 template <typename T, typename U>
-void ALWAYS_INLINE writeSlice(const NumericArraySlice<T> & slice, NumericArraySink<U> & sink)
+void writeSlice(const NumericArraySlice<T> & slice, NumericArraySink<U> & sink)
 {
     sink.elements.resize(sink.current_offset + slice.size);
     for (size_t i = 0; i < slice.size; ++i)

From 24d8f0aedf39f245aef02011a8325aabedb9e518 Mon Sep 17 00:00:00 2001
From: Alexey Milovidov <milovidov@yandex-team.ru>
Date: Sat, 5 Aug 2017 06:27:18 +0300
Subject: [PATCH 055/281] Added performance test [#CLICKHOUSE-2].

---
 .../tests/performance/gather/if_array_num.xml | 26 +++++++++++++++++++
 1 file changed, 26 insertions(+)
 create mode 100644 dbms/tests/performance/gather/if_array_num.xml

diff --git a/dbms/tests/performance/gather/if_array_num.xml b/dbms/tests/performance/gather/if_array_num.xml
new file mode 100644
index 00000000000..1c9fc1d3453
--- /dev/null
+++ b/dbms/tests/performance/gather/if_array_num.xml
@@ -0,0 +1,26 @@
+<test>
+    <name>if_array_num</name>
+    <type>once</type>
+
+    <stop_conditions>
+        <any_of>
+            <average_speed_not_changing_for_ms>10000</average_speed_not_changing_for_ms>
+            <total_time_ms>1000</total_time_ms>
+        </any_of>
+    </stop_conditions>
+
+    <metrics>
+        <max_rows_per_second />
+    </metrics>
+
+    <main_metric>
+        <max_rows_per_second />
+    </main_metric>
+
+    <query>SELECT count() FROM system.numbers WHERE NOT ignore(rand() % 2 ? [1, 2, 3] : [4, 5])</query>
+    <query>SELECT count() FROM system.numbers WHERE NOT ignore(rand() % 2 ? [1, 2, 3] : materialize([4, 5]))</query>
+    <query>SELECT count() FROM system.numbers WHERE NOT ignore(rand() % 2 ? materialize([1, 2, 3]) : materialize([4, 5]))</query>
+    <query>SELECT count() FROM system.numbers WHERE NOT ignore(rand() % 2 ? [1, 2, 3] : [400, 500])</query>
+    <query>SELECT count() FROM system.numbers WHERE NOT ignore(rand() % 2 ? [1, 2, 3] : materialize([400, 500]))</query>
+    <query>SELECT count() FROM system.numbers WHERE NOT ignore(rand() % 2 ? materialize([1, 2, 3]) : materialize([400, 500]))</query>
+</test>

From 8536fb96f7369a5ec753a76b8b3495cb705a9554 Mon Sep 17 00:00:00 2001
From: Alexey Milovidov <milovidov@yandex-team.ru>
Date: Sat, 5 Aug 2017 06:31:52 +0300
Subject: [PATCH 056/281] Fixed error message in case of syntax error
 [#CLICKHOUSE-2].

---
 dbms/src/Parsers/parseQuery.cpp | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/dbms/src/Parsers/parseQuery.cpp b/dbms/src/Parsers/parseQuery.cpp
index 93bc39f75c3..0e67bf7b55d 100644
--- a/dbms/src/Parsers/parseQuery.cpp
+++ b/dbms/src/Parsers/parseQuery.cpp
@@ -114,7 +114,7 @@ void writeQueryAroundTheError(
     else
     {
         if (num_positions_to_hilite)
-            out << ": " << std::string(positions_to_hilite[0].begin, std::min(SHOW_CHARS_ON_SYNTAX_ERROR, end - positions_to_hilite[0].begin));
+            out << ": " << std::string(positions_to_hilite[0].begin, std::min(SHOW_CHARS_ON_SYNTAX_ERROR, end - positions_to_hilite[0].begin)) << ". ";
     }
 }
 

From 5ddc3549891861f473cde543526984d4023f579b Mon Sep 17 00:00:00 2001
From: Alexey Milovidov <milovidov@yandex-team.ru>
Date: Sat, 5 Aug 2017 06:50:13 +0300
Subject: [PATCH 057/281] Using GatherUtils for function if on arguments of
 type Array(String) [#CLICKHOUSE-2].

---
 dbms/src/Functions/FunctionsConditional.h | 57 +++++++++--------------
 dbms/src/Functions/GatherUtils.h          |  1 +
 2 files changed, 24 insertions(+), 34 deletions(-)

diff --git a/dbms/src/Functions/FunctionsConditional.h b/dbms/src/Functions/FunctionsConditional.h
index 797484c3579..a759f0569ec 100644
--- a/dbms/src/Functions/FunctionsConditional.h
+++ b/dbms/src/Functions/FunctionsConditional.h
@@ -733,48 +733,36 @@ private:
             return true;
         }
 
+        return false;
+    }
+
+    bool executeGeneric(const ColumnUInt8 * cond_col, Block & block, const ColumnNumbers & arguments, size_t result)
+    {
+        const IColumn * col_then_untyped = block.getByPosition(arguments[1]).column.get();
+        const IColumn * col_else_untyped = block.getByPosition(arguments[2]).column.get();
+
         const ColumnArray * col_arr_then = checkAndGetColumn<ColumnArray>(col_then_untyped);
         const ColumnArray * col_arr_else = checkAndGetColumn<ColumnArray>(col_else_untyped);
         const ColumnConst * col_arr_then_const = checkAndGetColumnConst<ColumnArray>(col_then_untyped);
         const ColumnConst * col_arr_else_const = checkAndGetColumnConst<ColumnArray>(col_else_untyped);
-        const ColumnString * col_then_elements = col_arr_then ? checkAndGetColumn<ColumnString>(&col_arr_then->getData()) : nullptr;
-        const ColumnString * col_else_elements = col_arr_else ? checkAndGetColumn<ColumnString>(&col_arr_else->getData()) : nullptr;
 
-        if (((col_arr_then && col_then_elements) || col_arr_then_const)
-            && ((col_arr_else && col_else_elements) || col_arr_else_const))
+        const PaddedPODArray<UInt8> & cond_data = cond_col->getData();
+        size_t rows = cond_data.size();
+
+        if ((col_arr_then || col_arr_then_const)
+            && (col_arr_else || col_arr_else_const))
         {
-            auto col_res_elements = std::make_shared<ColumnString>();
-            auto col_res = std::make_shared<ColumnArray>(col_res_elements);
-            block.getByPosition(result).column = col_res;
+            block.getByPosition(result).column = block.getByPosition(result).type->createColumn();
+            auto col_res = static_cast<ColumnArray *>(block.getByPosition(result).column.get());
 
-            ColumnString::Chars_t & res_chars = col_res_elements->getChars();
-            ColumnString::Offsets_t & res_string_offsets = col_res_elements->getOffsets();
-            ColumnArray::Offsets_t & res_array_offsets = col_res->getOffsets();
-
-            if (col_then_elements && col_else_elements)
-                StringArrayIfImpl::vector_vector(
-                    cond_data,
-                    col_then_elements->getChars(), col_then_elements->getOffsets(), col_arr_then->getOffsets(),
-                    col_else_elements->getChars(), col_else_elements->getOffsets(), col_arr_else->getOffsets(),
-                    res_chars, res_string_offsets, res_array_offsets);
-            else if (col_then_elements && col_arr_else_const)
-                StringArrayIfImpl::vector_constant(
-                    cond_data,
-                    col_then_elements->getChars(), col_then_elements->getOffsets(), col_arr_then->getOffsets(),
-                    col_arr_else_const->getValue<Array>(),
-                    res_chars, res_string_offsets, res_array_offsets);
-            else if (col_arr_then_const && col_else_elements)
-                StringArrayIfImpl::constant_vector(
-                    cond_data,
-                    col_arr_then_const->getValue<Array>(),
-                    col_else_elements->getChars(), col_else_elements->getOffsets(), col_arr_else->getOffsets(),
-                    res_chars, res_string_offsets, res_array_offsets);
+            if (col_arr_then && col_arr_else)
+                conditional(GenericArraySource(*col_arr_then), GenericArraySource(*col_arr_else), GenericArraySink(*col_res, rows), cond_data);
+            else if (col_arr_then && col_arr_else_const)
+                conditional(GenericArraySource(*col_arr_then), ConstSource<GenericArraySource>(*col_arr_else_const), GenericArraySink(*col_res, rows), cond_data);
+            else if (col_arr_then_const && col_arr_else)
+                conditional(ConstSource<GenericArraySource>(*col_arr_then_const), GenericArraySource(*col_arr_else), GenericArraySink(*col_res, rows), cond_data);
             else if (col_arr_then_const && col_arr_else_const)
-                StringArrayIfImpl::constant_constant(
-                    cond_data,
-                    col_arr_then_const->getValue<Array>(),
-                    col_arr_else_const->getValue<Array>(),
-                    res_chars, res_string_offsets, res_array_offsets);
+                conditional(ConstSource<GenericArraySource>(*col_arr_then_const), ConstSource<GenericArraySource>(*col_arr_else_const), GenericArraySink(*col_res, rows), cond_data);
             else
                 return false;
 
@@ -1261,6 +1249,7 @@ public:
                 || executeLeftType<Float32>(cond_col, block, arguments, result)
                 || executeLeftType<Float64>(cond_col, block, arguments, result)
                 || executeString(cond_col, block, arguments, result)
+                || executeGeneric(cond_col, block, arguments, result)
                 || executeTuple(cond_col, block, arguments, result)))
                 throw Exception("Illegal columns " + arg_then.column->getName()
                     + " and " + arg_else.column->getName()
diff --git a/dbms/src/Functions/GatherUtils.h b/dbms/src/Functions/GatherUtils.h
index b33ecc442b9..75492770de4 100644
--- a/dbms/src/Functions/GatherUtils.h
+++ b/dbms/src/Functions/GatherUtils.h
@@ -502,6 +502,7 @@ struct GenericArraySlice
 struct GenericArraySource
 {
     using Slice = GenericArraySlice;
+    using Column = ColumnArray;
 
     const IColumn & elements;
     const typename ColumnArray::Offsets_t & offsets;

From d70f59e9bb0df55436ac5035a4ab16c1c6b8ba2d Mon Sep 17 00:00:00 2001
From: Alexey Milovidov <milovidov@yandex-team.ru>
Date: Sat, 5 Aug 2017 07:07:40 +0300
Subject: [PATCH 058/281] Added performance test [#CLICKHOUSE-2].

---
 .../performance/gather/if_array_string.xml    | 26 +++++++++++++++++++
 1 file changed, 26 insertions(+)
 create mode 100644 dbms/tests/performance/gather/if_array_string.xml

diff --git a/dbms/tests/performance/gather/if_array_string.xml b/dbms/tests/performance/gather/if_array_string.xml
new file mode 100644
index 00000000000..4b8d839f6fe
--- /dev/null
+++ b/dbms/tests/performance/gather/if_array_string.xml
@@ -0,0 +1,26 @@
+<test>
+    <name>if_array_string</name>
+    <type>once</type>
+
+    <stop_conditions>
+        <any_of>
+            <average_speed_not_changing_for_ms>10000</average_speed_not_changing_for_ms>
+            <total_time_ms>1000</total_time_ms>
+        </any_of>
+    </stop_conditions>
+
+    <metrics>
+        <max_rows_per_second />
+    </metrics>
+
+    <main_metric>
+        <max_rows_per_second />
+    </main_metric>
+
+    <query>SELECT count() FROM system.numbers WHERE NOT ignore(rand() % 2 ? ['Hello', 'World'] : ['a', 'b', 'c'])</query>
+    <query>SELECT count() FROM system.numbers WHERE NOT ignore(rand() % 2 ? materialize(['Hello', 'World']) : ['a', 'b', 'c'])</query>
+    <query>SELECT count() FROM system.numbers WHERE NOT ignore(rand() % 2 ? ['Hello', 'World'] : materialize(['a', 'b', 'c']))</query>
+    <query>SELECT count() FROM system.numbers WHERE NOT ignore(rand() % 2 ? materialize(['Hello', 'World']) : materialize(['a', 'b', 'c']))</query>
+    <query>SELECT count() FROM system.numbers WHERE NOT ignore(rand() % 2 ? materialize(['', '']) : emptyArrayString())</query>
+    <query>SELECT count() FROM system.numbers WHERE NOT ignore(rand() % 2 ? materialize(['https://github.com/yandex/ClickHouse/pull/1070', 'https://www.google.ru/search?newwindow=1&amp;site=&amp;source=hp&amp;q=zookeeper+wire+protocol+exists&amp;oq=zookeeper+wire+protocol+exists&amp;gs_l=psy-ab.3...330.6300.0.6687.33.28.0.0.0.0.386.4838.0j5j9j5.19.0....0...1.1.64.psy-ab..14.17.4448.0..0j35i39k1j0i131k1j0i22i30k1j0i19k1j33i21k1.r_3uFoNOrSU']) : emptyArrayString())</query>
+</test>

From c2a32990bb26641cf833858b7b4a5b47ea1d258a Mon Sep 17 00:00:00 2001
From: Alexey Milovidov <milovidov@yandex-team.ru>
Date: Sat, 5 Aug 2017 07:11:42 +0300
Subject: [PATCH 059/281] Added test [#CLICKHOUSE-2].

---
 .../00487_if_array_fixed_string.reference        | 16 ++++++++++++++++
 .../0_stateless/00487_if_array_fixed_string.sql  |  4 ++++
 2 files changed, 20 insertions(+)
 create mode 100644 dbms/tests/queries/0_stateless/00487_if_array_fixed_string.reference
 create mode 100644 dbms/tests/queries/0_stateless/00487_if_array_fixed_string.sql

diff --git a/dbms/tests/queries/0_stateless/00487_if_array_fixed_string.reference b/dbms/tests/queries/0_stateless/00487_if_array_fixed_string.reference
new file mode 100644
index 00000000000..df2f576f3b9
--- /dev/null
+++ b/dbms/tests/queries/0_stateless/00487_if_array_fixed_string.reference
@@ -0,0 +1,16 @@
+['a\0\0\0\0','b\0\0\0\0','c\0\0\0\0']
+['hello','world']
+['a\0\0\0\0','b\0\0\0\0','c\0\0\0\0']
+['hello','world']
+['a\0\0\0\0','b\0\0\0\0','c\0\0\0\0']
+['hello','world']
+['a\0\0\0\0','b\0\0\0\0','c\0\0\0\0']
+['hello','world']
+['a\0\0\0\0','b\0\0\0\0','c\0\0\0\0']
+['hello','world']
+['a\0\0\0\0','b\0\0\0\0','c\0\0\0\0']
+['hello','world']
+['a\0\0\0\0','b\0\0\0\0','c\0\0\0\0']
+['hello','world']
+['a\0\0\0\0','b\0\0\0\0','c\0\0\0\0']
+['hello','world']
diff --git a/dbms/tests/queries/0_stateless/00487_if_array_fixed_string.sql b/dbms/tests/queries/0_stateless/00487_if_array_fixed_string.sql
new file mode 100644
index 00000000000..743b88dac07
--- /dev/null
+++ b/dbms/tests/queries/0_stateless/00487_if_array_fixed_string.sql
@@ -0,0 +1,4 @@
+SELECT number % 2 ? arrayMap(x -> toFixedString(x, 5), ['hello', 'world']) : arrayMap(x -> toFixedString(x, 5), ['a', 'b', 'c']) FROM system.numbers LIMIT 4;
+SELECT number % 2 ? materialize(arrayMap(x -> toFixedString(x, 5), ['hello', 'world'])) : arrayMap(x -> toFixedString(x, 5), ['a', 'b', 'c']) FROM system.numbers LIMIT 4;
+SELECT number % 2 ? arrayMap(x -> toFixedString(x, 5), ['hello', 'world']) : materialize(arrayMap(x -> toFixedString(x, 5), ['a', 'b', 'c'])) FROM system.numbers LIMIT 4;
+SELECT number % 2 ? materialize(arrayMap(x -> toFixedString(x, 5), ['hello', 'world'])) : materialize(arrayMap(x -> toFixedString(x, 5), ['a', 'b', 'c'])) FROM system.numbers LIMIT 4;

From f386788278bb442aadf2ec88f18a2e5161f2bba2 Mon Sep 17 00:00:00 2001
From: Alexey Milovidov <milovidov@yandex-team.ru>
Date: Sat, 5 Aug 2017 07:12:15 +0300
Subject: [PATCH 060/281] Using GatherUtils for function if on arguments of
 type Array(String) [#CLICKHOUSE-2].

---
 dbms/src/Functions/FunctionsConditional.h | 211 +---------------------
 1 file changed, 6 insertions(+), 205 deletions(-)

diff --git a/dbms/src/Functions/FunctionsConditional.h b/dbms/src/Functions/FunctionsConditional.h
index a759f0569ec..7381985177e 100644
--- a/dbms/src/Functions/FunctionsConditional.h
+++ b/dbms/src/Functions/FunctionsConditional.h
@@ -140,209 +140,6 @@ public:
 };
 
 
-/** Implementation for string arrays.
-  * NOTE: The code is too complex because it works with the internals of the arrays of strings.
-  * NOTE: Arrays of FixedString are not supported.
-  */
-struct StringArrayIfImpl
-{
-    static ALWAYS_INLINE void copy_from_vector(
-        size_t i,
-        const ColumnString::Chars_t & from_data,
-        const ColumnString::Offsets_t & from_string_offsets,
-        const ColumnArray::Offsets_t & from_array_offsets,
-        const ColumnArray::Offset_t & from_array_prev_offset,
-        const ColumnString::Offset_t & from_string_prev_offset,
-        ColumnString::Chars_t & to_data,
-        ColumnString::Offsets_t & to_string_offsets,
-        ColumnArray::Offsets_t & to_array_offsets,
-        ColumnArray::Offset_t & to_array_prev_offset,
-        ColumnString::Offset_t & to_string_prev_offset)
-    {
-        size_t array_size = from_array_offsets[i] - from_array_prev_offset;
-
-        size_t bytes_to_copy = 0;
-        size_t from_string_prev_offset_local = from_string_prev_offset;
-        for (size_t j = 0; j < array_size; ++j)
-        {
-            size_t string_size = from_string_offsets[from_array_prev_offset + j] - from_string_prev_offset_local;
-
-            to_string_prev_offset += string_size;
-            to_string_offsets.push_back(to_string_prev_offset);
-
-            from_string_prev_offset_local += string_size;
-            bytes_to_copy += string_size;
-        }
-
-        size_t to_data_old_size = to_data.size();
-        to_data.resize(to_data_old_size + bytes_to_copy);
-        memcpy(&to_data[to_data_old_size], &from_data[from_string_prev_offset], bytes_to_copy);
-
-        to_array_prev_offset += array_size;
-        to_array_offsets[i] = to_array_prev_offset;
-    }
-
-    static ALWAYS_INLINE void copy_from_constant(
-        size_t i,
-        const Array & from_data,
-        ColumnString::Chars_t & to_data,
-        ColumnString::Offsets_t & to_string_offsets,
-        ColumnArray::Offsets_t & to_array_offsets,
-        ColumnArray::Offset_t & to_array_prev_offset,
-        ColumnString::Offset_t & to_string_prev_offset)
-    {
-        size_t array_size = from_data.size();
-
-        for (size_t j = 0; j < array_size; ++j)
-        {
-            const String & str = from_data[j].get<const String &>();
-            size_t string_size = str.size() + 1;    /// Including 0 at the end.
-
-            to_data.resize(to_string_prev_offset + string_size);
-            memcpy(&to_data[to_string_prev_offset], str.data(), string_size);
-
-            to_string_prev_offset += string_size;
-            to_string_offsets.push_back(to_string_prev_offset);
-        }
-
-        to_array_prev_offset += array_size;
-        to_array_offsets[i] = to_array_prev_offset;
-    }
-
-
-    static void vector_vector(
-        const PaddedPODArray<UInt8> & cond,
-        const ColumnString::Chars_t & a_data, const ColumnString::Offsets_t & a_string_offsets, const ColumnArray::Offsets_t & a_array_offsets,
-        const ColumnString::Chars_t & b_data, const ColumnString::Offsets_t & b_string_offsets, const ColumnArray::Offsets_t & b_array_offsets,
-        ColumnString::Chars_t & c_data, ColumnString::Offsets_t & c_string_offsets, ColumnArray::Offsets_t & c_array_offsets)
-    {
-        size_t size = cond.size();
-        c_array_offsets.resize(size);
-        c_string_offsets.reserve(std::max(a_string_offsets.size(), b_string_offsets.size()));
-        c_data.reserve(std::max(a_data.size(), b_data.size()));
-
-        ColumnArray::Offset_t a_array_prev_offset = 0;
-        ColumnArray::Offset_t b_array_prev_offset = 0;
-        ColumnArray::Offset_t c_array_prev_offset = 0;
-
-        ColumnString::Offset_t a_string_prev_offset = 0;
-        ColumnString::Offset_t b_string_prev_offset = 0;
-        ColumnString::Offset_t c_string_prev_offset = 0;
-
-        for (size_t i = 0; i < size; ++i)
-        {
-            if (cond[i])
-                copy_from_vector(i,
-                    a_data, a_string_offsets, a_array_offsets, a_array_prev_offset, a_string_prev_offset,
-                    c_data, c_string_offsets, c_array_offsets, c_array_prev_offset, c_string_prev_offset);
-            else
-                copy_from_vector(i,
-                    b_data, b_string_offsets, b_array_offsets, b_array_prev_offset, b_string_prev_offset,
-                    c_data, c_string_offsets, c_array_offsets, c_array_prev_offset, c_string_prev_offset);
-
-            a_array_prev_offset = a_array_offsets[i];
-            b_array_prev_offset = b_array_offsets[i];
-
-            if (a_array_prev_offset)
-                a_string_prev_offset = a_string_offsets[a_array_prev_offset - 1];
-
-            if (b_array_prev_offset)
-                b_string_prev_offset = b_string_offsets[b_array_prev_offset - 1];
-        }
-    }
-
-    template <bool reverse>
-    static void vector_constant_impl(
-        const PaddedPODArray<UInt8> & cond,
-        const ColumnString::Chars_t & a_data, const ColumnString::Offsets_t & a_string_offsets, const ColumnArray::Offsets_t & a_array_offsets,
-        const Array & b,
-        ColumnString::Chars_t & c_data, ColumnString::Offsets_t & c_string_offsets, ColumnArray::Offsets_t & c_array_offsets)
-    {
-        size_t size = cond.size();
-        c_array_offsets.resize(size);
-        c_string_offsets.reserve(a_string_offsets.size());
-        c_data.reserve(a_data.size());
-
-        ColumnArray::Offset_t a_array_prev_offset = 0;
-        ColumnArray::Offset_t c_array_prev_offset = 0;
-
-        ColumnString::Offset_t a_string_prev_offset = 0;
-        ColumnString::Offset_t c_string_prev_offset = 0;
-
-        for (size_t i = 0; i < size; ++i)
-        {
-            if (reverse != cond[i])
-                copy_from_vector(i,
-                    a_data, a_string_offsets, a_array_offsets, a_array_prev_offset, a_string_prev_offset,
-                    c_data, c_string_offsets, c_array_offsets, c_array_prev_offset, c_string_prev_offset);
-            else
-                copy_from_constant(i,
-                     b,
-                     c_data, c_string_offsets, c_array_offsets, c_array_prev_offset, c_string_prev_offset);
-
-            a_array_prev_offset = a_array_offsets[i];
-
-            if (a_array_prev_offset)
-                a_string_prev_offset = a_string_offsets[a_array_prev_offset - 1];
-        }
-    }
-
-    static void vector_constant(
-        const PaddedPODArray<UInt8> & cond,
-        const ColumnString::Chars_t & a_data, const ColumnString::Offsets_t & a_string_offsets, const ColumnArray::Offsets_t & a_array_offsets,
-        const Array & b,
-        ColumnString::Chars_t & c_data, ColumnString::Offsets_t & c_string_offsets, ColumnArray::Offsets_t & c_array_offsets)
-    {
-        vector_constant_impl<false>(cond, a_data, a_string_offsets, a_array_offsets, b, c_data, c_string_offsets, c_array_offsets);
-    }
-
-    static void constant_vector(
-        const PaddedPODArray<UInt8> & cond,
-        const Array & a,
-        const ColumnString::Chars_t & b_data, const ColumnString::Offsets_t & b_string_offsets, const ColumnArray::Offsets_t & b_array_offsets,
-        ColumnString::Chars_t & c_data, ColumnString::Offsets_t & c_string_offsets, ColumnArray::Offsets_t & c_array_offsets)
-    {
-        vector_constant_impl<true>(cond, b_data, b_string_offsets, b_array_offsets, a, c_data, c_string_offsets, c_array_offsets);
-    }
-
-    static void constant_constant(
-        const PaddedPODArray<UInt8> & cond,
-        const Array & a,
-        const Array & b,
-        ColumnString::Chars_t & c_data, ColumnString::Offsets_t & c_string_offsets, ColumnArray::Offsets_t & c_array_offsets)
-    {
-        size_t size = cond.size();
-        c_array_offsets.resize(size);
-        c_string_offsets.reserve(std::max(a.size(), b.size()) * size);
-
-        size_t sum_size_a = 0;
-        for (const auto & s : a)
-            sum_size_a += s.get<const String &>().size() + 1;
-
-        size_t sum_size_b = 0;
-        for (const auto & s : b)
-            sum_size_b += s.get<const String &>().size() + 1;
-
-        c_data.reserve(std::max(sum_size_a, sum_size_b) * size);
-
-        ColumnArray::Offset_t c_array_prev_offset = 0;
-        ColumnString::Offset_t c_string_prev_offset = 0;
-
-        for (size_t i = 0; i < size; ++i)
-        {
-            if (cond[i])
-                copy_from_constant(i,
-                    a,
-                    c_data, c_string_offsets, c_array_offsets, c_array_prev_offset, c_string_prev_offset);
-            else
-                copy_from_constant(i,
-                    b,
-                    c_data, c_string_offsets, c_array_offsets, c_array_prev_offset, c_string_prev_offset);
-        }
-    }
-};
-
-
 class FunctionIf : public IFunction
 {
 public:
@@ -736,8 +533,12 @@ private:
         return false;
     }
 
-    bool executeGeneric(const ColumnUInt8 * cond_col, Block & block, const ColumnNumbers & arguments, size_t result)
+    bool executeGenericArray(const ColumnUInt8 * cond_col, Block & block, const ColumnNumbers & arguments, size_t result)
     {
+        /// For generic implementation, arrays must be of same type.
+        if (!block.getByPosition(arguments[1]).type->equals(*block.getByPosition(arguments[2]).type))
+            return false;
+
         const IColumn * col_then_untyped = block.getByPosition(arguments[1]).column.get();
         const IColumn * col_else_untyped = block.getByPosition(arguments[2]).column.get();
 
@@ -1249,7 +1050,7 @@ public:
                 || executeLeftType<Float32>(cond_col, block, arguments, result)
                 || executeLeftType<Float64>(cond_col, block, arguments, result)
                 || executeString(cond_col, block, arguments, result)
-                || executeGeneric(cond_col, block, arguments, result)
+                || executeGenericArray(cond_col, block, arguments, result)
                 || executeTuple(cond_col, block, arguments, result)))
                 throw Exception("Illegal columns " + arg_then.column->getName()
                     + " and " + arg_else.column->getName()

From ff54c93a22e00642469021df4e39cab5d278f11f Mon Sep 17 00:00:00 2001
From: Alexey Milovidov <milovidov@yandex-team.ru>
Date: Mon, 7 Aug 2017 04:36:20 +0300
Subject: [PATCH 061/281] Miscellaneous [#CLICKHOUSE-2].

---
 dbms/src/Functions/FunctionsConditional.cpp | 28 ++++++++++-----------
 dbms/src/Functions/FunctionsConditional.h   | 14 +++++------
 dbms/src/IO/CompressedReadBufferBase.cpp    |  1 -
 dbms/src/Parsers/ParserCase.cpp             |  4 +--
 4 files changed, 22 insertions(+), 25 deletions(-)

diff --git a/dbms/src/Functions/FunctionsConditional.cpp b/dbms/src/Functions/FunctionsConditional.cpp
index b88f0999e21..8c1989f19fc 100644
--- a/dbms/src/Functions/FunctionsConditional.cpp
+++ b/dbms/src/Functions/FunctionsConditional.cpp
@@ -18,8 +18,8 @@ void registerFunctionsConditional(FunctionFactory & factory)
 {
     factory.registerFunction<FunctionIf>();
     factory.registerFunction<FunctionMultiIf>();
-    factory.registerFunction<FunctionCaseWithExpr>();
-    factory.registerFunction<FunctionCaseWithoutExpr>();
+    factory.registerFunction<FunctionCaseWithExpression>();
+    factory.registerFunction<FunctionCaseWithoutExpression>();
 }
 
 namespace
@@ -369,24 +369,23 @@ bool FunctionMultiIf::performTrivialCase(Block & block, const ColumnNumbers & ar
     return true;
 }
 
-/// Implementation of FunctionCaseWithExpr.
 
-FunctionPtr FunctionCaseWithExpr::create(const Context & context_)
+FunctionPtr FunctionCaseWithExpression::create(const Context & context_)
 {
-    return std::make_shared<FunctionCaseWithExpr>(context_);
+    return std::make_shared<FunctionCaseWithExpression>(context_);
 }
 
-FunctionCaseWithExpr::FunctionCaseWithExpr(const Context & context_)
+FunctionCaseWithExpression::FunctionCaseWithExpression(const Context & context_)
     : context{context_}
 {
 }
 
-String FunctionCaseWithExpr::getName() const
+String FunctionCaseWithExpression::getName() const
 {
     return name;
 }
 
-DataTypePtr FunctionCaseWithExpr::getReturnTypeImpl(const DataTypes & args) const
+DataTypePtr FunctionCaseWithExpression::getReturnTypeImpl(const DataTypes & args) const
 {
     /// See the comments in executeImpl() to understand why we actually have to
     /// get the return type of a transform function.
@@ -413,7 +412,7 @@ DataTypePtr FunctionCaseWithExpr::getReturnTypeImpl(const DataTypes & args) cons
     return fun_transform.getReturnTypeImpl({args.front(), src_array_type, dst_array_type, args.back()});
 }
 
-void FunctionCaseWithExpr::executeImpl(Block & block, const ColumnNumbers & args, size_t result)
+void FunctionCaseWithExpression::executeImpl(Block & block, const ColumnNumbers & args, size_t result)
 {
     /// In the following code, we turn the construction:
     /// CASE expr WHEN val[0] THEN branch[0] ... WHEN val[N-1] then branch[N-1] ELSE branchN
@@ -470,25 +469,24 @@ void FunctionCaseWithExpr::executeImpl(Block & block, const ColumnNumbers & args
     block.getByPosition(result).column = std::move(temp_block.getByPosition(result).column);
 }
 
-/// Implementation of FunctionCaseWithoutExpr.
 
-FunctionPtr FunctionCaseWithoutExpr::create(const Context & context_)
+FunctionPtr FunctionCaseWithoutExpression::create(const Context & context_)
 {
-    return std::make_shared<FunctionCaseWithoutExpr>();
+    return std::make_shared<FunctionCaseWithoutExpression>();
 }
 
-String FunctionCaseWithoutExpr::getName() const
+String FunctionCaseWithoutExpression::getName() const
 {
     return name;
 }
 
-DataTypePtr FunctionCaseWithoutExpr::getReturnTypeImpl(const DataTypes & args) const
+DataTypePtr FunctionCaseWithoutExpression::getReturnTypeImpl(const DataTypes & args) const
 {
     FunctionMultiIf fun_multi_if;
     return fun_multi_if.getReturnTypeImpl(args);
 }
 
-void FunctionCaseWithoutExpr::executeImpl(Block & block, const ColumnNumbers & args, size_t result)
+void FunctionCaseWithoutExpression::executeImpl(Block & block, const ColumnNumbers & args, size_t result)
 {
     /// A CASE construction without any expression is a straightforward multiIf.
     FunctionMultiIf fun_multi_if;
diff --git a/dbms/src/Functions/FunctionsConditional.h b/dbms/src/Functions/FunctionsConditional.h
index 7381985177e..38e0d134b0d 100644
--- a/dbms/src/Functions/FunctionsConditional.h
+++ b/dbms/src/Functions/FunctionsConditional.h
@@ -1115,16 +1115,16 @@ private:
     bool performTrivialCase(Block & block, const ColumnNumbers & args, size_t result, Conditional::NullMapBuilder & builder);
 };
 
-/// Function caseWithExpr which implements the CASE construction when it is
+/// Implements the CASE construction when it is
 /// provided an expression. Users should not call this function.
-class FunctionCaseWithExpr : public IFunction
+class FunctionCaseWithExpression : public IFunction
 {
 public:
-    static constexpr auto name = "caseWithExpr";
+    static constexpr auto name = "caseWithExpression";
     static FunctionPtr create(const Context & context_);
 
 public:
-    FunctionCaseWithExpr(const Context & context_);
+    FunctionCaseWithExpression(const Context & context_);
     bool isVariadic() const override { return true; }
     size_t getNumberOfArguments() const override { return 0; }
     String getName() const override;
@@ -1135,12 +1135,12 @@ private:
     const Context & context;
 };
 
-/// Function caseWithoutExpr which implements the CASE construction when it
+/// Implements the CASE construction when it
 /// isn't provided any expression. Users should not call this function.
-class FunctionCaseWithoutExpr : public IFunction
+class FunctionCaseWithoutExpression : public IFunction
 {
 public:
-    static constexpr auto name = "caseWithoutExpr";
+    static constexpr auto name = "caseWithoutExpression";
     static FunctionPtr create(const Context & context_);
 
 public:
diff --git a/dbms/src/IO/CompressedReadBufferBase.cpp b/dbms/src/IO/CompressedReadBufferBase.cpp
index 55c5b94f13d..1c51a3e1900 100644
--- a/dbms/src/IO/CompressedReadBufferBase.cpp
+++ b/dbms/src/IO/CompressedReadBufferBase.cpp
@@ -53,7 +53,6 @@ size_t CompressedReadBufferBase::readCompressedData(size_t & size_decompressed,
 
     size_t & size_compressed = size_compressed_without_checksum;
 
-
     if (method == static_cast<UInt8>(CompressionMethodByte::LZ4) ||
             method == static_cast<UInt8>(CompressionMethodByte::ZSTD) ||
             method == static_cast<UInt8>(CompressionMethodByte::NONE))
diff --git a/dbms/src/Parsers/ParserCase.cpp b/dbms/src/Parsers/ParserCase.cpp
index 82cf144416b..dfa30bf8355 100644
--- a/dbms/src/Parsers/ParserCase.cpp
+++ b/dbms/src/Parsers/ParserCase.cpp
@@ -86,7 +86,7 @@ bool ParserCase::parseImpl(Pos & pos, ASTPtr & node, Expected & expected)
         function_args->children = std::move(args);
 
         auto function = std::make_shared<ASTFunction>(StringRange{begin, pos});
-        function->name = "caseWithExpr";
+        function->name = "caseWithExpression";
         function->arguments = function_args;
         function->children.push_back(function->arguments);
 
@@ -101,7 +101,7 @@ bool ParserCase::parseImpl(Pos & pos, ASTPtr & node, Expected & expected)
         function_args->children = std::move(args);
 
         auto function = std::make_shared<ASTFunction>(StringRange{begin, pos});
-        function->name = "caseWithoutExpr";
+        function->name = "caseWithoutExpression";
         function->arguments = function_args;
         function->children.push_back(function->arguments);
 

From 46be81bc88a2e4a8b71e209eb632e5f2cad26be0 Mon Sep 17 00:00:00 2001
From: Alexey Milovidov <milovidov@yandex-team.ru>
Date: Mon, 7 Aug 2017 11:03:23 +0300
Subject: [PATCH 062/281] Fixed error [#CLICKHOUSE-2].

---
 dbms/src/Common/escapeForFileName.cpp | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/dbms/src/Common/escapeForFileName.cpp b/dbms/src/Common/escapeForFileName.cpp
index 5fd5e7fc6c5..e948b422eb0 100644
--- a/dbms/src/Common/escapeForFileName.cpp
+++ b/dbms/src/Common/escapeForFileName.cpp
@@ -13,7 +13,7 @@ std::string escapeForFileName(const std::string & s)
 
     while (pos != end)
     {
-        char c = *pos;
+        unsigned char c = *pos;
 
         if (isWordCharASCII(c))
             res += c;

From 8fb22b1dcb621807ba7c794d27d920755e075dd4 Mon Sep 17 00:00:00 2001
From: Alexey Milovidov <milovidov@yandex-team.ru>
Date: Mon, 7 Aug 2017 11:05:03 +0300
Subject: [PATCH 063/281] Added failing test [#CLICKHOUSE-2].

---
 .../queries/0_stateless/00488_non_ascii_column_names.sql    | 6 ++++++
 1 file changed, 6 insertions(+)
 create mode 100644 dbms/tests/queries/0_stateless/00488_non_ascii_column_names.sql

diff --git a/dbms/tests/queries/0_stateless/00488_non_ascii_column_names.sql b/dbms/tests/queries/0_stateless/00488_non_ascii_column_names.sql
new file mode 100644
index 00000000000..3a0d0542c36
--- /dev/null
+++ b/dbms/tests/queries/0_stateless/00488_non_ascii_column_names.sql
@@ -0,0 +1,6 @@
+DROP TABLE IF EXISTS test.non_ascii;
+CREATE TABLE test.non_ascii (`привет` String, `мир` String) ENGINE = TinyLog;
+INSERT INTO test.non_ascii VALUES ('hello', 'world');
+SELECT `привет` FROM test.non_ascii;
+SELECT * FROM test.non_ascii;
+DROP TABLE test.non_ascii;

From ea2b486735a282e49ab15424566a0c7f5c38a88d Mon Sep 17 00:00:00 2001
From: Nikolai Kochetov <nik-kochetov@yandex-team.ru>
Date: Mon, 7 Aug 2017 23:26:28 +0300
Subject: [PATCH 064/281] added threadpool to DistributedBlockOutputStream

---
 .../DistributedBlockOutputStream.cpp          | 220 ++++++++++--------
 .../DistributedBlockOutputStream.h            |  20 +-
 libs/libcommon/include/common/ThreadPool.h    |   3 +-
 3 files changed, 138 insertions(+), 105 deletions(-)

diff --git a/dbms/src/Storages/Distributed/DistributedBlockOutputStream.cpp b/dbms/src/Storages/Distributed/DistributedBlockOutputStream.cpp
index 919cf062205..994ed9e4c7d 100644
--- a/dbms/src/Storages/Distributed/DistributedBlockOutputStream.cpp
+++ b/dbms/src/Storages/Distributed/DistributedBlockOutputStream.cpp
@@ -12,21 +12,22 @@
 #include <DataStreams/NativeBlockOutputStream.h>
 #include <DataStreams/RemoteBlockOutputStream.h>
 #include <Interpreters/InterpreterInsertQuery.h>
-#include <Interpreters/Cluster.h>
 #include <Interpreters/createBlockSelector.h>
 
 #include <DataTypes/DataTypesNumber.h>
+#include <Common/setThreadName.h>
 #include <Common/ClickHouseRevision.h>
 #include <Common/CurrentMetrics.h>
 #include <Common/typeid_cast.h>
 #include <Common/Exception.h>
 #include <Common/ProfileEvents.h>
 #include <Common/MemoryTracker.h>
+#include <Common/escapeForFileName.h>
 #include <common/logger_useful.h>
+#include <ext/range.h>
 
 #include <Poco/DirectoryIterator.h>
 
-#include <iostream>
 #include <future>
 #include <condition_variable>
 #include <mutex>
@@ -55,16 +56,9 @@ DistributedBlockOutputStream::DistributedBlockOutputStream(StorageDistributed &
 {
 }
 
-DistributedBlockOutputStream::writePrefix()
+void DistributedBlockOutputStream::writePrefix()
 {
     deadline = std::chrono::steady_clock::now() + std::chrono::seconds(insert_timeout);
-    remote_jobs_count = 0;
-    if (storage.getShardingKeyExpr())
-    {
-        const auto & shards_info = cluster->getShardsInfo();
-        for (const auto & shard_info : shards_info)
-            remote_jobs_count += shard_info.dir_names.size();
-    }
 }
 
 void DistributedBlockOutputStream::write(const Block & block)
@@ -84,13 +78,12 @@ void DistributedBlockOutputStream::writeAsync(const Block & block)
     ++blocks_inserted;
 }
 
-ThreadPool::Job createWritingJob(std::vector<bool> & done_jobs, std::atomic<unsigned> & finished_jobs_count,
-                                 std::condition_variable & cond_var, const Block & block, size_t job_id,
-                                 const Cluster::ShardInfo & shard_info, size_t replica_id)
+ThreadPool::Job DistributedBlockOutputStream::createWritingJob(
+    std::vector<bool> & done_jobs, std::atomic<unsigned> & finished_jobs_count, std::condition_variable & cond_var,
+    const Block & block, size_t job_id, const Cluster::ShardInfo & shard_info, size_t replica_id)
 {
     auto memory_tracker = current_memory_tracker;
-    return [this, memory_tracker, & done_jobs, & finished_jobs_count, & cond_var, & block,
-            size_t job_id, const Cluster::ShardInfo & shard_info, size_t replica_id]()
+    return [this, memory_tracker, & done_jobs, & finished_jobs_count, & cond_var, & block, job_id, & shard_info, replica_id]()
     {
         if (!current_memory_tracker)
         {
@@ -113,51 +106,126 @@ ThreadPool::Job createWritingJob(std::vector<bool> & done_jobs, std::atomic<unsi
     };
 }
 
-void DistributedBlockOutputStream::writeToLocal(const Blocks & blocks)
+void DistributedBlockOutputStream::writeToLocal(const Blocks & blocks, size_t & finished_writings_count)
 {
-    const Cluster::ShardInfo & shard_info = cluster->getShardsInfo();
-    for (size_t shard_id: ext::range(0, shards_info.size()))
+    const Cluster::ShardsInfo & shards_info = cluster->getShardsInfo();
+    for (size_t shard_id : ext::range(0, shards_info.size()))
     {
         const auto & shard_info = shards_info[shard_id];
         if (shard_info.getLocalNodeCount() > 0)
-            writeToLocal(blocks[shard_id], shard_info.getLocalNodeCount());
+            writeToLocal(blocks[shard_id], shard_info.getLocalNodeCount(), finished_writings_count);
     }
 }
 
 
-std::string getCurrentStateDescription(const std::vector<bool> & done_jobs)
+std::string DistributedBlockOutputStream::getCurrentStateDescription(
+    const std::vector<bool> & done_jobs, size_t finished_local_nodes_count)
 {
+    const Cluster::ShardsInfo & shards_info = cluster->getShardsInfo();
+    String description;
+    WriteBufferFromString buffer(description);
+
+    buffer << "Insertion status:\n";
+
+    auto writeDescription = [&buffer](const std::string & address, size_t shard_id, size_t blocks_wrote)
+    {
+        buffer << "Wrote " << blocks_wrote << " blocks on shard " << shard_id << " replica ";
+        buffer << unescapeForFileName(address) << '\n';
+    };
+
+    size_t job_id = 0;
+    for (size_t shard_id : ext::range(0, shards_info.size()))
+    {
+        const auto & shard_info = shards_info[shard_id];
+        const auto & local_addresses = shard_info.local_addresses;
+
+        for (const auto & address : local_addresses)
+        {
+            writeDescription(address.toStringFull(), shard_id, blocks_inserted + (finished_local_nodes_count ? 1 : 0));
+            if (finished_local_nodes_count)
+                --finished_local_nodes_count;
+        }
+
+        for (const auto & dir_name : shard_info.dir_names)
+            writeDescription(dir_name, shard_id, blocks_inserted + (done_jobs[job_id++] ? 1 : 0));
+    }
+
+    return description;
+}
+
+void DistributedBlockOutputStream::calculateRemoteJobsCount()
+{
+    remote_jobs_count = 0;
+    const auto & shards_info = cluster->getShardsInfo();
+    for (const auto & shard_info : shards_info)
+        remote_jobs_count += shard_info.dir_names.size();
 }
 
 void DistributedBlockOutputStream::writeSync(const Block & block)
 {
     if (!pool)
-        pool = ThreadPool(remote_jobs_count);
+    {
+        /// Deferred initialization. Only for sync insertion.
+        calculateRemoteJobsCount();
+        pool.emplace(remote_jobs_count);
+    }
 
     std::vector<bool> done_jobs(remote_jobs_count, false);
-    std::atomic<unsigned> finished_jobs_count = 0;
+    std::atomic<unsigned> finished_jobs_count(0);
     std::mutex mutex;
     std::condition_variable cond_var;
 
-    const Cluster::ShardInfo & shard_info = cluster->getShardsInfo();
-    Blocks blocks = shard_info.size() > 1 ? splitBlocks(block) : Blocks({block});
+    const Cluster::ShardsInfo & shards_info = cluster->getShardsInfo();
+    Blocks blocks = shards_info.size() > 1 ? splitBlock(block) : Blocks({block});
 
     size_t job_id = 0;
-    for (size_t shard_id: ext::range(0, blocks.size()))
-        for (size_t replica_id : ext::range(0, shards_info[shard_id].dir_names.size()))
-            pool->schledule(createWritingJob(jobs_done, finished_jobs_count, cond_var,
-                                             blocks[shard_id], job_id++, shards_info[shard_id], replica_id));
+    for (size_t shard_id : ext::range(0, blocks.size()))
+        for (size_t replica_id: ext::range(0, shards_info[shard_id].dir_names.size()))
+            pool->schedule(createWritingJob(done_jobs, finished_jobs_count, cond_var,
+                                            blocks[shard_id], job_id++, shards_info[shard_id], replica_id));
+
+    const size_t jobs_count = job_id;
+    size_t finished_local_nodes_count;
+    const auto time_point = deadline;
+    auto timeout = insert_timeout;
+    auto & pool = this->pool;
+    auto wait = [& mutex, & cond_var, time_point, & finished_jobs_count, jobs_count, & pool, timeout]()
+    {
+        std::unique_lock<std::mutex> lock(mutex);
+        auto cond = [& finished_jobs_count, jobs_count] { return finished_jobs_count == jobs_count; };
+        if (timeout)
+        {
+            if (!cond_var.wait_until(lock, time_point, cond))
+            {
+                pool->wait();
+                ProfileEvents::increment(ProfileEvents::DistributedSyncInsertionTimeoutExceeded);
+                throw Exception("Timeout exceeded.", ErrorCodes::TIMEOUT_EXCEEDED);
+            }
+        }
+        else
+            cond_var.wait(lock, cond);
+        pool->wait();
+    };
+
+    std::exception_ptr exception;
     try
-        writeToLocal(blocks);
+    {
+        writeToLocal(blocks, finished_local_nodes_count);
+    }
+    catch (...)
+    {
+        exception = std::current_exception();
+    }
+
+    try
+    {
+        wait();
+        if (exception)
+            std::rethrow_exception(exception);
+    }
     catch(Exception & exception)
     {
-        try
-            pool->wait();
-        catch(Exception & exception)
-        {
-
-            throw;
-        }
+        exception.addMessage(getCurrentStateDescription(done_jobs, finished_local_nodes_count));
         throw;
     }
 
@@ -190,7 +258,7 @@ IColumn::Selector DistributedBlockOutputStream::createSelector(Block block)
 }
 
 
-Blocks DistributedBlockOutputStream::splitBlocks(const Block & block)
+Blocks DistributedBlockOutputStream::splitBlock(const Block & block)
 {
     const auto num_cols = block.columns();
     /// cache column pointers for later reuse
@@ -222,7 +290,7 @@ Blocks DistributedBlockOutputStream::splitBlocks(const Block & block)
 
 void DistributedBlockOutputStream::writeSplit(const Block & block)
 {
-    Blocks splitted_blocks = splitBlocks(block);
+    Blocks splitted_blocks = splitBlock(block);
     const size_t num_shards = splitted_blocks.size();
 
     for (size_t shard_idx = 0; shard_idx < num_shards; ++shard_idx)
@@ -236,29 +304,17 @@ void DistributedBlockOutputStream::writeSplit(const Block & block)
 void DistributedBlockOutputStream::writeImpl(const Block & block, const size_t shard_id)
 {
     const auto & shard_info = cluster->getShardsInfo()[shard_id];
+    size_t finished_writings_count = 0;
     if (shard_info.getLocalNodeCount() > 0)
-        writeToLocal(block, shard_info.getLocalNodeCount());
+        writeToLocal(block, shard_info.getLocalNodeCount(), finished_writings_count);
 
     /// dir_names is empty if shard has only local addresses
     if (!shard_info.dir_names.empty())
-    {
-        if (!insert_sync)
-            writeToShard(block, shard_info.dir_names);
-        else
-        {
-            std::atomic<bool> timeout_exceeded(false);
-            auto launch = insert_timeout ? std::launch::async : std::launch::deferred;
-            auto result = std::async(launch, &DistributedBlockOutputStream::writeToShardSync, this, std::cref(block),
-                                     std::cref(shard_info.dir_names), shard_id, std::ref(timeout_exceeded));
-            if (insert_timeout && result.wait_until(deadline) == std::future_status::timeout)
-                timeout_exceeded = true;
-            result.get();
-        }
-    }
+        writeToShard(block, shard_info.dir_names);
 }
 
 
-void DistributedBlockOutputStream::writeToLocal(const Block & block, const size_t repeats)
+void DistributedBlockOutputStream::writeToLocal(const Block & block, const size_t repeats, size_t & finished_writings_count)
 {
     InterpreterInsertQuery interp{query_ast, storage.context};
 
@@ -266,60 +322,33 @@ void DistributedBlockOutputStream::writeToLocal(const Block & block, const size_
     block_io.out->writePrefix();
 
     for (size_t i = 0; i < repeats; ++i)
+    {
         block_io.out->write(block);
+        ++finished_writings_count;
+    }
 
     block_io.out->writeSuffix();
 }
 
 
-void DistributedBlockOutputStream::writeToShardSync(const Block & block, const std::vector<std::string> & dir_names,
-                                                    size_t shard_id, const std::atomic<bool> & timeout_exceeded)
+void DistributedBlockOutputStream::writeToShardSync(
+    const Block & block, const Cluster::ShardInfo & shard_info, size_t replica_id)
 {
-    auto & blocks_inserted = this->blocks_inserted;
-    auto writeNodeDescription = [shard_id, & blocks_inserted](WriteBufferFromString & out, const Connection & connection)
-    {
-        out << " (While insertion to " << connection.getDescription() << " shard " << shard_id;
-        out << " Inserted blocks: " << blocks_inserted << ")";
-    };
+    const auto & dir_name = shard_info.dir_names[replica_id];
+    auto pool = storage.requireConnectionPool(dir_name);
+    auto connection = pool->get();
 
     const auto & query_string = queryToString(query_ast);
-    for (const auto & dir_name : dir_names)
-    {
-        auto pool = storage.requireConnectionPool(dir_name);
-        auto connection = pool->get();
+    RemoteBlockOutputStream remote{*connection, query_string};
 
-        CurrentMetrics::Increment metric_increment{CurrentMetrics::DistributedSend};
+    CurrentMetrics::Increment metric_increment{CurrentMetrics::DistributedSend};
 
-        if (timeout_exceeded)
-        {
-            ProfileEvents::increment(ProfileEvents::DistributedSyncInsertionTimeoutExceeded);
-
-            String message;
-            WriteBufferFromString out(message);
-            out << "Timeout exceeded.";
-            writeNodeDescription(out, *connection);
-            throw Exception(message, ErrorCodes::TIMEOUT_EXCEEDED);
-        }
-
-        try
-        {
-            RemoteBlockOutputStream remote{*connection, query_string};
-
-            remote.writePrefix();
-            remote.write(block);
-            remote.writeSuffix();
-        }
-        catch (Exception & exception)
-        {
-            String message;
-            WriteBufferFromString out(message);
-            writeNodeDescription(out, *connection);
-            exception.addMessage(message);
-            exception.rethrow();
-        }
-    }
+    remote.writePrefix();
+    remote.write(block);
+    remote.writeSuffix();
 }
 
+
 void DistributedBlockOutputStream::writeToShard(const Block & block, const std::vector<std::string> & dir_names)
 {
     /** tmp directory is used to ensure atomicity of transactions
@@ -374,4 +403,5 @@ void DistributedBlockOutputStream::writeToShard(const Block & block, const std::
     Poco::File(first_file_tmp_path).remove();
 }
 
+
 }
diff --git a/dbms/src/Storages/Distributed/DistributedBlockOutputStream.h b/dbms/src/Storages/Distributed/DistributedBlockOutputStream.h
index cc3acef1c97..3ed4ed04e45 100644
--- a/dbms/src/Storages/Distributed/DistributedBlockOutputStream.h
+++ b/dbms/src/Storages/Distributed/DistributedBlockOutputStream.h
@@ -8,6 +8,7 @@
 #include <memory>
 #include <chrono>
 #include <experimental/optional>
+#include <Interpreters/Cluster.h>
 
 namespace Poco
 {
@@ -18,8 +19,6 @@ namespace DB
 {
 
 class StorageDistributed;
-class Cluster;
-using ClusterPtr = std::shared_ptr<Cluster>;
 
 /** If insert_sync_ is true, the write is synchronous. Uses insert_timeout_ if it is not zero.
  *  Otherwise, the write is asynchronous - the data is first written to the local filesystem, and then sent to the remote servers.
@@ -42,31 +41,36 @@ public:
 private:
     void writeAsync(const Block & block);
 
+    /// Performs synchronous insertion to remote nodes. If timeout_exceeded flag was set, throws.
     void writeSync(const Block & block);
 
+    void calculateRemoteJobsCount();
+
     ThreadPool::Job createWritingJob(std::vector<bool> & done_jobs, std::atomic<unsigned> & finished_jobs_count,
                                      std::condition_variable & cond_var, const Block & block, size_t job_id,
                                      const Cluster::ShardInfo & shard_info, size_t replica_id);
 
-    void writeToLocal(const Blocks & blocks);
+    void writeToLocal(const Blocks & blocks, size_t & finished_writings_count);
 
-    std::string getCurrentStateDescription(const std::vector<bool> & done_jobs);
+    /// Returns the number of blocks that were read for each cluster node. Used during exception handling.
+    std::string getCurrentStateDescription(const std::vector<bool> & done_jobs, size_t finished_local_nodes_count);
 
     IColumn::Selector createSelector(Block block);
 
+    /// Split block between shards.
     Blocks splitBlock(const Block & block);
 
     void writeSplit(const Block & block);
 
     void writeImpl(const Block & block, const size_t shard_id = 0);
 
-    void writeToLocal(const Block & block, const size_t repeats);
+    /// Increments finished_writings_count after each repeat.
+    void writeToLocal(const Block & block, const size_t repeats, size_t & finished_writings_count);
 
     void writeToShard(const Block & block, const std::vector<std::string> & dir_names);
 
-    /// Performs synchronous insertion to remote nodes. If timeout_exceeded flag was set, throws.
-    void writeToShardSync(const Block & block, const std::vector<std::string> & dir_names,
-                          size_t shard_id, const std::atomic<bool> & timeout_exceeded);
+    /// Performs synchronous insertion to remote node.
+    void writeToShardSync(const Block & block, const Cluster::ShardInfo & shard_info, size_t replica_id);
 
 private:
     StorageDistributed & storage;
diff --git a/libs/libcommon/include/common/ThreadPool.h b/libs/libcommon/include/common/ThreadPool.h
index 66fba974692..7e20294ec08 100644
--- a/libs/libcommon/include/common/ThreadPool.h
+++ b/libs/libcommon/include/common/ThreadPool.h
@@ -16,10 +16,9 @@
 
 class ThreadPool
 {
-private:
+public:
     using Job = std::function<void()>;
 
-public:
     /// Size is constant, all threads are created immediately.
     ThreadPool(size_t m_size);
 

From 692b923b0ca805c72fdadd252e393b388fa44fdb Mon Sep 17 00:00:00 2001
From: Alexey Milovidov <milovidov@yandex-team.ru>
Date: Tue, 8 Aug 2017 03:06:21 +0300
Subject: [PATCH 065/281] Use in SELECT even shards with zero weight
 [#CLICKHOUSE-3204].

---
 dbms/src/Interpreters/Cluster.cpp | 16 ++++++++--------
 dbms/src/Interpreters/Cluster.h   |  2 +-
 2 files changed, 9 insertions(+), 9 deletions(-)

diff --git a/dbms/src/Interpreters/Cluster.cpp b/dbms/src/Interpreters/Cluster.cpp
index d3999f6fd4e..87d92444958 100644
--- a/dbms/src/Interpreters/Cluster.cpp
+++ b/dbms/src/Interpreters/Cluster.cpp
@@ -24,7 +24,7 @@ namespace
 {
 
 /// Default shard weight.
-static constexpr int default_weight = 1;
+static constexpr UInt32 default_weight = 1;
 
 inline bool isLocal(const Cluster::Address & address)
 {
@@ -195,8 +195,6 @@ Cluster::Cluster(Poco::Util::AbstractConfiguration & config, const Settings & se
 
             const auto & prefix = config_prefix + key;
             const auto weight = config.getInt(prefix + ".weight", default_weight);
-            if (weight == 0)
-                continue;
 
             addresses.emplace_back(config, prefix);
             addresses.back().replica_num = 1;
@@ -225,7 +223,9 @@ Cluster::Cluster(Poco::Util::AbstractConfiguration & config, const Settings & se
                         std::move(pools), settings.load_balancing, settings.connections_with_failover_max_tries);
             }
 
-            slot_to_shard.insert(std::end(slot_to_shard), weight, shards_info.size());
+            if (weight)
+                slot_to_shard.insert(std::end(slot_to_shard), weight, shards_info.size());
+
             shards_info.push_back(info);
         }
         else if (startsWith(key, "shard"))
@@ -240,9 +240,7 @@ Cluster::Cluster(Poco::Util::AbstractConfiguration & config, const Settings & se
             UInt32 current_replica_num = 1;
 
             const auto & partial_prefix = config_prefix + key + ".";
-            const auto weight = config.getInt(partial_prefix + ".weight", default_weight);
-            if (weight == 0)
-                continue;
+            const auto weight = config.getUInt(partial_prefix + ".weight", default_weight);
 
             bool internal_replication = config.getBool(partial_prefix + ".internal_replication", false);
 
@@ -310,7 +308,9 @@ Cluster::Cluster(Poco::Util::AbstractConfiguration & config, const Settings & se
                 shard_pool = std::make_shared<ConnectionPoolWithFailover>(
                         std::move(replicas), settings.load_balancing, settings.connections_with_failover_max_tries);
 
-            slot_to_shard.insert(std::end(slot_to_shard), weight, shards_info.size());
+            if (weight)
+                slot_to_shard.insert(std::end(slot_to_shard), weight, shards_info.size());
+
             shards_info.push_back({std::move(dir_names), current_shard_num, weight, shard_local_addresses, shard_pool, internal_replication});
         }
         else
diff --git a/dbms/src/Interpreters/Cluster.h b/dbms/src/Interpreters/Cluster.h
index 88948f89890..9a7b1470d6c 100644
--- a/dbms/src/Interpreters/Cluster.h
+++ b/dbms/src/Interpreters/Cluster.h
@@ -84,7 +84,7 @@ public:
         std::vector<std::string> dir_names;
         /// Number of the shard, the indexation begins with 1
         UInt32 shard_num;
-        int weight;
+        UInt32 weight;
         Addresses local_addresses;
         ConnectionPoolWithFailoverPtr pool;
         bool has_internal_replication;

From 65a90288dce7b2fcd1e63d8d5488b0057dfce09c Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Marek=20Vavru=C5=A1a?= <mvavrusa@cloudflare.com>
Date: Tue, 1 Aug 2017 12:01:38 -0700
Subject: [PATCH 066/281] PKCondition: added tests for subexpr inference (#951)

---
 .../00486_pk_subexpression.reference          | 20 +++++++++++++
 .../0_stateless/00486_pk_subexpression.sql    | 29 +++++++++++++++++++
 2 files changed, 49 insertions(+)
 create mode 100644 dbms/tests/queries/0_stateless/00486_pk_subexpression.reference
 create mode 100644 dbms/tests/queries/0_stateless/00486_pk_subexpression.sql

diff --git a/dbms/tests/queries/0_stateless/00486_pk_subexpression.reference b/dbms/tests/queries/0_stateless/00486_pk_subexpression.reference
new file mode 100644
index 00000000000..629defd3fc7
--- /dev/null
+++ b/dbms/tests/queries/0_stateless/00486_pk_subexpression.reference
@@ -0,0 +1,20 @@
+2000-01-01      1970-01-01 00:00:01     11      1235
+2000-01-01      1970-01-01 00:00:02     11      4395
+2000-01-01      1970-01-01 00:00:03     22      3545
+2000-01-01      1970-01-01 00:00:04     22      6984
+2000-01-01      1970-01-01 00:00:05     33      4596
+2000-01-01      1970-01-01 00:02:03     33      1235
+2000-01-01      1970-01-01 00:02:01     33      2791
+2000-01-01      1970-01-01 00:02:02     33      2791
+2000-01-01      1970-01-01 00:02:05     44      4578
+2000-01-01      1970-01-01 00:02:04     44      4935
+2000-01-01      1970-01-01 00:02:08     55      1235
+2000-01-01      1970-01-01 00:02:07     55      2791
+2000-01-01      1970-01-01 00:02:06     55      5786
+2000-01-01      1970-01-01 00:00:01     11      1235
+2000-01-01      1970-01-01 00:01:03     11      3572
+2000-01-01      1970-01-01 00:01:01     11      4563
+2000-01-01      1970-01-01 00:01:02     11      4578
+2000-01-01      1970-01-01 00:01:03     11      3572
+2000-01-01      1970-01-01 00:01:01     11      4563
+2000-01-01      1970-01-01 00:01:02     11      4578
\ No newline at end of file
diff --git a/dbms/tests/queries/0_stateless/00486_pk_subexpression.sql b/dbms/tests/queries/0_stateless/00486_pk_subexpression.sql
new file mode 100644
index 00000000000..f5d0364bb63
--- /dev/null
+++ b/dbms/tests/queries/0_stateless/00486_pk_subexpression.sql
@@ -0,0 +1,29 @@
+DROP TABLE IF EXISTS test.pk;
+
+CREATE TABLE test.pk (d Date DEFAULT '2000-01-01', x DateTime, y UInt64, z UInt64) ENGINE = MergeTree(d, (toStartOfMinute(x), y, z), 1);
+
+INSERT INTO test.pk (x, y, z) VALUES (1, 11, 1235), (2, 11, 4395), (3, 22, 3545), (4, 22, 6984), (5, 33, 4596), (61, 11, 4563), (62, 11, 4578), (63, 11, 3572), (64, 22, 5786), (65, 22, 5786), (66, 22, 2791), (67, 22, 2791), (121, 33, 2791), (122, 33, 2791), (123, 33, 1235), (124, 44, 4935), (125, 44, 4578), (126, 55, 5786), (127, 55, 2791), (128, 55, 1235);
+
+SET min_insert_block_size_rows = 0, min_insert_block_size_bytes = 0;
+SET max_block_size = 1;
+
+-- Test inferred limit
+SET max_rows_to_read = 5;
+SELECT * FROM test.pk WHERE x BETWEEN toDateTime(0) AND toDateTime(59);
+
+SET max_rows_to_read = 9;
+SELECT * FROM test.pk WHERE x BETWEEN toDateTime(120) AND toDateTime(240);
+
+-- Index is coarse, cannot read single row
+SET max_rows_to_read = 5;
+SELECT * FROM test.pk WHERE x = toDateTime(1);
+
+-- Index works on interval 00:01:00 - 00:01:59
+SET max_rows_to_read = 4;
+SELECT * FROM test.pk WHERE x BETWEEN toDateTime(60) AND toDateTime(119) AND y = 11;
+
+-- Cannot read less rows as PK is coarser on interval 00:01:00 - 00:02:00
+SET max_rows_to_read = 5;
+SELECT * FROM test.pk WHERE x BETWEEN toDateTime(60) AND toDateTime(120) AND y = 11;
+
+DROP TABLE test.pk;

From 864cae7107723cfda5b0b8aff8f94ed5456188cf Mon Sep 17 00:00:00 2001
From: Alexey Milovidov <milovidov@yandex-team.ru>
Date: Tue, 8 Aug 2017 03:25:05 +0300
Subject: [PATCH 067/281] Fixed test [#CLICKHOUSE-3].

---
 ...subexpression.reference => 00489_pk_subexpression.reference} | 2 +-
 .../{00486_pk_subexpression.sql => 00489_pk_subexpression.sql}  | 0
 2 files changed, 1 insertion(+), 1 deletion(-)
 rename dbms/tests/queries/0_stateless/{00486_pk_subexpression.reference => 00489_pk_subexpression.reference} (95%)
 rename dbms/tests/queries/0_stateless/{00486_pk_subexpression.sql => 00489_pk_subexpression.sql} (100%)

diff --git a/dbms/tests/queries/0_stateless/00486_pk_subexpression.reference b/dbms/tests/queries/0_stateless/00489_pk_subexpression.reference
similarity index 95%
rename from dbms/tests/queries/0_stateless/00486_pk_subexpression.reference
rename to dbms/tests/queries/0_stateless/00489_pk_subexpression.reference
index 629defd3fc7..5f46ec305ae 100644
--- a/dbms/tests/queries/0_stateless/00486_pk_subexpression.reference
+++ b/dbms/tests/queries/0_stateless/00489_pk_subexpression.reference
@@ -17,4 +17,4 @@
 2000-01-01      1970-01-01 00:01:02     11      4578
 2000-01-01      1970-01-01 00:01:03     11      3572
 2000-01-01      1970-01-01 00:01:01     11      4563
-2000-01-01      1970-01-01 00:01:02     11      4578
\ No newline at end of file
+2000-01-01      1970-01-01 00:01:02     11      4578
diff --git a/dbms/tests/queries/0_stateless/00486_pk_subexpression.sql b/dbms/tests/queries/0_stateless/00489_pk_subexpression.sql
similarity index 100%
rename from dbms/tests/queries/0_stateless/00486_pk_subexpression.sql
rename to dbms/tests/queries/0_stateless/00489_pk_subexpression.sql

From 00ce8c9603cf333f4296a3bdd246341c0f08122d Mon Sep 17 00:00:00 2001
From: Vitaliy Lyudvichenko <vludv@yandex-team.ru>
Date: Mon, 7 Aug 2017 00:40:38 +0300
Subject: [PATCH 068/281] Add ctime caching in MergeTree's cleaner.
 [#CLICKHOUSE-3173]

---
 .../ReplicatedMergeTreeCleanupThread.cpp      | 38 ++++++++++++++++---
 .../ReplicatedMergeTreeCleanupThread.h        |  4 ++
 2 files changed, 36 insertions(+), 6 deletions(-)

diff --git a/dbms/src/Storages/MergeTree/ReplicatedMergeTreeCleanupThread.cpp b/dbms/src/Storages/MergeTree/ReplicatedMergeTreeCleanupThread.cpp
index 59b1ee39093..b1fc214a292 100644
--- a/dbms/src/Storages/MergeTree/ReplicatedMergeTreeCleanupThread.cpp
+++ b/dbms/src/Storages/MergeTree/ReplicatedMergeTreeCleanupThread.cpp
@@ -113,13 +113,26 @@ void ReplicatedMergeTreeCleanupThread::clearOldBlocks()
 {
     auto zookeeper = storage.getZooKeeper();
 
+    Strings blocks;
     zkutil::Stat stat;
-    if (!zookeeper->exists(storage.zookeeper_path + "/blocks", &stat))
+    if (ZOK != zookeeper->tryGetChildren(storage.zookeeper_path + "/blocks", blocks, &stat))
         throw Exception(storage.zookeeper_path + "/blocks doesn't exist", ErrorCodes::NOT_FOUND_NODE);
 
-    LOG_TRACE(log, "Checking " << stat.numChildren << " blocks to clear old ones from ZooKeeper. This might take several minutes.");
+    /// Clear already deleted blocks from the cache, cached_block_ctime should be subset of blocks
+    {
+        NameSet blocks_set(blocks.begin(), blocks.end());
+        for (auto it = cached_block_ctime.begin(); it != cached_block_ctime.end();)
+        {
+            if (!blocks_set.count(it->first))
+                it = cached_block_ctime.erase(it);
+            else
+                ++it;
+        }
+    }
 
-    Strings blocks = zookeeper->getChildren(storage.zookeeper_path + "/blocks");
+    auto not_cached_blocks = stat.numChildren - cached_block_ctime.size();
+    LOG_TRACE(log, "Checking " << stat.numChildren << " blocks  (" << not_cached_blocks << " are not cached)"
+            << " to clear old ones from ZooKeeper. This might take several minutes.");
 
     /// Time -> block hash from ZooKeeper (from node name)
     using TimedBlock = std::pair<Int64, String>;
@@ -128,9 +141,21 @@ void ReplicatedMergeTreeCleanupThread::clearOldBlocks()
 
     for (const String & block : blocks)
     {
-        zkutil::Stat stat;
-        zookeeper->exists(storage.zookeeper_path + "/blocks/" + block, &stat);
-        timed_blocks.emplace_back(stat.ctime, block);
+        auto it = cached_block_ctime.find(block);
+
+        if (it == cached_block_ctime.end())
+        {
+            /// New block. Fetch its stat and put it into the cache
+            zkutil::Stat block_stat;
+            zookeeper->exists(storage.zookeeper_path + "/blocks/" + block, &block_stat);
+            cached_block_ctime.emplace(block, block_stat.ctime);
+            timed_blocks.emplace_back(block_stat.ctime, block);
+        }
+        else
+        {
+            /// Cached block
+            timed_blocks.emplace_back(it->second, block);
+        }
     }
 
     if (timed_blocks.empty())
@@ -152,6 +177,7 @@ void ReplicatedMergeTreeCleanupThread::clearOldBlocks()
     {
         /// TODO After about half a year, we could replace this to multi op, because there will be no obsolete children nodes.
         zookeeper->removeRecursive(storage.zookeeper_path + "/blocks/" + it->second);
+        cached_block_ctime.erase(it->second);
     }
 
     LOG_TRACE(log, "Cleared " << timed_blocks.end() - first_outdated_block << " old blocks from ZooKeeper");
diff --git a/dbms/src/Storages/MergeTree/ReplicatedMergeTreeCleanupThread.h b/dbms/src/Storages/MergeTree/ReplicatedMergeTreeCleanupThread.h
index 2a63e8a6c7d..38f43595b40 100644
--- a/dbms/src/Storages/MergeTree/ReplicatedMergeTreeCleanupThread.h
+++ b/dbms/src/Storages/MergeTree/ReplicatedMergeTreeCleanupThread.h
@@ -1,7 +1,9 @@
 #pragma once
 
+#include <Core/Types.h>
 #include <common/logger_useful.h>
 #include <thread>
+#include <map>
 
 
 namespace DB
@@ -37,6 +39,8 @@ private:
     /// Remove old block hashes from ZooKeeper. This makes a leading replica.
     void clearOldBlocks();
 
+    std::map<String, Int64> cached_block_ctime;
+
     /// TODO Removing old quorum/failed_parts
     /// TODO Removing old nonincrement_block_numbers
 };

From 4c0a0d07e99dcbd60f805463c3985e6b42b5f999 Mon Sep 17 00:00:00 2001
From: robot-metrika-test <robot-metrika-test@yandex-team.ru>
Date: Tue, 8 Aug 2017 14:50:37 +0300
Subject: [PATCH 069/281] Auto version update to [54268]

---
 dbms/cmake/version.cmake | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/dbms/cmake/version.cmake b/dbms/cmake/version.cmake
index 9b572b85bae..3512a5b70bc 100644
--- a/dbms/cmake/version.cmake
+++ b/dbms/cmake/version.cmake
@@ -1,6 +1,6 @@
 # This strings autochanged from release_lib.sh:
-set(VERSION_DESCRIBE v1.1.54267-testing)
-set(VERSION_REVISION 54267)
+set(VERSION_DESCRIBE v1.1.54268-testing)
+set(VERSION_REVISION 54268)
 # end of autochange
 
 set (VERSION_MAJOR 1)

From b0b7f1b161cb14d073db9a26a530d30de27e759a Mon Sep 17 00:00:00 2001
From: Alexey Milovidov <milovidov@yandex-team.ru>
Date: Tue, 8 Aug 2017 23:58:18 +0300
Subject: [PATCH 070/281] Miscellaneous [#CLICKHOUSE-2].

---
 dbms/src/Client/ConnectionPoolWithFailover.cpp | 8 ++++++++
 dbms/src/Client/ConnectionPoolWithFailover.h   | 7 -------
 dbms/src/TableFunctions/TableFunctionRemote.h  | 2 +-
 3 files changed, 9 insertions(+), 8 deletions(-)

diff --git a/dbms/src/Client/ConnectionPoolWithFailover.cpp b/dbms/src/Client/ConnectionPoolWithFailover.cpp
index 0e38f141494..24261f554d2 100644
--- a/dbms/src/Client/ConnectionPoolWithFailover.cpp
+++ b/dbms/src/Client/ConnectionPoolWithFailover.cpp
@@ -17,6 +17,14 @@ namespace ProfileEvents
 namespace DB
 {
 
+namespace ErrorCodes
+{
+    extern const int NETWORK_ERROR;
+    extern const int SOCKET_TIMEOUT;
+    extern const int LOGICAL_ERROR;
+}
+
+
 ConnectionPoolWithFailover::ConnectionPoolWithFailover(
         ConnectionPoolPtrs nested_pools_,
         LoadBalancing load_balancing,
diff --git a/dbms/src/Client/ConnectionPoolWithFailover.h b/dbms/src/Client/ConnectionPoolWithFailover.h
index bf2155c04fd..78fd350a7e2 100644
--- a/dbms/src/Client/ConnectionPoolWithFailover.h
+++ b/dbms/src/Client/ConnectionPoolWithFailover.h
@@ -7,13 +7,6 @@
 namespace DB
 {
 
-namespace ErrorCodes
-{
-    extern const int NETWORK_ERROR;
-    extern const int SOCKET_TIMEOUT;
-    extern const int LOGICAL_ERROR;
-}
-
 /** Connection pool with fault tolerance.
   * Initialized by several other IConnectionPools.
   * When a connection is received, it tries to create or select a live connection from a pool,
diff --git a/dbms/src/TableFunctions/TableFunctionRemote.h b/dbms/src/TableFunctions/TableFunctionRemote.h
index 3a58e4b0c28..1891dbd3795 100644
--- a/dbms/src/TableFunctions/TableFunctionRemote.h
+++ b/dbms/src/TableFunctions/TableFunctionRemote.h
@@ -10,7 +10,7 @@ namespace DB
  * To get the table structure, a DESC TABLE request is made to the remote server.
  * For example
  * SELECT count() FROM remote('example01-01-1', merge, hits) - go to `example01-01-1`, in the merge database, the hits table.
- * An expression that generates a lot of shards and replicas can also be specified as the host name - see below.
+ * An expression that generates a set of shards and replicas can also be specified as the host name - see below.
  */
 class TableFunctionRemote : public ITableFunction
 {

From 9bbcfbd35ce57221638ed7fe48cd32eba3eedaee Mon Sep 17 00:00:00 2001
From: Alexey Milovidov <milovidov@yandex-team.ru>
Date: Tue, 8 Aug 2017 23:57:23 +0300
Subject: [PATCH 071/281] Fixed error [#CLICKHOUSE-2].

---
 dbms/src/DataTypes/DataTypeString.cpp |  4 ++--
 dbms/src/IO/WriteHelpers.h            | 29 ++++++++++++++++++---------
 2 files changed, 21 insertions(+), 12 deletions(-)

diff --git a/dbms/src/DataTypes/DataTypeString.cpp b/dbms/src/DataTypes/DataTypeString.cpp
index 1156e41208b..255dd6943a4 100644
--- a/dbms/src/DataTypes/DataTypeString.cpp
+++ b/dbms/src/DataTypes/DataTypeString.cpp
@@ -138,8 +138,8 @@ static NO_INLINE void deserializeBinarySSE2(ColumnString::Chars_t & data, Column
 
                 while (sse_src_pos < sse_src_end)
                 {
-                    /// NOTE gcc 4.9.2 expands the loop, but for some reason uses only one xmm register.
-                    ///for (size_t j = 0; j < UNROLL_TIMES; ++j)
+                    /// NOTE gcc 4.9.2 unrolls the loop, but for some reason uses only one xmm register.
+                    /// for (size_t j = 0; j < UNROLL_TIMES; ++j)
                     ///    _mm_storeu_si128(sse_dst_pos + j, _mm_loadu_si128(sse_src_pos + j));
 
                     sse_src_pos += UNROLL_TIMES;
diff --git a/dbms/src/IO/WriteHelpers.h b/dbms/src/IO/WriteHelpers.h
index 64801cd896a..0ede999eef9 100644
--- a/dbms/src/IO/WriteHelpers.h
+++ b/dbms/src/IO/WriteHelpers.h
@@ -149,12 +149,12 @@ inline void writeString(const StringRef & ref, WriteBuffer & buf)
     (buf).write((s), strlen(s))
 
 /** Writes a string for use in the JSON format:
- *  - the string is outputted in double quotes
- *  - forward slash character '/' is escaped
+ *  - the string is written in double quotes
+ *  - slash character '/' is escaped for compatibility with JavaScript
  *  - bytes from the range 0x00-0x1F except `\b', '\f', '\n', '\r', '\t' are escaped as \u00XX
  *  - code points U+2028 and U+2029 (byte sequences in UTF-8: e2 80 a8, e2 80 a9) are escaped as \u2028 and \u2029
- *  - it is assumed that string is the UTF-8 encoded, the invalid UTF-8 is not processed
- *  - non-ASCII characters remain as is
+ *  - it is assumed that string is in UTF-8, the invalid UTF-8 is not processed
+ *  - all other non-ASCII characters remain as is
  */
 inline void writeJSONString(const char * begin, const char * end, WriteBuffer & buf)
 {
@@ -196,10 +196,13 @@ inline void writeJSONString(const char * begin, const char * end, WriteBuffer &
                 writeChar('"', buf);
                 break;
             default:
-                if (0x00 <= *it && *it <= 0x1F)
+                UInt8 c = *it;
+                if (0x00 <= c && c <= 0x1F)
                 {
-                    char higher_half = (*it) >> 4;
-                    char lower_half = (*it) & 0xF;
+                    /// Escaping of ASCII control characters.
+
+                    UInt8 higher_half = c >> 4;
+                    UInt8 lower_half = c & 0xF;
 
                     writeCString("\\u00", buf);
                     writeChar('0' + higher_half, buf);
@@ -209,12 +212,18 @@ inline void writeJSONString(const char * begin, const char * end, WriteBuffer &
                     else
                         writeChar('A' + lower_half - 10, buf);
                 }
-                else if (end - it >= 3 && it[0] == '\xE2' && it[1] == '\x80' && (it[2] == '\xA8' || it[2] == '\xA9'))
+                else if (end - it >= 3 && it[0] == 0xE2 && it[1] == 0x80 && (it[2] == 0xA8 || it[2] == 0xA9))
                 {
-                    if (it[2] == '\xA8')
+                    /// This is for compatibility with JavaScript, because unescaped line separators are prohibited in string literals,
+                    ///  and these code points are alternative line separators.
+
+                    if (it[2] == 0xA8)
                         writeCString("\\u2028", buf);
-                    if (it[2] == '\xA9')
+                    if (it[2] == 0xA9)
                         writeCString("\\u2029", buf);
+
+                    /// Byte sequence is 3 bytes long. We have additional two bytes to skip.
+                    it += 2;
                 }
                 else
                     writeChar(*it, buf);

From 1fc854801860a95e7c1e0273ccd5020e4df2a85b Mon Sep 17 00:00:00 2001
From: Alexey Milovidov <milovidov@yandex-team.ru>
Date: Wed, 9 Aug 2017 00:07:14 +0300
Subject: [PATCH 072/281] Fixing garbage (incomplete) [#CLICKHOUSE-2].

---
 dbms/src/Functions/FunctionsVisitParam.h | 10 +++++-----
 dbms/src/IO/ReadHelpers.cpp              |  2 +-
 dbms/src/IO/WriteHelpers.h               |  6 +++---
 3 files changed, 9 insertions(+), 9 deletions(-)

diff --git a/dbms/src/Functions/FunctionsVisitParam.h b/dbms/src/Functions/FunctionsVisitParam.h
index 909a4a0bdfd..b3b6d9c8569 100644
--- a/dbms/src/Functions/FunctionsVisitParam.h
+++ b/dbms/src/Functions/FunctionsVisitParam.h
@@ -143,12 +143,12 @@ struct ExtractRaw
 
 struct ExtractString
 {
-    static UInt64 unhexCodePoint(const UInt8 * pos)
+    static UInt16 unhexCodePoint(const UInt8 * pos)
     {
-        return unhex(pos[0]) * 0xFFF
-             + unhex(pos[1]) * 0xFF
-             + unhex(pos[2]) * 0xF
-             + unhex(pos[3]);
+        return UInt16(unhex(pos[0])) * 0xFFF
+             + UInt16(unhex(pos[1])) * 0xFF
+             + UInt16(unhex(pos[2])) * 0xF
+             + UInt16(unhex(pos[3]));
     }
 
     static bool tryExtract(const UInt8 * pos, const UInt8 * end, ColumnString::Chars_t & res_data)
diff --git a/dbms/src/IO/ReadHelpers.cpp b/dbms/src/IO/ReadHelpers.cpp
index 507d429acab..16922eb75e5 100644
--- a/dbms/src/IO/ReadHelpers.cpp
+++ b/dbms/src/IO/ReadHelpers.cpp
@@ -250,7 +250,7 @@ static void parseComplexEscapeSequence(Vector & s, ReadBuffer & buf)
 }
 
 
-/// TODO Compute with the code in FunctionsVisitParam.h and JSON.h
+/// TODO Unify with the code in FunctionsVisitParam.h and JSON.h
 template <typename Vector>
 static void parseJSONEscapeSequence(Vector & s, ReadBuffer & buf)
 {
diff --git a/dbms/src/IO/WriteHelpers.h b/dbms/src/IO/WriteHelpers.h
index 0ede999eef9..76def04e0a4 100644
--- a/dbms/src/IO/WriteHelpers.h
+++ b/dbms/src/IO/WriteHelpers.h
@@ -212,14 +212,14 @@ inline void writeJSONString(const char * begin, const char * end, WriteBuffer &
                     else
                         writeChar('A' + lower_half - 10, buf);
                 }
-                else if (end - it >= 3 && it[0] == 0xE2 && it[1] == 0x80 && (it[2] == 0xA8 || it[2] == 0xA9))
+                else if (end - it >= 3 && it[0] == '\xE2' && it[1] == '\x80' && (it[2] == '\xA8' || it[2] == '\xA9'))
                 {
                     /// This is for compatibility with JavaScript, because unescaped line separators are prohibited in string literals,
                     ///  and these code points are alternative line separators.
 
-                    if (it[2] == 0xA8)
+                    if (it[2] == '\xA8')
                         writeCString("\\u2028", buf);
-                    if (it[2] == 0xA9)
+                    if (it[2] == '\xA9')
                         writeCString("\\u2029", buf);
 
                     /// Byte sequence is 3 bytes long. We have additional two bytes to skip.

From 6edb14ba6bfdf689d9698f8675cbef5f89b9fc0a Mon Sep 17 00:00:00 2001
From: Alexey Milovidov <milovidov@yandex-team.ru>
Date: Wed, 9 Aug 2017 04:34:01 +0300
Subject: [PATCH 073/281] Fixed garbage [#CLICKHOUSE-2].

---
 dbms/src/Common/escapeForFileName.cpp         | 15 +--
 dbms/src/Common/hex.h                         | 25 ++++-
 dbms/src/Functions/FunctionsCoding.h          | 10 +-
 dbms/src/Functions/FunctionsVisitParam.h      | 95 +------------------
 dbms/src/IO/ReadHelpers.cpp                   | 91 ++++++++++--------
 dbms/src/IO/ReadHelpers.h                     |  9 +-
 ...rs_and_characters_outside_of_bmp.reference |  3 +
 ...parators_and_characters_outside_of_bmp.sql |  3 +
 8 files changed, 97 insertions(+), 154 deletions(-)
 create mode 100644 dbms/tests/queries/0_stateless/00490_special_line_separators_and_characters_outside_of_bmp.reference
 create mode 100644 dbms/tests/queries/0_stateless/00490_special_line_separators_and_characters_outside_of_bmp.sql

diff --git a/dbms/src/Common/escapeForFileName.cpp b/dbms/src/Common/escapeForFileName.cpp
index e948b422eb0..2aae8c4d75c 100644
--- a/dbms/src/Common/escapeForFileName.cpp
+++ b/dbms/src/Common/escapeForFileName.cpp
@@ -38,20 +38,13 @@ std::string unescapeForFileName(const std::string & s)
 
     while (pos != end)
     {
-        if (*pos != '%')
+        if (!(*pos == '%' && pos + 2 < end))
             res += *pos;
         else
         {
-            /// skip '%'
-            if (++pos == end) break;
-
-            char val = unhex(*pos) * 16;
-
-            if (++pos == end) break;
-
-            val += unhex(*pos);
-
-            res += val;
+            ++pos;
+            res += unhex2(pos);
+            pos += 2;
         }
 
         ++pos;
diff --git a/dbms/src/Common/hex.h b/dbms/src/Common/hex.h
index aac8e02501a..f8840ba01ef 100644
--- a/dbms/src/Common/hex.h
+++ b/dbms/src/Common/hex.h
@@ -20,17 +20,20 @@ inline char hexDigitLowercase(unsigned char c)
 #include <cstring>
 #include <cstddef>
 
+#include <common/Types.h>
+
+
 /// Maps 0..255 to 00..FF or 00..ff correspondingly
 
 extern const char * const hex_byte_to_char_uppercase_table;
 extern const char * const hex_byte_to_char_lowercase_table;
 
-inline void writeHexByteUppercase(unsigned char byte, void * out)
+inline void writeHexByteUppercase(UInt8 byte, void * out)
 {
     memcpy(out, &hex_byte_to_char_uppercase_table[static_cast<size_t>(byte) * 2], 2);
 }
 
-inline void writeHexByteLowercase(unsigned char byte, void * out)
+inline void writeHexByteLowercase(UInt8 byte, void * out)
 {
     memcpy(out, &hex_byte_to_char_lowercase_table[static_cast<size_t>(byte) * 2], 2);
 }
@@ -42,5 +45,21 @@ extern const char * const hex_char_to_digit_table;
 
 inline char unhex(char c)
 {
-    return hex_char_to_digit_table[static_cast<unsigned char>(c)];
+    return hex_char_to_digit_table[static_cast<UInt8>(c)];
+}
+
+inline char unhex2(const char * data)
+{
+    return
+          static_cast<UInt8>(unhex(data[0])) * 0x10
+        + static_cast<UInt8>(unhex(data[1]));
+}
+
+inline UInt16 unhex4(const char * data)
+{
+    return
+          static_cast<UInt16>(unhex(data[0])) * 0x1000
+        + static_cast<UInt16>(unhex(data[1])) * 0x100
+        + static_cast<UInt16>(unhex(data[2])) * 0x10
+        + static_cast<UInt16>(unhex(data[3]));
 }
diff --git a/dbms/src/Functions/FunctionsCoding.h b/dbms/src/Functions/FunctionsCoding.h
index 33e335219e3..fc7efe33fd4 100644
--- a/dbms/src/Functions/FunctionsCoding.h
+++ b/dbms/src/Functions/FunctionsCoding.h
@@ -1039,7 +1039,7 @@ private:
         size_t dst_pos = 0;
         for (; dst_pos < num_bytes; ++dst_pos)
         {
-            dst[dst_pos] = unhex(src[src_pos]) * 16 + unhex(src[src_pos + 1]);
+            dst[dst_pos] = unhex2(reinterpret_cast<const char *>(&src[src_pos]));
             src_pos += 2;
         }
     }
@@ -1450,12 +1450,8 @@ public:
         }
         while (pos < end)
         {
-            UInt8 major = unhex(*pos);
-            ++pos;
-            UInt8 minor = unhex(*pos);
-            ++pos;
-
-            *out = (major << 4) | minor;
+            *out = unhex2(pos);
+            pos += 2;
             ++out;
         }
         *out = '\0';
diff --git a/dbms/src/Functions/FunctionsVisitParam.h b/dbms/src/Functions/FunctionsVisitParam.h
index b3b6d9c8569..279bb0cf252 100644
--- a/dbms/src/Functions/FunctionsVisitParam.h
+++ b/dbms/src/Functions/FunctionsVisitParam.h
@@ -143,102 +143,11 @@ struct ExtractRaw
 
 struct ExtractString
 {
-    static UInt16 unhexCodePoint(const UInt8 * pos)
-    {
-        return UInt16(unhex(pos[0])) * 0xFFF
-             + UInt16(unhex(pos[1])) * 0xFF
-             + UInt16(unhex(pos[2])) * 0xF
-             + UInt16(unhex(pos[3]));
-    }
-
-    static bool tryExtract(const UInt8 * pos, const UInt8 * end, ColumnString::Chars_t & res_data)
-    {
-        if (pos == end || *pos != '"')
-            return false;
-
-        ++pos;
-        while (pos != end)
-        {
-            switch (*pos)
-            {
-                case '\\':
-                    ++pos;
-                    if (pos >= end)
-                        return false;
-
-                    switch(*pos)
-                    {
-                        case '"':
-                            res_data.push_back('"');
-                            break;
-                        case '\\':
-                            res_data.push_back('\\');
-                            break;
-                        case '/':
-                            res_data.push_back('/');
-                            break;
-                        case 'b':
-                            res_data.push_back('\b');
-                            break;
-                        case 'f':
-                            res_data.push_back('\f');
-                            break;
-                        case 'n':
-                            res_data.push_back('\n');
-                            break;
-                        case 'r':
-                            res_data.push_back('\r');
-                            break;
-                        case 't':
-                            res_data.push_back('\t');
-                            break;
-                        case 'u':
-                        {
-                            ++pos;
-
-                            if (pos + 4 > end)
-                                return false;
-
-                            UInt16 code_point = unhexCodePoint(pos);
-                            pos += 3;
-
-                            static constexpr size_t max_code_point_byte_length = 4;
-
-                            size_t old_size = res_data.size();
-                            res_data.resize(old_size + max_code_point_byte_length);
-
-                            Poco::UTF8Encoding utf8;
-                            int length = utf8.convert(code_point,
-                                &res_data[old_size], max_code_point_byte_length);
-
-                            if (!length)
-                                return false;
-
-                            res_data.resize(old_size + length);
-                            break;
-                        }
-                        default:
-                            res_data.push_back(*pos);
-                            break;
-                    }
-                    ++pos;
-                    break;
-                case '"':
-                    return true;
-                default:
-                    res_data.push_back(*pos);
-                    ++pos;
-                    break;
-            }
-        }
-        return false;
-    }
-
     static void extract(const UInt8 * pos, const UInt8 * end, ColumnString::Chars_t & res_data)
     {
         size_t old_size = res_data.size();
-
-        if (!tryExtract(pos, end, res_data))
+        ReadBufferFromMemory in(pos, end - pos);
+        if (!tryReadJSONStringInto(res_data, in))
             res_data.resize(old_size);
     }
 };
diff --git a/dbms/src/IO/ReadHelpers.cpp b/dbms/src/IO/ReadHelpers.cpp
index 16922eb75e5..dc05d706aa5 100644
--- a/dbms/src/IO/ReadHelpers.cpp
+++ b/dbms/src/IO/ReadHelpers.cpp
@@ -26,7 +26,7 @@ void parseHex(IteratorSrc src, IteratorDst dst, const size_t num_bytes)
     size_t dst_pos = 0;
     for (; dst_pos < num_bytes; ++dst_pos)
     {
-        dst[dst_pos] = unhex(src[src_pos]) * 16 + unhex(src[src_pos + 1]);
+        dst[dst_pos] = UInt8(unhex(src[src_pos])) * 16 + UInt8(unhex(src[src_pos + 1]));
         src_pos += 2;
     }
 }
@@ -229,12 +229,10 @@ static void parseComplexEscapeSequence(Vector & s, ReadBuffer & buf)
     if (*buf.position() == 'x')
     {
         ++buf.position();
-        /// escape sequence of the form \ xAA
-        UInt8 c1;
-        UInt8 c2;
-        readPODBinary(c1, buf);
-        readPODBinary(c2, buf);
-        s.push_back(static_cast<char>(unhex(c1) * 16 + unhex(c2)));
+        /// escape sequence of the form \xAA
+        char hex_code[2];
+        readPODBinary(hex_code, buf);
+        s.push_back(unhex2(hex_code));
     }
     else if (*buf.position() == 'N')
     {
@@ -251,14 +249,23 @@ static void parseComplexEscapeSequence(Vector & s, ReadBuffer & buf)
 
 
 /// TODO Unify with the code in FunctionsVisitParam.h and JSON.h
-template <typename Vector>
-static void parseJSONEscapeSequence(Vector & s, ReadBuffer & buf)
+template <typename Vector, typename ReturnType>
+static ReturnType parseJSONEscapeSequence(Vector & s, ReadBuffer & buf)
 {
+    static constexpr bool throw_exception = std::is_same<ReturnType, void>::value;
+
+    auto error = [](const char * message, int code)
+    {
+        if (throw_exception)
+            throw Exception(message, code);
+        return ReturnType(false);
+    };
+
     ++buf.position();
     if (buf.eof())
-        throw Exception("Cannot parse escape sequence", ErrorCodes::CANNOT_PARSE_ESCAPE_SEQUENCE);
+        return error("Cannot parse escape sequence", ErrorCodes::CANNOT_PARSE_ESCAPE_SEQUENCE);
 
-    switch(*buf.position())
+    switch (*buf.position())
     {
         case '"':
             s.push_back('"');
@@ -289,26 +296,23 @@ static void parseJSONEscapeSequence(Vector & s, ReadBuffer & buf)
             ++buf.position();
 
             char hex_code[4];
-            readPODBinary(hex_code, buf);
+            if (4 != buf.read(hex_code, 4))
+                return error("Cannot parse escape sequence: less than four bytes after \\u", ErrorCodes::CANNOT_PARSE_ESCAPE_SEQUENCE);
 
             /// \u0000 - special case
-             if (0 == memcmp(hex_code, "0000", 4))
+            if (0 == memcmp(hex_code, "0000", 4))
             {
                 s.push_back(0);
-                return;
+                return ReturnType(true);
             }
 
-            UInt16 code_point =
-                unhex(hex_code[0]) * 4096
-                + unhex(hex_code[1]) * 256
-                + unhex(hex_code[2]) * 16
-                + unhex(hex_code[3]);
+            UInt16 code_point = unhex4(hex_code);
 
             if (code_point <= 0x7F)
             {
                 s.push_back(code_point);
             }
-            else if (code_point <= 0x7FF)
+            else if (code_point <= 0x07FF)
             {
                 s.push_back(((code_point >> 6) & 0x1F) | 0xC0);
                 s.push_back((code_point & 0x3F) | 0x80);
@@ -318,15 +322,15 @@ static void parseJSONEscapeSequence(Vector & s, ReadBuffer & buf)
                 /// Surrogate pair.
                 if (code_point >= 0xD800 && code_point <= 0xDBFF)
                 {
-                    assertString("\\u", buf);
-                    char second_hex_code[4];
-                    readPODBinary(second_hex_code, buf);
+                    if (!checkString("\\u", buf))
+                        return error("Cannot parse escape sequence: missing second part of surrogate pair", ErrorCodes::CANNOT_PARSE_ESCAPE_SEQUENCE);
 
-                    UInt16 second_code_point =
-                        unhex(second_hex_code[0]) * 4096
-                        + unhex(second_hex_code[1]) * 256
-                        + unhex(second_hex_code[2]) * 16
-                        + unhex(second_hex_code[3]);
+                    char second_hex_code[4];
+                    if (4 != buf.read(second_hex_code, 4))
+                        return error("Cannot parse escape sequence: less than four bytes after \\u of second part of surrogate pair",
+                            ErrorCodes::CANNOT_PARSE_ESCAPE_SEQUENCE);
+
+                    UInt16 second_code_point = unhex4(second_hex_code);
 
                     if (second_code_point >= 0xDC00 && second_code_point <= 0xDFFF)
                     {
@@ -338,7 +342,7 @@ static void parseJSONEscapeSequence(Vector & s, ReadBuffer & buf)
                         s.push_back((full_code_point & 0x3F) | 0x80);
                     }
                     else
-                        throw Exception("Incorrect surrogate pair of unicode escape sequences in JSON", ErrorCodes::CANNOT_PARSE_ESCAPE_SEQUENCE);
+                        return error("Incorrect surrogate pair of unicode escape sequences in JSON", ErrorCodes::CANNOT_PARSE_ESCAPE_SEQUENCE);
                 }
                 else
                 {
@@ -348,7 +352,7 @@ static void parseJSONEscapeSequence(Vector & s, ReadBuffer & buf)
                 }
             }
 
-            return;
+            return ReturnType(true);
         }
         default:
             s.push_back(*buf.position());
@@ -356,6 +360,7 @@ static void parseJSONEscapeSequence(Vector & s, ReadBuffer & buf)
     }
 
     ++buf.position();
+    return ReturnType(true);
 }
 
 
@@ -581,12 +586,20 @@ void readCSVString(String & s, ReadBuffer & buf, const char delimiter)
 template void readCSVStringInto<PaddedPODArray<UInt8>>(PaddedPODArray<UInt8> & s, ReadBuffer & buf, const char delimiter);
 
 
-template <typename Vector>
-void readJSONStringInto(Vector & s, ReadBuffer & buf)
+template <typename Vector, typename ReturnType>
+ReturnType readJSONStringInto(Vector & s, ReadBuffer & buf)
 {
+    static constexpr bool throw_exception = std::is_same<ReturnType, void>::value;
+
+    auto error = [](const char * message, int code)
+    {
+        if (throw_exception)
+            throw Exception(message, code);
+        return ReturnType(false);
+    };
+
     if (buf.eof() || *buf.position() != '"')
-        throw Exception("Cannot parse JSON string: expected opening quote",
-            ErrorCodes::CANNOT_PARSE_QUOTED_STRING);
+        return error("Cannot parse JSON string: expected opening quote", ErrorCodes::CANNOT_PARSE_QUOTED_STRING);
     ++buf.position();
 
     while (!buf.eof())
@@ -602,15 +615,14 @@ void readJSONStringInto(Vector & s, ReadBuffer & buf)
         if (*buf.position() == '"')
         {
             ++buf.position();
-            return;
+            return ReturnType(true);
         }
 
         if (*buf.position() == '\\')
-            parseJSONEscapeSequence(s, buf);
+            parseJSONEscapeSequence<Vector, ReturnType>(s, buf);
     }
 
-    throw Exception("Cannot parse JSON string: expected closing quote",
-        ErrorCodes::CANNOT_PARSE_QUOTED_STRING);
+    return error("Cannot parse JSON string: expected closing quote", ErrorCodes::CANNOT_PARSE_QUOTED_STRING);
 }
 
 void readJSONString(String & s, ReadBuffer & buf)
@@ -619,7 +631,8 @@ void readJSONString(String & s, ReadBuffer & buf)
     readJSONStringInto(s, buf);
 }
 
-template void readJSONStringInto<PaddedPODArray<UInt8>>(PaddedPODArray<UInt8> & s, ReadBuffer & buf);
+template void readJSONStringInto<PaddedPODArray<UInt8>, void>(PaddedPODArray<UInt8> & s, ReadBuffer & buf);
+template bool readJSONStringInto<PaddedPODArray<UInt8>, bool>(PaddedPODArray<UInt8> & s, ReadBuffer & buf);
 template void readJSONStringInto<NullSink>(NullSink & s, ReadBuffer & buf);
 
 
diff --git a/dbms/src/IO/ReadHelpers.h b/dbms/src/IO/ReadHelpers.h
index 21805b4d559..6d105d104d4 100644
--- a/dbms/src/IO/ReadHelpers.h
+++ b/dbms/src/IO/ReadHelpers.h
@@ -549,8 +549,15 @@ void readStringUntilEOFInto(Vector & s, ReadBuffer & buf);
 template <typename Vector>
 void readCSVStringInto(Vector & s, ReadBuffer & buf, const char delimiter = ',');
 
+/// ReturnType is either bool or void. If bool, the function will return false instead of throwing an exception.
+template <typename Vector, typename ReturnType = void>
+ReturnType readJSONStringInto(Vector & s, ReadBuffer & buf);
+
 template <typename Vector>
-void readJSONStringInto(Vector & s, ReadBuffer & buf);
+bool tryReadJSONStringInto(Vector & s, ReadBuffer & buf)
+{
+    return readJSONStringInto<Vector, bool>(s, buf);
+}
 
 /// This could be used as template parameter for functions above, if you want to just skip data.
 struct NullSink
diff --git a/dbms/tests/queries/0_stateless/00490_special_line_separators_and_characters_outside_of_bmp.reference b/dbms/tests/queries/0_stateless/00490_special_line_separators_and_characters_outside_of_bmp.reference
new file mode 100644
index 00000000000..7ab142a6a4d
--- /dev/null
+++ b/dbms/tests/queries/0_stateless/00490_special_line_separators_and_characters_outside_of_bmp.reference
@@ -0,0 +1,3 @@
+{"x":"𐌸","y":"Hello \u2028 World \u2029 !"}
+{"x":"Hello\u2028World\u2029!","h":"48656C6C6FE280A8576F726C64E280A921"}
+Hello World !	48656C6C6FE280A8576F726C64E280A921
diff --git a/dbms/tests/queries/0_stateless/00490_special_line_separators_and_characters_outside_of_bmp.sql b/dbms/tests/queries/0_stateless/00490_special_line_separators_and_characters_outside_of_bmp.sql
new file mode 100644
index 00000000000..4227190edf4
--- /dev/null
+++ b/dbms/tests/queries/0_stateless/00490_special_line_separators_and_characters_outside_of_bmp.sql
@@ -0,0 +1,3 @@
+SELECT visitParamExtractString('{"x":"\\uD800\\udf38"}', 'x') AS x, visitParamExtractString('{"x":"Hello \\u2028 World \\u2029 !"}', 'x') AS y FORMAT JSONEachRow;
+SELECT 'Hello' || convertCharset(unhex('2028'), 'utf16be', 'utf8') || 'World' || convertCharset(unhex('2029'), 'utf16be', 'utf8') || '!' AS x, hex(x) AS h FORMAT JSONEachRow;
+SELECT 'Hello' || convertCharset(unhex('2028'), 'utf16be', 'utf8') || 'World' || convertCharset(unhex('2029'), 'utf16be', 'utf8') || '!' AS x, hex(x) AS h FORMAT TSV;

From 4f633f316cd529ae2d5d6da02ba5a7e63b4705a3 Mon Sep 17 00:00:00 2001
From: Alexey Milovidov <milovidov@yandex-team.ru>
Date: Wed, 9 Aug 2017 04:38:56 +0300
Subject: [PATCH 074/281] Removed TODO [#CLICKHOUSE-2].

---
 dbms/src/IO/ReadHelpers.cpp | 1 -
 1 file changed, 1 deletion(-)

diff --git a/dbms/src/IO/ReadHelpers.cpp b/dbms/src/IO/ReadHelpers.cpp
index dc05d706aa5..6698ca7d5f4 100644
--- a/dbms/src/IO/ReadHelpers.cpp
+++ b/dbms/src/IO/ReadHelpers.cpp
@@ -248,7 +248,6 @@ static void parseComplexEscapeSequence(Vector & s, ReadBuffer & buf)
 }
 
 
-/// TODO Unify with the code in FunctionsVisitParam.h and JSON.h
 template <typename Vector, typename ReturnType>
 static ReturnType parseJSONEscapeSequence(Vector & s, ReadBuffer & buf)
 {

From 6a4460c69f802b1d8c431645d897f865d14af3d3 Mon Sep 17 00:00:00 2001
From: robot-metrika-test <robot-metrika-test@yandex-team.ru>
Date: Wed, 9 Aug 2017 16:36:38 +0300
Subject: [PATCH 075/281] Auto version update to [54269]

---
 dbms/cmake/version.cmake | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/dbms/cmake/version.cmake b/dbms/cmake/version.cmake
index 3512a5b70bc..f5e604aa938 100644
--- a/dbms/cmake/version.cmake
+++ b/dbms/cmake/version.cmake
@@ -1,6 +1,6 @@
 # This strings autochanged from release_lib.sh:
-set(VERSION_DESCRIBE v1.1.54268-testing)
-set(VERSION_REVISION 54268)
+set(VERSION_DESCRIBE v1.1.54269-testing)
+set(VERSION_REVISION 54269)
 # end of autochange
 
 set (VERSION_MAJOR 1)

From 1daf11153a6a35a8879e1f560d7742f9becc5d4f Mon Sep 17 00:00:00 2001
From: Alexey Milovidov <milovidov@yandex-team.ru>
Date: Wed, 9 Aug 2017 18:34:09 +0300
Subject: [PATCH 076/281] Fixed typo [#CLICKHOUSE-2].

---
 dbms/src/Server/Server.cpp | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/dbms/src/Server/Server.cpp b/dbms/src/Server/Server.cpp
index ebce924f76d..ecc59de0682 100644
--- a/dbms/src/Server/Server.cpp
+++ b/dbms/src/Server/Server.cpp
@@ -372,7 +372,7 @@ int Server::main(const std::vector<std::string> & args)
         [&](ConfigurationPtr config) { global_context->setUsersConfig(config); },
         /* already_loaded = */ false);
 
-    /// Limit on total number of coucurrently executed queries.
+    /// Limit on total number of concurrently executed queries.
     global_context->getProcessList().setMaxSize(config().getInt("max_concurrent_queries", 0));
 
     /// Setup protection to avoid accidental DROP for big tables (that are greater than 50 GB by default)

From ac178bde1fa3c27adb97d8dfcae2992074c88caa Mon Sep 17 00:00:00 2001
From: proller <proller@users.noreply.github.com>
Date: Wed, 9 Aug 2017 23:52:55 +0300
Subject: [PATCH 077/281] Cmake: link ltdl with pocoodbc static only. rename
 ARM -> ARCH_ARM (#1083)

* Cmake: link ltdl with pocoodbc static only. rename ARM -> ARCH_ARM

* Fix lib find order

* Allow define non-contrib cityhash farmhash metrohash

* Fix message

* Cmake: fixes

* clean

* Update CMakeLists.txt
---
 CMakeLists.txt                                | 18 +++++---
 cmake/Modules/Findbtrie.cmake                 | 44 +++++++++++++++++++
 cmake/Modules/Findcityhash.cmake              | 44 +++++++++++++++++++
 cmake/Modules/Finddouble-conversion.cmake     | 44 +++++++++++++++++++
 cmake/Modules/Findfarmhash.cmake              | 44 +++++++++++++++++++
 cmake/Modules/Findmetrohash.cmake             | 44 +++++++++++++++++++
 cmake/find_contrib_lib.cmake                  | 21 +++++++++
 cmake/find_double-conversion.cmake            | 17 -------
 cmake/{find_libtool.cmake => find_ltdl.cmake} |  0
 cmake/find_lz4.cmake                          |  2 -
 cmake/find_poco.cmake                         |  7 +++
 cmake/find_re2.cmake                          |  4 --
 cmake/find_sparsehash.cmake                   |  1 -
 cmake/find_zlib.cmake                         |  4 --
 cmake/find_zookeeper.cmake                    |  2 -
 cmake/find_zstd.cmake                         |  2 -
 cmake/lib_name.cmake                          |  8 +---
 cmake/print_include_directories.cmake         | 10 ++++-
 cmake/test_cpu.cmake                          |  2 +-
 contrib/CMakeLists.txt                        | 21 ++++++---
 contrib/libcpuid/CMakeLists.txt               |  2 +
 contrib/libdouble-conversion/CMakeLists.txt   |  2 +
 contrib/liblz4/CMakeLists.txt                 |  2 +-
 contrib/libre2/CMakeLists.txt                 |  3 ++
 contrib/libzstd/CMakeLists.txt                |  2 +
 dbms/CMakeLists.txt                           | 24 +++++-----
 dbms/src/AggregateFunctions/CMakeLists.txt    |  1 -
 dbms/src/Functions/CMakeLists.txt             |  2 +-
 dbms/src/Functions/tests/CMakeLists.txt       |  2 +-
 dbms/src/Interpreters/CMakeLists.txt          |  4 ++
 libs/libcommon/CMakeLists.txt                 |  1 +
 libs/libpocoext/CMakeLists.txt                |  1 -
 32 files changed, 317 insertions(+), 68 deletions(-)
 create mode 100644 cmake/Modules/Findbtrie.cmake
 create mode 100644 cmake/Modules/Findcityhash.cmake
 create mode 100644 cmake/Modules/Finddouble-conversion.cmake
 create mode 100644 cmake/Modules/Findfarmhash.cmake
 create mode 100644 cmake/Modules/Findmetrohash.cmake
 create mode 100644 cmake/find_contrib_lib.cmake
 delete mode 100644 cmake/find_double-conversion.cmake
 rename cmake/{find_libtool.cmake => find_ltdl.cmake} (100%)

diff --git a/CMakeLists.txt b/CMakeLists.txt
index 8dd3524d2c7..11eb0790aed 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -45,10 +45,10 @@ message (STATUS "CMAKE_BUILD_TYPE: " ${CMAKE_BUILD_TYPE} )
 set (CMAKE_CONFIGURATION_TYPES "RelWithDebInfo;Debug;Release;MinSizeRel;ASan;UBSan" CACHE STRING "" FORCE)
 
 if (CMAKE_SYSTEM_PROCESSOR MATCHES "^(aarch64.*|AARCH64.*)")
-    set (AARCH64 1)
+    set (ARCH_AARCH64 1)
 endif ()
-if (AARCH64 OR CMAKE_SYSTEM_PROCESSOR MATCHES "arm")
-    set (ARM 1)
+if (ARCH_AARCH64 OR CMAKE_SYSTEM_PROCESSOR MATCHES "arm")
+    set (ARCH_ARM 1)
 endif ()
 
 set (COMMON_WARNING_FLAGS "-Wall")    # -Werror is also added inside directories with our own code.
@@ -175,6 +175,7 @@ if (NOT OPENSSL_FOUND)
     message (FATAL_ERROR "Need openssl for build. debian tip: sudo apt install libssl-dev")
 endif ()
 
+include (cmake/lib_name.cmake)
 include (cmake/find_icu4c.cmake)
 include (cmake/find_boost.cmake)
 # openssl, zlib before poco
@@ -183,19 +184,24 @@ include (cmake/find_zstd.cmake)
 include (cmake/find_poco.cmake)
 include (cmake/find_lz4.cmake)
 include (cmake/find_sparsehash.cmake)
-include (cmake/find_libtool.cmake)
 include (cmake/find_rt.cmake)
 include (cmake/find_readline_edit.cmake)
 include (cmake/find_zookeeper.cmake)
-include (cmake/find_double-conversion.cmake)
 include (cmake/find_re2.cmake)
+
+include (cmake/find_contrib_lib.cmake)
+find_contrib_lib(cityhash)
+find_contrib_lib(farmhash)
+find_contrib_lib(metrohash)
+find_contrib_lib(btrie)
+find_contrib_lib(double-conversion)
+
 # Need to process before "contrib" dir:
 include (libs/libcommon/cmake/find_gperftools.cmake)
 include (libs/libcommon/cmake/find_jemalloc.cmake)
 include (libs/libcommon/cmake/find_cctz.cmake)
 include (libs/libmysqlxx/cmake/find_mysqlclient.cmake)
 include (libs/libdaemon/cmake/find_unwind.cmake)
-include (cmake/lib_name.cmake)
 
 
 set (FULL_C_FLAGS "${CMAKE_C_FLAGS} ${CMAKE_C_FLAGS_${CMAKE_BUILD_TYPE}}")
diff --git a/cmake/Modules/Findbtrie.cmake b/cmake/Modules/Findbtrie.cmake
new file mode 100644
index 00000000000..4f3c27f5225
--- /dev/null
+++ b/cmake/Modules/Findbtrie.cmake
@@ -0,0 +1,44 @@
+# - Try to find btrie headers and libraries.
+#
+# Usage of this module as follows:
+#
+#     find_package(btrie)
+#
+# Variables used by this module, they can change the default behaviour and need
+# to be set before calling find_package:
+#
+#  BTRIE_ROOT_DIR Set this variable to the root installation of
+#                    btrie if the module has problems finding
+#                    the proper installation path.
+#
+# Variables defined by this module:
+#
+#  BTRIE_FOUND             System has btrie libs/headers
+#  BTRIE_LIBRARIES         The btrie library/libraries
+#  BTRIE_INCLUDE_DIR       The location of btrie headers
+
+find_path(BTRIE_ROOT_DIR
+    NAMES include/btrie.h
+)
+
+find_library(BTRIE_LIBRARIES
+    NAMES btrie
+    PATHS ${BTRIE_ROOT_DIR}/lib ${BTRIE_LIBRARIES_PATHS}
+)
+
+find_path(BTRIE_INCLUDE_DIR
+    NAMES btrie.h
+    PATHS ${BTRIE_ROOT_DIR}/include ${BTRIE_INCLUDE_PATHS}
+)
+
+include(FindPackageHandleStandardArgs)
+find_package_handle_standard_args(btrie DEFAULT_MSG
+    BTRIE_LIBRARIES
+    BTRIE_INCLUDE_DIR
+)
+
+mark_as_advanced(
+    BTRIE_ROOT_DIR
+    BTRIE_LIBRARIES
+    BTRIE_INCLUDE_DIR
+)
diff --git a/cmake/Modules/Findcityhash.cmake b/cmake/Modules/Findcityhash.cmake
new file mode 100644
index 00000000000..5250df2e0a6
--- /dev/null
+++ b/cmake/Modules/Findcityhash.cmake
@@ -0,0 +1,44 @@
+# - Try to find cityhash headers and libraries.
+#
+# Usage of this module as follows:
+#
+#     find_package(cityhash)
+#
+# Variables used by this module, they can change the default behaviour and need
+# to be set before calling find_package:
+#
+#  CITYHASH_ROOT_DIR Set this variable to the root installation of
+#                    cityhash if the module has problems finding
+#                    the proper installation path.
+#
+# Variables defined by this module:
+#
+#  CITYHASH_FOUND             System has cityhash libs/headers
+#  CITYHASH_LIBRARIES         The cityhash library/libraries
+#  CITYHASH_INCLUDE_DIR       The location of cityhash headers
+
+find_path(CITYHASH_ROOT_DIR
+    NAMES include/city.h
+)
+
+find_library(CITYHASH_LIBRARIES
+    NAMES cityhash
+    PATHS ${CITYHASH_ROOT_DIR}/lib ${CITYHASH_LIBRARIES_PATHS}
+)
+
+find_path(CITYHASH_INCLUDE_DIR
+    NAMES city.h
+    PATHS ${CITYHASH_ROOT_DIR}/include ${CITYHASH_INCLUDE_PATHS}
+)
+
+include(FindPackageHandleStandardArgs)
+find_package_handle_standard_args(cityhash DEFAULT_MSG
+    CITYHASH_LIBRARIES
+    CITYHASH_INCLUDE_DIR
+)
+
+mark_as_advanced(
+    CITYHASH_ROOT_DIR
+    CITYHASH_LIBRARIES
+    CITYHASH_INCLUDE_DIR
+)
diff --git a/cmake/Modules/Finddouble-conversion.cmake b/cmake/Modules/Finddouble-conversion.cmake
new file mode 100644
index 00000000000..178964b8e2e
--- /dev/null
+++ b/cmake/Modules/Finddouble-conversion.cmake
@@ -0,0 +1,44 @@
+# - Try to find double-conversion headers and libraries.
+#
+# Usage of this module as follows:
+#
+#     find_package(double-conversion)
+#
+# Variables used by this module, they can change the default behaviour and need
+# to be set before calling find_package:
+#
+#  DOUBLE_CONVERSION_ROOT_DIR Set this variable to the root installation of
+#                    double-conversion if the module has problems finding
+#                    the proper installation path.
+#
+# Variables defined by this module:
+#
+#  DOUBLE_CONVERSION_FOUND             System has double-conversion libs/headers
+#  DOUBLE_CONVERSION_LIBRARIES         The double-conversion library/libraries
+#  DOUBLE_CONVERSION_INCLUDE_DIR       The location of double-conversion headers
+
+find_path(DOUBLE_CONVERSION_ROOT_DIR
+    NAMES include/double-conversion.h
+)
+
+find_library(DOUBLE_CONVERSION_LIBRARIES
+    NAMES double-conversion
+    PATHS ${DOUBLE_CONVERSION_ROOT_DIR}/lib ${DOUBLE_CONVERSION_LIBRARIES_PATHS}
+)
+
+find_path(DOUBLE_CONVERSION_INCLUDE_DIR
+    NAMES double-conversion.h
+    PATHS ${DOUBLE_CONVERSION_ROOT_DIR}/include ${DOUBLE_CONVERSION_INCLUDE_PATHS}
+)
+
+include(FindPackageHandleStandardArgs)
+find_package_handle_standard_args(double_conversion DEFAULT_MSG
+    DOUBLE_CONVERSION_LIBRARIES
+    DOUBLE_CONVERSION_INCLUDE_DIR
+)
+
+mark_as_advanced(
+    DOUBLE_CONVERSION_ROOT_DIR
+    DOUBLE_CONVERSION_LIBRARIES
+    DOUBLE_CONVERSION_INCLUDE_DIR
+)
diff --git a/cmake/Modules/Findfarmhash.cmake b/cmake/Modules/Findfarmhash.cmake
new file mode 100644
index 00000000000..2b45fde2c67
--- /dev/null
+++ b/cmake/Modules/Findfarmhash.cmake
@@ -0,0 +1,44 @@
+# - Try to find farmhash headers and libraries.
+#
+# Usage of this module as follows:
+#
+#     find_package(farmhash)
+#
+# Variables used by this module, they can change the default behaviour and need
+# to be set before calling find_package:
+#
+#  FARMHASH_ROOT_DIR Set this variable to the root installation of
+#                    farmhash if the module has problems finding
+#                    the proper installation path.
+#
+# Variables defined by this module:
+#
+#  FARMHASH_FOUND             System has farmhash libs/headers
+#  FARMHASH_LIBRARIES         The farmhash library/libraries
+#  FARMHASH_INCLUDE_DIR       The location of farmhash headers
+
+find_path(FARMHASH_ROOT_DIR
+    NAMES include/farmhash.h
+)
+
+find_library(FARMHASH_LIBRARIES
+    NAMES farmhash
+    PATHS ${FARMHASH_ROOT_DIR}/lib ${FARMHASH_LIBRARIES_PATHS}
+)
+
+find_path(FARMHASH_INCLUDE_DIR
+    NAMES farmhash.h
+    PATHS ${FARMHASH_ROOT_DIR}/include ${FARMHASH_INCLUDE_PATHS}
+)
+
+include(FindPackageHandleStandardArgs)
+find_package_handle_standard_args(farmhash DEFAULT_MSG
+    FARMHASH_LIBRARIES
+    FARMHASH_INCLUDE_DIR
+)
+
+mark_as_advanced(
+    FARMHASH_ROOT_DIR
+    FARMHASH_LIBRARIES
+    FARMHASH_INCLUDE_DIR
+)
diff --git a/cmake/Modules/Findmetrohash.cmake b/cmake/Modules/Findmetrohash.cmake
new file mode 100644
index 00000000000..9efc1ed2db8
--- /dev/null
+++ b/cmake/Modules/Findmetrohash.cmake
@@ -0,0 +1,44 @@
+# - Try to find metrohash headers and libraries.
+#
+# Usage of this module as follows:
+#
+#     find_package(metrohash)
+#
+# Variables used by this module, they can change the default behaviour and need
+# to be set before calling find_package:
+#
+#  METROHASH_ROOT_DIR Set this variable to the root installation of
+#                    metrohash if the module has problems finding
+#                    the proper installation path.
+#
+# Variables defined by this module:
+#
+#  METROHASH_FOUND             System has metrohash libs/headers
+#  METROHASH_LIBRARIES         The metrohash library/libraries
+#  METROHASH_INCLUDE_DIR       The location of metrohash headers
+
+find_path(METROHASH_ROOT_DIR
+    NAMES include/metrohash.h
+)
+
+find_library(METROHASH_LIBRARIES
+    NAMES metrohash
+    PATHS ${METROHASH_ROOT_DIR}/lib ${METROHASH_LIBRARIES_PATHS}
+)
+
+find_path(METROHASH_INCLUDE_DIR
+    NAMES metrohash.h
+    PATHS ${METROHASH_ROOT_DIR}/include ${METROHASH_INCLUDE_PATHS}
+)
+
+include(FindPackageHandleStandardArgs)
+find_package_handle_standard_args(metrohash DEFAULT_MSG
+    METROHASH_LIBRARIES
+    METROHASH_INCLUDE_DIR
+)
+
+mark_as_advanced(
+    METROHASH_ROOT_DIR
+    METROHASH_LIBRARIES
+    METROHASH_INCLUDE_DIR
+)
diff --git a/cmake/find_contrib_lib.cmake b/cmake/find_contrib_lib.cmake
new file mode 100644
index 00000000000..f9bf3ce2837
--- /dev/null
+++ b/cmake/find_contrib_lib.cmake
@@ -0,0 +1,21 @@
+macro(find_contrib_lib LIB_NAME)
+
+    string(TOLOWER ${LIB_NAME} LIB_NAME_LC)
+    string(TOUPPER ${LIB_NAME} LIB_NAME_UC)
+    string(REPLACE "-" "_" LIB_NAME_UC ${LIB_NAME_UC})
+
+    option (USE_INTERNAL_${LIB_NAME_UC}_LIBRARY "Use bundled library ${LIB_NAME} instead of system" ${NOT_UNBUNDLED})
+
+    if (NOT USE_INTERNAL_${LIB_NAME_UC}_LIBRARY)
+        find_package ("${LIB_NAME}")
+    endif ()
+
+    if (NOT ${LIB_NAME_UC}_FOUND)
+        set (USE_INTERNAL_${LIB_NAME_UC}_LIBRARY 1)
+        set (${LIB_NAME_UC}_LIBRARIES ${LIB_NAME_LC})
+        set (${LIB_NAME_UC}_INCLUDE_DIR ${${LIB_NAME_UC}_CONTRIB_INCLUDE_DIR})
+    endif ()
+
+    message (STATUS "Using ${LIB_NAME}: ${${LIB_NAME_UC}_INCLUDE_DIR} : ${${LIB_NAME_UC}_LIBRARIES}")
+
+endmacro()
diff --git a/cmake/find_double-conversion.cmake b/cmake/find_double-conversion.cmake
deleted file mode 100644
index 167f22ff04f..00000000000
--- a/cmake/find_double-conversion.cmake
+++ /dev/null
@@ -1,17 +0,0 @@
-option (USE_INTERNAL_DOUBLE_CONVERSION_LIBRARY "Set to FALSE to use system double-conversion library instead of bundled" ${NOT_UNBUNDLED})
-
-if (NOT USE_INTERNAL_DOUBLE_CONVERSION_LIBRARY)
-    find_library (DOUBLE_CONVERSION_LIBRARY double-conversion)
-    find_path (DOUBLE_CONVERSION_INCLUDE_DIR NAMES double-conversion/double-conversion.h PATHS ${DOUBLE_CONVERSION_INCLUDE_PATHS})
-endif ()
-
-if (DOUBLE_CONVERSION_LIBRARY AND DOUBLE_CONVERSION_INCLUDE_DIR)
-    include_directories (${DOUBLE_CONVERSION_INCLUDE_DIR})
-else ()
-    set (USE_INTERNAL_DOUBLE_CONVERSION_LIBRARY 1)
-    set (DOUBLE_CONVERSION_INCLUDE_DIR "${ClickHouse_SOURCE_DIR}/contrib/libdouble-conversion")
-    include_directories (BEFORE ${DOUBLE_CONVERSION_INCLUDE_DIR})
-    set (DOUBLE_CONVERSION_LIBRARY double-conversion)
-endif ()
-
-message (STATUS "Using double-conversion: ${DOUBLE_CONVERSION_INCLUDE_DIR} : ${DOUBLE_CONVERSION_LIBRARY}")
diff --git a/cmake/find_libtool.cmake b/cmake/find_ltdl.cmake
similarity index 100%
rename from cmake/find_libtool.cmake
rename to cmake/find_ltdl.cmake
diff --git a/cmake/find_lz4.cmake b/cmake/find_lz4.cmake
index a4c97fcaeb9..f7c703fe44a 100644
--- a/cmake/find_lz4.cmake
+++ b/cmake/find_lz4.cmake
@@ -9,8 +9,6 @@ if (LZ4_LIBRARY AND LZ4_INCLUDE_DIR)
     include_directories (${LZ4_INCLUDE_DIR})
 else ()
     set (USE_INTERNAL_LZ4_LIBRARY 1)
-    set (LZ4_INCLUDE_DIR "${ClickHouse_SOURCE_DIR}/contrib/liblz4/include/lz4")
-    include_directories (BEFORE ${LZ4_INCLUDE_DIR})
     set (LZ4_LIBRARY lz4)
 endif ()
 
diff --git a/cmake/find_poco.cmake b/cmake/find_poco.cmake
index d1e0bcdce83..d8e9aecdbef 100644
--- a/cmake/find_poco.cmake
+++ b/cmake/find_poco.cmake
@@ -1,5 +1,9 @@
 option (USE_INTERNAL_POCO_LIBRARY "Set to FALSE to use system poco library instead of bundled" ${NOT_UNBUNDLED})
 
+if (USE_STATIC_LIBRARIES)
+    include (cmake/find_ltdl.cmake)
+endif ()
+
 if (NOT USE_INTERNAL_POCO_LIBRARY)
     find_package (Poco COMPONENTS Net NetSSL XML Data Crypto DataODBC MongoDB)
 endif ()
@@ -29,6 +33,9 @@ else ()
     if (ODBC_FOUND)
         set (Poco_DataODBC_FOUND 1)
         set (Poco_DataODBC_LIBRARY PocoDataODBC)
+        if (USE_STATIC_LIBRARIES)
+            list (APPEND Poco_DataODBC_LIBRARY ${LTDL_LIB})
+        endif ()
         list (APPEND Poco_INCLUDE_DIRS "${ClickHouse_SOURCE_DIR}/contrib/libpoco/Data/ODBC/include/")
     endif ()
 
diff --git a/cmake/find_re2.cmake b/cmake/find_re2.cmake
index 2dd6bfaec96..618b28a154c 100644
--- a/cmake/find_re2.cmake
+++ b/cmake/find_re2.cmake
@@ -10,10 +10,6 @@ if (RE2_LIBRARY AND RE2_INCLUDE_DIR)
     set (RE2_ST_LIBRARY ${RE2_LIBRARY})
 else ()
     set (USE_INTERNAL_RE2_LIBRARY 1)
-    set (RE2_INCLUDE_DIR "${ClickHouse_SOURCE_DIR}/contrib/libre2")
-    set (RE2_ST_INCLUDE_DIR "${ClickHouse_BINARY_DIR}/contrib/libre2")
-    include_directories (BEFORE ${RE2_INCLUDE_DIR})
-    include_directories (BEFORE ${RE2_ST_INCLUDE_DIR})
     set (RE2_LIBRARY re2)
     set (RE2_ST_LIBRARY re2_st)
     set (USE_RE2_ST 1)
diff --git a/cmake/find_sparsehash.cmake b/cmake/find_sparsehash.cmake
index 80c423308ae..405d1b2621f 100644
--- a/cmake/find_sparsehash.cmake
+++ b/cmake/find_sparsehash.cmake
@@ -9,7 +9,6 @@ if (SPARCEHASH_INCLUDE_DIR)
 else ()
     set (USE_INTERNAL_SPARCEHASH_LIBRARY 1)
     set (SPARCEHASH_INCLUDE_DIR "${ClickHouse_SOURCE_DIR}/contrib/libsparsehash")
-    include_directories (BEFORE ${SPARCEHASH_INCLUDE_DIR})
 endif ()
 
 message (STATUS "Using sparsehash: ${SPARCEHASH_INCLUDE_DIR}")
diff --git a/cmake/find_zlib.cmake b/cmake/find_zlib.cmake
index f2c03b24223..e9f3d89eb08 100644
--- a/cmake/find_zlib.cmake
+++ b/cmake/find_zlib.cmake
@@ -2,15 +2,11 @@ option (USE_INTERNAL_ZLIB_LIBRARY "Set to FALSE to use system zlib library inste
 
 if (NOT USE_INTERNAL_ZLIB_LIBRARY)
     find_package (ZLIB)
-    if (ZLIB_FOUND)
-        include_directories (${ZLIB_INCLUDE_DIRS})
-    endif ()
 endif ()
 
 if (NOT ZLIB_FOUND)
     set (USE_INTERNAL_ZLIB_LIBRARY 1)
     set (ZLIB_INCLUDE_DIR "${ClickHouse_SOURCE_DIR}/contrib/libzlib-ng")
-    include_directories (BEFORE ${ZLIB_INCLUDE_DIR})
     if (USE_STATIC_LIBRARIES)
         set (ZLIB_LIBRARIES zlibstatic)
     else ()
diff --git a/cmake/find_zookeeper.cmake b/cmake/find_zookeeper.cmake
index edcf7c067e6..26effbc9115 100644
--- a/cmake/find_zookeeper.cmake
+++ b/cmake/find_zookeeper.cmake
@@ -9,8 +9,6 @@ if (ZOOKEEPER_LIBRARY AND ZOOKEEPER_INCLUDE_DIR)
     include_directories (${ZOOKEEPER_INCLUDE_DIR})
 else ()
     set (USE_INTERNAL_ZOOKEEPER_LIBRARY 1)
-    set (ZOOKEEPER_INCLUDE_DIR "${ClickHouse_SOURCE_DIR}/contrib/libzookeeper/include")
-    include_directories (BEFORE ${ZOOKEEPER_INCLUDE_DIR})
     set (ZOOKEEPER_LIBRARY zookeeper_mt)
 endif ()
 
diff --git a/cmake/find_zstd.cmake b/cmake/find_zstd.cmake
index fa3d00513f6..86bd420acff 100644
--- a/cmake/find_zstd.cmake
+++ b/cmake/find_zstd.cmake
@@ -9,8 +9,6 @@ if (ZSTD_LIBRARY AND ZSTD_INCLUDE_DIR)
     include_directories (${ZSTD_INCLUDE_DIR})
 else ()
     set (USE_INTERNAL_ZSTD_LIBRARY 1)
-    set (ZSTD_INCLUDE_DIR "${ClickHouse_SOURCE_DIR}/contrib/libzstd/include/zstd")
-    include_directories (BEFORE ${ZSTD_INCLUDE_DIR})
     set (ZSTD_LIBRARY zstd)
 endif ()
 
diff --git a/cmake/lib_name.cmake b/cmake/lib_name.cmake
index 1f4f272c8bb..ea1144ec305 100644
--- a/cmake/lib_name.cmake
+++ b/cmake/lib_name.cmake
@@ -1,10 +1,6 @@
 
-set(CITYHASH_INCLUDE_DIR ${ClickHouse_SOURCE_DIR}/contrib/libcityhash/include)
-set(CPUID_INCLUDE_DIR ${ClickHouse_SOURCE_DIR}/contrib/libcpuid/include)
 set(DIVIDE_INCLUDE_DIR ${ClickHouse_SOURCE_DIR}/contrib/libdivide)
-set(BTRIE_INCLUDE_DIR ${ClickHouse_SOURCE_DIR}/contrib/libbtrie/include)
-set(CITYHASH_INCLUDE_DIR ${ClickHouse_SOURCE_DIR}/contrib/libcityhash/include)
-set(MYSQLXX_INCLUDE_DIR ${ClickHouse_SOURCE_DIR}/libs/libmysqlxx/include)
-set(POCOEXT_INCLUDE_DIR ${ClickHouse_SOURCE_DIR}/libs/libpocoext/include)
+set(CITYHASH_CONTRIB_INCLUDE_DIR ${ClickHouse_SOURCE_DIR}/contrib/libcityhash/include)
 set(COMMON_INCLUDE_DIR ${ClickHouse_SOURCE_DIR}/libs/libcommon/include ${ClickHouse_BINARY_DIR}/libs/libcommon/include)
 set(DBMS_INCLUDE_DIR ${ClickHouse_SOURCE_DIR}/dbms/src ${ClickHouse_BINARY_DIR}/dbms/src)
+set(DOUBLE_CONVERSION_CONTRIB_INCLUDE_DIR ${ClickHouse_SOURCE_DIR}/contrib/libdouble-conversion)
diff --git a/cmake/print_include_directories.cmake b/cmake/print_include_directories.cmake
index 64c655a199f..ba0bba4c9df 100644
--- a/cmake/print_include_directories.cmake
+++ b/cmake/print_include_directories.cmake
@@ -1,4 +1,12 @@
-get_property (dirs TARGET dbms PROPERTY INCLUDE_DIRECTORIES)
+
+# TODO? Maybe collect include directories recursively from all dependencies
+
+get_property (dirs1 TARGET dbms PROPERTY INCLUDE_DIRECTORIES)
+list(APPEND dirs ${dirs1})
+
+get_property (dirs1 TARGET common PROPERTY INCLUDE_DIRECTORIES)
+list(APPEND dirs ${dirs1})
+
 list(REMOVE_DUPLICATES dirs)
 file (WRITE ${CMAKE_CURRENT_BINARY_DIR}/include_directories.txt "")
 foreach (dir ${dirs})
diff --git a/cmake/test_cpu.cmake b/cmake/test_cpu.cmake
index b4cb60a69ee..8a301983963 100644
--- a/cmake/test_cpu.cmake
+++ b/cmake/test_cpu.cmake
@@ -57,7 +57,7 @@ check_cxx_source_compiles("
     }
 " HAVE_POPCNT)
 
-if (HAVE_POPCNT AND NOT AARCH64)
+if (HAVE_POPCNT AND NOT ARCH_AARCH64)
     set (COMPILER_FLAGS "${COMPILER_FLAGS} ${TEST_FLAG}")
 endif ()
 
diff --git a/contrib/CMakeLists.txt b/contrib/CMakeLists.txt
index 6cd7ea0c445..9caf6cad51e 100644
--- a/contrib/CMakeLists.txt
+++ b/contrib/CMakeLists.txt
@@ -28,10 +28,21 @@ if (USE_INTERNAL_ZOOKEEPER_LIBRARY)
 	add_subdirectory (libzookeeper)
 endif ()
 
-add_subdirectory (libcityhash)
-add_subdirectory (libfarmhash)
-add_subdirectory (libmetrohash)
-add_subdirectory (libbtrie)
+if (USE_INTERNAL_CITYHASH_LIBRARY)
+    add_subdirectory (libcityhash)
+endif ()
+
+if (USE_INTERNAL_FARMHASH_LIBRARY)
+    add_subdirectory (libfarmhash)
+endif ()
+
+if (USE_INTERNAL_METROHASH_LIBRARY)
+    add_subdirectory (libmetrohash)
+endif ()
+
+if (USE_INTERNAL_BTRIE_LIBRARY)
+    add_subdirectory (libbtrie)
+endif ()
 
 if (USE_INTERNAL_UNWIND_LIBRARY)
 	add_subdirectory (libunwind)
@@ -49,6 +60,6 @@ if (ENABLE_LIBTCMALLOC AND USE_INTERNAL_GPERFTOOLS_LIBRARY)
 	add_subdirectory (libtcmalloc)
 endif ()
 
-if (NOT ARM)
+if (NOT ARCH_ARM)
 	add_subdirectory (libcpuid)
 endif ()
diff --git a/contrib/libcpuid/CMakeLists.txt b/contrib/libcpuid/CMakeLists.txt
index dc1b4713aff..4c257b20de8 100644
--- a/contrib/libcpuid/CMakeLists.txt
+++ b/contrib/libcpuid/CMakeLists.txt
@@ -16,3 +16,5 @@ include/cpuid/rdtsc.h
 include/cpuid/recog_amd.h
 include/cpuid/recog_intel.h
 )
+
+target_include_directories (cpuid PUBLIC include)
diff --git a/contrib/libdouble-conversion/CMakeLists.txt b/contrib/libdouble-conversion/CMakeLists.txt
index 6b19e861b1e..f44ba25c142 100644
--- a/contrib/libdouble-conversion/CMakeLists.txt
+++ b/contrib/libdouble-conversion/CMakeLists.txt
@@ -18,3 +18,5 @@ double-conversion/strtod.cc
 double-conversion/strtod.h
 double-conversion/utils.h
 )
+
+target_include_directories (double-conversion PUBLIC .)
diff --git a/contrib/liblz4/CMakeLists.txt b/contrib/liblz4/CMakeLists.txt
index 98f13476887..865c0dca2bf 100644
--- a/contrib/liblz4/CMakeLists.txt
+++ b/contrib/liblz4/CMakeLists.txt
@@ -6,4 +6,4 @@ add_library (lz4
     include/lz4/lz4hc.h
     include/lz4/lz4opt.h)
 
-target_include_directories(lz4 PUBLIC include)
+target_include_directories(lz4 PUBLIC include/lz4)
diff --git a/contrib/libre2/CMakeLists.txt b/contrib/libre2/CMakeLists.txt
index 111c60f3f49..bf39e6531db 100644
--- a/contrib/libre2/CMakeLists.txt
+++ b/contrib/libre2/CMakeLists.txt
@@ -41,6 +41,9 @@ add_library (re2_st ${re2_sources})
 target_compile_definitions (re2 PRIVATE NDEBUG)
 target_compile_definitions (re2_st PRIVATE NDEBUG NO_THREADS re2=re2_st)
 
+target_include_directories (re2 PUBLIC .)
+target_include_directories (re2_st PUBLIC .)
+
 file (MAKE_DIRECTORY ${CMAKE_CURRENT_BINARY_DIR}/re2_st)
 foreach (FILENAME filtered_re2.h re2.h set.h stringpiece.h variadic_function.h)
 	add_custom_command (OUTPUT "${CMAKE_CURRENT_BINARY_DIR}/re2_st/${FILENAME}"
diff --git a/contrib/libzstd/CMakeLists.txt b/contrib/libzstd/CMakeLists.txt
index bfdde074b3f..00d20cf2146 100644
--- a/contrib/libzstd/CMakeLists.txt
+++ b/contrib/libzstd/CMakeLists.txt
@@ -92,3 +92,5 @@ IF (ZSTD_LEGACY_SUPPORT)
 ENDIF (ZSTD_LEGACY_SUPPORT)
 
 ADD_LIBRARY(zstd ${Sources} ${Headers})
+
+target_include_directories (zstd PUBLIC include/zstd) 
diff --git a/dbms/CMakeLists.txt b/dbms/CMakeLists.txt
index 0dcf288fe64..af1b2a33496 100644
--- a/dbms/CMakeLists.txt
+++ b/dbms/CMakeLists.txt
@@ -28,6 +28,7 @@ add_subdirectory (src)
 add_library(string_utils
     src/Common/StringUtils.h
     src/Common/StringUtils.cpp)
+target_include_directories (string_utils PRIVATE ${DBMS_INCLUDE_DIR})
 
 set(dbms_headers)
 set(dbms_sources)
@@ -150,7 +151,7 @@ if (NOT CMAKE_BUILD_TYPE STREQUAL "Debug")
         PROPERTIES COMPILE_FLAGS -g0)
 endif ()
 
-if (NOT ARM)
+if (NOT ARCH_ARM)
     set (LINK_LIBRARIES_ONLY_ON_X86_64 cpuid)
 endif()
 
@@ -163,12 +164,13 @@ endif()
 target_link_libraries (dbms
     common
     ${MYSQLXX_LIBRARY}
-    cityhash farmhash metrohash
+    ${FARMHASH_LIBRARIES}
+    ${METROHASH_LIBRARIES}
     ${LZ4_LIBRARY}
     ${ZSTD_LIBRARY}
     ${ZOOKEEPER_LIBRARY}
     string_utils
-    ${DOUBLE_CONVERSION_LIBRARY}
+    ${DOUBLE_CONVERSION_LIBRARIES}
     ${ZLIB_LIBRARIES}
     ${LINK_LIBRARIES_ONLY_ON_X86_64}
     ${RE2_LIBRARY}
@@ -176,7 +178,7 @@ target_link_libraries (dbms
     ${OPENSSL_CRYPTO_LIBRARY}
     ${Boost_SYSTEM_LIBRARY}
     ${Poco_Data_LIBRARY}
-    btrie
+    ${BTRIE_LIBRARIES}
 )
 
 if (Poco_DataODBC_FOUND)
@@ -200,19 +202,17 @@ endif ()
 target_link_libraries (dbms
     ${PLATFORM_LIBS}
     ${CMAKE_DL_LIBS}
-    ${LTDL_LIB}
     ${CMAKE_THREAD_LIBS_INIT}
 )
 
-target_include_directories (dbms BEFORE PRIVATE ${CPUID_INCLUDE_DIR})
-target_include_directories (dbms BEFORE PUBLIC ${DIVIDE_INCLUDE_DIR})
-target_include_directories (dbms BEFORE PRIVATE ${BTRIE_INCLUDE_DIR})
-target_include_directories (dbms BEFORE PRIVATE ${CITYHASH_INCLUDE_DIR})
-target_include_directories (dbms PUBLIC ${MYSQLXX_INCLUDE_DIR})
-target_include_directories (dbms PRIVATE ${POCOEXT_INCLUDE_DIR})
-target_include_directories (dbms PRIVATE ${COMMON_INCLUDE_DIR})
+target_include_directories (dbms BEFORE PRIVATE ${DIVIDE_INCLUDE_DIR})
 target_include_directories (dbms PUBLIC ${DBMS_INCLUDE_DIR})
 
+# only for copy_headers.sh:
+target_include_directories (dbms PRIVATE ${COMMON_INCLUDE_DIR})
+target_include_directories (dbms BEFORE PRIVATE ${DOUBLE_CONVERSION_INCLUDE_DIR})
+
+
 if (ENABLE_TESTS)
     add_subdirectory (tests)
     # attach all dbms gtest sources
diff --git a/dbms/src/AggregateFunctions/CMakeLists.txt b/dbms/src/AggregateFunctions/CMakeLists.txt
index b39c5c98efc..f3fb20b6101 100644
--- a/dbms/src/AggregateFunctions/CMakeLists.txt
+++ b/dbms/src/AggregateFunctions/CMakeLists.txt
@@ -28,5 +28,4 @@ list(REMOVE_ITEM clickhouse_aggregate_functions_headers
 
 add_library(clickhouse_aggregate_functions ${clickhouse_aggregate_functions_sources})
 target_link_libraries(clickhouse_aggregate_functions dbms)
-target_include_directories (clickhouse_aggregate_functions BEFORE PUBLIC ${CITYHASH_INCLUDE_DIR})
 target_include_directories (clickhouse_aggregate_functions PRIVATE ${COMMON_INCLUDE_DIR})
diff --git a/dbms/src/Functions/CMakeLists.txt b/dbms/src/Functions/CMakeLists.txt
index d92e2a4603f..a0827afdd1e 100644
--- a/dbms/src/Functions/CMakeLists.txt
+++ b/dbms/src/Functions/CMakeLists.txt
@@ -66,7 +66,7 @@ add_library(clickhouse_functions ${clickhouse_functions_sources})
 target_link_libraries(clickhouse_functions dbms)
 target_include_directories (clickhouse_functions BEFORE PUBLIC ${ClickHouse_SOURCE_DIR}/contrib/libfarmhash)
 target_include_directories (clickhouse_functions BEFORE PUBLIC ${ClickHouse_SOURCE_DIR}/contrib/libmetrohash/src)
-target_include_directories (clickhouse_functions BEFORE PUBLIC ${CITYHASH_INCLUDE_DIR})
+target_include_directories (clickhouse_functions BEFORE PUBLIC ${DIVIDE_INCLUDE_DIR})
 
 if (USE_VECTORCLASS)
     target_include_directories (clickhouse_functions BEFORE PUBLIC ${VECTORCLASS_INCLUDE_DIR})
diff --git a/dbms/src/Functions/tests/CMakeLists.txt b/dbms/src/Functions/tests/CMakeLists.txt
index c693f7f0933..b18ef7ccaaf 100644
--- a/dbms/src/Functions/tests/CMakeLists.txt
+++ b/dbms/src/Functions/tests/CMakeLists.txt
@@ -4,7 +4,7 @@ add_executable (number_traits number_traits.cpp)
 target_link_libraries (number_traits dbms)
 
 add_executable (functions_arithmetic functions_arithmetic.cpp)
-target_link_libraries (functions_arithmetic dbms)
+target_link_libraries (functions_arithmetic dbms clickhouse_functions)
 
 add_executable (logical_functions_performance logical_functions_performance.cpp)
 target_link_libraries (logical_functions_performance dbms)
diff --git a/dbms/src/Interpreters/CMakeLists.txt b/dbms/src/Interpreters/CMakeLists.txt
index 5a91acffcf8..2e34a62a60c 100644
--- a/dbms/src/Interpreters/CMakeLists.txt
+++ b/dbms/src/Interpreters/CMakeLists.txt
@@ -28,6 +28,10 @@ set (INTERNAL_COMPILER_CUSTOM_ROOT ON CACHE BOOL "")
 list(GET Poco_INCLUDE_DIRS 0 Poco_Foundation_INCLUDE_DIR)
 list(GET Poco_INCLUDE_DIRS 1 Poco_Util_INCLUDE_DIR)
 
+if (NOT DOUBLE_CONVERSION_INCLUDE_DIR)
+    get_target_property(DOUBLE_CONVERSION_INCLUDE_DIR ${DOUBLE_CONVERSION_LIBRARIES} INTERFACE_INCLUDE_DIRECTORIES)
+endif ()
+
 string (REPLACE ${ClickHouse_SOURCE_DIR} ${INTERNAL_COMPILER_HEADERS} INTERNAL_DOUBLE_CONVERSION_INCLUDE_DIR ${DOUBLE_CONVERSION_INCLUDE_DIR})
 string (REPLACE ${ClickHouse_SOURCE_DIR} ${INTERNAL_COMPILER_HEADERS} INTERNAL_Boost_INCLUDE_DIRS ${Boost_INCLUDE_DIRS})
 string (REPLACE ${ClickHouse_SOURCE_DIR} ${INTERNAL_COMPILER_HEADERS} INTERNAL_Poco_Foundation_INCLUDE_DIR ${Poco_Foundation_INCLUDE_DIR})
diff --git a/libs/libcommon/CMakeLists.txt b/libs/libcommon/CMakeLists.txt
index c2aa28b2a9d..a3a4ff14326 100644
--- a/libs/libcommon/CMakeLists.txt
+++ b/libs/libcommon/CMakeLists.txt
@@ -84,6 +84,7 @@ target_include_directories (common PUBLIC ${COMMON_INCLUDE_DIR})
 target_link_libraries (
     common
     pocoext
+    ${CITYHASH_LIBRARIES}
     ${CCTZ_LIBRARY}
     ${Boost_SYSTEM_LIBRARY}
     ${Boost_FILESYSTEM_LIBRARY}
diff --git a/libs/libpocoext/CMakeLists.txt b/libs/libpocoext/CMakeLists.txt
index 18eaaabf67d..354a47b8b78 100644
--- a/libs/libpocoext/CMakeLists.txt
+++ b/libs/libpocoext/CMakeLists.txt
@@ -7,5 +7,4 @@ add_library (pocoext
 
 target_include_directories (pocoext PUBLIC include PRIVATE ${COMMON_INCLUDE_DIR})
 
-
 target_link_libraries(pocoext ${Poco_Util_LIBRARY} ${Poco_Net_LIBRARY} ${Poco_XML_LIBRARY} ${Poco_Foundation_LIBRARY})

From 27174d9dd2c4f93bd7808d2c64cf958f4fe1b0d0 Mon Sep 17 00:00:00 2001
From: Vitaliy Lyudvichenko <vludv@yandex-team.ru>
Date: Wed, 9 Aug 2017 16:31:13 +0300
Subject: [PATCH 078/281] Avoid many checks of sending parts if sendings fail
 due to network error. [#CLICKHOUSE-2]

---
 dbms/src/Storages/MergeTree/DataPartsExchange.cpp | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/dbms/src/Storages/MergeTree/DataPartsExchange.cpp b/dbms/src/Storages/MergeTree/DataPartsExchange.cpp
index a9164a36214..2977d838c6b 100644
--- a/dbms/src/Storages/MergeTree/DataPartsExchange.cpp
+++ b/dbms/src/Storages/MergeTree/DataPartsExchange.cpp
@@ -24,6 +24,7 @@ namespace ErrorCodes
     extern const int ABORTED;
     extern const int BAD_SIZE_OF_FILE_IN_DATA_PART;
     extern const int TOO_MUCH_SIMULTANEOUS_QUERIES;
+    extern const int CANNOT_WRITE_TO_OSTREAM;
 }
 
 namespace DataPartsExchange
@@ -143,7 +144,7 @@ void Service::processQuery(const Poco::Net::HTMLForm & params, ReadBuffer & body
     }
     catch (const Exception & e)
     {
-        if (e.code() != ErrorCodes::ABORTED)
+        if (e.code() != ErrorCodes::ABORTED && e.code() != ErrorCodes::CANNOT_WRITE_TO_OSTREAM)
             typeid_cast<StorageReplicatedMergeTree &>(*owned_storage).enqueuePartForCheck(part_name);
         throw;
     }

From 56a5bb53fac52cc8030e60f3f8798a2960b1cfff Mon Sep 17 00:00:00 2001
From: Alexey Milovidov <milovidov@yandex-team.ru>
Date: Thu, 10 Aug 2017 00:07:01 +0300
Subject: [PATCH 079/281] Miscellaneous [#CLICKHOUSE-2].

---
 dbms/src/Storages/MergeTree/MergeTreeDataPart.cpp | 2 +-
 dbms/src/Storages/MergeTree/MergeTreeDataPart.h   | 1 +
 2 files changed, 2 insertions(+), 1 deletion(-)

diff --git a/dbms/src/Storages/MergeTree/MergeTreeDataPart.cpp b/dbms/src/Storages/MergeTree/MergeTreeDataPart.cpp
index 9e7585ed8be..6d5de63053a 100644
--- a/dbms/src/Storages/MergeTree/MergeTreeDataPart.cpp
+++ b/dbms/src/Storages/MergeTree/MergeTreeDataPart.cpp
@@ -234,7 +234,7 @@ void MergeTreeDataPartChecksums::add(MergeTreeDataPartChecksums && rhs_checksums
     rhs_checksums.files.clear();
 }
 
-/// Control sum computed from the set of control sums of .bin files.
+/// Checksum computed from the set of control sums of .bin files.
 void MergeTreeDataPartChecksums::summaryDataChecksum(SipHash & hash) const
 {
     /// We use fact that iteration is in deterministic (lexicographical) order.
diff --git a/dbms/src/Storages/MergeTree/MergeTreeDataPart.h b/dbms/src/Storages/MergeTree/MergeTreeDataPart.h
index 4acad6b1b62..fb64c4a701a 100644
--- a/dbms/src/Storages/MergeTree/MergeTreeDataPart.h
+++ b/dbms/src/Storages/MergeTree/MergeTreeDataPart.h
@@ -44,6 +44,7 @@ struct MergeTreeDataPartChecksums
 {
     using Checksum = MergeTreeDataPartChecksum;
 
+    /// The order is important.
     using FileChecksums = std::map<String, Checksum>;
     FileChecksums files;
 

From c351b367fbdc4fe2d5e1e8be49e3dde18b0501c6 Mon Sep 17 00:00:00 2001
From: proller <proller@users.noreply.github.com>
Date: Thu, 10 Aug 2017 02:11:28 +0300
Subject: [PATCH 080/281] Fix sparcehash include (#1084)

* Fix sparcehash include

* Fix re2_st include
---
 contrib/libre2/CMakeLists.txt | 2 +-
 dbms/CMakeLists.txt           | 1 +
 2 files changed, 2 insertions(+), 1 deletion(-)

diff --git a/contrib/libre2/CMakeLists.txt b/contrib/libre2/CMakeLists.txt
index bf39e6531db..e6ec2cbe3af 100644
--- a/contrib/libre2/CMakeLists.txt
+++ b/contrib/libre2/CMakeLists.txt
@@ -42,7 +42,7 @@ target_compile_definitions (re2 PRIVATE NDEBUG)
 target_compile_definitions (re2_st PRIVATE NDEBUG NO_THREADS re2=re2_st)
 
 target_include_directories (re2 PUBLIC .)
-target_include_directories (re2_st PUBLIC .)
+target_include_directories (re2_st PRIVATE . PUBLIC ${CMAKE_CURRENT_BINARY_DIR})
 
 file (MAKE_DIRECTORY ${CMAKE_CURRENT_BINARY_DIR}/re2_st)
 foreach (FILENAME filtered_re2.h re2.h set.h stringpiece.h variadic_function.h)
diff --git a/dbms/CMakeLists.txt b/dbms/CMakeLists.txt
index af1b2a33496..4ecaf1e375d 100644
--- a/dbms/CMakeLists.txt
+++ b/dbms/CMakeLists.txt
@@ -206,6 +206,7 @@ target_link_libraries (dbms
 )
 
 target_include_directories (dbms BEFORE PRIVATE ${DIVIDE_INCLUDE_DIR})
+target_include_directories (dbms BEFORE PRIVATE ${SPARCEHASH_INCLUDE_DIR})
 target_include_directories (dbms PUBLIC ${DBMS_INCLUDE_DIR})
 
 # only for copy_headers.sh:

From 46b5c9ce03e4793f0536254a00bcc5512e1b309b Mon Sep 17 00:00:00 2001
From: Alexey Milovidov <milovidov@yandex-team.ru>
Date: Thu, 10 Aug 2017 02:57:54 +0300
Subject: [PATCH 081/281] Miscellaneous [#CLICKHOUSE-2].

---
 dbms/src/IO/ReadHelpers.cpp | 9 +++------
 dbms/src/IO/ReadHelpers.h   | 2 +-
 2 files changed, 4 insertions(+), 7 deletions(-)

diff --git a/dbms/src/IO/ReadHelpers.cpp b/dbms/src/IO/ReadHelpers.cpp
index 6698ca7d5f4..9853baef198 100644
--- a/dbms/src/IO/ReadHelpers.cpp
+++ b/dbms/src/IO/ReadHelpers.cpp
@@ -170,13 +170,10 @@ void readStringInto(Vector & s, ReadBuffer & buf)
 {
     while (!buf.eof())
     {
-        size_t bytes = 0;
-        for (; buf.position() + bytes != buf.buffer().end(); ++bytes)
-            if (buf.position()[bytes] == '\t' || buf.position()[bytes] == '\n')
-                break;
+        const char * next_pos = find_first_symbols<'\t', '\n'>(buf.position(), buf.buffer().end());
 
-        appendToStringOrVector(s, buf.position(), buf.position() + bytes);
-        buf.position() += bytes;
+        appendToStringOrVector(s, buf.position(), next_pos);
+        buf.position() += next_pos - buf.position();    /// Code looks complicated, because "buf.position() = next_pos" doesn't work due to const-ness.
 
         if (buf.hasPendingData())
             return;
diff --git a/dbms/src/IO/ReadHelpers.h b/dbms/src/IO/ReadHelpers.h
index 6d105d104d4..544f4e25c7b 100644
--- a/dbms/src/IO/ReadHelpers.h
+++ b/dbms/src/IO/ReadHelpers.h
@@ -494,7 +494,7 @@ inline void readFloatText(T & x, ReadBuffer & buf)
     readFloatTextImpl<T, void>(x, buf);
 }
 
-/// rough; all until '\n' or '\t'
+/// simple: all until '\n' or '\t'
 void readString(String & s, ReadBuffer & buf);
 
 void readEscapedString(String & s, ReadBuffer & buf);

From 2ea63defa53ac818aaae88d489df3526395631a3 Mon Sep 17 00:00:00 2001
From: proller <proller@users.noreply.github.com>
Date: Thu, 10 Aug 2017 03:28:29 +0300
Subject: [PATCH 082/281] Fix test/string_pool include (#1086)

* Fix sparcehash include

* Fix re2_st include

* Fix test/string_pool include
---
 dbms/src/Core/tests/CMakeLists.txt | 1 +
 1 file changed, 1 insertion(+)

diff --git a/dbms/src/Core/tests/CMakeLists.txt b/dbms/src/Core/tests/CMakeLists.txt
index 7577cd77fe1..d87fb00c4c4 100644
--- a/dbms/src/Core/tests/CMakeLists.txt
+++ b/dbms/src/Core/tests/CMakeLists.txt
@@ -3,6 +3,7 @@ target_link_libraries (exception dbms)
 
 add_executable (string_pool string_pool.cpp)
 target_link_libraries (string_pool dbms)
+target_include_directories (string_pool BEFORE PRIVATE ${SPARCEHASH_INCLUDE_DIR})
 
 add_executable (field field.cpp)
 target_link_libraries (field dbms)

From 4f3547694dd8f4bdf0c49db21e9e24de2ad564aa Mon Sep 17 00:00:00 2001
From: Yegor Andreenko <f1yegor@gmail.com>
Date: Mon, 7 Aug 2017 16:34:17 +0200
Subject: [PATCH 083/281] Correct sentence about State Combinator

---
 docs/en/agg_functions/index.rst | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/docs/en/agg_functions/index.rst b/docs/en/agg_functions/index.rst
index d4ba751b51b..a1bcda0933f 100644
--- a/docs/en/agg_functions/index.rst
+++ b/docs/en/agg_functions/index.rst
@@ -280,7 +280,7 @@ Examples: ``uniqArrayIf(arr, cond)``,  ``quantilesTimingArrayIf(level1, level2)(
 
 State combinator
 ----------------
-If this combinator is used, the aggregate function returns a non-completed/non-finished value (for example, in the case of the ``uniq`` function, the number of unique values), and the intermediate aggregation state (for example, in the case of the ``uniq`` function, a hash table for calculating the number of unique values), which has type of ``AggregateFunction(...)`` and can be used for further processing or can be saved to a table for subsequent pre-aggregation - see the sections "AggregatingMergeTree" and "functions for working with intermediate aggregation states".
+If this combinator is used, the aggregate function returns intermediate aggregation state (for example, in the case of the ``uniq`` function, a HyperLogLog structure for calculating the number of unique values), which has type of ``AggregateFunction(...)`` and can be used for further processing or can be saved to a table for subsequent pre-aggregation - see the sections "AggregatingMergeTree" and "functions for working with intermediate aggregation states".
 
 Merge combinator
 ----------------

From ce09d0b99ad1a41d2d20221ed2482689a53e7009 Mon Sep 17 00:00:00 2001
From: alexey-milovidov <milovidov@yandex-team.ru>
Date: Thu, 10 Aug 2017 06:07:20 +0300
Subject: [PATCH 084/281] Update index.rst

---
 docs/en/agg_functions/index.rst | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/docs/en/agg_functions/index.rst b/docs/en/agg_functions/index.rst
index a1bcda0933f..8db5af9ab24 100644
--- a/docs/en/agg_functions/index.rst
+++ b/docs/en/agg_functions/index.rst
@@ -280,7 +280,7 @@ Examples: ``uniqArrayIf(arr, cond)``,  ``quantilesTimingArrayIf(level1, level2)(
 
 State combinator
 ----------------
-If this combinator is used, the aggregate function returns intermediate aggregation state (for example, in the case of the ``uniq`` function, a HyperLogLog structure for calculating the number of unique values), which has type of ``AggregateFunction(...)`` and can be used for further processing or can be saved to a table for subsequent pre-aggregation - see the sections "AggregatingMergeTree" and "functions for working with intermediate aggregation states".
+If this combinator is used, the aggregate function returns an intermediate aggregation state (for example, in the case of the ``uniqCombined`` function, a HyperLogLog structure for calculating the number of unique values), which has the type ``AggregateFunction(...)`` and can be used for further processing or can be saved to a table for subsequent pre-aggregation - see the sections "AggregatingMergeTree" and "functions for working with intermediate aggregation states".
 
 Merge combinator
 ----------------

From f1e88e074c14d66d3f9dd18abb28441268b8e8e0 Mon Sep 17 00:00:00 2001
From: Vadim Skipin <vskipin@yandex-team.ru>
Date: Wed, 9 Aug 2017 14:57:09 +0300
Subject: [PATCH 085/281] Introduce IServer to host handlers

---
 dbms/src/Server/HTTPHandler.cpp              |  6 ++---
 dbms/src/Server/HTTPHandler.h                |  7 +++---
 dbms/src/Server/IServer.h                    | 26 ++++++++++++++++++++
 dbms/src/Server/InterserverIOHTTPHandler.cpp |  2 +-
 dbms/src/Server/InterserverIOHTTPHandler.h   |  9 ++++---
 dbms/src/Server/Server.cpp                   |  2 +-
 dbms/src/Server/Server.h                     | 24 +++++++++++++++---
 dbms/src/Server/TCPHandler.cpp               |  4 +--
 dbms/src/Server/TCPHandler.h                 | 12 +++++----
 9 files changed, 68 insertions(+), 24 deletions(-)
 create mode 100644 dbms/src/Server/IServer.h

diff --git a/dbms/src/Server/HTTPHandler.cpp b/dbms/src/Server/HTTPHandler.cpp
index 1babb58a743..7e41e0184e4 100644
--- a/dbms/src/Server/HTTPHandler.cpp
+++ b/dbms/src/Server/HTTPHandler.cpp
@@ -184,7 +184,7 @@ void HTTPHandler::pushDelayedResults(Output & used_output)
 }
 
 
-HTTPHandler::HTTPHandler(Server & server_)
+HTTPHandler::HTTPHandler(IServer & server_)
     : server(server_)
     , log(&Logger::get("HTTPHandler"))
 {
@@ -224,8 +224,8 @@ void HTTPHandler::processQuery(
     std::string quota_key = request.get("X-ClickHouse-Quota", params.get("quota_key", ""));
     std::string query_id = params.get("query_id", "");
 
-    Context context = *server.global_context;
-    context.setGlobalContext(*server.global_context);
+    Context context = server.context();
+    context.setGlobalContext(server.context());
 
     context.setUser(user, password, request.clientAddress(), quota_key);
     context.setCurrentQueryId(query_id);
diff --git a/dbms/src/Server/HTTPHandler.h b/dbms/src/Server/HTTPHandler.h
index 3474a6393b6..1688a7e7925 100644
--- a/dbms/src/Server/HTTPHandler.h
+++ b/dbms/src/Server/HTTPHandler.h
@@ -19,7 +19,7 @@ class CascadeWriteBuffer;
 class HTTPHandler : public Poco::Net::HTTPRequestHandler
 {
 public:
-    explicit HTTPHandler(Server & server_);
+    explicit HTTPHandler(IServer & server_);
 
     void handleRequest(Poco::Net::HTTPServerRequest & request, Poco::Net::HTTPServerResponse & response) override;
 
@@ -47,12 +47,11 @@ private:
         }
     };
 
-    Server & server;
+    IServer & server;
+    Logger * log;
 
     CurrentMetrics::Increment metric_increment{CurrentMetrics::HTTPConnection};
 
-    Logger * log;
-
     /// Also initializes 'used_output'.
     void processQuery(
         Poco::Net::HTTPServerRequest & request,
diff --git a/dbms/src/Server/IServer.h b/dbms/src/Server/IServer.h
new file mode 100644
index 00000000000..e747a120f17
--- /dev/null
+++ b/dbms/src/Server/IServer.h
@@ -0,0 +1,26 @@
+#pragma once
+
+#include <Interpreters/Context.h>
+
+#include <Poco/Logger.h>
+#include <Poco/Util/LayeredConfiguration.h>
+
+namespace DB
+{
+
+class IServer
+{
+public:
+    /// Returns the application's configuration.
+    virtual Poco::Util::LayeredConfiguration & config() const = 0;
+
+    /// Returns the application's logger.
+    virtual Poco::Logger & logger() const = 0;
+
+    /// Returns the application's global context.
+    virtual Context & context() const = 0;
+
+    virtual ~IServer() {}
+};
+
+}
diff --git a/dbms/src/Server/InterserverIOHTTPHandler.cpp b/dbms/src/Server/InterserverIOHTTPHandler.cpp
index 5c7af625d1c..7433a3b6bdc 100644
--- a/dbms/src/Server/InterserverIOHTTPHandler.cpp
+++ b/dbms/src/Server/InterserverIOHTTPHandler.cpp
@@ -32,7 +32,7 @@ void InterserverIOHTTPHandler::processQuery(Poco::Net::HTTPServerRequest & reque
 
     WriteBufferFromHTTPServerResponse out(response);
 
-    auto endpoint = server.global_context->getInterserverIOHandler().getEndpoint(endpoint_name);
+    auto endpoint = server.context().getInterserverIOHandler().getEndpoint(endpoint_name);
 
     if (compress)
     {
diff --git a/dbms/src/Server/InterserverIOHTTPHandler.h b/dbms/src/Server/InterserverIOHTTPHandler.h
index b1a7d4585d7..95826d59b72 100644
--- a/dbms/src/Server/InterserverIOHTTPHandler.h
+++ b/dbms/src/Server/InterserverIOHTTPHandler.h
@@ -15,7 +15,7 @@ namespace DB
 class InterserverIOHTTPHandler : public Poco::Net::HTTPRequestHandler
 {
 public:
-    InterserverIOHTTPHandler(Server & server_)
+    InterserverIOHTTPHandler(IServer & server_)
         : server(server_)
         , log(&Logger::get("InterserverIOHTTPHandler"))
     {
@@ -24,11 +24,12 @@ public:
     void handleRequest(Poco::Net::HTTPServerRequest & request, Poco::Net::HTTPServerResponse & response) override;
 
 private:
-    Server & server;
-    CurrentMetrics::Increment metric_increment{CurrentMetrics::InterserverConnection};
+    IServer & server;
     Logger * log;
 
-     void processQuery(Poco::Net::HTTPServerRequest & request, Poco::Net::HTTPServerResponse & response);
+    CurrentMetrics::Increment metric_increment{CurrentMetrics::InterserverConnection};
+
+    void processQuery(Poco::Net::HTTPServerRequest & request, Poco::Net::HTTPServerResponse & response);
 };
 
 }
diff --git a/dbms/src/Server/Server.cpp b/dbms/src/Server/Server.cpp
index ecc59de0682..e7138189332 100644
--- a/dbms/src/Server/Server.cpp
+++ b/dbms/src/Server/Server.cpp
@@ -164,7 +164,7 @@ public:
             if (uri == "/ping")
                 return new PingRequestHandler;
             else if (startsWith(uri, "/replicas_status"))
-                return new ReplicasStatusHandler(*server.global_context);
+                return new ReplicasStatusHandler(server.context());
         }
 
         if (uri.find('?') != std::string::npos || request.getMethod() == Poco::Net::HTTPRequest::HTTP_POST)
diff --git a/dbms/src/Server/Server.h b/dbms/src/Server/Server.h
index b8ab55eca24..5f0adcccc68 100644
--- a/dbms/src/Server/Server.h
+++ b/dbms/src/Server/Server.h
@@ -1,5 +1,7 @@
 #pragma once
 
+#include "IServer.h"
+
 #include <Poco/URI.h>
 
 #include <Poco/Util/LayeredConfiguration.h>
@@ -37,11 +39,23 @@
 namespace DB
 {
 
-class Server : public BaseDaemon
+class Server : public BaseDaemon, public IServer
 {
 public:
-    /// Global settings of server.
-    std::unique_ptr<Context> global_context;
+    Poco::Util::LayeredConfiguration & config() const override
+    {
+        return BaseDaemon::config();
+    }
+
+    Poco::Logger & logger() const override
+    {
+        return BaseDaemon::logger();
+    }
+
+    Context & context() const override
+    {
+        return *global_context;
+    }
 
 protected:
     void initialize(Application & self) override
@@ -58,8 +72,10 @@ protected:
 
     int main(const std::vector<std::string> & args) override;
 
-private:
     std::string getDefaultCorePath() const override;
+
+private:
+    std::unique_ptr<Context> global_context;
 };
 
 }
diff --git a/dbms/src/Server/TCPHandler.cpp b/dbms/src/Server/TCPHandler.cpp
index 28d6ef551ff..c48c9cd0cc6 100644
--- a/dbms/src/Server/TCPHandler.cpp
+++ b/dbms/src/Server/TCPHandler.cpp
@@ -49,10 +49,10 @@ namespace ErrorCodes
 
 void TCPHandler::runImpl()
 {
-    connection_context = *server.global_context;
+    connection_context = server.context();
     connection_context.setSessionContext(connection_context);
 
-    Settings global_settings = server.global_context->getSettings();
+    Settings global_settings = connection_context.getSettings();
 
     socket().setReceiveTimeout(global_settings.receive_timeout);
     socket().setSendTimeout(global_settings.send_timeout);
diff --git a/dbms/src/Server/TCPHandler.h b/dbms/src/Server/TCPHandler.h
index 7555e8b672b..7122d0047b8 100644
--- a/dbms/src/Server/TCPHandler.h
+++ b/dbms/src/Server/TCPHandler.h
@@ -71,17 +71,19 @@ struct QueryState
 class TCPHandler : public Poco::Net::TCPServerConnection
 {
 public:
-    TCPHandler(Server & server_, const Poco::Net::StreamSocket & socket_)
-        : Poco::Net::TCPServerConnection(socket_), server(server_),
-        log(&Logger::get("TCPHandler")), client_revision(0),
-        connection_context(*server.global_context), query_context(connection_context)
+    TCPHandler(IServer & server_, const Poco::Net::StreamSocket & socket_)
+        : Poco::Net::TCPServerConnection(socket_)
+        , server(server_)
+        , log(&Logger::get("TCPHandler"))
+        , connection_context(server.context())
+        , query_context(server.context())
     {
     }
 
     void run();
 
 private:
-    Server & server;
+    IServer & server;
     Logger * log;
 
     String client_name;

From e6344f9048c301833bbdbe81dea9ba34292332fd Mon Sep 17 00:00:00 2001
From: Vadim Skipin <vskipin@yandex-team.ru>
Date: Wed, 9 Aug 2017 17:33:07 +0300
Subject: [PATCH 086/281] Extract handlers to allow reuse

---
 dbms/src/Server/CMakeLists.txt               |  13 +-
 dbms/src/Server/HTTPHandler.cpp              |   6 +-
 dbms/src/Server/HTTPHandler.h                |  13 +-
 dbms/src/Server/HTTPHandlerFactory.h         |  74 ++++++
 dbms/src/Server/IServer.h                    |  14 +-
 dbms/src/Server/InterserverIOHTTPHandler.cpp |  12 +-
 dbms/src/Server/InterserverIOHTTPHandler.h   |  10 +-
 dbms/src/Server/NotFoundHandler.cpp          |  37 +++
 dbms/src/Server/NotFoundHandler.h            |  18 ++
 dbms/src/Server/PingRequestHandler.cpp       |  29 +++
 dbms/src/Server/PingRequestHandler.h         |  18 ++
 dbms/src/Server/RootRequestHandler.cpp       |  30 +++
 dbms/src/Server/RootRequestHandler.h         |  27 +++
 dbms/src/Server/Server.cpp                   | 223 ++++---------------
 dbms/src/Server/Server.h                     |  25 +--
 dbms/src/Server/TCPHandler.cpp               |   7 +-
 dbms/src/Server/TCPHandler.h                 |  13 +-
 dbms/src/Server/TCPHandlerFactory.h          |  37 +++
 libs/libdaemon/include/daemon/BaseDaemon.h   |   2 +-
 19 files changed, 380 insertions(+), 228 deletions(-)
 create mode 100644 dbms/src/Server/HTTPHandlerFactory.h
 create mode 100644 dbms/src/Server/NotFoundHandler.cpp
 create mode 100644 dbms/src/Server/NotFoundHandler.h
 create mode 100644 dbms/src/Server/PingRequestHandler.cpp
 create mode 100644 dbms/src/Server/PingRequestHandler.h
 create mode 100644 dbms/src/Server/RootRequestHandler.cpp
 create mode 100644 dbms/src/Server/RootRequestHandler.h
 create mode 100644 dbms/src/Server/TCPHandlerFactory.h

diff --git a/dbms/src/Server/CMakeLists.txt b/dbms/src/Server/CMakeLists.txt
index 3be2e38c380..7741ba83e4d 100644
--- a/dbms/src/Server/CMakeLists.txt
+++ b/dbms/src/Server/CMakeLists.txt
@@ -3,14 +3,19 @@
 # each of them is built and linked as a separate library, defined below.
 
 add_library(clickhouse-server
-    Server.cpp
+    ConfigReloader.cpp
     HTTPHandler.cpp
-    TCPHandler.cpp
     InterserverIOHTTPHandler.cpp
     MetricsTransmitter.cpp
-    ConfigReloader.cpp
+    NotFoundHandler.cpp
+    PingRequestHandler.cpp
+    ReplicasStatusHandler.cpp
+    RootRequestHandler.cpp
+    Server.cpp
     StatusFile.cpp
-    ReplicasStatusHandler.cpp)
+    TCPHandler.cpp
+    )
+
 target_link_libraries(clickhouse-server daemon clickhouse_storages_system clickhouse_functions clickhouse_aggregate_functions clickhouse_table_functions)
 target_include_directories (clickhouse-server PUBLIC ${ClickHouse_SOURCE_DIR}/libs/libdaemon/include)
 
diff --git a/dbms/src/Server/HTTPHandler.cpp b/dbms/src/Server/HTTPHandler.cpp
index 7e41e0184e4..57ba1491e3d 100644
--- a/dbms/src/Server/HTTPHandler.cpp
+++ b/dbms/src/Server/HTTPHandler.cpp
@@ -1,8 +1,11 @@
 #include <chrono>
 #include <iomanip>
 
-#include <Poco/Net/HTTPBasicCredentials.h>
 #include <Poco/File.h>
+#include <Poco/Net/HTTPBasicCredentials.h>
+#include <Poco/Net/HTTPServerRequest.h>
+#include <Poco/Net/HTTPServerResponse.h>
+#include <Poco/Net/NetException.h>
 
 #include <ext/scope_guard.h>
 
@@ -16,6 +19,7 @@
 #include <IO/ConcatReadBuffer.h>
 #include <IO/CompressedReadBuffer.h>
 #include <IO/CompressedWriteBuffer.h>
+#include <IO/WriteBufferFromString.h>
 #include <IO/WriteBufferFromHTTPServerResponse.h>
 #include <IO/WriteBufferFromFile.h>
 #include <IO/WriteHelpers.h>
diff --git a/dbms/src/Server/HTTPHandler.h b/dbms/src/Server/HTTPHandler.h
index 1688a7e7925..8ce4c8c489a 100644
--- a/dbms/src/Server/HTTPHandler.h
+++ b/dbms/src/Server/HTTPHandler.h
@@ -1,7 +1,11 @@
 #pragma once
 
+#include "IServer.h"
+
+#include <Poco/Net/HTTPRequestHandler.h>
+
 #include <Common/CurrentMetrics.h>
-#include "Server.h"
+#include <Common/HTMLForm.h>
 
 
 namespace CurrentMetrics
@@ -59,8 +63,11 @@ private:
         Poco::Net::HTTPServerResponse & response,
         Output & used_output);
 
-    void trySendExceptionToClient(const std::string & s, int exception_code,
-        Poco::Net::HTTPServerRequest & request, Poco::Net::HTTPServerResponse & response,
+    void trySendExceptionToClient(
+        const std::string & s,
+        int exception_code,
+        Poco::Net::HTTPServerRequest & request,
+        Poco::Net::HTTPServerResponse & response,
         Output & used_output);
 
     void pushDelayedResults(Output & used_output);
diff --git a/dbms/src/Server/HTTPHandlerFactory.h b/dbms/src/Server/HTTPHandlerFactory.h
new file mode 100644
index 00000000000..8201141745c
--- /dev/null
+++ b/dbms/src/Server/HTTPHandlerFactory.h
@@ -0,0 +1,74 @@
+#pragma once
+
+#include <Poco/Net/HTTPRequestHandlerFactory.h>
+#include <Poco/Net/HTTPServerRequest.h>
+#include <Poco/Net/HTTPServerResponse.h>
+
+#include "IServer.h"
+
+#include "HTTPHandler.h"
+#include "InterserverIOHTTPHandler.h"
+#include "NotFoundHandler.h"
+#include "PingRequestHandler.h"
+#include "ReplicasStatusHandler.h"
+#include "RootRequestHandler.h"
+
+
+namespace DB
+{
+
+template <typename HandlerType>
+class HTTPRequestHandlerFactory : public Poco::Net::HTTPRequestHandlerFactory
+{
+private:
+    IServer & server;
+    Logger * log;
+    std::string name;
+
+public:
+    HTTPRequestHandlerFactory(IServer & server_, const std::string & name_) : server(server_), log(&Logger::get(name_)), name(name_)
+    {
+    }
+
+    Poco::Net::HTTPRequestHandler * createRequestHandler(const Poco::Net::HTTPServerRequest & request) override
+    {
+        LOG_TRACE(log,
+            "HTTP Request for " << name << ". "
+                                << "Method: "
+                                << request.getMethod()
+                                << ", Address: "
+                                << request.clientAddress().toString()
+                                << ", User-Agent: "
+                                << (request.has("User-Agent") ? request.get("User-Agent") : "none"));
+
+        const auto & uri = request.getURI();
+
+        if (request.getMethod() == Poco::Net::HTTPRequest::HTTP_GET || request.getMethod() == Poco::Net::HTTPRequest::HTTP_HEAD)
+        {
+            if (uri == "/")
+                return new RootRequestHandler(server);
+            if (uri == "/ping")
+                return new PingRequestHandler;
+            else if (startsWith(uri, "/replicas_status"))
+                return new ReplicasStatusHandler(server.context());
+        }
+
+        if (uri.find('?') != std::string::npos || request.getMethod() == Poco::Net::HTTPRequest::HTTP_POST)
+        {
+            return new HandlerType(server);
+        }
+
+        if (request.getMethod() == Poco::Net::HTTPRequest::HTTP_GET || request.getMethod() == Poco::Net::HTTPRequest::HTTP_HEAD
+            || request.getMethod() == Poco::Net::HTTPRequest::HTTP_POST)
+        {
+            return new NotFoundHandler;
+        }
+
+        return nullptr;
+    }
+};
+
+using HTTPHandlerFactory = HTTPRequestHandlerFactory<HTTPHandler>;
+using InterserverIOHTTPHandlerFactory = HTTPRequestHandlerFactory<InterserverIOHTTPHandler>;
+
+}
diff --git a/dbms/src/Server/IServer.h b/dbms/src/Server/IServer.h
index e747a120f17..699e26b1d60 100644
--- a/dbms/src/Server/IServer.h
+++ b/dbms/src/Server/IServer.h
@@ -1,13 +1,20 @@
 #pragma once
 
-#include <Interpreters/Context.h>
-
 #include <Poco/Logger.h>
 #include <Poco/Util/LayeredConfiguration.h>
 
+#include <Interpreters/Context.h>
+
+
 namespace DB
 {
 
+namespace ErrorCodes
+{
+    extern const int NO_ELEMENTS_IN_CONFIG;
+    extern const int SUPPORT_IS_DISABLED;
+}
+
 class IServer
 {
 public:
@@ -20,6 +27,9 @@ public:
     /// Returns global application's context.
     virtual Context & context() const = 0;
 
+    /// Returns true if shutdown signaled.
+    virtual bool isCancelled() const = 0;
+
     virtual ~IServer() {}
 };
 
diff --git a/dbms/src/Server/InterserverIOHTTPHandler.cpp b/dbms/src/Server/InterserverIOHTTPHandler.cpp
index 7433a3b6bdc..4e3c9589723 100644
--- a/dbms/src/Server/InterserverIOHTTPHandler.cpp
+++ b/dbms/src/Server/InterserverIOHTTPHandler.cpp
@@ -1,9 +1,15 @@
-#include "InterserverIOHTTPHandler.h"
-#include <Interpreters/InterserverIOHandler.h>
-#include <IO/WriteBufferFromHTTPServerResponse.h>
+#include <Poco/Net/HTTPServerRequest.h>
+#include <Poco/Net/HTTPServerResponse.h>
+
+#include <common/logger_useful.h>
+
+#include <Common/HTMLForm.h>
 #include <IO/CompressedWriteBuffer.h>
 #include <IO/ReadBufferFromIStream.h>
+#include <IO/WriteBufferFromHTTPServerResponse.h>
+#include <Interpreters/InterserverIOHandler.h>
 
+#include "InterserverIOHTTPHandler.h"
 
 namespace DB
 {
diff --git a/dbms/src/Server/InterserverIOHTTPHandler.h b/dbms/src/Server/InterserverIOHTTPHandler.h
index 95826d59b72..a2132a39840 100644
--- a/dbms/src/Server/InterserverIOHTTPHandler.h
+++ b/dbms/src/Server/InterserverIOHTTPHandler.h
@@ -1,8 +1,12 @@
 #pragma once
 
-#include "Server.h"
+#include <Poco/Logger.h>
+#include <Poco/Net/HTTPRequestHandler.h>
+
 #include <Common/CurrentMetrics.h>
 
+#include "IServer.h"
+
 
 namespace CurrentMetrics
 {
@@ -17,7 +21,7 @@ class InterserverIOHTTPHandler : public Poco::Net::HTTPRequestHandler
 public:
     InterserverIOHTTPHandler(IServer & server_)
         : server(server_)
-        , log(&Logger::get("InterserverIOHTTPHandler"))
+        , log(&Poco::Logger::get("InterserverIOHTTPHandler"))
     {
     }
 
@@ -25,7 +29,7 @@ public:
 
 private:
     IServer & server;
-    Logger * log;
+    Poco::Logger * log;
 
     CurrentMetrics::Increment metric_increment{CurrentMetrics::InterserverConnection};
 
diff --git a/dbms/src/Server/NotFoundHandler.cpp b/dbms/src/Server/NotFoundHandler.cpp
new file mode 100644
index 00000000000..766e8895784
--- /dev/null
+++ b/dbms/src/Server/NotFoundHandler.cpp
@@ -0,0 +1,37 @@
+#include "NotFoundHandler.h"
+
+#include <IO/HTTPCommon.h>
+
+#include <Common/Exception.h>
+
+#include <Poco/Net/HTTPServerRequest.h>
+#include <Poco/Net/HTTPServerResponse.h>
+
+namespace DB
+{
+
+void NotFoundHandler::handleRequest(
+    Poco::Net::HTTPServerRequest & request,
+    Poco::Net::HTTPServerResponse & response)
+{
+    try
+    {
+        response.setStatusAndReason(Poco::Net::HTTPResponse::HTTP_NOT_FOUND);
+        response.send() << "There is no handle " << request.getURI() << "\n\n"
+                        << "Use / or /ping for health checks.\n"
+                        << "Or /replicas_status for more sophisticated health checks.\n\n"
+                        << "Send queries from your program with POST method or GET /?query=...\n\n"
+                        << "Use clickhouse-client:\n\n"
+                        << "For interactive data analysis:\n"
+                        << "    clickhouse-client\n\n"
+                        << "For batch query processing:\n"
+                        << "    clickhouse-client --query='SELECT 1' > result\n"
+                        << "    clickhouse-client < query > result\n";
+    }
+    catch (...)
+    {
+        tryLogCurrentException("NotFoundHandler");
+    }
+}
+
+}
diff --git a/dbms/src/Server/NotFoundHandler.h b/dbms/src/Server/NotFoundHandler.h
new file mode 100644
index 00000000000..7f758e49d0d
--- /dev/null
+++ b/dbms/src/Server/NotFoundHandler.h
@@ -0,0 +1,18 @@
+#pragma once
+
+#include <Poco/Net/HTTPRequestHandler.h>
+
+
+namespace DB
+{
+
+/// Response with 404 and verbose description.
+class NotFoundHandler : public Poco::Net::HTTPRequestHandler
+{
+public:
+    void handleRequest(
+        Poco::Net::HTTPServerRequest & request,
+        Poco::Net::HTTPServerResponse & response) override;
+};
+
+}
diff --git a/dbms/src/Server/PingRequestHandler.cpp b/dbms/src/Server/PingRequestHandler.cpp
new file mode 100644
index 00000000000..492d2a5d9d1
--- /dev/null
+++ b/dbms/src/Server/PingRequestHandler.cpp
@@ -0,0 +1,29 @@
+#include "PingRequestHandler.h"
+
+#include <IO/HTTPCommon.h>
+
+#include <Common/Exception.h>
+
+#include <Poco/Net/HTTPServerRequest.h>
+#include <Poco/Net/HTTPServerResponse.h>
+
+namespace DB
+{
+
+void PingRequestHandler::handleRequest(
+    Poco::Net::HTTPServerRequest & request,
+    Poco::Net::HTTPServerResponse & response)
+{
+    try
+    {
+        setResponseDefaultHeaders(response);
+        const char * data = "Ok.\n";
+        response.sendBuffer(data, strlen(data));
+    }
+    catch (...)
+    {
+        tryLogCurrentException("PingRequestHandler");
+    }
+}
+
+}
diff --git a/dbms/src/Server/PingRequestHandler.h b/dbms/src/Server/PingRequestHandler.h
new file mode 100644
index 00000000000..f81b45e376e
--- /dev/null
+++ b/dbms/src/Server/PingRequestHandler.h
@@ -0,0 +1,18 @@
+#pragma once
+
+#include <Poco/Net/HTTPRequestHandler.h>
+
+
+namespace DB
+{
+
+/// Response with "Ok.\n". Used for availability checks.
+class PingRequestHandler : public Poco::Net::HTTPRequestHandler
+{
+public:
+    void handleRequest(
+        Poco::Net::HTTPServerRequest & request,
+        Poco::Net::HTTPServerResponse & response) override;
+};
+
+}
diff --git a/dbms/src/Server/RootRequestHandler.cpp b/dbms/src/Server/RootRequestHandler.cpp
new file mode 100644
index 00000000000..6020f9f5850
--- /dev/null
+++ b/dbms/src/Server/RootRequestHandler.cpp
@@ -0,0 +1,30 @@
+#include "RootRequestHandler.h"
+
+#include <IO/HTTPCommon.h>
+
+#include <Common/Exception.h>
+
+#include <Poco/Net/HTTPServerRequest.h>
+#include <Poco/Net/HTTPServerResponse.h>
+
+namespace DB
+{
+
+void RootRequestHandler::handleRequest(
+    Poco::Net::HTTPServerRequest & request,
+    Poco::Net::HTTPServerResponse & response)
+{
+    try
+    {
+        setResponseDefaultHeaders(response);
+        response.setContentType("text/html; charset=UTF-8");
+        const std::string data = server.config().getString("http_server_default_response", "Ok.\n");
+        response.sendBuffer(data.data(), data.size());
+    }
+    catch (...)
+    {
+        tryLogCurrentException("RootRequestHandler");
+    }
+}
+
+}
diff --git a/dbms/src/Server/RootRequestHandler.h b/dbms/src/Server/RootRequestHandler.h
new file mode 100644
index 00000000000..eed65485577
--- /dev/null
+++ b/dbms/src/Server/RootRequestHandler.h
@@ -0,0 +1,27 @@
+#pragma once
+
+#include <Poco/Net/HTTPRequestHandler.h>
+
+#include "IServer.h"
+
+
+namespace DB
+{
+
+/// Response with custom string. Can be used for browser.
+class RootRequestHandler : public Poco::Net::HTTPRequestHandler
+{
+private:
+    IServer & server;
+
+public:
+    RootRequestHandler(IServer & server_) : server(server_)
+    {
+    }
+
+    void handleRequest(
+        Poco::Net::HTTPServerRequest & request,
+        Poco::Net::HTTPServerResponse & response) override;
+};
+
+}
diff --git a/dbms/src/Server/Server.cpp b/dbms/src/Server/Server.cpp
index e7138189332..f935fac6c93 100644
--- a/dbms/src/Server/Server.cpp
+++ b/dbms/src/Server/Server.cpp
@@ -2,51 +2,54 @@
 
 #include <memory>
 #include <sys/resource.h>
+
 #include <Poco/DirectoryIterator.h>
-#include <Poco/Net/DNS.h>
-#include <Poco/Net/HTTPServerRequest.h>
+#include <Poco/Net/HTTPServer.h>
 #include <Poco/Net/NetException.h>
-#include <Poco/Util/XMLConfiguration.h>
+
+#include <ext/scope_guard.h>
+
 #include <common/ApplicationServerExt.h>
 #include <common/ErrorHandlers.h>
-#include <ext/scope_guard.h>
-#include <Common/ZooKeeper/ZooKeeper.h>
-#include <Common/ZooKeeper/ZooKeeperNodeCache.h>
+#include <common/getMemoryAmount.h>
+
+#include <Common/ClickHouseRevision.h>
+#include <Common/CurrentMetrics.h>
 #include <Common/Macros.h>
 #include <Common/StringUtils.h>
+#include <Common/ZooKeeper/ZooKeeper.h>
+#include <Common/ZooKeeper/ZooKeeperNodeCache.h>
+#include <Common/config.h>
 #include <Common/getFQDNOrHostName.h>
 #include <Common/getMultipleKeysFromConfig.h>
-#include <common/getMemoryAmount.h>
 #include <Common/getNumberOfPhysicalCPUCores.h>
-#include <Common/CurrentMetrics.h>
-#include <Common/ClickHouseRevision.h>
+
 #include <IO/HTTPCommon.h>
+
 #include <Interpreters/AsynchronousMetrics.h>
+#include <Interpreters/DDLWorker.h>
 #include <Interpreters/ProcessList.h>
 #include <Interpreters/loadMetadata.h>
-#include <Interpreters/DDLWorker.h>
 
 #include <Storages/MergeTree/ReshardingWorker.h>
 #include <Storages/StorageReplicatedMergeTree.h>
 #include <Storages/System/attachSystemTables.h>
-#include "ConfigReloader.h"
-#include "HTTPHandler.h"
-#include "InterserverIOHTTPHandler.h"
-#include "MetricsTransmitter.h"
-#include "ReplicasStatusHandler.h"
-#include "StatusFile.h"
-#include "TCPHandler.h"
 
-#include <Common/config.h>
+#include <AggregateFunctions/registerAggregateFunctions.h>
+#include <Functions/registerFunctions.h>
+#include <TableFunctions/registerTableFunctions.h>
+
+#include "ConfigReloader.h"
+#include "HTTPHandlerFactory.h"
+#include "MetricsTransmitter.h"
+#include "StatusFile.h"
+#include "TCPHandlerFactory.h"
+
 #if Poco_NetSSL_FOUND
 #include <Poco/Net/Context.h>
 #include <Poco/Net/SecureServerSocket.h>
 #endif
 
-#include <Functions/registerFunctions.h>
-#include <AggregateFunctions/registerAggregateFunctions.h>
-#include <TableFunctions/registerTableFunctions.h>
-
 
 namespace CurrentMetrics
 {
@@ -56,156 +59,6 @@ namespace CurrentMetrics
 namespace DB
 {
 
-    namespace ErrorCodes
-{
-    extern const int NO_ELEMENTS_IN_CONFIG;
-    extern const int SUPPORT_IS_DISABLED;
-}
-
-
-/// Response with "Ok.\n". Used for availability checks.
-class PingRequestHandler : public Poco::Net::HTTPRequestHandler
-{
-public:
-    void handleRequest(Poco::Net::HTTPServerRequest & request, Poco::Net::HTTPServerResponse & response) override
-    {
-        try
-        {
-            setResponseDefaultHeaders(response);
-            const char * data = "Ok.\n";
-            response.sendBuffer(data, strlen(data));
-        }
-        catch (...)
-        {
-            tryLogCurrentException("PingRequestHandler");
-        }
-    }
-};
-
-/// Response with custom string. Can be used for browser.
-class RootRequestHandler : public Poco::Net::HTTPRequestHandler
-{
-public:
-    void handleRequest(Poco::Net::HTTPServerRequest & request, Poco::Net::HTTPServerResponse & response) override
-    {
-        try
-        {
-            setResponseDefaultHeaders(response);
-            response.setContentType("text/html; charset=UTF-8");
-            const std::string data = Poco::Util::Application::instance().config().getString("http_server_default_response", "Ok.\n");
-            response.sendBuffer(data.data(), data.size());
-        }
-        catch (...)
-        {
-            tryLogCurrentException("RootRequestHandler");
-        }
-    }
-};
-
-
-/// Response with 404 and verbose description.
-class NotFoundHandler : public Poco::Net::HTTPRequestHandler
-{
-public:
-    void handleRequest(Poco::Net::HTTPServerRequest & request, Poco::Net::HTTPServerResponse & response) override
-    {
-        try
-        {
-            response.setStatusAndReason(Poco::Net::HTTPResponse::HTTP_NOT_FOUND);
-            response.send() << "There is no handle " << request.getURI() << "\n\n"
-                            << "Use / or /ping for health checks.\n"
-                            << "Or /replicas_status for more sophisticated health checks.\n\n"
-                            << "Send queries from your program with POST method or GET /?query=...\n\n"
-                            << "Use clickhouse-client:\n\n"
-                            << "For interactive data analysis:\n"
-                            << "    clickhouse-client\n\n"
-                            << "For batch query processing:\n"
-                            << "    clickhouse-client --query='SELECT 1' > result\n"
-                            << "    clickhouse-client < query > result\n";
-        }
-        catch (...)
-        {
-            tryLogCurrentException("NotFoundHandler");
-        }
-    }
-};
-
-
-template <typename HandlerType>
-class HTTPRequestHandlerFactory : public Poco::Net::HTTPRequestHandlerFactory
-{
-private:
-    Server & server;
-    Logger * log;
-    std::string name;
-
-public:
-    HTTPRequestHandlerFactory(Server & server_, const std::string & name_) : server(server_), log(&Logger::get(name_)), name(name_)
-    {
-    }
-
-    Poco::Net::HTTPRequestHandler * createRequestHandler(const Poco::Net::HTTPServerRequest & request) override
-    {
-        LOG_TRACE(log,
-            "HTTP Request for " << name << ". "
-                                << "Method: "
-                                << request.getMethod()
-                                << ", Address: "
-                                << request.clientAddress().toString()
-                                << ", User-Agent: "
-                                << (request.has("User-Agent") ? request.get("User-Agent") : "none"));
-
-        const auto & uri = request.getURI();
-
-        if (request.getMethod() == Poco::Net::HTTPRequest::HTTP_GET || request.getMethod() == Poco::Net::HTTPRequest::HTTP_HEAD)
-        {
-            if (uri == "/")
-                return new RootRequestHandler;
-            if (uri == "/ping")
-                return new PingRequestHandler;
-            else if (startsWith(uri, "/replicas_status"))
-                return new ReplicasStatusHandler(server.context());
-        }
-
-        if (uri.find('?') != std::string::npos || request.getMethod() == Poco::Net::HTTPRequest::HTTP_POST)
-        {
-            return new HandlerType(server);
-        }
-
-        if (request.getMethod() == Poco::Net::HTTPRequest::HTTP_GET || request.getMethod() == Poco::Net::HTTPRequest::HTTP_HEAD
-            || request.getMethod() == Poco::Net::HTTPRequest::HTTP_POST)
-        {
-            return new NotFoundHandler;
-        }
-
-        return nullptr;
-    }
-};
-
-
-class TCPConnectionFactory : public Poco::Net::TCPServerConnectionFactory
-{
-private:
-    Server & server;
-    Logger * log;
-
-public:
-    TCPConnectionFactory(Server & server_) : server(server_), log(&Logger::get("TCPConnectionFactory"))
-    {
-    }
-
-    Poco::Net::TCPServerConnection * createConnection(const Poco::Net::StreamSocket & socket) override
-    {
-        LOG_TRACE(log,
-            "TCP Request. "
-                << "Address: "
-                << socket.peerAddress().toString());
-
-        return new TCPHandler(server, socket);
-    }
-};
-
-
 static std::string getCanonicalPath(std::string && path)
 {
     Poco::trimInPlace(path);
@@ -499,7 +352,10 @@ int Server::main(const std::vector<std::string> & args)
                     http_socket.setSendTimeout(settings.send_timeout);
 
                     servers.emplace_back(new Poco::Net::HTTPServer(
-                        new HTTPRequestHandlerFactory<HTTPHandler>(*this, "HTTPHandler-factory"), server_pool, http_socket, http_params));
+                        new HTTPHandlerFactory(*this, "HTTPHandler-factory"),
+                        server_pool,
+                        http_socket,
+                        http_params));
 
                     LOG_INFO(log, "Listening http://" + http_socket_address.toString());
                 }
@@ -507,7 +363,7 @@ int Server::main(const std::vector<std::string> & args)
                 /// HTTPS
                 if (config().has("https_port"))
                 {
-                #if Poco_NetSSL_FOUND
+#if Poco_NetSSL_FOUND
                     std::call_once(ssl_init_once, SSLInit);
                     Poco::Net::SocketAddress http_socket_address = make_socket_address(listen_host, config().getInt("https_port"));
                     Poco::Net::SecureServerSocket http_socket(http_socket_address);
@@ -515,13 +371,16 @@ int Server::main(const std::vector<std::string> & args)
                     http_socket.setSendTimeout(settings.send_timeout);
 
                     servers.emplace_back(new Poco::Net::HTTPServer(
-                        new HTTPRequestHandlerFactory<HTTPHandler>(*this, "HTTPHandler-factory"), server_pool, http_socket, http_params));
+                        new HTTPHandlerFactory(*this, "HTTPHandler-factory"),
+                        server_pool,
+                        http_socket,
+                        http_params));
 
                     LOG_INFO(log, "Listening https://" + http_socket_address.toString());
-                #else
+#else
                     throw Exception{"https protocol disabled because poco library built without NetSSL support.",
                         ErrorCodes::SUPPORT_IS_DISABLED};
-                #endif
+#endif
                 }
 
                 /// TCP
@@ -531,8 +390,11 @@ int Server::main(const std::vector<std::string> & args)
                     Poco::Net::ServerSocket tcp_socket(tcp_address);
                     tcp_socket.setReceiveTimeout(settings.receive_timeout);
                     tcp_socket.setSendTimeout(settings.send_timeout);
-                    servers.emplace_back(
-                        new Poco::Net::TCPServer(new TCPConnectionFactory(*this), server_pool, tcp_socket, new Poco::Net::TCPServerParams));
+                    servers.emplace_back(new Poco::Net::TCPServer(
+                        new TCPHandlerFactory(*this),
+                        server_pool,
+                        tcp_socket,
+                        new Poco::Net::TCPServerParams));
 
                     LOG_INFO(log, "Listening tcp: " + tcp_address.toString());
                 }
@@ -549,7 +411,7 @@ int Server::main(const std::vector<std::string> & args)
                     interserver_io_http_socket.setReceiveTimeout(settings.receive_timeout);
                     interserver_io_http_socket.setSendTimeout(settings.send_timeout);
                     servers.emplace_back(new Poco::Net::HTTPServer(
-                        new HTTPRequestHandlerFactory<InterserverIOHTTPHandler>(*this, "InterserverIOHTTPHandler-factory"),
+                        new InterserverIOHTTPHandlerFactory(*this, "InterserverIOHTTPHandler-factory"),
                         server_pool,
                         interserver_io_http_socket,
                         http_params));
@@ -568,7 +430,6 @@ int Server::main(const std::vector<std::string> & args)
                 else
                     throw;
             }
-
         }
 
         if (servers.empty())
diff --git a/dbms/src/Server/Server.h b/dbms/src/Server/Server.h
index 5f0adcccc68..b92b3755ee2 100644
--- a/dbms/src/Server/Server.h
+++ b/dbms/src/Server/Server.h
@@ -2,28 +2,8 @@
 
 #include "IServer.h"
 
-#include <Poco/URI.h>
-
-#include <Poco/Util/LayeredConfiguration.h>
-
-#include <Poco/Net/HTTPServer.h>
-#include <Poco/Net/HTTPRequestHandlerFactory.h>
-#include <Poco/Net/HTTPRequestHandler.h>
-#include <Poco/Net/HTTPRequest.h>
-#include <Poco/Net/HTTPServerParams.h>
-#include <Poco/Net/HTTPServerRequest.h>
-#include <Poco/Net/HTTPServerResponse.h>
-#include <Poco/Net/HTMLForm.h>
-
-#include <Poco/Net/TCPServer.h>
-#include <Poco/Net/TCPServerConnectionFactory.h>
-#include <Poco/Net/TCPServerConnection.h>
-
 #include <common/logger_useful.h>
 #include <daemon/BaseDaemon.h>
-#include <Common/HTMLForm.h>
-
-#include <Interpreters/Context.h>
 
 /** Server provides three interfaces:
   * 1. HTTP - simple interface for any applications.
@@ -57,6 +37,11 @@ public:
         return *global_context;
     }
 
+    bool isCancelled() const override
+    {
+        return BaseDaemon::isCancelled();
+    }
+
 protected:
     void initialize(Application & self) override
     {
diff --git a/dbms/src/Server/TCPHandler.cpp b/dbms/src/Server/TCPHandler.cpp
index c48c9cd0cc6..d93d60bd61e 100644
--- a/dbms/src/Server/TCPHandler.cpp
+++ b/dbms/src/Server/TCPHandler.cpp
@@ -117,11 +117,11 @@ void TCPHandler::runImpl()
     while (1)
     {
         /// We are waiting for a packet from the client. Thus, every `POLL_INTERVAL` seconds check whether we need to shut down.
-        while (!static_cast<ReadBufferFromPocoSocket &>(*in).poll(global_settings.poll_interval * 1000000) && !BaseDaemon::instance().isCancelled())
+        while (!static_cast<ReadBufferFromPocoSocket &>(*in).poll(global_settings.poll_interval * 1000000) && !server.isCancelled())
             ;
 
         /// If we need to shut down, or client disconnects.
-        if (BaseDaemon::instance().isCancelled() || in->eof())
+        if (server.isCancelled() || in->eof())
             break;
 
         Stopwatch watch;
@@ -257,7 +257,7 @@ void TCPHandler::readData(const Settings & global_settings)
                 break;
 
             /// Do we need to shut down?
-            if (BaseDaemon::instance().isCancelled())
+            if (server.isCancelled())
                 return;
 
             /** Have we waited for data for too long?
@@ -778,5 +778,4 @@ void TCPHandler::run()
     }
 }
 
-
 }
diff --git a/dbms/src/Server/TCPHandler.h b/dbms/src/Server/TCPHandler.h
index 7122d0047b8..575568a63f5 100644
--- a/dbms/src/Server/TCPHandler.h
+++ b/dbms/src/Server/TCPHandler.h
@@ -1,15 +1,17 @@
 #pragma once
 
+#include <Poco/Net/TCPServerConnection.h>
+
+#include <Common/CurrentMetrics.h>
+#include <Common/Stopwatch.h>
+#include <Core/Progress.h>
 #include <Core/Protocol.h>
 #include <Core/QueryProcessingStage.h>
+#include <DataStreams/BlockIO.h>
 #include <IO/ReadHelpers.h>
 #include <IO/WriteHelpers.h>
-#include <DataStreams/BlockIO.h>
-#include <Common/Stopwatch.h>
-#include <Common/CurrentMetrics.h>
-#include <Core/Progress.h>
-#include "Server.h"
 
+#include "IServer.h"
 
 namespace CurrentMetrics
 {
@@ -145,5 +147,4 @@ private:
     void updateProgress(const Progress & value);
 };
 
-
 }
diff --git a/dbms/src/Server/TCPHandlerFactory.h b/dbms/src/Server/TCPHandlerFactory.h
new file mode 100644
index 00000000000..9514a2deb3b
--- /dev/null
+++ b/dbms/src/Server/TCPHandlerFactory.h
@@ -0,0 +1,37 @@
+#pragma once
+
+#include <Poco/Net/TCPServerConnectionFactory.h>
+
+#include "IServer.h"
+
+#include "TCPHandler.h"
+
+
+namespace DB
+{
+/// Connection factory: createConnection() traces the peer address and wraps the socket in a new TCPHandler bound to `server`.
+class TCPHandlerFactory : public Poco::Net::TCPServerConnectionFactory
+{
+private:
+    IServer & server;   /// Held by reference and passed to every TCPHandler created here. NOTE(review): must outlive the factory.
+    Logger * log;       /// Logger obtained under the name "TCPHandlerFactory".
+
+public:
+    TCPHandlerFactory(IServer & server_)
+        : server(server_)
+        , log(&Logger::get("TCPHandlerFactory"))
+    {
+    }
+
+    Poco::Net::TCPServerConnection * createConnection(const Poco::Net::StreamSocket & socket) override
+    {
+        LOG_TRACE(log,
+            "TCP Request. "
+                << "Address: "
+                << socket.peerAddress().toString());
+        /// Raw `new`. NOTE(review): presumably the caller (Poco's TCP server) takes ownership of the handler - confirm.
+        return new TCPHandler(server, socket);
+    }
+};
+
+}
diff --git a/libs/libdaemon/include/daemon/BaseDaemon.h b/libs/libdaemon/include/daemon/BaseDaemon.h
index 00ba95467b8..33a6889d610 100644
--- a/libs/libdaemon/include/daemon/BaseDaemon.h
+++ b/libs/libdaemon/include/daemon/BaseDaemon.h
@@ -79,7 +79,7 @@ public:
     void kill();
 
     /// Получен ли сигнал на завершение?
-    bool isCancelled()
+    bool isCancelled() const
     {
         return is_cancelled;
     }

From 139d9e5c192139b883a8fac98bb3a6b573b670fa Mon Sep 17 00:00:00 2001
From: proller <proller@github.com>
Date: Mon, 7 Aug 2017 16:36:03 +0300
Subject: [PATCH 087/281] Config: Allow multiple dictionaries_config

---
 dbms/src/Common/getMultipleKeysFromConfig.cpp |  4 ++--
 dbms/src/Common/getMultipleKeysFromConfig.h   |  4 ++--
 .../src/Interpreters/ExternalDictionaries.cpp | 20 ++++++++++---------
 3 files changed, 15 insertions(+), 13 deletions(-)

diff --git a/dbms/src/Common/getMultipleKeysFromConfig.cpp b/dbms/src/Common/getMultipleKeysFromConfig.cpp
index c6a462f0867..a6cfed57d22 100644
--- a/dbms/src/Common/getMultipleKeysFromConfig.cpp
+++ b/dbms/src/Common/getMultipleKeysFromConfig.cpp
@@ -5,7 +5,7 @@
 
 namespace DB
 {
-std::vector<std::string> getMultipleKeysFromConfig(Poco::Util::AbstractConfiguration & config, const std::string & root, const std::string & name)
+std::vector<std::string> getMultipleKeysFromConfig(const Poco::Util::AbstractConfiguration & config, const std::string & root, const std::string & name)
 {
     std::vector<std::string> values;
     Poco::Util::AbstractConfiguration::Keys config_keys;
@@ -20,7 +20,7 @@ std::vector<std::string> getMultipleKeysFromConfig(Poco::Util::AbstractConfigura
 }
 
 
-std::vector<std::string> getMultipleValuesFromConfig(Poco::Util::AbstractConfiguration & config, const std::string & root, const std::string & name)
+std::vector<std::string> getMultipleValuesFromConfig(const Poco::Util::AbstractConfiguration & config, const std::string & root, const std::string & name)
 {
     std::vector<std::string> values;
     for (const auto & key : DB::getMultipleKeysFromConfig(config, root, name))
diff --git a/dbms/src/Common/getMultipleKeysFromConfig.h b/dbms/src/Common/getMultipleKeysFromConfig.h
index 45b0d80cd7a..1c58af7bb45 100644
--- a/dbms/src/Common/getMultipleKeysFromConfig.h
+++ b/dbms/src/Common/getMultipleKeysFromConfig.h
@@ -12,7 +12,7 @@ namespace Util
 namespace DB
 {
 /// get all internal key names for given key
-std::vector<std::string> getMultipleKeysFromConfig(Poco::Util::AbstractConfiguration & config, const std::string & root, const std::string & name);
+std::vector<std::string> getMultipleKeysFromConfig(const Poco::Util::AbstractConfiguration & config, const std::string & root, const std::string & name);
 /// Get all values for given key
-std::vector<std::string> getMultipleValuesFromConfig(Poco::Util::AbstractConfiguration & config, const std::string & root, const std::string & name);
+std::vector<std::string> getMultipleValuesFromConfig(const Poco::Util::AbstractConfiguration & config, const std::string & root, const std::string & name);
 }
diff --git a/dbms/src/Interpreters/ExternalDictionaries.cpp b/dbms/src/Interpreters/ExternalDictionaries.cpp
index 929f1d79da2..4d3dc137131 100644
--- a/dbms/src/Interpreters/ExternalDictionaries.cpp
+++ b/dbms/src/Interpreters/ExternalDictionaries.cpp
@@ -4,6 +4,7 @@
 #include <Dictionaries/IDictionarySource.h>
 #include <Common/StringUtils.h>
 #include <Common/MemoryTracker.h>
+#include <Common/getMultipleKeysFromConfig.h>
 #include <ext/scope_guard.h>
 #include <Poco/Util/Application.h>
 #include <Poco/Glob.h>
@@ -64,17 +65,17 @@ ExternalDictionaries::~ExternalDictionaries()
     reloading_thread.join();
 }
 
-
-
 namespace
 {
-    std::set<std::string> getDictionariesConfigPaths(const Poco::Util::AbstractConfiguration & config)
+std::set<std::string> getDictionariesConfigPaths(const Poco::Util::AbstractConfiguration & config)
+{
+    std::set<std::string> files;
+    auto patterns = getMultipleValuesFromConfig(config, "", "dictionaries_config");
+    for (auto & pattern : patterns)
     {
-        auto pattern = config.getString("dictionaries_config", "");
         if (pattern.empty())
-            return {};
+            continue;
 
-        std::set<std::string> files;
         if (pattern[0] != '/')
         {
             const auto app_config_path = config.getString("config-file", "config.xml");
@@ -82,13 +83,14 @@ namespace
             const auto absolute_path = config_dir + pattern;
             Poco::Glob::glob(absolute_path, files, 0);
             if (!files.empty())
-                return files;
+                continue;
         }
 
         Poco::Glob::glob(pattern, files, 0);
-
-        return files;
     }
+
+    return files;
+}
 }
 
 void ExternalDictionaries::reloadImpl(const bool throw_on_error)

From 7937903d56d412f5c47be3518973d3d5fe42c43b Mon Sep 17 00:00:00 2001
From: proller <proller@github.com>
Date: Fri, 4 Aug 2017 19:59:25 +0300
Subject: [PATCH 088/281] Improve debug helpers

---
 dbms/src/Common/iostream_debug_helpers.h      |  8 +++++++
 .../include/common/iostream_debug_helpers.h   | 21 +++++++++++++++++++
 2 files changed, 29 insertions(+)

diff --git a/dbms/src/Common/iostream_debug_helpers.h b/dbms/src/Common/iostream_debug_helpers.h
index 5f9f7b5692b..a7a68fa1919 100644
--- a/dbms/src/Common/iostream_debug_helpers.h
+++ b/dbms/src/Common/iostream_debug_helpers.h
@@ -35,6 +35,14 @@ std::ostream & operator<<(std::ostream & stream, const DB::IColumn & what);
 #include <Client/Connection.h>
 std::ostream & operator<<(std::ostream & stream, const DB::Connection::Packet & what);
 
+#include <Common/PODArray.h>
+template <typename T, size_t INITIAL_SIZE, typename TAllocator, size_t pad_right_>
+std::ostream & operator<<(std::ostream & stream, const DB::PODArray<T, INITIAL_SIZE, TAllocator, pad_right_> & what) {
+    stream << "PODArray(size = " << what.size() << ", capacity = " << what.capacity() << ")";
+    dumpContainer(stream, what);
+    return stream;
+};
+
 
 /// some operator<< should be declared before operator<<(... std::shared_ptr<>)
 #include <common/iostream_debug_helpers.h>
diff --git a/libs/libcommon/include/common/iostream_debug_helpers.h b/libs/libcommon/include/common/iostream_debug_helpers.h
index 96c5cf8128e..92fefb9db33 100644
--- a/libs/libcommon/include/common/iostream_debug_helpers.h
+++ b/libs/libcommon/include/common/iostream_debug_helpers.h
@@ -1,6 +1,15 @@
 #pragma once
 #include <iostream>
 
+// TODO: https://stackoverflow.com/questions/16464032/how-to-enhance-this-variable-dumping-debug-macro-to-be-variadic
+#define DUMPS(VAR) #VAR " = " << VAR
+#define DUMPHEAD std::cerr << __FILE__ << ":" << __LINE__ << " "
+#define DUMP(V1) DUMPHEAD << DUMPS(V1) << "\n";
+#define DUMP2(V1, V2) DUMPHEAD << DUMPS(V1) << ", " << DUMPS(V2) << "\n";
+#define DUMP3(V1, V2, V3) DUMPHEAD << DUMPS(V1) << ", " << DUMPS(V2) << ", " << DUMPS(V3) << "\n";
+#define DUMP4(V1, V2, V3, V4) DUMPHEAD << DUMPS(V1) << ", " << DUMPS(V2) << ", " << DUMPS(V3)<< ", " << DUMPS(V4) << "\n";
+#define DUMP5(V1, V2, V3, V4, V5) DUMPHEAD << DUMPS(V1) << ", " << DUMPS(V2) << ", " << DUMPS(V3)<< ", " << DUMPS(V4) << ", " << DUMPS(V5) << "\n";
+
 
 #include <utility>
 
@@ -144,6 +153,18 @@ std::ostream & operator<<(std::ostream & stream, const std::shared_ptr<T> & what
     return stream;
 }
 
+template <class T>
+std::ostream & operator<<(std::ostream & stream, const std::unique_ptr<T> & what)
+{
+    stream << "unique_ptr {";
+    if (what)
+        stream << *what;
+    else
+        stream << "nullptr";
+    stream << "}";
+    return stream;
+}
+
 
 #include <experimental/optional>
 

From 6ca798a3571022dc121da8aaaf00bc9546c0e697 Mon Sep 17 00:00:00 2001
From: proller <proller@github.com>
Date: Fri, 4 Aug 2017 19:59:50 +0300
Subject: [PATCH 089/281] Fix infinite loop in dictGetHierarchy if id chain
 looped

---
 dbms/src/Dictionaries/CacheDictionary.cpp          | 5 +++++
 dbms/src/Functions/FunctionsExternalDictionaries.h | 9 ++++++++-
 2 files changed, 13 insertions(+), 1 deletion(-)

diff --git a/dbms/src/Dictionaries/CacheDictionary.cpp b/dbms/src/Dictionaries/CacheDictionary.cpp
index c55284c3a8f..0eef171feaa 100644
--- a/dbms/src/Dictionaries/CacheDictionary.cpp
+++ b/dbms/src/Dictionaries/CacheDictionary.cpp
@@ -138,6 +138,11 @@ void CacheDictionary::isInImpl(
                 out[out_idx] = 1;
             }
             /// Found intermediate parent, add this value to search at next loop iteration
+            else if (children[new_children_idx] == parents[parents_idx])
+            {
+                // Loop detected
+                out[out_idx] = 0;
+            }
             else
             {
                 children[new_children_idx] = parents[parents_idx];
diff --git a/dbms/src/Functions/FunctionsExternalDictionaries.h b/dbms/src/Functions/FunctionsExternalDictionaries.h
index 89434353739..d2d5d8edd7e 100644
--- a/dbms/src/Functions/FunctionsExternalDictionaries.h
+++ b/dbms/src/Functions/FunctionsExternalDictionaries.h
@@ -1431,9 +1431,16 @@ private:
                     if (0 == id)
                         continue;
 
+
+                    auto & hierarchy = hierarchies[i];
+
+                    //Checking for loop
+                    if (std::find(std::begin(hierarchy), std::end(hierarchy), id) != std::end(hierarchy))
+                        continue;
+
                     all_zeroes = false;
                     /// place id at it's corresponding place
-                    hierarchies[i].push_back(id);
+                    hierarchy.push_back(id);
 
                     ++total_count;
                 }

From 4cda5b4470bd6437400eff688fb0857079d496b7 Mon Sep 17 00:00:00 2001
From: proller <proller@github.com>
Date: Mon, 7 Aug 2017 22:02:30 +0300
Subject: [PATCH 090/281] Fix result

---
 dbms/src/Dictionaries/CacheDictionary.cpp | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/dbms/src/Dictionaries/CacheDictionary.cpp b/dbms/src/Dictionaries/CacheDictionary.cpp
index 0eef171feaa..62ea644f316 100644
--- a/dbms/src/Dictionaries/CacheDictionary.cpp
+++ b/dbms/src/Dictionaries/CacheDictionary.cpp
@@ -141,7 +141,7 @@ void CacheDictionary::isInImpl(
             else if (children[new_children_idx] == parents[parents_idx])
             {
                 // Loop detected
-                out[out_idx] = 0;
+                out[out_idx] = 1;
             }
             else
             {

From 0fbda791b5a20cb5295803fbba4747d4cf61d5c0 Mon Sep 17 00:00:00 2001
From: alexey-milovidov <milovidov@yandex-team.ru>
Date: Thu, 10 Aug 2017 06:19:55 +0300
Subject: [PATCH 091/281] Update iostream_debug_helpers.h

---
 dbms/src/Common/iostream_debug_helpers.h | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/dbms/src/Common/iostream_debug_helpers.h b/dbms/src/Common/iostream_debug_helpers.h
index a7a68fa1919..168bc991597 100644
--- a/dbms/src/Common/iostream_debug_helpers.h
+++ b/dbms/src/Common/iostream_debug_helpers.h
@@ -37,7 +37,8 @@ std::ostream & operator<<(std::ostream & stream, const DB::Connection::Packet &
 
 #include <Common/PODArray.h>
 template <typename T, size_t INITIAL_SIZE, typename TAllocator, size_t pad_right_>
-std::ostream & operator<<(std::ostream & stream, const DB::PODArray<T, INITIAL_SIZE, TAllocator, pad_right_> & what) {
+std::ostream & operator<<(std::ostream & stream, const DB::PODArray<T, INITIAL_SIZE, TAllocator, pad_right_> & what)
+{
     stream << "PODArray(size = " << what.size() << ", capacity = " << what.capacity() << ")";
     dumpContainer(stream, what);
     return stream;

From b01f0c6d132ad95a3cce360467db2bad1bfd9a85 Mon Sep 17 00:00:00 2001
From: alexey-milovidov <milovidov@yandex-team.ru>
Date: Thu, 10 Aug 2017 06:22:43 +0300
Subject: [PATCH 092/281] Update CacheDictionary.cpp

---
 dbms/src/Dictionaries/CacheDictionary.cpp | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/dbms/src/Dictionaries/CacheDictionary.cpp b/dbms/src/Dictionaries/CacheDictionary.cpp
index 62ea644f316..a991d63f96b 100644
--- a/dbms/src/Dictionaries/CacheDictionary.cpp
+++ b/dbms/src/Dictionaries/CacheDictionary.cpp
@@ -137,12 +137,12 @@ void CacheDictionary::isInImpl(
             {
                 out[out_idx] = 1;
             }
-            /// Found intermediate parent, add this value to search at next loop iteration
+            /// Loop detected
             else if (children[new_children_idx] == parents[parents_idx])
             {
-                // Loop detected
                 out[out_idx] = 1;
             }
+            /// Found intermediate parent, add this value to search at next loop iteration
             else
             {
                 children[new_children_idx] = parents[parents_idx];

From a122cd613aac412eb7a334bea23e147990b7b4f4 Mon Sep 17 00:00:00 2001
From: alexey-milovidov <milovidov@yandex-team.ru>
Date: Thu, 10 Aug 2017 06:23:02 +0300
Subject: [PATCH 093/281] Update FunctionsExternalDictionaries.h

---
 dbms/src/Functions/FunctionsExternalDictionaries.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/dbms/src/Functions/FunctionsExternalDictionaries.h b/dbms/src/Functions/FunctionsExternalDictionaries.h
index d2d5d8edd7e..f38d97255f2 100644
--- a/dbms/src/Functions/FunctionsExternalDictionaries.h
+++ b/dbms/src/Functions/FunctionsExternalDictionaries.h
@@ -1434,7 +1434,7 @@ private:
 
                     auto & hierarchy = hierarchies[i];
 
-                    //Checking for loop
+                    /// Checking for loop
                     if (std::find(std::begin(hierarchy), std::end(hierarchy), id) != std::end(hierarchy))
                         continue;
 

From 59e06c9a2c3bfad895536ee610830ce9d8ff2179 Mon Sep 17 00:00:00 2001
From: alexey-milovidov <milovidov@yandex-team.ru>
Date: Thu, 10 Aug 2017 06:28:28 +0300
Subject: [PATCH 094/281] Update iostream_debug_helpers.h

---
 libs/libcommon/include/common/iostream_debug_helpers.h | 10 +++++-----
 1 file changed, 5 insertions(+), 5 deletions(-)

diff --git a/libs/libcommon/include/common/iostream_debug_helpers.h b/libs/libcommon/include/common/iostream_debug_helpers.h
index 92fefb9db33..fce65616fa0 100644
--- a/libs/libcommon/include/common/iostream_debug_helpers.h
+++ b/libs/libcommon/include/common/iostream_debug_helpers.h
@@ -124,14 +124,14 @@ std::ostream & operator<<(std::ostream & stream, const std::ratio<Num, Denom> &
 }
 
 #include <chrono>
-template <class clock, class duration>
+template <typename clock, typename duration>
 std::ostream & operator<<(std::ostream & stream, const std::chrono::duration<clock, duration> & what)
 {
     stream << "chrono::duration<clock=" << clock() << ", duration=" << duration() << ">{" << what.count() << "}";
     return stream;
 }
 
-template <class clock, class duration>
+template <typename clock, typename duration>
 std::ostream & operator<<(std::ostream & stream, const std::chrono::time_point<clock, duration> & what)
 {
     stream << "chrono::time_point{" << what.time_since_epoch() << "}";
@@ -141,7 +141,7 @@ std::ostream & operator<<(std::ostream & stream, const std::chrono::time_point<c
 
 #include <memory>
 
-template <class T>
+template <typename T>
 std::ostream & operator<<(std::ostream & stream, const std::shared_ptr<T> & what)
 {
     stream << "shared_ptr(use_count = " << what.use_count() << ") {";
@@ -153,7 +153,7 @@ std::ostream & operator<<(std::ostream & stream, const std::shared_ptr<T> & what
     return stream;
 }
 
-template <class T>
+template <typename T>
 std::ostream & operator<<(std::ostream & stream, const std::unique_ptr<T> & what)
 {
     stream << "unique_ptr {";
@@ -168,7 +168,7 @@ std::ostream & operator<<(std::ostream & stream, const std::unique_ptr<T> & what
 
 #include <experimental/optional>
 
-template <class T>
+template <typename T>
 std::ostream & operator<<(std::ostream & stream, const std::experimental::optional<T> & what)
 {
     stream << "optional{";

From bf77c602de680089310b50233926594e25677892 Mon Sep 17 00:00:00 2001
From: alexey-milovidov <milovidov@yandex-team.ru>
Date: Thu, 10 Aug 2017 06:44:34 +0300
Subject: [PATCH 095/281] Update Settings.h

---
 dbms/src/Interpreters/Settings.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/dbms/src/Interpreters/Settings.h b/dbms/src/Interpreters/Settings.h
index 60ff6e49b13..5d7d5d8173a 100644
--- a/dbms/src/Interpreters/Settings.h
+++ b/dbms/src/Interpreters/Settings.h
@@ -290,7 +290,7 @@ struct Settings
     \
     /** If setting is enabled, insert query into distributed waits until data will be sent to all nodes in cluster. \
      */ \
-    M(SettingBool, insert_distributed_sync, 0) \
+    M(SettingBool, insert_distributed_sync, false) \
     /** Timeout for insert query into distributed. Setting is used only with insert_distributed_sync enabled. \
      *  Zero value means no timeout. \
      */ \

From 577383297941456d1b801eb34b2188b6f23084fe Mon Sep 17 00:00:00 2001
From: alexey-milovidov <milovidov@yandex-team.ru>
Date: Thu, 10 Aug 2017 06:45:57 +0300
Subject: [PATCH 096/281] Update DirectoryMonitor.cpp

---
 dbms/src/Storages/Distributed/DirectoryMonitor.cpp | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/dbms/src/Storages/Distributed/DirectoryMonitor.cpp b/dbms/src/Storages/Distributed/DirectoryMonitor.cpp
index 07bed4a6cf4..b531953cde2 100644
--- a/dbms/src/Storages/Distributed/DirectoryMonitor.cpp
+++ b/dbms/src/Storages/Distributed/DirectoryMonitor.cpp
@@ -86,7 +86,7 @@ namespace
 }
 
 
-StorageDistributedDirectoryMonitor::StorageDistributedDirectoryMonitor(StorageDistributed & storage, const std::string & name, ConnectionPoolPtr pool)
+StorageDistributedDirectoryMonitor::StorageDistributedDirectoryMonitor(StorageDistributed & storage, const std::string & name, const ConnectionPoolPtr & pool)
     : storage(storage), pool{pool}, path{storage.path + name + '/'}
     , current_batch_file_path{path + "current_batch.txt"}
     , default_sleep_time{storage.context.getSettingsRef().distributed_directory_monitor_sleep_time_ms.totalMilliseconds()}

From c665c4c4e55acf2e63abd6b4b961374ad29d7d10 Mon Sep 17 00:00:00 2001
From: alexey-milovidov <milovidov@yandex-team.ru>
Date: Thu, 10 Aug 2017 06:46:16 +0300
Subject: [PATCH 097/281] Update DirectoryMonitor.h

---
 dbms/src/Storages/Distributed/DirectoryMonitor.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/dbms/src/Storages/Distributed/DirectoryMonitor.h b/dbms/src/Storages/Distributed/DirectoryMonitor.h
index 0b556fdbbfd..cb7ff07a5ef 100644
--- a/dbms/src/Storages/Distributed/DirectoryMonitor.h
+++ b/dbms/src/Storages/Distributed/DirectoryMonitor.h
@@ -16,7 +16,7 @@ namespace DB
 class StorageDistributedDirectoryMonitor
 {
 public:
-    StorageDistributedDirectoryMonitor(StorageDistributed & storage, const std::string & name, ConnectionPoolPtr pool);
+    StorageDistributedDirectoryMonitor(StorageDistributed & storage, const std::string & name, const ConnectionPoolPtr & pool);
     ~StorageDistributedDirectoryMonitor();
 
     static ConnectionPoolPtr createPool(const std::string & name, const StorageDistributed & storage);

From 57a9d22b0cfa3b238772f1ea8ace18dad3896e35 Mon Sep 17 00:00:00 2001
From: alexey-milovidov <milovidov@yandex-team.ru>
Date: Thu, 10 Aug 2017 06:49:03 +0300
Subject: [PATCH 098/281] Update DistributedBlockOutputStream.cpp

---
 dbms/src/Storages/Distributed/DistributedBlockOutputStream.cpp | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/dbms/src/Storages/Distributed/DistributedBlockOutputStream.cpp b/dbms/src/Storages/Distributed/DistributedBlockOutputStream.cpp
index 994ed9e4c7d..7b9e09ecf4d 100644
--- a/dbms/src/Storages/Distributed/DistributedBlockOutputStream.cpp
+++ b/dbms/src/Storages/Distributed/DistributedBlockOutputStream.cpp
@@ -223,7 +223,7 @@ void DistributedBlockOutputStream::writeSync(const Block & block)
         if (exception)
             std::rethrow_exception(exception);
     }
-    catch(Exception & exception)
+    catch (Exception & exception)
     {
         exception.addMessage(getCurrentStateDescription(done_jobs, finished_local_nodes_count));
         throw;

From 0285f99c4efa8c2da19590076385d03e3d3b32dd Mon Sep 17 00:00:00 2001
From: alexey-milovidov <milovidov@yandex-team.ru>
Date: Thu, 10 Aug 2017 06:54:02 +0300
Subject: [PATCH 099/281] Update DistributedBlockOutputStream.cpp

---
 dbms/src/Storages/Distributed/DistributedBlockOutputStream.cpp | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/dbms/src/Storages/Distributed/DistributedBlockOutputStream.cpp b/dbms/src/Storages/Distributed/DistributedBlockOutputStream.cpp
index 7b9e09ecf4d..4758b1c48e0 100644
--- a/dbms/src/Storages/Distributed/DistributedBlockOutputStream.cpp
+++ b/dbms/src/Storages/Distributed/DistributedBlockOutputStream.cpp
@@ -88,7 +88,7 @@ ThreadPool::Job DistributedBlockOutputStream::createWritingJob(
         if (!current_memory_tracker)
         {
             current_memory_tracker = memory_tracker;
-            setThreadName("DistributedBlockOutputStreamProc");
+            setThreadName("DistrOutStrProc");
         }
         try
         {

From 2717c94a363199461bc39fd5910c6381cbeb602b Mon Sep 17 00:00:00 2001
From: alexey-milovidov <milovidov@yandex-team.ru>
Date: Thu, 10 Aug 2017 07:05:16 +0300
Subject: [PATCH 100/281] Update DistributedBlockOutputStream.cpp

---
 .../Storages/Distributed/DistributedBlockOutputStream.cpp    | 5 ++++-
 1 file changed, 4 insertions(+), 1 deletion(-)

diff --git a/dbms/src/Storages/Distributed/DistributedBlockOutputStream.cpp b/dbms/src/Storages/Distributed/DistributedBlockOutputStream.cpp
index 4758b1c48e0..a6afe07997d 100644
--- a/dbms/src/Storages/Distributed/DistributedBlockOutputStream.cpp
+++ b/dbms/src/Storages/Distributed/DistributedBlockOutputStream.cpp
@@ -147,7 +147,10 @@ std::string DistributedBlockOutputStream::getCurrentStateDescription(
         }
 
         for (const auto & dir_name : shard_info.dir_names)
-            writeDescription(dir_name, shard_id, blocks_inserted + (done_jobs[job_id++] ? 1 : 0));
+        {
+            writeDescription(dir_name, shard_id, blocks_inserted + (done_jobs[job_id] ? 1 : 0));
+            ++job_id;
+        }
     }
 
     return description;

From 0282d516a4569788a1b4f839a751fde89ab673ef Mon Sep 17 00:00:00 2001
From: alexey-milovidov <milovidov@yandex-team.ru>
Date: Thu, 10 Aug 2017 07:10:36 +0300
Subject: [PATCH 101/281] Update DistributedBlockOutputStream.cpp

---
 .../Storages/Distributed/DistributedBlockOutputStream.cpp  | 7 ++++++-
 1 file changed, 6 insertions(+), 1 deletion(-)

diff --git a/dbms/src/Storages/Distributed/DistributedBlockOutputStream.cpp b/dbms/src/Storages/Distributed/DistributedBlockOutputStream.cpp
index a6afe07997d..a957f1ac630 100644
--- a/dbms/src/Storages/Distributed/DistributedBlockOutputStream.cpp
+++ b/dbms/src/Storages/Distributed/DistributedBlockOutputStream.cpp
@@ -183,9 +183,14 @@ void DistributedBlockOutputStream::writeSync(const Block & block)
 
     size_t job_id = 0;
     for (size_t shard_id : ext::range(0, blocks.size()))
+    {
         for (size_t replica_id: ext::range(0, shards_info[shard_id].dir_names.size()))
+        {
             pool->schedule(createWritingJob(done_jobs, finished_jobs_count, cond_var,
-                                            blocks[shard_id], job_id++, shards_info[shard_id], replica_id));
+                                            blocks[shard_id], job_id, shards_info[shard_id], replica_id));
+            ++job_id;
+        }
+    }
 
     const size_t jobs_count = job_id;
     size_t finished_local_nodes_count;

From 4398d71ef5e6fc5d42fd68fd1c075fce6a2af201 Mon Sep 17 00:00:00 2001
From: alexey-milovidov <milovidov@yandex-team.ru>
Date: Thu, 10 Aug 2017 07:37:35 +0300
Subject: [PATCH 102/281] Update StorageDistributed.h

---
 dbms/src/Storages/StorageDistributed.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/dbms/src/Storages/StorageDistributed.h b/dbms/src/Storages/StorageDistributed.h
index de79c4ed1da..d9e17b3d644 100644
--- a/dbms/src/Storages/StorageDistributed.h
+++ b/dbms/src/Storages/StorageDistributed.h
@@ -118,7 +118,7 @@ private:
 
     /// create directory monitors for each existing subdirectory
     void createDirectoryMonitors();
-    /// ensure directory monitor thread by subdirectory name creation
+    /// ensure directory monitor thread creation by subdirectory name
     void requireDirectoryMonitor(const std::string & name);
     /// ensure connection pool creation and return it
     ConnectionPoolPtr requireConnectionPool(const std::string & name);

From c62901a871ff0455aebb5674090ec713912f3493 Mon Sep 17 00:00:00 2001
From: proller <proller@users.noreply.github.com>
Date: Thu, 10 Aug 2017 07:44:28 +0300
Subject: [PATCH 103/281] =?UTF-8?q?Internal=20compiler:=20first=20try=20fi?=
 =?UTF-8?q?nd=20and=20use=20clang=20with=20version=20in=20postf=E2=80=A6?=
 =?UTF-8?q?=20(#1029)?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

* Fix missing includes

* clean

* Allow compile with clang 3.8 with warning

* Add message

* Internal compiler: first try to find and use clang with version in postfix (clang-4.0)

* Try use clang-3.8

* try use all clangs

* Update copy_clang_binaries.sh

* Update rules
---
 debian/copy_clang_binaries.sh | 5 +++--
 debian/rules                  | 3 ++-
 2 files changed, 5 insertions(+), 3 deletions(-)

diff --git a/debian/copy_clang_binaries.sh b/debian/copy_clang_binaries.sh
index 6d6d46dce83..1bed815c1a9 100755
--- a/debian/copy_clang_binaries.sh
+++ b/debian/copy_clang_binaries.sh
@@ -7,6 +7,7 @@ DST=${1:-.};
 PATH="/usr/local/bin:/usr/local/sbin:/usr/bin:$PATH"
 LD=$(command -v gold || command -v ld.gold || command -v ld)
 
+# Should be runned with correct path to clang
 if [ -z "$CLANG" ]; then
     CLANG=$(which clang)
 fi
@@ -21,8 +22,8 @@ if [ ! -x "$LD" ]; then
     exit 1
 fi
 
-cp "$CLANG" $DST
-cp "$LD" ${DST}/ld
+cp "$CLANG" "${DST}/clang"
+cp "$LD" "${DST}/ld"
 
 STDCPP=$(ldd $CLANG | grep -oE '/[^ ]+libstdc++[^ ]+')
 
diff --git a/debian/rules b/debian/rules
index 76fd2afebf3..ae7875f6b83 100755
--- a/debian/rules
+++ b/debian/rules
@@ -21,7 +21,8 @@ DEB_HOST_MULTIARCH ?= $(shell dpkg-architecture -qDEB_HOST_MULTIARCH)
 
 DEB_CC ?= gcc-6
 DEB_CXX ?= g++-6
-DEB_CLANG ?= $(shell which clang)
+DEB_CLANG ?= $(shell which clang-6.0 || which clang-5.0 || which clang-4.0 || which clang || which clang-3.9 || which clang-3.8)
+# CMAKE_FLAGS_ADD += -DINTERNAL_COMPILER_EXECUTABLE=$(basename $(DEB_CLANG)) # TODO: this is actual only if you will also change clang name in copy_clang_binaries.sh
 
 DEB_BUILD_GNU_TYPE := $(shell dpkg-architecture -qDEB_BUILD_GNU_TYPE)
 DEB_HOST_GNU_TYPE  := $(shell dpkg-architecture -qDEB_HOST_GNU_TYPE)

From 101302b56c428f1b4df743f2a92ea75dea1c73db Mon Sep 17 00:00:00 2001
From: proller <proller@users.noreply.github.com>
Date: Thu, 10 Aug 2017 20:12:52 +0300
Subject: [PATCH 104/281] Cmake: always link ltdl for internal PocoDataODBC
 (#1089)

* Cmake: always link ltdl for internal PocoDataODBC
---
 cmake/find_poco.cmake | 9 ++-------
 1 file changed, 2 insertions(+), 7 deletions(-)

diff --git a/cmake/find_poco.cmake b/cmake/find_poco.cmake
index d8e9aecdbef..cd14b68fa4a 100644
--- a/cmake/find_poco.cmake
+++ b/cmake/find_poco.cmake
@@ -1,9 +1,5 @@
 option (USE_INTERNAL_POCO_LIBRARY "Set to FALSE to use system poco library instead of bundled" ${NOT_UNBUNDLED})
 
-if (USE_STATIC_LIBRARIES)
-    include (cmake/find_ltdl.cmake)
-endif ()
-
 if (NOT USE_INTERNAL_POCO_LIBRARY)
     find_package (Poco COMPONENTS Net NetSSL XML Data Crypto DataODBC MongoDB)
 endif ()
@@ -14,6 +10,7 @@ else ()
 
     set (USE_INTERNAL_POCO_LIBRARY 1)
 
+    include (${ClickHouse_SOURCE_DIR}/cmake/find_ltdl.cmake)
     include (${ClickHouse_SOURCE_DIR}/contrib/libpoco/cmake/FindODBC.cmake)
 
     list (APPEND Poco_INCLUDE_DIRS
@@ -33,9 +30,7 @@ else ()
     if (ODBC_FOUND)
         set (Poco_DataODBC_FOUND 1)
         set (Poco_DataODBC_LIBRARY PocoDataODBC)
-        if (USE_STATIC_LIBRARIES)
-            list (APPEND Poco_DataODBC_LIBRARY ${LTDL_LIB})
-        endif ()
+        list (APPEND Poco_DataODBC_LIBRARY ${LTDL_LIB})
         list (APPEND Poco_INCLUDE_DIRS "${ClickHouse_SOURCE_DIR}/contrib/libpoco/Data/ODBC/include/")
     endif ()
 

From 2344fe7daef14ea4003df02242ccc3c018a3c7b7 Mon Sep 17 00:00:00 2001
From: proller <proller@users.noreply.github.com>
Date: Thu, 10 Aug 2017 20:51:45 +0300
Subject: [PATCH 105/281] Cmake: fix tests includes (#1090)

Cmake: fix tests includes
---
 dbms/src/Interpreters/tests/CMakeLists.txt | 5 +++++
 1 file changed, 5 insertions(+)

diff --git a/dbms/src/Interpreters/tests/CMakeLists.txt b/dbms/src/Interpreters/tests/CMakeLists.txt
index fd9d7afe0f1..233b4a8e5b7 100644
--- a/dbms/src/Interpreters/tests/CMakeLists.txt
+++ b/dbms/src/Interpreters/tests/CMakeLists.txt
@@ -11,15 +11,18 @@ add_executable (aggregate aggregate.cpp)
 target_link_libraries (aggregate dbms)
 
 add_executable (hash_map hash_map.cpp)
+target_include_directories (hash_map BEFORE PRIVATE ${SPARCEHASH_INCLUDE_DIR})
 target_link_libraries (hash_map dbms)
 
 add_executable (hash_map2 hash_map2.cpp)
+target_include_directories (hash_map2 BEFORE PRIVATE ${SPARCEHASH_INCLUDE_DIR})
 target_link_libraries (hash_map2 dbms)
 
 add_executable (hash_map3 hash_map3.cpp)
 target_link_libraries (hash_map3 dbms)
 
 add_executable (hash_map_string hash_map_string.cpp)
+target_include_directories (hash_map_string BEFORE PRIVATE ${SPARCEHASH_INCLUDE_DIR})
 target_link_libraries (hash_map_string dbms)
 
 add_executable (hash_map_string_2 hash_map_string_2.cpp)
@@ -31,9 +34,11 @@ target_include_directories (hash_map_string_3 BEFORE PRIVATE ${ClickHouse_SOURCE
 target_include_directories (hash_map_string_3 BEFORE PRIVATE ${ClickHouse_SOURCE_DIR}/contrib/libmetrohash/src)
 
 add_executable (hash_map_string_small hash_map_string_small.cpp)
+target_include_directories (hash_map_string_small BEFORE PRIVATE ${SPARCEHASH_INCLUDE_DIR})
 target_link_libraries (hash_map_string_small dbms)
 
 add_executable (two_level_hash_map two_level_hash_map.cpp)
+target_include_directories (two_level_hash_map BEFORE PRIVATE ${SPARCEHASH_INCLUDE_DIR})
 target_link_libraries (two_level_hash_map dbms)
 
 add_executable (compiler_test compiler_test.cpp)

From a5f4815d4ea25349cc9fc19cd66d834f9632f1ca Mon Sep 17 00:00:00 2001
From: Alexey Milovidov <milovidov@yandex-team.ru>
Date: Thu, 10 Aug 2017 22:00:05 +0300
Subject: [PATCH 106/281] Fixed error in doc (tnx. feriat) [#CLICKHOUSE-2].

---
 docs/en/interfaces/cli.rst | 2 +-
 docs/ru/interfaces/cli.rst | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/docs/en/interfaces/cli.rst b/docs/en/interfaces/cli.rst
index d6525e350c4..b7a34b04f30 100644
--- a/docs/en/interfaces/cli.rst
+++ b/docs/en/interfaces/cli.rst
@@ -43,7 +43,7 @@ By default, files are searched for in this order:
 .. code-block:: text
 
     ./clickhouse-client.xml
-    ~/./clickhouse-client/config.xml
+    ~/.clickhouse-client/config.xml
     /etc/clickhouse-client/config.xml
 
 Settings are only taken from the first file found.
diff --git a/docs/ru/interfaces/cli.rst b/docs/ru/interfaces/cli.rst
index cd51b2c7ea9..d2df0e88738 100644
--- a/docs/ru/interfaces/cli.rst
+++ b/docs/ru/interfaces/cli.rst
@@ -41,7 +41,7 @@
 ``--config-file`` - имя конфигурационного файла, в котором есть дополнительные настройки или изменены умолчания для настроек, указанных выше.
 По умолчанию, ищутся файлы в следующем порядке:
 ./clickhouse-client.xml
-~/./clickhouse-client/config.xml
+~/.clickhouse-client/config.xml
 /etc/clickhouse-client/config.xml
 Настройки берутся только из первого найденного файла.
 

From e6739cc35d6aa30f6939146afc3e13b761c8acc5 Mon Sep 17 00:00:00 2001
From: Alexey Milovidov <milovidov@yandex-team.ru>
Date: Thu, 10 Aug 2017 22:41:21 +0300
Subject: [PATCH 107/281] Whitespace [#CLICKHOUSE-2].

---
 dbms/src/Storages/StorageFactory.cpp | 1 -
 1 file changed, 1 deletion(-)

diff --git a/dbms/src/Storages/StorageFactory.cpp b/dbms/src/Storages/StorageFactory.cpp
index 2bd1ca7ab0e..e33e93605c7 100644
--- a/dbms/src/Storages/StorageFactory.cpp
+++ b/dbms/src/Storages/StorageFactory.cpp
@@ -290,7 +290,6 @@ StoragePtr StorageFactory::get(
     }
     else if (name == "Dictionary")
     {
-
         return StorageDictionary::create(
             table_name, context, query, columns,
             materialized_columns, alias_columns, column_defaults);

From 351a0905d509f803873ef17f2a431a4c67f2aeb2 Mon Sep 17 00:00:00 2001
From: Vitaliy Lyudvichenko <vludv@yandex-team.ru>
Date: Thu, 10 Aug 2017 00:09:44 +0300
Subject: [PATCH 108/281] Implemented fast block and parts cleaning.
 [#CLICKHOUSE-3207]

---
 dbms/src/Common/ZooKeeper/Types.h             |   2 +-
 dbms/src/Common/ZooKeeper/ZooKeeper.cpp       |  65 ++++++-
 dbms/src/Common/ZooKeeper/ZooKeeper.h         |  22 ++-
 .../src/Common/ZooKeeper/tests/CMakeLists.txt |   2 +-
 .../tests/zkutil_test_multi_exception.cpp     |  65 +++++--
 .../ReplicatedMergeTreeCleanupThread.cpp      | 171 ++++++++++++++----
 .../ReplicatedMergeTreeCleanupThread.h        |  14 +-
 .../Storages/StorageReplicatedMergeTree.cpp   |  87 +++++----
 .../src/Storages/StorageReplicatedMergeTree.h |   5 +-
 9 files changed, 323 insertions(+), 110 deletions(-)

diff --git a/dbms/src/Common/ZooKeeper/Types.h b/dbms/src/Common/ZooKeeper/Types.h
index 138c5474c41..72b6e2852a4 100644
--- a/dbms/src/Common/ZooKeeper/Types.h
+++ b/dbms/src/Common/ZooKeeper/Types.h
@@ -11,7 +11,7 @@ namespace zkutil
 {
 
 using ACLPtr = const ACL_vector *;
-using Stat = Stat;
+using Stat = ::Stat;
 
 struct Op
 {
diff --git a/dbms/src/Common/ZooKeeper/ZooKeeper.cpp b/dbms/src/Common/ZooKeeper/ZooKeeper.cpp
index 11bf5cc45bd..4b9ca2f6805 100644
--- a/dbms/src/Common/ZooKeeper/ZooKeeper.cpp
+++ b/dbms/src/Common/ZooKeeper/ZooKeeper.cpp
@@ -555,7 +555,7 @@ int32_t ZooKeeper::multiImpl(const Ops & ops_, OpResultsPtr * out_results_)
     for (const auto & op : ops_)
         ops.push_back(*(op->data));
 
-    int32_t code = zoo_multi(impl, ops.size(), ops.data(), out_results->data());
+    int32_t code = zoo_multi(impl, static_cast<int>(ops.size()), ops.data(), out_results->data());
     ProfileEvents::increment(ProfileEvents::ZooKeeperMulti);
     ProfileEvents::increment(ProfileEvents::ZooKeeperTransactions);
 
@@ -612,15 +612,13 @@ int32_t ZooKeeper::tryMultiWithRetries(const Ops & ops, OpResultsPtr * out_resul
     return code;
 }
 
-static const int BATCH_SIZE = 100;
-
 void ZooKeeper::removeChildrenRecursive(const std::string & path)
 {
     Strings children = getChildren(path);
     while (!children.empty())
     {
         zkutil::Ops ops;
-        for (size_t i = 0; i < BATCH_SIZE && !children.empty(); ++i)
+        for (size_t i = 0; i < MULTI_BATCH_SIZE && !children.empty(); ++i)
         {
             removeChildrenRecursive(path + "/" + children.back());
             ops.emplace_back(std::make_unique<Op::Remove>(path + "/" + children.back(), -1));
@@ -639,7 +637,7 @@ void ZooKeeper::tryRemoveChildrenRecursive(const std::string & path)
     {
         zkutil::Ops ops;
         Strings batch;
-        for (size_t i = 0; i < BATCH_SIZE && !children.empty(); ++i)
+        for (size_t i = 0; i < MULTI_BATCH_SIZE && !children.empty(); ++i)
         {
             batch.push_back(path + "/" + children.back());
             children.pop_back();
@@ -904,4 +902,61 @@ ZooKeeper::RemoveFuture ZooKeeper::asyncRemove(const std::string & path)
     return future;
 }
 
+ZooKeeper::MultiFuture ZooKeeper::asyncMultiImpl(const zkutil::Ops & ops_, bool throw_exception)
+{
+    size_t count = ops_.size();
+    OpResultsPtr results(new OpResults(count));
+
+    MultiFuture future{ [throw_exception, results] (int rc) {
+        OpResultsAndCode res;
+        res.code = rc;
+        res.results = results;
+        if (throw_exception && rc != ZOK)
+            throw zkutil::KeeperException(rc);
+        return res;
+    }};
+
+    if (ops_.empty())
+    {
+        (**future.task)(ZOK);
+        return future;
+    }
+
+    /// Workaround of the libzookeeper bug.
+    /// TODO: check if the bug is fixed in the latest version of libzookeeper.
+    if (expired())
+        throw KeeperException(ZINVALIDSTATE);
+
+    /// There is no need to hold these ops until the end of the passed callback
+    std::vector<zoo_op_t> ops;
+    for (const auto & op : ops_)
+        ops.push_back(*(op->data));
+
+    int32_t code = zoo_amulti(impl, static_cast<int>(ops.size()), ops.data(), results->data(),
+                              [] (int rc, const void * data)
+                              {
+                                  MultiFuture::TaskPtr owned_task =
+                                          std::move(const_cast<MultiFuture::TaskPtr &>(*static_cast<const MultiFuture::TaskPtr *>(data)));
+                                  (*owned_task)(rc);
+                              }, future.task.get());
+
+    ProfileEvents::increment(ProfileEvents::ZooKeeperMulti);
+    ProfileEvents::increment(ProfileEvents::ZooKeeperTransactions);
+
+    if (code != ZOK)
+        throw KeeperException(code);
+
+    return future;
+}
+
+ZooKeeper::MultiFuture ZooKeeper::tryAsyncMulti(const zkutil::Ops & ops)
+{
+    return asyncMultiImpl(ops, false);
+}
+
+ZooKeeper::MultiFuture ZooKeeper::asyncMulti(const zkutil::Ops & ops)
+{
+    return asyncMultiImpl(ops, true);
+}
+
 }
diff --git a/dbms/src/Common/ZooKeeper/ZooKeeper.h b/dbms/src/Common/ZooKeeper/ZooKeeper.h
index 80328aa59c8..ba75e372bc8 100644
--- a/dbms/src/Common/ZooKeeper/ZooKeeper.h
+++ b/dbms/src/Common/ZooKeeper/ZooKeeper.h
@@ -31,6 +31,9 @@ const UInt32 DEFAULT_SESSION_TIMEOUT = 30000;
 const UInt32 MEDIUM_SESSION_TIMEOUT = 120000;
 const UInt32 BIG_SESSION_TIMEOUT = 600000;
 
+/// Preferred size of multi() command (in number of ops)
+constexpr size_t MULTI_BATCH_SIZE = 100;
+
 struct WatchContext;
 
 
@@ -46,7 +49,7 @@ struct WatchContext;
 /// Modifying methods do not retry, because it leads to problems of the double-delete type.
 ///
 /// Methods with names not starting at try- raise KeeperException on any error.
- class ZooKeeper
+class ZooKeeper
 {
 public:
     using Ptr = std::shared_ptr<ZooKeeper>;
@@ -241,7 +244,7 @@ public:
         /// The caller is responsible for ensuring that the context lives until the callback
         /// is finished and we can't simply pass ownership of the context into function object.
         /// Instead, we save the context in a Future object and return it to the caller.
-        /// The cantext will live until the Future lives.
+        /// The context will live until the Future lives.
         /// Context data is wrapped in an unique_ptr so that its address (which is passed to
         /// libzookeeper) remains unchanged after the Future is returned from the function.
         ///
@@ -320,6 +323,19 @@ public:
     RemoveFuture asyncRemove(const std::string & path);
 
 
+    struct OpResultsAndCode
+    {
+        OpResultsPtr results;
+        Ops ops;
+        int code;
+    };
+
+    using MultiFuture = Future<OpResultsAndCode, int>;
+    MultiFuture asyncMulti(const zkutil::Ops & ops);
+    /// Like the previous one but don't throw any exceptions on future.get()
+    MultiFuture tryAsyncMulti(const zkutil::Ops & ops);
+
+
     static std::string error2string(int32_t code);
 
     /// Max size of node contents in bytes.
@@ -378,6 +394,8 @@ private:
     int32_t multiImpl(const Ops & ops, OpResultsPtr * out_results = nullptr);
     int32_t existsImpl(const std::string & path, Stat * stat_, WatchCallback watch_callback);
 
+    MultiFuture asyncMultiImpl(const zkutil::Ops & ops_, bool throw_exception);
+
     std::string hosts;
     int32_t session_timeout_ms;
 
diff --git a/dbms/src/Common/ZooKeeper/tests/CMakeLists.txt b/dbms/src/Common/ZooKeeper/tests/CMakeLists.txt
index 93409ca6f53..1a213c6c923 100644
--- a/dbms/src/Common/ZooKeeper/tests/CMakeLists.txt
+++ b/dbms/src/Common/ZooKeeper/tests/CMakeLists.txt
@@ -17,4 +17,4 @@ add_executable (zk_many_watches_reconnect zk_many_watches_reconnect.cpp)
 target_link_libraries (zk_many_watches_reconnect dbms)
 
 add_executable (zkutil_test_multi_exception zkutil_test_multi_exception.cpp)
-target_link_libraries (zkutil_test_multi_exception dbms)
+target_link_libraries (zkutil_test_multi_exception dbms gtest_main)
diff --git a/dbms/src/Common/ZooKeeper/tests/zkutil_test_multi_exception.cpp b/dbms/src/Common/ZooKeeper/tests/zkutil_test_multi_exception.cpp
index bcf5d6614fd..77af1db15d8 100644
--- a/dbms/src/Common/ZooKeeper/tests/zkutil_test_multi_exception.cpp
+++ b/dbms/src/Common/ZooKeeper/tests/zkutil_test_multi_exception.cpp
@@ -1,24 +1,27 @@
 #include <iostream>
 #include <Common/ZooKeeper/ZooKeeper.h>
 #include <Common/Exception.h>
+#include <gtest/gtest.h>
 
 using namespace DB;
 
-int main()
+TEST(zkutil, multi_nice_exception_msg)
 {
     auto zookeeper = std::make_unique<zkutil::ZooKeeper>("localhost:2181");
 
-    try
-    {
-        auto acl = zookeeper->getDefaultACL();
-        zkutil::Ops ops;
+    auto acl = zookeeper->getDefaultACL();
+    zkutil::Ops ops;
 
+    ASSERT_NO_THROW(
         zookeeper->tryRemoveRecursive("/clickhouse_test_zkutil_multi");
 
         ops.emplace_back(new zkutil::Op::Create("/clickhouse_test_zkutil_multi", "_", acl, zkutil::CreateMode::Persistent));
         ops.emplace_back(new zkutil::Op::Create("/clickhouse_test_zkutil_multi/a", "_", acl, zkutil::CreateMode::Persistent));
         zookeeper->multi(ops);
+    );
 
+    try
+    {
         ops.clear();
         ops.emplace_back(new zkutil::Op::Create("/clickhouse_test_zkutil_multi/c", "_", acl, zkutil::CreateMode::Persistent));
         ops.emplace_back(new zkutil::Op::Remove("/clickhouse_test_zkutil_multi/c", -1));
@@ -27,6 +30,7 @@ int main()
         ops.emplace_back(new zkutil::Op::Create("/clickhouse_test_zkutil_multi/a", "_", acl, zkutil::CreateMode::Persistent));
 
         zookeeper->multi(ops);
+        FAIL();
     }
     catch (...)
     {
@@ -34,16 +38,47 @@ int main()
 
         String msg = getCurrentExceptionMessage(false);
 
-        if (msg.find("/clickhouse_test_zkutil_multi/a") == std::string::npos || msg.find("#2") == std::string::npos)
-        {
-            std::cerr << "Wrong: " << msg;
-            return -1;
-        }
+        bool msg_has_reqired_patterns = msg.find("/clickhouse_test_zkutil_multi/a") != std::string::npos && msg.find("#2") != std::string::npos;
+        EXPECT_TRUE(msg_has_reqired_patterns) << msg;
+    }
+}
 
-        std::cout << "Ok: " << msg;
-        return 0;
+
+TEST(zkutil, multi_async)
+{
+    auto zookeeper = std::make_unique<zkutil::ZooKeeper>("localhost:2181");
+    auto acl = zookeeper->getDefaultACL();
+    zkutil::Ops ops;
+
+    zookeeper->tryRemoveRecursive("/clickhouse_test_zkutil_multi");
+
+    {
+        ops.clear();
+        auto fut = zookeeper->asyncMulti(ops);
     }
 
-    std::cerr << "Unexpected";
-    return -1;
-}
+    {
+        ops.clear();
+        ops.emplace_back(new zkutil::Op::Create("/clickhouse_test_zkutil_multi", "", acl, zkutil::CreateMode::Persistent));
+        ops.emplace_back(new zkutil::Op::Create("/clickhouse_test_zkutil_multi/a", "", acl, zkutil::CreateMode::Persistent));
+
+        auto fut = zookeeper->tryAsyncMulti(ops);
+        ops.clear();
+
+        auto res = fut.get();
+        ASSERT_TRUE(res.code == ZOK);
+    }
+
+    {
+        ops.clear();
+        ops.emplace_back(new zkutil::Op::Create("/clickhouse_test_zkutil_multi", "_", acl, zkutil::CreateMode::Persistent));
+        ops.emplace_back(new zkutil::Op::Create("/clickhouse_test_zkutil_multi/a", "_", acl, zkutil::CreateMode::Persistent));
+
+        auto fut = zookeeper->tryAsyncMulti(ops);
+        ops.clear();
+
+        auto res = fut.get();
+        ASSERT_TRUE(res.code == ZNODEEXISTS);
+        ASSERT_EQ(res.results->size(), 2);
+    }
+}
\ No newline at end of file
diff --git a/dbms/src/Storages/MergeTree/ReplicatedMergeTreeCleanupThread.cpp b/dbms/src/Storages/MergeTree/ReplicatedMergeTreeCleanupThread.cpp
index b1fc214a292..5435479eea7 100644
--- a/dbms/src/Storages/MergeTree/ReplicatedMergeTreeCleanupThread.cpp
+++ b/dbms/src/Storages/MergeTree/ReplicatedMergeTreeCleanupThread.cpp
@@ -16,7 +16,8 @@ namespace ErrorCodes
 ReplicatedMergeTreeCleanupThread::ReplicatedMergeTreeCleanupThread(StorageReplicatedMergeTree & storage_)
     : storage(storage_),
     log(&Logger::get(storage.database_name + "." + storage.table_name + " (StorageReplicatedMergeTree, CleanupThread)")),
-    thread([this] { run(); }) {}
+    thread([this] { run(); }),
+    cached_block_stats(std::make_unique<NodesStatCache>()) {}
 
 
 void ReplicatedMergeTreeCleanupThread::run()
@@ -108,11 +109,117 @@ void ReplicatedMergeTreeCleanupThread::clearOldLogs()
     LOG_DEBUG(log, "Removed " << entries.size() << " old log entries: " << entries.front() << " - " << entries.back());
 }
 
+namespace
+{
+
+struct RequiredStat
+{
+    Int64 ctime;
+    int numChildren;
+
+    RequiredStat() = default;
+    RequiredStat(const RequiredStat &) = default;
+    explicit RequiredStat(const zkutil::Stat & s) : ctime(s.ctime), numChildren(s.numChildren) {};
+    explicit RequiredStat(Int64 ctime_) : ctime(ctime_) {}
+};
+
+}
+
+
+class ReplicatedMergeTreeCleanupThread::NodesStatCache : public std::map<String, RequiredStat>
+{
+};
+
+
+struct ReplicatedMergeTreeCleanupThread::NodeWithStat
+{
+    String node;
+    RequiredStat stat;
+
+    NodeWithStat() = default;
+    NodeWithStat(const String & node_, const RequiredStat & stat_) : node(node_), stat(stat_) {}
+
+    static bool greaterByTime (const NodeWithStat & lhs, const NodeWithStat & rhs)
+    {
+        return (lhs.stat.ctime != rhs.stat.ctime) ? lhs.stat.ctime > rhs.stat.ctime : lhs.node > rhs.node;
+    }
+};
+
 
 void ReplicatedMergeTreeCleanupThread::clearOldBlocks()
 {
     auto zookeeper = storage.getZooKeeper();
 
+    std::vector<NodeWithStat> timed_blocks;
+    getBlocksSortedByTime(zookeeper, timed_blocks);
+
+    if (timed_blocks.empty())
+        return;
+
+    /// Use ZooKeeper's first node (last according to time) timestamp as "current" time.
+    Int64 current_time = timed_blocks.front().stat.ctime;
+    Int64 time_threshold = std::max(0L, current_time - static_cast<Int64>(storage.data.settings.replicated_deduplication_window_seconds));
+    NodeWithStat block_threshold("", RequiredStat(time_threshold));
+
+    size_t current_deduplication_window = std::min(timed_blocks.size(), storage.data.settings.replicated_deduplication_window);
+    auto first_outdated_block_fixed_threshold = timed_blocks.begin() + current_deduplication_window;
+    auto first_outdated_block_time_threshold = std::upper_bound(timed_blocks.begin(), timed_blocks.end(), block_threshold, NodeWithStat::greaterByTime);
+    auto first_outdated_block = std::min(first_outdated_block_fixed_threshold, first_outdated_block_time_threshold);
+
+    /// TODO After about half a year, we could keep only the multi op, because there will be no obsolete child nodes.
+    std::vector<zkutil::ZooKeeper::MultiFuture> multi_futures;
+    zkutil::Ops ops;
+    for (auto it = first_outdated_block; it != timed_blocks.end(); ++it)
+    {
+        String path = storage.zookeeper_path + "/blocks/" + it->node;
+
+        if (it->stat.numChildren == 0)
+        {
+            ops.emplace_back(new zkutil::Op::Remove(path, -1));
+            if (ops.size() >= zkutil::MULTI_BATCH_SIZE)
+            {
+                multi_futures.emplace_back(zookeeper->tryAsyncMulti(ops));
+                ops.clear();
+            }
+        }
+        else
+            zookeeper->removeRecursive(path);
+    }
+
+    if (!ops.empty())
+    {
+        multi_futures.emplace_back(zookeeper->tryAsyncMulti(ops));
+        ops.clear();
+    }
+
+    auto num_nodes_to_delete = timed_blocks.end() - first_outdated_block;
+    size_t num_nodes_not_deleted = 0;
+    int last_error_code = ZOK;
+
+    for (auto & future : multi_futures)
+    {
+        auto res = future.get();
+        if (res.code != ZOK)
+        {
+            num_nodes_not_deleted += res.results->size();
+            last_error_code = res.code;
+        }
+    }
+
+    if (num_nodes_not_deleted)
+    {
+        LOG_ERROR(log, "There was a problem with deleting " << num_nodes_not_deleted << " (from " << num_nodes_to_delete << ")"
+                       << " old blocks from ZooKeeper, error: " << zkutil::ZooKeeper::error2string(last_error_code));
+    }
+    else
+        LOG_TRACE(log, "Cleared " << num_nodes_to_delete << " old blocks from ZooKeeper");
+}
+
+
+void ReplicatedMergeTreeCleanupThread::getBlocksSortedByTime(zkutil::ZooKeeperPtr & zookeeper, std::vector<NodeWithStat> & timed_blocks)
+{
+    timed_blocks.clear();
+
     Strings blocks;
     zkutil::Stat stat;
     if (ZOK != zookeeper->tryGetChildren(storage.zookeeper_path + "/blocks", blocks, &stat))
@@ -121,66 +228,54 @@ void ReplicatedMergeTreeCleanupThread::clearOldBlocks()
     /// Clear already deleted blocks from the cache, cached_block_ctime should be subset of blocks
     {
         NameSet blocks_set(blocks.begin(), blocks.end());
-        for (auto it = cached_block_ctime.begin(); it != cached_block_ctime.end();)
+        for (auto it = cached_block_stats->begin(); it != cached_block_stats->end();)
         {
             if (!blocks_set.count(it->first))
-                it = cached_block_ctime.erase(it);
+                it = cached_block_stats->erase(it);
             else
                 ++it;
         }
     }
 
-    auto not_cached_blocks = stat.numChildren - cached_block_ctime.size();
+    auto not_cached_blocks = stat.numChildren - cached_block_stats->size();
     LOG_TRACE(log, "Checking " << stat.numChildren << " blocks  (" << not_cached_blocks << " are not cached)"
             << " to clear old ones from ZooKeeper. This might take several minutes.");
 
-    /// Time -> block hash from ZooKeeper (from node name)
-    using TimedBlock = std::pair<Int64, String>;
-    using TimedBlocksComparator = std::greater<TimedBlock>;
-    std::vector<TimedBlock> timed_blocks;
-
+    std::vector<std::pair<String, zkutil::ZooKeeper::ExistsFuture>> exists_futures;
     for (const String & block : blocks)
     {
-        auto it = cached_block_ctime.find(block);
-
-        if (it == cached_block_ctime.end())
+        auto it = cached_block_stats->find(block);
+        if (it == cached_block_stats->end())
         {
-            /// New block. Fetch its stat and put it into the cache
-            zkutil::Stat block_stat;
-            zookeeper->exists(storage.zookeeper_path + "/blocks/" + block, &block_stat);
-            cached_block_ctime.emplace(block, block_stat.ctime);
-            timed_blocks.emplace_back(block_stat.ctime, block);
+            /// New block. Fetch its stat asynchronously
+            exists_futures.emplace_back(block, zookeeper->asyncExists(storage.zookeeper_path + "/blocks/" + block));
         }
         else
         {
             /// Cached block
-            timed_blocks.emplace_back(it->second, block);
+            timed_blocks.emplace_back(block, it->second);
         }
     }
 
-    if (timed_blocks.empty())
-        return;
-
-    std::sort(timed_blocks.begin(), timed_blocks.end(), TimedBlocksComparator());
-
-    /// Use ZooKeeper's first node (last according to time) timestamp as "current" time.
-    Int64 current_time = timed_blocks.front().first;
-    Int64 time_threshold = std::max(0L, current_time - static_cast<Int64>(storage.data.settings.replicated_deduplication_window_seconds));
-    TimedBlock block_threshold(time_threshold, "");
-
-    size_t current_deduplication_window = std::min(timed_blocks.size(), storage.data.settings.replicated_deduplication_window);
-    auto first_outdated_block_fixed_threshold = timed_blocks.begin() + current_deduplication_window;
-    auto first_outdated_block_time_threshold = std::upper_bound(timed_blocks.begin(), timed_blocks.end(), block_threshold, TimedBlocksComparator());
-    auto first_outdated_block = std::min(first_outdated_block_fixed_threshold, first_outdated_block_time_threshold);
-
-    for (auto it = first_outdated_block; it != timed_blocks.end(); ++it)
+    /// Put fetched stats into the cache
+    for (auto & elem : exists_futures)
     {
-        /// TODO After about half a year, we could replace this to multi op, because there will be no obsolete children nodes.
-        zookeeper->removeRecursive(storage.zookeeper_path + "/blocks/" + it->second);
-        cached_block_ctime.erase(it->second);
+        zkutil::ZooKeeper::StatAndExists status = elem.second.get();
+        if (!status.exists)
+            throw zkutil::KeeperException("A block node was suddenly deleted", ZNONODE);
+
+        cached_block_stats->emplace(elem.first, status.stat);
+        timed_blocks.emplace_back(elem.first, RequiredStat(status.stat));
     }
 
-    LOG_TRACE(log, "Cleared " << timed_blocks.end() - first_outdated_block << " old blocks from ZooKeeper");
+    std::sort(timed_blocks.begin(), timed_blocks.end(), NodeWithStat::greaterByTime);
+}
+
+
+ReplicatedMergeTreeCleanupThread::~ReplicatedMergeTreeCleanupThread()
+{
+    if (thread.joinable())
+        thread.join();
 }
 
 }
diff --git a/dbms/src/Storages/MergeTree/ReplicatedMergeTreeCleanupThread.h b/dbms/src/Storages/MergeTree/ReplicatedMergeTreeCleanupThread.h
index 38f43595b40..9edff46c3a6 100644
--- a/dbms/src/Storages/MergeTree/ReplicatedMergeTreeCleanupThread.h
+++ b/dbms/src/Storages/MergeTree/ReplicatedMergeTreeCleanupThread.h
@@ -1,6 +1,7 @@
 #pragma once
 
 #include <Core/Types.h>
+#include <Common/ZooKeeper/Types.h>
 #include <common/logger_useful.h>
 #include <thread>
 #include <map>
@@ -19,11 +20,7 @@ class ReplicatedMergeTreeCleanupThread
 public:
     ReplicatedMergeTreeCleanupThread(StorageReplicatedMergeTree & storage_);
 
-    ~ReplicatedMergeTreeCleanupThread()
-    {
-        if (thread.joinable())
-            thread.join();
-    }
+    ~ReplicatedMergeTreeCleanupThread();
 
 private:
     StorageReplicatedMergeTree & storage;
@@ -39,7 +36,12 @@ private:
     /// Remove old block hashes from ZooKeeper. This makes a leading replica.
     void clearOldBlocks();
 
-    std::map<String, Int64> cached_block_ctime;
+    class NodesStatCache;
+    struct NodeWithStat;
+    std::unique_ptr<NodesStatCache> cached_block_stats;
+
+    /// Returns list of blocks with stat sorted by ctime
+    void getBlocksSortedByTime(std::shared_ptr<zkutil::ZooKeeper> & zookeeper, std::vector<NodeWithStat> & timed_blocks);
 
     /// TODO Removing old quorum/failed_parts
     /// TODO Removing old nonincrement_block_numbers
diff --git a/dbms/src/Storages/StorageReplicatedMergeTree.cpp b/dbms/src/Storages/StorageReplicatedMergeTree.cpp
index ea3532893e4..13074932363 100644
--- a/dbms/src/Storages/StorageReplicatedMergeTree.cpp
+++ b/dbms/src/Storages/StorageReplicatedMergeTree.cpp
@@ -833,13 +833,11 @@ void StorageReplicatedMergeTree::checkParts(bool skip_sanity_checks)
     }
 
     /// Remove from ZK information about the parts covered by the newly added ones.
-    for (const String & name : expected_parts)
     {
-        LOG_ERROR(log, "Removing unexpectedly merged local part from ZooKeeper: " << name);
+        for (const String & name : expected_parts)
+            LOG_ERROR(log, "Removing unexpectedly merged local part from ZooKeeper: " << name);
 
-        zkutil::Ops ops;
-        removePossiblyIncompletePartNodeFromZooKeeper(name, ops, zookeeper);
-        zookeeper->multi(ops);
+        removePartsFromZooKeeper(zookeeper, Strings(expected_parts.begin(), expected_parts.end()));
     }
 
     /// Add to the queue job to pick up the missing parts from other replicas and remove from ZK the information that we have them.
@@ -855,7 +853,7 @@ void StorageReplicatedMergeTree::checkParts(bool skip_sanity_checks)
 
         /// We assume that this occurs before the queue is loaded (queue.initialize).
         zkutil::Ops ops;
-        removePossiblyIncompletePartNodeFromZooKeeper(name, ops, zookeeper);
+        removePartFromZooKeeper(name, ops);
         ops.emplace_back(std::make_unique<zkutil::Op::Create>(
             replica_path + "/queue/queue-", log_entry.toString(), zookeeper->getDefaultACL(), zkutil::CreateMode::PersistentSequential));
         zookeeper->multi(ops);
@@ -1879,25 +1877,6 @@ void StorageReplicatedMergeTree::removePartFromZooKeeper(const String & part_nam
 }
 
 
-/// Workarond for known ZooKeeper problem, see CLICKHOUSE-3040 and ZOOKEEPER-2362
-/// Multi operation was non-atomic on special wrongly-patched version of ZooKeeper
-/// (occasionally used in AdFox) in case of exceeded quota.
-void StorageReplicatedMergeTree::removePossiblyIncompletePartNodeFromZooKeeper(const String & part_name, zkutil::Ops & ops, const zkutil::ZooKeeperPtr & zookeeper)
-{
-    String part_path = replica_path + "/parts/" + part_name;
-    Names children_ = zookeeper->getChildren(part_path);
-    NameSet children(children_.begin(), children_.end());
-
-    if (children.size() != 2)
-        LOG_WARNING(log, "Will remove incomplete part node " << part_path << " from ZooKeeper");
-
-    if (children.count("checksums"))
-        ops.emplace_back(std::make_unique<zkutil::Op::Remove>(part_path + "/checksums", -1));
-    if (children.count("columns"))
-        ops.emplace_back(std::make_unique<zkutil::Op::Remove>(part_path + "/columns", -1));
-    ops.emplace_back(std::make_unique<zkutil::Op::Remove>(part_path, -1));
-}
-
 void StorageReplicatedMergeTree::removePartAndEnqueueFetch(const String & part_name)
 {
     auto zookeeper = getZooKeeper();
@@ -3812,26 +3791,27 @@ void StorageReplicatedMergeTree::clearOldPartsAndRemoveFromZK(Logger * log_)
 
     try
     {
+        LOG_DEBUG(log, "Removing " << parts.size() << " old parts from file system");
+
+        Strings part_names;
         while (!parts.empty())
         {
             MergeTreeData::DataPartPtr & part = parts.back();
-
-            LOG_DEBUG(log, "Removing " << part->name);
-
-            try
-            {
-                zkutil::Ops ops;
-                removePossiblyIncompletePartNodeFromZooKeeper(part->name, ops, zookeeper);
-                zookeeper->multi(ops);
-            }
-            catch (const zkutil::KeeperException & e)
-            {
-                LOG_WARNING(log, "Couldn't remove " << part->name << " from ZooKeeper: " << zkutil::ZooKeeper::error2string(e.code));
-            }
-
             part->remove();
+            part_names.emplace_back(part->name);
             parts.pop_back();
         }
+
+        LOG_DEBUG(log, "Removed " << part_names.size() << " old parts from file system. Removing them from ZooKeeper.");
+
+        try
+        {
+            removePartsFromZooKeeper(zookeeper, part_names);
+        }
+        catch (const zkutil::KeeperException & e)
+        {
+            LOG_ERROR(log, "There is a problem with deleting parts from ZooKeeper: " << getCurrentExceptionMessage(false));
+        }
     }
     catch (...)
     {
@@ -3844,4 +3824,33 @@ void StorageReplicatedMergeTree::clearOldPartsAndRemoveFromZK(Logger * log_)
 }
 
 
+void StorageReplicatedMergeTree::removePartsFromZooKeeper(zkutil::ZooKeeperPtr & zookeeper, const Strings & part_names)
+{
+    zkutil::Ops ops;
+    std::vector<zkutil::ZooKeeper::MultiFuture> futures;
+
+    for (auto it = part_names.cbegin(); it != part_names.cend(); ++it)
+    {
+        removePartFromZooKeeper(*it, ops);
+
+        if (ops.size() >= zkutil::MULTI_BATCH_SIZE || next(it) == part_names.cend())
+        {
+            futures.emplace_back(zookeeper->tryAsyncMulti(ops));
+            ops.clear();
+        }
+    }
+
+    int last_error_code = ZOK;
+    for (auto & future : futures)
+    {
+        auto res = future.get();
+        if (res.code != ZOK)
+            last_error_code = res.code;
+    }
+
+    if (last_error_code != ZOK)
+        throw zkutil::KeeperException(last_error_code);
+}
+
+
 }
diff --git a/dbms/src/Storages/StorageReplicatedMergeTree.h b/dbms/src/Storages/StorageReplicatedMergeTree.h
index 2cc514db494..5dd3471384f 100644
--- a/dbms/src/Storages/StorageReplicatedMergeTree.h
+++ b/dbms/src/Storages/StorageReplicatedMergeTree.h
@@ -374,9 +374,8 @@ private:
     /// Adds actions to `ops` that remove a part from ZooKeeper.
     void removePartFromZooKeeper(const String & part_name, zkutil::Ops & ops);
 
-    /// Like removePartFromZooKeeper, but handles absence of some nodes and remove other nodes anyway, see CLICKHOUSE-3040
-    /// Use it only in non-critical places for cleaning.
-    void removePossiblyIncompletePartNodeFromZooKeeper(const String & part_name, zkutil::Ops & ops, const zkutil::ZooKeeperPtr & zookeeper);
+    /// Quickly removes a big set of parts from ZooKeeper (using async multi queries)
+    void removePartsFromZooKeeper(zkutil::ZooKeeperPtr & zookeeper, const Strings & part_names);
 
     /// Removes a part from ZooKeeper and adds a task to the queue to download it. It is supposed to do this with broken parts.
     void removePartAndEnqueueFetch(const String & part_name);

From 23af9ddd5e37a7160e1d0015643443395abf5076 Mon Sep 17 00:00:00 2001
From: Vitaliy Lyudvichenko <vludv@yandex-team.ru>
Date: Thu, 10 Aug 2017 02:07:56 +0300
Subject: [PATCH 109/281] Fixed segfault: the future owns source ops.
 [#CLICKHOUSE-3207]

---
 dbms/src/Common/ZooKeeper/Types.h             | 41 +++++++++++++++----
 dbms/src/Common/ZooKeeper/ZooKeeper.cpp       | 25 +++++++----
 dbms/src/Common/ZooKeeper/ZooKeeper.h         |  6 +--
 .../tests/zkutil_test_multi_exception.cpp     | 29 ++++++++++++-
 4 files changed, 83 insertions(+), 18 deletions(-)

diff --git a/dbms/src/Common/ZooKeeper/Types.h b/dbms/src/Common/ZooKeeper/Types.h
index 72b6e2852a4..a30e4715bc5 100644
--- a/dbms/src/Common/ZooKeeper/Types.h
+++ b/dbms/src/Common/ZooKeeper/Types.h
@@ -19,6 +19,8 @@ public:
     Op() : data(new zoo_op_t) {}
     virtual ~Op() {}
 
+    virtual std::unique_ptr<Op> clone() const = 0;
+
     virtual std::string describe() = 0;
 
     std::unique_ptr<zoo_op_t> data;
@@ -31,21 +33,32 @@ public:
 
 struct Op::Remove : public Op
 {
-    Remove(const std::string & path_, int32_t version) :
-        path(path_)
+    Remove(const std::string & path_, int32_t version_) :
+        path(path_), version(version_)
     {
         zoo_delete_op_init(data.get(), path.c_str(), version);
     }
 
+    std::unique_ptr<Op> clone() const override
+    {
+        return std::unique_ptr<zkutil::Op>(new Remove(path, version));
+    }
+
     std::string describe() override { return "command: remove, path: " + path; }
 
 private:
     std::string path;
+    int32_t version;
 };
 
 struct Op::Create : public Op
 {
-    Create(const std::string & path_, const std::string & value_, ACLPtr acl, int32_t flags);
+    Create(const std::string & path_, const std::string & value_, ACLPtr acl_, int32_t flags_);
+
+    std::unique_ptr<Op> clone() const override
+    {
+        return std::unique_ptr<zkutil::Op>(new Create(path, value, acl, flags));
+    }
 
     std::string getPathCreated()
     {
@@ -62,17 +75,24 @@ struct Op::Create : public Op
 private:
     std::string path;
     std::string value;
+    ACLPtr acl;
+    int32_t flags;
     std::vector<char> created_path;
 };
 
 struct Op::SetData : public Op
 {
-    SetData(const std::string & path_, const std::string & value_, int32_t version) :
-        path(path_), value(value_)
+    SetData(const std::string & path_, const std::string & value_, int32_t version_) :
+        path(path_), value(value_), version(version_)
     {
         zoo_set_op_init(data.get(), path.c_str(), value.c_str(), value.size(), version, &stat);
     }
 
+    std::unique_ptr<Op> clone() const override
+    {
+        return std::unique_ptr<zkutil::Op>(new SetData(path, value, version));
+    }
+
     std::string describe() override
     {
         return
@@ -85,21 +105,28 @@ struct Op::SetData : public Op
 private:
     std::string path;
     std::string value;
+    int32_t version;
     Stat stat;
 };
 
 struct Op::Check : public Op
 {
-    Check(const std::string & path_, int32_t version) :
-        path(path_)
+    Check(const std::string & path_, int32_t version_) :
+        path(path_), version(version_)
     {
         zoo_check_op_init(data.get(), path.c_str(), version);
     }
 
+    std::unique_ptr<Op> clone() const override
+    {
+        return std::unique_ptr<zkutil::Op>(new Check(path, version));
+    }
+
     std::string describe() override { return "command: check, path: " + path; }
 
 private:
     std::string path;
+    int32_t version;
 };
 
 struct OpResult : public zoo_op_result_t
diff --git a/dbms/src/Common/ZooKeeper/ZooKeeper.cpp b/dbms/src/Common/ZooKeeper/ZooKeeper.cpp
index 4b9ca2f6805..e03b7ab7182 100644
--- a/dbms/src/Common/ZooKeeper/ZooKeeper.cpp
+++ b/dbms/src/Common/ZooKeeper/ZooKeeper.cpp
@@ -710,8 +710,8 @@ ZooKeeperPtr ZooKeeper::startNewSession() const
     return std::make_shared<ZooKeeper>(hosts, session_timeout_ms);
 }
 
-Op::Create::Create(const std::string & path_, const std::string & value_, ACLPtr acl, int32_t flags)
-    : path(path_), value(value_), created_path(path.size() + ZooKeeper::SEQUENTIAL_SUFFIX_SIZE)
+Op::Create::Create(const std::string & path_, const std::string & value_, ACLPtr acl_, int32_t flags_)
+    : path(path_), value(value_), acl(acl_), flags(flags_), created_path(path.size() + ZooKeeper::SEQUENTIAL_SUFFIX_SIZE)
 {
     zoo_create_op_init(data.get(), path.c_str(), value.c_str(), value.size(), acl, flags, created_path.data(), created_path.size());
 }
@@ -907,10 +907,24 @@ ZooKeeper::MultiFuture ZooKeeper::asyncMultiImpl(const zkutil::Ops & ops_, bool
     size_t count = ops_.size();
     OpResultsPtr results(new OpResults(count));
 
-    MultiFuture future{ [throw_exception, results] (int rc) {
+    /// We need to hold all references to ops data until the end of multi callback
+    struct OpsHolder
+    {
+        std::shared_ptr<zkutil::Ops> ops_ptr = std::make_shared<zkutil::Ops>();
+        std::shared_ptr<std::vector<zoo_op_t>> ops_raw_ptr = std::make_shared<std::vector<zoo_op_t>>();;
+    } holder;
+
+    for (const auto & op : ops_)
+    {
+        holder.ops_ptr->emplace_back(op->clone());
+        holder.ops_raw_ptr->push_back(*holder.ops_ptr->back()->data);
+    }
+
+    MultiFuture future{ [throw_exception, results, holder] (int rc) {
         OpResultsAndCode res;
         res.code = rc;
         res.results = results;
+        res.ops_ptr = holder.ops_ptr;
         if (throw_exception && rc != ZOK)
             throw zkutil::KeeperException(rc);
         return res;
@@ -927,10 +941,7 @@ ZooKeeper::MultiFuture ZooKeeper::asyncMultiImpl(const zkutil::Ops & ops_, bool
     if (expired())
         throw KeeperException(ZINVALIDSTATE);
 
-    /// There is no need to hold these ops until the end of the passed callback
-    std::vector<zoo_op_t> ops;
-    for (const auto & op : ops_)
-        ops.push_back(*(op->data));
+    auto & ops = *holder.ops_raw_ptr;
 
     int32_t code = zoo_amulti(impl, static_cast<int>(ops.size()), ops.data(), results->data(),
                               [] (int rc, const void * data)
diff --git a/dbms/src/Common/ZooKeeper/ZooKeeper.h b/dbms/src/Common/ZooKeeper/ZooKeeper.h
index ba75e372bc8..197cb8083b2 100644
--- a/dbms/src/Common/ZooKeeper/ZooKeeper.h
+++ b/dbms/src/Common/ZooKeeper/ZooKeeper.h
@@ -326,14 +326,14 @@ public:
     struct OpResultsAndCode
     {
         OpResultsPtr results;
-        Ops ops;
+        std::shared_ptr<Ops> ops_ptr;
         int code;
     };
 
     using MultiFuture = Future<OpResultsAndCode, int>;
-    MultiFuture asyncMulti(const zkutil::Ops & ops);
+    MultiFuture asyncMulti(const Ops & ops);
     /// Like the previous one but don't throw any exceptions on future.get()
-    MultiFuture tryAsyncMulti(const zkutil::Ops & ops);
+    MultiFuture tryAsyncMulti(const Ops & ops);
 
 
     static std::string error2string(int32_t code);
diff --git a/dbms/src/Common/ZooKeeper/tests/zkutil_test_multi_exception.cpp b/dbms/src/Common/ZooKeeper/tests/zkutil_test_multi_exception.cpp
index 77af1db15d8..d728d9f0ca6 100644
--- a/dbms/src/Common/ZooKeeper/tests/zkutil_test_multi_exception.cpp
+++ b/dbms/src/Common/ZooKeeper/tests/zkutil_test_multi_exception.cpp
@@ -1,6 +1,7 @@
-#include <iostream>
 #include <Common/ZooKeeper/ZooKeeper.h>
 #include <Common/Exception.h>
+#include <iostream>
+#include <chrono>
 #include <gtest/gtest.h>
 
 using namespace DB;
@@ -67,8 +68,33 @@ TEST(zkutil, multi_async)
 
         auto res = fut.get();
         ASSERT_TRUE(res.code == ZOK);
+        ASSERT_EQ(res.results->size(), 2);
+        ASSERT_EQ(res.ops_ptr->size(), 2);
     }
 
+    EXPECT_ANY_THROW
+    (
+        std::vector<zkutil::ZooKeeper::MultiFuture> futures;
+
+        for (size_t i = 0; i < 10000; ++i)
+        {
+            ops.clear();
+            ops.emplace_back(new zkutil::Op::Remove("/clickhouse_test_zkutil_multi", -1));
+            ops.emplace_back(new zkutil::Op::Create("/clickhouse_test_zkutil_multi", "_", acl, zkutil::CreateMode::Persistent));
+            ops.emplace_back(new zkutil::Op::Check("/clickhouse_test_zkutil_multi", -1));
+            ops.emplace_back(new zkutil::Op::SetData("/clickhouse_test_zkutil_multi", "xxx", 42));
+            ops.emplace_back(new zkutil::Op::Create("/clickhouse_test_zkutil_multi/a", "_", acl, zkutil::CreateMode::Persistent));
+
+            futures.emplace_back(zookeeper->asyncMulti(ops));
+        }
+
+        futures[0].get();
+    );
+
+    /// Check there are no segfaults for remaining 9999 futures
+    using namespace std::chrono_literals;
+    std::this_thread::sleep_for(1s);
+
     {
         ops.clear();
         ops.emplace_back(new zkutil::Op::Create("/clickhouse_test_zkutil_multi", "_", acl, zkutil::CreateMode::Persistent));
@@ -80,5 +106,6 @@ TEST(zkutil, multi_async)
         auto res = fut.get();
         ASSERT_TRUE(res.code == ZNODEEXISTS);
         ASSERT_EQ(res.results->size(), 2);
+        ASSERT_EQ(res.ops_ptr->size(), 2);
     }
 }
\ No newline at end of file

From 5d3bff48023a7857e6c57f8a9102317974c35fef Mon Sep 17 00:00:00 2001
From: alexey-milovidov <milovidov@yandex-team.ru>
Date: Thu, 10 Aug 2017 05:38:18 +0300
Subject: [PATCH 110/281] Update ZooKeeper.cpp

---
 dbms/src/Common/ZooKeeper/ZooKeeper.cpp | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/dbms/src/Common/ZooKeeper/ZooKeeper.cpp b/dbms/src/Common/ZooKeeper/ZooKeeper.cpp
index e03b7ab7182..2597c3f2066 100644
--- a/dbms/src/Common/ZooKeeper/ZooKeeper.cpp
+++ b/dbms/src/Common/ZooKeeper/ZooKeeper.cpp
@@ -911,7 +911,7 @@ ZooKeeper::MultiFuture ZooKeeper::asyncMultiImpl(const zkutil::Ops & ops_, bool
     struct OpsHolder
     {
         std::shared_ptr<zkutil::Ops> ops_ptr = std::make_shared<zkutil::Ops>();
-        std::shared_ptr<std::vector<zoo_op_t>> ops_raw_ptr = std::make_shared<std::vector<zoo_op_t>>();;
+        std::shared_ptr<std::vector<zoo_op_t>> ops_raw_ptr = std::make_shared<std::vector<zoo_op_t>>();
     } holder;
 
     for (const auto & op : ops_)

From e44614d01724e42b626ac61666e9b1cde9061f23 Mon Sep 17 00:00:00 2001
From: alexey-milovidov <milovidov@yandex-team.ru>
Date: Thu, 10 Aug 2017 05:40:53 +0300
Subject: [PATCH 111/281] Update ZooKeeper.cpp

---
 dbms/src/Common/ZooKeeper/ZooKeeper.cpp | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/dbms/src/Common/ZooKeeper/ZooKeeper.cpp b/dbms/src/Common/ZooKeeper/ZooKeeper.cpp
index 2597c3f2066..6ba093719e6 100644
--- a/dbms/src/Common/ZooKeeper/ZooKeeper.cpp
+++ b/dbms/src/Common/ZooKeeper/ZooKeeper.cpp
@@ -947,7 +947,7 @@ ZooKeeper::MultiFuture ZooKeeper::asyncMultiImpl(const zkutil::Ops & ops_, bool
                               [] (int rc, const void * data)
                               {
                                   MultiFuture::TaskPtr owned_task =
-                                          std::move(const_cast<MultiFuture::TaskPtr &>(*static_cast<const MultiFuture::TaskPtr *>(data)));
+                                      std::move(const_cast<MultiFuture::TaskPtr &>(*static_cast<const MultiFuture::TaskPtr *>(data)));
                                   (*owned_task)(rc);
                               }, future.task.get());
 

From 1626637a89f44f106960c760d5d27bb8a76290a2 Mon Sep 17 00:00:00 2001
From: alexey-milovidov <milovidov@yandex-team.ru>
Date: Thu, 10 Aug 2017 05:43:24 +0300
Subject: [PATCH 112/281] Update zkutil_test_multi_exception.cpp

---
 dbms/src/Common/ZooKeeper/tests/zkutil_test_multi_exception.cpp | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/dbms/src/Common/ZooKeeper/tests/zkutil_test_multi_exception.cpp b/dbms/src/Common/ZooKeeper/tests/zkutil_test_multi_exception.cpp
index d728d9f0ca6..d3695c31b6b 100644
--- a/dbms/src/Common/ZooKeeper/tests/zkutil_test_multi_exception.cpp
+++ b/dbms/src/Common/ZooKeeper/tests/zkutil_test_multi_exception.cpp
@@ -108,4 +108,4 @@ TEST(zkutil, multi_async)
         ASSERT_EQ(res.results->size(), 2);
         ASSERT_EQ(res.ops_ptr->size(), 2);
     }
-}
\ No newline at end of file
+}

From cd3a696d8b3c0d56055df5ec95b2de4fb64f0b4c Mon Sep 17 00:00:00 2001
From: Vitaliy Lyudvichenko <vludv@yandex-team.ru>
Date: Thu, 10 Aug 2017 18:19:36 +0300
Subject: [PATCH 113/281] Add requested changes. [#CLICKHOUSE-3207]

---
 .../ReplicatedMergeTreeCleanupThread.cpp      | 29 ++++++++++---------
 .../ReplicatedMergeTreeCleanupThread.h        |  2 +-
 2 files changed, 17 insertions(+), 14 deletions(-)

diff --git a/dbms/src/Storages/MergeTree/ReplicatedMergeTreeCleanupThread.cpp b/dbms/src/Storages/MergeTree/ReplicatedMergeTreeCleanupThread.cpp
index 5435479eea7..a988cf80ac7 100644
--- a/dbms/src/Storages/MergeTree/ReplicatedMergeTreeCleanupThread.cpp
+++ b/dbms/src/Storages/MergeTree/ReplicatedMergeTreeCleanupThread.cpp
@@ -97,7 +97,7 @@ void ReplicatedMergeTreeCleanupThread::clearOldLogs()
     {
         ops.emplace_back(std::make_unique<zkutil::Op::Remove>(storage.zookeeper_path + "/log/" + entries[i], -1));
 
-        if (ops.size() > 400 || i + 1 == entries.size())
+        if (ops.size() > 4 * zkutil::MULTI_BATCH_SIZE || i + 1 == entries.size())
         {
             /// Simultaneously with clearing the log, we check to see if replica was added since we received replicas list.
             ops.emplace_back(std::make_unique<zkutil::Op::Check>(storage.zookeeper_path + "/replicas", stat.version));
@@ -109,13 +109,15 @@ void ReplicatedMergeTreeCleanupThread::clearOldLogs()
     LOG_DEBUG(log, "Removed " << entries.size() << " old log entries: " << entries.front() << " - " << entries.back());
 }
 
+
 namespace
 {
 
+/// Just a subset of zkutil::Stat fields required for the cache
 struct RequiredStat
 {
-    Int64 ctime;
-    int numChildren;
+    int64_t ctime = 0;
+    int32_t numChildren = 0;
 
     RequiredStat() = default;
     RequiredStat(const RequiredStat &) = default;
@@ -125,12 +127,7 @@ struct RequiredStat
 
 }
 
-
-class ReplicatedMergeTreeCleanupThread::NodesStatCache : public std::map<String, RequiredStat>
-{
-};
-
-
+/// Just a node name with its ZooKeeper stat
 struct ReplicatedMergeTreeCleanupThread::NodeWithStat
 {
     String node;
@@ -141,10 +138,14 @@ struct ReplicatedMergeTreeCleanupThread::NodeWithStat
 
     static bool greaterByTime (const NodeWithStat & lhs, const NodeWithStat & rhs)
     {
-        return (lhs.stat.ctime != rhs.stat.ctime) ? lhs.stat.ctime > rhs.stat.ctime : lhs.node > rhs.node;
+        return std::greater<void>()(std::forward_as_tuple(lhs.stat.ctime, lhs.node), std::forward_as_tuple(rhs.stat.ctime, rhs.node));
     }
 };
 
+/// Use simple map node_name -> zkutil::Stat (only required fields) as the cache
+/// It is not declared in the header explicitly to hide extra implementation dependent structs like RequiredStat
+class ReplicatedMergeTreeCleanupThread::NodesStatCache : public std::map<String, RequiredStat> {};
+
 
 void ReplicatedMergeTreeCleanupThread::clearOldBlocks()
 {
@@ -158,7 +159,9 @@ void ReplicatedMergeTreeCleanupThread::clearOldBlocks()
 
     /// Use ZooKeeper's first node (last according to time) timestamp as "current" time.
     Int64 current_time = timed_blocks.front().stat.ctime;
-    Int64 time_threshold = std::max(0L, current_time - static_cast<Int64>(storage.data.settings.replicated_deduplication_window_seconds));
+    Int64 time_threshold = std::max(0L, current_time - static_cast<Int64>(1000 * storage.data.settings.replicated_deduplication_window_seconds));
+
+    /// Virtual node, all nodes that are "greater" than this one will be deleted
     NodeWithStat block_threshold("", RequiredStat(time_threshold));
 
     size_t current_deduplication_window = std::min(timed_blocks.size(), storage.data.settings.replicated_deduplication_window);
@@ -208,7 +211,7 @@ void ReplicatedMergeTreeCleanupThread::clearOldBlocks()
 
     if (num_nodes_not_deleted)
     {
-        LOG_ERROR(log, "There was a problem with deleting " << num_nodes_not_deleted << " (from " << num_nodes_to_delete << ")"
+        LOG_ERROR(log, "There was a problem with deleting " << num_nodes_not_deleted << " (of " << num_nodes_to_delete << ")"
                        << " old blocks from ZooKeeper, error: " << zkutil::ZooKeeper::error2string(last_error_code));
     }
     else
@@ -238,7 +241,7 @@ void ReplicatedMergeTreeCleanupThread::getBlocksSortedByTime(zkutil::ZooKeeperPt
     }
 
     auto not_cached_blocks = stat.numChildren - cached_block_stats->size();
-    LOG_TRACE(log, "Checking " << stat.numChildren << " blocks  (" << not_cached_blocks << " are not cached)"
+    LOG_TRACE(log, "Checking " << stat.numChildren << " blocks (" << not_cached_blocks << " are not cached)"
             << " to clear old ones from ZooKeeper. This might take several minutes.");
 
     std::vector<std::pair<String, zkutil::ZooKeeper::ExistsFuture>> exists_futures;
diff --git a/dbms/src/Storages/MergeTree/ReplicatedMergeTreeCleanupThread.h b/dbms/src/Storages/MergeTree/ReplicatedMergeTreeCleanupThread.h
index 9edff46c3a6..443a2af8ec4 100644
--- a/dbms/src/Storages/MergeTree/ReplicatedMergeTreeCleanupThread.h
+++ b/dbms/src/Storages/MergeTree/ReplicatedMergeTreeCleanupThread.h
@@ -40,7 +40,7 @@ private:
     struct NodeWithStat;
     std::unique_ptr<NodesStatCache> cached_block_stats;
 
-    /// Returns list of blocks with stat sorted by ctime
+    /// Returns list of blocks (with their stat) sorted by ctime in descending order
     void getBlocksSortedByTime(std::shared_ptr<zkutil::ZooKeeper> & zookeeper, std::vector<NodeWithStat> & timed_blocks);
 
     /// TODO Removing old quorum/failed_parts

From ce4349b6cde6c7ce50f3342bb2f1ad3f7da783a1 Mon Sep 17 00:00:00 2001
From: Alexey Milovidov <milovidov@yandex-team.ru>
Date: Fri, 11 Aug 2017 02:25:51 +0300
Subject: [PATCH 114/281] Fixed error when huge sets for IN, JOIN, DISTINCT
 cannot be created with exception message "Cannot mremap... Bad address"
 [#CLICKHOUSE-2].

---
 dbms/src/AggregateFunctions/UniquesHashSet.h  | 34 +++++++++----------
 dbms/src/Common/Allocator.cpp                 |  4 +--
 dbms/src/Common/ArenaWithFreeLists.h          |  2 +-
 dbms/src/Common/HashTable/HashTable.h         |  6 ++--
 dbms/src/Common/HashTable/TwoLevelHashTable.h |  4 +--
 dbms/src/Common/tests/int_hashes_perf.cpp     |  2 +-
 dbms/src/Functions/FunctionsArray.cpp         | 16 ++++-----
 .../tests/logical_functions_performance.cpp   |  2 +-
 dbms/src/Interpreters/tests/hash_map2.cpp     | 12 +++----
 .../Interpreters/tests/hash_map_string.cpp    | 10 +++---
 dbms/src/Server/IServer.h                     |  6 ----
 dbms/src/Server/Server.cpp                    |  7 ++++
 12 files changed, 53 insertions(+), 52 deletions(-)

diff --git a/dbms/src/AggregateFunctions/UniquesHashSet.h b/dbms/src/AggregateFunctions/UniquesHashSet.h
index c8113d78a69..28958978f0b 100644
--- a/dbms/src/AggregateFunctions/UniquesHashSet.h
+++ b/dbms/src/AggregateFunctions/UniquesHashSet.h
@@ -44,18 +44,18 @@
   */
 
 /// The maximum degree of buffer size before the values are discarded
-#define UNIQUES_HASH_MAX_SIZE_DEGREE             17
+#define UNIQUES_HASH_MAX_SIZE_DEGREE 17
 
 /// The maximum number of elements before the values are discarded
-#define UNIQUES_HASH_MAX_SIZE                     (1 << (UNIQUES_HASH_MAX_SIZE_DEGREE - 1))
+#define UNIQUES_HASH_MAX_SIZE (1ULL << (UNIQUES_HASH_MAX_SIZE_DEGREE - 1))
 
 /** The number of least significant bits used for thinning. The remaining high-order bits are used to determine the position in the hash table.
   * (high-order bits are taken because the younger bits will be constant after dropping some of the values)
   */
-#define UNIQUES_HASH_BITS_FOR_SKIP             (32 - UNIQUES_HASH_MAX_SIZE_DEGREE)
+#define UNIQUES_HASH_BITS_FOR_SKIP (32 - UNIQUES_HASH_MAX_SIZE_DEGREE)
 
 /// Initial buffer size degree
-#define UNIQUES_HASH_SET_INITIAL_SIZE_DEGREE     4
+#define UNIQUES_HASH_SET_INITIAL_SIZE_DEGREE 4
 
 
 /** This hash function is not the most optimal, but UniquesHashSet states counted with it,
@@ -71,15 +71,15 @@ struct UniquesHashSetDefaultHash
 
 
 template <typename Hash = UniquesHashSetDefaultHash>
-class UniquesHashSet : private HashTableAllocatorWithStackMemory<(1 << UNIQUES_HASH_SET_INITIAL_SIZE_DEGREE) * sizeof(UInt32)>
+class UniquesHashSet : private HashTableAllocatorWithStackMemory<(1ULL << UNIQUES_HASH_SET_INITIAL_SIZE_DEGREE) * sizeof(UInt32)>
 {
 private:
     using Value_t = UInt64;
     using HashValue_t = UInt32;
-    using Allocator = HashTableAllocatorWithStackMemory<(1 << UNIQUES_HASH_SET_INITIAL_SIZE_DEGREE) * sizeof(UInt32)>;
+    using Allocator = HashTableAllocatorWithStackMemory<(1ULL << UNIQUES_HASH_SET_INITIAL_SIZE_DEGREE) * sizeof(UInt32)>;
 
-    UInt32 m_size;            /// Number of elements
-    UInt8 size_degree;        /// The size of the table as a power of 2
+    UInt32 m_size;          /// Number of elements
+    UInt8 size_degree;      /// The size of the table as a power of 2
     UInt8 skip_degree;      /// Skip elements not divisible by 2 ^ skip_degree
     bool has_zero;          /// The hash table contains an element with a hash value of 0.
 
@@ -92,7 +92,7 @@ private:
 
     void alloc(UInt8 new_size_degree)
     {
-        buf = reinterpret_cast<HashValue_t *>(Allocator::alloc((1 << new_size_degree) * sizeof(buf[0])));
+        buf = reinterpret_cast<HashValue_t *>(Allocator::alloc((1ULL << new_size_degree) * sizeof(buf[0])));
         size_degree = new_size_degree;
     }
 
@@ -105,10 +105,10 @@ private:
         }
     }
 
-    inline size_t buf_size() const                        { return 1 << size_degree; }
-    inline size_t max_fill() const                        { return 1 << (size_degree - 1); }
-    inline size_t mask() const                            { return buf_size() - 1; }
-    inline size_t place(HashValue_t x) const             { return (x >> UNIQUES_HASH_BITS_FOR_SKIP) & mask(); }
+    inline size_t buf_size() const           { return 1ULL << size_degree; }
+    inline size_t max_fill() const           { return 1ULL << (size_degree - 1); }
+    inline size_t mask() const               { return buf_size() - 1; }
+    inline size_t place(HashValue_t x) const { return (x >> UNIQUES_HASH_BITS_FOR_SKIP) & mask(); }
 
     /// The value is divided by 2 ^ skip_degree
     inline bool good(HashValue_t hash) const
@@ -157,7 +157,7 @@ private:
             new_size_degree = size_degree + 1;
 
         /// Expand the space.
-        buf = reinterpret_cast<HashValue_t *>(Allocator::realloc(buf, old_size * sizeof(buf[0]), (1 << new_size_degree) * sizeof(buf[0])));
+        buf = reinterpret_cast<HashValue_t *>(Allocator::realloc(buf, old_size * sizeof(buf[0]), (1ULL << new_size_degree) * sizeof(buf[0])));
         size_degree = new_size_degree;
 
         /** Now some items may need to be moved to a new location.
@@ -327,12 +327,12 @@ public:
         if (0 == skip_degree)
             return m_size;
 
-        size_t res = m_size * (1 << skip_degree);
+        size_t res = m_size * (1ULL << skip_degree);
 
         /** Pseudo-random remainder - in order to be not visible,
           * that the number is divided by the power of two.
           */
-        res += (intHashCRC32(m_size) & ((1 << skip_degree) - 1));
+        res += (intHashCRC32(m_size) & ((1ULL << skip_degree) - 1));
 
         /** Correction of a systematic error due to collisions during hashing in UInt32.
           * `fixed_res(res)` formula
@@ -435,7 +435,7 @@ public:
         if (rhs_size > UNIQUES_HASH_MAX_SIZE)
             throw Poco::Exception("Cannot read UniquesHashSet: too large size_degree.");
 
-        if ((1U << size_degree) < rhs_size)
+        if ((1ULL << size_degree) < rhs_size)
         {
             UInt8 new_size_degree = std::max(UNIQUES_HASH_SET_INITIAL_SIZE_DEGREE, static_cast<int>(log2(rhs_size - 1)) + 2);
             resize(new_size_degree);
diff --git a/dbms/src/Common/Allocator.cpp b/dbms/src/Common/Allocator.cpp
index ef566ffd230..b89106c1662 100644
--- a/dbms/src/Common/Allocator.cpp
+++ b/dbms/src/Common/Allocator.cpp
@@ -37,7 +37,7 @@ namespace ErrorCodes
   *
   * PS. This is also required, because tcmalloc can not allocate a chunk of memory greater than 16 GB.
   */
-static constexpr size_t MMAP_THRESHOLD = 64 * (1 << 20);
+static constexpr size_t MMAP_THRESHOLD = 64 * (1ULL << 20);
 static constexpr size_t MMAP_MIN_ALIGNMENT = 4096;
 static constexpr size_t MALLOC_MIN_ALIGNMENT = 8;
 
@@ -54,7 +54,7 @@ void * Allocator<clear_memory_>::alloc(size_t size, size_t alignment)
         if (alignment > MMAP_MIN_ALIGNMENT)
             throw DB::Exception("Too large alignment: more than page size.", DB::ErrorCodes::BAD_ARGUMENTS);
 
-        buf = mmap(NULL, size, PROT_READ | PROT_WRITE, MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
+        buf = mmap(nullptr, size, PROT_READ | PROT_WRITE, MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
         if (MAP_FAILED == buf)
             DB::throwFromErrno("Allocator: Cannot mmap.", DB::ErrorCodes::CANNOT_ALLOCATE_MEMORY);
 
diff --git a/dbms/src/Common/ArenaWithFreeLists.h b/dbms/src/Common/ArenaWithFreeLists.h
index 4a8801ed2f1..599e8779941 100644
--- a/dbms/src/Common/ArenaWithFreeLists.h
+++ b/dbms/src/Common/ArenaWithFreeLists.h
@@ -70,7 +70,7 @@ public:
         }
 
         /// no block of corresponding size, allocate a new one
-        return pool.alloc(1 << (list_idx + 1));
+        return pool.alloc(1ULL << (list_idx + 1));
     }
 
     void free(char * ptr, const size_t size)
diff --git a/dbms/src/Common/HashTable/HashTable.h b/dbms/src/Common/HashTable/HashTable.h
index d505bf2c94c..5480cedcba5 100644
--- a/dbms/src/Common/HashTable/HashTable.h
+++ b/dbms/src/Common/HashTable/HashTable.h
@@ -155,9 +155,9 @@ struct HashTableGrower
     UInt8 size_degree = initial_size_degree;
 
     /// The size of the hash table in the cells.
-    size_t bufSize() const               { return 1 << size_degree; }
+    size_t bufSize() const               { return 1ULL << size_degree; }
 
-    size_t maxFill() const               { return 1 << (size_degree - 1); }
+    size_t maxFill() const               { return 1ULL << (size_degree - 1); }
     size_t mask() const                  { return bufSize() - 1; }
 
     /// From the hash value, get the cell number in the hash table.
@@ -200,7 +200,7 @@ struct HashTableGrower
 template <size_t key_bits>
 struct HashTableFixedGrower
 {
-    size_t bufSize() const               { return 1 << key_bits; }
+    size_t bufSize() const               { return 1ULL << key_bits; }
     size_t place(size_t x) const         { return x; }
     /// You could write __builtin_unreachable(), but the compiler does not optimize everything, and it turns out less efficiently.
     size_t next(size_t pos) const        { return pos + 1; }
diff --git a/dbms/src/Common/HashTable/TwoLevelHashTable.h b/dbms/src/Common/HashTable/TwoLevelHashTable.h
index 6d4edf49fc7..bdac3c32b4a 100644
--- a/dbms/src/Common/HashTable/TwoLevelHashTable.h
+++ b/dbms/src/Common/HashTable/TwoLevelHashTable.h
@@ -4,7 +4,7 @@
 
 
 /** Two-level hash table.
-  * Represents 256 (or 1 << BITS_FOR_BUCKET) small hash tables (buckets of the first level).
+  * Represents 256 (or 1ULL << BITS_FOR_BUCKET) small hash tables (buckets of the first level).
   * To determine which one to use, one of the bytes of the hash function is taken.
   *
   * Usually works a little slower than a simple hash table.
@@ -47,7 +47,7 @@ protected:
 public:
     using Impl = ImplTable;
 
-    static constexpr size_t NUM_BUCKETS = 1 << BITS_FOR_BUCKET;
+    static constexpr size_t NUM_BUCKETS = 1ULL << BITS_FOR_BUCKET;
     static constexpr size_t MAX_BUCKET = NUM_BUCKETS - 1;
 
     size_t hash(const Key & x) const { return Hash::operator()(x); }
diff --git a/dbms/src/Common/tests/int_hashes_perf.cpp b/dbms/src/Common/tests/int_hashes_perf.cpp
index df3f8a60a94..be1cb2e5b93 100644
--- a/dbms/src/Common/tests/int_hashes_perf.cpp
+++ b/dbms/src/Common/tests/int_hashes_perf.cpp
@@ -204,7 +204,7 @@ void report(const char * name, size_t n, double elapsed, UInt64 tsc_diff, size_t
     std::cerr << name << std::endl
         << "Done in " << elapsed
         << " (" << n / elapsed << " elem/sec."
-        << ", " << n * sizeof(UInt64) / elapsed / (1 << 30) << " GiB/sec."
+        << ", " << n * sizeof(UInt64) / elapsed / (1ULL << 30) << " GiB/sec."
         << ", " << (tsc_diff * 1.0 / n) << " tick/elem)"
         << "; res = " << res
         << std::endl << std::endl;
diff --git a/dbms/src/Functions/FunctionsArray.cpp b/dbms/src/Functions/FunctionsArray.cpp
index dfa39f48759..6fae2c7e525 100644
--- a/dbms/src/Functions/FunctionsArray.cpp
+++ b/dbms/src/Functions/FunctionsArray.cpp
@@ -1401,7 +1401,7 @@ bool FunctionArrayUniq::executeNumber(const ColumnArray * array, const IColumn *
     const typename ColumnVector<T>::Container_t & values = nested->getData();
 
     using Set = ClearableHashSet<T, DefaultHash<T>, HashTableGrower<INITIAL_SIZE_DEGREE>,
-        HashTableAllocatorWithStackMemory<(1 << INITIAL_SIZE_DEGREE) * sizeof(T)>>;
+        HashTableAllocatorWithStackMemory<(1ULL << INITIAL_SIZE_DEGREE) * sizeof(T)>>;
 
     const PaddedPODArray<UInt8> * null_map_data = nullptr;
     if (null_map)
@@ -1447,7 +1447,7 @@ bool FunctionArrayUniq::executeString(const ColumnArray * array, const IColumn *
     const ColumnArray::Offsets_t & offsets = array->getOffsets();
 
     using Set = ClearableHashSet<StringRef, StringRefHash, HashTableGrower<INITIAL_SIZE_DEGREE>,
-        HashTableAllocatorWithStackMemory<(1 << INITIAL_SIZE_DEGREE) * sizeof(StringRef)>>;
+        HashTableAllocatorWithStackMemory<(1ULL << INITIAL_SIZE_DEGREE) * sizeof(StringRef)>>;
 
     const PaddedPODArray<UInt8> * null_map_data = nullptr;
     if (null_map)
@@ -1514,7 +1514,7 @@ bool FunctionArrayUniq::execute128bit(
         return false;
 
     using Set = ClearableHashSet<UInt128, UInt128HashCRC32, HashTableGrower<INITIAL_SIZE_DEGREE>,
-        HashTableAllocatorWithStackMemory<(1 << INITIAL_SIZE_DEGREE) * sizeof(UInt128)>>;
+        HashTableAllocatorWithStackMemory<(1ULL << INITIAL_SIZE_DEGREE) * sizeof(UInt128)>>;
 
     /// Suppose that, for a given row, each of the N columns has an array whose length is M.
     /// Denote arr_i each of these arrays (1 <= i <= N). Then the following is performed:
@@ -1575,7 +1575,7 @@ void FunctionArrayUniq::executeHashed(
     size_t count = columns.size();
 
     using Set = ClearableHashSet<UInt128, UInt128TrivialHash, HashTableGrower<INITIAL_SIZE_DEGREE>,
-        HashTableAllocatorWithStackMemory<(1 << INITIAL_SIZE_DEGREE) * sizeof(UInt128)>>;
+        HashTableAllocatorWithStackMemory<(1ULL << INITIAL_SIZE_DEGREE) * sizeof(UInt128)>>;
 
     Set set;
     size_t prev_off = 0;
@@ -1727,7 +1727,7 @@ bool FunctionArrayEnumerateUniq::executeNumber(const ColumnArray * array, const
     const typename ColumnVector<T>::Container_t & values = nested->getData();
 
     using ValuesToIndices = ClearableHashMap<T, UInt32, DefaultHash<T>, HashTableGrower<INITIAL_SIZE_DEGREE>,
-        HashTableAllocatorWithStackMemory<(1 << INITIAL_SIZE_DEGREE) * sizeof(T)>>;
+        HashTableAllocatorWithStackMemory<(1ULL << INITIAL_SIZE_DEGREE) * sizeof(T)>>;
 
     const PaddedPODArray<UInt8> * null_map_data = nullptr;
     if (null_map)
@@ -1772,7 +1772,7 @@ bool FunctionArrayEnumerateUniq::executeString(const ColumnArray * array, const
 
     size_t prev_off = 0;
     using ValuesToIndices = ClearableHashMap<StringRef, UInt32, StringRefHash, HashTableGrower<INITIAL_SIZE_DEGREE>,
-        HashTableAllocatorWithStackMemory<(1 << INITIAL_SIZE_DEGREE) * sizeof(StringRef)>>;
+        HashTableAllocatorWithStackMemory<(1ULL << INITIAL_SIZE_DEGREE) * sizeof(StringRef)>>;
 
     const PaddedPODArray<UInt8> * null_map_data = nullptr;
     if (null_map)
@@ -1840,7 +1840,7 @@ bool FunctionArrayEnumerateUniq::execute128bit(
         return false;
 
     using ValuesToIndices = ClearableHashMap<UInt128, UInt32, UInt128HashCRC32, HashTableGrower<INITIAL_SIZE_DEGREE>,
-        HashTableAllocatorWithStackMemory<(1 << INITIAL_SIZE_DEGREE) * sizeof(UInt128)>>;
+        HashTableAllocatorWithStackMemory<(1ULL << INITIAL_SIZE_DEGREE) * sizeof(UInt128)>>;
 
     ValuesToIndices indices;
     size_t prev_off = 0;
@@ -1886,7 +1886,7 @@ void FunctionArrayEnumerateUniq::executeHashed(
     size_t count = columns.size();
 
     using ValuesToIndices = ClearableHashMap<UInt128, UInt32, UInt128TrivialHash, HashTableGrower<INITIAL_SIZE_DEGREE>,
-        HashTableAllocatorWithStackMemory<(1 << INITIAL_SIZE_DEGREE) * sizeof(UInt128)>>;
+        HashTableAllocatorWithStackMemory<(1ULL << INITIAL_SIZE_DEGREE) * sizeof(UInt128)>>;
 
     ValuesToIndices indices;
     size_t prev_off = 0;
diff --git a/dbms/src/Functions/tests/logical_functions_performance.cpp b/dbms/src/Functions/tests/logical_functions_performance.cpp
index 0f41e4cc64d..c315e00b131 100644
--- a/dbms/src/Functions/tests/logical_functions_performance.cpp
+++ b/dbms/src/Functions/tests/logical_functions_performance.cpp
@@ -331,7 +331,7 @@ int main(int argc, char ** argv)
 {
     try
     {
-        size_t block_size = 1 << 20;
+        size_t block_size = 1ULL << 20;
         if (argc > 1)
         {
             block_size = atoi(argv[1]);
diff --git a/dbms/src/Interpreters/tests/hash_map2.cpp b/dbms/src/Interpreters/tests/hash_map2.cpp
index 02b96a9d6dc..8a8027c726a 100644
--- a/dbms/src/Interpreters/tests/hash_map2.cpp
+++ b/dbms/src/Interpreters/tests/hash_map2.cpp
@@ -46,15 +46,15 @@ struct Grower : public HashTableGrower<>
     static const size_t initial_size_degree = 16;
     Grower() { size_degree = initial_size_degree; }
 
-//    size_t max_fill = (1 << initial_size_degree) * 0.9;
+//    size_t max_fill = (1ULL << initial_size_degree) * 0.9;
 
     /// The size of the hash table in the cells.
-    size_t bufSize() const                { return 1 << size_degree; }
+    size_t bufSize() const               { return 1ULL << size_degree; }
 
-    size_t maxFill() const                { return 1 << (size_degree - 1); }
-//    size_t maxFill() const                { return max_fill; }
+    size_t maxFill() const               { return 1ULL << (size_degree - 1); }
+//    size_t maxFill() const             { return max_fill; }
 
-    size_t mask() const                    { return bufSize() - 1; }
+    size_t mask() const                  { return bufSize() - 1; }
 
     /// From the hash value, get the cell number in the hash table.
     size_t place(size_t x) const         { return x & mask(); }
@@ -69,7 +69,7 @@ struct Grower : public HashTableGrower<>
     void increaseSize()
     {
         size_degree += size_degree >= 23 ? 1 : 2;
-//        max_fill = (1 << size_degree) * 0.9;
+//        max_fill = (1ULL << size_degree) * 0.9;
     }
 
     /// Set the buffer size by the number of elements in the hash table. Used when deserializing a hash table.
diff --git a/dbms/src/Interpreters/tests/hash_map_string.cpp b/dbms/src/Interpreters/tests/hash_map_string.cpp
index a4b288fa33e..c73a5151339 100644
--- a/dbms/src/Interpreters/tests/hash_map_string.cpp
+++ b/dbms/src/Interpreters/tests/hash_map_string.cpp
@@ -249,13 +249,13 @@ struct Grower : public HashTableGrower<>
     static const size_t initial_size_degree = 16;
     Grower() { size_degree = initial_size_degree; }
 
-    size_t max_fill = (1 << initial_size_degree) * 0.9;
+    size_t max_fill = (1ULL << initial_size_degree) * 0.9;
 
     /// The size of the hash table in the cells.
-    size_t bufSize() const                { return 1 << size_degree; }
+    size_t bufSize() const               { return 1ULL << size_degree; }
 
-    size_t maxFill() const                { return max_fill /*1 << (size_degree - 1)*/; }
-    size_t mask() const                    { return bufSize() - 1; }
+    size_t maxFill() const               { return max_fill /*1 << (size_degree - 1)*/; }
+    size_t mask() const                  { return bufSize() - 1; }
 
     /// From the hash value, get the cell number in the hash table.
     size_t place(size_t x) const         { return x & mask(); }
@@ -270,7 +270,7 @@ struct Grower : public HashTableGrower<>
     void increaseSize()
     {
         size_degree += size_degree >= 23 ? 1 : 2;
-        max_fill = (1 << size_degree) * 0.9;
+        max_fill = (1ULL << size_degree) * 0.9;
     }
 
     /// Set the buffer size by the number of elements in the hash table. Used when deserializing a hash table.
diff --git a/dbms/src/Server/IServer.h b/dbms/src/Server/IServer.h
index 699e26b1d60..29e9bc16a75 100644
--- a/dbms/src/Server/IServer.h
+++ b/dbms/src/Server/IServer.h
@@ -9,12 +9,6 @@
 namespace DB
 {
 
-namespace ErrorCodes
-{
-    extern const int NO_ELEMENTS_IN_CONFIG;
-    extern const int SUPPORT_IS_DISABLED;
-}
-
 class IServer
 {
 public:
diff --git a/dbms/src/Server/Server.cpp b/dbms/src/Server/Server.cpp
index f935fac6c93..2b9efdd7cfc 100644
--- a/dbms/src/Server/Server.cpp
+++ b/dbms/src/Server/Server.cpp
@@ -59,6 +59,13 @@ namespace CurrentMetrics
 namespace DB
 {
 
+namespace ErrorCodes
+{
+    extern const int NO_ELEMENTS_IN_CONFIG;
+    extern const int SUPPORT_IS_DISABLED;
+}
+
+
 static std::string getCanonicalPath(std::string && path)
 {
     Poco::trimInPlace(path);

From 1e941a137e7e54fb775de2dcaeb81a64bf2038bd Mon Sep 17 00:00:00 2001
From: Alexey Milovidov <milovidov@yandex-team.ru>
Date: Fri, 11 Aug 2017 02:26:38 +0300
Subject: [PATCH 115/281] Better exception message [#CLICKHOUSE-2].

---
 dbms/src/Common/Allocator.cpp | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/dbms/src/Common/Allocator.cpp b/dbms/src/Common/Allocator.cpp
index ef566ffd230..330cde9350f 100644
--- a/dbms/src/Common/Allocator.cpp
+++ b/dbms/src/Common/Allocator.cpp
@@ -9,6 +9,8 @@
 #include <Common/Exception.h>
 #include <Common/Allocator.h>
 
+#include <IO/WriteHelpers.h>
+
 /// Required for older Darwin builds, that lack definition of MAP_ANONYMOUS
 #ifndef MAP_ANONYMOUS
 #define MAP_ANONYMOUS MAP_ANON
@@ -128,7 +130,7 @@ void * Allocator<clear_memory_>::realloc(void * buf, size_t old_size, size_t new
 
         buf = mremap(buf, old_size, new_size, MREMAP_MAYMOVE);
         if (MAP_FAILED == buf)
-            DB::throwFromErrno("Allocator: Cannot mremap.", DB::ErrorCodes::CANNOT_MREMAP);
+            DB::throwFromErrno("Allocator: Cannot mremap memory chunk from " + DB::toString(old_size) + " to " + DB::toString(new_size) + " bytes.", DB::ErrorCodes::CANNOT_MREMAP);
 
         /// No need for zero-fill, because mmap guarantees it.
     }

From 90940d84231c746c00beba675348b4504589a28e Mon Sep 17 00:00:00 2001
From: Nikolai Kochetov <nik-kochetov@yandex-team.ru>
Date: Fri, 11 Aug 2017 18:02:07 +0300
Subject: [PATCH 116/281] Cluster and DistributedBlockOutputStream refactoring
 [#CLICKHOUSE-3033]

---
 dbms/src/Interpreters/Cluster.cpp             |  42 ++-
 dbms/src/Interpreters/Cluster.h               |  10 +-
 dbms/src/Interpreters/DDLWorker.cpp           |   8 +-
 .../DistributedBlockOutputStream.cpp          | 255 +++++++++++-------
 .../DistributedBlockOutputStream.h            |  35 ++-
 .../Storages/MergeTree/ReshardingWorker.cpp   |  12 +-
 .../Storages/System/StorageSystemClusters.cpp |  21 +-
 7 files changed, 205 insertions(+), 178 deletions(-)

diff --git a/dbms/src/Interpreters/Cluster.cpp b/dbms/src/Interpreters/Cluster.cpp
index 87d92444958..2ed378b12d8 100644
--- a/dbms/src/Interpreters/Cluster.cpp
+++ b/dbms/src/Interpreters/Cluster.cpp
@@ -77,6 +77,7 @@ Cluster::Address::Address(Poco::Util::AbstractConfiguration & config, const Stri
     user = config.getString(config_prefix + ".user", "default");
     password = config.getString(config_prefix + ".password", "");
     default_database = config.getString(config_prefix + ".default_database", "");
+    is_local = isLocal(*this);
 }
 
 
@@ -98,6 +99,7 @@ Cluster::Address::Address(const String & host_port_, const String & user_, const
         host_name = host_port_;
         port = default_port;
     }
+    is_local = isLocal(*this);
 }
 
 
@@ -193,6 +195,8 @@ Cluster::Cluster(Poco::Util::AbstractConfiguration & config, const Settings & se
         {
             /// Shard without replicas.
 
+            Addresses addresses;
+
             const auto & prefix = config_prefix + key;
             const auto weight = config.getInt(prefix + ".weight", default_weight);
 
@@ -204,11 +208,10 @@ Cluster::Cluster(Poco::Util::AbstractConfiguration & config, const Settings & se
             info.shard_num = current_shard_num;
             info.weight = weight;
 
-            if (isLocal(address))
+            if (address.is_local)
                 info.local_addresses.push_back(address);
             else
             {
-                info.dir_names.push_back(address.toStringFull());
                 ConnectionPoolPtrs pools;
                 pools.push_back(std::make_shared<ConnectionPool>(
                     settings.distributed_connections_pool_size,
@@ -227,6 +230,7 @@ Cluster::Cluster(Poco::Util::AbstractConfiguration & config, const Settings & se
                 slot_to_shard.insert(std::end(slot_to_shard), weight, shards_info.size());
 
             shards_info.push_back(info);
+            addresses_with_failover.push_back(addresses);
         }
         else if (startsWith(key, "shard"))
         {
@@ -244,10 +248,8 @@ Cluster::Cluster(Poco::Util::AbstractConfiguration & config, const Settings & se
 
             bool internal_replication = config.getBool(partial_prefix + ".internal_replication", false);
 
-            /** in case of internal_replication we will be appending names to
-             *  the first element of vector; otherwise we will just .emplace_back
-             */
-            std::vector<std::string> dir_names{};
+            /// In case of internal_replication, the directory names of all remote replicas are joined with ',' into dir_name_for_internal_replication
+            std::string dir_name_for_internal_replication;
 
             auto first = true;
             for (const auto & replica_key : replica_keys)
@@ -261,18 +263,16 @@ Cluster::Cluster(Poco::Util::AbstractConfiguration & config, const Settings & se
                     replica_addresses.back().replica_num = current_replica_num;
                     ++current_replica_num;
 
-                    if (!isLocal(replica_addresses.back()))
+                    if (!replica_addresses.back().is_local)
                     {
                         if (internal_replication)
                         {
                             auto dir_name = replica_addresses.back().toStringFull();
                             if (first)
-                                dir_names.emplace_back(std::move(dir_name));
+                                dir_name_for_internal_replication = dir_name;
                             else
-                                dir_names.front() += "," + dir_name;
+                                dir_name_for_internal_replication += "," + dir_name;
                         }
-                        else
-                            dir_names.emplace_back(replica_addresses.back().toStringFull());
 
                         if (first) first = false;
                     }
@@ -288,7 +288,7 @@ Cluster::Cluster(Poco::Util::AbstractConfiguration & config, const Settings & se
 
             for (const auto & replica : replica_addresses)
             {
-                if (isLocal(replica))
+                if (replica.is_local)
                     shard_local_addresses.push_back(replica);
                 else
                 {
@@ -311,17 +311,18 @@ Cluster::Cluster(Poco::Util::AbstractConfiguration & config, const Settings & se
             if (weight)
                 slot_to_shard.insert(std::end(slot_to_shard), weight, shards_info.size());
 
-            shards_info.push_back({std::move(dir_names), current_shard_num, weight, shard_local_addresses, shard_pool, internal_replication});
+            shards_info.push_back({std::move(dir_name_for_internal_replication), current_shard_num, weight,
+                shard_local_addresses, shard_pool, internal_replication});
         }
         else
             throw Exception("Unknown element in config: " + key, ErrorCodes::UNKNOWN_ELEMENT_IN_CONFIG);
 
-        if (!addresses_with_failover.empty() && !addresses.empty())
-            throw Exception("There must be either 'node' or 'shard' elements in config", ErrorCodes::EXCESSIVE_ELEMENT_IN_CONFIG);
-
         ++current_shard_num;
     }
 
+    if (addresses_with_failover.empty())
+        throw Exception("There must be either 'node' or 'shard' elements in config", ErrorCodes::EXCESSIVE_ELEMENT_IN_CONFIG);
+
     initMisc();
 }
 
@@ -409,12 +410,7 @@ void Cluster::calculateHashOfAddresses()
 {
     std::vector<std::string> elements;
 
-    if (!addresses.empty())
-    {
-        for (const auto & address : addresses)
-            elements.push_back(address.host_name + ":" + toString(address.port));
-    }
-    else if (!addresses_with_failover.empty())
+    if (!addresses_with_failover.empty())
     {
         for (const auto & addresses : addresses_with_failover)
         {
@@ -453,8 +449,6 @@ std::unique_ptr<Cluster> Cluster::getClusterWithSingleShard(size_t index) const
 Cluster::Cluster(const Cluster & from, size_t index)
     : shards_info{from.shards_info[index]}
 {
-    if (!from.addresses.empty())
-        addresses.emplace_back(from.addresses[index]);
     if (!from.addresses_with_failover.empty())
         addresses_with_failover.emplace_back(from.addresses_with_failover[index]);
 
diff --git a/dbms/src/Interpreters/Cluster.h b/dbms/src/Interpreters/Cluster.h
index 9a7b1470d6c..cb131a00393 100644
--- a/dbms/src/Interpreters/Cluster.h
+++ b/dbms/src/Interpreters/Cluster.h
@@ -55,6 +55,7 @@ public:
         String password;
         String default_database;    /// this database is selected when no database is specified for Distributed table
         UInt32 replica_num;
+        bool is_local;
 
         Address(Poco::Util::AbstractConfiguration & config, const String & config_prefix);
         Address(const String & host_port_, const String & user_, const String & password_);
@@ -80,8 +81,8 @@ public:
         bool hasInternalReplication() const { return has_internal_replication; }
 
     public:
-        /// Contains names of directories for asynchronous write to StorageDistributed
-        std::vector<std::string> dir_names;
+        /// Name of directory for asynchronous write to StorageDistributed if has_internal_replication
+        std::string dir_name_for_internal_replication;
         /// Number of the shard, the indexation begins with 1
         UInt32 shard_num;
         UInt32 weight;
@@ -94,8 +95,7 @@ public:
 
     String getHashOfAddresses() const { return hash_of_addresses; }
     const ShardsInfo & getShardsInfo() const { return shards_info; }
-    const Addresses & getShardsAddresses() const { return addresses; }
-    const AddressesWithFailover & getShardsWithFailoverAddresses() const { return addresses_with_failover; }
+    const AddressesWithFailover & getShardsAddresses() const { return addresses_with_failover; }
 
     const ShardInfo & getAnyShardInfo() const
     {
@@ -144,8 +144,6 @@ private:
     /// Non-empty is either addresses or addresses_with_failover.
     /// The size and order of the elements in the corresponding array corresponds to shards_info.
 
-    /// An array of shards. Each shard is the address of one server.
-    Addresses addresses;
     /// An array of shards. For each shard, an array of replica addresses (servers that are considered identical).
     AddressesWithFailover addresses_with_failover;
 
diff --git a/dbms/src/Interpreters/DDLWorker.cpp b/dbms/src/Interpreters/DDLWorker.cpp
index 1dc88d8192c..e0be7563b9b 100644
--- a/dbms/src/Interpreters/DDLWorker.cpp
+++ b/dbms/src/Interpreters/DDLWorker.cpp
@@ -273,7 +273,7 @@ void DDLWorker::processTask(const DDLLogEntry & node, const std::string & node_n
             String cluster_name = query->cluster;
             auto cluster = context.getCluster(cluster_name);
 
-            auto shard_host_num = tryGetShardAndHostNum(cluster->getShardsWithFailoverAddresses(), host_name, port);
+            auto shard_host_num = tryGetShardAndHostNum(cluster->getShardsAddresses(), host_name, port);
             if (!shard_host_num)
             {
                 throw Exception("Cannot find own address (" + host_id + ") in cluster " + cluster_name + " configuration",
@@ -283,7 +283,7 @@ void DDLWorker::processTask(const DDLLogEntry & node, const std::string & node_n
             size_t shard_num = shard_host_num->first;
             size_t host_num = shard_host_num->second;
 
-            const auto & host_address = cluster->getShardsWithFailoverAddresses().at(shard_num).at(host_num);
+            const auto & host_address = cluster->getShardsAddresses().at(shard_num).at(host_num);
             ASTPtr rewritten_ast = query->getRewrittenASTWithoutOnCluster(host_address.default_database);
             String rewritten_query = queryToString(rewritten_ast);
 
@@ -369,7 +369,7 @@ void DDLWorker::processTaskAlter(
             throw Exception("Distributed DDL alters don't work properly yet", ErrorCodes::NOT_IMPLEMENTED);
 
         Strings replica_names;
-        for (const auto & address : cluster->getShardsWithFailoverAddresses().at(shard_num))
+        for (const auto & address : cluster->getShardsAddresses().at(shard_num))
             replica_names.emplace_back(address.toString());
         std::sort(replica_names.begin(), replica_names.end());
 
@@ -700,7 +700,7 @@ BlockIO executeDDLQueryOnCluster(const ASTPtr & query_ptr, Context & context)
     entry.query = queryToString(query_ptr);
     entry.initiator = ddl_worker.getHostName();
 
-    Cluster::AddressesWithFailover shards = cluster->getShardsWithFailoverAddresses();
+    Cluster::AddressesWithFailover shards = cluster->getShardsAddresses();
     for (const auto & shard : shards)
     {
         for (const auto & addr : shard)
diff --git a/dbms/src/Storages/Distributed/DistributedBlockOutputStream.cpp b/dbms/src/Storages/Distributed/DistributedBlockOutputStream.cpp
index 994ed9e4c7d..239f8ba94d8 100644
--- a/dbms/src/Storages/Distributed/DistributedBlockOutputStream.cpp
+++ b/dbms/src/Storages/Distributed/DistributedBlockOutputStream.cpp
@@ -45,22 +45,26 @@ namespace ProfileEvents
 namespace DB
 {
 
+
 namespace ErrorCodes
 {
     extern const int TIMEOUT_EXCEEDED;
 }
 
+
 DistributedBlockOutputStream::DistributedBlockOutputStream(StorageDistributed & storage, const ASTPtr & query_ast,
                                                            const ClusterPtr & cluster_, bool insert_sync_, UInt64 insert_timeout_)
     : storage(storage), query_ast(query_ast), cluster(cluster_), insert_sync(insert_sync_), insert_timeout(insert_timeout_)
 {
 }
 
+
 void DistributedBlockOutputStream::writePrefix()
 {
     deadline = std::chrono::steady_clock::now() + std::chrono::seconds(insert_timeout);
 }
 
+
 void DistributedBlockOutputStream::write(const Block & block)
 {
     if (insert_sync)
@@ -69,21 +73,22 @@ void DistributedBlockOutputStream::write(const Block & block)
         writeAsync(block);
 }
 
+
 void DistributedBlockOutputStream::writeAsync(const Block & block)
 {
     if (storage.getShardingKeyExpr() && (cluster->getShardsInfo().size() > 1))
-        return writeSplit(block);
+        return writeSplitAsync(block);
 
-    writeImpl(block);
+    writeAsyncImpl(block);
     ++blocks_inserted;
 }
 
+
 ThreadPool::Job DistributedBlockOutputStream::createWritingJob(
-    std::vector<bool> & done_jobs, std::atomic<unsigned> & finished_jobs_count, std::condition_variable & cond_var,
-    const Block & block, size_t job_id, const Cluster::ShardInfo & shard_info, size_t replica_id)
+    WritingJobContext & context, const Block & block, const Cluster::Address & address, size_t shard_id, size_t job_id)
 {
     auto memory_tracker = current_memory_tracker;
-    return [this, memory_tracker, & done_jobs, & finished_jobs_count, & cond_var, & block, job_id, & shard_info, replica_id]()
+    return [this, memory_tracker, & context, & block, & address, shard_id, job_id]()
     {
         if (!current_memory_tracker)
         {
@@ -92,34 +97,34 @@ ThreadPool::Job DistributedBlockOutputStream::createWritingJob(
         }
         try
         {
-            this->writeToShardSync(block, shard_info, replica_id);
-            ++finished_jobs_count;
-            done_jobs[job_id] = true;
-            cond_var.notify_one();
+            const auto & shard_info = cluster->getShardsInfo()[shard_id];
+            if (address.is_local)
+            {
+                writeToLocal(block, shard_info.getLocalNodeCount());
+                context.done_local_jobs[job_id] = true;
+            }
+            else
+            {
+                writeToShardSync(block, shard_info.hasInternalReplication()
+                                        ? shard_info.dir_name_for_internal_replication
+                                        : address.toStringFull());
+                context.done_remote_jobs[job_id] = true;
+            }
+
+            ++context.finished_jobs_count;
+            context.cond_var.notify_one();
         }
         catch (...)
         {
-            ++finished_jobs_count;
-            cond_var.notify_one();
+            ++context.finished_jobs_count;
+            context.cond_var.notify_one();
             throw;
         }
     };
 }
 
-void DistributedBlockOutputStream::writeToLocal(const Blocks & blocks, size_t & finished_writings_count)
-{
-    const Cluster::ShardsInfo & shards_info = cluster->getShardsInfo();
-    for (size_t shard_id : ext::range(0, shards_info.size()))
-    {
-        const auto & shard_info = shards_info[shard_id];
-        if (shard_info.getLocalNodeCount() > 0)
-            writeToLocal(blocks[shard_id], shard_info.getLocalNodeCount(), finished_writings_count);
-    }
-}
 
-
-std::string DistributedBlockOutputStream::getCurrentStateDescription(
-    const std::vector<bool> & done_jobs, size_t finished_local_nodes_count)
+std::string DistributedBlockOutputStream::getCurrentStateDescription(const WritingJobContext & context)
 {
     const Cluster::ShardsInfo & shards_info = cluster->getShardsInfo();
     String description;
@@ -127,111 +132,154 @@ std::string DistributedBlockOutputStream::getCurrentStateDescription(
 
     buffer << "Insertion status:\n";
 
-    auto writeDescription = [&buffer](const std::string & address, size_t shard_id, size_t blocks_wrote)
+    auto writeDescription = [&buffer](const Cluster::Address & address, size_t shard_id, size_t blocks_wrote)
     {
         buffer << "Wrote " << blocks_wrote << " blocks on shard " << shard_id << " replica ";
-        buffer << unescapeForFileName(address) << '\n';
+        buffer << address.toString() << '\n';
     };
 
-    size_t job_id = 0;
+    const auto addresses_with_failovers = cluster->getShardsAddresses();
+
+    size_t remote_job_id = 0;
+    size_t local_job_id = 0;
     for (size_t shard_id : ext::range(0, shards_info.size()))
     {
         const auto & shard_info = shards_info[shard_id];
-        const auto & local_addresses = shard_info.local_addresses;
-
-        for (const auto & address : local_addresses)
+        /// If hasInternalReplication, then prefer local replica
+        if (!shard_info.hasInternalReplication() || !shard_info.isLocal())
         {
-            writeDescription(address.toStringFull(), shard_id, blocks_inserted + (finished_local_nodes_count ? 1 : 0));
-            if (finished_local_nodes_count)
-                --finished_local_nodes_count;
+            for (const auto & address : addresses_with_failovers[shard_id])
+                if (!address.is_local)
+                {
+                    writeDescription(address, shard_id, blocks_inserted + (context.done_remote_jobs[remote_job_id++] ? 1 : 0));
+                    if (shard_info.hasInternalReplication())
+                        break;
+                }
         }
 
-        for (const auto & dir_name : shard_info.dir_names)
-            writeDescription(dir_name, shard_id, blocks_inserted + (done_jobs[job_id++] ? 1 : 0));
+        if (shard_info.isLocal())
+        {
+            const auto & address = shard_info.local_addresses.front();
+            writeDescription(address, shard_id, blocks_inserted + (context.done_local_jobs[local_job_id++] ? 1 : 0));
+        }
     }
 
     return description;
 }
 
-void DistributedBlockOutputStream::calculateRemoteJobsCount()
+
+void DistributedBlockOutputStream::createWritingJobs(WritingJobContext & context, const Blocks & blocks)
+{
+    const auto & addresses_with_failovers = cluster->getShardsAddresses();
+    const Cluster::ShardsInfo & shards_info = cluster->getShardsInfo();
+
+    size_t remote_job_id = 0;
+    size_t local_job_id = 0;
+    for (size_t shard_id : ext::range(0, blocks.size()))
+    {
+        const auto & shard_info = shards_info[shard_id];
+        /// If hasInternalReplication, then prefer local replica
+        if (!shard_info.hasInternalReplication() || !shard_info.isLocal())
+        {
+            for (const auto & address : addresses_with_failovers[shard_id])
+                if (!address.is_local)
+                {
+                    pool->schedule(createWritingJob(context, blocks[shard_id], address, shard_id, remote_job_id++));
+                    if (shard_info.hasInternalReplication())
+                        break;
+                }
+        }
+
+        if (shards_info[shard_id].isLocal())
+        {
+            const auto & address = shards_info[shard_id].local_addresses.front();
+            pool->schedule(createWritingJob(context, blocks[shard_id], address, shard_id, local_job_id++));
+        }
+    }
+}
+
+
+void DistributedBlockOutputStream::calculateJobsCount()
 {
     remote_jobs_count = 0;
+    local_jobs_count = 0;
+
+    const auto & addresses_with_failovers = cluster->getShardsAddresses();
+
     const auto & shards_info = cluster->getShardsInfo();
-    for (const auto & shard_info : shards_info)
-        remote_jobs_count += shard_info.dir_names.size();
+    for (size_t shard_id : ext::range(0, shards_info.size()))
+    {
+        const auto & shard_info = shards_info[shard_id];
+        /// If hasInternalReplication, then prefer local replica
+        if (!shard_info.hasInternalReplication() || !shard_info.isLocal())
+        {
+            for (const auto & address : addresses_with_failovers[shard_id])
+                if (!address.is_local)
+                {
+                    ++remote_jobs_count;
+                    if (shard_info.hasInternalReplication())
+                        break;
+                }
+        }
+
+        local_jobs_count += shard_info.isLocal() ? 1 : 0;
+    }
 }
 
+
+void DistributedBlockOutputStream::waitForUnfinishedJobs(WritingJobContext & context)
+{
+    std::unique_lock<std::mutex> lock(context.mutex);
+    size_t jobs_count = remote_jobs_count + local_jobs_count;
+    auto cond = [& context, jobs_count] { return context.finished_jobs_count == jobs_count; };
+
+    if (insert_timeout)
+    {
+        if (!context.cond_var.wait_until(lock, deadline, cond))
+        {
+            pool->wait();
+            ProfileEvents::increment(ProfileEvents::DistributedSyncInsertionTimeoutExceeded);
+            throw Exception("Timeout exceeded.", ErrorCodes::TIMEOUT_EXCEEDED);
+        }
+    }
+    else
+        context.cond_var.wait(lock, cond);
+    pool->wait();
+}
+
+
 void DistributedBlockOutputStream::writeSync(const Block & block)
 {
     if (!pool)
     {
         /// Deferred initialization. Only for sync insertion.
-        calculateRemoteJobsCount();
-        pool.emplace(remote_jobs_count);
+        calculateJobsCount();
+        pool.emplace(remote_jobs_count + local_jobs_count);
     }
 
-    std::vector<bool> done_jobs(remote_jobs_count, false);
-    std::atomic<unsigned> finished_jobs_count(0);
-    std::mutex mutex;
-    std::condition_variable cond_var;
+    WritingJobContext context;
+    context.done_remote_jobs.assign(remote_jobs_count, false);
+    context.done_local_jobs.assign(local_jobs_count, false);
+    context.finished_jobs_count = 0;
 
     const Cluster::ShardsInfo & shards_info = cluster->getShardsInfo();
     Blocks blocks = shards_info.size() > 1 ? splitBlock(block) : Blocks({block});
-
-    size_t job_id = 0;
-    for (size_t shard_id : ext::range(0, blocks.size()))
-        for (size_t replica_id: ext::range(0, shards_info[shard_id].dir_names.size()))
-            pool->schedule(createWritingJob(done_jobs, finished_jobs_count, cond_var,
-                                            blocks[shard_id], job_id++, shards_info[shard_id], replica_id));
-
-    const size_t jobs_count = job_id;
-    size_t finished_local_nodes_count;
-    const auto time_point = deadline;
-    auto timeout = insert_timeout;
-    auto & pool = this->pool;
-    auto wait = [& mutex, & cond_var, time_point, & finished_jobs_count, jobs_count, & pool, timeout]()
-    {
-        std::unique_lock<std::mutex> lock(mutex);
-        auto cond = [& finished_jobs_count, jobs_count] { return finished_jobs_count == jobs_count; };
-        if (timeout)
-        {
-            if (!cond_var.wait_until(lock, time_point, cond))
-            {
-                pool->wait();
-                ProfileEvents::increment(ProfileEvents::DistributedSyncInsertionTimeoutExceeded);
-                throw Exception("Timeout exceeded.", ErrorCodes::TIMEOUT_EXCEEDED);
-            }
-        }
-        else
-            cond_var.wait(lock, cond);
-        pool->wait();
-    };
-
-    std::exception_ptr exception;
-    try
-    {
-        writeToLocal(blocks, finished_local_nodes_count);
-    }
-    catch (...)
-    {
-        exception = std::current_exception();
-    }
+    createWritingJobs(context, blocks);
 
     try
     {
-        wait();
-        if (exception)
-            std::rethrow_exception(exception);
+        waitForUnfinishedJobs(context);
     }
     catch(Exception & exception)
     {
-        exception.addMessage(getCurrentStateDescription(done_jobs, finished_local_nodes_count));
+        exception.addMessage(getCurrentStateDescription(context));
         throw;
     }
 
     ++blocks_inserted;
 }
 
+
 IColumn::Selector DistributedBlockOutputStream::createSelector(Block block)
 {
     storage.getShardingKeyExpr()->execute(block);
@@ -288,33 +336,39 @@ Blocks DistributedBlockOutputStream::splitBlock(const Block & block)
 }
 
 
-void DistributedBlockOutputStream::writeSplit(const Block & block)
+void DistributedBlockOutputStream::writeSplitAsync(const Block & block)
 {
     Blocks splitted_blocks = splitBlock(block);
     const size_t num_shards = splitted_blocks.size();
 
     for (size_t shard_idx = 0; shard_idx < num_shards; ++shard_idx)
         if (splitted_blocks[shard_idx].rows())
-            writeImpl(splitted_blocks[shard_idx], shard_idx);
+            writeAsyncImpl(splitted_blocks[shard_idx], shard_idx);
 
     ++blocks_inserted;
 }
 
 
-void DistributedBlockOutputStream::writeImpl(const Block & block, const size_t shard_id)
+void DistributedBlockOutputStream::writeAsyncImpl(const Block & block, const size_t shard_id)
 {
     const auto & shard_info = cluster->getShardsInfo()[shard_id];
-    size_t finished_writings_count = 0;
     if (shard_info.getLocalNodeCount() > 0)
-        writeToLocal(block, shard_info.getLocalNodeCount(), finished_writings_count);
+        writeToLocal(block, shard_info.getLocalNodeCount());
 
-    /// dir_names is empty if shard has only local addresses
-    if (!shard_info.dir_names.empty())
-        writeToShard(block, shard_info.dir_names);
+    if (shard_info.hasInternalReplication())
+        writeToShard(block, {shard_info.dir_name_for_internal_replication});
+    else
+    {
+        std::vector<std::string> dir_names;
+        for (const auto & address : cluster->getShardsAddresses()[shard_id])
+            if (!address.is_local)
+                dir_names.push_back(address.toStringFull());
+        writeToShard(block, dir_names);
+    }
 }
 
 
-void DistributedBlockOutputStream::writeToLocal(const Block & block, const size_t repeats, size_t & finished_writings_count)
+void DistributedBlockOutputStream::writeToLocal(const Block & block, const size_t repeats)
 {
     InterpreterInsertQuery interp{query_ast, storage.context};
 
@@ -322,20 +376,15 @@ void DistributedBlockOutputStream::writeToLocal(const Block & block, const size_
     block_io.out->writePrefix();
 
     for (size_t i = 0; i < repeats; ++i)
-    {
         block_io.out->write(block);
-        ++finished_writings_count;
-    }
 
     block_io.out->writeSuffix();
 }
 
 
-void DistributedBlockOutputStream::writeToShardSync(
-    const Block & block, const Cluster::ShardInfo & shard_info, size_t replica_id)
+void DistributedBlockOutputStream::writeToShardSync(const Block & block, const std::string & connection_pool_name)
 {
-    const auto & dir_name = shard_info.dir_names[replica_id];
-    auto pool = storage.requireConnectionPool(dir_name);
+    auto pool = storage.requireConnectionPool(connection_pool_name);
     auto connection = pool->get();
 
     const auto & query_string = queryToString(query_ast);
diff --git a/dbms/src/Storages/Distributed/DistributedBlockOutputStream.h b/dbms/src/Storages/Distributed/DistributedBlockOutputStream.h
index 3ed4ed04e45..a74cbbc2fcf 100644
--- a/dbms/src/Storages/Distributed/DistributedBlockOutputStream.h
+++ b/dbms/src/Storages/Distributed/DistributedBlockOutputStream.h
@@ -44,33 +44,45 @@ private:
     /// Performs synchronous insertion to remote nodes. If timeout_exceeded flag was set, throws.
     void writeSync(const Block & block);
 
-    void calculateRemoteJobsCount();
+    void calculateJobsCount();
 
-    ThreadPool::Job createWritingJob(std::vector<bool> & done_jobs, std::atomic<unsigned> & finished_jobs_count,
-                                     std::condition_variable & cond_var, const Block & block, size_t job_id,
-                                     const Cluster::ShardInfo & shard_info, size_t replica_id);
+    struct WritingJobContext
+    {
+        /// Remote job per replica.
+        std::vector<bool> done_remote_jobs;
+        /// Local job per shard.
+        std::vector<bool> done_local_jobs;
+        std::atomic<unsigned> finished_jobs_count;
+        std::mutex mutex;
+        std::condition_variable cond_var;
+    };
 
-    void writeToLocal(const Blocks & blocks, size_t & finished_writings_count);
+    ThreadPool::Job createWritingJob(WritingJobContext & context, const Block & block,
+                                     const Cluster::Address & address, size_t shard_id, size_t job_id);
 
-    /// Returns the number of blocks was read for each cluster node. Uses during exception handling.
-    std::string getCurrentStateDescription(const std::vector<bool> & done_jobs, size_t finished_local_nodes_count);
+    void createWritingJobs(WritingJobContext & context, const Blocks & blocks);
+
+    void waitForUnfinishedJobs(WritingJobContext & context);
+
+    /// Returns the number of blocks written for each cluster node. Used during exception handling.
+    std::string getCurrentStateDescription(const WritingJobContext & context);
 
     IColumn::Selector createSelector(Block block);
 
     /// Split block between shards.
     Blocks splitBlock(const Block & block);
 
-    void writeSplit(const Block & block);
+    void writeSplitAsync(const Block & block);
 
-    void writeImpl(const Block & block, const size_t shard_id = 0);
+    void writeAsyncImpl(const Block & block, const size_t shard_id = 0);
 
     /// Increments finished_writings_count after each repeat.
-    void writeToLocal(const Block & block, const size_t repeats, size_t & finished_writings_count);
+    void writeToLocal(const Block & block, const size_t repeats);
 
     void writeToShard(const Block & block, const std::vector<std::string> & dir_names);
 
     /// Performs synchronous insertion to remote node.
-    void writeToShardSync(const Block & block, const Cluster::ShardInfo & shard_info, size_t replica_id);
+    void writeToShardSync(const Block & block, const std::string & connection_pool_name);
 
 private:
     StorageDistributed & storage;
@@ -81,6 +93,7 @@ private:
     size_t blocks_inserted = 0;
     std::chrono::steady_clock::time_point deadline;
     size_t remote_jobs_count;
+    size_t local_jobs_count;
     std::experimental::optional<ThreadPool> pool;
 };
 
diff --git a/dbms/src/Storages/MergeTree/ReshardingWorker.cpp b/dbms/src/Storages/MergeTree/ReshardingWorker.cpp
index 6ef968975c7..7dc8d622aeb 100644
--- a/dbms/src/Storages/MergeTree/ReshardingWorker.cpp
+++ b/dbms/src/Storages/MergeTree/ReshardingWorker.cpp
@@ -1700,17 +1700,7 @@ std::string ReshardingWorker::createCoordinator(const Cluster & cluster)
     if (!cluster.getShardsAddresses().empty())
     {
         size_t shard_no = 0;
-        for (const auto & address : cluster.getShardsAddresses())
-        {
-            publish_address(address.host_name, shard_no);
-            publish_address(address.resolved_address.host().toString(), shard_no);
-            ++shard_no;
-        }
-    }
-    else if (!cluster.getShardsWithFailoverAddresses().empty())
-    {
-        size_t shard_no = 0;
-        for (const auto & addresses : cluster.getShardsWithFailoverAddresses())
+        for (const auto & addresses : cluster.getShardsAddresses())
         {
             for (const auto & address : addresses)
             {
diff --git a/dbms/src/Storages/System/StorageSystemClusters.cpp b/dbms/src/Storages/System/StorageSystemClusters.cpp
index fb3b116030d..628766e0a27 100644
--- a/dbms/src/Storages/System/StorageSystemClusters.cpp
+++ b/dbms/src/Storages/System/StorageSystemClusters.cpp
@@ -72,27 +72,10 @@ BlockInputStreams StorageSystemClusters::read(
     {
         const std::string cluster_name = entry.first;
         const ClusterPtr cluster = entry.second;
-        const auto & addresses = cluster->getShardsAddresses();
-        const auto & addresses_with_failover = cluster->getShardsWithFailoverAddresses();
+        const auto & addresses_with_failover = cluster->getShardsAddresses();
         const auto & shards_info = cluster->getShardsInfo();
 
-        if (!addresses.empty())
-        {
-            auto it1 = addresses.cbegin();
-            auto it2 = shards_info.cbegin();
-
-            while (it1 != addresses.cend())
-            {
-                const auto & address = *it1;
-                const auto & shard_info = *it2;
-
-                updateColumns(cluster_name, shard_info, address);
-
-                ++it1;
-                ++it2;
-            }
-        }
-        else if (!addresses_with_failover.empty())
+        if (!addresses_with_failover.empty())
         {
             auto it1 = addresses_with_failover.cbegin();
             auto it2 = shards_info.cbegin();

From 7029a7c40669ad8e442f9ceba1d0641c586c8eb8 Mon Sep 17 00:00:00 2001
From: Bulat Gaifullin <b.gaifullin@corp.mail.ru>
Date: Fri, 11 Aug 2017 15:13:55 +0300
Subject: [PATCH 117/281] add mailru/go-clickhouse to list of go connectors

---
 docs/en/interfaces/third-party_client_libraries.rst | 1 +
 docs/ru/interfaces/third-party_client_libraries.rst | 1 +
 2 files changed, 2 insertions(+)

diff --git a/docs/en/interfaces/third-party_client_libraries.rst b/docs/en/interfaces/third-party_client_libraries.rst
index c3814194079..e0445008193 100644
--- a/docs/en/interfaces/third-party_client_libraries.rst
+++ b/docs/en/interfaces/third-party_client_libraries.rst
@@ -13,6 +13,7 @@ There exist third-party client libraries for ClickHouse:
 * Go
     - `clickhouse (Go) <https://github.com/kshvakov/clickhouse/>`_
     - `go-clickhouse <https://github.com/roistat/go-clickhouse>`_
+    - `mailru/go-clickhouse <https://github.com/mailru/go-clickhouse>`_
 * NodeJs
     - `clickhouse (NodeJs) <https://github.com/TimonKK/clickhouse>`_
     - `node-clickhouse <https://github.com/apla/node-clickhouse>`_
diff --git a/docs/ru/interfaces/third-party_client_libraries.rst b/docs/ru/interfaces/third-party_client_libraries.rst
index 1e7b391fbd2..94d69595cab 100644
--- a/docs/ru/interfaces/third-party_client_libraries.rst
+++ b/docs/ru/interfaces/third-party_client_libraries.rst
@@ -13,6 +13,7 @@
 * Go
     - `clickhouse (Go) <https://github.com/kshvakov/clickhouse/>`_
     - `go-clickhouse <https://github.com/roistat/go-clickhouse>`_
+    - `mailru/go-clickhouse <https://github.com/mailru/go-clickhouse>`_
 * NodeJs
     - `clickhouse (NodeJs) <https://github.com/TimonKK/clickhouse>`_
     - `node-clickhouse <https://github.com/apla/node-clickhouse>`_

From 37b6c98752b19cc707f851be013ba67a5ec84b73 Mon Sep 17 00:00:00 2001
From: Nikolai Kochetov <nik-kochetov@yandex-team.ru>
Date: Thu, 10 Aug 2017 17:46:46 +0300
Subject: [PATCH 118/281] added former WITH clause to SELECT query

---
 dbms/src/Interpreters/ExpressionAnalyzer.cpp  |  2 ++
 dbms/src/Parsers/ASTFunction.cpp              |  3 ++-
 dbms/src/Parsers/ASTFunction.h                |  3 +--
 dbms/src/Parsers/ASTIdentifier.h              |  3 +--
 dbms/src/Parsers/ASTLiteral.cpp               |  2 +-
 dbms/src/Parsers/ASTLiteral.h                 |  3 +--
 dbms/src/Parsers/ASTSelectQuery.cpp           | 10 ++++++++++
 dbms/src/Parsers/ASTSelectQuery.h             |  1 +
 dbms/src/Parsers/ASTSubquery.cpp              |  2 +-
 dbms/src/Parsers/ASTSubquery.h                |  3 +--
 dbms/src/Parsers/ASTWithAlias.h               |  9 ++++++++-
 dbms/src/Parsers/ExpressionElementParsers.cpp |  3 +++
 dbms/src/Parsers/ExpressionElementParsers.h   |  6 ++++--
 dbms/src/Parsers/ExpressionListParsers.cpp    | 10 ++++++----
 dbms/src/Parsers/ExpressionListParsers.h      | 11 ++++++-----
 dbms/src/Parsers/ParserSelectQuery.cpp        | 12 ++++++++++++
 16 files changed, 60 insertions(+), 23 deletions(-)

diff --git a/dbms/src/Interpreters/ExpressionAnalyzer.cpp b/dbms/src/Interpreters/ExpressionAnalyzer.cpp
index 90638a39437..bce55a70a89 100644
--- a/dbms/src/Interpreters/ExpressionAnalyzer.cpp
+++ b/dbms/src/Interpreters/ExpressionAnalyzer.cpp
@@ -1228,6 +1228,7 @@ static ASTPtr addTypeConversion(std::unique_ptr<ASTLiteral> && ast, const String
     auto func = std::make_shared<ASTFunction>(ast->range);
     ASTPtr res = func;
     func->alias = ast->alias;
+    func->prefer_alias_to_column_name = ast->prefer_alias_to_column_name;
     ast->alias.clear();
     func->kind = ASTFunction::FUNCTION;
     func->name = "CAST";
@@ -1300,6 +1301,7 @@ void ExpressionAnalyzer::executeScalarSubqueriesImpl(ASTPtr & ast)
         {
             auto lit = std::make_unique<ASTLiteral>(ast->range, (*block.safeGetByPosition(0).column)[0]);
             lit->alias = subquery->alias;
+            lit->prefer_alias_to_column_name = subquery->prefer_alias_to_column_name;
             ast = addTypeConversion(std::move(lit), block.safeGetByPosition(0).type->getName());
         }
         else
diff --git a/dbms/src/Parsers/ASTFunction.cpp b/dbms/src/Parsers/ASTFunction.cpp
index dc3605bd94c..1a8766c0d99 100644
--- a/dbms/src/Parsers/ASTFunction.cpp
+++ b/dbms/src/Parsers/ASTFunction.cpp
@@ -1,6 +1,7 @@
 #include <Common/typeid_cast.h>
 #include <Parsers/ASTLiteral.h>
 #include <Parsers/ASTFunction.h>
+#include <Parsers/ASTWithAlias.h>
 #include <IO/WriteHelpers.h>
 #include <IO/WriteBufferFromString.h>
 
@@ -8,7 +9,7 @@
 namespace DB
 {
 
-String ASTFunction::getColumnName() const
+String ASTFunction::getColumnNameImpl() const
 {
     WriteBufferFromOwnString wb;
     writeString(name, wb);
diff --git a/dbms/src/Parsers/ASTFunction.h b/dbms/src/Parsers/ASTFunction.h
index 6f35cde657b..f4e7c32d139 100644
--- a/dbms/src/Parsers/ASTFunction.h
+++ b/dbms/src/Parsers/ASTFunction.h
@@ -34,8 +34,6 @@ public:
     ASTFunction() = default;
     ASTFunction(const StringRange range_) : ASTWithAlias(range_) {}
 
-    String getColumnName() const override;
-
     /** Get text identifying the AST node. */
     String getID() const override;
 
@@ -43,6 +41,7 @@ public:
 
 protected:
     void formatImplWithoutAlias(const FormatSettings & settings, FormatState & state, FormatStateStacked frame) const override;
+    String getColumnNameImpl() const override;
 };
 
 
diff --git a/dbms/src/Parsers/ASTIdentifier.h b/dbms/src/Parsers/ASTIdentifier.h
index 8cf866f45fa..1c424f8e50e 100644
--- a/dbms/src/Parsers/ASTIdentifier.h
+++ b/dbms/src/Parsers/ASTIdentifier.h
@@ -29,8 +29,6 @@ public:
     ASTIdentifier(const StringRange range_, const String & name_, const Kind kind_ = Column)
         : ASTWithAlias(range_), name(name_), kind(kind_) {}
 
-    String getColumnName() const override { return name; }
-
     /** Get the text that identifies this element. */
     String getID() const override { return "Identifier_" + name; }
 
@@ -43,6 +41,7 @@ public:
 
 protected:
     void formatImplWithoutAlias(const FormatSettings & settings, FormatState & state, FormatStateStacked frame) const override;
+    String getColumnNameImpl() const override { return name; }
 };
 
 }
diff --git a/dbms/src/Parsers/ASTLiteral.cpp b/dbms/src/Parsers/ASTLiteral.cpp
index fe7d0367b97..bd88a90a301 100644
--- a/dbms/src/Parsers/ASTLiteral.cpp
+++ b/dbms/src/Parsers/ASTLiteral.cpp
@@ -8,7 +8,7 @@ namespace DB
 {
 
 
-String ASTLiteral::getColumnName() const
+String ASTLiteral::getColumnNameImpl() const
 {
     /// Special case for very large arrays. Instead of listing all elements, will use hash of them.
     /// (Otherwise column name will be too long, that will lead to significant slowdown of expression analysis.)
diff --git a/dbms/src/Parsers/ASTLiteral.h b/dbms/src/Parsers/ASTLiteral.h
index 6c77f3ce775..d7169f3b472 100644
--- a/dbms/src/Parsers/ASTLiteral.h
+++ b/dbms/src/Parsers/ASTLiteral.h
@@ -18,8 +18,6 @@ public:
     ASTLiteral() = default;
     ASTLiteral(const StringRange range_, const Field & value_) : ASTWithAlias(range_), value(value_) {}
 
-    String getColumnName() const override;
-
     /** Get the text that identifies this element. */
     String getID() const override { return "Literal_" + applyVisitor(FieldVisitorDump(), value); }
 
@@ -30,6 +28,7 @@ protected:
     {
         settings.ostr << applyVisitor(FieldVisitorToString(), value);
     }
+    String getColumnNameImpl() const override;
 };
 
 }
diff --git a/dbms/src/Parsers/ASTSelectQuery.cpp b/dbms/src/Parsers/ASTSelectQuery.cpp
index 1c1ceb958e2..9c47e16bdbc 100644
--- a/dbms/src/Parsers/ASTSelectQuery.cpp
+++ b/dbms/src/Parsers/ASTSelectQuery.cpp
@@ -197,6 +197,7 @@ std::shared_ptr<ASTSelectQuery> ASTSelectQuery::cloneImpl(bool traverse_union_al
         * And if the cloning order does not match the parsing order,
         *  then different servers will get different identifiers.
         */
+    CLONE(with_expression_list)
     CLONE(select_expression_list)
     CLONE(tables)
     CLONE(prewhere_expression)
@@ -232,6 +233,15 @@ void ASTSelectQuery::formatQueryImpl(const FormatSettings & s, FormatState & sta
     frame.need_parens = false;
     std::string indent_str = s.one_line ? "" : std::string(4 * frame.indent, ' ');
 
+    if (with_expression_list)
+    {
+        s.ostr << (s.hilite ? hilite_keyword : "") << indent_str << "WITH " << (s.hilite ? hilite_none : "");
+        s.one_line
+            ? with_expression_list->formatImpl(s, state, frame)
+            : typeid_cast<const ASTExpressionList &>(*with_expression_list).formatImplMultiline(s, state, frame);
+        s.ostr << s.nl_or_ws;
+    }
+
     s.ostr << (s.hilite ? hilite_keyword : "") << indent_str << "SELECT " << (distinct ? "DISTINCT " : "") << (s.hilite ? hilite_none : "");
 
     s.one_line
diff --git a/dbms/src/Parsers/ASTSelectQuery.h b/dbms/src/Parsers/ASTSelectQuery.h
index 7f56e49e60e..5f4898d33d9 100644
--- a/dbms/src/Parsers/ASTSelectQuery.h
+++ b/dbms/src/Parsers/ASTSelectQuery.h
@@ -46,6 +46,7 @@ private:
 
 public:
     bool distinct = false;
+    ASTPtr with_expression_list;
     ASTPtr select_expression_list;
     ASTPtr tables;
     ASTPtr prewhere_expression;
diff --git a/dbms/src/Parsers/ASTSubquery.cpp b/dbms/src/Parsers/ASTSubquery.cpp
index 10bb0825641..337dc6cfc31 100644
--- a/dbms/src/Parsers/ASTSubquery.cpp
+++ b/dbms/src/Parsers/ASTSubquery.cpp
@@ -3,7 +3,7 @@
 namespace DB
 {
 
-String ASTSubquery::getColumnName() const
+String ASTSubquery::getColumnNameImpl() const
 {
     /// This is a hack. We use alias, if available, because otherwise tree could change during analysis.
     return alias.empty() ? getTreeID() : alias;
diff --git a/dbms/src/Parsers/ASTSubquery.h b/dbms/src/Parsers/ASTSubquery.h
index 1588b4954e9..2ec2b4469fa 100644
--- a/dbms/src/Parsers/ASTSubquery.h
+++ b/dbms/src/Parsers/ASTSubquery.h
@@ -31,8 +31,6 @@ public:
         return ptr;
     }
 
-    String getColumnName() const override;
-
 protected:
     void formatImplWithoutAlias(const FormatSettings & settings, FormatState & state, FormatStateStacked frame) const override
     {
@@ -46,6 +44,7 @@ protected:
         children[0]->formatImpl(settings, state, frame_nested);
         settings.ostr << nl_or_nothing << indent_str << ")";
     }
+    String getColumnNameImpl() const override;
 };
 
 }
diff --git a/dbms/src/Parsers/ASTWithAlias.h b/dbms/src/Parsers/ASTWithAlias.h
index f438c361b9a..2ab863b3132 100644
--- a/dbms/src/Parsers/ASTWithAlias.h
+++ b/dbms/src/Parsers/ASTWithAlias.h
@@ -14,10 +14,14 @@ class ASTWithAlias : public IAST
 public:
     /// The alias, if any, or an empty string.
     String alias;
+    /// If true and the alias is non-empty, getColumnName returns the alias. Used for aliases in the WITH clause that precedes SELECT.
+    /// Example: 'WITH pow(2, 2) as a SELECT pow(a, 2)' returns 'pow(a, 2)' instead of 'pow(pow(2, 2), 2)'
+    bool prefer_alias_to_column_name = false;
 
     using IAST::IAST;
 
-    String getAliasOrColumnName() const override { return alias.empty() ? getColumnName() : alias; }
+    String getColumnName() const override final { return prefer_alias_to_column_name && !alias.empty() ? alias : getColumnNameImpl(); }
+    String getAliasOrColumnName() const override { return alias.empty() ? getColumnNameImpl() : alias; }
     String tryGetAlias() const override { return alias; }
     void setAlias(const String & to) override { alias = to; }
 
@@ -25,6 +29,9 @@ public:
     void formatImpl(const FormatSettings & settings, FormatState & state, FormatStateStacked frame) const override final;
 
     virtual void formatImplWithoutAlias(const FormatSettings & settings, FormatState & state, FormatStateStacked frame) const = 0;
+
+protected:
+    virtual String getColumnNameImpl() const = 0;
 };
 
 /// helper for setting aliases and chaining result to other functions
diff --git a/dbms/src/Parsers/ExpressionElementParsers.cpp b/dbms/src/Parsers/ExpressionElementParsers.cpp
index 0c1fcd08c21..c2203ae4f25 100644
--- a/dbms/src/Parsers/ExpressionElementParsers.cpp
+++ b/dbms/src/Parsers/ExpressionElementParsers.cpp
@@ -767,7 +767,10 @@ bool ParserWithOptionalAliasImpl<ParserAlias>::parseImpl(Pos & pos, ASTPtr & nod
         String alias_name = typeid_cast<ASTIdentifier &>(*alias_node).name;
 
         if (ASTWithAlias * ast_with_alias = dynamic_cast<ASTWithAlias *>(node.get()))
+        {
             ast_with_alias->alias = alias_name;
+            ast_with_alias->prefer_alias_to_column_name = prefer_alias_to_column_name;
+        }
         else
         {
             expected.add(pos, "alias cannot be here");
diff --git a/dbms/src/Parsers/ExpressionElementParsers.h b/dbms/src/Parsers/ExpressionElementParsers.h
index 5f8e6763fc6..523432071a2 100644
--- a/dbms/src/Parsers/ExpressionElementParsers.h
+++ b/dbms/src/Parsers/ExpressionElementParsers.h
@@ -209,11 +209,13 @@ template <typename ParserAlias>
 class ParserWithOptionalAliasImpl : public IParserBase
 {
 public:
-    ParserWithOptionalAliasImpl(ParserPtr && elem_parser_, bool allow_alias_without_as_keyword_)
-        : elem_parser(std::move(elem_parser_)), allow_alias_without_as_keyword(allow_alias_without_as_keyword_) {}
+    ParserWithOptionalAliasImpl(ParserPtr && elem_parser_, bool allow_alias_without_as_keyword_, bool prefer_alias_to_column_name_ = false)
+    : elem_parser(std::move(elem_parser_)), allow_alias_without_as_keyword(allow_alias_without_as_keyword_),
+      prefer_alias_to_column_name(prefer_alias_to_column_name_) {}
 protected:
     ParserPtr elem_parser;
     bool allow_alias_without_as_keyword;
+    bool prefer_alias_to_column_name;
 
     const char * getName() const { return "element of expression with optional alias"; }
     bool parseImpl(Pos & pos, ASTPtr & node, Expected & expected);
diff --git a/dbms/src/Parsers/ExpressionListParsers.cpp b/dbms/src/Parsers/ExpressionListParsers.cpp
index fc36ad2e251..f615324d979 100644
--- a/dbms/src/Parsers/ExpressionListParsers.cpp
+++ b/dbms/src/Parsers/ExpressionListParsers.cpp
@@ -532,14 +532,16 @@ bool ParserTupleElementExpression::parseImpl(Pos & pos, ASTPtr & node, Expected
 }
 
 
-ParserExpressionWithOptionalAlias::ParserExpressionWithOptionalAlias(bool allow_alias_without_as_keyword)
-    : impl(std::make_unique<ParserWithOptionalAlias>(std::make_unique<ParserExpression>(), allow_alias_without_as_keyword))
+ParserExpressionWithOptionalAlias::ParserExpressionWithOptionalAlias(bool allow_alias_without_as_keyword, bool prefer_alias_to_column_name)
+    : impl(std::make_unique<ParserWithOptionalAlias>(std::make_unique<ParserExpression>(),
+                                                     allow_alias_without_as_keyword, prefer_alias_to_column_name))
 {
 }
 
 
 ParserExpressionInCastExpression::ParserExpressionInCastExpression(bool allow_alias_without_as_keyword)
-    : impl(std::make_unique<ParserCastExpressionWithOptionalAlias>(std::make_unique<ParserExpression>(), allow_alias_without_as_keyword))
+    : impl(std::make_unique<ParserCastExpressionWithOptionalAlias>(std::make_unique<ParserExpression>(),
+                                                                   allow_alias_without_as_keyword, false))
 {
 }
 
@@ -547,7 +549,7 @@ ParserExpressionInCastExpression::ParserExpressionInCastExpression(bool allow_al
 bool ParserExpressionList::parseImpl(Pos & pos, ASTPtr & node, Expected & expected)
 {
     return ParserList(
-        std::make_unique<ParserExpressionWithOptionalAlias>(allow_alias_without_as_keyword),
+        std::make_unique<ParserExpressionWithOptionalAlias>(allow_alias_without_as_keyword, prefer_alias_to_column_name),
         std::make_unique<ParserToken>(TokenType::Comma))
         .parse(pos, node, expected);
 }
diff --git a/dbms/src/Parsers/ExpressionListParsers.h b/dbms/src/Parsers/ExpressionListParsers.h
index 73ac7dd63af..87240bea12f 100644
--- a/dbms/src/Parsers/ExpressionListParsers.h
+++ b/dbms/src/Parsers/ExpressionListParsers.h
@@ -311,7 +311,7 @@ using ParserExpression = ParserLambdaExpression;
 class ParserExpressionWithOptionalAlias : public IParserBase
 {
 public:
-    ParserExpressionWithOptionalAlias(bool allow_alias_without_as_keyword);
+    ParserExpressionWithOptionalAlias(bool allow_alias_without_as_keyword, bool prefer_alias_to_column_name_ = false);
 protected:
     ParserPtr impl;
 
@@ -343,11 +343,12 @@ protected:
 class ParserExpressionList : public IParserBase
 {
 public:
-    ParserExpressionList(bool allow_alias_without_as_keyword_)
-        : allow_alias_without_as_keyword(allow_alias_without_as_keyword_) {}
+    ParserExpressionList(bool allow_alias_without_as_keyword_, bool prefer_alias_to_column_name_ = false)
+    : allow_alias_without_as_keyword(allow_alias_without_as_keyword_), prefer_alias_to_column_name(prefer_alias_to_column_name_) {}
 
 protected:
     bool allow_alias_without_as_keyword;
+    bool prefer_alias_to_column_name;
 
     const char * getName() const { return "list of expressions"; }
     bool parseImpl(Pos & pos, ASTPtr & node, Expected & expected);
@@ -357,8 +358,8 @@ protected:
 class ParserNotEmptyExpressionList : public IParserBase
 {
 public:
-    ParserNotEmptyExpressionList(bool allow_alias_without_as_keyword)
-        : nested_parser(allow_alias_without_as_keyword) {}
+    ParserNotEmptyExpressionList(bool allow_alias_without_as_keyword, bool prefer_alias_to_column_name = false)
+    : nested_parser(allow_alias_without_as_keyword, prefer_alias_to_column_name) {}
 private:
     ParserExpressionList nested_parser;
 protected:
diff --git a/dbms/src/Parsers/ParserSelectQuery.cpp b/dbms/src/Parsers/ParserSelectQuery.cpp
index 8befc1c4c5f..26b5de0d8f3 100644
--- a/dbms/src/Parsers/ParserSelectQuery.cpp
+++ b/dbms/src/Parsers/ParserSelectQuery.cpp
@@ -42,10 +42,20 @@ bool ParserSelectQuery::parseImpl(Pos & pos, ASTPtr & node, Expected & expected)
     ParserKeyword s_by("BY");
 
     ParserNotEmptyExpressionList exp_list(false);
+    ParserNotEmptyExpressionList exp_list_for_former_with_clause(false, true); /// Set prefer_alias_to_column_name for each alias.
     ParserNotEmptyExpressionList exp_list_for_select_clause(true);    /// Allows aliases without AS keyword.
     ParserExpression exp_elem;
     ParserOrderByExpressionList order_list;
 
+    /// WITH expr list
+    {
+        if (s_with.ignore(pos, expected))
+        {
+            if (!exp_list_for_former_with_clause.parse(pos, select_query->with_expression_list, expected))
+                return false;
+        }
+    }
+
     /// SELECT [DISTINCT] expr list
     {
         if (!s_select.ignore(pos, expected))
@@ -175,6 +185,8 @@ bool ParserSelectQuery::parseImpl(Pos & pos, ASTPtr & node, Expected & expected)
 
     select_query->range = StringRange(begin, pos);
 
+    if (select_query->with_expression_list)
+        select_query->children.push_back(select_query->with_expression_list);
     select_query->children.push_back(select_query->select_expression_list);
     if (select_query->tables)
         select_query->children.push_back(select_query->tables);

From ac87cb035f2d47242c0e1540db6255418cc562c7 Mon Sep 17 00:00:00 2001
From: Nikolai Kochetov <nik-kochetov@yandex-team.ru>
Date: Thu, 10 Aug 2017 17:48:13 +0300
Subject: [PATCH 119/281] added test to former WITH clause in SELECT query

---
 .../tests/queries/0_stateless/00490_with_select.reference | 6 ++++++
 dbms/tests/queries/0_stateless/00490_with_select.sql      | 8 ++++++++
 2 files changed, 14 insertions(+)
 create mode 100644 dbms/tests/queries/0_stateless/00490_with_select.reference
 create mode 100644 dbms/tests/queries/0_stateless/00490_with_select.sql

diff --git a/dbms/tests/queries/0_stateless/00490_with_select.reference b/dbms/tests/queries/0_stateless/00490_with_select.reference
new file mode 100644
index 00000000000..889d98612c0
--- /dev/null
+++ b/dbms/tests/queries/0_stateless/00490_with_select.reference
@@ -0,0 +1,6 @@
+16	2	4
+16	4
+16	2	4
+16	4
+string_abc
+string_abc
diff --git a/dbms/tests/queries/0_stateless/00490_with_select.sql b/dbms/tests/queries/0_stateless/00490_with_select.sql
new file mode 100644
index 00000000000..c2190187f61
--- /dev/null
+++ b/dbms/tests/queries/0_stateless/00490_with_select.sql
@@ -0,0 +1,8 @@
+with pow(2,2) as four select pow(four, 2), 2 as two, pow(two, 2);
+select `pow(four, 2)`, `pow(2, 2)` from (with pow(2,2) as four select pow(four, 2), 2 as two, pow(two, 2));
+with (select pow(2,2)) as four select pow(four, 2), 2 as two, pow(two, 2);
+select `pow(four, 2)`, `pow(2, 2)` from (with (select pow(2,2)) as four select pow(four, 2), 2 as two, pow(two, 2));
+with 'string' as str select str || '_abc';
+select `concat(str, \'_abc\')` from (with 'string' as str select str || '_abc');
+
+

From b7f8ac1f2db2553fb02779a2b13ee09e6e99c9af Mon Sep 17 00:00:00 2001
From: Nikolai Kochetov <nik-kochetov@yandex-team.ru>
Date: Fri, 11 Aug 2017 21:13:47 +0300
Subject: [PATCH 120/281] rewrote rewriting of SELECT query in StorageMerge
 with WITH statement

---
 dbms/src/Common/VirtualColumnUtils.cpp        | 26 +++++++------------
 .../test_merge_table_over_distributed/test.py | 12 +++++++++
 2 files changed, 21 insertions(+), 17 deletions(-)

diff --git a/dbms/src/Common/VirtualColumnUtils.cpp b/dbms/src/Common/VirtualColumnUtils.cpp
index 35b1c2cdd06..448281deeda 100644
--- a/dbms/src/Common/VirtualColumnUtils.cpp
+++ b/dbms/src/Common/VirtualColumnUtils.cpp
@@ -74,25 +74,17 @@ String chooseSuffixForSet(const NamesAndTypesList & columns, const std::vector<S
 void rewriteEntityInAst(ASTPtr ast, const String & column_name, const Field & value)
 {
     ASTSelectQuery & select = typeid_cast<ASTSelectQuery &>(*ast);
-    ASTExpressionList & node = typeid_cast<ASTExpressionList &>(*select.select_expression_list);
-    ASTs & asts = node.children;
-    auto cur = std::make_shared<ASTLiteral>(StringRange(), value);
-    cur->alias = column_name;
-    ASTPtr column_value = cur;
-    bool is_replaced = false;
-    for (size_t i = 0; i < asts.size(); ++i)
+    if (!select.with_expression_list)
     {
-        if (const ASTIdentifier * identifier = typeid_cast<const ASTIdentifier *>(&* asts[i]))
-        {
-            if (identifier->kind == ASTIdentifier::Kind::Column && identifier->name == column_name)
-            {
-                asts[i] = column_value;
-                is_replaced = true;
-            }
-        }
+        select.with_expression_list = std::make_shared<ASTExpressionList>();
+        select.children.insert(select.children.begin(), select.with_expression_list);
     }
-    if (!is_replaced)
-        asts.insert(asts.begin(), column_value);
+
+    ASTExpressionList & with = typeid_cast<ASTExpressionList &>(*select.with_expression_list);
+    auto literal = std::make_shared<ASTLiteral>(StringRange(), value);
+    literal->alias = column_name;
+    literal->prefer_alias_to_column_name = true;
+    with.children.push_back(literal);
 }
 
 /// Verifying that the function depends only on the specified columns
diff --git a/dbms/tests/integration/test_merge_table_over_distributed/test.py b/dbms/tests/integration/test_merge_table_over_distributed/test.py
index 238993d23a0..a5b186f57e5 100644
--- a/dbms/tests/integration/test_merge_table_over_distributed/test.py
+++ b/dbms/tests/integration/test_merge_table_over_distributed/test.py
@@ -17,6 +17,7 @@ def started_cluster():
         for node in (node1, node2):
             node.query('''
 CREATE TABLE local_table(id UInt32, val String) ENGINE = TinyLog;
+CREATE TABLE local_table_2(id UInt32, val String) ENGINE = TinyLog;
 ''')
 
         node1.query("INSERT INTO local_table VALUES (1, 'node1')")
@@ -24,6 +25,7 @@ CREATE TABLE local_table(id UInt32, val String) ENGINE = TinyLog;
 
         node1.query('''
 CREATE TABLE distributed_table(id UInt32, val String) ENGINE = Distributed(test_cluster, default, local_table);
+CREATE TABLE distributed_table_2(id UInt32, val String) ENGINE = Distributed(test_cluster, default, local_table_2);
 CREATE TABLE merge_table(id UInt32, val String) ENGINE = Merge(default, '^distributed_table')
 ''')
 
@@ -49,6 +51,16 @@ def test_filtering(started_cluster):
 
     assert node1.query("SELECT id + 1 FROM merge_table WHERE val = 'node1'").rstrip() == '2'
 
+
+def test_select_table_name_from_merge_over_distributed(started_cluster):
+
+    node1.query("INSERT INTO local_table_2 VALUES (1, 'node1')")
+    node2.query("INSERT INTO local_table_2 VALUES (2, 'node2')")
+
+    node1.query("select _table == 'distributed_table' from merge_table")
+    node1.query("select * from (select _table == 'distributed_table' from merge_table limit 1)")
+
+
 if __name__ == '__main__':
     with contextmanager(started_cluster)() as cluster:
         for name, instance in cluster.instances.items():

From 04dcec32b4652aeb7189535c28d20aa395b000ad Mon Sep 17 00:00:00 2001
From: Vitaliy Lyudvichenko <vludv@yandex-team.ru>
Date: Sat, 12 Aug 2017 02:25:03 +0300
Subject: [PATCH 121/281] Specialized implementation of groupArray() for Date
 and DateTime. [#CLICKHOUSE-3213]

---
 .../AggregateFunctionGroupArray.cpp           | 42 +++++++++++--------
 .../AggregateFunctionGroupArray.h             |  6 ++-
 dbms/src/AggregateFunctions/Helpers.h         |  2 +-
 .../0_stateless/00113_group_array.reference   |  3 ++
 .../queries/0_stateless/00113_group_array.sql |  4 ++
 5 files changed, 37 insertions(+), 20 deletions(-)

diff --git a/dbms/src/AggregateFunctions/AggregateFunctionGroupArray.cpp b/dbms/src/AggregateFunctions/AggregateFunctionGroupArray.cpp
index 549ac309b6d..8098304d75f 100644
--- a/dbms/src/AggregateFunctions/AggregateFunctionGroupArray.cpp
+++ b/dbms/src/AggregateFunctions/AggregateFunctionGroupArray.cpp
@@ -8,6 +8,28 @@ namespace DB
 namespace
 {
 
+template <template <typename, typename> class AggregateFunctionTemplate, class Data, typename ... TArgs>
+static IAggregateFunction * createWithNumericOrTimeType(const IDataType & argument_type, TArgs && ... args)
+{
+         if (typeid_cast<const DataTypeDate     *>(&argument_type)) return new AggregateFunctionTemplate<UInt16, Data>(std::forward<TArgs>(args)...);
+    else if (typeid_cast<const DataTypeDateTime *>(&argument_type)) return new AggregateFunctionTemplate<UInt32, Data>(std::forward<TArgs>(args)...);
+    else return createWithNumericType<AggregateFunctionTemplate, Data, TArgs...>(argument_type, std::forward<TArgs>(args)...);
+}
+
+
+template <typename TLimit_size, typename ... TArgs>
+inline AggregateFunctionPtr createAggregateFunctionGroupArrayImpl(const DataTypePtr & argument_type, TArgs ... args)
+{
+    if (auto res = createWithNumericOrTimeType<GroupArrayNumericImpl, TLimit_size>(*argument_type, argument_type, std::forward<TArgs>(args)...))
+        return AggregateFunctionPtr(res);
+
+    if (typeid_cast<const DataTypeString *>(argument_type.get()))
+        return std::make_shared<GroupArrayGeneralListImpl<NodeString, TLimit_size::value>>(std::forward<TArgs>(args)...);
+
+    return std::make_shared<GroupArrayGeneralListImpl<NodeGeneral, TLimit_size::value>>(std::forward<TArgs>(args)...);
+};
+
+
 static AggregateFunctionPtr createAggregateFunctionGroupArray(const std::string & name, const DataTypes & argument_types, const Array & parameters)
 {
     if (argument_types.size() != 1)
@@ -15,7 +37,7 @@ static AggregateFunctionPtr createAggregateFunctionGroupArray(const std::string
             ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH);
 
     bool limit_size = false;
-    UInt64 max_elems = 0;
+    UInt64 max_elems = std::numeric_limits<UInt64>::max();
 
     if (parameters.empty())
     {
@@ -39,23 +61,9 @@ static AggregateFunctionPtr createAggregateFunctionGroupArray(const std::string
             ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH);
 
     if (!limit_size)
-    {
-        if (auto res = createWithNumericType<GroupArrayNumericImpl, std::false_type>(*argument_types[0]))
-            return AggregateFunctionPtr(res);
-        else if (typeid_cast<const DataTypeString *>(argument_types[0].get()))
-            return std::make_shared<GroupArrayGeneralListImpl<NodeString, false>>();
-        else
-            return std::make_shared<GroupArrayGeneralListImpl<NodeGeneral, false>>();
-    }
+        return createAggregateFunctionGroupArrayImpl<std::false_type>(argument_types[0]);
     else
-    {
-        if (auto res = createWithNumericType<GroupArrayNumericImpl, std::true_type>(*argument_types[0], max_elems))
-            return AggregateFunctionPtr(res);
-        else if (typeid_cast<const DataTypeString *>(argument_types[0].get()))
-            return std::make_shared<GroupArrayGeneralListImpl<NodeString, true>>(max_elems);
-        else
-            return std::make_shared<GroupArrayGeneralListImpl<NodeGeneral, true>>(max_elems);
-    }
+        return createAggregateFunctionGroupArrayImpl<std::true_type>(argument_types[0], max_elems);
 }
 
 }
diff --git a/dbms/src/AggregateFunctions/AggregateFunctionGroupArray.h b/dbms/src/AggregateFunctions/AggregateFunctionGroupArray.h
index 8da72755021..b7db1f393e5 100644
--- a/dbms/src/AggregateFunctions/AggregateFunctionGroupArray.h
+++ b/dbms/src/AggregateFunctions/AggregateFunctionGroupArray.h
@@ -51,16 +51,18 @@ class GroupArrayNumericImpl final
     : public IUnaryAggregateFunction<GroupArrayNumericData<T>, GroupArrayNumericImpl<T, Tlimit_num_elems>>
 {
     static constexpr bool limit_num_elems = Tlimit_num_elems::value;
+    DataTypePtr data_type;
     UInt64 max_elems;
 
 public:
-    GroupArrayNumericImpl(UInt64 max_elems_ = std::numeric_limits<UInt64>::max()) : max_elems(max_elems_) {}
+    explicit GroupArrayNumericImpl(const DataTypePtr & data_type_, UInt64 max_elems_ = std::numeric_limits<UInt64>::max())
+            : data_type(data_type_), max_elems(max_elems_) {}
 
     String getName() const override { return "groupArray"; }
 
     DataTypePtr getReturnType() const override
     {
-        return std::make_shared<DataTypeArray>(std::make_shared<DataTypeNumber<T>>());
+        return std::make_shared<DataTypeArray>(data_type);
     }
 
     void setArgument(const DataTypePtr & argument) {}
diff --git a/dbms/src/AggregateFunctions/Helpers.h b/dbms/src/AggregateFunctions/Helpers.h
index 3eec0358aa0..d57218888da 100644
--- a/dbms/src/AggregateFunctions/Helpers.h
+++ b/dbms/src/AggregateFunctions/Helpers.h
@@ -54,7 +54,7 @@ static IAggregateFunction * createWithNumericType(const IDataType & argument_typ
 }
 
 template <template <typename, typename> class AggregateFunctionTemplate, class Data, typename ... TArgs>
-static IAggregateFunction * createWithNumericType(const IDataType & argument_type, TArgs ... args)
+static IAggregateFunction * createWithNumericType(const IDataType & argument_type, TArgs && ... args)
 {
          if (typeid_cast<const DataTypeUInt8    *>(&argument_type)) return new AggregateFunctionTemplate<UInt8, Data>(std::forward<TArgs>(args)...);
     else if (typeid_cast<const DataTypeUInt16   *>(&argument_type)) return new AggregateFunctionTemplate<UInt16, Data>(std::forward<TArgs>(args)...);
diff --git a/dbms/tests/queries/0_stateless/00113_group_array.reference b/dbms/tests/queries/0_stateless/00113_group_array.reference
index 85bb5df9d23..2031ee49589 100644
--- a/dbms/tests/queries/0_stateless/00113_group_array.reference
+++ b/dbms/tests/queries/0_stateless/00113_group_array.reference
@@ -10,6 +10,9 @@
 9	100
 
 0	1000000
+
+21
+41
 1000000	500000500000	1000000
 1000000	500000500000	1000000
 1000000	500000500000	1000000	500000500000
diff --git a/dbms/tests/queries/0_stateless/00113_group_array.sql b/dbms/tests/queries/0_stateless/00113_group_array.sql
index de7923d2434..cfdb7b6b6fd 100644
--- a/dbms/tests/queries/0_stateless/00113_group_array.sql
+++ b/dbms/tests/queries/0_stateless/00113_group_array.sql
@@ -1,5 +1,9 @@
 SELECT intDiv(number, 100) AS k, length(groupArray(number)) FROM (SELECT * FROM system.numbers LIMIT 1000000) GROUP BY k WITH TOTALS ORDER BY k LIMIT 10;
 
+SELECT '';
+SELECT length(toString(groupArrayState(toDate(number)))) FROM (SELECT * FROM system.numbers LIMIT 10);
+SELECT length(toString(groupArrayState(toDateTime(number)))) FROM (SELECT * FROM system.numbers LIMIT 10);
+
 DROP TABLE IF EXISTS test.numbers_mt;
 CREATE TABLE test.numbers_mt (number UInt64) ENGINE = Log;
 INSERT INTO test.numbers_mt SELECT * FROM system.numbers LIMIT 1, 1000000;

From f36caa9c9916766303b78c5e918ad38a05c48eae Mon Sep 17 00:00:00 2001
From: Vitaliy Lyudvichenko <vludv@yandex-team.ru>
Date: Sat, 12 Aug 2017 03:12:43 +0300
Subject: [PATCH 122/281] Parameter rename. [#CLICKHOUSE-3213]

---
 .../AggregateFunctions/AggregateFunctionGroupArray.cpp    | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/dbms/src/AggregateFunctions/AggregateFunctionGroupArray.cpp b/dbms/src/AggregateFunctions/AggregateFunctionGroupArray.cpp
index 8098304d75f..12151e4b771 100644
--- a/dbms/src/AggregateFunctions/AggregateFunctionGroupArray.cpp
+++ b/dbms/src/AggregateFunctions/AggregateFunctionGroupArray.cpp
@@ -17,16 +17,16 @@ static IAggregateFunction * createWithNumericOrTimeType(const IDataType & argume
 }
 
 
-template <typename TLimit_size, typename ... TArgs>
+template <typename has_limit, typename ... TArgs>
 inline AggregateFunctionPtr createAggregateFunctionGroupArrayImpl(const DataTypePtr & argument_type, TArgs ... args)
 {
-    if (auto res = createWithNumericOrTimeType<GroupArrayNumericImpl, TLimit_size>(*argument_type, argument_type, std::forward<TArgs>(args)...))
+    if (auto res = createWithNumericOrTimeType<GroupArrayNumericImpl, has_limit>(*argument_type, argument_type, std::forward<TArgs>(args)...))
         return AggregateFunctionPtr(res);
 
     if (typeid_cast<const DataTypeString *>(argument_type.get()))
-        return std::make_shared<GroupArrayGeneralListImpl<NodeString, TLimit_size::value>>(std::forward<TArgs>(args)...);
+        return std::make_shared<GroupArrayGeneralListImpl<NodeString, has_limit::value>>(std::forward<TArgs>(args)...);
 
-    return std::make_shared<GroupArrayGeneralListImpl<NodeGeneral, TLimit_size::value>>(std::forward<TArgs>(args)...);
+    return std::make_shared<GroupArrayGeneralListImpl<NodeGeneral, has_limit::value>>(std::forward<TArgs>(args)...);
 };
 
 

From 1c8a280d63e1509da3ed24b6aa3c35328fbe5495 Mon Sep 17 00:00:00 2001
From: Vitaliy Lyudvichenko <vludv@yandex-team.ru>
Date: Sat, 12 Aug 2017 23:46:22 +0300
Subject: [PATCH 123/281] Fixed escapeForFileName(). [#CLICKHOUSE-2]

---
 dbms/src/Common/escapeForFileName.cpp               |  5 +++--
 dbms/src/Common/tests/gtest_unescapeForFileName.cpp | 13 +++++++++++++
 2 files changed, 16 insertions(+), 2 deletions(-)
 create mode 100644 dbms/src/Common/tests/gtest_unescapeForFileName.cpp

diff --git a/dbms/src/Common/escapeForFileName.cpp b/dbms/src/Common/escapeForFileName.cpp
index 2aae8c4d75c..e326c442cc6 100644
--- a/dbms/src/Common/escapeForFileName.cpp
+++ b/dbms/src/Common/escapeForFileName.cpp
@@ -39,15 +39,16 @@ std::string unescapeForFileName(const std::string & s)
     while (pos != end)
     {
         if (!(*pos == '%' && pos + 2 < end))
+        {
             res += *pos;
+            ++pos;
+        }
         else
         {
             ++pos;
             res += unhex2(pos);
             pos += 2;
         }
-
-        ++pos;
     }
     return res;
 }
diff --git a/dbms/src/Common/tests/gtest_unescapeForFileName.cpp b/dbms/src/Common/tests/gtest_unescapeForFileName.cpp
new file mode 100644
index 00000000000..a97a0a78ecd
--- /dev/null
+++ b/dbms/src/Common/tests/gtest_unescapeForFileName.cpp
@@ -0,0 +1,13 @@
+#include <Common/escapeForFileName.h>
+#include <gtest/gtest.h>
+
+using namespace DB;
+
+
+TEST(Common, unescapeForFileName)
+{
+    EXPECT_EQ(unescapeForFileName(escapeForFileName("172.19.0.6")), "172.19.0.6");
+    EXPECT_EQ(unescapeForFileName(escapeForFileName("abcd.")), "abcd.");
+    EXPECT_EQ(unescapeForFileName(escapeForFileName("abcd")), "abcd");
+    EXPECT_EQ(unescapeForFileName(escapeForFileName("..::")), "..::");
+}

From 1e1dc8828abb2a6f6e3d6d6c8436f78d5741f6c4 Mon Sep 17 00:00:00 2001
From: Alexey Milovidov <milovidov@yandex-team.ru>
Date: Sun, 13 Aug 2017 00:02:45 +0300
Subject: [PATCH 124/281] Miscellaneous [#CLICKHOUSE-2].

---
 .../DataStreams/AggregatingSortedBlockInputStream.h  |  2 +-
 .../tests/logical_functions_performance.cpp          | 12 ++++++------
 .../src/TableFunctions/getStructureOfRemoteTable.cpp |  2 +-
 3 files changed, 8 insertions(+), 8 deletions(-)

diff --git a/dbms/src/DataStreams/AggregatingSortedBlockInputStream.h b/dbms/src/DataStreams/AggregatingSortedBlockInputStream.h
index 455af621df1..4f1ee6424b3 100644
--- a/dbms/src/DataStreams/AggregatingSortedBlockInputStream.h
+++ b/dbms/src/DataStreams/AggregatingSortedBlockInputStream.h
@@ -16,7 +16,7 @@ namespace DB
   * During this for each group of consecutive identical values of the primary key (the columns by which the data is sorted),
   * merges them into one row. When merging, the data is pre-aggregated - merge of states of aggregate functions,
   * corresponding to a one value of the primary key. For columns that are not part of the primary key and which do not have the AggregateFunction type,
-  * when merged, the first random value is selected.
+  * when merged, the first value is selected.
   */
 class AggregatingSortedBlockInputStream : public MergingSortedBlockInputStream
 {
diff --git a/dbms/src/Functions/tests/logical_functions_performance.cpp b/dbms/src/Functions/tests/logical_functions_performance.cpp
index c315e00b131..09cf1784a3c 100644
--- a/dbms/src/Functions/tests/logical_functions_performance.cpp
+++ b/dbms/src/Functions/tests/logical_functions_performance.cpp
@@ -315,13 +315,13 @@ public:
 };
 
 
-struct NameAnd    { static const char * get() { return "and"; } };
-struct NameOr    { static const char * get() { return "or"; } };
-struct NameXor    { static const char * get() { return "xor"; } };
+struct NameAnd { static const char * get() { return "and"; } };
+struct NameOr { static const char * get() { return "or"; } };
+struct NameXor { static const char * get() { return "xor"; } };
 
-using FunctionAnd = FunctionAnyArityLogical    <AndImpl,    NameAnd>;
-using FunctionOr = FunctionAnyArityLogical    <OrImpl,    NameOr>    ;
-using FunctionXor = FunctionAnyArityLogical    <XorImpl,    NameXor>;
+using FunctionAnd = FunctionAnyArityLogical<AndImpl, NameAnd>;
+using FunctionOr = FunctionAnyArityLogical<OrImpl, NameOr>    ;
+using FunctionXor = FunctionAnyArityLogical<XorImpl, NameXor>;
 }
 
 using namespace DB;
diff --git a/dbms/src/TableFunctions/getStructureOfRemoteTable.cpp b/dbms/src/TableFunctions/getStructureOfRemoteTable.cpp
index 11acaa6121a..64afb0af67e 100644
--- a/dbms/src/TableFunctions/getStructureOfRemoteTable.cpp
+++ b/dbms/src/TableFunctions/getStructureOfRemoteTable.cpp
@@ -28,7 +28,7 @@ NamesAndTypesList getStructureOfRemoteTable(
     Settings settings = context.getSettings();
     NamesAndTypesList res;
 
-    /// Send to the first random remote shard.
+    /// Send to the first any remote shard.
     const auto & shard_info = cluster.getAnyShardInfo();
 
     if (shard_info.isLocal())

From 133be4d7396c4dd1f337ac096c0af7c449b83410 Mon Sep 17 00:00:00 2001
From: Alexey Zatelepin <ztlpn@yandex-team.ru>
Date: Fri, 28 Jul 2017 15:58:24 +0300
Subject: [PATCH 125/281] move and rename things [#CLICKHOUSE-3151]

---
 ...Constructor.cpp => AlterStreamFactory.cpp} | 10 ++--
 ...ueryConstructor.h => AlterStreamFactory.h} |  6 +--
 ...structor.cpp => DescribeStreamFactory.cpp} | 10 ++--
 ...yConstructor.h => DescribeStreamFactory.h} |  6 +--
 .../{IQueryConstructor.h => IStreamFactory.h} |  4 +-
 dbms/src/Interpreters/ClusterProxy/Query.h    | 46 -------------------
 ...onstructor.cpp => SelectStreamFactory.cpp} | 12 ++---
 ...eryConstructor.h => SelectStreamFactory.h} |  6 +--
 .../{Query.cpp => executeQuery.cpp}           | 25 ++++------
 .../Interpreters/ClusterProxy/executeQuery.h  | 29 ++++++++++++
 dbms/src/Storages/StorageDistributed.cpp      | 26 +++++------
 11 files changed, 79 insertions(+), 101 deletions(-)
 rename dbms/src/Interpreters/ClusterProxy/{AlterQueryConstructor.cpp => AlterStreamFactory.cpp} (80%)
 rename dbms/src/Interpreters/ClusterProxy/{AlterQueryConstructor.h => AlterStreamFactory.h} (81%)
 rename dbms/src/Interpreters/ClusterProxy/{DescribeQueryConstructor.cpp => DescribeStreamFactory.cpp} (84%)
 rename dbms/src/Interpreters/ClusterProxy/{DescribeQueryConstructor.h => DescribeStreamFactory.h} (80%)
 rename dbms/src/Interpreters/ClusterProxy/{IQueryConstructor.h => IStreamFactory.h} (95%)
 delete mode 100644 dbms/src/Interpreters/ClusterProxy/Query.h
 rename dbms/src/Interpreters/ClusterProxy/{SelectQueryConstructor.cpp => SelectStreamFactory.cpp} (82%)
 rename dbms/src/Interpreters/ClusterProxy/{SelectQueryConstructor.h => SelectStreamFactory.h} (87%)
 rename dbms/src/Interpreters/ClusterProxy/{Query.cpp => executeQuery.cpp} (78%)
 create mode 100644 dbms/src/Interpreters/ClusterProxy/executeQuery.h

diff --git a/dbms/src/Interpreters/ClusterProxy/AlterQueryConstructor.cpp b/dbms/src/Interpreters/ClusterProxy/AlterStreamFactory.cpp
similarity index 80%
rename from dbms/src/Interpreters/ClusterProxy/AlterQueryConstructor.cpp
rename to dbms/src/Interpreters/ClusterProxy/AlterStreamFactory.cpp
index 783a665eaf1..698e0f6729a 100644
--- a/dbms/src/Interpreters/ClusterProxy/AlterQueryConstructor.cpp
+++ b/dbms/src/Interpreters/ClusterProxy/AlterStreamFactory.cpp
@@ -1,4 +1,4 @@
-#include <Interpreters/ClusterProxy/AlterQueryConstructor.h>
+#include <Interpreters/ClusterProxy/AlterStreamFactory.h>
 #include <Interpreters/InterpreterAlterQuery.h>
 #include <DataStreams/RemoteBlockInputStream.h>
 #include <DataStreams/LazyBlockInputStream.h>
@@ -16,7 +16,7 @@ constexpr PoolMode pool_mode = PoolMode::GET_ONE;
 namespace ClusterProxy
 {
 
-BlockInputStreamPtr AlterQueryConstructor::createLocal(const ASTPtr & query_ast, const Context & context, const Cluster::Address & address)
+BlockInputStreamPtr AlterStreamFactory::createLocal(const ASTPtr & query_ast, const Context & context, const Cluster::Address & address)
 {
     /// The ALTER query may be a resharding query that is a part of a distributed
     /// job. Since the latter heavily relies on synchronization among its participating
@@ -31,7 +31,7 @@ BlockInputStreamPtr AlterQueryConstructor::createLocal(const ASTPtr & query_ast,
     return stream;
 }
 
-BlockInputStreamPtr AlterQueryConstructor::createRemote(
+BlockInputStreamPtr AlterStreamFactory::createRemote(
         const ConnectionPoolWithFailoverPtr & pool, const std::string & query,
         const Settings & settings, ThrottlerPtr throttler, const Context & context)
 {
@@ -40,7 +40,7 @@ BlockInputStreamPtr AlterQueryConstructor::createRemote(
     return stream;
 }
 
-BlockInputStreamPtr AlterQueryConstructor::createRemote(
+BlockInputStreamPtr AlterStreamFactory::createRemote(
         ConnectionPoolWithFailoverPtrs && pools, const std::string & query,
         const Settings & settings, ThrottlerPtr throttler, const Context & context)
 {
@@ -49,7 +49,7 @@ BlockInputStreamPtr AlterQueryConstructor::createRemote(
     return stream;
 }
 
-PoolMode AlterQueryConstructor::getPoolMode() const
+PoolMode AlterStreamFactory::getPoolMode() const
 {
     return pool_mode;
 }
diff --git a/dbms/src/Interpreters/ClusterProxy/AlterQueryConstructor.h b/dbms/src/Interpreters/ClusterProxy/AlterStreamFactory.h
similarity index 81%
rename from dbms/src/Interpreters/ClusterProxy/AlterQueryConstructor.h
rename to dbms/src/Interpreters/ClusterProxy/AlterStreamFactory.h
index 675809e3c0d..b0ea2e9b674 100644
--- a/dbms/src/Interpreters/ClusterProxy/AlterQueryConstructor.h
+++ b/dbms/src/Interpreters/ClusterProxy/AlterStreamFactory.h
@@ -1,6 +1,6 @@
 #pragma once
 
-#include <Interpreters/ClusterProxy/IQueryConstructor.h>
+#include <Interpreters/ClusterProxy/IStreamFactory.h>
 
 namespace DB
 {
@@ -8,10 +8,10 @@ namespace DB
 namespace ClusterProxy
 {
 
-class AlterQueryConstructor final : public IQueryConstructor
+class AlterStreamFactory final : public IStreamFactory
 {
 public:
-    AlterQueryConstructor() = default;
+    AlterStreamFactory() = default;
 
     BlockInputStreamPtr createLocal(const ASTPtr & query_ast, const Context & context, const Cluster::Address & address) override;
     BlockInputStreamPtr createRemote(
diff --git a/dbms/src/Interpreters/ClusterProxy/DescribeQueryConstructor.cpp b/dbms/src/Interpreters/ClusterProxy/DescribeStreamFactory.cpp
similarity index 84%
rename from dbms/src/Interpreters/ClusterProxy/DescribeQueryConstructor.cpp
rename to dbms/src/Interpreters/ClusterProxy/DescribeStreamFactory.cpp
index 9f7966a573c..7dd2ff67a5b 100644
--- a/dbms/src/Interpreters/ClusterProxy/DescribeQueryConstructor.cpp
+++ b/dbms/src/Interpreters/ClusterProxy/DescribeStreamFactory.cpp
@@ -1,4 +1,4 @@
-#include <Interpreters/ClusterProxy/DescribeQueryConstructor.h>
+#include <Interpreters/ClusterProxy/DescribeStreamFactory.h>
 #include <Interpreters/InterpreterDescribeQuery.h>
 #include <DataStreams/MaterializingBlockInputStream.h>
 #include <DataStreams/BlockExtraInfoInputStream.h>
@@ -28,7 +28,7 @@ BlockExtraInfo toBlockExtraInfo(const Cluster::Address & address)
 namespace ClusterProxy
 {
 
-BlockInputStreamPtr DescribeQueryConstructor::createLocal(const ASTPtr & query_ast, const Context & context, const Cluster::Address & address)
+BlockInputStreamPtr DescribeStreamFactory::createLocal(const ASTPtr & query_ast, const Context & context, const Cluster::Address & address)
 {
     InterpreterDescribeQuery interpreter{query_ast, context};
     BlockInputStreamPtr stream = interpreter.execute().in;
@@ -42,7 +42,7 @@ BlockInputStreamPtr DescribeQueryConstructor::createLocal(const ASTPtr & query_a
     return std::make_shared<BlockExtraInfoInputStream>(materialized_stream, toBlockExtraInfo(address));
 }
 
-BlockInputStreamPtr DescribeQueryConstructor::createRemote(
+BlockInputStreamPtr DescribeStreamFactory::createRemote(
         const ConnectionPoolWithFailoverPtr & pool, const std::string & query,
         const Settings & settings, ThrottlerPtr throttler, const Context & context)
 {
@@ -52,7 +52,7 @@ BlockInputStreamPtr DescribeQueryConstructor::createRemote(
     return stream;
 }
 
-BlockInputStreamPtr DescribeQueryConstructor::createRemote(
+BlockInputStreamPtr DescribeStreamFactory::createRemote(
         ConnectionPoolWithFailoverPtrs && pools, const std::string & query,
         const Settings & settings, ThrottlerPtr throttler, const Context & context)
 {
@@ -62,7 +62,7 @@ BlockInputStreamPtr DescribeQueryConstructor::createRemote(
     return stream;
 }
 
-PoolMode DescribeQueryConstructor::getPoolMode() const
+PoolMode DescribeStreamFactory::getPoolMode() const
 {
     return pool_mode;
 }
diff --git a/dbms/src/Interpreters/ClusterProxy/DescribeQueryConstructor.h b/dbms/src/Interpreters/ClusterProxy/DescribeStreamFactory.h
similarity index 80%
rename from dbms/src/Interpreters/ClusterProxy/DescribeQueryConstructor.h
rename to dbms/src/Interpreters/ClusterProxy/DescribeStreamFactory.h
index 861a8a152d2..8f0f7f6e4f0 100644
--- a/dbms/src/Interpreters/ClusterProxy/DescribeQueryConstructor.h
+++ b/dbms/src/Interpreters/ClusterProxy/DescribeStreamFactory.h
@@ -1,6 +1,6 @@
 #pragma once
 
-#include <Interpreters/ClusterProxy/IQueryConstructor.h>
+#include <Interpreters/ClusterProxy/IStreamFactory.h>
 
 namespace DB
 {
@@ -8,10 +8,10 @@ namespace DB
 namespace ClusterProxy
 {
 
-class DescribeQueryConstructor final : public IQueryConstructor
+class DescribeStreamFactory final : public IStreamFactory
 {
 public:
-    DescribeQueryConstructor() = default;
+    DescribeStreamFactory() = default;
 
     BlockInputStreamPtr createLocal(const ASTPtr & query_ast, const Context & context, const Cluster::Address & address) override;
     BlockInputStreamPtr createRemote(
diff --git a/dbms/src/Interpreters/ClusterProxy/IQueryConstructor.h b/dbms/src/Interpreters/ClusterProxy/IStreamFactory.h
similarity index 95%
rename from dbms/src/Interpreters/ClusterProxy/IQueryConstructor.h
rename to dbms/src/Interpreters/ClusterProxy/IStreamFactory.h
index bb8fb136224..0181aa33c2d 100644
--- a/dbms/src/Interpreters/ClusterProxy/IQueryConstructor.h
+++ b/dbms/src/Interpreters/ClusterProxy/IStreamFactory.h
@@ -18,10 +18,10 @@ namespace ClusterProxy
 
 /// Base class for the implementation of the details of distributed query
 /// execution that are specific to the query type.
-class IQueryConstructor
+class IStreamFactory
 {
 public:
-    virtual ~IQueryConstructor() {}
+    virtual ~IStreamFactory() {}
 
     /// Create an input stream for local query execution.
     virtual BlockInputStreamPtr createLocal(const ASTPtr & query_ast, const Context & context, const Cluster::Address & address) = 0;
diff --git a/dbms/src/Interpreters/ClusterProxy/Query.h b/dbms/src/Interpreters/ClusterProxy/Query.h
deleted file mode 100644
index bf3a3998054..00000000000
--- a/dbms/src/Interpreters/ClusterProxy/Query.h
+++ /dev/null
@@ -1,46 +0,0 @@
-#pragma once
-
-#include <Parsers/IAST.h>
-#include <Storages/IStorage.h>
-#include <Interpreters/Cluster.h>
-
-namespace DB
-{
-
-struct Settings;
-class Context;
-class Cluster;
-
-namespace ClusterProxy
-{
-
-class IQueryConstructor;
-
-/// This class is designed for distributed queries execution. It hides from
-/// the caller the details about the actual locations at which a distributed
-/// query is performed. Depending on the type of query to be performed,
-/// (currently SELECT, DESCRIBE, or ALTER (for resharding)), a so-called
-/// query constructor is specified. Such an object states, among other things,
-/// how connections must be allocated for remote execution.
-class Query
-{
-public:
-    Query(IQueryConstructor & query_constructor_, const ClusterPtr & cluster_,
-        const ASTPtr & query_ast_, const Context & context_, const Settings & settings_, bool enable_shard_multiplexing_);
-
-    /// For each location at which we perform the query, create an input stream
-    /// from which we can fetch the result.
-    BlockInputStreams execute();
-
-private:
-    IQueryConstructor & query_constructor;
-    ClusterPtr cluster;
-    ASTPtr query_ast;
-    const Context & context;
-    const Settings & settings;
-    bool enable_shard_multiplexing;
-};
-
-}
-
-}
diff --git a/dbms/src/Interpreters/ClusterProxy/SelectQueryConstructor.cpp b/dbms/src/Interpreters/ClusterProxy/SelectStreamFactory.cpp
similarity index 82%
rename from dbms/src/Interpreters/ClusterProxy/SelectQueryConstructor.cpp
rename to dbms/src/Interpreters/ClusterProxy/SelectStreamFactory.cpp
index a827f89b270..384164d95bb 100644
--- a/dbms/src/Interpreters/ClusterProxy/SelectQueryConstructor.cpp
+++ b/dbms/src/Interpreters/ClusterProxy/SelectStreamFactory.cpp
@@ -1,4 +1,4 @@
-#include <Interpreters/ClusterProxy/SelectQueryConstructor.h>
+#include <Interpreters/ClusterProxy/SelectStreamFactory.h>
 #include <Interpreters/InterpreterSelectQuery.h>
 #include <DataStreams/RemoteBlockInputStream.h>
 #include <DataStreams/MaterializingBlockInputStream.h>
@@ -16,7 +16,7 @@ constexpr PoolMode pool_mode = PoolMode::GET_MANY;
 namespace ClusterProxy
 {
 
-SelectQueryConstructor::SelectQueryConstructor(
+SelectStreamFactory::SelectStreamFactory(
         QueryProcessingStage::Enum processed_stage_,
         QualifiedTableName main_table_,
         const Tables & external_tables_)
@@ -26,7 +26,7 @@ SelectQueryConstructor::SelectQueryConstructor(
 {
 }
 
-BlockInputStreamPtr SelectQueryConstructor::createLocal(const ASTPtr & query_ast, const Context & context, const Cluster::Address & address)
+BlockInputStreamPtr SelectStreamFactory::createLocal(const ASTPtr & query_ast, const Context & context, const Cluster::Address & address)
 {
     InterpreterSelectQuery interpreter{query_ast, context, processed_stage};
     BlockInputStreamPtr stream = interpreter.execute().in;
@@ -38,7 +38,7 @@ BlockInputStreamPtr SelectQueryConstructor::createLocal(const ASTPtr & query_ast
     return std::make_shared<MaterializingBlockInputStream>(stream);
 }
 
-BlockInputStreamPtr SelectQueryConstructor::createRemote(
+BlockInputStreamPtr SelectStreamFactory::createRemote(
         const ConnectionPoolWithFailoverPtr & pool, const std::string & query,
         const Settings & settings, ThrottlerPtr throttler, const Context & context)
 {
@@ -48,7 +48,7 @@ BlockInputStreamPtr SelectQueryConstructor::createRemote(
     return stream;
 }
 
-BlockInputStreamPtr SelectQueryConstructor::createRemote(
+BlockInputStreamPtr SelectStreamFactory::createRemote(
         ConnectionPoolWithFailoverPtrs && pools, const std::string & query,
         const Settings & settings, ThrottlerPtr throttler, const Context & context)
 {
@@ -58,7 +58,7 @@ BlockInputStreamPtr SelectQueryConstructor::createRemote(
     return stream;
 }
 
-PoolMode SelectQueryConstructor::getPoolMode() const
+PoolMode SelectStreamFactory::getPoolMode() const
 {
     return pool_mode;
 }
diff --git a/dbms/src/Interpreters/ClusterProxy/SelectQueryConstructor.h b/dbms/src/Interpreters/ClusterProxy/SelectStreamFactory.h
similarity index 87%
rename from dbms/src/Interpreters/ClusterProxy/SelectQueryConstructor.h
rename to dbms/src/Interpreters/ClusterProxy/SelectStreamFactory.h
index cb428e87633..d4b4f82a659 100644
--- a/dbms/src/Interpreters/ClusterProxy/SelectQueryConstructor.h
+++ b/dbms/src/Interpreters/ClusterProxy/SelectStreamFactory.h
@@ -1,6 +1,6 @@
 #pragma once
 
-#include <Interpreters/ClusterProxy/IQueryConstructor.h>
+#include <Interpreters/ClusterProxy/IStreamFactory.h>
 #include <Core/QueryProcessingStage.h>
 #include <Storages/IStorage.h>
 
@@ -10,10 +10,10 @@ namespace DB
 namespace ClusterProxy
 {
 
-class SelectQueryConstructor final : public IQueryConstructor
+class SelectStreamFactory final : public IStreamFactory
 {
 public:
-    SelectQueryConstructor(
+    SelectStreamFactory(
             QueryProcessingStage::Enum processed_stage,
             QualifiedTableName main_table,
             const Tables & external_tables);
diff --git a/dbms/src/Interpreters/ClusterProxy/Query.cpp b/dbms/src/Interpreters/ClusterProxy/executeQuery.cpp
similarity index 78%
rename from dbms/src/Interpreters/ClusterProxy/Query.cpp
rename to dbms/src/Interpreters/ClusterProxy/executeQuery.cpp
index 2f16b3466bb..4d656014b59 100644
--- a/dbms/src/Interpreters/ClusterProxy/Query.cpp
+++ b/dbms/src/Interpreters/ClusterProxy/executeQuery.cpp
@@ -1,5 +1,5 @@
-#include <Interpreters/ClusterProxy/Query.h>
-#include <Interpreters/ClusterProxy/IQueryConstructor.h>
+#include <Interpreters/ClusterProxy/executeQuery.h>
+#include <Interpreters/ClusterProxy/IStreamFactory.h>
 #include <Interpreters/Settings.h>
 #include <Interpreters/Context.h>
 #include <Interpreters/Cluster.h>
@@ -14,14 +14,9 @@ namespace DB
 namespace ClusterProxy
 {
 
-Query::Query(IQueryConstructor & query_constructor_, const ClusterPtr & cluster_,
-    const ASTPtr & query_ast_, const Context & context_, const Settings & settings_, bool enable_shard_multiplexing_)
-    : query_constructor{query_constructor_}, cluster{cluster_}, query_ast{query_ast_},
-    context{context_}, settings{settings_}, enable_shard_multiplexing{enable_shard_multiplexing_}
-{
-}
-
-BlockInputStreams Query::execute()
+BlockInputStreams executeQuery(
+        IStreamFactory & stream_factory, const ClusterPtr & cluster,
+        const ASTPtr & query_ast, const Context & context, const Settings & settings, bool enable_shard_multiplexing)
 {
     BlockInputStreams res;
 
@@ -53,7 +48,7 @@ BlockInputStreams Query::execute()
 
     size_t remote_count = 0;
 
-    if (query_constructor.getPoolMode() == PoolMode::GET_ALL)
+    if (stream_factory.getPoolMode() == PoolMode::GET_ALL)
     {
         for (const auto & shard_info : cluster->getShardsInfo())
         {
@@ -87,7 +82,7 @@ BlockInputStreams Query::execute()
         bool create_local_queries = shard_info.isLocal();
 
         bool create_remote_queries;
-        if (query_constructor.getPoolMode() == PoolMode::GET_ALL)
+        if (stream_factory.getPoolMode() == PoolMode::GET_ALL)
             create_remote_queries = shard_info.hasRemoteConnections();
         else
             create_remote_queries = !create_local_queries;
@@ -101,7 +96,7 @@ BlockInputStreams Query::execute()
 
             for (const auto & address : shard_info.local_addresses)
             {
-                BlockInputStreamPtr stream = query_constructor.createLocal(query_ast, new_context, address);
+                BlockInputStreamPtr stream = stream_factory.createLocal(query_ast, new_context, address);
                 if (stream)
                     res.emplace_back(stream);
             }
@@ -114,7 +109,7 @@ BlockInputStreams Query::execute()
 
             if (actual_pools_per_thread == 1)
             {
-                res.emplace_back(query_constructor.createRemote(shard_info.pool, query, new_settings, throttler, context));
+                res.emplace_back(stream_factory.createRemote(shard_info.pool, query, new_settings, throttler, context));
                 ++current_thread;
             }
             else
@@ -122,7 +117,7 @@ BlockInputStreams Query::execute()
                 pools.push_back(shard_info.pool);
                 if (pools.size() == actual_pools_per_thread)
                 {
-                    res.emplace_back(query_constructor.createRemote(std::move(pools), query, new_settings, throttler, context));
+                    res.emplace_back(stream_factory.createRemote(std::move(pools), query, new_settings, throttler, context));
                     pools = ConnectionPoolWithFailoverPtrs();
                     ++current_thread;
                 }
diff --git a/dbms/src/Interpreters/ClusterProxy/executeQuery.h b/dbms/src/Interpreters/ClusterProxy/executeQuery.h
new file mode 100644
index 00000000000..28181fa0e42
--- /dev/null
+++ b/dbms/src/Interpreters/ClusterProxy/executeQuery.h
@@ -0,0 +1,29 @@
+#pragma once
+
+#include <Parsers/IAST.h>
+#include <Storages/IStorage.h>
+#include <Interpreters/Cluster.h>
+
+namespace DB
+{
+
+struct Settings;
+class Context;
+class Cluster;
+
+namespace ClusterProxy
+{
+
+class IStreamFactory;
+
+/// Execute a distributed query, creating a vector of BlockInputStreams, from which the result can be read.
+/// If `enable_shard_multiplexing` is false, each stream corresponds to a single shard.
+/// `stream_factory` object encapsulates the logic of creating streams for a different type of query
+/// (currently SELECT, DESCRIBE, or ALTER (for resharding)).
+BlockInputStreams executeQuery(
+        IStreamFactory & stream_factory, const ClusterPtr & cluster,
+        const ASTPtr & query_ast, const Context & context, const Settings & settings, bool enable_shard_multiplexing);
+
+}
+
+}
diff --git a/dbms/src/Storages/StorageDistributed.cpp b/dbms/src/Storages/StorageDistributed.cpp
index 160d50e304b..7e084c6d476 100644
--- a/dbms/src/Storages/StorageDistributed.cpp
+++ b/dbms/src/Storages/StorageDistributed.cpp
@@ -28,10 +28,10 @@
 #include <Interpreters/InterpreterAlterQuery.h>
 #include <Interpreters/InterpreterDescribeQuery.h>
 #include <Interpreters/ExpressionAnalyzer.h>
-#include <Interpreters/ClusterProxy/Query.h>
-#include <Interpreters/ClusterProxy/SelectQueryConstructor.h>
-#include <Interpreters/ClusterProxy/DescribeQueryConstructor.h>
-#include <Interpreters/ClusterProxy/AlterQueryConstructor.h>
+#include <Interpreters/ClusterProxy/executeQuery.h>
+#include <Interpreters/ClusterProxy/SelectStreamFactory.h>
+#include <Interpreters/ClusterProxy/DescribeStreamFactory.h>
+#include <Interpreters/ClusterProxy/AlterStreamFactory.h>
 
 #include <Core/Field.h>
 
@@ -222,11 +222,11 @@ BlockInputStreams StorageDistributed::read(
     //bool enable_shard_multiplexing = !(ast.order_expression_list && !ast.group_expression_list);
     bool enable_shard_multiplexing = false;
 
-    ClusterProxy::SelectQueryConstructor select_query_constructor(
+    ClusterProxy::SelectStreamFactory select_stream_factory(
         processed_stage,  QualifiedTableName{remote_database, remote_table}, external_tables);
 
-    return ClusterProxy::Query{select_query_constructor, cluster, modified_query_ast,
-        context, settings, enable_shard_multiplexing}.execute();
+    return ClusterProxy::executeQuery(
+            select_stream_factory, cluster, modified_query_ast, context, settings, enable_shard_multiplexing);
 }
 
 
@@ -356,10 +356,10 @@ void StorageDistributed::reshardPartitions(
         */
         bool enable_shard_multiplexing = false;
 
-        ClusterProxy::AlterQueryConstructor alter_query_constructor;
+        ClusterProxy::AlterStreamFactory alter_stream_factory;
 
-        BlockInputStreams streams = ClusterProxy::Query{alter_query_constructor, cluster, alter_query_ptr,
-            context, context.getSettingsRef(), enable_shard_multiplexing}.execute();
+        BlockInputStreams streams = ClusterProxy::executeQuery(
+                alter_stream_factory, cluster, alter_query_ptr, context, context.getSettingsRef(), enable_shard_multiplexing);
 
         /// This callback is called if an exception has occurred while attempting to read
         /// a block from a shard. This is to avoid a potential deadlock if other shards are
@@ -434,10 +434,10 @@ BlockInputStreams StorageDistributed::describe(const Context & context, const Se
       */
     bool enable_shard_multiplexing = false;
 
-    ClusterProxy::DescribeQueryConstructor describe_query_constructor;
+    ClusterProxy::DescribeStreamFactory describe_stream_factory;
 
-    return ClusterProxy::Query{describe_query_constructor, cluster, describe_query_ptr,
-        context, settings, enable_shard_multiplexing}.execute();
+    return ClusterProxy::executeQuery(
+            describe_stream_factory, cluster, describe_query_ptr, context, settings, enable_shard_multiplexing);
 }
 
 

From ee457eca8a37a211e32c903de7987059234d18ae Mon Sep 17 00:00:00 2001
From: Alexey Zatelepin <ztlpn@yandex-team.ru>
Date: Mon, 31 Jul 2017 17:07:40 +0300
Subject: [PATCH 126/281] remove shard multiplexing code, simplify
 [#CLICKHOUSE-3151]

---
 .../DataStreams/RemoteBlockInputStream.cpp    | 13 ---
 dbms/src/DataStreams/RemoteBlockInputStream.h |  8 --
 .../ClusterProxy/AlterStreamFactory.cpp       | 67 ++++++---------
 .../ClusterProxy/AlterStreamFactory.h         | 13 ++-
 .../ClusterProxy/DescribeStreamFactory.cpp    | 61 +++++--------
 .../ClusterProxy/DescribeStreamFactory.h      | 13 ++-
 .../ClusterProxy/IStreamFactory.h             | 17 ++--
 .../ClusterProxy/SelectStreamFactory.cpp      | 63 +++++---------
 .../ClusterProxy/SelectStreamFactory.h        | 13 ++-
 .../ClusterProxy/executeQuery.cpp             | 85 ++-----------------
 .../Interpreters/ClusterProxy/executeQuery.h  |  2 +-
 dbms/src/Storages/StorageDistributed.cpp      | 25 +-----
 12 files changed, 97 insertions(+), 283 deletions(-)

diff --git a/dbms/src/DataStreams/RemoteBlockInputStream.cpp b/dbms/src/DataStreams/RemoteBlockInputStream.cpp
index 4709ec37759..c1f67ba31e7 100644
--- a/dbms/src/DataStreams/RemoteBlockInputStream.cpp
+++ b/dbms/src/DataStreams/RemoteBlockInputStream.cpp
@@ -34,15 +34,6 @@ RemoteBlockInputStream::RemoteBlockInputStream(const ConnectionPoolWithFailoverP
     init(settings_);
 }
 
-RemoteBlockInputStream::RemoteBlockInputStream(ConnectionPoolWithFailoverPtrs && pools_, const String & query_,
-    const Settings * settings_, const Context & context_, ThrottlerPtr throttler_,
-    const Tables & external_tables_, QueryProcessingStage::Enum stage_)
-    : pools(std::move(pools_)), query(query_), throttler(throttler_), external_tables(external_tables_),
-    stage(stage_), context(context_)
-{
-    init(settings_);
-}
-
 RemoteBlockInputStream::~RemoteBlockInputStream()
 {
     /** If interrupted in the middle of the loop of communication with replicas, then interrupt
@@ -233,10 +224,6 @@ void RemoteBlockInputStream::createMultiplexedConnections()
         multiplexed_connections = std::make_unique<MultiplexedConnections>(
                 *pool, multiplexed_connections_settings, throttler,
                 append_extra_info, pool_mode, main_table_ptr);
-    else if (!pools.empty())
-        multiplexed_connections = std::make_unique<MultiplexedConnections>(
-                pools, multiplexed_connections_settings, throttler,
-                append_extra_info, pool_mode, main_table_ptr);
     else
         throw Exception("Internal error", ErrorCodes::LOGICAL_ERROR);
 }
diff --git a/dbms/src/DataStreams/RemoteBlockInputStream.h b/dbms/src/DataStreams/RemoteBlockInputStream.h
index 22fde419ea3..77f404391ae 100644
--- a/dbms/src/DataStreams/RemoteBlockInputStream.h
+++ b/dbms/src/DataStreams/RemoteBlockInputStream.h
@@ -30,11 +30,6 @@ public:
         const Context & context_, ThrottlerPtr throttler_ = nullptr, const Tables & external_tables_ = Tables(),
         QueryProcessingStage::Enum stage_ = QueryProcessingStage::Complete);
 
-    /// Takes a pool for each shard and gets one or several connections from it
-    RemoteBlockInputStream(ConnectionPoolWithFailoverPtrs && pools_, const String & query_, const Settings * settings_,
-        const Context & context_, ThrottlerPtr throttler_ = nullptr, const Tables & external_tables_ = Tables(),
-        QueryProcessingStage::Enum stage_ = QueryProcessingStage::Complete);
-
     ~RemoteBlockInputStream() override;
 
     /// Specify how we allocate connections on a shard.
@@ -101,9 +96,6 @@ private:
     /// One shard's connections pool
     ConnectionPoolWithFailoverPtr pool = nullptr;
 
-    /// Connections pools of one or several shards
-    ConnectionPoolWithFailoverPtrs pools;
-
     std::unique_ptr<MultiplexedConnections> multiplexed_connections;
 
     const String query;
diff --git a/dbms/src/Interpreters/ClusterProxy/AlterStreamFactory.cpp b/dbms/src/Interpreters/ClusterProxy/AlterStreamFactory.cpp
index 698e0f6729a..511d21c1628 100644
--- a/dbms/src/Interpreters/ClusterProxy/AlterStreamFactory.cpp
+++ b/dbms/src/Interpreters/ClusterProxy/AlterStreamFactory.cpp
@@ -6,54 +6,35 @@
 namespace DB
 {
 
-namespace
-{
-
-constexpr PoolMode pool_mode = PoolMode::GET_ONE;
-
-}
-
 namespace ClusterProxy
 {
 
-BlockInputStreamPtr AlterStreamFactory::createLocal(const ASTPtr & query_ast, const Context & context, const Cluster::Address & address)
+void AlterStreamFactory::createForShard(
+        const Cluster::ShardInfo & shard_info,
+        const String & query, const ASTPtr & query_ast,
+        const Context & context, const ThrottlerPtr & throttler,
+        BlockInputStreams & res)
 {
-    /// The ALTER query may be a resharding query that is a part of a distributed
-    /// job. Since the latter heavily relies on synchronization among its participating
-    /// nodes, it is very important to defer the execution of a local query so as
-    /// to prevent any deadlock.
-    auto interpreter = std::make_shared<InterpreterAlterQuery>(query_ast, context);
-    auto stream = std::make_shared<LazyBlockInputStream>(
-        [interpreter]() mutable
-        {
-            return interpreter->execute().in;
-        });
-    return stream;
-}
-
-BlockInputStreamPtr AlterStreamFactory::createRemote(
-        const ConnectionPoolWithFailoverPtr & pool, const std::string & query,
-        const Settings & settings, ThrottlerPtr throttler, const Context & context)
-{
-    auto stream = std::make_shared<RemoteBlockInputStream>(pool, query, &settings, context, throttler);
-    stream->setPoolMode(pool_mode);
-    return stream;
-}
-
-BlockInputStreamPtr AlterStreamFactory::createRemote(
-        ConnectionPoolWithFailoverPtrs && pools, const std::string & query,
-        const Settings & settings, ThrottlerPtr throttler, const Context & context)
-{
-    auto stream = std::make_shared<RemoteBlockInputStream>(std::move(pools), query, &settings, context, throttler);
-    stream->setPoolMode(pool_mode);
-    return stream;
-}
-
-PoolMode AlterStreamFactory::getPoolMode() const
-{
-    return pool_mode;
+    if (shard_info.isLocal())
+    {
+        /// The ALTER query may be a resharding query that is a part of a distributed
+        /// job. Since the latter heavily relies on synchronization among its participating
+        /// nodes, it is very important to defer the execution of a local query so as
+        /// to prevent any deadlock.
+        auto interpreter = std::make_shared<InterpreterAlterQuery>(query_ast, context);
+        res.emplace_back(std::make_shared<LazyBlockInputStream>(
+            [interpreter]() mutable
+            {
+                return interpreter->execute().in;
+            }));
+    }
+    else
+    {
+        auto stream = std::make_shared<RemoteBlockInputStream>(shard_info.pool, query, &context.getSettingsRef(), context, throttler);
+        stream->setPoolMode(PoolMode::GET_ONE);
+        res.emplace_back(std::move(stream));
+    }
 }
 
 }
-
 }
diff --git a/dbms/src/Interpreters/ClusterProxy/AlterStreamFactory.h b/dbms/src/Interpreters/ClusterProxy/AlterStreamFactory.h
index b0ea2e9b674..ff545f2da99 100644
--- a/dbms/src/Interpreters/ClusterProxy/AlterStreamFactory.h
+++ b/dbms/src/Interpreters/ClusterProxy/AlterStreamFactory.h
@@ -13,14 +13,11 @@ class AlterStreamFactory final : public IStreamFactory
 public:
     AlterStreamFactory() = default;
 
-    BlockInputStreamPtr createLocal(const ASTPtr & query_ast, const Context & context, const Cluster::Address & address) override;
-    BlockInputStreamPtr createRemote(
-            const ConnectionPoolWithFailoverPtr & pool, const std::string & query,
-            const Settings & settings, ThrottlerPtr throttler, const Context & context) override;
-    BlockInputStreamPtr createRemote(
-            ConnectionPoolWithFailoverPtrs && pools, const std::string & query,
-            const Settings & settings, ThrottlerPtr throttler, const Context & context) override;
-    PoolMode getPoolMode() const override;
+    virtual void createForShard(
+            const Cluster::ShardInfo & shard_info,
+            const String & query, const ASTPtr & query_ast,
+            const Context & context, const ThrottlerPtr & throttler,
+            BlockInputStreams & res) override;
 };
 
 }
diff --git a/dbms/src/Interpreters/ClusterProxy/DescribeStreamFactory.cpp b/dbms/src/Interpreters/ClusterProxy/DescribeStreamFactory.cpp
index 7dd2ff67a5b..4677e458bab 100644
--- a/dbms/src/Interpreters/ClusterProxy/DescribeStreamFactory.cpp
+++ b/dbms/src/Interpreters/ClusterProxy/DescribeStreamFactory.cpp
@@ -10,8 +10,6 @@ namespace DB
 namespace
 {
 
-constexpr PoolMode pool_mode = PoolMode::GET_ALL;
-
 BlockExtraInfo toBlockExtraInfo(const Cluster::Address & address)
 {
     BlockExtraInfo block_extra_info;
@@ -28,46 +26,31 @@ BlockExtraInfo toBlockExtraInfo(const Cluster::Address & address)
 namespace ClusterProxy
 {
 
-BlockInputStreamPtr DescribeStreamFactory::createLocal(const ASTPtr & query_ast, const Context & context, const Cluster::Address & address)
+void DescribeStreamFactory::createForShard(
+        const Cluster::ShardInfo & shard_info,
+        const String & query, const ASTPtr & query_ast,
+        const Context & context, const ThrottlerPtr & throttler,
+        BlockInputStreams & res)
 {
-    InterpreterDescribeQuery interpreter{query_ast, context};
-    BlockInputStreamPtr stream = interpreter.execute().in;
-
-    /** Materialization is needed, since from remote servers the constants come materialized.
-      * If you do not do this, different types (Const and non-Const) columns will be produced in different threads,
-      * And this is not allowed, since all code is based on the assumption that in the block stream all types are the same.
-      */
-    BlockInputStreamPtr materialized_stream = std::make_shared<MaterializingBlockInputStream>(stream);
-
-    return std::make_shared<BlockExtraInfoInputStream>(materialized_stream, toBlockExtraInfo(address));
-}
-
-BlockInputStreamPtr DescribeStreamFactory::createRemote(
-        const ConnectionPoolWithFailoverPtr & pool, const std::string & query,
-        const Settings & settings, ThrottlerPtr throttler, const Context & context)
-{
-    auto stream = std::make_shared<RemoteBlockInputStream>(pool, query, &settings, context, throttler);
-    stream->setPoolMode(pool_mode);
-    stream->appendExtraInfo();
-    return stream;
-}
-
-BlockInputStreamPtr DescribeStreamFactory::createRemote(
-        ConnectionPoolWithFailoverPtrs && pools, const std::string & query,
-        const Settings & settings, ThrottlerPtr throttler, const Context & context)
-{
-    auto stream =  std::make_shared<RemoteBlockInputStream>(std::move(pools), query, &settings, context, throttler);
-    stream->setPoolMode(pool_mode);
-    stream->appendExtraInfo();
-    return stream;
-}
-
-PoolMode DescribeStreamFactory::getPoolMode() const
-{
-    return pool_mode;
-}
+    for (const Cluster::Address & local_address : shard_info.local_addresses)
+    {
+        InterpreterDescribeQuery interpreter{query_ast, context};
+        BlockInputStreamPtr stream = interpreter.execute().in;
 
+        /** Materialization is needed, since from remote servers the constants come materialized.
+         * If you do not do this, different types (Const and non-Const) columns will be produced in different threads,
+         * And this is not allowed, since all code is based on the assumption that in the block stream all types are the same.
+         */
+        BlockInputStreamPtr materialized_stream = std::make_shared<MaterializingBlockInputStream>(stream);
+        res.emplace_back(std::make_shared<BlockExtraInfoInputStream>(materialized_stream, toBlockExtraInfo(local_address)));
+    }
 
+    auto remote_stream = std::make_shared<RemoteBlockInputStream>(
+            shard_info.pool, query, &context.getSettingsRef(), context, throttler);
+    remote_stream->setPoolMode(PoolMode::GET_ALL);
+    remote_stream->appendExtraInfo();
+    res.emplace_back(std::move(remote_stream));
 }
 
 }
+}
diff --git a/dbms/src/Interpreters/ClusterProxy/DescribeStreamFactory.h b/dbms/src/Interpreters/ClusterProxy/DescribeStreamFactory.h
index 8f0f7f6e4f0..5489b1f1816 100644
--- a/dbms/src/Interpreters/ClusterProxy/DescribeStreamFactory.h
+++ b/dbms/src/Interpreters/ClusterProxy/DescribeStreamFactory.h
@@ -13,14 +13,11 @@ class DescribeStreamFactory final : public IStreamFactory
 public:
     DescribeStreamFactory() = default;
 
-    BlockInputStreamPtr createLocal(const ASTPtr & query_ast, const Context & context, const Cluster::Address & address) override;
-    BlockInputStreamPtr createRemote(
-            const ConnectionPoolWithFailoverPtr & pool, const std::string & query,
-            const Settings & settings, ThrottlerPtr throttler, const Context & context) override;
-    BlockInputStreamPtr createRemote(
-            ConnectionPoolWithFailoverPtrs && pools, const std::string & query,
-            const Settings & settings, ThrottlerPtr throttler, const Context & context) override;
-    PoolMode getPoolMode() const override;
+    virtual void createForShard(
+            const Cluster::ShardInfo & shard_info,
+            const String & query, const ASTPtr & query_ast,
+            const Context & context, const ThrottlerPtr & throttler,
+            BlockInputStreams & res) override;
 };
 
 }
diff --git a/dbms/src/Interpreters/ClusterProxy/IStreamFactory.h b/dbms/src/Interpreters/ClusterProxy/IStreamFactory.h
index 0181aa33c2d..7c60c81cca6 100644
--- a/dbms/src/Interpreters/ClusterProxy/IStreamFactory.h
+++ b/dbms/src/Interpreters/ClusterProxy/IStreamFactory.h
@@ -23,18 +23,11 @@ class IStreamFactory
 public:
     virtual ~IStreamFactory() {}
 
-    /// Create an input stream for local query execution.
-    virtual BlockInputStreamPtr createLocal(const ASTPtr & query_ast, const Context & context, const Cluster::Address & address) = 0;
-    /// Create an input stream for remote query execution on one shard.
-    virtual BlockInputStreamPtr createRemote(
-            const ConnectionPoolWithFailoverPtr & pool, const std::string & query,
-            const Settings & settings, ThrottlerPtr throttler, const Context & context) = 0;
-    /// Create an input stream for remote query execution on one or more shards.
-    virtual BlockInputStreamPtr createRemote(
-            ConnectionPoolWithFailoverPtrs && pools, const std::string & query,
-            const Settings & new_settings, ThrottlerPtr throttler, const Context & context) = 0;
-    /// Specify how we allocate connections on a shard.
-    virtual PoolMode getPoolMode() const = 0;
+    virtual void createForShard(
+            const Cluster::ShardInfo & shard_info,
+            const String & query, const ASTPtr & query_ast, const Context & context,
+            const ThrottlerPtr & throttler,
+            BlockInputStreams & res) = 0;
 };
 
 }
diff --git a/dbms/src/Interpreters/ClusterProxy/SelectStreamFactory.cpp b/dbms/src/Interpreters/ClusterProxy/SelectStreamFactory.cpp
index 384164d95bb..f3c7b87e71e 100644
--- a/dbms/src/Interpreters/ClusterProxy/SelectStreamFactory.cpp
+++ b/dbms/src/Interpreters/ClusterProxy/SelectStreamFactory.cpp
@@ -6,13 +6,6 @@
 namespace DB
 {
 
-namespace
-{
-
-constexpr PoolMode pool_mode = PoolMode::GET_MANY;
-
-}
-
 namespace ClusterProxy
 {
 
@@ -26,43 +19,31 @@ SelectStreamFactory::SelectStreamFactory(
 {
 }
 
-BlockInputStreamPtr SelectStreamFactory::createLocal(const ASTPtr & query_ast, const Context & context, const Cluster::Address & address)
+void SelectStreamFactory::createForShard(
+        const Cluster::ShardInfo & shard_info,
+        const String & query, const ASTPtr & query_ast,
+        const Context & context, const ThrottlerPtr & throttler,
+        BlockInputStreams & res)
 {
-    InterpreterSelectQuery interpreter{query_ast, context, processed_stage};
-    BlockInputStreamPtr stream = interpreter.execute().in;
+    if (shard_info.isLocal())
+    {
+        InterpreterSelectQuery interpreter{query_ast, context, processed_stage};
+        BlockInputStreamPtr stream = interpreter.execute().in;
 
-    /** Materialization is needed, since from remote servers the constants come materialized.
-      * If you do not do this, different types (Const and non-Const) columns will be produced in different threads,
-      * And this is not allowed, since all code is based on the assumption that in the block stream all types are the same.
-      */
-    return std::make_shared<MaterializingBlockInputStream>(stream);
-}
-
-BlockInputStreamPtr SelectStreamFactory::createRemote(
-        const ConnectionPoolWithFailoverPtr & pool, const std::string & query,
-        const Settings & settings, ThrottlerPtr throttler, const Context & context)
-{
-    auto stream = std::make_shared<RemoteBlockInputStream>(pool, query, &settings, context, throttler, external_tables, processed_stage);
-    stream->setPoolMode(pool_mode);
-    stream->setMainTable(main_table);
-    return stream;
-}
-
-BlockInputStreamPtr SelectStreamFactory::createRemote(
-        ConnectionPoolWithFailoverPtrs && pools, const std::string & query,
-        const Settings & settings, ThrottlerPtr throttler, const Context & context)
-{
-    auto stream = std::make_shared<RemoteBlockInputStream>(std::move(pools), query, &settings, context, throttler, external_tables, processed_stage);
-    stream->setPoolMode(pool_mode);
-    stream->setMainTable(main_table);
-    return stream;
-}
-
-PoolMode SelectStreamFactory::getPoolMode() const
-{
-    return pool_mode;
+        /** Materialization is needed, since from remote servers the constants come materialized.
+         * If you do not do this, different types (Const and non-Const) columns will be produced in different threads,
+         * And this is not allowed, since all code is based on the assumption that in the block stream all types are the same.
+         */
+        res.emplace_back(std::make_shared<MaterializingBlockInputStream>(stream));
+    }
+    else
+    {
+        auto stream = std::make_shared<RemoteBlockInputStream>(shard_info.pool, query, &context.getSettingsRef(), context, throttler, external_tables, processed_stage);
+        stream->setPoolMode(PoolMode::GET_MANY);
+        stream->setMainTable(main_table);
+        res.emplace_back(std::move(stream));
+    }
 }
 
 }
-
 }
diff --git a/dbms/src/Interpreters/ClusterProxy/SelectStreamFactory.h b/dbms/src/Interpreters/ClusterProxy/SelectStreamFactory.h
index d4b4f82a659..b677d4a8cce 100644
--- a/dbms/src/Interpreters/ClusterProxy/SelectStreamFactory.h
+++ b/dbms/src/Interpreters/ClusterProxy/SelectStreamFactory.h
@@ -18,14 +18,11 @@ public:
             QualifiedTableName main_table,
             const Tables & external_tables);
 
-    BlockInputStreamPtr createLocal(const ASTPtr & query_ast, const Context & context, const Cluster::Address & address) override;
-    BlockInputStreamPtr createRemote(
-            const ConnectionPoolWithFailoverPtr & pool, const std::string & query,
-            const Settings & settings, ThrottlerPtr throttler, const Context & context) override;
-    BlockInputStreamPtr createRemote(
-            ConnectionPoolWithFailoverPtrs && pools, const std::string & query,
-            const Settings & settings, ThrottlerPtr throttler, const Context & context) override;
-    PoolMode getPoolMode() const override;
+    virtual void createForShard(
+            const Cluster::ShardInfo & shard_info,
+            const String & query, const ASTPtr & query_ast,
+            const Context & context, const ThrottlerPtr & throttler,
+            BlockInputStreams & res) override;
 
 private:
     QueryProcessingStage::Enum processed_stage;
diff --git a/dbms/src/Interpreters/ClusterProxy/executeQuery.cpp b/dbms/src/Interpreters/ClusterProxy/executeQuery.cpp
index 4d656014b59..cf1c6e6db36 100644
--- a/dbms/src/Interpreters/ClusterProxy/executeQuery.cpp
+++ b/dbms/src/Interpreters/ClusterProxy/executeQuery.cpp
@@ -16,7 +16,7 @@ namespace ClusterProxy
 
 BlockInputStreams executeQuery(
         IStreamFactory & stream_factory, const ClusterPtr & cluster,
-        const ASTPtr & query_ast, const Context & context, const Settings & settings, bool enable_shard_multiplexing)
+        const ASTPtr & query_ast, const Context & context, const Settings & settings)
 {
     BlockInputStreams res;
 
@@ -36,6 +36,9 @@ BlockInputStreams executeQuery(
     new_settings.limits.max_memory_usage_for_user.changed = false;
     new_settings.limits.max_memory_usage_for_all_queries.changed = false;
 
+    Context new_context(context);
+    new_context.setSettings(new_settings);
+
     /// Network bandwidth limit, if needed.
     ThrottlerPtr throttler;
     if (settings.limits.max_network_bandwidth || settings.limits.max_network_bytes)
@@ -44,86 +47,8 @@ BlockInputStreams executeQuery(
             settings.limits.max_network_bytes,
             "Limit for bytes to send or receive over network exceeded.");
 
-    /// Spread shards by threads uniformly.
-
-    size_t remote_count = 0;
-
-    if (stream_factory.getPoolMode() == PoolMode::GET_ALL)
-    {
-        for (const auto & shard_info : cluster->getShardsInfo())
-        {
-            if (shard_info.hasRemoteConnections())
-                ++remote_count;
-        }
-    }
-    else
-        remote_count = cluster->getRemoteShardCount();
-
-    size_t thread_count;
-
-    if (!enable_shard_multiplexing)
-        thread_count = remote_count;
-    else if (remote_count == 0)
-        thread_count = 0;
-    else if (settings.max_distributed_processing_threads == 0)
-        thread_count = 1;
-    else
-        thread_count = std::min(remote_count, static_cast<size_t>(settings.max_distributed_processing_threads));
-
-    size_t pools_per_thread = (thread_count > 0) ? (remote_count / thread_count) : 0;
-    size_t remainder = (thread_count > 0) ? (remote_count % thread_count) : 0;
-
-    ConnectionPoolWithFailoverPtrs pools;
-
-    /// Loop over shards.
-    size_t current_thread = 0;
     for (const auto & shard_info : cluster->getShardsInfo())
-    {
-        bool create_local_queries = shard_info.isLocal();
-
-        bool create_remote_queries;
-        if (stream_factory.getPoolMode() == PoolMode::GET_ALL)
-            create_remote_queries = shard_info.hasRemoteConnections();
-        else
-            create_remote_queries = !create_local_queries;
-
-        if (create_local_queries)
-        {
-            /// Add queries to localhost (they are processed in-process, without network communication).
-
-            Context new_context = context;
-            new_context.setSettings(new_settings);
-
-            for (const auto & address : shard_info.local_addresses)
-            {
-                BlockInputStreamPtr stream = stream_factory.createLocal(query_ast, new_context, address);
-                if (stream)
-                    res.emplace_back(stream);
-            }
-        }
-
-        if (create_remote_queries)
-        {
-            size_t excess = (current_thread < remainder) ? 1 : 0;
-            size_t actual_pools_per_thread = pools_per_thread + excess;
-
-            if (actual_pools_per_thread == 1)
-            {
-                res.emplace_back(stream_factory.createRemote(shard_info.pool, query, new_settings, throttler, context));
-                ++current_thread;
-            }
-            else
-            {
-                pools.push_back(shard_info.pool);
-                if (pools.size() == actual_pools_per_thread)
-                {
-                    res.emplace_back(stream_factory.createRemote(std::move(pools), query, new_settings, throttler, context));
-                    pools = ConnectionPoolWithFailoverPtrs();
-                    ++current_thread;
-                }
-            }
-        }
-    }
+        stream_factory.createForShard(shard_info, query, query_ast, new_context, throttler, res);
 
     return res;
 }
diff --git a/dbms/src/Interpreters/ClusterProxy/executeQuery.h b/dbms/src/Interpreters/ClusterProxy/executeQuery.h
index 28181fa0e42..a28fc9fed97 100644
--- a/dbms/src/Interpreters/ClusterProxy/executeQuery.h
+++ b/dbms/src/Interpreters/ClusterProxy/executeQuery.h
@@ -22,7 +22,7 @@ class IStreamFactory;
 /// (currently SELECT, DESCRIBE, or ALTER (for resharding)).
 BlockInputStreams executeQuery(
         IStreamFactory & stream_factory, const ClusterPtr & cluster,
-        const ASTPtr & query_ast, const Context & context, const Settings & settings, bool enable_shard_multiplexing);
+        const ASTPtr & query_ast, const Context & context, const Settings & settings);
 
 }
 
diff --git a/dbms/src/Storages/StorageDistributed.cpp b/dbms/src/Storages/StorageDistributed.cpp
index 7e084c6d476..7b75cb54497 100644
--- a/dbms/src/Storages/StorageDistributed.cpp
+++ b/dbms/src/Storages/StorageDistributed.cpp
@@ -213,20 +213,11 @@ BlockInputStreams StorageDistributed::read(
     if (settings.global_subqueries_method == GlobalSubqueriesMethod::PUSH)
         external_tables = context.getExternalTables();
 
-    /// Disable multiplexing of shards if there is an ORDER BY without GROUP BY.
-    //const ASTSelectQuery & ast = *(static_cast<const ASTSelectQuery *>(modified_query_ast.get()));
-
-    /** The functionality of shard_multiplexing is not completed - turn it off.
-      * (Because connecting to different shards within a single thread is not done in parallel.)
-      */
-    //bool enable_shard_multiplexing = !(ast.order_expression_list && !ast.group_expression_list);
-    bool enable_shard_multiplexing = false;
-
     ClusterProxy::SelectStreamFactory select_stream_factory(
         processed_stage,  QualifiedTableName{remote_database, remote_table}, external_tables);
 
     return ClusterProxy::executeQuery(
-            select_stream_factory, cluster, modified_query_ast, context, settings, enable_shard_multiplexing);
+            select_stream_factory, cluster, modified_query_ast, context, settings);
 }
 
 
@@ -351,15 +342,10 @@ void StorageDistributed::reshardPartitions(
 
         resharding_worker.registerQuery(coordinator_id, queryToString(alter_query_ptr));
 
-        /** The functionality of shard_multiplexing is not completed - turn it off.
-        * (Because connecting to different shards within a single thread is not done in parallel.)
-        */
-        bool enable_shard_multiplexing = false;
-
         ClusterProxy::AlterStreamFactory alter_stream_factory;
 
         BlockInputStreams streams = ClusterProxy::executeQuery(
-                alter_stream_factory, cluster, alter_query_ptr, context, context.getSettingsRef(), enable_shard_multiplexing);
+                alter_stream_factory, cluster, alter_query_ptr, context, context.getSettingsRef());
 
         /// This callback is called if an exception has occurred while attempting to read
         /// a block from a shard. This is to avoid a potential deadlock if other shards are
@@ -429,15 +415,10 @@ BlockInputStreams StorageDistributed::describe(const Context & context, const Se
     describe_query.database = remote_database;
     describe_query.table = remote_table;
 
-    /** The functionality of shard_multiplexing is not completed - turn it off.
-      * (Because connecting connections to different shards within a single thread is not done in parallel.)
-      */
-    bool enable_shard_multiplexing = false;
-
     ClusterProxy::DescribeStreamFactory describe_stream_factory;
 
     return ClusterProxy::executeQuery(
-            describe_stream_factory, cluster, describe_query_ptr, context, settings, enable_shard_multiplexing);
+            describe_stream_factory, cluster, describe_query_ptr, context, settings);
 }
 
 

From 3f3b7f61e2479035a457fb579431e575c34bd977 Mon Sep 17 00:00:00 2001
From: Alexey Zatelepin <ztlpn@yandex-team.ru>
Date: Mon, 31 Jul 2017 18:03:22 +0300
Subject: [PATCH 127/281] always send changed settings for remote connections
 [#CLICKHOUSE-3151]

---
 dbms/src/Client/MultiplexedConnections.cpp    | 62 +++++++------------
 dbms/src/Client/MultiplexedConnections.h      | 10 ++-
 .../DataStreams/RemoteBlockInputStream.cpp    | 26 +++-----
 dbms/src/DataStreams/RemoteBlockInputStream.h | 17 +++--
 .../ClickHouseDictionarySource.cpp            |  4 +-
 .../ClusterProxy/AlterStreamFactory.cpp       |  2 +-
 .../ClusterProxy/DescribeStreamFactory.cpp    |  2 +-
 .../ClusterProxy/SelectStreamFactory.cpp      |  2 +-
 dbms/src/Server/Benchmark.cpp                 |  2 +-
 dbms/src/Server/PerformanceTest.cpp           |  2 +-
 .../Storages/StorageReplicatedMergeTree.cpp   |  2 +-
 .../getStructureOfRemoteTable.cpp             |  5 +-
 12 files changed, 50 insertions(+), 86 deletions(-)

diff --git a/dbms/src/Client/MultiplexedConnections.cpp b/dbms/src/Client/MultiplexedConnections.cpp
index 26f517187bf..0eb5b6c290e 100644
--- a/dbms/src/Client/MultiplexedConnections.cpp
+++ b/dbms/src/Client/MultiplexedConnections.cpp
@@ -12,12 +12,9 @@ namespace ErrorCodes
 }
 
 
-MultiplexedConnections::MultiplexedConnections(Connection * connection_, const Settings * settings_, ThrottlerPtr throttler_)
+MultiplexedConnections::MultiplexedConnections(Connection & connection, const Settings & settings_, ThrottlerPtr throttler_)
     : settings(settings_), throttler(throttler_), supports_parallel_execution(false)
 {
-    if (connection_ == nullptr)
-        throw Exception("Invalid connection specified", ErrorCodes::LOGICAL_ERROR);
-
     active_connection_total_count = 1;
 
     ShardState shard_state;
@@ -30,8 +27,8 @@ MultiplexedConnections::MultiplexedConnections(Connection * connection_, const S
     replica_state.connection_index = 0;
     replica_state.shard_state = &shard_states[0];
 
-    connection_->setThrottler(throttler);
-    connections.push_back(connection_);
+    connection.setThrottler(throttler);
+    connections.push_back(&connection);
 
     auto res = replica_map.emplace(connections[0]->socket.impl()->sockfd(), replica_state);
     if (!res.second)
@@ -39,7 +36,7 @@ MultiplexedConnections::MultiplexedConnections(Connection * connection_, const S
 }
 
 MultiplexedConnections::MultiplexedConnections(
-        ConnectionPoolWithFailover & pool_, const Settings * settings_, ThrottlerPtr throttler_,
+        ConnectionPoolWithFailover & pool_, const Settings & settings_, ThrottlerPtr throttler_,
         bool append_extra_info, PoolMode pool_mode_, const QualifiedTableName * main_table)
     : settings(settings_), throttler(throttler_), pool_mode(pool_mode_)
 {
@@ -53,7 +50,7 @@ MultiplexedConnections::MultiplexedConnections(
 }
 
 MultiplexedConnections::MultiplexedConnections(
-        const ConnectionPoolWithFailoverPtrs & pools_, const Settings * settings_, ThrottlerPtr throttler_,
+        const ConnectionPoolWithFailoverPtrs & pools_, const Settings & settings_, ThrottlerPtr throttler_,
         bool append_extra_info, PoolMode pool_mode_, const QualifiedTableName * main_table)
     : settings(settings_), throttler(throttler_), pool_mode(pool_mode_)
 {
@@ -110,44 +107,27 @@ void MultiplexedConnections::sendQuery(
 
     if (supports_parallel_execution)
     {
-        if (settings == nullptr)
+        /// Each shard has one or more replicas.
+        auto it = connections.begin();
+        for (const auto & shard_state : shard_states)
         {
-            /// Each shard has one address.
-            auto it = connections.begin();
-            for (size_t i = 0; i < shard_states.size(); ++i)
+            Settings query_settings = settings;
+            query_settings.parallel_replicas_count = shard_state.active_connection_count;
+
+            UInt64 offset = 0;
+
+            for (size_t i = 0; i < shard_state.allocated_connection_count; ++i)
             {
                 Connection * connection = *it;
                 if (connection == nullptr)
                     throw Exception("MultiplexedConnections: Internal error", ErrorCodes::LOGICAL_ERROR);
 
-                connection->sendQuery(query, query_id, stage, nullptr, client_info, with_pending_data);
+                query_settings.parallel_replica_offset = offset;
+                connection->sendQuery(query, query_id, stage, &query_settings, client_info, with_pending_data);
+                ++offset;
                 ++it;
             }
         }
-        else
-        {
-            /// Each shard has one or more replicas.
-            auto it = connections.begin();
-            for (const auto & shard_state : shard_states)
-            {
-                Settings query_settings = *settings;
-                query_settings.parallel_replicas_count = shard_state.active_connection_count;
-
-                UInt64 offset = 0;
-
-                for (size_t i = 0; i < shard_state.allocated_connection_count; ++i)
-                {
-                    Connection * connection = *it;
-                    if (connection == nullptr)
-                        throw Exception("MultiplexedConnections: Internal error", ErrorCodes::LOGICAL_ERROR);
-
-                    query_settings.parallel_replica_offset = offset;
-                    connection->sendQuery(query, query_id, stage, &query_settings, client_info, with_pending_data);
-                    ++offset;
-                    ++it;
-                }
-            }
-        }
     }
     else
     {
@@ -155,7 +135,7 @@ void MultiplexedConnections::sendQuery(
         if (connection == nullptr)
             throw Exception("MultiplexedConnections: Internal error", ErrorCodes::LOGICAL_ERROR);
 
-        connection->sendQuery(query, query_id, stage, settings, client_info, with_pending_data);
+        connection->sendQuery(query, query_id, stage, &settings, client_info, with_pending_data);
     }
 
     sent_query = true;
@@ -280,9 +260,9 @@ void MultiplexedConnections::initFromShard(ConnectionPoolWithFailover & pool, co
 {
     std::vector<IConnectionPool::Entry> entries;
     if (main_table)
-        entries = pool.getManyChecked(settings, pool_mode, *main_table);
+        entries = pool.getManyChecked(&settings, pool_mode, *main_table);
     else
-        entries = pool.getMany(settings, pool_mode);
+        entries = pool.getMany(&settings, pool_mode);
 
     /// If getMany() did not allocate connections and did not throw exceptions, this means that
     /// `skip_unavailable_shards` was set. Then just return.
@@ -424,7 +404,7 @@ MultiplexedConnections::ReplicaMap::iterator MultiplexedConnections::waitForRead
                 read_list.push_back(connection->socket);
         }
 
-        int n = Poco::Net::Socket::select(read_list, write_list, except_list, settings->receive_timeout);
+        int n = Poco::Net::Socket::select(read_list, write_list, except_list, settings.receive_timeout);
 
         if (n == 0)
             throw Exception("Timeout exceeded while reading from " + dumpAddressesUnlocked(), ErrorCodes::TIMEOUT_EXCEEDED);
diff --git a/dbms/src/Client/MultiplexedConnections.h b/dbms/src/Client/MultiplexedConnections.h
index c26bdad16a0..6921c8aa7d6 100644
--- a/dbms/src/Client/MultiplexedConnections.h
+++ b/dbms/src/Client/MultiplexedConnections.h
@@ -20,22 +20,20 @@ class MultiplexedConnections final : private boost::noncopyable
 {
 public:
     /// Accepts ready connection.
-    MultiplexedConnections(Connection * connection_, const Settings * settings_, ThrottlerPtr throttler_);
+    MultiplexedConnections(Connection & connection, const Settings & settings_, ThrottlerPtr throttler_);
 
     /** Accepts a pool from which it will be necessary to get one or more connections.
       * If the append_extra_info flag is set, additional information appended to each received block.
-      * If the get_all_replicas flag is set, all connections are selected.
       */
     MultiplexedConnections(
-            ConnectionPoolWithFailover & pool_, const Settings * settings_, ThrottlerPtr throttler_,
+            ConnectionPoolWithFailover & pool_, const Settings & settings_, ThrottlerPtr throttler_,
             bool append_extra_info, PoolMode pool_mode_, const QualifiedTableName * main_table = nullptr);
 
     /** Accepts pools, one for each shard, from which one will need to get one or more connections.
       * If the append_extra_info flag is set, additional information appended to each received block.
-      * If the do_broadcast flag is set, all connections are received.
       */
     MultiplexedConnections(
-            const ConnectionPoolWithFailoverPtrs & pools_, const Settings * settings_, ThrottlerPtr throttler_,
+            const ConnectionPoolWithFailoverPtrs & pools_, const Settings & settings_, ThrottlerPtr throttler_,
             bool append_extra_info, PoolMode pool_mode_, const QualifiedTableName * main_table = nullptr);
 
     /// Send all content of external tables to replicas.
@@ -131,7 +129,7 @@ private:
     void invalidateReplica(ReplicaMap::iterator it);
 
 private:
-    const Settings * settings;
+    const Settings & settings;
 
     Connections connections;
     ReplicaMap replica_map;
diff --git a/dbms/src/DataStreams/RemoteBlockInputStream.cpp b/dbms/src/DataStreams/RemoteBlockInputStream.cpp
index c1f67ba31e7..9ba4f57899c 100644
--- a/dbms/src/DataStreams/RemoteBlockInputStream.cpp
+++ b/dbms/src/DataStreams/RemoteBlockInputStream.cpp
@@ -17,21 +17,23 @@ namespace ErrorCodes
 
 
 RemoteBlockInputStream::RemoteBlockInputStream(Connection & connection_, const String & query_,
-    const Settings * settings_, const Context & context_, ThrottlerPtr throttler_,
+    const Context & context_, const Settings * settings, ThrottlerPtr throttler_,
     const Tables & external_tables_, QueryProcessingStage::Enum stage_)
     : connection(&connection_), query(query_), throttler(throttler_), external_tables(external_tables_),
     stage(stage_), context(context_)
 {
-    init(settings_);
+    if (settings)
+        context.setSettings(*settings);
 }
 
 RemoteBlockInputStream::RemoteBlockInputStream(const ConnectionPoolWithFailoverPtr & pool_, const String & query_,
-    const Settings * settings_, const Context & context_, ThrottlerPtr throttler_,
+    const Context & context_, const Settings * settings, ThrottlerPtr throttler_,
     const Tables & external_tables_, QueryProcessingStage::Enum stage_)
     : pool(pool_), query(query_), throttler(throttler_), external_tables(external_tables_),
     stage(stage_), context(context_)
 {
-    init(settings_);
+    if (settings)
+        context.setSettings(*settings);
 }
 
 RemoteBlockInputStream::~RemoteBlockInputStream()
@@ -215,30 +217,18 @@ void RemoteBlockInputStream::readSuffixImpl()
 
 void RemoteBlockInputStream::createMultiplexedConnections()
 {
-    Settings * multiplexed_connections_settings = send_settings ? &context.getSettingsRef() : nullptr;
     const QualifiedTableName * main_table_ptr = main_table ? &main_table.value() : nullptr;
     if (connection != nullptr)
         multiplexed_connections = std::make_unique<MultiplexedConnections>(
-                connection, multiplexed_connections_settings, throttler);
+                *connection, context.getSettingsRef(), throttler);
     else if (pool != nullptr)
         multiplexed_connections = std::make_unique<MultiplexedConnections>(
-                *pool, multiplexed_connections_settings, throttler,
+                *pool, context.getSettingsRef(), throttler,
                 append_extra_info, pool_mode, main_table_ptr);
     else
         throw Exception("Internal error", ErrorCodes::LOGICAL_ERROR);
 }
 
-void RemoteBlockInputStream::init(const Settings * settings)
-{
-    if (settings)
-    {
-        send_settings = true;
-        context.setSettings(*settings);
-    }
-    else
-        send_settings = false;
-}
-
 void RemoteBlockInputStream::sendQuery()
 {
     createMultiplexedConnections();
diff --git a/dbms/src/DataStreams/RemoteBlockInputStream.h b/dbms/src/DataStreams/RemoteBlockInputStream.h
index 77f404391ae..dcee8995560 100644
--- a/dbms/src/DataStreams/RemoteBlockInputStream.h
+++ b/dbms/src/DataStreams/RemoteBlockInputStream.h
@@ -20,14 +20,16 @@ namespace DB
 class RemoteBlockInputStream : public IProfilingBlockInputStream
 {
 public:
-    /// Takes already set connection
-    RemoteBlockInputStream(Connection & connection_, const String & query_, const Settings * settings_,
-        const Context & context_, ThrottlerPtr throttler_ = nullptr, const Tables & external_tables_ = Tables(),
+    /// Takes already set connection.
+    /// If `settings` is nullptr, settings will be taken from context.
+    RemoteBlockInputStream(Connection & connection_, const String & query_, const Context & context_,
+        const Settings * settings = nullptr, ThrottlerPtr throttler_ = nullptr, const Tables & external_tables_ = Tables(),
         QueryProcessingStage::Enum stage_ = QueryProcessingStage::Complete);
 
-    /// Takes a pool and gets one or several connections from it
-    RemoteBlockInputStream(const ConnectionPoolWithFailoverPtr & pool_, const String & query_, const Settings * settings_,
-        const Context & context_, ThrottlerPtr throttler_ = nullptr, const Tables & external_tables_ = Tables(),
+    /// Takes a pool and gets one or several connections from it.
+    /// If `settings` is nullptr, settings will be taken from context.
+    RemoteBlockInputStream(const ConnectionPoolWithFailoverPtr & pool_, const String & query_, const Context & context_,
+        const Settings * settings = nullptr, ThrottlerPtr throttler_ = nullptr, const Tables & external_tables_ = Tables(),
         QueryProcessingStage::Enum stage_ = QueryProcessingStage::Complete);
 
     ~RemoteBlockInputStream() override;
@@ -82,8 +84,6 @@ protected:
     bool hasThrownException() const;
 
 private:
-    void init(const Settings * settings);
-
     void sendQuery();
 
     /// If wasn't sent yet, send request to cancell all connections to replicas
@@ -99,7 +99,6 @@ private:
     std::unique_ptr<MultiplexedConnections> multiplexed_connections;
 
     const String query;
-    bool send_settings;
     /// If != nullptr, used to limit network trafic
     ThrottlerPtr throttler;
     /// Temporary tables needed to be sent to remote servers
diff --git a/dbms/src/Dictionaries/ClickHouseDictionarySource.cpp b/dbms/src/Dictionaries/ClickHouseDictionarySource.cpp
index 5c52616f82e..894d3207094 100644
--- a/dbms/src/Dictionaries/ClickHouseDictionarySource.cpp
+++ b/dbms/src/Dictionaries/ClickHouseDictionarySource.cpp
@@ -71,7 +71,7 @@ BlockInputStreamPtr ClickHouseDictionarySource::loadAll()
       */
     if (is_local)
         return executeQuery(load_all_query, context, true).in;
-    return std::make_shared<RemoteBlockInputStream>(pool, load_all_query, nullptr, context);
+    return std::make_shared<RemoteBlockInputStream>(pool, load_all_query, context);
 }
 
 
@@ -101,7 +101,7 @@ BlockInputStreamPtr ClickHouseDictionarySource::createStreamForSelectiveLoad(con
 {
     if (is_local)
         return executeQuery(query, context, true).in;
-    return std::make_shared<RemoteBlockInputStream>(pool, query, nullptr, context);
+    return std::make_shared<RemoteBlockInputStream>(pool, query, context);
 }
 
 }
diff --git a/dbms/src/Interpreters/ClusterProxy/AlterStreamFactory.cpp b/dbms/src/Interpreters/ClusterProxy/AlterStreamFactory.cpp
index 511d21c1628..ae70ced1e14 100644
--- a/dbms/src/Interpreters/ClusterProxy/AlterStreamFactory.cpp
+++ b/dbms/src/Interpreters/ClusterProxy/AlterStreamFactory.cpp
@@ -30,7 +30,7 @@ void AlterStreamFactory::createForShard(
     }
     else
     {
-        auto stream = std::make_shared<RemoteBlockInputStream>(shard_info.pool, query, &context.getSettingsRef(), context, throttler);
+        auto stream = std::make_shared<RemoteBlockInputStream>(shard_info.pool, query, context, nullptr, throttler);
         stream->setPoolMode(PoolMode::GET_ONE);
         res.emplace_back(std::move(stream));
     }
diff --git a/dbms/src/Interpreters/ClusterProxy/DescribeStreamFactory.cpp b/dbms/src/Interpreters/ClusterProxy/DescribeStreamFactory.cpp
index 4677e458bab..570e655263b 100644
--- a/dbms/src/Interpreters/ClusterProxy/DescribeStreamFactory.cpp
+++ b/dbms/src/Interpreters/ClusterProxy/DescribeStreamFactory.cpp
@@ -46,7 +46,7 @@ void DescribeStreamFactory::createForShard(
     }
 
     auto remote_stream = std::make_shared<RemoteBlockInputStream>(
-            shard_info.pool, query, &context.getSettingsRef(), context, throttler);
+            shard_info.pool, query, context, nullptr, throttler);
     remote_stream->setPoolMode(PoolMode::GET_ALL);
     remote_stream->appendExtraInfo();
     res.emplace_back(std::move(remote_stream));
diff --git a/dbms/src/Interpreters/ClusterProxy/SelectStreamFactory.cpp b/dbms/src/Interpreters/ClusterProxy/SelectStreamFactory.cpp
index f3c7b87e71e..86d3abd89a6 100644
--- a/dbms/src/Interpreters/ClusterProxy/SelectStreamFactory.cpp
+++ b/dbms/src/Interpreters/ClusterProxy/SelectStreamFactory.cpp
@@ -38,7 +38,7 @@ void SelectStreamFactory::createForShard(
     }
     else
     {
-        auto stream = std::make_shared<RemoteBlockInputStream>(shard_info.pool, query, &context.getSettingsRef(), context, throttler, external_tables, processed_stage);
+        auto stream = std::make_shared<RemoteBlockInputStream>(shard_info.pool, query, context, nullptr, throttler, external_tables, processed_stage);
         stream->setPoolMode(PoolMode::GET_MANY);
         stream->setMainTable(main_table);
         res.emplace_back(std::move(stream));
diff --git a/dbms/src/Server/Benchmark.cpp b/dbms/src/Server/Benchmark.cpp
index 07fc1ffb1d1..986a2efac09 100644
--- a/dbms/src/Server/Benchmark.cpp
+++ b/dbms/src/Server/Benchmark.cpp
@@ -296,7 +296,7 @@ private:
     void execute(ConnectionPool::Entry & connection, Query & query)
     {
         Stopwatch watch;
-        RemoteBlockInputStream stream(*connection, query, &settings, global_context, nullptr, Tables(), query_processing_stage);
+        RemoteBlockInputStream stream(*connection, query, global_context, &settings, nullptr, Tables(), query_processing_stage);
 
         Progress progress;
         stream.setProgressCallback([&progress](const Progress & value) { progress.incrementPiecewiseAtomically(value); });
diff --git a/dbms/src/Server/PerformanceTest.cpp b/dbms/src/Server/PerformanceTest.cpp
index bd2c9b6726e..b421a437e94 100644
--- a/dbms/src/Server/PerformanceTest.cpp
+++ b/dbms/src/Server/PerformanceTest.cpp
@@ -1072,7 +1072,7 @@ private:
         statistics.last_query_rows_read = 0;
         statistics.last_query_bytes_read = 0;
 
-        RemoteBlockInputStream stream(connection, query, &settings, global_context, nullptr, Tables() /*, query_processing_stage*/);
+        RemoteBlockInputStream stream(connection, query, global_context, &settings);
 
         stream.setProgressCallback([&](const Progress & value)
             {
diff --git a/dbms/src/Storages/StorageReplicatedMergeTree.cpp b/dbms/src/Storages/StorageReplicatedMergeTree.cpp
index 13074932363..ad6f281501b 100644
--- a/dbms/src/Storages/StorageReplicatedMergeTree.cpp
+++ b/dbms/src/Storages/StorageReplicatedMergeTree.cpp
@@ -3086,7 +3086,7 @@ void StorageReplicatedMergeTree::sendRequestToLeaderReplica(const ASTPtr & query
         leader_address.database,
         "", "", "ClickHouse replica");
 
-    RemoteBlockInputStream stream(connection, formattedAST(new_query), &settings, context);
+    RemoteBlockInputStream stream(connection, formattedAST(new_query), context, &settings);
     NullBlockOutputStream output;
 
     copyData(stream, output);
diff --git a/dbms/src/TableFunctions/getStructureOfRemoteTable.cpp b/dbms/src/TableFunctions/getStructureOfRemoteTable.cpp
index 64afb0af67e..81f1cce93cf 100644
--- a/dbms/src/TableFunctions/getStructureOfRemoteTable.cpp
+++ b/dbms/src/TableFunctions/getStructureOfRemoteTable.cpp
@@ -25,7 +25,6 @@ NamesAndTypesList getStructureOfRemoteTable(
 {
     /// Request for a table description
     String query = "DESC TABLE " + backQuoteIfNeed(database) + "." + backQuoteIfNeed(table);
-    Settings settings = context.getSettings();
     NamesAndTypesList res;
 
     /// Send to the first any remote shard.
@@ -34,9 +33,7 @@ NamesAndTypesList getStructureOfRemoteTable(
     if (shard_info.isLocal())
         return context.getTable(database, table)->getColumnsList();
 
-    BlockInputStreamPtr input = std::make_shared<RemoteBlockInputStream>(
-            shard_info.pool, query, &settings, context, nullptr,
-            Tables(), QueryProcessingStage::Complete);
+    BlockInputStreamPtr input = std::make_shared<RemoteBlockInputStream>(shard_info.pool, query, context);
     input->readPrefix();
 
     const DataTypeFactory & data_type_factory = DataTypeFactory::instance();

From 36dc7857a16987815e1b47653c3c00ddb2bdfcea Mon Sep 17 00:00:00 2001
From: Alexey Zatelepin <ztlpn@yandex-team.ru>
Date: Tue, 1 Aug 2017 15:25:02 +0300
Subject: [PATCH 128/281] make LazyBlockInputStream thread-safe
 [#CLICKHOUSE-3151]

---
 dbms/src/DataStreams/LazyBlockInputStream.h | 39 ++++++++++++++++++---
 1 file changed, 34 insertions(+), 5 deletions(-)

diff --git a/dbms/src/DataStreams/LazyBlockInputStream.h b/dbms/src/DataStreams/LazyBlockInputStream.h
index 6239bfcafb4..795e26aa012 100644
--- a/dbms/src/DataStreams/LazyBlockInputStream.h
+++ b/dbms/src/DataStreams/LazyBlockInputStream.h
@@ -16,17 +16,31 @@ public:
     using Generator = std::function<BlockInputStreamPtr()>;
 
     LazyBlockInputStream(Generator generator_)
-        : generator(generator_) {}
+        : generator(std::move(generator_))
+    {
+    }
 
-    String getName() const override { return "Lazy"; }
+    LazyBlockInputStream(const char * name_, Generator generator_)
+        : name(name_)
+        , generator(std::move(generator_))
+    {
+    }
+
+    String getName() const override { return name; }
 
     String getID() const override
     {
         std::stringstream res;
-        res << "Lazy(" << this << ")";
+        res << name << "(" << this << ")";
         return res.str();
     }
 
+    void cancel() override
+    {
+        std::lock_guard<std::mutex> lock(cancel_mutex);
+        IProfilingBlockInputStream::cancel();
+    }
+
 protected:
     Block readImpl() override
     {
@@ -37,9 +51,9 @@ protected:
             if (!input)
                 return Block();
 
-            children.push_back(input);
+            auto * p_input = dynamic_cast<IProfilingBlockInputStream *>(input.get());
 
-            if (IProfilingBlockInputStream * p_input = dynamic_cast<IProfilingBlockInputStream *>(input.get()))
+            if (p_input)
             {
                 /// They could have been set before, but were not passed into the `input`.
                 if (progress_callback)
@@ -47,14 +61,29 @@ protected:
                 if (process_list_elem)
                     p_input->setProcessListElement(process_list_elem);
             }
+
+            input->readPrefix();
+
+            {
+                std::lock_guard<std::mutex> lock(cancel_mutex);
+
+                children.push_back(input);
+
+                if (isCancelled() && p_input)
+                    p_input->cancel();
+            }
         }
 
         return input->read();
     }
 
 private:
+    const char * name = "Lazy";
     Generator generator;
+
     BlockInputStreamPtr input;
+
+    std::mutex cancel_mutex;
 };
 
 }

From b469ef17923792628828045a4e8c7c0401fca7d7 Mon Sep 17 00:00:00 2001
From: Alexey Zatelepin <ztlpn@yandex-team.ru>
Date: Fri, 28 Jul 2017 22:34:25 +0300
Subject: [PATCH 129/281] don't use local replica if its delay is unacceptable
 [#CLICKHOUSE-3151]

---
 .../src/Client/ConnectionPoolWithFailover.cpp |  13 +-
 dbms/src/Client/ConnectionPoolWithFailover.h  |   9 +-
 dbms/src/Client/MultiplexedConnections.cpp    |  69 +++++---
 dbms/src/Client/MultiplexedConnections.h      |  24 ++-
 dbms/src/Common/PoolWithFailoverBase.h        |  39 ++---
 .../DataStreams/RemoteBlockInputStream.cpp    |  64 +++++---
 dbms/src/DataStreams/RemoteBlockInputStream.h |  37 +++--
 .../ClusterProxy/SelectStreamFactory.cpp      | 154 ++++++++++++++++--
 8 files changed, 296 insertions(+), 113 deletions(-)

diff --git a/dbms/src/Client/ConnectionPoolWithFailover.cpp b/dbms/src/Client/ConnectionPoolWithFailover.cpp
index 24261f554d2..7ec99cb83c5 100644
--- a/dbms/src/Client/ConnectionPoolWithFailover.cpp
+++ b/dbms/src/Client/ConnectionPoolWithFailover.cpp
@@ -79,10 +79,17 @@ std::vector<IConnectionPool::Entry> ConnectionPoolWithFailover::getMany(const Se
     {
         return tryGetEntry(pool, fail_message, settings);
     };
-    return getManyImpl(settings, pool_mode, try_get_entry);
+
+    std::vector<TryResult> results = getManyImpl(settings, pool_mode, try_get_entry);
+
+    std::vector<Entry> entries;
+    entries.reserve(results.size());
+    for (auto & result : results)
+        entries.emplace_back(std::move(result.entry));
+    return entries;
 }
 
-std::vector<IConnectionPool::Entry> ConnectionPoolWithFailover::getManyChecked(
+std::vector<ConnectionPoolWithFailover::TryResult> ConnectionPoolWithFailover::getManyChecked(
         const Settings * settings, PoolMode pool_mode, const QualifiedTableName & table_to_check)
 {
     TryGetEntryFunc try_get_entry = [&](NestedPool & pool, std::string & fail_message)
@@ -92,7 +99,7 @@ std::vector<IConnectionPool::Entry> ConnectionPoolWithFailover::getManyChecked(
     return getManyImpl(settings, pool_mode, try_get_entry);
 }
 
-std::vector<ConnectionPool::Entry> ConnectionPoolWithFailover::getManyImpl(
+std::vector<ConnectionPoolWithFailover::TryResult> ConnectionPoolWithFailover::getManyImpl(
         const Settings * settings,
         PoolMode pool_mode,
         const TryGetEntryFunc & try_get_entry)
diff --git a/dbms/src/Client/ConnectionPoolWithFailover.h b/dbms/src/Client/ConnectionPoolWithFailover.h
index 78fd350a7e2..b61fa03d711 100644
--- a/dbms/src/Client/ConnectionPoolWithFailover.h
+++ b/dbms/src/Client/ConnectionPoolWithFailover.h
@@ -47,16 +47,17 @@ public:
       */
     std::vector<Entry> getMany(const Settings * settings, PoolMode pool_mode);
 
+    using Base = PoolWithFailoverBase<IConnectionPool>;
+    using TryResult = Base::TryResult;
+
     /// The same as getMany(), but check that replication delay for table_to_check is acceptable.
     /// Delay threshold is taken from settings.
-    std::vector<Entry> getManyChecked(
+    std::vector<TryResult> getManyChecked(
             const Settings * settings, PoolMode pool_mode, const QualifiedTableName & table_to_check);
 
 private:
-    using Base = PoolWithFailoverBase<IConnectionPool>;
-
     /// Get the values of relevant settings and call Base::getMany()
-    std::vector<Entry> getManyImpl(
+    std::vector<TryResult> getManyImpl(
             const Settings * settings,
             PoolMode pool_mode,
             const TryGetEntryFunc & try_get_entry);
diff --git a/dbms/src/Client/MultiplexedConnections.cpp b/dbms/src/Client/MultiplexedConnections.cpp
index 0eb5b6c290e..9dc5aedd691 100644
--- a/dbms/src/Client/MultiplexedConnections.cpp
+++ b/dbms/src/Client/MultiplexedConnections.cpp
@@ -12,7 +12,7 @@ namespace ErrorCodes
 }
 
 
-MultiplexedConnections::MultiplexedConnections(Connection & connection, const Settings & settings_, ThrottlerPtr throttler_)
+MultiplexedConnections::MultiplexedConnections(Connection & connection, const Settings & settings_, const ThrottlerPtr & throttler_)
     : settings(settings_), throttler(throttler_), supports_parallel_execution(false)
 {
     active_connection_total_count = 1;
@@ -30,17 +30,15 @@ MultiplexedConnections::MultiplexedConnections(Connection & connection, const Se
     connection.setThrottler(throttler);
     connections.push_back(&connection);
 
-    auto res = replica_map.emplace(connections[0]->socket.impl()->sockfd(), replica_state);
-    if (!res.second)
-        throw Exception("Invalid set of connections", ErrorCodes::LOGICAL_ERROR);
+    replica_map.emplace(connections[0]->socket.impl()->sockfd(), replica_state);
 }
 
 MultiplexedConnections::MultiplexedConnections(
-        ConnectionPoolWithFailover & pool_, const Settings & settings_, ThrottlerPtr throttler_,
-        bool append_extra_info, PoolMode pool_mode_, const QualifiedTableName * main_table)
-    : settings(settings_), throttler(throttler_), pool_mode(pool_mode_)
+        std::vector<IConnectionPool::Entry> && connections,
+        const Settings & settings_, const ThrottlerPtr & throttler_, bool append_extra_info)
+    : settings(settings_), throttler(throttler_)
 {
-    initFromShard(pool_, main_table);
+    initShard(connections);
     registerShards();
 
     supports_parallel_execution = active_connection_total_count > 1;
@@ -50,18 +48,33 @@ MultiplexedConnections::MultiplexedConnections(
 }
 
 MultiplexedConnections::MultiplexedConnections(
-        const ConnectionPoolWithFailoverPtrs & pools_, const Settings & settings_, ThrottlerPtr throttler_,
-        bool append_extra_info, PoolMode pool_mode_, const QualifiedTableName * main_table)
-    : settings(settings_), throttler(throttler_), pool_mode(pool_mode_)
+        ConnectionPoolWithFailover & pool,
+        const Settings & settings_, const ThrottlerPtr & throttler_, bool append_extra_info,
+        PoolMode pool_mode, const QualifiedTableName * main_table)
+    : settings(settings_), throttler(throttler_)
 {
-    if (pools_.empty())
+    initShard(pool, pool_mode, main_table);
+    registerShards();
+
+    supports_parallel_execution = active_connection_total_count > 1;
+
+    if (append_extra_info)
+        block_extra_info = std::make_unique<BlockExtraInfo>();
+}
+
+MultiplexedConnections::MultiplexedConnections(
+        const ConnectionPoolWithFailoverPtrs & pools, const Settings & settings_, const ThrottlerPtr & throttler_,
+        bool append_extra_info, PoolMode pool_mode, const QualifiedTableName * main_table)
+    : settings(settings_), throttler(throttler_)
+{
+    if (pools.empty())
         throw Exception("Pools are not specified", ErrorCodes::LOGICAL_ERROR);
 
-    for (auto & pool : pools_)
+    for (auto & pool : pools)
     {
         if (!pool)
             throw Exception("Invalid pool specified", ErrorCodes::LOGICAL_ERROR);
-        initFromShard(*pool, main_table);
+        initShard(*pool, pool_mode, main_table);
     }
 
     registerShards();
@@ -223,7 +236,7 @@ Connection::Packet MultiplexedConnections::drain()
 
             case Protocol::Server::Exception:
             default:
-                /// If we receive an exception or an unknown package, we save it.
+                /// If we receive an exception or an unknown packet, we save it.
                 res = std::move(packet);
                 break;
         }
@@ -256,27 +269,37 @@ std::string MultiplexedConnections::dumpAddressesUnlocked() const
     return os.str();
 }
 
-void MultiplexedConnections::initFromShard(ConnectionPoolWithFailover & pool, const QualifiedTableName * main_table)
+void MultiplexedConnections::initShard(ConnectionPoolWithFailover & pool, PoolMode pool_mode, const QualifiedTableName * main_table)
 {
-    std::vector<IConnectionPool::Entry> entries;
+    std::vector<IConnectionPool::Entry> connections;
     if (main_table)
-        entries = pool.getManyChecked(&settings, pool_mode, *main_table);
+    {
+        auto try_results = pool.getManyChecked(&settings, pool_mode, *main_table);
+        connections.reserve(try_results.size());
+        for (auto & try_result : try_results)
+            connections.emplace_back(std::move(try_result.entry));
+    }
     else
-        entries = pool.getMany(&settings, pool_mode);
+        connections = pool.getMany(&settings, pool_mode);
 
+    initShard(connections);
+}
+
+void MultiplexedConnections::initShard(const std::vector<IConnectionPool::Entry> & connections)
+{
     /// If getMany() did not allocate connections and did not throw exceptions, this means that
     /// `skip_unavailable_shards` was set. Then just return.
-    if (entries.empty())
+    if (connections.empty())
         return;
 
     ShardState shard_state;
-    shard_state.allocated_connection_count = entries.size();
-    shard_state.active_connection_count = entries.size();
+    shard_state.allocated_connection_count = connections.size();
+    shard_state.active_connection_count = connections.size();
     active_connection_total_count += shard_state.active_connection_count;
 
     shard_states.push_back(shard_state);
 
-    pool_entries.insert(pool_entries.end(), entries.begin(), entries.end());
+    pool_entries.insert(pool_entries.end(), connections.begin(), connections.end());
 }
 
 void MultiplexedConnections::registerShards()
diff --git a/dbms/src/Client/MultiplexedConnections.h b/dbms/src/Client/MultiplexedConnections.h
index 6921c8aa7d6..5f2e524a58e 100644
--- a/dbms/src/Client/MultiplexedConnections.h
+++ b/dbms/src/Client/MultiplexedConnections.h
@@ -20,21 +20,30 @@ class MultiplexedConnections final : private boost::noncopyable
 {
 public:
     /// Accepts ready connection.
-    MultiplexedConnections(Connection & connection, const Settings & settings_, ThrottlerPtr throttler_);
+    MultiplexedConnections(Connection & connection, const Settings & settings_, const ThrottlerPtr & throttler_);
+
+    /** Accepts a vector of connections to replicas of one shard already taken from pool.
+      * If the append_extra_info flag is set, additional information is appended to each received block.
+      */
+    MultiplexedConnections(
+            std::vector<IConnectionPool::Entry> && connections,
+            const Settings & settings_, const ThrottlerPtr & throttler_, bool append_extra_info);
 
     /** Accepts a pool from which it will be necessary to get one or more connections.
       * If the append_extra_info flag is set, additional information appended to each received block.
       */
     MultiplexedConnections(
-            ConnectionPoolWithFailover & pool_, const Settings & settings_, ThrottlerPtr throttler_,
-            bool append_extra_info, PoolMode pool_mode_, const QualifiedTableName * main_table = nullptr);
+            ConnectionPoolWithFailover & pool,
+            const Settings & settings_, const ThrottlerPtr & throttler_, bool append_extra_info,
+            PoolMode pool_mode, const QualifiedTableName * main_table = nullptr);
 
     /** Accepts pools, one for each shard, from which one will need to get one or more connections.
       * If the append_extra_info flag is set, additional information appended to each received block.
       */
     MultiplexedConnections(
-            const ConnectionPoolWithFailoverPtrs & pools_, const Settings & settings_, ThrottlerPtr throttler_,
-            bool append_extra_info, PoolMode pool_mode_, const QualifiedTableName * main_table = nullptr);
+            const ConnectionPoolWithFailoverPtrs & pools,
+            const Settings & settings_, const ThrottlerPtr & throttler_, bool append_extra_info,
+            PoolMode pool_mode, const QualifiedTableName * main_table = nullptr);
 
     /// Send all content of external tables to replicas.
     void sendExternalTablesData(std::vector<ExternalTablesData> & data);
@@ -104,7 +113,8 @@ private:
     using ShardStates = std::vector<ShardState>;
 
 private:
-    void initFromShard(ConnectionPoolWithFailover & pool, const QualifiedTableName * main_table);
+    void initShard(ConnectionPoolWithFailover & pool, PoolMode pool_mode, const QualifiedTableName * main_table);
+    void initShard(const std::vector<IConnectionPool::Entry> & connections);
 
     void registerShards();
 
@@ -154,8 +164,6 @@ private:
 
     bool cancelled = false;
 
-    PoolMode pool_mode = PoolMode::GET_MANY;
-
     /// A mutex for the sendCancel function to execute safely
     /// in separate thread.
     mutable std::mutex cancel_mutex;
diff --git a/dbms/src/Common/PoolWithFailoverBase.h b/dbms/src/Common/PoolWithFailoverBase.h
index 0478bece49c..47ad7a7374a 100644
--- a/dbms/src/Common/PoolWithFailoverBase.h
+++ b/dbms/src/Common/PoolWithFailoverBase.h
@@ -107,7 +107,7 @@ public:
     /// Returns at least min_entries and at most max_entries connections (at most one connection per nested pool).
     /// The method will throw if it is unable to get min_entries alive connections or
     /// if fallback_to_stale_replicas is false and it is unable to get min_entries connections to up-to-date replicas.
-    std::vector<Entry> getMany(
+    std::vector<TryResult> getMany(
             size_t min_entries, size_t max_entries,
             const TryGetEntryFunc & try_get_entry,
             const GetPriorityFunc & get_priority = GetPriorityFunc(),
@@ -141,16 +141,16 @@ template<typename TNestedPool>
 typename TNestedPool::Entry
 PoolWithFailoverBase<TNestedPool>::get(const TryGetEntryFunc & try_get_entry, const GetPriorityFunc & get_priority)
 {
-    std::vector<Entry> entries = getMany(1, 1, try_get_entry, get_priority);
-    if (entries.empty() || entries[0].isNull())
+    std::vector<TryResult> results = getMany(1, 1, try_get_entry, get_priority);
+    if (results.empty() || results[0].entry.isNull())
         throw DB::Exception(
                 "PoolWithFailoverBase::getMany() returned less than min_entries entries.",
                 DB::ErrorCodes::LOGICAL_ERROR);
-    return entries[0];
+    return results[0].entry;
 }
 
 template<typename TNestedPool>
-std::vector<typename TNestedPool::Entry>
+std::vector<typename PoolWithFailoverBase<TNestedPool>::TryResult>
 PoolWithFailoverBase<TNestedPool>::getMany(
         size_t min_entries, size_t max_entries,
         const TryGetEntryFunc & try_get_entry,
@@ -262,34 +262,27 @@ PoolWithFailoverBase<TNestedPool>::getMany(
                     [](const TryResult & r) { return r.entry.isNull() || !r.is_usable; }),
             try_results.end());
 
-    std::vector<Entry> entries;
+    /// Sort so that preferred items are near the beginning.
+    std::stable_sort(
+            try_results.begin(), try_results.end(),
+            [](const TryResult & left, const TryResult & right)
+            {
+                return std::forward_as_tuple(!left.is_up_to_date, left.staleness)
+                    < std::forward_as_tuple(!right.is_up_to_date, right.staleness);
+            });
 
     if (up_to_date_count >= min_entries)
     {
         /// There is enough up-to-date entries.
-        entries.reserve(up_to_date_count);
-        for (const TryResult & result: try_results)
-        {
-            if (result.is_up_to_date)
-                entries.push_back(result.entry);
-        }
+        try_results.resize(up_to_date_count);
     }
     else if (fallback_to_stale_replicas)
     {
         /// There is not enough up-to-date entries but we are allowed to return stale entries.
         /// Gather all up-to-date ones and least-bad stale ones.
-        std::stable_sort(
-                try_results.begin(), try_results.end(),
-                [](const TryResult & left, const TryResult & right)
-                {
-                    return std::forward_as_tuple(!left.is_up_to_date, left.staleness)
-                        < std::forward_as_tuple(!right.is_up_to_date, right.staleness);
-                });
 
         size_t size = std::min(try_results.size(), max_entries);
-        entries.reserve(size);
-        for (size_t i = 0; i < size; ++i)
-            entries.push_back(try_results[i].entry);
+        try_results.resize(size);
     }
     else
         throw DB::Exception(
@@ -297,7 +290,7 @@ PoolWithFailoverBase<TNestedPool>::getMany(
                 + ", needed: " + std::to_string(min_entries),
                 DB::ErrorCodes::ALL_REPLICAS_ARE_STALE);
 
-    return entries;
+    return try_results;
 }
 
 template<typename TNestedPool>
diff --git a/dbms/src/DataStreams/RemoteBlockInputStream.cpp b/dbms/src/DataStreams/RemoteBlockInputStream.cpp
index 9ba4f57899c..fa152855741 100644
--- a/dbms/src/DataStreams/RemoteBlockInputStream.cpp
+++ b/dbms/src/DataStreams/RemoteBlockInputStream.cpp
@@ -16,24 +16,52 @@ namespace ErrorCodes
 }
 
 
-RemoteBlockInputStream::RemoteBlockInputStream(Connection & connection_, const String & query_,
-    const Context & context_, const Settings * settings, ThrottlerPtr throttler_,
-    const Tables & external_tables_, QueryProcessingStage::Enum stage_)
-    : connection(&connection_), query(query_), throttler(throttler_), external_tables(external_tables_),
-    stage(stage_), context(context_)
+RemoteBlockInputStream::RemoteBlockInputStream(
+        Connection & connection,
+        const String & query_, const Context & context_, const Settings * settings,
+        const ThrottlerPtr & throttler, const Tables & external_tables_, QueryProcessingStage::Enum stage_)
+    : query(query_), context(context_), external_tables(external_tables_), stage(stage_)
 {
     if (settings)
         context.setSettings(*settings);
+
+    create_multiplexed_connections = [this, &connection, throttler]()
+    {
+        return std::make_unique<MultiplexedConnections>(connection, context.getSettingsRef(), throttler);
+    };
 }
 
-RemoteBlockInputStream::RemoteBlockInputStream(const ConnectionPoolWithFailoverPtr & pool_, const String & query_,
-    const Context & context_, const Settings * settings, ThrottlerPtr throttler_,
-    const Tables & external_tables_, QueryProcessingStage::Enum stage_)
-    : pool(pool_), query(query_), throttler(throttler_), external_tables(external_tables_),
-    stage(stage_), context(context_)
+RemoteBlockInputStream::RemoteBlockInputStream(
+        std::vector<IConnectionPool::Entry> && connections,
+        const String & query_, const Context & context_, const Settings * settings,
+        const ThrottlerPtr & throttler, const Tables & external_tables_, QueryProcessingStage::Enum stage_)
+    : query(query_), context(context_), external_tables(external_tables_), stage(stage_)
 {
     if (settings)
         context.setSettings(*settings);
+
+    /// Move the vector into the closure: a plain by-value capture of the named `&&` parameter would copy it.
+    create_multiplexed_connections = [this, connections = std::move(connections), throttler]() mutable
+    {
+        return std::make_unique<MultiplexedConnections>(std::move(connections), context.getSettingsRef(), throttler, append_extra_info);
+    };
+}
+
+RemoteBlockInputStream::RemoteBlockInputStream(
+        const ConnectionPoolWithFailoverPtr & pool,
+        const String & query_, const Context & context_, const Settings * settings,
+        const ThrottlerPtr & throttler, const Tables & external_tables_, QueryProcessingStage::Enum stage_)
+    : query(query_), context(context_), external_tables(external_tables_), stage(stage_)
+{
+    if (settings)
+        context.setSettings(*settings);
+
+    create_multiplexed_connections = [this, pool, throttler]()
+    {
+        const QualifiedTableName * main_table_ptr = main_table ? &main_table.value() : nullptr;
+        return std::make_unique<MultiplexedConnections>(
+                *pool, context.getSettingsRef(), throttler, append_extra_info, pool_mode, main_table_ptr);
+    };
 }
 
 RemoteBlockInputStream::~RemoteBlockInputStream()
@@ -215,23 +243,9 @@ void RemoteBlockInputStream::readSuffixImpl()
     }
 }
 
-void RemoteBlockInputStream::createMultiplexedConnections()
-{
-    const QualifiedTableName * main_table_ptr = main_table ? &main_table.value() : nullptr;
-    if (connection != nullptr)
-        multiplexed_connections = std::make_unique<MultiplexedConnections>(
-                *connection, context.getSettingsRef(), throttler);
-    else if (pool != nullptr)
-        multiplexed_connections = std::make_unique<MultiplexedConnections>(
-                *pool, context.getSettingsRef(), throttler,
-                append_extra_info, pool_mode, main_table_ptr);
-    else
-        throw Exception("Internal error", ErrorCodes::LOGICAL_ERROR);
-}
-
 void RemoteBlockInputStream::sendQuery()
 {
-    createMultiplexedConnections();
+    multiplexed_connections = create_multiplexed_connections();
 
     if (context.getSettingsRef().skip_unavailable_shards && 0 == multiplexed_connections->size())
         return;
diff --git a/dbms/src/DataStreams/RemoteBlockInputStream.h b/dbms/src/DataStreams/RemoteBlockInputStream.h
index dcee8995560..e81c4b71db3 100644
--- a/dbms/src/DataStreams/RemoteBlockInputStream.h
+++ b/dbms/src/DataStreams/RemoteBlockInputStream.h
@@ -22,15 +22,27 @@ class RemoteBlockInputStream : public IProfilingBlockInputStream
 public:
     /// Takes already set connection.
     /// If `settings` is nullptr, settings will be taken from context.
-    RemoteBlockInputStream(Connection & connection_, const String & query_, const Context & context_,
-        const Settings * settings = nullptr, ThrottlerPtr throttler_ = nullptr, const Tables & external_tables_ = Tables(),
-        QueryProcessingStage::Enum stage_ = QueryProcessingStage::Complete);
+    RemoteBlockInputStream(
+            Connection & connection,
+            const String & query_, const Context & context_, const Settings * settings = nullptr,
+            const ThrottlerPtr & throttler = nullptr, const Tables & external_tables_ = Tables(),
+            QueryProcessingStage::Enum stage_ = QueryProcessingStage::Complete);
+
+    /// Accepts several connections already taken from pool.
+    /// If `settings` is nullptr, settings will be taken from context.
+    RemoteBlockInputStream(
+            std::vector<IConnectionPool::Entry> && connections,
+            const String & query_, const Context & context_, const Settings * settings = nullptr,
+            const ThrottlerPtr & throttler = nullptr, const Tables & external_tables_ = Tables(),
+            QueryProcessingStage::Enum stage_ = QueryProcessingStage::Complete);
 
     /// Takes a pool and gets one or several connections from it.
     /// If `settings` is nullptr, settings will be taken from context.
-    RemoteBlockInputStream(const ConnectionPoolWithFailoverPtr & pool_, const String & query_, const Context & context_,
-        const Settings * settings = nullptr, ThrottlerPtr throttler_ = nullptr, const Tables & external_tables_ = Tables(),
-        QueryProcessingStage::Enum stage_ = QueryProcessingStage::Complete);
+    RemoteBlockInputStream(
+            const ConnectionPoolWithFailoverPtr & pool,
+            const String & query_, const Context & context_, const Settings * settings = nullptr,
+            const ThrottlerPtr & throttler = nullptr, const Tables & external_tables_ = Tables(),
+            QueryProcessingStage::Enum stage_ = QueryProcessingStage::Complete);
 
     ~RemoteBlockInputStream() override;
 
@@ -74,9 +86,6 @@ protected:
 
     void readSuffixImpl() override;
 
-    /// Creates an object to talk to one shard's replicas performing query
-    void createMultiplexedConnections();
-
     /// Returns true if query was sent
     bool isQueryPending() const;
 
@@ -90,21 +99,15 @@ private:
     void tryCancel(const char * reason);
 
 private:
-    /// Already set connection
-    Connection * connection = nullptr;
-
-    /// One shard's connections pool
-    ConnectionPoolWithFailoverPtr pool = nullptr;
+    std::function<std::unique_ptr<MultiplexedConnections>()> create_multiplexed_connections;
 
     std::unique_ptr<MultiplexedConnections> multiplexed_connections;
 
     const String query;
-    /// If != nullptr, used to limit network trafic
-    ThrottlerPtr throttler;
+    Context context;
     /// Temporary tables needed to be sent to remote servers
     Tables external_tables;
     QueryProcessingStage::Enum stage;
-    Context context;
 
     /// Threads for reading from temporary tables and following sending of data
     /// to remote servers for GLOBAL-subqueries
diff --git a/dbms/src/Interpreters/ClusterProxy/SelectStreamFactory.cpp b/dbms/src/Interpreters/ClusterProxy/SelectStreamFactory.cpp
index 86d3abd89a6..259a39d7775 100644
--- a/dbms/src/Interpreters/ClusterProxy/SelectStreamFactory.cpp
+++ b/dbms/src/Interpreters/ClusterProxy/SelectStreamFactory.cpp
@@ -2,10 +2,22 @@
 #include <Interpreters/InterpreterSelectQuery.h>
 #include <DataStreams/RemoteBlockInputStream.h>
 #include <DataStreams/MaterializingBlockInputStream.h>
+#include <DataStreams/LazyBlockInputStream.h>
+#include <Storages/StorageReplicatedMergeTree.h>
+#include <Common/Exception.h>
+
+#include <common/logger_useful.h>
 
 namespace DB
 {
 
+namespace ErrorCodes
+{
+
+extern const int ALL_REPLICAS_ARE_STALE;
+
+}
+
 namespace ClusterProxy
 {
 
@@ -19,30 +31,152 @@ SelectStreamFactory::SelectStreamFactory(
 {
 }
 
+namespace
+{
+
+BlockInputStreamPtr createLocalStream(const ASTPtr & query_ast, const Context & context, QueryProcessingStage::Enum processed_stage)
+{
+    InterpreterSelectQuery interpreter{query_ast, context, processed_stage};
+    BlockInputStreamPtr stream = interpreter.execute().in;
+
+    /** Materialization is needed, since constants arrive from remote servers already materialized.
+      * Without it, columns of different types (Const and non-Const) would be produced in different threads,
+      * and this is not allowed, since all code is based on the assumption that in a block stream all types are the same.
+      */
+    return std::make_shared<MaterializingBlockInputStream>(stream);
+}
+
+}
+
 void SelectStreamFactory::createForShard(
         const Cluster::ShardInfo & shard_info,
         const String & query, const ASTPtr & query_ast,
         const Context & context, const ThrottlerPtr & throttler,
         BlockInputStreams & res)
 {
-    if (shard_info.isLocal())
+    auto emplace_local_stream = [&]()
     {
-        InterpreterSelectQuery interpreter{query_ast, context, processed_stage};
-        BlockInputStreamPtr stream = interpreter.execute().in;
+        res.emplace_back(createLocalStream(query_ast, context, processed_stage));
+    };
 
-        /** Materialization is needed, since from remote servers the constants come materialized.
-         * If you do not do this, different types (Const and non-Const) columns will be produced in different threads,
-         * And this is not allowed, since all code is based on the assumption that in the block stream all types are the same.
-         */
-        res.emplace_back(std::make_shared<MaterializingBlockInputStream>(stream));
-    }
-    else
+    auto emplace_remote_stream = [&]()
     {
         auto stream = std::make_shared<RemoteBlockInputStream>(shard_info.pool, query, context, nullptr, throttler, external_tables, processed_stage);
         stream->setPoolMode(PoolMode::GET_MANY);
         stream->setMainTable(main_table);
         res.emplace_back(std::move(stream));
+    };
+
+    if (shard_info.isLocal())
+    {
+        StoragePtr main_table_storage = context.tryGetTable(main_table.database, main_table.table);
+        if (!main_table_storage) /// Table is absent on a local server.
+        {
+            if (shard_info.pool)
+            {
+                LOG_WARNING(
+                        &Logger::get("ClusterProxy::SelectStreamFactory"),
+                        "There is no table " << main_table.database << "." << main_table.table
+                        << " on local replica of shard " << shard_info.shard_num << ", will try remote replicas.");
+
+                emplace_remote_stream();
+                return;
+            }
+            else
+            {
+                /// Let it fail the usual way.
+                emplace_local_stream();
+                return;
+            }
+        }
+
+        const auto * replicated_storage = dynamic_cast<const StorageReplicatedMergeTree *>(main_table_storage.get());
+
+        if (!replicated_storage)
+        {
+            /// Table is not replicated, use local server.
+            emplace_local_stream();
+            return;
+        }
+
+        const Settings & settings = context.getSettingsRef();
+        UInt64 max_allowed_delay = settings.max_replica_delay_for_distributed_queries;
+
+        if (!max_allowed_delay)
+        {
+            emplace_local_stream();
+            return;
+        }
+
+        UInt32 local_delay = replicated_storage->getAbsoluteDelay();
+
+        if (local_delay < max_allowed_delay)
+        {
+            emplace_local_stream();
+            return;
+        }
+
+        /// If we reached this point, local replica is stale.
+
+        if (!settings.fallback_to_stale_replicas_for_distributed_queries)
+        {
+            if (shard_info.pool)
+            {
+                /// If we cannot fallback, then we cannot use local replica. Try our luck with remote replicas.
+                emplace_remote_stream();
+                return;
+            }
+            else
+                throw Exception(
+                        "Local replica for shard " + toString(shard_info.shard_num)
+                        + " is stale (delay: " + toString(local_delay) + "), but no other replica configured.",
+                        ErrorCodes::ALL_REPLICAS_ARE_STALE);
+        }
+
+        if (!shard_info.pool)
+        {
+            /// There are no remote replicas but we are allowed to fall back to stale local replica.
+            emplace_local_stream();
+            return;
+        }
+
+        /// Try our luck with remote replicas, but if they are stale too, then fallback to local replica.
+        /// Do it lazily to avoid connecting in the main thread.
+
+        auto lazily_create_stream = [
+                pool = shard_info.pool, query, query_ast, context, throttler,
+                main_table = main_table, external_tables = external_tables, stage = processed_stage,
+                local_delay]()
+            -> BlockInputStreamPtr
+        {
+            std::vector<ConnectionPoolWithFailover::TryResult> try_results =
+                pool->getManyChecked(&context.getSettingsRef(), PoolMode::GET_MANY, main_table);
+
+            double max_remote_delay = 0.0;
+            for (const auto & try_result : try_results)
+            {
+                if (!try_result.is_up_to_date)
+                    max_remote_delay = std::max(try_result.staleness, max_remote_delay);
+            }
+
+            if (local_delay < max_remote_delay)
+                return createLocalStream(query_ast, context, stage);
+            else
+            {
+                std::vector<IConnectionPool::Entry> connections;
+                connections.reserve(try_results.size());
+                for (auto & try_result : try_results)
+                    connections.emplace_back(std::move(try_result.entry));
+
+                return std::make_shared<RemoteBlockInputStream>(
+                        std::move(connections), query, context, nullptr, throttler, external_tables, stage);
+            }
+        };
+
+        res.emplace_back(std::make_shared<LazyBlockInputStream>("LazyShardWithLocalReplica", lazily_create_stream));
     }
+    else
+        emplace_remote_stream();
 }
 
 }

From c99f3d7ee05f95f4bd5bfbb175ae1f794d45347a Mon Sep 17 00:00:00 2001
From: Alexey Zatelepin <ztlpn@yandex-team.ru>
Date: Tue, 1 Aug 2017 17:36:00 +0000
Subject: [PATCH 130/281] add test [#CLICKHOUSE-3151]

---
 dbms/tests/integration/helpers/network.py     | 12 +++--
 .../configs/remote_servers.xml                | 15 +++++-
 .../test_delayed_replica_failover/test.py     | 53 ++++++++++++-------
 3 files changed, 55 insertions(+), 25 deletions(-)

diff --git a/dbms/tests/integration/helpers/network.py b/dbms/tests/integration/helpers/network.py
index 59026a8e2ee..807e4f8b48b 100644
--- a/dbms/tests/integration/helpers/network.py
+++ b/dbms/tests/integration/helpers/network.py
@@ -36,12 +36,18 @@ class PartitionManager:
         self._delete_rule({'destination': instance.ip_address, 'source_port': 2181, 'action': action})
 
 
-    def partition_instances(self, left, right, action='DROP'):
+    def partition_instances(self, left, right, port=None, action='DROP'):
         self._check_instance(left)
         self._check_instance(right)
 
-        self._add_rule({'source': left.ip_address, 'destination': right.ip_address, 'action': action})
-        self._add_rule({'source': right.ip_address, 'destination': left.ip_address, 'action': action})
+        def create_rule(src, dst):
+            rule = {'source': src.ip_address, 'destination': dst.ip_address, 'action': action}
+            if port is not None:
+                rule['destination_port'] = port
+            return rule
+
+        self._add_rule(create_rule(left, right))
+        self._add_rule(create_rule(right, left))
 
 
     def heal_all(self):
diff --git a/dbms/tests/integration/test_delayed_replica_failover/configs/remote_servers.xml b/dbms/tests/integration/test_delayed_replica_failover/configs/remote_servers.xml
index 014e7bf253e..0df620f5b57 100644
--- a/dbms/tests/integration/test_delayed_replica_failover/configs/remote_servers.xml
+++ b/dbms/tests/integration/test_delayed_replica_failover/configs/remote_servers.xml
@@ -4,11 +4,22 @@
             <shard>
                 <internal_replication>true</internal_replication>
                 <replica>
-                    <host>replica1</host>
+                    <host>node_1_1</host>
                     <port>9000</port>
                 </replica>
                 <replica>
-                    <host>replica2</host>
+                    <host>node_1_2</host>
+                    <port>9000</port>
+                </replica>
+            </shard>
+            <shard>
+                <internal_replication>true</internal_replication>
+                <replica>
+                    <host>node_2_1</host>
+                    <port>9000</port>
+                </replica>
+                <replica>
+                    <host>node_2_2</host>
                     <port>9000</port>
                 </replica>
             </shard>
diff --git a/dbms/tests/integration/test_delayed_replica_failover/test.py b/dbms/tests/integration/test_delayed_replica_failover/test.py
index 31e58d14e6b..6e2942331e6 100644
--- a/dbms/tests/integration/test_delayed_replica_failover/test.py
+++ b/dbms/tests/integration/test_delayed_replica_failover/test.py
@@ -7,21 +7,28 @@ from helpers.network import PartitionManager
 
 cluster = ClickHouseCluster(__file__)
 
-instance_with_dist_table = cluster.add_instance('instance_with_dist_table', main_configs=['configs/remote_servers.xml'])
-replica1 = cluster.add_instance('replica1', with_zookeeper=True)
-replica2 = cluster.add_instance('replica2', with_zookeeper=True)
+# Cluster with 2 shards of 2 replicas each. node_1_1 is the instance with Distributed table.
+# Thus we have a shard with a local replica and a shard with remote replicas.
+node_1_1 = instance_with_dist_table = cluster.add_instance(
+    'node_1_1', with_zookeeper=True, main_configs=['configs/remote_servers.xml'])
+node_1_2 = cluster.add_instance('node_1_2', with_zookeeper=True)
+node_2_1 = cluster.add_instance('node_2_1', with_zookeeper=True)
+node_2_2 = cluster.add_instance('node_2_2', with_zookeeper=True)
 
 @pytest.fixture(scope="module")
 def started_cluster():
     try:
         cluster.start()
 
-        for replica in (replica1, replica2):
-            replica.query(
-                "CREATE TABLE replicated (d Date, x UInt32) ENGINE = "
-                "ReplicatedMergeTree('/clickhouse/tables/replicated', '{instance}', d, d, 8192)")
+        for shard in (1, 2):
+            for replica in (1, 2):
+                node = cluster.instances['node_{}_{}'.format(shard, replica)]
+                node.query('''
+CREATE TABLE replicated (d Date, x UInt32) ENGINE =
+    ReplicatedMergeTree('/clickhouse/tables/{shard}/replicated', '{instance}', d, d, 8192)'''
+                    .format(shard=shard, instance=node.name))
 
-        instance_with_dist_table.query(
+        node_1_1.query(
             "CREATE TABLE distributed (d Date, x UInt32) ENGINE = "
             "Distributed('test_cluster', 'default', 'replicated')")
 
@@ -33,36 +40,42 @@ def started_cluster():
 
 def test(started_cluster):
     with PartitionManager() as pm:
-        pm.partition_instances(replica1, replica2)
+        # Hinder replication between replicas of the same shard, but leave the possibility of distributed connection.
+        pm.partition_instances(node_1_1, node_1_2, port=9009)
+        pm.partition_instances(node_2_1, node_2_2, port=9009)
 
-        replica2.query("INSERT INTO replicated VALUES ('2017-05-08', 1)")
+        node_1_2.query("INSERT INTO replicated VALUES ('2017-05-08', 1)")
+        node_2_2.query("INSERT INTO replicated VALUES ('2017-05-08', 2)")
 
         time.sleep(1) # accrue replica delay
 
-        assert replica1.query("SELECT count() FROM replicated").strip() == ''
-        assert replica2.query("SELECT count() FROM replicated").strip() == '1'
+        assert node_1_1.query("SELECT sum(x) FROM replicated").strip() == ''
+        assert node_1_2.query("SELECT sum(x) FROM replicated").strip() == '1'
+        assert node_2_1.query("SELECT sum(x) FROM replicated").strip() == ''
+        assert node_2_2.query("SELECT sum(x) FROM replicated").strip() == '2'
 
-        # With in_order balancing replica1 is chosen.
+        # With in_order balancing the first replicas are chosen.
         assert instance_with_dist_table.query(
             "SELECT count() FROM distributed SETTINGS load_balancing='in_order'").strip() == ''
 
-        # When we set max_replica_delay, replica1 must be excluded.
+        # When we set max_replica_delay, the first replicas must be excluded.
         assert instance_with_dist_table.query('''
-SELECT count() FROM distributed SETTINGS
+SELECT sum(x) FROM distributed SETTINGS
     load_balancing='in_order',
     max_replica_delay_for_distributed_queries=1
-''').strip() == '1'
+''').strip() == '3'
 
-        pm.drop_instance_zk_connections(replica2)
+        pm.drop_instance_zk_connections(node_1_2)
+        pm.drop_instance_zk_connections(node_2_2)
 
         time.sleep(4) # allow pings to zookeeper to timeout (must be greater than ZK session timeout).
 
-        # At this point all replicas are stale, but the query must still go to replica2 which is the least stale one.
+        # At this point all replicas are stale, but the query must still go to the second replicas, which are the least stale ones.
         assert instance_with_dist_table.query('''
-SELECT count() FROM distributed SETTINGS
+SELECT sum(x) FROM distributed SETTINGS
     load_balancing='in_order',
     max_replica_delay_for_distributed_queries=1
-''').strip() == '1'
+''').strip() == '3'
 
         # If we forbid stale replicas, the query must fail.
         with pytest.raises(Exception):

From 67dffb9be63f343fb1448df20414a53f98a3bed7 Mon Sep 17 00:00:00 2001
From: Alexey Zatelepin <ztlpn@yandex-team.ru>
Date: Wed, 2 Aug 2017 16:05:01 +0300
Subject: [PATCH 131/281] simplify MultiplexedConnections further (so that it
 works with only one shard) [#CLICKHOUSE-3151]

---
 dbms/src/Client/MultiplexedConnections.cpp    | 269 +++++-------------
 dbms/src/Client/MultiplexedConnections.h      |  91 ++----
 .../DataStreams/RemoteBlockInputStream.cpp    |  16 +-
 3 files changed, 99 insertions(+), 277 deletions(-)

diff --git a/dbms/src/Client/MultiplexedConnections.cpp b/dbms/src/Client/MultiplexedConnections.cpp
index 9dc5aedd691..30d1e6c1e34 100644
--- a/dbms/src/Client/MultiplexedConnections.cpp
+++ b/dbms/src/Client/MultiplexedConnections.cpp
@@ -12,74 +12,45 @@ namespace ErrorCodes
 }
 
 
-MultiplexedConnections::MultiplexedConnections(Connection & connection, const Settings & settings_, const ThrottlerPtr & throttler_)
-    : settings(settings_), throttler(throttler_), supports_parallel_execution(false)
+MultiplexedConnections::MultiplexedConnections(Connection & connection, const Settings & settings_, const ThrottlerPtr & throttler)
+    : settings(settings_)
 {
-    active_connection_total_count = 1;
-
-    ShardState shard_state;
-    shard_state.allocated_connection_count = active_connection_total_count;
-    shard_state.active_connection_count = active_connection_total_count;
-
-    shard_states.push_back(shard_state);
+    connection.setThrottler(throttler);
 
     ReplicaState replica_state;
-    replica_state.connection_index = 0;
-    replica_state.shard_state = &shard_states[0];
+    replica_state.connection = &connection;
+    replica_states.push_back(replica_state);
+    fd_to_replica_state_idx.emplace(connection.socket.impl()->sockfd(), 0);
 
-    connection.setThrottler(throttler);
-    connections.push_back(&connection);
-
-    replica_map.emplace(connections[0]->socket.impl()->sockfd(), replica_state);
+    active_connection_count = 1;
 }
 
 MultiplexedConnections::MultiplexedConnections(
         std::vector<IConnectionPool::Entry> && connections,
-        const Settings & settings_, const ThrottlerPtr & throttler_, bool append_extra_info)
-    : settings(settings_), throttler(throttler_)
+        const Settings & settings_, const ThrottlerPtr & throttler, bool append_extra_info)
+    : settings(settings_)
 {
-    initShard(connections);
-    registerShards();
+    /// If we didn't get any connections from pool and getMany() did not throw exceptions, this means that
+    /// `skip_unavailable_shards` was set. Then just return.
+    if (connections.empty())
+        return;
 
-    supports_parallel_execution = active_connection_total_count > 1;
-
-    if (append_extra_info)
-        block_extra_info = std::make_unique<BlockExtraInfo>();
-}
-
-MultiplexedConnections::MultiplexedConnections(
-        ConnectionPoolWithFailover & pool,
-        const Settings & settings_, const ThrottlerPtr & throttler_, bool append_extra_info,
-        PoolMode pool_mode, const QualifiedTableName * main_table)
-    : settings(settings_), throttler(throttler_)
-{
-    initShard(pool, pool_mode, main_table);
-    registerShards();
-
-    supports_parallel_execution = active_connection_total_count > 1;
-
-    if (append_extra_info)
-        block_extra_info = std::make_unique<BlockExtraInfo>();
-}
-
-MultiplexedConnections::MultiplexedConnections(
-        const ConnectionPoolWithFailoverPtrs & pools, const Settings & settings_, const ThrottlerPtr & throttler_,
-        bool append_extra_info, PoolMode pool_mode, const QualifiedTableName * main_table)
-    : settings(settings_), throttler(throttler_)
-{
-    if (pools.empty())
-        throw Exception("Pools are not specified", ErrorCodes::LOGICAL_ERROR);
-
-    for (auto & pool : pools)
+    replica_states.reserve(connections.size());
+    fd_to_replica_state_idx.reserve(connections.size());
+    for (size_t i = 0; i < connections.size(); ++i)
     {
-        if (!pool)
-            throw Exception("Invalid pool specified", ErrorCodes::LOGICAL_ERROR);
-        initShard(*pool, pool_mode, main_table);
+        Connection * connection = &(*connections[i]);
+        connection->setThrottler(throttler);
+
+        ReplicaState replica_state;
+        replica_state.pool_entry = std::move(connections[i]);
+        replica_state.connection = connection;
+
+        replica_states.push_back(std::move(replica_state));
+        fd_to_replica_state_idx.emplace(connection->socket.impl()->sockfd(), i);
     }
 
-    registerShards();
-
-    supports_parallel_execution = active_connection_total_count > 1;
+    active_connection_count = connections.size();
 
     if (append_extra_info)
         block_extra_info = std::make_unique<BlockExtraInfo>();
@@ -92,17 +63,18 @@ void MultiplexedConnections::sendExternalTablesData(std::vector<ExternalTablesDa
     if (!sent_query)
         throw Exception("Cannot send external tables data: query not yet sent.", ErrorCodes::LOGICAL_ERROR);
 
-    if (data.size() < active_connection_total_count)
+    if (data.size() != active_connection_count)
         throw Exception("Mismatch between replicas and data sources", ErrorCodes::MISMATCH_REPLICAS_DATA_SOURCES);
 
     auto it = data.begin();
-    for (auto & e : replica_map)
+    for (ReplicaState & state : replica_states)
     {
-        ReplicaState & state = e.second;
-        Connection * connection = connections[state.connection_index];
+        Connection * connection = state.connection;
         if (connection != nullptr)
+        {
             connection->sendExternalTablesData(*it);
-        ++it;
+            ++it;
+        }
     }
 }
 
@@ -118,33 +90,24 @@ void MultiplexedConnections::sendQuery(
     if (sent_query)
         throw Exception("Query already sent.", ErrorCodes::LOGICAL_ERROR);
 
-    if (supports_parallel_execution)
+    if (replica_states.size() > 1)
     {
-        /// Each shard has one or more replicas.
-        auto it = connections.begin();
-        for (const auto & shard_state : shard_states)
+        Settings query_settings = settings;
+        query_settings.parallel_replicas_count = replica_states.size();
+
+        for (size_t i = 0; i < replica_states.size(); ++i)
         {
-            Settings query_settings = settings;
-            query_settings.parallel_replicas_count = shard_state.active_connection_count;
+            Connection * connection = replica_states[i].connection;
+            if (connection == nullptr)
+                throw Exception("MultiplexedConnections: Internal error", ErrorCodes::LOGICAL_ERROR);
 
-            UInt64 offset = 0;
-
-            for (size_t i = 0; i < shard_state.allocated_connection_count; ++i)
-            {
-                Connection * connection = *it;
-                if (connection == nullptr)
-                    throw Exception("MultiplexedConnections: Internal error", ErrorCodes::LOGICAL_ERROR);
-
-                query_settings.parallel_replica_offset = offset;
-                connection->sendQuery(query, query_id, stage, &query_settings, client_info, with_pending_data);
-                ++offset;
-                ++it;
-            }
+            query_settings.parallel_replica_offset = i;
+            connection->sendQuery(query, query_id, stage, &query_settings, client_info, with_pending_data);
         }
     }
     else
     {
-        Connection * connection = connections[0];
+        Connection * connection = replica_states[0].connection;
         if (connection == nullptr)
             throw Exception("MultiplexedConnections: Internal error", ErrorCodes::LOGICAL_ERROR);
 
@@ -180,14 +143,13 @@ void MultiplexedConnections::disconnect()
 {
     std::lock_guard<std::mutex> lock(cancel_mutex);
 
-    for (auto it = replica_map.begin(); it != replica_map.end(); ++it)
+    for (ReplicaState & state : replica_states)
     {
-        ReplicaState & state = it->second;
-        Connection * connection = connections[state.connection_index];
+        Connection * connection = state.connection;
         if (connection != nullptr)
         {
             connection->disconnect();
-            invalidateReplica(it);
+            invalidateReplica(state);
         }
     }
 }
@@ -199,10 +161,9 @@ void MultiplexedConnections::sendCancel()
     if (!sent_query || cancelled)
         throw Exception("Cannot cancel. Either no query sent or already cancelled.", ErrorCodes::LOGICAL_ERROR);
 
-    for (const auto & e : replica_map)
+    for (ReplicaState & state : replica_states)
     {
-        const ReplicaState & state = e.second;
-        Connection * connection = connections[state.connection_index];
+        Connection * connection = state.connection;
         if (connection != nullptr)
             connection->sendCancel();
     }
@@ -255,10 +216,9 @@ std::string MultiplexedConnections::dumpAddressesUnlocked() const
 {
     bool is_first = true;
     std::ostringstream os;
-    for (const auto & e : replica_map)
+    for (const ReplicaState & state : replica_states)
     {
-        const ReplicaState & state = e.second;
-        const Connection * connection = connections[state.connection_index];
+        const Connection * connection = state.connection;
         if (connection != nullptr)
         {
             os << (is_first ? "" : "; ") << connection->getDescription();
@@ -269,75 +229,6 @@ std::string MultiplexedConnections::dumpAddressesUnlocked() const
     return os.str();
 }
 
-void MultiplexedConnections::initShard(ConnectionPoolWithFailover & pool, PoolMode pool_mode, const QualifiedTableName * main_table)
-{
-    std::vector<IConnectionPool::Entry> connections;
-    if (main_table)
-    {
-        auto try_results = pool.getManyChecked(&settings, pool_mode, *main_table);
-        connections.reserve(try_results.size());
-        for (auto & try_result : try_results)
-            connections.emplace_back(std::move(try_result.entry));
-    }
-    else
-        connections = pool.getMany(&settings, pool_mode);
-
-    initShard(connections);
-}
-
-void MultiplexedConnections::initShard(const std::vector<IConnectionPool::Entry> & connections)
-{
-    /// If getMany() did not allocate connections and did not throw exceptions, this means that
-    /// `skip_unavailable_shards` was set. Then just return.
-    if (connections.empty())
-        return;
-
-    ShardState shard_state;
-    shard_state.allocated_connection_count = connections.size();
-    shard_state.active_connection_count = connections.size();
-    active_connection_total_count += shard_state.active_connection_count;
-
-    shard_states.push_back(shard_state);
-
-    pool_entries.insert(pool_entries.end(), connections.begin(), connections.end());
-}
-
-void MultiplexedConnections::registerShards()
-{
-    replica_map.reserve(pool_entries.size());
-    connections.reserve(pool_entries.size());
-
-    size_t offset = 0;
-    for (auto & shard_state : shard_states)
-    {
-        size_t index_begin = offset;
-        size_t index_end = offset + shard_state.allocated_connection_count;
-        registerReplicas(index_begin, index_end, shard_state);
-        offset = index_end;
-    }
-}
-
-void MultiplexedConnections::registerReplicas(size_t index_begin, size_t index_end, ShardState & shard_state)
-{
-    for (size_t i = index_begin; i < index_end; ++i)
-    {
-        ReplicaState replica_state;
-        replica_state.connection_index = i;
-        replica_state.shard_state = &shard_state;
-
-        Connection * connection = &*(pool_entries[i]);
-        if (connection == nullptr)
-            throw Exception("MultiplexedConnections: Internal error", ErrorCodes::LOGICAL_ERROR);
-
-        connection->setThrottler(throttler);
-        connections.push_back(connection);
-
-        auto res = replica_map.emplace(connection->socket.impl()->sockfd(), replica_state);
-        if (!res.second)
-            throw Exception("Invalid set of connections", ErrorCodes::LOGICAL_ERROR);
-    }
-}
-
 Connection::Packet MultiplexedConnections::receivePacketUnlocked()
 {
     if (!sent_query)
@@ -345,14 +236,10 @@ Connection::Packet MultiplexedConnections::receivePacketUnlocked()
     if (!hasActiveConnections())
         throw Exception("No more packets are available.", ErrorCodes::LOGICAL_ERROR);
 
-    auto it = getReplicaForReading();
-    if (it == replica_map.end())
-        throw Exception("Logical error: no available replica", ErrorCodes::NO_AVAILABLE_REPLICA);
-
-    ReplicaState & state = it->second;
-    current_connection = connections[state.connection_index];
+    ReplicaState & state = getReplicaForReading();
+    current_connection = state.connection;
     if (current_connection == nullptr)
-        throw Exception("MultiplexedConnections: Internal error", ErrorCodes::LOGICAL_ERROR);
+        throw Exception("Logical error: no available replica", ErrorCodes::NO_AVAILABLE_REPLICA);
 
     Connection::Packet packet = current_connection->receivePacket();
 
@@ -366,48 +253,32 @@ Connection::Packet MultiplexedConnections::receivePacketUnlocked()
             break;
 
         case Protocol::Server::EndOfStream:
-            invalidateReplica(it);
+            invalidateReplica(state);
             break;
 
         case Protocol::Server::Exception:
         default:
             current_connection->disconnect();
-            invalidateReplica(it);
+            invalidateReplica(state);
             break;
     }
 
     return packet;
 }
 
-MultiplexedConnections::ReplicaMap::iterator MultiplexedConnections::getReplicaForReading()
+MultiplexedConnections::ReplicaState & MultiplexedConnections::getReplicaForReading()
 {
-    ReplicaMap::iterator it;
+    if (replica_states.size() == 1)
+        return replica_states[0];
 
-    if (supports_parallel_execution)
-        it = waitForReadEvent();
-    else
-    {
-        it = replica_map.begin();
-        const ReplicaState & state = it->second;
-        Connection * connection = connections[state.connection_index];
-        if (connection == nullptr)
-            it = replica_map.end();
-    }
-
-    return it;
-}
-
-MultiplexedConnections::ReplicaMap::iterator MultiplexedConnections::waitForReadEvent()
-{
     Poco::Net::Socket::SocketList read_list;
-    read_list.reserve(active_connection_total_count);
+    read_list.reserve(active_connection_count);
 
     /// First, we check if there are data already in the buffer
     /// of at least one connection.
-    for (const auto & e : replica_map)
+    for (const ReplicaState & state : replica_states)
     {
-        const ReplicaState & state = e.second;
-        Connection * connection = connections[state.connection_index];
+        Connection * connection = state.connection;
         if ((connection != nullptr) && connection->hasReadBufferPendingData())
             read_list.push_back(connection->socket);
     }
@@ -419,10 +290,9 @@ MultiplexedConnections::ReplicaMap::iterator MultiplexedConnections::waitForRead
         Poco::Net::Socket::SocketList write_list;
         Poco::Net::Socket::SocketList except_list;
 
-        for (const auto & e : replica_map)
+        for (const ReplicaState & state : replica_states)
         {
-            const ReplicaState & state = e.second;
-            Connection * connection = connections[state.connection_index];
+            Connection * connection = state.connection;
             if (connection != nullptr)
                 read_list.push_back(connection->socket);
         }
@@ -434,17 +304,14 @@ MultiplexedConnections::ReplicaMap::iterator MultiplexedConnections::waitForRead
     }
 
     auto & socket = read_list[rand() % read_list.size()];
-    return replica_map.find(socket.impl()->sockfd());
+    return replica_states[fd_to_replica_state_idx.at(socket.impl()->sockfd())];
 }
 
-void MultiplexedConnections::invalidateReplica(MultiplexedConnections::ReplicaMap::iterator it)
+void MultiplexedConnections::invalidateReplica(ReplicaState & state)
 {
-    ReplicaState & state = it->second;
-    ShardState * shard_state = state.shard_state;
-
-    connections[state.connection_index] = nullptr;
-    --shard_state->active_connection_count;
-    --active_connection_total_count;
+    state.connection = nullptr;
+    state.pool_entry = IConnectionPool::Entry();
+    --active_connection_count;
 }
 
 }
diff --git a/dbms/src/Client/MultiplexedConnections.h b/dbms/src/Client/MultiplexedConnections.h
index 5f2e524a58e..1765741c649 100644
--- a/dbms/src/Client/MultiplexedConnections.h
+++ b/dbms/src/Client/MultiplexedConnections.h
@@ -10,7 +10,7 @@ namespace DB
 {
 
 
-/** To retrieve data directly from multiple replicas (connections) from one or several shards
+/** To retrieve data directly from multiple replicas (connections) from one shard
   * within a single thread. As a degenerate case, it can also work with one connection.
   * It is assumed that all functions except sendCancel are always executed in one thread.
   *
@@ -29,22 +29,6 @@ public:
             std::vector<IConnectionPool::Entry> && connections,
             const Settings & settings_, const ThrottlerPtr & throttler_, bool append_extra_info);
 
-    /** Accepts a pool from which it will be necessary to get one or more connections.
-      * If the append_extra_info flag is set, additional information appended to each received block.
-      */
-    MultiplexedConnections(
-            ConnectionPoolWithFailover & pool,
-            const Settings & settings_, const ThrottlerPtr & throttler_, bool append_extra_info,
-            PoolMode pool_mode, const QualifiedTableName * main_table = nullptr);
-
-    /** Accepts pools, one for each shard, from which one will need to get one or more connections.
-      * If the append_extra_info flag is set, additional information appended to each received block.
-      */
-    MultiplexedConnections(
-            const ConnectionPoolWithFailoverPtrs & pools,
-            const Settings & settings_, const ThrottlerPtr & throttler_, bool append_extra_info,
-            PoolMode pool_mode, const QualifiedTableName * main_table = nullptr);
-
     /// Send all content of external tables to replicas.
     void sendExternalTablesData(std::vector<ExternalTablesData> & data);
 
@@ -79,87 +63,46 @@ public:
 
     /// Returns the number of replicas.
     /// Without locking, because sendCancel() does not change this number.
-    size_t size() const { return replica_map.size(); }
+    size_t size() const { return replica_states.size(); }
 
     /// Check if there are any valid replicas.
     /// Without locking, because sendCancel() does not change the state of the replicas.
-    bool hasActiveConnections() const { return active_connection_total_count > 0; }
+    bool hasActiveConnections() const { return active_connection_count > 0; }
 
 private:
-    /// Connections of the 1st shard, then the connections of the 2nd shard, etc.
-    using Connections = std::vector<Connection *>;
-
-    /// The state of the connections of one shard.
-    struct ShardState
-    {
-        /// The number of connections allocated, i.e. replicas for this shard.
-        size_t allocated_connection_count;
-        /// The current number of valid connections to the replicas of this shard.
-        size_t active_connection_count;
-    };
-
-    /// Description of a single replica.
-    struct ReplicaState
-    {
-        size_t connection_index;
-        /// The owner of this replica.
-        ShardState * shard_state;
-    };
-
-    /// Replicas hashed by id of the socket.
-    using ReplicaMap = std::unordered_map<int, ReplicaState>;
-
-    /// The state of each shard.
-    using ShardStates = std::vector<ShardState>;
-
-private:
-    void initShard(ConnectionPoolWithFailover & pool, PoolMode pool_mode, const QualifiedTableName * main_table);
-    void initShard(const std::vector<IConnectionPool::Entry> & connections);
-
-    void registerShards();
-
-    /// Register replicas of one shard.
-    void registerReplicas(size_t index_begin, size_t index_end, ShardState & shard_state);
-
     /// Internal version of `receivePacket` function without locking.
     Connection::Packet receivePacketUnlocked();
 
     /// Internal version of `dumpAddresses` function without locking.
     std::string dumpAddressesUnlocked() const;
 
-    /// Get a replica where you can read the data.
-    ReplicaMap::iterator getReplicaForReading();
+    /// Description of a single replica.
+    struct ReplicaState
+    {
+        Connection * connection = nullptr;
+        ConnectionPool::Entry pool_entry;
+    };
 
-    /** Check if there are any data that can be read on any of the replicas.
-      * Returns one such replica if it exists.
-      */
-    ReplicaMap::iterator waitForReadEvent();
+    /// Get a replica where you can read the data.
+    ReplicaState & getReplicaForReading();
 
     /// Mark the replica as invalid.
-    void invalidateReplica(ReplicaMap::iterator it);
+    void invalidateReplica(ReplicaState & replica_state);
 
 private:
     const Settings & settings;
 
-    Connections connections;
-    ReplicaMap replica_map;
-    ShardStates shard_states;
+    /// The current number of valid connections to the replicas of this shard.
+    size_t active_connection_count = 0;
 
-    /// If not nullptr, then it is used to restrict network traffic.
-    ThrottlerPtr throttler;
-
-    std::vector<ConnectionPool::Entry> pool_entries;
+    std::vector<ReplicaState> replica_states;
+    std::unordered_map<int, size_t> fd_to_replica_state_idx;
 
     /// Connection that received last block.
-    Connection * current_connection;
+    Connection * current_connection = nullptr;
     /// Information about the last received block, if supported.
     std::unique_ptr<BlockExtraInfo> block_extra_info;
 
-    /// The current number of valid connections to replicas.
-    size_t active_connection_total_count = 0;
-    /// The query is run in parallel on multiple replicas.
-    bool supports_parallel_execution;
-
     bool sent_query = false;
 
     bool cancelled = false;
diff --git a/dbms/src/DataStreams/RemoteBlockInputStream.cpp b/dbms/src/DataStreams/RemoteBlockInputStream.cpp
index fa152855741..c20b0e7a60a 100644
--- a/dbms/src/DataStreams/RemoteBlockInputStream.cpp
+++ b/dbms/src/DataStreams/RemoteBlockInputStream.cpp
@@ -58,9 +58,21 @@ RemoteBlockInputStream::RemoteBlockInputStream(
 
     create_multiplexed_connections = [this, pool, throttler]()
     {
-        const QualifiedTableName * main_table_ptr = main_table ? &main_table.value() : nullptr;
+        const Settings & settings = context.getSettingsRef();
+
+        std::vector<IConnectionPool::Entry> connections;
+        if (main_table)
+        {
+            auto try_results = pool->getManyChecked(&settings, pool_mode, main_table.value());
+            connections.reserve(try_results.size());
+            for (auto & try_result : try_results)
+                connections.emplace_back(std::move(try_result.entry));
+        }
+        else
+            connections = pool->getMany(&settings, pool_mode);
+
         return std::make_unique<MultiplexedConnections>(
-                *pool, context.getSettingsRef(), throttler, append_extra_info, pool_mode, main_table_ptr);
+                std::move(connections), settings, throttler, append_extra_info);
     };
 }
 

From 7b0a7def02c342cc3e46552abc31b7f983be83a0 Mon Sep 17 00:00:00 2001
From: Alexey Zatelepin <ztlpn@yandex-team.ru>
Date: Wed, 2 Aug 2017 18:24:47 +0300
Subject: [PATCH 132/281] remove unused setting [#CLICKHOUSE-3151]

---
 dbms/src/Interpreters/Settings.h | 3 ---
 1 file changed, 3 deletions(-)

diff --git a/dbms/src/Interpreters/Settings.h b/dbms/src/Interpreters/Settings.h
index 5d7d5d8173a..8df32185146 100644
--- a/dbms/src/Interpreters/Settings.h
+++ b/dbms/src/Interpreters/Settings.h
@@ -191,9 +191,6 @@ struct Settings
     M(SettingUInt64, select_sequential_consistency, 0) \
     /** The maximum number of different shards and the maximum number of replicas of one shard in the `remote` function. */ \
     M(SettingUInt64, table_function_remote_max_addresses, 1000) \
-    /** Maximum number of threads for distributed processing of one query */ \
-    M(SettingUInt64, max_distributed_processing_threads, 8) \
-    \
     /** Settings to reduce the number of threads in case of slow reads. */ \
     /** Pay attention only to readings that took at least that much time. */ \
     M(SettingMilliseconds,     read_backoff_min_latency_ms, 1000) \

From 3c0d0274d0e2592f449f231d690c3c26e4d2578c Mon Sep 17 00:00:00 2001
From: alexey-milovidov <milovidov@yandex-team.ru>
Date: Sun, 13 Aug 2017 12:16:22 +0300
Subject: [PATCH 133/281] Update executeQuery.h

---
 dbms/src/Interpreters/ClusterProxy/executeQuery.h | 1 -
 1 file changed, 1 deletion(-)

diff --git a/dbms/src/Interpreters/ClusterProxy/executeQuery.h b/dbms/src/Interpreters/ClusterProxy/executeQuery.h
index a28fc9fed97..b2646a69f69 100644
--- a/dbms/src/Interpreters/ClusterProxy/executeQuery.h
+++ b/dbms/src/Interpreters/ClusterProxy/executeQuery.h
@@ -17,7 +17,6 @@ namespace ClusterProxy
 class IStreamFactory;
 
 /// Execute a distributed query, creating a vector of BlockInputStreams, from which the result can be read.
-/// If `enable_shard_multiplexing` is false, each stream corresponds to a single shard.
 /// `stream_factory` object encapsulates the logic of creating streams for a different type of query
 /// (currently SELECT, DESCRIBE, or ALTER (for resharding)).
 BlockInputStreams executeQuery(

From c65c49b50aebb31f0a7776681611b6d9ce8a7f66 Mon Sep 17 00:00:00 2001
From: Vitaliy Lyudvichenko <vludv@yandex-team.ru>
Date: Wed, 26 Jul 2017 22:31:32 +0300
Subject: [PATCH 134/281] Add config-based host name selection.
 [#CLICKHOUSE-3128]

---
 dbms/src/Interpreters/Cluster.cpp   |  12 ++
 dbms/src/Interpreters/Cluster.h     |   3 +
 dbms/src/Interpreters/DDLWorker.cpp | 238 +++++++++++++++++++---------
 dbms/src/Interpreters/DDLWorker.h   |  18 ++-
 4 files changed, 184 insertions(+), 87 deletions(-)

diff --git a/dbms/src/Interpreters/Cluster.cpp b/dbms/src/Interpreters/Cluster.cpp
index 2ed378b12d8..e8d7c67208c 100644
--- a/dbms/src/Interpreters/Cluster.cpp
+++ b/dbms/src/Interpreters/Cluster.cpp
@@ -5,6 +5,7 @@
 #include <Common/StringUtils.h>
 #include <IO/HexWriteBuffer.h>
 #include <IO/WriteHelpers.h>
+#include <IO/ReadHelpers.h>
 #include <Poco/Util/AbstractConfiguration.h>
 #include <Poco/Util/Application.h>
 #include <openssl/sha.h>
@@ -18,6 +19,7 @@ namespace ErrorCodes
     extern const int EXCESSIVE_ELEMENT_IN_CONFIG;
     extern const int LOGICAL_ERROR;
     extern const int SHARD_HAS_NO_CONNECTIONS;
+    extern const int SYNTAX_ERROR;
 }
 
 namespace
@@ -113,6 +115,16 @@ String Cluster::Address::toString(const String & host_name, UInt16 port)
     return escapeForFileName(host_name) + ':' + DB::toString(port);
 }
 
+void Cluster::Address::fromString(const String & host_port_string, String & host_name, UInt16 & port)
+{
+    auto pos = host_port_string.find_last_of(':');  /// Inverse of toString(host_name, port): split on the last ':'.
+    if (pos == std::string::npos)
+        throw Exception("Incorrect host ID format " + host_port_string, ErrorCodes::SYNTAX_ERROR);
+    /// Everything before the last ':' is the escaped host name; everything after it is the port.
+    host_name = unescapeForFileName(host_port_string.substr(0, pos));
+    port = parse<UInt16>(host_port_string.substr(pos + 1));  /// Was substr(0, pos), i.e. the host part again.
+}
+
 
 String Cluster::Address::toStringFull() const
 {
diff --git a/dbms/src/Interpreters/Cluster.h b/dbms/src/Interpreters/Cluster.h
index cb131a00393..f68236fad8d 100644
--- a/dbms/src/Interpreters/Cluster.h
+++ b/dbms/src/Interpreters/Cluster.h
@@ -57,6 +57,7 @@ public:
         UInt32 replica_num;
         bool is_local;
 
+        Address() = default;
         Address(Poco::Util::AbstractConfiguration & config, const String & config_prefix);
         Address(const String & host_port_, const String & user_, const String & password_);
 
@@ -65,6 +66,8 @@ public:
 
         static String toString(const String & host_name, UInt16 port);
 
+        static void fromString(const String & host_port_string, String & host_name, UInt16 & port);
+
         /// Retrurns escaped user:password@resolved_host_address:resolved_host_port#default_database
         String toStringFull() const;
     };
diff --git a/dbms/src/Interpreters/DDLWorker.cpp b/dbms/src/Interpreters/DDLWorker.cpp
index e0be7563b9b..500eae0897d 100644
--- a/dbms/src/Interpreters/DDLWorker.cpp
+++ b/dbms/src/Interpreters/DDLWorker.cpp
@@ -30,6 +30,7 @@
 
 #include <Common/ZooKeeper/ZooKeeper.h>
 #include <Common/ZooKeeper/Lock.h>
+#include <Common/isLocalAddress.h>
 #include <Poco/Timestamp.h>
 
 #include <experimental/optional>
@@ -47,13 +48,13 @@ namespace ErrorCodes
     extern const int INCONSISTENT_CLUSTER_DEFINITION;
     extern const int TIMEOUT_EXCEEDED;
     extern const int UNFINISHED;
+    extern const int UNKNOWN_TYPE_OF_QUERY;
 }
 
 
 const size_t DDLWorker::node_max_lifetime_seconds = 7 * 24 * 60 * 60; // week
 const size_t DDLWorker::cleanup_min_period_seconds = 60; // minute
 
-
 struct DDLLogEntry
 {
     String query;
@@ -98,21 +99,81 @@ struct DDLLogEntry
 };
 
 
-using ShardAndHostNum = std::experimental::optional<std::pair<size_t, size_t>>;
-static ShardAndHostNum tryGetShardAndHostNum(const Cluster::AddressesWithFailover & cluster, const String & host_name, UInt16 port)
+struct DDLTask
 {
-    for (size_t shard_num = 0; shard_num < cluster.size(); ++shard_num)
-    {
-        for (size_t host_num = 0; host_num < cluster[shard_num].size(); ++host_num)
-        {
-            const Cluster::Address & address = cluster[shard_num][host_num];
-            if (address.host_name == host_name && address.port == port)
-                return std::make_pair(shard_num, host_num);
-        }
-    }
+    DDLLogEntry entry;
+    String entry_name;
 
-    return {};
-}
+    ASTPtr query;
+    ASTQueryWithOnCluster * query_on_cluster = nullptr;
+
+    String cluster_name;
+    ClusterPtr cluster;
+
+    Cluster::Address host_address;
+    String host_id_in_cluster;
+    size_t host_shard_num;
+    size_t host_replica_num;
+
+    /// Parses entry and query, extracts cluster and finds current host in the cluster
+    /// Return true if current host is found in the cluster
+    bool fillFromEntryData(const String & entry_data, const String & entry_name_, DDLWorker & worker)
+    {
+        entry.parse(entry_data);
+        entry_name = entry_name_;
+
+        {
+            const char * begin = entry.query.data();
+            const char * end = begin + entry.query.size();
+
+            ParserQuery parser_query(end);
+            String description;
+            query = parseQuery(parser_query, begin, end, description);
+        }
+
+        if (!query || !(query_on_cluster = dynamic_cast<ASTQueryWithOnCluster *>(query.get())))
+            throw Exception("Recieved unknown DDL query", ErrorCodes::UNKNOWN_TYPE_OF_QUERY);
+
+        cluster_name = query_on_cluster->cluster;
+        cluster = worker.context.tryGetCluster(cluster_name);
+
+        if (!cluster)
+        {
+            LOG_INFO(worker.log, "Will not execute entry " << entry_name << ": there is no cluster " << cluster_name << " on current host");
+            return false;
+        }
+
+        bool found = false;
+        const auto & shards = cluster->getShardsAddresses();
+        for (size_t shard_num = 0; shard_num < shards.size(); ++shard_num)
+        {
+            for (size_t replica_num = 0; replica_num < shards[shard_num].size(); ++replica_num)
+            {
+                const Cluster::Address & address = shards[shard_num][replica_num];
+
+                if (isLocalAddress(address.resolved_address))
+                {
+                    if (found)
+                    {
+                        LOG_WARNING(worker.log, "There are at least two the same ClickHouse instances in cluster " << cluster_name << ": "
+                            << host_id_in_cluster << " and " << address.toString()
+                            << ". Will use the first one only.");
+                    }
+                    else
+                    {
+                        host_address = address;
+                        host_id_in_cluster = address.toString();
+                        host_shard_num = shard_num;
+                        host_replica_num = replica_num;
+                        found = true;
+                    }
+                }
+            }
+        }
+
+        return found;
+    }
+};
 
 
 static bool isSupportedAlterType(int type)
@@ -136,9 +197,8 @@ DDLWorker::DDLWorker(const std::string & zk_root_dir, Context & context_)
     if (queue_dir.back() == '/')
         queue_dir.resize(queue_dir.size() - 1);
 
-    host_name = getFQDNOrHostName();
-    port = context.getTCPPort();
-    host_id = Cluster::Address::toString(host_name, port);
+    host_fqdn = getFQDNOrHostName();
+    host_fqdn_id = Cluster::Address::toString(host_fqdn, context.getTCPPort());
 
     event_queue_updated = std::make_shared<Poco::Event>();
 
@@ -181,34 +241,74 @@ void DDLWorker::processTasks()
             continue;
         }
 
-        DDLLogEntry node;
-        node.parse(node_data);
+        DDLTask task;
+        bool found_cluster_and_host = false;
+        try
+        {
+            found_cluster_and_host = task.fillFromEntryData(node_data, node_name, *this);
+        }
+        catch (...)
+        {
+            auto status = ExecutionStatus::fromCurrentException();
+            /// We even cannot parse host name and can't properly submit execution status.
+            /// What should we do?
+        }
 
-        bool host_in_hostlist = std::find(node.hosts.cbegin(), node.hosts.cend(), host_id) != node.hosts.cend();
-        bool already_processed = zookeeper->exists(node_path + "/finished/" + host_id);
+        const auto & hosts = task.entry.hosts;
+        if (!found_cluster_and_host)
+        {
+            bool fqdn_in_hostlist = std::find(hosts.cbegin(), hosts.cend(), host_fqdn_id) != hosts.cend();
+            if (fqdn_in_hostlist)
+            {
+                LOG_ERROR(log, "Not found current host in cluster " << task.cluster_name << " of task " << task.entry_name
+                    << ", but found host " << host_fqdn_id << " with the same FQDN in host list of the task"
+                    << ". Possibly inconsistent cluster definition among servers.");
+            }
+            else
+            {
+                LOG_DEBUG(log, "Skipping task " << node_data);
+            }
 
+            last_processed_node_name = node_name;
+            continue;
+        }
+        else
+        {
+            bool host_in_hostlist = std::find(hosts.cbegin(), hosts.cend(), task.host_id_in_cluster) != hosts.cend();
+            if (!host_in_hostlist)
+            {
+                LOG_ERROR(log, "Current host was found in cluster " << task.cluster_name
+                    << ", but was not found in host list of task " << task.entry_name
+                    << ". Possibly inconsistent cluster definition among servers.");
+
+                last_processed_node_name = node_name;
+                continue;
+            }
+        }
+
+        bool already_processed = zookeeper->exists(node_path + "/finished/" + task.host_id_in_cluster);
         if (!server_startup && already_processed)
         {
             throw Exception(
-                "Server expects that DDL node " + node_name + " should be processed, but it was already processed according to ZK",
+                "Server expects that DDL task " + node_name + " should be processed, but it was already processed according to ZK",
                 ErrorCodes::LOGICAL_ERROR);
         }
 
-        if (host_in_hostlist && !already_processed)
+        if (!already_processed)
         {
             try
             {
-                processTask(node, node_name);
+                processTask(task);
             }
             catch (...)
             {
-                tryLogCurrentException(log, "An error occurred while processing node " + node_name + " (" + node.query + ")");
+                tryLogCurrentException(log, "An error occurred while processing task " + node_name + " (" + task.entry.query + ")");
                 throw;
             }
         }
         else
         {
-            LOG_DEBUG(log, "Node " << node_name << " (" << node.query << ") will not be processed");
+            LOG_DEBUG(log, "Task " << node_name << " (" << task.entry.query << ") has been already processed");
         }
 
         last_processed_node_name = node_name;
@@ -240,58 +340,32 @@ static bool tryExecuteQuery(const String & query, Context & context, ExecutionSt
 }
 
 
-void DDLWorker::processTask(const DDLLogEntry & node, const std::string & node_name)
+void DDLWorker::processTask(DDLTask & task)
 {
-    LOG_DEBUG(log, "Processing node " << node_name << " (" << node.query << ")");
+    LOG_DEBUG(log, "Processing entry " << task.entry_name << " (" << task.entry.query << ")");
 
-    String node_path = queue_dir + "/" + node_name;
+    String node_path = queue_dir + "/" + task.entry_name;
     createStatusDirs(node_path);
 
-    bool should_not_execute = current_node == node_name && current_node_was_executed;
+    bool should_not_execute = current_node == task.entry_name && current_node_was_executed;
 
     if (!should_not_execute)
     {
-        current_node = node_name;
+        current_node = task.entry_name;
         current_node_was_executed = false;
 
-        zookeeper->create(node_path + "/active/" + host_id, "", zkutil::CreateMode::Ephemeral);
+        zookeeper->create(node_path + "/active/" + task.host_id_in_cluster, "", zkutil::CreateMode::Ephemeral);
 
         try
         {
-            ASTPtr query_ast;
-            {
-                const char * begin = &node.query.front();
-                ParserQuery parser_query(begin + node.query.size());
-                String description;
-                query_ast = parseQuery(parser_query, begin, begin + node.query.size(), description);
-            }
-
-            const ASTQueryWithOnCluster * query;
-            if (!query_ast || !(query = dynamic_cast<const ASTQueryWithOnCluster *>(query_ast.get())))
-                throw Exception("Recieved unsupported DDL query", ErrorCodes::NOT_IMPLEMENTED);
-
-            String cluster_name = query->cluster;
-            auto cluster = context.getCluster(cluster_name);
-
-            auto shard_host_num = tryGetShardAndHostNum(cluster->getShardsAddresses(), host_name, port);
-            if (!shard_host_num)
-            {
-                throw Exception("Cannot find own address (" + host_id + ") in cluster " + cluster_name + " configuration",
-                                ErrorCodes::INCONSISTENT_CLUSTER_DEFINITION);
-            }
-
-            size_t shard_num = shard_host_num->first;
-            size_t host_num = shard_host_num->second;
-
-            const auto & host_address = cluster->getShardsAddresses().at(shard_num).at(host_num);
-            ASTPtr rewritten_ast = query->getRewrittenASTWithoutOnCluster(host_address.default_database);
+            ASTPtr rewritten_ast = task.query_on_cluster->getRewrittenASTWithoutOnCluster(task.host_address.default_database);
             String rewritten_query = queryToString(rewritten_ast);
 
             LOG_DEBUG(log, "Executing query: " << rewritten_query);
 
             if (auto query_alter = dynamic_cast<const ASTAlterQuery *>(rewritten_ast.get()))
             {
-                processTaskAlter(query_alter, rewritten_query, cluster, shard_num, node_path);
+                processTaskAlter(task, query_alter, rewritten_query, node_path);
             }
             else
             {
@@ -313,21 +387,23 @@ void DDLWorker::processTask(const DDLLogEntry & node, const std::string & node_n
 
     /// Delete active flag and create finish flag
     zkutil::Ops ops;
-    ops.emplace_back(std::make_unique<zkutil::Op::Remove>(node_path + "/active/" + host_id, -1));
-    ops.emplace_back(std::make_unique<zkutil::Op::Create>(node_path + "/finished/" + host_id,
+    ops.emplace_back(std::make_unique<zkutil::Op::Remove>(node_path + "/active/" + task.host_id_in_cluster, -1));
+    ops.emplace_back(std::make_unique<zkutil::Op::Create>(node_path + "/finished/" + task.host_id_in_cluster,
         current_node_execution_status.serializeText(), zookeeper->getDefaultACL(), zkutil::CreateMode::Persistent));
 
     int code = zookeeper->tryMultiWithRetries(ops);
     if (code != ZOK && code != ZNONODE)
-        throw zkutil::KeeperException("Cannot commit executed node " + node_name, code);
+    {
+        /// FIXME: if server fails here, the task will be executed twice. We need WAL here.
+        throw zkutil::KeeperException("Cannot commit executed entry " + task.entry_name, code);
+    }
 }
 
 
 void DDLWorker::processTaskAlter(
+    DDLTask & task,
     const ASTAlterQuery * query_alter,
     const String & rewritten_query,
-    const std::shared_ptr<Cluster> & cluster,
-    ssize_t shard_num,
     const String & node_path)
 {
     String database = query_alter->database.empty() ? context.getCurrentDatabase() : query_alter->database;
@@ -345,31 +421,31 @@ void DDLWorker::processTaskAlter(
             execute_on_leader_replica |= param.type == ASTAlterQuery::DROP_PARTITION;
     }
 
-    const auto & shard_info = cluster->getShardsInfo().at(shard_num);
+    const auto & shard_info = task.cluster->getShardsInfo().at(task.host_shard_num);
     bool config_is_replicated_shard = shard_info.hasInternalReplication();
 
     if (execute_once_on_replica && !config_is_replicated_shard)
     {
-        throw Exception("Table " + query_alter->table + " is replicated, but shard #" + toString(shard_num + 1) +
+        throw Exception("Table " + query_alter->table + " is replicated, but shard #" + toString(task.host_shard_num + 1) +
             " isn't replicated according to its cluster definition", ErrorCodes::INCONSISTENT_CLUSTER_DEFINITION);
     }
     else if (!execute_once_on_replica && config_is_replicated_shard)
     {
-        throw Exception("Table " + query_alter->table + " isn't replicated, but shard #" + toString(shard_num + 1) +
+        throw Exception("Table " + query_alter->table + " isn't replicated, but shard #" + toString(task.host_shard_num + 1) +
             " replicated according to its cluster definition", ErrorCodes::INCONSISTENT_CLUSTER_DEFINITION);
     }
 
     if (execute_once_on_replica)
     {
-        /// The following code may perform ALTER twice if
-        ///  current secver aquires lock, executes replicated alter,
+        /// The following code can perform ALTER twice if
+        ///  current server aquires lock, executes replicated alter,
         ///  losts zookeeper connection and doesn't have time to create /executed node, second server executes replicated alter again
         /// To avoid this problem alter() method of replicated tables should be changed and takes into account ddl query id tag.
         if (!context.getSettingsRef().distributed_ddl_allow_replicated_alter)
             throw Exception("Distributed DDL alters don't work properly yet", ErrorCodes::NOT_IMPLEMENTED);
 
         Strings replica_names;
-        for (const auto & address : cluster->getShardsAddresses().at(shard_num))
+        for (const auto & address : task.cluster->getShardsAddresses().at(task.host_shard_num))
             replica_names.emplace_back(address.toString());
         std::sort(replica_names.begin(), replica_names.end());
 
@@ -386,8 +462,8 @@ void DDLWorker::processTaskAlter(
             auto zookeeper_holder = std::make_shared<zkutil::ZooKeeperHolder>();
             zookeeper_holder->initFromInstance(zookeeper);
 
-            zkutil::Lock lock(zookeeper_holder, shard_path, "lock", host_id);
-            std::mt19937 rng(std::hash<String>{}(host_id) + reinterpret_cast<intptr_t>(&rng));
+            zkutil::Lock lock(zookeeper_holder, shard_path, "lock", task.host_id_in_cluster);
+            std::mt19937 rng(std::hash<String>{}(task.host_id_in_cluster) + reinterpret_cast<intptr_t>(&rng));
 
             for (int num_tries = 0; num_tries < 10; ++num_tries)
             {
@@ -406,7 +482,7 @@ void DDLWorker::processTaskAlter(
                         /// TODO: it is ok to recieve exception "host is not leader"
                     }
 
-                    zookeeper->create(is_executed_path, host_id, zkutil::CreateMode::Persistent);
+                    zookeeper->create(is_executed_path, task.host_id_in_cluster, zkutil::CreateMode::Persistent);
                     lock.unlock();
                     alter_executed_by_replica = true;
                     break;
@@ -455,7 +531,7 @@ void DDLWorker::cleanupQueue(const Strings * node_names_to_check)
             if (!zookeeper->tryGet(node_path, data, &stat))
                 continue;
 
-            /// TODO: Add shared lock to avoid rare race counditions.
+            /// TODO: Add shared lock to avoid rare race conditions.
 
             size_t zookeeper_time_seconds = stat.mtime / zookeeper_time_resolution;
             if (zookeeper_time_seconds + node_max_lifetime_seconds < current_time_seconds)
@@ -603,7 +679,7 @@ public:
             if (!zookeeper->exists(node_path))
             {
                 throw Exception("Cannot provide query execution status. The query's node " + node_path
-                                + " had been deleted by cleaner since it was finished (or its lifetime is expired)",
+                                + " had been deleted by the cleaner since it was finished (or its lifetime is expired)",
                                 ErrorCodes::UNFINISHED);
             }
 
@@ -615,15 +691,19 @@ public:
             Strings cur_active_hosts = getChildrenAllowNoNode(zookeeper, node_path + "/active");
 
             res = sample.cloneEmpty();
-            for (const String & host : new_hosts)
+            for (const String & host_id : new_hosts)
             {
                 ExecutionStatus status(1, "Cannot obtain error message");
                 {
                     String status_data;
-                    if (zookeeper->tryGet(node_path + "/finished/" + host, status_data))
+                    if (zookeeper->tryGet(node_path + "/finished/" + host_id, status_data))
                         status.deserializeText(status_data);
                 }
 
+                String host;
+                UInt16 port;
+                Cluster::Address::fromString(host_id, host, port);
+
                 res.getByName("host").column->insert(host);
                 res.getByName("status").column->insert(static_cast<UInt64>(status.code));
                 res.getByName("error").column->insert(status.message);
@@ -698,7 +778,7 @@ BlockIO executeDDLQueryOnCluster(const ASTPtr & query_ptr, Context & context)
 
     DDLLogEntry entry;
     entry.query = queryToString(query_ptr);
-    entry.initiator = ddl_worker.getHostName();
+    entry.initiator = ddl_worker.getCommonHostID();
 
     Cluster::AddressesWithFailover shards = cluster->getShardsAddresses();
     for (const auto & shard : shards)
diff --git a/dbms/src/Interpreters/DDLWorker.h b/dbms/src/Interpreters/DDLWorker.h
index 52ec7fabbc5..510721f0154 100644
--- a/dbms/src/Interpreters/DDLWorker.h
+++ b/dbms/src/Interpreters/DDLWorker.h
@@ -15,6 +15,7 @@ namespace DB
 
 class ASTAlterQuery;
 struct DDLLogEntry;
+struct DDLTask;
 
 
 BlockIO executeDDLQueryOnCluster(const ASTPtr & query_ptr, Context & context);
@@ -29,21 +30,22 @@ public:
     /// Pushes query into DDL queue, returns path to created node
     String enqueueQuery(DDLLogEntry & entry);
 
-    std::string getHostName() const
+    /// Host ID (name:port) for logging purposes
+    /// Note that in each entry hosts are identified by name:port from cluster config
+    std::string getCommonHostID() const
     {
-        return host_id;
+        return host_fqdn_id;
     }
 
 private:
     void processTasks();
 
-    void processTask(const DDLLogEntry & node, const std::string & node_path);
+    void processTask(DDLTask & task);
 
     void processTaskAlter(
+        DDLTask & task,
         const ASTAlterQuery * query_alter,
         const String & rewritten_query,
-        const std::shared_ptr<Cluster> & cluster,
-        ssize_t shard_num,
         const String & node_path);
 
     /// Checks and cleanups queue's nodes
@@ -58,9 +60,8 @@ private:
     Context & context;
     Logger * log = &Logger::get("DDLWorker");
 
-    std::string host_id;        /// host_name:port
-    std::string host_name;
-    UInt16 port;
+    std::string host_fqdn;      /// current host domain name
+    std::string host_fqdn_id;   /// host_name:port
 
     std::string queue_dir;      /// dir with queue of queries
     std::string master_dir;     /// dir with queries was initiated by the server
@@ -87,6 +88,7 @@ private:
     static const size_t cleanup_min_period_seconds;
 
     friend class DDLQueryStatusInputSream;
+    friend class DDLTask;
 };
 
 

From 7e8f3a0561535ece2c7f5e179bc523395914d1db Mon Sep 17 00:00:00 2001
From: Vitaliy Lyudvichenko <vludv@yandex-team.ru>
Date: Thu, 27 Jul 2017 14:30:27 +0300
Subject: [PATCH 135/281] Add parse error handling. [#CLICKHOUSE-3128]

---
 dbms/src/Interpreters/DDLWorker.cpp | 55 +++++++++++++++++++++--------
 1 file changed, 41 insertions(+), 14 deletions(-)

diff --git a/dbms/src/Interpreters/DDLWorker.cpp b/dbms/src/Interpreters/DDLWorker.cpp
index 500eae0897d..e76b4a65c7d 100644
--- a/dbms/src/Interpreters/DDLWorker.cpp
+++ b/dbms/src/Interpreters/DDLWorker.cpp
@@ -249,9 +249,21 @@ void DDLWorker::processTasks()
         }
         catch (...)
         {
-            auto status = ExecutionStatus::fromCurrentException();
-            /// We even cannot parse host name and can't properly submit execution status.
+            /// We even cannot parse host name and therefore cannot properly submit execution status.
             /// What should we do?
+            /// We can try to create fail node using FQDN if it equal to host name in cluster config attempt will be sucessfull.
+            /// Otherwise, that node will be ignored by DDLQueryStatusInputSream.
+
+            tryLogCurrentException(log, "Cannot parse DDL task " + node_data + ", will try to send error status");
+
+            ExecutionStatus status = ExecutionStatus::fromCurrentException();
+            String host_id = task.host_id_in_cluster.empty() ? host_fqdn_id : task.host_id_in_cluster;
+
+            createStatusDirs(node_path);
+            zookeeper->create(node_path + "/finished/" + host_id, current_node_execution_status.serializeText(), zkutil::CreateMode::Persistent);
+
+            last_processed_node_name = node_name;
+            continue;
         }
 
         const auto & hosts = task.entry.hosts;
@@ -628,18 +640,20 @@ class DDLQueryStatusInputSream : public IProfilingBlockInputStream
 {
 public:
 
-    DDLQueryStatusInputSream(const String & zk_node_path, Context & context, size_t num_hosts)
+    DDLQueryStatusInputSream(const String & zk_node_path, const DDLLogEntry & entry, Context & context)
     : node_path(zk_node_path), context(context), watch(CLOCK_MONOTONIC_COARSE)
     {
         sample = Block{
             {std::make_shared<DataTypeString>(),    "host"},
+            {std::make_shared<DataTypeUInt16>(),    "port"},
             {std::make_shared<DataTypeUInt64>(),    "status"},
             {std::make_shared<DataTypeString>(),    "error"},
             {std::make_shared<DataTypeUInt64>(),    "num_hosts_remaining"},
             {std::make_shared<DataTypeUInt64>(),    "num_hosts_active"},
         };
 
-        setTotalRowsApprox(num_hosts);
+        waiting_hosts.insert(entry.hosts.cbegin(), entry.hosts.cend());
+        setTotalRowsApprox(entry.hosts.size());
     }
 
     String getName() const override
@@ -683,7 +697,7 @@ public:
                                 ErrorCodes::UNFINISHED);
             }
 
-            Strings new_hosts = getNewAndUpdate(finished_hosts_set, getChildrenAllowNoNode(zookeeper, node_path + "/finished"));
+            Strings new_hosts = getNewAndUpdate(getChildrenAllowNoNode(zookeeper, node_path + "/finished"));
             ++try_number;
             if (new_hosts.empty())
                 continue;
@@ -705,6 +719,7 @@ public:
                 Cluster::Address::fromString(host_id, host, port);
 
                 res.getByName("host").column->insert(host);
+                res.getByName("port").column->insert(port);
                 res.getByName("status").column->insert(static_cast<UInt64>(status.code));
                 res.getByName("error").column->insert(status.message);
                 res.getByName("num_hosts_remaining").column->insert(total_rows_approx - (++num_hosts_finished));
@@ -724,15 +739,25 @@ public:
         return res;
     }
 
-    static Strings getNewAndUpdate(NameSet & prev, const Strings & cur_list)
+    Strings getNewAndUpdate(const Strings & current_list_of_finished_hosts)
     {
         Strings diff;
-        for (const String & elem : cur_list)
+        for (const String & host : current_list_of_finished_hosts)
         {
-            if (!prev.count(elem))
+            if (!waiting_hosts.count(host))
             {
-                diff.emplace_back(elem);
-                prev.emplace(elem);
+                if (!ignoring_hosts.count(host))
+                {
+                    ignoring_hosts.emplace(host);
+                    LOG_INFO(log, "Unexpected host " << host << " appeared " << " in task " << node_path);
+                }
+                continue;
+            }
+
+            if (!finished_hosts.count(host))
+            {
+                diff.emplace_back(host);
+                finished_hosts.emplace(host);
             }
         }
 
@@ -747,10 +772,12 @@ private:
     String node_path;
     Context & context;
 
-    Stopwatch watch;
-
-    NameSet finished_hosts_set;
+    NameSet waiting_hosts;  /// hosts from task host list
+    NameSet finished_hosts; /// finished hosts from host list
+    NameSet ignoring_hosts; /// appeared hosts that are not in hosts list
     size_t num_hosts_finished = 0;
+
+    Stopwatch watch;
 };
 
 
@@ -793,7 +820,7 @@ BlockIO executeDDLQueryOnCluster(const ASTPtr & query_ptr, Context & context)
     if (node_path.empty())
         return io;
 
-    auto stream = std::make_shared<DDLQueryStatusInputSream>(node_path, context, entry.hosts.size());
+    auto stream = std::make_shared<DDLQueryStatusInputSream>(node_path, entry, context);
     io.in_sample = stream->sample.cloneEmpty();
     io.in = std::move(stream);
     return io;

From 736feab79f879dab8d75b916b24d3288dfe40f16 Mon Sep 17 00:00:00 2001
From: Vitaliy Lyudvichenko <vludv@yandex-team.ru>
Date: Thu, 27 Jul 2017 16:11:16 +0300
Subject: [PATCH 136/281] Add port field. [#CLICKHOUSE-3128]

---
 dbms/src/Common/Exception.cpp                 | 14 ++++++++++
 dbms/src/Common/Exception.h                   |  2 ++
 dbms/src/Interpreters/DDLWorker.cpp           | 26 +++++++++----------
 .../configs/config.d/query_log.xml            |  2 +-
 .../configs/users.d/query_log.xml             |  2 +-
 .../integration/test_distributed_ddl/test.py  |  8 +++---
 6 files changed, 35 insertions(+), 19 deletions(-)

diff --git a/dbms/src/Common/Exception.cpp b/dbms/src/Common/Exception.cpp
index afca8873335..5f8b7036f17 100644
--- a/dbms/src/Common/Exception.cpp
+++ b/dbms/src/Common/Exception.cpp
@@ -242,6 +242,20 @@ void ExecutionStatus::deserializeText(const std::string & data)
     rb >> code >> "\n" >> escape >> message;
 }
 
+bool ExecutionStatus::tryDeserializeText(const std::string & data)
+{
+    try
+    {
+        deserializeText(data);
+    }
+    catch (...)
+    {
+        return false;
+    }
+
+    return true;
+}
+
 ExecutionStatus ExecutionStatus::fromCurrentException(const std::string & start_of_message)
 {
     return ExecutionStatus(getCurrentExceptionCode(), start_of_message + ": " + getCurrentExceptionMessage(false, true));
diff --git a/dbms/src/Common/Exception.h b/dbms/src/Common/Exception.h
index f7ba8d7cf6e..ecf3790d790 100644
--- a/dbms/src/Common/Exception.h
+++ b/dbms/src/Common/Exception.h
@@ -107,6 +107,8 @@ struct ExecutionStatus
     std::string serializeText() const;
 
     void deserializeText(const std::string & data);
+
+    bool tryDeserializeText(const std::string & data);
 };
 
 
diff --git a/dbms/src/Interpreters/DDLWorker.cpp b/dbms/src/Interpreters/DDLWorker.cpp
index e76b4a65c7d..7cf05a9f8b8 100644
--- a/dbms/src/Interpreters/DDLWorker.cpp
+++ b/dbms/src/Interpreters/DDLWorker.cpp
@@ -624,7 +624,7 @@ void DDLWorker::run()
         }
         catch (zkutil::KeeperException &)
         {
-            LOG_DEBUG(log, "Recovering ZooKeeper session");
+            LOG_DEBUG(log, "Recovering ZooKeeper session after " << getCurrentExceptionMessage());
             zookeeper = context.getZooKeeper();
         }
         catch (...)
@@ -641,12 +641,12 @@ class DDLQueryStatusInputSream : public IProfilingBlockInputStream
 public:
 
     DDLQueryStatusInputSream(const String & zk_node_path, const DDLLogEntry & entry, Context & context)
-    : node_path(zk_node_path), context(context), watch(CLOCK_MONOTONIC_COARSE)
+    : node_path(zk_node_path), context(context), watch(CLOCK_MONOTONIC_COARSE), log(&Logger::get("DDLQueryStatusInputSream"))
     {
         sample = Block{
             {std::make_shared<DataTypeString>(),    "host"},
             {std::make_shared<DataTypeUInt16>(),    "port"},
-            {std::make_shared<DataTypeUInt64>(),    "status"},
+            {std::make_shared<DataTypeInt64>(),     "status"},
             {std::make_shared<DataTypeString>(),    "error"},
             {std::make_shared<DataTypeUInt64>(),    "num_hosts_remaining"},
             {std::make_shared<DataTypeUInt64>(),    "num_hosts_active"},
@@ -671,7 +671,7 @@ public:
     Block readImpl() override
     {
         Block res;
-        if (num_hosts_finished >= total_rows_approx)
+        if (num_hosts_finished >= waiting_hosts.size())
             return res;
 
         auto zookeeper = context.getZooKeeper();
@@ -707,11 +707,11 @@ public:
             res = sample.cloneEmpty();
             for (const String & host_id : new_hosts)
             {
-                ExecutionStatus status(1, "Cannot obtain error message");
+                ExecutionStatus status(-1, "Cannot obtain error message");
                 {
                     String status_data;
                     if (zookeeper->tryGet(node_path + "/finished/" + host_id, status_data))
-                        status.deserializeText(status_data);
+                        status.tryDeserializeText(status_data);
                 }
 
                 String host;
@@ -719,10 +719,10 @@ public:
                 Cluster::Address::fromString(host_id, host, port);
 
                 res.getByName("host").column->insert(host);
-                res.getByName("port").column->insert(port);
-                res.getByName("status").column->insert(static_cast<UInt64>(status.code));
+                res.getByName("port").column->insert(static_cast<UInt64>(port));
+                res.getByName("status").column->insert(static_cast<Int64>(status.code));
                 res.getByName("error").column->insert(status.message);
-                res.getByName("num_hosts_remaining").column->insert(total_rows_approx - (++num_hosts_finished));
+                res.getByName("num_hosts_remaining").column->insert(waiting_hosts.size() - (++num_hosts_finished));
                 res.getByName("num_hosts_active").column->insert(cur_active_hosts.size());
             }
         }
@@ -766,18 +766,18 @@ public:
 
     ~DDLQueryStatusInputSream() override = default;
 
-    Block sample;
-
 private:
     String node_path;
     Context & context;
+    Stopwatch watch;
+    Logger * log;
+
+    Block sample;
 
     NameSet waiting_hosts;  /// hosts from task host list
     NameSet finished_hosts; /// finished hosts from host list
     NameSet ignoring_hosts; /// appeared hosts that are not in hosts list
     size_t num_hosts_finished = 0;
-
-    Stopwatch watch;
 };
 
 
diff --git a/dbms/tests/integration/test_distributed_ddl/configs/config.d/query_log.xml b/dbms/tests/integration/test_distributed_ddl/configs/config.d/query_log.xml
index c4d957d0b77..839ef92c6dc 100644
--- a/dbms/tests/integration/test_distributed_ddl/configs/config.d/query_log.xml
+++ b/dbms/tests/integration/test_distributed_ddl/configs/config.d/query_log.xml
@@ -9,6 +9,6 @@
         <table>query_log</table>
 
         <!-- Interval of flushing data. -->
-        <flush_interval_milliseconds>7500</flush_interval_milliseconds>
+        <flush_interval_milliseconds>1000</flush_interval_milliseconds>
     </query_log>
 </yandex>
\ No newline at end of file
diff --git a/dbms/tests/integration/test_distributed_ddl/configs/users.d/query_log.xml b/dbms/tests/integration/test_distributed_ddl/configs/users.d/query_log.xml
index 09989cbdcc7..d8254eeb8af 100644
--- a/dbms/tests/integration/test_distributed_ddl/configs/users.d/query_log.xml
+++ b/dbms/tests/integration/test_distributed_ddl/configs/users.d/query_log.xml
@@ -2,7 +2,7 @@
     <profiles>
         <!-- Default profile settings. -->
         <default>
-            <log_queries>0</log_queries>
+            <log_queries>1</log_queries>
             <distributed_ddl_allow_replicated_alter>1</distributed_ddl_allow_replicated_alter>
         </default>
     </profiles>
diff --git a/dbms/tests/integration/test_distributed_ddl/test.py b/dbms/tests/integration/test_distributed_ddl/test.py
index 220ae8272e5..357629001b1 100644
--- a/dbms/tests/integration/test_distributed_ddl/test.py
+++ b/dbms/tests/integration/test_distributed_ddl/test.py
@@ -13,9 +13,9 @@ def check_all_hosts_sucesfully_executed(tsv_content, num_hosts=None):
         num_hosts = len(cluster.instances)
 
     M = TSV.toMat(tsv_content)
-    hosts = [l[0] for l in M]
-    codes = [l[1] for l in M]
-    messages = [l[2] for l in M]
+    hosts = [(l[0], l[1]) for l in M] # (host, port)
+    codes = [l[2] for l in M]
+    messages = [l[3] for l in M]
 
     assert len(hosts) == num_hosts and len(set(hosts)) == num_hosts, tsv_content
     assert len(set(codes)) == 1, tsv_content
@@ -62,7 +62,7 @@ CREATE TABLE IF NOT EXISTS all_tables ON CLUSTER 'cluster_no_replicas'
 
     finally:
         pass
-        cluster.shutdown()
+        #cluster.shutdown()
 
 
 def test_default_database(started_cluster):

From 628de0a3ea4f2f279667538805c1da0e80fd2bab Mon Sep 17 00:00:00 2001
From: Vitaliy Lyudvichenko <vludv@yandex-team.ru>
Date: Thu, 27 Jul 2017 21:44:55 +0300
Subject: [PATCH 137/281] Fixed logging of DDL queries. [#CLICKHOUSE-3128]

---
 dbms/src/Common/Exception.cpp                 |  3 +-
 dbms/src/Interpreters/Cluster.cpp             |  4 +-
 dbms/src/Interpreters/DDLWorker.cpp           | 44 ++++++++++++-------
 dbms/src/Interpreters/DDLWorker.h             |  2 +
 dbms/src/Interpreters/executeQuery.cpp        |  4 +-
 dbms/src/Interpreters/executeQuery.h          |  6 +--
 dbms/tests/integration/helpers/cluster.py     |  6 +++
 .../integration/test_distributed_ddl/test.py  | 12 ++++-
 8 files changed, 55 insertions(+), 26 deletions(-)

diff --git a/dbms/src/Common/Exception.cpp b/dbms/src/Common/Exception.cpp
index 5f8b7036f17..86c1b272b6a 100644
--- a/dbms/src/Common/Exception.cpp
+++ b/dbms/src/Common/Exception.cpp
@@ -258,7 +258,8 @@ bool ExecutionStatus::tryDeserializeText(const std::string & data)
 
 ExecutionStatus ExecutionStatus::fromCurrentException(const std::string & start_of_message)
 {
-    return ExecutionStatus(getCurrentExceptionCode(), start_of_message + ": " + getCurrentExceptionMessage(false, true));
+    String msg = start_of_message.empty() ? "" : (start_of_message + ": " + getCurrentExceptionMessage(false, true));
+    return ExecutionStatus(getCurrentExceptionCode(), msg);
 }
 
 
diff --git a/dbms/src/Interpreters/Cluster.cpp b/dbms/src/Interpreters/Cluster.cpp
index e8d7c67208c..0872273e3b2 100644
--- a/dbms/src/Interpreters/Cluster.cpp
+++ b/dbms/src/Interpreters/Cluster.cpp
@@ -119,10 +119,10 @@ void Cluster::Address::fromString(const String & host_port_string, String & host
 {
     auto pos = host_port_string.find_last_of(':');
     if (pos == std::string::npos)
-        throw Exception("Incorrect host ID format " + host_port_string, ErrorCodes::SYNTAX_ERROR);
+        throw Exception("Incorrect <host>:<port> format " + host_port_string, ErrorCodes::SYNTAX_ERROR);
 
     host_name = unescapeForFileName(host_port_string.substr(0, pos));
-    port = parse<UInt16>(host_port_string.substr(0, pos));
+    port = parse<UInt16>(host_port_string.substr(pos + 1));
 }
 
 
diff --git a/dbms/src/Interpreters/DDLWorker.cpp b/dbms/src/Interpreters/DDLWorker.cpp
index 7cf05a9f8b8..ae002ea0498 100644
--- a/dbms/src/Interpreters/DDLWorker.cpp
+++ b/dbms/src/Interpreters/DDLWorker.cpp
@@ -328,25 +328,29 @@ void DDLWorker::processTasks()
 }
 
 
-static bool tryExecuteQuery(const String & query, Context & context, ExecutionStatus & status, Logger * log = nullptr)
+bool DDLWorker::tryExecuteQuery(const String & query, const DDLTask & task, ExecutionStatus & status)
 {
+    String query_prefix = "/*ddl_entry=" + task.entry_name + "*/ ";
+    String query_to_execute = query_prefix + query;
+
+    ReadBufferFromString istr(query_to_execute);
+    String dummy_string;
+    WriteBufferFromString ostr(dummy_string);
+
     try
     {
-        executeQuery(query, context);
+        executeQuery(istr, ostr, false, context, nullptr);
     }
     catch (...)
     {
         status = ExecutionStatus::fromCurrentException();
-
-        if (log)
-            tryLogCurrentException(log, "Query " + query + " wasn't finished successfully");
+        tryLogCurrentException(log, "Query " + query + " wasn't finished successfully");
 
         return false;
     }
 
     status = ExecutionStatus(0);
-    if (log)
-        LOG_DEBUG(log, "Executed query: " << query);
+    LOG_DEBUG(log, "Executed query: " << query);
 
     return true;
 }
@@ -372,7 +376,6 @@ void DDLWorker::processTask(DDLTask & task)
         {
             ASTPtr rewritten_ast = task.query_on_cluster->getRewrittenASTWithoutOnCluster(task.host_address.default_database);
             String rewritten_query = queryToString(rewritten_ast);
-
             LOG_DEBUG(log, "Executing query: " << rewritten_query);
 
             if (auto query_alter = dynamic_cast<const ASTAlterQuery *>(rewritten_ast.get()))
@@ -381,7 +384,7 @@ void DDLWorker::processTask(DDLTask & task)
             }
             else
             {
-                tryExecuteQuery(rewritten_query, context, current_node_execution_status, log);
+                tryExecuteQuery(rewritten_query, task, current_node_execution_status);
             }
         }
         catch (const zkutil::KeeperException & e)
@@ -487,7 +490,7 @@ void DDLWorker::processTaskAlter(
 
                 if (lock.tryLock())
                 {
-                    tryExecuteQuery(rewritten_query, context, current_node_execution_status, log);
+                    tryExecuteQuery(rewritten_query, task, current_node_execution_status);
 
                     if (execute_on_leader_replica && current_node_execution_status.code == ErrorCodes::NOT_IMPLEMENTED)
                     {
@@ -509,7 +512,7 @@ void DDLWorker::processTaskAlter(
     }
     else
     {
-        tryExecuteQuery(rewritten_query, context, current_node_execution_status, log);
+        tryExecuteQuery(rewritten_query, task, current_node_execution_status);
     }
 }
 
@@ -565,7 +568,7 @@ void DDLWorker::cleanupQueue(const Strings * node_names_to_check)
         }
         catch (...)
         {
-            tryLogCurrentException(log, "An error occured while checking and cleaning node " + node_name + " from queue");
+            LOG_INFO(log, "An error occured while checking and cleaning node " + node_name + " from queue: " + getCurrentExceptionMessage(false));
         }
     }
 }
@@ -614,7 +617,7 @@ void DDLWorker::run()
         {
             processTasks();
 
-            LOG_DEBUG(log, "Waiting watch");
+            LOG_DEBUG(log, "Waiting a watch");
             event_queue_updated->wait();
 
             if (stop_flag)
@@ -624,7 +627,7 @@ void DDLWorker::run()
         }
         catch (zkutil::KeeperException &)
         {
-            LOG_DEBUG(log, "Recovering ZooKeeper session after " << getCurrentExceptionMessage());
+            LOG_DEBUG(log, "Recovering ZooKeeper session after " << getCurrentExceptionMessage(true));
             zookeeper = context.getZooKeeper();
         }
         catch (...)
@@ -730,6 +733,15 @@ public:
         return res;
     }
 
+    Block getSampleBlock() const
+    {
+        return sample.cloneEmpty();
+    }
+
+    ~DDLQueryStatusInputSream() override = default;
+
+private:
+
     static Strings getChildrenAllowNoNode(const std::shared_ptr<zkutil::ZooKeeper> & zookeeper, const String & node_path)
     {
         Strings res;
@@ -764,8 +776,6 @@ public:
         return diff;
     }
 
-    ~DDLQueryStatusInputSream() override = default;
-
 private:
     String node_path;
     Context & context;
@@ -821,7 +831,7 @@ BlockIO executeDDLQueryOnCluster(const ASTPtr & query_ptr, Context & context)
         return io;
 
     auto stream = std::make_shared<DDLQueryStatusInputSream>(node_path, entry, context);
-    io.in_sample = stream->sample.cloneEmpty();
+    io.in_sample = stream->getSampleBlock();
     io.in = std::move(stream);
     return io;
 }
diff --git a/dbms/src/Interpreters/DDLWorker.h b/dbms/src/Interpreters/DDLWorker.h
index 510721f0154..e127100d3e7 100644
--- a/dbms/src/Interpreters/DDLWorker.h
+++ b/dbms/src/Interpreters/DDLWorker.h
@@ -48,6 +48,8 @@ private:
         const String & rewritten_query,
         const String & node_path);
 
+    bool tryExecuteQuery(const String & query, const DDLTask & task, ExecutionStatus & status);
+
     /// Checks and cleanups queue's nodes
     void cleanupQueue(const Strings * node_names_to_check = nullptr);
 
diff --git a/dbms/src/Interpreters/executeQuery.cpp b/dbms/src/Interpreters/executeQuery.cpp
index 782d73974cc..7495cfe3715 100644
--- a/dbms/src/Interpreters/executeQuery.cpp
+++ b/dbms/src/Interpreters/executeQuery.cpp
@@ -147,7 +147,9 @@ static std::tuple<ASTPtr, BlockIO> executeQueryImpl(
         ast = parseQuery(parser, begin, end, "");
 
         /// Copy query into string. It will be written to log and presented in processlist. If an INSERT query, string will not include data to insertion.
-        query_size = ast->range.second - ast->range.first;
+        if (!(begin <= ast->range.first && ast->range.second <= end))
+            throw Exception("Unexpected behavior: AST chars range is not inside source range", ErrorCodes::LOGICAL_ERROR);
+        query_size = ast->range.second - begin;
 
         if (max_query_size && query_size > max_query_size)
             throw Exception("Query is too large (" + toString(query_size) + ")."
diff --git a/dbms/src/Interpreters/executeQuery.h b/dbms/src/Interpreters/executeQuery.h
index 809e9393b67..cc333ea8cb9 100644
--- a/dbms/src/Interpreters/executeQuery.h
+++ b/dbms/src/Interpreters/executeQuery.h
@@ -10,10 +10,10 @@ namespace DB
 
 /// Parse and execute a query.
 void executeQuery(
-    ReadBuffer & istr,                    /// Where to read query from (and data for INSERT, if present).
-    WriteBuffer & ostr,                    /// Where to write query output to.
+    ReadBuffer & istr,                  /// Where to read query from (and data for INSERT, if present).
+    WriteBuffer & ostr,                 /// Where to write query output to.
     bool allow_into_outfile,            /// If true and the query contains INTO OUTFILE section, redirect output to that file.
-    Context & context,                    /// DB, tables, data types, storage engines, functions, aggregate functions...
+    Context & context,                  /// DB, tables, data types, storage engines, functions, aggregate functions...
     std::function<void(const String &)> set_content_type /// If non-empty callback is passed, it will be called with the Content-Type of the result.
     );
 
diff --git a/dbms/tests/integration/helpers/cluster.py b/dbms/tests/integration/helpers/cluster.py
index 021024d04af..f7ed65c8ae0 100644
--- a/dbms/tests/integration/helpers/cluster.py
+++ b/dbms/tests/integration/helpers/cluster.py
@@ -89,6 +89,12 @@ class ClickHouseCluster:
             shutil.rmtree(self.instances_dir)
 
         for instance in self.instances.values():
+            # Kill unstopped containers from previous launch
+            try:
+                subprocess.check_call(self.base_cmd + ['kill'])
+                subprocess.check_call(self.base_cmd + ['down', '--volumes'])
+            except:
+                pass
             instance.create_dir(destroy_dir=destroy_dirs)
 
         subprocess.check_call(self.base_cmd + ['up', '-d'])
diff --git a/dbms/tests/integration/test_distributed_ddl/test.py b/dbms/tests/integration/test_distributed_ddl/test.py
index 357629001b1..53fd3c486d1 100644
--- a/dbms/tests/integration/test_distributed_ddl/test.py
+++ b/dbms/tests/integration/test_distributed_ddl/test.py
@@ -27,6 +27,11 @@ def ddl_check_query(instance, query, num_hosts=None):
     check_all_hosts_sucesfully_executed(contents, num_hosts)
     return contents
 
+def ddl_check_there_are_no_dublicates(instance):
+    answer = instance.query("SELECT max(c), argMax(q, c) FROM (SELECT lower(query) AS q, count() AS c FROM system.query_log WHERE type=2 AND q LIKE '/*ddl_entry=query-%' GROUP BY query)")
+    row = TSV.toMat(answer)[0]
+    assert row[0] == "1", "dublicates on {} {}, query {}".format(instance.name, instance.ip_address, row[1])
+
 
 TEST_REPLICATED_ALTERS=True
 
@@ -61,7 +66,10 @@ CREATE TABLE IF NOT EXISTS all_tables ON CLUSTER 'cluster_no_replicas'
         ddl_check_query(instance, "DROP DATABASE IF EXISTS test2 ON CLUSTER 'cluster'")
 
     finally:
-        pass
+        # Check query log to ensure that DDL queries are not executed twice
+        time.sleep(1)
+        for instance in cluster.instances.values():
+            ddl_check_there_are_no_dublicates(instance)
         #cluster.shutdown()
 
 
@@ -98,7 +106,7 @@ def test_on_server_fail(started_cluster):
     contents = instance.query("SELECT hostName() AS h FROM all_tables WHERE database='test' AND name='test_server_fail' ORDER BY h")
     assert TSV(contents) == TSV("ch1\nch2\nch3\nch4\n")
 
-    ddl_check_query(instance, "DROP TABLE IF EXISTS test.test_server_fail ON CLUSTER 'cluster'")
+    ddl_check_query(instance, "DROP TABLE test.test_server_fail ON CLUSTER 'cluster'")
 
 
 def _test_on_connection_losses(cluster, zk_timeout):

From c2cf3f81be273b24b4a12a967e70857fcb42c61f Mon Sep 17 00:00:00 2001
From: Vitaliy Lyudvichenko <vludv@yandex-team.ru>
Date: Fri, 28 Jul 2017 01:29:48 +0300
Subject: [PATCH 138/281] Fix test after rebase. [#CLICKHOUSE-312]

---
 dbms/tests/integration/helpers/cluster.py          | 13 +++++++------
 .../tests/integration/test_distributed_ddl/test.py | 14 +++++++-------
 2 files changed, 14 insertions(+), 13 deletions(-)

diff --git a/dbms/tests/integration/helpers/cluster.py b/dbms/tests/integration/helpers/cluster.py
index f7ed65c8ae0..e0684b5b4d2 100644
--- a/dbms/tests/integration/helpers/cluster.py
+++ b/dbms/tests/integration/helpers/cluster.py
@@ -84,17 +84,18 @@ class ClickHouseCluster:
         if self.is_up:
             return
 
+        # Kill unstopped containers from previous launch
+        try:
+            subprocess.check_call(self.base_cmd + ['kill'])
+            subprocess.check_call(self.base_cmd + ['down', '--volumes'])
+        except:
+            pass
+
         if destroy_dirs and p.exists(self.instances_dir):
             print "Removing instances dir", self.instances_dir
             shutil.rmtree(self.instances_dir)
 
         for instance in self.instances.values():
-            # Kill unstopped containers from previous launch
-            try:
-                subprocess.check_call(self.base_cmd + ['kill'])
-                subprocess.check_call(self.base_cmd + ['down', '--volumes'])
-            except:
-                pass
             instance.create_dir(destroy_dir=destroy_dirs)
 
         subprocess.check_call(self.base_cmd + ['up', '-d'])
diff --git a/dbms/tests/integration/test_distributed_ddl/test.py b/dbms/tests/integration/test_distributed_ddl/test.py
index 53fd3c486d1..28dbeeac31d 100644
--- a/dbms/tests/integration/test_distributed_ddl/test.py
+++ b/dbms/tests/integration/test_distributed_ddl/test.py
@@ -34,19 +34,19 @@ def ddl_check_there_are_no_dublicates(instance):
 
 
 TEST_REPLICATED_ALTERS=True
-
 cluster = ClickHouseCluster(__file__)
-for i in xrange(4):
-    cluster.add_instance(
-        'ch{}'.format(i+1),
-        config_dir="configs",
-        macroses={"layer": 0, "shard": i/2 + 1, "replica": i%2 + 1},
-        with_zookeeper=True)
 
 
 @pytest.fixture(scope="module")
 def started_cluster():
     try:
+        for i in xrange(4):
+            cluster.add_instance(
+                'ch{}'.format(i+1),
+                config_dir="configs",
+                macroses={"layer": 0, "shard": i/2 + 1, "replica": i%2 + 1},
+                with_zookeeper=True)
+
         cluster.start()
 
         # Initialize databases and service tables

From 4a7927b18621102e1fe3c31c662e76305f119cbc Mon Sep 17 00:00:00 2001
From: Vitaliy Lyudvichenko <vludv@yandex-team.ru>
Date: Fri, 28 Jul 2017 19:14:49 +0300
Subject: [PATCH 139/281] Many improvements. [#CLICKHOUSE-3128]

---
 dbms/src/Interpreters/Cluster.cpp   |   5 +
 dbms/src/Interpreters/Cluster.h     |   5 +-
 dbms/src/Interpreters/DDLWorker.cpp | 477 ++++++++++++++++++----------
 dbms/src/Interpreters/DDLWorker.h   |  21 +-
 4 files changed, 324 insertions(+), 184 deletions(-)

diff --git a/dbms/src/Interpreters/Cluster.cpp b/dbms/src/Interpreters/Cluster.cpp
index 0872273e3b2..f2af88823fa 100644
--- a/dbms/src/Interpreters/Cluster.cpp
+++ b/dbms/src/Interpreters/Cluster.cpp
@@ -115,6 +115,11 @@ String Cluster::Address::toString(const String & host_name, UInt16 port)
     return escapeForFileName(host_name) + ':' + DB::toString(port);
 }
 
+String Cluster::Address::readableString() const
+{
+    return host_name + ':' + DB::toString(port);
+}
+
 void Cluster::Address::fromString(const String & host_port_string, String & host_name, UInt16 & port)
 {
     auto pos = host_port_string.find_last_of(':');
diff --git a/dbms/src/Interpreters/Cluster.h b/dbms/src/Interpreters/Cluster.h
index f68236fad8d..cad36bd6ce8 100644
--- a/dbms/src/Interpreters/Cluster.h
+++ b/dbms/src/Interpreters/Cluster.h
@@ -61,9 +61,12 @@ public:
         Address(Poco::Util::AbstractConfiguration & config, const String & config_prefix);
         Address(const String & host_port_, const String & user_, const String & password_);
 
-        /// Returns escaped 'host_name:port'
+        /// Returns 'escaped_host_name:port'
         String toString() const;
 
+        /// Returns 'host_name:port'
+        String readableString() const;
+
         static String toString(const String & host_name, UInt16 port);
 
         static void fromString(const String & host_port_string, String & host_name, UInt16 & port);
diff --git a/dbms/src/Interpreters/DDLWorker.cpp b/dbms/src/Interpreters/DDLWorker.cpp
index ae002ea0498..5a24c44b266 100644
--- a/dbms/src/Interpreters/DDLWorker.cpp
+++ b/dbms/src/Interpreters/DDLWorker.cpp
@@ -55,10 +55,54 @@ namespace ErrorCodes
 const size_t DDLWorker::node_max_lifetime_seconds = 7 * 24 * 60 * 60; // week
 const size_t DDLWorker::cleanup_min_period_seconds = 60; // minute
 
+namespace
+{
+
+struct HostID    /// Host name and port of one server, as listed in the hosts of a DDL log entry
+{
+    String host_name;
+    UInt16 port = 0;    /// Initialized so that a default-constructed HostID never holds an indeterminate port
+
+    HostID() = default;
+
+    HostID(const Cluster::Address & address)
+    : host_name(address.host_name), port(address.port) {}
+
+    static HostID fromString(const String & host_port_str)    /// Inverse of toString(); parsing is delegated to Cluster::Address::fromString
+    {
+        HostID res;
+        Cluster::Address::fromString(host_port_str, res.host_name, res.port);
+        return res;
+    }
+
+    String toString() const    /// Serialized form produced by Cluster::Address::toString(host_name, port)
+    {
+        return Cluster::Address::toString(host_name, port);
+    }
+
+    String readableString() const    /// Plain 'host_name:port', used in log and exception messages
+    {
+        return host_name + ":" + DB::toString(port);
+    }
+
+    bool isLocalAddress() const    /// Result of DB::isLocalAddress for the socket address built from host_name and port
+    {
+        return DB::isLocalAddress(Poco::Net::SocketAddress(host_name, port));
+    }
+
+    static String applyToString(const HostID & host_id)    /// Free-function form of toString(), convenient for std::transform
+    {
+        return host_id.toString();
+    }
+};
+
+}
+
+
 struct DDLLogEntry
 {
     String query;
-    Strings hosts;
+    std::vector<HostID> hosts;
     String initiator; // optional
 
     static constexpr int CURRENT_VERSION = 1;
@@ -67,10 +111,13 @@ struct DDLLogEntry
     {
         WriteBufferFromOwnString wb;
 
+        Strings host_id_strings(hosts.size());
+        std::transform(hosts.begin(), hosts.end(), host_id_strings.begin(), HostID::applyToString);
+
         auto version = CURRENT_VERSION;
         wb << "version: " << version << "\n";
         wb << "query: " << escape << query << "\n";
-        wb << "hosts: " << hosts << "\n";
+        wb << "hosts: " << host_id_strings << "\n";
         wb << "initiator: " << initiator << "\n";
 
         return wb.str();
@@ -86,8 +133,9 @@ struct DDLLogEntry
         if (version != CURRENT_VERSION)
             throw Exception("Unknown DDLLogEntry format version: " + DB::toString(version), ErrorCodes::UNKNOWN_FORMAT_VERSION);
 
+        Strings host_id_strings;
         rb >> "query: " >> escape >> query >> "\n";
-        rb >> "hosts: " >> hosts >> "\n";
+        rb >> "hosts: " >> host_id_strings >> "\n";
 
         if (!rb.eof())
             rb >> "initiator: " >> initiator >> "\n";
@@ -95,84 +143,42 @@ struct DDLLogEntry
             initiator.clear();
 
         assertEOF(rb);
+
+        hosts.resize(host_id_strings.size());
+        std::transform(host_id_strings.begin(), host_id_strings.end(), hosts.begin(), HostID::fromString);
     }
 };
 
 
 struct DDLTask
 {
-    DDLLogEntry entry;
-    String entry_name;
+    /// Stages of task lifetime correspond ordering of these data fields:
 
+    /// Stage 1: parse entry
+    String entry_name;
+    String entry_path;
+    DDLLogEntry entry;
+
+    /// Stage 2: resolve host_id and check that
+    HostID host_id;
+    String host_id_str;
+
+    /// Stage 3.1: parse query
     ASTPtr query;
     ASTQueryWithOnCluster * query_on_cluster = nullptr;
 
+    /// Stage 3.2: check cluster and find the host in cluster
     String cluster_name;
     ClusterPtr cluster;
-
-    Cluster::Address host_address;
-    String host_id_in_cluster;
+    Cluster::Address address_in_cluster;
     size_t host_shard_num;
     size_t host_replica_num;
 
-    /// Parses entry and query, extracts cluster and finds current host in the cluster
-    /// Return true if current host is found in the cluster
-    bool fillFromEntryData(const String & entry_data, const String & entry_name_, DDLWorker & worker)
-    {
-        entry.parse(entry_data);
-        entry_name = entry_name_;
+    /// Stage 3.3: execute query
+    ExecutionStatus execution_status;
+    bool was_executed = false;
 
-        {
-            const char * begin = entry.query.data();
-            const char * end = begin + entry.query.size();
-
-            ParserQuery parser_query(end);
-            String description;
-            query = parseQuery(parser_query, begin, end, description);
-        }
-
-        if (!query || !(query_on_cluster = dynamic_cast<ASTQueryWithOnCluster *>(query.get())))
-            throw Exception("Recieved unknown DDL query", ErrorCodes::UNKNOWN_TYPE_OF_QUERY);
-
-        cluster_name = query_on_cluster->cluster;
-        cluster = worker.context.tryGetCluster(cluster_name);
-
-        if (!cluster)
-        {
-            LOG_INFO(worker.log, "Will not execute entry " << entry_name << ": there is no cluster " << cluster_name << " on current host");
-            return false;
-        }
-
-        bool found = false;
-        const auto & shards = cluster->getShardsAddresses();
-        for (size_t shard_num = 0; shard_num < shards.size(); ++shard_num)
-        {
-            for (size_t replica_num = 0; replica_num < shards[shard_num].size(); ++replica_num)
-            {
-                const Cluster::Address & address = shards[shard_num][replica_num];
-
-                if (isLocalAddress(address.resolved_address))
-                {
-                    if (found)
-                    {
-                        LOG_WARNING(worker.log, "There are at least two the same ClickHouse instances in cluster " << cluster_name << ": "
-                            << host_id_in_cluster << " and " << address.toString()
-                            << ". Will use the first one only.");
-                    }
-                    else
-                    {
-                        host_address = address;
-                        host_id_in_cluster = address.toString();
-                        host_shard_num = shard_num;
-                        host_replica_num = replica_num;
-                        found = true;
-                    }
-                }
-            }
-        }
-
-        return found;
-    }
+    /// Stage 4: commit results to ZooKeeper
 };
 
 
@@ -214,6 +220,61 @@ DDLWorker::~DDLWorker()
 }
 
 
+bool DDLWorker::initAndCheckTask(DDLTask & task, const String & entry_name)
+{
+    task.entry_name = entry_name;
+    task.entry_path = queue_dir + "/" + task.entry_name;
+
+    String node_data;
+    if (!zookeeper->tryGet(task.entry_path, node_data))
+    {
+        /// It is OK if the node was deleted just now. It means that the current host is not in the node's host list.
+        return false;
+    }
+
+    try
+    {
+        task.entry.parse(node_data);
+    }
+    catch (...)
+    {
+        /// What should we do if we cannot even parse the host names and therefore cannot properly submit the execution status?
+        /// We try to create a "finished" node named after our FQDN: if it equals the host name in the cluster config, the attempt will be successful.
+        /// Otherwise, that node will be ignored by DDLQueryStatusInputSream.
+
+        tryLogCurrentException(log, "Cannot parse DDL task " + task.entry_name + ", will try to send error status");
+
+        String status = ExecutionStatus::fromCurrentException().serializeText();
+        createStatusDirs(task.entry_path);
+        zookeeper->tryCreate(task.entry_path + "/finished/" + host_fqdn_id, status, zkutil::CreateMode::Persistent);
+
+        return false;
+    }
+
+    bool host_in_hostlist = false;
+    for (const HostID & host : task.entry.hosts)
+    {
+        if (!host.isLocalAddress())
+            continue;
+
+        if (host_in_hostlist)
+        {
+            /// This check could be a little slow. task.host_id still holds the first local host that was found.
+            LOG_WARNING(log, "There are two the same ClickHouse instances in task " << task.entry_name
+                << ": " << task.host_id.readableString() << " and " << host.readableString() << ". Will use the first one only.");
+        }
+        else
+        {
+            host_in_hostlist = true;
+            task.host_id = host;
+            task.host_id_str = host.toString();
+        }
+    }
+
+    return host_in_hostlist;    /// true only if at least one host of the task is a local address
+}
+
+
 void DDLWorker::processTasks()
 {
     LOG_DEBUG(log, "Processing tasks");
@@ -222,87 +283,50 @@ void DDLWorker::processTasks()
     if (queue_nodes.empty())
         return;
 
-    bool server_startup = last_processed_node_name.empty();
+    bool server_startup = last_processed_task_name.empty();
 
     std::sort(queue_nodes.begin(), queue_nodes.end());
     auto begin_node = server_startup
         ? queue_nodes.begin()
-        : std::upper_bound(queue_nodes.begin(), queue_nodes.end(), last_processed_node_name);
+        : std::upper_bound(queue_nodes.begin(), queue_nodes.end(), last_processed_task_name);
 
     for (auto it = begin_node; it != queue_nodes.end(); ++it)
     {
-        const String & node_name = *it;
-        String node_path = queue_dir + "/" + node_name;
-        String node_data;
+        String entry_name = *it;
 
-        if (!zookeeper->tryGet(node_path, node_data))
+        if (current_task)
         {
-            /// It is Ok that node could be deleted just now. It means that there are no current host in node's host list.
-            continue;
-        }
-
-        DDLTask task;
-        bool found_cluster_and_host = false;
-        try
-        {
-            found_cluster_and_host = task.fillFromEntryData(node_data, node_name, *this);
-        }
-        catch (...)
-        {
-            /// We even cannot parse host name and therefore cannot properly submit execution status.
-            /// What should we do?
-            /// We can try to create fail node using FQDN if it equal to host name in cluster config attempt will be sucessfull.
-            /// Otherwise, that node will be ignored by DDLQueryStatusInputSream.
-
-            tryLogCurrentException(log, "Cannot parse DDL task " + node_data + ", will try to send error status");
-
-            ExecutionStatus status = ExecutionStatus::fromCurrentException();
-            String host_id = task.host_id_in_cluster.empty() ? host_fqdn_id : task.host_id_in_cluster;
-
-            createStatusDirs(node_path);
-            zookeeper->create(node_path + "/finished/" + host_id, current_node_execution_status.serializeText(), zkutil::CreateMode::Persistent);
-
-            last_processed_node_name = node_name;
-            continue;
-        }
-
-        const auto & hosts = task.entry.hosts;
-        if (!found_cluster_and_host)
-        {
-            bool fqdn_in_hostlist = std::find(hosts.cbegin(), hosts.cend(), host_fqdn_id) != hosts.cend();
-            if (fqdn_in_hostlist)
+            if (current_task->entry_name == entry_name)
             {
-                LOG_ERROR(log, "Not found current host in cluster " << task.cluster_name << " of task " << task.entry_name
-                    << ", but found host " << host_fqdn_id << " with the same FQDN in host list of the task"
-                    << ". Possibly inconsistent cluster definition among servers.");
+                LOG_INFO(log, "Trying to process task " << entry_name << " again");
             }
             else
             {
-                LOG_DEBUG(log, "Skipping task " << node_data);
+                LOG_INFO(log, "Task " << current_task->entry_name << " was deleted from ZooKeeper before current host commited it");
+                current_task = nullptr;
             }
-
-            last_processed_node_name = node_name;
-            continue;
         }
-        else
-        {
-            bool host_in_hostlist = std::find(hosts.cbegin(), hosts.cend(), task.host_id_in_cluster) != hosts.cend();
-            if (!host_in_hostlist)
-            {
-                LOG_ERROR(log, "Current host was found in cluster " << task.cluster_name
-                    << ", but was not found in host list of task " << task.entry_name
-                    << ". Possibly inconsistent cluster definition among servers.");
 
-                last_processed_node_name = node_name;
+        if (!current_task)
+        {
+            current_task = std::make_unique<DDLTask>();
+
+            if (!initAndCheckTask(*current_task, entry_name))
+            {
+                LOG_DEBUG(log, "Will not execute task" << entry_name);
+                last_processed_task_name = current_task->entry_name;
+                current_task.reset();
                 continue;
             }
         }
 
-        bool already_processed = zookeeper->exists(node_path + "/finished/" + task.host_id_in_cluster);
+        DDLTask & task = *current_task;
+
+        bool already_processed = zookeeper->exists(task.entry_path + "/finished/" + task.host_id_str);
         if (!server_startup && already_processed)
         {
             throw Exception(
-                "Server expects that DDL task " + node_name + " should be processed, but it was already processed according to ZK",
+                "Server expects that DDL task " + task.entry_name + " should be processed, but it was already processed according to ZK",
                 ErrorCodes::LOGICAL_ERROR);
         }
 
@@ -314,22 +338,115 @@ void DDLWorker::processTasks()
             }
             catch (...)
             {
-                tryLogCurrentException(log, "An error occurred while processing task " + node_name + " (" + task.entry.query + ")");
+                tryLogCurrentException(log, "An error occurred while processing task " + task.entry_name + " (" + task.entry.query + ")");
                 throw;
             }
         }
         else
         {
-            LOG_DEBUG(log, "Task " << node_name << " (" << task.entry.query << ") has been already processed");
+            LOG_DEBUG(log, "Task " << task.entry_name << " (" << task.entry.query << ") has been already processed");
         }
 
-        last_processed_node_name = node_name;
+        last_processed_task_name = task.entry_name;
+        current_task.reset();
+    }
+}
+
+
+/// Parses the task's query, looks up its cluster and finds the task's host in that cluster; throws if any of these steps fails
+void DDLWorker::parseQueryAndResolveHost(DDLTask & task)
+{
+    {
+        const char * begin = task.entry.query.data();
+        const char * end = begin + task.entry.query.size();
+
+        ParserQuery parser_query(end);
+        String description;
+        task.query = parseQuery(parser_query, begin, end, description);
+    }
+
+    if (!task.query || !(task.query_on_cluster = dynamic_cast<ASTQueryWithOnCluster *>(task.query.get())))
+        throw Exception("Received unknown DDL query", ErrorCodes::UNKNOWN_TYPE_OF_QUERY);
+
+    task.cluster_name = task.query_on_cluster->cluster;
+    task.cluster = context.tryGetCluster(task.cluster_name);
+    if (!task.cluster)
+    {
+        throw Exception("DDL task " + task.entry_name + " contains current host " + task.host_id.readableString()
+            + " in cluster " + task.cluster_name + ", but there are no such cluster here.", ErrorCodes::INCONSISTENT_CLUSTER_DEFINITION);
+    }
+
+    /// Try to find the host from the task's host list in the cluster definition.
+    /// First, look for an exact match (host name and port must be literally equal).
+    /// If that fails, look for an address of the cluster that resolves to a local address.
+    const auto & shards = task.cluster->getShardsWithFailoverAddresses();
+
+    bool found_exact_match = false;
+    for (size_t shard_num = 0; shard_num < shards.size(); ++shard_num)
+        for (size_t replica_num = 0; replica_num < shards[shard_num].size(); ++replica_num)
+        {
+            const Cluster::Address & address = shards[shard_num][replica_num];
+
+            if (address.host_name == task.host_id.host_name && address.port == task.host_id.port)
+            {
+                if (found_exact_match)
+                {
+                    throw Exception("There are two exactly the same ClickHouse instances " + address.readableString()
+                        + " in cluster " + task.cluster_name, ErrorCodes::INCONSISTENT_CLUSTER_DEFINITION);
+                }
+
+                found_exact_match = true;
+                task.host_shard_num = shard_num;
+                task.host_replica_num = replica_num;
+                task.address_in_cluster = address;
+            }
+        }
+
+    if (found_exact_match)
+        return;
+
+    LOG_WARNING(log, "Not found the exact match of host " << task.host_id.readableString() << " from task " << task.entry_name
+        << " in cluster " << task.cluster_name << " definition. Will try to find it using host name resolving.");
+
+    bool found_via_resolving = false;
+    for (size_t shard_num = 0; shard_num < shards.size(); ++shard_num)
+        for (size_t replica_num = 0; replica_num < shards[shard_num].size(); ++replica_num)
+        {
+            const Cluster::Address & address = shards[shard_num][replica_num];
+
+            if (isLocalAddress(address.resolved_address))
+            {
+                if (found_via_resolving)
+                {
+                    throw Exception("There are two the same ClickHouse instances in cluster " + task.cluster_name + " : "
+                        + task.address_in_cluster.readableString() + " and " + address.readableString(), ErrorCodes::INCONSISTENT_CLUSTER_DEFINITION);
+                }
+                else
+                {
+                    found_via_resolving = true;
+                    task.host_shard_num = shard_num;
+                    task.host_replica_num = replica_num;
+                    task.address_in_cluster = address;
+                }
+            }
+        }
+
+    if (!found_via_resolving)
+    {
+        throw Exception("Not found host " + task.host_id.readableString() + " in definition of cluster " + task.cluster_name,
+                        ErrorCodes::INCONSISTENT_CLUSTER_DEFINITION);
+    }
+    else
+    {
+        LOG_INFO(log, "Resolved host " << task.host_id.readableString() << " from task " << task.entry_name
+            << " as host " << task.address_in_cluster.readableString() << " in definition of cluster " << task.cluster_name);
+    }
+}
 
 
 bool DDLWorker::tryExecuteQuery(const String & query, const DDLTask & task, ExecutionStatus & status)
 {
+    /// Add special comment at the start of query to easily identify DDL-produced queries in query_log
     String query_prefix = "/*ddl_entry=" + task.entry_name + "*/ ";
     String query_to_execute = query_prefix + query;
 
@@ -358,33 +475,32 @@ bool DDLWorker::tryExecuteQuery(const String & query, const DDLTask & task, Exec
 
 void DDLWorker::processTask(DDLTask & task)
 {
-    LOG_DEBUG(log, "Processing entry " << task.entry_name << " (" << task.entry.query << ")");
+    LOG_DEBUG(log, "Processing task " << task.entry_name << " (" << task.entry.query << ")");
 
-    String node_path = queue_dir + "/" + task.entry_name;
-    createStatusDirs(node_path);
+    createStatusDirs(task.entry_path);
 
-    bool should_not_execute = current_node == task.entry_name && current_node_was_executed;
+    String active_node_path = task.entry_path + "/active/" + task.host_id_str;
+    auto code = zookeeper->tryCreate(active_node_path, "", zkutil::CreateMode::Ephemeral);
+    if (code != ZOK && code != ZNODEEXISTS)
+        throw zkutil::KeeperException(code, active_node_path);
 
-    if (!should_not_execute)
+    if (!task.was_executed)
     {
-        current_node = task.entry_name;
-        current_node_was_executed = false;
-
-        zookeeper->create(node_path + "/active/" + task.host_id_in_cluster, "", zkutil::CreateMode::Ephemeral);
-
         try
         {
-            ASTPtr rewritten_ast = task.query_on_cluster->getRewrittenASTWithoutOnCluster(task.host_address.default_database);
+            parseQueryAndResolveHost(task);
+
+            ASTPtr rewritten_ast = task.query_on_cluster->getRewrittenASTWithoutOnCluster(task.address_in_cluster.default_database);
             String rewritten_query = queryToString(rewritten_ast);
             LOG_DEBUG(log, "Executing query: " << rewritten_query);
 
-            if (auto query_alter = dynamic_cast<const ASTAlterQuery *>(rewritten_ast.get()))
+            if (auto ast_alter = dynamic_cast<const ASTAlterQuery *>(rewritten_ast.get()))
             {
-                processTaskAlter(task, query_alter, rewritten_query, node_path);
+                processTaskAlter(task, ast_alter, rewritten_query, task.entry_path);
             }
             else
             {
-                tryExecuteQuery(rewritten_query, task, current_node_execution_status);
+                tryExecuteQuery(rewritten_query, task, task.execution_status);
             }
         }
         catch (const zkutil::KeeperException & e)
@@ -393,41 +509,40 @@ void DDLWorker::processTask(DDLTask & task)
         }
         catch (...)
         {
-            current_node_execution_status = ExecutionStatus::fromCurrentException("An error occured during query preparation");
+            task.execution_status = ExecutionStatus::fromCurrentException("An error occured before execution");
         }
 
         /// We need to distinguish ZK errors occured before and after query executing
-        current_node_was_executed = true;
+        task.was_executed = true;
     }
 
+    /// FIXME: if server fails right here, the task will be executed twice. We need WAL here.
+
     /// Delete active flag and create finish flag
     zkutil::Ops ops;
-    ops.emplace_back(std::make_unique<zkutil::Op::Remove>(node_path + "/active/" + task.host_id_in_cluster, -1));
-    ops.emplace_back(std::make_unique<zkutil::Op::Create>(node_path + "/finished/" + task.host_id_in_cluster,
-        current_node_execution_status.serializeText(), zookeeper->getDefaultACL(), zkutil::CreateMode::Persistent));
+    ops.emplace_back(std::make_unique<zkutil::Op::Remove>(task.entry_path + "/active/" + task.host_id_str, -1));
+    ops.emplace_back(std::make_unique<zkutil::Op::Create>(task.entry_path + "/finished/" + task.host_id_str,
+        task.execution_status.serializeText(), zookeeper->getDefaultACL(), zkutil::CreateMode::Persistent));
 
-    int code = zookeeper->tryMultiWithRetries(ops);
-    if (code != ZOK && code != ZNONODE)
-    {
-        /// FIXME: if server fails here, the task will be executed twice. We need WAL here.
-        throw zkutil::KeeperException("Cannot commit executed entry " + task.entry_name, code);
-    }
+    code = zookeeper->tryMultiWithRetries(ops);
+    if (code != ZOK)
+        throw zkutil::KeeperException("Cannot commit executed task to ZooKeeper " + task.entry_name, code);
 }
 
 
 void DDLWorker::processTaskAlter(
     DDLTask & task,
-    const ASTAlterQuery * query_alter,
+    const ASTAlterQuery * ast_alter,
     const String & rewritten_query,
     const String & node_path)
 {
-    String database = query_alter->database.empty() ? context.getCurrentDatabase() : query_alter->database;
-    StoragePtr storage = context.getTable(database, query_alter->table);
+    String database = ast_alter->database.empty() ? context.getCurrentDatabase() : ast_alter->database;
+    StoragePtr storage = context.getTable(database, ast_alter->table);
 
     bool execute_once_on_replica = storage->supportsReplication();
     bool execute_on_leader_replica = false;
 
-    for (const auto & param : query_alter->parameters)
+    for (const auto & param : ast_alter->parameters)
     {
         if (!isSupportedAlterType(param.type))
             throw Exception("Unsupported type of ALTER query", ErrorCodes::NOT_IMPLEMENTED);
@@ -441,12 +556,12 @@ void DDLWorker::processTaskAlter(
 
     if (execute_once_on_replica && !config_is_replicated_shard)
     {
-        throw Exception("Table " + query_alter->table + " is replicated, but shard #" + toString(task.host_shard_num + 1) +
+        throw Exception("Table " + ast_alter->table + " is replicated, but shard #" + toString(task.host_shard_num + 1) +
             " isn't replicated according to its cluster definition", ErrorCodes::INCONSISTENT_CLUSTER_DEFINITION);
     }
     else if (!execute_once_on_replica && config_is_replicated_shard)
     {
-        throw Exception("Table " + query_alter->table + " isn't replicated, but shard #" + toString(task.host_shard_num + 1) +
+        throw Exception("Table " + ast_alter->table + " isn't replicated, but shard #" + toString(task.host_shard_num + 1) +
             " replicated according to its cluster definition", ErrorCodes::INCONSISTENT_CLUSTER_DEFINITION);
     }
 
@@ -477,8 +592,8 @@ void DDLWorker::processTaskAlter(
             auto zookeeper_holder = std::make_shared<zkutil::ZooKeeperHolder>();
             zookeeper_holder->initFromInstance(zookeeper);
 
-            zkutil::Lock lock(zookeeper_holder, shard_path, "lock", task.host_id_in_cluster);
-            std::mt19937 rng(std::hash<String>{}(task.host_id_in_cluster) + reinterpret_cast<intptr_t>(&rng));
+            zkutil::Lock lock(zookeeper_holder, shard_path, "lock", task.host_id_str);
+            std::mt19937 rng(std::hash<String>{}(task.host_id_str) + reinterpret_cast<intptr_t>(&rng));
 
             for (int num_tries = 0; num_tries < 10; ++num_tries)
             {
@@ -490,14 +605,14 @@ void DDLWorker::processTaskAlter(
 
                 if (lock.tryLock())
                 {
-                    tryExecuteQuery(rewritten_query, task, current_node_execution_status);
+                    tryExecuteQuery(rewritten_query, task, task.execution_status);
 
-                    if (execute_on_leader_replica && current_node_execution_status.code == ErrorCodes::NOT_IMPLEMENTED)
+                    if (execute_on_leader_replica && task.execution_status.code == ErrorCodes::NOT_IMPLEMENTED)
                     {
                         /// TODO: it is ok to recieve exception "host is not leader"
                     }
 
-                    zookeeper->create(is_executed_path, task.host_id_in_cluster, zkutil::CreateMode::Persistent);
+                    zookeeper->create(is_executed_path, task.host_id_str, zkutil::CreateMode::Persistent);
                     lock.unlock();
                     alter_executed_by_replica = true;
                     break;
@@ -508,11 +623,11 @@ void DDLWorker::processTaskAlter(
         }
 
         if (!alter_executed_by_replica)
-            current_node_execution_status = ExecutionStatus(ErrorCodes::NOT_IMPLEMENTED, "Cannot enqueue replicated DDL query");
+            task.execution_status = ExecutionStatus(ErrorCodes::NOT_IMPLEMENTED, "Cannot enqueue replicated DDL query");
     }
     else
     {
-        tryExecuteQuery(rewritten_query, task, current_node_execution_status);
+        tryExecuteQuery(rewritten_query, task, task.execution_status);
     }
 }
 
@@ -533,7 +648,7 @@ void DDLWorker::cleanupQueue(const Strings * node_names_to_check)
 
     String data;
     zkutil::Stat stat;
-    DDLLogEntry node;
+    DDLLogEntry entry;
 
     Strings node_names_fetched = node_names_to_check ? Strings{} : zookeeper->getChildren(queue_dir);
     const Strings & node_names = (node_names_to_check) ? *node_names_to_check : node_names_fetched;
@@ -552,23 +667,23 @@ void DDLWorker::cleanupQueue(const Strings * node_names_to_check)
             if (zookeeper_time_seconds + node_max_lifetime_seconds < current_time_seconds)
             {
                 size_t lifetime_seconds = current_time_seconds - zookeeper_time_seconds;
-                LOG_INFO(log, "Lifetime of node " << node_name << " (" << lifetime_seconds << " sec.) is expired, deleting it");
+                LOG_INFO(log, "Lifetime of task " << node_name << " (" << lifetime_seconds << " sec.) is expired, deleting it");
                 zookeeper->removeRecursive(node_path);
                 continue;
             }
 
             Strings finished_nodes = zookeeper->getChildren(node_path + "/finished");
-            node.parse(data);
+            entry.parse(data);
 
-            if (finished_nodes.size() >= node.hosts.size())
+            if (finished_nodes.size() >= entry.hosts.size())
             {
-                LOG_INFO(log, "Node " << node_name << " had been executed by each host, deleting it");
+                LOG_INFO(log, "Task " << node_name << " had been executed by each host, deleting it");
                 zookeeper->removeRecursive(node_path);
             }
         }
         catch (...)
         {
-            LOG_INFO(log, "An error occured while checking and cleaning node " + node_name + " from queue: " + getCurrentExceptionMessage(false));
+            LOG_INFO(log, "An error occured while checking and cleaning task " + node_name + " from queue: " + getCurrentExceptionMessage(false));
         }
     }
 }
@@ -625,14 +740,22 @@ void DDLWorker::run()
 
             cleanupQueue();
         }
-        catch (zkutil::KeeperException &)
+        catch (zkutil::KeeperException & e)
         {
-            LOG_DEBUG(log, "Recovering ZooKeeper session after " << getCurrentExceptionMessage(true));
-            zookeeper = context.getZooKeeper();
+            if (e.code == ZCONNECTIONLOSS || e.code == ZSESSIONEXPIRED)
+            {
+                LOG_DEBUG(log, "Recovering ZooKeeper session after " << getCurrentExceptionMessage(false));
+                zookeeper = context.getZooKeeper();
+            }
+            else
+            {
+                LOG_ERROR(log, "Unexpected ZooKeeper error: " << getCurrentExceptionMessage(true) << ". Terminating...");
+                throw;
+            }
         }
         catch (...)
         {
-            tryLogCurrentException(log);
+            LOG_ERROR(log, "Unexpected error: " << getCurrentExceptionMessage(true) << ". Terminating...");
             throw;
         }
     }
@@ -655,7 +778,9 @@ public:
             {std::make_shared<DataTypeUInt64>(),    "num_hosts_active"},
         };
 
-        waiting_hosts.insert(entry.hosts.cbegin(), entry.hosts.cend());
+        for (const HostID & host: entry.hosts)
+            waiting_hosts.emplace(host.toString());
+
         setTotalRowsApprox(entry.hosts.size());
     }
 
@@ -821,7 +946,7 @@ BlockIO executeDDLQueryOnCluster(const ASTPtr & query_ptr, Context & context)
     for (const auto & shard : shards)
     {
         for (const auto & addr : shard)
-            entry.hosts.emplace_back(addr.toString());
+            entry.hosts.emplace_back(addr);
     }
 
     String node_path = ddl_worker.enqueueQuery(entry);
diff --git a/dbms/src/Interpreters/DDLWorker.h b/dbms/src/Interpreters/DDLWorker.h
index e127100d3e7..b029f1e2df2 100644
--- a/dbms/src/Interpreters/DDLWorker.h
+++ b/dbms/src/Interpreters/DDLWorker.h
@@ -31,7 +31,7 @@ public:
     String enqueueQuery(DDLLogEntry & entry);
 
     /// Host ID (name:port) for logging purposes
-    /// Note that in each entry hosts are identified by name:port from cluster config
+    /// Note that in each task hosts are identified individually by name:port from initiator server cluster config
     std::string getCommonHostID() const
     {
         return host_fqdn_id;
@@ -40,22 +40,30 @@ public:
 private:
     void processTasks();
 
+    bool initAndCheckTask(DDLTask & task, const String & entry_name);
+
+
     void processTask(DDLTask & task);
 
     void processTaskAlter(
         DDLTask & task,
-        const ASTAlterQuery * query_alter,
+        const ASTAlterQuery * ast_alter,
         const String & rewritten_query,
         const String & node_path);
 
+    void parseQueryAndResolveHost(DDLTask & task);
+
     bool tryExecuteQuery(const String & query, const DDLTask & task, ExecutionStatus & status);
 
+
     /// Checks and cleanups queue's nodes
     void cleanupQueue(const Strings * node_names_to_check = nullptr);
 
+
     void createStatusDirs(const std::string & node_name);
     ASTPtr getRewrittenQuery(const DDLLogEntry & node);
 
+
     void run();
 
 private:
@@ -68,15 +76,14 @@ private:
     std::string queue_dir;      /// dir with queue of queries
     std::string master_dir;     /// dir with queries was initiated by the server
 
-    /// Used to omit already processed nodes. Maybe usage of set is more obvious.
-    std::string last_processed_node_name;
+    /// Last task that was skipped or sucesfully executed
+    std::string last_processed_task_name;
 
     std::shared_ptr<zkutil::ZooKeeper> zookeeper;
 
     /// Save state of executed task to avoid duplicate execution on ZK error
-    std::string current_node = {};
-    bool current_node_was_executed = false;
-    ExecutionStatus current_node_execution_status;
+    using DDLTaskPtr = std::unique_ptr<DDLTask>;
+    DDLTaskPtr current_task;
 
     std::shared_ptr<Poco::Event> event_queue_updated;
     std::atomic<bool> stop_flag{false};

From cb02a1b3b5c2005280bc23760dca7a66d4bf1d1c Mon Sep 17 00:00:00 2001
From: Vitaliy Lyudvichenko <vludv@yandex-team.ru>
Date: Mon, 31 Jul 2017 21:57:13 +0300
Subject: [PATCH 140/281] Add random ZK packet drops to the test. Fix a couple
 of bugs. [#CLICKHOUSE-3128]

---
 dbms/src/Interpreters/DDLWorker.cpp             | 16 ++++++++++++----
 dbms/tests/integration/helpers/network.py       |  8 ++++++--
 .../integration/test_distributed_ddl/test.py    | 17 +++++++++++++----
 3 files changed, 31 insertions(+), 10 deletions(-)

diff --git a/dbms/src/Interpreters/DDLWorker.cpp b/dbms/src/Interpreters/DDLWorker.cpp
index 5a24c44b266..c41b7898921 100644
--- a/dbms/src/Interpreters/DDLWorker.cpp
+++ b/dbms/src/Interpreters/DDLWorker.cpp
@@ -323,7 +323,7 @@ void DDLWorker::processTasks()
         DDLTask & task = *current_task;
 
         bool already_processed = zookeeper->exists(task.entry_path + "/finished/" + task.host_id_str);
-        if (!server_startup && already_processed)
+        if (!server_startup && !task.was_executed && already_processed)
         {
             throw Exception(
                 "Server expects that DDL task " + task.entry_name + " should be processed, but it was already processed according to ZK",
@@ -723,8 +723,16 @@ void DDLWorker::run()
     setThreadName("DDLWorker");
     LOG_DEBUG(log, "Started DDLWorker thread");
 
-    zookeeper = context.getZooKeeper();
-    zookeeper->createAncestors(queue_dir + "/");
+    try
+    {
+        zookeeper = context.getZooKeeper();
+        zookeeper->createAncestors(queue_dir + "/");
+    }
+    catch (...)
+    {
+        tryLogCurrentException(log, "Terminating. Cannot initialize DDL queue");
+        throw;
+    }
 
     while (!stop_flag)
     {
@@ -742,7 +750,7 @@ void DDLWorker::run()
         }
         catch (zkutil::KeeperException & e)
         {
-            if (e.code == ZCONNECTIONLOSS || e.code == ZSESSIONEXPIRED)
+            if (e.isHardwareError())
             {
                 LOG_DEBUG(log, "Recovering ZooKeeper session after " << getCurrentExceptionMessage(false));
                 zookeeper = context.getZooKeeper();
diff --git a/dbms/tests/integration/helpers/network.py b/dbms/tests/integration/helpers/network.py
index 807e4f8b48b..ccd21866ad4 100644
--- a/dbms/tests/integration/helpers/network.py
+++ b/dbms/tests/integration/helpers/network.py
@@ -29,6 +29,7 @@ class PartitionManager:
         self._add_rule({'source': instance.ip_address, 'destination_port': 2181, 'action': action})
         self._add_rule({'destination': instance.ip_address, 'source_port': 2181, 'action': action})
 
+
     def restore_instance_zk_connections(self, instance, action='DROP'):
         self._check_instance(instance)
 
@@ -109,8 +110,11 @@ class _NetworkManager:
     def _iptables_cmd_suffix(
             source=None, destination=None,
             source_port=None, destination_port=None,
-            action=None):
-        ret = ['-p', 'tcp']
+            action=None, probability=None):
+        ret = []
+        if probability is not None:
+            ret.extend(['-m', 'statistic', '--mode', 'random', '--probability', str(probability)])
+        ret.extend(['-p', 'tcp'])
         if source is not None:
             ret.extend(['-s', source])
         if destination is not None:
diff --git a/dbms/tests/integration/test_distributed_ddl/test.py b/dbms/tests/integration/test_distributed_ddl/test.py
index 28dbeeac31d..2056e4a2383 100644
--- a/dbms/tests/integration/test_distributed_ddl/test.py
+++ b/dbms/tests/integration/test_distributed_ddl/test.py
@@ -28,9 +28,8 @@ def ddl_check_query(instance, query, num_hosts=None):
     return contents
 
 def ddl_check_there_are_no_dublicates(instance):
-    answer = instance.query("SELECT max(c), argMax(q, c) FROM (SELECT lower(query) AS q, count() AS c FROM system.query_log WHERE type=2 AND q LIKE '/*ddl_entry=query-%' GROUP BY query)")
-    row = TSV.toMat(answer)[0]
-    assert row[0] == "1", "dublicates on {} {}, query {}".format(instance.name, instance.ip_address, row[1])
+    rows = instance.query("SELECT max(c), argMax(q, c) FROM (SELECT lower(query) AS q, count() AS c FROM system.query_log WHERE type=2 AND q LIKE '/*ddl_entry=query-%' GROUP BY query)")
+    assert len(rows) == 0 or rows[0][0] == "1", "dublicates on {} {}, query {}".format(instance.name, instance.ip_address)
 
 
 TEST_REPLICATED_ALTERS=True
@@ -49,6 +48,12 @@ def started_cluster():
 
         cluster.start()
 
+        # Select sacrifice instance to test CONNECTION_LOSS and server fail on it
+        sacrifice = cluster.instances['ch2']
+        cluster.pm_random_drops = PartitionManager()
+        cluster.pm_random_drops._add_rule({'probability': 0.05, 'destination': sacrifice.ip_address, 'source_port': 2181, 'action': 'REJECT --reject-with tcp-reset'})
+        cluster.pm_random_drops._add_rule({'probability': 0.05, 'source': sacrifice.ip_address, 'destination_port': 2181, 'action': 'REJECT --reject-with tcp-reset'})
+
         # Initialize databases and service tables
         instance = cluster.instances['ch1']
 
@@ -66,10 +71,14 @@ CREATE TABLE IF NOT EXISTS all_tables ON CLUSTER 'cluster_no_replicas'
         ddl_check_query(instance, "DROP DATABASE IF EXISTS test2 ON CLUSTER 'cluster'")
 
     finally:
+        # Remove iptables rules for sacrifice instance
+        cluster.pm_random_drops.heal_all()
+
         # Check query log to ensure that DDL queries are not executed twice
-        time.sleep(1)
+        time.sleep(1.5)
         for instance in cluster.instances.values():
             ddl_check_there_are_no_dublicates(instance)
+
         #cluster.shutdown()
 
 

From c2e84d6df7ba9e06e8031d03995e5fa008149f9c Mon Sep 17 00:00:00 2001
From: Vitaliy Lyudvichenko <vludv@yandex-team.ru>
Date: Tue, 1 Aug 2017 17:41:00 +0300
Subject: [PATCH 141/281] Fixed several errors in case of connection loss.
 [#CLICKHOUSE-3128]

---
 dbms/src/Interpreters/DDLWorker.cpp           | 227 ++++++++++++++----
 dbms/src/Interpreters/DDLWorker.h             |  10 +-
 dbms/src/Server/Server.cpp                    |   2 +-
 .../integration/test_distributed_ddl/test.py  |  20 +-
 4 files changed, 198 insertions(+), 61 deletions(-)

diff --git a/dbms/src/Interpreters/DDLWorker.cpp b/dbms/src/Interpreters/DDLWorker.cpp
index c41b7898921..7dcb2e5edfc 100644
--- a/dbms/src/Interpreters/DDLWorker.cpp
+++ b/dbms/src/Interpreters/DDLWorker.cpp
@@ -33,6 +33,8 @@
 #include <Common/isLocalAddress.h>
 #include <Poco/Timestamp.h>
 
+#include <ext/scope_guard.h>
+
 #include <experimental/optional>
 
 
@@ -52,9 +54,6 @@ namespace ErrorCodes
 }
 
 
-const size_t DDLWorker::node_max_lifetime_seconds = 7 * 24 * 60 * 60; // week
-const size_t DDLWorker::cleanup_min_period_seconds = 60; // minute
-
 namespace
 {
 
@@ -87,7 +86,15 @@ struct HostID
 
     bool isLocalAddress() const
     {
-        return DB::isLocalAddress(Poco::Net::SocketAddress(host_name, port));
+        try
+        {
+            return DB::isLocalAddress(Poco::Net::SocketAddress(host_name, port));
+        }
+        catch (const Poco::Exception & e)
+        {
+            /// Avoid "Host not found" exceptions
+            return false;
+        }
     }
 
     static String applyToString(const HostID & host_id)
@@ -196,13 +203,19 @@ static bool isSupportedAlterType(int type)
 }
 
 
-DDLWorker::DDLWorker(const std::string & zk_root_dir, Context & context_)
+DDLWorker::DDLWorker(const std::string & zk_root_dir, Context & context_, const Poco::Util::AbstractConfiguration * config, const String & prefix)
     : context(context_)
 {
     queue_dir = zk_root_dir;
     if (queue_dir.back() == '/')
         queue_dir.resize(queue_dir.size() - 1);
 
+    if (config)
+    {
+        task_max_lifetime = config->getUInt64(prefix + "task_max_lifetime", task_max_lifetime);
+        cleanup_delay_period = config->getUInt64(prefix + "cleanup_delay_period", cleanup_delay_period);
+    }
+
     host_fqdn = getFQDNOrHostName();
     host_fqdn_id = Cluster::Address::toString(host_fqdn, context.getTCPPort());
 
@@ -220,21 +233,24 @@ DDLWorker::~DDLWorker()
 }
 
 
-bool DDLWorker::initAndCheckTask(DDLTask & task, const String & entry_name)
+bool DDLWorker::initAndCheckTask(const String & entry_name)
 {
-    task.entry_name = entry_name;
-    task.entry_path = queue_dir + "/" + task.entry_name;
-
     String node_data;
-    if (!zookeeper->tryGet(task.entry_path, node_data))
+    String entry_path = queue_dir + "/" + entry_name;
+
+    if (!zookeeper->tryGet(entry_path, node_data))
     {
         /// It is Ok that node could be deleted just now. It means that there are no current host in node's host list.
         return false;
     }
 
+    auto task = std::make_unique<DDLTask>();
+    task->entry_name = entry_name;
+    task->entry_path = entry_path;
+
     try
     {
-        task.entry.parse(node_data);
+        task->entry.parse(node_data);
     }
     catch (...)
     {
@@ -242,17 +258,24 @@ bool DDLWorker::initAndCheckTask(DDLTask & task, const String & entry_name)
         /// We can try to create fail node using FQDN if it equal to host name in cluster config attempt will be sucessfull.
         /// Otherwise, that node will be ignored by DDLQueryStatusInputSream.
 
-        tryLogCurrentException(log, "Cannot parse DDL task " + task.entry_name + ", will try to send error status");
+        tryLogCurrentException(log, "Cannot parse DDL task " + entry_name + ", will try to send error status");
 
         String status = ExecutionStatus::fromCurrentException().serializeText();
-        createStatusDirs(task.entry_path);
-        zookeeper->tryCreate(task.entry_path + "/finished/" + host_fqdn_id, status, zkutil::CreateMode::Persistent);
+        try
+        {
+            createStatusDirs(entry_path);
+            zookeeper->tryCreate(entry_path + "/finished/" + host_fqdn_id, status, zkutil::CreateMode::Persistent);
+        }
+        catch (...)
+        {
+            tryLogCurrentException(log, "Can't report the task has invalid format");
+        }
 
         return false;
     }
 
     bool host_in_hostlist = false;
-    for (const HostID & host : task.entry.hosts)
+    for (const HostID & host : task->entry.hosts)
     {
         if (!host.isLocalAddress())
             continue;
@@ -260,17 +283,20 @@ bool DDLWorker::initAndCheckTask(DDLTask & task, const String & entry_name)
         if (host_in_hostlist)
         {
             /// This check could be slow a little bit
-            LOG_WARNING(log, "There are two the same ClickHouse instances in task " << task.entry_name
-                << ": " << task.host_id.readableString() << " and " << host.readableString() << ". Will use the first one only.");
+            LOG_WARNING(log, "There are two the same ClickHouse instances in task " << entry_name
+                << ": " << task->host_id.readableString() << " and " << host.readableString() << ". Will use the first one only.");
         }
         else
         {
             host_in_hostlist = true;
-            task.host_id = host;
-            task.host_id_str = host.toString();
+            task->host_id = host;
+            task->host_id_str = host.toString();
         }
     }
 
+    if (host_in_hostlist)
+        current_task = std::move(task);
+
     return host_in_hostlist;
 }
 
@@ -280,6 +306,7 @@ void DDLWorker::processTasks()
     LOG_DEBUG(log, "Processing tasks");
 
     Strings queue_nodes = zookeeper->getChildren(queue_dir, nullptr, event_queue_updated);
+    queue_nodes.erase(std::remove_if(queue_nodes.begin(), queue_nodes.end(), [&] (const String & s) { return !startsWith(s, "query-"); }), queue_nodes.end());
     if (queue_nodes.empty())
         return;
 
@@ -309,13 +336,10 @@ void DDLWorker::processTasks()
 
         if (!current_task)
         {
-            current_task = std::make_unique<DDLTask>();
-
-            if (!initAndCheckTask(*current_task, entry_name))
+            if (!initAndCheckTask(entry_name))
             {
-                LOG_DEBUG(log, "Will not execute task" << entry_name);
-                last_processed_task_name = current_task->entry_name;
-                current_task.reset();
+                LOG_DEBUG(log, "Will not execute task " << entry_name);
+                last_processed_task_name = entry_name;
                 continue;
             }
         }
@@ -338,7 +362,8 @@ void DDLWorker::processTasks()
             }
             catch (...)
             {
-                tryLogCurrentException(log, "An error occurred while processing task " + task.entry_name + " (" + task.entry.query + ")");
+                LOG_WARNING(log, "An error occurred while processing task " << task.entry_name << " (" << task.entry.query << ") : "
+                    << getCurrentExceptionMessage(true));
                 throw;
             }
         }
@@ -349,6 +374,9 @@ void DDLWorker::processTasks()
 
         last_processed_task_name = task.entry_name;
         current_task.reset();
+
+        if (stop_flag)
+            break;
     }
 }
 
@@ -524,7 +552,7 @@ void DDLWorker::processTask(DDLTask & task)
     ops.emplace_back(std::make_unique<zkutil::Op::Create>(task.entry_path + "/finished/" + task.host_id_str,
         task.execution_status.serializeText(), zookeeper->getDefaultACL(), zkutil::CreateMode::Persistent));
 
-    code = zookeeper->tryMultiWithRetries(ops);
+    code = zookeeper->tryMulti(ops);
     if (code != ZOK)
         throw zkutil::KeeperException("Cannot commit executed task to ZooKeeper " + task.entry_name, code);
 }
@@ -639,46 +667,118 @@ void DDLWorker::cleanupQueue(const Strings * node_names_to_check)
     constexpr size_t zookeeper_time_resolution = 1000;
 
     // Too early to check
-    if (last_cleanup_time_seconds && current_time_seconds < last_cleanup_time_seconds + cleanup_min_period_seconds)
+    if (last_cleanup_time_seconds && current_time_seconds < last_cleanup_time_seconds + cleanup_delay_period)
         return;
 
     last_cleanup_time_seconds = current_time_seconds;
 
     LOG_DEBUG(log, "Cleaning queue");
 
-    String data;
-    zkutil::Stat stat;
-    DDLLogEntry entry;
-
     Strings node_names_fetched = node_names_to_check ? Strings{} : zookeeper->getChildren(queue_dir);
     const Strings & node_names = (node_names_to_check) ? *node_names_to_check : node_names_fetched;
 
     for (const String & node_name : node_names)
     {
+        String node_path = queue_dir + "/" + node_name;
+        String lock_path = node_path + "/lock_write"; /// per-node lock to avoid concurrent cleaning
+        bool node_was_deleted = false;
+
+        auto delete_node = [&] ()
+        {
+            Strings childs = zookeeper->getChildren(node_path);
+            for (const String & child : childs)
+            {
+                if (child != "lock_write")
+                    zookeeper->removeRecursive(node_path + "/" + child);
+            }
+
+            zkutil::Ops ops;
+            ops.emplace_back(std::make_unique<zkutil::Op::Remove>(lock_path, -1));
+            ops.emplace_back(std::make_unique<zkutil::Op::Remove>(node_path, -1));
+            zookeeper->multi(ops);
+
+            node_was_deleted = true;
+        };
+
         try
         {
-            String node_path = queue_dir + "/" + node_name;
-            if (!zookeeper->tryGet(node_path, data, &stat))
-                continue;
+            zkutil::Ops ops;
+            ops.emplace_back(std::make_unique<zkutil::Op::Check>(node_path, -1));
+            ops.emplace_back(std::make_unique<zkutil::Op::Create>(lock_path, host_fqdn_id, zookeeper->getDefaultACL(),
+                                                                  zkutil::CreateMode::Ephemeral));
+            auto code = zookeeper->tryMulti(ops);
+            if (code != ZOK)
+            {
+                if (code == ZNONODE)
+                {
+                    /// Task node was deleted
+                    continue;
+                }
+                else if (code == ZNODEEXISTS)
+                {
+                    /// Is it our lock?
+                    String owner;
+                    if (!zookeeper->tryGet(lock_path, owner) || owner != host_fqdn_id)
+                        continue;
+                }
+                else
+                    throw zkutil::KeeperException(code);
+            }
 
-            /// TODO: Add shared lock to avoid rare race conditions.
+            SCOPE_EXIT({
+                if (!node_was_deleted && !zookeeper->expired())
+                {
+                    try
+                    {
+                        zookeeper->tryRemoveWithRetries(lock_path, -1);
+                    }
+                    catch (...)
+                    {
+                        tryLogCurrentException(log, "Can't remove lock for cleaning");
+                    }
+                }
+            });
+
+            zkutil::Stat stat;
+            String node_data = zookeeper->get(node_path, &stat);
 
             size_t zookeeper_time_seconds = stat.mtime / zookeeper_time_resolution;
-            if (zookeeper_time_seconds + node_max_lifetime_seconds < current_time_seconds)
+            if (zookeeper_time_seconds + task_max_lifetime < current_time_seconds)
             {
                 size_t lifetime_seconds = current_time_seconds - zookeeper_time_seconds;
                 LOG_INFO(log, "Lifetime of task " << node_name << " (" << lifetime_seconds << " sec.) is expired, deleting it");
-                zookeeper->removeRecursive(node_path);
+                delete_node();
                 continue;
             }
 
-            Strings finished_nodes = zookeeper->getChildren(node_path + "/finished");
-            entry.parse(data);
+            Strings active_hosts = zookeeper->getChildren(node_path + "/active");
+            if (!active_hosts.empty())
+                continue;
 
-            if (finished_nodes.size() >= entry.hosts.size())
+            Strings finished_hosts = zookeeper->getChildren(node_path + "/finished");
+            DDLLogEntry entry;
+            entry.parse(node_data);
+
+            /// Not all nodes were finished
+            if (finished_hosts.size() < entry.hosts.size())
+                continue;
+
+            /// Could be childs that are not from host list
+            bool all_finished = true;
+            NameSet finished_hosts_set(finished_hosts.begin(), finished_hosts.end());
+            for (const HostID & host : entry.hosts)
+            {
+                if (!finished_hosts_set.count(host.toString()))
+                {
+                    all_finished = false;
+                    break;
+                }
+            }
+
+            if (all_finished)
             {
                 LOG_INFO(log, "Task " << node_name << " had been executed by each host, deleting it");
-                zookeeper->removeRecursive(node_path);
+                delete_node();
             }
         }
         catch (...)
@@ -723,16 +823,30 @@ void DDLWorker::run()
     setThreadName("DDLWorker");
     LOG_DEBUG(log, "Started DDLWorker thread");
 
-    try
+    bool initialized = false;
+    do
     {
-        zookeeper = context.getZooKeeper();
-        zookeeper->createAncestors(queue_dir + "/");
-    }
-    catch (...)
-    {
-        tryLogCurrentException(log, "Terminating. Cannot initialize DDL queue");
-        throw;
-    }
+        try
+        {
+            try
+            {
+                zookeeper = context.getZooKeeper();
+                zookeeper->createAncestors(queue_dir + "/");
+                initialized = true;
+            }
+            catch (const zkutil::KeeperException & e)
+            {
+                if (!e.isHardwareError())
+                    throw;
+            }
+        }
+        catch (...)
+        {
+            tryLogCurrentException(log, "Terminating. Cannot initialize DDL queue");
+            throw;
+        }
+    } while (!initialized);
+
 
     while (!stop_flag)
     {
@@ -752,8 +866,15 @@ void DDLWorker::run()
         {
             if (e.isHardwareError())
             {
-                LOG_DEBUG(log, "Recovering ZooKeeper session after " << getCurrentExceptionMessage(false));
-                zookeeper = context.getZooKeeper();
+                if (!e.isTemporaryError())
+                {
+                    LOG_DEBUG(log, "Recovering ZooKeeper session after: " << getCurrentExceptionMessage(false));
+                    zookeeper = context.getZooKeeper();
+                }
+                else
+                {
+                    LOG_DEBUG(log, "Retry task processing after: " << getCurrentExceptionMessage(false));
+                }
             }
             else
             {
diff --git a/dbms/src/Interpreters/DDLWorker.h b/dbms/src/Interpreters/DDLWorker.h
index b029f1e2df2..7c06f95093d 100644
--- a/dbms/src/Interpreters/DDLWorker.h
+++ b/dbms/src/Interpreters/DDLWorker.h
@@ -24,7 +24,7 @@ BlockIO executeDDLQueryOnCluster(const ASTPtr & query_ptr, Context & context);
 class DDLWorker
 {
 public:
-    DDLWorker(const std::string & zk_root_dir, Context & context_);
+    DDLWorker(const std::string & zk_root_dir, Context & context_, const Poco::Util::AbstractConfiguration * config, const String & prefix);
     ~DDLWorker();
 
     /// Pushes query into DDL queue, returns path to created node
@@ -40,7 +40,9 @@ public:
 private:
     void processTasks();
 
-    bool initAndCheckTask(DDLTask & task, const String & entry_name);
+    /// Reads entry and check that the host belongs to host list of the task
+    /// Returns true and sets current_task if entry parsed and the check is passed
+    bool initAndCheckTask(const String & entry_name);
 
 
     void processTask(DDLTask & task);
@@ -92,9 +94,9 @@ private:
     size_t last_cleanup_time_seconds = 0;
 
     /// Delete node if its age is greater than that
-    static const size_t node_max_lifetime_seconds;
+    size_t task_max_lifetime = 7 * 24 * 60 * 60; // week (in seconds)
     /// Cleaning starts after new node event is received if the last cleaning wasn't made sooner than N seconds ago
-    static const size_t cleanup_min_period_seconds;
+    size_t cleanup_delay_period = 60; // minute (in seconds)
 
     friend class DDLQueryStatusInputSream;
     friend class DDLTask;
diff --git a/dbms/src/Server/Server.cpp b/dbms/src/Server/Server.cpp
index 2b9efdd7cfc..453b0577ff2 100644
--- a/dbms/src/Server/Server.cpp
+++ b/dbms/src/Server/Server.cpp
@@ -278,7 +278,7 @@ int Server::main(const std::vector<std::string> & args)
     {
         /// DDL worker should be started after all tables were loaded
         String ddl_zookeeper_path = config().getString("distributed_ddl.path", "/clickhouse/task_queue/ddl/");
-        global_context->setDDLWorker(std::make_shared<DDLWorker>(ddl_zookeeper_path, *global_context));
+        global_context->setDDLWorker(std::make_shared<DDLWorker>(ddl_zookeeper_path, *global_context, &config(), "distributed_ddl."));
     }
 
     SCOPE_EXIT({
diff --git a/dbms/tests/integration/test_distributed_ddl/test.py b/dbms/tests/integration/test_distributed_ddl/test.py
index 2056e4a2383..9b28633b980 100644
--- a/dbms/tests/integration/test_distributed_ddl/test.py
+++ b/dbms/tests/integration/test_distributed_ddl/test.py
@@ -31,6 +31,20 @@ def ddl_check_there_are_no_dublicates(instance):
     rows = instance.query("SELECT max(c), argMax(q, c) FROM (SELECT lower(query) AS q, count() AS c FROM system.query_log WHERE type=2 AND q LIKE '/*ddl_entry=query-%' GROUP BY query)")
     assert len(rows) == 0 or rows[0][0] == "1", "dublicates on {} {}, query {}".format(instance.name, instance.ip_address)
 
+# Make retries in case of UNKNOWN_STATUS_OF_INSERT or zkutil::KeeperException errors
+def insert_reliable(instance, query_insert):
+    """Run query_insert on instance, retrying (up to 100 attempts) only on transient insert/ZooKeeper errors."""
+    for i in xrange(100):
+        try:
+            instance.query(query_insert)
+            return
+        except Exception as e:
+            last_exception = e
+            # str.find() returns -1 (truthy) on a miss, so test membership; any other error is re-raised at once
+            if not any(msg in str(e) for msg in ('Unknown status, client must retry', 'zkutil::KeeperException')):
+                raise
+    raise last_exception
+
 
 TEST_REPLICATED_ALTERS=True
 cluster = ClickHouseCluster(__file__)
@@ -49,7 +63,7 @@ def started_cluster():
         cluster.start()
 
         # Select sacrifice instance to test CONNECTION_LOSS and server fail on it
-        sacrifice = cluster.instances['ch2']
+        sacrifice = cluster.instances['ch4']
         cluster.pm_random_drops = PartitionManager()
         cluster.pm_random_drops._add_rule({'probability': 0.05, 'destination': sacrifice.ip_address, 'source_port': 2181, 'action': 'REJECT --reject-with tcp-reset'})
         cluster.pm_random_drops._add_rule({'probability': 0.05, 'source': sacrifice.ip_address, 'destination_port': 2181, 'action': 'REJECT --reject-with tcp-reset'})
@@ -164,7 +178,7 @@ ENGINE = Distributed(cluster, default, merge, i)
 
     for i in xrange(4):
         k = (i / 2) * 2
-        cluster.instances['ch{}'.format(i + 1)].query("INSERT INTO merge (i) VALUES ({})({})".format(k, k+1))
+        insert_reliable(cluster.instances['ch{}'.format(i + 1)], "INSERT INTO merge (i) VALUES ({})({})".format(k, k+1))
 
     assert TSV(instance.query("SELECT i FROM all_merge_32 ORDER BY i")) == TSV(''.join(['{}\n'.format(x) for x in xrange(4)]))
 
@@ -177,7 +191,7 @@ ENGINE = Distributed(cluster, default, merge, i)
 
     for i in xrange(4):
         k = (i / 2) * 2 + 4
-        cluster.instances['ch{}'.format(i + 1)].query("INSERT INTO merge (p, i) VALUES (31, {})(31, {})".format(k, k+1))
+        insert_reliable(cluster.instances['ch{}'.format(i + 1)], "INSERT INTO merge (p, i) VALUES (31, {})(31, {})".format(k, k+1))
 
     assert TSV(instance.query("SELECT i, s FROM all_merge_64 ORDER BY i")) == TSV(''.join(['{}\t{}\n'.format(x,x) for x in xrange(8)]))
 

From e4785aeb4d80f54430803f08eef6896d76902928 Mon Sep 17 00:00:00 2001
From: Vitaliy Lyudvichenko <vludv@yandex-team.ru>
Date: Wed, 2 Aug 2017 17:42:35 +0300
Subject: [PATCH 142/281] Simplified clean queue logic. Added more tests.
 [#CLICKHOUSE-3128]

---
 dbms/src/Common/Exception.cpp                 |   2 +-
 dbms/src/Common/ZooKeeper/Lock.cpp            |   5 +
 dbms/src/Common/ZooKeeper/Lock.h              |   1 +
 dbms/src/Common/ZooKeeper/ZooKeeper.h         |   2 +-
 dbms/src/Core/ErrorCodes.cpp                  |   1 +
 dbms/src/Interpreters/DDLWorker.cpp           | 227 +++++++++---------
 dbms/src/Interpreters/DDLWorker.h             |   8 +-
 dbms/tests/integration/helpers/client.py      |   2 +-
 dbms/tests/integration/helpers/cluster.py     |  24 +-
 dbms/tests/integration/helpers/network.py     |  21 ++
 .../configs/config.d/cluster.xml              |  28 ---
 .../config.d/cluster_default_database.xml     |  32 ---
 .../configs/config.d/cluster_no_replicas.xml  |  34 ---
 .../cluster_without_internal_replication.xml  |  28 ---
 .../configs/config.d/clusters.xml             | 119 +++++++++
 .../configs/config.d/ddl.xml                  |   3 +
 .../integration/test_distributed_ddl/test.py  |  58 ++++-
 17 files changed, 340 insertions(+), 255 deletions(-)
 delete mode 100644 dbms/tests/integration/test_distributed_ddl/configs/config.d/cluster.xml
 delete mode 100644 dbms/tests/integration/test_distributed_ddl/configs/config.d/cluster_default_database.xml
 delete mode 100644 dbms/tests/integration/test_distributed_ddl/configs/config.d/cluster_no_replicas.xml
 delete mode 100644 dbms/tests/integration/test_distributed_ddl/configs/config.d/cluster_without_internal_replication.xml
 create mode 100644 dbms/tests/integration/test_distributed_ddl/configs/config.d/clusters.xml

diff --git a/dbms/src/Common/Exception.cpp b/dbms/src/Common/Exception.cpp
index 86c1b272b6a..d1176f73d07 100644
--- a/dbms/src/Common/Exception.cpp
+++ b/dbms/src/Common/Exception.cpp
@@ -258,7 +258,7 @@ bool ExecutionStatus::tryDeserializeText(const std::string & data)
 
 ExecutionStatus ExecutionStatus::fromCurrentException(const std::string & start_of_message)
 {
-    String msg = start_of_message.empty() ? "" : (start_of_message + ": " + getCurrentExceptionMessage(false, true));
+    String msg = (start_of_message.empty() ? "" : (start_of_message + ": ")) + getCurrentExceptionMessage(false, true);
     return ExecutionStatus(getCurrentExceptionCode(), msg);
 }
 
diff --git a/dbms/src/Common/ZooKeeper/Lock.cpp b/dbms/src/Common/ZooKeeper/Lock.cpp
index 541446e7ace..3c5d8ec9c63 100644
--- a/dbms/src/Common/ZooKeeper/Lock.cpp
+++ b/dbms/src/Common/ZooKeeper/Lock.cpp
@@ -138,3 +138,8 @@ void Lock::unlockOrMoveIfFailed(std::vector<zkutil::Lock> & failed_to_unlock_loc
     }
 }
 
+void Lock::unlockAssumeLockNodeRemovedManually()
+{
+    locked.reset(nullptr);
+}
+
diff --git a/dbms/src/Common/ZooKeeper/Lock.h b/dbms/src/Common/ZooKeeper/Lock.h
index dbacc2d62f0..ab176755c7a 100644
--- a/dbms/src/Common/ZooKeeper/Lock.h
+++ b/dbms/src/Common/ZooKeeper/Lock.h
@@ -60,6 +60,7 @@ namespace zkutil
 
         void unlock();
         void unlockOrMoveIfFailed(std::vector<zkutil::Lock> & failed_to_unlock_locks);
+        void unlockAssumeLockNodeRemovedManually();
 
         bool tryLock();
 
diff --git a/dbms/src/Common/ZooKeeper/ZooKeeper.h b/dbms/src/Common/ZooKeeper/ZooKeeper.h
index 197cb8083b2..fa8dd1d8c29 100644
--- a/dbms/src/Common/ZooKeeper/ZooKeeper.h
+++ b/dbms/src/Common/ZooKeeper/ZooKeeper.h
@@ -95,7 +95,7 @@ public:
     /// Throw an exception if something went wrong.
     std::string create(const std::string & path, const std::string & data, int32_t mode);
 
-    /// Doesn not throw in the following cases:
+    /// Does not throw in the following cases:
     /// * The parent for the created node does not exist
     /// * The parent is ephemeral.
     /// * The node already exists.
diff --git a/dbms/src/Core/ErrorCodes.cpp b/dbms/src/Core/ErrorCodes.cpp
index f713945cb67..ceee11c9e2b 100644
--- a/dbms/src/Core/ErrorCodes.cpp
+++ b/dbms/src/Core/ErrorCodes.cpp
@@ -381,6 +381,7 @@ namespace ErrorCodes
     extern const int CANNOT_PARSE_UUID = 376;
     extern const int ILLEGAL_SYNTAX_FOR_DATA_TYPE = 377;
     extern const int DATA_TYPE_CANNOT_HAVE_ARGUMENTS = 378;
+    extern const int UNKNOWN_STATUS_OF_DISTRIBUTED_DDL_TASK = 379;
 
     extern const int KEEPER_EXCEPTION = 999;
     extern const int POCO_EXCEPTION = 1000;
diff --git a/dbms/src/Interpreters/DDLWorker.cpp b/dbms/src/Interpreters/DDLWorker.cpp
index 7dcb2e5edfc..7aa02151aeb 100644
--- a/dbms/src/Interpreters/DDLWorker.cpp
+++ b/dbms/src/Interpreters/DDLWorker.cpp
@@ -49,8 +49,9 @@ namespace ErrorCodes
     extern const int INCONSISTENT_TABLE_ACCROSS_SHARDS;
     extern const int INCONSISTENT_CLUSTER_DEFINITION;
     extern const int TIMEOUT_EXCEEDED;
-    extern const int UNFINISHED;
     extern const int UNKNOWN_TYPE_OF_QUERY;
+    extern const int UNFINISHED;
+    extern const int UNKNOWN_STATUS_OF_DISTRIBUTED_DDL_TASK;
 }
 
 
@@ -189,6 +190,15 @@ struct DDLTask
 };
 
 
+static std::unique_ptr<zkutil::Lock> createSimpleZooKeeperLock(
+    std::shared_ptr<zkutil::ZooKeeper> & zookeeper, const String & lock_prefix, const String & lock_name, const String & lock_message)
+{
+    auto zookeeper_holder = std::make_shared<zkutil::ZooKeeperHolder>();
+    zookeeper_holder->initFromInstance(zookeeper);
+    return std::make_unique<zkutil::Lock>(std::move(zookeeper_holder), lock_prefix, lock_name, lock_message);
+}
+
+
 static bool isSupportedAlterType(int type)
 {
     static const std::unordered_set<int> supported_alter_types{
@@ -214,6 +224,7 @@ DDLWorker::DDLWorker(const std::string & zk_root_dir, Context & context_, const
     {
         task_max_lifetime = config->getUInt64(prefix + "task_max_lifetime", task_max_lifetime);
         cleanup_delay_period = config->getUInt64(prefix + "cleanup_delay_period", cleanup_delay_period);
+        max_tasks_in_queue = std::max(1UL, config->getUInt64(prefix + "max_tasks_in_queue", max_tasks_in_queue)); /// keep room for at least one task
     }
 
     host_fqdn = getFQDNOrHostName();
@@ -301,18 +312,24 @@ bool DDLWorker::initAndCheckTask(const String & entry_name)
 }
 
 
+static void filterAndSortQueueNodes(Strings & all_nodes)
+{
+    all_nodes.erase(std::remove_if(all_nodes.begin(), all_nodes.end(), [] (const String & s) { return !startsWith(s, "query-"); }), all_nodes.end());
+    std::sort(all_nodes.begin(), all_nodes.end());
+}
+
+
 void DDLWorker::processTasks()
 {
     LOG_DEBUG(log, "Processing tasks");
 
     Strings queue_nodes = zookeeper->getChildren(queue_dir, nullptr, event_queue_updated);
-    queue_nodes.erase(std::remove_if(queue_nodes.begin(), queue_nodes.end(), [&] (const String & s) { return !startsWith(s, "query-"); }), queue_nodes.end());
+    filterAndSortQueueNodes(queue_nodes);
     if (queue_nodes.empty())
         return;
 
     bool server_startup = last_processed_task_name.empty();
 
-    std::sort(queue_nodes.begin(), queue_nodes.end());
     auto begin_node = server_startup
         ? queue_nodes.begin()
         : std::upper_bound(queue_nodes.begin(), queue_nodes.end(), last_processed_task_name);
@@ -434,7 +451,7 @@ void DDLWorker::parseQueryAndResolveHost(DDLTask & task)
         return;
 
     LOG_WARNING(log, "Not found the exact match of host " << task.host_id.readableString() << " from task " << task.entry_name
-        << " in " << " cluster " << task.cluster_name << " definition. Will try to find it using host name resolving.");
+        << " in cluster " << task.cluster_name << " definition. Will try to find it using host name resolving.");
 
     bool found_via_resolving = false;
     for (size_t shard_num = 0; shard_num < shards.size(); ++shard_num)
@@ -505,13 +522,26 @@ void DDLWorker::processTask(DDLTask & task)
 {
     LOG_DEBUG(log, "Processing task " << task.entry_name << " (" << task.entry.query << ")");
 
-    createStatusDirs(task.entry_path);
-
+    String dummy;
     String active_node_path = task.entry_path + "/active/" + task.host_id_str;
-    auto code = zookeeper->tryCreate(active_node_path, "", zkutil::CreateMode::Ephemeral);
-    if (code != ZOK && code != ZNODEEXISTS)
+    String finished_node_path = task.entry_path + "/finished/" + task.host_id_str;
+
+    auto code = zookeeper->tryCreateWithRetries(active_node_path, "", zkutil::CreateMode::Ephemeral, dummy);
+    if (code == ZOK || code == ZNODEEXISTS)
+    {
+        // Ok
+    }
+    else if (code == ZNONODE)
+    {
+        /// There is no parent: create the status dirs, retry once and report the code of the retry (not the stale ZNONODE)
+        createStatusDirs(task.entry_path);
+        if (ZOK != (code = zookeeper->tryCreateWithRetries(active_node_path, "", zkutil::CreateMode::Ephemeral, dummy)))
+            throw zkutil::KeeperException(code, active_node_path);
+    }
+    else
         throw zkutil::KeeperException(code, active_node_path);
 
+
     if (!task.was_executed)
     {
         try
@@ -548,13 +578,10 @@ void DDLWorker::processTask(DDLTask & task)
 
     /// Delete active flag and create finish flag
     zkutil::Ops ops;
-    ops.emplace_back(std::make_unique<zkutil::Op::Remove>(task.entry_path + "/active/" + task.host_id_str, -1));
-    ops.emplace_back(std::make_unique<zkutil::Op::Create>(task.entry_path + "/finished/" + task.host_id_str,
-        task.execution_status.serializeText(), zookeeper->getDefaultACL(), zkutil::CreateMode::Persistent));
-
-    code = zookeeper->tryMulti(ops);
-    if (code != ZOK)
-        throw zkutil::KeeperException("Cannot commit executed task to ZooKeeper " + task.entry_name, code);
+    ops.emplace_back(std::make_unique<zkutil::Op::Remove>(active_node_path, -1));
+    ops.emplace_back(std::make_unique<zkutil::Op::Create>(finished_node_path, task.execution_status.serializeText(),
+                                                          zookeeper->getDefaultACL(), zkutil::CreateMode::Persistent));
+    zookeeper->multi(ops);
 }
 
 
@@ -617,10 +644,7 @@ void DDLWorker::processTaskAlter(
 
         bool alter_executed_by_replica = false;
         {
-            auto zookeeper_holder = std::make_shared<zkutil::ZooKeeperHolder>();
-            zookeeper_holder->initFromInstance(zookeeper);
-
-            zkutil::Lock lock(zookeeper_holder, shard_path, "lock", task.host_id_str);
+            auto lock = createSimpleZooKeeperLock(zookeeper, shard_path, "lock", task.host_id_str);
             std::mt19937 rng(std::hash<String>{}(task.host_id_str) + reinterpret_cast<intptr_t>(&rng));
 
             for (int num_tries = 0; num_tries < 10; ++num_tries)
@@ -631,7 +655,7 @@ void DDLWorker::processTaskAlter(
                     break;
                 }
 
-                if (lock.tryLock())
+                if (lock->tryLock())
                 {
                     tryExecuteQuery(rewritten_query, task, task.execution_status);
 
@@ -641,7 +665,7 @@ void DDLWorker::processTaskAlter(
                     }
 
                     zookeeper->create(is_executed_path, task.host_id_str, zkutil::CreateMode::Persistent);
-                    lock.unlock();
+                    lock->unlock();
                     alter_executed_by_replica = true;
                     break;
                 }
@@ -660,7 +684,7 @@ void DDLWorker::processTaskAlter(
 }
 
 
-void DDLWorker::cleanupQueue(const Strings * node_names_to_check)
+void DDLWorker::cleanupQueue()
 {
     /// Both ZK and Poco use Unix epoch
     size_t current_time_seconds = Poco::Timestamp().epochTime();
@@ -674,112 +698,71 @@ void DDLWorker::cleanupQueue(const Strings * node_names_to_check)
 
     LOG_DEBUG(log, "Cleaning queue");
 
-    Strings node_names_fetched = node_names_to_check ? Strings{} : zookeeper->getChildren(queue_dir);
-    const Strings & node_names = (node_names_to_check) ? *node_names_to_check : node_names_fetched;
+    Strings queue_nodes = zookeeper->getChildren(queue_dir);
+    filterAndSortQueueNodes(queue_nodes);
 
-    for (const String & node_name : node_names)
+    size_t num_outdated_nodes = (queue_nodes.size() > max_tasks_in_queue) ? queue_nodes.size() - max_tasks_in_queue : 0;
+    auto first_non_outdated_node = queue_nodes.begin() + num_outdated_nodes;
+
+    for (auto it = queue_nodes.cbegin(); it < queue_nodes.cend(); ++it)
     {
+        String node_name = *it;
         String node_path = queue_dir + "/" + node_name;
-        String lock_path = node_path + "/lock_write"; /// per-node lock to avoid concurrent cleaning
-        bool node_was_deleted = false;
+        String lock_path = node_path + "/lock";
 
-        auto delete_node = [&] ()
-        {
-            Strings childs = zookeeper->getChildren(node_path);
-            for (const String & child : childs)
-            {
-                if (child != "lock_write")
-                    zookeeper->removeRecursive(node_path + "/" + child);
-            }
-
-            zkutil::Ops ops;
-            ops.emplace_back(std::make_unique<zkutil::Op::Remove>(lock_path, -1));
-            ops.emplace_back(std::make_unique<zkutil::Op::Remove>(node_path, -1));
-            zookeeper->multi(ops);
-
-            node_was_deleted = true;
-        };
+        zkutil::Stat stat;
+        String dummy;
 
         try
         {
-            zkutil::Ops ops;
-            ops.emplace_back(std::make_unique<zkutil::Op::Check>(node_path, -1));
-            ops.emplace_back(std::make_unique<zkutil::Op::Create>(lock_path, host_fqdn_id, zookeeper->getDefaultACL(),
-                                                                  zkutil::CreateMode::Ephemeral));
-            auto code = zookeeper->tryMulti(ops);
-            if (code != ZOK)
+            /// To avoid concurrent checks and cleans
+            auto lock = createSimpleZooKeeperLock(zookeeper, node_path, "lock", host_fqdn_id);
+            if (!lock->tryLock())
+                continue;
+
+            auto delete_node = [&] ()
             {
-                if (code == ZNONODE)
+                Strings childs = zookeeper->getChildren(node_path);
+                for (const String & child : childs)
                 {
-                    /// Task node was deleted
-                    continue;
+                    if (child != "lock")
+                        zookeeper->tryRemoveRecursive(node_path + "/" + child);
                 }
-                else if (code == ZNODEEXISTS)
-                {
-                    /// Is it our lock?
-                    String owner;
-                    if (!zookeeper->tryGet(lock_path, owner) || owner != host_fqdn_id)
-                        continue;
-                }
-                else
-                    throw zkutil::KeeperException(code);
+
+                /// Remove the lock node and its parent atomically
+                zkutil::Ops ops;
+                ops.emplace_back(std::make_unique<zkutil::Op::Remove>(lock_path, -1));
+                ops.emplace_back(std::make_unique<zkutil::Op::Remove>(node_path, -1));
+                zookeeper->multi(ops);
+
+                lock->unlockAssumeLockNodeRemovedManually();
+            };
+
+            /// Skip if there are active nodes (it is weak guard)
+            if (zookeeper->tryGet(node_path + "/active", dummy, &stat) && stat.numChildren > 0)
+                continue;
+
+            /// Delete if too many (max_tasks_in_queue) task in queue
+            if (it < first_non_outdated_node)
+            {
+                LOG_INFO(log, "Task " << node_name << " is outdated, deleting it");
+
+                delete_node();
+                continue;
             }
 
-            SCOPE_EXIT({
-                if (!node_was_deleted && !zookeeper->expired())
-                {
-                    try
-                    {
-                        zookeeper->tryRemoveWithRetries(lock_path, -1);
-                    }
-                    catch (...)
-                    {
-                        tryLogCurrentException(log, "Can't remove lock for cleaning");
-                    }
-                }
-            });
-
-            zkutil::Stat stat;
-            String node_data = zookeeper->get(node_path, &stat);
-
+            zookeeper->get(node_path, &stat);
             size_t zookeeper_time_seconds = stat.mtime / zookeeper_time_resolution;
+
+            /// Delete if node lifetime (task_max_lifetime) is expired
             if (zookeeper_time_seconds + task_max_lifetime < current_time_seconds)
             {
                 size_t lifetime_seconds = current_time_seconds - zookeeper_time_seconds;
                 LOG_INFO(log, "Lifetime of task " << node_name << " (" << lifetime_seconds << " sec.) is expired, deleting it");
+
                 delete_node();
                 continue;
             }
-
-            Strings active_hosts = zookeeper->getChildren(node_path + "/active");
-            if (!active_hosts.empty())
-                continue;
-
-            Strings finished_hosts = zookeeper->getChildren(node_path + "/finished");
-            DDLLogEntry entry;
-            entry.parse(node_data);
-
-            /// Not all nodes were finished
-            if (finished_hosts.size() < entry.hosts.size())
-                continue;
-
-            /// Could be childs that are not from host list
-            bool all_finished = true;
-            NameSet finished_hosts_set(finished_hosts.begin(), finished_hosts.end());
-            for (const HostID & host : entry.hosts)
-            {
-                if (!finished_hosts_set.count(host.toString()))
-                {
-                    all_finished = false;
-                    break;
-                }
-            }
-
-            if (all_finished)
-            {
-                LOG_INFO(log, "Task " << node_name << " had been executed by each host, deleting it");
-                delete_node();
-            }
         }
         catch (...)
         {
@@ -806,13 +789,35 @@ void DDLWorker::createStatusDirs(const std::string & node_path)
 String DDLWorker::enqueueQuery(DDLLogEntry & entry)
 {
     if (entry.hosts.empty())
-        return {};
+        throw Exception("Empty host list in a distributed DDL task", ErrorCodes::LOGICAL_ERROR);
 
     String query_path_prefix = queue_dir + "/query-";
     zookeeper->createAncestors(query_path_prefix);
 
-    String node_path = zookeeper->create(query_path_prefix, entry.toString(), zkutil::CreateMode::PersistentSequential);
-    createStatusDirs(node_path);
+    String node_path;
+    try
+    {
+        node_path = zookeeper->create(query_path_prefix, entry.toString(), zkutil::CreateMode::PersistentSequential);
+    }
+    catch (const zkutil::KeeperException & e)
+    {
+        /// TODO: This condition could be relaxed with additional post-checks
+        if (e.isTemporaryError())
+            throw Exception("Unknown status of distributed DDL task", ErrorCodes::UNKNOWN_STATUS_OF_DISTRIBUTED_DDL_TASK);
+
+        throw;
+    }
+
+    /// Optional step
+    try
+    {
+        createStatusDirs(node_path);
+    }
+    catch (...)
+    {
+        LOG_INFO(log, "An error occurred while creating auxiliary ZooKeeper directories in " << node_path << " . They will be created later"
+            << ". Error : " << getCurrentExceptionMessage(true));
+    }
 
     return node_path;
 }
diff --git a/dbms/src/Interpreters/DDLWorker.h b/dbms/src/Interpreters/DDLWorker.h
index 7c06f95093d..7fefc08ccdc 100644
--- a/dbms/src/Interpreters/DDLWorker.h
+++ b/dbms/src/Interpreters/DDLWorker.h
@@ -59,7 +59,7 @@ private:
 
 
     /// Checks and cleanups queue's nodes
-    void cleanupQueue(const Strings * node_names_to_check = nullptr);
+    void cleanupQueue();
 
 
     void createStatusDirs(const std::string & node_name);
@@ -93,10 +93,12 @@ private:
 
     size_t last_cleanup_time_seconds = 0;
 
-    /// Delete node if its age is greater than that
-    size_t task_max_lifetime = 7 * 24 * 60 * 60; // week (in seconds)
     /// Cleaning starts after new node event is received if the last cleaning wasn't made sooner than N seconds ago
     size_t cleanup_delay_period = 60; // minute (in seconds)
+    /// Delete node if its age is greater than that
+    size_t task_max_lifetime = 7 * 24 * 60 * 60; // week (in seconds)
+    /// How many tasks could be in the queue
+    size_t max_tasks_in_queue = 1000;
 
     friend class DDLQueryStatusInputSream;
     friend class DDLTask;
diff --git a/dbms/tests/integration/helpers/client.py b/dbms/tests/integration/helpers/client.py
index dc6e2dc2812..1d63765f324 100644
--- a/dbms/tests/integration/helpers/client.py
+++ b/dbms/tests/integration/helpers/client.py
@@ -8,7 +8,7 @@ class Client:
     def __init__(self, host, port=9000, command='/usr/bin/clickhouse-client'):
         self.host = host
         self.port = port
-        self.command = [command, '--host', self.host, '--port', str(self.port)]
+        self.command = [command, '--host', self.host, '--port', str(self.port), '--stacktrace']
 
 
     def query(self, sql, stdin=None, timeout=None):
diff --git a/dbms/tests/integration/helpers/cluster.py b/dbms/tests/integration/helpers/cluster.py
index e0684b5b4d2..103e408f3f5 100644
--- a/dbms/tests/integration/helpers/cluster.py
+++ b/dbms/tests/integration/helpers/cluster.py
@@ -49,7 +49,8 @@ class ClickHouseCluster:
         self.is_up = False
 
 
-    def add_instance(self, name, config_dir=None, main_configs=[], user_configs=[], macroses={}, with_zookeeper=False, clickhouse_path_dir=None):
+    def add_instance(self, name, config_dir=None, main_configs=[], user_configs=[], macroses={}, with_zookeeper=False,
+        clickhouse_path_dir=None, hostname=None):
         """Add an instance to the cluster.
 
         name - the name of the instance directory and the value of the 'instance' macro in ClickHouse.
@@ -65,7 +66,10 @@ class ClickHouseCluster:
         if name in self.instances:
             raise Exception("Can\'t add instance `%s': there is already an instance with the same name!" % name)
 
-        instance = ClickHouseInstance(self, self.base_dir, name, config_dir, main_configs, user_configs, macroses, with_zookeeper, self.base_configs_dir, self.server_bin_path, clickhouse_path_dir)
+        instance = ClickHouseInstance(
+            self, self.base_dir, name, config_dir, main_configs, user_configs, macroses, with_zookeeper,
+            self.base_configs_dir, self.server_bin_path, clickhouse_path_dir, hostname=hostname)
+
         self.instances[name] = instance
         self.base_cmd.extend(['--file', instance.docker_compose_path])
         if with_zookeeper and not self.with_zookeeper:
@@ -135,7 +139,7 @@ version: '2'
 services:
     {name}:
         image: ubuntu:14.04
-        hostname: {name}
+        hostname: {hostname}
         user: '{uid}'
         volumes:
             - {binary_path}:/usr/bin/clickhouse:ro
@@ -153,12 +157,13 @@ services:
 class ClickHouseInstance:
     def __init__(
             self, cluster, base_path, name, custom_config_dir, custom_main_configs, custom_user_configs, macroses,
-            with_zookeeper, base_configs_dir, server_bin_path, clickhouse_path_dir):
+            with_zookeeper, base_configs_dir, server_bin_path, clickhouse_path_dir, hostname=None):
 
         self.name = name
         self.base_cmd = cluster.base_cmd[:]
         self.docker_id = cluster.get_instance_docker_id(self.name)
         self.cluster = cluster
+        self.hostname = hostname if hostname is not None else self.name
 
         self.custom_config_dir = p.abspath(p.join(base_path, custom_config_dir)) if custom_config_dir else None
         self.custom_main_config_paths = [p.abspath(p.join(base_path, c)) for c in custom_main_configs]
@@ -187,6 +192,16 @@ class ClickHouseInstance:
         return self.client.get_query_request(*args, **kwargs)
 
 
+    def exec_in_container(self, cmd, **kwargs):
+        container = self.get_docker_handle()
+        handle = self.docker_client.api.exec_create(container.id, cmd, **kwargs)
+        output = self.docker_client.api.exec_start(handle).decode('utf8')
+        exit_code = self.docker_client.api.exec_inspect(handle)['ExitCode']
+        if exit_code:
+            raise Exception('Cmd {} failed! Return code {}. Output {}'.format(' '.join(cmd), exit_code, output))
+        return output
+
+
     def get_docker_handle(self):
         return self.docker_client.containers.get(self.docker_id)
 
@@ -301,6 +316,7 @@ class ClickHouseInstance:
         with open(self.docker_compose_path, 'w') as docker_compose:
             docker_compose.write(DOCKER_COMPOSE_TEMPLATE.format(
                 name=self.name,
+                hostname=self.hostname,
                 uid=os.getuid(),
                 binary_path=self.server_bin_path,
                 configs_dir=configs_dir,
diff --git a/dbms/tests/integration/helpers/network.py b/dbms/tests/integration/helpers/network.py
index ccd21866ad4..1001ae9d477 100644
--- a/dbms/tests/integration/helpers/network.py
+++ b/dbms/tests/integration/helpers/network.py
@@ -56,6 +56,15 @@ class PartitionManager:
             rule = self._iptables_rules.pop()
             _NetworkManager.get().delete_iptables_rule(**rule)
 
+    def pop_rules(self):
+        res = self._iptables_rules[:]
+        self.heal_all()
+        return res
+
+    def push_rules(self, rules):
+        for rule in rules:
+            self._add_rule(rule)
+
 
     @staticmethod
     def _check_instance(instance):
@@ -77,6 +86,18 @@ class PartitionManager:
         self.heal_all()
 
 
+class PartitionManagerDisbaler:
+    def __init__(self, manager):
+        self.manager = manager
+        self.rules = self.manager.pop_rules()
+
+    def __enter__(self):
+        return self
+
+    def __exit__(self, exc_type, exc_val, exc_tb):
+        self.manager.push_rules(self.rules)
+
+
 class _NetworkManager:
     """Execute commands inside a container with access to network settings.
 
diff --git a/dbms/tests/integration/test_distributed_ddl/configs/config.d/cluster.xml b/dbms/tests/integration/test_distributed_ddl/configs/config.d/cluster.xml
deleted file mode 100644
index 1b2e9f75e49..00000000000
--- a/dbms/tests/integration/test_distributed_ddl/configs/config.d/cluster.xml
+++ /dev/null
@@ -1,28 +0,0 @@
-<yandex>
-<remote_servers>
-    <cluster>
-        <shard>
-            <internal_replication>true</internal_replication>
-            <replica>
-                <host>ch1</host>
-                <port>9000</port>
-            </replica>
-            <replica>
-                <host>ch2</host>
-                <port>9000</port>
-            </replica>
-        </shard>
-        <shard>
-            <internal_replication>true</internal_replication>
-            <replica>
-                <host>ch3</host>
-                <port>9000</port>
-            </replica>
-            <replica>
-                <host>ch4</host>
-                <port>9000</port>
-            </replica>
-        </shard>
-    </cluster>
-</remote_servers>
-</yandex>
\ No newline at end of file
diff --git a/dbms/tests/integration/test_distributed_ddl/configs/config.d/cluster_default_database.xml b/dbms/tests/integration/test_distributed_ddl/configs/config.d/cluster_default_database.xml
deleted file mode 100644
index ccc16846812..00000000000
--- a/dbms/tests/integration/test_distributed_ddl/configs/config.d/cluster_default_database.xml
+++ /dev/null
@@ -1,32 +0,0 @@
-<yandex>
-<remote_servers>
-    <cluster2>
-        <shard>
-            <internal_replication>true</internal_replication>
-            <replica>
-                <host>ch1</host>
-                <port>9000</port>
-                <default_database>default</default_database>
-            </replica>
-            <replica>
-                <host>ch2</host>
-                <port>9000</port>
-                <default_database>test2</default_database>
-            </replica>
-        </shard>
-        <shard>
-            <internal_replication>true</internal_replication>
-            <replica>
-                <host>ch3</host>
-                <port>9000</port>
-                <default_database>default</default_database>
-            </replica>
-            <replica>
-                <host>ch4</host>
-                <port>9000</port>
-                <default_database>test2</default_database>
-            </replica>
-        </shard>
-    </cluster2>
-</remote_servers>
-</yandex>
\ No newline at end of file
diff --git a/dbms/tests/integration/test_distributed_ddl/configs/config.d/cluster_no_replicas.xml b/dbms/tests/integration/test_distributed_ddl/configs/config.d/cluster_no_replicas.xml
deleted file mode 100644
index 5b3a9409b24..00000000000
--- a/dbms/tests/integration/test_distributed_ddl/configs/config.d/cluster_no_replicas.xml
+++ /dev/null
@@ -1,34 +0,0 @@
-<yandex>
-<remote_servers>
-    <cluster_no_replicas>
-        <shard>
-            <internal_replication>false</internal_replication>
-            <replica>
-                <host>ch1</host>
-                <port>9000</port>
-            </replica>
-        </shard>
-        <shard>
-            <internal_replication>false</internal_replication>
-            <replica>
-                <host>ch2</host>
-                <port>9000</port>
-            </replica>
-        </shard>
-        <shard>
-            <internal_replication>false</internal_replication>
-            <replica>
-                <host>ch3</host>
-                <port>9000</port>
-            </replica>
-        </shard>
-        <shard>
-            <internal_replication>false</internal_replication>
-            <replica>
-                <host>ch4</host>
-                <port>9000</port>
-            </replica>
-        </shard>
-    </cluster_no_replicas>
-</remote_servers>
-</yandex>
\ No newline at end of file
diff --git a/dbms/tests/integration/test_distributed_ddl/configs/config.d/cluster_without_internal_replication.xml b/dbms/tests/integration/test_distributed_ddl/configs/config.d/cluster_without_internal_replication.xml
deleted file mode 100644
index 9378a0c1272..00000000000
--- a/dbms/tests/integration/test_distributed_ddl/configs/config.d/cluster_without_internal_replication.xml
+++ /dev/null
@@ -1,28 +0,0 @@
-<yandex>
-<remote_servers>
-    <cluster_without_replication>
-        <shard>
-            <internal_replication>false</internal_replication>
-            <replica>
-                <host>ch1</host>
-                <port>9000</port>
-            </replica>
-            <replica>
-                <host>ch2</host>
-                <port>9000</port>
-            </replica>
-        </shard>
-        <shard>
-            <internal_replication>false</internal_replication>
-            <replica>
-                <host>ch3</host>
-                <port>9000</port>
-            </replica>
-            <replica>
-                <host>ch4</host>
-                <port>9000</port>
-            </replica>
-        </shard>
-    </cluster_without_replication>
-</remote_servers>
-</yandex>
\ No newline at end of file
diff --git a/dbms/tests/integration/test_distributed_ddl/configs/config.d/clusters.xml b/dbms/tests/integration/test_distributed_ddl/configs/config.d/clusters.xml
new file mode 100644
index 00000000000..9d944c70854
--- /dev/null
+++ b/dbms/tests/integration/test_distributed_ddl/configs/config.d/clusters.xml
@@ -0,0 +1,119 @@
+<yandex>
+<remote_servers>
+
+    <!-- Main cluster -->
+    <cluster>
+        <shard>
+            <internal_replication>true</internal_replication>
+            <replica>
+                <host>ch1</host>
+                <port>9000</port>
+            </replica>
+            <replica>
+                <host>ch2</host>
+                <port>9000</port>
+            </replica>
+        </shard>
+        <shard>
+            <internal_replication>true</internal_replication>
+            <replica>
+                <host>ch3</host>
+                <port>9000</port>
+            </replica>
+            <replica>
+                <host>ch4</host>
+                <port>9000</port>
+            </replica>
+        </shard>
+    </cluster>
+
+    <!-- Cluster with specified default database -->
+    <cluster2>
+        <shard>
+            <internal_replication>true</internal_replication>
+            <replica>
+                <host>ch1</host>
+                <port>9000</port>
+                <default_database>default</default_database>
+            </replica>
+            <replica>
+                <host>ch2</host>
+                <port>9000</port>
+                <default_database>test2</default_database>
+            </replica>
+        </shard>
+        <shard>
+            <internal_replication>true</internal_replication>
+            <replica>
+                <host>ch3</host>
+                <port>9000</port>
+                <default_database>default</default_database>
+            </replica>
+            <replica>
+                <host>ch4</host>
+                <port>9000</port>
+                <default_database>test2</default_database>
+            </replica>
+        </shard>
+    </cluster2>
+
+    <!-- Cluster without replicas -->
+    <cluster_no_replicas>
+        <shard>
+            <internal_replication>false</internal_replication>
+            <replica>
+                <host>ch1</host>
+                <port>9000</port>
+            </replica>
+        </shard>
+        <shard>
+            <internal_replication>false</internal_replication>
+            <replica>
+                <host>ch2</host>
+                <port>9000</port>
+            </replica>
+        </shard>
+        <shard>
+            <internal_replication>false</internal_replication>
+            <replica>
+                <host>ch3</host>
+                <port>9000</port>
+            </replica>
+        </shard>
+        <shard>
+            <internal_replication>false</internal_replication>
+            <replica>
+                <host>ch4</host>
+                <port>9000</port>
+            </replica>
+        </shard>
+    </cluster_no_replicas>
+
+    <!-- Cluster without internal replication -->
+    <cluster_without_replication>
+        <shard>
+            <internal_replication>false</internal_replication>
+            <replica>
+                <host>ch1</host>
+                <port>9000</port>
+            </replica>
+            <replica>
+                <host>ch2</host>
+                <port>9000</port>
+            </replica>
+        </shard>
+        <shard>
+            <internal_replication>false</internal_replication>
+            <replica>
+                <host>ch3</host>
+                <port>9000</port>
+            </replica>
+            <replica>
+                <host>ch4</host>
+                <port>9000</port>
+            </replica>
+        </shard>
+    </cluster_without_replication>
+
+</remote_servers>
+</yandex>
\ No newline at end of file
diff --git a/dbms/tests/integration/test_distributed_ddl/configs/config.d/ddl.xml b/dbms/tests/integration/test_distributed_ddl/configs/config.d/ddl.xml
index abad0dee450..046ac052142 100644
--- a/dbms/tests/integration/test_distributed_ddl/configs/config.d/ddl.xml
+++ b/dbms/tests/integration/test_distributed_ddl/configs/config.d/ddl.xml
@@ -1,5 +1,8 @@
 <yandex>
     <distributed_ddl>
         <path>/clickhouse/task_queue/ddl</path>
+        <max_tasks_in_queue>10</max_tasks_in_queue>
+        <task_max_lifetime>3600</task_max_lifetime>
+        <cleanup_delay_period>1</cleanup_delay_period>
     </distributed_ddl>
 </yandex>
\ No newline at end of file
diff --git a/dbms/tests/integration/test_distributed_ddl/test.py b/dbms/tests/integration/test_distributed_ddl/test.py
index 9b28633b980..6b57de5de60 100644
--- a/dbms/tests/integration/test_distributed_ddl/test.py
+++ b/dbms/tests/integration/test_distributed_ddl/test.py
@@ -4,7 +4,7 @@ import datetime
 import pytest
 
 from helpers.cluster import ClickHouseCluster
-from helpers.network import PartitionManager
+from helpers.network import PartitionManager, PartitionManagerDisbaler
 from helpers.test_tools import TSV
 
 
@@ -17,9 +17,9 @@ def check_all_hosts_sucesfully_executed(tsv_content, num_hosts=None):
     codes = [l[2] for l in M]
     messages = [l[3] for l in M]
 
-    assert len(hosts) == num_hosts and len(set(hosts)) == num_hosts, tsv_content
-    assert len(set(codes)) == 1, tsv_content
-    assert codes[0] == "0", tsv_content
+    assert len(hosts) == num_hosts and len(set(hosts)) == num_hosts, "\n" + tsv_content
+    assert len(set(codes)) == 1, "\n" + tsv_content
+    assert codes[0] == "0", "\n" + tsv_content
 
 
 def ddl_check_query(instance, query, num_hosts=None):
@@ -50,8 +50,19 @@ TEST_REPLICATED_ALTERS=True
 cluster = ClickHouseCluster(__file__)
 
 
-@pytest.fixture(scope="module")
-def started_cluster():
+def replace_domains_to_ip_addresses_in_cluster_config(instances_to_replace):
+    clusters_config = open(p.join(cluster.base_dir, 'configs/config.d/clusters.xml')).read()
+
+    for inst_name, inst in cluster.instances.items():
+        clusters_config = clusters_config.replace(inst_name, str(inst.ip_address))
+
+    for inst_name in instances_to_replace:
+        inst = cluster.instances[inst_name]
+        cluster.instances[inst_name].exec_in_container(['bash', '-c', 'echo "$NEW_CONFIG" > /etc/clickhouse-server/config.d/clusters.xml'], environment={"NEW_CONFIG": clusters_config}, privileged=True)
+        # print cluster.instances[inst_name].exec_in_container(['cat', "/etc/clickhouse-server/config.d/clusters.xml"])
+
+
+def init_cluster(cluster):
     try:
         for i in xrange(4):
             cluster.add_instance(
@@ -62,15 +73,19 @@ def started_cluster():
 
         cluster.start()
 
+        # Replace config files for testing ability to set host in DNS and IP formats
+        replace_domains_to_ip_addresses_in_cluster_config(['ch1', 'ch3'])
+
         # Select sacrifice instance to test CONNECTION_LOSS and server fail on it
         sacrifice = cluster.instances['ch4']
         cluster.pm_random_drops = PartitionManager()
-        cluster.pm_random_drops._add_rule({'probability': 0.05, 'destination': sacrifice.ip_address, 'source_port': 2181, 'action': 'REJECT --reject-with tcp-reset'})
-        cluster.pm_random_drops._add_rule({'probability': 0.05, 'source': sacrifice.ip_address, 'destination_port': 2181, 'action': 'REJECT --reject-with tcp-reset'})
+        cluster.pm_random_drops._add_rule({'probability': 0.01, 'destination': sacrifice.ip_address, 'source_port': 2181, 'action': 'REJECT --reject-with tcp-reset'})
+        cluster.pm_random_drops._add_rule({'probability': 0.01, 'source': sacrifice.ip_address, 'destination_port': 2181, 'action': 'REJECT --reject-with tcp-reset'})
 
         # Initialize databases and service tables
         instance = cluster.instances['ch1']
 
+        instance.query("SELECT 1")
         ddl_check_query(instance, """
 CREATE TABLE IF NOT EXISTS all_tables ON CLUSTER 'cluster_no_replicas'
     (database String, name String, engine String, metadata_modification_time DateTime)
@@ -79,20 +94,31 @@ CREATE TABLE IF NOT EXISTS all_tables ON CLUSTER 'cluster_no_replicas'
 
         ddl_check_query(instance, "CREATE DATABASE IF NOT EXISTS test ON CLUSTER 'cluster'")
 
+    except Exception as e:
+        print e
+        raise
+
+
+@pytest.fixture(scope="module")
+def started_cluster():
+    try:
+        init_cluster(cluster)
+
         yield cluster
 
+        instance = cluster.instances['ch1']
         ddl_check_query(instance, "DROP DATABASE test ON CLUSTER 'cluster'")
         ddl_check_query(instance, "DROP DATABASE IF EXISTS test2 ON CLUSTER 'cluster'")
 
-    finally:
-        # Remove iptables rules for sacrifice instance
-        cluster.pm_random_drops.heal_all()
-
         # Check query log to ensure that DDL queries are not executed twice
         time.sleep(1.5)
         for instance in cluster.instances.values():
             ddl_check_there_are_no_dublicates(instance)
 
+    finally:
+        # Remove iptables rules for sacrifice instance
+        cluster.pm_random_drops.heal_all()
+
         #cluster.shutdown()
 
 
@@ -163,10 +189,14 @@ def test_replicated_alters(started_cluster):
     if not TEST_REPLICATED_ALTERS:
         return
 
+    # Temporarily disable random ZK packet drops, they might break creation of ReplicatedMergeTree replicas
+    firewall_drops_rules = cluster.pm_random_drops.pop_rules()
+
     ddl_check_query(instance, """
 CREATE TABLE IF NOT EXISTS merge ON CLUSTER cluster (p Date, i Int32)
 ENGINE = ReplicatedMergeTree('/clickhouse/tables/{layer}-{shard}/hits', '{replica}', p, p, 1)
 """)
+
     ddl_check_query(instance, """
 CREATE TABLE IF NOT EXISTS all_merge_32 ON CLUSTER cluster (p Date, i Int32)
 ENGINE = Distributed(cluster, default, merge, i)
@@ -200,6 +230,10 @@ ENGINE = Distributed(cluster, default, merge, i)
     assert TSV(instance.query("SELECT i, s FROM all_merge_64 ORDER BY i")) == TSV(''.join(['{}\t{}\n'.format(x,x) for x in xrange(4)]))
 
     ddl_check_query(instance, "DROP TABLE merge ON CLUSTER cluster")
+
+    # Re-enable random ZK packet drops
+    cluster.pm_random_drops.push_rules(firewall_drops_rules)
+
     ddl_check_query(instance, "DROP TABLE all_merge_32 ON CLUSTER cluster")
     ddl_check_query(instance, "DROP TABLE all_merge_64 ON CLUSTER cluster")
 

From cdf95ab308a8a759b412b88d99efc10dfcb8d27e Mon Sep 17 00:00:00 2001
From: Vitaliy Lyudvichenko <vludv@yandex-team.ru>
Date: Wed, 2 Aug 2017 23:33:29 +0300
Subject: [PATCH 143/281] Fixed race condition in case of different cluster
 definitions. [#CLICKHOUSE-3128]

---
 dbms/src/Interpreters/DDLWorker.cpp | 16 ++++++++++------
 1 file changed, 10 insertions(+), 6 deletions(-)

diff --git a/dbms/src/Interpreters/DDLWorker.cpp b/dbms/src/Interpreters/DDLWorker.cpp
index 7aa02151aeb..32a2d33fa44 100644
--- a/dbms/src/Interpreters/DDLWorker.cpp
+++ b/dbms/src/Interpreters/DDLWorker.cpp
@@ -421,7 +421,7 @@ void DDLWorker::parseQueryAndResolveHost(DDLTask & task)
             + " in cluster " + task.cluster_name + ", but there are no such cluster here.", ErrorCodes::INCONSISTENT_CLUSTER_DEFINITION);
     }
 
-    /// Try find host from task host list in cluster
+    /// Try to find host from task host list in cluster
     /// At the first, try find exact match (host name and ports should be literally equal)
     /// If the attempt fails, try find it resolving host name of each instance
     const auto & shards = task.cluster->getShardsWithFailoverAddresses();
@@ -629,16 +629,20 @@ void DDLWorker::processTaskAlter(
         if (!context.getSettingsRef().distributed_ddl_allow_replicated_alter)
             throw Exception("Distributed DDL alters don't work properly yet", ErrorCodes::NOT_IMPLEMENTED);
 
+        /// Generate a unique name for the shard node; it is used to ensure the query is executed by only a single host of the shard
+        /// Shard node name has format 'replica_name1,replica_name2,...,replica_nameN'
+        /// Where replica_name is the resolved IP address of the replica (the port is not included by the code below)
+        /// FIXME: this replica_name could change after a replica restart
         Strings replica_names;
-        for (const auto & address : task.cluster->getShardsAddresses().at(task.host_shard_num))
-            replica_names.emplace_back(address.toString());
+        for (const Cluster::Address & address : task.cluster->getShardsAddresses().at(task.host_shard_num))
+            replica_names.emplace_back(address.resolved_address.host().toString());
         std::sort(replica_names.begin(), replica_names.end());
 
-        String shard_dir_name;
+        String shard_node_name;
         for (auto it = replica_names.begin(); it != replica_names.end(); ++it)
-            shard_dir_name += *it + (std::next(it) != replica_names.end() ? "," : "");
+            shard_node_name += *it + (std::next(it) != replica_names.end() ? "," : "");
 
-        String shard_path = node_path + "/shards/" + shard_dir_name;
+        String shard_path = node_path + "/shards/" + shard_node_name;
         String is_executed_path = shard_path + "/executed";
         zookeeper->createAncestors(shard_path + "/");
 

From 183c55676ec7d3bce1b9f71e850289129c393449 Mon Sep 17 00:00:00 2001
From: Vitaliy Lyudvichenko <vludv@yandex-team.ru>
Date: Wed, 2 Aug 2017 23:54:41 +0300
Subject: [PATCH 144/281] Fixed ON CLUSTER DDL for Views. [#CLICKHOUSE-3128]

---
 dbms/src/Parsers/ParserCreateQuery.cpp              | 6 ++++++
 dbms/tests/integration/test_distributed_ddl/test.py | 8 ++++++++
 2 files changed, 14 insertions(+)

diff --git a/dbms/src/Parsers/ParserCreateQuery.cpp b/dbms/src/Parsers/ParserCreateQuery.cpp
index e3abacb249e..f4f939ff01e 100644
--- a/dbms/src/Parsers/ParserCreateQuery.cpp
+++ b/dbms/src/Parsers/ParserCreateQuery.cpp
@@ -298,6 +298,12 @@ bool ParserCreateQuery::parseImpl(Pos & pos, ASTPtr & node, Expected & expected)
                 return false;
         }
 
+        if (ParserKeyword{"ON"}.ignore(pos, expected))
+        {
+            if (!ASTQueryWithOnCluster::parse(pos, cluster_str, expected))
+                return false;
+        }
+
         /// Optional - a list of columns can be specified. It must fully comply with SELECT.
         if (s_lparen.ignore(pos, expected))
         {
diff --git a/dbms/tests/integration/test_distributed_ddl/test.py b/dbms/tests/integration/test_distributed_ddl/test.py
index 6b57de5de60..8fb741e0729 100644
--- a/dbms/tests/integration/test_distributed_ddl/test.py
+++ b/dbms/tests/integration/test_distributed_ddl/test.py
@@ -136,6 +136,14 @@ def test_default_database(started_cluster):
     ddl_check_query(instance, "DROP DATABASE IF EXISTS test2 ON CLUSTER 'cluster'")
 
 
+def test_create_view(started_cluster):
+    instance = cluster.instances['ch3']
+    ddl_check_query(instance, "CREATE VIEW test.super_simple_view ON CLUSTER 'cluster' AS SELECT * FROM system.numbers")
+    ddl_check_query(instance, "CREATE MATERIALIZED VIEW test.simple_mat_view ON CLUSTER 'cluster' ENGINE = Memory AS SELECT * FROM system.numbers")
+    ddl_check_query(instance, "DROP TABLE test.simple_mat_view ON CLUSTER 'cluster'")
+    ddl_check_query(instance, "DROP TABLE test.super_simple_view ON CLUSTER 'cluster'")
+
+
 def test_on_server_fail(started_cluster):
     instance = cluster.instances['ch1']
     kill_instance = cluster.instances['ch2']

From 4ef5d14722241a5f198f77fd358b6569e751c8f6 Mon Sep 17 00:00:00 2001
From: Vitaliy Lyudvichenko <vludv@yandex-team.ru>
Date: Thu, 3 Aug 2017 00:37:04 +0300
Subject: [PATCH 145/281] Add distributed_ddl_task_timeout setting.
 [#CLICKHOUSE-3128]

---
 dbms/src/Interpreters/DDLWorker.cpp | 10 ++++++----
 dbms/src/Interpreters/Settings.h    |  4 +++-
 2 files changed, 9 insertions(+), 5 deletions(-)

diff --git a/dbms/src/Interpreters/DDLWorker.cpp b/dbms/src/Interpreters/DDLWorker.cpp
index 32a2d33fa44..05c0f568215 100644
--- a/dbms/src/Interpreters/DDLWorker.cpp
+++ b/dbms/src/Interpreters/DDLWorker.cpp
@@ -920,6 +920,8 @@ public:
             waiting_hosts.emplace(host.toString());
 
         setTotalRowsApprox(entry.hosts.size());
+
+        timeout_seconds = context.getSettingsRef().distributed_ddl_task_timeout;
     }
 
     String getName() const override
@@ -932,8 +934,6 @@ public:
         return "DDLQueryStatusInputSream(" + node_path + ")";
     }
 
-    static constexpr size_t timeout_seconds = 120;
-
     Block readImpl() override
     {
         Block res;
@@ -949,7 +949,7 @@ public:
                 return res;
 
             auto elapsed_seconds = watch.elapsedSeconds();
-            if (elapsed_seconds > timeout_seconds)
+            if (timeout_seconds >= 0 && elapsed_seconds > timeout_seconds)
                 throw Exception("Watching query is executing too long (" + toString(std::round(elapsed_seconds)) + " sec.)", ErrorCodes::TIMEOUT_EXCEEDED);
 
             if (num_hosts_finished != 0 || try_number != 0)
@@ -1051,6 +1051,8 @@ private:
     NameSet finished_hosts; /// finished hosts from host list
     NameSet ignoring_hosts; /// appeared hosts that are not in hosts list
     size_t num_hosts_finished = 0;
+
+    Int64 timeout_seconds = 120;
 };
 
 
@@ -1090,7 +1092,7 @@ BlockIO executeDDLQueryOnCluster(const ASTPtr & query_ptr, Context & context)
     String node_path = ddl_worker.enqueueQuery(entry);
 
     BlockIO io;
-    if (node_path.empty())
+    if (context.getSettingsRef().distributed_ddl_task_timeout == 0)
         return io;
 
     auto stream = std::make_shared<DDLQueryStatusInputSream>(node_path, entry, context);
diff --git a/dbms/src/Interpreters/Settings.h b/dbms/src/Interpreters/Settings.h
index 8df32185146..81b96ef53e4 100644
--- a/dbms/src/Interpreters/Settings.h
+++ b/dbms/src/Interpreters/Settings.h
@@ -291,7 +291,9 @@ struct Settings
     /** Timeout for insert query into distributed. Setting is used only with insert_distributed_sync enabled. \
      *  Zero value means no timeout. \
      */ \
-    M(SettingUInt64, insert_distributed_timeout, 0)
+    M(SettingUInt64, insert_distributed_timeout, 0) \
+    /* Timeout for DDL query responses from all hosts in cluster. Negative value means infinite. Zero means do not wait for responses (async mode). */ \
+    M(SettingInt64, distributed_ddl_task_timeout, 120)
 
 
     /// Possible limits for query execution.

From a34ab8201a201d349215a6e919687da38884261c Mon Sep 17 00:00:00 2001
From: Vitaliy Lyudvichenko <vludv@yandex-team.ru>
Date: Thu, 3 Aug 2017 20:00:41 +0300
Subject: [PATCH 146/281] Add FORMAT section for DDL queries CREATE, ALTER,
 RENAME, DROP. [#CLICKHOUSE-3128]

---
 dbms/src/Parsers/ASTAlterQuery.cpp             | 11 ++++++-----
 dbms/src/Parsers/ASTAlterQuery.h               |  9 +++++----
 dbms/src/Parsers/ASTCreateQuery.h              |  9 ++++++---
 dbms/src/Parsers/ASTDropQuery.h                | 16 +++++++++++-----
 dbms/src/Parsers/ASTKillQueryQuery.h           |  4 ++--
 dbms/src/Parsers/ASTQueryWithOutput.cpp        |  2 +-
 dbms/src/Parsers/ASTQueryWithOutput.h          |  2 +-
 dbms/src/Parsers/ASTRenameQuery.h              | 18 ++++++++++++------
 dbms/src/Parsers/ParserQuery.cpp               |  7 -------
 dbms/src/Parsers/ParserQueryWithOutput.cpp     | 15 ++++++++++++---
 .../integration/test_distributed_ddl/test.py   | 18 +++++++++++-------
 11 files changed, 67 insertions(+), 44 deletions(-)

diff --git a/dbms/src/Parsers/ASTAlterQuery.cpp b/dbms/src/Parsers/ASTAlterQuery.cpp
index a9105c44a53..a7d4f4d9cd8 100644
--- a/dbms/src/Parsers/ASTAlterQuery.cpp
+++ b/dbms/src/Parsers/ASTAlterQuery.cpp
@@ -10,7 +10,7 @@ namespace ErrorCodes
     extern const int UNEXPECTED_AST_STRUCTURE;
 }
 
-ASTAlterQuery::Parameters::Parameters() : type(NO_TYPE) {}
+ASTAlterQuery::Parameters::Parameters() {}
 
 void ASTAlterQuery::Parameters::clone(Parameters & p) const
 {
@@ -42,7 +42,7 @@ void ASTAlterQuery::addParameters(const Parameters & params)
         children.push_back(params.primary_key);
 }
 
-ASTAlterQuery::ASTAlterQuery(StringRange range_) : IAST(range_)
+ASTAlterQuery::ASTAlterQuery(StringRange range_) : ASTQueryWithOutput(range_)
 {
 }
 
@@ -57,13 +57,14 @@ ASTPtr ASTAlterQuery::clone() const
     auto res = std::make_shared<ASTAlterQuery>(*this);
     for (ParameterContainer::size_type i = 0; i < parameters.size(); ++i)
         parameters[i].clone(res->parameters[i]);
+    cloneOutputOptions(*res);
     return res;
 }
 
 ASTPtr ASTAlterQuery::getRewrittenASTWithoutOnCluster(const std::string & new_database) const
 {
     auto query_ptr = clone();
-    ASTAlterQuery & query = static_cast<ASTAlterQuery &>(*query_ptr);
+    auto & query = static_cast<ASTAlterQuery &>(*query_ptr);
 
     query.cluster.clear();
     if (query.database.empty())
@@ -72,11 +73,11 @@ ASTPtr ASTAlterQuery::getRewrittenASTWithoutOnCluster(const std::string & new_da
     return query_ptr;
 }
 
-void ASTAlterQuery::formatImpl(const FormatSettings & settings, FormatState & state, FormatStateStacked frame) const
+void ASTAlterQuery::formatQueryImpl(const FormatSettings & settings, FormatState & state, FormatStateStacked frame) const
 {
     frame.need_parens = false;
 
-    std::string indent_str = settings.one_line ? "" : std::string(4 * frame.indent, ' ');
+    std::string indent_str = settings.one_line ? "" : std::string(4u * frame.indent, ' ');
 
     settings.ostr << (settings.hilite ? hilite_keyword : "") << indent_str << "ALTER TABLE " << (settings.hilite ? hilite_none : "");
 
diff --git a/dbms/src/Parsers/ASTAlterQuery.h b/dbms/src/Parsers/ASTAlterQuery.h
index 932274b777f..f49bc4c1435 100644
--- a/dbms/src/Parsers/ASTAlterQuery.h
+++ b/dbms/src/Parsers/ASTAlterQuery.h
@@ -1,6 +1,7 @@
 #pragma once
 
 #include <Parsers/IAST.h>
+#include <Parsers/ASTQueryWithOutput.h>
 #include <Parsers/ASTQueryWithOnCluster.h>
 
 
@@ -19,7 +20,7 @@ namespace DB
  *              [COORDINATE WITH 'coordinator_id']
  */
 
-class ASTAlterQuery : public IAST, public ASTQueryWithOnCluster
+class ASTAlterQuery : public ASTQueryWithOutput, public ASTQueryWithOnCluster
 {
 public:
     enum ParameterType
@@ -96,17 +97,17 @@ public:
 
     void addParameters(const Parameters & params);
 
-    ASTAlterQuery(StringRange range_ = StringRange());
+    explicit ASTAlterQuery(StringRange range_ = StringRange());
 
     /** Get the text that identifies this element. */
     String getID() const override;
 
     ASTPtr clone() const override;
 
-    ASTPtr getRewrittenASTWithoutOnCluster(const std::string & new_database = {}) const override;
+    ASTPtr getRewrittenASTWithoutOnCluster(const std::string & new_database) const override;
 
 protected:
-    void formatImpl(const FormatSettings & settings, FormatState & state, FormatStateStacked frame) const override;
+    void formatQueryImpl(const FormatSettings & settings, FormatState & state, FormatStateStacked frame) const override;
 };
 
 }
diff --git a/dbms/src/Parsers/ASTCreateQuery.h b/dbms/src/Parsers/ASTCreateQuery.h
index d1020b57df5..209c2fb9b0f 100644
--- a/dbms/src/Parsers/ASTCreateQuery.h
+++ b/dbms/src/Parsers/ASTCreateQuery.h
@@ -2,6 +2,7 @@
 
 #include <Parsers/ASTExpressionList.h>
 #include <Parsers/ASTFunction.h>
+#include <Parsers/ASTQueryWithOutput.h>
 #include <Parsers/ASTQueryWithOnCluster.h>
 
 
@@ -10,7 +11,7 @@ namespace DB
 
 
 /// CREATE TABLE or ATTACH TABLE query
-class ASTCreateQuery : public IAST, public ASTQueryWithOnCluster
+class ASTCreateQuery : public ASTQueryWithOutput, public ASTQueryWithOnCluster
 {
 public:
     bool attach{false};    /// Query ATTACH TABLE, not CREATE TABLE.
@@ -29,7 +30,7 @@ public:
     ASTPtr select;
 
     ASTCreateQuery() = default;
-    ASTCreateQuery(const StringRange range_) : IAST(range_) {}
+    ASTCreateQuery(const StringRange range_) : ASTQueryWithOutput(range_) {}
 
     /** Get the text that identifies this element. */
     String getID() const override { return (attach ? "AttachQuery_" : "CreateQuery_") + database + "_" + table; };
@@ -44,6 +45,8 @@ public:
         if (select)         { res->select = select->clone();                res->children.push_back(res->select); }
         if (inner_storage)  { res->inner_storage = inner_storage->clone();  res->children.push_back(res->inner_storage); }
 
+        cloneOutputOptions(*res);
+
         return res;
     }
 
@@ -60,7 +63,7 @@ public:
     }
 
 protected:
-    void formatImpl(const FormatSettings & settings, FormatState & state, FormatStateStacked frame) const override
+    void formatQueryImpl(const FormatSettings & settings, FormatState & state, FormatStateStacked frame) const override
     {
         frame.need_parens = false;
 
diff --git a/dbms/src/Parsers/ASTDropQuery.h b/dbms/src/Parsers/ASTDropQuery.h
index b10f80d876c..8f4a7587ca8 100644
--- a/dbms/src/Parsers/ASTDropQuery.h
+++ b/dbms/src/Parsers/ASTDropQuery.h
@@ -1,6 +1,7 @@
 #pragma once
 
 #include <Parsers/IAST.h>
+#include <Parsers/ASTQueryWithOutput.h>
 #include <Parsers/ASTQueryWithOnCluster.h>
 
 namespace DB
@@ -9,7 +10,7 @@ namespace DB
 
 /** DROP query
   */
-class ASTDropQuery : public IAST, public ASTQueryWithOnCluster
+class ASTDropQuery : public ASTQueryWithOutput, public ASTQueryWithOnCluster
 {
 public:
     bool detach{false};    /// DETACH query, not DROP.
@@ -18,17 +19,22 @@ public:
     String table;
 
     ASTDropQuery() = default;
-    ASTDropQuery(const StringRange range_) : IAST(range_) {}
+    explicit ASTDropQuery(const StringRange range_) : ASTQueryWithOutput(range_) {}
 
     /** Get the text that identifies this element. */
     String getID() const override { return (detach ? "DetachQuery_" : "DropQuery_") + database + "_" + table; };
 
-    ASTPtr clone() const override { return std::make_shared<ASTDropQuery>(*this); }
+    ASTPtr clone() const override
+    {
+        auto res = std::make_shared<ASTDropQuery>(*this);
+        cloneOutputOptions(*res);
+        return res;
+    }
 
     ASTPtr getRewrittenASTWithoutOnCluster(const std::string & new_database) const override
     {
         auto query_ptr = clone();
-        ASTDropQuery & query = static_cast<ASTDropQuery &>(*query_ptr);
+        auto & query = static_cast<ASTDropQuery &>(*query_ptr);
 
         query.cluster.clear();
         if (query.database.empty())
@@ -38,7 +44,7 @@ public:
     }
 
 protected:
-    void formatImpl(const FormatSettings & settings, FormatState & state, FormatStateStacked frame) const override
+    void formatQueryImpl(const FormatSettings & settings, FormatState & state, FormatStateStacked frame) const override
     {
         if (table.empty() && !database.empty())
         {
diff --git a/dbms/src/Parsers/ASTKillQueryQuery.h b/dbms/src/Parsers/ASTKillQueryQuery.h
index 5aca8e07588..e28c97e4baf 100644
--- a/dbms/src/Parsers/ASTKillQueryQuery.h
+++ b/dbms/src/Parsers/ASTKillQueryQuery.h
@@ -8,8 +8,8 @@ class ASTKillQueryQuery : public ASTQueryWithOutput
 {
 public:
     ASTPtr where_expression;    // expression to filter processes from system.processes table
-    bool sync = false;            // SYNC or ASYNC mode
-    bool test = false;            // does it TEST mode? (doesn't cancel queries just checks and shows them)
+    bool sync = false;          // SYNC or ASYNC mode
+    bool test = false;          // does it TEST mode? (doesn't cancel queries just checks and shows them)
 
     ASTKillQueryQuery() = default;
 
diff --git a/dbms/src/Parsers/ASTQueryWithOutput.cpp b/dbms/src/Parsers/ASTQueryWithOutput.cpp
index 5589702ac77..7548ac0cc54 100644
--- a/dbms/src/Parsers/ASTQueryWithOutput.cpp
+++ b/dbms/src/Parsers/ASTQueryWithOutput.cpp
@@ -21,7 +21,7 @@ void ASTQueryWithOutput::formatImpl(const FormatSettings & s, FormatState & stat
 {
     formatQueryImpl(s, state, frame);
 
-    std::string indent_str = s.one_line ? "" : std::string(4 * frame.indent, ' ');
+    std::string indent_str = s.one_line ? "" : std::string(4u * frame.indent, ' ');
 
     if (out_file)
     {
diff --git a/dbms/src/Parsers/ASTQueryWithOutput.h b/dbms/src/Parsers/ASTQueryWithOutput.h
index d4aad181ae2..587ca1ee174 100644
--- a/dbms/src/Parsers/ASTQueryWithOutput.h
+++ b/dbms/src/Parsers/ASTQueryWithOutput.h
@@ -15,7 +15,7 @@ public:
     ASTPtr format;
 
     ASTQueryWithOutput() = default;
-    ASTQueryWithOutput(const StringRange range_) : IAST(range_) {}
+    explicit ASTQueryWithOutput(const StringRange range_) : IAST(range_) {}
 
 protected:
     /// NOTE: call this helper at the end of the clone() method of descendant class.
diff --git a/dbms/src/Parsers/ASTRenameQuery.h b/dbms/src/Parsers/ASTRenameQuery.h
index 4af43849345..c1fb5299c43 100644
--- a/dbms/src/Parsers/ASTRenameQuery.h
+++ b/dbms/src/Parsers/ASTRenameQuery.h
@@ -1,6 +1,7 @@
 #pragma once
 
 #include <Parsers/IAST.h>
+#include <Parsers/ASTQueryWithOutput.h>
 #include <Parsers/ASTQueryWithOnCluster.h>
 
 namespace DB
@@ -9,7 +10,7 @@ namespace DB
 
 /** RENAME query
   */
-class ASTRenameQuery : public IAST, public ASTQueryWithOnCluster
+class ASTRenameQuery : public ASTQueryWithOutput, public ASTQueryWithOnCluster
 {
 public:
     struct Table
@@ -28,17 +29,22 @@ public:
     Elements elements;
 
     ASTRenameQuery() = default;
-    ASTRenameQuery(const StringRange range_) : IAST(range_) {}
+    explicit ASTRenameQuery(const StringRange range_) : ASTQueryWithOutput(range_) {}
 
     /** Get the text that identifies this element. */
     String getID() const override { return "Rename"; };
 
-    ASTPtr clone() const override { return std::make_shared<ASTRenameQuery>(*this); }
+    ASTPtr clone() const override
+    {
+        auto res = std::make_shared<ASTRenameQuery>(*this);
+        cloneOutputOptions(*res);
+        return res;
+    }
 
-    ASTPtr getRewrittenASTWithoutOnCluster(const std::string & new_database = {}) const override
+    ASTPtr getRewrittenASTWithoutOnCluster(const std::string & new_database) const override
     {
         auto query_ptr = clone();
-        ASTRenameQuery & query = static_cast<ASTRenameQuery &>(*query_ptr);
+        auto & query = static_cast<ASTRenameQuery &>(*query_ptr);
 
         query.cluster.clear();
         for (Element & elem : query.elements)
@@ -53,7 +59,7 @@ public:
     }
 
 protected:
-    void formatImpl(const FormatSettings & settings, FormatState & state, FormatStateStacked frame) const override
+    void formatQueryImpl(const FormatSettings & settings, FormatState & state, FormatStateStacked frame) const override
     {
         settings.ostr << (settings.hilite ? hilite_keyword : "") << "RENAME TABLE " << (settings.hilite ? hilite_none : "");
 
diff --git a/dbms/src/Parsers/ParserQuery.cpp b/dbms/src/Parsers/ParserQuery.cpp
index 3eb748eb88d..6a93110c143 100644
--- a/dbms/src/Parsers/ParserQuery.cpp
+++ b/dbms/src/Parsers/ParserQuery.cpp
@@ -18,20 +18,13 @@ bool ParserQuery::parseImpl(Pos & pos, ASTPtr & node, Expected & expected)
 {
     ParserQueryWithOutput query_with_output_p;
     ParserInsertQuery insert_p(end);
-    ParserCreateQuery create_p;
-    ParserRenameQuery rename_p;
     ParserDropQuery drop_p;
-    ParserAlterQuery alter_p;
     ParserUseQuery use_p;
     ParserSetQuery set_p;
     ParserOptimizeQuery optimize_p;
 
     bool res = query_with_output_p.parse(pos, node, expected)
         || insert_p.parse(pos, node, expected)
-        || create_p.parse(pos, node, expected)
-        || rename_p.parse(pos, node, expected)
-        || drop_p.parse(pos, node, expected)
-        || alter_p.parse(pos, node, expected)
         || use_p.parse(pos, node, expected)
         || set_p.parse(pos, node, expected)
         || optimize_p.parse(pos, node, expected);
diff --git a/dbms/src/Parsers/ParserQueryWithOutput.cpp b/dbms/src/Parsers/ParserQueryWithOutput.cpp
index e84f838230b..342754026dc 100644
--- a/dbms/src/Parsers/ParserQueryWithOutput.cpp
+++ b/dbms/src/Parsers/ParserQueryWithOutput.cpp
@@ -4,10 +4,11 @@
 #include <Parsers/ParserTablePropertiesQuery.h>
 #include <Parsers/ParserShowProcesslistQuery.h>
 #include <Parsers/ParserCheckQuery.h>
+#include <Parsers/ParserCreateQuery.h>
+#include <Parsers/ParserRenameQuery.h>
+#include <Parsers/ParserAlterQuery.h>
+#include <Parsers/ParserDropQuery.h>
 #include <Parsers/ParserKillQueryQuery.h>
-#include <Parsers/ASTIdentifier.h>
-#include <Parsers/ExpressionElementParsers.h>
-#include <Common/typeid_cast.h>
 
 
 namespace DB
@@ -19,6 +20,10 @@ bool ParserQueryWithOutput::parseImpl(Pos & pos, ASTPtr & node, Expected & expec
     ParserSelectQuery select_p;
     ParserTablePropertiesQuery table_p;
     ParserShowProcesslistQuery show_processlist_p;
+    ParserCreateQuery create_p;
+    ParserAlterQuery alter_p;
+    ParserRenameQuery rename_p;
+    ParserDropQuery drop_p;
     ParserCheckQuery check_p;
     ParserKillQueryQuery kill_query_p;
 
@@ -28,6 +33,10 @@ bool ParserQueryWithOutput::parseImpl(Pos & pos, ASTPtr & node, Expected & expec
         || show_tables_p.parse(pos, query, expected)
         || table_p.parse(pos, query, expected)
         || show_processlist_p.parse(pos, query, expected)
+        || create_p.parse(pos, query, expected)
+        || alter_p.parse(pos, query, expected)
+        || rename_p.parse(pos, query, expected)
+        || drop_p.parse(pos, query, expected)
         || check_p.parse(pos, query, expected)
         || kill_query_p.parse(pos, query, expected);
 
diff --git a/dbms/tests/integration/test_distributed_ddl/test.py b/dbms/tests/integration/test_distributed_ddl/test.py
index 8fb741e0729..a59cc686cd3 100644
--- a/dbms/tests/integration/test_distributed_ddl/test.py
+++ b/dbms/tests/integration/test_distributed_ddl/test.py
@@ -125,8 +125,8 @@ def started_cluster():
 def test_default_database(started_cluster):
     instance = cluster.instances['ch3']
 
-    ddl_check_query(instance, "CREATE DATABASE IF NOT EXISTS test2 ON CLUSTER 'cluster'")
-    ddl_check_query(instance, "DROP TABLE IF EXISTS null ON CLUSTER 'cluster'")
+    ddl_check_query(instance, "CREATE DATABASE IF NOT EXISTS test2 ON CLUSTER 'cluster' FORMAT TSV")
+    ddl_check_query(instance, "DROP TABLE IF EXISTS null ON CLUSTER 'cluster' FORMAT TSV")
     ddl_check_query(instance, "CREATE TABLE null ON CLUSTER 'cluster2' (s String DEFAULT 'escape\t\nme') ENGINE = Null")
 
     contents = instance.query("SELECT hostName() AS h, database FROM all_tables WHERE name = 'null' ORDER BY h")
@@ -138,10 +138,14 @@ def test_default_database(started_cluster):
 
 def test_create_view(started_cluster):
     instance = cluster.instances['ch3']
-    ddl_check_query(instance, "CREATE VIEW test.super_simple_view ON CLUSTER 'cluster' AS SELECT * FROM system.numbers")
-    ddl_check_query(instance, "CREATE MATERIALIZED VIEW test.simple_mat_view ON CLUSTER 'cluster' ENGINE = Memory AS SELECT * FROM system.numbers")
-    ddl_check_query(instance, "DROP TABLE test.simple_mat_view ON CLUSTER 'cluster'")
-    ddl_check_query(instance, "DROP TABLE test.super_simple_view ON CLUSTER 'cluster'")
+    ddl_check_query(instance, "CREATE VIEW test.super_simple_view ON CLUSTER 'cluster' AS SELECT * FROM system.numbers FORMAT TSV")
+    ddl_check_query(instance, "CREATE MATERIALIZED VIEW test.simple_mat_view ON CLUSTER 'cluster' ENGINE = Memory AS SELECT * FROM system.numbers FORMAT TSV")
+    ddl_check_query(instance, "DROP TABLE test.simple_mat_view ON CLUSTER 'cluster' FORMAT TSV")
+    ddl_check_query(instance, "DROP TABLE test.super_simple_view2 ON CLUSTER 'cluster' FORMAT TSV")
+
+    ddl_check_query(instance, "CREATE TABLE test.super_simple (i Int8) ON CLUSTER 'cluster'")
+    ddl_check_query(instance, "RENAME TABLE test.super_simple TO test.super_simple2 ON CLUSTER 'cluster' FORMAT TSV")
+    ddl_check_query(instance, "DROP TABLE test.super_simple2 ON CLUSTER 'cluster'")
 
 
 def test_on_server_fail(started_cluster):
@@ -274,7 +278,7 @@ ENGINE = Distributed(cluster_without_replication, default, merge, i)
 
 
     ddl_check_query(instance, "ALTER TABLE merge ON CLUSTER cluster_without_replication MODIFY COLUMN i Int64")
-    ddl_check_query(instance, "ALTER TABLE merge ON CLUSTER cluster_without_replication ADD COLUMN s DEFAULT toString(i)")
+    ddl_check_query(instance, "ALTER TABLE merge ON CLUSTER cluster_without_replication ADD COLUMN s DEFAULT toString(i) FORMAT TSV")
 
     assert TSV(instance.query("SELECT i, s FROM all_merge_64 ORDER BY i")) == TSV(''.join(['{}\t{}\n'.format(x,x) for x in xrange(4)]))
 

From f815498e34784ad57178ece17716f3f8e9b8e5d8 Mon Sep 17 00:00:00 2001
From: Vitaliy Lyudvichenko <vludv@yandex-team.ru>
Date: Thu, 3 Aug 2017 20:56:25 +0300
Subject: [PATCH 147/281] Fix error codes skipping. [#CLICKHOUSE-2]

---
 dbms/src/Databases/DatabaseOrdinary.cpp | 4 ++++
 1 file changed, 4 insertions(+)

diff --git a/dbms/src/Databases/DatabaseOrdinary.cpp b/dbms/src/Databases/DatabaseOrdinary.cpp
index 1416fd36dca..609b20c6383 100644
--- a/dbms/src/Databases/DatabaseOrdinary.cpp
+++ b/dbms/src/Databases/DatabaseOrdinary.cpp
@@ -351,6 +351,10 @@ void DatabaseOrdinary::renameTable(
             to_database_concrete->name,
             to_table_name);
     }
+    catch (const Exception & e)
+    {
+        throw;
+    }
     catch (const Poco::Exception & e)
     {
         /// More good diagnostics.

From d9ce96f8f535e4dab4cf707c880f08b73239fbe7 Mon Sep 17 00:00:00 2001
From: Vitaliy Lyudvichenko <vludv@yandex-team.ru>
Date: Thu, 10 Aug 2017 22:12:52 +0300
Subject: [PATCH 148/281] Fixed test and misspellings. [#CLICKHOUSE-3207]

---
 dbms/src/Interpreters/DDLWorker.cpp           | 63 +++++++++++--------
 dbms/src/Interpreters/DDLWorker.h             | 16 ++---
 .../configs/config.d/ddl.xml                  |  2 +-
 .../integration/test_distributed_ddl/test.py  |  4 +-
 4 files changed, 46 insertions(+), 39 deletions(-)

diff --git a/dbms/src/Interpreters/DDLWorker.cpp b/dbms/src/Interpreters/DDLWorker.cpp
index 05c0f568215..97503e4542c 100644
--- a/dbms/src/Interpreters/DDLWorker.cpp
+++ b/dbms/src/Interpreters/DDLWorker.cpp
@@ -33,10 +33,6 @@
 #include <Common/isLocalAddress.h>
 #include <Poco/Timestamp.h>
 
-#include <ext/scope_guard.h>
-
-#include <experimental/optional>
-
 
 namespace DB
 {
@@ -209,12 +205,12 @@ static bool isSupportedAlterType(int type)
         ASTAlterQuery::DROP_PARTITION
     };
 
-    return supported_alter_types.count(type);
+    return supported_alter_types.count(type) != 0;
 }
 
 
 DDLWorker::DDLWorker(const std::string & zk_root_dir, Context & context_, const Poco::Util::AbstractConfiguration * config, const String & prefix)
-    : context(context_)
+    : context(context_), log(&Logger::get("DDLWorker"))
 {
     queue_dir = zk_root_dir;
     if (queue_dir.back() == '/')
@@ -222,8 +218,8 @@ DDLWorker::DDLWorker(const std::string & zk_root_dir, Context & context_, const
 
     if (config)
     {
-        task_max_lifetime = config->getUInt64(prefix + "task_max_lifetime", task_max_lifetime);
-        cleanup_delay_period = config->getUInt64(prefix + "cleanup_delay_period", cleanup_delay_period);
+        task_max_lifetime = config->getUInt64(prefix + "task_max_lifetime", static_cast<UInt64>(task_max_lifetime));
+        cleanup_delay_period = config->getUInt64(prefix + "cleanup_delay_period", static_cast<UInt64>(cleanup_delay_period));
         max_tasks_in_queue = std::max(1UL, config->getUInt64(prefix + "max_tasks_in_queue ", max_tasks_in_queue));
     }
 
@@ -622,12 +618,12 @@ void DDLWorker::processTaskAlter(
 
     if (execute_once_on_replica)
     {
-        /// The following code can perform ALTER twice if
-        ///  current server aquires lock, executes replicated alter,
-        ///  losts zookeeper connection and doesn't have time to create /executed node, second server executes replicated alter again
+        /// The following code can perform ALTER twice if:
+        ///  current server acquires the lock, executes replicated alter,
+        ///  loses zookeeper connection and doesn't have time to create /executed node, second server executes replicated alter again
         /// To avoid this problem alter() method of replicated tables should be changed and takes into account ddl query id tag.
         if (!context.getSettingsRef().distributed_ddl_allow_replicated_alter)
-            throw Exception("Distributed DDL alters don't work properly yet", ErrorCodes::NOT_IMPLEMENTED);
+            throw Exception("Distributed DDL alters for replicated tables don't work properly yet", ErrorCodes::NOT_IMPLEMENTED);
 
         /// Generate unique name for shard node, it will be used to execute the query by only single host
         /// Shard node name has format 'replica_name1,replica_name2,...,replica_nameN'
@@ -646,7 +642,7 @@ void DDLWorker::processTaskAlter(
         String is_executed_path = shard_path + "/executed";
         zookeeper->createAncestors(shard_path + "/");
 
-        bool alter_executed_by_replica = false;
+        bool alter_executed_by_any_replica = false;
         {
             auto lock = createSimpleZooKeeperLock(zookeeper, shard_path, "lock", task.host_id_str);
             std::mt19937 rng(std::hash<String>{}(task.host_id_str) + reinterpret_cast<intptr_t>(&rng));
@@ -655,7 +651,7 @@ void DDLWorker::processTaskAlter(
             {
                 if (zookeeper->exists(is_executed_path))
                 {
-                    alter_executed_by_replica = true;
+                    alter_executed_by_any_replica = true;
                     break;
                 }
 
@@ -665,12 +661,12 @@ void DDLWorker::processTaskAlter(
 
                     if (execute_on_leader_replica && task.execution_status.code == ErrorCodes::NOT_IMPLEMENTED)
                     {
-                        /// TODO: it is ok to recieve exception "host is not leader"
+                        /// TODO: it is ok to receive exception "host is not leader"
                     }
 
                     zookeeper->create(is_executed_path, task.host_id_str, zkutil::CreateMode::Persistent);
                     lock->unlock();
-                    alter_executed_by_replica = true;
+                    alter_executed_by_any_replica = true;
                     break;
                 }
 
@@ -678,7 +674,7 @@ void DDLWorker::processTaskAlter(
             }
         }
 
-        if (!alter_executed_by_replica)
+        if (!alter_executed_by_any_replica)
             task.execution_status = ExecutionStatus(ErrorCodes::NOT_IMPLEMENTED, "Cannot enqueue replicated DDL query");
     }
     else
@@ -691,8 +687,8 @@ void DDLWorker::processTaskAlter(
 void DDLWorker::cleanupQueue()
 {
     /// Both ZK and Poco use Unix epoch
-    size_t current_time_seconds = Poco::Timestamp().epochTime();
-    constexpr size_t zookeeper_time_resolution = 1000;
+    Int64 current_time_seconds = Poco::Timestamp().epochTime();
+    constexpr Int64 zookeeper_time_resolution = 1000;
 
     // Too early to check
     if (last_cleanup_time_seconds && current_time_seconds < last_cleanup_time_seconds + cleanup_delay_period)
@@ -719,7 +715,8 @@ void DDLWorker::cleanupQueue()
 
         try
         {
-            /// To avoid concurrent checks and cleans
+            /// Usage of the lock is not necessary now (tryRemoveRecursive correctly removes node in a presence of concurrent cleaners)
+            /// But the lock will be required to implement system.distributed_ddl_queue table
             auto lock = createSimpleZooKeeperLock(zookeeper, node_path, "lock", host_fqdn_id);
             if (!lock->tryLock())
                 continue;
@@ -890,6 +887,9 @@ void DDLWorker::run()
                 LOG_ERROR(log, "Unexpected ZooKeeper error: " << getCurrentExceptionMessage(true) << ". Terminating...");
                 throw;
             }
+
+            /// Unlock the processing just in case
+            event_queue_updated->set();
         }
         catch (...)
         {
@@ -1056,17 +1056,28 @@ private:
 };
 
 
-BlockIO executeDDLQueryOnCluster(const ASTPtr & query_ptr, Context & context)
+BlockIO executeDDLQueryOnCluster(const ASTPtr & query_ptr_, Context & context)
 {
-    const auto query = dynamic_cast<const ASTQueryWithOnCluster *>(query_ptr.get());
+    ASTPtr query_ptr;
+
+    /// Remove FORMAT ... INTO OUTFILE if exists
+    if (dynamic_cast<const ASTQueryWithOutput *>(query_ptr_.get()))
+    {
+        query_ptr = query_ptr_->clone();
+        auto query_with_output = dynamic_cast<ASTQueryWithOutput *>(query_ptr.get());
+        query_with_output->out_file = nullptr;
+        query_with_output->format = nullptr;
+    }
+    else
+        query_ptr = query_ptr_;
+
+    auto query = dynamic_cast<const ASTQueryWithOnCluster *>(query_ptr.get());
     if (!query)
     {
-        throw Exception("Distributed execution is not supported for such DDL queries",
-                        ErrorCodes::NOT_IMPLEMENTED);
+        throw Exception("Distributed execution is not supported for such DDL queries", ErrorCodes::NOT_IMPLEMENTED);
     }
 
-    auto query_alter = dynamic_cast<const ASTAlterQuery *>(query_ptr.get());
-    if (query_alter)
+    if (auto query_alter = dynamic_cast<const ASTAlterQuery *>(query_ptr.get()))
     {
         for (const auto & param : query_alter->parameters)
         {
diff --git a/dbms/src/Interpreters/DDLWorker.h b/dbms/src/Interpreters/DDLWorker.h
index 7fefc08ccdc..6b7044ef265 100644
--- a/dbms/src/Interpreters/DDLWorker.h
+++ b/dbms/src/Interpreters/DDLWorker.h
@@ -57,28 +57,24 @@ private:
 
     bool tryExecuteQuery(const String & query, const DDLTask & task, ExecutionStatus & status);
 
-
     /// Checks and cleanups queue's nodes
     void cleanupQueue();
 
-
+    /// Init task node
     void createStatusDirs(const std::string & node_name);
-    ASTPtr getRewrittenQuery(const DDLLogEntry & node);
 
 
     void run();
 
 private:
     Context & context;
-    Logger * log = &Logger::get("DDLWorker");
+    Logger * log;
 
     std::string host_fqdn;      /// current host domain name
     std::string host_fqdn_id;   /// host_name:port
-
     std::string queue_dir;      /// dir with queue of queries
-    std::string master_dir;     /// dir with queries was initiated by the server
 
-    /// Last task that was skipped or sucesfully executed
+    /// Name of last task that was skipped or successfully executed
     std::string last_processed_task_name;
 
     std::shared_ptr<zkutil::ZooKeeper> zookeeper;
@@ -91,12 +87,12 @@ private:
     std::atomic<bool> stop_flag{false};
     std::thread thread;
 
-    size_t last_cleanup_time_seconds = 0;
+    Int64 last_cleanup_time_seconds = 0;
 
     /// Cleaning starts after new node event is received if the last cleaning wasn't made sooner than N seconds ago
-    size_t cleanup_delay_period = 60; // minute (in seconds)
+    Int64 cleanup_delay_period = 60; // minute (in seconds)
     /// Delete node if its age is greater than that
-    size_t task_max_lifetime = 7 * 24 * 60 * 60; // week (in seconds)
+    Int64 task_max_lifetime = 7 * 24 * 60 * 60; // week (in seconds)
     /// How many tasks could be in the queue
     size_t max_tasks_in_queue = 1000;
 
diff --git a/dbms/tests/integration/test_distributed_ddl/configs/config.d/ddl.xml b/dbms/tests/integration/test_distributed_ddl/configs/config.d/ddl.xml
index 046ac052142..c819fc7713a 100644
--- a/dbms/tests/integration/test_distributed_ddl/configs/config.d/ddl.xml
+++ b/dbms/tests/integration/test_distributed_ddl/configs/config.d/ddl.xml
@@ -3,6 +3,6 @@
         <path>/clickhouse/task_queue/ddl</path>
         <max_tasks_in_queue>10</max_tasks_in_queue>
         <task_max_lifetime>3600</task_max_lifetime>
-        <cleanup_delay_period>1</cleanup_delay_period>
+        <cleanup_delay_period>5</cleanup_delay_period>
     </distributed_ddl>
 </yandex>
\ No newline at end of file
diff --git a/dbms/tests/integration/test_distributed_ddl/test.py b/dbms/tests/integration/test_distributed_ddl/test.py
index a59cc686cd3..b5e050d00d8 100644
--- a/dbms/tests/integration/test_distributed_ddl/test.py
+++ b/dbms/tests/integration/test_distributed_ddl/test.py
@@ -141,9 +141,9 @@ def test_create_view(started_cluster):
     ddl_check_query(instance, "CREATE VIEW test.super_simple_view ON CLUSTER 'cluster' AS SELECT * FROM system.numbers FORMAT TSV")
     ddl_check_query(instance, "CREATE MATERIALIZED VIEW test.simple_mat_view ON CLUSTER 'cluster' ENGINE = Memory AS SELECT * FROM system.numbers FORMAT TSV")
     ddl_check_query(instance, "DROP TABLE test.simple_mat_view ON CLUSTER 'cluster' FORMAT TSV")
-    ddl_check_query(instance, "DROP TABLE test.super_simple_view2 ON CLUSTER 'cluster' FORMAT TSV")
+    ddl_check_query(instance, "DROP TABLE IF EXISTS test.super_simple_view2 ON CLUSTER 'cluster' FORMAT TSV")
 
-    ddl_check_query(instance, "CREATE TABLE test.super_simple (i Int8) ON CLUSTER 'cluster'")
+    ddl_check_query(instance, "CREATE TABLE test.super_simple ON CLUSTER 'cluster' (i Int8) ENGINE = Memory")
     ddl_check_query(instance, "RENAME TABLE test.super_simple TO test.super_simple2 ON CLUSTER 'cluster' FORMAT TSV")
     ddl_check_query(instance, "DROP TABLE test.super_simple2 ON CLUSTER 'cluster'")
 

From 4af3e55699414aefc242463b6c0bab6d8c937d6d Mon Sep 17 00:00:00 2001
From: alexey-milovidov <milovidov@yandex-team.ru>
Date: Fri, 11 Aug 2017 22:45:50 +0300
Subject: [PATCH 149/281] Update DDLWorker.cpp

---
 dbms/src/Interpreters/DDLWorker.cpp | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/dbms/src/Interpreters/DDLWorker.cpp b/dbms/src/Interpreters/DDLWorker.cpp
index 97503e4542c..004deeaf645 100644
--- a/dbms/src/Interpreters/DDLWorker.cpp
+++ b/dbms/src/Interpreters/DDLWorker.cpp
@@ -220,7 +220,7 @@ DDLWorker::DDLWorker(const std::string & zk_root_dir, Context & context_, const
     {
         task_max_lifetime = config->getUInt64(prefix + "task_max_lifetime", static_cast<UInt64>(task_max_lifetime));
         cleanup_delay_period = config->getUInt64(prefix + "cleanup_delay_period", static_cast<UInt64>(cleanup_delay_period));
-        max_tasks_in_queue = std::max(1UL, config->getUInt64(prefix + "max_tasks_in_queue ", max_tasks_in_queue));
+        max_tasks_in_queue = std::max(1UL, config->getUInt64(prefix + "max_tasks_in_queue", max_tasks_in_queue));
     }
 
     host_fqdn = getFQDNOrHostName();

From 15fa88d25bfa196dc49b7e44c1143f45ff511b82 Mon Sep 17 00:00:00 2001
From: alexey-milovidov <milovidov@yandex-team.ru>
Date: Fri, 11 Aug 2017 22:56:32 +0300
Subject: [PATCH 150/281] Update DDLWorker.cpp

---
 dbms/src/Interpreters/DDLWorker.cpp | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/dbms/src/Interpreters/DDLWorker.cpp b/dbms/src/Interpreters/DDLWorker.cpp
index 004deeaf645..99f7e425d3a 100644
--- a/dbms/src/Interpreters/DDLWorker.cpp
+++ b/dbms/src/Interpreters/DDLWorker.cpp
@@ -424,6 +424,7 @@ void DDLWorker::parseQueryAndResolveHost(DDLTask & task)
 
     bool found_exact_match = false;
     for (size_t shard_num = 0; shard_num < shards.size(); ++shard_num)
+    {
         for (size_t replica_num = 0; replica_num < shards[shard_num].size(); ++replica_num)
         {
             const Cluster::Address & address = shards[shard_num][replica_num];
@@ -442,6 +443,7 @@ void DDLWorker::parseQueryAndResolveHost(DDLTask & task)
                 task.address_in_cluster = address;
             }
         }
+    }
 
     if (found_exact_match)
         return;

From 7ba0fcaf51c82d95a0d3e978ebf54c2d1e6fa0c3 Mon Sep 17 00:00:00 2001
From: alexey-milovidov <milovidov@yandex-team.ru>
Date: Fri, 11 Aug 2017 23:20:15 +0300
Subject: [PATCH 151/281] Update DDLWorker.cpp

---
 dbms/src/Interpreters/DDLWorker.cpp | 14 +++++++-------
 1 file changed, 7 insertions(+), 7 deletions(-)

diff --git a/dbms/src/Interpreters/DDLWorker.cpp b/dbms/src/Interpreters/DDLWorker.cpp
index 99f7e425d3a..fb3d343138f 100644
--- a/dbms/src/Interpreters/DDLWorker.cpp
+++ b/dbms/src/Interpreters/DDLWorker.cpp
@@ -453,6 +453,7 @@ void DDLWorker::parseQueryAndResolveHost(DDLTask & task)
 
     bool found_via_resolving = false;
     for (size_t shard_num = 0; shard_num < shards.size(); ++shard_num)
+    {
         for (size_t replica_num = 0; replica_num < shards[shard_num].size(); ++replica_num)
         {
             const Cluster::Address & address = shards[shard_num][replica_num];
@@ -473,6 +474,7 @@ void DDLWorker::parseQueryAndResolveHost(DDLTask & task)
                 }
             }
         }
+    }
 
     if (!found_via_resolving)
     {
@@ -490,7 +492,7 @@ void DDLWorker::parseQueryAndResolveHost(DDLTask & task)
 bool DDLWorker::tryExecuteQuery(const String & query, const DDLTask & task, ExecutionStatus & status)
 {
     /// Add special comment at the start of query to easily identify DDL-produced queries in query_log
-    String query_prefix = "/*ddl_entry=" + task.entry_name + "*/ ";
+    String query_prefix = "/* ddl_entry=" + task.entry_name + " */ ";
     String query_to_execute = query_prefix + query;
 
     ReadBufferFromString istr(query_to_execute);
@@ -539,7 +541,6 @@ void DDLWorker::processTask(DDLTask & task)
     else
         throw zkutil::KeeperException(code, active_node_path);
 
-
     if (!task.was_executed)
     {
         try
@@ -615,7 +616,7 @@ void DDLWorker::processTaskAlter(
     else if (!execute_once_on_replica && config_is_replicated_shard)
     {
         throw Exception("Table " + ast_alter->table + " isn't replicated, but shard #" + toString(task.host_shard_num + 1) +
-            " replicated according to its cluster definition", ErrorCodes::INCONSISTENT_CLUSTER_DEFINITION);
+            " is replicated according to its cluster definition", ErrorCodes::INCONSISTENT_CLUSTER_DEFINITION);
     }
 
     if (execute_once_on_replica)
@@ -647,7 +648,7 @@ void DDLWorker::processTaskAlter(
         bool alter_executed_by_any_replica = false;
         {
             auto lock = createSimpleZooKeeperLock(zookeeper, shard_path, "lock", task.host_id_str);
-            std::mt19937 rng(std::hash<String>{}(task.host_id_str) + reinterpret_cast<intptr_t>(&rng));
+            std::mt19937 rng(StringRefHash{}(task.host_id_str) + reinterpret_cast<intptr_t>(&rng));
 
             for (int num_tries = 0; num_tries < 10; ++num_tries)
             {
@@ -692,7 +693,7 @@ void DDLWorker::cleanupQueue()
     Int64 current_time_seconds = Poco::Timestamp().epochTime();
     constexpr Int64 zookeeper_time_resolution = 1000;
 
-    // Too early to check
+    /// Too early to check
     if (last_cleanup_time_seconds && current_time_seconds < last_cleanup_time_seconds + cleanup_delay_period)
         return;
 
@@ -775,7 +776,7 @@ void DDLWorker::cleanupQueue()
 }
 
 
-/// Try to create unexisting "status" dirs for a node
+/// Try to create nonexisting "status" dirs for a node
 void DDLWorker::createStatusDirs(const std::string & node_path)
 {
     zkutil::Ops ops;
@@ -855,7 +856,6 @@ void DDLWorker::run()
         }
     } while (!initialized);
 
-
     while (!stop_flag)
     {
         try

From 04ab103d92c6db1241e35e324dba10b0ea4806a9 Mon Sep 17 00:00:00 2001
From: Vitaliy Lyudvichenko <vludv@yandex-team.ru>
Date: Sat, 12 Aug 2017 20:39:14 +0300
Subject: [PATCH 152/281] Avoid extra ZK calls on cleanup. [#CLICKHOUSE-3128]

---
 dbms/src/Interpreters/DDLWorker.cpp | 62 ++++++++++++++++-------------
 1 file changed, 34 insertions(+), 28 deletions(-)

diff --git a/dbms/src/Interpreters/DDLWorker.cpp b/dbms/src/Interpreters/DDLWorker.cpp
index fb3d343138f..0adec196d6f 100644
--- a/dbms/src/Interpreters/DDLWorker.cpp
+++ b/dbms/src/Interpreters/DDLWorker.cpp
@@ -285,7 +285,10 @@ bool DDLWorker::initAndCheckTask(const String & entry_name)
     for (const HostID & host : task->entry.hosts)
     {
         if (!host.isLocalAddress())
+        {
+            //LOG_DEBUG(log, "Host " << host.readableString() << " is not local");
             continue;
+        }
 
         if (host_in_hostlist)
         {
@@ -691,7 +694,7 @@ void DDLWorker::cleanupQueue()
 {
     /// Both ZK and Poco use Unix epoch
     Int64 current_time_seconds = Poco::Timestamp().epochTime();
-    constexpr Int64 zookeeper_time_resolution = 1000;
+    constexpr UInt64 zookeeper_time_resolution = 1000;
 
     /// Too early to check
     if (last_cleanup_time_seconds && current_time_seconds < last_cleanup_time_seconds + cleanup_delay_period)
@@ -718,13 +721,42 @@ void DDLWorker::cleanupQueue()
 
         try
         {
+            /// Already deleted
+            if (!zookeeper->exists(node_path, &stat))
+                continue;
+
+            /// Delete node if its lifetmie is expired (according to task_max_lifetime parameter)
+            size_t zookeeper_time_seconds = stat.ctime / zookeeper_time_resolution;
+            bool node_lifetime_is_expired = zookeeper_time_seconds + task_max_lifetime < current_time_seconds;
+
+            /// If too many nodes in task queue (> max_tasks_in_queue), delete oldest one
+            bool node_is_outside_max_window = it < first_non_outdated_node;
+
+            if (!node_lifetime_is_expired && !node_is_outside_max_window)
+                continue;
+
+            /// Skip if there are active nodes (it is weak guard)
+            if (zookeeper->exists(node_path + "/active", &stat) && stat.numChildren > 0)
+            {
+                LOG_INFO(log, "Task " << node_name << " should be deleted but there are active workers. Skipping it.");
+                continue;
+            }
+
             /// Usage of the lock is not necessary now (tryRemoveRecursive correctly removes node in a presence of concurrent cleaners)
             /// But the lock will be required to implement system.distributed_ddl_queue table
             auto lock = createSimpleZooKeeperLock(zookeeper, node_path, "lock", host_fqdn_id);
             if (!lock->tryLock())
+            {
+                LOG_INFO(log, "Task " << node_name << " should be deleted but it is locked. Skipping it.");
                 continue;
+            }
 
-            auto delete_node = [&] ()
+            if (node_lifetime_is_expired)
+                LOG_INFO(log, "Lifetime of task " << node_name << " is expired, deleting it");
+            else if (node_is_outside_max_window)
+                LOG_INFO(log, "Task " << node_name << " is outdated, deleting it");
+
+            /// Deleting
             {
                 Strings childs = zookeeper->getChildren(node_path);
                 for (const String & child : childs)
@@ -740,32 +772,6 @@ void DDLWorker::cleanupQueue()
                 zookeeper->multi(ops);
 
                 lock->unlockAssumeLockNodeRemovedManually();
-            };
-
-            /// Skip if there are active nodes (it is weak guard)
-            if (zookeeper->tryGet(node_path + "/active", dummy, &stat) && stat.numChildren > 0)
-                continue;
-
-            /// Delete if too many (max_tasks_in_queue) task in queue
-            if (it < first_non_outdated_node)
-            {
-                LOG_INFO(log, "Task " << node_name << " is outdated, deleting it");
-
-                delete_node();
-                continue;
-            }
-
-            zookeeper->get(node_path, &stat);
-            size_t zookeeper_time_seconds = stat.mtime / zookeeper_time_resolution;
-
-            /// Delte if node lifetmie (task_max_lifetime) is expired
-            if (zookeeper_time_seconds + task_max_lifetime < current_time_seconds)
-            {
-                size_t lifetime_seconds = current_time_seconds - zookeeper_time_seconds;
-                LOG_INFO(log, "Lifetime of task " << node_name << " (" << lifetime_seconds << " sec.) is expired, deleting it");
-
-                delete_node();
-                continue;
             }
         }
         catch (...)

From bf1c4d156a72f915a78428dbc8c5493eaba67db2 Mon Sep 17 00:00:00 2001
From: Vitaliy Lyudvichenko <vludv@yandex-team.ru>
Date: Sat, 12 Aug 2017 23:00:00 +0300
Subject: [PATCH 153/281] Fix errors after rebase, better logging.
 [#CLICKHOUSE-3128]

---
 dbms/src/Interpreters/DDLWorker.cpp | 28 +++++++++++++++++-----------
 dbms/src/Interpreters/DDLWorker.h   |  2 +-
 2 files changed, 18 insertions(+), 12 deletions(-)

diff --git a/dbms/src/Interpreters/DDLWorker.cpp b/dbms/src/Interpreters/DDLWorker.cpp
index 0adec196d6f..3d75e6dd22f 100644
--- a/dbms/src/Interpreters/DDLWorker.cpp
+++ b/dbms/src/Interpreters/DDLWorker.cpp
@@ -240,7 +240,7 @@ DDLWorker::~DDLWorker()
 }
 
 
-bool DDLWorker::initAndCheckTask(const String & entry_name)
+bool DDLWorker::initAndCheckTask(const String & entry_name, String & out_reason)
 {
     String node_data;
     String entry_path = queue_dir + "/" + entry_name;
@@ -248,6 +248,7 @@ bool DDLWorker::initAndCheckTask(const String & entry_name)
     if (!zookeeper->tryGet(entry_path, node_data))
     {
         /// It is Ok that node could be deleted just now. It means that there are no current host in node's host list.
+        out_reason = "The task was deleted";
         return false;
     }
 
@@ -278,6 +279,7 @@ bool DDLWorker::initAndCheckTask(const String & entry_name)
             tryLogCurrentException(log, "Can't report the task has invalid format");
         }
 
+        out_reason = "Incorrect task format";
         return false;
     }
 
@@ -285,10 +287,7 @@ bool DDLWorker::initAndCheckTask(const String & entry_name)
     for (const HostID & host : task->entry.hosts)
     {
         if (!host.isLocalAddress())
-        {
-            //LOG_DEBUG(log, "Host " << host.readableString() << " is not local");
             continue;
-        }
 
         if (host_in_hostlist)
         {
@@ -306,6 +305,8 @@ bool DDLWorker::initAndCheckTask(const String & entry_name)
 
     if (host_in_hostlist)
         current_task = std::move(task);
+    else
+        out_reason = "There is no a local address in host list";
 
     return host_in_hostlist;
 }
@@ -345,16 +346,17 @@ void DDLWorker::processTasks()
             }
             else
             {
-                LOG_INFO(log, "Task " << current_task->entry_name << " was deleted from ZooKeeper before current host commited it");
+                LOG_INFO(log, "Task " << current_task->entry_name << " was deleted from ZooKeeper before current host committed it");
                 current_task = nullptr;
             }
         }
 
         if (!current_task)
         {
-            if (!initAndCheckTask(entry_name))
+            String reason;
+            if (!initAndCheckTask(entry_name, reason))
             {
-                LOG_DEBUG(log, "Will not execute task " << entry_name);
+                LOG_DEBUG(log, "Will not execute task " << entry_name << " : " << reason);
                 last_processed_task_name = entry_name;
                 continue;
             }
@@ -423,7 +425,7 @@ void DDLWorker::parseQueryAndResolveHost(DDLTask & task)
     /// Try to find host from task host list in cluster
     /// At the first, try find exact match (host name and ports should be literally equal)
     /// If the attempt fails, try find it resolving host name of each instance
-    const auto & shards = task.cluster->getShardsWithFailoverAddresses();
+    const auto & shards = task.cluster->getShardsAddresses();
 
     bool found_exact_match = false;
     for (size_t shard_num = 0; shard_num < shards.size(); ++shard_num)
@@ -738,7 +740,7 @@ void DDLWorker::cleanupQueue()
             /// Skip if there are active nodes (it is weak guard)
             if (zookeeper->exists(node_path + "/active", &stat) && stat.numChildren > 0)
             {
-                LOG_INFO(log, "Task " << node_name << " should be deleted but there are active workers. Skipping it.");
+                LOG_INFO(log, "Task " << node_name << " should be deleted, but there are active workers. Skipping it.");
                 continue;
             }
 
@@ -747,7 +749,7 @@ void DDLWorker::cleanupQueue()
             auto lock = createSimpleZooKeeperLock(zookeeper, node_path, "lock", host_fqdn_id);
             if (!lock->tryLock())
             {
-                LOG_INFO(log, "Task " << node_name << " should be deleted but it is locked. Skipping it.");
+                LOG_INFO(log, "Task " << node_name << " should be deleted, but it is locked. Skipping it.");
                 continue;
             }
 
@@ -874,6 +876,7 @@ void DDLWorker::run()
             if (stop_flag)
                 break;
 
+            /// TODO: it might delay the execution, move it to separate thread.
             cleanupQueue();
         }
         catch (zkutil::KeeperException & e)
@@ -958,7 +961,10 @@ public:
 
             auto elapsed_seconds = watch.elapsedSeconds();
             if (timeout_seconds >= 0 && elapsed_seconds > timeout_seconds)
-                throw Exception("Watching query is executing too long (" + toString(std::round(elapsed_seconds)) + " sec.)", ErrorCodes::TIMEOUT_EXCEEDED);
+            {
+                throw Exception("Watching task " + node_path + " is executing too long (" + toString(std::round(elapsed_seconds)) + " sec.)",
+                                ErrorCodes::TIMEOUT_EXCEEDED);
+            }
 
             if (num_hosts_finished != 0 || try_number != 0)
                 std::this_thread::sleep_for(std::chrono::milliseconds(50 * std::min(20LU, try_number + 1)));
diff --git a/dbms/src/Interpreters/DDLWorker.h b/dbms/src/Interpreters/DDLWorker.h
index 6b7044ef265..09e2ac6b07e 100644
--- a/dbms/src/Interpreters/DDLWorker.h
+++ b/dbms/src/Interpreters/DDLWorker.h
@@ -42,7 +42,7 @@ private:
 
     /// Reads entry and check that the host belongs to host list of the task
     /// Returns true and sets current_task if entry parsed and the check is passed
-    bool initAndCheckTask(const String & entry_name);
+    bool initAndCheckTask(const String & entry_name, String & out_reason);
 
 
     void processTask(DDLTask & task);

From 9aca95001ead9acd6488788fe69dc440670bfc82 Mon Sep 17 00:00:00 2001
From: Alexey Milovidov <milovidov@yandex-team.ru>
Date: Sun, 13 Aug 2017 12:20:05 +0300
Subject: [PATCH 154/281] Fixed regression, added test [#CLICKHOUSE-2].

---
 dbms/src/Parsers/ParserSelectQuery.cpp                      | 6 +++---
 .../00491_distributed_and_aliases_in_where_having.reference | 1 +
 .../00491_distributed_and_aliases_in_where_having.sql       | 1 +
 3 files changed, 5 insertions(+), 3 deletions(-)
 create mode 100644 dbms/tests/queries/0_stateless/00491_distributed_and_aliases_in_where_having.reference
 create mode 100644 dbms/tests/queries/0_stateless/00491_distributed_and_aliases_in_where_having.sql

diff --git a/dbms/src/Parsers/ParserSelectQuery.cpp b/dbms/src/Parsers/ParserSelectQuery.cpp
index 26b5de0d8f3..99240a31f76 100644
--- a/dbms/src/Parsers/ParserSelectQuery.cpp
+++ b/dbms/src/Parsers/ParserSelectQuery.cpp
@@ -42,16 +42,16 @@ bool ParserSelectQuery::parseImpl(Pos & pos, ASTPtr & node, Expected & expected)
     ParserKeyword s_by("BY");
 
     ParserNotEmptyExpressionList exp_list(false);
-    ParserNotEmptyExpressionList exp_list_for_former_with_clause(false, true); /// Set prefer_alias_to_column_name for each alias.
+    ParserNotEmptyExpressionList exp_list_for_with_clause(false, true); /// Set prefer_alias_to_column_name for each alias.
     ParserNotEmptyExpressionList exp_list_for_select_clause(true);    /// Allows aliases without AS keyword.
-    ParserExpression exp_elem;
+    ParserExpressionWithOptionalAlias exp_elem(false);
     ParserOrderByExpressionList order_list;
 
     /// WITH expr list
     {
         if (s_with.ignore(pos, expected))
         {
-            if (!exp_list_for_former_with_clause.parse(pos, select_query->with_expression_list, expected))
+            if (!exp_list_for_with_clause.parse(pos, select_query->with_expression_list, expected))
                 return false;
         }
     }
diff --git a/dbms/tests/queries/0_stateless/00491_distributed_and_aliases_in_where_having.reference b/dbms/tests/queries/0_stateless/00491_distributed_and_aliases_in_where_having.reference
new file mode 100644
index 00000000000..573541ac970
--- /dev/null
+++ b/dbms/tests/queries/0_stateless/00491_distributed_and_aliases_in_where_having.reference
@@ -0,0 +1 @@
+0
diff --git a/dbms/tests/queries/0_stateless/00491_distributed_and_aliases_in_where_having.sql b/dbms/tests/queries/0_stateless/00491_distributed_and_aliases_in_where_having.sql
new file mode 100644
index 00000000000..fe6d8055d80
--- /dev/null
+++ b/dbms/tests/queries/0_stateless/00491_distributed_and_aliases_in_where_having.sql
@@ -0,0 +1 @@
+SELECT dummy FROM (SELECT dummy, NOT dummy AS x FROM remote('127.0.0.{1,2}', system.one) GROUP BY dummy HAVING x);

From 547516c6369f58170c9674862d4bcf64d26835d1 Mon Sep 17 00:00:00 2001
From: Alexey Milovidov <milovidov@yandex-team.ru>
Date: Sun, 13 Aug 2017 12:27:38 +0300
Subject: [PATCH 155/281] Fixed errors after merge [#CLICKHOUSE-2].

---
 dbms/src/Interpreters/DDLWorker.cpp | 2 +-
 dbms/src/Interpreters/DDLWorker.h   | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/dbms/src/Interpreters/DDLWorker.cpp b/dbms/src/Interpreters/DDLWorker.cpp
index 3d75e6dd22f..ab5da2be2cd 100644
--- a/dbms/src/Interpreters/DDLWorker.cpp
+++ b/dbms/src/Interpreters/DDLWorker.cpp
@@ -728,7 +728,7 @@ void DDLWorker::cleanupQueue()
                 continue;
 
             /// Delete node if its lifetmie is expired (according to task_max_lifetime parameter)
-            size_t zookeeper_time_seconds = stat.ctime / zookeeper_time_resolution;
+            Int64 zookeeper_time_seconds = stat.ctime / zookeeper_time_resolution;
             bool node_lifetime_is_expired = zookeeper_time_seconds + task_max_lifetime < current_time_seconds;
 
             /// If too many nodes in task queue (> max_tasks_in_queue), delete oldest one
diff --git a/dbms/src/Interpreters/DDLWorker.h b/dbms/src/Interpreters/DDLWorker.h
index 09e2ac6b07e..f84f7ccba8b 100644
--- a/dbms/src/Interpreters/DDLWorker.h
+++ b/dbms/src/Interpreters/DDLWorker.h
@@ -97,7 +97,7 @@ private:
     size_t max_tasks_in_queue = 1000;
 
     friend class DDLQueryStatusInputSream;
-    friend class DDLTask;
+    friend struct DDLTask;
 };
 
 

From 0ef64e5dfe2450b4faca433eb7d5b36589d3752a Mon Sep 17 00:00:00 2001
From: Alexey Milovidov <milovidov@yandex-team.ru>
Date: Mon, 14 Aug 2017 02:58:04 +0300
Subject: [PATCH 156/281] Fixed error [#CLICKHOUSE-2].

---
 dbms/src/Functions/FunctionsCoding.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/dbms/src/Functions/FunctionsCoding.h b/dbms/src/Functions/FunctionsCoding.h
index fc7efe33fd4..048b9b88d2d 100644
--- a/dbms/src/Functions/FunctionsCoding.h
+++ b/dbms/src/Functions/FunctionsCoding.h
@@ -1039,7 +1039,7 @@ private:
         size_t dst_pos = 0;
         for (; dst_pos < num_bytes; ++dst_pos)
         {
-            dst[dst_pos] = unhex2(reinterpret_cast<const char *>(src));
+            dst[dst_pos] = unhex2(reinterpret_cast<const char *>(&src[src_pos]));
             src_pos += 2;
         }
     }

From 6ec873c75ed8bbef1fa481dc6d24d98fd263ed34 Mon Sep 17 00:00:00 2001
From: Alexey Milovidov <milovidov@yandex-team.ru>
Date: Mon, 14 Aug 2017 03:02:12 +0300
Subject: [PATCH 157/281] Fixed test (it was dependent on time zone)
 [#CLICKHOUSE-2].

---
 .../00489_pk_subexpression.reference          | 40 +++++++++----------
 .../0_stateless/00489_pk_subexpression.sql    | 10 ++---
 2 files changed, 25 insertions(+), 25 deletions(-)

diff --git a/dbms/tests/queries/0_stateless/00489_pk_subexpression.reference b/dbms/tests/queries/0_stateless/00489_pk_subexpression.reference
index 5f46ec305ae..1a523ee1527 100644
--- a/dbms/tests/queries/0_stateless/00489_pk_subexpression.reference
+++ b/dbms/tests/queries/0_stateless/00489_pk_subexpression.reference
@@ -1,20 +1,20 @@
-2000-01-01      1970-01-01 00:00:01     11      1235
-2000-01-01      1970-01-01 00:00:02     11      4395
-2000-01-01      1970-01-01 00:00:03     22      3545
-2000-01-01      1970-01-01 00:00:04     22      6984
-2000-01-01      1970-01-01 00:00:05     33      4596
-2000-01-01      1970-01-01 00:02:03     33      1235
-2000-01-01      1970-01-01 00:02:01     33      2791
-2000-01-01      1970-01-01 00:02:02     33      2791
-2000-01-01      1970-01-01 00:02:05     44      4578
-2000-01-01      1970-01-01 00:02:04     44      4935
-2000-01-01      1970-01-01 00:02:08     55      1235
-2000-01-01      1970-01-01 00:02:07     55      2791
-2000-01-01      1970-01-01 00:02:06     55      5786
-2000-01-01      1970-01-01 00:00:01     11      1235
-2000-01-01      1970-01-01 00:01:03     11      3572
-2000-01-01      1970-01-01 00:01:01     11      4563
-2000-01-01      1970-01-01 00:01:02     11      4578
-2000-01-01      1970-01-01 00:01:03     11      3572
-2000-01-01      1970-01-01 00:01:01     11      4563
-2000-01-01      1970-01-01 00:01:02     11      4578
+1	11	1235
+2	11	4395
+3	22	3545
+4	22	6984
+5	33	4596
+123	33	1235
+121	33	2791
+122	33	2791
+125	44	4578
+124	44	4935
+128	55	1235
+127	55	2791
+126	55	5786
+1	11	1235
+63	11	3572
+61	11	4563
+62	11	4578
+63	11	3572
+61	11	4563
+62	11	4578
diff --git a/dbms/tests/queries/0_stateless/00489_pk_subexpression.sql b/dbms/tests/queries/0_stateless/00489_pk_subexpression.sql
index f5d0364bb63..4d53c7e29d9 100644
--- a/dbms/tests/queries/0_stateless/00489_pk_subexpression.sql
+++ b/dbms/tests/queries/0_stateless/00489_pk_subexpression.sql
@@ -9,21 +9,21 @@ SET max_block_size = 1;
 
 -- Test inferred limit
 SET max_rows_to_read = 5;
-SELECT * FROM test.pk WHERE x BETWEEN toDateTime(0) AND toDateTime(59);
+SELECT toUInt32(x), y, z FROM test.pk WHERE x BETWEEN toDateTime(0) AND toDateTime(59);
 
 SET max_rows_to_read = 9;
-SELECT * FROM test.pk WHERE x BETWEEN toDateTime(120) AND toDateTime(240);
+SELECT toUInt32(x), y, z FROM test.pk WHERE x BETWEEN toDateTime(120) AND toDateTime(240);
 
 -- Index is coarse, cannot read single row
 SET max_rows_to_read = 5;
-SELECT * FROM test.pk WHERE x = toDateTime(1);
+SELECT toUInt32(x), y, z FROM test.pk WHERE x = toDateTime(1);
 
 -- Index works on interval 00:01:00 - 00:01:59
 SET max_rows_to_read = 4;
-SELECT * FROM test.pk WHERE x BETWEEN toDateTime(60) AND toDateTime(119) AND y = 11;
+SELECT toUInt32(x), y, z FROM test.pk WHERE x BETWEEN toDateTime(60) AND toDateTime(119) AND y = 11;
 
 -- Cannot read less rows as PK is coarser on interval 00:01:00 - 00:02:00
 SET max_rows_to_read = 5;
-SELECT * FROM test.pk WHERE x BETWEEN toDateTime(60) AND toDateTime(120) AND y = 11;
+SELECT toUInt32(x), y, z FROM test.pk WHERE x BETWEEN toDateTime(60) AND toDateTime(120) AND y = 11;
 
 DROP TABLE test.pk;

From 11adefa2c0aa9c71ab8e8dcb10ab8417165722ac Mon Sep 17 00:00:00 2001
From: robot-metrika-test <robot-metrika-test@yandex-team.ru>
Date: Mon, 14 Aug 2017 04:04:51 +0300
Subject: [PATCH 158/281] Auto version update to [54270]

---
 dbms/cmake/version.cmake | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/dbms/cmake/version.cmake b/dbms/cmake/version.cmake
index f5e604aa938..366b7483399 100644
--- a/dbms/cmake/version.cmake
+++ b/dbms/cmake/version.cmake
@@ -1,6 +1,6 @@
 # This strings autochanged from release_lib.sh:
-set(VERSION_DESCRIBE v1.1.54269-testing)
-set(VERSION_REVISION 54269)
+set(VERSION_DESCRIBE v1.1.54270-testing)
+set(VERSION_REVISION 54270)
 # end of autochange
 
 set (VERSION_MAJOR 1)

From 96d8ba9e3ff9ea14b4675048d13db9821387f691 Mon Sep 17 00:00:00 2001
From: Alexey Milovidov <milovidov@yandex-team.ru>
Date: Mon, 14 Aug 2017 04:29:19 +0300
Subject: [PATCH 159/281] Fixed typo [#CLICKHOUSE-2].

---
 dbms/tests/integration/helpers/cluster.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/dbms/tests/integration/helpers/cluster.py b/dbms/tests/integration/helpers/cluster.py
index 103e408f3f5..5e270f61a8f 100644
--- a/dbms/tests/integration/helpers/cluster.py
+++ b/dbms/tests/integration/helpers/cluster.py
@@ -183,7 +183,7 @@ class ClickHouseInstance:
         self.client = None
         self.default_timeout = 20.0 # 20 sec
 
-    # Conntects to the instance via clickhouse-client, sends a query (1st argument) and returns the answer
+    # Connects to the instance via clickhouse-client, sends a query (1st argument) and returns the answer
     def query(self, *args, **kwargs):
         return self.client.query(*args, **kwargs)
 

From 986c6c729df557d28192f1a36be915b8514ab0cb Mon Sep 17 00:00:00 2001
From: Alexey Milovidov <milovidov@yandex-team.ru>
Date: Mon, 14 Aug 2017 07:23:38 +0300
Subject: [PATCH 160/281] Fixed incompatibility [#CLICKHOUSE-2].

---
 dbms/src/Functions/FunctionFactory.cpp      | 20 +++++++++++--
 dbms/src/Functions/FunctionFactory.h        | 31 +++++++++++++--------
 dbms/src/Functions/FunctionsConditional.cpp |  5 ++++
 3 files changed, 42 insertions(+), 14 deletions(-)

diff --git a/dbms/src/Functions/FunctionFactory.cpp b/dbms/src/Functions/FunctionFactory.cpp
index 5766d887fe5..c8819fbbae8 100644
--- a/dbms/src/Functions/FunctionFactory.cpp
+++ b/dbms/src/Functions/FunctionFactory.cpp
@@ -1,3 +1,4 @@
+#include <Poco/String.h>
 #include <Functions/FunctionFactory.h>
 #include <Common/Exception.h>
 
@@ -10,8 +11,17 @@ namespace ErrorCodes
     extern const int UNKNOWN_FUNCTION;
 }
 
-FunctionFactory::FunctionFactory()
+
+void FunctionFactory::registerFunction(const String & name, Creator creator, CaseSensitiveness case_sensitiveness)
 {
+    if (!functions.emplace(name, creator).second)
+        throw Exception("FunctionFactory: the function name '" + name + "' is not unique",
+            ErrorCodes::LOGICAL_ERROR);
+
+    if (case_sensitiveness == CaseInsensitive
+        && !case_insensitive_functions.emplace(Poco::toLower(name), creator).second)
+        throw Exception("FunctionFactory: the case insensitive function name '" + name + "' is not unique",
+            ErrorCodes::LOGICAL_ERROR);
 }
 
 
@@ -33,8 +43,12 @@ FunctionPtr FunctionFactory::tryGet(
     auto it = functions.find(name);
     if (functions.end() != it)
         return it->second(context);
-    else
-        return {};
+
+    it = case_insensitive_functions.find(Poco::toLower(name));
+    if (case_insensitive_functions.end() != it)
+        return it->second(context);
+
+    return {};
 }
 
 }
diff --git a/dbms/src/Functions/FunctionFactory.h b/dbms/src/Functions/FunctionFactory.h
index 46dfd06149e..9702e233a44 100644
--- a/dbms/src/Functions/FunctionFactory.h
+++ b/dbms/src/Functions/FunctionFactory.h
@@ -4,7 +4,9 @@
 #include <memory>
 #include <unordered_map>
 #include <ext/singleton.h>
+
 #include <Common/Exception.h>
+#include <Core/Types.h>
 
 
 namespace DB
@@ -30,22 +32,29 @@ class FunctionFactory : public ext::singleton<FunctionFactory>
 
 private:
     using Creator = FunctionPtr(*)(const Context & context);    /// Not std::function, for lower object size and less indirection.
-    std::unordered_map<std::string, Creator> functions;
+    using Functions = std::unordered_map<String, Creator>;
+
+    Functions functions;
+    Functions case_insensitive_functions;
+
+    /// For compatibility with SQL, it's possible to specify that certain function name is case insensitive.
+    enum CaseSensitiveness
+    {
+        CaseSensitive,
+        CaseInsensitive
+    };
 
 public:
-    FunctionFactory();
-
-    FunctionPtr get(const std::string & name, const Context & context) const;    /// Throws an exception if not found.
-    FunctionPtr tryGet(const std::string & name, const Context & context) const; /// Returns nullptr if not found.
+    FunctionPtr get(const String & name, const Context & context) const;    /// Throws an exception if not found.
+    FunctionPtr tryGet(const String & name, const Context & context) const; /// Returns nullptr if not found.
 
     /// No locking, you must register all functions before usage of get, tryGet.
-    template <typename Function> void registerFunction()
-    {
-        static_assert(std::is_same<decltype(&Function::create), Creator>::value, "Function::create has incorrect type");
+    void registerFunction(const String & name, Creator creator, CaseSensitiveness case_sensitiveness = CaseSensitive);
 
-        if (!functions.emplace(std::string(Function::name), &Function::create).second)
-            throw Exception("FunctionFactory: the function name '" + std::string(Function::name) + "' is not unique",
-                ErrorCodes::LOGICAL_ERROR);
+    template <typename Function>
+    void registerFunction()
+    {
+        registerFunction(String(Function::name), &Function::create);
     }
 };
 
diff --git a/dbms/src/Functions/FunctionsConditional.cpp b/dbms/src/Functions/FunctionsConditional.cpp
index 8c1989f19fc..31abeab1890 100644
--- a/dbms/src/Functions/FunctionsConditional.cpp
+++ b/dbms/src/Functions/FunctionsConditional.cpp
@@ -20,8 +20,13 @@ void registerFunctionsConditional(FunctionFactory & factory)
     factory.registerFunction<FunctionMultiIf>();
     factory.registerFunction<FunctionCaseWithExpression>();
     factory.registerFunction<FunctionCaseWithoutExpression>();
+
+    /// These are obsolete function names.
+    factory.registerFunction("caseWithExpr", FunctionCaseWithExpression::create);
+    factory.registerFunction("caseWithoutExpr", FunctionCaseWithoutExpression::create);
 }
 
+
 namespace
 {
 

From eca1b39deee732a9295d91402878162530bd4838 Mon Sep 17 00:00:00 2001
From: robot-metrika-test <robot-metrika-test@yandex-team.ru>
Date: Mon, 14 Aug 2017 07:25:10 +0300
Subject: [PATCH 161/281] Auto version update to [54271]

---
 dbms/cmake/version.cmake | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/dbms/cmake/version.cmake b/dbms/cmake/version.cmake
index 366b7483399..574ab1b00a2 100644
--- a/dbms/cmake/version.cmake
+++ b/dbms/cmake/version.cmake
@@ -1,6 +1,6 @@
 # This strings autochanged from release_lib.sh:
-set(VERSION_DESCRIBE v1.1.54270-testing)
-set(VERSION_REVISION 54270)
+set(VERSION_DESCRIBE v1.1.54271-testing)
+set(VERSION_REVISION 54271)
 # end of autochange
 
 set (VERSION_MAJOR 1)

From 2e6eb504dcd4dd278ed5faa24b279d25113470d2 Mon Sep 17 00:00:00 2001
From: Evgeniy Gatov <egatov@yandex-team.ru>
Date: Mon, 14 Aug 2017 06:38:32 +0300
Subject: [PATCH 162/281] LimitReadBuffer fix [#CLICKHOUSE-2].

---
 dbms/src/IO/LimitReadBuffer.h | 60 +++++++++++++++++++++++++----------
 1 file changed, 43 insertions(+), 17 deletions(-)

diff --git a/dbms/src/IO/LimitReadBuffer.h b/dbms/src/IO/LimitReadBuffer.h
index 304c3302245..b99c8e95bf5 100644
--- a/dbms/src/IO/LimitReadBuffer.h
+++ b/dbms/src/IO/LimitReadBuffer.h
@@ -8,26 +8,52 @@ namespace DB
 
 /** Lets read from another ReadBuffer no more than the specified number of bytes.
   */
-class LimitReadBuffer : public ReadBuffer
-{
-private:
-    ReadBuffer & in;
-    size_t limit;
-
-    bool nextImpl() override
+    class LimitReadBuffer : public ReadBuffer
     {
-        if (count() >= limit || !in.next())
-            return false;
+    private:
+        ReadBuffer & in;
+        size_t limit;
 
-        working_buffer = in.buffer();
-        if (limit - count() < working_buffer.size())
-            working_buffer.resize(limit - count());
+        bool nextImpl() override
+        {
+            /// Let underlying buffer calculate read bytes in `next()` call.
+            in.position() = position();
 
-        return true;
-    }
+            if (bytes >= limit || !in.next())
+            {
+                return false;
+            }
 
-public:
-    LimitReadBuffer(ReadBuffer & in_, size_t limit_) : ReadBuffer(nullptr, 0), in(in_), limit(limit_) {}
-};
+            position() = in.position();
+
+            working_buffer = in.buffer();
+
+            if (limit - count() < working_buffer.size())
+            {
+                working_buffer.resize(limit - count());
+            }
+
+            return true;
+        }
+
+    public:
+        LimitReadBuffer(ReadBuffer & in_, size_t limit_) : ReadBuffer(in_.position(), 0), in(in_), limit(limit_)
+        {
+            working_buffer = in.buffer();
+
+            size_t bytes_in_buffer = working_buffer.end() - position();
+
+            working_buffer = Buffer(position(), working_buffer.end());
+
+            if (limit < bytes_in_buffer)
+                working_buffer.resize(limit);
+        }
+        virtual ~LimitReadBuffer() override
+        {
+            /// Update underlying buffer's position in case when limit wasn't reached.
+            if (working_buffer.size() != 0)
+                in.position() = position();
+        }
+    };
 
 }

From 3330cd6a7fc70d44138bc9dcbc92d81b60693770 Mon Sep 17 00:00:00 2001
From: alexey-milovidov <milovidov@yandex-team.ru>
Date: Mon, 14 Aug 2017 07:26:52 +0300
Subject: [PATCH 163/281] Update LimitReadBuffer.h

---
 dbms/src/IO/LimitReadBuffer.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/dbms/src/IO/LimitReadBuffer.h b/dbms/src/IO/LimitReadBuffer.h
index b99c8e95bf5..d4b887c0fdf 100644
--- a/dbms/src/IO/LimitReadBuffer.h
+++ b/dbms/src/IO/LimitReadBuffer.h
@@ -6,7 +6,7 @@
 namespace DB
 {
 
-/** Lets read from another ReadBuffer no more than the specified number of bytes.
+/** Allows to read from another ReadBuffer no more than the specified number of bytes.
   */
     class LimitReadBuffer : public ReadBuffer
     {

From d87b615bdfcfe124f1f96ebdce25cf14f73ba632 Mon Sep 17 00:00:00 2001
From: alexey-milovidov <milovidov@yandex-team.ru>
Date: Mon, 14 Aug 2017 07:30:37 +0300
Subject: [PATCH 164/281] Update LimitReadBuffer.h

---
 dbms/src/IO/LimitReadBuffer.h | 83 ++++++++++++++++-------------------
 1 file changed, 39 insertions(+), 44 deletions(-)

diff --git a/dbms/src/IO/LimitReadBuffer.h b/dbms/src/IO/LimitReadBuffer.h
index d4b887c0fdf..2183d47d400 100644
--- a/dbms/src/IO/LimitReadBuffer.h
+++ b/dbms/src/IO/LimitReadBuffer.h
@@ -8,52 +8,47 @@ namespace DB
 
 /** Allows to read from another ReadBuffer no more than the specified number of bytes.
   */
-    class LimitReadBuffer : public ReadBuffer
+class LimitReadBuffer : public ReadBuffer
+{
+private:
+    ReadBuffer & in;
+    size_t limit;
+
+    bool nextImpl() override
     {
-    private:
-        ReadBuffer & in;
-        size_t limit;
+        /// Let underlying buffer calculate read bytes in `next()` call.
+        in.position() = position();
 
-        bool nextImpl() override
-        {
-            /// Let underlying buffer calculate read bytes in `next()` call.
+        if (bytes >= limit || !in.next())
+            return false;
+
+        working_buffer = in.buffer();
+
+        if (limit - count() < working_buffer.size())
+            working_buffer.resize(limit - count());
+
+        return true;
+    }
+
+public:
+    LimitReadBuffer(ReadBuffer & in_, size_t limit_) : ReadBuffer(in_.position(), 0), in(in_), limit(limit_)
+    {
+        working_buffer = in.buffer();
+
+        size_t bytes_in_buffer = working_buffer.end() - position();
+
+        working_buffer = Buffer(position(), working_buffer.end());
+
+        if (limit < bytes_in_buffer)
+            working_buffer.resize(limit);
+    }
+    
+    virtual ~LimitReadBuffer() override
+    {
+        /// Update underlying buffer's position in case when limit wasn't reached.
+        if (working_buffer.size() != 0)
             in.position() = position();
-
-            if (bytes >= limit || !in.next())
-            {
-                return false;
-            }
-
-            position() = in.position();
-
-            working_buffer = in.buffer();
-
-            if (limit - count() < working_buffer.size())
-            {
-                working_buffer.resize(limit - count());
-            }
-
-            return true;
-        }
-
-    public:
-        LimitReadBuffer(ReadBuffer & in_, size_t limit_) : ReadBuffer(in_.position(), 0), in(in_), limit(limit_)
-        {
-            working_buffer = in.buffer();
-
-            size_t bytes_in_buffer = working_buffer.end() - position();
-
-            working_buffer = Buffer(position(), working_buffer.end());
-
-            if (limit < bytes_in_buffer)
-                working_buffer.resize(limit);
-        }
-        virtual ~LimitReadBuffer() override
-        {
-            /// Update underlying buffer's position in case when limit wasn't reached.
-            if (working_buffer.size() != 0)
-                in.position() = position();
-        }
-    };
+    }
+};
 
 }

From 16f0af8018047d57dcf03b00d1fd265827cbe7c5 Mon Sep 17 00:00:00 2001
From: alexey-milovidov <milovidov@yandex-team.ru>
Date: Mon, 14 Aug 2017 07:31:54 +0300
Subject: [PATCH 165/281] Update LimitReadBuffer.h

---
 dbms/src/IO/LimitReadBuffer.h | 10 +---------
 1 file changed, 1 insertion(+), 9 deletions(-)

diff --git a/dbms/src/IO/LimitReadBuffer.h b/dbms/src/IO/LimitReadBuffer.h
index 2183d47d400..6397a164a5d 100644
--- a/dbms/src/IO/LimitReadBuffer.h
+++ b/dbms/src/IO/LimitReadBuffer.h
@@ -33,17 +33,9 @@ private:
 public:
     LimitReadBuffer(ReadBuffer & in_, size_t limit_) : ReadBuffer(in_.position(), 0), in(in_), limit(limit_)
     {
-        working_buffer = in.buffer();
-
-        size_t bytes_in_buffer = working_buffer.end() - position();
-
-        working_buffer = Buffer(position(), working_buffer.end());
-
-        if (limit < bytes_in_buffer)
-            working_buffer.resize(limit);
     }
     
-    virtual ~LimitReadBuffer() override
+    ~LimitReadBuffer() override
     {
         /// Update underlying buffer's position in case when limit wasn't reached.
         if (working_buffer.size() != 0)

From cbed1415d79cbc837b28e4a171df26b341b240cd Mon Sep 17 00:00:00 2001
From: alexey-milovidov <milovidov@yandex-team.ru>
Date: Mon, 14 Aug 2017 07:32:48 +0300
Subject: [PATCH 166/281] Update LimitReadBuffer.h

---
 dbms/src/IO/LimitReadBuffer.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/dbms/src/IO/LimitReadBuffer.h b/dbms/src/IO/LimitReadBuffer.h
index 6397a164a5d..cc5891939dd 100644
--- a/dbms/src/IO/LimitReadBuffer.h
+++ b/dbms/src/IO/LimitReadBuffer.h
@@ -31,7 +31,7 @@ private:
     }
 
 public:
-    LimitReadBuffer(ReadBuffer & in_, size_t limit_) : ReadBuffer(in_.position(), 0), in(in_), limit(limit_)
+    LimitReadBuffer(ReadBuffer & in_, size_t limit_) : ReadBuffer(nullptr, 0), in(in_), limit(limit_)
     {
     }
     

From 94a15df62682fd7df84ae40150c8b387af6b4a7a Mon Sep 17 00:00:00 2001
From: alexey-milovidov <milovidov@yandex-team.ru>
Date: Mon, 14 Aug 2017 07:33:11 +0300
Subject: [PATCH 167/281] Update LimitReadBuffer.h

---
 dbms/src/IO/LimitReadBuffer.h | 4 +---
 1 file changed, 1 insertion(+), 3 deletions(-)

diff --git a/dbms/src/IO/LimitReadBuffer.h b/dbms/src/IO/LimitReadBuffer.h
index cc5891939dd..bb788bfa78c 100644
--- a/dbms/src/IO/LimitReadBuffer.h
+++ b/dbms/src/IO/LimitReadBuffer.h
@@ -31,9 +31,7 @@ private:
     }
 
 public:
-    LimitReadBuffer(ReadBuffer & in_, size_t limit_) : ReadBuffer(nullptr, 0), in(in_), limit(limit_)
-    {
-    }
+    LimitReadBuffer(ReadBuffer & in_, size_t limit_) : ReadBuffer(nullptr, 0), in(in_), limit(limit_) {}
     
     ~LimitReadBuffer() override
     {

From 09d83eecd0d12391c81b6e142c72904515d185fb Mon Sep 17 00:00:00 2001
From: Alexey Milovidov <milovidov@yandex-team.ru>
Date: Mon, 14 Aug 2017 07:42:04 +0300
Subject: [PATCH 168/281] Moved code to cpp [#CLICKHOUSE-2].

---
 dbms/src/IO/LimitReadBuffer.cpp | 35 +++++++++++++++++++++++++++++++++
 dbms/src/IO/LimitReadBuffer.h   | 27 ++++---------------------
 2 files changed, 39 insertions(+), 23 deletions(-)
 create mode 100644 dbms/src/IO/LimitReadBuffer.cpp

diff --git a/dbms/src/IO/LimitReadBuffer.cpp b/dbms/src/IO/LimitReadBuffer.cpp
new file mode 100644
index 00000000000..c015173045d
--- /dev/null
+++ b/dbms/src/IO/LimitReadBuffer.cpp
@@ -0,0 +1,48 @@
+#include <IO/LimitReadBuffer.h>
+
+
+namespace DB
+{
+
+/// Moves on to the next chunk of the underlying buffer, truncated so that no more than `limit` bytes are exposed in total.
+/// Returns false when the limit has been reached or the underlying buffer has no more data.
+bool LimitReadBuffer::nextImpl()
+{
+    /// Let underlying buffer calculate read bytes in `next()` call.
+    in.position() = position();
+
+    if (bytes >= limit || !in.next())
+        return false;
+
+    working_buffer = in.buffer();
+
+    if (limit - bytes < working_buffer.size())
+        working_buffer.resize(limit - bytes);
+
+    return true;
+}
+
+
+/// The initial window is the data already pending in the underlying buffer, capped at `limit`.
+/// The position must start inside `in`'s buffer: nextImpl() copies position() back into `in`,
+/// so starting from nullptr would overwrite the underlying position with a null pointer.
+LimitReadBuffer::LimitReadBuffer(ReadBuffer & in_, size_t limit_)
+    : ReadBuffer(in_.position(), 0), in(in_), limit(limit_)
+{
+    size_t remaining_bytes_in_buffer = in.buffer().end() - in.position();
+    if (remaining_bytes_in_buffer > limit)
+        remaining_bytes_in_buffer = limit;
+
+    working_buffer = Buffer(in.position(), in.position() + remaining_bytes_in_buffer);
+}
+
+
+/// Hands the current read position back to the underlying buffer.
+LimitReadBuffer::~LimitReadBuffer()
+{
+    /// Update underlying buffer's position in case when limit wasn't reached.
+    if (working_buffer.size() != 0)
+        in.position() = position();
+}
+
+}
diff --git a/dbms/src/IO/LimitReadBuffer.h b/dbms/src/IO/LimitReadBuffer.h
index bb788bfa78c..655cd810a44 100644
--- a/dbms/src/IO/LimitReadBuffer.h
+++ b/dbms/src/IO/LimitReadBuffer.h
@@ -1,5 +1,6 @@
 #pragma once
 
+#include <cstddef>
 #include <IO/ReadBuffer.h>
 
 
@@ -14,31 +15,11 @@ private:
     ReadBuffer & in;
     size_t limit;
 
-    bool nextImpl() override
-    {
-        /// Let underlying buffer calculate read bytes in `next()` call.
-        in.position() = position();
-
-        if (bytes >= limit || !in.next())
-            return false;
-
-        working_buffer = in.buffer();
-
-        if (limit - count() < working_buffer.size())
-            working_buffer.resize(limit - count());
-
-        return true;
-    }
+    bool nextImpl() override;
 
 public:
-    LimitReadBuffer(ReadBuffer & in_, size_t limit_) : ReadBuffer(nullptr, 0), in(in_), limit(limit_) {}
-    
-    ~LimitReadBuffer() override
-    {
-        /// Update underlying buffer's position in case when limit wasn't reached.
-        if (working_buffer.size() != 0)
-            in.position() = position();
-    }
+    LimitReadBuffer(ReadBuffer & in_, size_t limit_);
+    ~LimitReadBuffer() override;
 };
 
 }

From 957fb9f2e5a477e3fa39583df5e28cad3f6f5a7d Mon Sep 17 00:00:00 2001
From: Alexey Milovidov <milovidov@yandex-team.ru>
Date: Mon, 14 Aug 2017 07:49:20 +0300
Subject: [PATCH 169/281] Added test [#CLICKHOUSE-2].

---
 dbms/src/IO/tests/CMakeLists.txt              |  3 +++
 dbms/src/IO/tests/limit_read_buffer.cpp       | 26 +++++++++++++++++++
 dbms/src/IO/tests/limit_read_buffer.reference |  3 +++
 dbms/src/IO/tests/limit_read_buffer.sh        |  2 ++
 4 files changed, 34 insertions(+)
 create mode 100644 dbms/src/IO/tests/limit_read_buffer.cpp
 create mode 100644 dbms/src/IO/tests/limit_read_buffer.reference
 create mode 100755 dbms/src/IO/tests/limit_read_buffer.sh

diff --git a/dbms/src/IO/tests/CMakeLists.txt b/dbms/src/IO/tests/CMakeLists.txt
index d08b05e1499..3330b111f84 100644
--- a/dbms/src/IO/tests/CMakeLists.txt
+++ b/dbms/src/IO/tests/CMakeLists.txt
@@ -76,3 +76,6 @@ target_link_libraries (zlib_buffers dbms)
 
 add_executable (remote_read_write_buffer remote_read_write_buffer.cpp ${SRCS})
 target_link_libraries (remote_read_write_buffer dbms)
+
+add_executable (limit_read_buffer limit_read_buffer.cpp ${SRCS})
+target_link_libraries (limit_read_buffer dbms)
diff --git a/dbms/src/IO/tests/limit_read_buffer.cpp b/dbms/src/IO/tests/limit_read_buffer.cpp
new file mode 100644
index 00000000000..432c21b5410
--- /dev/null
+++ b/dbms/src/IO/tests/limit_read_buffer.cpp
@@ -0,0 +1,35 @@
+#include <iostream>
+#include <string>
+
+#include <IO/ReadBufferFromFileDescriptor.h>
+#include <IO/LimitReadBuffer.h>
+#include <IO/WriteBufferFromFileDescriptor.h>
+#include <IO/copyData.h>
+#include <IO/WriteHelpers.h>
+
+
+/// Copies stdin to stdout through a LimitReadBuffer with the limit given in argv[1],
+/// then prints a separator and copies whatever is left in the underlying buffer.
+int main(int argc, char ** argv)
+{
+    using namespace DB;
+
+    if (argc < 2)
+    {
+        std::cerr << "Usage: limit_read_buffer limit < input\n";
+        return 1;
+    }
+
+    size_t limit = std::stol(argv[1]);
+
+    ReadBufferFromFileDescriptor in(STDIN_FILENO);
+    LimitReadBuffer limit_in(in, limit);
+
+    WriteBufferFromFileDescriptor out(STDOUT_FILENO);
+
+    copyData(limit_in, out);
+    writeCString("\n--- the rest ---\n", out);
+    copyData(in, out);
+
+    return 0;
+}
diff --git a/dbms/src/IO/tests/limit_read_buffer.reference b/dbms/src/IO/tests/limit_read_buffer.reference
new file mode 100644
index 00000000000..ce8c1253433
--- /dev/null
+++ b/dbms/src/IO/tests/limit_read_buffer.reference
@@ -0,0 +1,3 @@
+Hello, wor
+--- the rest ---
+ld!
diff --git a/dbms/src/IO/tests/limit_read_buffer.sh b/dbms/src/IO/tests/limit_read_buffer.sh
new file mode 100755
index 00000000000..177d6cd957a
--- /dev/null
+++ b/dbms/src/IO/tests/limit_read_buffer.sh
@@ -0,0 +1,2 @@
+#!/usr/bin/env bash
+./limit_read_buffer 10 <<< "Hello, world!"

From 1495490726f6b1a1d1ca4a6f7fd81f5c41332223 Mon Sep 17 00:00:00 2001
From: Alexey Milovidov <milovidov@yandex-team.ru>
Date: Mon, 14 Aug 2017 08:44:04 +0300
Subject: [PATCH 170/281] DDLWorker: better [#CLICKHOUSE-5].

---
 dbms/src/Interpreters/DDLWorker.cpp | 6 +++---
 dbms/src/Server/Server.cpp          | 2 +-
 2 files changed, 4 insertions(+), 4 deletions(-)

diff --git a/dbms/src/Interpreters/DDLWorker.cpp b/dbms/src/Interpreters/DDLWorker.cpp
index ab5da2be2cd..0741bbcfa0a 100644
--- a/dbms/src/Interpreters/DDLWorker.cpp
+++ b/dbms/src/Interpreters/DDLWorker.cpp
@@ -218,9 +218,9 @@ DDLWorker::DDLWorker(const std::string & zk_root_dir, Context & context_, const
 
     if (config)
     {
-        task_max_lifetime = config->getUInt64(prefix + "task_max_lifetime", static_cast<UInt64>(task_max_lifetime));
-        cleanup_delay_period = config->getUInt64(prefix + "cleanup_delay_period", static_cast<UInt64>(cleanup_delay_period));
-        max_tasks_in_queue = std::max(1UL, config->getUInt64(prefix + "max_tasks_in_queue", max_tasks_in_queue));
+        task_max_lifetime = config->getUInt64(prefix + ".task_max_lifetime", static_cast<UInt64>(task_max_lifetime));
+        cleanup_delay_period = config->getUInt64(prefix + ".cleanup_delay_period", static_cast<UInt64>(cleanup_delay_period));
+        max_tasks_in_queue = std::max(1UL, config->getUInt64(prefix + ".max_tasks_in_queue", max_tasks_in_queue));
     }
 
     host_fqdn = getFQDNOrHostName();
diff --git a/dbms/src/Server/Server.cpp b/dbms/src/Server/Server.cpp
index 453b0577ff2..4ad0b9ec719 100644
--- a/dbms/src/Server/Server.cpp
+++ b/dbms/src/Server/Server.cpp
@@ -278,7 +278,7 @@ int Server::main(const std::vector<std::string> & args)
     {
         /// DDL worker should be started after all tables were loaded
         String ddl_zookeeper_path = config().getString("distributed_ddl.path", "/clickhouse/task_queue/ddl/");
-        global_context->setDDLWorker(std::make_shared<DDLWorker>(ddl_zookeeper_path, *global_context, &config(), "distributed_ddl."));
+        global_context->setDDLWorker(std::make_shared<DDLWorker>(ddl_zookeeper_path, *global_context, &config(), "distributed_ddl"));
     }
 
     SCOPE_EXIT({

From a16ed9fc32b2fe83c0311fe3a33459ba2cbc97c6 Mon Sep 17 00:00:00 2001
From: robot-metrika-test <robot-metrika-test@yandex-team.ru>
Date: Mon, 14 Aug 2017 08:49:05 +0300
Subject: [PATCH 171/281] Auto version update to [54272]

---
 dbms/cmake/version.cmake | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/dbms/cmake/version.cmake b/dbms/cmake/version.cmake
index 574ab1b00a2..1ed93f2603e 100644
--- a/dbms/cmake/version.cmake
+++ b/dbms/cmake/version.cmake
@@ -1,6 +1,6 @@
 # This strings autochanged from release_lib.sh:
-set(VERSION_DESCRIBE v1.1.54271-testing)
-set(VERSION_REVISION 54271)
+set(VERSION_DESCRIBE v1.1.54272-testing)
+set(VERSION_REVISION 54272)
 # end of autochange
 
 set (VERSION_MAJOR 1)

From 5af5f78cb7f8d42f9168856fbf166c0b37f1f0ad Mon Sep 17 00:00:00 2001
From: Alexey Milovidov <milovidov@yandex-team.ru>
Date: Mon, 14 Aug 2017 08:51:03 +0300
Subject: [PATCH 172/281] Terminate only DDLWorker in case of unexpected
 exception in DDLWorker [#CLICKHOUSE-4].

---
 dbms/src/Interpreters/DDLWorker.cpp | 12 ++++++------
 1 file changed, 6 insertions(+), 6 deletions(-)

diff --git a/dbms/src/Interpreters/DDLWorker.cpp b/dbms/src/Interpreters/DDLWorker.cpp
index 0741bbcfa0a..d77e1f275d5 100644
--- a/dbms/src/Interpreters/DDLWorker.cpp
+++ b/dbms/src/Interpreters/DDLWorker.cpp
@@ -859,8 +859,8 @@ void DDLWorker::run()
         }
         catch (...)
         {
-            tryLogCurrentException(log, "Terminating. Cannot initialize DDL queue");
-            throw;
+            tryLogCurrentException(log, "Terminating. Cannot initialize DDL queue.");
+            return;
         }
     } while (!initialized);
 
@@ -895,8 +895,8 @@ void DDLWorker::run()
             }
             else
             {
-                LOG_ERROR(log, "Unexpected ZooKeeper error: " << getCurrentExceptionMessage(true) << ". Terminating...");
-                throw;
+                LOG_ERROR(log, "Unexpected ZooKeeper error: " << getCurrentExceptionMessage(true) << ". Terminating.");
+                return;
             }
 
             /// Unlock the processing just in case
@@ -904,8 +904,8 @@ void DDLWorker::run()
         }
         catch (...)
         {
-            LOG_ERROR(log, "Unexpected error: " << getCurrentExceptionMessage(true) << ". Terminating...");
-            throw;
+            LOG_ERROR(log, "Unexpected error: " << getCurrentExceptionMessage(true) << ". Terminating.");
+            return;
         }
     }
 }

From b1bc0a9e99801416d03cebd422e25712ac6e2f5b Mon Sep 17 00:00:00 2001
From: robot-metrika-test <robot-metrika-test@yandex-team.ru>
Date: Mon, 14 Aug 2017 09:49:06 +0300
Subject: [PATCH 173/281] Auto version update to [54273]

---
 dbms/cmake/version.cmake | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/dbms/cmake/version.cmake b/dbms/cmake/version.cmake
index 1ed93f2603e..6ba3f16d45a 100644
--- a/dbms/cmake/version.cmake
+++ b/dbms/cmake/version.cmake
@@ -1,6 +1,6 @@
 # This strings autochanged from release_lib.sh:
-set(VERSION_DESCRIBE v1.1.54272-testing)
-set(VERSION_REVISION 54272)
+set(VERSION_DESCRIBE v1.1.54273-testing)
+set(VERSION_REVISION 54273)
 # end of autochange
 
 set (VERSION_MAJOR 1)

From 33a50b4d65567345a7af8a6ce2dd6e303a535183 Mon Sep 17 00:00:00 2001
From: Vitaliy Lyudvichenko <vludv@yandex-team.ru>
Date: Mon, 14 Aug 2017 14:49:30 +0300
Subject: [PATCH 174/281] Fix false IOError messages in integration tests log.
 [#CLICKHOUSE-2]

---
 dbms/tests/integration/helpers/cluster.py                   | 6 +++---
 .../tests/integration/test_delayed_replica_failover/test.py | 4 ++++
 dbms/tests/integration/test_distributed_ddl/test.py         | 3 +--
 3 files changed, 8 insertions(+), 5 deletions(-)

diff --git a/dbms/tests/integration/helpers/cluster.py b/dbms/tests/integration/helpers/cluster.py
index 5e270f61a8f..9a64e2cb070 100644
--- a/dbms/tests/integration/helpers/cluster.py
+++ b/dbms/tests/integration/helpers/cluster.py
@@ -88,10 +88,10 @@ class ClickHouseCluster:
         if self.is_up:
             return
 
-        # Kill unstopped containers from previous launch
+        # Just in case kill unstopped containers from previous launch
         try:
-            subprocess.check_call(self.base_cmd + ['kill'])
-            subprocess.check_call(self.base_cmd + ['down', '--volumes'])
+            if not subprocess.call(['docker-compose', 'kill']):
+                subprocess.call(['docker-compose', 'down', '--volumes'])
         except:
             pass
 
diff --git a/dbms/tests/integration/test_delayed_replica_failover/test.py b/dbms/tests/integration/test_delayed_replica_failover/test.py
index 6e2942331e6..4d99c28d4ca 100644
--- a/dbms/tests/integration/test_delayed_replica_failover/test.py
+++ b/dbms/tests/integration/test_delayed_replica_failover/test.py
@@ -1,5 +1,9 @@
 import pytest
 import time
+import os, sys
+
+sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
+import helpers
 
 from helpers.cluster import ClickHouseCluster
 from helpers.network import PartitionManager
diff --git a/dbms/tests/integration/test_distributed_ddl/test.py b/dbms/tests/integration/test_distributed_ddl/test.py
index b5e050d00d8..32363debfa6 100644
--- a/dbms/tests/integration/test_distributed_ddl/test.py
+++ b/dbms/tests/integration/test_distributed_ddl/test.py
@@ -118,8 +118,7 @@ def started_cluster():
     finally:
         # Remove iptables rules for sacrifice instance
         cluster.pm_random_drops.heal_all()
-
-        #cluster.shutdown()
+        cluster.shutdown()
 
 
 def test_default_database(started_cluster):

From 05d3f338ca8e49833b0f74711ca0d3f77a4400db Mon Sep 17 00:00:00 2001
From: Vitaliy Lyudvichenko <vludv@yandex-team.ru>
Date: Mon, 14 Aug 2017 15:03:43 +0300
Subject: [PATCH 175/281] Add useful .gitignore. [#CLICKHOUSE-2]

---
 dbms/tests/integration/test_dictionaries/configs/.gitignore | 1 +
 1 file changed, 1 insertion(+)
 create mode 100644 dbms/tests/integration/test_dictionaries/configs/.gitignore

diff --git a/dbms/tests/integration/test_dictionaries/configs/.gitignore b/dbms/tests/integration/test_dictionaries/configs/.gitignore
new file mode 100644
index 00000000000..e94ff4a69e5
--- /dev/null
+++ b/dbms/tests/integration/test_dictionaries/configs/.gitignore
@@ -0,0 +1 @@
+dictionaries

From 1f2c4fc75438f986caac710cd4c40951edcca047 Mon Sep 17 00:00:00 2001
From: proller <proller@github.com>
Date: Mon, 14 Aug 2017 16:36:46 +0300
Subject: [PATCH 176/281] Cmake: Don't detect and use -no-pie flag with clang

---
 cmake/test_compiler.cmake | 31 ++++++++++++++++++-------------
 1 file changed, 18 insertions(+), 13 deletions(-)

diff --git a/cmake/test_compiler.cmake b/cmake/test_compiler.cmake
index 03f65a58065..ab27f884046 100644
--- a/cmake/test_compiler.cmake
+++ b/cmake/test_compiler.cmake
@@ -1,21 +1,26 @@
 include (CheckCXXSourceCompiles)
 include (CMakePushCheckState)
 
-cmake_push_check_state ()
+if (CMAKE_CXX_COMPILER_ID STREQUAL "Clang")
+# clang4 : -no-pie cause error
+# clang6 : -no-pie cause warning
+else ()
 
-set (TEST_FLAG "-no-pie")
-set (CMAKE_REQUIRED_FLAGS "${TEST_FLAG}")
+    cmake_push_check_state ()
 
-check_cxx_source_compiles("
-    int main() {
-        return 0;
-    }
-" HAVE_NO_PIE)
+    set (TEST_FLAG "-no-pie")
+    set (CMAKE_REQUIRED_FLAGS "${TEST_FLAG}")
 
-set (CMAKE_REQUIRED_FLAGS "")
+    check_cxx_source_compiles("
+        int main() {
+            return 0;
+        }
+        " HAVE_NO_PIE)
+
+    if (HAVE_NO_PIE)
+        set (FLAG_NO_PIE ${TEST_FLAG})
+    endif ()
+
+    cmake_pop_check_state ()
 
-if (HAVE_NO_PIE)
-    set (FLAG_NO_PIE ${TEST_FLAG})
 endif ()
-
-cmake_pop_check_state ()

From c305b778710b58bebd3b173c72886fed75c149f1 Mon Sep 17 00:00:00 2001
From: robot-metrika-test <robot-metrika-test@yandex-team.ru>
Date: Mon, 14 Aug 2017 19:07:55 +0300
Subject: [PATCH 177/281] Auto version update to [54274]

---
 dbms/cmake/version.cmake | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/dbms/cmake/version.cmake b/dbms/cmake/version.cmake
index 6ba3f16d45a..cf77eea6cbb 100644
--- a/dbms/cmake/version.cmake
+++ b/dbms/cmake/version.cmake
@@ -1,6 +1,6 @@
 # This strings autochanged from release_lib.sh:
-set(VERSION_DESCRIBE v1.1.54273-testing)
-set(VERSION_REVISION 54273)
+set(VERSION_DESCRIBE v1.1.54274-testing)
+set(VERSION_REVISION 54274)
 # end of autochange
 
 set (VERSION_MAJOR 1)

From 8a14cc49ff908fffbb6e092ecd5ad0abde7b7612 Mon Sep 17 00:00:00 2001
From: Vitaliy Lyudvichenko <vludv@yandex-team.ru>
Date: Tue, 15 Aug 2017 16:00:08 +0300
Subject: [PATCH 178/281] asyncMulti() -> multi(). [#CLICKHOUSE-3221]

---
 .../ReplicatedMergeTreeCleanupThread.cpp      | 26 +++----------------
 .../Storages/StorageReplicatedMergeTree.cpp   | 14 +---------
 2 files changed, 4 insertions(+), 36 deletions(-)

diff --git a/dbms/src/Storages/MergeTree/ReplicatedMergeTreeCleanupThread.cpp b/dbms/src/Storages/MergeTree/ReplicatedMergeTreeCleanupThread.cpp
index a988cf80ac7..11638044935 100644
--- a/dbms/src/Storages/MergeTree/ReplicatedMergeTreeCleanupThread.cpp
+++ b/dbms/src/Storages/MergeTree/ReplicatedMergeTreeCleanupThread.cpp
@@ -170,7 +170,6 @@ void ReplicatedMergeTreeCleanupThread::clearOldBlocks()
     auto first_outdated_block = std::min(first_outdated_block_fixed_threshold, first_outdated_block_time_threshold);
 
     /// TODO After about half a year, we could remain only multi op, because there will be no obsolete children nodes.
-    std::vector<zkutil::ZooKeeper::MultiFuture> multi_futures;
     zkutil::Ops ops;
     for (auto it = first_outdated_block; it != timed_blocks.end(); ++it)
     {
@@ -181,7 +180,7 @@ void ReplicatedMergeTreeCleanupThread::clearOldBlocks()
             ops.emplace_back(new zkutil::Op::Remove(path, -1));
             if (ops.size() >= zkutil::MULTI_BATCH_SIZE)
             {
-                multi_futures.emplace_back(zookeeper->tryAsyncMulti(ops));
+                zookeeper->multi(ops);
                 ops.clear();
             }
         }
@@ -191,31 +190,12 @@ void ReplicatedMergeTreeCleanupThread::clearOldBlocks()
 
     if (!ops.empty())
     {
-        multi_futures.emplace_back(zookeeper->tryAsyncMulti(ops));
+        zookeeper->multi(ops);
         ops.clear();
     }
 
     auto num_nodes_to_delete = timed_blocks.end() - first_outdated_block;
-    size_t num_nodes_not_deleted = 0;
-    int last_error_code = ZOK;
-
-    for (auto & future : multi_futures)
-    {
-        auto res = future.get();
-        if (res.code != ZOK)
-        {
-            num_nodes_not_deleted += res.results->size();
-            last_error_code = res.code;
-        }
-    }
-
-    if (num_nodes_not_deleted)
-    {
-        LOG_ERROR(log, "There was a problem with deleting " << num_nodes_not_deleted << " (of " << num_nodes_to_delete << ")"
-                       << " old blocks from ZooKeeper, error: " << zkutil::ZooKeeper::error2string(last_error_code));
-    }
-    else
-        LOG_TRACE(log, "Cleared " << num_nodes_to_delete << " old blocks from ZooKeeper");
+    LOG_TRACE(log, "Cleared " << num_nodes_to_delete << " old blocks from ZooKeeper");
 }
 
 
diff --git a/dbms/src/Storages/StorageReplicatedMergeTree.cpp b/dbms/src/Storages/StorageReplicatedMergeTree.cpp
index ad6f281501b..410bb309731 100644
--- a/dbms/src/Storages/StorageReplicatedMergeTree.cpp
+++ b/dbms/src/Storages/StorageReplicatedMergeTree.cpp
@@ -3827,7 +3827,6 @@ void StorageReplicatedMergeTree::clearOldPartsAndRemoveFromZK(Logger * log_)
 void StorageReplicatedMergeTree::removePartsFromZooKeeper(zkutil::ZooKeeperPtr & zookeeper, const Strings & part_names)
 {
     zkutil::Ops ops;
-    std::vector<zkutil::ZooKeeper::MultiFuture> futures;
 
     for (auto it = part_names.cbegin(); it != part_names.cend(); ++it)
     {
@@ -3835,21 +3834,10 @@ void StorageReplicatedMergeTree::removePartsFromZooKeeper(zkutil::ZooKeeperPtr &
 
         if (ops.size() >= zkutil::MULTI_BATCH_SIZE || next(it) == part_names.cend())
         {
-            futures.emplace_back(zookeeper->tryAsyncMulti(ops));
+            zookeeper->tryMulti(ops);
             ops.clear();
         }
     }
-
-    int last_error_code = ZOK;
-    for (auto & future : futures)
-    {
-        auto res = future.get();
-        if (res.code != ZOK)
-            last_error_code = res.code;
-    }
-
-    if (last_error_code != ZOK)
-        throw zkutil::KeeperException(last_error_code);
 }
 
 

From 335078ae90d12108d0d49f5a1285c00f7d8ea8ab Mon Sep 17 00:00:00 2001
From: robot-metrika-test <robot-metrika-test@yandex-team.ru>
Date: Tue, 15 Aug 2017 16:04:00 +0300
Subject: [PATCH 179/281] Auto version update to [54275]

---
 dbms/cmake/version.cmake | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/dbms/cmake/version.cmake b/dbms/cmake/version.cmake
index cf77eea6cbb..4c3c9c34107 100644
--- a/dbms/cmake/version.cmake
+++ b/dbms/cmake/version.cmake
@@ -1,6 +1,6 @@
 # This strings autochanged from release_lib.sh:
-set(VERSION_DESCRIBE v1.1.54274-testing)
-set(VERSION_REVISION 54274)
+set(VERSION_DESCRIBE v1.1.54275-testing)
+set(VERSION_REVISION 54275)
 # end of autochange
 
 set (VERSION_MAJOR 1)

From f25f0cd7598e3af14051123c54f8e8fed6b8daab Mon Sep 17 00:00:00 2001
From: Alexey Zatelepin <ztlpn@yandex-team.ru>
Date: Mon, 14 Aug 2017 21:16:11 +0300
Subject: [PATCH 180/281] reduce number of places where partitioning by month
 is assumed [#CLICKHOUSE-3000]

---
 dbms/src/Interpreters/AsynchronousMetrics.cpp |   4 +-
 .../Storages/MergeTree/ActiveDataPartSet.cpp  | 108 +-----------
 .../Storages/MergeTree/ActiveDataPartSet.h    |  39 +----
 .../Storages/MergeTree/DataPartsExchange.cpp  |   3 +-
 dbms/src/Storages/MergeTree/MergeTreeData.cpp | 114 +++++--------
 dbms/src/Storages/MergeTree/MergeTreeData.h   |  15 +-
 .../MergeTree/MergeTreeDataMerger.cpp         |  85 +++++-----
 .../Storages/MergeTree/MergeTreeDataMerger.h  |   4 +-
 .../Storages/MergeTree/MergeTreeDataPart.h    |  12 +-
 .../MergeTree/MergeTreeDataSelectExecutor.cpp |   6 +-
 .../MergeTree/MergeTreeDataWriter.cpp         |  14 +-
 .../Storages/MergeTree/MergeTreeDataWriter.h  |   2 +-
 .../Storages/MergeTree/MergeTreePartInfo.cpp  | 119 ++++++++++++++
 .../Storages/MergeTree/MergeTreePartInfo.h    |  44 +++++
 .../MergeTree/MergedBlockOutputStream.h       |   2 +-
 .../ReplicatedMergeTreeBlockOutputStream.cpp  |  19 +--
 .../MergeTree/ReplicatedMergeTreeLogEntry.h   |   2 +-
 .../ReplicatedMergeTreePartCheckThread.cpp    |  25 ++-
 .../MergeTree/ReplicatedMergeTreeQueue.cpp    |  18 +-
 .../Storages/MergeTree/ReshardingWorker.cpp   |   6 +-
 .../MergeTree/ShardedPartitionUploader.cpp    |   3 +-
 dbms/src/Storages/StorageMergeTree.cpp        |  22 +--
 .../Storages/StorageReplicatedMergeTree.cpp   | 155 +++++++++---------
 .../src/Storages/StorageReplicatedMergeTree.h |   8 +-
 .../Storages/System/StorageSystemParts.cpp    |  15 +-
 dbms/src/Storages/tests/part_name.cpp         |   4 +-
 26 files changed, 424 insertions(+), 424 deletions(-)
 create mode 100644 dbms/src/Storages/MergeTree/MergeTreePartInfo.cpp
 create mode 100644 dbms/src/Storages/MergeTree/MergeTreePartInfo.h

diff --git a/dbms/src/Interpreters/AsynchronousMetrics.cpp b/dbms/src/Interpreters/AsynchronousMetrics.cpp
index bd54a465d3f..12a9c6dc8ca 100644
--- a/dbms/src/Interpreters/AsynchronousMetrics.cpp
+++ b/dbms/src/Interpreters/AsynchronousMetrics.cpp
@@ -174,12 +174,12 @@ void AsynchronousMetrics::update()
                             "Cannot get replica delay for table: " + backQuoteIfNeed(db.first) + "." + backQuoteIfNeed(iterator->name()));
                     }
 
-                    calculateMax(max_part_count_for_partition, table_replicated_merge_tree->getData().getMaxPartsCountForMonth());
+                    calculateMax(max_part_count_for_partition, table_replicated_merge_tree->getData().getMaxPartsCountForPartition());
                 }
 
                 if (table_merge_tree)
                 {
-                    calculateMax(max_part_count_for_partition, table_merge_tree->getData().getMaxPartsCountForMonth());
+                    calculateMax(max_part_count_for_partition, table_merge_tree->getData().getMaxPartsCountForPartition());
                 }
             }
         }
diff --git a/dbms/src/Storages/MergeTree/ActiveDataPartSet.cpp b/dbms/src/Storages/MergeTree/ActiveDataPartSet.cpp
index 106fabba78b..2c8b7f225b7 100644
--- a/dbms/src/Storages/MergeTree/ActiveDataPartSet.cpp
+++ b/dbms/src/Storages/MergeTree/ActiveDataPartSet.cpp
@@ -1,18 +1,9 @@
 #include <Storages/MergeTree/ActiveDataPartSet.h>
-#include <IO/WriteHelpers.h>
-#include <IO/ReadHelpers.h>
-#include <IO/ReadBufferFromString.h>
 
 
 namespace DB
 {
 
-namespace ErrorCodes
-{
-    extern const int BAD_DATA_PART_NAME;
-}
-
-
 ActiveDataPartSet::ActiveDataPartSet(const Strings & names)
 {
     for (const auto & name : names)
@@ -29,12 +20,12 @@ void ActiveDataPartSet::add(const String & name)
 
 void ActiveDataPartSet::addImpl(const String & name)
 {
-    if (getContainingPartImpl(name) != "")
+    if (!getContainingPartImpl(name).empty())
         return;
 
     Part part;
     part.name = name;
-    parsePartName(name, part);
+    part.info = MergeTreePartInfo::fromPartName(name);
 
     /// Parts contained in `part` are located contiguously inside `data_parts`, overlapping with the place where the part itself would be inserted.
     Parts::iterator it = parts.lower_bound(part);
@@ -71,7 +62,7 @@ String ActiveDataPartSet::getContainingPart(const String & part_name) const
 String ActiveDataPartSet::getContainingPartImpl(const String & part_name) const
 {
     Part part;
-    parsePartName(part_name, part);
+    part.info = MergeTreePartInfo::fromPartName(part_name);
 
     /// A part can only be covered/overlapped by the previous or next one in `parts`.
     Parts::iterator it = parts.lower_bound(part);
@@ -91,7 +82,7 @@ String ActiveDataPartSet::getContainingPartImpl(const String & part_name) const
             return it->name;
     }
 
-    return "";
+    return String();
 }
 
 
@@ -115,95 +106,4 @@ size_t ActiveDataPartSet::size() const
 }
 
 
-String ActiveDataPartSet::getPartName(DayNum_t left_date, DayNum_t right_date, Int64 left_id, Int64 right_id, UInt64 level)
-{
-    const auto & date_lut = DateLUT::instance();
-
-    /// Directory name for the part has form: `YYYYMMDD_YYYYMMDD_N_N_L`.
-
-    unsigned left_date_id = date_lut.toNumYYYYMMDD(left_date);
-    unsigned right_date_id = date_lut.toNumYYYYMMDD(right_date);
-
-    WriteBufferFromOwnString wb;
-
-    writeIntText(left_date_id, wb);
-    writeChar('_', wb);
-    writeIntText(right_date_id, wb);
-    writeChar('_', wb);
-    writeIntText(left_id, wb);
-    writeChar('_', wb);
-    writeIntText(right_id, wb);
-    writeChar('_', wb);
-    writeIntText(level, wb);
-
-    return wb.str();
-}
-
-
-bool ActiveDataPartSet::isPartDirectory(const String & dir_name)
-{
-    return parsePartNameImpl(dir_name, nullptr);
-}
-
-bool ActiveDataPartSet::parsePartNameImpl(const String & dir_name, Part * part)
-{
-    UInt32 min_yyyymmdd = 0;
-    UInt32 max_yyyymmdd = 0;
-    Int64 min_block_num = 0;
-    Int64 max_block_num = 0;
-    UInt32 level = 0;
-
-    ReadBufferFromString in(dir_name);
-
-    if (!tryReadIntText(min_yyyymmdd, in)
-        || !checkChar('_', in)
-        || !tryReadIntText(max_yyyymmdd, in)
-        || !checkChar('_', in)
-        || !tryReadIntText(min_block_num, in)
-        || !checkChar('_', in)
-        || !tryReadIntText(max_block_num, in)
-        || !checkChar('_', in)
-        || !tryReadIntText(level, in)
-        || !in.eof())
-    {
-        return false;
-    }
-
-    if (part)
-    {
-        const auto & date_lut = DateLUT::instance();
-
-        part->left_date = date_lut.YYYYMMDDToDayNum(min_yyyymmdd);
-        part->right_date = date_lut.YYYYMMDDToDayNum(max_yyyymmdd);
-        part->left = min_block_num;
-        part->right = max_block_num;
-        part->level = level;
-
-        DayNum_t left_month = date_lut.toFirstDayNumOfMonth(part->left_date);
-        DayNum_t right_month = date_lut.toFirstDayNumOfMonth(part->right_date);
-
-        if (left_month != right_month)
-            throw Exception("Part name " + dir_name + " contains different months", ErrorCodes::BAD_DATA_PART_NAME);
-
-        part->month = left_month;
-    }
-
-    return true;
-}
-
-void ActiveDataPartSet::parsePartName(const String & dir_name, Part & part)
-{
-    if (!parsePartNameImpl(dir_name, &part))
-        throw Exception("Unexpected part name: " + dir_name, ErrorCodes::BAD_DATA_PART_NAME);
-}
-
-bool ActiveDataPartSet::contains(const String & outer_part_name, const String & inner_part_name)
-{
-    Part outer;
-    Part inner;
-    parsePartName(outer_part_name, outer);
-    parsePartName(inner_part_name, inner);
-    return outer.contains(inner);
-}
-
 }
diff --git a/dbms/src/Storages/MergeTree/ActiveDataPartSet.h b/dbms/src/Storages/MergeTree/ActiveDataPartSet.h
index c01f197f097..34a1ffd73b5 100644
--- a/dbms/src/Storages/MergeTree/ActiveDataPartSet.h
+++ b/dbms/src/Storages/MergeTree/ActiveDataPartSet.h
@@ -1,4 +1,6 @@
 #pragma once
+
+#include <Storages/MergeTree/MergeTreePartInfo.h>
 #include <mutex>
 #include <common/DateLUT.h>
 #include <Core/Types.h>
@@ -21,27 +23,12 @@ public:
 
     struct Part
     {
-        DayNum_t left_date;
-        DayNum_t right_date;
-        Int64 left;
-        Int64 right;
-        UInt32 level;
-        String name; /// pure name without prefixes
-        DayNum_t month;
+        String name;
+        MergeTreePartInfo info;
 
-        bool operator<(const Part & rhs) const
-        {
-            return std::tie(month, left, right, level) < std::tie(rhs.month, rhs.left, rhs.right, rhs.level);
-        }
+        bool operator<(const Part & rhs) const { return info < rhs.info; }
 
-        /// Contains another part (obtained after merging another part with some other)
-        bool contains(const Part & rhs) const
-        {
-            return month == rhs.month        /// Parts for different months are not merged
-                && left <= rhs.left
-                && right >= rhs.right
-                && level >= rhs.level;
-        }
+        bool contains(const Part & rhs) const { return info.contains(rhs.info); }
     };
 
     void add(const String & name);
@@ -49,22 +36,10 @@ public:
     /// If not found, returns an empty string.
     String getContainingPart(const String & name) const;
 
-    Strings getParts() const; /// In ascending order of the month and block number.
+    Strings getParts() const; /// In ascending order of the partition_id and block number.
 
     size_t size() const;
 
-    static String getPartName(DayNum_t left_date, DayNum_t right_date, Int64 left_id, Int64 right_id, UInt64 level);
-
-    /// Returns true if the directory name matches the format of the directory name of the parts
-    static bool isPartDirectory(const String & dir_name);
-
-    static bool parsePartNameImpl(const String & dir_name, Part * part);
-
-    /// Put data in DataPart from the name of the part.
-    static void parsePartName(const String & dir_name, Part & part);
-
-    static bool contains(const String & outer_part_name, const String & inner_part_name);
-
 private:
     using Parts = std::set<Part>;
 
diff --git a/dbms/src/Storages/MergeTree/DataPartsExchange.cpp b/dbms/src/Storages/MergeTree/DataPartsExchange.cpp
index 2977d838c6b..b0d98f44ae9 100644
--- a/dbms/src/Storages/MergeTree/DataPartsExchange.cpp
+++ b/dbms/src/Storages/MergeTree/DataPartsExchange.cpp
@@ -265,7 +265,8 @@ MergeTreeData::MutableDataPartPtr Fetcher::fetchPartImpl(
 
     assertEOF(in);
 
-    ActiveDataPartSet::parsePartName(part_name, *new_data_part);
+    new_data_part->info = MergeTreePartInfo::fromPartName(part_name);
+    MergeTreePartInfo::parseMinMaxDatesFromPartName(part_name, new_data_part->min_date, new_data_part->max_date);
     new_data_part->modification_time = time(nullptr);
     new_data_part->loadColumns(true);
     new_data_part->loadChecksums(true);
diff --git a/dbms/src/Storages/MergeTree/MergeTreeData.cpp b/dbms/src/Storages/MergeTree/MergeTreeData.cpp
index 6efe3336dbe..86c2f7a607e 100644
--- a/dbms/src/Storages/MergeTree/MergeTreeData.cpp
+++ b/dbms/src/Storages/MergeTree/MergeTreeData.cpp
@@ -280,11 +280,11 @@ Int64 MergeTreeData::getMaxDataPartIndex()
 {
     std::lock_guard<std::mutex> lock_all(all_data_parts_mutex);
 
-    Int64 max_part_id = 0;
+    Int64 max_block_id = 0;
     for (const auto & part : all_data_parts)
-        max_part_id = std::max(max_part_id, part->right);
+        max_block_id = std::max(max_block_id, part->info.max_block);
 
-    return max_part_id;
+    return max_block_id;
 }
 
 
@@ -316,9 +316,10 @@ void MergeTreeData::loadDataParts(bool skip_sanity_checks)
     for (const String & file_name : part_file_names)
     {
         MutableDataPartPtr part = std::make_shared<DataPart>(*this);
-        if (!ActiveDataPartSet::parsePartNameImpl(file_name, part.get()))
+        if (!MergeTreePartInfo::tryParsePartName(file_name, &part->info))
             continue;
 
+        MergeTreePartInfo::parseMinMaxDatesFromPartName(file_name, part->min_date, part->max_date);
         part->name = file_name;
         part->relative_path = file_name;
         bool broken = false;
@@ -350,7 +351,7 @@ void MergeTreeData::loadDataParts(bool skip_sanity_checks)
         /// Ignore and possibly delete broken parts that can appear as a result of hard server restart.
         if (broken)
         {
-            if (part->level == 0)
+            if (part->info.level == 0)
             {
                 /// It is impossible to restore level 0 parts.
                 LOG_ERROR(log, "Considering to remove broken part " << full_path + file_name << " because it's impossible to repair.");
@@ -370,11 +371,11 @@ void MergeTreeData::loadDataParts(bool skip_sanity_checks)
                     if (contained_name == file_name)
                         continue;
 
-                    DataPart contained_part(*this);
-                    if (!ActiveDataPartSet::parsePartNameImpl(contained_name, &contained_part))
+                    MergeTreePartInfo contained_part_info;
+                    if (!MergeTreePartInfo::tryParsePartName(contained_name, &contained_part_info))
                         continue;
 
-                    if (part->contains(contained_part))
+                    if (part->info.contains(contained_part_info))
                     {
                         LOG_ERROR(log, "Found part " << full_path + contained_name);
                         ++contained_parts;
@@ -425,8 +426,8 @@ void MergeTreeData::loadDataParts(bool skip_sanity_checks)
         ++curr_jt;
         while (curr_jt != data_parts.end())
         {
-            /// Don't consider data parts belonging to different months.
-            if ((*curr_jt)->month != (*prev_jt)->month)
+            /// Don't consider data parts belonging to different partitions.
+            if ((*curr_jt)->info.partition_id != (*prev_jt)->info.partition_id)
             {
                 ++prev_jt;
                 ++curr_jt;
@@ -1194,10 +1195,10 @@ MergeTreeData::DataPartsVector MergeTreeData::renameTempPartAndReplace(
           * Otherwise there is race condition - merge of blocks could happen in interval that doesn't yet contain new part.
           */
         if (increment)
-            part->left = part->right = increment->get();
+            part->info.min_block = part->info.max_block = increment->get();
 
         String old_name = part->name;
-        String new_name = ActiveDataPartSet::getPartName(part->left_date, part->right_date, part->left, part->right, part->level);
+        String new_name = MergeTreePartInfo::getPartName(part->min_date, part->max_date, part->info.min_block, part->info.max_block, part->info.level);
 
         LOG_TRACE(log, "Renaming temporary part " << part->relative_path << " to " << new_name << ".");
 
@@ -1343,18 +1344,18 @@ void MergeTreeData::renameAndDetachPart(const DataPartPtr & part, const String &
         Strings restored;
         bool error = false;
 
-        Int64 pos = part->left;
+        Int64 pos = part->info.min_block;
 
         if (it != all_data_parts.begin())
         {
             --it;
             if (part->contains(**it))
             {
-                if ((*it)->left != part->left)
+                if ((*it)->info.min_block != part->info.min_block)
                     error = true;
                 data_parts.insert(*it);
                 addPartContributionToColumnSizes(*it);
-                pos = (*it)->right + 1;
+                pos = (*it)->info.max_block + 1;
                 restored.push_back((*it)->name);
             }
             else
@@ -1366,17 +1367,17 @@ void MergeTreeData::renameAndDetachPart(const DataPartPtr & part, const String &
 
         for (; it != all_data_parts.end() && part->contains(**it); ++it)
         {
-            if ((*it)->left < pos)
+            if ((*it)->info.min_block < pos)
                 continue;
-            if ((*it)->left > pos)
+            if ((*it)->info.min_block > pos)
                 error = true;
             data_parts.insert(*it);
             addPartContributionToColumnSizes(*it);
-            pos = (*it)->right + 1;
+            pos = (*it)->info.max_block + 1;
             restored.push_back((*it)->name);
         }
 
-        if (pos != part->right + 1)
+        if (pos != part->info.max_block + 1)
             error = true;
 
         for (const String & name : restored)
@@ -1423,23 +1424,23 @@ MergeTreeData::DataParts MergeTreeData::getAllDataParts() const
     return all_data_parts;
 }
 
-size_t MergeTreeData::getMaxPartsCountForMonth() const
+size_t MergeTreeData::getMaxPartsCountForPartition() const
 {
     std::lock_guard<std::mutex> lock(data_parts_mutex);
 
     size_t res = 0;
     size_t cur_count = 0;
-    DayNum_t cur_month = DayNum_t(0);
+    const String * cur_partition_id = nullptr;
 
     for (const auto & part : data_parts)
     {
-        if (part->month == cur_month)
+        if (cur_partition_id && part->info.partition_id == *cur_partition_id)
         {
             ++cur_count;
         }
         else
         {
-            cur_month = part->month;
+            cur_partition_id = &part->info.partition_id;
             cur_count = 1;
         }
 
@@ -1452,7 +1453,7 @@ size_t MergeTreeData::getMaxPartsCountForMonth() const
 
 void MergeTreeData::delayInsertIfNeeded(Poco::Event * until)
 {
-    const size_t parts_count = getMaxPartsCountForMonth();
+    const size_t parts_count = getMaxPartsCountForPartition();
     if (parts_count < settings.parts_to_delay_insert)
         return;
 
@@ -1483,7 +1484,7 @@ void MergeTreeData::delayInsertIfNeeded(Poco::Event * until)
 MergeTreeData::DataPartPtr MergeTreeData::getActiveContainingPart(const String & part_name)
 {
     MutableDataPartPtr tmp_part(new DataPart(*this));
-    ActiveDataPartSet::parsePartName(part_name, *tmp_part);
+    tmp_part->info = MergeTreePartInfo::fromPartName(part_name);
 
     std::lock_guard<std::mutex> lock(data_parts_mutex);
 
@@ -1511,7 +1512,7 @@ MergeTreeData::DataPartPtr MergeTreeData::getActiveContainingPart(const String &
 MergeTreeData::DataPartPtr MergeTreeData::getPartIfExists(const String & part_name)
 {
     MutableDataPartPtr tmp_part(new DataPart(*this));
-    ActiveDataPartSet::parsePartName(part_name, *tmp_part);
+    tmp_part->info = MergeTreePartInfo::fromPartName(part_name);
 
     std::lock_guard<std::mutex> lock(all_data_parts_mutex);
     auto it = all_data_parts.lower_bound(tmp_part);
@@ -1537,7 +1538,8 @@ MergeTreeData::MutableDataPartPtr MergeTreeData::loadPartAndFixMetadata(const St
 
     part->relative_path = relative_path;
     part->name = Poco::Path(relative_path).getFileName();
-    ActiveDataPartSet::parsePartName(part->name, *part);
+    part->info = MergeTreePartInfo::fromPartName(part->name);
+    MergeTreePartInfo::parseMinMaxDatesFromPartName(part->name, part->min_date, part->max_date);
     String full_part_path = part->getFullPath();
 
     /// Earlier the list of columns was written incorrectly. Delete it and re-create.
@@ -1676,7 +1678,7 @@ void MergeTreeData::freezePartition(const std::string & prefix, const String & w
     LOG_DEBUG(log, "Freezed " << parts_processed << " parts");
 }
 
-size_t MergeTreeData::getPartitionSize(const std::string & partition_name) const
+size_t MergeTreeData::getPartitionSize(const std::string & partition_id) const
 {
     size_t size = 0;
 
@@ -1685,10 +1687,10 @@ size_t MergeTreeData::getPartitionSize(const std::string & partition_name) const
 
     for (Poco::DirectoryIterator it(full_path); it != end; ++it)
     {
-        const auto filename = it.name();
-        if (!ActiveDataPartSet::isPartDirectory(filename))
+        MergeTreePartInfo part_info;
+        if (!MergeTreePartInfo::tryParsePartName(it.name(), &part_info))
             continue;
-        if (!startsWith(filename, partition_name))
+        if (part_info.partition_id != partition_id)
             continue;
 
         const auto part_path = it.path().absolute().toString();
@@ -1702,57 +1704,17 @@ size_t MergeTreeData::getPartitionSize(const std::string & partition_name) const
     return size;
 }
 
-static std::pair<String, DayNum_t> getMonthNameAndDayNum(const Field & partition)
+String MergeTreeData::getPartitionID(const Field & partition)
 {
-    String month_name = partition.getType() == Field::Types::UInt64
+    String partition_id = partition.getType() == Field::Types::UInt64
         ? toString(partition.get<UInt64>())
         : partition.safeGet<String>();
 
-    if (month_name.size() != 6 || !std::all_of(month_name.begin(), month_name.end(), isNumericASCII))
-        throw Exception("Invalid partition format: " + month_name + ". Partition should consist of 6 digits: YYYYMM",
+    if (partition_id.size() != 6 || !std::all_of(partition_id.begin(), partition_id.end(), isNumericASCII))
+        throw Exception("Invalid partition format: " + partition_id + ". Partition should consist of 6 digits: YYYYMM",
             ErrorCodes::INVALID_PARTITION_NAME);
 
-    DayNum_t date = DateLUT::instance().YYYYMMDDToDayNum(parse<UInt32>(month_name + "01"));
-
-    /// Can't just compare date with 0, because 0 is a valid DayNum too.
-    if (month_name != toString(DateLUT::instance().toNumYYYYMMDD(date) / 100))
-        throw Exception("Invalid partition format: " + month_name + " doesn't look like month.",
-            ErrorCodes::INVALID_PARTITION_NAME);
-
-    return std::make_pair(month_name, date);
-}
-
-
-String MergeTreeData::getMonthName(const Field & partition)
-{
-    return getMonthNameAndDayNum(partition).first;
-}
-
-String MergeTreeData::getMonthName(DayNum_t month)
-{
-    return toString(DateLUT::instance().toNumYYYYMMDD(month) / 100);
-}
-
-DayNum_t MergeTreeData::getMonthDayNum(const Field & partition)
-{
-    return getMonthNameAndDayNum(partition).second;
-}
-
-DayNum_t MergeTreeData::getMonthFromName(const String & month_name)
-{
-    DayNum_t date = DateLUT::instance().YYYYMMDDToDayNum(parse<UInt32>(month_name + "01"));
-
-    /// Can't just compare date with 0, because 0 is a valid DayNum too.
-    if (month_name != toString(DateLUT::instance().toNumYYYYMMDD(date) / 100))
-        throw Exception("Invalid partition format: " + month_name + " doesn't look like month.",
-            ErrorCodes::INVALID_PARTITION_NAME);
-
-    return date;
-}
-
-DayNum_t MergeTreeData::getMonthFromPartPrefix(const String & part_prefix)
-{
-    return getMonthFromName(part_prefix.substr(0, strlen("YYYYMM")));
+    return partition_id;
 }
 
 
diff --git a/dbms/src/Storages/MergeTree/MergeTreeData.h b/dbms/src/Storages/MergeTree/MergeTreeData.h
index a8135ba9440..f2372b17d09 100644
--- a/dbms/src/Storages/MergeTree/MergeTreeData.h
+++ b/dbms/src/Storages/MergeTree/MergeTreeData.h
@@ -5,7 +5,7 @@
 #include <Interpreters/Context.h>
 #include <Interpreters/ExpressionActions.h>
 #include <Storages/IStorage.h>
-#include <Storages/MergeTree/ActiveDataPartSet.h>
+#include <Storages/MergeTree/MergeTreePartInfo.h>
 #include <Storages/MergeTree/MergeTreeSettings.h>
 #include <IO/ReadBufferFromString.h>
 #include <IO/WriteBufferFromFile.h>
@@ -90,7 +90,7 @@ public:
     using MutableDataPartPtr = std::shared_ptr<DataPart>;
     /// After the DataPart is added to the working set, it cannot be changed.
     using DataPartPtr = std::shared_ptr<const DataPart>;
-    struct DataPartPtrLess { bool operator() (const DataPartPtr & lhs, const DataPartPtr & rhs) const { return *lhs < *rhs; } };
+    struct DataPartPtrLess { bool operator() (const DataPartPtr & lhs, const DataPartPtr & rhs) const { return lhs->info < rhs->info; } };
     using DataParts = std::set<DataPartPtr, DataPartPtrLess>;
     using DataPartsVector = std::vector<DataPartPtr>;
 
@@ -294,7 +294,7 @@ public:
     /// Total size of active parts in bytes.
     size_t getTotalActiveSizeInBytes() const;
 
-    size_t getMaxPartsCountForMonth() const;
+    size_t getMaxPartsCountForPartition() const;
 
     /// If the table contains too many active parts, sleep for a while to give them time to merge.
     /// If until is non-null, wake up from the sleep earlier if the event happened.
@@ -404,7 +404,7 @@ public:
     void freezePartition(const std::string & prefix, const String & with_name);
 
     /// Returns the size of partition in bytes.
-    size_t getPartitionSize(const std::string & partition_name) const;
+    size_t getPartitionSize(const std::string & partition_id) const;
 
     struct ColumnSize
     {
@@ -452,12 +452,7 @@ public:
     }
 
     /// For ATTACH/DETACH/DROP/RESHARD PARTITION.
-    static String getMonthName(const Field & partition);
-    static String getMonthName(DayNum_t month);
-    static DayNum_t getMonthDayNum(const Field & partition);
-    static DayNum_t getMonthFromName(const String & month_name);
-    /// Get month from the part name or a sufficient prefix.
-    static DayNum_t getMonthFromPartPrefix(const String & part_prefix);
+    static String getPartitionID(const Field & partition);
 
     Context & context;
     const String date_column_name;
diff --git a/dbms/src/Storages/MergeTree/MergeTreeDataMerger.cpp b/dbms/src/Storages/MergeTree/MergeTreeDataMerger.cpp
index aeff72dbab7..063c0802551 100644
--- a/dbms/src/Storages/MergeTree/MergeTreeDataMerger.cpp
+++ b/dbms/src/Storages/MergeTree/MergeTreeDataMerger.cpp
@@ -69,12 +69,12 @@ std::string createMergedPartName(const MergeTreeData::DataPartsVector & parts)
 
     for (const MergeTreeData::DataPartPtr & part : parts)
     {
-        level = std::max(level, part->level);
-        left_date = std::min(left_date, part->left_date);
-        right_date = std::max(right_date, part->right_date);
+        level = std::max(level, part->info.level);
+        left_date = std::min(left_date, part->min_date);
+        right_date = std::max(right_date, part->max_date);
     }
 
-    return ActiveDataPartSet::getPartName(left_date, right_date, parts.front()->left, parts.back()->right, level + 1);
+    return MergeTreePartInfo::getPartName(left_date, right_date, parts.front()->info.min_block, parts.back()->info.max_block, level + 1);
 }
 
 }
@@ -146,28 +146,29 @@ bool MergeTreeDataMerger::selectPartsToMerge(
 
     IMergeSelector::Partitions partitions;
 
-    DayNum_t prev_month = DayNum_t(-1);
+    const String * prev_partition_id = nullptr;
     const MergeTreeData::DataPartPtr * prev_part = nullptr;
     for (const MergeTreeData::DataPartPtr & part : data_parts)
     {
-        DayNum_t month = part->month;
-        if (month != prev_month || (prev_part && !can_merge_callback(*prev_part, part)))
+        const String & partition_id = part->info.partition_id;
+        if (!prev_partition_id || partition_id != *prev_partition_id || (prev_part && !can_merge_callback(*prev_part, part)))
         {
             if (partitions.empty() || !partitions.back().empty())
                 partitions.emplace_back();
-            prev_month = month;
+            prev_partition_id = &partition_id;
         }
 
         IMergeSelector::Part part_info;
         part_info.size = part->size_in_bytes;
         part_info.age = current_time - part->modification_time;
-        part_info.level = part->level;
+        part_info.level = part->info.level;
         part_info.data = &part;
 
         partitions.back().emplace_back(part_info);
 
-        /// Check for consistenty of data parts. If assertion is failed, it requires immediate investigation.
-        if (prev_part && part->month == (*prev_part)->month && part->left < (*prev_part)->right)
+        /// Check for consistency of data parts. If assertion is failed, it requires immediate investigation.
+        if (prev_part && part->info.partition_id == (*prev_part)->info.partition_id
+            && part->info.min_block < (*prev_part)->info.max_block)
         {
             LOG_ERROR(log, "Part " << part->name << " intersects previous part " << (*prev_part)->name);
         }
@@ -206,13 +207,13 @@ bool MergeTreeDataMerger::selectPartsToMerge(
 
         parts.push_back(part);
 
-        level = std::max(level, part->level);
-        left_date = std::min(left_date, part->left_date);
-        right_date = std::max(right_date, part->right_date);
+        level = std::max(level, part->info.level);
+        left_date = std::min(left_date, part->min_date);
+        right_date = std::max(right_date, part->max_date);
     }
 
-    merged_name = ActiveDataPartSet::getPartName(
-        left_date, right_date, parts.front()->left, parts.back()->right, level + 1);
+    merged_name = MergeTreePartInfo::getPartName(
+        left_date, right_date, parts.front()->info.min_block, parts.back()->info.max_block, level + 1);
 
     LOG_DEBUG(log, "Selected " << parts.size() << " parts from " << parts.front()->name << " to " << parts.back()->name);
     return true;
@@ -224,10 +225,10 @@ bool MergeTreeDataMerger::selectAllPartsToMergeWithinPartition(
     String & merged_name,
     size_t available_disk_space,
     const AllowedMergingPredicate & can_merge,
-    DayNum_t partition,
+    const String & partition_id,
     bool final)
 {
-    MergeTreeData::DataPartsVector parts = selectAllPartsFromPartition(partition);
+    MergeTreeData::DataPartsVector parts = selectAllPartsFromPartition(partition_id);
 
     if (parts.empty())
         return false;
@@ -249,9 +250,9 @@ bool MergeTreeDataMerger::selectAllPartsToMergeWithinPartition(
             && !can_merge(*prev_it, *it))
             return false;
 
-        level = std::max(level, (*it)->level);
-        left_date = std::min(left_date, (*it)->left_date);
-        right_date = std::max(right_date, (*it)->right_date);
+        level = std::max(level, (*it)->info.level);
+        left_date = std::min(left_date, (*it)->min_date);
+        right_date = std::max(right_date, (*it)->max_date);
 
         sum_bytes += (*it)->size_in_bytes;
 
@@ -279,15 +280,15 @@ bool MergeTreeDataMerger::selectAllPartsToMergeWithinPartition(
     }
 
     what = parts;
-    merged_name = ActiveDataPartSet::getPartName(
-        left_date, right_date, parts.front()->left, parts.back()->right, level + 1);
+    merged_name = MergeTreePartInfo::getPartName(
+        left_date, right_date, parts.front()->info.min_block, parts.back()->info.max_block, level + 1);
 
     LOG_DEBUG(log, "Selected " << parts.size() << " parts from " << parts.front()->name << " to " << parts.back()->name);
     return true;
 }
 
 
-MergeTreeData::DataPartsVector MergeTreeDataMerger::selectAllPartsFromPartition(DayNum_t partition)
+MergeTreeData::DataPartsVector MergeTreeDataMerger::selectAllPartsFromPartition(const String & partition_id)
 {
     MergeTreeData::DataPartsVector parts_from_partition;
 
@@ -296,11 +297,10 @@ MergeTreeData::DataPartsVector MergeTreeDataMerger::selectAllPartsFromPartition(
     for (MergeTreeData::DataParts::iterator it = data_parts.cbegin(); it != data_parts.cend(); ++it)
     {
         const MergeTreeData::DataPartPtr & current_part = *it;
-        DayNum_t month = current_part->month;
-        if (month != partition)
+        if (current_part->info.partition_id != partition_id)
             continue;
 
-        parts_from_partition.push_back(*it);
+        parts_from_partition.push_back(current_part);
     }
 
     return parts_from_partition;
@@ -516,7 +516,8 @@ MergeTreeData::MutableDataPartPtr MergeTreeDataMerger::mergePartsToTemporaryPart
         gathering_columns, gathering_column_names, merging_columns, merging_column_names);
 
     MergeTreeData::MutableDataPartPtr new_data_part = std::make_shared<MergeTreeData::DataPart>(data);
-    ActiveDataPartSet::parsePartName(merged_name, *new_data_part);
+    new_data_part->info = MergeTreePartInfo::fromPartName(merged_name);
+    MergeTreePartInfo::parseMinMaxDatesFromPartName(merged_name, new_data_part->min_date, new_data_part->max_date);
     new_data_part->name = merged_name;
     new_data_part->relative_path = TMP_PREFIX + merged_name;
     new_data_part->is_temp = true;
@@ -855,8 +856,8 @@ MergeTreeData::PerShardDataParts MergeTreeDataMerger::reshardPartition(
     size_t aio_threshold = data.context.getSettings().min_bytes_to_use_direct_io;
 
     /// Assemble all parts of the partition.
-    DayNum_t month = MergeTreeData::getMonthFromName(job.partition);
-    MergeTreeData::DataPartsVector parts = selectAllPartsFromPartition(month);
+    String partition_id = MergeTreeData::getPartitionID(job.partition);
+    MergeTreeData::DataPartsVector parts = selectAllPartsFromPartition(partition_id);
 
     /// Create a temporary folder name.
     std::string merged_name = createMergedPartName(parts);
@@ -951,12 +952,12 @@ MergeTreeData::PerShardDataParts MergeTreeDataMerger::reshardPartition(
         data_part->name = merged_name;
         data_part->relative_path = "reshard/" + toString(shard_no) + "/tmp_" + merged_name;
         data_part->is_temp = true;
-        data_part->left_date = std::numeric_limits<UInt16>::max();
-        data_part->right_date = std::numeric_limits<UInt16>::min();
-        data_part->month = month;
-        data_part->left = temp_index;
-        data_part->right = temp_index;
-        data_part->level = 0;
+        data_part->min_date = std::numeric_limits<UInt16>::max();
+        data_part->max_date = std::numeric_limits<UInt16>::min();
+        data_part->info.partition_id = partition_id;
+        data_part->info.min_block = temp_index;
+        data_part->info.max_block = temp_index;
+        data_part->info.level = 0;
 
         String new_part_tmp_path = data_part->getFullPath();
         Poco::File(new_part_tmp_path).createDirectories();
@@ -1045,10 +1046,10 @@ MergeTreeData::PerShardDataParts MergeTreeDataMerger::reshardPartition(
             rows_written += block_with_dates.block.rows();
             output_stream->write(block_with_dates.block);
 
-            if (block_with_dates.min_date < data_part->left_date)
-                data_part->left_date = block_with_dates.min_date;
-            if (block_with_dates.max_date > data_part->right_date)
-                data_part->right_date = block_with_dates.max_date;
+            if (block_with_dates.min_date < data_part->min_date)
+                data_part->min_date = block_with_dates.min_date;
+            if (block_with_dates.max_date > data_part->max_date)
+                data_part->max_date = block_with_dates.max_date;
 
             merge_entry->rows_written = merged_stream->getProfileInfo().rows;
             merge_entry->bytes_written_uncompressed = merged_stream->getProfileInfo().bytes;
@@ -1092,8 +1093,8 @@ MergeTreeData::PerShardDataParts MergeTreeDataMerger::reshardPartition(
         size_t shard_no = entry.first;
         MergeTreeData::MutableDataPartPtr & part_from_shard = entry.second;
 
-        std::string new_name = ActiveDataPartSet::getPartName(part_from_shard->left_date,
-            part_from_shard->right_date, part_from_shard->left, part_from_shard->right, part_from_shard->level);
+        std::string new_name = MergeTreePartInfo::getPartName(part_from_shard->min_date,
+            part_from_shard->max_date, part_from_shard->info.min_block, part_from_shard->info.max_block, part_from_shard->info.level);
         std::string new_relative_path = "reshard/" + toString(shard_no) + "/" + new_name;
 
         part_from_shard->renameTo(new_relative_path);
diff --git a/dbms/src/Storages/MergeTree/MergeTreeDataMerger.h b/dbms/src/Storages/MergeTree/MergeTreeDataMerger.h
index 90efd8dceb5..48d06988ba5 100644
--- a/dbms/src/Storages/MergeTree/MergeTreeDataMerger.h
+++ b/dbms/src/Storages/MergeTree/MergeTreeDataMerger.h
@@ -58,7 +58,7 @@ public:
         String & merged_name,
         size_t available_disk_space,
         const AllowedMergingPredicate & can_merge,
-        DayNum_t partition,
+        const String & partition_id,
         bool final);
 
     /** Merge the parts.
@@ -93,7 +93,7 @@ public:
 private:
     /** Select all parts belonging to the same partition.
       */
-    MergeTreeData::DataPartsVector selectAllPartsFromPartition(DayNum_t partition);
+    MergeTreeData::DataPartsVector selectAllPartsFromPartition(const String & partition_id);
 
     /** Temporarily cancel merges.
       */
diff --git a/dbms/src/Storages/MergeTree/MergeTreeDataPart.h b/dbms/src/Storages/MergeTree/MergeTreeDataPart.h
index fb64c4a701a..d7c30b9ad22 100644
--- a/dbms/src/Storages/MergeTree/MergeTreeDataPart.h
+++ b/dbms/src/Storages/MergeTree/MergeTreeDataPart.h
@@ -2,7 +2,7 @@
 
 #include <Core/Field.h>
 #include <Core/NamesAndTypes.h>
-#include <Storages/MergeTree/ActiveDataPartSet.h>
+#include <Storages/MergeTree/MergeTreePartInfo.h>
 #include <Columns/IColumn.h>
 #include <shared_mutex>
 
@@ -83,7 +83,7 @@ class MergeTreeData;
 
 
 /// Description of the data part.
-struct MergeTreeDataPart : public ActiveDataPartSet::Part
+struct MergeTreeDataPart
 {
     using Checksums = MergeTreeDataPartChecksums;
     using Checksum = MergeTreeDataPartChecksums::Checksum;
@@ -110,9 +110,17 @@ struct MergeTreeDataPart : public ActiveDataPartSet::Part
     /// Returns part->name with prefixes like 'tmp_<name>'
     String getNameWithPrefix() const;
 
+    bool contains(const MergeTreeDataPart & other) const { return info.contains(other.info); }
+
 
     MergeTreeData & storage;
 
+    String name;
+    MergeTreePartInfo info;
+
+    DayNum_t min_date;
+    DayNum_t max_date;
+
     /// A directory path (realative to storage's path) where part data is actually stored
     /// Examples: 'detached/tmp_fetch_<name>', 'tmp_<name>', '<name>'
     mutable String relative_path;
diff --git a/dbms/src/Storages/MergeTree/MergeTreeDataSelectExecutor.cpp b/dbms/src/Storages/MergeTree/MergeTreeDataSelectExecutor.cpp
index 95a56800c34..ee85110d30d 100644
--- a/dbms/src/Storages/MergeTree/MergeTreeDataSelectExecutor.cpp
+++ b/dbms/src/Storages/MergeTree/MergeTreeDataSelectExecutor.cpp
@@ -234,13 +234,13 @@ BlockInputStreams MergeTreeDataSelectExecutor::read(
             if (part_values.find(part->name) == part_values.end())
                 continue;
 
-            Field left = static_cast<UInt64>(part->left_date);
-            Field right = static_cast<UInt64>(part->right_date);
+            Field left = static_cast<UInt64>(part->min_date);
+            Field right = static_cast<UInt64>(part->max_date);
 
             if (!date_condition.mayBeTrueInRange(1, &left, &right, data_types_date))
                 continue;
 
-            if (max_block_number_to_read && part->right > max_block_number_to_read)
+            if (max_block_number_to_read && part->info.max_block > max_block_number_to_read)
                 continue;
 
             parts.push_back(part);
diff --git a/dbms/src/Storages/MergeTree/MergeTreeDataWriter.cpp b/dbms/src/Storages/MergeTree/MergeTreeDataWriter.cpp
index 0817e4a8e71..e8b2b723ab1 100644
--- a/dbms/src/Storages/MergeTree/MergeTreeDataWriter.cpp
+++ b/dbms/src/Storages/MergeTree/MergeTreeDataWriter.cpp
@@ -105,7 +105,7 @@ MergeTreeData::MutableDataPartPtr MergeTreeDataWriter::writeTempPart(BlockWithDa
     /// This will generate unique name in scope of current server process.
     Int64 temp_index = data.insert_increment.get();
 
-    String part_name = ActiveDataPartSet::getPartName(DayNum_t(min_date), DayNum_t(max_date), temp_index, temp_index, 0);
+    String part_name = MergeTreePartInfo::getPartName(DayNum_t(min_date), DayNum_t(max_date), temp_index, temp_index, 0);
 
     MergeTreeData::MutableDataPartPtr new_data_part = std::make_shared<MergeTreeData::DataPart>(data);
     new_data_part->name = part_name;
@@ -157,14 +157,14 @@ MergeTreeData::MutableDataPartPtr MergeTreeDataWriter::writeTempPart(BlockWithDa
     out.writeWithPermutation(block, perm_ptr);
     MergeTreeData::DataPart::Checksums checksums = out.writeSuffixAndGetChecksums();
 
-    new_data_part->left_date = DayNum_t(min_date);
-    new_data_part->right_date = DayNum_t(max_date);
-    new_data_part->left = temp_index;
-    new_data_part->right = temp_index;
-    new_data_part->level = 0;
+    new_data_part->info.partition_id = toString(date_lut.toNumYYYYMM(min_month));
+    new_data_part->info.min_block = temp_index;
+    new_data_part->info.max_block = temp_index;
+    new_data_part->info.level = 0;
+    new_data_part->min_date = DayNum_t(min_date);
+    new_data_part->max_date = DayNum_t(max_date);
     new_data_part->size = part_size;
     new_data_part->modification_time = time(nullptr);
-    new_data_part->month = min_month;
     new_data_part->columns = columns;
     new_data_part->checksums = checksums;
     new_data_part->index.swap(out.getIndex());
diff --git a/dbms/src/Storages/MergeTree/MergeTreeDataWriter.h b/dbms/src/Storages/MergeTree/MergeTreeDataWriter.h
index c6c4a394d83..8f4466c9e86 100644
--- a/dbms/src/Storages/MergeTree/MergeTreeDataWriter.h
+++ b/dbms/src/Storages/MergeTree/MergeTreeDataWriter.h
@@ -44,7 +44,7 @@ public:
     MergeTreeDataWriter(MergeTreeData & data_) : data(data_), log(&Logger::get(data.getLogName() + " (Writer)")) {}
 
     /** Split the block to blocks, each of them must be written as separate part.
-      *  (split rows by months)
+      *  (split rows by partition)
       * Works deterministically: if same block was passed, function will return same result in same order.
       */
     BlocksWithDateIntervals splitBlockIntoParts(const Block & block);
diff --git a/dbms/src/Storages/MergeTree/MergeTreePartInfo.cpp b/dbms/src/Storages/MergeTree/MergeTreePartInfo.cpp
new file mode 100644
index 00000000000..02c9f7e90e4
--- /dev/null
+++ b/dbms/src/Storages/MergeTree/MergeTreePartInfo.cpp
@@ -0,0 +1,119 @@
+#include <Storages/MergeTree/MergeTreePartInfo.h>
+#include <IO/ReadBufferFromString.h>
+#include <IO/ReadHelpers.h>
+#include <IO/WriteHelpers.h>
+
+namespace DB
+{
+
+namespace ErrorCodes
+{
+    extern const int BAD_DATA_PART_NAME;
+}
+
+/// Parses a part directory name; throws BAD_DATA_PART_NAME if tryParsePartName rejects it.
+MergeTreePartInfo MergeTreePartInfo::fromPartName(const String & dir_name)
+{
+    MergeTreePartInfo parsed;
+    if (tryParsePartName(dir_name, &parsed))
+        return parsed;
+    throw Exception("Unexpected part name: " + dir_name, ErrorCodes::BAD_DATA_PART_NAME);
+}
+
+/// Tries to parse a part directory name `YYYYMMDD_YYYYMMDD_N_N_L`; returns false on mismatch. On success fills *part_info unless it is nullptr.
+bool MergeTreePartInfo::tryParsePartName(const String & dir_name, MergeTreePartInfo * part_info)
+{
+    UInt32 min_yyyymmdd = 0;
+    UInt32 max_yyyymmdd = 0;
+    Int64 min_block_num = 0;
+    Int64 max_block_num = 0;
+    UInt32 level = 0;
+
+    ReadBufferFromString in(dir_name);
+
+    if (!tryReadIntText(min_yyyymmdd, in)
+        || !checkChar('_', in)
+        || !tryReadIntText(max_yyyymmdd, in)
+        || !checkChar('_', in)
+        || !tryReadIntText(min_block_num, in)
+        || !checkChar('_', in)
+        || !tryReadIntText(max_block_num, in)
+        || !checkChar('_', in)
+        || !tryReadIntText(level, in)
+        || !in.eof())
+        return false;
+
+    if (part_info)
+    {
+        /// Take YYYYMM from the parsed number, not from the first 6 characters: the min date is not guaranteed to be 8 characters long.
+        part_info->partition_id = toString(min_yyyymmdd / 100);
+        part_info->min_block = min_block_num;
+        part_info->max_block = max_block_num;
+        part_info->level = level;
+    }
+
+    return true;
+}
+
+/// Parses the leading `YYYYMMDD_YYYYMMDD` of a part name into min_date / max_date; throws BAD_DATA_PART_NAME if it is malformed or spans two months.
+void MergeTreePartInfo::parseMinMaxDatesFromPartName(const String & dir_name, DayNum_t & min_date, DayNum_t & max_date)
+{
+    UInt32 min_yyyymmdd = 0;
+    UInt32 max_yyyymmdd = 0;
+
+    ReadBufferFromString in(dir_name);
+
+    if (!tryReadIntText(min_yyyymmdd, in)
+        || !checkChar('_', in)
+        || !tryReadIntText(max_yyyymmdd, in))
+    {
+        throw Exception("Unexpected part name: " + dir_name, ErrorCodes::BAD_DATA_PART_NAME);
+    }
+
+    const auto & date_lut = DateLUT::instance();
+    /// Convert first: the month check must use the dates parsed from the name, not the incoming values of the out-parameters.
+    DayNum_t parsed_min_date = date_lut.YYYYMMDDToDayNum(min_yyyymmdd);
+    DayNum_t parsed_max_date = date_lut.YYYYMMDDToDayNum(max_yyyymmdd);
+
+    if (date_lut.toFirstDayNumOfMonth(parsed_min_date) != date_lut.toFirstDayNumOfMonth(parsed_max_date))
+        throw Exception("Part name " + dir_name + " contains different months", ErrorCodes::BAD_DATA_PART_NAME);
+
+    min_date = parsed_min_date;
+    max_date = parsed_max_date;
+}
+
+/// Name-based overload: parses both part names (fromPartName throws on a malformed one) and checks that outer contains inner.
+bool MergeTreePartInfo::contains(const String & outer_part_name, const String & inner_part_name)
+{
+    const MergeTreePartInfo outer_info = fromPartName(outer_part_name);
+    const MergeTreePartInfo inner_info = fromPartName(inner_part_name);
+    return outer_info.contains(inner_info);
+}
+
+/// Builds the directory name of a part, which has the form `YYYYMMDD_YYYYMMDD_N_N_L`.
+String MergeTreePartInfo::getPartName(DayNum_t left_date, DayNum_t right_date, Int64 left_id, Int64 right_id, UInt64 level)
+{
+    const auto & lut = DateLUT::instance();
+    unsigned min_yyyymmdd = lut.toNumYYYYMMDD(left_date);
+    unsigned max_yyyymmdd = lut.toNumYYYYMMDD(right_date);
+
+    WriteBufferFromOwnString out;
+
+    /// The two dates.
+    writeIntText(min_yyyymmdd, out);
+    writeChar('_', out);
+    writeIntText(max_yyyymmdd, out);
+    writeChar('_', out);
+
+    /// The two block numbers, then the level.
+    writeIntText(left_id, out);
+    writeChar('_', out);
+    writeIntText(right_id, out);
+    writeChar('_', out);
+    writeIntText(level, out);
+
+    return out.str();
+}
+
+
+}
diff --git a/dbms/src/Storages/MergeTree/MergeTreePartInfo.h b/dbms/src/Storages/MergeTree/MergeTreePartInfo.h
new file mode 100644
index 00000000000..8110fc36390
--- /dev/null
+++ b/dbms/src/Storages/MergeTree/MergeTreePartInfo.h
@@ -0,0 +1,44 @@
+#pragma once
+
+#include <Core/Types.h>
+#include <common/DateLUT.h>
+
+namespace DB
+{
+
+/// Information about partition and the range of blocks contained in the part.
+/// Allows determining if parts are disjoint or one part fully contains the other.
+struct MergeTreePartInfo
+{
+    String partition_id;
+    Int64 min_block = 0;    /// First block number of the range. Initialized so that a default-constructed object is well-defined.
+    Int64 max_block = 0;    /// Last block number of the range (inclusive).
+    UInt32 level = 0;
+
+    bool operator<(const MergeTreePartInfo & rhs) const
+    {
+        return std::forward_as_tuple(partition_id, min_block, max_block, level)
+            < std::forward_as_tuple(rhs.partition_id, rhs.min_block, rhs.max_block, rhs.level);
+    }
+
+    /// True if this part covers rhs: same partition, block range includes the range of rhs, and level is not lower.
+    bool contains(const MergeTreePartInfo & rhs) const
+    {
+        return partition_id == rhs.partition_id        /// Parts for different partitions are not merged
+            && min_block <= rhs.min_block
+            && max_block >= rhs.max_block
+            && level >= rhs.level;
+    }
+
+    /// Parse a part name. fromPartName throws on a malformed name; tryParsePartName returns false instead (part_info may be nullptr).
+    static MergeTreePartInfo fromPartName(const String & part_name);
+    static bool tryParsePartName(const String & dir_name, MergeTreePartInfo * part_info);
+    /// Extracts the min and max dates from the `YYYYMMDD_YYYYMMDD` prefix of the part name; throws if it cannot be parsed.
+    static void parseMinMaxDatesFromPartName(const String & dir_name, DayNum_t & min_date, DayNum_t & max_date);
+    /// Same check as the member contains(), but on part names; throws if either name is malformed.
+    static bool contains(const String & outer_part_name, const String & inner_part_name);
+    /// Builds a part name of the form `YYYYMMDD_YYYYMMDD_N_N_L`.
+    static String getPartName(DayNum_t left_date, DayNum_t right_date, Int64 left_id, Int64 right_id, UInt64 level);
+};
+
+}
diff --git a/dbms/src/Storages/MergeTree/MergedBlockOutputStream.h b/dbms/src/Storages/MergeTree/MergedBlockOutputStream.h
index 92a0dda7013..f8081d0aa17 100644
--- a/dbms/src/Storages/MergeTree/MergedBlockOutputStream.h
+++ b/dbms/src/Storages/MergeTree/MergedBlockOutputStream.h
@@ -91,7 +91,7 @@ private:
 
 
 /** To write one part.
-  * The data refers to one month, and are written in one part.
+  * The data refers to one partition, and is written in one part.
   */
 class MergedBlockOutputStream : public IMergedBlockOutputStream
 {
diff --git a/dbms/src/Storages/MergeTree/ReplicatedMergeTreeBlockOutputStream.cpp b/dbms/src/Storages/MergeTree/ReplicatedMergeTreeBlockOutputStream.cpp
index 94c6579d7a2..92010fbe214 100644
--- a/dbms/src/Storages/MergeTree/ReplicatedMergeTreeBlockOutputStream.cpp
+++ b/dbms/src/Storages/MergeTree/ReplicatedMergeTreeBlockOutputStream.cpp
@@ -166,17 +166,16 @@ void ReplicatedMergeTreeBlockOutputStream::commitPart(zkutil::ZooKeeperPtr & zoo
     /// Obtain incremental block number and lock it. The lock holds our intention to add the block to the filesystem.
     /// We remove the lock just after renaming the part. In case of exception, block number will be marked as abandoned.
 
-    String month_name = toString(DateLUT::instance().toNumYYYYMMDD(DayNum_t(part->left_date)) / 100);
-    AbandonableLockInZooKeeper block_number_lock = storage.allocateBlockNumber(month_name, zookeeper);    /// 2 RTT
-    Int64 part_number = block_number_lock.getNumber();
+    AbandonableLockInZooKeeper block_number_lock = storage.allocateBlockNumber(part->info.partition_id, zookeeper);    /// 2 RTT
+    Int64 block_number = block_number_lock.getNumber();
 
     /// Set part attributes according to part_number. Prepare an entry for log.
 
-    part->left = part_number;
-    part->right = part_number;
-    part->level = 0;
+    part->info.min_block = block_number;
+    part->info.max_block = block_number;
+    part->info.level = 0;
 
-    String part_name = ActiveDataPartSet::getPartName(part->left_date, part->right_date, part->left, part->right, part->level);
+    String part_name = MergeTreePartInfo::getPartName(part->min_date, part->max_date, block_number, block_number, 0);
 
     part->name = part_name;
 
@@ -198,7 +197,7 @@ void ReplicatedMergeTreeBlockOutputStream::commitPart(zkutil::ZooKeeperPtr & zoo
         ops.emplace_back(
             std::make_unique<zkutil::Op::Create>(
                 storage.zookeeper_path + "/blocks/" + block_id,
-                toString(part_number),  /// We will able to know original part number for duplicate blocks, if we want.
+                toString(block_number),  /// We will able to know original part number for duplicate blocks, if we want.
                 acl,
                 zkutil::CreateMode::Persistent));
 
@@ -303,13 +302,13 @@ void ReplicatedMergeTreeBlockOutputStream::commitPart(zkutil::ZooKeeperPtr & zoo
             {
                 /// if the node with the quorum existed, but was quickly removed.
 
-                throw Exception("Unexpected ZNODEEXISTS while adding block " + toString(part_number) + " with ID '" + block_id + "': "
+                throw Exception("Unexpected ZNODEEXISTS while adding block " + toString(block_number) + " with ID '" + block_id + "': "
                     + zkutil::ZooKeeper::error2string(code), ErrorCodes::UNEXPECTED_ZOOKEEPER_ERROR);
             }
         }
         else
         {
-            throw Exception("Unexpected error while adding block " + toString(part_number) + " with ID '" + block_id + "': "
+            throw Exception("Unexpected error while adding block " + toString(block_number) + " with ID '" + block_id + "': "
                 + zkutil::ZooKeeper::error2string(code), ErrorCodes::UNEXPECTED_ZOOKEEPER_ERROR);
         }
     }
diff --git a/dbms/src/Storages/MergeTree/ReplicatedMergeTreeLogEntry.h b/dbms/src/Storages/MergeTree/ReplicatedMergeTreeLogEntry.h
index 185f0a1a9fb..913115f185c 100644
--- a/dbms/src/Storages/MergeTree/ReplicatedMergeTreeLogEntry.h
+++ b/dbms/src/Storages/MergeTree/ReplicatedMergeTreeLogEntry.h
@@ -34,7 +34,7 @@ struct ReplicatedMergeTreeLogEntryData
         EMPTY,          /// Not used.
         GET_PART,       /// Get the part from another replica.
         MERGE_PARTS,    /// Merge the parts.
-        DROP_RANGE,     /// Delete the parts in the specified month in the specified number range.
+        DROP_RANGE,     /// Delete the parts in the specified partition in the specified number range.
         ATTACH_PART,    /// Move a part from the `detached` directory. Obsolete. TODO: Remove after half year.
         CLEAR_COLUMN,   /// Drop specific column from specified partition.
     };
diff --git a/dbms/src/Storages/MergeTree/ReplicatedMergeTreePartCheckThread.cpp b/dbms/src/Storages/MergeTree/ReplicatedMergeTreePartCheckThread.cpp
index d377c50f00b..bb7ae636b7c 100644
--- a/dbms/src/Storages/MergeTree/ReplicatedMergeTreePartCheckThread.cpp
+++ b/dbms/src/Storages/MergeTree/ReplicatedMergeTreePartCheckThread.cpp
@@ -86,8 +86,7 @@ void ReplicatedMergeTreePartCheckThread::searchForMissingPart(const String & par
     }
 
     /// If the part is not in ZooKeeper, we'll check if it's at least somewhere.
-    ActiveDataPartSet::Part part_info;
-    ActiveDataPartSet::parsePartName(part_name, part_info);
+    auto part_info = MergeTreePartInfo::fromPartName(part_name);
 
     /** The logic is this:
         * - if some live or inactive replica has such a part, or a part covering it
@@ -103,7 +102,7 @@ void ReplicatedMergeTreePartCheckThread::searchForMissingPart(const String & par
 
     bool found = false;
 
-    size_t part_length_in_blocks = part_info.right + 1 - part_info.left;
+    size_t part_length_in_blocks = part_info.max_block + 1 - part_info.min_block;
     std::vector<char> found_blocks(part_length_in_blocks);
 
     Strings replicas = zookeeper->getChildren(storage.zookeeper_path + "/replicas");
@@ -112,20 +111,20 @@ void ReplicatedMergeTreePartCheckThread::searchForMissingPart(const String & par
         Strings parts = zookeeper->getChildren(storage.zookeeper_path + "/replicas/" + replica + "/parts");
         for (const String & part_on_replica : parts)
         {
-            if (part_on_replica == part_name || ActiveDataPartSet::contains(part_on_replica, part_name))
+            auto part_on_replica_info = MergeTreePartInfo::fromPartName(part_on_replica);
+
+            if (part_on_replica == part_name || part_on_replica_info.contains(part_info))
             {
                 found = true;
                 LOG_WARNING(log, "Found part " << part_on_replica << " on " << replica);
                 break;
             }
 
-            if (ActiveDataPartSet::contains(part_name, part_on_replica))
+            if (part_info.contains(part_on_replica_info))
             {
-                ActiveDataPartSet::Part part_on_replica_info;
-                ActiveDataPartSet::parsePartName(part_on_replica, part_on_replica_info);
 
-                for (auto block_num = part_on_replica_info.left; block_num <= part_on_replica_info.right; ++block_num)
-                    found_blocks.at(block_num - part_info.left) = 1;
+                for (auto block_num = part_on_replica_info.min_block; block_num <= part_on_replica_info.max_block; ++block_num)
+                    found_blocks.at(block_num - part_info.min_block) = 1;
             }
         }
         if (found)
@@ -192,12 +191,12 @@ void ReplicatedMergeTreePartCheckThread::searchForMissingPart(const String & par
         return;
     }
 
-    const auto partition_str = part_name.substr(0, 6);
-    for (auto i = part_info.left; i <= part_info.right; ++i)
+    const String & partition_id = part_info.partition_id;
+    for (auto i = part_info.min_block; i <= part_info.max_block; ++i)
     {
-        zookeeper->createIfNotExists(storage.zookeeper_path + "/nonincrement_block_numbers/" + partition_str, "");
+        zookeeper->createIfNotExists(storage.zookeeper_path + "/nonincrement_block_numbers/" + partition_id, "");
         AbandonableLockInZooKeeper::createAbandonedIfNotExists(
-            storage.zookeeper_path + "/nonincrement_block_numbers/" + partition_str + "/block-" + padIndex(i),
+            storage.zookeeper_path + "/nonincrement_block_numbers/" + partition_id + "/block-" + padIndex(i),
             *zookeeper);
     }
 }
diff --git a/dbms/src/Storages/MergeTree/ReplicatedMergeTreeQueue.cpp b/dbms/src/Storages/MergeTree/ReplicatedMergeTreeQueue.cpp
index 3e8d3c0c887..dc24c934baf 100644
--- a/dbms/src/Storages/MergeTree/ReplicatedMergeTreeQueue.cpp
+++ b/dbms/src/Storages/MergeTree/ReplicatedMergeTreeQueue.cpp
@@ -421,7 +421,7 @@ void ReplicatedMergeTreeQueue::removeGetsAndMergesInRange(zkutil::ZooKeeperPtr z
     for (Queue::iterator it = queue.begin(); it != queue.end();)
     {
         if (((*it)->type == LogEntry::GET_PART || (*it)->type == LogEntry::MERGE_PARTS) &&
-            ActiveDataPartSet::contains(part_name, (*it)->new_part_name))
+            MergeTreePartInfo::contains(part_name, (*it)->new_part_name))
         {
             if ((*it)->currently_executing)
                 to_wait.push_back(*it);
@@ -460,18 +460,16 @@ ReplicatedMergeTreeQueue::Queue ReplicatedMergeTreeQueue::getConflictsForClearCo
         {
             if (elem->type == LogEntry::MERGE_PARTS || elem->type == LogEntry::GET_PART || elem->type == LogEntry::ATTACH_PART)
             {
-                if (ActiveDataPartSet::contains(entry.new_part_name, elem->new_part_name))
+                if (MergeTreePartInfo::contains(entry.new_part_name, elem->new_part_name))
                     conflicts.emplace_back(elem);
             }
 
             if (elem->type == LogEntry::CLEAR_COLUMN)
             {
-                ActiveDataPartSet::Part cur_part;
-                ActiveDataPartSet::parsePartName(elem->new_part_name, cur_part);
-                ActiveDataPartSet::Part part;
-                ActiveDataPartSet::parsePartName(entry.new_part_name, part);
+                auto cur_part = MergeTreePartInfo::fromPartName(elem->new_part_name);
+                auto part = MergeTreePartInfo::fromPartName(entry.new_part_name);
 
-                if (part.month == cur_part.month)
+                if (part.partition_id == cur_part.partition_id)
                     conflicts.emplace_back(elem);
             }
         }
@@ -525,14 +523,12 @@ bool ReplicatedMergeTreeQueue::isNotCoveredByFuturePartsImpl(const String & new_
 
     /// A more complex check is whether another part is currently created by other action that will cover this part.
     /// NOTE The above is redundant, but left for a more convenient message in the log.
-    ActiveDataPartSet::Part result_part;
-    ActiveDataPartSet::parsePartName(new_part_name, result_part);
+    auto result_part = MergeTreePartInfo::fromPartName(new_part_name);
 
     /// It can slow down when the size of `future_parts` is large. But it can not be large, since `BackgroundProcessingPool` is limited.
     for (const auto & future_part_name : future_parts)
     {
-        ActiveDataPartSet::Part future_part;
-        ActiveDataPartSet::parsePartName(future_part_name, future_part);
+        auto future_part = MergeTreePartInfo::fromPartName(future_part_name);
 
         if (future_part.contains(result_part))
         {
diff --git a/dbms/src/Storages/MergeTree/ReshardingWorker.cpp b/dbms/src/Storages/MergeTree/ReshardingWorker.cpp
index 7dc8d622aeb..7438cc6f266 100644
--- a/dbms/src/Storages/MergeTree/ReshardingWorker.cpp
+++ b/dbms/src/Storages/MergeTree/ReshardingWorker.cpp
@@ -174,10 +174,10 @@ std::string computeHashFromPartition(const std::string & data_path, const std::s
 
     for (Poco::DirectoryIterator it(data_path); it != end; ++it)
     {
-        const auto filename = it.name();
-        if (!ActiveDataPartSet::isPartDirectory(filename))
+        MergeTreePartInfo part_info;
+        if (!MergeTreePartInfo::tryParsePartName(it.name(), &part_info))
             continue;
-        if (!startsWith(filename, partition_name))
+        if (part_info.partition_id != partition_name)
             continue;
 
         const auto part_path = it.path().absolute().toString();
diff --git a/dbms/src/Storages/MergeTree/ShardedPartitionUploader.cpp b/dbms/src/Storages/MergeTree/ShardedPartitionUploader.cpp
index 175b261fafa..55950a5a93d 100644
--- a/dbms/src/Storages/MergeTree/ShardedPartitionUploader.cpp
+++ b/dbms/src/Storages/MergeTree/ShardedPartitionUploader.cpp
@@ -104,7 +104,8 @@ void Service::processQuery(const Poco::Net::HTMLForm & params, ReadBuffer & body
 
     assertEOF(body);
 
-    ActiveDataPartSet::parsePartName(part_name, *data_part);
+    data_part->info = MergeTreePartInfo::fromPartName(part_name);
+    MergeTreePartInfo::parseMinMaxDatesFromPartName(part_name, data_part->min_date, data_part->max_date);
     data_part->modification_time = time(nullptr);
     data_part->loadColumns(true);
     data_part->loadChecksums(true);
diff --git a/dbms/src/Storages/StorageMergeTree.cpp b/dbms/src/Storages/StorageMergeTree.cpp
index bc1ad69b13a..b96ba9ad7bd 100644
--- a/dbms/src/Storages/StorageMergeTree.cpp
+++ b/dbms/src/Storages/StorageMergeTree.cpp
@@ -12,6 +12,7 @@
 #include <Parsers/ASTFunction.h>
 #include <Parsers/ASTSelectQuery.h>
 #include <Storages/MergeTree/MergeTreeData.h>
+#include <Storages/MergeTree/ActiveDataPartSet.h>
 
 #include <Poco/DirectoryIterator.h>
 #include <Poco/File.h>
@@ -320,8 +321,7 @@ bool StorageMergeTree::merge(
         }
         else
         {
-            DayNum_t month = MergeTreeData::getMonthFromName(partition);
-            selected = merger.selectAllPartsToMergeWithinPartition(parts, merged_name, disk_space, can_merge, month, final);
+            selected = merger.selectAllPartsToMergeWithinPartition(parts, merged_name, disk_space, can_merge, partition, final);
         }
 
         if (!selected)
@@ -406,7 +406,7 @@ void StorageMergeTree::clearColumnInPartition(const ASTPtr & query, const Field
     auto lock_read_structure = lockStructure(false);
     auto lock_write_data = lockDataForAlter();
 
-    DayNum_t month = MergeTreeData::getMonthDayNum(partition);
+    String partition_id = MergeTreeData::getPartitionID(partition);
     MergeTreeData::DataParts parts = data.getDataParts();
 
     std::vector<MergeTreeData::AlterDataPartTransactionPtr> transactions;
@@ -428,7 +428,7 @@ void StorageMergeTree::clearColumnInPartition(const ASTPtr & query, const Field
 
     for (const auto & part : parts)
     {
-        if (part->month != month)
+        if (part->info.partition_id != partition_id)
             continue;
 
         if (auto transaction = data.alterDataPart(part, columns_for_parts, data.primary_expr_ast, false))
@@ -455,14 +455,14 @@ void StorageMergeTree::dropPartition(const ASTPtr & query, const Field & partiti
     /// Waits for completion of merge and does not start new ones.
     auto lock = lockForAlter();
 
-    DayNum_t month = MergeTreeData::getMonthDayNum(partition);
+    String partition_id = MergeTreeData::getPartitionID(partition);
 
     size_t removed_parts = 0;
     MergeTreeData::DataParts parts = data.getDataParts();
 
     for (const auto & part : parts)
     {
-        if (part->month != month)
+        if (part->info.partition_id != partition_id)
             continue;
 
         LOG_DEBUG(log, "Removing part " << part->name);
@@ -485,7 +485,7 @@ void StorageMergeTree::attachPartition(const ASTPtr & query, const Field & field
     if (part)
         partition = field.getType() == Field::Types::UInt64 ? toString(field.get<UInt64>()) : field.safeGet<String>();
     else
-        partition = MergeTreeData::getMonthName(field);
+        partition = MergeTreeData::getPartitionID(field);
 
     String source_dir = "detached/";
 
@@ -502,10 +502,12 @@ void StorageMergeTree::attachPartition(const ASTPtr & query, const Field & field
         for (Poco::DirectoryIterator it = Poco::DirectoryIterator(full_path + source_dir); it != Poco::DirectoryIterator(); ++it)
         {
             String name = it.name();
-            if (!ActiveDataPartSet::isPartDirectory(name))
-                continue;
-            if (name.substr(0, partition.size()) != partition)
+            MergeTreePartInfo part_info;
+            if (!MergeTreePartInfo::tryParsePartName(name, &part_info)
+                || part_info.partition_id != partition)
+            {
                 continue;
+            }
             LOG_DEBUG(log, "Found part " << name);
             active_parts.add(name);
         }
diff --git a/dbms/src/Storages/StorageReplicatedMergeTree.cpp b/dbms/src/Storages/StorageReplicatedMergeTree.cpp
index 410bb309731..603c6b3c1b5 100644
--- a/dbms/src/Storages/StorageReplicatedMergeTree.cpp
+++ b/dbms/src/Storages/StorageReplicatedMergeTree.cpp
@@ -791,7 +791,7 @@ void StorageReplicatedMergeTree::checkParts(bool skip_sanity_checks)
       */
     size_t unexpected_parts_nonnew = 0;
     for (const auto & part : unexpected_parts)
-        if (part->level > 0)
+        if (part->info.level > 0)
             ++unexpected_parts_nonnew;
 
     String sanity_report = "There are "
@@ -1252,20 +1252,18 @@ bool StorageReplicatedMergeTree::executeLogEntry(const LogEntry & entry)
                         {
                             ops.emplace_back(std::make_unique<zkutil::Op::Remove>(quorum_path, quorum_stat.version));
 
-                            const auto partition_str = entry.new_part_name.substr(0, 6);
-                            ActiveDataPartSet::Part part_info;
-                            ActiveDataPartSet::parsePartName(entry.new_part_name, part_info);
+                            auto part_info = MergeTreePartInfo::fromPartName(entry.new_part_name);
 
-                            if (part_info.left != part_info.right)
+                            if (part_info.min_block != part_info.max_block)
                                 throw Exception("Logical error: log entry with quorum for part covering more than one block number",
                                     ErrorCodes::LOGICAL_ERROR);
 
-                            zookeeper->createIfNotExists(zookeeper_path + "/nonincrement_block_numbers/" + partition_str, "");
+                            zookeeper->createIfNotExists(zookeeper_path + "/nonincrement_block_numbers/" + part_info.partition_id, "");
 
                             auto acl = zookeeper->getDefaultACL();
 
                             ops.emplace_back(std::make_unique<zkutil::Op::Create>(
-                                zookeeper_path + "/nonincrement_block_numbers/" + partition_str + "/block-" + padIndex(part_info.left),
+                                zookeeper_path + "/nonincrement_block_numbers/" + part_info.partition_id + "/block-" + padIndex(part_info.min_block),
                                 "",
                                 acl,
                                 zkutil::CreateMode::Persistent));
@@ -1373,6 +1371,8 @@ void StorageReplicatedMergeTree::executeDropRange(const StorageReplicatedMergeTr
     LOG_DEBUG(log, (entry.detach ? "Detaching" : "Removing") << " parts.");
     size_t removed_parts = 0;
 
+    auto entry_part_info = MergeTreePartInfo::fromPartName(entry.new_part_name);
+
     /// Delete the parts contained in the range to be deleted.
     /// It's important that no old parts remain (after the merge), because otherwise,
     ///  after adding a new replica, this new replica downloads them, but does not delete them.
@@ -1381,7 +1381,7 @@ void StorageReplicatedMergeTree::executeDropRange(const StorageReplicatedMergeTr
     auto parts = data.getAllDataParts();
     for (const auto & part : parts)
     {
-        if (!ActiveDataPartSet::contains(entry.new_part_name, part->name))
+        if (!entry_part_info.contains(part->info))
             continue;
 
         LOG_DEBUG(log, "Removing part " << part->name);
@@ -1417,6 +1417,8 @@ void StorageReplicatedMergeTree::executeClearColumnInPartition(const LogEntry &
     /// So, if conflicts are found, throw an exception and will retry execution later
     queue.disableMergesAndFetchesInRange(entry);
 
+    auto entry_part_info = MergeTreePartInfo::fromPartName(entry.new_part_name);
+
     /// We don't change table structure, only data in some parts, disable reading from them
     auto lock_read_structure = lockStructure(false);
     auto lock_write_data = lockDataForAlter();
@@ -1442,7 +1444,7 @@ void StorageReplicatedMergeTree::executeClearColumnInPartition(const LogEntry &
     auto parts = data.getDataParts();
     for (const auto & part : parts)
     {
-        if (!ActiveDataPartSet::contains(entry.new_part_name, part->name))
+        if (!entry_part_info.contains(part->info))
             continue;
 
         LOG_DEBUG(log, "Clearing column " << entry.column_name << " in part " << part->name);
@@ -1578,7 +1580,7 @@ namespace
         const MergeTreeData::DataPartPtr & right,
         zkutil::ZooKeeperPtr && zookeeper, const String & zookeeper_path, const MergeTreeData & data)
     {
-        String month_name = left->name.substr(0, 6);
+        const String & partition_id = left->info.partition_id;
 
         /// You can not merge parts, among which is a part for which the quorum is unsatisfied.
         /// Note: theoretically, this could be resolved. But this will make logic more complex.
@@ -1588,13 +1590,12 @@ namespace
             ReplicatedMergeTreeQuorumEntry quorum_entry;
             quorum_entry.fromString(quorum_node_value);
 
-            ActiveDataPartSet::Part part_info;
-            ActiveDataPartSet::parsePartName(quorum_entry.part_name, part_info);
+            auto part_info = MergeTreePartInfo::fromPartName(quorum_entry.part_name);
 
-            if (part_info.left != part_info.right)
+            if (part_info.min_block != part_info.max_block)
                 throw Exception("Logical error: part written with quorum covers more than one block numbers", ErrorCodes::LOGICAL_ERROR);
 
-            if (left->right <= part_info.left && right->left >= part_info.right)
+            if (left->info.max_block <= part_info.min_block && right->info.min_block >= part_info.max_block)
                 return false;
         }
 
@@ -1603,21 +1604,20 @@ namespace
         String quorum_last_part;
         if (zookeeper->tryGet(zookeeper_path + "/quorum/last_part", quorum_last_part) && quorum_last_part.empty() == false)
         {
-            ActiveDataPartSet::Part part_info;
-            ActiveDataPartSet::parsePartName(quorum_last_part, part_info);
+            auto part_info = MergeTreePartInfo::fromPartName(quorum_last_part);
 
-            if (part_info.left != part_info.right)
+            if (part_info.min_block != part_info.max_block)
                 throw Exception("Logical error: part written with quorum covers more than one block numbers", ErrorCodes::LOGICAL_ERROR);
 
-            if (left->right <= part_info.left && right->left >= part_info.right)
+            if (left->info.max_block <= part_info.min_block && right->info.min_block >= part_info.max_block)
                 return false;
         }
 
         /// You can merge the parts, if all the numbers between them are abandoned - do not correspond to any blocks.
-        for (Int64 number = left->right + 1; number <= right->left - 1; ++number)
+        for (Int64 number = left->info.max_block + 1; number <= right->info.min_block - 1; ++number)
         {
-            String path1 = zookeeper_path +              "/block_numbers/" + month_name + "/block-" + padIndex(number);
-            String path2 = zookeeper_path + "/nonincrement_block_numbers/" + month_name + "/block-" + padIndex(number);
+            String path1 = zookeeper_path +              "/block_numbers/" + partition_id + "/block-" + padIndex(number);
+            String path2 = zookeeper_path + "/nonincrement_block_numbers/" + partition_id + "/block-" + padIndex(number);
 
             if (AbandonableLockInZooKeeper::check(path1, *zookeeper) != AbandonableLockInZooKeeper::ABANDONED &&
                 AbandonableLockInZooKeeper::check(path2, *zookeeper) != AbandonableLockInZooKeeper::ABANDONED)
@@ -1849,14 +1849,14 @@ bool StorageReplicatedMergeTree::createLogEntryToMergeParts(
     String path_created = zookeeper->create(zookeeper_path + "/log/log-", entry.toString(), zkutil::CreateMode::PersistentSequential);
     entry.znode_name = path_created.substr(path_created.find_last_of('/') + 1);
 
-    String month_name = parts[0]->name.substr(0, 6);
+    const String & partition_id = parts[0]->info.partition_id;
     for (size_t i = 0; i + 1 < parts.size(); ++i)
     {
         /// Remove the unnecessary entries about non-existent blocks.
-        for (Int64 number = parts[i]->right + 1; number <= parts[i + 1]->left - 1; ++number)
+        for (Int64 number = parts[i]->info.max_block + 1; number <= parts[i + 1]->info.min_block - 1; ++number)
         {
-            zookeeper->tryRemove(zookeeper_path +              "/block_numbers/" + month_name + "/block-" + padIndex(number));
-            zookeeper->tryRemove(zookeeper_path + "/nonincrement_block_numbers/" + month_name + "/block-" + padIndex(number));
+            zookeeper->tryRemove(zookeeper_path +              "/block_numbers/" + partition_id + "/block-" + padIndex(number));
+            zookeeper->tryRemove(zookeeper_path + "/nonincrement_block_numbers/" + partition_id + "/block-" + padIndex(number));
         }
     }
 
@@ -1962,9 +1962,9 @@ String StorageReplicatedMergeTree::findReplicaHavingCoveringPart(const LogEntry
         Strings parts = zookeeper->getChildren(zookeeper_path + "/replicas/" + replica + "/parts");
         for (const String & part_on_replica : parts)
         {
-            if (part_on_replica == entry.new_part_name || ActiveDataPartSet::contains(part_on_replica, entry.new_part_name))
+            if (part_on_replica == entry.new_part_name || MergeTreePartInfo::contains(part_on_replica, entry.new_part_name))
             {
-                if (largest_part_found.empty() || ActiveDataPartSet::contains(part_on_replica, largest_part_found))
+                if (largest_part_found.empty() || MergeTreePartInfo::contains(part_on_replica, largest_part_found))
                 {
                     largest_part_found = part_on_replica;
                 }
@@ -2306,16 +2306,14 @@ BlockInputStreams StorageReplicatedMergeTree::read(
             {
                 ReplicatedMergeTreeQuorumEntry quorum_entry;
                 quorum_entry.fromString(quorum_str);
-                ActiveDataPartSet::Part part_info;
-                ActiveDataPartSet::parsePartName(quorum_entry.part_name, part_info);
-                max_block_number_to_read = part_info.left - 1;
+                auto part_info = MergeTreePartInfo::fromPartName(quorum_entry.part_name);
+                max_block_number_to_read = part_info.min_block - 1;
             }
         }
         else
         {
-            ActiveDataPartSet::Part part_info;
-            ActiveDataPartSet::parsePartName(last_part, part_info);
-            max_block_number_to_read = part_info.right;
+            auto part_info = MergeTreePartInfo::fromPartName(last_part);
+            max_block_number_to_read = part_info.max_block;
         }
     }
 
@@ -2340,7 +2338,7 @@ BlockOutputStreamPtr StorageReplicatedMergeTree::write(const ASTPtr & query, con
 }
 
 
-bool StorageReplicatedMergeTree::optimize(const ASTPtr & query, const String & partition, bool final, bool deduplicate, const Settings & settings)
+bool StorageReplicatedMergeTree::optimize(const ASTPtr & query, const String & partition_id, bool final, bool deduplicate, const Settings & settings)
 {
     assertNotReadonly();
 
@@ -2369,14 +2367,13 @@ bool StorageReplicatedMergeTree::optimize(const ASTPtr & query, const String & p
 
         bool selected = false;
 
-        if (partition.empty())
+        if (partition_id.empty())
         {
             selected = merger.selectPartsToMerge(parts, merged_name, false, data.settings.max_bytes_to_merge_at_max_space_in_pool, can_merge);
         }
         else
         {
-            DayNum_t month = MergeTreeData::getMonthFromName(partition);
-            selected = merger.selectAllPartsToMergeWithinPartition(parts, merged_name, disk_space, can_merge, month, final);
+            selected = merger.selectAllPartsToMergeWithinPartition(parts, merged_name, disk_space, can_merge, partition_id, final);
         }
 
         if (!selected)
@@ -2563,27 +2560,27 @@ void StorageReplicatedMergeTree::alter(const AlterCommands & params,
 }
 
 
-/// The name of an imaginary part covering all possible parts in the specified month with numbers in the range from zero to specified right bound.
-static String getFakePartNameCoveringPartRange(const String & month_name, UInt64 left, UInt64 right)
+/// The name of an imaginary part covering all possible parts in the specified partition with numbers in the range from zero to specified right bound.
+static String getFakePartNameCoveringPartRange(const String & partition_id, UInt64 left, UInt64 right)
 {
     /// The date range is all month long.
     const auto & lut = DateLUT::instance();
-    time_t start_time = lut.YYYYMMDDToDate(parse<UInt32>(month_name + "01"));
+    time_t start_time = lut.YYYYMMDDToDate(parse<UInt32>(partition_id + "01"));
     DayNum_t left_date = lut.toDayNum(start_time);
     DayNum_t right_date = DayNum_t(static_cast<size_t>(left_date) + lut.daysInMonth(start_time) - 1);
 
     /// Artificial high level is choosen, to make this part "covering" all parts inside.
-    return ActiveDataPartSet::getPartName(left_date, right_date, left, right, 999999999);
+    return MergeTreePartInfo::getPartName(left_date, right_date, left, right, 999999999);
 }
 
 
-String StorageReplicatedMergeTree::getFakePartNameCoveringAllPartsInPartition(const String & month_name)
+String StorageReplicatedMergeTree::getFakePartNameCoveringAllPartsInPartition(const String & partition_id)
 {
     /// Even if there is no data in the partition, you still need to mark the range for deletion.
     /// - Because before executing DETACH, tasks for downloading parts to this partition can be executed.
     Int64 left = 0;
 
-    /** Let's skip one number in `block_numbers` for the month being deleted, and we will only delete parts until this number.
+    /** Let's skip one number in `block_numbers` for the partition being deleted, and we will only delete parts until this number.
       * This prohibits merges of deleted parts with the new inserted data.
       * Invariant: merges of deleted parts with other parts do not appear in the log.
       * NOTE: If you need to similarly support a `DROP PART` request, you will have to think of some new mechanism for it,
@@ -2593,7 +2590,7 @@ String StorageReplicatedMergeTree::getFakePartNameCoveringAllPartsInPartition(co
 
     {
         auto zookeeper = getZooKeeper();
-        AbandonableLockInZooKeeper block_number_lock = allocateBlockNumber(month_name, zookeeper);
+        AbandonableLockInZooKeeper block_number_lock = allocateBlockNumber(partition_id, zookeeper);
         right = block_number_lock.getNumber();
         block_number_lock.unlock();
     }
@@ -2603,7 +2600,7 @@ String StorageReplicatedMergeTree::getFakePartNameCoveringAllPartsInPartition(co
         return {};
 
     --right;
-    return getFakePartNameCoveringPartRange(month_name, left, right);
+    return getFakePartNameCoveringPartRange(partition_id, left, right);
 }
 
 
@@ -2614,12 +2611,12 @@ void StorageReplicatedMergeTree::clearColumnInPartition(
 
     /// We don't block merges, so anyone can manage this task (not only leader)
 
-    String month_name = MergeTreeData::getMonthName(partition);
-    String fake_part_name = getFakePartNameCoveringAllPartsInPartition(month_name);
+    String partition_id = MergeTreeData::getPartitionID(partition);
+    String fake_part_name = getFakePartNameCoveringAllPartsInPartition(partition_id);
 
     if (fake_part_name.empty())
     {
-        LOG_INFO(log, "Will not clear partition " << month_name << ", it is empty.");
+        LOG_INFO(log, "Will not clear partition " << partition_id << ", it is empty.");
         return;
     }
 
@@ -2654,12 +2651,12 @@ void StorageReplicatedMergeTree::dropPartition(const ASTPtr & query, const Field
         return;
     }
 
-    String month_name = MergeTreeData::getMonthName(partition);
-    String fake_part_name = getFakePartNameCoveringAllPartsInPartition(month_name);
+    String partition_id = MergeTreeData::getPartitionID(partition);
+    String fake_part_name = getFakePartNameCoveringAllPartsInPartition(partition_id);
 
     if (fake_part_name.empty())
     {
-        LOG_INFO(log, "Will not drop partition " << month_name << ", it is empty.");
+        LOG_INFO(log, "Will not drop partition " << partition_id << ", it is empty.");
         return;
     }
 
@@ -2699,12 +2696,12 @@ void StorageReplicatedMergeTree::attachPartition(const ASTPtr & query, const Fie
 {
     assertNotReadonly();
 
-    String partition;
+    String partition_id;
 
     if (attach_part)
-        partition = field.safeGet<String>();
+        partition_id = field.safeGet<String>();
     else
-        partition = MergeTreeData::getMonthName(field);
+        partition_id = MergeTreeData::getPartitionID(field);
 
     String source_dir = "detached/";
 
@@ -2712,20 +2709,21 @@ void StorageReplicatedMergeTree::attachPartition(const ASTPtr & query, const Fie
     Strings parts;
     if (attach_part)
     {
-        parts.push_back(partition);
+        parts.push_back(partition_id);
     }
     else
     {
-        LOG_DEBUG(log, "Looking for parts for partition " << partition << " in " << source_dir);
+        LOG_DEBUG(log, "Looking for parts for partition " << partition_id << " in " << source_dir);
         ActiveDataPartSet active_parts;
 
         std::set<String> part_names;
         for (Poco::DirectoryIterator it = Poco::DirectoryIterator(full_path + source_dir); it != Poco::DirectoryIterator(); ++it)
         {
             String name = it.name();
-            if (!ActiveDataPartSet::isPartDirectory(name))
+            MergeTreePartInfo part_info;
+            if (!MergeTreePartInfo::tryParsePartName(name, &part_info))
                 continue;
-            if (!startsWith(name, partition))
+            if (part_info.partition_id != partition_id)
                 continue;
             LOG_DEBUG(log, "Found part " << name);
             active_parts.add(name);
@@ -2837,18 +2835,18 @@ bool StorageReplicatedMergeTree::existsNodeCached(const std::string & path)
 }
 
 
-AbandonableLockInZooKeeper StorageReplicatedMergeTree::allocateBlockNumber(const String & month_name, zkutil::ZooKeeperPtr & zookeeper)
+AbandonableLockInZooKeeper StorageReplicatedMergeTree::allocateBlockNumber(const String & partition_id, zkutil::ZooKeeperPtr & zookeeper)
 {
-    String month_path = zookeeper_path + "/block_numbers/" + month_name;
-    if (!existsNodeCached(month_path))
+    String partition_path = zookeeper_path + "/block_numbers/" + partition_id;
+    if (!existsNodeCached(partition_path))
     {
-        int code = zookeeper->tryCreate(month_path, "", zkutil::CreateMode::Persistent);
+        int code = zookeeper->tryCreate(partition_path, "", zkutil::CreateMode::Persistent);
         if (code != ZOK && code != ZNODEEXISTS)
-            throw zkutil::KeeperException(code, month_path);
+            throw zkutil::KeeperException(code, partition_path);
     }
 
     return AbandonableLockInZooKeeper(
-        month_path + "/block-",
+        partition_path + "/block-",
         zookeeper_path + "/temp", *zookeeper);
 }
 
@@ -3209,21 +3207,24 @@ void StorageReplicatedMergeTree::getReplicaDelays(time_t & out_absolute_delay, t
 
 void StorageReplicatedMergeTree::fetchPartition(const Field & partition, const String & from_, const Settings & settings)
 {
-    String partition_str = MergeTreeData::getMonthName(partition);
+    String partition_id = MergeTreeData::getPartitionID(partition);
 
     String from = from_;
     if (from.back() == '/')
         from.resize(from.size() - 1);
 
-    LOG_INFO(log, "Will fetch partition " << partition_str << " from shard " << from_);
+    LOG_INFO(log, "Will fetch partition " << partition_id << " from shard " << from_);
 
     /** Let's check that there is no such partition in the `detached` directory (where we will write the downloaded parts).
       * Unreliable (there is a race condition) - such a partition may appear a little later.
       */
     Poco::DirectoryIterator dir_end;
     for (Poco::DirectoryIterator dir_it{data.getFullPath() + "detached/"}; dir_it != dir_end; ++dir_it)
-        if (startsWith(dir_it.name(), partition_str))
-            throw Exception("Detached partition " + partition_str + " already exists.", ErrorCodes::PARTITION_ALREADY_EXISTS);
+    {
+        MergeTreePartInfo part_info;
+        if (MergeTreePartInfo::tryParsePartName(dir_it.name(), &part_info) && part_info.partition_id == partition_id)
+            throw Exception("Detached partition " + partition_id + " already exists.", ErrorCodes::PARTITION_ALREADY_EXISTS);
+    }
 
     zkutil::Strings replicas;
     zkutil::Strings active_replicas;
@@ -3311,13 +3312,15 @@ void StorageReplicatedMergeTree::fetchPartition(const Field & partition, const S
             /// Leaving only the parts of the desired partition.
             Strings parts_to_fetch_partition;
             for (const String & part : parts_to_fetch)
-                if (startsWith(part, partition_str))
+            {
+                if (MergeTreePartInfo::fromPartName(part).partition_id == partition_id)
                     parts_to_fetch_partition.push_back(part);
+            }
 
             parts_to_fetch = std::move(parts_to_fetch_partition);
 
             if (parts_to_fetch.empty())
-                throw Exception("Partition " + partition_str + " on " + best_replica_path + " doesn't exist", ErrorCodes::PARTITION_DOESNT_EXIST);
+                throw Exception("Partition " + partition_id + " on " + best_replica_path + " doesn't exist", ErrorCodes::PARTITION_DOESNT_EXIST);
         }
         else
         {
@@ -3448,19 +3451,17 @@ void StorageReplicatedMergeTree::reshardPartitions(
                 throw Exception{"Shard paths must be distinct", ErrorCodes::DUPLICATE_SHARD_PATHS};
         }
 
-        DayNum_t partition_num = !partition.isNull() ? MergeTreeData::getMonthDayNum(partition) : DayNum_t();
-
-        bool include_all = !partition_num;
+        bool include_all = partition.isNull();
+        String partition_id = !partition.isNull() ? MergeTreeData::getPartitionID(partition) : String();
 
         /// Make a list of local partitions that need to be resharded.
         std::set<std::string> unique_partition_list;
         const MergeTreeData::DataParts & data_parts = data.getDataParts();
         for (MergeTreeData::DataParts::iterator it = data_parts.cbegin(); it != data_parts.cend(); ++it)
         {
-            const MergeTreeData::DataPartPtr & current_part = *it;
-            DayNum_t month = current_part->month;
-            if (include_all || month == partition_num)
-                unique_partition_list.insert(MergeTreeData::getMonthName(month));
+            const String & current_partition_id = (*it)->info.partition_id;
+            if (include_all || partition_id == current_partition_id)
+                unique_partition_list.insert(current_partition_id);
         }
 
         partition_list.assign(unique_partition_list.begin(), unique_partition_list.end());
diff --git a/dbms/src/Storages/StorageReplicatedMergeTree.h b/dbms/src/Storages/StorageReplicatedMergeTree.h
index 5dd3471384f..06b45ae8f1d 100644
--- a/dbms/src/Storages/StorageReplicatedMergeTree.h
+++ b/dbms/src/Storages/StorageReplicatedMergeTree.h
@@ -131,7 +131,7 @@ public:
 
     BlockOutputStreamPtr write(const ASTPtr & query, const Settings & settings) override;
 
-    bool optimize(const ASTPtr & query, const String & partition, bool final, bool deduplicate, const Settings & settings) override;
+    bool optimize(const ASTPtr & query, const String & partition_id, bool final, bool deduplicate, const Settings & settings) override;
 
     void alter(const AlterCommands & params, const String & database_name, const String & table_name, const Context & context) override;
 
@@ -449,7 +449,7 @@ private:
     /// With the quorum being tracked, add a replica to the quorum for the part.
     void updateQuorum(const String & part_name);
 
-    AbandonableLockInZooKeeper allocateBlockNumber(const String & month_name, zkutil::ZooKeeperPtr & zookeeper);
+    AbandonableLockInZooKeeper allocateBlockNumber(const String & partition_id, zkutil::ZooKeeperPtr & zookeeper);
 
     /** Wait until all replicas, including this, execute the specified action from the log.
       * If replicas are added at the same time, it can not wait the added replica .
@@ -467,8 +467,8 @@ private:
     void assertNotReadonly() const;
 
     /// The name of an imaginary part covering all parts in the specified partition (at the call moment).
-    /// Returns empty string if partition is empy.
-    String getFakePartNameCoveringAllPartsInPartition(const String & month_name);
+    /// Returns empty string if partition is empty.
+    String getFakePartNameCoveringAllPartsInPartition(const String & partition_id);
 
     /// Check for a node in ZK. If it is, remember this information, and then immediately answer true.
     std::unordered_set<std::string> existing_nodes_cache;
diff --git a/dbms/src/Storages/System/StorageSystemParts.cpp b/dbms/src/Storages/System/StorageSystemParts.cpp
index 70fd42f14f8..1c15c6e1983 100644
--- a/dbms/src/Storages/System/StorageSystemParts.cpp
+++ b/dbms/src/Storages/System/StorageSystemParts.cpp
@@ -201,11 +201,8 @@ BlockInputStreams StorageSystemParts::read(
         /// Finally, we'll go through the list of parts.
         for (const MergeTreeData::DataPartPtr & part : all_parts)
         {
-            LocalDate partition_date {part->month};
-            String partition = toString(partition_date.year()) + (partition_date.month() < 10 ? "0" : "") + toString(partition_date.month());
-
             size_t i = 0;
-            block.getByPosition(i++).column->insert(partition);
+            block.getByPosition(i++).column->insert(part->info.partition_id);
             block.getByPosition(i++).column->insert(part->name);
             block.getByPosition(i++).column->insert(static_cast<UInt64>(active_parts.count(part)));
             block.getByPosition(i++).column->insert(part->size);
@@ -228,11 +225,11 @@ BlockInputStreams StorageSystemParts::read(
             /// For convenience, in returned refcount, don't add references that was due to local variables in this method: all_parts, active_parts.
             block.getByPosition(i++).column->insert(part.use_count() - (active_parts.count(part) ? 2 : 1));
 
-            block.getByPosition(i++).column->insert(static_cast<UInt64>(part->left_date));
-            block.getByPosition(i++).column->insert(static_cast<UInt64>(part->right_date));
-            block.getByPosition(i++).column->insert(part->left);
-            block.getByPosition(i++).column->insert(part->right);
-            block.getByPosition(i++).column->insert(static_cast<UInt64>(part->level));
+            block.getByPosition(i++).column->insert(static_cast<UInt64>(part->min_date));
+            block.getByPosition(i++).column->insert(static_cast<UInt64>(part->max_date));
+            block.getByPosition(i++).column->insert(part->info.min_block);
+            block.getByPosition(i++).column->insert(part->info.max_block);
+            block.getByPosition(i++).column->insert(static_cast<UInt64>(part->info.level));
             block.getByPosition(i++).column->insert(part->getIndexSizeInBytes());
             block.getByPosition(i++).column->insert(part->getIndexSizeInAllocatedBytes());
 
diff --git a/dbms/src/Storages/tests/part_name.cpp b/dbms/src/Storages/tests/part_name.cpp
index 1e2058b2eb7..39ec4656ff0 100644
--- a/dbms/src/Storages/tests/part_name.cpp
+++ b/dbms/src/Storages/tests/part_name.cpp
@@ -1,5 +1,5 @@
 #include <IO/ReadHelpers.h>
-#include <Storages/MergeTree/ActiveDataPartSet.h>
+#include <Storages/MergeTree/MergeTreePartInfo.h>
 #include <common/LocalDateTime.h>
 
 
@@ -9,7 +9,7 @@ int main(int argc, char ** argv)
 
     for (DayNum_t date = today; DayNum_t(date + 10) > today; --date)
     {
-        std::string name = DB::ActiveDataPartSet::getPartName(date, date, 0, 0, 0);
+        std::string name = DB::MergeTreePartInfo::getPartName(date, date, 0, 0, 0);
         std::cerr << name << '\n';
 
         time_t time = DateLUT::instance().YYYYMMDDToDate(DB::parse<UInt32>(name));

From 0df4ae6b02c3a6ca6de0daceb02c3cd5c47642a8 Mon Sep 17 00:00:00 2001
From: Alexey Zatelepin <ztlpn@yandex-team.ru>
Date: Tue, 15 Aug 2017 14:59:08 +0300
Subject: [PATCH 181/281] do not use unnecessary temporary objects to query a
 set of parts [#CLICKHOUSE-3000]

---
 .../Storages/MergeTree/ActiveDataPartSet.cpp  | 53 ++++++++-----------
 .../Storages/MergeTree/ActiveDataPartSet.h    | 18 ++-----
 dbms/src/Storages/MergeTree/MergeTreeData.cpp | 14 +++--
 dbms/src/Storages/MergeTree/MergeTreeData.h   | 11 +++-
 4 files changed, 42 insertions(+), 54 deletions(-)

diff --git a/dbms/src/Storages/MergeTree/ActiveDataPartSet.cpp b/dbms/src/Storages/MergeTree/ActiveDataPartSet.cpp
index 2c8b7f225b7..8396181901c 100644
--- a/dbms/src/Storages/MergeTree/ActiveDataPartSet.cpp
+++ b/dbms/src/Storages/MergeTree/ActiveDataPartSet.cpp
@@ -20,66 +20,59 @@ void ActiveDataPartSet::add(const String & name)
 
 void ActiveDataPartSet::addImpl(const String & name)
 {
-    if (!getContainingPartImpl(name).empty())
+    auto part_info = MergeTreePartInfo::fromPartName(name);
+
+    if (!getContainingPartImpl(part_info).empty())
         return;
 
-    Part part;
-    part.name = name;
-    part.info = MergeTreePartInfo::fromPartName(name);
-
     /// Parts contained in `part` are located contiguously inside `data_parts`, overlapping with the place where the part itself would be inserted.
-    Parts::iterator it = parts.lower_bound(part);
+    auto it = part_info_to_name.lower_bound(part_info);
 
     /// Let's go left.
-    while (it != parts.begin())
+    while (it != part_info_to_name.begin())
     {
         --it;
-        if (!part.contains(*it))
+        if (!part_info.contains(it->first))
         {
             ++it;
             break;
         }
-        parts.erase(it++);
+        part_info_to_name.erase(it++);
     }
 
     /// Let's go to the right.
-    while (it != parts.end() && part.contains(*it))
+    while (it != part_info_to_name.end() && part_info.contains(it->first))
     {
-        parts.erase(it++);
+        part_info_to_name.erase(it++);
     }
 
-    parts.insert(part);
+    part_info_to_name.emplace(part_info, name);
 }
 
 
 String ActiveDataPartSet::getContainingPart(const String & part_name) const
 {
     std::lock_guard<std::mutex> lock(mutex);
-    return getContainingPartImpl(part_name);
+    return getContainingPartImpl(MergeTreePartInfo::fromPartName(part_name));
 }
 
 
-String ActiveDataPartSet::getContainingPartImpl(const String & part_name) const
+String ActiveDataPartSet::getContainingPartImpl(const MergeTreePartInfo & part_info) const
 {
-    Part part;
-    part.info = MergeTreePartInfo::fromPartName(part_name);
-
     /// A part can only be covered/overlapped by the previous or next one in `parts`.
-    Parts::iterator it = parts.lower_bound(part);
+    auto it = part_info_to_name.lower_bound(part_info);
 
-    if (it != parts.end())
+    if (it != part_info_to_name.end())
     {
-        if (it->name == part_name)
-            return it->name;
-        if (it->contains(part))
-            return it->name;
+        if (it->first.contains(part_info))
+            return it->second;
     }
 
-    if (it != parts.begin())
+    if (it != part_info_to_name.begin())
     {
         --it;
-        if (it->contains(part))
-            return it->name;
+        if (it->first.contains(part_info))
+            return it->second;
     }
 
     return String();
@@ -91,9 +84,9 @@ Strings ActiveDataPartSet::getParts() const
     std::lock_guard<std::mutex> lock(mutex);
 
     Strings res;
-    res.reserve(parts.size());
-    for (const Part & part : parts)
-        res.push_back(part.name);
+    res.reserve(part_info_to_name.size());
+    for (const auto & kv : part_info_to_name)
+        res.push_back(kv.second);
 
     return res;
 }
@@ -102,7 +95,7 @@ Strings ActiveDataPartSet::getParts() const
 size_t ActiveDataPartSet::size() const
 {
     std::lock_guard<std::mutex> lock(mutex);
-    return parts.size();
+    return part_info_to_name.size();
 }
 
 
diff --git a/dbms/src/Storages/MergeTree/ActiveDataPartSet.h b/dbms/src/Storages/MergeTree/ActiveDataPartSet.h
index 34a1ffd73b5..1e6067adb16 100644
--- a/dbms/src/Storages/MergeTree/ActiveDataPartSet.h
+++ b/dbms/src/Storages/MergeTree/ActiveDataPartSet.h
@@ -4,7 +4,7 @@
 #include <mutex>
 #include <common/DateLUT.h>
 #include <Core/Types.h>
-#include <set>
+#include <map>
 
 
 namespace DB
@@ -21,16 +21,6 @@ public:
     ActiveDataPartSet() {}
     ActiveDataPartSet(const Strings & names);
 
-    struct Part
-    {
-        String name;
-        MergeTreePartInfo info;
-
-        bool operator<(const Part & rhs) const { return info < rhs.info; }
-
-        bool contains(const Part & rhs) const { return info.contains(rhs.info); }
-    };
-
     void add(const String & name);
 
     /// If not found, returns an empty string.
@@ -41,14 +31,12 @@ public:
     size_t size() const;
 
 private:
-    using Parts = std::set<Part>;
-
     mutable std::mutex mutex;
-    Parts parts;
+    std::map<MergeTreePartInfo, String> part_info_to_name;
 
     /// Do not block mutex.
     void addImpl(const String & name);
-    String getContainingPartImpl(const String & name) const;
+    String getContainingPartImpl(const MergeTreePartInfo & part_info) const;
 };
 
 }
diff --git a/dbms/src/Storages/MergeTree/MergeTreeData.cpp b/dbms/src/Storages/MergeTree/MergeTreeData.cpp
index 86c2f7a607e..f675eaee193 100644
--- a/dbms/src/Storages/MergeTree/MergeTreeData.cpp
+++ b/dbms/src/Storages/MergeTree/MergeTreeData.cpp
@@ -1483,26 +1483,25 @@ void MergeTreeData::delayInsertIfNeeded(Poco::Event * until)
 
 MergeTreeData::DataPartPtr MergeTreeData::getActiveContainingPart(const String & part_name)
 {
-    MutableDataPartPtr tmp_part(new DataPart(*this));
-    tmp_part->info = MergeTreePartInfo::fromPartName(part_name);
+    auto part_info = MergeTreePartInfo::fromPartName(part_name);
 
     std::lock_guard<std::mutex> lock(data_parts_mutex);
 
     /// The part can be covered only by the previous or the next one in data_parts.
-    auto it = data_parts.lower_bound(tmp_part);
+    auto it = data_parts.lower_bound(part_info);
 
     if (it != data_parts.end())
     {
         if ((*it)->name == part_name)
             return *it;
-        if ((*it)->contains(*tmp_part))
+        if ((*it)->info.contains(part_info))
             return *it;
     }
 
     if (it != data_parts.begin())
     {
         --it;
-        if ((*it)->contains(*tmp_part))
+        if ((*it)->info.contains(part_info))
             return *it;
     }
 
@@ -1511,11 +1510,10 @@ MergeTreeData::DataPartPtr MergeTreeData::getActiveContainingPart(const String &
 
 MergeTreeData::DataPartPtr MergeTreeData::getPartIfExists(const String & part_name)
 {
-    MutableDataPartPtr tmp_part(new DataPart(*this));
-    tmp_part->info = MergeTreePartInfo::fromPartName(part_name);
+    auto part_info = MergeTreePartInfo::fromPartName(part_name);
 
     std::lock_guard<std::mutex> lock(all_data_parts_mutex);
-    auto it = all_data_parts.lower_bound(tmp_part);
+    auto it = all_data_parts.lower_bound(part_info);
     if (it != all_data_parts.end() && (*it)->name == part_name)
         return *it;
 
diff --git a/dbms/src/Storages/MergeTree/MergeTreeData.h b/dbms/src/Storages/MergeTree/MergeTreeData.h
index f2372b17d09..0229494e36a 100644
--- a/dbms/src/Storages/MergeTree/MergeTreeData.h
+++ b/dbms/src/Storages/MergeTree/MergeTreeData.h
@@ -90,7 +90,16 @@ public:
     using MutableDataPartPtr = std::shared_ptr<DataPart>;
     /// After the DataPart is added to the working set, it cannot be changed.
     using DataPartPtr = std::shared_ptr<const DataPart>;
-    struct DataPartPtrLess { bool operator() (const DataPartPtr & lhs, const DataPartPtr & rhs) const { return lhs->info < rhs->info; } };
+
+    struct DataPartPtrLess
+    {
+        using is_transparent = void;
+
+        bool operator()(const DataPartPtr & lhs, const MergeTreePartInfo & rhs) const { return lhs->info < rhs; }
+        bool operator()(const MergeTreePartInfo & lhs, const DataPartPtr & rhs) const { return lhs < rhs->info; }
+        bool operator()(const DataPartPtr & lhs, const DataPartPtr & rhs) const { return lhs->info < rhs->info; }
+    };
+
     using DataParts = std::set<DataPartPtr, DataPartPtrLess>;
     using DataPartsVector = std::vector<DataPartPtr>;
 

From 372801c59bd7d53100e776cc6759e66f557b56e5 Mon Sep 17 00:00:00 2001
From: Alexey Zatelepin <ztlpn@yandex-team.ru>
Date: Tue, 15 Aug 2017 19:21:13 +0300
Subject: [PATCH 182/281] fix comments [#CLICKHOUSE-3000]

---
 dbms/src/Storages/MergeTree/ActiveDataPartSet.cpp | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/dbms/src/Storages/MergeTree/ActiveDataPartSet.cpp b/dbms/src/Storages/MergeTree/ActiveDataPartSet.cpp
index 8396181901c..80ed6f7e8e5 100644
--- a/dbms/src/Storages/MergeTree/ActiveDataPartSet.cpp
+++ b/dbms/src/Storages/MergeTree/ActiveDataPartSet.cpp
@@ -25,7 +25,7 @@ void ActiveDataPartSet::addImpl(const String & name)
     if (!getContainingPartImpl(part_info).empty())
         return;
 
-    /// Parts contained in `part` are located contiguously inside `data_parts`, overlapping with the place where the part itself would be inserted.
+    /// Parts contained in `part` are located contiguously in `part_info_to_name`, overlapping with the place where the part itself would be inserted.
     auto it = part_info_to_name.lower_bound(part_info);
 
     /// Let's go left.
@@ -59,7 +59,7 @@ String ActiveDataPartSet::getContainingPart(const String & part_name) const
 
 String ActiveDataPartSet::getContainingPartImpl(const MergeTreePartInfo & part_info) const
 {
-    /// A part can only be covered/overlapped by the previous or next one in `parts`.
+    /// A part can only be covered/overlapped by the previous or next one in `part_info_to_name`.
     auto it = part_info_to_name.lower_bound(part_info);
 
     if (it != part_info_to_name.end())

From 9c6f1a1e4b338c3f329235c71dbe72d51c751b29 Mon Sep 17 00:00:00 2001
From: Nikolai Kochetov <nik-kochetov@yandex-team.ru>
Date: Tue, 15 Aug 2017 15:34:28 +0300
Subject: [PATCH 183/281] supported drop query for temporary tables
 [#CLICKHOUSE-3219]

---
 dbms/src/Interpreters/Context.cpp             | 25 +++++++++++++++
 dbms/src/Interpreters/Context.h               |  3 ++
 .../src/Interpreters/InterpreterDropQuery.cpp | 14 ++++++++
 dbms/src/Interpreters/ProcessList.cpp         | 32 +++++++++++++++++--
 dbms/src/Interpreters/ProcessList.h           |  4 +++
 5 files changed, 75 insertions(+), 3 deletions(-)

diff --git a/dbms/src/Interpreters/Context.cpp b/dbms/src/Interpreters/Context.cpp
index 0afefb73903..a123ff52745 100644
--- a/dbms/src/Interpreters/Context.cpp
+++ b/dbms/src/Interpreters/Context.cpp
@@ -781,6 +781,31 @@ void Context::addExternalTable(const String & table_name, StoragePtr storage)
     }
 }
 
+StoragePtr Context::tryRemoveExternalTable(const String & database_name, const String & table_name)
+{
+    auto lock = getLock();
+
+    /// Ability to remove the temporary tables of another query in the form _query_QUERY_ID.table
+
+    if (startsWith(database_name, "_query_"))
+    {
+        String requested_query_id = database_name.substr(strlen("_query_"));
+
+        return shared->process_list.tryRemoveTemporaryTable(requested_query_id, table_name);
+    }
+    else if(database_name.empty())
+    {
+        Tables::const_iterator it = external_tables.find(table_name);
+        if (external_tables.end() == it)
+            return StoragePtr();
+
+        auto storage = it->second;
+        external_tables.erase(it);
+        return storage;
+    }
+
+    return {};
+}
 
 DDLGuard::DDLGuard(Map & map_, std::mutex & mutex_, std::unique_lock<std::mutex> && lock, const String & elem, const String & message)
     : map(map_), mutex(mutex_)
diff --git a/dbms/src/Interpreters/Context.h b/dbms/src/Interpreters/Context.h
index 223d3270312..ad2d7e0cf0a 100644
--- a/dbms/src/Interpreters/Context.h
+++ b/dbms/src/Interpreters/Context.h
@@ -170,6 +170,7 @@ public:
     StoragePtr getTable(const String & database_name, const String & table_name) const;
     StoragePtr tryGetTable(const String & database_name, const String & table_name) const;
     void addExternalTable(const String & table_name, StoragePtr storage);
+    StoragePtr tryRemoveExternalTable(const String & database_name, const String & table_name);
 
     void addDatabase(const String & database_name, const DatabasePtr & database);
     DatabasePtr detachDatabase(const String & database_name);
@@ -241,9 +242,11 @@ public:
 
     const Context & getSessionContext() const;
     Context & getSessionContext();
+    bool hasSessionContext() const { return session_context != nullptr; }
 
     const Context & getGlobalContext() const;
     Context & getGlobalContext();
+    bool hasGlobalContext() const { return global_context != nullptr; }
 
     void setSessionContext(Context & context_)                                { session_context = &context_; }
     void setGlobalContext(Context & context_)                                { global_context = &context_; }
diff --git a/dbms/src/Interpreters/InterpreterDropQuery.cpp b/dbms/src/Interpreters/InterpreterDropQuery.cpp
index 9375f36a4eb..3e5675deb95 100644
--- a/dbms/src/Interpreters/InterpreterDropQuery.cpp
+++ b/dbms/src/Interpreters/InterpreterDropQuery.cpp
@@ -45,6 +45,20 @@ BlockIO InterpreterDropQuery::execute()
         return {};
     }
 
+    /// Drop temporary table.
+    StoragePtr table = (context.hasSessionContext() ? context.getSessionContext() : context)
+            .tryRemoveExternalTable(drop.database, drop.table);
+    if (table)
+    {
+        table->shutdown();
+        /// If table was already dropped by anyone, an exception will be thrown
+        auto table_lock = table->lockForAlter();
+        /// Delete table data
+        table->drop();
+        table->is_dropped = true;
+        return {};
+    }
+
     String database_name = drop.database.empty() ? current_database : drop.database;
     String database_name_escaped = escapeForFileName(database_name);
 
diff --git a/dbms/src/Interpreters/ProcessList.cpp b/dbms/src/Interpreters/ProcessList.cpp
index 2d3c329ad30..6d35e117f82 100644
--- a/dbms/src/Interpreters/ProcessList.cpp
+++ b/dbms/src/Interpreters/ProcessList.cpp
@@ -209,8 +209,17 @@ StoragePtr ProcessList::tryGetTemporaryTable(const String & query_id, const Stri
 {
     std::lock_guard<std::mutex> lock(mutex);
 
+    Tables * tables;
+    Tables::iterator iterator;
+    std::tie(tables, iterator) = tryFindTemporaryTable(query_id, table_name);
+
+    return tables ? iterator->second : nullptr;
+}
+
+std::tuple<Tables *, Tables::iterator> ProcessList::tryFindTemporaryTable(const String & query_id, const String & table_name) const
+{
     /// NOTE We search for all user-s. That is, there is no isolation, and the complexity is O(users).
-    for (const auto & user_queries : user_to_queries)
+    for (auto & user_queries : user_to_queries)
     {
         auto it = user_queries.second.queries.find(query_id);
         if (user_queries.second.queries.end() == it)
@@ -220,10 +229,27 @@ StoragePtr ProcessList::tryGetTemporaryTable(const String & query_id, const Stri
         if ((*it->second).temporary_tables.end() == jt)
             continue;
 
-        return jt->second;
+        return {& ((*it->second).temporary_tables), jt};
     }
 
-    return {};
+    return {nullptr, Tables::iterator()};
+}
+
+
+StoragePtr ProcessList::tryRemoveTemporaryTable(const String & query_id, const String & table_name) const
+{
+    std::lock_guard<std::mutex> lock(mutex);
+
+    Tables * tables;
+    Tables::iterator iterator;
+    std::tie(tables, iterator) = tryFindTemporaryTable(query_id, table_name);
+
+    if (!tables)
+        return {};
+
+    StoragePtr storage = iterator->second;
+    tables->erase(iterator);
+    return storage;
 }
 
 
diff --git a/dbms/src/Interpreters/ProcessList.h b/dbms/src/Interpreters/ProcessList.h
index 80c8a2c4d9f..3b725a63c9b 100644
--- a/dbms/src/Interpreters/ProcessList.h
+++ b/dbms/src/Interpreters/ProcessList.h
@@ -226,6 +226,8 @@ private:
     /// Call under lock. Finds process with specified current_user and current_query_id.
     ProcessListElement * tryGetProcessListElement(const String & current_query_id, const String & current_user);
 
+    std::tuple<Tables *, Tables::iterator> tryFindTemporaryTable(const String & query_id, const String & table_name) const;
+
 public:
     ProcessList(size_t max_size_ = 0) : cur_size(0), max_size(max_size_) {}
 
@@ -265,6 +267,8 @@ public:
 
     /// Find temporary table by query_id and name. NOTE: doesn't work fine if there are many queries with same query_id.
     StoragePtr tryGetTemporaryTable(const String & query_id, const String & table_name) const;
+    /// Find temporary table by query_id and name and remove it if exists.
+    StoragePtr tryRemoveTemporaryTable(const String & query_id, const String & table_name) const;
 
 
     enum class CancellationCode

From 89df47fd528330f8287a287e9264c0f01fece6d8 Mon Sep 17 00:00:00 2001
From: Nikolai Kochetov <nik-kochetov@yandex-team.ru>
Date: Tue, 15 Aug 2017 15:54:12 +0300
Subject: [PATCH 184/281] added test for drop temporary table

---
 .../0_stateless/00492_drop_temporary_table.reference      | 1 +
 .../queries/0_stateless/00492_drop_temporary_table.sql    | 8 ++++++++
 2 files changed, 9 insertions(+)
 create mode 100644 dbms/tests/queries/0_stateless/00492_drop_temporary_table.reference
 create mode 100644 dbms/tests/queries/0_stateless/00492_drop_temporary_table.sql

diff --git a/dbms/tests/queries/0_stateless/00492_drop_temporary_table.reference b/dbms/tests/queries/0_stateless/00492_drop_temporary_table.reference
new file mode 100644
index 00000000000..573541ac970
--- /dev/null
+++ b/dbms/tests/queries/0_stateless/00492_drop_temporary_table.reference
@@ -0,0 +1 @@
+0
diff --git a/dbms/tests/queries/0_stateless/00492_drop_temporary_table.sql b/dbms/tests/queries/0_stateless/00492_drop_temporary_table.sql
new file mode 100644
index 00000000000..715c78d675c
--- /dev/null
+++ b/dbms/tests/queries/0_stateless/00492_drop_temporary_table.sql
@@ -0,0 +1,8 @@
+drop table if exists temp_tab;
+create temporary table temp_tab (number UInt64);
+insert into temp_tab select number from system.numbers limit 1;
+select number from temp_tab;
+drop table temp_tab;
+create temporary table temp_tab (number UInt64);
+select number from temp_tab;
+drop table temp_tab;

From f8513b932ffe112f65579ba7a68540f7d3a1e739 Mon Sep 17 00:00:00 2001
From: Nikolai Kochetov <nik-kochetov@yandex-team.ru>
Date: Tue, 15 Aug 2017 20:00:18 +0300
Subject: [PATCH 185/281] removed ability to select from external temporary
 table by query_id

---
 dbms/src/Interpreters/Context.cpp             | 41 +++-------------
 dbms/src/Interpreters/Context.h               |  2 +-
 .../src/Interpreters/InterpreterDropQuery.cpp | 22 +++++----
 dbms/src/Interpreters/ProcessList.cpp         | 48 -------------------
 dbms/src/Interpreters/ProcessList.h           |  8 ----
 5 files changed, 20 insertions(+), 101 deletions(-)

diff --git a/dbms/src/Interpreters/Context.cpp b/dbms/src/Interpreters/Context.cpp
index a123ff52745..805e1969ad9 100644
--- a/dbms/src/Interpreters/Context.cpp
+++ b/dbms/src/Interpreters/Context.cpp
@@ -721,22 +721,6 @@ StoragePtr Context::getTableImpl(const String & database_name, const String & ta
 {
     auto lock = getLock();
 
-    /** Ability to access the temporary tables of another query in the form _query_QUERY_ID.table
-      * NOTE In the future, you may need to think about isolation.
-      */
-    if (startsWith(database_name, "_query_"))
-    {
-        String requested_query_id = database_name.substr(strlen("_query_"));
-
-        auto res = shared->process_list.tryGetTemporaryTable(requested_query_id, table_name);
-
-        if (!res && exception)
-            *exception = Exception(
-                "Cannot find temporary table with name " + table_name + " for query with id " + requested_query_id, ErrorCodes::UNKNOWN_TABLE);
-
-        return res;
-    }
-
     if (database_name.empty())
     {
         StoragePtr res = tryGetExternalTable(table_name);
@@ -781,28 +765,17 @@ void Context::addExternalTable(const String & table_name, StoragePtr storage)
     }
 }
 
-StoragePtr Context::tryRemoveExternalTable(const String & database_name, const String & table_name)
+StoragePtr Context::tryRemoveExternalTable(const String & table_name)
 {
     auto lock = getLock();
 
-    /// Ability to remove the temporary tables of another query in the form _query_QUERY_ID.table
+    Tables::const_iterator it = external_tables.find(table_name);
+    if (external_tables.end() == it)
+        return StoragePtr();
 
-    if (startsWith(database_name, "_query_"))
-    {
-        String requested_query_id = database_name.substr(strlen("_query_"));
-
-        return shared->process_list.tryRemoveTemporaryTable(requested_query_id, table_name);
-    }
-    else if(database_name.empty())
-    {
-        Tables::const_iterator it = external_tables.find(table_name);
-        if (external_tables.end() == it)
-            return StoragePtr();
-
-        auto storage = it->second;
-        external_tables.erase(it);
-        return storage;
-    }
+    auto storage = it->second;
+    external_tables.erase(it);
+    return storage;
 
     return {};
 }
diff --git a/dbms/src/Interpreters/Context.h b/dbms/src/Interpreters/Context.h
index ad2d7e0cf0a..f2e5855bc6b 100644
--- a/dbms/src/Interpreters/Context.h
+++ b/dbms/src/Interpreters/Context.h
@@ -170,7 +170,7 @@ public:
     StoragePtr getTable(const String & database_name, const String & table_name) const;
     StoragePtr tryGetTable(const String & database_name, const String & table_name) const;
     void addExternalTable(const String & table_name, StoragePtr storage);
-    StoragePtr tryRemoveExternalTable(const String & database_name, const String & table_name);
+    StoragePtr tryRemoveExternalTable(const String & table_name);
 
     void addDatabase(const String & database_name, const DatabasePtr & database);
     DatabasePtr detachDatabase(const String & database_name);
diff --git a/dbms/src/Interpreters/InterpreterDropQuery.cpp b/dbms/src/Interpreters/InterpreterDropQuery.cpp
index 3e5675deb95..ae25c53ea1d 100644
--- a/dbms/src/Interpreters/InterpreterDropQuery.cpp
+++ b/dbms/src/Interpreters/InterpreterDropQuery.cpp
@@ -46,17 +46,19 @@ BlockIO InterpreterDropQuery::execute()
     }
 
     /// Drop temporary table.
-    StoragePtr table = (context.hasSessionContext() ? context.getSessionContext() : context)
-            .tryRemoveExternalTable(drop.database, drop.table);
-    if (table)
+    if (drop.database.empty())
     {
-        table->shutdown();
-        /// If table was already dropped by anyone, an exception will be thrown
-        auto table_lock = table->lockForAlter();
-        /// Delete table data
-        table->drop();
-        table->is_dropped = true;
-        return {};
+        StoragePtr table = (context.hasSessionContext() ? context.getSessionContext() : context).tryRemoveExternalTable(drop.table);
+        if (table)
+        {
+            table->shutdown();
+            /// If table was already dropped by anyone, an exception will be thrown
+            auto table_lock = table->lockForAlter();
+            /// Delete table data
+            table->drop();
+            table->is_dropped = true;
+            return {};
+        }
     }
 
     String database_name = drop.database.empty() ? current_database : drop.database;
diff --git a/dbms/src/Interpreters/ProcessList.cpp b/dbms/src/Interpreters/ProcessList.cpp
index 6d35e117f82..e4283df201a 100644
--- a/dbms/src/Interpreters/ProcessList.cpp
+++ b/dbms/src/Interpreters/ProcessList.cpp
@@ -205,54 +205,6 @@ void ProcessList::addTemporaryTable(ProcessListElement & elem, const String & ta
 }
 
 
-StoragePtr ProcessList::tryGetTemporaryTable(const String & query_id, const String & table_name) const
-{
-    std::lock_guard<std::mutex> lock(mutex);
-
-    Tables * tables;
-    Tables::iterator iterator;
-    std::tie(tables, iterator) = tryFindTemporaryTable(query_id, table_name);
-
-    return tables ? iterator->second : nullptr;
-}
-
-std::tuple<Tables *, Tables::iterator> ProcessList::tryFindTemporaryTable(const String & query_id, const String & table_name) const
-{
-    /// NOTE We search for all user-s. That is, there is no isolation, and the complexity is O(users).
-    for (auto & user_queries : user_to_queries)
-    {
-        auto it = user_queries.second.queries.find(query_id);
-        if (user_queries.second.queries.end() == it)
-            continue;
-
-        auto jt = (*it->second).temporary_tables.find(table_name);
-        if ((*it->second).temporary_tables.end() == jt)
-            continue;
-
-        return {& ((*it->second).temporary_tables), jt};
-    }
-
-    return {nullptr, Tables::iterator()};
-}
-
-
-StoragePtr ProcessList::tryRemoveTemporaryTable(const String & query_id, const String & table_name) const
-{
-    std::lock_guard<std::mutex> lock(mutex);
-
-    Tables * tables;
-    Tables::iterator iterator;
-    std::tie(tables, iterator) = tryFindTemporaryTable(query_id, table_name);
-
-    if (!tables)
-        return {};
-
-    StoragePtr storage = iterator->second;
-    tables->erase(iterator);
-    return storage;
-}
-
-
 ProcessListElement * ProcessList::tryGetProcessListElement(const String & current_query_id, const String & current_user)
 {
     auto user_it = user_to_queries.find(current_user);
diff --git a/dbms/src/Interpreters/ProcessList.h b/dbms/src/Interpreters/ProcessList.h
index 3b725a63c9b..da284667e95 100644
--- a/dbms/src/Interpreters/ProcessList.h
+++ b/dbms/src/Interpreters/ProcessList.h
@@ -226,8 +226,6 @@ private:
     /// Call under lock. Finds process with specified current_user and current_query_id.
     ProcessListElement * tryGetProcessListElement(const String & current_query_id, const String & current_user);
 
-    std::tuple<Tables *, Tables::iterator> tryFindTemporaryTable(const String & query_id, const String & table_name) const;
-
 public:
     ProcessList(size_t max_size_ = 0) : cur_size(0), max_size(max_size_) {}
 
@@ -265,12 +263,6 @@ public:
     /// Register temporary table. Then it is accessible by query_id and name.
     void addTemporaryTable(ProcessListElement & elem, const String & table_name, StoragePtr storage);
 
-    /// Find temporary table by query_id and name. NOTE: doesn't work fine if there are many queries with same query_id.
-    StoragePtr tryGetTemporaryTable(const String & query_id, const String & table_name) const;
-    /// Find temporary table by query_id and name and remove it if exists.
-    StoragePtr tryRemoveTemporaryTable(const String & query_id, const String & table_name) const;
-
-
     enum class CancellationCode
     {
         NotFound = 0,                     /// already cancelled

From 0185fd4542286efcb33e984cfae918fc6aaae644 Mon Sep 17 00:00:00 2001
From: Alexey Zatelepin <ztlpn@yandex-team.ru>
Date: Tue, 15 Aug 2017 23:03:59 +0300
Subject: [PATCH 186/281] fix parsing min-max dates from part name
 [#CLICKHOUSE-3000]

---
 dbms/src/Storages/MergeTree/MergeTreePartInfo.cpp | 7 +++----
 1 file changed, 3 insertions(+), 4 deletions(-)

diff --git a/dbms/src/Storages/MergeTree/MergeTreePartInfo.cpp b/dbms/src/Storages/MergeTree/MergeTreePartInfo.cpp
index 02c9f7e90e4..910c9a95ba4 100644
--- a/dbms/src/Storages/MergeTree/MergeTreePartInfo.cpp
+++ b/dbms/src/Storages/MergeTree/MergeTreePartInfo.cpp
@@ -72,14 +72,14 @@ void MergeTreePartInfo::parseMinMaxDatesFromPartName(const String & dir_name, Da
 
     const auto & date_lut = DateLUT::instance();
 
+    min_date = date_lut.YYYYMMDDToDayNum(min_yyyymmdd);
+    max_date = date_lut.YYYYMMDDToDayNum(max_yyyymmdd);
+
     DayNum_t min_month = date_lut.toFirstDayNumOfMonth(min_date);
     DayNum_t max_month = date_lut.toFirstDayNumOfMonth(max_date);
 
     if (min_month != max_month)
         throw Exception("Part name " + dir_name + " contains different months", ErrorCodes::BAD_DATA_PART_NAME);
-
-    min_date = date_lut.YYYYMMDDToDayNum(min_yyyymmdd);
-    max_date = date_lut.YYYYMMDDToDayNum(max_yyyymmdd);
 }
 
 
@@ -115,5 +115,4 @@ String MergeTreePartInfo::getPartName(DayNum_t left_date, DayNum_t right_date, I
     return wb.str();
 }
 
-
 }

From 4272265128e12bfdfd57ae0169e76b3ecd81f426 Mon Sep 17 00:00:00 2001
From: Alexey Milovidov <milovidov@yandex-team.ru>
Date: Tue, 15 Aug 2017 23:14:15 +0300
Subject: [PATCH 187/281] Fixed LimitReadBuffer [#CLICKHOUSE-2].

---
 dbms/src/IO/LimitReadBuffer.cpp               |  9 ++++++++-
 dbms/src/IO/tests/limit_read_buffer.cpp       | 15 ++++++++++++---
 dbms/src/IO/tests/limit_read_buffer.reference |  5 ++++-
 dbms/src/IO/tests/limit_read_buffer.sh        |  2 +-
 4 files changed, 25 insertions(+), 6 deletions(-)

diff --git a/dbms/src/IO/LimitReadBuffer.cpp b/dbms/src/IO/LimitReadBuffer.cpp
index c015173045d..82f9fee4855 100644
--- a/dbms/src/IO/LimitReadBuffer.cpp
+++ b/dbms/src/IO/LimitReadBuffer.cpp
@@ -22,7 +22,14 @@ bool LimitReadBuffer::nextImpl()
 
 
 LimitReadBuffer::LimitReadBuffer(ReadBuffer & in_, size_t limit_)
-    : ReadBuffer(nullptr, 0), in(in_), limit(limit_) {}
+    : ReadBuffer(in_.position(), 0), in(in_), limit(limit_)
+{
+    size_t remaining_bytes_in_buffer = in.buffer().end() - in.position();
+    if (remaining_bytes_in_buffer > limit)
+        remaining_bytes_in_buffer = limit;
+
+    working_buffer = Buffer(in.position(), in.position() + remaining_bytes_in_buffer);
+}
 
 
 LimitReadBuffer::~LimitReadBuffer()
diff --git a/dbms/src/IO/tests/limit_read_buffer.cpp b/dbms/src/IO/tests/limit_read_buffer.cpp
index 432c21b5410..f4ad14de9db 100644
--- a/dbms/src/IO/tests/limit_read_buffer.cpp
+++ b/dbms/src/IO/tests/limit_read_buffer.cpp
@@ -14,11 +14,20 @@ int main(int argc, char ** argv)
     size_t limit = std::stol(argv[1]);
 
     ReadBufferFromFileDescriptor in(STDIN_FILENO);
-    LimitReadBuffer limit_in(in, limit);
-
     WriteBufferFromFileDescriptor out(STDOUT_FILENO);
 
-    copyData(limit_in, out);
+    writeCString("--- first ---\n", out);
+    {
+        LimitReadBuffer limit_in(in, limit);
+        copyData(limit_in, out);
+    }
+
+    writeCString("\n--- second ---\n", out);
+    {
+        LimitReadBuffer limit_in(in, limit);
+        copyData(limit_in, out);
+    }
+
     writeCString("\n--- the rest ---\n", out);
     copyData(in, out);
 
diff --git a/dbms/src/IO/tests/limit_read_buffer.reference b/dbms/src/IO/tests/limit_read_buffer.reference
index ce8c1253433..beeff575bf4 100644
--- a/dbms/src/IO/tests/limit_read_buffer.reference
+++ b/dbms/src/IO/tests/limit_read_buffer.reference
@@ -1,3 +1,6 @@
+--- first ---
 Hello, wor
+--- second ---
+ld! Abcdef
 --- the rest ---
-ld!
+ghijklmnopqrstuvwxyz.
diff --git a/dbms/src/IO/tests/limit_read_buffer.sh b/dbms/src/IO/tests/limit_read_buffer.sh
index 177d6cd957a..ee6bc97d5fa 100755
--- a/dbms/src/IO/tests/limit_read_buffer.sh
+++ b/dbms/src/IO/tests/limit_read_buffer.sh
@@ -1,2 +1,2 @@
 #!/usr/bin/env bash
-./limit_read_buffer 10 <<< "Hello, world!"
+./limit_read_buffer 10 <<< 'Hello, world! Abcdefghijklmnopqrstuvwxyz.'

From dc29ae7e730c3bc0518d6b5250d1b6dc999c897a Mon Sep 17 00:00:00 2001
From: Alexey Milovidov <milovidov@yandex-team.ru>
Date: Tue, 15 Aug 2017 23:17:50 +0300
Subject: [PATCH 188/281] Added test (tnx. egatov) [#CLICKHOUSE-2].

---
 dbms/src/IO/tests/CMakeLists.txt         |   3 +
 dbms/src/IO/tests/limit_read_buffer2.cpp | 131 +++++++++++++++++++++++
 2 files changed, 134 insertions(+)
 create mode 100644 dbms/src/IO/tests/limit_read_buffer2.cpp

diff --git a/dbms/src/IO/tests/CMakeLists.txt b/dbms/src/IO/tests/CMakeLists.txt
index 3330b111f84..bd02841a2ee 100644
--- a/dbms/src/IO/tests/CMakeLists.txt
+++ b/dbms/src/IO/tests/CMakeLists.txt
@@ -79,3 +79,6 @@ target_link_libraries (remote_read_write_buffer dbms)
 
 add_executable (limit_read_buffer limit_read_buffer.cpp ${SRCS})
 target_link_libraries (limit_read_buffer dbms)
+
+add_executable (limit_read_buffer2 limit_read_buffer2.cpp ${SRCS})
+target_link_libraries (limit_read_buffer2 dbms)
diff --git a/dbms/src/IO/tests/limit_read_buffer2.cpp b/dbms/src/IO/tests/limit_read_buffer2.cpp
new file mode 100644
index 00000000000..980acc194fd
--- /dev/null
+++ b/dbms/src/IO/tests/limit_read_buffer2.cpp
@@ -0,0 +1,131 @@
+#include <sstream>
+
+#include <IO/LimitReadBuffer.h>
+#include <IO/copyData.h>
+#include <IO/WriteBufferFromString.h>
+#include <IO/ReadHelpers.h>
+
+
+int main(int argc, char ** argv)   /// Self-checking test of DB::LimitReadBuffer: returns 0 if all checks pass, 1 if a check throws DB::Exception.
+{
+    try
+    {
+        std::stringstream s;    /// Accumulates the failure message that is passed to the thrown exception.
+        /// Case 1: the limit (1) equals the source size, so the whole source must be copied to dst.
+        {
+            std::string src = "1";
+
+            std::string dst;
+
+            DB::ReadBuffer in(&src[0], src.size(), 0);
+
+            DB::LimitReadBuffer limit_in(in, 1);
+
+            {
+                DB::WriteBufferFromString out(dst);
+
+                DB::copyData(limit_in, out);
+            }
+
+            if (limit_in.count() != 1)
+            {
+                s << "Failed!, incorrect count(): " << limit_in.count();
+                throw DB::Exception(s.str());
+            }
+
+            if (in.count() != limit_in.count())
+            {
+                s << "Failed!, incorrect underlying buffer's count(): " << in.count();
+                throw DB::Exception(s.str());
+            }
+            if (src != dst)
+            {
+                s << "Failed!, incorrect destination value, read: " << dst << ", expected: " << src;
+                throw DB::Exception(s.str());
+            }
+        }
+        {   /// Case 2: one byte is read from `in` directly, then a LimitReadBuffer with limit 1 must yield only the next byte.
+            std::string src = "abc";
+            DB::ReadBuffer in(&src[0], src.size(), 0);
+
+            std::string dst;
+
+            {
+                DB::WriteBufferFromString out(dst);
+
+                char x;
+                DB::readChar(x, in);
+
+                DB::LimitReadBuffer limit_in(in, 1);
+
+                DB::copyData(limit_in, out);
+
+                /// Checked while limit_in is still alive: `in` is expected to account for both consumed bytes already.
+                if (in.count() != 2)
+                {
+                    s << "Failed!, Incorrect underlying buffer's count: " << in.count() << ", expected: " << 2;
+                    throw DB::Exception(s.str());
+                }
+
+                if (limit_in.count() != 1)
+                {
+                    s << "Failed!, Incorrect count: " << limit_in.count() << ", expected: " << 1;
+                    throw DB::Exception(s.str());
+                }
+            }
+
+            if (dst != "b")
+            {
+                s << "Failed!, Incorrect destination value: " << dst << ", expected 'b'";
+                throw DB::Exception(s.str());
+            }
+
+            char y;
+            DB::readChar(y, in);
+            if (y != 'c')
+            {
+                s << "Failed!, Read incorrect value from underlying buffer: " << y << ", expected 'c'";
+                throw DB::Exception(s.str());
+            }
+            while (!in.eof())
+                in.ignore();
+            if (in.count() != 3)
+            {
+                s << "Failed!, Incorrect final count from underlying buffer: " << in.count() << ", expected: 3";
+                throw DB::Exception(s.str());
+            }
+        }
+        /// Case 3: one byte is read through limit_in; after it is destroyed, `in` must report exactly one consumed byte.
+        {
+            std::string src = "abc";
+            DB::ReadBuffer in(&src[0], src.size(), 0);
+
+            {
+                DB::LimitReadBuffer limit_in(in, 1);
+
+                char x;
+                DB::readChar(x, limit_in);
+
+                if (limit_in.count() != 1)
+                {
+                    s << "Failed!, Incorrect count: " << limit_in.count() << ", expected: " << 1;
+                    throw DB::Exception(s.str());
+                }
+            }
+
+            if (in.count() != 1)
+            {
+                s << "Failed!, Incorrect final count from underlying buffer: " << in.count() << ", expected: 1";
+                throw DB::Exception(s.str());
+            }
+        }
+
+    }
+    catch (const DB::Exception & e)
+    {
+        std::cerr << e.what() << ", " << e.displayText() << std::endl;
+        return 1;
+    }
+
+    return 0;
+}

From ad40104022ff1e16679a683d9da9e263bb0f1c1c Mon Sep 17 00:00:00 2001
From: Vitaliy Lyudvichenko <vludv@yandex-team.ru>
Date: Tue, 15 Aug 2017 13:20:05 +0300
Subject: [PATCH 189/281] Unix timestamp format for DateTime fields in CSV.
 Resolves #366. [#CLICKHOUSE-3168]

---
 dbms/src/DataTypes/DataTypeDateTime.cpp            |  6 +++---
 dbms/src/IO/ReadHelpers.h                          | 11 ++++++++---
 dbms/tests/queries/0_stateless/00301_csv.reference |  4 ++++
 dbms/tests/queries/0_stateless/00301_csv.sh        | 10 ++++++++++
 4 files changed, 25 insertions(+), 6 deletions(-)

diff --git a/dbms/src/DataTypes/DataTypeDateTime.cpp b/dbms/src/DataTypes/DataTypeDateTime.cpp
index a74918f08b6..fe6ccf81439 100644
--- a/dbms/src/DataTypes/DataTypeDateTime.cpp
+++ b/dbms/src/DataTypes/DataTypeDateTime.cpp
@@ -72,9 +72,9 @@ void DataTypeDateTime::serializeTextCSV(const IColumn & column, size_t row_num,
 
 void DataTypeDateTime::deserializeTextCSV(IColumn & column, ReadBuffer & istr, const char delimiter) const
 {
-    LocalDateTime value;
-    readCSV(value, istr);
-    static_cast<ColumnUInt32 &>(column).getData().push_back(static_cast<time_t>(value));
+    time_t x;
+    readCSVSimple(x, istr, readDateTimeText);
+    static_cast<ColumnUInt32 &>(column).getData().push_back(x);
 }
 
 void registerDataTypeDateTime(DataTypeFactory & factory)
diff --git a/dbms/src/IO/ReadHelpers.h b/dbms/src/IO/ReadHelpers.h
index 544f4e25c7b..1c4e93bbd06 100644
--- a/dbms/src/IO/ReadHelpers.h
+++ b/dbms/src/IO/ReadHelpers.h
@@ -629,7 +629,7 @@ void readDateTimeTextFallback(time_t & datetime, ReadBuffer & buf, const DateLUT
 /** In YYYY-MM-DD hh:mm:ss format, according to specified time zone.
   * As an exception, also supported parsing of unix timestamp in form of decimal number.
   */
-inline void readDateTimeText(time_t & datetime, ReadBuffer & buf, const DateLUTImpl & date_lut = DateLUT::instance())
+inline void readDateTimeText(time_t & datetime, ReadBuffer & buf, const DateLUTImpl & date_lut)
 {
     /** Read 10 characters, that could represent unix timestamp.
       * Only unix timestamp of 5-10 characters is supported.
@@ -666,6 +666,11 @@ inline void readDateTimeText(time_t & datetime, ReadBuffer & buf, const DateLUTI
         readDateTimeTextFallback(datetime, buf, date_lut);
 }
 
+inline void readDateTimeText(time_t & datetime, ReadBuffer & buf)
+{
+    readDateTimeText(datetime, buf, DateLUT::instance());
+}
+
 inline void readDateTimeText(LocalDateTime & datetime, ReadBuffer & buf)
 {
     char s[19];
@@ -767,7 +772,7 @@ inline void readDoubleQuoted(LocalDateTime & x, ReadBuffer & buf)
 
 /// CSV, for numbers, dates, datetimes: quotes are optional, no special escaping rules.
 template <typename T>
-inline void readCSVSimple(T & x, ReadBuffer & buf)
+inline void readCSVSimple(T & x, ReadBuffer & buf, void (*readText_)(T & x, ReadBuffer & buf) = readText)
 {
     if (buf.eof())
         throwReadAfterEOF();
@@ -777,7 +782,7 @@ inline void readCSVSimple(T & x, ReadBuffer & buf)
     if (maybe_quote == '\'' || maybe_quote == '\"')
         ++buf.position();
 
-    readText(x, buf);
+    readText_(x, buf);
 
     if (maybe_quote == '\'' || maybe_quote == '\"')
         assertChar(maybe_quote, buf);
diff --git a/dbms/tests/queries/0_stateless/00301_csv.reference b/dbms/tests/queries/0_stateless/00301_csv.reference
index d038fc8ac38..435eb0c87da 100644
--- a/dbms/tests/queries/0_stateless/00301_csv.reference
+++ b/dbms/tests/queries/0_stateless/00301_csv.reference
@@ -2,3 +2,7 @@ Hello, world	123	2016-01-01
 Hello, "world"	456	2016-01-02
 Hello "world"	789	2016-01-03
 Hello\n world	100	2016-01-04
+2016-01-01 01:02:03	1
+2016-01-02 01:02:03	2
+2017-08-15 13:15:01	3
+1970-01-02 06:46:39	4
diff --git a/dbms/tests/queries/0_stateless/00301_csv.sh b/dbms/tests/queries/0_stateless/00301_csv.sh
index 37f59eba277..8d1fe7055a2 100755
--- a/dbms/tests/queries/0_stateless/00301_csv.sh
+++ b/dbms/tests/queries/0_stateless/00301_csv.sh
@@ -11,3 +11,13 @@ Hello "world", 789 ,2016-01-03
 
 clickhouse-client --query="SELECT * FROM test.csv ORDER BY d";
 clickhouse-client --query="DROP TABLE test.csv";
+
+clickhouse-client --query="CREATE TABLE test.csv (t DateTime, s String) ENGINE = Memory";
+
+echo '"2016-01-01 01:02:03","1"
+2016-01-02 01:02:03, "2"
+1502792101,"3"
+99999,"4"' | clickhouse-client --query="INSERT INTO test.csv FORMAT CSV";
+
+clickhouse-client --query="SELECT * FROM test.csv ORDER BY s";
+clickhouse-client --query="DROP TABLE test.csv";

From d6736a32d98ac9f471858ca187d5079a66b6caed Mon Sep 17 00:00:00 2001
From: alexey-milovidov <milovidov@yandex-team.ru>
Date: Wed, 16 Aug 2017 00:33:07 +0300
Subject: [PATCH 190/281] Update star_schema.rst

---
 docs/en/getting_started/example_datasets/star_schema.rst | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/docs/en/getting_started/example_datasets/star_schema.rst b/docs/en/getting_started/example_datasets/star_schema.rst
index b3b5a199c25..0e041fb343b 100644
--- a/docs/en/getting_started/example_datasets/star_schema.rst
+++ b/docs/en/getting_started/example_datasets/star_schema.rst
@@ -42,11 +42,11 @@ Create tables in ClickHouse:
             LO_TAX                  UInt8,
             LO_COMMITDATE           Date,
             LO_SHIPMODE             String
-    )Engine=MergeTree(LO_ORDERDATE,(LO_ORDERKEY,LO_LINENUMBER,LO_ORDERDATE),8192);
+    ) Engine = MergeTree(LO_ORDERDATE,(LO_ORDERKEY,LO_LINENUMBER,LO_ORDERDATE),8192);
 
     CREATE TABLE customer (
             C_CUSTKEY       UInt32,
-            C_NAME	        String,
+            C_NAME          String,
             C_ADDRESS       String,
             C_CITY          String,
             C_NATION        String,
@@ -54,7 +54,7 @@ Create tables in ClickHouse:
             C_PHONE         String,
             C_MKTSEGMENT    String,
             C_FAKEDATE      Date
-    )Engine=MergeTree(C_FAKEDATE,(C_CUSTKEY,C_FAKEDATE),8192);
+    ) Engine = MergeTree(C_FAKEDATE,(C_CUSTKEY,C_FAKEDATE),8192);
 
     CREATE TABLE part (
             P_PARTKEY       UInt32,
@@ -67,7 +67,7 @@ Create tables in ClickHouse:
             P_SIZE          UInt8,
             P_CONTAINER     String,
             P_FAKEDATE      Date
-    )Engine=MergeTree(P_FAKEDATE,(P_PARTKEY,P_FAKEDATE),8192);
+    ) Engine = MergeTree(P_FAKEDATE,(P_PARTKEY,P_FAKEDATE),8192);
 
     CREATE TABLE lineorderd AS lineorder ENGINE = Distributed(perftest_3shards_1replicas, default, lineorder, rand());
     CREATE TABLE customerd AS customer ENGINE = Distributed(perftest_3shards_1replicas, default, customer, rand());

From 2ddf4625638b5e3fb615fdc2830e6624276b5c25 Mon Sep 17 00:00:00 2001
From: Alexey Milovidov <milovidov@yandex-team.ru>
Date: Wed, 16 Aug 2017 00:52:52 +0300
Subject: [PATCH 191/281] Fixed regression [#CLICKHOUSE-2].

---
 dbms/src/Functions/GatherUtils.h              |  2 +-
 .../00493_substring_of_fixedstring.reference  | 52 +++++++++++++++++++
 .../00493_substring_of_fixedstring.sql        |  7 +++
 3 files changed, 60 insertions(+), 1 deletion(-)
 create mode 100644 dbms/tests/queries/0_stateless/00493_substring_of_fixedstring.reference
 create mode 100644 dbms/tests/queries/0_stateless/00493_substring_of_fixedstring.sql

diff --git a/dbms/src/Functions/GatherUtils.h b/dbms/src/Functions/GatherUtils.h
index 75492770de4..d0f90733c95 100644
--- a/dbms/src/Functions/GatherUtils.h
+++ b/dbms/src/Functions/GatherUtils.h
@@ -276,7 +276,7 @@ struct FixedStringSource
     {
         const auto & chars = col.getChars();
         pos = chars.data();
-        end = pos + col.size();
+        end = pos + chars.size();
     }
 
     void next()
diff --git a/dbms/tests/queries/0_stateless/00493_substring_of_fixedstring.reference b/dbms/tests/queries/0_stateless/00493_substring_of_fixedstring.reference
new file mode 100644
index 00000000000..72d55841a37
--- /dev/null
+++ b/dbms/tests/queries/0_stateless/00493_substring_of_fixedstring.reference
@@ -0,0 +1,52 @@
+hello\0\0\0
+hello\0\0\0
+0\0\0\0\0\0\0\0
+1\0\0\0\0\0\0\0
+2\0\0\0\0\0\0\0
+3\0\0\0\0\0\0\0
+4\0\0\0\0\0\0\0
+5\0\0\0\0\0\0\0
+6\0\0\0\0\0\0\0
+7\0\0\0\0\0\0\0
+8\0\0\0\0\0\0\0
+9\0\0\0\0\0\0\0
+995
+996
+997
+998
+999
+100
+100
+100
+100
+100
+
+9
+99
+998
+999\0
+
+1
+10
+100
+1004
+995\0
+96\0
+7\0
+\0
+
+1000
+001
+02
+3
+
+995
+9
+7\0
+\0
+
+10
+001
+0
+3
+
diff --git a/dbms/tests/queries/0_stateless/00493_substring_of_fixedstring.sql b/dbms/tests/queries/0_stateless/00493_substring_of_fixedstring.sql
new file mode 100644
index 00000000000..e267e1d54bb
--- /dev/null
+++ b/dbms/tests/queries/0_stateless/00493_substring_of_fixedstring.sql
@@ -0,0 +1,7 @@
+SELECT substring(toFixedString('hello', 16), 1, 8);
+SELECT substring(toFixedString(materialize('hello'), 16), 1, 8);
+SELECT substring(toFixedString(toString(number), 16), 1, 8) FROM system.numbers LIMIT 10;
+SELECT substring(toFixedString(toString(number), 4), 1, 3) FROM system.numbers LIMIT 995, 10;
+SELECT substring(toFixedString(toString(number), 4), 1, number % 5) FROM system.numbers LIMIT 995, 10;
+SELECT substring(toFixedString(toString(number), 4), 1 + number % 5) FROM system.numbers LIMIT 995, 10;
+SELECT substring(toFixedString(toString(number), 4), 1 + number % 5, 1 + number % 3) FROM system.numbers LIMIT 995, 10;

From d3118b6e9223c022f5e49b5fb2883c11656ec967 Mon Sep 17 00:00:00 2001
From: Konstantin Lebedev <kostyan.lebedev@gmail.com>
Date: Wed, 16 Aug 2017 01:27:47 +0300
Subject: [PATCH 192/281] Added link to native python driver

---
 docs/ru/interfaces/third-party_client_libraries.rst | 1 +
 1 file changed, 1 insertion(+)

diff --git a/docs/ru/interfaces/third-party_client_libraries.rst b/docs/ru/interfaces/third-party_client_libraries.rst
index 94d69595cab..6c8a0f09359 100644
--- a/docs/ru/interfaces/third-party_client_libraries.rst
+++ b/docs/ru/interfaces/third-party_client_libraries.rst
@@ -6,6 +6,7 @@
 * Python:
     - `infi.clickhouse_orm <https://github.com/Infinidat/infi.clickhouse_orm>`_
     - `sqlalchemy-clickhouse <https://github.com/cloudflare/sqlalchemy-clickhouse>`_
+    - `clickhouse-driver <https://github.com/mymarilyn/clickhouse-driver>`_
 * PHP
     - `clickhouse-php-client <https://github.com/8bitov/clickhouse-php-client>`_
     - `PhpClickHouseClient <https://github.com/SevaCode/PhpClickHouseClient>`_

From 215098fe70e864683a9b992c857a28bb5d70db3f Mon Sep 17 00:00:00 2001
From: BayoNet <bayonet@virtUbuntu16.04>
Date: Wed, 16 Aug 2017 11:14:13 +0300
Subject: [PATCH 193/281] Some errors in descriptions are fixed

---
 docs/ru/dicts/external_dicts_dict.rst         | 2 +-
 docs/ru/dicts/external_dicts_dict_sources.rst | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/docs/ru/dicts/external_dicts_dict.rst b/docs/ru/dicts/external_dicts_dict.rst
index b400261f0e5..eab4ef30381 100644
--- a/docs/ru/dicts/external_dicts_dict.rst
+++ b/docs/ru/dicts/external_dicts_dict.rst
@@ -31,5 +31,5 @@
 * name - Идентификатор, под которым словарь будет доступен для использования. Используйте символы ``[a-zA-Z0-9_\-]``.
 * :ref:`source <dicts-external_dicts_dict_sources>` - Источник словаря.
 * :ref:`layout <dicts-external_dicts_dict_layout>` - Размещение словаря в памяти.
-* :ref:`structure <dicts-external_dicts_dict_structure>` - Ключ словаря.
+* :ref:`structure <dicts-external_dicts_dict_structure>` - Структура словаря. Ключ и атрибуты, которые можно получить по ключу.
 * :ref:`lifetime <dicts-external_dicts_dict_lifetime>` - Периодичность обновления словарей.
diff --git a/docs/ru/dicts/external_dicts_dict_sources.rst b/docs/ru/dicts/external_dicts_dict_sources.rst
index 035287b1a52..d65e59124be 100644
--- a/docs/ru/dicts/external_dicts_dict_sources.rst
+++ b/docs/ru/dicts/external_dicts_dict_sources.rst
@@ -102,7 +102,7 @@ HTTP(s)
       </http>
   </source>
 
-Чтобы ClickHouse смог обратиться к HTTPS-ресурсу, необходимо прописать :ref:`настройки openSSL <server_settings-openSSL>` в конфигурации сервера.
+Чтобы ClickHouse смог обратиться к HTTPS-ресурсу, необходимо :ref:`настроить openSSL <server_settings-openSSL>` в конфигурации сервера.
 
 Поля настройки:
 

From 7dae4f9f7b29ae3a9e0c5366f3a0e17600219503 Mon Sep 17 00:00:00 2001
From: BayoNet <bayonet@virtUbuntu16.04>
Date: Mon, 31 Jul 2017 11:31:30 +0300
Subject: [PATCH 194/281] External dictionaries topic is restructured and
 updated.

---
 docs/ru/dicts/external_dicts.rst              | 343 +---------------
 docs/ru/dicts/external_dicts_dict.rst         |  35 ++
 docs/ru/dicts/external_dicts_dict_layout.rst  | 250 +++++++++++
 .../ru/dicts/external_dicts_dict_lifetime.rst |  38 ++
 docs/ru/dicts/external_dicts_dict_sources.rst | 388 ++++++++++++++++++
 .../dicts/external_dicts_dict_structure.rst   | 125 ++++++
 docs/ru/dicts/index.rst                       |   4 +-
 docs/ru/formats/index.rst                     |   2 +
 docs/ru/functions/ext_dict_functions.rst      |   4 +-
 docs/ru/functions/other_functions.rst         |   2 +
 .../operations/server_settings/settings.rst   |  14 +-
 11 files changed, 877 insertions(+), 328 deletions(-)
 create mode 100644 docs/ru/dicts/external_dicts_dict.rst
 create mode 100644 docs/ru/dicts/external_dicts_dict_layout.rst
 create mode 100644 docs/ru/dicts/external_dicts_dict_lifetime.rst
 create mode 100644 docs/ru/dicts/external_dicts_dict_sources.rst
 create mode 100644 docs/ru/dicts/external_dicts_dict_structure.rst

diff --git a/docs/ru/dicts/external_dicts.rst b/docs/ru/dicts/external_dicts.rst
index 4001eeff87f..00374b5a91e 100644
--- a/docs/ru/dicts/external_dicts.rst
+++ b/docs/ru/dicts/external_dicts.rst
@@ -1,345 +1,44 @@
 .. _dicts-external_dicts:
 
+***************
 Внешние словари
-===============
+***************
 
-Существует возможность подключать свои собственные словари из различных источников данных.
-Источником данных для словаря может быть файл на локальной файловой системе, сервер ClickHouse, сервер MySQL, MongoDB или любой ODBC источник.
-Словарь может полностью храниться в оперативке и периодически обновляться, или быть частично закэшированным в оперативке и динамически подгружать отсутствующие значения.
+Существует возможность подключать собственные словари из различных источников данных. Источником данных для словаря может быть локальный текстовый/исполняемый файл, HTTP(s) ресурс или другая СУБД. Подробнее смотрите в разделе ":ref:`dicts-external_dicts_dict_sources`".
 
-Конфигурация внешних словарей находится в отдельном файле или файлах, указанных в конфигурационном параметре :ref:`dictionaries_config <server_settings-dictionaries_config>`.
-Этот параметр содержит абсолютный или относительный путь к файлу с конфигурацией словарей. Относительный путь - относительно директории с конфигурационным файлом сервера. Путь может содержать wildcard-ы \* и ? - тогда рассматриваются все подходящие файлы. Пример: ``dictionaries/*.xml``.
+ClickHouse может полностью или частично хранить словари в оперативной памяти, периодически обновлять их и динамически подгружать отсутствующие значения.
 
-Конфигурация словарей, а также множество файлов с конфигурацией, может обновляться без перезапуска сервера. Сервер проверяет обновления каждые 5 секунд. То есть, словари могут подключаться динамически.
+Конфигурация внешних словарей находится в одном или нескольких файлах. Путь к конфигурации указывается в параметре :ref:`server_settings-dictionaries_config`.
 
-Создание словарей может производиться при старте сервера или при первом использовании. Это определяется конфигурационном параметром :ref:`dictionaries_lazy_load <server_settings-dictionaries_lazy_load>` (в основном конфигурационном файле сервера). Параметр не обязателен, по умолчанию - ``true``. Если true, то каждый словарь создаётся при первом использовании;  если словарь не удалось создать - вызов функции, использующей словарь, кидает исключение. Если ``false``, то все словари создаются при старте сервера, и в случае ошибки, сервер завершает работу.
+Периодически ClickHouse обновляет конфигурацию словарей и словари. Т.о. словари можно подгружать динамически.
+
+Словари могут загружаться при старте сервера или при первом использовании, в зависимости от настройки :ref:`server_settings-dictionaries_lazy_load`.
 
 Конфигурационный файл словарей имеет вид:
 
 .. code-block:: xml
 
   <dictionaries>
-      <comment>Не обязательный элемент с любым содержимым; полностью игнорируется.</comment>
+      <comment>Необязательный элемент с любым содержимым. Полностью игнорируется.</comment>
   
-      <!-- Можно задать произвольное количество разных словарей. -->
       <dictionary>
-          <!-- Имя словаря. Под этим именем словарь будет доступен для использования. -->
-          <name>os</name>
-  
-          <!-- Источник данных. -->
-          <source>
-  
-              <!-- Источник - файл на локальной файловой системе. -->
-              <file>
-                  <!-- Путь на локальной файловой системе. -->
-                  <path>/opt/dictionaries/os.tsv</path>
-                  <!-- С помощью какого формата понимать файл. -->
-                  <format>TabSeparated</format>
-              </file>
-  
-              <!-- или источник - таблица на сервере MySQL.
-              <mysql>
-                  <!- - Эти параметры могут быть указаны как снаружи (общие для всех реплик), так и внутри конкретной реплики - ->
-                  <port>3306</port>
-                  <user>clickhouse</user>
-                  <password>qwerty</password>
-                  <!- - Можно указать от одной до произвольного количества реплик для отказоустойчивости. - ->
-                  <replica>
-                      <host>example01-1</host>
-                      <priority>1</priority> <!- - Меньше значение - больше приоритет. - ->
-                  </replica>
-                  <replica>
-                      <host>example01-2</host>
-                      <priority>1</priority>
-                  </replica>
-                  <db>conv_main</db>
-                  <table>counters</table>
-              </mysql>
-              -->
-  
-              <!-- или источник - таблица на сервере ClickHouse.
-              <clickhouse>
-                  <host>example01-01-1</host>
-                  <port>9000</port>
-                  <user>default</user>
-                  <password></password>
-                  <db>default</db>
-                  <table>counters</table>
-              </clickhouse>
-              <!- - Если адрес похож на localhost, то запрос будет идти без сетевого взаимодействия.
-                    Для отказоустойчивости, вы можете создать Distributed таблицу на localhost и прописать её. - ->
-              -->
-  
-              <!-- Для <mysql> и <clickhouse> доступен атрибут <where>, позволяющий задать условие выбора
-              <clickhouse>
-                  <host>example01-01-1</host>
-                  <port>9000</port>
-                  <user>default</user>
-                  <password></password>
-                  <db>default</db>
-                  <table>ids</table>
-                  <where>id=10</where>
-              </clickhouse>
-              -->
+          <!-- Конфигурация словаря -->
+      </dictionary>
 
-              <!-- или источник - исполняемый файл. Если layout.cache - список нужных ключей будет записан в поток STDIN программы -->
-              <executable>
-                  <!-- Путь или имя программы (если директория есть в переменной окружения PATH) и параметры -->
-                  <command>cat /opt/dictionaries/os.tsv</command>
-                  <!-- С помощью какого формата понимать вывод и формировать список ключей. -->
-                  <format>TabSeparated</format>
-              </executable>
-  
-              <!-- или источник - http сервер. Если layout.cache - список нужных ключей будет послан как POST запрос -->
-              <http>
-                  <url>http://[::1]/os.tsv</url>
-                  <!-- С помощью какого формата понимать ответ и формировать список ключей. -->
-                  <format>TabSeparated</format>
-              </http>
-  
-          </source>
-  
-          <!-- Периодичность обновления для полностью загружаемых словарей. 0 - никогда не обновлять. -->
-          <lifetime>
-              <min>300</min>
-              <max>360</max>
-              <!-- Периодичность обновления выбирается равномерно-случайно между min и max,
-                   чтобы размазать по времени нагрузку при обновлении словарей на большом количестве серверов. -->
-          </lifetime>
-  
-          <!-- или
-          <!- - Периодичность обновления для полностью загружаемых словарей или время инвалидации для кэшируемых словарей.
-                0 - никогда не обновлять. - ->
-          <lifetime>300</lifetime>
-          -->
-  
-          <layout>   <!-- Способ размещения в памяти. -->
-              <flat />
-              <!-- или
-              <hashed />
-              или
-              <cache>
-                  <!- - Размер кэша в количестве ячеек; округляется вверх до степени двух. - ->
-                  <size_in_cells>1000000000</size_in_cells>
-              </cache>
-              -->
-          </layout>
-  
-          <!-- Структура. -->
-          <structure>
-              <!-- Описание столбца, являющегося идентификатором (ключом) словаря. -->
-              <id>
-                  <!-- Имя столбца с идентификатором. -->
-                  <name>Id</name>
-              </id>
-  
-              <attribute>    <!-- id уже входит в атрибуты и дополнительно указывать его здесь не нужно. -->
-                  <!-- Имя столбца. -->
-                  <name>Name</name>
-                  <!-- Тип столбца. (Как столбец понимается при загрузке.
-                       В случае MySQL, в таблице может быть TEXT, VARCHAR, BLOB, но загружается всё как String) -->
-                  <type>String</type>
-                  <!-- Какое значение использовать для несуществующего элемента. В примере - пустая строка. -->
-                  <null_value></null_value>
-              </attribute>
-  
-              <!-- Может быть указано произвольное количество атрибутов. -->
-              <attribute>
-                  <name>ParentID</name>
-                  <type>UInt64</type>
-                  <null_value>0</null_value>
-                  <!-- Определяет ли иерархию - отображение в идентификатор родителя (по умолчанию, false). -->
-                  <hierarchical>true</hierarchical>
-                  <!-- Можно считать отображение id -> attribute инъективным, чтобы оптимизировать GROUP BY. (по умолчанию, false) -->
-                  <injective>true</injective>
-              </attribute>
+      ...
 
-              <!-- Атрибут может быть выражением -->
-              <attribute>
-                  <name>expr</name>
-                  <type>UInt64</type>
-                  <expression>rand64()</expression>
-                  <null_value>0</null_value>
-              </attribute>
-          </structure>
+      <dictionary>
+          <!-- Конфигурация словаря -->
       </dictionary>
   </dictionaries>
 
-Идентификатор (ключевой атрибут) словаря должен быть числом, помещающимся в UInt64.
-Также есть возможность задавать произвольные составные ключи (см. раздел "Словари с составными ключами"). Замечание: составной ключ может состоять и из одного элемента, что даёт возможность использовать в качестве ключа, например, строку.
+В одном файле можно :ref:`сконфигурировать <dicts-external_dicts_dict>` произвольное количество словарей. Формат файла сохраняется, даже если словарь один (т.е. ``<dictionaries><dictionary> <!--configuration--> </dictionary></dictionaries>``).
 
+Смотрите также ":ref:`ext_dict_functions`".
 
-Существует шесть способов размещения словаря в памяти.
+.. attention:: Вы можете преобразовать значения по небольшому словарю, описав его в запросе ``SELECT`` (см. функцию ":ref:`other_functions-transform`"). Эта функциональность не связана с внешними словарями.
 
-flat
------
-В виде плоских массивов. Самый эффективный способ. Он подходит, если все ключи меньше 500 000. Если при создании словаря обнаружен ключ больше, то кидается исключение и словарь не создаётся. Словарь загружается в оперативку целиком. Словарь использует количество оперативки, пропорциональное максимальному значению ключа. Ввиду ограничения на 500 000, потребление оперативки вряд ли может быть большим.
-Поддерживаются все виды источников. При обновлении, данные (из файла, из таблицы) читаются целиком.
-
-hashed
-------
-В виде хэш-таблиц. Слегка менее эффективный способ. Словарь тоже загружается в оперативку целиком, и может содержать произвольное количество элементов с произвольными идентификаторами. На практике, имеет смысл использовать до десятков миллионов элементов, пока хватает оперативки.
-Поддерживаются все виды источников. При обновлении, данные (из файла, из таблицы) читаются целиком.
-
-cache
------
-Наименее эффективный способ. Подходит, если словарь не помещается в оперативку. Представляет собой кэш из фиксированного количества ячеек, в которых могут быть расположены часто используемые данные. Поддерживается источник MySQL, ClickHouse, executable, http; источник-файл не поддерживается. При поиске в словаре, сначала просматривается кэш. На каждый блок данных, все не найденные в кэше ключи (или устаревшие ключи) собираются в пачку, и с этой пачкой делается запрос к источнику вида SELECT attrs... FROM db.table WHERE id IN (k1, k2, ...). Затем полученные данные записываются в кэш.
-
-range_hashed
-------------
-В таблице прописаны какие-то данные для диапазонов дат, для каждого ключа. Дать возможность доставать эти данные для заданного ключа, для заданной даты.
-
-
-Пример: таблица содержит скидки для каждого рекламодателя в виде:
-
-  +------------------+-----------------------------+------------+----------+
-  | id рекламодателя | дата начала действия скидки | дата конца | величина |
-  +==================+=============================+============+==========+
-  | 123              | 2015-01-01                  | 2015-01-15 | 0.15     |
-  +------------------+-----------------------------+------------+----------+
-  | 123              | 2015-01-16                  | 2015-01-31 | 0.25     |
-  +------------------+-----------------------------+------------+----------+
-  | 456              | 2015-01-01                  | 2015-01-15 | 0.05     |
-  +------------------+-----------------------------+------------+----------+
-
-Добавляем ``layout = range_hashed``.
-При использовании такого layout, в structure должны быть элементы ``range_min``, ``range_max``.
-
-Пример:
-
-.. code-block:: xml
-
-  <structure>
-      <id>
-          <name>Id</name>
-      </id>
-      <range_min>
-          <name>first</name>
-      </range_min>
-      <range_max>
-          <name>last</name>
-      </range_max>
-      ...
-      
-Эти столбцы должны иметь тип Date. Другие типы пока не поддерживаем.
-Столбцы обозначают закрытый диапазон дат.
-
-Для работы с такими словарями, функции dictGetT должны принимать ещё один аргумент - дату:
-
-``dictGetT('dict_name', 'attr_name', id, date)``
-
-Функция достаёт значение для данного id и для диапазона дат, в который входит переданная дата. Если не найден id или для найденного id не найден диапазон, то возвращается значение по умолчанию для словаря.
-
-Если есть перекрывающиеся диапазоны, то можно использовать любой подходящий.
-
-Если граница диапазона является NULL или является некорректной датой (1900-01-01, 2039-01-01), то диапазон следует считать открытым. Диапазон может быть открытым с обеих сторон.
-
-В оперативке данные представлены в виде хэш-таблицы со значением в виде упорядоченного массива диапазонов и соответствующих им значений.
-
-Пример словаря по диапазонам:
-
-.. code-block:: xml
-
-  <dictionaries>
-          <dictionary>
-                  <name>xxx</name>
-                  <source>
-                          <mysql>
-                                  <password>xxx</password>
-                                  <port>3306</port>
-                                  <user>xxx</user>
-                                  <replica>
-                                          <host>xxx</host>
-                                          <priority>1</priority>
-                                  </replica>
-                                  <db>dicts</db>
-                                  <table>xxx</table>
-                          </mysql>
-                  </source>
-                  <lifetime>
-                          <min>300</min>
-                          <max>360</max>
-                  </lifetime>
-                  <layout>
-                          <range_hashed />
-                  </layout>
-                  <structure>
-                          <id>
-                                  <name>Abcdef</name>
-                          </id>
-                          <range_min>
-                                  <name>StartDate</name>
-                          </range_min>
-                          <range_max>
-                                  <name>EndDate</name>
-                          </range_max>
-                          <attribute>
-                                  <name>XXXType</name>
-                                  <type>String</type>
-                                  <null_value />
-                          </attribute>
-                  </structure>
-          </dictionary>
-  </dictionaries>
-
-complex_key_hashed
-------------------
-
-Для использования с составными ключами. Аналогичен hashed.
-
-complex_key_cache
------------------
-
-Для использования с составными ключами. Аналогичен cache.
-
-Примечания
-----------
-
-Рекомендуется использовать способ ``flat``, если возможно, или ``hashed``, ``complex_key_hashed``. Скорость работы словарей с таким размещением в памяти является безупречной.
-
-Способы ``cache`` и ``complex_key_cache`` следует использовать лишь если это неизбежно. Скорость работы кэша очень сильно зависит от правильности настройки и сценария использования. Словарь типа cache нормально работает лишь при достаточно больших hit rate-ах (рекомендуется 99% и выше). Посмотреть средний hit rate можно в таблице system.dictionaries. Укажите достаточно большой размер кэша. Количество ячеек следует подобрать экспериментальным путём - выставить некоторое значение, с помощью запроса добиться полной заполненности кэша, посмотреть на потребление оперативки (эта информация находится в таблице system.dictionaries); затем пропорционально увеличить количество ячеек так, чтобы расходовалось разумное количество оперативки. В качестве источника для кэша рекомендуется MySQL, MongoDB, так как ClickHouse плохо обрабатывает запросы со случайными чтениями.
-
-Во всех случаях, производительность будет выше, если вызывать функцию для работы со словарём после ``GROUP BY``, или если доставаемый атрибут помечен как инъективный. Для cache словарей, производительность будет лучше, если вызывать функцию после LIMIT-а - для этого можно использовать подзапрос с LIMIT-ом, и снаружи вызывать функцию со словарём.
-
-Атрибут называется инъективным, если разным ключам соответствуют разные значения атрибута. Тогда при использовании в ``GROUP BY`` функции, достающей значение атрибута по ключу, эта функция автоматически выносится из GROUP BY.
-
-При обновлении словарей из файла, сначала проверяется время модификации файла, и загрузка производится только если файл изменился.
-При обновлении из MySQL, для flat и hashed словарей, сначала делается запрос ``SHOW TABLE STATUS`` и смотрится время обновления таблицы. И если оно не NULL, то оно сравнивается с запомненным временем. Это работает для MyISAM таблиц, а для InnoDB таблиц время обновления неизвестно, поэтому загрузка из InnoDB делается при каждом обновлении.
-
-Для cache-словарей может быть задано время устаревания (``lifetime``) данных в кэше. Если от загрузки данных в ячейке прошло больше времени, чем lifetime, то значение не используется, и будет запрошено заново при следующей необходимости его использовать.
-
-Если словарь не удалось ни разу загрузить, то при попытке его использования, будет брошено исключение.
-Если при запросе к источнику cached словаря возникла ошибка, то будет брошено исключение.
-Обновление словарей (кроме загрузки при первом использовании) не блокирует запросы - во время обновления используется старая версия словаря. Если при обновлении возникнет ошибка, то ошибка пишется в лог сервера, а запросы продолжат использовать старую версию словарей.
-
-Список внешних словарей и их статус можно посмотреть в таблице ``system.dictionaries``.
-
-Для использования внешних словарей, смотрите раздел "Функции для работы с внешними словарями".
-
-Обратите внимание, что вы можете преобразовать значения по небольшому словарю, указав всё содержимое словаря прямо в запросе SELECT - смотрите раздел "Функция transform". Эта функциональность никак не связана с внешними словарями.
-
-Словари с составными ключами
-----------------------------
-
-В качестве ключа может выступать кортеж (tuple) из полей произвольных типов. Параметр layout в этом случае должен быть равен complex_key_hashed или complex_key_cache.
-
-Структура ключа задаётся не в элементе ``<id>``, а в элементе ``<key>``. Поля ключа задаются в том же формате, что и атрибуты словаря. Пример:
-
-.. code-block:: xml
-
-  <structure>
-      <key>
-          <attribute>
-              <name>field1</name>
-              <type>String</type>
-          </attribute>
-          <attribute>
-              <name>field2</name>
-              <type>UInt32</type>
-          </attribute>
-          ...
-      </key>
-  ...
-
-
-При использовании такого словаря, в функции dictGet* в качестве ключа передаётся Tuple со значениями полей. Пример: ``dictGetString('dict_name', 'attr_name', tuple('field1', 123))``.
+.. toctree::
+    :glob:
+   
+    external_dicts_dict*
diff --git a/docs/ru/dicts/external_dicts_dict.rst b/docs/ru/dicts/external_dicts_dict.rst
new file mode 100644
index 00000000000..b400261f0e5
--- /dev/null
+++ b/docs/ru/dicts/external_dicts_dict.rst
@@ -0,0 +1,35 @@
+.. _dicts-external_dicts_dict:
+
+**************************
+Настройка внешнего словаря
+**************************
+
+Конфигурация словаря имеет следующую структуру:
+
+.. code-block:: xml
+
+  <dictionary>
+      <name>dict_name</name>
+
+      <source>
+        <!-- Source configuration -->
+      </source>
+
+      <layout>
+        <!-- Memory layout configuration -->
+      </layout>
+
+      <structure>
+        <!-- Complex key configuration -->
+      </structure>
+
+      <lifetime>
+        <!-- Lifetime of dictionary in memory -->
+      </lifetime>
+  </dictionary>
+
+* name - Идентификатор, под которым словарь будет доступен для использования. Используйте символы ``[a-zA-Z0-9_\-]``.
+* :ref:`source <dicts-external_dicts_dict_sources>` - Источник словаря.
+* :ref:`layout <dicts-external_dicts_dict_layout>` - Размещение словаря в памяти.
+* :ref:`structure <dicts-external_dicts_dict_structure>` - Ключ словаря.
+* :ref:`lifetime <dicts-external_dicts_dict_lifetime>` - Периодичность обновления словарей.
diff --git a/docs/ru/dicts/external_dicts_dict_layout.rst b/docs/ru/dicts/external_dicts_dict_layout.rst
new file mode 100644
index 00000000000..4ee4cc6fe05
--- /dev/null
+++ b/docs/ru/dicts/external_dicts_dict_layout.rst
@@ -0,0 +1,250 @@
+.. _dicts-external_dicts_dict_layout:
+
+**************************
+Хранение словарей в памяти
+**************************
+
+Словари можно размещать в памяти :ref:`множеством способов <dicts-external_dicts_dict_layout-manner>`.
+
+Рекомендуем :ref:`dicts-external_dicts_dict_layout-flat`, :ref:`dicts-external_dicts_dict_layout-hashed` и :ref:`dicts-external_dicts_dict_layout-complex_key_hashed`. Скорость обработки словарей при этом максимальна.
+
+Размещение с кэшированием не рекомендуется использовать из-за потенциально низкой производительности и сложностей в подборе оптимальных параметров. Читайте об этом подробнее в разделе ":ref:`dicts-external_dicts_dict_layout-cache`".
+
+Повысить производительность словарей можно следующими способами:
+
+* Вызывать функцию для работы со словарём после ``GROUP BY``.
+* Помечать извлекаемые атрибуты как инъективные. Атрибут называется инъективным, если разным ключам соответствуют разные значения атрибута. Тогда при использовании в ``GROUP BY`` функции, достающей значение атрибута по ключу, эта функция автоматически выносится из ``GROUP BY``.
+
+ClickHouse периодически обновляет словари. Сначала проверяется время модификации файла/таблицы, затем, если файл/таблица обновились, обновляется словарь. Если словарь хранится в таблице типа MyISAM, то время модификации проверяется запросом ``SHOW TABLE STATUS``. Для таблиц InnoDB нельзя получить время модификации, поэтому словарь обновляется каждый раз.
+
+Обновление словарей (кроме загрузки при первом использовании) не блокирует запросы - во время обновления используется старая версия словаря. Если при обновлении возникнет ошибка, то ошибка пишется в лог сервера, а запросы продолжат использовать старую версию словарей.
+
+При ошибках работы со словарями ClickHouse генерирует исключения. Например, в следующих ситуациях:
+
+* При обращении к словарю, который не удалось загрузить.
+* При ошибке запроса к ``cached``-словарю.
+
+
+Список внешних словарей и их статус можно посмотреть в таблице ``system.dictionaries``.
+
+Общий вид конфигурации:
+
+.. code-block:: xml
+
+  <dictionaries>
+      <dictionary>
+          ...
+          <layout>
+              <layout_type> 
+                  <!-- layout settings -->
+              </layout_type>
+          </layout>
+          ...
+      </dictionary>
+  </dictionaries>
+
+
+.. _dicts-external_dicts_dict_layout-manner:
+
+Способы размещения словарей в памяти
+====================================
+
+* :ref:`dicts-external_dicts_dict_layout-flat`
+* :ref:`dicts-external_dicts_dict_layout-hashed`
+* :ref:`dicts-external_dicts_dict_layout-cache`
+* :ref:`dicts-external_dicts_dict_layout-range_hashed`
+* :ref:`dicts-external_dicts_dict_layout-complex_key_hashed`
+* :ref:`dicts-external_dicts_dict_layout-complex_key_cache`
+
+
+.. _dicts-external_dicts_dict_layout-flat:
+
+flat
+----
+
+Словарь полностью хранится в оперативной памяти в виде плоских массивов. Объем памяти, занимаемой словарем, пропорционален размеру самого большого (по размеру) ключа.
+
+Ключ словаря имеет тип ``UInt64`` и его величина ограничена 500 000. Если при создании словаря обнаружен ключ больше, то ClickHouse бросает исключение и не создает словарь.
+
+Поддерживаются все виды источников. При обновлении, данные (из файла, из таблицы) читаются целиком.
+
+Этот метод обеспечивает максимальную производительность среди всех доступных способов размещения словаря.
+
+Пример конфигурации:
+
+.. code-block:: xml
+
+  <layout>
+    <flat />
+  </layout>
+
+
+.. _dicts-external_dicts_dict_layout-hashed:
+
+hashed
+------
+
+Словарь полностью хранится в оперативной памяти в виде хэш-таблиц. Словарь может содержать произвольное количество элементов с произвольными идентификаторами. На практике, количество ключей может достигать десятков миллионов элементов.
+
+Поддерживаются все виды источников. При обновлении, данные (из файла, из таблицы) читаются целиком.
+
+Пример конфигурации:
+
+.. code-block:: xml
+
+  <layout>
+    <hashed />
+  </layout>
+
+
+.. _dicts-external_dicts_dict_layout-complex_key_hashed:
+
+complex_key_hashed
+------------------
+
+Тип размещения предназначен для использования с составными :ref:`ключами <dicts-external_dicts_dict_structure>`. Аналогичен hashed.
+
+Пример конфигурации:
+
+.. code-block:: xml
+
+  <layout>
+    <complex_key_hashed />
+  </layout>
+
+
+.. _dicts-external_dicts_dict_layout-range_hashed:
+
+range_hashed
+------------
+
+Словарь хранится в оперативной памяти в виде хэш-таблицы с упорядоченным массивом диапазонов и соответствующих им значений.
+
+Этот способ размещения работает так же, как и hashed, и позволяет дополнительно к ключу использовать диапазоны по дате/времени, если они указаны в словаре.
+
+Пример: таблица содержит скидки для каждого рекламодателя в виде:
+
+  +------------------+-----------------------------+------------+----------+
+  | id рекламодателя | дата начала действия скидки | дата конца | величина |
+  +==================+=============================+============+==========+
+  | 123              | 2015-01-01                  | 2015-01-15 | 0.15     |
+  +------------------+-----------------------------+------------+----------+
+  | 123              | 2015-01-16                  | 2015-01-31 | 0.25     |
+  +------------------+-----------------------------+------------+----------+
+  | 456              | 2015-01-01                  | 2015-01-15 | 0.05     |
+  +------------------+-----------------------------+------------+----------+
+
+Столбцы с датами в словаре должны иметь тип ``Date``.
+
+Чтобы использовать выборку по диапазонам дат, необходимо в :ref:`structure <dicts-external_dicts_dict_structure>` определить элементы ``range_min``, ``range_max``.
+
+Пример:
+
+.. code-block:: xml
+
+  <structure>
+      <id>
+          <name>Id</name>
+      </id>
+      <range_min>
+          <name>first</name>
+      </range_min>
+      <range_max>
+          <name>last</name>
+      </range_max>
+      ...
+      
+
+
+Для работы с такими словарями в функцию ``dictGetT`` необходимо передавать дополнительный аргумент - дату: ::
+
+  dictGetT('dict_name', 'attr_name', id, date)
+
+Функция возвращает значение для заданных ``id`` и диапазона дат, в который входит переданная дата.
+
+Особенности алгоритма:
+
+* Если не найден ``id`` или для найденного ``id`` не найден диапазон, то возвращается значение по умолчанию для словаря.
+* Если есть перекрывающиеся диапазоны, то можно использовать любой подходящий.
+* Если граница диапазона ``NULL`` или некорректная дата (1900-01-01, 2039-01-01), то диапазон считается открытым. Диапазон может быть открытым с обеих сторон.
+
+
+Пример конфигурации:
+
+.. code-block:: xml
+
+  <dictionaries>
+          <dictionary>
+                  
+                  ...
+                  
+                  <layout>
+                          <range_hashed />
+                  </layout>
+                  
+                  <structure>
+                          <id>
+                                  <name>Abcdef</name>
+                          </id>
+                          <range_min>
+                                  <name>StartDate</name>
+                          </range_min>
+                          <range_max>
+                                  <name>EndDate</name>
+                          </range_max>
+                          <attribute>
+                                  <name>XXXType</name>
+                                  <type>String</type>
+                                  <null_value />
+                          </attribute>
+                  </structure>
+
+          </dictionary>
+  </dictionaries>
+
+
+.. _dicts-external_dicts_dict_layout-cache:
+
+cache
+-----
+
+Словарь хранится в кэше, состоящем из фиксированного количества ячеек. Ячейки содержат часто используемые элементы.
+
+При поиске в словаре сначала просматривается кэш. На каждый блок данных, все не найденные в кэше или устаревшие ключи запрашиваются у источника с помощью ``SELECT attrs... FROM db.table WHERE id IN (k1, k2, ...)``. Затем, полученные данные записываются в кэш.
+
+Для cache-словарей может быть задано время устаревания (:ref:`lifetime <dicts-external_dicts_dict_lifetime>`) данных в кэше. Если от загрузки данных в ячейке прошло больше времени, чем ``lifetime``, то значение не используется, и будет запрошено заново при следующей необходимости его использовать.
+
+Это наименее эффективный из всех способов размещения словарей. Скорость работы кэша очень сильно зависит от правильности настройки и сценария использования. Словарь типа cache показывает высокую производительность лишь при достаточно больших hit rate-ах (рекомендуется 99% и выше). Посмотреть средний hit rate можно в таблице ``system.dictionaries``.
+
+Чтобы увеличить производительность кэша, используйте подзапрос с ``LIMIT``, а снаружи вызывайте функцию со словарём.
+
+Поддерживаются :ref:`источники <dicts-external_dicts_dict_sources>`: MySQL, ClickHouse, executable, HTTP.
+
+Пример настройки:
+
+.. code-block:: xml
+             
+    <layout>
+        <cache>
+            <!-- Размер кэша в количестве ячеек. Округляется вверх до степени двух. -->
+            <size_in_cells>1000000000</size_in_cells>
+        </cache>
+    </layout>
+
+Укажите достаточно большой размер кэша. Количество ячеек следует подобрать экспериментальным путём:
+
+1. Выставить некоторое значение.
+2. Запросами добиться полной заполненности кэша.
+3. Оценить потребление оперативной памяти с помощью таблицы ``system.dictionaries``.
+4. Увеличивать/уменьшать количество ячеек до получения требуемого расхода оперативной памяти.
+
+.. warning:: Не используйте в качестве источника ClickHouse, поскольку он медленно обрабатывает запросы со случайным чтением.
+
+
+.. _dicts-external_dicts_dict_layout-complex_key_cache:
+
+complex_key_cache
+-----------------
+
+Тип размещения предназначен для использования с составными :ref:`ключами <dicts-external_dicts_dict_structure>`. Аналогичен ``cache``.
+
diff --git a/docs/ru/dicts/external_dicts_dict_lifetime.rst b/docs/ru/dicts/external_dicts_dict_lifetime.rst
new file mode 100644
index 00000000000..d3de506b800
--- /dev/null
+++ b/docs/ru/dicts/external_dicts_dict_lifetime.rst
@@ -0,0 +1,38 @@
+.. _dicts-external_dicts_dict_lifetime:
+
+*******************
+Обновление словарей
+*******************
+
+ClickHouse периодически обновляет словари. Интервал обновления для полностью загружаемых словарей и интервал инвалидации для кэшируемых словарей определяется в теге ``<lifetime>`` в секундах.
+
+Обновление словарей (кроме загрузки при первом использовании) не блокирует запросы, во время обновления используется старая версия словаря. Если при обновлении возникнет ошибка, то ошибка пишется в лог сервера, а запросы продолжат использовать старую версию словарей.
+
+Пример настройки:
+
+.. code-block:: xml
+
+  <dictionary>
+      ...
+      <lifetime>300</lifetime>
+      ...
+  </dictionary>
+
+
+Настройка ``<lifetime>0</lifetime>`` запрещает обновление словарей.
+
+
+Можно задать интервал, внутри которого ClickHouse равномерно-случайно выберет время для обновления. Это необходимо для распределения нагрузки на источник словаря при обновлении на большом количестве серверов.
+
+Пример настройки:
+
+.. code-block:: xml
+
+  <dictionary>
+      ...
+      <lifetime>
+          <min>300</min>
+          <max>360</max>
+      </lifetime>
+      ...
+  </dictionary>
diff --git a/docs/ru/dicts/external_dicts_dict_sources.rst b/docs/ru/dicts/external_dicts_dict_sources.rst
new file mode 100644
index 00000000000..035287b1a52
--- /dev/null
+++ b/docs/ru/dicts/external_dicts_dict_sources.rst
@@ -0,0 +1,388 @@
+.. _dicts-external_dicts_dict_sources:
+
+**************************
+Источники внешних словарей
+**************************
+
+Внешний словарь можно подключить из множества источников.
+
+Общий вид конфигурации:
+
+.. code-block:: xml
+
+  <dictionaries>
+    <dictionary>
+      ...
+      <source>
+        <source_type>
+          <!-- Source configuration -->
+        </source_type>
+      </source>
+      ...
+    </dictionary>
+    ...
+  </dictionaries>
+
+Источник настраивается в разделе ``source``. 
+
+Типы источников (``source_type``):
+
+ * :ref:`dicts-external_dicts_dict_sources-local_file`
+ * :ref:`dicts-external_dicts_dict_sources-executable`
+ * :ref:`dicts-external_dicts_dict_sources-http`
+ * :ref:`dicts-external_dicts_dict_sources-odbc`
+ * СУБД:
+
+   * :ref:`dicts-external_dicts_dict_sources-mysql`
+   * :ref:`dicts-external_dicts_dict_sources-clickhouse`
+   * :ref:`dicts-external_dicts_dict_sources-mongodb`
+
+
+.. _dicts-external_dicts_dict_sources-local_file:
+
+Локальный файл
+==============
+
+Пример настройки:
+
+.. code-block:: xml
+
+  <source>
+    <file>
+      <path>/opt/dictionaries/os.tsv</path>
+      <format>TabSeparated</format>
+    </file>
+  </source>
+
+Поля настройки:
+
+* ``path`` - Абсолютный путь к файлу.
+* ``format`` - Формат файла. Поддерживаются все форматы, описанные в разделе ":ref:`formats`".
+
+
+.. _dicts-external_dicts_dict_sources-executable:
+
+Исполняемый файл
+================
+
+Работа с исполняемым файлом зависит от :ref:`размещения словаря в памяти <dicts-external_dicts_dict_layout>`. Если тип размещения словаря ``cache`` или ``complex_key_cache``, то ClickHouse запрашивает необходимые ключи, отправляя запрос в ``STDIN`` исполняемого файла.
+
+Пример настройки:
+
+.. code-block:: xml
+
+  <source>
+      <executable>
+          <command>cat /opt/dictionaries/os.tsv</command>
+          <format>TabSeparated</format>
+      </executable>
+  </source>
+
+Поля настройки:
+
+* ``command`` - Абсолютный путь к исполняемому файлу или имя файла (если каталог программы прописан в ``PATH``).
+* ``format`` - Формат файла. Поддерживаются все форматы, описанные в разделе ":ref:`formats`".
+
+
+.. _dicts-external_dicts_dict_sources-http:
+
+HTTP(s)
+=======
+
+Работа с HTTP(s) сервером зависит от :ref:`размещения словаря в памяти <dicts-external_dicts_dict_layout>`. Если тип размещения словаря ``cache`` или ``complex_key_cache``, то ClickHouse запрашивает необходимые ключи, отправляя запрос методом ``POST``.
+
+Пример настройки:
+
+.. code-block:: xml
+
+  <source>
+      <http>
+          <url>http://[::1]/os.tsv</url>
+          <format>TabSeparated</format>
+      </http>
+  </source>
+
+Чтобы ClickHouse смог обратиться к HTTPS-ресурсу, необходимо прописать :ref:`настройки openSSL <server_settings-openSSL>` в конфигурации сервера.
+
+Поля настройки:
+
+* ``url`` - URL источника.
+* ``format`` - Формат файла. Поддерживаются все форматы, описанные в разделе ":ref:`formats`".
+
+
+.. _dicts-external_dicts_dict_sources-odbc:
+
+ODBC
+====
+
+Этим способом можно подключить любую базу данных, имеющую ODBC драйвер.
+
+Пример настройки:
+
+.. code-block:: xml
+
+  <odbc>
+      <db>DatabaseName</db>
+      <table>TableName</table>
+      <connection_string>DSN=some_parameters</connection_string>
+  </odbc>
+
+Поля настройки:
+
+* ``db`` - Имя базы данных. Не указывать, если имя базы задано в параметрах ``<connection_string>``.
+* ``table`` - Имя таблицы.
+* ``connection_string`` - Строка соединения.
+  
+
+Пример подключения PostgreSQL
+-----------------------------
+
+ОС Ubuntu.
+
+Установка unixODBC и ODBC-драйвера для PostgreSQL: ::
+
+  sudo apt-get install -y unixodbc odbcinst odbc-postgresql
+
+
+Настройка ``/etc/odbc.ini`` (или ``~/.odbc.ini``): ::
+
+  [DEFAULT]
+  Driver = myconnection
+
+  [myconnection]
+  Description         = PostgreSQL connection to my_db
+  Driver              = PostgreSQL Unicode
+  Database            = my_db
+  Servername          = 127.0.0.1
+  UserName            = username
+  Password            = password
+  Port                = 5432
+  Protocol            = 9.3
+  ReadOnly            = No
+  RowVersioning       = No
+  ShowSystemTables    = No
+  ConnSettings        =
+
+
+Конфигурация словаря в ClickHouse:
+
+.. code-block:: xml
+
+  <dictionary>
+      <name>table_name</name>
+      <source>
+          <odbc>
+              <!-- в connection_string можно указывать следующие параметры: -->
+              <!-- DSN=myconnection;UID=username;PWD=password;HOST=127.0.0.1;PORT=5432;DATABASE=my_db -->
+              <connection_string>DSN=myconnection</connection_string>
+              <table>postgresql_table</table>
+          </odbc>
+      </source>
+      <lifetime>
+          <min>300</min>
+          <max>360</max>
+      </lifetime>
+      <layout>
+          <hashed/>
+      </layout>
+      <structure>
+          <id>
+              <name>id</name>
+          </id>
+          <attribute>
+              <name>some_column</name>
+              <type>UInt64</type>
+              <null_value>0</null_value>
+          </attribute>
+      </structure>
+  </dictionary>
+
+Может понадобиться в ``odbc.ini`` указать полный путь до библиотеки с драйвером ``DRIVER=/usr/local/lib/psqlodbcw.so``.
+
+Пример подключения MS SQL Server
+--------------------------------
+
+ОС Ubuntu.
+
+Установка драйвера: ::
+  
+  sudo apt-get install tdsodbc freetds-bin sqsh
+
+Настройка драйвера: ::
+
+  $ cat /etc/freetds/freetds.conf 
+  ...
+
+  [MSSQL]
+  host = 192.168.56.101
+  port = 1433
+  tds version = 7.0
+  client charset = UTF-8
+
+  $ cat /etc/odbcinst.ini 
+  ...
+
+  [FreeTDS]
+  Description     = FreeTDS
+  Driver          = /usr/lib/x86_64-linux-gnu/odbc/libtdsodbc.so
+  Setup           = /usr/lib/x86_64-linux-gnu/odbc/libtdsS.so
+  FileUsage       = 1
+  UsageCount      = 5
+
+  $ cat ~/.odbc.ini 
+  ...
+
+  [MSSQL]
+  Description     = FreeTDS
+  Driver          = FreeTDS
+  Servername      = MSSQL
+  Database        = test
+  UID             = test
+  PWD             = test
+  Port            = 1433
+
+
+Настройка словаря в ClickHouse:
+
+.. code-block:: xml
+
+  <dictionaries>
+      <dictionary>
+          <name>test</name>
+          <source>
+              <odbc>
+                  <table>dict</table>
+                  <connection_string>DSN=MSSQL;UID=test;PWD=test</connection_string>
+              </odbc>
+          </source>
+
+          <lifetime>
+              <min>300</min>
+              <max>360</max>
+          </lifetime>
+
+          <layout>
+              <flat />
+          </layout>
+
+          <structure>
+              <id>
+                  <name>k</name>
+              </id>
+              <attribute>
+                  <name>s</name>
+                  <type>String</type>
+                  <null_value></null_value>
+              </attribute>
+          </structure>
+      </dictionary>
+  </dictionaries>
+
+
+
+СУБД
+====
+
+.. _dicts-external_dicts_dict_sources-mysql:
+
+MySQL
+-----
+
+Пример настройки:
+
+.. code-block:: xml
+
+  <source>
+    <mysql>
+        <port>3306</port>
+        <user>clickhouse</user>
+        <password>qwerty</password>
+        <replica>
+            <host>example01-1</host>
+            <priority>1</priority>
+        </replica>
+        <replica>
+            <host>example01-2</host>
+            <priority>1</priority>
+        </replica>
+        <db>conv_main</db>
+        <table>counters</table>
+        <where>id=10</where>
+    </mysql>
+  </source>
+
+
+Поля настройки:
+
+* ``port`` - порт сервера MySQL. Можно указать для всех реплик или для каждой в отдельности (внутри ``<replica>``).
+* ``user`` - имя пользователя MySQL. Можно указать для всех реплик или для каждой в отдельности (внутри ``<replica>``).
+* ``password`` - пароль пользователя MySQL. Можно указать для всех реплик или для каждой в отдельности (внутри ``<replica>``).
+* ``replica`` - блок конфигурации реплики. Блоков может быть несколько.
+  
+  * ``replica/host`` - хост MySQL.
+  * ``replica/priority`` - приоритет реплики. При попытке соединения ClickHouse обходит реплики в соответствии с приоритетом. Чем меньше цифра, тем выше приоритет.
+* ``db`` - имя базы данных.
+* ``table`` - имя таблицы.
+* ``where`` - условие выбора. Может отсутствовать.
+
+.. _dicts-external_dicts_dict_sources-clickhouse:
+
+ClickHouse
+----------
+
+Пример настройки:
+
+.. code-block:: xml
+  
+  <source>
+      <clickhouse>
+          <host>example01-01-1</host>
+          <port>9000</port>
+          <user>default</user>
+          <password></password>
+          <db>default</db>
+          <table>ids</table>
+          <where>id=10</where>
+      </clickhouse>
+  </source>
+
+Поля настройки:
+
+* ``host`` - хост ClickHouse. Если host локальный, то запрос выполняется без сетевого взаимодействия. Чтобы повысить отказоустойчивость решения, можно создать таблицу типа :ref:`Distributed <table_engines-distributed>` и прописать её в дальнейших настройках.
+* ``port`` - порт сервера ClickHouse.
+* ``user`` - имя пользователя ClickHouse.
+* ``password`` - пароль пользователя ClickHouse.
+* ``db`` - имя базы данных.
+* ``table`` - имя таблицы.
+* ``where`` - условие выбора. Может отсутствовать.
+
+
+.. _dicts-external_dicts_dict_sources-mongodb:
+
+MongoDB
+-------
+
+Пример настройки:
+
+.. code-block:: xml
+
+  <source>
+      <mongodb>
+          <host>localhost</host>
+          <port>27017</port>
+          <user></user>
+          <password></password>
+          <db>test</db>
+          <collection>dictionary_source</collection>
+      </mongodb>
+  </source>
+
+
+Поля настройки:
+
+* ``host`` - хост MongoDB.
+* ``port`` - порт сервера MongoDB.
+* ``user`` - имя пользователя MongoDB.
+* ``password`` - пароль пользователя MongoDB.
+* ``db`` - имя базы данных.
+* ``collection`` - имя коллекции.
diff --git a/docs/ru/dicts/external_dicts_dict_structure.rst b/docs/ru/dicts/external_dicts_dict_structure.rst
new file mode 100644
index 00000000000..ec0ae0be370
--- /dev/null
+++ b/docs/ru/dicts/external_dicts_dict_structure.rst
@@ -0,0 +1,125 @@
+.. _dicts-external_dicts_dict_structure:
+
+*******************
+Ключ и поля словаря
+*******************
+
+Секция ``<structure>`` описывает ключ словаря и поля, доступные для запросов.
+
+
+Общий вид структуры:
+
+.. code-block:: xml
+
+      <dictionary>
+          <structure>
+              <id>
+                  <name>Id</name>
+              </id>
+  
+              <attribute>
+                  <!-- Attribute parameters -->
+              </attribute>
+              
+              ...
+
+          </structure>
+      </dictionary>
+
+В структуре описываются столбцы:
+
+* ``<id>`` - :ref:`ключевой столбец <dicts-external_dicts_dict_structure-key>`.
+* ``<attribute>`` - :ref:`столбец данных <dicts-external_dicts_dict_structure-attributes>`. Столбцов может быть много.
+
+.. _dicts-external_dicts_dict_structure-key:
+
+Ключ
+====
+
+ClickHouse поддерживает следующие виды ключей:
+
+* Числовой ключ. Формат UInt64. Описывается в теге ``<id>``.
+* Составной ключ. Набор значений разного типа. Описывается в теге ``<key>``.
+  
+Структура может содержать либо ``<id>``, либо ``<key>``.
+
+
+.. attention:: Ключ не надо дополнительно описывать в атрибутах.
+
+Числовой ключ
+--------------
+
+Формат: ``UInt64``.
+
+Пример конфигурации:
+
+.. code-block:: xml
+
+    <id>
+        <name>Id</name>
+    </id>
+
+
+Поля конфигурации:
+
+* name - имя столбца с ключами.
+  
+
+Составной ключ
+---------------
+
+Ключом может быть кортеж (``tuple``) из полей произвольных типов. :ref:`layout <dicts-external_dicts_dict_layout>` в этом случае должен быть ``complex_key_hashed`` или ``complex_key_cache``.
+
+.. tip:: Составной ключ может состоять и из одного элемента, что даёт возможность использовать в качестве ключа, например, строку.
+
+Структура ключа задаётся в элементе ``<key>``. Поля ключа задаются в том же формате, что и :ref:`атрибуты <dicts-external_dicts_dict_structure-attributes>` словаря. Пример:
+
+.. code-block:: xml
+
+  <structure>
+      <key>
+          <attribute>
+              <name>field1</name>
+              <type>String</type>
+          </attribute>
+          <attribute>
+              <name>field2</name>
+              <type>UInt32</type>
+          </attribute>
+          ...
+      </key>
+  ...
+
+
+При запросе в функции ``dictGet*`` в качестве ключа передаётся кортеж. Пример: ``dictGetString('dict_name', 'attr_name', tuple('string for field1', num_for_field2))``.
+
+
+.. _dicts-external_dicts_dict_structure-attributes:
+
+Атрибуты
+========
+
+Пример конфигурации:
+
+.. code-block:: xml
+
+    <structure>
+        ...
+        <attribute>
+            <name>Name</name>
+            <type>Type</type>
+            <null_value></null_value>
+            <expression>rand64()</expression>
+            <hierarchical>true</hierarchical>
+            <injective>true</injective>
+        </attribute>
+    </structure>
+
+Поля конфигурации:
+
+* ``name`` - Имя столбца.
+* ``type`` - Тип столбца. Задает способ интерпретации данных в источнике. Например, в случае MySQL, в таблице-источнике поле может быть ``TEXT``, ``VARCHAR``, ``BLOB``, но загружено может быть как ``String``.
+* ``null_value`` - Значение по умолчанию для несуществующего элемента. В примере - пустая строка.
+* ``expression`` - Атрибут может быть выражением. Тег не обязательный.
+* ``hierarchical`` - Поддержка иерархии. Отображение в идентификатор родителя. По умолчанию, ``false``.
+* ``injective`` - Признак инъективности отображения ``id -> attribute``. Если ``true``, то можно оптимизировать ``GROUP BY``. По умолчанию, ``false``.
diff --git a/docs/ru/dicts/index.rst b/docs/ru/dicts/index.rst
index a8ad75e9cd2..8cd65b643e7 100644
--- a/docs/ru/dicts/index.rst
+++ b/docs/ru/dicts/index.rst
@@ -7,6 +7,6 @@
 Существуют встроенные и подключаемые (внешние) словари.
 
 .. toctree::
-    :glob:
 
-    *
+   external_dicts
+   internal_dicts
diff --git a/docs/ru/formats/index.rst b/docs/ru/formats/index.rst
index 6db2890830f..b7510f79d3b 100644
--- a/docs/ru/formats/index.rst
+++ b/docs/ru/formats/index.rst
@@ -1,3 +1,5 @@
+.. _formats:
+
 Форматы
 =======
 
diff --git a/docs/ru/functions/ext_dict_functions.rst b/docs/ru/functions/ext_dict_functions.rst
index 0e27687efe3..963c670c5af 100644
--- a/docs/ru/functions/ext_dict_functions.rst
+++ b/docs/ru/functions/ext_dict_functions.rst
@@ -1,6 +1,8 @@
+.. _ext_dict_functions:
+
 Функции для работы с внешними словарями
 ---------------------------------------
-Подробнее смотрите в разделе "Внешние словари".
+Информацию о подключении и настройке внешних словарей смотрите в разделе :ref:`dicts-external_dicts`.
 
 dictGetUInt8, dictGetUInt16, dictGetUInt32, dictGetUInt64
 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
diff --git a/docs/ru/functions/other_functions.rst b/docs/ru/functions/other_functions.rst
index 32a9c0a3ed3..b9b284e4293 100644
--- a/docs/ru/functions/other_functions.rst
+++ b/docs/ru/functions/other_functions.rst
@@ -108,6 +108,8 @@ bar
   │ 23 │ 400397 │ █████████████▎     │
   └────┴────────┴────────────────────┘
 
+.. _other_functions-transform:
+
 transform
 ~~~~~~~~~
 Преобразовать значение согласно явно указанному отображению одних элементов на другие.
diff --git a/docs/ru/operations/server_settings/settings.rst b/docs/ru/operations/server_settings/settings.rst
index c99054a18fa..9d7370e895a 100644
--- a/docs/ru/operations/server_settings/settings.rst
+++ b/docs/ru/operations/server_settings/settings.rst
@@ -98,9 +98,13 @@ default_profile
 
 dictionaries_config
 -------------------
-Конфигурация внешних словарей.
+Путь к конфигурации внешних словарей.
 
-Смотрите раздел :ref:`dicts-external_dicts`.
+Путь:
+  * Указывается абсолютным или относительно конфигурационного файла сервера.
+  * Может содержать wildcard-ы \* и ?.
+
+Про внешние словари читайте в разделе :ref:`dicts-external_dicts`.
 
 **Пример**
 
@@ -116,7 +120,11 @@ dictionaries_lazy_load
 
 Отложенная загрузка словарей.
 
-С установленным параметром словари подгружаются не при запуске сервера, а при первом обращении.
+Если ``true``, то каждый словарь создаётся при первом использовании. Если словарь не удалось создать, то вызов функции, использующей словарь, сгенерирует исключение. 
+
+Если ``false``, то все словари создаются при старте сервера, и в случае ошибки сервер завершает работу.
+
+По умолчанию - ``true``.
 
 **Пример**
 

From a1272601e5ae5bb7eb558b7e097349876a8bdf7c Mon Sep 17 00:00:00 2001
From: BayoNet <bayonet@virtUbuntu16.04>
Date: Mon, 31 Jul 2017 12:17:48 +0300
Subject: [PATCH 195/281] Default `make html` language changed to russian.

---
 docs/Makefile | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/docs/Makefile b/docs/Makefile
index 285b71a53b6..0d273821a0b 100644
--- a/docs/Makefile
+++ b/docs/Makefile
@@ -15,7 +15,7 @@ endif
 # Internal variables.
 PAPEROPT_a4     = -D latex_paper_size=a4
 PAPEROPT_letter = -D latex_paper_size=letter
-ALLSPHINXOPTS   = $(SPHINXOPTS) en
+ALLSPHINXOPTS   = $(SPHINXOPTS) ru
 # the i18n builder cannot share the environment and doctrees with the others
 I18NSPHINXOPTS  = $(PAPEROPT_$(PAPER)) $(SPHINXOPTS) ru
 

From 5304357d7189f3f1478d557999d9e12eeeb70962 Mon Sep 17 00:00:00 2001
From: BayoNet <bayonet@virtUbuntu16.04>
Date: Wed, 16 Aug 2017 11:14:13 +0300
Subject: [PATCH 196/281] Some errors in desctiptions are fixed

---
 docs/ru/dicts/external_dicts_dict.rst         | 2 +-
 docs/ru/dicts/external_dicts_dict_sources.rst | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/docs/ru/dicts/external_dicts_dict.rst b/docs/ru/dicts/external_dicts_dict.rst
index b400261f0e5..eab4ef30381 100644
--- a/docs/ru/dicts/external_dicts_dict.rst
+++ b/docs/ru/dicts/external_dicts_dict.rst
@@ -31,5 +31,5 @@
 * name - Идентификатор, под которым словарь будет доступен для использования. Используйте символы ``[a-zA-Z0-9_\-]``.
 * :ref:`source <dicts-external_dicts_dict_sources>` - Источник словаря.
 * :ref:`layout <dicts-external_dicts_dict_layout>` - Размещение словаря в памяти.
-* :ref:`structure <dicts-external_dicts_dict_structure>` - Ключ словаря.
+* :ref:`structure <dicts-external_dicts_dict_structure>` - Структура словаря. Ключ и атрибуты, которые можно получить по ключу.
 * :ref:`lifetime <dicts-external_dicts_dict_lifetime>` - Периодичность обновления словарей.
diff --git a/docs/ru/dicts/external_dicts_dict_sources.rst b/docs/ru/dicts/external_dicts_dict_sources.rst
index 035287b1a52..d65e59124be 100644
--- a/docs/ru/dicts/external_dicts_dict_sources.rst
+++ b/docs/ru/dicts/external_dicts_dict_sources.rst
@@ -102,7 +102,7 @@ HTTP(s)
       </http>
   </source>
 
-Чтобы ClickHouse смог обратиться к HTTPS-ресурсу, необходимо прописать :ref:`настройки openSSL <server_settings-openSSL>` в конфигурации сервера.
+Чтобы ClickHouse смог обратиться к HTTPS-ресурсу, необходимо :ref:`настроить openSSL <server_settings-openSSL>` в конфигурации сервера.
 
 Поля настройки:
 

From 727ca8199860fa65e265ff4867a0fb8d5c989036 Mon Sep 17 00:00:00 2001
From: Alberto <alberto.delbarrio.albelda@gmail.com>
Date: Wed, 16 Aug 2017 12:24:06 +0200
Subject: [PATCH 197/281] Fixed typo

---
 dbms/src/Interpreters/Context.cpp | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/dbms/src/Interpreters/Context.cpp b/dbms/src/Interpreters/Context.cpp
index 805e1969ad9..c011734bdb1 100644
--- a/dbms/src/Interpreters/Context.cpp
+++ b/dbms/src/Interpreters/Context.cpp
@@ -1404,7 +1404,7 @@ void Context::checkTableCanBeDropped(const String & database, const String & tab
     ostr << "Table " << backQuoteIfNeed(database) << "." << backQuoteIfNeed(table) << " was not dropped.\n"
          << "Reason:\n"
          << "1. Table size (" << table_size_str << ") is greater than max_table_size_to_drop (" << max_table_size_to_drop_str << ")\n"
-         << "2. File '" << force_file.path() << "' intedned to force DROP "
+         << "2. File '" << force_file.path() << "' intended to force DROP "
             << (force_file_exists ? "exists but not writeable (could not be removed)" : "doesn't exist") << "\n";
 
     ostr << "How to fix this:\n"

From e2a0294e740e8937e43a0662dfacd15cdabe65b4 Mon Sep 17 00:00:00 2001
From: Konstantin Lebedev <kostyan.lebedev@gmail.com>
Date: Wed, 16 Aug 2017 09:36:02 +0300
Subject: [PATCH 198/281] Added link to native python driver to en docs

---
 docs/en/interfaces/third-party_client_libraries.rst | 1 +
 1 file changed, 1 insertion(+)

diff --git a/docs/en/interfaces/third-party_client_libraries.rst b/docs/en/interfaces/third-party_client_libraries.rst
index e0445008193..6d32eab73da 100644
--- a/docs/en/interfaces/third-party_client_libraries.rst
+++ b/docs/en/interfaces/third-party_client_libraries.rst
@@ -6,6 +6,7 @@ There exist third-party client libraries for ClickHouse:
 * Python:
     - `infi.clickhouse_orm <https://github.com/Infinidat/infi.clickhouse_orm>`_
     - `sqlalchemy-clickhouse <https://github.com/cloudflare/sqlalchemy-clickhouse>`_
+    - `clickhouse-driver <https://github.com/mymarilyn/clickhouse-driver>`_
 * PHP
     - `clickhouse-php-client <https://github.com/8bitov/clickhouse-php-client>`_
     - `PhpClickHouseClient <https://github.com/SevaCode/PhpClickHouseClient>`_

From bbc42a391adb185e151d78fd03cc29bdb258d5df Mon Sep 17 00:00:00 2001
From: alexey-milovidov <milovidov@yandex-team.ru>
Date: Wed, 16 Aug 2017 16:54:14 +0300
Subject: [PATCH 199/281] Update queries.rst

---
 docs/en/query_language/queries.rst | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/docs/en/query_language/queries.rst b/docs/en/query_language/queries.rst
index 17896c5f6fa..25894a0c81c 100644
--- a/docs/en/query_language/queries.rst
+++ b/docs/en/query_language/queries.rst
@@ -194,7 +194,7 @@ Renames one or more tables.
 
     RENAME TABLE [db11.]name11 TO [db12.]name12, [db21.]name21 TO [db22.]name22, ... [ON CLUSTER cluster]
 
- All tables are renamed under global locking. Renaming tables is a light operation. If you indicated another database after TO, the table will be moved to this database. However, the directories with databases must reside in the same file system (otherwise, an error is returned).
+All tables are renamed under global locking. Renaming tables is a light operation. If you indicated another database after TO, the table will be moved to this database. However, the directories with databases must reside in the same file system (otherwise, an error is returned).
 
 ALTER
 ~~~~~

From c629799500242eeaeef0a8641f2507aa7292f6ea Mon Sep 17 00:00:00 2001
From: Alexey Milovidov <milovidov@yandex-team.ru>
Date: Wed, 16 Aug 2017 17:21:24 +0300
Subject: [PATCH 200/281] Fixed insufficient arguments checking
 [#CLICKHOUSE-3102].

---
 dbms/src/Functions/FunctionsStringArray.h | 4 ++++
 dbms/src/Interpreters/loadMetadata.cpp    | 1 -
 2 files changed, 4 insertions(+), 1 deletion(-)

diff --git a/dbms/src/Functions/FunctionsStringArray.h b/dbms/src/Functions/FunctionsStringArray.h
index be44cd1d47f..bd9222699cf 100644
--- a/dbms/src/Functions/FunctionsStringArray.h
+++ b/dbms/src/Functions/FunctionsStringArray.h
@@ -20,6 +20,7 @@ namespace DB
 namespace ErrorCodes
 {
     extern const int NUMBER_OF_ARGUMENTS_DOESNT_MATCH;
+    extern const int BAD_ARGUMENTS;
 }
 
 
@@ -210,6 +211,9 @@ public:
                 ErrorCodes::ILLEGAL_COLUMN);
 
         sep = col->getValue<String>();
+
+        if (sep.empty())
+            throw Exception("Illegal separator for function " + getName() + ". Must be not empty.", ErrorCodes::BAD_ARGUMENTS);
     }
 
     /// Returns the position of the argument that is the column of strings
diff --git a/dbms/src/Interpreters/loadMetadata.cpp b/dbms/src/Interpreters/loadMetadata.cpp
index dac4ef0c911..80b670f4c3f 100644
--- a/dbms/src/Interpreters/loadMetadata.cpp
+++ b/dbms/src/Interpreters/loadMetadata.cpp
@@ -137,7 +137,6 @@ void loadMetadataSystem(Context & context)
 
         auto system_database = std::make_shared<DatabaseOrdinary>(SYSTEM_DATABASE, global_path + "metadata/" SYSTEM_DATABASE);
         context.addDatabase(SYSTEM_DATABASE, system_database);
-
     }
 
 }

From a11759c75bd7d9964e94bc87cba200cc1ff17483 Mon Sep 17 00:00:00 2001
From: blazerer <roman.peshkurov@gmail.com>
Date: Wed, 16 Aug 2017 17:51:54 +0300
Subject: [PATCH 201/281] mysqlxx transaction exception safety

Fix terminate on exception and rollback query fail.
---
 libs/libmysqlxx/include/mysqlxx/Transaction.h | 10 ++++++++--
 1 file changed, 8 insertions(+), 2 deletions(-)

diff --git a/libs/libmysqlxx/include/mysqlxx/Transaction.h b/libs/libmysqlxx/include/mysqlxx/Transaction.h
index 542cd8e12e7..2b3a26ef63a 100644
--- a/libs/libmysqlxx/include/mysqlxx/Transaction.h
+++ b/libs/libmysqlxx/include/mysqlxx/Transaction.h
@@ -22,8 +22,14 @@ public:
 
     virtual ~Transaction()
     {
-        if (!finished)
-            rollback();
+        try
+        {
+            if (!finished)
+                rollback();
+        }
+        catch (...)
+        {
+        }
     }
 
     void commit()

From b6cafb9a885491ae5c1797fd9d2fcb5beb4f333f Mon Sep 17 00:00:00 2001
From: Alexey Milovidov <milovidov@yandex-team.ru>
Date: Wed, 16 Aug 2017 20:21:39 +0300
Subject: [PATCH 202/281] Set "max_replica_delay_for_distributed_queries" to
 five minutes by default [#CLICKHOUSE-2141].

---
 dbms/src/Interpreters/Settings.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/dbms/src/Interpreters/Settings.h b/dbms/src/Interpreters/Settings.h
index 81b96ef53e4..a0eb9639060 100644
--- a/dbms/src/Interpreters/Settings.h
+++ b/dbms/src/Interpreters/Settings.h
@@ -274,7 +274,7 @@ struct Settings
      * Zero means do not take delay into account. \
      */ \
     \
-    M(SettingUInt64, max_replica_delay_for_distributed_queries, 0) \
+    M(SettingUInt64, max_replica_delay_for_distributed_queries, 300) \
    /** Suppose max_replica_delay_for_distributed_queries is set and all replicas for the queried table are stale. \
      * If this setting is enabled, the query will be performed anyway, otherwise the error will be reported. \
      */ \

From 0490a268f755ce4409d8e02b76141a93fea62896 Mon Sep 17 00:00:00 2001
From: proller <proller@github.com>
Date: Wed, 16 Aug 2017 21:06:40 +0300
Subject: [PATCH 203/281] Cmake: find_unwind: Fix disable on arm

---
 libs/libdaemon/cmake/find_unwind.cmake | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/libs/libdaemon/cmake/find_unwind.cmake b/libs/libdaemon/cmake/find_unwind.cmake
index 0c55715b60c..80b2c746c7c 100644
--- a/libs/libdaemon/cmake/find_unwind.cmake
+++ b/libs/libdaemon/cmake/find_unwind.cmake
@@ -1,7 +1,7 @@
 include (CMakePushCheckState)
 cmake_push_check_state ()
 
-if (CMAKE_SYSTEM MATCHES "Linux" AND NOT ARM)
+if (CMAKE_SYSTEM MATCHES "Linux" AND NOT ARCH_ARM)
     option (USE_INTERNAL_UNWIND_LIBRARY "Set to FALSE to use system unwind library instead of bundled" ${NOT_UNBUNDLED})
 else ()
     option (USE_INTERNAL_UNWIND_LIBRARY "Set to FALSE to use system unwind library instead of bundled" OFF)
@@ -34,7 +34,7 @@ endif ()
 
 if (UNWIND_LIBRARY AND UNWIND_INCLUDE_DIR)
     set (USE_UNWIND 1)
-elseif (CMAKE_SYSTEM MATCHES "Linux" AND NOT ARM)
+elseif (CMAKE_SYSTEM MATCHES "Linux" AND NOT ARCH_ARM)
     set (USE_INTERNAL_UNWIND_LIBRARY 1)
     set (UNWIND_INCLUDE_DIR "${ClickHouse_SOURCE_DIR}/contrib/libunwind/include")
     set (UNWIND_LIBRARY unwind)

From 164ad05ae1366440ee47f7f34b92b697c97a985e Mon Sep 17 00:00:00 2001
From: Alexey Milovidov <milovidov@yandex-team.ru>
Date: Wed, 16 Aug 2017 22:22:49 +0300
Subject: [PATCH 204/281] Little better [#CLICKHOUSE-3231].

---
 .../Storages/MergeTree/DataPartsExchange.cpp  |  4 +--
 dbms/src/Storages/MergeTree/MergeTreeData.cpp | 13 +++-------
 .../Storages/MergeTree/MergeTreeDataPart.cpp  | 13 +++++++++-
 .../Storages/MergeTree/MergeTreeDataPart.h    | 25 +++++++++++--------
 .../MergeTree/ShardedPartitionUploader.cpp    |  4 +--
 5 files changed, 32 insertions(+), 27 deletions(-)

diff --git a/dbms/src/Storages/MergeTree/DataPartsExchange.cpp b/dbms/src/Storages/MergeTree/DataPartsExchange.cpp
index b0d98f44ae9..c7eb2abbaa7 100644
--- a/dbms/src/Storages/MergeTree/DataPartsExchange.cpp
+++ b/dbms/src/Storages/MergeTree/DataPartsExchange.cpp
@@ -268,9 +268,7 @@ MergeTreeData::MutableDataPartPtr Fetcher::fetchPartImpl(
     new_data_part->info = MergeTreePartInfo::fromPartName(part_name);
     MergeTreePartInfo::parseMinMaxDatesFromPartName(part_name, new_data_part->min_date, new_data_part->max_date);
     new_data_part->modification_time = time(nullptr);
-    new_data_part->loadColumns(true);
-    new_data_part->loadChecksums(true);
-    new_data_part->loadIndex();
+    new_data_part->loadColumnsChecksumsIndex(true, false);
     new_data_part->is_sharded = false;
     new_data_part->checksums.checkEqual(checksums, false);
 
diff --git a/dbms/src/Storages/MergeTree/MergeTreeData.cpp b/dbms/src/Storages/MergeTree/MergeTreeData.cpp
index f675eaee193..55163ffe408 100644
--- a/dbms/src/Storages/MergeTree/MergeTreeData.cpp
+++ b/dbms/src/Storages/MergeTree/MergeTreeData.cpp
@@ -302,7 +302,7 @@ void MergeTreeData::loadDataParts(bool skip_sanity_checks)
     Poco::DirectoryIterator end;
     for (Poco::DirectoryIterator it(full_path); it != end; ++it)
     {
-        /// Skip temporary directories older than one day.
+        /// Skip temporary directories.
         if (startsWith(it.name(), "tmp"))
             continue;
 
@@ -326,10 +326,7 @@ void MergeTreeData::loadDataParts(bool skip_sanity_checks)
 
         try
         {
-            part->loadColumns(require_part_metadata);
-            part->loadChecksums(require_part_metadata);
-            part->loadIndex();
-            part->checkNotBroken(require_part_metadata);
+            part->loadColumnsChecksumsIndex(require_part_metadata, true);
         }
         catch (const Exception & e)
         {
@@ -1544,11 +1541,7 @@ MergeTreeData::MutableDataPartPtr MergeTreeData::loadPartAndFixMetadata(const St
     if (Poco::File(full_part_path + "columns.txt").exists())
         Poco::File(full_part_path + "columns.txt").remove();
 
-    part->loadColumns(false);
-    part->loadChecksums(false);
-    part->loadIndex();
-    part->checkNotBroken(false);
-
+    part->loadColumnsChecksumsIndex(false, true);
     part->modification_time = Poco::File(full_part_path).getLastModified().epochTime();
 
     /// If the checksums file is not present, calculate the checksums and write them to disk.
diff --git a/dbms/src/Storages/MergeTree/MergeTreeDataPart.cpp b/dbms/src/Storages/MergeTree/MergeTreeDataPart.cpp
index 6d5de63053a..c628a70179d 100644
--- a/dbms/src/Storages/MergeTree/MergeTreeDataPart.cpp
+++ b/dbms/src/Storages/MergeTree/MergeTreeDataPart.cpp
@@ -529,6 +529,17 @@ void MergeTreeDataPart::renameAddPrefix(bool to_detached, const String & prefix)
     renameTo(dst_name());
 }
 
+
+void MergeTreeDataPart::loadColumnsChecksumsIndex(bool require_columns_checksums, bool check_consistency)
+{
+    loadColumns(require_columns_checksums);
+    loadChecksums(require_columns_checksums);
+    loadIndex();
+    if (check_consistency)
+        checkConsistency(require_columns_checksums);
+}
+
+
 void MergeTreeDataPart::loadIndex()
 {
     /// Size - in number of marks.
@@ -627,7 +638,7 @@ void MergeTreeDataPart::loadColumns(bool require)
     columns.readText(file);
 }
 
-void MergeTreeDataPart::checkNotBroken(bool require_part_metadata)
+void MergeTreeDataPart::checkConsistency(bool require_part_metadata)
 {
     String path = getFullPath();
 
diff --git a/dbms/src/Storages/MergeTree/MergeTreeDataPart.h b/dbms/src/Storages/MergeTree/MergeTreeDataPart.h
index d7c30b9ad22..3d6211f78a7 100644
--- a/dbms/src/Storages/MergeTree/MergeTreeDataPart.h
+++ b/dbms/src/Storages/MergeTree/MergeTreeDataPart.h
@@ -180,19 +180,12 @@ struct MergeTreeDataPart
     /// Renames a part by appending a prefix to the name. To_detached - also moved to the detached directory.
     void renameAddPrefix(bool to_detached, const String & prefix) const;
 
-    /// Loads index file. Also calculates this->size if size=0
-    void loadIndex();
-
-    /// If checksums.txt exists, reads files' checksums (and sizes) from it
-    void loadChecksums(bool require);
-
     /// Populates columns_to_size map (compressed size).
     void accumulateColumnSizes(ColumnToSize & column_to_size) const;
 
-    /// Reads columns names and types from columns.txt
-    void loadColumns(bool require);
-
-    void checkNotBroken(bool require_part_metadata);
+    /// Initialize columns (from columns.txt if exists, or create from column files if not).
+    /// Load checksums from checksums.txt if exists. Load index if required.
+    void loadColumnsChecksumsIndex(bool require_columns_checksums, bool check_consistency);
 
     /// Checks that .bin and .mrk files exist
     bool hasColumnFiles(const String & column) const;
@@ -200,6 +193,18 @@ struct MergeTreeDataPart
     /// For data in RAM ('index')
     size_t getIndexSizeInBytes() const;
     size_t getIndexSizeInAllocatedBytes() const;
+
+private:
+    /// Reads columns names and types from columns.txt
+    void loadColumns(bool require);
+
+    /// If checksums.txt exists, reads files' checksums (and sizes) from it
+    void loadChecksums(bool require);
+
+    /// Loads index file. Also calculates this->size if size=0
+    void loadIndex();
+
+    void checkConsistency(bool require_part_metadata);
 };
 
 }
diff --git a/dbms/src/Storages/MergeTree/ShardedPartitionUploader.cpp b/dbms/src/Storages/MergeTree/ShardedPartitionUploader.cpp
index 55950a5a93d..b51180b51c4 100644
--- a/dbms/src/Storages/MergeTree/ShardedPartitionUploader.cpp
+++ b/dbms/src/Storages/MergeTree/ShardedPartitionUploader.cpp
@@ -107,9 +107,7 @@ void Service::processQuery(const Poco::Net::HTMLForm & params, ReadBuffer & body
     data_part->info = MergeTreePartInfo::fromPartName(part_name);
     MergeTreePartInfo::parseMinMaxDatesFromPartName(part_name, data_part->min_date, data_part->max_date);
     data_part->modification_time = time(nullptr);
-    data_part->loadColumns(true);
-    data_part->loadChecksums(true);
-    data_part->loadIndex();
+    data_part->loadColumnsChecksumsIndex(true, false);
     data_part->is_sharded = false;
     data_part->checksums.checkEqual(checksums, false);
 

From 3a86673aea96db7e2c46f96bd291d7a7c2ece6ef Mon Sep 17 00:00:00 2001
From: Alexey Milovidov <milovidov@yandex-team.ru>
Date: Wed, 16 Aug 2017 22:41:52 +0300
Subject: [PATCH 205/281] Fixed error when part could become broken during
 ALTER MODIFY of element of Nested column [#CLICKHOUSE-3232].

---
 dbms/src/Storages/MergeTree/MergeTreeData.cpp | 11 ++++++++++-
 1 file changed, 10 insertions(+), 1 deletion(-)

diff --git a/dbms/src/Storages/MergeTree/MergeTreeData.cpp b/dbms/src/Storages/MergeTree/MergeTreeData.cpp
index 55163ffe408..78a7c3188f0 100644
--- a/dbms/src/Storages/MergeTree/MergeTreeData.cpp
+++ b/dbms/src/Storages/MergeTree/MergeTreeData.cpp
@@ -1030,7 +1030,16 @@ MergeTreeData::AlterDataPartTransactionPtr MergeTreeData::alterDataPart(
             part->size_in_bytes,
             static_cast<double>(part->size_in_bytes) / this->getTotalActiveSizeInBytes());
         ExpressionBlockInputStream in(part_in, expression);
-        MergedColumnOnlyOutputStream out(*this, full_path + part->name + '/', true, compression_method, false);
+
+        /** Don't write offsets for arrays, because ALTER never change them
+         *  (MODIFY COLUMN could only change types of elements but never modify array sizes).
+          * Also note that they does not participate in 'rename_map'.
+          * Also note, that for columns, that are parts of Nested,
+          *  temporary column name ('converting_column_name') created in 'createConvertExpression' method
+          *  will have old name of shared offsets for arrays.
+          */
+        MergedColumnOnlyOutputStream out(*this, full_path + part->name + '/', true, compression_method, true /* skip_offsets */);
+
         in.readPrefix();
         out.writePrefix();
 

From eb88391655178d5b7a23658d70225456b66b0b81 Mon Sep 17 00:00:00 2001
From: Alexey Milovidov <milovidov@yandex-team.ru>
Date: Wed, 16 Aug 2017 23:10:25 +0300
Subject: [PATCH 206/281] Fixed error with alias substitution
 [#CLICKHOUSE-2986].

---
 dbms/src/Interpreters/ExpressionAnalyzer.cpp  | 19 ++++++++++++-------
 .../00494_alias_substitution_bug.reference    |  2 ++
 .../00494_alias_substitution_bug.sql          |  5 +++++
 3 files changed, 19 insertions(+), 7 deletions(-)
 create mode 100644 dbms/tests/queries/0_stateless/00494_alias_substitution_bug.reference
 create mode 100644 dbms/tests/queries/0_stateless/00494_alias_substitution_bug.sql

diff --git a/dbms/src/Interpreters/ExpressionAnalyzer.cpp b/dbms/src/Interpreters/ExpressionAnalyzer.cpp
index bce55a70a89..9532f6678ac 100644
--- a/dbms/src/Interpreters/ExpressionAnalyzer.cpp
+++ b/dbms/src/Interpreters/ExpressionAnalyzer.cpp
@@ -1009,8 +1009,10 @@ void ExpressionAnalyzer::normalizeTreeImpl(
     /// rewrite rules that act when you go from top to bottom.
     bool replaced = false;
 
-    ASTFunction * func_node = typeid_cast<ASTFunction *>(ast.get());
-    if (func_node)
+    ASTIdentifier * identifier_node = nullptr;
+    ASTFunction * func_node = nullptr;
+
+    if ((func_node = typeid_cast<ASTFunction *>(ast.get())))
     {
         /** Is there a column in the table whose name fully matches the function entry?
           * For example, in the table there is a column "domain(URL)", and we requested domain(URL).
@@ -1048,13 +1050,13 @@ void ExpressionAnalyzer::normalizeTreeImpl(
             }
         }
     }
-    else if (ASTIdentifier * node = typeid_cast<ASTIdentifier *>(ast.get()))
+    else if ((identifier_node = typeid_cast<ASTIdentifier *>(ast.get())))
     {
-        if (node->kind == ASTIdentifier::Column)
+        if (identifier_node->kind == ASTIdentifier::Column)
         {
             /// If it is an alias, but not a parent alias (for constructs like "SELECT column + 1 AS column").
-            Aliases::const_iterator jt = aliases.find(node->name);
-            if (jt != aliases.end() && current_alias != node->name)
+            Aliases::const_iterator jt = aliases.find(identifier_node->name);
+            if (jt != aliases.end() && current_alias != identifier_node->name)
             {
                 /// Let's replace it with the corresponding tree node.
                 if (current_asts.count(jt->second.get()))
@@ -1116,7 +1118,7 @@ void ExpressionAnalyzer::normalizeTreeImpl(
         return;
     }
 
-    /// Recurring calls. Don't go into subqueries.
+    /// Recurring calls. Don't go into subqueries. Don't go into components of compound identifiers.
     /// We also do not go to the left argument of lambda expressions, so as not to replace the formal parameters
     ///  on aliases in expressions of the form 123 AS x, arrayMap(x -> 1, [2]).
 
@@ -1134,6 +1136,9 @@ void ExpressionAnalyzer::normalizeTreeImpl(
             normalizeTreeImpl(child, finished_asts, current_asts, current_alias, level + 1);
         }
     }
+    else if (identifier_node)
+    {
+    }
     else
     {
         for (auto & child : ast->children)
diff --git a/dbms/tests/queries/0_stateless/00494_alias_substitution_bug.reference b/dbms/tests/queries/0_stateless/00494_alias_substitution_bug.reference
new file mode 100644
index 00000000000..6ed281c757a
--- /dev/null
+++ b/dbms/tests/queries/0_stateless/00494_alias_substitution_bug.reference
@@ -0,0 +1,2 @@
+1
+1
diff --git a/dbms/tests/queries/0_stateless/00494_alias_substitution_bug.sql b/dbms/tests/queries/0_stateless/00494_alias_substitution_bug.sql
new file mode 100644
index 00000000000..81c4b1a39a4
--- /dev/null
+++ b/dbms/tests/queries/0_stateless/00494_alias_substitution_bug.sql
@@ -0,0 +1,5 @@
+DROP TABLE IF EXISTS test.nested;
+CREATE TABLE test.nested (n Nested(x UInt8)) ENGINE = Memory;
+INSERT INTO test.nested VALUES ([1, 2]);
+SELECT 1 AS x FROM remote('127.0.0.1', test.nested) ARRAY JOIN n.x;
+DROP TABLE test.nested;

From 16d6c4f9e73f22aaf917193b8866d925066740a9 Mon Sep 17 00:00:00 2001
From: Alexey Milovidov <milovidov@yandex-team.ru>
Date: Wed, 16 Aug 2017 23:27:35 +0300
Subject: [PATCH 207/281] Removed very old protocol compatibility features
 [#CLICKHOUSE-2].

---
 dbms/src/Client/Connection.cpp                | 23 ++++---------------
 dbms/src/Core/Defines.h                       |  3 ---
 dbms/src/Core/Progress.cpp                    |  8 ++-----
 .../DataStreams/NativeBlockInputStream.cpp    |  2 +-
 dbms/src/DataStreams/NativeBlockInputStream.h |  3 +--
 .../DataStreams/NativeBlockOutputStream.cpp   |  2 +-
 .../src/DataStreams/NativeBlockOutputStream.h |  3 +--
 dbms/src/Server/TCPHandler.cpp                | 15 ++++--------
 8 files changed, 16 insertions(+), 43 deletions(-)

diff --git a/dbms/src/Client/Connection.cpp b/dbms/src/Client/Connection.cpp
index d66571a6641..8054699c7ec 100644
--- a/dbms/src/Client/Connection.cpp
+++ b/dbms/src/Client/Connection.cpp
@@ -351,8 +351,8 @@ void Connection::sendQuery(
     block_in.reset();
     block_out.reset();
 
-    /// If server version is new enough, send empty block which meand end of data.
-    if (server_revision >= DBMS_MIN_REVISION_WITH_TEMPORARY_TABLES && !with_pending_data)
+    /// Send empty block which means end of data.
+    if (!with_pending_data)
     {
         sendData(Block());
         out->next();
@@ -384,9 +384,7 @@ void Connection::sendData(const Block & block, const String & name)
     }
 
     writeVarUInt(Protocol::Client::Data, *out);
-
-    if (server_revision >= DBMS_MIN_REVISION_WITH_TEMPORARY_TABLES)
-        writeStringBinary(name, *out);
+    writeStringBinary(name, *out);
 
     size_t prev_bytes = out->count();
 
@@ -405,9 +403,7 @@ void Connection::sendPreparedData(ReadBuffer & input, size_t size, const String
     /// NOTE 'Throttler' is not used in this method (could use, but it's not important right now).
 
     writeVarUInt(Protocol::Client::Data, *out);
-
-    if (server_revision >= DBMS_MIN_REVISION_WITH_TEMPORARY_TABLES)
-        writeStringBinary(name, *out);
+    writeStringBinary(name, *out);
 
     if (0 == size)
         copyData(input, *out);
@@ -419,13 +415,6 @@ void Connection::sendPreparedData(ReadBuffer & input, size_t size, const String
 
 void Connection::sendExternalTablesData(ExternalTablesData & data)
 {
-    /// If working with older server, don't send any info.
-    if (server_revision < DBMS_MIN_REVISION_WITH_TEMPORARY_TABLES)
-    {
-        out->next();
-        return;
-    }
-
     if (data.empty())
     {
         /// Send empty block, which means end of data transfer.
@@ -552,9 +541,7 @@ Block Connection::receiveData()
     initBlockInput();
 
     String external_table_name;
-
-    if (server_revision >= DBMS_MIN_REVISION_WITH_TEMPORARY_TABLES)
-        readStringBinary(external_table_name, *in);
+    readStringBinary(external_table_name, *in);
 
     size_t prev_bytes = in->count();
 
diff --git a/dbms/src/Core/Defines.h b/dbms/src/Core/Defines.h
index 08184f55f84..10ba7030836 100644
--- a/dbms/src/Core/Defines.h
+++ b/dbms/src/Core/Defines.h
@@ -61,9 +61,6 @@
 /// Name suffix for the column containing the array offsets.
 #define ARRAY_SIZES_COLUMN_NAME_SUFFIX ".size"
 
-#define DBMS_MIN_REVISION_WITH_TEMPORARY_TABLES 50264
-#define DBMS_MIN_REVISION_WITH_TOTAL_ROWS_IN_PROGRESS 51554
-#define DBMS_MIN_REVISION_WITH_BLOCK_INFO 51903
 #define DBMS_MIN_REVISION_WITH_CLIENT_INFO 54032
 #define DBMS_MIN_REVISION_WITH_SERVER_TIMEZONE 54058
 #define DBMS_MIN_REVISION_WITH_QUOTA_KEY_IN_CLIENT_INFO 54060
diff --git a/dbms/src/Core/Progress.cpp b/dbms/src/Core/Progress.cpp
index c96038ba290..540be33f6f1 100644
--- a/dbms/src/Core/Progress.cpp
+++ b/dbms/src/Core/Progress.cpp
@@ -17,9 +17,7 @@ void Progress::read(ReadBuffer & in, UInt64 server_revision)
 
     readVarUInt(new_rows, in);
     readVarUInt(new_bytes, in);
-
-    if (server_revision >= DBMS_MIN_REVISION_WITH_TOTAL_ROWS_IN_PROGRESS)
-        readVarUInt(new_total_rows, in);
+    readVarUInt(new_total_rows, in);
 
     rows = new_rows;
     bytes = new_bytes;
@@ -31,9 +29,7 @@ void Progress::write(WriteBuffer & out, UInt64 client_revision) const
 {
     writeVarUInt(rows.load(), out);
     writeVarUInt(bytes.load(), out);
-
-    if (client_revision >= DBMS_MIN_REVISION_WITH_TOTAL_ROWS_IN_PROGRESS)
-        writeVarUInt(total_rows.load(), out);
+    writeVarUInt(total_rows.load(), out);
 }
 
 
diff --git a/dbms/src/DataStreams/NativeBlockInputStream.cpp b/dbms/src/DataStreams/NativeBlockInputStream.cpp
index b153cf2bd5f..a65cd6719e5 100644
--- a/dbms/src/DataStreams/NativeBlockInputStream.cpp
+++ b/dbms/src/DataStreams/NativeBlockInputStream.cpp
@@ -106,7 +106,7 @@ Block NativeBlockInputStream::readImpl()
     }
 
     /// Additional information about the block.
-    if (server_revision >= DBMS_MIN_REVISION_WITH_BLOCK_INFO)
+    if (server_revision > 0)
         res.info.read(istr);
 
     /// Dimensions
diff --git a/dbms/src/DataStreams/NativeBlockInputStream.h b/dbms/src/DataStreams/NativeBlockInputStream.h
index 2efbf7cd3cc..b1df20a782a 100644
--- a/dbms/src/DataStreams/NativeBlockInputStream.h
+++ b/dbms/src/DataStreams/NativeBlockInputStream.h
@@ -60,8 +60,7 @@ struct IndexForNativeFormat
 class NativeBlockInputStream : public IProfilingBlockInputStream
 {
 public:
-    /** If a non-zero server_revision is specified, additional block information may be expected and read,
-      * depending on what is supported for the specified revision.
+    /** If a non-zero server_revision is specified, additional block information may be expected and read.
       *
       * `index` is not required parameter. If set, only parts of columns specified in the index will be read.
       */
diff --git a/dbms/src/DataStreams/NativeBlockOutputStream.cpp b/dbms/src/DataStreams/NativeBlockOutputStream.cpp
index 763ffabb7ee..b9cfc0fbac7 100644
--- a/dbms/src/DataStreams/NativeBlockOutputStream.cpp
+++ b/dbms/src/DataStreams/NativeBlockOutputStream.cpp
@@ -116,7 +116,7 @@ void NativeBlockOutputStream::writeData(const IDataType & type, const ColumnPtr
 void NativeBlockOutputStream::write(const Block & block)
 {
     /// Additional information about the block.
-    if (client_revision >= DBMS_MIN_REVISION_WITH_BLOCK_INFO)
+    if (client_revision > 0)
         block.info.write(ostr);
 
     /// Dimensions
diff --git a/dbms/src/DataStreams/NativeBlockOutputStream.h b/dbms/src/DataStreams/NativeBlockOutputStream.h
index 16ba2415cc7..d76cb827863 100644
--- a/dbms/src/DataStreams/NativeBlockOutputStream.h
+++ b/dbms/src/DataStreams/NativeBlockOutputStream.h
@@ -20,8 +20,7 @@ class CompressedWriteBuffer;
 class NativeBlockOutputStream : public IBlockOutputStream
 {
 public:
-    /** If non-zero client_revision is specified, additional block information can be written,
-      *  depending on what is supported for the specified revision.
+    /** If non-zero client_revision is specified, additional block information can be written.
       */
     NativeBlockOutputStream(
         WriteBuffer & ostr_, UInt64 client_revision_ = 0,
diff --git a/dbms/src/Server/TCPHandler.cpp b/dbms/src/Server/TCPHandler.cpp
index d93d60bd61e..011454a5159 100644
--- a/dbms/src/Server/TCPHandler.cpp
+++ b/dbms/src/Server/TCPHandler.cpp
@@ -144,8 +144,7 @@ void TCPHandler::runImpl()
                 continue;
 
             /// Get blocks of temporary tables
-            if (client_revision >= DBMS_MIN_REVISION_WITH_TEMPORARY_TABLES)
-                readData(global_settings);
+            readData(global_settings);
 
             /// Reset the input stream, as we received an empty block while receiving external table data.
             /// So, the stream has been marked as cancelled and we can't read from it anymore.
@@ -416,8 +415,7 @@ void TCPHandler::sendTotals()
             initBlockOutput();
 
             writeVarUInt(Protocol::Server::Totals, *out);
-            if (client_revision >= DBMS_MIN_REVISION_WITH_TEMPORARY_TABLES)
-                writeStringBinary("", *out);
+            writeStringBinary("", *out);
 
             state.block_out->write(totals);
             state.maybe_compressed_out->next();
@@ -438,8 +436,7 @@ void TCPHandler::sendExtremes()
             initBlockOutput();
 
             writeVarUInt(Protocol::Server::Extremes, *out);
-            if (client_revision >= DBMS_MIN_REVISION_WITH_TEMPORARY_TABLES)
-                writeStringBinary("", *out);
+            writeStringBinary("", *out);
 
             state.block_out->write(extremes);
             state.maybe_compressed_out->next();
@@ -612,8 +609,7 @@ bool TCPHandler::receiveData()
 
     /// The name of the temporary table for writing data, default to empty string
     String external_table_name;
-    if (client_revision >= DBMS_MIN_REVISION_WITH_TEMPORARY_TABLES)
-        readStringBinary(external_table_name, *in);
+    readStringBinary(external_table_name, *in);
 
     /// Read one block from the network and write it down
     Block block = state.block_in->read();
@@ -717,8 +713,7 @@ void TCPHandler::sendData(Block & block)
     initBlockOutput();
 
     writeVarUInt(Protocol::Server::Data, *out);
-    if (client_revision >= DBMS_MIN_REVISION_WITH_TEMPORARY_TABLES)
-        writeStringBinary("", *out);
+    writeStringBinary("", *out);
 
     state.block_out->write(block);
     state.maybe_compressed_out->next();

From 175cd99288eec3254cce772c6752133cc8c83a7a Mon Sep 17 00:00:00 2001
From: proller <proller@users.noreply.github.com>
Date: Thu, 17 Aug 2017 00:01:43 +0300
Subject: [PATCH 208/281] Freebsd fixes: fix build with libc++, fix find system
 lib double-conversion (#1109)

* Cmake: Don't detect and use -no-pie flag with clang

* Cmake: find_unwind: Fix disable on arm

* Freebsd fixes: fix build with libc++, fix find system lib double-conversion
---
 cmake/Modules/Finddouble-conversion.cmake                     | 4 ++--
 .../Storages/MergeTree/ReplicatedMergeTreeCleanupThread.cpp   | 2 +-
 2 files changed, 3 insertions(+), 3 deletions(-)

diff --git a/cmake/Modules/Finddouble-conversion.cmake b/cmake/Modules/Finddouble-conversion.cmake
index 178964b8e2e..cb01be0f25b 100644
--- a/cmake/Modules/Finddouble-conversion.cmake
+++ b/cmake/Modules/Finddouble-conversion.cmake
@@ -18,7 +18,7 @@
 #  DOUBLE_CONVERSION_INCLUDE_DIR       The location of double-conversion headers
 
 find_path(DOUBLE_CONVERSION_ROOT_DIR
-    NAMES include/double-conversion.h
+    NAMES include/double-conversion/double-conversion.h
 )
 
 find_library(DOUBLE_CONVERSION_LIBRARIES
@@ -27,7 +27,7 @@ find_library(DOUBLE_CONVERSION_LIBRARIES
 )
 
 find_path(DOUBLE_CONVERSION_INCLUDE_DIR
-    NAMES double-conversion.h
+    NAMES double-conversion/double-conversion.h
     PATHS ${DOUBLE_CONVERSION_ROOT_DIR}/include ${DOUBLE_CONVERSION_INCLUDE_PATHS}
 )
 
diff --git a/dbms/src/Storages/MergeTree/ReplicatedMergeTreeCleanupThread.cpp b/dbms/src/Storages/MergeTree/ReplicatedMergeTreeCleanupThread.cpp
index 11638044935..3283ca1d988 100644
--- a/dbms/src/Storages/MergeTree/ReplicatedMergeTreeCleanupThread.cpp
+++ b/dbms/src/Storages/MergeTree/ReplicatedMergeTreeCleanupThread.cpp
@@ -247,7 +247,7 @@ void ReplicatedMergeTreeCleanupThread::getBlocksSortedByTime(zkutil::ZooKeeperPt
         if (!status.exists)
             throw zkutil::KeeperException("A block node was suddenly deleted", ZNONODE);
 
-        cached_block_stats->emplace(elem.first, status.stat);
+        cached_block_stats->emplace(elem.first, RequiredStat(status.stat));
         timed_blocks.emplace_back(elem.first, RequiredStat(status.stat));
     }
 

From 5ba535f05f3496b13a54bd696e2f0fb0e2f0052d Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Marek=20Vavru=C5=A1a?= <mvavrusa@cloudflare.com>
Date: Wed, 16 Aug 2017 14:58:52 -0700
Subject: [PATCH 209/281] Dictionaries: remove duplicate
 MySQLDictionarySource.h include

refs #1110
---
 dbms/src/Dictionaries/DictionarySourceFactory.cpp | 1 -
 1 file changed, 1 deletion(-)

diff --git a/dbms/src/Dictionaries/DictionarySourceFactory.cpp b/dbms/src/Dictionaries/DictionarySourceFactory.cpp
index 436f89fa525..a75618dbdac 100644
--- a/dbms/src/Dictionaries/DictionarySourceFactory.cpp
+++ b/dbms/src/Dictionaries/DictionarySourceFactory.cpp
@@ -3,7 +3,6 @@
 #include <Core/Block.h>
 #include <Dictionaries/DictionaryStructure.h>
 #include <Dictionaries/FileDictionarySource.h>
-#include <Dictionaries/MySQLDictionarySource.h>
 #include <Dictionaries/ClickHouseDictionarySource.h>
 #include <Dictionaries/ExecutableDictionarySource.h>
 #include <Dictionaries/HTTPDictionarySource.h>

From 2c5c42a9177278d56bc1cd64c36399999ed314d8 Mon Sep 17 00:00:00 2001
From: BayoNet <bayonet@virtUbuntu16.04>
Date: Thu, 17 Aug 2017 17:37:40 +0300
Subject: [PATCH 210/281] <invalidate_query> setting of external dictionary
 <source> section is described.

---
 docs/ru/dicts/external_dicts.rst              | 11 ++++----
 docs/ru/dicts/external_dicts_dict_layout.rst  |  4 ---
 .../ru/dicts/external_dicts_dict_lifetime.rst | 27 ++++++++++++++++++-
 docs/ru/dicts/external_dicts_dict_sources.rst | 14 ++++++----
 4 files changed, 41 insertions(+), 15 deletions(-)

diff --git a/docs/ru/dicts/external_dicts.rst b/docs/ru/dicts/external_dicts.rst
index 00374b5a91e..946187dd6f3 100644
--- a/docs/ru/dicts/external_dicts.rst
+++ b/docs/ru/dicts/external_dicts.rst
@@ -6,12 +6,13 @@
 
 Существует возможность подключать собственные словари из различных источников данных. Источником данных для словаря может быть локальный текстовый/исполняемый файл, HTTP(s) ресурс или другая СУБД. Подробнее смотрите в разделе ":ref:`dicts-external_dicts_dict_sources`".
 
-ClickHouse может полностью или частично хранить словари в оперативной памяти, периодически обновлять их и динамически подгружать отсутствующие значения.
+ClickHouse:
+
+ * Полностью или частично хранить словари в оперативной памяти.
+ * Периодически обновляет их и динамически подгружает отсутствующие значения. Т.е. словари можно подгружать динамически.
 
 Конфигурация внешних словарей находится в одном или нескольких файлах. Путь к конфигурации указывается в параметре :ref:`server_settings-dictionaries_config`.
 
-Периодически ClickHouse обновляет конфигурацию словарей и словари. Т.о. словари можно подгружать динамически.
-
 Словари могут загружаться при старте сервера или при первом использовании, в зависимости от настройки :ref:`server_settings-dictionaries_lazy_load`.
 
 Конфигурационный файл словарей имеет вид:
@@ -19,7 +20,7 @@ ClickHouse может полностью или частично хранить
 .. code-block:: xml
 
   <dictionaries>
-      <comment>Необязательный элемент с любым содержимым. Полностью игнорируется.</comment>
+      <comment>Необязательный элемент с любым содержимым. Игнорируется сервером ClickHouse.</comment>
   
       <dictionary>
           <!-- Конфигурация словаря -->
@@ -36,7 +37,7 @@ ClickHouse может полностью или частично хранить
 
 Смотрите также ":ref:`ext_dict_functions`" .
 
-.. attention:: Вы можете преобразовать значения по небольшому словарю, описав его в запросе ``SELECT`` (см. функцию ":ref:`other_functions-transform`"). Эта функциональность не связана с внешними словарями.
+.. attention:: Вы можете преобразовывать значения по небольшому словарю, описав его в запросе ``SELECT`` (см. функцию ":ref:`other_functions-transform`"). Эта функциональность не связана с внешними словарями.
 
 .. toctree::
     :glob:
diff --git a/docs/ru/dicts/external_dicts_dict_layout.rst b/docs/ru/dicts/external_dicts_dict_layout.rst
index 4ee4cc6fe05..41aa1cb867c 100644
--- a/docs/ru/dicts/external_dicts_dict_layout.rst
+++ b/docs/ru/dicts/external_dicts_dict_layout.rst
@@ -15,10 +15,6 @@
 * Вызывать функцию для работы со словарём после ``GROUP BY``.
 * Помечать извлекаемые атрибуты как инъективные. Атрибут называется инъективным, если разным ключам соответствуют разные значения атрибута. Тогда при использовании в ``GROUP BY`` функции, достающей значение атрибута по ключу, эта функция автоматически выносится из ``GROUP BY``.
 
-ClickHouse периодически обновляет словари. Сначала проверяется время модификации файла/таблицы, затем, если файл/таблица обновились, обновляется словарь. Если словарь хранится в таблице типа MyISAM, то время модификации проверяется запросом ``SHOW TABLE STATUS``. Для таблиц InnoDB нельзя получить время модификации, поэтому словарь обновляется каждый раз.
-
-Обновление словарей (кроме загрузки при первом использовании) не блокирует запросы - во время обновления используется старая версия словаря. Если при обновлении возникнет ошибка, то ошибка пишется в лог сервера, а запросы продолжат использовать старую версию словарей.
-
 При ошибках работы со словарями ClickHouse генерирует исключения. Например, в следующих ситуациях:
 
 * При обращении к словарю, который не удалось загрузить.
diff --git a/docs/ru/dicts/external_dicts_dict_lifetime.rst b/docs/ru/dicts/external_dicts_dict_lifetime.rst
index d3de506b800..83871a80a31 100644
--- a/docs/ru/dicts/external_dicts_dict_lifetime.rst
+++ b/docs/ru/dicts/external_dicts_dict_lifetime.rst
@@ -6,7 +6,7 @@
 
 ClickHouse периодически обновляет словари. Интервал обновления для полностью загружаемых словарей и интервал инвалидации для кэшируемых словарей определяется в теге ``<lifetime>`` в секундах.
 
-Обновление словарей (кроме загрузки при первом использовании) не блокирует запросы, во время обновления используется старая версия словаря. Если при обновлении возникнет ошибка, то ошибка пишется в лог сервера, а запросы продолжат использовать старую версию словарей.
+Обновление словарей (кроме загрузки при первом использовании) не блокирует запросы - во время обновления используется старая версия словаря. Если при обновлении возникнет ошибка, то ошибка пишется в лог сервера, а запросы продолжат использовать старую версию словарей.
 
 Пример настройки:
 
@@ -36,3 +36,28 @@ ClickHouse периодически обновляет словари. Инте
       </lifetime>
       ...
   </dictionary>
+
+
+При обновлении словарей сервер ClickHouse применяет различную логику в зависимости от типа :ref:`источника <dicts-external_dicts_dict_sources>`:
+ 
+ * У текстового файла проверяется время модификации. Если время изменилось по отношению к запомненному ранее, то словарь обновляется.
+ * Для таблиц типа MyISAM время модификации проверяется запросом ``SHOW TABLE STATUS``.
+ * Словари из других источников по умолчанию обновляются каждый раз.
+
+Для источников MySQL (InnoDB) и ODBC можно настроить запрос, который позволит обновлять словари только в случае их фактического изменения, а не каждый раз. Чтобы это сделать, необходимо выполнить следующие условия/действия:
+ 
+ * В таблице словаря должно быть поле, которое гарантированно изменяется при обновлении данных в источнике.
+ * В настройках источника указывается запрос, который получает изменяющееся поле. Результат запроса сервер ClickHouse интерпретирует как строку, и если эта строка изменилась по отношению к предыдущему состоянию, то словарь обновляется. Запрос следует указывать в поле ``<invalidate_query>`` настроек :ref:`источника <dicts-external_dicts_dict_sources>`.
+
+Пример настройки:
+
+.. code-block:: xml
+
+  <dictionary>
+      ...
+      <odbc>
+        ...
+        <invalidate_query>SELECT update_time FROM dictionary_source where id = 1</invalidate_query>
+      </odbc>
+      ...
+  </dictionary>
diff --git a/docs/ru/dicts/external_dicts_dict_sources.rst b/docs/ru/dicts/external_dicts_dict_sources.rst
index d65e59124be..251f89adb34 100644
--- a/docs/ru/dicts/external_dicts_dict_sources.rst
+++ b/docs/ru/dicts/external_dicts_dict_sources.rst
@@ -125,13 +125,15 @@ ODBC
       <db>DatabaseName</db>
       <table>TableName</table>
       <connection_string>DSN=some_parameters</connection_string>
+      <invalidate_query>SQL_REQUEST</invalidate_query>
   </odbc>
 
 Поля настройки:
 
-* ``db`` - Имя базы данных. Не указывать, если имя базы задано в параметрах ``<connection_string>``.
-* ``table`` - Имя таблицы.
-* ``connection_string`` - Строка соединения.
+* ``db`` - имя базы данных. Не указывать, если имя базы задано в параметрах ``<connection_string>``.
+* ``table`` - имя таблицы.
+* ``connection_string`` - строка соединения.
+* ``invalidate_query`` - запрос для проверки статуса словаря. Необязательный параметр. Читайте подробнее в разделе :ref:`dicts-external_dicts_dict_lifetime`.
   
 
 Пример подключения PostgreSQL
@@ -308,6 +310,7 @@ MySQL
         <db>conv_main</db>
         <table>counters</table>
         <where>id=10</where>
+        <invalidate_query>SQL_REQUEST</invalidate_query>
     </mysql>
   </source>
 
@@ -323,7 +326,8 @@ MySQL
   * ``replica/priority`` - приоритет реплики. При попытке соединения ClickHouse обходит реплики в соответствии с приоритетом. Чем меньше цифра, тем выше приоритет.
 * ``db`` - имя базы данных.
 * ``table`` - имя таблицы.
-* ``where`` - условие выбора. Может отсутствовать.
+* ``where`` - условие выбора. Необязательный параметр.
+* ``invalidate_query`` - запрос для проверки статуса словаря. Необязательный параметр. Читайте подробнее в разделе :ref:`dicts-external_dicts_dict_lifetime`.
 
 .. _dicts-external_dicts_dict_sources-clickhouse:
 
@@ -385,4 +389,4 @@ MongoDB
 * ``user`` - имя пользователя MongoDB.
 * ``password`` - пароль пользователя MongoDB.
 * ``db`` - имя базы данных.
-* ``collection`` - имя коллекции.
+* ``collection`` - имя коллекции.
\ No newline at end of file

From 4988cb9f036754d50c6a3f6a592ff2d511721325 Mon Sep 17 00:00:00 2001
From: proller <proller@users.noreply.github.com>
Date: Thu, 17 Aug 2017 21:17:17 +0300
Subject: [PATCH 211/281] Issue #1110 : Fix build without installed boost
 (#1115)

---
 cmake/print_include_directories.cmake | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/cmake/print_include_directories.cmake b/cmake/print_include_directories.cmake
index ba0bba4c9df..4943f1e2afc 100644
--- a/cmake/print_include_directories.cmake
+++ b/cmake/print_include_directories.cmake
@@ -7,6 +7,9 @@ list(APPEND dirs ${dirs1})
 get_property (dirs1 TARGET common PROPERTY INCLUDE_DIRECTORIES)
 list(APPEND dirs ${dirs1})
 
+get_property (dirs1 TARGET ${Boost_PROGRAM_OPTIONS_LIBRARY} PROPERTY INCLUDE_DIRECTORIES)
+list(APPEND dirs ${dirs1})
+
 list(REMOVE_DUPLICATES dirs)
 file (WRITE ${CMAKE_CURRENT_BINARY_DIR}/include_directories.txt "")
 foreach (dir ${dirs})

From 9f956d97ddd5175a356c502cfb911978a53bf1dc Mon Sep 17 00:00:00 2001
From: Alexey Milovidov <milovidov@yandex-team.ru>
Date: Thu, 17 Aug 2017 21:43:39 +0300
Subject: [PATCH 212/281] Removed debug output [#CLICKHOUSE-2].

---
 dbms/src/Interpreters/ExpressionAnalyzer.cpp | 17 -----------------
 1 file changed, 17 deletions(-)

diff --git a/dbms/src/Interpreters/ExpressionAnalyzer.cpp b/dbms/src/Interpreters/ExpressionAnalyzer.cpp
index 9532f6678ac..e3cafeb0e3e 100644
--- a/dbms/src/Interpreters/ExpressionAnalyzer.cpp
+++ b/dbms/src/Interpreters/ExpressionAnalyzer.cpp
@@ -943,12 +943,8 @@ void ExpressionAnalyzer::addASTAliases(ASTPtr & ast, int ignore_levels)
             ++subquery_index;
         }
 
-        std::cerr << ast->getColumnName() << "\n";
-
         ast->setAlias(alias);
         aliases[alias] = ast;
-
-        std::cerr << ast->getAliasOrColumnName() << "\n";
     }
 }
 
@@ -2741,10 +2737,6 @@ void ExpressionAnalyzer::collectUsedColumns()
             columns_added_by_join.erase(it++);
     }
 
-/*    for (const auto & name_type : columns_added_by_join)
-        std::cerr << "JOINed column (required, not key): " << name_type.name << std::endl;
-    std::cerr << std::endl;*/
-
     /// Insert the columns required for the ARRAY JOIN calculation into the required columns list.
     NameSet array_join_sources;
     for (const auto & result_source : array_join_result_to_source)
@@ -2840,15 +2832,6 @@ void ExpressionAnalyzer::collectJoinedColumns(NameSet & joined_columns, NamesAnd
             joined_columns_name_type.emplace_back(col.name, col.type);
         }
     }
-
-/*    for (const auto & name : join_key_names_left)
-        std::cerr << "JOIN key (left): " << name << std::endl;
-    for (const auto & name : join_key_names_right)
-        std::cerr << "JOIN key (right): " << name << std::endl;
-    std::cerr << std::endl;
-    for (const auto & name : joined_columns)
-        std::cerr << "JOINed column: " << name << std::endl;
-    std::cerr << std::endl;*/
 }
 
 

From 2fa8ee5dd384552b802b9b45585ad31719498f88 Mon Sep 17 00:00:00 2001
From: proller <proller@github.com>
Date: Thu, 17 Aug 2017 19:49:23 +0300
Subject: [PATCH 213/281] Debian: keep debian/changelog commited with actual
 version.

---
 .gitignore       |  1 -
 debian/changelog |  5 +++++
 release          |  8 ++++----
 release_lib.sh   | 10 ++++++----
 4 files changed, 15 insertions(+), 9 deletions(-)
 create mode 100644 debian/changelog

diff --git a/.gitignore b/.gitignore
index a6509c69d76..208b03861e6 100644
--- a/.gitignore
+++ b/.gitignore
@@ -199,7 +199,6 @@ vgcore*
 *.changes
 build-stamp
 configure-stamp
-debian/changelog
 debian/*.debhelper.log
 debian/*.debhelper
 debian/*.substvars
diff --git a/debian/changelog b/debian/changelog
new file mode 100644
index 00000000000..15bf503f468
--- /dev/null
+++ b/debian/changelog
@@ -0,0 +1,5 @@
+clickhouse (1.1.54275) unstable; urgency=low
+
+  * Modified source code
+
+ -- proller <proller@yandex-team.ru>  Thu, 17 Aug 2017 19:33:34 +0300
diff --git a/release b/release
index d85357fc185..106f118ad74 100755
--- a/release
+++ b/release
@@ -10,9 +10,12 @@ source "./release_lib.sh"
 CONTROL=debian/control
 CHLOG=debian/changelog
 CHDATE=$(LC_ALL=C date -R | sed -e 's/,/\\,/g') # Replace comma to '\,'
-
 DEBUILD_NOSIGN_OPTIONS="-us -uc"
 
+if [ -z "$REVISION" ] ; then
+    get_revision_author
+fi
+
 while [[ $1 == --* ]]
 do
     if [[ $1 == '--test' ]]; then
@@ -35,9 +38,6 @@ do
     fi
 done
 
-if [ -z "$REVISION" ] ; then
-    get_revision_author
-fi
 
 # Build options
 if [ -n "$SANITIZER" ]
diff --git a/release_lib.sh b/release_lib.sh
index abc392d9e1c..52e474cda65 100644
--- a/release_lib.sh
+++ b/release_lib.sh
@@ -25,7 +25,7 @@ function gen_revision_author {
         while [ $succeeded -eq 0 ] && [ $attempts -le $max_attempts ]; do
             attempts=$(($attempts + 1))
             REVISION=$(($REVISION + 1))
-            git_tag_grep=`git tag | grep "$VERSION_PREFIX$REVISION$VERSION_POSTFIX"`
+            git_tag_grep=`git tag | grep "$VERSION_PREFIX$REVISION$VERSION_POSTFIX"` || true  # assign in the current shell (a subshell would discard the variable); "|| true" tolerates grep finding no matching tag
             if [ "$git_tag_grep" == "" ]; then
                 succeeded=1
             fi
@@ -36,17 +36,19 @@ function gen_revision_author {
         fi
 
         auto_message="Auto version update to"
-        git_log_grep=`git log --oneline --max-count=1 | grep "$auto_message"`
+        git_log_grep=`git log --oneline --max-count=1 | grep "$auto_message"` || true  # assign in the current shell (a subshell would discard the variable); "|| true" tolerates grep finding no match in the last commit
         if [ "$git_log_grep" == "" ]; then
             tag="$VERSION_PREFIX$REVISION$VERSION_POSTFIX"
 
             # First tag for correct git describe
             echo -e "\nTrying to create tag: $tag"
-            git tag -a "$tag" -m "$tag"
+            git tag -a "$tag" -m "$tag" || true
 
             git_describe=`git describe`
             sed -i -- "s/VERSION_REVISION .*)/VERSION_REVISION $REVISION)/g;s/VERSION_DESCRIBE .*)/VERSION_DESCRIBE $git_describe)/g" dbms/cmake/version.cmake
-            git commit -m "$auto_message [$REVISION]" dbms/cmake/version.cmake
+
+            gen_changelog "$REVISION" "$CHDATE" "$AUTHOR" "$CHLOG"
+            git commit -m "$auto_message [$REVISION]" dbms/cmake/version.cmake debian/changelog
             #git push
 
             # Second tag for correct version information in version.cmake inside tag

From 850bc9f19d894740f13d6d9c089dc05a8e70a697 Mon Sep 17 00:00:00 2001
From: Nikolai Kochetov <nik-kochetov@yandex-team.ru>
Date: Thu, 17 Aug 2017 22:36:59 +0300
Subject: [PATCH 214/281] fixed reading const zero column in prewhere

---
 .../Storages/MergeTree/MergeTreeBaseBlockInputStream.cpp   | 7 +++++--
 1 file changed, 5 insertions(+), 2 deletions(-)

diff --git a/dbms/src/Storages/MergeTree/MergeTreeBaseBlockInputStream.cpp b/dbms/src/Storages/MergeTree/MergeTreeBaseBlockInputStream.cpp
index ad288f0be5c..592a97135da 100644
--- a/dbms/src/Storages/MergeTree/MergeTreeBaseBlockInputStream.cpp
+++ b/dbms/src/Storages/MergeTree/MergeTreeBaseBlockInputStream.cpp
@@ -246,8 +246,11 @@ Block MergeTreeBaseBlockInputStream::readFromPart()
                     if (pre_range_reader)
                     {
                         /// Have to read rows from last partly read granula.
-                        auto & range = ranges_to_read.back();
-                        task->current_range_reader = reader->readRange(range.begin, range.end);
+                        if (!ranges_to_read.empty())
+                        {
+                            auto & range = ranges_to_read.back();
+                            task->current_range_reader = reader->readRange(range.begin, range.end);
+                        }
                         /// But can just skip them.
                         task->number_of_rows_to_skip = rows_was_read_in_last_range;
                     }

From f8bf45a5ec90c50a1c1b48c7cb6433f77947c3ae Mon Sep 17 00:00:00 2001
From: Nikolai Kochetov <nik-kochetov@yandex-team.ru>
Date: Thu, 17 Aug 2017 22:57:38 +0300
Subject: [PATCH 215/281] added test [#CLICKHOUSE-3237]

---
 .../0_stateless/00495_reading_const_zero_column.reference    | 0
 .../queries/0_stateless/00495_reading_const_zero_column.sql  | 5 +++++
 2 files changed, 5 insertions(+)
 create mode 100644 dbms/tests/queries/0_stateless/00495_reading_const_zero_column.reference
 create mode 100644 dbms/tests/queries/0_stateless/00495_reading_const_zero_column.sql

diff --git a/dbms/tests/queries/0_stateless/00495_reading_const_zero_column.reference b/dbms/tests/queries/0_stateless/00495_reading_const_zero_column.reference
new file mode 100644
index 00000000000..e69de29bb2d
diff --git a/dbms/tests/queries/0_stateless/00495_reading_const_zero_column.sql b/dbms/tests/queries/0_stateless/00495_reading_const_zero_column.sql
new file mode 100644
index 00000000000..ad94b9c06b3
--- /dev/null
+++ b/dbms/tests/queries/0_stateless/00495_reading_const_zero_column.sql
@@ -0,0 +1,5 @@
+drop table if exists one_table;
+create table one_table (date Date, one UInt64) engine = MergeTree(date, (date, one), 8192);
+insert into one_table select today(), toUInt64(1) from system.numbers limit 100000;
+SET preferred_block_size_bytes = 8192;
+select isNull(one) from one_table where isNull(one);

From f28d0cff3c428179300453261ee883bfe2a57425 Mon Sep 17 00:00:00 2001
From: Vitaliy Lyudvichenko <vludv@yandex-team.ru>
Date: Thu, 17 Aug 2017 13:28:57 +0300
Subject: [PATCH 216/281] Add more detailed exception message.
 [#CLICKHOUSE-3234]

---
 dbms/src/Storages/MergeTree/MergeTreeDataPart.cpp | 4 +---
 1 file changed, 1 insertion(+), 3 deletions(-)

diff --git a/dbms/src/Storages/MergeTree/MergeTreeDataPart.cpp b/dbms/src/Storages/MergeTree/MergeTreeDataPart.cpp
index c628a70179d..22f871e9c13 100644
--- a/dbms/src/Storages/MergeTree/MergeTreeDataPart.cpp
+++ b/dbms/src/Storages/MergeTree/MergeTreeDataPart.cpp
@@ -324,9 +324,7 @@ String MergeTreeDataPart::getColumnNameWithMinumumCompressedSize() const
     }
 
     if (!minimum_size_column)
-        throw Exception{
-            "Could not find a column of minimum size in MergeTree",
-            ErrorCodes::LOGICAL_ERROR};
+        throw Exception("Could not find a column of minimum size in MergeTree, part " + getFullPath(), ErrorCodes::LOGICAL_ERROR);
 
     return *minimum_size_column;
 }

From 1a68a17a06779f72cc21256daf35168344f32e1c Mon Sep 17 00:00:00 2001
From: Vitaliy Lyudvichenko <vludv@yandex-team.ru>
Date: Thu, 17 Aug 2017 22:22:56 +0300
Subject: [PATCH 217/281] Avoid infinite loop in DDLWorker if ZooKeeper dies.
 [#CLICKHOUSE-2]

---
 dbms/src/Interpreters/DDLWorker.cpp | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/dbms/src/Interpreters/DDLWorker.cpp b/dbms/src/Interpreters/DDLWorker.cpp
index d77e1f275d5..0618626632e 100644
--- a/dbms/src/Interpreters/DDLWorker.cpp
+++ b/dbms/src/Interpreters/DDLWorker.cpp
@@ -862,7 +862,7 @@ void DDLWorker::run()
             tryLogCurrentException(log, "Terminating. Cannot initialize DDL queue.");
             return;
         }
-    } while (!initialized);
+    } while (!initialized && !stop_flag);
 
     while (!stop_flag)
     {

From 97699eeea2ad5495bd9c15de6f2d0db1c49a13b4 Mon Sep 17 00:00:00 2001
From: proller <proller@github.com>
Date: Fri, 18 Aug 2017 01:51:12 +0300
Subject: [PATCH 218/281] Auto version update to [54278]

---
 dbms/cmake/version.cmake | 4 ++--
 debian/changelog         | 4 ++--
 2 files changed, 4 insertions(+), 4 deletions(-)

diff --git a/dbms/cmake/version.cmake b/dbms/cmake/version.cmake
index 4c3c9c34107..539abf6643d 100644
--- a/dbms/cmake/version.cmake
+++ b/dbms/cmake/version.cmake
@@ -1,6 +1,6 @@
 # This strings autochanged from release_lib.sh:
-set(VERSION_DESCRIBE v1.1.54275-testing)
-set(VERSION_REVISION 54275)
+set(VERSION_DESCRIBE v1.1.54278-testing)
+set(VERSION_REVISION 54278)
 # end of autochange
 
 set (VERSION_MAJOR 1)
diff --git a/debian/changelog b/debian/changelog
index 15bf503f468..cf6a8a8ae96 100644
--- a/debian/changelog
+++ b/debian/changelog
@@ -1,5 +1,5 @@
-clickhouse (1.1.54275) unstable; urgency=low
+clickhouse (1.1.54278) unstable; urgency=low
 
   * Modified source code
 
- -- proller <proller@yandex-team.ru>  Thu, 17 Aug 2017 19:33:34 +0300
+ -- proller <proller@yandex-team.ru>  Fri, 18 Aug 2017 01:51:11 +0300

From 44f669c1ca26d751ba07b10dad94e1ac5698ea38 Mon Sep 17 00:00:00 2001
From: proller <proller@github.com>
Date: Fri, 18 Aug 2017 02:02:51 +0300
Subject: [PATCH 219/281] Cmake: fix USE_INTERNAL_BOOST_LIBRARY=0

---
 cmake/print_include_directories.cmake | 6 ++++--
 1 file changed, 4 insertions(+), 2 deletions(-)

diff --git a/cmake/print_include_directories.cmake b/cmake/print_include_directories.cmake
index 4943f1e2afc..83b8064d262 100644
--- a/cmake/print_include_directories.cmake
+++ b/cmake/print_include_directories.cmake
@@ -7,8 +7,10 @@ list(APPEND dirs ${dirs1})
 get_property (dirs1 TARGET common PROPERTY INCLUDE_DIRECTORIES)
 list(APPEND dirs ${dirs1})
 
-get_property (dirs1 TARGET ${Boost_PROGRAM_OPTIONS_LIBRARY} PROPERTY INCLUDE_DIRECTORIES)
-list(APPEND dirs ${dirs1})
+if (USE_INTERNAL_BOOST_LIBRARY)
+    get_property (dirs1 TARGET ${Boost_PROGRAM_OPTIONS_LIBRARY} PROPERTY INCLUDE_DIRECTORIES)
+    list(APPEND dirs ${dirs1})
+endif ()
 
 list(REMOVE_DUPLICATES dirs)
 file (WRITE ${CMAKE_CURRENT_BINARY_DIR}/include_directories.txt "")

From c720524078471244a940265d34f73727718f932b Mon Sep 17 00:00:00 2001
From: proller <proller@github.com>
Date: Fri, 18 Aug 2017 02:03:15 +0300
Subject: [PATCH 220/281] Auto version update to [54279]

---
 dbms/cmake/version.cmake | 4 ++--
 debian/changelog         | 4 ++--
 2 files changed, 4 insertions(+), 4 deletions(-)

diff --git a/dbms/cmake/version.cmake b/dbms/cmake/version.cmake
index 539abf6643d..74238ed1ac9 100644
--- a/dbms/cmake/version.cmake
+++ b/dbms/cmake/version.cmake
@@ -1,6 +1,6 @@
 # This strings autochanged from release_lib.sh:
-set(VERSION_DESCRIBE v1.1.54278-testing)
-set(VERSION_REVISION 54278)
+set(VERSION_DESCRIBE v1.1.54279-testing)
+set(VERSION_REVISION 54279)
 # end of autochange
 
 set (VERSION_MAJOR 1)
diff --git a/debian/changelog b/debian/changelog
index cf6a8a8ae96..a81932ef4ce 100644
--- a/debian/changelog
+++ b/debian/changelog
@@ -1,5 +1,5 @@
-clickhouse (1.1.54278) unstable; urgency=low
+clickhouse (1.1.54279) unstable; urgency=low
 
   * Modified source code
 
- -- proller <proller@yandex-team.ru>  Fri, 18 Aug 2017 01:51:11 +0300
+ -- proller <proller@yandex-team.ru>  Fri, 18 Aug 2017 02:03:13 +0300

From 4687a2730d2159f1c70371ea6562ae938fca731e Mon Sep 17 00:00:00 2001
From: Alexey Milovidov <milovidov@yandex-team.ru>
Date: Fri, 18 Aug 2017 04:00:13 +0300
Subject: [PATCH 221/281] Allow to override umask in config [#CLICKHOUSE-2].

---
 dbms/src/Server/config.xml                 |  5 +++++
 libs/libdaemon/include/daemon/BaseDaemon.h |  2 +-
 libs/libdaemon/src/BaseDaemon.cpp          | 15 ++++++++++++++-
 3 files changed, 20 insertions(+), 2 deletions(-)

diff --git a/dbms/src/Server/config.xml b/dbms/src/Server/config.xml
index baf563c0a2c..e4706f3b7f1 100644
--- a/dbms/src/Server/config.xml
+++ b/dbms/src/Server/config.xml
@@ -114,6 +114,11 @@
     -->
     <!-- <timezone>Europe/Moscow</timezone> -->
 
+    <!-- You can specify umask here (see "man umask"). Server will apply it on startup.
+         Number is always parsed as octal. Default umask is 027 (other users cannot read logs, data files, etc; group can only read).
+    -->
+    <!-- <umask>022</umask> -->
+
     <!-- Configuration of clusters that could be used in Distributed tables.
          https://clickhouse.yandex/reference_en.html#Distributed
       -->
diff --git a/libs/libdaemon/include/daemon/BaseDaemon.h b/libs/libdaemon/include/daemon/BaseDaemon.h
index 33a6889d610..c29261930ed 100644
--- a/libs/libdaemon/include/daemon/BaseDaemon.h
+++ b/libs/libdaemon/include/daemon/BaseDaemon.h
@@ -67,7 +67,7 @@ public:
     void buildLoggers();
 
     /// Определяет параметр командной строки
-    void defineOptions(Poco::Util::OptionSet& _options) override;
+    void defineOptions(Poco::Util::OptionSet & _options) override;
 
     /// Заставляет демон завершаться, если хотя бы одна задача завершилась неудачно
     void exitOnTaskError();
diff --git a/libs/libdaemon/src/BaseDaemon.cpp b/libs/libdaemon/src/BaseDaemon.cpp
index eb2ec6bdab8..e64d9236396 100644
--- a/libs/libdaemon/src/BaseDaemon.cpp
+++ b/libs/libdaemon/src/BaseDaemon.cpp
@@ -29,6 +29,7 @@
 #include <sys/time.h>
 #include <sys/resource.h>
 #include <iostream>
+#include <sstream>
 #include <memory>
 #include <Poco/Observer.h>
 #include <Poco/Logger.h>
@@ -691,7 +692,7 @@ std::string BaseDaemon::getDefaultCorePath() const
     return "/opt/cores/";
 }
 
-void BaseDaemon::initialize(Application& self)
+void BaseDaemon::initialize(Application & self)
 {
     task_manager.reset(new Poco::TaskManager);
     ServerApplication::initialize(self);
@@ -740,6 +741,18 @@ void BaseDaemon::initialize(Application& self)
         tzset();
     }
 
+    /// This must be done before creation of any files (including logs).
+    if (config().has("umask"))
+    {
+        std::string umask_str = config().getString("umask");
+        mode_t umask_num = 0;
+        std::stringstream stream;
+        stream << umask_str;
+        stream >> std::oct >> umask_num;
+
+        umask(umask_num);
+    }
+
     std::string log_path = config().getString("logger.log", "");
     if (!log_path.empty())
         log_path = Poco::Path(log_path).setFileName("").toString();

From 5908f17962391c788419096bee34ec76211497dd Mon Sep 17 00:00:00 2001
From: proller <proller@github.com>
Date: Fri, 18 Aug 2017 04:32:52 +0300
Subject: [PATCH 222/281] Fix release script

---
 release        | 3 +--
 release_lib.sh | 5 ++++-
 2 files changed, 5 insertions(+), 3 deletions(-)

diff --git a/release b/release
index 106f118ad74..8ee0d132211 100755
--- a/release
+++ b/release
@@ -8,7 +8,6 @@ cd $CURDIR
 source "./release_lib.sh"
 
 CONTROL=debian/control
-CHLOG=debian/changelog
 CHDATE=$(LC_ALL=C date -R | sed -e 's/,/\\,/g') # Replace comma to '\,'
 DEBUILD_NOSIGN_OPTIONS="-us -uc"
 
@@ -65,7 +64,7 @@ CMAKE_FLAGS_ADD+=" $LIBTCMALLOC_OPTS -DCMAKE_BUILD_TYPE=$CMAKE_BUILD_TYPE"
 REVISION+=$VERSION_POSTFIX
 echo -e "\nCurrent revision is $REVISION"
 
-gen_changelog "$REVISION" "$CHDATE" "$AUTHOR" "$CHLOG"
+gen_changelog "$REVISION" "$CHDATE" "$AUTHOR" ""
 
 # Build (only binary packages).
 debuild -e PATH -e SSH_AUTH_SOCK -e DEB_BUILD_OPTIONS=parallel=$THREAD_COUNT -e DEB_CC -e DEB_CXX -e DEB_CLANG -e CMAKE_FLAGS_ADD="$CMAKE_FLAGS_ADD" -b ${DEBUILD_NOSIGN_OPTIONS} ${DEBUILD_NODEPS_OPTIONS}
diff --git a/release_lib.sh b/release_lib.sh
index 52e474cda65..f5ddadb738f 100644
--- a/release_lib.sh
+++ b/release_lib.sh
@@ -47,7 +47,7 @@ function gen_revision_author {
             git_describe=`git describe`
             sed -i -- "s/VERSION_REVISION .*)/VERSION_REVISION $REVISION)/g;s/VERSION_DESCRIBE .*)/VERSION_DESCRIBE $git_describe)/g" dbms/cmake/version.cmake
 
-            gen_changelog "$REVISION" "$CHDATE" "$AUTHOR" "$CHLOG"
+            gen_changelog "$REVISION" "$CHDATE" "$AUTHOR" ""
             git commit -m "$auto_message [$REVISION]" dbms/cmake/version.cmake debian/changelog
             #git push
 
@@ -92,6 +92,9 @@ function gen_changelog {
     CHDATE="$2"
     AUTHOR="$3"
     CHLOG="$4"
+    if [ -z "$CHLOG" ] ; then
+        CHLOG=debian/changelog
+    fi
 
     sed \
         -e "s/[@]REVISION[@]/$REVISION/g" \

From 7b6fa26151bbde91e39760121f3494f9a14325af Mon Sep 17 00:00:00 2001
From: robot-metrika-test <robot-metrika-test@yandex-team.ru>
Date: Fri, 18 Aug 2017 04:35:46 +0300
Subject: [PATCH 223/281] Auto version update to [54280]

---
 dbms/cmake/version.cmake | 4 ++--
 debian/changelog         | 4 ++--
 2 files changed, 4 insertions(+), 4 deletions(-)

diff --git a/dbms/cmake/version.cmake b/dbms/cmake/version.cmake
index 74238ed1ac9..2438e4e2597 100644
--- a/dbms/cmake/version.cmake
+++ b/dbms/cmake/version.cmake
@@ -1,6 +1,6 @@
 # This strings autochanged from release_lib.sh:
-set(VERSION_DESCRIBE v1.1.54279-testing)
-set(VERSION_REVISION 54279)
+set(VERSION_DESCRIBE v1.1.54280-testing)
+set(VERSION_REVISION 54280)
 # end of autochange
 
 set (VERSION_MAJOR 1)
diff --git a/debian/changelog b/debian/changelog
index a81932ef4ce..dafd0ceef12 100644
--- a/debian/changelog
+++ b/debian/changelog
@@ -1,5 +1,5 @@
-clickhouse (1.1.54279) unstable; urgency=low
+clickhouse (1.1.54280) unstable; urgency=low
 
   * Modified source code
 
- -- proller <proller@yandex-team.ru>  Fri, 18 Aug 2017 02:03:13 +0300
+ --  <robot-metrika-test@yandex-team.ru>  

From e545777187f5d2d2bca858268a43b20aab903e03 Mon Sep 17 00:00:00 2001
From: Alexey Zatelepin <ztlpn@yandex-team.ru>
Date: Fri, 18 Aug 2017 15:38:43 +0300
Subject: [PATCH 224/281] fix PoolWithFailoverBase::TryResult initialization

---
 dbms/src/Common/PoolWithFailoverBase.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/dbms/src/Common/PoolWithFailoverBase.h b/dbms/src/Common/PoolWithFailoverBase.h
index 47ad7a7374a..6d436051685 100644
--- a/dbms/src/Common/PoolWithFailoverBase.h
+++ b/dbms/src/Common/PoolWithFailoverBase.h
@@ -71,7 +71,7 @@ public:
         TryResult() = default;
 
         explicit TryResult(Entry entry_)
-            : entry(std::move(entry))
+            : entry(std::move(entry_))
             , is_usable(true)
             , is_up_to_date(true)
         {

From b29fea530107638bf8ff45f8565af225e43d16b7 Mon Sep 17 00:00:00 2001
From: proller <proller@github.com>
Date: Fri, 18 Aug 2017 16:25:14 +0300
Subject: [PATCH 225/281] release script: fix autogenerating date on --version
 call. clean unused.

---
 debian/changelog |  2 +-
 release          |  3 +--
 release_lib.sh   | 44 +++++++++++++-------------------------------
 3 files changed, 15 insertions(+), 34 deletions(-)

diff --git a/debian/changelog b/debian/changelog
index dafd0ceef12..44d45811d3a 100644
--- a/debian/changelog
+++ b/debian/changelog
@@ -2,4 +2,4 @@ clickhouse (1.1.54280) unstable; urgency=low
 
   * Modified source code
 
- --  <robot-metrika-test@yandex-team.ru>  
+ -- proller <proller@yandex-team.ru>  Fri, 18 Aug 2017 16:18:33 +0300
diff --git a/release b/release
index 8ee0d132211..9e9d200499f 100755
--- a/release
+++ b/release
@@ -8,7 +8,6 @@ cd $CURDIR
 source "./release_lib.sh"
 
 CONTROL=debian/control
-CHDATE=$(LC_ALL=C date -R | sed -e 's/,/\\,/g') # Replace comma to '\,'
 DEBUILD_NOSIGN_OPTIONS="-us -uc"
 
 if [ -z "$REVISION" ] ; then
@@ -64,7 +63,7 @@ CMAKE_FLAGS_ADD+=" $LIBTCMALLOC_OPTS -DCMAKE_BUILD_TYPE=$CMAKE_BUILD_TYPE"
 REVISION+=$VERSION_POSTFIX
 echo -e "\nCurrent revision is $REVISION"
 
-gen_changelog "$REVISION" "$CHDATE" "$AUTHOR" ""
+gen_changelog "$REVISION" "" "$AUTHOR" ""
 
 # Build (only binary packages).
 debuild -e PATH -e SSH_AUTH_SOCK -e DEB_BUILD_OPTIONS=parallel=$THREAD_COUNT -e DEB_CC -e DEB_CXX -e DEB_CLANG -e CMAKE_FLAGS_ADD="$CMAKE_FLAGS_ADD" -b ${DEBUILD_NOSIGN_OPTIONS} ${DEBUILD_NODEPS_OPTIONS}
diff --git a/release_lib.sh b/release_lib.sh
index f5ddadb738f..e1729f4412e 100644
--- a/release_lib.sh
+++ b/release_lib.sh
@@ -3,9 +3,9 @@ function get_revision {
     grep "set(VERSION_REVISION" ${BASEDIR}/dbms/cmake/version.cmake | sed 's/^.*VERSION_REVISION \(.*\))$/\1/'
 }
 
-# remove me after fixing all testing-building scripts
-function make_control {
-    true
+function get_author {
+    AUTHOR=$(git config --get user.name || echo ${USER})
+    echo $AUTHOR
 }
 
 # Generate revision number.
@@ -47,7 +47,7 @@ function gen_revision_author {
             git_describe=`git describe`
             sed -i -- "s/VERSION_REVISION .*)/VERSION_REVISION $REVISION)/g;s/VERSION_DESCRIBE .*)/VERSION_DESCRIBE $git_describe)/g" dbms/cmake/version.cmake
 
-            gen_changelog "$REVISION" "$CHDATE" "$AUTHOR" ""
+            gen_changelog "$REVISION" "" "$AUTHOR" ""
             git commit -m "$auto_message [$REVISION]" dbms/cmake/version.cmake debian/changelog
             #git push
 
@@ -78,7 +78,7 @@ function gen_revision_author {
 
 function get_revision_author {
     REVISION=$(get_revision)
-    AUTHOR=$(git config --get user.name || echo ${USER})
+    AUTHOR=$(get_author)
     export REVISION
     export AUTHOR
 }
@@ -92,10 +92,18 @@ function gen_changelog {
     CHDATE="$2"
     AUTHOR="$3"
     CHLOG="$4"
+    if [ -z "$REVISION" ] ; then  # no revision passed by the caller: let get_revision_author fill in REVISION and AUTHOR
+        get_revision_author
+    fi
+
     if [ -z "$CHLOG" ] ; then
         CHLOG=debian/changelog
     fi
 
+    if [ -z "$CHDATE" ] ; then
+        CHDATE=$(LC_ALL=C date -R | sed -e 's/,/\\,/g') # Replace comma to '\,'
+    fi
+
     sed \
         -e "s/[@]REVISION[@]/$REVISION/g" \
         -e "s/[@]DATE[@]/$CHDATE/g" \
@@ -103,29 +111,3 @@ function gen_changelog {
         -e "s/[@]EMAIL[@]/$(whoami)@yandex-team.ru/g" \
         < $CHLOG.in > $CHLOG
 }
-
-# Upload to Metrica repository
-# working directory - where script is itself
-function upload_debs {
-    REVISION="$1"
-    # Determine the repository, in which you need to upload the packages. It corresponds to the version of Ubuntu.
-    source /etc/lsb-release
-
-    if [ "$DISTRIB_CODENAME" == "precise" ]; then
-        REPO="metrika"
-    elif [ "$DISTRIB_CODENAME" == "trusty" ]; then
-        REPO="metrika-trusty"
-    elif [ "$DISTRIB_CODENAME" == "xenial" ]; then
-        REPO="metrika-xenial"
-    else
-        echo -e "\n\e[0;31mUnknown Ubuntu version $DISTRIB_CODENAME \e[0;0m\n"
-    fi
-
-    # Upload to Metrica repository.
-
-    cd ../
-    DUPLOAD_CONF=dupload.conf
-    cat src/debian/dupload.conf.in | sed -e "s/[@]AUTHOR[@]/$(whoami)/g" > $DUPLOAD_CONF
-
-    dupload metrika-yandex_1.1."$REVISION"_amd64.changes -t $REPO -c --nomail
-}

From 8726843fcd455b50baf2df54c2ae6ad63fb86d8d Mon Sep 17 00:00:00 2001
From: Vitaliy Lyudvichenko <vludv@yandex-team.ru>
Date: Fri, 18 Aug 2017 18:29:34 +0300
Subject: [PATCH 226/281] Add russian changelog for 2 recent revisions.
 [#CLICKHOUSE-2]

---
 CHANGELOG_RU.md | 102 ++++++++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 102 insertions(+)
 create mode 100644 CHANGELOG_RU.md

diff --git a/CHANGELOG_RU.md b/CHANGELOG_RU.md
new file mode 100644
index 00000000000..02dede9642c
--- /dev/null
+++ b/CHANGELOG_RU.md
@@ -0,0 +1,102 @@
+# Релиз ClickHouse 1.1.54276
+
+## Новые возможности:
+* Добавлена опциональная секция WITH запроса SELECT. Пример запроса: `WITH 1+1 AS a SELECT a, a*a`
+* Добавлена возможность синхронной вставки в Distributed таблицу: выдается Ok только после того как все данные записались на все шарды. Активируется настройкой insert_distributed_sync=1
+* Добавлен тип данных UUID для работы с 16-байтовыми идентификаторами
+* Добавлены алиасы типов CHAR, FLOAT и т.д. для совместимости с Tableau
+* Добавлены функции toYYYYMM, toYYYYMMDD, toYYYYMMDDhhmmss для перевода времени в числа
+* Добавлена возможность использовать IP адреса (совместно с hostname) для идентификации сервера при работе с кластерными DDL запросами
+* Добавлена поддержка неконстантных аргументов и отрицательных смещений в функции `substring(str, pos, len)`
+* Добавлен параметр max_size для агрегатной функции `groupArray(max_size)(column)`, и оптимизирована её производительность
+
+## Основные изменения:
+* Улучшение безопасности: все файлы сервера создаются с правами 0640
+* Улучшены сообщения об ошибках в случае синтаксически неверных запросов
+* Значительно уменьшен расход оперативной памяти и улучшена производительность слияний больших MergeTree-кусков данных
+* Значительно увеличена производительность слияний данных для движка ReplacingMergeTree
+* Улучшена производительность асинхронных вставок из Distributed таблицы за счет объединения нескольких исходных вставок. Функционал включается настройкой distributed_directory_monitor_batch_inserts=1.
+
+## Обратно несовместимые изменения:
+* Изменился бинарный формат агрегатных состояний функции `groupArray(array_column)` для массивов
+
+## Полный список изменений:
+* Добавлен вывод nan и inf значений в формате JSON
+* Более оптимальное выделение потоков при чтении из Distributed таблиц
+* Разрешено задавать настройки в режиме readonly, если их значение не изменяется
+* Добавлена возможность считывать нецелые гранулы движка MergeTree для выполнения ограничений на размер блока, задаваемый настройкой preferred_block_size_bytes - для уменьшения потребления оперативной памяти и увеличения кэш-локальности при обработке запросов из таблиц со столбцами большого размера
+* Эффективное использование индекса, содержащего выражения типа `toStartOfHour(x)`, для условий вида `toStartOfHour(x) op constexpr`
+* Добавлены новые настройки для MergeTree движков (секция merge_tree в config.xml):
+  - replicated_deduplication_window_seconds позволяет задать интервал дедупликации вставок в Replicated-таблицы в секундах
+  - cleanup_delay_period - периодичность запуска очистки неактуальных данных
+  - replicated_can_become_leader - запретить реплике становиться лидером (и назначать мержи)
+* Ускорена очистка неактуальных данных из ZooKeeper
+* Множественные улучшения и исправления работы кластерных DDL запросов. В частности, добавлена настройка distributed_ddl_task_timeout, ограничивающая время ожидания ответов серверов кластера.
+* Улучшено отображение стэктрейсов в логах сервера
+* Добавлен метод сжатия none
+* Возможность использования нескольких секций dictionaries_config в config.xml
+* Возможность подключения к MySQL через сокет на файловой системе
+* В таблицу system.parts добавлен столбец с информацией о размере marks в байтах
+
+Исправления багов:
+* Исправлена некорректная работа Distributed таблиц, использующих Merge таблицы, при SELECT с условием на поле _table
+* Исправлен редкий race condition в ReplicatedMergeTree при проверке кусков данных
+* Исправлено возможное зависание процедуры leader election при старте сервера
+* Исправлено игнорирование настройки max_replica_delay_for_distributed_queries при использовании локальной реплики в качестве источника данных
+* Исправлено некорректное поведение `ALTER TABLE CLEAR COLUMN IN PARTITION` при попытке очистить несуществующую колонку
+* Исправлено исключение в функции multiIf при использовании пустых массивов или строк
+* Исправлено чрезмерное выделение памяти при десериализации формата Native
+* Исправлено некорректное автообновление Trie словарей
+* Исправлено исключение при выполнении запросов с GROUP BY из Merge-таблицы при использовании SAMPLE
+* Исправлено падение GROUP BY при использовании настройки distributed_aggregation_memory_efficient=1
+* Добавлена возможность указывать database.table в правой части IN и JOIN
+* Исправлено использование слишком большого количества потоков при параллельной агрегации
+* Исправлена работа функции if с аргументами FixedString
+* Исправлена некорректная работа SELECT из Distributed-таблицы для шардов с весом 0
+* Исправлено падение запроса `CREATE VIEW IF EXISTS`
+* Исправлено некорректное поведение при input_format_skip_unknown_fields=1 в случае отрицательных чисел
+* Исправлен бесконечный цикл в функции `dictGetHierarchy()` в случае некоторых некорректных данных словаря
+* Исправлены ошибки типа `Syntax error: unexpected (...)` при выполнении распределенных запросов с подзапросами в секции IN или JOIN, в случае использования совместно с Merge таблицами
+* Исправлена неправильная интерпретация SELECT запроса из таблиц типа Dictionary
+* Исправлена ошибка "Cannot mremap" при использовании множеств в секциях IN, JOIN, содержащих более 2 млрд. элементов
+* Исправлен failover для словарей с источником MySQL
+* Улучшения процесса разработки и сборки ClickHouse:
+* Добавлена возмозможность сборки в Arcadia
+* Добавлена возможность сборки с помощью gcc 7
+* Ускорена параллельная сборка с помощью ccache+distcc
+
+
+# Релиз ClickHouse 1.1.54245
+
+## Новые возможности:
+* Распределённые DDL (например, `CREATE TABLE ON CLUSTER`)
+* Реплицируемый запрос `ALTER TABLE CLEAR COLUMN IN PARTITION`
+* Движок таблиц Dictionary (доступ к данным словаря в виде таблицы)
+* Движок баз данных Dictionary (в такой базе автоматически доступны Dictionary-таблицы для всех подключённых внешних словарей)
+* Возможность проверки необходимости обновления словаря путём отправки запроса в источник
+* Qualified имена столбцов
+* Квотирование идентификаторов двойными кавычками
+* Сессии в HTTP интерфейсе
+* Запрос OPTIMIZE для Replicated таблицы теперь можно выполнять не только на лидере
+
+## Обратно несовместимые изменения:
+* Убрана команда SET GLOBAL
+
+## Мелкие изменения:
+* Теперь после получения сигнала в лог печатается полный стектрейс
+* Ослаблена проверка на количество повреждённых/лишних кусков при старте (было слишком много ложных срабатываний)
+
+## Исправления багов:
+* Исправлено залипание плохого соединения при вставке в Distributed таблицу
+* GLOBAL IN теперь работает при запросе из таблицы Merge, смотрящей в Distributed
+* Теперь правильно определяется количество ядер на виртуалках Google Compute Engine
+* Исправления в работе executable источника кэшируемых внешних словарей
+* Исправлены сравнения строк, содержащих нулевые символы
+* Исправлено сравнение полей первичного ключа типа Float32 с константами
+* Раньше неправильная оценка размера поля могла приводить к слишком большим аллокациям
+* Исправлено падение при запросе Nullable столбца, добавленного в таблицу ALTER-ом
+* Исправлено падение при сортировке по Nullable столбцу, если количество строк меньше LIMIT
+* Исправлен ORDER BY подзапроса, состоящего только из константных значений
+* Раньше Replicated таблица могла остаться в невалидном состоянии после неудавшегося DROP TABLE
+* Алиасы для скалярных подзапросов с пустым результатом теперь не теряются
+* Теперь запрос, в котором использовалась компиляция, не завершается ошибкой, если .so файл повреждается

From 42910f31f787852dbc069a10110351e540c484c4 Mon Sep 17 00:00:00 2001
From: alexey-milovidov <milovidov@yandex-team.ru>
Date: Fri, 18 Aug 2017 19:29:24 +0300
Subject: [PATCH 227/281] Update CHANGELOG_RU.md

---
 CHANGELOG_RU.md | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/CHANGELOG_RU.md b/CHANGELOG_RU.md
index 02dede9642c..28829a72f00 100644
--- a/CHANGELOG_RU.md
+++ b/CHANGELOG_RU.md
@@ -38,7 +38,7 @@
 * Возможность подключения к MySQL через сокет на файловой системе
 * В таблицу system.parts добавлен столбец с информацией о размере marks в байтах
 
-Исправления багов:
+## Исправления багов:
 * Исправлена некорректная работа Distributed таблиц, использующих Merge таблицы, при SELECT с условием на поле _table
 * Исправлен редкий race condition в ReplicatedMergeTree при проверке кусков данных
 * Исправлено возможное зависание процедуры leader election при старте сервера

From a04e9a256e20691afcb6cdc1019bb38cae576421 Mon Sep 17 00:00:00 2001
From: Vitaliy Lyudvichenko <ludv1x@yandex.ru>
Date: Fri, 18 Aug 2017 19:30:31 +0300
Subject: [PATCH 228/281] Update CHANGELOG_RU.md

---
 CHANGELOG_RU.md | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/CHANGELOG_RU.md b/CHANGELOG_RU.md
index 28829a72f00..0f08a40a4ba 100644
--- a/CHANGELOG_RU.md
+++ b/CHANGELOG_RU.md
@@ -60,7 +60,8 @@
 * Исправлена неправильная интерпретация SELECT запроса из таблиц типа Dictionary
 * Исправлена ошибка "Cannot mremap" при использовании множеств в секциях IN, JOIN, содержащих более 2 млрд. элементов
 * Исправлен failover для словарей с источником MySQL
-* Улучшения процесса разработки и сборки ClickHouse:
+
+## Улучшения процесса разработки и сборки ClickHouse:
 * Добавлена возмозможность сборки в Arcadia
 * Добавлена возможность сборки с помощью gcc 7
 * Ускорена параллельная сборка с помощью ccache+distcc

From f918e224389ec7606cafac64946aaa7ca3146a0d Mon Sep 17 00:00:00 2001
From: Alexey Milovidov <milovidov@yandex-team.ru>
Date: Fri, 18 Aug 2017 19:36:02 +0300
Subject: [PATCH 229/281] Miscellaneous [#CLICKHOUSE-2].

---
 dbms/src/Functions/FunctionFactory.cpp | 1 +
 dbms/src/Functions/FunctionFactory.h   | 5 -----
 2 files changed, 1 insertion(+), 5 deletions(-)

diff --git a/dbms/src/Functions/FunctionFactory.cpp b/dbms/src/Functions/FunctionFactory.cpp
index c8819fbbae8..c6fb89eee1b 100644
--- a/dbms/src/Functions/FunctionFactory.cpp
+++ b/dbms/src/Functions/FunctionFactory.cpp
@@ -9,6 +9,7 @@ namespace DB
 namespace ErrorCodes
 {
     extern const int UNKNOWN_FUNCTION;
+    extern const int LOGICAL_ERROR;
 }
 
 
diff --git a/dbms/src/Functions/FunctionFactory.h b/dbms/src/Functions/FunctionFactory.h
index 9702e233a44..fa577674178 100644
--- a/dbms/src/Functions/FunctionFactory.h
+++ b/dbms/src/Functions/FunctionFactory.h
@@ -16,11 +16,6 @@ class Context;
 class IFunction;
 using FunctionPtr = std::shared_ptr<IFunction>;
 
-namespace ErrorCodes
-{
-    extern const int LOGICAL_ERROR;
-}
-
 
 /** Creates function by name.
   * Function could use for initialization (take ownership of shared_ptr, for example)

From 2e468c143d6ea46569544e589339d74a213ebd85 Mon Sep 17 00:00:00 2001
From: Alexey Milovidov <milovidov@yandex-team.ru>
Date: Fri, 18 Aug 2017 20:05:53 +0300
Subject: [PATCH 230/281] Added convenience comment in config [#CLICKHOUSE-2].

---
 dbms/src/Server/config.xml | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/dbms/src/Server/config.xml b/dbms/src/Server/config.xml
index e4706f3b7f1..dfd3f2433dd 100644
--- a/dbms/src/Server/config.xml
+++ b/dbms/src/Server/config.xml
@@ -111,6 +111,9 @@
 
          Time zone is specified as identifier from IANA time zone database, like UTC or Africa/Abidjan.
          If not specified, system time zone at server startup is used.
+
+         Please note that the server may display a time zone alias instead of the specified name.
+         Example: W-SU is an alias for Europe/Moscow and Zulu is an alias for UTC.
     -->
     <!-- <timezone>Europe/Moscow</timezone> -->
 

From 5f4e8339258f343e559c14c1a87cf3b7ce338c78 Mon Sep 17 00:00:00 2001
From: Vadim Skipin <vskipin@yandex-team.ru>
Date: Fri, 18 Aug 2017 20:06:22 +0300
Subject: [PATCH 231/281] Cleanup function factories: * Switch to std::function
 to allow more complex creator logic * Cleanup headers

---
 .../AggregateFunctionFactory.cpp              | 10 ++--
 .../AggregateFunctionFactory.h                | 52 ++++++++++++-------
 dbms/src/Functions/FunctionFactory.cpp        | 11 +++-
 dbms/src/Functions/FunctionFactory.h          | 47 ++++++++++-------
 .../TableFunctions/TableFunctionFactory.cpp   | 14 ++++-
 .../src/TableFunctions/TableFunctionFactory.h | 50 ++++++++++--------
 6 files changed, 115 insertions(+), 69 deletions(-)

diff --git a/dbms/src/AggregateFunctions/AggregateFunctionFactory.cpp b/dbms/src/AggregateFunctions/AggregateFunctionFactory.cpp
index 19a01635065..6a541ff28b4 100644
--- a/dbms/src/AggregateFunctions/AggregateFunctionFactory.cpp
+++ b/dbms/src/AggregateFunctions/AggregateFunctionFactory.cpp
@@ -1,13 +1,17 @@
 #include <AggregateFunctions/AggregateFunctionFactory.h>
-#include <IO/WriteBuffer.h>
-#include <IO/WriteHelpers.h>
+
 #include <DataTypes/DataTypeAggregateFunction.h>
 #include <DataTypes/DataTypeArray.h>
 #include <DataTypes/DataTypeNullable.h>
+#include <IO/WriteBuffer.h>
+#include <IO/WriteHelpers.h>
+#include <Interpreters/Context.h>
+
 #include <Common/StringUtils.h>
-#include <Poco/String.h>
 #include <Common/typeid_cast.h>
 
+#include <Poco/String.h>
+
 
 namespace DB
 {
diff --git a/dbms/src/AggregateFunctions/AggregateFunctionFactory.h b/dbms/src/AggregateFunctions/AggregateFunctionFactory.h
index be44a7a9e87..4e5423396bf 100644
--- a/dbms/src/AggregateFunctions/AggregateFunctionFactory.h
+++ b/dbms/src/AggregateFunctions/AggregateFunctionFactory.h
@@ -1,14 +1,22 @@
 #pragma once
 
-#include <unordered_map>
 #include <AggregateFunctions/IAggregateFunction.h>
+
 #include <ext/singleton.h>
 
+#include <functional>
+#include <memory>
+#include <string>
+#include <unordered_map>
+#include <vector>
+
 
 namespace DB
 {
 
+class Context;
 class IDataType;
+
 using DataTypePtr = std::shared_ptr<IDataType>;
 using DataTypes = std::vector<DataTypePtr>;
 
@@ -19,22 +27,8 @@ class AggregateFunctionFactory final : public ext::singleton<AggregateFunctionFa
 {
     friend class StorageSystemFunctions;
 
-private:
-    /// No std::function, for smaller object size and less indirection.
-    using Creator = AggregateFunctionPtr(*)(const String & name, const DataTypes & argument_types, const Array & parameters);
-    using AggregateFunctions = std::unordered_map<String, Creator>;
-
 public:
-
-    AggregateFunctionPtr get(
-        const String & name,
-        const DataTypes & argument_types,
-        const Array & parameters = {},
-        int recursion_level = 0) const;
-
-    AggregateFunctionPtr tryGet(const String & name, const DataTypes & argument_types, const Array & parameters = {}) const;
-
-    bool isAggregateFunctionName(const String & name, int recursion_level = 0) const;
+    using Creator = std::function<AggregateFunctionPtr(const String &, const DataTypes &, const Array &)>;
 
     /// For compatibility with SQL, it's possible to specify that certain aggregate function name is case insensitive.
     enum CaseSensitiveness
@@ -43,11 +37,29 @@ public:
         CaseInsensitive
     };
 
-    /// Register an aggregate function by its name.
-    void registerFunction(const String & name, Creator creator, CaseSensitiveness case_sensitiveness = CaseSensitive);
+    /// Register a function by its name.
+    /// No locking, you must register all functions before usage of get.
+    void registerFunction(
+        const String & name,
+        Creator creator,
+        CaseSensitiveness case_sensitiveness = CaseSensitive);
+
+    /// Throws an exception if not found.
+    AggregateFunctionPtr get(
+        const String & name,
+        const DataTypes & argument_types,
+        const Array & parameters = {},
+        int recursion_level = 0) const;
+
+    /// Returns nullptr if not found.
+    AggregateFunctionPtr tryGet(
+        const String & name,
+        const DataTypes & argument_types,
+        const Array & parameters = {}) const;
+
+    bool isAggregateFunctionName(const String & name, int recursion_level = 0) const;
 
 private:
-
     AggregateFunctionPtr getImpl(
         const String & name,
         const DataTypes & argument_types,
@@ -55,6 +67,8 @@ private:
         int recursion_level) const;
 
 private:
+    using AggregateFunctions = std::unordered_map<String, Creator>;
+
     AggregateFunctions aggregate_functions;
 
     /// Case insensitive aggregate functions will be additionally added here with lowercased name.
diff --git a/dbms/src/Functions/FunctionFactory.cpp b/dbms/src/Functions/FunctionFactory.cpp
index c6fb89eee1b..4b681b29263 100644
--- a/dbms/src/Functions/FunctionFactory.cpp
+++ b/dbms/src/Functions/FunctionFactory.cpp
@@ -1,7 +1,11 @@
-#include <Poco/String.h>
 #include <Functions/FunctionFactory.h>
+
+#include <Interpreters/Context.h>
+
 #include <Common/Exception.h>
 
+#include <Poco/String.h>
+
 
 namespace DB
 {
@@ -13,7 +17,10 @@ namespace ErrorCodes
 }
 
 
-void FunctionFactory::registerFunction(const String & name, Creator creator, CaseSensitiveness case_sensitiveness)
+void FunctionFactory::registerFunction(const
+    std::string & name,
+    Creator creator,
+    CaseSensitiveness case_sensitiveness)
 {
     if (!functions.emplace(name, creator).second)
         throw Exception("FunctionFactory: the function name '" + name + "' is not unique",
diff --git a/dbms/src/Functions/FunctionFactory.h b/dbms/src/Functions/FunctionFactory.h
index fa577674178..3f4693080a1 100644
--- a/dbms/src/Functions/FunctionFactory.h
+++ b/dbms/src/Functions/FunctionFactory.h
@@ -1,20 +1,19 @@
 #pragma once
 
-#include <string>
-#include <memory>
-#include <unordered_map>
+#include <Functions/IFunction.h>
+
 #include <ext/singleton.h>
 
-#include <Common/Exception.h>
-#include <Core/Types.h>
+#include <functional>
+#include <memory>
+#include <string>
+#include <unordered_map>
 
 
 namespace DB
 {
 
 class Context;
-class IFunction;
-using FunctionPtr = std::shared_ptr<IFunction>;
 
 
 /** Creates function by name.
@@ -25,12 +24,8 @@ class FunctionFactory : public ext::singleton<FunctionFactory>
 {
     friend class StorageSystemFunctions;
 
-private:
-    using Creator = FunctionPtr(*)(const Context & context);    /// Not std::function, for lower object size and less indirection.
-    using Functions = std::unordered_map<String, Creator>;
-
-    Functions functions;
-    Functions case_insensitive_functions;
+public:
+    using Creator = std::function<FunctionPtr(const Context &)>;
 
     /// For compatibility with SQL, it's possible to specify that certain function name is case insensitive.
     enum CaseSensitiveness
@@ -39,18 +34,30 @@ private:
         CaseInsensitive
     };
 
-public:
-    FunctionPtr get(const String & name, const Context & context) const;    /// Throws an exception if not found.
-    FunctionPtr tryGet(const String & name, const Context & context) const; /// Returns nullptr if not found.
-
-    /// No locking, you must register all functions before usage of get, tryGet.
-    void registerFunction(const String & name, Creator creator, CaseSensitiveness case_sensitiveness = CaseSensitive);
+    /// Register a function by its name.
+    /// No locking, you must register all functions before usage of get.
+    void registerFunction(
+        const std::string & name,
+        Creator creator,
+        CaseSensitiveness case_sensitiveness = CaseSensitive);
 
     template <typename Function>
     void registerFunction()
     {
-        registerFunction(String(Function::name), &Function::create);
+        registerFunction(Function::name, &Function::create);
     }
+
+    /// Throws an exception if not found.
+    FunctionPtr get(const std::string & name, const Context & context) const;
+
+    /// Returns nullptr if not found.
+    FunctionPtr tryGet(const std::string & name, const Context & context) const;
+
+private:
+    using Functions = std::unordered_map<std::string, Creator>;
+
+    Functions functions;
+    Functions case_insensitive_functions;
 };
 
 }
diff --git a/dbms/src/TableFunctions/TableFunctionFactory.cpp b/dbms/src/TableFunctions/TableFunctionFactory.cpp
index 267c3c202a2..a3355fd32e9 100644
--- a/dbms/src/TableFunctions/TableFunctionFactory.cpp
+++ b/dbms/src/TableFunctions/TableFunctionFactory.cpp
@@ -1,7 +1,8 @@
-#include <Common/Exception.h>
+#include <TableFunctions/TableFunctionFactory.h>
+
 #include <Interpreters/Context.h>
 
-#include <TableFunctions/TableFunctionFactory.h>
+#include <Common/Exception.h>
 
 
 namespace DB
@@ -11,9 +12,17 @@ namespace ErrorCodes
 {
     extern const int READONLY;
     extern const int UNKNOWN_FUNCTION;
+    extern const int LOGICAL_ERROR;
 }
 
 
+void TableFunctionFactory::registerFunction(const std::string & name, Creator creator)
+{
+    if (!functions.emplace(name, std::move(creator)).second)
+        throw Exception("TableFunctionFactory: the table function name '" + name + "' is not unique",
+            ErrorCodes::LOGICAL_ERROR);
+}
+
 TableFunctionPtr TableFunctionFactory::get(
     const std::string & name,
     const Context & context) const
@@ -24,6 +33,7 @@ TableFunctionPtr TableFunctionFactory::get(
     auto it = functions.find(name);
     if (it == functions.end())
         throw Exception("Unknown table function " + name, ErrorCodes::UNKNOWN_FUNCTION);
+
     return it->second();
 }
 
diff --git a/dbms/src/TableFunctions/TableFunctionFactory.h b/dbms/src/TableFunctions/TableFunctionFactory.h
index fd58b9e625e..de782630386 100644
--- a/dbms/src/TableFunctions/TableFunctionFactory.h
+++ b/dbms/src/TableFunctions/TableFunctionFactory.h
@@ -1,46 +1,50 @@
 #pragma once
 
-#include <unordered_map>
-#include <ext/singleton.h>
-#include <Core/Types.h>
-#include <Common/Exception.h>
 #include <TableFunctions/ITableFunction.h>
 
+#include <ext/singleton.h>
+
+#include <functional>
+#include <memory>
+#include <string>
+#include <unordered_map>
+
 
 namespace DB
 {
 
-namespace ErrorCodes
-{
-    extern const int LOGICAL_ERROR;
-}
+class Context;
 
 
 /** Lets you get a table function by its name.
   */
-class TableFunctionFactory : public ext::singleton<TableFunctionFactory>
+class TableFunctionFactory final: public ext::singleton<TableFunctionFactory>
 {
-private:
-    /// No std::function, for smaller object size and less indirection.
-    using Creator = TableFunctionPtr(*)();
-    using TableFunctions = std::unordered_map<String, Creator>;
-
-    TableFunctions functions;
-
 public:
-    TableFunctionPtr get(
-        const String & name,
-        const Context & context) const;
+    using Creator = std::function<TableFunctionPtr()>;
 
-    /// Register a table function by its name.
+    /// Register a function by its name.
     /// No locking, you must register all functions before usage of get.
+    void registerFunction(const std::string & name, Creator creator);
+
     template <typename Function>
     void registerFunction()
     {
-        if (!functions.emplace(std::string(Function::name), []{ return TableFunctionPtr(std::make_unique<Function>()); }).second)
-            throw Exception("TableFunctionFactory: the table function name '" + String(Function::name) + "' is not unique",
-                ErrorCodes::LOGICAL_ERROR);
+        auto creator = [] () -> TableFunctionPtr {
+            return std::make_shared<Function>();
+        };
+        registerFunction(Function::name, std::move(creator));
     }
+
+    /// Throws an exception if not found.
+    TableFunctionPtr get(
+        const std::string & name,
+        const Context & context) const;
+
+private:
+    using TableFunctions = std::unordered_map<std::string, Creator>;
+
+    TableFunctions functions;
 };
 
 }

From 434a7d8f3823770368f951d8ac16dd48fe6dcfa5 Mon Sep 17 00:00:00 2001
From: Alexey Milovidov <milovidov@yandex-team.ru>
Date: Fri, 18 Aug 2017 21:45:02 +0300
Subject: [PATCH 232/281] Fixed function "substring" with out-of-bound negative
 offset argument [#CLICKHOUSE-2].

---
 dbms/src/Functions/GatherUtils.h | 32 ++++++++++++++++----------------
 1 file changed, 16 insertions(+), 16 deletions(-)

diff --git a/dbms/src/Functions/GatherUtils.h b/dbms/src/Functions/GatherUtils.h
index d0f90733c95..48d8a3112c5 100644
--- a/dbms/src/Functions/GatherUtils.h
+++ b/dbms/src/Functions/GatherUtils.h
@@ -107,16 +107,16 @@ struct NumericArraySource
     Slice getSliceFromRight(size_t offset) const
     {
         size_t elem_size = offsets[row_num] - prev_offset;
-        if (offset >= elem_size)
-            return {&elements[prev_offset], 0};
+        if (offset > elem_size)
+            return {&elements[prev_offset], elem_size};
         return {&elements[offsets[row_num] - offset], offset};
     }
 
     Slice getSliceFromRight(size_t offset, size_t length) const
     {
         size_t elem_size = offsets[row_num] - prev_offset;
-        if (offset >= elem_size)
-            return {&elements[prev_offset], 0};
+        if (offset > elem_size)    /// Slice starts before the array: keep only the part of it that overlaps the array.
+            return {&elements[prev_offset], length + elem_size > offset ? std::min(elem_size, length + elem_size - offset) : 0};
         return {&elements[offsets[row_num] - offset], std::min(length, offset)};
     }
 };
@@ -246,16 +246,16 @@ struct StringSource
     Slice getSliceFromRight(size_t offset) const
     {
         size_t elem_size = offsets[row_num] - prev_offset - 1;
-        if (offset >= elem_size)
-            return {&elements[prev_offset], 0};
+        if (offset > elem_size)
+            return {&elements[prev_offset], elem_size};
         return {&elements[prev_offset + elem_size - offset], offset};
     }
 
     Slice getSliceFromRight(size_t offset, size_t length) const
     {
         size_t elem_size = offsets[row_num] - prev_offset - 1;
-        if (offset >= elem_size)
-            return {&elements[prev_offset], 0};
+        if (offset > elem_size)    /// Slice starts before the string: keep only the part of it that overlaps the string.
+            return {&elements[prev_offset], length + elem_size > offset ? std::min(elem_size, length + elem_size - offset) : 0};
         return {&elements[prev_offset + elem_size - offset], std::min(length, offset)};
     }
 };
@@ -321,15 +321,15 @@ struct FixedStringSource
 
     Slice getSliceFromRight(size_t offset) const
     {
-        if (offset >= string_size)
-            return {pos, 0};
+        if (offset > string_size)
+            return {pos, string_size};
         return {pos + string_size - offset, offset};
     }
 
     Slice getSliceFromRight(size_t offset, size_t length) const
     {
-        if (offset >= string_size)
-            return {pos, 0};
+        if (offset > string_size)    /// Slice starts before the string: keep only the part of it that overlaps the string.
+            return {pos, length + string_size > offset ? std::min(string_size, length + string_size - offset) : 0};
         return {pos + string_size - offset, std::min(length, offset)};
     }
 };
@@ -560,16 +560,16 @@ struct GenericArraySource
     Slice getSliceFromRight(size_t offset) const
     {
         size_t elem_size = offsets[row_num] - prev_offset;
-        if (offset >= elem_size)
-            return {&elements, prev_offset, 0};
+        if (offset > elem_size)
+            return {&elements, prev_offset, elem_size};
         return {&elements, offsets[row_num] - offset, offset};
     }
 
     Slice getSliceFromRight(size_t offset, size_t length) const
     {
         size_t elem_size = offsets[row_num] - prev_offset;
-        if (offset >= elem_size)
-            return {&elements, prev_offset, 0};
+        if (offset > elem_size)    /// Slice starts before the array: keep only the part of it that overlaps the array.
+            return {&elements, prev_offset, length + elem_size > offset ? std::min(elem_size, length + elem_size - offset) : 0};
         return {&elements, offsets[row_num] - offset, std::min(length, offset)};
     }
 };

From 304ccf29b2c0fbb4773622bd67ab88dbcd975711 Mon Sep 17 00:00:00 2001
From: Alexey Milovidov <milovidov@yandex-team.ru>
Date: Fri, 18 Aug 2017 22:02:50 +0300
Subject: [PATCH 233/281] Fixed error; added test [#CLICKHOUSE-2].

---
 dbms/src/Functions/FunctionsString.cpp        | 18 ++++++---
 .../00496_substring_negative_offset.reference | 40 +++++++++++++++++++
 .../00496_substring_negative_offset.sql       |  4 ++
 3 files changed, 57 insertions(+), 5 deletions(-)
 create mode 100644 dbms/tests/queries/0_stateless/00496_substring_negative_offset.reference
 create mode 100644 dbms/tests/queries/0_stateless/00496_substring_negative_offset.sql

diff --git a/dbms/src/Functions/FunctionsString.cpp b/dbms/src/Functions/FunctionsString.cpp
index 975f5dc6777..e1cdc8dc6a1 100644
--- a/dbms/src/Functions/FunctionsString.cpp
+++ b/dbms/src/Functions/FunctionsString.cpp
@@ -871,10 +871,18 @@ public:
                 throw Exception("Third argument provided for function substring could not be negative.", ErrorCodes::ARGUMENT_OUT_OF_BOUND);
         }
 
-        if (const ColumnString * col = checkAndGetColumn<ColumnString>(&*column_string))
-            executeForSource(column_start, column_length, column_start_const, column_length_const, start_value, length_value, block, result, StringSource(*col));
-        else if (const ColumnFixedString * col = checkAndGetColumn<ColumnFixedString>(&*column_string))
-            executeForSource(column_start, column_length, column_start_const, column_length_const, start_value, length_value, block, result, FixedStringSource(*col));
+        if (const ColumnString * col = checkAndGetColumn<ColumnString>(column_string.get()))
+            executeForSource(column_start, column_length, column_start_const, column_length_const, start_value, length_value,
+                             block, result, StringSource(*col));
+        else if (const ColumnFixedString * col = checkAndGetColumn<ColumnFixedString>(column_string.get()))
+            executeForSource(column_start, column_length, column_start_const, column_length_const, start_value, length_value,
+                             block, result, FixedStringSource(*col));
+        else if (const ColumnConst * col = checkAndGetColumnConst<ColumnString>(column_string.get()))
+            executeForSource(column_start, column_length, column_start_const, column_length_const, start_value, length_value,
+                             block, result, ConstSource<StringSource>(*col));
+        else if (const ColumnConst * col = checkAndGetColumnConst<ColumnFixedString>(column_string.get()))
+            executeForSource(column_start, column_length, column_start_const, column_length_const, start_value, length_value,
+                             block, result, ConstSource<FixedStringSource>(*col));
         else
             throw Exception(
                 "Illegal column " + block.getByPosition(arguments[0]).column->getName() + " of first argument of function " + getName(),
@@ -945,7 +953,7 @@ public:
         if (start >= 0x8000000000000000ULL || length >= 0x8000000000000000ULL)
             throw Exception("Too large values of 2nd or 3rd argument provided for function substring.", ErrorCodes::ARGUMENT_OUT_OF_BOUND);
 
-        if (const ColumnString * col = checkAndGetColumn<ColumnString>(&*column_string))
+        if (const ColumnString * col = checkAndGetColumn<ColumnString>(column_string.get()))
         {
             std::shared_ptr<ColumnString> col_res = std::make_shared<ColumnString>();
             block.getByPosition(result).column = col_res;
diff --git a/dbms/tests/queries/0_stateless/00496_substring_negative_offset.reference b/dbms/tests/queries/0_stateless/00496_substring_negative_offset.reference
new file mode 100644
index 00000000000..b592f370dea
--- /dev/null
+++ b/dbms/tests/queries/0_stateless/00496_substring_negative_offset.reference
@@ -0,0 +1,40 @@
+abc
+abc
+abc
+bc
+c
+
+abc
+bc
+c
+
+abc
+abc
+abc
+bc
+c
+
+abc
+bc
+c
+
+abc
+abc
+abc
+bc
+c
+
+abc
+bc
+c
+
+abc
+abc
+abc
+bc
+c
+
+abc
+bc
+c
+
diff --git a/dbms/tests/queries/0_stateless/00496_substring_negative_offset.sql b/dbms/tests/queries/0_stateless/00496_substring_negative_offset.sql
new file mode 100644
index 00000000000..170af8f79b4
--- /dev/null
+++ b/dbms/tests/queries/0_stateless/00496_substring_negative_offset.sql
@@ -0,0 +1,4 @@
+SELECT substring('abc', number - 5) FROM system.numbers LIMIT 10;
+SELECT substring(materialize('abc'), number - 5) FROM system.numbers LIMIT 10;
+SELECT substring(toFixedString('abc', 3), number - 5) FROM system.numbers LIMIT 10;
+SELECT substring(materialize(toFixedString('abc', 3)), number - 5) FROM system.numbers LIMIT 10;

From d5b9f2127fe7733063bcb0c9b9f50f10111c8c7a Mon Sep 17 00:00:00 2001
From: Alexey Milovidov <milovidov@yandex-team.ru>
Date: Fri, 18 Aug 2017 22:06:39 +0300
Subject: [PATCH 234/281] Added test result [#CLICKHOUSE-2].

---
 .../queries/0_stateless/00488_non_ascii_column_names.reference  | 2 ++
 1 file changed, 2 insertions(+)
 create mode 100644 dbms/tests/queries/0_stateless/00488_non_ascii_column_names.reference

diff --git a/dbms/tests/queries/0_stateless/00488_non_ascii_column_names.reference b/dbms/tests/queries/0_stateless/00488_non_ascii_column_names.reference
new file mode 100644
index 00000000000..77bcc007aa3
--- /dev/null
+++ b/dbms/tests/queries/0_stateless/00488_non_ascii_column_names.reference
@@ -0,0 +1,2 @@
+hello
+hello	world

From 12a034478c55a021f6fc5bd7c52b9599e9597593 Mon Sep 17 00:00:00 2001
From: Alexey Milovidov <milovidov@yandex-team.ru>
Date: Fri, 18 Aug 2017 22:30:39 +0300
Subject: [PATCH 235/281] Fixed bad translation [#CLICKHOUSE-2].

---
 dbms/src/DataStreams/RemoteBlockInputStream.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/dbms/src/DataStreams/RemoteBlockInputStream.h b/dbms/src/DataStreams/RemoteBlockInputStream.h
index e81c4b71db3..0117ac37703 100644
--- a/dbms/src/DataStreams/RemoteBlockInputStream.h
+++ b/dbms/src/DataStreams/RemoteBlockInputStream.h
@@ -109,7 +109,7 @@ private:
     Tables external_tables;
     QueryProcessingStage::Enum stage;
 
-    /// Threads for reading from temporary tables and following sending of data
+    /// Streams for reading from temporary tables and following sending of data
     /// to remote servers for GLOBAL-subqueries
     std::vector<ExternalTablesData> external_tables_data;
     std::mutex external_tables_mutex;

From 61f65e97a8f43dba2d5365d1d542710dbcf6bf4b Mon Sep 17 00:00:00 2001
From: Vadim Skipin <vskipin@yandex-team.ru>
Date: Fri, 18 Aug 2017 22:30:55 +0300
Subject: [PATCH 236/281] Fix typo

---
 .../evaluateConstantExpression.cpp             |  2 +-
 .../Interpreters/evaluateConstantExpression.h  |  2 +-
 dbms/src/Storages/StorageFactory.cpp           | 18 +++++++++---------
 dbms/src/TableFunctions/TableFunctionMerge.cpp |  2 +-
 .../TableFunctions/TableFunctionNumbers.cpp    |  2 +-
 .../src/TableFunctions/TableFunctionRemote.cpp |  4 ++--
 .../TableFunctionShardByHash.cpp               |  4 ++--
 7 files changed, 17 insertions(+), 17 deletions(-)

diff --git a/dbms/src/Interpreters/evaluateConstantExpression.cpp b/dbms/src/Interpreters/evaluateConstantExpression.cpp
index 2839e01bb26..24ea8924c38 100644
--- a/dbms/src/Interpreters/evaluateConstantExpression.cpp
+++ b/dbms/src/Interpreters/evaluateConstantExpression.cpp
@@ -60,7 +60,7 @@ ASTPtr evaluateConstantExpressionAsLiteral(ASTPtr & node, const Context & contex
 }
 
 
-ASTPtr evaluateConstantExpressionOrIdentidierAsLiteral(ASTPtr & node, const Context & context)
+ASTPtr evaluateConstantExpressionOrIdentifierAsLiteral(ASTPtr & node, const Context & context)
 {
     if (const ASTIdentifier * id = typeid_cast<const ASTIdentifier *>(node.get()))
         return std::make_shared<ASTLiteral>(node->range, Field(id->name));
diff --git a/dbms/src/Interpreters/evaluateConstantExpression.h b/dbms/src/Interpreters/evaluateConstantExpression.h
index 52dbed9f83d..ed3befdf02b 100644
--- a/dbms/src/Interpreters/evaluateConstantExpression.h
+++ b/dbms/src/Interpreters/evaluateConstantExpression.h
@@ -29,6 +29,6 @@ std::shared_ptr<IAST> evaluateConstantExpressionAsLiteral(std::shared_ptr<IAST>
   * Also, if AST is identifier, then return string literal with its name.
   * Useful in places where some name may be specified as identifier, or as result of a constant expression.
   */
-std::shared_ptr<IAST> evaluateConstantExpressionOrIdentidierAsLiteral(std::shared_ptr<IAST> & node, const Context & context);
+std::shared_ptr<IAST> evaluateConstantExpressionOrIdentifierAsLiteral(std::shared_ptr<IAST> & node, const Context & context);
 
 }
diff --git a/dbms/src/Storages/StorageFactory.cpp b/dbms/src/Storages/StorageFactory.cpp
index e33e93605c7..adf212a9a59 100644
--- a/dbms/src/Storages/StorageFactory.cpp
+++ b/dbms/src/Storages/StorageFactory.cpp
@@ -321,7 +321,7 @@ StoragePtr StorageFactory::get(
         if (args.empty() || args.size() > 2)
             throw Exception(error_msg, ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH);
 
-        args[0] = evaluateConstantExpressionOrIdentidierAsLiteral(args[0], local_context);
+        args[0] = evaluateConstantExpressionOrIdentifierAsLiteral(args[0], local_context);
         String format_name = static_cast<const ASTLiteral &>(*args[0]).value.safeGet<String>();
 
         int source_fd = -1;
@@ -352,7 +352,7 @@ StoragePtr StorageFactory::get(
                     source_fd = static_cast<int>(literal->value.get<UInt64>());
             }
 
-            args[1] = evaluateConstantExpressionOrIdentidierAsLiteral(args[1], local_context);
+            args[1] = evaluateConstantExpressionOrIdentifierAsLiteral(args[1], local_context);
             source_path = static_cast<const ASTLiteral &>(*args[1]).value.safeGet<String>();
         }
 
@@ -457,7 +457,7 @@ StoragePtr StorageFactory::get(
                 " - name of source database and regexp for table names.",
                 ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH);
 
-        args[0] = evaluateConstantExpressionOrIdentidierAsLiteral(args[0], local_context);
+        args[0] = evaluateConstantExpressionOrIdentifierAsLiteral(args[0], local_context);
         args[1] = evaluateConstantExpressionAsLiteral(args[1], local_context);
 
         String source_database         = static_cast<const ASTLiteral &>(*args[0]).value.safeGet<String>();
@@ -497,8 +497,8 @@ StoragePtr StorageFactory::get(
 
         String cluster_name = getClusterName(*args[0]);
 
-        args[1] = evaluateConstantExpressionOrIdentidierAsLiteral(args[1], local_context);
-        args[2] = evaluateConstantExpressionOrIdentidierAsLiteral(args[2], local_context);
+        args[1] = evaluateConstantExpressionOrIdentifierAsLiteral(args[1], local_context);
+        args[2] = evaluateConstantExpressionOrIdentifierAsLiteral(args[2], local_context);
 
         String remote_database     = static_cast<const ASTLiteral &>(*args[1]).value.safeGet<String>();
         String remote_table     = static_cast<const ASTLiteral &>(*args[2]).value.safeGet<String>();
@@ -550,8 +550,8 @@ StoragePtr StorageFactory::get(
                 " destination_database, destination_table, num_buckets, min_time, max_time, min_rows, max_rows, min_bytes, max_bytes.",
                 ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH);
 
-        args[0] = evaluateConstantExpressionOrIdentidierAsLiteral(args[0], local_context);
-        args[1] = evaluateConstantExpressionOrIdentidierAsLiteral(args[1], local_context);
+        args[0] = evaluateConstantExpressionOrIdentifierAsLiteral(args[0], local_context);
+        args[1] = evaluateConstantExpressionOrIdentifierAsLiteral(args[1], local_context);
 
         String destination_database = static_cast<const ASTLiteral &>(*args[0]).value.safeGet<String>();
         String destination_table     = static_cast<const ASTLiteral &>(*args[1]).value.safeGet<String>();
@@ -598,8 +598,8 @@ StoragePtr StorageFactory::get(
             throw Exception(error_message_argument_number_mismatch,
                 ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH);
 
-        args[0] = evaluateConstantExpressionOrIdentidierAsLiteral(args[0], local_context);
-        args[1] = evaluateConstantExpressionOrIdentidierAsLiteral(args[1], local_context);
+        args[0] = evaluateConstantExpressionOrIdentifierAsLiteral(args[0], local_context);
+        args[1] = evaluateConstantExpressionOrIdentifierAsLiteral(args[1], local_context);
 
         String destination_database = static_cast<const ASTLiteral &>(*args[0]).value.safeGet<String>();
         String destination_table    = static_cast<const ASTLiteral &>(*args[1]).value.safeGet<String>();
diff --git a/dbms/src/TableFunctions/TableFunctionMerge.cpp b/dbms/src/TableFunctions/TableFunctionMerge.cpp
index 2c50759fcde..d6e2c7d4dc7 100644
--- a/dbms/src/TableFunctions/TableFunctionMerge.cpp
+++ b/dbms/src/TableFunctions/TableFunctionMerge.cpp
@@ -71,7 +71,7 @@ StoragePtr TableFunctionMerge::execute(const ASTPtr & ast_function, const Contex
             " - name of source database and regexp for table names.",
             ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH);
 
-    args[0] = evaluateConstantExpressionOrIdentidierAsLiteral(args[0], context);
+    args[0] = evaluateConstantExpressionOrIdentifierAsLiteral(args[0], context);
     args[1] = evaluateConstantExpressionAsLiteral(args[1], context);
 
     String source_database = static_cast<const ASTLiteral &>(*args[0]).value.safeGet<String>();
diff --git a/dbms/src/TableFunctions/TableFunctionNumbers.cpp b/dbms/src/TableFunctions/TableFunctionNumbers.cpp
index 8e2187ec675..ee141f68549 100644
--- a/dbms/src/TableFunctions/TableFunctionNumbers.cpp
+++ b/dbms/src/TableFunctions/TableFunctionNumbers.cpp
@@ -32,7 +32,7 @@ StoragePtr TableFunctionNumbers::execute(const ASTPtr & ast_function, const Cont
         throw Exception("Table function 'numbers' requires exactly one argument: amount of numbers.",
             ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH);
 
-    args[0] = evaluateConstantExpressionOrIdentidierAsLiteral(args[0], context);
+    args[0] = evaluateConstantExpressionOrIdentifierAsLiteral(args[0], context);
 
     UInt64 limit = static_cast<const ASTLiteral &>(*args[0]).value.safeGet<UInt64>();
 
diff --git a/dbms/src/TableFunctions/TableFunctionRemote.cpp b/dbms/src/TableFunctions/TableFunctionRemote.cpp
index 9700ceb6fc2..d1240c93832 100644
--- a/dbms/src/TableFunctions/TableFunctionRemote.cpp
+++ b/dbms/src/TableFunctions/TableFunctionRemote.cpp
@@ -219,7 +219,7 @@ StoragePtr TableFunctionRemote::execute(const ASTPtr & ast_function, const Conte
     description = getStringLiteral(*args[arg_num], "Hosts pattern");
     ++arg_num;
 
-    args[arg_num] = evaluateConstantExpressionOrIdentidierAsLiteral(args[arg_num], context);
+    args[arg_num] = evaluateConstantExpressionOrIdentifierAsLiteral(args[arg_num], context);
     remote_database = static_cast<const ASTLiteral &>(*args[arg_num]).value.safeGet<String>();
     ++arg_num;
 
@@ -235,7 +235,7 @@ StoragePtr TableFunctionRemote::execute(const ASTPtr & ast_function, const Conte
         if (arg_num >= args.size())
             throw Exception(err, ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH);
 
-        args[arg_num] = evaluateConstantExpressionOrIdentidierAsLiteral(args[arg_num], context);
+        args[arg_num] = evaluateConstantExpressionOrIdentifierAsLiteral(args[arg_num], context);
         remote_table = static_cast<const ASTLiteral &>(*args[arg_num]).value.safeGet<String>();
         ++arg_num;
     }
diff --git a/dbms/src/TableFunctions/TableFunctionShardByHash.cpp b/dbms/src/TableFunctions/TableFunctionShardByHash.cpp
index 583990f4cd8..edcda344fe6 100644
--- a/dbms/src/TableFunctions/TableFunctionShardByHash.cpp
+++ b/dbms/src/TableFunctions/TableFunctionShardByHash.cpp
@@ -57,8 +57,8 @@ StoragePtr TableFunctionShardByHash::execute(const ASTPtr & ast_function, const
     cluster_name = getClusterName(*args[0]);
     key = getStringLiteral(*args[1], "Key to hash");
 
-    args[2] = evaluateConstantExpressionOrIdentidierAsLiteral(args[2], context);
-    args[3] = evaluateConstantExpressionOrIdentidierAsLiteral(args[3], context);
+    args[2] = evaluateConstantExpressionOrIdentifierAsLiteral(args[2], context);
+    args[3] = evaluateConstantExpressionOrIdentifierAsLiteral(args[3], context);
 
     remote_database = static_cast<const ASTLiteral &>(*args[2]).value.safeGet<String>();
     remote_table = static_cast<const ASTLiteral &>(*args[3]).value.safeGet<String>();

From e52428ddb2d47fd2caa26aa38452b955ff3d0a8f Mon Sep 17 00:00:00 2001
From: Vitaliy Lyudvichenko <vludv@yandex-team.ru>
Date: Fri, 18 Aug 2017 23:56:19 +0300
Subject: [PATCH 237/281] Fixed SIGSEGV in CREATE TABLE AS <MV>. MV without
 ENGINE is forbidden! [#CLICKHOUSE-2]

Resolves #1112
Resolves #598
---
 .../Interpreters/InterpreterCreateQuery.cpp   | 16 ++++++++--
 dbms/src/Parsers/ParserCreateQuery.cpp        |  8 ++---
 dbms/src/Storages/StorageMaterializedView.cpp | 30 ++++++++-----------
 ...insert_without_explicit_database.reference |  4 +++
 ...s_and_insert_without_explicit_database.sql | 25 ++++++++++++++++
 5 files changed, 59 insertions(+), 24 deletions(-)

diff --git a/dbms/src/Interpreters/InterpreterCreateQuery.cpp b/dbms/src/Interpreters/InterpreterCreateQuery.cpp
index b17ba8dd66f..80536a76a15 100644
--- a/dbms/src/Interpreters/InterpreterCreateQuery.cpp
+++ b/dbms/src/Interpreters/InterpreterCreateQuery.cpp
@@ -445,8 +445,20 @@ String InterpreterCreateQuery::setEngine(
         String as_database_name = create.as_database.empty() ? context.getCurrentDatabase() : create.as_database;
         String as_table_name = create.as_table;
 
-        storage_name = as_storage->getName();
-        create.storage = typeid_cast<const ASTCreateQuery &>(*context.getCreateQuery(as_database_name, as_table_name)).storage;
+        auto as_create_ptr = context.getCreateQuery(as_database_name, as_table_name);
+        auto & as_create = typeid_cast<const ASTCreateQuery &>(*as_create_ptr);
+
+        if (!create.storage)
+        {
+            if (as_create.is_view || as_create.is_materialized_view)
+                create.storage = as_create.inner_storage;
+            else
+                create.storage = as_create.storage;
+
+            storage_name = typeid_cast<const ASTFunction &>(*create.storage).name;
+        }
+        else
+            storage_name = as_storage->getName();
     }
     else if (create.is_temporary)
         set_engine("Memory");
diff --git a/dbms/src/Parsers/ParserCreateQuery.cpp b/dbms/src/Parsers/ParserCreateQuery.cpp
index f4f939ff01e..25037bd67b9 100644
--- a/dbms/src/Parsers/ParserCreateQuery.cpp
+++ b/dbms/src/Parsers/ParserCreateQuery.cpp
@@ -323,12 +323,10 @@ bool ParserCreateQuery::parseImpl(Pos & pos, ASTPtr & node, Expected & expected)
         /// AS SELECT ...
         if (!s_as.ignore(pos, expected))
             return false;
-        Pos before_select = pos;
-        if (!s_select.ignore(pos, expected))
-            return false;
-        pos = before_select;
+
         ParserSelectQuery select_p;
-        select_p.parse(pos, select, expected);
+        if (!select_p.parse(pos, select, expected))
+            return false;
     }
 
     auto query = std::make_shared<ASTCreateQuery>(StringRange(begin, pos));
diff --git a/dbms/src/Storages/StorageMaterializedView.cpp b/dbms/src/Storages/StorageMaterializedView.cpp
index ba861f04a06..08429ed5e5a 100644
--- a/dbms/src/Storages/StorageMaterializedView.cpp
+++ b/dbms/src/Storages/StorageMaterializedView.cpp
@@ -19,6 +19,7 @@ namespace DB
 namespace ErrorCodes
 {
     extern const int LOGICAL_ERROR;
+    extern const int INCORRECT_QUERY;
 }
 
 
@@ -29,7 +30,7 @@ static void extractDependentTable(const ASTSelectQuery & query, String & select_
     if (!query_table)
         return;
 
-    if (const ASTIdentifier * ast_id = typeid_cast<const ASTIdentifier *>(query_table.get()))
+    if (auto ast_id = typeid_cast<const ASTIdentifier *>(query_table.get()))
     {
         auto query_database = query.database();
 
@@ -41,7 +42,7 @@ static void extractDependentTable(const ASTSelectQuery & query, String & select_
         select_database_name = typeid_cast<const ASTIdentifier &>(*query_database).name;
         select_table_name = ast_id->name;
     }
-    else if (const ASTSelectQuery * ast_select = typeid_cast<const ASTSelectQuery *>(query_table.get()))
+    else if (auto ast_select = typeid_cast<const ASTSelectQuery *>(query_table.get()))
     {
         extractDependentTable(*ast_select, select_database_name, select_table_name);
     }
@@ -66,13 +67,18 @@ StorageMaterializedView::StorageMaterializedView(
     database_name(database_name_), context(context_), columns(columns_)
 {
     ASTCreateQuery & create = typeid_cast<ASTCreateQuery &>(*query_);
+
+    if (!create.select)
+        throw Exception("SELECT query is not specified for " + getName(), ErrorCodes::INCORRECT_QUERY);
+
+    if (!create.inner_storage)
+        throw Exception("ENGINE of MaterializedView should be specified explicitly", ErrorCodes::INCORRECT_QUERY);
+
     ASTSelectQuery & select = typeid_cast<ASTSelectQuery &>(*create.select);
 
     /// If the internal query does not specify a database, retrieve it from the context and write it to the query.
     select.setDatabaseIfNeeded(database_name);
 
-    inner_query = create.select;
-
     extractDependentTable(select, select_database_name, select_table_name);
 
     if (!select_table_name.empty())
@@ -80,7 +86,8 @@ StorageMaterializedView::StorageMaterializedView(
             DatabaseAndTableName(select_database_name, select_table_name),
             DatabaseAndTableName(database_name, table_name));
 
-    auto inner_table_name = getInnerTableName();
+    String inner_table_name = getInnerTableName();
+    inner_query = create.select;
 
     /// If there is an ATTACH request, then the internal table must already be connected.
     if (!attach_)
@@ -91,18 +98,7 @@ StorageMaterializedView::StorageMaterializedView(
         manual_create_query->table = inner_table_name;
         manual_create_query->columns = create.columns;
         manual_create_query->children.push_back(manual_create_query->columns);
-
-        /// If you do not specify a storage type in the query, try retrieving it from SELECT query.
-        if (!create.inner_storage)
-        {
-            /// TODO also try to extract `params` to create a table
-            auto func = std::make_shared<ASTFunction>();
-            func->name = context.getTable(select_database_name, select_table_name)->getName();
-            manual_create_query->storage = func;
-        }
-        else
-            manual_create_query->storage = create.inner_storage;
-
+        manual_create_query->storage = create.inner_storage;
         manual_create_query->children.push_back(manual_create_query->storage);
 
         /// Execute the query.
diff --git a/dbms/tests/queries/0_stateless/00101_materialized_views_and_insert_without_explicit_database.reference b/dbms/tests/queries/0_stateless/00101_materialized_views_and_insert_without_explicit_database.reference
index 8fb767d89d5..37514bc429b 100644
--- a/dbms/tests/queries/0_stateless/00101_materialized_views_and_insert_without_explicit_database.reference
+++ b/dbms/tests/queries/0_stateless/00101_materialized_views_and_insert_without_explicit_database.reference
@@ -1,2 +1,6 @@
 2014-01-02	0	0	0000-00-00 00:00:00	2014-01-02 03:04:06
 1	2014-01-02 03:04:06
+0
+0
+0
+0
diff --git a/dbms/tests/queries/0_stateless/00101_materialized_views_and_insert_without_explicit_database.sql b/dbms/tests/queries/0_stateless/00101_materialized_views_and_insert_without_explicit_database.sql
index a3f475d2f25..47a7ef07503 100644
--- a/dbms/tests/queries/0_stateless/00101_materialized_views_and_insert_without_explicit_database.sql
+++ b/dbms/tests/queries/0_stateless/00101_materialized_views_and_insert_without_explicit_database.sql
@@ -11,3 +11,28 @@ SELECT * FROM default.test_view;
 
 DROP TABLE default.test_table;
 DROP TABLE default.test_view;
+
+-- Check only sophisticated constructors and destructors:
+
+USE test;
+DROP TABLE IF EXISTS tmp;
+DROP TABLE IF EXISTS tmp_mv;
+DROP TABLE IF EXISTS tmp_mv2;
+DROP TABLE IF EXISTS  `.inner.tmp_mv`;
+DROP TABLE IF EXISTS  `.inner.tmp_mv2`;
+
+CREATE TABLE tmp (date Date, name String) ENGINE = Memory;
+CREATE MATERIALIZED VIEW tmp_mv ENGINE = AggregatingMergeTree(date, (date, name), 8192) AS SELECT date, name, countState() AS cc FROM tmp GROUP BY date, name;
+CREATE TABLE tmp_mv2 AS tmp_mv;
+CREATE TABLE tmp_mv3 AS tmp_mv ENGINE = Memory;
+CREATE MATERIALIZED VIEW tmp_mv4 ENGINE = AggregatingMergeTree(date, date, 8192) POPULATE AS SELECT DISTINCT * FROM tmp_mv;
+
+DROP TABLE tmp_mv;
+DROP TABLE tmp_mv2;
+DROP TABLE tmp_mv3;
+DROP TABLE tmp_mv4;
+
+EXISTS TABLE `.inner.tmp_mv`;
+EXISTS TABLE `.inner.tmp_mv2`;
+EXISTS TABLE `.inner.tmp_mv3`;
+EXISTS TABLE `.inner.tmp_mv4`;

From 1d39b94992c65c7e678dd6e8045c302b5ac0c3ac Mon Sep 17 00:00:00 2001
From: alexey-milovidov <milovidov@yandex-team.ru>
Date: Sat, 19 Aug 2017 00:10:35 +0300
Subject: [PATCH 238/281] Update
 00101_materialized_views_and_insert_without_explicit_database.sql

---
 ...aterialized_views_and_insert_without_explicit_database.sql | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/dbms/tests/queries/0_stateless/00101_materialized_views_and_insert_without_explicit_database.sql b/dbms/tests/queries/0_stateless/00101_materialized_views_and_insert_without_explicit_database.sql
index 47a7ef07503..772ac54d830 100644
--- a/dbms/tests/queries/0_stateless/00101_materialized_views_and_insert_without_explicit_database.sql
+++ b/dbms/tests/queries/0_stateless/00101_materialized_views_and_insert_without_explicit_database.sql
@@ -18,8 +18,8 @@ USE test;
 DROP TABLE IF EXISTS tmp;
 DROP TABLE IF EXISTS tmp_mv;
 DROP TABLE IF EXISTS tmp_mv2;
-DROP TABLE IF EXISTS  `.inner.tmp_mv`;
-DROP TABLE IF EXISTS  `.inner.tmp_mv2`;
+DROP TABLE IF EXISTS `.inner.tmp_mv`;
+DROP TABLE IF EXISTS `.inner.tmp_mv2`;
 
 CREATE TABLE tmp (date Date, name String) ENGINE = Memory;
 CREATE MATERIALIZED VIEW tmp_mv ENGINE = AggregatingMergeTree(date, (date, name), 8192) AS SELECT date, name, countState() AS cc FROM tmp GROUP BY date, name;

From 47c8d89ba0002032a5972d9ba2c4c52c5f4bbcd5 Mon Sep 17 00:00:00 2001
From: alexey-milovidov <milovidov@yandex-team.ru>
Date: Sat, 19 Aug 2017 01:51:36 +0300
Subject: [PATCH 239/281] Update CHANGELOG_RU.md

---
 CHANGELOG_RU.md | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/CHANGELOG_RU.md b/CHANGELOG_RU.md
index 0f08a40a4ba..e2100c0f9bc 100644
--- a/CHANGELOG_RU.md
+++ b/CHANGELOG_RU.md
@@ -15,7 +15,7 @@
 * Улучшены сообщения об ошибках в случае синтаксически неверных запросов
 * Значительно уменьшен расход оперативной памяти и улучшена производительность слияний больших MergeTree-кусков данных
 * Значительно увеличена производительность слияний данных для движка ReplacingMergeTree
-* Улучшена производительность асинхронных вставок из Distributed таблицы за счет объединения нескольких исходных вставок. Функционал включается настройкой distributed_directory_monitor_batch_inserts=1.
+* Улучшена производительность асинхронных вставок из Distributed таблицы за счет объединения нескольких исходных вставок. Функциональность включается настройкой distributed_directory_monitor_batch_inserts=1.
 
 ## Обратно несовместимые изменения:
 * Изменился бинарный формат агрегатных состояний функции `groupArray(array_column)` для массивов

From 8ee56421aba03ee71c99b26ea4d98e1a65713e5e Mon Sep 17 00:00:00 2001
From: alexey-milovidov <milovidov@yandex-team.ru>
Date: Sat, 19 Aug 2017 01:52:35 +0300
Subject: [PATCH 240/281] Update CHANGELOG_RU.md

---
 CHANGELOG_RU.md | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/CHANGELOG_RU.md b/CHANGELOG_RU.md
index e2100c0f9bc..772263efa25 100644
--- a/CHANGELOG_RU.md
+++ b/CHANGELOG_RU.md
@@ -21,7 +21,7 @@
 * Изменился бинарный формат агрегатных состояний функции `groupArray(array_column)` для массивов
 
 ## Полный список изменений:
-* Добавлен вывод nan и inf значений в формате JSON
+* Добавлена настройка `output_format_json_quote_denormals`, включающая вывод nan и inf значений в формате JSON
 * Более оптимальное выделение потоков при чтении из Distributed таблиц
 * Разрешено задавать настройки в режиме readonly, если их значение не изменяется
 * Добавлена возможность считывать нецелые гранулы движка MergeTree для выполнения ограничений на размер блока, задаваемый настройкой preferred_block_size_bytes - для уменьшения потребления оперативной памяти и увеличения кэш-локальности при обработке запросов из таблиц со столбцами большого размера

From e25f232b5ac2599e8e310afb1942a65a393fa1f0 Mon Sep 17 00:00:00 2001
From: Alexey Milovidov <milovidov@yandex-team.ru>
Date: Sat, 19 Aug 2017 01:56:19 +0300
Subject: [PATCH 241/281] Raising limit on max size for merge (step 1 of 4)
 [#CLICKHOUSE-2]

---
 dbms/src/Storages/MergeTree/MergeTreeSettings.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/dbms/src/Storages/MergeTree/MergeTreeSettings.h b/dbms/src/Storages/MergeTree/MergeTreeSettings.h
index b922f75870b..5f89b888492 100644
--- a/dbms/src/Storages/MergeTree/MergeTreeSettings.h
+++ b/dbms/src/Storages/MergeTree/MergeTreeSettings.h
@@ -22,7 +22,7 @@ struct MergeTreeSettings
     /** Merge settings. */
 
     /// Maximum in total size of parts to merge, when there are maximum (minimum) free threads in background pool (or entries in replication queue).
-    size_t max_bytes_to_merge_at_max_space_in_pool = 100ULL * 1024 * 1024 * 1024;
+    size_t max_bytes_to_merge_at_max_space_in_pool = 150ULL * 1024 * 1024 * 1024;
     size_t max_bytes_to_merge_at_min_space_in_pool = 1024 * 1024;
 
     /// How many tasks of merging parts are allowed simultaneously in ReplicatedMergeTree queue.

From 1852ec843cac18f0028f4ff22392968439e71255 Mon Sep 17 00:00:00 2001
From: alexey-milovidov <milovidov@yandex-team.ru>
Date: Sat, 19 Aug 2017 01:57:38 +0300
Subject: [PATCH 242/281] Update CHANGELOG_RU.md

---
 CHANGELOG_RU.md | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/CHANGELOG_RU.md b/CHANGELOG_RU.md
index 772263efa25..8a66cc18b55 100644
--- a/CHANGELOG_RU.md
+++ b/CHANGELOG_RU.md
@@ -56,7 +56,7 @@
 * Исправлено падение запроса `CREATE VIEW IF EXISTS`
 * Исправлено некорректное поведение при input_format_skip_unknown_fields=1 в случае отрицательных чисел
 * Исправлен бесконечный цикл в функции `dictGetHierarchy()` в случае некоторых некорректных данных словаря
-Исправлены ошибки типа `Syntax error: unexpected (...)` при выполнении распределенных запросов с подзапросами в секции IN или JOIN, в случае * использования совместно с Merge таблицами
+* Исправлены ошибки типа `Syntax error: unexpected (...)` при выполнении распределенных запросов с подзапросами в секции IN или JOIN, в случае * использования совместно с Merge таблицами
 * Исправлена неправильная интерпретация SELECT запроса из таблиц типа Dictionary
 * Исправлена ошибка "Cannot mremap" при использовании множеств в секциях IN, JOIN, содержащих более 2 млрд. элементов
 * Исправлен failover для словарей с источником MySQL

From 1307fd4e0566c48ddd9566ce8b444fc4624c07ff Mon Sep 17 00:00:00 2001
From: alexey-milovidov <milovidov@yandex-team.ru>
Date: Sat, 19 Aug 2017 01:58:05 +0300
Subject: [PATCH 243/281] Update CHANGELOG_RU.md

---
 CHANGELOG_RU.md | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/CHANGELOG_RU.md b/CHANGELOG_RU.md
index 8a66cc18b55..5bdd899f8db 100644
--- a/CHANGELOG_RU.md
+++ b/CHANGELOG_RU.md
@@ -56,7 +56,7 @@
 * Исправлено падение запроса `CREATE VIEW IF EXISTS`
 * Исправлено некорректное поведение при input_format_skip_unknown_fields=1 в случае отрицательных чисел
 * Исправлен бесконечный цикл в функции `dictGetHierarchy()` в случае некоторых некорректных данных словаря
-* Исправлены ошибки типа `Syntax error: unexpected (...)` при выполнении распределенных запросов с подзапросами в секции IN или JOIN, в случае * использования совместно с Merge таблицами
+* Исправлены ошибки типа `Syntax error: unexpected (...)` при выполнении распределенных запросов с подзапросами в секции IN или JOIN, в случае использования совместно с Merge таблицами
 * Исправлена неправильная интерпретация SELECT запроса из таблиц типа Dictionary
 * Исправлена ошибка "Cannot mremap" при использовании множеств в секциях IN, JOIN, содержащих более 2 млрд. элементов
 * Исправлен failover для словарей с источником MySQL

From ce3e4676fab55fab0412ed9bd19c4b98c031aa6b Mon Sep 17 00:00:00 2001
From: Alexey Zatelepin <ztlpn@yandex-team.ru>
Date: Sat, 19 Aug 2017 21:35:01 +0300
Subject: [PATCH 244/281] remove unnecessary calls to find() [#CLICKHOUSE-2171]

---
 dbms/src/DataStreams/DistinctBlockInputStream.cpp | 2 +-
 dbms/src/Interpreters/Set.cpp                     | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/dbms/src/DataStreams/DistinctBlockInputStream.cpp b/dbms/src/DataStreams/DistinctBlockInputStream.cpp
index 21fd85c4098..f27c1ee0f62 100644
--- a/dbms/src/DataStreams/DistinctBlockInputStream.cpp
+++ b/dbms/src/DataStreams/DistinctBlockInputStream.cpp
@@ -115,7 +115,7 @@ void DistinctBlockInputStream::buildFilter(
         /// Make a key.
         typename Method::Key key = state.getKey(columns, columns.size(), i, key_sizes);
 
-        typename Method::Data::iterator it = method.data.find(key);
+        typename Method::Data::iterator it;
         bool inserted;
         method.data.emplace(key, it, inserted);
 
diff --git a/dbms/src/Interpreters/Set.cpp b/dbms/src/Interpreters/Set.cpp
index 5662886648a..4939e245f71 100644
--- a/dbms/src/Interpreters/Set.cpp
+++ b/dbms/src/Interpreters/Set.cpp
@@ -83,7 +83,7 @@ void NO_INLINE Set::insertFromBlockImplCase(
         /// Obtain a key to insert to the set
         typename Method::Key key = state.getKey(key_columns, keys_size, i, key_sizes);
 
-        typename Method::Data::iterator it = method.data.find(key);
+        typename Method::Data::iterator it;
         bool inserted;
         method.data.emplace(key, it, inserted);
 

From 7dc4d6f7b368e69d06ba9361145371f8c6a8bc4b Mon Sep 17 00:00:00 2001
From: Alexey Milovidov <milovidov@yandex-team.ru>
Date: Sun, 20 Aug 2017 04:05:59 +0300
Subject: [PATCH 245/281] Miscellaneous [#CLICKHOUSE-2].

---
 dbms/src/Common/HashTable/HashTable.h | 135 ++++++++++++++------------
 1 file changed, 73 insertions(+), 62 deletions(-)

diff --git a/dbms/src/Common/HashTable/HashTable.h b/dbms/src/Common/HashTable/HashTable.h
index 5480cedcba5..82e3a647cb2 100644
--- a/dbms/src/Common/HashTable/HashTable.h
+++ b/dbms/src/Common/HashTable/HashTable.h
@@ -293,6 +293,30 @@ protected:
         return place_value;
     }
 
+
+    template <size_t N>
+    size_t ALWAYS_INLINE findCellUnrolled(const Key & x, size_t hash_value, size_t place_value) const
+    {
+        while (true)
+        {
+            size_t places[N];
+            for (size_t i = 0; i < N; ++i)
+            {
+                places[i] = place_value;
+                place_value = grower.next(place_value);
+            }
+
+            for (size_t i = 0; i < N; ++i)
+            {
+                const Cell & cell = buf[places[i]];
+
+                if (cell.isZero(*this) || cell.keyEquals(x, hash_value))
+                    return places[i];
+            }
+        }
+    }
+
+
     /// Find an empty cell, starting with the specified position and further along the collision resolution chain.
     size_t ALWAYS_INLINE findEmptyCell(const Key & x, size_t hash_value, size_t place_value) const
     {
@@ -422,6 +446,50 @@ protected:
     }
 
 
+    template <typename Derived, bool is_const>
+    class iterator_base
+    {
+        using Container = typename std::conditional<is_const, const Self, Self>::type;
+        using cell_type = typename std::conditional<is_const, const Cell, Cell>::type;
+
+        Container * container;
+        cell_type * ptr;
+
+        friend class HashTable;
+
+    public:
+        iterator_base() {}
+        iterator_base(Container * container_, cell_type * ptr_) : container(container_), ptr(ptr_) {}
+
+        bool operator== (const iterator_base & rhs) const { return ptr == rhs.ptr; }
+        bool operator!= (const iterator_base & rhs) const { return ptr != rhs.ptr; }
+
+        Derived & operator++()
+        {
+            if (unlikely(ptr->isZero(*container)))
+                ptr = container->buf;
+            else
+                ++ptr;
+
+            while (ptr < container->buf + container->grower.bufSize() && ptr->isZero(*container))
+                ++ptr;
+
+            return static_cast<Derived &>(*this);
+        }
+
+        auto & operator* () const { return ptr->getValue(); }
+        auto operator->() const { return &ptr->getValue(); }
+
+        auto getPtr() const { return ptr; }
+        size_t getHash() const { return ptr->getHash(*container); }
+
+        size_t getCollisionChainLength() const
+        {
+            return container->grower.place((ptr - container->buf) - container->grower.place(getHash()));
+        }
+    };
+
+
 public:
     using key_type = Key;
     using value_type = typename Cell::value_type;
@@ -499,74 +567,17 @@ public:
         bool is_initialized = false;
     };
 
-    class iterator
+
+    class iterator : public iterator_base<iterator, false>
     {
-        Self * container;
-        Cell * ptr;
-
-        friend class HashTable;
-
     public:
-        iterator() {}
-        iterator(Self * container_, Cell * ptr_) : container(container_), ptr(ptr_) {}
-
-        bool operator== (const iterator & rhs) const { return ptr == rhs.ptr; }
-        bool operator!= (const iterator & rhs) const { return ptr != rhs.ptr; }
-
-        iterator & operator++()
-        {
-            if (unlikely(ptr->isZero(*container)))
-                ptr = container->buf;
-            else
-                ++ptr;
-
-            while (ptr < container->buf + container->grower.bufSize() && ptr->isZero(*container))
-                ++ptr;
-
-            return *this;
-        }
-
-        value_type & operator* () const { return ptr->getValue(); }
-        value_type * operator->() const { return &ptr->getValue(); }
-
-        Cell * getPtr() const { return ptr; }
-        size_t getHash() const { return ptr->getHash(*container); }
+        using iterator_base<iterator, false>::iterator_base;
     };
 
-
-    class const_iterator
+    class const_iterator : public iterator_base<const_iterator, true>
     {
-        const Self * container;
-        const Cell * ptr;
-
-        friend class HashTable;
-
     public:
-        const_iterator() {}
-        const_iterator(const Self * container_, const Cell * ptr_) : container(container_), ptr(ptr_) {}
-        const_iterator(const iterator & rhs) : container(rhs.container), ptr(rhs.ptr) {}
-
-        bool operator== (const const_iterator & rhs) const { return ptr == rhs.ptr; }
-        bool operator!= (const const_iterator & rhs) const { return ptr != rhs.ptr; }
-
-        const_iterator & operator++()
-        {
-            if (unlikely(ptr->isZero(*container)))
-                ptr = container->buf;
-            else
-                ++ptr;
-
-            while (ptr < container->buf + container->grower.bufSize() && ptr->isZero(*container))
-                ++ptr;
-
-            return *this;
-        }
-
-        const value_type & operator* () const { return ptr->getValue(); }
-        const value_type * operator->() const { return &ptr->getValue(); }
-
-        const Cell * getPtr() const { return ptr; }
-        size_t getHash() const { return ptr->getHash(*container); }
+        using iterator_base<const_iterator, true>::iterator_base;
     };
 
 

From 2fb7a34f0da2fa88992033a6cc8336fc55847991 Mon Sep 17 00:00:00 2001
From: Alexey Milovidov <milovidov@yandex-team.ru>
Date: Sun, 20 Aug 2017 04:07:11 +0300
Subject: [PATCH 246/281] Miscellaneous [#CLICKHOUSE-2].

---
 dbms/src/Common/HashTable/HashTable.h | 23 -----------------------
 1 file changed, 23 deletions(-)

diff --git a/dbms/src/Common/HashTable/HashTable.h b/dbms/src/Common/HashTable/HashTable.h
index 82e3a647cb2..52b1f0fab89 100644
--- a/dbms/src/Common/HashTable/HashTable.h
+++ b/dbms/src/Common/HashTable/HashTable.h
@@ -294,29 +294,6 @@ protected:
     }
 
 
-    template <size_t N>
-    size_t ALWAYS_INLINE findCellUnrolled(const Key & x, size_t hash_value, size_t place_value) const
-    {
-        while (true)
-        {
-            size_t places[N];
-            for (size_t i = 0; i < N; ++i)
-            {
-                places[i] = place_value;
-                place_value = grower.next(place_value);
-            }
-
-            for (size_t i = 0; i < N; ++i)
-            {
-                const Cell & cell = buf[places[i]];
-
-                if (cell.isZero(*this) || cell.keyEquals(x, hash_value))
-                    return places[i];
-            }
-        }
-    }
-
-
     /// Find an empty cell, starting with the specified position and further along the collision resolution chain.
     size_t ALWAYS_INLINE findEmptyCell(const Key & x, size_t hash_value, size_t place_value) const
     {

From fc409f61e53646d2e88a57287496b67de546bca2 Mon Sep 17 00:00:00 2001
From: Alexey Milovidov <milovidov@yandex-team.ru>
Date: Sun, 20 Aug 2017 16:12:14 +0300
Subject: [PATCH 247/281] Miscellaneous [#CLICKHOUSE-2].

---
 dbms/src/Common/HashTable/HashTable.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/dbms/src/Common/HashTable/HashTable.h b/dbms/src/Common/HashTable/HashTable.h
index 52b1f0fab89..d62043fbce2 100644
--- a/dbms/src/Common/HashTable/HashTable.h
+++ b/dbms/src/Common/HashTable/HashTable.h
@@ -455,7 +455,7 @@ protected:
         }
 
         auto & operator* () const { return ptr->getValue(); }
-        auto operator->() const { return &ptr->getValue(); }
+        auto * operator->() const { return &ptr->getValue(); }
 
         auto getPtr() const { return ptr; }
         size_t getHash() const { return ptr->getHash(*container); }

From 4c0c1ae602f75b046c77d84cd3f431218c245441 Mon Sep 17 00:00:00 2001
From: Vitaliy Lyudvichenko <vludv@yandex-team.ru>
Date: Mon, 21 Aug 2017 16:24:29 +0300
Subject: [PATCH 248/281] Add a changelog. [#CLICKHOUSE-3]

---
 CHANGELOG.md | 112 +++++++++++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 112 insertions(+)
 create mode 100644 CHANGELOG.md

diff --git a/CHANGELOG.md b/CHANGELOG.md
new file mode 100644
index 00000000000..912718808a6
--- /dev/null
+++ b/CHANGELOG.md
@@ -0,0 +1,112 @@
+# ClickHouse release 1.1.54276
+
+## New features:
+
+* You can use an optional WITH clause in a SELECT query. Example query: `WITH 1+1 AS a SELECT a, a*a`
+* INSERT can be performed synchronously in a Distributed table: OK is returned only after all the data is saved on all the shards. This is activated by the setting insert_distributed_sync=1.
+* Added the UUID data type for working with 16-byte identifiers.
+* Added aliases of CHAR, FLOAT and other types for compatibility with Tableau.
+* Added the functions toYYYYMM, toYYYYMMDD, and toYYYYMMDDhhmmss for converting time into numbers.
+* You can use IP addresses (together with the hostname) to identify servers for clustered DDL queries.
+* Added support for non-constant arguments and negative offsets in the function `substring(str, pos, len)`.
+* Added the max_size parameter for the `groupArray(max_size)(column)` aggregate function, and optimized its performance.
+
+## Major changes:
+
+* Improved security: all server files are created with 0640 permissions.
+* Improved error messages for queries with invalid syntax.
+* Significantly reduced memory consumption and improved performance when merging large sections of MergeTree data.
+* Significantly increased the performance of data merges for the ReplacingMergeTree engine.
+* Improved performance for asynchronous inserts from a Distributed table by batching multiple source inserts. To enable this functionality, use the setting distributed_directory_monitor_batch_inserts=1.
+
+## Backward incompatible changes:
+
+* Changed the binary format of aggregate states of `groupArray(array_column)` functions for arrays.
+
+## Complete list of changes:
+
+* Added the `output_format_json_quote_denormals` setting, which enables outputting nan and inf values in JSON format.
+* Optimized thread allocation when reading from a Distributed table.
+* Settings can be modified in readonly mode if the value doesn't change.
+* Added the ability to read fractional granules of the MergeTree engine in order to meet restrictions on the block size specified in the preferred_block_size_bytes setting. The purpose is to reduce the consumption of RAM and increase cache locality when processing queries from tables with large columns.
+* Efficient use of indexes that contain expressions like `toStartOfHour(x)` for conditions like `toStartOfHour(x) op constexpr`.
+* Added new settings for MergeTree engines (the merge_tree section in config.xml):
+   - replicated_deduplication_window_seconds sets the size of deduplication window in seconds for Replicated tables.
+   - cleanup_delay_period sets how often to start cleanup to remove outdated data.
+   - replicated_can_become_leader can prevent a replica from becoming the leader (and assigning merges).
+* Accelerated cleanup to remove outdated data from ZooKeeper.
+* Multiple improvements and fixes for clustered DDL queries. Of particular interest is the new setting distributed_ddl_task_timeout, which limits the time to wait for a response from the servers in the cluster.
+* Improved display of stack traces in the server logs.
+* Added the "none" value for the compression method.
+* You can use multiple dictionaries_config sections in config.xml.
+* It is possible to connect to MySQL through a socket in the file system.
+* The `system.parts` table has a new column with information about the size of marks, in bytes.
+
+## Bug fixes:
+
+* Distributed tables using a Merge table now work correctly for a SELECT query with a condition on the _table field.
+* Fixed a rare race condition in ReplicatedMergeTree when checking data parts.
+* Fixed possible freezing on "leader election" when starting a server.
+* The max_replica_delay_for_distributed_queries setting was ignored when using a local replica of the data source. This has been fixed.
+* Fixed incorrect behavior of `ALTER TABLE CLEAR COLUMN IN PARTITION` when attempting to clean a non-existing column.
+* Fixed an exception in the multiIf function when using empty arrays or strings.
+* Fixed excessive memory allocations when deserializing Native format.
+* Fixed incorrect auto-update of Trie dictionaries.
+* Fixed an exception when running queries with a GROUP BY clause from a Merge table when using SAMPLE.
+* Fixed a crash of GROUP BY when using distributed_aggregation_memory_efficient=1.
+* Now you can specify the database.table in the right side of IN and JOIN.
+* Too many threads were used for parallel aggregation. This has been fixed.
+* Fixed how the "if" function works with FixedString arguments.
+* SELECT worked incorrectly from a Distributed table for shards with a weight of 0. This has been fixed.
+* Crashes no longer occur when running `CREATE VIEW IF EXISTS`.
+* Fixed incorrect behavior when input_format_skip_unknown_fields=1 is set and there are negative numbers.
+* Fixed an infinite loop in the `dictGetHierarchy()` function if there is some invalid data in the dictionary.
+* Fixed `Syntax error: unexpected (...)` errors when running distributed queries with subqueries in an IN or JOIN clause and Merge tables.
+* Fixed the incorrect interpretation of a SELECT query from Dictionary tables.
+* Fixed the "Cannot mremap" error when using arrays in IN and JOIN clauses with more than 2 billion elements.
+* Fixed the failover for dictionaries with MySQL as the source.
+
+## Improved workflow for developing and assembling ClickHouse:
+
+* Builds can be assembled in Arcadia.
+* You can use gcc 7 to compile ClickHouse.
+* Parallel builds using ccache+distcc are faster now.
+
+# ClickHouse release 1.1.54245
+
+## New features:
+
+* Distributed DDL (for example, `CREATE TABLE ON CLUSTER`).
+* The replicated request `ALTER TABLE CLEAR COLUMN IN PARTITION`.
+* The engine for Dictionary tables (access to dictionary data in the form of a table).
+* Dictionary database engine (this type of database automatically has Dictionary tables available for all the connected external dictionaries).
+* You can check for updates to the dictionary by sending a request to the source.
+* Qualified column names.
+* Quoting identifiers using double quotation marks.
+* Sessions in the HTTP interface.
+* The OPTIMIZE query for a Replicated table can run on any replica, not just the leader.
+
+## Backward incompatible changes:
+
+* Removed SET GLOBAL.
+
+## Minor changes:
+
+* If an alert is triggered, the full stack trace is printed into the log.
+* Relaxed the verification of the number of damaged or extra data parts at startup (there were too many false positives).
+
+## Bug fixes:
+
+* Fixed a bad connection "sticking" when inserting into a Distributed table.
+* GLOBAL IN now works for a query from a Merge table that looks at a Distributed table.
+* The incorrect number of cores was detected on a Google Compute Engine virtual machine. This has been fixed.
+* Changes in how an executable source of cached external dictionaries works.
+* Fixed the comparison of strings containing null characters.
+* Fixed the comparison of Float32 primary key fields with constants.
+* Previously, an incorrect estimate of the size of a field could lead to overly large allocations. This has been fixed.
+* Fixed a crash when querying a Nullable column added to a table using ALTER.
+* Fixed a crash when sorting by a Nullable column, if the number of rows is less than LIMIT.
+* Fixed an ORDER BY subquery consisting of only constant values.
+* Previously, a Replicated table could remain in an invalid state after a failed DROP TABLE. This has been fixed.
+* Aliases for scalar subqueries with empty results are no longer lost.
+* Now a query that used compilation does not fail with an error if the .so file gets damaged.

From 75f1df36e4fbf8035d6419d147a95966129a8665 Mon Sep 17 00:00:00 2001
From: Alexey Milovidov <milovidov@yandex-team.ru>
Date: Tue, 22 Aug 2017 20:29:02 +0300
Subject: [PATCH 249/281] Miscellaneous [#CLICKHOUSE-2].

---
 dbms/src/AggregateFunctions/AggregateFunctionSum.h | 4 +---
 1 file changed, 1 insertion(+), 3 deletions(-)

diff --git a/dbms/src/AggregateFunctions/AggregateFunctionSum.h b/dbms/src/AggregateFunctions/AggregateFunctionSum.h
index 10bbc7d8d8b..54f84688d6e 100644
--- a/dbms/src/AggregateFunctions/AggregateFunctionSum.h
+++ b/dbms/src/AggregateFunctions/AggregateFunctionSum.h
@@ -15,9 +15,7 @@ namespace DB
 template <typename T>
 struct AggregateFunctionSumData
 {
-    T sum;
-
-    AggregateFunctionSumData() : sum(0) {}
+    T sum{};
 };
 
 

From e47a29fd4b4711a2d66e29c64ddc81534c2c8c28 Mon Sep 17 00:00:00 2001
From: Alexey Milovidov <milovidov@yandex-team.ru>
Date: Mon, 21 Aug 2017 23:11:31 +0300
Subject: [PATCH 250/281] More sophisticated test of hash tables with integer
 keys [#CLICKHOUSE-2].

---
 dbms/src/Common/tests/int_hashes_perf.cpp |  20 +-
 dbms/src/Interpreters/tests/hash_map2.cpp | 410 +++++++++++++++-------
 2 files changed, 294 insertions(+), 136 deletions(-)

diff --git a/dbms/src/Common/tests/int_hashes_perf.cpp b/dbms/src/Common/tests/int_hashes_perf.cpp
index be1cb2e5b93..990292e4e66 100644
--- a/dbms/src/Common/tests/int_hashes_perf.cpp
+++ b/dbms/src/Common/tests/int_hashes_perf.cpp
@@ -306,16 +306,16 @@ int main(int argc, char ** argv)
 
     setAffinity();
 
-    if (!method || method == 0) test<identity>        (n, &data[0], "0: identity");
-    if (!method || method == 1) test<intHash32>        (n, &data[0], "1: intHash32");
-    if (!method || method == 2) test<intHash64>        (n, &data[0], "2: intHash64");
-    if (!method || method == 3) test<hash3>            (n, &data[0], "3: two rounds");
-    if (!method || method == 4) test<hash4>            (n, &data[0], "4: two rounds and two variables");
-    if (!method || method == 5) test<hash5>            (n, &data[0], "5: two rounds with less ops");
-    if (!method || method == 6) test<murmurMix>        (n, &data[0], "6: murmur64 mixer");
-    if (!method || method == 7) test<mulShift>        (n, &data[0], "7: mulShift");
-    if (!method || method == 8) test<tabulation>    (n, &data[0], "8: tabulation");
-    if (!method || method == 9) test<crc32Hash>        (n, &data[0], "9: crc32");
+    if (!method || method == 0) test<identity>  (n, &data[0], "0: identity");
+    if (!method || method == 1) test<intHash32> (n, &data[0], "1: intHash32");
+    if (!method || method == 2) test<intHash64> (n, &data[0], "2: intHash64");
+    if (!method || method == 3) test<hash3>     (n, &data[0], "3: two rounds");
+    if (!method || method == 4) test<hash4>     (n, &data[0], "4: two rounds and two variables");
+    if (!method || method == 5) test<hash5>     (n, &data[0], "5: two rounds with less ops");
+    if (!method || method == 6) test<murmurMix> (n, &data[0], "6: murmur64 mixer");
+    if (!method || method == 7) test<mulShift>  (n, &data[0], "7: mulShift");
+    if (!method || method == 8) test<tabulation>(n, &data[0], "8: tabulation");
+    if (!method || method == 9) test<crc32Hash> (n, &data[0], "9: crc32");
 
     return 0;
 }
diff --git a/dbms/src/Interpreters/tests/hash_map2.cpp b/dbms/src/Interpreters/tests/hash_map2.cpp
index 8a8027c726a..0d946eb43a8 100644
--- a/dbms/src/Interpreters/tests/hash_map2.cpp
+++ b/dbms/src/Interpreters/tests/hash_map2.cpp
@@ -21,69 +21,307 @@
 using Key = UInt64;
 using Value = UInt64;
 
-struct CellWithoutZeroWithSavedHash : public HashMapCell<Key, Value, DefaultHash<Key> >
+
+
+/// Various hash functions to test
+
+namespace Hashes
 {
-//    size_t saved_hash;
+    struct IdentityHash
+    {
+        size_t operator()(Key x) const
+        {
+            return x;
+        }
+    };
 
-    static constexpr bool need_zero_value_storage = false;
+    struct SimpleMultiplyHash
+    {
+        size_t operator()(Key x) const
+        {
+            return x * 0xff51afd7ed558ccdULL;
+        }
+    };
 
-    CellWithoutZeroWithSavedHash() : HashMapCell() {}
-    CellWithoutZeroWithSavedHash(const Key & key_, const State & state) : HashMapCell(key_, state) {}
-    CellWithoutZeroWithSavedHash(const value_type & value_, const State & state) : HashMapCell(value_, state) {}
+    struct MultiplyAndMixHash
+    {
+        size_t operator()(Key x) const
+        {
+            x *= 0xff51afd7ed558ccdULL;
+            x ^= x >> 33;
+            return x;
+        }
+    };
 
-/*    bool keyEquals(const Key & key_) const { return value.first == key_; }
-    bool keyEquals(const CellWithoutZeroWithSavedHash & other) const { return saved_hash == other.saved_hash && value.first == other.value.first; }
+    struct MixMultiplyMixHash
+    {
+        size_t operator()(Key x) const
+        {
+            x ^= x >> 33;
+            x *= 0xff51afd7ed558ccdULL;
+            x ^= x >> 33;
+            return x;
+        }
+    };
 
-    void setHash(size_t hash_value) { saved_hash = hash_value; }
-    size_t getHash(const DefaultHash<Key> & hash) const { return saved_hash; }*/
-};
+    struct MurMurMixHash
+    {
+        size_t operator()(Key x) const
+        {
+            x ^= x >> 33;
+            x *= 0xff51afd7ed558ccdULL;
+            x ^= x >> 33;
+            x *= 0xc4ceb9fe1a85ec53ULL;
+            x ^= x >> 33;
+            return x;
+        }
+    };
 
-struct Grower : public HashTableGrower<>
+    struct MixAllBitsHash
+    {
+        size_t operator()(Key x) const
+        {
+            x ^= x >> 1;
+            x ^= x >> 2;
+            x ^= x >> 4;
+            x ^= x >> 8;
+            x ^= x >> 16;
+            x ^= x >> 32;
+            return x;
+        }
+    };
+
+    struct IntHash32
+    {
+        size_t operator()(Key x) const
+        {
+            x = (~x) + (x << 18);
+            x = x ^ ((x >> 31) | (x << 33));
+            x = x * 21;
+            x = x ^ ((x >> 11) | (x << 53));
+            x = x + (x << 6);
+            x = x ^ ((x >> 22) | (x << 42));
+
+            return x;
+        }
+    };
+
+    struct ArcadiaNumericHash
+    {
+        size_t operator()(Key x) const
+        {
+            x += ~(x << 32);
+            x ^= (x >> 22);
+            x += ~(x << 13);
+            x ^= (x >> 8);
+            x += (x << 3);
+            x ^= (x >> 15);
+            x += ~(x << 27);
+            x ^= (x >> 31);
+
+            return x;
+        }
+    };
+
+    struct MurMurButDifferentHash
+    {
+        size_t operator()(Key x) const
+        {
+            x ^= x >> 23;
+            x *= 0x2127599bf4325c37ULL;
+            x ^= x >> 47;
+            x *= 0xb492b66fbe98f273ULL;
+            x ^= x >> 33;
+            return x;
+        }
+    };
+
+    struct TwoRoundsTwoVarsHash
+    {
+        size_t operator()(Key x) const
+        {
+            UInt64 a = x;
+            UInt64 b = x;
+
+            a ^= a >> 23;
+            b ^= b >> 15;
+
+            a *= 0x2127599bf4325c37ULL;
+            b *= 0xb492b66fbe98f273ULL;
+
+            a ^= a >> 47;
+            b ^= b >> 33;
+
+            return a ^ b;
+        }
+    };
+
+    struct TwoRoundsLessOpsHash
+    {
+        size_t operator()(Key x) const
+        {
+            x *= 0xb492b66fbe98f273ULL;
+            x ^= x >> 23;
+            x *= 0x2127599bf4325c37ULL;
+            x ^= x >> 47;
+
+            return x;
+        }
+    };
+
+    #if __SSE4_2__
+    #include <nmmintrin.h>
+    #endif
+
+    struct CRC32Hash
+    {
+        size_t operator()(Key x) const
+        {
+    #if __SSE4_2__
+            return _mm_crc32_u64(-1ULL, x);
+    #else
+            /// On other platforms we do not have CRC32. NOTE This can be confusing.
+            return intHash64(x);
+    #endif
+        }
+    };
+
+    struct MulShiftHash
+    {
+        size_t operator()(Key x) const
+        {
+            static UInt64 random[2][256] =
+            {
+                {
+                    0xb9979dc11a7ab921, 0x069177f0cca1cb81, 0xa77b7458984cdca6, 0x20cdddcd60ebf956, 0x54a7e16ccc85c618, 0x5a7b32512add86d0, 0xd4024d92207929e0, 0x927506e044a87bcb, 0x0ee2fafa66d27f8d, 0x34e5d597062edc13, 0x9d9e5971e4ef679f, 0x1373fa521c455462, 0xae20d7a035922d67, 0x2acceeaa1c38dea1, 0xa8d7318d831b2291, 0x237a092ca188a58d, 0xf3aab9c253e94340, 0x59c067c9247ad798, 0x9a2a3b0fc175faea, 0xb4ff42232b62408a, 0xc33f6f6e14b2ad59, 0xd8354b02517bb45e, 0x6cd3324afa8e1fa8, 0x491f3efd10beb8ce, 0xec5511355b29b707, 0x11a0e525b3f8a1d9, 0xaecb034c53727bb3, 0xaad2f1a0f7b1bb15, 0xdc318ef665bf079b, 0xecaed1b2e1808d2d, 0x38ccecd71d8548f5, 0x4a29ee777e1e8984, 0x58d683d104a9a9ac, 0x1c86c3ae03b0d968, 0x6ae480de1f434851, 0xdbb254502c123636, 0x7c8514ca7feb2780, 0x209ce2dacac250c4, 0xec256290581d4242, 0xba54af4e004cd657, 0xef09683b8da8cfd4, 0xef4b60e1cc22c00f, 0x8bb9849ecb4f0f4e, 0x64186f57b4e7e197, 0xf86d9122f253282c, 0xe545dcc26d8232fc, 0x7a8eeab5b064887e, 0xd77895d6c7baf87b, 0x07bf805fc0fcef52, 0x362d110a780e166b, 0x1b0a76b85b924a27, 0x49904133b1fb35c4, 0x76328bab42b9f5b8, 0xcfd15d94b28db6d0, 0x8908beb1031eb7b3, 0xbe05c1d38c789639, 0x9a38f06e2a4a5e2e, 0x9c49fa7b05cbd5e7, 0xf31aa060547fc13b, 0xafcfd8714b2a4082, 0x71a117f24d3d8cbe, 0x201a2c30737c1243, 0x9915450b0b26957e, 0xbe5865d67bd08830, 0x1efd55f709543d56, 0x69305e7e4c08948e, 0xd19d4fe1a47b1598, 0x36e4246123e542b7, 0x4cf0640cc2b66fdf, 0xf4301d669c78d4f9, 0xe1e7025ea57b3a3e, 0x6d59685c09510c07, 0xd90b29fbe9680c7a, 0x5575a24ba532391e, 0xdebab7c32a11d297, 0x09010bd6a6b295c8, 0x5b745d475a2cc5f8, 0x1a17df8fa5d2fb0e, 0x9eb9c9a1c56caa5d, 0x8759a2d4b80f3104, 0x1a6732736d219e48, 0xb9a934bedbbb1b64, 0xe88fc304f4f41aa2, 0xb650530683f9aecf, 0xc6de4393e1754ec5, 0x455dd664a10d1975, 0x295dffd0c8accda1, 0x4b0e49b50e1c3cd1, 0xf9b16d40c7bcb99d, 0x73f924a20a6b9bba, 0x59d4473fddb90104, 0x16a1b347c260d1b0, 0x0b6c06c9546458b7, 0x808f3c2d9e112207, 0xd142ca3cdf2a92c8, 0x36b4ea106906393d, 0x3ad7d15985207a3f, 0x5b3236e568021603, 
0x95ff5ce56c0c9e4f, 0x35b8973c8634cac8, 0xd09591efe1cea59e, 0x9a852d896596d1c5, 0x5b8d231f680d8003, 0xaf56403d17f64962, 0x27f9e41e098024b4, 0x276adf3aacd76289, 0x0f828c068e8e4552, 0x45b28f9c40591b8c, 0xf543929acc2fd3a1, 0x7f5c232270f8675c, 0x96bf54ca56992a6b, 0x831ceab94de9a77f, 0x884d660886d85f97, 0xa441f1401555b4cd, 0x2f7a37e0df322ca7, 0xcfd621bcaf7e8968, 0x2f4c4f54612f78bf, 0x81ad69be79a99942, 0x452169980a69690a, 0xfe7ecd6fdfffc7db, 0x6750d09c96c0723e, 0xa96e8b7c1f87d0b7, 0x9355142350227cfb, 0xd04b2c03a5fd752c, 0xaa884bf5cbaef0c9, 0x47439b6a210da3c7, 0x08366f260246b77d, 0xada9a09bde98ac47,
+                    0xf285854404e11a2e, 0x1f6b5f0db9344910, 0x43635afd969b49a1, 0xf6e336e4883a399c, 0xb3dc4cd8d0589e84, 0xeda5c2c0fc7742f4, 0x10b3b8fb622b125b, 0x9f109b5ab6eeed6e, 0x69277c89abd4cd78, 0xea221cf76ab7fc86, 0xfa0ca125e8daf891, 0xdac858a3c5e11c00, 0xafab19a748a947ae, 0xeb2b69a5a4da6489, 0x601eb5a12ac4565b, 0xf782900ee4cd7365, 0xa1dc2fd083f4546c, 0xf8723698955f8c1a, 0x1ef4282aebf3d945, 0x653da3bf71e624bd, 0xbe04b29a9baf7d5a, 0xb18f73a34543666f, 0x90c901c604dce6e2, 0x3208dbe059534b49, 0xacb11eee833467d5, 0x3d07083a9d5be0d6, 0x6bd915dd0fef2557, 0xabee35defb430211, 0xb3180e083638dc85, 0x2be0361e7febfce0, 0x4d97cf883fabdc41, 0xf1be01ddae8c4048, 0xb7d0b4f0332d2992, 0x72141ded3dd6f55a, 0x9123d943f30926d9, 0x7eeca6bc6be7313c, 0x95ab321d7ee5c72c, 0x21421b21f9d81288, 0x19233206a680c160, 0xf09a9d339c9d311c, 0x05133ec82d1ecd3b, 0x71f02fcd10beb60a, 0x5e61ad5eeb1ceeee, 0x087c9fbd97613ab5, 0xfe250587d99f3438, 0x5e871afeec0fa630, 0x4f88441aa4eedd4d, 0x1a42dd66f7452619, 0xf58e8d8f81b957ef, 0x66986b54ecf07347, 0x2382340318cfb238, 0x7d1c4f93c49b788c, 0xd8d9fa627f06932a, 0x6aa7c64c4873e3d5, 0x27efff555b10bb4e, 0x784a1565db94187f, 0x4271a9c1fcd0772f, 0xd234d11377eb40b0, 0xcaef5e6aa3977628, 0x64009936900c3fb0, 0xf0ae881430c514c5, 0x0cd32d01c5798aa4, 0xc45b39cce35de6e4, 0x1307b9f4d832f787, 0xceeee5e813135402, 0xc930a0867296e8a3, 0xe4b52a4e4f3d9783, 0xe4e7934e0d97202f, 0x51d1e9340b2459ce, 0x99cc0f90dcfe55b9, 0xf4f27b19cdedfe55, 0x9debe48d7c4cb666, 0xccd8bda6061dd33a, 0x3d1b011a2f870b00, 0xbb4bf9865127ebc3, 0xa149ab768fb8f044, 0x3119a981452666bd, 0x65db8aa27ea6d215, 0x1c6d1bd4e5f373ce, 0xa17f5382b2217d61, 0x9599f49e8a4c3636, 0x745bea7bbfdb01c0, 0x3827fcecc93556d5, 0x23467c41001df3a1, 0x6b055e5393e6df45, 0x845fe338233e4e8f, 0x2924d70bee302e1e, 0x00ff993181bdc604, 0x491f3a3eb8502037, 0x5d76d4307fb75616, 0xd02fc60a95d3e5ef, 0x807b31765ef80cfd, 0x94d6208ae964ca56, 0x01985a747cedaf30, 0xc93ba934dd080643, 0xff2171253b5f3f29, 0x661b5bf4c5c90793, 0x3e16f8961257ea4d, 
0x6234adf4978717be, 0x0fb6ebda63b582d7, 0xe93085e5a1bfd52a, 0x356873f8edf17b1b, 0x6bd859a750a2cd58, 0x413c4f77cf56e540, 0x416f62eca618e666, 0x3fdbb2157558f5fc, 0x3446b31c6c14a00c, 0xba534ff625129ce8, 0x2579bbcd151f4a9c, 0xb13327991b337d1d, 0xe548cfe328d2e52a, 0x8685e6bd4abe8bc5, 0xc912ce5f3fe32b12, 0x544f4c648136ef2a, 0xac9dc00a55c71ae7, 0xb9c2048993ef7255, 0xdbfb82775439009b, 0x7124703dd90168cd, 0xfbf35a0d9e650617, 0x2c757dcd616aea51, 0x2c465d4aceb02eab, 0x28cb9d0f78dc8657, 0x87cfaeaa76051bc8, 0x0aeaf0c08f5e04a1, 0x7c60c6ac4e1bcdaa, 0x5904b7dcc3ac2e1f, 0x17625e6c1972f71c, 0x64dc0d8c46a96c16,
+                },
+                {
+                    0xb2b37152266d808f, 0xd1688ce5907fa9bb, 0xb4fbde50315ebb85, 0x94e1d1a40be0f814, 0xe5ef4ab98cebd353, 0xf908528b3ec218e6, 0xacc740ba335dceda, 0x19f10e26e4c68a48, 0x840a9672a2f5a2b5, 0xc0cf3b2996e6d1e9, 0x548a34dffc0570c1, 0xdc031240feff5c48, 0x92297ff0b800820e, 0x0c11352081d2a81e, 0x8fd8d29c79d55a69, 0x66488d4729e78c30, 0x941d465e2582b6fb, 0xe76b9689bba28cc5, 0x4103bec4b7c08c40, 0x4e4b4b76d188000c, 0x844db5402e959fc8, 0x2e134ee324e2f088, 0x625fa933c1fa4d1a, 0x1d4f0121515d0ce0, 0xb1d0d19a00d95196, 0xc38fe9c8032d86ac, 0xd6d71c1464cf5365, 0xef8d00bb18b455e8, 0x7536cd4fc7f65501, 0x97b4b02c30117dce, 0x7411e5084e20b6b9, 0x988167ff559bec81, 0xf91372f1a162a022, 0xc638ef90a723246c, 0x6761b912a7d129a9, 0x56927af0806a2486, 0xae697de7c569d46c, 0xf0587bfdf442de4e, 0x02b83e162ef75c67, 0x712f8e20cd8d62cd, 0x443a2b5a16f137ca, 0x5faca36e7dc9b149, 0x3fefa267d527a8d5, 0xa8bbd36ee9c73ceb, 0x451a8aa3972efdbd, 0x0b5a7ddaf5d22b7f, 0x37a58d60fb0033f5, 0xd57f6057b724127a, 0xa36b778ce2959583, 0xfd843a9d8800b0b4, 0x057f720e9582b96e, 0x4304286582ffe050, 0xa3fe58e3d150bad8, 0x3b1cc25468a8cf65, 0x5a343b883d439ecd, 0xefebddaf9fccf271, 0xdee57a385349bdc0, 0x685936c14e3c96ad, 0x54efe66dcb28d0fa, 0x3b3a5bbb4873165e, 0x6a10806224cc7c73, 0x8b81d349a6653e23, 0xd3c77a6c5254a1c1, 0xe426ed8badd6ae84, 0x159ed3abb8289790, 0xc34a0abddf1c3868, 0x7e0ffe203c7b27d3, 0x26a12e8a39321471, 0x058ef4beca30a376, 0xa329f59e34edcd3c, 0x72b0a270dcf9a332, 0x166ceb4a84f8b7e6, 0xa280d2c77091a912, 0xec16fa92ca834210, 0x87cb7bde305e2e8c, 0x637fbc31719a722e, 0xf1d6b723b62c74b9, 0xa79f73228d226e72, 0xdde9e7d9b6111fc5, 0x7ceb45bc84699f40, 0xa91bfc1705e0d732, 0x306af3eed572452d, 0x4a9433f2005ed515, 0x9f00376f44600cec, 0xefd838b045fb86ef, 0x65098de0679c0513, 0x51ccc18e00e21e0e, 0xd462c4e789677c32, 0x95a177c568ffc03e, 0xb12376a070d7b2d7, 0x664ae720214ec675, 0xcf15fe99a80aebd0, 0xa7d007e79eec58ab, 0x2c6b911aa9872e3d, 0x77f746e811360633, 0xd0830a895f8f557b, 0xfbefdc19fccb70e2, 0x02e4ca8f22349a2f, 
0xcc8e04e0283f929c, 0xeb743ce2f3f2a830, 0x966611978470f470, 0xd46e0a5f306e76d0, 0xad2c8ed5bcd22c46, 0xf1eba8f4d933b891, 0x058a7875bb427a22, 0x45107f3da4aa3e16, 0xe81284e4b7d9db22, 0xe004099ab8178db8, 0x53e36cc21df27075, 0xd9a17d119adf78de, 0xe95c27a78e7824e7, 0xb584e128467f80dc, 0xd44ff1193314853c, 0xf0124a034d852e19, 0xc9077af6da8a9ce9, 0x57a082ea0d3f7acd, 0x14cd16087a7bf2a7, 0xeb64143d63df7307, 0xe15943fecae64d35, 0x978bb411cfc5f25c, 0x76a96f8f617871b7, 0xd7d9eddcbfae7bd1, 0x61bcfec611e3ed1c, 0xf2af9d3e527ecf80, 0x8182c0a8ee728f30, 0x7ca964bcddcbe009, 0x99c70ed92ad37d47, 0x52ad14b9a60bebdc,
+                    0x651a92a09a0f6cd9, 0xf081fe7ada03ec1f, 0xc2e558216dc44e49, 0x28d1e01b15889c1b, 0xd24d7e86573816dd, 0x965aa3b1404a1b4b, 0x39776cd5c65d62da, 0x5a1afb7ffaff7228, 0x993d7e2bccb5b123, 0x8af2f371498b991f, 0xec8f424fd6b122f9, 0xf7a7a05f114f446d, 0xf1729f142472c92c, 0x1e47d5d4dbd7d8d2, 0x3c79a550342c0bb7, 0x70d8ff005cc97454, 0x565f5c2a091373b5, 0xb2f07256c7f2de3b, 0xcc0e305a4fa50bba, 0x90d7259bcd4d4dfd, 0x57a21147c7b8d4b4, 0xb4e904c414344400, 0x1b9843cef6b7de4e, 0xc55ed544783d7077, 0x951fd36278c4af42, 0x5596691ec2a6c112, 0xa96366b16e85bae6, 0xdce8b9304db2cf16, 0x4196a4a77dad5d51, 0xc035d6a8486f503c, 0xda814111a183c18b, 0x850d476c24afcdeb, 0x8b1fb1911d79a756, 0x3fbb083dc9ff1546, 0xd9ea3048315a4840, 0x1bbaf883b34fb0e8, 0xc7812b9babc4f1aa, 0xeee79e6e139bfd6d, 0x0f61f802cad137ab, 0xe6702e761b466507, 0x00fea66009d13ddd, 0x27865b24fdc1543a, 0xe53c0cadcbd4ba6d, 0x081dead3238fe2fe, 0xc4ad52df5a17e06b, 0xdf196982de8820b4, 0x93056ac04ef6a5d0, 0xe3b0e29b5b394f0b, 0xd22124f2f63464f3, 0xcfdc91988a08c919, 0x142e82bacd825129, 0xa295a43e93e4e702, 0x1b344f1ae78d7438, 0x1ad39d6fce7b2de1, 0xf38a7608b3c52ae3, 0x0411864df1f01a7b, 0x8da709efa240a816, 0x4db801db3b0405a4, 0x82e53d875886c89c, 0x1f8a6580748a29c4, 0xb5f3c5da7b0c656d, 0xe6308e06103bb9da, 0x1c044a02583c3b23, 0x7824fafc7744ded1, 0x120ae3a85640de45, 0xbe7841a70dbe7f85, 0x13b1bedabd2227e0, 0x543f4d8601666ca0, 0x140acc8931524f45, 0x07d8d511f4deaf51, 0xe4218d90e617fb4c, 0xbff96e733749a010, 0x6cb6990dbdfed3a6, 0xe651708ed78c4008, 0x2d86b38cd518d5fe, 0x41316f458fcf5a20, 0x74cf6939366c2013, 0x298a2a81db72ba3a, 0xbdeb78b42a71be11, 0x7bf37745b51a4e93, 0x71d074c050fd7f32, 0xd7ae1af287ef3c83, 0x6545c0921918a9c4, 0x81b80a0be366c29b, 0xb5f96c0c829ff255, 0xedc1e2598be87178, 0xd930d6eafb62c208, 0x1b4ae0fd3bc45e3f, 0xae4eba5234af88a3, 0xef250aa0f82c9251, 0xf842992589d21959, 0x76d99f8cc618151d, 0xc0d053337ad3d0da, 0x87569479dbbdcc16, 0xc0a31211852f8ac5, 0xf09022e229d40060, 0x2a4acf18d2d9a943, 0x95b9dd630a124a30, 
0xe3d441a7c183a2df, 0xb70592e18a0d31d5, 0x45dfe88bf06ec04f, 0x9a8982b8dabc5814, 0x9853c63295eb60a1, 0xcc6ab672b45261c5, 0x1d59b5ef15f998e5, 0xab052dc26ca65d9b, 0xc29094316db6ad02, 0x90e2463bc2a67bd5, 0x5d9658394413a531, 0xbbae171ee3e4997a, 0x7baf87e1759cbfee, 0x269b8ee8cb2d9c69, 0x98d272695f943d3c, 0xc2aa69caf54a47ae, 0xfb6d9caf908685a0, 0x36aab7dfb5ef3444, 0x9a28ca1db0e037e0, 0xd54c3b5005923402, 0x124addbcdcfa5bc1, 0x277b48aaa7bec0d9, 0x76d3a563d86fa26e, 0x0545a2263944b662, 0xdfd9e108d234c7a9, 0xa7f406b6d42d6ec8, 0x9becf8e91d0daf3e, 0xc53f653fc3f42bd7, 0x66a70e50c0535454, 0x2ff4d545d6a21306,
+                }
+            };
+
+            x ^= x >> 33;
+            x *= random[0][x & 0xFF];
+            x ^= x >> 33;
+            x *= random[1][x & 0xFF];
+            x ^= x >> 33;
+
+            return x;
+        }
+    };
+
+    struct TabulationHash
+    {
+        size_t operator()(Key x) const
+        {
+            static UInt64 random[8][256] =
+            {
+    {
+    0x4f3cecaf24409b1b, 0x6ad9f166d91c6613, 0x54bf75358aea8524, 0x3fbc1de24a079a6e, 0x57ea94a0259aec73, 0xf9174938aeb467ca, 0x6f5deec36e40d25e, 0x534addc1aa10643b, 0x94734451bc5c5ec5, 0x4d72432932c6c7b7, 0x56ddb2f1203575ef, 0x8f7f217ead5654b0, 0xd7c0eac16d4aa24e, 0x84e4265047714b2d, 0x449769b97c43e1e1, 0xfed98b3f4c5b7698, 0x48bb913b09ea35c1, 0xe69cfcd6052df551, 0x483636e39bab623d, 0xc108de147d29545a, 0xeafb7485ba1f8e40, 0xd4a3891e24ab7233, 0x941f1975d079ba30, 0x8776cd48b75bbf4c, 0x42c5f14b72ec0eef, 0x19a18efdff3a84b5, 0xec4078cb31112625, 0x3063155b2e1cace2, 0x4d0fd702d00c53d5, 0xc80ad41b4e104360, 0x67e9d8d12617d417, 0x1a6de5d7f6a3958b, 0x9520893617a19775, 0x8842a4072f85e7b1, 0xa066c6eec0f4c288, 0x91c753eb152561fb, 0x1eacdc853dbdc4d3, 0x38e8e61d8cc61e0e, 0x22117333fc2eb16e, 0xed909eb368ecc36b, 0x2b67fc646f0b73d2, 0x7c28b15e21c0a93a, 0x13f8de9d1bad4d5f, 0xd96a893cf9da4125, 0xd3bbc92ad05fb53e, 0xd13aaad8d8075799, 0x18f003d700064040, 0xbb47fd3c38570068, 0xcd3db144d1a1f6d4, 0xebb33f814155f734, 0x740c6c7f4d91ac30, 0x9e3ae55cefe1f46e, 0x678c8b1c10da8b96, 0x37510cf678751024, 0x4e9e97713eb900d6, 0xb11271b9b1617fe7, 0xf2b35c453dfcfc22, 0xf5c2a8307ec8d153, 0x14089b8b1462447b, 0x5a350397786bb472, 0xfff7cd246e11a821, 0xe5649ebd197aa820, 0x5b7b9b407888c0f8, 0x617e4610c8e466fa, 0x928f11ee454fbeb6, 0x72e6a8006953074e, 0xb695a3dadef3ae60, 0xa26906a7e0140bbd, 0xf856fd404e987a6f, 0x95cddade446d74c7, 0xf7cd44918454ca8d, 0x34b17066e9a0f88d, 0x3481e736e1fd2fbb, 0xdf09f9dab79514ba, 0xf9352d79440c48ae, 0xae71591dcae8b4b2, 0x2f1bfdb07031c1ee, 0x50f106bd520bb92d, 0xbc39d67db555d325, 0x0768dcf245299385, 0x2d0f0564d3ee2403, 0x0b4075b353804510, 0x3722374ac591e9d2, 0x561d767a57b72214, 0xd6e1775b179939ea, 0xedaf88090284c61e, 0xc897695662d2c7d2, 0xe2bdcab72b125c05, 0x37c0d44b70a6e565, 0x3c5be744f87454d9, 0xd8d969301f77195a, 0x3445efae8fea16b9, 0x338b465dd8e11413, 0x4bcde36d984527ac, 0x0b803b6976171f05, 0x51a7386146462803, 0xc2897c1a69b6f1c1, 0x5ab092594684a5b4, 0xd3f4afafab7fc3ee, 
0x744c31a06c5c0460, 0x49f01a31eefe4021, 0xaa62ba4831a87a0c, 0x61052036b02cb121, 0xc4bb5582ed8d7e4a, 0x25b6b517dcf0d7fa, 0x703b1897ff50a997, 0x83e5b7ef256dead8, 0x8da8e0ea74709e3e, 0xd5e2be449fd500a8, 0x0c386f813d21c684, 0x4bfee7a5bc728b8e, 0x5f0aca2baba81c31, 0x7ddfcfe8a6e897de, 0x90b69d33b9af5fbd, 0xfddedc985978876b, 0xc8f93bbdde12f7f2, 0x45d7a1087f4739f0, 0x75ea324184f1e5b8, 0x97b78ad1ec2ef95f, 0x192e5bf5ab92521b, 0x5175eefdc3fadec7, 0xa23a36c97bac48b2, 0x6e1c21a7c232880e, 0xf8010ca06ce802ea, 0x6e9822f71a8123cb, 0xdcb21616d2941bc5, 0x6f2929e3943e0777, 0x5a6ae6bc66eb2f1d, 0xf20a3c93c0fa6172, 0xcc1f91002d11567d, 0xa558d8dd4fec490b, 0xbe0ca1be73f5d533, 0x66dd4427dc66bbe1, 0x6a27341a10cd985f, 0x8d12380bcbd9bec3, 0xfefaaf20d5b9139f, 0x8641d578cdc4e199, 0x5f21e78ab4d25c23, 0xef52fdd1026d6f8a, 0x842f108fa0fcb69d, 0x9bc3723415c2724b, 0xc9615a1d92ab9500, 0x7c0850b85e7535cd, 0x8e93357abaf1ed24, 0xdf6fb36799938890, 0xe2082b8202e65187, 0x11170a177d855ab0, 0x751564db24effa8d, 0xa74b7769d222649f, 0x12a999982fd65d64, 0xc380a14a49b93b32, 0x5b9e932a4f264ee3, 0x1ca4323b708a0aec, 0x28fd5d5a327c91d9, 0x31289307419c9f1b, 0x20bda201fd63649d, 0xd1d1c8c8163836d6, 0x9cdae51d6993d0a3, 0x3a6fbe9579c1da3a, 0x27072cbc6d3a4825, 0x94b7107387607e61, 0xb1b0b605f39f115c, 0xb2557d474dc4e952, 0x8794c509cf6e26b1, 0xc1a8398cb2e98c9c, 0x92d0f486b7ac4afd, 0x9082064213d7c39b, 0x7e200cefde5ed88d, 0x904b5a00dd597b29, 0xe84c40f0bf571c92, 0x196b4a47fe185361, 0x2fb3e66b0fa8c185, 0x581a7f90e8646a69, 0x4b74f5f56f087180, 0x2e94fe98e0b1cc0a, 0x198f7bfbc692eda7, 0xa4d0b7908707016e, 0x0c90ee281eff932d, 0x879a1cf4c24d189a, 0x2d66cea5314bd480, 0x23346e49480e7e83, 0x82d63e12f56ea02c, 0x4ce802d35f8f46e2, 0xd143fe931608d5f5, 0x66c466071c6718a2, 0x59a5ab0a39dd53b2, 0x4805529d2a394dff, 0x631151511fde3379, 0xab98f8154c24669d, 0xb3a05b6b742dcdf2, 0x9d9827d3e071e26f, 0x9c157833c16216fa, 0xa9c8584ed28c8023, 0x94ca7cddf52bbc28, 0xc34d111216c15159, 0x68abebbb05b62206, 0x4786f8c9094da769, 0xb9c218515d329fe9, 0xd997a2ad91f01905, 
0x81aaa629f3b2bce0, 0xefef8a896238229a, 0x6af252f60dd72940, 0x4492c36d5f165eac, 0x9dd50a2ef4d5f9a1,
+    0xdfbf94a3550e6ecd, 0x9288b91b1caf05e7, 0x9c0ad10f9c67d06e, 0xdd4acdcb36db48c5, 0x7570a07af93f329d, 0xa31f26e2fffef103, 0x2886476327948381, 0xbddd48e33441c988, 0xc887b91661bf6e9b, 0xfc21127e01445e2f, 0xf0a8e3af7c953713, 0x47da9d0482ee366b, 0xc3d6ca5719939732, 0xedeadea909a6fa76, 0x26fcdad40d586205, 0xa1dd97b354af5ed4, 0xcc8aa75425740654, 0x4a0c5113156178f9, 0x981abd8fcf766b13, 0x99f31176a6cb712a, 0x49ac51e92982f34e, 0xdfeaa1da9373b16f, 0xa31666230da09aa3, 0xdc1eaceaf1afa2e2, 0x4326313df2d59166, 0x8836b6746f79236a, 0xe166e915f72d2540, 0x09cf7b4b78b2637e, 0x55369c7b01071949, 0x86ff67741a6c3cb6, 0x76503dcd4383b8cd, 0x36731b7b7cf8df2e, 0x51a2d6aad260273b, 0xf6d7cbb0db4253cb, 0x1230054a3bd28926, 0x4eb266aae155fc98, 0x9a2a7f60a9dac0e9, 0x79290b7b9035b3d4, 0x3a424cdbbe2aaa45, 0xcc46944623341c59, 0x7c185b60bade4199, 0x6ae892b88237cebc, 0xdd8ec8066e75ae58, 0x69a8aa059ff444a0, 0x987e27eae07462d5, 0x539f69c3d236e3c3, 0xc6bcae3c97941ed6, 0x29e853acc29b3eb5, 0x1faeb880153ba613, 0xd35bb34905b04517, 0xabcc7468231c83ca, 0x286d214a37fb65b3,
+    },
+    {
+    0x55baa482d6094d30, 0xa775277f382073a4, 0x73bae2387e32c5f8, 0x4a6416aaca027e0c, 0x0ad38404a8ca17e5, 0x37ca2344345f1a5d, 0x9c910f03937fc178, 0xd99efaccd8d0f15a, 0x557c9d7538915f7e, 0x500ee4fb62acc9e5, 0x3f1eb011057383c9, 0xaa4fe82c1926b108, 0x84e24186cfadd3fb, 0x0862cef2d38de5f0, 0xf03099ce7d642f66, 0x143b22300078711e, 0x04d44b5b3444abb0, 0x2c46d930d09accf6, 0xc5c8cfe5da8a7e49, 0x9c0dfe1f0c791972, 0xe287ed0f98ffb25c, 0xe91eb5daf941701b, 0xf4989aba25810a10, 0xa23853aed0869787, 0xc008036099dbab0e, 0x752dd9e217b5c4fd, 0x763c0b933129e776, 0x7b474798a5a16875, 0x31c3cf01e8ac88ae, 0xa3075c68c217fbbd, 0x12b53ba28ba2ef53, 0x62d619c3d366944e, 0x8d1aa326f3f96bb6, 0xe4ba4927c89184c3, 0x863b2374081f1e4f, 0xa2f5b1f264772e64, 0x7d8085a2941f5b09, 0xc19447f7e00838ca, 0x1fc50c73bd36adff, 0xc10e2d3ea18d12a2, 0xfb9d34ae20b0977a, 0x809ae69b8558a6d5, 0xf643d21e8543a777, 0x1223ae6c2809d3c5, 0x5ff0047e9b20a9d6, 0x814623fd6a99203e, 0xc912bd1a4b7ba3e1, 0xf4683a10792dce3a, 0x6157694e70c50676, 0xae3916927aae4a7c, 0x0973002f06279ed7, 0x0f4cd1c254c5e26e, 0x7d7ba6bb160200c3, 0x3c3bd06bd3fa92d9, 0x1e559440993b113a, 0xfca7ed369bf393eb, 0x6caa82117c2f3ca6, 0x4d848224ec4a0dfd, 0xfde9e96a3023bfc4, 0x3bf9ed37ebcad940, 0xcbf6587df4678baa, 0x98d4968ce7ec8779, 0x2644f85c1e0fbe76, 0xc984308f877374a2, 0xd53e563c2f8511a5, 0xd6dd5155704369f7, 0x853bdf051a2a6a2d, 0x21d9a29d02e502b4, 0x65150c59e806b5a9, 0xa1943a083f94e255, 0xdd5422dee2ebf9d2, 0x7a418ce99f1795b8, 0x98b0e22488a10741, 0xcabcf61f9966df2d, 0x58e14076e68fca44, 0x314d7ddf395cccdf, 0xcc403b46e733f565, 0xf15b35f8d71d8a1b, 0xf5f1b4f972ab688c, 0x1b3b1dea29c68904, 0x2c96354f3f1da731, 0x00f045692ffcf9f2, 0x03ace2077bc44510, 0x7da8d3d81d6d03f5, 0x5cd83bc6f5d9f8ad, 0x7c04887a3721286e, 0x1bd151e4125406cc, 0x888c7ad9ebd8ba2f, 0x143f637fe24a0da0, 0x6b3779a345052dc8, 0xbef717c4f597384e, 0xcbaffc196a0aebed, 0xc7c4c469a8d9c20a, 0x4e260ce8512295c8, 0xe0c9b47f50817abe, 0x0dd940d3900eaabe, 0x91d566526665dcf7, 0x470d9079c9c2fc39, 0x388867f7a6d48949, 
0xd0c3ba55686d3d6c, 0x9880ba4fa1a0765d, 0x31178f63e3c86654, 0x1ad2daa404e3ce35, 0xcb816899daf1d758, 0x943662411dd67f38, 0xff9592c29d8c8150, 0x123d9c7fe4bf5702, 0xae43ea51c75b1793, 0x9d3e98a5ae2bc556, 0x601f8a748a24ab05, 0x3b090e60d6837c99, 0x6236755d48edd559, 0xefff911ef6b222b5, 0xdd567e4aafa31faa, 0x98bcb00b24a47474, 0x7cb94e6af25671c5, 0x5e821e019dde465e, 0x7defecfae68e913b, 0x915811752db3d4e7, 0x8be3d8cb181a5e8f, 0x95915979542f5ef1, 0x49708c505e34e6e5, 0xda4c55a2f6d1dc53, 0x2474d9abbfdc58c0, 0xc83b01a3120290ea, 0xd35cfa7833fb3e6a, 0x26dceb5adf4d2e72, 0xb4a95e8d3921a995, 0x05f9eea1de04e25c, 0xe1b3f728f77a0ea7, 0xd0e72bcdb5a43289, 0x532a242c481e5582, 0xa1af8ca39bd360b1, 0x92f5d69ceab3c9a3, 0x4d3effdcb8889428, 0x1a41253f9e794758, 0x8ddaf31b28db2834, 0x0e183ef313da9b9e, 0xd9b3f3a53daeee98, 0x39d555f28e821b18, 0xc5209431aed2ea3a, 0x70836f760f213e2c, 0xc8eb3a1a5b250610, 0xee225c27d0fae90c, 0xa3d1bfdb2f236eec, 0xe4ea1d3c6ab29fb1, 0x3e4113121d92c559, 0xd6f76fcb55b71cc9, 0x9b2c2b439c547467, 0xd4c0eaa7c33240f8, 0xbfaf66aa8c19832f, 0x9d3b1f5a4b810871, 0x01635ec6f0d79ff2, 0x60ac5b58598a106c, 0x487dd435e33b6846, 0x60bffd49a79cc594, 0x7b94ee39d7958a63, 0x146e9a412ad1259f, 0x7f78dd55015459f7, 0xf9b4611ed19dcc7b, 0x02099fed0debcb39, 0xfa2c53462f353c9f, 0x4df9be5b185dac4e, 0xdff08e1bb6197444, 0x68b8eef25269c9d1, 0x29375f899bfdf95c, 0x717425fb6cf4f3e7, 0xd05924d6516744ea, 0x7767e15c3f0ae746, 0xef09e001fbd96e10, 0x3ede191e2efd92b4, 0x2870f7b6584ed8db, 0x5b865889a51ac221, 0xc48bec3f1fb51b24, 0x221b7a0a04040567, 0xea1db24529ff5064, 0x1258c51e195e4518, 0x84789f760d2611c0, 0xff729a0328c4f026, 0xf600dfa2d51a3a17, 0xf90dc307cb46a03a, 0x9a7fdb17130b19d8, 0xbc52f4ea57b7260b, 0x40a6a8b285f3ab9e, 0xeb3cffe125e5ec4d, 0x7403e102ed00cedb, 0x73f6ef7ce2d178b7, 0xcffa6b38980c2e97, 0xd0e796c209ee19d2, 0x592bf15bd1da7c94, 0x39d26577581c4aac, 0x5396af489dd6bc53, 0x93258b2aa667f2e1, 0x848a94686c6f7296, 0x03e62afec9dee865, 0x07f2fd48c7715faa, 0xc15419583397837f, 0x672d8bce0e659a4e, 0x30773e75d88a6236, 
0x87e89b73fd6b2338, 0x44b36eeaa3cf1782, 0x463715a899473e12, 0xacbbbf9ef34f1993, 0xe69201e06e0366bd,
+    0x4bb133053a2711e8, 0x3d8a3aa545b44de0, 0x76c4052be97df9b4, 0x764d1f80780c0dc3, 0x2f857803d304866b, 0x6e4bcb1fe6dba6f0, 0x203bda623fffa785, 0x4d4ebd02bbfd90f0, 0x4bb68df8e27003ad, 0xa8f2c6e2fcdd7a3c, 0xd671f8629f1e6a49, 0x8c5bc906af678f51, 0xc15daa98abab5323, 0x5e452b96ea600fd3, 0x8639b76f2b8701d0, 0x38d0efba497d2ed2, 0xe58138b534ec6b00, 0xc2ae5081bea98827, 0x94804a7d23f42a72, 0x2aeb670b22f6b6bd, 0xf82c45c2f9056dcf, 0x90688fa9103a686d, 0x033d4d30d160e812, 0x882a3ece84fec7b2, 0x3e6a772e2759e4da, 0x5e8c67943a040727, 0x32d55a6250431795, 0x7fc3e1139a64263d, 0xe43b8cc338035f25, 0x2dc5cf5c635e6b83, 0xa28357ae64955e46, 0xdd5be08f32c7243d, 0xe444c100eab4a18a, 0x879f71ed16556a1f, 0x4946e311697b872c, 0x3b03ec5646dc6737, 0x50dfa7300f6d9cc4, 0x8e7f48c57f42e905, 0x3649232984345f09, 0x8f8455e58304e686, 0x1ddecfecdd046180, 0x3bc6a932474b2a09, 0x350c7f543f5ccce4, 0x657e3a0eb2152346, 0xf71988c12b0bcd16, 0xa3e5210ed52837b5, 0x1cc15b50de742d38, 0x6d951d94d09d05e0, 0x0b62b88308358306, 0x7d8d89a552fc7b2c, 0x2ca923216a4670b5, 0x9b9ed6833377a5c8,
+    },
+    {
+    0xbb80ca3eebdae7b4, 0xf5fc0d7464541a53, 0x9a2c38654912cf7a, 0x021a17fd20db7bfd, 0xbc058265765c6a1d, 0x8b50b42412f8dd1e, 0x61dac7b7556a005f, 0x0df84fd583671469, 0x58165564a89bf07c, 0xff19c3a56cfe3267, 0x858ab38bb08616d3, 0x645f43110df8c740, 0x30348b0389c8d7c9, 0xf7d9a363581d8eaa, 0x671246f994cd2296, 0xa806a2677b012b0a, 0xa3c10c4870db951c, 0x56034551fe978bc9, 0xb06ee3c60f36cb7e, 0x632d0d010f3dc94d, 0x75d975398748a7c2, 0x6b90477488a987b5, 0x9fe5b16871658519, 0xf04888c6a874558a, 0xb138a4d0d84d3cab, 0xb11a09eff6ff47cc, 0x4eede1a796b9f4b0, 0xbb484f86d18aaecf, 0x2a30f47cfb79fa2c, 0x81ae01d95ac19546, 0x994abf9277a54039, 0x8ccdf00781ccd895, 0x7a0f04ea61661d2e, 0x89c3a67131ccc7d7, 0x3c45517546a7ba31, 0x22fd6f368fb15fa9, 0x9de7fbe2b23b406c, 0x9c781d12f148508a, 0xc3a45b7d9f3b9a66, 0xdc9e5ffdfc3dd94f, 0x2e7571001e32d2c1, 0x50e37ff45132e76c, 0x470f8344ec798a14, 0x4935da7cd2ab3c1d, 0xbae60625dbb8865a, 0x17c9776beba8739b, 0x43639b7937f12895, 0x6be16198b6f962c3, 0x4d91d6ef47a9a3db, 0x2dbdead3f22822f2, 0x3c81b7f0cfaaba7f, 0xef3a37046d27f6f3, 0x31d68e3a049ac326, 0xa8c44e710a99aa40, 0x93be20d0a0572ffe, 0xa38e5189e299539d, 0x3bee2001ad51ba22, 0x46edbadd327d48d7, 0xba8bfd9e4105d214, 0xb60dae7c54d82759, 0x7d93ed61474ac119, 0xfe57a9875ef3388c, 0xf27d7420dbc63872, 0x5f026dd089f44ca7, 0x04ccb7959ff3d70e, 0xf1c48fb27f2396ce, 0x9be866e8a8598eda, 0x6ac8d846c3e15a03, 0xf67e5ccd61c63b36, 0xdf30033839e8d09b, 0xfc1af6737d5bd799, 0xd670eba2d160fb3a, 0xe2e38d82c480922d, 0xd9c110c0b7454bb7, 0xcdd114994b88c8b4, 0x99a7cf833cec3774, 0x66f2e51c71fa8474, 0x5ab5e8bc8b4b961c, 0x8079e6a7a2c87c6b, 0x346759d7e0e87e26, 0x3afb97b58da05ee7, 0x29c3b79b60a81743, 0x673a121ba778462b, 0x58bb6651c3676e58, 0x21ca8f3b3717e87a, 0x48eb7f1e72e2d6fb, 0xf1fc8baf3eacf652, 0x6b60026b8bc860ef, 0x8d63de031fa90638, 0x1e5e8d6237d49d97, 0x38136b31c0b6215d, 0xe4577bee2ba3ac08, 0x82f23249ef3f3240, 0xfa7d83521c2cd147, 0x0b0c9b7d0a3e865e, 0xf91c445ceb6d8de8, 0x20c4aad83cc1588a, 0xfa18db09e7aa57b0, 0x9462bde17785da6b, 
0x58de5861f9e1e0f0, 0xe1ea5249d05bc6b9, 0x64122980de6a88dd, 0x2dbf98533c8dda27, 0xf4a3f494cbeb0826, 0x0da98b56ec88bb70, 0x0f3023729baf8a54, 0x2de585c4fbd9726b, 0x2e75bbaea4864189, 0xd882334004de7a85, 0xb2902adfd01a3826, 0x97c3aa5b950b920f, 0x46842894ea9be2b1, 0x21eca237fe98a7ad, 0x3b1b381c09d5fde3, 0xcd80bbf3dc0a4f51, 0x81a22e80d737c423, 0xdfa4b9c3efa2690c, 0xb6640950f3512262, 0x8ab28a948a71fd7f, 0x1dcc9fa50918eae2, 0x2fcddc6cb9c448d8, 0x2af05f8c76f15e5b, 0xcd239ecc0d4ab1c2, 0x842916ee4c7bc90c, 0xa0e0067d3a2ca83e, 0x9c057b0a000c604e, 0xd66eaba1537d6547, 0x0fe6b8cfc9d6b2db, 0x7bca27e7f4463505, 0x795772f9d573c8de, 0xe7bd9d26f2c5a659, 0xc647ade7c3e886e1, 0x555b27d9e409c407, 0xbc92740673659089, 0x9937bc604b77b522, 0xecce084bccc0c015, 0xf040a0322db51be2, 0x48774313fa139741, 0xb0ef318171f2b231, 0xe1a8325d453addcf, 0xcff3ab63cf4e5300, 0x53d93408ab26653e, 0xb62ec45defb84ad3, 0xefa0678ac0ae466f, 0x3b77863961b7dd21, 0x9494470fdb1023d1, 0x7fc8a6630e395ffc, 0x34b2823c1fed2f01, 0x52bac1ec78029fc6, 0xb5b3dc553d87f40c, 0x8123455bf05c58d6, 0x99b48302e79f7585, 0x13599a21e6e5dbc6, 0xfa9ff5b4d2450b6e, 0xfeeff34ef2cec325, 0xd06b665f50c684a8, 0xd79c8e679afe4e42, 0x396e4fa2fd3d0219, 0x2470437d8c870bc1, 0xc409103bdac24398, 0x716685e54881c123, 0xa682b9ee57ceaba4, 0x5b072e58695c9d2b, 0x10bc4ff5df48b7f4, 0xf34e0ebbedb97531, 0x67b217d3fef4a126, 0xdebd2bfbb4d93189, 0xac02afd9ed15866d, 0x58b5a2be17103744, 0x52d898777aa0eb3f, 0x5ffc2287b414c08d, 0xcc4b3bd481c388b9, 0x9178a0523ce86cf3, 0x4052d981085790f6, 0x9dcc9bf08a7c81d1, 0x63c090d225c1ca65, 0x10f5fd1d5c89636f, 0x86fbfb7d0d9fe33d, 0xbf0ce7f29378ada3, 0x285b12f15fca3646, 0x48f4bb45a0ac2d95, 0xd03da469351ed891, 0x6445027b3770e1df, 0x619121108e91a7e5, 0xae25db3c091700f8, 0xa420a3820bce4cb3, 0xac925ff9d73c8aeb, 0x938a9d2d71e5de1b, 0xb117d661029ee7c3, 0xd26c8298488fe9a5, 0xaaa6efe17a616f72, 0x0d762246d5447e63, 0x8ca2e2259cb0807f, 0xd4ee85540367fde5, 0xf48c06b4576464ca, 0x8939ce11f242d079, 0x61f661397ee42812, 0xb9c933b237d256eb, 0xef6462b82accd495, 
0xc841b19f709ee355, 0x40a83507a842a821, 0x81ebbef507433d5d, 0xdbf8e966e48b8425, 0x6fe079d5474e7efe,
+    0x95164bf3a6925ba4, 0xc606b1562c361bb6, 0x5e4478e57790321a, 0x72681d0c09b894d9, 0xa76b0fba11901df2, 0x31440946e0469413, 0xc007a32eb76038c7, 0xab2153b29a4f0afc, 0x1f260a34d801d133, 0xd478c19f40b0bc11, 0xbd507cd60448b636, 0x8bda5189b7e59cf5, 0xffe796e29958ea74, 0xc4497c21ed10d1f6, 0x0969bce43ac75030, 0xcfc56de77cb6a984, 0xc562800010f354eb, 0xd5412ac41f5e7a68, 0x3adeb5267c1c4c5c, 0xe4f9374056ed7da6, 0xc0145376822450fa, 0xb49b18b4d61d9c87, 0xffef71bf89364602, 0x3eae710f3dc5a3a7, 0xa3d8d45b2acbeee2, 0xe0629b6961ad0ae1, 0x7cbdd9e4aed684c3, 0xad7465e6350cf367, 0x4bb75782cd44f10e, 0x0b779338049be08b, 0x9f4d62c6a7f950e5, 0x7b77447b20b2c5c9, 0x1c202fb8a66a3b97, 0x6a42dbcbda5bd100, 0x30092bd58ff3f2b5, 0x4932990285689a17, 0x4eb3bf0664008056, 0x7349d0f71c402501, 0x1ac5c7890be1648f, 0x804e58480e1d9f03, 0x7745715195b33cc1, 0x7acaab2ae047de17, 0xbef967dd731932e8, 0xf39b36adf14f5f39, 0x54de75433d72679d, 0xa024427954257afc, 0xdc1f2602b79f60f6, 0x6945f75ca9b85afd, 0x79be4d5842f6e76c, 0x8587b10172ed4972, 0xfd94c25d96f769fb, 0x58fc9b5d7e109c27,
+    },
+    {
+    0xa88a7951f6af7c26, 0x854223da3e20c283, 0x2b2e5c4b70cfae4c, 0x63d43562ba946eec, 0x3a222d1e4bd67ede, 0xecd349cc5df57d11, 0xd8813655f95d202b, 0x8da9c54f840b6f23, 0xe19c356b4c7b0e4b, 0x33f801964d3e714d, 0x35ec2635f57d0e59, 0x2c9ad2755c85c608, 0xcd9005585557a085, 0xaf5a46b900bc2133, 0x9586a1d04597e820, 0x092aa13847e86510, 0x255145c434b7e9ed, 0x3be538ef4cd81013, 0x75c1a51b0c0a12af, 0x97c6bc4285339205, 0x71527e929440d1a8, 0xd89efb27b6632cc3, 0xe37e45bcf5418690, 0x9c19764bdb37ef75, 0xed923b87b16de861, 0x285c43a3fae7fd2c, 0xbda2e10ae12bf7ac, 0xf1d3e73d1c39c17f, 0x5f476ec09a2b58c9, 0x9ed9986bc0418584, 0x428210cdf58a1cb6, 0x99445397fabf0c25, 0xca1937c189d93e58, 0x16fd348ffd5d396a, 0x818f411019500b44, 0x8a950a743d53ad01, 0xccc6eea71c3bdea5, 0xd4c55e5e176b9c4a, 0x3220310c80c91a0c, 0xbc91c8af3025a7fd, 0xa52ac42dce9de1d7, 0xf1d40470602653c2, 0xa27709831970c604, 0xd9f77ac5ffa4546f, 0x33e08cda8037240f, 0x96291c5479dbb9c8, 0xcf93a6ec7a12e2aa, 0x3c414a99ace70849, 0x122e90df962e9ee4, 0xf29546a83900111b, 0xec5663205fbfcf62, 0xe54ba5b1804c55eb, 0x29a082609d1b4607, 0x159d3fcdb03a178f, 0x432d87c0f1810558, 0xd671f5b26b5d3965, 0x54ce81d17eb43632, 0xf82d6c60b41a7546, 0x1164d69b71895a9f, 0xe83730e7cf0d4fd5, 0x1ec8d8088556425a, 0x9fe70630541a2576, 0xd0b459959e7aa521, 0xb5f8fd1ceb0d643d, 0x3f264f978c5656e9, 0xf43dcafceba77ce3, 0x75cac4b48a08888c, 0x921d45dd4616f967, 0xc213d9e1ced5f9f1, 0x19663c87a2146214, 0x7c27f197147eff89, 0x7dc892acb09d55a8, 0x9e5229b1d98ceae3, 0x4edf950bd9d0bc8c, 0x4bae80af91b8f299, 0xd4033b0de084e625, 0x087bc700c8b74054, 0x28e1a48622969996, 0x7f7cd7de54d8b186, 0x88c3d303be96ab69, 0x9b45d7933cf40caa, 0x6df9872317ad2f89, 0x024a9ec77017d4e5, 0x165ba6c512af3185, 0xcf84b4c2e74de3f1, 0xbc697622ec5647ae, 0x706da3e4a61fe96f, 0xd9d3f9db778740f1, 0xf9c0e5eca2f2bcff, 0x2d524da481d9272e, 0x16bba40a6cce9ba5, 0x750e05ec675e93da, 0xdc93851adb541514, 0x9f1c7da1e981d778, 0x190070f1a5cf108f, 0x2c888733a270fae7, 0x39456e2ae70645ab, 0xd4c9d08cdcb1f71c, 0xc8196c614ca3e3c4, 
0xd2b3cf7103fae0e9, 0x8836fd84df92bcc0, 0x6d50b0d36d72027b, 0xd11a6103e091d19b, 0x31b7b9817144abc1, 0x7f7052bea785c0f1, 0xe427d056c016e098, 0xde52125ca177b382, 0x97e2625bd4b40471, 0xcaac8129ff6244ea, 0xe2d1883196dc692a, 0x5b2b56d6cc7f1e5b, 0x6fcf8aa25135af49, 0xe6101fb2e7485b12, 0x43f699984ef3c842, 0x9063be433e05bb12, 0x3dc329422371eaa0, 0x599eb4840a097253, 0x89352561c10c16d8, 0x635bb8b0d0f2b10d, 0xd782844495ae23cb, 0xc3600266328f4a5d, 0x4abcc70311e5544a, 0xd7255c63db4e07f3, 0x39f379dcb004d6de, 0x82b2de870ad576b6, 0xf77baccb37f7a560, 0xe69c1c3b10f96c4e, 0xce668e8e4b8bdfb7, 0x5861502a49b34f17, 0x478844ec4a8d5d66, 0x29c8670c21feb8b1, 0x07b62ef8da3d3f98, 0x03198d5e29bb4811, 0x1d7cc509bc687944, 0x0f4bf147856e231b, 0x9bbf2c791c6f62d6, 0x97ea02cecb758f59, 0x519c16e96124ca95, 0x4590342c9e56d7e9, 0x6c3162803d692c2f, 0x77d690a788f2c008, 0x5b26afe0c406bcb0, 0xd2edea6ed5cbdea3, 0x23312eaddd21053e, 0x25b89ec08450bfb1, 0xdc6c41322241964c, 0x84b621796cddbc43, 0x7aa80d38b629966a, 0xfaddb1d7897daebf, 0x52c9251f7c37b125, 0x660c122e18c6a660, 0xed69ac30d66ce018, 0x417d46843cf3717a, 0x2eb3c3e35b5d50a2, 0x5236d099920e9c43, 0x82e41f78bb052814, 0xa6c70570b1dc8f20, 0x7c6c07e98c8192d2, 0xcfc74b2d5b86c5dc, 0xf1c4bebb6ea53836, 0xeee8de556fe348d8, 0xbeb43fa9f9ad4d1b, 0xd7bb5de4e32801ca, 0x331badb0d8fb1447, 0xcc484b71eb42660e, 0x08bb5bccd9b6cb79, 0x061a30335f58167a, 0x30ae0e1ee7d10f15, 0x0575c89117a048ac, 0x4cb57a8db1042199, 0xeb535a2899c5d90d, 0x6ccb63317da9b90e, 0xc2e045cc5a36ab33, 0xd72daa0318985a3d, 0xa83e48709485bcd3, 0x88bff618e9ce00e9, 0xd130f20674e94346, 0x9c604d925b7c78e9, 0x57f9630b7996c471, 0xf4167ed7bf51929d, 0xb47df98e06143ad1, 0xa91de782989a17a1, 0xfa450c2a983a516c, 0xd9f1ffc1142a974e, 0xb12fa960ef7acb7a, 0x9d1145c686991175, 0xc843bf8185df5cf5, 0x999f7f2a410af422, 0xdc14ac5eec660cf6, 0xa7305a02180595a1, 0xb026a76f4950433d, 0xaf56dbdf42def30a, 0xc9f77d35c81844d5, 0x3e3dbe0b889f855e, 0xb7c6251ff5793431, 0x29970f12adff286d, 0x0f4256a46c9509a6, 0x861398aa76312757, 0x80117a92346c98e8, 
0xcb941de6d36d9a8d, 0xf96763bbf815159d, 0x9dee36d76883f2fa, 0xce293626a8588322, 0xf2cd528f36deb9fd,
+    0xee498833047760ff, 0x1735ef925ff3d8c9, 0x900e8a7581ba1243, 0xee66bb7219040439, 0x6916ba9919ee8980, 0xa5bf86f79c70dbe2, 0x6a4d218937106d41, 0x75b536d8a0e5934f, 0x15bfc0732c3a95d8, 0x60d202befa86f934, 0x8fce130f3a4721ea, 0x1dd04bc6bcddfe7b, 0x6253416ae2d529ab, 0x7942ac94c61a1103, 0xe30727ae935ad303, 0x198c70f055a0e11b, 0xc02d61abd4e871f8, 0x2d7fa01655e098c7, 0x9a18702441c63e27, 0x89f16573478cacbe, 0xf61b4d689888d5ba, 0x05854fce331ed312, 0x25adcf465754d6c7, 0x70e46a973b538cb5, 0xa567e24814e5eadf, 0x91819099ee2f7f3e, 0xa3fd1834aedf96f0, 0xa66c001124586041, 0x1de1d70453fd3ffd, 0xf08f8c9a3e638f4d, 0x5e7d8f4f27e06fde, 0x753ef15506e33866, 0x70f4901b22561095, 0xbf657c8be01ac8f1, 0xe3de164e00ab04ea, 0x88fa9dacc90e5e8e, 0x807af1339e75e9b1, 0xc2051e7c2d63f73f, 0x2aec04cbe0b6122a, 0x1f75fa1370b3c282, 0x8408940d0d46a3ec, 0xa7f7bf86e49f72df, 0x41ee278d9e440b0c, 0xff2158d4506077ec, 0x577a7ead4560808c, 0x7528a71b079c5f6d, 0x5508b7bf4ee8a355, 0x36cb40725f579ddf, 0x6f3628b1c380efdc, 0x53603471e5bde573, 0xb64003bd645a150b, 0x057df07f11dfd440,
+    },
+    {
+    0x9a77c7345cda77e2, 0x1ca85a2de1696bd9, 0xa5d9f1910f778ee8, 0x0f0568887c6835c4, 0x24d690187d3653b4, 0x16910eb48bbdf5d0, 0x91ad593d66ec3534, 0x9c84391fd555b7ea, 0xb7fbb2192c63fe68, 0x7cfed94f758f1072, 0x18dcb3b59a47ebc5, 0xa55f240d78072f6e, 0xe9c6c21cca2f8688, 0xe19eac5f19e46ac1, 0xb2682eda034d9cac, 0xfe3158c2b0cccc30, 0xd4893a7d8dfcc06f, 0x45758e32476450ce, 0x566595a64c4095db, 0x1c4d3bdf1b7f3f6b, 0x5a4a224a87582225, 0x67b4874cfa50d87f, 0x4afa51637244ed08, 0x972730da8b35e3d8, 0xc69ccdcf7676fa8f, 0x38a300f1a694a5a0, 0x58e578aba836edd9, 0x66ed06828b3cec53, 0x90fb57d1165837e0, 0xf8a28b42413bf900, 0xef361c212eee9d10, 0x584b1ffa73cc8cfa, 0xcae9e0a934eb7d6d, 0x54167f5166af683e, 0x0c86f4cc8b19543c, 0x6e895e81dead892f, 0x4d3712e2ce5aade8, 0xc1f214e71386fc5f, 0x5a919420900ce0b1, 0xe0d695ae0e37816f, 0xf531bb6a312b2844, 0x2ed6482bf280f375, 0x6f2cc06de0ed8798, 0x67ac3ea472c443cd, 0xf78f10414e69031f, 0x64a6fe0d3752f133, 0xfc579f78ad9c5fa9, 0xff7a4136a5887609, 0x83e6fa63dd05fcfc, 0x68bfbd0be612a1c0, 0xca9425599a7540e9, 0x95726b8f12208486, 0x79044fd8de167316, 0xd7a9dd6654df09a0, 0x7c5e2c454fdd3efe, 0x987a944a6698f80a, 0x14180a87e1e47dc1, 0x6e20e89e26bde6a3, 0x4729dc95fa0662f9, 0xa821bba5ce926e50, 0x25fda875eb4a26fc, 0x420e8ab43d767572, 0xbd72b87e93afe4e0, 0xc613165495fb12dd, 0x6c911c526cebf7b3, 0xd5375557a5e06a20, 0x7827cec995f9a783, 0xaf5bc58a7a6256f9, 0xa804f99b2986c91a, 0x090093da8f2d134d, 0x23a07269e2b3bdc0, 0xa0d112abebc21ead, 0xea60ceb3baede59b, 0x8edcb89e1449ebd5, 0x027227159f41556a, 0x818a8f349b46f3ca, 0x53c2fe4592c6d0b7, 0xad4c079cdf7f0f8f, 0x29ca4e889b080e1c, 0x0b24ef9197d62236, 0x8407220fcb49ff43, 0xf1ec64ff67d2a53d, 0x3257f1ab7b151e38, 0x473dafcfc804d842, 0x4840a949d38ee2e3, 0xd545611202ed1527, 0x2629c7420b29415c, 0x98dfbf9765f495f8, 0xa89e3299b7ff0dd2, 0x0acdbaea06493258, 0xd9fc11c216a06144, 0xa742f8c6441ceaaf, 0xa9bb953a7f594f36, 0xa5d25771ab29777c, 0x07467df7ae8651b1, 0x761c1e03fcea8625, 0x4fc0ceb9103e332e, 0x194f9eb3126b3469, 0xaaf201319cba0588, 
0xdd357625e56d8d4f, 0x4c59e1e05c9c6e3c, 0x5de60410b28490cd, 0xf7b5fca38dbe10e1, 0x3cc4669d484a0e58, 0xaf5e4a1497b92520, 0xaba9ae98f8eb01ed, 0x23dcb749536e6d96, 0x176cdcb765585dfd, 0x9fdcc03eb6cc8aee, 0x7ef90df8d70ac032, 0x869ea534767effef, 0x3192464580f307c9, 0x0f364ffd58da0220, 0x5241ceea28126c87, 0x93a33f579362d687, 0x0ba283ed8c936244, 0xdfeaef961188fc6a, 0xc5fd5d7d19be65aa, 0x6cf7691fe8b97941, 0xb098cfdce8f83ce3, 0x2d2efd8e86efc5fb, 0xc847895f32509250, 0x7d5a439ca9805332, 0x6c73f8404c1d1b78, 0x63537939023b3534, 0x8f8c41a3790edbba, 0xbc07c56e04f83c0a, 0xd7776d8ceaa173b2, 0xb0958a6df751ea82, 0x72a49d76e3fdaaaa, 0x9a8755ed0d97c66a, 0xcc0212498aa9cf50, 0x49e15b32e397894c, 0xe7c9c8e6bbd3da89, 0x10320696fd4d50ca, 0x2e8e5e50b61234de, 0x65c4cba41a7c3d6c, 0x611009b13d54d04f, 0xf8a3147d1f5f8703, 0xa5e55f0497be92f0, 0x40972e0f332a88ce, 0x993e6b327b8b1b75, 0x3106bd73d7354afb, 0x38d21d37d5a32391, 0x23f2e73b6b532c54, 0xaa59ee2a4c0c0ae4, 0xa872464eebe07296, 0xe17597ea6d64ca1e, 0x7e81473b38c3a278, 0xfe779897a3a0817c, 0x6d39781d430c4199, 0x32f38244bdf00c87, 0x9170f5844adc6a17, 0xe7702538eab72626, 0xfc59dcf970709ec3, 0x4f5a5b877dffbb7e, 0x522bb999b577b0ae, 0x2f88f8601b6f03ec, 0x9359cf049c509223, 0xfbc59ca041039433, 0x0a130f79dc2c18dd, 0x2828b4b5c02cacac, 0xd323880c43c9088f, 0xa9d0fe1a5b95c24c, 0x4709d409107ce76b, 0xe3a966ae902a7db8, 0xe2d8b08e6ce7a27b, 0x87342131b457b8a2, 0xec10987c7b4e5237, 0x0686be69892b3d8b, 0xa10e34f060579ac4, 0x25b4f4d8893ef395, 0xfdf80694a47c1318, 0x4a7aef81eb0f0ccc, 0x4cdd7f1f2e26765c, 0xcdcd19799d02f601, 0x2a73302b9a783cec, 0x41c90bcd5a4048a5, 0x903cbaa13552c646, 0xf21e5360d91f720c, 0x61eb5b8837518aea, 0xc329fc22f184568c, 0x8c7bae6e28c5a811, 0x487b2e663c332db1, 0xcfe6e9d0990c269c, 0x2d470bff7ee5ba04, 0xdf7a75e3b806df45, 0xaec2713615ccfc13, 0xe9141f72150dda5f, 0xa43273d143e08a43, 0xe99cfbfa3c6c6ad5, 0x9c79062ab5592dcf, 0x4df4a40c0f04e044, 0x52dfa998e2dcd87a, 0x747314f727da3567, 0x5514e3fa427ea9cc, 0x4da2a99d17b80a97, 0x1d399dae1e0500f7, 0x48c22762a4b945c2, 
0x90cc863a18f4d095, 0xfbc1d454dc9bd7a9, 0x5f20c7f4035e7272, 0x44169fc6292aa860, 0x8bfa97163371ffad,
+    0x0e4293b569cb4d08, 0xb859b978360af10a, 0x07f6a9aab3b807c8, 0x010aaf41927d14ee, 0xd79589856a744970, 0x6baf36d0195c5065, 0xb810cc0627c17bda, 0x53a7dac8761bfde4, 0x4f754280adde73a9, 0xc4220c627d177648, 0xa5b40676ec727b56, 0x188beac8841560f9, 0x5efc63712cb61fcc, 0xe08979b6232ed50c, 0xcc3cb06cd0d7bebd, 0x5d3aba8695f9b103, 0x8b585f9d522fa83a, 0xaf7abb7baf0a4de8, 0xc1485c2ad8193e3a, 0x920d19b1f34ff0c5, 0xda68cfd7d17036ae, 0xa4bf1ca38d152d89, 0xb7f8901716ce010b, 0xa07214f75395d901, 0x752d62b076fd1fd2, 0x997e7d2bbc6fb233, 0x32c2642bf5bba727, 0xb7565ca330a2f23a, 0xcf30d62aa921e747, 0xc35e14e2fe9d2f9b, 0xf041eb4dce789e7f, 0x1ede4e7bb4ef9106, 0x4c7037fd27336b12, 0xb3cc7c62a252d53f, 0x4eaa8a8db5134ba5, 0x386609a59b8caad9, 0x24572e5c57a6a540, 0x952f057e75b2d9ea, 0xc451a53b0896ad06, 0xced825ff76147b56, 0x157888dbbff3c714, 0xfe327864760f6a02, 0xe14812fbf2c9f4bd, 0x2681e1b1ee53bae9, 0x9cd66fd424ab5257, 0x2d75400927ec4c76, 0x99f40c24efe79cce, 0x76cdc64580b62737, 0xf10725bd0cfe4778, 0x77fb36ae946625df, 0x77ef10083f683fd0, 0x05eeb4a1ef54449e,
+    },
+    {
+    0x2a56217db4e684e9, 0xefd9be7c8ed05934, 0xd5bffad1bd8f125c, 0x03fb5363e1edfb14, 0x1cc6cc64c4d0ae1f, 0x0a42031ee4292ddc, 0xf301f9059c9f18df, 0x1af5471bd5244a32, 0x7c7ea2e05b9caf0d, 0xfd14b6b8f328dbbc, 0x3c8434c59f2ac51c, 0xb2c4539718146cf3, 0x7c57e402dfa9d30b, 0xe32ea9139bbaa8fb, 0xec7fe2183e1047e1, 0x8eb981ad72a62dd6, 0x433a2771754ec560, 0x5b4c387fe5449a87, 0x25090e65b33c7fa5, 0xfc479b37a179ebb8, 0xd4bdfa34464c1ef3, 0x5e561efcb0eed040, 0x39a48202df8f4c7f, 0x598f1a20ab01585e, 0xaa2d4a27ec04a943, 0xf8c313551fe9b622, 0x4ae1ceea76c17a7c, 0x6fe875a68dcc86e2, 0x447ad01c13e542c1, 0xe400f20550810995, 0x96d49f014cd98fca, 0x5ed385299c7b4dfe, 0xf76b155e53afbfeb, 0x5e04d2ee1866a1d3, 0x7f2947d0cc9fbdaf, 0x32cd48c6924917b2, 0xeda920681759dac8, 0xf128a276a5875d34, 0x7b61d4508840d642, 0x56fcc9b9ec0c5b19, 0x286ee601e0677a6b, 0xc2d38be64e18d90e, 0x97b0f73b230f6c6b, 0xe51c164458c48dab, 0x5e8c90d219b3f05a, 0xd4f9042b2a886a9c, 0x4973f70976bec492, 0x8d125fc3576e1f4a, 0x7d04238d15b03dbc, 0x1ad3e0fb8cbf01ac, 0x552889b6f89b226f, 0xd3be2ccfe71bad98, 0x194c18a44225502b, 0x3c1342293f7d8758, 0x3c55a71842d822e7, 0x429ef302612a85f5, 0x8778fef06d83323c, 0xd4a6dccde826dc6f, 0xd08961f8bcd0aae7, 0x8f7bddf23e6e5943, 0x336b311a93c6c5e1, 0x0ca66bd597df1ea0, 0x305b2bd1379da391, 0x44ba8703ddaa9264, 0x19df8550c98bb110, 0x654e939254612508, 0x86c299ab6ab25a0e, 0x7b0890ae521e8533, 0xbc43e5ab2fe50a30, 0xd92ea375251cc568, 0x148e8f2917940c46, 0x6cd9dc6ff544fe7e, 0x8b1e28ab642e561f, 0x660776023bc2cf22, 0x34c2dfa5b20f690e, 0x8696dcd5d31e8443, 0xcc4fab161a9769f9, 0x1666e5268191f4b8, 0x190f9860f9715459, 0x9b96062d8ace3903, 0xf9769277ef1e35cc, 0x84da71550dd5e12e, 0x28e9114a9d82f629, 0xf0123e1a100c2b32, 0xca7b2c16287ff884, 0x9e033549734d526c, 0x1047bdb6d289b437, 0x7d5e90dbb8dbfdb8, 0xe7f5db555bea587a, 0xe0813e5683b7c932, 0x7f0cf9dec9364053, 0x4b51e815d18ba7b0, 0x5442aae2b67f3f75, 0x3e016e48f53685a7, 0x418b19bf6b7dae9a, 0xa5cd71e10d2b07bf, 0x77910d9f7ea5986f, 0xbca898a323103afb, 0x9a1a705c170bc1c6, 
0xd3af2645987bf970, 0x01e992f0723bf52f, 0x587600e55581ac01, 0x67d16733b2effc05, 0x4d9b139f2edbb511, 0x7254a7b70ccbd629, 0xeb34ca5036cbdca7, 0xef01c6006ea8deb2, 0x52825250d2fbae30, 0x783058f42afbe1d5, 0x5c8777aa43452402, 0xfadb9609e3097035, 0x26a60ccf9c9a6bcc, 0x9c0f764a411520a8, 0xa452a6ef30600db1, 0x07ff2a1a7dd635b7, 0x7421a0fccb49019a, 0xcd366f636226d1a9, 0xc3789659bc65500b, 0x7eadf2fc78431397, 0x39ac21bfaa8fb1c1, 0x1bd1044a1056e136, 0xd8d03bf161c45d8b, 0xa511107ae70ef91f, 0x4578577bf3401ad7, 0x7c747dd3185ac6f9, 0xae0f70b1e3d09eaf, 0x34315f4df64ec30f, 0x18b14e9991d7e75d, 0x2fb793a55cc3d6a1, 0xdeec732db958e157, 0x14822e15ce378be5, 0xa7890eb4399904ff, 0x03635ac694453df4, 0x3db6ebf58882a657, 0xb48e4695b2ba4450, 0xa3737c721d865244, 0x02d14362965a4abf, 0xff08ef4806d880aa, 0x75325bbce2624d55, 0x25c08941a17f15a1, 0x5a9bda82aedf1631, 0x86a6454b0ad89de0, 0x3431e81e69f3272f, 0xedd40f8308cf1760, 0xe02e37d2a28bf538, 0xe9a937eef9393b02, 0x1624ce92539df4e8, 0xabd1b62b1f30ee74, 0xa9cbcba7fa08005f, 0x0cb2ef53a66f43e9, 0x9e4380060bca9250, 0x8d0e1a93db1ce300, 0x38c355289c9e2599, 0x505e684e514832c3, 0x4a2e806b8d273941, 0x9cbeddcbffbe7c68, 0xdc5679abed54d763, 0x415002250f8db2f2, 0x7068cc805ee3fcc2, 0x331e549068e34679, 0x2cf7b17f7c1c1429, 0xf56b5b8e160e6f39, 0xa1a52ce0472d370a, 0xa019766ba1516e03, 0x50d65e1a44c81bc7, 0x2b821396f26b4f8b, 0xca68a52ad5b4d3ee, 0x5b8c38f8751ed2a1, 0x5d7f679a79227e10, 0x554296b41e0465aa, 0x9c93098b0649659b, 0x6e4b0c5f0b38c0f0, 0xb98a0ed0d12d0683, 0x3df67a148a059c8b, 0xc540796f70d1da65, 0x30e979d9a14cf535, 0x2a01c7abbdddcd67, 0x56b9828e1afab21a, 0x8ba914926bf7b789, 0x73ce0e23ec8277aa, 0x88946c2ed16236bb, 0x78b3f48c210ff713, 0x353f05a0cd499072, 0x4ca2b54469718a85, 0x7d49332ae3aa820c, 0xf91dc24e6b26d0a9, 0xa44f11be64887621, 0x350a66d62dcdc0ef, 0x0aef379c4bc78b26, 0x8db49dc3b772f90c, 0x1a6fb16f766f8569, 0xa5e6512c63882797, 0xd20a287e698666e4, 0x67368df4deee5bff, 0x4712556de85489d5, 0x9e7b838a28c71245, 0xe66f3915761df924, 0x451318769b9f2e7b, 0x7a3fdc9968ed3682, 
0x6983aae7ce79736e, 0x38d4d5317189ddef, 0x599cf389921caf79, 0xc79bdbb20f0a655d, 0x4f8b4bcce704f85a,
+    0xc348cd9a28c98ff6, 0x6e320be1b5a41d45, 0x8fef58da19bc2f6a, 0x9194b77af87e49e0, 0xc67e195b0c368ea6, 0xad14b1accc6c322f, 0x668645f2798545a7, 0xf6e30036843fcc30, 0x886410567bbc5d8f, 0x818d5b833ca5f60c, 0xafb2568c92411155, 0x7d14aaf154c7e21e, 0x3bc86219a3d8bbc7, 0xc10ebbc3b69e9412, 0xf7b5f7ff0c119182, 0x03bc3b8a327daa8f, 0xcba179fe94ffbabf, 0x59ab5d718f499bac, 0xae52c530c3d2ccd4, 0x9542aaa796d65f7b, 0x82676e2203703ed6, 0xd42ec1caa36c91a6, 0xf998d9cb425d1bea, 0x2ea9111019bfde4c, 0x47c73357f1537f18, 0xbecb28ccb3d5ad74, 0x2ee54b2939756235, 0x926750d275c4ca4a, 0xb0876ebdf17df92f, 0x7e46ab036486d51f, 0xf82281750609d41b, 0xb4bd4ce265f7acce, 0x964c14b31bc4ff70, 0x6d164994e041a402, 0xadfdaeb0d3f5ae7a, 0xce2165e87f4d6f83, 0x6847c6a461ecab8f, 0x81d0538a06450fa7, 0xa421f73ef5306975, 0xd5be7ce8e0cd329e, 0x7dba1cc002b8b151, 0xec22e7c23e5f55c8, 0x9c9d1fb4ec0e97e5, 0x49f970017912e3a0, 0xaed66e59557a741a, 0x0d0d16ee28d6c13d, 0x66af528b4dd156ba, 0xe14636d09a64c4c7, 0xd9aefe8a0b2b52bc, 0x52d78a74baf8f8db, 0x195318f6527513e3, 0xa3506be0fd193599,
+    },
+    {
+    0x693c954182b4741c, 0x68f2b504c79f50f4, 0x9ea2d3fcd31219be, 0xdeb379709610abaf, 0x252c789c065bc767, 0xd53f8b1079a53490, 0xf9a295c7237f72d4, 0x16b079440a1ee6e5, 0x95b90c538974ec37, 0x9f7e700f6115fc58, 0x226cae7b140ad1d5, 0x079694f207e3eeb5, 0xb5d9ca4428146a90, 0x602e940309e3812b, 0x9c997f99ee478ab5, 0x57ce949fa068ed62, 0x2bceddb522fb218e, 0xf6cda5f368302b3b, 0xbc3415845cb3f0ec, 0x6b6b6c1124c65f20, 0xa70c0cf3a07fddf2, 0xa82d3063e783ecee, 0xe45f2bcb8cc21ecf, 0x31a06bb1c2c4c87b, 0x10cb087ac4218670, 0xcee2cfc8aab38285, 0x32dc35d2c9e66a9c, 0x54d190bb546a7407, 0xab83af6298bfbd45, 0x3299f79be4d69bd5, 0x4232803bab2cbc3c, 0x365fc10f694dadc7, 0xab6461f35ca74fd7, 0x4d79db0109543f2d, 0xc20a65a40c1a7c75, 0xc480935ea03366f2, 0x69051ec1905745de, 0xc434caaf41d67fb4, 0x43600d4b75af4651, 0x26a0263f6670d3bf, 0x22a56bf53b14e42f, 0x27903ae759cd1bf0, 0x4d93a0ffe7823e59, 0x51ba7d675e503778, 0x31d1591bc39e1ee6, 0x47abcfb03f7fbe6a, 0xc374032ffbb2529d, 0xc947a9a6d22c8a77, 0x4cbdb8657757deb6, 0x3ddac2d6494105e6, 0x6a71d630068b6937, 0xe91d52f7c517c4a0, 0xca5b5b34e3d39a30, 0xe85aebe2a54f3b0c, 0xa6c6e4855b9bc019, 0x180684b73cf3bf1c, 0x899c91feca383c61, 0x2a86d2bbc21d072c, 0xe360fc3e5090918e, 0x7fe7617c889df4cd, 0xf623d4c6855984bc, 0x853e73111dd8b7a1, 0x44019810ef8f4e5d, 0xa3a3da1cb9b0d365, 0x623d252bfad4920e, 0xd0a7e63d2c3c1716, 0x0ba0707098019a41, 0x5e53c2e2c983c23e, 0x450b52be3d49d69e, 0x9ab447f195a55292, 0x102d78a093d7f1df, 0x3f7c94cf01499b19, 0x33e76d65080e3507, 0x8359da72871602eb, 0x5ef38d12d198cd49, 0x086093b0a719e03e, 0x79d43fc327975daa, 0x40603f81e5322103, 0xa3d0c309f83d6668, 0x7bd49ef8dab4bf13, 0xe8c42b598fbc95d0, 0x40e3f510fd4ee26f, 0xfc2336b4a41d7551, 0x4f05c2ce8cfdec8e, 0xac2751b38bea1d1c, 0xe144cb59ecf184ef, 0x2d9b520a145c8c16, 0x5406f27989bc96f6, 0x23852a7bae978b47, 0xd524b42073f547c0, 0x576b9e7e58ceb7bc, 0xd9fa306c26bc5f59, 0x98fb2ccfdb313b82, 0xdd9af8268276b52e, 0x0ceb16a78c1976ce, 0x36af4961f8495a94, 0x73806309402da06b, 0x95039f7c76076df2, 0x6f7e82fae9851c79, 
0x109333447490d749, 0x1f7b22431e0def0f, 0x7dc4611f05707f17, 0x5813730f34eefad7, 0xdb1fcb04572ae4a2, 0x407d37ae2379c94d, 0x4954946ac085f469, 0xf458acbf19e6c1dc, 0x2be788825f25be5c, 0xb0a0c5df6f9288f4, 0x6ea1bbd73d1ae82e, 0x6d5e97db179b28b9, 0x75316e71e46cd0e8, 0x45e01681319d4af7, 0x6040c6b875672aac, 0x7410ff82c09dcd91, 0x7d0089bcb5a48794, 0xa523a1c12fa0802c, 0xc706705b526a1e20, 0xd9f04815c0a6456c, 0x5f4636135077785e, 0x739dc5f1763648ba, 0x7904fcb8dd9bc1f4, 0x1a0ada96d0e5c541, 0x536030bc09131cb2, 0x5ca7a3fd6cd28982, 0x5a547f3cd3550cfe, 0x2ba4bb1c218f771f, 0x0ca35e7f151cf0c9, 0x7df42783cbb3c2a1, 0x6e595392af3971a3, 0x58cec5b3c93fd809, 0xe0542bc9c4bf2efc, 0x0ed113ff0880017c, 0xcb83af5016ca1a5c, 0xe6bcdf6f4858fca3, 0xc62a665e28b71345, 0x7a54581d47c2c867, 0x4caad9fc4b10e1f5, 0x40f2f726071d98d5, 0xc2359aa4ed087f16, 0x9b9aec929bc2c626, 0xd8fd364849d25a77, 0x8c0c8dcb6f8737cc, 0x94a189e5e3927e4f, 0x27f8743006e6c34e, 0xd5a7ee0aa548f683, 0x063e2b21d7d094e9, 0xd4213f38104a5537, 0x71b6218a8230c5ba, 0x10b229265323b4d2, 0x22234374a2734482, 0x569bb9d4b9a86b85, 0xaab0651b418d3b46, 0x8571f8d5871bbf1a, 0x48f130a098213091, 0xdf03437620ff0b3b, 0x3cecf68a577637e7, 0x6a696cfe69915e8b, 0x8dbeb76a195c4bce, 0x035186a897f2a454, 0xfc2c666ace71b74d, 0xd8eb7d118a1f8f80, 0x83047174cd5de1e4, 0x23ba618cb482fa3d, 0x26898e552ea4231b, 0xc04fe13b89da5b34, 0x0cfe71a23468a6ac, 0x58cafe5f25162c0b, 0xdcd2af6d1599e02f, 0x7935c18e94667b84, 0x11816440f6daf103, 0xdb45ed1052ac7f5a, 0x7f800876907b2cf2, 0x47ad23232c79a575, 0x31c14a166b1ecfa1, 0x2016835b15bc7dcb, 0xe36e6f0dc3163bf7, 0x8ac4fd23c1434c1e, 0xea8658a15b3dc7fe, 0x9e097194ae34823a, 0x4c512342a1df82f3, 0xbc466d229b632dda, 0x6f58bf666deb6ac1, 0x1cf0ce404ac53c42, 0x0d7b776466bf4144, 0xd11ec613cb7fd08e, 0xb41380a877ab9a37, 0x0c3f31d6ca1f15da, 0x5c4367d3b7153bb0, 0x008f82043cdb673b, 0xcbc376c830250dfa, 0xd69657607aa62bf1, 0x76c76e0eb1d2e2a9, 0xbda67585c841a49a, 0x935a6956f82ea2d4, 0xf2ca4947064cdf9a, 0x394ae4a2cd6c5e31, 0xffc5b5acf2dd77fd, 0xd2c1e4b30eb836e1, 
0xa26050157ce432b6, 0xb4118e6f65668938, 0x4ba5e3cedc2f33b9, 0x5320d4d82a7f3a52, 0x6500f53bfe4e40d4,
+    0x74b48ff4c216ff96, 0x24c60a293dc8ea12, 0x8be1ac3d6efc61ae, 0x467684a7fdae0f6f, 0x8fe8c9a92a7f547b, 0x7824e120587a07b1, 0x2fcec2a5b63c3bf4, 0x1b73d74a5e67ba18, 0xb4581385dc6d258e, 0xf45e65a2291cca42, 0xce06c8be6f931f25, 0x83256e283e8982f7, 0xb9ae8d52823e0688, 0x6dbd2c9b6581a333, 0x84a84ee825e9ebab, 0x512149c44e3a5fa5, 0x4080391afd375a5e, 0x1b56667fcc708c58, 0x758f5051d80f4990, 0x3c7252fd2f439ab6, 0x22c150a13f828513, 0xfb4e2a09669176d2, 0x3762920064169341, 0xc78b23529c6bd59d, 0x76a6d00007ce11f8, 0x1ffd518cc0483314, 0x20a6c708a2ec7bb3, 0x24b4ddd0f6d1b755, 0x298018f86e982755, 0xb0f82e23a1e85822, 0xecac046a868165c2, 0x5fdf358d794a07c7, 0xb72398853a968370, 0x7418df33c3ff125e, 0x172bcb5300b3aab7, 0x2a4048baec8aeabe, 0x7ea98016f7c85ec1, 0x1afc507dc66113a8, 0xde2cbcb467b19ba1, 0xf5a60fe30f9e61e5, 0x25cccf868b6b0e3b, 0xce8a8992473f0d92, 0x2bed9615c305de79, 0x99cd4a3a30b13dcd, 0x264a0526e87720ef, 0x7c7b33cb6639c9af, 0x8b19555ad810fc7c, 0xdac5e39d6f01a264, 0x6fabe6f54bfe61cf, 0xee542526eaea7792, 0xf7feffe02cab8d9f, 0xc21130a67aaf401f,
+    },
+    {
+    0x8c9519953a0631c2, 0x865d637bb43b22be, 0xacb65a491f886c05, 0xfc4d38f8192d9757, 0x8d395f9f6dc42f0c, 0x53087dcadb04465e, 0x5a986f1d06d0fdd7, 0x60ecef2322d82ca6, 0xecb5726c2d321cf0, 0xc35d5a4bef2702b1, 0x5dd2b609c322b708, 0xbcf3b95dc3d729ca, 0x1d9170b9e1453366, 0x186553051a4e25be, 0x2a53d48fb82f77ee, 0xf46c760a61c999c1, 0xd0cc0a9782d8fb6c, 0x527f4ba4e3b5aeaf, 0x4bbe8f93317c3443, 0x045620b59ed19311, 0xc7b0c55124f293df, 0x4109b240dd664668, 0xe3eb86d84935282e, 0xdb084f7c80268067, 0x77453a7266c1d524, 0x64f313320afdd17e, 0x8bae4a0bda3e3e66, 0x2de67df4d33aa930, 0xa3068a663be947d2, 0xcea196429334afa7, 0xe7e0cea87cfab510, 0x741c10a48562fa44, 0x14494d5a04a6c202, 0x40e7a06890d50be4, 0xe1aa5f52250a65b5, 0xf4a25f6b7e5a6194, 0x565c00e7636c7c52, 0xf75c7fb672be9b3f, 0xe6afa72a659ee1bf, 0x6b64f6e131206bbc, 0xc890512940bbd56f, 0x2534086dc1a69681, 0xb00843f54690b56e, 0xcc5516083f0bdb0c, 0xa95c2e34fdc988c9, 0x849b826becbbf7f3, 0xebeb852f8b802332, 0x961f2b23e86a263c, 0xd72a88270464d6f2, 0x6763243e613eeff2, 0x254cdfdd48b072a1, 0xac3ab9855e94f5f0, 0x9fb7af5dbfd5dbb9, 0xc94e6163f1c0a8cf, 0xf288005dd9ade2e1, 0x87ef56d822a9de2d, 0x8ecab7bed5a7e85e, 0x83ea1bcff015e925, 0x1271137d38e2ff89, 0xa27a077e369d6a32, 0xb9f8913c07ce52b7, 0x5fe99b17468419b0, 0x9fdd39945a89b502, 0x794bc51777290510, 0xe852021031f3314c, 0xf67cff6fd964bac3, 0x7ac582022256c0c0, 0x77604f6943584c22, 0x323a2ce1c1306628, 0x08dd88c98350de11, 0xb8342e4f577818ca, 0xa5bc1d61a6d047c1, 0xddaa89ea44bbbf88, 0xc1a542d3e1620265, 0x8b764ba499a45119, 0x22d9c1904111885d, 0x101ae7d9ebba120f, 0x82ca6f603e96e10e, 0xd05317bd6f339e88, 0x54a4848959d20898, 0x00b0acad87d94f8e, 0xf2ab9bac6fdc358c, 0x95ca172cb6bf18f7, 0x08291eaf145b6f9c, 0xa485afdec21bf027, 0x2c25fdf6dc219361, 0x1a5318cefb7c2036, 0xcfcfafedd3ab62d8, 0x4c7ed76ef4b8ccd6, 0x473466ac0ac16385, 0x2c04553a07032b8a, 0xd31e4a32538843c7, 0x12dd65e138f6644d, 0x161130caf6577e8b, 0x63e5b4ff2c5441b1, 0xa8b0a84d06a58ed6, 0x125dac5910d5ea2b, 0xa97c12ae44459255, 0xb3cc802562a5fd6c, 
0x4409c1575626adaa, 0xa067d92c8e8b8dbb, 0xfededf949bad949a, 0xb9d172a91a3a9009, 0x2ec5d64517692699, 0xc8412695f56eb641, 0x7ad115e4cb00eddd, 0x32c2242ed438b420, 0xa429ceede7138969, 0x7f6db9822cf384e9, 0x2dc39b0bca1f2932, 0xde92a487585eaca6, 0x87c87d0858a70e1e, 0xd2419508213e36f4, 0x18bbfbb774311ea8, 0xb625e058422964c0, 0x4334b1e764f683ad, 0xde817e4a6a1f7368, 0x39329302a8c2c2a9, 0xaa4c1e6c6f785843, 0x8dc7ef86d9d2d0f7, 0x597965f6f249653a, 0xff346827822aa645, 0x368a3637d29d39f2, 0x93775a1b92fc4b06, 0x310b2ad79909ac04, 0x689a60e616b90265, 0x85e4b485dab67c13, 0xd90f3f8b9d54d5d5, 0x7e14be6743d94267, 0x48faebe7430aef01, 0xae5b11c885ecdfb5, 0x52eb6c5cef5587c3, 0x17023e5091ba5663, 0x5bdd13b983a987aa, 0x8a398958b5466a69, 0x6dc644518b81ab09, 0xfb4bb0dd2d533832, 0x1d4baa7ded0c80ed, 0x97d94a33f3f1dc18, 0x4eaa82471d6d2466, 0xdc80226d2e5a87b8, 0x6e7946ed519df314, 0x5e78f110eb7b9c3d, 0xae1445dc0b05b119, 0xaf3a2b4ad6a7e4f3, 0x2b3fab4aff638a6f, 0x482ee0a0ee944717, 0x6c638acb12e6ef9a, 0xf59ffb896ccd4720, 0x4c3e1c2d1a151201, 0xb6f9b34cec313119, 0x4c44261e36facdd5, 0xc59d9c212c3ea06c, 0x9388664dc1d30ca2, 0x29b9d9b5eabffb2c, 0x7808d154b0ec1b3a, 0xc4aaf60b740308fa, 0x6f7aca6f0741b2b5, 0x2edf4f889c09621d, 0x1fa820ee83261292, 0x7a162fd7a082003c, 0x870c2de2f130ff5a, 0xc32d6a90658eb347, 0x0b5a6e242082dc01, 0xf15db35e61e3e4f9, 0x05cb98edaeddde3b, 0xf42a568ba8650943, 0x7a70bb17d9251446, 0x1192a280cb3f6e41, 0x1e793c64522c4580, 0x29a80c9474c9b619, 0x4ccd8e988ea18381, 0xb8838a1136fbfc48, 0x108a0978ea3b5737, 0xe8d5e36bf6eebbc7, 0x62b9f51dbf53b7e6, 0x6996fcba08c05853, 0x500713ada63dec92, 0x55e48c824b0341da, 0xa76a0dee1b82ae93, 0x95706b1d4d8ec8ec, 0x2b100b833913b0a5, 0x2dcc7ccc931f4a69, 0xf198714708999d3e, 0x36d8a256f6be869f, 0x74791453a9159591, 0x3e0e78593608f415, 0xc18f5072b7288cc1, 0x1e045ab5b4deeb8b, 0x55ce3a1e68cc7318, 0xadb069bc0ea75ce6, 0x5f0c72e82faa5d12, 0xb0dcec4f4a5b32eb, 0xb2af1e588a6b3d69, 0x7133d52a1259083f, 0xaf5cbeb8c7d7b228, 0xe00640c93ba3c4e0, 0x5264e4f06b24e751, 0xf4b8ad26a7e50c2d, 
0x034dcdb8121f9b91, 0x8d18c62ff6c0b2c9, 0x04c378dbb5c92962, 0x7c7c041b704a39bb, 0x8c7cac2bd6fab046,
+    0xe830751a18b1fb5e, 0x9e30fb31b333ccec, 0x4fba9d374256093e, 0x598628d4b2871fde, 0xd6854917cc217ab4, 0x4da3839966d614cd, 0x6c2ee98d9f0a6bf9, 0xa643c8991753f6e4, 0x4a7982d0be1d0930, 0x441b590a0694d4f4, 0xac70c5107d531b97, 0xbb9e36477a76bbd2, 0x921ccfb831039d8e, 0x61f6991dc827545c, 0x6c5afe13298cf2ad, 0xecf28b9022ab3a75, 0x11e0265d86c2d913, 0x51b4aeded81317ec, 0x5bdccaa59f2cbeb8, 0xb9c76e9f66388e78, 0xa6babe827af99e38, 0x7c92e55ca21c6159, 0xe49bad2924782213, 0x3c2f72423ad5f50d, 0xb3755cfa70e505b4, 0x9f55bc675f2dd8d0, 0xf2891d2b3c912007, 0xbfe5cf184e166eff, 0x0e43e71fdd72d966, 0xf56228bcd5c95ca0, 0x80fa47660411d1a6, 0x92166503e32b5c2f, 0x542096f618073022, 0x5dd3a8ea782205c5, 0xb520095d8dff2a5e, 0x045b81afc2f56ade, 0xb85681ed6f1de692, 0xb9a75fdde941cf34, 0x58e17def17bb5d6b, 0xd4b11a833adbd178, 0x787ab0355e2fda17, 0x38e5bda322c1a58a, 0x0c1bf5f6457d6d33, 0x93172c3a82e1c498, 0xf3d6f541b2b86965, 0x4e7f9e55316d0a31, 0xc2e824b016aab50b, 0x1d6c62558ea1c109, 0x5370f2b9133e09ee, 0x43137d4fa9a8437f, 0xe5239ad79830662b, 0x4e109cd3220f67dd,
+    },
+            };
+            size_t res = 0;
+
+            for (size_t i = 0; i < 8; ++i)
+            {
+                res ^= random[i][UInt8(x)];
+                x >>= 8;
+            }
+
+            return res;
+        }
+    };
+}
+
+
+
+template <template <typename...> class Map, typename Hash>
+void NO_INLINE test(const Key * data, size_t size, std::function<void(Map<Key, Value, Hash>&)> init = {})
 {
-    /// The state of this structure is enough to get the buffer size of the hash table.
+    Stopwatch watch;
 
-    /// Specifies the initial size of the hash table.
-    static const size_t initial_size_degree = 16;
-    Grower() { size_degree = initial_size_degree; }
+    Map<Key, Value, Hash> map;
+    if (init)
+        init(map);
 
-//    size_t max_fill = (1ULL << initial_size_degree) * 0.9;
+    for (auto end = data + size; data < end; ++data)
+        ++map[*data];
 
-    /// The size of the hash table in the cells.
-    size_t bufSize() const               { return 1ULL << size_degree; }
+    watch.stop();
+    std::cerr << __PRETTY_FUNCTION__
+        << ":\nElapsed: " << watch.elapsedSeconds()
+        << " (" << size / watch.elapsedSeconds() << " elem/sec.)"
+        << std::endl;
+}
 
-    size_t maxFill() const               { return 1ULL << (size_degree - 1); }
-//    size_t maxFill() const             { return max_fill; }
+template <template <typename...> class Map, typename Init>
+void NO_INLINE testForEachHash(const Key * data, size_t size, Init && init)
+{
+    test<Map, Hashes::IdentityHash>(data, size, init);
+    test<Map, Hashes::SimpleMultiplyHash>(data, size, init);
+    test<Map, Hashes::MultiplyAndMixHash>(data, size, init);
+    test<Map, Hashes::MixMultiplyMixHash>(data, size, init);
+    test<Map, Hashes::MurMurMixHash>(data, size, init);
+    test<Map, Hashes::MixAllBitsHash>(data, size, init);
+    test<Map, Hashes::IntHash32>(data, size, init);
+    test<Map, Hashes::ArcadiaNumericHash>(data, size, init);
+    test<Map, Hashes::MurMurButDifferentHash>(data, size, init);
+    test<Map, Hashes::TwoRoundsTwoVarsHash>(data, size, init);
+    test<Map, Hashes::TwoRoundsLessOpsHash>(data, size, init);
+    test<Map, Hashes::CRC32Hash>(data, size, init);
+    test<Map, Hashes::MulShiftHash>(data, size, init);
+    test<Map, Hashes::TabulationHash>(data, size, init);
+}
 
-    size_t mask() const                  { return bufSize() - 1; }
+void NO_INLINE testForEachMapAndHash(const Key * data, size_t size)
+{
+    auto nothing = [](auto & map){};
 
-    /// From the hash value, get the cell number in the hash table.
-    size_t place(size_t x) const         { return x & mask(); }
+    testForEachHash<HashMap>(data, size, nothing);
+    testForEachHash<std::unordered_map>(data, size, nothing);
+    testForEachHash<google::dense_hash_map>(data, size, [](auto & map){ map.set_empty_key(-1); });
+    testForEachHash<google::sparse_hash_map>(data, size, nothing);
+}
 
-    /// The next cell in the collision resolution chain.
-    size_t next(size_t pos) const        { ++pos; return pos & mask(); }
-
-    /// Whether the hash table is sufficiently full. You need to increase the size of the hash table, or remove something unnecessary from it.
-    bool overflow(size_t elems) const    { return elems > maxFill(); }
-
-    /// Increase the size of the hash table.
-    void increaseSize()
-    {
-        size_degree += size_degree >= 23 ? 1 : 2;
-//        max_fill = (1ULL << size_degree) * 0.9;
-    }
-
-    /// Set the buffer size by the number of elements in the hash table. Used when deserializing a hash table.
-    void set(size_t num_elems)
-    {
-        throw Poco::Exception(__PRETTY_FUNCTION__);
-    }
-};
 
 
 int main(int argc, char ** argv)
 {
     size_t n = atoi(argv[1]);
-    size_t m = atoi(argv[2]);
+//    size_t m = atoi(argv[2]);
+
+    std::cerr << std::fixed << std::setprecision(2);
 
     std::vector<Key> data(n);
 
@@ -97,93 +335,13 @@ int main(int argc, char ** argv)
         in2.readStrict(reinterpret_cast<char*>(&data[0]), sizeof(data[0]) * n);
 
         watch.stop();
-        std::cerr << std::fixed << std::setprecision(2)
+        std::cerr
             << "Vector. Size: " << n
             << ", elapsed: " << watch.elapsedSeconds()
             << " (" << n / watch.elapsedSeconds() << " elem/sec.)"
             << std::endl;
     }
 
-    if (m == 1)
-    {
-        Stopwatch watch;
-
-//        using Map = HashMap<Key, Value>;
-
-        /// Due to `WithoutZero`, it's faster by 0.7% (if not fits into L3-cache) - 2.3% (if fits into L3-cache).
-        using Map = HashMapTable<Key, CellWithoutZeroWithSavedHash, DefaultHash<Key>, Grower>;
-
-        Map map;
-        Map::iterator it;
-        bool inserted;
-
-        for (size_t i = 0; i < n; ++i)
-        {
-            map.emplace(data[i], it, inserted);
-            if (inserted)
-                it->second = 0;
-            ++it->second;
-        }
-
-        watch.stop();
-        std::cerr << std::fixed << std::setprecision(2)
-            << "HashMap. Size: " << map.size()
-            << ", elapsed: " << watch.elapsedSeconds()
-            << " (" << n / watch.elapsedSeconds() << " elem/sec.)"
-#ifdef DBMS_HASH_MAP_COUNT_COLLISIONS
-            << ", collisions: " << map.getCollisions()
-#endif
-            << std::endl;
-    }
-
-    if (m == 2)
-    {
-        Stopwatch watch;
-
-        std::unordered_map<Key, Value, DefaultHash<Key> > map;
-        for (size_t i = 0; i < n; ++i)
-            ++map[data[i]];
-
-        watch.stop();
-        std::cerr << std::fixed << std::setprecision(2)
-            << "std::unordered_map. Size: " << map.size()
-            << ", elapsed: " << watch.elapsedSeconds()
-            << " (" << n / watch.elapsedSeconds() << " elem/sec.)"
-            << std::endl;
-    }
-
-    if (m == 3)
-    {
-        Stopwatch watch;
-
-        google::dense_hash_map<Key, Value, DefaultHash<Key> > map;
-        map.set_empty_key(-1ULL);
-        for (size_t i = 0; i < n; ++i)
-              ++map[data[i]];
-
-        watch.stop();
-        std::cerr << std::fixed << std::setprecision(2)
-            << "google::dense_hash_map. Size: " << map.size()
-            << ", elapsed: " << watch.elapsedSeconds()
-            << " (" << n / watch.elapsedSeconds() << " elem/sec.)"
-            << std::endl;
-    }
-
-    if (m == 4)
-    {
-        Stopwatch watch;
-
-        google::sparse_hash_map<Key, Value, DefaultHash<Key> > map;
-        for (size_t i = 0; i < n; ++i)
-            ++map[data[i]];
-
-        watch.stop();
-        std::cerr << std::fixed << std::setprecision(2)
-            << "google::sparse_hash_map. Size: " << map.size()
-            << ", elapsed: " << watch.elapsedSeconds()
-            << " (" << n / watch.elapsedSeconds() << " elem/sec.)"
-            << std::endl;
-    }
-
+    testForEachMapAndHash(data.data(), data.size());
     return 0;
 }

From 3748b7b3327ade18b4282572c064294f2f6e50ec Mon Sep 17 00:00:00 2001
From: Alexey Milovidov <milovidov@yandex-team.ru>
Date: Mon, 21 Aug 2017 23:20:59 +0300
Subject: [PATCH 251/281] Added hopscotch-map just for tests
 [#CLICKHOUSE-3244].

---
 dbms/src/Common/tests/hopscotch-map/LICENSE   |   21 +
 dbms/src/Common/tests/hopscotch-map/README.md |  303 +++
 .../tests/hopscotch-map/src/hopscotch_hash.h  | 1873 +++++++++++++++++
 .../tests/hopscotch-map/src/hopscotch_map.h   |  666 ++++++
 .../hopscotch-map/src/hopscotch_sc_map.h      |  663 ++++++
 .../hopscotch-map/src/hopscotch_sc_set.h      |  518 +++++
 .../tests/hopscotch-map/src/hopscotch_set.h   |  513 +++++
 7 files changed, 4557 insertions(+)
 create mode 100644 dbms/src/Common/tests/hopscotch-map/LICENSE
 create mode 100644 dbms/src/Common/tests/hopscotch-map/README.md
 create mode 100644 dbms/src/Common/tests/hopscotch-map/src/hopscotch_hash.h
 create mode 100644 dbms/src/Common/tests/hopscotch-map/src/hopscotch_map.h
 create mode 100644 dbms/src/Common/tests/hopscotch-map/src/hopscotch_sc_map.h
 create mode 100644 dbms/src/Common/tests/hopscotch-map/src/hopscotch_sc_set.h
 create mode 100644 dbms/src/Common/tests/hopscotch-map/src/hopscotch_set.h

diff --git a/dbms/src/Common/tests/hopscotch-map/LICENSE b/dbms/src/Common/tests/hopscotch-map/LICENSE
new file mode 100644
index 00000000000..86b99c1e5ec
--- /dev/null
+++ b/dbms/src/Common/tests/hopscotch-map/LICENSE
@@ -0,0 +1,21 @@
+MIT License
+
+Copyright (c) 2016 Tessil
+
+Permission is hereby granted, free of charge, to any person obtaining a copy
+of this software and associated documentation files (the "Software"), to deal
+in the Software without restriction, including without limitation the rights
+to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+copies of the Software, and to permit persons to whom the Software is
+furnished to do so, subject to the following conditions:
+
+The above copyright notice and this permission notice shall be included in all
+copies or substantial portions of the Software.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+SOFTWARE.
diff --git a/dbms/src/Common/tests/hopscotch-map/README.md b/dbms/src/Common/tests/hopscotch-map/README.md
new file mode 100644
index 00000000000..f8e96247b4b
--- /dev/null
+++ b/dbms/src/Common/tests/hopscotch-map/README.md
@@ -0,0 +1,303 @@
+[![Build Status](https://travis-ci.org/Tessil/hopscotch-map.svg?branch=master)](https://travis-ci.org/Tessil/hopscotch-map) [![Build status](https://ci.appveyor.com/api/projects/status/e97rjkcn3qwrhpvf/branch/master?svg=true)](https://ci.appveyor.com/project/Tessil/hopscotch-map/branch/master)
+
+## A C++ implementation of a fast hash map using hopscotch hashing
+
+The hopscotch-map library is a C++ implementation of a fast hash map and hash set using open addressing and hopscotch hashing to resolve collisions. It is a cache-friendly data structure that offers better performance than `std::unordered_map` in most cases and is very similar to `google::dense_hash_map`, while using less memory and providing more functionality.
+
+The library provides four classes: `tsl::hopscotch_map`, `tsl::hopscotch_set`, `tsl::hopscotch_sc_map` and `tsl::hopscotch_sc_set`. The `tsl::hopscotch_sc_map` and `tsl::hopscotch_sc_set` classes have an additional requirement for the key: it must be `LessThanComparable`. In return, they provide a better worst-case upper bound (see [details](https://github.com/Tessil/hopscotch-map#deny-of-service-dos-attack) in the example). Nonetheless, `tsl::hopscotch_map` and `tsl::hopscotch_set` should be sufficient in most cases and should be your default pick as they perform better in general.
+
+An overview of hopscotch hashing and some implementation details may be found [here](https://tessil.github.io/2016/08/29/hopscotch-hashing.html).
+
+A **benchmark** of `tsl::hopscotch_map` against other hash maps may be found [there](https://tessil.github.io/2016/08/29/benchmark-hopscotch-map.html).
+
+**Note**: By default the library uses a power of two for the size of its buckets array to take advantage of the [fast modulo](https://en.wikipedia.org/wiki/Modulo_operation#Performance_issues). For good performance, it requires the hash table to have a well-distributed hash function. If you encounter performance issues check the [GrowthPolicy](https://github.com/Tessil/hopscotch-map#growth-policy) section to change the default behaviour or change your hash function.
+
+### Key features
+- Header-only library, just add [src/](src/) to your include path and you are ready to go.
+- Fast hash table, see [benchmark](https://tessil.github.io/2016/08/29/benchmark-hopscotch-map.html) for some numbers.
+- Support for move-only and non-default constructible key/value.
+- Support for heterogeneous lookups (e.g. if you have a map that uses `std::unique_ptr<int>` as key, you could use an `int*` or a `std::uintptr_t` as key parameter to `find`, see [example](https://github.com/Tessil/hopscotch-map#heterogeneous-lookups)).
+- No need to reserve any sentinel value from the keys.
+- Possibility to store the hash value on insert for faster rehash and lookup if the hash or the key equal functions are expensive to compute (see the [StoreHash](https://tessil.github.io/hopscotch-map/doc/html/classtsl_1_1hopscotch__map.html#details) template parameter).
+- If the hash is known before a lookup, it is possible to pass it as parameter to speed-up the lookup.
+- The `tsl::hopscotch_sc_map` and `tsl::hopscotch_sc_set` provide a worst-case complexity of O(log n) on lookups and deletions, making these classes resistant to hash table Denial of Service (DoS) attacks (see [details](https://github.com/Tessil/hopscotch-map#deny-of-service-dos-attack) in example).
+- API closely similar to `std::unordered_map` and `std::unordered_set`.
+
+### Differences compare to `std::unordered_map`
+`tsl::hopscotch_map` tries to have an interface similar to `std::unordered_map`, but some differences exist.
+- Iterator invalidation on insert doesn't behave in the same way (see [API](https://tessil.github.io/hopscotch-map/doc/html/classtsl_1_1hopscotch__map.html#details) for details).
+- References and pointers to keys or values in the map are invalidated in the same way as iterators to these keys-values on insert.
+- The size of the bucket array in the map grows by a factor of two, so the size is always a power of two, which may be too steep a growth rate for some purposes. The growth policy is modifiable (see the [`GrowthPolicy`](https://github.com/Tessil/hopscotch-map#growth-policy) template parameter) but changing it may reduce the speed of the hash map.
+- For iterators, `operator*()` and `operator->()` return a reference and a pointer to `const std::pair<Key, T>` instead of `std::pair<const Key, T>` making the value `T` not modifiable. To modify the value you have to call the `value()` method of the iterator to get a mutable reference. Example:
+```c++
+tsl::hopscotch_map<int, int> map = {{1, 1}, {2, 1}, {3, 1}};
+for(auto it = map.begin(); it != map.end(); ++it) {
+    //it->second = 2; // Illegal
+    it.value() = 2; // Ok
+}
+```
+- Move-only types must have a nothrow move constructor (with open addressing, it is not possible to keep the strong exception guarantee on rehash if the move constructor may throw).
+- No support for some buckets related methods (like bucket_size, bucket, ...).
+
+These differences also apply between `std::unordered_set` and `tsl::hopscotch_set`.
+
+Thread-safety and exceptions guarantees are the same as `std::unordered_map/set`.
+
+### Differences compare to `google::dense_hash_map`
+`tsl::hopscotch_map` has performance comparable to `google::dense_hash_map` (see [benchmark](https://tessil.github.io/2016/08/29/benchmark-hopscotch-map.html)), but comes with some advantages.
+- There is no need to reserve sentinel values for the key as it is required by `google::dense_hash_map` where you need to have a sentinel for empty and deleted keys.
+- The type of the value in the map doesn't need a default constructor.
+- The key and the value of the map don't need a copy constructor/operator, move-only types are supported.
+- It uses less memory for its speed as it can sustain a load factor of 0.95 (which is the default value in the library, compared to the 0.5 of `google::dense_hash_map`) while keeping good performance.
+
+### Growth policy
+
+By default `tsl::hopscotch_map/set` uses `tsl::power_of_two_growth_policy` as `GrowthPolicy`. This policy keeps the size of the map to a power of two by doubling the size of the map when a rehash is required. It allows the map to avoid the usage of the slow modulo operation: instead of <code>hash % 2<sup>n</sup></code>, it uses <code>hash & (2<sup>n</sup> - 1)</code>.
+
+This may cause a lot of collisions with a poor hash function as the modulo just masks the most significant bits.
+
+If you encounter poor performance, check `overflow_size()`. If it is not zero, you may have a lot of collisions due to a common pattern in the least significant bits. Either change the hash function for something more uniform or use `tsl::prime_growth_policy` which keeps the size of the map to a prime size.
+
+You can also use `tsl::mod_growth_policy` if you want a more configurable growth rate or you could even define your own policy (see [API](https://tessil.github.io/hopscotch-map/doc/html/classtsl_1_1hopscotch__map.html#details)).
+
+A bad distribution may lead to a runtime complexity of O(n) for lookups. Unfortunately it is sometimes difficult to guard yourself against it (e.g. DoS attack on the hash map). If needed, check `tsl::hopscotch_sc_map/set` which offer a worst-case scenario of O(log n) on lookups, see [details](https://github.com/Tessil/hopscotch-map#deny-of-service-dos-attack) in example.
+
+### Installation
+To use hopscotch-map, just add the [src/](src/) directory to your include path. It is a **header-only** library.
+
+The code should work with any C++11 standard-compliant compiler and has been tested with GCC 4.8.4, Clang 3.5.0 and Visual Studio 2015.
+
+To run the tests you will need the Boost Test library and CMake. 
+
+```bash
+git clone https://github.com/Tessil/hopscotch-map.git
+cd hopscotch-map
+mkdir build
+cd build
+cmake ..
+make
+./test_hopscotch_map 
+```
+
+
+### Usage
+The API can be found [here](https://tessil.github.io/hopscotch-map/doc/html/). 
+
+Not all methods are documented yet, but they replicate the behaviour of their counterparts in `std::unordered_map` and `std::unordered_set`, unless specified otherwise.
+
+### Example
+```c++
+#include <cstdint>
+#include <iostream>
+#include <string>
+#include "hopscotch_map.h"
+#include "hopscotch_set.h"
+
+int main() {
+    tsl::hopscotch_map<std::string, int> map = {{"a", 1}, {"b", 2}};
+    map["c"] = 3;
+    map["d"] = 4;
+    
+    map.insert({"e", 5});
+    map.erase("b");
+    
+    for(auto it = map.begin(); it != map.end(); ++it) {
+        //it->second += 2; // Not valid.
+        it.value() += 2;
+    }
+    
+    // {d, 6} {a, 3} {e, 7} {c, 5}
+    for(const auto& key_value : map) {
+        std::cout << "{" << key_value.first << ", " << key_value.second << "}" << std::endl;
+    }
+    
+    
+    
+    
+    /*
+     * Calculating the hash and comparing two std::string may be slow. 
+     * We can store the hash of each std::string in the hash map to make 
+     * the inserts and lookups faster by setting StoreHash to true.
+     */ 
+    tsl::hopscotch_map<std::string, int, std::hash<std::string>, 
+                       std::equal_to<std::string>,
+                       std::allocator<std::pair<std::string, int>>,
+                       30, true> map2;
+                       
+    map2["a"] = 1;
+    map2["b"] = 2;
+    
+    // {a, 1} {b, 2}
+    for(const auto& key_value : map2) {
+        std::cout << "{" << key_value.first << ", " << key_value.second << "}" << std::endl;
+    }
+    
+    
+    
+    
+    tsl::hopscotch_set<int> set;
+    set.insert({1, 9, 0});
+    set.insert({2, -1, 9});
+    
+    // {0} {1} {2} {9} {-1}
+    for(const auto& key : set) {
+        std::cout << "{" << key << "}" << std::endl;
+    }
+} 
+```
+
+#### Heterogeneous lookups
+
+Heterogeneous overloads allow the usage of other types than `Key` for lookup and erase operations as long as the used types are hashable and comparable to `Key`.
+
+To activate the heterogeneous overloads in `tsl::hopscotch_map/set`, the qualified-id `KeyEqual::is_transparent` must be valid. It works the same way as for [`std::map::find`](http://en.cppreference.com/w/cpp/container/map/find). You can either use [`std::equal_to<>`](http://en.cppreference.com/w/cpp/utility/functional/equal_to_void) or define your own function object.
+
+Both `KeyEqual` and `Hash` will need to be able to deal with the different types.
+
+```c++
+#include <functional>
+#include <iostream>
+#include <string>
+#include "hopscotch_map.h"
+
+
+struct employee {
+    employee(int id, std::string name) : m_id(id), m_name(std::move(name)) {
+    }
+    
+    friend bool operator==(const employee& empl, int empl_id) {
+        return empl.m_id == empl_id;
+    }
+    
+    friend bool operator==(int empl_id, const employee& empl) {
+        return empl_id == empl.m_id;
+    }
+    
+    friend bool operator==(const employee& empl1, const employee& empl2) {
+        return empl1.m_id == empl2.m_id;
+    }
+    
+    
+    int m_id;
+    std::string m_name;
+};
+
+struct hash_employee {
+    std::size_t operator()(const employee& empl) const {
+        return std::hash<int>()(empl.m_id);
+    }
+    
+    std::size_t operator()(int id) const {
+        return std::hash<int>()(id);
+    }
+};
+
+struct equal_employee {
+    using is_transparent = void;
+    
+    bool operator()(const employee& empl, int empl_id) const {
+        return empl.m_id == empl_id;
+    }
+    
+    bool operator()(int empl_id, const employee& empl) const {
+        return empl_id == empl.m_id;
+    }
+    
+    bool operator()(const employee& empl1, const employee& empl2) const {
+        return empl1.m_id == empl2.m_id;
+    }
+};
+
+
+int main() {
+    // Use std::equal_to<> which will automatically deduce and forward the parameters
+    tsl::hopscotch_map<employee, int, hash_employee, std::equal_to<>> map; 
+    map.insert({employee(1, "John Doe"), 2001});
+    map.insert({employee(2, "Jane Doe"), 2002});
+    map.insert({employee(3, "John Smith"), 2003});
+
+    // John Smith 2003
+    auto it = map.find(3);
+    if(it != map.end()) {
+        std::cout << it->first.m_name << " " << it->second << std::endl;
+    }
+
+    map.erase(1);
+
+
+
+    // Use a custom KeyEqual which has an is_transparent member type
+    tsl::hopscotch_map<employee, int, hash_employee, equal_employee> map2;
+    map2.insert({employee(4, "Johnny Doe"), 2004});
+
+    // 2004
+    std::cout << map2.at(4) << std::endl;
+} 
+```
+
+#### Deny of Service (DoS) attack
+In addition to `tsl::hopscotch_map` and `tsl::hopscotch_set`, the library provides two more "secure" options: `tsl::hopscotch_sc_map` and `tsl::hopscotch_sc_set`. 
+
+These two additions have a worst-case runtime of O(log n) for lookups and deletions and an amortized worst case of O(log n) for insertions (amortized due to the possibility of rehash which would be in O(n)). Even if the hash function maps all the elements to the same bucket, the O(log n) would still hold.
+
+This provides protection against hash table Denial of Service attacks.
+
+To achieve this, the "secure" versions use a binary search tree for the overflown elements (see [implementation details](https://tessil.github.io/2016/08/29/hopscotch-hashing.html)) and thus need the elements to be `LessThanComparable`. An additional `Compare` template parameter is needed.
+
+```c++
+#include <chrono>
+#include <cstdint>
+#include <iostream>
+#include "hopscotch_map.h"
+#include "hopscotch_sc_map.h"
+
+/*
+ * Poor hash function which always returns 1 to simulate
+ * a Deny of Service attack.
+ */
+struct dos_attack_simulation_hash {
+    std::size_t operator()(int id) const {
+        return 1;
+    }
+};
+
+int main() {
+    /*
+     * Slow due to the hash function, insertions are done in O(n).
+     */
+    tsl::hopscotch_map<int, int, dos_attack_simulation_hash> map;
+    
+    auto start = std::chrono::high_resolution_clock::now();
+    for(int i=0; i < 10000; i++) {
+        map.insert({i, 0});
+    }
+    auto end = std::chrono::high_resolution_clock::now();
+    
+    // 110 ms
+    auto duration = std::chrono::duration_cast<std::chrono::milliseconds>(end-start);
+    std::cout << duration.count() << " ms" << std::endl;
+    
+    
+    
+    
+    /*
+     * Faster. Even with the poor hash function, insertions end up being
+     * O(log n) on average (and O(n) when a rehash occurs).
+     */
+    tsl::hopscotch_sc_map<int, int, dos_attack_simulation_hash> map_secure;
+    
+    start = std::chrono::high_resolution_clock::now();
+    for(int i=0; i < 10000; i++) {
+        map_secure.insert({i, 0});
+    }
+    end = std::chrono::high_resolution_clock::now();
+    
+    // 2 ms
+    duration = std::chrono::duration_cast<std::chrono::milliseconds>(end-start);
+    std::cout << duration.count() << " ms" << std::endl;
+} 
+```
+
+### License
+
+The code is licensed under the MIT license, see the [LICENSE file](LICENSE) for details.
diff --git a/dbms/src/Common/tests/hopscotch-map/src/hopscotch_hash.h b/dbms/src/Common/tests/hopscotch-map/src/hopscotch_hash.h
new file mode 100644
index 00000000000..2840c8880b5
--- /dev/null
+++ b/dbms/src/Common/tests/hopscotch-map/src/hopscotch_hash.h
@@ -0,0 +1,1873 @@
+/**
+ * MIT License
+ * 
+ * Copyright (c) 2017 Tessil
+ * 
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ * 
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ * 
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#ifndef TSL_HOPSCOTCH_HASH_H
+#define TSL_HOPSCOTCH_HASH_H
+
+
+#include <algorithm>
+#include <array>
+#include <cassert>
+#include <cmath>
+#include <climits>
+#include <cstddef>
+#include <cstdint>
+#include <exception>
+#include <functional>
+#include <initializer_list>
+#include <iterator>
+#include <limits>
+#include <memory>
+#include <ratio>
+#include <stdexcept>
+#include <tuple>
+#include <type_traits>
+#include <utility>
+#include <vector>
+
+#if (defined(__GNUC__) && (__GNUC__ == 4) && (__GNUC_MINOR__ < 9))
+#define TSL_NO_RANGE_ERASE_WITH_CONST_ITERATOR
+#endif
+
+
+
+/*
+ * tsl_assert(expr) forwards to assert(expr) only if TSL_DEBUG is defined; otherwise it expands to a no-op.
+ * As tsl_assert is used a lot, this avoids the performance hit of assert in builds where NDEBUG is not defined
+ * (people usually compile with "-O3" and not "-O3 -DNDEBUG"). A tsl_assert macro defined beforehand is kept as is.
+ */
+#ifndef tsl_assert
+    #ifdef TSL_DEBUG
+    #define tsl_assert(expr) assert(expr)
+    #else
+    #define tsl_assert(expr) (static_cast<void>(0))
+    #endif
+#endif
+
+namespace tsl {
+
+/**
+ * Growth policy which doubles the bucket count on each growth and keeps it a power of two. It allows
+ * the map to use a mask operation (hash & m_mask) instead of a modulo operation to map a hash to a bucket.
+ */
+class power_of_two_growth_policy {
+public:
+    /**
+     * Called on map creation and rehash. min_bucket_count_in_out is the requested minimum on input; on output it is
+     * raised to MIN_BUCKETS_SIZE if smaller and rounded up to a power of two. Throws std::length_error if too large.
+     */
+    power_of_two_growth_policy(std::size_t& min_bucket_count_in_out) {
+        if(min_bucket_count_in_out > max_bucket_count()) {
+            throw std::length_error("The map exceeds its maxmimum size.");
+        }
+        
+        static_assert(MIN_BUCKETS_SIZE > 0, "");
+        const std::size_t min_bucket_count = MIN_BUCKETS_SIZE; // local copy: std::max takes its arguments by reference
+        
+        min_bucket_count_in_out = std::max(min_bucket_count, min_bucket_count_in_out);
+        min_bucket_count_in_out = round_up_to_power_of_two(min_bucket_count_in_out);
+        m_mask = min_bucket_count_in_out - 1; // power of two minus one: every low bit is set
+    }
+    
+    /**
+     * Return the bucket to which the hash belongs: the bits of the hash selected by m_mask (bucket count - 1).
+     */
+    std::size_t bucket_for_hash(std::size_t hash) const {
+        return hash & m_mask;
+    }
+    
+    /**
+     * Return the bucket count to use when the bucket array grows on rehash (twice the current count).
+     */
+    std::size_t next_bucket_count() const {
+        if((m_mask + 1) > max_bucket_count()/2) { // doubling would go past max_bucket_count()
+            throw std::length_error("The map exceeds its maxmimum size.");
+        }
+        
+        return (m_mask + 1) * 2;
+    }
+    
+    /**
+     * Return the maximum number of buckets supported by the policy (the highest power of two a std::size_t can hold).
+     */
+    std::size_t max_bucket_count() const {
+        return std::numeric_limits<std::size_t>::max()/2 + 1;
+    }
+    
+private:
+    static std::size_t round_up_to_power_of_two(std::size_t value) { // smallest power of two >= value, 1 for 0
+        if(value == 0) {
+            return 1;
+        }
+        
+        if(is_power_of_two(value)) {
+            return value;
+        }
+            
+        --value;
+        for(std::size_t i = 1; i < sizeof(std::size_t) * CHAR_BIT; i *= 2) {
+            value |= value >> i; // propagate the highest set bit into every lower position
+        }
+        
+        return value + 1; // all-ones below the highest bit, plus one, gives the next power of two
+    }
+    
+    static constexpr bool is_power_of_two(std::size_t value) { // true when exactly one bit of value is set
+        return value != 0 && (value & (value - 1)) == 0;
+    }
+    
+private:
+    static const std::size_t MIN_BUCKETS_SIZE = 2; // lower bound applied to the requested bucket count
+    
+    std::size_t m_mask; // bucket count - 1
+};
+
+/**
+ * Grow the map by GrowthFactor::num/GrowthFactor::den and use a modulo to map a hash
+ * to a bucket. Slower but it can be useful if you want a slower growth.
+ */
+template<class GrowthFactor = std::ratio<3, 2>>
+class mod_growth_policy {
+public:
+    mod_growth_policy(std::size_t& min_bucket_count_in_out) { // in-out: raised to MIN_BUCKETS_SIZE if smaller
+        if(min_bucket_count_in_out > max_bucket_count()) {
+            throw std::length_error("The map exceeds its maxmimum size.");
+        }
+        
+        static_assert(MIN_BUCKETS_SIZE > 0, "");
+        const std::size_t min_bucket_count = MIN_BUCKETS_SIZE; // local copy: std::max takes its arguments by reference
+        
+        min_bucket_count_in_out = std::max(min_bucket_count, min_bucket_count_in_out);
+        m_bucket_count = min_bucket_count_in_out;
+    }
+    
+    std::size_t bucket_for_hash(std::size_t hash) const { // bucket index in [0, m_bucket_count)
+        tsl_assert(m_bucket_count != 0);
+        return hash % m_bucket_count;
+    }
+    
+    std::size_t next_bucket_count() const { // current count times the growth factor, rounded up, capped at the max
+        if(m_bucket_count == max_bucket_count()) {
+            throw std::length_error("The map exceeds its maxmimum size.");
+        }
+        
+        const double next_bucket_count = std::ceil(double(m_bucket_count) * REHASH_SIZE_MULTIPLICATION_FACTOR);
+        if(!std::isnormal(next_bucket_count)) { // rejects zero, subnormal, infinite and NaN results
+            throw std::length_error("The map exceeds its maxmimum size.");
+        }
+        
+        if(next_bucket_count > double(max_bucket_count())) {
+            return max_bucket_count();
+        }
+        else {
+            return std::size_t(next_bucket_count);
+        }
+    }
+    
+    std::size_t max_bucket_count() const { // highest bucket count accepted by this policy
+        return MAX_BUCKET_COUNT;
+    }
+    
+private:
+    static const std::size_t MIN_BUCKETS_SIZE = 2; // lower bound applied to the requested bucket count
+    static constexpr double REHASH_SIZE_MULTIPLICATION_FACTOR = 1.0*GrowthFactor::num/GrowthFactor::den;
+    static const std::size_t MAX_BUCKET_COUNT = 
+            std::size_t(double(
+                    std::numeric_limits<std::size_t>::max()/REHASH_SIZE_MULTIPLICATION_FACTOR
+            )); // the maximum std::size_t value divided by the growth factor
+            
+    static_assert(REHASH_SIZE_MULTIPLICATION_FACTOR >= 1.1, "Growth factor should be >= 1.1.");
+    
+    std::size_t m_bucket_count; // current number of buckets, used as the modulo divisor
+};
+
+
+
+namespace detail_hopscotch_hash {
+
+static constexpr const std::array<std::size_t, 39> PRIMES = {{ // bucket counts of prime_growth_policy, ascending
+    5ul, 17ul, 29ul, 37ul, 53ul, 67ul, 79ul, 97ul, 131ul, 193ul, 257ul, 389ul, 521ul, 769ul, 1031ul, 1543ul, 2053ul, 
+    3079ul, 6151ul, 12289ul, 24593ul, 49157ul, 98317ul, 196613ul, 393241ul, 786433ul, 1572869ul, 3145739ul, 
+    6291469ul, 12582917ul, 25165843ul, 50331653ul, 100663319ul, 201326611ul, 402653189ul, 805306457ul, 
+    1610612741ul, 3221225473ul, 4294967291ul
+}};
+
+template<unsigned int IPrime>
+static std::size_t mod(std::size_t hash) { return hash % PRIMES[IPrime]; } // modulo by a compile-time selected entry
+
+// MOD_PRIME[iprime](hash) returns hash % PRIMES[iprime]. This table allows for a faster modulo as the
+// compiler can optimize the modulo code better with a divisor known at compile time.
+static constexpr const std::array<std::size_t(*)(std::size_t), 39> MOD_PRIME = {{ 
+    &mod<0>, &mod<1>, &mod<2>, &mod<3>, &mod<4>, &mod<5>, &mod<6>, &mod<7>, &mod<8>, &mod<9>, &mod<10>, 
+    &mod<11>, &mod<12>, &mod<13>, &mod<14>, &mod<15>, &mod<16>, &mod<17>, &mod<18>, &mod<19>, &mod<20>, 
+    &mod<21>, &mod<22>, &mod<23>, &mod<24>, &mod<25>, &mod<26>, &mod<27>, &mod<28>, &mod<29>, &mod<30>, 
+    &mod<31>, &mod<32>, &mod<33>, &mod<34>, &mod<35>, &mod<36>, &mod<37> , &mod<38>
+}};
+
+} // namespace detail_hopscotch_hash
+
+/**
+ * Grow the map by using the numbers of detail_hopscotch_hash::PRIMES as sizes. Slower than
+ * tsl::power_of_two_growth_policy in general but will probably spread the values better with a poor hash function.
+ */
+class prime_growth_policy {
+public:
+    prime_growth_policy(std::size_t& min_bucket_count_in_out) { // in-out: replaced by the first PRIMES entry >= it
+        auto it_prime = std::lower_bound(tsl::detail_hopscotch_hash::PRIMES.begin(), 
+                                         tsl::detail_hopscotch_hash::PRIMES.end(), min_bucket_count_in_out);
+        if(it_prime == tsl::detail_hopscotch_hash::PRIMES.end()) { // requested count is above the last entry
+            throw std::length_error("The map exceeds its maxmimum size.");
+        }
+        
+        m_iprime = static_cast<unsigned int>(std::distance(tsl::detail_hopscotch_hash::PRIMES.begin(), it_prime));
+        min_bucket_count_in_out = *it_prime;
+    }
+    
+    std::size_t bucket_for_hash(std::size_t hash) const { // hash modulo the currently selected PRIMES entry
+        return bucket_for_hash_iprime(hash, m_iprime);
+    }
+    
+    std::size_t next_bucket_count() const { // next PRIMES entry; throws std::length_error when already at the last
+        if(m_iprime + 1 >= tsl::detail_hopscotch_hash::PRIMES.size()) {
+            throw std::length_error("The map exceeds its maxmimum size.");
+        }
+        
+        return tsl::detail_hopscotch_hash::PRIMES[m_iprime + 1];
+    }   
+    
+    std::size_t max_bucket_count() const { // last entry of PRIMES
+        return tsl::detail_hopscotch_hash::PRIMES.back();
+    }
+    
+private:  
+    std::size_t bucket_for_hash_iprime(std::size_t hash, unsigned int iprime) const { // dispatch through MOD_PRIME
+        tsl_assert(iprime < tsl::detail_hopscotch_hash::MOD_PRIME.size());
+        return tsl::detail_hopscotch_hash::MOD_PRIME[iprime](hash);
+    }
+    
+private:
+    unsigned int m_iprime; // index into PRIMES / MOD_PRIME of the current bucket count
+};
+
+
+namespace detail_hopscotch_hash {
+    
+    
+    
+template<typename T>
+struct make_void { // maps any well-formed type T to void
+    using type = void;
+};
+
+
+template<typename T, typename = void>
+struct has_is_transparent : std::false_type { // default: T has no nested is_transparent type
+};
+
+template<typename T>
+struct has_is_transparent<T, typename make_void<typename T::is_transparent>::type> : std::true_type {
+}; // selected only when T::is_transparent names a type
+
+
+template<typename T, typename = void>
+struct has_key_compare : std::false_type { // default: T has no nested key_compare type
+};
+
+template<typename T>
+struct has_key_compare<T, typename make_void<typename T::key_compare>::type> : std::true_type {
+}; // selected only when T::key_compare names a type
+
+
+
+
+
+/*
+ * smallest_type_for_min_bits<MinBits>::type is the std::uint_leastN_t (N = 8, 16, 32, 64) with the smallest N >= MinBits.
+ */
+static const size_t SMALLEST_TYPE_MAX_BITS_SUPPORTED = 64;
+template<unsigned int MinBits, typename Enable = void>
+class smallest_type_for_min_bits { // primary template: no 'type' member (MinBits is 0 or above 64)
+};
+
+template<unsigned int MinBits>
+class smallest_type_for_min_bits<MinBits, typename std::enable_if<(MinBits > 0) && (MinBits <= 8)>::type> {
+public:
+    using type = std::uint_least8_t;
+};
+
+template<unsigned int MinBits>
+class smallest_type_for_min_bits<MinBits, typename std::enable_if<(MinBits > 8) && (MinBits <= 16)>::type> {
+public:
+    using type = std::uint_least16_t;
+};
+
+template<unsigned int MinBits>
+class smallest_type_for_min_bits<MinBits, typename std::enable_if<(MinBits > 16) && (MinBits <= 32)>::type> {
+public:
+    using type = std::uint_least32_t;
+};
+
+template<unsigned int MinBits>
+class smallest_type_for_min_bits<MinBits, typename std::enable_if<(MinBits > 32) && (MinBits <= 64)>::type> {
+public:
+    using type = std::uint_least64_t;
+};
+        
+
+
+/*
+ * Each bucket may store up to three elements:
+ * - An aligned storage to store a value_type object with placement-new.
+ * - An (optional) hash of the value in the bucket.
+ * - An unsigned integer of type neighborhood_bitmap used to tell us which buckets in the neighborhood of the 
+ *   current bucket contain a value with a hash belonging to the current bucket. 
+ * 
+ * For a bucket 'b' a bit 'i' (counting from 0 and from the least significant bit to the most significant) 
+ * set to 1 means that the bucket 'b+i' contains a value with a hash belonging to bucket 'b'.
+ * The bits used for that, start from the third least significant bit.
+ * 
+ * The least significant bit is set to 1 if there is a value in the bucket storage.
+ * The second least significant bit is set to 1 if there is an overflow. More than NeighborhoodSize values 
+ * give the same hash, all overflow values are stored in the m_overflow_elements list of the map.
+ */
+static const std::size_t NB_RESERVED_BITS_IN_NEIGHBORHOOD = 2; 
+
+
+template<bool StoreHash>
+class hopscotch_bucket_hash { // primary template (StoreHash == false): no hash is kept in the bucket
+public:    
+    using hash_type = std::false_type;
+    
+    bool bucket_hash_equal(std::size_t /*hash*/) const noexcept { // nothing stored to compare with: always true
+        return true;
+    }
+    
+    std::size_t truncated_bucket_hash() const noexcept { // no stored hash to return: asserts, then returns 0
+        assert(false);
+        return 0;
+    }
+    
+protected:    
+    void copy_hash(const hopscotch_bucket_hash& ) noexcept { // no-op, there is no hash to copy
+    }
+    
+    void set_hash(std::size_t /*hash*/) noexcept { // no-op, the hash is not stored
+    }
+};
+
+template<>
+class hopscotch_bucket_hash<true> { // StoreHash == true: keeps the hash, truncated to hash_type, in the bucket
+public:
+    using hash_type = std::uint_least32_t;
+    static_assert(sizeof(hash_type) <= sizeof(std::size_t), "");
+    
+    bool bucket_hash_equal(std::size_t hash) const noexcept { // compares against 'hash' truncated to hash_type
+        return m_hash == hash_type(hash);
+    }
+    
+    std::size_t truncated_bucket_hash() const noexcept { // the stored (truncated) hash
+        return m_hash;
+    }
+    
+protected:    
+    void copy_hash(const hopscotch_bucket_hash& bucket) noexcept { // take over the stored hash of 'bucket'
+        m_hash = bucket.m_hash;
+    }
+    
+    void set_hash(std::size_t hash) noexcept { // store 'hash' truncated to hash_type
+        m_hash = hash_type(hash);
+    }
+    
+private:    
+    hash_type m_hash; // not initialized on construction, only written by set_hash()/copy_hash()
+};
+
+template<typename ValueType, unsigned int NeighborhoodSize, bool StoreHash>
+class hopscotch_bucket: public hopscotch_bucket_hash<StoreHash> { // one bucket: raw value storage + bitmap + hash
+private:
+    static const size_t MIN_NEIGHBORHOOD_SIZE = 4;
+    static const size_t MAX_NEIGHBORHOOD_SIZE = SMALLEST_TYPE_MAX_BITS_SUPPORTED - NB_RESERVED_BITS_IN_NEIGHBORHOOD; 
+    
+    
+    static_assert(NeighborhoodSize >= 4, "NeighborhoodSize should be >= 4.");
+    // The message can't reference the constant, so check that the literal used above matches it
+    static_assert(MIN_NEIGHBORHOOD_SIZE == 4, ""); 
+    
+    static_assert(NeighborhoodSize <= 62, "NeighborhoodSize should be <= 62.");
+    // The message can't reference the constant, so check that the literal used above matches it
+    static_assert(MAX_NEIGHBORHOOD_SIZE == 62, ""); 
+    
+    
+    static_assert(!StoreHash || NeighborhoodSize <= 30, 
+                  "NeighborhoodSize should be <= 30 if StoreHash is true.");
+    // The message can't reference the constant, so check that the literal used above matches it
+    static_assert(MAX_NEIGHBORHOOD_SIZE - 32 == 30, "");
+    
+    using bucket_hash = hopscotch_bucket_hash<StoreHash>;
+    
+public:
+    using value_type = ValueType;
+    using neighborhood_bitmap = 
+                typename smallest_type_for_min_bits<NeighborhoodSize + NB_RESERVED_BITS_IN_NEIGHBORHOOD>::type;
+
+
+    hopscotch_bucket() noexcept: bucket_hash(), m_neighborhood_infos(0) { // empty bucket, all bits cleared
+        tsl_assert(empty());
+    }
+    
+    
+    hopscotch_bucket(const hopscotch_bucket& bucket) 
+        noexcept(std::is_nothrow_copy_constructible<value_type>::value): bucket_hash(bucket), 
+                                                                         m_neighborhood_infos(0) 
+    {
+        if(!bucket.empty()) { // only an occupied bucket holds a constructed value to copy
+            ::new (static_cast<void*>(std::addressof(m_value))) value_type(bucket.value());
+        }
+        
+        m_neighborhood_infos = bucket.m_neighborhood_infos; // set last, once the value (if any) is constructed
+    }
+    
+    hopscotch_bucket(hopscotch_bucket&& bucket)
+        noexcept(std::is_nothrow_move_constructible<value_type>::value) : bucket_hash(std::move(bucket)),
+                                                                          m_neighborhood_infos(0) 
+    {
+        if(!bucket.empty()) { // the source keeps its (moved-from) value and its bits, it is not emptied here
+            ::new (static_cast<void*>(std::addressof(m_value))) value_type(std::move(bucket.value()));
+        }
+        
+        m_neighborhood_infos = bucket.m_neighborhood_infos; // set last, once the value (if any) is constructed
+    }
+     
+    hopscotch_bucket& operator=(const hopscotch_bucket& bucket) 
+        noexcept(std::is_nothrow_copy_constructible<value_type>::value) 
+    {
+        if(this != &bucket) {
+            remove_value(); // destroy the current value first, the storage is reused with placement-new below
+            
+            bucket_hash::operator=(bucket);
+            if(!bucket.empty()) {
+                ::new (static_cast<void*>(std::addressof(m_value))) value_type(bucket.value());
+            }
+            
+            m_neighborhood_infos = bucket.m_neighborhood_infos;
+        }
+        
+        return *this;
+    }
+    
+    hopscotch_bucket& operator=(hopscotch_bucket&& ) = delete;
+     
+    ~hopscotch_bucket() noexcept {
+        if(!empty()) { // the raw storage only holds a live object when the bucket is occupied
+            destroy_value();
+        }
+    }
+    
+    neighborhood_bitmap neighborhood_infos() const noexcept { // the bitmap without the reserved low bits
+        return neighborhood_bitmap(m_neighborhood_infos >> NB_RESERVED_BITS_IN_NEIGHBORHOOD);
+    }
+    
+    void set_overflow(bool has_overflow) noexcept { // the overflow flag is the second least significant bit
+        if(has_overflow) {
+            m_neighborhood_infos = neighborhood_bitmap(m_neighborhood_infos | 2);
+        }
+        else {
+            m_neighborhood_infos = neighborhood_bitmap(m_neighborhood_infos & ~2);
+        }
+    }
+    
+    bool has_overflow() const noexcept { // reads the second least significant bit
+        return (m_neighborhood_infos & 2) != 0;
+    }
+    
+    bool empty() const noexcept { // the least significant bit is set when a value is stored
+        return (m_neighborhood_infos & 1) == 0;
+    }
+    
+    void toggle_neighbor_presence(std::size_t ineighbor) noexcept { // flips the presence bit of neighbor 'ineighbor'
+        tsl_assert(ineighbor <= NeighborhoodSize); // NOTE(review): '<' looks intended here - confirm with callers
+        m_neighborhood_infos = neighborhood_bitmap(
+                                    m_neighborhood_infos ^ (1ull << (ineighbor + NB_RESERVED_BITS_IN_NEIGHBORHOOD)));
+    }
+    
+    bool check_neighbor_presence(std::size_t ineighbor) const noexcept { // reads the presence bit of 'ineighbor'
+        tsl_assert(ineighbor <= NeighborhoodSize); // NOTE(review): '<' looks intended here - confirm with callers
+        if(((m_neighborhood_infos >> (ineighbor + NB_RESERVED_BITS_IN_NEIGHBORHOOD)) & 1) == 1) {
+            return true;
+        }
+        
+        return false;
+    }
+    
+    value_type& value() noexcept { // the stored value; the bucket must not be empty
+        tsl_assert(!empty());
+        return *reinterpret_cast<value_type*>(std::addressof(m_value));
+    }
+    
+    const value_type& value() const noexcept { // the stored value; the bucket must not be empty
+        tsl_assert(!empty());
+        return *reinterpret_cast<const value_type*>(std::addressof(m_value));
+    }
+    
+    template<typename... Args>
+    void set_value_of_empty_bucket(std::size_t hash, Args&&... value_type_args) { // construct a value in place
+        tsl_assert(empty());
+        
+        ::new (static_cast<void*>(std::addressof(m_value))) value_type(std::forward<Args>(value_type_args)...);
+        set_empty(false); // marked occupied only after the construction succeeded
+        this->set_hash(hash);
+    }
+    
+    void swap_value_into_empty_bucket(hopscotch_bucket& empty_bucket) { // move value and hash into empty_bucket
+        tsl_assert(empty_bucket.empty());
+        if(!empty()) { // nothing to do when this bucket holds no value
+            ::new (static_cast<void*>(std::addressof(empty_bucket.m_value))) value_type(std::move(value()));
+            empty_bucket.copy_hash(*this);
+            empty_bucket.set_empty(false);
+            
+            destroy_value();
+            set_empty(true); // only the occupancy bit changes, the other bits of both buckets are kept
+        }
+    }
+    
+    void remove_value() noexcept { // destroy the value if there is one; the other bits are kept
+        if(!empty()) {
+            destroy_value();
+            set_empty(true);
+        }
+    }
+    
+    void clear() noexcept { // destroy the value if there is one and reset every bit of the bucket
+        if(!empty()) {
+            destroy_value();
+        }
+        
+        m_neighborhood_infos = 0;
+        tsl_assert(empty());
+    }
+    
+    static std::size_t max_size() noexcept { // limited by the range of hash_type when the hash is stored
+        if(StoreHash) {
+            return std::numeric_limits<typename bucket_hash::hash_type>::max();
+        }
+        else {
+            return std::numeric_limits<std::size_t>::max();
+        }
+    }
+    
+private:
+    void set_empty(bool is_empty) noexcept { // clears (empty) or sets (occupied) the least significant bit
+        if(is_empty) {
+            m_neighborhood_infos = neighborhood_bitmap(m_neighborhood_infos & ~1);
+        }
+        else {
+            m_neighborhood_infos = neighborhood_bitmap(m_neighborhood_infos | 1);
+        }
+    }
+    
+    void destroy_value() noexcept { // runs the destructor of the stored value, the occupancy bit is not touched
+        try {
+            tsl_assert(!empty());
+            
+            value().~value_type();
+        }
+        catch(...) { // an exception escaping the destructor terminates the program
+            std::terminate();
+        }
+    }
+    
+private:
+    using storage = typename std::aligned_storage<sizeof(value_type), alignof(value_type)>::type;
+    
+    neighborhood_bitmap m_neighborhood_infos; // bit 0: occupied, bit 1: overflow, higher bits: neighbor presence
+    storage m_value; // raw storage, holds a constructed value_type only while the bucket is not empty
+};
+
+
+/**
+ * Internal common class used by hopscotch_(sc)_map and hopscotch_(sc)_set.
+ * 
+ * ValueType is what will be stored by hopscotch_hash (usually std::pair<Key, T> for map and Key for set).
+ * 
+ * KeySelect should be a FunctionObject which takes a ValueType in parameter and returns a reference to the key.
+ * 
+ * ValueSelect should be a FunctionObject which takes a ValueType in parameter and returns a reference to the value.
+ * ValueSelect should be void if there is no value (in set for example).
+ * 
+ * OverflowContainer will be used as containers for overflown elements. Usually it should be a list<ValueType>
+ * or a set<Key>/map<Key, T>.
+ */
+template<class ValueType,
+         class KeySelect,
+         class ValueSelect,
+         class Hash,
+         class KeyEqual,
+         class Allocator,
+         unsigned int NeighborhoodSize,
+         bool StoreHash,
+         class GrowthPolicy,
+         class OverflowContainer>
+class hopscotch_hash: private Hash, private KeyEqual, private GrowthPolicy {
+private:
+    template<typename U>
+    using has_mapped_type = typename std::integral_constant<bool, !std::is_same<U, void>::value>;
+    
+public:
+    template<bool is_const>
+    class hopscotch_iterator;
+    
+    using key_type = typename KeySelect::key_type;
+    using value_type = ValueType;
+    using size_type = std::size_t;
+    using difference_type = std::ptrdiff_t;
+    using hasher = Hash;
+    using key_equal = KeyEqual;
+    using allocator_type = Allocator;
+    using reference = value_type&;
+    using const_reference = const value_type&;
+    using pointer = value_type*;
+    using const_pointer = const value_type*;
+    using iterator = hopscotch_iterator<false>;
+    using const_iterator = hopscotch_iterator<true>;
+    
+private:
+    using hopscotch_bucket = tsl::detail_hopscotch_hash::hopscotch_bucket<ValueType, NeighborhoodSize, StoreHash>;
+    using neighborhood_bitmap = typename hopscotch_bucket::neighborhood_bitmap;
+    
+    using buckets_allocator = typename std::allocator_traits<allocator_type>::template rebind_alloc<hopscotch_bucket>;
+    using buckets_container_type = std::vector<hopscotch_bucket, buckets_allocator>;  
+    
+    using overflow_container_type = OverflowContainer;
+    
+    static_assert(std::is_same<typename overflow_container_type::value_type, ValueType>::value, 
+                  "OverflowContainer should have ValueType as type.");
+    
+    static_assert(std::is_same<typename overflow_container_type::allocator_type, Allocator>::value, 
+                  "Invalid allocator, not the same type as the value_type.");
+    
+    
+    using iterator_buckets = typename buckets_container_type::iterator; 
+    using const_iterator_buckets = typename buckets_container_type::const_iterator;
+    
+    using iterator_overflow = typename overflow_container_type::iterator; 
+    using const_iterator_overflow = typename overflow_container_type::const_iterator; 
+    
+public:    
+    /**
+     * The 'operator*()' and 'operator->()' methods return a const reference and const pointer respectively to the 
+     * stored value type. The buckets are iterated first, then the elements of the overflow container.
+     * 
+     * In case of a map, to get a modifiable reference to the value associated to a key (the '.second' in the 
+     * stored pair), you have to call 'value()'.
+     */
+    template<bool is_const>
+    class hopscotch_iterator {
+        friend class hopscotch_hash;
+    private:
+        using iterator_bucket = typename std::conditional<is_const, 
+                                                            typename hopscotch_hash::const_iterator_buckets, 
+                                                            typename hopscotch_hash::iterator_buckets>::type;
+        using iterator_overflow = typename std::conditional<is_const, 
+                                                            typename hopscotch_hash::const_iterator_overflow, 
+                                                            typename hopscotch_hash::iterator_overflow>::type;
+    
+        
+        hopscotch_iterator(iterator_bucket buckets_iterator, iterator_bucket buckets_end_iterator, 
+                           iterator_overflow overflow_iterator) noexcept : 
+            m_buckets_iterator(buckets_iterator), m_buckets_end_iterator(buckets_end_iterator),
+            m_overflow_iterator(overflow_iterator)
+        {
+        }
+        
+    public:
+        using iterator_category = std::forward_iterator_tag;
+        using value_type = const typename hopscotch_hash::value_type;
+        using difference_type = std::ptrdiff_t;
+        using reference = value_type&;
+        using pointer = value_type*;
+        
+        
+        hopscotch_iterator() noexcept { // the three underlying iterators are default-initialized
+        }
+        
+        hopscotch_iterator(const hopscotch_iterator<false>& other) noexcept : // from a non-const iterator
+            m_buckets_iterator(other.m_buckets_iterator), m_buckets_end_iterator(other.m_buckets_end_iterator),
+            m_overflow_iterator(other.m_overflow_iterator)
+        {
+        }
+        
+        const typename hopscotch_hash::key_type& key() const { // key of the current element, through KeySelect
+            if(m_buckets_iterator != m_buckets_end_iterator) { // still in the bucket array
+                return KeySelect()(m_buckets_iterator->value());
+            }
+            
+            return KeySelect()(*m_overflow_iterator); // otherwise the element is in the overflow container
+        }
+
+        template<class U = ValueSelect, typename std::enable_if<has_mapped_type<U>::value>::type* = nullptr>
+        typename std::conditional<
+                        is_const, 
+                        const typename U::value_type&, 
+                        typename U::value_type&>::type value() const // only available when ValueSelect is not void
+        {
+            if(m_buckets_iterator != m_buckets_end_iterator) {
+                return U()(m_buckets_iterator->value());
+            }
+            
+            return U()(*m_overflow_iterator);
+        }
+        
+        reference operator*() const { // const reference to the current element
+            if(m_buckets_iterator != m_buckets_end_iterator) {
+                return m_buckets_iterator->value();
+            }
+            
+            return *m_overflow_iterator;
+        }
+        
+        pointer operator->() const { // const pointer to the current element
+            if(m_buckets_iterator != m_buckets_end_iterator) {
+                return std::addressof(m_buckets_iterator->value()); 
+            }
+            
+            return std::addressof(*m_overflow_iterator); 
+        }
+        
+        hopscotch_iterator& operator++() {
+            if(m_buckets_iterator == m_buckets_end_iterator) { // buckets exhausted: advance in the overflow container
+                ++m_overflow_iterator;
+                return *this;
+            }
+            
+            do { // move to the next non-empty bucket, or to the end of the bucket array
+                ++m_buckets_iterator;
+            } while(m_buckets_iterator != m_buckets_end_iterator && m_buckets_iterator->empty());
+            
+            return *this; 
+        }
+        
+        hopscotch_iterator operator++(int) { // post-increment: returns the iterator as it was before advancing
+            hopscotch_iterator tmp(*this);
+            ++*this;
+            
+            return tmp;
+        }
+        
+        friend bool operator==(const hopscotch_iterator& lhs, const hopscotch_iterator& rhs) { 
+            return lhs.m_buckets_iterator == rhs.m_buckets_iterator && // the end-of-buckets iterator is not compared
+                   lhs.m_overflow_iterator == rhs.m_overflow_iterator; 
+        }
+        
+        friend bool operator!=(const hopscotch_iterator& lhs, const hopscotch_iterator& rhs) { 
+            return !(lhs == rhs); 
+        }
+        
+    private:
+        iterator_bucket m_buckets_iterator; // current position in the bucket array
+        iterator_bucket m_buckets_end_iterator; // end of the bucket array
+        iterator_overflow m_overflow_iterator; // current position in the overflow container
+    };
+    
+
+    
+public:
+    template<class OC = OverflowContainer, typename std::enable_if<!has_key_compare<OC>::value>::type* = nullptr>
+    hopscotch_hash(size_type bucket_count, // overload used when OverflowContainer has no key_compare member type
+                  const Hash& hash,
+                  const KeyEqual& equal,
+                  const Allocator& alloc,
+                  float max_load_factor) :  Hash(hash),
+                                            KeyEqual(equal),
+                                            GrowthPolicy(bucket_count),
+                                            m_buckets(alloc), 
+                                            m_overflow_elements(alloc),
+                                            m_nb_elements(0)
+    { // the growth policies of this file take bucket_count by reference and may have raised it at this point
+        if(bucket_count > max_bucket_count()) {
+            throw std::length_error("The map exceeds its maxmimum size.");
+        }
+        
+        static_assert(NeighborhoodSize - 1 > 0, "");
+        m_buckets.resize(bucket_count + NeighborhoodSize - 1); // NeighborhoodSize - 1 buckets more than bucket_count
+        
+        
+        this->max_load_factor(max_load_factor); // this-> is needed: the parameter hides the member function
+    }
+    
+    template<class OC = OverflowContainer, typename std::enable_if<has_key_compare<OC>::value>::type* = nullptr>
+    hopscotch_hash(size_type bucket_count, // overload used when OverflowContainer has a key_compare member type
+                  const Hash& hash,
+                  const KeyEqual& equal,
+                  const Allocator& alloc,
+                  float max_load_factor,
+                  const typename OC::key_compare& comp) : Hash(hash),
+                                                          KeyEqual(equal),
+                                                          GrowthPolicy(bucket_count),
+                                                          m_buckets(alloc), 
+                                                          m_overflow_elements(comp, alloc),
+                                                          m_nb_elements(0)
+    { // the growth policies of this file take bucket_count by reference and may have raised it at this point
+        
+        if(bucket_count > max_bucket_count()) {
+            throw std::length_error("The map exceeds its maxmimum size.");
+        }
+        
+        static_assert(NeighborhoodSize - 1 > 0, "");
+        
+        // The buckets can't be constructed with the appropriate size directly in the initializer list
+        // as m_buckets(bucket_count, alloc) is not supported by GCC 4.8
+        m_buckets.resize(bucket_count + NeighborhoodSize - 1); // NeighborhoodSize - 1 buckets more than bucket_count
+        
+        
+        this->max_load_factor(max_load_factor); // this-> is needed: the parameter hides the member function
+    }
+    
+    hopscotch_hash(const hopscotch_hash& other) = default;
+    
+    hopscotch_hash(hopscotch_hash&& other) // noexcept when every base class and both containers move without throwing
+                        noexcept(
+                            std::is_nothrow_move_constructible<Hash>::value &&
+                            std::is_nothrow_move_constructible<KeyEqual>::value &&
+                            std::is_nothrow_move_constructible<GrowthPolicy>::value &&
+                            std::is_nothrow_move_constructible<buckets_container_type>::value &&
+                            std::is_nothrow_move_constructible<overflow_container_type>::value
+                        )
+                        : Hash(std::move(static_cast<Hash&>(other))),
+                          KeyEqual(std::move(static_cast<KeyEqual&>(other))),
+                          GrowthPolicy(std::move(static_cast<GrowthPolicy&>(other))),
+                          m_buckets(std::move(other.m_buckets)),
+                          m_overflow_elements(std::move(other.m_overflow_elements)),
+                          m_nb_elements(other.m_nb_elements),
+                          m_max_load_factor(other.m_max_load_factor),
+                          m_load_threshold(other.m_load_threshold),
+                          m_min_load_factor_rehash_threshold(other.m_min_load_factor_rehash_threshold)
+    {
+        other.clear(); // clears whatever 'other' still holds and resets its element count to 0
+    }
+    
+    hopscotch_hash& operator=(const hopscotch_hash& other) = default;
+    
+    hopscotch_hash& operator=(hopscotch_hash&& other) { // move assignment, implemented with swap() followed by clear()
+        other.swap(*this); // presumably exchanges the two tables (swap() is defined outside this section)
+        other.clear(); // 'other' is always left cleared
+        
+        return *this;
+    }
+    
+    allocator_type get_allocator() const { // obtained from the allocator of the bucket vector
+        return m_buckets.get_allocator();
+    }
+    
+    
+    /*
+     * Iterators
+     */
+    iterator begin() noexcept {
+        // Skip the leading run of empty buckets; first_used may end up equal to m_buckets.end().
+        auto first_used = m_buckets.begin();
+        for(; first_used != m_buckets.end(); ++first_used) {
+            if(!first_used->empty()) { break; }
+        }
+        return iterator(first_used, m_buckets.end(), m_overflow_elements.begin());
+    }
+    
+    const_iterator begin() const noexcept { // Const overload; identical to cbegin().
+        return cbegin();
+    }
+    
+    const_iterator cbegin() const noexcept {
+        // Skip the leading run of empty buckets; first_used may end up equal to m_buckets.cend().
+        auto first_used = m_buckets.cbegin();
+        for(; first_used != m_buckets.cend(); ++first_used) {
+            if(!first_used->empty()) { break; }
+        }
+        return const_iterator(first_used, m_buckets.cend(), m_overflow_elements.cbegin());
+    }
+    
+    iterator end() noexcept { // Past-the-end: both the bucket position and the overflow position are at their end.
+        return iterator(m_buckets.end(), m_buckets.end(), m_overflow_elements.end());
+    }
+    
+    const_iterator end() const noexcept { // Const overload; identical to cend().
+        return cend();
+    }
+    
+    const_iterator cend() const noexcept { // Past-the-end: both the bucket position and the overflow position are at their end.
+        return const_iterator(m_buckets.cend(), m_buckets.cend(), m_overflow_elements.cend());
+    }
+    
+    
+    /*
+     * Capacity
+     */
+    bool empty() const noexcept { // True when neither the buckets nor the overflow list hold an element.
+        return size() == 0;
+    }
+    
+    size_type size() const noexcept { // Element count; the counter is also incremented for overflow-list insertions.
+        return m_nb_elements;
+    }
+    
+    size_type max_size() const noexcept { // Delegates to the limit reported by the bucket type.
+        return hopscotch_bucket::max_size();
+    }
+    
+    /*
+     * Modifiers
+     */
+    void clear() noexcept {
+        // Reset each bucket in place (m_buckets itself is not resized), then drop the overflow elements.
+        for(auto it_bucket = m_buckets.begin(); it_bucket != m_buckets.end(); ++it_bucket) {
+            it_bucket->clear();
+        }
+        m_overflow_elements.clear();
+        m_nb_elements = 0;
+    }
+    
+    
+    std::pair<iterator, bool> insert(const value_type& value) { // Copy-inserts value; the bool is false when the key was already present.
+        return insert_impl(value); 
+    }
+        
+    template<class P, typename std::enable_if<std::is_constructible<value_type, P&&>::value>::type* = nullptr>
+    std::pair<iterator, bool> insert(P&& value) { // Accepts any P a value_type can be constructed from; forwards to emplace().
+        return emplace(std::forward<P>(value)); 
+    }
+    
+    std::pair<iterator, bool> insert(value_type&& value) { // Move-inserts value; the bool is false when the key was already present.
+        return insert_impl(std::move(value)); 
+    }
+    
+    
+    iterator insert(const_iterator hint, const value_type& value) {
+        // The hint is only useful when it already designates an element with an equal key;
+        // otherwise it is ignored and a regular insertion is performed.
+        const bool hint_matches = hint != cend() && 
+                                  compare_keys(KeySelect()(*hint), KeySelect()(value));
+        return hint_matches ? mutable_iterator(hint) : insert(value).first;
+    }
+        
+    template<class P, typename std::enable_if<std::is_constructible<value_type, P&&>::value>::type* = nullptr>
+    iterator insert(const_iterator hint, P&& value) { // Accepts any P a value_type can be constructed from; forwards to emplace_hint().
+        return emplace_hint(hint, std::forward<P>(value)); 
+    }
+    
+    iterator insert(const_iterator hint, value_type&& value) {
+        // The hint is only useful when it already designates an element with an equal key;
+        // otherwise it is ignored and value is moved into a regular insertion.
+        const bool hint_matches = hint != cend() && 
+                                  compare_keys(KeySelect()(*hint), KeySelect()(value));
+        return hint_matches ? mutable_iterator(hint) : insert(std::move(value)).first;
+    }
+    
+    
+    template<class InputIt>
+    void insert(InputIt first, InputIt last) {
+        using category = typename std::iterator_traits<InputIt>::iterator_category;
+        // The range is measured up front only for forward (or stronger) iterators.
+        if(std::is_base_of<std::forward_iterator_tag, category>::value) {
+            tsl_assert(m_nb_elements >= m_overflow_elements.size());
+            const std::size_t in_buckets = m_nb_elements - m_overflow_elements.size();
+            tsl_assert(m_load_threshold >= in_buckets);
+            const std::size_t room_left = m_load_threshold - in_buckets;
+            // Grow once beforehand when the incoming elements exceed the room left under the threshold.
+            const auto incoming = std::distance(first, last);
+            if(incoming > 0 && room_left < std::size_t(incoming)) {
+                reserve(in_buckets + std::size_t(incoming));
+            }
+        }
+        
+        for(; first != last; ++first) {
+            insert(*first);
+        }
+    }
+    
+    
+    template<class M>
+    std::pair<iterator, bool> insert_or_assign(const key_type& k, M&& obj) { // Inserts {k, obj}, or assigns obj to the mapped value when k exists (bool == false).
+        return insert_or_assign_impl(k, std::forward<M>(obj)); 
+    }
+
+    template<class M>
+    std::pair<iterator, bool> insert_or_assign(key_type&& k, M&& obj) { // Same as the const key_type& overload, but k is passed on as an rvalue.
+        return insert_or_assign_impl(std::move(k), std::forward<M>(obj)); 
+    }
+    
+    
+    template<class M>
+    iterator insert_or_assign(const_iterator hint, const key_type& k, M&& obj) {
+        // The hint is honoured only when it already designates an element whose key equals k.
+        const bool hint_matches = hint != cend() && compare_keys(KeySelect()(*hint), k);
+        if(!hint_matches) {
+            return insert_or_assign(k, std::forward<M>(obj)).first;
+        }
+        auto it_hint = mutable_iterator(hint);
+        it_hint.value() = std::forward<M>(obj);
+        return it_hint;
+    }
+    
+    template<class M>
+    iterator insert_or_assign(const_iterator hint, key_type&& k, M&& obj) {
+        // The hint is honoured only when it already designates an element whose key equals k.
+        const bool hint_matches = hint != cend() && compare_keys(KeySelect()(*hint), k);
+        if(!hint_matches) {
+            return insert_or_assign(std::move(k), std::forward<M>(obj)).first;
+        }
+        auto it_hint = mutable_iterator(hint);
+        it_hint.value() = std::forward<M>(obj);
+        return it_hint;
+    }
+    
+    
+    template<class... Args>
+    std::pair<iterator, bool> emplace(Args&&... args) { // Builds a temporary value_type from args, then inserts it as an rvalue.
+        return insert(value_type(std::forward<Args>(args)...));
+    }
+    
+    template<class... Args>
+    iterator emplace_hint(const_iterator hint, Args&&... args) { // Builds a temporary value_type from args, then calls the hinted insert.
+        return insert(hint, value_type(std::forward<Args>(args)...));        
+    }
+    
+    template<class... Args>
+    std::pair<iterator, bool> try_emplace(const key_type& k, Args&&... args) { // Inserts only when k is absent; args are not used otherwise.
+        return try_emplace_impl(k, std::forward<Args>(args)...);
+    }
+    
+    template<class... Args>
+    std::pair<iterator, bool> try_emplace(key_type&& k, Args&&... args) { // Same as the const key_type& overload, but k is passed on as an rvalue.
+        return try_emplace_impl(std::move(k), std::forward<Args>(args)...);
+    }
+    
+    template<class... Args>
+    iterator try_emplace(const_iterator hint, const key_type& k, Args&&... args) {
+        // A hint that already designates an element with key k means nothing has to be constructed.
+        const bool hint_matches = hint != cend() && compare_keys(KeySelect()(*hint), k);
+        
+        return hint_matches ? mutable_iterator(hint) 
+                            : try_emplace(k, std::forward<Args>(args)...).first;
+    }
+    
+    template<class... Args>
+    iterator try_emplace(const_iterator hint, key_type&& k, Args&&... args) {
+        // A hint that already designates an element with key k means nothing has to be constructed.
+        const bool hint_matches = hint != cend() && compare_keys(KeySelect()(*hint), k);
+        
+        return hint_matches ? mutable_iterator(hint) 
+                            : try_emplace(std::move(k), std::forward<Args>(args)...).first;
+    }
+    
+    
+    
+    iterator erase(iterator pos) { // Converts pos to a const_iterator and delegates to that overload.
+        return erase(const_iterator(pos));
+    }
+    
+    iterator erase(const_iterator pos) {
+        const std::size_t ibucket_for_hash = bucket_for_hash(hash_key(pos.key()));
+        
+        // pos designates an element of the overflow list.
+        if(pos.m_buckets_iterator == pos.m_buckets_end_iterator) {
+            auto it_next_overflow = erase_from_overflow(pos.m_overflow_iterator, ibucket_for_hash);
+            return iterator(m_buckets.end(), m_buckets.end(), it_next_overflow);
+        }
+        // pos designates a bucket: rebuild a mutable bucket iterator at the same offset.
+        auto it_bucket = m_buckets.begin() + std::distance(m_buckets.cbegin(), pos.m_buckets_iterator);
+        erase_from_bucket(it_bucket, ibucket_for_hash);
+        iterator it_next(it_bucket, m_buckets.end(), m_overflow_elements.begin());
+        return ++it_next;
+    }
+    
+    iterator erase(const_iterator first, const_iterator last) {
+        // Empty range: nothing to remove, only convert `first` into a mutable iterator.
+        if(first == last) {
+            return mutable_iterator(first);
+        }
+        // Remove one element at a time, continuing from the iterator each erase() returns.
+        iterator it_next = erase(first);
+        while(it_next != last) {
+            it_next = erase(it_next);
+        }
+        return it_next;
+    }
+    
+    template<class K>
+    size_type erase(const K& key) { // Hashes key with the table's hasher and delegates; returns the number of erased elements.
+        return erase(key, hash_key(key));
+    }
+    
+    template<class K>
+    size_type erase(const K& key, std::size_t hash) {
+        const std::size_t ibucket_for_hash = bucket_for_hash(hash);
+        
+        // First look in the buckets, starting from the key's home bucket.
+        auto it_bucket = find_in_buckets(key, hash, m_buckets.begin() + ibucket_for_hash);
+        if(it_bucket != m_buckets.end()) {
+            erase_from_bucket(it_bucket, ibucket_for_hash);
+            return 1;
+        }
+        
+        // The overflow list is searched only when the home bucket carries the overflow flag.
+        if(!m_buckets[ibucket_for_hash].has_overflow()) {
+            return 0;
+        }
+        auto it_overflow = find_in_overflow(key);
+        if(it_overflow == m_overflow_elements.end()) {
+            return 0;
+        }
+        erase_from_overflow(it_overflow, ibucket_for_hash);
+        return 1;
+    }
+    
+    void swap(hopscotch_hash& other) { // Exchanges the complete state of the two tables.
+        using std::swap; // lets the unqualified calls below also find swap overloads through ADL
+        // Base-class sub-objects first, then the data members.
+        swap(static_cast<Hash&>(*this), static_cast<Hash&>(other));
+        swap(static_cast<KeyEqual&>(*this), static_cast<KeyEqual&>(other));
+        swap(static_cast<GrowthPolicy&>(*this), static_cast<GrowthPolicy&>(other));
+        swap(m_buckets, other.m_buckets);
+        swap(m_overflow_elements, other.m_overflow_elements);
+        swap(m_nb_elements, other.m_nb_elements);
+        swap(m_max_load_factor, other.m_max_load_factor);
+        swap(m_load_threshold, other.m_load_threshold);
+        swap(m_min_load_factor_rehash_threshold, other.m_min_load_factor_rehash_threshold);
+    }
+    
+    
+    /*
+     * Lookup
+     */
+    template<class K, class U = ValueSelect, typename std::enable_if<has_mapped_type<U>::value>::type* = nullptr>
+    typename U::value_type& at(const K& key) { // Enabled only when ValueSelect has a mapped type; hashes key and delegates.
+        return at(key, hash_key(key));
+    }
+    
+    template<class K, class U = ValueSelect, typename std::enable_if<has_mapped_type<U>::value>::type* = nullptr>
+    typename U::value_type& at(const K& key, std::size_t hash) { // Reuses the const overload and casts the constness of the result away.
+        return const_cast<typename U::value_type&>(static_cast<const hopscotch_hash*>(this)->at(key, hash));
+    }
+    
+    
+    template<class K, class U = ValueSelect, typename std::enable_if<has_mapped_type<U>::value>::type* = nullptr>
+    const typename U::value_type& at(const K& key) const { // Hashes key with the table's hasher and delegates.
+        return at(key, hash_key(key));
+    }
+    
+    template<class K, class U = ValueSelect, typename std::enable_if<has_mapped_type<U>::value>::type* = nullptr>
+    const typename U::value_type& at(const K& key, std::size_t hash) const {
+        // find_value_impl yields a null pointer when the key is absent.
+        const typename U::value_type* mapped = 
+                find_value_impl(key, hash, m_buckets.cbegin() + bucket_for_hash(hash));
+        if(mapped != nullptr) {
+            return *mapped;
+        }
+        
+        // A missing key is reported with std::out_of_range.
+        throw std::out_of_range("Couldn't find key.");
+    }
+    
+    
+    template<class K, class U = ValueSelect, typename std::enable_if<has_mapped_type<U>::value>::type* = nullptr>
+    typename U::value_type& operator[](K&& key) {
+        const std::size_t hash = hash_key(key);
+        const std::size_t ibucket_for_hash = bucket_for_hash(hash);
+        
+        // Fast path: the key is already stored, hand back its mapped value.
+        typename U::value_type* mapped = find_value_impl(key, hash, m_buckets.begin() + ibucket_for_hash);
+        if(mapped != nullptr) {
+            return *mapped;
+        }
+        
+        // Otherwise insert the forwarded key with a mapped value constructed from no arguments.
+        auto inserted = insert_impl(ibucket_for_hash, hash, std::piecewise_construct, 
+                                    std::forward_as_tuple(std::forward<K>(key)), 
+                                    std::forward_as_tuple());
+        return inserted.first.value();
+    }
+    
+    
+    template<class K>
+    size_type count(const K& key) const { // Hashes key with the table's hasher and delegates.
+        return count(key, hash_key(key));
+    }
+    
+    template<class K>
+    size_type count(const K& key, std::size_t hash) const { // Returns 0 or 1; the search starts at the bucket the hash maps to.
+        return count_impl(key, hash, m_buckets.cbegin() + bucket_for_hash(hash));
+    }
+    
+    
+    template<class K>
+    iterator find(const K& key) { // Hashes key with the table's hasher and delegates.
+        return find(key, hash_key(key));
+    }
+    
+    template<class K>
+    iterator find(const K& key, std::size_t hash) { // Returns end() when key is absent; the search starts at the bucket the hash maps to.
+        return find_impl(key, hash, m_buckets.begin() + bucket_for_hash(hash));
+    }
+    
+    
+    template<class K>
+    const_iterator find(const K& key) const { // Hashes key with the table's hasher and delegates.
+        return find(key, hash_key(key));
+    }
+    
+    template<class K>
+    const_iterator find(const K& key, std::size_t hash) const { // Returns cend() when key is absent.
+        return find_impl(key, hash, m_buckets.cbegin() + bucket_for_hash(hash));
+    }
+    
+    
+    template<class K>
+    std::pair<iterator, iterator> equal_range(const K& key) { // Hashes key with the table's hasher and delegates.
+        return equal_range(key, hash_key(key));
+    }
+    
+    template<class K>
+    std::pair<iterator, iterator> equal_range(const K& key, std::size_t hash) {
+        iterator it_first = find(key, hash); // the range is empty (end(), end()) or holds this single element
+        return std::make_pair(it_first, (it_first != end()) ? std::next(it_first) : it_first);
+    }
+    
+    
+    template<class K>
+    std::pair<const_iterator, const_iterator> equal_range(const K& key) const { // Hashes key with the table's hasher and delegates.
+        return equal_range(key, hash_key(key));
+    }
+    
+    template<class K>
+    std::pair<const_iterator, const_iterator> equal_range(const K& key, std::size_t hash) const {
+        const_iterator it_first = find(key, hash); // the range is empty (cend(), cend()) or holds this single element
+        return std::make_pair(it_first, (it_first != cend()) ? std::next(it_first) : it_first);
+    }
+    
+    /*
+     * Bucket interface 
+     */
+    size_type bucket_count() const { // Size of the bucket array minus its NeighborhoodSize - 1 trailing padding slots.
+        /*
+         * So that the last bucket can have NeighborhoodSize neighbors, the size of the bucket array is a little
+         * bigger than the real number of buckets. We could use some of the buckets at the beginning, but
+         * it is easier this way and we avoid weird behaviour with iterators.
+         */
+        return m_buckets.size() - NeighborhoodSize + 1; 
+    }
+    
+    size_type max_bucket_count() const { // The smaller of the policy limit and the vector limit, minus the padding slots.
+        const std::size_t max_array_size = std::min(GrowthPolicy::max_bucket_count(), m_buckets.max_size());
+        return max_array_size - NeighborhoodSize + 1;
+    }
+    
+    
+    /*
+     *  Hash policy 
+     */
+    float load_factor() const { // Ratio of size() (overflow elements included) to bucket_count().
+        return float(m_nb_elements)/float(bucket_count());
+    }
+    
+    float max_load_factor() const { // Returns the value last stored by max_load_factor(float).
+        return m_max_load_factor;
+    }
+    
+    void max_load_factor(float ml) { // Stores ml as given (no range check) and recomputes both thresholds from bucket_count().
+        m_max_load_factor = ml;
+        m_load_threshold = size_type(float(bucket_count())*m_max_load_factor); // in-bucket element count at which insert_impl grows the table
+        m_min_load_factor_rehash_threshold = size_type(bucket_count()*MIN_LOAD_FACTOR_FOR_REHASH); // below this size(), insert_impl uses the overflow list instead of rehashing
+    }
+    
+    void rehash(size_type count) {
+        const size_type min_count = size_type(std::ceil(float(size())/max_load_factor())); // lower bound derived from size() and the max load factor
+        rehash_impl(std::max(count, min_count));
+    }
+    
+    void reserve(size_type count) { // Rehashes to the bucket count needed to hold `count` elements at the max load factor.
+        rehash(size_type(std::ceil(float(count)/max_load_factor())));
+    }
+    
+    
+    /*
+     * Observers
+     */
+    hasher hash_function() const { // Returns a copy of the Hash base-class sub-object.
+        return static_cast<Hash>(*this);
+    }
+    
+    key_equal key_eq() const { // Returns a copy of the KeyEqual base-class sub-object.
+        return static_cast<KeyEqual>(*this);
+    }
+    
+    /*
+     * Other
+     */
+    iterator mutable_iterator(const_iterator pos) {
+        // pos designates an overflow element: only the overflow iterator needs converting.
+        if(pos.m_buckets_iterator == pos.m_buckets_end_iterator) {
+            auto it_overflow = mutable_overflow_iterator(pos.m_overflow_iterator);
+            return iterator(m_buckets.end(), m_buckets.end(), it_overflow);
+        }
+        
+        // pos designates a bucket: rebuild the same offset from the non-const begin().
+        const auto offset = std::distance(m_buckets.cbegin(), pos.m_buckets_iterator);
+        auto it_bucket = m_buckets.begin() + offset;
+        
+        return iterator(it_bucket, m_buckets.end(), m_overflow_elements.begin());
+    }
+    
+    size_type overflow_size() const noexcept { // Number of elements currently stored in the overflow container.
+        return m_overflow_elements.size();
+    }
+    
+    template<class U = OverflowContainer, typename std::enable_if<has_key_compare<U>::value>::type* = nullptr>
+    typename U::key_compare key_comp() const { // Available only when the overflow container exposes a key_compare type.
+        return m_overflow_elements.key_comp();
+    }
+    
+    
+private:
+    template<class K>
+    std::size_t hash_key(const K& key) const { // Applies the Hash base class to key.
+        return Hash::operator()(key);
+    }
+    
+    template<class K1, class K2>
+    bool compare_keys(const K1& key1, const K2& key2) const { // Applies the KeyEqual base class to the two keys.
+        return KeyEqual::operator()(key1, key2);
+    }
+    
+    std::size_t bucket_for_hash(std::size_t hash) const { // Maps a hash to a bucket index through the GrowthPolicy base class.
+        return GrowthPolicy::bucket_for_hash(hash);
+    }
+    
+    
+    static_assert(std::is_nothrow_move_constructible<value_type>::value || // rehash_impl has one overload for each of these two cases
+                  std::is_copy_constructible<value_type>::value, 
+                  "value_type must be either copy constructible or nothrow move constructible.");
+    
+    template<typename U = value_type, 
+             typename std::enable_if<std::is_nothrow_move_constructible<U>::value>::type* = nullptr>
+    void rehash_impl(size_type count) { // Overload for nothrow-movable values: elements are moved into a new table of `count` buckets.
+        hopscotch_hash new_map = new_hopscotch_hash(count);
+        // Hand the whole overflow list to new_map and flag each element's home bucket there.
+        if(!m_overflow_elements.empty()) {
+            new_map.m_overflow_elements.swap(m_overflow_elements);
+            new_map.m_nb_elements += new_map.m_overflow_elements.size();
+            
+            for(const value_type& value : new_map.m_overflow_elements) {
+                const std::size_t ibucket_for_hash = new_map.bucket_for_hash(new_map.hash_key(KeySelect()(value)));
+                new_map.m_buckets[ibucket_for_hash].set_overflow(true);
+            }
+        }
+        
+        try {
+            for(auto it_bucket = m_buckets.begin(); it_bucket != m_buckets.end(); ++it_bucket) {
+                if(it_bucket->empty()) {
+                    continue;
+                }
+                // Use the hash stored in the bucket when USE_STORED_HASH_ON_REHASH is set, else recompute it from the key.
+                const std::size_t hash = USE_STORED_HASH_ON_REHASH?
+                                            it_bucket->truncated_bucket_hash():
+                                            new_map.hash_key(KeySelect()(it_bucket->value()));
+                const std::size_t ibucket_for_hash = new_map.bucket_for_hash(hash);
+                
+                new_map.insert_impl(ibucket_for_hash, hash, std::move(it_bucket->value()));
+                
+                // The value has been moved out: remove it from this table's bucket.
+                erase_from_bucket(it_bucket, bucket_for_hash(hash));
+            }
+        } 
+        /*
+         * The call to insert_impl may throw an exception if an element is added to the overflow
+         * list. Rollback the elements in this case.
+         */
+        catch(...) {
+            m_overflow_elements.swap(new_map.m_overflow_elements);
+            // Move every value that already reached new_map's buckets back into this table.
+            for(auto it_bucket = new_map.m_buckets.begin(); it_bucket != new_map.m_buckets.end(); ++it_bucket) {
+                if(it_bucket->empty()) {
+                    continue;
+                }
+                
+                const std::size_t hash = USE_STORED_HASH_ON_REHASH?
+                                            it_bucket->truncated_bucket_hash():
+                                            hash_key(KeySelect()(it_bucket->value()));
+                const std::size_t ibucket_for_hash = bucket_for_hash(hash);
+                
+                // The elements we insert were not in the overflow list before the switch.
+                // They will not go in the overflow list if we rollback the switch.
+                insert_impl(ibucket_for_hash, hash, std::move(it_bucket->value()));
+            }
+            
+            throw;
+        }
+        // Success: adopt the new table; the old storage is released when new_map goes out of scope.
+        new_map.swap(*this);
+    }
+    
+    template<typename U = value_type, 
+             typename std::enable_if<std::is_copy_constructible<U>::value && 
+                                     !std::is_nothrow_move_constructible<U>::value>::type* = nullptr>
+    void rehash_impl(size_type count) { // Overload for copyable values without a nothrow move: elements are copied into a new table.
+        hopscotch_hash new_map = new_hopscotch_hash(count);
+        // This table is only read until the final swap, so a throwing copy leaves it unmodified.
+        for(const hopscotch_bucket& bucket: m_buckets) {
+            if(bucket.empty()) {
+                continue;
+            }
+            
+            const std::size_t hash = USE_STORED_HASH_ON_REHASH?
+                                         bucket.truncated_bucket_hash():
+                                         new_map.hash_key(KeySelect()(bucket.value()));
+            const std::size_t ibucket_for_hash = new_map.bucket_for_hash(hash);
+            
+            new_map.insert_impl(ibucket_for_hash, hash, bucket.value());
+        }
+        // Overflow elements are re-inserted through the normal path; their hash is always recomputed.
+        for(const value_type& value: m_overflow_elements) {
+            const std::size_t hash = new_map.hash_key(KeySelect()(value));
+            const std::size_t ibucket_for_hash = new_map.bucket_for_hash(hash);
+            
+            new_map.insert_impl(ibucket_for_hash, hash, value);
+        }
+            
+        new_map.swap(*this);
+    }
+    
+#ifdef TSL_NO_RANGE_ERASE_WITH_CONST_ITERATOR
+    iterator_overflow mutable_overflow_iterator(const_iterator_overflow it) { // Fallback: advance from the non-const begin() by the same distance.
+        return std::next(m_overflow_elements.begin(), std::distance(m_overflow_elements.cbegin(), it));        
+    }
+#else            
+    iterator_overflow mutable_overflow_iterator(const_iterator_overflow it) { // Erasing the empty range [it, it) removes nothing and yields a non-const iterator.
+        return m_overflow_elements.erase(it, it);       
+    }
+#endif    
+
+    // iterator is in overflow list
+    iterator_overflow erase_from_overflow(const_iterator_overflow pos, std::size_t ibucket_for_hash) { // Returns the iterator following the erased overflow element.
+#ifdef TSL_NO_RANGE_ERASE_WITH_CONST_ITERATOR        
+        auto it_next = m_overflow_elements.erase(mutable_overflow_iterator(pos));
+#else
+        auto it_next = m_overflow_elements.erase(pos);
+#endif
+        m_nb_elements--;
+        
+        // The flag must stay set while any remaining overflow element still maps to the same home bucket.
+        // Check if we can remove the overflow flag
+        tsl_assert(m_buckets[ibucket_for_hash].has_overflow());
+        for(const value_type& value: m_overflow_elements) {
+            const std::size_t bucket_for_value = bucket_for_hash(hash_key(KeySelect()(value)));
+            if(bucket_for_value == ibucket_for_hash) {
+                return it_next;
+            }
+        }
+        
+        m_buckets[ibucket_for_hash].set_overflow(false);
+        return it_next;
+    }
+    
+    // iterator is in bucket
+    void erase_from_bucket(iterator_buckets pos, std::size_t ibucket_for_hash) noexcept { // pos: bucket holding the value; ibucket_for_hash: the value's home bucket.
+        const std::size_t ibucket_for_pos = std::distance(m_buckets.begin(), pos);
+        tsl_assert(ibucket_for_pos >= ibucket_for_hash);
+        // Remove the value, then toggle the home bucket's presence bit for that offset.
+        m_buckets[ibucket_for_pos].remove_value();
+        m_buckets[ibucket_for_hash].toggle_neighbor_presence(ibucket_for_pos - ibucket_for_hash);
+        m_nb_elements--;
+    }
+    
+
+    
+    template<class K, class M>
+    std::pair<iterator, bool> insert_or_assign_impl(K&& key, M&& obj) {
+        auto result = try_emplace_impl(std::forward<K>(key), std::forward<M>(obj));
+        if(!result.second) {
+            // Key already present: try_emplace_impl returned before using obj, so assign it now.
+            result.first.value() = std::forward<M>(obj);
+        }
+        return result;
+    }
+    
+    template<typename P, class... Args>
+    std::pair<iterator, bool> try_emplace_impl(P&& key, Args&&... args_value) {
+        const std::size_t hash = hash_key(key);
+        const std::size_t ibucket_for_hash = bucket_for_hash(hash);
+        
+        // Key already present: report it without consuming key or the value arguments.
+        auto it_existing = find_impl(key, hash, m_buckets.begin() + ibucket_for_hash);
+        if(it_existing != end()) {
+            return std::make_pair(it_existing, false);
+        }
+        
+        return insert_impl(ibucket_for_hash, hash, std::piecewise_construct, 
+                           std::forward_as_tuple(std::forward<P>(key)), 
+                           std::forward_as_tuple(std::forward<Args>(args_value)...));
+    }
+    
+    template<typename P>
+    std::pair<iterator, bool> insert_impl(P&& value) {
+        const auto& key = KeySelect()(value);
+        const std::size_t hash = hash_key(key);
+        const std::size_t ibucket_for_hash = bucket_for_hash(hash);
+        
+        // Key already present: return the existing element and leave value untouched.
+        auto it_existing = find_impl(key, hash, m_buckets.begin() + ibucket_for_hash);
+        if(it_existing != end()) {
+            return std::make_pair(it_existing, false);
+        }
+        
+        return insert_impl(ibucket_for_hash, hash, std::forward<P>(value));
+    }
+    
+    template<typename... Args>
+    std::pair<iterator, bool> insert_impl(std::size_t ibucket_for_hash, std::size_t hash, Args&&... value_type_args) { // Unconditional insertion: this overload performs no duplicate lookup itself.
+        if((m_nb_elements - m_overflow_elements.size()) >= m_load_threshold) { // in-bucket element count reached the threshold: grow first
+            rehash(GrowthPolicy::next_bucket_count());
+            ibucket_for_hash = bucket_for_hash(hash);
+        }
+        // find_empty_bucket returns m_buckets.size() when no empty bucket was found within its probe limit.
+        std::size_t ibucket_empty = find_empty_bucket(ibucket_for_hash);
+        if(ibucket_empty < m_buckets.size()) {
+            do {
+                tsl_assert(ibucket_empty >= ibucket_for_hash);
+                
+                // Empty bucket is in range of NeighborhoodSize, use it
+                if(ibucket_empty - ibucket_for_hash < NeighborhoodSize) {
+                    auto it = insert_in_bucket(ibucket_empty, ibucket_for_hash, 
+                                               hash, std::forward<Args>(value_type_args)...);
+                    return std::make_pair(iterator(it, m_buckets.end(), m_overflow_elements.begin()), true);
+                }
+            }
+            // else, try to swap values to get a closer empty bucket
+            while(swap_empty_bucket_closer(ibucket_empty));
+        }
+            
+        // Load factor is too low or a rehash will not change the neighborhood, put the value in overflow list
+        if(size() < m_min_load_factor_rehash_threshold || !will_neighborhood_change_on_rehash(ibucket_for_hash)) {
+            auto it_insert = insert_in_overflow(std::forward<Args>(value_type_args)...);
+            
+            m_buckets[ibucket_for_hash].set_overflow(true);
+            m_nb_elements++;
+            
+            return std::make_pair(iterator(m_buckets.end(), m_buckets.end(), it_insert), true);
+        }
+        // Last resort: grow the table, then retry the whole insertion against the new layout.
+        rehash(GrowthPolicy::next_bucket_count());
+        
+        ibucket_for_hash = bucket_for_hash(hash);
+        return insert_impl(ibucket_for_hash, hash, std::forward<Args>(value_type_args)...);
+    }    
+    
+    /*
+     * Return true if a rehash will change the position of a key-value in the neighborhood of 
+     * ibucket_neighborhood_check. In this case a rehash is needed instead of putting the value in overflow list.
+     */
+    bool will_neighborhood_change_on_rehash(size_t ibucket_neighborhood_check) const {
+        std::size_t expand_bucket_count = GrowthPolicy::next_bucket_count();
+        GrowthPolicy expand_growth_policy(expand_bucket_count);
+        // The scratch policy above is sized for the next growth step and is only used to ask where each hash would land.
+        for(size_t ibucket = ibucket_neighborhood_check; 
+            ibucket < m_buckets.size() && (ibucket - ibucket_neighborhood_check) < NeighborhoodSize; 
+            ++ibucket)
+        {
+            tsl_assert(!m_buckets[ibucket].empty());
+            // Use the hash stored in the bucket when USE_STORED_HASH_ON_REHASH is set, else recompute it from the key.
+            const size_t hash = USE_STORED_HASH_ON_REHASH?
+                                    m_buckets[ibucket].truncated_bucket_hash():
+                                    hash_key(KeySelect()(m_buckets[ibucket].value()));
+            if(bucket_for_hash(hash) != expand_growth_policy.bucket_for_hash(hash)) {
+                return true;
+            }
+        }
+        
+        return false;
+    }
+    
+    /*
+     * Return the index of an empty bucket in m_buckets.
+     * If none, the returned index equals m_buckets.size()
+     */
+    std::size_t find_empty_bucket(std::size_t ibucket_start) const {
+        // Probe at most MAX_PROBES_FOR_EMPTY_BUCKET slots, never past the end of the bucket array.
+        const std::size_t probe_end = std::min(ibucket_start + MAX_PROBES_FOR_EMPTY_BUCKET, m_buckets.size());
+        std::size_t ibucket = ibucket_start;
+        while(ibucket < probe_end && !m_buckets[ibucket].empty()) {
+            ++ibucket;
+        }
+        
+        return (ibucket < probe_end) ? ibucket : m_buckets.size();
+    }
+    
+    /*
+     * Insert value in ibucket_empty where value originally belongs to ibucket_for_hash
+     * 
+     * Return bucket iterator to ibucket_empty
+     */
+    template<typename... Args>
+    iterator_buckets insert_in_bucket(std::size_t ibucket_empty, std::size_t ibucket_for_hash,
+                                      std::size_t hash, Args&&... value_type_args) 
+    {
+        tsl_assert(ibucket_empty >= ibucket_for_hash );
+        tsl_assert(m_buckets[ibucket_empty].empty());
+        m_buckets[ibucket_empty].set_value_of_empty_bucket(hash, std::forward<Args>(value_type_args)...);
+        // Record in the home bucket that the slot at offset (ibucket_empty - ibucket_for_hash) now holds one of its values.
+        tsl_assert(!m_buckets[ibucket_for_hash].empty());
+        m_buckets[ibucket_for_hash].toggle_neighbor_presence(ibucket_empty - ibucket_for_hash);
+        m_nb_elements++;
+        
+        return m_buckets.begin() + ibucket_empty;
+    }
+    
+    /*
+     * Try to swap the bucket ibucket_empty_in_out with a bucket preceding it while keeping the neighborhood 
+     * conditions correct.
+     * 
+     * If a swap was possible, the position of ibucket_empty_in_out will be closer to 0 and true will be returned.
+     */
+    bool swap_empty_bucket_closer(std::size_t& ibucket_empty_in_out) {
+        tsl_assert(ibucket_empty_in_out >= NeighborhoodSize);
+        const std::size_t neighborhood_start = ibucket_empty_in_out - NeighborhoodSize + 1;
+        // Candidate home buckets: the NeighborhoodSize - 1 buckets that precede the empty one.
+        for(std::size_t to_check = neighborhood_start; to_check < ibucket_empty_in_out; to_check++) {
+            neighborhood_bitmap neighborhood_infos = m_buckets[to_check].neighborhood_infos();
+            std::size_t to_swap = to_check;
+            // Walk the local bitmap copy from bit 0 upwards; to_swap is the bucket matching the current bit.
+            while(neighborhood_infos != 0 && to_swap < ibucket_empty_in_out) {
+                if((neighborhood_infos & 1) == 1) {
+                    tsl_assert(m_buckets[ibucket_empty_in_out].empty());
+                    tsl_assert(!m_buckets[to_swap].empty());
+                    
+                    m_buckets[to_swap].swap_value_into_empty_bucket(m_buckets[ibucket_empty_in_out]);
+                    
+                    tsl_assert(!m_buckets[to_check].check_neighbor_presence(ibucket_empty_in_out - to_check));
+                    tsl_assert(m_buckets[to_check].check_neighbor_presence(to_swap - to_check));
+                    // Update to_check's bitmap: the value now sits at the old empty slot and no longer at to_swap.
+                    m_buckets[to_check].toggle_neighbor_presence(ibucket_empty_in_out - to_check);
+                    m_buckets[to_check].toggle_neighbor_presence(to_swap - to_check);
+                    
+                    
+                    ibucket_empty_in_out = to_swap;
+                    
+                    return true;
+                }
+                
+                to_swap++;
+                neighborhood_infos = neighborhood_bitmap(neighborhood_infos >> 1);
+            }
+        }
+        
+        return false;
+    }
+    
+    
+    
+    template<class K, class U = ValueSelect, typename std::enable_if<has_mapped_type<U>::value>::type* = nullptr>
+    typename U::value_type* find_value_impl(const K& key, std::size_t hash, iterator_buckets it_bucket) { // Non-const overload: reuses the const lookup and casts the constness of the result away.
+        return const_cast<typename U::value_type*>(
+                    static_cast<const hopscotch_hash*>(this)->find_value_impl(key, hash, it_bucket));
+    }
+    
+    /*
+     * Avoid the creation of an iterator to just get the value for operator[] and at() in maps. Faster this way.
+     *
+     * Return null if no value for key (TODO use std::optional when available).
+     */
+    template<class K, class U = ValueSelect, typename std::enable_if<has_mapped_type<U>::value>::type* = nullptr>
+    const typename U::value_type* find_value_impl(const K& key, std::size_t hash, 
+                                                  const_iterator_buckets it_bucket) const 
+    {
+        // 1) Search the buckets, starting from the key's home bucket.
+        const auto it_in_buckets = find_in_buckets(key, hash, it_bucket);
+        if(it_in_buckets != m_buckets.cend()) {
+            return std::addressof(ValueSelect()(it_in_buckets->value()));
+        }
+        
+        // 2) Search the overflow list, only when the home bucket carries the overflow flag.
+        if(!it_bucket->has_overflow()) {
+            return nullptr;
+        }
+        const auto it_overflow = find_in_overflow(key);
+        return (it_overflow != m_overflow_elements.end()) ? std::addressof(ValueSelect()(*it_overflow)) 
+                                                          : nullptr;
+    }
+    
+    template<class K>
+    size_type count_impl(const K& key, std::size_t hash, const_iterator_buckets it_bucket) const {
+        // The result is 0 or 1: the buckets are searched first.
+        const bool in_buckets = find_in_buckets(key, hash, it_bucket) != m_buckets.cend();
+        if(in_buckets) {
+            return 1;
+        }
+        // The overflow list is searched only when the home bucket carries the overflow flag.
+        const bool in_overflow = it_bucket->has_overflow() && 
+                                 find_in_overflow(key) != m_overflow_elements.cend();
+        return in_overflow ? 1 : 0;
+    }
+    
+    template<class K>
+    iterator find_impl(const K& key, std::size_t hash, iterator_buckets it_bucket) {
+        auto bucket_pos = find_in_buckets(key, hash, it_bucket);
+        if(bucket_pos == m_buckets.end()) {
+            // Not found in the buckets: the overflow container is searched only if the bucket reports overflow.
+            if(it_bucket->has_overflow()) {
+                return iterator(m_buckets.end(), m_buckets.end(), find_in_overflow(key));
+            }
+            return end();
+        }
+        // Found in a bucket: build an iterator positioned on that bucket.
+        return iterator(bucket_pos, m_buckets.end(), m_overflow_elements.begin());
+    }
+    
+    template<class K>
+    const_iterator find_impl(const K& key, std::size_t hash, const_iterator_buckets it_bucket) const {
+        auto bucket_pos = find_in_buckets(key, hash, it_bucket);
+        if(bucket_pos == m_buckets.cend()) {
+            // Not found in the buckets: the overflow container is searched only if the bucket reports overflow.
+            if(it_bucket->has_overflow()) {
+                return const_iterator(m_buckets.cend(), m_buckets.cend(), find_in_overflow(key));
+            }
+            return cend();
+        }
+        
+        // Found in a bucket: build a const_iterator positioned on that bucket.
+        return const_iterator(bucket_pos, m_buckets.cend(), m_overflow_elements.cbegin());
+    }
+    
+    template<class K>
+    iterator_buckets find_in_buckets(const K& key, std::size_t hash, iterator_buckets it_bucket) {   
+        const auto it_const = static_cast<const hopscotch_hash&>(*this).find_in_buckets(key, hash, it_bucket);
+        return m_buckets.begin() + std::distance(m_buckets.cbegin(), it_const); // same offset, mutable iterator
+    }
+
+    
+    template<class K>
+    const_iterator_buckets find_in_buckets(const K& key, std::size_t hash, const_iterator_buckets it_bucket) const {      
+        (void) hash; // Avoid an unused-variable warning when StoreHash is false.
+        // Returns the bucket whose key compares equal to `key`, or m_buckets.end() if none matches.
+        // TODO Try to optimize the function.
+        // I tried to use the ffs and __builtin_ffs functions but I could not reduce the time the function
+        // takes with -march=native.
+        // Each loop step tests the lowest bit of the bitmap, then advances one bucket and shifts the bitmap right.
+        neighborhood_bitmap neighborhood_infos = it_bucket->neighborhood_infos();
+        while(neighborhood_infos != 0) {
+            if((neighborhood_infos & 1) == 1) {
+                // Check StoreHash before calling bucket_hash_equal. Functionally it doesn't change anything.
+                // If StoreHash is false, bucket_hash_equal is a no-op. Avoiding the call is there to help
+                // GCC optimize the `hash` parameter away; it seems unable to do so without this hint.
+                if((!StoreHash || it_bucket->bucket_hash_equal(hash)) && 
+                    compare_keys(KeySelect()(it_bucket->value()), key)) 
+                {
+                    return it_bucket;
+                }
+            }
+            
+            ++it_bucket;
+            neighborhood_infos = neighborhood_bitmap(neighborhood_infos >> 1);
+        }
+        
+        return m_buckets.end();
+    }
+    
+
+    
+    template<class K, class U = OverflowContainer, typename std::enable_if<!has_key_compare<U>::value>::type* = nullptr>
+    iterator_overflow find_in_overflow(const K& key) {
+        // Overload enabled when has_key_compare<U>::value is false.
+        // The overflow elements are scanned front to back until compare_keys reports a match.
+        auto same_key = [&](const value_type& candidate) { return compare_keys(key, KeySelect()(candidate)); };
+        return std::find_if(m_overflow_elements.begin(), m_overflow_elements.end(), same_key);
+    }
+    
+    template<class K, class U = OverflowContainer, typename std::enable_if<!has_key_compare<U>::value>::type* = nullptr>
+    const_iterator_overflow find_in_overflow(const K& key) const {
+        // Const overload enabled when has_key_compare<U>::value is false.
+        // The overflow elements are scanned front to back until compare_keys reports a match.
+        auto same_key = [&](const value_type& candidate) { return compare_keys(key, KeySelect()(candidate)); };
+        return std::find_if(m_overflow_elements.cbegin(), m_overflow_elements.cend(), same_key);
+    }
+    
+    template<class K, class U = OverflowContainer, typename std::enable_if<has_key_compare<U>::value>::type* = nullptr>
+    iterator_overflow find_in_overflow(const K& key) {
+        return m_overflow_elements.find(key); // enabled when has_key_compare<U> holds: rely on the container's own find()
+    }
+    
+    template<class K, class U = OverflowContainer, typename std::enable_if<has_key_compare<U>::value>::type* = nullptr>
+    const_iterator_overflow find_in_overflow(const K& key) const {
+        return m_overflow_elements.find(key); // const overload, enabled when has_key_compare<U> holds: rely on the container's own find()
+    }
+    
+    
+    
+    template<class... Args, class U = OverflowContainer, typename std::enable_if<!has_key_compare<U>::value>::type* = nullptr>
+    iterator_overflow insert_in_overflow(Args&&... value_type_args) {
+        return m_overflow_elements.emplace(m_overflow_elements.end(), std::forward<Args>(value_type_args)...); // construct in place at the end of the overflow container
+    }
+    
+    template<class... Args, class U = OverflowContainer, typename std::enable_if<has_key_compare<U>::value>::type* = nullptr>
+    iterator_overflow insert_in_overflow(Args&&... value_type_args) {
+        return m_overflow_elements.emplace(std::forward<Args>(value_type_args)...).first; // keep only the iterator part of the result of emplace
+    }
+    
+    
+    
+    template<class U = OverflowContainer, typename std::enable_if<!has_key_compare<U>::value>::type* = nullptr>
+    hopscotch_hash new_hopscotch_hash(size_type bucket_count) {
+        return hopscotch_hash(bucket_count, static_cast<Hash&>(*this), static_cast<KeyEqual&>(*this), 
+                              get_allocator(), m_max_load_factor); // reuses the hasher, key-equal, allocator and max load factor of *this
+    }
+    
+    template<class U = OverflowContainer, typename std::enable_if<has_key_compare<U>::value>::type* = nullptr>
+    hopscotch_hash new_hopscotch_hash(size_type bucket_count) {
+        return hopscotch_hash(bucket_count, static_cast<Hash&>(*this), static_cast<KeyEqual&>(*this), 
+                              get_allocator(), m_max_load_factor, m_overflow_elements.key_comp()); // also passes the key comparator of the overflow container
+    }
+    
+public:    
+    static const size_type DEFAULT_INIT_BUCKETS_SIZE = 16;
+    static constexpr float DEFAULT_MAX_LOAD_FACTOR = (NeighborhoodSize <= 30)?0.8f:0.9f;
+    
+private:    
+    static const std::size_t MAX_PROBES_FOR_EMPTY_BUCKET = 12*NeighborhoodSize;
+    static constexpr float MIN_LOAD_FACTOR_FOR_REHASH = 0.1f;
+    
+    static const bool USE_STORED_HASH_ON_REHASH = 
+                StoreHash && std::is_same<GrowthPolicy, tsl::power_of_two_growth_policy>::value;
+    
+private:    
+    buckets_container_type m_buckets;
+    overflow_container_type m_overflow_elements;
+    
+    size_type m_nb_elements;
+    
+    float m_max_load_factor;
+    size_type m_load_threshold;
+    size_type m_min_load_factor_rehash_threshold;
+};
+
+} // end namespace detail_hopscotch_hash
+
+
+} // end namespace tsl
+
+#endif
diff --git a/dbms/src/Common/tests/hopscotch-map/src/hopscotch_map.h b/dbms/src/Common/tests/hopscotch-map/src/hopscotch_map.h
new file mode 100644
index 00000000000..e7b9c2c3d3f
--- /dev/null
+++ b/dbms/src/Common/tests/hopscotch-map/src/hopscotch_map.h
@@ -0,0 +1,666 @@
+/**
+ * MIT License
+ * 
+ * Copyright (c) 2017 Tessil
+ * 
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ * 
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ * 
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#ifndef TSL_HOPSCOTCH_MAP_H
+#define TSL_HOPSCOTCH_MAP_H
+
+
+#include <algorithm>
+#include <cstddef>
+#include <functional>
+#include <initializer_list>
+#include <list>
+#include <memory>
+#include <type_traits>
+#include <utility>
+#include "hopscotch_hash.h"
+
+
+namespace tsl {
+
+/**
+ * Implementation of a hash map using the hopscotch hashing algorithm.
+ * 
+ * The Key and the value T must be either nothrow move-constructible, copy-constructible or both.
+ * 
+ * The size of the neighborhood (NeighborhoodSize) must be > 0 and <= 62 if StoreHash is false.
+ * When StoreHash is true, 32-bits of the hash will be stored alongside the neighborhood limiting
+ * the NeighborhoodSize to <= 30. There is no memory usage difference between 
+ * 'NeighborhoodSize 62; StoreHash false' and 'NeighborhoodSize 30; StoreHash true'.
+ * 
+ * Storing the hash may improve performance on insert during the rehash process if the hash takes time
+ * to compute. It may also improve read performance if the KeyEqual function takes time (or incurs a cache-miss).
+ * If used with simple Hash and KeyEqual it may slow things down.
+ * 
+ * StoreHash can only be set if the GrowthPolicy is set to tsl::power_of_two_growth_policy.
+ * 
+ * GrowthPolicy defines how the map grows and consequently how a hash value is mapped to a bucket. 
+ * By default the map uses tsl::power_of_two_growth_policy. This policy keeps the number of buckets 
+ * to a power of two and uses a mask to map the hash to a bucket instead of the slow modulo.
+ * You may define your own growth policy, check tsl::power_of_two_growth_policy for the interface.
+ * 
+ * If the destructors of Key or T throw an exception, behaviour of the class is undefined.
+ * 
+ * Iterators invalidation:
+ *  - clear, operator=, reserve, rehash: always invalidate the iterators.
+ *  - insert, emplace, emplace_hint, operator[]: if there is an effective insert, invalidate the iterators 
+ *    if a displacement is needed to resolve a collision (which means that most of the time, 
+ *    insert will invalidate the iterators). Or if there is a rehash.
+ *  - erase: iterator on the erased element is the only one which becomes invalid.
+ */
+template<class Key, 
+         class T, 
+         class Hash = std::hash<Key>,
+         class KeyEqual = std::equal_to<Key>,
+         class Allocator = std::allocator<std::pair<Key, T>>,
+         unsigned int NeighborhoodSize = 62,
+         bool StoreHash = false,
+         class GrowthPolicy = tsl::power_of_two_growth_policy>
+class hopscotch_map {
+private:    
+    template<typename U>
+    using has_is_transparent = tsl::detail_hopscotch_hash::has_is_transparent<U>;
+    
+    class KeySelect {
+    public:
+        using key_type = Key;
+        
+        const key_type& operator()(const std::pair<Key, T>& key_value) const {
+            return key_value.first;
+        }
+        
+        key_type& operator()(std::pair<Key, T>& key_value) {
+            return key_value.first;
+        }
+    };  
+    
+    class ValueSelect {
+    public:
+        using value_type = T;
+        
+        const value_type& operator()(const std::pair<Key, T>& key_value) const {
+            return key_value.second;
+        }
+        
+        value_type& operator()(std::pair<Key, T>& key_value) {
+            return key_value.second;
+        }
+    };
+    
+    
+    using overflow_container_type = std::list<std::pair<Key, T>, Allocator>;
+    using ht = detail_hopscotch_hash::hopscotch_hash<std::pair<Key, T>, KeySelect, ValueSelect,
+                                                     Hash, KeyEqual, 
+                                                     Allocator, NeighborhoodSize, 
+                                                     StoreHash, GrowthPolicy,
+                                                     overflow_container_type>;
+    
+public:
+    using key_type = typename ht::key_type;
+    using mapped_type = T;
+    using value_type = typename ht::value_type;
+    using size_type = typename ht::size_type;
+    using difference_type = typename ht::difference_type;
+    using hasher = typename ht::hasher;
+    using key_equal = typename ht::key_equal;
+    using allocator_type = typename ht::allocator_type;
+    using reference = typename ht::reference;
+    using const_reference = typename ht::const_reference;
+    using pointer = typename ht::pointer;
+    using const_pointer = typename ht::const_pointer;
+    using iterator = typename ht::iterator;
+    using const_iterator = typename ht::const_iterator;
+    
+    
+    
+    /*
+     * Constructors
+     */
+    hopscotch_map() : hopscotch_map(ht::DEFAULT_INIT_BUCKETS_SIZE) {
+    }
+    
+    explicit hopscotch_map(size_type bucket_count, 
+                        const Hash& hash = Hash(),
+                        const KeyEqual& equal = KeyEqual(),
+                        const Allocator& alloc = Allocator()) : 
+                        m_ht(bucket_count, hash, equal, alloc, ht::DEFAULT_MAX_LOAD_FACTOR)
+    {
+    }
+    
+    hopscotch_map(size_type bucket_count,
+                  const Allocator& alloc) : hopscotch_map(bucket_count, Hash(), KeyEqual(), alloc)
+    {
+    }
+    
+    hopscotch_map(size_type bucket_count,
+                  const Hash& hash,
+                  const Allocator& alloc) : hopscotch_map(bucket_count, hash, KeyEqual(), alloc)
+    {
+    }
+    
+    explicit hopscotch_map(const Allocator& alloc) : hopscotch_map(ht::DEFAULT_INIT_BUCKETS_SIZE, alloc) {
+    }
+    
+    template<class InputIt>
+    hopscotch_map(InputIt first, InputIt last,
+                size_type bucket_count = ht::DEFAULT_INIT_BUCKETS_SIZE,
+                const Hash& hash = Hash(),
+                const KeyEqual& equal = KeyEqual(),
+                const Allocator& alloc = Allocator()) : hopscotch_map(bucket_count, hash, equal, alloc)
+    {
+        insert(first, last);
+    }
+    
+    template<class InputIt>
+    hopscotch_map(InputIt first, InputIt last,
+                size_type bucket_count,
+                const Allocator& alloc) : hopscotch_map(first, last, bucket_count, Hash(), KeyEqual(), alloc)
+    {
+    }
+    
+    template<class InputIt>
+    hopscotch_map(InputIt first, InputIt last,
+                size_type bucket_count,
+                const Hash& hash,
+                const Allocator& alloc) : hopscotch_map(first, last, bucket_count, hash, KeyEqual(), alloc)
+    {
+    }
+
+    hopscotch_map(std::initializer_list<value_type> init,
+                    size_type bucket_count = ht::DEFAULT_INIT_BUCKETS_SIZE,
+                    const Hash& hash = Hash(),
+                    const KeyEqual& equal = KeyEqual(),
+                    const Allocator& alloc = Allocator()) : 
+                    hopscotch_map(init.begin(), init.end(), bucket_count, hash, equal, alloc)
+    {
+    }
+
+    hopscotch_map(std::initializer_list<value_type> init,
+                    size_type bucket_count,
+                    const Allocator& alloc) : 
+                    hopscotch_map(init.begin(), init.end(), bucket_count, Hash(), KeyEqual(), alloc)
+    {
+    }
+
+    hopscotch_map(std::initializer_list<value_type> init,
+                    size_type bucket_count,
+                    const Hash& hash,
+                    const Allocator& alloc) : 
+                    hopscotch_map(init.begin(), init.end(), bucket_count, hash, KeyEqual(), alloc)
+    {
+    }
+
+    
+    hopscotch_map& operator=(std::initializer_list<value_type> ilist) {
+        m_ht.clear();
+        
+        m_ht.reserve(ilist.size());
+        m_ht.insert(ilist.begin(), ilist.end());
+        
+        return *this;
+    }
+    
+    allocator_type get_allocator() const { return m_ht.get_allocator(); }
+    
+    
+    /*
+     * Iterators
+     */
+    iterator begin() noexcept { return m_ht.begin(); }
+    const_iterator begin() const noexcept { return m_ht.begin(); }
+    const_iterator cbegin() const noexcept { return m_ht.cbegin(); }
+    
+    iterator end() noexcept { return m_ht.end(); }
+    const_iterator end() const noexcept { return m_ht.end(); }
+    const_iterator cend() const noexcept { return m_ht.cend(); }
+    
+    
+    /*
+     * Capacity
+     */
+    bool empty() const noexcept { return m_ht.empty(); }
+    size_type size() const noexcept { return m_ht.size(); }
+    size_type max_size() const noexcept { return m_ht.max_size(); }
+    
+    /*
+     * Modifiers
+     */
+    void clear() noexcept { m_ht.clear(); }
+    
+    
+    
+    
+    std::pair<iterator, bool> insert(const value_type& value) { 
+        return m_ht.insert(value); 
+    }
+        
+    template<class P, typename std::enable_if<std::is_constructible<value_type, P&&>::value>::type* = nullptr>
+    std::pair<iterator, bool> insert(P&& value) { 
+        return m_ht.insert(std::forward<P>(value)); 
+    }
+    
+    std::pair<iterator, bool> insert(value_type&& value) { 
+        return m_ht.insert(std::move(value)); 
+    }
+    
+    
+    iterator insert(const_iterator hint, const value_type& value) { 
+        return m_ht.insert(hint, value); 
+    }
+        
+    template<class P, typename std::enable_if<std::is_constructible<value_type, P&&>::value>::type* = nullptr>
+    iterator insert(const_iterator hint, P&& value) { 
+        return m_ht.insert(hint, std::forward<P>(value));
+    }
+    
+    iterator insert(const_iterator hint, value_type&& value) { 
+        return m_ht.insert(hint, std::move(value)); 
+    }
+    
+    
+    template<class InputIt>
+    void insert(InputIt first, InputIt last) { 
+        m_ht.insert(first, last); 
+    }
+    
+    void insert(std::initializer_list<value_type> ilist) { 
+        m_ht.insert(ilist.begin(), ilist.end()); 
+    }
+
+    
+    
+    
+    template<class M>
+    std::pair<iterator, bool> insert_or_assign(const key_type& k, M&& obj) { 
+        return m_ht.insert_or_assign(k, std::forward<M>(obj)); 
+    }
+
+    template<class M>
+    std::pair<iterator, bool> insert_or_assign(key_type&& k, M&& obj) { 
+        return m_ht.insert_or_assign(std::move(k), std::forward<M>(obj)); 
+    }
+    
+    template<class M>
+    iterator insert_or_assign(const_iterator hint, const key_type& k, M&& obj) {
+        return m_ht.insert_or_assign(hint, k, std::forward<M>(obj));
+    }
+    
+    template<class M>
+    iterator insert_or_assign(const_iterator hint, key_type&& k, M&& obj) {
+        return m_ht.insert_or_assign(hint, std::move(k), std::forward<M>(obj));
+    }
+    
+    
+    
+    
+    /**
+     * Due to the way elements are stored, emplace will need to move or copy the key-value once.
+     * The method is equivalent to insert(value_type(std::forward<Args>(args)...));
+     * 
+     * Mainly here for compatibility with the std::unordered_map interface.
+     */
+    template<class... Args>
+    std::pair<iterator, bool> emplace(Args&&... args) { 
+        return m_ht.emplace(std::forward<Args>(args)...); 
+    }
+    
+    
+    
+    
+    /**
+     * Due to the way elements are stored, emplace_hint will need to move or copy the key-value once.
+     * The method is equivalent to insert(hint, value_type(std::forward<Args>(args)...));
+     * 
+     * Mainly here for compatibility with the std::unordered_map interface.
+     */
+    template<class... Args>
+    iterator emplace_hint(const_iterator hint, Args&&... args) {
+        return m_ht.emplace_hint(hint, std::forward<Args>(args)...);
+    }
+    
+    
+    
+    
+    template<class... Args>
+    std::pair<iterator, bool> try_emplace(const key_type& k, Args&&... args) { 
+        return m_ht.try_emplace(k, std::forward<Args>(args)...);
+    }
+    
+    template<class... Args>
+    std::pair<iterator, bool> try_emplace(key_type&& k, Args&&... args) {
+        return m_ht.try_emplace(std::move(k), std::forward<Args>(args)...);
+    }
+    
+    template<class... Args>
+    iterator try_emplace(const_iterator hint, const key_type& k, Args&&... args) {
+        return m_ht.try_emplace(hint, k, std::forward<Args>(args)...);
+    }
+    
+    template<class... Args>
+    iterator try_emplace(const_iterator hint, key_type&& k, Args&&... args) {
+        return m_ht.try_emplace(hint, std::move(k), std::forward<Args>(args)...);
+    }
+    
+    
+
+    
+    iterator erase(iterator pos) { return m_ht.erase(pos); }
+    iterator erase(const_iterator pos) { return m_ht.erase(pos); }
+    iterator erase(const_iterator first, const_iterator last) { return m_ht.erase(first, last); }
+    size_type erase(const key_type& key) { return m_ht.erase(key); }
+    
+    /**
+     * Use the hash value 'precalculated_hash' instead of hashing the key. The hash value should be the same
+     * as hash_function()(key). Useful to speed up the lookup of the value if you already have the hash.
+     */    
+    size_type erase(const key_type& key, std::size_t precalculated_hash) { 
+        return m_ht.erase(key, precalculated_hash); 
+    }
+    
+    /**
+     * This overload only participates in the overload resolution if the typedef KeyEqual::is_transparent exists. 
+     * If so, K must be hashable and comparable to Key.
+     */
+    template<class K, class KE = KeyEqual, typename std::enable_if<has_is_transparent<KE>::value>::type* = nullptr> 
+    size_type erase(const K& key) { return m_ht.erase(key); }
+    
+    /**
+     * @copydoc erase(const K& key)
+     * 
+     * Use the hash value 'precalculated_hash' instead of hashing the key. The hash value should be the same
+     * as hash_function()(key). Useful to speed up the lookup of the value if you already have the hash.
+     */    
+    template<class K, class KE = KeyEqual, typename std::enable_if<has_is_transparent<KE>::value>::type* = nullptr> 
+    size_type erase(const K& key, std::size_t precalculated_hash) { 
+        return m_ht.erase(key, precalculated_hash); 
+    }
+    
+    
+    
+    
+    void swap(hopscotch_map& other) { other.m_ht.swap(m_ht); }
+    
+    /*
+     * Lookup
+     */
+    T& at(const Key& key) { return m_ht.at(key); }
+    
+    /**
+     * Use the hash value 'precalculated_hash' instead of hashing the key. The hash value should be the same
+     * as hash_function()(key). Useful to speed up the lookup if you already have the hash.
+     */
+    T& at(const Key& key, std::size_t precalculated_hash) { return m_ht.at(key, precalculated_hash); }
+    
+    
+    const T& at(const Key& key) const { return m_ht.at(key); }
+    
+    /**
+     * @copydoc at(const Key& key, std::size_t precalculated_hash)
+     */
+    const T& at(const Key& key, std::size_t precalculated_hash) const { return m_ht.at(key, precalculated_hash); }
+    
+    
+    /**
+     * This overload only participates in the overload resolution if the typedef KeyEqual::is_transparent exists. 
+     * If so, K must be hashable and comparable to Key.
+     */
+    template<class K, class KE = KeyEqual, typename std::enable_if<has_is_transparent<KE>::value>::type* = nullptr> 
+    T& at(const K& key) { return m_ht.at(key); }
+
+    /**
+     * @copydoc at(const K& key)
+     * 
+     * Use the hash value 'precalculated_hash' instead of hashing the key. The hash value should be the same
+     * as hash_function()(key). Useful to speed up the lookup if you already have the hash.
+     */    
+    template<class K, class KE = KeyEqual, typename std::enable_if<has_is_transparent<KE>::value>::type* = nullptr> 
+    T& at(const K& key, std::size_t precalculated_hash) { return m_ht.at(key, precalculated_hash); }
+    
+    
+    /**
+     * @copydoc at(const K& key)
+     */
+    template<class K, class KE = KeyEqual, typename std::enable_if<has_is_transparent<KE>::value>::type* = nullptr> 
+    const T& at(const K& key) const { return m_ht.at(key); }
+    
+    /**
+     * @copydoc at(const K& key, std::size_t precalculated_hash)
+     */    
+    template<class K, class KE = KeyEqual, typename std::enable_if<has_is_transparent<KE>::value>::type* = nullptr> 
+    const T& at(const K& key, std::size_t precalculated_hash) const { return m_ht.at(key, precalculated_hash); }
+    
+    
+    
+    
+    T& operator[](const Key& key) { return m_ht[key]; }    
+    T& operator[](Key&& key) { return m_ht[std::move(key)]; }
+    
+    
+    
+    
+    size_type count(const Key& key) const { return m_ht.count(key); }
+    
+    /**
+     * Use the hash value 'precalculated_hash' instead of hashing the key. The hash value should be the same
+     * as hash_function()(key). Useful to speed up the lookup if you already have the hash.
+     */
+    size_type count(const Key& key, std::size_t precalculated_hash) const { 
+        return m_ht.count(key, precalculated_hash); 
+    }
+    
+    /**
+     * This overload only participates in the overload resolution if the typedef KeyEqual::is_transparent exists. 
+     * If so, K must be hashable and comparable to Key.
+     */
+    template<class K, class KE = KeyEqual, typename std::enable_if<has_is_transparent<KE>::value>::type* = nullptr> 
+    size_type count(const K& key) const { return m_ht.count(key); }
+    
+    /**
+     * @copydoc count(const K& key) const
+     * 
+     * Use the hash value 'precalculated_hash' instead of hashing the key. The hash value should be the same
+     * as hash_function()(key). Useful to speed up the lookup if you already have the hash.
+     */     
+    template<class K, class KE = KeyEqual, typename std::enable_if<has_is_transparent<KE>::value>::type* = nullptr> 
+    size_type count(const K& key, std::size_t precalculated_hash) const { return m_ht.count(key, precalculated_hash); }
+    
+    
+    
+    
+    iterator find(const Key& key) { return m_ht.find(key); }
+    
+    /**
+     * Use the hash value 'precalculated_hash' instead of hashing the key. The hash value should be the same
+     * as hash_function()(key). Useful to speed up the lookup if you already have the hash.
+     */
+    iterator find(const Key& key, std::size_t precalculated_hash) { return m_ht.find(key, precalculated_hash); }
+    
+    const_iterator find(const Key& key) const { return m_ht.find(key); }
+    
+    /**
+     * @copydoc find(const Key& key, std::size_t precalculated_hash)
+     */
+    const_iterator find(const Key& key, std::size_t precalculated_hash) const { 
+        return m_ht.find(key, precalculated_hash);
+    }
+    
+    /**
+     * This overload only participates in the overload resolution if the typedef KeyEqual::is_transparent exists. 
+     * If so, K must be hashable and comparable to Key.
+     */
+    template<class K, class KE = KeyEqual, typename std::enable_if<has_is_transparent<KE>::value>::type* = nullptr> 
+    iterator find(const K& key) { return m_ht.find(key); }
+    
+    /**
+     * @copydoc find(const K& key)
+     * 
+     * Use the hash value 'precalculated_hash' instead of hashing the key. The hash value should be the same
+     * as hash_function()(key). Useful to speed up the lookup if you already have the hash.
+     */
+    template<class K, class KE = KeyEqual, typename std::enable_if<has_is_transparent<KE>::value>::type* = nullptr> 
+    iterator find(const K& key, std::size_t precalculated_hash) { return m_ht.find(key, precalculated_hash); }
+    
+    /**
+     * @copydoc find(const K& key)
+     */
+    template<class K, class KE = KeyEqual, typename std::enable_if<has_is_transparent<KE>::value>::type* = nullptr> 
+    const_iterator find(const K& key) const { return m_ht.find(key); }
+    
+    /**
+     * @copydoc find(const K& key)
+     * 
+     * Use the hash value 'precalculated_hash' instead of hashing the key. The hash value should be the same
+     * as hash_function()(key). Useful to speed up the lookup if you already have the hash.
+     */
+    template<class K, class KE = KeyEqual, typename std::enable_if<has_is_transparent<KE>::value>::type* = nullptr> 
+    const_iterator find(const K& key, std::size_t precalculated_hash) const { 
+        return m_ht.find(key, precalculated_hash); 
+    }
+    
+    
+    
+    
+    std::pair<iterator, iterator> equal_range(const Key& key) { return m_ht.equal_range(key); }
+    
+    /**
+     * Use the hash value 'precalculated_hash' instead of hashing the key. The hash value should be the same
+     * as hash_function()(key). Useful to speed up the lookup if you already have the hash.
+     */
+    std::pair<iterator, iterator> equal_range(const Key& key, std::size_t precalculated_hash) { 
+        return m_ht.equal_range(key, precalculated_hash); 
+    }
+    
+    std::pair<const_iterator, const_iterator> equal_range(const Key& key) const { return m_ht.equal_range(key); }
+    
+    /**
+     * @copydoc equal_range(const Key& key, std::size_t precalculated_hash)
+     */
+    std::pair<const_iterator, const_iterator> equal_range(const Key& key, std::size_t precalculated_hash) const { 
+        return m_ht.equal_range(key, precalculated_hash); 
+    }
+
+    /**
+     * This overload only participates in the overload resolution if the typedef KeyEqual::is_transparent exists. 
+     * If so, K must be hashable and comparable to Key.
+     */
+    template<class K, class KE = KeyEqual, typename std::enable_if<has_is_transparent<KE>::value>::type* = nullptr> 
+    std::pair<iterator, iterator> equal_range(const K& key) { return m_ht.equal_range(key); }
+    
+    
+    /**
+     * @copydoc equal_range(const K& key)
+     * 
+     * Use the hash value 'precalculated_hash' instead of hashing the key. The hash value should be the same
+     * as hash_function()(key). Useful to speed up the lookup if you already have the hash.
+     */
+    template<class K, class KE = KeyEqual, typename std::enable_if<has_is_transparent<KE>::value>::type* = nullptr> 
+    std::pair<iterator, iterator> equal_range(const K& key, std::size_t precalculated_hash) { 
+        return m_ht.equal_range(key, precalculated_hash); 
+    }
+    
+    /**
+     * @copydoc equal_range(const K& key)
+     */
+    template<class K, class KE = KeyEqual, typename std::enable_if<has_is_transparent<KE>::value>::type* = nullptr> 
+    std::pair<const_iterator, const_iterator> equal_range(const K& key) const { return m_ht.equal_range(key); }
+    
+    /**
+     * @copydoc equal_range(const K& key, std::size_t precalculated_hash)
+     */    
+    template<class K, class KE = KeyEqual, typename std::enable_if<has_is_transparent<KE>::value>::type* = nullptr> 
+    std::pair<const_iterator, const_iterator> equal_range(const K& key, std::size_t precalculated_hash) const { 
+        return m_ht.equal_range(key, precalculated_hash); 
+    }
+    
+    
+    
+    
+    /*
+     * Bucket interface 
+     */
+    size_type bucket_count() const { return m_ht.bucket_count(); }
+    size_type max_bucket_count() const { return m_ht.max_bucket_count(); }
+    
+    
+    /*
+     *  Hash policy 
+     */
+    float load_factor() const { return m_ht.load_factor(); }
+    float max_load_factor() const { return m_ht.max_load_factor(); }
+    void max_load_factor(float ml) { m_ht.max_load_factor(ml); }
+    
+    void rehash(size_type count) { m_ht.rehash(count); }
+    void reserve(size_type count) { m_ht.reserve(count); }
+    
+    
+    /*
+     * Observers
+     */
+    hasher hash_function() const { return m_ht.hash_function(); }
+    key_equal key_eq() const { return m_ht.key_eq(); }
+    
+    /*
+     * Other
+     */
+    
+    /**
+     * Convert a const_iterator to an iterator.
+     */
+    iterator mutable_iterator(const_iterator pos) {
+        return m_ht.mutable_iterator(pos);
+    }
+    
+    size_type overflow_size() const noexcept { return m_ht.overflow_size(); }
+    
+    friend bool operator==(const hopscotch_map& lhs, const hopscotch_map& rhs) {
+        if(lhs.size() != rhs.size()) {
+            return false;
+        }
+        
+        for(const auto& element_lhs : lhs) {
+            const auto it_element_rhs = rhs.find(element_lhs.first);
+            if(it_element_rhs == rhs.cend() || element_lhs.second != it_element_rhs->second) {
+                return false;
+            }
+        }
+        
+        return true;
+    }
+
+    friend bool operator!=(const hopscotch_map& lhs, const hopscotch_map& rhs) {
+        return !operator==(lhs, rhs);
+    }
+
+    friend void swap(hopscotch_map& lhs, hopscotch_map& rhs) {
+        lhs.swap(rhs);
+    }
+
+
+    
+private:
+    ht m_ht;
+};
+
+} // end namespace tsl
+
+#endif
diff --git a/dbms/src/Common/tests/hopscotch-map/src/hopscotch_sc_map.h b/dbms/src/Common/tests/hopscotch-map/src/hopscotch_sc_map.h
new file mode 100644
index 00000000000..6af85f3867c
--- /dev/null
+++ b/dbms/src/Common/tests/hopscotch-map/src/hopscotch_sc_map.h
@@ -0,0 +1,663 @@
+/**
+ * MIT License
+ * 
+ * Copyright (c) 2017 Tessil
+ * 
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ * 
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ * 
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#ifndef TSL_HOPSCOTCH_SC_MAP_H
+#define TSL_HOPSCOTCH_SC_MAP_H 
+
+
+#include <algorithm>
+#include <cstddef>
+#include <functional>
+#include <initializer_list>
+#include <map>
+#include <memory>
+#include <type_traits>
+#include <utility>
+#include "hopscotch_hash.h"
+
+
+namespace tsl {
+
+    
+/**
+ * Similar to tsl::hopscotch_map, but instead of using a list for overflowing elements it uses
+ * a binary search tree. It thus needs an additional template parameter Compare, which must be consistent
+ * with KeyEqual (two keys are equivalent under Compare exactly when KeyEqual reports them equal).
+ * 
+ * The binary search tree allows the map to have a worst-case scenario of O(log n) for search 
+ * and delete, even if the hash function maps all the elements to the same bucket. 
+ * For insert, the amortized worst case is O(log n), but the worst case is O(n) in case of rehash.
+ * 
+ * This makes the map resistant to DoS attacks (but you should still use a good hash function,
+ * as an element in the bucket array is faster to retrieve than one in the tree).
+ * 
+ * @copydoc hopscotch_map
+ */
+template<class Key, 
+         class T, 
+         class Hash = std::hash<Key>,
+         class KeyEqual = std::equal_to<Key>,
+         class Compare = std::less<Key>,
+         class Allocator = std::allocator<std::pair<const Key, T>>,
+         unsigned int NeighborhoodSize = 62,
+         bool StoreHash = false,
+         class GrowthPolicy = tsl::power_of_two_growth_policy>
+class hopscotch_sc_map {
+private:
+    template<typename U>
+    using has_is_transparent = tsl::detail_hopscotch_hash::has_is_transparent<U>;
+    
+    class KeySelect {
+    public:
+        using key_type = Key;
+        // Functor returning the key (the `first` member) of a key-value pair.
+        const key_type& operator()(const std::pair<const Key, T>& key_value) const {
+            return key_value.first;
+        }
+        // Overload for a non-const pair; the key is still returned by const reference.
+        const key_type& operator()(std::pair<const Key, T>& key_value) {
+            return key_value.first;
+        }
+    };  
+    
+    class ValueSelect {
+    public:
+        using value_type = T;
+        // Functor returning the mapped value (the `second` member) of a key-value pair; read-only access.
+        const value_type& operator()(const std::pair<const Key, T>& key_value) const {
+            return key_value.second;
+        }
+        // Mutable access. The parameter is std::pair<const Key, T>, the pair type given to ht, so this overload is selectable.
+        value_type& operator()(std::pair<const Key, T>& key_value) {
+            return key_value.second;
+        }
+    };
+    
+    
+    // TODO Not optimal: we have to use std::pair<const Key, T> as ValueType, which prevents 
+    // us from moving the key in the bucket array, so it has to be copied. Optimize.
+    using overflow_container_type = std::map<Key, T, Compare, Allocator>;
+    using ht = detail_hopscotch_hash::hopscotch_hash<std::pair<const Key, T>, KeySelect, ValueSelect,
+                                                     Hash, KeyEqual, 
+                                                     Allocator, NeighborhoodSize, 
+                                                     StoreHash, GrowthPolicy,
+                                                     overflow_container_type>;
+    
+public:
+    using key_type = typename ht::key_type;
+    using mapped_type = T;
+    using value_type = typename ht::value_type;
+    using size_type = typename ht::size_type;
+    using difference_type = typename ht::difference_type;
+    using hasher = typename ht::hasher;
+    using key_equal = typename ht::key_equal;
+    using key_compare = Compare;
+    using allocator_type = typename ht::allocator_type;
+    using reference = typename ht::reference;
+    using const_reference = typename ht::const_reference;
+    using pointer = typename ht::pointer;
+    using const_pointer = typename ht::const_pointer;
+    using iterator = typename ht::iterator;
+    using const_iterator = typename ht::const_iterator;
+    
+    
+    /*
+     * Constructors
+     */
+    hopscotch_sc_map() : hopscotch_sc_map(ht::DEFAULT_INIT_BUCKETS_SIZE) {
+    }
+    
+    explicit hopscotch_sc_map(size_type bucket_count, 
+                        const Hash& hash = Hash(),
+                        const KeyEqual& equal = KeyEqual(),
+                        const Allocator& alloc = Allocator(),
+                        const Compare& comp = Compare()) : 
+                        m_ht(bucket_count, hash, equal, alloc, ht::DEFAULT_MAX_LOAD_FACTOR, comp)
+    {
+    }
+    
+    hopscotch_sc_map(size_type bucket_count,
+                  const Allocator& alloc) : hopscotch_sc_map(bucket_count, Hash(), KeyEqual(), alloc)
+    {
+    }
+    
+    hopscotch_sc_map(size_type bucket_count,
+                  const Hash& hash,
+                  const Allocator& alloc) : hopscotch_sc_map(bucket_count, hash, KeyEqual(), alloc)
+    {
+    }
+    
+    explicit hopscotch_sc_map(const Allocator& alloc) : hopscotch_sc_map(ht::DEFAULT_INIT_BUCKETS_SIZE, alloc) {
+    }
+    
+    template<class InputIt>
+    hopscotch_sc_map(InputIt first, InputIt last,
+                size_type bucket_count = ht::DEFAULT_INIT_BUCKETS_SIZE,
+                const Hash& hash = Hash(),
+                const KeyEqual& equal = KeyEqual(),
+                const Allocator& alloc = Allocator()) : hopscotch_sc_map(bucket_count, hash, equal, alloc)
+    {
+        insert(first, last);
+    }
+    
+    template<class InputIt>
+    hopscotch_sc_map(InputIt first, InputIt last,
+                size_type bucket_count,
+                const Allocator& alloc) : hopscotch_sc_map(first, last, bucket_count, Hash(), KeyEqual(), alloc)
+    {
+    }
+    
+    template<class InputIt>
+    hopscotch_sc_map(InputIt first, InputIt last,
+                size_type bucket_count,
+                const Hash& hash,
+                const Allocator& alloc) : hopscotch_sc_map(first, last, bucket_count, hash, KeyEqual(), alloc)
+    {
+    }
+
+    hopscotch_sc_map(std::initializer_list<value_type> init,
+                    size_type bucket_count = ht::DEFAULT_INIT_BUCKETS_SIZE,
+                    const Hash& hash = Hash(),
+                    const KeyEqual& equal = KeyEqual(),
+                    const Allocator& alloc = Allocator()) : 
+                    hopscotch_sc_map(init.begin(), init.end(), bucket_count, hash, equal, alloc)
+    {
+    }
+
+    hopscotch_sc_map(std::initializer_list<value_type> init,
+                    size_type bucket_count,
+                    const Allocator& alloc) : 
+                    hopscotch_sc_map(init.begin(), init.end(), bucket_count, Hash(), KeyEqual(), alloc)
+    {
+    }
+
+    hopscotch_sc_map(std::initializer_list<value_type> init,
+                    size_type bucket_count,
+                    const Hash& hash,
+                    const Allocator& alloc) : 
+                    hopscotch_sc_map(init.begin(), init.end(), bucket_count, hash, KeyEqual(), alloc)
+    {
+    }
+
+    
+    hopscotch_sc_map& operator=(std::initializer_list<value_type> ilist) {
+        m_ht.clear();
+        // After clearing, reserve for ilist.size() elements and insert the list's elements.
+        m_ht.reserve(ilist.size());
+        m_ht.insert(ilist.begin(), ilist.end());
+        
+        return *this;
+    }
+    
+    allocator_type get_allocator() const { return m_ht.get_allocator(); }
+    
+    
+    /*
+     * Iterators
+     */
+    iterator begin() noexcept { return m_ht.begin(); }
+    const_iterator begin() const noexcept { return m_ht.begin(); }
+    const_iterator cbegin() const noexcept { return m_ht.cbegin(); }
+    
+    iterator end() noexcept { return m_ht.end(); }
+    const_iterator end() const noexcept { return m_ht.end(); }
+    const_iterator cend() const noexcept { return m_ht.cend(); }
+    
+    
+    /*
+     * Capacity
+     */
+    bool empty() const noexcept { return m_ht.empty(); }
+    size_type size() const noexcept { return m_ht.size(); }
+    size_type max_size() const noexcept { return m_ht.max_size(); }
+    
+    /*
+     * Modifiers
+     */
+    void clear() noexcept { m_ht.clear(); }
+    
+    
+    
+    
+    std::pair<iterator, bool> insert(const value_type& value) { 
+        return m_ht.insert(value); 
+    }
+        
+    template<class P, typename std::enable_if<std::is_constructible<value_type, P&&>::value>::type* = nullptr>
+    std::pair<iterator, bool> insert(P&& value) { 
+        return m_ht.insert(std::forward<P>(value)); 
+    }
+    
+    std::pair<iterator, bool> insert(value_type&& value) { 
+        return m_ht.insert(std::move(value)); 
+    }
+    
+    
+    iterator insert(const_iterator hint, const value_type& value) { 
+        return m_ht.insert(hint, value); 
+    }
+        
+    template<class P, typename std::enable_if<std::is_constructible<value_type, P&&>::value>::type* = nullptr>
+    iterator insert(const_iterator hint, P&& value) { 
+        return m_ht.insert(hint, std::forward<P>(value));
+    }
+    
+    iterator insert(const_iterator hint, value_type&& value) { 
+        return m_ht.insert(hint, std::move(value)); 
+    }
+    
+    
+    template<class InputIt>
+    void insert(InputIt first, InputIt last) { 
+        m_ht.insert(first, last); 
+    }
+    
+    void insert(std::initializer_list<value_type> ilist) { 
+        m_ht.insert(ilist.begin(), ilist.end()); 
+    }
+
+    
+    
+    
+    template<class M>
+    std::pair<iterator, bool> insert_or_assign(const key_type& k, M&& obj) { 
+        return m_ht.insert_or_assign(k, std::forward<M>(obj)); 
+    }
+
+    template<class M>
+    std::pair<iterator, bool> insert_or_assign(key_type&& k, M&& obj) { 
+        return m_ht.insert_or_assign(std::move(k), std::forward<M>(obj)); 
+    }
+    
+    template<class M>
+    iterator insert_or_assign(const_iterator hint, const key_type& k, M&& obj) {
+        return m_ht.insert_or_assign(hint, k, std::forward<M>(obj));
+    }
+    
+    template<class M>
+    iterator insert_or_assign(const_iterator hint, key_type&& k, M&& obj) {
+        return m_ht.insert_or_assign(hint, std::move(k), std::forward<M>(obj));
+    }
+    
+    
+    
+    /**
+     * Due to the way elements are stored, emplace will need to move or copy the key-value once.
+     * The method is equivalent to insert(value_type(std::forward<Args>(args)...));
+     * 
+     * Mainly here for compatibility with the std::unordered_map interface.
+     */
+    template<class... Args>
+    std::pair<iterator, bool> emplace(Args&&... args) { 
+        return m_ht.emplace(std::forward<Args>(args)...); 
+    }
+    
+    
+    
+    
+    /**
+     * Due to the way elements are stored, emplace_hint will need to move or copy the key-value once.
+     * The method is equivalent to insert(hint, value_type(std::forward<Args>(args)...));
+     * 
+     * Mainly here for compatibility with the std::unordered_map interface.
+     */
+    template<class... Args>
+    iterator emplace_hint(const_iterator hint, Args&&... args) {
+        return m_ht.emplace_hint(hint, std::forward<Args>(args)...);
+    }
+    
+    
+    
+    
+    template<class... Args>
+    std::pair<iterator, bool> try_emplace(const key_type& k, Args&&... args) { 
+        return m_ht.try_emplace(k, std::forward<Args>(args)...);
+    }
+    
+    template<class... Args>
+    std::pair<iterator, bool> try_emplace(key_type&& k, Args&&... args) {
+        return m_ht.try_emplace(std::move(k), std::forward<Args>(args)...);
+    }
+    
+    template<class... Args>
+    iterator try_emplace(const_iterator hint, const key_type& k, Args&&... args) {
+        return m_ht.try_emplace(hint, k, std::forward<Args>(args)...);
+    }
+    
+    template<class... Args>
+    iterator try_emplace(const_iterator hint, key_type&& k, Args&&... args) {
+        return m_ht.try_emplace(hint, std::move(k), std::forward<Args>(args)...);
+    }
+    
+    
+
+    
+    iterator erase(iterator pos) { return m_ht.erase(pos); }
+    iterator erase(const_iterator pos) { return m_ht.erase(pos); }
+    iterator erase(const_iterator first, const_iterator last) { return m_ht.erase(first, last); }
+    size_type erase(const key_type& key) { return m_ht.erase(key); }
+    
+    /**
+     * Use the hash value 'precalculated_hash' instead of hashing the key. The hash value should be the same
+     * as hash_function()(key). Useful to speed up the lookup of the value if you already have the hash.
+     */    
+    size_type erase(const key_type& key, std::size_t precalculated_hash) { 
+        return m_ht.erase(key, precalculated_hash); 
+    }
+    
+    /**
+     * This overload only participates in the overload resolution if the typedef KeyEqual::is_transparent 
+     * and Compare::is_transparent exist. 
+     * If so, K must be hashable and comparable to Key.
+     */
+    template<class K, class KE = KeyEqual, class CP = Compare, 
+             typename std::enable_if<has_is_transparent<KE>::value && has_is_transparent<CP>::value>::type* = nullptr> 
+    size_type erase(const K& key) { return m_ht.erase(key); }
+    
+    /**
+     * @copydoc erase(const K& key)
+     * 
+     * Use the hash value 'precalculated_hash' instead of hashing the key. The hash value should be the same
+     * as hash_function()(key). Useful to speed up the lookup of the value if you already have the hash.
+     */
+    template<class K, class KE = KeyEqual, class CP = Compare, 
+             typename std::enable_if<has_is_transparent<KE>::value && has_is_transparent<CP>::value>::type* = nullptr> 
+    size_type erase(const K& key, std::size_t precalculated_hash) { return m_ht.erase(key, precalculated_hash); }
+    
+    
+    
+    
+    void swap(hopscotch_sc_map& other) { other.m_ht.swap(m_ht); }
+    
+    /*
+     * Lookup
+     */
+    T& at(const Key& key) { return m_ht.at(key); }
+    
+    /**
+     * Use the hash value 'precalculated_hash' instead of hashing the key. The hash value should be the same
+     * as hash_function()(key). Useful to speed up the lookup if you already have the hash.
+     */    
+    T& at(const Key& key, std::size_t precalculated_hash) { return m_ht.at(key, precalculated_hash); }
+    
+    const T& at(const Key& key) const { return m_ht.at(key); }
+    
+    /**
+     * @copydoc at(const Key& key, std::size_t precalculated_hash)
+     */    
+    const T& at(const Key& key, std::size_t precalculated_hash) const { return m_ht.at(key, precalculated_hash); }
+    
+    /**
+     * This overload only participates in the overload resolution if the typedef KeyEqual::is_transparent 
+     * and Compare::is_transparent exist. 
+     * If so, K must be hashable and comparable to Key.
+     */
+    template<class K, class KE = KeyEqual, class CP = Compare, 
+             typename std::enable_if<has_is_transparent<KE>::value && has_is_transparent<CP>::value>::type* = nullptr> 
+    T& at(const K& key) { return m_ht.at(key); }
+    
+    /**
+     * @copydoc at(const K& key)
+     * 
+     * Use the hash value 'precalculated_hash' instead of hashing the key. The hash value should be the same
+     * as hash_function()(key). Useful to speed up the lookup if you already have the hash.
+     */    
+    template<class K, class KE = KeyEqual, class CP = Compare, 
+             typename std::enable_if<has_is_transparent<KE>::value && has_is_transparent<CP>::value>::type* = nullptr> 
+    T& at(const K& key, std::size_t precalculated_hash) { return m_ht.at(key, precalculated_hash); }
+    
+    /**
+     * @copydoc at(const K& key)
+     */
+    template<class K, class KE = KeyEqual, class CP = Compare, 
+             typename std::enable_if<has_is_transparent<KE>::value && has_is_transparent<CP>::value>::type* = nullptr> 
+    const T& at(const K& key) const { return m_ht.at(key); }
+    
+    /**
+     * @copydoc at(const K& key, std::size_t precalculated_hash)
+     */    
+    template<class K, class KE = KeyEqual, class CP = Compare, 
+             typename std::enable_if<has_is_transparent<KE>::value && has_is_transparent<CP>::value>::type* = nullptr> 
+    const T& at(const K& key, std::size_t precalculated_hash) const { return m_ht.at(key, precalculated_hash); }
+    
+    
+    
+    
+    T& operator[](const Key& key) { return m_ht[key]; }    
+    T& operator[](Key&& key) { return m_ht[std::move(key)]; }
+    
+    
+    
+    
+    size_type count(const Key& key) const { return m_ht.count(key); }
+    
+    /**
+     * Use the hash value 'precalculated_hash' instead of hashing the key. The hash value should be the same
+     * as hash_function()(key). Useful to speed up the lookup if you already have the hash.
+     */
+    size_type count(const Key& key, std::size_t precalculated_hash) const { return m_ht.count(key, precalculated_hash); }
+    
+    /**
+     * This overload only participates in the overload resolution if the typedef KeyEqual::is_transparent 
+     * and Compare::is_transparent exist. 
+     * If so, K must be hashable and comparable to Key.
+     */
+    template<class K, class KE = KeyEqual, class CP = Compare, 
+             typename std::enable_if<has_is_transparent<KE>::value && has_is_transparent<CP>::value>::type* = nullptr> 
+    size_type count(const K& key) const { return m_ht.count(key); }
+    
+    /**
+     * @copydoc count(const K& key) const
+     * 
+     * Use the hash value 'precalculated_hash' instead of hashing the key. The hash value should be the same
+     * as hash_function()(key). Useful to speed up the lookup if you already have the hash.
+     */     
+    template<class K, class KE = KeyEqual, class CP = Compare, 
+             typename std::enable_if<has_is_transparent<KE>::value && has_is_transparent<CP>::value>::type* = nullptr> 
+    size_type count(const K& key, std::size_t precalculated_hash) const { return m_ht.count(key, precalculated_hash); }
+    
+    
+    
+    
+    iterator find(const Key& key) { return m_ht.find(key); }
+    
+    /**
+     * Use the hash value 'precalculated_hash' instead of hashing the key. The hash value should be the same
+     * as hash_function()(key). Useful to speed up the lookup if you already have the hash.
+     */
+    iterator find(const Key& key, std::size_t precalculated_hash) { return m_ht.find(key, precalculated_hash); }
+    
+    const_iterator find(const Key& key) const { return m_ht.find(key); }
+    
+    /**
+     * @copydoc find(const Key& key, std::size_t precalculated_hash)
+     */
+    const_iterator find(const Key& key, std::size_t precalculated_hash) const { return m_ht.find(key, precalculated_hash); }
+    
+    /**
+     * This overload only participates in the overload resolution if the typedef KeyEqual::is_transparent 
+     * and Compare::is_transparent exist. 
+     * If so, K must be hashable and comparable to Key.
+     */
+    template<class K, class KE = KeyEqual, class CP = Compare, 
+             typename std::enable_if<has_is_transparent<KE>::value && has_is_transparent<CP>::value>::type* = nullptr> 
+    iterator find(const K& key) { return m_ht.find(key); }
+    
+    /**
+     * @copydoc find(const K& key)
+     * 
+     * Use the hash value 'precalculated_hash' instead of hashing the key. The hash value should be the same
+     * as hash_function()(key). Useful to speed up the lookup if you already have the hash.
+     */
+    template<class K, class KE = KeyEqual, class CP = Compare, 
+             typename std::enable_if<has_is_transparent<KE>::value && has_is_transparent<CP>::value>::type* = nullptr> 
+    iterator find(const K& key, std::size_t precalculated_hash) { return m_ht.find(key, precalculated_hash); }
+    
+    /**
+     * @copydoc find(const K& key)
+     */
+    template<class K, class KE = KeyEqual, class CP = Compare, 
+             typename std::enable_if<has_is_transparent<KE>::value && has_is_transparent<CP>::value>::type* = nullptr> 
+    const_iterator find(const K& key) const { return m_ht.find(key); }
+    
+    /**
+     * @copydoc find(const K& key, std::size_t precalculated_hash)
+     */
+    template<class K, class KE = KeyEqual, class CP = Compare, 
+             typename std::enable_if<has_is_transparent<KE>::value && has_is_transparent<CP>::value>::type* = nullptr> 
+    const_iterator find(const K& key, std::size_t precalculated_hash) const { return m_ht.find(key, precalculated_hash); }
+    
+    
+    
+    
+    std::pair<iterator, iterator> equal_range(const Key& key) { return m_ht.equal_range(key); }
+    
+    /**
+     * Use the hash value 'precalculated_hash' instead of hashing the key. The hash value should be the same
+     * as hash_function()(key). Useful to speed up the lookup if you already have the hash.
+     */
+    std::pair<iterator, iterator> equal_range(const Key& key, std::size_t precalculated_hash) { 
+        return m_ht.equal_range(key, precalculated_hash); 
+    }
+    
+    std::pair<const_iterator, const_iterator> equal_range(const Key& key) const { return m_ht.equal_range(key); }
+    
+    /**
+     * @copydoc equal_range(const Key& key, std::size_t precalculated_hash)
+     */
+    std::pair<const_iterator, const_iterator> equal_range(const Key& key, std::size_t precalculated_hash) const { 
+        return m_ht.equal_range(key, precalculated_hash); 
+    }
+
+    /**
+     * This overload only participates in the overload resolution if the typedef KeyEqual::is_transparent 
+     * and Compare::is_transparent exist. 
+     * If so, K must be hashable and comparable to Key.
+     */
+    template<class K, class KE = KeyEqual, class CP = Compare, 
+             typename std::enable_if<has_is_transparent<KE>::value && has_is_transparent<CP>::value>::type* = nullptr> 
+    std::pair<iterator, iterator> equal_range(const K& key) { return m_ht.equal_range(key); }
+    
+    /**
+     * @copydoc equal_range(const K& key)
+     * 
+     * Use the hash value 'precalculated_hash' instead of hashing the key. The hash value should be the same
+     * as hash_function()(key). Useful to speed up the lookup if you already have the hash.
+     */    
+    template<class K, class KE = KeyEqual, class CP = Compare, 
+             typename std::enable_if<has_is_transparent<KE>::value && has_is_transparent<CP>::value>::type* = nullptr> 
+    std::pair<iterator, iterator> equal_range(const K& key, std::size_t precalculated_hash) { 
+        return m_ht.equal_range(key, precalculated_hash); 
+    }
+    
+    /**
+     * @copydoc equal_range(const K& key)
+     */
+    template<class K, class KE = KeyEqual, class CP = Compare, 
+             typename std::enable_if<has_is_transparent<KE>::value && has_is_transparent<CP>::value>::type* = nullptr> 
+    std::pair<const_iterator, const_iterator> equal_range(const K& key) const { return m_ht.equal_range(key); }
+    
+    /**
+     * @copydoc equal_range(const K& key, std::size_t precalculated_hash)
+     */    
+    template<class K, class KE = KeyEqual, class CP = Compare, 
+             typename std::enable_if<has_is_transparent<KE>::value && has_is_transparent<CP>::value>::type* = nullptr> 
+    std::pair<const_iterator, const_iterator> equal_range(const K& key, std::size_t precalculated_hash) const { 
+        return m_ht.equal_range(key, precalculated_hash); 
+    }
+    
+    
+    
+    
+    /*
+     * Bucket interface 
+     */
+    size_type bucket_count() const { return m_ht.bucket_count(); }
+    size_type max_bucket_count() const { return m_ht.max_bucket_count(); }
+    
+    
+    /*
+     *  Hash policy 
+     */
+    float load_factor() const { return m_ht.load_factor(); }
+    float max_load_factor() const { return m_ht.max_load_factor(); }
+    void max_load_factor(float ml) { m_ht.max_load_factor(ml); }
+    
+    void rehash(size_type count) { m_ht.rehash(count); }
+    void reserve(size_type count) { m_ht.reserve(count); }
+    
+    
+    /*
+     * Observers
+     */
+    hasher hash_function() const { return m_ht.hash_function(); }
+    key_equal key_eq() const { return m_ht.key_eq(); }
+    key_compare key_comp() const { return m_ht.key_comp(); }
+    
+    /*
+     * Other
+     */
+    
+    /**
+     * Convert a const_iterator to an iterator.
+     */
+    iterator mutable_iterator(const_iterator pos) {
+        return m_ht.mutable_iterator(pos);
+    }
+    
+    size_type overflow_size() const noexcept { return m_ht.overflow_size(); }
+    
+    friend bool operator==(const hopscotch_sc_map& lhs, const hopscotch_sc_map& rhs) {
+        // Equal when the sizes match and every key of lhs is present in rhs with an equal mapped value.
+        if(lhs.size() != rhs.size()) {
+            return false;
+        }
+        // Mapped values are compared with operator== only, so T does not need to provide operator!=.
+        for(const auto& element_lhs : lhs) {
+            const auto it_element_rhs = rhs.find(element_lhs.first);
+            if(it_element_rhs == rhs.cend() || !(element_lhs.second == it_element_rhs->second)) {
+                return false;
+            }
+        }
+        return true;
+    }
+
+    friend bool operator!=(const hopscotch_sc_map& lhs, const hopscotch_sc_map& rhs) {
+        return !(lhs == rhs); // exact negation of the equality comparison
+    }
+
+    friend void swap(hopscotch_sc_map& lhs, hopscotch_sc_map& rhs) {
+        lhs.swap(rhs);
+    }
+
+
+    
+private:
+    ht m_ht;
+};
+  
+
+} // end namespace tsl
+
+
+#endif
diff --git a/dbms/src/Common/tests/hopscotch-map/src/hopscotch_sc_set.h b/dbms/src/Common/tests/hopscotch-map/src/hopscotch_sc_set.h
new file mode 100644
index 00000000000..29eb1d5e3b7
--- /dev/null
+++ b/dbms/src/Common/tests/hopscotch-map/src/hopscotch_sc_set.h
@@ -0,0 +1,518 @@
+/**
+ * MIT License
+ * 
+ * Copyright (c) 2017 Tessil
+ * 
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ * 
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ * 
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#ifndef TSL_HOPSCOTCH_SC_SET_H
+#define TSL_HOPSCOTCH_SC_SET_H 
+
+
+#include <algorithm>
+#include <cstddef>
+#include <functional>
+#include <initializer_list>
+#include <memory>
+#include <set>
+#include <type_traits>
+#include <utility>
+#include "hopscotch_hash.h"
+
+
+namespace tsl {
+    
+    
+/**
+ * Similar to tsl::hopscotch_set but instead of using a list for overflowing elements it uses
+ * a binary search tree. It thus needs an additional template parameter Compare. Compare should
+ * be arithmetically coherent with KeyEqual.
+ * 
+ * The binary search tree allows the set to have a worst-case scenario of O(log n) for search 
+ * and delete, even if the hash function maps all the elements to the same bucket. 
+ * For insert, the amortized worst case is O(log n), but the worst case is O(n) in case of rehash.
+ * 
+ * This makes the set resistant to DoS attacks (but it does not remove the need for a good hash function,
+ * as an element in the bucket array is faster to retrieve than one in the tree).
+ * 
+ * @copydoc hopscotch_set
+ */
+template<class Key, 
+         class Hash = std::hash<Key>,
+         class KeyEqual = std::equal_to<Key>,
+         class Compare = std::less<Key>,
+         class Allocator = std::allocator<Key>,
+         unsigned int NeighborhoodSize = 62,
+         bool StoreHash = false,
+         class GrowthPolicy = tsl::power_of_two_growth_policy>
+class hopscotch_sc_set {
+private:    
+    template<typename U>
+    using has_is_transparent = tsl::detail_hopscotch_hash::has_is_transparent<U>;
+    
+    class KeySelect {
+    public:
+        using key_type = Key;
+        // Key extractor handed to the hash table: for a set the stored value is the key itself.
+        const key_type& operator()(const Key& key) const {
+            return key;
+        }
+        // Non-const overload: returns a mutable reference to the same object it was given.
+        key_type& operator()(Key& key) {
+            return key;
+        }
+    };
+    
+    
+    using overflow_container_type = std::set<Key, Compare, Allocator>;
+    using ht = tsl::detail_hopscotch_hash::hopscotch_hash<Key, KeySelect, void,
+                                                     Hash, KeyEqual, 
+                                                     Allocator, NeighborhoodSize, 
+                                                     StoreHash, GrowthPolicy,
+                                                     overflow_container_type>;
+            
+public:
+    using key_type = typename ht::key_type;
+    using value_type = typename ht::value_type;
+    using size_type = typename ht::size_type;
+    using difference_type = typename ht::difference_type;
+    using hasher = typename ht::hasher;
+    using key_equal = typename ht::key_equal;
+    using key_compare = Compare;
+    using allocator_type = typename ht::allocator_type;
+    using reference = typename ht::reference;
+    using const_reference = typename ht::const_reference;
+    using pointer = typename ht::pointer;
+    using const_pointer = typename ht::const_pointer;
+    using iterator = typename ht::iterator;
+    using const_iterator = typename ht::const_iterator;
+
+    
+    /*
+     * Constructors
+     */
+    hopscotch_sc_set() : hopscotch_sc_set(ht::DEFAULT_INIT_BUCKETS_SIZE) {
+    }
+    
+    explicit hopscotch_sc_set(size_type bucket_count, 
+                        const Hash& hash = Hash(),
+                        const KeyEqual& equal = KeyEqual(),
+                        const Allocator& alloc = Allocator(),
+                        const Compare& comp = Compare()) : 
+                        m_ht(bucket_count, hash, equal, alloc, ht::DEFAULT_MAX_LOAD_FACTOR, comp)
+    {
+    }
+    
+    hopscotch_sc_set(size_type bucket_count,
+                  const Allocator& alloc) : hopscotch_sc_set(bucket_count, Hash(), KeyEqual(), alloc)
+    {
+    }
+    
+    hopscotch_sc_set(size_type bucket_count,
+                  const Hash& hash,
+                  const Allocator& alloc) : hopscotch_sc_set(bucket_count, hash, KeyEqual(), alloc)
+    {
+    }
+    
+    explicit hopscotch_sc_set(const Allocator& alloc) : hopscotch_sc_set(ht::DEFAULT_INIT_BUCKETS_SIZE, alloc) {
+    }
+    
+    template<class InputIt>
+    hopscotch_sc_set(InputIt first, InputIt last,
+                size_type bucket_count = ht::DEFAULT_INIT_BUCKETS_SIZE,
+                const Hash& hash = Hash(),
+                const KeyEqual& equal = KeyEqual(),
+                const Allocator& alloc = Allocator()) : hopscotch_sc_set(bucket_count, hash, equal, alloc)
+    {
+        insert(first, last);
+    }
+    
+    template<class InputIt>
+    hopscotch_sc_set(InputIt first, InputIt last,
+                size_type bucket_count,
+                const Allocator& alloc) : hopscotch_sc_set(first, last, bucket_count, Hash(), KeyEqual(), alloc)
+    {
+    }
+    
+    template<class InputIt>
+    hopscotch_sc_set(InputIt first, InputIt last,
+                size_type bucket_count,
+                const Hash& hash,
+                const Allocator& alloc) : hopscotch_sc_set(first, last, bucket_count, hash, KeyEqual(), alloc)
+    {
+    }
+
+    hopscotch_sc_set(std::initializer_list<value_type> init,
+                    size_type bucket_count = ht::DEFAULT_INIT_BUCKETS_SIZE,
+                    const Hash& hash = Hash(),
+                    const KeyEqual& equal = KeyEqual(),
+                    const Allocator& alloc = Allocator()) : 
+                    hopscotch_sc_set(init.begin(), init.end(), bucket_count, hash, equal, alloc)
+    {
+    }
+
+    hopscotch_sc_set(std::initializer_list<value_type> init,
+                    size_type bucket_count,
+                    const Allocator& alloc) : 
+                    hopscotch_sc_set(init.begin(), init.end(), bucket_count, Hash(), KeyEqual(), alloc)
+    {
+    }
+
+    hopscotch_sc_set(std::initializer_list<value_type> init,
+                    size_type bucket_count,
+                    const Hash& hash,
+                    const Allocator& alloc) : 
+                    hopscotch_sc_set(init.begin(), init.end(), bucket_count, hash, KeyEqual(), alloc)
+    {
+    }
+
+    
+    hopscotch_sc_set& operator=(std::initializer_list<value_type> ilist) {
+        // Replace the whole content of the set with the elements of ilist.
+        m_ht.clear();
+        // Ask the table to reserve for ilist.size() elements before the range insert.
+        m_ht.reserve(ilist.size());
+        m_ht.insert(ilist.begin(), ilist.end());
+        // Return *this so assignments can be chained.
+        return *this;
+    }
+    
+    allocator_type get_allocator() const { return m_ht.get_allocator(); }
+    
+    
+    /*
+     * Iterators
+     */
+    iterator begin() noexcept { return m_ht.begin(); }
+    const_iterator begin() const noexcept { return m_ht.begin(); }
+    const_iterator cbegin() const noexcept { return m_ht.cbegin(); }
+    
+    iterator end() noexcept { return m_ht.end(); }
+    const_iterator end() const noexcept { return m_ht.end(); }
+    const_iterator cend() const noexcept { return m_ht.cend(); }
+    
+    
+    /*
+     * Capacity
+     */
+    bool empty() const noexcept { return m_ht.empty(); }
+    size_type size() const noexcept { return m_ht.size(); }
+    size_type max_size() const noexcept { return m_ht.max_size(); }
+    
+    /*
+     * Modifiers
+     */
+    void clear() noexcept { m_ht.clear(); }
+    
+    
+    
+    
+    std::pair<iterator, bool> insert(const value_type& value) { return m_ht.insert(value); }
+    std::pair<iterator, bool> insert(value_type&& value) { return m_ht.insert(std::move(value)); }
+    
+    iterator insert(const_iterator hint, const value_type& value) { return m_ht.insert(hint, value); }
+    iterator insert(const_iterator hint, value_type&& value) { return m_ht.insert(hint, std::move(value)); }
+    
+    template<class InputIt>
+    void insert(InputIt first, InputIt last) { m_ht.insert(first, last); }
+    void insert(std::initializer_list<value_type> ilist) { m_ht.insert(ilist.begin(), ilist.end()); }
+
+    
+    
+    
+    /**
+     * Due to the way elements are stored, emplace will need to move or copy the key once.
+     * The method is equivalent to insert(value_type(std::forward<Args>(args)...));
+     * 
+     * Mainly here for compatibility with the std::unordered_set interface.
+     */
+    template<class... Args>
+    std::pair<iterator, bool> emplace(Args&&... args) { return m_ht.emplace(std::forward<Args>(args)...); }
+    
+    
+    
+    
+    /**
+     * Due to the way elements are stored, emplace_hint will need to move or copy the key once.
+     * The method is equivalent to insert(hint, value_type(std::forward<Args>(args)...));
+     * 
+     * Mainly here for compatibility with the std::unordered_set interface.
+     */
+    template<class... Args>
+    iterator emplace_hint(const_iterator hint, Args&&... args) {
+        return m_ht.emplace_hint(hint, std::forward<Args>(args)...);
+    }
+
+    
+    
+    
+    iterator erase(iterator pos) { return m_ht.erase(pos); }
+    iterator erase(const_iterator pos) { return m_ht.erase(pos); }
+    iterator erase(const_iterator first, const_iterator last) { return m_ht.erase(first, last); }
+    size_type erase(const key_type& key) { return m_ht.erase(key); }
+    
+    /**
+     * Use the hash value 'precalculated_hash' instead of hashing the key. The hash value should be the same
+     * as hash_function()(key). Useful to speed up the lookup of the value if you already have the hash.
+     */    
+    size_type erase(const key_type& key, std::size_t precalculated_hash) { 
+        return m_ht.erase(key, precalculated_hash); 
+    }
+    
+    /**
+     * This overload only participates in the overload resolution if the typedef KeyEqual::is_transparent 
+     * and Compare::is_transparent exist. 
+     * If so, K must be hashable and comparable to Key.
+     */
+    template<class K, class KE = KeyEqual, class CP = Compare, 
+             typename std::enable_if<has_is_transparent<KE>::value && has_is_transparent<CP>::value>::type* = nullptr> 
+    size_type erase(const K& key) { return m_ht.erase(key); }
+    
+    /**
+     * @copydoc erase(const K& key)
+     * 
+     * Use the hash value 'precalculated_hash' instead of hashing the key. The hash value should be the same
+     * as hash_function()(key). Useful to speed up the lookup of the value if you already have the hash.
+     */
+    template<class K, class KE = KeyEqual, class CP = Compare, 
+             typename std::enable_if<has_is_transparent<KE>::value && has_is_transparent<CP>::value>::type* = nullptr> 
+    size_type erase(const K& key, std::size_t precalculated_hash) { return m_ht.erase(key, precalculated_hash); }
+    
+    
+    
+    
+    void swap(hopscotch_sc_set& other) { other.m_ht.swap(m_ht); }
+    
+    
+    /*
+     * Lookup
+     */
+    size_type count(const Key& key) const { return m_ht.count(key); }
+    
+    /**
+     * Use the hash value 'precalculated_hash' instead of hashing the key. The hash value should be the same
+     * as hash_function()(key). Useful to speed up the lookup if you already have the hash.
+     */
+    size_type count(const Key& key, std::size_t precalculated_hash) const { return m_ht.count(key, precalculated_hash); }
+    
+    /**
+     * This overload only participates in the overload resolution if the typedef KeyEqual::is_transparent 
+     * and Compare::is_transparent exist. 
+     * If so, K must be hashable and comparable to Key.
+     */
+    template<class K, class KE = KeyEqual, class CP = Compare, 
+             typename std::enable_if<has_is_transparent<KE>::value && has_is_transparent<CP>::value>::type* = nullptr> 
+    size_type count(const K& key) const { return m_ht.count(key); }
+    
+    /**
+     * @copydoc count(const K& key) const
+     * 
+     * Use the hash value 'precalculated_hash' instead of hashing the key. The hash value should be the same
+     * as hash_function()(key). Useful to speed up the lookup if you already have the hash.
+     */
+    template<class K, class KE = KeyEqual, class CP = Compare, 
+             typename std::enable_if<has_is_transparent<KE>::value && has_is_transparent<CP>::value>::type* = nullptr> 
+    size_type count(const K& key, std::size_t precalculated_hash) const { return m_ht.count(key, precalculated_hash); }
+    
+    
+    
+    
+    iterator find(const Key& key) { return m_ht.find(key); }
+    
+    /**
+     * Use the hash value 'precalculated_hash' instead of hashing the key. The hash value should be the same
+     * as hash_function()(key). Useful to speed up the lookup if you already have the hash.
+     */
+    iterator find(const Key& key, std::size_t precalculated_hash) { return m_ht.find(key, precalculated_hash); }
+    
+    const_iterator find(const Key& key) const { return m_ht.find(key); }
+    
+    /**
+     * @copydoc find(const Key& key, std::size_t precalculated_hash)
+     */
+    const_iterator find(const Key& key, std::size_t precalculated_hash) const { return m_ht.find(key, precalculated_hash); }
+        
+    /**
+     * This overload only participates in the overload resolution if the typedef KeyEqual::is_transparent 
+     * and Compare::is_transparent exist. 
+     * If so, K must be hashable and comparable to Key.
+     */
+    template<class K, class KE = KeyEqual, class CP = Compare, 
+             typename std::enable_if<has_is_transparent<KE>::value && has_is_transparent<CP>::value>::type* = nullptr> 
+    iterator find(const K& key) { return m_ht.find(key); }
+    
+    /**
+     * @copydoc find(const K& key)
+     * 
+     * Use the hash value 'precalculated_hash' instead of hashing the key. The hash value should be the same
+     * as hash_function()(key). Useful to speed up the lookup if you already have the hash.
+     */
+    template<class K, class KE = KeyEqual, class CP = Compare, 
+             typename std::enable_if<has_is_transparent<KE>::value && has_is_transparent<CP>::value>::type* = nullptr> 
+    iterator find(const K& key, std::size_t precalculated_hash) { return m_ht.find(key, precalculated_hash); }
+    
+    /**
+     * @copydoc find(const K& key)
+     */
+    template<class K, class KE = KeyEqual, class CP = Compare, 
+             typename std::enable_if<has_is_transparent<KE>::value && has_is_transparent<CP>::value>::type* = nullptr> 
+    const_iterator find(const K& key) const { return m_ht.find(key); }
+    
+    /**
+     * @copydoc find(const K& key)
+     * 
+     * Use the hash value 'precalculated_hash' instead of hashing the key. The hash value should be the same
+     * as hash_function()(key). Useful to speed up the lookup if you already have the hash.
+     */
+    template<class K, class KE = KeyEqual, class CP = Compare, 
+             typename std::enable_if<has_is_transparent<KE>::value && has_is_transparent<CP>::value>::type* = nullptr> 
+    const_iterator find(const K& key, std::size_t precalculated_hash) const { return m_ht.find(key, precalculated_hash); }
+    
+    
+    
+    
+    std::pair<iterator, iterator> equal_range(const Key& key) { return m_ht.equal_range(key); }
+    
+    /**
+     * Use the hash value 'precalculated_hash' instead of hashing the key. The hash value should be the same
+     * as hash_function()(key). Useful to speed up the lookup if you already have the hash.
+     */
+    std::pair<iterator, iterator> equal_range(const Key& key, std::size_t precalculated_hash) { 
+        return m_ht.equal_range(key, precalculated_hash);
+    }
+    
+    std::pair<const_iterator, const_iterator> equal_range(const Key& key) const { return m_ht.equal_range(key); }
+    
+    /**
+     * @copydoc equal_range(const Key& key, std::size_t precalculated_hash)
+     */
+    std::pair<const_iterator, const_iterator> equal_range(const Key& key, std::size_t precalculated_hash) const { 
+        return m_ht.equal_range(key, precalculated_hash); 
+    }
+    
+    /**
+     * This overload only participates in the overload resolution if the typedef KeyEqual::is_transparent 
+     * and Compare::is_transparent exist. 
+     * If so, K must be hashable and comparable to Key.
+     */
+    template<class K, class KE = KeyEqual, class CP = Compare, 
+             typename std::enable_if<has_is_transparent<KE>::value && has_is_transparent<CP>::value>::type* = nullptr> 
+    std::pair<iterator, iterator> equal_range(const K& key) { return m_ht.equal_range(key); }
+    
+    /**
+     * @copydoc equal_range(const K& key)
+     * 
+     * Use the hash value 'precalculated_hash' instead of hashing the key. The hash value should be the same
+     * as hash_function()(key). Useful to speed up the lookup if you already have the hash.
+     */
+    template<class K, class KE = KeyEqual, class CP = Compare, 
+             typename std::enable_if<has_is_transparent<KE>::value && has_is_transparent<CP>::value>::type* = nullptr> 
+    std::pair<iterator, iterator> equal_range(const K& key, std::size_t precalculated_hash) { 
+        return m_ht.equal_range(key, precalculated_hash); 
+    }    
+    
+    /**
+     * @copydoc equal_range(const K& key)
+     */
+    template<class K, class KE = KeyEqual, class CP = Compare, 
+             typename std::enable_if<has_is_transparent<KE>::value && has_is_transparent<CP>::value>::type* = nullptr> 
+    std::pair<const_iterator, const_iterator> equal_range(const K& key) const { return m_ht.equal_range(key); }    
+    
+    /**
+     * @copydoc equal_range(const K& key, std::size_t precalculated_hash)
+     */
+    template<class K, class KE = KeyEqual, class CP = Compare, 
+             typename std::enable_if<has_is_transparent<KE>::value && has_is_transparent<CP>::value>::type* = nullptr> 
+    std::pair<const_iterator, const_iterator> equal_range(const K& key, std::size_t precalculated_hash) const { 
+        return m_ht.equal_range(key, precalculated_hash); 
+    }
+    
+    
+    
+
+    /*
+     * Bucket interface 
+     */
+    size_type bucket_count() const { return m_ht.bucket_count(); }
+    size_type max_bucket_count() const { return m_ht.max_bucket_count(); }
+    
+    
+    /*
+     *  Hash policy 
+     */
+    float load_factor() const { return m_ht.load_factor(); }
+    float max_load_factor() const { return m_ht.max_load_factor(); }
+    void max_load_factor(float ml) { m_ht.max_load_factor(ml); }
+    
+    void rehash(size_type count) { m_ht.rehash(count); }
+    void reserve(size_type count) { m_ht.reserve(count); }
+    
+    
+    /*
+     * Observers
+     */
+    hasher hash_function() const { return m_ht.hash_function(); }
+    key_equal key_eq() const { return m_ht.key_eq(); }
+    key_compare key_comp() const { return m_ht.key_comp(); }
+    
+    
+    /*
+     * Other
+     */
+    
+    /**
+     * Convert a const_iterator to an iterator.
+     */
+    iterator mutable_iterator(const_iterator pos) {
+        return m_ht.mutable_iterator(pos);
+    }
+    
+    size_type overflow_size() const noexcept { return m_ht.overflow_size(); }
+    
+    friend bool operator==(const hopscotch_sc_set& lhs, const hopscotch_sc_set& rhs) {
+        // Sets holding a different number of keys cannot be equal.
+        if(lhs.size() != rhs.size()) { return false; }
+        
+        // Every key of lhs must also be found in rhs.
+        for(const auto& key : lhs) {
+            const bool absent = (rhs.find(key) == rhs.cend());
+            if(absent) {
+                return false;
+            }
+        }
+        
+        return true;
+    }
+
+    friend bool operator!=(const hopscotch_sc_set& lhs, const hopscotch_sc_set& rhs) {
+        return !operator==(lhs, rhs);
+    }
+
+    friend void swap(hopscotch_sc_set& lhs, hopscotch_sc_set& rhs) {
+        lhs.swap(rhs);
+    }
+    
+private:
+    ht m_ht;    
+};
+  
+
+} // end namespace tsl
+
+
+#endif 
diff --git a/dbms/src/Common/tests/hopscotch-map/src/hopscotch_set.h b/dbms/src/Common/tests/hopscotch-map/src/hopscotch_set.h
new file mode 100644
index 00000000000..72be57fa7e3
--- /dev/null
+++ b/dbms/src/Common/tests/hopscotch-map/src/hopscotch_set.h
@@ -0,0 +1,513 @@
+/**
+ * MIT License
+ * 
+ * Copyright (c) 2017 Tessil
+ * 
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ * 
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ * 
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#ifndef TSL_HOPSCOTCH_SET_H
+#define TSL_HOPSCOTCH_SET_H
+
+
+#include <algorithm>
+#include <cstddef>
+#include <functional>
+#include <initializer_list>
+#include <list>
+#include <memory>
+#include <type_traits>
+#include <utility>
+#include "hopscotch_hash.h"
+
+
+namespace tsl {
+
+/**
+ * Implementation of a hash set using the hopscotch hashing algorithm.
+ * 
+ * The Key must be either nothrow move-constructible, copy-constructible or both.
+ * 
+ * The size of the neighborhood (NeighborhoodSize) must be > 0 and <= 62 if StoreHash is false.
+ * When StoreHash is true, 32-bits of the hash will be stored alongside the neighborhood limiting
+ * the NeighborhoodSize to <= 30. There is no memory usage difference between 
+ * 'NeighborhoodSize 62; StoreHash false' and 'NeighborhoodSize 30; StoreHash true'.
+ * 
+ * Storing the hash may improve performance on insert during the rehash process if the hash takes time
+ * to compute. It may also improve read performance if the KeyEqual function takes time (or incurs a cache-miss).
+ * If used with simple Hash and KeyEqual it may slow things down.
+ * 
+ * StoreHash can only be set if the GrowthPolicy is set to tsl::power_of_two_growth_policy.
+ * 
+ * GrowthPolicy defines how the set grows and consequently how a hash value is mapped to a bucket. 
+ * By default the set uses tsl::power_of_two_growth_policy. This policy keeps the number of buckets 
+ * to a power of two and uses a mask to set the hash to a bucket instead of the slow modulo.
+ * You may define your own growth policy, check tsl::power_of_two_growth_policy for the interface.
+ * 
+ * If the destructor of Key throws an exception, behaviour of the class is undefined.
+ * 
+ * Iterators invalidation:
+ *  - clear, operator=, reserve, rehash: always invalidate the iterators.
+ *  - insert, emplace, emplace_hint, operator[]: if there is an effective insert, invalidate the iterators 
+ *    if a displacement is needed to resolve a collision (which means that most of the time, 
+ *    insert will invalidate the iterators), or if there is a rehash.
+ *  - erase: the iterator on the erased element is the only one which becomes invalid.
+ */
+template<class Key, 
+         class Hash = std::hash<Key>,
+         class KeyEqual = std::equal_to<Key>,
+         class Allocator = std::allocator<Key>,
+         unsigned int NeighborhoodSize = 62,
+         bool StoreHash = false,
+         class GrowthPolicy = tsl::power_of_two_growth_policy>
+class hopscotch_set {
+private:    
+    template<typename U>
+    using has_is_transparent = tsl::detail_hopscotch_hash::has_is_transparent<U>;
+    
+    class KeySelect {
+    public:
+        using key_type = Key;
+        // Key extractor handed to the hash table: for a set the stored value is the key itself.
+        const key_type& operator()(const Key& key) const {
+            return key;
+        }
+        // Non-const overload: returns a mutable reference to the same object it was given.
+        key_type& operator()(Key& key) {
+            return key;
+        }
+    };
+    
+    
+    using overflow_container_type = std::list<Key, Allocator>;
+    using ht = detail_hopscotch_hash::hopscotch_hash<Key, KeySelect, void,
+                                                     Hash, KeyEqual, 
+                                                     Allocator, NeighborhoodSize, 
+                                                     StoreHash, GrowthPolicy,
+                                                     overflow_container_type>;
+            
+public:
+    using key_type = typename ht::key_type;
+    using value_type = typename ht::value_type;
+    using size_type = typename ht::size_type;
+    using difference_type = typename ht::difference_type;
+    using hasher = typename ht::hasher;
+    using key_equal = typename ht::key_equal;
+    using allocator_type = typename ht::allocator_type;
+    using reference = typename ht::reference;
+    using const_reference = typename ht::const_reference;
+    using pointer = typename ht::pointer;
+    using const_pointer = typename ht::const_pointer;
+    using iterator = typename ht::iterator;
+    using const_iterator = typename ht::const_iterator;
+
+    
+    /*
+     * Constructors
+     */
+    hopscotch_set() : hopscotch_set(ht::DEFAULT_INIT_BUCKETS_SIZE) {
+    }
+    
+    explicit hopscotch_set(size_type bucket_count, 
+                        const Hash& hash = Hash(),
+                        const KeyEqual& equal = KeyEqual(),
+                        const Allocator& alloc = Allocator()) : 
+                        m_ht(bucket_count, hash, equal, alloc, ht::DEFAULT_MAX_LOAD_FACTOR)
+    {
+    }
+    
+    hopscotch_set(size_type bucket_count,
+                  const Allocator& alloc) : hopscotch_set(bucket_count, Hash(), KeyEqual(), alloc)
+    {
+    }
+    
+    hopscotch_set(size_type bucket_count,
+                  const Hash& hash,
+                  const Allocator& alloc) : hopscotch_set(bucket_count, hash, KeyEqual(), alloc)
+    {
+    }
+    
+    explicit hopscotch_set(const Allocator& alloc) : hopscotch_set(ht::DEFAULT_INIT_BUCKETS_SIZE, alloc) {
+    }
+    
+    template<class InputIt>
+    hopscotch_set(InputIt first, InputIt last,
+                size_type bucket_count = ht::DEFAULT_INIT_BUCKETS_SIZE,
+                const Hash& hash = Hash(),
+                const KeyEqual& equal = KeyEqual(),
+                const Allocator& alloc = Allocator()) : hopscotch_set(bucket_count, hash, equal, alloc)
+    {
+        insert(first, last);
+    }
+    
+    template<class InputIt>
+    hopscotch_set(InputIt first, InputIt last,
+                size_type bucket_count,
+                const Allocator& alloc) : hopscotch_set(first, last, bucket_count, Hash(), KeyEqual(), alloc)
+    {
+    }
+    
+    template<class InputIt>
+    hopscotch_set(InputIt first, InputIt last,
+                size_type bucket_count,
+                const Hash& hash,
+                const Allocator& alloc) : hopscotch_set(first, last, bucket_count, hash, KeyEqual(), alloc)
+    {
+    }
+
+    hopscotch_set(std::initializer_list<value_type> init,
+                    size_type bucket_count = ht::DEFAULT_INIT_BUCKETS_SIZE,
+                    const Hash& hash = Hash(),
+                    const KeyEqual& equal = KeyEqual(),
+                    const Allocator& alloc = Allocator()) : 
+                    hopscotch_set(init.begin(), init.end(), bucket_count, hash, equal, alloc)
+    {
+    }
+
+    hopscotch_set(std::initializer_list<value_type> init,
+                    size_type bucket_count,
+                    const Allocator& alloc) : 
+                    hopscotch_set(init.begin(), init.end(), bucket_count, Hash(), KeyEqual(), alloc)
+    {
+    }
+
+    hopscotch_set(std::initializer_list<value_type> init,
+                    size_type bucket_count,
+                    const Hash& hash,
+                    const Allocator& alloc) : 
+                    hopscotch_set(init.begin(), init.end(), bucket_count, hash, KeyEqual(), alloc)
+    {
+    }
+
+    
+    hopscotch_set& operator=(std::initializer_list<value_type> ilist) {
+        // Replace the whole content of the set with the elements of ilist.
+        m_ht.clear();
+        // Ask the table to reserve for ilist.size() elements before the range insert.
+        m_ht.reserve(ilist.size());
+        m_ht.insert(ilist.begin(), ilist.end());
+        // Return *this so assignments can be chained.
+        return *this;
+    }
+    
+    allocator_type get_allocator() const { return m_ht.get_allocator(); }
+    
+    
+    /*
+     * Iterators
+     */
+    iterator begin() noexcept { return m_ht.begin(); }
+    const_iterator begin() const noexcept { return m_ht.begin(); }
+    const_iterator cbegin() const noexcept { return m_ht.cbegin(); }
+    
+    iterator end() noexcept { return m_ht.end(); }
+    const_iterator end() const noexcept { return m_ht.end(); }
+    const_iterator cend() const noexcept { return m_ht.cend(); }
+    
+    
+    /*
+     * Capacity
+     */
+    bool empty() const noexcept { return m_ht.empty(); }
+    size_type size() const noexcept { return m_ht.size(); }
+    size_type max_size() const noexcept { return m_ht.max_size(); }
+    
+    /*
+     * Modifiers
+     */
+    void clear() noexcept { m_ht.clear(); }
+    
+    
+    
+    
+    std::pair<iterator, bool> insert(const value_type& value) { return m_ht.insert(value); }
+    std::pair<iterator, bool> insert(value_type&& value) { return m_ht.insert(std::move(value)); }
+    
+    iterator insert(const_iterator hint, const value_type& value) { return m_ht.insert(hint, value); }
+    iterator insert(const_iterator hint, value_type&& value) { return m_ht.insert(hint, std::move(value)); }
+    
+    template<class InputIt>
+    void insert(InputIt first, InputIt last) { m_ht.insert(first, last); }
+    void insert(std::initializer_list<value_type> ilist) { m_ht.insert(ilist.begin(), ilist.end()); }
+
+    
+    
+    
+    /**
+     * Due to the way elements are stored, emplace will need to move or copy the key once.
+     * The method is equivalent to insert(value_type(std::forward<Args>(args)...));
+     * 
+     * Mainly here for compatibility with the std::unordered_set interface.
+     */
+    template<class... Args>
+    std::pair<iterator, bool> emplace(Args&&... args) { return m_ht.emplace(std::forward<Args>(args)...); }
+    
+    
+    
+    
+    /**
+     * Due to the way elements are stored, emplace_hint will need to move or copy the key once.
+     * The method is equivalent to insert(hint, value_type(std::forward<Args>(args)...));
+     * 
+     * Mainly here for compatibility with the std::unordered_set interface.
+     */
+    template<class... Args>
+    iterator emplace_hint(const_iterator hint, Args&&... args) {
+        return m_ht.emplace_hint(hint, std::forward<Args>(args)...);
+    }
+
+    
+    
+    
+    iterator erase(iterator pos) { return m_ht.erase(pos); }
+    iterator erase(const_iterator pos) { return m_ht.erase(pos); }
+    iterator erase(const_iterator first, const_iterator last) { return m_ht.erase(first, last); }
+    size_type erase(const key_type& key) { return m_ht.erase(key); }
+    
+    /**
+     * Use the hash value 'precalculated_hash' instead of hashing the key. The hash value should be the same
+     * as hash_function()(key). Useful to speed up the lookup to the value if you already have the hash.
+     */    
+    size_type erase(const key_type& key, std::size_t precalculated_hash) { 
+        return m_ht.erase(key, precalculated_hash); 
+    }
+    
+    /**
+     * This overload only participates in the overload resolution if the typedef KeyEqual::is_transparent exists. 
+     * If so, K must be hashable and comparable to Key.
+     */
+    template<class K, class KE = KeyEqual, typename std::enable_if<has_is_transparent<KE>::value>::type* = nullptr> 
+    size_type erase(const K& key) { return m_ht.erase(key); }
+    
+    /**
+     * @copydoc erase(const K& key)
+     * 
+     * Use the hash value 'precalculated_hash' instead of hashing the key. The hash value should be the same
+     * as hash_function()(key). Useful to speed up the lookup to the value if you already have the hash.
+     */    
+    template<class K, class KE = KeyEqual, typename std::enable_if<has_is_transparent<KE>::value>::type* = nullptr> 
+    size_type erase(const K& key, std::size_t precalculated_hash) { 
+        return m_ht.erase(key, precalculated_hash); 
+    }
+    
+    
+    
+    
+    void swap(hopscotch_set& other) { other.m_ht.swap(m_ht); }
+    
+    
+    /*
+     * Lookup
+     */
+    size_type count(const Key& key) const { return m_ht.count(key); }
+    
+    /**
+     * Use the hash value 'precalculated_hash' instead of hashing the key. The hash value should be the same
+     * as hash_function()(key). Useful to speed up the lookup if you already have the hash.
+     */
+    size_type count(const Key& key, std::size_t precalculated_hash) const { return m_ht.count(key, precalculated_hash); }
+    
+    /**
+     * This overload only participates in the overload resolution if the typedef KeyEqual::is_transparent exists. 
+     * If so, K must be hashable and comparable to Key.
+     */
+    template<class K, class KE = KeyEqual, typename std::enable_if<has_is_transparent<KE>::value>::type* = nullptr> 
+    size_type count(const K& key) const { return m_ht.count(key); }
+    
+    /**
+     * @copydoc count(const K& key) const
+     * 
+     * Use the hash value 'precalculated_hash' instead of hashing the key. The hash value should be the same
+     * as hash_function()(key). Useful to speed up the lookup if you already have the hash.
+     */
+    template<class K, class KE = KeyEqual, typename std::enable_if<has_is_transparent<KE>::value>::type* = nullptr> 
+    size_type count(const K& key, std::size_t precalculated_hash) const { return m_ht.count(key, precalculated_hash); }
+    
+    
+    
+    
+    iterator find(const Key& key) { return m_ht.find(key); }
+    
+    /**
+     * Use the hash value 'precalculated_hash' instead of hashing the key. The hash value should be the same
+     * as hash_function()(key). Useful to speed up the lookup if you already have the hash.
+     */
+    iterator find(const Key& key, std::size_t precalculated_hash) { return m_ht.find(key, precalculated_hash); }
+    
+    const_iterator find(const Key& key) const { return m_ht.find(key); }
+    
+    /**
+     * @copydoc find(const Key& key, std::size_t precalculated_hash)
+     */
+    const_iterator find(const Key& key, std::size_t precalculated_hash) const { return m_ht.find(key, precalculated_hash); }
+    
+    /**
+     * This overload only participates in the overload resolution if the typedef KeyEqual::is_transparent exists. 
+     * If so, K must be hashable and comparable to Key.
+     */
+    template<class K, class KE = KeyEqual, typename std::enable_if<has_is_transparent<KE>::value>::type* = nullptr> 
+    iterator find(const K& key) { return m_ht.find(key); }
+    
+    /**
+     * @copydoc find(const K& key)
+     * 
+     * Use the hash value 'precalculated_hash' instead of hashing the key. The hash value should be the same
+     * as hash_function()(key). Useful to speed up the lookup if you already have the hash.
+     */
+    template<class K, class KE = KeyEqual, typename std::enable_if<has_is_transparent<KE>::value>::type* = nullptr> 
+    iterator find(const K& key, std::size_t precalculated_hash) { return m_ht.find(key, precalculated_hash); }
+    
+    /**
+     * @copydoc find(const K& key)
+     */
+    template<class K, class KE = KeyEqual, typename std::enable_if<has_is_transparent<KE>::value>::type* = nullptr> 
+    const_iterator find(const K& key) const { return m_ht.find(key); }
+    
+    /**
+     * @copydoc find(const K& key)
+     * 
+     * Use the hash value 'precalculated_hash' instead of hashing the key. The hash value should be the same
+     * as hash_function()(key). Useful to speed up the lookup if you already have the hash.
+     */
+    template<class K, class KE = KeyEqual, typename std::enable_if<has_is_transparent<KE>::value>::type* = nullptr> 
+    const_iterator find(const K& key, std::size_t precalculated_hash) const { return m_ht.find(key, precalculated_hash); }
+    
+    
+    
+    
+    std::pair<iterator, iterator> equal_range(const Key& key) { return m_ht.equal_range(key); }
+    
+    /**
+     * Use the hash value 'precalculated_hash' instead of hashing the key. The hash value should be the same
+     * as hash_function()(key). Useful to speed up the lookup if you already have the hash.
+     */
+    std::pair<iterator, iterator> equal_range(const Key& key, std::size_t precalculated_hash) { 
+        return m_ht.equal_range(key, precalculated_hash); 
+    }
+    
+    std::pair<const_iterator, const_iterator> equal_range(const Key& key) const { return m_ht.equal_range(key); }
+    
+    /**
+     * @copydoc equal_range(const Key& key, std::size_t precalculated_hash)
+     */
+    std::pair<const_iterator, const_iterator> equal_range(const Key& key, std::size_t precalculated_hash) const { 
+        return m_ht.equal_range(key, precalculated_hash); 
+    }
+    
+    /**
+     * This overload only participates in the overload resolution if the typedef KeyEqual::is_transparent exists. 
+     * If so, K must be hashable and comparable to Key.
+     */
+    template<class K, class KE = KeyEqual, typename std::enable_if<has_is_transparent<KE>::value>::type* = nullptr> 
+    std::pair<iterator, iterator> equal_range(const K& key) { return m_ht.equal_range(key); }
+    
+    /**
+     * @copydoc equal_range(const K& key)
+     * 
+     * Use the hash value 'precalculated_hash' instead of hashing the key. The hash value should be the same
+     * as hash_function()(key). Useful to speed up the lookup if you already have the hash.
+     */    
+    template<class K, class KE = KeyEqual, typename std::enable_if<has_is_transparent<KE>::value>::type* = nullptr> 
+    std::pair<iterator, iterator> equal_range(const K& key, std::size_t precalculated_hash) { 
+        return m_ht.equal_range(key, precalculated_hash); 
+    }
+    
+    /**
+     * @copydoc equal_range(const K& key)
+     */
+    template<class K, class KE = KeyEqual, typename std::enable_if<has_is_transparent<KE>::value>::type* = nullptr> 
+    std::pair<const_iterator, const_iterator> equal_range(const K& key) const { return m_ht.equal_range(key); }
+
+    /**
+     * @copydoc equal_range(const K& key, std::size_t precalculated_hash)
+     */    
+    template<class K, class KE = KeyEqual, typename std::enable_if<has_is_transparent<KE>::value>::type* = nullptr> 
+    std::pair<const_iterator, const_iterator> equal_range(const K& key, std::size_t precalculated_hash) const { 
+        return m_ht.equal_range(key, precalculated_hash); 
+    }
+    
+    
+    
+
+    /*
+     * Bucket interface 
+     */
+    size_type bucket_count() const { return m_ht.bucket_count(); }
+    size_type max_bucket_count() const { return m_ht.max_bucket_count(); }
+    
+    
+    /*
+     *  Hash policy 
+     */
+    float load_factor() const { return m_ht.load_factor(); }
+    float max_load_factor() const { return m_ht.max_load_factor(); }
+    void max_load_factor(float ml) { m_ht.max_load_factor(ml); }
+    
+    void rehash(size_type count) { m_ht.rehash(count); }
+    void reserve(size_type count) { m_ht.reserve(count); }
+    
+    
+    /*
+     * Observers
+     */
+    hasher hash_function() const { return m_ht.hash_function(); }
+    key_equal key_eq() const { return m_ht.key_eq(); }
+    
+    
+    /*
+     * Other
+     */
+    
+    /**
+     * Convert a const_iterator to an iterator.
+     */
+    iterator mutable_iterator(const_iterator pos) {
+        return m_ht.mutable_iterator(pos);
+    }
+    
+    size_type overflow_size() const noexcept { return m_ht.overflow_size(); }
+    
+    friend bool operator==(const hopscotch_set& lhs, const hopscotch_set& rhs) {
+        if(lhs.size() != rhs.size()) {
+            return false;
+        }
+        // Sizes match here, so it is enough to check that every element of lhs can be found in rhs.
+        for(const auto& element_lhs : lhs) {
+            const auto it_element_rhs = rhs.find(element_lhs);
+            if(it_element_rhs == rhs.cend()) {
+                return false;
+            }
+        }
+        
+        return true;
+    }
+
+    friend bool operator!=(const hopscotch_set& lhs, const hopscotch_set& rhs) {
+        return !operator==(lhs, rhs);
+    }
+
+    friend void swap(hopscotch_set& lhs, hopscotch_set& rhs) {
+        lhs.swap(rhs);
+    }
+    
+private:
+    ht m_ht;    
+};
+
+} // end namespace tsl
+
+#endif

From f8b2400a841e11ef9d7a0eac43f507a4209cd084 Mon Sep 17 00:00:00 2001
From: Alexey Milovidov <milovidov@yandex-team.ru>
Date: Mon, 21 Aug 2017 23:22:08 +0300
Subject: [PATCH 252/281] More sophisticated test of hash tables with integer
 keys [#CLICKHOUSE-3244].

---
 dbms/src/Common/tests/CMakeLists.txt                        | 4 ++++
 .../tests/integer_hash_tables_and_hashes.cpp}               | 6 ++++++
 dbms/src/Interpreters/tests/CMakeLists.txt                  | 4 ----
 3 files changed, 10 insertions(+), 4 deletions(-)
 rename dbms/src/{Interpreters/tests/hash_map2.cpp => Common/tests/integer_hash_tables_and_hashes.cpp} (99%)

diff --git a/dbms/src/Common/tests/CMakeLists.txt b/dbms/src/Common/tests/CMakeLists.txt
index 072d5547301..dd5f67c997b 100644
--- a/dbms/src/Common/tests/CMakeLists.txt
+++ b/dbms/src/Common/tests/CMakeLists.txt
@@ -57,3 +57,7 @@ target_link_libraries (array_cache dbms)
 
 add_executable (space_saving space_saving.cpp)
 target_link_libraries (space_saving dbms)
+
+add_executable (integer_hash_tables_and_hashes integer_hash_tables_and_hashes.cpp)
+target_include_directories (integer_hash_tables_and_hashes BEFORE PRIVATE ${SPARCEHASH_INCLUDE_DIR})
+target_link_libraries (integer_hash_tables_and_hashes dbms)
diff --git a/dbms/src/Interpreters/tests/hash_map2.cpp b/dbms/src/Common/tests/integer_hash_tables_and_hashes.cpp
similarity index 99%
rename from dbms/src/Interpreters/tests/hash_map2.cpp
rename to dbms/src/Common/tests/integer_hash_tables_and_hashes.cpp
index 0d946eb43a8..bce973ba223 100644
--- a/dbms/src/Interpreters/tests/hash_map2.cpp
+++ b/dbms/src/Common/tests/integer_hash_tables_and_hashes.cpp
@@ -17,6 +17,8 @@
 #include <IO/CompressedReadBuffer.h>
 #include <Common/HashTable/HashMap.h>
 
+#include "hopscotch-map/src/hopscotch_map.h"
+
 
 using Key = UInt64;
 using Value = UInt64;
@@ -304,6 +306,9 @@ void NO_INLINE testForEachHash(const Key * data, size_t size, Init && init)
     test<Map, Hashes::TabulationHash>(data, size, init);
 }
 
+template <typename Key, typename Mapped, typename Hash>
+using HopscotchMap = tsl::hopscotch_map<Key, Mapped, Hash>;
+
 void NO_INLINE testForEachMapAndHash(const Key * data, size_t size)
 {
     auto nothing = [](auto & map){};
@@ -312,6 +317,7 @@ void NO_INLINE testForEachMapAndHash(const Key * data, size_t size)
     testForEachHash<std::unordered_map>(data, size, nothing);
     testForEachHash<google::dense_hash_map>(data, size, [](auto & map){ map.set_empty_key(-1); });
     testForEachHash<google::sparse_hash_map>(data, size, nothing);
+    testForEachHash<HopscotchMap>(data, size, nothing);
 }
 
 
diff --git a/dbms/src/Interpreters/tests/CMakeLists.txt b/dbms/src/Interpreters/tests/CMakeLists.txt
index 233b4a8e5b7..48793cbcf8d 100644
--- a/dbms/src/Interpreters/tests/CMakeLists.txt
+++ b/dbms/src/Interpreters/tests/CMakeLists.txt
@@ -14,10 +14,6 @@ add_executable (hash_map hash_map.cpp)
 target_include_directories (hash_map BEFORE PRIVATE ${SPARCEHASH_INCLUDE_DIR})
 target_link_libraries (hash_map dbms)
 
-add_executable (hash_map2 hash_map2.cpp)
-target_include_directories (hash_map2 BEFORE PRIVATE ${SPARCEHASH_INCLUDE_DIR})
-target_link_libraries (hash_map2 dbms)
-
 add_executable (hash_map3 hash_map3.cpp)
 target_link_libraries (hash_map3 dbms)
 

From 8085359518f8223d5d4280a1bbf5d199fa6bb944 Mon Sep 17 00:00:00 2001
From: Alexey Milovidov <milovidov@yandex-team.ru>
Date: Mon, 21 Aug 2017 23:36:00 +0300
Subject: [PATCH 253/281] More sophisticated test of hash tables with integer
 keys [#CLICKHOUSE-3244].

---
 .../tests/integer_hash_tables_and_hashes.cpp  | 25 ++++++++++++++-----
 1 file changed, 19 insertions(+), 6 deletions(-)

diff --git a/dbms/src/Common/tests/integer_hash_tables_and_hashes.cpp b/dbms/src/Common/tests/integer_hash_tables_and_hashes.cpp
index bce973ba223..0909b419cbe 100644
--- a/dbms/src/Common/tests/integer_hash_tables_and_hashes.cpp
+++ b/dbms/src/Common/tests/integer_hash_tables_and_hashes.cpp
@@ -10,7 +10,7 @@
 #include <Common/Stopwatch.h>
 
 //#define DBMS_HASH_MAP_COUNT_COLLISIONS
-#define DBMS_HASH_MAP_DEBUG_RESIZES
+//#define DBMS_HASH_MAP_DEBUG_RESIZES
 
 #include <Core/Types.h>
 #include <IO/ReadBufferFromFile.h>
@@ -37,6 +37,7 @@ namespace Hashes
         }
     };
 
+    /// Actually this is even worse than IdentityHash.
     struct SimpleMultiplyHash
     {
         size_t operator()(Key x) const
@@ -267,10 +268,21 @@ namespace Hashes
 }
 
 
+template <typename Key, typename Mapped, typename Hash>
+using HopscotchMap = tsl::hopscotch_map<Key, Mapped, Hash>;
+
+
 
 template <template <typename...> class Map, typename Hash>
 void NO_INLINE test(const Key * data, size_t size, std::function<void(Map<Key, Value, Hash>&)> init = {})
 {
+    if (std::is_same<Map<Key, Value, Hash>, HopscotchMap<Key, Value, Hashes::IdentityHash>>::value
+        || std::is_same<Map<Key, Value, Hash>, HopscotchMap<Key, Value, Hashes::SimpleMultiplyHash>>::value)
+    {
+        std::cerr << __PRETTY_FUNCTION__ << ":\nDisqualified\n";
+        return;
+    }
+
     Stopwatch watch;
 
     Map<Key, Value, Hash> map;
@@ -283,8 +295,7 @@ void NO_INLINE test(const Key * data, size_t size, std::function<void(Map<Key, V
     watch.stop();
     std::cerr << __PRETTY_FUNCTION__
         << ":\nElapsed: " << watch.elapsedSeconds()
-        << " (" << size / watch.elapsedSeconds() << " elem/sec.)"
-        << std::endl;
+        << " (" << size / watch.elapsedSeconds() << " elem/sec.)\n";
 }
 
 template <template <typename...> class Map, typename Init>
@@ -306,9 +317,6 @@ void NO_INLINE testForEachHash(const Key * data, size_t size, Init && init)
     test<Map, Hashes::TabulationHash>(data, size, init);
 }
 
-template <typename Key, typename Mapped, typename Hash>
-using HopscotchMap = tsl::hopscotch_map<Key, Mapped, Hash>;
-
 void NO_INLINE testForEachMapAndHash(const Key * data, size_t size)
 {
     auto nothing = [](auto & map){};
@@ -348,6 +356,11 @@ int main(int argc, char ** argv)
             << std::endl;
     }
 
+    /** Actually we should not run multiple tests within the same invocation of the binary,
+      *  because the order of tests could alter test results (due to the state of the allocator and various minor reasons),
+      *  but in this case it's OK.
+      */
+
     testForEachMapAndHash(data.data(), data.size());
     return 0;
 }

From f39db7ba1aefa99a04717c5d4e2c99b08a767b3f Mon Sep 17 00:00:00 2001
From: Alexey Milovidov <milovidov@yandex-team.ru>
Date: Mon, 21 Aug 2017 23:38:16 +0300
Subject: [PATCH 254/281] More sophisticated test of hash tables with integer
 keys [#CLICKHOUSE-3244].

---
 dbms/src/Common/tests/integer_hash_tables_and_hashes.cpp | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/dbms/src/Common/tests/integer_hash_tables_and_hashes.cpp b/dbms/src/Common/tests/integer_hash_tables_and_hashes.cpp
index 0909b419cbe..3e24376e798 100644
--- a/dbms/src/Common/tests/integer_hash_tables_and_hashes.cpp
+++ b/dbms/src/Common/tests/integer_hash_tables_and_hashes.cpp
@@ -335,7 +335,7 @@ int main(int argc, char ** argv)
     size_t n = atoi(argv[1]);
 //    size_t m = atoi(argv[2]);
 
-    std::cerr << std::fixed << std::setprecision(2);
+    std::cerr << std::fixed << std::setprecision(3);
 
     std::vector<Key> data(n);
 

From 003d2525e0e01dca3cfae4bfc51bb66f92457f18 Mon Sep 17 00:00:00 2001
From: Alexey Milovidov <milovidov@yandex-team.ru>
Date: Mon, 21 Aug 2017 23:39:26 +0300
Subject: [PATCH 255/281] More sophisticated test of hash tables with integer
 keys [#CLICKHOUSE-3244].

---
 dbms/src/Common/tests/integer_hash_tables_and_hashes.cpp | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/dbms/src/Common/tests/integer_hash_tables_and_hashes.cpp b/dbms/src/Common/tests/integer_hash_tables_and_hashes.cpp
index 3e24376e798..e2f7b8613c1 100644
--- a/dbms/src/Common/tests/integer_hash_tables_and_hashes.cpp
+++ b/dbms/src/Common/tests/integer_hash_tables_and_hashes.cpp
@@ -80,6 +80,7 @@ namespace Hashes
         }
     };
 
+    /// Pretty bad, only for illustration purposes.
     struct MixAllBitsHash
     {
         size_t operator()(Key x) const
@@ -277,7 +278,8 @@ template <template <typename...> class Map, typename Hash>
 void NO_INLINE test(const Key * data, size_t size, std::function<void(Map<Key, Value, Hash>&)> init = {})
 {
     if (std::is_same<Map<Key, Value, Hash>, HopscotchMap<Key, Value, Hashes::IdentityHash>>::value
-        || std::is_same<Map<Key, Value, Hash>, HopscotchMap<Key, Value, Hashes::SimpleMultiplyHash>>::value)
+        || std::is_same<Map<Key, Value, Hash>, HopscotchMap<Key, Value, Hashes::SimpleMultiplyHash>>::value
+        || std::is_same<Map<Key, Value, Hash>, HopscotchMap<Key, Value, Hashes::MixAllBitsHash>>::value)
     {
         std::cerr << __PRETTY_FUNCTION__ << ":\nDisqualified\n";
         return;

From 033b139288b85962717fffebc466522f147c77d9 Mon Sep 17 00:00:00 2001
From: Alexey Milovidov <milovidov@yandex-team.ru>
Date: Mon, 21 Aug 2017 23:45:21 +0300
Subject: [PATCH 256/281] More sophisticated test of hash tables with integer
 keys [#CLICKHOUSE-3244].

---
 dbms/src/Common/tests/integer_hash_tables_and_hashes.cpp | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/dbms/src/Common/tests/integer_hash_tables_and_hashes.cpp b/dbms/src/Common/tests/integer_hash_tables_and_hashes.cpp
index e2f7b8613c1..06613e5014d 100644
--- a/dbms/src/Common/tests/integer_hash_tables_and_hashes.cpp
+++ b/dbms/src/Common/tests/integer_hash_tables_and_hashes.cpp
@@ -297,7 +297,8 @@ void NO_INLINE test(const Key * data, size_t size, std::function<void(Map<Key, V
     watch.stop();
     std::cerr << __PRETTY_FUNCTION__
         << ":\nElapsed: " << watch.elapsedSeconds()
-        << " (" << size / watch.elapsedSeconds() << " elem/sec.)\n";
+        << " (" << size / watch.elapsedSeconds() << " elem/sec.)"
+        << ", map size: " << map.size() << "\n";
 }
 
 template <template <typename...> class Map, typename Init>

From c34211e0c5215f255493cbb1ef66708a359f877d Mon Sep 17 00:00:00 2001
From: Alexey Milovidov <milovidov@yandex-team.ru>
Date: Tue, 22 Aug 2017 00:43:20 +0300
Subject: [PATCH 257/281] More sophisticated test of hash tables with integer
 keys [#CLICKHOUSE-3244].

---
 .../tests/integer_hash_tables_and_hashes.cpp  | 21 +++++++++++++++++++
 1 file changed, 21 insertions(+)

diff --git a/dbms/src/Common/tests/integer_hash_tables_and_hashes.cpp b/dbms/src/Common/tests/integer_hash_tables_and_hashes.cpp
index 06613e5014d..dfcf4259832 100644
--- a/dbms/src/Common/tests/integer_hash_tables_and_hashes.cpp
+++ b/dbms/src/Common/tests/integer_hash_tables_and_hashes.cpp
@@ -16,6 +16,7 @@
 #include <IO/ReadBufferFromFile.h>
 #include <IO/CompressedReadBuffer.h>
 #include <Common/HashTable/HashMap.h>
+#include <Common/SipHash.h>
 
 #include "hopscotch-map/src/hopscotch_map.h"
 
@@ -190,6 +191,24 @@ namespace Hashes
         }
     };
 
+    struct CityHash  /// Hashes the raw bytes of the key with CityHash_v1_0_2::CityHash64.
+    {
+        size_t operator()(Key x) const
+        {
+            return CityHash_v1_0_2::CityHash64(reinterpret_cast<const char *>(&x), sizeof(x));
+        }
+    };
+
+    struct SipHash  /// Feeds the raw bytes of the key to ::SipHash and returns the result of get64().
+    {
+        size_t operator()(Key x) const
+        {
+            ::SipHash hash;  /// Qualified with :: because the unqualified name would refer to this struct itself.
+            hash.update(reinterpret_cast<const char *>(&x), sizeof(x));
+            return hash.get64();
+        }
+    };
+
     struct MulShiftHash
     {
         size_t operator()(Key x) const
@@ -318,6 +337,8 @@ void NO_INLINE testForEachHash(const Key * data, size_t size, Init && init)
     test<Map, Hashes::CRC32Hash>(data, size, init);
     test<Map, Hashes::MulShiftHash>(data, size, init);
     test<Map, Hashes::TabulationHash>(data, size, init);
+    test<Map, Hashes::CityHash>(data, size, init);
+    test<Map, Hashes::SipHash>(data, size, init);
 }
 
 void NO_INLINE testForEachMapAndHash(const Key * data, size_t size)

From 4eb77e6628a807174113b6ac9b2b63fd4290b093 Mon Sep 17 00:00:00 2001
From: Alexey Milovidov <milovidov@yandex-team.ru>
Date: Tue, 22 Aug 2017 00:59:27 +0300
Subject: [PATCH 258/281] Updated test of string hashes [#CLICKHOUSE-3244].

---
 .../Interpreters/tests/hash_map_string_2.cpp  | 20 ++++-----
 .../Interpreters/tests/hash_map_string_3.cpp  | 41 ++++++++++++++-----
 2 files changed, 41 insertions(+), 20 deletions(-)

diff --git a/dbms/src/Interpreters/tests/hash_map_string_2.cpp b/dbms/src/Interpreters/tests/hash_map_string_2.cpp
index 4fb21fae998..3958592e3d5 100644
--- a/dbms/src/Interpreters/tests/hash_map_string_2.cpp
+++ b/dbms/src/Interpreters/tests/hash_map_string_2.cpp
@@ -636,20 +636,20 @@ int main(int argc, char ** argv)
     }
 
     if (!m || m == 1) bench<StringRef_Compare1_Ptrs>                (data, "StringRef_Compare1_Ptrs");
-    if (!m || m == 2) bench<StringRef_Compare1_Index>                (data, "StringRef_Compare1_Index");
+    if (!m || m == 2) bench<StringRef_Compare1_Index>               (data, "StringRef_Compare1_Index");
     if (!m || m == 3) bench<StringRef_CompareMemcmp>                (data, "StringRef_CompareMemcmp");
-    if (!m || m == 4) bench<StringRef_Compare8_1_byUInt64>            (data, "StringRef_Compare8_1_byUInt64");
-    if (!m || m == 5) bench<StringRef_Compare16_1_byMemcmp>            (data, "StringRef_Compare16_1_byMemcmp");
+    if (!m || m == 4) bench<StringRef_Compare8_1_byUInt64>          (data, "StringRef_Compare8_1_byUInt64");
+    if (!m || m == 5) bench<StringRef_Compare16_1_byMemcmp>         (data, "StringRef_Compare16_1_byMemcmp");
     if (!m || m == 6) bench<StringRef_Compare16_1_byUInt64_logicAnd>(data, "StringRef_Compare16_1_byUInt64_logicAnd");
-    if (!m || m == 7) bench<StringRef_Compare16_1_byUInt64_bitAnd>    (data, "StringRef_Compare16_1_byUInt64_bitAnd");
+    if (!m || m == 7) bench<StringRef_Compare16_1_byUInt64_bitAnd>  (data, "StringRef_Compare16_1_byUInt64_bitAnd");
 #if __SSE4_1__
-    if (!m || m == 8) bench<StringRef_Compare16_1_byIntSSE>            (data, "StringRef_Compare16_1_byIntSSE");
-    if (!m || m == 9) bench<StringRef_Compare16_1_byFloatSSE>        (data, "StringRef_Compare16_1_byFloatSSE");
-    if (!m || m == 10) bench<StringRef_Compare16_1_bySSE4>            (data, "StringRef_Compare16_1_bySSE4");
-    if (!m || m == 11) bench<StringRef_Compare16_1_bySSE4_wide>        (data, "StringRef_Compare16_1_bySSE4_wide");
-    if (!m || m == 12) bench<StringRef_Compare16_1_bySSE_wide>        (data, "StringRef_Compare16_1_bySSE_wide");
+    if (!m || m == 8) bench<StringRef_Compare16_1_byIntSSE>         (data, "StringRef_Compare16_1_byIntSSE");
+    if (!m || m == 9) bench<StringRef_Compare16_1_byFloatSSE>       (data, "StringRef_Compare16_1_byFloatSSE");
+    if (!m || m == 10) bench<StringRef_Compare16_1_bySSE4>          (data, "StringRef_Compare16_1_bySSE4");
+    if (!m || m == 11) bench<StringRef_Compare16_1_bySSE4_wide>     (data, "StringRef_Compare16_1_bySSE4_wide");
+    if (!m || m == 12) bench<StringRef_Compare16_1_bySSE_wide>      (data, "StringRef_Compare16_1_bySSE_wide");
 #endif
-    if (!m || m == 100) bench<StringRef_CompareAlwaysTrue>            (data, "StringRef_CompareAlwaysTrue");
+    if (!m || m == 100) bench<StringRef_CompareAlwaysTrue>          (data, "StringRef_CompareAlwaysTrue");
     if (!m || m == 101) bench<StringRef_CompareAlmostAlwaysTrue>    (data, "StringRef_CompareAlmostAlwaysTrue");
 
     /// 10 > 8, 9
diff --git a/dbms/src/Interpreters/tests/hash_map_string_3.cpp b/dbms/src/Interpreters/tests/hash_map_string_3.cpp
index dfebabdcbbf..7176b47fc4d 100644
--- a/dbms/src/Interpreters/tests/hash_map_string_3.cpp
+++ b/dbms/src/Interpreters/tests/hash_map_string_3.cpp
@@ -139,6 +139,26 @@ struct FastHash64
 };
 
 
+struct FNV1a  /// 64-bit FNV-1a hash over the bytes of a StringRef.
+{
+    size_t operator() (StringRef x) const
+    {
+        size_t res = 0xcbf29ce484222325ULL;  /// 64-bit FNV offset basis.
+        /// Walk the byte range [x.data, x.data + x.size).
+        const char * pos = x.data;
+        const char * end = x.data + x.size;
+        /// FNV-1a XORs the byte in first and multiplies afterwards (the opposite order is plain FNV-1).
+        for (; pos < end; ++pos)
+        {
+            res ^= static_cast<unsigned char>(*pos);  /// Unsigned: bytes >= 0x80 must not sign-extend into the high bits.
+            res *= 1099511628211ULL;  /// 64-bit FNV prime.
+        }
+
+        return res;
+    }
+};
+
+
 #if __SSE4_1__
 
 struct CrapWow
@@ -462,20 +482,21 @@ int main(int argc, char ** argv)
             << std::endl;
     }
 
-    if (!m || m == 1) bench<StringRef_CompareMemcmp, StringRefHash64>(data, "StringRef_CityHash64");
-    if (!m || m == 2) bench<StringRef_CompareMemcmp, FastHash64>    (data, "StringRef_FastHash64");
-    if (!m || m == 3) bench<StringRef_CompareMemcmp, SimpleHash>    (data, "StringRef_SimpleHash");
+    if (!m || m == 1) bench<StringRef, StringRefHash64>(data, "StringRef_CityHash64");
+    if (!m || m == 2) bench<StringRef, FastHash64>     (data, "StringRef_FastHash64");
+    if (!m || m == 3) bench<StringRef, SimpleHash>     (data, "StringRef_SimpleHash");
+    if (!m || m == 3) bench<StringRef, FNV1a>          (data, "StringRef_FNV1a");
 
 #if __SSE4_1__
-    if (!m || m == 4) bench<StringRef_CompareMemcmp, CrapWow>        (data, "StringRef_CrapWow");
-    if (!m || m == 5) bench<StringRef_CompareMemcmp, CRC32Hash>        (data, "StringRef_CRC32Hash");
-    if (!m || m == 6) bench<StringRef_CompareMemcmp, CRC32ILPHash>    (data, "StringRef_CRC32ILPHash");
+    if (!m || m == 4) bench<StringRef, CrapWow>        (data, "StringRef_CrapWow");
+    if (!m || m == 5) bench<StringRef, CRC32Hash>      (data, "StringRef_CRC32Hash");
+    if (!m || m == 6) bench<StringRef, CRC32ILPHash>   (data, "StringRef_CRC32ILPHash");
 #endif
 
-    if (!m || m == 7) bench<StringRef_CompareMemcmp, VerySimpleHash>(data, "StringRef_VerySimpleHash");
-    if (!m || m == 8) bench<StringRef_CompareMemcmp, FarmHash64>(data, "StringRef_FarmHash64");
-    if (!m || m == 9) bench<StringRef_CompareMemcmp, MetroHash64<metrohash64_1>>(data, "StringRef_MetroHash64_1");
-    if (!m || m == 10) bench<StringRef_CompareMemcmp, MetroHash64<metrohash64_2>>(data, "StringRef_MetroHash64_2");
+    if (!m || m == 7) bench<StringRef, VerySimpleHash> (data, "StringRef_VerySimpleHash");
+    if (!m || m == 8) bench<StringRef, FarmHash64>     (data, "StringRef_FarmHash64");
+    if (!m || m == 9) bench<StringRef, MetroHash64<metrohash64_1>>(data, "StringRef_MetroHash64_1");
+    if (!m || m == 10) bench<StringRef, MetroHash64<metrohash64_2>>(data, "StringRef_MetroHash64_2");
 
     return 0;
 }

From ee553aac6524187c836244e7fd7d2b63f9c9f09e Mon Sep 17 00:00:00 2001
From: Alexey Milovidov <milovidov@yandex-team.ru>
Date: Tue, 22 Aug 2017 01:03:28 +0300
Subject: [PATCH 259/281] Updated test of string hashes [#CLICKHOUSE-3244].

---
 .../Interpreters/tests/hash_map_string_3.cpp   | 18 +++++++++---------
 1 file changed, 9 insertions(+), 9 deletions(-)

diff --git a/dbms/src/Interpreters/tests/hash_map_string_3.cpp b/dbms/src/Interpreters/tests/hash_map_string_3.cpp
index 7176b47fc4d..7f637d765e3 100644
--- a/dbms/src/Interpreters/tests/hash_map_string_3.cpp
+++ b/dbms/src/Interpreters/tests/hash_map_string_3.cpp
@@ -29,7 +29,7 @@ for file in MobilePhoneModel PageCharset Params URLDomain UTMSource Referer URL
   echo
   BEST_METHOD=0
   BEST_RESULT=0
-  for method in {1..10}; do
+  for method in {1..11}; do
    echo -ne $file $size $method '';
    TOTAL_ELEMS=0
    for i in {0..1000}; do
@@ -485,18 +485,18 @@ int main(int argc, char ** argv)
     if (!m || m == 1) bench<StringRef, StringRefHash64>(data, "StringRef_CityHash64");
     if (!m || m == 2) bench<StringRef, FastHash64>     (data, "StringRef_FastHash64");
     if (!m || m == 3) bench<StringRef, SimpleHash>     (data, "StringRef_SimpleHash");
-    if (!m || m == 3) bench<StringRef, FNV1a>          (data, "StringRef_FNV1a");
+    if (!m || m == 4) bench<StringRef, FNV1a>          (data, "StringRef_FNV1a");
 
 #if __SSE4_1__
-    if (!m || m == 4) bench<StringRef, CrapWow>        (data, "StringRef_CrapWow");
-    if (!m || m == 5) bench<StringRef, CRC32Hash>      (data, "StringRef_CRC32Hash");
-    if (!m || m == 6) bench<StringRef, CRC32ILPHash>   (data, "StringRef_CRC32ILPHash");
+    if (!m || m == 5) bench<StringRef, CrapWow>        (data, "StringRef_CrapWow");
+    if (!m || m == 6) bench<StringRef, CRC32Hash>      (data, "StringRef_CRC32Hash");
+    if (!m || m == 7) bench<StringRef, CRC32ILPHash>   (data, "StringRef_CRC32ILPHash");
 #endif
 
-    if (!m || m == 7) bench<StringRef, VerySimpleHash> (data, "StringRef_VerySimpleHash");
-    if (!m || m == 8) bench<StringRef, FarmHash64>     (data, "StringRef_FarmHash64");
-    if (!m || m == 9) bench<StringRef, MetroHash64<metrohash64_1>>(data, "StringRef_MetroHash64_1");
-    if (!m || m == 10) bench<StringRef, MetroHash64<metrohash64_2>>(data, "StringRef_MetroHash64_2");
+    if (!m || m == 8) bench<StringRef, VerySimpleHash> (data, "StringRef_VerySimpleHash");
+    if (!m || m == 9) bench<StringRef, FarmHash64>     (data, "StringRef_FarmHash64");
+    if (!m || m == 10) bench<StringRef, MetroHash64<metrohash64_1>>(data, "StringRef_MetroHash64_1");
+    if (!m || m == 11) bench<StringRef, MetroHash64<metrohash64_2>>(data, "StringRef_MetroHash64_2");
 
     return 0;
 }

From 61289c5c6115a8d7de92d1b22a4a425e593bdd59 Mon Sep 17 00:00:00 2001
From: Vitaliy Lyudvichenko <vludv@yandex-team.ru>
Date: Wed, 23 Aug 2017 14:44:53 +0300
Subject: [PATCH 260/281] Fixed test after forcing ENGINE for MV.
 [#CLICKHOUSE-3]

---
 .../queries/0_stateless/00180_attach_materialized_view.sql    | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/dbms/tests/queries/0_stateless/00180_attach_materialized_view.sql b/dbms/tests/queries/0_stateless/00180_attach_materialized_view.sql
index 81b162ee4dd..787cea6caf7 100644
--- a/dbms/tests/queries/0_stateless/00180_attach_materialized_view.sql
+++ b/dbms/tests/queries/0_stateless/00180_attach_materialized_view.sql
@@ -3,10 +3,10 @@ DROP TABLE IF EXISTS test.mv;
 DROP TABLE IF EXISTS test.`.inner.mv`;
 
 CREATE TABLE test.t (x UInt8) ENGINE = Null;
-CREATE MATERIALIZED VIEW test.mv AS SELECT * FROM test.t;
+CREATE MATERIALIZED VIEW test.mv ENGINE = Null AS SELECT * FROM test.t;
 
 DETACH TABLE test.mv;
-ATTACH MATERIALIZED VIEW test.mv AS SELECT * FROM test.t;
+ATTACH MATERIALIZED VIEW test.mv ENGINE = Null AS SELECT * FROM test.t;
 
 DROP TABLE test.t;
 DROP TABLE test.mv;

From c43a13cb81fd90b91d8d8a94ef872ee8baf110f0 Mon Sep 17 00:00:00 2001
From: Nikolai Kochetov <nik-kochetov@yandex-team.ru>
Date: Wed, 23 Aug 2017 13:23:16 +0300
Subject: [PATCH 261/281] fixed async insertion into local shard of distributed
 [#CLICKHOUSE-3245]

---
 dbms/src/Storages/Distributed/DistributedBlockOutputStream.cpp | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/dbms/src/Storages/Distributed/DistributedBlockOutputStream.cpp b/dbms/src/Storages/Distributed/DistributedBlockOutputStream.cpp
index 046a4b500df..9a9d6f76743 100644
--- a/dbms/src/Storages/Distributed/DistributedBlockOutputStream.cpp
+++ b/dbms/src/Storages/Distributed/DistributedBlockOutputStream.cpp
@@ -367,7 +367,8 @@ void DistributedBlockOutputStream::writeAsyncImpl(const Block & block, const siz
         for (const auto & address : cluster->getShardsAddresses()[shard_id])
             if (!address.is_local)
                 dir_names.push_back(address.toStringFull());
-        writeToShard(block, dir_names);
+        if (!dir_names.empty())
+            writeToShard(block, dir_names);
     }
 }
 

From 7fa337c2976073451218e568dc77f265e4b479e5 Mon Sep 17 00:00:00 2001
From: Nikolai Kochetov <nik-kochetov@yandex-team.ru>
Date: Wed, 23 Aug 2017 13:45:23 +0300
Subject: [PATCH 262/281] added tests [#CLICKHOUSE-3245]

---
 .../configs/remote_servers.xml                  |  8 ++++++++
 .../test_insert_into_distributed/test.py        | 17 +++++++++++++++++
 2 files changed, 25 insertions(+)

diff --git a/dbms/tests/integration/test_insert_into_distributed/configs/remote_servers.xml b/dbms/tests/integration/test_insert_into_distributed/configs/remote_servers.xml
index 54a7fd95a79..84b98cc0223 100644
--- a/dbms/tests/integration/test_insert_into_distributed/configs/remote_servers.xml
+++ b/dbms/tests/integration/test_insert_into_distributed/configs/remote_servers.xml
@@ -8,5 +8,13 @@
                 </replica>
             </shard>
         </test_cluster>
+        <test_local_cluster>
+            <shard>
+                <replica>
+                    <host>localhost</host>
+                    <port>9000</port>
+                </replica>
+            </shard>
+        </test_local_cluster>
     </remote_servers>
 </yandex>
diff --git a/dbms/tests/integration/test_insert_into_distributed/test.py b/dbms/tests/integration/test_insert_into_distributed/test.py
index 69c7b62f9ee..414ac2babc8 100644
--- a/dbms/tests/integration/test_insert_into_distributed/test.py
+++ b/dbms/tests/integration/test_insert_into_distributed/test.py
@@ -14,6 +14,11 @@ instance_test_inserts_batching = cluster.add_instance(
     main_configs=['configs/remote_servers.xml'], user_configs=['configs/enable_distributed_inserts_batching.xml'])
 remote = cluster.add_instance('remote', user_configs=['configs/forbid_background_merges.xml'])
 
+instance_test_inserts_local_cluster = cluster.add_instance(
+    'instance_test_inserts_local_cluster',
+    main_configs=['configs/remote_servers.xml'])
+
+
 @pytest.fixture(scope="module")
 def started_cluster():
     try:
@@ -27,6 +32,11 @@ CREATE TABLE distributed (x UInt32) ENGINE = Distributed('test_cluster', 'defaul
         remote.query("CREATE TABLE local2 (d Date, x UInt32, s String) ENGINE = MergeTree(d, x, 8192)")
         instance_test_inserts_batching.query('''
 CREATE TABLE distributed (d Date, x UInt32) ENGINE = Distributed('test_cluster', 'default', 'local2')
+''')
+
+        instance_test_inserts_local_cluster.query("CREATE TABLE local (d Date, x UInt32) ENGINE = MergeTree(d, x, 8192)")
+        instance_test_inserts_local_cluster.query('''
+CREATE TABLE distributed_on_local (d Date, x UInt32) ENGINE = Distributed('test_local_cluster', 'default', 'local')
 ''')
 
         yield cluster
@@ -105,3 +115,10 @@ def test_inserts_batching(started_cluster):
 20000101_20000101_5_5_0	[6,9]
 '''
     assert TSV(result) == TSV(expected)
+
+
+def test_inserts_local(started_cluster):
+    instance = instance_test_inserts_local_cluster
+    instance.query("INSERT INTO distributed_on_local VALUES ('2000-01-01', 1)")
+    time.sleep(0.5)
+    assert instance.query("SELECT count(*) FROM local").strip() == '1'

From 02f0bcb17f01eb47da59d07598b36e5dc0b25ce4 Mon Sep 17 00:00:00 2001
From: Vitaliy Lyudvichenko <vludv@yandex-team.ru>
Date: Tue, 22 Aug 2017 22:51:03 +0300
Subject: [PATCH 263/281] Fixed incorrect assertion in INSERT queries with
 binary formats. [#CLICKHOUSE-3242]

---
 dbms/src/Parsers/ParserInsertQuery.cpp        | 23 +++++++--------
 .../00497_whitespaces_in_insert.reference     |  9 ++++++
 .../00497_whitespaces_in_insert.sh            | 28 +++++++++++++++++++
 3 files changed, 49 insertions(+), 11 deletions(-)
 create mode 100644 dbms/tests/queries/0_stateless/00497_whitespaces_in_insert.reference
 create mode 100755 dbms/tests/queries/0_stateless/00497_whitespaces_in_insert.sh

diff --git a/dbms/src/Parsers/ParserInsertQuery.cpp b/dbms/src/Parsers/ParserInsertQuery.cpp
index 1bb43d47f3d..709e9e79233 100644
--- a/dbms/src/Parsers/ParserInsertQuery.cpp
+++ b/dbms/src/Parsers/ParserInsertQuery.cpp
@@ -80,22 +80,23 @@ bool ParserInsertQuery::parseImpl(Pos & pos, ASTPtr & node, Expected & expected)
         if (!name_p.parse(pos, format, expected))
             return false;
 
-        if (pos->type == TokenType::Semicolon)
-            throw Exception("You have excessive ';' symbol before data for INSERT.\n"
-                "Example:\n\n"
-                "INSERT INTO t (x, y) FORMAT TabSeparated\n"
-                "1\tHello\n"
-                "2\tWorld\n"
-                "\n"
-                "Note that there is no ';' in first line.", ErrorCodes::SYNTAX_ERROR);
-
-        /// Data starts after the first newline, if there is one, or after all the whitespace characters, otherwise.
-
         data = name_pos->end;
 
+        if (data < end && *data == ';')
+            throw Exception("You have excessive ';' symbol before data for INSERT.\n"
+                                    "Example:\n\n"
+                                    "INSERT INTO t (x, y) FORMAT TabSeparated\n"
+                                    ";\tHello\n"
+                                    "2\tWorld\n"
+                                    "\n"
+                                    "Note that there is no ';' just after format name, "
+                                    "you need to put at least one whitespace symbol before the data.", ErrorCodes::SYNTAX_ERROR);
+
         while (data < end && (*data == ' ' || *data == '\t' || *data == '\f'))
             ++data;
 
+        /// Data starts after the first newline, if there is one, or after all the whitespace characters, otherwise.
+
         if (data < end && *data == '\r')
             ++data;
 
diff --git a/dbms/tests/queries/0_stateless/00497_whitespaces_in_insert.reference b/dbms/tests/queries/0_stateless/00497_whitespaces_in_insert.reference
new file mode 100644
index 00000000000..9055e59470d
--- /dev/null
+++ b/dbms/tests/queries/0_stateless/00497_whitespaces_in_insert.reference
@@ -0,0 +1,9 @@
+59
+59
+32
+59
+32
+59
+
+;
+;
diff --git a/dbms/tests/queries/0_stateless/00497_whitespaces_in_insert.sh b/dbms/tests/queries/0_stateless/00497_whitespaces_in_insert.sh
new file mode 100755
index 00000000000..09d1aa3de88
--- /dev/null
+++ b/dbms/tests/queries/0_stateless/00497_whitespaces_in_insert.sh
@@ -0,0 +1,28 @@
+#!/bin/bash
+
+clickhouse-client -q "DROP TABLE IF EXISTS test.ws";
+clickhouse-client -q "CREATE TABLE test.ws (i UInt8) ENGINE = Memory";
+
+clickhouse-client -q "INSERT INTO test.ws FORMAT RowBinary ;";
+clickhouse-client -q "INSERT INTO test.ws FORMAT RowBinary 	; ";
+clickhouse-client -q "INSERT INTO test.ws FORMAT RowBinary
+; ";
+echo -n ";" | clickhouse-client -q "INSERT INTO test.ws FORMAT RowBinary";
+
+clickhouse-client --max_threads=1 -q "SELECT * FROM test.ws";
+clickhouse-client -q "DROP TABLE test.ws";
+
+
+clickhouse-client -q "SELECT ''";
+
+
+clickhouse-client -q "CREATE TABLE test.ws (s String) ENGINE = Memory";
+clickhouse-client -q "INSERT INTO test.ws FORMAT TSV	;
+";
+echo ";" | clickhouse-client -q "INSERT INTO test.ws FORMAT TSV"
+if clickhouse-client -q "INSERT INTO test.ws FORMAT TSV;" 1>/dev/null 2>/dev/null; then
+    echo ERROR;
+fi
+clickhouse-client --max_threads=1 -q "SELECT * FROM test.ws";
+
+clickhouse-client -q "DROP TABLE test.ws";

From 8620dd6876992ecf3e6f50911ab3f1913c02b667 Mon Sep 17 00:00:00 2001
From: Alexey Zatelepin <ztlpn@yandex-team.ru>
Date: Tue, 22 Aug 2017 23:29:15 +0300
Subject: [PATCH 264/281] runtime compilation fix: put groupArray
 implementation out of anonymous namespace [#CLICKHOUSE-3247]

Demangled symbols from anonymous namespace contain "::(anonymous namespace)::" string
and thus cannot be used with runtime compilation of aggregation functions.
---
 .../AggregateFunctionGroupArray.cpp           |  4 +--
 .../AggregateFunctionGroupArray.h             | 26 +++++++------------
 2 files changed, 11 insertions(+), 19 deletions(-)

diff --git a/dbms/src/AggregateFunctions/AggregateFunctionGroupArray.cpp b/dbms/src/AggregateFunctions/AggregateFunctionGroupArray.cpp
index 12151e4b771..6019cb3bf06 100644
--- a/dbms/src/AggregateFunctions/AggregateFunctionGroupArray.cpp
+++ b/dbms/src/AggregateFunctions/AggregateFunctionGroupArray.cpp
@@ -24,9 +24,9 @@ inline AggregateFunctionPtr createAggregateFunctionGroupArrayImpl(const DataType
         return AggregateFunctionPtr(res);
 
     if (typeid_cast<const DataTypeString *>(argument_type.get()))
-        return std::make_shared<GroupArrayGeneralListImpl<NodeString, has_limit::value>>(std::forward<TArgs>(args)...);
+        return std::make_shared<GroupArrayGeneralListImpl<GroupArrayListNodeString, has_limit::value>>(std::forward<TArgs>(args)...);
 
-    return std::make_shared<GroupArrayGeneralListImpl<NodeGeneral, has_limit::value>>(std::forward<TArgs>(args)...);
+    return std::make_shared<GroupArrayGeneralListImpl<GroupArrayListNodeGeneral, has_limit::value>>(std::forward<TArgs>(args)...);
 };
 
 
diff --git a/dbms/src/AggregateFunctions/AggregateFunctionGroupArray.h b/dbms/src/AggregateFunctions/AggregateFunctionGroupArray.h
index b7db1f393e5..c766d8c02d6 100644
--- a/dbms/src/AggregateFunctions/AggregateFunctionGroupArray.h
+++ b/dbms/src/AggregateFunctions/AggregateFunctionGroupArray.h
@@ -31,9 +31,6 @@ namespace ErrorCodes
 }
 
 
-namespace
-{
-
 /// A particular case is an implementation for numeric types.
 template <typename T>
 struct GroupArrayNumericData
@@ -146,12 +143,10 @@ public:
 /// General case
 
 
-/// Nodes used to implement linked list for stoarge of groupArray states
-struct NodeString;
-struct NodeGeneral;
+/// Nodes used to implement a linked list for storage of groupArray states
 
 template <typename Node>
-struct NodeBase
+struct GroupArrayListNodeBase
 {
     Node * next;
     UInt64 size; // size of payload
@@ -159,7 +154,7 @@ struct NodeBase
     /// Returns pointer to actual payload
     char * data()
     {
-        static_assert(sizeof(NodeBase) == sizeof(Node));
+        static_assert(sizeof(GroupArrayListNodeBase) == sizeof(Node));
         return reinterpret_cast<char *>(this) + sizeof(Node);
     }
 
@@ -189,9 +184,9 @@ struct NodeBase
     }
 };
 
-struct NodeString : public NodeBase<NodeString>
+struct GroupArrayListNodeString : public GroupArrayListNodeBase<GroupArrayListNodeString>
 {
-    using Node = NodeString;
+    using Node = GroupArrayListNodeString;
 
     /// Create node from string
     static Node * allocate(const IColumn & column, size_t row_num, Arena * arena)
@@ -212,9 +207,9 @@ struct NodeString : public NodeBase<NodeString>
     }
 };
 
-struct NodeGeneral : public NodeBase<NodeGeneral>
+struct GroupArrayListNodeGeneral : public GroupArrayListNodeBase<GroupArrayListNodeGeneral>
 {
-    using Node = NodeGeneral;
+    using Node = GroupArrayListNodeGeneral;
 
     static Node * allocate(const IColumn & column, size_t row_num, Arena * arena)
     {
@@ -267,7 +262,7 @@ public:
     void setParameters(const Array & params) override
     {
         if (!limit_num_elems && !params.empty())
-            throw Exception("This instatintion of " + getName() + "aggregate function doesn't accept any parameters. It is a bug.", ErrorCodes::LOGICAL_ERROR);
+            throw Exception("This instantiation of " + getName() + " aggregate function doesn't accept any parameters. It is a bug.", ErrorCodes::LOGICAL_ERROR);
     }
 
     void setArgument(const DataTypePtr & argument)
@@ -395,7 +390,7 @@ public:
 
         auto & column_data = column_array.getData();
 
-        if (std::is_same<Node, NodeString>::value)
+        if (std::is_same<Node, GroupArrayListNodeString>::value)
         {
             auto & string_offsets = static_cast<ColumnString &>(column_data).getOffsets();
             string_offsets.reserve(string_offsets.size() + data(place).elems);
@@ -415,9 +410,6 @@ public:
     }
 };
 
-}
-
-
 #undef AGGREGATE_FUNCTION_GROUP_ARRAY_MAX_ARRAY_SIZE
 
 }

From ec96183f5933a87f45fc9df9ffaa0bfb83d757c5 Mon Sep 17 00:00:00 2001
From: Alexey Zatelepin <ztlpn@yandex-team.ru>
Date: Wed, 23 Aug 2017 16:29:44 +0300
Subject: [PATCH 265/281] put AggregateFunctionStatistics implementation out of
 anonymous namespace [#CLICKHOUSE-3247]

---
 .../AggregateFunctionStatistics.h             | 44 +++++++------------
 1 file changed, 17 insertions(+), 27 deletions(-)

diff --git a/dbms/src/AggregateFunctions/AggregateFunctionStatistics.h b/dbms/src/AggregateFunctions/AggregateFunctionStatistics.h
index 96cfdcda3b8..89c0bfa2027 100644
--- a/dbms/src/AggregateFunctions/AggregateFunctionStatistics.h
+++ b/dbms/src/AggregateFunctions/AggregateFunctionStatistics.h
@@ -155,12 +155,9 @@ public:
     }
 };
 
-namespace
-{
-
 /** Implementing the varSamp function.
   */
-struct VarSampImpl
+struct AggregateFunctionVarSampImpl
 {
     static constexpr auto name = "varSamp";
 
@@ -175,19 +172,19 @@ struct VarSampImpl
 
 /** Implementing the stddevSamp function.
   */
-struct StdDevSampImpl
+struct AggregateFunctionStdDevSampImpl
 {
     static constexpr auto name = "stddevSamp";
 
     static inline Float64 apply(Float64 m2, UInt64 count)
     {
-        return sqrt(VarSampImpl::apply(m2, count));
+        return sqrt(AggregateFunctionVarSampImpl::apply(m2, count));
     }
 };
 
 /** Implementing the varPop function.
   */
-struct VarPopImpl
+struct AggregateFunctionVarPopImpl
 {
     static constexpr auto name = "varPop";
 
@@ -204,18 +201,16 @@ struct VarPopImpl
 
 /** Implementing the stddevPop function.
   */
-struct StdDevPopImpl
+struct AggregateFunctionStdDevPopImpl
 {
     static constexpr auto name = "stddevPop";
 
     static inline Float64 apply(Float64 m2, UInt64 count)
     {
-        return sqrt(VarPopImpl::apply(m2, count));
+        return sqrt(AggregateFunctionVarPopImpl::apply(m2, count));
     }
 };
 
-}
-
 /** If `compute_marginal_moments` flag is set this class provides the successor
   * CovarianceData support of marginal moments for calculating the correlation.
   */
@@ -423,12 +418,9 @@ public:
     }
 };
 
-namespace
-{
-
 /** Implementing the covarSamp function.
   */
-struct CovarSampImpl
+struct AggregateFunctionCovarSampImpl
 {
     static constexpr auto name = "covarSamp";
 
@@ -443,7 +435,7 @@ struct CovarSampImpl
 
 /** Implementing the covarPop function.
   */
-struct CovarPopImpl
+struct AggregateFunctionCovarPopImpl
 {
     static constexpr auto name = "covarPop";
 
@@ -460,7 +452,7 @@ struct CovarPopImpl
 
 /** `corr` function implementation.
   */
-struct CorrImpl
+struct AggregateFunctionCorrImpl
 {
     static constexpr auto name = "corr";
 
@@ -473,27 +465,25 @@ struct CorrImpl
     }
 };
 
-}
+template<typename T>
+using AggregateFunctionVarSamp = AggregateFunctionVariance<T, AggregateFunctionVarSampImpl>;
 
 template<typename T>
-using AggregateFunctionVarSamp = AggregateFunctionVariance<T, VarSampImpl>;
+using AggregateFunctionStdDevSamp = AggregateFunctionVariance<T, AggregateFunctionStdDevSampImpl>;
 
 template<typename T>
-using AggregateFunctionStdDevSamp = AggregateFunctionVariance<T, StdDevSampImpl>;
+using AggregateFunctionVarPop = AggregateFunctionVariance<T, AggregateFunctionVarPopImpl>;
 
 template<typename T>
-using AggregateFunctionVarPop = AggregateFunctionVariance<T, VarPopImpl>;
-
-template<typename T>
-using AggregateFunctionStdDevPop = AggregateFunctionVariance<T, StdDevPopImpl>;
+using AggregateFunctionStdDevPop = AggregateFunctionVariance<T, AggregateFunctionStdDevPopImpl>;
 
 template<typename T, typename U>
-using AggregateFunctionCovarSamp = AggregateFunctionCovariance<T, U, CovarSampImpl>;
+using AggregateFunctionCovarSamp = AggregateFunctionCovariance<T, U, AggregateFunctionCovarSampImpl>;
 
 template<typename T, typename U>
-using AggregateFunctionCovarPop = AggregateFunctionCovariance<T, U, CovarPopImpl>;
+using AggregateFunctionCovarPop = AggregateFunctionCovariance<T, U, AggregateFunctionCovarPopImpl>;
 
 template<typename T, typename U>
-using AggregateFunctionCorr = AggregateFunctionCovariance<T, U, CorrImpl, true>;
+using AggregateFunctionCorr = AggregateFunctionCovariance<T, U, AggregateFunctionCorrImpl, true>;
 
 }

From eec6a3ecb994fc2f54794a95e88f6e987cbedbe0 Mon Sep 17 00:00:00 2001
From: Alexey Zatelepin <ztlpn@yandex-team.ru>
Date: Wed, 23 Aug 2017 16:32:30 +0300
Subject: [PATCH 266/281] add forgotten aggregate functions to
 SpecializedAggregator.h [#CLICKHOUSE-3247]

---
 .../AggregateFunctions/IAggregateFunction.h   |  3 ++
 dbms/src/Interpreters/Aggregator.cpp          |  3 +-
 dbms/src/Interpreters/SpecializedAggregator.h | 35 ++++++++++++-------
 3 files changed, 27 insertions(+), 14 deletions(-)

diff --git a/dbms/src/AggregateFunctions/IAggregateFunction.h b/dbms/src/AggregateFunctions/IAggregateFunction.h
index 895844c1b7f..98ad8fe396e 100644
--- a/dbms/src/AggregateFunctions/IAggregateFunction.h
+++ b/dbms/src/AggregateFunctions/IAggregateFunction.h
@@ -41,6 +41,9 @@ namespace ErrorCodes
   * The data resulting from the aggregation (intermediate computing states) is stored in other objects
   *  (which can be created in some pool),
   *  and IAggregateFunction is the external interface for manipulating them.
+  *
+  * NOTE: If you add a new aggregate function, don't forget to add it to Interpreters/SpecializedAggregator.h
+  *  so that the new function works with runtime compilation.
   */
 class IAggregateFunction
 {
diff --git a/dbms/src/Interpreters/Aggregator.cpp b/dbms/src/Interpreters/Aggregator.cpp
index 4f0772a4980..9b7fd40e619 100644
--- a/dbms/src/Interpreters/Aggregator.cpp
+++ b/dbms/src/Interpreters/Aggregator.cpp
@@ -356,7 +356,8 @@ void Aggregator::compileIfPossible(AggregatedDataVariants::Type type)
       *  at the end of which `on_ready` callback is called.
       */
     SharedLibraryPtr lib = params.compiler->getOrCount(key, params.min_count_to_compile,
-        "-include " INTERNAL_COMPILER_HEADERS "/dbms/src/Interpreters/SpecializedAggregator.h",
+        "-include " INTERNAL_COMPILER_HEADERS "/dbms/src/Interpreters/SpecializedAggregator.h "
+        "-Wno-unused-function",
         get_code, on_ready);
 
     /// If the result is already ready.
diff --git a/dbms/src/Interpreters/SpecializedAggregator.h b/dbms/src/Interpreters/SpecializedAggregator.h
index acfefe0f44d..c582e72b4fe 100644
--- a/dbms/src/Interpreters/SpecializedAggregator.h
+++ b/dbms/src/Interpreters/SpecializedAggregator.h
@@ -1,21 +1,30 @@
 #include <Interpreters/Aggregator.h>
 
-#include <AggregateFunctions/AggregateFunctionCount.h>
-#include <AggregateFunctions/AggregateFunctionSum.h>
-#include <AggregateFunctions/AggregateFunctionAvg.h>
-#include <AggregateFunctions/AggregateFunctionMinMaxAny.h>
 #include <AggregateFunctions/AggregateFunctionArgMinMax.h>
+#include <AggregateFunctions/AggregateFunctionArray.h>
+#include <AggregateFunctions/AggregateFunctionAvg.h>
+#include <AggregateFunctions/AggregateFunctionCount.h>
+#include <AggregateFunctions/AggregateFunctionForEach.h>
+#include <AggregateFunctions/AggregateFunctionGroupArray.h>
+#include <AggregateFunctions/AggregateFunctionGroupArrayInsertAt.h>
+#include <AggregateFunctions/AggregateFunctionGroupUniqArray.h>
+#include <AggregateFunctions/AggregateFunctionIf.h>
+#include <AggregateFunctions/AggregateFunctionMerge.h>
+#include <AggregateFunctions/AggregateFunctionMinMaxAny.h>
+#include <AggregateFunctions/AggregateFunctionNull.h>
+#include <AggregateFunctions/AggregateFunctionQuantileDeterministic.h>
+#include <AggregateFunctions/AggregateFunctionQuantileExact.h>
+#include <AggregateFunctions/AggregateFunctionQuantileExactWeighted.h>
+#include <AggregateFunctions/AggregateFunctionQuantile.h>
+#include <AggregateFunctions/AggregateFunctionQuantileTDigest.h>
+#include <AggregateFunctions/AggregateFunctionQuantileTiming.h>
+#include <AggregateFunctions/AggregateFunctionSequenceMatch.h>
+#include <AggregateFunctions/AggregateFunctionState.h>
+#include <AggregateFunctions/AggregateFunctionStatistics.h>
+#include <AggregateFunctions/AggregateFunctionSum.h>
+#include <AggregateFunctions/AggregateFunctionTopK.h>
 #include <AggregateFunctions/AggregateFunctionUniq.h>
 #include <AggregateFunctions/AggregateFunctionUniqUpTo.h>
-#include <AggregateFunctions/AggregateFunctionGroupArray.h>
-#include <AggregateFunctions/AggregateFunctionGroupUniqArray.h>
-#include <AggregateFunctions/AggregateFunctionQuantile.h>
-#include <AggregateFunctions/AggregateFunctionQuantileTiming.h>
-#include <AggregateFunctions/AggregateFunctionIf.h>
-#include <AggregateFunctions/AggregateFunctionArray.h>
-#include <AggregateFunctions/AggregateFunctionState.h>
-#include <AggregateFunctions/AggregateFunctionMerge.h>
-#include <AggregateFunctions/AggregateFunctionNull.h>
 
 
 namespace DB

From 47705e21a542c9ad36bb3f124ff03c5c38fc492c Mon Sep 17 00:00:00 2001
From: proller <proller@github.com>
Date: Wed, 23 Aug 2017 18:42:15 +0300
Subject: [PATCH 267/281] Cmake: ccache detect fix

---
 CMakeLists.txt | 10 +++++-----
 1 file changed, 5 insertions(+), 5 deletions(-)

diff --git a/CMakeLists.txt b/CMakeLists.txt
index 11eb0790aed..d98476dca39 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -27,11 +27,11 @@ endif ()
 cmake_policy (SET CMP0014 OLD) # Ignore warning about CMakeLists.txt in each directory
 cmake_policy (SET CMP0012 NEW) # Don't dereference TRUE and FALSE
 
-find_program(CCACHE_FOUND ccache)
-if(CCACHE_FOUND AND NOT CMAKE_CXX_COMPILER_LAUNCHER MATCHES "ccache")
-  set_property(GLOBAL PROPERTY RULE_LAUNCH_COMPILE "ccache")
-  set_property(GLOBAL PROPERTY RULE_LAUNCH_LINK "ccache")
-endif()
+find_program (CCACHE_FOUND ccache)
+if (CCACHE_FOUND AND NOT CMAKE_CXX_COMPILER_LAUNCHER MATCHES "ccache" AND NOT CMAKE_CXX_COMPILER MATCHES "ccache")
+  set_property (GLOBAL PROPERTY RULE_LAUNCH_COMPILE "ccache")
+  set_property (GLOBAL PROPERTY RULE_LAUNCH_LINK "ccache")
+endif ()
 
 if (NOT CMAKE_BUILD_TYPE OR CMAKE_BUILD_TYPE STREQUAL "None")
     message (STATUS "CMAKE_BUILD_TYPE is not set, set to default = RELWITHDEBINFO")

From 49ae03a4a70d2df056e7f8160cf84448f99c4d52 Mon Sep 17 00:00:00 2001
From: proller <proller@users.noreply.github.com>
Date: Wed, 23 Aug 2017 18:58:42 +0300
Subject: [PATCH 268/281] Fix build in freebsd (#1129)

---
 dbms/src/Core/tests/string_pool.cpp | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/dbms/src/Core/tests/string_pool.cpp b/dbms/src/Core/tests/string_pool.cpp
index 79b0a9c824b..83aae929f32 100644
--- a/dbms/src/Core/tests/string_pool.cpp
+++ b/dbms/src/Core/tests/string_pool.cpp
@@ -214,7 +214,7 @@ int main(int argc, char ** argv)
             << std::endl;
 
         size_t i = 0;
-        for (RefsHashMap::const_iterator it = set.begin(); i < elems_show && it != set.end(); ++it, ++i)
+        for (auto it = set.begin(); i < elems_show && it != set.end(); ++it, ++i)
         {
             devnull.write(it->first.data, it->first.size);
             devnull << std::endl;
@@ -241,7 +241,7 @@ int main(int argc, char ** argv)
             << std::endl;
 
         size_t i = 0;
-        for (RefsHashMap::const_iterator it = set.begin(); i < elems_show && it != set.end(); ++it, ++i)
+        for (auto it = set.begin(); i < elems_show && it != set.end(); ++it, ++i)
         {
             devnull.write(it->first.data, it->first.size);
             devnull << std::endl;

From c6ccdeeebb6c482a2b6e399fb1f3db0efe19bc95 Mon Sep 17 00:00:00 2001
From: Alex Zatelepin <ztlpn@users.noreply.github.com>
Date: Wed, 23 Aug 2017 21:39:28 +0300
Subject: [PATCH 269/281] Update CHANGELOG_RU.md

---
 CHANGELOG_RU.md | 7 +++++++
 1 file changed, 7 insertions(+)

diff --git a/CHANGELOG_RU.md b/CHANGELOG_RU.md
index 5bdd899f8db..c6f6034c95c 100644
--- a/CHANGELOG_RU.md
+++ b/CHANGELOG_RU.md
@@ -1,3 +1,10 @@
+# Релиз ClickHouse 1.1.54282
+
+Релиз содержит исправления к предыдущему релизу 1.1.54276:
+* Исправлена ошибка `DB::Exception: Assertion violation: !_path.empty()` при вставке в Distributed таблицу.
+* Исправлен парсинг при вставке в формате RowBinary, если входные данные начинаются с ';'.
+* Исправлена ошибка при рантайм-компиляции некоторых агрегатных функций (например, `groupArray()`).
+
 # Релиз ClickHouse 1.1.54276
 
 ## Новые возможности:

From df42f234f19ad21c0507d29185cdc1dc5ca4ce8f Mon Sep 17 00:00:00 2001
From: Alex Zatelepin <ztlpn@users.noreply.github.com>
Date: Wed, 23 Aug 2017 21:46:17 +0300
Subject: [PATCH 270/281] Update CHANGELOG.md

---
 CHANGELOG.md | 7 +++++++
 1 file changed, 7 insertions(+)

diff --git a/CHANGELOG.md b/CHANGELOG.md
index 912718808a6..a7eb95a73b0 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -1,3 +1,10 @@
+# ClickHouse release 1.1.54282
+
+This is a bugfix release. The following bugs were fixed:
+* `DB::Exception: Assertion violation: !_path.empty()` error when inserting into a Distributed table.
+* Error when parsing inserted data in RowBinary format if the data begins with the ';' character.
+* Errors during runtime compilation of certain aggregate functions (e.g. `groupArray()`).
+
 # ClickHouse release 1.1.54276
 
 ## New features:

From 8adbc812bce1d75a52c736bc2bde0eb47bf1f0ee Mon Sep 17 00:00:00 2001
From: proller <proller@github.com>
Date: Wed, 23 Aug 2017 19:03:26 +0300
Subject: [PATCH 271/281] Isolated build system: add initial config for
 pbuilder

---
 debian/.pbuilderrc | 111 +++++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 111 insertions(+)
 create mode 100644 debian/.pbuilderrc

diff --git a/debian/.pbuilderrc b/debian/.pbuilderrc
new file mode 100644
index 00000000000..70f3c33a1cb
--- /dev/null
+++ b/debian/.pbuilderrc
@@ -0,0 +1,111 @@
+# ubuntu:
+# sudo DIST=trusty pbuilder create --configfile debian/.pbuilderrc && DIST=trusty pdebuild --configfile debian/.pbuilderrc
+# sudo DIST=xenial pbuilder create --configfile debian/.pbuilderrc && DIST=xenial pdebuild --configfile debian/.pbuilderrc
+# sudo DIST=zesty  pbuilder create --configfile debian/.pbuilderrc && DIST=zesty  pdebuild --configfile debian/.pbuilderrc
+# debian:
+# sudo DIST=experimental pbuilder create --configfile debian/.pbuilderrc && DIST=experimental pdebuild --configfile debian/.pbuilderrc
+# sudo DIST=testing      pbuilder create --configfile debian/.pbuilderrc && DIST=testing      pdebuild --configfile debian/.pbuilderrc
+# sudo DIST=unstable     pbuilder create --configfile debian/.pbuilderrc && DIST=unstable     pdebuild --configfile debian/.pbuilderrc
+# sudo DIST=stable       pbuilder create --configfile debian/.pbuilderrc && DIST=stable       pdebuild --configfile debian/.pbuilderrc
+# TODO:
+# sudo DIST=zesty        ARCH=i386  pbuilder create --configfile debian/.pbuilderrc && DIST=zesty        ARCH=i386  pdebuild --configfile debian/.pbuilderrc
+# sudo DIST=experimental ARCH=arm64 pbuilder create --configfile debian/.pbuilderrc && DIST=experimental ARCH=arm64 pdebuild --configfile debian/.pbuilderrc
+
+# from https://wiki.debian.org/PbuilderTricks :
+
+# Codenames for Debian suites according to their alias. Update these when
+# needed.
+UNSTABLE_CODENAME="sid"
+TESTING_CODENAME="buster"
+STABLE_CODENAME="stretch"
+STABLE_BACKPORTS_SUITE="$STABLE_CODENAME-backports"
+
+# List of Debian suites.
+DEBIAN_SUITES=($UNSTABLE_CODENAME $TESTING_CODENAME $STABLE_CODENAME $STABLE_BACKPORTS_SUITE
+    "experimental" "unstable" "testing" "stable")
+
+# List of Ubuntu suites. Update these when needed.
+UBUNTU_SUITES=("zesty" "xenial" "trusty")
+
+# Mirrors to use. Update these to your preferred mirror.
+#DEBIAN_MIRROR="deb.debian.org"
+#UBUNTU_MIRROR="mirrors.kernel.org"
+
+UBUNTU_MIRROR="mirror.yandex.ru"
+DEBIAN_MIRROR="mirror.yandex.ru"
+
+# Optionally use the changelog of a package to determine the suite to use if
+# none set.
+if [ -z "${DIST}" ] && [ -r "debian/changelog" ]; then
+    DIST=$(dpkg-parsechangelog --show-field=Distribution)
+fi
+
+# Optionally set a default distribution if none is used. Note that you can set
+# your own default (i.e. ${DIST:="unstable"}).
+: ${DIST:="$(lsb_release --short --codename)"}
+
+# Optionally change Debian codenames in $DIST to their aliases.
+case "$DIST" in
+    $UNSTABLE_CODENAME)
+        DIST="unstable"
+        ;;
+    $TESTING_CODENAME)
+        DIST="testing"
+        ;;
+    $STABLE_CODENAME)
+        DIST="stable"
+        ;;
+esac
+
+# Optionally set the architecture to the host architecture if none set. Note
+# that you can set your own default (i.e. ${ARCH:="i386"}).
+: ${ARCH:="$(dpkg --print-architecture)"}
+
+NAME="$DIST"
+if [ -n "${ARCH}" ]; then
+    NAME="$NAME-$ARCH"
+    DEBOOTSTRAPOPTS=("--arch" "$ARCH" "${DEBOOTSTRAPOPTS[@]}")
+fi
+
+BASETGZ="/var/cache/pbuilder/$NAME-base.tgz"
+DISTRIBUTION="$DIST"
+BUILDRESULT="/var/cache/pbuilder/$NAME/result/"
+APTCACHE="/var/cache/pbuilder/$NAME/aptcache/"
+BUILDPLACE="/var/cache/pbuilder/build/"
+
+if $(echo ${DEBIAN_SUITES[@]} | grep -q $DIST); then
+    # Debian configuration
+    OSNAME=debian
+    MIRRORSITE="http://$DEBIAN_MIRROR/$OSNAME/"
+    COMPONENTS="main contrib non-free"
+    if $(echo "$STABLE_CODENAME stable" | grep -q $DIST); then
+        OTHERMIRROR="$OTHERMIRROR | deb $MIRRORSITE $STABLE_BACKPORTS_SUITE $COMPONENTS"
+    fi
+    # APTKEYRINGS=/usr/share/keyrings/debian-archive-keyring.gpg
+    # sudo apt install debian-archive-keyring
+    DEBOOTSTRAPOPTS+=( '--keyring' '/usr/share/keyrings/debian-archive-keyring.gpg' )
+elif $(echo ${UBUNTU_SUITES[@]} | grep -q $DIST); then
+    # Ubuntu configuration
+    OSNAME=ubuntu
+    MIRRORSITE="http://$UBUNTU_MIRROR/$OSNAME/"
+    COMPONENTS="main restricted universe multiverse"
+    OTHERMIRROR+="deb http://ppa.launchpad.net/ubuntu-toolchain-r/test/$OSNAME $DIST main"
+    # deb http://apt.llvm.org/zesty/ llvm-toolchain-zesty-5.0 main
+    ALLOWUNTRUSTED=yes
+else
+    echo "Unknown distribution: $DIST"
+    exit 1
+fi
+
+echo "using $NAME $OSNAME $DIST $ARCH $LOGNAME"
+
+CCACHEDIR=/var/cache/pbuilder/ccache
+export CCACHE_PREFIX=
+
+export DEB_BUILD_OPTIONS=parallel=`nproc`
+
+# Floating bug with permissions:
+sudo mkdir -p /var/cache/pbuilder/ccache
+sudo chmod -R a+rwx /var/cache/pbuilder/ccache
+
+# echo "DEBOOTSTRAPOPTS = ${DEBOOTSTRAPOPTS[@]}"

From 2d12fea90ff60b7394b3110922532cddb5b16e94 Mon Sep 17 00:00:00 2001
From: Hiroaki Nakamura <hnakamur@gmail.com>
Date: Thu, 24 Aug 2017 16:23:09 +0900
Subject: [PATCH 272/281] Update zstd to 1.3.1

---
 contrib/libzstd/CMakeLists.txt                |   44 +-
 contrib/libzstd/README                        |    2 +-
 .../libzstd/include/zstd/common/bitstream.h   |  124 +-
 .../include/zstd/common/entropy_common.c      |   47 +-
 .../include/zstd/common/error_private.c       |   47 +
 .../include/zstd/common/error_private.h       |   32 +-
 .../include/zstd/common/error_public.h        |   59 -
 contrib/libzstd/include/zstd/common/fse.h     |  150 +-
 .../include/zstd/common/fse_decompress.c      |   37 +-
 contrib/libzstd/include/zstd/common/huf.h     |  133 +-
 contrib/libzstd/include/zstd/common/mem.h     |   49 +-
 contrib/libzstd/include/zstd/common/pool.c    |  206 +
 contrib/libzstd/include/zstd/common/pool.h    |   61 +
 .../libzstd/include/zstd/common/threading.c   |   79 +
 .../libzstd/include/zstd/common/threading.h   |  104 +
 contrib/libzstd/include/zstd/common/xxhash.c  |    4 +-
 contrib/libzstd/include/zstd/common/xxhash.h  |   26 +-
 .../libzstd/include/zstd/common/zstd_common.c |   59 +-
 .../libzstd/include/zstd/common/zstd_errors.h |   83 +
 .../include/zstd/common/zstd_internal.h       |   95 +-
 .../include/zstd/compress/fse_compress.c      |  247 +-
 .../include/zstd/compress/huf_compress.c      |  308 +-
 .../include/zstd/compress/zbuff_compress.c    |  319 --
 .../include/zstd/compress/zstd_compress.c     | 2194 +++++---
 .../libzstd/include/zstd/compress/zstd_opt.h  |  214 +-
 .../include/zstd/compress/zstdmt_compress.c   |  955 ++++
 .../include/zstd/compress/zstdmt_compress.h   |  114 +
 .../include/zstd/decompress/huf_decompress.c  |  221 +-
 .../zstd/decompress/zbuff_decompress.c        |  252 -
 .../include/zstd/decompress/zstd_decompress.c | 1885 ++++---
 .../zstd/{common => deprecated}/zbuff.h       |   99 +-
 .../include/zstd/deprecated/zbuff_common.c    |   26 +
 .../include/zstd/deprecated/zbuff_compress.c  |  145 +
 .../zstd/deprecated/zbuff_decompress.c        |   74 +
 .../libzstd/include/zstd/dictBuilder/cover.c  | 1036 ++++
 .../libzstd/include/zstd/dictBuilder/zdict.c  |  256 +-
 .../libzstd/include/zstd/dictBuilder/zdict.h  |  185 +-
 .../libzstd/include/zstd/legacy/zstd_legacy.h |  241 +-
 .../libzstd/include/zstd/legacy/zstd_v01.c    | 2126 ++++++++
 .../libzstd/include/zstd/legacy/zstd_v01.h    |   88 +
 .../libzstd/include/zstd/legacy/zstd_v02.c    | 3555 +++++++++++++
 .../libzstd/include/zstd/legacy/zstd_v02.h    |   87 +
 .../libzstd/include/zstd/legacy/zstd_v03.c    | 3196 ++++++++++++
 .../libzstd/include/zstd/legacy/zstd_v03.h    |   87 +
 .../libzstd/include/zstd/legacy/zstd_v04.c    | 3823 ++++++++++++++
 .../libzstd/include/zstd/legacy/zstd_v04.h    |  136 +
 .../libzstd/include/zstd/legacy/zstd_v05.c    | 4082 +++++++++++++++
 .../libzstd/include/zstd/legacy/zstd_v05.h    |  156 +
 .../libzstd/include/zstd/legacy/zstd_v06.c    |  111 +-
 .../libzstd/include/zstd/legacy/zstd_v06.h    |    7 +
 .../libzstd/include/zstd/legacy/zstd_v07.c    | 4577 +++++++++++++++++
 .../libzstd/include/zstd/legacy/zstd_v07.h    |  181 +
 contrib/libzstd/include/zstd/zstd.h           |  988 +++-
 53 files changed, 30585 insertions(+), 2827 deletions(-)
 create mode 100644 contrib/libzstd/include/zstd/common/error_private.c
 delete mode 100644 contrib/libzstd/include/zstd/common/error_public.h
 create mode 100644 contrib/libzstd/include/zstd/common/pool.c
 create mode 100644 contrib/libzstd/include/zstd/common/pool.h
 create mode 100644 contrib/libzstd/include/zstd/common/threading.c
 create mode 100644 contrib/libzstd/include/zstd/common/threading.h
 create mode 100644 contrib/libzstd/include/zstd/common/zstd_errors.h
 delete mode 100644 contrib/libzstd/include/zstd/compress/zbuff_compress.c
 create mode 100644 contrib/libzstd/include/zstd/compress/zstdmt_compress.c
 create mode 100644 contrib/libzstd/include/zstd/compress/zstdmt_compress.h
 delete mode 100644 contrib/libzstd/include/zstd/decompress/zbuff_decompress.c
 rename contrib/libzstd/include/zstd/{common => deprecated}/zbuff.h (66%)
 create mode 100644 contrib/libzstd/include/zstd/deprecated/zbuff_common.c
 create mode 100644 contrib/libzstd/include/zstd/deprecated/zbuff_compress.c
 create mode 100644 contrib/libzstd/include/zstd/deprecated/zbuff_decompress.c
 create mode 100644 contrib/libzstd/include/zstd/dictBuilder/cover.c
 create mode 100644 contrib/libzstd/include/zstd/legacy/zstd_v01.c
 create mode 100644 contrib/libzstd/include/zstd/legacy/zstd_v01.h
 create mode 100644 contrib/libzstd/include/zstd/legacy/zstd_v02.c
 create mode 100644 contrib/libzstd/include/zstd/legacy/zstd_v02.h
 create mode 100644 contrib/libzstd/include/zstd/legacy/zstd_v03.c
 create mode 100644 contrib/libzstd/include/zstd/legacy/zstd_v03.h
 create mode 100644 contrib/libzstd/include/zstd/legacy/zstd_v04.c
 create mode 100644 contrib/libzstd/include/zstd/legacy/zstd_v04.h
 create mode 100644 contrib/libzstd/include/zstd/legacy/zstd_v05.c
 create mode 100644 contrib/libzstd/include/zstd/legacy/zstd_v05.h
 create mode 100644 contrib/libzstd/include/zstd/legacy/zstd_v07.c
 create mode 100644 contrib/libzstd/include/zstd/legacy/zstd_v07.h

diff --git a/contrib/libzstd/CMakeLists.txt b/contrib/libzstd/CMakeLists.txt
index 00d20cf2146..1900017ebe3 100644
--- a/contrib/libzstd/CMakeLists.txt
+++ b/contrib/libzstd/CMakeLists.txt
@@ -51,30 +51,42 @@ MESSAGE(STATUS "ZSTD VERSION ${LIBVER_MAJOR}.${LIBVER_MINOR}.${LIBVER_RELEASE}")
 
 SET(Sources
         ${LIBRARY_DIR}/common/entropy_common.c
-        ${LIBRARY_DIR}/common/zstd_common.c
-        ${LIBRARY_DIR}/common/xxhash.c
+        ${LIBRARY_DIR}/common/error_private.c
         ${LIBRARY_DIR}/common/fse_decompress.c
+        ${LIBRARY_DIR}/common/pool.c
+        ${LIBRARY_DIR}/common/threading.c
+        ${LIBRARY_DIR}/common/xxhash.c
+        ${LIBRARY_DIR}/common/zstd_common.c
         ${LIBRARY_DIR}/compress/fse_compress.c
         ${LIBRARY_DIR}/compress/huf_compress.c
-        ${LIBRARY_DIR}/compress/zbuff_compress.c
         ${LIBRARY_DIR}/compress/zstd_compress.c
+        ${LIBRARY_DIR}/compress/zstdmt_compress.c
         ${LIBRARY_DIR}/decompress/huf_decompress.c
-        ${LIBRARY_DIR}/decompress/zbuff_decompress.c
         ${LIBRARY_DIR}/decompress/zstd_decompress.c
+        ${LIBRARY_DIR}/deprecated/zbuff_common.c
+        ${LIBRARY_DIR}/deprecated/zbuff_compress.c
+        ${LIBRARY_DIR}/deprecated/zbuff_decompress.c
+        ${LIBRARY_DIR}/dictBuilder/cover.c
         ${LIBRARY_DIR}/dictBuilder/divsufsort.c
         ${LIBRARY_DIR}/dictBuilder/zdict.c)
 
 SET(Headers
         ${LIBRARY_DIR}/common/bitstream.h
         ${LIBRARY_DIR}/common/error_private.h
-        ${LIBRARY_DIR}/common/error_public.h
         ${LIBRARY_DIR}/common/fse.h
         ${LIBRARY_DIR}/common/huf.h
         ${LIBRARY_DIR}/common/mem.h
-        ${LIBRARY_DIR}/common/zbuff.h
+        ${LIBRARY_DIR}/common/pool.h
+        ${LIBRARY_DIR}/common/threading.h
+        ${LIBRARY_DIR}/common/xxhash.h
+        ${LIBRARY_DIR}/common/zstd_errors.h
         ${LIBRARY_DIR}/common/zstd_internal.h
-        ${LIBRARY_DIR}/zstd.h
-        ${LIBRARY_DIR}/dictBuilder/zdict.h)
+        ${LIBRARY_DIR}/compress/zstdmt_compress.h
+        ${LIBRARY_DIR}/compress/zstd_opt.h
+        ${LIBRARY_DIR}/deprecated/zbuff.h
+        ${LIBRARY_DIR}/dictBuilder/divsufsort.h
+        ${LIBRARY_DIR}/dictBuilder/zdict.h
+        ${LIBRARY_DIR}/zstd.h)
 
 SET(ZSTD_LEGACY_SUPPORT true)
 
@@ -84,11 +96,23 @@ IF (ZSTD_LEGACY_SUPPORT)
     ADD_DEFINITIONS(-D ZSTD_LEGACY_SUPPORT=1)
 
     SET(Sources ${Sources}
-            ${LIBRARY_LEGACY_DIR}/zstd_v06.c)
+            ${LIBRARY_LEGACY_DIR}/zstd_v01.c
+            ${LIBRARY_LEGACY_DIR}/zstd_v02.c
+            ${LIBRARY_LEGACY_DIR}/zstd_v03.c
+            ${LIBRARY_LEGACY_DIR}/zstd_v04.c
+            ${LIBRARY_LEGACY_DIR}/zstd_v05.c
+            ${LIBRARY_LEGACY_DIR}/zstd_v06.c
+            ${LIBRARY_LEGACY_DIR}/zstd_v07.c)
 
     SET(Headers ${Headers}
             ${LIBRARY_LEGACY_DIR}/zstd_legacy.h
-            ${LIBRARY_LEGACY_DIR}/zstd_v06.h)
+            ${LIBRARY_LEGACY_DIR}/zstd_v01.h
+            ${LIBRARY_LEGACY_DIR}/zstd_v02.h
+            ${LIBRARY_LEGACY_DIR}/zstd_v03.h
+            ${LIBRARY_LEGACY_DIR}/zstd_v04.h
+            ${LIBRARY_LEGACY_DIR}/zstd_v05.h
+            ${LIBRARY_LEGACY_DIR}/zstd_v06.h
+            ${LIBRARY_LEGACY_DIR}/zstd_v07.h)
 ENDIF (ZSTD_LEGACY_SUPPORT)
 
 ADD_LIBRARY(zstd ${Sources} ${Headers})
diff --git a/contrib/libzstd/README b/contrib/libzstd/README
index 5662f703b55..595103c2c34 100644
--- a/contrib/libzstd/README
+++ b/contrib/libzstd/README
@@ -1 +1 @@
-https://github.com/facebook/zstd/tree/v1.1.0
+https://github.com/facebook/zstd/tree/v1.3.1
diff --git a/contrib/libzstd/include/zstd/common/bitstream.h b/contrib/libzstd/include/zstd/common/bitstream.h
index e96798fe47b..07b85026c95 100644
--- a/contrib/libzstd/include/zstd/common/bitstream.h
+++ b/contrib/libzstd/include/zstd/common/bitstream.h
@@ -2,7 +2,7 @@
    bitstream
    Part of FSE library
    header file (to include)
-   Copyright (C) 2013-2016, Yann Collet.
+   Copyright (C) 2013-2017, Yann Collet.
 
    BSD 2-Clause License (http://www.opensource.org/licenses/bsd-license.php)
 
@@ -39,7 +39,6 @@
 extern "C" {
 #endif
 
-
 /*
 *  This API consists of small unitary functions, which must be inlined for best performance.
 *  Since link-time-optimization is not available for all compilers,
@@ -53,6 +52,18 @@ extern "C" {
 #include "error_private.h"  /* error codes and messages */
 
 
+/*-*************************************
+*  Debug
+***************************************/
+#if defined(BIT_DEBUG) && (BIT_DEBUG>=1)
+#  include <assert.h>
+#else
+#  ifndef assert
+#    define assert(condition) ((void)0)
+#  endif
+#endif
+
+
 /*=========================================
 *  Target specific
 =========================================*/
@@ -60,6 +71,10 @@ extern "C" {
 #  include <immintrin.h>   /* support for bextr (experimental) */
 #endif
 
+#define STREAM_ACCUMULATOR_MIN_32  25
+#define STREAM_ACCUMULATOR_MIN_64  57
+#define STREAM_ACCUMULATOR_MIN    ((U32)(MEM_32bits() ? STREAM_ACCUMULATOR_MIN_32 : STREAM_ACCUMULATOR_MIN_64))
+
 
 /*-******************************************
 *  bitStream encoding API (write forward)
@@ -71,7 +86,7 @@ extern "C" {
 typedef struct
 {
     size_t bitContainer;
-    int    bitPos;
+    unsigned bitPos;
     char*  startPtr;
     char*  ptr;
     char*  endPtr;
@@ -109,6 +124,7 @@ typedef struct
     unsigned bitsConsumed;
     const char* ptr;
     const char* start;
+    const char* limitPtr;
 } BIT_DStream_t;
 
 typedef enum { BIT_DStream_unfinished = 0,
@@ -160,7 +176,10 @@ MEM_STATIC unsigned BIT_highbit32 (register U32 val)
 #   elif defined(__GNUC__) && (__GNUC__ >= 3)   /* Use GCC Intrinsic */
     return 31 - __builtin_clz (val);
 #   else   /* Software version */
-    static const unsigned DeBruijnClz[32] = { 0, 9, 1, 10, 13, 21, 2, 29, 11, 14, 16, 18, 22, 25, 3, 30, 8, 12, 20, 28, 15, 17, 24, 7, 19, 27, 23, 6, 26, 5, 4, 31 };
+    static const unsigned DeBruijnClz[32] = { 0,  9,  1, 10, 13, 21,  2, 29,
+                                             11, 14, 16, 18, 22, 25,  3, 30,
+                                              8, 12, 20, 28, 15, 17, 24,  7,
+                                             19, 27, 23,  6, 26,  5,  4, 31 };
     U32 v = val;
     v |= v >> 1;
     v |= v >> 2;
@@ -172,31 +191,36 @@ MEM_STATIC unsigned BIT_highbit32 (register U32 val)
 }
 
 /*=====    Local Constants   =====*/
-static const unsigned BIT_mask[] = { 0, 1, 3, 7, 0xF, 0x1F, 0x3F, 0x7F, 0xFF, 0x1FF, 0x3FF, 0x7FF, 0xFFF, 0x1FFF, 0x3FFF, 0x7FFF, 0xFFFF, 0x1FFFF, 0x3FFFF, 0x7FFFF, 0xFFFFF, 0x1FFFFF, 0x3FFFFF, 0x7FFFFF,  0xFFFFFF, 0x1FFFFFF, 0x3FFFFFF };   /* up to 26 bits */
+static const unsigned BIT_mask[] = { 0, 1, 3, 7, 0xF, 0x1F, 0x3F, 0x7F,
+                                    0xFF, 0x1FF, 0x3FF, 0x7FF, 0xFFF, 0x1FFF, 0x3FFF, 0x7FFF,
+                                    0xFFFF, 0x1FFFF, 0x3FFFF, 0x7FFFF, 0xFFFFF, 0x1FFFFF, 0x3FFFFF, 0x7FFFFF,
+                                    0xFFFFFF, 0x1FFFFFF, 0x3FFFFFF };   /* up to 26 bits */
 
 
 /*-**************************************************************
 *  bitStream encoding
 ****************************************************************/
 /*! BIT_initCStream() :
- *  `dstCapacity` must be > sizeof(void*)
+ *  `dstCapacity` must be > sizeof(size_t)
  *  @return : 0 if success,
               otherwise an error code (can be tested using ERR_isError() ) */
-MEM_STATIC size_t BIT_initCStream(BIT_CStream_t* bitC, void* startPtr, size_t dstCapacity)
+MEM_STATIC size_t BIT_initCStream(BIT_CStream_t* bitC,
+                                  void* startPtr, size_t dstCapacity)
 {
     bitC->bitContainer = 0;
     bitC->bitPos = 0;
     bitC->startPtr = (char*)startPtr;
     bitC->ptr = bitC->startPtr;
-    bitC->endPtr = bitC->startPtr + dstCapacity - sizeof(bitC->ptr);
-    if (dstCapacity <= sizeof(bitC->ptr)) return ERROR(dstSize_tooSmall);
+    bitC->endPtr = bitC->startPtr + dstCapacity - sizeof(bitC->bitContainer);
+    if (dstCapacity <= sizeof(bitC->bitContainer)) return ERROR(dstSize_tooSmall);
     return 0;
 }
 
 /*! BIT_addBits() :
     can add up to 26 bits into `bitC`.
     Does not check for register overflow ! */
-MEM_STATIC void BIT_addBits(BIT_CStream_t* bitC, size_t value, unsigned nbBits)
+MEM_STATIC void BIT_addBits(BIT_CStream_t* bitC,
+                            size_t value, unsigned nbBits)
 {
     bitC->bitContainer |= (value & BIT_mask[nbBits]) << bitC->bitPos;
     bitC->bitPos += nbBits;
@@ -204,34 +228,42 @@ MEM_STATIC void BIT_addBits(BIT_CStream_t* bitC, size_t value, unsigned nbBits)
 
 /*! BIT_addBitsFast() :
  *  works only if `value` is _clean_, meaning all high bits above nbBits are 0 */
-MEM_STATIC void BIT_addBitsFast(BIT_CStream_t* bitC, size_t value, unsigned nbBits)
+MEM_STATIC void BIT_addBitsFast(BIT_CStream_t* bitC,
+                                size_t value, unsigned nbBits)
 {
+    assert((value>>nbBits) == 0);
     bitC->bitContainer |= value << bitC->bitPos;
     bitC->bitPos += nbBits;
 }
 
 /*! BIT_flushBitsFast() :
+ *  assumption : bitContainer has not overflowed
  *  unsafe version; does not check buffer overflow */
 MEM_STATIC void BIT_flushBitsFast(BIT_CStream_t* bitC)
 {
     size_t const nbBytes = bitC->bitPos >> 3;
+    assert( bitC->bitPos <= (sizeof(bitC->bitContainer)*8) );
     MEM_writeLEST(bitC->ptr, bitC->bitContainer);
     bitC->ptr += nbBytes;
+    assert(bitC->ptr <= bitC->endPtr);
     bitC->bitPos &= 7;
-    bitC->bitContainer >>= nbBytes*8;   /* if bitPos >= sizeof(bitContainer)*8 --> undefined behavior */
+    bitC->bitContainer >>= nbBytes*8;
 }
 
 /*! BIT_flushBits() :
+ *  assumption : bitContainer has not overflowed
  *  safe version; check for buffer overflow, and prevents it.
- *  note : does not signal buffer overflow. This will be revealed later on using BIT_closeCStream() */
+ *  note : does not signal buffer overflow.
+ *  overflow will be revealed later on using BIT_closeCStream() */
 MEM_STATIC void BIT_flushBits(BIT_CStream_t* bitC)
 {
     size_t const nbBytes = bitC->bitPos >> 3;
+    assert( bitC->bitPos <= (sizeof(bitC->bitContainer)*8) );
     MEM_writeLEST(bitC->ptr, bitC->bitContainer);
     bitC->ptr += nbBytes;
     if (bitC->ptr > bitC->endPtr) bitC->ptr = bitC->endPtr;
     bitC->bitPos &= 7;
-    bitC->bitContainer >>= nbBytes*8;   /* if bitPos >= sizeof(bitContainer)*8 --> undefined behavior */
+    bitC->bitContainer >>= nbBytes*8;
 }
 
 /*! BIT_closeCStream() :
@@ -241,9 +273,7 @@ MEM_STATIC size_t BIT_closeCStream(BIT_CStream_t* bitC)
 {
     BIT_addBitsFast(bitC, 1, 1);   /* endMark */
     BIT_flushBits(bitC);
-
-    if (bitC->ptr >= bitC->endPtr) return 0; /* doesn't fit within authorized budget : cancel */
-
+    if (bitC->ptr >= bitC->endPtr) return 0; /* overflow detected */
     return (bitC->ptr - bitC->startPtr) + (bitC->bitPos > 0);
 }
 
@@ -261,26 +291,39 @@ MEM_STATIC size_t BIT_initDStream(BIT_DStream_t* bitD, const void* srcBuffer, si
 {
     if (srcSize < 1) { memset(bitD, 0, sizeof(*bitD)); return ERROR(srcSize_wrong); }
 
+    bitD->start = (const char*)srcBuffer;
+    bitD->limitPtr = bitD->start + sizeof(bitD->bitContainer);
+
     if (srcSize >=  sizeof(bitD->bitContainer)) {  /* normal case */
-        bitD->start = (const char*)srcBuffer;
         bitD->ptr   = (const char*)srcBuffer + srcSize - sizeof(bitD->bitContainer);
         bitD->bitContainer = MEM_readLEST(bitD->ptr);
         { BYTE const lastByte = ((const BYTE*)srcBuffer)[srcSize-1];
-          bitD->bitsConsumed = lastByte ? 8 - BIT_highbit32(lastByte) : 0;
+          bitD->bitsConsumed = lastByte ? 8 - BIT_highbit32(lastByte) : 0;  /* ensures bitsConsumed is always set */
           if (lastByte == 0) return ERROR(GENERIC); /* endMark not present */ }
     } else {
-        bitD->start = (const char*)srcBuffer;
         bitD->ptr   = bitD->start;
         bitD->bitContainer = *(const BYTE*)(bitD->start);
         switch(srcSize)
         {
-            case 7: bitD->bitContainer += (size_t)(((const BYTE*)(srcBuffer))[6]) << (sizeof(bitD->bitContainer)*8 - 16);
-            case 6: bitD->bitContainer += (size_t)(((const BYTE*)(srcBuffer))[5]) << (sizeof(bitD->bitContainer)*8 - 24);
-            case 5: bitD->bitContainer += (size_t)(((const BYTE*)(srcBuffer))[4]) << (sizeof(bitD->bitContainer)*8 - 32);
-            case 4: bitD->bitContainer += (size_t)(((const BYTE*)(srcBuffer))[3]) << 24;
-            case 3: bitD->bitContainer += (size_t)(((const BYTE*)(srcBuffer))[2]) << 16;
-            case 2: bitD->bitContainer += (size_t)(((const BYTE*)(srcBuffer))[1]) <<  8;
-            default:;
+	    case 7: bitD->bitContainer += (size_t)(((const BYTE*)(srcBuffer))[6]) << (sizeof(bitD->bitContainer)*8 - 16);
+	            /* fall-through */
+
+	    case 6: bitD->bitContainer += (size_t)(((const BYTE*)(srcBuffer))[5]) << (sizeof(bitD->bitContainer)*8 - 24);
+	            /* fall-through */
+
+	    case 5: bitD->bitContainer += (size_t)(((const BYTE*)(srcBuffer))[4]) << (sizeof(bitD->bitContainer)*8 - 32);
+	            /* fall-through */
+
+	    case 4: bitD->bitContainer += (size_t)(((const BYTE*)(srcBuffer))[3]) << 24;
+	            /* fall-through */
+
+	    case 3: bitD->bitContainer += (size_t)(((const BYTE*)(srcBuffer))[2]) << 16;
+	            /* fall-through */
+
+	    case 2: bitD->bitContainer += (size_t)(((const BYTE*)(srcBuffer))[1]) <<  8;
+	            /* fall-through */
+
+            default: break;
         }
         { BYTE const lastByte = ((const BYTE*)srcBuffer)[srcSize-1];
           bitD->bitsConsumed = lastByte ? 8 - BIT_highbit32(lastByte) : 0;
@@ -298,7 +341,7 @@ MEM_STATIC size_t BIT_getUpperBits(size_t bitContainer, U32 const start)
 
 MEM_STATIC size_t BIT_getMiddleBits(size_t bitContainer, U32 const start, U32 const nbBits)
 {
-#if defined(__BMI__) && defined(__GNUC__)   /* experimental */
+#if defined(__BMI__) && defined(__GNUC__) && __GNUC__*1000+__GNUC_MINOR__ >= 4008  /* experimental */
 #  if defined(__x86_64__)
     if (sizeof(bitContainer)==8)
         return _bextr_u64(bitContainer, start, nbBits);
@@ -327,17 +370,18 @@ MEM_STATIC size_t BIT_getLowerBits(size_t bitContainer, U32 const nbBits)
 #if defined(__BMI__) && defined(__GNUC__)   /* experimental; fails if bitD->bitsConsumed + nbBits > sizeof(bitD->bitContainer)*8 */
     return BIT_getMiddleBits(bitD->bitContainer, (sizeof(bitD->bitContainer)*8) - bitD->bitsConsumed - nbBits, nbBits);
 #else
-    U32 const bitMask = sizeof(bitD->bitContainer)*8 - 1;
-    return ((bitD->bitContainer << (bitD->bitsConsumed & bitMask)) >> 1) >> ((bitMask-nbBits) & bitMask);
+    U32 const regMask = sizeof(bitD->bitContainer)*8 - 1;
+    return ((bitD->bitContainer << (bitD->bitsConsumed & regMask)) >> 1) >> ((regMask-nbBits) & regMask);
 #endif
 }
 
 /*! BIT_lookBitsFast() :
-*   unsafe version; only works only if nbBits >= 1 */
+ *  unsafe version; only works if nbBits >= 1 */
 MEM_STATIC size_t BIT_lookBitsFast(const BIT_DStream_t* bitD, U32 nbBits)
 {
-    U32 const bitMask = sizeof(bitD->bitContainer)*8 - 1;
-    return (bitD->bitContainer << (bitD->bitsConsumed & bitMask)) >> (((bitMask+1)-nbBits) & bitMask);
+    U32 const regMask = sizeof(bitD->bitContainer)*8 - 1;
+    assert(nbBits >= 1);
+    return (bitD->bitContainer << (bitD->bitsConsumed & regMask)) >> (((regMask+1)-nbBits) & regMask);
 }
 
 MEM_STATIC void BIT_skipBits(BIT_DStream_t* bitD, U32 nbBits)
@@ -362,21 +406,22 @@ MEM_STATIC size_t BIT_readBits(BIT_DStream_t* bitD, U32 nbBits)
 MEM_STATIC size_t BIT_readBitsFast(BIT_DStream_t* bitD, U32 nbBits)
 {
     size_t const value = BIT_lookBitsFast(bitD, nbBits);
+    assert(nbBits >= 1);
     BIT_skipBits(bitD, nbBits);
     return value;
 }
 
 /*! BIT_reloadDStream() :
-*   Refill `BIT_DStream_t` from src buffer previously defined (see BIT_initDStream() ).
+*   Refill `bitD` from buffer previously set in BIT_initDStream() .
 *   This function is safe, it guarantees it will not read beyond src buffer.
 *   @return : status of `BIT_DStream_t` internal register.
-              if status == unfinished, internal register is filled with >= (sizeof(bitD->bitContainer)*8 - 7) bits */
+              if status == BIT_DStream_unfinished, internal register is filled with >= (sizeof(bitD->bitContainer)*8 - 7) bits */
 MEM_STATIC BIT_DStream_status BIT_reloadDStream(BIT_DStream_t* bitD)
 {
-	if (bitD->bitsConsumed > (sizeof(bitD->bitContainer)*8))  /* should not happen => corruption detected */
-		return BIT_DStream_overflow;
+    if (bitD->bitsConsumed > (sizeof(bitD->bitContainer)*8))  /* overflow detected, like end of stream */
+        return BIT_DStream_overflow;
 
-    if (bitD->ptr >= bitD->start + sizeof(bitD->bitContainer)) {
+    if (bitD->ptr >= bitD->limitPtr) {
         bitD->ptr -= bitD->bitsConsumed >> 3;
         bitD->bitsConsumed &= 7;
         bitD->bitContainer = MEM_readLEST(bitD->ptr);
@@ -386,6 +431,7 @@ MEM_STATIC BIT_DStream_status BIT_reloadDStream(BIT_DStream_t* bitD)
         if (bitD->bitsConsumed < sizeof(bitD->bitContainer)*8) return BIT_DStream_endOfBuffer;
         return BIT_DStream_completed;
     }
+    /* start < ptr < limitPtr */
     {   U32 nbBytes = bitD->bitsConsumed >> 3;
         BIT_DStream_status result = BIT_DStream_unfinished;
         if (bitD->ptr - nbBytes < bitD->start) {
@@ -394,7 +440,7 @@ MEM_STATIC BIT_DStream_status BIT_reloadDStream(BIT_DStream_t* bitD)
         }
         bitD->ptr -= nbBytes;
         bitD->bitsConsumed -= nbBytes*8;
-        bitD->bitContainer = MEM_readLEST(bitD->ptr);   /* reminder : srcSize > sizeof(bitD) */
+        bitD->bitContainer = MEM_readLEST(bitD->ptr);   /* reminder : srcSize > sizeof(bitD->bitContainer), otherwise bitD->ptr == bitD->start */
         return result;
     }
 }
diff --git a/contrib/libzstd/include/zstd/common/entropy_common.c b/contrib/libzstd/include/zstd/common/entropy_common.c
index acd96699976..b37a082fee2 100644
--- a/contrib/libzstd/include/zstd/common/entropy_common.c
+++ b/contrib/libzstd/include/zstd/common/entropy_common.c
@@ -43,27 +43,21 @@
 #include "huf.h"
 
 
-/*-****************************************
-*  FSE Error Management
-******************************************/
-unsigned FSE_isError(size_t code) { return ERR_isError(code); }
+/*===   Version   ===*/
+unsigned FSE_versionNumber(void) { return FSE_VERSION_NUMBER; }
 
+
+/*===   Error Management   ===*/
+unsigned FSE_isError(size_t code) { return ERR_isError(code); }
 const char* FSE_getErrorName(size_t code) { return ERR_getErrorName(code); }
 
-
-/* **************************************************************
-*  HUF Error Management
-****************************************************************/
 unsigned HUF_isError(size_t code) { return ERR_isError(code); }
-
 const char* HUF_getErrorName(size_t code) { return ERR_getErrorName(code); }
 
 
 /*-**************************************************************
 *  FSE NCount encoding-decoding
 ****************************************************************/
-static short FSE_abs(short a) { return (short)(a<0 ? -a : a); }
-
 size_t FSE_readNCount (short* normalizedCounter, unsigned* maxSVPtr, unsigned* tableLogPtr,
                  const void* headerBuffer, size_t hbSize)
 {
@@ -117,21 +111,21 @@ size_t FSE_readNCount (short* normalizedCounter, unsigned* maxSVPtr, unsigned* t
             } else {
                 bitStream >>= 2;
         }   }
-        {   short const max = (short)((2*threshold-1)-remaining);
-            short count;
+        {   int const max = (2*threshold-1) - remaining;
+            int count;
 
             if ((bitStream & (threshold-1)) < (U32)max) {
-                count = (short)(bitStream & (threshold-1));
-                bitCount   += nbBits-1;
+                count = bitStream & (threshold-1);
+                bitCount += nbBits-1;
             } else {
-                count = (short)(bitStream & (2*threshold-1));
+                count = bitStream & (2*threshold-1);
                 if (count >= threshold) count -= max;
-                bitCount   += nbBits;
+                bitCount += nbBits;
             }
 
             count--;   /* extra accuracy */
-            remaining -= FSE_abs(count);
-            normalizedCounter[charnum++] = count;
+            remaining -= count < 0 ? -count : count;   /* -1 means +1 */
+            normalizedCounter[charnum++] = (short)count;
             previous0 = !count;
             while (remaining < threshold) {
                 nbBits--;
@@ -159,6 +153,7 @@ size_t FSE_readNCount (short* normalizedCounter, unsigned* maxSVPtr, unsigned* t
 /*! HUF_readStats() :
     Read compact Huffman tree, saved by HUF_writeCTable().
     `huffWeight` is destination buffer.
+    `rankStats` is assumed to be a table of at least (HUF_TABLELOG_MAX + 1) U32 : that many entries are zeroed by this function.
     @return : size read from `src` , or an error Code .
     Note : Needed by HUF_readCTable() and HUF_readDTableX?() .
 */
@@ -168,9 +163,11 @@ size_t HUF_readStats(BYTE* huffWeight, size_t hwSize, U32* rankStats,
 {
     U32 weightTotal;
     const BYTE* ip = (const BYTE*) src;
-    size_t iSize = ip[0];
+    size_t iSize;
     size_t oSize;
 
+    if (!srcSize) return ERROR(srcSize_wrong);
+    iSize = ip[0];
     /* memset(huffWeight, 0, hwSize);   *//* is not necessary, even though some analyzer complain ... */
 
     if (iSize >= 128) {  /* special header */
@@ -185,23 +182,25 @@ size_t HUF_readStats(BYTE* huffWeight, size_t hwSize, U32* rankStats,
                 huffWeight[n+1] = ip[n/2] & 15;
     }   }   }
     else  {   /* header compressed with FSE (normal case) */
+        FSE_DTable fseWorkspace[FSE_DTABLE_SIZE_U32(6)];  /* 6 is max possible tableLog for HUF header (maybe even 5, to be tested) */
         if (iSize+1 > srcSize) return ERROR(srcSize_wrong);
-        oSize = FSE_decompress(huffWeight, hwSize-1, ip+1, iSize);   /* max (hwSize-1) values decoded, as last one is implied */
+        oSize = FSE_decompress_wksp(huffWeight, hwSize-1, ip+1, iSize, fseWorkspace, 6);   /* max (hwSize-1) values decoded, as last one is implied */
         if (FSE_isError(oSize)) return oSize;
     }
 
     /* collect weight stats */
-    memset(rankStats, 0, (HUF_TABLELOG_ABSOLUTEMAX + 1) * sizeof(U32));
+    memset(rankStats, 0, (HUF_TABLELOG_MAX + 1) * sizeof(U32));
     weightTotal = 0;
     {   U32 n; for (n=0; n<oSize; n++) {
-            if (huffWeight[n] >= HUF_TABLELOG_ABSOLUTEMAX) return ERROR(corruption_detected);
+            if (huffWeight[n] >= HUF_TABLELOG_MAX) return ERROR(corruption_detected);
             rankStats[huffWeight[n]]++;
             weightTotal += (1 << huffWeight[n]) >> 1;
     }   }
+    if (weightTotal == 0) return ERROR(corruption_detected);
 
     /* get last non-null symbol weight (implied, total must be 2^n) */
     {   U32 const tableLog = BIT_highbit32(weightTotal) + 1;
-        if (tableLog > HUF_TABLELOG_ABSOLUTEMAX) return ERROR(corruption_detected);
+        if (tableLog > HUF_TABLELOG_MAX) return ERROR(corruption_detected);
         *tableLogPtr = tableLog;
         /* determine last weight */
         {   U32 const total = 1 << tableLog;
diff --git a/contrib/libzstd/include/zstd/common/error_private.c b/contrib/libzstd/include/zstd/common/error_private.c
new file mode 100644
index 00000000000..2d752cd23a7
--- /dev/null
+++ b/contrib/libzstd/include/zstd/common/error_private.c
@@ -0,0 +1,47 @@
+/**
+ * Copyright (c) 2016-present, Yann Collet, Facebook, Inc.
+ * All rights reserved.
+ *
+ * This source code is licensed under the BSD-style license found in the
+ * LICENSE file in the root directory of this source tree. An additional grant
+ * of patent rights can be found in the PATENTS file in the same directory.
+ */
+
+/* The purpose of this file is to have a single list of error strings embedded in binary */
+
+#include "error_private.h"
+
+const char* ERR_getErrorString(ERR_enum code)
+{
+    static const char* const notErrorCode = "Unspecified error code";
+    switch( code )
+    {
+    case PREFIX(no_error): return "No error detected";
+    case PREFIX(GENERIC):  return "Error (generic)";
+    case PREFIX(prefix_unknown): return "Unknown frame descriptor";
+    case PREFIX(version_unsupported): return "Version not supported";
+    case PREFIX(parameter_unknown): return "Unknown parameter type";
+    case PREFIX(frameParameter_unsupported): return "Unsupported frame parameter";
+    case PREFIX(frameParameter_unsupportedBy32bits): return "Frame parameter unsupported in 32-bits mode";
+    case PREFIX(frameParameter_windowTooLarge): return "Frame requires too much memory for decoding";
+    case PREFIX(compressionParameter_unsupported): return "Compression parameter is not supported";
+    case PREFIX(compressionParameter_outOfBound): return "Compression parameter is out of bound";
+    case PREFIX(init_missing): return "Context should be init first";
+    case PREFIX(memory_allocation): return "Allocation error : not enough memory";
+    case PREFIX(stage_wrong): return "Operation not authorized at current processing stage";
+    case PREFIX(dstSize_tooSmall): return "Destination buffer is too small";
+    case PREFIX(srcSize_wrong): return "Src size is incorrect";
+    case PREFIX(corruption_detected): return "Corrupted block detected";
+    case PREFIX(checksum_wrong): return "Restored data doesn't match checksum";
+    case PREFIX(tableLog_tooLarge): return "tableLog requires too much memory : unsupported";
+    case PREFIX(maxSymbolValue_tooLarge): return "Unsupported max Symbol Value : too large";
+    case PREFIX(maxSymbolValue_tooSmall): return "Specified maxSymbolValue is too small";
+    case PREFIX(dictionary_corrupted): return "Dictionary is corrupted";
+    case PREFIX(dictionary_wrong): return "Dictionary mismatch";
+    case PREFIX(dictionaryCreation_failed): return "Cannot create Dictionary from provided samples";
+    case PREFIX(frameIndex_tooLarge): return "Frame index is too large";
+    case PREFIX(seekableIO): return "An I/O error occurred when reading/seeking";
+    case PREFIX(maxCode):
+    default: return notErrorCode;
+    }
+}
diff --git a/contrib/libzstd/include/zstd/common/error_private.h b/contrib/libzstd/include/zstd/common/error_private.h
index d27e15af8b4..1bc2e495481 100644
--- a/contrib/libzstd/include/zstd/common/error_private.h
+++ b/contrib/libzstd/include/zstd/common/error_private.h
@@ -21,7 +21,7 @@ extern "C" {
 *  Dependencies
 ******************************************/
 #include <stddef.h>        /* size_t */
-#include "error_public.h"  /* enum list */
+#include "zstd_errors.h"  /* enum list */
 
 
 /* ****************************************
@@ -62,35 +62,7 @@ ERR_STATIC ERR_enum ERR_getErrorCode(size_t code) { if (!ERR_isError(code)) retu
 *  Error Strings
 ******************************************/
 
-ERR_STATIC const char* ERR_getErrorString(ERR_enum code)
-{
-    static const char* notErrorCode = "Unspecified error code";
-    switch( code )
-    {
-    case PREFIX(no_error): return "No error detected";
-    case PREFIX(GENERIC):  return "Error (generic)";
-    case PREFIX(prefix_unknown): return "Unknown frame descriptor";
-    case PREFIX(version_unsupported): return "Version not supported";
-    case PREFIX(parameter_unknown): return "Unknown parameter type";
-    case PREFIX(frameParameter_unsupported): return "Unsupported frame parameter";
-    case PREFIX(frameParameter_unsupportedBy32bits): return "Frame parameter unsupported in 32-bits mode";
-    case PREFIX(compressionParameter_unsupported): return "Compression parameter is out of bound";
-    case PREFIX(init_missing): return "Context should be init first";
-    case PREFIX(memory_allocation): return "Allocation error : not enough memory";
-    case PREFIX(stage_wrong): return "Operation not authorized at current processing stage";
-    case PREFIX(dstSize_tooSmall): return "Destination buffer is too small";
-    case PREFIX(srcSize_wrong): return "Src size incorrect";
-    case PREFIX(corruption_detected): return "Corrupted block detected";
-    case PREFIX(checksum_wrong): return "Restored data doesn't match checksum";
-    case PREFIX(tableLog_tooLarge): return "tableLog requires too much memory : unsupported";
-    case PREFIX(maxSymbolValue_tooLarge): return "Unsupported max Symbol Value : too large";
-    case PREFIX(maxSymbolValue_tooSmall): return "Specified maxSymbolValue is too small";
-    case PREFIX(dictionary_corrupted): return "Dictionary is corrupted";
-    case PREFIX(dictionary_wrong): return "Dictionary mismatch";
-    case PREFIX(maxCode):
-    default: return notErrorCode;
-    }
-}
+const char* ERR_getErrorString(ERR_enum code);   /* error_private.c */
 
 ERR_STATIC const char* ERR_getErrorName(size_t code)
 {
diff --git a/contrib/libzstd/include/zstd/common/error_public.h b/contrib/libzstd/include/zstd/common/error_public.h
deleted file mode 100644
index d46abd2c720..00000000000
--- a/contrib/libzstd/include/zstd/common/error_public.h
+++ /dev/null
@@ -1,59 +0,0 @@
-/**
- * Copyright (c) 2016-present, Yann Collet, Facebook, Inc.
- * All rights reserved.
- *
- * This source code is licensed under the BSD-style license found in the
- * LICENSE file in the root directory of this source tree. An additional grant
- * of patent rights can be found in the PATENTS file in the same directory.
- */
-
-#ifndef ERROR_PUBLIC_H_MODULE
-#define ERROR_PUBLIC_H_MODULE
-
-#if defined (__cplusplus)
-extern "C" {
-#endif
-
-/*===== dependency =====*/
-#include <stddef.h>   /* size_t */
-
-
-/*-****************************************
-*  error codes list
-******************************************/
-typedef enum {
-  ZSTD_error_no_error,
-  ZSTD_error_GENERIC,
-  ZSTD_error_prefix_unknown,
-  ZSTD_error_version_unsupported,
-  ZSTD_error_parameter_unknown,
-  ZSTD_error_frameParameter_unsupported,
-  ZSTD_error_frameParameter_unsupportedBy32bits,
-  ZSTD_error_compressionParameter_unsupported,
-  ZSTD_error_init_missing,
-  ZSTD_error_memory_allocation,
-  ZSTD_error_stage_wrong,
-  ZSTD_error_dstSize_tooSmall,
-  ZSTD_error_srcSize_wrong,
-  ZSTD_error_corruption_detected,
-  ZSTD_error_checksum_wrong,
-  ZSTD_error_tableLog_tooLarge,
-  ZSTD_error_maxSymbolValue_tooLarge,
-  ZSTD_error_maxSymbolValue_tooSmall,
-  ZSTD_error_dictionary_corrupted,
-  ZSTD_error_dictionary_wrong,
-  ZSTD_error_maxCode
-} ZSTD_ErrorCode;
-
-/*! ZSTD_getErrorCode() :
-    convert a `size_t` function result into a `ZSTD_ErrorCode` enum type,
-    which can be used to compare directly with enum list published into "error_public.h" */
-ZSTD_ErrorCode ZSTD_getErrorCode(size_t functionResult);
-const char* ZSTD_getErrorString(ZSTD_ErrorCode code);
-
-
-#if defined (__cplusplus)
-}
-#endif
-
-#endif /* ERROR_PUBLIC_H_MODULE */
diff --git a/contrib/libzstd/include/zstd/common/fse.h b/contrib/libzstd/include/zstd/common/fse.h
index 720d54b111e..6d5d41def19 100644
--- a/contrib/libzstd/include/zstd/common/fse.h
+++ b/contrib/libzstd/include/zstd/common/fse.h
@@ -45,6 +45,32 @@ extern "C" {
 #include <stddef.h>    /* size_t, ptrdiff_t */
 
 
+/*-*****************************************
+*  FSE_PUBLIC_API : control library symbols visibility
+******************************************/
+#if defined(FSE_DLL_EXPORT) && (FSE_DLL_EXPORT==1) && defined(__GNUC__) && (__GNUC__ >= 4)
+#  define FSE_PUBLIC_API __attribute__ ((visibility ("default")))
+#elif defined(FSE_DLL_EXPORT) && (FSE_DLL_EXPORT==1)   /* Visual expected */
+#  define FSE_PUBLIC_API __declspec(dllexport)
+#elif defined(FSE_DLL_IMPORT) && (FSE_DLL_IMPORT==1)
+#  define FSE_PUBLIC_API __declspec(dllimport) /* It isn't required but allows to generate better code, saving a function pointer load from the IAT and an indirect jump.*/
+#else
+#  define FSE_PUBLIC_API
+#endif
+
+/*------   Version   ------*/
+#define FSE_VERSION_MAJOR    0
+#define FSE_VERSION_MINOR    9
+#define FSE_VERSION_RELEASE  0
+
+#define FSE_LIB_VERSION FSE_VERSION_MAJOR.FSE_VERSION_MINOR.FSE_VERSION_RELEASE
+#define FSE_QUOTE(str) #str
+#define FSE_EXPAND_AND_QUOTE(str) FSE_QUOTE(str)
+#define FSE_VERSION_STRING FSE_EXPAND_AND_QUOTE(FSE_LIB_VERSION)
+
+#define FSE_VERSION_NUMBER  (FSE_VERSION_MAJOR *100*100 + FSE_VERSION_MINOR *100 + FSE_VERSION_RELEASE)
+FSE_PUBLIC_API unsigned FSE_versionNumber(void);   /**< library version number; to be used when checking dll version */
+
 /*-****************************************
 *  FSE simple functions
 ******************************************/
@@ -56,8 +82,8 @@ extern "C" {
                      if return == 1, srcData is a single byte symbol * srcSize times. Use RLE compression instead.
                      if FSE_isError(return), compression failed (more details using FSE_getErrorName())
 */
-size_t FSE_compress(void* dst, size_t dstCapacity,
-              const void* src, size_t srcSize);
+FSE_PUBLIC_API size_t FSE_compress(void* dst, size_t dstCapacity,
+                             const void* src, size_t srcSize);
 
 /*! FSE_decompress():
     Decompress FSE data from buffer 'cSrc', of size 'cSrcSize',
@@ -69,18 +95,18 @@ size_t FSE_compress(void* dst, size_t dstCapacity,
     Why ? : making this distinction requires a header.
     Header management is intentionally delegated to the user layer, which can better manage special cases.
 */
-size_t FSE_decompress(void* dst,  size_t dstCapacity,
-                const void* cSrc, size_t cSrcSize);
+FSE_PUBLIC_API size_t FSE_decompress(void* dst,  size_t dstCapacity,
+                               const void* cSrc, size_t cSrcSize);
 
 
 /*-*****************************************
 *  Tool functions
 ******************************************/
-size_t FSE_compressBound(size_t size);       /* maximum compressed size */
+FSE_PUBLIC_API size_t FSE_compressBound(size_t size);       /* maximum compressed size */
 
 /* Error Management */
-unsigned    FSE_isError(size_t code);        /* tells if a return value is an error code */
-const char* FSE_getErrorName(size_t code);   /* provides error code string (useful for debugging) */
+FSE_PUBLIC_API unsigned    FSE_isError(size_t code);        /* tells if a return value is an error code */
+FSE_PUBLIC_API const char* FSE_getErrorName(size_t code);   /* provides error code string (useful for debugging) */
 
 
 /*-*****************************************
@@ -94,7 +120,7 @@ const char* FSE_getErrorName(size_t code);   /* provides error code string (usef
                      if return == 1, srcData is a single byte symbol * srcSize times. Use RLE compression.
                      if FSE_isError(return), it's an error code.
 */
-size_t FSE_compress2 (void* dst, size_t dstSize, const void* src, size_t srcSize, unsigned maxSymbolValue, unsigned tableLog);
+FSE_PUBLIC_API size_t FSE_compress2 (void* dst, size_t dstSize, const void* src, size_t srcSize, unsigned maxSymbolValue, unsigned tableLog);
 
 
 /*-*****************************************
@@ -127,50 +153,50 @@ or to save and provide normalized distribution using external method.
     @return : the count of the most frequent symbol (which is not identified).
               if return == srcSize, there is only one symbol.
               Can also return an error code, which can be tested with FSE_isError(). */
-size_t FSE_count(unsigned* count, unsigned* maxSymbolValuePtr, const void* src, size_t srcSize);
+FSE_PUBLIC_API size_t FSE_count(unsigned* count, unsigned* maxSymbolValuePtr, const void* src, size_t srcSize);
 
 /*! FSE_optimalTableLog():
     dynamically downsize 'tableLog' when conditions are met.
     It saves CPU time, by using smaller tables, while preserving or even improving compression ratio.
     @return : recommended tableLog (necessarily <= 'maxTableLog') */
-unsigned FSE_optimalTableLog(unsigned maxTableLog, size_t srcSize, unsigned maxSymbolValue);
+FSE_PUBLIC_API unsigned FSE_optimalTableLog(unsigned maxTableLog, size_t srcSize, unsigned maxSymbolValue);
 
 /*! FSE_normalizeCount():
     normalize counts so that sum(count[]) == Power_of_2 (2^tableLog)
     'normalizedCounter' is a table of short, of minimum size (maxSymbolValue+1).
     @return : tableLog,
               or an errorCode, which can be tested using FSE_isError() */
-size_t FSE_normalizeCount(short* normalizedCounter, unsigned tableLog, const unsigned* count, size_t srcSize, unsigned maxSymbolValue);
+FSE_PUBLIC_API size_t FSE_normalizeCount(short* normalizedCounter, unsigned tableLog, const unsigned* count, size_t srcSize, unsigned maxSymbolValue);
 
 /*! FSE_NCountWriteBound():
     Provides the maximum possible size of an FSE normalized table, given 'maxSymbolValue' and 'tableLog'.
     Typically useful for allocation purpose. */
-size_t FSE_NCountWriteBound(unsigned maxSymbolValue, unsigned tableLog);
+FSE_PUBLIC_API size_t FSE_NCountWriteBound(unsigned maxSymbolValue, unsigned tableLog);
 
 /*! FSE_writeNCount():
     Compactly save 'normalizedCounter' into 'buffer'.
     @return : size of the compressed table,
               or an errorCode, which can be tested using FSE_isError(). */
-size_t FSE_writeNCount (void* buffer, size_t bufferSize, const short* normalizedCounter, unsigned maxSymbolValue, unsigned tableLog);
+FSE_PUBLIC_API size_t FSE_writeNCount (void* buffer, size_t bufferSize, const short* normalizedCounter, unsigned maxSymbolValue, unsigned tableLog);
 
 
 /*! Constructor and Destructor of FSE_CTable.
     Note that FSE_CTable size depends on 'tableLog' and 'maxSymbolValue' */
 typedef unsigned FSE_CTable;   /* don't allocate that. It's only meant to be more restrictive than void* */
-FSE_CTable* FSE_createCTable (unsigned tableLog, unsigned maxSymbolValue);
-void        FSE_freeCTable (FSE_CTable* ct);
+FSE_PUBLIC_API FSE_CTable* FSE_createCTable (unsigned tableLog, unsigned maxSymbolValue);
+FSE_PUBLIC_API void        FSE_freeCTable (FSE_CTable* ct);
 
 /*! FSE_buildCTable():
     Builds `ct`, which must be already allocated, using FSE_createCTable().
     @return : 0, or an errorCode, which can be tested using FSE_isError() */
-size_t FSE_buildCTable(FSE_CTable* ct, const short* normalizedCounter, unsigned maxSymbolValue, unsigned tableLog);
+FSE_PUBLIC_API size_t FSE_buildCTable(FSE_CTable* ct, const short* normalizedCounter, unsigned maxSymbolValue, unsigned tableLog);
 
 /*! FSE_compress_usingCTable():
     Compress `src` using `ct` into `dst` which must be already allocated.
     @return : size of compressed data (<= `dstCapacity`),
               or 0 if compressed data could not fit into `dst`,
               or an errorCode, which can be tested using FSE_isError() */
-size_t FSE_compress_usingCTable (void* dst, size_t dstCapacity, const void* src, size_t srcSize, const FSE_CTable* ct);
+FSE_PUBLIC_API size_t FSE_compress_usingCTable (void* dst, size_t dstCapacity, const void* src, size_t srcSize, const FSE_CTable* ct);
 
 /*!
 Tutorial :
@@ -223,25 +249,25 @@ If there is an error, the function will return an ErrorCode (which can be tested
     @return : size read from 'rBuffer',
               or an errorCode, which can be tested using FSE_isError().
               maxSymbolValuePtr[0] and tableLogPtr[0] will also be updated with their respective values */
-size_t FSE_readNCount (short* normalizedCounter, unsigned* maxSymbolValuePtr, unsigned* tableLogPtr, const void* rBuffer, size_t rBuffSize);
+FSE_PUBLIC_API size_t FSE_readNCount (short* normalizedCounter, unsigned* maxSymbolValuePtr, unsigned* tableLogPtr, const void* rBuffer, size_t rBuffSize);
 
 /*! Constructor and Destructor of FSE_DTable.
     Note that its size depends on 'tableLog' */
 typedef unsigned FSE_DTable;   /* don't allocate that. It's just a way to be more restrictive than void* */
-FSE_DTable* FSE_createDTable(unsigned tableLog);
-void        FSE_freeDTable(FSE_DTable* dt);
+FSE_PUBLIC_API FSE_DTable* FSE_createDTable(unsigned tableLog);
+FSE_PUBLIC_API void        FSE_freeDTable(FSE_DTable* dt);
 
 /*! FSE_buildDTable():
     Builds 'dt', which must be already allocated, using FSE_createDTable().
     return : 0, or an errorCode, which can be tested using FSE_isError() */
-size_t FSE_buildDTable (FSE_DTable* dt, const short* normalizedCounter, unsigned maxSymbolValue, unsigned tableLog);
+FSE_PUBLIC_API size_t FSE_buildDTable (FSE_DTable* dt, const short* normalizedCounter, unsigned maxSymbolValue, unsigned tableLog);
 
 /*! FSE_decompress_usingDTable():
     Decompress compressed source `cSrc` of size `cSrcSize` using `dt`
     into `dst` which must be already allocated.
     @return : size of regenerated data (necessarily <= `dstCapacity`),
               or an errorCode, which can be tested using FSE_isError() */
-size_t FSE_decompress_usingDTable(void* dst, size_t dstCapacity, const void* cSrc, size_t cSrcSize, const FSE_DTable* dt);
+FSE_PUBLIC_API size_t FSE_decompress_usingDTable(void* dst, size_t dstCapacity, const void* cSrc, size_t cSrcSize, const FSE_DTable* dt);
 
 /*!
 Tutorial :
@@ -286,45 +312,84 @@ If there is an error, the function will return an error code, which can be teste
 #define FSE_BLOCKBOUND(size) (size + (size>>7))
 #define FSE_COMPRESSBOUND(size) (FSE_NCOUNTBOUND + FSE_BLOCKBOUND(size))   /* Macro version, useful for static allocation */
 
-/* It is possible to statically allocate FSE CTable/DTable as a table of unsigned using below macros */
+/* It is possible to statically allocate FSE CTable/DTable as a table of FSE_CTable/FSE_DTable using below macros */
 #define FSE_CTABLE_SIZE_U32(maxTableLog, maxSymbolValue)   (1 + (1<<(maxTableLog-1)) + ((maxSymbolValue+1)*2))
 #define FSE_DTABLE_SIZE_U32(maxTableLog)                   (1 + (1<<maxTableLog))
 
+/* or use the size to malloc() space directly. Pay attention to alignment restrictions though */
+#define FSE_CTABLE_SIZE(maxTableLog, maxSymbolValue)   (FSE_CTABLE_SIZE_U32(maxTableLog, maxSymbolValue) * sizeof(FSE_CTable))
+#define FSE_DTABLE_SIZE(maxTableLog)                   (FSE_DTABLE_SIZE_U32(maxTableLog) * sizeof(FSE_DTable))
+
 
 /* *****************************************
 *  FSE advanced API
 *******************************************/
+/* FSE_count_wksp() :
+ * Same as FSE_count(), but using an externally provided scratch buffer.
+ * `workSpace` must be a table of at least `1024` unsigned
+ */
+size_t FSE_count_wksp(unsigned* count, unsigned* maxSymbolValuePtr,
+                 const void* source, size_t sourceSize, unsigned* workSpace);
+
+/** FSE_countFast() :
+ *  same as FSE_count(), but blindly trusts that all byte values within src are <= *maxSymbolValuePtr
+ */
 size_t FSE_countFast(unsigned* count, unsigned* maxSymbolValuePtr, const void* src, size_t srcSize);
-/**< same as FSE_count(), but blindly trusts that all byte values within src are <= *maxSymbolValuePtr  */
+
+/* FSE_countFast_wksp() :
+ * Same as FSE_countFast(), but using an externally provided scratch buffer.
+ * `workSpace` must be a table of minimum `1024` unsigned
+ */
+size_t FSE_countFast_wksp(unsigned* count, unsigned* maxSymbolValuePtr, const void* src, size_t srcSize, unsigned* workSpace);
+
+/*! FSE_count_simple
+ * Same as FSE_countFast(), but does not use any additional memory (not even on stack).
+ * This function is unsafe, and will segfault if any value within `src` is `> *maxSymbolValuePtr` (presuming it's also the size of `count`).
+*/
+size_t FSE_count_simple(unsigned* count, unsigned* maxSymbolValuePtr, const void* src, size_t srcSize);
+
+
 
 unsigned FSE_optimalTableLog_internal(unsigned maxTableLog, size_t srcSize, unsigned maxSymbolValue, unsigned minus);
 /**< same as FSE_optimalTableLog(), which used `minus==2` */
 
+/* FSE_compress_wksp() :
+ * Same as FSE_compress2(), but using an externally allocated scratch buffer (`workSpace`).
+ * FSE_WKSP_SIZE_U32() provides the minimum size required for `workSpace` as a table of FSE_CTable.
+ */
+#define FSE_WKSP_SIZE_U32(maxTableLog, maxSymbolValue)   ( FSE_CTABLE_SIZE_U32(maxTableLog, maxSymbolValue) + ((maxTableLog > 12) ? (1 << (maxTableLog - 2)) : 1024) )
+size_t FSE_compress_wksp (void* dst, size_t dstSize, const void* src, size_t srcSize, unsigned maxSymbolValue, unsigned tableLog, void* workSpace, size_t wkspSize);
+
 size_t FSE_buildCTable_raw (FSE_CTable* ct, unsigned nbBits);
-/**< build a fake FSE_CTable, designed to not compress an input, where each symbol uses nbBits */
+/**< build a fake FSE_CTable, designed for a flat distribution, where each symbol uses nbBits */
 
 size_t FSE_buildCTable_rle (FSE_CTable* ct, unsigned char symbolValue);
 /**< build a fake FSE_CTable, designed to compress always the same symbolValue */
 
+/* FSE_buildCTable_wksp() :
+ * Same as FSE_buildCTable(), but using an externally allocated scratch buffer (`workSpace`).
+ * `wkspSize` must be >= `(1<<tableLog)`.
+ */
+size_t FSE_buildCTable_wksp(FSE_CTable* ct, const short* normalizedCounter, unsigned maxSymbolValue, unsigned tableLog, void* workSpace, size_t wkspSize);
+
 size_t FSE_buildDTable_raw (FSE_DTable* dt, unsigned nbBits);
-/**< build a fake FSE_DTable, designed to read an uncompressed bitstream where each symbol uses nbBits */
+/**< build a fake FSE_DTable, designed to read a flat distribution where each symbol uses nbBits */
 
 size_t FSE_buildDTable_rle (FSE_DTable* dt, unsigned char symbolValue);
 /**< build a fake FSE_DTable, designed to always generate the same symbolValue */
 
+size_t FSE_decompress_wksp(void* dst, size_t dstCapacity, const void* cSrc, size_t cSrcSize, FSE_DTable* workSpace, unsigned maxLog);
+/**< same as FSE_decompress(), using an externally allocated `workSpace` produced with `FSE_DTABLE_SIZE_U32(maxLog)` */
+
 
 /* *****************************************
 *  FSE symbol compression API
 *******************************************/
 /*!
    This API consists of small unitary functions, which highly benefit from being inlined.
-   You will want to enable link-time-optimization to ensure these functions are properly inlined in your binary.
-   Visual seems to do it automatically.
-   For gcc or clang, you'll need to add -flto flag at compilation and linking stages.
-   If none of these solutions is applicable, include "fse.c" directly.
+   Hence their bodies are included in the next section.
 */
-typedef struct
-{
+typedef struct {
     ptrdiff_t   value;
     const void* stateTable;
     const void* symbolTT;
@@ -384,8 +449,7 @@ If there is an error, it returns an errorCode (which can be tested using FSE_isE
 /* *****************************************
 *  FSE symbol decompression API
 *******************************************/
-typedef struct
-{
+typedef struct {
     size_t      state;
     const void* table;   /* precise table may vary, depending on U16 */
 } FSE_DState_t;
@@ -490,9 +554,9 @@ MEM_STATIC void FSE_initCState2(FSE_CState_t* statePtr, const FSE_CTable* ct, U3
 
 MEM_STATIC void FSE_encodeSymbol(BIT_CStream_t* bitC, FSE_CState_t* statePtr, U32 symbol)
 {
-    const FSE_symbolCompressionTransform symbolTT = ((const FSE_symbolCompressionTransform*)(statePtr->symbolTT))[symbol];
+    FSE_symbolCompressionTransform const symbolTT = ((const FSE_symbolCompressionTransform*)(statePtr->symbolTT))[symbol];
     const U16* const stateTable = (const U16*)(statePtr->stateTable);
-    U32 nbBitsOut  = (U32)((statePtr->value + symbolTT.deltaNbBits) >> 16);
+    U32 const nbBitsOut  = (U32)((statePtr->value + symbolTT.deltaNbBits) >> 16);
     BIT_addBits(bitC, statePtr->value, nbBitsOut);
     statePtr->value = stateTable[ (statePtr->value >> nbBitsOut) + symbolTT.deltaFindState];
 }
@@ -503,6 +567,7 @@ MEM_STATIC void FSE_flushCState(BIT_CStream_t* bitC, const FSE_CState_t* statePt
     BIT_flushBits(bitC);
 }
 
+
 /* ======    Decompression    ====== */
 
 typedef struct {
@@ -581,14 +646,19 @@ MEM_STATIC unsigned FSE_endOfDState(const FSE_DState_t* DStatePtr)
 *  Increasing memory usage improves compression ratio
 *  Reduced memory usage can improve speed, due to cache effect
 *  Recommended max value is 14, for 16KB, which nicely fits into Intel x86 L1 cache */
-#define FSE_MAX_MEMORY_USAGE 14
-#define FSE_DEFAULT_MEMORY_USAGE 13
+#ifndef FSE_MAX_MEMORY_USAGE
+#  define FSE_MAX_MEMORY_USAGE 14
+#endif
+#ifndef FSE_DEFAULT_MEMORY_USAGE
+#  define FSE_DEFAULT_MEMORY_USAGE 13
+#endif
 
 /*!FSE_MAX_SYMBOL_VALUE :
 *  Maximum symbol value authorized.
 *  Required for proper stack allocation */
-#define FSE_MAX_SYMBOL_VALUE 255
-
+#ifndef FSE_MAX_SYMBOL_VALUE
+#  define FSE_MAX_SYMBOL_VALUE 255
+#endif
 
 /* **************************************************************
 *  template functions type & suffix
diff --git a/contrib/libzstd/include/zstd/common/fse_decompress.c b/contrib/libzstd/include/zstd/common/fse_decompress.c
index 7492a3832b7..8474a4c079b 100644
--- a/contrib/libzstd/include/zstd/common/fse_decompress.c
+++ b/contrib/libzstd/include/zstd/common/fse_decompress.c
@@ -59,7 +59,6 @@
 ****************************************************************/
 #include <stdlib.h>     /* malloc, free, qsort */
 #include <string.h>     /* memcpy, memset */
-#include <stdio.h>      /* printf (debug) */
 #include "bitstream.h"
 #define FSE_STATIC_LINKING_ONLY
 #include "fse.h"
@@ -75,12 +74,6 @@
 #define CHECK_F(f) { size_t const e = f; if (FSE_isError(e)) return e; }
 
 
-/* **************************************************************
-*  Complex types
-****************************************************************/
-typedef U32 DTable_max_t[FSE_DTABLE_SIZE_U32(FSE_MAX_TABLELOG)];
-
-
 /* **************************************************************
 *  Templates
 ****************************************************************/
@@ -300,28 +293,34 @@ size_t FSE_decompress_usingDTable(void* dst, size_t originalSize,
 }
 
 
-size_t FSE_decompress(void* dst, size_t maxDstSize, const void* cSrc, size_t cSrcSize)
+size_t FSE_decompress_wksp(void* dst, size_t dstCapacity, const void* cSrc, size_t cSrcSize, FSE_DTable* workSpace, unsigned maxLog)
 {
     const BYTE* const istart = (const BYTE*)cSrc;
     const BYTE* ip = istart;
     short counting[FSE_MAX_SYMBOL_VALUE+1];
-    DTable_max_t dt;   /* Static analyzer seems unable to understand this table will be properly initialized later */
     unsigned tableLog;
     unsigned maxSymbolValue = FSE_MAX_SYMBOL_VALUE;
 
-    if (cSrcSize<2) return ERROR(srcSize_wrong);   /* too small input size */
-
     /* normal FSE decoding mode */
-    {   size_t const NCountLength = FSE_readNCount (counting, &maxSymbolValue, &tableLog, istart, cSrcSize);
-        if (FSE_isError(NCountLength)) return NCountLength;
-        if (NCountLength >= cSrcSize) return ERROR(srcSize_wrong);   /* too small input size */
-        ip += NCountLength;
-        cSrcSize -= NCountLength;
-    }
+    size_t const NCountLength = FSE_readNCount (counting, &maxSymbolValue, &tableLog, istart, cSrcSize);
+    if (FSE_isError(NCountLength)) return NCountLength;
+    //if (NCountLength >= cSrcSize) return ERROR(srcSize_wrong);   /* too small input size; supposed to be already checked in NCountLength, only remaining case : NCountLength==cSrcSize */
+    if (tableLog > maxLog) return ERROR(tableLog_tooLarge);
+    ip += NCountLength;
+    cSrcSize -= NCountLength;
 
-    CHECK_F( FSE_buildDTable (dt, counting, maxSymbolValue, tableLog) );
+    CHECK_F( FSE_buildDTable (workSpace, counting, maxSymbolValue, tableLog) );
 
-    return FSE_decompress_usingDTable (dst, maxDstSize, ip, cSrcSize, dt);   /* always return, even if it is an error code */
+    return FSE_decompress_usingDTable (dst, dstCapacity, ip, cSrcSize, workSpace);   /* always return, even if it is an error code */
+}
+
+
+typedef FSE_DTable DTable_max_t[FSE_DTABLE_SIZE_U32(FSE_MAX_TABLELOG)];
+
+size_t FSE_decompress(void* dst, size_t dstCapacity, const void* cSrc, size_t cSrcSize)
+{
+    DTable_max_t dt;   /* Static analyzer seems unable to understand this table will be properly initialized later */
+    return FSE_decompress_wksp(dst, dstCapacity, cSrc, cSrcSize, dt, FSE_MAX_TABLELOG);
 }
 
 
diff --git a/contrib/libzstd/include/zstd/common/huf.h b/contrib/libzstd/include/zstd/common/huf.h
index 29bab4b7646..dabd359915a 100644
--- a/contrib/libzstd/include/zstd/common/huf.h
+++ b/contrib/libzstd/include/zstd/common/huf.h
@@ -43,6 +43,21 @@ extern "C" {
 #include <stddef.h>    /* size_t */
 
 
+/* *** library symbols visibility *** */
+/* Note : when linking with -fvisibility=hidden on gcc, or by default on Visual,
+ *        HUF symbols remain "private" (internal symbols for library only).
+ *        Set macro FSE_DLL_EXPORT to 1 if you want HUF symbols visible on DLL interface */
+#if defined(FSE_DLL_EXPORT) && (FSE_DLL_EXPORT==1) && defined(__GNUC__) && (__GNUC__ >= 4)
+#  define HUF_PUBLIC_API __attribute__ ((visibility ("default")))
+#elif defined(FSE_DLL_EXPORT) && (FSE_DLL_EXPORT==1)   /* Visual expected */
+#  define HUF_PUBLIC_API __declspec(dllexport)
+#elif defined(FSE_DLL_IMPORT) && (FSE_DLL_IMPORT==1)
+#  define HUF_PUBLIC_API __declspec(dllimport)  /* not required, just to generate faster code (saves a function pointer load from IAT and an indirect jump) */
+#else
+#  define HUF_PUBLIC_API
+#endif
+
+
 /* *** simple functions *** */
 /**
 HUF_compress() :
@@ -55,42 +70,68 @@ HUF_compress() :
                      if return == 1, srcData is a single repeated byte symbol (RLE compression).
                      if HUF_isError(return), compression failed (more details using HUF_getErrorName())
 */
-size_t HUF_compress(void* dst, size_t dstCapacity,
-              const void* src, size_t srcSize);
+HUF_PUBLIC_API size_t HUF_compress(void* dst, size_t dstCapacity,
+                             const void* src, size_t srcSize);
 
 /**
 HUF_decompress() :
     Decompress HUF data from buffer 'cSrc', of size 'cSrcSize',
     into already allocated buffer 'dst', of minimum size 'dstSize'.
-    `dstSize` : **must** be the ***exact*** size of original (uncompressed) data.
+    `originalSize` : **must** be the ***exact*** size of original (uncompressed) data.
     Note : in contrast with FSE, HUF_decompress can regenerate
            RLE (cSrcSize==1) and uncompressed (cSrcSize==dstSize) data,
            because it knows size to regenerate.
-    @return : size of regenerated data (== dstSize),
+    @return : size of regenerated data (== originalSize),
               or an error code, which can be tested using HUF_isError()
 */
-size_t HUF_decompress(void* dst,  size_t dstSize,
-                const void* cSrc, size_t cSrcSize);
+HUF_PUBLIC_API size_t HUF_decompress(void* dst,  size_t originalSize,
+                               const void* cSrc, size_t cSrcSize);
 
 
-/* ****************************************
-*  Tool functions
-******************************************/
-#define HUF_BLOCKSIZE_MAX (128 * 1024)
-size_t HUF_compressBound(size_t size);       /**< maximum compressed size (worst case) */
+/* ***   Tool functions *** */
+#define HUF_BLOCKSIZE_MAX (128 * 1024)                  /**< maximum input size for a single block compressed with HUF_compress */
+HUF_PUBLIC_API size_t HUF_compressBound(size_t size);   /**< maximum compressed size (worst case) */
 
 /* Error Management */
-unsigned    HUF_isError(size_t code);        /**< tells if a return value is an error code */
-const char* HUF_getErrorName(size_t code);   /**< provides error code string (useful for debugging) */
+HUF_PUBLIC_API unsigned    HUF_isError(size_t code);       /**< tells if a return value is an error code */
+HUF_PUBLIC_API const char* HUF_getErrorName(size_t code);  /**< provides error code string (useful for debugging) */
 
 
-/* *** Advanced function *** */
+/* ***   Advanced function   *** */
 
 /** HUF_compress2() :
-*   Same as HUF_compress(), but offers direct control over `maxSymbolValue` and `tableLog` */
-size_t HUF_compress2 (void* dst, size_t dstSize, const void* src, size_t srcSize, unsigned maxSymbolValue, unsigned tableLog);
+ *  Same as HUF_compress(), but offers direct control over `maxSymbolValue` and `tableLog`.
+ *  `tableLog` must be `<= HUF_TABLELOG_MAX` . */
+HUF_PUBLIC_API size_t HUF_compress2 (void* dst, size_t dstCapacity, const void* src, size_t srcSize, unsigned maxSymbolValue, unsigned tableLog);
+
+/** HUF_compress4X_wksp() :
+ *  Same as HUF_compress2(), but uses externally allocated `workSpace`.
+ *  `workSpace` must have a minimum alignment of 4, and be at least as large as the following macro */
+#define HUF_WORKSPACE_SIZE (6 << 10)
+#define HUF_WORKSPACE_SIZE_U32 (HUF_WORKSPACE_SIZE / sizeof(U32))
+HUF_PUBLIC_API size_t HUF_compress4X_wksp (void* dst, size_t dstCapacity, const void* src, size_t srcSize, unsigned maxSymbolValue, unsigned tableLog, void* workSpace, size_t wkspSize);
+
+/**
+ *  The minimum workspace size for the `workSpace` used in
+ *  HUF_readDTableX2_wksp() and HUF_readDTableX4_wksp().
+ *
+ *  The space used depends on HUF_TABLELOG_MAX, ranging from ~1500 bytes when
+ *  HUF_TABLELOG_MAX=12 to ~1850 bytes when HUF_TABLELOG_MAX=15.
+ *  Buffer overflow errors may potentially occur if code modifications result in
+ *  a required workspace size greater than that specified in the following
+ *  macro.
+ */
+#define HUF_DECOMPRESS_WORKSPACE_SIZE (2 << 10)
+#define HUF_DECOMPRESS_WORKSPACE_SIZE_U32 (HUF_DECOMPRESS_WORKSPACE_SIZE / sizeof(U32))
 
 
+/* ******************************************************************
+ *  WARNING !!
+ *  The following section contains advanced and experimental definitions
+ *  which shall never be used in the context of dll
+ *  because they are not guaranteed to remain stable in the future.
+ *  Only consider them in association with static linking.
+ *******************************************************************/
 #ifdef HUF_STATIC_LINKING_ONLY
 
 /* *** Dependencies *** */
@@ -98,10 +139,11 @@ size_t HUF_compress2 (void* dst, size_t dstSize, const void* src, size_t srcSize
 
 
 /* *** Constants *** */
-#define HUF_TABLELOG_ABSOLUTEMAX  16   /* absolute limit of HUF_MAX_TABLELOG. Beyond that value, code does not work */
-#define HUF_TABLELOG_MAX  12           /* max configured tableLog (for static allocation); can be modified up to HUF_ABSOLUTEMAX_TABLELOG */
+#define HUF_TABLELOG_MAX      12       /* max configured tableLog (for static allocation); can be modified up to HUF_TABLELOG_ABSOLUTEMAX */
 #define HUF_TABLELOG_DEFAULT  11       /* tableLog by default, when not specified */
-#define HUF_SYMBOLVALUE_MAX 255
+#define HUF_SYMBOLVALUE_MAX  255
+
+#define HUF_TABLELOG_ABSOLUTEMAX  15   /* absolute limit of HUF_TABLELOG_MAX. Beyond that value, code does not work */
 #if (HUF_TABLELOG_MAX > HUF_TABLELOG_ABSOLUTEMAX)
 #  error "HUF_TABLELOG_MAX is too large !"
 #endif
@@ -112,12 +154,14 @@ size_t HUF_compress2 (void* dst, size_t dstSize, const void* src, size_t srcSize
 ******************************************/
 /* HUF buffer bounds */
 #define HUF_CTABLEBOUND 129
-#define HUF_BLOCKBOUND(size) (size + (size>>8) + 8)   /* only true if incompressible pre-filtered with fast heuristic */
+#define HUF_BLOCKBOUND(size) (size + (size>>8) + 8)   /* only true when incompressible is pre-filtered with fast heuristic */
 #define HUF_COMPRESSBOUND(size) (HUF_CTABLEBOUND + HUF_BLOCKBOUND(size))   /* Macro version, useful for static allocation */
 
 /* static allocation of HUF's Compression Table */
+#define HUF_CTABLE_SIZE_U32(maxSymbolValue)   ((maxSymbolValue)+1)   /* Use tables of U32, for proper alignment */
+#define HUF_CTABLE_SIZE(maxSymbolValue)       (HUF_CTABLE_SIZE_U32(maxSymbolValue) * sizeof(U32))
 #define HUF_CREATE_STATIC_CTABLE(name, maxSymbolValue) \
-    U32 name##hb[maxSymbolValue+1]; \
+    U32 name##hb[HUF_CTABLE_SIZE_U32(maxSymbolValue)]; \
     void* name##hv = &(name##hb); \
     HUF_CElt* name = (HUF_CElt*)(name##hv)   /* no final ; */
 
@@ -125,9 +169,9 @@ size_t HUF_compress2 (void* dst, size_t dstSize, const void* src, size_t srcSize
 typedef U32 HUF_DTable;
 #define HUF_DTABLE_SIZE(maxTableLog)   (1 + (1<<(maxTableLog)))
 #define HUF_CREATE_STATIC_DTABLEX2(DTable, maxTableLog) \
-        HUF_DTable DTable[HUF_DTABLE_SIZE((maxTableLog)-1)] = { ((U32)((maxTableLog)-1)*0x1000001) }
+        HUF_DTable DTable[HUF_DTABLE_SIZE((maxTableLog)-1)] = { ((U32)((maxTableLog)-1) * 0x01000001) }
 #define HUF_CREATE_STATIC_DTABLEX4(DTable, maxTableLog) \
-        HUF_DTable DTable[HUF_DTABLE_SIZE(maxTableLog)] = { ((U32)(maxTableLog)*0x1000001) }
+        HUF_DTable DTable[HUF_DTABLE_SIZE(maxTableLog)] = { ((U32)(maxTableLog) * 0x01000001) }
 
 
 /* ****************************************
@@ -138,12 +182,11 @@ size_t HUF_decompress4X4 (void* dst, size_t dstSize, const void* cSrc, size_t cS
 
 size_t HUF_decompress4X_DCtx (HUF_DTable* dctx, void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize);   /**< decodes RLE and uncompressed */
 size_t HUF_decompress4X_hufOnly(HUF_DTable* dctx, void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize); /**< considers RLE and uncompressed as errors */
+size_t HUF_decompress4X_hufOnly_wksp(HUF_DTable* dctx, void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize, void* workSpace, size_t wkspSize); /**< considers RLE and uncompressed as errors */
 size_t HUF_decompress4X2_DCtx(HUF_DTable* dctx, void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize);   /**< single-symbol decoder */
+size_t HUF_decompress4X2_DCtx_wksp(HUF_DTable* dctx, void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize, void* workSpace, size_t wkspSize);   /**< single-symbol decoder */
 size_t HUF_decompress4X4_DCtx(HUF_DTable* dctx, void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize);   /**< double-symbols decoder */
-
-size_t HUF_decompress1X_DCtx (HUF_DTable* dctx, void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize);
-size_t HUF_decompress1X2_DCtx(HUF_DTable* dctx, void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize);   /**< single-symbol decoder */
-size_t HUF_decompress1X4_DCtx(HUF_DTable* dctx, void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize);   /**< double-symbols decoder */
+size_t HUF_decompress4X4_DCtx_wksp(HUF_DTable* dctx, void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize, void* workSpace, size_t wkspSize);   /**< double-symbols decoder */
 
 
 /* ****************************************
@@ -168,6 +211,23 @@ size_t HUF_buildCTable (HUF_CElt* CTable, const unsigned* count, unsigned maxSym
 size_t HUF_writeCTable (void* dst, size_t maxDstSize, const HUF_CElt* CTable, unsigned maxSymbolValue, unsigned huffLog);
 size_t HUF_compress4X_usingCTable(void* dst, size_t dstSize, const void* src, size_t srcSize, const HUF_CElt* CTable);
 
+typedef enum {
+   HUF_repeat_none,  /**< Cannot use the previous table */
+   HUF_repeat_check, /**< Can use the previous table but it must be checked. Note : The previous table must have been constructed by HUF_compress{1, 4}X_repeat */
+   HUF_repeat_valid  /**< Can use the previous table and it is assumed to be valid */
+ } HUF_repeat;
+/** HUF_compress4X_repeat() :
+*   Same as HUF_compress4X_wksp(), but considers using hufTable if *repeat != HUF_repeat_none.
+*   If it uses hufTable it does not modify hufTable or repeat.
+*   If it doesn't, it sets *repeat = HUF_repeat_none, and it sets hufTable to the table used.
+*   If preferRepeat then the old table will always be used if valid. */
+size_t HUF_compress4X_repeat(void* dst, size_t dstSize, const void* src, size_t srcSize, unsigned maxSymbolValue, unsigned tableLog, void* workSpace, size_t wkspSize, HUF_CElt* hufTable, HUF_repeat* repeat, int preferRepeat);  /**< `workSpace` must be a table of at least HUF_WORKSPACE_SIZE_U32 unsigned */
+
+/** HUF_buildCTable_wksp() :
+ *  Same as HUF_buildCTable(), but using externally allocated scratch buffer.
+ *  `workSpace` must be aligned on 4-bytes boundaries, and be at least as large as a table of 1024 unsigned.
+ */
+size_t HUF_buildCTable_wksp (HUF_CElt* tree, const U32* count, U32 maxSymbolValue, U32 maxNbBits, void* workSpace, size_t wkspSize);
 
 /*! HUF_readStats() :
     Read compact Huffman tree, saved by HUF_writeCTable().
@@ -198,7 +258,9 @@ HUF_decompress() does the following:
 U32 HUF_selectDecoder (size_t dstSize, size_t cSrcSize);
 
 size_t HUF_readDTableX2 (HUF_DTable* DTable, const void* src, size_t srcSize);
+size_t HUF_readDTableX2_wksp (HUF_DTable* DTable, const void* src, size_t srcSize, void* workSpace, size_t wkspSize);
 size_t HUF_readDTableX4 (HUF_DTable* DTable, const void* src, size_t srcSize);
+size_t HUF_readDTableX4_wksp (HUF_DTable* DTable, const void* src, size_t srcSize, void* workSpace, size_t wkspSize);
 
 size_t HUF_decompress4X_usingDTable(void* dst, size_t maxDstSize, const void* cSrc, size_t cSrcSize, const HUF_DTable* DTable);
 size_t HUF_decompress4X2_usingDTable(void* dst, size_t maxDstSize, const void* cSrc, size_t cSrcSize, const HUF_DTable* DTable);
@@ -208,16 +270,29 @@ size_t HUF_decompress4X4_usingDTable(void* dst, size_t maxDstSize, const void* c
 /* single stream variants */
 
 size_t HUF_compress1X (void* dst, size_t dstSize, const void* src, size_t srcSize, unsigned maxSymbolValue, unsigned tableLog);
+size_t HUF_compress1X_wksp (void* dst, size_t dstSize, const void* src, size_t srcSize, unsigned maxSymbolValue, unsigned tableLog, void* workSpace, size_t wkspSize);  /**< `workSpace` must be a table of at least HUF_WORKSPACE_SIZE_U32 unsigned */
 size_t HUF_compress1X_usingCTable(void* dst, size_t dstSize, const void* src, size_t srcSize, const HUF_CElt* CTable);
+/** HUF_compress1X_repeat() :
+*   Same as HUF_compress1X_wksp(), but considers using hufTable if *repeat != HUF_repeat_none.
+*   If it uses hufTable it does not modify hufTable or repeat.
+*   If it doesn't, it sets *repeat = HUF_repeat_none, and it sets hufTable to the table used.
+*   If preferRepeat then the old table will always be used if valid. */
+size_t HUF_compress1X_repeat(void* dst, size_t dstSize, const void* src, size_t srcSize, unsigned maxSymbolValue, unsigned tableLog, void* workSpace, size_t wkspSize, HUF_CElt* hufTable, HUF_repeat* repeat, int preferRepeat);  /**< `workSpace` must be a table of at least HUF_WORKSPACE_SIZE_U32 unsigned */
 
 size_t HUF_decompress1X2 (void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize);   /* single-symbol decoder */
 size_t HUF_decompress1X4 (void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize);   /* double-symbol decoder */
 
-size_t HUF_decompress1X_usingDTable(void* dst, size_t maxDstSize, const void* cSrc, size_t cSrcSize, const HUF_DTable* DTable);
+size_t HUF_decompress1X_DCtx (HUF_DTable* dctx, void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize);
+size_t HUF_decompress1X_DCtx_wksp (HUF_DTable* dctx, void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize, void* workSpace, size_t wkspSize);
+size_t HUF_decompress1X2_DCtx(HUF_DTable* dctx, void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize);   /**< single-symbol decoder */
+size_t HUF_decompress1X2_DCtx_wksp(HUF_DTable* dctx, void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize, void* workSpace, size_t wkspSize);   /**< single-symbol decoder */
+size_t HUF_decompress1X4_DCtx(HUF_DTable* dctx, void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize);   /**< double-symbols decoder */
+size_t HUF_decompress1X4_DCtx_wksp(HUF_DTable* dctx, void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize, void* workSpace, size_t wkspSize);   /**< double-symbols decoder */
+
+size_t HUF_decompress1X_usingDTable(void* dst, size_t maxDstSize, const void* cSrc, size_t cSrcSize, const HUF_DTable* DTable);   /**< automatic selection of single or double symbol decoder, based on DTable */
 size_t HUF_decompress1X2_usingDTable(void* dst, size_t maxDstSize, const void* cSrc, size_t cSrcSize, const HUF_DTable* DTable);
 size_t HUF_decompress1X4_usingDTable(void* dst, size_t maxDstSize, const void* cSrc, size_t cSrcSize, const HUF_DTable* DTable);
 
-
 #endif /* HUF_STATIC_LINKING_ONLY */
 
 
diff --git a/contrib/libzstd/include/zstd/common/mem.h b/contrib/libzstd/include/zstd/common/mem.h
index 681dd35d2da..b0e5bf60b43 100644
--- a/contrib/libzstd/include/zstd/common/mem.h
+++ b/contrib/libzstd/include/zstd/common/mem.h
@@ -39,7 +39,7 @@ extern "C" {
 #endif
 
 /* code only tested on 32 and 64 bits systems */
-#define MEM_STATIC_ASSERT(c)   { enum { XXH_static_assert = 1/(int)(!!(c)) }; }
+#define MEM_STATIC_ASSERT(c)   { enum { MEM_static_assert = 1/(int)(!!(c)) }; }
 MEM_STATIC void MEM_check(void) { MEM_STATIC_ASSERT((sizeof(size_t)==4) || (sizeof(size_t)==8)); }
 
 
@@ -48,21 +48,25 @@ MEM_STATIC void MEM_check(void) { MEM_STATIC_ASSERT((sizeof(size_t)==4) || (size
 *****************************************************************/
 #if  !defined (__VMS) && (defined (__cplusplus) || (defined (__STDC_VERSION__) && (__STDC_VERSION__ >= 199901L) /* C99 */) )
 # include <stdint.h>
-  typedef  uint8_t BYTE;
-  typedef uint16_t U16;
-  typedef  int16_t S16;
-  typedef uint32_t U32;
-  typedef  int32_t S32;
-  typedef uint64_t U64;
-  typedef  int64_t S64;
+  typedef   uint8_t BYTE;
+  typedef  uint16_t U16;
+  typedef   int16_t S16;
+  typedef  uint32_t U32;
+  typedef   int32_t S32;
+  typedef  uint64_t U64;
+  typedef   int64_t S64;
+  typedef  intptr_t iPtrDiff;
+  typedef uintptr_t uPtrDiff;
 #else
-  typedef unsigned char       BYTE;
+  typedef unsigned char      BYTE;
   typedef unsigned short      U16;
   typedef   signed short      S16;
   typedef unsigned int        U32;
   typedef   signed int        S32;
   typedef unsigned long long  U64;
   typedef   signed long long  S64;
+  typedef ptrdiff_t      iPtrDiff;
+  typedef size_t         uPtrDiff;
 #endif
 
 
@@ -74,19 +78,18 @@ MEM_STATIC void MEM_check(void) { MEM_STATIC_ASSERT((sizeof(size_t)==4) || (size
  * Unfortunately, on some target/compiler combinations, the generated assembly is sub-optimal.
  * The below switch allow to select different access method for improved performance.
  * Method 0 (default) : use `memcpy()`. Safe and portable.
- * Method 1 : `__packed` statement. It depends on compiler extension (ie, not portable).
+ * Method 1 : `__packed` statement. It depends on compiler extension (i.e., not portable).
  *            This method is safe if your compiler supports it, and *generally* as fast or faster than `memcpy`.
  * Method 2 : direct access. This method is portable but violate C standard.
  *            It can generate buggy code on targets depending on alignment.
- *            In some circumstances, it's the only known way to get the most performance (ie GCC + ARMv6)
+ *            In some circumstances, it's the only known way to get the most performance (i.e. GCC + ARMv6)
  * See http://fastcompression.blogspot.fr/2015/08/accessing-unaligned-memory.html for details.
  * Prefer these methods in priority order (0 > 1 > 2)
  */
 #ifndef MEM_FORCE_MEMORY_ACCESS   /* can be defined externally, on command line for example */
 #  if defined(__GNUC__) && ( defined(__ARM_ARCH_6__) || defined(__ARM_ARCH_6J__) || defined(__ARM_ARCH_6K__) || defined(__ARM_ARCH_6Z__) || defined(__ARM_ARCH_6ZK__) || defined(__ARM_ARCH_6T2__) )
 #    define MEM_FORCE_MEMORY_ACCESS 2
-#  elif defined(__INTEL_COMPILER) /*|| defined(_MSC_VER)*/ || \
-  (defined(__GNUC__) && ( defined(__ARM_ARCH_7__) || defined(__ARM_ARCH_7A__) || defined(__ARM_ARCH_7R__) || defined(__ARM_ARCH_7M__) || defined(__ARM_ARCH_7S__) ))
+#  elif defined(__INTEL_COMPILER) || defined(__GNUC__)
 #    define MEM_FORCE_MEMORY_ACCESS 1
 #  endif
 #endif
@@ -118,7 +121,7 @@ MEM_STATIC void MEM_write64(void* memPtr, U64 value) { *(U64*)memPtr = value; }
 /* __pack instructions are safer, but compiler specific, hence potentially problematic for some compilers */
 /* currently only defined for gcc and icc */
 #if defined(_MSC_VER) || (defined(__INTEL_COMPILER) && defined(WIN32))
-	__pragma( pack(push, 1) )
+    __pragma( pack(push, 1) )
     typedef union { U16 u16; U32 u32; U64 u64; size_t st; } unalign;
     __pragma( pack(pop) )
 #else
@@ -180,7 +183,7 @@ MEM_STATIC U32 MEM_swap32(U32 in)
 {
 #if defined(_MSC_VER)     /* Visual Studio */
     return _byteswap_ulong(in);
-#elif defined (__GNUC__)
+#elif defined (__GNUC__) && (__GNUC__ * 100 + __GNUC_MINOR__ >= 403)
     return __builtin_bswap32(in);
 #else
     return  ((in << 24) & 0xff000000 ) |
@@ -194,7 +197,7 @@ MEM_STATIC U64 MEM_swap64(U64 in)
 {
 #if defined(_MSC_VER)     /* Visual Studio */
     return _byteswap_uint64(in);
-#elif defined (__GNUC__)
+#elif defined (__GNUC__) && (__GNUC__ * 100 + __GNUC_MINOR__ >= 403)
     return __builtin_bswap64(in);
 #else
     return  ((in << 56) & 0xff00000000000000ULL) |
@@ -349,20 +352,6 @@ MEM_STATIC void MEM_writeBEST(void* memPtr, size_t val)
 }
 
 
-/* function safe only for comparisons */
-MEM_STATIC U32 MEM_readMINMATCH(const void* memPtr, U32 length)
-{
-    switch (length)
-    {
-    default :
-    case 4 : return MEM_read32(memPtr);
-    case 3 : if (MEM_isLittleEndian())
-                return MEM_read32(memPtr)<<8;
-             else
-                return MEM_read32(memPtr)>>8;
-    }
-}
-
 #if defined (__cplusplus)
 }
 #endif
diff --git a/contrib/libzstd/include/zstd/common/pool.c b/contrib/libzstd/include/zstd/common/pool.c
new file mode 100644
index 00000000000..749fa4f2f7b
--- /dev/null
+++ b/contrib/libzstd/include/zstd/common/pool.c
@@ -0,0 +1,206 @@
+/**
+ * Copyright (c) 2016-present, Facebook, Inc.
+ * All rights reserved.
+ *
+ * This source code is licensed under the BSD-style license found in the
+ * LICENSE file in the root directory of this source tree. An additional grant
+ * of patent rights can be found in the PATENTS file in the same directory.
+ */
+
+
+/* ======   Dependencies   ======= */
+#include <stddef.h>  /* size_t */
+#include <stdlib.h>  /* malloc, calloc, free */
+#include "pool.h"
+
+/* ======   Compiler specifics   ====== */
+#if defined(_MSC_VER)
+#  pragma warning(disable : 4204)        /* disable: C4204: non-constant aggregate initializer */
+#endif
+
+
+#ifdef ZSTD_MULTITHREAD
+
+#include "threading.h"   /* pthread adaptation */
+
+/* A job is a function and an opaque argument */
+typedef struct POOL_job_s {
+  POOL_function function;
+  void *opaque;
+} POOL_job;
+
+struct POOL_ctx_s {
+    /* Keep track of the threads */
+    pthread_t *threads;
+    size_t numThreads;
+
+    /* The queue is a circular buffer */
+    POOL_job *queue;
+    size_t queueHead;
+    size_t queueTail;
+    size_t queueSize;
+    /* The mutex protects the queue */
+    pthread_mutex_t queueMutex;
+    /* Condition variable for pushers to wait on when the queue is full */
+    pthread_cond_t queuePushCond;
+    /* Condition variable for poppers to wait on when the queue is empty */
+    pthread_cond_t queuePopCond;
+    /* Indicates if the queue is shutting down */
+    int shutdown;
+};
+
+/* POOL_thread() :
+   Work thread for the thread pool.
+   Waits for jobs and executes them.
+   @returns : NULL on failure else non-null.
+*/
+static void* POOL_thread(void* opaque) {
+    POOL_ctx* const ctx = (POOL_ctx*)opaque;
+    if (!ctx) { return NULL; }
+    for (;;) {
+        /* Lock the mutex and wait for a non-empty queue or until shutdown */
+        pthread_mutex_lock(&ctx->queueMutex);
+        while (ctx->queueHead == ctx->queueTail && !ctx->shutdown) {
+            pthread_cond_wait(&ctx->queuePopCond, &ctx->queueMutex);
+        }
+        /* empty => shutting down: so stop */
+        if (ctx->queueHead == ctx->queueTail) {
+            pthread_mutex_unlock(&ctx->queueMutex);
+            return opaque;
+        }
+        /* Pop a job off the queue */
+        {   POOL_job const job = ctx->queue[ctx->queueHead];
+            ctx->queueHead = (ctx->queueHead + 1) % ctx->queueSize;
+            /* Unlock the mutex, signal a pusher, and run the job */
+            pthread_mutex_unlock(&ctx->queueMutex);
+            pthread_cond_signal(&ctx->queuePushCond);
+            job.function(job.opaque);
+        }
+    }
+    /* Unreachable */
+}
+
+POOL_ctx *POOL_create(size_t numThreads, size_t queueSize) {
+    POOL_ctx *ctx;
+    /* Check the parameters */
+    if (!numThreads || !queueSize) { return NULL; }
+    /* Allocate the context and zero initialize */
+    ctx = (POOL_ctx *)calloc(1, sizeof(POOL_ctx));
+    if (!ctx) { return NULL; }
+    /* Initialize the job queue.
+     * It needs one extra space since one space is wasted to differentiate empty
+     * and full queues.
+     */
+    ctx->queueSize = queueSize + 1;
+    ctx->queue = (POOL_job *)malloc(ctx->queueSize * sizeof(POOL_job));
+    ctx->queueHead = 0;
+    ctx->queueTail = 0;
+    pthread_mutex_init(&ctx->queueMutex, NULL);
+    pthread_cond_init(&ctx->queuePushCond, NULL);
+    pthread_cond_init(&ctx->queuePopCond, NULL);
+    ctx->shutdown = 0;
+    /* Allocate space for the thread handles */
+    ctx->threads = (pthread_t *)malloc(numThreads * sizeof(pthread_t));
+    ctx->numThreads = 0;
+    /* Check for errors */
+    if (!ctx->threads || !ctx->queue) { POOL_free(ctx); return NULL; }
+    /* Initialize the threads */
+    {   size_t i;
+        for (i = 0; i < numThreads; ++i) {
+            if (pthread_create(&ctx->threads[i], NULL, &POOL_thread, ctx)) {
+                ctx->numThreads = i;
+                POOL_free(ctx);
+                return NULL;
+        }   }
+        ctx->numThreads = numThreads;
+    }
+    return ctx;
+}
+
+/*! POOL_join() :
+    Shutdown the queue, wake any sleeping threads, and join all of the threads.
+*/
+static void POOL_join(POOL_ctx *ctx) {
+    /* Shut down the queue */
+    pthread_mutex_lock(&ctx->queueMutex);
+    ctx->shutdown = 1;
+    pthread_mutex_unlock(&ctx->queueMutex);
+    /* Wake up sleeping threads */
+    pthread_cond_broadcast(&ctx->queuePushCond);
+    pthread_cond_broadcast(&ctx->queuePopCond);
+    /* Join all of the threads */
+    {   size_t i;
+        for (i = 0; i < ctx->numThreads; ++i) {
+            pthread_join(ctx->threads[i], NULL);
+    }   }
+}
+
+void POOL_free(POOL_ctx *ctx) {
+    if (!ctx) { return; }
+    POOL_join(ctx);
+    pthread_mutex_destroy(&ctx->queueMutex);
+    pthread_cond_destroy(&ctx->queuePushCond);
+    pthread_cond_destroy(&ctx->queuePopCond);
+    if (ctx->queue) free(ctx->queue);
+    if (ctx->threads) free(ctx->threads);
+    free(ctx);
+}
+
+size_t POOL_sizeof(POOL_ctx *ctx) {
+    if (ctx==NULL) return 0;  /* supports sizeof NULL */
+    return sizeof(*ctx)
+        + ctx->queueSize * sizeof(POOL_job)
+        + ctx->numThreads * sizeof(pthread_t);
+}
+
+void POOL_add(void *ctxVoid, POOL_function function, void *opaque) {
+    POOL_ctx *ctx = (POOL_ctx *)ctxVoid;
+    if (!ctx) { return; }
+
+    pthread_mutex_lock(&ctx->queueMutex);
+    {   POOL_job const job = {function, opaque};
+        /* Wait until there is space in the queue for the new job */
+        size_t newTail = (ctx->queueTail + 1) % ctx->queueSize;
+        while (ctx->queueHead == newTail && !ctx->shutdown) {
+          pthread_cond_wait(&ctx->queuePushCond, &ctx->queueMutex);
+          newTail = (ctx->queueTail + 1) % ctx->queueSize;
+        }
+        /* The queue is still going => there is space */
+        if (!ctx->shutdown) {
+            ctx->queue[ctx->queueTail] = job;
+            ctx->queueTail = newTail;
+        }
+    }
+    pthread_mutex_unlock(&ctx->queueMutex);
+    pthread_cond_signal(&ctx->queuePopCond);
+}
+
+#else  /* ZSTD_MULTITHREAD  not defined */
+/* No multi-threading support */
+
+/* We don't need any data, but if it is empty malloc() might return NULL. */
+struct POOL_ctx_s {
+  int data;
+};
+
+POOL_ctx *POOL_create(size_t numThreads, size_t queueSize) {
+  (void)numThreads;
+  (void)queueSize;
+  return (POOL_ctx *)malloc(sizeof(POOL_ctx));
+}
+
+void POOL_free(POOL_ctx *ctx) {
+  if (ctx) free(ctx);
+}
+
+void POOL_add(void *ctx, POOL_function function, void *opaque) {
+  (void)ctx;
+  function(opaque);
+}
+
+size_t POOL_sizeof(POOL_ctx *ctx) {
+    if (ctx==NULL) return 0;  /* supports sizeof NULL */
+    return sizeof(*ctx);
+}
+
+#endif  /* ZSTD_MULTITHREAD */
diff --git a/contrib/libzstd/include/zstd/common/pool.h b/contrib/libzstd/include/zstd/common/pool.h
new file mode 100644
index 00000000000..386cd674b7c
--- /dev/null
+++ b/contrib/libzstd/include/zstd/common/pool.h
@@ -0,0 +1,61 @@
+/**
+ * Copyright (c) 2016-present, Facebook, Inc.
+ * All rights reserved.
+ *
+ * This source code is licensed under the BSD-style license found in the
+ * LICENSE file in the root directory of this source tree. An additional grant
+ * of patent rights can be found in the PATENTS file in the same directory.
+ */
+#ifndef POOL_H
+#define POOL_H
+
+#if defined (__cplusplus)
+extern "C" {
+#endif
+
+
+#include <stddef.h>   /* size_t */
+
+typedef struct POOL_ctx_s POOL_ctx;
+
+/*! POOL_create() :
+    Create a thread pool with at most `numThreads` threads.
+    `numThreads` must be at least 1.
+    The maximum number of queued jobs before blocking is `queueSize`.
+    `queueSize` must be at least 1.
+    @return : The POOL_ctx pointer on success else NULL.
+*/
+POOL_ctx *POOL_create(size_t numThreads, size_t queueSize);
+
+/*! POOL_free() :
+    Free a thread pool returned by POOL_create().
+*/
+void POOL_free(POOL_ctx *ctx);
+
+/*! POOL_sizeof() :
+    return memory usage of pool returned by POOL_create().
+*/
+size_t POOL_sizeof(POOL_ctx *ctx);
+
+/*! POOL_function :
+    The function type that can be added to a thread pool.
+*/
+typedef void (*POOL_function)(void *);
+/*! POOL_add_function :
+    The function type for a generic thread pool add function.
+*/
+typedef void (*POOL_add_function)(void *, POOL_function, void *);
+
+/*! POOL_add() :
+    Add the job `function(opaque)` to the thread pool.
+    Possibly blocks until there is room in the queue.
+    Note : The function may be executed asynchronously, so `opaque` must live until the function has been completed.
+*/
+void POOL_add(void *ctx, POOL_function function, void *opaque);
+
+
+#if defined (__cplusplus)
+}
+#endif
+
+#endif
diff --git a/contrib/libzstd/include/zstd/common/threading.c b/contrib/libzstd/include/zstd/common/threading.c
new file mode 100644
index 00000000000..141376c5619
--- /dev/null
+++ b/contrib/libzstd/include/zstd/common/threading.c
@@ -0,0 +1,79 @@
+/**
+ * Copyright (c) 2016 Tino Reichardt
+ * All rights reserved.
+ *
+ * This source code is licensed under the BSD-style license found in the
+ * LICENSE file in the root directory of this source tree. An additional grant
+ * of patent rights can be found in the PATENTS file in the same directory.
+ *
+ * You can contact the author at:
+ * - zstdmt source repository: https://github.com/mcmilk/zstdmt
+ */
+
+/**
+ * This file holds wrappers for systems which do not support pthreads
+ */
+
+/* When ZSTD_MULTITHREAD is not defined, this file would become an empty translation unit.
+* Include some ISO C header code to prevent this and portably avoid related warnings.
+* (Visual C++: C4206 / GCC: -Wpedantic / Clang: -Wempty-translation-unit)
+*/
+#include <stddef.h>
+
+
+#if defined(ZSTD_MULTITHREAD) && defined(_WIN32)
+
+/**
+ * Windows minimalist Pthread Wrapper, based on :
+ * http://www.cse.wustl.edu/~schmidt/win32-cv-1.html
+ */
+
+
+/* ===  Dependencies  === */
+#include <process.h>
+#include <errno.h>
+#include "threading.h"
+
+
+/* ===  Implementation  === */
+
+static unsigned __stdcall worker(void *arg)
+{
+    pthread_t* const thread = (pthread_t*) arg;
+    thread->arg = thread->start_routine(thread->arg);
+    return 0;
+}
+
+int pthread_create(pthread_t* thread, const void* unused,
+            void* (*start_routine) (void*), void* arg)
+{
+    (void)unused;
+    thread->arg = arg;
+    thread->start_routine = start_routine;
+    thread->handle = (HANDLE) _beginthreadex(NULL, 0, worker, thread, 0, NULL);
+
+    if (!thread->handle)
+        return errno;
+    else
+        return 0;
+}
+
+int _pthread_join(pthread_t * thread, void **value_ptr)
+{
+    DWORD result;   /* wait status; a thread that was never started (NULL handle) joins trivially */
+    if (!thread->handle) return 0;
+    result = WaitForSingleObject(thread->handle, INFINITE);
+    switch (result) {
+    case WAIT_OBJECT_0:
+        if (value_ptr) *value_ptr = thread->arg;   /* worker() stored the routine's result in arg */
+        CloseHandle(thread->handle);   /* handles from _beginthreadex() must be closed by the caller */
+        thread->handle = NULL;         /* a repeated join hits the early return, not a stale handle */
+        return 0;
+    case WAIT_ABANDONED:
+        return EINVAL;
+    default:
+        return GetLastError();
+    }
+}
+
+#endif   /* ZSTD_MULTITHREAD && _WIN32 */
diff --git a/contrib/libzstd/include/zstd/common/threading.h b/contrib/libzstd/include/zstd/common/threading.h
new file mode 100644
index 00000000000..c0086139ea3
--- /dev/null
+++ b/contrib/libzstd/include/zstd/common/threading.h
@@ -0,0 +1,104 @@
+
+/**
+ * Copyright (c) 2016 Tino Reichardt
+ * All rights reserved.
+ *
+ * This source code is licensed under the BSD-style license found in the
+ * LICENSE file in the root directory of this source tree. An additional grant
+ * of patent rights can be found in the PATENTS file in the same directory.
+ *
+ * You can contact the author at:
+ * - zstdmt source repository: https://github.com/mcmilk/zstdmt
+ */
+
+#ifndef THREADING_H_938743
+#define THREADING_H_938743
+
+#if defined (__cplusplus)
+extern "C" {
+#endif
+
+#if defined(ZSTD_MULTITHREAD) && defined(_WIN32)
+
+/**
+ * Windows minimalist Pthread Wrapper, based on :
+ * http://www.cse.wustl.edu/~schmidt/win32-cv-1.html
+ */
+#ifdef WINVER
+#  undef WINVER
+#endif
+#define WINVER       0x0600
+
+#ifdef _WIN32_WINNT
+#  undef _WIN32_WINNT
+#endif
+#define _WIN32_WINNT 0x0600
+
+#ifndef WIN32_LEAN_AND_MEAN
+#  define WIN32_LEAN_AND_MEAN
+#endif
+
+#include <windows.h>
+
+/* mutex */
+#define pthread_mutex_t           CRITICAL_SECTION
+#define pthread_mutex_init(a,b)   InitializeCriticalSection((a))
+#define pthread_mutex_destroy(a)  DeleteCriticalSection((a))
+#define pthread_mutex_lock(a)     EnterCriticalSection((a))
+#define pthread_mutex_unlock(a)   LeaveCriticalSection((a))
+
+/* condition variable */
+#define pthread_cond_t             CONDITION_VARIABLE
+#define pthread_cond_init(a, b)    InitializeConditionVariable((a))
+#define pthread_cond_destroy(a)    /* No delete */
+#define pthread_cond_wait(a, b)    SleepConditionVariableCS((a), (b), INFINITE)
+#define pthread_cond_signal(a)     WakeConditionVariable((a))
+#define pthread_cond_broadcast(a)  WakeAllConditionVariable((a))
+
+/* pthread_create() and pthread_join() */
+typedef struct {
+    HANDLE handle;
+    void* (*start_routine)(void*);
+    void* arg;
+} pthread_t;
+
+int pthread_create(pthread_t* thread, const void* unused,
+                   void* (*start_routine) (void*), void* arg);
+
+#define pthread_join(a, b) _pthread_join(&(a), (b))
+int _pthread_join(pthread_t* thread, void** value_ptr);
+
+/**
+ * add here more wrappers as required
+ */
+
+
+#elif defined(ZSTD_MULTITHREAD)   /* posix assumed ; need a better detection method */
+/* ===   POSIX Systems   === */
+#  include <pthread.h>
+
+#else  /* ZSTD_MULTITHREAD not defined */
+/* No multithreading support */
+
+#define pthread_mutex_t int   /* #define rather than typedef, as sometimes pthread support is implicit, resulting in duplicated symbols */
+#define pthread_mutex_init(a,b)
+#define pthread_mutex_destroy(a)
+#define pthread_mutex_lock(a)
+#define pthread_mutex_unlock(a)
+
+#define pthread_cond_t int
+#define pthread_cond_init(a,b)
+#define pthread_cond_destroy(a)
+#define pthread_cond_wait(a,b)
+#define pthread_cond_signal(a)
+#define pthread_cond_broadcast(a)
+
+/* do not use pthread_t */
+
+#endif /* ZSTD_MULTITHREAD */
+
+#if defined (__cplusplus)
+}
+#endif
+
+#endif /* THREADING_H_938743 */
diff --git a/contrib/libzstd/include/zstd/common/xxhash.c b/contrib/libzstd/include/zstd/common/xxhash.c
index 29e4fa628f3..eb44222c5fc 100644
--- a/contrib/libzstd/include/zstd/common/xxhash.c
+++ b/contrib/libzstd/include/zstd/common/xxhash.c
@@ -104,7 +104,9 @@ static void  XXH_free  (void* p)  { free(p); }
 #include <string.h>
 static void* XXH_memcpy(void* dest, const void* src, size_t size) { return memcpy(dest,src,size); }
 
-#define XXH_STATIC_LINKING_ONLY
+#ifndef XXH_STATIC_LINKING_ONLY
+#  define XXH_STATIC_LINKING_ONLY
+#endif
 #include "xxhash.h"
 
 
diff --git a/contrib/libzstd/include/zstd/common/xxhash.h b/contrib/libzstd/include/zstd/common/xxhash.h
index 2c9b7c61bf4..9bad1f59f63 100644
--- a/contrib/libzstd/include/zstd/common/xxhash.h
+++ b/contrib/libzstd/include/zstd/common/xxhash.h
@@ -64,16 +64,12 @@ XXH64       13.8 GB/s            1.9 GB/s
 XXH32        6.8 GB/s            6.0 GB/s
 */
 
-#ifndef XXHASH_H_5627135585666179
-#define XXHASH_H_5627135585666179 1
-
 #if defined (__cplusplus)
 extern "C" {
 #endif
 
-#ifndef XXH_NAMESPACE
-#  define XXH_NAMESPACE ZSTD_  /* Zstandard specific */
-#endif
+#ifndef XXHASH_H_5627135585666179
+#define XXHASH_H_5627135585666179 1
 
 
 /* ****************************
@@ -242,6 +238,11 @@ XXH_PUBLIC_API void XXH64_copyState(XXH64_state_t* restrict dst_state, const XXH
 /* **************************
 *  Canonical representation
 ****************************/
+/* Default result type for XXH functions are primitive unsigned 32 and 64 bits.
+*  The canonical representation uses human-readable write convention, aka big-endian (large digits first).
+*  These functions allow transformation of hash result into and from its canonical format.
+*  This way, hash values can be written into a file / memory, and remain comparable on different systems and programs.
+*/
 typedef struct { unsigned char digest[4]; } XXH32_canonical_t;
 typedef struct { unsigned char digest[8]; } XXH64_canonical_t;
 
@@ -251,14 +252,9 @@ XXH_PUBLIC_API void XXH64_canonicalFromHash(XXH64_canonical_t* dst, XXH64_hash_t
 XXH_PUBLIC_API XXH32_hash_t XXH32_hashFromCanonical(const XXH32_canonical_t* src);
 XXH_PUBLIC_API XXH64_hash_t XXH64_hashFromCanonical(const XXH64_canonical_t* src);
 
-/* Default result type for XXH functions are primitive unsigned 32 and 64 bits.
-*  The canonical representation uses human-readable write convention, aka big-endian (large digits first).
-*  These functions allow transformation of hash result into and from its canonical format.
-*  This way, hash values can be written into a file / memory, and remain comparable on different systems and programs.
-*/
+#endif /* XXHASH_H_5627135585666179 */
 
 
-#ifdef XXH_STATIC_LINKING_ONLY
 
 /* ================================================================================================
    This section contains definitions which are not guaranteed to remain stable.
@@ -266,6 +262,8 @@ XXH_PUBLIC_API XXH64_hash_t XXH64_hashFromCanonical(const XXH64_canonical_t* src
    They shall only be used with static linking.
    Never use these definitions in association with dynamic linking !
 =================================================================================================== */
+#if defined(XXH_STATIC_LINKING_ONLY) && !defined(XXH_STATIC_H_3543687687345)
+#define XXH_STATIC_H_3543687687345
 
 /* These definitions are only meant to allow allocation of XXH state
    statically, on stack, or in a struct for example.
@@ -299,11 +297,9 @@ XXH_PUBLIC_API XXH64_hash_t XXH64_hashFromCanonical(const XXH64_canonical_t* src
 #    include "xxhash.c"   /* include xxhash functions as `static`, for inlining */
 #  endif
 
-#endif /* XXH_STATIC_LINKING_ONLY */
+#endif /* XXH_STATIC_LINKING_ONLY && XXH_STATIC_H_3543687687345 */
 
 
 #if defined (__cplusplus)
 }
 #endif
-
-#endif /* XXHASH_H_5627135585666179 */
diff --git a/contrib/libzstd/include/zstd/common/zstd_common.c b/contrib/libzstd/include/zstd/common/zstd_common.c
index 54bc91c8992..f6816723815 100644
--- a/contrib/libzstd/include/zstd/common/zstd_common.c
+++ b/contrib/libzstd/include/zstd/common/zstd_common.c
@@ -12,17 +12,19 @@
 /*-*************************************
 *  Dependencies
 ***************************************/
-#include <stdlib.h>         /* malloc */
+#include <stdlib.h>      /* malloc, calloc, free */
+#include <string.h>      /* memset */
 #include "error_private.h"
 #define ZSTD_STATIC_LINKING_ONLY
-#include "zstd.h"           /* declaration of ZSTD_isError, ZSTD_getErrorName, ZSTD_getErrorCode, ZSTD_getErrorString, ZSTD_versionNumber */
-#include "zbuff.h"          /* declaration of ZBUFF_isError, ZBUFF_getErrorName */
+#include "zstd.h"
 
 
 /*-****************************************
 *  Version
 ******************************************/
-unsigned ZSTD_versionNumber (void) { return ZSTD_VERSION_NUMBER; }
+unsigned ZSTD_versionNumber(void) { return ZSTD_VERSION_NUMBER; }
+
+const char* ZSTD_versionString(void) { return ZSTD_VERSION_STRING; }
 
 
 /*-****************************************
@@ -42,42 +44,37 @@ ZSTD_ErrorCode ZSTD_getErrorCode(size_t code) { return ERR_getErrorCode(code); }
 
 /*! ZSTD_getErrorString() :
 *   provides error code string from enum */
-const char* ZSTD_getErrorString(ZSTD_ErrorCode code) { return ERR_getErrorName(code); }
-
-
-/* **************************************************************
-*  ZBUFF Error Management
-****************************************************************/
-unsigned ZBUFF_isError(size_t errorCode) { return ERR_isError(errorCode); }
-
-const char* ZBUFF_getErrorName(size_t errorCode) { return ERR_getErrorName(errorCode); }
-
+const char* ZSTD_getErrorString(ZSTD_ErrorCode code) { return ERR_getErrorString(code); }
 
 
 /*=**************************************************************
 *  Custom allocator
 ****************************************************************/
-/* default uses stdlib */
-void* ZSTD_defaultAllocFunction(void* opaque, size_t size)
-{
-    void* address = malloc(size);
-    (void)opaque;
-    return address;
-}
-
-void ZSTD_defaultFreeFunction(void* opaque, void* address)
-{
-    (void)opaque;
-    free(address);
-}
-
 void* ZSTD_malloc(size_t size, ZSTD_customMem customMem)
 {
-    return customMem.customAlloc(customMem.opaque, size);
+    if (customMem.customAlloc)
+        return customMem.customAlloc(customMem.opaque, size);
+    return malloc(size);
+}
+
+void* ZSTD_calloc(size_t size, ZSTD_customMem customMem)
+{
+    if (customMem.customAlloc) {
+        /* calloc implemented as malloc+memset; not as efficient as calloc, but next best
+         * guess for custom malloc. A failed allocation (NULL) is returned to the caller as-is. */
+        void* const ptr = customMem.customAlloc(customMem.opaque, size);
+        if (ptr != NULL) memset(ptr, 0, size);   /* never memset() through a NULL pointer */
+        return ptr;
+    }
+    return calloc(1, size);
+}
 
 void ZSTD_free(void* ptr, ZSTD_customMem customMem)
 {
-    if (ptr!=NULL)
-        customMem.customFree(customMem.opaque, ptr);
+    if (ptr!=NULL) {
+        if (customMem.customFree)
+            customMem.customFree(customMem.opaque, ptr);
+        else
+            free(ptr);
+    }
 }
diff --git a/contrib/libzstd/include/zstd/common/zstd_errors.h b/contrib/libzstd/include/zstd/common/zstd_errors.h
new file mode 100644
index 00000000000..19f1597aa34
--- /dev/null
+++ b/contrib/libzstd/include/zstd/common/zstd_errors.h
@@ -0,0 +1,83 @@
+/**
+ * Copyright (c) 2016-present, Yann Collet, Facebook, Inc.
+ * All rights reserved.
+ *
+ * This source code is licensed under the BSD-style license found in the
+ * LICENSE file in the root directory of this source tree. An additional grant
+ * of patent rights can be found in the PATENTS file in the same directory.
+ */
+
+#ifndef ZSTD_ERRORS_H_398273423
+#define ZSTD_ERRORS_H_398273423
+
+#if defined (__cplusplus)
+extern "C" {
+#endif
+
+/*===== dependency =====*/
+#include <stddef.h>   /* size_t */
+
+
+/* =====   ZSTDERRORLIB_API : control library symbols visibility   ===== */
+#ifndef ZSTDERRORLIB_VISIBILITY
+#  if defined(__GNUC__) && (__GNUC__ >= 4)
+#    define ZSTDERRORLIB_VISIBILITY __attribute__ ((visibility ("default")))
+#  else
+#    define ZSTDERRORLIB_VISIBILITY
+#  endif
+#endif
+#if defined(ZSTD_DLL_EXPORT) && (ZSTD_DLL_EXPORT==1)
+#  define ZSTDERRORLIB_API __declspec(dllexport) ZSTDERRORLIB_VISIBILITY
+#elif defined(ZSTD_DLL_IMPORT) && (ZSTD_DLL_IMPORT==1)
+#  define ZSTDERRORLIB_API __declspec(dllimport) ZSTDERRORLIB_VISIBILITY /* It isn't required but allows to generate better code, saving a function pointer load from the IAT and an indirect jump.*/
+#else
+#  define ZSTDERRORLIB_API ZSTDERRORLIB_VISIBILITY
+#endif
+
+/*-****************************************
+ *  error codes list
+ *  note : this API is still considered unstable
+ *         it should not be used with a dynamic library
+ *         only static linking is allowed
+ ******************************************/
+typedef enum {
+  ZSTD_error_no_error,
+  ZSTD_error_GENERIC,
+  ZSTD_error_prefix_unknown,
+  ZSTD_error_version_unsupported,
+  ZSTD_error_parameter_unknown,
+  ZSTD_error_frameParameter_unsupported,
+  ZSTD_error_frameParameter_unsupportedBy32bits,
+  ZSTD_error_frameParameter_windowTooLarge,
+  ZSTD_error_compressionParameter_unsupported,
+  ZSTD_error_compressionParameter_outOfBound,
+  ZSTD_error_init_missing,
+  ZSTD_error_memory_allocation,
+  ZSTD_error_stage_wrong,
+  ZSTD_error_dstSize_tooSmall,
+  ZSTD_error_srcSize_wrong,
+  ZSTD_error_corruption_detected,
+  ZSTD_error_checksum_wrong,
+  ZSTD_error_tableLog_tooLarge,
+  ZSTD_error_maxSymbolValue_tooLarge,
+  ZSTD_error_maxSymbolValue_tooSmall,
+  ZSTD_error_dictionary_corrupted,
+  ZSTD_error_dictionary_wrong,
+  ZSTD_error_dictionaryCreation_failed,
+  ZSTD_error_frameIndex_tooLarge,
+  ZSTD_error_seekableIO,
+  ZSTD_error_maxCode
+} ZSTD_ErrorCode;
+
+/*! ZSTD_getErrorCode() :
+    convert a `size_t` function result into a `ZSTD_ErrorCode` enum type,
+    which can be used to compare with enum list published above */
+ZSTDERRORLIB_API ZSTD_ErrorCode ZSTD_getErrorCode(size_t functionResult);
+ZSTDERRORLIB_API const char* ZSTD_getErrorString(ZSTD_ErrorCode code);
+
+
+#if defined (__cplusplus)
+}
+#endif
+
+#endif /* ZSTD_ERRORS_H_398273423 */
diff --git a/contrib/libzstd/include/zstd/common/zstd_internal.h b/contrib/libzstd/include/zstd/common/zstd_internal.h
index f40e00aabb3..f2c4e6249fb 100644
--- a/contrib/libzstd/include/zstd/common/zstd_internal.h
+++ b/contrib/libzstd/include/zstd/common/zstd_internal.h
@@ -16,9 +16,10 @@
 #ifdef _MSC_VER    /* Visual Studio */
 #  define FORCE_INLINE static __forceinline
 #  include <intrin.h>                    /* For Visual 2005 */
-#  pragma warning(disable : 4127)        /* disable: C4127: conditional expression is constant */
-#  pragma warning(disable : 4324)        /* disable: C4324: padded structure */
 #  pragma warning(disable : 4100)        /* disable: C4100: unreferenced formal parameter */
+#  pragma warning(disable : 4127)        /* disable: C4127: conditional expression is constant */
+#  pragma warning(disable : 4204)        /* disable: C4204: non-constant aggregate initializer */
+#  pragma warning(disable : 4324)        /* disable: C4324: padded structure */
 #else
 #  if defined (__cplusplus) || defined (__STDC_VERSION__) && __STDC_VERSION__ >= 199901L   /* C99 */
 #    ifdef __GNUC__
@@ -31,6 +32,16 @@
 #  endif /* __STDC_VERSION__ */
 #endif
 
+#ifdef _MSC_VER
+#  define FORCE_NOINLINE static __declspec(noinline)
+#else
+#  ifdef __GNUC__
+#    define FORCE_NOINLINE static __attribute__((__noinline__))
+#  else
+#    define FORCE_NOINLINE static
+#  endif
+#endif
+
 
 /*-*************************************
 *  Dependencies
@@ -39,11 +50,50 @@
 #include "error_private.h"
 #define ZSTD_STATIC_LINKING_ONLY
 #include "zstd.h"
+#ifndef XXH_STATIC_LINKING_ONLY
+#  define XXH_STATIC_LINKING_ONLY  /* XXH64_state_t */
+#endif
+#include "xxhash.h"                /* XXH_reset, update, digest */
+
+
+/*-*************************************
+*  Debug
+***************************************/
+#if defined(ZSTD_DEBUG) && (ZSTD_DEBUG>=1)
+#  include <assert.h>
+#else
+#  ifndef assert
+#    define assert(condition) ((void)0)
+#  endif
+#endif
+
+#define ZSTD_STATIC_ASSERT(c) { enum { ZSTD_static_assert = 1/(int)(!!(c)) }; }
+
+#if defined(ZSTD_DEBUG) && (ZSTD_DEBUG>=2)
+#  include <stdio.h>
+/* recommended values for ZSTD_DEBUG display levels :
+ * 1 : no display, enables assert() only
+ * 2 : reserved for currently active debugging path
+ * 3 : events once per object lifetime (CCtx, CDict)
+ * 4 : events once per frame
+ * 5 : events once per block
+ * 6 : events once per sequence (*very* verbose) */
+#  define DEBUGLOG(l, ...) {                         \
+                if (l<=ZSTD_DEBUG) {                 \
+                    fprintf(stderr, __FILE__ ": ");  \
+                    fprintf(stderr, __VA_ARGS__);    \
+                    fprintf(stderr, " \n");          \
+            }   }
+#else
+#  define DEBUGLOG(l, ...)      {}    /* disabled */
+#endif
 
 
 /*-*************************************
 *  shared macros
 ***************************************/
+#undef MIN
+#undef MAX
 #define MIN(a,b) ((a)<(b) ? (a) : (b))
 #define MAX(a,b) ((a)>(b) ? (a) : (b))
 #define CHECK_F(f) { size_t const errcod = f; if (ERR_isError(errcod)) return errcod; }  /* check and Forward error code */
@@ -54,7 +104,6 @@
 *  Common constants
 ***************************************/
 #define ZSTD_OPT_NUM    (1<<12)
-#define ZSTD_DICT_MAGIC  0xEC30A437   /* v0.7+ */
 
 #define ZSTD_REP_NUM      3                 /* number of repcodes */
 #define ZSTD_REP_CHECK    (ZSTD_REP_NUM)    /* number of repcodes to check by the optimal parser */
@@ -90,7 +139,6 @@ typedef enum { set_basic, set_rle, set_compressed, set_repeat } symbolEncodingTy
 #define LONGNBSEQ 0x7F00
 
 #define MINMATCH 3
-#define EQUAL_READ32 4
 
 #define Litbits  8
 #define MaxLit ((1<<Litbits) - 1)
@@ -137,7 +185,7 @@ static void ZSTD_copy8(void* dst, const void* src) { memcpy(dst, src, 8); }
 /*! ZSTD_wildcopy() :
 *   custom version of memcpy(), can copy up to 7 bytes too many (8 bytes if length==0) */
 #define WILDCOPY_OVERLENGTH 8
-MEM_STATIC void ZSTD_wildcopy(void* dst, const void* src, size_t length)
+MEM_STATIC void ZSTD_wildcopy(void* dst, const void* src, ptrdiff_t length)
 {
     const BYTE* ip = (const BYTE*)src;
     BYTE* op = (BYTE*)dst;
@@ -212,6 +260,7 @@ typedef struct {
     U32  log2litSum;
     U32  log2offCodeSum;
     U32  factor;
+    U32  staticPrices;
     U32  cachedPrice;
     U32  cachedLitLength;
     const BYTE* cachedLiterals;
@@ -219,13 +268,10 @@ typedef struct {
 
 const seqStore_t* ZSTD_getSeqStore(const ZSTD_CCtx* ctx);
 void ZSTD_seqToCodes(const seqStore_t* seqStorePtr);
-int ZSTD_isSkipFrame(ZSTD_DCtx* dctx);
 
 /* custom memory allocation functions */
-void* ZSTD_defaultAllocFunction(void* opaque, size_t size);
-void ZSTD_defaultFreeFunction(void* opaque, void* address);
-static const ZSTD_customMem defaultCustomMem = { ZSTD_defaultAllocFunction, ZSTD_defaultFreeFunction, NULL };
 void* ZSTD_malloc(size_t size, ZSTD_customMem customMem);
+void* ZSTD_calloc(size_t size, ZSTD_customMem customMem);
 void ZSTD_free(void* ptr, ZSTD_customMem customMem);
 
 
@@ -254,4 +300,35 @@ MEM_STATIC U32 ZSTD_highbit32(U32 val)
 }
 
 
+/* hidden functions */
+
+/* ZSTD_invalidateRepCodes() :
+ * ensures next compression will not use repcodes from previous block.
+ * Note : only works with regular variant;
+ *        do not use with extDict variant ! */
+void ZSTD_invalidateRepCodes(ZSTD_CCtx* cctx);
+
+
+/*! ZSTD_initCStream_internal() :
+ *  Private use only. Init streaming operation.
+ *  expects params to be valid.
+ *  must receive dict, or cdict, or none, but not both.
+ *  @return : 0, or an error code */
+size_t ZSTD_initCStream_internal(ZSTD_CStream* zcs,
+                     const void* dict, size_t dictSize,
+                     const ZSTD_CDict* cdict,
+                     ZSTD_parameters params, unsigned long long pledgedSrcSize);
+
+/*! ZSTD_compressStream_generic() :
+ *  Private use only. To be called from zstdmt_compress.c in single-thread mode. */
+size_t ZSTD_compressStream_generic(ZSTD_CStream* zcs,
+                                   ZSTD_outBuffer* output,
+                                   ZSTD_inBuffer* input,
+                                   ZSTD_EndDirective const flushMode);
+
+/*! ZSTD_getParamsFromCDict() :
+ *  as the name implies */
+ZSTD_parameters ZSTD_getParamsFromCDict(const ZSTD_CDict* cdict);
+
+
 #endif   /* ZSTD_CCOMMON_H_MODULE */
diff --git a/contrib/libzstd/include/zstd/compress/fse_compress.c b/contrib/libzstd/include/zstd/compress/fse_compress.c
index 679dbdb83be..26e8052ddcc 100644
--- a/contrib/libzstd/include/zstd/compress/fse_compress.c
+++ b/contrib/libzstd/include/zstd/compress/fse_compress.c
@@ -70,12 +70,6 @@
 #define FSE_STATIC_ASSERT(c) { enum { FSE_static_assert = 1/(int)(!!(c)) }; }   /* use only *after* variable declarations */
 
 
-/* **************************************************************
-*  Complex types
-****************************************************************/
-typedef U32 CTable_max_t[FSE_CTABLE_SIZE_U32(FSE_MAX_TABLELOG, FSE_MAX_SYMBOL_VALUE)];
-
-
 /* **************************************************************
 *  Templates
 ****************************************************************/
@@ -100,7 +94,13 @@ typedef U32 CTable_max_t[FSE_CTABLE_SIZE_U32(FSE_MAX_TABLELOG, FSE_MAX_SYMBOL_VA
 
 
 /* Function templates */
-size_t FSE_buildCTable(FSE_CTable* ct, const short* normalizedCounter, unsigned maxSymbolValue, unsigned tableLog)
+
+/* FSE_buildCTable_wksp() :
+ * Same as FSE_buildCTable(), but using an externally allocated scratch buffer (`workSpace`).
+ * wkspSize should be sized to handle worst case situation, which is `(1<<max_tableLog) * sizeof(FSE_FUNCTION_TYPE)`
+ * workSpace must also be properly aligned with FSE_FUNCTION_TYPE requirements
+ */
+size_t FSE_buildCTable_wksp(FSE_CTable* ct, const short* normalizedCounter, unsigned maxSymbolValue, unsigned tableLog, void* workSpace, size_t wkspSize)
 {
     U32 const tableSize = 1 << tableLog;
     U32 const tableMask = tableSize - 1;
@@ -111,10 +111,11 @@ size_t FSE_buildCTable(FSE_CTable* ct, const short* normalizedCounter, unsigned
     U32 const step = FSE_TABLESTEP(tableSize);
     U32 cumul[FSE_MAX_SYMBOL_VALUE+2];
 
-    FSE_FUNCTION_TYPE tableSymbol[FSE_MAX_TABLESIZE]; /* memset() is not necessary, even if static analyzer complain about it */
+    FSE_FUNCTION_TYPE* const tableSymbol = (FSE_FUNCTION_TYPE*)workSpace;
     U32 highThreshold = tableSize-1;
 
     /* CTable header */
+    if (((size_t)1 << tableLog) * sizeof(FSE_FUNCTION_TYPE) > wkspSize) return ERROR(tableLog_tooLarge);
     tableU16[-2] = (U16) tableLog;
     tableU16[-1] = (U16) maxSymbolValue;
 
@@ -181,6 +182,13 @@ size_t FSE_buildCTable(FSE_CTable* ct, const short* normalizedCounter, unsigned
 }
 
 
+size_t FSE_buildCTable(FSE_CTable* ct, const short* normalizedCounter, unsigned maxSymbolValue, unsigned tableLog)
+{
+    FSE_FUNCTION_TYPE tableSymbol[FSE_MAX_TABLESIZE];   /* memset() is not necessary, even if static analyzer complain about it */
+    return FSE_buildCTable_wksp(ct, normalizedCounter, maxSymbolValue, tableLog, tableSymbol, sizeof(tableSymbol));
+}
+
+
 
 #ifndef FSE_COMMONDEFS_ONLY
 
@@ -189,12 +197,10 @@ size_t FSE_buildCTable(FSE_CTable* ct, const short* normalizedCounter, unsigned
 ****************************************************************/
 size_t FSE_NCountWriteBound(unsigned maxSymbolValue, unsigned tableLog)
 {
-    size_t maxHeaderSize = (((maxSymbolValue+1) * tableLog) >> 3) + 3;
+    size_t const maxHeaderSize = (((maxSymbolValue+1) * tableLog) >> 3) + 3;
     return maxSymbolValue ? maxHeaderSize : FSE_NCOUNTBOUND;  /* maxSymbolValue==0 ? use default */
 }
 
-static short FSE_abs(short a) { return (short)(a<0 ? -a : a); }
-
 static size_t FSE_writeNCount_generic (void* header, size_t headerBufferSize,
                                        const short* normalizedCounter, unsigned maxSymbolValue, unsigned tableLog,
                                        unsigned writeIsSafe)
@@ -250,16 +256,16 @@ static size_t FSE_writeNCount_generic (void* header, size_t headerBufferSize,
                 bitStream >>= 16;
                 bitCount -= 16;
         }   }
-        {   short count = normalizedCounter[charnum++];
-            const short max = (short)((2*threshold-1)-remaining);
-            remaining -= FSE_abs(count);
-            if (remaining<1) return ERROR(GENERIC);
+        {   int count = normalizedCounter[charnum++];
+            int const max = (2*threshold-1)-remaining;
+            remaining -= count < 0 ? -count : count;
             count++;   /* +1 for extra accuracy */
             if (count>=threshold) count += max;   /* [0..max[ [max..threshold[ (...) [threshold+max 2*threshold[ */
             bitStream += count << bitCount;
             bitCount  += nbBits;
             bitCount  -= (count<max);
             previous0  = (count==1);
+            if (remaining<1) return ERROR(GENERIC);
             while (remaining<threshold) nbBits--, threshold>>=1;
         }
         if (bitCount>16) {
@@ -285,7 +291,7 @@ static size_t FSE_writeNCount_generic (void* header, size_t headerBufferSize,
 
 size_t FSE_writeNCount (void* buffer, size_t bufferSize, const short* normalizedCounter, unsigned maxSymbolValue, unsigned tableLog)
 {
-    if (tableLog > FSE_MAX_TABLELOG) return ERROR(GENERIC);   /* Unsupported */
+    if (tableLog > FSE_MAX_TABLELOG) return ERROR(tableLog_tooLarge);   /* Unsupported */
     if (tableLog < FSE_MIN_TABLELOG) return ERROR(GENERIC);   /* Unsupported */
 
     if (bufferSize < FSE_NCountWriteBound(maxSymbolValue, tableLog))
@@ -300,21 +306,20 @@ size_t FSE_writeNCount (void* buffer, size_t bufferSize, const short* normalized
 *  Counting histogram
 ****************************************************************/
 /*! FSE_count_simple
-    This function just counts byte values within `src`,
-    and store the histogram into table `count`.
-    This function is unsafe : it doesn't check that all values within `src` can fit into `count`.
+    This function counts byte values within `src`, and stores the histogram into table `count`.
+    It doesn't use any additional memory.
+    But this function is unsafe : it doesn't check that all values within `src` can fit into `count`.
     For this reason, prefer using a table `count` with 256 elements.
     @return : count of most numerous element
 */
-static size_t FSE_count_simple(unsigned* count, unsigned* maxSymbolValuePtr,
-                               const void* src, size_t srcSize)
+size_t FSE_count_simple(unsigned* count, unsigned* maxSymbolValuePtr,
+                        const void* src, size_t srcSize)
 {
     const BYTE* ip = (const BYTE*)src;
     const BYTE* const end = ip + srcSize;
     unsigned maxSymbolValue = *maxSymbolValuePtr;
     unsigned max=0;
 
-
     memset(count, 0, (maxSymbolValue+1)*sizeof(*count));
     if (srcSize==0) { *maxSymbolValuePtr = 0; return 0; }
 
@@ -329,20 +334,24 @@ static size_t FSE_count_simple(unsigned* count, unsigned* maxSymbolValuePtr,
 }
 
 
-static size_t FSE_count_parallel(unsigned* count, unsigned* maxSymbolValuePtr,
+/* FSE_count_parallel_wksp() :
+ * Same as FSE_count_parallel(), but using an externally provided scratch buffer.
+ * `workSpace` size must be a minimum of `1024 * sizeof(unsigned)` */
+static size_t FSE_count_parallel_wksp(
+                                unsigned* count, unsigned* maxSymbolValuePtr,
                                 const void* source, size_t sourceSize,
-                                unsigned checkMax)
+                                unsigned checkMax, unsigned* const workSpace)
 {
     const BYTE* ip = (const BYTE*)source;
     const BYTE* const iend = ip+sourceSize;
     unsigned maxSymbolValue = *maxSymbolValuePtr;
     unsigned max=0;
+    U32* const Counting1 = workSpace;
+    U32* const Counting2 = Counting1 + 256;
+    U32* const Counting3 = Counting2 + 256;
+    U32* const Counting4 = Counting3 + 256;
 
-
-    U32 Counting1[256] = { 0 };
-    U32 Counting2[256] = { 0 };
-    U32 Counting3[256] = { 0 };
-    U32 Counting4[256] = { 0 };
+    memset(Counting1, 0, 4*256*sizeof(unsigned));
 
     /* safety checks */
     if (!sourceSize) {
@@ -388,31 +397,51 @@ static size_t FSE_count_parallel(unsigned* count, unsigned* maxSymbolValuePtr,
             if (Counting1[s]) return ERROR(maxSymbolValue_tooSmall);
     }   }
 
-    { U32 s; for (s=0; s<=maxSymbolValue; s++) {
-        count[s] = Counting1[s] + Counting2[s] + Counting3[s] + Counting4[s];
-        if (count[s] > max) max = count[s];
-    }}
+    {   U32 s; for (s=0; s<=maxSymbolValue; s++) {
+            count[s] = Counting1[s] + Counting2[s] + Counting3[s] + Counting4[s];
+            if (count[s] > max) max = count[s];
+    }   }
 
     while (!count[maxSymbolValue]) maxSymbolValue--;
     *maxSymbolValuePtr = maxSymbolValue;
     return (size_t)max;
 }
 
+/* FSE_countFast_wksp() :
+ * Same as FSE_countFast(), but using an externally provided scratch buffer.
+ * `workSpace` must be a table of at least `1024` unsigned */
+size_t FSE_countFast_wksp(unsigned* count, unsigned* maxSymbolValuePtr,
+                     const void* source, size_t sourceSize, unsigned* workSpace)
+{
+    if (sourceSize < 1500) return FSE_count_simple(count, maxSymbolValuePtr, source, sourceSize);
+    return FSE_count_parallel_wksp(count, maxSymbolValuePtr, source, sourceSize, 0, workSpace);
+}
+
 /* fast variant (unsafe : won't check if src contains values beyond count[] limit) */
 size_t FSE_countFast(unsigned* count, unsigned* maxSymbolValuePtr,
                      const void* source, size_t sourceSize)
 {
-    if (sourceSize < 1500) return FSE_count_simple(count, maxSymbolValuePtr, source, sourceSize);
-    return FSE_count_parallel(count, maxSymbolValuePtr, source, sourceSize, 0);
+    unsigned tmpCounters[1024];
+    return FSE_countFast_wksp(count, maxSymbolValuePtr, source, sourceSize, tmpCounters);
+}
+
+/* FSE_count_wksp() :
+ * Same as FSE_count(), but using an externally provided scratch buffer.
+ * `workSpace` must be a table of at least `1024` unsigned */
+size_t FSE_count_wksp(unsigned* count, unsigned* maxSymbolValuePtr,
+                 const void* source, size_t sourceSize, unsigned* workSpace)
+{
+    if (*maxSymbolValuePtr < 255)
+        return FSE_count_parallel_wksp(count, maxSymbolValuePtr, source, sourceSize, 1, workSpace);
+    *maxSymbolValuePtr = 255;
+    return FSE_countFast_wksp(count, maxSymbolValuePtr, source, sourceSize, workSpace);
 }
 
 size_t FSE_count(unsigned* count, unsigned* maxSymbolValuePtr,
-                 const void* source, size_t sourceSize)
+                 const void* src, size_t srcSize)
 {
-    if (*maxSymbolValuePtr <255)
-        return FSE_count_parallel(count, maxSymbolValuePtr, source, sourceSize, 1);
-    *maxSymbolValuePtr = 255;
-    return FSE_countFast(count, maxSymbolValuePtr, source, sourceSize);
+    unsigned tmpCounters[1024];
+    return FSE_count_wksp(count, maxSymbolValuePtr, src, srcSize, tmpCounters);
 }
 
 
@@ -428,14 +457,10 @@ size_t FSE_count(unsigned* count, unsigned* maxSymbolValuePtr,
     `FSE_symbolCompressionTransform symbolTT[maxSymbolValue+1];`  // This size is variable
 Allocation is manual (C standard does not support variable-size structures).
 */
-
 size_t FSE_sizeof_CTable (unsigned maxSymbolValue, unsigned tableLog)
 {
-    size_t size;
-    FSE_STATIC_ASSERT((size_t)FSE_CTABLE_SIZE_U32(FSE_MAX_TABLELOG, FSE_MAX_SYMBOL_VALUE)*4 >= sizeof(CTable_max_t));   /* A compilation error here means FSE_CTABLE_SIZE_U32 is not large enough */
-    if (tableLog > FSE_MAX_TABLELOG) return ERROR(GENERIC);
-    size = FSE_CTABLE_SIZE_U32 (tableLog, maxSymbolValue) * sizeof(U32);
-    return size;
+    if (tableLog > FSE_MAX_TABLELOG) return ERROR(tableLog_tooLarge);
+    return FSE_CTABLE_SIZE_U32 (tableLog, maxSymbolValue) * sizeof(U32);
 }
 
 FSE_CTable* FSE_createCTable (unsigned maxSymbolValue, unsigned tableLog)
@@ -451,20 +476,20 @@ void FSE_freeCTable (FSE_CTable* ct) { free(ct); }
 /* provides the minimum logSize to safely represent a distribution */
 static unsigned FSE_minTableLog(size_t srcSize, unsigned maxSymbolValue)
 {
-	U32 minBitsSrc = BIT_highbit32((U32)(srcSize - 1)) + 1;
-	U32 minBitsSymbols = BIT_highbit32(maxSymbolValue) + 2;
-	U32 minBits = minBitsSrc < minBitsSymbols ? minBitsSrc : minBitsSymbols;
-	return minBits;
+    U32 minBitsSrc = BIT_highbit32((U32)(srcSize - 1)) + 1;
+    U32 minBitsSymbols = BIT_highbit32(maxSymbolValue) + 2;
+    U32 minBits = minBitsSrc < minBitsSymbols ? minBitsSrc : minBitsSymbols;
+    return minBits;
 }
 
 unsigned FSE_optimalTableLog_internal(unsigned maxTableLog, size_t srcSize, unsigned maxSymbolValue, unsigned minus)
 {
-	U32 maxBitsSrc = BIT_highbit32((U32)(srcSize - 1)) - minus;
+    U32 maxBitsSrc = BIT_highbit32((U32)(srcSize - 1)) - minus;
     U32 tableLog = maxTableLog;
-	U32 minBits = FSE_minTableLog(srcSize, maxSymbolValue);
+    U32 minBits = FSE_minTableLog(srcSize, maxSymbolValue);
     if (tableLog==0) tableLog = FSE_DEFAULT_TABLELOG;
-	if (maxBitsSrc < tableLog) tableLog = maxBitsSrc;   /* Accuracy can be reduced */
-	if (minBits > tableLog) tableLog = minBits;   /* Need a minimum to safely represent all symbol values */
+    if (maxBitsSrc < tableLog) tableLog = maxBitsSrc;   /* Accuracy can be reduced */
+    if (minBits > tableLog) tableLog = minBits;   /* Need a minimum to safely represent all symbol values */
     if (tableLog < FSE_MIN_TABLELOG) tableLog = FSE_MIN_TABLELOG;
     if (tableLog > FSE_MAX_TABLELOG) tableLog = FSE_MAX_TABLELOG;
     return tableLog;
@@ -481,12 +506,13 @@ unsigned FSE_optimalTableLog(unsigned maxTableLog, size_t srcSize, unsigned maxS
 
 static size_t FSE_normalizeM2(short* norm, U32 tableLog, const unsigned* count, size_t total, U32 maxSymbolValue)
 {
+    short const NOT_YET_ASSIGNED = -2;
     U32 s;
     U32 distributed = 0;
     U32 ToDistribute;
 
     /* Init */
-    U32 lowThreshold = (U32)(total >> tableLog);
+    U32 const lowThreshold = (U32)(total >> tableLog);
     U32 lowOne = (U32)((total * 3) >> (tableLog + 1));
 
     for (s=0; s<=maxSymbolValue; s++) {
@@ -506,7 +532,8 @@ static size_t FSE_normalizeM2(short* norm, U32 tableLog, const unsigned* count,
             total -= count[s];
             continue;
         }
-        norm[s]=-2;
+
+        norm[s]=NOT_YET_ASSIGNED;
     }
     ToDistribute = (1 << tableLog) - distributed;
 
@@ -514,7 +541,7 @@ static size_t FSE_normalizeM2(short* norm, U32 tableLog, const unsigned* count,
         /* risk of rounding to zero */
         lowOne = (U32)((total * 3) / (ToDistribute * 2));
         for (s=0; s<=maxSymbolValue; s++) {
-            if ((norm[s] == -2) && (count[s] <= lowOne)) {
+            if ((norm[s] == NOT_YET_ASSIGNED) && (count[s] <= lowOne)) {
                 norm[s] = 1;
                 distributed++;
                 total -= count[s];
@@ -534,17 +561,23 @@ static size_t FSE_normalizeM2(short* norm, U32 tableLog, const unsigned* count,
         return 0;
     }
 
-    {
-        U64 const vStepLog = 62 - tableLog;
+    if (total == 0) {
+        /* all of the symbols were low enough for the lowOne or lowThreshold */
+        for (s=0; ToDistribute > 0; s = (s+1)%(maxSymbolValue+1))
+            if (norm[s] > 0) ToDistribute--, norm[s]++;
+        return 0;
+    }
+
+    {   U64 const vStepLog = 62 - tableLog;
         U64 const mid = (1ULL << (vStepLog-1)) - 1;
         U64 const rStep = ((((U64)1<<vStepLog) * ToDistribute) + mid) / total;   /* scale on remaining */
         U64 tmpTotal = mid;
         for (s=0; s<=maxSymbolValue; s++) {
-            if (norm[s]==-2) {
-                U64 end = tmpTotal + (count[s] * rStep);
-                U32 sStart = (U32)(tmpTotal >> vStepLog);
-                U32 sEnd = (U32)(end >> vStepLog);
-                U32 weight = sEnd - sStart;
+            if (norm[s]==NOT_YET_ASSIGNED) {
+                U64 const end = tmpTotal + (count[s] * rStep);
+                U32 const sStart = (U32)(tmpTotal >> vStepLog);
+                U32 const sEnd = (U32)(end >> vStepLog);
+                U32 const weight = sEnd - sStart;
                 if (weight < 1)
                     return ERROR(GENERIC);
                 norm[s] = (short)weight;
@@ -566,7 +599,6 @@ size_t FSE_normalizeCount (short* normalizedCounter, unsigned tableLog,
     if (tableLog < FSE_minTableLog(total, maxSymbolValue)) return ERROR(GENERIC);   /* Too small tableLog, compression potentially impossible */
 
     {   U32 const rtbTable[] = {     0, 473195, 504333, 520860, 550000, 700000, 750000, 830000 };
-
         U64 const scale = 62 - tableLog;
         U64 const step = ((U64)1<<62) / total;   /* <== here, one division ! */
         U64 const vStep = 1ULL<<(scale-20);
@@ -594,7 +626,7 @@ size_t FSE_normalizeCount (short* normalizedCounter, unsigned tableLog,
         }   }
         if (-stillToDistribute >= (normalizedCounter[largest] >> 1)) {
             /* corner case, need another normalization method */
-            size_t errorCode = FSE_normalizeM2(normalizedCounter, tableLog, count, total, maxSymbolValue);
+            size_t const errorCode = FSE_normalizeM2(normalizedCounter, tableLog, count, total, maxSymbolValue);
             if (FSE_isError(errorCode)) return errorCode;
         }
         else normalizedCounter[largest] += (short)stillToDistribute;
@@ -643,17 +675,15 @@ size_t FSE_buildCTable_raw (FSE_CTable* ct, unsigned nbBits)
 
     /* Build Symbol Transformation Table */
     {   const U32 deltaNbBits = (nbBits << 16) - (1 << nbBits);
-
         for (s=0; s<=maxSymbolValue; s++) {
             symbolTT[s].deltaNbBits = deltaNbBits;
             symbolTT[s].deltaFindState = s-1;
     }   }
 
-
     return 0;
 }
 
-/* fake FSE_CTable, for rle (100% always same symbol) input */
+/* fake FSE_CTable, for rle input (always same symbol) */
 size_t FSE_buildCTable_rle (FSE_CTable* ct, BYTE symbolValue)
 {
     void* ptr = ct;
@@ -685,14 +715,13 @@ static size_t FSE_compress_usingCTable_generic (void* dst, size_t dstSize,
     const BYTE* const iend = istart + srcSize;
     const BYTE* ip=iend;
 
-
     BIT_CStream_t bitC;
     FSE_CState_t CState1, CState2;
 
     /* init */
     if (srcSize <= 2) return 0;
-    { size_t const errorCode = BIT_initCStream(&bitC, dst, dstSize);
-      if (FSE_isError(errorCode)) return 0; }
+    { size_t const initError = BIT_initCStream(&bitC, dst, dstSize);
+      if (FSE_isError(initError)) return 0; /* not enough space available to write a bitstream */ }
 
 #define FSE_FLUSHBITS(s)  (fast ? BIT_flushBitsFast(s) : BIT_flushBits(s))
 
@@ -715,7 +744,7 @@ static size_t FSE_compress_usingCTable_generic (void* dst, size_t dstSize,
     }
 
     /* 2 or 4 encoding per loop */
-    for ( ; ip>istart ; ) {
+    while ( ip>istart ) {
 
         FSE_encodeSymbol(&bitC, &CState2, *--ip);
 
@@ -741,7 +770,7 @@ size_t FSE_compress_usingCTable (void* dst, size_t dstSize,
                            const void* src, size_t srcSize,
                            const FSE_CTable* ct)
 {
-    const unsigned fast = (dstSize >= FSE_BLOCKBOUND(srcSize));
+    unsigned const fast = (dstSize >= FSE_BLOCKBOUND(srcSize));
 
     if (fast)
         return FSE_compress_usingCTable_generic(dst, dstSize, src, srcSize, ct, 1);
@@ -752,58 +781,76 @@ size_t FSE_compress_usingCTable (void* dst, size_t dstSize,
 
 size_t FSE_compressBound(size_t size) { return FSE_COMPRESSBOUND(size); }
 
-size_t FSE_compress2 (void* dst, size_t dstSize, const void* src, size_t srcSize, unsigned maxSymbolValue, unsigned tableLog)
-{
-    const BYTE* const istart = (const BYTE*) src;
-    const BYTE* ip = istart;
+#define CHECK_V_F(e, f) size_t const e = f; if (ERR_isError(e)) return e   /* forward the stored error code; never evaluate f a second time */
+#define CHECK_F(f)   { CHECK_V_F(_var_err__, f); }
 
+/* FSE_compress_wksp() :
+ * Same as FSE_compress2(), but using an externally allocated scratch buffer (`workSpace`).
+ * `wkspSize` size must be `(1<<tableLog)`.
+ */
+size_t FSE_compress_wksp (void* dst, size_t dstSize, const void* src, size_t srcSize, unsigned maxSymbolValue, unsigned tableLog, void* workSpace, size_t wkspSize)
+{
     BYTE* const ostart = (BYTE*) dst;
     BYTE* op = ostart;
     BYTE* const oend = ostart + dstSize;
 
     U32   count[FSE_MAX_SYMBOL_VALUE+1];
     S16   norm[FSE_MAX_SYMBOL_VALUE+1];
-    CTable_max_t ct;
-    size_t errorCode;
+    FSE_CTable* CTable = (FSE_CTable*)workSpace;
+    size_t const CTableSize = FSE_CTABLE_SIZE_U32(tableLog, maxSymbolValue);
+    void* scratchBuffer = (void*)(CTable + CTableSize);
+    size_t const scratchBufferSize = wkspSize - (CTableSize * sizeof(FSE_CTable));
 
     /* init conditions */
-    if (srcSize <= 1) return 0;  /* Uncompressible */
+    if (wkspSize < FSE_WKSP_SIZE_U32(tableLog, maxSymbolValue)) return ERROR(tableLog_tooLarge);
+    if (srcSize <= 1) return 0;  /* Not compressible */
     if (!maxSymbolValue) maxSymbolValue = FSE_MAX_SYMBOL_VALUE;
     if (!tableLog) tableLog = FSE_DEFAULT_TABLELOG;
 
     /* Scan input and build symbol stats */
-    errorCode = FSE_count (count, &maxSymbolValue, ip, srcSize);
-    if (FSE_isError(errorCode)) return errorCode;
-    if (errorCode == srcSize) return 1;
-    if (errorCode == 1) return 0;   /* each symbol only present once */
-    if (errorCode < (srcSize >> 7)) return 0;   /* Heuristic : not compressible enough */
+    {   CHECK_V_F(maxCount, FSE_count_wksp(count, &maxSymbolValue, src, srcSize, (unsigned*)scratchBuffer) );
+        if (maxCount == srcSize) return 1;   /* only a single symbol in src : rle */
+        if (maxCount == 1) return 0;         /* each symbol present maximum once => not compressible */
+        if (maxCount < (srcSize >> 7)) return 0;   /* Heuristic : not compressible enough */
+    }
 
     tableLog = FSE_optimalTableLog(tableLog, srcSize, maxSymbolValue);
-    errorCode = FSE_normalizeCount (norm, tableLog, count, srcSize, maxSymbolValue);
-    if (FSE_isError(errorCode)) return errorCode;
+    CHECK_F( FSE_normalizeCount(norm, tableLog, count, srcSize, maxSymbolValue) );
 
     /* Write table description header */
-    errorCode = FSE_writeNCount (op, oend-op, norm, maxSymbolValue, tableLog);
-    if (FSE_isError(errorCode)) return errorCode;
-    op += errorCode;
+    {   CHECK_V_F(nc_err, FSE_writeNCount(op, oend-op, norm, maxSymbolValue, tableLog) );
+        op += nc_err;
+    }
 
     /* Compress */
-    errorCode = FSE_buildCTable (ct, norm, maxSymbolValue, tableLog);
-    if (FSE_isError(errorCode)) return errorCode;
-    errorCode = FSE_compress_usingCTable(op, oend - op, ip, srcSize, ct);
-    if (errorCode == 0) return 0;   /* not enough space for compressed data */
-    op += errorCode;
+    CHECK_F( FSE_buildCTable_wksp(CTable, norm, maxSymbolValue, tableLog, scratchBuffer, scratchBufferSize) );
+    {   CHECK_V_F(cSize, FSE_compress_usingCTable(op, oend - op, src, srcSize, CTable) );
+        if (cSize == 0) return 0;   /* not enough space for compressed data */
+        op += cSize;
+    }
 
     /* check compressibility */
-    if ( (size_t)(op-ostart) >= srcSize-1 )
-        return 0;
+    if ( (size_t)(op-ostart) >= srcSize-1 ) return 0;
 
     return op-ostart;
 }
 
-size_t FSE_compress (void* dst, size_t dstSize, const void* src, size_t srcSize)
+typedef struct {
+    FSE_CTable CTable_max[FSE_CTABLE_SIZE_U32(FSE_MAX_TABLELOG, FSE_MAX_SYMBOL_VALUE)];
+    BYTE scratchBuffer[1 << FSE_MAX_TABLELOG];
+} fseWkspMax_t;
+
+size_t FSE_compress2 (void* dst, size_t dstCapacity, const void* src, size_t srcSize, unsigned maxSymbolValue, unsigned tableLog)
 {
-    return FSE_compress2(dst, dstSize, src, (U32)srcSize, FSE_MAX_SYMBOL_VALUE, FSE_DEFAULT_TABLELOG);
+    fseWkspMax_t scratchBuffer;
+    FSE_STATIC_ASSERT(sizeof(scratchBuffer) >= FSE_WKSP_SIZE_U32(FSE_MAX_TABLELOG, FSE_MAX_SYMBOL_VALUE));   /* compilation failures here means scratchBuffer is not large enough */
+    if (tableLog > FSE_MAX_TABLELOG) return ERROR(tableLog_tooLarge);
+    return FSE_compress_wksp(dst, dstCapacity, src, srcSize, maxSymbolValue, tableLog, &scratchBuffer, sizeof(scratchBuffer));
+}
+
+size_t FSE_compress (void* dst, size_t dstCapacity, const void* src, size_t srcSize)
+{
+    return FSE_compress2(dst, dstCapacity, src, srcSize, FSE_MAX_SYMBOL_VALUE, FSE_DEFAULT_TABLELOG);
 }
 
 
diff --git a/contrib/libzstd/include/zstd/compress/huf_compress.c b/contrib/libzstd/include/zstd/compress/huf_compress.c
index b7d3d77a241..7af0789a9c5 100644
--- a/contrib/libzstd/include/zstd/compress/huf_compress.c
+++ b/contrib/libzstd/include/zstd/compress/huf_compress.c
@@ -56,6 +56,8 @@
 *  Error Management
 ****************************************************************/
 #define HUF_STATIC_ASSERT(c) { enum { HUF_static_assert = 1/(int)(!!(c)) }; }   /* use only *after* variable declarations */
+#define CHECK_V_F(e, f) size_t const e = f; if (ERR_isError(e)) return e   /* forward the stored error code; never evaluate f a second time */
+#define CHECK_F(f)   { CHECK_V_F(_var_err__, f); }
 
 
 /* **************************************************************
@@ -70,31 +72,73 @@ unsigned HUF_optimalTableLog(unsigned maxTableLog, size_t srcSize, unsigned maxS
 /* *******************************************************
 *  HUF : Huffman block compression
 *********************************************************/
+/* HUF_compressWeights() :
+ * Same as FSE_compress(), but dedicated to huff0's weights compression.
+ * The use case needs much less stack memory.
+ * Note : all elements within weightTable are supposed to be <= HUF_TABLELOG_MAX.
+ */
+#define MAX_FSE_TABLELOG_FOR_HUFF_HEADER 6
+size_t HUF_compressWeights (void* dst, size_t dstSize, const void* weightTable, size_t wtSize)
+{
+    BYTE* const ostart = (BYTE*) dst;
+    BYTE* op = ostart;
+    BYTE* const oend = ostart + dstSize;
+
+    U32 maxSymbolValue = HUF_TABLELOG_MAX;
+    U32 tableLog = MAX_FSE_TABLELOG_FOR_HUFF_HEADER;
+
+    FSE_CTable CTable[FSE_CTABLE_SIZE_U32(MAX_FSE_TABLELOG_FOR_HUFF_HEADER, HUF_TABLELOG_MAX)];
+    BYTE scratchBuffer[1<<MAX_FSE_TABLELOG_FOR_HUFF_HEADER];
+
+    U32 count[HUF_TABLELOG_MAX+1];
+    S16 norm[HUF_TABLELOG_MAX+1];
+
+    /* init conditions */
+    if (wtSize <= 1) return 0;  /* Not compressible */
+
+    /* Scan input and build symbol stats */
+    {   CHECK_V_F(maxCount, FSE_count_simple(count, &maxSymbolValue, weightTable, wtSize) );
+        if (maxCount == wtSize) return 1;   /* only a single symbol in src : rle */
+        if (maxCount == 1) return 0;         /* each symbol present maximum once => not compressible */
+    }
+
+    tableLog = FSE_optimalTableLog(tableLog, wtSize, maxSymbolValue);
+    CHECK_F( FSE_normalizeCount(norm, tableLog, count, wtSize, maxSymbolValue) );
+
+    /* Write table description header */
+    {   CHECK_V_F(hSize, FSE_writeNCount(op, oend-op, norm, maxSymbolValue, tableLog) );
+        op += hSize;
+    }
+
+    /* Compress */
+    CHECK_F( FSE_buildCTable_wksp(CTable, norm, maxSymbolValue, tableLog, scratchBuffer, sizeof(scratchBuffer)) );
+    {   CHECK_V_F(cSize, FSE_compress_usingCTable(op, oend - op, weightTable, wtSize, CTable) );
+        if (cSize == 0) return 0;   /* not enough space for compressed data */
+        op += cSize;
+    }
+
+    return op-ostart;
+}
+
+
 struct HUF_CElt_s {
   U16  val;
   BYTE nbBits;
 };   /* typedef'd to HUF_CElt within "huf.h" */
 
-typedef struct nodeElt_s {
-    U32 count;
-    U16 parent;
-    BYTE byte;
-    BYTE nbBits;
-} nodeElt;
-
 /*! HUF_writeCTable() :
-    `CTable` : huffman tree to save, using huf representation.
+    `CTable` : Huffman tree to save, using huf representation.
     @return : size of saved CTable */
 size_t HUF_writeCTable (void* dst, size_t maxDstSize,
                         const HUF_CElt* CTable, U32 maxSymbolValue, U32 huffLog)
 {
-    BYTE bitsToWeight[HUF_TABLELOG_MAX + 1];
+    BYTE bitsToWeight[HUF_TABLELOG_MAX + 1];   /* precomputed conversion table */
     BYTE huffWeight[HUF_SYMBOLVALUE_MAX];
     BYTE* op = (BYTE*)dst;
     U32 n;
 
      /* check conditions */
-    if (maxSymbolValue > HUF_SYMBOLVALUE_MAX) return ERROR(GENERIC);
+    if (maxSymbolValue > HUF_SYMBOLVALUE_MAX) return ERROR(maxSymbolValue_tooLarge);
 
     /* convert to weight */
     bitsToWeight[0] = 0;
@@ -103,38 +147,33 @@ size_t HUF_writeCTable (void* dst, size_t maxDstSize,
     for (n=0; n<maxSymbolValue; n++)
         huffWeight[n] = bitsToWeight[CTable[n].nbBits];
 
-    {   size_t const size = FSE_compress(op+1, maxDstSize-1, huffWeight, maxSymbolValue);
-        if (FSE_isError(size)) return size;
-        if ((size>1) & (size < maxSymbolValue/2)) {   /* FSE compressed */
-            op[0] = (BYTE)size;
-            return size+1;
-        }
-    }
+    /* attempt weights compression by FSE */
+    {   CHECK_V_F(hSize, HUF_compressWeights(op+1, maxDstSize-1, huffWeight, maxSymbolValue) );
+        if ((hSize>1) & (hSize < maxSymbolValue/2)) {   /* FSE compressed */
+            op[0] = (BYTE)hSize;
+            return hSize+1;
+    }   }
 
-    /* raw values */
-    if (maxSymbolValue > (256-128)) return ERROR(GENERIC);   /* should not happen */
+    /* write raw values as 4-bits (max : 15) */
+    if (maxSymbolValue > (256-128)) return ERROR(GENERIC);   /* should not happen : likely means source cannot be compressed */
     if (((maxSymbolValue+1)/2) + 1 > maxDstSize) return ERROR(dstSize_tooSmall);   /* not enough space within dst buffer */
     op[0] = (BYTE)(128 /*special case*/ + (maxSymbolValue-1));
-    huffWeight[maxSymbolValue] = 0;   /* to be sure it doesn't cause issue in final combination */
+    huffWeight[maxSymbolValue] = 0;   /* to be sure it doesn't cause msan issue in final combination */
     for (n=0; n<maxSymbolValue; n+=2)
         op[(n/2)+1] = (BYTE)((huffWeight[n] << 4) + huffWeight[n+1]);
     return ((maxSymbolValue+1)/2) + 1;
-
 }
 
 
 size_t HUF_readCTable (HUF_CElt* CTable, U32 maxSymbolValue, const void* src, size_t srcSize)
 {
-    BYTE huffWeight[HUF_SYMBOLVALUE_MAX + 1];
+    BYTE huffWeight[HUF_SYMBOLVALUE_MAX + 1];   /* init not required, even though some static analyzer may complain */
     U32 rankVal[HUF_TABLELOG_ABSOLUTEMAX + 1];   /* large enough for values from 0 to 16 */
     U32 tableLog = 0;
-    size_t readSize;
     U32 nbSymbols = 0;
-    /*memset(huffWeight, 0, sizeof(huffWeight));*/   /* is not necessary, even though some analyzer complain ... */
 
     /* get symbol weights */
-    readSize = HUF_readStats(huffWeight, HUF_SYMBOLVALUE_MAX+1, rankVal, &nbSymbols, &tableLog, src, srcSize);
-    if (HUF_isError(readSize)) return readSize;
+    CHECK_V_F(readSize, HUF_readStats(huffWeight, HUF_SYMBOLVALUE_MAX+1, rankVal, &nbSymbols, &tableLog, src, srcSize));
 
     /* check result */
     if (tableLog > HUF_TABLELOG_MAX) return ERROR(tableLog_tooLarge);
@@ -155,13 +194,14 @@ size_t HUF_readCTable (HUF_CElt* CTable, U32 maxSymbolValue, const void* src, si
     }   }
 
     /* fill val */
-    {   U16 nbPerRank[HUF_TABLELOG_MAX+1] = {0};
-        U16 valPerRank[HUF_TABLELOG_MAX+1] = {0};
+    {   U16 nbPerRank[HUF_TABLELOG_MAX+2]  = {0};  /* support w=0=>n=tableLog+1 */
+        U16 valPerRank[HUF_TABLELOG_MAX+2] = {0};
         { U32 n; for (n=0; n<nbSymbols; n++) nbPerRank[CTable[n].nbBits]++; }
         /* determine stating value per rank */
+        valPerRank[tableLog+1] = 0;   /* for w==0 */
         {   U16 min = 0;
-            U32 n; for (n=HUF_TABLELOG_MAX; n>0; n--) {
-                valPerRank[n] = min;      /* get starting value within each rank */
+            U32 n; for (n=tableLog; n>0; n--) {  /* start at n=tablelog <-> w=1 */
+                valPerRank[n] = min;     /* get starting value within each rank */
                 min += nbPerRank[n];
                 min >>= 1;
         }   }
@@ -173,6 +213,13 @@ size_t HUF_readCTable (HUF_CElt* CTable, U32 maxSymbolValue, const void* src, si
 }
 
 
+typedef struct nodeElt_s {
+    U32 count;
+    U16 parent;
+    BYTE byte;
+    BYTE nbBits;
+} nodeElt;
+
 static U32 HUF_setMaxHeight(nodeElt* huffNode, U32 lastNonNull, U32 maxNbBits)
 {
     const U32 largestBits = huffNode[lastNonNull].nbBits;
@@ -219,7 +266,8 @@ static U32 HUF_setMaxHeight(nodeElt* huffNode, U32 lastNonNull, U32 maxNbBits)
                         if (highTotal <= lowTotal) break;
                 }   }
                 /* only triggered when no more rank 1 symbol left => find closest one (note : there is necessarily at least one !) */
-                while ((nBitsToDecrease<=HUF_TABLELOG_MAX) && (rankLast[nBitsToDecrease] == noSymbol))  /* HUF_MAX_TABLELOG test just to please gcc 5+; but it should not be necessary */
+                /* HUF_MAX_TABLELOG test just to please gcc 5+; but it should not be necessary */
+                while ((nBitsToDecrease<=HUF_TABLELOG_MAX) && (rankLast[nBitsToDecrease] == noSymbol))
                     nBitsToDecrease ++;
                 totalCost -= 1 << (nBitsToDecrease-1);
                 if (rankLast[nBitsToDecrease-1] == noSymbol)
@@ -278,20 +326,26 @@ static void HUF_sort(nodeElt* huffNode, const U32* count, U32 maxSymbolValue)
 }
 
 
+/** HUF_buildCTable_wksp() :
+ *  Same as HUF_buildCTable(), but using externally allocated scratch buffer.
+ *  `workSpace` must be aligned on 4-bytes boundaries, and be at least as large as a table of 1024 unsigned.
+ */
 #define STARTNODE (HUF_SYMBOLVALUE_MAX+1)
-size_t HUF_buildCTable (HUF_CElt* tree, const U32* count, U32 maxSymbolValue, U32 maxNbBits)
+typedef nodeElt huffNodeTable[2*HUF_SYMBOLVALUE_MAX+1 +1];
+size_t HUF_buildCTable_wksp (HUF_CElt* tree, const U32* count, U32 maxSymbolValue, U32 maxNbBits, void* workSpace, size_t wkspSize)
 {
-    nodeElt huffNode0[2*HUF_SYMBOLVALUE_MAX+1 +1];
-    nodeElt* huffNode = huffNode0 + 1;
+    nodeElt* const huffNode0 = (nodeElt*)workSpace;
+    nodeElt* const huffNode = huffNode0+1;
     U32 n, nonNullRank;
     int lowS, lowN;
     U16 nodeNb = STARTNODE;
     U32 nodeRoot;
 
     /* safety checks */
+    if (wkspSize < sizeof(huffNodeTable)) return ERROR(GENERIC);   /* workSpace is not large enough */
     if (maxNbBits == 0) maxNbBits = HUF_TABLELOG_DEFAULT;
     if (maxSymbolValue > HUF_SYMBOLVALUE_MAX) return ERROR(GENERIC);
-    memset(huffNode0, 0, sizeof(huffNode0));
+    memset(huffNode0, 0, sizeof(huffNodeTable));
 
     /* sort, decreasing order */
     HUF_sort(huffNode, count, maxSymbolValue);
@@ -304,7 +358,7 @@ size_t HUF_buildCTable (HUF_CElt* tree, const U32* count, U32 maxSymbolValue, U3
     huffNode[lowS].parent = huffNode[lowS-1].parent = nodeNb;
     nodeNb++; lowS-=2;
     for (n=nodeNb; n<=nodeRoot; n++) huffNode[n].count = (U32)(1U<<30);
-    huffNode0[0].count = (U32)(1U<<31);
+    huffNode0[0].count = (U32)(1U<<31);  /* fake entry, strong barrier */
 
     /* create parents */
     while (nodeNb <= nodeRoot) {
@@ -347,6 +401,34 @@ size_t HUF_buildCTable (HUF_CElt* tree, const U32* count, U32 maxSymbolValue, U3
     return maxNbBits;
 }
 
+/** HUF_buildCTable() :
+ *  Note : count is used before tree is written, so they can safely overlap
+ */
+size_t HUF_buildCTable (HUF_CElt* tree, const U32* count, U32 maxSymbolValue, U32 maxNbBits)
+{
+    huffNodeTable nodeTable;
+    return HUF_buildCTable_wksp(tree, count, maxSymbolValue, maxNbBits, nodeTable, sizeof(nodeTable));
+}
+
+static size_t HUF_estimateCompressedSize(HUF_CElt* CTable, const unsigned* count, unsigned maxSymbolValue)
+{
+    size_t nbBits = 0;
+    int s;
+    for (s = 0; s <= (int)maxSymbolValue; ++s) {
+        nbBits += CTable[s].nbBits * count[s];
+    }
+    return nbBits >> 3;
+}
+
+static int HUF_validateCTable(const HUF_CElt* CTable, const unsigned* count, unsigned maxSymbolValue) {
+  int bad = 0;
+  int s;
+  for (s = 0; s <= (int)maxSymbolValue; ++s) {
+    bad |= (count[s] != 0) & (CTable[s].nbBits == 0);
+  }
+  return !bad;
+}
+
 static void HUF_encodeSymbol(BIT_CStream_t* bitCPtr, U32 symbol, const HUF_CElt* CTable)
 {
     BIT_addBitsFast(bitCPtr, CTable[symbol].val, CTable[symbol].nbBits);
@@ -374,20 +456,23 @@ size_t HUF_compress1X_usingCTable(void* dst, size_t dstSize, const void* src, si
 
     /* init */
     if (dstSize < 8) return 0;   /* not enough space to compress */
-    { size_t const errorCode = BIT_initCStream(&bitC, op, oend-op);
-      if (HUF_isError(errorCode)) return 0; }
+    { size_t const initErr = BIT_initCStream(&bitC, op, oend-op);
+      if (HUF_isError(initErr)) return 0; }
 
     n = srcSize & ~3;  /* join to mod 4 */
     switch (srcSize & 3)
     {
         case 3 : HUF_encodeSymbol(&bitC, ip[n+ 2], CTable);
                  HUF_FLUSHBITS_2(&bitC);
+		 /* fall-through */
         case 2 : HUF_encodeSymbol(&bitC, ip[n+ 1], CTable);
                  HUF_FLUSHBITS_1(&bitC);
+		 /* fall-through */
         case 1 : HUF_encodeSymbol(&bitC, ip[n+ 0], CTable);
                  HUF_FLUSHBITS(&bitC);
-        case 0 :
-        default: ;
+		 /* fall-through */
+        case 0 : /* fall-through */
+        default: break;
     }
 
     for (; n>0; n-=4) {  /* note : n&3==0 at this stage */
@@ -418,32 +503,28 @@ size_t HUF_compress4X_usingCTable(void* dst, size_t dstSize, const void* src, si
     if (srcSize < 12) return 0;   /* no saving possible : too small input */
     op += 6;   /* jumpTable */
 
-    {   size_t const cSize = HUF_compress1X_usingCTable(op, oend-op, ip, segmentSize, CTable);
-        if (HUF_isError(cSize)) return cSize;
+    {   CHECK_V_F(cSize, HUF_compress1X_usingCTable(op, oend-op, ip, segmentSize, CTable) );
         if (cSize==0) return 0;
         MEM_writeLE16(ostart, (U16)cSize);
         op += cSize;
     }
 
     ip += segmentSize;
-    {   size_t const cSize = HUF_compress1X_usingCTable(op, oend-op, ip, segmentSize, CTable);
-        if (HUF_isError(cSize)) return cSize;
+    {   CHECK_V_F(cSize, HUF_compress1X_usingCTable(op, oend-op, ip, segmentSize, CTable) );
         if (cSize==0) return 0;
         MEM_writeLE16(ostart+2, (U16)cSize);
         op += cSize;
     }
 
     ip += segmentSize;
-    {   size_t const cSize = HUF_compress1X_usingCTable(op, oend-op, ip, segmentSize, CTable);
-        if (HUF_isError(cSize)) return cSize;
+    {   CHECK_V_F(cSize, HUF_compress1X_usingCTable(op, oend-op, ip, segmentSize, CTable) );
         if (cSize==0) return 0;
         MEM_writeLE16(ostart+4, (U16)cSize);
         op += cSize;
     }
 
     ip += segmentSize;
-    {   size_t const cSize = HUF_compress1X_usingCTable(op, oend-op, ip, iend-ip, CTable);
-        if (HUF_isError(cSize)) return cSize;
+    {   CHECK_V_F(cSize, HUF_compress1X_usingCTable(op, oend-op, ip, iend-ip, CTable) );
         if (cSize==0) return 0;
         op += cSize;
     }
@@ -452,20 +533,43 @@ size_t HUF_compress4X_usingCTable(void* dst, size_t dstSize, const void* src, si
 }
 
 
+static size_t HUF_compressCTable_internal(
+                BYTE* const ostart, BYTE* op, BYTE* const oend,
+                const void* src, size_t srcSize,
+                unsigned singleStream, const HUF_CElt* CTable)
+{
+    size_t const cSize = singleStream ?
+                         HUF_compress1X_usingCTable(op, oend - op, src, srcSize, CTable) :
+                         HUF_compress4X_usingCTable(op, oend - op, src, srcSize, CTable);
+    if (HUF_isError(cSize)) { return cSize; }
+    if (cSize==0) { return 0; }   /* uncompressible */
+    op += cSize;
+    /* check compressibility */
+    if ((size_t)(op-ostart) >= srcSize-1) { return 0; }
+    return op-ostart;
+}
+
+
+/* `workSpace` must be a table of at least 1536 unsigned (6 KB) : count + CTable + huffNodeTable, see the size check below */
 static size_t HUF_compress_internal (
                 void* dst, size_t dstSize,
                 const void* src, size_t srcSize,
                 unsigned maxSymbolValue, unsigned huffLog,
-                unsigned singleStream)
+                unsigned singleStream,
+                void* workSpace, size_t wkspSize,
+                HUF_CElt* oldHufTable, HUF_repeat* repeat, int preferRepeat)
 {
     BYTE* const ostart = (BYTE*)dst;
     BYTE* const oend = ostart + dstSize;
     BYTE* op = ostart;
 
-    U32 count[HUF_SYMBOLVALUE_MAX+1];
-    HUF_CElt CTable[HUF_SYMBOLVALUE_MAX+1];
+    U32* count;
+    size_t const countSize = sizeof(U32) * (HUF_SYMBOLVALUE_MAX + 1);
+    HUF_CElt* CTable;
+    size_t const CTableSize = sizeof(HUF_CElt) * (HUF_SYMBOLVALUE_MAX + 1);
 
     /* checks & inits */
+    if (wkspSize < sizeof(huffNodeTable) + countSize + CTableSize) return ERROR(GENERIC);
     if (!srcSize) return 0;  /* Uncompressed (note : 1 means rle, so first byte must be correct) */
     if (!dstSize) return 0;  /* cannot fit within dst budget */
     if (srcSize > HUF_BLOCKSIZE_MAX) return ERROR(srcSize_wrong);   /* current block size limit */
@@ -473,59 +577,111 @@ static size_t HUF_compress_internal (
     if (!maxSymbolValue) maxSymbolValue = HUF_SYMBOLVALUE_MAX;
     if (!huffLog) huffLog = HUF_TABLELOG_DEFAULT;
 
+    count = (U32*)workSpace;
+    workSpace = (BYTE*)workSpace + countSize;
+    wkspSize -= countSize;
+    CTable = (HUF_CElt*)workSpace;
+    workSpace = (BYTE*)workSpace + CTableSize;
+    wkspSize -= CTableSize;
+
+    /* Heuristic : If we don't need to check the validity of the old table use the old table for small inputs */
+    if (preferRepeat && repeat && *repeat == HUF_repeat_valid) {
+        return HUF_compressCTable_internal(ostart, op, oend, src, srcSize, singleStream, oldHufTable);
+    }
+
     /* Scan input and build symbol stats */
-    {   size_t const largest = FSE_count (count, &maxSymbolValue, (const BYTE*)src, srcSize);
-        if (HUF_isError(largest)) return largest;
+    {   CHECK_V_F(largest, FSE_count_wksp (count, &maxSymbolValue, (const BYTE*)src, srcSize, (U32*)workSpace) );
         if (largest == srcSize) { *ostart = ((const BYTE*)src)[0]; return 1; }   /* single symbol, rle */
         if (largest <= (srcSize >> 7)+1) return 0;   /* Fast heuristic : not compressible enough */
     }
 
+    /* Check validity of previous table */
+    if (repeat && *repeat == HUF_repeat_check && !HUF_validateCTable(oldHufTable, count, maxSymbolValue)) {
+        *repeat = HUF_repeat_none;
+    }
+    /* Heuristic : use existing table for small inputs */
+    if (preferRepeat && repeat && *repeat != HUF_repeat_none) {
+        return HUF_compressCTable_internal(ostart, op, oend, src, srcSize, singleStream, oldHufTable);
+    }
+
     /* Build Huffman Tree */
     huffLog = HUF_optimalTableLog(huffLog, srcSize, maxSymbolValue);
-    {   size_t const maxBits = HUF_buildCTable (CTable, count, maxSymbolValue, huffLog);
-        if (HUF_isError(maxBits)) return maxBits;
+    {   CHECK_V_F(maxBits, HUF_buildCTable_wksp (CTable, count, maxSymbolValue, huffLog, workSpace, wkspSize) );
         huffLog = (U32)maxBits;
+        /* Zero the unused symbols so we can check it for validity */
+        memset(CTable + maxSymbolValue + 1, 0, CTableSize - (maxSymbolValue + 1) * sizeof(HUF_CElt));
     }
 
     /* Write table description header */
-    {   size_t const hSize = HUF_writeCTable (op, dstSize, CTable, maxSymbolValue, huffLog);
-        if (HUF_isError(hSize)) return hSize;
-        if (hSize + 12 >= srcSize) return 0;   /* not useful to try compression */
+    {   CHECK_V_F(hSize, HUF_writeCTable (op, dstSize, CTable, maxSymbolValue, huffLog) );
+        /* Check if using the previous table will be beneficial */
+        if (repeat && *repeat != HUF_repeat_none) {
+            size_t const oldSize = HUF_estimateCompressedSize(oldHufTable, count, maxSymbolValue);
+            size_t const newSize = HUF_estimateCompressedSize(CTable, count, maxSymbolValue);
+            if (oldSize <= hSize + newSize || hSize + 12 >= srcSize) {
+                return HUF_compressCTable_internal(ostart, op, oend, src, srcSize, singleStream, oldHufTable);
+            }
+        }
+        /* Use the new table */
+        if (hSize + 12ul >= srcSize) { return 0; }
         op += hSize;
+        if (repeat) { *repeat = HUF_repeat_none; }
+        if (oldHufTable) { memcpy(oldHufTable, CTable, CTableSize); } /* Save the new table */
     }
-
-    /* Compress */
-    {   size_t const cSize = (singleStream) ?
-                            HUF_compress1X_usingCTable(op, oend - op, src, srcSize, CTable) :   /* single segment */
-                            HUF_compress4X_usingCTable(op, oend - op, src, srcSize, CTable);
-        if (HUF_isError(cSize)) return cSize;
-        if (cSize==0) return 0;   /* uncompressible */
-        op += cSize;
-    }
-
-    /* check compressibility */
-    if ((size_t)(op-ostart) >= srcSize-1)
-        return 0;
-
-    return op-ostart;
+    return HUF_compressCTable_internal(ostart, op, oend, src, srcSize, singleStream, CTable);
 }
 
 
+size_t HUF_compress1X_wksp (void* dst, size_t dstSize,
+                      const void* src, size_t srcSize,
+                      unsigned maxSymbolValue, unsigned huffLog,
+                      void* workSpace, size_t wkspSize)
+{
+    return HUF_compress_internal(dst, dstSize, src, srcSize, maxSymbolValue, huffLog, 1 /* single stream */, workSpace, wkspSize, NULL, NULL, 0);
+}
+
+size_t HUF_compress1X_repeat (void* dst, size_t dstSize,
+                      const void* src, size_t srcSize,
+                      unsigned maxSymbolValue, unsigned huffLog,
+                      void* workSpace, size_t wkspSize,
+                      HUF_CElt* hufTable, HUF_repeat* repeat, int preferRepeat)
+{
+    return HUF_compress_internal(dst, dstSize, src, srcSize, maxSymbolValue, huffLog, 1 /* single stream */, workSpace, wkspSize, hufTable, repeat, preferRepeat);
+}
+
 size_t HUF_compress1X (void* dst, size_t dstSize,
                  const void* src, size_t srcSize,
                  unsigned maxSymbolValue, unsigned huffLog)
 {
-    return HUF_compress_internal(dst, dstSize, src, srcSize, maxSymbolValue, huffLog, 1);
+    unsigned workSpace[(sizeof(huffNodeTable) + (HUF_SYMBOLVALUE_MAX + 1) * (sizeof(U32) + sizeof(HUF_CElt))) / sizeof(unsigned)];   /* nodes + count + CTable : minimum accepted by HUF_compress_internal(); 1024 unsigned was rejected with ERROR(GENERIC) */
+    return HUF_compress1X_wksp(dst, dstSize, src, srcSize, maxSymbolValue, huffLog, workSpace, sizeof(workSpace));
+}
+
+size_t HUF_compress4X_wksp (void* dst, size_t dstSize,
+                      const void* src, size_t srcSize,
+                      unsigned maxSymbolValue, unsigned huffLog,
+                      void* workSpace, size_t wkspSize)
+{
+    return HUF_compress_internal(dst, dstSize, src, srcSize, maxSymbolValue, huffLog, 0 /* 4 streams */, workSpace, wkspSize, NULL, NULL, 0);
+}
+
+size_t HUF_compress4X_repeat (void* dst, size_t dstSize,
+                      const void* src, size_t srcSize,
+                      unsigned maxSymbolValue, unsigned huffLog,
+                      void* workSpace, size_t wkspSize,
+                      HUF_CElt* hufTable, HUF_repeat* repeat, int preferRepeat)
+{
+    return HUF_compress_internal(dst, dstSize, src, srcSize, maxSymbolValue, huffLog, 0 /* 4 streams */, workSpace, wkspSize, hufTable, repeat, preferRepeat);
 }
 
 size_t HUF_compress2 (void* dst, size_t dstSize,
                 const void* src, size_t srcSize,
                 unsigned maxSymbolValue, unsigned huffLog)
 {
-    return HUF_compress_internal(dst, dstSize, src, srcSize, maxSymbolValue, huffLog, 0);
+    unsigned workSpace[(sizeof(huffNodeTable) + (HUF_SYMBOLVALUE_MAX + 1) * (sizeof(U32) + sizeof(HUF_CElt))) / sizeof(unsigned)];   /* nodes + count + CTable : minimum accepted by HUF_compress_internal(); 1024 unsigned was rejected with ERROR(GENERIC) */
+    return HUF_compress4X_wksp(dst, dstSize, src, srcSize, maxSymbolValue, huffLog, workSpace, sizeof(workSpace));
 }
 
-
 size_t HUF_compress (void* dst, size_t maxDstSize, const void* src, size_t srcSize)
 {
     return HUF_compress2(dst, maxDstSize, src, (U32)srcSize, 255, HUF_TABLELOG_DEFAULT);
diff --git a/contrib/libzstd/include/zstd/compress/zbuff_compress.c b/contrib/libzstd/include/zstd/compress/zbuff_compress.c
deleted file mode 100644
index 5095b43e644..00000000000
--- a/contrib/libzstd/include/zstd/compress/zbuff_compress.c
+++ /dev/null
@@ -1,319 +0,0 @@
-/**
- * Copyright (c) 2016-present, Yann Collet, Facebook, Inc.
- * All rights reserved.
- *
- * This source code is licensed under the BSD-style license found in the
- * LICENSE file in the root directory of this source tree. An additional grant
- * of patent rights can be found in the PATENTS file in the same directory.
- */
-
-
-
-/* *************************************
-*  Dependencies
-***************************************/
-#include <stdlib.h>
-#include "error_private.h"
-#include "zstd_internal.h"  /* MIN, ZSTD_BLOCKHEADERSIZE, defaultCustomMem */
-#define ZBUFF_STATIC_LINKING_ONLY
-#include "zbuff.h"
-
-
-/* *************************************
-*  Constants
-***************************************/
-static size_t const ZBUFF_endFrameSize = ZSTD_BLOCKHEADERSIZE;
-
-
-/*-***********************************************************
-*  Streaming compression
-*
-*  A ZBUFF_CCtx object is required to track streaming operation.
-*  Use ZBUFF_createCCtx() and ZBUFF_freeCCtx() to create/release resources.
-*  Use ZBUFF_compressInit() to start a new compression operation.
-*  ZBUFF_CCtx objects can be reused multiple times.
-*
-*  Use ZBUFF_compressContinue() repetitively to consume your input.
-*  *srcSizePtr and *dstCapacityPtr can be any size.
-*  The function will report how many bytes were read or written by modifying *srcSizePtr and *dstCapacityPtr.
-*  Note that it may not consume the entire input, in which case it's up to the caller to call again the function with remaining input.
-*  The content of dst will be overwritten (up to *dstCapacityPtr) at each function call, so save its content if it matters or change dst .
-*  @return : a hint to preferred nb of bytes to use as input for next function call (it's only a hint, to improve latency)
-*            or an error code, which can be tested using ZBUFF_isError().
-*
-*  ZBUFF_compressFlush() can be used to instruct ZBUFF to compress and output whatever remains within its buffer.
-*  Note that it will not output more than *dstCapacityPtr.
-*  Therefore, some content might still be left into its internal buffer if dst buffer is too small.
-*  @return : nb of bytes still present into internal buffer (0 if it's empty)
-*            or an error code, which can be tested using ZBUFF_isError().
-*
-*  ZBUFF_compressEnd() instructs to finish a frame.
-*  It will perform a flush and write frame epilogue.
-*  Similar to ZBUFF_compressFlush(), it may not be able to output the entire internal buffer content if *dstCapacityPtr is too small.
-*  @return : nb of bytes still present into internal buffer (0 if it's empty)
-*            or an error code, which can be tested using ZBUFF_isError().
-*
-*  Hint : recommended buffer sizes (not compulsory)
-*  input : ZSTD_BLOCKSIZE_MAX (128 KB), internal unit size, it improves latency to use this value.
-*  output : ZSTD_compressBound(ZSTD_BLOCKSIZE_MAX) + ZSTD_blockHeaderSize + ZBUFF_endFrameSize : ensures it's always possible to write/flush/end a full block at best speed.
-* ***********************************************************/
-
-typedef enum { ZBUFFcs_init, ZBUFFcs_load, ZBUFFcs_flush, ZBUFFcs_final } ZBUFF_cStage;
-
-/* *** Resources *** */
-struct ZBUFF_CCtx_s {
-    ZSTD_CCtx* zc;
-    char*  inBuff;
-    size_t inBuffSize;
-    size_t inToCompress;
-    size_t inBuffPos;
-    size_t inBuffTarget;
-    size_t blockSize;
-    char*  outBuff;
-    size_t outBuffSize;
-    size_t outBuffContentSize;
-    size_t outBuffFlushedSize;
-    ZBUFF_cStage stage;
-    U32    checksum;
-    U32    frameEnded;
-    ZSTD_customMem customMem;
-};   /* typedef'd tp ZBUFF_CCtx within "zbuff.h" */
-
-ZBUFF_CCtx* ZBUFF_createCCtx(void)
-{
-    return ZBUFF_createCCtx_advanced(defaultCustomMem);
-}
-
-ZBUFF_CCtx* ZBUFF_createCCtx_advanced(ZSTD_customMem customMem)
-{
-    ZBUFF_CCtx* zbc;
-
-    if (!customMem.customAlloc && !customMem.customFree)
-        customMem = defaultCustomMem;
-
-    if (!customMem.customAlloc || !customMem.customFree)
-        return NULL;
-
-    zbc = (ZBUFF_CCtx*)customMem.customAlloc(customMem.opaque, sizeof(ZBUFF_CCtx));
-    if (zbc==NULL) return NULL;
-    memset(zbc, 0, sizeof(ZBUFF_CCtx));
-    memcpy(&zbc->customMem, &customMem, sizeof(ZSTD_customMem));
-    zbc->zc = ZSTD_createCCtx_advanced(customMem);
-    if (zbc->zc == NULL) { ZBUFF_freeCCtx(zbc); return NULL; }
-    return zbc;
-}
-
-size_t ZBUFF_freeCCtx(ZBUFF_CCtx* zbc)
-{
-    if (zbc==NULL) return 0;   /* support free on NULL */
-    ZSTD_freeCCtx(zbc->zc);
-    if (zbc->inBuff) zbc->customMem.customFree(zbc->customMem.opaque, zbc->inBuff);
-    if (zbc->outBuff) zbc->customMem.customFree(zbc->customMem.opaque, zbc->outBuff);
-    zbc->customMem.customFree(zbc->customMem.opaque, zbc);
-    return 0;
-}
-
-
-/* ======   Initialization   ====== */
-
-size_t ZBUFF_compressInit_advanced(ZBUFF_CCtx* zbc,
-                                   const void* dict, size_t dictSize,
-                                   ZSTD_parameters params, unsigned long long pledgedSrcSize)
-{
-    /* allocate buffers */
-    {   size_t const neededInBuffSize = (size_t)1 << params.cParams.windowLog;
-        if (zbc->inBuffSize < neededInBuffSize) {
-            zbc->inBuffSize = neededInBuffSize;
-            zbc->customMem.customFree(zbc->customMem.opaque, zbc->inBuff);   /* should not be necessary */
-            zbc->inBuff = (char*)zbc->customMem.customAlloc(zbc->customMem.opaque, neededInBuffSize);
-            if (zbc->inBuff == NULL) return ERROR(memory_allocation);
-        }
-        zbc->blockSize = MIN(ZSTD_BLOCKSIZE_ABSOLUTEMAX, neededInBuffSize);
-    }
-    if (zbc->outBuffSize < ZSTD_compressBound(zbc->blockSize)+1) {
-        zbc->outBuffSize = ZSTD_compressBound(zbc->blockSize)+1;
-        zbc->customMem.customFree(zbc->customMem.opaque, zbc->outBuff);   /* should not be necessary */
-        zbc->outBuff = (char*)zbc->customMem.customAlloc(zbc->customMem.opaque, zbc->outBuffSize);
-        if (zbc->outBuff == NULL) return ERROR(memory_allocation);
-    }
-
-    { size_t const errorCode = ZSTD_compressBegin_advanced(zbc->zc, dict, dictSize, params, pledgedSrcSize);
-      if (ZSTD_isError(errorCode)) return errorCode; }
-
-    zbc->inToCompress = 0;
-    zbc->inBuffPos = 0;
-    zbc->inBuffTarget = zbc->blockSize;
-    zbc->outBuffContentSize = zbc->outBuffFlushedSize = 0;
-    zbc->stage = ZBUFFcs_load;
-    zbc->checksum = params.fParams.checksumFlag > 0;
-    zbc->frameEnded = 0;
-    return 0;   /* ready to go */
-}
-
-
-size_t ZBUFF_compressInitDictionary(ZBUFF_CCtx* zbc, const void* dict, size_t dictSize, int compressionLevel)
-{
-    ZSTD_parameters const params = ZSTD_getParams(compressionLevel, 0, dictSize);
-    return ZBUFF_compressInit_advanced(zbc, dict, dictSize, params, 0);
-}
-
-size_t ZBUFF_compressInit(ZBUFF_CCtx* zbc, int compressionLevel)
-{
-    return ZBUFF_compressInitDictionary(zbc, NULL, 0, compressionLevel);
-}
-
-
-/* internal util function */
-MEM_STATIC size_t ZBUFF_limitCopy(void* dst, size_t dstCapacity, const void* src, size_t srcSize)
-{
-    size_t const length = MIN(dstCapacity, srcSize);
-    memcpy(dst, src, length);
-    return length;
-}
-
-
-/* ======   Compression   ====== */
-
-typedef enum { zbf_gather, zbf_flush, zbf_end } ZBUFF_flush_e;
-
-static size_t ZBUFF_compressContinue_generic(ZBUFF_CCtx* zbc,
-                              void* dst, size_t* dstCapacityPtr,
-                        const void* src, size_t* srcSizePtr,
-                              ZBUFF_flush_e const flush)
-{
-    U32 someMoreWork = 1;
-    const char* const istart = (const char*)src;
-    const char* const iend = istart + *srcSizePtr;
-    const char* ip = istart;
-    char* const ostart = (char*)dst;
-    char* const oend = ostart + *dstCapacityPtr;
-    char* op = ostart;
-
-    while (someMoreWork) {
-        switch(zbc->stage)
-        {
-        case ZBUFFcs_init: return ERROR(init_missing);   /* call ZBUFF_compressInit() first ! */
-
-        case ZBUFFcs_load:
-            /* complete inBuffer */
-            {   size_t const toLoad = zbc->inBuffTarget - zbc->inBuffPos;
-                size_t const loaded = ZBUFF_limitCopy(zbc->inBuff + zbc->inBuffPos, toLoad, ip, iend-ip);
-                zbc->inBuffPos += loaded;
-                ip += loaded;
-                if ( (zbc->inBuffPos==zbc->inToCompress) || (!flush && (toLoad != loaded)) ) {
-                    someMoreWork = 0; break;  /* not enough input to get a full block : stop there, wait for more */
-            }   }
-            /* compress current block (note : this stage cannot be stopped in the middle) */
-            {   void* cDst;
-                size_t cSize;
-                size_t const iSize = zbc->inBuffPos - zbc->inToCompress;
-                size_t oSize = oend-op;
-                if (oSize >= ZSTD_compressBound(iSize))
-                    cDst = op;   /* compress directly into output buffer (avoid flush stage) */
-                else
-                    cDst = zbc->outBuff, oSize = zbc->outBuffSize;
-                cSize = (flush == zbf_end) ?
-                        ZSTD_compressEnd(zbc->zc, cDst, oSize, zbc->inBuff + zbc->inToCompress, iSize) :
-                        ZSTD_compressContinue(zbc->zc, cDst, oSize, zbc->inBuff + zbc->inToCompress, iSize);
-                if (ZSTD_isError(cSize)) return cSize;
-                if (flush == zbf_end) zbc->frameEnded = 1;
-                /* prepare next block */
-                zbc->inBuffTarget = zbc->inBuffPos + zbc->blockSize;
-                if (zbc->inBuffTarget > zbc->inBuffSize)
-                    zbc->inBuffPos = 0, zbc->inBuffTarget = zbc->blockSize;   /* note : inBuffSize >= blockSize */
-                zbc->inToCompress = zbc->inBuffPos;
-                if (cDst == op) { op += cSize; break; }   /* no need to flush */
-                zbc->outBuffContentSize = cSize;
-                zbc->outBuffFlushedSize = 0;
-                zbc->stage = ZBUFFcs_flush;   /* continue to flush stage */
-            }
-
-        case ZBUFFcs_flush:
-            {   size_t const toFlush = zbc->outBuffContentSize - zbc->outBuffFlushedSize;
-                size_t const flushed = ZBUFF_limitCopy(op, oend-op, zbc->outBuff + zbc->outBuffFlushedSize, toFlush);
-                op += flushed;
-                zbc->outBuffFlushedSize += flushed;
-                if (toFlush!=flushed) { someMoreWork = 0; break; } /* dst too small to store flushed data : stop there */
-                zbc->outBuffContentSize = zbc->outBuffFlushedSize = 0;
-                zbc->stage = ZBUFFcs_load;
-                break;
-            }
-
-        case ZBUFFcs_final:
-            someMoreWork = 0;   /* do nothing */
-            break;
-
-        default:
-            return ERROR(GENERIC);   /* impossible */
-        }
-    }
-
-    *srcSizePtr = ip - istart;
-    *dstCapacityPtr = op - ostart;
-    if (zbc->frameEnded) return 0;
-    {   size_t hintInSize = zbc->inBuffTarget - zbc->inBuffPos;
-        if (hintInSize==0) hintInSize = zbc->blockSize;
-        return hintInSize;
-    }
-}
-
-size_t ZBUFF_compressContinue(ZBUFF_CCtx* zbc,
-                              void* dst, size_t* dstCapacityPtr,
-                        const void* src, size_t* srcSizePtr)
-{
-    return ZBUFF_compressContinue_generic(zbc, dst, dstCapacityPtr, src, srcSizePtr, zbf_gather);
-}
-
-
-
-/* ======   Finalize   ====== */
-
-size_t ZBUFF_compressFlush(ZBUFF_CCtx* zbc, void* dst, size_t* dstCapacityPtr)
-{
-    size_t srcSize = 0;
-    ZBUFF_compressContinue_generic(zbc, dst, dstCapacityPtr, &srcSize, &srcSize, zbf_flush);  /* use a valid src address instead of NULL */
-    return zbc->outBuffContentSize - zbc->outBuffFlushedSize;
-}
-
-
-size_t ZBUFF_compressEnd(ZBUFF_CCtx* zbc, void* dst, size_t* dstCapacityPtr)
-{
-    BYTE* const ostart = (BYTE*)dst;
-    BYTE* const oend = ostart + *dstCapacityPtr;
-    BYTE* op = ostart;
-
-    if (zbc->stage != ZBUFFcs_final) {
-        /* flush whatever remains */
-        size_t outSize = *dstCapacityPtr;
-        size_t srcSize = 0;
-        size_t const notEnded = ZBUFF_compressContinue_generic(zbc, dst, &outSize, &srcSize, &srcSize, zbf_end);  /* use a valid address instead of NULL */
-        size_t const remainingToFlush = zbc->outBuffContentSize - zbc->outBuffFlushedSize;
-        op += outSize;
-        if (remainingToFlush) {
-            *dstCapacityPtr = op-ostart;
-            return remainingToFlush + ZBUFF_endFrameSize + (zbc->checksum * 4);
-        }
-        /* create epilogue */
-        zbc->stage = ZBUFFcs_final;
-        zbc->outBuffContentSize = !notEnded ? 0 :
-            ZSTD_compressEnd(zbc->zc, zbc->outBuff, zbc->outBuffSize, NULL, 0);  /* write epilogue into outBuff */
-    }
-
-    /* flush epilogue */
-    {   size_t const toFlush = zbc->outBuffContentSize - zbc->outBuffFlushedSize;
-        size_t const flushed = ZBUFF_limitCopy(op, oend-op, zbc->outBuff + zbc->outBuffFlushedSize, toFlush);
-        op += flushed;
-        zbc->outBuffFlushedSize += flushed;
-        *dstCapacityPtr = op-ostart;
-        if (toFlush==flushed) zbc->stage = ZBUFFcs_init;  /* end reached */
-        return toFlush - flushed;
-    }
-}
-
-
-
-/* *************************************
-*  Tool functions
-***************************************/
-size_t ZBUFF_recommendedCInSize(void)  { return ZSTD_BLOCKSIZE_ABSOLUTEMAX; }
-size_t ZBUFF_recommendedCOutSize(void) { return ZSTD_compressBound(ZSTD_BLOCKSIZE_ABSOLUTEMAX) + ZSTD_blockHeaderSize + ZBUFF_endFrameSize; }
diff --git a/contrib/libzstd/include/zstd/compress/zstd_compress.c b/contrib/libzstd/include/zstd/compress/zstd_compress.c
index 94f4b5a2586..9300357f2d3 100644
--- a/contrib/libzstd/include/zstd/compress/zstd_compress.c
+++ b/contrib/libzstd/include/zstd/compress/zstd_compress.c
@@ -8,18 +8,25 @@
  */
 
 
+/*-*************************************
+*  Tuning parameters
+***************************************/
+#ifndef ZSTD_CLEVEL_DEFAULT
+#  define ZSTD_CLEVEL_DEFAULT 3
+#endif
+
+
 /*-*************************************
 *  Dependencies
 ***************************************/
 #include <string.h>         /* memset */
 #include "mem.h"
-#define XXH_STATIC_LINKING_ONLY   /* XXH64_state_t */
-#include "xxhash.h"               /* XXH_reset, update, digest */
 #define FSE_STATIC_LINKING_ONLY   /* FSE_encodeSymbol */
 #include "fse.h"
 #define HUF_STATIC_LINKING_ONLY
 #include "huf.h"
 #include "zstd_internal.h"  /* includes zstd.h */
+#include "zstdmt_compress.h"
 
 
 /*-*************************************
@@ -29,11 +36,22 @@ static const U32 g_searchStrength = 8;   /* control skip over incompressible dat
 #define HASH_READ_SIZE 8
 typedef enum { ZSTDcs_created=0, ZSTDcs_init, ZSTDcs_ongoing, ZSTDcs_ending } ZSTD_compressionStage_e;
 
+/* entropy tables always have same size */
+static size_t const hufCTable_size = HUF_CTABLE_SIZE(255);
+static size_t const litlengthCTable_size = FSE_CTABLE_SIZE(LLFSELog, MaxLL);
+static size_t const offcodeCTable_size = FSE_CTABLE_SIZE(OffFSELog, MaxOff);
+static size_t const matchlengthCTable_size = FSE_CTABLE_SIZE(MLFSELog, MaxML);
+static size_t const entropyScratchSpace_size = HUF_WORKSPACE_SIZE;
+
 
 /*-*************************************
 *  Helper functions
 ***************************************/
-size_t ZSTD_compressBound(size_t srcSize) { return FSE_compressBound(srcSize) + 12; }
+size_t ZSTD_compressBound(size_t srcSize) {
+    size_t const lowLimit = 256 KB;
+    size_t const margin = (srcSize < lowLimit) ? (lowLimit-srcSize) >> 12 : 0;  /* from 64 to 0 */
+    return srcSize + (srcSize >> 8) + margin;
+}
 
 
 /*-*************************************
@@ -50,8 +68,16 @@ static void ZSTD_resetSeqStore(seqStore_t* ssPtr)
 /*-*************************************
 *  Context memory management
 ***************************************/
-struct ZSTD_CCtx_s
-{
+typedef enum { zcss_init=0, zcss_load, zcss_flush } ZSTD_cStreamStage;
+
+struct ZSTD_CDict_s {
+    void* dictBuffer;
+    const void* dictContent;
+    size_t dictContentSize;
+    ZSTD_CCtx* refContext;
+};  /* typedef'd to ZSTD_CDict within "zstd.h" */
+
+struct ZSTD_CCtx_s {
     const BYTE* nextSrc;    /* next block here to continue on current prefix */
     const BYTE* base;       /* All regular indexes relative to this position */
     const BYTE* dictBase;   /* extDict indexes relative to this position */
@@ -60,53 +86,121 @@ struct ZSTD_CCtx_s
     U32   nextToUpdate;     /* index from which to continue dictionary update */
     U32   nextToUpdate3;    /* index from which to continue dictionary update */
     U32   hashLog3;         /* dispatch table : larger == faster, more memory */
-    U32   loadedDictEnd;
+    U32   loadedDictEnd;    /* index of end of dictionary */
+    U32   forceWindow;      /* force back-references to respect limit of 1<<wLog, even for dictionary */
     ZSTD_compressionStage_e stage;
     U32   rep[ZSTD_REP_NUM];
-    U32   savedRep[ZSTD_REP_NUM];
+    U32   repToConfirm[ZSTD_REP_NUM];
     U32   dictID;
-    ZSTD_parameters params;
+    int   compressionLevel;
+    ZSTD_parameters requestedParams;
+    ZSTD_parameters appliedParams;
     void* workSpace;
     size_t workSpaceSize;
     size_t blockSize;
-    U64 frameContentSize;
+    U64 pledgedSrcSizePlusOne;  /* this way, 0 (default) == unknown */
+    U64 consumedSrcSize;
     XXH64_state_t xxhState;
     ZSTD_customMem customMem;
+    size_t staticSize;
 
     seqStore_t seqStore;    /* sequences storage ptrs */
     U32* hashTable;
     U32* hashTable3;
     U32* chainTable;
-    HUF_CElt* hufTable;
-    U32 flagStaticTables;
-    FSE_CTable offcodeCTable  [FSE_CTABLE_SIZE_U32(OffFSELog, MaxOff)];
-    FSE_CTable matchlengthCTable[FSE_CTABLE_SIZE_U32(MLFSELog, MaxML)];
-    FSE_CTable litlengthCTable  [FSE_CTABLE_SIZE_U32(LLFSELog, MaxLL)];
+    HUF_repeat hufCTable_repeatMode;
+    HUF_CElt* hufCTable;
+    U32 fseCTables_ready;
+    FSE_CTable* offcodeCTable;
+    FSE_CTable* matchlengthCTable;
+    FSE_CTable* litlengthCTable;
+    unsigned* entropyScratchSpace;
+
+    /* streaming */
+    char*  inBuff;
+    size_t inBuffSize;
+    size_t inToCompress;
+    size_t inBuffPos;
+    size_t inBuffTarget;
+    char*  outBuff;
+    size_t outBuffSize;
+    size_t outBuffContentSize;
+    size_t outBuffFlushedSize;
+    ZSTD_cStreamStage streamStage;
+    U32    frameEnded;
+
+    /* Dictionary */
+    ZSTD_dictMode_e dictMode; /* select restricting dictionary to "rawContent" or "fullDict" only */
+    U32 dictContentByRef;
+    ZSTD_CDict* cdictLocal;
+    const ZSTD_CDict* cdict;
+    const void* prefix;
+    size_t prefixSize;
+
+    /* Multi-threading */
+    U32 nbThreads;
+    ZSTDMT_CCtx* mtctx;
 };
 
+
 ZSTD_CCtx* ZSTD_createCCtx(void)
 {
-    return ZSTD_createCCtx_advanced(defaultCustomMem);
+    return ZSTD_createCCtx_advanced(ZSTD_defaultCMem);
 }
 
 ZSTD_CCtx* ZSTD_createCCtx_advanced(ZSTD_customMem customMem)
 {
     ZSTD_CCtx* cctx;
 
-    if (!customMem.customAlloc && !customMem.customFree) customMem = defaultCustomMem;
-    if (!customMem.customAlloc || !customMem.customFree) return NULL;
+    if (!customMem.customAlloc ^ !customMem.customFree) return NULL;
 
-    cctx = (ZSTD_CCtx*) ZSTD_malloc(sizeof(ZSTD_CCtx), customMem);
+    cctx = (ZSTD_CCtx*) ZSTD_calloc(sizeof(ZSTD_CCtx), customMem);
     if (!cctx) return NULL;
-    memset(cctx, 0, sizeof(ZSTD_CCtx));
-    memcpy(&(cctx->customMem), &customMem, sizeof(customMem));
+    cctx->customMem = customMem;
+    cctx->compressionLevel = ZSTD_CLEVEL_DEFAULT;
+    ZSTD_STATIC_ASSERT(zcss_init==0);
+    ZSTD_STATIC_ASSERT(ZSTD_CONTENTSIZE_UNKNOWN==(0ULL - 1));
+    return cctx;
+}
+
+ZSTD_CCtx* ZSTD_initStaticCCtx(void *workspace, size_t workspaceSize)
+{
+    ZSTD_CCtx* cctx = (ZSTD_CCtx*) workspace;
+    if (workspaceSize <= sizeof(ZSTD_CCtx)) return NULL;  /* minimum size */
+    if ((size_t)workspace & 7) return NULL;  /* must be 8-aligned */
+    memset(workspace, 0, workspaceSize);   /* may be a bit generous, could memset be smaller ? */
+    cctx->staticSize = workspaceSize;
+    cctx->workSpace = (void*)(cctx+1);
+    cctx->workSpaceSize = workspaceSize - sizeof(ZSTD_CCtx);
+
+    /* entropy space (never moves) */
+    /* note : this code should be shared with resetCCtx, rather than copy/pasted */
+    {   void* ptr = cctx->workSpace;
+        cctx->hufCTable = (HUF_CElt*)ptr;
+        ptr = (char*)cctx->hufCTable + hufCTable_size;
+        cctx->offcodeCTable = (FSE_CTable*) ptr;
+        ptr = (char*)ptr + offcodeCTable_size;
+        cctx->matchlengthCTable = (FSE_CTable*) ptr;
+        ptr = (char*)ptr + matchlengthCTable_size;
+        cctx->litlengthCTable = (FSE_CTable*) ptr;
+        ptr = (char*)ptr + litlengthCTable_size;
+        assert(((size_t)ptr & 3) == 0);   /* ensure correct alignment */
+        cctx->entropyScratchSpace = (unsigned*) ptr;
+    }
+
     return cctx;
 }
 
 size_t ZSTD_freeCCtx(ZSTD_CCtx* cctx)
 {
     if (cctx==NULL) return 0;   /* support free on NULL */
+    if (cctx->staticSize) return ERROR(memory_allocation);   /* not compatible with static CCtx */
     ZSTD_free(cctx->workSpace, cctx->customMem);
+    cctx->workSpace = NULL;
+    ZSTD_freeCDict(cctx->cdictLocal);
+    cctx->cdictLocal = NULL;
+    ZSTDMT_freeCCtx(cctx->mtctx);
+    cctx->mtctx = NULL;
     ZSTD_free(cctx, cctx->customMem);
     return 0;   /* reserved as a potential error code in the future */
 }
@@ -114,42 +208,312 @@ size_t ZSTD_freeCCtx(ZSTD_CCtx* cctx)
 size_t ZSTD_sizeof_CCtx(const ZSTD_CCtx* cctx)
 {
     if (cctx==NULL) return 0;   /* support sizeof on NULL */
-    return sizeof(*cctx) + cctx->workSpaceSize;
+    DEBUGLOG(5, "sizeof(*cctx) : %u", (U32)sizeof(*cctx));
+    DEBUGLOG(5, "workSpaceSize : %u", (U32)cctx->workSpaceSize);
+    DEBUGLOG(5, "streaming buffers : %u", (U32)(cctx->outBuffSize + cctx->inBuffSize));
+    DEBUGLOG(5, "inner MTCTX : %u", (U32)ZSTDMT_sizeof_CCtx(cctx->mtctx));
+    return sizeof(*cctx) + cctx->workSpaceSize
+           + ZSTD_sizeof_CDict(cctx->cdictLocal)
+           + cctx->outBuffSize + cctx->inBuffSize
+           + ZSTDMT_sizeof_CCtx(cctx->mtctx);
 }
 
-const seqStore_t* ZSTD_getSeqStore(const ZSTD_CCtx* ctx)   /* hidden interface */
+size_t ZSTD_sizeof_CStream(const ZSTD_CStream* zcs)
 {
-    return &(ctx->seqStore);
+    return ZSTD_sizeof_CCtx(zcs);  /* same object */
+}
+
+/* private API call, for dictBuilder only */
+const seqStore_t* ZSTD_getSeqStore(const ZSTD_CCtx* ctx) { return &(ctx->seqStore); }
+
+static ZSTD_parameters ZSTD_getParamsFromCCtx(const ZSTD_CCtx* cctx) { return cctx->appliedParams; }
+
+/* older variant; will be deprecated */
+size_t ZSTD_setCCtxParameter(ZSTD_CCtx* cctx, ZSTD_CCtxParameter param, unsigned value)
+{
+    switch(param)
+    {
+    case ZSTD_p_forceWindow : cctx->forceWindow = value>0; cctx->loadedDictEnd = 0; return 0;
+    ZSTD_STATIC_ASSERT(ZSTD_dm_auto==0);
+    ZSTD_STATIC_ASSERT(ZSTD_dm_rawContent==1);
+    case ZSTD_p_forceRawDict : cctx->dictMode = (ZSTD_dictMode_e)(value>0); return 0;
+    default: return ERROR(parameter_unknown);
+    }
 }
 
 
-/** ZSTD_checkParams() :
-    ensure param values remain within authorized range.
+#define ZSTD_CLEVEL_CUSTOM 999
+static void ZSTD_cLevelToCParams(ZSTD_CCtx* cctx)
+{
+    if (cctx->compressionLevel==ZSTD_CLEVEL_CUSTOM) return;
+    cctx->requestedParams.cParams = ZSTD_getCParams(cctx->compressionLevel,
+                                            cctx->pledgedSrcSizePlusOne-1, 0);
+    cctx->compressionLevel = ZSTD_CLEVEL_CUSTOM;
+}
+
+#define CLAMPCHECK(val,min,max) {                       \
+    if (((val)<(min)) | ((val)>(max))) {                \
+        return ERROR(compressionParameter_outOfBound);  \
+}   }
+
+size_t ZSTD_CCtx_setParameter(ZSTD_CCtx* cctx, ZSTD_cParameter param, unsigned value)
+{
+    if (cctx->streamStage != zcss_init) return ERROR(stage_wrong);
+
+    switch(param)
+    {
+    case ZSTD_p_compressionLevel :
+        if ((int)value > ZSTD_maxCLevel()) value = ZSTD_maxCLevel();   /* cap max compression level */
+        if (value == 0) return 0;  /* special value : 0 means "don't change anything" */
+        if (cctx->cdict) return ERROR(stage_wrong);
+        cctx->compressionLevel = value;
+        return 0;
+
+    case ZSTD_p_windowLog :
+        DEBUGLOG(5, "setting ZSTD_p_windowLog = %u (cdict:%u)",
+                    value, (cctx->cdict!=NULL));
+        if (value == 0) return 0;  /* special value : 0 means "don't change anything" */
+        if (cctx->cdict) return ERROR(stage_wrong);
+        CLAMPCHECK(value, ZSTD_WINDOWLOG_MIN, ZSTD_WINDOWLOG_MAX);
+        ZSTD_cLevelToCParams(cctx);
+        cctx->requestedParams.cParams.windowLog = value;
+        return 0;
+
+    case ZSTD_p_hashLog :
+        if (value == 0) return 0;  /* special value : 0 means "don't change anything" */
+        if (cctx->cdict) return ERROR(stage_wrong);
+        CLAMPCHECK(value, ZSTD_HASHLOG_MIN, ZSTD_HASHLOG_MAX);
+        ZSTD_cLevelToCParams(cctx);
+        cctx->requestedParams.cParams.hashLog = value;
+        return 0;
+
+    case ZSTD_p_chainLog :
+        if (value == 0) return 0;  /* special value : 0 means "don't change anything" */
+        if (cctx->cdict) return ERROR(stage_wrong);
+        CLAMPCHECK(value, ZSTD_CHAINLOG_MIN, ZSTD_CHAINLOG_MAX);
+        ZSTD_cLevelToCParams(cctx);
+        cctx->requestedParams.cParams.chainLog = value;
+        return 0;
+
+    case ZSTD_p_searchLog :
+        if (value == 0) return 0;  /* special value : 0 means "don't change anything" */
+        if (cctx->cdict) return ERROR(stage_wrong);
+        CLAMPCHECK(value, ZSTD_SEARCHLOG_MIN, ZSTD_SEARCHLOG_MAX);
+        ZSTD_cLevelToCParams(cctx);
+        cctx->requestedParams.cParams.searchLog = value;
+        return 0;
+
+    case ZSTD_p_minMatch :
+        if (value == 0) return 0;  /* special value : 0 means "don't change anything" */
+        if (cctx->cdict) return ERROR(stage_wrong);
+        CLAMPCHECK(value, ZSTD_SEARCHLENGTH_MIN, ZSTD_SEARCHLENGTH_MAX);
+        ZSTD_cLevelToCParams(cctx);
+        cctx->requestedParams.cParams.searchLength = value;
+        return 0;
+
+    case ZSTD_p_targetLength :
+        if (value == 0) return 0;  /* special value : 0 means "don't change anything" */
+        if (cctx->cdict) return ERROR(stage_wrong);
+        CLAMPCHECK(value, ZSTD_TARGETLENGTH_MIN, ZSTD_TARGETLENGTH_MAX);
+        ZSTD_cLevelToCParams(cctx);
+        cctx->requestedParams.cParams.targetLength = value;
+        return 0;
+
+    case ZSTD_p_compressionStrategy :
+        if (value == 0) return 0;  /* special value : 0 means "don't change anything" */
+        if (cctx->cdict) return ERROR(stage_wrong);
+        CLAMPCHECK(value, (unsigned)ZSTD_fast, (unsigned)ZSTD_btultra);
+        ZSTD_cLevelToCParams(cctx);
+        cctx->requestedParams.cParams.strategy = (ZSTD_strategy)value;
+        return 0;
+
+    case ZSTD_p_contentSizeFlag :
+        DEBUGLOG(5, "set content size flag = %u", (value>0));
+        /* Content size written in frame header _when known_ (default:1) */
+        cctx->requestedParams.fParams.contentSizeFlag = value>0;
+        return 0;
+
+    case ZSTD_p_checksumFlag :
+        /* A 32-bits content checksum will be calculated and written at end of frame (default:0) */
+        cctx->requestedParams.fParams.checksumFlag = value>0;
+        return 0;
+
+    case ZSTD_p_dictIDFlag : /* When applicable, dictionary's dictID is provided in frame header (default:1) */
+        DEBUGLOG(5, "set dictIDFlag = %u", (value>0));
+        cctx->requestedParams.fParams.noDictIDFlag = (value==0);
+        return 0;
+
+    /* Dictionary parameters */
+    case ZSTD_p_dictMode :
+        if (cctx->cdict) return ERROR(stage_wrong);  /* must be set before loading */
+        /* restrict dictionary mode, to "rawContent" or "fullDict" only */
+        ZSTD_STATIC_ASSERT((U32)ZSTD_dm_fullDict > (U32)ZSTD_dm_rawContent);
+        if (value > (unsigned)ZSTD_dm_fullDict)
+            return ERROR(compressionParameter_outOfBound);
+        cctx->dictMode = (ZSTD_dictMode_e)value;
+        return 0;
+
+    case ZSTD_p_refDictContent :
+        if (cctx->cdict) return ERROR(stage_wrong);  /* must be set before loading */
+        /* dictionary content will be referenced, instead of copied */
+        cctx->dictContentByRef = value>0;
+        return 0;
+
+    case ZSTD_p_forceMaxWindow :  /* Force back-references to remain < windowSize,
+                                   * even when referencing into Dictionary content
+                                   * default : 0 when using a CDict, 1 when using a Prefix */
+        cctx->forceWindow = value>0;
+        cctx->loadedDictEnd = 0;
+        return 0;
+
+    case ZSTD_p_nbThreads:
+        if (value==0) return 0;
+        DEBUGLOG(5, " setting nbThreads : %u", value);
+#ifndef ZSTD_MULTITHREAD
+        if (value > 1) return ERROR(compressionParameter_unsupported);
+#endif
+        if ((value>1) && (cctx->nbThreads != value)) {
+            if (cctx->staticSize)  /* MT not compatible with static alloc */
+                return ERROR(compressionParameter_unsupported);
+            ZSTDMT_freeCCtx(cctx->mtctx);
+            cctx->nbThreads = 1;
+            cctx->mtctx = ZSTDMT_createCCtx(value);
+            if (cctx->mtctx == NULL) return ERROR(memory_allocation);
+        }
+        cctx->nbThreads = value;
+        return 0;
+
+    case ZSTD_p_jobSize:
+        if (cctx->nbThreads <= 1) return ERROR(compressionParameter_unsupported);
+        assert(cctx->mtctx != NULL);
+        return ZSTDMT_setMTCtxParameter(cctx->mtctx, ZSTDMT_p_sectionSize, value);
+
+    case ZSTD_p_overlapSizeLog:
+        DEBUGLOG(5, " setting overlap with nbThreads == %u", cctx->nbThreads);
+        if (cctx->nbThreads <= 1) return ERROR(compressionParameter_unsupported);
+        assert(cctx->mtctx != NULL);
+        return ZSTDMT_setMTCtxParameter(cctx->mtctx, ZSTDMT_p_overlapSectionLog, value);
+
+    default: return ERROR(parameter_unknown);
+    }
+}
+
+ZSTDLIB_API size_t ZSTD_CCtx_setPledgedSrcSize(ZSTD_CCtx* cctx, unsigned long long pledgedSrcSize)
+{
+    DEBUGLOG(5, " setting pledgedSrcSize to %u", (U32)pledgedSrcSize);
+    if (cctx->streamStage != zcss_init) return ERROR(stage_wrong);
+    cctx->pledgedSrcSizePlusOne = pledgedSrcSize+1;
+    return 0;
+}
+
+ZSTDLIB_API size_t ZSTD_CCtx_loadDictionary(ZSTD_CCtx* cctx, const void* dict, size_t dictSize)
+{
+    if (cctx->streamStage != zcss_init) return ERROR(stage_wrong);
+    if (cctx->staticSize) return ERROR(memory_allocation);  /* no malloc for static CCtx */
+    DEBUGLOG(5, "load dictionary of size %u", (U32)dictSize);
+    ZSTD_freeCDict(cctx->cdictLocal);  /* in case one already exists */
+    if (dict==NULL || dictSize==0) {   /* no dictionary mode */
+        cctx->cdictLocal = NULL;
+        cctx->cdict = NULL;
+    } else {
+        ZSTD_compressionParameters const cParams =
+                cctx->compressionLevel == ZSTD_CLEVEL_CUSTOM ?
+                cctx->requestedParams.cParams :
+                ZSTD_getCParams(cctx->compressionLevel, 0, dictSize);
+        cctx->cdictLocal = ZSTD_createCDict_advanced(
+                                dict, dictSize,
+                                cctx->dictContentByRef, cctx->dictMode,
+                                cParams, cctx->customMem);
+        cctx->cdict = cctx->cdictLocal;
+        if (cctx->cdictLocal == NULL)
+            return ERROR(memory_allocation);
+    }
+    return 0;
+}
+
+size_t ZSTD_CCtx_refCDict(ZSTD_CCtx* cctx, const ZSTD_CDict* cdict)
+{
+    if (cctx->streamStage != zcss_init) return ERROR(stage_wrong);
+    cctx->cdict = cdict;
+    cctx->prefix = NULL;   /* exclusive */
+    cctx->prefixSize = 0;
+    return 0;
+}
+
+size_t ZSTD_CCtx_refPrefix(ZSTD_CCtx* cctx, const void* prefix, size_t prefixSize)
+{
+    if (cctx->streamStage != zcss_init) return ERROR(stage_wrong);
+    cctx->cdict = NULL;   /* prefix discards any prior cdict */
+    cctx->prefix = prefix;
+    cctx->prefixSize = prefixSize;
+    return 0;
+}
+
+static void ZSTD_startNewCompression(ZSTD_CCtx* cctx)
+{
+    cctx->streamStage = zcss_init;
+    cctx->pledgedSrcSizePlusOne = 0;
+}
+
+/*! ZSTD_CCtx_reset() :
+ *  Also dumps dictionary */
+void ZSTD_CCtx_reset(ZSTD_CCtx* cctx)
+{
+    ZSTD_startNewCompression(cctx);
+    cctx->cdict = NULL;
+}
+
+/** ZSTD_checkCParams() :
+    control CParam values remain within authorized range.
     @return : 0, or an error code if one value is beyond authorized range */
 size_t ZSTD_checkCParams(ZSTD_compressionParameters cParams)
 {
-#   define CLAMPCHECK(val,min,max) { if ((val<min) | (val>max)) return ERROR(compressionParameter_unsupported); }
     CLAMPCHECK(cParams.windowLog, ZSTD_WINDOWLOG_MIN, ZSTD_WINDOWLOG_MAX);
     CLAMPCHECK(cParams.chainLog, ZSTD_CHAINLOG_MIN, ZSTD_CHAINLOG_MAX);
     CLAMPCHECK(cParams.hashLog, ZSTD_HASHLOG_MIN, ZSTD_HASHLOG_MAX);
     CLAMPCHECK(cParams.searchLog, ZSTD_SEARCHLOG_MIN, ZSTD_SEARCHLOG_MAX);
-    { U32 const searchLengthMin = ((cParams.strategy == ZSTD_fast) | (cParams.strategy == ZSTD_greedy)) ? ZSTD_SEARCHLENGTH_MIN+1 : ZSTD_SEARCHLENGTH_MIN;
-      U32 const searchLengthMax = (cParams.strategy == ZSTD_fast) ? ZSTD_SEARCHLENGTH_MAX : ZSTD_SEARCHLENGTH_MAX-1;
-      CLAMPCHECK(cParams.searchLength, searchLengthMin, searchLengthMax); }
+    CLAMPCHECK(cParams.searchLength, ZSTD_SEARCHLENGTH_MIN, ZSTD_SEARCHLENGTH_MAX);
     CLAMPCHECK(cParams.targetLength, ZSTD_TARGETLENGTH_MIN, ZSTD_TARGETLENGTH_MAX);
-    if ((U32)(cParams.strategy) > (U32)ZSTD_btopt) return ERROR(compressionParameter_unsupported);
+    if ((U32)(cParams.strategy) > (U32)ZSTD_btultra) return ERROR(compressionParameter_unsupported);
     return 0;
 }
 
+/** ZSTD_clampCParams() :
+ *  make CParam values within valid range.
+ *  @return : valid CParams */
+static ZSTD_compressionParameters ZSTD_clampCParams(ZSTD_compressionParameters cParams)
+{
+#   define CLAMP(val,min,max) {      \
+        if (val<min) val=min;        \
+        else if (val>max) val=max;   \
+    }
+    CLAMP(cParams.windowLog, ZSTD_WINDOWLOG_MIN, ZSTD_WINDOWLOG_MAX);
+    CLAMP(cParams.chainLog, ZSTD_CHAINLOG_MIN, ZSTD_CHAINLOG_MAX);
+    CLAMP(cParams.hashLog, ZSTD_HASHLOG_MIN, ZSTD_HASHLOG_MAX);
+    CLAMP(cParams.searchLog, ZSTD_SEARCHLOG_MIN, ZSTD_SEARCHLOG_MAX);
+    CLAMP(cParams.searchLength, ZSTD_SEARCHLENGTH_MIN, ZSTD_SEARCHLENGTH_MAX);
+    CLAMP(cParams.targetLength, ZSTD_TARGETLENGTH_MIN, ZSTD_TARGETLENGTH_MAX);
+    if ((U32)(cParams.strategy) > (U32)ZSTD_btultra) cParams.strategy = ZSTD_btultra;
+    return cParams;
+}
 
-/** ZSTD_adjustCParams() :
+/** ZSTD_cycleLog() :
+ *  condition for correct operation : hashLog > 1 */
+static U32 ZSTD_cycleLog(U32 hashLog, ZSTD_strategy strat)
+{
+    U32 const btScale = ((U32)strat >= (U32)ZSTD_btlazy2);
+    return hashLog - btScale;
+}
+
+/** ZSTD_adjustCParams_internal() :
     optimize `cPar` for a given input (`srcSize` and `dictSize`).
     mostly downsizing to reduce memory consumption and initialization.
     Both `srcSize` and `dictSize` are optional (use 0 if unknown),
     but if both are 0, no optimization can be done.
     Note : cPar is considered validated at this stage. Use ZSTD_checkParams() to ensure that. */
-ZSTD_compressionParameters ZSTD_adjustCParams(ZSTD_compressionParameters cPar, unsigned long long srcSize, size_t dictSize)
+ZSTD_compressionParameters ZSTD_adjustCParams_internal(ZSTD_compressionParameters cPar, unsigned long long srcSize, size_t dictSize)
 {
+    assert(ZSTD_checkCParams(cPar)==0);
     if (srcSize+dictSize == 0) return cPar;   /* no size information available : no adjustment */
 
     /* resize params, to use less memory when necessary */
@@ -160,19 +524,25 @@ ZSTD_compressionParameters ZSTD_adjustCParams(ZSTD_compressionParameters cPar, u
             if (cPar.windowLog > srcLog) cPar.windowLog = srcLog;
     }   }
     if (cPar.hashLog > cPar.windowLog) cPar.hashLog = cPar.windowLog;
-    {   U32 const btPlus = (cPar.strategy == ZSTD_btlazy2) | (cPar.strategy == ZSTD_btopt);
-        U32 const maxChainLog = cPar.windowLog+btPlus;
-        if (cPar.chainLog > maxChainLog) cPar.chainLog = maxChainLog; }   /* <= ZSTD_CHAINLOG_MAX */
+    {   U32 const cycleLog = ZSTD_cycleLog(cPar.chainLog, cPar.strategy);
+        if (cycleLog > cPar.windowLog) cPar.chainLog -= (cycleLog - cPar.windowLog);
+    }
 
     if (cPar.windowLog < ZSTD_WINDOWLOG_ABSOLUTEMIN) cPar.windowLog = ZSTD_WINDOWLOG_ABSOLUTEMIN;  /* required for frame header */
 
     return cPar;
 }
 
-
-size_t ZSTD_estimateCCtxSize(ZSTD_compressionParameters cParams)
+ZSTD_compressionParameters ZSTD_adjustCParams(ZSTD_compressionParameters cPar, unsigned long long srcSize, size_t dictSize)
 {
-    size_t const blockSize = MIN(ZSTD_BLOCKSIZE_ABSOLUTEMAX, (size_t)1 << cParams.windowLog);
+    cPar = ZSTD_clampCParams(cPar);
+    return ZSTD_adjustCParams_internal(cPar, srcSize, dictSize);
+}
+
+
+size_t ZSTD_estimateCCtxSize_advanced(ZSTD_compressionParameters cParams)
+{
+    size_t const blockSize = MIN(ZSTD_BLOCKSIZE_MAX, (size_t)1 << cParams.windowLog);
     U32    const divider = (cParams.searchLength==3) ? 3 : 4;
     size_t const maxNbSeq = blockSize / divider;
     size_t const tokenSpace = blockSize + 11*maxNbSeq;
@@ -181,32 +551,69 @@ size_t ZSTD_estimateCCtxSize(ZSTD_compressionParameters cParams)
     size_t const hSize = ((size_t)1) << cParams.hashLog;
     U32    const hashLog3 = (cParams.searchLength>3) ? 0 : MIN(ZSTD_HASHLOG3_MAX, cParams.windowLog);
     size_t const h3Size = ((size_t)1) << hashLog3;
+    size_t const entropySpace = hufCTable_size + litlengthCTable_size
+                              + offcodeCTable_size + matchlengthCTable_size
+                              + entropyScratchSpace_size;
     size_t const tableSpace = (chainSize + hSize + h3Size) * sizeof(U32);
 
-    size_t const optSpace = ((MaxML+1) + (MaxLL+1) + (MaxOff+1) + (1<<Litbits))*sizeof(U32)
+    size_t const optBudget = ((MaxML+1) + (MaxLL+1) + (MaxOff+1) + (1<<Litbits))*sizeof(U32)
                           + (ZSTD_OPT_NUM+1)*(sizeof(ZSTD_match_t) + sizeof(ZSTD_optimal_t));
-    size_t const neededSpace = tableSpace + (256*sizeof(U32)) /* huffTable */ + tokenSpace
-                             + ((cParams.strategy == ZSTD_btopt) ? optSpace : 0);
+    size_t const optSpace = ((cParams.strategy == ZSTD_btopt) || (cParams.strategy == ZSTD_btultra)) ? optBudget : 0;
+    size_t const neededSpace = entropySpace + tableSpace + tokenSpace + optSpace;
 
+    DEBUGLOG(5, "sizeof(ZSTD_CCtx) : %u", (U32)sizeof(ZSTD_CCtx));
+    DEBUGLOG(5, "estimate workSpace : %u", (U32)neededSpace);
     return sizeof(ZSTD_CCtx) + neededSpace;
 }
 
-
-static U32 ZSTD_equivalentParams(ZSTD_parameters param1, ZSTD_parameters param2)
+size_t ZSTD_estimateCCtxSize(int compressionLevel)
 {
-    return (param1.cParams.hashLog  == param2.cParams.hashLog)
-         & (param1.cParams.chainLog == param2.cParams.chainLog)
-         & (param1.cParams.strategy == param2.cParams.strategy)
-         & ((param1.cParams.searchLength==3) == (param2.cParams.searchLength==3));
+    ZSTD_compressionParameters const cParams = ZSTD_getCParams(compressionLevel, 0, 0);
+    return ZSTD_estimateCCtxSize_advanced(cParams);
+}
+
+size_t ZSTD_estimateCStreamSize_advanced(ZSTD_compressionParameters cParams)
+{
+    size_t const CCtxSize = ZSTD_estimateCCtxSize_advanced(cParams);
+    size_t const blockSize = MIN(ZSTD_BLOCKSIZE_MAX, (size_t)1 << cParams.windowLog);
+    size_t const inBuffSize = ((size_t)1 << cParams.windowLog) + blockSize;
+    size_t const outBuffSize = ZSTD_compressBound(blockSize) + 1;
+    size_t const streamingSize = inBuffSize + outBuffSize;
+
+    return CCtxSize + streamingSize;
+}
+
+size_t ZSTD_estimateCStreamSize(int compressionLevel) {
+    ZSTD_compressionParameters const cParams = ZSTD_getCParams(compressionLevel, 0, 0);
+    return ZSTD_estimateCStreamSize_advanced(cParams);
+}
+
+
+static U32 ZSTD_equivalentParams(ZSTD_compressionParameters cParams1,
+                                 ZSTD_compressionParameters cParams2)
+{
+    U32 bslog1 = MIN(cParams1.windowLog, ZSTD_BLOCKSIZELOG_MAX);
+    U32 bslog2 = MIN(cParams2.windowLog, ZSTD_BLOCKSIZELOG_MAX);
+    return (bslog1 == bslog2)   /* same block size */
+         & (cParams1.hashLog  == cParams2.hashLog)
+         & (cParams1.chainLog == cParams2.chainLog)
+         & (cParams1.strategy == cParams2.strategy)   /* opt parser space */
+         & ((cParams1.searchLength==3) == (cParams2.searchLength==3));  /* hashlog3 space */
 }
 
 /*! ZSTD_continueCCtx() :
-    reuse CCtx without reset (note : requires no dictionary) */
-static size_t ZSTD_continueCCtx(ZSTD_CCtx* cctx, ZSTD_parameters params, U64 frameContentSize)
+ *  reuse CCtx without reset (note : requires no dictionary) */
+static size_t ZSTD_continueCCtx(ZSTD_CCtx* cctx, ZSTD_parameters params, U64 pledgedSrcSize)
 {
     U32 const end = (U32)(cctx->nextSrc - cctx->base);
-    cctx->params = params;
-    cctx->frameContentSize = frameContentSize;
+    DEBUGLOG(5, "continue mode");
+    cctx->appliedParams = params;
+    cctx->pledgedSrcSizePlusOne = pledgedSrcSize+1;
+    cctx->consumedSrcSize = 0;
+    if (pledgedSrcSize == ZSTD_CONTENTSIZE_UNKNOWN)
+        cctx->appliedParams.fParams.contentSizeFlag = 0;
+    DEBUGLOG(5, "pledged content size : %u ; flag : %u",
+        (U32)pledgedSrcSize, cctx->appliedParams.fParams.contentSizeFlag);
     cctx->lowLimit = end;
     cctx->dictLimit = end;
     cctx->nextToUpdate = end+1;
@@ -219,64 +626,119 @@ static size_t ZSTD_continueCCtx(ZSTD_CCtx* cctx, ZSTD_parameters params, U64 fra
     return 0;
 }
 
-typedef enum { ZSTDcrp_continue, ZSTDcrp_noMemset, ZSTDcrp_fullReset } ZSTD_compResetPolicy_e;
+typedef enum { ZSTDcrp_continue, ZSTDcrp_noMemset } ZSTD_compResetPolicy_e;
+typedef enum { ZSTDb_not_buffered, ZSTDb_buffered } ZSTD_buffered_policy_e;
 
-/*! ZSTD_resetCCtx_advanced() :
-    note : 'params' must be validated */
-static size_t ZSTD_resetCCtx_advanced (ZSTD_CCtx* zc,
-                                       ZSTD_parameters params, U64 frameContentSize,
-                                       ZSTD_compResetPolicy_e const crp)
+/*! ZSTD_resetCCtx_internal() :
+    note : `params` are assumed fully validated at this stage */
+static size_t ZSTD_resetCCtx_internal(ZSTD_CCtx* zc,
+                                      ZSTD_parameters params, U64 pledgedSrcSize,
+                                      ZSTD_compResetPolicy_e const crp,
+                                      ZSTD_buffered_policy_e const zbuff)
 {
-    if (crp == ZSTDcrp_continue)
-        if (ZSTD_equivalentParams(params, zc->params))
-            return ZSTD_continueCCtx(zc, params, frameContentSize);
+    assert(!ZSTD_isError(ZSTD_checkCParams(params.cParams)));
 
-    {   size_t const blockSize = MIN(ZSTD_BLOCKSIZE_ABSOLUTEMAX, (size_t)1 << params.cParams.windowLog);
+    if (crp == ZSTDcrp_continue) {
+        if (ZSTD_equivalentParams(params.cParams, zc->appliedParams.cParams)) {
+            DEBUGLOG(5, "ZSTD_equivalentParams()==1");
+            zc->fseCTables_ready = 0;
+            zc->hufCTable_repeatMode = HUF_repeat_none;
+            return ZSTD_continueCCtx(zc, params, pledgedSrcSize);
+    }   }
+
+    {   size_t const blockSize = MIN(ZSTD_BLOCKSIZE_MAX, (size_t)1 << params.cParams.windowLog);
         U32    const divider = (params.cParams.searchLength==3) ? 3 : 4;
         size_t const maxNbSeq = blockSize / divider;
         size_t const tokenSpace = blockSize + 11*maxNbSeq;
-        size_t const chainSize = (params.cParams.strategy == ZSTD_fast) ? 0 : (1 << params.cParams.chainLog);
+        size_t const chainSize = (params.cParams.strategy == ZSTD_fast) ?
+                                0 : (1 << params.cParams.chainLog);
         size_t const hSize = ((size_t)1) << params.cParams.hashLog;
-        U32    const hashLog3 = (params.cParams.searchLength>3) ? 0 : MIN(ZSTD_HASHLOG3_MAX, params.cParams.windowLog);
+        U32    const hashLog3 = (params.cParams.searchLength>3) ?
+                                0 : MIN(ZSTD_HASHLOG3_MAX, params.cParams.windowLog);
         size_t const h3Size = ((size_t)1) << hashLog3;
         size_t const tableSpace = (chainSize + hSize + h3Size) * sizeof(U32);
+        size_t const buffOutSize = (zbuff==ZSTDb_buffered) ? ZSTD_compressBound(blockSize)+1 : 0;
+        size_t const buffInSize = (zbuff==ZSTDb_buffered) ? ((size_t)1 << params.cParams.windowLog) + blockSize : 0;
         void* ptr;
 
         /* Check if workSpace is large enough, alloc a new one if needed */
-        {   size_t const optSpace = ((MaxML+1) + (MaxLL+1) + (MaxOff+1) + (1<<Litbits))*sizeof(U32)
-                                  + (ZSTD_OPT_NUM+1)*(sizeof(ZSTD_match_t) + sizeof(ZSTD_optimal_t));
-            size_t const neededSpace = tableSpace + (256*sizeof(U32)) /* huffTable */ + tokenSpace
-                                  + ((params.cParams.strategy == ZSTD_btopt) ? optSpace : 0);
-            if (zc->workSpaceSize < neededSpace) {
+        {   size_t const entropySpace = hufCTable_size + litlengthCTable_size
+                                  + offcodeCTable_size + matchlengthCTable_size
+                                  + entropyScratchSpace_size;
+            size_t const optPotentialSpace = ((MaxML+1) + (MaxLL+1) + (MaxOff+1) + (1<<Litbits)) * sizeof(U32)
+                                  + (ZSTD_OPT_NUM+1) * (sizeof(ZSTD_match_t)+sizeof(ZSTD_optimal_t));
+            size_t const optSpace = ( (params.cParams.strategy == ZSTD_btopt)
+                                    || (params.cParams.strategy == ZSTD_btultra)) ?
+                                    optPotentialSpace : 0;
+            size_t const bufferSpace = buffInSize + buffOutSize;
+            size_t const neededSpace = entropySpace + optSpace + tableSpace
+                                     + tokenSpace + bufferSpace;
+
+            if (zc->workSpaceSize < neededSpace) {  /* too small : resize */
+                DEBUGLOG(5, "Need to update workSpaceSize from %uK to %uK \n",
+                            (unsigned)zc->workSpaceSize>>10,
+                            (unsigned)neededSpace>>10);
+                /* static cctx : no resize, error out */
+                if (zc->staticSize) return ERROR(memory_allocation);
+
+                zc->workSpaceSize = 0;
                 ZSTD_free(zc->workSpace, zc->customMem);
                 zc->workSpace = ZSTD_malloc(neededSpace, zc->customMem);
                 if (zc->workSpace == NULL) return ERROR(memory_allocation);
                 zc->workSpaceSize = neededSpace;
+                ptr = zc->workSpace;
+
+                /* entropy space */
+                zc->hufCTable = (HUF_CElt*)ptr;
+                ptr = (char*)zc->hufCTable + hufCTable_size;  /* note : HUF_CElt* is incomplete type, size is estimated via macro */
+                zc->offcodeCTable = (FSE_CTable*) ptr;
+                ptr = (char*)ptr + offcodeCTable_size;
+                zc->matchlengthCTable = (FSE_CTable*) ptr;
+                ptr = (char*)ptr + matchlengthCTable_size;
+                zc->litlengthCTable = (FSE_CTable*) ptr;
+                ptr = (char*)ptr + litlengthCTable_size;
+                assert(((size_t)ptr & 3) == 0);   /* ensure correct alignment */
+                zc->entropyScratchSpace = (unsigned*) ptr;
         }   }
 
-        if (crp!=ZSTDcrp_noMemset) memset(zc->workSpace, 0, tableSpace);   /* reset tables only */
-        XXH64_reset(&zc->xxhState, 0);
-        zc->hashLog3 = hashLog3;
-        zc->hashTable = (U32*)(zc->workSpace);
-        zc->chainTable = zc->hashTable + hSize;
-        zc->hashTable3 = zc->chainTable + chainSize;
-        ptr = zc->hashTable3 + h3Size;
-        zc->hufTable = (HUF_CElt*)ptr;
-        zc->flagStaticTables = 0;
-        ptr = ((U32*)ptr) + 256;  /* note : HUF_CElt* is incomplete type, size is simulated using U32 */
+        /* init params */
+        zc->appliedParams = params;
+        zc->pledgedSrcSizePlusOne = pledgedSrcSize+1;
+        zc->consumedSrcSize = 0;
+        if (pledgedSrcSize == ZSTD_CONTENTSIZE_UNKNOWN)
+            zc->appliedParams.fParams.contentSizeFlag = 0;
+        DEBUGLOG(5, "pledged content size : %u ; flag : %u",
+            (U32)pledgedSrcSize, zc->appliedParams.fParams.contentSizeFlag);
+        zc->blockSize = blockSize;
 
+        XXH64_reset(&zc->xxhState, 0);
+        zc->stage = ZSTDcs_init;
+        zc->dictID = 0;
+        zc->loadedDictEnd = 0;
+        zc->fseCTables_ready = 0;
+        zc->hufCTable_repeatMode = HUF_repeat_none;
         zc->nextToUpdate = 1;
         zc->nextSrc = NULL;
         zc->base = NULL;
         zc->dictBase = NULL;
         zc->dictLimit = 0;
         zc->lowLimit = 0;
-        zc->params = params;
-        zc->blockSize = blockSize;
-        zc->frameContentSize = frameContentSize;
         { int i; for (i=0; i<ZSTD_REP_NUM; i++) zc->rep[i] = repStartValue[i]; }
+        zc->hashLog3 = hashLog3;
+        zc->seqStore.litLengthSum = 0;
 
-        if (params.cParams.strategy == ZSTD_btopt) {
+        /* ensure entropy tables are close together at the beginning */
+        assert((void*)zc->hufCTable == zc->workSpace);
+        assert((char*)zc->offcodeCTable == (char*)zc->hufCTable + hufCTable_size);
+        assert((char*)zc->matchlengthCTable == (char*)zc->offcodeCTable + offcodeCTable_size);
+        assert((char*)zc->litlengthCTable == (char*)zc->matchlengthCTable + matchlengthCTable_size);
+        assert((char*)zc->entropyScratchSpace == (char*)zc->litlengthCTable + litlengthCTable_size);
+        ptr = (char*)zc->entropyScratchSpace + entropyScratchSpace_size;
+
+        /* opt parser space */
+        if ((params.cParams.strategy == ZSTD_btopt) || (params.cParams.strategy == ZSTD_btultra)) {
+            DEBUGLOG(5, "reserving optimal parser space");
+            assert(((size_t)ptr & 3) == 0);  /* ensure ptr is properly aligned */
             zc->seqStore.litFreq = (U32*)ptr;
             zc->seqStore.litLengthFreq = zc->seqStore.litFreq + (1<<Litbits);
             zc->seqStore.matchLengthFreq = zc->seqStore.litLengthFreq + (MaxLL+1);
@@ -286,41 +748,74 @@ static size_t ZSTD_resetCCtx_advanced (ZSTD_CCtx* zc,
             ptr = zc->seqStore.matchTable + ZSTD_OPT_NUM+1;
             zc->seqStore.priceTable = (ZSTD_optimal_t*)ptr;
             ptr = zc->seqStore.priceTable + ZSTD_OPT_NUM+1;
-            zc->seqStore.litLengthSum = 0;
         }
+
+        /* table Space */
+        if (crp!=ZSTDcrp_noMemset) memset(ptr, 0, tableSpace);   /* reset tables only */
+        assert(((size_t)ptr & 3) == 0);  /* ensure ptr is properly aligned */
+        zc->hashTable = (U32*)(ptr);
+        zc->chainTable = zc->hashTable + hSize;
+        zc->hashTable3 = zc->chainTable + chainSize;
+        ptr = zc->hashTable3 + h3Size;
+
+        /* sequences storage */
         zc->seqStore.sequencesStart = (seqDef*)ptr;
         ptr = zc->seqStore.sequencesStart + maxNbSeq;
         zc->seqStore.llCode = (BYTE*) ptr;
         zc->seqStore.mlCode = zc->seqStore.llCode + maxNbSeq;
         zc->seqStore.ofCode = zc->seqStore.mlCode + maxNbSeq;
         zc->seqStore.litStart = zc->seqStore.ofCode + maxNbSeq;
+        ptr = zc->seqStore.litStart + blockSize;
 
-        zc->stage = ZSTDcs_init;
-        zc->dictID = 0;
-        zc->loadedDictEnd = 0;
+        /* buffers */
+        zc->inBuffSize = buffInSize;
+        zc->inBuff = (char*)ptr;
+        zc->outBuffSize = buffOutSize;
+        zc->outBuff = zc->inBuff + buffInSize;
 
         return 0;
     }
 }
 
+/* ZSTD_invalidateRepCodes() :
+ * ensures next compression will not use repcodes from previous block.
+ * Note : only works with regular variant;
+ *        do not use with extDict variant ! */
+void ZSTD_invalidateRepCodes(ZSTD_CCtx* cctx) {
+    int i;
+    for (i=0; i<ZSTD_REP_NUM; i++) cctx->rep[i] = 0;
+}
 
-/*! ZSTD_copyCCtx() :
-*   Duplicate an existing context `srcCCtx` into another one `dstCCtx`.
-*   Only works during stage ZSTDcs_init (i.e. after creation, but before first call to ZSTD_compressContinue()).
-*   @return : 0, or an error code */
-size_t ZSTD_copyCCtx(ZSTD_CCtx* dstCCtx, const ZSTD_CCtx* srcCCtx, unsigned long long pledgedSrcSize)
+
+/*! ZSTD_copyCCtx_internal() :
+ *  Duplicate an existing context `srcCCtx` into another one `dstCCtx`.
+ *  Only works during stage ZSTDcs_init (i.e. after creation, but before first call to ZSTD_compressContinue()).
+ *  pledgedSrcSize=0 means "empty" if fParams.contentSizeFlag=1
+ *  @return : 0, or an error code */
+static size_t ZSTD_copyCCtx_internal(ZSTD_CCtx* dstCCtx,
+                            const ZSTD_CCtx* srcCCtx,
+                            ZSTD_frameParameters fParams,
+                            unsigned long long pledgedSrcSize,
+                            ZSTD_buffered_policy_e zbuff)
 {
+    DEBUGLOG(5, "ZSTD_copyCCtx_internal");
     if (srcCCtx->stage!=ZSTDcs_init) return ERROR(stage_wrong);
 
     memcpy(&dstCCtx->customMem, &srcCCtx->customMem, sizeof(ZSTD_customMem));
-    ZSTD_resetCCtx_advanced(dstCCtx, srcCCtx->params, pledgedSrcSize, ZSTDcrp_noMemset);
+    {   ZSTD_parameters params = srcCCtx->appliedParams;
+        params.fParams = fParams;
+        ZSTD_resetCCtx_internal(dstCCtx, params, pledgedSrcSize,
+                                ZSTDcrp_noMemset, zbuff);
+    }
 
     /* copy tables */
-    {   size_t const chainSize = (srcCCtx->params.cParams.strategy == ZSTD_fast) ? 0 : (1 << srcCCtx->params.cParams.chainLog);
-        size_t const hSize = ((size_t)1) << srcCCtx->params.cParams.hashLog;
+    {   size_t const chainSize = (srcCCtx->appliedParams.cParams.strategy == ZSTD_fast) ? 0 : (1 << srcCCtx->appliedParams.cParams.chainLog);
+        size_t const hSize =  (size_t)1 << srcCCtx->appliedParams.cParams.hashLog;
         size_t const h3Size = (size_t)1 << srcCCtx->hashLog3;
         size_t const tableSpace = (chainSize + hSize + h3Size) * sizeof(U32);
-        memcpy(dstCCtx->workSpace, srcCCtx->workSpace, tableSpace);
+        assert((U32*)dstCCtx->chainTable == (U32*)dstCCtx->hashTable + hSize);  /* chainTable must follow hashTable */
+        assert((U32*)dstCCtx->hashTable3 == (U32*)dstCCtx->chainTable + chainSize);
+        memcpy(dstCCtx->hashTable, srcCCtx->hashTable, tableSpace);   /* presumes all tables follow each other */
     }
 
     /* copy dictionary offsets */
@@ -335,20 +830,38 @@ size_t ZSTD_copyCCtx(ZSTD_CCtx* dstCCtx, const ZSTD_CCtx* srcCCtx, unsigned long
     dstCCtx->dictID       = srcCCtx->dictID;
 
     /* copy entropy tables */
-    dstCCtx->flagStaticTables = srcCCtx->flagStaticTables;
-    if (srcCCtx->flagStaticTables) {
-        memcpy(dstCCtx->hufTable, srcCCtx->hufTable, 256*4);
-        memcpy(dstCCtx->litlengthCTable, srcCCtx->litlengthCTable, sizeof(dstCCtx->litlengthCTable));
-        memcpy(dstCCtx->matchlengthCTable, srcCCtx->matchlengthCTable, sizeof(dstCCtx->matchlengthCTable));
-        memcpy(dstCCtx->offcodeCTable, srcCCtx->offcodeCTable, sizeof(dstCCtx->offcodeCTable));
+    dstCCtx->fseCTables_ready = srcCCtx->fseCTables_ready;
+    if (srcCCtx->fseCTables_ready) {
+        memcpy(dstCCtx->litlengthCTable, srcCCtx->litlengthCTable, litlengthCTable_size);
+        memcpy(dstCCtx->matchlengthCTable, srcCCtx->matchlengthCTable, matchlengthCTable_size);
+        memcpy(dstCCtx->offcodeCTable, srcCCtx->offcodeCTable, offcodeCTable_size);
+    }
+    dstCCtx->hufCTable_repeatMode = srcCCtx->hufCTable_repeatMode;
+    if (srcCCtx->hufCTable_repeatMode) {
+        memcpy(dstCCtx->hufCTable, srcCCtx->hufCTable, hufCTable_size);
     }
 
     return 0;
 }
 
+/*! ZSTD_copyCCtx() :
+ *  Duplicate an existing context `srcCCtx` into another one `dstCCtx`.
+ *  Only works during stage ZSTDcs_init (i.e. after creation, but before first call to ZSTD_compressContinue()).
+ *  pledgedSrcSize==0 means "unknown".
+ *  @return : 0, or an error code */
+size_t ZSTD_copyCCtx(ZSTD_CCtx* dstCCtx, const ZSTD_CCtx* srcCCtx, unsigned long long pledgedSrcSize)
+{
+    ZSTD_frameParameters fParams = { 1 /*content*/, 0 /*checksum*/, 0 /*noDictID*/ };
+    ZSTD_buffered_policy_e const zbuff = (ZSTD_buffered_policy_e)(srcCCtx->inBuffSize>0);
+    ZSTD_STATIC_ASSERT((U32)ZSTDb_buffered==1);
+    fParams.contentSizeFlag = pledgedSrcSize>0;
+
+    return ZSTD_copyCCtx_internal(dstCCtx, srcCCtx, fParams, pledgedSrcSize, zbuff);
+}
+
 
 /*! ZSTD_reduceTable() :
-*   reduce table indexes by `reducerValue` */
+ *  reduce table indexes by `reducerValue` */
 static void ZSTD_reduceTable (U32* const table, U32 const size, U32 const reducerValue)
 {
     U32 u;
@@ -362,10 +875,10 @@ static void ZSTD_reduceTable (U32* const table, U32 const size, U32 const reduce
 *   rescale all indexes to avoid future overflow (indexes are U32) */
 static void ZSTD_reduceIndex (ZSTD_CCtx* zc, const U32 reducerValue)
 {
-    { U32 const hSize = 1 << zc->params.cParams.hashLog;
+    { U32 const hSize = 1 << zc->appliedParams.cParams.hashLog;
       ZSTD_reduceTable(zc->hashTable, hSize, reducerValue); }
 
-    { U32 const chainSize = (zc->params.cParams.strategy == ZSTD_fast) ? 0 : (1 << zc->params.cParams.chainLog);
+    { U32 const chainSize = (zc->appliedParams.cParams.strategy == ZSTD_fast) ? 0 : (1 << zc->appliedParams.cParams.chainLog);
       ZSTD_reduceTable(zc->chainTable, chainSize, reducerValue); }
 
     { U32 const h3Size = (zc->hashLog3) ? 1 << zc->hashLog3 : 0;
@@ -377,7 +890,7 @@ static void ZSTD_reduceIndex (ZSTD_CCtx* zc, const U32 reducerValue)
 *  Block entropic compression
 *********************************************************/
 
-/* See zstd_compression_format.md for detailed format description */
+/* See doc/zstd_compression_format.md for detailed format description */
 
 size_t ZSTD_noCompressBlock (void* dst, size_t dstCapacity, const void* src, size_t srcSize)
 {
@@ -403,10 +916,11 @@ static size_t ZSTD_noCompressLiterals (void* dst, size_t dstCapacity, const void
         case 2: /* 2 - 2 - 12 */
             MEM_writeLE16(ostart, (U16)((U32)set_basic + (1<<2) + (srcSize<<4)));
             break;
-        default:   /*note : should not be necessary : flSize is within {1,2,3} */
         case 3: /* 2 - 2 - 20 */
             MEM_writeLE32(ostart, (U32)((U32)set_basic + (3<<2) + (srcSize<<4)));
             break;
+        default:   /* not necessary : flSize is {1,2,3} */
+            assert(0);
     }
 
     memcpy(ostart + flSize, src, srcSize);
@@ -428,10 +942,11 @@ static size_t ZSTD_compressRleLiteralsBlock (void* dst, size_t dstCapacity, cons
         case 2: /* 2 - 2 - 12 */
             MEM_writeLE16(ostart, (U16)((U32)set_rle + (1<<2) + (srcSize<<4)));
             break;
-        default:   /*note : should not be necessary : flSize is necessarily within {1,2,3} */
         case 3: /* 2 - 2 - 20 */
             MEM_writeLE32(ostart, (U32)((U32)set_rle + (3<<2) + (srcSize<<4)));
             break;
+        default:   /* not necessary : flSize is {1,2,3} */
+            assert(0);
     }
 
     ostart[flSize] = *(const BYTE*)src;
@@ -455,24 +970,30 @@ static size_t ZSTD_compressLiterals (ZSTD_CCtx* zc,
 
     /* small ? don't even attempt compression (speed opt) */
 #   define LITERAL_NOENTROPY 63
-    {   size_t const minLitSize = zc->flagStaticTables ? 6 : LITERAL_NOENTROPY;
+    {   size_t const minLitSize = zc->hufCTable_repeatMode == HUF_repeat_valid ? 6 : LITERAL_NOENTROPY;
         if (srcSize <= minLitSize) return ZSTD_noCompressLiterals(dst, dstCapacity, src, srcSize);
     }
 
     if (dstCapacity < lhSize+1) return ERROR(dstSize_tooSmall);   /* not enough space for compression */
-    if (zc->flagStaticTables && (lhSize==3)) {
-        hType = set_repeat;
-        singleStream = 1;
-        cLitSize = HUF_compress1X_usingCTable(ostart+lhSize, dstCapacity-lhSize, src, srcSize, zc->hufTable);
-    } else {
-        cLitSize = singleStream ? HUF_compress1X(ostart+lhSize, dstCapacity-lhSize, src, srcSize, 255, 11)
-                                : HUF_compress2 (ostart+lhSize, dstCapacity-lhSize, src, srcSize, 255, 11);
+    {   HUF_repeat repeat = zc->hufCTable_repeatMode;
+        int const preferRepeat = zc->appliedParams.cParams.strategy < ZSTD_lazy ? srcSize <= 1024 : 0;
+        if (repeat == HUF_repeat_valid && lhSize == 3) singleStream = 1;
+        cLitSize = singleStream ? HUF_compress1X_repeat(ostart+lhSize, dstCapacity-lhSize, src, srcSize, 255, 11,
+                                      zc->entropyScratchSpace, entropyScratchSpace_size, zc->hufCTable, &repeat, preferRepeat)
+                                : HUF_compress4X_repeat(ostart+lhSize, dstCapacity-lhSize, src, srcSize, 255, 11,
+                                      zc->entropyScratchSpace, entropyScratchSpace_size, zc->hufCTable, &repeat, preferRepeat);
+        if (repeat != HUF_repeat_none) { hType = set_repeat; }    /* reused the existing table */
+        else { zc->hufCTable_repeatMode = HUF_repeat_check; }       /* now have a table to reuse */
     }
 
-    if ((cLitSize==0) | (cLitSize >= srcSize - minGain))
+    if ((cLitSize==0) | (cLitSize >= srcSize - minGain)) {
+        zc->hufCTable_repeatMode = HUF_repeat_none;
         return ZSTD_noCompressLiterals(dst, dstCapacity, src, srcSize);
-    if (cLitSize==1)
+    }
+    if (cLitSize==1) {
+        zc->hufCTable_repeatMode = HUF_repeat_none;
         return ZSTD_compressRleLiteralsBlock(dst, dstCapacity, src, srcSize);
+    }
 
     /* Build header */
     switch(lhSize)
@@ -487,13 +1008,14 @@ static size_t ZSTD_compressLiterals (ZSTD_CCtx* zc,
             MEM_writeLE32(ostart, lhc);
             break;
         }
-    default:   /* should not be necessary, lhSize is only {3,4,5} */
     case 5: /* 2 - 2 - 18 - 18 */
         {   U32 const lhc = hType + (3 << 2) + ((U32)srcSize<<4) + ((U32)cLitSize<<22);
             MEM_writeLE32(ostart, lhc);
             ostart[4] = (BYTE)(cLitSize >> 10);
             break;
         }
+    default:   /* not possible : lhSize is {3,4,5} */
+        assert(0);
     }
     return lhSize+cLitSize;
 }
@@ -540,11 +1062,11 @@ void ZSTD_seqToCodes(const seqStore_t* seqStorePtr)
         mlCodeTable[seqStorePtr->longLengthPos] = MaxML;
 }
 
-
-size_t ZSTD_compressSequences(ZSTD_CCtx* zc,
+MEM_STATIC size_t ZSTD_compressSequences (ZSTD_CCtx* zc,
                               void* dst, size_t dstCapacity,
                               size_t srcSize)
 {
+    const int longOffsets = zc->appliedParams.cParams.windowLog > STREAM_ACCUMULATOR_MIN;
     const seqStore_t* seqStorePtr = &(zc->seqStore);
     U32 count[MaxSeq+1];
     S16 norm[MaxSeq+1];
@@ -561,6 +1083,7 @@ size_t ZSTD_compressSequences(ZSTD_CCtx* zc,
     BYTE* op = ostart;
     size_t const nbSeq = seqStorePtr->sequences - seqStorePtr->sequencesStart;
     BYTE* seqHead;
+    BYTE scratchBuffer[1<<MAX(MLFSELog,LLFSELog)];
 
     /* Compress literals */
     {   const BYTE* const literals = seqStorePtr->litStart;
@@ -588,15 +1111,15 @@ size_t ZSTD_compressSequences(ZSTD_CCtx* zc,
 
     /* CTable for Literal Lengths */
     {   U32 max = MaxLL;
-        size_t const mostFrequent = FSE_countFast(count, &max, llCodeTable, nbSeq);
+        size_t const mostFrequent = FSE_countFast_wksp(count, &max, llCodeTable, nbSeq, zc->entropyScratchSpace);
         if ((mostFrequent == nbSeq) && (nbSeq > 2)) {
             *op++ = llCodeTable[0];
             FSE_buildCTable_rle(CTable_LitLength, (BYTE)max);
             LLtype = set_rle;
-        } else if ((zc->flagStaticTables) && (nbSeq < MAX_SEQ_FOR_STATIC_FSE)) {
+        } else if ((zc->fseCTables_ready) && (nbSeq < MAX_SEQ_FOR_STATIC_FSE)) {
             LLtype = set_repeat;
         } else if ((nbSeq < MIN_SEQ_FOR_DYNAMIC_FSE) || (mostFrequent < (nbSeq >> (LL_defaultNormLog-1)))) {
-            FSE_buildCTable(CTable_LitLength, LL_defaultNorm, MaxLL, LL_defaultNormLog);
+            FSE_buildCTable_wksp(CTable_LitLength, LL_defaultNorm, MaxLL, LL_defaultNormLog, scratchBuffer, sizeof(scratchBuffer));
             LLtype = set_basic;
         } else {
             size_t nbSeq_1 = nbSeq;
@@ -604,23 +1127,23 @@ size_t ZSTD_compressSequences(ZSTD_CCtx* zc,
             if (count[llCodeTable[nbSeq-1]]>1) { count[llCodeTable[nbSeq-1]]--; nbSeq_1--; }
             FSE_normalizeCount(norm, tableLog, count, nbSeq_1, max);
             { size_t const NCountSize = FSE_writeNCount(op, oend-op, norm, max, tableLog);   /* overflow protected */
-              if (FSE_isError(NCountSize)) return ERROR(GENERIC);
+              if (FSE_isError(NCountSize)) return NCountSize;
               op += NCountSize; }
-            FSE_buildCTable(CTable_LitLength, norm, max, tableLog);
+            FSE_buildCTable_wksp(CTable_LitLength, norm, max, tableLog, scratchBuffer, sizeof(scratchBuffer));
             LLtype = set_compressed;
     }   }
 
     /* CTable for Offsets */
     {   U32 max = MaxOff;
-        size_t const mostFrequent = FSE_countFast(count, &max, ofCodeTable, nbSeq);
+        size_t const mostFrequent = FSE_countFast_wksp(count, &max, ofCodeTable, nbSeq, zc->entropyScratchSpace);
         if ((mostFrequent == nbSeq) && (nbSeq > 2)) {
             *op++ = ofCodeTable[0];
             FSE_buildCTable_rle(CTable_OffsetBits, (BYTE)max);
             Offtype = set_rle;
-        } else if ((zc->flagStaticTables) && (nbSeq < MAX_SEQ_FOR_STATIC_FSE)) {
+        } else if ((zc->fseCTables_ready) && (nbSeq < MAX_SEQ_FOR_STATIC_FSE)) {
             Offtype = set_repeat;
         } else if ((nbSeq < MIN_SEQ_FOR_DYNAMIC_FSE) || (mostFrequent < (nbSeq >> (OF_defaultNormLog-1)))) {
-            FSE_buildCTable(CTable_OffsetBits, OF_defaultNorm, MaxOff, OF_defaultNormLog);
+            FSE_buildCTable_wksp(CTable_OffsetBits, OF_defaultNorm, MaxOff, OF_defaultNormLog, scratchBuffer, sizeof(scratchBuffer));
             Offtype = set_basic;
         } else {
             size_t nbSeq_1 = nbSeq;
@@ -628,23 +1151,23 @@ size_t ZSTD_compressSequences(ZSTD_CCtx* zc,
             if (count[ofCodeTable[nbSeq-1]]>1) { count[ofCodeTable[nbSeq-1]]--; nbSeq_1--; }
             FSE_normalizeCount(norm, tableLog, count, nbSeq_1, max);
             { size_t const NCountSize = FSE_writeNCount(op, oend-op, norm, max, tableLog);   /* overflow protected */
-              if (FSE_isError(NCountSize)) return ERROR(GENERIC);
+              if (FSE_isError(NCountSize)) return NCountSize;
               op += NCountSize; }
-            FSE_buildCTable(CTable_OffsetBits, norm, max, tableLog);
+            FSE_buildCTable_wksp(CTable_OffsetBits, norm, max, tableLog, scratchBuffer, sizeof(scratchBuffer));
             Offtype = set_compressed;
     }   }
 
     /* CTable for MatchLengths */
     {   U32 max = MaxML;
-        size_t const mostFrequent = FSE_countFast(count, &max, mlCodeTable, nbSeq);
+        size_t const mostFrequent = FSE_countFast_wksp(count, &max, mlCodeTable, nbSeq, zc->entropyScratchSpace);
         if ((mostFrequent == nbSeq) && (nbSeq > 2)) {
             *op++ = *mlCodeTable;
             FSE_buildCTable_rle(CTable_MatchLength, (BYTE)max);
             MLtype = set_rle;
-        } else if ((zc->flagStaticTables) && (nbSeq < MAX_SEQ_FOR_STATIC_FSE)) {
+        } else if ((zc->fseCTables_ready) && (nbSeq < MAX_SEQ_FOR_STATIC_FSE)) {
             MLtype = set_repeat;
         } else if ((nbSeq < MIN_SEQ_FOR_DYNAMIC_FSE) || (mostFrequent < (nbSeq >> (ML_defaultNormLog-1)))) {
-            FSE_buildCTable(CTable_MatchLength, ML_defaultNorm, MaxML, ML_defaultNormLog);
+            FSE_buildCTable_wksp(CTable_MatchLength, ML_defaultNorm, MaxML, ML_defaultNormLog, scratchBuffer, sizeof(scratchBuffer));
             MLtype = set_basic;
         } else {
             size_t nbSeq_1 = nbSeq;
@@ -652,14 +1175,14 @@ size_t ZSTD_compressSequences(ZSTD_CCtx* zc,
             if (count[mlCodeTable[nbSeq-1]]>1) { count[mlCodeTable[nbSeq-1]]--; nbSeq_1--; }
             FSE_normalizeCount(norm, tableLog, count, nbSeq_1, max);
             { size_t const NCountSize = FSE_writeNCount(op, oend-op, norm, max, tableLog);   /* overflow protected */
-              if (FSE_isError(NCountSize)) return ERROR(GENERIC);
+              if (FSE_isError(NCountSize)) return NCountSize;
               op += NCountSize; }
-            FSE_buildCTable(CTable_MatchLength, norm, max, tableLog);
+            FSE_buildCTable_wksp(CTable_MatchLength, norm, max, tableLog, scratchBuffer, sizeof(scratchBuffer));
             MLtype = set_compressed;
     }   }
 
     *seqHead = (BYTE)((LLtype<<6) + (Offtype<<4) + (MLtype<<2));
-    zc->flagStaticTables = 0;
+    zc->fseCTables_ready = 0;
 
     /* Encoding Sequences */
     {   BIT_CStream_t blockStream;
@@ -677,7 +1200,18 @@ size_t ZSTD_compressSequences(ZSTD_CCtx* zc,
         if (MEM_32bits()) BIT_flushBits(&blockStream);
         BIT_addBits(&blockStream, sequences[nbSeq-1].matchLength, ML_bits[mlCodeTable[nbSeq-1]]);
         if (MEM_32bits()) BIT_flushBits(&blockStream);
-        BIT_addBits(&blockStream, sequences[nbSeq-1].offset, ofCodeTable[nbSeq-1]);
+        if (longOffsets) {
+            U32 const ofBits = ofCodeTable[nbSeq-1];
+            int const extraBits = ofBits - MIN(ofBits, STREAM_ACCUMULATOR_MIN-1);
+            if (extraBits) {
+                BIT_addBits(&blockStream, sequences[nbSeq-1].offset, extraBits);
+                BIT_flushBits(&blockStream);
+            }
+            BIT_addBits(&blockStream, sequences[nbSeq-1].offset >> extraBits,
+                        ofBits - extraBits);
+        } else {
+            BIT_addBits(&blockStream, sequences[nbSeq-1].offset, ofCodeTable[nbSeq-1]);
+        }
         BIT_flushBits(&blockStream);
 
         {   size_t n;
@@ -699,7 +1233,17 @@ size_t ZSTD_compressSequences(ZSTD_CCtx* zc,
                 if (MEM_32bits() && ((llBits+mlBits)>24)) BIT_flushBits(&blockStream);
                 BIT_addBits(&blockStream, sequences[n].matchLength, mlBits);
                 if (MEM_32bits()) BIT_flushBits(&blockStream);                  /* (7)*/
-                BIT_addBits(&blockStream, sequences[n].offset, ofBits);         /* 31 */
+                if (longOffsets) {
+                    int const extraBits = ofBits - MIN(ofBits, STREAM_ACCUMULATOR_MIN-1);
+                    if (extraBits) {
+                        BIT_addBits(&blockStream, sequences[n].offset, extraBits);
+                        BIT_flushBits(&blockStream);                            /* (7)*/
+                    }
+                    BIT_addBits(&blockStream, sequences[n].offset >> extraBits,
+                                ofBits - extraBits);                            /* 31 */
+                } else {
+                    BIT_addBits(&blockStream, sequences[n].offset, ofBits);     /* 31 */
+                }
                 BIT_flushBits(&blockStream);                                    /* (7)*/
         }   }
 
@@ -714,12 +1258,15 @@ size_t ZSTD_compressSequences(ZSTD_CCtx* zc,
 
     /* check compressibility */
 _check_compressibility:
-    { size_t const minGain = ZSTD_minGain(srcSize);
-      size_t const maxCSize = srcSize - minGain;
-      if ((size_t)(op-ostart) >= maxCSize) return 0; }
+    {   size_t const minGain = ZSTD_minGain(srcSize);
+        size_t const maxCSize = srcSize - minGain;
+        if ((size_t)(op-ostart) >= maxCSize) {
+            zc->hufCTable_repeatMode = HUF_repeat_none;
+            return 0;
+    }   }
 
     /* confirm repcodes */
-    { int i; for (i=0; i<ZSTD_REP_NUM; i++) zc->rep[i] = zc->savedRep[i]; }
+    { int i; for (i=0; i<ZSTD_REP_NUM; i++) zc->rep[i] = zc->repToConfirm[i]; }
 
     return op - ostart;
 }
@@ -732,27 +1279,34 @@ _check_compressibility:
 */
 MEM_STATIC void ZSTD_storeSeq(seqStore_t* seqStorePtr, size_t litLength, const void* literals, U32 offsetCode, size_t matchCode)
 {
-#if 0  /* for debug */
+#if defined(ZSTD_DEBUG) && (ZSTD_DEBUG >= 6)
     static const BYTE* g_start = NULL;
-    const U32 pos = (U32)(literals - g_start);
-    if (g_start==NULL) g_start = literals;
-    //if ((pos > 1) && (pos < 50000))
-        printf("Cpos %6u :%5u literals & match %3u bytes at distance %6u \n",
+    U32 const pos = (U32)((const BYTE*)literals - g_start);
+    if (g_start==NULL) g_start = (const BYTE*)literals;
+    if ((pos > 0) && (pos < 1000000000))
+        DEBUGLOG(6, "Cpos %6u :%5u literals & match %3u bytes at distance %6u",
                pos, (U32)litLength, (U32)matchCode+MINMATCH, (U32)offsetCode);
 #endif
     /* copy Literals */
+    assert(seqStorePtr->lit + litLength <= seqStorePtr->litStart + 128 KB);
     ZSTD_wildcopy(seqStorePtr->lit, literals, litLength);
     seqStorePtr->lit += litLength;
 
     /* literal Length */
-    if (litLength>0xFFFF) { seqStorePtr->longLengthID = 1; seqStorePtr->longLengthPos = (U32)(seqStorePtr->sequences - seqStorePtr->sequencesStart); }
+    if (litLength>0xFFFF) {
+        seqStorePtr->longLengthID = 1;
+        seqStorePtr->longLengthPos = (U32)(seqStorePtr->sequences - seqStorePtr->sequencesStart);
+    }
     seqStorePtr->sequences[0].litLength = (U16)litLength;
 
     /* match offset */
     seqStorePtr->sequences[0].offset = offsetCode + 1;
 
     /* match Length */
-    if (matchCode>0xFFFF) { seqStorePtr->longLengthID = 2; seqStorePtr->longLengthPos = (U32)(seqStorePtr->sequences - seqStorePtr->sequencesStart); }
+    if (matchCode>0xFFFF) {
+        seqStorePtr->longLengthID = 2;
+        seqStorePtr->longLengthPos = (U32)(seqStorePtr->sequences - seqStorePtr->sequencesStart);
+    }
     seqStorePtr->sequences[0].matchLength = (U16)matchCode;
 
     seqStorePtr->sequences++;
@@ -773,7 +1327,14 @@ static unsigned ZSTD_NbCommonBytes (register size_t val)
 #       elif defined(__GNUC__) && (__GNUC__ >= 3)
             return (__builtin_ctzll((U64)val) >> 3);
 #       else
-            static const int DeBruijnBytePos[64] = { 0, 0, 0, 0, 0, 1, 1, 2, 0, 3, 1, 3, 1, 4, 2, 7, 0, 2, 3, 6, 1, 5, 3, 5, 1, 3, 4, 4, 2, 5, 6, 7, 7, 0, 1, 2, 3, 3, 4, 6, 2, 6, 5, 5, 3, 4, 5, 6, 7, 1, 2, 4, 6, 4, 4, 5, 7, 2, 6, 5, 7, 6, 7, 7 };
+            static const int DeBruijnBytePos[64] = { 0, 0, 0, 0, 0, 1, 1, 2,
+                                                     0, 3, 1, 3, 1, 4, 2, 7,
+                                                     0, 2, 3, 6, 1, 5, 3, 5,
+                                                     1, 3, 4, 4, 2, 5, 6, 7,
+                                                     7, 0, 1, 2, 3, 3, 4, 6,
+                                                     2, 6, 5, 5, 3, 4, 5, 6,
+                                                     7, 1, 2, 4, 6, 4, 4, 5,
+                                                     7, 2, 6, 5, 7, 6, 7, 7 };
             return DeBruijnBytePos[((U64)((val & -(long long)val) * 0x0218A392CDABBD3FULL)) >> 58];
 #       endif
         } else { /* 32 bits */
@@ -784,7 +1345,10 @@ static unsigned ZSTD_NbCommonBytes (register size_t val)
 #       elif defined(__GNUC__) && (__GNUC__ >= 3)
             return (__builtin_ctz((U32)val) >> 3);
 #       else
-            static const int DeBruijnBytePos[32] = { 0, 0, 3, 0, 3, 1, 3, 0, 3, 2, 2, 1, 3, 2, 0, 1, 3, 3, 1, 2, 2, 2, 2, 0, 3, 1, 2, 0, 1, 0, 1, 1 };
+            static const int DeBruijnBytePos[32] = { 0, 0, 3, 0, 3, 1, 3, 0,
+                                                     3, 2, 2, 1, 3, 2, 0, 1,
+                                                     3, 3, 1, 2, 2, 2, 2, 0,
+                                                     3, 1, 2, 0, 1, 0, 1, 1 };
             return DeBruijnBytePos[((U32)((val & -(S32)val) * 0x077CB531U)) >> 27];
 #       endif
         }
@@ -856,7 +1420,7 @@ static size_t ZSTD_count_2segments(const BYTE* ip, const BYTE* match, const BYTE
 ***************************************/
 static const U32 prime3bytes = 506832829U;
 static U32    ZSTD_hash3(U32 u, U32 h) { return ((u << (32-24)) * prime3bytes)  >> (32-h) ; }
-MEM_STATIC size_t ZSTD_hash3Ptr(const void* ptr, U32 h) { return ZSTD_hash3(MEM_readLE32(ptr), h); }   /* only in zstd_opt.h */
+MEM_STATIC size_t ZSTD_hash3Ptr(const void* ptr, U32 h) { return ZSTD_hash3(MEM_readLE32(ptr), h); } /* only in zstd_opt.h */
 
 static const U32 prime4bytes = 2654435761U;
 static U32    ZSTD_hash4(U32 u, U32 h) { return (u * prime4bytes) >> (32-h) ; }
@@ -898,7 +1462,7 @@ static size_t ZSTD_hashPtr(const void* p, U32 hBits, U32 mls)
 static void ZSTD_fillHashTable (ZSTD_CCtx* zc, const void* end, const U32 mls)
 {
     U32* const hashTable = zc->hashTable;
-    U32  const hBits = zc->params.cParams.hashLog;
+    U32  const hBits = zc->appliedParams.cParams.hashLog;
     const BYTE* const base = zc->base;
     const BYTE* ip = base + zc->nextToUpdate;
     const BYTE* const iend = ((const BYTE*)end) - HASH_READ_SIZE;
@@ -917,7 +1481,7 @@ void ZSTD_compressBlock_fast_generic(ZSTD_CCtx* cctx,
                                const U32 mls)
 {
     U32* const hashTable = cctx->hashTable;
-    U32  const hBits = cctx->params.cParams.hashLog;
+    U32  const hBits = cctx->appliedParams.cParams.hashLog;
     seqStore_t* seqStorePtr = &(cctx->seqStore);
     const BYTE* const base = cctx->base;
     const BYTE* const istart = (const BYTE*)src;
@@ -988,8 +1552,8 @@ void ZSTD_compressBlock_fast_generic(ZSTD_CCtx* cctx,
     }   }   }
 
     /* save reps for next block */
-    cctx->savedRep[0] = offset_1 ? offset_1 : offsetSaved;
-    cctx->savedRep[1] = offset_2 ? offset_2 : offsetSaved;
+    cctx->repToConfirm[0] = offset_1 ? offset_1 : offsetSaved;
+    cctx->repToConfirm[1] = offset_2 ? offset_2 : offsetSaved;
 
     /* Last Literals */
     {   size_t const lastLLSize = iend - anchor;
@@ -1002,10 +1566,10 @@ void ZSTD_compressBlock_fast_generic(ZSTD_CCtx* cctx,
 static void ZSTD_compressBlock_fast(ZSTD_CCtx* ctx,
                        const void* src, size_t srcSize)
 {
-    const U32 mls = ctx->params.cParams.searchLength;
+    const U32 mls = ctx->appliedParams.cParams.searchLength;
     switch(mls)
     {
-    default:
+    default: /* includes case 3 */
     case 4 :
         ZSTD_compressBlock_fast_generic(ctx, src, srcSize, 4); return;
     case 5 :
@@ -1023,7 +1587,7 @@ static void ZSTD_compressBlock_fast_extDict_generic(ZSTD_CCtx* ctx,
                                  const U32 mls)
 {
     U32* hashTable = ctx->hashTable;
-    const U32 hBits = ctx->params.cParams.hashLog;
+    const U32 hBits = ctx->appliedParams.cParams.hashLog;
     seqStore_t* seqStorePtr = &(ctx->seqStore);
     const BYTE* const base = ctx->base;
     const BYTE* const dictBase = ctx->dictBase;
@@ -1055,7 +1619,7 @@ static void ZSTD_compressBlock_fast_extDict_generic(ZSTD_CCtx* ctx,
         if ( (((U32)((dictLimit-1) - repIndex) >= 3) /* intentional underflow */ & (repIndex > lowestIndex))
            && (MEM_read32(repMatch) == MEM_read32(ip+1)) ) {
             const BYTE* repMatchEnd = repIndex < dictLimit ? dictEnd : iend;
-            mLength = ZSTD_count_2segments(ip+1+EQUAL_READ32, repMatch+EQUAL_READ32, iend, repMatchEnd, lowPrefixPtr) + EQUAL_READ32;
+            mLength = ZSTD_count_2segments(ip+1+4, repMatch+4, iend, repMatchEnd, lowPrefixPtr) + 4;
             ip++;
             ZSTD_storeSeq(seqStorePtr, ip-anchor, anchor, 0, mLength-MINMATCH);
         } else {
@@ -1067,7 +1631,7 @@ static void ZSTD_compressBlock_fast_extDict_generic(ZSTD_CCtx* ctx,
             {   const BYTE* matchEnd = matchIndex < dictLimit ? dictEnd : iend;
                 const BYTE* lowMatchPtr = matchIndex < dictLimit ? dictStart : lowPrefixPtr;
                 U32 offset;
-                mLength = ZSTD_count_2segments(ip+EQUAL_READ32, match+EQUAL_READ32, iend, matchEnd, lowPrefixPtr) + EQUAL_READ32;
+                mLength = ZSTD_count_2segments(ip+4, match+4, iend, matchEnd, lowPrefixPtr) + 4;
                 while (((ip>anchor) & (match>lowMatchPtr)) && (ip[-1] == match[-1])) { ip--; match--; mLength++; }   /* catch up */
                 offset = current - matchIndex;
                 offset_2 = offset_1;
@@ -1091,7 +1655,7 @@ static void ZSTD_compressBlock_fast_extDict_generic(ZSTD_CCtx* ctx,
                 if ( (((U32)((dictLimit-1) - repIndex2) >= 3) & (repIndex2 > lowestIndex))  /* intentional overflow */
                    && (MEM_read32(repMatch2) == MEM_read32(ip)) ) {
                     const BYTE* const repEnd2 = repIndex2 < dictLimit ? dictEnd : iend;
-                    size_t repLength2 = ZSTD_count_2segments(ip+EQUAL_READ32, repMatch2+EQUAL_READ32, iend, repEnd2, lowPrefixPtr) + EQUAL_READ32;
+                    size_t const repLength2 = ZSTD_count_2segments(ip+4, repMatch2+4, iend, repEnd2, lowPrefixPtr) + 4;
                     U32 tmpOffset = offset_2; offset_2 = offset_1; offset_1 = tmpOffset;   /* swap offset_2 <=> offset_1 */
                     ZSTD_storeSeq(seqStorePtr, 0, anchor, 0, repLength2-MINMATCH);
                     hashTable[ZSTD_hashPtr(ip, hBits, mls)] = current2;
@@ -1103,7 +1667,7 @@ static void ZSTD_compressBlock_fast_extDict_generic(ZSTD_CCtx* ctx,
     }   }   }
 
     /* save reps for next block */
-    ctx->savedRep[0] = offset_1; ctx->savedRep[1] = offset_2;
+    ctx->repToConfirm[0] = offset_1; ctx->repToConfirm[1] = offset_2;
 
     /* Last Literals */
     {   size_t const lastLLSize = iend - anchor;
@@ -1116,10 +1680,10 @@ static void ZSTD_compressBlock_fast_extDict_generic(ZSTD_CCtx* ctx,
 static void ZSTD_compressBlock_fast_extDict(ZSTD_CCtx* ctx,
                          const void* src, size_t srcSize)
 {
-    U32 const mls = ctx->params.cParams.searchLength;
+    U32 const mls = ctx->appliedParams.cParams.searchLength;
     switch(mls)
     {
-    default:
+    default: /* includes case 3 */
     case 4 :
         ZSTD_compressBlock_fast_extDict_generic(ctx, src, srcSize, 4); return;
     case 5 :
@@ -1138,9 +1702,9 @@ static void ZSTD_compressBlock_fast_extDict(ZSTD_CCtx* ctx,
 static void ZSTD_fillDoubleHashTable (ZSTD_CCtx* cctx, const void* end, const U32 mls)
 {
     U32* const hashLarge = cctx->hashTable;
-    U32  const hBitsL = cctx->params.cParams.hashLog;
+    U32  const hBitsL = cctx->appliedParams.cParams.hashLog;
     U32* const hashSmall = cctx->chainTable;
-    U32  const hBitsS = cctx->params.cParams.chainLog;
+    U32  const hBitsS = cctx->appliedParams.cParams.chainLog;
     const BYTE* const base = cctx->base;
     const BYTE* ip = base + cctx->nextToUpdate;
     const BYTE* const iend = ((const BYTE*)end) - HASH_READ_SIZE;
@@ -1160,9 +1724,9 @@ void ZSTD_compressBlock_doubleFast_generic(ZSTD_CCtx* cctx,
                                  const U32 mls)
 {
     U32* const hashLong = cctx->hashTable;
-    const U32 hBitsL = cctx->params.cParams.hashLog;
+    const U32 hBitsL = cctx->appliedParams.cParams.hashLog;
     U32* const hashSmall = cctx->chainTable;
-    const U32 hBitsS = cctx->params.cParams.chainLog;
+    const U32 hBitsS = cctx->appliedParams.cParams.chainLog;
     seqStore_t* seqStorePtr = &(cctx->seqStore);
     const BYTE* const base = cctx->base;
     const BYTE* const istart = (const BYTE*)src;
@@ -1194,7 +1758,9 @@ void ZSTD_compressBlock_doubleFast_generic(ZSTD_CCtx* cctx,
         const BYTE* match = base + matchIndexS;
         hashLong[h2] = hashSmall[h] = current;   /* update hash tables */
 
-        if ((offset_1 > 0) & (MEM_read32(ip+1-offset_1) == MEM_read32(ip+1))) { /* note : by construction, offset_1 <= current */
+        assert(offset_1 <= current);   /* supposedly guaranteed by construction */
+        if ((offset_1 > 0) & (MEM_read32(ip+1-offset_1) == MEM_read32(ip+1))) {
+            /* favor repcode */
             mLength = ZSTD_count(ip+1+4, ip+1+4-offset_1, iend) + 4;
             ip++;
             ZSTD_storeSeq(seqStorePtr, ip-anchor, anchor, 0, mLength-MINMATCH);
@@ -1205,15 +1771,15 @@ void ZSTD_compressBlock_doubleFast_generic(ZSTD_CCtx* cctx,
                 offset = (U32)(ip-matchLong);
                 while (((ip>anchor) & (matchLong>lowest)) && (ip[-1] == matchLong[-1])) { ip--; matchLong--; mLength++; } /* catch up */
             } else if ( (matchIndexS > lowestIndex) && (MEM_read32(match) == MEM_read32(ip)) ) {
-                size_t const h3 = ZSTD_hashPtr(ip+1, hBitsL, 8);
-                U32 const matchIndex3 = hashLong[h3];
-                const BYTE* match3 = base + matchIndex3;
-                hashLong[h3] = current + 1;
-                if ( (matchIndex3 > lowestIndex) && (MEM_read64(match3) == MEM_read64(ip+1)) ) {
-                    mLength = ZSTD_count(ip+9, match3+8, iend) + 8;
+                size_t const hl3 = ZSTD_hashPtr(ip+1, hBitsL, 8);
+                U32 const matchIndexL3 = hashLong[hl3];
+                const BYTE* matchL3 = base + matchIndexL3;
+                hashLong[hl3] = current + 1;
+                if ( (matchIndexL3 > lowestIndex) && (MEM_read64(matchL3) == MEM_read64(ip+1)) ) {
+                    mLength = ZSTD_count(ip+9, matchL3+8, iend) + 8;
                     ip++;
-                    offset = (U32)(ip-match3);
-                    while (((ip>anchor) & (match3>lowest)) && (ip[-1] == match3[-1])) { ip--; match3--; mLength++; } /* catch up */
+                    offset = (U32)(ip-matchL3);
+                    while (((ip>anchor) & (matchL3>lowest)) && (ip[-1] == matchL3[-1])) { ip--; matchL3--; mLength++; } /* catch up */
                 } else {
                     mLength = ZSTD_count(ip+4, match+4, iend) + 4;
                     offset = (U32)(ip-match);
@@ -1257,8 +1823,8 @@ void ZSTD_compressBlock_doubleFast_generic(ZSTD_CCtx* cctx,
     }   }   }
 
     /* save reps for next block */
-    cctx->savedRep[0] = offset_1 ? offset_1 : offsetSaved;
-    cctx->savedRep[1] = offset_2 ? offset_2 : offsetSaved;
+    cctx->repToConfirm[0] = offset_1 ? offset_1 : offsetSaved;
+    cctx->repToConfirm[1] = offset_2 ? offset_2 : offsetSaved;
 
     /* Last Literals */
     {   size_t const lastLLSize = iend - anchor;
@@ -1270,10 +1836,10 @@ void ZSTD_compressBlock_doubleFast_generic(ZSTD_CCtx* cctx,
 
 static void ZSTD_compressBlock_doubleFast(ZSTD_CCtx* ctx, const void* src, size_t srcSize)
 {
-    const U32 mls = ctx->params.cParams.searchLength;
+    const U32 mls = ctx->appliedParams.cParams.searchLength;
     switch(mls)
     {
-    default:
+    default: /* includes case 3 */
     case 4 :
         ZSTD_compressBlock_doubleFast_generic(ctx, src, srcSize, 4); return;
     case 5 :
@@ -1291,9 +1857,9 @@ static void ZSTD_compressBlock_doubleFast_extDict_generic(ZSTD_CCtx* ctx,
                                  const U32 mls)
 {
     U32* const hashLong = ctx->hashTable;
-    U32  const hBitsL = ctx->params.cParams.hashLog;
+    U32  const hBitsL = ctx->appliedParams.cParams.hashLog;
     U32* const hashSmall = ctx->chainTable;
-    U32  const hBitsS = ctx->params.cParams.chainLog;
+    U32  const hBitsS = ctx->appliedParams.cParams.chainLog;
     seqStore_t* seqStorePtr = &(ctx->seqStore);
     const BYTE* const base = ctx->base;
     const BYTE* const dictBase = ctx->dictBase;
@@ -1382,8 +1948,8 @@ static void ZSTD_compressBlock_doubleFast_extDict_generic(ZSTD_CCtx* ctx,
 
         if (ip <= ilimit) {
             /* Fill Table */
-			hashSmall[ZSTD_hashPtr(base+current+2, hBitsS, mls)] = current+2;
-			hashLong[ZSTD_hashPtr(base+current+2, hBitsL, 8)] = current+2;
+            hashSmall[ZSTD_hashPtr(base+current+2, hBitsS, mls)] = current+2;
+            hashLong[ZSTD_hashPtr(base+current+2, hBitsL, 8)] = current+2;
             hashSmall[ZSTD_hashPtr(ip-2, hBitsS, mls)] = (U32)(ip-2-base);
             hashLong[ZSTD_hashPtr(ip-2, hBitsL, 8)] = (U32)(ip-2-base);
             /* check immediate repcode */
@@ -1394,7 +1960,7 @@ static void ZSTD_compressBlock_doubleFast_extDict_generic(ZSTD_CCtx* ctx,
                 if ( (((U32)((dictLimit-1) - repIndex2) >= 3) & (repIndex2 > lowestIndex))  /* intentional overflow */
                    && (MEM_read32(repMatch2) == MEM_read32(ip)) ) {
                     const BYTE* const repEnd2 = repIndex2 < dictLimit ? dictEnd : iend;
-                    size_t const repLength2 = ZSTD_count_2segments(ip+EQUAL_READ32, repMatch2+EQUAL_READ32, iend, repEnd2, lowPrefixPtr) + EQUAL_READ32;
+                    size_t const repLength2 = ZSTD_count_2segments(ip+4, repMatch2+4, iend, repEnd2, lowPrefixPtr) + 4;
                     U32 tmpOffset = offset_2; offset_2 = offset_1; offset_1 = tmpOffset;   /* swap offset_2 <=> offset_1 */
                     ZSTD_storeSeq(seqStorePtr, 0, anchor, 0, repLength2-MINMATCH);
                     hashSmall[ZSTD_hashPtr(ip, hBitsS, mls)] = current2;
@@ -1407,7 +1973,7 @@ static void ZSTD_compressBlock_doubleFast_extDict_generic(ZSTD_CCtx* ctx,
     }   }   }
 
     /* save reps for next block */
-    ctx->savedRep[0] = offset_1; ctx->savedRep[1] = offset_2;
+    ctx->repToConfirm[0] = offset_1; ctx->repToConfirm[1] = offset_2;
 
     /* Last Literals */
     {   size_t const lastLLSize = iend - anchor;
@@ -1420,10 +1986,10 @@ static void ZSTD_compressBlock_doubleFast_extDict_generic(ZSTD_CCtx* ctx,
 static void ZSTD_compressBlock_doubleFast_extDict(ZSTD_CCtx* ctx,
                          const void* src, size_t srcSize)
 {
-    U32 const mls = ctx->params.cParams.searchLength;
+    U32 const mls = ctx->appliedParams.cParams.searchLength;
     switch(mls)
     {
-    default:
+    default: /* includes case 3 */
     case 4 :
         ZSTD_compressBlock_doubleFast_extDict_generic(ctx, src, srcSize, 4); return;
     case 5 :
@@ -1446,10 +2012,10 @@ static U32 ZSTD_insertBt1(ZSTD_CCtx* zc, const BYTE* const ip, const U32 mls, co
                           U32 extDict)
 {
     U32*   const hashTable = zc->hashTable;
-    U32    const hashLog = zc->params.cParams.hashLog;
+    U32    const hashLog = zc->appliedParams.cParams.hashLog;
     size_t const h  = ZSTD_hashPtr(ip, hashLog, mls);
     U32*   const bt = zc->chainTable;
-    U32    const btLog  = zc->params.cParams.chainLog - 1;
+    U32    const btLog  = zc->appliedParams.cParams.chainLog - 1;
     U32    const btMask = (1 << btLog) - 1;
     U32 matchIndex = hashTable[h];
     size_t commonLengthSmaller=0, commonLengthLarger=0;
@@ -1458,7 +2024,7 @@ static U32 ZSTD_insertBt1(ZSTD_CCtx* zc, const BYTE* const ip, const U32 mls, co
     const U32 dictLimit = zc->dictLimit;
     const BYTE* const dictEnd = dictBase + dictLimit;
     const BYTE* const prefixStart = base + dictLimit;
-    const BYTE* match = base + matchIndex;
+    const BYTE* match;
     const U32 current = (U32)(ip-base);
     const U32 btLow = btMask >= current ? 0 : current - btMask;
     U32* smallerPtr = bt + 2*(current&btMask);
@@ -1477,8 +2043,9 @@ static U32 ZSTD_insertBt1(ZSTD_CCtx* zc, const BYTE* const ip, const U32 mls, co
     hashTable[h] = current;   /* Update Hash Table */
 
     while (nbCompares-- && (matchIndex > windowLow)) {
-        U32* nextPtr = bt + 2*(matchIndex & btMask);
+        U32* const nextPtr = bt + 2*(matchIndex & btMask);
         size_t matchLength = MIN(commonLengthSmaller, commonLengthLarger);   /* guaranteed minimum nb of common bytes */
+
 #ifdef ZSTD_C_PREDICT   /* note : can create issues when hlog small <= 11 */
         const U32* predictPtr = bt + 2*((matchIndex-1) & btMask);   /* written this way, as bt is a roll buffer */
         if (matchIndex == predictedSmall) {
@@ -1507,7 +2074,7 @@ static U32 ZSTD_insertBt1(ZSTD_CCtx* zc, const BYTE* const ip, const U32 mls, co
             match = dictBase + matchIndex;
             matchLength += ZSTD_count_2segments(ip+matchLength, match+matchLength, iend, dictEnd, prefixStart);
             if (matchIndex+matchLength >= dictLimit)
-				match = base + matchIndex;   /* to prepare for next usage of match[matchLength] */
+                match = base + matchIndex;   /* to prepare for next usage of match[matchLength] */
         }
 
         if (matchLength > bestLength) {
@@ -1550,10 +2117,10 @@ static size_t ZSTD_insertBtAndFindBestMatch (
                         U32 extDict)
 {
     U32*   const hashTable = zc->hashTable;
-    U32    const hashLog = zc->params.cParams.hashLog;
+    U32    const hashLog = zc->appliedParams.cParams.hashLog;
     size_t const h  = ZSTD_hashPtr(ip, hashLog, mls);
     U32*   const bt = zc->chainTable;
-    U32    const btLog  = zc->params.cParams.chainLog - 1;
+    U32    const btLog  = zc->appliedParams.cParams.chainLog - 1;
     U32    const btMask = (1 << btLog) - 1;
     U32 matchIndex  = hashTable[h];
     size_t commonLengthSmaller=0, commonLengthLarger=0;
@@ -1574,7 +2141,7 @@ static size_t ZSTD_insertBtAndFindBestMatch (
     hashTable[h] = current;   /* Update Hash Table */
 
     while (nbCompares-- && (matchIndex > windowLow)) {
-        U32* nextPtr = bt + 2*(matchIndex & btMask);
+        U32* const nextPtr = bt + 2*(matchIndex & btMask);
         size_t matchLength = MIN(commonLengthSmaller, commonLengthLarger);   /* guaranteed minimum nb of common bytes */
         const BYTE* match;
 
@@ -1586,7 +2153,7 @@ static size_t ZSTD_insertBtAndFindBestMatch (
             match = dictBase + matchIndex;
             matchLength += ZSTD_count_2segments(ip+matchLength, match+matchLength, iend, dictEnd, prefixStart);
             if (matchIndex+matchLength >= dictLimit)
-				match = base + matchIndex;   /* to prepare for next usage of match[matchLength] */
+                match = base + matchIndex;   /* to prepare for next usage of match[matchLength] */
         }
 
         if (matchLength > bestLength) {
@@ -1652,9 +2219,10 @@ static size_t ZSTD_BtFindBestMatch_selectMLS (
 {
     switch(matchLengthSearch)
     {
-    default :
+    default : /* includes case 3 */
     case 4 : return ZSTD_BtFindBestMatch(zc, ip, iLimit, offsetPtr, maxNbAttempts, 4);
     case 5 : return ZSTD_BtFindBestMatch(zc, ip, iLimit, offsetPtr, maxNbAttempts, 5);
+    case 7 :
     case 6 : return ZSTD_BtFindBestMatch(zc, ip, iLimit, offsetPtr, maxNbAttempts, 6);
     }
 }
@@ -1691,9 +2259,10 @@ static size_t ZSTD_BtFindBestMatch_selectMLS_extDict (
 {
     switch(matchLengthSearch)
     {
-    default :
+    default : /* includes case 3 */
     case 4 : return ZSTD_BtFindBestMatch_extDict(zc, ip, iLimit, offsetPtr, maxNbAttempts, 4);
     case 5 : return ZSTD_BtFindBestMatch_extDict(zc, ip, iLimit, offsetPtr, maxNbAttempts, 5);
+    case 7 :
     case 6 : return ZSTD_BtFindBestMatch_extDict(zc, ip, iLimit, offsetPtr, maxNbAttempts, 6);
     }
 }
@@ -1706,14 +2275,14 @@ static size_t ZSTD_BtFindBestMatch_selectMLS_extDict (
 #define NEXT_IN_CHAIN(d, mask)   chainTable[(d) & mask]
 
 /* Update chains up to ip (excluded)
-   Assumption : always within prefix (ie. not within extDict) */
+   Assumption : always within prefix (i.e. not within extDict) */
 FORCE_INLINE
 U32 ZSTD_insertAndFindFirstIndex (ZSTD_CCtx* zc, const BYTE* ip, U32 mls)
 {
     U32* const hashTable  = zc->hashTable;
-    const U32 hashLog = zc->params.cParams.hashLog;
+    const U32 hashLog = zc->appliedParams.cParams.hashLog;
     U32* const chainTable = zc->chainTable;
-    const U32 chainMask = (1 << zc->params.cParams.chainLog) - 1;
+    const U32 chainMask = (1 << zc->appliedParams.cParams.chainLog) - 1;
     const BYTE* const base = zc->base;
     const U32 target = (U32)(ip - base);
     U32 idx = zc->nextToUpdate;
@@ -1730,8 +2299,8 @@ U32 ZSTD_insertAndFindFirstIndex (ZSTD_CCtx* zc, const BYTE* ip, U32 mls)
 }
 
 
-
-FORCE_INLINE /* inlining is important to hardwire a hot branch (template emulation) */
+/* inlining is important to hardwire a hot branch (template emulation) */
+FORCE_INLINE
 size_t ZSTD_HcFindBestMatch_generic (
                         ZSTD_CCtx* zc,   /* Index table will be updated */
                         const BYTE* const ip, const BYTE* const iLimit,
@@ -1739,7 +2308,7 @@ size_t ZSTD_HcFindBestMatch_generic (
                         const U32 maxNbAttempts, const U32 mls, const U32 extDict)
 {
     U32* const chainTable = zc->chainTable;
-    const U32 chainSize = (1 << zc->params.cParams.chainLog);
+    const U32 chainSize = (1 << zc->appliedParams.cParams.chainLog);
     const U32 chainMask = chainSize-1;
     const BYTE* const base = zc->base;
     const BYTE* const dictBase = zc->dictBase;
@@ -1750,7 +2319,7 @@ size_t ZSTD_HcFindBestMatch_generic (
     const U32 current = (U32)(ip-base);
     const U32 minChain = current > chainSize ? current - chainSize : 0;
     int nbAttempts=maxNbAttempts;
-    size_t ml=EQUAL_READ32-1;
+    size_t ml=4-1;
 
     /* HC4 match finder */
     U32 matchIndex = ZSTD_insertAndFindFirstIndex (zc, ip, mls);
@@ -1765,11 +2334,15 @@ size_t ZSTD_HcFindBestMatch_generic (
         } else {
             match = dictBase + matchIndex;
             if (MEM_read32(match) == MEM_read32(ip))   /* assumption : matchIndex <= dictLimit-4 (by table construction) */
-                currentMl = ZSTD_count_2segments(ip+EQUAL_READ32, match+EQUAL_READ32, iLimit, dictEnd, prefixStart) + EQUAL_READ32;
+                currentMl = ZSTD_count_2segments(ip+4, match+4, iLimit, dictEnd, prefixStart) + 4;
         }
 
         /* save best solution */
-        if (currentMl > ml) { ml = currentMl; *offsetPtr = current - matchIndex + ZSTD_REP_MOVE; if (ip+currentMl == iLimit) break; /* best possible, and avoid read overflow*/ }
+        if (currentMl > ml) {
+            ml = currentMl;
+            *offsetPtr = current - matchIndex + ZSTD_REP_MOVE;
+            if (ip+currentMl == iLimit) break; /* best possible, avoids read overflow on next attempt */
+        }
 
         if (matchIndex <= minChain) break;
         matchIndex = NEXT_IN_CHAIN(matchIndex, chainMask);
@@ -1787,9 +2360,10 @@ FORCE_INLINE size_t ZSTD_HcFindBestMatch_selectMLS (
 {
     switch(matchLengthSearch)
     {
-    default :
+    default : /* includes case 3 */
     case 4 : return ZSTD_HcFindBestMatch_generic(zc, ip, iLimit, offsetPtr, maxNbAttempts, 4, 0);
     case 5 : return ZSTD_HcFindBestMatch_generic(zc, ip, iLimit, offsetPtr, maxNbAttempts, 5, 0);
+    case 7 :
     case 6 : return ZSTD_HcFindBestMatch_generic(zc, ip, iLimit, offsetPtr, maxNbAttempts, 6, 0);
     }
 }
@@ -1803,9 +2377,10 @@ FORCE_INLINE size_t ZSTD_HcFindBestMatch_extDict_selectMLS (
 {
     switch(matchLengthSearch)
     {
-    default :
+    default : /* includes case 3 */
     case 4 : return ZSTD_HcFindBestMatch_generic(zc, ip, iLimit, offsetPtr, maxNbAttempts, 4, 1);
     case 5 : return ZSTD_HcFindBestMatch_generic(zc, ip, iLimit, offsetPtr, maxNbAttempts, 5, 1);
+    case 7 :
     case 6 : return ZSTD_HcFindBestMatch_generic(zc, ip, iLimit, offsetPtr, maxNbAttempts, 6, 1);
     }
 }
@@ -1827,8 +2402,8 @@ void ZSTD_compressBlock_lazy_generic(ZSTD_CCtx* ctx,
     const BYTE* const ilimit = iend - 8;
     const BYTE* const base = ctx->base + ctx->dictLimit;
 
-    U32 const maxSearches = 1 << ctx->params.cParams.searchLog;
-    U32 const mls = ctx->params.cParams.searchLength;
+    U32 const maxSearches = 1 << ctx->appliedParams.cParams.searchLog;
+    U32 const mls = ctx->appliedParams.cParams.searchLength;
 
     typedef size_t (*searchMax_f)(ZSTD_CCtx* zc, const BYTE* ip, const BYTE* iLimit,
                         size_t* offsetPtr,
@@ -1853,7 +2428,7 @@ void ZSTD_compressBlock_lazy_generic(ZSTD_CCtx* ctx,
         /* check repCode */
         if ((offset_1>0) & (MEM_read32(ip+1) == MEM_read32(ip+1 - offset_1))) {
             /* repcode : we take it */
-            matchLength = ZSTD_count(ip+1+EQUAL_READ32, ip+1+EQUAL_READ32-offset_1, iend) + EQUAL_READ32;
+            matchLength = ZSTD_count(ip+1+4, ip+1+4-offset_1, iend) + 4;
             if (depth==0) goto _storeSequence;
         }
 
@@ -1864,7 +2439,7 @@ void ZSTD_compressBlock_lazy_generic(ZSTD_CCtx* ctx,
                 matchLength = ml2, start = ip, offset=offsetFound;
         }
 
-        if (matchLength < EQUAL_READ32) {
+        if (matchLength < 4) {
             ip += ((ip-anchor) >> g_searchStrength) + 1;   /* jump faster over incompressible sections */
             continue;
         }
@@ -1874,17 +2449,17 @@ void ZSTD_compressBlock_lazy_generic(ZSTD_CCtx* ctx,
         while (ip<ilimit) {
             ip ++;
             if ((offset) && ((offset_1>0) & (MEM_read32(ip) == MEM_read32(ip - offset_1)))) {
-                size_t const mlRep = ZSTD_count(ip+EQUAL_READ32, ip+EQUAL_READ32-offset_1, iend) + EQUAL_READ32;
+                size_t const mlRep = ZSTD_count(ip+4, ip+4-offset_1, iend) + 4;
                 int const gain2 = (int)(mlRep * 3);
                 int const gain1 = (int)(matchLength*3 - ZSTD_highbit32((U32)offset+1) + 1);
-                if ((mlRep >= EQUAL_READ32) && (gain2 > gain1))
+                if ((mlRep >= 4) && (gain2 > gain1))
                     matchLength = mlRep, offset = 0, start = ip;
             }
             {   size_t offset2=99999999;
                 size_t const ml2 = searchMax(ctx, ip, iend, &offset2, maxSearches, mls);
                 int const gain2 = (int)(ml2*4 - ZSTD_highbit32((U32)offset2+1));   /* raw approx */
                 int const gain1 = (int)(matchLength*4 - ZSTD_highbit32((U32)offset+1) + 4);
-                if ((ml2 >= EQUAL_READ32) && (gain2 > gain1)) {
+                if ((ml2 >= 4) && (gain2 > gain1)) {
                     matchLength = ml2, offset = offset2, start = ip;
                     continue;   /* search a better one */
             }   }
@@ -1893,30 +2468,36 @@ void ZSTD_compressBlock_lazy_generic(ZSTD_CCtx* ctx,
             if ((depth==2) && (ip<ilimit)) {
                 ip ++;
                 if ((offset) && ((offset_1>0) & (MEM_read32(ip) == MEM_read32(ip - offset_1)))) {
-                    size_t const ml2 = ZSTD_count(ip+EQUAL_READ32, ip+EQUAL_READ32-offset_1, iend) + EQUAL_READ32;
+                    size_t const ml2 = ZSTD_count(ip+4, ip+4-offset_1, iend) + 4;
                     int const gain2 = (int)(ml2 * 4);
                     int const gain1 = (int)(matchLength*4 - ZSTD_highbit32((U32)offset+1) + 1);
-                    if ((ml2 >= EQUAL_READ32) && (gain2 > gain1))
+                    if ((ml2 >= 4) && (gain2 > gain1))
                         matchLength = ml2, offset = 0, start = ip;
                 }
                 {   size_t offset2=99999999;
                     size_t const ml2 = searchMax(ctx, ip, iend, &offset2, maxSearches, mls);
                     int const gain2 = (int)(ml2*4 - ZSTD_highbit32((U32)offset2+1));   /* raw approx */
                     int const gain1 = (int)(matchLength*4 - ZSTD_highbit32((U32)offset+1) + 7);
-                    if ((ml2 >= EQUAL_READ32) && (gain2 > gain1)) {
+                    if ((ml2 >= 4) && (gain2 > gain1)) {
                         matchLength = ml2, offset = offset2, start = ip;
                         continue;
             }   }   }
             break;  /* nothing found : store previous solution */
         }
 
+        /* NOTE:
+         * start[-offset+ZSTD_REP_MOVE-1] is undefined behavior :
+         * the index (-offset+ZSTD_REP_MOVE-1) is unsigned, so adding it to start
+         * overflows the pointer. Use (start-offset+ZSTD_REP_MOVE)[-1] instead.
+         */
         /* catch up */
         if (offset) {
-            while ((start>anchor) && (start>base+offset-ZSTD_REP_MOVE) && (start[-1] == start[-1-offset+ZSTD_REP_MOVE]))   /* only search for offset within prefix */
+            while ( (start > anchor)
+                 && (start > base+offset-ZSTD_REP_MOVE)
+                 && (start[-1] == (start-offset+ZSTD_REP_MOVE)[-1]) )  /* only search for offset within prefix */
                 { start--; matchLength++; }
             offset_2 = offset_1; offset_1 = (U32)(offset - ZSTD_REP_MOVE);
         }
-
         /* store sequence */
 _storeSequence:
         {   size_t const litLength = start - anchor;
@@ -1929,7 +2510,7 @@ _storeSequence:
              && ((offset_2>0)
              & (MEM_read32(ip) == MEM_read32(ip - offset_2)) )) {
             /* store sequence */
-            matchLength = ZSTD_count(ip+EQUAL_READ32, ip+EQUAL_READ32-offset_2, iend) + EQUAL_READ32;
+            matchLength = ZSTD_count(ip+4, ip+4-offset_2, iend) + 4;
             offset = offset_2; offset_2 = offset_1; offset_1 = (U32)offset; /* swap repcodes */
             ZSTD_storeSeq(seqStorePtr, 0, anchor, 0, matchLength-MINMATCH);
             ip += matchLength;
@@ -1938,8 +2519,8 @@ _storeSequence:
     }   }
 
     /* Save reps for next block */
-    ctx->savedRep[0] = offset_1 ? offset_1 : savedOffset;
-    ctx->savedRep[1] = offset_2 ? offset_2 : savedOffset;
+    ctx->repToConfirm[0] = offset_1 ? offset_1 : savedOffset;
+    ctx->repToConfirm[1] = offset_2 ? offset_2 : savedOffset;
 
     /* Last Literals */
     {   size_t const lastLLSize = iend - anchor;
@@ -1989,8 +2570,8 @@ void ZSTD_compressBlock_lazy_extDict_generic(ZSTD_CCtx* ctx,
     const BYTE* const dictEnd  = dictBase + dictLimit;
     const BYTE* const dictStart  = dictBase + ctx->lowLimit;
 
-    const U32 maxSearches = 1 << ctx->params.cParams.searchLog;
-    const U32 mls = ctx->params.cParams.searchLength;
+    const U32 maxSearches = 1 << ctx->appliedParams.cParams.searchLog;
+    const U32 mls = ctx->appliedParams.cParams.searchLength;
 
     typedef size_t (*searchMax_f)(ZSTD_CCtx* zc, const BYTE* ip, const BYTE* iLimit,
                         size_t* offsetPtr,
@@ -2018,7 +2599,7 @@ void ZSTD_compressBlock_lazy_extDict_generic(ZSTD_CCtx* ctx,
             if (MEM_read32(ip+1) == MEM_read32(repMatch)) {
                 /* repcode detected we should take it */
                 const BYTE* const repEnd = repIndex < dictLimit ? dictEnd : iend;
-                matchLength = ZSTD_count_2segments(ip+1+EQUAL_READ32, repMatch+EQUAL_READ32, iend, repEnd, prefixStart) + EQUAL_READ32;
+                matchLength = ZSTD_count_2segments(ip+1+4, repMatch+4, iend, repEnd, prefixStart) + 4;
                 if (depth==0) goto _storeSequence;
         }   }
 
@@ -2029,7 +2610,7 @@ void ZSTD_compressBlock_lazy_extDict_generic(ZSTD_CCtx* ctx,
                 matchLength = ml2, start = ip, offset=offsetFound;
         }
 
-         if (matchLength < EQUAL_READ32) {
+         if (matchLength < 4) {
             ip += ((ip-anchor) >> g_searchStrength) + 1;   /* jump faster over incompressible sections */
             continue;
         }
@@ -2048,10 +2629,10 @@ void ZSTD_compressBlock_lazy_extDict_generic(ZSTD_CCtx* ctx,
                 if (MEM_read32(ip) == MEM_read32(repMatch)) {
                     /* repcode detected */
                     const BYTE* const repEnd = repIndex < dictLimit ? dictEnd : iend;
-                    size_t const repLength = ZSTD_count_2segments(ip+EQUAL_READ32, repMatch+EQUAL_READ32, iend, repEnd, prefixStart) + EQUAL_READ32;
+                    size_t const repLength = ZSTD_count_2segments(ip+4, repMatch+4, iend, repEnd, prefixStart) + 4;
                     int const gain2 = (int)(repLength * 3);
                     int const gain1 = (int)(matchLength*3 - ZSTD_highbit32((U32)offset+1) + 1);
-                    if ((repLength >= EQUAL_READ32) && (gain2 > gain1))
+                    if ((repLength >= 4) && (gain2 > gain1))
                         matchLength = repLength, offset = 0, start = ip;
             }   }
 
@@ -2060,7 +2641,7 @@ void ZSTD_compressBlock_lazy_extDict_generic(ZSTD_CCtx* ctx,
                 size_t const ml2 = searchMax(ctx, ip, iend, &offset2, maxSearches, mls);
                 int const gain2 = (int)(ml2*4 - ZSTD_highbit32((U32)offset2+1));   /* raw approx */
                 int const gain1 = (int)(matchLength*4 - ZSTD_highbit32((U32)offset+1) + 4);
-                if ((ml2 >= EQUAL_READ32) && (gain2 > gain1)) {
+                if ((ml2 >= 4) && (gain2 > gain1)) {
                     matchLength = ml2, offset = offset2, start = ip;
                     continue;   /* search a better one */
             }   }
@@ -2078,10 +2659,10 @@ void ZSTD_compressBlock_lazy_extDict_generic(ZSTD_CCtx* ctx,
                     if (MEM_read32(ip) == MEM_read32(repMatch)) {
                         /* repcode detected */
                         const BYTE* const repEnd = repIndex < dictLimit ? dictEnd : iend;
-                        size_t repLength = ZSTD_count_2segments(ip+EQUAL_READ32, repMatch+EQUAL_READ32, iend, repEnd, prefixStart) + EQUAL_READ32;
-                        int gain2 = (int)(repLength * 4);
-                        int gain1 = (int)(matchLength*4 - ZSTD_highbit32((U32)offset+1) + 1);
-                        if ((repLength >= EQUAL_READ32) && (gain2 > gain1))
+                        size_t const repLength = ZSTD_count_2segments(ip+4, repMatch+4, iend, repEnd, prefixStart) + 4;
+                        int const gain2 = (int)(repLength * 4);
+                        int const gain1 = (int)(matchLength*4 - ZSTD_highbit32((U32)offset+1) + 1);
+                        if ((repLength >= 4) && (gain2 > gain1))
                             matchLength = repLength, offset = 0, start = ip;
                 }   }
 
@@ -2090,7 +2671,7 @@ void ZSTD_compressBlock_lazy_extDict_generic(ZSTD_CCtx* ctx,
                     size_t const ml2 = searchMax(ctx, ip, iend, &offset2, maxSearches, mls);
                     int const gain2 = (int)(ml2*4 - ZSTD_highbit32((U32)offset2+1));   /* raw approx */
                     int const gain1 = (int)(matchLength*4 - ZSTD_highbit32((U32)offset+1) + 7);
-                    if ((ml2 >= EQUAL_READ32) && (gain2 > gain1)) {
+                    if ((ml2 >= 4) && (gain2 > gain1)) {
                         matchLength = ml2, offset = offset2, start = ip;
                         continue;
             }   }   }
@@ -2122,7 +2703,7 @@ _storeSequence:
             if (MEM_read32(ip) == MEM_read32(repMatch)) {
                 /* repcode detected we should take it */
                 const BYTE* const repEnd = repIndex < dictLimit ? dictEnd : iend;
-                matchLength = ZSTD_count_2segments(ip+EQUAL_READ32, repMatch+EQUAL_READ32, iend, repEnd, prefixStart) + EQUAL_READ32;
+                matchLength = ZSTD_count_2segments(ip+4, repMatch+4, iend, repEnd, prefixStart) + 4;
                 offset = offset_2; offset_2 = offset_1; offset_1 = (U32)offset;   /* swap offset history */
                 ZSTD_storeSeq(seqStorePtr, 0, anchor, 0, matchLength-MINMATCH);
                 ip += matchLength;
@@ -2133,7 +2714,7 @@ _storeSequence:
     }   }
 
     /* Save reps for next block */
-    ctx->savedRep[0] = offset_1; ctx->savedRep[1] = offset_2;
+    ctx->repToConfirm[0] = offset_1; ctx->repToConfirm[1] = offset_2;
 
     /* Last Literals */
     {   size_t const lastLLSize = iend - anchor;
@@ -2170,7 +2751,17 @@ static void ZSTD_compressBlock_btlazy2_extDict(ZSTD_CCtx* ctx, const void* src,
 static void ZSTD_compressBlock_btopt(ZSTD_CCtx* ctx, const void* src, size_t srcSize)
 {
 #ifdef ZSTD_OPT_H_91842398743
-    ZSTD_compressBlock_opt_generic(ctx, src, srcSize);
+    ZSTD_compressBlock_opt_generic(ctx, src, srcSize, 0);
+#else
+    (void)ctx; (void)src; (void)srcSize;
+    return;
+#endif
+}
+
+static void ZSTD_compressBlock_btultra(ZSTD_CCtx* ctx, const void* src, size_t srcSize)
+{
+#ifdef ZSTD_OPT_H_91842398743
+    ZSTD_compressBlock_opt_generic(ctx, src, srcSize, 1);
 #else
     (void)ctx; (void)src; (void)srcSize;
     return;
@@ -2180,7 +2771,17 @@ static void ZSTD_compressBlock_btopt(ZSTD_CCtx* ctx, const void* src, size_t src
 static void ZSTD_compressBlock_btopt_extDict(ZSTD_CCtx* ctx, const void* src, size_t srcSize)
 {
 #ifdef ZSTD_OPT_H_91842398743
-    ZSTD_compressBlock_opt_extDict_generic(ctx, src, srcSize);
+    ZSTD_compressBlock_opt_extDict_generic(ctx, src, srcSize, 0);
+#else
+    (void)ctx; (void)src; (void)srcSize;
+    return;
+#endif
+}
+
+static void ZSTD_compressBlock_btultra_extDict(ZSTD_CCtx* ctx, const void* src, size_t srcSize)
+{
+#ifdef ZSTD_OPT_H_91842398743
+    ZSTD_compressBlock_opt_extDict_generic(ctx, src, srcSize, 1);
 #else
     (void)ctx; (void)src; (void)srcSize;
     return;
@@ -2188,42 +2789,52 @@ static void ZSTD_compressBlock_btopt_extDict(ZSTD_CCtx* ctx, const void* src, si
 }
 
 
+/* ZSTD_selectBlockCompressor() :
+ * assumption : strat is a valid strategy */
 typedef void (*ZSTD_blockCompressor) (ZSTD_CCtx* ctx, const void* src, size_t srcSize);
-
 static ZSTD_blockCompressor ZSTD_selectBlockCompressor(ZSTD_strategy strat, int extDict)
 {
-    static const ZSTD_blockCompressor blockCompressor[2][7] = {
-        { ZSTD_compressBlock_fast, ZSTD_compressBlock_doubleFast, ZSTD_compressBlock_greedy, ZSTD_compressBlock_lazy, ZSTD_compressBlock_lazy2, ZSTD_compressBlock_btlazy2, ZSTD_compressBlock_btopt },
-        { ZSTD_compressBlock_fast_extDict, ZSTD_compressBlock_doubleFast_extDict, ZSTD_compressBlock_greedy_extDict, ZSTD_compressBlock_lazy_extDict,ZSTD_compressBlock_lazy2_extDict, ZSTD_compressBlock_btlazy2_extDict, ZSTD_compressBlock_btopt_extDict }
+    static const ZSTD_blockCompressor blockCompressor[2][(unsigned)ZSTD_btultra+1] = {
+        { ZSTD_compressBlock_fast  /* default for 0 */,
+          ZSTD_compressBlock_fast, ZSTD_compressBlock_doubleFast, ZSTD_compressBlock_greedy,
+          ZSTD_compressBlock_lazy, ZSTD_compressBlock_lazy2, ZSTD_compressBlock_btlazy2,
+          ZSTD_compressBlock_btopt, ZSTD_compressBlock_btultra },
+        { ZSTD_compressBlock_fast_extDict  /* default for 0 */,
+          ZSTD_compressBlock_fast_extDict, ZSTD_compressBlock_doubleFast_extDict, ZSTD_compressBlock_greedy_extDict,
+          ZSTD_compressBlock_lazy_extDict,ZSTD_compressBlock_lazy2_extDict, ZSTD_compressBlock_btlazy2_extDict,
+          ZSTD_compressBlock_btopt_extDict, ZSTD_compressBlock_btultra_extDict }
     };
+    ZSTD_STATIC_ASSERT((unsigned)ZSTD_fast == 1);
+    assert((U32)strat >= (U32)ZSTD_fast);
+    assert((U32)strat <= (U32)ZSTD_btultra);
 
-    return blockCompressor[extDict][(U32)strat];
+    return blockCompressor[extDict!=0][(U32)strat];
 }
 
 
 static size_t ZSTD_compressBlock_internal(ZSTD_CCtx* zc, void* dst, size_t dstCapacity, const void* src, size_t srcSize)
 {
-    ZSTD_blockCompressor const blockCompressor = ZSTD_selectBlockCompressor(zc->params.cParams.strategy, zc->lowLimit < zc->dictLimit);
+    ZSTD_blockCompressor const blockCompressor = ZSTD_selectBlockCompressor(zc->appliedParams.cParams.strategy, zc->lowLimit < zc->dictLimit);
     const BYTE* const base = zc->base;
     const BYTE* const istart = (const BYTE*)src;
     const U32 current = (U32)(istart-base);
     if (srcSize < MIN_CBLOCK_SIZE+ZSTD_blockHeaderSize+1) return 0;   /* don't even attempt compression below a certain srcSize */
     ZSTD_resetSeqStore(&(zc->seqStore));
     if (current > zc->nextToUpdate + 384)
-        zc->nextToUpdate = current - MIN(192, (U32)(current - zc->nextToUpdate - 384));   /* update tree not updated after finding very long rep matches */
+        zc->nextToUpdate = current - MIN(192, (U32)(current - zc->nextToUpdate - 384));   /* limited update after finding a very long match */
     blockCompressor(zc, src, srcSize);
     return ZSTD_compressSequences(zc, dst, dstCapacity, srcSize);
 }
 
 
-/*! ZSTD_compress_generic() :
+/*! ZSTD_compress_frameChunk() :
 *   Compress a chunk of data into one or multiple blocks.
 *   All blocks will be terminated, all input will be consumed.
 *   Function will issue an error if there is not enough `dstCapacity` to hold the compressed content.
 *   Frame is supposed already started (header already produced)
 *   @return : compressed size, or an error code
 */
-static size_t ZSTD_compress_generic (ZSTD_CCtx* cctx,
+static size_t ZSTD_compress_frameChunk (ZSTD_CCtx* cctx,
                                      void* dst, size_t dstCapacity,
                                const void* src, size_t srcSize,
                                      U32 lastFrameChunk)
@@ -2233,29 +2844,30 @@ static size_t ZSTD_compress_generic (ZSTD_CCtx* cctx,
     const BYTE* ip = (const BYTE*)src;
     BYTE* const ostart = (BYTE*)dst;
     BYTE* op = ostart;
-    U32 const maxDist = 1 << cctx->params.cParams.windowLog;
+    U32 const maxDist = 1 << cctx->appliedParams.cParams.windowLog;
 
-    if (cctx->params.fParams.checksumFlag)
+    if (cctx->appliedParams.fParams.checksumFlag && srcSize)
         XXH64_update(&cctx->xxhState, src, srcSize);
 
     while (remaining) {
         U32 const lastBlock = lastFrameChunk & (blockSize >= remaining);
         size_t cSize;
 
-        if (dstCapacity < ZSTD_blockHeaderSize + MIN_CBLOCK_SIZE) return ERROR(dstSize_tooSmall);   /* not enough space to store compressed block */
+        if (dstCapacity < ZSTD_blockHeaderSize + MIN_CBLOCK_SIZE)
+            return ERROR(dstSize_tooSmall);   /* not enough space to store compressed block */
         if (remaining < blockSize) blockSize = remaining;
 
         /* preemptive overflow correction */
-        if (cctx->lowLimit > (1<<30)) {
-            U32 const btplus = (cctx->params.cParams.strategy == ZSTD_btlazy2) | (cctx->params.cParams.strategy == ZSTD_btopt);
-            U32 const chainMask = (1 << (cctx->params.cParams.chainLog - btplus)) - 1;
-            U32 const supLog = MAX(cctx->params.cParams.chainLog, 17 /* blockSize */);
-            U32 const newLowLimit = (cctx->lowLimit & chainMask) + (1 << supLog);   /* preserve position % chainSize, ensure current-repcode doesn't underflow */
-            U32 const correction = cctx->lowLimit - newLowLimit;
+        if (cctx->lowLimit > (3U<<29)) {
+            U32 const cycleMask = (1 << ZSTD_cycleLog(cctx->appliedParams.cParams.hashLog, cctx->appliedParams.cParams.strategy)) - 1;
+            U32 const current = (U32)(ip - cctx->base);
+            U32 const newCurrent = (current & cycleMask) + (1 << cctx->appliedParams.cParams.windowLog);
+            U32 const correction = current - newCurrent;
+            ZSTD_STATIC_ASSERT(ZSTD_WINDOWLOG_MAX_64 <= 30);
             ZSTD_reduceIndex(cctx, correction);
             cctx->base += correction;
             cctx->dictBase += correction;
-            cctx->lowLimit = newLowLimit;
+            cctx->lowLimit -= correction;
             cctx->dictLimit -= correction;
             if (cctx->nextToUpdate < correction) cctx->nextToUpdate = 0;
             else cctx->nextToUpdate -= correction;
@@ -2297,25 +2909,27 @@ static size_t ZSTD_compress_generic (ZSTD_CCtx* cctx,
 static size_t ZSTD_writeFrameHeader(void* dst, size_t dstCapacity,
                                     ZSTD_parameters params, U64 pledgedSrcSize, U32 dictID)
 {   BYTE* const op = (BYTE*)dst;
-    U32   const dictIDSizeCode = (dictID>0) + (dictID>=256) + (dictID>=65536);   /* 0-3 */
+    U32   const dictIDSizeCodeLength = (dictID>0) + (dictID>=256) + (dictID>=65536);   /* 0-3 */
+    U32   const dictIDSizeCode = params.fParams.noDictIDFlag ? 0 : dictIDSizeCodeLength;   /* 0-3 */
     U32   const checksumFlag = params.fParams.checksumFlag>0;
     U32   const windowSize = 1U << params.cParams.windowLog;
-    U32   const singleSegment = params.fParams.contentSizeFlag && (windowSize > (pledgedSrcSize-1));
+    U32   const singleSegment = params.fParams.contentSizeFlag && (windowSize >= pledgedSrcSize);
     BYTE  const windowLogByte = (BYTE)((params.cParams.windowLog - ZSTD_WINDOWLOG_ABSOLUTEMIN) << 3);
     U32   const fcsCode = params.fParams.contentSizeFlag ?
-                     (pledgedSrcSize>=256) + (pledgedSrcSize>=65536+256) + (pledgedSrcSize>=0xFFFFFFFFU) :   /* 0-3 */
-                      0;
+                     (pledgedSrcSize>=256) + (pledgedSrcSize>=65536+256) + (pledgedSrcSize>=0xFFFFFFFFU) : 0;  /* 0-3 */
     BYTE  const frameHeaderDecriptionByte = (BYTE)(dictIDSizeCode + (checksumFlag<<2) + (singleSegment<<5) + (fcsCode<<6) );
     size_t pos;
 
     if (dstCapacity < ZSTD_frameHeaderSize_max) return ERROR(dstSize_tooSmall);
+    DEBUGLOG(5, "ZSTD_writeFrameHeader : dictIDFlag : %u ; dictID : %u ; dictIDSizeCode : %u",
+                !params.fParams.noDictIDFlag, dictID,  dictIDSizeCode);
 
     MEM_writeLE32(dst, ZSTD_MAGICNUMBER);
     op[4] = frameHeaderDecriptionByte; pos=5;
     if (!singleSegment) op[pos++] = windowLogByte;
     switch(dictIDSizeCode)
     {
-        default:   /* impossible */
+        default:  assert(0); /* impossible */
         case 0 : break;
         case 1 : op[pos] = (BYTE)(dictID); pos++; break;
         case 2 : MEM_writeLE16(op+pos, (U16)dictID); pos+=2; break;
@@ -2323,7 +2937,7 @@ static size_t ZSTD_writeFrameHeader(void* dst, size_t dstCapacity,
     }
     switch(fcsCode)
     {
-        default:   /* impossible */
+        default:  assert(0); /* impossible */
         case 0 : if (singleSegment) op[pos++] = (BYTE)(pledgedSrcSize); break;
         case 1 : MEM_writeLE16(op+pos, (U16)(pledgedSrcSize-256)); pos+=2; break;
         case 2 : MEM_writeLE32(op+pos, (U32)(pledgedSrcSize)); pos+=4; break;
@@ -2341,10 +2955,13 @@ static size_t ZSTD_compressContinue_internal (ZSTD_CCtx* cctx,
     const BYTE* const ip = (const BYTE*) src;
     size_t fhSize = 0;
 
+    DEBUGLOG(5, "ZSTD_compressContinue_internal");
+    DEBUGLOG(5, "stage: %u", cctx->stage);
     if (cctx->stage==ZSTDcs_created) return ERROR(stage_wrong);   /* missing init (ZSTD_compressBegin) */
 
     if (frame && (cctx->stage==ZSTDcs_init)) {
-        fhSize = ZSTD_writeFrameHeader(dst, dstCapacity, cctx->params, cctx->frameContentSize, cctx->dictID);
+        fhSize = ZSTD_writeFrameHeader(dst, dstCapacity, cctx->appliedParams,
+                                cctx->pledgedSrcSizePlusOne-1, cctx->dictID);
         if (ZSTD_isError(fhSize)) return fhSize;
         dstCapacity -= fhSize;
         dst = (char*)dst + fhSize;
@@ -2372,12 +2989,15 @@ static size_t ZSTD_compressContinue_internal (ZSTD_CCtx* cctx,
 
     cctx->nextSrc = ip + srcSize;
 
-    {   size_t const cSize = frame ?
-                             ZSTD_compress_generic (cctx, dst, dstCapacity, src, srcSize, lastFrameChunk) :
+    if (srcSize) {
+        size_t const cSize = frame ?
+                             ZSTD_compress_frameChunk (cctx, dst, dstCapacity, src, srcSize, lastFrameChunk) :
                              ZSTD_compressBlock_internal (cctx, dst, dstCapacity, src, srcSize);
         if (ZSTD_isError(cSize)) return cSize;
+        cctx->consumedSrcSize += srcSize;
         return cSize + fhSize;
-    }
+    } else
+        return fhSize;
 }
 
 
@@ -2385,23 +3005,29 @@ size_t ZSTD_compressContinue (ZSTD_CCtx* cctx,
                               void* dst, size_t dstCapacity,
                         const void* src, size_t srcSize)
 {
-    return ZSTD_compressContinue_internal(cctx, dst, dstCapacity, src, srcSize, 1, 0);
+    return ZSTD_compressContinue_internal(cctx, dst, dstCapacity, src, srcSize, 1 /* frame mode */, 0 /* last chunk */);
 }
 
 
-size_t ZSTD_getBlockSizeMax(ZSTD_CCtx* cctx)
+size_t ZSTD_getBlockSize(const ZSTD_CCtx* cctx)
 {
-    return MIN (ZSTD_BLOCKSIZE_ABSOLUTEMAX, 1 << cctx->params.cParams.windowLog);
+    U32 const cLevel = cctx->compressionLevel;
+    ZSTD_compressionParameters cParams = (cLevel == ZSTD_CLEVEL_CUSTOM) ?
+                                        cctx->appliedParams.cParams :
+                                        ZSTD_getCParams(cLevel, 0, 0);
+    return MIN (ZSTD_BLOCKSIZE_MAX, 1 << cParams.windowLog);
 }
 
 size_t ZSTD_compressBlock(ZSTD_CCtx* cctx, void* dst, size_t dstCapacity, const void* src, size_t srcSize)
 {
-    size_t const blockSizeMax = ZSTD_getBlockSizeMax(cctx);
+    size_t const blockSizeMax = ZSTD_getBlockSize(cctx);
     if (srcSize > blockSizeMax) return ERROR(srcSize_wrong);
-    return ZSTD_compressContinue_internal(cctx, dst, dstCapacity, src, srcSize, 0, 0);
+    return ZSTD_compressContinue_internal(cctx, dst, dstCapacity, src, srcSize, 0 /* frame mode */, 0 /* last chunk */);
 }
 
-
+/*! ZSTD_loadDictionaryContent() :
+ *  @return : 0, or an error code
+ */
 static size_t ZSTD_loadDictionaryContent(ZSTD_CCtx* zc, const void* src, size_t srcSize)
 {
     const BYTE* const ip = (const BYTE*) src;
@@ -2413,125 +3039,197 @@ static size_t ZSTD_loadDictionaryContent(ZSTD_CCtx* zc, const void* src, size_t
     zc->dictBase = zc->base;
     zc->base += ip - zc->nextSrc;
     zc->nextToUpdate = zc->dictLimit;
-    zc->loadedDictEnd = (U32)(iend - zc->base);
+    zc->loadedDictEnd = zc->forceWindow ? 0 : (U32)(iend - zc->base);
 
     zc->nextSrc = iend;
     if (srcSize <= HASH_READ_SIZE) return 0;
 
-    switch(zc->params.cParams.strategy)
+    switch(zc->appliedParams.cParams.strategy)
     {
     case ZSTD_fast:
-        ZSTD_fillHashTable (zc, iend, zc->params.cParams.searchLength);
+        ZSTD_fillHashTable (zc, iend, zc->appliedParams.cParams.searchLength);
         break;
 
     case ZSTD_dfast:
-        ZSTD_fillDoubleHashTable (zc, iend, zc->params.cParams.searchLength);
+        ZSTD_fillDoubleHashTable (zc, iend, zc->appliedParams.cParams.searchLength);
         break;
 
     case ZSTD_greedy:
     case ZSTD_lazy:
     case ZSTD_lazy2:
-        ZSTD_insertAndFindFirstIndex (zc, iend-HASH_READ_SIZE, zc->params.cParams.searchLength);
+        if (srcSize >= HASH_READ_SIZE)
+            ZSTD_insertAndFindFirstIndex(zc, iend-HASH_READ_SIZE, zc->appliedParams.cParams.searchLength);
         break;
 
     case ZSTD_btlazy2:
     case ZSTD_btopt:
-        ZSTD_updateTree(zc, iend-HASH_READ_SIZE, iend, 1 << zc->params.cParams.searchLog, zc->params.cParams.searchLength);
+    case ZSTD_btultra:
+        if (srcSize >= HASH_READ_SIZE)
+            ZSTD_updateTree(zc, iend-HASH_READ_SIZE, iend, 1 << zc->appliedParams.cParams.searchLog, zc->appliedParams.cParams.searchLength);
         break;
 
     default:
-        return ERROR(GENERIC);   /* strategy doesn't exist; impossible */
+        assert(0);  /* not possible : not a valid strategy id */
     }
 
-    zc->nextToUpdate = zc->loadedDictEnd;
+    zc->nextToUpdate = (U32)(iend - zc->base);
+    return 0;
+}
+
+
+/* Dictionaries that assign zero probability to symbols that show up cause problems
+   when FSE encoding.  Refuse dictionaries that assign zero probability to symbols
+   that we may encounter during compression.
+   NOTE: This behavior is not standard and could be improved in the future. */
+static size_t ZSTD_checkDictNCount(short* normalizedCounter, unsigned dictMaxSymbolValue, unsigned maxSymbolValue) {
+    U32 s;
+    if (dictMaxSymbolValue < maxSymbolValue) return ERROR(dictionary_corrupted);
+    for (s = 0; s <= maxSymbolValue; ++s) {
+        if (normalizedCounter[s] == 0) return ERROR(dictionary_corrupted);
+    }
     return 0;
 }
 
 
 /* Dictionary format :
-     Magic == ZSTD_DICT_MAGIC (4 bytes)
-     HUF_writeCTable(256)
-     FSE_writeNCount(off)
-     FSE_writeNCount(ml)
-     FSE_writeNCount(ll)
-     RepOffsets
-     Dictionary content
-*/
-/*! ZSTD_loadDictEntropyStats() :
-    @return : size read from dictionary
-    note : magic number supposed already checked */
-static size_t ZSTD_loadDictEntropyStats(ZSTD_CCtx* cctx, const void* dict, size_t dictSize)
+ * See :
+ * https://github.com/facebook/zstd/blob/master/doc/zstd_compression_format.md#dictionary-format
+ */
+/*! ZSTD_loadZstdDictionary() :
+ * @return : 0, or an error code
+ *  assumptions : magic number is assumed to be already checked
+ *                dictSize is assumed to be > 8
+ */
+static size_t ZSTD_loadZstdDictionary(ZSTD_CCtx* cctx, const void* dict, size_t dictSize)
 {
     const BYTE* dictPtr = (const BYTE*)dict;
     const BYTE* const dictEnd = dictPtr + dictSize;
+    short offcodeNCount[MaxOff+1];
+    unsigned offcodeMaxValue = MaxOff;
+    BYTE scratchBuffer[1<<MAX(MLFSELog,LLFSELog)];
 
-    {   size_t const hufHeaderSize = HUF_readCTable(cctx->hufTable, 255, dict, dictSize);
+    dictPtr += 4;   /* skip magic number */
+    cctx->dictID = cctx->appliedParams.fParams.noDictIDFlag ? 0 :  MEM_readLE32(dictPtr);
+    dictPtr += 4;
+
+    {   size_t const hufHeaderSize = HUF_readCTable(cctx->hufCTable, 255, dictPtr, dictEnd-dictPtr);
         if (HUF_isError(hufHeaderSize)) return ERROR(dictionary_corrupted);
         dictPtr += hufHeaderSize;
     }
 
-    {   short offcodeNCount[MaxOff+1];
-        unsigned offcodeMaxValue = MaxOff, offcodeLog = OffFSELog;
+    {   unsigned offcodeLog;
         size_t const offcodeHeaderSize = FSE_readNCount(offcodeNCount, &offcodeMaxValue, &offcodeLog, dictPtr, dictEnd-dictPtr);
         if (FSE_isError(offcodeHeaderSize)) return ERROR(dictionary_corrupted);
-        CHECK_E (FSE_buildCTable(cctx->offcodeCTable, offcodeNCount, offcodeMaxValue, offcodeLog), dictionary_corrupted);
+        if (offcodeLog > OffFSELog) return ERROR(dictionary_corrupted);
+        /* Defer checking offcodeMaxValue because we need to know the size of the dictionary content */
+        CHECK_E( FSE_buildCTable_wksp(cctx->offcodeCTable, offcodeNCount, offcodeMaxValue, offcodeLog, scratchBuffer, sizeof(scratchBuffer)),
+                 dictionary_corrupted);
         dictPtr += offcodeHeaderSize;
     }
 
     {   short matchlengthNCount[MaxML+1];
-        unsigned matchlengthMaxValue = MaxML, matchlengthLog = MLFSELog;
+        unsigned matchlengthMaxValue = MaxML, matchlengthLog;
         size_t const matchlengthHeaderSize = FSE_readNCount(matchlengthNCount, &matchlengthMaxValue, &matchlengthLog, dictPtr, dictEnd-dictPtr);
         if (FSE_isError(matchlengthHeaderSize)) return ERROR(dictionary_corrupted);
-        CHECK_E (FSE_buildCTable(cctx->matchlengthCTable, matchlengthNCount, matchlengthMaxValue, matchlengthLog), dictionary_corrupted);
+        if (matchlengthLog > MLFSELog) return ERROR(dictionary_corrupted);
+        /* Every match length code must have non-zero probability */
+        CHECK_F( ZSTD_checkDictNCount(matchlengthNCount, matchlengthMaxValue, MaxML));
+        CHECK_E( FSE_buildCTable_wksp(cctx->matchlengthCTable, matchlengthNCount, matchlengthMaxValue, matchlengthLog, scratchBuffer, sizeof(scratchBuffer)),
+                 dictionary_corrupted);
         dictPtr += matchlengthHeaderSize;
     }
 
     {   short litlengthNCount[MaxLL+1];
-        unsigned litlengthMaxValue = MaxLL, litlengthLog = LLFSELog;
+        unsigned litlengthMaxValue = MaxLL, litlengthLog;
         size_t const litlengthHeaderSize = FSE_readNCount(litlengthNCount, &litlengthMaxValue, &litlengthLog, dictPtr, dictEnd-dictPtr);
         if (FSE_isError(litlengthHeaderSize)) return ERROR(dictionary_corrupted);
-        CHECK_E(FSE_buildCTable(cctx->litlengthCTable, litlengthNCount, litlengthMaxValue, litlengthLog), dictionary_corrupted);
+        if (litlengthLog > LLFSELog) return ERROR(dictionary_corrupted);
+        /* Every literal length code must have non-zero probability */
+        CHECK_F( ZSTD_checkDictNCount(litlengthNCount, litlengthMaxValue, MaxLL));
+        CHECK_E( FSE_buildCTable_wksp(cctx->litlengthCTable, litlengthNCount, litlengthMaxValue, litlengthLog, scratchBuffer, sizeof(scratchBuffer)),
+                 dictionary_corrupted);
         dictPtr += litlengthHeaderSize;
     }
 
     if (dictPtr+12 > dictEnd) return ERROR(dictionary_corrupted);
-    cctx->rep[0] = MEM_readLE32(dictPtr+0); if (cctx->rep[0] >= dictSize) return ERROR(dictionary_corrupted);
-    cctx->rep[1] = MEM_readLE32(dictPtr+4); if (cctx->rep[1] >= dictSize) return ERROR(dictionary_corrupted);
-    cctx->rep[2] = MEM_readLE32(dictPtr+8); if (cctx->rep[2] >= dictSize) return ERROR(dictionary_corrupted);
+    cctx->rep[0] = MEM_readLE32(dictPtr+0);
+    cctx->rep[1] = MEM_readLE32(dictPtr+4);
+    cctx->rep[2] = MEM_readLE32(dictPtr+8);
     dictPtr += 12;
 
-    cctx->flagStaticTables = 1;
-    return dictPtr - (const BYTE*)dict;
+    {   size_t const dictContentSize = (size_t)(dictEnd - dictPtr);
+        U32 offcodeMax = MaxOff;
+        if (dictContentSize <= ((U32)-1) - 128 KB) {
+            U32 const maxOffset = (U32)dictContentSize + 128 KB; /* The maximum offset that must be supported */
+            offcodeMax = ZSTD_highbit32(maxOffset); /* Calculate minimum offset code required to represent maxOffset */
+        }
+        /* All offset values <= dictContentSize + 128 KB must be representable */
+        CHECK_F (ZSTD_checkDictNCount(offcodeNCount, offcodeMaxValue, MIN(offcodeMax, MaxOff)));
+        /* All repCodes must be <= dictContentSize and != 0 */
+        {   U32 u;
+            for (u=0; u<3; u++) {
+                if (cctx->rep[u] == 0) return ERROR(dictionary_corrupted);
+                if (cctx->rep[u] > dictContentSize) return ERROR(dictionary_corrupted);
+        }   }
+
+        cctx->fseCTables_ready = 1;
+        cctx->hufCTable_repeatMode = HUF_repeat_valid;
+        return ZSTD_loadDictionaryContent(cctx, dictPtr, dictContentSize);
+    }
 }
 
 /** ZSTD_compress_insertDictionary() :
 *   @return : 0, or an error code */
-static size_t ZSTD_compress_insertDictionary(ZSTD_CCtx* zc, const void* dict, size_t dictSize)
+static size_t ZSTD_compress_insertDictionary(ZSTD_CCtx* cctx,
+                                       const void* dict, size_t dictSize,
+                                             ZSTD_dictMode_e dictMode)
 {
+    DEBUGLOG(5, "ZSTD_compress_insertDictionary");
     if ((dict==NULL) || (dictSize<=8)) return 0;
 
-    /* default : dict is pure content */
-    if (MEM_readLE32(dict) != ZSTD_DICT_MAGIC) return ZSTD_loadDictionaryContent(zc, dict, dictSize);
-    zc->dictID = zc->params.fParams.noDictIDFlag ? 0 :  MEM_readLE32((const char*)dict+4);
+    /* dict restricted modes */
+    if (dictMode==ZSTD_dm_rawContent)
+        return ZSTD_loadDictionaryContent(cctx, dict, dictSize);
 
-    /* known magic number : dict is parsed for entropy stats and content */
-    {   size_t const loadError = ZSTD_loadDictEntropyStats(zc, (const char*)dict+8 /* skip dictHeader */, dictSize-8);
-        size_t const eSize = loadError + 8;
-        if (ZSTD_isError(loadError)) return loadError;
-        return ZSTD_loadDictionaryContent(zc, (const char*)dict+eSize, dictSize-eSize);
+    if (MEM_readLE32(dict) != ZSTD_MAGIC_DICTIONARY) {
+        if (dictMode == ZSTD_dm_auto) {
+            DEBUGLOG(5, "raw content dictionary detected");
+            return ZSTD_loadDictionaryContent(cctx, dict, dictSize);
+        }
+        if (dictMode == ZSTD_dm_fullDict)
+            return ERROR(dictionary_wrong);
+        assert(0);   /* impossible */
     }
+
+    /* dict as full zstd dictionary */
+    return ZSTD_loadZstdDictionary(cctx, dict, dictSize);
 }
 
-
 /*! ZSTD_compressBegin_internal() :
-*   @return : 0, or an error code */
+ * @return : 0, or an error code */
 static size_t ZSTD_compressBegin_internal(ZSTD_CCtx* cctx,
                              const void* dict, size_t dictSize,
-                                   ZSTD_parameters params, U64 pledgedSrcSize)
+                             ZSTD_dictMode_e dictMode,
+                             const ZSTD_CDict* cdict,
+                                   ZSTD_parameters params, U64 pledgedSrcSize,
+                                   ZSTD_buffered_policy_e zbuff)
 {
-    ZSTD_compResetPolicy_e const crp = dictSize ? ZSTDcrp_fullReset : ZSTDcrp_continue;
-    CHECK_F(ZSTD_resetCCtx_advanced(cctx, params, pledgedSrcSize, crp));
-    return ZSTD_compress_insertDictionary(cctx, dict, dictSize);
+    DEBUGLOG(4, "ZSTD_compressBegin_internal");
+    DEBUGLOG(4, "dict ? %s", dict ? "dict" : (cdict ? "cdict" : "none"));
+    DEBUGLOG(4, "dictMode : %u", (U32)dictMode);
+    /* params are supposed to be fully validated at this point */
+    assert(!ZSTD_isError(ZSTD_checkCParams(params.cParams)));
+    assert(!((dict) && (cdict)));  /* either dict or cdict, not both */
+
+    if (cdict && cdict->dictContentSize>0) {
+        return ZSTD_copyCCtx_internal(cctx, cdict->refContext,
+                                      params.fParams, pledgedSrcSize,
+                                      zbuff);
+    }
+
+    CHECK_F( ZSTD_resetCCtx_internal(cctx, params, pledgedSrcSize,
+                                     ZSTDcrp_continue, zbuff) );
+    return ZSTD_compress_insertDictionary(cctx, dict, dictSize, dictMode);
 }
 
 
@@ -2543,20 +3241,22 @@ size_t ZSTD_compressBegin_advanced(ZSTD_CCtx* cctx,
 {
     /* compression parameters verification and optimization */
     CHECK_F(ZSTD_checkCParams(params.cParams));
-    return ZSTD_compressBegin_internal(cctx, dict, dictSize, params, pledgedSrcSize);
+    return ZSTD_compressBegin_internal(cctx, dict, dictSize, ZSTD_dm_auto, NULL,
+                                    params, pledgedSrcSize, ZSTDb_not_buffered);
 }
 
 
 size_t ZSTD_compressBegin_usingDict(ZSTD_CCtx* cctx, const void* dict, size_t dictSize, int compressionLevel)
 {
     ZSTD_parameters const params = ZSTD_getParams(compressionLevel, 0, dictSize);
-    return ZSTD_compressBegin_internal(cctx, dict, dictSize, params, 0);
+    return ZSTD_compressBegin_internal(cctx, dict, dictSize, ZSTD_dm_auto, NULL,
+                                       params, 0, ZSTDb_not_buffered);
 }
 
 
-size_t ZSTD_compressBegin(ZSTD_CCtx* zc, int compressionLevel)
+size_t ZSTD_compressBegin(ZSTD_CCtx* cctx, int compressionLevel)
 {
-    return ZSTD_compressBegin_usingDict(zc, NULL, 0, compressionLevel);
+    return ZSTD_compressBegin_usingDict(cctx, NULL, 0, compressionLevel);
 }
 
 
@@ -2569,11 +3269,12 @@ static size_t ZSTD_writeEpilogue(ZSTD_CCtx* cctx, void* dst, size_t dstCapacity)
     BYTE* op = ostart;
     size_t fhSize = 0;
 
+    DEBUGLOG(5, "ZSTD_writeEpilogue");
     if (cctx->stage == ZSTDcs_created) return ERROR(stage_wrong);  /* init missing */
 
     /* special case : empty frame */
     if (cctx->stage == ZSTDcs_init) {
-        fhSize = ZSTD_writeFrameHeader(dst, dstCapacity, cctx->params, 0, 0);
+        fhSize = ZSTD_writeFrameHeader(dst, dstCapacity, cctx->appliedParams, 0, 0);
         if (ZSTD_isError(fhSize)) return fhSize;
         dstCapacity -= fhSize;
         op += fhSize;
@@ -2589,7 +3290,7 @@ static size_t ZSTD_writeEpilogue(ZSTD_CCtx* cctx, void* dst, size_t dstCapacity)
         dstCapacity -= ZSTD_blockHeaderSize;
     }
 
-    if (cctx->params.fParams.checksumFlag) {
+    if (cctx->appliedParams.fParams.checksumFlag) {
         U32 const checksum = (U32) XXH64_digest(&cctx->xxhState);
         if (dstCapacity<4) return ERROR(dstSize_tooSmall);
         MEM_writeLE32(op, checksum);
@@ -2606,10 +3307,19 @@ size_t ZSTD_compressEnd (ZSTD_CCtx* cctx,
                    const void* src, size_t srcSize)
 {
     size_t endResult;
-    size_t const cSize = ZSTD_compressContinue_internal(cctx, dst, dstCapacity, src, srcSize, 1, 1);
+    size_t const cSize = ZSTD_compressContinue_internal(cctx,
+                                dst, dstCapacity, src, srcSize,
+                                1 /* frame mode */, 1 /* last chunk */);
     if (ZSTD_isError(cSize)) return cSize;
     endResult = ZSTD_writeEpilogue(cctx, (char*)dst + cSize, dstCapacity-cSize);
     if (ZSTD_isError(endResult)) return endResult;
+    if (cctx->appliedParams.fParams.contentSizeFlag) {  /* control src size */
+        DEBUGLOG(5, "end of frame : controlling src size");
+        if (cctx->pledgedSrcSizePlusOne != cctx->consumedSrcSize+1) {
+            DEBUGLOG(5, "error : pledgedSrcSize = %u, while realSrcSize = %u",
+                (U32)cctx->pledgedSrcSizePlusOne-1, (U32)cctx->consumedSrcSize);
+            return ERROR(srcSize_wrong);
+    }   }
     return cSize + endResult;
 }
 
@@ -2620,7 +3330,8 @@ static size_t ZSTD_compress_internal (ZSTD_CCtx* cctx,
                          const void* dict,size_t dictSize,
                                ZSTD_parameters params)
 {
-    CHECK_F(ZSTD_compressBegin_internal(cctx, dict, dictSize, params, srcSize));
+    CHECK_F( ZSTD_compressBegin_internal(cctx, dict, dictSize, ZSTD_dm_auto, NULL,
+                                         params, srcSize, ZSTDb_not_buffered) );
     return ZSTD_compressEnd(cctx, dst,  dstCapacity, src, srcSize);
 }
 
@@ -2634,9 +3345,10 @@ size_t ZSTD_compress_advanced (ZSTD_CCtx* ctx,
     return ZSTD_compress_internal(ctx, dst, dstCapacity, src, srcSize, dict, dictSize, params);
 }
 
-size_t ZSTD_compress_usingDict(ZSTD_CCtx* ctx, void* dst, size_t dstCapacity, const void* src, size_t srcSize, const void* dict, size_t dictSize, int compressionLevel)
+size_t ZSTD_compress_usingDict(ZSTD_CCtx* ctx, void* dst, size_t dstCapacity, const void* src, size_t srcSize,
+                               const void* dict, size_t dictSize, int compressionLevel)
 {
-    ZSTD_parameters params = ZSTD_getParams(compressionLevel, srcSize, dictSize);
+    ZSTD_parameters params = ZSTD_getParams(compressionLevel, srcSize, dict ? dictSize : 0);
     params.fParams.contentSizeFlag = 1;
     return ZSTD_compress_internal(ctx, dst, dstCapacity, src, srcSize, dict, dictSize, params);
 }
@@ -2651,65 +3363,122 @@ size_t ZSTD_compress(void* dst, size_t dstCapacity, const void* src, size_t srcS
     size_t result;
     ZSTD_CCtx ctxBody;
     memset(&ctxBody, 0, sizeof(ctxBody));
-    memcpy(&ctxBody.customMem, &defaultCustomMem, sizeof(ZSTD_customMem));
+    ctxBody.customMem = ZSTD_defaultCMem;
     result = ZSTD_compressCCtx(&ctxBody, dst, dstCapacity, src, srcSize, compressionLevel);
-    ZSTD_free(ctxBody.workSpace, defaultCustomMem);  /* can't free ctxBody itself, as it's on stack; free only heap content */
+    ZSTD_free(ctxBody.workSpace, ZSTD_defaultCMem);  /* can't free ctxBody itself, as it's on stack; free only heap content */
     return result;
 }
 
 
 /* =====  Dictionary API  ===== */
 
-struct ZSTD_CDict_s {
-    void* dictContent;
-    size_t dictContentSize;
-    ZSTD_CCtx* refContext;
-};  /* typedef'd tp ZSTD_CDict within "zstd.h" */
+/*! ZSTD_estimateCDictSize_advanced() :
+ *  Estimate the amount of memory needed to create a dictionary with the following arguments */
+size_t ZSTD_estimateCDictSize_advanced(size_t dictSize, ZSTD_compressionParameters cParams, unsigned byReference)
+{
+    DEBUGLOG(5, "sizeof(ZSTD_CDict) : %u", (U32)sizeof(ZSTD_CDict));
+    DEBUGLOG(5, "CCtx estimate : %u", (U32)ZSTD_estimateCCtxSize_advanced(cParams));
+    return sizeof(ZSTD_CDict) + ZSTD_estimateCCtxSize_advanced(cParams)
+           + (byReference ? 0 : dictSize);
+}
+
+size_t ZSTD_estimateCDictSize(size_t dictSize, int compressionLevel)
+{
+    ZSTD_compressionParameters const cParams = ZSTD_getCParams(compressionLevel, 0, dictSize);
+    return ZSTD_estimateCDictSize_advanced(dictSize, cParams, 0);
+}
 
 size_t ZSTD_sizeof_CDict(const ZSTD_CDict* cdict)
 {
     if (cdict==NULL) return 0;   /* support sizeof on NULL */
-    return ZSTD_sizeof_CCtx(cdict->refContext) + cdict->dictContentSize;
+    DEBUGLOG(5, "sizeof(*cdict) : %u", (U32)sizeof(*cdict));
+    DEBUGLOG(5, "ZSTD_sizeof_CCtx : %u", (U32)ZSTD_sizeof_CCtx(cdict->refContext));
+    return ZSTD_sizeof_CCtx(cdict->refContext) + (cdict->dictBuffer ? cdict->dictContentSize : 0) + sizeof(*cdict);
 }
 
-ZSTD_CDict* ZSTD_createCDict_advanced(const void* dict, size_t dictSize, ZSTD_parameters params, ZSTD_customMem customMem)
+static ZSTD_parameters ZSTD_makeParams(ZSTD_compressionParameters cParams, ZSTD_frameParameters fParams)
 {
-    if (!customMem.customAlloc && !customMem.customFree) customMem = defaultCustomMem;
-    if (!customMem.customAlloc || !customMem.customFree) return NULL;
+    ZSTD_parameters params;
+    params.cParams = cParams;
+    params.fParams = fParams;
+    return params;
+}
 
-    {   ZSTD_CDict* const cdict = (ZSTD_CDict*) ZSTD_malloc(sizeof(ZSTD_CDict), customMem);
-        void* const dictContent = ZSTD_malloc(dictSize, customMem);
+static size_t ZSTD_initCDict_internal(
+                    ZSTD_CDict* cdict,
+              const void* dictBuffer, size_t dictSize,
+                    unsigned byReference, ZSTD_dictMode_e dictMode,
+                    ZSTD_compressionParameters cParams)
+{
+    DEBUGLOG(5, "ZSTD_initCDict_internal, mode %u", (U32)dictMode);
+    if ((byReference) || (!dictBuffer) || (!dictSize)) {
+        cdict->dictBuffer = NULL;
+        cdict->dictContent = dictBuffer;
+    } else {
+        void* const internalBuffer = ZSTD_malloc(dictSize, cdict->refContext->customMem);
+        cdict->dictBuffer = internalBuffer;
+        cdict->dictContent = internalBuffer;
+        if (!internalBuffer) return ERROR(memory_allocation);
+        memcpy(internalBuffer, dictBuffer, dictSize);
+    }
+    cdict->dictContentSize = dictSize;
+
+    {   ZSTD_frameParameters const fParams = { 0 /* contentSizeFlag */,
+                    0 /* checksumFlag */, 0 /* noDictIDFlag */ };  /* dummy */
+        ZSTD_parameters const params = ZSTD_makeParams(cParams, fParams);
+        CHECK_F( ZSTD_compressBegin_internal(cdict->refContext,
+                                        cdict->dictContent, dictSize, dictMode,
+                                        NULL,
+                                        params, ZSTD_CONTENTSIZE_UNKNOWN,
+                                        ZSTDb_not_buffered) );
+    }
+
+    return 0;
+}
+
+ZSTD_CDict* ZSTD_createCDict_advanced(const void* dictBuffer, size_t dictSize,
+                                      unsigned byReference, ZSTD_dictMode_e dictMode,
+                                      ZSTD_compressionParameters cParams, ZSTD_customMem customMem)
+{
+    DEBUGLOG(5, "ZSTD_createCDict_advanced, mode %u", (U32)dictMode);
+    if (!customMem.customAlloc ^ !customMem.customFree) return NULL;
+
+    {   ZSTD_CDict* const cdict = (ZSTD_CDict*)ZSTD_malloc(sizeof(ZSTD_CDict), customMem);
         ZSTD_CCtx* const cctx = ZSTD_createCCtx_advanced(customMem);
 
-        if (!dictContent || !cdict || !cctx) {
-            ZSTD_free(dictContent, customMem);
+        if (!cdict || !cctx) {
             ZSTD_free(cdict, customMem);
-            ZSTD_free(cctx, customMem);
+            ZSTD_freeCCtx(cctx);
+            return NULL;
+        }
+        cdict->refContext = cctx;
+
+        if (ZSTD_isError( ZSTD_initCDict_internal(cdict,
+                                        dictBuffer, dictSize,
+                                        byReference, dictMode,
+                                        cParams) )) {
+            ZSTD_freeCDict(cdict);
             return NULL;
         }
 
-        memcpy(dictContent, dict, dictSize);
-        {   size_t const errorCode = ZSTD_compressBegin_advanced(cctx, dictContent, dictSize, params, 0);
-            if (ZSTD_isError(errorCode)) {
-                ZSTD_free(dictContent, customMem);
-                ZSTD_free(cdict, customMem);
-                ZSTD_free(cctx, customMem);
-                return NULL;
-        }   }
-
-        cdict->dictContent = dictContent;
-        cdict->dictContentSize = dictSize;
-        cdict->refContext = cctx;
         return cdict;
     }
 }
 
 ZSTD_CDict* ZSTD_createCDict(const void* dict, size_t dictSize, int compressionLevel)
 {
-    ZSTD_customMem const allocator = { NULL, NULL, NULL };
-    ZSTD_parameters params = ZSTD_getParams(compressionLevel, 0, dictSize);
-    params.fParams.contentSizeFlag = 1;
-    return ZSTD_createCDict_advanced(dict, dictSize, params, allocator);
+    ZSTD_compressionParameters cParams = ZSTD_getCParams(compressionLevel, 0, dictSize);
+    return ZSTD_createCDict_advanced(dict, dictSize,
+                                     0 /* byReference */, ZSTD_dm_auto,
+                                     cParams, ZSTD_defaultCMem);
+}
+
+ZSTD_CDict* ZSTD_createCDict_byReference(const void* dict, size_t dictSize, int compressionLevel)
+{
+    ZSTD_compressionParameters cParams = ZSTD_getCParams(compressionLevel, 0, dictSize);
+    return ZSTD_createCDict_advanced(dict, dictSize,
+                                     1 /* byReference */, ZSTD_dm_auto,
+                                     cParams, ZSTD_defaultCMem);
 }
 
 size_t ZSTD_freeCDict(ZSTD_CDict* cdict)
@@ -2717,36 +3486,112 @@ size_t ZSTD_freeCDict(ZSTD_CDict* cdict)
     if (cdict==NULL) return 0;   /* support free on NULL */
     {   ZSTD_customMem const cMem = cdict->refContext->customMem;
         ZSTD_freeCCtx(cdict->refContext);
-        ZSTD_free(cdict->dictContent, cMem);
+        ZSTD_free(cdict->dictBuffer, cMem);
         ZSTD_free(cdict, cMem);
         return 0;
     }
 }
 
-size_t ZSTD_compressBegin_usingCDict(ZSTD_CCtx* cctx, const ZSTD_CDict* cdict, U64 pledgedSrcSize)
+/*! ZSTD_initStaticCDict() :
+ *  Generate a digested dictionary in provided memory area.
+ *  workspace: The memory area to emplace the dictionary into.
+ *             Provided pointer must be 8-byte aligned.
+ *             It must outlive dictionary usage.
+ *  workspaceSize: Use ZSTD_estimateCDictSize()
+ *                 to determine how large workspace must be.
+ *  cParams : use ZSTD_getCParams() to transform a compression level
+ *            into its relevant cParams.
+ * @return : pointer to ZSTD_CDict, or NULL if error (workspace misaligned or too small, or init failure)
+ *  Note : there is no corresponding "free" function.
+ *         Since workspace was allocated externally, it must be freed externally.
+ */
+ZSTD_CDict* ZSTD_initStaticCDict(void* workspace, size_t workspaceSize,
+                           const void* dict, size_t dictSize,
+                                 unsigned byReference, ZSTD_dictMode_e dictMode,
+                                 ZSTD_compressionParameters cParams)
 {
-    if (cdict->dictContentSize) CHECK_F(ZSTD_copyCCtx(cctx, cdict->refContext, pledgedSrcSize))
-    else CHECK_F(ZSTD_compressBegin_advanced(cctx, NULL, 0, cdict->refContext->params, pledgedSrcSize));
-    return 0;
+    size_t const cctxSize = ZSTD_estimateCCtxSize_advanced(cParams);
+    size_t const neededSize = sizeof(ZSTD_CDict) + (byReference ? 0 : dictSize)
+                            + cctxSize;
+    ZSTD_CDict* const cdict = (ZSTD_CDict*) workspace;
+    void* ptr;
+    DEBUGLOG(5, "(size_t)workspace & 7 : %u", (U32)(size_t)workspace & 7);
+    if ((size_t)workspace & 7) return NULL;  /* 8-aligned */
+    DEBUGLOG(5, "(workspaceSize < neededSize) : (%u < %u) => %u",
+        (U32)workspaceSize, (U32)neededSize, (U32)(workspaceSize < neededSize));
+    if (workspaceSize < neededSize) return NULL;
+
+    if (!byReference) {
+        memcpy(cdict+1, dict, dictSize);
+        dict = cdict+1;
+        ptr = (char*)workspace + sizeof(ZSTD_CDict) + dictSize;
+    } else {
+        ptr = cdict+1;
+    }
+    cdict->refContext = ZSTD_initStaticCCtx(ptr, cctxSize);
+
+    if (ZSTD_isError( ZSTD_initCDict_internal(cdict,
+                                              dict, dictSize,
+                                              1 /* byReference */, dictMode,
+                                              cParams) ))
+        return NULL;
+
+    return cdict;
+}
+
+ZSTD_parameters ZSTD_getParamsFromCDict(const ZSTD_CDict* cdict) {
+    return ZSTD_getParamsFromCCtx(cdict->refContext);
+}
+
+/* ZSTD_compressBegin_usingCDict_advanced() :
+ * cdict must be != NULL, otherwise ERROR(dictionary_wrong) is returned */
+size_t ZSTD_compressBegin_usingCDict_advanced(
+    ZSTD_CCtx* const cctx, const ZSTD_CDict* const cdict,
+    ZSTD_frameParameters const fParams, unsigned long long const pledgedSrcSize)
+{
+    if (cdict==NULL) return ERROR(dictionary_wrong);
+    {   ZSTD_parameters params = cdict->refContext->appliedParams;
+        params.fParams = fParams;
+        DEBUGLOG(5, "ZSTD_compressBegin_usingCDict_advanced");
+        return ZSTD_compressBegin_internal(cctx,
+                                           NULL, 0, ZSTD_dm_auto,
+                                           cdict,
+                                           params, pledgedSrcSize,
+                                           ZSTDb_not_buffered);
+    }
+}
+
+/* ZSTD_compressBegin_usingCDict() :
+ * no source size is pledged (0 is passed as pledgedSrcSize), and frame parameters
+ * are hardcoded : contentSizeFlag=0, checksumFlag=0, noDictIDFlag=0 */
+size_t ZSTD_compressBegin_usingCDict(ZSTD_CCtx* cctx, const ZSTD_CDict* cdict)
+{
+    ZSTD_frameParameters const fParams = { 0 /*content*/, 0 /*checksum*/, 0 /*noDictID*/ };
+    DEBUGLOG(5, "ZSTD_compressBegin_usingCDict : dictIDFlag == %u", !fParams.noDictIDFlag);
+    return ZSTD_compressBegin_usingCDict_advanced(cctx, cdict, fParams, 0);
+}
+
+size_t ZSTD_compress_usingCDict_advanced(ZSTD_CCtx* cctx,
+                                void* dst, size_t dstCapacity,
+                                const void* src, size_t srcSize,
+                                const ZSTD_CDict* cdict, ZSTD_frameParameters fParams)
+{
+    CHECK_F (ZSTD_compressBegin_usingCDict_advanced(cctx, cdict, fParams, srcSize));   /* will check if cdict != NULL */
+    return ZSTD_compressEnd(cctx, dst, dstCapacity, src, srcSize);
 }
 
 /*! ZSTD_compress_usingCDict() :
-*   Compression using a digested Dictionary.
-*   Faster startup than ZSTD_compress_usingDict(), recommended when same dictionary is used multiple times.
-*   Note that compression level is decided during dictionary creation */
+ *  Compression using a digested Dictionary.
+ *  Faster startup than ZSTD_compress_usingDict(), recommended when same dictionary is used multiple times.
+ *  Note that compression parameters are decided at CDict creation time
+ *  while frame parameters are hardcoded */
 size_t ZSTD_compress_usingCDict(ZSTD_CCtx* cctx,
                                 void* dst, size_t dstCapacity,
                                 const void* src, size_t srcSize,
                                 const ZSTD_CDict* cdict)
 {
-    CHECK_F(ZSTD_compressBegin_usingCDict(cctx, cdict, srcSize));
-
-    if (cdict->refContext->params.fParams.contentSizeFlag==1) {
-        cctx->params.fParams.contentSizeFlag = 1;
-        cctx->frameContentSize = srcSize;
-    }
-
-    return ZSTD_compressEnd(cctx, dst, dstCapacity, src, srcSize);
+    ZSTD_frameParameters const fParams = { 1 /*content*/, 0 /*checksum*/, 0 /*noDictID*/ };
+    return ZSTD_compress_usingCDict_advanced(cctx, dst, dstCapacity, src, srcSize, cdict, fParams);
 }
 
 
@@ -2755,212 +3600,314 @@ size_t ZSTD_compress_usingCDict(ZSTD_CCtx* cctx,
 *  Streaming
 ********************************************************************/
 
-typedef enum { zcss_init, zcss_load, zcss_flush, zcss_final } ZSTD_cStreamStage;
-
-struct ZSTD_CStream_s {
-    ZSTD_CCtx* cctx;
-    ZSTD_CDict* cdict;
-    char*  inBuff;
-    size_t inBuffSize;
-    size_t inToCompress;
-    size_t inBuffPos;
-    size_t inBuffTarget;
-    size_t blockSize;
-    char*  outBuff;
-    size_t outBuffSize;
-    size_t outBuffContentSize;
-    size_t outBuffFlushedSize;
-    ZSTD_cStreamStage stage;
-    U32    checksum;
-    U32    frameEnded;
-    ZSTD_customMem customMem;
-};   /* typedef'd to ZSTD_CStream within "zstd.h" */
-
 ZSTD_CStream* ZSTD_createCStream(void)
 {
-    return ZSTD_createCStream_advanced(defaultCustomMem);
+    return ZSTD_createCStream_advanced(ZSTD_defaultCMem);
+}
+
+ZSTD_CStream* ZSTD_initStaticCStream(void *workspace, size_t workspaceSize)
+{
+    return ZSTD_initStaticCCtx(workspace, workspaceSize);
 }
 
 ZSTD_CStream* ZSTD_createCStream_advanced(ZSTD_customMem customMem)
-{
-    ZSTD_CStream* zcs;
-
-    if (!customMem.customAlloc && !customMem.customFree) customMem = defaultCustomMem;
-    if (!customMem.customAlloc || !customMem.customFree) return NULL;
-
-    zcs = (ZSTD_CStream*)ZSTD_malloc(sizeof(ZSTD_CStream), customMem);
-    if (zcs==NULL) return NULL;
-    memset(zcs, 0, sizeof(ZSTD_CStream));
-    memcpy(&zcs->customMem, &customMem, sizeof(ZSTD_customMem));
-    zcs->cctx = ZSTD_createCCtx_advanced(customMem);
-    if (zcs->cctx == NULL) { ZSTD_freeCStream(zcs); return NULL; }
-    return zcs;
+{   /* CStream and CCtx are now same object */
+    return ZSTD_createCCtx_advanced(customMem);
 }
 
 size_t ZSTD_freeCStream(ZSTD_CStream* zcs)
 {
-    if (zcs==NULL) return 0;   /* support free on NULL */
-    {   ZSTD_customMem const cMem = zcs->customMem;
-        ZSTD_freeCCtx(zcs->cctx);
-        ZSTD_freeCDict(zcs->cdict);
-        ZSTD_free(zcs->inBuff, cMem);
-        ZSTD_free(zcs->outBuff, cMem);
-        ZSTD_free(zcs, cMem);
-        return 0;
-    }
+    return ZSTD_freeCCtx(zcs);   /* same object */
 }
 
 
+
 /*======   Initialization   ======*/
 
-size_t ZSTD_CStreamInSize(void)  { return ZSTD_BLOCKSIZE_ABSOLUTEMAX; }
-size_t ZSTD_CStreamOutSize(void) { return ZSTD_compressBound(ZSTD_BLOCKSIZE_ABSOLUTEMAX) + ZSTD_blockHeaderSize + 4 /* 32-bits hash */ ; }
+size_t ZSTD_CStreamInSize(void)  { return ZSTD_BLOCKSIZE_MAX; }
 
-size_t ZSTD_resetCStream(ZSTD_CStream* zcs, unsigned long long pledgedSrcSize)
+size_t ZSTD_CStreamOutSize(void)
 {
-    CHECK_F(ZSTD_compressBegin_usingCDict(zcs->cctx, zcs->cdict, pledgedSrcSize));
+    return ZSTD_compressBound(ZSTD_BLOCKSIZE_MAX) + ZSTD_blockHeaderSize + 4 /* 32-bits hash */ ;
+}
+
+static size_t ZSTD_resetCStream_internal(ZSTD_CStream* zcs,
+                    const void* dict, size_t dictSize, ZSTD_dictMode_e dictMode,
+                    const ZSTD_CDict* cdict,
+                    ZSTD_parameters params, unsigned long long pledgedSrcSize)
+{
+    DEBUGLOG(4, "ZSTD_resetCStream_internal");
+    /* params are supposed to be fully validated at this point */
+    assert(!ZSTD_isError(ZSTD_checkCParams(params.cParams)));
+    assert(!((dict) && (cdict)));  /* either dict or cdict, not both */
+
+    CHECK_F( ZSTD_compressBegin_internal(zcs,
+                                        dict, dictSize, dictMode,
+                                        cdict,
+                                        params, pledgedSrcSize,
+                                        ZSTDb_buffered) );
 
     zcs->inToCompress = 0;
     zcs->inBuffPos = 0;
     zcs->inBuffTarget = zcs->blockSize;
     zcs->outBuffContentSize = zcs->outBuffFlushedSize = 0;
-    zcs->stage = zcss_load;
+    zcs->streamStage = zcss_load;
     zcs->frameEnded = 0;
     return 0;   /* ready to go */
 }
 
+size_t ZSTD_resetCStream(ZSTD_CStream* zcs, unsigned long long pledgedSrcSize)
+{
+    ZSTD_parameters params = zcs->requestedParams;
+    params.fParams.contentSizeFlag = (pledgedSrcSize > 0);
+    DEBUGLOG(5, "ZSTD_resetCStream");
+    if (zcs->compressionLevel != ZSTD_CLEVEL_CUSTOM) {
+        params.cParams = ZSTD_getCParams(zcs->compressionLevel, pledgedSrcSize, 0 /* dictSize */);
+    }
+    return ZSTD_resetCStream_internal(zcs, NULL, 0, zcs->dictMode, zcs->cdict, params, pledgedSrcSize);
+}
+
+/*! ZSTD_initCStream_internal() :
+ *  Note : not static, but hidden (not exposed). Used by zstdmt_compress.c
+ *  Assumption 1 : params are valid
+ *  Assumption 2 : either dict, or cdict, is defined, not both */
+size_t ZSTD_initCStream_internal(ZSTD_CStream* zcs,
+                    const void* dict, size_t dictSize, const ZSTD_CDict* cdict,
+                    ZSTD_parameters params, unsigned long long pledgedSrcSize)
+{
+    DEBUGLOG(5, "ZSTD_initCStream_internal");
+    assert(!ZSTD_isError(ZSTD_checkCParams(params.cParams)));
+    assert(!((dict) && (cdict)));  /* either dict or cdict, not both */
+
+    if (dict && dictSize >= 8) {
+        DEBUGLOG(5, "loading dictionary of size %u", (U32)dictSize);
+        if (zcs->staticSize) {   /* static CCtx : never uses malloc */
+            /* incompatible with internal cdict creation */
+            return ERROR(memory_allocation);
+        }
+        ZSTD_freeCDict(zcs->cdictLocal);
+        zcs->cdictLocal = ZSTD_createCDict_advanced(dict, dictSize,
+                                            zcs->dictContentByRef, zcs->dictMode,
+                                            params.cParams, zcs->customMem);
+        zcs->cdict = zcs->cdictLocal;
+        if (zcs->cdictLocal == NULL) return ERROR(memory_allocation);
+    } else {
+        if (cdict) {
+            ZSTD_parameters const cdictParams = ZSTD_getParamsFromCDict(cdict);
+            params.cParams = cdictParams.cParams;  /* cParams are enforced from cdict */
+        }
+        ZSTD_freeCDict(zcs->cdictLocal);
+        zcs->cdictLocal = NULL;
+        zcs->cdict = cdict;
+    }
+
+    zcs->requestedParams = params;
+    zcs->compressionLevel = ZSTD_CLEVEL_CUSTOM;
+    return ZSTD_resetCStream_internal(zcs, NULL, 0, zcs->dictMode, zcs->cdict, params, pledgedSrcSize);
+}
+
+/* ZSTD_initCStream_usingCDict_advanced() :
+ * same as ZSTD_initCStream_usingCDict(), with control over frame parameters */
+size_t ZSTD_initCStream_usingCDict_advanced(ZSTD_CStream* zcs,
+                                            const ZSTD_CDict* cdict,
+                                            ZSTD_frameParameters fParams,
+                                            unsigned long long pledgedSrcSize)
+{   /* cannot handle NULL cdict (does not know what to do) */
+    if (!cdict) return ERROR(dictionary_wrong);
+    {   ZSTD_parameters params = ZSTD_getParamsFromCDict(cdict);
+        params.fParams = fParams;
+        return ZSTD_initCStream_internal(zcs,
+                                NULL, 0, cdict,
+                                params, pledgedSrcSize);
+    }
+}
+
+/* note : cdict must outlive compression session */
+size_t ZSTD_initCStream_usingCDict(ZSTD_CStream* zcs, const ZSTD_CDict* cdict)
+{
+    ZSTD_frameParameters const fParams = { 0 /* contentSize */, 0 /* checksum */, 0 /* hideDictID */ };
+    return ZSTD_initCStream_usingCDict_advanced(zcs, cdict, fParams, 0);  /* note : will check that cdict != NULL */
+}
+
 size_t ZSTD_initCStream_advanced(ZSTD_CStream* zcs,
                                  const void* dict, size_t dictSize,
                                  ZSTD_parameters params, unsigned long long pledgedSrcSize)
 {
-    /* allocate buffers */
-    {   size_t const neededInBuffSize = (size_t)1 << params.cParams.windowLog;
-        if (zcs->inBuffSize < neededInBuffSize) {
-            zcs->inBuffSize = neededInBuffSize;
-            ZSTD_free(zcs->inBuff, zcs->customMem);
-            zcs->inBuff = (char*) ZSTD_malloc(neededInBuffSize, zcs->customMem);
-            if (zcs->inBuff == NULL) return ERROR(memory_allocation);
-        }
-        zcs->blockSize = MIN(ZSTD_BLOCKSIZE_ABSOLUTEMAX, neededInBuffSize);
-    }
-    if (zcs->outBuffSize < ZSTD_compressBound(zcs->blockSize)+1) {
-        zcs->outBuffSize = ZSTD_compressBound(zcs->blockSize)+1;
-        ZSTD_free(zcs->outBuff, zcs->customMem);
-        zcs->outBuff = (char*) ZSTD_malloc(zcs->outBuffSize, zcs->customMem);
-        if (zcs->outBuff == NULL) return ERROR(memory_allocation);
-    }
-
-    ZSTD_freeCDict(zcs->cdict);
-    zcs->cdict = ZSTD_createCDict_advanced(dict, dictSize, params, zcs->customMem);
-    if (zcs->cdict == NULL) return ERROR(memory_allocation);
-
-    zcs->checksum = params.fParams.checksumFlag > 0;
-
-    return ZSTD_resetCStream(zcs, pledgedSrcSize);
+    CHECK_F( ZSTD_checkCParams(params.cParams) );
+    zcs->requestedParams = params;
+    zcs->compressionLevel = ZSTD_CLEVEL_CUSTOM;
+    return ZSTD_initCStream_internal(zcs, dict, dictSize, NULL, params, pledgedSrcSize);
 }
 
 size_t ZSTD_initCStream_usingDict(ZSTD_CStream* zcs, const void* dict, size_t dictSize, int compressionLevel)
 {
     ZSTD_parameters const params = ZSTD_getParams(compressionLevel, 0, dictSize);
-    return ZSTD_initCStream_advanced(zcs, dict, dictSize, params, 0);
+    zcs->compressionLevel = compressionLevel;
+    return ZSTD_initCStream_internal(zcs, dict, dictSize, NULL, params, 0);
+}
+
+size_t ZSTD_initCStream_srcSize(ZSTD_CStream* zcs, int compressionLevel, unsigned long long pledgedSrcSize)
+{
+    ZSTD_parameters params = ZSTD_getParams(compressionLevel, pledgedSrcSize, 0);
+    params.fParams.contentSizeFlag = (pledgedSrcSize>0);
+    return ZSTD_initCStream_internal(zcs, NULL, 0, NULL, params, pledgedSrcSize);
 }
 
 size_t ZSTD_initCStream(ZSTD_CStream* zcs, int compressionLevel)
 {
-    return ZSTD_initCStream_usingDict(zcs, NULL, 0, compressionLevel);
-}
-
-size_t ZSTD_sizeof_CStream(const ZSTD_CStream* zcs)
-{
-    if (zcs==NULL) return 0;   /* support sizeof on NULL */
-    return sizeof(zcs) + ZSTD_sizeof_CCtx(zcs->cctx) + ZSTD_sizeof_CDict(zcs->cdict) + zcs->outBuffSize + zcs->inBuffSize;
+    ZSTD_parameters const params = ZSTD_getParams(compressionLevel, 0, 0);
+    return ZSTD_initCStream_internal(zcs, NULL, 0, NULL, params, 0);
 }
 
 /*======   Compression   ======*/
 
-typedef enum { zsf_gather, zsf_flush, zsf_end } ZSTD_flush_e;
-
-MEM_STATIC size_t ZSTD_limitCopy(void* dst, size_t dstCapacity, const void* src, size_t srcSize)
+MEM_STATIC size_t ZSTD_limitCopy(void* dst, size_t dstCapacity,
+                           const void* src, size_t srcSize)
 {
     size_t const length = MIN(dstCapacity, srcSize);
-    memcpy(dst, src, length);
+    if (length) memcpy(dst, src, length);
     return length;
 }
 
-static size_t ZSTD_compressStream_generic(ZSTD_CStream* zcs,
-                              void* dst, size_t* dstCapacityPtr,
-                        const void* src, size_t* srcSizePtr,
-                              ZSTD_flush_e const flush)
+/** ZSTD_compressStream_generic():
+ *  internal function for all *compressStream*() variants and *compress_generic()
+ * @return : hint size for next input */
+size_t ZSTD_compressStream_generic(ZSTD_CStream* zcs,
+                                   ZSTD_outBuffer* output,
+                                   ZSTD_inBuffer* input,
+                                   ZSTD_EndDirective const flushMode)
 {
+    const char* const istart = (const char*)input->src;
+    const char* const iend = istart + input->size;
+    const char* ip = istart + input->pos;
+    char* const ostart = (char*)output->dst;
+    char* const oend = ostart + output->size;
+    char* op = ostart + output->pos;
     U32 someMoreWork = 1;
-    const char* const istart = (const char*)src;
-    const char* const iend = istart + *srcSizePtr;
-    const char* ip = istart;
-    char* const ostart = (char*)dst;
-    char* const oend = ostart + *dstCapacityPtr;
-    char* op = ostart;
+
+    /* check expectations */
+    DEBUGLOG(5, "ZSTD_compressStream_generic, flush=%u", (U32)flushMode);
+    assert(zcs->inBuff != NULL);
+    assert(zcs->inBuffSize>0);
+    assert(zcs->outBuff!= NULL);
+    assert(zcs->outBuffSize>0);
+    assert(output->pos <= output->size);
+    assert(input->pos <= input->size);
 
     while (someMoreWork) {
-        switch(zcs->stage)
+        switch(zcs->streamStage)
         {
-        case zcss_init: return ERROR(init_missing);   /* call ZBUFF_compressInit() first ! */
+        case zcss_init:
+            /* call ZSTD_initCStream() first ! */
+            return ERROR(init_missing);
 
         case zcss_load:
-            /* complete inBuffer */
+            if ( (flushMode == ZSTD_e_end)
+              && ((size_t)(oend-op) >= ZSTD_compressBound(iend-ip))  /* enough dstCapacity */
+              && (zcs->inBuffPos == 0) ) {
+                /* shortcut to compression pass directly into output buffer */
+                size_t const cSize = ZSTD_compressEnd(zcs,
+                                                op, oend-op, ip, iend-ip);
+                DEBUGLOG(4, "ZSTD_compressEnd : %u", (U32)cSize);
+                if (ZSTD_isError(cSize)) return cSize;
+                ip = iend;
+                op += cSize;
+                zcs->frameEnded = 1;
+                ZSTD_startNewCompression(zcs);
+                someMoreWork = 0; break;
+            }
+            /* complete loading into inBuffer */
             {   size_t const toLoad = zcs->inBuffTarget - zcs->inBuffPos;
-                size_t const loaded = ZSTD_limitCopy(zcs->inBuff + zcs->inBuffPos, toLoad, ip, iend-ip);
+                size_t const loaded = ZSTD_limitCopy(
+                                        zcs->inBuff + zcs->inBuffPos, toLoad,
+                                        ip, iend-ip);
                 zcs->inBuffPos += loaded;
                 ip += loaded;
-                if ( (zcs->inBuffPos==zcs->inToCompress) || (!flush && (toLoad != loaded)) ) {
-                    someMoreWork = 0; break;  /* not enough input to get a full block : stop there, wait for more */
-            }   }
+                if ( (flushMode == ZSTD_e_continue)
+                  && (zcs->inBuffPos < zcs->inBuffTarget) ) {
+                    /* not enough input to fill full block : stop here */
+                    someMoreWork = 0; break;
+                }
+                if ( (flushMode == ZSTD_e_flush)
+                  && (zcs->inBuffPos == zcs->inToCompress) ) {
+                    /* empty */
+                    someMoreWork = 0; break;
+                }
+            }
             /* compress current block (note : this stage cannot be stopped in the middle) */
+            DEBUGLOG(5, "stream compression stage (flushMode==%u)", flushMode);
             {   void* cDst;
                 size_t cSize;
                 size_t const iSize = zcs->inBuffPos - zcs->inToCompress;
                 size_t oSize = oend-op;
+                unsigned const lastBlock = (flushMode == ZSTD_e_end) && (ip==iend);
                 if (oSize >= ZSTD_compressBound(iSize))
-                    cDst = op;   /* compress directly into output buffer (avoid flush stage) */
+                    cDst = op;   /* compress into output buffer, to skip flush stage */
                 else
                     cDst = zcs->outBuff, oSize = zcs->outBuffSize;
-                cSize = (flush == zsf_end) ?
-                        ZSTD_compressEnd(zcs->cctx, cDst, oSize, zcs->inBuff + zcs->inToCompress, iSize) :
-                        ZSTD_compressContinue(zcs->cctx, cDst, oSize, zcs->inBuff + zcs->inToCompress, iSize);
+                cSize = lastBlock ?
+                        ZSTD_compressEnd(zcs, cDst, oSize,
+                                    zcs->inBuff + zcs->inToCompress, iSize) :
+                        ZSTD_compressContinue(zcs, cDst, oSize,
+                                    zcs->inBuff + zcs->inToCompress, iSize);
                 if (ZSTD_isError(cSize)) return cSize;
-                if (flush == zsf_end) zcs->frameEnded = 1;
+                zcs->frameEnded = lastBlock;
                 /* prepare next block */
                 zcs->inBuffTarget = zcs->inBuffPos + zcs->blockSize;
                 if (zcs->inBuffTarget > zcs->inBuffSize)
-                    zcs->inBuffPos = 0, zcs->inBuffTarget = zcs->blockSize;   /* note : inBuffSize >= blockSize */
+                    zcs->inBuffPos = 0, zcs->inBuffTarget = zcs->blockSize;
+                DEBUGLOG(5, "inBuffTarget:%u / inBuffSize:%u",
+                         (U32)zcs->inBuffTarget, (U32)zcs->inBuffSize);
+                if (!lastBlock)
+                    assert(zcs->inBuffTarget <= zcs->inBuffSize);
                 zcs->inToCompress = zcs->inBuffPos;
-                if (cDst == op) { op += cSize; break; }   /* no need to flush */
+                if (cDst == op) {  /* no need to flush */
+                    op += cSize;
+                    if (zcs->frameEnded) {
+                        DEBUGLOG(5, "Frame completed directly in outBuffer");
+                        someMoreWork = 0;
+                        ZSTD_startNewCompression(zcs);
+                    }
+                    break;
+                }
                 zcs->outBuffContentSize = cSize;
                 zcs->outBuffFlushedSize = 0;
-                zcs->stage = zcss_flush;   /* pass-through to flush stage */
+                zcs->streamStage = zcss_flush; /* pass-through to flush stage */
             }
-
+	    /* fall-through */
         case zcss_flush:
+            DEBUGLOG(5, "flush stage");
             {   size_t const toFlush = zcs->outBuffContentSize - zcs->outBuffFlushedSize;
-                size_t const flushed = ZSTD_limitCopy(op, oend-op, zcs->outBuff + zcs->outBuffFlushedSize, toFlush);
+                size_t const flushed = ZSTD_limitCopy(op, oend-op,
+                            zcs->outBuff + zcs->outBuffFlushedSize, toFlush);
+                DEBUGLOG(5, "toFlush: %u into %u ==> flushed: %u",
+                            (U32)toFlush, (U32)(oend-op), (U32)flushed);
                 op += flushed;
                 zcs->outBuffFlushedSize += flushed;
-                if (toFlush!=flushed) { someMoreWork = 0; break; }  /* dst too small to store flushed data : stop there */
+                if (toFlush!=flushed) {
+                    /* flush not fully completed, presumably because dst is too small */
+                    assert(op==oend);
+                    someMoreWork = 0;
+                    break;
+                }
                 zcs->outBuffContentSize = zcs->outBuffFlushedSize = 0;
-                zcs->stage = zcss_load;
+                if (zcs->frameEnded) {
+                    DEBUGLOG(5, "Frame completed on flush");
+                    someMoreWork = 0;
+                    ZSTD_startNewCompression(zcs);
+                    break;
+                }
+                zcs->streamStage = zcss_load;
                 break;
             }
 
-        case zcss_final:
-            someMoreWork = 0;   /* do nothing */
-            break;
-
-        default:
-            return ERROR(GENERIC);   /* impossible */
+        default: /* impossible */
+            assert(0);
         }
     }
 
-    *srcSizePtr = ip - istart;
-    *dstCapacityPtr = op - ostart;
+    input->pos = ip - istart;
+    output->pos = op - ostart;
     if (zcs->frameEnded) return 0;
     {   size_t hintInSize = zcs->inBuffTarget - zcs->inBuffPos;
         if (hintInSize==0) hintInSize = zcs->blockSize;
@@ -2970,14 +3917,86 @@ static size_t ZSTD_compressStream_generic(ZSTD_CStream* zcs,
 
 size_t ZSTD_compressStream(ZSTD_CStream* zcs, ZSTD_outBuffer* output, ZSTD_inBuffer* input)
 {
-    size_t sizeRead = input->size - input->pos;
-    size_t sizeWritten = output->size - output->pos;
-    size_t const result = ZSTD_compressStream_generic(zcs,
-                                                      (char*)(output->dst) + output->pos, &sizeWritten,
-                                                      (const char*)(input->src) + input->pos, &sizeRead, zsf_gather);
-    input->pos += sizeRead;
-    output->pos += sizeWritten;
-    return result;
+    /* check conditions */
+    if (output->pos > output->size) return ERROR(GENERIC);
+    if (input->pos  > input->size)  return ERROR(GENERIC);
+
+    return ZSTD_compressStream_generic(zcs, output, input, ZSTD_e_continue);
+}
+
+/*! ZSTDMT_initCStream_internal() :
+ *  Private use only. Init streaming operation.
+ *  expects params to be valid.
+ *  must receive either dict, or cdict, or neither, but never both.
+ *  @return : 0, or an error code */
+size_t ZSTDMT_initCStream_internal(ZSTDMT_CCtx* zcs,
+                    const void* dict, size_t dictSize, const ZSTD_CDict* cdict,
+                    ZSTD_parameters params, unsigned long long pledgedSrcSize);
+
+
+size_t ZSTD_compress_generic (ZSTD_CCtx* cctx,
+                              ZSTD_outBuffer* output,
+                              ZSTD_inBuffer* input,
+                              ZSTD_EndDirective endOp)
+{
+    /* check conditions */
+    if (output->pos > output->size) return ERROR(GENERIC);
+    if (input->pos  > input->size)  return ERROR(GENERIC);
+    assert(cctx!=NULL);
+
+    /* transparent initialization stage */
+    if (cctx->streamStage == zcss_init) {
+        const void* const prefix = cctx->prefix;
+        size_t const prefixSize = cctx->prefixSize;
+        ZSTD_parameters params = cctx->requestedParams;
+        if (cctx->compressionLevel != ZSTD_CLEVEL_CUSTOM)
+            params.cParams = ZSTD_getCParams(cctx->compressionLevel,
+                                cctx->pledgedSrcSizePlusOne-1, 0 /*dictSize*/);
+        cctx->prefix = NULL; cctx->prefixSize = 0;   /* single usage */
+        assert(prefix==NULL || cctx->cdict==NULL);   /* only one can be set */
+
+#ifdef ZSTD_MULTITHREAD
+        if (cctx->nbThreads > 1) {
+            DEBUGLOG(4, "call ZSTDMT_initCStream_internal as nbThreads=%u", cctx->nbThreads);
+            CHECK_F( ZSTDMT_initCStream_internal(cctx->mtctx, prefix, prefixSize, cctx->cdict, params, cctx->pledgedSrcSizePlusOne-1) );
+            cctx->streamStage = zcss_load;
+        } else
+#endif
+        {
+            CHECK_F( ZSTD_resetCStream_internal(cctx, prefix, prefixSize, cctx->dictMode, cctx->cdict, params, cctx->pledgedSrcSizePlusOne-1) );
+    }   }
+
+    /* compression stage */
+#ifdef ZSTD_MULTITHREAD
+    if (cctx->nbThreads > 1) {
+        size_t const flushMin = ZSTDMT_compressStream_generic(cctx->mtctx, output, input, endOp);
+        DEBUGLOG(5, "ZSTDMT_compressStream_generic : %u", (U32)flushMin);
+        if ( ZSTD_isError(flushMin)
+          || (endOp == ZSTD_e_end && flushMin == 0) ) { /* compression completed */
+            ZSTD_startNewCompression(cctx);
+        }
+        return flushMin;
+    }
+#endif
+
+    CHECK_F( ZSTD_compressStream_generic(cctx, output, input, endOp) );
+    DEBUGLOG(5, "completed ZSTD_compress_generic");
+    return cctx->outBuffContentSize - cctx->outBuffFlushedSize; /* remaining to flush */
+}
+
+size_t ZSTD_compress_generic_simpleArgs (
+                            ZSTD_CCtx* cctx,
+                            void* dst, size_t dstCapacity, size_t* dstPos,
+                      const void* src, size_t srcSize, size_t* srcPos,
+                            ZSTD_EndDirective endOp)
+{
+    ZSTD_outBuffer output = { dst, dstCapacity, *dstPos };
+    ZSTD_inBuffer  input  = { src, srcSize, *srcPos };
+    /* ZSTD_compress_generic() will check validity of dstPos and srcPos */
+    size_t const cErr = ZSTD_compress_generic(cctx, &output, &input, endOp);
+    *dstPos = output.pos;
+    *srcPos = input.pos;
+    return cErr;
 }
 
 
@@ -2987,86 +4006,59 @@ size_t ZSTD_compressStream(ZSTD_CStream* zcs, ZSTD_outBuffer* output, ZSTD_inBuf
 *   @return : amount of data remaining to flush */
 size_t ZSTD_flushStream(ZSTD_CStream* zcs, ZSTD_outBuffer* output)
 {
-    size_t srcSize = 0;
-    size_t sizeWritten = output->size - output->pos;
-    size_t const result = ZSTD_compressStream_generic(zcs,
-                                                     (char*)(output->dst) + output->pos, &sizeWritten,
-                                                     &srcSize, &srcSize, /* use a valid src address instead of NULL */
-                                                      zsf_flush);
-    output->pos += sizeWritten;
-    if (ZSTD_isError(result)) return result;
-    return zcs->outBuffContentSize - zcs->outBuffFlushedSize;   /* remaining to flush */
+    ZSTD_inBuffer input = { NULL, 0, 0 };
+    if (output->pos > output->size) return ERROR(GENERIC);
+    CHECK_F( ZSTD_compressStream_generic(zcs, output, &input, ZSTD_e_flush) );
+    return zcs->outBuffContentSize - zcs->outBuffFlushedSize;  /* remaining to flush */
 }
 
 
 size_t ZSTD_endStream(ZSTD_CStream* zcs, ZSTD_outBuffer* output)
 {
-    BYTE* const ostart = (BYTE*)(output->dst) + output->pos;
-    BYTE* const oend = (BYTE*)(output->dst) + output->size;
-    BYTE* op = ostart;
-
-    if (zcs->stage != zcss_final) {
-        /* flush whatever remains */
-        size_t srcSize = 0;
-        size_t sizeWritten = output->size - output->pos;
-        size_t const notEnded = ZSTD_compressStream_generic(zcs, ostart, &sizeWritten, &srcSize, &srcSize, zsf_end);  /* use a valid src address instead of NULL */
-        size_t const remainingToFlush = zcs->outBuffContentSize - zcs->outBuffFlushedSize;
-        op += sizeWritten;
-        if (remainingToFlush) {
-            output->pos += sizeWritten;
-            return remainingToFlush + ZSTD_BLOCKHEADERSIZE /* final empty block */ + (zcs->checksum * 4);
-        }
-        /* create epilogue */
-        zcs->stage = zcss_final;
-        zcs->outBuffContentSize = !notEnded ? 0 :
-            ZSTD_compressEnd(zcs->cctx, zcs->outBuff, zcs->outBuffSize, NULL, 0);  /* write epilogue, including final empty block, into outBuff */
-    }
-
-    /* flush epilogue */
-    {   size_t const toFlush = zcs->outBuffContentSize - zcs->outBuffFlushedSize;
-        size_t const flushed = ZSTD_limitCopy(op, oend-op, zcs->outBuff + zcs->outBuffFlushedSize, toFlush);
-        op += flushed;
-        zcs->outBuffFlushedSize += flushed;
-        output->pos += op-ostart;
-        if (toFlush==flushed) zcs->stage = zcss_init;  /* end reached */
-        return toFlush - flushed;
+    ZSTD_inBuffer input = { NULL, 0, 0 };
+    if (output->pos > output->size) return ERROR(GENERIC);
+    CHECK_F( ZSTD_compressStream_generic(zcs, output, &input, ZSTD_e_end) );
+    {   size_t const lastBlockSize = zcs->frameEnded ? 0 : ZSTD_BLOCKHEADERSIZE;
+        size_t const checksumSize = zcs->frameEnded ? 0 : zcs->appliedParams.fParams.checksumFlag * 4;
+        size_t const toFlush = zcs->outBuffContentSize - zcs->outBuffFlushedSize + lastBlockSize + checksumSize;
+        DEBUGLOG(5, "ZSTD_endStream : remaining to flush : %u",
+                (unsigned)toFlush);
+        return toFlush;
     }
 }
 
 
-
 /*-=====  Pre-defined compression levels  =====-*/
 
-#define ZSTD_DEFAULT_CLEVEL 1
 #define ZSTD_MAX_CLEVEL     22
 int ZSTD_maxCLevel(void) { return ZSTD_MAX_CLEVEL; }
 
 static const ZSTD_compressionParameters ZSTD_defaultCParameters[4][ZSTD_MAX_CLEVEL+1] = {
-{   /* "default" */
+{   /* "default" - guarantees a monotonically increasing memory budget */
     /* W,  C,  H,  S,  L, TL, strat */
     { 18, 12, 12,  1,  7, 16, ZSTD_fast    },  /* level  0 - never used */
     { 19, 13, 14,  1,  7, 16, ZSTD_fast    },  /* level  1 */
     { 19, 15, 16,  1,  6, 16, ZSTD_fast    },  /* level  2 */
-    { 20, 16, 17,  1,  5, 16, ZSTD_dfast   },  /* level  3.*/
-    { 20, 18, 18,  1,  5, 16, ZSTD_dfast   },  /* level  4.*/
-    { 20, 15, 18,  3,  5, 16, ZSTD_greedy  },  /* level  5 */
-    { 21, 16, 19,  2,  5, 16, ZSTD_lazy    },  /* level  6 */
-    { 21, 17, 20,  3,  5, 16, ZSTD_lazy    },  /* level  7 */
+    { 20, 16, 17,  1,  5, 16, ZSTD_dfast   },  /* level  3 */
+    { 20, 17, 18,  1,  5, 16, ZSTD_dfast   },  /* level  4 */
+    { 20, 17, 18,  2,  5, 16, ZSTD_greedy  },  /* level  5 */
+    { 21, 17, 19,  2,  5, 16, ZSTD_lazy    },  /* level  6 */
+    { 21, 18, 19,  3,  5, 16, ZSTD_lazy    },  /* level  7 */
     { 21, 18, 20,  3,  5, 16, ZSTD_lazy2   },  /* level  8 */
-    { 21, 20, 20,  3,  5, 16, ZSTD_lazy2   },  /* level  9 */
+    { 21, 19, 20,  3,  5, 16, ZSTD_lazy2   },  /* level  9 */
     { 21, 19, 21,  4,  5, 16, ZSTD_lazy2   },  /* level 10 */
     { 22, 20, 22,  4,  5, 16, ZSTD_lazy2   },  /* level 11 */
     { 22, 20, 22,  5,  5, 16, ZSTD_lazy2   },  /* level 12 */
     { 22, 21, 22,  5,  5, 16, ZSTD_lazy2   },  /* level 13 */
     { 22, 21, 22,  6,  5, 16, ZSTD_lazy2   },  /* level 14 */
-    { 22, 21, 21,  5,  5, 16, ZSTD_btlazy2 },  /* level 15 */
+    { 22, 21, 22,  5,  5, 16, ZSTD_btlazy2 },  /* level 15 */
     { 23, 22, 22,  5,  5, 16, ZSTD_btlazy2 },  /* level 16 */
-    { 23, 21, 22,  4,  5, 24, ZSTD_btopt   },  /* level 17 */
-    { 23, 23, 22,  6,  5, 32, ZSTD_btopt   },  /* level 18 */
+    { 23, 22, 22,  4,  5, 24, ZSTD_btopt   },  /* level 17 */
+    { 23, 22, 22,  5,  4, 32, ZSTD_btopt   },  /* level 18 */
     { 23, 23, 22,  6,  3, 48, ZSTD_btopt   },  /* level 19 */
-    { 25, 25, 23,  7,  3, 64, ZSTD_btopt   },  /* level 20 */
-    { 26, 26, 23,  7,  3,256, ZSTD_btopt   },  /* level 21 */
-    { 27, 27, 25,  9,  3,512, ZSTD_btopt   },  /* level 22 */
+    { 25, 25, 23,  7,  3, 64, ZSTD_btultra },  /* level 20 */
+    { 26, 26, 24,  7,  3,256, ZSTD_btultra },  /* level 21 */
+    { 27, 27, 25,  9,  3,512, ZSTD_btultra },  /* level 22 */
 },
 {   /* for srcSize <= 256 KB */
     /* W,  C,  H,  S,  L,  T, strat */
@@ -3090,9 +4082,9 @@ static const ZSTD_compressionParameters ZSTD_defaultCParameters[4][ZSTD_MAX_CLEV
     { 18, 19, 18,  8,  3, 64, ZSTD_btopt   },  /* level 17.*/
     { 18, 19, 18,  9,  3,128, ZSTD_btopt   },  /* level 18.*/
     { 18, 19, 18, 10,  3,256, ZSTD_btopt   },  /* level 19.*/
-    { 18, 19, 18, 11,  3,512, ZSTD_btopt   },  /* level 20.*/
-    { 18, 19, 18, 12,  3,512, ZSTD_btopt   },  /* level 21.*/
-    { 18, 19, 18, 13,  3,512, ZSTD_btopt   },  /* level 22.*/
+    { 18, 19, 18, 11,  3,512, ZSTD_btultra },  /* level 20.*/
+    { 18, 19, 18, 12,  3,512, ZSTD_btultra },  /* level 21.*/
+    { 18, 19, 18, 13,  3,512, ZSTD_btultra },  /* level 22.*/
 },
 {   /* for srcSize <= 128 KB */
     /* W,  C,  H,  S,  L,  T, strat */
@@ -3116,9 +4108,9 @@ static const ZSTD_compressionParameters ZSTD_defaultCParameters[4][ZSTD_MAX_CLEV
     { 17, 18, 17,  7,  3, 64, ZSTD_btopt   },  /* level 17.*/
     { 17, 18, 17,  7,  3,256, ZSTD_btopt   },  /* level 18.*/
     { 17, 18, 17,  8,  3,256, ZSTD_btopt   },  /* level 19.*/
-    { 17, 18, 17,  9,  3,256, ZSTD_btopt   },  /* level 20.*/
-    { 17, 18, 17, 10,  3,256, ZSTD_btopt   },  /* level 21.*/
-    { 17, 18, 17, 11,  3,512, ZSTD_btopt   },  /* level 22.*/
+    { 17, 18, 17,  9,  3,256, ZSTD_btultra },  /* level 20.*/
+    { 17, 18, 17, 10,  3,256, ZSTD_btultra },  /* level 21.*/
+    { 17, 18, 17, 11,  3,512, ZSTD_btultra },  /* level 22.*/
 },
 {   /* for srcSize <= 16 KB */
     /* W,  C,  H,  S,  L,  T, strat */
@@ -3142,39 +4134,59 @@ static const ZSTD_compressionParameters ZSTD_defaultCParameters[4][ZSTD_MAX_CLEV
     { 14, 15, 15,  6,  3,128, ZSTD_btopt   },  /* level 17.*/
     { 14, 15, 15,  6,  3,256, ZSTD_btopt   },  /* level 18.*/
     { 14, 15, 15,  7,  3,256, ZSTD_btopt   },  /* level 19.*/
-    { 14, 15, 15,  8,  3,256, ZSTD_btopt   },  /* level 20.*/
-    { 14, 15, 15,  9,  3,256, ZSTD_btopt   },  /* level 21.*/
-    { 14, 15, 15, 10,  3,256, ZSTD_btopt   },  /* level 22.*/
+    { 14, 15, 15,  8,  3,256, ZSTD_btultra },  /* level 20.*/
+    { 14, 15, 15,  9,  3,256, ZSTD_btultra },  /* level 21.*/
+    { 14, 15, 15, 10,  3,256, ZSTD_btultra },  /* level 22.*/
 },
 };
 
+#if defined(ZSTD_DEBUG) && (ZSTD_DEBUG>=1)
+/* This function only verifies, through assert(),
+ * that the memory budget of ZSTD_defaultCParameters[0] never decreases as the level increases.
+ * Run once, on first ZSTD_getCParams() usage, if ZSTD_DEBUG is enabled.
+ */
+MEM_STATIC void ZSTD_check_compressionLevel_monotonicIncrease_memoryBudget(void)
+{
+    int level;
+    for (level=1; level<ZSTD_maxCLevel(); level++) {
+        ZSTD_compressionParameters const c1 = ZSTD_defaultCParameters[0][level];
+        ZSTD_compressionParameters const c2 = ZSTD_defaultCParameters[0][level+1];
+        assert(c1.windowLog <= c2.windowLog);
+#       define ZSTD_TABLECOST(h,c) ((1<<(h)) + (1<<(c)))
+        assert(ZSTD_TABLECOST(c1.hashLog, c1.chainLog) <= ZSTD_TABLECOST(c2.hashLog, c2.chainLog));
+    }
+}
+#endif
+
 /*! ZSTD_getCParams() :
 *   @return ZSTD_compressionParameters structure for a selected compression level, `srcSize` and `dictSize`.
 *   Size values are optional, provide 0 if not known or unused */
-ZSTD_compressionParameters ZSTD_getCParams(int compressionLevel, unsigned long long srcSize, size_t dictSize)
+ZSTD_compressionParameters ZSTD_getCParams(int compressionLevel, unsigned long long srcSizeHint, size_t dictSize)
 {
-    ZSTD_compressionParameters cp;
-    size_t const addedSize = srcSize ? 0 : 500;
-    U64 const rSize = srcSize+dictSize ? srcSize+dictSize+addedSize : (U64)-1;
+    size_t const addedSize = srcSizeHint ? 0 : 500;
+    U64 const rSize = srcSizeHint+dictSize ? srcSizeHint+dictSize+addedSize : (U64)-1;
     U32 const tableID = (rSize <= 256 KB) + (rSize <= 128 KB) + (rSize <= 16 KB);   /* intentional underflow for srcSizeHint == 0 */
-    if (compressionLevel <= 0) compressionLevel = ZSTD_DEFAULT_CLEVEL;   /* 0 == default; no negative compressionLevel yet */
-    if (compressionLevel > ZSTD_MAX_CLEVEL) compressionLevel = ZSTD_MAX_CLEVEL;
-    cp = ZSTD_defaultCParameters[tableID][compressionLevel];
-    if (MEM_32bits()) {   /* auto-correction, for 32-bits mode */
-        if (cp.windowLog > ZSTD_WINDOWLOG_MAX) cp.windowLog = ZSTD_WINDOWLOG_MAX;
-        if (cp.chainLog > ZSTD_CHAINLOG_MAX) cp.chainLog = ZSTD_CHAINLOG_MAX;
-        if (cp.hashLog > ZSTD_HASHLOG_MAX) cp.hashLog = ZSTD_HASHLOG_MAX;
+
+#if defined(ZSTD_DEBUG) && (ZSTD_DEBUG>=1)
+    static int g_monotonicTest = 1;
+    if (g_monotonicTest) {
+        ZSTD_check_compressionLevel_monotonicIncrease_memoryBudget();
+        g_monotonicTest=0;
     }
-    cp = ZSTD_adjustCParams(cp, srcSize, dictSize);
-    return cp;
+#endif
+
+    if (compressionLevel <= 0) compressionLevel = ZSTD_CLEVEL_DEFAULT;   /* 0 == default; no negative compressionLevel yet */
+    if (compressionLevel > ZSTD_MAX_CLEVEL) compressionLevel = ZSTD_MAX_CLEVEL;
+    { ZSTD_compressionParameters const cp = ZSTD_defaultCParameters[tableID][compressionLevel];
+      return ZSTD_adjustCParams_internal(cp, srcSizeHint, dictSize); }
 }
 
 /*! ZSTD_getParams() :
 *   same as ZSTD_getCParams(), but @return a `ZSTD_parameters` object (instead of `ZSTD_compressionParameters`).
 *   All fields of `ZSTD_frameParameters` are set to default (0) */
-ZSTD_parameters ZSTD_getParams(int compressionLevel, unsigned long long srcSize, size_t dictSize) {
+ZSTD_parameters ZSTD_getParams(int compressionLevel, unsigned long long srcSizeHint, size_t dictSize) {
     ZSTD_parameters params;
-    ZSTD_compressionParameters const cParams = ZSTD_getCParams(compressionLevel, srcSize, dictSize);
+    ZSTD_compressionParameters const cParams = ZSTD_getCParams(compressionLevel, srcSizeHint, dictSize);
     memset(&params, 0, sizeof(params));
     params.cParams = cParams;
     return params;
diff --git a/contrib/libzstd/include/zstd/compress/zstd_opt.h b/contrib/libzstd/include/zstd/compress/zstd_opt.h
index cb5872908f2..e8e98915ea3 100644
--- a/contrib/libzstd/include/zstd/compress/zstd_opt.h
+++ b/contrib/libzstd/include/zstd/compress/zstd_opt.h
@@ -1,5 +1,5 @@
 /**
- * Copyright (c) 2016-present, Yann Collet, Facebook, Inc.
+ * Copyright (c) 2016-present, Przemyslaw Skibinski, Yann Collet, Facebook, Inc.
  * All rights reserved.
  *
  * This source code is licensed under the BSD-style license found in the
@@ -15,7 +15,9 @@
 #define ZSTD_OPT_H_91842398743
 
 
-#define ZSTD_FREQ_DIV   5
+#define ZSTD_LITFREQ_ADD    2
+#define ZSTD_FREQ_DIV       4
+#define ZSTD_MAX_PRICE      (1<<30)
 
 /*-*************************************
 *  Price functions for optimal parser
@@ -30,22 +32,33 @@ FORCE_INLINE void ZSTD_setLog2Prices(seqStore_t* ssPtr)
 }
 
 
-MEM_STATIC void ZSTD_rescaleFreqs(seqStore_t* ssPtr)
+MEM_STATIC void ZSTD_rescaleFreqs(seqStore_t* ssPtr, const BYTE* src, size_t srcSize)
 {
     unsigned u;
 
     ssPtr->cachedLiterals = NULL;
     ssPtr->cachedPrice = ssPtr->cachedLitLength = 0;
+    ssPtr->staticPrices = 0;
 
     if (ssPtr->litLengthSum == 0) {
-        ssPtr->litSum = (2<<Litbits);
+        if (srcSize <= 1024) ssPtr->staticPrices = 1;
+
+        assert(ssPtr->litFreq!=NULL);
+        for (u=0; u<=MaxLit; u++)
+            ssPtr->litFreq[u] = 0;
+        for (u=0; u<srcSize; u++)
+            ssPtr->litFreq[src[u]]++;
+
+        ssPtr->litSum = 0;
         ssPtr->litLengthSum = MaxLL+1;
         ssPtr->matchLengthSum = MaxML+1;
         ssPtr->offCodeSum = (MaxOff+1);
-        ssPtr->matchSum = (2<<Litbits);
+        ssPtr->matchSum = (ZSTD_LITFREQ_ADD<<Litbits);
 
-        for (u=0; u<=MaxLit; u++)
-            ssPtr->litFreq[u] = 2;
+        for (u=0; u<=MaxLit; u++) {
+            ssPtr->litFreq[u] = 1 + (ssPtr->litFreq[u]>>ZSTD_FREQ_DIV);
+            ssPtr->litSum += ssPtr->litFreq[u];
+        }
         for (u=0; u<=MaxLL; u++)
             ssPtr->litLengthFreq[u] = 1;
         for (u=0; u<=MaxML; u++)
@@ -60,11 +73,11 @@ MEM_STATIC void ZSTD_rescaleFreqs(seqStore_t* ssPtr)
         ssPtr->litSum = 0;
 
         for (u=0; u<=MaxLit; u++) {
-            ssPtr->litFreq[u] = 1 + (ssPtr->litFreq[u]>>ZSTD_FREQ_DIV);
+            ssPtr->litFreq[u] = 1 + (ssPtr->litFreq[u]>>(ZSTD_FREQ_DIV+1));
             ssPtr->litSum += ssPtr->litFreq[u];
         }
         for (u=0; u<=MaxLL; u++) {
-            ssPtr->litLengthFreq[u] = 1 + (ssPtr->litLengthFreq[u]>>ZSTD_FREQ_DIV);
+            ssPtr->litLengthFreq[u] = 1 + (ssPtr->litLengthFreq[u]>>(ZSTD_FREQ_DIV+1));
             ssPtr->litLengthSum += ssPtr->litLengthFreq[u];
         }
         for (u=0; u<=MaxML; u++) {
@@ -72,6 +85,7 @@ MEM_STATIC void ZSTD_rescaleFreqs(seqStore_t* ssPtr)
             ssPtr->matchLengthSum += ssPtr->matchLengthFreq[u];
             ssPtr->matchSum += ssPtr->matchLengthFreq[u] * (u + 3);
         }
+        ssPtr->matchSum *= ZSTD_LITFREQ_ADD;
         for (u=0; u<=MaxOff; u++) {
             ssPtr->offCodeFreq[u] = 1 + (ssPtr->offCodeFreq[u]>>ZSTD_FREQ_DIV);
             ssPtr->offCodeSum += ssPtr->offCodeFreq[u];
@@ -86,6 +100,9 @@ FORCE_INLINE U32 ZSTD_getLiteralPrice(seqStore_t* ssPtr, U32 litLength, const BY
 {
     U32 price, u;
 
+    if (ssPtr->staticPrices)
+        return ZSTD_highbit32((U32)litLength+1) + (litLength*6);
+
     if (litLength == 0)
         return ssPtr->log2litLengthSum - ZSTD_highbit32(ssPtr->litLengthFreq[0]+1);
 
@@ -120,11 +137,17 @@ FORCE_INLINE U32 ZSTD_getLiteralPrice(seqStore_t* ssPtr, U32 litLength, const BY
 }
 
 
-FORCE_INLINE U32 ZSTD_getPrice(seqStore_t* seqStorePtr, U32 litLength, const BYTE* literals, U32 offset, U32 matchLength)
+FORCE_INLINE U32 ZSTD_getPrice(seqStore_t* seqStorePtr, U32 litLength, const BYTE* literals, U32 offset, U32 matchLength, const int ultra)
 {
     /* offset */
+    U32 price;
     BYTE const offCode = (BYTE)ZSTD_highbit32(offset+1);
-    U32 price = offCode + seqStorePtr->log2offCodeSum - ZSTD_highbit32(seqStorePtr->offCodeFreq[offCode]+1);
+
+    if (seqStorePtr->staticPrices)
+        return ZSTD_getLiteralPrice(seqStorePtr, litLength, literals) + ZSTD_highbit32((U32)matchLength+1) + 16 + offCode;
+
+    price = offCode + seqStorePtr->log2offCodeSum - ZSTD_highbit32(seqStorePtr->offCodeFreq[offCode]+1);
+    if (!ultra && offCode >= 20) price += (offCode-19)*2;
 
     /* match Length */
     {   const BYTE ML_deltaCode = 36;
@@ -141,9 +164,9 @@ MEM_STATIC void ZSTD_updatePrice(seqStore_t* seqStorePtr, U32 litLength, const B
     U32 u;
 
     /* literals */
-    seqStorePtr->litSum += litLength;
+    seqStorePtr->litSum += litLength*ZSTD_LITFREQ_ADD;
     for (u=0; u < litLength; u++)
-        seqStorePtr->litFreq[literals[u]]++;
+        seqStorePtr->litFreq[literals[u]] += ZSTD_LITFREQ_ADD;
 
     /* literal Length */
     {   const BYTE LL_deltaCode = 19;
@@ -153,10 +176,10 @@ MEM_STATIC void ZSTD_updatePrice(seqStore_t* seqStorePtr, U32 litLength, const B
     }
 
     /* match offset */
-	{   BYTE const offCode = (BYTE)ZSTD_highbit32(offset+1);
-		seqStorePtr->offCodeSum++;
-		seqStorePtr->offCodeFreq[offCode]++;
-	}
+    {   BYTE const offCode = (BYTE)ZSTD_highbit32(offset+1);
+        seqStorePtr->offCodeSum++;
+        seqStorePtr->offCodeFreq[offCode]++;
+    }
 
     /* match Length */
     {   const BYTE ML_deltaCode = 36;
@@ -171,7 +194,7 @@ MEM_STATIC void ZSTD_updatePrice(seqStore_t* seqStorePtr, U32 litLength, const B
 
 #define SET_PRICE(pos, mlen_, offset_, litlen_, price_)   \
     {                                                 \
-        while (last_pos < pos)  { opt[last_pos+1].price = 1<<30; last_pos++; } \
+        while (last_pos < pos)  { opt[last_pos+1].price = ZSTD_MAX_PRICE; last_pos++; } \
         opt[pos].mlen = mlen_;                         \
         opt[pos].off = offset_;                        \
         opt[pos].litlen = litlen_;                     \
@@ -179,9 +202,23 @@ MEM_STATIC void ZSTD_updatePrice(seqStore_t* seqStorePtr, U32 litLength, const B
     }
 
 
+/* function safe only for comparisons : for length 3, the result is a 32-bit read shifted by 8 bits, not the 3-byte value itself */
+MEM_STATIC U32 ZSTD_readMINMATCH(const void* memPtr, U32 length)
+{
+    switch (length)
+    {
+    default :
+    case 4 : return MEM_read32(memPtr);
+    case 3 : if (MEM_isLittleEndian())
+                return MEM_read32(memPtr)<<8;
+             else
+                return MEM_read32(memPtr)>>8;
+    }
+}
+
 
 /* Update hashTable3 up to ip (excluded)
-   Assumption : always within prefix (ie. not within extDict) */
+   Assumption : always within prefix (i.e. not within extDict) */
 FORCE_INLINE
 U32 ZSTD_insertAndFindFirstIndexHash3 (ZSTD_CCtx* zc, const BYTE* ip)
 {
@@ -212,12 +249,12 @@ static U32 ZSTD_insertBtAndGetAllMatches (
 {
     const BYTE* const base = zc->base;
     const U32 current = (U32)(ip-base);
-    const U32 hashLog = zc->params.cParams.hashLog;
+    const U32 hashLog = zc->appliedParams.cParams.hashLog;
     const size_t h  = ZSTD_hashPtr(ip, hashLog, mls);
     U32* const hashTable = zc->hashTable;
     U32 matchIndex  = hashTable[h];
     U32* const bt   = zc->chainTable;
-    const U32 btLog = zc->params.cParams.chainLog - 1;
+    const U32 btLog = zc->appliedParams.cParams.chainLog - 1;
     const U32 btMask= (1U << btLog) - 1;
     size_t commonLengthSmaller=0, commonLengthLarger=0;
     const BYTE* const dictBase = zc->dictBase;
@@ -245,7 +282,7 @@ static U32 ZSTD_insertBtAndGetAllMatches (
                 if (match[bestLength] == ip[bestLength]) currentMl = ZSTD_count(ip, match, iLimit);
             } else {
                 match = dictBase + matchIndex3;
-                if (MEM_readMINMATCH(match, MINMATCH) == MEM_readMINMATCH(ip, MINMATCH))    /* assumption : matchIndex3 <= dictLimit-4 (by table construction) */
+                if (ZSTD_readMINMATCH(match, MINMATCH) == ZSTD_readMINMATCH(ip, MINMATCH))    /* assumption : matchIndex3 <= dictLimit-4 (by table construction) */
                     currentMl = ZSTD_count_2segments(ip+MINMATCH, match+MINMATCH, iLimit, dictEnd, prefixStart) + MINMATCH;
             }
 
@@ -338,6 +375,7 @@ static U32 ZSTD_BtGetAllMatches_selectMLS (
     default :
     case 4 : return ZSTD_BtGetAllMatches(zc, ip, iHighLimit, maxNbAttempts, 4, matches, minMatchLen);
     case 5 : return ZSTD_BtGetAllMatches(zc, ip, iHighLimit, maxNbAttempts, 5, matches, minMatchLen);
+    case 7 :
     case 6 : return ZSTD_BtGetAllMatches(zc, ip, iHighLimit, maxNbAttempts, 6, matches, minMatchLen);
     }
 }
@@ -365,6 +403,7 @@ static U32 ZSTD_BtGetAllMatches_selectMLS_extDict (
     default :
     case 4 : return ZSTD_BtGetAllMatches_extDict(zc, ip, iHighLimit, maxNbAttempts, 4, matches, minMatchLen);
     case 5 : return ZSTD_BtGetAllMatches_extDict(zc, ip, iHighLimit, maxNbAttempts, 5, matches, minMatchLen);
+    case 7 :
     case 6 : return ZSTD_BtGetAllMatches_extDict(zc, ip, iHighLimit, maxNbAttempts, 6, matches, minMatchLen);
     }
 }
@@ -375,7 +414,7 @@ static U32 ZSTD_BtGetAllMatches_selectMLS_extDict (
 *********************************/
 FORCE_INLINE
 void ZSTD_compressBlock_opt_generic(ZSTD_CCtx* ctx,
-                                    const void* src, size_t srcSize)
+                                    const void* src, size_t srcSize, const int ultra)
 {
     seqStore_t* seqStorePtr = &(ctx->seqStore);
     const BYTE* const istart = (const BYTE*)src;
@@ -386,10 +425,10 @@ void ZSTD_compressBlock_opt_generic(ZSTD_CCtx* ctx,
     const BYTE* const base = ctx->base;
     const BYTE* const prefixStart = base + ctx->dictLimit;
 
-    const U32 maxSearches = 1U << ctx->params.cParams.searchLog;
-    const U32 sufficient_len = ctx->params.cParams.targetLength;
-    const U32 mls = ctx->params.cParams.searchLength;
-    const U32 minMatch = (ctx->params.cParams.searchLength == 3) ? 3 : 4;
+    const U32 maxSearches = 1U << ctx->appliedParams.cParams.searchLog;
+    const U32 sufficient_len = ctx->appliedParams.cParams.targetLength;
+    const U32 mls = ctx->appliedParams.cParams.searchLength;
+    const U32 minMatch = (ctx->appliedParams.cParams.searchLength == 3) ? 3 : 4;
 
     ZSTD_optimal_t* opt = seqStorePtr->priceTable;
     ZSTD_match_t* matches = seqStorePtr->matchTable;
@@ -398,10 +437,9 @@ void ZSTD_compressBlock_opt_generic(ZSTD_CCtx* ctx,
 
     /* init */
     ctx->nextToUpdate3 = ctx->nextToUpdate;
-    ZSTD_rescaleFreqs(seqStorePtr);
+    ZSTD_rescaleFreqs(seqStorePtr, (const BYTE*)src, srcSize);
     ip += (ip==prefixStart);
     { U32 i; for (i=0; i<ZSTD_REP_NUM; i++) rep[i]=ctx->rep[i]; }
-    inr = ip;
 
     /* Match Loop */
     while (ip < ilimit) {
@@ -414,9 +452,9 @@ void ZSTD_compressBlock_opt_generic(ZSTD_CCtx* ctx,
         /* check repCode */
         {   U32 i, last_i = ZSTD_REP_CHECK + (ip==anchor);
             for (i=(ip == anchor); i<last_i; i++) {
-                const S32 repCur = ((i==ZSTD_REP_MOVE_OPT) && (ip==anchor)) ? (rep[0] - 1) : rep[i];
+                const S32 repCur = (i==ZSTD_REP_MOVE_OPT) ? (rep[0] - 1) : rep[i];
                 if ( (repCur > 0) && (repCur < (S32)(ip-prefixStart))
-                    && (MEM_readMINMATCH(ip, minMatch) == MEM_readMINMATCH(ip - repCur, minMatch))) {
+                    && (ZSTD_readMINMATCH(ip, minMatch) == ZSTD_readMINMATCH(ip - repCur, minMatch))) {
                     mlen = (U32)ZSTD_count(ip+minMatch, ip+minMatch-repCur, iend) + minMatch;
                     if (mlen > sufficient_len || mlen >= ZSTD_OPT_NUM) {
                         best_mlen = mlen; best_off = i; cur = 0; last_pos = 1;
@@ -424,7 +462,7 @@ void ZSTD_compressBlock_opt_generic(ZSTD_CCtx* ctx,
                     }
                     best_off = i - (ip == anchor);
                     do {
-                        price = ZSTD_getPrice(seqStorePtr, litlen, anchor, best_off, mlen - MINMATCH);
+                        price = ZSTD_getPrice(seqStorePtr, litlen, anchor, best_off, mlen - MINMATCH, ultra);
                         if (mlen > last_pos || price < opt[mlen].price)
                             SET_PRICE(mlen, mlen, i, litlen, price);   /* note : macro modifies last_pos */
                         mlen--;
@@ -449,7 +487,7 @@ void ZSTD_compressBlock_opt_generic(ZSTD_CCtx* ctx,
             mlen = (u>0) ? matches[u-1].len+1 : best_mlen;
             best_mlen = matches[u].len;
             while (mlen <= best_mlen) {
-                price = ZSTD_getPrice(seqStorePtr, litlen, anchor, matches[u].off-1, mlen - MINMATCH);
+                price = ZSTD_getPrice(seqStorePtr, litlen, anchor, matches[u].off-1, mlen - MINMATCH, ultra);
                 if (mlen > last_pos || price < opt[mlen].price)
                     SET_PRICE(mlen, mlen, matches[u].off, litlen, price);   /* note : macro modifies last_pos */
                 mlen++;
@@ -496,12 +534,12 @@ void ZSTD_compressBlock_opt_generic(ZSTD_CCtx* ctx,
                 opt[cur].rep[0] = ((opt[cur].off==ZSTD_REP_MOVE_OPT) && (mlen != 1)) ? (opt[cur-mlen].rep[0] - 1) : (opt[cur-mlen].rep[opt[cur].off]);
            }
 
-           best_mlen = minMatch;
+            best_mlen = minMatch;
             {   U32 i, last_i = ZSTD_REP_CHECK + (mlen != 1);
                 for (i=(opt[cur].mlen != 1); i<last_i; i++) {  /* check rep */
-                    const S32 repCur = ((i==ZSTD_REP_MOVE_OPT) && (opt[cur].mlen != 1)) ? (opt[cur].rep[0] - 1) : opt[cur].rep[i];
+                    const S32 repCur = (i==ZSTD_REP_MOVE_OPT) ? (opt[cur].rep[0] - 1) : opt[cur].rep[i];
                     if ( (repCur > 0) && (repCur < (S32)(inr-prefixStart))
-                       && (MEM_readMINMATCH(inr, minMatch) == MEM_readMINMATCH(inr - repCur, minMatch))) {
+                       && (ZSTD_readMINMATCH(inr, minMatch) == ZSTD_readMINMATCH(inr - repCur, minMatch))) {
                        mlen = (U32)ZSTD_count(inr+minMatch, inr+minMatch - repCur, iend) + minMatch;
 
                        if (mlen > sufficient_len || cur + mlen >= ZSTD_OPT_NUM) {
@@ -510,21 +548,20 @@ void ZSTD_compressBlock_opt_generic(ZSTD_CCtx* ctx,
                        }
 
                        best_off = i - (opt[cur].mlen != 1);
+                       if (mlen > best_mlen) best_mlen = mlen;
 
-                       if (opt[cur].mlen == 1) {
-                            litlen = opt[cur].litlen;
-                            if (cur > litlen) {
-                                price = opt[cur - litlen].price + ZSTD_getPrice(seqStorePtr, litlen, inr-litlen, best_off, mlen - MINMATCH);
-                            } else
-                                price = ZSTD_getPrice(seqStorePtr, litlen, anchor, best_off, mlen - MINMATCH);
-                        } else {
-                            litlen = 0;
-                            price = opt[cur].price + ZSTD_getPrice(seqStorePtr, 0, NULL, best_off, mlen - MINMATCH);
-                        }
+                       do {
+                           if (opt[cur].mlen == 1) {
+                                litlen = opt[cur].litlen;
+                                if (cur > litlen) {
+                                    price = opt[cur - litlen].price + ZSTD_getPrice(seqStorePtr, litlen, inr-litlen, best_off, mlen - MINMATCH, ultra);
+                                } else
+                                    price = ZSTD_getPrice(seqStorePtr, litlen, anchor, best_off, mlen - MINMATCH, ultra);
+                            } else {
+                                litlen = 0;
+                                price = opt[cur].price + ZSTD_getPrice(seqStorePtr, 0, NULL, best_off, mlen - MINMATCH, ultra);
+                            }
 
-                        if (mlen > best_mlen) best_mlen = mlen;
-
-                        do {
                             if (cur + mlen > last_pos || price <= opt[cur + mlen].price)
                                 SET_PRICE(cur + mlen, mlen, i, litlen, price);
                             mlen--;
@@ -549,12 +586,12 @@ void ZSTD_compressBlock_opt_generic(ZSTD_CCtx* ctx,
                     if (opt[cur].mlen == 1) {
                         litlen = opt[cur].litlen;
                         if (cur > litlen)
-                            price = opt[cur - litlen].price + ZSTD_getPrice(seqStorePtr, litlen, ip+cur-litlen, matches[u].off-1, mlen - MINMATCH);
+                            price = opt[cur - litlen].price + ZSTD_getPrice(seqStorePtr, litlen, ip+cur-litlen, matches[u].off-1, mlen - MINMATCH, ultra);
                         else
-                            price = ZSTD_getPrice(seqStorePtr, litlen, anchor, matches[u].off-1, mlen - MINMATCH);
+                            price = ZSTD_getPrice(seqStorePtr, litlen, anchor, matches[u].off-1, mlen - MINMATCH, ultra);
                     } else {
                         litlen = 0;
-                        price = opt[cur].price + ZSTD_getPrice(seqStorePtr, 0, NULL, matches[u].off-1, mlen - MINMATCH);
+                        price = opt[cur].price + ZSTD_getPrice(seqStorePtr, 0, NULL, matches[u].off-1, mlen - MINMATCH, ultra);
                     }
 
                     if (cur + mlen > last_pos || (price < opt[cur + mlen].price))
@@ -600,7 +637,7 @@ _storeSequence:   /* cur, last_pos, best_mlen, best_off have to be set */
                 offset--;
             } else {
                 if (offset != 0) {
-                    best_off = ((offset==ZSTD_REP_MOVE_OPT) && (litLength==0)) ? (rep[0] - 1) : (rep[offset]);
+                    best_off = (offset==ZSTD_REP_MOVE_OPT) ? (rep[0] - 1) : (rep[offset]);
                     if (offset != 1) rep[2] = rep[1];
                     rep[1] = rep[0];
                     rep[0] = best_off;
@@ -614,7 +651,7 @@ _storeSequence:   /* cur, last_pos, best_mlen, best_off have to be set */
     }    }   /* for (cur=0; cur < last_pos; ) */
 
     /* Save reps for next block */
-    { int i; for (i=0; i<ZSTD_REP_NUM; i++) ctx->savedRep[i] = rep[i]; }
+    { int i; for (i=0; i<ZSTD_REP_NUM; i++) ctx->repToConfirm[i] = rep[i]; }
 
     /* Last Literals */
     {   size_t const lastLLSize = iend - anchor;
@@ -626,7 +663,7 @@ _storeSequence:   /* cur, last_pos, best_mlen, best_off have to be set */
 
 FORCE_INLINE
 void ZSTD_compressBlock_opt_extDict_generic(ZSTD_CCtx* ctx,
-                                     const void* src, size_t srcSize)
+                                     const void* src, size_t srcSize, const int ultra)
 {
     seqStore_t* seqStorePtr = &(ctx->seqStore);
     const BYTE* const istart = (const BYTE*)src;
@@ -641,10 +678,10 @@ void ZSTD_compressBlock_opt_extDict_generic(ZSTD_CCtx* ctx,
     const BYTE* const dictBase = ctx->dictBase;
     const BYTE* const dictEnd  = dictBase + dictLimit;
 
-    const U32 maxSearches = 1U << ctx->params.cParams.searchLog;
-    const U32 sufficient_len = ctx->params.cParams.targetLength;
-    const U32 mls = ctx->params.cParams.searchLength;
-    const U32 minMatch = (ctx->params.cParams.searchLength == 3) ? 3 : 4;
+    const U32 maxSearches = 1U << ctx->appliedParams.cParams.searchLog;
+    const U32 sufficient_len = ctx->appliedParams.cParams.targetLength;
+    const U32 mls = ctx->appliedParams.cParams.searchLength;
+    const U32 minMatch = (ctx->appliedParams.cParams.searchLength == 3) ? 3 : 4;
 
     ZSTD_optimal_t* opt = seqStorePtr->priceTable;
     ZSTD_match_t* matches = seqStorePtr->matchTable;
@@ -655,9 +692,8 @@ void ZSTD_compressBlock_opt_extDict_generic(ZSTD_CCtx* ctx,
     { U32 i; for (i=0; i<ZSTD_REP_NUM; i++) rep[i]=ctx->rep[i]; }
 
     ctx->nextToUpdate3 = ctx->nextToUpdate;
-    ZSTD_rescaleFreqs(seqStorePtr);
+    ZSTD_rescaleFreqs(seqStorePtr, (const BYTE*)src, srcSize);
     ip += (ip==prefixStart);
-    inr = ip;
 
     /* Match Loop */
     while (ip < ilimit) {
@@ -666,19 +702,18 @@ void ZSTD_compressBlock_opt_extDict_generic(ZSTD_CCtx* ctx,
         U32 current = (U32)(ip-base);
         memset(opt, 0, sizeof(ZSTD_optimal_t));
         last_pos = 0;
-        inr = ip;
         opt[0].litlen = (U32)(ip - anchor);
 
         /* check repCode */
         {   U32 i, last_i = ZSTD_REP_CHECK + (ip==anchor);
             for (i = (ip==anchor); i<last_i; i++) {
-                const S32 repCur = ((i==ZSTD_REP_MOVE_OPT) && (ip==anchor)) ? (rep[0] - 1) : rep[i];
+                const S32 repCur = (i==ZSTD_REP_MOVE_OPT) ? (rep[0] - 1) : rep[i];
                 const U32 repIndex = (U32)(current - repCur);
                 const BYTE* const repBase = repIndex < dictLimit ? dictBase : base;
                 const BYTE* const repMatch = repBase + repIndex;
                 if ( (repCur > 0 && repCur <= (S32)current)
                    && (((U32)((dictLimit-1) - repIndex) >= 3) & (repIndex>lowestIndex))  /* intentional overflow */
-                   && (MEM_readMINMATCH(ip, minMatch) == MEM_readMINMATCH(repMatch, minMatch)) ) {
+                   && (ZSTD_readMINMATCH(ip, minMatch) == ZSTD_readMINMATCH(repMatch, minMatch)) ) {
                     /* repcode detected we should take it */
                     const BYTE* const repEnd = repIndex < dictLimit ? dictEnd : iend;
                     mlen = (U32)ZSTD_count_2segments(ip+minMatch, repMatch+minMatch, iend, repEnd, prefixStart) + minMatch;
@@ -691,7 +726,7 @@ void ZSTD_compressBlock_opt_extDict_generic(ZSTD_CCtx* ctx,
                     best_off = i - (ip==anchor);
                     litlen = opt[0].litlen;
                     do {
-                        price = ZSTD_getPrice(seqStorePtr, litlen, anchor, best_off, mlen - MINMATCH);
+                        price = ZSTD_getPrice(seqStorePtr, litlen, anchor, best_off, mlen - MINMATCH, ultra);
                         if (mlen > last_pos || price < opt[mlen].price)
                             SET_PRICE(mlen, mlen, i, litlen, price);   /* note : macro modifies last_pos */
                         mlen--;
@@ -721,7 +756,7 @@ void ZSTD_compressBlock_opt_extDict_generic(ZSTD_CCtx* ctx,
             best_mlen = matches[u].len;
             litlen = opt[0].litlen;
             while (mlen <= best_mlen) {
-                price = ZSTD_getPrice(seqStorePtr, litlen, anchor, matches[u].off-1, mlen - MINMATCH);
+                price = ZSTD_getPrice(seqStorePtr, litlen, anchor, matches[u].off-1, mlen - MINMATCH, ultra);
                 if (mlen > last_pos || price < opt[mlen].price)
                     SET_PRICE(mlen, mlen, matches[u].off, litlen, price);
                 mlen++;
@@ -765,17 +800,16 @@ void ZSTD_compressBlock_opt_extDict_generic(ZSTD_CCtx* ctx,
                 opt[cur].rep[0] = ((opt[cur].off==ZSTD_REP_MOVE_OPT) && (mlen != 1)) ? (opt[cur-mlen].rep[0] - 1) : (opt[cur-mlen].rep[opt[cur].off]);
             }
 
-            best_mlen = 0;
-
+            best_mlen = minMatch;
             {   U32 i, last_i = ZSTD_REP_CHECK + (mlen != 1);
                 for (i = (mlen != 1); i<last_i; i++) {
-                    const S32 repCur = ((i==ZSTD_REP_MOVE_OPT) && (opt[cur].mlen != 1)) ? (opt[cur].rep[0] - 1) : opt[cur].rep[i];
+                    const S32 repCur = (i==ZSTD_REP_MOVE_OPT) ? (opt[cur].rep[0] - 1) : opt[cur].rep[i];
                     const U32 repIndex = (U32)(current+cur - repCur);
                     const BYTE* const repBase = repIndex < dictLimit ? dictBase : base;
                     const BYTE* const repMatch = repBase + repIndex;
                     if ( (repCur > 0 && repCur <= (S32)(current+cur))
                       && (((U32)((dictLimit-1) - repIndex) >= 3) & (repIndex>lowestIndex))  /* intentional overflow */
-                      && (MEM_readMINMATCH(inr, minMatch) == MEM_readMINMATCH(repMatch, minMatch)) ) {
+                      && (ZSTD_readMINMATCH(inr, minMatch) == ZSTD_readMINMATCH(repMatch, minMatch)) ) {
                         /* repcode detected */
                         const BYTE* const repEnd = repIndex < dictLimit ? dictEnd : iend;
                         mlen = (U32)ZSTD_count_2segments(inr+minMatch, repMatch+minMatch, iend, repEnd, prefixStart) + minMatch;
@@ -786,20 +820,20 @@ void ZSTD_compressBlock_opt_extDict_generic(ZSTD_CCtx* ctx,
                         }
 
                         best_off = i - (opt[cur].mlen != 1);
-                        if (opt[cur].mlen == 1) {
-                            litlen = opt[cur].litlen;
-                            if (cur > litlen) {
-                                price = opt[cur - litlen].price + ZSTD_getPrice(seqStorePtr, litlen, inr-litlen, best_off, mlen - MINMATCH);
-                            } else
-                                price = ZSTD_getPrice(seqStorePtr, litlen, anchor, best_off, mlen - MINMATCH);
-                        } else {
-                            litlen = 0;
-                            price = opt[cur].price + ZSTD_getPrice(seqStorePtr, 0, NULL, best_off, mlen - MINMATCH);
-                        }
-
-                        best_mlen = mlen;
+                        if (mlen > best_mlen) best_mlen = mlen;
 
                         do {
+                            if (opt[cur].mlen == 1) {
+                                litlen = opt[cur].litlen;
+                                if (cur > litlen) {
+                                    price = opt[cur - litlen].price + ZSTD_getPrice(seqStorePtr, litlen, inr-litlen, best_off, mlen - MINMATCH, ultra);
+                                } else
+                                    price = ZSTD_getPrice(seqStorePtr, litlen, anchor, best_off, mlen - MINMATCH, ultra);
+                            } else {
+                                litlen = 0;
+                                price = opt[cur].price + ZSTD_getPrice(seqStorePtr, 0, NULL, best_off, mlen - MINMATCH, ultra);
+                            }
+
                             if (cur + mlen > last_pos || price <= opt[cur + mlen].price)
                                 SET_PRICE(cur + mlen, mlen, i, litlen, price);
                             mlen--;
@@ -808,30 +842,28 @@ void ZSTD_compressBlock_opt_extDict_generic(ZSTD_CCtx* ctx,
 
             match_num = ZSTD_BtGetAllMatches_selectMLS_extDict(ctx, inr, iend, maxSearches, mls, matches, minMatch);
 
-            if (match_num > 0 && matches[match_num-1].len > sufficient_len) {
+            if (match_num > 0 && (matches[match_num-1].len > sufficient_len || cur + matches[match_num-1].len >= ZSTD_OPT_NUM)) {
                 best_mlen = matches[match_num-1].len;
                 best_off = matches[match_num-1].off;
                 last_pos = cur + 1;
                 goto _storeSequence;
             }
 
-            best_mlen = (best_mlen > minMatch) ? best_mlen : minMatch;
-
             /* set prices using matches at position = cur */
             for (u = 0; u < match_num; u++) {
                 mlen = (u>0) ? matches[u-1].len+1 : best_mlen;
-                best_mlen = (cur + matches[u].len < ZSTD_OPT_NUM) ? matches[u].len : ZSTD_OPT_NUM - cur;
+                best_mlen = matches[u].len;
 
                 while (mlen <= best_mlen) {
                     if (opt[cur].mlen == 1) {
                         litlen = opt[cur].litlen;
                         if (cur > litlen)
-                            price = opt[cur - litlen].price + ZSTD_getPrice(seqStorePtr, litlen, ip+cur-litlen, matches[u].off-1, mlen - MINMATCH);
+                            price = opt[cur - litlen].price + ZSTD_getPrice(seqStorePtr, litlen, ip+cur-litlen, matches[u].off-1, mlen - MINMATCH, ultra);
                         else
-                            price = ZSTD_getPrice(seqStorePtr, litlen, anchor, matches[u].off-1, mlen - MINMATCH);
+                            price = ZSTD_getPrice(seqStorePtr, litlen, anchor, matches[u].off-1, mlen - MINMATCH, ultra);
                     } else {
                         litlen = 0;
-                        price = opt[cur].price + ZSTD_getPrice(seqStorePtr, 0, NULL, matches[u].off-1, mlen - MINMATCH);
+                        price = opt[cur].price + ZSTD_getPrice(seqStorePtr, 0, NULL, matches[u].off-1, mlen - MINMATCH, ultra);
                     }
 
                     if (cur + mlen > last_pos || (price < opt[cur + mlen].price))
@@ -877,7 +909,7 @@ _storeSequence:   /* cur, last_pos, best_mlen, best_off have to be set */
                 offset--;
             } else {
                 if (offset != 0) {
-                    best_off = ((offset==ZSTD_REP_MOVE_OPT) && (litLength==0)) ? (rep[0] - 1) : (rep[offset]);
+                    best_off = (offset==ZSTD_REP_MOVE_OPT) ? (rep[0] - 1) : (rep[offset]);
                     if (offset != 1) rep[2] = rep[1];
                     rep[1] = rep[0];
                     rep[0] = best_off;
@@ -892,7 +924,7 @@ _storeSequence:   /* cur, last_pos, best_mlen, best_off have to be set */
     }    }   /* for (cur=0; cur < last_pos; ) */
 
     /* Save reps for next block */
-    { int i; for (i=0; i<ZSTD_REP_NUM; i++) ctx->savedRep[i] = rep[i]; }
+    { int i; for (i=0; i<ZSTD_REP_NUM; i++) ctx->repToConfirm[i] = rep[i]; }
 
     /* Last Literals */
     {   size_t lastLLSize = iend - anchor;
diff --git a/contrib/libzstd/include/zstd/compress/zstdmt_compress.c b/contrib/libzstd/include/zstd/compress/zstdmt_compress.c
new file mode 100644
index 00000000000..0cee01eacb8
--- /dev/null
+++ b/contrib/libzstd/include/zstd/compress/zstdmt_compress.c
@@ -0,0 +1,955 @@
+/**
+ * Copyright (c) 2016-present, Yann Collet, Facebook, Inc.
+ * All rights reserved.
+ *
+ * This source code is licensed under the BSD-style license found in the
+ * LICENSE file in the root directory of this source tree. An additional grant
+ * of patent rights can be found in the PATENTS file in the same directory.
+ */
+
+
+/* ======   Tuning parameters   ====== */
+#define ZSTDMT_NBTHREADS_MAX 128
+
+
+/* ======   Compiler specifics   ====== */
+#if defined(_MSC_VER)
+#  pragma warning(disable : 4204)   /* disable: C4204: non-constant aggregate initializer */
+#endif
+
+
+/* ======   Dependencies   ====== */
+#include <string.h>      /* memcpy, memset */
+#include "pool.h"        /* threadpool */
+#include "threading.h"   /* mutex */
+#include "zstd_internal.h"  /* MIN, ERROR, ZSTD_*, ZSTD_highbit32 */
+#include "zstdmt_compress.h"
+
+
+/* ======   Debug   ====== */
+#if defined(ZSTD_DEBUG) && (ZSTD_DEBUG>=2)
+
+#  include <stdio.h>
+#  include <unistd.h>
+#  include <sys/times.h>
+#  define DEBUGLOGRAW(l, ...) if (l<=ZSTD_DEBUG) { fprintf(stderr, __VA_ARGS__); }
+
+#  define DEBUG_PRINTHEX(l,p,n) {            \
+    unsigned debug_u;                        \
+    for (debug_u=0; debug_u<(n); debug_u++)  \
+        DEBUGLOGRAW(l, "%02X ", ((const unsigned char*)(p))[debug_u]); \
+    DEBUGLOGRAW(l, " \n");                   \
+}
+
+static unsigned long long GetCurrentClockTimeMicroseconds(void)
+{
+   static clock_t _ticksPerSecond = 0;
+   if (_ticksPerSecond <= 0) _ticksPerSecond = sysconf(_SC_CLK_TCK);
+
+   { struct tms junk; clock_t newTicks = (clock_t) times(&junk);
+     return ((((unsigned long long)newTicks)*(1000000))/_ticksPerSecond); }
+}
+
+#define MUTEX_WAIT_TIME_DLEVEL 6
+#define PTHREAD_MUTEX_LOCK(mutex) {               \
+    if (ZSTD_DEBUG>=MUTEX_WAIT_TIME_DLEVEL) {   \
+        unsigned long long const beforeTime = GetCurrentClockTimeMicroseconds(); \
+        pthread_mutex_lock(mutex);                \
+        {   unsigned long long const afterTime = GetCurrentClockTimeMicroseconds(); \
+            unsigned long long const elapsedTime = (afterTime-beforeTime); \
+            if (elapsedTime > 1000) {  /* or whatever threshold you like; I'm using 1 millisecond here */ \
+                DEBUGLOG(MUTEX_WAIT_TIME_DLEVEL, "Thread took %llu microseconds to acquire mutex %s \n", \
+                   elapsedTime, #mutex);          \
+        }   }                                     \
+    } else pthread_mutex_lock(mutex);             \
+}
+
+#else
+
+#  define PTHREAD_MUTEX_LOCK(m) pthread_mutex_lock(m)
+#  define DEBUG_PRINTHEX(l,p,n) {}
+
+#endif
+
+
+/* =====   Buffer Pool   ===== */
+
+typedef struct buffer_s {
+    void* start;
+    size_t size;
+} buffer_t;
+
+static const buffer_t g_nullBuffer = { NULL, 0 };
+
+typedef struct ZSTDMT_bufferPool_s {
+    unsigned totalBuffers;
+    unsigned nbBuffers;
+    ZSTD_customMem cMem;
+    buffer_t bTable[1];   /* variable size */
+} ZSTDMT_bufferPool;
+
+static ZSTDMT_bufferPool* ZSTDMT_createBufferPool(unsigned nbThreads, ZSTD_customMem cMem)
+{
+    unsigned const maxNbBuffers = 2*nbThreads + 2;
+    ZSTDMT_bufferPool* const bufPool = (ZSTDMT_bufferPool*)ZSTD_calloc(
+        sizeof(ZSTDMT_bufferPool) + (maxNbBuffers-1) * sizeof(buffer_t), cMem);
+    if (bufPool==NULL) return NULL;
+    bufPool->totalBuffers = maxNbBuffers;
+    bufPool->nbBuffers = 0;
+    bufPool->cMem = cMem;
+    return bufPool;
+}
+
+static void ZSTDMT_freeBufferPool(ZSTDMT_bufferPool* bufPool)
+{
+    unsigned u;
+    if (!bufPool) return;   /* compatibility with free on NULL */
+    for (u=0; u<bufPool->totalBuffers; u++)
+        ZSTD_free(bufPool->bTable[u].start, bufPool->cMem);
+    ZSTD_free(bufPool, bufPool->cMem);
+}
+
+/* only works at initialization, not during compression */
+static size_t ZSTDMT_sizeof_bufferPool(ZSTDMT_bufferPool* bufPool)
+{
+    size_t const poolSize = sizeof(*bufPool)
+                            + (bufPool->totalBuffers - 1) * sizeof(buffer_t);
+    unsigned u;
+    size_t totalBufferSize = 0;
+    for (u=0; u<bufPool->totalBuffers; u++)
+        totalBufferSize += bufPool->bTable[u].size;
+
+    return poolSize + totalBufferSize;
+}
+
+/** ZSTDMT_getBuffer() :
+ *  assumption : invocation from main thread only ! */
+static buffer_t ZSTDMT_getBuffer(ZSTDMT_bufferPool* pool, size_t bSize)
+{
+    if (pool->nbBuffers) {   /* try to use an existing buffer */
+        buffer_t const buf = pool->bTable[--(pool->nbBuffers)];
+        size_t const availBufferSize = buf.size;
+        if ((availBufferSize >= bSize) & (availBufferSize <= 10*bSize))
+            /* large enough, but not too much */
+            return buf;
+        /* size conditions not respected : scratch this buffer, create new one */
+        ZSTD_free(buf.start, pool->cMem);
+    }
+    /* create new buffer */
+    {   buffer_t buffer;
+        void* const start = ZSTD_malloc(bSize, pool->cMem);
+        if (start==NULL) bSize = 0;
+        buffer.start = start;   /* note : start can be NULL if malloc fails ! */
+        buffer.size = bSize;
+        return buffer;
+    }
+}
+
+/* store buffer for later re-use, up to pool capacity */
+static void ZSTDMT_releaseBuffer(ZSTDMT_bufferPool* pool, buffer_t buf)
+{
+    if (buf.start == NULL) return;   /* release on NULL */
+    if (pool->nbBuffers < pool->totalBuffers) {
+        pool->bTable[pool->nbBuffers++] = buf;   /* store for later re-use */
+        return;
+    }
+    /* Reached bufferPool capacity (should not happen) */
+    ZSTD_free(buf.start, pool->cMem);
+}
+
+
+/* =====   CCtx Pool   ===== */
+
+typedef struct {
+    unsigned totalCCtx;
+    unsigned availCCtx;
+    ZSTD_customMem cMem;
+    ZSTD_CCtx* cctx[1];   /* variable size */
+} ZSTDMT_CCtxPool;
+
+/* assumption : CCtxPool invocation only from main thread */
+
+/* note : all CCtx borrowed from the pool should be released back to the pool _before_ freeing the pool */
+static void ZSTDMT_freeCCtxPool(ZSTDMT_CCtxPool* pool)
+{
+    unsigned u;
+    for (u=0; u<pool->totalCCtx; u++)
+        ZSTD_freeCCtx(pool->cctx[u]);  /* note : compatible with free on NULL */
+    ZSTD_free(pool, pool->cMem);
+}
+
+/* ZSTDMT_createCCtxPool() :
+ * implies nbThreads >= 1 , checked by caller ZSTDMT_createCCtx() */
+static ZSTDMT_CCtxPool* ZSTDMT_createCCtxPool(unsigned nbThreads,
+                                              ZSTD_customMem cMem)
+{
+    ZSTDMT_CCtxPool* const cctxPool = (ZSTDMT_CCtxPool*) ZSTD_calloc(
+        sizeof(ZSTDMT_CCtxPool) + (nbThreads-1)*sizeof(ZSTD_CCtx*), cMem);
+    if (!cctxPool) return NULL;
+    cctxPool->cMem = cMem;
+    cctxPool->totalCCtx = nbThreads;
+    cctxPool->availCCtx = 1;   /* at least one cctx for single-thread mode */
+    cctxPool->cctx[0] = ZSTD_createCCtx_advanced(cMem);
+    if (!cctxPool->cctx[0]) { ZSTDMT_freeCCtxPool(cctxPool); return NULL; }
+    DEBUGLOG(3, "cctxPool created, with %u threads", nbThreads);
+    return cctxPool;
+}
+
+/* only works during initialization phase, not during compression */
+static size_t ZSTDMT_sizeof_CCtxPool(ZSTDMT_CCtxPool* cctxPool)
+{
+    unsigned const nbThreads = cctxPool->totalCCtx;
+    size_t const poolSize = sizeof(*cctxPool)
+                            + (nbThreads-1)*sizeof(ZSTD_CCtx*);
+    unsigned u;
+    size_t totalCCtxSize = 0;
+    for (u=0; u<nbThreads; u++)
+        totalCCtxSize += ZSTD_sizeof_CCtx(cctxPool->cctx[u]);
+
+    return poolSize + totalCCtxSize;
+}
+
+static ZSTD_CCtx* ZSTDMT_getCCtx(ZSTDMT_CCtxPool* pool)
+{
+    if (pool->availCCtx) {
+        pool->availCCtx--;
+        return pool->cctx[pool->availCCtx];
+    }
+    return ZSTD_createCCtx();   /* note : can be NULL, when creation fails ! */
+}
+
+static void ZSTDMT_releaseCCtx(ZSTDMT_CCtxPool* pool, ZSTD_CCtx* cctx)
+{
+    if (cctx==NULL) return;   /* compatibility with release on NULL */
+    if (pool->availCCtx < pool->totalCCtx)
+        pool->cctx[pool->availCCtx++] = cctx;
+    else
+        /* pool overflow : should not happen, since totalCCtx==nbThreads */
+        ZSTD_freeCCtx(cctx);
+}
+
+
+/* =====   Thread worker   ===== */
+
+typedef struct {
+    buffer_t buffer;
+    size_t filled;
+} inBuff_t;
+
+typedef struct {
+    ZSTD_CCtx* cctx;
+    buffer_t src;
+    const void* srcStart;
+    size_t   srcSize;
+    size_t   dictSize;
+    buffer_t dstBuff;
+    size_t   cSize;
+    size_t   dstFlushed;
+    unsigned firstChunk;
+    unsigned lastChunk;
+    unsigned jobCompleted;
+    unsigned jobScanned;
+    pthread_mutex_t* jobCompleted_mutex;
+    pthread_cond_t* jobCompleted_cond;
+    ZSTD_parameters params;
+    const ZSTD_CDict* cdict;
+    unsigned long long fullFrameSize;
+} ZSTDMT_jobDescription;
+
+/* ZSTDMT_compressChunk() : POOL_function type */
+void ZSTDMT_compressChunk(void* jobDescription)
+{
+    ZSTDMT_jobDescription* const job = (ZSTDMT_jobDescription*)jobDescription;
+    const void* const src = (const char*)job->srcStart + job->dictSize;
+    buffer_t const dstBuff = job->dstBuff;
+    DEBUGLOG(5, "job (first:%u) (last:%u) : dictSize %u, srcSize %u",
+                 job->firstChunk, job->lastChunk, (U32)job->dictSize, (U32)job->srcSize);
+    if (job->cdict) {  /* should only happen for first segment */
+        size_t const initError = ZSTD_compressBegin_usingCDict_advanced(job->cctx, job->cdict, job->params.fParams, job->fullFrameSize);
+        DEBUGLOG(5, "using CDict");
+        if (ZSTD_isError(initError)) { job->cSize = initError; goto _endJob; }
+    } else {  /* srcStart points at reloaded section */
+        if (!job->firstChunk) job->params.fParams.contentSizeFlag = 0;  /* ensure no srcSize control */
+        {   size_t const dictModeError = ZSTD_setCCtxParameter(job->cctx, ZSTD_p_forceRawDict, 1);  /* Force loading dictionary in "content-only" mode (no header analysis) */
+            size_t const initError = ZSTD_compressBegin_advanced(job->cctx, job->srcStart, job->dictSize, job->params, job->fullFrameSize);
+            if (ZSTD_isError(initError) || ZSTD_isError(dictModeError)) { job->cSize = initError; goto _endJob; }
+            ZSTD_setCCtxParameter(job->cctx, ZSTD_p_forceWindow, 1);
+    }   }
+    if (!job->firstChunk) {  /* flush and overwrite frame header when it's not first segment */
+        size_t const hSize = ZSTD_compressContinue(job->cctx, dstBuff.start, dstBuff.size, src, 0);
+        if (ZSTD_isError(hSize)) { job->cSize = hSize; goto _endJob; }
+        ZSTD_invalidateRepCodes(job->cctx);
+    }
+
+    DEBUGLOG(5, "Compressing : ");
+    DEBUG_PRINTHEX(4, job->srcStart, 12);
+    job->cSize = (job->lastChunk) ?
+                 ZSTD_compressEnd     (job->cctx, dstBuff.start, dstBuff.size, src, job->srcSize) :
+                 ZSTD_compressContinue(job->cctx, dstBuff.start, dstBuff.size, src, job->srcSize);
+    DEBUGLOG(5, "compressed %u bytes into %u bytes   (first:%u) (last:%u)",
+                (unsigned)job->srcSize, (unsigned)job->cSize, job->firstChunk, job->lastChunk);
+    DEBUGLOG(5, "dstBuff.size : %u ; => %s", (U32)dstBuff.size, ZSTD_getErrorName(job->cSize));
+
+_endJob:
+    PTHREAD_MUTEX_LOCK(job->jobCompleted_mutex);
+    job->jobCompleted = 1;
+    job->jobScanned = 0;
+    pthread_cond_signal(job->jobCompleted_cond);
+    pthread_mutex_unlock(job->jobCompleted_mutex);
+}
+
+
+/* ------------------------------------------ */
+/* =====   Multi-threaded compression   ===== */
+/* ------------------------------------------ */
+
+struct ZSTDMT_CCtx_s {
+    POOL_ctx* factory;
+    ZSTDMT_jobDescription* jobs;
+    ZSTDMT_bufferPool* buffPool;
+    ZSTDMT_CCtxPool* cctxPool;
+    pthread_mutex_t jobCompleted_mutex;
+    pthread_cond_t jobCompleted_cond;
+    size_t targetSectionSize;
+    size_t marginSize;
+    size_t inBuffSize;
+    size_t dictSize;
+    size_t targetDictSize;
+    inBuff_t inBuff;
+    ZSTD_parameters params;
+    XXH64_state_t xxhState;
+    unsigned nbThreads;
+    unsigned jobIDMask;
+    unsigned doneJobID;
+    unsigned nextJobID;
+    unsigned frameEnded;
+    unsigned allJobsCompleted;
+    unsigned overlapRLog;
+    unsigned long long frameContentSize;
+    size_t sectionSize;
+    ZSTD_customMem cMem;
+    ZSTD_CDict* cdictLocal;
+    const ZSTD_CDict* cdict;
+};
+
+static ZSTDMT_jobDescription* ZSTDMT_allocJobsTable(U32* nbJobsPtr, ZSTD_customMem cMem)
+{
+    U32 const nbJobsLog2 = ZSTD_highbit32(*nbJobsPtr) + 1;
+    U32 const nbJobs = 1 << nbJobsLog2;
+    *nbJobsPtr = nbJobs;
+    return (ZSTDMT_jobDescription*) ZSTD_calloc(
+                            nbJobs * sizeof(ZSTDMT_jobDescription), cMem);
+}
+
+ZSTDMT_CCtx* ZSTDMT_createCCtx_advanced(unsigned nbThreads, ZSTD_customMem cMem)
+{
+    ZSTDMT_CCtx* mtctx;
+    U32 nbJobs = nbThreads + 2;
+    DEBUGLOG(3, "ZSTDMT_createCCtx_advanced");
+
+    if ((nbThreads < 1) | (nbThreads > ZSTDMT_NBTHREADS_MAX)) return NULL;
+    if ((cMem.customAlloc!=NULL) ^ (cMem.customFree!=NULL))
+        /* invalid custom allocator */
+        return NULL;
+
+    mtctx = (ZSTDMT_CCtx*) ZSTD_calloc(sizeof(ZSTDMT_CCtx), cMem);
+    if (!mtctx) return NULL;
+    mtctx->cMem = cMem;
+    mtctx->nbThreads = nbThreads;
+    mtctx->allJobsCompleted = 1;
+    mtctx->sectionSize = 0;
+    mtctx->overlapRLog = 3;
+    mtctx->factory = POOL_create(nbThreads, 1);
+    mtctx->jobs = ZSTDMT_allocJobsTable(&nbJobs, cMem);
+    mtctx->jobIDMask = nbJobs - 1;
+    mtctx->buffPool = ZSTDMT_createBufferPool(nbThreads, cMem);
+    mtctx->cctxPool = ZSTDMT_createCCtxPool(nbThreads, cMem);
+    if (!mtctx->factory | !mtctx->jobs | !mtctx->buffPool | !mtctx->cctxPool) {
+        ZSTDMT_freeCCtx(mtctx);
+        return NULL;
+    }
+    pthread_mutex_init(&mtctx->jobCompleted_mutex, NULL);   /* Todo : check init function return */
+    pthread_cond_init(&mtctx->jobCompleted_cond, NULL);
+    DEBUGLOG(3, "mt_cctx created, for %u threads", nbThreads);
+    return mtctx;
+}
+
+ZSTDMT_CCtx* ZSTDMT_createCCtx(unsigned nbThreads)
+{
+    return ZSTDMT_createCCtx_advanced(nbThreads, ZSTD_defaultCMem);
+}
+
+/* ZSTDMT_releaseAllJobResources() :
+ * note : ensure all workers are killed first ! */
+static void ZSTDMT_releaseAllJobResources(ZSTDMT_CCtx* mtctx)
+{
+    unsigned jobID;
+    DEBUGLOG(3, "ZSTDMT_releaseAllJobResources");
+    for (jobID=0; jobID <= mtctx->jobIDMask; jobID++) {
+        ZSTDMT_releaseBuffer(mtctx->buffPool, mtctx->jobs[jobID].dstBuff);
+        mtctx->jobs[jobID].dstBuff = g_nullBuffer;
+        ZSTDMT_releaseBuffer(mtctx->buffPool, mtctx->jobs[jobID].src);
+        mtctx->jobs[jobID].src = g_nullBuffer;
+        ZSTDMT_releaseCCtx(mtctx->cctxPool, mtctx->jobs[jobID].cctx);
+        mtctx->jobs[jobID].cctx = NULL;
+    }
+    memset(mtctx->jobs, 0, (mtctx->jobIDMask+1)*sizeof(ZSTDMT_jobDescription));
+    ZSTDMT_releaseBuffer(mtctx->buffPool, mtctx->inBuff.buffer);
+    mtctx->inBuff.buffer = g_nullBuffer;
+    mtctx->allJobsCompleted = 1;
+}
+
+size_t ZSTDMT_freeCCtx(ZSTDMT_CCtx* mtctx)
+{
+    if (mtctx==NULL) return 0;   /* compatible with free on NULL */
+    POOL_free(mtctx->factory);
+    if (!mtctx->allJobsCompleted) ZSTDMT_releaseAllJobResources(mtctx); /* stop workers first */
+    ZSTDMT_freeBufferPool(mtctx->buffPool);  /* release job resources into pools first */
+    ZSTD_free(mtctx->jobs, mtctx->cMem);
+    ZSTDMT_freeCCtxPool(mtctx->cctxPool);
+    ZSTD_freeCDict(mtctx->cdictLocal);
+    pthread_mutex_destroy(&mtctx->jobCompleted_mutex);
+    pthread_cond_destroy(&mtctx->jobCompleted_cond);
+    ZSTD_free(mtctx, mtctx->cMem);
+    return 0;
+}
+
+size_t ZSTDMT_sizeof_CCtx(ZSTDMT_CCtx* mtctx)
+{
+    if (mtctx == NULL) return 0;   /* supports sizeof NULL */
+    return sizeof(*mtctx)
+        + POOL_sizeof(mtctx->factory)
+        + ZSTDMT_sizeof_bufferPool(mtctx->buffPool)
+        + (mtctx->jobIDMask+1) * sizeof(ZSTDMT_jobDescription)
+        + ZSTDMT_sizeof_CCtxPool(mtctx->cctxPool)
+        + ZSTD_sizeof_CDict(mtctx->cdictLocal);
+}
+
+size_t ZSTDMT_setMTCtxParameter(ZSTDMT_CCtx* mtctx, ZSDTMT_parameter parameter, unsigned value)
+{
+    switch(parameter)
+    {
+    case ZSTDMT_p_sectionSize :
+        mtctx->sectionSize = value;
+        return 0;
+    case ZSTDMT_p_overlapSectionLog :
+        DEBUGLOG(5, "ZSTDMT_p_overlapSectionLog : %u", value);
+        mtctx->overlapRLog = (value >= 9) ? 0 : 9 - value;
+        return 0;
+    default :
+        return ERROR(compressionParameter_unsupported);
+    }
+}
+
+
+/* ------------------------------------------ */
+/* =====   Multi-threaded compression   ===== */
+/* ------------------------------------------ */
+
+static unsigned computeNbChunks(size_t srcSize, unsigned windowLog, unsigned nbThreads) {
+    size_t const chunkSizeTarget = (size_t)1 << (windowLog + 2);
+    size_t const chunkMaxSize = chunkSizeTarget << 2;
+    size_t const passSizeMax = chunkMaxSize * nbThreads;
+    unsigned const multiplier = (unsigned)(srcSize / passSizeMax) + 1;
+    unsigned const nbChunksLarge = multiplier * nbThreads;
+    unsigned const nbChunksMax = (unsigned)(srcSize / chunkSizeTarget) + 1;
+    unsigned const nbChunksSmall = MIN(nbChunksMax, nbThreads);
+    return (multiplier>1) ? nbChunksLarge : nbChunksSmall;
+}
+
+
+size_t ZSTDMT_compress_advanced(ZSTDMT_CCtx* mtctx,
+                           void* dst, size_t dstCapacity,
+                     const void* src, size_t srcSize,
+                     const ZSTD_CDict* cdict,
+                           ZSTD_parameters const params,
+                           unsigned overlapRLog)
+{
+    size_t const overlapSize = (overlapRLog>=9) ? 0 : (size_t)1 << (params.cParams.windowLog - overlapRLog);
+    unsigned nbChunks = computeNbChunks(srcSize, params.cParams.windowLog, mtctx->nbThreads);
+    size_t const proposedChunkSize = (srcSize + (nbChunks-1)) / nbChunks;
+    size_t const avgChunkSize = ((proposedChunkSize & 0x1FFFF) < 0x7FFF) ? proposedChunkSize + 0xFFFF : proposedChunkSize;   /* avoid too small last block */
+    const char* const srcStart = (const char*)src;
+    size_t remainingSrcSize = srcSize;
+    unsigned const compressWithinDst = (dstCapacity >= ZSTD_compressBound(srcSize)) ? nbChunks : (unsigned)(dstCapacity / ZSTD_compressBound(avgChunkSize));  /* presumes avgChunkSize >= 256 KB, which should be the case */
+    size_t frameStartPos = 0, dstBufferPos = 0;
+
+    DEBUGLOG(4, "nbChunks  : %2u   (chunkSize : %u bytes)   ", nbChunks, (U32)avgChunkSize);
+    if (nbChunks==1) {   /* fallback to single-thread mode */
+        ZSTD_CCtx* const cctx = mtctx->cctxPool->cctx[0];
+        if (cdict) return ZSTD_compress_usingCDict_advanced(cctx, dst, dstCapacity, src, srcSize, cdict, params.fParams);
+        return ZSTD_compress_advanced(cctx, dst, dstCapacity, src, srcSize, NULL, 0, params);
+    }
+    assert(avgChunkSize >= 256 KB);  /* condition for ZSTD_compressBound(A) + ZSTD_compressBound(B) <= ZSTD_compressBound(A+B), which is useful to avoid allocating extra buffers */
+
+    if (nbChunks > mtctx->jobIDMask+1) {  /* enlarge job table */
+        U32 nbJobs = nbChunks;
+        ZSTD_free(mtctx->jobs, mtctx->cMem);
+        mtctx->jobIDMask = 0;
+        mtctx->jobs = ZSTDMT_allocJobsTable(&nbJobs, mtctx->cMem);
+        if (mtctx->jobs==NULL) return ERROR(memory_allocation);
+        mtctx->jobIDMask = nbJobs - 1;
+    }
+
+    {   unsigned u;
+        for (u=0; u<nbChunks; u++) {
+            size_t const chunkSize = MIN(remainingSrcSize, avgChunkSize);
+            size_t const dstBufferCapacity = ZSTD_compressBound(chunkSize);
+            buffer_t const dstAsBuffer = { (char*)dst + dstBufferPos, dstBufferCapacity };
+            buffer_t const dstBuffer = u < compressWithinDst ? dstAsBuffer : ZSTDMT_getBuffer(mtctx->buffPool, dstBufferCapacity);
+            ZSTD_CCtx* const cctx = ZSTDMT_getCCtx(mtctx->cctxPool);
+            size_t dictSize = u ? overlapSize : 0;
+
+            if ((cctx==NULL) || (dstBuffer.start==NULL)) {
+                mtctx->jobs[u].cSize = ERROR(memory_allocation);   /* job result */
+                mtctx->jobs[u].jobCompleted = 1;
+                nbChunks = u+1;   /* only wait and free u jobs, instead of initially expected nbChunks ones */
+                break;   /* let's wait for previous jobs to complete, but don't start new ones */
+            }
+
+            mtctx->jobs[u].srcStart = srcStart + frameStartPos - dictSize;
+            mtctx->jobs[u].dictSize = dictSize;
+            mtctx->jobs[u].srcSize = chunkSize;
+            mtctx->jobs[u].cdict = mtctx->nextJobID==0 ? cdict : NULL;
+            mtctx->jobs[u].fullFrameSize = srcSize;
+            mtctx->jobs[u].params = params;
+            /* do not calculate checksum within sections, but write it in header for first section */
+            if (u!=0) mtctx->jobs[u].params.fParams.checksumFlag = 0;
+            mtctx->jobs[u].dstBuff = dstBuffer;
+            mtctx->jobs[u].cctx = cctx;
+            mtctx->jobs[u].firstChunk = (u==0);
+            mtctx->jobs[u].lastChunk = (u==nbChunks-1);
+            mtctx->jobs[u].jobCompleted = 0;
+            mtctx->jobs[u].jobCompleted_mutex = &mtctx->jobCompleted_mutex;
+            mtctx->jobs[u].jobCompleted_cond = &mtctx->jobCompleted_cond;
+
+            DEBUGLOG(5, "posting job %u   (%u bytes)", u, (U32)chunkSize);
+            DEBUG_PRINTHEX(6, mtctx->jobs[u].srcStart, 12);
+            POOL_add(mtctx->factory, ZSTDMT_compressChunk, &mtctx->jobs[u]);
+
+            frameStartPos += chunkSize;
+            dstBufferPos += dstBufferCapacity;
+            remainingSrcSize -= chunkSize;
+    }   }
+
+    /* collect result */
+    {   unsigned chunkID;
+        size_t error = 0, dstPos = 0;
+        for (chunkID=0; chunkID<nbChunks; chunkID++) {
+            DEBUGLOG(5, "waiting for chunk %u ", chunkID);
+            PTHREAD_MUTEX_LOCK(&mtctx->jobCompleted_mutex);
+            while (mtctx->jobs[chunkID].jobCompleted==0) {
+                DEBUGLOG(5, "waiting for jobCompleted signal from chunk %u", chunkID);
+                pthread_cond_wait(&mtctx->jobCompleted_cond, &mtctx->jobCompleted_mutex);
+            }
+            pthread_mutex_unlock(&mtctx->jobCompleted_mutex);
+            DEBUGLOG(5, "ready to write chunk %u ", chunkID);
+
+            ZSTDMT_releaseCCtx(mtctx->cctxPool, mtctx->jobs[chunkID].cctx);
+            mtctx->jobs[chunkID].cctx = NULL;
+            mtctx->jobs[chunkID].srcStart = NULL;
+            {   size_t const cSize = mtctx->jobs[chunkID].cSize;
+                if (ZSTD_isError(cSize)) error = cSize;
+                if ((!error) && (dstPos + cSize > dstCapacity)) error = ERROR(dstSize_tooSmall);
+                if (chunkID) {   /* note : chunk 0 is written directly at dst, which is correct position */
+                    if (!error)
+                        memmove((char*)dst + dstPos, mtctx->jobs[chunkID].dstBuff.start, cSize);  /* may overlap when chunk compressed within dst */
+                    if (chunkID >= compressWithinDst) {  /* chunk compressed into its own buffer, which must be released */
+                        DEBUGLOG(5, "releasing buffer %u>=%u", chunkID, compressWithinDst);
+                        ZSTDMT_releaseBuffer(mtctx->buffPool, mtctx->jobs[chunkID].dstBuff);
+                    }
+                    mtctx->jobs[chunkID].dstBuff = g_nullBuffer;
+                }
+                dstPos += cSize ;
+            }
+        }
+        if (!error) DEBUGLOG(4, "compressed size : %u  ", (U32)dstPos);
+        return error ? error : dstPos;
+    }
+}
+
+/* ZSTDMT_compressCCtx() : simple one-pass entry point ; parameters are derived from compressionLevel and srcSize, no dictionary is used. @return : same as ZSTDMT_compress_advanced() */
+size_t ZSTDMT_compressCCtx(ZSTDMT_CCtx* mtctx,
+                           void* dst, size_t dstCapacity,
+                     const void* src, size_t srcSize,
+                           int compressionLevel)
+{
+    U32 const overlapRLog = (compressionLevel >= ZSTD_maxCLevel()) ? 0 : 3;   /* overlap = (1 << windowLog) >> overlapRLog : whole window at maximum level, 1/8th of it otherwise */
+    ZSTD_parameters params = ZSTD_getParams(compressionLevel, srcSize, 0);
+    params.fParams.contentSizeFlag = 1;   /* content size flag is always forced on for this entry point */
+    return ZSTDMT_compress_advanced(mtctx, dst, dstCapacity, src, srcSize, NULL, params, overlapRLog);
+}
+
+
+/* ====================================== */
+/* =======      Streaming API     ======= */
+/* ====================================== */
+
+static void ZSTDMT_waitForAllJobsCompleted(ZSTDMT_CCtx* zcs)
+{   /* blocks until every job from doneJobID up to nextJobID (excluded) has raised jobCompleted ; on return doneJobID == nextJobID. Job output is neither flushed nor released here */
+    DEBUGLOG(4, "ZSTDMT_waitForAllJobsCompleted");
+    while (zcs->doneJobID < zcs->nextJobID) {
+        unsigned const jobID = zcs->doneJobID & zcs->jobIDMask;   /* position of that job within the round job table */
+        PTHREAD_MUTEX_LOCK(&zcs->jobCompleted_mutex);
+        while (zcs->jobs[jobID].jobCompleted==0) {
+            DEBUGLOG(5, "waiting for jobCompleted signal from chunk %u", zcs->doneJobID);   /* we want to block when waiting for data to flush */
+            pthread_cond_wait(&zcs->jobCompleted_cond, &zcs->jobCompleted_mutex);
+        }
+        pthread_mutex_unlock(&zcs->jobCompleted_mutex);
+        zcs->doneJobID++;
+    }
+}
+
+
+/** ZSTDMT_initCStream_internal() :
+ *  internal usage only : (re)starts a streaming session using `dict` (digested into a locally owned CDict), or `cdict` (only referenced), or neither. @return : 0 in multi-threaded mode, or an error code ; with nbThreads==1, the result of ZSTD_initCStream_internal() */
+size_t ZSTDMT_initCStream_internal(ZSTDMT_CCtx* zcs,
+                    const void* dict, size_t dictSize, const ZSTD_CDict* cdict,
+                    ZSTD_parameters params, unsigned long long pledgedSrcSize)
+{
+    DEBUGLOG(4, "ZSTDMT_initCStream_internal");
+    /* params are supposed to be fully validated at this point */
+    assert(!ZSTD_isError(ZSTD_checkCParams(params.cParams)));
+    assert(!((dict) && (cdict)));  /* either dict or cdict, not both */
+
+    if (zcs->nbThreads==1) {
+        DEBUGLOG(4, "single thread mode");
+        return ZSTD_initCStream_internal(zcs->cctxPool->cctx[0],
+                                dict, dictSize, cdict,
+                                params, pledgedSrcSize);
+    }
+
+    if (zcs->allJobsCompleted == 0) {   /* previous compression not correctly finished */
+        ZSTDMT_waitForAllJobsCompleted(zcs);
+        ZSTDMT_releaseAllJobResources(zcs);
+        zcs->allJobsCompleted = 1;
+    }
+
+    zcs->params = params;
+    zcs->frameContentSize = pledgedSrcSize;
+    if (dict) {
+        DEBUGLOG(4,"cdictLocal: %08X", (U32)(size_t)zcs->cdictLocal);
+        ZSTD_freeCDict(zcs->cdictLocal);   /* any CDict built by a previous init is replaced */
+        zcs->cdictLocal = ZSTD_createCDict_advanced(dict, dictSize,
+                                                    0 /* byRef */, ZSTD_dm_auto,   /* note : a loadPrefix becomes an internal CDict */
+                                                    params.cParams, zcs->cMem);
+        zcs->cdict = zcs->cdictLocal;
+        if (zcs->cdictLocal == NULL) return ERROR(memory_allocation);
+    } else {
+        DEBUGLOG(4,"cdictLocal: %08X", (U32)(size_t)zcs->cdictLocal);
+        ZSTD_freeCDict(zcs->cdictLocal);
+        zcs->cdictLocal = NULL;
+        zcs->cdict = cdict;   /* caller's cdict is referenced as-is ; may be NULL (no dictionary) */
+    }
+
+    zcs->targetDictSize = (zcs->overlapRLog>=9) ? 0 : (size_t)1 << (zcs->params.cParams.windowLog - zcs->overlapRLog);   /* amount of input carried over from one section to the next ; none when overlapRLog>=9 */
+    DEBUGLOG(4, "overlapRLog : %u ", zcs->overlapRLog);
+    DEBUGLOG(4, "overlap Size : %u KB", (U32)(zcs->targetDictSize>>10));
+    zcs->targetSectionSize = zcs->sectionSize ? zcs->sectionSize : (size_t)1 << (zcs->params.cParams.windowLog + 2);   /* explicit sectionSize when set, else 4 << windowLog */
+    zcs->targetSectionSize = MAX(ZSTDMT_SECTION_SIZE_MIN, zcs->targetSectionSize);
+    zcs->targetSectionSize = MAX(zcs->targetDictSize, zcs->targetSectionSize);   /* a section is never smaller than the overlap */
+    DEBUGLOG(4, "Section Size : %u KB", (U32)(zcs->targetSectionSize>>10));
+    zcs->marginSize = zcs->targetSectionSize >> 2;   /* a quarter of a section */
+    zcs->inBuffSize = zcs->targetDictSize + zcs->targetSectionSize + zcs->marginSize;   /* room for overlap + one section + margin */
+    zcs->inBuff.buffer = ZSTDMT_getBuffer(zcs->buffPool, zcs->inBuffSize);
+    if (zcs->inBuff.buffer.start == NULL) return ERROR(memory_allocation);
+    zcs->inBuff.filled = 0;
+    zcs->dictSize = 0;
+    zcs->doneJobID = 0;
+    zcs->nextJobID = 0;
+    zcs->frameEnded = 0;
+    zcs->allJobsCompleted = 0;
+    if (params.fParams.checksumFlag) XXH64_reset(&zcs->xxhState, 0);   /* xxhState accumulates the checksum while jobs get flushed */
+    return 0;
+}
+
+size_t ZSTDMT_initCStream_advanced(ZSTDMT_CCtx* mtctx,
+                                const void* dict, size_t dictSize,
+                                ZSTD_parameters params, unsigned long long pledgedSrcSize)
+{   /* streaming init from explicit parameters and an optional raw dictionary buffer ; never passes a CDict. @return : same as ZSTDMT_initCStream_internal() */
+    DEBUGLOG(5, "ZSTDMT_initCStream_advanced");
+    return ZSTDMT_initCStream_internal(mtctx, dict, dictSize, NULL, params, pledgedSrcSize);
+}
+
+size_t ZSTDMT_initCStream_usingCDict(ZSTDMT_CCtx* mtctx,
+                               const ZSTD_CDict* cdict,
+                                     ZSTD_frameParameters fParams,
+                                     unsigned long long pledgedSrcSize)
+{   /* streaming init on a caller-provided CDict, which is mandatory. @return : ERROR(dictionary_wrong) when cdict is NULL, else same as ZSTDMT_initCStream_internal() */
+    ZSTD_parameters params;
+    if (cdict==NULL) return ERROR(dictionary_wrong);   /* method incompatible with NULL cdict : tested first, so that a NULL pointer is never handed to ZSTD_getParamsFromCDict() */
+    params = ZSTD_getParamsFromCDict(cdict);   /* parameters are inherited from cdict ... */
+    params.fParams = fParams;                  /* ... except frame parameters, which are the ones requested by the caller */
+    return ZSTDMT_initCStream_internal(mtctx, NULL, 0 /*dictSize*/, cdict, params, pledgedSrcSize);
+}
+
+
+/* ZSTDMT_resetCStream() : starts a new streaming session re-using the parameters currently stored in zcs->params.
+ * pledgedSrcSize is optional and can be zero == unknown */
+size_t ZSTDMT_resetCStream(ZSTDMT_CCtx* zcs, unsigned long long pledgedSrcSize)
+{
+    if (zcs->nbThreads==1)
+        return ZSTD_resetCStream(zcs->cctxPool->cctx[0], pledgedSrcSize);
+    return ZSTDMT_initCStream_internal(zcs, NULL, 0, 0, zcs->params, pledgedSrcSize);   /* dict and cdict are both NULL here : the init function then frees cdictLocal and sets zcs->cdict to NULL. NOTE(review): confirm that dropping the dictionary on reset is intended */
+}
+
+size_t ZSTDMT_initCStream(ZSTDMT_CCtx* zcs, int compressionLevel) {   /* simplest streaming init : parameters derived from compressionLevel only, no dictionary, pledgedSrcSize passed as 0 */
+    ZSTD_parameters const params = ZSTD_getParams(compressionLevel, 0, 0);
+    return ZSTDMT_initCStream_internal(zcs, NULL, 0, NULL, params, 0);
+}
+
+/* ZSTDMT_createCompressionJob() : makes a job out of the srcSize bytes which follow the dictSize prefix inside zcs->inBuff, posts it, and (unless endFrame) sets up a fresh input buffer pre-filled with the next prefix + left-over input. @return : 0, or ERROR(memory_allocation) */
+static size_t ZSTDMT_createCompressionJob(ZSTDMT_CCtx* zcs, size_t srcSize, unsigned endFrame)
+{
+    size_t const dstBufferCapacity = ZSTD_compressBound(srcSize);
+    buffer_t const dstBuffer = ZSTDMT_getBuffer(zcs->buffPool, dstBufferCapacity);
+    ZSTD_CCtx* const cctx = ZSTDMT_getCCtx(zcs->cctxPool);
+    unsigned const jobID = zcs->nextJobID & zcs->jobIDMask;   /* position within the round job table */
+
+    if ((cctx==NULL) || (dstBuffer.start==NULL)) {   /* NOTE(review): whichever of cctx / dstBuffer WAS obtained is not yet referenced by the job slot on this path - confirm it still gets released */
+        zcs->jobs[jobID].jobCompleted = 1;
+        zcs->nextJobID++;
+        ZSTDMT_waitForAllJobsCompleted(zcs);
+        ZSTDMT_releaseAllJobResources(zcs);
+        return ERROR(memory_allocation);
+    }
+
+    DEBUGLOG(4, "preparing job %u to compress %u bytes with %u preload ",
+                zcs->nextJobID, (U32)srcSize, (U32)zcs->dictSize);
+    zcs->jobs[jobID].src = zcs->inBuff.buffer;   /* the job takes over the current input buffer ; it gets released when the job is flushed */
+    zcs->jobs[jobID].srcStart = zcs->inBuff.buffer.start;
+    zcs->jobs[jobID].srcSize = srcSize;
+    zcs->jobs[jobID].dictSize = zcs->dictSize;
+    assert(zcs->inBuff.filled >= srcSize + zcs->dictSize);
+    zcs->jobs[jobID].params = zcs->params;
+    /* do not calculate checksum within sections, but write it in header for first section */
+    if (zcs->nextJobID) zcs->jobs[jobID].params.fParams.checksumFlag = 0;
+    zcs->jobs[jobID].cdict = zcs->nextJobID==0 ? zcs->cdict : NULL;   /* only the first job of a session receives the CDict */
+    zcs->jobs[jobID].fullFrameSize = zcs->frameContentSize;
+    zcs->jobs[jobID].dstBuff = dstBuffer;
+    zcs->jobs[jobID].cctx = cctx;
+    zcs->jobs[jobID].firstChunk = (zcs->nextJobID==0);
+    zcs->jobs[jobID].lastChunk = endFrame;
+    zcs->jobs[jobID].jobCompleted = 0;
+    zcs->jobs[jobID].dstFlushed = 0;
+    zcs->jobs[jobID].jobCompleted_mutex = &zcs->jobCompleted_mutex;
+    zcs->jobs[jobID].jobCompleted_cond = &zcs->jobCompleted_cond;
+
+    /* get a new buffer for next input */
+    if (!endFrame) {
+        size_t const newDictSize = MIN(srcSize + zcs->dictSize, zcs->targetDictSize);   /* next prefix : tail of (prefix + section), capped to targetDictSize */
+        DEBUGLOG(5, "ZSTDMT_createCompressionJob::endFrame = %u", endFrame);
+        zcs->inBuff.buffer = ZSTDMT_getBuffer(zcs->buffPool, zcs->inBuffSize);
+        if (zcs->inBuff.buffer.start == NULL) {   /* not enough memory to allocate next input buffer */
+            zcs->jobs[jobID].jobCompleted = 1;
+            zcs->nextJobID++;
+            ZSTDMT_waitForAllJobsCompleted(zcs);
+            ZSTDMT_releaseAllJobResources(zcs);
+            return ERROR(memory_allocation);
+        }
+        DEBUGLOG(5, "inBuff currently filled to %u", (U32)zcs->inBuff.filled);
+        zcs->inBuff.filled -= srcSize + zcs->dictSize - newDictSize;   /* what remains : new prefix + input already loaded beyond this section */
+        DEBUGLOG(5, "new job : inBuff filled to %u, with %u dict and %u src",
+                    (U32)zcs->inBuff.filled, (U32)newDictSize,
+                    (U32)(zcs->inBuff.filled - newDictSize));
+        memmove(zcs->inBuff.buffer.start,
+            (const char*)zcs->jobs[jobID].srcStart + zcs->dictSize + srcSize - newDictSize,
+            zcs->inBuff.filled);   /* copied from the buffer now owned by the job into the new input buffer */
+        DEBUGLOG(5, "new inBuff pre-filled");
+        zcs->dictSize = newDictSize;
+    } else {   /* if (endFrame==1) */
+        DEBUGLOG(5, "ZSTDMT_createCompressionJob::endFrame = %u", endFrame);
+        zcs->inBuff.buffer = g_nullBuffer;   /* no more input expected for this frame */
+        zcs->inBuff.filled = 0;
+        zcs->dictSize = 0;
+        zcs->frameEnded = 1;
+        if (zcs->nextJobID == 0)
+            /* single chunk exception : checksum is calculated directly within worker thread */
+            zcs->params.fParams.checksumFlag = 0;
+    }
+
+    DEBUGLOG(4, "posting job %u : %u bytes  (end:%u) (note : doneJob = %u=>%u)",
+                zcs->nextJobID,
+                (U32)zcs->jobs[jobID].srcSize,
+                zcs->jobs[jobID].lastChunk,
+                zcs->doneJobID,
+                zcs->doneJobID & zcs->jobIDMask);
+    POOL_add(zcs->factory, ZSTDMT_compressChunk, &zcs->jobs[jobID]);   /* this call is blocking when thread worker pool is exhausted */
+    zcs->nextJobID++;
+    return 0;
+}
+
+
+/* ZSTDMT_flushNextJob() : copies as much as possible of the oldest unflushed job (doneJobID) into output.
+ * output : will be updated with amount of data flushed.
+ * blockToFlush : if >0, the function will block and wait if there is no data available to flush ; if 0, it returns 0 immediately when that job is not completed yet.
+ * @return : amount of data remaining within internal buffer, 1 if unknown but > 0, 0 if no more, or an error code */
+static size_t ZSTDMT_flushNextJob(ZSTDMT_CCtx* zcs, ZSTD_outBuffer* output, unsigned blockToFlush)
+{
+    unsigned const wJobID = zcs->doneJobID & zcs->jobIDMask;
+    if (zcs->doneJobID == zcs->nextJobID) return 0;   /* all flushed ! */
+    PTHREAD_MUTEX_LOCK(&zcs->jobCompleted_mutex);
+    while (zcs->jobs[wJobID].jobCompleted==0) {
+        DEBUGLOG(5, "waiting for jobCompleted signal from job %u", zcs->doneJobID);
+        if (!blockToFlush) { pthread_mutex_unlock(&zcs->jobCompleted_mutex); return 0; }  /* nothing ready to be flushed => skip */
+        pthread_cond_wait(&zcs->jobCompleted_cond, &zcs->jobCompleted_mutex);  /* block when nothing available to flush */
+    }
+    pthread_mutex_unlock(&zcs->jobCompleted_mutex);
+    /* compression job completed : output can be flushed */
+    {   ZSTDMT_jobDescription job = zcs->jobs[wJobID];   /* local copy : whatever must persist is written back into zcs->jobs[wJobID] explicitly */
+        if (!job.jobScanned) {   /* first visit of this completed job : check result, release its cctx and source buffer, handle checksum */
+            if (ZSTD_isError(job.cSize)) {
+                DEBUGLOG(5, "compression error detected ");
+                ZSTDMT_waitForAllJobsCompleted(zcs);
+                ZSTDMT_releaseAllJobResources(zcs);
+                return job.cSize;
+            }
+            ZSTDMT_releaseCCtx(zcs->cctxPool, job.cctx);
+            zcs->jobs[wJobID].cctx = NULL;
+            DEBUGLOG(5, "zcs->params.fParams.checksumFlag : %u ", zcs->params.fParams.checksumFlag);
+            if (zcs->params.fParams.checksumFlag) {
+                XXH64_update(&zcs->xxhState, (const char*)job.srcStart + job.dictSize, job.srcSize);   /* hash the section itself, skipping its prefix */
+                if (zcs->frameEnded && (zcs->doneJobID+1 == zcs->nextJobID)) {  /* write checksum at end of last section */
+                    U32 const checksum = (U32)XXH64_digest(&zcs->xxhState);
+                    DEBUGLOG(5, "writing checksum : %08X \n", checksum);
+                    MEM_writeLE32((char*)job.dstBuff.start + job.cSize, checksum);   /* NOTE(review): 4 bytes appended after cSize ; presumes dstBuff has room for them - confirm */
+                    job.cSize += 4;
+                    zcs->jobs[wJobID].cSize += 4;
+            }   }
+            ZSTDMT_releaseBuffer(zcs->buffPool, job.src);
+            zcs->jobs[wJobID].srcStart = NULL;
+            zcs->jobs[wJobID].src = g_nullBuffer;
+            zcs->jobs[wJobID].jobScanned = 1;   /* the steps above run only once per job, even when flushing takes several calls */
+        }
+        {   size_t const toWrite = MIN(job.cSize - job.dstFlushed, output->size - output->pos);   /* limited by what is left in the job and by room left in output */
+            DEBUGLOG(5, "Flushing %u bytes from job %u ", (U32)toWrite, zcs->doneJobID);
+            memcpy((char*)output->dst + output->pos, (const char*)job.dstBuff.start + job.dstFlushed, toWrite);
+            output->pos += toWrite;
+            job.dstFlushed += toWrite;
+        }
+        if (job.dstFlushed == job.cSize) {   /* output buffer fully flushed => move to next one */
+            ZSTDMT_releaseBuffer(zcs->buffPool, job.dstBuff);
+            zcs->jobs[wJobID].dstBuff = g_nullBuffer;
+            zcs->jobs[wJobID].jobCompleted = 0;
+            zcs->doneJobID++;
+        } else {
+            zcs->jobs[wJobID].dstFlushed = job.dstFlushed;   /* remember progress for next call */
+        }
+        /* return value : how many bytes left in buffer ; fake it to 1 if unknown but >0 */
+        if (job.cSize > job.dstFlushed) return (job.cSize - job.dstFlushed);
+        if (zcs->doneJobID < zcs->nextJobID) return 1;   /* still some buffer to flush */
+        zcs->allJobsCompleted = zcs->frameEnded;   /* frame completed and entirely flushed */
+        return 0;   /* everything flushed */
+}   }
+
+
+/** ZSTDMT_compressStream_generic() :
+ *  internal use only : loads input, creates a job once enough input is buffered, flushes what is ready, then applies endOp (only once input is fully consumed)
+ *  assumption : output and input are valid (pos <= size)
+ * @return : minimum amount of data remaining to flush, 0 if none (1 after ZSTD_e_continue), or an error code */
+size_t ZSTDMT_compressStream_generic(ZSTDMT_CCtx* mtctx,
+                                     ZSTD_outBuffer* output,
+                                     ZSTD_inBuffer* input,
+                                     ZSTD_EndDirective endOp)
+{
+    size_t const newJobThreshold = mtctx->dictSize + mtctx->targetSectionSize + mtctx->marginSize;   /* buffered amount from which a non-final job gets created */
+    assert(output->pos <= output->size);
+    assert(input->pos  <= input->size);
+    if ((mtctx->frameEnded) && (endOp==ZSTD_e_continue)) {
+        /* current frame being ended. Only flush/end are allowed. Or start new frame with init */
+        return ERROR(stage_wrong);
+    }
+    if (mtctx->nbThreads==1) {
+        return ZSTD_compressStream_generic(mtctx->cctxPool->cctx[0], output, input, endOp);
+    }
+
+    /* single-pass shortcut (note : this is blocking-mode) */
+    if ( (mtctx->nextJobID==0)      /* just started */
+      && (mtctx->inBuff.filled==0)  /* nothing buffered */
+      && (endOp==ZSTD_e_end)        /* end order */
+      && (output->size - output->pos >= ZSTD_compressBound(input->size - input->pos)) ) { /* enough room */
+        size_t const cSize = ZSTDMT_compress_advanced(mtctx,
+                (char*)output->dst + output->pos, output->size - output->pos,
+                (const char*)input->src + input->pos, input->size - input->pos,
+                mtctx->cdict, mtctx->params, mtctx->overlapRLog);
+        if (ZSTD_isError(cSize)) return cSize;
+        input->pos = input->size;
+        output->pos += cSize;
+        ZSTDMT_releaseBuffer(mtctx->buffPool, mtctx->inBuff.buffer);  /* was allocated in initStream. NOTE(review): mtctx->inBuff.buffer keeps referencing the released buffer afterwards - confirm no later path uses or releases it again */
+        mtctx->allJobsCompleted = 1;
+        mtctx->frameEnded = 1;
+        return 0;
+    }
+
+    /* fill input buffer */
+    if ((input->src) && (mtctx->inBuff.buffer.start)) {   /* support NULL input */
+        size_t const toLoad = MIN(input->size - input->pos, mtctx->inBuffSize - mtctx->inBuff.filled);   /* limited by available input and by room left in the input buffer */
+        DEBUGLOG(2, "inBuff:%08X;  inBuffSize=%u;  ToCopy=%u", (U32)(size_t)mtctx->inBuff.buffer.start, (U32)mtctx->inBuffSize, (U32)toLoad);
+        memcpy((char*)mtctx->inBuff.buffer.start + mtctx->inBuff.filled, (const char*)input->src + input->pos, toLoad);
+        input->pos += toLoad;
+        mtctx->inBuff.filled += toLoad;
+    }
+
+    if ( (mtctx->inBuff.filled >= newJobThreshold)  /* filled enough : let's compress */
+      && (mtctx->nextJobID <= mtctx->doneJobID + mtctx->jobIDMask) ) {   /* avoid overwriting job round buffer */
+        CHECK_F( ZSTDMT_createCompressionJob(mtctx, mtctx->targetSectionSize, 0 /* endFrame */) );
+    }
+
+    /* check for potential compressed data ready to be flushed */
+    CHECK_F( ZSTDMT_flushNextJob(mtctx, output, (mtctx->inBuff.filled == mtctx->inBuffSize) /* blockToFlush */) ); /* block if it wasn't possible to create new job due to saturation */
+
+    if (input->pos < input->size)  /* input not consumed : do not flush yet */
+        endOp = ZSTD_e_continue;
+
+    switch(endOp)
+    {
+        case ZSTD_e_flush:
+            return ZSTDMT_flushStream(mtctx, output);
+        case ZSTD_e_end:
+            return ZSTDMT_endStream(mtctx, output);
+        case ZSTD_e_continue:
+            return 1;   /* constant : the amount left to flush is not evaluated on this path */
+        default:
+            return ERROR(GENERIC);   /* invalid endDirective */
+    }
+}
+
+/* ZSTDMT_compressStream() : consumes input without requesting any flush or frame end. @return : room left in the internal input buffer (hint for next input size), or an error code */
+size_t ZSTDMT_compressStream(ZSTDMT_CCtx* zcs, ZSTD_outBuffer* output, ZSTD_inBuffer* input)
+{
+    CHECK_F( ZSTDMT_compressStream_generic(zcs, output, input, ZSTD_e_continue) );
+
+    /* recommended next input size : fill current input buffer */
+    return zcs->inBuffSize - zcs->inBuff.filled;   /* note : could be zero when input buffer is fully filled and no more availability to create new job */
+}
+
+/* ZSTDMT_flushStream_internal() : turns pending input into a job when a job slot is free (also ending the frame when endFrame is set), then blocks on the oldest job and flushes it into output. @return : same as ZSTDMT_flushNextJob(), or an error code from job creation */
+static size_t ZSTDMT_flushStream_internal(ZSTDMT_CCtx* zcs, ZSTD_outBuffer* output, unsigned endFrame)
+{
+    size_t const srcSize = zcs->inBuff.filled - zcs->dictSize;   /* input buffered beyond the prefix, not yet handed to any job */
+
+    if ( ((srcSize > 0) || (endFrame && !zcs->frameEnded))   /* an empty job is still needed to end a frame not ended yet */
+       && (zcs->nextJobID <= zcs->doneJobID + zcs->jobIDMask) ) {   /* same round-table guard as in ZSTDMT_compressStream_generic() */
+        CHECK_F( ZSTDMT_createCompressionJob(zcs, srcSize, endFrame) );
+    }
+
+    /* check if there is any data available to flush */
+    return ZSTDMT_flushNextJob(zcs, output, 1 /* blockToFlush */);
+}
+
+/* ZSTDMT_flushStream() : compresses whatever input is buffered, without ending the frame, and flushes into output. @return : 0 == all flushed; >0 : still some data to be flushed; or an error code */
+size_t ZSTDMT_flushStream(ZSTDMT_CCtx* zcs, ZSTD_outBuffer* output)
+{
+    DEBUGLOG(5, "ZSTDMT_flushStream");
+    if (zcs->nbThreads==1)   /* single-thread mode is served directly by the first context of the pool */
+        return ZSTD_flushStream(zcs->cctxPool->cctx[0], output);
+    return ZSTDMT_flushStream_internal(zcs, output, 0 /* endFrame */);
+}
+
+size_t ZSTDMT_endStream(ZSTDMT_CCtx* zcs, ZSTD_outBuffer* output)
+{   /* same as ZSTDMT_flushStream(), but also requests the end of the frame. @return : 0 == all flushed; >0 : still some data to be flushed (call again); or an error code */
+    DEBUGLOG(4, "ZSTDMT_endStream");
+    if (zcs->nbThreads==1)   /* single-thread mode is served directly by the first context of the pool */
+        return ZSTD_endStream(zcs->cctxPool->cctx[0], output);
+    return ZSTDMT_flushStream_internal(zcs, output, 1 /* endFrame */);
+}
diff --git a/contrib/libzstd/include/zstd/compress/zstdmt_compress.h b/contrib/libzstd/include/zstd/compress/zstdmt_compress.h
new file mode 100644
index 00000000000..fad63b6d861
--- /dev/null
+++ b/contrib/libzstd/include/zstd/compress/zstdmt_compress.h
@@ -0,0 +1,114 @@
+/**
+ * Copyright (c) 2016-present, Yann Collet, Facebook, Inc.
+ * All rights reserved.
+ *
+ * This source code is licensed under the BSD-style license found in the
+ * LICENSE file in the root directory of this source tree. An additional grant
+ * of patent rights can be found in the PATENTS file in the same directory.
+ */
+
+ #ifndef ZSTDMT_COMPRESS_H
+ #define ZSTDMT_COMPRESS_H
+
+ #if defined (__cplusplus)
+ extern "C" {
+ #endif
+
+
+/* Note : All prototypes defined in this file are labelled experimental.
+ *        No guarantee of API continuity is provided on any of them.
+ *        In fact, the expectation is that these prototypes will be replaced
+ *        by ZSTD_compress_generic() API in the near future */
+
+/* ===   Dependencies   === */
+#include <stddef.h>                /* size_t */
+#define ZSTD_STATIC_LINKING_ONLY   /* ZSTD_parameters */
+#include "zstd.h"            /* ZSTD_inBuffer, ZSTD_outBuffer, ZSTDLIB_API */
+
+
+/* ===   Memory management   === */
+typedef struct ZSTDMT_CCtx_s ZSTDMT_CCtx;
+ZSTDLIB_API ZSTDMT_CCtx* ZSTDMT_createCCtx(unsigned nbThreads);
+ZSTDLIB_API ZSTDMT_CCtx* ZSTDMT_createCCtx_advanced(unsigned nbThreads,
+                                                    ZSTD_customMem cMem);
+ZSTDLIB_API size_t ZSTDMT_freeCCtx(ZSTDMT_CCtx* mtctx);
+
+ZSTDLIB_API size_t ZSTDMT_sizeof_CCtx(ZSTDMT_CCtx* mtctx);
+
+
+/* ===   Simple buffer-to-buffer one-pass function   === */
+
+ZSTDLIB_API size_t ZSTDMT_compressCCtx(ZSTDMT_CCtx* mtctx,
+                                       void* dst, size_t dstCapacity,
+                                 const void* src, size_t srcSize,
+                                       int compressionLevel);
+
+
+
+/* ===   Streaming functions   === */
+
+ZSTDLIB_API size_t ZSTDMT_initCStream(ZSTDMT_CCtx* mtctx, int compressionLevel);
+ZSTDLIB_API size_t ZSTDMT_resetCStream(ZSTDMT_CCtx* mtctx, unsigned long long pledgedSrcSize);    /**< pledgedSrcSize is optional and can be zero == unknown */
+
+ZSTDLIB_API size_t ZSTDMT_compressStream(ZSTDMT_CCtx* mtctx, ZSTD_outBuffer* output, ZSTD_inBuffer* input);
+
+ZSTDLIB_API size_t ZSTDMT_flushStream(ZSTDMT_CCtx* mtctx, ZSTD_outBuffer* output);   /**< @return : 0 == all flushed; >0 : still some data to be flushed; or an error code (ZSTD_isError()) */
+ZSTDLIB_API size_t ZSTDMT_endStream(ZSTDMT_CCtx* mtctx, ZSTD_outBuffer* output);     /**< @return : 0 == all flushed; >0 : still some data to be flushed; or an error code (ZSTD_isError()) */
+
+
+/* ===   Advanced functions and parameters  === */
+
+#ifndef ZSTDMT_SECTION_SIZE_MIN
+#  define ZSTDMT_SECTION_SIZE_MIN (1U << 20)   /* 1 MB - Minimum size of each compression job */
+#endif
+
+ZSTDLIB_API size_t ZSTDMT_compress_advanced(ZSTDMT_CCtx* mtctx,
+                                           void* dst, size_t dstCapacity,
+                                     const void* src, size_t srcSize,
+                                     const ZSTD_CDict* cdict,
+                                           ZSTD_parameters const params,
+                                           unsigned overlapRLog);
+
+ZSTDLIB_API size_t ZSTDMT_initCStream_advanced(ZSTDMT_CCtx* mtctx,
+                                        const void* dict, size_t dictSize,   /* dict can be released after init, a local copy is preserved within zcs */
+                                        ZSTD_parameters params,
+                                        unsigned long long pledgedSrcSize);  /* pledgedSrcSize is optional and can be zero == unknown */
+
+ZSTDLIB_API size_t ZSTDMT_initCStream_usingCDict(ZSTDMT_CCtx* mtctx,
+                                        const ZSTD_CDict* cdict,
+                                        ZSTD_frameParameters fparams,
+                                        unsigned long long pledgedSrcSize);  /* note : zero means empty */
+
+/* ZSDTMT_parameter :
+ * List of parameters that can be set using ZSTDMT_setMTCtxParameter() */
+typedef enum {
+    ZSTDMT_p_sectionSize,        /* size of input "section". Each section is compressed in parallel. 0 means default, which is dynamically determined within compression functions */
+    ZSTDMT_p_overlapSectionLog   /* Log of overlapped section; 0 == no overlap, 6(default) == use 1/8th of window, >=9 == use full window */
+} ZSDTMT_parameter;
+
+/* ZSTDMT_setMTCtxParameter() :
+ * allow setting individual parameters, one at a time, among a list of enums defined in ZSDTMT_parameter.
+ * The function must be called typically after ZSTDMT_createCCtx().
+ * Parameters not explicitly reset by ZSTDMT_init*() remain the same in consecutive compression sessions.
+ * @return : 0, or an error code (which can be tested using ZSTD_isError()) */
+ZSTDLIB_API size_t ZSTDMT_setMTCtxParameter(ZSTDMT_CCtx* mtctx, ZSDTMT_parameter parameter, unsigned value);
+
+
+/*! ZSTDMT_compressStream_generic() :
+ *  Combines ZSTDMT_compressStream() with ZSTDMT_flushStream() or ZSTDMT_endStream()
+ *  depending on flush directive.
+ * @return : minimum amount of data still to be flushed
+ *           0 if fully flushed
+ *           or an error code */
+ZSTDLIB_API size_t ZSTDMT_compressStream_generic(ZSTDMT_CCtx* mtctx,
+                                                ZSTD_outBuffer* output,
+                                                ZSTD_inBuffer* input,
+                                                ZSTD_EndDirective endOp);
+
+
+
+#if defined (__cplusplus)
+}
+#endif
+
+#endif   /* ZSTDMT_COMPRESS_H */
diff --git a/contrib/libzstd/include/zstd/decompress/huf_decompress.c b/contrib/libzstd/include/zstd/decompress/huf_decompress.c
index e94fa83ccd7..2a1b70ea5ef 100644
--- a/contrib/libzstd/include/zstd/decompress/huf_decompress.c
+++ b/contrib/libzstd/include/zstd/decompress/huf_decompress.c
@@ -35,16 +35,19 @@
 /* **************************************************************
 *  Compiler specifics
 ****************************************************************/
-#if defined (__cplusplus) || (defined (__STDC_VERSION__) && (__STDC_VERSION__ >= 199901L) /* C99 */)
-/* inline is defined */
-#elif defined(_MSC_VER) || defined(__GNUC__)
-#  define inline __inline
-#else
-#  define inline /* disable inline */
-#endif
-
 #ifdef _MSC_VER    /* Visual Studio */
+#  define FORCE_INLINE static __forceinline
 #  pragma warning(disable : 4127)        /* disable: C4127: conditional expression is constant */
+#else
+#  if defined (__cplusplus) || defined (__STDC_VERSION__) && __STDC_VERSION__ >= 199901L   /* C99 */
+#    ifdef __GNUC__
+#      define FORCE_INLINE static inline __attribute__((always_inline))
+#    else
+#      define FORCE_INLINE static inline
+#    endif
+#  else
+#    define FORCE_INLINE static
+#  endif /* __STDC_VERSION__ */
 #endif
 
 
@@ -64,6 +67,12 @@
 #define HUF_STATIC_ASSERT(c) { enum { HUF_static_assert = 1/(int)(!!(c)) }; }   /* use only *after* variable declarations */
 
 
+/* **************************************************************
+*  Byte alignment for workSpace management
+****************************************************************/
+#define HUF_ALIGN(x, a) HUF_ALIGN_MASK((x), (a) - 1)
+#define HUF_ALIGN_MASK(x, mask) (((x) + (mask)) & ~(mask))
+
 /*-***************************/
 /*  generic DTableDesc       */
 /*-***************************/
@@ -84,16 +93,28 @@ static DTableDesc HUF_getDTableDesc(const HUF_DTable* table)
 
 typedef struct { BYTE byte; BYTE nbBits; } HUF_DEltX2;   /* single-symbol decoding */
 
-size_t HUF_readDTableX2 (HUF_DTable* DTable, const void* src, size_t srcSize)
+size_t HUF_readDTableX2_wksp(HUF_DTable* DTable, const void* src, size_t srcSize, void* workSpace, size_t wkspSize)
 {
-    BYTE huffWeight[HUF_SYMBOLVALUE_MAX + 1];
-    U32 rankVal[HUF_TABLELOG_ABSOLUTEMAX + 1];   /* large enough for values from 0 to 16 */
     U32 tableLog = 0;
     U32 nbSymbols = 0;
     size_t iSize;
     void* const dtPtr = DTable + 1;
     HUF_DEltX2* const dt = (HUF_DEltX2*)dtPtr;
 
+    U32* rankVal;
+    BYTE* huffWeight;
+    size_t spaceUsed32 = 0;
+
+    rankVal = (U32 *)workSpace + spaceUsed32;
+    spaceUsed32 += HUF_TABLELOG_ABSOLUTEMAX + 1;
+    huffWeight = (BYTE *)((U32 *)workSpace + spaceUsed32);
+    spaceUsed32 += HUF_ALIGN(HUF_SYMBOLVALUE_MAX + 1, sizeof(U32)) >> 2;
+
+    if ((spaceUsed32 << 2) > wkspSize)
+        return ERROR(tableLog_tooLarge);
+    workSpace = (U32 *)workSpace + spaceUsed32;
+    wkspSize -= (spaceUsed32 << 2);
+
     HUF_STATIC_ASSERT(sizeof(DTableDesc) == sizeof(HUF_DTable));
     /* memset(huffWeight, 0, sizeof(huffWeight)); */   /* is not necessary, even though some analyzer complain ... */
 
@@ -102,16 +123,16 @@ size_t HUF_readDTableX2 (HUF_DTable* DTable, const void* src, size_t srcSize)
 
     /* Table header */
     {   DTableDesc dtd = HUF_getDTableDesc(DTable);
-        if (tableLog > (U32)(dtd.maxTableLog+1)) return ERROR(tableLog_tooLarge);   /* DTable too small, huffman tree cannot fit in */
+        if (tableLog > (U32)(dtd.maxTableLog+1)) return ERROR(tableLog_tooLarge);   /* DTable too small, Huffman tree cannot fit in */
         dtd.tableType = 0;
         dtd.tableLog = (BYTE)tableLog;
         memcpy(DTable, &dtd, sizeof(dtd));
     }
 
-    /* Prepare ranks */
+    /* Calculate starting value for each rank */
     {   U32 n, nextRankStart = 0;
         for (n=1; n<tableLog+1; n++) {
-            U32 current = nextRankStart;
+            U32 const current = nextRankStart;
             nextRankStart += (rankVal[n] << (n-1));
             rankVal[n] = current;
     }   }
@@ -121,17 +142,24 @@ size_t HUF_readDTableX2 (HUF_DTable* DTable, const void* src, size_t srcSize)
         for (n=0; n<nbSymbols; n++) {
             U32 const w = huffWeight[n];
             U32 const length = (1 << w) >> 1;
-            U32 i;
+            U32 u;
             HUF_DEltX2 D;
             D.byte = (BYTE)n; D.nbBits = (BYTE)(tableLog + 1 - w);
-            for (i = rankVal[w]; i < rankVal[w] + length; i++)
-                dt[i] = D;
+            for (u = rankVal[w]; u < rankVal[w] + length; u++)
+                dt[u] = D;
             rankVal[w] += length;
     }   }
 
     return iSize;
 }
 
+size_t HUF_readDTableX2(HUF_DTable* DTable, const void* src, size_t srcSize)
+{   /* variant without caller-supplied workspace : forwards to HUF_readDTableX2_wksp() and returns its result unchanged */
+    U32 workSpace[HUF_DECOMPRESS_WORKSPACE_SIZE_U32];   /* local scratch buffer, only alive for the duration of this call */
+    return HUF_readDTableX2_wksp(DTable, src, srcSize,
+                                 workSpace, sizeof(workSpace));   /* workspace size is passed in bytes */
+}
+
 
 static BYTE HUF_decodeSymbolX2(BIT_DStream_t* Dstream, const HUF_DEltX2* dt, const U32 dtLog)
 {
@@ -152,7 +180,7 @@ static BYTE HUF_decodeSymbolX2(BIT_DStream_t* Dstream, const HUF_DEltX2* dt, con
     if (MEM_64bits()) \
         HUF_DECODE_SYMBOLX2_0(ptr, DStreamPtr)
 
-static inline size_t HUF_decodeStreamX2(BYTE* p, BIT_DStream_t* const bitDPtr, BYTE* const pEnd, const HUF_DEltX2* const dt, const U32 dtLog)
+FORCE_INLINE size_t HUF_decodeStreamX2(BYTE* p, BIT_DStream_t* const bitDPtr, BYTE* const pEnd, const HUF_DEltX2* const dt, const U32 dtLog)
 {
     BYTE* const pStart = p;
 
@@ -209,11 +237,13 @@ size_t HUF_decompress1X2_usingDTable(
     return HUF_decompress1X2_usingDTable_internal(dst, dstSize, cSrc, cSrcSize, DTable);
 }
 
-size_t HUF_decompress1X2_DCtx (HUF_DTable* DCtx, void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize)
+size_t HUF_decompress1X2_DCtx_wksp(HUF_DTable* DCtx, void* dst, size_t dstSize,
+                                   const void* cSrc, size_t cSrcSize,
+                                   void* workSpace, size_t wkspSize)
 {
     const BYTE* ip = (const BYTE*) cSrc;
 
-    size_t const hSize = HUF_readDTableX2 (DCtx, cSrc, cSrcSize);
+    size_t const hSize = HUF_readDTableX2_wksp(DCtx, cSrc, cSrcSize, workSpace, wkspSize);
     if (HUF_isError(hSize)) return hSize;
     if (hSize >= cSrcSize) return ERROR(srcSize_wrong);
     ip += hSize; cSrcSize -= hSize;
@@ -221,6 +251,15 @@ size_t HUF_decompress1X2_DCtx (HUF_DTable* DCtx, void* dst, size_t dstSize, cons
     return HUF_decompress1X2_usingDTable_internal (dst, dstSize, ip, cSrcSize, DCtx);
 }
 
+
+size_t HUF_decompress1X2_DCtx(HUF_DTable* DCtx, void* dst, size_t dstSize,
+                              const void* cSrc, size_t cSrcSize)
+{   /* same arguments as HUF_decompress1X2_DCtx_wksp(), minus the workspace, which is provided locally */
+    U32 workSpace[HUF_DECOMPRESS_WORKSPACE_SIZE_U32];   /* local scratch buffer handed to the _wksp variant */
+    return HUF_decompress1X2_DCtx_wksp(DCtx, dst, dstSize, cSrc, cSrcSize,
+                                       workSpace, sizeof(workSpace));   /* size given in bytes */
+}
+
 size_t HUF_decompress1X2 (void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize)
 {
     HUF_CREATE_STATIC_DTABLEX2(DTable, HUF_TABLELOG_MAX);
@@ -332,11 +371,14 @@ size_t HUF_decompress4X2_usingDTable(
 }
 
 
-size_t HUF_decompress4X2_DCtx (HUF_DTable* dctx, void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize)
+size_t HUF_decompress4X2_DCtx_wksp(HUF_DTable* dctx, void* dst, size_t dstSize,
+                                   const void* cSrc, size_t cSrcSize,
+                                   void* workSpace, size_t wkspSize)
 {
     const BYTE* ip = (const BYTE*) cSrc;
 
-    size_t const hSize = HUF_readDTableX2 (dctx, cSrc, cSrcSize);
+    size_t const hSize = HUF_readDTableX2_wksp (dctx, cSrc, cSrcSize,
+                                                workSpace, wkspSize);
     if (HUF_isError(hSize)) return hSize;
     if (hSize >= cSrcSize) return ERROR(srcSize_wrong);
     ip += hSize; cSrcSize -= hSize;
@@ -344,6 +386,13 @@ size_t HUF_decompress4X2_DCtx (HUF_DTable* dctx, void* dst, size_t dstSize, cons
     return HUF_decompress4X2_usingDTable_internal (dst, dstSize, ip, cSrcSize, dctx);
 }
 
+
+size_t HUF_decompress4X2_DCtx (HUF_DTable* dctx, void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize)
+{   /* keeps the pre-workspace signature : delegates to HUF_decompress4X2_DCtx_wksp() with a locally declared buffer */
+    U32 workSpace[HUF_DECOMPRESS_WORKSPACE_SIZE_U32];   /* local scratch buffer, released when this function returns */
+    return HUF_decompress4X2_DCtx_wksp(dctx, dst, dstSize, cSrc, cSrcSize,
+                                       workSpace, sizeof(workSpace));   /* size given in bytes */
+}
 size_t HUF_decompress4X2 (void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize)
 {
     HUF_CREATE_STATIC_DTABLEX2(DTable, HUF_TABLELOG_MAX);
@@ -358,13 +407,15 @@ typedef struct { U16 sequence; BYTE nbBits; BYTE length; } HUF_DEltX4;  /* doubl
 
 typedef struct { BYTE symbol; BYTE weight; } sortedSymbol_t;
 
+/* HUF_fillDTableX4Level2() :
+ * `rankValOrigin` must be a table of at least (HUF_TABLELOG_MAX + 1) U32 */
 static void HUF_fillDTableX4Level2(HUF_DEltX4* DTable, U32 sizeLog, const U32 consumed,
                            const U32* rankValOrigin, const int minWeight,
                            const sortedSymbol_t* sortedSymbols, const U32 sortedListSize,
                            U32 nbBitsBaseline, U16 baseSeq)
 {
     HUF_DEltX4 DElt;
-    U32 rankVal[HUF_TABLELOG_ABSOLUTEMAX + 1];
+    U32 rankVal[HUF_TABLELOG_MAX + 1];
 
     /* get pre-calculated rankVal */
     memcpy(rankVal, rankValOrigin, sizeof(rankVal));
@@ -398,14 +449,15 @@ static void HUF_fillDTableX4Level2(HUF_DEltX4* DTable, U32 sizeLog, const U32 co
     }   }
 }
 
-typedef U32 rankVal_t[HUF_TABLELOG_ABSOLUTEMAX][HUF_TABLELOG_ABSOLUTEMAX + 1];
+typedef U32 rankValCol_t[HUF_TABLELOG_MAX + 1];   /* one rankValCol_t holds (HUF_TABLELOG_MAX + 1) U32 values */
+typedef rankValCol_t rankVal_t[HUF_TABLELOG_MAX];   /* HUF_TABLELOG_MAX of them, i.e. U32[HUF_TABLELOG_MAX][HUF_TABLELOG_MAX + 1] */
 
 static void HUF_fillDTableX4(HUF_DEltX4* DTable, const U32 targetLog,
                            const sortedSymbol_t* sortedList, const U32 sortedListSize,
                            const U32* rankStart, rankVal_t rankValOrigin, const U32 maxWeight,
                            const U32 nbBitsBaseline)
 {
-    U32 rankVal[HUF_TABLELOG_ABSOLUTEMAX + 1];
+    U32 rankVal[HUF_TABLELOG_MAX + 1];
     const int scaleLog = nbBitsBaseline - targetLog;   /* note : targetLog >= srcLog, hence scaleLog <= 1 */
     const U32 minBits  = nbBitsBaseline - maxWeight;
     U32 s;
@@ -442,23 +494,46 @@ static void HUF_fillDTableX4(HUF_DEltX4* DTable, const U32 targetLog,
     }
 }
 
-size_t HUF_readDTableX4 (HUF_DTable* DTable, const void* src, size_t srcSize)
+size_t HUF_readDTableX4_wksp(HUF_DTable* DTable, const void* src,
+                             size_t srcSize, void* workSpace,
+                             size_t wkspSize)
 {
-    BYTE weightList[HUF_SYMBOLVALUE_MAX + 1];
-    sortedSymbol_t sortedSymbol[HUF_SYMBOLVALUE_MAX + 1];
-    U32 rankStats[HUF_TABLELOG_ABSOLUTEMAX + 1] = { 0 };
-    U32 rankStart0[HUF_TABLELOG_ABSOLUTEMAX + 2] = { 0 };
-    U32* const rankStart = rankStart0+1;
-    rankVal_t rankVal;
     U32 tableLog, maxW, sizeOfSort, nbSymbols;
     DTableDesc dtd = HUF_getDTableDesc(DTable);
     U32 const maxTableLog = dtd.maxTableLog;
     size_t iSize;
     void* dtPtr = DTable+1;   /* force compiler to avoid strict-aliasing */
     HUF_DEltX4* const dt = (HUF_DEltX4*)dtPtr;
+    U32 *rankStart;
 
-    HUF_STATIC_ASSERT(sizeof(HUF_DEltX4) == sizeof(HUF_DTable));   /* if compilation fails here, assertion is false */
-    if (maxTableLog > HUF_TABLELOG_ABSOLUTEMAX) return ERROR(tableLog_tooLarge);
+    rankValCol_t* rankVal;
+    U32* rankStats;
+    U32* rankStart0;
+    sortedSymbol_t* sortedSymbol;
+    BYTE* weightList;
+    size_t spaceUsed32 = 0;
+
+    rankVal = (rankValCol_t *)((U32 *)workSpace + spaceUsed32);
+    spaceUsed32 += (sizeof(rankValCol_t) * HUF_TABLELOG_MAX) >> 2;
+    rankStats = (U32 *)workSpace + spaceUsed32;
+    spaceUsed32 += HUF_TABLELOG_MAX + 1;
+    rankStart0 = (U32 *)workSpace + spaceUsed32;
+    spaceUsed32 += HUF_TABLELOG_MAX + 2;
+    sortedSymbol = (sortedSymbol_t *)workSpace + (spaceUsed32 * sizeof(U32)) / sizeof(sortedSymbol_t);
+    spaceUsed32 += HUF_ALIGN(sizeof(sortedSymbol_t) * (HUF_SYMBOLVALUE_MAX + 1), sizeof(U32)) >> 2;
+    weightList = (BYTE *)((U32 *)workSpace + spaceUsed32);
+    spaceUsed32 += HUF_ALIGN(HUF_SYMBOLVALUE_MAX + 1, sizeof(U32)) >> 2;
+
+    if ((spaceUsed32 << 2) > wkspSize)
+        return ERROR(tableLog_tooLarge);
+    workSpace = (U32 *)workSpace + spaceUsed32;
+    wkspSize -= (spaceUsed32 << 2);
+
+    rankStart = rankStart0 + 1;
+    memset(rankStats, 0, sizeof(U32) * (2 * HUF_TABLELOG_MAX + 2 + 1));
+
+    HUF_STATIC_ASSERT(sizeof(HUF_DEltX4) == sizeof(HUF_DTable));   /* if compiler fails here, assertion is wrong */
+    if (maxTableLog > HUF_TABLELOG_MAX) return ERROR(tableLog_tooLarge);
     /* memset(weightList, 0, sizeof(weightList)); */  /* is not necessary, even though some analyzer complain ... */
 
     iSize = HUF_readStats(weightList, HUF_SYMBOLVALUE_MAX + 1, rankStats, &nbSymbols, &tableLog, src, srcSize);
@@ -522,6 +597,12 @@ size_t HUF_readDTableX4 (HUF_DTable* DTable, const void* src, size_t srcSize)
     return iSize;
 }
 
+size_t HUF_readDTableX4(HUF_DTable* DTable, const void* src, size_t srcSize)
+{   /* variant without caller-supplied workspace : forwards to HUF_readDTableX4_wksp() and returns its result unchanged */
+  U32 workSpace[HUF_DECOMPRESS_WORKSPACE_SIZE_U32];   /* local scratch buffer, only alive for the duration of this call */
+  return HUF_readDTableX4_wksp(DTable, src, srcSize,
+                               workSpace, sizeof(workSpace));   /* workspace size is passed in bytes */
+}
 
 static U32 HUF_decodeSymbolX4(void* op, BIT_DStream_t* DStream, const HUF_DEltX4* dt, const U32 dtLog)
 {
@@ -540,7 +621,8 @@ static U32 HUF_decodeLastSymbolX4(void* op, BIT_DStream_t* DStream, const HUF_DE
         if (DStream->bitsConsumed < (sizeof(DStream->bitContainer)*8)) {
             BIT_skipBits(DStream, dt[val].nbBits);
             if (DStream->bitsConsumed > (sizeof(DStream->bitContainer)*8))
-                DStream->bitsConsumed = (sizeof(DStream->bitContainer)*8);   /* ugly hack; works only because it's the last symbol. Note : can't easily extract nbBits from just this symbol */
+                /* ugly hack; works only because it's the last symbol. Note : can't easily extract nbBits from just this symbol */
+                DStream->bitsConsumed = (sizeof(DStream->bitContainer)*8);
     }   }
     return 1;
 }
@@ -557,7 +639,7 @@ static U32 HUF_decodeLastSymbolX4(void* op, BIT_DStream_t* DStream, const HUF_DE
     if (MEM_64bits()) \
         ptr += HUF_decodeSymbolX4(ptr, DStreamPtr, dt, dtLog)
 
-static inline size_t HUF_decodeStreamX4(BYTE* p, BIT_DStream_t* bitDPtr, BYTE* const pEnd, const HUF_DEltX4* const dt, const U32 dtLog)
+FORCE_INLINE size_t HUF_decodeStreamX4(BYTE* p, BIT_DStream_t* bitDPtr, BYTE* const pEnd, const HUF_DEltX4* const dt, const U32 dtLog)
 {
     BYTE* const pStart = p;
 
@@ -621,11 +703,14 @@ size_t HUF_decompress1X4_usingDTable(
     return HUF_decompress1X4_usingDTable_internal(dst, dstSize, cSrc, cSrcSize, DTable);
 }
 
-size_t HUF_decompress1X4_DCtx (HUF_DTable* DCtx, void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize)
+size_t HUF_decompress1X4_DCtx_wksp(HUF_DTable* DCtx, void* dst, size_t dstSize,
+                                   const void* cSrc, size_t cSrcSize,
+                                   void* workSpace, size_t wkspSize)
 {
     const BYTE* ip = (const BYTE*) cSrc;
 
-    size_t const hSize = HUF_readDTableX4 (DCtx, cSrc, cSrcSize);
+    size_t const hSize = HUF_readDTableX4_wksp(DCtx, cSrc, cSrcSize,
+                                               workSpace, wkspSize);
     if (HUF_isError(hSize)) return hSize;
     if (hSize >= cSrcSize) return ERROR(srcSize_wrong);
     ip += hSize; cSrcSize -= hSize;
@@ -633,6 +718,15 @@ size_t HUF_decompress1X4_DCtx (HUF_DTable* DCtx, void* dst, size_t dstSize, cons
     return HUF_decompress1X4_usingDTable_internal (dst, dstSize, ip, cSrcSize, DCtx);
 }
 
+
+size_t HUF_decompress1X4_DCtx(HUF_DTable* DCtx, void* dst, size_t dstSize,
+                              const void* cSrc, size_t cSrcSize)
+{   /* same arguments as HUF_decompress1X4_DCtx_wksp(), minus the workspace, which is provided locally */
+    U32 workSpace[HUF_DECOMPRESS_WORKSPACE_SIZE_U32];   /* local scratch buffer handed to the _wksp variant */
+    return HUF_decompress1X4_DCtx_wksp(DCtx, dst, dstSize, cSrc, cSrcSize,
+                                       workSpace, sizeof(workSpace));   /* size given in bytes */
+}
+
 size_t HUF_decompress1X4 (void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize)
 {
     HUF_CREATE_STATIC_DTABLEX4(DTable, HUF_TABLELOG_MAX);
@@ -743,11 +837,14 @@ size_t HUF_decompress4X4_usingDTable(
 }
 
 
-size_t HUF_decompress4X4_DCtx (HUF_DTable* dctx, void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize)
+size_t HUF_decompress4X4_DCtx_wksp(HUF_DTable* dctx, void* dst, size_t dstSize,
+                                   const void* cSrc, size_t cSrcSize,
+                                   void* workSpace, size_t wkspSize)
 {
     const BYTE* ip = (const BYTE*) cSrc;
 
-    size_t hSize = HUF_readDTableX4 (dctx, cSrc, cSrcSize);
+    size_t hSize = HUF_readDTableX4_wksp(dctx, cSrc, cSrcSize,
+                                         workSpace, wkspSize);
     if (HUF_isError(hSize)) return hSize;
     if (hSize >= cSrcSize) return ERROR(srcSize_wrong);
     ip += hSize; cSrcSize -= hSize;
@@ -755,6 +852,15 @@ size_t HUF_decompress4X4_DCtx (HUF_DTable* dctx, void* dst, size_t dstSize, cons
     return HUF_decompress4X4_usingDTable_internal(dst, dstSize, ip, cSrcSize, dctx);
 }
 
+
+size_t HUF_decompress4X4_DCtx(HUF_DTable* dctx, void* dst, size_t dstSize,
+                              const void* cSrc, size_t cSrcSize)
+{   /* keeps the pre-workspace signature : delegates to HUF_decompress4X4_DCtx_wksp() with a locally declared buffer */
+    U32 workSpace[HUF_DECOMPRESS_WORKSPACE_SIZE_U32];   /* local scratch buffer, released when this function returns */
+    return HUF_decompress4X4_DCtx_wksp(dctx, dst, dstSize, cSrc, cSrcSize,
+                                       workSpace, sizeof(workSpace));   /* size given in bytes */
+}
+
 size_t HUF_decompress4X4 (void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize)
 {
     HUF_CREATE_STATIC_DTABLEX4(DTable, HUF_TABLELOG_MAX);
@@ -856,19 +962,32 @@ size_t HUF_decompress4X_DCtx (HUF_DTable* dctx, void* dst, size_t dstSize, const
     }
 }
 
-size_t HUF_decompress4X_hufOnly (HUF_DTable* dctx, void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize)
+size_t HUF_decompress4X_hufOnly(HUF_DTable* dctx, void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize)
+{   /* all validation and decoder selection happen in HUF_decompress4X_hufOnly_wksp() ; this only supplies the workspace */
+    U32 workSpace[HUF_DECOMPRESS_WORKSPACE_SIZE_U32];   /* local scratch buffer handed to the _wksp variant */
+    return HUF_decompress4X_hufOnly_wksp(dctx, dst, dstSize, cSrc, cSrcSize,
+                                         workSpace, sizeof(workSpace));   /* size given in bytes */
+}
+
+
+size_t HUF_decompress4X_hufOnly_wksp(HUF_DTable* dctx, void* dst,
+                                     size_t dstSize, const void* cSrc,
+                                     size_t cSrcSize, void* workSpace,
+                                     size_t wkspSize)   /* workSpace / wkspSize are passed through unchanged to the selected decoder */
 {
     /* validation checks */
     if (dstSize == 0) return ERROR(dstSize_tooSmall);
     if ((cSrcSize >= dstSize) || (cSrcSize <= 1)) return ERROR(corruption_detected);   /* invalid */
 
     {   U32 const algoNb = HUF_selectDecoder(dstSize, cSrcSize);
-        return algoNb ? HUF_decompress4X4_DCtx(dctx, dst, dstSize, cSrc, cSrcSize) :
-                        HUF_decompress4X2_DCtx(dctx, dst, dstSize, cSrc, cSrcSize) ;
+        return algoNb ? HUF_decompress4X4_DCtx_wksp(dctx, dst, dstSize, cSrc, cSrcSize, workSpace, wkspSize):   /* algoNb != 0 : X4 decoder */
+                        HUF_decompress4X2_DCtx_wksp(dctx, dst, dstSize, cSrc, cSrcSize, workSpace, wkspSize);   /* algoNb == 0 : X2 decoder */
     }
 }
 
-size_t HUF_decompress1X_DCtx (HUF_DTable* dctx, void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize)
+size_t HUF_decompress1X_DCtx_wksp(HUF_DTable* dctx, void* dst, size_t dstSize,
+                                  const void* cSrc, size_t cSrcSize,
+                                  void* workSpace, size_t wkspSize)
 {
     /* validation checks */
     if (dstSize == 0) return ERROR(dstSize_tooSmall);
@@ -877,7 +996,17 @@ size_t HUF_decompress1X_DCtx (HUF_DTable* dctx, void* dst, size_t dstSize, const
     if (cSrcSize == 1) { memset(dst, *(const BYTE*)cSrc, dstSize); return dstSize; }   /* RLE */
 
     {   U32 const algoNb = HUF_selectDecoder(dstSize, cSrcSize);
-        return algoNb ? HUF_decompress1X4_DCtx(dctx, dst, dstSize, cSrc, cSrcSize) :
-                        HUF_decompress1X2_DCtx(dctx, dst, dstSize, cSrc, cSrcSize) ;
+        return algoNb ? HUF_decompress1X4_DCtx_wksp(dctx, dst, dstSize, cSrc,
+                                cSrcSize, workSpace, wkspSize):
+                        HUF_decompress1X2_DCtx_wksp(dctx, dst, dstSize, cSrc,
+                                cSrcSize, workSpace, wkspSize);
     }
 }
+
+size_t HUF_decompress1X_DCtx(HUF_DTable* dctx, void* dst, size_t dstSize,
+                             const void* cSrc, size_t cSrcSize)
+{   /* all validation and decoder selection happen in HUF_decompress1X_DCtx_wksp() ; this only supplies the workspace */
+    U32 workSpace[HUF_DECOMPRESS_WORKSPACE_SIZE_U32];   /* local scratch buffer handed to the _wksp variant */
+    return HUF_decompress1X_DCtx_wksp(dctx, dst, dstSize, cSrc, cSrcSize,
+                                      workSpace, sizeof(workSpace));   /* size given in bytes */
+}
diff --git a/contrib/libzstd/include/zstd/decompress/zbuff_decompress.c b/contrib/libzstd/include/zstd/decompress/zbuff_decompress.c
deleted file mode 100644
index b20ee9705cf..00000000000
--- a/contrib/libzstd/include/zstd/decompress/zbuff_decompress.c
+++ /dev/null
@@ -1,252 +0,0 @@
-/**
- * Copyright (c) 2016-present, Yann Collet, Facebook, Inc.
- * All rights reserved.
- *
- * This source code is licensed under the BSD-style license found in the
- * LICENSE file in the root directory of this source tree. An additional grant
- * of patent rights can be found in the PATENTS file in the same directory.
- */
-
-
-
-/* *************************************
-*  Dependencies
-***************************************/
-#include <stdlib.h>
-#include "error_private.h"
-#include "zstd_internal.h"  /* MIN, ZSTD_blockHeaderSize, ZSTD_BLOCKSIZE_MAX */
-#define ZBUFF_STATIC_LINKING_ONLY
-#include "zbuff.h"
-
-
-typedef enum { ZBUFFds_init, ZBUFFds_loadHeader,
-               ZBUFFds_read, ZBUFFds_load, ZBUFFds_flush } ZBUFF_dStage;
-
-/* *** Resource management *** */
-struct ZBUFF_DCtx_s {
-    ZSTD_DCtx* zd;
-    ZSTD_frameParams fParams;
-    ZBUFF_dStage stage;
-    char*  inBuff;
-    size_t inBuffSize;
-    size_t inPos;
-    char*  outBuff;
-    size_t outBuffSize;
-    size_t outStart;
-    size_t outEnd;
-    size_t blockSize;
-    BYTE headerBuffer[ZSTD_FRAMEHEADERSIZE_MAX];
-    size_t lhSize;
-    ZSTD_customMem customMem;
-};   /* typedef'd to ZBUFF_DCtx within "zbuff.h" */
-
-
-ZBUFF_DCtx* ZBUFF_createDCtx(void)
-{
-    return ZBUFF_createDCtx_advanced(defaultCustomMem);
-}
-
-ZBUFF_DCtx* ZBUFF_createDCtx_advanced(ZSTD_customMem customMem)
-{
-    ZBUFF_DCtx* zbd;
-
-    if (!customMem.customAlloc && !customMem.customFree)
-        customMem = defaultCustomMem;
-
-    if (!customMem.customAlloc || !customMem.customFree)
-        return NULL;
-
-    zbd = (ZBUFF_DCtx*)customMem.customAlloc(customMem.opaque, sizeof(ZBUFF_DCtx));
-    if (zbd==NULL) return NULL;
-    memset(zbd, 0, sizeof(ZBUFF_DCtx));
-    memcpy(&zbd->customMem, &customMem, sizeof(ZSTD_customMem));
-    zbd->zd = ZSTD_createDCtx_advanced(customMem);
-    if (zbd->zd == NULL) { ZBUFF_freeDCtx(zbd); return NULL; }
-    zbd->stage = ZBUFFds_init;
-    return zbd;
-}
-
-size_t ZBUFF_freeDCtx(ZBUFF_DCtx* zbd)
-{
-    if (zbd==NULL) return 0;   /* support free on null */
-    ZSTD_freeDCtx(zbd->zd);
-    if (zbd->inBuff) zbd->customMem.customFree(zbd->customMem.opaque, zbd->inBuff);
-    if (zbd->outBuff) zbd->customMem.customFree(zbd->customMem.opaque, zbd->outBuff);
-    zbd->customMem.customFree(zbd->customMem.opaque, zbd);
-    return 0;
-}
-
-
-/* *** Initialization *** */
-
-size_t ZBUFF_decompressInitDictionary(ZBUFF_DCtx* zbd, const void* dict, size_t dictSize)
-{
-    zbd->stage = ZBUFFds_loadHeader;
-    zbd->lhSize = zbd->inPos = zbd->outStart = zbd->outEnd = 0;
-    return ZSTD_decompressBegin_usingDict(zbd->zd, dict, dictSize);
-}
-
-size_t ZBUFF_decompressInit(ZBUFF_DCtx* zbd)
-{
-    return ZBUFF_decompressInitDictionary(zbd, NULL, 0);
-}
-
-
-/* internal util function */
-MEM_STATIC size_t ZBUFF_limitCopy(void* dst, size_t dstCapacity, const void* src, size_t srcSize)
-{
-    size_t const length = MIN(dstCapacity, srcSize);
-    memcpy(dst, src, length);
-    return length;
-}
-
-
-/* *** Decompression *** */
-
-size_t ZBUFF_decompressContinue(ZBUFF_DCtx* zbd,
-                                void* dst, size_t* dstCapacityPtr,
-                          const void* src, size_t* srcSizePtr)
-{
-    const char* const istart = (const char*)src;
-    const char* const iend = istart + *srcSizePtr;
-    const char* ip = istart;
-    char* const ostart = (char*)dst;
-    char* const oend = ostart + *dstCapacityPtr;
-    char* op = ostart;
-    U32 someMoreWork = 1;
-
-    while (someMoreWork) {
-        switch(zbd->stage)
-        {
-        case ZBUFFds_init :
-            return ERROR(init_missing);
-
-        case ZBUFFds_loadHeader :
-            {   size_t const hSize = ZSTD_getFrameParams(&(zbd->fParams), zbd->headerBuffer, zbd->lhSize);
-                if (ZSTD_isError(hSize)) return hSize;
-                if (hSize != 0) {   /* need more input */
-                    size_t const toLoad = hSize - zbd->lhSize;   /* if hSize!=0, hSize > zbd->lhSize */
-                    if (toLoad > (size_t)(iend-ip)) {   /* not enough input to load full header */
-                        memcpy(zbd->headerBuffer + zbd->lhSize, ip, iend-ip);
-                        zbd->lhSize += iend-ip;
-                        *dstCapacityPtr = 0;
-                        return (hSize - zbd->lhSize) + ZSTD_blockHeaderSize;   /* remaining header bytes + next block header */
-                    }
-                    memcpy(zbd->headerBuffer + zbd->lhSize, ip, toLoad); zbd->lhSize = hSize; ip += toLoad;
-                    break;
-            }   }
-
-            /* Consume header */
-            {   size_t const h1Size = ZSTD_nextSrcSizeToDecompress(zbd->zd);  /* == ZSTD_frameHeaderSize_min */
-                size_t const h1Result = ZSTD_decompressContinue(zbd->zd, NULL, 0, zbd->headerBuffer, h1Size);
-                if (ZSTD_isError(h1Result)) return h1Result;   /* should not happen : already checked */
-                if (h1Size < zbd->lhSize) {   /* long header */
-                    size_t const h2Size = ZSTD_nextSrcSizeToDecompress(zbd->zd);
-                    size_t const h2Result = ZSTD_decompressContinue(zbd->zd, NULL, 0, zbd->headerBuffer+h1Size, h2Size);
-                    if (ZSTD_isError(h2Result)) return h2Result;
-            }   }
-
-            zbd->fParams.windowSize = MAX(zbd->fParams.windowSize, 1U << ZSTD_WINDOWLOG_ABSOLUTEMIN);
-
-            /* Frame header instruct buffer sizes */
-            {   size_t const blockSize = MIN(zbd->fParams.windowSize, ZSTD_BLOCKSIZE_ABSOLUTEMAX);
-                size_t const neededOutSize = zbd->fParams.windowSize + blockSize;
-                zbd->blockSize = blockSize;
-                if (zbd->inBuffSize < blockSize) {
-                    zbd->customMem.customFree(zbd->customMem.opaque, zbd->inBuff);
-                    zbd->inBuffSize = blockSize;
-                    zbd->inBuff = (char*)zbd->customMem.customAlloc(zbd->customMem.opaque, blockSize);
-                    if (zbd->inBuff == NULL) return ERROR(memory_allocation);
-                }
-                if (zbd->outBuffSize < neededOutSize) {
-                    zbd->customMem.customFree(zbd->customMem.opaque, zbd->outBuff);
-                    zbd->outBuffSize = neededOutSize;
-                    zbd->outBuff = (char*)zbd->customMem.customAlloc(zbd->customMem.opaque, neededOutSize);
-                    if (zbd->outBuff == NULL) return ERROR(memory_allocation);
-            }   }
-            zbd->stage = ZBUFFds_read;
-            /* pass-through */
-
-        case ZBUFFds_read:
-            {   size_t const neededInSize = ZSTD_nextSrcSizeToDecompress(zbd->zd);
-                if (neededInSize==0) {  /* end of frame */
-                    zbd->stage = ZBUFFds_init;
-                    someMoreWork = 0;
-                    break;
-                }
-                if ((size_t)(iend-ip) >= neededInSize) {  /* decode directly from src */
-                    const int isSkipFrame = ZSTD_isSkipFrame(zbd->zd);
-                    size_t const decodedSize = ZSTD_decompressContinue(zbd->zd,
-                        zbd->outBuff + zbd->outStart, (isSkipFrame ? 0 : zbd->outBuffSize - zbd->outStart),
-                        ip, neededInSize);
-                    if (ZSTD_isError(decodedSize)) return decodedSize;
-                    ip += neededInSize;
-                    if (!decodedSize && !isSkipFrame) break;   /* this was just a header */
-                    zbd->outEnd = zbd->outStart +  decodedSize;
-                    zbd->stage = ZBUFFds_flush;
-                    break;
-                }
-                if (ip==iend) { someMoreWork = 0; break; }   /* no more input */
-                zbd->stage = ZBUFFds_load;
-                /* pass-through */
-            }
-
-        case ZBUFFds_load:
-            {   size_t const neededInSize = ZSTD_nextSrcSizeToDecompress(zbd->zd);
-                size_t const toLoad = neededInSize - zbd->inPos;   /* should always be <= remaining space within inBuff */
-                size_t loadedSize;
-                if (toLoad > zbd->inBuffSize - zbd->inPos) return ERROR(corruption_detected);   /* should never happen */
-                loadedSize = ZBUFF_limitCopy(zbd->inBuff + zbd->inPos, toLoad, ip, iend-ip);
-                ip += loadedSize;
-                zbd->inPos += loadedSize;
-                if (loadedSize < toLoad) { someMoreWork = 0; break; }   /* not enough input, wait for more */
-
-                /* decode loaded input */
-                {  const int isSkipFrame = ZSTD_isSkipFrame(zbd->zd);
-                   size_t const decodedSize = ZSTD_decompressContinue(zbd->zd,
-                        zbd->outBuff + zbd->outStart, zbd->outBuffSize - zbd->outStart,
-                        zbd->inBuff, neededInSize);
-                    if (ZSTD_isError(decodedSize)) return decodedSize;
-                    zbd->inPos = 0;   /* input is consumed */
-                    if (!decodedSize && !isSkipFrame) { zbd->stage = ZBUFFds_read; break; }   /* this was just a header */
-                    zbd->outEnd = zbd->outStart +  decodedSize;
-                    zbd->stage = ZBUFFds_flush;
-                    /* pass-through */
-            }   }
-
-        case ZBUFFds_flush:
-            {   size_t const toFlushSize = zbd->outEnd - zbd->outStart;
-                size_t const flushedSize = ZBUFF_limitCopy(op, oend-op, zbd->outBuff + zbd->outStart, toFlushSize);
-                op += flushedSize;
-                zbd->outStart += flushedSize;
-                if (flushedSize == toFlushSize) {  /* flush completed */
-                    zbd->stage = ZBUFFds_read;
-                    if (zbd->outStart + zbd->blockSize > zbd->outBuffSize)
-                        zbd->outStart = zbd->outEnd = 0;
-                    break;
-                }
-                /* cannot flush everything */
-                someMoreWork = 0;
-                break;
-            }
-        default: return ERROR(GENERIC);   /* impossible */
-    }   }
-
-    /* result */
-    *srcSizePtr = ip-istart;
-    *dstCapacityPtr = op-ostart;
-    {   size_t nextSrcSizeHint = ZSTD_nextSrcSizeToDecompress(zbd->zd);
-        if (!nextSrcSizeHint) return (zbd->outEnd != zbd->outStart);   /* return 0 only if fully flushed too */
-        nextSrcSizeHint += ZSTD_blockHeaderSize * (ZSTD_nextInputType(zbd->zd) == ZSTDnit_block);
-        if (zbd->inPos > nextSrcSizeHint) return ERROR(GENERIC);   /* should never happen */
-        nextSrcSizeHint -= zbd->inPos;   /* already loaded*/
-        return nextSrcSizeHint;
-    }
-}
-
-
-/* *************************************
-*  Tool functions
-***************************************/
-size_t ZBUFF_recommendedDInSize(void)  { return ZSTD_BLOCKSIZE_ABSOLUTEMAX + ZSTD_blockHeaderSize /* block header size*/ ; }
-size_t ZBUFF_recommendedDOutSize(void) { return ZSTD_BLOCKSIZE_ABSOLUTEMAX; }
diff --git a/contrib/libzstd/include/zstd/decompress/zstd_decompress.c b/contrib/libzstd/include/zstd/decompress/zstd_decompress.c
index 47b5f42c781..003d703a5eb 100644
--- a/contrib/libzstd/include/zstd/decompress/zstd_decompress.c
+++ b/contrib/libzstd/include/zstd/decompress/zstd_decompress.c
@@ -34,7 +34,7 @@
 *  Frames requiring more memory will be rejected.
 */
 #ifndef ZSTD_MAXWINDOWSIZE_DEFAULT
-#  define ZSTD_MAXWINDOWSIZE_DEFAULT (257 << 20)   /* 257 MB */
+#  define ZSTD_MAXWINDOWSIZE_DEFAULT ((1 << ZSTD_WINDOWLOG_MAX) + 1)   /* defined within zstd.h */
 #endif
 
 
@@ -43,8 +43,6 @@
 *********************************************************/
 #include <string.h>      /* memcpy, memmove, memset */
 #include "mem.h"         /* low level memory routines */
-#define XXH_STATIC_LINKING_ONLY   /* XXH64_state_t */
-#include "xxhash.h"      /* XXH64_* */
 #define FSE_STATIC_LINKING_ONLY
 #include "fse.h"
 #define HUF_STATIC_LINKING_ONLY
@@ -55,9 +53,18 @@
 #  include "zstd_legacy.h"
 #endif
 
+#if defined(_MSC_VER) && !defined(_M_IA64)  /* _mm_prefetch() is not defined for ia64 */
+#  include <mmintrin.h>   /* https://msdn.microsoft.com/fr-fr/library/84szxsww(v=vs.90).aspx */
+#  define ZSTD_PREFETCH(ptr)   _mm_prefetch((const char*)ptr, _MM_HINT_T0)
+#elif defined(__GNUC__)
+#  define ZSTD_PREFETCH(ptr)   __builtin_prefetch(ptr, 0, 0)   /* 2nd arg 0 : prefetch for read ; 3rd arg 0 : lowest temporal-locality hint */
+#else   /* neither MSVC nor a __GNUC__ compiler */
+#  define ZSTD_PREFETCH(ptr)   /* disabled */
+#endif
+
 
 /*-*************************************
-*  Macros
+*  Errors
 ***************************************/
 #define ZSTD_isError ERR_isError   /* for inlining */
 #define FSE_isError  ERR_isError
@@ -78,23 +85,31 @@ typedef enum { ZSTDds_getFrameHeaderSize, ZSTDds_decodeFrameHeader,
                ZSTDds_decompressLastBlock, ZSTDds_checkChecksum,
                ZSTDds_decodeSkippableHeader, ZSTDds_skipFrame } ZSTD_dStage;
 
+typedef enum { zdss_init=0, zdss_loadHeader,
+               zdss_read, zdss_load, zdss_flush } ZSTD_dStreamStage;   /* type of the `streamStage` field of struct ZSTD_DCtx_s */
+
+typedef struct {   /* decoding tables plus scratch space ; embedded in struct ZSTD_DCtx_s as the `entropy` member */
+    FSE_DTable LLTable[FSE_DTABLE_SIZE_U32(LLFSELog)];
+    FSE_DTable OFTable[FSE_DTABLE_SIZE_U32(OffFSELog)];
+    FSE_DTable MLTable[FSE_DTABLE_SIZE_U32(MLFSELog)];
+    HUF_DTable hufTable[HUF_DTABLE_SIZE(HufLog)];  /* can accommodate HUF_decompress4X */
+    U32 workspace[HUF_DECOMPRESS_WORKSPACE_SIZE_U32];   /* same element count as the scratch buffers declared by the HUF_* wrappers */
+    U32 rep[ZSTD_REP_NUM];   /* repcodes ; this field previously lived directly in struct ZSTD_DCtx_s */
+} ZSTD_entropyTables_t;
+
 struct ZSTD_DCtx_s
 {
     const FSE_DTable* LLTptr;
     const FSE_DTable* MLTptr;
     const FSE_DTable* OFTptr;
     const HUF_DTable* HUFptr;
-    FSE_DTable LLTable[FSE_DTABLE_SIZE_U32(LLFSELog)];
-    FSE_DTable OFTable[FSE_DTABLE_SIZE_U32(OffFSELog)];
-    FSE_DTable MLTable[FSE_DTABLE_SIZE_U32(MLFSELog)];
-    HUF_DTable hufTable[HUF_DTABLE_SIZE(HufLog)];  /* can accommodate HUF_decompress4X */
-    const void* previousDstEnd;
-    const void* base;
-    const void* vBase;
-    const void* dictEnd;
+    ZSTD_entropyTables_t entropy;
+    const void* previousDstEnd;   /* detect continuity */
+    const void* base;             /* start of current segment */
+    const void* vBase;            /* virtual start of previous segment if it was just before current one */
+    const void* dictEnd;          /* end of previous segment */
     size_t expected;
-    U32 rep[ZSTD_REP_NUM];
-    ZSTD_frameParams fParams;
+    ZSTD_frameHeader fParams;
     blockType_e bType;   /* used in ZSTD_decompressContinue(), to transfer blockType between header decoding and block decoding stages */
     ZSTD_dStage stage;
     U32 litEntropy;
@@ -104,14 +119,41 @@ struct ZSTD_DCtx_s
     U32 dictID;
     const BYTE* litPtr;
     ZSTD_customMem customMem;
-    size_t litBufSize;
     size_t litSize;
     size_t rleSize;
-    BYTE litBuffer[ZSTD_BLOCKSIZE_ABSOLUTEMAX + WILDCOPY_OVERLENGTH];
+    size_t staticSize;
+
+    /* streaming */
+    ZSTD_DDict* ddictLocal;
+    const ZSTD_DDict* ddict;
+    ZSTD_dStreamStage streamStage;
+    char*  inBuff;
+    size_t inBuffSize;
+    size_t inPos;
+    size_t maxWindowSize;
+    char*  outBuff;
+    size_t outBuffSize;
+    size_t outStart;
+    size_t outEnd;
+    size_t blockSize;
+    size_t lhSize;
+    void* legacyContext;
+    U32 previousLegacyVersion;
+    U32 legacyVersion;
+    U32 hostageByte;
+
+    /* workspace */
+    BYTE litBuffer[ZSTD_BLOCKSIZE_MAX + WILDCOPY_OVERLENGTH];
     BYTE headerBuffer[ZSTD_FRAMEHEADERSIZE_MAX];
 };  /* typedef'd to ZSTD_DCtx within "zstd.h" */
 
-size_t ZSTD_sizeof_DCtx (const ZSTD_DCtx* dctx) { if (dctx==NULL) return 0; return sizeof(ZSTD_DCtx); }  /* support sizeof on NULL */
+size_t ZSTD_sizeof_DCtx (const ZSTD_DCtx* dctx)
+{
+    if (dctx==NULL) return 0;   /* support sizeof NULL */
+    return sizeof(*dctx)
+           + ZSTD_sizeof_DDict(dctx->ddictLocal)
+           + dctx->inBuffSize + dctx->outBuffSize;
+}
 
 size_t ZSTD_estimateDCtxSize(void) { return sizeof(ZSTD_DCtx); }
 
@@ -123,67 +165,86 @@ size_t ZSTD_decompressBegin(ZSTD_DCtx* dctx)
     dctx->base = NULL;
     dctx->vBase = NULL;
     dctx->dictEnd = NULL;
-    dctx->hufTable[0] = (HUF_DTable)((HufLog)*0x1000001);  /* cover both little and big endian */
+    dctx->entropy.hufTable[0] = (HUF_DTable)((HufLog)*0x1000001);  /* cover both little and big endian */
     dctx->litEntropy = dctx->fseEntropy = 0;
     dctx->dictID = 0;
-    MEM_STATIC_ASSERT(sizeof(dctx->rep) == sizeof(repStartValue));
-    memcpy(dctx->rep, repStartValue, sizeof(repStartValue));  /* initial repcodes */
-    dctx->LLTptr = dctx->LLTable;
-    dctx->MLTptr = dctx->MLTable;
-    dctx->OFTptr = dctx->OFTable;
-    dctx->HUFptr = dctx->hufTable;
+    MEM_STATIC_ASSERT(sizeof(dctx->entropy.rep) == sizeof(repStartValue));
+    memcpy(dctx->entropy.rep, repStartValue, sizeof(repStartValue));  /* initial repcodes */
+    dctx->LLTptr = dctx->entropy.LLTable;
+    dctx->MLTptr = dctx->entropy.MLTable;
+    dctx->OFTptr = dctx->entropy.OFTable;
+    dctx->HUFptr = dctx->entropy.hufTable;
     return 0;
 }
 
+static void ZSTD_initDCtx_internal(ZSTD_DCtx* dctx)
+{
+    ZSTD_decompressBegin(dctx);   /* cannot fail */
+    dctx->staticSize = 0;
+    dctx->maxWindowSize = ZSTD_MAXWINDOWSIZE_DEFAULT;
+    dctx->ddict   = NULL;
+    dctx->ddictLocal = NULL;
+    dctx->inBuff  = NULL;
+    dctx->inBuffSize = 0;
+    dctx->outBuffSize= 0;
+    dctx->streamStage = zdss_init;
+}
+
 ZSTD_DCtx* ZSTD_createDCtx_advanced(ZSTD_customMem customMem)
 {
-    ZSTD_DCtx* dctx;
+    if (!customMem.customAlloc ^ !customMem.customFree) return NULL;
 
-    if (!customMem.customAlloc && !customMem.customFree) customMem = defaultCustomMem;
-    if (!customMem.customAlloc || !customMem.customFree) return NULL;
+    {   ZSTD_DCtx* const dctx = (ZSTD_DCtx*)ZSTD_malloc(sizeof(*dctx), customMem);
+        if (!dctx) return NULL;
+        dctx->customMem = customMem;
+        dctx->legacyContext = NULL;
+        dctx->previousLegacyVersion = 0;
+        ZSTD_initDCtx_internal(dctx);
+        return dctx;
+    }
+}
 
-    dctx = (ZSTD_DCtx*)ZSTD_malloc(sizeof(ZSTD_DCtx), customMem);
-    if (!dctx) return NULL;
-    memcpy(&dctx->customMem, &customMem, sizeof(customMem));
-    ZSTD_decompressBegin(dctx);
+ZSTD_DCtx* ZSTD_initStaticDCtx(void *workspace, size_t workspaceSize)
+{
+    ZSTD_DCtx* dctx = (ZSTD_DCtx*) workspace;
+
+    if ((size_t)workspace & 7) return NULL;  /* 8-aligned */
+    if (workspaceSize < sizeof(ZSTD_DCtx)) return NULL;  /* minimum size */
+
+    ZSTD_initDCtx_internal(dctx);
+    dctx->staticSize = workspaceSize;
+    dctx->inBuff = (char*)(dctx+1);
     return dctx;
 }
 
 ZSTD_DCtx* ZSTD_createDCtx(void)
 {
-    return ZSTD_createDCtx_advanced(defaultCustomMem);
+    return ZSTD_createDCtx_advanced(ZSTD_defaultCMem);
 }
 
 size_t ZSTD_freeDCtx(ZSTD_DCtx* dctx)
 {
     if (dctx==NULL) return 0;   /* support free on NULL */
-    ZSTD_free(dctx, dctx->customMem);
-    return 0;   /* reserved as a potential error code in the future */
+    if (dctx->staticSize) return ERROR(memory_allocation);   /* not compatible with static DCtx */
+    {   ZSTD_customMem const cMem = dctx->customMem;
+        ZSTD_freeDDict(dctx->ddictLocal);
+        dctx->ddictLocal = NULL;
+        ZSTD_free(dctx->inBuff, cMem);
+        dctx->inBuff = NULL;
+#if defined(ZSTD_LEGACY_SUPPORT) && (ZSTD_LEGACY_SUPPORT >= 1)
+        if (dctx->legacyContext)
+            ZSTD_freeLegacyStreamContext(dctx->legacyContext, dctx->previousLegacyVersion);
+#endif
+        ZSTD_free(dctx, cMem);
+        return 0;
+    }
 }
 
+/* no longer useful */
 void ZSTD_copyDCtx(ZSTD_DCtx* dstDCtx, const ZSTD_DCtx* srcDCtx)
 {
-    size_t const workSpaceSize = (ZSTD_BLOCKSIZE_ABSOLUTEMAX+WILDCOPY_OVERLENGTH) + ZSTD_frameHeaderSize_max;
-    memcpy(dstDCtx, srcDCtx, sizeof(ZSTD_DCtx) - workSpaceSize);  /* no need to copy workspace */
-}
-
-static void ZSTD_refDCtx(ZSTD_DCtx* dstDCtx, const ZSTD_DCtx* srcDCtx)
-{
-    ZSTD_decompressBegin(dstDCtx);  /* init */
-    dstDCtx->dictEnd = srcDCtx->dictEnd;
-    dstDCtx->vBase = srcDCtx->vBase;
-    dstDCtx->base = srcDCtx->base;
-    dstDCtx->previousDstEnd = srcDCtx->previousDstEnd;
-    dstDCtx->dictID = srcDCtx->dictID;
-    dstDCtx->litEntropy = srcDCtx->litEntropy;
-    dstDCtx->fseEntropy = srcDCtx->fseEntropy;
-    dstDCtx->LLTptr = srcDCtx->LLTable;
-    dstDCtx->MLTptr = srcDCtx->MLTable;
-    dstDCtx->OFTptr = srcDCtx->OFTable;
-    dstDCtx->HUFptr = srcDCtx->hufTable;
-    dstDCtx->rep[0] = srcDCtx->rep[0];
-    dstDCtx->rep[1] = srcDCtx->rep[1];
-    dstDCtx->rep[2] = srcDCtx->rep[2];
+    size_t const toCopy = (size_t)((char*)(&dstDCtx->inBuff) - (char*)dstDCtx);
+    memcpy(dstDCtx, srcDCtx, toCopy);  /* no need to copy workspace */
 }
 
 
@@ -191,12 +252,29 @@ static void ZSTD_refDCtx(ZSTD_DCtx* dstDCtx, const ZSTD_DCtx* srcDCtx)
 *   Decompression section
 ***************************************************************/
 
-/* See compression format details in : zstd_compression_format.md */
+/*! ZSTD_isFrame() :
+ *  Tells if the content of `buffer` starts with a valid Frame Identifier.
+ *  Note : Frame Identifier is 4 bytes. If `size < 4`, @return will always be 0.
+ *  Note 2 : Legacy Frame Identifiers are considered valid only if Legacy Support is enabled.
+ *  Note 3 : Skippable Frame Identifiers are considered valid. */
+unsigned ZSTD_isFrame(const void* buffer, size_t size)
+{
+    if (size < 4) return 0;
+    {   U32 const magic = MEM_readLE32(buffer);
+        if (magic == ZSTD_MAGICNUMBER) return 1;
+        if ((magic & 0xFFFFFFF0U) == ZSTD_MAGIC_SKIPPABLE_START) return 1;
+    }
+#if defined(ZSTD_LEGACY_SUPPORT) && (ZSTD_LEGACY_SUPPORT >= 1)
+    if (ZSTD_isLegacy(buffer, size)) return 1;
+#endif
+    return 0;
+}
+
 
 /** ZSTD_frameHeaderSize() :
 *   srcSize must be >= ZSTD_frameHeaderSize_prefix.
 *   @return : size of the Frame Header */
-static size_t ZSTD_frameHeaderSize(const void* src, size_t srcSize)
+size_t ZSTD_frameHeaderSize(const void* src, size_t srcSize)
 {
     if (srcSize < ZSTD_frameHeaderSize_prefix) return ERROR(srcSize_wrong);
     {   BYTE const fhd = ((const BYTE*)src)[4];
@@ -209,22 +287,24 @@ static size_t ZSTD_frameHeaderSize(const void* src, size_t srcSize)
 }
 
 
-/** ZSTD_getFrameParams() :
+/** ZSTD_getFrameHeader() :
 *   decode Frame Header, or require larger `srcSize`.
-*   @return : 0, `fparamsPtr` is correctly filled,
+*   @return : 0, `zfhPtr` is correctly filled,
 *            >0, `srcSize` is too small, result is expected `srcSize`,
 *             or an error code, which can be tested using ZSTD_isError() */
-size_t ZSTD_getFrameParams(ZSTD_frameParams* fparamsPtr, const void* src, size_t srcSize)
+size_t ZSTD_getFrameHeader(ZSTD_frameHeader* zfhPtr, const void* src, size_t srcSize)
 {
     const BYTE* ip = (const BYTE*)src;
-
     if (srcSize < ZSTD_frameHeaderSize_prefix) return ZSTD_frameHeaderSize_prefix;
+
     if (MEM_readLE32(src) != ZSTD_MAGICNUMBER) {
         if ((MEM_readLE32(src) & 0xFFFFFFF0U) == ZSTD_MAGIC_SKIPPABLE_START) {
-            if (srcSize < ZSTD_skippableHeaderSize) return ZSTD_skippableHeaderSize; /* magic number + skippable frame length */
-            memset(fparamsPtr, 0, sizeof(*fparamsPtr));
-            fparamsPtr->frameContentSize = MEM_readLE32((const char *)src + 4);
-            fparamsPtr->windowSize = 0; /* windowSize==0 means a frame is skippable */
+            /* skippable frame */
+            if (srcSize < ZSTD_skippableHeaderSize)
+                return ZSTD_skippableHeaderSize; /* magic number + frame length */
+            memset(zfhPtr, 0, sizeof(*zfhPtr));
+            zfhPtr->frameContentSize = MEM_readLE32((const char *)src + 4);
+            zfhPtr->windowSize = 0; /* windowSize==0 means a frame is skippable */
             return 0;
         }
         return ERROR(prefix_unknown);
@@ -244,11 +324,13 @@ size_t ZSTD_getFrameParams(ZSTD_frameParams* fparamsPtr, const void* src, size_t
         U32 windowSize = 0;
         U32 dictID = 0;
         U64 frameContentSize = 0;
-        if ((fhdByte & 0x08) != 0) return ERROR(frameParameter_unsupported);   /* reserved bits, which must be zero */
+        if ((fhdByte & 0x08) != 0)
+            return ERROR(frameParameter_unsupported);   /* reserved bits, must be zero */
         if (!singleSegment) {
             BYTE const wlByte = ip[pos++];
             U32 const windowLog = (wlByte >> 3) + ZSTD_WINDOWLOG_ABSOLUTEMIN;
-            if (windowLog > ZSTD_WINDOWLOG_MAX) return ERROR(frameParameter_unsupported);
+            if (windowLog > ZSTD_WINDOWLOG_MAX)
+                return ERROR(frameParameter_windowTooLarge);
             windowSize = (1U << windowLog);
             windowSize += (windowSize >> 3) * (wlByte&7);
         }
@@ -270,15 +352,91 @@ size_t ZSTD_getFrameParams(ZSTD_frameParams* fparamsPtr, const void* src, size_t
             case 3 : frameContentSize = MEM_readLE64(ip+pos); break;
         }
         if (!windowSize) windowSize = (U32)frameContentSize;
-        if (windowSize > windowSizeMax) return ERROR(frameParameter_unsupported);
-        fparamsPtr->frameContentSize = frameContentSize;
-        fparamsPtr->windowSize = windowSize;
-        fparamsPtr->dictID = dictID;
-        fparamsPtr->checksumFlag = checksumFlag;
+        if (windowSize > windowSizeMax) return ERROR(frameParameter_windowTooLarge);
+        zfhPtr->frameContentSize = frameContentSize;
+        zfhPtr->windowSize = windowSize;
+        zfhPtr->dictID = dictID;
+        zfhPtr->checksumFlag = checksumFlag;
     }
     return 0;
 }
 
+/** ZSTD_getFrameContentSize() :
+*   compatible with legacy mode
+*   @return : decompressed size of the single frame pointed to be `src` if known, otherwise
+*             - ZSTD_CONTENTSIZE_UNKNOWN if the size cannot be determined
+*             - ZSTD_CONTENTSIZE_ERROR if an error occurred (e.g. invalid magic number, srcSize too small) */
+unsigned long long ZSTD_getFrameContentSize(const void *src, size_t srcSize)
+{
+#if defined(ZSTD_LEGACY_SUPPORT) && (ZSTD_LEGACY_SUPPORT >= 1)
+    if (ZSTD_isLegacy(src, srcSize)) {
+        unsigned long long const ret = ZSTD_getDecompressedSize_legacy(src, srcSize);
+        return ret == 0 ? ZSTD_CONTENTSIZE_UNKNOWN : ret;
+    }
+#endif
+    {   ZSTD_frameHeader fParams;
+        if (ZSTD_getFrameHeader(&fParams, src, srcSize) != 0) return ZSTD_CONTENTSIZE_ERROR;
+        if (fParams.windowSize == 0) {
+            /* Either skippable or empty frame, size == 0 either way */
+            return 0;
+        } else if (fParams.frameContentSize != 0) {
+            return fParams.frameContentSize;
+        } else {
+            return ZSTD_CONTENTSIZE_UNKNOWN;
+        }
+    }
+}
+
+/** ZSTD_findDecompressedSize() :
+ *  compatible with legacy mode
+ *  `srcSize` must be the exact length of some number of ZSTD compressed and/or
+ *      skippable frames
+ *  @return : decompressed size of the frames contained */
+unsigned long long ZSTD_findDecompressedSize(const void* src, size_t srcSize)
+{
+    unsigned long long totalDstSize = 0;
+
+    while (srcSize >= ZSTD_frameHeaderSize_prefix) {
+        const U32 magicNumber = MEM_readLE32(src);
+
+        if ((magicNumber & 0xFFFFFFF0U) == ZSTD_MAGIC_SKIPPABLE_START) {
+            size_t skippableSize;
+            if (srcSize < ZSTD_skippableHeaderSize)
+                return ERROR(srcSize_wrong);
+            skippableSize = MEM_readLE32((const BYTE *)src + 4) +
+                            ZSTD_skippableHeaderSize;
+            if (srcSize < skippableSize) {
+                return ZSTD_CONTENTSIZE_ERROR;
+            }
+
+            src = (const BYTE *)src + skippableSize;
+            srcSize -= skippableSize;
+            continue;
+        }
+
+        {   unsigned long long const ret = ZSTD_getFrameContentSize(src, srcSize);
+            if (ret >= ZSTD_CONTENTSIZE_ERROR) return ret;
+
+            /* check for overflow */
+            if (totalDstSize + ret < totalDstSize) return ZSTD_CONTENTSIZE_ERROR;
+            totalDstSize += ret;
+        }
+        {   size_t const frameSrcSize = ZSTD_findFrameCompressedSize(src, srcSize);
+            if (ZSTD_isError(frameSrcSize)) {
+                return ZSTD_CONTENTSIZE_ERROR;
+            }
+
+            src = (const BYTE *)src + frameSrcSize;
+            srcSize -= frameSrcSize;
+        }
+    }
+
+    if (srcSize) {
+        return ZSTD_CONTENTSIZE_ERROR;
+    }
+
+    return totalDstSize;
+}
 
 /** ZSTD_getDecompressedSize() :
 *   compatible with legacy mode
@@ -289,26 +447,22 @@ size_t ZSTD_getFrameParams(ZSTD_frameParams* fparamsPtr, const void* src, size_t
                    - frame header not complete (`srcSize` too small) */
 unsigned long long ZSTD_getDecompressedSize(const void* src, size_t srcSize)
 {
-#if defined(ZSTD_LEGACY_SUPPORT) && (ZSTD_LEGACY_SUPPORT==1)
-    if (ZSTD_isLegacy(src, srcSize)) return ZSTD_getDecompressedSize_legacy(src, srcSize);
-#endif
-    {   ZSTD_frameParams fparams;
-        size_t const frResult = ZSTD_getFrameParams(&fparams, src, srcSize);
-        if (frResult!=0) return 0;
-        return fparams.frameContentSize;
-    }
+    unsigned long long const ret = ZSTD_getFrameContentSize(src, srcSize);
+    return ret >= ZSTD_CONTENTSIZE_ERROR ? 0 : ret;
 }
 
 
 /** ZSTD_decodeFrameHeader() :
-*   `srcSize` must be the size provided by ZSTD_frameHeaderSize().
+*   `headerSize` must be the size provided by ZSTD_frameHeaderSize().
 *   @return : 0 if success, or an error code, which can be tested using ZSTD_isError() */
-static size_t ZSTD_decodeFrameHeader(ZSTD_DCtx* dctx, const void* src, size_t srcSize)
+static size_t ZSTD_decodeFrameHeader(ZSTD_DCtx* dctx, const void* src, size_t headerSize)
 {
-    size_t const result = ZSTD_getFrameParams(&(dctx->fParams), src, srcSize);
+    size_t const result = ZSTD_getFrameHeader(&(dctx->fParams), src, headerSize);
+    if (ZSTD_isError(result)) return result;  /* invalid header */
+    if (result>0) return ERROR(srcSize_wrong);   /* headerSize too small */
     if (dctx->fParams.dictID && (dctx->dictID != dctx->fParams.dictID)) return ERROR(dictionary_wrong);
     if (dctx->fParams.checksumFlag) XXH64_reset(&dctx->xxhState, 0);
-    return result;
+    return 0;
 }
 
 
@@ -321,7 +475,8 @@ typedef struct
 
 /*! ZSTD_getcBlockSize() :
 *   Provides the size of compressed block from block header `src` */
-size_t ZSTD_getcBlockSize(const void* src, size_t srcSize, blockProperties_t* bpPtr)
+size_t ZSTD_getcBlockSize(const void* src, size_t srcSize,
+                          blockProperties_t* bpPtr)
 {
     if (srcSize < ZSTD_blockHeaderSize) return ERROR(srcSize_wrong);
     {   U32 const cBlockHeader = MEM_readLE24(src);
@@ -336,7 +491,8 @@ size_t ZSTD_getcBlockSize(const void* src, size_t srcSize, blockProperties_t* bp
 }
 
 
-static size_t ZSTD_copyRawBlock(void* dst, size_t dstCapacity, const void* src, size_t srcSize)
+static size_t ZSTD_copyRawBlock(void* dst, size_t dstCapacity,
+                          const void* src, size_t srcSize)
 {
     if (srcSize > dstCapacity) return ERROR(dstSize_tooSmall);
     memcpy(dst, src, srcSize);
@@ -344,7 +500,9 @@ static size_t ZSTD_copyRawBlock(void* dst, size_t dstCapacity, const void* src,
 }
 
 
-static size_t ZSTD_setRleBlock(void* dst, size_t dstCapacity, const void* src, size_t srcSize, size_t regenSize)
+static size_t ZSTD_setRleBlock(void* dst, size_t dstCapacity,
+                         const void* src, size_t srcSize,
+                               size_t regenSize)
 {
     if (srcSize != 1) return ERROR(srcSize_wrong);
     if (regenSize > dstCapacity) return ERROR(dstSize_tooSmall);
@@ -395,7 +553,7 @@ size_t ZSTD_decodeLiteralsBlock(ZSTD_DCtx* dctx,
                     litCSize = (lhc >> 22) + (istart[4] << 10);
                     break;
                 }
-                if (litSize > ZSTD_BLOCKSIZE_ABSOLUTEMAX) return ERROR(corruption_detected);
+                if (litSize > ZSTD_BLOCKSIZE_MAX) return ERROR(corruption_detected);
                 if (litCSize + lhSize > srcSize) return ERROR(corruption_detected);
 
                 if (HUF_isError((litEncType==set_repeat) ?
@@ -403,15 +561,17 @@ size_t ZSTD_decodeLiteralsBlock(ZSTD_DCtx* dctx,
                                         HUF_decompress1X_usingDTable(dctx->litBuffer, litSize, istart+lhSize, litCSize, dctx->HUFptr) :
                                         HUF_decompress4X_usingDTable(dctx->litBuffer, litSize, istart+lhSize, litCSize, dctx->HUFptr) ) :
                                     ( singleStream ?
-                                        HUF_decompress1X2_DCtx(dctx->hufTable, dctx->litBuffer, litSize, istart+lhSize, litCSize) :
-                                        HUF_decompress4X_hufOnly (dctx->hufTable, dctx->litBuffer, litSize, istart+lhSize, litCSize)) ))
+                                        HUF_decompress1X2_DCtx_wksp(dctx->entropy.hufTable, dctx->litBuffer, litSize, istart+lhSize, litCSize,
+                                                                    dctx->entropy.workspace, sizeof(dctx->entropy.workspace)) :
+                                        HUF_decompress4X_hufOnly_wksp(dctx->entropy.hufTable, dctx->litBuffer, litSize, istart+lhSize, litCSize,
+                                                                      dctx->entropy.workspace, sizeof(dctx->entropy.workspace)))))
                     return ERROR(corruption_detected);
 
                 dctx->litPtr = dctx->litBuffer;
-                dctx->litBufSize = ZSTD_BLOCKSIZE_ABSOLUTEMAX+WILDCOPY_OVERLENGTH;
                 dctx->litSize = litSize;
                 dctx->litEntropy = 1;
-                if (litEncType==set_compressed) dctx->HUFptr = dctx->hufTable;
+                if (litEncType==set_compressed) dctx->HUFptr = dctx->entropy.hufTable;
+                memset(dctx->litBuffer + dctx->litSize, 0, WILDCOPY_OVERLENGTH);
                 return litCSize + lhSize;
             }
 
@@ -438,13 +598,12 @@ size_t ZSTD_decodeLiteralsBlock(ZSTD_DCtx* dctx,
                     if (litSize+lhSize > srcSize) return ERROR(corruption_detected);
                     memcpy(dctx->litBuffer, istart+lhSize, litSize);
                     dctx->litPtr = dctx->litBuffer;
-                    dctx->litBufSize = ZSTD_BLOCKSIZE_ABSOLUTEMAX+8;
                     dctx->litSize = litSize;
+                    memset(dctx->litBuffer + dctx->litSize, 0, WILDCOPY_OVERLENGTH);
                     return lhSize+litSize;
                 }
                 /* direct reference into compressed stream */
                 dctx->litPtr = istart+lhSize;
-                dctx->litBufSize = srcSize-lhSize;
                 dctx->litSize = litSize;
                 return lhSize+litSize;
             }
@@ -468,10 +627,9 @@ size_t ZSTD_decodeLiteralsBlock(ZSTD_DCtx* dctx,
                     if (srcSize<4) return ERROR(corruption_detected);   /* srcSize >= MIN_CBLOCK_SIZE == 3; here we need lhSize+1 = 4 */
                     break;
                 }
-                if (litSize > ZSTD_BLOCKSIZE_ABSOLUTEMAX) return ERROR(corruption_detected);
-                memset(dctx->litBuffer, istart[lhSize], litSize);
+                if (litSize > ZSTD_BLOCKSIZE_MAX) return ERROR(corruption_detected);
+                memset(dctx->litBuffer, istart[lhSize], litSize + WILDCOPY_OVERLENGTH);
                 dctx->litPtr = dctx->litBuffer;
-                dctx->litBufSize = ZSTD_BLOCKSIZE_ABSOLUTEMAX+WILDCOPY_OVERLENGTH;
                 dctx->litSize = litSize;
                 return lhSize+1;
             }
@@ -487,176 +645,70 @@ typedef union {
     U32 alignedBy4;
 } FSE_decode_t4;
 
+/* Default FSE distribution table for Literal Lengths */
 static const FSE_decode_t4 LL_defaultDTable[(1<<LL_DEFAULTNORMLOG)+1] = {
     { { LL_DEFAULTNORMLOG, 1, 1 } }, /* header : tableLog, fastMode, fastMode */
-    { {  0,  0,  4 } },              /* 0 : base, symbol, bits */
-    { { 16,  0,  4 } },
-    { { 32,  1,  5 } },
-    { {  0,  3,  5 } },
-    { {  0,  4,  5 } },
-    { {  0,  6,  5 } },
-    { {  0,  7,  5 } },
-    { {  0,  9,  5 } },
-    { {  0, 10,  5 } },
-    { {  0, 12,  5 } },
-    { {  0, 14,  6 } },
-    { {  0, 16,  5 } },
-    { {  0, 18,  5 } },
-    { {  0, 19,  5 } },
-    { {  0, 21,  5 } },
-    { {  0, 22,  5 } },
-    { {  0, 24,  5 } },
-    { { 32, 25,  5 } },
-    { {  0, 26,  5 } },
-    { {  0, 27,  6 } },
-    { {  0, 29,  6 } },
-    { {  0, 31,  6 } },
-    { { 32,  0,  4 } },
-    { {  0,  1,  4 } },
-    { {  0,  2,  5 } },
-    { { 32,  4,  5 } },
-    { {  0,  5,  5 } },
-    { { 32,  7,  5 } },
-    { {  0,  8,  5 } },
-    { { 32, 10,  5 } },
-    { {  0, 11,  5 } },
-    { {  0, 13,  6 } },
-    { { 32, 16,  5 } },
-    { {  0, 17,  5 } },
-    { { 32, 19,  5 } },
-    { {  0, 20,  5 } },
-    { { 32, 22,  5 } },
-    { {  0, 23,  5 } },
-    { {  0, 25,  4 } },
-    { { 16, 25,  4 } },
-    { { 32, 26,  5 } },
-    { {  0, 28,  6 } },
-    { {  0, 30,  6 } },
-    { { 48,  0,  4 } },
-    { { 16,  1,  4 } },
-    { { 32,  2,  5 } },
-    { { 32,  3,  5 } },
-    { { 32,  5,  5 } },
-    { { 32,  6,  5 } },
-    { { 32,  8,  5 } },
-    { { 32,  9,  5 } },
-    { { 32, 11,  5 } },
-    { { 32, 12,  5 } },
-    { {  0, 15,  6 } },
-    { { 32, 17,  5 } },
-    { { 32, 18,  5 } },
-    { { 32, 20,  5 } },
-    { { 32, 21,  5 } },
-    { { 32, 23,  5 } },
-    { { 32, 24,  5 } },
-    { {  0, 35,  6 } },
-    { {  0, 34,  6 } },
-    { {  0, 33,  6 } },
-    { {  0, 32,  6 } },
+     /* base, symbol, bits */
+    { {  0,  0,  4 } }, { { 16,  0,  4 } }, { { 32,  1,  5 } }, { {  0,  3,  5 } },
+    { {  0,  4,  5 } }, { {  0,  6,  5 } }, { {  0,  7,  5 } }, { {  0,  9,  5 } },
+    { {  0, 10,  5 } }, { {  0, 12,  5 } }, { {  0, 14,  6 } }, { {  0, 16,  5 } },
+    { {  0, 18,  5 } }, { {  0, 19,  5 } }, { {  0, 21,  5 } }, { {  0, 22,  5 } },
+    { {  0, 24,  5 } }, { { 32, 25,  5 } }, { {  0, 26,  5 } }, { {  0, 27,  6 } },
+    { {  0, 29,  6 } }, { {  0, 31,  6 } }, { { 32,  0,  4 } }, { {  0,  1,  4 } },
+    { {  0,  2,  5 } }, { { 32,  4,  5 } }, { {  0,  5,  5 } }, { { 32,  7,  5 } },
+    { {  0,  8,  5 } }, { { 32, 10,  5 } }, { {  0, 11,  5 } }, { {  0, 13,  6 } },
+    { { 32, 16,  5 } }, { {  0, 17,  5 } }, { { 32, 19,  5 } }, { {  0, 20,  5 } },
+    { { 32, 22,  5 } }, { {  0, 23,  5 } }, { {  0, 25,  4 } }, { { 16, 25,  4 } },
+    { { 32, 26,  5 } }, { {  0, 28,  6 } }, { {  0, 30,  6 } }, { { 48,  0,  4 } },
+    { { 16,  1,  4 } }, { { 32,  2,  5 } }, { { 32,  3,  5 } }, { { 32,  5,  5 } },
+    { { 32,  6,  5 } }, { { 32,  8,  5 } }, { { 32,  9,  5 } }, { { 32, 11,  5 } },
+    { { 32, 12,  5 } }, { {  0, 15,  6 } }, { { 32, 17,  5 } }, { { 32, 18,  5 } },
+    { { 32, 20,  5 } }, { { 32, 21,  5 } }, { { 32, 23,  5 } }, { { 32, 24,  5 } },
+    { {  0, 35,  6 } }, { {  0, 34,  6 } }, { {  0, 33,  6 } }, { {  0, 32,  6 } },
 };   /* LL_defaultDTable */
 
+/* Default FSE distribution table for Match Lengths */
 static const FSE_decode_t4 ML_defaultDTable[(1<<ML_DEFAULTNORMLOG)+1] = {
     { { ML_DEFAULTNORMLOG, 1, 1 } }, /* header : tableLog, fastMode, fastMode */
-    { {  0,  0,  6 } },              /* 0 : base, symbol, bits */
-    { {  0,  1,  4 } },
-    { { 32,  2,  5 } },
-    { {  0,  3,  5 } },
-    { {  0,  5,  5 } },
-    { {  0,  6,  5 } },
-    { {  0,  8,  5 } },
-    { {  0, 10,  6 } },
-    { {  0, 13,  6 } },
-    { {  0, 16,  6 } },
-    { {  0, 19,  6 } },
-    { {  0, 22,  6 } },
-    { {  0, 25,  6 } },
-    { {  0, 28,  6 } },
-    { {  0, 31,  6 } },
-    { {  0, 33,  6 } },
-    { {  0, 35,  6 } },
-    { {  0, 37,  6 } },
-    { {  0, 39,  6 } },
-    { {  0, 41,  6 } },
-    { {  0, 43,  6 } },
-    { {  0, 45,  6 } },
-    { { 16,  1,  4 } },
-    { {  0,  2,  4 } },
-    { { 32,  3,  5 } },
-    { {  0,  4,  5 } },
-    { { 32,  6,  5 } },
-    { {  0,  7,  5 } },
-    { {  0,  9,  6 } },
-    { {  0, 12,  6 } },
-    { {  0, 15,  6 } },
-    { {  0, 18,  6 } },
-    { {  0, 21,  6 } },
-    { {  0, 24,  6 } },
-    { {  0, 27,  6 } },
-    { {  0, 30,  6 } },
-    { {  0, 32,  6 } },
-    { {  0, 34,  6 } },
-    { {  0, 36,  6 } },
-    { {  0, 38,  6 } },
-    { {  0, 40,  6 } },
-    { {  0, 42,  6 } },
-    { {  0, 44,  6 } },
-    { { 32,  1,  4 } },
-    { { 48,  1,  4 } },
-    { { 16,  2,  4 } },
-    { { 32,  4,  5 } },
-    { { 32,  5,  5 } },
-    { { 32,  7,  5 } },
-    { { 32,  8,  5 } },
-    { {  0, 11,  6 } },
-    { {  0, 14,  6 } },
-    { {  0, 17,  6 } },
-    { {  0, 20,  6 } },
-    { {  0, 23,  6 } },
-    { {  0, 26,  6 } },
-    { {  0, 29,  6 } },
-    { {  0, 52,  6 } },
-    { {  0, 51,  6 } },
-    { {  0, 50,  6 } },
-    { {  0, 49,  6 } },
-    { {  0, 48,  6 } },
-    { {  0, 47,  6 } },
-    { {  0, 46,  6 } },
+    /* base, symbol, bits */
+    { {  0,  0,  6 } }, { {  0,  1,  4 } }, { { 32,  2,  5 } }, { {  0,  3,  5 } },
+    { {  0,  5,  5 } }, { {  0,  6,  5 } }, { {  0,  8,  5 } }, { {  0, 10,  6 } },
+    { {  0, 13,  6 } }, { {  0, 16,  6 } }, { {  0, 19,  6 } }, { {  0, 22,  6 } },
+    { {  0, 25,  6 } }, { {  0, 28,  6 } }, { {  0, 31,  6 } }, { {  0, 33,  6 } },
+    { {  0, 35,  6 } }, { {  0, 37,  6 } }, { {  0, 39,  6 } }, { {  0, 41,  6 } },
+    { {  0, 43,  6 } }, { {  0, 45,  6 } }, { { 16,  1,  4 } }, { {  0,  2,  4 } },
+    { { 32,  3,  5 } }, { {  0,  4,  5 } }, { { 32,  6,  5 } }, { {  0,  7,  5 } },
+    { {  0,  9,  6 } }, { {  0, 12,  6 } }, { {  0, 15,  6 } }, { {  0, 18,  6 } },
+    { {  0, 21,  6 } }, { {  0, 24,  6 } }, { {  0, 27,  6 } }, { {  0, 30,  6 } },
+    { {  0, 32,  6 } }, { {  0, 34,  6 } }, { {  0, 36,  6 } }, { {  0, 38,  6 } },
+    { {  0, 40,  6 } }, { {  0, 42,  6 } }, { {  0, 44,  6 } }, { { 32,  1,  4 } },
+    { { 48,  1,  4 } }, { { 16,  2,  4 } }, { { 32,  4,  5 } }, { { 32,  5,  5 } },
+    { { 32,  7,  5 } }, { { 32,  8,  5 } }, { {  0, 11,  6 } }, { {  0, 14,  6 } },
+    { {  0, 17,  6 } }, { {  0, 20,  6 } }, { {  0, 23,  6 } }, { {  0, 26,  6 } },
+    { {  0, 29,  6 } }, { {  0, 52,  6 } }, { {  0, 51,  6 } }, { {  0, 50,  6 } },
+    { {  0, 49,  6 } }, { {  0, 48,  6 } }, { {  0, 47,  6 } }, { {  0, 46,  6 } },
 };   /* ML_defaultDTable */
 
+/* Default FSE distribution table for Offset Codes */
 static const FSE_decode_t4 OF_defaultDTable[(1<<OF_DEFAULTNORMLOG)+1] = {
     { { OF_DEFAULTNORMLOG, 1, 1 } }, /* header : tableLog, fastMode, fastMode */
-    { {  0,  0,  5 } },              /* 0 : base, symbol, bits */
-    { {  0,  6,  4 } },
-    { {  0,  9,  5 } },
-    { {  0, 15,  5 } },
-    { {  0, 21,  5 } },
-    { {  0,  3,  5 } },
-    { {  0,  7,  4 } },
-    { {  0, 12,  5 } },
-    { {  0, 18,  5 } },
-    { {  0, 23,  5 } },
-    { {  0,  5,  5 } },
-    { {  0,  8,  4 } },
-    { {  0, 14,  5 } },
-    { {  0, 20,  5 } },
-    { {  0,  2,  5 } },
-    { { 16,  7,  4 } },
-    { {  0, 11,  5 } },
-    { {  0, 17,  5 } },
-    { {  0, 22,  5 } },
-    { {  0,  4,  5 } },
-    { { 16,  8,  4 } },
-    { {  0, 13,  5 } },
-    { {  0, 19,  5 } },
-    { {  0,  1,  5 } },
-    { { 16,  6,  4 } },
-    { {  0, 10,  5 } },
-    { {  0, 16,  5 } },
-    { {  0, 28,  5 } },
-    { {  0, 27,  5 } },
-    { {  0, 26,  5 } },
-    { {  0, 25,  5 } },
-    { {  0, 24,  5 } },
+    /* base, symbol, bits */
+    { {  0,  0,  5 } }, { {  0,  6,  4 } },
+    { {  0,  9,  5 } }, { {  0, 15,  5 } },
+    { {  0, 21,  5 } }, { {  0,  3,  5 } },
+    { {  0,  7,  4 } }, { {  0, 12,  5 } },
+    { {  0, 18,  5 } }, { {  0, 23,  5 } },
+    { {  0,  5,  5 } }, { {  0,  8,  4 } },
+    { {  0, 14,  5 } }, { {  0, 20,  5 } },
+    { {  0,  2,  5 } }, { { 16,  7,  4 } },
+    { {  0, 11,  5 } }, { {  0, 17,  5 } },
+    { {  0, 22,  5 } }, { {  0,  4,  5 } },
+    { { 16,  8,  4 } }, { {  0, 13,  5 } },
+    { {  0, 19,  5 } }, { {  0,  1,  5 } },
+    { { 16,  6,  4 } }, { {  0, 10,  5 } },
+    { {  0, 16,  5 } }, { {  0, 28,  5 } },
+    { {  0, 27,  5 } }, { {  0, 26,  5 } },
+    { {  0, 25,  5 } }, { {  0, 24,  5 } },
 };   /* OF_defaultDTable */
 
 /*! ZSTD_buildSeqTable() :
@@ -702,6 +754,7 @@ size_t ZSTD_decodeSeqHeaders(ZSTD_DCtx* dctx, int* nbSeqPtr,
     const BYTE* const istart = (const BYTE* const)src;
     const BYTE* const iend = istart + srcSize;
     const BYTE* ip = istart;
+    DEBUGLOG(5, "ZSTD_decodeSeqHeaders");
 
     /* check */
     if (srcSize < MIN_SEQUENCES_SIZE) return ERROR(srcSize_wrong);
@@ -710,10 +763,13 @@ size_t ZSTD_decodeSeqHeaders(ZSTD_DCtx* dctx, int* nbSeqPtr,
     {   int nbSeq = *ip++;
         if (!nbSeq) { *nbSeqPtr=0; return 1; }
         if (nbSeq > 0x7F) {
-            if (nbSeq == 0xFF)
+            if (nbSeq == 0xFF) {
+                if (ip+2 > iend) return ERROR(srcSize_wrong);
                 nbSeq = MEM_readLE16(ip) + LONGNBSEQ, ip+=2;
-            else
+            } else {
+                if (ip >= iend) return ERROR(srcSize_wrong);
                 nbSeq = ((nbSeq-0x80)<<8) + *ip++;
+            }
         }
         *nbSeqPtr = nbSeq;
     }
@@ -726,19 +782,19 @@ size_t ZSTD_decodeSeqHeaders(ZSTD_DCtx* dctx, int* nbSeqPtr,
         ip++;
 
         /* Build DTables */
-        {   size_t const llhSize = ZSTD_buildSeqTable(dctx->LLTable, &dctx->LLTptr,
+        {   size_t const llhSize = ZSTD_buildSeqTable(dctx->entropy.LLTable, &dctx->LLTptr,
                                                       LLtype, MaxLL, LLFSELog,
                                                       ip, iend-ip, LL_defaultDTable, dctx->fseEntropy);
             if (ZSTD_isError(llhSize)) return ERROR(corruption_detected);
             ip += llhSize;
         }
-        {   size_t const ofhSize = ZSTD_buildSeqTable(dctx->OFTable, &dctx->OFTptr,
+        {   size_t const ofhSize = ZSTD_buildSeqTable(dctx->entropy.OFTable, &dctx->OFTptr,
                                                       OFtype, MaxOff, OffFSELog,
                                                       ip, iend-ip, OF_defaultDTable, dctx->fseEntropy);
             if (ZSTD_isError(ofhSize)) return ERROR(corruption_detected);
             ip += ofhSize;
         }
-        {   size_t const mlhSize = ZSTD_buildSeqTable(dctx->MLTable, &dctx->MLTptr,
+        {   size_t const mlhSize = ZSTD_buildSeqTable(dctx->entropy.MLTable, &dctx->MLTptr,
                                                       MLtype, MaxML, MLFSELog,
                                                       ip, iend-ip, ML_defaultDTable, dctx->fseEntropy);
             if (ZSTD_isError(mlhSize)) return ERROR(corruption_detected);
@@ -754,6 +810,7 @@ typedef struct {
     size_t litLength;
     size_t matchLength;
     size_t offset;
+    const BYTE* match;
 } seq_t;
 
 typedef struct {
@@ -762,88 +819,17 @@ typedef struct {
     FSE_DState_t stateOffb;
     FSE_DState_t stateML;
     size_t prevOffset[ZSTD_REP_NUM];
+    const BYTE* base;
+    size_t pos;
+    uPtrDiff gotoDict;
 } seqState_t;
 
 
-static seq_t ZSTD_decodeSequence(seqState_t* seqState)
-{
-    seq_t seq;
-
-    U32 const llCode = FSE_peekSymbol(&seqState->stateLL);
-    U32 const mlCode = FSE_peekSymbol(&seqState->stateML);
-    U32 const ofCode = FSE_peekSymbol(&seqState->stateOffb);   /* <= maxOff, by table construction */
-
-    U32 const llBits = LL_bits[llCode];
-    U32 const mlBits = ML_bits[mlCode];
-    U32 const ofBits = ofCode;
-    U32 const totalBits = llBits+mlBits+ofBits;
-
-    static const U32 LL_base[MaxLL+1] = {
-                             0,  1,  2,  3,  4,  5,  6,  7,  8,  9,   10,    11,    12,    13,    14,     15,
-                            16, 18, 20, 22, 24, 28, 32, 40, 48, 64, 0x80, 0x100, 0x200, 0x400, 0x800, 0x1000,
-                            0x2000, 0x4000, 0x8000, 0x10000 };
-
-    static const U32 ML_base[MaxML+1] = {
-                             3,  4,  5,  6,  7,  8,  9, 10, 11, 12, 13,   14,    15,    16,    17,    18,
-                            19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29,   30,    31,    32,    33,    34,
-                            35, 37, 39, 41, 43, 47, 51, 59, 67, 83, 99, 0x83, 0x103, 0x203, 0x403, 0x803,
-                            0x1003, 0x2003, 0x4003, 0x8003, 0x10003 };
-
-    static const U32 OF_base[MaxOff+1] = {
-                 0,        1,       1,       5,     0xD,     0x1D,     0x3D,     0x7D,
-                 0xFD,   0x1FD,   0x3FD,   0x7FD,   0xFFD,   0x1FFD,   0x3FFD,   0x7FFD,
-                 0xFFFD, 0x1FFFD, 0x3FFFD, 0x7FFFD, 0xFFFFD, 0x1FFFFD, 0x3FFFFD, 0x7FFFFD,
-                 0xFFFFFD, 0x1FFFFFD, 0x3FFFFFD, 0x7FFFFFD, 0xFFFFFFD };
-
-    /* sequence */
-    {   size_t offset;
-        if (!ofCode)
-            offset = 0;
-        else {
-            offset = OF_base[ofCode] + BIT_readBits(&seqState->DStream, ofBits);   /* <=  (ZSTD_WINDOWLOG_MAX-1) bits */
-            if (MEM_32bits()) BIT_reloadDStream(&seqState->DStream);
-        }
-
-        if (ofCode <= 1) {
-            offset += (llCode==0);
-            if (offset) {
-                size_t const temp = (offset==3) ? seqState->prevOffset[0] - 1 : seqState->prevOffset[offset];
-                if (offset != 1) seqState->prevOffset[2] = seqState->prevOffset[1];
-                seqState->prevOffset[1] = seqState->prevOffset[0];
-                seqState->prevOffset[0] = offset = temp;
-            } else {
-                offset = seqState->prevOffset[0];
-            }
-        } else {
-            seqState->prevOffset[2] = seqState->prevOffset[1];
-            seqState->prevOffset[1] = seqState->prevOffset[0];
-            seqState->prevOffset[0] = offset;
-        }
-        seq.offset = offset;
-    }
-
-    seq.matchLength = ML_base[mlCode] + ((mlCode>31) ? BIT_readBits(&seqState->DStream, mlBits) : 0);  /* <=  16 bits */
-    if (MEM_32bits() && (mlBits+llBits>24)) BIT_reloadDStream(&seqState->DStream);
-
-    seq.litLength = LL_base[llCode] + ((llCode>15) ? BIT_readBits(&seqState->DStream, llBits) : 0);    /* <=  16 bits */
-    if (MEM_32bits() ||
-       (totalBits > 64 - 7 - (LLFSELog+MLFSELog+OffFSELog)) ) BIT_reloadDStream(&seqState->DStream);
-
-    /* ANS state update */
-    FSE_updateState(&seqState->stateLL, &seqState->DStream);    /* <=  9 bits */
-    FSE_updateState(&seqState->stateML, &seqState->DStream);    /* <=  9 bits */
-    if (MEM_32bits()) BIT_reloadDStream(&seqState->DStream);    /* <= 18 bits */
-    FSE_updateState(&seqState->stateOffb, &seqState->DStream);  /* <=  8 bits */
-
-    return seq;
-}
-
-
-FORCE_INLINE
-size_t ZSTD_execSequence(BYTE* op,
-                                BYTE* const oend, seq_t sequence,
-                                const BYTE** litPtr, const BYTE* const litLimit_w,
-                                const BYTE* const base, const BYTE* const vBase, const BYTE* const dictEnd)
+FORCE_NOINLINE
+size_t ZSTD_execSequenceLast7(BYTE* op,
+                              BYTE* const oend, seq_t sequence,
+                              const BYTE** litPtr, const BYTE* const litLimit,
+                              const BYTE* const base, const BYTE* const vBase, const BYTE* const dictEnd)
 {
     BYTE* const oLitEnd = op + sequence.litLength;
     size_t const sequenceLength = sequence.litLength + sequence.matchLength;
@@ -853,15 +839,17 @@ size_t ZSTD_execSequence(BYTE* op,
     const BYTE* match = oLitEnd - sequence.offset;
 
     /* check */
-    if ((oLitEnd>oend_w) | (oMatchEnd>oend)) return ERROR(dstSize_tooSmall); /* last match must start at a minimum distance of WILDCOPY_OVERLENGTH from oend */
-    if (iLitEnd > litLimit_w) return ERROR(corruption_detected);   /* over-read beyond lit buffer */
+    if (oMatchEnd>oend) return ERROR(dstSize_tooSmall); /* the end of the match must not run past oend */
+    if (iLitEnd > litLimit) return ERROR(corruption_detected);   /* over-read beyond lit buffer */
+    if (oLitEnd <= oend_w) return ERROR(GENERIC);   /* Precondition : callers only dispatch here when oLitEnd > oend_w */
 
-    /* copy Literals */
-    ZSTD_copy8(op, *litPtr);
-    if (sequence.litLength > 8)
-        ZSTD_wildcopy(op+8, (*litPtr)+8, sequence.litLength - 8);   /* note : since oLitEnd <= oend-WILDCOPY_OVERLENGTH, no risk of overwrite beyond oend */
-    op = oLitEnd;
-    *litPtr = iLitEnd;   /* update for next sequence */
+    /* copy literals */
+    if (op < oend_w) {
+        ZSTD_wildcopy(op, *litPtr, oend_w - op);
+        *litPtr += oend_w - op;
+        op = oend_w;
+    }
+    while (op < oLitEnd) *op++ = *(*litPtr)++;
 
     /* copy Match */
     if (sequence.offset > (size_t)(oLitEnd - base)) {
@@ -879,12 +867,151 @@ size_t ZSTD_execSequence(BYTE* op,
             sequence.matchLength -= length1;
             match = base;
     }   }
+    while (op < oMatchEnd) *op++ = *match++;
+    return sequenceLength;
+}
+
+
+static seq_t ZSTD_decodeSequence(seqState_t* seqState)
+{
+    seq_t seq;
+
+    U32 const llCode = FSE_peekSymbol(&seqState->stateLL);
+    U32 const mlCode = FSE_peekSymbol(&seqState->stateML);
+    U32 const ofCode = FSE_peekSymbol(&seqState->stateOffb);   /* <= maxOff, by table construction */
+
+    U32 const llBits = LL_bits[llCode];
+    U32 const mlBits = ML_bits[mlCode];
+    U32 const ofBits = ofCode;
+    U32 const totalBits = llBits+mlBits+ofBits;
+
+    static const U32 LL_base[MaxLL+1] = {
+                             0,    1,    2,     3,     4,     5,     6,      7,
+                             8,    9,   10,    11,    12,    13,    14,     15,
+                            16,   18,   20,    22,    24,    28,    32,     40,
+                            48,   64, 0x80, 0x100, 0x200, 0x400, 0x800, 0x1000,
+                            0x2000, 0x4000, 0x8000, 0x10000 };
+
+    static const U32 ML_base[MaxML+1] = {
+                             3,  4,  5,    6,     7,     8,     9,    10,
+                            11, 12, 13,   14,    15,    16,    17,    18,
+                            19, 20, 21,   22,    23,    24,    25,    26,
+                            27, 28, 29,   30,    31,    32,    33,    34,
+                            35, 37, 39,   41,    43,    47,    51,    59,
+                            67, 83, 99, 0x83, 0x103, 0x203, 0x403, 0x803,
+                            0x1003, 0x2003, 0x4003, 0x8003, 0x10003 };
+
+    static const U32 OF_base[MaxOff+1] = {
+                     0,        1,       1,       5,     0xD,     0x1D,     0x3D,     0x7D,
+                     0xFD,   0x1FD,   0x3FD,   0x7FD,   0xFFD,   0x1FFD,   0x3FFD,   0x7FFD,
+                     0xFFFD, 0x1FFFD, 0x3FFFD, 0x7FFFD, 0xFFFFD, 0x1FFFFD, 0x3FFFFD, 0x7FFFFD,
+                     0xFFFFFD, 0x1FFFFFD, 0x3FFFFFD, 0x7FFFFFD, 0xFFFFFFD };
+
+    /* sequence */
+    {   size_t offset;
+        if (!ofCode)
+            offset = 0;
+        else {
+            offset = OF_base[ofCode] + BIT_readBitsFast(&seqState->DStream, ofBits);   /* <=  (ZSTD_WINDOWLOG_MAX-1) bits */
+            if (MEM_32bits()) BIT_reloadDStream(&seqState->DStream);
+        }
+
+        if (ofCode <= 1) {
+            offset += (llCode==0);
+            if (offset) {
+                size_t temp = (offset==3) ? seqState->prevOffset[0] - 1 : seqState->prevOffset[offset];
+                temp += !temp;   /* 0 is not valid; input is corrupted; force offset to 1 */
+                if (offset != 1) seqState->prevOffset[2] = seqState->prevOffset[1];
+                seqState->prevOffset[1] = seqState->prevOffset[0];
+                seqState->prevOffset[0] = offset = temp;
+            } else {
+                offset = seqState->prevOffset[0];
+            }
+        } else {
+            seqState->prevOffset[2] = seqState->prevOffset[1];
+            seqState->prevOffset[1] = seqState->prevOffset[0];
+            seqState->prevOffset[0] = offset;
+        }
+        seq.offset = offset;
+    }
+
+    seq.matchLength = ML_base[mlCode]
+                    + ((mlCode>31) ? BIT_readBitsFast(&seqState->DStream, mlBits) : 0);  /* <=  16 bits */
+    if (MEM_32bits() && (mlBits+llBits>24)) BIT_reloadDStream(&seqState->DStream);
+
+    seq.litLength = LL_base[llCode]
+                  + ((llCode>15) ? BIT_readBitsFast(&seqState->DStream, llBits) : 0);    /* <=  16 bits */
+    if (  MEM_32bits()
+      || (totalBits > 64 - 7 - (LLFSELog+MLFSELog+OffFSELog)) )
+       BIT_reloadDStream(&seqState->DStream);
+
+    DEBUGLOG(6, "seq: litL=%u, matchL=%u, offset=%u",
+                (U32)seq.litLength, (U32)seq.matchLength, (U32)seq.offset);
+
+    /* ANS state update */
+    FSE_updateState(&seqState->stateLL, &seqState->DStream);    /* <=  9 bits */
+    FSE_updateState(&seqState->stateML, &seqState->DStream);    /* <=  9 bits */
+    if (MEM_32bits()) BIT_reloadDStream(&seqState->DStream);    /* <= 18 bits */
+    FSE_updateState(&seqState->stateOffb, &seqState->DStream);  /* <=  8 bits */
+
+    return seq;
+}
+
+
+FORCE_INLINE
+size_t ZSTD_execSequence(BYTE* op,
+                         BYTE* const oend, seq_t sequence,
+                         const BYTE** litPtr, const BYTE* const litLimit,
+                         const BYTE* const base, const BYTE* const vBase, const BYTE* const dictEnd)
+{
+    BYTE* const oLitEnd = op + sequence.litLength;
+    size_t const sequenceLength = sequence.litLength + sequence.matchLength;
+    BYTE* const oMatchEnd = op + sequenceLength;   /* risk : address space overflow (32-bits) */
+    BYTE* const oend_w = oend - WILDCOPY_OVERLENGTH;
+    const BYTE* const iLitEnd = *litPtr + sequence.litLength;
+    const BYTE* match = oLitEnd - sequence.offset;
+
+    /* check */
+    if (oMatchEnd>oend) return ERROR(dstSize_tooSmall); /* literals + match must end within the output buffer */
+    if (iLitEnd > litLimit) return ERROR(corruption_detected);   /* over-read beyond lit buffer */
+    if (oLitEnd>oend_w) return ZSTD_execSequenceLast7(op, oend, sequence, litPtr, litLimit, base, vBase, dictEnd);
+
+    /* copy Literals */
+    ZSTD_copy8(op, *litPtr);
+    if (sequence.litLength > 8)
+        ZSTD_wildcopy(op+8, (*litPtr)+8, sequence.litLength - 8);   /* note : since oLitEnd <= oend-WILDCOPY_OVERLENGTH, no risk of overwrite beyond oend */
+    op = oLitEnd;
+    *litPtr = iLitEnd;   /* update for next sequence */
+
+    /* copy Match */
+    if (sequence.offset > (size_t)(oLitEnd - base)) {
+        /* offset beyond prefix -> go into extDict */
+        if (sequence.offset > (size_t)(oLitEnd - vBase))
+            return ERROR(corruption_detected);
+        match = dictEnd + (match - base);
+        if (match + sequence.matchLength <= dictEnd) {
+            memmove(oLitEnd, match, sequence.matchLength);
+            return sequenceLength;
+        }
+        /* span extDict & currentPrefixSegment */
+        {   size_t const length1 = dictEnd - match;
+            memmove(oLitEnd, match, length1);
+            op = oLitEnd + length1;
+            sequence.matchLength -= length1;
+            match = base;
+            if (op > oend_w || sequence.matchLength < MINMATCH) {
+              U32 i;
+              for (i = 0; i < sequence.matchLength; ++i) op[i] = match[i];
+              return sequenceLength;
+            }
+    }   }
+    /* Requirement: op <= oend_w && sequence.matchLength >= MINMATCH */
 
     /* match within prefix */
     if (sequence.offset < 8) {
         /* close range match, overlap */
         static const U32 dec32table[] = { 0, 1, 2, 1, 4, 4, 4, 4 };   /* added */
-        static const int dec64table[] = { 8, 8, 8, 7, 8, 9,10,11 };   /* substracted */
+        static const int dec64table[] = { 8, 8, 8, 7, 8, 9,10,11 };   /* subtracted */
         int const sub2 = dec64table[sequence.offset];
         op[0] = match[0];
         op[1] = match[1];
@@ -906,7 +1033,7 @@ size_t ZSTD_execSequence(BYTE* op,
         }
         while (op < oMatchEnd) *op++ = *match++;
     } else {
-        ZSTD_wildcopy(op, match, sequence.matchLength-8);   /* works even if matchLength < 8 */
+        ZSTD_wildcopy(op, match, (ptrdiff_t)sequence.matchLength-8);   /* works even if matchLength < 8 */
     }
     return sequenceLength;
 }
@@ -923,15 +1050,17 @@ static size_t ZSTD_decompressSequences(
     BYTE* const oend = ostart + maxDstSize;
     BYTE* op = ostart;
     const BYTE* litPtr = dctx->litPtr;
-    const BYTE* const litLimit_w = litPtr + dctx->litBufSize - WILDCOPY_OVERLENGTH;
     const BYTE* const litEnd = litPtr + dctx->litSize;
     const BYTE* const base = (const BYTE*) (dctx->base);
     const BYTE* const vBase = (const BYTE*) (dctx->vBase);
     const BYTE* const dictEnd = (const BYTE*) (dctx->dictEnd);
     int nbSeq;
+    DEBUGLOG(5, "ZSTD_decompressSequences");
 
     /* Build Decoding Tables */
     {   size_t const seqHSize = ZSTD_decodeSeqHeaders(dctx, &nbSeq, ip, seqSize);
+        DEBUGLOG(5, "ZSTD_decodeSeqHeaders: size=%u, nbSeq=%i",
+                    (U32)seqHSize, nbSeq);
         if (ZSTD_isError(seqHSize)) return seqHSize;
         ip += seqHSize;
     }
@@ -940,7 +1069,7 @@ static size_t ZSTD_decompressSequences(
     if (nbSeq) {
         seqState_t seqState;
         dctx->fseEntropy = 1;
-        { U32 i; for (i=0; i<ZSTD_REP_NUM; i++) seqState.prevOffset[i] = dctx->rep[i]; }
+        { U32 i; for (i=0; i<ZSTD_REP_NUM; i++) seqState.prevOffset[i] = dctx->entropy.rep[i]; }
         CHECK_E(BIT_initDStream(&seqState.DStream, ip, iend-ip), corruption_detected);
         FSE_initDState(&seqState.stateLL, &seqState.DStream, dctx->LLTptr);
         FSE_initDState(&seqState.stateOffb, &seqState.DStream, dctx->OFTptr);
@@ -949,15 +1078,17 @@ static size_t ZSTD_decompressSequences(
         for ( ; (BIT_reloadDStream(&(seqState.DStream)) <= BIT_DStream_completed) && nbSeq ; ) {
             nbSeq--;
             {   seq_t const sequence = ZSTD_decodeSequence(&seqState);
-                size_t const oneSeqSize = ZSTD_execSequence(op, oend, sequence, &litPtr, litLimit_w, base, vBase, dictEnd);
+                size_t const oneSeqSize = ZSTD_execSequence(op, oend, sequence, &litPtr, litEnd, base, vBase, dictEnd);
+                DEBUGLOG(6, "regenerated sequence size : %u", (U32)oneSeqSize);
                 if (ZSTD_isError(oneSeqSize)) return oneSeqSize;
                 op += oneSeqSize;
         }   }
 
         /* check if reached exact end */
+        DEBUGLOG(5, "after decode loop, remaining nbSeq : %i", nbSeq);
         if (nbSeq) return ERROR(corruption_detected);
         /* save reps for next block */
-        { U32 i; for (i=0; i<ZSTD_REP_NUM; i++) dctx->rep[i] = (U32)(seqState.prevOffset[i]); }
+        { U32 i; for (i=0; i<ZSTD_REP_NUM; i++) dctx->entropy.rep[i] = (U32)(seqState.prevOffset[i]); }
     }
 
     /* last literal segment */
@@ -971,6 +1102,297 @@ static size_t ZSTD_decompressSequences(
 }
 
 
+FORCE_INLINE seq_t ZSTD_decodeSequenceLong_generic(seqState_t* seqState, int const longOffsets)
+{
+    seq_t seq;
+
+    U32 const llCode = FSE_peekSymbol(&seqState->stateLL);
+    U32 const mlCode = FSE_peekSymbol(&seqState->stateML);
+    U32 const ofCode = FSE_peekSymbol(&seqState->stateOffb);   /* <= maxOff, by table construction */
+
+    U32 const llBits = LL_bits[llCode];
+    U32 const mlBits = ML_bits[mlCode];
+    U32 const ofBits = ofCode;
+    U32 const totalBits = llBits+mlBits+ofBits;
+
+    static const U32 LL_base[MaxLL+1] = {
+                             0,  1,    2,     3,     4,     5,     6,      7,
+                             8,  9,   10,    11,    12,    13,    14,     15,
+                            16, 18,   20,    22,    24,    28,    32,     40,
+                            48, 64, 0x80, 0x100, 0x200, 0x400, 0x800, 0x1000,
+                            0x2000, 0x4000, 0x8000, 0x10000 };
+
+    static const U32 ML_base[MaxML+1] = {
+                             3,  4,  5,    6,     7,     8,     9,    10,
+                            11, 12, 13,   14,    15,    16,    17,    18,
+                            19, 20, 21,   22,    23,    24,    25,    26,
+                            27, 28, 29,   30,    31,    32,    33,    34,
+                            35, 37, 39,   41,    43,    47,    51,    59,
+                            67, 83, 99, 0x83, 0x103, 0x203, 0x403, 0x803,
+                            0x1003, 0x2003, 0x4003, 0x8003, 0x10003 };
+
+    static const U32 OF_base[MaxOff+1] = {
+                     0,        1,       1,       5,     0xD,     0x1D,     0x3D,     0x7D,
+                     0xFD,   0x1FD,   0x3FD,   0x7FD,   0xFFD,   0x1FFD,   0x3FFD,   0x7FFD,
+                     0xFFFD, 0x1FFFD, 0x3FFFD, 0x7FFFD, 0xFFFFD, 0x1FFFFD, 0x3FFFFD, 0x7FFFFD,
+                     0xFFFFFD, 0x1FFFFFD, 0x3FFFFFD, 0x7FFFFFD, 0xFFFFFFD };
+
+    /* sequence */
+    {   size_t offset;
+        if (!ofCode)
+            offset = 0;
+        else {
+            if (longOffsets) {
+                int const extraBits = ofBits - MIN(ofBits, STREAM_ACCUMULATOR_MIN);
+                offset = OF_base[ofCode] + (BIT_readBitsFast(&seqState->DStream, ofBits - extraBits) << extraBits);
+                if (MEM_32bits() || extraBits) BIT_reloadDStream(&seqState->DStream);
+                if (extraBits) offset += BIT_readBitsFast(&seqState->DStream, extraBits);
+            } else {
+                offset = OF_base[ofCode] + BIT_readBitsFast(&seqState->DStream, ofBits);   /* <=  (ZSTD_WINDOWLOG_MAX-1) bits */
+                if (MEM_32bits()) BIT_reloadDStream(&seqState->DStream);
+            }
+        }
+
+        if (ofCode <= 1) {
+            offset += (llCode==0);
+            if (offset) {
+                size_t temp = (offset==3) ? seqState->prevOffset[0] - 1 : seqState->prevOffset[offset];
+                temp += !temp;   /* 0 is not valid; input is corrupted; force offset to 1 */
+                if (offset != 1) seqState->prevOffset[2] = seqState->prevOffset[1];
+                seqState->prevOffset[1] = seqState->prevOffset[0];
+                seqState->prevOffset[0] = offset = temp;
+            } else {
+                offset = seqState->prevOffset[0];
+            }
+        } else {
+            seqState->prevOffset[2] = seqState->prevOffset[1];
+            seqState->prevOffset[1] = seqState->prevOffset[0];
+            seqState->prevOffset[0] = offset;
+        }
+        seq.offset = offset;
+    }
+
+    seq.matchLength = ML_base[mlCode] + ((mlCode>31) ? BIT_readBitsFast(&seqState->DStream, mlBits) : 0);  /* <=  16 bits */
+    if (MEM_32bits() && (mlBits+llBits>24)) BIT_reloadDStream(&seqState->DStream);
+
+    seq.litLength = LL_base[llCode] + ((llCode>15) ? BIT_readBitsFast(&seqState->DStream, llBits) : 0);    /* <=  16 bits */
+    if (MEM_32bits() ||
+       (totalBits > 64 - 7 - (LLFSELog+MLFSELog+OffFSELog)) ) BIT_reloadDStream(&seqState->DStream);
+
+    {   size_t const pos = seqState->pos + seq.litLength;
+        seq.match = seqState->base + pos - seq.offset;    /* single memory segment */
+        if (seq.offset > pos) seq.match += seqState->gotoDict;   /* separate memory segment */
+        seqState->pos = pos + seq.matchLength;
+    }
+
+    /* ANS state update */
+    FSE_updateState(&seqState->stateLL, &seqState->DStream);    /* <=  9 bits */
+    FSE_updateState(&seqState->stateML, &seqState->DStream);    /* <=  9 bits */
+    if (MEM_32bits()) BIT_reloadDStream(&seqState->DStream);    /* <= 18 bits */
+    FSE_updateState(&seqState->stateOffb, &seqState->DStream);  /* <=  8 bits */
+
+    return seq;
+}
+
+static seq_t ZSTD_decodeSequenceLong(seqState_t* seqState, unsigned const windowSize) {
+    if (ZSTD_highbit32(windowSize) > STREAM_ACCUMULATOR_MIN) {
+        return ZSTD_decodeSequenceLong_generic(seqState, 1);
+    } else {
+        return ZSTD_decodeSequenceLong_generic(seqState, 0);
+    }
+}
+
+FORCE_INLINE
+size_t ZSTD_execSequenceLong(BYTE* op,
+                                BYTE* const oend, seq_t sequence,
+                                const BYTE** litPtr, const BYTE* const litLimit,
+                                const BYTE* const base, const BYTE* const vBase, const BYTE* const dictEnd)
+{
+    BYTE* const oLitEnd = op + sequence.litLength;
+    size_t const sequenceLength = sequence.litLength + sequence.matchLength;
+    BYTE* const oMatchEnd = op + sequenceLength;   /* risk : address space overflow (32-bits) */
+    BYTE* const oend_w = oend - WILDCOPY_OVERLENGTH;
+    const BYTE* const iLitEnd = *litPtr + sequence.litLength;
+    const BYTE* match = sequence.match;
+
+    /* check */
+#if 1
+    if (oMatchEnd>oend) return ERROR(dstSize_tooSmall); /* literals + match must end within the output buffer */
+    if (iLitEnd > litLimit) return ERROR(corruption_detected);   /* over-read beyond lit buffer */
+    if (oLitEnd>oend_w) return ZSTD_execSequenceLast7(op, oend, sequence, litPtr, litLimit, base, vBase, dictEnd);
+#endif
+
+    /* copy Literals */
+    ZSTD_copy8(op, *litPtr);
+    if (sequence.litLength > 8)
+        ZSTD_wildcopy(op+8, (*litPtr)+8, sequence.litLength - 8);   /* note : since oLitEnd <= oend-WILDCOPY_OVERLENGTH, no risk of overwrite beyond oend */
+    op = oLitEnd;
+    *litPtr = iLitEnd;   /* update for next sequence */
+
+    /* copy Match */
+#if 1
+    if (sequence.offset > (size_t)(oLitEnd - base)) {
+        /* offset beyond prefix */
+        if (sequence.offset > (size_t)(oLitEnd - vBase)) return ERROR(corruption_detected);
+        if (match + sequence.matchLength <= dictEnd) {
+            memmove(oLitEnd, match, sequence.matchLength);
+            return sequenceLength;
+        }
+        /* span extDict & currentPrefixSegment */
+        {   size_t const length1 = dictEnd - match;
+            memmove(oLitEnd, match, length1);
+            op = oLitEnd + length1;
+            sequence.matchLength -= length1;
+            match = base;
+            if (op > oend_w || sequence.matchLength < MINMATCH) {
+              U32 i;
+              for (i = 0; i < sequence.matchLength; ++i) op[i] = match[i];
+              return sequenceLength;
+            }
+    }   }
+    /* Requirement: op <= oend_w && sequence.matchLength >= MINMATCH */
+#endif
+
+    /* match within prefix */
+    if (sequence.offset < 8) {
+        /* close range match, overlap */
+        static const U32 dec32table[] = { 0, 1, 2, 1, 4, 4, 4, 4 };   /* added */
+        static const int dec64table[] = { 8, 8, 8, 7, 8, 9,10,11 };   /* subtracted */
+        int const sub2 = dec64table[sequence.offset];
+        op[0] = match[0];
+        op[1] = match[1];
+        op[2] = match[2];
+        op[3] = match[3];
+        match += dec32table[sequence.offset];
+        ZSTD_copy4(op+4, match);
+        match -= sub2;
+    } else {
+        ZSTD_copy8(op, match);
+    }
+    op += 8; match += 8;
+
+    if (oMatchEnd > oend-(16-MINMATCH)) {
+        if (op < oend_w) {
+            ZSTD_wildcopy(op, match, oend_w - op);
+            match += oend_w - op;
+            op = oend_w;
+        }
+        while (op < oMatchEnd) *op++ = *match++;
+    } else {
+        ZSTD_wildcopy(op, match, (ptrdiff_t)sequence.matchLength-8);   /* works even if matchLength < 8 */
+    }
+    return sequenceLength;
+}
+
+static size_t ZSTD_decompressSequencesLong(
+                               ZSTD_DCtx* dctx,
+                               void* dst, size_t maxDstSize,
+                         const void* seqStart, size_t seqSize)
+{
+    const BYTE* ip = (const BYTE*)seqStart;
+    const BYTE* const iend = ip + seqSize;
+    BYTE* const ostart = (BYTE* const)dst;
+    BYTE* const oend = ostart + maxDstSize;
+    BYTE* op = ostart;
+    const BYTE* litPtr = dctx->litPtr;
+    const BYTE* const litEnd = litPtr + dctx->litSize;
+    const BYTE* const base = (const BYTE*) (dctx->base);
+    const BYTE* const vBase = (const BYTE*) (dctx->vBase);
+    const BYTE* const dictEnd = (const BYTE*) (dctx->dictEnd);
+    unsigned const windowSize32 = (unsigned)dctx->fParams.windowSize;
+    int nbSeq;
+
+    /* Build Decoding Tables */
+    {   size_t const seqHSize = ZSTD_decodeSeqHeaders(dctx, &nbSeq, ip, seqSize);
+        if (ZSTD_isError(seqHSize)) return seqHSize;
+        ip += seqHSize;
+    }
+
+    /* Regen sequences */
+    if (nbSeq) {
+#define STORED_SEQS 4
+#define STOSEQ_MASK (STORED_SEQS-1)
+#define ADVANCED_SEQS 4
+        seq_t sequences[STORED_SEQS];
+        int const seqAdvance = MIN(nbSeq, ADVANCED_SEQS);
+        seqState_t seqState;
+        int seqNb;
+        dctx->fseEntropy = 1;
+        { U32 i; for (i=0; i<ZSTD_REP_NUM; i++) seqState.prevOffset[i] = dctx->entropy.rep[i]; }
+        seqState.base = base;
+        seqState.pos = (size_t)(op-base);
+        seqState.gotoDict = (uPtrDiff)dictEnd - (uPtrDiff)base; /* cast to avoid undefined behaviour */
+        CHECK_E(BIT_initDStream(&seqState.DStream, ip, iend-ip), corruption_detected);
+        FSE_initDState(&seqState.stateLL, &seqState.DStream, dctx->LLTptr);
+        FSE_initDState(&seqState.stateOffb, &seqState.DStream, dctx->OFTptr);
+        FSE_initDState(&seqState.stateML, &seqState.DStream, dctx->MLTptr);
+
+        /* prepare in advance */
+        for (seqNb=0; (BIT_reloadDStream(&seqState.DStream) <= BIT_DStream_completed) && seqNb<seqAdvance; seqNb++) {
+            sequences[seqNb] = ZSTD_decodeSequenceLong(&seqState, windowSize32);
+        }
+        if (seqNb<seqAdvance) return ERROR(corruption_detected);
+
+        /* decode and decompress */
+        for ( ; (BIT_reloadDStream(&(seqState.DStream)) <= BIT_DStream_completed) && seqNb<nbSeq ; seqNb++) {
+            seq_t const sequence = ZSTD_decodeSequenceLong(&seqState, windowSize32);
+            size_t const oneSeqSize = ZSTD_execSequenceLong(op, oend, sequences[(seqNb-ADVANCED_SEQS) & STOSEQ_MASK], &litPtr, litEnd, base, vBase, dictEnd);
+            if (ZSTD_isError(oneSeqSize)) return oneSeqSize;
+            ZSTD_PREFETCH(sequence.match);
+            sequences[seqNb&STOSEQ_MASK] = sequence;
+            op += oneSeqSize;
+        }
+        if (seqNb<nbSeq) return ERROR(corruption_detected);
+
+        /* finish queue */
+        seqNb -= seqAdvance;
+        for ( ; seqNb<nbSeq ; seqNb++) {
+            size_t const oneSeqSize = ZSTD_execSequenceLong(op, oend, sequences[seqNb&STOSEQ_MASK], &litPtr, litEnd, base, vBase, dictEnd);
+            if (ZSTD_isError(oneSeqSize)) return oneSeqSize;
+            op += oneSeqSize;
+        }
+
+        /* save reps for next block */
+        { U32 i; for (i=0; i<ZSTD_REP_NUM; i++) dctx->entropy.rep[i] = (U32)(seqState.prevOffset[i]); }
+    }
+
+    /* last literal segment */
+    {   size_t const lastLLSize = litEnd - litPtr;
+        if (lastLLSize > (size_t)(oend-op)) return ERROR(dstSize_tooSmall);
+        memcpy(op, litPtr, lastLLSize);
+        op += lastLLSize;
+    }
+
+    return op-ostart;
+}
+
+
+static size_t ZSTD_decompressBlock_internal(ZSTD_DCtx* dctx,
+                            void* dst, size_t dstCapacity,
+                      const void* src, size_t srcSize)
+{   /* blockType == blockCompressed */
+    const BYTE* ip = (const BYTE*)src;
+    DEBUGLOG(5, "ZSTD_decompressBlock_internal");
+
+    if (srcSize >= ZSTD_BLOCKSIZE_MAX) return ERROR(srcSize_wrong);
+
+    /* Decode literals section */
+    {   size_t const litCSize = ZSTD_decodeLiteralsBlock(dctx, src, srcSize);
+        DEBUGLOG(5, "ZSTD_decodeLiteralsBlock : %u", (U32)litCSize);
+        if (ZSTD_isError(litCSize)) return litCSize;
+        ip += litCSize;
+        srcSize -= litCSize;
+    }
+    if (sizeof(size_t) > 4)  /* do not enable prefetching on 32-bits x86, as it's performance detrimental */
+                             /* likely because of register pressure */
+                             /* if that's the correct cause, then 32-bits ARM should be affected differently */
+                             /* it would be good to test this on ARM real hardware, to see if prefetch version improves speed */
+        if (dctx->fParams.windowSize > (1<<23))
+            return ZSTD_decompressSequencesLong(dctx, dst, dstCapacity, ip, srcSize);
+    return ZSTD_decompressSequences(dctx, dst, dstCapacity, ip, srcSize);
+}
+
+
 static void ZSTD_checkContinuity(ZSTD_DCtx* dctx, const void* dst)
 {
     if (dst != dctx->previousDstEnd) {   /* not contiguous */
@@ -981,25 +1403,6 @@ static void ZSTD_checkContinuity(ZSTD_DCtx* dctx, const void* dst)
     }
 }
 
-
-static size_t ZSTD_decompressBlock_internal(ZSTD_DCtx* dctx,
-                            void* dst, size_t dstCapacity,
-                      const void* src, size_t srcSize)
-{   /* blockType == blockCompressed */
-    const BYTE* ip = (const BYTE*)src;
-
-    if (srcSize >= ZSTD_BLOCKSIZE_ABSOLUTEMAX) return ERROR(srcSize_wrong);
-
-    /* Decode literals sub-block */
-    {   size_t const litCSize = ZSTD_decodeLiteralsBlock(dctx, src, srcSize);
-        if (ZSTD_isError(litCSize)) return litCSize;
-        ip += litCSize;
-        srcSize -= litCSize;
-    }
-    return ZSTD_decompressSequences(dctx, dst, dstCapacity, ip, srcSize);
-}
-
-
 size_t ZSTD_decompressBlock(ZSTD_DCtx* dctx,
                             void* dst, size_t dstCapacity,
                       const void* src, size_t srcSize)
@@ -1029,27 +1432,81 @@ size_t ZSTD_generateNxBytes(void* dst, size_t dstCapacity, BYTE byte, size_t len
     return length;
 }
 
+/** ZSTD_findFrameCompressedSize() :
+ *  compatible with legacy mode (legacy frames are handed to ZSTD_findFrameCompressedSizeLegacy() when legacy support is compiled in)
+ *  `src` must point to the start of a ZSTD frame, ZSTD legacy frame, or skippable frame
+ *  `srcSize` must be at least as large as the frame contained
+ *  @return : the compressed size of the frame starting at `src`, or an error code, which can be tested using ZSTD_isError() */
+size_t ZSTD_findFrameCompressedSize(const void *src, size_t srcSize)
+{
+#if defined(ZSTD_LEGACY_SUPPORT) && (ZSTD_LEGACY_SUPPORT >= 1)
+    if (ZSTD_isLegacy(src, srcSize)) return ZSTD_findFrameCompressedSizeLegacy(src, srcSize);
+#endif
+    if (srcSize >= ZSTD_skippableHeaderSize &&
+            (MEM_readLE32(src) & 0xFFFFFFF0U) == ZSTD_MAGIC_SKIPPABLE_START) {
+        return ZSTD_skippableHeaderSize + MEM_readLE32((const BYTE*)src + 4);   /* header + declared content size; not compared against srcSize here */
+    } else {
+        const BYTE* ip = (const BYTE*)src;
+        const BYTE* const ipstart = ip;
+        size_t remainingSize = srcSize;
+        ZSTD_frameHeader fParams;
+
+        size_t const headerSize = ZSTD_frameHeaderSize(ip, remainingSize);
+        if (ZSTD_isError(headerSize)) return headerSize;
+
+        /* Frame Header */
+        {   size_t const ret = ZSTD_getFrameHeader(&fParams, ip, remainingSize);
+            if (ZSTD_isError(ret)) return ret;
+            if (ret > 0) return ERROR(srcSize_wrong);   /* any non-zero, non-error result is rejected */
+        }
+
+        ip += headerSize;
+        remainingSize -= headerSize;
+
+        /* Loop on each block : advance past every block without decoding it */
+        while (1) {
+            blockProperties_t blockProperties;
+            size_t const cBlockSize = ZSTD_getcBlockSize(ip, remainingSize, &blockProperties);
+            if (ZSTD_isError(cBlockSize)) return cBlockSize;
+
+            if (ZSTD_blockHeaderSize + cBlockSize > remainingSize) return ERROR(srcSize_wrong);   /* block would end past the available input */
+
+            ip += ZSTD_blockHeaderSize + cBlockSize;
+            remainingSize -= ZSTD_blockHeaderSize + cBlockSize;
+
+            if (blockProperties.lastBlock) break;
+        }
+
+        if (fParams.checksumFlag) {   /* Frame content checksum : 4 more bytes belong to the frame */
+            if (remainingSize < 4) return ERROR(srcSize_wrong);
+            ip += 4;
+            remainingSize -= 4;
+        }
+
+        return ip - ipstart;   /* distance walked from the start of the frame */
+    }
+}
 
 /*! ZSTD_decompressFrame() :
-*   `dctx` must be properly initialized */
+*   @dctx must be properly initialized */
 static size_t ZSTD_decompressFrame(ZSTD_DCtx* dctx,
                                  void* dst, size_t dstCapacity,
-                                 const void* src, size_t srcSize)
+                                 const void** srcPtr, size_t *srcSizePtr)
 {
-    const BYTE* ip = (const BYTE*)src;
+    const BYTE* ip = (const BYTE*)(*srcPtr);
     BYTE* const ostart = (BYTE* const)dst;
     BYTE* const oend = ostart + dstCapacity;
     BYTE* op = ostart;
-    size_t remainingSize = srcSize;
+    size_t remainingSize = *srcSizePtr;
 
     /* check */
-    if (srcSize < ZSTD_frameHeaderSize_min+ZSTD_blockHeaderSize) return ERROR(srcSize_wrong);
+    if (remainingSize < ZSTD_frameHeaderSize_min+ZSTD_blockHeaderSize) return ERROR(srcSize_wrong);
 
     /* Frame Header */
-    {   size_t const frameHeaderSize = ZSTD_frameHeaderSize(src, ZSTD_frameHeaderSize_prefix);
+    {   size_t const frameHeaderSize = ZSTD_frameHeaderSize(ip, ZSTD_frameHeaderSize_prefix);
         if (ZSTD_isError(frameHeaderSize)) return frameHeaderSize;
-        if (srcSize < frameHeaderSize+ZSTD_blockHeaderSize) return ERROR(srcSize_wrong);
-        CHECK_F(ZSTD_decodeFrameHeader(dctx, src, frameHeaderSize));
+        if (remainingSize < frameHeaderSize+ZSTD_blockHeaderSize) return ERROR(srcSize_wrong);
+        CHECK_F(ZSTD_decodeFrameHeader(dctx, ip, frameHeaderSize));
         ip += frameHeaderSize; remainingSize -= frameHeaderSize;
     }
 
@@ -1094,25 +1551,111 @@ static size_t ZSTD_decompressFrame(ZSTD_DCtx* dctx,
         if (remainingSize<4) return ERROR(checksum_wrong);
         checkRead = MEM_readLE32(ip);
         if (checkRead != checkCalc) return ERROR(checksum_wrong);
+        ip += 4;
         remainingSize -= 4;
     }
 
-    if (remainingSize) return ERROR(srcSize_wrong);
+    /* Allow caller to get size read */
+    *srcPtr = ip;
+    *srcSizePtr = remainingSize;
     return op-ostart;
 }
 
+static const void* ZSTD_DDictDictContent(const ZSTD_DDict* ddict);
+static size_t ZSTD_DDictDictSize(const ZSTD_DDict* ddict);
+
+static size_t ZSTD_decompressMultiFrame(ZSTD_DCtx* dctx,
+                                        void* dst, size_t dstCapacity,
+                                  const void* src, size_t srcSize,
+                                  const void *dict, size_t dictSize,
+                                  const ZSTD_DDict* ddict)
+{
+    void* const dststart = dst;
+
+    if (ddict) {
+        if (dict) {
+            /* programmer error, these two cases should be mutually exclusive */
+            return ERROR(GENERIC);
+        }
+
+        dict = ZSTD_DDictDictContent(ddict);
+        dictSize = ZSTD_DDictDictSize(ddict);
+    }
+
+    while (srcSize >= ZSTD_frameHeaderSize_prefix) {
+        U32 magicNumber;
+
+#if defined(ZSTD_LEGACY_SUPPORT) && (ZSTD_LEGACY_SUPPORT >= 1)
+        if (ZSTD_isLegacy(src, srcSize)) {
+            size_t decodedSize;
+            size_t const frameSize = ZSTD_findFrameCompressedSizeLegacy(src, srcSize);
+            if (ZSTD_isError(frameSize)) return frameSize;
+            /* legacy support is incompatible with static dctx */
+            if (dctx->staticSize) return ERROR(memory_allocation);
+
+            decodedSize = ZSTD_decompressLegacy(dst, dstCapacity, src, frameSize, dict, dictSize);
+
+            dst = (BYTE*)dst + decodedSize;
+            dstCapacity -= decodedSize;
+
+            src = (const BYTE*)src + frameSize;
+            srcSize -= frameSize;
+
+            continue;
+        }
+#endif
+
+        magicNumber = MEM_readLE32(src);
+        if (magicNumber != ZSTD_MAGICNUMBER) {
+            if ((magicNumber & 0xFFFFFFF0U) == ZSTD_MAGIC_SKIPPABLE_START) {
+                size_t skippableSize;
+                if (srcSize < ZSTD_skippableHeaderSize)
+                    return ERROR(srcSize_wrong);
+                skippableSize = MEM_readLE32((const BYTE *)src + 4) +
+                                ZSTD_skippableHeaderSize;
+                if (srcSize < skippableSize) {
+                    return ERROR(srcSize_wrong);
+                }
+
+                src = (const BYTE *)src + skippableSize;
+                srcSize -= skippableSize;
+                continue;
+            } else {
+                return ERROR(prefix_unknown);
+            }
+        }
+
+        if (ddict) {
+            /* we were called from ZSTD_decompress_usingDDict */
+            CHECK_F(ZSTD_decompressBegin_usingDDict(dctx, ddict));
+        } else {
+            /* this will initialize correctly with no dict if dict == NULL, so
+             * use this in all cases but ddict */
+            CHECK_F(ZSTD_decompressBegin_usingDict(dctx, dict, dictSize));
+        }
+        ZSTD_checkContinuity(dctx, dst);
+
+        {   const size_t res = ZSTD_decompressFrame(dctx, dst, dstCapacity,
+                                                    &src, &srcSize);
+            if (ZSTD_isError(res)) return res;
+            /* don't need to bounds check this, ZSTD_decompressFrame will have
+             * already */
+            dst = (BYTE*)dst + res;
+            dstCapacity -= res;
+        }
+    }
+
+    if (srcSize) return ERROR(srcSize_wrong); /* input not entirely consumed */
+
+    return (BYTE*)dst - (BYTE*)dststart;
+}
 
 size_t ZSTD_decompress_usingDict(ZSTD_DCtx* dctx,
                                  void* dst, size_t dstCapacity,
                            const void* src, size_t srcSize,
                            const void* dict, size_t dictSize)
 {
-#if defined(ZSTD_LEGACY_SUPPORT) && (ZSTD_LEGACY_SUPPORT==1)
-    if (ZSTD_isLegacy(src, srcSize)) return ZSTD_decompressLegacy(dst, dstCapacity, src, srcSize, dict, dictSize);
-#endif
-    ZSTD_decompressBegin_usingDict(dctx, dict, dictSize);
-    ZSTD_checkContinuity(dctx, dst);
-    return ZSTD_decompressFrame(dctx, dst, dstCapacity, src, srcSize);
+    return ZSTD_decompressMultiFrame(dctx, dst, dstCapacity, src, srcSize, dict, dictSize, NULL);   /* raw dictionary variant : no ddict */
 }
 
 
@@ -1124,7 +1667,7 @@ size_t ZSTD_decompressDCtx(ZSTD_DCtx* dctx, void* dst, size_t dstCapacity, const
 
 size_t ZSTD_decompress(void* dst, size_t dstCapacity, const void* src, size_t srcSize)
 {
-#if defined(ZSTD_HEAPMODE) && (ZSTD_HEAPMODE==1)
+#if defined(ZSTD_HEAPMODE) && (ZSTD_HEAPMODE>=1)
     size_t regenSize;
     ZSTD_DCtx* const dctx = ZSTD_createDCtx();
     if (dctx==NULL) return ERROR(memory_allocation);
@@ -1148,6 +1691,7 @@ ZSTD_nextInputType_e ZSTD_nextInputType(ZSTD_DCtx* dctx) {
     switch(dctx->stage)
     {
     default:   /* should not happen */
+        assert(0);
     case ZSTDds_getFrameHeaderSize:
     case ZSTDds_decodeFrameHeader:
         return ZSTDnit_frameHeader;
@@ -1165,21 +1709,24 @@ ZSTD_nextInputType_e ZSTD_nextInputType(ZSTD_DCtx* dctx) {
     }
 }
 
-int ZSTD_isSkipFrame(ZSTD_DCtx* dctx) { return dctx->stage == ZSTDds_skipFrame; }   /* for zbuff */
+static int ZSTD_isSkipFrame(ZSTD_DCtx* dctx) { return dctx->stage == ZSTDds_skipFrame; }   /* non-zero while dctx is in the ZSTDds_skipFrame stage */
 
 /** ZSTD_decompressContinue() :
-*   @return : nb of bytes generated into `dst` (necessarily <= `dstCapacity)
-*             or an error code, which can be tested using ZSTD_isError() */
+ *  srcSize : must be the exact nb of bytes expected (see ZSTD_nextSrcSizeToDecompress())
+ *  @return : nb of bytes generated into `dst` (necessarily <= `dstCapacity)
+ *            or an error code, which can be tested using ZSTD_isError() */
 size_t ZSTD_decompressContinue(ZSTD_DCtx* dctx, void* dst, size_t dstCapacity, const void* src, size_t srcSize)
 {
+    DEBUGLOG(5, "ZSTD_decompressContinue");
     /* Sanity check */
-    if (srcSize != dctx->expected) return ERROR(srcSize_wrong);
+    if (srcSize != dctx->expected) return ERROR(srcSize_wrong);   /* unauthorized */
     if (dstCapacity) ZSTD_checkContinuity(dctx, dst);
 
     switch (dctx->stage)
     {
     case ZSTDds_getFrameHeaderSize :
-        if (srcSize != ZSTD_frameHeaderSize_prefix) return ERROR(srcSize_wrong);      /* impossible */
+        if (srcSize != ZSTD_frameHeaderSize_prefix) return ERROR(srcSize_wrong);      /* unauthorized */
+        assert(src != NULL);
         if ((MEM_readLE32(src) & 0xFFFFFFF0U) == ZSTD_MAGIC_SKIPPABLE_START) {        /* skippable frame */
             memcpy(dctx->headerBuffer, src, ZSTD_frameHeaderSize_prefix);
             dctx->expected = ZSTD_skippableHeaderSize - ZSTD_frameHeaderSize_prefix;  /* magic number + skippable frame length */
@@ -1197,6 +1744,7 @@ size_t ZSTD_decompressContinue(ZSTD_DCtx* dctx, void* dst, size_t dstCapacity, c
         dctx->expected = 0;   /* not necessary to copy more */
 
     case ZSTDds_decodeFrameHeader:
+        assert(src != NULL);
         memcpy(dctx->headerBuffer + ZSTD_frameHeaderSize_prefix, src, dctx->expected);
         CHECK_F(ZSTD_decodeFrameHeader(dctx, dctx->headerBuffer, dctx->headerSize));
         dctx->expected = ZSTD_blockHeaderSize;
@@ -1224,17 +1772,19 @@ size_t ZSTD_decompressContinue(ZSTD_DCtx* dctx, void* dst, size_t dstCapacity, c
                     dctx->stage = ZSTDds_getFrameHeaderSize;
                 }
             } else {
-                dctx->expected = 3;  /* go directly to next header */
+                dctx->expected = ZSTD_blockHeaderSize;  /* jump to next header */
                 dctx->stage = ZSTDds_decodeBlockHeader;
             }
             return 0;
         }
     case ZSTDds_decompressLastBlock:
     case ZSTDds_decompressBlock:
+        DEBUGLOG(5, "case ZSTDds_decompressBlock");
         {   size_t rSize;
             switch(dctx->bType)
             {
             case bt_compressed:
+                DEBUGLOG(5, "case bt_compressed");
                 rSize = ZSTD_decompressBlock_internal(dctx, dst, dstCapacity, src, srcSize);
                 break;
             case bt_raw :
@@ -1274,7 +1824,8 @@ size_t ZSTD_decompressContinue(ZSTD_DCtx* dctx, void* dst, size_t dstCapacity, c
             return 0;
         }
     case ZSTDds_decodeSkippableHeader:
-        {   memcpy(dctx->headerBuffer + ZSTD_frameHeaderSize_prefix, src, dctx->expected);
+        {   assert(src != NULL);
+            memcpy(dctx->headerBuffer + ZSTD_frameHeaderSize_prefix, src, dctx->expected);
             dctx->expected = MEM_readLE32(dctx->headerBuffer + 4);
             dctx->stage = ZSTDds_skipFrame;
             return 0;
@@ -1299,47 +1850,61 @@ static size_t ZSTD_refDictContent(ZSTD_DCtx* dctx, const void* dict, size_t dict
     return 0;
 }
 
-static size_t ZSTD_loadEntropy(ZSTD_DCtx* dctx, const void* const dict, size_t const dictSize)
+/* ZSTD_loadEntropy() :
+ * dict : must point at beginning of a valid zstd dictionary (the 8-byte magic + dictID header is skipped here)
+ * @return : nb of bytes consumed from `dict` (header + entropy tables + 3 repcodes), or an error code (dictionary_corrupted) */
+static size_t ZSTD_loadEntropy(ZSTD_entropyTables_t* entropy, const void* const dict, size_t const dictSize)
 {
     const BYTE* dictPtr = (const BYTE*)dict;
     const BYTE* const dictEnd = dictPtr + dictSize;
 
-    {   size_t const hSize = HUF_readDTableX4(dctx->hufTable, dict, dictSize);
+    if (dictSize <= 8) return ERROR(dictionary_corrupted);   /* nothing would follow the header */
+    dictPtr += 8;   /* skip header = magic + dictID */
+
+
+    {   size_t const hSize = HUF_readDTableX4_wksp(
+            entropy->hufTable, dictPtr, dictEnd - dictPtr,
+            entropy->workspace, sizeof(entropy->workspace));
         if (HUF_isError(hSize)) return ERROR(dictionary_corrupted);
         dictPtr += hSize;
     }
 
     {   short offcodeNCount[MaxOff+1];
-        U32 offcodeMaxValue=MaxOff, offcodeLog=OffFSELog;
+        U32 offcodeMaxValue = MaxOff, offcodeLog;
         size_t const offcodeHeaderSize = FSE_readNCount(offcodeNCount, &offcodeMaxValue, &offcodeLog, dictPtr, dictEnd-dictPtr);
         if (FSE_isError(offcodeHeaderSize)) return ERROR(dictionary_corrupted);
-        CHECK_E(FSE_buildDTable(dctx->OFTable, offcodeNCount, offcodeMaxValue, offcodeLog), dictionary_corrupted);
+        if (offcodeLog > OffFSELog) return ERROR(dictionary_corrupted);   /* table log read from the dictionary must not exceed OffFSELog */
+        CHECK_E(FSE_buildDTable(entropy->OFTable, offcodeNCount, offcodeMaxValue, offcodeLog), dictionary_corrupted);
         dictPtr += offcodeHeaderSize;
     }
 
     {   short matchlengthNCount[MaxML+1];
-        unsigned matchlengthMaxValue = MaxML, matchlengthLog = MLFSELog;
+        unsigned matchlengthMaxValue = MaxML, matchlengthLog;
         size_t const matchlengthHeaderSize = FSE_readNCount(matchlengthNCount, &matchlengthMaxValue, &matchlengthLog, dictPtr, dictEnd-dictPtr);
         if (FSE_isError(matchlengthHeaderSize)) return ERROR(dictionary_corrupted);
-        CHECK_E(FSE_buildDTable(dctx->MLTable, matchlengthNCount, matchlengthMaxValue, matchlengthLog), dictionary_corrupted);
+        if (matchlengthLog > MLFSELog) return ERROR(dictionary_corrupted);   /* same bound check, against MLFSELog */
+        CHECK_E(FSE_buildDTable(entropy->MLTable, matchlengthNCount, matchlengthMaxValue, matchlengthLog), dictionary_corrupted);
         dictPtr += matchlengthHeaderSize;
     }
 
     {   short litlengthNCount[MaxLL+1];
-        unsigned litlengthMaxValue = MaxLL, litlengthLog = LLFSELog;
+        unsigned litlengthMaxValue = MaxLL, litlengthLog;
         size_t const litlengthHeaderSize = FSE_readNCount(litlengthNCount, &litlengthMaxValue, &litlengthLog, dictPtr, dictEnd-dictPtr);
         if (FSE_isError(litlengthHeaderSize)) return ERROR(dictionary_corrupted);
-        CHECK_E(FSE_buildDTable(dctx->LLTable, litlengthNCount, litlengthMaxValue, litlengthLog), dictionary_corrupted);
+        if (litlengthLog > LLFSELog) return ERROR(dictionary_corrupted);   /* same bound check, against LLFSELog */
+        CHECK_E(FSE_buildDTable(entropy->LLTable, litlengthNCount, litlengthMaxValue, litlengthLog), dictionary_corrupted);
         dictPtr += litlengthHeaderSize;
     }
 
     if (dictPtr+12 > dictEnd) return ERROR(dictionary_corrupted);
-    dctx->rep[0] = MEM_readLE32(dictPtr+0); if (dctx->rep[0] >= dictSize) return ERROR(dictionary_corrupted);
-    dctx->rep[1] = MEM_readLE32(dictPtr+4); if (dctx->rep[1] >= dictSize) return ERROR(dictionary_corrupted);
-    dctx->rep[2] = MEM_readLE32(dictPtr+8); if (dctx->rep[2] >= dictSize) return ERROR(dictionary_corrupted);
-    dictPtr += 12;
+    {   int i;
+        size_t const dictContentSize = (size_t)(dictEnd - (dictPtr+12));   /* bytes left once the 3 repcodes are read */
+        for (i=0; i<3; i++) {
+            U32 const rep = MEM_readLE32(dictPtr); dictPtr += 4;
+            if (rep==0 || rep >= dictContentSize) return ERROR(dictionary_corrupted);   /* each repcode must be non-zero and smaller than the content size */
+            entropy->rep[i] = rep;
+    }   }
 
-    dctx->litEntropy = dctx->fseEntropy = 1;
     return dictPtr - (const BYTE*)dict;
 }
 
@@ -1347,19 +1912,18 @@ static size_t ZSTD_decompress_insertDictionary(ZSTD_DCtx* dctx, const void* dict
 {
     if (dictSize < 8) return ZSTD_refDictContent(dctx, dict, dictSize);
     {   U32 const magic = MEM_readLE32(dict);
-        if (magic != ZSTD_DICT_MAGIC) {
+        if (magic != ZSTD_MAGIC_DICTIONARY) {
             return ZSTD_refDictContent(dctx, dict, dictSize);   /* pure content mode */
     }   }
     dctx->dictID = MEM_readLE32((const char*)dict + 4);
 
     /* load entropy tables */
-    dict = (const char*)dict + 8;
-    dictSize -= 8;
-    {   size_t const eSize = ZSTD_loadEntropy(dctx, dict, dictSize);
+    {   size_t const eSize = ZSTD_loadEntropy(&dctx->entropy, dict, dictSize);
         if (ZSTD_isError(eSize)) return ERROR(dictionary_corrupted);
         dict = (const char*)dict + eSize;
         dictSize -= eSize;
     }
+    dctx->litEntropy = dctx->fseEntropy = 1;
 
     /* reference dictionary content */
     return ZSTD_refDictContent(dctx, dict, dictSize);
@@ -1376,67 +1940,212 @@ size_t ZSTD_decompressBegin_usingDict(ZSTD_DCtx* dctx, const void* dict, size_t
 /* ======   ZSTD_DDict   ====== */
 
 struct ZSTD_DDict_s {
-    void* dict;
+    void* dictBuffer;            /* internal copy of the dictionary; NULL when the content is only referenced */
+    const void* dictContent;     /* dictionary bytes in use : dictBuffer, or the buffer supplied by the caller */
     size_t dictSize;
-    ZSTD_DCtx* refContext;
+    ZSTD_entropyTables_t entropy;   /* filled by ZSTD_loadEntropy() */
+    U32 dictID;                  /* 0 unless the content starts with ZSTD_MAGIC_DICTIONARY */
+    U32 entropyPresent;          /* 1 once entropy tables were loaded from the dictionary */
+    ZSTD_customMem cMem;         /* allocator used by ZSTD_freeDDict() */
 };  /* typedef'd to ZSTD_DDict within "zstd.h" */
 
-ZSTD_DDict* ZSTD_createDDict_advanced(const void* dict, size_t dictSize, ZSTD_customMem customMem)
+static const void* ZSTD_DDictDictContent(const ZSTD_DDict* ddict)
 {
-    if (!customMem.customAlloc && !customMem.customFree) customMem = defaultCustomMem;
-    if (!customMem.customAlloc || !customMem.customFree) return NULL;
+    return ddict->dictContent;   /* plain accessor : ddict is dereferenced without a NULL check */
+}
+
+static size_t ZSTD_DDictDictSize(const ZSTD_DDict* ddict)
+{
+    return ddict->dictSize;   /* plain accessor : ddict is dereferenced without a NULL check */
+}
+
+size_t ZSTD_decompressBegin_usingDDict(ZSTD_DCtx* dstDCtx, const ZSTD_DDict* ddict)
+{   /* begins a decompression on dstDCtx, then points it at the content and entropy tables held by ddict; @return : 0, or the error from ZSTD_decompressBegin() */
+    CHECK_F(ZSTD_decompressBegin(dstDCtx));
+    if (ddict) {   /* support begin on NULL */
+        dstDCtx->dictID = ddict->dictID;
+        dstDCtx->base = ddict->dictContent;
+        dstDCtx->vBase = ddict->dictContent;
+        dstDCtx->dictEnd = (const BYTE*)ddict->dictContent + ddict->dictSize;
+        dstDCtx->previousDstEnd = dstDCtx->dictEnd;
+        if (ddict->entropyPresent) {
+            dstDCtx->litEntropy = 1;
+            dstDCtx->fseEntropy = 1;
+            dstDCtx->LLTptr = ddict->entropy.LLTable;   /* tables are referenced inside ddict, not copied */
+            dstDCtx->MLTptr = ddict->entropy.MLTable;
+            dstDCtx->OFTptr = ddict->entropy.OFTable;
+            dstDCtx->HUFptr = ddict->entropy.hufTable;
+            dstDCtx->entropy.rep[0] = ddict->entropy.rep[0];   /* repcodes are copied by value */
+            dstDCtx->entropy.rep[1] = ddict->entropy.rep[1];
+            dstDCtx->entropy.rep[2] = ddict->entropy.rep[2];
+        } else {
+            dstDCtx->litEntropy = 0;
+            dstDCtx->fseEntropy = 0;
+        }
+    }
+    return 0;
+}
+
+static size_t ZSTD_loadEntropy_inDDict(ZSTD_DDict* ddict)
+{   /* sets ddict->dictID, ddict->entropy and ddict->entropyPresent from ddict->dictContent; @return : 0, or dictionary_corrupted */
+    ddict->dictID = 0;
+    ddict->entropyPresent = 0;
+    if (ddict->dictSize < 8) return 0;   /* too small to hold magic + dictID : left as content-only */
+    {   U32 const magic = MEM_readLE32(ddict->dictContent);
+        if (magic != ZSTD_MAGIC_DICTIONARY) return 0;   /* pure content mode */
+    }
+    ddict->dictID = MEM_readLE32((const char*)ddict->dictContent + 4);
+
+    /* load entropy tables */
+    CHECK_E( ZSTD_loadEntropy(&ddict->entropy, ddict->dictContent, ddict->dictSize), dictionary_corrupted );
+    ddict->entropyPresent = 1;
+    return 0;
+}
+
+
+static size_t ZSTD_initDDict_internal(ZSTD_DDict* ddict, const void* dict, size_t dictSize, unsigned byReference)
+{   /* references `dict` (byReference, NULL dict or zero size) or copies it through ddict->cMem, then parses it; @return : 0, or an error code */
+    if ((byReference) || (!dict) || (!dictSize)) {
+        ddict->dictBuffer = NULL;   /* nothing owned */
+        ddict->dictContent = dict;
+    } else {
+        void* const internalBuffer = ZSTD_malloc(dictSize, ddict->cMem);
+        ddict->dictBuffer = internalBuffer;   /* both fields are assigned before the NULL test below */
+        ddict->dictContent = internalBuffer;
+        if (!internalBuffer) return ERROR(memory_allocation);
+        memcpy(internalBuffer, dict, dictSize);
+    }
+    ddict->dictSize = dictSize;
+    ddict->entropy.hufTable[0] = (HUF_DTable)((HufLog)*0x1000001);  /* cover both little and big endian */
+
+    /* parse dictionary content */
+    CHECK_F( ZSTD_loadEntropy_inDDict(ddict) );
+
+    return 0;
+}
+
+ZSTD_DDict* ZSTD_createDDict_advanced(const void* dict, size_t dictSize, unsigned byReference, ZSTD_customMem customMem)
+{   /* allocates a DDict through customMem and initializes it from `dict`; @return : NULL on invalid allocator pair, allocation failure or init error */
+    if (!customMem.customAlloc ^ !customMem.customFree) return NULL;   /* alloc and free must be both set or both NULL */
 
     {   ZSTD_DDict* const ddict = (ZSTD_DDict*) ZSTD_malloc(sizeof(ZSTD_DDict), customMem);
-        void* const dictContent = ZSTD_malloc(dictSize, customMem);
-        ZSTD_DCtx* const dctx = ZSTD_createDCtx_advanced(customMem);
+        if (!ddict) return NULL;
+        ddict->cMem = customMem;   /* set before init : ZSTD_initDDict_internal() allocates through ddict->cMem */
 
-        if (!dictContent || !ddict || !dctx) {
-            ZSTD_free(dictContent, customMem);
-            ZSTD_free(ddict, customMem);
-            ZSTD_free(dctx, customMem);
+        if (ZSTD_isError( ZSTD_initDDict_internal(ddict, dict, dictSize, byReference) )) {
+            ZSTD_freeDDict(ddict);   /* releases the internal copy (if any) and the DDict itself */
             return NULL;
         }
 
-        memcpy(dictContent, dict, dictSize);
-        {   size_t const errorCode = ZSTD_decompressBegin_usingDict(dctx, dictContent, dictSize);
-            if (ZSTD_isError(errorCode)) {
-                ZSTD_free(dictContent, customMem);
-                ZSTD_free(ddict, customMem);
-                ZSTD_free(dctx, customMem);
-                return NULL;
-        }   }
-
-        ddict->dict = dictContent;
-        ddict->dictSize = dictSize;
-        ddict->refContext = dctx;
         return ddict;
     }
 }
 
 /*! ZSTD_createDDict() :
-*   Create a digested dictionary, ready to start decompression without startup delay.
-*   `dict` can be released after `ZSTD_DDict` creation */
+*   Create a digested dictionary, to start decompression without startup delay.
+*   `dict` content is copied inside DDict.
+*   Consequently, `dict` can be released after `ZSTD_DDict` creation. @return : NULL when creation fails */
 ZSTD_DDict* ZSTD_createDDict(const void* dict, size_t dictSize)
 {
     ZSTD_customMem const allocator = { NULL, NULL, NULL };
-    return ZSTD_createDDict_advanced(dict, dictSize, allocator);
+    return ZSTD_createDDict_advanced(dict, dictSize, 0, allocator);   /* byReference = 0 : copy the content */
 }
 
+/*! ZSTD_createDDict_byReference() :
+ *  Create a digested dictionary, to start decompression without startup delay.
+ *  Dictionary content is simply referenced (not copied), and will be accessed during decompression.
+ *  Warning : dictBuffer must outlive DDict (DDict must be freed before dictBuffer) */
+ZSTD_DDict* ZSTD_createDDict_byReference(const void* dictBuffer, size_t dictSize)
+{
+    ZSTD_customMem const allocator = { NULL, NULL, NULL };
+    return ZSTD_createDDict_advanced(dictBuffer, dictSize, 1, allocator);   /* byReference = 1 */
+}
+
+
+ZSTD_DDict* ZSTD_initStaticDDict(void* workspace, size_t workspaceSize,
+                                 const void* dict, size_t dictSize,
+                                 unsigned byReference)
+{   /* builds a DDict inside the caller-provided `workspace`; @return : NULL if workspace is misaligned or too small, or if init fails */
+    size_t const neededSpace = sizeof(ZSTD_DDict) + (byReference ? 0 : dictSize);
+    ZSTD_DDict* const ddict = (ZSTD_DDict*)workspace;
+    assert(workspace != NULL);
+    assert(dict != NULL);
+    if ((size_t)workspace & 7) return NULL;  /* 8-aligned */
+    if (workspaceSize < neededSpace) return NULL;
+    if (!byReference) {
+        memcpy(ddict+1, dict, dictSize);  /* local copy, placed in workspace right after the DDict struct */
+        dict = ddict+1;
+    }
+    if (ZSTD_isError( ZSTD_initDDict_internal(ddict, dict, dictSize, 1 /* byRef */) ))
+        return NULL;
+    return ddict;
+}
+
+
 size_t ZSTD_freeDDict(ZSTD_DDict* ddict)
 {
     if (ddict==NULL) return 0;   /* support free on NULL */
-    {   ZSTD_customMem const cMem = ddict->refContext->customMem;
-        ZSTD_freeDCtx(ddict->refContext);
-        ZSTD_free(ddict->dict, cMem);
+    {   ZSTD_customMem const cMem = ddict->cMem;   /* copied out first : ddict itself is released below */
+        ZSTD_free(ddict->dictBuffer, cMem);   /* internal copy of the content; NULL when the content was only referenced */
         ZSTD_free(ddict, cMem);
         return 0;
     }
 }
 
+/*! ZSTD_estimateDDictSize() :
+ *  Estimate amount of memory that will be needed to create a dictionary for decompression.
+ *  Note : dictionaries created "byReference" are smaller, since the content is not copied */
+size_t ZSTD_estimateDDictSize(size_t dictSize, unsigned byReference)
+{
+    return sizeof(ZSTD_DDict) + (byReference ? 0 : dictSize);
+}
+
 size_t ZSTD_sizeof_DDict(const ZSTD_DDict* ddict)
 {
     if (ddict==NULL) return 0;   /* support sizeof on NULL */
-    return sizeof(*ddict) + sizeof(ddict->refContext) + ddict->dictSize;
+    return sizeof(*ddict) + (ddict->dictBuffer ? ddict->dictSize : 0) ;   /* content is counted only when an internal copy exists */
+}
+
+/*! ZSTD_getDictID_fromDict() :
+ *  Provides the dictID stored within dictionary.
+ *  if @return == 0, the dictionary is not conformant with Zstandard specification.
+ *  It can still be loaded, but as a content-only dictionary. */
+unsigned ZSTD_getDictID_fromDict(const void* dict, size_t dictSize)
+{
+    if (dictSize < 8) return 0;   /* too small to hold magic + dictID */
+    if (MEM_readLE32(dict) != ZSTD_MAGIC_DICTIONARY) return 0;
+    return MEM_readLE32((const char*)dict + 4);   /* dictID is the little-endian 32-bit field right after the magic number */
+}
+
+/*! ZSTD_getDictID_fromDDict() :
+ *  Provides the dictID of the dictionary loaded into `ddict`.
+ *  If @return == 0, the dictionary is not conformant to Zstandard specification, or empty (a NULL `ddict` also returns 0).
+ *  Non-conformant dictionaries can still be loaded, but as content-only dictionaries. */
+unsigned ZSTD_getDictID_fromDDict(const ZSTD_DDict* ddict)
+{
+    if (ddict==NULL) return 0;
+    return ZSTD_getDictID_fromDict(ddict->dictContent, ddict->dictSize);   /* re-read from the content rather than from ddict->dictID */
+}
+
+/*! ZSTD_getDictID_fromFrame() :
+ *  Provides the dictID required to decompress the frame stored within `src`.
+ *  If @return == 0, the dictID could not be decoded.
+ *  This could be for one of the following reasons :
+ *  - The frame does not require a dictionary (most common case).
+ *  - The frame was built with dictID intentionally removed.
+ *    The needed dictionary is then hidden information.
+ *    Note : this use case also happens when using a non-conformant dictionary.
+ *  - `srcSize` is too small, and as a result, frame header could not be decoded.
+ *    Note : possible if `srcSize < ZSTD_FRAMEHEADERSIZE_MAX`.
+ *  - This is not a Zstandard frame.
+ *  To identify the exact failure cause, it's possible to use
+ *  ZSTD_getFrameHeader(), which will provide a more precise error code. */
+unsigned ZSTD_getDictID_fromFrame(const void* src, size_t srcSize)
+{
+    ZSTD_frameHeader zfp = { 0 , 0 , 0 , 0 };
+    size_t const hError = ZSTD_getFrameHeader(&zfp, src, srcSize);
+    if (ZSTD_isError(hError)) return 0;   /* header errors are folded into the "no dictID" answer */
+    return zfp.dictID;
 }
 
 
@@ -1448,12 +2157,10 @@ size_t ZSTD_decompress_usingDDict(ZSTD_DCtx* dctx,
                             const void* src, size_t srcSize,
                             const ZSTD_DDict* ddict)
 {
-#if defined(ZSTD_LEGACY_SUPPORT) && (ZSTD_LEGACY_SUPPORT==1)
-    if (ZSTD_isLegacy(src, srcSize)) return ZSTD_decompressLegacy(dst, dstCapacity, src, srcSize, ddict->dict, ddict->dictSize);
-#endif
-    ZSTD_refDCtx(dctx, ddict->refContext);
-    ZSTD_checkContinuity(dctx, dst);
-    return ZSTD_decompressFrame(dctx, dst, dstCapacity, src, srcSize);
+    /* pass content and size in case legacy frames are encountered */
+    return ZSTD_decompressMultiFrame(dctx, dst, dstCapacity, src, srcSize,
+                                     NULL, 0,
+                                     ddict);
 }
 
 
@@ -1461,87 +2168,42 @@ size_t ZSTD_decompress_usingDDict(ZSTD_DCtx* dctx,
 *   Streaming decompression
 *====================================*/
 
-typedef enum { zdss_init, zdss_loadHeader,
-               zdss_read, zdss_load, zdss_flush } ZSTD_dStreamStage;
-
-/* *** Resource management *** */
-struct ZSTD_DStream_s {
-    ZSTD_DCtx* dctx;
-    ZSTD_DDict* ddict;
-    ZSTD_frameParams fParams;
-    ZSTD_dStreamStage stage;
-    char*  inBuff;
-    size_t inBuffSize;
-    size_t inPos;
-    size_t maxWindowSize;
-    char*  outBuff;
-    size_t outBuffSize;
-    size_t outStart;
-    size_t outEnd;
-    size_t blockSize;
-    BYTE headerBuffer[ZSTD_FRAMEHEADERSIZE_MAX];   /* tmp buffer to store frame header */
-    size_t lhSize;
-    ZSTD_customMem customMem;
-    void* legacyContext;
-    U32 previousLegacyVersion;
-    U32 legacyVersion;
-    U32 hostageByte;
-};   /* typedef'd to ZSTD_DStream within "zstd.h" */
-
-
 ZSTD_DStream* ZSTD_createDStream(void)
 {
-    return ZSTD_createDStream_advanced(defaultCustomMem);
+    return ZSTD_createDStream_advanced(ZSTD_defaultCMem);   /* same as the advanced variant, with ZSTD_defaultCMem as allocator */
+}
+
+ZSTD_DStream* ZSTD_initStaticDStream(void *workspace, size_t workspaceSize)
+{
+    return ZSTD_initStaticDCtx(workspace, workspaceSize);   /* the result of the DCtx initializer is returned as the DStream */
 }
 
 ZSTD_DStream* ZSTD_createDStream_advanced(ZSTD_customMem customMem)
 {
-    ZSTD_DStream* zds;
-
-    if (!customMem.customAlloc && !customMem.customFree) customMem = defaultCustomMem;
-    if (!customMem.customAlloc || !customMem.customFree) return NULL;
-
-    zds = (ZSTD_DStream*) ZSTD_malloc(sizeof(ZSTD_DStream), customMem);
-    if (zds==NULL) return NULL;
-    memset(zds, 0, sizeof(ZSTD_DStream));
-    memcpy(&zds->customMem, &customMem, sizeof(ZSTD_customMem));
-    zds->dctx = ZSTD_createDCtx_advanced(customMem);
-    if (zds->dctx == NULL) { ZSTD_freeDStream(zds); return NULL; }
-    zds->stage = zdss_init;
-    zds->maxWindowSize = ZSTD_MAXWINDOWSIZE_DEFAULT;
-    return zds;
+    return ZSTD_createDCtx_advanced(customMem);   /* creation is fully delegated to the DCtx constructor */
 }
 
 size_t ZSTD_freeDStream(ZSTD_DStream* zds)
 {
-    if (zds==NULL) return 0;   /* support free on null */
-    {   ZSTD_customMem const cMem = zds->customMem;
-        ZSTD_freeDCtx(zds->dctx);
-        ZSTD_freeDDict(zds->ddict);
-        ZSTD_free(zds->inBuff, cMem);
-        ZSTD_free(zds->outBuff, cMem);
-#if defined(ZSTD_LEGACY_SUPPORT) && (ZSTD_LEGACY_SUPPORT >= 1)
-        if (zds->legacyContext)
-            ZSTD_freeLegacyStreamContext(zds->legacyContext, zds->previousLegacyVersion);
-#endif
-        ZSTD_free(zds, cMem);
-        return 0;
-    }
+    return ZSTD_freeDCtx(zds);   /* release is fully delegated : zds is passed as the DCtx argument */
 }
 
 
 /* *** Initialization *** */
 
-size_t ZSTD_DStreamInSize(void)  { return ZSTD_BLOCKSIZE_ABSOLUTEMAX + ZSTD_blockHeaderSize; }
-size_t ZSTD_DStreamOutSize(void) { return ZSTD_BLOCKSIZE_ABSOLUTEMAX; }
+size_t ZSTD_DStreamInSize(void)  { return ZSTD_BLOCKSIZE_MAX + ZSTD_blockHeaderSize; }   /* largest block plus one block header */
+size_t ZSTD_DStreamOutSize(void) { return ZSTD_BLOCKSIZE_MAX; }   /* largest block */
 
 size_t ZSTD_initDStream_usingDict(ZSTD_DStream* zds, const void* dict, size_t dictSize)
 {
-    zds->stage = zdss_loadHeader;
+    zds->streamStage = zdss_loadHeader;
     zds->lhSize = zds->inPos = zds->outStart = zds->outEnd = 0;
-    ZSTD_freeDDict(zds->ddict);
-    zds->ddict = ZSTD_createDDict(dict, dictSize);
-    if (zds->ddict == NULL) return ERROR(memory_allocation);
+    ZSTD_freeDDict(zds->ddictLocal);
+    if (dict && dictSize >= 8) {
+        zds->ddictLocal = ZSTD_createDDict(dict, dictSize);
+        if (zds->ddictLocal == NULL) return ERROR(memory_allocation);
+    } else zds->ddictLocal = NULL;
+    zds->ddict = zds->ddictLocal;
     zds->legacyVersion = 0;
     zds->hostageByte = 0;
     return ZSTD_frameHeaderSize_prefix;
@@ -1552,10 +2214,18 @@ size_t ZSTD_initDStream(ZSTD_DStream* zds)
     return ZSTD_initDStream_usingDict(zds, NULL, 0);
 }
 
+/* ZSTD_initDStream_usingDDict() :
+ * ddict will just be referenced, and must outlive decompression session */
+size_t ZSTD_initDStream_usingDDict(ZSTD_DStream* zds, const ZSTD_DDict* ddict)
+{
+    size_t const initResult = ZSTD_initDStream(zds);   /* == ZSTD_initDStream_usingDict(zds, NULL, 0) : frees ddictLocal, leaves ddictLocal and ddict NULL */
+    zds->ddict = ddict;   /* must follow the init above, which overwrites zds->ddict ; ddictLocal is left NULL, so only the reference is stored */
+    return initResult;   /* value returned by ZSTD_initDStream() */
+}
+
 size_t ZSTD_resetDStream(ZSTD_DStream* zds)
 {
-    if (zds->ddict == NULL) return ERROR(stage_wrong);  /* must be init at least once */
-    zds->stage = zdss_loadHeader;
+    zds->streamStage = zdss_loadHeader;
     zds->lhSize = zds->inPos = zds->outStart = zds->outEnd = 0;
     zds->legacyVersion = 0;
     zds->hostageByte = 0;
@@ -1568,7 +2238,7 @@ size_t ZSTD_setDStreamParameter(ZSTD_DStream* zds,
     switch(paramType)
     {
         default : return ERROR(parameter_unknown);
-        case ZSTDdsp_maxWindowSize : zds->maxWindowSize = paramValue; break;
+        case DStream_p_maxWindowSize : zds->maxWindowSize = paramValue ? paramValue : (U32)(-1); break;
     }
     return 0;
 }
@@ -1576,8 +2246,24 @@ size_t ZSTD_setDStreamParameter(ZSTD_DStream* zds,
 
 size_t ZSTD_sizeof_DStream(const ZSTD_DStream* zds)
 {
-    if (zds==NULL) return 0;   /* support sizeof on NULL */
-    return sizeof(*zds) + ZSTD_sizeof_DCtx(zds->dctx) + ZSTD_sizeof_DDict(zds->ddict) + zds->inBuffSize + zds->outBuffSize;
+    return ZSTD_sizeof_DCtx(zds);
+}
+
+size_t ZSTD_estimateDStreamSize(size_t windowSize)   /* sums the stream object, the DCtx estimate and both buffers for a given windowSize */
+{
+    size_t const blockSize = MIN(windowSize, ZSTD_BLOCKSIZE_MAX);   /* same clamp as blockSize in ZSTD_decompressStream() */
+    size_t const inBuffSize = blockSize;  /* no block can be larger */
+    size_t const outBuffSize = windowSize + blockSize + (WILDCOPY_OVERLENGTH * 2);   /* same formula as neededOutSize in ZSTD_decompressStream() */
+    return sizeof(ZSTD_DStream) + ZSTD_estimateDCtxSize() + inBuffSize + outBuffSize;
+}
+
+ZSTDLIB_API size_t ZSTD_estimateDStreamSize_fromFrame(const void* src, size_t srcSize)   /* same estimate, using the windowSize read from a frame header */
+{
+    ZSTD_frameHeader fh;
+    size_t const err = ZSTD_getFrameHeader(&fh, src, srcSize);   /* 0 when the header was decoded ; error code ; otherwise non-zero when more input is needed (see its use in ZSTD_decompressStream()) */
+    if (ZSTD_isError(err)) return err;   /* propagate the error code unchanged */
+    if (err>0) return ERROR(srcSize_wrong);   /* header not fully contained in src */
+    return ZSTD_estimateDStreamSize(fh.windowSize);
 }
 
 
@@ -1601,101 +2287,148 @@ size_t ZSTD_decompressStream(ZSTD_DStream* zds, ZSTD_outBuffer* output, ZSTD_inB
     char* op = ostart;
     U32 someMoreWork = 1;
 
+    DEBUGLOG(5, "ZSTD_decompressStream");
+    DEBUGLOG(5, "input size : %u", (U32)(input->size - input->pos));
 #if defined(ZSTD_LEGACY_SUPPORT) && (ZSTD_LEGACY_SUPPORT>=1)
-    if (zds->legacyVersion)
+    if (zds->legacyVersion) {
+        /* legacy support is incompatible with static dctx */
+        if (zds->staticSize) return ERROR(memory_allocation);
         return ZSTD_decompressLegacyStream(zds->legacyContext, zds->legacyVersion, output, input);
+    }
 #endif
 
     while (someMoreWork) {
-        switch(zds->stage)
+        switch(zds->streamStage)
         {
         case zdss_init :
-            return ERROR(init_missing);
+            ZSTD_resetDStream(zds);   /* transparent reset on starting decoding a new frame */
+            /* fall-through */
 
         case zdss_loadHeader :
-            {   size_t const hSize = ZSTD_getFrameParams(&zds->fParams, zds->headerBuffer, zds->lhSize);
-                if (ZSTD_isError(hSize))
+            {   size_t const hSize = ZSTD_getFrameHeader(&zds->fParams, zds->headerBuffer, zds->lhSize);
+                if (ZSTD_isError(hSize)) {
 #if defined(ZSTD_LEGACY_SUPPORT) && (ZSTD_LEGACY_SUPPORT>=1)
-                {   U32 const legacyVersion = ZSTD_isLegacy(istart, iend-istart);
+                    U32 const legacyVersion = ZSTD_isLegacy(istart, iend-istart);
                     if (legacyVersion) {
+                        const void* const dict = zds->ddict ? zds->ddict->dictContent : NULL;
+                        size_t const dictSize = zds->ddict ? zds->ddict->dictSize : 0;
+                        /* legacy support is incompatible with static dctx */
+                        if (zds->staticSize) return ERROR(memory_allocation);
                         CHECK_F(ZSTD_initLegacyStream(&zds->legacyContext, zds->previousLegacyVersion, legacyVersion,
-                                                       zds->ddict->dict, zds->ddict->dictSize));
+                                                       dict, dictSize));
                         zds->legacyVersion = zds->previousLegacyVersion = legacyVersion;
                         return ZSTD_decompressLegacyStream(zds->legacyContext, zds->legacyVersion, output, input);
                     } else {
                         return hSize; /* error */
-                }   }
+                    }
 #else
                     return hSize;
 #endif
+                }
                 if (hSize != 0) {   /* need more input */
                     size_t const toLoad = hSize - zds->lhSize;   /* if hSize!=0, hSize > zds->lhSize */
                     if (toLoad > (size_t)(iend-ip)) {   /* not enough input to load full header */
-                        memcpy(zds->headerBuffer + zds->lhSize, ip, iend-ip);
-                        zds->lhSize += iend-ip;
+                        if (iend-ip > 0) {
+                            memcpy(zds->headerBuffer + zds->lhSize, ip, iend-ip);
+                            zds->lhSize += iend-ip;
+                        }
                         input->pos = input->size;
                         return (MAX(ZSTD_frameHeaderSize_min, hSize) - zds->lhSize) + ZSTD_blockHeaderSize;   /* remaining header bytes + next block header */
                     }
+                    assert(ip != NULL);
                     memcpy(zds->headerBuffer + zds->lhSize, ip, toLoad); zds->lhSize = hSize; ip += toLoad;
                     break;
             }   }
 
-            /* Consume header */
-            ZSTD_refDCtx(zds->dctx, zds->ddict->refContext);
-            {   size_t const h1Size = ZSTD_nextSrcSizeToDecompress(zds->dctx);  /* == ZSTD_frameHeaderSize_prefix */
-                CHECK_F(ZSTD_decompressContinue(zds->dctx, NULL, 0, zds->headerBuffer, h1Size));
-                {   size_t const h2Size = ZSTD_nextSrcSizeToDecompress(zds->dctx);
-                    CHECK_F(ZSTD_decompressContinue(zds->dctx, NULL, 0, zds->headerBuffer+h1Size, h2Size));
+            /* check for single-pass mode opportunity */
+            if (zds->fParams.frameContentSize && zds->fParams.windowSize /* skippable frame if == 0 */
+                && (U64)(size_t)(oend-op) >= zds->fParams.frameContentSize) {
+                size_t const cSize = ZSTD_findFrameCompressedSize(istart, iend-istart);
+                if (cSize <= (size_t)(iend-istart)) {
+                    size_t const decompressedSize = ZSTD_decompress_usingDDict(zds, op, oend-op, istart, cSize, zds->ddict);
+                    if (ZSTD_isError(decompressedSize)) return decompressedSize;
+                    ip = istart + cSize;
+                    op += decompressedSize;
+                    zds->expected = 0;
+                    zds->streamStage = zdss_init;
+                    someMoreWork = 0;
+                    break;
             }   }
 
+            /* Consume header (see ZSTDds_decodeFrameHeader) */
+            DEBUGLOG(4, "Consume header");
+            CHECK_F(ZSTD_decompressBegin_usingDDict(zds, zds->ddict));
+
+            if ((MEM_readLE32(zds->headerBuffer) & 0xFFFFFFF0U) == ZSTD_MAGIC_SKIPPABLE_START) {  /* skippable frame */
+                zds->expected = MEM_readLE32(zds->headerBuffer + 4);
+                zds->stage = ZSTDds_skipFrame;
+            } else {
+                CHECK_F(ZSTD_decodeFrameHeader(zds, zds->headerBuffer, zds->lhSize));
+                zds->expected = ZSTD_blockHeaderSize;
+                zds->stage = ZSTDds_decodeBlockHeader;
+            }
+
+            /* control buffer memory usage */
+            DEBUGLOG(4, "Control max buffer memory usage");
             zds->fParams.windowSize = MAX(zds->fParams.windowSize, 1U << ZSTD_WINDOWLOG_ABSOLUTEMIN);
-            if (zds->fParams.windowSize > zds->maxWindowSize) return ERROR(frameParameter_unsupported);
+            if (zds->fParams.windowSize > zds->maxWindowSize) return ERROR(frameParameter_windowTooLarge);
 
             /* Adapt buffer sizes to frame header instructions */
-            {   size_t const blockSize = MIN(zds->fParams.windowSize, ZSTD_BLOCKSIZE_ABSOLUTEMAX);
-                size_t const neededOutSize = zds->fParams.windowSize + blockSize;
+            {   size_t const blockSize = MIN(zds->fParams.windowSize, ZSTD_BLOCKSIZE_MAX);
+                size_t const neededOutSize = zds->fParams.windowSize + blockSize + WILDCOPY_OVERLENGTH * 2;
                 zds->blockSize = blockSize;
-                if (zds->inBuffSize < blockSize) {
-                    ZSTD_free(zds->inBuff, zds->customMem);
+                if ((zds->inBuffSize < blockSize) || (zds->outBuffSize < neededOutSize)) {
+                    size_t const bufferSize = blockSize + neededOutSize;
+                    DEBUGLOG(4, "inBuff  : from %u to %u",
+                                (U32)zds->inBuffSize, (U32)blockSize);
+                    DEBUGLOG(4, "outBuff : from %u to %u",
+                                (U32)zds->outBuffSize, (U32)neededOutSize);
+                    if (zds->staticSize) {  /* static DCtx */
+                        DEBUGLOG(4, "staticSize : %u", (U32)zds->staticSize);
+                        assert(zds->staticSize >= sizeof(ZSTD_DCtx));  /* controlled at init */
+                        if (bufferSize > zds->staticSize - sizeof(ZSTD_DCtx))
+                            return ERROR(memory_allocation);
+                    } else {
+                        ZSTD_free(zds->inBuff, zds->customMem);
+                        zds->inBuffSize = 0;
+                        zds->outBuffSize = 0;
+                        zds->inBuff = (char*)ZSTD_malloc(bufferSize, zds->customMem);
+                        if (zds->inBuff == NULL) return ERROR(memory_allocation);
+                    }
                     zds->inBuffSize = blockSize;
-                    zds->inBuff = (char*)ZSTD_malloc(blockSize, zds->customMem);
-                    if (zds->inBuff == NULL) return ERROR(memory_allocation);
-                }
-                if (zds->outBuffSize < neededOutSize) {
-                    ZSTD_free(zds->outBuff, zds->customMem);
+                    zds->outBuff = zds->inBuff + zds->inBuffSize;
                     zds->outBuffSize = neededOutSize;
-                    zds->outBuff = (char*)ZSTD_malloc(neededOutSize, zds->customMem);
-                    if (zds->outBuff == NULL) return ERROR(memory_allocation);
             }   }
-            zds->stage = zdss_read;
+            zds->streamStage = zdss_read;
             /* pass-through */
 
         case zdss_read:
-            {   size_t const neededInSize = ZSTD_nextSrcSizeToDecompress(zds->dctx);
+            DEBUGLOG(5, "stage zdss_read");
+            {   size_t const neededInSize = ZSTD_nextSrcSizeToDecompress(zds);
+                DEBUGLOG(5, "neededInSize = %u", (U32)neededInSize);
                 if (neededInSize==0) {  /* end of frame */
-                    zds->stage = zdss_init;
+                    zds->streamStage = zdss_init;
                     someMoreWork = 0;
                     break;
                 }
                 if ((size_t)(iend-ip) >= neededInSize) {  /* decode directly from src */
-                    const int isSkipFrame = ZSTD_isSkipFrame(zds->dctx);
-                    size_t const decodedSize = ZSTD_decompressContinue(zds->dctx,
+                    int const isSkipFrame = ZSTD_isSkipFrame(zds);
+                    size_t const decodedSize = ZSTD_decompressContinue(zds,
                         zds->outBuff + zds->outStart, (isSkipFrame ? 0 : zds->outBuffSize - zds->outStart),
                         ip, neededInSize);
                     if (ZSTD_isError(decodedSize)) return decodedSize;
                     ip += neededInSize;
                     if (!decodedSize && !isSkipFrame) break;   /* this was just a header */
                     zds->outEnd = zds->outStart + decodedSize;
-                    zds->stage = zdss_flush;
+                    zds->streamStage = zdss_flush;
                     break;
-                }
-                if (ip==iend) { someMoreWork = 0; break; }   /* no more input */
-                zds->stage = zdss_load;
-                /* pass-through */
-            }
+            }   }
+            if (ip==iend) { someMoreWork = 0; break; }   /* no more input */
+            zds->streamStage = zdss_load;
+            /* pass-through */
 
         case zdss_load:
-            {   size_t const neededInSize = ZSTD_nextSrcSizeToDecompress(zds->dctx);
+            {   size_t const neededInSize = ZSTD_nextSrcSizeToDecompress(zds);
                 size_t const toLoad = neededInSize - zds->inPos;   /* should always be <= remaining space within inBuff */
                 size_t loadedSize;
                 if (toLoad > zds->inBuffSize - zds->inPos) return ERROR(corruption_detected);   /* should never happen */
@@ -1705,17 +2438,17 @@ size_t ZSTD_decompressStream(ZSTD_DStream* zds, ZSTD_outBuffer* output, ZSTD_inB
                 if (loadedSize < toLoad) { someMoreWork = 0; break; }   /* not enough input, wait for more */
 
                 /* decode loaded input */
-                {  const int isSkipFrame = ZSTD_isSkipFrame(zds->dctx);
-                   size_t const decodedSize = ZSTD_decompressContinue(zds->dctx,
+                {  const int isSkipFrame = ZSTD_isSkipFrame(zds);
+                   size_t const decodedSize = ZSTD_decompressContinue(zds,
                         zds->outBuff + zds->outStart, zds->outBuffSize - zds->outStart,
                         zds->inBuff, neededInSize);
                     if (ZSTD_isError(decodedSize)) return decodedSize;
                     zds->inPos = 0;   /* input is consumed */
-                    if (!decodedSize && !isSkipFrame) { zds->stage = zdss_read; break; }   /* this was just a header */
+                    if (!decodedSize && !isSkipFrame) { zds->streamStage = zdss_read; break; }   /* this was just a header */
                     zds->outEnd = zds->outStart +  decodedSize;
-                    zds->stage = zdss_flush;
-                    /* pass-through */
             }   }
+            zds->streamStage = zdss_flush;
+            /* pass-through */
 
         case zdss_flush:
             {   size_t const toFlushSize = zds->outEnd - zds->outStart;
@@ -1723,37 +2456,41 @@ size_t ZSTD_decompressStream(ZSTD_DStream* zds, ZSTD_outBuffer* output, ZSTD_inB
                 op += flushedSize;
                 zds->outStart += flushedSize;
                 if (flushedSize == toFlushSize) {  /* flush completed */
-                    zds->stage = zdss_read;
+                    zds->streamStage = zdss_read;
                     if (zds->outStart + zds->blockSize > zds->outBuffSize)
                         zds->outStart = zds->outEnd = 0;
                     break;
-                }
-                /* cannot complete flush */
-                someMoreWork = 0;
-                break;
-            }
+            }   }
+            /* cannot complete flush */
+            someMoreWork = 0;
+            break;
+
         default: return ERROR(GENERIC);   /* impossible */
     }   }
 
     /* result */
     input->pos += (size_t)(ip-istart);
     output->pos += (size_t)(op-ostart);
-    {   size_t nextSrcSizeHint = ZSTD_nextSrcSizeToDecompress(zds->dctx);
+    {   size_t nextSrcSizeHint = ZSTD_nextSrcSizeToDecompress(zds);
         if (!nextSrcSizeHint) {   /* frame fully decoded */
             if (zds->outEnd == zds->outStart) {  /* output fully flushed */
                 if (zds->hostageByte) {
-                    if (input->pos >= input->size) { zds->stage = zdss_read; return 1; }  /* can't release hostage (not present) */
+                    if (input->pos >= input->size) {
+                        /* can't release hostage (not present) */
+                        zds->streamStage = zdss_read;
+                        return 1;
+                    }
                     input->pos++;  /* release hostage */
-                }
+                }   /* zds->hostageByte */
                 return 0;
-            }
+            }  /* zds->outEnd == zds->outStart */
             if (!zds->hostageByte) { /* output not fully flushed; keep last byte as hostage; will be released when all output is flushed */
                 input->pos--;   /* note : pos > 0, otherwise, impossible to finish reading last block */
                 zds->hostageByte=1;
             }
             return 1;
-        }
-        nextSrcSizeHint += ZSTD_blockHeaderSize * (ZSTD_nextInputType(zds->dctx) == ZSTDnit_block);   /* preload header of next block */
+        }  /* nextSrcSizeHint==0 */
+        nextSrcSizeHint += ZSTD_blockHeaderSize * (ZSTD_nextInputType(zds) == ZSTDnit_block);   /* preload header of next block */
         if (zds->inPos > nextSrcSizeHint) return ERROR(GENERIC);   /* should never happen */
         nextSrcSizeHint -= zds->inPos;   /* already loaded*/
         return nextSrcSizeHint;
diff --git a/contrib/libzstd/include/zstd/common/zbuff.h b/contrib/libzstd/include/zstd/deprecated/zbuff.h
similarity index 66%
rename from contrib/libzstd/include/zstd/common/zbuff.h
rename to contrib/libzstd/include/zstd/deprecated/zbuff.h
index f99e0619763..f62091976c7 100644
--- a/contrib/libzstd/include/zstd/common/zbuff.h
+++ b/contrib/libzstd/include/zstd/deprecated/zbuff.h
@@ -9,35 +9,52 @@
 
 /* ***************************************************************
 *  NOTES/WARNINGS
-*****************************************************************/
-/* The streaming API defined here will soon be deprecated by the
-* new one in 'zstd.h'; consider migrating towards newer streaming
-* API. See 'lib/README.md'.
-*****************************************************************/
+******************************************************************/
+/* The streaming API defined here is deprecated.
+ * Consider migrating towards ZSTD_compressStream() API in `zstd.h`
+ * See 'lib/README.md'.
+ *****************************************************************/
 
-#ifndef ZSTD_BUFFERED_H_23987
-#define ZSTD_BUFFERED_H_23987
 
 #if defined (__cplusplus)
 extern "C" {
 #endif
 
+#ifndef ZSTD_BUFFERED_H_23987
+#define ZSTD_BUFFERED_H_23987
+
 /* *************************************
 *  Dependencies
 ***************************************/
 #include <stddef.h>      /* size_t */
+#include "zstd.h"        /* ZSTD_CStream, ZSTD_DStream, ZSTDLIB_API */
 
 
 /* ***************************************************************
 *  Compiler specifics
 *****************************************************************/
-/* ZSTD_DLL_EXPORT :
-*  Enable exporting of functions when building a Windows DLL */
-#if defined(_WIN32) && defined(ZSTD_DLL_EXPORT) && (ZSTD_DLL_EXPORT==1)
-#  define ZSTDLIB_API __declspec(dllexport)
+/* Deprecation warnings */
+/* Should these warnings be a problem,
+   it is generally possible to disable them,
+   typically with -Wno-deprecated-declarations for gcc
+   or _CRT_SECURE_NO_WARNINGS in Visual.
+   Otherwise, it's also possible to define ZBUFF_DISABLE_DEPRECATE_WARNINGS */
+#ifdef ZBUFF_DISABLE_DEPRECATE_WARNINGS
+#  define ZBUFF_DEPRECATED(message) ZSTDLIB_API  /* disable deprecation warnings */
 #else
-#  define ZSTDLIB_API
-#endif
+#  if defined (__cplusplus) && (__cplusplus >= 201402) /* C++14 or greater */
+#    define ZBUFF_DEPRECATED(message) [[deprecated(message)]] ZSTDLIB_API
+#  elif (defined(__GNUC__) && (__GNUC__ >= 5)) || defined(__clang__)
+#    define ZBUFF_DEPRECATED(message) ZSTDLIB_API __attribute__((deprecated(message)))
+#  elif defined(__GNUC__) && (__GNUC__ >= 3)
+#    define ZBUFF_DEPRECATED(message) ZSTDLIB_API __attribute__((deprecated))
+#  elif defined(_MSC_VER)
+#    define ZBUFF_DEPRECATED(message) ZSTDLIB_API __declspec(deprecated(message))
+#  else
+#    pragma message("WARNING: You need to implement ZBUFF_DEPRECATED for this compiler")
+#    define ZBUFF_DEPRECATED(message) ZSTDLIB_API
+#  endif
+#endif /* ZBUFF_DISABLE_DEPRECATE_WARNINGS */
 
 
 /* *************************************
@@ -49,16 +66,16 @@ extern "C" {
 *  ZBUFF and ZSTD are 100% interoperable,
 *  frames created by one can be decoded by the other one */
 
-typedef struct ZBUFF_CCtx_s ZBUFF_CCtx;
-ZSTDLIB_API ZBUFF_CCtx* ZBUFF_createCCtx(void);
-ZSTDLIB_API size_t      ZBUFF_freeCCtx(ZBUFF_CCtx* cctx);
+typedef ZSTD_CStream ZBUFF_CCtx;
+ZBUFF_DEPRECATED("use ZSTD_createCStream") ZBUFF_CCtx* ZBUFF_createCCtx(void);
+ZBUFF_DEPRECATED("use ZSTD_freeCStream")   size_t      ZBUFF_freeCCtx(ZBUFF_CCtx* cctx);
 
-ZSTDLIB_API size_t ZBUFF_compressInit(ZBUFF_CCtx* cctx, int compressionLevel);
-ZSTDLIB_API size_t ZBUFF_compressInitDictionary(ZBUFF_CCtx* cctx, const void* dict, size_t dictSize, int compressionLevel);
+ZBUFF_DEPRECATED("use ZSTD_initCStream")           size_t ZBUFF_compressInit(ZBUFF_CCtx* cctx, int compressionLevel);
+ZBUFF_DEPRECATED("use ZSTD_initCStream_usingDict") size_t ZBUFF_compressInitDictionary(ZBUFF_CCtx* cctx, const void* dict, size_t dictSize, int compressionLevel);
 
-ZSTDLIB_API size_t ZBUFF_compressContinue(ZBUFF_CCtx* cctx, void* dst, size_t* dstCapacityPtr, const void* src, size_t* srcSizePtr);
-ZSTDLIB_API size_t ZBUFF_compressFlush(ZBUFF_CCtx* cctx, void* dst, size_t* dstCapacityPtr);
-ZSTDLIB_API size_t ZBUFF_compressEnd(ZBUFF_CCtx* cctx, void* dst, size_t* dstCapacityPtr);
+ZBUFF_DEPRECATED("use ZSTD_compressStream") size_t ZBUFF_compressContinue(ZBUFF_CCtx* cctx, void* dst, size_t* dstCapacityPtr, const void* src, size_t* srcSizePtr);
+ZBUFF_DEPRECATED("use ZSTD_flushStream")    size_t ZBUFF_compressFlush(ZBUFF_CCtx* cctx, void* dst, size_t* dstCapacityPtr);
+ZBUFF_DEPRECATED("use ZSTD_endStream")      size_t ZBUFF_compressEnd(ZBUFF_CCtx* cctx, void* dst, size_t* dstCapacityPtr);
 
 /*-*************************************************
 *  Streaming compression - howto
@@ -101,14 +118,14 @@ ZSTDLIB_API size_t ZBUFF_compressEnd(ZBUFF_CCtx* cctx, void* dst, size_t* dstCap
 * **************************************************/
 
 
-typedef struct ZBUFF_DCtx_s ZBUFF_DCtx;
-ZSTDLIB_API ZBUFF_DCtx* ZBUFF_createDCtx(void);
-ZSTDLIB_API size_t      ZBUFF_freeDCtx(ZBUFF_DCtx* dctx);
+typedef ZSTD_DStream ZBUFF_DCtx;
+ZBUFF_DEPRECATED("use ZSTD_createDStream") ZBUFF_DCtx* ZBUFF_createDCtx(void);
+ZBUFF_DEPRECATED("use ZSTD_freeDStream")   size_t      ZBUFF_freeDCtx(ZBUFF_DCtx* dctx);
 
-ZSTDLIB_API size_t ZBUFF_decompressInit(ZBUFF_DCtx* dctx);
-ZSTDLIB_API size_t ZBUFF_decompressInitDictionary(ZBUFF_DCtx* dctx, const void* dict, size_t dictSize);
+ZBUFF_DEPRECATED("use ZSTD_initDStream")           size_t ZBUFF_decompressInit(ZBUFF_DCtx* dctx);
+ZBUFF_DEPRECATED("use ZSTD_initDStream_usingDict") size_t ZBUFF_decompressInitDictionary(ZBUFF_DCtx* dctx, const void* dict, size_t dictSize);
 
-ZSTDLIB_API size_t ZBUFF_decompressContinue(ZBUFF_DCtx* dctx,
+ZBUFF_DEPRECATED("use ZSTD_decompressStream") size_t ZBUFF_decompressContinue(ZBUFF_DCtx* dctx,
                                             void* dst, size_t* dstCapacityPtr,
                                       const void* src, size_t* srcSizePtr);
 
@@ -141,18 +158,22 @@ ZSTDLIB_API size_t ZBUFF_decompressContinue(ZBUFF_DCtx* dctx,
 /* *************************************
 *  Tool functions
 ***************************************/
-ZSTDLIB_API unsigned ZBUFF_isError(size_t errorCode);
-ZSTDLIB_API const char* ZBUFF_getErrorName(size_t errorCode);
+ZBUFF_DEPRECATED("use ZSTD_isError")      unsigned ZBUFF_isError(size_t errorCode);
+ZBUFF_DEPRECATED("use ZSTD_getErrorName") const char* ZBUFF_getErrorName(size_t errorCode);
 
 /** Functions below provide recommended buffer sizes for Compression or Decompression operations.
 *   These sizes are just hints, they tend to offer better latency */
-ZSTDLIB_API size_t ZBUFF_recommendedCInSize(void);
-ZSTDLIB_API size_t ZBUFF_recommendedCOutSize(void);
-ZSTDLIB_API size_t ZBUFF_recommendedDInSize(void);
-ZSTDLIB_API size_t ZBUFF_recommendedDOutSize(void);
+ZBUFF_DEPRECATED("use ZSTD_CStreamInSize")  size_t ZBUFF_recommendedCInSize(void);
+ZBUFF_DEPRECATED("use ZSTD_CStreamOutSize") size_t ZBUFF_recommendedCOutSize(void);
+ZBUFF_DEPRECATED("use ZSTD_DStreamInSize")  size_t ZBUFF_recommendedDInSize(void);
+ZBUFF_DEPRECATED("use ZSTD_DStreamOutSize") size_t ZBUFF_recommendedDOutSize(void);
+
+#endif  /* ZSTD_BUFFERED_H_23987 */
 
 
 #ifdef ZBUFF_STATIC_LINKING_ONLY
+#ifndef ZBUFF_STATIC_H_30298098432
+#define ZBUFF_STATIC_H_30298098432
 
 /* ====================================================================================
  * The definitions in this section are considered experimental.
@@ -169,23 +190,23 @@ ZSTDLIB_API size_t ZBUFF_recommendedDOutSize(void);
 /*--- Custom memory allocator ---*/
 /*! ZBUFF_createCCtx_advanced() :
  *  Create a ZBUFF compression context using external alloc and free functions */
-ZSTDLIB_API ZBUFF_CCtx* ZBUFF_createCCtx_advanced(ZSTD_customMem customMem);
+ZBUFF_DEPRECATED("use ZSTD_createCStream_advanced") ZBUFF_CCtx* ZBUFF_createCCtx_advanced(ZSTD_customMem customMem);
 
 /*! ZBUFF_createDCtx_advanced() :
  *  Create a ZBUFF decompression context using external alloc and free functions */
-ZSTDLIB_API ZBUFF_DCtx* ZBUFF_createDCtx_advanced(ZSTD_customMem customMem);
+ZBUFF_DEPRECATED("use ZSTD_createDStream_advanced") ZBUFF_DCtx* ZBUFF_createDCtx_advanced(ZSTD_customMem customMem);
 
 
 /*--- Advanced Streaming Initialization ---*/
-ZSTDLIB_API size_t ZBUFF_compressInit_advanced(ZBUFF_CCtx* zbc,
+ZBUFF_DEPRECATED("use ZSTD_initCStream_advanced") size_t ZBUFF_compressInit_advanced(ZBUFF_CCtx* zbc,
                                                const void* dict, size_t dictSize,
                                                ZSTD_parameters params, unsigned long long pledgedSrcSize);
 
-#endif /* ZBUFF_STATIC_LINKING_ONLY */
+
+#endif    /* ZBUFF_STATIC_H_30298098432 */
+#endif    /* ZBUFF_STATIC_LINKING_ONLY */
 
 
 #if defined (__cplusplus)
 }
 #endif
-
-#endif  /* ZSTD_BUFFERED_H_23987 */
diff --git a/contrib/libzstd/include/zstd/deprecated/zbuff_common.c b/contrib/libzstd/include/zstd/deprecated/zbuff_common.c
new file mode 100644
index 00000000000..9fff6eb2095
--- /dev/null
+++ b/contrib/libzstd/include/zstd/deprecated/zbuff_common.c
@@ -0,0 +1,26 @@
+/**
+ * Copyright (c) 2016-present, Yann Collet, Facebook, Inc.
+ * All rights reserved.
+ *
+ * This source code is licensed under the BSD-style license found in the
+ * LICENSE file in the root directory of this source tree. An additional grant
+ * of patent rights can be found in the PATENTS file in the same directory.
+ */
+
+/*-*************************************
+*  Dependencies
+***************************************/
+#include "error_private.h"
+#include "zbuff.h"
+
+/*-****************************************
+*  ZBUFF Error Management  (deprecated)
+******************************************/
+
+/*! ZBUFF_isError() :
+*   tells if a return value is an error code (deprecated : use ZSTD_isError() ; this wrapper forwards to ERR_isError()) */
+unsigned ZBUFF_isError(size_t errorCode) { return ERR_isError(errorCode); }
+/*! ZBUFF_getErrorName() :
+*   provides error code string from function result (useful for debugging) (deprecated : use ZSTD_getErrorName() ; this wrapper forwards to ERR_getErrorName()) */
+const char* ZBUFF_getErrorName(size_t errorCode) { return ERR_getErrorName(errorCode); }
+
diff --git a/contrib/libzstd/include/zstd/deprecated/zbuff_compress.c b/contrib/libzstd/include/zstd/deprecated/zbuff_compress.c
new file mode 100644
index 00000000000..5a37a0027ae
--- /dev/null
+++ b/contrib/libzstd/include/zstd/deprecated/zbuff_compress.c
@@ -0,0 +1,145 @@
+/**
+ * Copyright (c) 2016-present, Yann Collet, Facebook, Inc.
+ * All rights reserved.
+ *
+ * This source code is licensed under the BSD-style license found in the
+ * LICENSE file in the root directory of this source tree. An additional grant
+ * of patent rights can be found in the PATENTS file in the same directory.
+ */
+
+
+
+/* *************************************
+*  Dependencies
+***************************************/
+#define ZBUFF_STATIC_LINKING_ONLY
+#include "zbuff.h"
+
+
+/*-***********************************************************
+*  Streaming compression
+*
+*  A ZBUFF_CCtx object is required to track streaming operation.
+*  Use ZBUFF_createCCtx() and ZBUFF_freeCCtx() to create/release resources.
+*  Use ZBUFF_compressInit() to start a new compression operation.
+*  ZBUFF_CCtx objects can be reused multiple times.
+*
+*  Use ZBUFF_compressContinue() repeatedly to consume your input.
+*  *srcSizePtr and *dstCapacityPtr can be any size.
+*  The function will report how many bytes were read or written by modifying *srcSizePtr and *dstCapacityPtr.
+*  Note that it may not consume the entire input, in which case it's up to the caller to call the function again with the remaining input.
+*  The content of dst will be overwritten (up to *dstCapacityPtr) at each function call, so save its content if it matters, or change dst.
+*  @return : a hint to preferred nb of bytes to use as input for next function call (it's only a hint, to improve latency)
+*            or an error code, which can be tested using ZBUFF_isError().
+*
+*  ZBUFF_compressFlush() can be used to instruct ZBUFF to compress and output whatever remains within its buffer.
+*  Note that it will not output more than *dstCapacityPtr.
+*  Therefore, some content might still be left in its internal buffer if dst buffer is too small.
+*  @return : nb of bytes still present in the internal buffer (0 if it's empty)
+*            or an error code, which can be tested using ZBUFF_isError().
+*
+*  ZBUFF_compressEnd() instructs to finish a frame.
+*  It will perform a flush and write frame epilogue.
+*  Similar to ZBUFF_compressFlush(), it may not be able to output the entire internal buffer content if *dstCapacityPtr is too small.
+*  @return : nb of bytes still present in the internal buffer (0 if it's empty)
+*            or an error code, which can be tested using ZBUFF_isError().
+*
+*  Hint : recommended buffer sizes (not compulsory)
+*  input : ZSTD_BLOCKSIZE_MAX (128 KB), internal unit size, it improves latency to use this value.
+*  output : ZSTD_compressBound(ZSTD_BLOCKSIZE_MAX) + ZSTD_blockHeaderSize + ZBUFF_endFrameSize : ensures it's always possible to write/flush/end a full block at best speed.
+* ***********************************************************/
+
+ZBUFF_CCtx* ZBUFF_createCCtx(void)   /* deprecated wrapper around ZSTD_createCStream() */
+{
+    return ZSTD_createCStream();   /* ZBUFF_CCtx is a typedef of ZSTD_CStream, so the result is returned as-is */
+}
+
+ZBUFF_CCtx* ZBUFF_createCCtx_advanced(ZSTD_customMem customMem)   /* deprecated wrapper around ZSTD_createCStream_advanced() */
+{
+    return ZSTD_createCStream_advanced(customMem);   /* customMem is passed through unchanged */
+}
+
+size_t ZBUFF_freeCCtx(ZBUFF_CCtx* zbc)   /* deprecated wrapper around ZSTD_freeCStream() */
+{
+    return ZSTD_freeCStream(zbc);   /* return value is forwarded unchanged */
+}
+
+
+/* ======   Initialization   ====== */
+
+size_t ZBUFF_compressInit_advanced(ZBUFF_CCtx* zbc,   /* deprecated wrapper around ZSTD_initCStream_advanced() */
+                                   const void* dict, size_t dictSize,
+                                   ZSTD_parameters params, unsigned long long pledgedSrcSize)
+{
+    return ZSTD_initCStream_advanced(zbc, dict, dictSize, params, pledgedSrcSize);   /* all arguments forwarded in the same order */
+}
+
+
+size_t ZBUFF_compressInitDictionary(ZBUFF_CCtx* zbc, const void* dict, size_t dictSize, int compressionLevel)   /* deprecated wrapper around ZSTD_initCStream_usingDict() */
+{
+    return ZSTD_initCStream_usingDict(zbc, dict, dictSize, compressionLevel);   /* all arguments forwarded in the same order */
+}
+
+size_t ZBUFF_compressInit(ZBUFF_CCtx* zbc, int compressionLevel)   /* deprecated wrapper around ZSTD_initCStream() */
+{
+    return ZSTD_initCStream(zbc, compressionLevel);   /* return value is forwarded unchanged */
+}
+
+/* ======   Compression   ====== */
+
+
+size_t ZBUFF_compressContinue(ZBUFF_CCtx* zbc,   /* deprecated : adapts the pointer/size interface to ZSTD_compressStream() */
+                              void* dst, size_t* dstCapacityPtr,
+                        const void* src, size_t* srcSizePtr)
+{
+    size_t result;
+    ZSTD_outBuffer outBuff;
+    ZSTD_inBuffer inBuff;
+    outBuff.dst = dst;
+    outBuff.pos = 0;   /* each call starts writing at the beginning of dst */
+    outBuff.size = *dstCapacityPtr;   /* on entry, *dstCapacityPtr is the capacity of dst */
+    inBuff.src = src;
+    inBuff.pos = 0;   /* each call starts reading at the beginning of src */
+    inBuff.size = *srcSizePtr;   /* on entry, *srcSizePtr is the nb of bytes available in src */
+    result = ZSTD_compressStream(zbc, &outBuff, &inBuff);
+    *dstCapacityPtr = outBuff.pos;   /* on exit : final write position within dst */
+    *srcSizePtr = inBuff.pos;   /* on exit : final read position within src */
+    return result;   /* value returned by ZSTD_compressStream(), forwarded unchanged */
+}
+
+
+
+/* ======   Finalize   ====== */
+
+size_t ZBUFF_compressFlush(ZBUFF_CCtx* zbc, void* dst, size_t* dstCapacityPtr)   /* deprecated : adapts the pointer/size interface to ZSTD_flushStream() */
+{
+    size_t result;
+    ZSTD_outBuffer outBuff;
+    outBuff.dst = dst;
+    outBuff.pos = 0;   /* each call starts writing at the beginning of dst */
+    outBuff.size = *dstCapacityPtr;   /* on entry, *dstCapacityPtr is the capacity of dst */
+    result = ZSTD_flushStream(zbc, &outBuff);
+    *dstCapacityPtr = outBuff.pos;   /* on exit : final write position within dst */
+    return result;   /* value returned by ZSTD_flushStream(), forwarded unchanged */
+}
+
+
+size_t ZBUFF_compressEnd(ZBUFF_CCtx* zbc, void* dst, size_t* dstCapacityPtr)   /* deprecated : adapts the pointer/size interface to ZSTD_endStream() */
+{
+    size_t result;
+    ZSTD_outBuffer outBuff;
+    outBuff.dst = dst;
+    outBuff.pos = 0;   /* each call starts writing at the beginning of dst */
+    outBuff.size = *dstCapacityPtr;   /* on entry, *dstCapacityPtr is the capacity of dst */
+    result = ZSTD_endStream(zbc, &outBuff);
+    *dstCapacityPtr = outBuff.pos;   /* on exit : final write position within dst */
+    return result;   /* value returned by ZSTD_endStream(), forwarded unchanged */
+}
+
+
+
+/* *************************************
+*  Tool functions
+***************************************/
+size_t ZBUFF_recommendedCInSize(void)  { return ZSTD_CStreamInSize(); }   /* deprecated wrapper : forwards to ZSTD_CStreamInSize() */
+size_t ZBUFF_recommendedCOutSize(void) { return ZSTD_CStreamOutSize(); }   /* deprecated wrapper : forwards to ZSTD_CStreamOutSize() */
diff --git a/contrib/libzstd/include/zstd/deprecated/zbuff_decompress.c b/contrib/libzstd/include/zstd/deprecated/zbuff_decompress.c
new file mode 100644
index 00000000000..d9c155e08eb
--- /dev/null
+++ b/contrib/libzstd/include/zstd/deprecated/zbuff_decompress.c
@@ -0,0 +1,74 @@
+/**
+ * Copyright (c) 2016-present, Yann Collet, Facebook, Inc.
+ * All rights reserved.
+ *
+ * This source code is licensed under the BSD-style license found in the
+ * LICENSE file in the root directory of this source tree. An additional grant
+ * of patent rights can be found in the PATENTS file in the same directory.
+ */
+
+
+
+/* *************************************
+*  Dependencies
+***************************************/
+#define ZBUFF_STATIC_LINKING_ONLY
+#include "zbuff.h"
+
+
+ZBUFF_DCtx* ZBUFF_createDCtx(void)
+{   /* Thin wrapper: returns whatever ZSTD_createDStream() returns. */
+    return ZSTD_createDStream();
+}
+
+ZBUFF_DCtx* ZBUFF_createDCtx_advanced(ZSTD_customMem customMem)
+{   /* Thin wrapper: passes customMem to ZSTD_createDStream_advanced() and returns its result. */
+    return ZSTD_createDStream_advanced(customMem);
+}
+
+size_t ZBUFF_freeDCtx(ZBUFF_DCtx* zbd)
+{   /* Thin wrapper: hands zbd to ZSTD_freeDStream() and returns its result. */
+    return ZSTD_freeDStream(zbd);
+}
+
+
+/* *** Initialization *** */
+
+size_t ZBUFF_decompressInitDictionary(ZBUFF_DCtx* zbd, const void* dict, size_t dictSize)
+{   /* Thin wrapper: forwards all arguments to ZSTD_initDStream_usingDict() and returns its result. */
+    return ZSTD_initDStream_usingDict(zbd, dict, dictSize);
+}
+
+size_t ZBUFF_decompressInit(ZBUFF_DCtx* zbd)
+{   /* Thin wrapper: forwards zbd to ZSTD_initDStream() and returns its result. */
+    return ZSTD_initDStream(zbd);
+}
+
+
+/* *** Decompression *** */
+
+size_t ZBUFF_decompressContinue(ZBUFF_DCtx* zbd,
+                                void* dst, size_t* dstCapacityPtr,
+                          const void* src, size_t* srcSizePtr)
+{   /* Wraps ZSTD_decompressStream(): sizes are read from, and final positions written back to, the two size pointers. */
+    ZSTD_inBuffer input;
+    ZSTD_outBuffer output;
+    size_t status;
+    input.src = src;
+    input.size = *srcSizePtr;
+    input.pos = 0;
+    output.dst = dst;
+    output.size = *dstCapacityPtr;
+    output.pos = 0;
+    status = ZSTD_decompressStream(zbd, &output, &input);
+    *dstCapacityPtr = output.pos;  /* out: final position in dst */
+    *srcSizePtr = input.pos;       /* out: final position in src */
+    return status;
+}
+
+
+/* *************************************
+*  Tool functions: each returns the value of ZSTD_DStreamInSize() / ZSTD_DStreamOutSize() unchanged
+***************************************/
+size_t ZBUFF_recommendedDInSize(void)  { return ZSTD_DStreamInSize(); }
+size_t ZBUFF_recommendedDOutSize(void) { return ZSTD_DStreamOutSize(); }
diff --git a/contrib/libzstd/include/zstd/dictBuilder/cover.c b/contrib/libzstd/include/zstd/dictBuilder/cover.c
new file mode 100644
index 00000000000..06c1b9fadb7
--- /dev/null
+++ b/contrib/libzstd/include/zstd/dictBuilder/cover.c
@@ -0,0 +1,1036 @@
+/**
+ * Copyright (c) 2016-present, Yann Collet, Facebook, Inc.
+ * All rights reserved.
+ *
+ * This source code is licensed under the BSD-style license found in the
+ * LICENSE file in the root directory of this source tree. An additional grant
+ * of patent rights can be found in the PATENTS file in the same directory.
+ */
+
+/* *****************************************************************************
+ * Constructs a dictionary using a heuristic based on the following paper:
+ *
+ * Liao, Petri, Moffat, Wirth
+ * Effective Construction of Relative Lempel-Ziv Dictionaries
+ * Published in WWW 2016.
+ *
+ * Adapted from code originally written by @ot (Giuseppe Ottaviano).
+ ******************************************************************************/
+
+/*-*************************************
+*  Dependencies
+***************************************/
+#include <stdio.h>  /* fprintf */
+#include <stdlib.h> /* malloc, free, qsort */
+#include <string.h> /* memset */
+#include <time.h>   /* clock */
+
+#include "mem.h" /* read */
+#include "pool.h"
+#include "threading.h"
+#include "zstd_internal.h" /* includes zstd.h */
+#ifndef ZDICT_STATIC_LINKING_ONLY
+#define ZDICT_STATIC_LINKING_ONLY
+#endif
+#include "zdict.h"
+
+/*-*************************************
+*  Constants: COVER_MAX_SAMPLES_SIZE is the exclusive upper bound COVER_ctx_init() puts on the summed sample size
+***************************************/
+#define COVER_MAX_SAMPLES_SIZE (sizeof(size_t) == 8 ? ((U32)-1) : ((U32)1 GB))
+
+/*-*************************************
+*  Console display: leveled messages on stderr (the macros expand to bare blocks / if statements)
+***************************************/
+static int g_displayLevel = 2; /* verbosity consulted by DISPLAYLEVEL and DISPLAYUPDATE */
+#define DISPLAY(...)                                                           \
+  {                                                                            \
+    fprintf(stderr, __VA_ARGS__);                                              \
+    fflush(stderr);                                                            \
+  }
+#define LOCALDISPLAYLEVEL(displayLevel, l, ...)                                \
+  if (displayLevel >= l) {                                                     \
+    DISPLAY(__VA_ARGS__);                                                      \
+  } /* 0 : no display;   1: errors;   2: default;  3: details;  4: debug */
+#define DISPLAYLEVEL(l, ...) LOCALDISPLAYLEVEL(g_displayLevel, l, __VA_ARGS__)
+
+#define LOCALDISPLAYUPDATE(displayLevel, l, ...)                               \
+  if (displayLevel >= l) {                                                     \
+    if ((clock() - g_time > refreshRate) || (displayLevel >= 4)) {             \
+      g_time = clock();                                                        \
+      DISPLAY(__VA_ARGS__);                                                    \
+    }                                                                          \
+  }
+#define DISPLAYUPDATE(l, ...) LOCALDISPLAYUPDATE(g_displayLevel, l, __VA_ARGS__)
+static const clock_t refreshRate = CLOCKS_PER_SEC * 15 / 100; /* minimum clock() ticks between two progress updates */
+static clock_t g_time = 0; /* clock() value at the last progress update */
+
+/*-*************************************
+* Hash table
+***************************************
+* A small specialized hash map for storing activeDmers.
+* The map does not resize, so if it becomes full a lookup will loop forever.
+* Thus, the map must be large enough to store every value.
+* The map implements linear probing and keeps its load less than 0.5.
+*/
+
+#define MAP_EMPTY_VALUE ((U32)-1) /* value that marks a slot as unused */
+typedef struct COVER_map_pair_t_s {
+  U32 key;
+  U32 value; /* MAP_EMPTY_VALUE while the slot is free */
+} COVER_map_pair_t;
+
+typedef struct COVER_map_s {
+  COVER_map_pair_t *data; /* array of `size` slots, allocated by COVER_map_init() */
+  U32 sizeLog;            /* log2 of size */
+  U32 size;               /* number of slots, always a power of two */
+  U32 sizeMask;           /* size - 1, used to wrap probe indices */
+} COVER_map_t;
+
+/** Marks every slot empty.  memset() stores the low byte of MAP_EMPTY_VALUE
+ *  (0xFF) everywhere, so each 32-bit key and value reads back as (U32)-1. */
+static void COVER_map_clear(COVER_map_t *map) {
+  size_t const nbBytes = map->size * sizeof(COVER_map_pair_t);
+  memset(map->data, MAP_EMPTY_VALUE, nbBytes);
+}
+
+/**
+ * Allocates and clears a map with 2^(ZSTD_highbit32(size) + 2) slots, sized to
+ * hold `size` elements; release it with COVER_map_destroy().  Returns 1 on
+ * success, or 0 (data == NULL, sizeLog and size zeroed) if malloc() fails.
+ */
+static int COVER_map_init(COVER_map_t *map, U32 size) {
+  U32 const log2Slots = ZSTD_highbit32(size) + 2;
+  map->sizeLog = log2Slots;
+  map->size = (U32)1 << log2Slots;
+  map->sizeMask = map->size - 1;
+  map->data = (COVER_map_pair_t *)malloc(map->size * sizeof(*map->data));
+  if (map->data != NULL) {
+    COVER_map_clear(map);
+    return 1;
+  }
+  map->sizeLog = 0;
+  map->size = 0;
+  return 0;
+}
+
+/** Internal hash function: a multiplicative hash that keeps the top
+ *  map->sizeLog bits of (key * prime4bytes), i.e. a slot index below map->size. */
+static const U32 prime4bytes = 2654435761U;
+static U32 COVER_map_hash(COVER_map_t *map, U32 key) {
+  U32 const discardedBits = 32 - map->sizeLog;
+  return (key * prime4bytes) >> discardedBits;
+}
+
+/**
+ * Finds the slot belonging to `key` using linear probing.
+ * Probing starts at COVER_map_hash(map, key) and advances one slot at a time,
+ * wrapping with sizeMask, until it meets either an empty slot (key is absent;
+ * this is where it would be inserted) or a slot that already holds key.
+ * Returns that slot's index; callers distinguish the two cases by comparing
+ * the slot's value with MAP_EMPTY_VALUE.  Loops forever on a full map.
+ */
+static U32 COVER_map_index(COVER_map_t *map, U32 key) {
+  U32 slot = COVER_map_hash(map, key);
+  while (map->data[slot].value != MAP_EMPTY_VALUE &&
+         map->data[slot].key != key) {
+    slot = (slot + 1) & map->sizeMask;
+  }
+  return slot;
+}
+
+/**
+ * Returns a pointer to the value stored for key, inserting key with a value
+ * of 0 first when it is not yet present.  The map must not be full.
+ */
+static U32 *COVER_map_at(COVER_map_t *map, U32 key) {
+  U32 const slot = COVER_map_index(map, key);
+  COVER_map_pair_t *const entry = map->data + slot;
+  if (entry->value == MAP_EMPTY_VALUE) {
+    entry->key = key;
+    entry->value = 0;
+  }
+  return &entry->value;
+}
+
+/** Deletes key from the map if present.  Later entries of the probe run are
+ *  shifted back into the hole so that lookups by linear probing still find
+ *  them; the slot vacated last is the one finally marked empty. */
+static void COVER_map_remove(COVER_map_t *map, U32 key) {
+  U32 i = COVER_map_index(map, key);
+  COVER_map_pair_t *del = &map->data[i];
+  U32 shift = 1; /* distance, in slots, from del to the slot being examined */
+  if (del->value == MAP_EMPTY_VALUE) {
+    return; /* key is not in the map */
+  }
+  for (i = (i + 1) & map->sizeMask;; i = (i + 1) & map->sizeMask) {
+    COVER_map_pair_t *const pos = &map->data[i];
+    /* If the position is empty the probe run is over: free the hole and stop */
+    if (pos->value == MAP_EMPTY_VALUE) {
+      del->value = MAP_EMPTY_VALUE;
+      return;
+    }
+    /* Move pos into del if pos sits at least `shift` slots past its hash slot */
+    if (((i - COVER_map_hash(map, pos->key)) & map->sizeMask) >= shift) {
+      del->key = pos->key;
+      del->value = pos->value;
+      del = pos; /* the hole is now where pos used to be */
+      shift = 1;
+    } else {
+      ++shift;
+    }
+  }
+}
+
+/**
+ * Releases the storage of a map that was set up by COVER_map_init().
+ * Safe to call when map->data is NULL (free(NULL) is a no-op); afterwards
+ * data is NULL and size is 0.  sizeLog and sizeMask are left untouched.
+ */
+static void COVER_map_destroy(COVER_map_t *map) {
+  free(map->data);
+  map->data = NULL;
+  map->size = 0;
+}
+
+/*-*************************************
+* Context
+***************************************/
+
+typedef struct {
+  const BYTE *samples;        /* sample bytes passed to COVER_ctx_init(); not freed by the context */
+  size_t *offsets;            /* nbSamples + 1 entries; offsets[i] is where sample i starts */
+  const size_t *samplesSizes; /* per-sample sizes passed to COVER_ctx_init(); not freed by the context */
+  size_t nbSamples;
+  U32 *suffix;                /* partial suffix array; NULL once COVER_ctx_init() has succeeded */
+  size_t suffixSize;          /* number of entries in suffix, dmerAt and freqs */
+  U32 *freqs;                 /* takes over the suffix buffer at the end of COVER_ctx_init() */
+  U32 *dmerAt;                /* dmerAt[position] is the dmerId of the dmer starting there */
+  unsigned d;                 /* dmer length in bytes */
+} COVER_ctx_t;
+
+/* qsort() has no user-data argument, so COVER_ctx_init() publishes its context here for the comparators. */
+static COVER_ctx_t *g_ctx = NULL;
+
+/*-*************************************
+*  Helper functions
+***************************************/
+
+/**
+ * Adds up the first nbSamples entries of samplesSizes (0 when nbSamples is 0).
+ */
+static size_t COVER_sum(const size_t *samplesSizes, unsigned nbSamples) {
+  size_t total = 0;
+  unsigned remaining = nbSamples;
+  while (remaining > 0) {
+    total += samplesSizes[--remaining];
+  }
+  return total;
+}
+
+/**
+ * Compares the dmers (ctx->d bytes each) that start at the sample positions
+ * stored in *lp and *rp.  Returns memcmp()'s result: negative, zero or
+ * positive when the left dmer is less than, equal to or greater than the right.
+ */
+static int COVER_cmp(COVER_ctx_t *ctx, const void *lp, const void *rp) {
+  const BYTE *const left = ctx->samples + *(const U32 *)lp;
+  const BYTE *const right = ctx->samples + *(const U32 *)rp;
+  return memcmp(left, right, ctx->d);
+}
+/**
+ * Faster variant of COVER_cmp() for d <= 8: loads 8 bytes at each position
+ * with MEM_readLE64(), keeps the low 8*d bits and compares the two integers.
+ * Returns -1, 0 or 1.  NOTE(review): the order is by integer value, which
+ * presumably differs from memcmp() order; COVER_groupBy() only tests for 0.
+ */
+static int COVER_cmp8(COVER_ctx_t *ctx, const void *lp, const void *rp) {
+  U64 const mask = (ctx->d == 8) ? (U64)-1 : (((U64)1 << (8 * ctx->d)) - 1);
+  U64 const left = MEM_readLE64(ctx->samples + *(U32 const *)lp) & mask;
+  U64 const right = MEM_readLE64(ctx->samples + *(U32 const *)rp) & mask;
+  return (left > right) - (left < right);
+}
+
+/**
+ * qsort() comparator: COVER_cmp() on g_ctx, with ties broken by comparing the
+ * element addresses lp and rp so that the result is never 0.
+ * g_ctx must be set before sorting; qsort() offers no user-data argument.
+ */
+static int COVER_strict_cmp(const void *lp, const void *rp) {
+  int const order = COVER_cmp(g_ctx, lp, rp);
+  if (order != 0) {
+    return order;
+  }
+  return (lp < rp) ? -1 : 1;
+}
+/**
+ * Same as COVER_strict_cmp() but built on COVER_cmp8(); for d <= 8.
+ */
+static int COVER_strict_cmp8(const void *lp, const void *rp) {
+  int const order = COVER_cmp8(g_ctx, lp, rp);
+  if (order != 0) {
+    return order;
+  }
+  return (lp < rp) ? -1 : 1;
+}
+
+/**
+ * Binary search over the sorted range [first, last).
+ * Returns a pointer to the first element that is not less than value, or
+ * last when every element is less than value.
+ * The range is assumed to be in ascending order and first <= last.
+ * Each step halves the range: if the middle element is less than value the
+ * answer lies strictly after it, otherwise at or before it.
+ */
+static const size_t *COVER_lower_bound(const size_t *first, const size_t *last,
+                                       size_t value) {
+  while (first != last) {
+    const size_t *const mid = first + (last - first) / 2;
+    if (*mid < value) {
+      first = mid + 1;
+    } else {
+      last = mid;
+    }
+  }
+  return first;
+}
+
+/** Walks `count` elements of `size` bytes, sorted so that equal ones are
+ *  adjacent.  Each maximal run of elements that cmp() reports equal (== 0) to
+ *  the run's first element is handed to grp(ctx, runBegin, runEnd), where
+ *  runEnd points one element past the run. */
+static void
+COVER_groupBy(const void *data, size_t count, size_t size, COVER_ctx_t *ctx,
+              int (*cmp)(COVER_ctx_t *, const void *, const void *),
+              void (*grp)(COVER_ctx_t *, const void *, const void *)) {
+  const BYTE *runBegin = (const BYTE *)data;
+  size_t consumed = 0;
+  while (consumed < count) {
+    const BYTE *runEnd = runBegin + size;
+    for (++consumed; consumed < count; ++consumed) {
+      if (cmp(ctx, runBegin, runEnd) != 0) {
+        break;
+      }
+      runEnd += size;
+    }
+    grp(ctx, runBegin, runEnd);
+    runBegin = runEnd;
+  }
+}
+
+/*-*************************************
+*  Cover functions
+***************************************/
+
+/**
+ * Called by COVER_groupBy() on each group of positions sharing the same dmer.
+ * Sets ctx->dmerAt for every position in the group and overwrites the group's
+ * first suffix-array slot with the number of samples the dmer was counted in.
+ */
+static void COVER_group(COVER_ctx_t *ctx, const void *group,
+                        const void *groupEnd) {
+  /* The group consists of all the positions with the same first d bytes. */
+  const U32 *grpPtr = (const U32 *)group;
+  const U32 *grpEnd = (const U32 *)groupEnd;
+  /* The dmerId is how we will reference this dmer.
+   * This allows us to map the whole dmer space to a much smaller space, the
+   * size of the suffix array.
+   */
+  const U32 dmerId = (U32)(grpPtr - ctx->suffix);
+  /* Count the number of samples this dmer shows up in */
+  U32 freq = 0;
+  /* Part of ctx->offsets still to be searched; it only ever moves forward */
+  const size_t *curOffsetPtr = ctx->offsets;
+  const size_t *offsetsEnd = ctx->offsets + ctx->nbSamples;
+  /* Once *grpPtr >= curSampleEnd this occurrence of the dmer is in a
+   * different sample than the last.
+   */
+  size_t curSampleEnd = ctx->offsets[0];
+  for (; grpPtr != grpEnd; ++grpPtr) {
+    /* Save the dmerId for this position so we can get back to it. */
+    ctx->dmerAt[*grpPtr] = dmerId;
+    /* Dictionaries only help for the first reference to the dmer.
+     * After that zstd can reference the match from the previous reference.
+     * So only count each dmer once for each sample it is in.
+     */
+    if (*grpPtr < curSampleEnd) {
+      continue;
+    }
+    freq += 1;
+    /* Binary search to find the end of the sample *grpPtr is in.
+     * In the common case that grpPtr + 1 == grpEnd we can skip the binary
+     * search because the loop is over.
+     */
+    if (grpPtr + 1 != grpEnd) {
+      const size_t *sampleEndPtr =
+          COVER_lower_bound(curOffsetPtr, offsetsEnd, *grpPtr);
+      curSampleEnd = *sampleEndPtr;
+      curOffsetPtr = sampleEndPtr + 1;
+    }
+  }
+  /* At this point we are never going to look at this segment of the suffix
+   * array again.  We take advantage of this fact to save memory.
+   * We store the frequency of the dmer in the first position of the group,
+   * which is dmerId.
+   */
+  ctx->suffix[dmerId] = freq;
+}
+
+/**
+ * A segment is a range in the source as well as the score of the segment.
+ */
+typedef struct {
+  U32 begin;    /* first position of the range (an index into ctx->dmerAt) */
+  U32 end;      /* one past the last position of the range */
+  double score; /* sum of freqs[] over the distinct dmers inside the range */
+} COVER_segment_t;
+
+/**
+ * Selects the best segment in an epoch.
+ * Segments are scored according to the function:
+ *
+ * Let F(d) be the frequency of dmer d.
+ * Let S_i be the dmer at position i of segment S which has length k.
+ *
+ *     Score(S) = F(S_1) + F(S_2) + ... + F(S_{k-d+1})
+ *
+ * Once the dmer d is in the dictionary we set F(d) = 0.
+ */
+static COVER_segment_t COVER_selectSegment(const COVER_ctx_t *ctx, U32 *freqs,
+                                           COVER_map_t *activeDmers, U32 begin,
+                                           U32 end,
+                                           ZDICT_cover_params_t parameters) {
+  /* Constants */
+  const U32 k = parameters.k;
+  const U32 d = parameters.d;
+  const U32 dmersInK = k - d + 1;
+  /* Try each segment (activeSegment) and save the best (bestSegment) */
+  COVER_segment_t bestSegment = {0, 0, 0};
+  COVER_segment_t activeSegment;
+  /* Reset the activeDmers in the segment */
+  COVER_map_clear(activeDmers);
+  /* The activeSegment starts at the beginning of the epoch. */
+  activeSegment.begin = begin;
+  activeSegment.end = begin;
+  activeSegment.score = 0;
+  /* Slide the activeSegment through the whole epoch.
+   * Save the best segment in bestSegment.
+   */
+  while (activeSegment.end < end) {
+    /* The dmerId for the dmer at the next position */
+    U32 newDmer = ctx->dmerAt[activeSegment.end];
+    /* The entry in activeDmers for this dmerId */
+    U32 *newDmerOcc = COVER_map_at(activeDmers, newDmer);
+    /* If the dmer isn't already present in the segment add its score. */
+    if (*newDmerOcc == 0) {
+      /* The paper suggests using the L-0.5 norm, but experiments show that it
+       * doesn't help.
+       */
+      activeSegment.score += freqs[newDmer];
+    }
+    /* Add the dmer to the segment */
+    activeSegment.end += 1;
+    *newDmerOcc += 1;
+
+    /* If the window is now too large, drop the first position */
+    if (activeSegment.end - activeSegment.begin == dmersInK + 1) {
+      U32 delDmer = ctx->dmerAt[activeSegment.begin];
+      U32 *delDmerOcc = COVER_map_at(activeDmers, delDmer);
+      activeSegment.begin += 1;
+      *delDmerOcc -= 1;
+      /* If this is the last occurrence of the dmer, subtract its score */
+      if (*delDmerOcc == 0) {
+        COVER_map_remove(activeDmers, delDmer);
+        activeSegment.score -= freqs[delDmer];
+      }
+    }
+
+    /* If this segment is the best so far save it */
+    if (activeSegment.score > bestSegment.score) {
+      bestSegment = activeSegment;
+    }
+  }
+  {
+    /* Trim off the zero frequency head and tail from the segment. */
+    U32 newBegin = bestSegment.end;
+    U32 newEnd = bestSegment.begin;
+    U32 pos;
+    for (pos = bestSegment.begin; pos != bestSegment.end; ++pos) {
+      U32 freq = freqs[ctx->dmerAt[pos]];
+      if (freq != 0) {
+        newBegin = MIN(newBegin, pos);
+        newEnd = pos + 1;
+      }
+    }
+    bestSegment.begin = newBegin;
+    bestSegment.end = newEnd;
+  }
+  {
+    /* Zero out the frequency of each dmer covered by the chosen segment. */
+    U32 pos;
+    for (pos = bestSegment.begin; pos != bestSegment.end; ++pos) {
+      freqs[ctx->dmerAt[pos]] = 0;
+    }
+  }
+  return bestSegment;
+}
+
+/**
+ * Check the validity of the parameters.
+ * Valid means that both required parameters are present (d != 0 and k != 0)
+ * and that d does not exceed k.
+ * Returns 1 if the parameters are valid and 0 otherwise.
+ */
+static int COVER_checkParameters(ZDICT_cover_params_t parameters) {
+  int const hasRequired = (parameters.d != 0) && (parameters.k != 0);
+  int const dFitsInK = (parameters.d <= parameters.k);
+  if (!hasRequired) {
+    /* k and d are required parameters */
+    return 0;
+  }
+  return dFitsInK;
+}
+
+/**
+ * Clean up a context initialized with `COVER_ctx_init()`.
+ * Frees the four buffers the context owns (suffix, freqs, dmerAt, offsets)
+ * and resets each pointer to NULL, so calling it twice is harmless.
+ * free(NULL) is a no-op, so buffers that were never allocated need no check.
+ * The samples and samplesSizes arrays are not freed here.
+ * Does nothing when ctx is NULL.
+ */
+static void COVER_ctx_destroy(COVER_ctx_t *ctx) {
+  if (ctx == NULL) {
+    return;
+  }
+  /* Partial suffix array (already NULL after a successful init) */
+  free(ctx->suffix);
+  ctx->suffix = NULL;
+  /* Frequencies (the former suffix buffer after a successful init) */
+  free(ctx->freqs);
+  ctx->freqs = NULL;
+  /* Position -> dmerId table, then the sample offsets */
+  free(ctx->dmerAt);
+  ctx->dmerAt = NULL;
+  free(ctx->offsets);
+  ctx->offsets = NULL;
+}
+
+/**
+ * Prepare a context for dictionary building.
+ * The context is only dependent on the samples and the parameter `d`, so it
+ * can be used multiple times.  ctx is zeroed first, so it owns no memory when
+ * 0 is returned (total sample size out of range, or allocation failure).
+ * Returns 1 on success; then destroy it with `COVER_ctx_destroy()`.
+ */
+static int COVER_ctx_init(COVER_ctx_t *ctx, const void *samplesBuffer,
+                          const size_t *samplesSizes, unsigned nbSamples,
+                          unsigned d) {
+  const BYTE *const samples = (const BYTE *)samplesBuffer;
+  const size_t totalSamplesSize = COVER_sum(samplesSizes, nbSamples);
+  /* Zero the context before any early return so it is always well-defined */
+  memset(ctx, 0, sizeof(*ctx));
+  /* Checks: the input must hold at least MAX(d, 8) bytes in total */
+  if (totalSamplesSize < MAX(d, sizeof(U64))) {
+    DISPLAYLEVEL(1, "Total samples size is too small, minimum size is %u bytes\n",
+                 (U32)MAX(d, sizeof(U64)));
+    return 0;
+  }
+  if (totalSamplesSize >= (size_t)COVER_MAX_SAMPLES_SIZE) {
+    DISPLAYLEVEL(1, "Total samples size is too large, maximum size is %u MB\n",
+                 (COVER_MAX_SAMPLES_SIZE >> 20));
+    return 0;
+  }
+  DISPLAYLEVEL(2, "Training on %u samples of total size %u\n", nbSamples,
+               (U32)totalSamplesSize);
+  ctx->samples = samples;
+  ctx->samplesSizes = samplesSizes;
+  ctx->nbSamples = nbSamples;
+  /* Partial suffix array */
+  ctx->suffixSize = totalSamplesSize - MAX(d, sizeof(U64)) + 1;
+  ctx->suffix = (U32 *)malloc(ctx->suffixSize * sizeof(U32));
+  /* Maps index to the dmerID */
+  ctx->dmerAt = (U32 *)malloc(ctx->suffixSize * sizeof(U32));
+  /* The offsets of each file */
+  ctx->offsets = (size_t *)malloc((nbSamples + 1) * sizeof(size_t));
+  if (!ctx->suffix || !ctx->dmerAt || !ctx->offsets) {
+    DISPLAYLEVEL(1, "Failed to allocate scratch buffers\n");
+    COVER_ctx_destroy(ctx);
+    return 0;
+  }
+  ctx->freqs = NULL;
+  ctx->d = d;
+  /* Fill offsets from the samplesSizes */
+  {
+    U32 i;
+    ctx->offsets[0] = 0;
+    for (i = 1; i <= nbSamples; ++i) {
+      ctx->offsets[i] = ctx->offsets[i - 1] + samplesSizes[i - 1];
+    }
+  }
+  DISPLAYLEVEL(2, "Constructing partial suffix array\n");
+  {
+    /* suffix is a partial suffix array: positions are sorted by their first d
+     * bytes only, with ties broken by the COVER_strict_cmp*() comparators. */
+    U32 i;
+    for (i = 0; i < ctx->suffixSize; ++i) {
+      ctx->suffix[i] = i;
+    }
+    /* qsort doesn't take an opaque pointer, so pass as a global */
+    g_ctx = ctx;
+    qsort(ctx->suffix, ctx->suffixSize, sizeof(U32),
+          (ctx->d <= 8 ? &COVER_strict_cmp8 : &COVER_strict_cmp));
+  }
+  DISPLAYLEVEL(2, "Computing frequencies\n");
+  /* For each dmer group (group of positions with the same first d bytes):
+   * 1. For each position we set dmerAt[position] = dmerID.  The dmerID is
+   *    (groupBeginPtr - suffix).  This allows us to go from position to
+   *    dmerID so we can look up values in freq.
+   * 2. We calculate how many samples the dmer occurs in and save it in
+   *    freqs[dmerId].  freqs then takes over the suffix buffer. */
+  COVER_groupBy(ctx->suffix, ctx->suffixSize, sizeof(U32), ctx,
+                (ctx->d <= 8 ? &COVER_cmp8 : &COVER_cmp), &COVER_group);
+  ctx->freqs = ctx->suffix;
+  ctx->suffix = NULL;
+  return 1;
+}
+
+/** Fills dictBuffer from the back with segments chosen epoch by epoch via
+ *  COVER_selectSegment() (which zeroes the freqs of the dmers it covers).
+ *  Returns `tail`, the offset in dictBuffer where the selected content starts. */
+static size_t COVER_buildDictionary(const COVER_ctx_t *ctx, U32 *freqs,
+                                    COVER_map_t *activeDmers, void *dictBuffer,
+                                    size_t dictBufferCapacity,
+                                    ZDICT_cover_params_t parameters) {
+  BYTE *const dict = (BYTE *)dictBuffer;
+  size_t tail = dictBufferCapacity;
+  /* Divide the data up into epochs of equal size.
+   * We will select at least one segment from each epoch.
+   * At least one epoch: k > dictBufferCapacity must not cause a division by 0. */
+  const U32 epochs = MAX((U32)1, (U32)(dictBufferCapacity / parameters.k));
+  const U32 epochSize = (U32)(ctx->suffixSize / epochs);
+  size_t epoch;
+  DISPLAYLEVEL(2, "Breaking content into %u epochs of size %u\n", epochs,
+               epochSize);
+  /* Loop through the epochs until there are no more segments or the dictionary
+   * is full.
+   */
+  for (epoch = 0; tail > 0; epoch = (epoch + 1) % epochs) {
+    const U32 epochBegin = (U32)(epoch * epochSize);
+    const U32 epochEnd = epochBegin + epochSize;
+    size_t segmentSize;
+    /* Select a segment */
+    COVER_segment_t segment = COVER_selectSegment(
+        ctx, freqs, activeDmers, epochBegin, epochEnd, parameters);
+    /* Trim the segment if necessary and if it is empty then we are done */
+    segmentSize = MIN(segment.end - segment.begin + parameters.d - 1, tail);
+    if (segmentSize == 0) {
+      break;
+    }
+    /* We fill the dictionary from the back to allow the best segments to be
+     * referenced with the smallest offsets.
+     */
+    tail -= segmentSize;
+    memcpy(dict + tail, ctx->samples + segment.begin, segmentSize);
+    DISPLAYUPDATE(
+        2, "\r%u%%       ",
+        (U32)(((dictBufferCapacity - tail) * 100) / dictBufferCapacity));
+  }
+  DISPLAYLEVEL(2, "\r%79s\r", "");
+  return tail;
+}
+
+ZDICTLIB_API size_t ZDICT_trainFromBuffer_cover(
+    void *dictBuffer, size_t dictBufferCapacity, const void *samplesBuffer,
+    const size_t *samplesSizes, unsigned nbSamples,
+    ZDICT_cover_params_t parameters) {
+  /* Trains one COVER dictionary for the given k and d into dictBuffer and
+   * returns its size, or an error code (see the ERROR() returns below). */
+  BYTE *const dictStart = (BYTE *)dictBuffer;
+  COVER_ctx_t ctx;
+  COVER_map_t activeDmers;
+  size_t contentOffset;
+  size_t dictionarySize;
+  if (!COVER_checkParameters(parameters)) {
+    DISPLAYLEVEL(1, "Cover parameters incorrect\n");
+    return ERROR(GENERIC);
+  }
+  if (nbSamples == 0) {
+    DISPLAYLEVEL(1, "Cover must have at least one input file\n");
+    return ERROR(GENERIC);
+  }
+  if (dictBufferCapacity < ZDICT_DICTSIZE_MIN) {
+    DISPLAYLEVEL(1, "dictBufferCapacity must be at least %u\n",
+                 ZDICT_DICTSIZE_MIN);
+    return ERROR(dstSize_tooSmall);
+  }
+  g_displayLevel = parameters.zParams.notificationLevel;
+  /* Set up the context, then the hash map holding the dmers of one segment */
+  if (!COVER_ctx_init(&ctx, samplesBuffer, samplesSizes, nbSamples,
+                      parameters.d)) {
+    return ERROR(GENERIC);
+  }
+  if (!COVER_map_init(&activeDmers, parameters.k - parameters.d + 1)) {
+    DISPLAYLEVEL(1, "Failed to allocate dmer map: out of memory\n");
+    COVER_ctx_destroy(&ctx);
+    return ERROR(GENERIC);
+  }
+  DISPLAYLEVEL(2, "Building dictionary\n");
+  contentOffset = COVER_buildDictionary(&ctx, ctx.freqs, &activeDmers,
+                                        dictBuffer, dictBufferCapacity,
+                                        parameters);
+  dictionarySize = ZDICT_finalizeDictionary(
+      dictStart, dictBufferCapacity, dictStart + contentOffset,
+      dictBufferCapacity - contentOffset, samplesBuffer, samplesSizes,
+      nbSamples, parameters.zParams);
+  if (!ZSTD_isError(dictionarySize)) {
+    DISPLAYLEVEL(2, "Constructed dictionary of size %u\n",
+                 (U32)dictionarySize);
+  }
+  COVER_ctx_destroy(&ctx);
+  COVER_map_destroy(&activeDmers);
+  return dictionarySize;
+}
+
+/**
+ * COVER_best_t is used for two purposes:
+ * 1. Synchronizing threads.
+ * 2. Saving the best parameters and dictionary.
+ *
+ * All of the methods except COVER_best_init() are thread safe if zstd is
+ * compiled with multithreaded support.
+ */
+typedef struct COVER_best_s {
+  pthread_mutex_t mutex;           /* held while liveJobs and the saved result are updated */
+  pthread_cond_t cond;             /* broadcast when liveJobs drops to 0 */
+  size_t liveJobs;                 /* jobs started but not yet finished */
+  void *dict;                      /* copy of the best dictionary so far, or NULL */
+  size_t dictSize;                 /* size of the dictionary stored in dict */
+  ZDICT_cover_params_t parameters; /* parameters that produced dict */
+  size_t compressedSize;           /* compressed size reached with dict; (size_t)-1 initially */
+} COVER_best_t;
+
+/**
+ * Puts `best` into its empty state (no live jobs, no dictionary, zeroed
+ * parameters, compressedSize == (size_t)-1) and creates its mutex and cond.
+ */
+static void COVER_best_init(COVER_best_t *best) {
+  if (best != NULL) {
+    pthread_mutex_init(&best->mutex, NULL);
+    pthread_cond_init(&best->cond, NULL);
+    memset(&best->parameters, 0, sizeof(best->parameters));
+    best->compressedSize = (size_t)-1;
+    best->dictSize = 0;
+    best->dict = NULL;
+    best->liveJobs = 0;
+  }
+}
+
+/**
+ * Blocks the caller until no job is live any more (liveJobs == 0).
+ * Does nothing when best is NULL.
+ */
+static void COVER_best_wait(COVER_best_t *best) {
+  if (best != NULL) {
+    pthread_mutex_lock(&best->mutex);
+    while (best->liveJobs > 0) {
+      pthread_cond_wait(&best->cond, &best->mutex);
+    }
+    pthread_mutex_unlock(&best->mutex);
+  }
+}
+
+/**
+ * Waits for all live jobs with COVER_best_wait(), then frees the saved
+ * dictionary (free(NULL) is a no-op) and destroys the mutex and condition
+ * variable.  Does nothing when best is NULL.
+ */
+static void COVER_best_destroy(COVER_best_t *best) {
+  if (best == NULL) {
+    return;
+  }
+  COVER_best_wait(best);
+  free(best->dict);
+  pthread_mutex_destroy(&best->mutex);
+  pthread_cond_destroy(&best->cond);
+}
+
+/**
+ * Called when a thread is about to be launched.
+ * Adds one to liveJobs while holding the mutex; each call is meant to be
+ * matched by one COVER_best_finish().  Does nothing when best is NULL.
+ */
+static void COVER_best_start(COVER_best_t *best) {
+  if (best != NULL) {
+    pthread_mutex_lock(&best->mutex);
+    best->liveJobs += 1;
+    pthread_mutex_unlock(&best->mutex);
+  }
+}
+
+/**
+ * Called when a thread finishes executing, both on error or success.
+ * Decrements liveJobs and signals any waiting threads if liveJobs == 0.
+ * If this dictionary is the best so far (smaller compressedSize), saves a copy
+ * of it and its parameters; if the copy cannot be allocated the saved result
+ * is invalidated instead.  The mutex is released on every path.
+ */
+static void COVER_best_finish(COVER_best_t *best, size_t compressedSize,
+                              ZDICT_cover_params_t parameters, void *dict,
+                              size_t dictSize) {
+  size_t liveJobs;
+  if (!best) {
+    return;
+  }
+  pthread_mutex_lock(&best->mutex);
+  --best->liveJobs;
+  liveJobs = best->liveJobs;
+  /* If the new dictionary is better */
+  if (compressedSize < best->compressedSize) {
+    /* Allocate space if necessary */
+    if (!best->dict || best->dictSize < dictSize) {
+      free(best->dict);
+      best->dict = malloc(dictSize);
+    }
+    if (best->dict) {
+      /* Save the dictionary, parameters, and size */
+      memcpy(best->dict, dict, dictSize);
+      best->dictSize = dictSize;
+      best->parameters = parameters;
+      best->compressedSize = compressedSize;
+    } else {
+      /* Out of memory: record the failure but still release the lock and
+       * wake the waiter below, otherwise COVER_best_wait() blocks forever. */
+      best->compressedSize = ERROR(GENERIC);
+      best->dictSize = 0;
+    }
+  }
+  pthread_mutex_unlock(&best->mutex);
+  if (liveJobs == 0) {
+    pthread_cond_broadcast(&best->cond);
+  }
+}
+
+/**
+ * Parameters for COVER_tryParameters().
+ */
+typedef struct COVER_tryParameters_data_s {
+  const COVER_ctx_t *ctx;          /* prepared context; only read by the job */
+  COVER_best_t *best;              /* receives the result via COVER_best_finish() */
+  size_t dictBufferCapacity;       /* size of the dictionary buffer the job allocates */
+  ZDICT_cover_params_t parameters; /* parameter set to evaluate */
+} COVER_tryParameters_data_t;
+
+/**
+ * Tries a set of parameters and updates the COVER_best_t with the results.
+ * This function is thread safe if zstd is compiled with multithreaded support.
+ * It takes its parameters as an *OWNING* opaque pointer to support threading.
+ */
+static void COVER_tryParameters(void *opaque) {
+  /* Save parameters as local variables */
+  COVER_tryParameters_data_t *const data = (COVER_tryParameters_data_t *)opaque;
+  const COVER_ctx_t *const ctx = data->ctx;
+  const ZDICT_cover_params_t parameters = data->parameters;
+  size_t dictBufferCapacity = data->dictBufferCapacity;
+  size_t totalCompressedSize = ERROR(GENERIC); /* stays an error unless every sample compresses */
+  /* Allocate space for hash table, dict, and freqs */
+  COVER_map_t activeDmers;
+  BYTE *const dict = (BYTE * const)malloc(dictBufferCapacity);
+  U32 *freqs = (U32 *)malloc(ctx->suffixSize * sizeof(U32));
+  if (!COVER_map_init(&activeDmers, parameters.k - parameters.d + 1)) {
+    DISPLAYLEVEL(1, "Failed to allocate dmer map: out of memory\n");
+    goto _cleanup;
+  }
+  if (!dict || !freqs) {
+    DISPLAYLEVEL(1, "Failed to allocate buffers: out of memory\n");
+    goto _cleanup;
+  }
+  /* Copy the frequencies because we need to modify them */
+  memcpy(freqs, ctx->freqs, ctx->suffixSize * sizeof(U32));
+  /* Build the dictionary */
+  {
+    const size_t tail = COVER_buildDictionary(ctx, freqs, &activeDmers, dict,
+                                              dictBufferCapacity, parameters);
+    /* From here on dictBufferCapacity holds the finalized dictionary size */
+    dictBufferCapacity = ZDICT_finalizeDictionary(
+        dict, dictBufferCapacity, dict + tail, dictBufferCapacity - tail,
+        ctx->samples, ctx->samplesSizes, (unsigned)ctx->nbSamples,
+        parameters.zParams);
+    if (ZDICT_isError(dictBufferCapacity)) {
+      DISPLAYLEVEL(1, "Failed to finalize dictionary\n");
+      goto _cleanup;
+    }
+  }
+  /* Check total compressed size */
+  {
+    /* Pointers */
+    ZSTD_CCtx *cctx;
+    ZSTD_CDict *cdict;
+    void *dst;
+    /* Local variables */
+    size_t dstCapacity;
+    size_t i;
+    /* Allocate dst with enough space to compress the maximum sized sample */
+    {
+      size_t maxSampleSize = 0;
+      for (i = 0; i < ctx->nbSamples; ++i) {
+        maxSampleSize = MAX(ctx->samplesSizes[i], maxSampleSize);
+      }
+      dstCapacity = ZSTD_compressBound(maxSampleSize);
+      dst = malloc(dstCapacity);
+    }
+    /* Create the cctx and cdict */
+    cctx = ZSTD_createCCtx();
+    cdict = ZSTD_createCDict(dict, dictBufferCapacity,
+                             parameters.zParams.compressionLevel);
+    if (!dst || !cctx || !cdict) {
+      goto _compressCleanup;
+    }
+    /* Compress each sample and sum their sizes (or error) */
+    totalCompressedSize = 0;
+    for (i = 0; i < ctx->nbSamples; ++i) {
+      const size_t size = ZSTD_compress_usingCDict(
+          cctx, dst, dstCapacity, ctx->samples + ctx->offsets[i],
+          ctx->samplesSizes[i], cdict);
+      if (ZSTD_isError(size)) {
+        totalCompressedSize = ERROR(GENERIC);
+        goto _compressCleanup;
+      }
+      totalCompressedSize += size;
+    }
+  _compressCleanup:
+    ZSTD_freeCCtx(cctx);
+    ZSTD_freeCDict(cdict);
+    if (dst) {
+      free(dst);
+    }
+  }
+
+_cleanup:
+  /* Always report, success or failure, so that liveJobs is decremented */
+  COVER_best_finish(data->best, totalCompressedSize, parameters, dict,
+                    dictBufferCapacity);
+  free(data); /* the opaque argument is owned by this job */
+  COVER_map_destroy(&activeDmers);
+  if (dict) {
+    free(dict);
+  }
+  if (freqs) {
+    free(freqs);
+  }
+}
+
+ZDICTLIB_API size_t ZDICT_optimizeTrainFromBuffer_cover(
+    void *dictBuffer, size_t dictBufferCapacity, const void *samplesBuffer,
+    const size_t *samplesSizes, unsigned nbSamples,
+    ZDICT_cover_params_t *parameters) {
+  /* Sweeps d and k over the ranges below, calling COVER_tryParameters() for each pair; a zero d, k or steps in the input parameters selects the defaults. */
+  const unsigned nbThreads = parameters->nbThreads;
+  const unsigned kMinD = parameters->d == 0 ? 6 : parameters->d;
+  const unsigned kMaxD = parameters->d == 0 ? 8 : parameters->d;
+  const unsigned kMinK = parameters->k == 0 ? 50 : parameters->k;
+  const unsigned kMaxK = parameters->k == 0 ? 2000 : parameters->k;
+  const unsigned kSteps = parameters->steps == 0 ? 40 : parameters->steps;
+  const unsigned kStepSize = MAX((kMaxK - kMinK) / kSteps, 1);
+  const unsigned kIterations =
+      (1 + (kMaxD - kMinD) / 2) * (1 + (kMaxK - kMinK) / kStepSize);
+  /* Local variables */
+  const int displayLevel = parameters->zParams.notificationLevel;
+  unsigned iteration = 1;
+  unsigned d;
+  unsigned k;
+  COVER_best_t best;
+  POOL_ctx *pool = NULL;
+  /* Checks: invalid arguments are rejected before the pool or `best` is set up */
+  if (kMinK < kMaxD || kMaxK < kMinK) {
+    LOCALDISPLAYLEVEL(displayLevel, 1, "Incorrect parameters\n");
+    return ERROR(GENERIC);
+  }
+  if (nbSamples == 0) {
+    DISPLAYLEVEL(1, "Cover must have at least one input file\n");
+    return ERROR(GENERIC);
+  }
+  if (dictBufferCapacity < ZDICT_DICTSIZE_MIN) {
+    DISPLAYLEVEL(1, "dictBufferCapacity must be at least %u\n",
+                 ZDICT_DICTSIZE_MIN);
+    return ERROR(dstSize_tooSmall);
+  }
+  if (nbThreads > 1) { /* a pool is only created for multi-threaded runs; otherwise pool stays NULL */
+    pool = POOL_create(nbThreads, 1);
+    if (!pool) {
+      return ERROR(memory_allocation);
+    }
+  }
+  /* Initialization */
+  COVER_best_init(&best);
+  /* Turn down global display level to clean up display at level 2 and below */
+  g_displayLevel = parameters->zParams.notificationLevel - 1;
+  /* Loop through d first (in steps of 2) because each new value needs a new context */
+  LOCALDISPLAYLEVEL(displayLevel, 2, "Trying %u different sets of parameters\n",
+                    kIterations);
+  for (d = kMinD; d <= kMaxD; d += 2) {
+    /* Initialize the context for this value of d */
+    COVER_ctx_t ctx;
+    LOCALDISPLAYLEVEL(displayLevel, 3, "d=%u\n", d);
+    if (!COVER_ctx_init(&ctx, samplesBuffer, samplesSizes, nbSamples, d)) {
+      LOCALDISPLAYLEVEL(displayLevel, 1, "Failed to initialize context\n");
+      COVER_best_destroy(&best);
+      POOL_free(pool);
+      return ERROR(GENERIC);
+    }
+    /* Loop through k reusing the same context */
+    for (k = kMinK; k <= kMaxK; k += kStepSize) {
+      /* Prepare the arguments */
+      COVER_tryParameters_data_t *data = (COVER_tryParameters_data_t *)malloc(
+          sizeof(COVER_tryParameters_data_t));
+      LOCALDISPLAYLEVEL(displayLevel, 3, "k=%u\n", k);
+      if (!data) {
+        LOCALDISPLAYLEVEL(displayLevel, 1, "Failed to allocate parameters\n");
+        COVER_best_destroy(&best);
+        COVER_ctx_destroy(&ctx);
+        POOL_free(pool);
+        return ERROR(GENERIC);
+      }
+      data->ctx = &ctx;
+      data->best = &best;
+      data->dictBufferCapacity = dictBufferCapacity;
+      data->parameters = *parameters;
+      data->parameters.k = k;
+      data->parameters.d = d;
+      data->parameters.steps = kSteps;
+      /* Check the parameters: an invalid (k, d) pair is reported and skipped, not fatal */
+      if (!COVER_checkParameters(data->parameters)) {
+        DISPLAYLEVEL(1, "Cover parameters incorrect\n");
+        free(data);
+        continue;
+      }
+      /* Call the function and pass ownership of data to it */
+      COVER_best_start(&best);
+      if (pool) {
+        POOL_add(pool, &COVER_tryParameters, data);
+      } else {
+        COVER_tryParameters(data);
+      }
+      /* Print status as a percentage of kIterations */
+      LOCALDISPLAYUPDATE(displayLevel, 2, "\r%u%%       ",
+                         (U32)((iteration * 100) / kIterations));
+      ++iteration;
+    }
+    COVER_best_wait(&best); /* presumably waits for this d's outstanding jobs, which all hold &ctx, before ctx is destroyed - TODO confirm */
+    COVER_ctx_destroy(&ctx);
+  }
+  LOCALDISPLAYLEVEL(displayLevel, 2, "\r%79s\r", ""); /* overwrite the progress line with blanks */
+  /* Fill the output buffer and parameters with output of the best parameters; returns the dictionary size */
+  {
+    const size_t dictSize = best.dictSize;
+    if (ZSTD_isError(best.compressedSize)) { /* on error the code is returned; neither dictBuffer nor the caller's parameters are written */
+      const size_t compressedSize = best.compressedSize;
+      COVER_best_destroy(&best);
+      POOL_free(pool);
+      return compressedSize;
+    }
+    *parameters = best.parameters;
+    memcpy(dictBuffer, best.dict, dictSize);
+    COVER_best_destroy(&best);
+    POOL_free(pool);
+    return dictSize;
+  }
+}
diff --git a/contrib/libzstd/include/zstd/dictBuilder/zdict.c b/contrib/libzstd/include/zstd/dictBuilder/zdict.c
index 47a82af14b4..742586eacdd 100644
--- a/contrib/libzstd/include/zstd/dictBuilder/zdict.c
+++ b/contrib/libzstd/include/zstd/dictBuilder/zdict.c
@@ -11,8 +11,9 @@
 /*-**************************************
 *  Tuning parameters
 ****************************************/
+#define MINRATIO 4   /* minimum nb of apparition to be selected in dictionary */
 #define ZDICT_MAX_SAMPLES_SIZE (2000U << 20)
-#define ZDICT_MIN_SAMPLES_SIZE 512
+#define ZDICT_MIN_SAMPLES_SIZE (ZDICT_CONTENTSIZE_MIN * MINRATIO)
 
 
 /*-**************************************
@@ -36,12 +37,11 @@
 #include <time.h>          /* clock */
 
 #include "mem.h"           /* read */
-#include "error_private.h"
 #include "fse.h"           /* FSE_normalizeCount, FSE_writeNCount */
 #define HUF_STATIC_LINKING_ONLY
-#include "huf.h"
+#include "huf.h"           /* HUF_buildCTable, HUF_writeCTable */
 #include "zstd_internal.h" /* includes zstd.h */
-#include "xxhash.h"
+#include "xxhash.h"        /* XXH64 */
 #include "divsufsort.h"
 #ifndef ZDICT_STATIC_LINKING_ONLY
 #  define ZDICT_STATIC_LINKING_ONLY
@@ -60,11 +60,8 @@
 
 #define NOISELENGTH 32
 
-#define MINRATIO 4
-static const int g_compressionLevel_default = 5;
+static const int g_compressionLevel_default = 6;
 static const U32 g_selectivity_default = 9;
-static const size_t g_provision_entropySize = 200;
-static const size_t g_min_fast_dictContent = 192;
 
 
 /*-*************************************
@@ -97,7 +94,7 @@ const char* ZDICT_getErrorName(size_t errorCode) { return ERR_getErrorName(error
 unsigned ZDICT_getDictID(const void* dictBuffer, size_t dictSize)
 {
     if (dictSize < 8) return 0;
-    if (MEM_readLE32(dictBuffer) != ZSTD_DICT_MAGIC) return 0;
+    if (MEM_readLE32(dictBuffer) != ZSTD_MAGIC_DICTIONARY) return 0;
     return MEM_readLE32((const char*)dictBuffer + 4);
 }
 
@@ -307,13 +304,13 @@ static dictItem ZDICT_analyzePos(
         } while (length >=MINMATCHLENGTH);
 
         /* look backward */
-		length = MINMATCHLENGTH;
-		while ((length >= MINMATCHLENGTH) & (start > 0)) {
-			length = ZDICT_count(b + pos, b + suffix[start - 1]);
-			if (length >= LLIMIT) length = LLIMIT - 1;
-			lengthList[length]++;
-			if (length >= MINMATCHLENGTH) start--;
-		}
+        length = MINMATCHLENGTH;
+        while ((length >= MINMATCHLENGTH) & (start > 0)) {
+            length = ZDICT_count(b + pos, b + suffix[start - 1]);
+            if (length >= LLIMIT) length = LLIMIT - 1;
+            lengthList[length]++;
+            if (length >= MINMATCHLENGTH) start--;
+        }
 
         /* largest useful length */
         memset(cumulLength, 0, sizeof(cumulLength));
@@ -364,28 +361,43 @@ static dictItem ZDICT_analyzePos(
 }
 
 
+static int isIncluded(const void* in, const void* container, size_t length)
+{   /* @return : 1 if the first `length` bytes of `in` and `container` are identical, 0 otherwise */
+    const char* const ip = (const char*) in;
+    const char* const into = (const char*) container;
+    size_t u;
+
+    for (u=0; u<length; u++) {  /* works because end of buffer is a noisy guard band */
+        if (ip[u] != into[u]) break;   /* u stops at the first differing byte */
+    }
+
+    return u==length;   /* the loop ran to completion only if no byte differed */
+}
+
 /*! ZDICT_checkMerge
     check if dictItem can be merged, do it if possible
     @return : id of destination elt, 0 if not merged
 */
-static U32 ZDICT_checkMerge(dictItem* table, dictItem elt, U32 eltNbToSkip)
+static U32 ZDICT_tryMerge(dictItem* table, dictItem elt, U32 eltNbToSkip, const void* buffer)
 {
     const U32 tableSize = table->pos;
-    const U32 max = elt.pos + (elt.length-1);
+    const U32 eltEnd = elt.pos + elt.length;
+    const char* const buf = (const char*) buffer;
 
     /* tail overlap */
     U32 u; for (u=1; u<tableSize; u++) {
         if (u==eltNbToSkip) continue;
-        if ((table[u].pos > elt.pos) && (table[u].pos < max)) {  /* overlap */
+        if ((table[u].pos > elt.pos) && (table[u].pos <= eltEnd)) {  /* overlap, existing > new */
             /* append */
-            U32 addedLength = table[u].pos - elt.pos;
+            U32 const addedLength = table[u].pos - elt.pos;
             table[u].length += addedLength;
             table[u].pos = elt.pos;
             table[u].savings += elt.savings * addedLength / elt.length;   /* rough approx */
-            table[u].savings += elt.length / 8;    /* rough approx */
+            table[u].savings += elt.length / 8;    /* rough approx bonus */
             elt = table[u];
+            /* sort : improve rank */
             while ((u>1) && (table[u-1].savings < elt.savings))
-                table[u] = table[u-1], u--;
+            table[u] = table[u-1], u--;
             table[u] = elt;
             return u;
     }   }
@@ -393,20 +405,33 @@ static U32 ZDICT_checkMerge(dictItem* table, dictItem elt, U32 eltNbToSkip)
     /* front overlap */
     for (u=1; u<tableSize; u++) {
         if (u==eltNbToSkip) continue;
-        if ((table[u].pos + table[u].length > elt.pos) && (table[u].pos < elt.pos)) {  /* overlap */
+
+        if ((table[u].pos + table[u].length >= elt.pos) && (table[u].pos < elt.pos)) {  /* overlap, existing < new */
             /* append */
-            int addedLength = (elt.pos + elt.length) - (table[u].pos + table[u].length);
-            table[u].savings += elt.length / 8;    /* rough approx */
-            if (addedLength > 0) {   /* otherwise, already included */
+            int const addedLength = (int)eltEnd - (table[u].pos + table[u].length);
+            table[u].savings += elt.length / 8;    /* rough approx bonus */
+            if (addedLength > 0) {   /* otherwise, elt fully included into existing */
                 table[u].length += addedLength;
                 table[u].savings += elt.savings * addedLength / elt.length;   /* rough approx */
             }
+            /* sort : improve rank */
             elt = table[u];
             while ((u>1) && (table[u-1].savings < elt.savings))
                 table[u] = table[u-1], u--;
             table[u] = elt;
             return u;
-    }   }
+        }
+
+        if (MEM_read64(buf + table[u].pos) == MEM_read64(buf + elt.pos + 1)) {
+            if (isIncluded(buf + table[u].pos, buf + elt.pos + 1, table[u].length)) {
+                size_t const addedLength = MAX( (int)elt.length - (int)table[u].length , 1 );
+                table[u].pos = elt.pos;
+                table[u].savings += (U32)(elt.savings * addedLength / elt.length);
+                table[u].length = MIN(elt.length, table[u].length + 1);
+                return u;
+            }
+        }
+    }
 
     return 0;
 }
@@ -424,14 +449,14 @@ static void ZDICT_removeDictItem(dictItem* table, U32 id)
 }
 
 
-static void ZDICT_insertDictItem(dictItem* table, U32 maxSize, dictItem elt)
+static void ZDICT_insertDictItem(dictItem* table, U32 maxSize, dictItem elt, const void* buffer)
 {
     /* merge if possible */
-    U32 mergeId = ZDICT_checkMerge(table, elt, 0);
+    U32 mergeId = ZDICT_tryMerge(table, elt, 0, buffer);
     if (mergeId) {
         U32 newMerge = 1;
         while (newMerge) {
-            newMerge = ZDICT_checkMerge(table, table[mergeId], mergeId);
+            newMerge = ZDICT_tryMerge(table, table[mergeId], mergeId, buffer);
             if (newMerge) ZDICT_removeDictItem(table, mergeId);
             mergeId = newMerge;
         }
@@ -462,7 +487,7 @@ static U32 ZDICT_dictSize(const dictItem* dictList)
 }
 
 
-static size_t ZDICT_trainBuffer(dictItem* dictList, U32 dictListSize,
+static size_t ZDICT_trainBuffer_legacy(dictItem* dictList, U32 dictListSize,
                             const void* const buffer, size_t bufferSize,   /* buffer must end with noisy guard band */
                             const size_t* fileSizes, unsigned nbFiles,
                             U32 minRatio, U32 notificationLevel)
@@ -479,7 +504,7 @@ static size_t ZDICT_trainBuffer(dictItem* dictList, U32 dictListSize,
 #   define DISPLAYUPDATE(l, ...) if (notificationLevel>=l) { \
             if (ZDICT_clockSpan(displayClock) > refreshRate)  \
             { displayClock = clock(); DISPLAY(__VA_ARGS__); \
-            if (notificationLevel>=4) fflush(stdout); } }
+            if (notificationLevel>=4) fflush(stderr); } }
 
     /* init */
     DISPLAYLEVEL(2, "\r%70s\r", "");   /* clean display line */
@@ -520,7 +545,7 @@ static size_t ZDICT_trainBuffer(dictItem* dictList, U32 dictListSize,
             if (doneMarks[cursor]) { cursor++; continue; }
             solution = ZDICT_analyzePos(doneMarks, suffix, reverseSuffix[cursor], buffer, minRatio, notificationLevel);
             if (solution.length==0) { cursor++; continue; }
-            ZDICT_insertDictItem(dictList, dictListSize, solution);
+            ZDICT_insertDictItem(dictList, dictListSize, solution, buffer);
             cursor += solution.length;
             DISPLAYUPDATE(2, "\r%4.2f %% \r", (double)cursor / bufferSize * 100);
     }   }
@@ -551,7 +576,7 @@ typedef struct
 {
     ZSTD_CCtx* ref;
     ZSTD_CCtx* zc;
-    void* workPlace;   /* must be ZSTD_BLOCKSIZE_ABSOLUTEMAX allocated */
+    void* workPlace;   /* must be ZSTD_BLOCKSIZE_MAX allocated */
 } EStats_ress_t;
 
 #define MAXREPOFFSET 1024
@@ -560,15 +585,15 @@ static void ZDICT_countEStats(EStats_ress_t esr, ZSTD_parameters params,
                             U32* countLit, U32* offsetcodeCount, U32* matchlengthCount, U32* litlengthCount, U32* repOffsets,
                             const void* src, size_t srcSize, U32 notificationLevel)
 {
-    size_t const blockSizeMax = MIN (ZSTD_BLOCKSIZE_ABSOLUTEMAX, 1 << params.cParams.windowLog);
+    size_t const blockSizeMax = MIN (ZSTD_BLOCKSIZE_MAX, 1 << params.cParams.windowLog);
     size_t cSize;
 
     if (srcSize > blockSizeMax) srcSize = blockSizeMax;   /* protection vs large samples */
     {  size_t const errorCode = ZSTD_copyCCtx(esr.zc, esr.ref, 0);
             if (ZSTD_isError(errorCode)) { DISPLAYLEVEL(1, "warning : ZSTD_copyCCtx failed \n"); return; }
     }
-    cSize = ZSTD_compressBlock(esr.zc, esr.workPlace, ZSTD_BLOCKSIZE_ABSOLUTEMAX, src, srcSize);
-    if (ZSTD_isError(cSize)) { DISPLAYLEVEL(1, "warning : could not compress sample size %u \n", (U32)srcSize); return; }
+    cSize = ZSTD_compressBlock(esr.zc, esr.workPlace, ZSTD_BLOCKSIZE_MAX, src, srcSize);
+    if (ZSTD_isError(cSize)) { DISPLAYLEVEL(3, "warning : could not compress sample size %u \n", (U32)srcSize); return; }
 
     if (cSize) {  /* if == 0; block is not compressible */
         const seqStore_t* seqStorePtr = ZSTD_getSeqStore(esr.zc);
@@ -609,17 +634,6 @@ static void ZDICT_countEStats(EStats_ress_t esr, ZSTD_parameters params,
     }   }   }
 }
 
-/*
-static size_t ZDICT_maxSampleSize(const size_t* fileSizes, unsigned nbFiles)
-{
-    unsigned u;
-    size_t max=0;
-    for (u=0; u<nbFiles; u++)
-        if (max < fileSizes[u]) max = fileSizes[u];
-    return max;
-}
-*/
-
 static size_t ZDICT_totalSampleSize(const size_t* fileSizes, unsigned nbFiles)
 {
     size_t total=0;
@@ -675,26 +689,26 @@ static size_t ZDICT_analyzeEntropy(void*  dstBuffer, size_t maxDstSize,
     /* init */
     esr.ref = ZSTD_createCCtx();
     esr.zc = ZSTD_createCCtx();
-    esr.workPlace = malloc(ZSTD_BLOCKSIZE_ABSOLUTEMAX);
+    esr.workPlace = malloc(ZSTD_BLOCKSIZE_MAX);
     if (!esr.ref || !esr.zc || !esr.workPlace) {
         eSize = ERROR(memory_allocation);
         DISPLAYLEVEL(1, "Not enough memory \n");
         goto _cleanup;
     }
     if (offcodeMax>OFFCODE_MAX) { eSize = ERROR(dictionary_wrong); goto _cleanup; }   /* too large dictionary */
-    for (u=0; u<256; u++) countLit[u]=1;   /* any character must be described */
-    for (u=0; u<=offcodeMax; u++) offcodeCount[u]=1;
-    for (u=0; u<=MaxML; u++) matchLengthCount[u]=1;
-    for (u=0; u<=MaxLL; u++) litLengthCount[u]=1;
+    for (u=0; u<256; u++) countLit[u] = 1;   /* any character must be described */
+    for (u=0; u<=offcodeMax; u++) offcodeCount[u] = 1;
+    for (u=0; u<=MaxML; u++) matchLengthCount[u] = 1;
+    for (u=0; u<=MaxLL; u++) litLengthCount[u] = 1;
     memset(repOffset, 0, sizeof(repOffset));
     repOffset[1] = repOffset[4] = repOffset[8] = 1;
     memset(bestRepOffset, 0, sizeof(bestRepOffset));
-    if (compressionLevel==0) compressionLevel=g_compressionLevel_default;
+    if (compressionLevel==0) compressionLevel = g_compressionLevel_default;
     params = ZSTD_getParams(compressionLevel, averageSampleSize, dictBufferSize);
     {   size_t const beginResult = ZSTD_compressBegin_advanced(esr.ref, dictBuffer, dictBufferSize, params, 0);
-            if (ZSTD_isError(beginResult)) {
+        if (ZSTD_isError(beginResult)) {
+            DISPLAYLEVEL(1, "error : ZSTD_compressBegin_advanced() failed : %s \n", ZSTD_getErrorName(beginResult));
             eSize = ERROR(GENERIC);
-            DISPLAYLEVEL(1, "error : ZSTD_compressBegin_advanced failed \n");
             goto _cleanup;
     }   }
 
@@ -811,7 +825,6 @@ static size_t ZDICT_analyzeEntropy(void*  dstBuffer, size_t maxDstSize,
     MEM_writeLE32(dstPtr+4, repStartValue[1]);
     MEM_writeLE32(dstPtr+8, repStartValue[2]);
 #endif
-    dstPtr += 12;
     eSize += 12;
 
 _cleanup:
@@ -823,26 +836,66 @@ _cleanup:
 }
 
 
-size_t ZDICT_addEntropyTablesFromBuffer_advanced(void* dictBuffer, size_t dictContentSize, size_t dictBufferCapacity,
-                                                 const void* samplesBuffer, const size_t* samplesSizes, unsigned nbSamples,
-                                                 ZDICT_params_t params)
+
+size_t ZDICT_finalizeDictionary(void* dictBuffer, size_t dictBufferCapacity,
+                          const void* customDictContent, size_t dictContentSize,
+                          const void* samplesBuffer, const size_t* samplesSizes, unsigned nbSamples,
+                          ZDICT_params_t params)
 {
     size_t hSize;
+#define HBUFFSIZE 256   /* should prove large enough for all entropy headers */
+    BYTE header[HBUFFSIZE];
     int const compressionLevel = (params.compressionLevel <= 0) ? g_compressionLevel_default : params.compressionLevel;
     U32 const notificationLevel = params.notificationLevel;
 
+    /* check conditions */
+    if (dictBufferCapacity < dictContentSize) return ERROR(dstSize_tooSmall);
+    if (dictContentSize < ZDICT_CONTENTSIZE_MIN) return ERROR(srcSize_wrong);
+    if (dictBufferCapacity < ZDICT_DICTSIZE_MIN) return ERROR(dstSize_tooSmall);
+
     /* dictionary header */
-    MEM_writeLE32(dictBuffer, ZSTD_DICT_MAGIC);
-    {   U64 const randomID = XXH64((char*)dictBuffer + dictBufferCapacity - dictContentSize, dictContentSize, 0);
+    MEM_writeLE32(header, ZSTD_MAGIC_DICTIONARY);
+    {   U64 const randomID = XXH64(customDictContent, dictContentSize, 0);
         U32 const compliantID = (randomID % ((1U<<31)-32768)) + 32768;
         U32 const dictID = params.dictID ? params.dictID : compliantID;
-        MEM_writeLE32((char*)dictBuffer+4, dictID);
+        MEM_writeLE32(header+4, dictID);
     }
     hSize = 8;
 
     /* entropy tables */
     DISPLAYLEVEL(2, "\r%70s\r", "");   /* clean display line */
     DISPLAYLEVEL(2, "statistics ... \n");
+    {   size_t const eSize = ZDICT_analyzeEntropy(header+hSize, HBUFFSIZE-hSize,
+                                  compressionLevel,
+                                  samplesBuffer, samplesSizes, nbSamples,
+                                  customDictContent, dictContentSize,
+                                  notificationLevel);
+        if (ZDICT_isError(eSize)) return eSize;
+        hSize += eSize;
+    }
+
+    /* copy elements in final buffer ; note : src and dst buffer can overlap */
+    if (hSize + dictContentSize > dictBufferCapacity) dictContentSize = dictBufferCapacity - hSize;
+    {   size_t const dictSize = hSize + dictContentSize;
+        char* dictEnd = (char*)dictBuffer + dictSize;
+        memmove(dictEnd - dictContentSize, customDictContent, dictContentSize);
+        memcpy(dictBuffer, header, hSize);
+        return dictSize;
+    }
+}
+
+
+size_t ZDICT_addEntropyTablesFromBuffer_advanced(void* dictBuffer, size_t dictContentSize, size_t dictBufferCapacity,
+                                                 const void* samplesBuffer, const size_t* samplesSizes, unsigned nbSamples,
+                                                 ZDICT_params_t params)
+{
+    int const compressionLevel = (params.compressionLevel <= 0) ? g_compressionLevel_default : params.compressionLevel;
+    U32 const notificationLevel = params.notificationLevel;
+    size_t hSize = 8;
+
+    /* calculate entropy tables */
+    DISPLAYLEVEL(2, "\r%70s\r", "");   /* clean display line */
+    DISPLAYLEVEL(2, "statistics ... \n");
     {   size_t const eSize = ZDICT_analyzeEntropy((char*)dictBuffer+hSize, dictBufferCapacity-hSize,
                                   compressionLevel,
                                   samplesBuffer, samplesSizes, nbSamples,
@@ -852,6 +905,13 @@ size_t ZDICT_addEntropyTablesFromBuffer_advanced(void* dictBuffer, size_t dictCo
         hSize += eSize;
     }
 
+    /* add dictionary header (after entropy tables) */
+    MEM_writeLE32(dictBuffer, ZSTD_MAGIC_DICTIONARY);
+    {   U64 const randomID = XXH64((char*)dictBuffer + dictBufferCapacity - dictContentSize, dictContentSize, 0);
+        U32 const compliantID = (randomID % ((1U<<31)-32768)) + 32768;
+        U32 const dictID = params.dictID ? params.dictID : compliantID;
+        MEM_writeLE32((char*)dictBuffer+4, dictID);
+    }
 
     if (hSize + dictContentSize < dictBufferCapacity)
         memmove((char*)dictBuffer + hSize, (char*)dictBuffer + dictBufferCapacity - dictContentSize, dictContentSize);
@@ -859,14 +919,14 @@ size_t ZDICT_addEntropyTablesFromBuffer_advanced(void* dictBuffer, size_t dictCo
 }
 
 
-/*! ZDICT_trainFromBuffer_unsafe() :
+/*! ZDICT_trainFromBuffer_unsafe_legacy() :
 *   Warning : `samplesBuffer` must be followed by noisy guard band.
 *   @return : size of dictionary, or an error code which can be tested with ZDICT_isError()
 */
-size_t ZDICT_trainFromBuffer_unsafe(
+size_t ZDICT_trainFromBuffer_unsafe_legacy(
                             void* dictBuffer, size_t maxDictSize,
                             const void* samplesBuffer, const size_t* samplesSizes, unsigned nbSamples,
-                            ZDICT_params_t params)
+                            ZDICT_legacy_params_t params)
 {
     U32 const dictListSize = MAX(MAX(DICTLISTSIZE_DEFAULT, nbSamples), (U32)(maxDictSize/16));
     dictItem* const dictList = (dictItem*)malloc(dictListSize * sizeof(*dictList));
@@ -875,33 +935,35 @@ size_t ZDICT_trainFromBuffer_unsafe(
     size_t const targetDictSize = maxDictSize;
     size_t const samplesBuffSize = ZDICT_totalSampleSize(samplesSizes, nbSamples);
     size_t dictSize = 0;
-    U32 const notificationLevel = params.notificationLevel;
+    U32 const notificationLevel = params.zParams.notificationLevel;
 
     /* checks */
     if (!dictList) return ERROR(memory_allocation);
-    if (maxDictSize <= g_provision_entropySize + g_min_fast_dictContent) { free(dictList); return ERROR(dstSize_tooSmall); }
-    if (samplesBuffSize < ZDICT_MIN_SAMPLES_SIZE) { free(dictList); return 0; }   /* not enough source to create dictionary */
+    if (maxDictSize < ZDICT_DICTSIZE_MIN) { free(dictList); return ERROR(dstSize_tooSmall); }   /* requested dictionary size is too small */
+    if (samplesBuffSize < ZDICT_MIN_SAMPLES_SIZE) { free(dictList); return ERROR(dictionaryCreation_failed); }   /* not enough source to create dictionary */
 
     /* init */
     ZDICT_initDictItem(dictList);
 
     /* build dictionary */
-    ZDICT_trainBuffer(dictList, dictListSize,
-                    samplesBuffer, samplesBuffSize,
-                    samplesSizes, nbSamples,
-                    minRep, notificationLevel);
+    ZDICT_trainBuffer_legacy(dictList, dictListSize,
+                       samplesBuffer, samplesBuffSize,
+                       samplesSizes, nbSamples,
+                       minRep, notificationLevel);
 
     /* display best matches */
-    if (params.notificationLevel>= 3) {
+    if (params.zParams.notificationLevel>= 3) {
         U32 const nb = MIN(25, dictList[0].pos);
         U32 const dictContentSize = ZDICT_dictSize(dictList);
         U32 u;
-        DISPLAYLEVEL(3, "\n %u segments found, of total size %u \n", dictList[0].pos, dictContentSize);
-        DISPLAYLEVEL(3, "list %u best segments \n", nb);
-        for (u=1; u<=nb; u++) {
-            U32 pos = dictList[u].pos;
-            U32 length = dictList[u].length;
-            U32 printedLength = MIN(40, length);
+        DISPLAYLEVEL(3, "\n %u segments found, of total size %u \n", dictList[0].pos-1, dictContentSize);
+        DISPLAYLEVEL(3, "list %u best segments \n", nb-1);
+        for (u=1; u<nb; u++) {
+            U32 const pos = dictList[u].pos;
+            U32 const length = dictList[u].length;
+            U32 const printedLength = MIN(40, length);
+            if ((pos > samplesBuffSize) || ((pos + length) > samplesBuffSize))
+                return ERROR(GENERIC);   /* should never happen */
             DISPLAYLEVEL(3, "%3u:%3u bytes at pos %8u, savings %7u bytes |",
                          u, length, pos, dictList[u].savings);
             ZDICT_printHex((const char*)samplesBuffer+pos, printedLength);
@@ -911,14 +973,15 @@ size_t ZDICT_trainFromBuffer_unsafe(
 
     /* create dictionary */
     {   U32 dictContentSize = ZDICT_dictSize(dictList);
-        if (dictContentSize < targetDictSize/3) {
+        if (dictContentSize < ZDICT_CONTENTSIZE_MIN) { free(dictList); return ERROR(dictionaryCreation_failed); }   /* dictionary content too small */
+        if (dictContentSize < targetDictSize/4) {
             DISPLAYLEVEL(2, "!  warning : selected content significantly smaller than requested (%u < %u) \n", dictContentSize, (U32)maxDictSize);
+            if (samplesBuffSize < 10 * targetDictSize)
+                DISPLAYLEVEL(2, "!  consider increasing the number of samples (total size : %u MB)\n", (U32)(samplesBuffSize>>20));
             if (minRep > MINRATIO) {
                 DISPLAYLEVEL(2, "!  consider increasing selectivity to produce larger dictionary (-s%u) \n", selectivity+1);
                 DISPLAYLEVEL(2, "!  note : larger dictionaries are not necessarily better, test its efficiency on samples \n");
             }
-            if (samplesBuffSize < 10 * targetDictSize)
-                DISPLAYLEVEL(2, "!  consider increasing the number of samples (total size : %u MB)\n", (U32)(samplesBuffSize>>20));
         }
 
         if ((dictContentSize > targetDictSize*3) && (nbSamples > 2*MINRATIO) && (selectivity>1)) {
@@ -926,7 +989,7 @@ size_t ZDICT_trainFromBuffer_unsafe(
             while ((nbSamples >> proposedSelectivity) <= MINRATIO) { proposedSelectivity--; }
             DISPLAYLEVEL(2, "!  note : calculated dictionary significantly larger than requested (%u > %u) \n", dictContentSize, (U32)maxDictSize);
             DISPLAYLEVEL(2, "!  consider increasing dictionary size, or produce denser dictionary (-s%u) \n", proposedSelectivity);
-            DISPLAYLEVEL(2, "!  always test dictionary efficiency on samples \n");
+            DISPLAYLEVEL(2, "!  always test dictionary efficiency on real samples \n");
         }
 
         /* limit dictionary size */
@@ -952,7 +1015,7 @@ size_t ZDICT_trainFromBuffer_unsafe(
 
         dictSize = ZDICT_addEntropyTablesFromBuffer_advanced(dictBuffer, dictContentSize, maxDictSize,
                                                              samplesBuffer, samplesSizes, nbSamples,
-                                                             params);
+                                                             params.zParams);
     }
 
     /* clean up */
@@ -963,9 +1026,9 @@ size_t ZDICT_trainFromBuffer_unsafe(
 
 /* issue : samplesBuffer need to be followed by a noisy guard band.
 *  work around : duplicate the buffer, and add the noise */
-size_t ZDICT_trainFromBuffer_advanced(void* dictBuffer, size_t dictBufferCapacity,
-                                      const void* samplesBuffer, const size_t* samplesSizes, unsigned nbSamples,
-                                      ZDICT_params_t params)
+size_t ZDICT_trainFromBuffer_legacy(void* dictBuffer, size_t dictBufferCapacity,
+                              const void* samplesBuffer, const size_t* samplesSizes, unsigned nbSamples,
+                              ZDICT_legacy_params_t params)
 {
     size_t result;
     void* newBuff;
@@ -978,10 +1041,9 @@ size_t ZDICT_trainFromBuffer_advanced(void* dictBuffer, size_t dictBufferCapacit
     memcpy(newBuff, samplesBuffer, sBuffSize);
     ZDICT_fillNoise((char*)newBuff + sBuffSize, NOISELENGTH);   /* guard band, for end of buffer condition */
 
-    result = ZDICT_trainFromBuffer_unsafe(
-                                        dictBuffer, dictBufferCapacity,
-                                        newBuff, samplesSizes, nbSamples,
-                                        params);
+    result =
+        ZDICT_trainFromBuffer_unsafe_legacy(dictBuffer, dictBufferCapacity, newBuff,
+                                            samplesSizes, nbSamples, params);
     free(newBuff);
     return result;
 }
@@ -990,11 +1052,13 @@ size_t ZDICT_trainFromBuffer_advanced(void* dictBuffer, size_t dictBufferCapacit
 size_t ZDICT_trainFromBuffer(void* dictBuffer, size_t dictBufferCapacity,
                              const void* samplesBuffer, const size_t* samplesSizes, unsigned nbSamples)
 {
-    ZDICT_params_t params;
+    ZDICT_cover_params_t params;
     memset(&params, 0, sizeof(params));
-    return ZDICT_trainFromBuffer_advanced(dictBuffer, dictBufferCapacity,
-                                          samplesBuffer, samplesSizes, nbSamples,
-                                          params);
+    params.d = 8;
+    params.steps = 4;
+    return ZDICT_optimizeTrainFromBuffer_cover(dictBuffer, dictBufferCapacity,
+                                               samplesBuffer, samplesSizes,
+                                               nbSamples, &params);
 }
 
 size_t ZDICT_addEntropyTablesFromBuffer(void* dictBuffer, size_t dictContentSize, size_t dictBufferCapacity,
diff --git a/contrib/libzstd/include/zstd/dictBuilder/zdict.h b/contrib/libzstd/include/zstd/dictBuilder/zdict.h
index 642a43516b9..7bfbb351a1d 100644
--- a/contrib/libzstd/include/zstd/dictBuilder/zdict.h
+++ b/contrib/libzstd/include/zstd/dictBuilder/zdict.h
@@ -19,30 +19,37 @@ extern "C" {
 #include <stddef.h>  /* size_t */
 
 
-/*======  Export for Windows  ======*/
-/*!
-*  ZSTD_DLL_EXPORT :
-*  Enable exporting of functions when building a Windows DLL
-*/
-#if defined(_WIN32) && defined(ZSTD_DLL_EXPORT) && (ZSTD_DLL_EXPORT==1)
-#  define ZDICTLIB_API __declspec(dllexport)
+/* =====   ZDICTLIB_API : control library symbols visibility   ===== */
+#ifndef ZDICTLIB_VISIBILITY
+#  if defined(__GNUC__) && (__GNUC__ >= 4)
+#    define ZDICTLIB_VISIBILITY __attribute__ ((visibility ("default")))
+#  else
+#    define ZDICTLIB_VISIBILITY
+#  endif
+#endif
+#if defined(ZSTD_DLL_EXPORT) && (ZSTD_DLL_EXPORT==1)
+#  define ZDICTLIB_API __declspec(dllexport) ZDICTLIB_VISIBILITY
+#elif defined(ZSTD_DLL_IMPORT) && (ZSTD_DLL_IMPORT==1)
+#  define ZDICTLIB_API __declspec(dllimport) ZDICTLIB_VISIBILITY /* It isn't required but allows to generate better code, saving a function pointer load from the IAT and an indirect jump.*/
 #else
-#  define ZDICTLIB_API
+#  define ZDICTLIB_API ZDICTLIB_VISIBILITY
 #endif
 
 
-/*! ZDICT_trainFromBuffer() :
-    Train a dictionary from an array of samples.
-    Samples must be stored concatenated in a single flat buffer `samplesBuffer`,
-    supplied with an array of sizes `samplesSizes`, providing the size of each sample, in order.
-    The resulting dictionary will be saved into `dictBuffer`.
-    @return : size of dictionary stored into `dictBuffer` (<= `dictBufferCapacity`)
-              or an error code, which can be tested with ZDICT_isError().
-    Tips : In general, a reasonable dictionary has a size of ~ 100 KB.
-           It's obviously possible to target smaller or larger ones, just by specifying different `dictBufferCapacity`.
-           In general, it's recommended to provide a few thousands samples, but this can vary a lot.
-           It's recommended that total size of all samples be about ~x100 times the target size of dictionary.
-*/
+/*! ZDICT_trainFromBuffer():
+ * Train a dictionary from an array of samples.
+ * Uses ZDICT_optimizeTrainFromBuffer_cover() single-threaded, with d=8 and steps=4.
+ * Samples must be stored concatenated in a single flat buffer `samplesBuffer`,
+ * supplied with an array of sizes `samplesSizes`, providing the size of each sample, in order.
+ * The resulting dictionary will be saved into `dictBuffer`.
+ * @return: size of dictionary stored into `dictBuffer` (<= `dictBufferCapacity`)
+ *           or an error code, which can be tested with ZDICT_isError().
+ * Note: ZDICT_trainFromBuffer() requires about 9 bytes of memory for each input byte.
+ * Tips: In general, a reasonable dictionary has a size of ~ 100 KB.
+ *        It's obviously possible to target smaller or larger ones, just by specifying different `dictBufferCapacity`.
+ *        In general, it's recommended to provide a few thousand samples, but this can vary a lot.
+ *        It's recommended that the total size of all samples be about 100 times the target size of the dictionary.
+ */
 ZDICTLIB_API size_t ZDICT_trainFromBuffer(void* dictBuffer, size_t dictBufferCapacity,
                        const void* samplesBuffer, const size_t* samplesSizes, unsigned nbSamples);
 
@@ -64,42 +71,134 @@ ZDICTLIB_API const char* ZDICT_getErrorName(size_t errorCode);
  * ==================================================================================== */
 
 typedef struct {
-    unsigned selectivityLevel;   /* 0 means default; larger => select more => larger dictionary */
     int      compressionLevel;   /* 0 means default; target a specific zstd compression level */
     unsigned notificationLevel;  /* Write to stderr; 0 = none (default); 1 = errors; 2 = progression; 3 = details; 4 = debug; */
     unsigned dictID;             /* 0 means auto mode (32-bits random value); other : force dictID value */
-    unsigned reserved[2];        /* reserved space for future parameters */
 } ZDICT_params_t;
 
+/*! ZDICT_cover_params_t:
+ *  For all values 0 means default.
+ *  k and d are the only required parameters.
+ */
+typedef struct {
+    unsigned k;                  /* Segment size : constraint: 0 < k : Reasonable range [16, 2048+] */
+    unsigned d;                  /* dmer size : constraint: 0 < d <= k : Reasonable range [6, 16] */
+    unsigned steps;              /* Number of steps : Only used for optimization : 0 means default (32) : Higher means more parameters checked */
+    unsigned nbThreads;          /* Number of threads : constraint: 0 < nbThreads : 1 means single-threaded : Only used for optimization : Ignored if ZSTD_MULTITHREAD is not defined */
+    ZDICT_params_t zParams;
+} ZDICT_cover_params_t;
 
-/*! ZDICT_trainFromBuffer_advanced() :
-    Same as ZDICT_trainFromBuffer() with control over more parameters.
-    `parameters` is optional and can be provided with values set to 0 to mean "default".
-    @return : size of dictionary stored into `dictBuffer` (<= `dictBufferSize`),
-              or an error code, which can be tested by ZDICT_isError().
-    note : ZDICT_trainFromBuffer_advanced() will send notifications into stderr if instructed to, using notificationLevel>0.
-*/
-size_t ZDICT_trainFromBuffer_advanced(void* dictBuffer, size_t dictBufferCapacity,
+
+/*! ZDICT_trainFromBuffer_cover():
+ * Train a dictionary from an array of samples using the COVER algorithm.
+ * Samples must be stored concatenated in a single flat buffer `samplesBuffer`,
+ * supplied with an array of sizes `samplesSizes`, providing the size of each sample, in order.
+ * The resulting dictionary will be saved into `dictBuffer`.
+ * @return: size of dictionary stored into `dictBuffer` (<= `dictBufferCapacity`)
+ *           or an error code, which can be tested with ZDICT_isError().
+ * Note: ZDICT_trainFromBuffer_cover() requires about 9 bytes of memory for each input byte.
+ * Tips: In general, a reasonable dictionary has a size of ~ 100 KB.
+ *        It's obviously possible to target smaller or larger ones, just by specifying different `dictBufferCapacity`.
+ *        In general, it's recommended to provide a few thousand samples, but this can vary a lot.
+ *        It's recommended that the total size of all samples be about 100 times the target size of the dictionary.
+ */
+ZDICTLIB_API size_t ZDICT_trainFromBuffer_cover(
+    void *dictBuffer, size_t dictBufferCapacity, const void *samplesBuffer,
+    const size_t *samplesSizes, unsigned nbSamples,
+    ZDICT_cover_params_t parameters);
+
+/*! ZDICT_optimizeTrainFromBuffer_cover():
+ * The same requirements as above hold for all the parameters except `parameters`.
+ * This function tries many parameter combinations and picks the best parameters.
+ * `*parameters` is filled with the best parameters found, and the dictionary
+ * constructed with those parameters is stored in `dictBuffer`.
+ *
+ * All of the parameters d, k, steps are optional.
+ * If d is non-zero then only that value of d is used; otherwise we check d = {6, 8, 10, 12, 14, 16}.
+ * If steps is zero, its default value (32) is used.
+ * If k is non-zero then only that value of k is used; otherwise we check `steps` values of k in [16, 2048].
+ *
+ * @return: size of dictionary stored into `dictBuffer` (<= `dictBufferCapacity`)
+ *           or an error code, which can be tested with ZDICT_isError().
+ *           On success `*parameters` contains the parameters selected.
+ * Note: ZDICT_optimizeTrainFromBuffer_cover() requires about 8 bytes of memory for each input byte, plus about 5 more bytes per input byte for each thread.
+ */
+ZDICTLIB_API size_t ZDICT_optimizeTrainFromBuffer_cover(
+    void *dictBuffer, size_t dictBufferCapacity, const void *samplesBuffer,
+    const size_t *samplesSizes, unsigned nbSamples,
+    ZDICT_cover_params_t *parameters);
+
+/*! ZDICT_finalizeDictionary():
+ * Given a custom content as a basis for dictionary, and a set of samples,
+ * finalize dictionary by adding headers and statistics.
+ *
+ * Samples must be stored concatenated in a flat buffer `samplesBuffer`,
+ * supplied with an array of sizes `samplesSizes`, providing the size of each sample in order.
+ *
+ * dictContentSize must be >= ZDICT_CONTENTSIZE_MIN bytes.
+ * maxDictSize must be >= dictContentSize, and must be >= ZDICT_DICTSIZE_MIN bytes.
+ *
+ * @return: size of dictionary stored into `dictBuffer` (<= `dictBufferCapacity`),
+ *           or an error code, which can be tested by ZDICT_isError().
+ * Note: ZDICT_finalizeDictionary() will push notifications into stderr if instructed to, using notificationLevel>0.
+ * Note 2: dictBuffer and dictContent can overlap
+ */
+#define ZDICT_CONTENTSIZE_MIN 128
+#define ZDICT_DICTSIZE_MIN    256
+ZDICTLIB_API size_t ZDICT_finalizeDictionary(void* dictBuffer, size_t dictBufferCapacity,
+                                const void* dictContent, size_t dictContentSize,
                                 const void* samplesBuffer, const size_t* samplesSizes, unsigned nbSamples,
                                 ZDICT_params_t parameters);
 
+typedef struct {
+    unsigned selectivityLevel;   /* 0 means default; larger => select more => larger dictionary */
+    ZDICT_params_t zParams;
+} ZDICT_legacy_params_t;
 
-/*! ZDICT_addEntropyTablesFromBuffer() :
+/*! ZDICT_trainFromBuffer_legacy():
+ * Train a dictionary from an array of samples.
+ * Samples must be stored concatenated in a single flat buffer `samplesBuffer`,
+ * supplied with an array of sizes `samplesSizes`, providing the size of each sample, in order.
+ * The resulting dictionary will be saved into `dictBuffer`.
+ * `parameters` is optional and can be provided with values set to 0 to mean "default".
+ * @return: size of dictionary stored into `dictBuffer` (<= `dictBufferCapacity`)
+ *           or an error code, which can be tested with ZDICT_isError().
+ * Tips: In general, a reasonable dictionary has a size of ~ 100 KB.
+ *        It's obviously possible to target smaller or larger ones, just by specifying different `dictBufferCapacity`.
+ *        In general, it's recommended to provide a few thousand samples, but this can vary a lot.
+ *        It's recommended that the total size of all samples be about 100 times the target size of the dictionary.
+ * Note: ZDICT_trainFromBuffer_legacy() will send notifications into stderr if instructed to, using notificationLevel>0.
+ */
+ZDICTLIB_API size_t ZDICT_trainFromBuffer_legacy(
+    void *dictBuffer, size_t dictBufferCapacity, const void *samplesBuffer,
+    const size_t *samplesSizes, unsigned nbSamples, ZDICT_legacy_params_t parameters);
 
-    Given a content-only dictionary (built using any 3rd party algorithm),
-    add entropy tables computed from an array of samples.
-    Samples must be stored concatenated in a flat buffer `samplesBuffer`,
-    supplied with an array of sizes `samplesSizes`, providing the size of each sample in order.
+/* Deprecation warnings */
+/* It is generally possible to disable deprecation warnings from the compiler,
+   for example with -Wno-deprecated-declarations for gcc
+   or _CRT_SECURE_NO_WARNINGS in Visual Studio.
+   Otherwise, it's also possible to manually define ZDICT_DISABLE_DEPRECATE_WARNINGS */
+#ifdef ZDICT_DISABLE_DEPRECATE_WARNINGS
+#  define ZDICT_DEPRECATED(message) ZDICTLIB_API   /* disable deprecation warnings */
+#else
+#  define ZDICT_GCC_VERSION (__GNUC__ * 100 + __GNUC_MINOR__)
+#  if defined (__cplusplus) && (__cplusplus >= 201402) /* C++14 or greater */
+#    define ZDICT_DEPRECATED(message) [[deprecated(message)]] ZDICTLIB_API
+#  elif (ZDICT_GCC_VERSION >= 405) || defined(__clang__)
+#    define ZDICT_DEPRECATED(message) ZDICTLIB_API __attribute__((deprecated(message)))
+#  elif (ZDICT_GCC_VERSION >= 301)
+#    define ZDICT_DEPRECATED(message) ZDICTLIB_API __attribute__((deprecated))
+#  elif defined(_MSC_VER)
+#    define ZDICT_DEPRECATED(message) ZDICTLIB_API __declspec(deprecated(message))
+#  else
+#    pragma message("WARNING: You need to implement ZDICT_DEPRECATED for this compiler")
+#    define ZDICT_DEPRECATED(message) ZDICTLIB_API
+#  endif
+#endif /* ZDICT_DISABLE_DEPRECATE_WARNINGS */
 
-    The input dictionary content must be stored *at the end* of `dictBuffer`.
-    Its size is `dictContentSize`.
-    The resulting dictionary with added entropy tables will be *written back to `dictBuffer`*,
-    starting from its beginning.
-    @return : size of dictionary stored into `dictBuffer` (<= `dictBufferCapacity`).
-*/
+ZDICT_DEPRECATED("use ZDICT_finalizeDictionary() instead")
 size_t ZDICT_addEntropyTablesFromBuffer(void* dictBuffer, size_t dictContentSize, size_t dictBufferCapacity,
-                                        const void* samplesBuffer, const size_t* samplesSizes, unsigned nbSamples);
-
+                                  const void* samplesBuffer, const size_t* samplesSizes, unsigned nbSamples);
 
 
 #endif   /* ZDICT_STATIC_LINKING_ONLY */
diff --git a/contrib/libzstd/include/zstd/legacy/zstd_legacy.h b/contrib/libzstd/include/zstd/legacy/zstd_legacy.h
index 7a7167a8036..3c9798f880e 100644
--- a/contrib/libzstd/include/zstd/legacy/zstd_legacy.h
+++ b/contrib/libzstd/include/zstd/legacy/zstd_legacy.h
@@ -7,8 +7,6 @@
  * of patent rights can be found in the PATENTS file in the same directory.
  */
 
-/// Milovidov: we had only used version 6 in ClickHouse.
-
 #ifndef ZSTD_LEGACY_H
 #define ZSTD_LEGACY_H
 
@@ -22,8 +20,33 @@ extern "C" {
 #include "mem.h"            /* MEM_STATIC */
 #include "error_private.h"  /* ERROR */
 #include "zstd.h"           /* ZSTD_inBuffer, ZSTD_outBuffer */
-#include "zstd_v06.h"
 
+#if !defined (ZSTD_LEGACY_SUPPORT) || (ZSTD_LEGACY_SUPPORT == 0)
+#  undef ZSTD_LEGACY_SUPPORT
+#  define ZSTD_LEGACY_SUPPORT 8
+#endif
+
+#if (ZSTD_LEGACY_SUPPORT <= 1)
+#  include "zstd_v01.h"
+#endif
+#if (ZSTD_LEGACY_SUPPORT <= 2)
+#  include "zstd_v02.h"
+#endif
+#if (ZSTD_LEGACY_SUPPORT <= 3)
+#  include "zstd_v03.h"
+#endif
+#if (ZSTD_LEGACY_SUPPORT <= 4)
+#  include "zstd_v04.h"
+#endif
+#if (ZSTD_LEGACY_SUPPORT <= 5)
+#  include "zstd_v05.h"
+#endif
+#if (ZSTD_LEGACY_SUPPORT <= 6)
+#  include "zstd_v06.h"
+#endif
+#if (ZSTD_LEGACY_SUPPORT <= 7)
+#  include "zstd_v07.h"
+#endif
 
 /** ZSTD_isLegacy() :
     @return : > 0 if supported by legacy decoder. 0 otherwise.
@@ -36,7 +59,27 @@ MEM_STATIC unsigned ZSTD_isLegacy(const void* src, size_t srcSize)
     magicNumberLE = MEM_readLE32(src);
     switch(magicNumberLE)
     {
+#if (ZSTD_LEGACY_SUPPORT <= 1)
+        case ZSTDv01_magicNumberLE:return 1;
+#endif
+#if (ZSTD_LEGACY_SUPPORT <= 2)
+        case ZSTDv02_magicNumber : return 2;
+#endif
+#if (ZSTD_LEGACY_SUPPORT <= 3)
+        case ZSTDv03_magicNumber : return 3;
+#endif
+#if (ZSTD_LEGACY_SUPPORT <= 4)
+        case ZSTDv04_magicNumber : return 4;
+#endif
+#if (ZSTD_LEGACY_SUPPORT <= 5)
+        case ZSTDv05_MAGICNUMBER : return 5;
+#endif
+#if (ZSTD_LEGACY_SUPPORT <= 6)
         case ZSTDv06_MAGICNUMBER : return 6;
+#endif
+#if (ZSTD_LEGACY_SUPPORT <= 7)
+        case ZSTDv07_MAGICNUMBER : return 7;
+#endif
         default : return 0;
     }
 }
@@ -45,12 +88,31 @@ MEM_STATIC unsigned ZSTD_isLegacy(const void* src, size_t srcSize)
 MEM_STATIC unsigned long long ZSTD_getDecompressedSize_legacy(const void* src, size_t srcSize)
 {
     U32 const version = ZSTD_isLegacy(src, srcSize);
+    if (version < 5) return 0;  /* no decompressed size in frame header, or not a legacy format */
+#if (ZSTD_LEGACY_SUPPORT <= 5)
+    if (version==5) {
+        ZSTDv05_parameters fParams;
+        size_t const frResult = ZSTDv05_getFrameParams(&fParams, src, srcSize);
+        if (frResult != 0) return 0;
+        return fParams.srcSize;
+    }
+#endif
+#if (ZSTD_LEGACY_SUPPORT <= 6)
     if (version==6) {
         ZSTDv06_frameParams fParams;
         size_t const frResult = ZSTDv06_getFrameParams(&fParams, src, srcSize);
         if (frResult != 0) return 0;
         return fParams.frameContentSize;
     }
+#endif
+#if (ZSTD_LEGACY_SUPPORT <= 7)
+    if (version==7) {
+        ZSTDv07_frameParams fParams;
+        size_t const frResult = ZSTDv07_getFrameParams(&fParams, src, srcSize);
+        if (frResult != 0) return 0;
+        return fParams.frameContentSize;
+    }
+#endif
     return 0;   /* should not be possible */
 }
 
@@ -63,6 +125,33 @@ MEM_STATIC size_t ZSTD_decompressLegacy(
     U32 const version = ZSTD_isLegacy(src, compressedSize);
     switch(version)
     {
+#if (ZSTD_LEGACY_SUPPORT <= 1)
+        case 1 :
+            return ZSTDv01_decompress(dst, dstCapacity, src, compressedSize);
+#endif
+#if (ZSTD_LEGACY_SUPPORT <= 2)
+        case 2 :
+            return ZSTDv02_decompress(dst, dstCapacity, src, compressedSize);
+#endif
+#if (ZSTD_LEGACY_SUPPORT <= 3)
+        case 3 :
+            return ZSTDv03_decompress(dst, dstCapacity, src, compressedSize);
+#endif
+#if (ZSTD_LEGACY_SUPPORT <= 4)
+        case 4 :
+            return ZSTDv04_decompress(dst, dstCapacity, src, compressedSize);
+#endif
+#if (ZSTD_LEGACY_SUPPORT <= 5)
+        case 5 :
+            {   size_t result;
+                ZSTDv05_DCtx* const zd = ZSTDv05_createDCtx();
+                if (zd==NULL) return ERROR(memory_allocation);
+                result = ZSTDv05_decompress_usingDict(zd, dst, dstCapacity, src, compressedSize, dict, dictSize);
+                ZSTDv05_freeDCtx(zd);
+                return result;
+            }
+#endif
+#if (ZSTD_LEGACY_SUPPORT <= 6)
         case 6 :
             {   size_t result;
                 ZSTDv06_DCtx* const zd = ZSTDv06_createDCtx();
@@ -71,19 +160,82 @@ MEM_STATIC size_t ZSTD_decompressLegacy(
                 ZSTDv06_freeDCtx(zd);
                 return result;
             }
+#endif
+#if (ZSTD_LEGACY_SUPPORT <= 7)
+        case 7 :
+            {   size_t result;
+                ZSTDv07_DCtx* const zd = ZSTDv07_createDCtx();
+                if (zd==NULL) return ERROR(memory_allocation);
+                result = ZSTDv07_decompress_usingDict(zd, dst, dstCapacity, src, compressedSize, dict, dictSize);
+                ZSTDv07_freeDCtx(zd);
+                return result;
+            }
+#endif
         default :
             return ERROR(prefix_unknown);
     }
 }
 
+MEM_STATIC size_t ZSTD_findFrameCompressedSizeLegacy(const void *src,
+                                             size_t compressedSize)
+{
+    U32 const version = ZSTD_isLegacy(src, compressedSize);
+    switch(version)
+    {
+#if (ZSTD_LEGACY_SUPPORT <= 1)
+        case 1 :
+            return ZSTDv01_findFrameCompressedSize(src, compressedSize);
+#endif
+#if (ZSTD_LEGACY_SUPPORT <= 2)
+        case 2 :
+            return ZSTDv02_findFrameCompressedSize(src, compressedSize);
+#endif
+#if (ZSTD_LEGACY_SUPPORT <= 3)
+        case 3 :
+            return ZSTDv03_findFrameCompressedSize(src, compressedSize);
+#endif
+#if (ZSTD_LEGACY_SUPPORT <= 4)
+        case 4 :
+            return ZSTDv04_findFrameCompressedSize(src, compressedSize);
+#endif
+#if (ZSTD_LEGACY_SUPPORT <= 5)
+        case 5 :
+            return ZSTDv05_findFrameCompressedSize(src, compressedSize);
+#endif
+#if (ZSTD_LEGACY_SUPPORT <= 6)
+        case 6 :
+            return ZSTDv06_findFrameCompressedSize(src, compressedSize);
+#endif
+#if (ZSTD_LEGACY_SUPPORT <= 7)
+        case 7 :
+            return ZSTDv07_findFrameCompressedSize(src, compressedSize);
+#endif
+        default :
+            return ERROR(prefix_unknown);
+    }
+}
 
 MEM_STATIC size_t ZSTD_freeLegacyStreamContext(void* legacyContext, U32 version)
 {
     switch(version)
     {
         default :
+        case 1 :
+        case 2 :
+        case 3 :
             return ERROR(version_unsupported);
+#if (ZSTD_LEGACY_SUPPORT <= 4)
+        case 4 : return ZBUFFv04_freeDCtx((ZBUFFv04_DCtx*)legacyContext);
+#endif
+#if (ZSTD_LEGACY_SUPPORT <= 5)
+        case 5 : return ZBUFFv05_freeDCtx((ZBUFFv05_DCtx*)legacyContext);
+#endif
+#if (ZSTD_LEGACY_SUPPORT <= 6)
         case 6 : return ZBUFFv06_freeDCtx((ZBUFFv06_DCtx*)legacyContext);
+#endif
+#if (ZSTD_LEGACY_SUPPORT <= 7)
+        case 7 : return ZBUFFv07_freeDCtx((ZBUFFv07_DCtx*)legacyContext);
+#endif
     }
 }
 
@@ -95,7 +247,32 @@ MEM_STATIC size_t ZSTD_initLegacyStream(void** legacyContext, U32 prevVersion, U
     switch(newVersion)
     {
         default :
+        case 1 :
+        case 2 :
+        case 3 :
             return 0;
+#if (ZSTD_LEGACY_SUPPORT <= 4)
+        case 4 :
+        {
+            ZBUFFv04_DCtx* dctx = (prevVersion != newVersion) ? ZBUFFv04_createDCtx() : (ZBUFFv04_DCtx*)*legacyContext;
+            if (dctx==NULL) return ERROR(memory_allocation);
+            ZBUFFv04_decompressInit(dctx);
+            ZBUFFv04_decompressWithDictionary(dctx, dict, dictSize);
+            *legacyContext = dctx;
+            return 0;
+        }
+#endif
+#if (ZSTD_LEGACY_SUPPORT <= 5)
+        case 5 :
+        {
+            ZBUFFv05_DCtx* dctx = (prevVersion != newVersion) ? ZBUFFv05_createDCtx() : (ZBUFFv05_DCtx*)*legacyContext;
+            if (dctx==NULL) return ERROR(memory_allocation);
+            ZBUFFv05_decompressInitDictionary(dctx, dict, dictSize);
+            *legacyContext = dctx;
+            return 0;
+        }
+#endif
+#if (ZSTD_LEGACY_SUPPORT <= 6)
         case 6 :
         {
             ZBUFFv06_DCtx* dctx = (prevVersion != newVersion) ? ZBUFFv06_createDCtx() : (ZBUFFv06_DCtx*)*legacyContext;
@@ -104,6 +281,17 @@ MEM_STATIC size_t ZSTD_initLegacyStream(void** legacyContext, U32 prevVersion, U
             *legacyContext = dctx;
             return 0;
         }
+#endif
+#if (ZSTD_LEGACY_SUPPORT <= 7)
+        case 7 :
+        {
+            ZBUFFv07_DCtx* dctx = (prevVersion != newVersion) ? ZBUFFv07_createDCtx() : (ZBUFFv07_DCtx*)*legacyContext;
+            if (dctx==NULL) return ERROR(memory_allocation);
+            ZBUFFv07_decompressInitDictionary(dctx, dict, dictSize);
+            *legacyContext = dctx;
+            return 0;
+        }
+#endif
     }
 }
 
@@ -115,7 +303,39 @@ MEM_STATIC size_t ZSTD_decompressLegacyStream(void* legacyContext, U32 version,
     switch(version)
     {
         default :
+        case 1 :
+        case 2 :
+        case 3 :
             return ERROR(version_unsupported);
+#if (ZSTD_LEGACY_SUPPORT <= 4)
+        case 4 :
+            {
+                ZBUFFv04_DCtx* dctx = (ZBUFFv04_DCtx*) legacyContext;
+                const void* src = (const char*)input->src + input->pos;
+                size_t readSize = input->size - input->pos;
+                void* dst = (char*)output->dst + output->pos;
+                size_t decodedSize = output->size - output->pos;
+                size_t const hintSize = ZBUFFv04_decompressContinue(dctx, dst, &decodedSize, src, &readSize);
+                output->pos += decodedSize;
+                input->pos += readSize;
+                return hintSize;
+            }
+#endif
+#if (ZSTD_LEGACY_SUPPORT <= 5)
+        case 5 :
+            {
+                ZBUFFv05_DCtx* dctx = (ZBUFFv05_DCtx*) legacyContext;
+                const void* src = (const char*)input->src + input->pos;
+                size_t readSize = input->size - input->pos;
+                void* dst = (char*)output->dst + output->pos;
+                size_t decodedSize = output->size - output->pos;
+                size_t const hintSize = ZBUFFv05_decompressContinue(dctx, dst, &decodedSize, src, &readSize);
+                output->pos += decodedSize;
+                input->pos += readSize;
+                return hintSize;
+            }
+#endif
+#if (ZSTD_LEGACY_SUPPORT <= 6)
         case 6 :
             {
                 ZBUFFv06_DCtx* dctx = (ZBUFFv06_DCtx*) legacyContext;
@@ -128,6 +348,21 @@ MEM_STATIC size_t ZSTD_decompressLegacyStream(void* legacyContext, U32 version,
                 input->pos += readSize;
                 return hintSize;
             }
+#endif
+#if (ZSTD_LEGACY_SUPPORT <= 7)
+        case 7 :
+            {
+                ZBUFFv07_DCtx* dctx = (ZBUFFv07_DCtx*) legacyContext;
+                const void* src = (const char*)input->src + input->pos;
+                size_t readSize = input->size - input->pos;
+                void* dst = (char*)output->dst + output->pos;
+                size_t decodedSize = output->size - output->pos;
+                size_t const hintSize = ZBUFFv07_decompressContinue(dctx, dst, &decodedSize, src, &readSize);
+                output->pos += decodedSize;
+                input->pos += readSize;
+                return hintSize;
+            }
+#endif
     }
 }
 
diff --git a/contrib/libzstd/include/zstd/legacy/zstd_v01.c b/contrib/libzstd/include/zstd/legacy/zstd_v01.c
new file mode 100644
index 00000000000..cf5354d6a9b
--- /dev/null
+++ b/contrib/libzstd/include/zstd/legacy/zstd_v01.c
@@ -0,0 +1,2126 @@
+/**
+ * Copyright (c) 2016-present, Yann Collet, Facebook, Inc.
+ * All rights reserved.
+ *
+ * This source code is licensed under the BSD-style license found in the
+ * LICENSE file in the root directory of this source tree. An additional grant
+ * of patent rights can be found in the PATENTS file in the same directory.
+ */
+
+
+/******************************************
+*  Includes
+******************************************/
+#include <stddef.h>    /* size_t, ptrdiff_t */
+#include "zstd_v01.h"
+#include "error_private.h"
+
+
+/******************************************
+*  Static allocation
+******************************************/
+/* You can statically allocate an FSE CTable/DTable as a table of unsigned using the macro below */
+#define FSE_DTABLE_SIZE_U32(maxTableLog)                   (1 + (1<<maxTableLog))
+
+/* You can statically allocate a Huff0 DTable as a table of unsigned short using the macros below */
+#define HUF_DTABLE_SIZE_U16(maxTableLog)   (1 + (1<<maxTableLog))
+#define HUF_CREATE_STATIC_DTABLE(DTable, maxTableLog) \
+        unsigned short DTable[HUF_DTABLE_SIZE_U16(maxTableLog)] = { maxTableLog }
+
+
+/******************************************
+*  Error Management
+******************************************/
+#define FSE_LIST_ERRORS(ITEM) \
+        ITEM(FSE_OK_NoError) ITEM(FSE_ERROR_GENERIC) \
+        ITEM(FSE_ERROR_tableLog_tooLarge) ITEM(FSE_ERROR_maxSymbolValue_tooLarge) ITEM(FSE_ERROR_maxSymbolValue_tooSmall) \
+        ITEM(FSE_ERROR_dstSize_tooSmall) ITEM(FSE_ERROR_srcSize_wrong)\
+        ITEM(FSE_ERROR_corruptionDetected) \
+        ITEM(FSE_ERROR_maxCode)
+
+#define FSE_GENERATE_ENUM(ENUM) ENUM,
+typedef enum { FSE_LIST_ERRORS(FSE_GENERATE_ENUM) } FSE_errorCodes;  /* enum is exposed, to detect & handle specific errors; compare function result to -enum value */
+
+
+/******************************************
+*  FSE symbol compression API
+******************************************/
+/*
+   This API consists of small unitary functions, which highly benefit from being inlined.
+   You will want to enable link-time-optimization to ensure these functions are properly inlined in your binary.
+   Visual seems to do it automatically.
+   For gcc or clang, you'll need to add -flto flag at compilation and linking stages.
+   If none of these solutions is applicable, include "fse.c" directly.
+*/
+
+typedef unsigned FSE_CTable;   /* don't allocate that. It's just a way to be more restrictive than void* */
+typedef unsigned FSE_DTable;   /* don't allocate that. It's just a way to be more restrictive than void* */
+
+typedef struct
+{
+    size_t bitContainer;
+    int    bitPos;
+    char*  startPtr;
+    char*  ptr;
+    char*  endPtr;
+} FSE_CStream_t;
+
+typedef struct
+{
+    ptrdiff_t   value;
+    const void* stateTable;
+    const void* symbolTT;
+    unsigned    stateLog;
+} FSE_CState_t;
+
+typedef struct
+{
+    size_t   bitContainer;
+    unsigned bitsConsumed;
+    const char* ptr;
+    const char* start;
+} FSE_DStream_t;
+
+typedef struct
+{
+    size_t      state;
+    const void* table;   /* precise table may vary, depending on U16 */
+} FSE_DState_t;
+
+typedef enum { FSE_DStream_unfinished = 0,
+               FSE_DStream_endOfBuffer = 1,
+               FSE_DStream_completed = 2,
+               FSE_DStream_tooFar = 3 } FSE_DStream_status;  /* result of FSE_reloadDStream() */
+               /* 1,2,4,8 would be better for bitmap combinations, but slows down performance a bit ... ?! */
+
+
+/****************************************************************
+*  Tuning parameters
+****************************************************************/
+/* MEMORY_USAGE :
+*  Memory usage formula : N->2^N Bytes (examples : 10 -> 1KB; 12 -> 4KB ; 16 -> 64KB; 20 -> 1MB; etc.)
+*  Increasing memory usage improves compression ratio
+*  Reduced memory usage can improve speed, due to cache effect
+*  Recommended max value is 14, for 16KB, which nicely fits into Intel x86 L1 cache */
+#define FSE_MAX_MEMORY_USAGE 14
+#define FSE_DEFAULT_MEMORY_USAGE 13
+
+/* FSE_MAX_SYMBOL_VALUE :
+*  Maximum symbol value authorized.
+*  Required for proper stack allocation */
+#define FSE_MAX_SYMBOL_VALUE 255
+
+
+/****************************************************************
+*  template functions type & suffix
+****************************************************************/
+#define FSE_FUNCTION_TYPE BYTE
+#define FSE_FUNCTION_EXTENSION
+
+
+/****************************************************************
+*  Byte symbol type
+****************************************************************/
+typedef struct
+{
+    unsigned short newState;
+    unsigned char  symbol;
+    unsigned char  nbBits;
+} FSE_decode_t;   /* size == U32 */
+
+
+
+/****************************************************************
+*  Compiler specifics
+****************************************************************/
+#ifdef _MSC_VER    /* Visual Studio */
+#  define FORCE_INLINE static __forceinline
+#  include <intrin.h>                    /* For Visual 2005 */
+#  pragma warning(disable : 4127)        /* disable: C4127: conditional expression is constant */
+#  pragma warning(disable : 4214)        /* disable: C4214: non-int bitfields */
+#else
+#  define GCC_VERSION (__GNUC__ * 100 + __GNUC_MINOR__)
+#  if defined (__cplusplus) || defined (__STDC_VERSION__) && __STDC_VERSION__ >= 199901L   /* C99 */
+#    ifdef __GNUC__
+#      define FORCE_INLINE static inline __attribute__((always_inline))
+#    else
+#      define FORCE_INLINE static inline
+#    endif
+#  else
+#    define FORCE_INLINE static
+#  endif /* __STDC_VERSION__ */
+#endif
+
+
+/****************************************************************
+*  Includes
+****************************************************************/
+#include <stdlib.h>     /* malloc, free, qsort */
+#include <string.h>     /* memcpy, memset */
+#include <stdio.h>      /* printf (debug) */
+
+
+#ifndef MEM_ACCESS_MODULE
+#define MEM_ACCESS_MODULE
+/****************************************************************
+*  Basic Types
+*****************************************************************/
+#if defined (__STDC_VERSION__) && __STDC_VERSION__ >= 199901L   /* C99 */
+# include <stdint.h>
+typedef  uint8_t BYTE;
+typedef uint16_t U16;
+typedef  int16_t S16;
+typedef uint32_t U32;
+typedef  int32_t S32;
+typedef uint64_t U64;
+typedef  int64_t S64;
+#else
+typedef unsigned char       BYTE;
+typedef unsigned short      U16;
+typedef   signed short      S16;
+typedef unsigned int        U32;
+typedef   signed int        S32;
+typedef unsigned long long  U64;
+typedef   signed long long  S64;
+#endif
+
+#endif   /* MEM_ACCESS_MODULE */
+
+/****************************************************************
+*  Memory I/O
+*****************************************************************/
+/* FSE_FORCE_MEMORY_ACCESS
+ * By default, access to unaligned memory is controlled by `memcpy()`, which is safe and portable.
+ * Unfortunately, on some target/compiler combinations, the generated assembly is sub-optimal.
+ * The switch below allows selecting a different access method for improved performance.
+ * Method 0 (default) : use `memcpy()`. Safe and portable.
+ * Method 1 : `__packed` statement. It depends on compiler extension (ie, not portable).
+ *            This method is safe if your compiler supports it, and *generally* as fast or faster than `memcpy`.
+ * Method 2 : direct access. This method is portable but violate C standard.
+ *            It can generate buggy code on targets generating assembly depending on alignment.
+ *            But in some circumstances, it's the only known way to get the most performance (ie GCC + ARMv6)
+ * See http://fastcompression.blogspot.fr/2015/08/accessing-unaligned-memory.html for details.
+ * Prefer these methods in priority order (0 > 1 > 2)
+ */
+#ifndef FSE_FORCE_MEMORY_ACCESS   /* can be defined externally, on command line for example */
+#  if defined(__GNUC__) && ( defined(__ARM_ARCH_6__) || defined(__ARM_ARCH_6J__) || defined(__ARM_ARCH_6K__) || defined(__ARM_ARCH_6Z__) || defined(__ARM_ARCH_6ZK__) || defined(__ARM_ARCH_6T2__) )
+#    define FSE_FORCE_MEMORY_ACCESS 2
+#  elif (defined(__INTEL_COMPILER) && !defined(WIN32)) || \
+  (defined(__GNUC__) && ( defined(__ARM_ARCH_7__) || defined(__ARM_ARCH_7A__) || defined(__ARM_ARCH_7R__) || defined(__ARM_ARCH_7M__) || defined(__ARM_ARCH_7S__) ))
+#    define FSE_FORCE_MEMORY_ACCESS 1
+#  endif
+#endif
+
+
+static unsigned FSE_32bits(void)
+{   /* yields 1 when a pointer occupies 4 bytes (32-bit target), else 0 */
+    return (4 == sizeof(void*)) ? 1U : 0U;
+}
+
+static unsigned FSE_isLittleEndian(void)
+{   /* probe : store 1 as a 32-bit word, then read back its lowest-addressed byte (1 on little-endian hosts) */
+    const union { U32 word; BYTE bytes[4]; } probe = { 1 };   /* deliberately not static (original note : performance detrimental) */
+    return probe.bytes[0];
+}
+
+#if defined(FSE_FORCE_MEMORY_ACCESS) && (FSE_FORCE_MEMORY_ACCESS==2)
+/* Method 2 : plain pointer dereference, no provision for unaligned addresses */
+static U16 FSE_read16(const void* memPtr) { return *(const U16*) memPtr; }
+static U32 FSE_read32(const void* memPtr) { return *(const U32*) memPtr; }
+static U64 FSE_read64(const void* memPtr) { return *(const U64*) memPtr; }
+
+#elif defined(FSE_FORCE_MEMORY_ACCESS) && (FSE_FORCE_MEMORY_ACCESS==1)
+/* Method 1 : access through a packed union */
+/* __pack instructions are safer, but compiler specific, hence potentially problematic for some compilers */
+/* currently only defined for gcc and icc */
+typedef union { U16 u16; U32 u32; U64 u64; } __attribute__((packed)) unalign;
+
+static U16 FSE_read16(const void* ptr) { return ((const unalign*)ptr)->u16; }
+static U32 FSE_read32(const void* ptr) { return ((const unalign*)ptr)->u32; }
+static U64 FSE_read64(const void* ptr) { return ((const unalign*)ptr)->u64; }
+
+#else
+/* Method 0 (default) : copy the bytes into a local of the right type; all three return the value in host byte order */
+static U16 FSE_read16(const void* memPtr)
+{
+    U16 val; memcpy(&val, memPtr, sizeof(val)); return val;
+}
+
+static U32 FSE_read32(const void* memPtr)
+{
+    U32 val; memcpy(&val, memPtr, sizeof(val)); return val;
+}
+
+static U64 FSE_read64(const void* memPtr)
+{
+    U64 val; memcpy(&val, memPtr, sizeof(val)); return val;
+}
+
+#endif /* FSE_FORCE_MEMORY_ACCESS */
+
+static U16 FSE_readLE16(const void* memPtr)
+{   /* reads a 16-bit little-endian value, whatever the host byte order */
+    if (!FSE_isLittleEndian())
+    {
+        /* big-endian host : rebuild the value one byte at a time */
+        const BYTE* const bytes = (const BYTE*)memPtr;
+        return (U16)(bytes[0] + (bytes[1]<<8));
+    }
+    return FSE_read16(memPtr);   /* little-endian host : native load */
+}
+
+static U32 FSE_readLE32(const void* memPtr)
+{   /* reads a 32-bit little-endian value, whatever the host byte order */
+    if (!FSE_isLittleEndian())
+    {
+        /* big-endian host : combine the four bytes, lowest address = least significant */
+        const BYTE* const bytes = (const BYTE*)memPtr;
+        return (U32)(((U32)bytes[3]<<24) | ((U32)bytes[2]<<16) | ((U32)bytes[1]<<8) | (U32)bytes[0]);
+    }
+    return FSE_read32(memPtr);   /* little-endian host : native load */
+}
+
+
+static U64 FSE_readLE64(const void* memPtr)
+{   /* reads a 64-bit little-endian value, whatever the host byte order */
+    const BYTE* const bytes = (const BYTE*)memPtr;
+    U64 value = 0;
+    int idx;
+
+    if (FSE_isLittleEndian()) return FSE_read64(memPtr);   /* native load */
+    /* big-endian host : accumulate from the most significant byte (highest address) down */
+    for (idx=7; idx>=0; idx--) value = (value<<8) | bytes[idx];
+    return value;
+}
+
+/* FSE_readLEST :
+ * reads one little-endian word : 32 bits when FSE_32bits() is true, 64 bits otherwise */
+static size_t FSE_readLEST(const void* memPtr)
+{
+    return FSE_32bits() ? (size_t)FSE_readLE32(memPtr)
+                        : (size_t)FSE_readLE64(memPtr);
+}
+
+
+
+/****************************************************************
+*  Constants
+*****************************************************************/
+#define FSE_MAX_TABLELOG  (FSE_MAX_MEMORY_USAGE-2)   /* largest table log accepted by FSE_buildDTable() */
+#define FSE_MAX_TABLESIZE (1U<<FSE_MAX_TABLELOG)   /* number of cells at the largest table log */
+#define FSE_MAXTABLESIZE_MASK (FSE_MAX_TABLESIZE-1)
+#define FSE_DEFAULT_TABLELOG (FSE_DEFAULT_MEMORY_USAGE-2)
+#define FSE_MIN_TABLELOG 5   /* FSE_readNCount() adds this to the 4-bit field stored in a header */
+
+#define FSE_TABLELOG_ABSOLUTE_MAX 15   /* hard ceiling : enforced at compile time below, and by FSE_readNCount() on parsed headers */
+#if FSE_MAX_TABLELOG > FSE_TABLELOG_ABSOLUTE_MAX
+#error "FSE_MAX_TABLELOG > FSE_TABLELOG_ABSOLUTE_MAX is not supported"
+#endif
+
+
+/****************************************************************
+*  Error Management : FSE_STATIC_ASSERT(c) fails to compile when c is false (division by zero in an enum initializer)
+****************************************************************/
+#define FSE_STATIC_ASSERT(c) { enum { FSE_static_assert = 1/(int)(!!(c)) }; }   /* expands to a block : use only *after* variable declarations */
+
+
+/****************************************************************
+*  Complex types
+****************************************************************/
+typedef struct
+{
+    int deltaFindState;
+    U32 deltaNbBits;
+} FSE_symbolCompressionTransform; /* total 8 bytes. NOTE(review): no use of this type is visible in the decoder code nearby */
+/* storage for a decoding table at the largest supported table log; FSE_decompress() keeps one as a local variable */
+typedef U32 DTable_max_t[FSE_DTABLE_SIZE_U32(FSE_MAX_TABLELOG)];
+
+/****************************************************************
+*  Internal functions
+****************************************************************/
+/* FSE_highbit32 : position (0-31) of the highest set bit of val, i.e. floor(log2(val)).
+ * val must be non-zero : __builtin_clz(0) is undefined and _BitScanReverse leaves r unset for 0. */
+FORCE_INLINE unsigned FSE_highbit32 (U32 val)
+{
+#   if defined(_MSC_VER)   /* Visual */
+    unsigned long r;
+    _BitScanReverse ( &r, val );
+    return (unsigned) r;
+#   elif defined(__GNUC__) && (GCC_VERSION >= 304)   /* GCC Intrinsic */
+    return 31 - __builtin_clz (val);
+#   else   /* Software version */
+    static const unsigned DeBruijnClz[32] = { 0, 9, 1, 10, 13, 21, 2, 29, 11, 14, 16, 18, 22, 25, 3, 30, 8, 12, 20, 28, 15, 17, 24, 7, 19, 27, 23, 6, 26, 5, 4, 31 };
+    U32 v = val;   /* below : smear the highest set bit into every lower position, so v becomes 2^(k+1)-1 */
+    v |= v >> 1;
+    v |= v >> 2;
+    v |= v >> 4;
+    v |= v >> 8;
+    v |= v >> 16;
+    return DeBruijnClz[ (U32) (v * 0x07C4ACDDU) >> 27];   /* multiply-and-shift maps that pattern to a table slot holding k */
+#   endif
+}
+
+
+/****************************************************************
+*  Templates
+****************************************************************/
+/*
+  designed to be included
+  for type-specific functions (template emulation in C)
+  Objective is to write these functions only once, for improved maintenance
+*/
+
+/* safety checks : both template parameters must already be defined when this point is reached */
+#ifndef FSE_FUNCTION_EXTENSION
+#  error "FSE_FUNCTION_EXTENSION must be defined"
+#endif
+#ifndef FSE_FUNCTION_TYPE
+#  error "FSE_FUNCTION_TYPE must be defined"
+#endif
+
+/* Function names : built by token pasting */
+#define FSE_CAT(X,Y) X##Y   /* raw paste : arguments are NOT macro-expanded first */
+#define FSE_FUNCTION_NAME(X,Y) FSE_CAT(X,Y)   /* extra level, so that X and Y are expanded before being pasted */
+#define FSE_TYPE_NAME(X,Y) FSE_CAT(X,Y)   /* same mechanism, intended for type names */
+
+
+
+static U32 FSE_tableStep(U32 tableSize) { const U32 half = tableSize>>1; const U32 eighth = tableSize>>3; return half + eighth + 3; }   /* stride used by FSE_buildDTable() when spreading symbols */
+
+#define FSE_DECODE_TYPE FSE_decode_t
+
+/* Header stored at the very start of a decoding table; the decode cells follow it */
+typedef struct {
+    U16 tableLog;   /* log2 of the number of decode cells (0 for an RLE table) */
+    U16 fastMode;   /* non-zero : FSE_decompress_usingDTable() selects the fast symbol decoder */
+} FSE_DTableHeader;   /* sizeof U32 */
+
+static size_t FSE_buildDTable
+(FSE_DTable* dt, const short* normalizedCounter, unsigned maxSymbolValue, unsigned tableLog)
+{   /* Fills dt from normalizedCounter[0..maxSymbolValue]. Returns 0 on success, or a negated FSE_ERROR_* code cast to size_t. */
+    void* ptr = dt;
+    FSE_DTableHeader* const DTableH = (FSE_DTableHeader*)ptr;
+    FSE_DECODE_TYPE* const tableDecode = (FSE_DECODE_TYPE*)(ptr) + 1;   /* because dt is unsigned, 32-bits aligned on 32-bits */
+    const U32 tableSize = 1 << tableLog;   /* NOTE(review): computed before tableLog is validated below */
+    const U32 tableMask = tableSize-1;
+    const U32 step = FSE_tableStep(tableSize);
+    U16 symbolNext[FSE_MAX_SYMBOL_VALUE+1];   /* per symbol : next state value to hand out in the last pass */
+    U32 position = 0;
+    U32 highThreshold = tableSize-1;   /* cells above this index are reserved for -1 ("lowprob") symbols */
+    const S16 largeLimit= (S16)(1 << (tableLog-1));   /* a count >= half the table disables fastMode */
+    U32 noLarge = 1;
+    U32 s;
+
+    /* Sanity Checks */
+    if (maxSymbolValue > FSE_MAX_SYMBOL_VALUE) return (size_t)-FSE_ERROR_maxSymbolValue_tooLarge;
+    if (tableLog > FSE_MAX_TABLELOG) return (size_t)-FSE_ERROR_tableLog_tooLarge;
+
+    /* Init, lay down lowprob symbols */
+    DTableH[0].tableLog = (U16)tableLog;
+    for (s=0; s<=maxSymbolValue; s++)
+    {
+        if (normalizedCounter[s]==-1)
+        {
+            tableDecode[highThreshold--].symbol = (FSE_FUNCTION_TYPE)s;   /* one cell, taken from the top of the table */
+            symbolNext[s] = 1;
+        }
+        else
+        {
+            if (normalizedCounter[s] >= largeLimit) noLarge=0;
+            symbolNext[s] = normalizedCounter[s];
+        }
+    }
+
+    /* Spread symbols */
+    for (s=0; s<=maxSymbolValue; s++)
+    {
+        int i;
+        for (i=0; i<normalizedCounter[s]; i++)   /* counts of 0 or -1 place nothing here */
+        {
+            tableDecode[position].symbol = (FSE_FUNCTION_TYPE)s;
+            position = (position + step) & tableMask;
+            while (position > highThreshold) position = (position + step) & tableMask;   /* lowprob area */
+        }
+    }
+
+    if (position!=0) return (size_t)-FSE_ERROR_GENERIC;   /* position must reach all cells once, otherwise normalizedCounter is incorrect */
+
+    /* Build Decoding table */
+    {
+        U32 i;
+        for (i=0; i<tableSize; i++)
+        {
+            FSE_FUNCTION_TYPE symbol = (FSE_FUNCTION_TYPE)(tableDecode[i].symbol);
+            U16 nextState = symbolNext[symbol]++;   /* successive cells of a symbol get successive state values */
+            tableDecode[i].nbBits = (BYTE) (tableLog - FSE_highbit32 ((U32)nextState) );
+            tableDecode[i].newState = (U16) ( (nextState << tableDecode[i].nbBits) - tableSize);
+        }
+    }
+
+    DTableH->fastMode = (U16)noLarge;   /* 1 only when no count reached largeLimit */
+    return 0;
+}
+
+
+/******************************************
+*  FSE byte symbol
+******************************************/
+#ifndef FSE_COMMONDEFS_ONLY
+
+static unsigned FSE_isError(size_t code) { const size_t errorThreshold = (size_t)(-FSE_ERROR_maxCode); return (errorThreshold < code); }   /* error codes are the values strictly above the threshold */
+
+static short FSE_abs(short a)
+{   /* magnitude of a (the negation is computed after promotion to int, then narrowed back to short) */
+    return (a >= 0) ? a : -a;
+}
+
+
+/****************************************************************
+*  Header bitstream management
+****************************************************************/
+static size_t FSE_readNCount (short* normalizedCounter, unsigned* maxSVPtr, unsigned* tableLogPtr,
+                 const void* headerBuffer, size_t hbSize)
+{   /* Parses a normalized-count header. In : *maxSVPtr = highest symbol index normalizedCounter can hold. Out : counts, *maxSVPtr = last symbol decoded, *tableLogPtr. Returns bytes consumed, or an error code. */
+    const BYTE* const istart = (const BYTE*) headerBuffer;
+    const BYTE* const iend = istart + hbSize;
+    const BYTE* ip = istart;
+    int nbBits;
+    int remaining;
+    int threshold;
+    U32 bitStream;
+    int bitCount;
+    unsigned charnum = 0;
+    int previous0 = 0;
+
+    if (hbSize < 4) return (size_t)-FSE_ERROR_srcSize_wrong;   /* the parser always loads 4 bytes at a time */
+    bitStream = FSE_readLE32(ip);
+    nbBits = (bitStream & 0xF) + FSE_MIN_TABLELOG;   /* extract tableLog */
+    if (nbBits > FSE_TABLELOG_ABSOLUTE_MAX) return (size_t)-FSE_ERROR_tableLog_tooLarge;
+    bitStream >>= 4;
+    bitCount = 4;
+    *tableLogPtr = nbBits;
+    remaining = (1<<nbBits)+1;   /* probability mass still to assign, plus 1 : parsing is complete when it reaches 1 */
+    threshold = 1<<nbBits;
+    nbBits++;
+
+    while ((remaining>1) && (charnum<=*maxSVPtr))
+    {
+        if (previous0)   /* the previous count was 0 : a run-length of further zero counts follows */
+        {
+            unsigned n0 = charnum;   /* index of the first symbol after the run */
+            while ((bitStream & 0xFFFF) == 0xFFFF)   /* 16 set bits : 24 more zero-count symbols */
+            {
+                n0+=24;
+                if (ip < iend-5)
+                {
+                    ip+=2;
+                    bitStream = FSE_readLE32(ip) >> bitCount;
+                }
+                else
+                {
+                    bitStream >>= 16;
+                    bitCount+=16;
+                }
+            }
+            while ((bitStream & 3) == 3)   /* each `11` pair : 3 more zero-count symbols */
+            {
+                n0+=3;
+                bitStream>>=2;
+                bitCount+=2;
+            }
+            n0 += bitStream & 3;   /* final 2-bit field : 0 to 2 more */
+            bitCount += 2;
+            if (n0 > *maxSVPtr) return (size_t)-FSE_ERROR_maxSymbolValue_tooSmall;
+            while (charnum < n0) normalizedCounter[charnum++] = 0;
+            if ((ip <= iend-7) || (ip + (bitCount>>3) <= iend-4))
+            {
+                ip += bitCount>>3;
+                bitCount &= 7;
+                bitStream = FSE_readLE32(ip) >> bitCount;
+            }
+            else
+                bitStream >>= 2;
+        }
+        {   /* decode one count : values below `max` are stored on nbBits-1 bits, the others on nbBits bits */
+            const short max = (short)((2*threshold-1)-remaining);
+            short count;
+
+            if ((bitStream & (threshold-1)) < (U32)max)
+            {
+                count = (short)(bitStream & (threshold-1));
+                bitCount   += nbBits-1;
+            }
+            else
+            {
+                count = (short)(bitStream & (2*threshold-1));
+                if (count >= threshold) count -= max;
+                bitCount   += nbBits;
+            }
+
+            count--;   /* extra accuracy */
+            remaining -= FSE_abs(count);   /* a count of -1 uses up one unit, like a count of 1 */
+            normalizedCounter[charnum++] = count;
+            previous0 = !count;
+            while (remaining < threshold)   /* fewer units left : later counts need fewer bits */
+            {
+                nbBits--;
+                threshold >>= 1;
+            }
+
+            {   /* refill : advance by the whole bytes consumed, unless that would make the 4-byte load pass iend */
+                if ((ip <= iend-7) || (ip + (bitCount>>3) <= iend-4))
+                {
+                    ip += bitCount>>3;
+                    bitCount &= 7;
+                }
+                else
+                {
+                    bitCount -= (int)(8 * (iend - 4 - ip));   /* pin ip to the last 4 bytes and keep the bit offset relative to it */
+                    ip = iend - 4;
+                }
+                bitStream = FSE_readLE32(ip) >> (bitCount & 31);
+            }
+        }
+    }
+    if (remaining != 1) return (size_t)-FSE_ERROR_GENERIC;   /* counts did not add up to the table size */
+    *maxSVPtr = charnum-1;
+
+    ip += (bitCount+7)>>3;   /* round the bits consumed up to whole bytes */
+    if ((size_t)(ip-istart) > hbSize) return (size_t)-FSE_ERROR_srcSize_wrong;
+    return ip-istart;
+}
+
+
+/*********************************************************
+*  Decompression (Byte symbols)
+*********************************************************/
+static size_t FSE_buildDTable_rle (FSE_DTable* dt, BYTE symbolValue)
+{   /* builds a table with tableLog 0 whose single cell always yields symbolValue; always returns 0 */
+    void* const base = dt;
+    FSE_DTableHeader* const header = (FSE_DTableHeader*)base;
+    FSE_decode_t* const onlyCell = ((FSE_decode_t*)base) + 1;   /* first cell sits one FSE_decode_t past the start of dt */
+
+    /* the single cell : emits symbolValue, reads no bits, next state stays 0 */
+    onlyCell->symbol   = symbolValue;
+    onlyCell->nbBits   = 0;
+    onlyCell->newState = 0;
+
+    header->fastMode = 0;
+    header->tableLog = 0;
+    return 0;
+}
+
+
+static size_t FSE_buildDTable_raw (FSE_DTable* dt, unsigned nbBits)
+{   /* builds a table of 2^nbBits cells in which cell i yields symbol (BYTE)i and reads exactly nbBits bits */
+    void* const base = dt;
+    FSE_DTableHeader* const header = (FSE_DTableHeader*)base;
+    FSE_decode_t* const cells = ((FSE_decode_t*)base) + 1;   /* first cell sits one FSE_decode_t past the start of dt */
+    const unsigned nbCells = 1 << nbBits;
+    const unsigned lastSymbol = nbCells - 1;
+    unsigned idx = 0;
+
+    /* Sanity checks */
+    if (nbBits < 1) return (size_t)-FSE_ERROR_GENERIC;             /* min size */
+
+    /* Header : fastMode is set, so FSE_decompress_usingDTable() will select the fast symbol decoder */
+    header->fastMode = 1;
+    header->tableLog = (U16)nbBits;
+    /* Cells : identity mapping; newState is 0, so the next state is exactly the nbBits bits read */
+    while (idx <= lastSymbol)
+    {
+        cells[idx].symbol = (BYTE)idx;
+        cells[idx].nbBits = (BYTE)nbBits;
+        cells[idx].newState = 0;
+        idx++;
+    }
+    return 0;
+}
+
+
+/* FSE_initDStream
+ * Initialize a FSE_DStream_t so that reading starts from the END of srcBuffer.
+ * The last byte must be non-zero : its highest set bit is the stop bit that ends the stream.
+ * The function result is the size of the FSE_block (== srcSize).
+ * If srcSize is 0 or the stop bit is missing, the function will return an errorCode;
+ */
+static size_t FSE_initDStream(FSE_DStream_t* bitD, const void* srcBuffer, size_t srcSize)
+{
+    if (srcSize < 1) return (size_t)-FSE_ERROR_srcSize_wrong;
+
+    if (srcSize >=  sizeof(size_t))   /* enough input to load a full container from the last bytes */
+    {
+        U32 contain32;
+        bitD->start = (const char*)srcBuffer;
+        bitD->ptr   = (const char*)srcBuffer + srcSize - sizeof(size_t);
+        bitD->bitContainer = FSE_readLEST(bitD->ptr);
+        contain32 = ((const BYTE*)srcBuffer)[srcSize-1];
+        if (contain32 == 0) return (size_t)-FSE_ERROR_GENERIC;   /* stop bit not present */
+        bitD->bitsConsumed = 8 - FSE_highbit32(contain32);   /* skips the zero bits above the stop bit, and the stop bit itself */
+    }
+    else   /* short input : assemble the container byte by byte */
+    {
+        U32 contain32;
+        bitD->start = (const char*)srcBuffer;
+        bitD->ptr   = bitD->start;
+        bitD->bitContainer = *(const BYTE*)(bitD->start);
+        switch(srcSize)   /* every case deliberately falls through; cases 5-7 need a 64-bit size_t, where the shifts are 48/40/32 */
+        {
+            case 7: bitD->bitContainer += (size_t)(((const BYTE*)(bitD->start))[6]) << (sizeof(size_t)*8 - 16);   /* fall-through */
+            case 6: bitD->bitContainer += (size_t)(((const BYTE*)(bitD->start))[5]) << (sizeof(size_t)*8 - 24);   /* fall-through */
+            case 5: bitD->bitContainer += (size_t)(((const BYTE*)(bitD->start))[4]) << (sizeof(size_t)*8 - 32);   /* fall-through */
+            case 4: bitD->bitContainer += (size_t)(((const BYTE*)(bitD->start))[3]) << 24;   /* fall-through */
+            case 3: bitD->bitContainer += (size_t)(((const BYTE*)(bitD->start))[2]) << 16;   /* fall-through */
+            case 2: bitD->bitContainer += (size_t)(((const BYTE*)(bitD->start))[1]) <<  8;   /* fall-through */
+            default:;
+        }
+        contain32 = ((const BYTE*)srcBuffer)[srcSize-1];
+        if (contain32 == 0) return (size_t)-FSE_ERROR_GENERIC;   /* stop bit not present */
+        bitD->bitsConsumed = 8 - FSE_highbit32(contain32);
+        bitD->bitsConsumed += (U32)(sizeof(size_t) - srcSize)*8;   /* the container bytes that have no input behind them count as consumed */
+    }
+
+    return srcSize;
+}
+
+
+/*!FSE_lookBits
+ * Peeks at the next nbBits bits of the stream without consuming them
+ * (bitsConsumed is left untouched, so a later look/read sees the same bits).
+ * Limits stated by the original author : nbBits <= 25 on 32-bits, <= 57 on 64-bits.
+ * The right shift is split in two (>>1, then >>(width-1-nbBits)) so that nbBits==0 yields 0.
+ */
+static size_t FSE_lookBits(FSE_DStream_t* bitD, U32 nbBits)
+{
+    const U32 widthMinus1 = sizeof(bitD->bitContainer)*8 - 1;
+    const size_t topAligned = bitD->bitContainer << (bitD->bitsConsumed & widthMinus1);   /* unread bits moved to the top */
+    return (topAligned >> 1) >> ((widthMinus1-nbBits) & widthMinus1);
+}
+
+static size_t FSE_lookBitsFast(FSE_DStream_t* bitD, U32 nbBits)   /* only if nbBits >= 1 !! */
+{   /* single-shift variant of FSE_lookBits : nbBits==0 would give a shift count of 0 instead of the full width */
+    const U32 widthMinus1 = sizeof(bitD->bitContainer)*8 - 1;
+    return (bitD->bitContainer << (bitD->bitsConsumed & widthMinus1)) >> ((widthMinus1+1-nbBits) & widthMinus1);
+}
+
+static void FSE_skipBits(FSE_DStream_t* bitD, U32 nbBits)
+{   /* marks nbBits more bits as consumed; bitContainer itself is left unchanged */
+    bitD->bitsConsumed = bitD->bitsConsumed + nbBits;
+}
+
+
+/*!FSE_readBits
+ * Extracts the next nbBits bits and marks them as consumed.
+ * On 32-bits, don't read more than maxNbBits==25
+ * On 64-bits, don't read more than maxNbBits==57
+ * nbBits==0 is accepted here; FSE_readBitsFast() requires nbBits >= 1.
+ * return : value extracted.
+ */
+static size_t FSE_readBits(FSE_DStream_t* bitD, U32 nbBits)
+{
+    const size_t extracted = FSE_lookBits(bitD, nbBits);   /* peek first ... */
+    bitD->bitsConsumed += nbBits;                          /* ... then consume (same effect as FSE_skipBits) */
+    return extracted;
+}
+
+static size_t FSE_readBitsFast(FSE_DStream_t* bitD, U32 nbBits)   /* only if nbBits >= 1 !! */
+{
+    const size_t extracted = FSE_lookBitsFast(bitD, nbBits);   /* peek with the single-shift variant ... */
+    bitD->bitsConsumed += nbBits;                              /* ... then consume (same effect as FSE_skipBits) */
+    return extracted;
+}
+
+static unsigned FSE_reloadDStream(FSE_DStream_t* bitD)
+{   /* Refills bitContainer by moving ptr BACKWARD over the bytes already consumed. Returns one of the FSE_DStream_* status values. */
+    if (bitD->bitsConsumed > (sizeof(bitD->bitContainer)*8))  /* should never happen */
+        return FSE_DStream_tooFar;
+
+    if (bitD->ptr >= bitD->start + sizeof(bitD->bitContainer))   /* at least one full container of input lies before ptr */
+    {
+        bitD->ptr -= bitD->bitsConsumed >> 3;   /* step back by the whole bytes consumed */
+        bitD->bitsConsumed &= 7;                /* keep only the sub-byte remainder */
+        bitD->bitContainer = FSE_readLEST(bitD->ptr);
+        return FSE_DStream_unfinished;
+    }
+    if (bitD->ptr == bitD->start)   /* nothing left to load : only the bits already in the container remain */
+    {
+        if (bitD->bitsConsumed < sizeof(bitD->bitContainer)*8) return FSE_DStream_endOfBuffer;
+        return FSE_DStream_completed;   /* every bit of the container has been consumed */
+    }
+    {   /* ptr is less than one container away from start : step back as far as possible, but never before start */
+        U32 nbBytes = bitD->bitsConsumed >> 3;
+        U32 result = FSE_DStream_unfinished;
+        if (bitD->ptr - nbBytes < bitD->start)
+        {
+            nbBytes = (U32)(bitD->ptr - bitD->start);  /* ptr > start */
+            result = FSE_DStream_endOfBuffer;
+        }
+        bitD->ptr -= nbBytes;
+        bitD->bitsConsumed -= nbBytes*8;
+        bitD->bitContainer = FSE_readLEST(bitD->ptr);   /* reminder : srcSize > sizeof(bitD) */
+        return result;
+    }
+}
+
+
+static void FSE_initDState(FSE_DState_t* DStatePtr, FSE_DStream_t* bitD, const FSE_DTable* dt)
+{   /* initial state = first tableLog bits of the stream; the table pointer is set one FSE_DTable element past dt */
+    const void* const base = dt;
+    const U32 tableLog = ((const FSE_DTableHeader*)base)->tableLog;
+    DStatePtr->table = dt + 1;
+    DStatePtr->state = FSE_readBits(bitD, tableLog);
+    FSE_reloadDStream(bitD);   /* refill after the read; its status is ignored here */
+}
+
+static BYTE FSE_decodeSymbol(FSE_DState_t* DStatePtr, FSE_DStream_t* bitD)
+{
+    /* fetch a copy of the cell selected by the current state */
+    const FSE_decode_t* const cells = (const FSE_decode_t*)(DStatePtr->table);
+    const FSE_decode_t cell = cells[DStatePtr->state];
+    /* next state = the cell's base state + cell.nbBits fresh bits from the stream (nbBits may be 0) */
+    const size_t freshBits = FSE_readBits(bitD, cell.nbBits);
+    DStatePtr->state = cell.newState + freshBits;
+    return cell.symbol;
+}
+
+static BYTE FSE_decodeSymbolFast(FSE_DState_t* DStatePtr, FSE_DStream_t* bitD)
+{
+    /* same as FSE_decodeSymbol, but reads with FSE_readBitsFast, which requires the cell's nbBits to be >= 1 */
+    const FSE_decode_t* const cells = (const FSE_decode_t*)(DStatePtr->table);
+    const FSE_decode_t cell = cells[DStatePtr->state];
+    /* next state = the cell's base state + cell.nbBits fresh bits from the stream */
+    const size_t freshBits = FSE_readBitsFast(bitD, cell.nbBits);
+    DStatePtr->state = cell.newState + freshBits;
+    return cell.symbol;
+}
+
+/* FSE_endOfDStream
+   Returns 1 only when ptr is back at the buffer start AND every bit of the container has been consumed */
+static unsigned FSE_endOfDStream(const FSE_DStream_t* bitD)
+{
+    if (bitD->ptr != bitD->start) return 0;   /* input remains before the current read position */
+    return (bitD->bitsConsumed == sizeof(bitD->bitContainer)*8);
+}
+
+static unsigned FSE_endOfDState(const FSE_DState_t* DStatePtr)
+{   /* a decoding state counts as finished once its value is 0 */
+    return (0 == DStatePtr->state) ? 1U : 0U;
+}
+
+
+FORCE_INLINE size_t FSE_decompress_usingDTable_generic(
+          void* dst, size_t maxDstSize,
+    const void* cSrc, size_t cSrcSize,
+    const FSE_DTable* dt, const unsigned fast)
+{   /* Decodes cSrc into dst with two interleaved states. Returns the number of bytes written, or an error code (test with FSE_isError). */
+    BYTE* const ostart = (BYTE*) dst;
+    BYTE* op = ostart;
+    BYTE* const omax = op + maxDstSize;
+    BYTE* const olimit = (maxDstSize < 3) ? op : omax-3;   /* last position where 4 symbols still fit; never points before dst */
+
+    FSE_DStream_t bitD;
+    FSE_DState_t state1;
+    FSE_DState_t state2;
+    size_t errorCode;
+
+    /* Init */
+    errorCode = FSE_initDStream(&bitD, cSrc, cSrcSize);   /* replaced last arg by maxCompressed Size */
+    if (FSE_isError(errorCode)) return errorCode;
+
+    FSE_initDState(&state1, &bitD, dt);
+    FSE_initDState(&state2, &bitD, dt);
+
+#define FSE_GETSYMBOL(statePtr) fast ? FSE_decodeSymbolFast(statePtr, &bitD) : FSE_decodeSymbol(statePtr, &bitD)
+
+    /* 4 symbols per loop : states 1 and 2 alternate; extra reloads are compiled in only when the container is too narrow */
+    for ( ; (FSE_reloadDStream(&bitD)==FSE_DStream_unfinished) && (op<olimit) ; op+=4)
+    {
+        op[0] = FSE_GETSYMBOL(&state1);
+
+        if (FSE_MAX_TABLELOG*2+7 > sizeof(bitD.bitContainer)*8)    /* This test must be static */
+            FSE_reloadDStream(&bitD);
+
+        op[1] = FSE_GETSYMBOL(&state2);
+
+        if (FSE_MAX_TABLELOG*4+7 > sizeof(bitD.bitContainer)*8)    /* This test must be static */
+            { if (FSE_reloadDStream(&bitD) > FSE_DStream_unfinished) { op+=2; break; } }
+
+        op[2] = FSE_GETSYMBOL(&state1);
+
+        if (FSE_MAX_TABLELOG*2+7 > sizeof(bitD.bitContainer)*8)    /* This test must be static */
+            FSE_reloadDStream(&bitD);
+
+        op[3] = FSE_GETSYMBOL(&state2);
+    }
+
+    /* tail : one symbol at a time, still alternating the two states */
+    /* note : FSE_reloadDStream(&bitD) >= FSE_DStream_endOfBuffer; Ends at exactly FSE_DStream_completed */
+    while (1)
+    {
+        if ( (FSE_reloadDStream(&bitD)>FSE_DStream_completed) || (op==omax) || (FSE_endOfDStream(&bitD) && (fast || FSE_endOfDState(&state1))) )
+            break;
+
+        *op++ = FSE_GETSYMBOL(&state1);
+
+        if ( (FSE_reloadDStream(&bitD)>FSE_DStream_completed) || (op==omax) || (FSE_endOfDStream(&bitD) && (fast || FSE_endOfDState(&state2))) )
+            break;
+
+        *op++ = FSE_GETSYMBOL(&state2);
+    }
+
+    /* end ? success requires the stream fully consumed AND both states back at 0 */
+    if (FSE_endOfDStream(&bitD) && FSE_endOfDState(&state1) && FSE_endOfDState(&state2))
+        return op-ostart;
+
+    if (op==omax) return (size_t)-FSE_ERROR_dstSize_tooSmall;   /* dst buffer is full, but cSrc unfinished */
+
+    return (size_t)-FSE_ERROR_corruptionDetected;
+}
+
+
+static size_t FSE_decompress_usingDTable(void* dst, size_t originalSize,
+                            const void* cSrc, size_t cSrcSize,
+                            const FSE_DTable* dt)
+{
+    /* read the header through a local copy rather than a cast (original note : avoids strict aliasing warning) */
+    FSE_DTableHeader header;
+    memcpy(&header, dt, sizeof(header));
+    /* each branch passes a literal `fast` flag to the force-inlined generic decoder */
+    if (!header.fastMode) return FSE_decompress_usingDTable_generic(dst, originalSize, cSrc, cSrcSize, dt, 0);
+    return FSE_decompress_usingDTable_generic(dst, originalSize, cSrc, cSrcSize, dt, 1);
+}
+
+
+static size_t FSE_decompress(void* dst, size_t maxDstSize, const void* cSrc, size_t cSrcSize)
+{
+    /* decodes one complete FSE block : count header first, then the bitstream that follows it */
+    const BYTE* const srcStart = (const BYTE*)cSrc;
+    short normCounts[FSE_MAX_SYMBOL_VALUE+1];
+    DTable_max_t decodeTable;   /* filled by FSE_buildDTable() before any use */
+    unsigned tableLog;
+    unsigned maxSymbol = FSE_MAX_SYMBOL_VALUE;
+    size_t headerSize;
+    size_t buildStatus;
+
+    if (cSrcSize<2) return (size_t)-FSE_ERROR_srcSize_wrong;   /* too small input size */
+
+    /* step 1 : parse the normalized-count header placed at the start of the block */
+    headerSize = FSE_readNCount (normCounts, &maxSymbol, &tableLog, srcStart, cSrcSize);
+    if (FSE_isError(headerSize)) return headerSize;
+    if (headerSize >= cSrcSize) return (size_t)-FSE_ERROR_srcSize_wrong;   /* header leaves no payload */
+
+    /* step 2 : turn the counts into a decoding table */
+    buildStatus = FSE_buildDTable (decodeTable, normCounts, maxSymbol, tableLog);
+    if (FSE_isError(buildStatus)) return buildStatus;
+
+    /* step 3 : decode the payload; the result is returned as-is, error code or not */
+    return FSE_decompress_usingDTable (dst, maxDstSize, srcStart + headerSize, cSrcSize - headerSize, decodeTable);
+}
+
+
+
+/* *******************************************************
+*  Huff0 : Huffman block compression
+*********************************************************/
+#define HUF_MAX_SYMBOL_VALUE 255   /* symbols are bytes; HUF_readDTable() sizes its weight array with this */
+#define HUF_DEFAULT_TABLELOG  12       /* used by default, when not specified */
+#define HUF_MAX_TABLELOG  12           /* max possible tableLog; for allocation purpose; can be modified */
+#define HUF_ABSOLUTEMAX_TABLELOG  16   /* absolute limit of HUF_MAX_TABLELOG. Beyond that value, code does not work */
+#if (HUF_MAX_TABLELOG > HUF_ABSOLUTEMAX_TABLELOG)   /* compile-time guard on the configurable value above */
+#  error "HUF_MAX_TABLELOG is too large !"
+#endif
+
+typedef struct HUF_CElt_s {   /* NOTE(review): compression-side element; no use is visible in the decoder code nearby */
+  U16  val;
+  BYTE nbBits;
+} HUF_CElt ;
+
+typedef struct nodeElt_s {   /* NOTE(review): presumably a Huffman tree node for the compressor; not used by the decoder code nearby */
+    U32 count;
+    U16 parent;
+    BYTE byte;
+    BYTE nbBits;
+} nodeElt;
+
+
+/* *******************************************************
+*  Huff0 : Huffman block decompression
+*********************************************************/
+typedef struct {
+    BYTE byte;     /* decoded symbol */
+    BYTE nbBits;   /* number of bits HUF_decodeSymbol() consumes for this entry */
+} HUF_DElt;   /* HUF_readDTable() statically asserts that this is the size of a U16 */
+
+static size_t HUF_readDTable (U16* DTable, const void* src, size_t srcSize)
+{   /* Reads the symbol weights from src and fills DTable. In : DTable[0] = largest table log the table can hold. Returns bytes consumed, or an error code. */
+    BYTE huffWeight[HUF_MAX_SYMBOL_VALUE + 1];
+    U32 rankVal[HUF_ABSOLUTEMAX_TABLELOG + 1];  /* large enough for values from 0 to 16 */
+    U32 weightTotal;
+    U32 maxBits;
+    const BYTE* ip = (const BYTE*) src;
+    size_t iSize;   /* size of the weight description that follows the first byte */
+    size_t oSize;   /* number of explicit weights; one more weight is implied */
+    U32 n;
+    U32 nextRankStart;
+    void* ptr = DTable+1;
+    HUF_DElt* const dt = (HUF_DElt*)ptr;   /* decode entries start right after DTable[0] */
+
+    if (!srcSize) return (size_t)-FSE_ERROR_srcSize_wrong;
+    iSize = ip[0];   /* first byte selects the encoding : <128 FSE-compressed, 128-241 raw 4-bit weights, >=242 RLE */
+
+    FSE_STATIC_ASSERT(sizeof(HUF_DElt) == sizeof(U16));   /* if compilation fails here, assertion is false */
+    //memset(huffWeight, 0, sizeof(huffWeight));   /* should not be necessary, but some analyzer complain ... */
+    if (iSize >= 128)  /* special header */
+    {
+        if (iSize >= (242))   /* RLE */
+        {
+            static int l[14] = { 1, 2, 3, 4, 7, 8, 15, 16, 31, 32, 63, 64, 127, 128 };   /* number of weights for header bytes 242..255 */
+            oSize = l[iSize-242];
+            memset(huffWeight, 1, sizeof(huffWeight));   /* every weight is 1 */
+            iSize = 0;   /* nothing follows the header byte */
+        }
+        else   /* Incompressible */
+        {
+            oSize = iSize - 127;
+            iSize = ((oSize+1)/2);   /* two 4-bit weights per byte */
+            if (iSize+1 > srcSize) return (size_t)-FSE_ERROR_srcSize_wrong;
+            ip += 1;
+            for (n=0; n<oSize; n+=2)   /* with an odd oSize this also writes huffWeight[oSize], which is overwritten below */
+            {
+                huffWeight[n]   = ip[n/2] >> 4;
+                huffWeight[n+1] = ip[n/2] & 15;
+            }
+        }
+    }
+    else  /* header compressed with FSE (normal case) */
+    {
+        if (iSize+1 > srcSize) return (size_t)-FSE_ERROR_srcSize_wrong;
+        oSize = FSE_decompress(huffWeight, HUF_MAX_SYMBOL_VALUE, ip+1, iSize);   /* max 255 values decoded, last one is implied */
+        if (FSE_isError(oSize)) return oSize;
+    }
+
+    /* collect weight stats : a weight w > 0 contributes 2^(w-1) to the total, a weight of 0 contributes nothing */
+    memset(rankVal, 0, sizeof(rankVal));
+    weightTotal = 0;
+    for (n=0; n<oSize; n++)
+    {
+        if (huffWeight[n] >= HUF_ABSOLUTEMAX_TABLELOG) return (size_t)-FSE_ERROR_corruptionDetected;
+        rankVal[huffWeight[n]]++;
+        weightTotal += (1 << huffWeight[n]) >> 1;
+    }
+    if (weightTotal == 0) return (size_t)-FSE_ERROR_corruptionDetected;
+
+    /* get last non-null symbol weight (implied, total must be 2^n) */
+    maxBits = FSE_highbit32(weightTotal) + 1;
+    if (maxBits > DTable[0]) return (size_t)-FSE_ERROR_tableLog_tooLarge;   /* DTable is too small */
+    DTable[0] = (U16)maxBits;   /* from here on DTable[0] holds the actual table log */
+    {
+        U32 total = 1 << maxBits;
+        U32 rest = total - weightTotal;   /* what the implied last symbol must contribute */
+        U32 verif = 1 << FSE_highbit32(rest);
+        U32 lastWeight = FSE_highbit32(rest) + 1;
+        if (verif != rest) return (size_t)-FSE_ERROR_corruptionDetected;    /* last value must be a clean power of 2 */
+        huffWeight[oSize] = (BYTE)lastWeight;
+        rankVal[lastWeight]++;
+    }
+
+    /* check tree construction validity */
+    if ((rankVal[1] < 2) || (rankVal[1] & 1)) return (size_t)-FSE_ERROR_corruptionDetected;   /* by construction : at least 2 elts of rank 1, must be even */
+
+    /* Prepare ranks : turn the per-weight symbol counts into the first table index of each weight */
+    nextRankStart = 0;
+    for (n=1; n<=maxBits; n++)
+    {
+        U32 current = nextRankStart;
+        nextRankStart += (rankVal[n] << (n-1));
+        rankVal[n] = current;
+    }
+
+    /* fill DTable : a symbol of weight w > 0 occupies 2^(w-1) consecutive entries; weight 0 occupies none */
+    for (n=0; n<=oSize; n++)   /* <= : includes the implied last symbol */
+    {
+        const U32 w = huffWeight[n];
+        const U32 length = (1 << w) >> 1;
+        U32 i;
+        HUF_DElt D;
+        D.byte = (BYTE)n; D.nbBits = (BYTE)(maxBits + 1 - w);
+        for (i = rankVal[w]; i < rankVal[w] + length; i++)
+            dt[i] = D;
+        rankVal[w] += length;
+    }
+
+    return iSize+1;   /* header byte + weight description */
+}
+
+
+static BYTE HUF_decodeSymbol(FSE_DStream_t* Dstream, const HUF_DElt* dt, const U32 dtLog)
+{
+    /* peek dtLog bits (note : dtLog >= 1) and use them as an index into the decoding table */
+    const HUF_DElt* const entry = dt + FSE_lookBitsFast(Dstream, dtLog);
+    /* consume only the number of bits recorded in that entry */
+    Dstream->bitsConsumed += entry->nbBits;
+    return entry->byte;
+}
+
+static size_t HUF_decompress_usingDTable(   /* -3% slower when non static */
+          void* dst, size_t maxDstSize,
+    const void* cSrc, size_t cSrcSize,
+    const U16* DTable)
+{   /* Decodes 4 interleaved Huffman bitstreams from cSrc into dst. Returns the number of bytes written, or an error code (test with FSE_isError). */
+    BYTE* const ostart = (BYTE*) dst;
+    BYTE* op = ostart;
+    BYTE* const omax = op + maxDstSize;
+    BYTE* const olimit = (maxDstSize < 15) ? op : omax-15;   /* last position where 16 symbols still fit; never points before dst */
+
+    const void* ptr = DTable;
+    const HUF_DElt* const dt = (const HUF_DElt*)(ptr)+1;
+    const U32 dtLog = DTable[0];
+    size_t errorCode;
+    U32 reloadStatus;
+    const U16* jumpTable = (const U16*)cSrc;   /* 3 little-endian 16-bit stream lengths; the 4th length is implied */
+    size_t length1, length2, length3, length4;
+    const char *start1, *start2, *start3, *start4;
+    FSE_DStream_t bitD1, bitD2, bitD3, bitD4;
+
+    /* Init : validate the sizes before reading anything from the input */
+    if (cSrcSize < 6) return (size_t)-FSE_ERROR_srcSize_wrong;   /* no room for the 6-byte jump table */
+    length1 = FSE_readLE16(jumpTable);
+    length2 = FSE_readLE16(jumpTable+1);
+    length3 = FSE_readLE16(jumpTable+2);
+    if (length1+length2+length3+6 >= cSrcSize) return (size_t)-FSE_ERROR_srcSize_wrong;   /* the 4th stream would be empty or negative */
+    length4 = cSrcSize - 6 - length1 - length2 - length3;
+    start1 = (const char*)(cSrc) + 6;
+    start2 = start1 + length1;
+    start3 = start2 + length2;
+    start4 = start3 + length3;
+    errorCode = FSE_initDStream(&bitD1, start1, length1);
+    if (FSE_isError(errorCode)) return errorCode;
+    errorCode = FSE_initDStream(&bitD2, start2, length2);
+    if (FSE_isError(errorCode)) return errorCode;
+    errorCode = FSE_initDStream(&bitD3, start3, length3);
+    if (FSE_isError(errorCode)) return errorCode;
+    errorCode = FSE_initDStream(&bitD4, start4, length4);
+    if (FSE_isError(errorCode)) return errorCode;
+
+    reloadStatus=FSE_reloadDStream(&bitD2);
+
+    /* 16 symbols per loop : output position n is decoded from stream (n % 4) + 1 */
+    for ( ; (reloadStatus<FSE_DStream_completed) && (op<olimit);  /* D2-3-4 are supposed to be synchronized and finish together */
+        op+=16, reloadStatus = FSE_reloadDStream(&bitD2) | FSE_reloadDStream(&bitD3) | FSE_reloadDStream(&bitD4), FSE_reloadDStream(&bitD1))
+    {
+#define HUF_DECODE_SYMBOL_0(n, Dstream) \
+        op[n] = HUF_decodeSymbol(&Dstream, dt, dtLog);
+
+#define HUF_DECODE_SYMBOL_1(n, Dstream) \
+        op[n] = HUF_decodeSymbol(&Dstream, dt, dtLog); \
+        if (FSE_32bits() && (HUF_MAX_TABLELOG>12)) FSE_reloadDStream(&Dstream)
+
+#define HUF_DECODE_SYMBOL_2(n, Dstream) \
+        op[n] = HUF_decodeSymbol(&Dstream, dt, dtLog); \
+        if (FSE_32bits()) FSE_reloadDStream(&Dstream)
+
+        HUF_DECODE_SYMBOL_1( 0, bitD1);
+        HUF_DECODE_SYMBOL_1( 1, bitD2);
+        HUF_DECODE_SYMBOL_1( 2, bitD3);
+        HUF_DECODE_SYMBOL_1( 3, bitD4);
+        HUF_DECODE_SYMBOL_2( 4, bitD1);
+        HUF_DECODE_SYMBOL_2( 5, bitD2);
+        HUF_DECODE_SYMBOL_2( 6, bitD3);
+        HUF_DECODE_SYMBOL_2( 7, bitD4);
+        HUF_DECODE_SYMBOL_1( 8, bitD1);
+        HUF_DECODE_SYMBOL_1( 9, bitD2);
+        HUF_DECODE_SYMBOL_1(10, bitD3);
+        HUF_DECODE_SYMBOL_1(11, bitD4);
+        HUF_DECODE_SYMBOL_0(12, bitD1);
+        HUF_DECODE_SYMBOL_0(13, bitD2);
+        HUF_DECODE_SYMBOL_0(14, bitD3);
+        HUF_DECODE_SYMBOL_0(15, bitD4);
+    }
+
+    if (reloadStatus!=FSE_DStream_completed)   /* not complete : some bitStream might be FSE_DStream_unfinished */
+        return (size_t)-FSE_ERROR_corruptionDetected;
+
+    /* tail : what is left of stream 1 is decoded one symbol at a time */
+    {
+        // bitTail = bitD1;   // *much* slower : -20% !??!
+        FSE_DStream_t bitTail;
+        bitTail.ptr = bitD1.ptr;
+        bitTail.bitsConsumed = bitD1.bitsConsumed;
+        bitTail.bitContainer = bitD1.bitContainer;   // required in case of FSE_DStream_endOfBuffer
+        bitTail.start = start1;
+        for ( ; (FSE_reloadDStream(&bitTail) < FSE_DStream_completed) && (op<omax) ; op++)
+        {
+            HUF_DECODE_SYMBOL_0(0, bitTail);
+        }
+
+        if (FSE_endOfDStream(&bitTail))
+            return op-ostart;
+    }
+
+    if (op==omax) return (size_t)-FSE_ERROR_dstSize_tooSmall;   /* dst buffer is full, but cSrc unfinished */
+
+    return (size_t)-FSE_ERROR_corruptionDetected;
+}
+
+/* HUF_decompress() : calls HUF_readDTable() on cSrc, skips the amount it reports, then hands the remainder to HUF_decompress_usingDTable() */
+static size_t HUF_decompress (void* dst, size_t maxDstSize, const void* cSrc, size_t cSrcSize)
+{
+    HUF_CREATE_STATIC_DTABLE(DTable, HUF_MAX_TABLELOG);
+    const BYTE* payload = (const BYTE*) cSrc;
+    size_t headerSize;   /* result of HUF_readDTable() : an error code, or the input amount to skip */
+
+    /* step 1 : table description */
+    headerSize = HUF_readDTable (DTable, cSrc, cSrcSize);
+    if (FSE_isError(headerSize)) return headerSize;
+    if (headerSize >= cSrcSize) return (size_t)-FSE_ERROR_srcSize_wrong;   /* no input would remain */
+    payload += headerSize;
+    /* step 2 : encoded content */
+    return HUF_decompress_usingDTable (dst, maxDstSize, payload, cSrcSize - headerSize, DTable);
+}
+
+
+#endif   /* FSE_COMMONDEFS_ONLY */
+
+/*
+    zstd - standard compression library
+    Copyright (C) 2014-2015, Yann Collet.
+
+    BSD 2-Clause License (http://www.opensource.org/licenses/bsd-license.php)
+
+    Redistribution and use in source and binary forms, with or without
+    modification, are permitted provided that the following conditions are
+    met:
+    * Redistributions of source code must retain the above copyright
+    notice, this list of conditions and the following disclaimer.
+    * Redistributions in binary form must reproduce the above
+    copyright notice, this list of conditions and the following disclaimer
+    in the documentation and/or other materials provided with the
+    distribution.
+    THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+    "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+    LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+    A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+    OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+    SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+    LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+    DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+    THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+    (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+    OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+    You can contact the author at :
+    - zstd source repository : https://github.com/Cyan4973/zstd
+    - zstd public forum : https://groups.google.com/forum/#!forum/lz4c
+*/
+
+/****************************************************************
+*  Tuning parameters
+*****************************************************************/
+/* MEMORY_USAGE :
+*  Memory usage formula : N->2^N Bytes (examples : 10 -> 1KB; 12 -> 4KB ; 16 -> 64KB; 20 -> 1MB; etc.)
+*  Increasing memory usage improves compression ratio
+*  Reduced memory usage can improve speed, due to cache effect */
+#define ZSTD_MEMORY_USAGE 17
+
+
+/**************************************
+   CPU Feature Detection
+**************************************/
+/*
+ * Automated efficient unaligned memory access detection
+ * Based on known hardware architectures
+ * This list will be updated as feedback comes in
+ */
+#if defined(CPU_HAS_EFFICIENT_UNALIGNED_MEMORY_ACCESS) \
+    || defined(__ARM_FEATURE_UNALIGNED) \
+    || defined(__i386__) || defined(__x86_64__) \
+    || defined(_M_IX86) || defined(_M_X64) \
+    || defined(__ARM_ARCH_7__) || defined(__ARM_ARCH_8__) \
+    || (defined(_M_ARM) && (_M_ARM >= 7))
+#  define ZSTD_UNALIGNED_ACCESS 1   /* at least one of the conditions tested above holds */
+#else
+#  define ZSTD_UNALIGNED_ACCESS 0   /* no listed target matched */
+#endif
+
+
+/********************************************************
+*  Includes
+*********************************************************/
+#include <stdlib.h>      /* calloc */
+#include <string.h>      /* memcpy, memmove */
+#include <stdio.h>       /* debug : printf */
+
+
+/********************************************************
+*  Compiler specifics
+*********************************************************/
+#ifdef __AVX2__
+#  include <immintrin.h>   /* AVX2 intrinsics */
+#endif
+
+#ifdef _MSC_VER    /* Visual Studio */
+#  include <intrin.h>                    /* For Visual 2005 */
+#  pragma warning(disable : 4127)        /* disable: C4127: conditional expression is constant */
+#  pragma warning(disable : 4324)        /* disable: C4324: padded structure */
+#endif
+
+
+#ifndef MEM_ACCESS_MODULE
+#define MEM_ACCESS_MODULE   /* guard : the types below are declared only if this macro was not already defined */
+/********************************************************
+*  Basic Types
+*********************************************************/
+#if defined (__STDC_VERSION__) && __STDC_VERSION__ >= 199901L   /* C99 */
+# include <stdint.h>
+typedef  uint8_t BYTE;
+typedef uint16_t U16;
+typedef  int16_t S16;
+typedef uint32_t U32;
+typedef  int32_t S32;
+typedef uint64_t U64;
+#else   /* otherwise : native types, assumed to be 8/16/32/64 bits wide (not verified here) */
+typedef unsigned char       BYTE;
+typedef unsigned short      U16;
+typedef   signed short      S16;
+typedef unsigned int        U32;
+typedef   signed int        S32;
+typedef unsigned long long  U64;
+#endif
+
+#endif   /* MEM_ACCESS_MODULE */
+
+
+/********************************************************
+*  Constants
+*********************************************************/
+static const U32 ZSTD_magicNumber = 0xFD2FB51E;   /* 3rd version : seqNb header; compared with the big-endian 32-bit value opening a frame */
+
+#define HASH_LOG (ZSTD_MEMORY_USAGE - 2)
+#define HASH_TABLESIZE (1 << HASH_LOG)   /* sizes the hashTable member of struct ZSTD_Cctx_s */
+#define HASH_MASK (HASH_TABLESIZE - 1)
+
+#define KNUTH 2654435761   /* NOTE(review): not referenced by the decoding functions of this file section */
+
+#define BIT7 128
+#define BIT6  64
+#define BIT5  32
+#define BIT4  16
+
+#define KB *(1 <<10)   /* postfix multipliers : "128 KB" expands to "128 *(1 <<10)" */
+#define MB *(1 <<20)
+#define GB *(1U<<30)
+
+#define BLOCKSIZE (128 KB)                 /* define, for static allocation */
+
+#define WORKPLACESIZE (BLOCKSIZE*3)   /* size of the buffer member of struct ZSTD_Cctx_s */
+#define MINMATCH 4   /* added to every decoded match length */
+#define MLbits   7   /* table log used when the match-length table is "raw" */
+#define LLbits   6   /* table log used when the literal-length table is "raw" */
+#define Offbits  5   /* table log used when the offset-code table is "raw" */
+#define MaxML  ((1<<MLbits )-1)   /* also the escape value : a decoded MaxML means the length continues in the dumps area */
+#define MaxLL  ((1<<LLbits )-1)   /* same escape role, for literal lengths */
+#define MaxOff ((1<<Offbits)-1)
+#define LitFSELog  11
+#define MLFSELog   10   /* largest table log accepted for a compressed match-length table */
+#define LLFSELog   10   /* largest table log accepted for a compressed literal-length table */
+#define OffFSELog   9   /* largest table log accepted for a compressed offset-code table */
+#define MAX(a,b) ((a)<(b)?(b):(a))   /* evaluates its arguments more than once */
+#define MaxSeq MAX(MaxLL, MaxML)
+
+#define LITERAL_NOENTROPY 63
+#define COMMAND_NOENTROPY 7   /* to remove */
+
+static const size_t ZSTD_blockHeaderSize = 3;   /* bytes; matches the 3 header bytes read by ZSTDv01_getcBlockSize() */
+static const size_t ZSTD_frameHeaderSize = 4;   /* bytes; the frame header checked here is the 32-bit magic number */
+
+
+/********************************************************
+*  Memory operations
+*********************************************************/
+static unsigned ZSTD_32bits(void) { return sizeof(void*)==4; }   /* 1 when pointers are 4 bytes wide, 0 otherwise */
+
+static unsigned ZSTD_isLittleEndian(void)   /* non-zero on a little-endian host, 0 on a big-endian one */
+{
+    const union { U32 word; BYTE bytes[4]; } probe = { 1 };   /* deliberately not static : static was found detrimental to performance */
+    return probe.bytes[0];   /* the lowest-addressed byte holds the 1 only when the least significant byte is stored first */
+}
+/* ZSTD_read16() : loads a 16-bit value in host byte order; going through memcpy() puts no alignment requirement on p */
+static U16    ZSTD_read16(const void* p) { U16 r; memcpy(&r, p, sizeof(r)); return r; }
+/* ZSTD_read32() : same, for a 32-bit value */
+static U32    ZSTD_read32(const void* p) { U32 r; memcpy(&r, p, sizeof(r)); return r; }
+/* ZSTD_copy4() : copies exactly 4 bytes from src to dst */
+static void   ZSTD_copy4(void* dst, const void* src) { memcpy(dst, src, 4); }
+/* ZSTD_copy8() : copies exactly 8 bytes from src to dst */
+static void   ZSTD_copy8(void* dst, const void* src) { memcpy(dst, src, 8); }
+/* COPY8() : copies 8 bytes, then advances BOTH arguments by 8; d and s must be modifiable pointer variables */
+#define COPY8(d,s)    { ZSTD_copy8(d,s); d+=8; s+=8; }
+
+/* ZSTD_wildcopy() : copies 8 bytes at a time, so it may write up to 7 bytes past dst+length; does nothing when length <= 0 */
+static void ZSTD_wildcopy(void* dst, const void* src, ptrdiff_t length)
+{
+    const BYTE* from = (const BYTE*)src;
+    BYTE* to = (BYTE*)dst;
+    for ( ; to < (BYTE*)dst + length; to += 8, from += 8) ZSTD_copy8(to, from);
+}
+
+/* ZSTD_readLE16() : returns the 16-bit little-endian value stored at memPtr, whatever the host byte order */
+static U16 ZSTD_readLE16(const void* memPtr)
+{
+    const BYTE* const bytes = (const BYTE*)memPtr;
+
+    if (ZSTD_isLittleEndian()) return ZSTD_read16(memPtr);   /* fast path : the in-memory layout already matches */
+    /* portable path : byte 0 is the least significant one */
+    return (U16)((U16)bytes[0] + ((U16)bytes[1]<<8));
+}
+
+/* ZSTD_readLE32() : returns the 32-bit little-endian value stored at memPtr, whatever the host byte order */
+static U32 ZSTD_readLE32(const void* memPtr)
+{
+    const BYTE* const bytes = (const BYTE*)memPtr;
+    U32 value;
+
+    if (ZSTD_isLittleEndian()) return ZSTD_read32(memPtr);   /* fast path : the in-memory layout already matches */
+    value  = (U32)bytes[0];                                   /* portable path : least significant byte first */
+    value += ((U32)bytes[1]<<8) + ((U32)bytes[2]<<16) + ((U32)bytes[3]<<24);
+    return value;
+}
+
+static U32 ZSTD_readBE32(const void* memPtr)   /* 32-bit big-endian load : byte 0 is the most significant one */
+{
+    const BYTE* const bytes = (const BYTE*)memPtr;
+    return ((U32)bytes[0]<<24) | ((U32)bytes[1]<<16) | ((U32)bytes[2]<<8) | (U32)bytes[3];
+}
+
+
+/**************************************
+*  Local structures
+***************************************/
+typedef struct ZSTD_Cctx_s ZSTD_Cctx;
+/* enumerator order matters : ZSTDv01_getcBlockSize() casts the top 2 bits of a block header straight to this type */
+typedef enum { bt_compressed, bt_raw, bt_rle, bt_end } blockType_t;
+
+typedef struct
+{
+    blockType_t blockType;   /* type read from the block header */
+    U32 origSize;            /* size field of the header for bt_rle blocks, 0 for every other type */
+} blockProperties_t;
+/* NOTE(review): seqStore_t is only used as a member of the struct below; presumably left over from the compressor - confirm */
+typedef struct {
+    void* buffer;
+    U32*  offsetStart;
+    U32*  offset;
+    BYTE* offCodeStart;
+    BYTE* offCode;
+    BYTE* litStart;
+    BYTE* lit;
+    BYTE* litLengthStart;
+    BYTE* litLength;
+    BYTE* matchLengthStart;
+    BYTE* matchLength;
+    BYTE* dumpsStart;
+    BYTE* dumps;
+} seqStore_t;
+
+/* NOTE(review): struct ZSTD_Cctx_s is not referenced by the decoding functions of this file section */
+typedef struct ZSTD_Cctx_s
+{
+    const BYTE* base;
+    U32 current;
+    U32 nextUpdate;
+    seqStore_t seqStore;
+#ifdef __AVX2__
+    __m256i hashTable[HASH_TABLESIZE>>3];   /* presumably the same storage, 8 32-bit slots per vector - TODO confirm */
+#else
+    U32 hashTable[HASH_TABLESIZE];
+#endif
+    BYTE buffer[WORKPLACESIZE];
+} cctxi_t;
+
+
+
+
+/**************************************
+*  Error Management
+**************************************/
+/* published entry point : forwards the code to ERR_isError() and returns its verdict unchanged */
+unsigned ZSTDv01_isError(size_t code) { return ERR_isError(code); }
+
+
+/**************************************
+*  Tool functions
+**************************************/
+#define ZSTD_VERSION_MAJOR    0    /* for breaking interface changes  */
+#define ZSTD_VERSION_MINOR    1    /* for new (non-breaking) interface capabilities */
+#define ZSTD_VERSION_RELEASE  3    /* for tweaks, bug-fixes, or development */
+#define ZSTD_VERSION_NUMBER  (ZSTD_VERSION_MAJOR *100*100 + ZSTD_VERSION_MINOR *100 + ZSTD_VERSION_RELEASE)
+
+/**************************************************************
+*   Decompression code
+**************************************************************/
+
+size_t ZSTDv01_getcBlockSize(const void* src, size_t srcSize, blockProperties_t* bpPtr)
+{
+    const BYTE* const in = (const BYTE* const)src;
+    BYTE headerFlags;
+    U32 cSize;
+
+    if (srcSize < 3) return ERROR(srcSize_wrong);
+
+    headerFlags = *in;
+    cSize = in[2] + (in[1]<<8) + ((in[0] & 7)<<16);
+
+    bpPtr->blockType = (blockType_t)(headerFlags >> 6);
+    bpPtr->origSize = (bpPtr->blockType == bt_rle) ? cSize : 0;
+
+    if (bpPtr->blockType == bt_end) return 0;
+    if (bpPtr->blockType == bt_rle) return 1;
+    return cSize;
+}
+
+
+static size_t ZSTD_copyUncompressedBlock(void* dst, size_t maxDstSize, const void* src, size_t srcSize)
+{
+    if (srcSize > maxDstSize) return ERROR(dstSize_tooSmall);
+    memcpy(dst, src, srcSize);
+    return srcSize;
+}
+
+
+static size_t ZSTD_decompressLiterals(void* ctx,
+                                      void* dst, size_t maxDstSize,
+                                const void* src, size_t srcSize)
+{
+    BYTE* op = (BYTE*)dst;
+    BYTE* const oend = op + maxDstSize;
+    const BYTE* ip = (const BYTE*)src;
+    size_t errorCode;
+    size_t litSize;
+
+    /* check : minimum 2, for litSize, +1, for content */
+    if (srcSize <= 3) return ERROR(corruption_detected);
+
+    litSize = ip[1] + (ip[0]<<8);
+    litSize += ((ip[-3] >> 3) & 7) << 16;   // mmmmh....
+    op = oend - litSize;
+
+    (void)ctx;
+    if (litSize > maxDstSize) return ERROR(dstSize_tooSmall);
+    errorCode = HUF_decompress(op, litSize, ip+2, srcSize-2);
+    if (FSE_isError(errorCode)) return ERROR(GENERIC);
+    return litSize;
+}
+
+/* ZSTDv01_decodeLiteralsBlock() : decodes the literals sub-block at src; sets *litStart / *litSize and returns the input bytes consumed, or an error code */
+size_t ZSTDv01_decodeLiteralsBlock(void* ctx,
+                                void* dst, size_t maxDstSize,
+                          const BYTE** litStart, size_t* litSize,
+                          const void* src, size_t srcSize)
+{
+    const BYTE* const istart = (const BYTE* const)src;
+    const BYTE* ip = istart;
+    BYTE* const ostart = (BYTE* const)dst;
+    BYTE* const oend = ostart + maxDstSize;
+    blockProperties_t litbp;
+
+    size_t litcSize = ZSTDv01_getcBlockSize(src, srcSize, &litbp);   /* the sub-block starts with a regular 3-byte block header */
+    if (ZSTDv01_isError(litcSize)) return litcSize;
+    if (litcSize > srcSize - ZSTD_blockHeaderSize) return ERROR(srcSize_wrong);
+    ip += ZSTD_blockHeaderSize;
+
+    switch(litbp.blockType)
+    {
+    case bt_raw:   /* literals are used in place : *litStart points into src, nothing is copied */
+        *litStart = ip;
+        ip += litcSize;
+        *litSize = litcSize;
+        break;
+    case bt_rle:   /* one source byte, replicated origSize times at the end of dst */
+        {
+            size_t rleSize = litbp.origSize;
+            if (rleSize>maxDstSize) return ERROR(dstSize_tooSmall);
+            if (!srcSize) return ERROR(srcSize_wrong);   /* NOTE(review): cannot trigger, ZSTDv01_getcBlockSize() already rejected srcSize < 3 */
+            memset(oend - rleSize, *ip, rleSize);
+            *litStart = oend - rleSize;
+            *litSize = rleSize;
+            ip++;
+            break;
+        }
+    case bt_compressed:   /* literals are regenerated at the end of dst */
+        {
+            size_t decodedLitSize = ZSTD_decompressLiterals(ctx, dst, maxDstSize, ip, litcSize);
+            if (ZSTDv01_isError(decodedLitSize)) return decodedLitSize;
+            *litStart = oend - decodedLitSize;
+            *litSize = decodedLitSize;
+            ip += litcSize;
+            break;
+        }
+    case bt_end:
+    default:
+        return ERROR(GENERIC);
+    }
+
+    return ip-istart;
+}
+
+/* ZSTDv01_decodeSeqHeaders() : parses the header of a sequences section (sequence count, "dumps" area, 3 decoding tables); returns the bytes read, or an error code */
+size_t ZSTDv01_decodeSeqHeaders(int* nbSeq, const BYTE** dumpsPtr, size_t* dumpsLengthPtr,
+                         FSE_DTable* DTableLL, FSE_DTable* DTableML, FSE_DTable* DTableOffb,
+                         const void* src, size_t srcSize)
+{
+    const BYTE* const istart = (const BYTE* const)src;
+    const BYTE* ip = istart;
+    const BYTE* const iend = istart + srcSize;
+    U32 LLtype, Offtype, MLtype;
+    U32 LLlog, Offlog, MLlog;
+    size_t dumpsLength;
+
+    /* check */
+    if (srcSize < 5) return ERROR(srcSize_wrong);
+
+    /* SeqHead */
+    *nbSeq = ZSTD_readLE16(ip); ip+=2;   /* sequence count : 16 bits, little-endian */
+    LLtype  = *ip >> 6;                  /* flag byte, bits 7-6 : literal-length table type */
+    Offtype = (*ip >> 4) & 3;            /* bits 5-4 : offset-code table type */
+    MLtype  = (*ip >> 2) & 3;            /* bits 3-2 : match-length table type */
+    if (*ip & 2)   /* bit 1 set : the dumps length is the 16-bit value held by the next 2 bytes, high byte first */
+    {
+        dumpsLength  = ip[2];
+        dumpsLength += ip[1] << 8;
+        ip += 3;
+    }
+    else   /* bit 1 clear : 9-bit dumps length, bit 0 of the flag byte is its high bit */
+    {
+        dumpsLength  = ip[1];
+        dumpsLength += (ip[0] & 1) << 8;
+        ip += 2;
+    }
+    *dumpsPtr = ip;
+    ip += dumpsLength;   /* the dumps area is skipped here; ZSTD_decodeSequence() reads it later */
+    *dumpsLengthPtr = dumpsLength;
+
+    /* check */
+    if (ip > iend-3) return ERROR(srcSize_wrong); /* min : all 3 are "raw", hence no header, but at least xxLog bits per type */
+
+    /* sequences */
+    {
+        S16 norm[MaxML+1];    /* assumption : MaxML >= MaxLL and MaxOff */
+        size_t headerSize;
+
+        /* Build DTables */
+        switch(LLtype)
+        {
+        case bt_rle :   /* the table is built from the single byte read here */
+            LLlog = 0;
+            FSE_buildDTable_rle(DTableLL, *ip++); break;
+        case bt_raw :   /* no table description in the input */
+            LLlog = LLbits;
+            FSE_buildDTable_raw(DTableLL, LLbits); break;
+        default :       /* the table description is read from the input by FSE_readNCount() */
+            {   U32 max = MaxLL;
+                headerSize = FSE_readNCount(norm, &max, &LLlog, ip, iend-ip);
+                if (FSE_isError(headerSize)) return ERROR(GENERIC);
+                if (LLlog > LLFSELog) return ERROR(corruption_detected);
+                ip += headerSize;
+                FSE_buildDTable(DTableLL, norm, max, LLlog);
+        }   }
+
+        switch(Offtype)   /* same three cases, for the offset-code table */
+        {
+        case bt_rle :
+            Offlog = 0;
+            if (ip > iend-2) return ERROR(srcSize_wrong); /* min : "raw", hence no header, but at least xxLog bits */
+            FSE_buildDTable_rle(DTableOffb, *ip++); break;
+        case bt_raw :
+            Offlog = Offbits;
+            FSE_buildDTable_raw(DTableOffb, Offbits); break;
+        default :
+            {   U32 max = MaxOff;
+                headerSize = FSE_readNCount(norm, &max, &Offlog, ip, iend-ip);
+                if (FSE_isError(headerSize)) return ERROR(GENERIC);
+                if (Offlog > OffFSELog) return ERROR(corruption_detected);
+                ip += headerSize;
+                FSE_buildDTable(DTableOffb, norm, max, Offlog);
+        }   }
+
+        switch(MLtype)   /* same three cases, for the match-length table */
+        {
+        case bt_rle :
+            MLlog = 0;
+            if (ip > iend-2) return ERROR(srcSize_wrong); /* min : "raw", hence no header, but at least xxLog bits */
+            FSE_buildDTable_rle(DTableML, *ip++); break;
+        case bt_raw :
+            MLlog = MLbits;
+            FSE_buildDTable_raw(DTableML, MLbits); break;
+        default :
+            {   U32 max = MaxML;
+                headerSize = FSE_readNCount(norm, &max, &MLlog, ip, iend-ip);
+                if (FSE_isError(headerSize)) return ERROR(GENERIC);
+                if (MLlog > MLFSELog) return ERROR(corruption_detected);
+                ip += headerSize;
+                FSE_buildDTable(DTableML, norm, max, MLlog);
+    }   }   }
+
+    return ip-istart;
+}
+
+/* seq_t : one decoded sequence, as produced by ZSTD_decodeSequence() and consumed by ZSTD_execSequence() */
+typedef struct {
+    size_t litLength;     /* number of literal bytes to copy before the match */
+    size_t offset;        /* distance back from the write position to the start of the match */
+    size_t matchLength;   /* number of bytes to copy from the match, MINMATCH included */
+} seq_t;
+/* seqState_t : everything ZSTD_decodeSequence() needs to carry from one sequence to the next */
+typedef struct {
+    FSE_DStream_t DStream;    /* bitstream shared by the three decoders below */
+    FSE_DState_t stateLL;     /* literal-length decoder state */
+    FSE_DState_t stateOffb;   /* offset-code decoder state */
+    FSE_DState_t stateML;     /* match-length decoder state */
+    size_t prevOffset;        /* an earlier offset, reused when the offset code is 0 */
+    const BYTE* dumps;        /* read position inside the dumps area (extra length bytes) */
+    const BYTE* dumpsEnd;     /* end of the dumps area */
+} seqState_t;
+
+/* ZSTD_decodeSequence() : reads the next (litLength, offset, matchLength) triplet from the bitstream into *seq; long lengths are completed from the dumps area */
+static void ZSTD_decodeSequence(seq_t* seq, seqState_t* seqState)
+{
+    size_t litLength;
+    size_t prevOffset;
+    size_t offset;
+    size_t matchLength;
+    const BYTE* dumps = seqState->dumps;
+    const BYTE* const de = seqState->dumpsEnd;
+
+    /* Literal length */
+    litLength = FSE_decodeSymbol(&(seqState->stateLL), &(seqState->DStream));
+    prevOffset = litLength ? seq->offset : seqState->prevOffset;   /* on entry, seq->offset still holds the offset of the previous sequence */
+    seqState->prevOffset = seq->offset;
+    if (litLength == MaxLL)   /* escape value : the length continues in the dumps area */
+    {
+        U32 add = dumps<de ? *dumps++ : 0;   /* an exhausted dumps area counts as 0 */
+        if (add < 255) litLength += add;
+        else
+        {
+            if (dumps<=(de-3))   /* 255 : the length is the next 3 bytes; left at MaxLL when fewer than 3 bytes remain */
+            {
+                litLength = ZSTD_readLE32(dumps) & 0xFFFFFF;  /* no pb : dumps is always followed by seq tables > 1 byte */
+                dumps += 3;
+            }
+        }
+    }
+
+    /* Offset */
+    {
+        U32 offsetCode, nbBits;
+        offsetCode = FSE_decodeSymbol(&(seqState->stateOffb), &(seqState->DStream));
+        if (ZSTD_32bits()) FSE_reloadDStream(&(seqState->DStream));
+        nbBits = offsetCode - 1;   /* the code gives the number of extra bits to read, plus one */
+        if (offsetCode==0) nbBits = 0;   /* cmove */
+        offset = ((size_t)1 << (nbBits & ((sizeof(offset)*8)-1))) + FSE_readBits(&(seqState->DStream), nbBits);   /* the mask keeps the shift count below the width of size_t */
+        if (ZSTD_32bits()) FSE_reloadDStream(&(seqState->DStream));
+        if (offsetCode==0) offset = prevOffset;   /* code 0 : reuse the offset selected above */
+    }
+
+    /* MatchLength */
+    matchLength = FSE_decodeSymbol(&(seqState->stateML), &(seqState->DStream));
+    if (matchLength == MaxML)   /* same escape scheme as for the literal length */
+    {
+        U32 add = dumps<de ? *dumps++ : 0;
+        if (add < 255) matchLength += add;
+        else
+        {
+            if (dumps<=(de-3))
+            {
+                matchLength = ZSTD_readLE32(dumps) & 0xFFFFFF;  /* no pb : dumps is always followed by seq tables > 1 byte */
+                dumps += 3;
+            }
+        }
+    }
+    matchLength += MINMATCH;   /* stored lengths are relative to the minimum match length */
+
+    /* save result */
+    seq->litLength = litLength;
+    seq->offset = offset;
+    seq->matchLength = matchLength;
+    seqState->dumps = dumps;
+}
+
+/* ZSTD_execSequence() : writes one sequence at op (litLength literals from *litPtr, then matchLength bytes copied from offset bytes back); returns the bytes written, or an error code */
+static size_t ZSTD_execSequence(BYTE* op,
+                                seq_t sequence,
+                                const BYTE** litPtr, const BYTE* const litLimit,
+                                BYTE* const base, BYTE* const oend)
+{
+    static const int dec32table[] = {0, 1, 2, 1, 4, 4, 4, 4};   /* added */
+    static const int dec64table[] = {8, 8, 8, 7, 8, 9,10,11};   /* subtracted */
+    const BYTE* const ostart = op;
+    const size_t litLength = sequence.litLength;
+    BYTE* const endMatch = op + litLength + sequence.matchLength;    /* risk : address space overflow (32-bits) */
+    const BYTE* const litEnd = *litPtr + litLength;
+
+    /* check */
+    if (endMatch > oend) return ERROR(dstSize_tooSmall);   /* overwrite beyond dst buffer */
+    if (litEnd > litLimit) return ERROR(corruption_detected);
+    if (sequence.matchLength > (size_t)(*litPtr-op))  return ERROR(dstSize_tooSmall);    /* overwrite literal segment */
+
+    /* copy Literals */
+    if (((size_t)(*litPtr - op) < 8) || ((size_t)(oend-litEnd) < 8) || (op+litLength > oend-8))
+        memmove(op, *litPtr, litLength);   /* overwrite risk */
+    else
+        ZSTD_wildcopy(op, *litPtr, litLength);   /* 8 bytes at a time : may write past op+litLength */
+    op += litLength;
+    *litPtr = litEnd;   /* update for next sequence */
+
+    /* check : last match must be at a minimum distance of 8 from end of dest buffer */
+    if (oend-op < 8) return ERROR(dstSize_tooSmall);
+
+    /* copy Match */
+    {
+        const U32 overlapRisk = (((size_t)(litEnd - endMatch)) < 12);   /* remaining literals start less than 12 bytes after the end of the match */
+        const BYTE* match = op - sequence.offset;            /* possible underflow at op - offset ? */
+        size_t qutt = 12;   /* number of bytes saved and restored around the match copy */
+        U64 saved[2];
+
+        /* check */
+        if (match < base) return ERROR(corruption_detected);
+        if (sequence.offset > (size_t)base) return ERROR(corruption_detected);   /* NOTE(review): compares the offset with the numeric value of the base address - intent unclear, confirm */
+
+        /* save beginning of literal sequence, in case of write overlap */
+        if (overlapRisk)
+        {
+            if ((endMatch + qutt) > oend) qutt = oend-endMatch;
+            memcpy(saved, endMatch, qutt);
+        }
+
+        if (sequence.offset < 8)   /* source and destination are less than 8 bytes apart */
+        {
+            const int dec64 = dec64table[sequence.offset];
+            op[0] = match[0];
+            op[1] = match[1];
+            op[2] = match[2];
+            op[3] = match[3];
+            match += dec32table[sequence.offset];
+            ZSTD_copy4(op+4, match);
+            match -= dec64;   /* presumably repositions match so that the 8-byte copies below continue the pattern - TODO confirm */
+        } else { ZSTD_copy8(op, match); }
+        op += 8; match += 8;
+
+        if (endMatch > oend-(16-MINMATCH))   /* close to the end of dst : finish without writing past endMatch */
+        {
+            if (op < oend-8)
+            {
+                ZSTD_wildcopy(op, match, (oend-8) - op);
+                match += (oend-8) - op;
+                op = oend-8;
+            }
+            while (op<endMatch) *op++ = *match++;
+        }
+        else
+            ZSTD_wildcopy(op, match, (ptrdiff_t)sequence.matchLength-8);   /* works even if matchLength < 8 */
+
+        /* restore, in case of overlap */
+        if (overlapRisk) memcpy(endMatch, saved, qutt);
+    }
+
+    return endMatch-ostart;
+}
+
+typedef struct ZSTDv01_Dctx_s   /* decompression context : decoding tables plus the state of the streaming API */
+{
+    U32 LLTable[FSE_DTABLE_SIZE_U32(LLFSELog)];     /* literal-length decoding table */
+    U32 OffTable[FSE_DTABLE_SIZE_U32(OffFSELog)];   /* offset-code decoding table */
+    U32 MLTable[FSE_DTABLE_SIZE_U32(MLFSELog)];     /* match-length decoding table */
+    void* previousDstEnd;   /* where the previous ZSTDv01_decompressContinue() call stopped writing */
+    void* base;             /* lowest address a match may start at (checked in ZSTD_execSequence()) */
+    size_t expected;        /* input size required by the next ZSTDv01_decompressContinue() call */
+    blockType_t bType;      /* type of the block whose content is expected next */
+    U32 phase;              /* 0 : frame header, 1 : block header, 2 : block content */
+} dctx_t;
+
+/* ZSTD_decompressSequences() : decodes the sequences section at seqStart and executes every sequence into dst; returns the bytes written, or an error code */
+static size_t ZSTD_decompressSequences(
+                               void* ctx,
+                               void* dst, size_t maxDstSize,
+                         const void* seqStart, size_t seqSize,
+                         const BYTE* litStart, size_t litSize)
+{
+    dctx_t* dctx = (dctx_t*)ctx;
+    const BYTE* ip = (const BYTE*)seqStart;
+    const BYTE* const iend = ip + seqSize;
+    BYTE* const ostart = (BYTE* const)dst;
+    BYTE* op = ostart;
+    BYTE* const oend = ostart + maxDstSize;
+    size_t errorCode, dumpsLength;
+    const BYTE* litPtr = litStart;
+    const BYTE* const litEnd = litStart + litSize;
+    int nbSeq;
+    const BYTE* dumps;
+    U32* DTableLL = dctx->LLTable;
+    U32* DTableML = dctx->MLTable;
+    U32* DTableOffb = dctx->OffTable;
+    BYTE* const base = (BYTE*) (dctx->base);
+
+    /* Build Decoding Tables */
+    errorCode = ZSTDv01_decodeSeqHeaders(&nbSeq, &dumps, &dumpsLength,
+                                      DTableLL, DTableML, DTableOffb,
+                                      ip, iend-ip);
+    if (ZSTDv01_isError(errorCode)) return errorCode;
+    ip += errorCode;   /* on success the value is the size of the header just parsed */
+
+    /* Regen sequences */
+    {
+        seq_t sequence;
+        seqState_t seqState;
+
+        memset(&sequence, 0, sizeof(sequence));   /* sequence.offset is read by ZSTD_decodeSequence() before it is first written */
+        seqState.dumps = dumps;
+        seqState.dumpsEnd = dumps + dumpsLength;
+        seqState.prevOffset = 1;
+        errorCode = FSE_initDStream(&(seqState.DStream), ip, iend-ip);
+        if (FSE_isError(errorCode)) return ERROR(corruption_detected);
+        FSE_initDState(&(seqState.stateLL), &(seqState.DStream), DTableLL);
+        FSE_initDState(&(seqState.stateOffb), &(seqState.DStream), DTableOffb);
+        FSE_initDState(&(seqState.stateML), &(seqState.DStream), DTableML);
+
+        for ( ; (FSE_reloadDStream(&(seqState.DStream)) <= FSE_DStream_completed) && (nbSeq>0) ; )
+        {
+            size_t oneSeqSize;
+            nbSeq--;
+            ZSTD_decodeSequence(&sequence, &seqState);
+            oneSeqSize = ZSTD_execSequence(op, sequence, &litPtr, litEnd, base, oend);
+            if (ZSTDv01_isError(oneSeqSize)) return oneSeqSize;
+            op += oneSeqSize;
+        }
+
+        /* check if reached exact end */
+        if ( !FSE_endOfDStream(&(seqState.DStream)) ) return ERROR(corruption_detected);   /* requested too much : data is corrupted */
+        if (nbSeq<0) return ERROR(corruption_detected);   /* requested too many sequences : data is corrupted */
+
+        /* last literal segment */
+        {
+            size_t lastLLSize = litEnd - litPtr;   /* literals not consumed by any sequence */
+            if (op+lastLLSize > oend) return ERROR(dstSize_tooSmall);
+            if (op != litPtr) memmove(op, litPtr, lastLLSize);   /* memmove : the literals may sit in dst itself and overlap op */
+            op += lastLLSize;
+        }
+    }
+
+    return op-ostart;
+}
+
+/* ZSTD_decompressBlock() : decodes one compressed block : the literals sub-block first, then the sequences section that follows it */
+static size_t ZSTD_decompressBlock(
+                            void* ctx,
+                            void* dst, size_t maxDstSize,
+                      const void* src, size_t srcSize)
+{
+    /* blockType == blockCompressed, srcSize is trusted */
+    const BYTE* const blockStart = (const BYTE*)src;
+    const BYTE* literals = NULL;
+    size_t literalsSize = 0;
+    size_t litSectionSize;   /* input consumed by the literals sub-block, or an error code */
+
+    /* stage 1 : literals */
+    litSectionSize = ZSTDv01_decodeLiteralsBlock(ctx, dst, maxDstSize, &literals, &literalsSize, src, srcSize);
+    if (ZSTDv01_isError(litSectionSize)) return litSectionSize;
+
+    /* stage 2 : sequences, read from what is left of the block */
+    return ZSTD_decompressSequences(ctx, dst, maxDstSize,
+                                    blockStart + litSectionSize, srcSize - litSectionSize, literals, literalsSize);
+}
+
+/* ZSTDv01_decompressDCtx() : decompresses one complete v0.1 frame from src into dst using ctx; returns the bytes written, or an error code */
+size_t ZSTDv01_decompressDCtx(void* ctx, void* dst, size_t maxDstSize, const void* src, size_t srcSize)
+{
+    const BYTE* ip = (const BYTE*)src;
+    const BYTE* iend = ip + srcSize;
+    BYTE* const ostart = (BYTE* const)dst;
+    BYTE* op = ostart;
+    BYTE* const oend = ostart + maxDstSize;
+    size_t remainingSize = srcSize;
+    U32 magicNumber;
+    size_t errorCode=0;
+    blockProperties_t blockProperties;
+
+    /* Frame Header */
+    if (srcSize < ZSTD_frameHeaderSize+ZSTD_blockHeaderSize) return ERROR(srcSize_wrong);
+    magicNumber = ZSTD_readBE32(src);
+    if (magicNumber != ZSTD_magicNumber) return ERROR(prefix_unknown);
+    ip += ZSTD_frameHeaderSize; remainingSize -= ZSTD_frameHeaderSize;
+
+    /* Loop on each block */
+    while (1)
+    {
+        size_t blockSize = ZSTDv01_getcBlockSize(ip, iend-ip, &blockProperties);
+        if (ZSTDv01_isError(blockSize)) return blockSize;
+
+        ip += ZSTD_blockHeaderSize;
+        remainingSize -= ZSTD_blockHeaderSize;
+        if (blockSize > remainingSize) return ERROR(srcSize_wrong);
+
+        switch(blockProperties.blockType)
+        {
+        case bt_compressed:
+            errorCode = ZSTD_decompressBlock(ctx, op, oend-op, ip, blockSize);
+            break;
+        case bt_raw :
+            errorCode = ZSTD_copyUncompressedBlock(op, oend-op, ip, blockSize);
+            break;
+        case bt_rle :
+            return ERROR(GENERIC);   /* not yet supported */
+            break;   /* never reached */
+        case bt_end :
+            /* end of frame */
+            if (remainingSize) return ERROR(srcSize_wrong);   /* input left after the end mark is treated as an error */
+            break;
+        default:
+            return ERROR(GENERIC);
+        }
+        if (blockSize == 0) break;   /* bt_end; NOTE(review): also taken by a compressed or raw block whose size field is 0 */
+
+        if (ZSTDv01_isError(errorCode)) return errorCode;
+        op += errorCode;   /* on success the value is the number of bytes produced by the block */
+        ip += blockSize;
+        remainingSize -= blockSize;
+    }
+
+    return op-ostart;
+}
+
+size_t ZSTDv01_decompress(void* dst, size_t maxDstSize, const void* src, size_t srcSize)
+{   /* one-shot entry point : decodes a whole frame with a context local to this call */
+    dctx_t localCtx;
+    localCtx.base = dst;   /* the only field set before decoding starts */
+    return ZSTDv01_decompressDCtx(&localCtx, dst, maxDstSize, src, srcSize);
+}
+
+size_t ZSTDv01_findFrameCompressedSize(const void* src, size_t srcSize)
+{
+    const BYTE* ip = (const BYTE*)src;
+    size_t remainingSize = srcSize;
+    U32 magicNumber;
+    blockProperties_t blockProperties;
+
+    /* Frame Header */
+    if (srcSize < ZSTD_frameHeaderSize+ZSTD_blockHeaderSize) return ERROR(srcSize_wrong);
+    magicNumber = ZSTD_readBE32(src);
+    if (magicNumber != ZSTD_magicNumber) return ERROR(prefix_unknown);
+    ip += ZSTD_frameHeaderSize; remainingSize -= ZSTD_frameHeaderSize;
+
+    /* Loop on each block */
+    while (1)
+    {
+        size_t blockSize = ZSTDv01_getcBlockSize(ip, remainingSize, &blockProperties);
+        if (ZSTDv01_isError(blockSize)) return blockSize;
+
+        ip += ZSTD_blockHeaderSize;
+        remainingSize -= ZSTD_blockHeaderSize;
+        if (blockSize > remainingSize) return ERROR(srcSize_wrong);
+
+        if (blockSize == 0) break;   /* bt_end */
+
+        ip += blockSize;
+        remainingSize -= blockSize;
+    }
+
+    return ip - (const BYTE*)src;
+}
+
+/*******************************
+*  Streaming Decompression API
+*******************************/
+
+size_t ZSTDv01_resetDCtx(ZSTDv01_Dctx* dctx)   /* puts dctx back in the "frame header expected" state; always returns 0 */
+{
+    dctx->base = NULL;
+    dctx->previousDstEnd = NULL;             /* no previous output to be contiguous with */
+    dctx->phase = 0;                         /* phase 0 : a frame header comes next */
+    dctx->expected = ZSTD_frameHeaderSize;   /* size the next input chunk must have */
+    return 0;
+}
+
+ZSTDv01_Dctx* ZSTDv01_createDCtx(void)   /* returns a freshly reset context obtained from malloc(), or NULL when the allocation fails */
+{
+    ZSTDv01_Dctx* const fresh = (ZSTDv01_Dctx*)malloc(sizeof(ZSTDv01_Dctx));
+    if (fresh != NULL)
+        ZSTDv01_resetDCtx(fresh);   /* start in the "frame header expected" state */
+    return fresh;
+}
+
+size_t ZSTDv01_freeDCtx(ZSTDv01_Dctx* dctx)   /* releases a context with free(); always returns 0 */
+{
+    free(dctx);   /* free(NULL) is a no-op, so a NULL dctx is accepted */
+    return 0;
+}
+
+size_t ZSTDv01_nextSrcSizeToDecompress(ZSTDv01_Dctx* dctx)   /* input size that the next ZSTDv01_decompressContinue() call must be given */
+{
+    return ((dctx_t*)dctx)->expected;
+}
+
+size_t ZSTDv01_decompressContinue(ZSTDv01_Dctx* dctx, void* dst, size_t maxDstSize, const void* src, size_t srcSize)
+{
+    dctx_t* ctx = (dctx_t*)dctx;
+    /* streaming step : phase 0 reads the frame header, phase 1 a block header, phase 2 a block content; returns the bytes written to dst, or an error code */
+    /* Sanity check */
+    if (srcSize != ctx->expected) return ERROR(srcSize_wrong);   /* each call must supply exactly the announced amount */
+    if (dst != ctx->previousDstEnd)  /* not contiguous */
+        ctx->base = dst;
+
+    /* Decompress : frame header */
+    if (ctx->phase == 0)
+    {
+        /* Check frame magic header */
+        U32 magicNumber = ZSTD_readBE32(src);
+        if (magicNumber != ZSTD_magicNumber) return ERROR(prefix_unknown);
+        ctx->phase = 1;
+        ctx->expected = ZSTD_blockHeaderSize;
+        return 0;   /* a header produces no output */
+    }
+
+    /* Decompress : block header */
+    if (ctx->phase == 1)
+    {
+        blockProperties_t bp;
+        size_t blockSize = ZSTDv01_getcBlockSize(src, ZSTD_blockHeaderSize, &bp);
+        if (ZSTDv01_isError(blockSize)) return blockSize;
+        if (bp.blockType == bt_end)
+        {
+            ctx->expected = 0;   /* end of frame : nothing more to feed */
+            ctx->phase = 0;
+        }
+        else
+        {
+            ctx->expected = blockSize;
+            ctx->bType = bp.blockType;
+            ctx->phase = 2;
+        }
+
+        return 0;
+    }
+
+    /* Decompress : block content */
+    {
+        size_t rSize;
+        switch(ctx->bType)
+        {
+        case bt_compressed:
+            rSize = ZSTD_decompressBlock(ctx, dst, maxDstSize, src, srcSize);
+            break;
+        case bt_raw :
+            rSize = ZSTD_copyUncompressedBlock(dst, maxDstSize, src, srcSize);
+            break;
+        case bt_rle :
+            return ERROR(GENERIC);   /* not yet handled */
+            break;   /* never reached */
+        case bt_end :   /* should never happen (filtered at phase 1) */
+            rSize = 0;
+            break;
+        default:
+            return ERROR(GENERIC);
+        }
+        ctx->phase = 1;   /* NOTE(review): the state below is updated even when rSize is an error code */
+        ctx->expected = ZSTD_blockHeaderSize;
+        ctx->previousDstEnd = (void*)( ((char*)dst) + rSize);
+        return rSize;
+    }
+
+}
diff --git a/contrib/libzstd/include/zstd/legacy/zstd_v01.h b/contrib/libzstd/include/zstd/legacy/zstd_v01.h
new file mode 100644
index 00000000000..13cb3acfdc3
--- /dev/null
+++ b/contrib/libzstd/include/zstd/legacy/zstd_v01.h
@@ -0,0 +1,88 @@
+/**
+ * Copyright (c) 2016-present, Yann Collet, Facebook, Inc.
+ * All rights reserved.
+ *
+ * This source code is licensed under the BSD-style license found in the
+ * LICENSE file in the root directory of this source tree. An additional grant
+ * of patent rights can be found in the PATENTS file in the same directory.
+ */
+
+#ifndef ZSTD_V01_H_28739879432
+#define ZSTD_V01_H_28739879432
+
+#if defined (__cplusplus)
+extern "C" {
+#endif
+
+/* *************************************
+*  Includes
+***************************************/
+#include <stddef.h>   /* size_t */
+
+
+/* *************************************
+*  Simple one-step function
+***************************************/
+/**
+ZSTDv01_decompress() : decompress ZSTD frames compliant with v0.1.x format
+    compressedSize : is the exact source size
+    maxOriginalSize : is the size of the 'dst' buffer, which must be already allocated.
+                      It must be equal or larger than originalSize, otherwise decompression will fail.
+    return : the number of bytes decompressed into destination buffer (originalSize)
+             or an errorCode if it fails (which can be tested using ZSTDv01_isError())
+*/
+size_t ZSTDv01_decompress( void* dst, size_t maxOriginalSize,
+                     const void* src, size_t compressedSize);
+
+/**
+ZSTDv01_findFrameCompressedSize() : get the source length of a ZSTD frame compliant with v0.1.x format
+    compressedSize : The size of the 'src' buffer, at least as large as the frame pointed to by 'src'
+    return : the number of bytes that would be read to decompress this frame
+             or an errorCode if it fails (which can be tested using ZSTDv01_isError())
+*/
+size_t ZSTDv01_findFrameCompressedSize(const void* src, size_t compressedSize);
+
+/**
+ZSTDv01_isError() : tells if the result of ZSTDv01_decompress() is an error
+*/
+unsigned ZSTDv01_isError(size_t code);
+
+
+/* *************************************
+*  Advanced functions
+***************************************/
+typedef struct ZSTDv01_Dctx_s ZSTDv01_Dctx;
+ZSTDv01_Dctx* ZSTDv01_createDCtx(void);
+size_t ZSTDv01_freeDCtx(ZSTDv01_Dctx* dctx);
+
+size_t ZSTDv01_decompressDCtx(void* ctx,
+                              void* dst, size_t maxOriginalSize,
+                        const void* src, size_t compressedSize);
+
+/* *************************************
+*  Streaming functions
+***************************************/
+size_t ZSTDv01_resetDCtx(ZSTDv01_Dctx* dctx);
+
+size_t ZSTDv01_nextSrcSizeToDecompress(ZSTDv01_Dctx* dctx);
+size_t ZSTDv01_decompressContinue(ZSTDv01_Dctx* dctx, void* dst, size_t maxDstSize, const void* src, size_t srcSize);
+/**
+  Call the two functions above alternately.
+  ZSTDv01_nextSrcSizeToDecompress() tells how many bytes to provide as 'srcSize' to ZSTDv01_decompressContinue().
+  ZSTDv01_decompressContinue() will use previous data blocks to improve compression if they are located prior to current block.
+  Result is the number of bytes regenerated within 'dst'.
+  It can be zero, which is not an error; it just means ZSTDv01_decompressContinue() has decoded some header.
+*/
+
+/* *************************************
+*  Prefix - version detection
+***************************************/
+#define ZSTDv01_magicNumber   0xFD2FB51E   /* Big Endian version */
+#define ZSTDv01_magicNumberLE 0x1EB52FFD   /* Little Endian version */
+
+
+#if defined (__cplusplus)
+}
+#endif
+
+#endif /* ZSTD_V01_H_28739879432 */
diff --git a/contrib/libzstd/include/zstd/legacy/zstd_v02.c b/contrib/libzstd/include/zstd/legacy/zstd_v02.c
new file mode 100644
index 00000000000..3cf8f477825
--- /dev/null
+++ b/contrib/libzstd/include/zstd/legacy/zstd_v02.c
@@ -0,0 +1,3555 @@
+/**
+ * Copyright (c) 2016-present, Yann Collet, Facebook, Inc.
+ * All rights reserved.
+ *
+ * This source code is licensed under the BSD-style license found in the
+ * LICENSE file in the root directory of this source tree. An additional grant
+ * of patent rights can be found in the PATENTS file in the same directory.
+ */
+
+
+#include <stddef.h>    /* size_t, ptrdiff_t */
+#include "zstd_v02.h"
+#include "error_private.h"
+
+
+/******************************************
+*  Compiler-specific
+******************************************/
+#if defined(_MSC_VER)   /* Visual Studio */
+#   include <stdlib.h>  /* _byteswap_ulong */
+#   include <intrin.h>  /* _byteswap_* */
+#endif
+
+
+/* ******************************************************************
+   mem.h
+   low-level memory access routines
+   Copyright (C) 2013-2015, Yann Collet.
+
+   BSD 2-Clause License (http://www.opensource.org/licenses/bsd-license.php)
+
+   Redistribution and use in source and binary forms, with or without
+   modification, are permitted provided that the following conditions are
+   met:
+
+       * Redistributions of source code must retain the above copyright
+   notice, this list of conditions and the following disclaimer.
+       * Redistributions in binary form must reproduce the above
+   copyright notice, this list of conditions and the following disclaimer
+   in the documentation and/or other materials provided with the
+   distribution.
+
+   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+   OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+    You can contact the author at :
+    - FSE source repository : https://github.com/Cyan4973/FiniteStateEntropy
+    - Public forum : https://groups.google.com/forum/#!forum/lz4c
+****************************************************************** */
+#ifndef MEM_H_MODULE
+#define MEM_H_MODULE
+
+#if defined (__cplusplus)
+extern "C" {
+#endif
+
+/******************************************
+*  Includes
+******************************************/
+#include <stddef.h>    /* size_t, ptrdiff_t */
+#include <string.h>    /* memcpy */
+
+
+/******************************************
+*  Compiler-specific
+******************************************/
+#if defined(__GNUC__)
+#  define MEM_STATIC static __attribute__((unused))
+#elif defined (__cplusplus) || (defined (__STDC_VERSION__) && (__STDC_VERSION__ >= 199901L) /* C99 */)
+#  define MEM_STATIC static inline
+#elif defined(_MSC_VER)
+#  define MEM_STATIC static __inline
+#else
+#  define MEM_STATIC static  /* this version may generate warnings for unused static functions; disable the relevant warning */
+#endif
+
+
+/****************************************************************
+*  Basic Types
+*****************************************************************/
+#if defined (__cplusplus) || (defined (__STDC_VERSION__) && (__STDC_VERSION__ >= 199901L) /* C99 */)
+# include <stdint.h>
+  typedef  uint8_t BYTE;
+  typedef uint16_t U16;
+  typedef  int16_t S16;
+  typedef uint32_t U32;
+  typedef  int32_t S32;
+  typedef uint64_t U64;
+  typedef  int64_t S64;
+#else
+  typedef unsigned char       BYTE;
+  typedef unsigned short      U16;
+  typedef   signed short      S16;
+  typedef unsigned int        U32;
+  typedef   signed int        S32;
+  typedef unsigned long long  U64;
+  typedef   signed long long  S64;
+#endif
+
+
+/****************************************************************
+*  Memory I/O
+*****************************************************************/
+/* MEM_FORCE_MEMORY_ACCESS
+ * By default, access to unaligned memory is controlled by `memcpy()`, which is safe and portable.
+ * Unfortunately, on some target/compiler combinations, the generated assembly is sub-optimal.
+ * The switch below allows selecting a different access method for improved performance.
+ * Method 0 (default) : use `memcpy()`. Safe and portable.
+ * Method 1 : `__packed` statement. It depends on compiler extension (ie, not portable).
+ *            This method is safe if your compiler supports it, and *generally* as fast or faster than `memcpy`.
+ * Method 2 : direct access. This method is portable but violates the C standard.
+ *            It can generate buggy code on targets generating assembly depending on alignment.
+ *            But in some circumstances, it's the only known way to get the most performance (ie GCC + ARMv6)
+ * See http://fastcompression.blogspot.fr/2015/08/accessing-unaligned-memory.html for details.
+ * Prefer these methods in priority order (0 > 1 > 2)
+ */
+#ifndef MEM_FORCE_MEMORY_ACCESS   /* can be defined externally, on command line for example */
+#  if defined(__GNUC__) && ( defined(__ARM_ARCH_6__) || defined(__ARM_ARCH_6J__) || defined(__ARM_ARCH_6K__) || defined(__ARM_ARCH_6Z__) || defined(__ARM_ARCH_6ZK__) || defined(__ARM_ARCH_6T2__) )
+#    define MEM_FORCE_MEMORY_ACCESS 2
+#  elif (defined(__INTEL_COMPILER) && !defined(WIN32)) || \
+  (defined(__GNUC__) && ( defined(__ARM_ARCH_7__) || defined(__ARM_ARCH_7A__) || defined(__ARM_ARCH_7R__) || defined(__ARM_ARCH_7M__) || defined(__ARM_ARCH_7S__) ))
+#    define MEM_FORCE_MEMORY_ACCESS 1
+#  endif
+#endif
+
+MEM_STATIC unsigned MEM_32bits(void) { return (4 == sizeof(void*)) ? 1U : 0U; }   /* 1 when a pointer is 4 bytes wide, else 0 */
+MEM_STATIC unsigned MEM_64bits(void) { return (8 == sizeof(void*)) ? 1U : 0U; }   /* 1 when a pointer is 8 bytes wide, else 0 */
+
+MEM_STATIC unsigned MEM_isLittleEndian(void)   /* @return : 1 when the lowest-addressed byte of a U32 holds its least significant byte */
+{
+    const union { U32 word; BYTE bytes[4]; } probe = { 1 };   /* don't use static : performance detrimental  */
+    return probe.bytes[0];   /* first byte is 1 only if the value 1 was stored low byte first */
+}
+
+#if defined(MEM_FORCE_MEMORY_ACCESS) && (MEM_FORCE_MEMORY_ACCESS==2)
+
+/* Method 2 (MEM_FORCE_MEMORY_ACCESS==2) : dereference memPtr directly through a cast pointer. Violates C standard on structure alignment.
+Only use if no other choice to achieve best performance on target platform */
+MEM_STATIC U16 MEM_read16(const void* memPtr) { return *(const U16*) memPtr; }
+MEM_STATIC U32 MEM_read32(const void* memPtr) { return *(const U32*) memPtr; }
+MEM_STATIC U64 MEM_read64(const void* memPtr) { return *(const U64*) memPtr; }
+
+MEM_STATIC void MEM_write16(void* memPtr, U16 value) { *(U16*)memPtr = value; }
+
+#elif defined(MEM_FORCE_MEMORY_ACCESS) && (MEM_FORCE_MEMORY_ACCESS==1)
+
+/* Method 1 (MEM_FORCE_MEMORY_ACCESS==1) : access through a packed union. Safer than direct access, but compiler specific, hence potentially problematic for some compilers */
+/* currently only defined for gcc and icc (relies on __attribute__((packed))) */
+typedef union { U16 u16; U32 u32; U64 u64; } __attribute__((packed)) unalign;
+
+MEM_STATIC U16 MEM_read16(const void* ptr) { return ((const unalign*)ptr)->u16; }
+MEM_STATIC U32 MEM_read32(const void* ptr) { return ((const unalign*)ptr)->u32; }
+MEM_STATIC U64 MEM_read64(const void* ptr) { return ((const unalign*)ptr)->u64; }
+
+MEM_STATIC void MEM_write16(void* memPtr, U16 value) { ((unalign*)memPtr)->u16 = value; }
+
+#else
+
+/* default method, safe and standard.
+   can sometimes prove slower */
+
+MEM_STATIC U16 MEM_read16(const void* memPtr)
+{
+    U16 loaded; memcpy(&loaded, memPtr, sizeof(U16)); return loaded;   /* native-order 16-bit read via memcpy */
+}
+
+MEM_STATIC U32 MEM_read32(const void* memPtr)
+{
+    U32 loaded; memcpy(&loaded, memPtr, sizeof(U32)); return loaded;   /* native-order 32-bit read via memcpy */
+}
+
+MEM_STATIC U64 MEM_read64(const void* memPtr)
+{
+    U64 loaded; memcpy(&loaded, memPtr, sizeof(U64)); return loaded;   /* native-order 64-bit read via memcpy */
+}
+
+MEM_STATIC void MEM_write16(void* memPtr, U16 value)
+{
+    memcpy(memPtr, &value, sizeof(U16));   /* native-order 16-bit store via memcpy */
+}
+
+#endif /* MEM_FORCE_MEMORY_ACCESS */
+
+
+MEM_STATIC U16 MEM_readLE16(const void* memPtr)
+{
+    /* Reads a 16-bit value stored in little-endian order at memPtr. */
+    const BYTE* const bytes = (const BYTE*)memPtr;
+    if (!MEM_isLittleEndian())
+    {
+        return (U16)(bytes[0] + (bytes[1]<<8));   /* host order differs : rebuild from bytes, low byte first */
+    }
+    return MEM_read16(memPtr);
+}
+
+MEM_STATIC void MEM_writeLE16(void* memPtr, U16 val)
+{
+    /* Stores `val` at memPtr in little-endian order (low byte first). */
+    if (!MEM_isLittleEndian())
+    {
+        /* host order differs from little-endian : emit the two bytes explicitly */
+        BYTE* const out = (BYTE*)memPtr;
+        out[1] = (BYTE)(val>>8);
+        out[0] = (BYTE)val;
+        return;
+    }
+    MEM_write16(memPtr, val);
+}
+
+MEM_STATIC U32 MEM_readLE32(const void* memPtr)
+{
+    /* Reads a 32-bit value stored in little-endian order at memPtr. */
+    const BYTE* const bytes = (const BYTE*)memPtr;
+    if (!MEM_isLittleEndian())
+    {
+        return (U32)((U32)bytes[0] + ((U32)bytes[1]<<8) + ((U32)bytes[2]<<16) + ((U32)bytes[3]<<24));
+    }
+    return MEM_read32(memPtr);
+}
+
+
+MEM_STATIC U64 MEM_readLE64(const void* memPtr)
+{
+    const BYTE* const bytes = (const BYTE*)memPtr;   /* byte view of a 64-bit value stored in little-endian order */
+    if (!MEM_isLittleEndian())
+    {
+        const U64 lowHalf  = (U64)bytes[0] + ((U64)bytes[1]<<8) + ((U64)bytes[2]<<16) + ((U64)bytes[3]<<24);
+        const U64 highHalf = ((U64)bytes[4]<<32) + ((U64)bytes[5]<<40) + ((U64)bytes[6]<<48) + ((U64)bytes[7]<<56);
+        return (U64)(lowHalf + highHalf);
+    }
+    return MEM_read64(memPtr);
+}
+
+
+MEM_STATIC size_t MEM_readLEST(const void* memPtr)
+{
+    /* Reads a little-endian value into a size_t :
+     * 32 bits are read when MEM_32bits() holds, 64 bits otherwise. */
+    return MEM_32bits() ? (size_t)MEM_readLE32(memPtr)
+                        : (size_t)MEM_readLE64(memPtr);
+}
+
+#if defined (__cplusplus)
+}
+#endif
+
+#endif /* MEM_H_MODULE */
+
+
+/* ******************************************************************
+   bitstream
+   Part of NewGen Entropy library
+   header file (to include)
+   Copyright (C) 2013-2015, Yann Collet.
+
+   BSD 2-Clause License (http://www.opensource.org/licenses/bsd-license.php)
+
+   Redistribution and use in source and binary forms, with or without
+   modification, are permitted provided that the following conditions are
+   met:
+
+       * Redistributions of source code must retain the above copyright
+   notice, this list of conditions and the following disclaimer.
+       * Redistributions in binary form must reproduce the above
+   copyright notice, this list of conditions and the following disclaimer
+   in the documentation and/or other materials provided with the
+   distribution.
+
+   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+   OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+   You can contact the author at :
+   - Source repository : https://github.com/Cyan4973/FiniteStateEntropy
+   - Public forum : https://groups.google.com/forum/#!forum/lz4c
+****************************************************************** */
+#ifndef BITSTREAM_H_MODULE
+#define BITSTREAM_H_MODULE
+
+#if defined (__cplusplus)
+extern "C" {
+#endif
+
+
+/*
+*  This API consists of small unitary functions, which highly benefit from being inlined.
+*  Since link-time-optimization is not available for all compilers,
+*  these functions are defined into a .h to be included.
+*/
+
+
+/**********************************************
+*  bitStream decompression API (read backward)
+**********************************************/
+typedef struct   /* state of a bitstream being read backward */
+{
+    size_t   bitContainer;   /* local register : last chunk loaded from memory with MEM_readLEST() */
+    unsigned bitsConsumed;   /* number of bits of bitContainer already read (advanced by BIT_skipBits()) */
+    const char* ptr;         /* address bitContainer was last loaded from; moves toward start on reload */
+    const char* start;       /* first byte of the source buffer */
+} BIT_DStream_t;
+
+typedef enum { BIT_DStream_unfinished = 0,    /* container refilled; more data remains */
+               BIT_DStream_endOfBuffer = 1,   /* ptr reached start; container may no longer be completely filled */
+               BIT_DStream_completed = 2,     /* ptr is at start and every bit of the container is consumed */
+               BIT_DStream_overflow = 3 } BIT_DStream_status;  /* result of BIT_reloadDStream(); overflow = more bits consumed than the container holds */
+               /* 1,2,4,8 would be better for bitmap combinations, but slows down performance a bit ... :( */
+
+MEM_STATIC size_t   BIT_initDStream(BIT_DStream_t* bitD, const void* srcBuffer, size_t srcSize);
+MEM_STATIC size_t   BIT_readBits(BIT_DStream_t* bitD, unsigned nbBits);
+MEM_STATIC BIT_DStream_status BIT_reloadDStream(BIT_DStream_t* bitD);
+MEM_STATIC unsigned BIT_endOfDStream(const BIT_DStream_t* bitD);
+
+
+/*
+* Start by invoking BIT_initDStream().
+* A chunk of the bitStream is then stored into a local register.
+* Local register size is 64-bits on 64-bits systems, 32-bits on 32-bits systems (size_t).
+* You can then retrieve bitFields stored into the local register, **in reverse order**.
+* Local register is manually filled from memory by the BIT_reloadDStream() method.
+* A reload guarantees a minimum of ((8*sizeof(size_t))-7) bits when its result is BIT_DStream_unfinished.
+* Otherwise, it can be less than that, so proceed accordingly.
+* Checking if DStream has reached its end can be performed with BIT_endOfDStream()
+*/
+
+
+/******************************************
+*  unsafe API
+******************************************/
+MEM_STATIC size_t BIT_readBitsFast(BIT_DStream_t* bitD, unsigned nbBits);
+/* faster, but works only if nbBits >= 1 */
+
+
+
+/****************************************************************
+*  Helper functions
+****************************************************************/
+MEM_STATIC unsigned BIT_highbit32 (register U32 val)   /* @return : index of the highest set bit of val (bit 0 = least significant) */
+{
+#   if defined(_MSC_VER)   /* Visual */
+    unsigned long r=0;
+    _BitScanReverse ( &r, val );
+    return (unsigned) r;
+#   elif defined(__GNUC__) && (__GNUC__ >= 3)   /* Use GCC Intrinsic */
+    return 31 - __builtin_clz (val);   /* NOTE(review): __builtin_clz(0) is undefined; BIT_initDStream() rejects 0 before calling - confirm other callers */
+#   else   /* Software version */
+    static const unsigned DeBruijnClz[32] = { 0, 9, 1, 10, 13, 21, 2, 29, 11, 14, 16, 18, 22, 25, 3, 30, 8, 12, 20, 28, 15, 17, 24, 7, 19, 27, 23, 6, 26, 5, 4, 31 };
+    U32 v = val;
+    unsigned r;
+    v |= v >> 1;    /* these five steps copy the highest set bit into every lower position */
+    v |= v >> 2;
+    v |= v >> 4;
+    v |= v >> 8;
+    v |= v >> 16;
+    r = DeBruijnClz[ (U32) (v * 0x07C4ACDDU) >> 27];   /* top 5 bits of the product select the table entry */
+    return r;
+#   endif
+}
+
+
+
+/**********************************************************
+* bitStream decoding
+**********************************************************/
+
+/*!BIT_initDStream
+*  Initialize a BIT_DStream_t for backward reading : the container is first loaded from the END of the buffer.
+*  @bitD : a pointer to an already allocated BIT_DStream_t structure (zeroed when srcSize == 0)
+*  @srcBuffer must point at the beginning of a bitStream; its last byte must be non-zero (its highest set bit is the end mark)
+*  @srcSize must be the exact size of the bitStream
+*  @result : size of stream (== srcSize), or ERROR(srcSize_wrong) when srcSize == 0, or ERROR(GENERIC) when the end mark is missing
+*/
+MEM_STATIC size_t BIT_initDStream(BIT_DStream_t* bitD, const void* srcBuffer, size_t srcSize)
+{
+    if (srcSize < 1) { memset(bitD, 0, sizeof(*bitD)); return ERROR(srcSize_wrong); }
+
+    if (srcSize >=  sizeof(size_t))   /* normal case */
+    {
+        U32 contain32;
+        bitD->start = (const char*)srcBuffer;
+        bitD->ptr   = (const char*)srcBuffer + srcSize - sizeof(size_t);   /* last sizeof(size_t) bytes of the buffer */
+        bitD->bitContainer = MEM_readLEST(bitD->ptr);
+        contain32 = ((const BYTE*)srcBuffer)[srcSize-1];   /* last byte : carries the end mark */
+        if (contain32 == 0) return ERROR(GENERIC);   /* endMark not present */
+        bitD->bitsConsumed = 8 - BIT_highbit32(contain32);   /* skip the end mark and the zero bits above it */
+    }
+    else   /* buffer shorter than the container : assemble it byte by byte */
+    {
+        U32 contain32;
+        bitD->start = (const char*)srcBuffer;
+        bitD->ptr   = bitD->start;
+        bitD->bitContainer = *(const BYTE*)(bitD->start);
+        switch(srcSize)
+        {
+            case 7: bitD->bitContainer += (size_t)(((const BYTE*)(bitD->start))[6]) << (sizeof(size_t)*8 - 16);   /* fall-through */
+            case 6: bitD->bitContainer += (size_t)(((const BYTE*)(bitD->start))[5]) << (sizeof(size_t)*8 - 24);   /* fall-through */
+            case 5: bitD->bitContainer += (size_t)(((const BYTE*)(bitD->start))[4]) << (sizeof(size_t)*8 - 32);   /* fall-through */
+            case 4: bitD->bitContainer += (size_t)(((const BYTE*)(bitD->start))[3]) << 24;   /* fall-through */
+            case 3: bitD->bitContainer += (size_t)(((const BYTE*)(bitD->start))[2]) << 16;   /* fall-through */
+            case 2: bitD->bitContainer += (size_t)(((const BYTE*)(bitD->start))[1]) <<  8;   /* fall-through */
+            default:;
+        }
+        contain32 = ((const BYTE*)srcBuffer)[srcSize-1];   /* last byte : carries the end mark */
+        if (contain32 == 0) return ERROR(GENERIC);   /* endMark not present */
+        bitD->bitsConsumed = 8 - BIT_highbit32(contain32);
+        bitD->bitsConsumed += (U32)(sizeof(size_t) - srcSize)*8;   /* the unfilled high bytes of the container count as consumed */
+    }
+
+    return srcSize;
+}
+
+/*!BIT_lookBits
+ * Provides next n bits from local register
+ * local register is not modified (bits are still present for next read/look)
+ * On 32-bits, maxNbBits==25
+ * On 64-bits, maxNbBits==57
+ * @return : value extracted
+ */
+MEM_STATIC size_t BIT_lookBits(BIT_DStream_t* bitD, U32 nbBits)
+{
+    const U32 bitMask = sizeof(bitD->bitContainer)*8 - 1;   /* 31 or 63 : also the largest shift count used below */
+    return ((bitD->bitContainer << (bitD->bitsConsumed & bitMask)) >> 1) >> ((bitMask-nbBits) & bitMask);   /* right shift split in two (1, then bitMask-nbBits) so nbBits==0 never shifts by the full register width */
+}
+
+/*! BIT_lookBitsFast :
+*   unsafe version; only works if nbBits >= 1 */
+MEM_STATIC size_t BIT_lookBitsFast(BIT_DStream_t* bitD, U32 nbBits)
+{
+    const U32 bitMask = sizeof(bitD->bitContainer)*8 - 1;   /* 31 or 63 */
+    return (bitD->bitContainer << (bitD->bitsConsumed & bitMask)) >> (((bitMask+1)-nbBits) & bitMask);   /* with nbBits==0 the right shift count wraps to 0, hence the nbBits >= 1 requirement */
+}
+
+MEM_STATIC void BIT_skipBits(BIT_DStream_t* bitD, U32 nbBits)
+{
+    bitD->bitsConsumed = bitD->bitsConsumed + nbBits;   /* count nbBits more bits of the local register as read */
+}
+
+/*!BIT_readBits
+ * Extract the next nbBits bits from the local register, then count them as consumed.
+ * Pay attention not to read more bits than the local register contains.
+ * @return : extracted value.
+ */
+MEM_STATIC size_t BIT_readBits(BIT_DStream_t* bitD, U32 nbBits)
+{
+    size_t const extracted = BIT_lookBits(bitD, nbBits);
+    bitD->bitsConsumed += nbBits;   /* same effect as BIT_skipBits(bitD, nbBits) */
+    return extracted;
+}
+
+/*!BIT_readBitsFast :
+*  unsafe version; only works if nbBits >= 1 */
+MEM_STATIC size_t BIT_readBitsFast(BIT_DStream_t* bitD, U32 nbBits)
+{
+    size_t const extracted = BIT_lookBitsFast(bitD, nbBits);
+    bitD->bitsConsumed += nbBits;   /* same effect as BIT_skipBits(bitD, nbBits) */
+    return extracted;
+}
+
+MEM_STATIC BIT_DStream_status BIT_reloadDStream(BIT_DStream_t* bitD)   /* refills bitContainer from memory, moving ptr toward start by the whole bytes already consumed */
+{
+    if (bitD->bitsConsumed > (sizeof(bitD->bitContainer)*8))  /* should never happen */
+        return BIT_DStream_overflow;
+
+    if (bitD->ptr >= bitD->start + sizeof(bitD->bitContainer))   /* at least one full container left before ptr : plain reload */
+    {
+        bitD->ptr -= bitD->bitsConsumed >> 3;   /* step back by the number of fully consumed bytes */
+        bitD->bitsConsumed &= 7;                /* keep only the partially consumed byte's bit count */
+        bitD->bitContainer = MEM_readLEST(bitD->ptr);
+        return BIT_DStream_unfinished;
+    }
+    if (bitD->ptr == bitD->start)   /* nothing left to load : report how far the container itself was read */
+    {
+        if (bitD->bitsConsumed < sizeof(bitD->bitContainer)*8) return BIT_DStream_endOfBuffer;
+        return BIT_DStream_completed;
+    }
+    {   /* start < ptr < start + sizeof(bitContainer) : partial step back, clamped at start */
+        U32 nbBytes = bitD->bitsConsumed >> 3;
+        BIT_DStream_status result = BIT_DStream_unfinished;
+        if (bitD->ptr - nbBytes < bitD->start)
+        {
+            nbBytes = (U32)(bitD->ptr - bitD->start);  /* ptr > start */
+            result = BIT_DStream_endOfBuffer;
+        }
+        bitD->ptr -= nbBytes;
+        bitD->bitsConsumed -= nbBytes*8;   /* only the bytes actually stepped over stop counting as consumed */
+        bitD->bitContainer = MEM_readLEST(bitD->ptr);   /* reminder : srcSize > sizeof(bitD) */
+        return result;
+    }
+}
+
+/*! BIT_endOfDStream
+*   @return : non-zero only when ptr is back at start and every bit of the container has been consumed
+*/
+MEM_STATIC unsigned BIT_endOfDStream(const BIT_DStream_t* DStream)
+{
+    return ((DStream->bitsConsumed == sizeof(DStream->bitContainer)*8) && (DStream->ptr == DStream->start));
+}
+
+#if defined (__cplusplus)
+}
+#endif
+
+#endif /* BITSTREAM_H_MODULE */
+/* ******************************************************************
+   Error codes and messages
+   Copyright (C) 2013-2015, Yann Collet
+
+   BSD 2-Clause License (http://www.opensource.org/licenses/bsd-license.php)
+
+   Redistribution and use in source and binary forms, with or without
+   modification, are permitted provided that the following conditions are
+   met:
+
+       * Redistributions of source code must retain the above copyright
+   notice, this list of conditions and the following disclaimer.
+       * Redistributions in binary form must reproduce the above
+   copyright notice, this list of conditions and the following disclaimer
+   in the documentation and/or other materials provided with the
+   distribution.
+
+   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+   OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+   You can contact the author at :
+   - Source repository : https://github.com/Cyan4973/FiniteStateEntropy
+   - Public forum : https://groups.google.com/forum/#!forum/lz4c
+****************************************************************** */
+#ifndef ERROR_H_MODULE
+#define ERROR_H_MODULE
+
+#if defined (__cplusplus)
+extern "C" {
+#endif
+
+
+/******************************************
+*  Compiler-specific
+******************************************/
+#if defined (__cplusplus) || (defined (__STDC_VERSION__) && (__STDC_VERSION__ >= 199901L) /* C99 */)
+#  define ERR_STATIC static inline
+#elif defined(_MSC_VER)
+#  define ERR_STATIC static __inline
+#elif defined(__GNUC__)
+#  define ERR_STATIC static __attribute__((unused))
+#else
+#  define ERR_STATIC static  /* this version may generate warnings for unused static functions; disable the relevant warning */
+#endif
+
+
+/******************************************
+*  Error Management
+******************************************/
+#define PREFIX(name) ZSTD_error_##name
+
+#define ERROR(name) (size_t)-PREFIX(name)
+
+#define ERROR_LIST(ITEM) \
+        ITEM(PREFIX(No_Error)) ITEM(PREFIX(GENERIC)) \
+        ITEM(PREFIX(dstSize_tooSmall)) ITEM(PREFIX(srcSize_wrong)) \
+        ITEM(PREFIX(prefix_unknown)) ITEM(PREFIX(corruption_detected)) \
+        ITEM(PREFIX(tableLog_tooLarge)) ITEM(PREFIX(maxSymbolValue_tooLarge)) ITEM(PREFIX(maxSymbolValue_tooSmall)) \
+        ITEM(PREFIX(maxCode))
+
+#define ERROR_GENERATE_ENUM(ENUM) ENUM,
+typedef enum { ERROR_LIST(ERROR_GENERATE_ENUM) } ERR_codes;  /* enum is exposed, to detect & handle specific errors; compare function result to -enum value */
+
+#define ERROR_CONVERTTOSTRING(STRING) #STRING,
+#define ERROR_GENERATE_STRING(EXPR) ERROR_CONVERTTOSTRING(EXPR)
+static const char* ERR_strings[] = { ERROR_LIST(ERROR_GENERATE_STRING) };
+
+ERR_STATIC unsigned ERR_isError(size_t code) { return (code > ERROR(maxCode)); }
+
+ERR_STATIC const char* ERR_getErrorName(size_t code)
+{
+    static const char* codeError = "Unspecified error code";
+    /* an error code is a negated enum value : negating it again gives the index into ERR_strings */
+    return ERR_isError(code) ? ERR_strings[-(int)(code)] : codeError;
+}
+
+
+#if defined (__cplusplus)
+}
+#endif
+
+#endif /* ERROR_H_MODULE */
+/*
+Constructor and Destructor of type FSE_CTable
+    Note that its size depends on 'tableLog' and 'maxSymbolValue' */
+typedef unsigned FSE_CTable;   /* don't allocate that. It's just a way to be more restrictive than void* */
+typedef unsigned FSE_DTable;   /* don't allocate that. It's just a way to be more restrictive than void* */
+
+
+/* ******************************************************************
+   FSE : Finite State Entropy coder
+   header file for static linking (only)
+   Copyright (C) 2013-2015, Yann Collet
+
+   BSD 2-Clause License (http://www.opensource.org/licenses/bsd-license.php)
+
+   Redistribution and use in source and binary forms, with or without
+   modification, are permitted provided that the following conditions are
+   met:
+
+       * Redistributions of source code must retain the above copyright
+   notice, this list of conditions and the following disclaimer.
+       * Redistributions in binary form must reproduce the above
+   copyright notice, this list of conditions and the following disclaimer
+   in the documentation and/or other materials provided with the
+   distribution.
+
+   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+   OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+   You can contact the author at :
+   - Source repository : https://github.com/Cyan4973/FiniteStateEntropy
+   - Public forum : https://groups.google.com/forum/#!forum/lz4c
+****************************************************************** */
+#if defined (__cplusplus)
+extern "C" {
+#endif
+
+
+/******************************************
+*  Static allocation
+******************************************/
+/* FSE buffer bounds */
+#define FSE_NCOUNTBOUND 512
+#define FSE_BLOCKBOUND(size) (size + (size>>7))
+#define FSE_COMPRESSBOUND(size) (FSE_NCOUNTBOUND + FSE_BLOCKBOUND(size))   /* Macro version, useful for static allocation */
+
+/* You can statically allocate FSE CTable/DTable as a table of unsigned using below macro */
+#define FSE_CTABLE_SIZE_U32(maxTableLog, maxSymbolValue)   (1 + (1<<(maxTableLog-1)) + ((maxSymbolValue+1)*2))
+#define FSE_DTABLE_SIZE_U32(maxTableLog)                   (1 + (1<<maxTableLog))
+
+
+/******************************************
+*  FSE advanced API
+******************************************/
+static size_t FSE_buildDTable_raw (FSE_DTable* dt, unsigned nbBits);
+/* build a fake FSE_DTable, designed to read an uncompressed bitstream where each symbol uses nbBits */
+
+static size_t FSE_buildDTable_rle (FSE_DTable* dt, unsigned char symbolValue);
+/* build a fake FSE_DTable, designed to always generate the same symbolValue */
+
+
+/******************************************
+*  FSE symbol decompression API
+******************************************/
+typedef struct   /* per-state decoding context passed to FSE_initDState() / FSE_decodeSymbol() */
+{
+    size_t      state;   /* NOTE(review): presumably the current position in the decoding table - confirm in FSE_decodeSymbol() */
+    const void* table;   /* precise table may vary, depending on U16 */
+} FSE_DState_t;
+
+
+static void     FSE_initDState(FSE_DState_t* DStatePtr, BIT_DStream_t* bitD, const FSE_DTable* dt);
+
+static unsigned char FSE_decodeSymbol(FSE_DState_t* DStatePtr, BIT_DStream_t* bitD);
+
+static unsigned FSE_endOfDState(const FSE_DState_t* DStatePtr);
+
+/*
+Let's now decompose FSE_decompress_usingDTable() into its unitary components.
+You will decode FSE-encoded symbols from the bitStream,
+and also any other bitFields you put in, **in reverse order**.
+
+You will need a few variables to track your bitStream. They are :
+
+BIT_DStream_t DStream;    // Stream context
+FSE_DState_t  DState;     // State context. Multiple ones are possible
+FSE_DTable*   DTablePtr;  // Decoding table, provided by FSE_buildDTable()
+
+The first thing to do is to init the bitStream.
+    errorCode = BIT_initDStream(&DStream, srcBuffer, srcSize);
+
+You should then retrieve your initial state(s)
+(in reverse flushing order if you have several ones) :
+    FSE_initDState(&DState, &DStream, DTablePtr);
+
+You can then decode your data, symbol after symbol.
+For information the maximum number of bits read by FSE_decodeSymbol() is 'tableLog'.
+Keep in mind that symbols are decoded in reverse order, like a LIFO stack (last in, first out).
+    unsigned char symbol = FSE_decodeSymbol(&DState, &DStream);
+
+You can retrieve any bitfield you eventually stored into the bitStream (in reverse order)
+Note : maximum allowed nbBits is 25, for 32-bits compatibility
+    size_t bitField = BIT_readBits(&DStream, nbBits);
+
+All above operations only read from local register (whose size depends on size_t).
+Refueling the register from memory is manually performed by the reload method.
+    endSignal = BIT_reloadDStream(&DStream);
+
+BIT_reloadDStream() result tells if there is still some more data to read from DStream.
+BIT_DStream_unfinished : there is still some data left into the DStream.
+BIT_DStream_endOfBuffer : Dstream reached end of buffer. Its container may no longer be completely filled.
+BIT_DStream_completed : Dstream reached its exact end, corresponding in general to decompression completed.
+BIT_DStream_tooFar : Dstream went too far. Decompression result is corrupted.
+
+When reaching end of buffer (BIT_DStream_endOfBuffer), progress slowly, notably if you decode multiple symbols per loop,
+to properly detect the exact end of stream.
+After each decoded symbol, check if DStream is fully consumed using this simple test :
+    BIT_reloadDStream(&DStream) >= BIT_DStream_completed
+
+When it's done, verify decompression is fully completed, by checking both DStream and the relevant states.
+Checking if DStream has reached its end is performed by :
+    BIT_endOfDStream(&DStream);
+Check also the states. There might be some symbols left there, if some high probability ones (>50%) are possible.
+    FSE_endOfDState(&DState);
+*/
+
+
+/******************************************
+*  FSE unsafe API
+******************************************/
+static unsigned char FSE_decodeSymbolFast(FSE_DState_t* DStatePtr, BIT_DStream_t* bitD);
+/* Same as FSE_decodeSymbol() but reads bits with BIT_readBitsFast() : faster, but works only if nbBits is always >= 1 (otherwise, result will be corrupted) */
+
+
+/******************************************
+*  Implementation of inline functions
+******************************************/
+
+/* decompression */
+
+typedef struct {
+    U16 tableLog;   /* number of bits read from the stream to obtain the initial state (see FSE_initDState) */
+    U16 fastMode;   /* non-zero selects the FSE_decodeSymbolFast() path in FSE_decompress_usingDTable() */
+} FSE_DTableHeader;   /* copied into the first cell of an FSE_DTable (decoding cells start at dt+1); sizeof U32 */
+
+typedef struct
+{   /* One decoding cell of an FSE_DTable. */
+    unsigned short newState;   /* base of the next state; the bits read from the stream are added to it */
+    unsigned char  symbol;     /* symbol returned when the decoder is in this cell */
+    unsigned char  nbBits;     /* number of bits to read from the bitstream to compute the next state */
+} FSE_decode_t;   /* size == U32 */
+
+MEM_STATIC void FSE_initDState(FSE_DState_t* DStatePtr, BIT_DStream_t* bitD, const FSE_DTable* dt)
+{   /* Initialise *DStatePtr for table dt, consuming tableLog bits of bitD as the starting state. */
+    FSE_DTableHeader DTableH;
+    memcpy(&DTableH, dt, sizeof(DTableH));   /* the header is copied out of the first cell of dt */
+    DStatePtr->state = BIT_readBits(bitD, DTableH.tableLog);
+    BIT_reloadDStream(bitD);   /* refill the bit container; the returned status is ignored here */
+    DStatePtr->table = dt + 1;   /* decoding cells start right after the header cell */
+}
+
+MEM_STATIC BYTE FSE_decodeSymbol(FSE_DState_t* DStatePtr, BIT_DStream_t* bitD)
+{   /* Return the symbol of the current cell and advance the state. Does not reload bitD : the caller does that. */
+    const FSE_decode_t DInfo = ((const FSE_decode_t*)(DStatePtr->table))[DStatePtr->state];   /* current cell */
+    const U32  nbBits = DInfo.nbBits;
+    BYTE symbol = DInfo.symbol;
+    size_t lowBits = BIT_readBits(bitD, nbBits);   /* nbBits fresh bits taken from the stream */
+
+    DStatePtr->state = DInfo.newState + lowBits;   /* next state = cell base + bits just read */
+    return symbol;
+}
+
+MEM_STATIC BYTE FSE_decodeSymbolFast(FSE_DState_t* DStatePtr, BIT_DStream_t* bitD)
+{   /* Same steps as FSE_decodeSymbol(), except that the bits are read with BIT_readBitsFast(). */
+    const FSE_decode_t DInfo = ((const FSE_decode_t*)(DStatePtr->table))[DStatePtr->state];   /* current cell */
+    const U32 nbBits = DInfo.nbBits;
+    BYTE symbol = DInfo.symbol;
+    size_t lowBits = BIT_readBitsFast(bitD, nbBits);   /* the prototype comment requires nbBits >= 1 for this variant */
+
+    DStatePtr->state = DInfo.newState + lowBits;   /* next state = cell base + bits just read */
+    return symbol;
+}
+
+MEM_STATIC unsigned FSE_endOfDState(const FSE_DState_t* DStatePtr)
+{   /* Returns 1 when the state value is 0, else 0. The decompression loops use this as the per-state end-of-data test. */
+    return DStatePtr->state == 0;
+}
+
+
+#if defined (__cplusplus)
+}
+#endif
+/* ******************************************************************
+   Huff0 : Huffman coder, part of New Generation Entropy library
+   header file for static linking (only)
+   Copyright (C) 2013-2015, Yann Collet
+
+   BSD 2-Clause License (http://www.opensource.org/licenses/bsd-license.php)
+
+   Redistribution and use in source and binary forms, with or without
+   modification, are permitted provided that the following conditions are
+   met:
+
+       * Redistributions of source code must retain the above copyright
+   notice, this list of conditions and the following disclaimer.
+       * Redistributions in binary form must reproduce the above
+   copyright notice, this list of conditions and the following disclaimer
+   in the documentation and/or other materials provided with the
+   distribution.
+
+   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+   OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+   You can contact the author at :
+   - Source repository : https://github.com/Cyan4973/FiniteStateEntropy
+   - Public forum : https://groups.google.com/forum/#!forum/lz4c
+****************************************************************** */
+
+#if defined (__cplusplus)
+extern "C" {
+#endif
+
+/******************************************
+*  Static allocation macros
+******************************************/
+/* Huff0 buffer bounds. NOTE : `size` is expanded without parentheses, so pass a plain identifier or literal, not an expression. */
+#define HUF_CTABLEBOUND 129
+#define HUF_BLOCKBOUND(size) (size + (size>>8) + 8)   /* only true if incompressible pre-filtered with fast heuristic */
+#define HUF_COMPRESSBOUND(size) (HUF_CTABLEBOUND + HUF_BLOCKBOUND(size))   /* Macro version, useful for static allocation */
+
+/* static allocation of Huff0's DTable : cell 0 is initialised with maxTableLog, all remaining cells are zero-initialised */
+#define HUF_DTABLE_SIZE(maxTableLog)   (1 + (1<<maxTableLog))  /* nb Cells; use unsigned short for X2, unsigned int for X4. maxTableLog is expanded without parentheses : pass a plain constant */
+#define HUF_CREATE_STATIC_DTABLEX2(DTable, maxTableLog) \
+        unsigned short DTable[HUF_DTABLE_SIZE(maxTableLog)] = { maxTableLog }
+#define HUF_CREATE_STATIC_DTABLEX4(DTable, maxTableLog) \
+        unsigned int DTable[HUF_DTABLE_SIZE(maxTableLog)] = { maxTableLog }
+#define HUF_CREATE_STATIC_DTABLEX6(DTable, maxTableLog) \
+        unsigned int DTable[HUF_DTABLE_SIZE(maxTableLog) * 3 / 2] = { maxTableLog }
+
+
+/******************************************
+*  Advanced functions
+******************************************/
+static size_t HUF_decompress4X2 (void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize);   /* single-symbol decoder */
+static size_t HUF_decompress4X4 (void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize);   /* double-symbols decoder */
+static size_t HUF_decompress4X6 (void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize);   /* quad-symbols decoder */
+
+
+#if defined (__cplusplus)
+}
+#endif
+
+/*
+    zstd - standard compression library
+    Header File
+    Copyright (C) 2014-2015, Yann Collet.
+
+    BSD 2-Clause License (http://www.opensource.org/licenses/bsd-license.php)
+
+    Redistribution and use in source and binary forms, with or without
+    modification, are permitted provided that the following conditions are
+    met:
+    * Redistributions of source code must retain the above copyright
+    notice, this list of conditions and the following disclaimer.
+    * Redistributions in binary form must reproduce the above
+    copyright notice, this list of conditions and the following disclaimer
+    in the documentation and/or other materials provided with the
+    distribution.
+    THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+    "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+    LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+    A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+    OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+    SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+    LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+    DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+    THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+    (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+    OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+    You can contact the author at :
+    - zstd source repository : https://github.com/Cyan4973/zstd
+    - zstd public forum : https://groups.google.com/forum/#!forum/lz4c
+*/
+
+#if defined (__cplusplus)
+extern "C" {
+#endif
+
+/* *************************************
+*  Includes
+***************************************/
+#include <stddef.h>   /* size_t */
+
+
+/* *************************************
+*  Version
+***************************************/
+#define ZSTD_VERSION_MAJOR    0    /* for breaking interface changes  */
+#define ZSTD_VERSION_MINOR    2    /* for new (non-breaking) interface capabilities */
+#define ZSTD_VERSION_RELEASE  2    /* for tweaks, bug-fixes, or development */
+#define ZSTD_VERSION_NUMBER  (ZSTD_VERSION_MAJOR *100*100 + ZSTD_VERSION_MINOR *100 + ZSTD_VERSION_RELEASE)
+
+
+/* *************************************
+*  Advanced functions
+***************************************/
+typedef struct ZSTD_CCtx_s ZSTD_CCtx;   /* incomplete type */
+
+#if defined (__cplusplus)
+}
+#endif
+/*
+    zstd - standard compression library
+    Header File for static linking only
+    Copyright (C) 2014-2015, Yann Collet.
+
+    BSD 2-Clause License (http://www.opensource.org/licenses/bsd-license.php)
+
+    Redistribution and use in source and binary forms, with or without
+    modification, are permitted provided that the following conditions are
+    met:
+    * Redistributions of source code must retain the above copyright
+    notice, this list of conditions and the following disclaimer.
+    * Redistributions in binary form must reproduce the above
+    copyright notice, this list of conditions and the following disclaimer
+    in the documentation and/or other materials provided with the
+    distribution.
+    THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+    "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+    LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+    A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+    OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+    SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+    LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+    DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+    THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+    (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+    OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+    You can contact the author at :
+    - zstd source repository : https://github.com/Cyan4973/zstd
+    - zstd public forum : https://groups.google.com/forum/#!forum/lz4c
+*/
+
+/* The objects defined into this file should be considered experimental.
+ * They are not labelled stable, as their prototype may change in the future.
+ * You can use them for tests, provide feedback, or if you can endure risk of future changes.
+ */
+
+#if defined (__cplusplus)
+extern "C" {
+#endif
+
+/* *************************************
+*  Streaming functions
+***************************************/
+
+typedef struct ZSTD_DCtx_s ZSTD_DCtx;
+
+/*
+  Use above functions alternately.
+  ZSTD_nextSrcSizeToDecompress() tells how many bytes to provide as 'srcSize' to ZSTD_decompressContinue().
+  ZSTD_decompressContinue() will use previous data blocks to improve compression if they are located prior to current block.
+  Result is the number of bytes regenerated within 'dst'.
+  It can be zero, which is not an error; it just means ZSTD_decompressContinue() has decoded some header.
+*/
+
+/* *************************************
+*  Prefix - version detection
+***************************************/
+#define ZSTD_magicNumber 0xFD2FB522   /* v0.2 (current)*/
+
+
+#if defined (__cplusplus)
+}
+#endif
+/* ******************************************************************
+   FSE : Finite State Entropy coder
+   Copyright (C) 2013-2015, Yann Collet.
+
+   BSD 2-Clause License (http://www.opensource.org/licenses/bsd-license.php)
+
+   Redistribution and use in source and binary forms, with or without
+   modification, are permitted provided that the following conditions are
+   met:
+
+       * Redistributions of source code must retain the above copyright
+   notice, this list of conditions and the following disclaimer.
+       * Redistributions in binary form must reproduce the above
+   copyright notice, this list of conditions and the following disclaimer
+   in the documentation and/or other materials provided with the
+   distribution.
+
+   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+   OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+    You can contact the author at :
+    - FSE source repository : https://github.com/Cyan4973/FiniteStateEntropy
+    - Public forum : https://groups.google.com/forum/#!forum/lz4c
+****************************************************************** */
+
+#ifndef FSE_COMMONDEFS_ONLY
+
+/****************************************************************
+*  Tuning parameters
+****************************************************************/
+/* MEMORY_USAGE :
+*  Memory usage formula : N->2^N Bytes (examples : 10 -> 1KB; 12 -> 4KB ; 16 -> 64KB; 20 -> 1MB; etc.)
+*  Increasing memory usage improves compression ratio
+*  Reduced memory usage can improve speed, due to cache effect
+*  Recommended max value is 14, for 16KB, which nicely fits into Intel x86 L1 cache */
+#define FSE_MAX_MEMORY_USAGE 14
+#define FSE_DEFAULT_MEMORY_USAGE 13
+
+/* FSE_MAX_SYMBOL_VALUE :
+*  Maximum symbol value authorized.
+*  Required for proper stack allocation */
+#define FSE_MAX_SYMBOL_VALUE 255
+
+
+/****************************************************************
+*  template functions type & suffix
+****************************************************************/
+#define FSE_FUNCTION_TYPE BYTE
+#define FSE_FUNCTION_EXTENSION
+
+
+/****************************************************************
+*  Byte symbol type
+****************************************************************/
+#endif   /* !FSE_COMMONDEFS_ONLY */
+
+
+/****************************************************************
+*  Compiler specifics
+****************************************************************/
+#ifdef _MSC_VER    /* Visual Studio */
+#  define FORCE_INLINE static __forceinline
+#  include <intrin.h>                    /* For Visual 2005 */
+#  pragma warning(disable : 4127)        /* disable: C4127: conditional expression is constant */
+#  pragma warning(disable : 4214)        /* disable: C4214: non-int bitfields */
+#else
+#  if defined (__cplusplus) || defined (__STDC_VERSION__) && __STDC_VERSION__ >= 199901L   /* C99 */
+#    ifdef __GNUC__
+#      define FORCE_INLINE static inline __attribute__((always_inline))
+#    else
+#      define FORCE_INLINE static inline
+#    endif
+#  else
+#    define FORCE_INLINE static
+#  endif /* __STDC_VERSION__ */
+#endif
+
+
+/****************************************************************
+*  Includes
+****************************************************************/
+#include <stdlib.h>     /* malloc, free, qsort */
+#include <string.h>     /* memcpy, memset */
+#include <stdio.h>      /* printf (debug) */
+
+/****************************************************************
+*  Constants
+*****************************************************************/
+#define FSE_MAX_TABLELOG  (FSE_MAX_MEMORY_USAGE-2)
+#define FSE_MAX_TABLESIZE (1U<<FSE_MAX_TABLELOG)
+#define FSE_MAXTABLESIZE_MASK (FSE_MAX_TABLESIZE-1)
+#define FSE_DEFAULT_TABLELOG (FSE_DEFAULT_MEMORY_USAGE-2)
+#define FSE_MIN_TABLELOG 5
+
+#define FSE_TABLELOG_ABSOLUTE_MAX 15
+#if FSE_MAX_TABLELOG > FSE_TABLELOG_ABSOLUTE_MAX
+#error "FSE_MAX_TABLELOG > FSE_TABLELOG_ABSOLUTE_MAX is not supported"
+#endif
+
+
+/****************************************************************
+*  Error Management
+****************************************************************/
+#define FSE_STATIC_ASSERT(c) { enum { FSE_static_assert = 1/(int)(!!(c)) }; }   /* use only *after* variable declarations */
+
+
+/****************************************************************
+*  Complex types
+****************************************************************/
+typedef U32 DTable_max_t[FSE_DTABLE_SIZE_U32(FSE_MAX_TABLELOG)];
+
+
+/****************************************************************
+*  Templates
+****************************************************************/
+/*
+  designed to be included
+  for type-specific functions (template emulation in C)
+  Objective is to write these functions only once, for improved maintenance
+*/
+
+/* safety checks */
+#ifndef FSE_FUNCTION_EXTENSION
+#  error "FSE_FUNCTION_EXTENSION must be defined"
+#endif
+#ifndef FSE_FUNCTION_TYPE
+#  error "FSE_FUNCTION_TYPE must be defined"
+#endif
+
+/* Function names */
+#define FSE_CAT(X,Y) X##Y
+#define FSE_FUNCTION_NAME(X,Y) FSE_CAT(X,Y)
+#define FSE_TYPE_NAME(X,Y) FSE_CAT(X,Y)
+
+
+/* Function templates */
+
+#define FSE_DECODE_TYPE FSE_decode_t
+
+static U32 FSE_tableStep(U32 tableSize) { return (tableSize>>1) + (tableSize>>3) + 3; }   /* stride used by FSE_buildDTable() to spread symbols : tableSize/2 + tableSize/8 + 3 */
+
+static size_t FSE_buildDTable
+(FSE_DTable* dt, const short* normalizedCounter, unsigned maxSymbolValue, unsigned tableLog)
+{   /* Fill dt (one header cell followed by 1<<tableLog decoding cells) from normalizedCounter[0..maxSymbolValue]. Returns 0, or an ERROR() code. */
+    void* ptr = dt+1;   /* decoding cells start after the header cell */
+    FSE_DECODE_TYPE* const tableDecode = (FSE_DECODE_TYPE*)ptr;
+    FSE_DTableHeader DTableH;
+    const U32 tableSize = 1 << tableLog;   /* note : computed before tableLog is validated below */
+    const U32 tableMask = tableSize-1;
+    const U32 step = FSE_tableStep(tableSize);
+    U16 symbolNext[FSE_MAX_SYMBOL_VALUE+1];   /* per symbol : next state value to hand out in the final pass */
+    U32 position = 0;
+    U32 highThreshold = tableSize-1;   /* cells above this index are reserved for the low-probability (-1) symbols */
+    const S16 largeLimit= (S16)(1 << (tableLog-1));   /* half of tableSize */
+    U32 noLarge = 1;   /* cleared as soon as one symbol count reaches largeLimit */
+    U32 s;
+
+    /* Sanity Checks */
+    if (maxSymbolValue > FSE_MAX_SYMBOL_VALUE) return ERROR(maxSymbolValue_tooLarge);
+    if (tableLog > FSE_MAX_TABLELOG) return ERROR(tableLog_tooLarge);
+
+    /* Init, lay down lowprob symbols */
+    DTableH.tableLog = (U16)tableLog;
+    for (s=0; s<=maxSymbolValue; s++)
+    {
+        if (normalizedCounter[s]==-1)   /* -1 marks a low-probability symbol : it receives exactly one cell, taken from the top of the table */
+        {
+            tableDecode[highThreshold--].symbol = (FSE_FUNCTION_TYPE)s;
+            symbolNext[s] = 1;
+        }
+        else
+        {
+            if (normalizedCounter[s] >= largeLimit) noLarge=0;
+            symbolNext[s] = normalizedCounter[s];
+        }
+    }
+
+    /* Spread symbols */
+    for (s=0; s<=maxSymbolValue; s++)
+    {
+        int i;
+        for (i=0; i<normalizedCounter[s]; i++)   /* runs zero times for counts of 0 and -1 */
+        {
+            tableDecode[position].symbol = (FSE_FUNCTION_TYPE)s;
+            position = (position + step) & tableMask;
+            while (position > highThreshold) position = (position + step) & tableMask;   /* lowprob area */
+        }
+    }
+
+    if (position!=0) return ERROR(GENERIC);   /* position must reach all cells once, otherwise normalizedCounter is incorrect */
+
+    /* Build Decoding table */
+    {
+        U32 i;
+        for (i=0; i<tableSize; i++)
+        {
+            FSE_FUNCTION_TYPE symbol = (FSE_FUNCTION_TYPE)(tableDecode[i].symbol);
+            U16 nextState = symbolNext[symbol]++;   /* successive cells of one symbol get successive state values */
+            tableDecode[i].nbBits = (BYTE) (tableLog - BIT_highbit32 ((U32)nextState) );
+            tableDecode[i].newState = (U16) ( (nextState << tableDecode[i].nbBits) - tableSize);
+        }
+    }
+
+    DTableH.fastMode = (U16)noLarge;   /* read back by FSE_decompress_usingDTable() to choose the fast decoding path */
+    memcpy(dt, &DTableH, sizeof(DTableH));   /* memcpy(), to avoid strict aliasing warnings */
+    return 0;
+}
+
+
+#ifndef FSE_COMMONDEFS_ONLY
+/******************************************
+*  FSE helper functions
+******************************************/
+static unsigned FSE_isError(size_t code) { return ERR_isError(code); }   /* thin wrapper : forwards code to ERR_isError() */
+
+
+/****************************************************************
+*  FSE NCount encoding-decoding
+****************************************************************/
+static short FSE_abs(short a)
+{   /* Absolute value of a, converted back to short. */
+    return (short)(a<0 ? -a : a);
+}
+
+static size_t FSE_readNCount (short* normalizedCounter, unsigned* maxSVPtr, unsigned* tableLogPtr,
+                 const void* headerBuffer, size_t hbSize)
+{   /* Parse a normalized-count header. In : *maxSVPtr = largest symbol index allowed. Out : counts, *tableLogPtr, *maxSVPtr = last symbol read. Returns bytes consumed, or an ERROR() code. */
+    const BYTE* const istart = (const BYTE*) headerBuffer;
+    const BYTE* const iend = istart + hbSize;
+    const BYTE* ip = istart;
+    int nbBits;
+    int remaining;
+    int threshold;
+    U32 bitStream;   /* bits not yet consumed, least significant first */
+    int bitCount;    /* number of bits already consumed starting from ip */
+    unsigned charnum = 0;   /* index of the next symbol to fill */
+    int previous0 = 0;      /* set when the last decoded count was 0 */
+
+    if (hbSize < 4) return ERROR(srcSize_wrong);   /* the first access below is a 32-bit read */
+    bitStream = MEM_readLE32(ip);
+    nbBits = (bitStream & 0xF) + FSE_MIN_TABLELOG;   /* extract tableLog */
+    if (nbBits > FSE_TABLELOG_ABSOLUTE_MAX) return ERROR(tableLog_tooLarge);
+    bitStream >>= 4;
+    bitCount = 4;
+    *tableLogPtr = nbBits;
+    remaining = (1<<nbBits)+1;   /* table size + 1; reduced by |count| of each symbol, must end at exactly 1 */
+    threshold = 1<<nbBits;
+    nbBits++;
+
+    while ((remaining>1) && (charnum<=*maxSVPtr))
+    {
+        if (previous0)   /* a zero count is followed by a run length of additional zero-count symbols */
+        {
+            unsigned n0 = charnum;
+            while ((bitStream & 0xFFFF) == 0xFFFF)   /* 16 set bits = eight 2-bit codes of value 3 */
+            {
+                n0+=24;
+                if (ip < iend-5)
+                {
+                    ip+=2;
+                    bitStream = MEM_readLE32(ip) >> bitCount;
+                }
+                else
+                {
+                    bitStream >>= 16;
+                    bitCount+=16;
+                }
+            }
+            while ((bitStream & 3) == 3)   /* each 2-bit code of value 3 adds three more zero symbols */
+            {
+                n0+=3;
+                bitStream>>=2;
+                bitCount+=2;
+            }
+            n0 += bitStream & 3;   /* final 2-bit code (0..2) ends the run */
+            bitCount += 2;
+            if (n0 > *maxSVPtr) return ERROR(maxSymbolValue_tooSmall);
+            while (charnum < n0) normalizedCounter[charnum++] = 0;
+            if ((ip <= iend-7) || (ip + (bitCount>>3) <= iend-4))   /* advance only if a 32-bit read stays inside the buffer */
+            {
+                ip += bitCount>>3;
+                bitCount &= 7;
+                bitStream = MEM_readLE32(ip) >> bitCount;
+            }
+            else
+                bitStream >>= 2;
+        }
+        {
+            const short max = (short)((2*threshold-1)-remaining);
+            short count;
+
+            if ((bitStream & (threshold-1)) < (U32)max)   /* small value : encoded on nbBits-1 bits */
+            {
+                count = (short)(bitStream & (threshold-1));
+                bitCount   += nbBits-1;
+            }
+            else   /* otherwise the value uses the full nbBits bits */
+            {
+                count = (short)(bitStream & (2*threshold-1));
+                if (count >= threshold) count -= max;
+                bitCount   += nbBits;
+            }
+
+            count--;   /* extra accuracy : the stored value is count+1, so that -1 can be represented */
+            remaining -= FSE_abs(count);
+            normalizedCounter[charnum++] = count;
+            previous0 = !count;
+            while (remaining < threshold)   /* fewer slots left : following counts need fewer bits */
+            {
+                nbBits--;
+                threshold >>= 1;
+            }
+
+            {
+                if ((ip <= iend-7) || (ip + (bitCount>>3) <= iend-4))
+                {
+                    ip += bitCount>>3;
+                    bitCount &= 7;
+                }
+                else   /* near the end of the buffer : pin ip to the last 4 bytes and keep the surplus in bitCount */
+                {
+                    bitCount -= (int)(8 * (iend - 4 - ip));
+                    ip = iend - 4;
+                }
+                bitStream = MEM_readLE32(ip) >> (bitCount & 31);
+            }
+        }
+    }
+    if (remaining != 1) return ERROR(GENERIC);   /* counts did not add up to the table size */
+    *maxSVPtr = charnum-1;
+
+    ip += (bitCount+7)>>3;   /* round the consumed bits up to whole bytes */
+    if ((size_t)(ip-istart) > hbSize) return ERROR(srcSize_wrong);
+    return ip-istart;
+}
+
+
+/*********************************************************
+*  Decompression (Byte symbols)
+*********************************************************/
+static size_t FSE_buildDTable_rle (FSE_DTable* dt, BYTE symbolValue)
+{   /* Build a table with a single decoding cell that always yields symbolValue and reads no bits. Always returns 0. */
+    void* ptr = dt;
+    FSE_DTableHeader* const DTableH = (FSE_DTableHeader*)ptr;   /* header lives in the first cell of dt */
+    FSE_decode_t* const cell = (FSE_decode_t*)(ptr) + 1;   /* because dt is unsigned */
+
+    DTableH->tableLog = 0;
+    DTableH->fastMode = 0;
+
+    cell->newState = 0;   /* with nbBits 0 the state stays at 0 after every decode */
+    cell->symbol = symbolValue;
+    cell->nbBits = 0;
+
+    return 0;
+}
+
+
+static size_t FSE_buildDTable_raw (FSE_DTable* dt, unsigned nbBits)
+{   /* Build a table in which cell s yields symbol s and always reads nbBits bits. Returns 0, or ERROR(GENERIC) when nbBits is 0. */
+    void* ptr = dt;
+    FSE_DTableHeader* const DTableH = (FSE_DTableHeader*)ptr;   /* header lives in the first cell of dt */
+    FSE_decode_t* const dinfo = (FSE_decode_t*)(ptr) + 1;   /* because dt is unsigned */
+    const unsigned tableSize = 1 << nbBits;   /* no upper bound on nbBits is checked here : dt must hold 1 + tableSize cells */
+    const unsigned tableMask = tableSize - 1;
+    const unsigned maxSymbolValue = tableMask;
+    unsigned s;
+
+    /* Sanity checks */
+    if (nbBits < 1) return ERROR(GENERIC);         /* min size */
+
+    /* Build Decoding Table */
+    DTableH->tableLog = (U16)nbBits;
+    DTableH->fastMode = 1;
+    for (s=0; s<=maxSymbolValue; s++)
+    {
+        dinfo[s].newState = 0;   /* next state is exactly the nbBits value read from the stream */
+        dinfo[s].symbol = (BYTE)s;   /* truncated to 8 bits by the cast */
+        dinfo[s].nbBits = (BYTE)nbBits;
+    }
+
+    return 0;
+}
+
+FORCE_INLINE size_t FSE_decompress_usingDTable_generic(
+          void* dst, size_t maxDstSize,
+    const void* cSrc, size_t cSrcSize,
+    const FSE_DTable* dt, const unsigned fast)
+{   /* Decode cSrc into dst using table dt and two alternating states. Returns the number of bytes written, or an ERROR() code. `fast` selects FSE_decodeSymbolFast(). */
+    BYTE* const ostart = (BYTE*) dst;
+    BYTE* op = ostart;
+    BYTE* const omax = op + maxDstSize;
+    BYTE* const olimit = omax-3;   /* while op < olimit, four more bytes still fit in dst */
+
+    BIT_DStream_t bitD;
+    FSE_DState_t state1;   /* decodes the symbols at even output positions */
+    FSE_DState_t state2;   /* decodes the symbols at odd output positions */
+    size_t errorCode;
+
+    /* Init */
+    errorCode = BIT_initDStream(&bitD, cSrc, cSrcSize);   /* replaced last arg by maxCompressed Size */
+    if (FSE_isError(errorCode)) return errorCode;
+
+    FSE_initDState(&state1, &bitD, dt);
+    FSE_initDState(&state2, &bitD, dt);
+
+#define FSE_GETSYMBOL(statePtr) fast ? FSE_decodeSymbolFast(statePtr, &bitD) : FSE_decodeSymbol(statePtr, &bitD)
+
+    /* 4 symbols per loop */
+    for ( ; (BIT_reloadDStream(&bitD)==BIT_DStream_unfinished) && (op<olimit) ; op+=4)
+    {
+        op[0] = FSE_GETSYMBOL(&state1);
+
+        if (FSE_MAX_TABLELOG*2+7 > sizeof(bitD.bitContainer)*8)    /* This test must be static */
+            BIT_reloadDStream(&bitD);
+
+        op[1] = FSE_GETSYMBOL(&state2);
+
+        if (FSE_MAX_TABLELOG*4+7 > sizeof(bitD.bitContainer)*8)    /* This test must be static */
+            { if (BIT_reloadDStream(&bitD) > BIT_DStream_unfinished) { op+=2; break; } }   /* stream no longer unfinished : keep the 2 symbols written and continue in the tail loop */
+
+        op[2] = FSE_GETSYMBOL(&state1);
+
+        if (FSE_MAX_TABLELOG*2+7 > sizeof(bitD.bitContainer)*8)    /* This test must be static */
+            BIT_reloadDStream(&bitD);
+
+        op[3] = FSE_GETSYMBOL(&state2);
+    }
+
+    /* tail : one symbol at a time, alternating state1 / state2, with an end test before each symbol */
+    /* note : BIT_reloadDStream(&bitD) >= FSE_DStream_partiallyFilled; Ends at exactly BIT_DStream_completed */
+    while (1)
+    {
+        if ( (BIT_reloadDStream(&bitD)>BIT_DStream_completed) || (op==omax) || (BIT_endOfDStream(&bitD) && (fast || FSE_endOfDState(&state1))) )
+            break;
+
+        *op++ = FSE_GETSYMBOL(&state1);
+
+        if ( (BIT_reloadDStream(&bitD)>BIT_DStream_completed) || (op==omax) || (BIT_endOfDStream(&bitD) && (fast || FSE_endOfDState(&state2))) )
+            break;
+
+        *op++ = FSE_GETSYMBOL(&state2);
+    }
+
+    /* end ? success requires the bitstream to be fully consumed and both states to be back at 0 */
+    if (BIT_endOfDStream(&bitD) && FSE_endOfDState(&state1) && FSE_endOfDState(&state2))
+        return op-ostart;
+
+    if (op==omax) return ERROR(dstSize_tooSmall);   /* dst buffer is full, but cSrc unfinished */
+
+    return ERROR(corruption_detected);
+}
+
+
+static size_t FSE_decompress_usingDTable(void* dst, size_t originalSize,
+                            const void* cSrc, size_t cSrcSize,
+                            const FSE_DTable* dt)
+{   /* Decode cSrc into dst (capacity originalSize) with the prebuilt table dt. Returns what FSE_decompress_usingDTable_generic() returns. */
+    FSE_DTableHeader DTableH;
+    memcpy(&DTableH, dt, sizeof(DTableH));   /* read the header stored in the first cell of dt */
+
+    /* select fast mode (static) : `fast` is passed as a literal to the FORCE_INLINE generic function */
+    if (DTableH.fastMode) return FSE_decompress_usingDTable_generic(dst, originalSize, cSrc, cSrcSize, dt, 1);
+    return FSE_decompress_usingDTable_generic(dst, originalSize, cSrc, cSrcSize, dt, 0);
+}
+
+
+static size_t FSE_decompress(void* dst, size_t maxDstSize, const void* cSrc, size_t cSrcSize)
+{   /* Decode a block made of a normalized-count header followed by an FSE bitstream. Returns the number of bytes written to dst, or an ERROR() code. */
+    const BYTE* const istart = (const BYTE*)cSrc;
+    const BYTE* ip = istart;
+    short counting[FSE_MAX_SYMBOL_VALUE+1];   /* normalized counts filled by FSE_readNCount() */
+    DTable_max_t dt;   /* Static analyzer seems unable to understand this table will be properly initialized later */
+    unsigned tableLog;
+    unsigned maxSymbolValue = FSE_MAX_SYMBOL_VALUE;   /* in : largest symbol index accepted; out : last symbol present in the header */
+    size_t errorCode;
+
+    if (cSrcSize<2) return ERROR(srcSize_wrong);   /* too small input size */
+
+    /* normal FSE decoding mode */
+    errorCode = FSE_readNCount (counting, &maxSymbolValue, &tableLog, istart, cSrcSize);   /* on success, errorCode holds the header size in bytes */
+    if (FSE_isError(errorCode)) return errorCode;
+    if (errorCode >= cSrcSize) return ERROR(srcSize_wrong);   /* too small input size */
+    ip += errorCode;
+    cSrcSize -= errorCode;   /* what remains is the compressed bitstream */
+
+    errorCode = FSE_buildDTable (dt, counting, maxSymbolValue, tableLog);
+    if (FSE_isError(errorCode)) return errorCode;
+
+    /* always return, even if it is an error code */
+    return FSE_decompress_usingDTable (dst, maxDstSize, ip, cSrcSize, dt);
+}
+
+
+
+#endif   /* FSE_COMMONDEFS_ONLY */
+/* ******************************************************************
+   Huff0 : Huffman coder, part of New Generation Entropy library
+   Copyright (C) 2013-2015, Yann Collet.
+
+   BSD 2-Clause License (http://www.opensource.org/licenses/bsd-license.php)
+
+   Redistribution and use in source and binary forms, with or without
+   modification, are permitted provided that the following conditions are
+   met:
+
+       * Redistributions of source code must retain the above copyright
+   notice, this list of conditions and the following disclaimer.
+       * Redistributions in binary form must reproduce the above
+   copyright notice, this list of conditions and the following disclaimer
+   in the documentation and/or other materials provided with the
+   distribution.
+
+   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+   OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+    You can contact the author at :
+    - FSE+Huff0 source repository : https://github.com/Cyan4973/FiniteStateEntropy
+    - Public forum : https://groups.google.com/forum/#!forum/lz4c
+****************************************************************** */
+
+/****************************************************************
+*  Compiler specifics
+****************************************************************/
+#if defined (__cplusplus) || (defined (__STDC_VERSION__) && (__STDC_VERSION__ >= 199901L) /* C99 */)
+/* C++ or C99 and later : `inline` is already a keyword, nothing to define */
+#elif defined(_MSC_VER)
+#  define inline __inline   /* Visual Studio, when neither C++ nor C99 : use its own keyword */
+#else
+#  define inline /* any other compiler, when neither C++ nor C99 : `inline` expands to nothing */
+#endif
+
+
+#ifdef _MSC_VER    /* Visual Studio */
+#  pragma warning(disable : 4127)        /* disable: C4127: conditional expression is constant */
+#endif
+
+
+/****************************************************************
+*  Includes
+****************************************************************/
+#include <stdlib.h>     /* malloc, free, qsort */
+#include <string.h>     /* memcpy, memset */
+#include <stdio.h>      /* printf (debug) */
+
+/****************************************************************
+*  Error Management
+****************************************************************/
+#define HUF_STATIC_ASSERT(c) { enum { HUF_static_assert = 1/(int)(!!(c)) }; }   /* compile-time check : the enum initializer divides by zero when (c) is false. Expands to a { } block, so use only *after* variable declarations */
+
+
+/******************************************
+*  Helper functions
+******************************************/
+static unsigned HUF_isError(size_t code) { return ERR_isError(code); }   /* thin wrapper : forwards `code` to ERR_isError() and returns its result */
+
+#define HUF_ABSOLUTEMAX_TABLELOG  16   /* absolute limit of HUF_MAX_TABLELOG. Beyond that value, code does not work */
+#define HUF_MAX_TABLELOG  12           /* max configured tableLog (for static allocation); can be modified up to HUF_ABSOLUTEMAX_TABLELOG */
+#define HUF_DEFAULT_TABLELOG  HUF_MAX_TABLELOG   /* tableLog by default, when not specified */
+#define HUF_MAX_SYMBOL_VALUE 255   /* weight and symbol arrays in this file are sized HUF_MAX_SYMBOL_VALUE + 1 */
+#if (HUF_MAX_TABLELOG > HUF_ABSOLUTEMAX_TABLELOG)   /* build-time guard on the configured limit */
+#  error "HUF_MAX_TABLELOG is too large !"
+#endif
+
+
+
+/*********************************************************
+*  Huff0 : Huffman block decompression
+*********************************************************/
+typedef struct { BYTE byte; BYTE nbBits; } HUF_DEltX2;   /* single-symbol decoding : decoded byte + number of bits to skip in the bitstream */
+
+typedef struct { U16 sequence; BYTE nbBits; BYTE length; } HUF_DEltX4;  /* double-symbols decoding : `sequence` holds 1 or 2 decoded bytes, `length` tells how many, `nbBits` is the number of bits to skip */
+
+typedef struct { BYTE symbol; BYTE weight; } sortedSymbol_t;   /* (symbol, weight) pair; element of the weight-sorted symbol lists built by the X4 / X6 table readers */
+
+/*! HUF_readStats
+    Read compact Huffman tree, saved by HUF_writeCTable : fills `huffWeight` with one weight per symbol (the last one is deduced, not read)
+    @huffWeight : destination buffer, of `hwSize` bytes. @rankStats : receives the number of symbols per weight; zeroed here over (HUF_ABSOLUTEMAX_TABLELOG + 1) U32. @nbSymbolsPtr, @tableLogPtr : receive the symbol count and the table log
+    @return : size read from `src` (header byte included), or an error code (srcSize_wrong, corruption_detected, or the one returned by FSE_decompress)
+*/
+static size_t HUF_readStats(BYTE* huffWeight, size_t hwSize, U32* rankStats,
+                            U32* nbSymbolsPtr, U32* tableLogPtr,
+                            const void* src, size_t srcSize)
+{
+    U32 weightTotal;   /* sum of (2^weight)/2 over all stored weights */
+    U32 tableLog;
+    const BYTE* ip = (const BYTE*) src;
+    size_t iSize;      /* header byte first, then number of payload bytes following it */
+    size_t oSize;      /* number of weights explicitly stored (the last symbol's weight is implied) */
+    U32 n;
+
+    if (!srcSize) return ERROR(srcSize_wrong);
+    iSize = ip[0];     /* header byte : selects how the weights are stored */
+    //memset(huffWeight, 0, hwSize);   /* is not necessary, even though some analyzer complain ... */
+
+    if (iSize >= 128)  /* special header */
+    {
+        if (iSize >= (242))   /* RLE */
+        {
+            static int l[14] = { 1, 2, 3, 4, 7, 8, 15, 16, 31, 32, 63, 64, 127, 128 };   /* header bytes 242..255 index this list of weight counts */
+            oSize = l[iSize-242];
+            memset(huffWeight, 1, hwSize);   /* every weight is 1 */
+            iSize = 0;                       /* no payload after the header byte */
+        }
+        else   /* Incompressible */
+        {
+            oSize = iSize - 127;             /* header bytes 128..241 give 1..114 stored weights */
+            iSize = ((oSize+1)/2);           /* two 4-bit weights per payload byte */
+            if (iSize+1 > srcSize) return ERROR(srcSize_wrong);
+            if (oSize >= hwSize) return ERROR(corruption_detected);   /* huffWeight[oSize] is written below, so oSize must stay < hwSize */
+            ip += 1;
+            for (n=0; n<oSize; n+=2)
+            {
+                huffWeight[n]   = ip[n/2] >> 4;   /* high nibble first */
+                huffWeight[n+1] = ip[n/2] & 15;   /* then low nibble */
+            }
+        }
+    }
+    else  /* header compressed with FSE (normal case) */
+    {
+        if (iSize+1 > srcSize) return ERROR(srcSize_wrong);
+        oSize = FSE_decompress(huffWeight, hwSize-1, ip+1, iSize);   /* max (hwSize-1) values decoded, as last one is implied */
+        if (FSE_isError(oSize)) return oSize;
+    }
+
+    /* collect weight stats */
+    memset(rankStats, 0, (HUF_ABSOLUTEMAX_TABLELOG + 1) * sizeof(U32));
+    weightTotal = 0;
+    for (n=0; n<oSize; n++)
+    {
+        if (huffWeight[n] >= HUF_ABSOLUTEMAX_TABLELOG) return ERROR(corruption_detected);   /* also keeps the rankStats index in range */
+        rankStats[huffWeight[n]]++;
+        weightTotal += (1 << huffWeight[n]) >> 1;   /* weight w adds 2^(w-1) ; weight 0 adds nothing */
+    }
+    if (weightTotal == 0) return ERROR(corruption_detected);
+
+    /* get last non-null symbol weight (implied, total must be 2^n) */
+    tableLog = BIT_highbit32(weightTotal) + 1;
+    if (tableLog > HUF_ABSOLUTEMAX_TABLELOG) return ERROR(corruption_detected);
+    {
+        U32 total = 1 << tableLog;
+        U32 rest = total - weightTotal;   /* share left for the last, non-stored symbol */
+        U32 verif = 1 << BIT_highbit32(rest);
+        U32 lastWeight = BIT_highbit32(rest) + 1;
+        if (verif != rest) return ERROR(corruption_detected);    /* last value must be a clean power of 2 */
+        huffWeight[oSize] = (BYTE)lastWeight;
+        rankStats[lastWeight]++;
+    }
+
+    /* check tree construction validity */
+    if ((rankStats[1] < 2) || (rankStats[1] & 1)) return ERROR(corruption_detected);   /* by construction : at least 2 elts of rank 1, must be even */
+
+    /* results */
+    *nbSymbolsPtr = (U32)(oSize+1);   /* stored weights + the implied last one */
+    *tableLogPtr = tableLog;
+    return iSize+1;                   /* payload + header byte */
+}
+
+
+/**************************/
+/* single-symbol decoding */
+/**************************/
+
+static size_t HUF_readDTableX2 (U16* DTable, const void* src, size_t srcSize)
+{
+    BYTE huffWeight[HUF_MAX_SYMBOL_VALUE + 1];
+    U32 rankVal[HUF_ABSOLUTEMAX_TABLELOG + 1];   /* large enough for values from 0 to 16 */
+    U32 tableLog = 0;
+    const BYTE* ip = (const BYTE*) src;
+    size_t iSize = ip[0];
+    U32 nbSymbols = 0;
+    U32 n;
+    U32 nextRankStart;
+    void* ptr = DTable+1;
+    HUF_DEltX2* const dt = (HUF_DEltX2*)ptr;
+
+    HUF_STATIC_ASSERT(sizeof(HUF_DEltX2) == sizeof(U16));   /* if compilation fails here, assertion is false */
+    //memset(huffWeight, 0, sizeof(huffWeight));   /* is not necessary, even though some analyzer complain ... */
+
+    iSize = HUF_readStats(huffWeight, HUF_MAX_SYMBOL_VALUE + 1, rankVal, &nbSymbols, &tableLog, src, srcSize);
+    if (HUF_isError(iSize)) return iSize;
+
+    /* check result */
+    if (tableLog > DTable[0]) return ERROR(tableLog_tooLarge);   /* DTable is too small */
+    DTable[0] = (U16)tableLog;   /* maybe should separate sizeof DTable, as allocated, from used size of DTable, in case of DTable re-use */
+
+    /* Prepare ranks */
+    nextRankStart = 0;
+    for (n=1; n<=tableLog; n++)
+    {
+        U32 current = nextRankStart;
+        nextRankStart += (rankVal[n] << (n-1));
+        rankVal[n] = current;
+    }
+
+    /* fill DTable */
+    for (n=0; n<nbSymbols; n++)
+    {
+        const U32 w = huffWeight[n];
+        const U32 length = (1 << w) >> 1;
+        U32 i;
+        HUF_DEltX2 D;
+        D.byte = (BYTE)n; D.nbBits = (BYTE)(tableLog + 1 - w);
+        for (i = rankVal[w]; i < rankVal[w] + length; i++)
+            dt[i] = D;
+        rankVal[w] += length;
+    }
+
+    return iSize;
+}
+/* HUF_decodeSymbolX2() : uses the value returned by BIT_lookBitsFast(Dstream, dtLog) as an index into `dt`, skips the nbBits stored in that cell, and returns the cell's byte */
+static BYTE HUF_decodeSymbolX2(BIT_DStream_t* Dstream, const HUF_DEltX2* dt, const U32 dtLog)
+{
+        const size_t val = BIT_lookBitsFast(Dstream, dtLog); /* note : dtLog >= 1 */
+        const BYTE c = dt[val].byte;
+        BIT_skipBits(Dstream, dt[val].nbBits);
+        return c;
+}
+/* Decoding macros : they expect `dt` and `dtLog` to be visible where they are expanded. _0 : always decodes one symbol into *ptr and advances ptr */
+#define HUF_DECODE_SYMBOLX2_0(ptr, DStreamPtr) \
+    *ptr++ = HUF_decodeSymbolX2(DStreamPtr, dt, dtLog)
+/* _1 : decodes only when MEM_64bits() is true or HUF_MAX_TABLELOG <= 12. Expands to a bare `if`, so do not follow it with an `else` */
+#define HUF_DECODE_SYMBOLX2_1(ptr, DStreamPtr) \
+    if (MEM_64bits() || (HUF_MAX_TABLELOG<=12)) \
+        HUF_DECODE_SYMBOLX2_0(ptr, DStreamPtr)
+/* _2 : decodes only when MEM_64bits() is true. Expands to a bare `if`, so do not follow it with an `else` */
+#define HUF_DECODE_SYMBOLX2_2(ptr, DStreamPtr) \
+    if (MEM_64bits()) \
+        HUF_DECODE_SYMBOLX2_0(ptr, DStreamPtr)
+/* HUF_decodeStreamX2() : decodes single symbols from `bitDPtr` into [p, pEnd), one output byte per symbol. @return : pEnd - (initial value of p) */
+static inline size_t HUF_decodeStreamX2(BYTE* p, BIT_DStream_t* const bitDPtr, BYTE* const pEnd, const HUF_DEltX2* const dt, const U32 dtLog)
+{
+    BYTE* const pStart = p;
+
+    /* up to 4 symbols at a time */
+    while ((BIT_reloadDStream(bitDPtr) == BIT_DStream_unfinished) && (p <= pEnd-4))
+    {
+        HUF_DECODE_SYMBOLX2_2(p, bitDPtr);
+        HUF_DECODE_SYMBOLX2_1(p, bitDPtr);
+        HUF_DECODE_SYMBOLX2_2(p, bitDPtr);
+        HUF_DECODE_SYMBOLX2_0(p, bitDPtr);
+    }
+
+    /* closer to the end */
+    while ((BIT_reloadDStream(bitDPtr) == BIT_DStream_unfinished) && (p < pEnd))
+        HUF_DECODE_SYMBOLX2_0(p, bitDPtr);
+
+    /* no more data to retrieve from bitstream, hence no need to reload */
+    while (p < pEnd)
+        HUF_DECODE_SYMBOLX2_0(p, bitDPtr);   /* runs whatever the bitstream status : output is always filled up to pEnd */
+
+    return pEnd-pStart;
+}
+
+
+static size_t HUF_decompress4X2_usingDTable(
+          void* dst,  size_t dstSize,
+    const void* cSrc, size_t cSrcSize,
+    const U16* DTable)
+{
+    if (cSrcSize < 10) return ERROR(corruption_detected);   /* strict minimum : jump table + 1 byte per stream */
+
+    {
+        const BYTE* const istart = (const BYTE*) cSrc;
+        BYTE* const ostart = (BYTE*) dst;
+        BYTE* const oend = ostart + dstSize;
+
+        const void* ptr = DTable;
+        const HUF_DEltX2* const dt = ((const HUF_DEltX2*)ptr) +1;
+        const U32 dtLog = DTable[0];
+        size_t errorCode;
+
+        /* Init */
+        BIT_DStream_t bitD1;
+        BIT_DStream_t bitD2;
+        BIT_DStream_t bitD3;
+        BIT_DStream_t bitD4;
+        const size_t length1 = MEM_readLE16(istart);
+        const size_t length2 = MEM_readLE16(istart+2);
+        const size_t length3 = MEM_readLE16(istart+4);
+        size_t length4;
+        const BYTE* const istart1 = istart + 6;  /* jumpTable */
+        const BYTE* const istart2 = istart1 + length1;
+        const BYTE* const istart3 = istart2 + length2;
+        const BYTE* const istart4 = istart3 + length3;
+        const size_t segmentSize = (dstSize+3) / 4;
+        BYTE* const opStart2 = ostart + segmentSize;
+        BYTE* const opStart3 = opStart2 + segmentSize;
+        BYTE* const opStart4 = opStart3 + segmentSize;
+        BYTE* op1 = ostart;
+        BYTE* op2 = opStart2;
+        BYTE* op3 = opStart3;
+        BYTE* op4 = opStart4;
+        U32 endSignal;
+
+        length4 = cSrcSize - (length1 + length2 + length3 + 6);
+        if (length4 > cSrcSize) return ERROR(corruption_detected);   /* overflow */
+        errorCode = BIT_initDStream(&bitD1, istart1, length1);
+        if (HUF_isError(errorCode)) return errorCode;
+        errorCode = BIT_initDStream(&bitD2, istart2, length2);
+        if (HUF_isError(errorCode)) return errorCode;
+        errorCode = BIT_initDStream(&bitD3, istart3, length3);
+        if (HUF_isError(errorCode)) return errorCode;
+        errorCode = BIT_initDStream(&bitD4, istart4, length4);
+        if (HUF_isError(errorCode)) return errorCode;
+
+        /* 16-32 symbols per loop (4-8 symbols per stream) */
+        endSignal = BIT_reloadDStream(&bitD1) | BIT_reloadDStream(&bitD2) | BIT_reloadDStream(&bitD3) | BIT_reloadDStream(&bitD4);
+        for ( ; (endSignal==BIT_DStream_unfinished) && (op4<(oend-7)) ; )
+        {
+            HUF_DECODE_SYMBOLX2_2(op1, &bitD1);
+            HUF_DECODE_SYMBOLX2_2(op2, &bitD2);
+            HUF_DECODE_SYMBOLX2_2(op3, &bitD3);
+            HUF_DECODE_SYMBOLX2_2(op4, &bitD4);
+            HUF_DECODE_SYMBOLX2_1(op1, &bitD1);
+            HUF_DECODE_SYMBOLX2_1(op2, &bitD2);
+            HUF_DECODE_SYMBOLX2_1(op3, &bitD3);
+            HUF_DECODE_SYMBOLX2_1(op4, &bitD4);
+            HUF_DECODE_SYMBOLX2_2(op1, &bitD1);
+            HUF_DECODE_SYMBOLX2_2(op2, &bitD2);
+            HUF_DECODE_SYMBOLX2_2(op3, &bitD3);
+            HUF_DECODE_SYMBOLX2_2(op4, &bitD4);
+            HUF_DECODE_SYMBOLX2_0(op1, &bitD1);
+            HUF_DECODE_SYMBOLX2_0(op2, &bitD2);
+            HUF_DECODE_SYMBOLX2_0(op3, &bitD3);
+            HUF_DECODE_SYMBOLX2_0(op4, &bitD4);
+
+            endSignal = BIT_reloadDStream(&bitD1) | BIT_reloadDStream(&bitD2) | BIT_reloadDStream(&bitD3) | BIT_reloadDStream(&bitD4);
+        }
+
+        /* check corruption */
+        if (op1 > opStart2) return ERROR(corruption_detected);
+        if (op2 > opStart3) return ERROR(corruption_detected);
+        if (op3 > opStart4) return ERROR(corruption_detected);
+        /* note : op4 supposed already verified within main loop */
+
+        /* finish bitStreams one by one */
+        HUF_decodeStreamX2(op1, &bitD1, opStart2, dt, dtLog);
+        HUF_decodeStreamX2(op2, &bitD2, opStart3, dt, dtLog);
+        HUF_decodeStreamX2(op3, &bitD3, opStart4, dt, dtLog);
+        HUF_decodeStreamX2(op4, &bitD4, oend,     dt, dtLog);
+
+        /* check */
+        endSignal = BIT_endOfDStream(&bitD1) & BIT_endOfDStream(&bitD2) & BIT_endOfDStream(&bitD3) & BIT_endOfDStream(&bitD4);
+        if (!endSignal) return ERROR(corruption_detected);
+
+        /* decoded size */
+        return dstSize;
+    }
+}
+
+/* HUF_decompress4X2() : reads the single-symbol table description at the start of `cSrc`, then decodes the remaining input as 4 streams into `dst`. @return : result of HUF_decompress4X2_usingDTable(), or an error code */
+static size_t HUF_decompress4X2 (void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize)
+{
+    HUF_CREATE_STATIC_DTABLEX2(DTable, HUF_MAX_TABLELOG);
+    const BYTE* ip = (const BYTE*) cSrc;
+    size_t errorCode;
+
+    errorCode = HUF_readDTableX2 (DTable, cSrc, cSrcSize);   /* on success, this is the size of the table description */
+    if (HUF_isError(errorCode)) return errorCode;
+    if (errorCode >= cSrcSize) return ERROR(srcSize_wrong);  /* nothing would be left for the bitstreams */
+    ip += errorCode;
+    cSrcSize -= errorCode;
+
+    return HUF_decompress4X2_usingDTable (dst, dstSize, ip, cSrcSize, DTable);
+}
+
+
+/***************************/
+/* double-symbols decoding */
+/***************************/
+/* HUF_fillDTableX4Level2() : fills the sub-table starting at `DTable` for a first symbol `baseSeq` that already used `consumed` bits. Cells get either that symbol alone (length 1) or that symbol followed by a second one (length 2) */
+static void HUF_fillDTableX4Level2(HUF_DEltX4* DTable, U32 sizeLog, const U32 consumed,
+                           const U32* rankValOrigin, const int minWeight,
+                           const sortedSymbol_t* sortedSymbols, const U32 sortedListSize,
+                           U32 nbBitsBaseline, U16 baseSeq)
+{
+    HUF_DEltX4 DElt;
+    U32 rankVal[HUF_ABSOLUTEMAX_TABLELOG + 1];   /* local copy : updated while filling, the caller's row is left untouched */
+    U32 s;
+
+    /* get pre-calculated rankVal */
+    memcpy(rankVal, rankValOrigin, sizeof(rankVal));
+
+    /* fill skipped values */
+    if (minWeight>1)
+    {
+        U32 i, skipSize = rankVal[minWeight];
+        MEM_writeLE16(&(DElt.sequence), baseSeq);
+        DElt.nbBits   = (BYTE)(consumed);
+        DElt.length   = 1;   /* these cells decode the first symbol only */
+        for (i = 0; i < skipSize; i++)
+            DTable[i] = DElt;
+    }
+
+    /* fill DTable */
+    for (s=0; s<sortedListSize; s++)   /* note : sortedSymbols already skipped */
+    {
+        const U32 symbol = sortedSymbols[s].symbol;
+        const U32 weight = sortedSymbols[s].weight;
+        const U32 nbBits = nbBitsBaseline - weight;
+        const U32 length = 1 << (sizeLog-nbBits);
+        const U32 start = rankVal[weight];
+        U32 i = start;
+        const U32 end = start + length;
+
+        MEM_writeLE16(&(DElt.sequence), (U16)(baseSeq + (symbol << 8)));   /* first symbol in the low byte, second symbol in the high byte */
+        DElt.nbBits = (BYTE)(nbBits + consumed);   /* bits of both symbols */
+        DElt.length = 2;
+        do { DTable[i++] = DElt; } while (i<end);   /* since length >= 1 */
+
+        rankVal[weight] += length;
+    }
+}
+
+typedef U32 rankVal_t[HUF_ABSOLUTEMAX_TABLELOG][HUF_ABSOLUTEMAX_TABLELOG + 1];   /* indexed as [consumed bits][weight] by the X4 / X6 table builders */
+/* HUF_fillDTableX4() : fills the double-symbols table `DTable` : each symbol of `sortedList` gets either a second-level sub-table (when enough bits remain for a second symbol) or plain single-symbol cells */
+static void HUF_fillDTableX4(HUF_DEltX4* DTable, const U32 targetLog,
+                           const sortedSymbol_t* sortedList, const U32 sortedListSize,
+                           const U32* rankStart, rankVal_t rankValOrigin, const U32 maxWeight,
+                           const U32 nbBitsBaseline)
+{
+    U32 rankVal[HUF_ABSOLUTEMAX_TABLELOG + 1];
+    const int scaleLog = nbBitsBaseline - targetLog;   /* note : targetLog >= srcLog, hence scaleLog <= 1 */
+    const U32 minBits  = nbBitsBaseline - maxWeight;   /* bit length of the symbols having the largest weight */
+    U32 s;
+
+    memcpy(rankVal, rankValOrigin, sizeof(rankVal));   /* copies one row only (sizeof(rankVal)) : the first one, rankValOrigin[0] */
+
+    /* fill DTable */
+    for (s=0; s<sortedListSize; s++)
+    {
+        const U16 symbol = sortedList[s].symbol;
+        const U32 weight = sortedList[s].weight;
+        const U32 nbBits = nbBitsBaseline - weight;
+        const U32 start = rankVal[weight];
+        const U32 length = 1 << (targetLog-nbBits);
+
+        if (targetLog-nbBits >= minBits)   /* enough room for a second symbol */
+        {
+            U32 sortedRank;
+            int minWeight = nbBits + scaleLog;
+            if (minWeight < 1) minWeight = 1;
+            sortedRank = rankStart[minWeight];   /* second symbols are taken from this position of the sorted list onwards */
+            HUF_fillDTableX4Level2(DTable+start, targetLog-nbBits, nbBits,
+                           rankValOrigin[nbBits], minWeight,
+                           sortedList+sortedRank, sortedListSize-sortedRank,
+                           nbBitsBaseline, symbol);
+        }
+        else
+        {
+            U32 i;
+            const U32 end = start + length;
+            HUF_DEltX4 DElt;
+
+            MEM_writeLE16(&(DElt.sequence), symbol);
+            DElt.nbBits   = (BYTE)(nbBits);
+            DElt.length   = 1;   /* single symbol cell */
+            for (i = start; i < end; i++)
+                DTable[i] = DElt;
+        }
+        rankVal[weight] += length;   /* next symbol of the same weight starts right after */
+    }
+}
+
+static size_t HUF_readDTableX4 (U32* DTable, const void* src, size_t srcSize)
+{
+    BYTE weightList[HUF_MAX_SYMBOL_VALUE + 1];
+    sortedSymbol_t sortedSymbol[HUF_MAX_SYMBOL_VALUE + 1];
+    U32 rankStats[HUF_ABSOLUTEMAX_TABLELOG + 1] = { 0 };
+    U32 rankStart0[HUF_ABSOLUTEMAX_TABLELOG + 2] = { 0 };
+    U32* const rankStart = rankStart0+1;
+    rankVal_t rankVal;
+    U32 tableLog, maxW, sizeOfSort, nbSymbols;
+    const U32 memLog = DTable[0];
+    const BYTE* ip = (const BYTE*) src;
+    size_t iSize = ip[0];
+    void* ptr = DTable;
+    HUF_DEltX4* const dt = ((HUF_DEltX4*)ptr) + 1;
+
+    HUF_STATIC_ASSERT(sizeof(HUF_DEltX4) == sizeof(U32));   /* if compilation fails here, assertion is false */
+    if (memLog > HUF_ABSOLUTEMAX_TABLELOG) return ERROR(tableLog_tooLarge);
+    //memset(weightList, 0, sizeof(weightList));   /* is not necessary, even though some analyzer complain ... */
+
+    iSize = HUF_readStats(weightList, HUF_MAX_SYMBOL_VALUE + 1, rankStats, &nbSymbols, &tableLog, src, srcSize);
+    if (HUF_isError(iSize)) return iSize;
+
+    /* check result */
+    if (tableLog > memLog) return ERROR(tableLog_tooLarge);   /* DTable can't fit code depth */
+
+    /* find maxWeight */
+    for (maxW = tableLog; rankStats[maxW]==0; maxW--)
+        {if (!maxW) return ERROR(GENERIC); }  /* necessarily finds a solution before maxW==0 */
+
+    /* Get start index of each weight */
+    {
+        U32 w, nextRankStart = 0;
+        for (w=1; w<=maxW; w++)
+        {
+            U32 current = nextRankStart;
+            nextRankStart += rankStats[w];
+            rankStart[w] = current;
+        }
+        rankStart[0] = nextRankStart;   /* put all 0w symbols at the end of sorted list*/
+        sizeOfSort = nextRankStart;
+    }
+
+    /* sort symbols by weight */
+    {
+        U32 s;
+        for (s=0; s<nbSymbols; s++)
+        {
+            U32 w = weightList[s];
+            U32 r = rankStart[w]++;
+            sortedSymbol[r].symbol = (BYTE)s;
+            sortedSymbol[r].weight = (BYTE)w;
+        }
+        rankStart[0] = 0;   /* forget 0w symbols; this is beginning of weight(1) */
+    }
+
+    /* Build rankVal */
+    {
+        const U32 minBits = tableLog+1 - maxW;
+        U32 nextRankVal = 0;
+        U32 w, consumed;
+        const int rescale = (memLog-tableLog) - 1;   /* tableLog <= memLog */
+        U32* rankVal0 = rankVal[0];
+        for (w=1; w<=maxW; w++)
+        {
+            U32 current = nextRankVal;
+            nextRankVal += rankStats[w] << (w+rescale);
+            rankVal0[w] = current;
+        }
+        for (consumed = minBits; consumed <= memLog - minBits; consumed++)
+        {
+            U32* rankValPtr = rankVal[consumed];
+            for (w = 1; w <= maxW; w++)
+            {
+                rankValPtr[w] = rankVal0[w] >> consumed;
+            }
+        }
+    }
+
+    HUF_fillDTableX4(dt, memLog,
+                   sortedSymbol, sizeOfSort,
+                   rankStart0, rankVal, maxW,
+                   tableLog+1);
+
+    return iSize;
+}
+
+/* HUF_decodeSymbolX4() : always copies 2 bytes from the selected cell to `op`, skips the cell's nbBits, and returns the cell's length (number of bytes that actually count) */
+static U32 HUF_decodeSymbolX4(void* op, BIT_DStream_t* DStream, const HUF_DEltX4* dt, const U32 dtLog)
+{
+    const size_t val = BIT_lookBitsFast(DStream, dtLog);   /* note : dtLog >= 1 */
+    memcpy(op, dt+val, 2);   /* first 2 bytes of the cell, i.e. its `sequence` member */
+    BIT_skipBits(DStream, dt[val].nbBits);
+    return dt[val].length;
+}
+/* HUF_decodeLastSymbolX4() : variant used when a single output byte remains : copies only the first byte of the selected cell to `op`. @return : always 1 */
+static U32 HUF_decodeLastSymbolX4(void* op, BIT_DStream_t* DStream, const HUF_DEltX4* dt, const U32 dtLog)
+{
+    const size_t val = BIT_lookBitsFast(DStream, dtLog);   /* note : dtLog >= 1 */
+    memcpy(op, dt+val, 1);
+    if (dt[val].length==1) BIT_skipBits(DStream, dt[val].nbBits);   /* single-symbol cell : nbBits is exact */
+    else
+    {
+        if (DStream->bitsConsumed < (sizeof(DStream->bitContainer)*8))   /* two-symbol cell : nbBits covers both symbols, only one is wanted */
+        {
+            BIT_skipBits(DStream, dt[val].nbBits);
+            if (DStream->bitsConsumed > (sizeof(DStream->bitContainer)*8))
+                DStream->bitsConsumed = (sizeof(DStream->bitContainer)*8);   /* ugly hack; works only because it's the last symbol. Note : can't easily extract nbBits from just this symbol */
+        }
+    }
+    return 1;
+}
+
+/* Decoding macros : they expect `dt` and `dtLog` to be visible where they are expanded. _0 : always decodes, and advances ptr by the value returned by HUF_decodeSymbolX4() */
+#define HUF_DECODE_SYMBOLX4_0(ptr, DStreamPtr) \
+    ptr += HUF_decodeSymbolX4(ptr, DStreamPtr, dt, dtLog)
+/* _1 : decodes only when MEM_64bits() is true or HUF_MAX_TABLELOG <= 12. Expands to a bare `if`, so do not follow it with an `else` */
+#define HUF_DECODE_SYMBOLX4_1(ptr, DStreamPtr) \
+    if (MEM_64bits() || (HUF_MAX_TABLELOG<=12)) \
+        ptr += HUF_decodeSymbolX4(ptr, DStreamPtr, dt, dtLog)
+/* _2 : decodes only when MEM_64bits() is true. Expands to a bare `if`, so do not follow it with an `else` */
+#define HUF_DECODE_SYMBOLX4_2(ptr, DStreamPtr) \
+    if (MEM_64bits()) \
+        ptr += HUF_decodeSymbolX4(ptr, DStreamPtr, dt, dtLog)
+/* HUF_decodeStreamX4() : decodes from `bitDPtr` into [p, pEnd) with the double-symbols table; each step may produce 1 or 2 bytes. @return : number of bytes by which p advanced */
+static inline size_t HUF_decodeStreamX4(BYTE* p, BIT_DStream_t* bitDPtr, BYTE* const pEnd, const HUF_DEltX4* const dt, const U32 dtLog)
+{
+    BYTE* const pStart = p;
+
+    /* up to 8 symbols at a time */
+    while ((BIT_reloadDStream(bitDPtr) == BIT_DStream_unfinished) && (p < pEnd-7))
+    {
+        HUF_DECODE_SYMBOLX4_2(p, bitDPtr);
+        HUF_DECODE_SYMBOLX4_1(p, bitDPtr);
+        HUF_DECODE_SYMBOLX4_2(p, bitDPtr);
+        HUF_DECODE_SYMBOLX4_0(p, bitDPtr);
+    }
+
+    /* closer to the end */
+    while ((BIT_reloadDStream(bitDPtr) == BIT_DStream_unfinished) && (p <= pEnd-2))   /* each decode writes 2 bytes, so 2 bytes of room are required */
+        HUF_DECODE_SYMBOLX4_0(p, bitDPtr);
+
+    while (p <= pEnd-2)
+        HUF_DECODE_SYMBOLX4_0(p, bitDPtr);   /* no need to reload : reached the end of DStream */
+
+    if (p < pEnd)   /* exactly one byte of room left */
+        p += HUF_decodeLastSymbolX4(p, bitDPtr, dt, dtLog);
+
+    return p-pStart;
+}
+
+
+
+static size_t HUF_decompress4X4_usingDTable(
+          void* dst,  size_t dstSize,
+    const void* cSrc, size_t cSrcSize,
+    const U32* DTable)
+{
+    if (cSrcSize < 10) return ERROR(corruption_detected);   /* strict minimum : jump table + 1 byte per stream */
+
+    {
+        const BYTE* const istart = (const BYTE*) cSrc;
+        BYTE* const ostart = (BYTE*) dst;
+        BYTE* const oend = ostart + dstSize;
+
+        const void* ptr = DTable;
+        const HUF_DEltX4* const dt = ((const HUF_DEltX4*)ptr) +1;
+        const U32 dtLog = DTable[0];
+        size_t errorCode;
+
+        /* Init */
+        BIT_DStream_t bitD1;
+        BIT_DStream_t bitD2;
+        BIT_DStream_t bitD3;
+        BIT_DStream_t bitD4;
+        const size_t length1 = MEM_readLE16(istart);
+        const size_t length2 = MEM_readLE16(istart+2);
+        const size_t length3 = MEM_readLE16(istart+4);
+        size_t length4;
+        const BYTE* const istart1 = istart + 6;  /* jumpTable */
+        const BYTE* const istart2 = istart1 + length1;
+        const BYTE* const istart3 = istart2 + length2;
+        const BYTE* const istart4 = istart3 + length3;
+        const size_t segmentSize = (dstSize+3) / 4;
+        BYTE* const opStart2 = ostart + segmentSize;
+        BYTE* const opStart3 = opStart2 + segmentSize;
+        BYTE* const opStart4 = opStart3 + segmentSize;
+        BYTE* op1 = ostart;
+        BYTE* op2 = opStart2;
+        BYTE* op3 = opStart3;
+        BYTE* op4 = opStart4;
+        U32 endSignal;
+
+        length4 = cSrcSize - (length1 + length2 + length3 + 6);
+        if (length4 > cSrcSize) return ERROR(corruption_detected);   /* overflow */
+        errorCode = BIT_initDStream(&bitD1, istart1, length1);
+        if (HUF_isError(errorCode)) return errorCode;
+        errorCode = BIT_initDStream(&bitD2, istart2, length2);
+        if (HUF_isError(errorCode)) return errorCode;
+        errorCode = BIT_initDStream(&bitD3, istart3, length3);
+        if (HUF_isError(errorCode)) return errorCode;
+        errorCode = BIT_initDStream(&bitD4, istart4, length4);
+        if (HUF_isError(errorCode)) return errorCode;
+
+        /* 16-32 symbols per loop (4-8 symbols per stream) */
+        endSignal = BIT_reloadDStream(&bitD1) | BIT_reloadDStream(&bitD2) | BIT_reloadDStream(&bitD3) | BIT_reloadDStream(&bitD4);
+        for ( ; (endSignal==BIT_DStream_unfinished) && (op4<(oend-7)) ; )
+        {
+            HUF_DECODE_SYMBOLX4_2(op1, &bitD1);
+            HUF_DECODE_SYMBOLX4_2(op2, &bitD2);
+            HUF_DECODE_SYMBOLX4_2(op3, &bitD3);
+            HUF_DECODE_SYMBOLX4_2(op4, &bitD4);
+            HUF_DECODE_SYMBOLX4_1(op1, &bitD1);
+            HUF_DECODE_SYMBOLX4_1(op2, &bitD2);
+            HUF_DECODE_SYMBOLX4_1(op3, &bitD3);
+            HUF_DECODE_SYMBOLX4_1(op4, &bitD4);
+            HUF_DECODE_SYMBOLX4_2(op1, &bitD1);
+            HUF_DECODE_SYMBOLX4_2(op2, &bitD2);
+            HUF_DECODE_SYMBOLX4_2(op3, &bitD3);
+            HUF_DECODE_SYMBOLX4_2(op4, &bitD4);
+            HUF_DECODE_SYMBOLX4_0(op1, &bitD1);
+            HUF_DECODE_SYMBOLX4_0(op2, &bitD2);
+            HUF_DECODE_SYMBOLX4_0(op3, &bitD3);
+            HUF_DECODE_SYMBOLX4_0(op4, &bitD4);
+
+            endSignal = BIT_reloadDStream(&bitD1) | BIT_reloadDStream(&bitD2) | BIT_reloadDStream(&bitD3) | BIT_reloadDStream(&bitD4);
+        }
+
+        /* check corruption */
+        if (op1 > opStart2) return ERROR(corruption_detected);
+        if (op2 > opStart3) return ERROR(corruption_detected);
+        if (op3 > opStart4) return ERROR(corruption_detected);
+        /* note : op4 supposed already verified within main loop */
+
+        /* finish bitStreams one by one */
+        HUF_decodeStreamX4(op1, &bitD1, opStart2, dt, dtLog);
+        HUF_decodeStreamX4(op2, &bitD2, opStart3, dt, dtLog);
+        HUF_decodeStreamX4(op3, &bitD3, opStart4, dt, dtLog);
+        HUF_decodeStreamX4(op4, &bitD4, oend,     dt, dtLog);
+
+        /* check */
+        endSignal = BIT_endOfDStream(&bitD1) & BIT_endOfDStream(&bitD2) & BIT_endOfDStream(&bitD3) & BIT_endOfDStream(&bitD4);
+        if (!endSignal) return ERROR(corruption_detected);
+
+        /* decoded size */
+        return dstSize;
+    }
+}
+
+/* HUF_decompress4X4() : reads the double-symbols table description at the start of `cSrc`, then decodes the remaining input as 4 streams into `dst`. @return : result of HUF_decompress4X4_usingDTable(), or an error code */
+static size_t HUF_decompress4X4 (void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize)
+{
+    HUF_CREATE_STATIC_DTABLEX4(DTable, HUF_MAX_TABLELOG);
+    const BYTE* ip = (const BYTE*) cSrc;
+
+    size_t hSize = HUF_readDTableX4 (DTable, cSrc, cSrcSize);   /* on success, size of the table description */
+    if (HUF_isError(hSize)) return hSize;
+    if (hSize >= cSrcSize) return ERROR(srcSize_wrong);         /* nothing would be left for the bitstreams */
+    ip += hSize;
+    cSrcSize -= hSize;
+
+    return HUF_decompress4X4_usingDTable (dst, dstSize, ip, cSrcSize, DTable);
+}
+
+
+/**********************************/
+/* quad-symbol decoding           */
+/**********************************/
+typedef struct { BYTE nbBits; BYTE nbBytes; } HUF_DDescX6;   /* description of one table position : bits to skip in the bitstream, and number of decoded bytes */
+typedef union { BYTE byte[4]; U32 sequence; } HUF_DSeqX6;   /* up to 4 decoded bytes for one table position, also accessible as a single U32 */
+
+/* HUF_fillDTableX6LevelN() : fills DDescription[] / DSequence[] for sequences extending `baseSeq` by one more symbol. Recursive, up to level 3; may benefit from <template>-like strategy to nest each level inline */
+static void HUF_fillDTableX6LevelN(HUF_DDescX6* DDescription, HUF_DSeqX6* DSequence, int sizeLog,
+                           const rankVal_t rankValOrigin, const U32 consumed, const int minWeight, const U32 maxWeight,
+                           const sortedSymbol_t* sortedSymbols, const U32 sortedListSize, const U32* rankStart,
+                           const U32 nbBitsBaseline, HUF_DSeqX6 baseSeq, HUF_DDescX6 DDesc)
+{
+    const int scaleLog = nbBitsBaseline - sizeLog;   /* note : targetLog >= (nbBitsBaseline-1), hence scaleLog <= 1 */
+    const int minBits  = nbBitsBaseline - maxWeight;
+    const U32 level = DDesc.nbBytes;   /* number of symbols already in baseSeq ; index of the byte written at this level */
+    U32 rankVal[HUF_ABSOLUTEMAX_TABLELOG + 1];
+    U32 symbolStartPos, s;
+
+    /* local rankVal, will be modified */
+    memcpy(rankVal, rankValOrigin[consumed], sizeof(rankVal));
+
+    /* fill skipped values */
+    if (minWeight>1)
+    {
+        U32 i;
+        const U32 skipSize = rankVal[minWeight];
+        for (i = 0; i < skipSize; i++)
+        {
+            DSequence[i] = baseSeq;     /* sequence and description as received : no symbol added */
+            DDescription[i] = DDesc;
+        }
+    }
+
+    /* fill DTable */
+    DDesc.nbBytes++;   /* every entry written below holds one more symbol */
+    symbolStartPos = rankStart[minWeight];
+    for (s=symbolStartPos; s<sortedListSize; s++)
+    {
+        const BYTE symbol = sortedSymbols[s].symbol;
+        const U32  weight = sortedSymbols[s].weight;   /* >= 1 (sorted) */
+        const int  nbBits = nbBitsBaseline - weight;   /* >= 1 (by construction) */
+        const int  totalBits = consumed+nbBits;
+        const U32  start  = rankVal[weight];
+        const U32  length = 1 << (sizeLog-nbBits);
+        baseSeq.byte[level] = symbol;
+        DDesc.nbBits = (BYTE)totalBits;
+
+        if ((level<3) && (sizeLog-totalBits >= minBits))   /* enough room for another symbol */
+        {
+            int nextMinWeight = totalBits + scaleLog;
+            if (nextMinWeight < 1) nextMinWeight = 1;
+            HUF_fillDTableX6LevelN(DDescription+start, DSequence+start, sizeLog-nbBits,
+                           rankValOrigin, totalBits, nextMinWeight, maxWeight,
+                           sortedSymbols, sortedListSize, rankStart,
+                           nbBitsBaseline, baseSeq, DDesc);   /* recursive (max : level 3) */
+        }
+        else
+        {
+            U32 i;
+            const U32 end = start + length;
+            for (i = start; i < end; i++)
+            {
+                DDescription[i] = DDesc;
+                DSequence[i] = baseSeq;
+            }
+        }
+        rankVal[weight] += length;   /* next symbol of the same weight starts right after */
+    }
+}
+
+
+/* note : same preparation as X4 */
+static size_t HUF_readDTableX6 (U32* DTable, const void* src, size_t srcSize)
+{
+    BYTE weightList[HUF_MAX_SYMBOL_VALUE + 1];
+    sortedSymbol_t sortedSymbol[HUF_MAX_SYMBOL_VALUE + 1];
+    U32 rankStats[HUF_ABSOLUTEMAX_TABLELOG + 1] = { 0 };
+    U32 rankStart0[HUF_ABSOLUTEMAX_TABLELOG + 2] = { 0 };
+    U32* const rankStart = rankStart0+1;
+    U32 tableLog, maxW, sizeOfSort, nbSymbols;
+    rankVal_t rankVal;
+    const U32 memLog = DTable[0];
+    const BYTE* ip = (const BYTE*) src;
+    size_t iSize = ip[0];
+
+    if (memLog > HUF_ABSOLUTEMAX_TABLELOG) return ERROR(tableLog_tooLarge);
+    //memset(weightList, 0, sizeof(weightList));   /* is not necessary, even though some analyzer complain ... */
+
+    iSize = HUF_readStats(weightList, HUF_MAX_SYMBOL_VALUE + 1, rankStats, &nbSymbols, &tableLog, src, srcSize);
+    if (HUF_isError(iSize)) return iSize;
+
+    /* check result */
+    if (tableLog > memLog) return ERROR(tableLog_tooLarge);   /* DTable is too small */
+
+    /* find maxWeight */
+    for (maxW = tableLog; rankStats[maxW]==0; maxW--)
+        { if (!maxW) return ERROR(GENERIC); }  /* necessarily finds a solution before maxW==0 */
+
+
+    /* Get start index of each weight */
+    {
+        U32 w, nextRankStart = 0;
+        for (w=1; w<=maxW; w++)
+        {
+            U32 current = nextRankStart;
+            nextRankStart += rankStats[w];
+            rankStart[w] = current;
+        }
+        rankStart[0] = nextRankStart;   /* put all 0w symbols at the end of sorted list*/
+        sizeOfSort = nextRankStart;
+    }
+
+    /* sort symbols by weight */
+    {
+        U32 s;
+        for (s=0; s<nbSymbols; s++)
+        {
+            U32 w = weightList[s];
+            U32 r = rankStart[w]++;
+            sortedSymbol[r].symbol = (BYTE)s;
+            sortedSymbol[r].weight = (BYTE)w;
+        }
+        rankStart[0] = 0;   /* forget 0w symbols; this is beginning of weight(1) */
+    }
+
+    /* Build rankVal */
+    {
+        const U32 minBits = tableLog+1 - maxW;
+        U32 nextRankVal = 0;
+        U32 w, consumed;
+        const int rescale = (memLog-tableLog) - 1;   /* tableLog <= memLog */
+        U32* rankVal0 = rankVal[0];
+        for (w=1; w<=maxW; w++)
+        {
+            U32 current = nextRankVal;
+            nextRankVal += rankStats[w] << (w+rescale);
+            rankVal0[w] = current;
+        }
+        for (consumed = minBits; consumed <= memLog - minBits; consumed++)
+        {
+            U32* rankValPtr = rankVal[consumed];
+            for (w = 1; w <= maxW; w++)
+            {
+                rankValPtr[w] = rankVal0[w] >> consumed;
+            }
+        }
+    }
+
+
+    /* fill tables */
+    {
+        void* ptr = DTable+1;
+        HUF_DDescX6* DDescription = (HUF_DDescX6*)(ptr);
+        void* dSeqStart = DTable + 1 + ((size_t)1<<(memLog-1));
+        HUF_DSeqX6* DSequence = (HUF_DSeqX6*)(dSeqStart);
+        HUF_DSeqX6 DSeq;
+        HUF_DDescX6 DDesc;
+        DSeq.sequence = 0;
+        DDesc.nbBits = 0;
+        DDesc.nbBytes = 0;
+        HUF_fillDTableX6LevelN(DDescription, DSequence, memLog,
+                       (const U32 (*)[HUF_ABSOLUTEMAX_TABLELOG + 1])rankVal, 0, 1, maxW,
+                       sortedSymbol, sizeOfSort, rankStart0,
+                       tableLog+1, DSeq, DDesc);
+    }
+
+    return iSize;
+}
+
+
+static U32 HUF_decodeSymbolX6(void* op, BIT_DStream_t* DStream, const HUF_DDescX6* dd, const HUF_DSeqX6* ds, const U32 dtLog)
+{   /* Decode one table entry : copy its byte sequence to `op` and consume its bits. @return : dd[val].nbBytes, the entry's byte count */
+    const size_t val = BIT_lookBitsFast(DStream, dtLog);   /* note : dtLog >= 1 */   /* val indexes both dd[] and ds[] */
+    memcpy(op, ds+val, sizeof(HUF_DSeqX6));   /* always writes a full HUF_DSeqX6, whatever nbBytes is : that many bytes must be writable at `op` */
+    BIT_skipBits(DStream, dd[val].nbBits);
+    return dd[val].nbBytes;
+}
+
+static U32 HUF_decodeLastSymbolsX6(void* op, const U32 maxL, BIT_DStream_t* DStream,
+                                  const HUF_DDescX6* dd, const HUF_DSeqX6* ds, const U32 dtLog)
+{   /* Bounded variant of HUF_decodeSymbolX6 : never writes more than maxL bytes at `op`. @return : nb of bytes written (<= maxL) */
+    const size_t val = BIT_lookBitsFast(DStream, dtLog);   /* note : dtLog >= 1 */
+    U32 length = dd[val].nbBytes;
+    if (length <= maxL)   /* the whole entry fits : copy exactly `length` bytes and consume its bits */
+    {
+        memcpy(op, ds+val, length);
+        BIT_skipBits(DStream, dd[val].nbBits);
+        return length;
+    }
+    memcpy(op, ds+val, maxL);   /* entry is longer than the remaining room : keep only its first maxL bytes */
+    if (DStream->bitsConsumed < (sizeof(DStream->bitContainer)*8))
+    {
+        BIT_skipBits(DStream, dd[val].nbBits);
+        if (DStream->bitsConsumed > (sizeof(DStream->bitContainer)*8))
+            DStream->bitsConsumed = (sizeof(DStream->bitContainer)*8);   /* ugly hack : clamp to container width; works only because it's the last symbol. Note : can't easily extract nbBits from just this symbol */
+    }
+    return maxL;
+}
+
+
+#define HUF_DECODE_SYMBOLX6_0(ptr, DStreamPtr) \
+    ptr += HUF_decodeSymbolX6(ptr, DStreamPtr, dd, ds, dtLog)   /* unconditional decode; expects `dd`, `ds` and `dtLog` to exist in the expanding scope */
+/* _1 variant : decodes only if MEM_64bits() is true or HUF_MAX_TABLELOG <= 12 */
+#define HUF_DECODE_SYMBOLX6_1(ptr, DStreamPtr) \
+    if (MEM_64bits() || (HUF_MAX_TABLELOG<=12)) \
+        HUF_DECODE_SYMBOLX6_0(ptr, DStreamPtr)
+/* _2 variant : decodes only if MEM_64bits() is true */
+#define HUF_DECODE_SYMBOLX6_2(ptr, DStreamPtr) \
+    if (MEM_64bits()) \
+        HUF_DECODE_SYMBOLX6_0(ptr, DStreamPtr)
+
+static inline size_t HUF_decodeStreamX6(BYTE* p, BIT_DStream_t* bitDPtr, BYTE* const pEnd, const U32* DTable, const U32 dtLog)
+{   /* Decode one bitstream into [p, pEnd). @return : nb of bytes written (final p - initial p) */
+    const void* ddPtr = DTable+1;   /* descriptors start right after the DTable[0] header cell */
+    const HUF_DDescX6* dd = (const HUF_DDescX6*)(ddPtr);
+    const void* dsPtr = DTable + 1 + ((size_t)1<<(dtLog-1));   /* sequences start (1<<(dtLog-1)) U32 cells after the descriptors' start */
+    const HUF_DSeqX6* ds = (const HUF_DSeqX6*)(dsPtr);
+    BYTE* const pStart = p;
+
+    /* up to 16 symbols at a time */
+    while ((BIT_reloadDStream(bitDPtr) == BIT_DStream_unfinished) && (p <= pEnd-16))
+    {
+        HUF_DECODE_SYMBOLX6_2(p, bitDPtr);   /* _2 and _1 are conditional decodes (see macro definitions) : 1 to 4 entries per reload */
+        HUF_DECODE_SYMBOLX6_1(p, bitDPtr);
+        HUF_DECODE_SYMBOLX6_2(p, bitDPtr);
+        HUF_DECODE_SYMBOLX6_0(p, bitDPtr);
+    }
+
+    /* closer to the end, up to 4 symbols at a time */
+    while ((BIT_reloadDStream(bitDPtr) == BIT_DStream_unfinished) && (p <= pEnd-4))
+        HUF_DECODE_SYMBOLX6_0(p, bitDPtr);
+
+    while (p <= pEnd-4)
+        HUF_DECODE_SYMBOLX6_0(p, bitDPtr);   /* no need to reload : reached the end of DStream */
+
+    while (p < pEnd)   /* fewer than 4 bytes of room left : use the bounded decoder so nothing is written at or beyond pEnd */
+        p += HUF_decodeLastSymbolsX6(p, (U32)(pEnd-p), bitDPtr, dd, ds, dtLog);
+
+    return p-pStart;
+}
+
+
+
+static size_t HUF_decompress4X6_usingDTable(
+          void* dst,  size_t dstSize,
+    const void* cSrc, size_t cSrcSize,
+    const U32* DTable)
+{   /* Decode 4 bitstreams (6-byte jump table + 4 streams in cSrc) into dst, using an already built X6 DTable. @return : dstSize, or an error code */
+    if (cSrcSize < 10) return ERROR(corruption_detected);   /* strict minimum : jump table + 1 byte per stream */
+
+    {
+        const BYTE* const istart = (const BYTE*) cSrc;
+        BYTE* const ostart = (BYTE*) dst;
+        BYTE* const oend = ostart + dstSize;
+
+        const U32 dtLog = DTable[0];   /* table log is stored in the first DTable cell */
+        const void* ddPtr = DTable+1;
+        const HUF_DDescX6* dd = (const HUF_DDescX6*)(ddPtr);   /* dd, ds and dtLog are consumed implicitly by the HUF_DECODE_SYMBOLX6_* macros below */
+        const void* dsPtr = DTable + 1 + ((size_t)1<<(dtLog-1));
+        const HUF_DSeqX6* ds = (const HUF_DSeqX6*)(dsPtr);
+        size_t errorCode;
+
+        /* Init */
+        BIT_DStream_t bitD1;
+        BIT_DStream_t bitD2;
+        BIT_DStream_t bitD3;
+        BIT_DStream_t bitD4;
+        const size_t length1 = MEM_readLE16(istart);   /* jump table : compressed sizes of streams 1..3, 16-bit little-endian each */
+        const size_t length2 = MEM_readLE16(istart+2);
+        const size_t length3 = MEM_readLE16(istart+4);
+        size_t length4;
+        const BYTE* const istart1 = istart + 6;  /* jumpTable */
+        const BYTE* const istart2 = istart1 + length1;
+        const BYTE* const istart3 = istart2 + length2;
+        const BYTE* const istart4 = istart3 + length3;
+        const size_t segmentSize = (dstSize+3) / 4;   /* output size of each of the first 3 streams; the 4th one fills what remains up to oend */
+        BYTE* const opStart2 = ostart + segmentSize;
+        BYTE* const opStart3 = opStart2 + segmentSize;
+        BYTE* const opStart4 = opStart3 + segmentSize;   /* NOTE(review) : 3*segmentSize > dstSize when dstSize is 1, 2 or 5, and opStart4 is not checked against oend here - confirm callers never pass such sizes */
+        BYTE* op1 = ostart;
+        BYTE* op2 = opStart2;
+        BYTE* op3 = opStart3;
+        BYTE* op4 = opStart4;
+        U32 endSignal;
+
+        length4 = cSrcSize - (length1 + length2 + length3 + 6);   /* 4th stream size is implicit : whatever is left of the input */
+        if (length4 > cSrcSize) return ERROR(corruption_detected);   /* overflow */   /* unsigned wrap-around : the 3 declared sizes exceed the input */
+        errorCode = BIT_initDStream(&bitD1, istart1, length1);
+        if (HUF_isError(errorCode)) return errorCode;
+        errorCode = BIT_initDStream(&bitD2, istart2, length2);
+        if (HUF_isError(errorCode)) return errorCode;
+        errorCode = BIT_initDStream(&bitD3, istart3, length3);
+        if (HUF_isError(errorCode)) return errorCode;
+        errorCode = BIT_initDStream(&bitD4, istart4, length4);
+        if (HUF_isError(errorCode)) return errorCode;
+
+        /* 16-64 symbols per loop (4-16 symbols per stream) */
+        endSignal = BIT_reloadDStream(&bitD1) | BIT_reloadDStream(&bitD2) | BIT_reloadDStream(&bitD3) | BIT_reloadDStream(&bitD4);
+        for ( ; (op3 <= opStart4) && (endSignal==BIT_DStream_unfinished) && (op4<=(oend-16)) ; )   /* runs while all 4 streams are unfinished, op3 is within its segment and op4 has 16 bytes of room */
+        {
+            HUF_DECODE_SYMBOLX6_2(op1, &bitD1);   /* the 4 streams are decoded in an interleaved order */
+            HUF_DECODE_SYMBOLX6_2(op2, &bitD2);
+            HUF_DECODE_SYMBOLX6_2(op3, &bitD3);
+            HUF_DECODE_SYMBOLX6_2(op4, &bitD4);
+            HUF_DECODE_SYMBOLX6_1(op1, &bitD1);
+            HUF_DECODE_SYMBOLX6_1(op2, &bitD2);
+            HUF_DECODE_SYMBOLX6_1(op3, &bitD3);
+            HUF_DECODE_SYMBOLX6_1(op4, &bitD4);
+            HUF_DECODE_SYMBOLX6_2(op1, &bitD1);
+            HUF_DECODE_SYMBOLX6_2(op2, &bitD2);
+            HUF_DECODE_SYMBOLX6_2(op3, &bitD3);
+            HUF_DECODE_SYMBOLX6_2(op4, &bitD4);
+            HUF_DECODE_SYMBOLX6_0(op1, &bitD1);
+            HUF_DECODE_SYMBOLX6_0(op2, &bitD2);
+            HUF_DECODE_SYMBOLX6_0(op3, &bitD3);
+            HUF_DECODE_SYMBOLX6_0(op4, &bitD4);
+
+            endSignal = BIT_reloadDStream(&bitD1) | BIT_reloadDStream(&bitD2) | BIT_reloadDStream(&bitD3) | BIT_reloadDStream(&bitD4);
+        }
+
+        /* check corruption */
+        if (op1 > opStart2) return ERROR(corruption_detected);   /* a stream wrote past the start of the next segment */
+        if (op2 > opStart3) return ERROR(corruption_detected);
+        if (op3 > opStart4) return ERROR(corruption_detected);
+        /* note : op4 supposed already verified within main loop */
+
+        /* finish bitStreams one by one */
+        HUF_decodeStreamX6(op1, &bitD1, opStart2, DTable, dtLog);   /* return values (bytes written) are not used */
+        HUF_decodeStreamX6(op2, &bitD2, opStart3, DTable, dtLog);
+        HUF_decodeStreamX6(op3, &bitD3, opStart4, DTable, dtLog);
+        HUF_decodeStreamX6(op4, &bitD4, oend,     DTable, dtLog);
+
+        /* check */
+        endSignal = BIT_endOfDStream(&bitD1) & BIT_endOfDStream(&bitD2) & BIT_endOfDStream(&bitD3) & BIT_endOfDStream(&bitD4);
+        if (!endSignal) return ERROR(corruption_detected);   /* every stream must report end-of-stream once its segment is filled */
+
+        /* decoded size */
+        return dstSize;
+    }
+}
+
+
+static size_t HUF_decompress4X6 (void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize)
+{   /* Read the X6 table description at the start of cSrc, then decode the 4 streams that follow. @return : dstSize, or an error code */
+    HUF_CREATE_STATIC_DTABLEX6(DTable, HUF_MAX_TABLELOG);
+    const BYTE* ip = (const BYTE*) cSrc;
+
+    size_t hSize = HUF_readDTableX6 (DTable, cSrc, cSrcSize);   /* hSize : nb of input bytes used by the table description */
+    if (HUF_isError(hSize)) return hSize;
+    if (hSize >= cSrcSize) return ERROR(srcSize_wrong);   /* nothing would be left for the bitstreams */
+    ip += hSize;
+    cSrcSize -= hSize;
+
+    return HUF_decompress4X6_usingDTable (dst, dstSize, ip, cSrcSize, DTable);
+}
+
+
+/**********************************/
+/* Generic decompression selector */
+/**********************************/
+
+typedef struct { U32 tableTime; U32 decode256Time; } algo_time_t;   /* cost model entry : HUF_decompress() estimates tableTime + decode256Time * (dstSize >> 8) */
+static const algo_time_t algoTime[16 /* Quantization */][3 /* single, double, quad */] =   /* row index Q = cSrcSize * 16 / dstSize, column index = position in HUF_decompress()'s decoder array */
+{
+    /* single, double, quad */
+    {{0,0}, {1,1}, {2,2}},  /* Q==0 : impossible */
+    {{0,0}, {1,1}, {2,2}},  /* Q==1 : impossible */
+    {{  38,130}, {1313, 74}, {2151, 38}},   /* Q == 2 : 12-18% */
+    {{ 448,128}, {1353, 74}, {2238, 41}},   /* Q == 3 : 18-25% */
+    {{ 556,128}, {1353, 74}, {2238, 47}},   /* Q == 4 : 25-32% */
+    {{ 714,128}, {1418, 74}, {2436, 53}},   /* Q == 5 : 32-38% */
+    {{ 883,128}, {1437, 74}, {2464, 61}},   /* Q == 6 : 38-44% */
+    {{ 897,128}, {1515, 75}, {2622, 68}},   /* Q == 7 : 44-50% */
+    {{ 926,128}, {1613, 75}, {2730, 75}},   /* Q == 8 : 50-56% */
+    {{ 947,128}, {1729, 77}, {3359, 77}},   /* Q == 9 : 56-62% */
+    {{1107,128}, {2083, 81}, {4006, 84}},   /* Q ==10 : 62-69% */
+    {{1177,128}, {2379, 87}, {4785, 88}},   /* Q ==11 : 69-75% */
+    {{1242,128}, {2415, 93}, {5155, 84}},   /* Q ==12 : 75-81% */
+    {{1349,128}, {2644,106}, {5260,106}},   /* Q ==13 : 81-87% */
+    {{1455,128}, {2422,124}, {4174,124}},   /* Q ==14 : 87-93% */
+    {{ 722,128}, {1891,145}, {1936,146}},   /* Q ==15 : 93-99% */
+};
+
+typedef size_t (*decompressionAlgo)(void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize);   /* common signature of the 3 decoders selectable by HUF_decompress() */
+
+static size_t HUF_decompress (void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize)
+{   /* Regenerate exactly dstSize bytes from cSrc, picking the decoder with the lowest estimated cost. @return : dstSize, or an error code */
+    static const decompressionAlgo decompress[3] = { HUF_decompress4X2, HUF_decompress4X4, HUF_decompress4X6 };
+    /* estimate decompression time */
+    U32 Q;
+    const U32 D256 = (U32)(dstSize >> 8);   /* output size in units of 256 bytes */
+    U32 Dtime[3];
+    U32 algoNb = 0;
+    int n;
+
+    /* validation checks */
+    if (dstSize == 0) return ERROR(dstSize_tooSmall);
+    if (cSrcSize > dstSize) return ERROR(corruption_detected);   /* invalid */
+    if (cSrcSize == dstSize) { memcpy(dst, cSrc, dstSize); return dstSize; }   /* not compressed */
+    if (cSrcSize == 1) { memset(dst, *(const BYTE*)cSrc, dstSize); return dstSize; }   /* RLE */
+
+    /* decoder timing evaluation */
+    Q = (U32)(cSrcSize * 16 / dstSize);   /* Q < 16 since dstSize > cSrcSize */
+    for (n=0; n<3; n++)
+        Dtime[n] = algoTime[Q][n].tableTime + (algoTime[Q][n].decode256Time * D256);
+
+    Dtime[1] += Dtime[1] >> 4; Dtime[2] += Dtime[2] >> 3; /* advantage to algorithms using less memory, for cache eviction */   /* i.e. +1/16 penalty on decoder 1, +1/8 on decoder 2 */
+
+    if (Dtime[1] < Dtime[0]) algoNb = 1;   /* ties keep the lower-numbered decoder */
+    if (Dtime[2] < Dtime[algoNb]) algoNb = 2;
+
+    return decompress[algoNb](dst, dstSize, cSrc, cSrcSize);
+
+    //return HUF_decompress4X2(dst, dstSize, cSrc, cSrcSize);   /* multi-streams single-symbol decoding */
+    //return HUF_decompress4X4(dst, dstSize, cSrc, cSrcSize);   /* multi-streams double-symbols decoding */
+    //return HUF_decompress4X6(dst, dstSize, cSrc, cSrcSize);   /* multi-streams quad-symbols decoding */
+}
+/*
+    zstd - standard compression library
+    Copyright (C) 2014-2015, Yann Collet.
+
+    BSD 2-Clause License (http://www.opensource.org/licenses/bsd-license.php)
+
+    Redistribution and use in source and binary forms, with or without
+    modification, are permitted provided that the following conditions are
+    met:
+    * Redistributions of source code must retain the above copyright
+    notice, this list of conditions and the following disclaimer.
+    * Redistributions in binary form must reproduce the above
+    copyright notice, this list of conditions and the following disclaimer
+    in the documentation and/or other materials provided with the
+    distribution.
+    THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+    "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+    LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+    A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+    OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+    SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+    LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+    DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+    THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+    (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+    OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+    You can contact the author at :
+    - zstd source repository : https://github.com/Cyan4973/zstd
+    - zstd public forum : https://groups.google.com/forum/#!forum/lz4c
+*/
+
+/* ***************************************************************
+*  Tuning parameters
+*****************************************************************/
+/*!
+*  MEMORY_USAGE :
+*  Memory usage formula : N->2^N Bytes (examples : 10 -> 1KB; 12 -> 4KB ; 16 -> 64KB; 20 -> 1MB; etc.)
+*  Increasing memory usage improves compression ratio
+*  Reduced memory usage can improve speed, due to cache effect
+*/
+#define ZSTD_MEMORY_USAGE 17
+
+/*!
+ * HEAPMODE :
+ * Select how default compression functions will allocate memory for their hash table,
+ * in memory stack (0, fastest), or in memory heap (1, requires malloc())
+ * Note that compression context is fairly large, as a consequence heap memory is recommended.
+ */
+#ifndef ZSTD_HEAPMODE
+#  define ZSTD_HEAPMODE 1
+#endif /* ZSTD_HEAPMODE */
+
+/*!
+*  LEGACY_SUPPORT :
+*  decompressor can decode older formats (starting from Zstd 0.1+)
+*/
+#ifndef ZSTD_LEGACY_SUPPORT
+#  define ZSTD_LEGACY_SUPPORT 1
+#endif
+
+
+/* *******************************************************
+*  Includes
+*********************************************************/
+#include <stdlib.h>      /* calloc */
+#include <string.h>      /* memcpy, memmove */
+#include <stdio.h>       /* debug : printf */
+
+
+/* *******************************************************
+*  Compiler specifics
+*********************************************************/
+#ifdef __AVX2__
+#  include <immintrin.h>   /* AVX2 intrinsics */
+#endif
+
+#ifdef _MSC_VER    /* Visual Studio */
+#  include <intrin.h>                    /* For Visual 2005 */
+#  pragma warning(disable : 4127)        /* disable: C4127: conditional expression is constant */
+#  pragma warning(disable : 4324)        /* disable: C4324: padded structure */
+#endif
+
+
+/* *******************************************************
+*  Constants
+*********************************************************/
+#define HASH_LOG (ZSTD_MEMORY_USAGE - 2)
+#define HASH_TABLESIZE (1 << HASH_LOG)
+#define HASH_MASK (HASH_TABLESIZE - 1)
+
+#define KNUTH 2654435761
+
+#define BIT7 128
+#define BIT6  64
+#define BIT5  32
+#define BIT4  16
+#define BIT1   2
+#define BIT0   1
+
+#define KB *(1 <<10)   /* postfix multipliers : `128 KB` expands to `128 *(1 <<10)` */
+#define MB *(1 <<20)
+#define GB *(1U<<30)
+
+#define BLOCKSIZE (128 KB)                 /* define, for static allocation */
+#define MIN_SEQUENCES_SIZE (2 /*seqNb*/ + 2 /*dumps*/ + 3 /*seqTables*/ + 1 /*bitStream*/)
+#define MIN_CBLOCK_SIZE (3 /*litCSize*/ + MIN_SEQUENCES_SIZE)   /* = 11 : smallest compressed block accepted by ZSTD_decodeLiteralsBlock() */
+#define IS_RAW BIT0   /* literals-section type, tested against the low 2 bits of the section's first byte */
+#define IS_RLE BIT1
+
+#define WORKPLACESIZE (BLOCKSIZE*3)
+#define MINMATCH 4   /* added to every decoded match length */
+#define MLbits   7
+#define LLbits   6
+#define Offbits  5
+#define MaxML  ((1<<MLbits )-1)   /* = 127 : also the escape value announcing an extended match length */
+#define MaxLL  ((1<<LLbits )-1)   /* = 63 : also the escape value announcing an extended literal length */
+#define MaxOff   31
+#define LitFSELog  11
+#define MLFSELog   10   /* largest table log accepted for each sequence table in ZSTD_decodeSeqHeaders() */
+#define LLFSELog   10
+#define OffFSELog   9
+#define MAX(a,b) ((a)<(b)?(b):(a))   /* function-like macro : arguments may be evaluated twice */
+#define MaxSeq MAX(MaxLL, MaxML)
+
+#define LITERAL_NOENTROPY 63
+#define COMMAND_NOENTROPY 7   /* to remove */
+
+static const size_t ZSTD_blockHeaderSize = 3;   /* matches the 3 header bytes parsed by ZSTD_getcBlockSize() */
+static const size_t ZSTD_frameHeaderSize = 4;
+
+
+/* *******************************************************
+*  Memory operations
+**********************************************************/
+static void   ZSTD_copy4(void* dst, const void* src) { memcpy(dst, src, 4); }   /* fixed-size 4-byte copy */
+
+static void   ZSTD_copy8(void* dst, const void* src) { memcpy(dst, src, 8); }   /* fixed-size 8-byte copy */
+
+#define COPY8(d,s) { ZSTD_copy8(d,s); d+=8; s+=8; }   /* copies 8 bytes, then advances both pointers (arguments are modified) */
+
+/*! ZSTD_wildcopy : custom version of memcpy(), can copy up to 7-8 bytes too many */
+static void ZSTD_wildcopy(void* dst, const void* src, ptrdiff_t length)
+{   /* copies in 8-byte steps until `length` bytes are covered; the do/while performs at least one step, even when length <= 0 */
+    const BYTE* ip = (const BYTE*)src;
+    BYTE* op = (BYTE*)dst;
+    BYTE* const oend = op + length;
+    do COPY8(op, ip) while (op < oend);
+}
+
+
+/* **************************************
+*  Local structures
+****************************************/
+typedef enum { bt_compressed, bt_raw, bt_rle, bt_end } blockType_t;   /* values 0..3 : ZSTD_getcBlockSize() casts the top 2 bits of the block header to this type */
+
+typedef struct
+{
+    blockType_t blockType;
+    U32 origSize;   /* set by ZSTD_getcBlockSize() : the header's size field for bt_rle blocks, 0 otherwise */
+} blockProperties_t;
+
+typedef struct {   /* NOTE(review) : not referenced by the decoding functions that follow - presumably inherited from the compressor side; confirm before relying on it */
+    void* buffer;
+    U32*  offsetStart;
+    U32*  offset;
+    BYTE* offCodeStart;
+    BYTE* offCode;
+    BYTE* litStart;
+    BYTE* lit;
+    BYTE* litLengthStart;
+    BYTE* litLength;
+    BYTE* matchLengthStart;
+    BYTE* matchLength;
+    BYTE* dumpsStart;
+    BYTE* dumps;
+} seqStore_t;
+
+
+/* *************************************
+*  Error Management
+***************************************/
+/*! ZSTD_isError
+*   tells if a return value is an error code (thin wrapper : the result is whatever ERR_isError() returns for `code`) */
+static unsigned ZSTD_isError(size_t code) { return ERR_isError(code); }
+
+
+
+/* *************************************************************
+*   Decompression section
+***************************************************************/
+struct ZSTD_DCtx_s
+{
+    U32 LLTable[FSE_DTABLE_SIZE_U32(LLFSELog)];   /* FSE decoding table for literal lengths; ZSTD_decompressSequences() hands it to ZSTD_decodeSeqHeaders() to be rebuilt */
+    U32 OffTable[FSE_DTABLE_SIZE_U32(OffFSELog)];   /* same, for offset codes */
+    U32 MLTable[FSE_DTABLE_SIZE_U32(MLFSELog)];   /* same, for match lengths */
+    void* previousDstEnd;
+    void* base;   /* passed as `base` to ZSTD_execSequence(), which rejects any match starting below it */
+    size_t expected;
+    blockType_t bType;
+    U32 phase;
+    const BYTE* litPtr;   /* current block's literals, set by ZSTD_decodeLiteralsBlock() : either litBuffer or a position inside the compressed input */
+    size_t litSize;   /* nb of literal bytes available at litPtr */
+    BYTE litBuffer[BLOCKSIZE + 8 /* margin for wildcopy */];
+};   /* typedef'd to ZSTD_Dctx within "zstd_static.h" */
+
+
+static size_t ZSTD_getcBlockSize(const void* src, size_t srcSize, blockProperties_t* bpPtr)
+{   /* Parse a 3-byte block header into *bpPtr. @return : 0 for bt_end, 1 for bt_rle, the header's size field otherwise, or an error code */
+    const BYTE* const in = (const BYTE* const)src;
+    BYTE headerFlags;
+    U32 cSize;
+
+    if (srcSize < 3) return ERROR(srcSize_wrong);
+
+    headerFlags = *in;
+    cSize = in[2] + (in[1]<<8) + ((in[0] & 7)<<16);   /* 19-bit size, most significant part first : low 3 bits of byte 0, then byte 1, then byte 2 */
+
+    bpPtr->blockType = (blockType_t)(headerFlags >> 6);   /* block type lives in the top 2 bits of byte 0 */
+    bpPtr->origSize = (bpPtr->blockType == bt_rle) ? cSize : 0;   /* for RLE blocks the size field is stored as origSize */
+
+    if (bpPtr->blockType == bt_end) return 0;
+    if (bpPtr->blockType == bt_rle) return 1;
+    return cSize;
+}
+
+static size_t ZSTD_copyUncompressedBlock(void* dst, size_t maxDstSize, const void* src, size_t srcSize)
+{   /* Copy a raw block as-is. @return : srcSize, or dstSize_tooSmall error when it does not fit in maxDstSize */
+    if (srcSize > maxDstSize) return ERROR(dstSize_tooSmall);
+    memcpy(dst, src, srcSize);
+    return srcSize;
+}
+
+
+/** ZSTD_decompressLiterals : Huffman-decode a literals section (5-byte header + litCSize bytes) into dst; on success *maxDstSizePtr receives the regenerated size
+    @return : nb of bytes read from src (litCSize + 5), or an error code*/
+static size_t ZSTD_decompressLiterals(void* dst, size_t* maxDstSizePtr,
+                                const void* src, size_t srcSize)
+{
+    const BYTE* ip = (const BYTE*)src;
+
+    const size_t litSize = (MEM_readLE32(src) & 0x1FFFFF) >> 2;   /* no buffer issue : srcSize >= MIN_CBLOCK_SIZE */   /* regenerated size : bits 2..20 of the header */
+    const size_t litCSize = (MEM_readLE32(ip+2) & 0xFFFFFF) >> 5;   /* no buffer issue : srcSize >= MIN_CBLOCK_SIZE */   /* compressed size : bits 5..23 of the 32-bit value read at byte 2 */
+
+    if (litSize > *maxDstSizePtr) return ERROR(corruption_detected);   /* on entry *maxDstSizePtr is the capacity of dst */
+    if (litCSize + 5 > srcSize) return ERROR(corruption_detected);
+
+    if (HUF_isError(HUF_decompress(dst, litSize, ip+5, litCSize))) return ERROR(corruption_detected);   /* any Huffman failure is reported as corruption */
+
+    *maxDstSizePtr = litSize;
+    return litCSize + 5;
+}
+
+
+/** ZSTD_decodeLiteralsBlock : decode the literals section at the start of a compressed block and record its location in the context (litPtr / litSize)
+    @return : nb of bytes read from src (< srcSize ), or an error code */
+static size_t ZSTD_decodeLiteralsBlock(void* ctx,
+                          const void* src, size_t srcSize)
+{
+    ZSTD_DCtx* dctx = (ZSTD_DCtx*)ctx;
+    const BYTE* const istart = (const BYTE* const)src;
+
+    /* any compressed block with literals segment must be at least this size */
+    if (srcSize < MIN_CBLOCK_SIZE) return ERROR(corruption_detected);
+
+    switch(*istart & 3)   /* low 2 bits of the first byte select the literals encoding */
+    {
+    default:   /* any value other than IS_RAW / IS_RLE is handled as Huffman-compressed */
+    case 0:
+        {
+            size_t litSize = BLOCKSIZE;   /* in : capacity of litBuffer; out : regenerated size (left at BLOCKSIZE when decoding fails) */
+            const size_t readSize = ZSTD_decompressLiterals(dctx->litBuffer, &litSize, src, srcSize);
+            dctx->litPtr = dctx->litBuffer;
+            dctx->litSize = litSize;
+            memset(dctx->litBuffer + dctx->litSize, 0, 8);   /* zero the 8-byte wildcopy margin; litSize <= BLOCKSIZE so this stays inside litBuffer */
+            return readSize;   /* works if it's an error too */
+        }
+    case IS_RAW:
+        {
+            const size_t litSize = (MEM_readLE32(istart) & 0xFFFFFF) >> 2;   /* no buffer issue : srcSize >= MIN_CBLOCK_SIZE */
+            if (litSize > srcSize-11)   /* risk of reading too far with wildcopy */
+            {
+                if ((litSize > BLOCKSIZE) || (litSize > srcSize-3)) return ERROR(corruption_detected);   /* must fit in litBuffer (memcpy + 8-byte margin below) and in the input */
+                memcpy(dctx->litBuffer, istart, litSize);   /* NOTE(review) : copies from istart, whereas the direct-reference path below starts at istart+3 - confirm which is intended */
+                dctx->litPtr = dctx->litBuffer;
+                dctx->litSize = litSize;
+                memset(dctx->litBuffer + dctx->litSize, 0, 8);
+                return litSize+3;
+            }
+            /* direct reference into compressed stream */
+            dctx->litPtr = istart+3;   /* at least 8 more input bytes follow the literals here, so no copy is made */
+            dctx->litSize = litSize;
+            return litSize+3;
+        }
+    case IS_RLE:
+        {
+            const size_t litSize = (MEM_readLE32(istart) & 0xFFFFFF) >> 2;   /* no buffer issue : srcSize >= MIN_CBLOCK_SIZE */
+            if (litSize > BLOCKSIZE) return ERROR(corruption_detected);
+            memset(dctx->litBuffer, istart[3], litSize + 8);   /* repeated byte is istart[3]; the 8-byte margin is filled with it too */
+            dctx->litPtr = dctx->litBuffer;
+            dctx->litSize = litSize;
+            return 4;   /* 3 header bytes + the repeated byte */
+        }
+    }
+}
+
+
+static size_t ZSTD_decodeSeqHeaders(int* nbSeq, const BYTE** dumpsPtr, size_t* dumpsLengthPtr,
+                         FSE_DTable* DTableLL, FSE_DTable* DTableML, FSE_DTable* DTableOffb,
+                         const void* src, size_t srcSize)
+{   /* Parse the sequences-section header : nb of sequences, `dumps` area, then builds the 3 FSE decoding tables. @return : nb of bytes read from src, or an error code */
+    const BYTE* const istart = (const BYTE* const)src;
+    const BYTE* ip = istart;
+    const BYTE* const iend = istart + srcSize;
+    U32 LLtype, Offtype, MLtype;
+    U32 LLlog, Offlog, MLlog;
+    size_t dumpsLength;
+
+    /* check */
+    if (srcSize < 5) return ERROR(srcSize_wrong);
+
+    /* SeqHead */
+    *nbSeq = MEM_readLE16(ip); ip+=2;
+    LLtype  = *ip >> 6;   /* flag byte : bits 7-6 = LL table type, 5-4 = Off type, 3-2 = ML type, bit 1 = long dumpsLength, bit 0 = dumpsLength high bit (short form) */
+    Offtype = (*ip >> 4) & 3;
+    MLtype  = (*ip >> 2) & 3;
+    if (*ip & 2)
+    {
+        dumpsLength  = ip[2];   /* long form : 16-bit length in the 2 bytes following the flag byte, high byte first */
+        dumpsLength += ip[1] << 8;
+        ip += 3;
+    }
+    else
+    {
+        dumpsLength  = ip[1];   /* short form : 9-bit length, bit 0 of the flag byte being the high bit */
+        dumpsLength += (ip[0] & 1) << 8;
+        ip += 2;
+    }
+    *dumpsPtr = ip;
+    ip += dumpsLength;   /* the dumps area is skipped here; ZSTD_decodeSequence() reads from it */
+    *dumpsLengthPtr = dumpsLength;
+
+    /* check */
+    if (ip > iend-3) return ERROR(srcSize_wrong); /* min : all 3 are "raw", hence no header, but at least xxLog bits per type */
+
+    /* sequences */
+    {
+        S16 norm[MaxML+1];    /* assumption : MaxML >= MaxLL and MaxOff */
+        size_t headerSize;
+
+        /* Build DTables */
+        switch(LLtype)
+        {
+        case bt_rle :
+            LLlog = 0;
+            FSE_buildDTable_rle(DTableLL, *ip++); break;   /* no bound check needed here : the check above guarantees ip <= iend-3 */
+        case bt_raw :
+            LLlog = LLbits;
+            FSE_buildDTable_raw(DTableLL, LLbits); break;
+        default :   /* any other type value : normalized counts are read from the stream */
+            {   U32 max = MaxLL;
+                headerSize = FSE_readNCount(norm, &max, &LLlog, ip, iend-ip);
+                if (FSE_isError(headerSize)) return ERROR(GENERIC);
+                if (LLlog > LLFSELog) return ERROR(corruption_detected);   /* table would not fit the LLFSELog-sized DTable */
+                ip += headerSize;
+                FSE_buildDTable(DTableLL, norm, max, LLlog);
+        }   }
+
+        switch(Offtype)
+        {
+        case bt_rle :
+            Offlog = 0;
+            if (ip > iend-2) return ERROR(srcSize_wrong);   /* min : "raw", hence no header, but at least xxLog bits */
+            FSE_buildDTable_rle(DTableOffb, *ip++ & MaxOff); /* if *ip > MaxOff, data is corrupted */   /* masked so the symbol stays a valid offset code */
+            break;
+        case bt_raw :
+            Offlog = Offbits;
+            FSE_buildDTable_raw(DTableOffb, Offbits); break;
+        default :
+            {   U32 max = MaxOff;
+                headerSize = FSE_readNCount(norm, &max, &Offlog, ip, iend-ip);
+                if (FSE_isError(headerSize)) return ERROR(GENERIC);
+                if (Offlog > OffFSELog) return ERROR(corruption_detected);
+                ip += headerSize;
+                FSE_buildDTable(DTableOffb, norm, max, Offlog);
+        }   }
+
+        switch(MLtype)
+        {
+        case bt_rle :
+            MLlog = 0;
+            if (ip > iend-2) return ERROR(srcSize_wrong); /* min : "raw", hence no header, but at least xxLog bits */
+            FSE_buildDTable_rle(DTableML, *ip++); break;
+        case bt_raw :
+            MLlog = MLbits;
+            FSE_buildDTable_raw(DTableML, MLbits); break;
+        default :
+            {   U32 max = MaxML;
+                headerSize = FSE_readNCount(norm, &max, &MLlog, ip, iend-ip);
+                if (FSE_isError(headerSize)) return ERROR(GENERIC);
+                if (MLlog > MLFSELog) return ERROR(corruption_detected);
+                ip += headerSize;
+                FSE_buildDTable(DTableML, norm, max, MLlog);
+    }   }   }
+
+    return ip-istart;   /* position where the sequences bitstream starts, relative to src */
+}
+
+
+typedef struct {   /* one decoded sequence, as consumed by ZSTD_execSequence() */
+    size_t litLength;   /* nb of literals to copy first */
+    size_t offset;   /* distance back from the end of those literals to the match start */
+    size_t matchLength;   /* nb of match bytes to copy (MINMATCH already included) */
+} seq_t;
+
+typedef struct {   /* running state of the sequence decoder, updated by ZSTD_decodeSequence() */
+    BIT_DStream_t DStream;   /* bitstream shared by the 3 FSE states below */
+    FSE_DState_t stateLL;
+    FSE_DState_t stateOffb;
+    FSE_DState_t stateML;
+    size_t prevOffset;   /* offset of the sequence before the previous one; reused when offsetCode == 0 and litLength == 0 */
+    const BYTE* dumps;   /* next unread byte of the `dumps` area, which holds extended lengths */
+    const BYTE* dumpsEnd;
+} seqState_t;
+
+
+static void ZSTD_decodeSequence(seq_t* seq, seqState_t* seqState)
+{   /* Decode the next sequence. On entry *seq must hold the previous sequence (its offset is read); on exit it holds the new one */
+    size_t litLength;
+    size_t prevOffset;
+    size_t offset;
+    size_t matchLength;
+    const BYTE* dumps = seqState->dumps;
+    const BYTE* const de = seqState->dumpsEnd;
+
+    /* Literal length */
+    litLength = FSE_decodeSymbol(&(seqState->stateLL), &(seqState->DStream));
+    prevOffset = litLength ? seq->offset : seqState->prevOffset;   /* repeat-offset candidate : previous sequence's offset, or the one before it when there are no literals */
+    seqState->prevOffset = seq->offset;
+    if (litLength == MaxLL)   /* escape value : the real length continues in the dumps area */
+    {
+        U32 add = *dumps++;
+        if (add < 255) litLength += add;   /* 1 extra byte, added to MaxLL */
+        else
+        {
+            litLength = MEM_readLE32(dumps) & 0xFFFFFF;  /* no pb : dumps is always followed by seq tables > 1 byte */   /* 255 marker : a 24-bit little-endian value replaces the length */
+            dumps += 3;
+        }
+        if (dumps >= de) dumps = de-1;   /* late correction, to avoid read overflow (data is now corrupted anyway) */
+    }
+
+    /* Offset */
+    {
+        static const size_t offsetPrefix[MaxOff+1] = {  /* note : size_t faster than U32 */
+                1 /*fake*/, 1, 2, 4, 8, 16, 32, 64, 128, 256,
+                512, 1024, 2048, 4096, 8192, 16384, 32768, 65536, 131072, 262144,
+                524288, 1048576, 2097152, 4194304, 8388608, 16777216, 33554432, /*fake*/ 1, 1, 1, 1, 1 };
+        U32 offsetCode, nbBits;
+        offsetCode = FSE_decodeSymbol(&(seqState->stateOffb), &(seqState->DStream));   /* <= maxOff, by table construction */
+        if (MEM_32bits()) BIT_reloadDStream(&(seqState->DStream));
+        nbBits = offsetCode - 1;   /* offset = offsetPrefix[code] + (code-1) extra bits read from the stream */
+        if (offsetCode==0) nbBits = 0;   /* cmove */
+        offset = offsetPrefix[offsetCode] + BIT_readBits(&(seqState->DStream), nbBits);
+        if (MEM_32bits()) BIT_reloadDStream(&(seqState->DStream));
+        if (offsetCode==0) offset = prevOffset;   /* cmove */   /* code 0 means : reuse the repeat offset selected above */
+    }
+
+    /* MatchLength */
+    matchLength = FSE_decodeSymbol(&(seqState->stateML), &(seqState->DStream));
+    if (matchLength == MaxML)   /* same escape scheme as for literal lengths */
+    {
+        U32 add = *dumps++;
+        if (add < 255) matchLength += add;
+        else
+        {
+            matchLength = MEM_readLE32(dumps) & 0xFFFFFF;  /* no pb : dumps is always followed by seq tables > 1 byte */
+            dumps += 3;
+        }
+        if (dumps >= de) dumps = de-1;   /* late correction, to avoid read overflow (data is now corrupted anyway) */
+    }
+    matchLength += MINMATCH;   /* decoded value excludes the minimum match length */
+
+    /* save result */
+    seq->litLength = litLength;
+    seq->offset = offset;
+    seq->matchLength = matchLength;
+    seqState->dumps = dumps;
+}
+
+
+static size_t ZSTD_execSequence(BYTE* op,
+                                seq_t sequence,
+                                const BYTE** litPtr, const BYTE* const litLimit,
+                                BYTE* const base, BYTE* const oend)
+{   /* Execute one sequence at `op` : copy litLength literals from *litPtr (which is advanced), then matchLength bytes from `offset` bytes back. @return : litLength + matchLength, or an error code */
+    static const int dec32table[] = {0, 1, 2, 1, 4, 4, 4, 4};   /* added */
+    static const int dec64table[] = {8, 8, 8, 7, 8, 9,10,11};   /* subtracted */
+    const BYTE* const ostart = op;
+    BYTE* const oLitEnd = op + sequence.litLength;
+    BYTE* const oMatchEnd = op + sequence.litLength + sequence.matchLength;   /* risk : address space overflow (32-bits) */
+    BYTE* const oend_8 = oend-8;
+    const BYTE* const litEnd = *litPtr + sequence.litLength;
+
+    /* checks */
+    if (oLitEnd > oend_8) return ERROR(dstSize_tooSmall);   /* last match must start at a minimum distance of 8 from oend */
+    if (oMatchEnd > oend) return ERROR(dstSize_tooSmall);   /* overwrite beyond dst buffer */
+    if (litEnd > litLimit) return ERROR(corruption_detected);   /* overRead beyond lit buffer */
+
+    /* copy Literals */
+    ZSTD_wildcopy(op, *litPtr, sequence.litLength);   /* note : oLitEnd <= oend-8 : no risk of overwrite beyond oend */
+    op = oLitEnd;
+    *litPtr = litEnd;   /* update for next sequence */
+
+    /* copy Match */
+    {
+        const BYTE* match = op - sequence.offset;
+
+        /* check */
+        if (sequence.offset > (size_t)op) return ERROR(corruption_detected);   /* address space overflow test (this test seems kept by clang optimizer) */
+        //if (match > op) return ERROR(corruption_detected);   /* address space overflow test (is clang optimizer removing this test ?) */
+        if (match < base) return ERROR(corruption_detected);   /* match would start before the lowest referenceable address */
+
+        /* close range match, overlap */
+        if (sequence.offset < 8)
+        {
+            const int dec64 = dec64table[sequence.offset];
+            op[0] = match[0];   /* first 4 bytes are copied one at a time, so source and destination may overlap */
+            op[1] = match[1];
+            op[2] = match[2];
+            op[3] = match[3];
+            match += dec32table[sequence.offset];
+            ZSTD_copy4(op+4, match);
+            match -= dec64;   /* after the `+= 8` below, op - match is 8,8,9,8,10,12,14 for offsets 1..7 : a multiple of the offset and >= 8, so 8-byte copies can follow */
+        }
+        else
+        {
+            ZSTD_copy8(op, match);
+        }
+        op += 8; match += 8;
+
+        if (oMatchEnd > oend-(16-MINMATCH))   /* match ends within the last 12 bytes of the buffer : wildcopy could overrun oend */
+        {
+            if (op < oend_8)
+            {
+                ZSTD_wildcopy(op, match, oend_8 - op);
+                match += oend_8 - op;
+                op = oend_8;
+            }
+            while (op < oMatchEnd) *op++ = *match++;   /* finish byte by byte, writing exactly up to oMatchEnd */
+        }
+        else
+        {
+            ZSTD_wildcopy(op, match, (ptrdiff_t)sequence.matchLength-8);   /* works even if matchLength < 8 */
+        }
+    }
+
+    return oMatchEnd - ostart;
+}
+
+static size_t ZSTD_decompressSequences(
+                               void* ctx,
+                               void* dst, size_t maxDstSize,
+                         const void* seqStart, size_t seqSize)
+{
+    ZSTD_DCtx* dctx = (ZSTD_DCtx*)ctx;
+    const BYTE* ip = (const BYTE*)seqStart;
+    const BYTE* const iend = ip + seqSize;
+    BYTE* const ostart = (BYTE* const)dst;
+    BYTE* op = ostart;
+    BYTE* const oend = ostart + maxDstSize;
+    size_t errorCode, dumpsLength;
+    const BYTE* litPtr = dctx->litPtr;
+    const BYTE* const litEnd = litPtr + dctx->litSize;
+    int nbSeq;
+    const BYTE* dumps;
+    U32* DTableLL = dctx->LLTable;
+    U32* DTableML = dctx->MLTable;
+    U32* DTableOffb = dctx->OffTable;
+    BYTE* const base = (BYTE*) (dctx->base);
+
+    /* Build Decoding Tables */
+    errorCode = ZSTD_decodeSeqHeaders(&nbSeq, &dumps, &dumpsLength,
+                                      DTableLL, DTableML, DTableOffb,
+                                      ip, iend-ip);
+    if (ZSTD_isError(errorCode)) return errorCode;
+    ip += errorCode;
+
+    /* Regen sequences */
+    {
+        seq_t sequence;
+        seqState_t seqState;
+
+        memset(&sequence, 0, sizeof(sequence));
+        seqState.dumps = dumps;
+        seqState.dumpsEnd = dumps + dumpsLength;
+        seqState.prevOffset = 1;
+        errorCode = BIT_initDStream(&(seqState.DStream), ip, iend-ip);
+        if (ERR_isError(errorCode)) return ERROR(corruption_detected);
+        FSE_initDState(&(seqState.stateLL), &(seqState.DStream), DTableLL);
+        FSE_initDState(&(seqState.stateOffb), &(seqState.DStream), DTableOffb);
+        FSE_initDState(&(seqState.stateML), &(seqState.DStream), DTableML);
+
+        for ( ; (BIT_reloadDStream(&(seqState.DStream)) <= BIT_DStream_completed) && (nbSeq>0) ; )
+        {
+            size_t oneSeqSize;
+            nbSeq--;
+            ZSTD_decodeSequence(&sequence, &seqState);
+            oneSeqSize = ZSTD_execSequence(op, sequence, &litPtr, litEnd, base, oend);
+            if (ZSTD_isError(oneSeqSize)) return oneSeqSize;
+            op += oneSeqSize;
+        }
+
+        /* check if reached exact end */
+        if ( !BIT_endOfDStream(&(seqState.DStream)) ) return ERROR(corruption_detected);   /* requested too much : data is corrupted */
+        if (nbSeq<0) return ERROR(corruption_detected);   /* requested too many sequences : data is corrupted */
+
+        /* last literal segment */
+        {
+            size_t lastLLSize = litEnd - litPtr;
+            if (litPtr > litEnd) return ERROR(corruption_detected);
+            if (op+lastLLSize > oend) return ERROR(dstSize_tooSmall);
+            if (op != litPtr) memmove(op, litPtr, lastLLSize);
+            op += lastLLSize;
+        }
+    }
+
+    return op-ostart;
+}
+
+/* Decompresses one bt_compressed block : literals sub-block first, then the sequences that follow it. */
+static size_t ZSTD_decompressBlock(
+                            void* ctx,
+                            void* dst, size_t maxDstSize,
+                      const void* src, size_t srcSize)
+{
+    /* Step 1 : decode the literals sub-block; on success the result is used as the count of input bytes consumed */
+    size_t const literalsCSize = ZSTD_decodeLiteralsBlock(ctx, src, srcSize);
+    if (ZSTD_isError(literalsCSize))
+        return literalsCSize;
+
+    /* Step 2 : whatever input remains is handed to the sequence decoder */
+    {
+        const BYTE* const seqStart = (const BYTE*)src + literalsCSize;
+        return ZSTD_decompressSequences(ctx, dst, maxDstSize, seqStart, srcSize - literalsCSize);
+    }
+}
+
+/* Decompresses one complete frame held in src into dst; returns the number of bytes written, or an error code. */
+static size_t ZSTD_decompressDCtx(void* ctx, void* dst, size_t maxDstSize, const void* src, size_t srcSize)
+{
+    const BYTE* ip = (const BYTE*)src;
+    const BYTE* iend = ip + srcSize;
+    BYTE* const ostart = (BYTE* const)dst;
+    BYTE* op = ostart;
+    BYTE* const oend = ostart + maxDstSize;
+    size_t remainingSize = srcSize;
+    U32 magicNumber;
+    blockProperties_t blockProperties;
+
+    /* Frame Header : the input must hold at least the frame header plus one block header, and start with the magic number */
+    if (srcSize < ZSTD_frameHeaderSize+ZSTD_blockHeaderSize) return ERROR(srcSize_wrong);
+    magicNumber = MEM_readLE32(src);
+    if (magicNumber != ZSTD_magicNumber) return ERROR(prefix_unknown);
+    ip += ZSTD_frameHeaderSize; remainingSize -= ZSTD_frameHeaderSize;
+
+    /* Loop on each block */
+    while (1)
+    {
+        size_t decodedSize=0;
+        size_t cBlockSize = ZSTD_getcBlockSize(ip, iend-ip, &blockProperties);
+        if (ZSTD_isError(cBlockSize)) return cBlockSize;
+
+        ip += ZSTD_blockHeaderSize;
+        remainingSize -= ZSTD_blockHeaderSize;
+        if (cBlockSize > remainingSize) return ERROR(srcSize_wrong);   /* block claims more input than is left */
+
+        switch(blockProperties.blockType)
+        {
+        case bt_compressed:
+            decodedSize = ZSTD_decompressBlock(ctx, op, oend-op, ip, cBlockSize);
+            break;
+        case bt_raw :
+            decodedSize = ZSTD_copyUncompressedBlock(op, oend-op, ip, cBlockSize);
+            break;
+        case bt_rle :
+            return ERROR(GENERIC);   /* not yet supported */
+            break;
+        case bt_end :
+            /* end of frame : no input may be left over */
+            if (remainingSize) return ERROR(srcSize_wrong);
+            break;
+        default:
+            return ERROR(GENERIC);   /* impossible */
+        }
+        if (cBlockSize == 0) break;   /* bt_end */
+
+        if (ZSTD_isError(decodedSize)) return decodedSize;
+        op += decodedSize;
+        ip += cBlockSize;
+        remainingSize -= cBlockSize;
+    }
+
+    return op-ostart;
+}
+/* One-shot entry point : decompresses a frame using a context that lives on the stack. */
+static size_t ZSTD_decompress(void* dst, size_t maxDstSize, const void* src, size_t srcSize)
+{
+    ZSTD_DCtx stackCtx;
+    stackCtx.base = dst;   /* only `base` is set here, to the start of dst */
+    return ZSTD_decompressDCtx(&stackCtx, dst, maxDstSize, src, srcSize);
+}
+
+static size_t ZSTD_findFrameCompressedSize(const void *src, size_t srcSize)
+{
+
+    const BYTE* ip = (const BYTE*)src;
+    size_t remainingSize = srcSize;
+    U32 magicNumber;
+    blockProperties_t blockProperties;
+
+    /* Frame Header */
+    if (srcSize < ZSTD_frameHeaderSize+ZSTD_blockHeaderSize) return ERROR(srcSize_wrong);
+    magicNumber = MEM_readLE32(src);
+    if (magicNumber != ZSTD_magicNumber) return ERROR(prefix_unknown);
+    ip += ZSTD_frameHeaderSize; remainingSize -= ZSTD_frameHeaderSize;
+
+    /* Loop on each block */
+    while (1)
+    {
+        size_t cBlockSize = ZSTD_getcBlockSize(ip, remainingSize, &blockProperties);
+        if (ZSTD_isError(cBlockSize)) return cBlockSize;
+
+        ip += ZSTD_blockHeaderSize;
+        remainingSize -= ZSTD_blockHeaderSize;
+        if (cBlockSize > remainingSize) return ERROR(srcSize_wrong);
+
+        if (cBlockSize == 0) break;   /* bt_end */
+
+        ip += cBlockSize;
+        remainingSize -= cBlockSize;
+    }
+
+    return ip - (const BYTE*)src;
+}
+
+/*******************************
+*  Streaming Decompression API
+*******************************/
+/* Puts a context back into its initial state : expecting a frame header, with no previous output recorded. Always returns 0. */
+static size_t ZSTD_resetDCtx(ZSTD_DCtx* dctx)
+{
+    dctx->base = NULL;   /* no previous output recorded */
+    dctx->previousDstEnd = NULL;
+    dctx->phase = 0;   /* phase 0 : the next input is a frame header */
+    dctx->expected = ZSTD_frameHeaderSize;
+    return 0;
+}
+/* Allocates a context with malloc() and resets it; returns NULL when the allocation fails. */
+static ZSTD_DCtx* ZSTD_createDCtx(void)
+{
+    ZSTD_DCtx* const newCtx = (ZSTD_DCtx*)malloc(sizeof(*newCtx));
+    if (newCtx != NULL)
+        ZSTD_resetDCtx(newCtx);   /* only a successful allocation gets initialised */
+    return newCtx;
+}
+/* Releases a context with free(); always returns 0. */
+static size_t ZSTD_freeDCtx(ZSTD_DCtx* dctx)
+{
+    free(dctx);   /* the context is passed to free() as-is; no fields are released separately */
+    return 0;
+}
+/* Returns dctx->expected : the exact srcSize the next ZSTD_decompressContinue() call will accept. */
+static size_t ZSTD_nextSrcSizeToDecompress(ZSTD_DCtx* dctx)
+{
+    return dctx->expected;
+}
+
+static size_t ZSTD_decompressContinue(ZSTD_DCtx* ctx, void* dst, size_t maxDstSize, const void* src, size_t srcSize)
+{
+    /* Sanity check */
+    if (srcSize != ctx->expected) return ERROR(srcSize_wrong);
+    if (dst != ctx->previousDstEnd)  /* not contiguous */
+        ctx->base = dst;
+
+    /* Decompress : frame header */
+    if (ctx->phase == 0)
+    {
+        /* Check frame magic header */
+        U32 magicNumber = MEM_readLE32(src);
+        if (magicNumber != ZSTD_magicNumber) return ERROR(prefix_unknown);
+        ctx->phase = 1;
+        ctx->expected = ZSTD_blockHeaderSize;
+        return 0;
+    }
+
+    /* Decompress : block header */
+    if (ctx->phase == 1)
+    {
+        blockProperties_t bp;
+        size_t blockSize = ZSTD_getcBlockSize(src, ZSTD_blockHeaderSize, &bp);
+        if (ZSTD_isError(blockSize)) return blockSize;
+        if (bp.blockType == bt_end)
+        {
+            ctx->expected = 0;
+            ctx->phase = 0;
+        }
+        else
+        {
+            ctx->expected = blockSize;
+            ctx->bType = bp.blockType;
+            ctx->phase = 2;
+        }
+
+        return 0;
+    }
+
+    /* Decompress : block content */
+    {
+        size_t rSize;
+        switch(ctx->bType)
+        {
+        case bt_compressed:
+            rSize = ZSTD_decompressBlock(ctx, dst, maxDstSize, src, srcSize);
+            break;
+        case bt_raw :
+            rSize = ZSTD_copyUncompressedBlock(dst, maxDstSize, src, srcSize);
+            break;
+        case bt_rle :
+            return ERROR(GENERIC);   /* not yet handled */
+            break;
+        case bt_end :   /* should never happen (filtered at phase 1) */
+            rSize = 0;
+            break;
+        default:
+            return ERROR(GENERIC);
+        }
+        ctx->phase = 1;
+        ctx->expected = ZSTD_blockHeaderSize;
+        ctx->previousDstEnd = (void*)( ((char*)dst) + rSize);
+        return rSize;
+    }
+
+}
+
+
+/* wrapper layer */
+/* Public wrapper : forwards to the file-local ZSTD_isError(). */
+unsigned ZSTDv02_isError(size_t code)
+{
+    return ZSTD_isError(code);
+}
+/* Public wrapper : forwards to the file-local ZSTD_decompress() with the arguments unchanged. */
+size_t ZSTDv02_decompress( void* dst, size_t maxOriginalSize,
+                     const void* src, size_t compressedSize)
+{
+    return ZSTD_decompress(dst, maxOriginalSize, src, compressedSize);
+}
+/* Public wrapper : forwards to the file-local ZSTD_findFrameCompressedSize(). */
+size_t ZSTDv02_findFrameCompressedSize(const void *src, size_t compressedSize)
+{
+    return ZSTD_findFrameCompressedSize(src, compressedSize);
+}
+/* Public wrapper : creates a context via ZSTD_createDCtx() and returns it as the public ZSTDv02_Dctx type. */
+ZSTDv02_Dctx* ZSTDv02_createDCtx(void)
+{
+    return (ZSTDv02_Dctx*)ZSTD_createDCtx();
+}
+/* Public wrapper : casts the handle back to ZSTD_DCtx and forwards to ZSTD_freeDCtx(). */
+size_t ZSTDv02_freeDCtx(ZSTDv02_Dctx* dctx)
+{
+    return ZSTD_freeDCtx((ZSTD_DCtx*)dctx);
+}
+/* Public wrapper : casts the handle back to ZSTD_DCtx and forwards to ZSTD_resetDCtx(). */
+size_t ZSTDv02_resetDCtx(ZSTDv02_Dctx* dctx)
+{
+    return ZSTD_resetDCtx((ZSTD_DCtx*)dctx);
+}
+/* Public wrapper : casts the handle back to ZSTD_DCtx and forwards to ZSTD_nextSrcSizeToDecompress(). */
+size_t ZSTDv02_nextSrcSizeToDecompress(ZSTDv02_Dctx* dctx)
+{
+    return ZSTD_nextSrcSizeToDecompress((ZSTD_DCtx*)dctx);
+}
+/* Public wrapper : casts the handle back to ZSTD_DCtx and forwards to ZSTD_decompressContinue(). */
+size_t ZSTDv02_decompressContinue(ZSTDv02_Dctx* dctx, void* dst, size_t maxDstSize, const void* src, size_t srcSize)
+{
+    return ZSTD_decompressContinue((ZSTD_DCtx*)dctx, dst, maxDstSize, src, srcSize);
+}
diff --git a/contrib/libzstd/include/zstd/legacy/zstd_v02.h b/contrib/libzstd/include/zstd/legacy/zstd_v02.h
new file mode 100644
index 00000000000..d14f0293cbe
--- /dev/null
+++ b/contrib/libzstd/include/zstd/legacy/zstd_v02.h
@@ -0,0 +1,87 @@
+/**
+ * Copyright (c) 2016-present, Yann Collet, Facebook, Inc.
+ * All rights reserved.
+ *
+ * This source code is licensed under the BSD-style license found in the
+ * LICENSE file in the root directory of this source tree. An additional grant
+ * of patent rights can be found in the PATENTS file in the same directory.
+ */
+
+#ifndef ZSTD_V02_H_4174539423
+#define ZSTD_V02_H_4174539423
+
+#if defined (__cplusplus)
+extern "C" {
+#endif
+
+/* *************************************
+*  Includes
+***************************************/
+#include <stddef.h>   /* size_t */
+
+
+/* *************************************
+*  Simple one-step function
+***************************************/
+/**
+ZSTDv02_decompress() : decompress ZSTD frames compliant with v0.2.x format
+    compressedSize : is the exact source size
+    maxOriginalSize : is the size of the 'dst' buffer, which must be already allocated.
+                      It must be equal or larger than originalSize, otherwise decompression will fail.
+    return : the number of bytes decompressed into destination buffer (originalSize)
+             or an errorCode if it fails (which can be tested using ZSTDv02_isError())
+*/
+size_t ZSTDv02_decompress( void* dst, size_t maxOriginalSize,
+                     const void* src, size_t compressedSize);
+
+/**
+ZSTDv02_findFrameCompressedSize() : get the source length of a ZSTD frame compliant with v0.2.x format
+    compressedSize : The size of the 'src' buffer, at least as large as the frame pointed to by 'src'
+    return : the number of bytes that would be read to decompress this frame
+             or an errorCode if it fails (which can be tested using ZSTDv02_isError())
+*/
+size_t ZSTDv02_findFrameCompressedSize(const void* src, size_t compressedSize);
+
+/**
+ZSTDv02_isError() : tells if the result of ZSTDv02_decompress() is an error
+*/
+unsigned ZSTDv02_isError(size_t code);
+
+
+/* *************************************
+*  Advanced functions
+***************************************/
+typedef struct ZSTDv02_Dctx_s ZSTDv02_Dctx;
+ZSTDv02_Dctx* ZSTDv02_createDCtx(void);
+size_t ZSTDv02_freeDCtx(ZSTDv02_Dctx* dctx);
+
+size_t ZSTDv02_decompressDCtx(void* ctx,
+                              void* dst, size_t maxOriginalSize,
+                        const void* src, size_t compressedSize);
+
+/* *************************************
+*  Streaming functions
+***************************************/
+size_t ZSTDv02_resetDCtx(ZSTDv02_Dctx* dctx);
+
+size_t ZSTDv02_nextSrcSizeToDecompress(ZSTDv02_Dctx* dctx);
+size_t ZSTDv02_decompressContinue(ZSTDv02_Dctx* dctx, void* dst, size_t maxDstSize, const void* src, size_t srcSize);
+/**
+  Use the above functions alternately.
+  ZSTDv02_nextSrcSizeToDecompress() tells how many bytes to provide as 'srcSize' to ZSTDv02_decompressContinue().
+  ZSTDv02_decompressContinue() can make use of previously decoded blocks when 'dst' directly follows the end of the previous output.
+  Result is the number of bytes regenerated within 'dst'.
+  It can be zero, which is not an error; it just means ZSTD_decompressContinue() has decoded some header.
+*/
+
+/* *************************************
+*  Prefix - version detection
+***************************************/
+#define ZSTDv02_magicNumber 0xFD2FB522   /* v0.2 */
+
+
+#if defined (__cplusplus)
+}
+#endif
+
+#endif /* ZSTD_V02_H_4174539423 */
diff --git a/contrib/libzstd/include/zstd/legacy/zstd_v03.c b/contrib/libzstd/include/zstd/legacy/zstd_v03.c
new file mode 100644
index 00000000000..f438330a469
--- /dev/null
+++ b/contrib/libzstd/include/zstd/legacy/zstd_v03.c
@@ -0,0 +1,3196 @@
+/**
+ * Copyright (c) 2016-present, Yann Collet, Facebook, Inc.
+ * All rights reserved.
+ *
+ * This source code is licensed under the BSD-style license found in the
+ * LICENSE file in the root directory of this source tree. An additional grant
+ * of patent rights can be found in the PATENTS file in the same directory.
+ */
+
+
+#include <stddef.h>    /* size_t, ptrdiff_t */
+#include "zstd_v03.h"
+#include "error_private.h"
+
+
+/******************************************
+*  Compiler-specific
+******************************************/
+#if defined(_MSC_VER)   /* Visual Studio */
+#   include <stdlib.h>  /* _byteswap_ulong */
+#   include <intrin.h>  /* _byteswap_* */
+#endif
+
+
+
+/* ******************************************************************
+   mem.h
+   low-level memory access routines
+   Copyright (C) 2013-2015, Yann Collet.
+
+   BSD 2-Clause License (http://www.opensource.org/licenses/bsd-license.php)
+
+   Redistribution and use in source and binary forms, with or without
+   modification, are permitted provided that the following conditions are
+   met:
+
+       * Redistributions of source code must retain the above copyright
+   notice, this list of conditions and the following disclaimer.
+       * Redistributions in binary form must reproduce the above
+   copyright notice, this list of conditions and the following disclaimer
+   in the documentation and/or other materials provided with the
+   distribution.
+
+   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+   OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+    You can contact the author at :
+    - FSE source repository : https://github.com/Cyan4973/FiniteStateEntropy
+    - Public forum : https://groups.google.com/forum/#!forum/lz4c
+****************************************************************** */
+#ifndef MEM_H_MODULE
+#define MEM_H_MODULE
+
+#if defined (__cplusplus)
+extern "C" {
+#endif
+
+/******************************************
+*  Includes
+******************************************/
+#include <stddef.h>    /* size_t, ptrdiff_t */
+#include <string.h>    /* memcpy */
+
+
+/******************************************
+*  Compiler-specific
+******************************************/
+#if defined(__GNUC__)
+#  define MEM_STATIC static __attribute__((unused))
+#elif defined (__cplusplus) || (defined (__STDC_VERSION__) && (__STDC_VERSION__ >= 199901L) /* C99 */)
+#  define MEM_STATIC static inline
+#elif defined(_MSC_VER)
+#  define MEM_STATIC static __inline
+#else
+#  define MEM_STATIC static  /* this version may generate warnings for unused static functions; disable the relevant warning */
+#endif
+
+
+/****************************************************************
+*  Basic Types
+*****************************************************************/
+#if defined (__cplusplus) || (defined (__STDC_VERSION__) && (__STDC_VERSION__ >= 199901L) /* C99 */)
+# include <stdint.h>
+  typedef  uint8_t BYTE;
+  typedef uint16_t U16;
+  typedef  int16_t S16;
+  typedef uint32_t U32;
+  typedef  int32_t S32;
+  typedef uint64_t U64;
+  typedef  int64_t S64;
+#else
+  typedef unsigned char       BYTE;
+  typedef unsigned short      U16;
+  typedef   signed short      S16;
+  typedef unsigned int        U32;
+  typedef   signed int        S32;
+  typedef unsigned long long  U64;
+  typedef   signed long long  S64;
+#endif
+
+
+/****************************************************************
+*  Memory I/O
+*****************************************************************/
+/* MEM_FORCE_MEMORY_ACCESS
+ * By default, access to unaligned memory is controlled by `memcpy()`, which is safe and portable.
+ * Unfortunately, on some target/compiler combinations, the generated assembly is sub-optimal.
+ * The switch below allows selecting a different access method for improved performance.
+ * Method 0 (default) : use `memcpy()`. Safe and portable.
+ * Method 1 : `__packed` statement. It depends on compiler extension (ie, not portable).
+ *            This method is safe if your compiler supports it, and *generally* as fast or faster than `memcpy`.
+ * Method 2 : direct access. This method is portable but violates the C standard.
+ *            It can generate buggy code on targets generating assembly depending on alignment.
+ *            But in some circumstances, it's the only known way to get the most performance (ie GCC + ARMv6)
+ * See http://fastcompression.blogspot.fr/2015/08/accessing-unaligned-memory.html for details.
+ * Prefer these methods in priority order (0 > 1 > 2)
+ */
+#ifndef MEM_FORCE_MEMORY_ACCESS   /* can be defined externally, on command line for example */
+#  if defined(__GNUC__) && ( defined(__ARM_ARCH_6__) || defined(__ARM_ARCH_6J__) || defined(__ARM_ARCH_6K__) || defined(__ARM_ARCH_6Z__) || defined(__ARM_ARCH_6ZK__) || defined(__ARM_ARCH_6T2__) )
+#    define MEM_FORCE_MEMORY_ACCESS 2
+#  elif (defined(__INTEL_COMPILER) && !defined(WIN32)) || \
+  (defined(__GNUC__) && ( defined(__ARM_ARCH_7__) || defined(__ARM_ARCH_7A__) || defined(__ARM_ARCH_7R__) || defined(__ARM_ARCH_7M__) || defined(__ARM_ARCH_7S__) ))
+#    define MEM_FORCE_MEMORY_ACCESS 1
+#  endif
+#endif
+/* Returns non-zero when pointers are 4 bytes wide on the target. */
+MEM_STATIC unsigned MEM_32bits(void) { return sizeof(void*)==4; }
+MEM_STATIC unsigned MEM_64bits(void) { return sizeof(void*)==8; }   /* non-zero when pointers are 8 bytes wide on the target */
+/* Run-time byte-order probe : returns 1 when the least-significant byte of a U32 is stored first, 0 otherwise. */
+MEM_STATIC unsigned MEM_isLittleEndian(void)
+{
+    const union { U32 word; BYTE bytes[4]; } probe = { 1 };   /* don't use static : performance detrimental */
+    return probe.bytes[0];   /* lowest-addressed byte of the 32-bit value 1 */
+}
+
+#if defined(MEM_FORCE_MEMORY_ACCESS) && (MEM_FORCE_MEMORY_ACCESS==2)
+
+/* Method 2 : direct dereference through a cast pointer. Violates C standard on structure alignment.
+Only use if no other choice to achieve best performance on target platform */
+MEM_STATIC U16 MEM_read16(const void* memPtr) { return *(const U16*) memPtr; }
+MEM_STATIC U32 MEM_read32(const void* memPtr) { return *(const U32*) memPtr; }
+MEM_STATIC U64 MEM_read64(const void* memPtr) { return *(const U64*) memPtr; }
+
+MEM_STATIC void MEM_write16(void* memPtr, U16 value) { *(U16*)memPtr = value; }
+
+#elif defined(MEM_FORCE_MEMORY_ACCESS) && (MEM_FORCE_MEMORY_ACCESS==1)
+
+/* Method 1 : access through a packed union. __pack instructions are safer, but compiler specific, hence potentially problematic for some compilers */
+/* currently only defined for gcc and icc */
+typedef union { U16 u16; U32 u32; U64 u64; } __attribute__((packed)) unalign;
+
+MEM_STATIC U16 MEM_read16(const void* ptr) { return ((const unalign*)ptr)->u16; }
+MEM_STATIC U32 MEM_read32(const void* ptr) { return ((const unalign*)ptr)->u32; }
+MEM_STATIC U64 MEM_read64(const void* ptr) { return ((const unalign*)ptr)->u64; }
+
+MEM_STATIC void MEM_write16(void* memPtr, U16 value) { ((unalign*)memPtr)->u16 = value; }
+
+#else
+
+/* Method 0 : default method, safe and standard (copies through memcpy()).
+   can sometimes prove slower */
+
+MEM_STATIC U16 MEM_read16(const void* memPtr)
+{
+    U16 val; memcpy(&val, memPtr, sizeof(val)); return val;
+}
+
+MEM_STATIC U32 MEM_read32(const void* memPtr)
+{
+    U32 val; memcpy(&val, memPtr, sizeof(val)); return val;
+}
+
+MEM_STATIC U64 MEM_read64(const void* memPtr)
+{
+    U64 val; memcpy(&val, memPtr, sizeof(val)); return val;
+}
+
+MEM_STATIC void MEM_write16(void* memPtr, U16 value)
+{
+    memcpy(memPtr, &value, sizeof(value));
+}
+
+
+#endif /* MEM_FORCE_MEMORY_ACCESS */
+
+/* Reads a 16-bit little-endian value from memPtr, whatever the host byte order. */
+MEM_STATIC U16 MEM_readLE16(const void* memPtr)
+{
+    if (!MEM_isLittleEndian())
+    {
+        /* foreign byte order : assemble the value from its two bytes, low byte first */
+        const BYTE* const bytes = (const BYTE*)memPtr;
+        return (U16)(bytes[0] + (bytes[1]<<8));
+    }
+    return MEM_read16(memPtr);   /* native order already matches : plain load */
+}
+/* Stores val at memPtr as a 16-bit little-endian value, whatever the host byte order. */
+MEM_STATIC void MEM_writeLE16(void* memPtr, U16 val)
+{
+    if (MEM_isLittleEndian())
+    {
+        MEM_write16(memPtr, val);   /* native order already matches : plain store */
+        return;
+    }
+    {
+        BYTE* const out = (BYTE*)memPtr;
+        out[0] = (BYTE)val;          /* low byte first */
+        out[1] = (BYTE)(val>>8);     /* then the high byte */
+    }
+}
+/* Reads a 32-bit little-endian value from memPtr, whatever the host byte order. */
+MEM_STATIC U32 MEM_readLE32(const void* memPtr)
+{
+    if (!MEM_isLittleEndian())
+    {
+        /* foreign byte order : assemble the value byte by byte, low byte first */
+        const BYTE* const bytes = (const BYTE*)memPtr;
+        return (U32)((U32)bytes[0] + ((U32)bytes[1]<<8) + ((U32)bytes[2]<<16) + ((U32)bytes[3]<<24));
+    }
+    return MEM_read32(memPtr);   /* native order already matches : plain load */
+}
+/* Reads a 64-bit little-endian value from memPtr, whatever the host byte order. */
+MEM_STATIC U64 MEM_readLE64(const void* memPtr)
+{
+    if (!MEM_isLittleEndian())
+    {
+        /* foreign byte order : assemble the value byte by byte, low byte first */
+        const BYTE* const bytes = (const BYTE*)memPtr;
+        return (U64)((U64)bytes[0] + ((U64)bytes[1]<<8) + ((U64)bytes[2]<<16) + ((U64)bytes[3]<<24)
+                     + ((U64)bytes[4]<<32) + ((U64)bytes[5]<<40) + ((U64)bytes[6]<<48) + ((U64)bytes[7]<<56));
+    }
+    return MEM_read64(memPtr);   /* native order already matches : plain load */
+}
+
+/* Reads a little-endian value as a size_t : 32 bits wide when MEM_32bits() is true, 64 bits wide otherwise. */
+MEM_STATIC size_t MEM_readLEST(const void* memPtr)
+{
+    /* pick the reader that matches the pointer width reported by MEM_32bits() */
+    size_t const value = MEM_32bits() ? (size_t)MEM_readLE32(memPtr)
+                                      : (size_t)MEM_readLE64(memPtr);
+    return value;
+}
+
+
+#if defined (__cplusplus)
+}
+#endif
+
+#endif /* MEM_H_MODULE */
+
+
+/* ******************************************************************
+   bitstream
+   Part of NewGen Entropy library
+   header file (to include)
+   Copyright (C) 2013-2015, Yann Collet.
+
+   BSD 2-Clause License (http://www.opensource.org/licenses/bsd-license.php)
+
+   Redistribution and use in source and binary forms, with or without
+   modification, are permitted provided that the following conditions are
+   met:
+
+       * Redistributions of source code must retain the above copyright
+   notice, this list of conditions and the following disclaimer.
+       * Redistributions in binary form must reproduce the above
+   copyright notice, this list of conditions and the following disclaimer
+   in the documentation and/or other materials provided with the
+   distribution.
+
+   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+   OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+   You can contact the author at :
+   - Source repository : https://github.com/Cyan4973/FiniteStateEntropy
+   - Public forum : https://groups.google.com/forum/#!forum/lz4c
+****************************************************************** */
+#ifndef BITSTREAM_H_MODULE
+#define BITSTREAM_H_MODULE
+
+#if defined (__cplusplus)
+extern "C" {
+#endif
+
+
+/*
+*  This API consists of small unitary functions, which highly benefit from being inlined.
+*  Since link-time-optimization is not available for all compilers,
+*  these functions are defined into a .h to be included.
+*/
+
+
+/**********************************************
+*  bitStream decompression API (read backward)
+**********************************************/
+typedef struct
+{
+    size_t   bitContainer;   /* local register : the chunk of the bitStream currently loaded */
+    unsigned bitsConsumed;   /* number of bits of bitContainer already read or skipped */
+    const char* ptr;         /* address bitContainer was last loaded from */
+    const char* start;       /* first byte of the bitStream buffer */
+} BIT_DStream_t;
+
+typedef enum { BIT_DStream_unfinished = 0,    /* container reloaded; more input remains before `start` is reached */
+               BIT_DStream_endOfBuffer = 1,   /* `ptr` has reached `start`, but unread bits remain in the container */
+               BIT_DStream_completed = 2,     /* `ptr` is at `start` and every bit of the container has been consumed */
+               BIT_DStream_overflow = 3 } BIT_DStream_status;  /* result of BIT_reloadDStream(); overflow : more bits consumed than the container holds */
+               /* 1,2,4,8 would be better for bitmap combinations, but slows down performance a bit ... :( */
+
+MEM_STATIC size_t   BIT_initDStream(BIT_DStream_t* bitD, const void* srcBuffer, size_t srcSize);
+MEM_STATIC size_t   BIT_readBits(BIT_DStream_t* bitD, unsigned nbBits);
+MEM_STATIC BIT_DStream_status BIT_reloadDStream(BIT_DStream_t* bitD);
+MEM_STATIC unsigned BIT_endOfDStream(const BIT_DStream_t* bitD);
+
+
+/*
+* Start by invoking BIT_initDStream().
+* A chunk of the bitStream is then stored into a local register.
+* Local register size is 64-bits on 64-bits systems, 32-bits on 32-bits systems (size_t).
+* You can then retrieve bitFields stored into the local register, **in reverse order**.
+* Local register is manually filled from memory by the BIT_reloadDStream() method.
+* A reload guarantees a minimum of ((8*sizeof(size_t))-7) bits when its result is BIT_DStream_unfinished.
+* Otherwise, it can be less than that, so proceed accordingly.
+* Checking if DStream has reached its end can be performed with BIT_endOfDStream()
+*/
+
+
+/******************************************
+*  unsafe API
+******************************************/
+MEM_STATIC size_t BIT_readBitsFast(BIT_DStream_t* bitD, unsigned nbBits);
+/* faster, but works only if nbBits >= 1 */
+
+
+
+/****************************************************************
+*  Helper functions
+****************************************************************/
+MEM_STATIC unsigned BIT_highbit32 (register U32 val)   /* returns the index (0-31) of the highest set bit of val. NOTE(review): val==0 looks unsupported (e.g. __builtin_clz(0) is undefined) - confirm all callers */
+{
+#   if defined(_MSC_VER)   /* Visual */
+    unsigned long r=0;
+    _BitScanReverse ( &r, val );
+    return (unsigned) r;
+#   elif defined(__GNUC__) && (__GNUC__ >= 3)   /* Use GCC Intrinsic */
+    return 31 - __builtin_clz (val);   /* leading-zero count converted into a bit index */
+#   else   /* Software version */
+    static const unsigned DeBruijnClz[32] = { 0, 9, 1, 10, 13, 21, 2, 29, 11, 14, 16, 18, 22, 25, 3, 30, 8, 12, 20, 28, 15, 17, 24, 7, 19, 27, 23, 6, 26, 5, 4, 31 };
+    U32 v = val;
+    unsigned r;
+    v |= v >> 1;   /* smear the highest set bit into every lower position ... */
+    v |= v >> 2;
+    v |= v >> 4;
+    v |= v >> 8;
+    v |= v >> 16;
+    r = DeBruijnClz[ (U32) (v * 0x07C4ACDDU) >> 27];   /* ... then use the top 5 bits of the product as a table index */
+    return r;
+#   endif
+}
+
+
+
+/**********************************************************
+* bitStream decoding
+**********************************************************/
+
+/*!BIT_initDStream
+*  Initialize a BIT_DStream_t.
+*  @bitD : a pointer to an already allocated BIT_DStream_t structure
+*  @srcBuffer must point at the beginning of a bitStream
+*  @srcSize must be the exact size of the bitStream
+*  @result : size of stream (== srcSize) or an errorCode if a problem is detected
+*/
+MEM_STATIC size_t BIT_initDStream(BIT_DStream_t* bitD, const void* srcBuffer, size_t srcSize)
+{
+    if (srcSize < 1) { memset(bitD, 0, sizeof(*bitD)); return ERROR(srcSize_wrong); }   /* empty input : zero the state and fail */
+
+    if (srcSize >=  sizeof(size_t))   /* normal case */
+    {
+        U32 contain32;
+        bitD->start = (const char*)srcBuffer;
+        bitD->ptr   = (const char*)srcBuffer + srcSize - sizeof(size_t);   /* the stream is read backward : load its last sizeof(size_t) bytes first */
+        bitD->bitContainer = MEM_readLEST(bitD->ptr);
+        contain32 = ((const BYTE*)srcBuffer)[srcSize-1];   /* the last byte carries the end mark */
+        if (contain32 == 0) return ERROR(GENERIC);   /* endMark not present */
+        bitD->bitsConsumed = 8 - BIT_highbit32(contain32);   /* skip the zero bits above the end mark, and the mark itself */
+    }
+    else   /* input shorter than the container : assemble it byte by byte */
+    {
+        U32 contain32;
+        bitD->start = (const char*)srcBuffer;
+        bitD->ptr   = bitD->start;
+        bitD->bitContainer = *(const BYTE*)(bitD->start);
+        switch(srcSize)
+        {
+            case 7: bitD->bitContainer += (size_t)(((const BYTE*)(bitD->start))[6]) << (sizeof(size_t)*8 - 16);   /* fall-through */
+            case 6: bitD->bitContainer += (size_t)(((const BYTE*)(bitD->start))[5]) << (sizeof(size_t)*8 - 24);   /* fall-through */
+            case 5: bitD->bitContainer += (size_t)(((const BYTE*)(bitD->start))[4]) << (sizeof(size_t)*8 - 32);   /* fall-through */
+            case 4: bitD->bitContainer += (size_t)(((const BYTE*)(bitD->start))[3]) << 24;   /* fall-through */
+            case 3: bitD->bitContainer += (size_t)(((const BYTE*)(bitD->start))[2]) << 16;   /* fall-through */
+            case 2: bitD->bitContainer += (size_t)(((const BYTE*)(bitD->start))[1]) <<  8;   /* fall-through */
+            default:;
+        }
+        contain32 = ((const BYTE*)srcBuffer)[srcSize-1];   /* the last byte carries the end mark */
+        if (contain32 == 0) return ERROR(GENERIC);   /* endMark not present */
+        bitD->bitsConsumed = 8 - BIT_highbit32(contain32);
+        bitD->bitsConsumed += (U32)(sizeof(size_t) - srcSize)*8;   /* also count the container bytes that were never filled */
+    }
+
+    return srcSize;
+}
+
+/*!BIT_lookBits
+ * Provides next n bits from local register
+ * local register is not modified (bits are still present for next read/look)
+ * On 32-bits, maxNbBits==25
+ * On 64-bits, maxNbBits==57
+ * @return : value extracted
+ */
+MEM_STATIC size_t BIT_lookBits(BIT_DStream_t* bitD, U32 nbBits)
+{
+    const U32 bitMask = sizeof(bitD->bitContainer)*8 - 1;   /* container width in bits, minus one */
+    return ((bitD->bitContainer << (bitD->bitsConsumed & bitMask)) >> 1) >> ((bitMask-nbBits) & bitMask);   /* right shift split in two (>>1, then the rest) so that nbBits==0 stays a valid shift count and yields 0 */
+}
+
+/*! BIT_lookBitsFast :
+*   unsafe version; only works if nbBits >= 1 */
+MEM_STATIC size_t BIT_lookBitsFast(BIT_DStream_t* bitD, U32 nbBits)
+{
+    const U32 bitMask = sizeof(bitD->bitContainer)*8 - 1;   /* container width in bits, minus one */
+    return (bitD->bitContainer << (bitD->bitsConsumed & bitMask)) >> (((bitMask+1)-nbBits) & bitMask);   /* single right shift : with nbBits==0 the masked count becomes 0 and nothing is shifted out */
+}
+/* Marks nbBits more bits of the local register as consumed; the register itself is not modified. */
+MEM_STATIC void BIT_skipBits(BIT_DStream_t* bitD, U32 nbBits)
+{
+    bitD->bitsConsumed += nbBits;
+}
+
+/*!BIT_readBits
+ * Read next n bits from local register.
+ * pay attention to not read more than nbBits contained into local register.
+ * @return : extracted value.
+ */
+MEM_STATIC size_t BIT_readBits(BIT_DStream_t* bitD, U32 nbBits)
+{
+    size_t const extracted = BIT_lookBits(bitD, nbBits);   /* peek at the next nbBits ... */
+    bitD->bitsConsumed += nbBits;                            /* ... then mark them consumed (same effect as BIT_skipBits) */
+    return extracted;
+}
+
+/*!BIT_readBitsFast :
+*  unsafe version; only works if nbBits >= 1 */
+MEM_STATIC size_t BIT_readBitsFast(BIT_DStream_t* bitD, U32 nbBits)
+{
+    size_t const extracted = BIT_lookBitsFast(bitD, nbBits);   /* peek at the next nbBits ... */
+    bitD->bitsConsumed += nbBits;                                /* ... then mark them consumed (same effect as BIT_skipBits) */
+    return extracted;
+}
+/* Refills the local register from memory, moving `ptr` backward by the bytes already consumed; returns the resulting stream status. */
+MEM_STATIC BIT_DStream_status BIT_reloadDStream(BIT_DStream_t* bitD)
+{
+    if (bitD->bitsConsumed > (sizeof(bitD->bitContainer)*8))  /* should never happen */
+        return BIT_DStream_overflow;
+
+    if (bitD->ptr >= bitD->start + sizeof(bitD->bitContainer))   /* at least one full container still lies before ptr */
+    {
+        bitD->ptr -= bitD->bitsConsumed >> 3;   /* step back by the whole bytes consumed */
+        bitD->bitsConsumed &= 7;                /* keep only the sub-byte remainder */
+        bitD->bitContainer = MEM_readLEST(bitD->ptr);
+        return BIT_DStream_unfinished;
+    }
+    if (bitD->ptr == bitD->start)   /* already at the first byte : nothing left to load */
+    {
+        if (bitD->bitsConsumed < sizeof(bitD->bitContainer)*8) return BIT_DStream_endOfBuffer;
+        return BIT_DStream_completed;
+    }
+    {
+        U32 nbBytes = bitD->bitsConsumed >> 3;
+        BIT_DStream_status result = BIT_DStream_unfinished;
+        if (bitD->ptr - nbBytes < bitD->start)   /* stepping back that far would pass the start : clamp */
+        {
+            nbBytes = (U32)(bitD->ptr - bitD->start);  /* ptr > start */
+            result = BIT_DStream_endOfBuffer;
+        }
+        bitD->ptr -= nbBytes;
+        bitD->bitsConsumed -= nbBytes*8;
+        bitD->bitContainer = MEM_readLEST(bitD->ptr);   /* reminder : srcSize > sizeof(bitD) */
+        return result;
+    }
+}
+
+/*! BIT_endOfDStream
+*   @return non-zero only if DStream has reached its exact end : ptr is back at start and every bit of the register is consumed
+*/
+MEM_STATIC unsigned BIT_endOfDStream(const BIT_DStream_t* DStream)
+{
+    return ((DStream->ptr == DStream->start) && (DStream->bitsConsumed == sizeof(DStream->bitContainer)*8));
+}
+
+#if defined (__cplusplus)
+}
+#endif
+
+#endif /* BITSTREAM_H_MODULE */
+/* ******************************************************************
+   Error codes and messages
+   Copyright (C) 2013-2015, Yann Collet
+
+   BSD 2-Clause License (http://www.opensource.org/licenses/bsd-license.php)
+
+   Redistribution and use in source and binary forms, with or without
+   modification, are permitted provided that the following conditions are
+   met:
+
+       * Redistributions of source code must retain the above copyright
+   notice, this list of conditions and the following disclaimer.
+       * Redistributions in binary form must reproduce the above
+   copyright notice, this list of conditions and the following disclaimer
+   in the documentation and/or other materials provided with the
+   distribution.
+
+   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+   OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+   You can contact the author at :
+   - Source repository : https://github.com/Cyan4973/FiniteStateEntropy
+   - Public forum : https://groups.google.com/forum/#!forum/lz4c
+****************************************************************** */
+#ifndef ERROR_H_MODULE
+#define ERROR_H_MODULE
+
+#if defined (__cplusplus)
+extern "C" {
+#endif
+
+
+/******************************************
+*  Compiler-specific
+******************************************/
+#if defined (__cplusplus) || (defined (__STDC_VERSION__) && (__STDC_VERSION__ >= 199901L) /* C99 */)
+#  define ERR_STATIC static inline
+#elif defined(_MSC_VER)
+#  define ERR_STATIC static __inline
+#elif defined(__GNUC__)
+#  define ERR_STATIC static __attribute__((unused))
+#else
+#  define ERR_STATIC static  /* this version may generate warnings for unused static functions; disable the relevant warning */
+#endif
+
+
+/******************************************
+*  Error Management
+******************************************/
+#define PREFIX(name) ZSTD_error_##name   /* builds the enum identifier ZSTD_error_<name> */
+
+#define ERROR(name) (size_t)-PREFIX(name)   /* value returned by functions on failure : the negated enum value, converted to size_t */
+
+#define ERROR_LIST(ITEM) \
+        ITEM(PREFIX(No_Error)) ITEM(PREFIX(GENERIC)) \
+        ITEM(PREFIX(dstSize_tooSmall)) ITEM(PREFIX(srcSize_wrong)) \
+        ITEM(PREFIX(prefix_unknown)) ITEM(PREFIX(corruption_detected)) \
+        ITEM(PREFIX(tableLog_tooLarge)) ITEM(PREFIX(maxSymbolValue_tooLarge)) ITEM(PREFIX(maxSymbolValue_tooSmall)) \
+        ITEM(PREFIX(maxCode))
+
+#define ERROR_GENERATE_ENUM(ENUM) ENUM,
+typedef enum { ERROR_LIST(ERROR_GENERATE_ENUM) } ERR_codes;  /* enum is exposed, to detect & handle specific errors; compare function result to -enum value */
+
+#define ERROR_CONVERTTOSTRING(STRING) #STRING,
+#define ERROR_GENERATE_STRING(EXPR) ERROR_CONVERTTOSTRING(EXPR)   /* extra macro level so that PREFIX(...) is expanded before being stringified */
+static const char* ERR_strings[] = { ERROR_LIST(ERROR_GENERATE_STRING) };   /* generated from the same list as ERR_codes : array index == enum value */
+
+ERR_STATIC unsigned ERR_isError(size_t code) { return (code > ERROR(maxCode)); }   /* error codes are the size_t values strictly above (size_t)-maxCode, i.e. the negated enum values 1 .. maxCode-1 */
+
+ERR_STATIC const char* ERR_getErrorName(size_t code)   /* @return : the ERR_strings entry for an error code, or a fixed fallback string otherwise */
+{
+    static const char* codeError = "Unspecified error code";   /* returned for every value that ERR_isError() rejects */
+    if (ERR_isError(code)) return ERR_strings[-(int)(code)];   /* code is (size_t)-enumValue : negating it recovers the index into ERR_strings */
+    return codeError;
+}
+
+
+#if defined (__cplusplus)
+}
+#endif
+
+#endif /* ERROR_H_MODULE */
+/*
+Constructor and Destructor of type FSE_CTable
+    Note that its size depends on 'tableLog' and 'maxSymbolValue' */
+typedef unsigned FSE_CTable;   /* don't allocate that. It's just a way to be more restrictive than void* */
+typedef unsigned FSE_DTable;   /* don't allocate that. It's just a way to be more restrictive than void* */
+
+
+/* ******************************************************************
+   FSE : Finite State Entropy coder
+   header file for static linking (only)
+   Copyright (C) 2013-2015, Yann Collet
+
+   BSD 2-Clause License (http://www.opensource.org/licenses/bsd-license.php)
+
+   Redistribution and use in source and binary forms, with or without
+   modification, are permitted provided that the following conditions are
+   met:
+
+       * Redistributions of source code must retain the above copyright
+   notice, this list of conditions and the following disclaimer.
+       * Redistributions in binary form must reproduce the above
+   copyright notice, this list of conditions and the following disclaimer
+   in the documentation and/or other materials provided with the
+   distribution.
+
+   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+   OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+   You can contact the author at :
+   - Source repository : https://github.com/Cyan4973/FiniteStateEntropy
+   - Public forum : https://groups.google.com/forum/#!forum/lz4c
+****************************************************************** */
+#if defined (__cplusplus)
+extern "C" {
+#endif
+
+
+/******************************************
+*  Static allocation
+******************************************/
+/* FSE buffer bounds. NOTE(review): macro arguments are expanded without parentheses; pass only a simple identifier or literal */
+#define FSE_NCOUNTBOUND 512
+#define FSE_BLOCKBOUND(size) (size + (size>>7))
+#define FSE_COMPRESSBOUND(size) (FSE_NCOUNTBOUND + FSE_BLOCKBOUND(size))   /* Macro version, useful for static allocation */
+
+/* You can statically allocate an FSE CTable/DTable as an array of unsigned, sized with the macros below (same caveat on unparenthesized arguments) */
+#define FSE_CTABLE_SIZE_U32(maxTableLog, maxSymbolValue)   (1 + (1<<(maxTableLog-1)) + ((maxSymbolValue+1)*2))
+#define FSE_DTABLE_SIZE_U32(maxTableLog)                   (1 + (1<<maxTableLog))
+
+
+/******************************************
+*  FSE advanced API
+******************************************/
+static size_t FSE_buildDTable_raw (FSE_DTable* dt, unsigned nbBits);
+/* build a fake FSE_DTable, designed to read an uncompressed bitstream where each symbol uses nbBits */
+
+static size_t FSE_buildDTable_rle (FSE_DTable* dt, unsigned char symbolValue);
+/* build a fake FSE_DTable, designed to always generate the same symbolValue */
+
+
+/******************************************
+*  FSE symbol decompression API
+******************************************/
+typedef struct
+{
+    size_t      state;   /* current index into the decode table */
+    const void* table;   /* decode cells, set by FSE_initDState(); precise table may vary, depending on U16 */
+} FSE_DState_t;
+
+
+static void     FSE_initDState(FSE_DState_t* DStatePtr, BIT_DStream_t* bitD, const FSE_DTable* dt);
+
+static unsigned char FSE_decodeSymbol(FSE_DState_t* DStatePtr, BIT_DStream_t* bitD);
+
+static unsigned FSE_endOfDState(const FSE_DState_t* DStatePtr);
+
+/*
+Let's now decompose FSE_decompress_usingDTable() into its unitary components.
+You will decode FSE-encoded symbols from the bitStream,
+and also any other bitFields you put in, **in reverse order**.
+
+You will need a few variables to track your bitStream. They are :
+
+BIT_DStream_t DStream;    // Stream context
+FSE_DState_t  DState;     // State context. Multiple ones are possible
+FSE_DTable*   DTablePtr;  // Decoding table, provided by FSE_buildDTable()
+
+The first thing to do is to init the bitStream.
+    errorCode = BIT_initDStream(&DStream, srcBuffer, srcSize);
+
+You should then retrieve your initial state(s)
+(in reverse flushing order if you have several ones) :
+    FSE_initDState(&DState, &DStream, DTablePtr);
+
+You can then decode your data, symbol after symbol.
+For information the maximum number of bits read by FSE_decodeSymbol() is 'tableLog'.
+Keep in mind that symbols are decoded in reverse order, like a LIFO stack (last in, first out).
+    unsigned char symbol = FSE_decodeSymbol(&DState, &DStream);
+
+You can retrieve any bitfield you may have stored into the bitStream (in reverse order)
+Note : maximum allowed nbBits is 25, for 32-bits compatibility
+    size_t bitField = BIT_readBits(&DStream, nbBits);
+
+All above operations only read from local register (which size depends on size_t).
+Refueling the register from memory is manually performed by the reload method.
+    endSignal = BIT_reloadDStream(&DStream);
+
+BIT_reloadDStream() result tells if there is still some more data to read from DStream.
+BIT_DStream_unfinished : there is still some data left into the DStream.
+BIT_DStream_endOfBuffer : Dstream reached end of buffer. Its container may no longer be completely filled.
+BIT_DStream_completed : Dstream reached its exact end, corresponding in general to decompression completed.
+BIT_DStream_overflow : Dstream went too far. Decompression result is corrupted.
+
+When reaching end of buffer (BIT_DStream_endOfBuffer), progress slowly, notably if you decode multiple symbols per loop,
+to properly detect the exact end of stream.
+After each decoded symbol, check if DStream is fully consumed using this simple test :
+    BIT_reloadDStream(&DStream) >= BIT_DStream_completed
+
+When it's done, verify decompression is fully completed, by checking both DStream and the relevant states.
+Checking if DStream has reached its end is performed by :
+    BIT_endOfDStream(&DStream);
+Check also the states. There might be some symbols left there, if some high probability ones (>50%) are possible.
+    FSE_endOfDState(&DState);
+*/
+
+
+/******************************************
+*  FSE unsafe API
+******************************************/
+static unsigned char FSE_decodeSymbolFast(FSE_DState_t* DStatePtr, BIT_DStream_t* bitD);
+/* faster, but works only if nbBits is always >= 1 (otherwise, result will be corrupted) */
+
+
+/******************************************
+*  Implementation of inline functions
+******************************************/
+
+/* decompression */
+
+typedef struct {
+    U16 tableLog;   /* log2 of the number of decode cells */
+    U16 fastMode;   /* set by FSE_buildDTable() : 1 when no symbol count reaches half the table size */
+} FSE_DTableHeader;   /* sizeof U32 */
+
+typedef struct
+{
+    unsigned short newState;   /* base of the next state; the bits read from the stream are added to it */
+    unsigned char  symbol;     /* symbol emitted when decoding from this cell */
+    unsigned char  nbBits;     /* number of bits to read from the stream to reach the next state */
+} FSE_decode_t;   /* size == U32 */
+
+MEM_STATIC void FSE_initDState(FSE_DState_t* DStatePtr, BIT_DStream_t* bitD, const FSE_DTable* dt)   /* reads the initial state from bitD and binds DStatePtr to the cells of dt */
+{
+    FSE_DTableHeader DTableH;
+    memcpy(&DTableH, dt, sizeof(DTableH));   /* the header occupies the first cell of dt */
+    DStatePtr->state = BIT_readBits(bitD, DTableH.tableLog);   /* initial state is stored on tableLog bits */
+    BIT_reloadDStream(bitD);   /* refill the register; the returned status is not checked here */
+    DStatePtr->table = dt + 1;   /* decode cells start right after the header cell */
+}
+
+MEM_STATIC BYTE FSE_decodeSymbol(FSE_DState_t* DStatePtr, BIT_DStream_t* bitD)   /* @return : symbol of the current state; then advances the state using bits read from bitD */
+{
+    const FSE_decode_t DInfo = ((const FSE_decode_t*)(DStatePtr->table))[DStatePtr->state];   /* decode cell of the current state */
+    const U32  nbBits = DInfo.nbBits;
+    BYTE symbol = DInfo.symbol;
+    size_t lowBits = BIT_readBits(bitD, nbBits);   /* consumes nbBits from the stream (nbBits may be 0) */
+
+    DStatePtr->state = DInfo.newState + lowBits;   /* next state = cell base + bits just read */
+    return symbol;
+}
+
+MEM_STATIC BYTE FSE_decodeSymbolFast(FSE_DState_t* DStatePtr, BIT_DStream_t* bitD)   /* same as FSE_decodeSymbol(), but reads with BIT_readBitsFast() : valid only if nbBits is always >= 1 */
+{
+    const FSE_decode_t DInfo = ((const FSE_decode_t*)(DStatePtr->table))[DStatePtr->state];   /* decode cell of the current state */
+    const U32 nbBits = DInfo.nbBits;
+    BYTE symbol = DInfo.symbol;
+    size_t lowBits = BIT_readBitsFast(bitD, nbBits);   /* unsafe read : result is wrong when nbBits==0 */
+
+    DStatePtr->state = DInfo.newState + lowBits;   /* next state = cell base + bits just read */
+    return symbol;
+}
+
+MEM_STATIC unsigned FSE_endOfDState(const FSE_DState_t* DStatePtr)   /* end-of-state test : @return non-zero only when the state value is 0 */
+{
+    return DStatePtr->state == 0;
+}
+
+
+#if defined (__cplusplus)
+}
+#endif
+/* ******************************************************************
+   Huff0 : Huffman coder, part of New Generation Entropy library
+   header file for static linking (only)
+   Copyright (C) 2013-2015, Yann Collet
+
+   BSD 2-Clause License (http://www.opensource.org/licenses/bsd-license.php)
+
+   Redistribution and use in source and binary forms, with or without
+   modification, are permitted provided that the following conditions are
+   met:
+
+       * Redistributions of source code must retain the above copyright
+   notice, this list of conditions and the following disclaimer.
+       * Redistributions in binary form must reproduce the above
+   copyright notice, this list of conditions and the following disclaimer
+   in the documentation and/or other materials provided with the
+   distribution.
+
+   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+   OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+   You can contact the author at :
+   - Source repository : https://github.com/Cyan4973/FiniteStateEntropy
+   - Public forum : https://groups.google.com/forum/#!forum/lz4c
+****************************************************************** */
+
+#if defined (__cplusplus)
+extern "C" {
+#endif
+
+/******************************************
+*  Static allocation macros
+******************************************/
+/* Huff0 buffer bounds. NOTE(review): macro arguments are expanded without parentheses; pass only a simple identifier or literal */
+#define HUF_CTABLEBOUND 129
+#define HUF_BLOCKBOUND(size) (size + (size>>8) + 8)   /* only true if incompressible pre-filtered with fast heuristic */
+#define HUF_COMPRESSBOUND(size) (HUF_CTABLEBOUND + HUF_BLOCKBOUND(size))   /* Macro version, useful for static allocation */
+
+/* static allocation of Huff0's DTable : each HUF_CREATE_STATIC_DTABLEXn macro declares an array named DTable whose first cell is initialized to maxTableLog */
+#define HUF_DTABLE_SIZE(maxTableLog)   (1 + (1<<maxTableLog))  /* nb Cells; use unsigned short for X2, unsigned int for X4 */
+#define HUF_CREATE_STATIC_DTABLEX2(DTable, maxTableLog) \
+        unsigned short DTable[HUF_DTABLE_SIZE(maxTableLog)] = { maxTableLog }
+#define HUF_CREATE_STATIC_DTABLEX4(DTable, maxTableLog) \
+        unsigned int DTable[HUF_DTABLE_SIZE(maxTableLog)] = { maxTableLog }
+#define HUF_CREATE_STATIC_DTABLEX6(DTable, maxTableLog) \
+        unsigned int DTable[HUF_DTABLE_SIZE(maxTableLog) * 3 / 2] = { maxTableLog }
+
+
+/******************************************
+*  Advanced functions
+******************************************/
+static size_t HUF_decompress4X2 (void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize);   /* single-symbol decoder */
+static size_t HUF_decompress4X4 (void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize);   /* double-symbols decoder */
+
+
+#if defined (__cplusplus)
+}
+#endif
+
+/*
+    zstd - standard compression library
+    Header File
+    Copyright (C) 2014-2015, Yann Collet.
+
+    BSD 2-Clause License (http://www.opensource.org/licenses/bsd-license.php)
+
+    Redistribution and use in source and binary forms, with or without
+    modification, are permitted provided that the following conditions are
+    met:
+    * Redistributions of source code must retain the above copyright
+    notice, this list of conditions and the following disclaimer.
+    * Redistributions in binary form must reproduce the above
+    copyright notice, this list of conditions and the following disclaimer
+    in the documentation and/or other materials provided with the
+    distribution.
+    THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+    "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+    LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+    A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+    OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+    SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+    LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+    DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+    THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+    (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+    OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+    You can contact the author at :
+    - zstd source repository : https://github.com/Cyan4973/zstd
+    - zstd public forum : https://groups.google.com/forum/#!forum/lz4c
+*/
+
+#if defined (__cplusplus)
+extern "C" {
+#endif
+
+/* *************************************
+*  Includes
+***************************************/
+#include <stddef.h>   /* size_t */
+
+
+/* *************************************
+*  Version
+***************************************/
+#define ZSTD_VERSION_MAJOR    0    /* for breaking interface changes  */
+#define ZSTD_VERSION_MINOR    2    /* for new (non-breaking) interface capabilities */
+#define ZSTD_VERSION_RELEASE  2    /* for tweaks, bug-fixes, or development */
+#define ZSTD_VERSION_NUMBER  (ZSTD_VERSION_MAJOR *100*100 + ZSTD_VERSION_MINOR *100 + ZSTD_VERSION_RELEASE)
+
+
+/* *************************************
+*  Advanced functions
+***************************************/
+typedef struct ZSTD_CCtx_s ZSTD_CCtx;   /* incomplete type */
+
+#if defined (__cplusplus)
+}
+#endif
+/*
+    zstd - standard compression library
+    Header File for static linking only
+    Copyright (C) 2014-2015, Yann Collet.
+
+    BSD 2-Clause License (http://www.opensource.org/licenses/bsd-license.php)
+
+    Redistribution and use in source and binary forms, with or without
+    modification, are permitted provided that the following conditions are
+    met:
+    * Redistributions of source code must retain the above copyright
+    notice, this list of conditions and the following disclaimer.
+    * Redistributions in binary form must reproduce the above
+    copyright notice, this list of conditions and the following disclaimer
+    in the documentation and/or other materials provided with the
+    distribution.
+    THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+    "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+    LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+    A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+    OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+    SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+    LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+    DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+    THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+    (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+    OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+    You can contact the author at :
+    - zstd source repository : https://github.com/Cyan4973/zstd
+    - zstd public forum : https://groups.google.com/forum/#!forum/lz4c
+*/
+
+/* The objects defined into this file should be considered experimental.
+ * They are not labelled stable, as their prototype may change in the future.
+ * You can use them for tests, provide feedback, or if you can endure risk of future changes.
+ */
+
+#if defined (__cplusplus)
+extern "C" {
+#endif
+
+/* *************************************
+*  Streaming functions
+***************************************/
+
+typedef struct ZSTD_DCtx_s ZSTD_DCtx;
+
+/*
+  Use the functions ZSTD_nextSrcSizeToDecompress() and ZSTD_decompressContinue() alternately.
+  ZSTD_nextSrcSizeToDecompress() tells how many bytes to provide as 'srcSize' to ZSTD_decompressContinue().
+  ZSTD_decompressContinue() will use previous data blocks to improve compression if they are located prior to current block.
+  Result is the number of bytes regenerated within 'dst'.
+  It can be zero, which is not an error; it just means ZSTD_decompressContinue() has decoded some header.
+*/
+
+/* *************************************
+*  Prefix - version detection
+***************************************/
+#define ZSTD_magicNumber 0xFD2FB523   /* v0.3 */
+
+
+#if defined (__cplusplus)
+}
+#endif
+/* ******************************************************************
+   FSE : Finite State Entropy coder
+   Copyright (C) 2013-2015, Yann Collet.
+
+   BSD 2-Clause License (http://www.opensource.org/licenses/bsd-license.php)
+
+   Redistribution and use in source and binary forms, with or without
+   modification, are permitted provided that the following conditions are
+   met:
+
+       * Redistributions of source code must retain the above copyright
+   notice, this list of conditions and the following disclaimer.
+       * Redistributions in binary form must reproduce the above
+   copyright notice, this list of conditions and the following disclaimer
+   in the documentation and/or other materials provided with the
+   distribution.
+
+   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+   OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+    You can contact the author at :
+    - FSE source repository : https://github.com/Cyan4973/FiniteStateEntropy
+    - Public forum : https://groups.google.com/forum/#!forum/lz4c
+****************************************************************** */
+
+#ifndef FSE_COMMONDEFS_ONLY
+
+/****************************************************************
+*  Tuning parameters
+****************************************************************/
+/* MEMORY_USAGE :
+*  Memory usage formula : N->2^N Bytes (examples : 10 -> 1KB; 12 -> 4KB ; 16 -> 64KB; 20 -> 1MB; etc.)
+*  Increasing memory usage improves compression ratio
+*  Reduced memory usage can improve speed, due to cache effect
+*  Recommended max value is 14, for 16KB, which nicely fits into Intel x86 L1 cache */
+#define FSE_MAX_MEMORY_USAGE 14
+#define FSE_DEFAULT_MEMORY_USAGE 13
+
+/* FSE_MAX_SYMBOL_VALUE :
+*  Maximum symbol value authorized.
+*  Required for proper stack allocation */
+#define FSE_MAX_SYMBOL_VALUE 255
+
+
+/****************************************************************
+*  template functions type & suffix
+****************************************************************/
+#define FSE_FUNCTION_TYPE BYTE
+#define FSE_FUNCTION_EXTENSION
+
+
+/****************************************************************
+*  Byte symbol type
+****************************************************************/
+#endif   /* !FSE_COMMONDEFS_ONLY */
+
+
+/****************************************************************
+*  Compiler specifics
+****************************************************************/
+#ifdef _MSC_VER    /* Visual Studio */
+#  define FORCE_INLINE static __forceinline
+#  include <intrin.h>                    /* For Visual 2005 */
+#  pragma warning(disable : 4127)        /* disable: C4127: conditional expression is constant */
+#  pragma warning(disable : 4214)        /* disable: C4214: non-int bitfields */
+#else
+#  if defined (__cplusplus) || defined (__STDC_VERSION__) && __STDC_VERSION__ >= 199901L   /* C99 */
+#    ifdef __GNUC__
+#      define FORCE_INLINE static inline __attribute__((always_inline))
+#    else
+#      define FORCE_INLINE static inline
+#    endif
+#  else
+#    define FORCE_INLINE static
+#  endif /* __STDC_VERSION__ */
+#endif
+
+
+/****************************************************************
+*  Includes
+****************************************************************/
+#include <stdlib.h>     /* malloc, free, qsort */
+#include <string.h>     /* memcpy, memset */
+#include <stdio.h>      /* printf (debug) */
+
+/****************************************************************
+*  Constants
+*****************************************************************/
+#define FSE_MAX_TABLELOG  (FSE_MAX_MEMORY_USAGE-2)
+#define FSE_MAX_TABLESIZE (1U<<FSE_MAX_TABLELOG)
+#define FSE_MAXTABLESIZE_MASK (FSE_MAX_TABLESIZE-1)
+#define FSE_DEFAULT_TABLELOG (FSE_DEFAULT_MEMORY_USAGE-2)
+#define FSE_MIN_TABLELOG 5
+
+#define FSE_TABLELOG_ABSOLUTE_MAX 15
+#if FSE_MAX_TABLELOG > FSE_TABLELOG_ABSOLUTE_MAX
+#error "FSE_MAX_TABLELOG > FSE_TABLELOG_ABSOLUTE_MAX is not supported"
+#endif
+
+
+/****************************************************************
+*  Error Management
+****************************************************************/
+#define FSE_STATIC_ASSERT(c) { enum { FSE_static_assert = 1/(int)(!!(c)) }; }   /* use only *after* variable declarations */
+
+
+/****************************************************************
+*  Complex types
+****************************************************************/
+typedef U32 DTable_max_t[FSE_DTABLE_SIZE_U32(FSE_MAX_TABLELOG)];
+
+
+/****************************************************************
+*  Templates
+****************************************************************/
+/*
+  designed to be included
+  for type-specific functions (template emulation in C)
+  Objective is to write these functions only once, for improved maintenance
+*/
+
+/* safety checks */
+#ifndef FSE_FUNCTION_EXTENSION
+#  error "FSE_FUNCTION_EXTENSION must be defined"
+#endif
+#ifndef FSE_FUNCTION_TYPE
+#  error "FSE_FUNCTION_TYPE must be defined"
+#endif
+
+/* Function names */
+#define FSE_CAT(X,Y) X##Y
+#define FSE_FUNCTION_NAME(X,Y) FSE_CAT(X,Y)
+#define FSE_TYPE_NAME(X,Y) FSE_CAT(X,Y)
+
+
+/* Function templates */
+
+#define FSE_DECODE_TYPE FSE_decode_t
+
+static U32 FSE_tableStep(U32 tableSize) { return (tableSize>>1) + (tableSize>>3) + 3; }   /* stride used by FSE_buildDTable() to spread symbols over the table : tableSize/2 + tableSize/8 + 3 */
+
+static size_t FSE_buildDTable   /* fills dt (header cell + decode cells) from normalized symbol counts; @return : 0, or an ERROR() code */
+(FSE_DTable* dt, const short* normalizedCounter, unsigned maxSymbolValue, unsigned tableLog)
+{
+    void* ptr = dt+1;   /* decode cells start right after the header cell */
+    FSE_DTableHeader DTableH;
+    FSE_DECODE_TYPE* const tableDecode = (FSE_DECODE_TYPE*)ptr;
+    const U32 tableSize = 1 << tableLog;
+    const U32 tableMask = tableSize-1;
+    const U32 step = FSE_tableStep(tableSize);
+    U16 symbolNext[FSE_MAX_SYMBOL_VALUE+1];   /* per symbol : next state value to hand out in the final pass */
+    U32 position = 0;
+    U32 highThreshold = tableSize-1;   /* cells above this index are taken by -1 (low-probability) symbols */
+    const S16 largeLimit= (S16)(1 << (tableLog-1));   /* half the table size */
+    U32 noLarge = 1;   /* cleared as soon as one count reaches largeLimit; stored as fastMode */
+    U32 s;
+
+    /* Sanity Checks (NOTE(review): tableSize, step and largeLimit above are derived from tableLog before it is validated here) */
+    if (maxSymbolValue > FSE_MAX_SYMBOL_VALUE) return ERROR(maxSymbolValue_tooLarge);
+    if (tableLog > FSE_MAX_TABLELOG) return ERROR(tableLog_tooLarge);
+
+    /* Init, lay down lowprob symbols */
+    DTableH.tableLog = (U16)tableLog;
+    for (s=0; s<=maxSymbolValue; s++)
+    {
+        if (normalizedCounter[s]==-1)   /* -1 marks a low-probability symbol : it gets exactly one cell, taken from the top of the table */
+        {
+            tableDecode[highThreshold--].symbol = (FSE_FUNCTION_TYPE)s;
+            symbolNext[s] = 1;
+        }
+        else
+        {
+            if (normalizedCounter[s] >= largeLimit) noLarge=0;
+            symbolNext[s] = normalizedCounter[s];   /* first state value handed out for s is its count */
+        }
+    }
+
+    /* Spread symbols */
+    for (s=0; s<=maxSymbolValue; s++)
+    {
+        int i;
+        for (i=0; i<normalizedCounter[s]; i++)   /* runs only for positive counts : symbols with count 0 or -1 are skipped */
+        {
+            tableDecode[position].symbol = (FSE_FUNCTION_TYPE)s;
+            position = (position + step) & tableMask;   /* advance by the fixed stride, wrapping around the table */
+            while (position > highThreshold) position = (position + step) & tableMask;   /* lowprob area : skip cells already reserved above highThreshold */
+        }
+    }
+
+    if (position!=0) return ERROR(GENERIC);   /* position must reach all cells once, otherwise normalizedCounter is incorrect */
+
+    /* Build Decoding table */
+    {
+        U32 i;
+        for (i=0; i<tableSize; i++)
+        {
+            FSE_FUNCTION_TYPE symbol = (FSE_FUNCTION_TYPE)(tableDecode[i].symbol);
+            U16 nextState = symbolNext[symbol]++;   /* each cell of a given symbol receives the next consecutive state value */
+            tableDecode[i].nbBits = (BYTE) (tableLog - BIT_highbit32 ((U32)nextState) );   /* bits to read : tableLog minus the position of nextState's highest set bit */
+            tableDecode[i].newState = (U16) ( (nextState << tableDecode[i].nbBits) - tableSize);   /* base of the next state; the decoder adds the bits it reads */
+        }
+    }
+
+    DTableH.fastMode = (U16)noLarge;
+    memcpy(dt, &DTableH, sizeof(DTableH));   /* header is written last, into the first cell of dt */
+    return 0;
+}
+
+
+#ifndef FSE_COMMONDEFS_ONLY
+/******************************************
+*  FSE helper functions
+******************************************/
+static unsigned FSE_isError(size_t code) { return ERR_isError(code); }   /* thin wrapper : forwards to ERR_isError() */
+
+
+/****************************************************************
+*  FSE NCount encoding-decoding
+****************************************************************/
+static short FSE_abs(short a)   /* @return : absolute value of a */
+{
+    return a<0 ? -a : a;   /* NOTE(review): -a is evaluated as int and converted back to short, so a == SHRT_MIN would not fit */
+}
+
+static size_t FSE_readNCount (short* normalizedCounter, unsigned* maxSVPtr, unsigned* tableLogPtr,
+                 const void* headerBuffer, size_t hbSize)   /* decodes a normalized-count header; @return : nb of header bytes read, or an ERROR() code */
+{
+    const BYTE* const istart = (const BYTE*) headerBuffer;
+    const BYTE* const iend = istart + hbSize;
+    const BYTE* ip = istart;
+    int nbBits;               /* width of a full-size count field : tableLog+1 at start, reduced as `remaining` shrinks */
+    int remaining;            /* starts at (1<<tableLog)+1, loses |count| per symbol, must finish at exactly 1 */
+    int threshold;            /* power of 2, lowered together with nbBits while it exceeds `remaining` */
+    U32 bitStream;            /* bit window loaded from `ip`; its low bits are decoded next */
+    int bitCount;             /* nb of bits consumed, counted from `ip` */
+    unsigned charnum = 0;     /* next symbol to store into normalizedCounter[] */
+    int previous0 = 0;        /* non-zero when the last stored count was 0 : a run of extra zero counts is decoded first */
+
+    if (hbSize < 4) return ERROR(srcSize_wrong);   /* the first read below is 4 bytes wide */
+    bitStream = MEM_readLE32(ip);
+    nbBits = (bitStream & 0xF) + FSE_MIN_TABLELOG;   /* extract tableLog */
+    if (nbBits > FSE_TABLELOG_ABSOLUTE_MAX) return ERROR(tableLog_tooLarge);
+    bitStream >>= 4;
+    bitCount = 4;
+    *tableLogPtr = nbBits;
+    remaining = (1<<nbBits)+1;
+    threshold = 1<<nbBits;
+    nbBits++;
+
+    while ((remaining>1) && (charnum<=*maxSVPtr))
+    {
+        if (previous0)
+        {
+            unsigned n0 = charnum;   /* becomes the index of the first symbol after the zero run */
+            while ((bitStream & 0xFFFF) == 0xFFFF)   /* 16 set bits : 24 more zero counts, and the run goes on */
+            {
+                n0+=24;
+                if (ip < iend-5)
+                {
+                    ip+=2;
+                    bitStream = MEM_readLE32(ip) >> bitCount;
+                }
+                else
+                {
+                    bitStream >>= 16;
+                    bitCount+=16;
+                }
+            }
+            while ((bitStream & 3) == 3)   /* 2-bit field equal to 3 : 3 more zero counts, and the run goes on */
+            {
+                n0+=3;
+                bitStream>>=2;
+                bitCount+=2;
+            }
+            n0 += bitStream & 3;   /* closing 2-bit field (0..2) */
+            bitCount += 2;
+            if (n0 > *maxSVPtr) return ERROR(maxSymbolValue_tooSmall);
+            while (charnum < n0) normalizedCounter[charnum++] = 0;
+            if ((ip <= iend-7) || (ip + (bitCount>>3) <= iend-4))
+            {
+                ip += bitCount>>3;
+                bitCount &= 7;
+                bitStream = MEM_readLE32(ip) >> bitCount;
+            }
+            else
+                bitStream >>= 2;   /* no room for a 4-byte reload : just drop the closing field from the window */
+        }
+        {
+            const short max = (short)((2*threshold-1)-remaining);   /* field values below `max` are coded on nbBits-1 bits */
+            short count;
+
+            if ((bitStream & (threshold-1)) < (U32)max)
+            {
+                count = (short)(bitStream & (threshold-1));
+                bitCount   += nbBits-1;
+            }
+            else
+            {
+                count = (short)(bitStream & (2*threshold-1));
+                if (count >= threshold) count -= max;
+                bitCount   += nbBits;
+            }
+
+            count--;   /* extra accuracy : the field holds count+1, so a field of 0 gives a count of -1 */
+            remaining -= FSE_abs(count);
+            normalizedCounter[charnum++] = count;
+            previous0 = !count;
+            while (remaining < threshold)
+            {
+                nbBits--;
+                threshold >>= 1;
+            }
+
+            {
+                if ((ip <= iend-7) || (ip + (bitCount>>3) <= iend-4))
+                {
+                    ip += bitCount>>3;
+                    bitCount &= 7;
+                }
+                else
+                {
+                    bitCount -= (int)(8 * (iend - 4 - ip));   /* close to the end : pin ip on the last 4 bytes and rebase bitCount on it */
+                    ip = iend - 4;
+                }
+                bitStream = MEM_readLE32(ip) >> (bitCount & 31);
+            }
+        }
+    }
+    if (remaining != 1) return ERROR(GENERIC);   /* the |count| values did not add up to 1<<tableLog */
+    *maxSVPtr = charnum-1;
+
+    ip += (bitCount+7)>>3;   /* round the consumed bits up to whole bytes */
+    if ((size_t)(ip-istart) > hbSize) return ERROR(srcSize_wrong);
+    return ip-istart;
+}
+
+
+/*********************************************************
+*  Decompression (Byte symbols)
+*********************************************************/
+static size_t FSE_buildDTable_rle (FSE_DTable* dt, BYTE symbolValue)
+{
+    void* const base = dt;   /* go through void* to reinterpret the table storage */
+    FSE_DTableHeader* const header = (FSE_DTableHeader*)base;
+    FSE_decode_t* const onlyCell = ((FSE_decode_t*)base) + 1;   /* cells start one FSE_decode_t past the table start */
+
+    /* header : tableLog 0, fast mode off */
+    header->fastMode = 0;
+    header->tableLog = 0;
+
+    /* single cell : emits symbolValue, reads 0 bits, newState 0 */
+    onlyCell->symbol = symbolValue;
+    onlyCell->nbBits = 0;
+    onlyCell->newState = 0;
+    return 0;
+}
+
+
+static size_t FSE_buildDTable_raw (FSE_DTable* dt, unsigned nbBits)
+{
+    void* const base = dt;
+    FSE_DTableHeader* const header = (FSE_DTableHeader*)base;
+    FSE_decode_t* const cells = ((FSE_decode_t*)base) + 1;   /* cells start one FSE_decode_t past the table start */
+    const unsigned nbCells = 1 << nbBits;   /* one cell per nbBits-wide value */
+    unsigned cellNb = 0;
+
+    /* a raw table needs at least 1 bit per symbol */
+    if (nbBits < 1)
+        return ERROR(GENERIC);
+
+    /* header : raw tables are flagged as fast mode */
+    header->tableLog = (U16)nbBits;
+    header->fastMode = 1;
+    /* identity mapping : cell s emits byte s, reads nbBits, newState 0 */
+    while (cellNb < nbCells)
+    {
+        cells[cellNb].newState = 0;
+        cells[cellNb].symbol = (BYTE)cellNb;
+        cells[cellNb].nbBits = (BYTE)nbBits;
+        cellNb++;
+    }
+    return 0;
+}
+
+FORCE_INLINE size_t FSE_decompress_usingDTable_generic(
+          void* dst, size_t maxDstSize,
+    const void* cSrc, size_t cSrcSize,
+    const FSE_DTable* dt, const unsigned fast)   /* @return : nb of bytes written into dst, or an ERROR() code */
+{
+    BYTE* const ostart = (BYTE*) dst;
+    BYTE* op = ostart;
+    BYTE* const omax = op + maxDstSize;
+    BYTE* const olimit = omax-3;   /* the 4-symbols loop runs only while op < olimit, i.e. while 4 output bytes are available */
+
+    BIT_DStream_t bitD;
+    FSE_DState_t state1;   /* state1 and state2 read from the same bitstream and are decoded alternately */
+    FSE_DState_t state2;
+    size_t errorCode;
+
+    /* Init */
+    errorCode = BIT_initDStream(&bitD, cSrc, cSrcSize);   /* replaced last arg by maxCompressed Size */
+    if (FSE_isError(errorCode)) return errorCode;
+
+    FSE_initDState(&state1, &bitD, dt);
+    FSE_initDState(&state2, &bitD, dt);
+
+#define FSE_GETSYMBOL(statePtr) fast ? FSE_decodeSymbolFast(statePtr, &bitD) : FSE_decodeSymbol(statePtr, &bitD)
+
+    /* 4 symbols per loop */
+    for ( ; (BIT_reloadDStream(&bitD)==BIT_DStream_unfinished) && (op<olimit) ; op+=4)
+    {
+        op[0] = FSE_GETSYMBOL(&state1);
+
+        if (FSE_MAX_TABLELOG*2+7 > sizeof(bitD.bitContainer)*8)    /* This test must be static */
+            BIT_reloadDStream(&bitD);
+
+        op[1] = FSE_GETSYMBOL(&state2);
+
+        if (FSE_MAX_TABLELOG*4+7 > sizeof(bitD.bitContainer)*8)    /* This test must be static */
+            { if (BIT_reloadDStream(&bitD) > BIT_DStream_unfinished) { op+=2; break; } }   /* keep the 2 symbols already written, finish in the tail loop */
+
+        op[2] = FSE_GETSYMBOL(&state1);
+
+        if (FSE_MAX_TABLELOG*2+7 > sizeof(bitD.bitContainer)*8)    /* This test must be static */
+            BIT_reloadDStream(&bitD);
+
+        op[3] = FSE_GETSYMBOL(&state2);
+    }
+
+    /* tail : one symbol at a time, alternating state1 / state2, with a stop test before each symbol */
+    /* note : BIT_reloadDStream(&bitD) >= FSE_DStream_partiallyFilled; Ends at exactly BIT_DStream_completed */
+    while (1)
+    {
+        if ( (BIT_reloadDStream(&bitD)>BIT_DStream_completed) || (op==omax) || (BIT_endOfDStream(&bitD) && (fast || FSE_endOfDState(&state1))) )
+            break;
+
+        *op++ = FSE_GETSYMBOL(&state1);
+
+        if ( (BIT_reloadDStream(&bitD)>BIT_DStream_completed) || (op==omax) || (BIT_endOfDStream(&bitD) && (fast || FSE_endOfDState(&state2))) )
+            break;
+
+        *op++ = FSE_GETSYMBOL(&state2);
+    }
+
+    /* end ? success requires the bitstream and both states to report their end condition */
+    if (BIT_endOfDStream(&bitD) && FSE_endOfDState(&state1) && FSE_endOfDState(&state2))
+        return op-ostart;
+
+    if (op==omax) return ERROR(dstSize_tooSmall);   /* dst buffer is full, but cSrc unfinished */
+
+    return ERROR(corruption_detected);
+}
+
+
+static size_t FSE_decompress_usingDTable(void* dst, size_t originalSize,
+                            const void* cSrc, size_t cSrcSize,
+                            const FSE_DTable* dt)
+{
+    FSE_DTableHeader header;   /* local copy of the header stored at the front of dt */
+    memcpy(&header, dt, sizeof(header));
+    /* `fast` is passed as a literal in each call, so each call site of the generic decoder gets a constant */
+    if (!header.fastMode)
+        return FSE_decompress_usingDTable_generic(dst, originalSize, cSrc, cSrcSize, dt, 0);
+    return FSE_decompress_usingDTable_generic(dst, originalSize, cSrc, cSrcSize, dt, 1);
+}
+
+
+static size_t FSE_decompress(void* dst, size_t maxDstSize, const void* cSrc, size_t cSrcSize)
+{
+    const BYTE* const srcStart = (const BYTE*)cSrc;
+    short normCounts[FSE_MAX_SYMBOL_VALUE+1];
+    DTable_max_t dtable;   /* filled by FSE_buildDTable() before any read; static analyzers may still flag it */
+    unsigned maxSymbol = FSE_MAX_SYMBOL_VALUE;
+    unsigned tableLog;
+    size_t headerSize;
+    size_t status;
+
+    /* input too small */
+    if (cSrcSize<2) return ERROR(srcSize_wrong);
+
+    /* step 1 : read the normalized counts stored at the front of the input */
+    headerSize = FSE_readNCount (normCounts, &maxSymbol, &tableLog, srcStart, cSrcSize);
+    if (FSE_isError(headerSize)) return headerSize;
+    if (headerSize >= cSrcSize) return ERROR(srcSize_wrong);   /* nothing left after the header */
+
+    /* step 2 : build the decoding table from those counts */
+    status = FSE_buildDTable (dtable, normCounts, maxSymbol, tableLog);
+    if (FSE_isError(status)) return status;
+
+    /* step 3 : decode what follows the header; the result is returned as is, error code included */
+    return FSE_decompress_usingDTable (dst, maxDstSize, srcStart + headerSize, cSrcSize - headerSize, dtable);
+}
+
+
+
+#endif   /* FSE_COMMONDEFS_ONLY */
+/* ******************************************************************
+   Huff0 : Huffman coder, part of New Generation Entropy library
+   Copyright (C) 2013-2015, Yann Collet.
+
+   BSD 2-Clause License (http://www.opensource.org/licenses/bsd-license.php)
+
+   Redistribution and use in source and binary forms, with or without
+   modification, are permitted provided that the following conditions are
+   met:
+
+       * Redistributions of source code must retain the above copyright
+   notice, this list of conditions and the following disclaimer.
+       * Redistributions in binary form must reproduce the above
+   copyright notice, this list of conditions and the following disclaimer
+   in the documentation and/or other materials provided with the
+   distribution.
+
+   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+   OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+    You can contact the author at :
+    - FSE+Huff0 source repository : https://github.com/Cyan4973/FiniteStateEntropy
+    - Public forum : https://groups.google.com/forum/#!forum/lz4c
+****************************************************************** */
+
+/****************************************************************
+*  Compiler specifics
+****************************************************************/
+#if defined (__cplusplus) || (defined (__STDC_VERSION__) && (__STDC_VERSION__ >= 199901L) /* C99 */)
+/* inline is defined */
+#elif defined(_MSC_VER)
+#  pragma warning(disable : 4127)        /* disable: C4127: conditional expression is constant */
+#  define inline __inline
+#else
+#  define inline /* disable inline */
+#endif
+
+
+/****************************************************************
+*  Includes
+****************************************************************/
+#include <stdlib.h>     /* malloc, free, qsort */
+#include <string.h>     /* memcpy, memset */
+#include <stdio.h>      /* printf (debug) */
+
+/****************************************************************
+*  Error Management
+****************************************************************/
+#define HUF_STATIC_ASSERT(c) { enum { HUF_static_assert = 1/(int)(!!(c)) }; }   /* use only *after* variable declarations */
+
+
+/******************************************
+*  Helper functions : HUF_isError() hands a size_t result to ERR_isError() and returns its verdict
+******************************************/
+static unsigned HUF_isError(size_t code) { return ERR_isError(code); }
+
+#define HUF_ABSOLUTEMAX_TABLELOG  16   /* absolute limit of HUF_MAX_TABLELOG. Beyond that value, code does not work */
+#define HUF_MAX_TABLELOG  12           /* max configured tableLog (for static allocation); can be modified up to HUF_ABSOLUTEMAX_TABLELOG */
+#define HUF_DEFAULT_TABLELOG  HUF_MAX_TABLELOG   /* tableLog by default, when not specified */
+#define HUF_MAX_SYMBOL_VALUE 255   /* largest symbol value; weight and sorting arrays below are sized HUF_MAX_SYMBOL_VALUE+1 */
+#if (HUF_MAX_TABLELOG > HUF_ABSOLUTEMAX_TABLELOG)
+#  error "HUF_MAX_TABLELOG is too large !"
+#endif
+
+
+
+/*********************************************************
+*  Huff0 : Huffman block decompression
+*********************************************************/
+typedef struct { BYTE byte; BYTE nbBits; } HUF_DEltX2;   /* single-symbol decoding : decoded byte + nb of bits to skip in the bitstream */
+
+typedef struct { U16 sequence; BYTE nbBits; BYTE length; } HUF_DEltX4;  /* double-symbols decoding : 1 or 2 symbols stored little-endian in `sequence`, nb of bits to skip, nb of symbols (`length`) */
+
+typedef struct { BYTE symbol; BYTE weight; } sortedSymbol_t;   /* (symbol, weight) entry of the weight-sorted symbol list */
+
+/*! HUF_readStats
+    Read compact Huffman tree, saved by HUF_writeCTable : fills huffWeight[], rankStats[] (nb of symbols per weight, HUF_ABSOLUTEMAX_TABLELOG+1 entries are cleared), *nbSymbolsPtr and *tableLogPtr
+    @huffWeight : destination buffer (hwSize bytes), receives one weight per symbol; the last weight is not stored in `src`, it is deduced from the others
+    @return : size read from `src`, or an error code (to be tested with HUF_isError())
+*/
+static size_t HUF_readStats(BYTE* huffWeight, size_t hwSize, U32* rankStats,
+                            U32* nbSymbolsPtr, U32* tableLogPtr,
+                            const void* src, size_t srcSize)
+{
+    U32 weightTotal;
+    U32 tableLog;
+    const BYTE* ip = (const BYTE*) src;
+    size_t iSize;   /* size of the weight description following the first header byte */
+    size_t oSize;   /* nb of weights obtained from the description (the implied last one excluded) */
+    U32 n;
+
+    if (!srcSize) return ERROR(srcSize_wrong);
+    iSize = ip[0];
+    //memset(huffWeight, 0, hwSize);   /* is not necessary, even though some analyzer complain ... */
+
+    if (iSize >= 128)  /* special header */
+    {
+        if (iSize >= (242))   /* RLE */
+        {
+            static int l[14] = { 1, 2, 3, 4, 7, 8, 15, 16, 31, 32, 63, 64, 127, 128 };   /* nb of weights selected by header bytes 242..255 */
+            oSize = l[iSize-242];
+            memset(huffWeight, 1, hwSize);   /* every weight is 1 */
+            iSize = 0;   /* nothing follows the header byte */
+        }
+        else   /* Incompressible */
+        {
+            oSize = iSize - 127;
+            iSize = ((oSize+1)/2);   /* weights are stored raw, two 4-bit values per byte */
+            if (iSize+1 > srcSize) return ERROR(srcSize_wrong);
+            if (oSize >= hwSize) return ERROR(corruption_detected);
+            ip += 1;
+            for (n=0; n<oSize; n+=2)
+            {
+                huffWeight[n]   = ip[n/2] >> 4;
+                huffWeight[n+1] = ip[n/2] & 15;
+            }
+        }
+    }
+    else  /* header compressed with FSE (normal case) */
+    {
+        if (iSize+1 > srcSize) return ERROR(srcSize_wrong);
+        oSize = FSE_decompress(huffWeight, hwSize-1, ip+1, iSize);   /* max (hwSize-1) values decoded, as last one is implied */
+        if (FSE_isError(oSize)) return oSize;
+    }
+
+    /* collect weight stats */
+    memset(rankStats, 0, (HUF_ABSOLUTEMAX_TABLELOG + 1) * sizeof(U32));
+    weightTotal = 0;
+    for (n=0; n<oSize; n++)
+    {
+        if (huffWeight[n] >= HUF_ABSOLUTEMAX_TABLELOG) return ERROR(corruption_detected);
+        rankStats[huffWeight[n]]++;
+        weightTotal += (1 << huffWeight[n]) >> 1;   /* weight w adds 2^(w-1); weight 0 adds nothing */
+    }
+    if (weightTotal == 0) return ERROR(corruption_detected);
+
+    /* get last non-null symbol weight (implied, total must be 2^n) */
+    tableLog = BIT_highbit32(weightTotal) + 1;
+    if (tableLog > HUF_ABSOLUTEMAX_TABLELOG) return ERROR(corruption_detected);
+    {
+        U32 total = 1 << tableLog;
+        U32 rest = total - weightTotal;   /* share left for the implied last symbol */
+        U32 verif = 1 << BIT_highbit32(rest);
+        U32 lastWeight = BIT_highbit32(rest) + 1;
+        if (verif != rest) return ERROR(corruption_detected);    /* last value must be a clean power of 2 */
+        huffWeight[oSize] = (BYTE)lastWeight;
+        rankStats[lastWeight]++;
+    }
+
+    /* check tree construction validity */
+    if ((rankStats[1] < 2) || (rankStats[1] & 1)) return ERROR(corruption_detected);   /* by construction : at least 2 elts of rank 1, must be even */
+
+    /* results */
+    *nbSymbolsPtr = (U32)(oSize+1);   /* +1 : the implied last symbol */
+    *tableLogPtr = tableLog;
+    return iSize+1;   /* +1 : the header byte itself */
+}
+
+
+/**************************/
+/* single-symbol decoding */
+/**************************/
+
+static size_t HUF_readDTableX2 (U16* DTable, const void* src, size_t srcSize)
+{
+    BYTE huffWeight[HUF_MAX_SYMBOL_VALUE + 1];
+    U32 rankVal[HUF_ABSOLUTEMAX_TABLELOG + 1];   /* large enough for values from 0 to 16 */
+    U32 tableLog = 0;
+    const BYTE* ip = (const BYTE*) src;
+    size_t iSize = ip[0];
+    U32 nbSymbols = 0;
+    U32 n;
+    U32 nextRankStart;
+    void* ptr = DTable+1;
+    HUF_DEltX2* const dt = (HUF_DEltX2*)(ptr);
+
+    HUF_STATIC_ASSERT(sizeof(HUF_DEltX2) == sizeof(U16));   /* if compilation fails here, assertion is false */
+    //memset(huffWeight, 0, sizeof(huffWeight));   /* is not necessary, even though some analyzer complain ... */
+
+    iSize = HUF_readStats(huffWeight, HUF_MAX_SYMBOL_VALUE + 1, rankVal, &nbSymbols, &tableLog, src, srcSize);
+    if (HUF_isError(iSize)) return iSize;
+
+    /* check result */
+    if (tableLog > DTable[0]) return ERROR(tableLog_tooLarge);   /* DTable is too small */
+    DTable[0] = (U16)tableLog;   /* maybe should separate sizeof DTable, as allocated, from used size of DTable, in case of DTable re-use */
+
+    /* Prepare ranks */
+    nextRankStart = 0;
+    for (n=1; n<=tableLog; n++)
+    {
+        U32 current = nextRankStart;
+        nextRankStart += (rankVal[n] << (n-1));
+        rankVal[n] = current;
+    }
+
+    /* fill DTable */
+    for (n=0; n<nbSymbols; n++)
+    {
+        const U32 w = huffWeight[n];
+        const U32 length = (1 << w) >> 1;
+        U32 i;
+        HUF_DEltX2 D;
+        D.byte = (BYTE)n; D.nbBits = (BYTE)(tableLog + 1 - w);
+        for (i = rankVal[w]; i < rankVal[w] + length; i++)
+            dt[i] = D;
+        rankVal[w] += length;
+    }
+
+    return iSize;
+}
+
+static BYTE HUF_decodeSymbolX2(BIT_DStream_t* Dstream, const HUF_DEltX2* dt, const U32 dtLog)
+{
+    /* peek dtLog bits (dtLog >= 1) to pick a table cell, then skip only the nb of bits recorded in that cell */
+    const HUF_DEltX2* const cell = dt + BIT_lookBitsFast(Dstream, dtLog);
+    BIT_skipBits(Dstream, cell->nbBits);
+    return cell->byte;
+}
+
+#define HUF_DECODE_SYMBOLX2_0(ptr, DStreamPtr)   /* unconditional decode of one symbol; expects `dt` and `dtLog` in the calling scope */ \
+    *ptr++ = HUF_decodeSymbolX2(DStreamPtr, dt, dtLog)
+
+#define HUF_DECODE_SYMBOLX2_1(ptr, DStreamPtr)   /* decodes only when MEM_64bits() is true or HUF_MAX_TABLELOG <= 12 */ \
+    if (MEM_64bits() || (HUF_MAX_TABLELOG<=12)) \
+        HUF_DECODE_SYMBOLX2_0(ptr, DStreamPtr)
+
+#define HUF_DECODE_SYMBOLX2_2(ptr, DStreamPtr)   /* decodes only when MEM_64bits() is true */ \
+    if (MEM_64bits()) \
+        HUF_DECODE_SYMBOLX2_0(ptr, DStreamPtr)
+
+static inline size_t HUF_decodeStreamX2(BYTE* p, BIT_DStream_t* const bitDPtr, BYTE* const pEnd, const HUF_DEltX2* const dt, const U32 dtLog)
+{
+    BYTE* const pStart = p;
+
+    /* up to 4 symbols at a time : only the _0 decode is unconditional, the _1 / _2 ones depend on the tests inside their macros */
+    while ((BIT_reloadDStream(bitDPtr) == BIT_DStream_unfinished) && (p <= pEnd-4))
+    {
+        HUF_DECODE_SYMBOLX2_2(p, bitDPtr);
+        HUF_DECODE_SYMBOLX2_1(p, bitDPtr);
+        HUF_DECODE_SYMBOLX2_2(p, bitDPtr);
+        HUF_DECODE_SYMBOLX2_0(p, bitDPtr);
+    }
+
+    /* closer to the end : one symbol per reload */
+    while ((BIT_reloadDStream(bitDPtr) == BIT_DStream_unfinished) && (p < pEnd))
+        HUF_DECODE_SYMBOLX2_0(p, bitDPtr);
+
+    /* no more data to retrieve from bitstream, hence no need to reload */
+    while (p < pEnd)
+        HUF_DECODE_SYMBOLX2_0(p, bitDPtr);
+
+    return pEnd-pStart;   /* size of the [pStart, pEnd) range, which the loops above fill when pStart <= pEnd */
+}
+
+
+static size_t HUF_decompress4X2_usingDTable(
+          void* dst,  size_t dstSize,
+    const void* cSrc, size_t cSrcSize,
+    const U16* DTable)   /* decodes 4 Huffman streams (single-symbol table) into dst : cSrc = 6-byte jump table (3 LE16 stream sizes) + 4 streams; @return : dstSize, or an error code */
+{
+    if (cSrcSize < 10) return ERROR(corruption_detected);   /* strict minimum : jump table + 1 byte per stream */
+    if (3 * ((dstSize+3) / 4) > dstSize) return ERROR(corruption_detected);   /* dstSize of 1, 2 or 5 : the first 3 segments would not fit in dst, streams 1-3 would write beyond oend */
+    {
+        const BYTE* const istart = (const BYTE*) cSrc;
+        BYTE* const ostart = (BYTE*) dst;
+        BYTE* const oend = ostart + dstSize;
+
+        const void* ptr = DTable;
+        const HUF_DEltX2* const dt = ((const HUF_DEltX2*)ptr) +1;   /* cells follow the DTable[0] descriptor */
+        const U32 dtLog = DTable[0];
+        size_t errorCode;
+
+        /* Init */
+        BIT_DStream_t bitD1;
+        BIT_DStream_t bitD2;
+        BIT_DStream_t bitD3;
+        BIT_DStream_t bitD4;
+        const size_t length1 = MEM_readLE16(istart);
+        const size_t length2 = MEM_readLE16(istart+2);
+        const size_t length3 = MEM_readLE16(istart+4);
+        size_t length4;   /* not stored : whatever remains of cSrc after the first 3 streams */
+        const BYTE* const istart1 = istart + 6;  /* jumpTable */
+        const BYTE* const istart2 = istart1 + length1;
+        const BYTE* const istart3 = istart2 + length2;
+        const BYTE* const istart4 = istart3 + length3;
+        const size_t segmentSize = (dstSize+3) / 4;   /* output size of each of the first 3 streams; the 4th one gets the rest */
+        BYTE* const opStart2 = ostart + segmentSize;
+        BYTE* const opStart3 = opStart2 + segmentSize;
+        BYTE* const opStart4 = opStart3 + segmentSize;
+        BYTE* op1 = ostart;
+        BYTE* op2 = opStart2;
+        BYTE* op3 = opStart3;
+        BYTE* op4 = opStart4;
+        U32 endSignal;
+
+        length4 = cSrcSize - (length1 + length2 + length3 + 6);
+        if (length4 > cSrcSize) return ERROR(corruption_detected);   /* overflow */
+        errorCode = BIT_initDStream(&bitD1, istart1, length1);
+        if (HUF_isError(errorCode)) return errorCode;
+        errorCode = BIT_initDStream(&bitD2, istart2, length2);
+        if (HUF_isError(errorCode)) return errorCode;
+        errorCode = BIT_initDStream(&bitD3, istart3, length3);
+        if (HUF_isError(errorCode)) return errorCode;
+        errorCode = BIT_initDStream(&bitD4, istart4, length4);
+        if (HUF_isError(errorCode)) return errorCode;
+
+        /* 16-32 symbols per loop (4-8 symbols per stream) */
+        endSignal = BIT_reloadDStream(&bitD1) | BIT_reloadDStream(&bitD2) | BIT_reloadDStream(&bitD3) | BIT_reloadDStream(&bitD4);
+        for ( ; (endSignal==BIT_DStream_unfinished) && (op4<(oend-7)) ; )
+        {
+            HUF_DECODE_SYMBOLX2_2(op1, &bitD1);
+            HUF_DECODE_SYMBOLX2_2(op2, &bitD2);
+            HUF_DECODE_SYMBOLX2_2(op3, &bitD3);
+            HUF_DECODE_SYMBOLX2_2(op4, &bitD4);
+            HUF_DECODE_SYMBOLX2_1(op1, &bitD1);
+            HUF_DECODE_SYMBOLX2_1(op2, &bitD2);
+            HUF_DECODE_SYMBOLX2_1(op3, &bitD3);
+            HUF_DECODE_SYMBOLX2_1(op4, &bitD4);
+            HUF_DECODE_SYMBOLX2_2(op1, &bitD1);
+            HUF_DECODE_SYMBOLX2_2(op2, &bitD2);
+            HUF_DECODE_SYMBOLX2_2(op3, &bitD3);
+            HUF_DECODE_SYMBOLX2_2(op4, &bitD4);
+            HUF_DECODE_SYMBOLX2_0(op1, &bitD1);
+            HUF_DECODE_SYMBOLX2_0(op2, &bitD2);
+            HUF_DECODE_SYMBOLX2_0(op3, &bitD3);
+            HUF_DECODE_SYMBOLX2_0(op4, &bitD4);
+
+            endSignal = BIT_reloadDStream(&bitD1) | BIT_reloadDStream(&bitD2) | BIT_reloadDStream(&bitD3) | BIT_reloadDStream(&bitD4);
+        }
+
+        /* check corruption : a stream must not have written beyond the start of the next segment */
+        if (op1 > opStart2) return ERROR(corruption_detected);
+        if (op2 > opStart3) return ERROR(corruption_detected);
+        if (op3 > opStart4) return ERROR(corruption_detected);
+        /* note : op4 supposed already verified within main loop */
+
+        /* finish bitStreams one by one */
+        HUF_decodeStreamX2(op1, &bitD1, opStart2, dt, dtLog);
+        HUF_decodeStreamX2(op2, &bitD2, opStart3, dt, dtLog);
+        HUF_decodeStreamX2(op3, &bitD3, opStart4, dt, dtLog);
+        HUF_decodeStreamX2(op4, &bitD4, oend,     dt, dtLog);
+
+        /* check : the 4 bitstreams must all report their end condition */
+        endSignal = BIT_endOfDStream(&bitD1) & BIT_endOfDStream(&bitD2) & BIT_endOfDStream(&bitD3) & BIT_endOfDStream(&bitD4);
+        if (!endSignal) return ERROR(corruption_detected);
+
+        /* decoded size */
+        return dstSize;
+    }
+}
+
+
+static size_t HUF_decompress4X2 (void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize)
+{
+    HUF_CREATE_STATIC_DTABLEX2(DTable, HUF_MAX_TABLELOG);
+    const BYTE* const srcStart = (const BYTE*) cSrc;
+    size_t hSize;
+
+    /* the input opens with the Huffman table description */
+    hSize = HUF_readDTableX2 (DTable, cSrc, cSrcSize);
+    if (HUF_isError(hSize)) return hSize;
+    if (hSize >= cSrcSize) return ERROR(srcSize_wrong);   /* nothing left after the description */
+
+    /* the remainder is handed to the 4-streams decoder */
+    return HUF_decompress4X2_usingDTable (dst, dstSize, srcStart + hSize, cSrcSize - hSize, DTable);
+}
+
+
+/***************************/
+/* double-symbols decoding */
+/***************************/
+
+static void HUF_fillDTableX4Level2(HUF_DEltX4* DTable, U32 sizeLog, const U32 consumed,
+                           const U32* rankValOrigin, const int minWeight,
+                           const sortedSymbol_t* sortedSymbols, const U32 sortedListSize,
+                           U32 nbBitsBaseline, U16 baseSeq)   /* fills the DTable cells that share first symbol baseSeq, whose code takes `consumed` bits */
+{
+    HUF_DEltX4 DElt;
+    U32 rankVal[HUF_ABSOLUTEMAX_TABLELOG + 1];   /* private copy : start indexes are advanced while filling */
+    U32 s;
+
+    /* get pre-calculated rankVal */
+    memcpy(rankVal, rankValOrigin, sizeof(rankVal));
+
+    /* fill skipped values : cells below rankVal[minWeight] decode baseSeq alone (length 1) */
+    if (minWeight>1)
+    {
+        U32 i, skipSize = rankVal[minWeight];
+        MEM_writeLE16(&(DElt.sequence), baseSeq);
+        DElt.nbBits   = (BYTE)(consumed);
+        DElt.length   = 1;
+        for (i = 0; i < skipSize; i++)
+            DTable[i] = DElt;
+    }
+
+    /* fill DTable */
+    for (s=0; s<sortedListSize; s++)   /* note : sortedSymbols already skipped */
+    {
+        const U32 symbol = sortedSymbols[s].symbol;
+        const U32 weight = sortedSymbols[s].weight;
+        const U32 nbBits = nbBitsBaseline - weight;
+        const U32 length = 1 << (sizeLog-nbBits);
+        const U32 start = rankVal[weight];
+        U32 i = start;
+        const U32 end = start + length;
+
+        MEM_writeLE16(&(DElt.sequence), (U16)(baseSeq + (symbol << 8)));   /* low byte : first symbol, high byte : second symbol */
+        DElt.nbBits = (BYTE)(nbBits + consumed);   /* bit cost of both symbols together */
+        DElt.length = 2;
+        do { DTable[i++] = DElt; } while (i<end);   /* since length >= 1 */
+
+        rankVal[weight] += length;
+    }
+}
+
+typedef U32 rankVal_t[HUF_ABSOLUTEMAX_TABLELOG][HUF_ABSOLUTEMAX_TABLELOG + 1];   /* rankVal[consumed][weight] : one row of per-weight start indexes for each nb of bits already consumed */
+
+static void HUF_fillDTableX4(HUF_DEltX4* DTable, const U32 targetLog,
+                           const sortedSymbol_t* sortedList, const U32 sortedListSize,
+                           const U32* rankStart, rankVal_t rankValOrigin, const U32 maxWeight,
+                           const U32 nbBitsBaseline)   /* fills the whole double-symbols table, one sorted symbol after another */
+{
+    U32 rankVal[HUF_ABSOLUTEMAX_TABLELOG + 1];   /* private copy of row 0 of rankValOrigin, advanced while filling */
+    const int scaleLog = nbBitsBaseline - targetLog;   /* note : targetLog >= srcLog, hence scaleLog <= 1 */
+    const U32 minBits  = nbBitsBaseline - maxWeight;   /* bit cost of a symbol of weight maxWeight */
+    U32 s;
+
+    memcpy(rankVal, rankValOrigin, sizeof(rankVal));
+
+    /* fill DTable */
+    for (s=0; s<sortedListSize; s++)
+    {
+        const U16 symbol = sortedList[s].symbol;
+        const U32 weight = sortedList[s].weight;
+        const U32 nbBits = nbBitsBaseline - weight;
+        const U32 start = rankVal[weight];
+        const U32 length = 1 << (targetLog-nbBits);   /* nb of cells sharing this symbol as first symbol */
+
+        if (targetLog-nbBits >= minBits)   /* enough room for a second symbol */
+        {
+            U32 sortedRank;
+            int minWeight = nbBits + scaleLog;
+            if (minWeight < 1) minWeight = 1;
+            sortedRank = rankStart[minWeight];
+            HUF_fillDTableX4Level2(DTable+start, targetLog-nbBits, nbBits,
+                           rankValOrigin[nbBits], minWeight,
+                           sortedList+sortedRank, sortedListSize-sortedRank,
+                           nbBitsBaseline, symbol);
+        }
+        else   /* single-symbol cells only */
+        {
+            U32 i;
+            const U32 end = start + length;
+            HUF_DEltX4 DElt;
+
+            MEM_writeLE16(&(DElt.sequence), symbol);
+            DElt.nbBits   = (BYTE)(nbBits);
+            DElt.length   = 1;
+            for (i = start; i < end; i++)
+                DTable[i] = DElt;
+        }
+        rankVal[weight] += length;
+    }
+}
+
+static size_t HUF_readDTableX4 (U32* DTable, const void* src, size_t srcSize)
+{
+    BYTE weightList[HUF_MAX_SYMBOL_VALUE + 1];
+    sortedSymbol_t sortedSymbol[HUF_MAX_SYMBOL_VALUE + 1];
+    U32 rankStats[HUF_ABSOLUTEMAX_TABLELOG + 1] = { 0 };
+    U32 rankStart0[HUF_ABSOLUTEMAX_TABLELOG + 2] = { 0 };
+    U32* const rankStart = rankStart0+1;
+    rankVal_t rankVal;
+    U32 tableLog, maxW, sizeOfSort, nbSymbols;
+    const U32 memLog = DTable[0];
+    const BYTE* ip = (const BYTE*) src;
+    size_t iSize = ip[0];
+    void* ptr = DTable;
+    HUF_DEltX4* const dt = ((HUF_DEltX4*)ptr) + 1;
+
+    HUF_STATIC_ASSERT(sizeof(HUF_DEltX4) == sizeof(U32));   /* if compilation fails here, assertion is false */
+    if (memLog > HUF_ABSOLUTEMAX_TABLELOG) return ERROR(tableLog_tooLarge);
+    //memset(weightList, 0, sizeof(weightList));   /* is not necessary, even though some analyzer complain ... */
+
+    iSize = HUF_readStats(weightList, HUF_MAX_SYMBOL_VALUE + 1, rankStats, &nbSymbols, &tableLog, src, srcSize);
+    if (HUF_isError(iSize)) return iSize;
+
+    /* check result */
+    if (tableLog > memLog) return ERROR(tableLog_tooLarge);   /* DTable can't fit code depth */
+
+    /* find maxWeight */
+    for (maxW = tableLog; rankStats[maxW]==0; maxW--)
+        { if (!maxW) return ERROR(GENERIC); }  /* necessarily finds a solution before maxW==0 */
+
+    /* Get start index of each weight */
+    {
+        U32 w, nextRankStart = 0;
+        for (w=1; w<=maxW; w++)
+        {
+            U32 current = nextRankStart;
+            nextRankStart += rankStats[w];
+            rankStart[w] = current;
+        }
+        rankStart[0] = nextRankStart;   /* put all 0w symbols at the end of sorted list*/
+        sizeOfSort = nextRankStart;
+    }
+
+    /* sort symbols by weight */
+    {
+        U32 s;
+        for (s=0; s<nbSymbols; s++)
+        {
+            U32 w = weightList[s];
+            U32 r = rankStart[w]++;
+            sortedSymbol[r].symbol = (BYTE)s;
+            sortedSymbol[r].weight = (BYTE)w;
+        }
+        rankStart[0] = 0;   /* forget 0w symbols; this is beginning of weight(1) */
+    }
+
+    /* Build rankVal */
+    {
+        const U32 minBits = tableLog+1 - maxW;
+        U32 nextRankVal = 0;
+        U32 w, consumed;
+        const int rescale = (memLog-tableLog) - 1;   /* tableLog <= memLog */
+        U32* rankVal0 = rankVal[0];
+        for (w=1; w<=maxW; w++)
+        {
+            U32 current = nextRankVal;
+            nextRankVal += rankStats[w] << (w+rescale);
+            rankVal0[w] = current;
+        }
+        for (consumed = minBits; consumed <= memLog - minBits; consumed++)
+        {
+            U32* rankValPtr = rankVal[consumed];
+            for (w = 1; w <= maxW; w++)
+            {
+                rankValPtr[w] = rankVal0[w] >> consumed;
+            }
+        }
+    }
+
+    HUF_fillDTableX4(dt, memLog,
+                   sortedSymbol, sizeOfSort,
+                   rankStart0, rankVal, maxW,
+                   tableLog+1);
+
+    return iSize;
+}
+
+
+static U32 HUF_decodeSymbolX4(void* op, BIT_DStream_t* DStream, const HUF_DEltX4* dt, const U32 dtLog)
+{
+    /* peek dtLog bits (dtLog >= 1 expected) to pick a cell; @return : nb of symbols that cell decodes */
+    const HUF_DEltX4* const cell = dt + BIT_lookBitsFast(DStream, dtLog);
+    memcpy(op, cell, 2);   /* always stores the 2 bytes of `sequence`; the returned length tells how many of them count */
+    BIT_skipBits(DStream, cell->nbBits);
+    return cell->length;
+}
+
+static U32 HUF_decodeLastSymbolX4(void* op, BIT_DStream_t* DStream, const HUF_DEltX4* dt, const U32 dtLog)
+{
+    const size_t containerBits = sizeof(DStream->bitContainer)*8;
+    const HUF_DEltX4* const cell = dt + BIT_lookBitsFast(DStream, dtLog);   /* dtLog >= 1 is expected */
+
+    memcpy(op, cell, 1);   /* a single output byte is left : keep only the first symbol of the cell */
+    if ((cell->length != 1) && (DStream->bitsConsumed >= containerBits))
+        return 1;   /* double-symbols cell while the container is already used up : nothing to skip */
+
+    BIT_skipBits(DStream, cell->nbBits);
+    /* A double-symbols cell only records the bit cost of the pair, so the skip may run past the container;
+       clamping is an ugly hack that works only because this is the last symbol. */
+    if ((cell->length != 1) && (DStream->bitsConsumed > containerBits))
+        DStream->bitsConsumed = containerBits;
+    return 1;
+}
+
+
+#define HUF_DECODE_SYMBOLX4_0(ptr, DStreamPtr)   /* unconditional decode; advances ptr by the nb of symbols returned; expects `dt` and `dtLog` in the calling scope */ \
+    ptr += HUF_decodeSymbolX4(ptr, DStreamPtr, dt, dtLog)
+
+#define HUF_DECODE_SYMBOLX4_1(ptr, DStreamPtr)   /* decodes only when MEM_64bits() is true or HUF_MAX_TABLELOG <= 12 */ \
+    if (MEM_64bits() || (HUF_MAX_TABLELOG<=12)) \
+        ptr += HUF_decodeSymbolX4(ptr, DStreamPtr, dt, dtLog)
+
+#define HUF_DECODE_SYMBOLX4_2(ptr, DStreamPtr)   /* decodes only when MEM_64bits() is true */ \
+    if (MEM_64bits()) \
+        ptr += HUF_decodeSymbolX4(ptr, DStreamPtr, dt, dtLog)
+
+static inline size_t HUF_decodeStreamX4(BYTE* p, BIT_DStream_t* bitDPtr, BYTE* const pEnd, const HUF_DEltX4* const dt, const U32 dtLog)
+{
+    BYTE* const pStart = p;
+
+    /* up to 8 symbols at a time : at most 4 decodes per round, each one writing 2 bytes and advancing p by 1 or 2 */
+    while ((BIT_reloadDStream(bitDPtr) == BIT_DStream_unfinished) && (p < pEnd-7))
+    {
+        HUF_DECODE_SYMBOLX4_2(p, bitDPtr);
+        HUF_DECODE_SYMBOLX4_1(p, bitDPtr);
+        HUF_DECODE_SYMBOLX4_2(p, bitDPtr);
+        HUF_DECODE_SYMBOLX4_0(p, bitDPtr);
+    }
+
+    /* closer to the end : one decode per reload, while the 2 bytes it writes still fit before pEnd */
+    while ((BIT_reloadDStream(bitDPtr) == BIT_DStream_unfinished) && (p <= pEnd-2))
+        HUF_DECODE_SYMBOLX4_0(p, bitDPtr);
+
+    while (p <= pEnd-2)
+        HUF_DECODE_SYMBOLX4_0(p, bitDPtr);   /* no need to reload : reached the end of DStream */
+
+    if (p < pEnd)   /* exactly one byte left : decode it without writing a second byte */
+        p += HUF_decodeLastSymbolX4(p, bitDPtr, dt, dtLog);
+
+    return p-pStart;
+}
+
+
+
+static size_t HUF_decompress4X4_usingDTable(
+          void* dst,  size_t dstSize,
+    const void* cSrc, size_t cSrcSize,
+    const U32* DTable)
+{   /* Decodes 4 bitstreams stored back to back in cSrc, each one regenerating its own segment of dst.
+     * Layout read below : 3 little-endian 16-bit lengths (streams 1 to 3), then the 4 streams;
+     * the length of stream 4 is whatever remains of cSrcSize.
+     * @return : dstSize, or an error code (corruption_detected, or the code reported by BIT_initDStream()) */
+    if (cSrcSize < 10) return ERROR(corruption_detected);   /* strict minimum : jump table + 1 byte per stream */
+
+    {
+        const BYTE* const istart = (const BYTE*) cSrc;
+        BYTE* const ostart = (BYTE*) dst;
+        BYTE* const oend = ostart + dstSize;
+
+        const void* ptr = DTable;   /* untyped alias, so that DTable can be viewed as HUF_DEltX4 cells */
+        const HUF_DEltX4* const dt = ((const HUF_DEltX4*)ptr) +1;   /* decoding cells start after the first one */
+        const U32 dtLog = DTable[0];   /* table log is read from the first U32 of DTable */
+        size_t errorCode;
+
+        /* Init */
+        BIT_DStream_t bitD1;
+        BIT_DStream_t bitD2;
+        BIT_DStream_t bitD3;
+        BIT_DStream_t bitD4;
+        const size_t length1 = MEM_readLE16(istart);
+        const size_t length2 = MEM_readLE16(istart+2);
+        const size_t length3 = MEM_readLE16(istart+4);
+        size_t length4;
+        const BYTE* const istart1 = istart + 6;  /* jumpTable */
+        const BYTE* const istart2 = istart1 + length1;
+        const BYTE* const istart3 = istart2 + length2;
+        const BYTE* const istart4 = istart3 + length3;
+        const size_t segmentSize = (dstSize+3) / 4;   /* output bytes per stream, rounded up */
+        BYTE* const opStart2 = ostart + segmentSize;
+        BYTE* const opStart3 = opStart2 + segmentSize;
+        BYTE* const opStart4 = opStart3 + segmentSize;
+        BYTE* op1 = ostart;
+        BYTE* op2 = opStart2;
+        BYTE* op3 = opStart3;
+        BYTE* op4 = opStart4;
+        U32 endSignal;
+
+        length4 = cSrcSize - (length1 + length2 + length3 + 6);
+        if (length4 > cSrcSize) return ERROR(corruption_detected);   /* overflow */
+        errorCode = BIT_initDStream(&bitD1, istart1, length1);
+        if (HUF_isError(errorCode)) return errorCode;
+        errorCode = BIT_initDStream(&bitD2, istart2, length2);
+        if (HUF_isError(errorCode)) return errorCode;
+        errorCode = BIT_initDStream(&bitD3, istart3, length3);
+        if (HUF_isError(errorCode)) return errorCode;
+        errorCode = BIT_initDStream(&bitD4, istart4, length4);
+        if (HUF_isError(errorCode)) return errorCode;
+
+        /* 16-32 symbols per loop (4-8 symbols per stream) */
+        endSignal = BIT_reloadDStream(&bitD1) | BIT_reloadDStream(&bitD2) | BIT_reloadDStream(&bitD3) | BIT_reloadDStream(&bitD4);
+        for ( ; (endSignal==BIT_DStream_unfinished) && (op4<(oend-7)) ; )   /* runs while all 4 reloads report "unfinished" and stream 4 keeps 8 bytes of room */
+        {
+            HUF_DECODE_SYMBOLX4_2(op1, &bitD1);
+            HUF_DECODE_SYMBOLX4_2(op2, &bitD2);
+            HUF_DECODE_SYMBOLX4_2(op3, &bitD3);
+            HUF_DECODE_SYMBOLX4_2(op4, &bitD4);
+            HUF_DECODE_SYMBOLX4_1(op1, &bitD1);
+            HUF_DECODE_SYMBOLX4_1(op2, &bitD2);
+            HUF_DECODE_SYMBOLX4_1(op3, &bitD3);
+            HUF_DECODE_SYMBOLX4_1(op4, &bitD4);
+            HUF_DECODE_SYMBOLX4_2(op1, &bitD1);
+            HUF_DECODE_SYMBOLX4_2(op2, &bitD2);
+            HUF_DECODE_SYMBOLX4_2(op3, &bitD3);
+            HUF_DECODE_SYMBOLX4_2(op4, &bitD4);
+            HUF_DECODE_SYMBOLX4_0(op1, &bitD1);
+            HUF_DECODE_SYMBOLX4_0(op2, &bitD2);
+            HUF_DECODE_SYMBOLX4_0(op3, &bitD3);
+            HUF_DECODE_SYMBOLX4_0(op4, &bitD4);
+
+            endSignal = BIT_reloadDStream(&bitD1) | BIT_reloadDStream(&bitD2) | BIT_reloadDStream(&bitD3) | BIT_reloadDStream(&bitD4);
+        }
+
+        /* check corruption */
+        if (op1 > opStart2) return ERROR(corruption_detected);   /* stream 1 wrote into segment 2 */
+        if (op2 > opStart3) return ERROR(corruption_detected);   /* stream 2 wrote into segment 3 */
+        if (op3 > opStart4) return ERROR(corruption_detected);   /* stream 3 wrote into segment 4 */
+        /* note : op4 supposed already verified within main loop */
+
+        /* finish bitStreams one by one */
+        HUF_decodeStreamX4(op1, &bitD1, opStart2, dt, dtLog);
+        HUF_decodeStreamX4(op2, &bitD2, opStart3, dt, dtLog);
+        HUF_decodeStreamX4(op3, &bitD3, opStart4, dt, dtLog);
+        HUF_decodeStreamX4(op4, &bitD4, oend,     dt, dtLog);
+
+        /* check */
+        endSignal = BIT_endOfDStream(&bitD1) & BIT_endOfDStream(&bitD2) & BIT_endOfDStream(&bitD3) & BIT_endOfDStream(&bitD4);   /* non-zero only if all 4 streams report their end */
+        if (!endSignal) return ERROR(corruption_detected);
+
+        /* decoded size */
+        return dstSize;
+    }
+}
+
+
+/* HUF_decompress4X4() :
+ * Reads the decoding table description placed at the start of cSrc,
+ * then decodes the rest of cSrc with HUF_decompress4X4_usingDTable().
+ * @return : result of HUF_decompress4X4_usingDTable(), or an error code when the
+ *           table cannot be read or when it uses up the whole input */
+static size_t HUF_decompress4X4 (void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize)
+{
+    HUF_CREATE_STATIC_DTABLEX4(DTable, HUF_MAX_TABLELOG);
+    const size_t tableSize = HUF_readDTableX4 (DTable, cSrc, cSrcSize);
+
+    if (HUF_isError(tableSize)) return tableSize;
+    if (tableSize >= cSrcSize) return ERROR(srcSize_wrong);   /* nothing left to decode */
+
+    return HUF_decompress4X4_usingDTable (dst, dstSize,
+                                          (const BYTE*) cSrc + tableSize, cSrcSize - tableSize,
+                                          DTable);
+}
+
+
+/**********************************/
+/* Generic decompression selector */
+/**********************************/
+
+typedef struct { U32 tableTime; U32 decode256Time; } algo_time_t;   /* cost model of one decoder : fixed cost + cost per (dstSize >> 8) unit, as combined in HUF_decompress() */
+static const algo_time_t algoTime[16 /* Quantization */][3 /* single, double, quad */] =   /* first index : Q = cSrcSize * 16 / dstSize */
+{
+    /* single, double, quad */
+    {{0,0}, {1,1}, {2,2}},  /* Q==0 : impossible */
+    {{0,0}, {1,1}, {2,2}},  /* Q==1 : impossible */
+    {{  38,130}, {1313, 74}, {2151, 38}},   /* Q == 2 : 12-18% */
+    {{ 448,128}, {1353, 74}, {2238, 41}},   /* Q == 3 : 18-25% */
+    {{ 556,128}, {1353, 74}, {2238, 47}},   /* Q == 4 : 25-32% */
+    {{ 714,128}, {1418, 74}, {2436, 53}},   /* Q == 5 : 32-38% */
+    {{ 883,128}, {1437, 74}, {2464, 61}},   /* Q == 6 : 38-44% */
+    {{ 897,128}, {1515, 75}, {2622, 68}},   /* Q == 7 : 44-50% */
+    {{ 926,128}, {1613, 75}, {2730, 75}},   /* Q == 8 : 50-56% */
+    {{ 947,128}, {1729, 77}, {3359, 77}},   /* Q == 9 : 56-62% */
+    {{1107,128}, {2083, 81}, {4006, 84}},   /* Q ==10 : 62-69% */
+    {{1177,128}, {2379, 87}, {4785, 88}},   /* Q ==11 : 69-75% */
+    {{1242,128}, {2415, 93}, {5155, 84}},   /* Q ==12 : 75-81% */
+    {{1349,128}, {2644,106}, {5260,106}},   /* Q ==13 : 81-87% */
+    {{1455,128}, {2422,124}, {4174,124}},   /* Q ==14 : 87-93% */
+    {{ 722,128}, {1891,145}, {1936,146}},   /* Q ==15 : 93-99% */
+};
+
+typedef size_t (*decompressionAlgo)(void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize);   /* signature shared by the decoders selectable in HUF_decompress() */
+
+/* HUF_decompress() :
+ * Generic entry point. Handles the trivial encodings itself (stored as-is, single repeated byte),
+ * otherwise estimates the cost of each decoder from algoTime[] and runs the cheaper of
+ * HUF_decompress4X2() and HUF_decompress4X4().
+ * @return : dstSize on the trivial paths, otherwise the value returned by the selected decoder;
+ *           an error code when dstSize is 0 or when cSrcSize exceeds dstSize */
+static size_t HUF_decompress (void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize)
+{
+    static const decompressionAlgo decompress[3] = { HUF_decompress4X2, HUF_decompress4X4, NULL };
+    U32 cost[3];
+    U32 ratioClass;
+    U32 units256;
+    U32 choice;
+    int a;
+
+    /* validation checks */
+    if (dstSize == 0) return ERROR(dstSize_tooSmall);
+    if (cSrcSize > dstSize) return ERROR(corruption_detected);   /* invalid */
+    if (cSrcSize == dstSize)   /* not compressed */
+    {
+        memcpy(dst, cSrc, dstSize);
+        return dstSize;
+    }
+    if (cSrcSize == 1)   /* RLE */
+    {
+        memset(dst, *(const BYTE*)cSrc, dstSize);
+        return dstSize;
+    }
+
+    /* decoder timing evaluation */
+    ratioClass = (U32)(cSrcSize * 16 / dstSize);   /* < 16 since dstSize > cSrcSize */
+    units256 = (U32)(dstSize >> 8);
+    for (a=0; a<3; a++)
+    {
+        const algo_time_t* const model = &algoTime[ratioClass][a];
+        cost[a] = model->tableTime + (model->decode256Time * units256);
+    }
+
+    /* advantage to algorithms using less memory, for cache eviction */
+    cost[1] += cost[1] >> 4;
+    cost[2] += cost[2] >> 3;
+
+    /* only decoders 0 and 1 compete : slot 2 of decompress[] is NULL */
+    choice = (cost[1] < cost[0]) ? 1 : 0;
+
+    return decompress[choice](dst, dstSize, cSrc, cSrcSize);
+}
+/*
+    zstd - standard compression library
+    Copyright (C) 2014-2015, Yann Collet.
+
+    BSD 2-Clause License (http://www.opensource.org/licenses/bsd-license.php)
+
+    Redistribution and use in source and binary forms, with or without
+    modification, are permitted provided that the following conditions are
+    met:
+    * Redistributions of source code must retain the above copyright
+    notice, this list of conditions and the following disclaimer.
+    * Redistributions in binary form must reproduce the above
+    copyright notice, this list of conditions and the following disclaimer
+    in the documentation and/or other materials provided with the
+    distribution.
+    THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+    "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+    LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+    A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+    OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+    SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+    LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+    DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+    THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+    (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+    OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+    You can contact the author at :
+    - zstd source repository : https://github.com/Cyan4973/zstd
+    - zstd public forum : https://groups.google.com/forum/#!forum/lz4c
+*/
+
+/* ***************************************************************
+*  Tuning parameters
+*****************************************************************/
+/*!
+*  MEMORY_USAGE :
+*  Memory usage formula : N->2^N Bytes (examples : 10 -> 1KB; 12 -> 4KB ; 16 -> 64KB; 20 -> 1MB; etc.)
+*  Increasing memory usage improves compression ratio
+*  Reduced memory usage can improve speed, due to cache effect
+*/
+#define ZSTD_MEMORY_USAGE 17
+
+/*!
+ * HEAPMODE :
+ * Select how default compression functions will allocate memory for their hash table,
+ * in memory stack (0, fastest), or in memory heap (1, requires malloc())
+ * Note that compression context is fairly large, as a consequence heap memory is recommended.
+ */
+#ifndef ZSTD_HEAPMODE
+#  define ZSTD_HEAPMODE 1
+#endif /* ZSTD_HEAPMODE */
+
+/*!
+*  LEGACY_SUPPORT :
+*  decompressor can decode older formats (starting from Zstd 0.1+)
+*/
+#ifndef ZSTD_LEGACY_SUPPORT
+#  define ZSTD_LEGACY_SUPPORT 1
+#endif
+
+
+/* *******************************************************
+*  Includes
+*********************************************************/
+#include <stdlib.h>      /* calloc */
+#include <string.h>      /* memcpy, memmove */
+#include <stdio.h>       /* debug : printf */
+
+
+/* *******************************************************
+*  Compiler specifics
+*********************************************************/
+#ifdef __AVX2__
+#  include <immintrin.h>   /* AVX2 intrinsics */
+#endif
+
+#ifdef _MSC_VER    /* Visual Studio */
+#  include <intrin.h>                    /* For Visual 2005 */
+#  pragma warning(disable : 4127)        /* disable: C4127: conditional expression is constant */
+#  pragma warning(disable : 4324)        /* disable: C4324: padded structure */
+#else
+#  define GCC_VERSION (__GNUC__ * 100 + __GNUC_MINOR__)
+#endif
+
+
+/* *******************************************************
+*  Constants
+*********************************************************/
+#define HASH_LOG (ZSTD_MEMORY_USAGE - 2)
+#define HASH_TABLESIZE (1 << HASH_LOG)
+#define HASH_MASK (HASH_TABLESIZE - 1)
+
+#define KNUTH 2654435761
+
+#define BIT7 128
+#define BIT6  64
+#define BIT5  32
+#define BIT4  16
+#define BIT1   2
+#define BIT0   1
+
+#define KB *(1 <<10)   /* postfix multiplier : "128 KB" expands to 128 *(1 <<10) */
+#define MB *(1 <<20)
+#define GB *(1U<<30)
+
+#define BLOCKSIZE (128 KB)                 /* define, for static allocation */
+#define MIN_SEQUENCES_SIZE (2 /*seqNb*/ + 2 /*dumps*/ + 3 /*seqTables*/ + 1 /*bitStream*/)
+#define MIN_CBLOCK_SIZE (3 /*litCSize*/ + MIN_SEQUENCES_SIZE)
+#define IS_RAW BIT0   /* literals section type, compared with (*istart & 3) in ZSTD_decodeLiteralsBlock() */
+#define IS_RLE BIT1   /* literals section type, compared with (*istart & 3) in ZSTD_decodeLiteralsBlock() */
+
+#define WORKPLACESIZE (BLOCKSIZE*3)
+#define MINMATCH 4   /* added to every decoded match length */
+#define MLbits   7
+#define LLbits   6
+#define Offbits  5
+#define MaxML  ((1<<MLbits )-1)   /* 127 : match length code announcing extra bytes in the dumps area */
+#define MaxLL  ((1<<LLbits )-1)   /* 63 : literal length code announcing extra bytes in the dumps area */
+#define MaxOff   31
+#define LitFSELog  11
+#define MLFSELog   10   /* largest table log accepted for match lengths */
+#define LLFSELog   10   /* largest table log accepted for literal lengths */
+#define OffFSELog   9   /* largest table log accepted for offset codes */
+#define MAX(a,b) ((a)<(b)?(b):(a))   /* note : arguments may be evaluated twice */
+#define MaxSeq MAX(MaxLL, MaxML)
+
+#define LITERAL_NOENTROPY 63
+#define COMMAND_NOENTROPY 7   /* to remove */
+
+static const size_t ZSTD_blockHeaderSize = 3;   /* bytes consumed by each block header */
+static const size_t ZSTD_frameHeaderSize = 4;   /* bytes skipped after the magic number check */
+
+
+/* *******************************************************
+*  Memory operations
+**********************************************************/
+/* ZSTD_copy4() : copies exactly 4 bytes from src to dst */
+static void   ZSTD_copy4(void* dst, const void* src)
+{
+    memcpy(dst, src, 4);
+}
+
+/* ZSTD_copy8() : copies exactly 8 bytes from src to dst */
+static void   ZSTD_copy8(void* dst, const void* src)
+{
+    memcpy(dst, src, 8);
+}
+
+/* COPY8 : copies 8 bytes, then moves both pointers forward by 8 */
+#define COPY8(d,s) { ZSTD_copy8(d,s); d+=8; s+=8; }
+
+/*! ZSTD_wildcopy : custom version of memcpy(), can copy up to 7-8 bytes too many.
+ *  Works by 8-byte steps and always performs at least one step, even when length <= 0. */
+static void ZSTD_wildcopy(void* dst, const void* src, ptrdiff_t length)
+{
+    const BYTE* from = (const BYTE*)src;
+    BYTE* to = (BYTE*)dst;
+    BYTE* const stop = to + length;
+
+    do
+    {
+        ZSTD_copy8(to, from);
+        to += 8;
+        from += 8;
+    } while (to < stop);
+}
+
+
+/* **************************************
+*  Local structures
+****************************************/
+typedef enum { bt_compressed, bt_raw, bt_rle, bt_end } blockType_t;   /* values 0..3 : ZSTD_getcBlockSize() casts the 2 top bits of a block header to this type */
+
+typedef struct
+{
+    blockType_t blockType;   /* type announced by the block header */
+    U32 origSize;            /* size field of the header for bt_rle blocks, 0 for any other type */
+} blockProperties_t;
+
+typedef struct {   /* NOTE(review) : not referenced by the decoding functions of this section; usage to be confirmed */
+    void* buffer;
+    U32*  offsetStart;
+    U32*  offset;
+    BYTE* offCodeStart;
+    BYTE* offCode;
+    BYTE* litStart;
+    BYTE* lit;
+    BYTE* litLengthStart;
+    BYTE* litLength;
+    BYTE* matchLengthStart;
+    BYTE* matchLength;
+    BYTE* dumpsStart;
+    BYTE* dumps;
+} seqStore_t;
+
+
+/* *************************************
+*  Error Management
+***************************************/
+/*! ZSTD_isError() :
+*   tells if a return value is an error code; the decision is delegated to ERR_isError() */
+static unsigned ZSTD_isError(size_t code)
+{
+    return ERR_isError(code);
+}
+
+
+
+/* *************************************************************
+*   Decompression section
+***************************************************************/
+struct ZSTD_DCtx_s
+{
+    U32 LLTable[FSE_DTABLE_SIZE_U32(LLFSELog)];     /* decoding table for literal lengths, rebuilt by ZSTD_decodeSeqHeaders() */
+    U32 OffTable[FSE_DTABLE_SIZE_U32(OffFSELog)];   /* decoding table for offset codes, rebuilt by ZSTD_decodeSeqHeaders() */
+    U32 MLTable[FSE_DTABLE_SIZE_U32(MLFSELog)];     /* decoding table for match lengths, rebuilt by ZSTD_decodeSeqHeaders() */
+    void* previousDstEnd;   /* set to NULL by ZSTD_resetDCtx(); presumably the end of the previously written output - confirm against the streaming API */
+    void* base;             /* lowest address a match is allowed to start from (checked in ZSTD_execSequence()) */
+    size_t expected;        /* set to ZSTD_frameHeaderSize by ZSTD_resetDCtx() */
+    blockType_t bType;
+    U32 phase;              /* set to 0 by ZSTD_resetDCtx() */
+    const BYTE* litPtr;     /* literals of the current block : litBuffer, or a position inside the compressed block */
+    size_t litSize;         /* number of literals available from litPtr */
+    BYTE litBuffer[BLOCKSIZE + 8 /* margin for wildcopy */];
+};   /* typedef'd to ZSTD_Dctx within "zstd_static.h" */
+
+
+/* ZSTD_getcBlockSize() :
+ * Parses the 3-byte block header located at `src` and fills `bpPtr`.
+ * Layout as read here : 2 top bits of byte 0 = block type;
+ * 3 low bits of byte 0, then byte 1, then byte 2 = size field (most significant part first).
+ * @return : 0 for bt_end, 1 for bt_rle, the size field for the other types,
+ *           or ERROR(srcSize_wrong) when fewer than 3 bytes are available */
+static size_t ZSTD_getcBlockSize(const void* src, size_t srcSize, blockProperties_t* bpPtr)
+{
+    const BYTE* const header = (const BYTE* const)src;
+    blockType_t type;
+    U32 sizeField;
+
+    if (srcSize < 3) return ERROR(srcSize_wrong);
+
+    type = (blockType_t)(header[0] >> 6);
+    sizeField = ((header[0] & 7)<<16) + (header[1]<<8) + header[2];
+
+    bpPtr->blockType = type;
+    bpPtr->origSize = (type == bt_rle) ? sizeField : 0;
+
+    /* bt_end carries no payload, bt_rle a single byte */
+    return (type == bt_end) ? 0 : ((type == bt_rle) ? 1 : sizeField);
+}
+
+/* ZSTD_copyUncompressedBlock() :
+ * Raw block : the content of src is copied as-is into dst.
+ * @return : srcSize, or ERROR(dstSize_tooSmall) when dst cannot hold srcSize bytes */
+static size_t ZSTD_copyUncompressedBlock(void* dst, size_t maxDstSize, const void* src, size_t srcSize)
+{
+    if (maxDstSize < srcSize)
+        return ERROR(dstSize_tooSmall);
+
+    memcpy(dst, src, srcSize);
+    return srcSize;
+}
+
+
+/** ZSTD_decompressLiterals
+    Reads the 5-byte header of a compressed literals section, then regenerates the literals into dst
+    with HUF_decompress().
+    On entry *maxDstSizePtr is the capacity of dst; on success it receives the regenerated size.
+    @return : nb of bytes read from src (header included), or an error code*/
+static size_t ZSTD_decompressLiterals(void* dst, size_t* maxDstSizePtr,
+                                const void* src, size_t srcSize)
+{
+    const BYTE* const istart = (const BYTE*)src;
+    const size_t headerSize = 5;
+
+    /* both size fields are packed within the header (no buffer issue : srcSize >= MIN_CBLOCK_SIZE) */
+    const size_t regenSize = (MEM_readLE32(src) & 0x1FFFFF) >> 2;
+    const size_t compSize  = (MEM_readLE32(istart+2) & 0xFFFFFF) >> 5;
+
+    if (regenSize > *maxDstSizePtr) return ERROR(corruption_detected);
+    if (compSize + headerSize > srcSize) return ERROR(corruption_detected);
+
+    {
+        const size_t hufResult = HUF_decompress(dst, regenSize, istart+headerSize, compSize);
+        if (HUF_isError(hufResult)) return ERROR(corruption_detected);
+    }
+
+    *maxDstSizePtr = regenSize;
+    return compSize + headerSize;
+}
+
+
+/** ZSTD_decodeLiteralsBlock
+    Decodes the literals section found at the start of a compressed block and records where the
+    literals can be read : dctx->litPtr / dctx->litSize (inside dctx->litBuffer, or directly inside src).
+    The section type is given by the 2 low bits of the first byte :
+    IS_RAW = stored literals, IS_RLE = one repeated byte, anything else = compressed literals.
+    @return : nb of bytes read from src (< srcSize ), or an error code */
+static size_t ZSTD_decodeLiteralsBlock(void* ctx,
+                          const void* src, size_t srcSize)
+{
+    ZSTD_DCtx* dctx = (ZSTD_DCtx*)ctx;
+    const BYTE* const istart = (const BYTE* const)src;
+
+    /* any compressed block with literals segment must be at least this size */
+    if (srcSize < MIN_CBLOCK_SIZE) return ERROR(corruption_detected);
+
+    switch(*istart & 3)
+    {
+    default:
+    case 0:
+        {   /* compressed literals, regenerated into litBuffer */
+            size_t litSize = BLOCKSIZE;
+            const size_t readSize = ZSTD_decompressLiterals(dctx->litBuffer, &litSize, src, srcSize);
+            dctx->litPtr = dctx->litBuffer;
+            dctx->litSize = litSize;
+            memset(dctx->litBuffer + dctx->litSize, 0, 8);   /* zeroed margin after the literals */
+            return readSize;   /* works if it's an error too */
+        }
+    case IS_RAW:
+        {
+            const size_t litSize = (MEM_readLE32(istart) & 0xFFFFFF) >> 2;   /* no buffer issue : srcSize >= MIN_CBLOCK_SIZE */
+            if (litSize > srcSize-11)   /* risk of reading too far with wildcopy */
+            {
+                /* literals sit too close to the end of src : work on a private copy followed by a margin */
+                if (litSize > BLOCKSIZE) return ERROR(corruption_detected);   /* would not fit into litBuffer */
+                if (litSize > srcSize-3) return ERROR(corruption_detected);
+                memcpy(dctx->litBuffer, istart+3, litSize);   /* literals start right after the 3-byte header, same as the direct reference below */
+                dctx->litPtr = dctx->litBuffer;
+                dctx->litSize = litSize;
+                memset(dctx->litBuffer + dctx->litSize, 0, 8);
+                return litSize+3;
+            }
+            /* direct reference into compressed stream */
+            dctx->litPtr = istart+3;
+            dctx->litSize = litSize;
+            return litSize+3;
+        }
+    case IS_RLE:
+        {
+            const size_t litSize = (MEM_readLE32(istart) & 0xFFFFFF) >> 2;   /* no buffer issue : srcSize >= MIN_CBLOCK_SIZE */
+            if (litSize > BLOCKSIZE) return ERROR(corruption_detected);
+            memset(dctx->litBuffer, istart[3], litSize + 8);   /* the repeated byte also fills the 8-byte margin */
+            dctx->litPtr = dctx->litBuffer;
+            dctx->litSize = litSize;
+            return 4;
+        }
+    }
+}
+
+
+static size_t ZSTD_decodeSeqHeaders(int* nbSeq, const BYTE** dumpsPtr, size_t* dumpsLengthPtr,
+                         FSE_DTable* DTableLL, FSE_DTable* DTableML, FSE_DTable* DTableOffb,
+                         const void* src, size_t srcSize)
+{   /* Parses the header of a sequences section :
+     * - number of sequences (16 bits, little endian), stored into *nbSeq
+     * - one flag byte giving the encoding type of the 3 tables and the format of the dumps length
+     * - the dumps area, reported through *dumpsPtr / *dumpsLengthPtr
+     * - the description of each table (literal lengths, offsets, match lengths), from which the
+     *   3 decoding tables are built
+     * @return : nb of bytes read from src, or an error code */
+    const BYTE* const istart = (const BYTE* const)src;
+    const BYTE* ip = istart;
+    const BYTE* const iend = istart + srcSize;
+    U32 LLtype, Offtype, MLtype;
+    U32 LLlog, Offlog, MLlog;
+    size_t dumpsLength;
+
+    /* check */
+    if (srcSize < 5) return ERROR(srcSize_wrong);
+
+    /* SeqHead */
+    *nbSeq = MEM_readLE16(ip); ip+=2;
+    LLtype  = *ip >> 6;         /* bits 7-6 of the flag byte */
+    Offtype = (*ip >> 4) & 3;   /* bits 5-4 */
+    MLtype  = (*ip >> 2) & 3;   /* bits 3-2 */
+    if (*ip & 2)                /* bit 1 set : dumps length is stored within the 2 next bytes */
+    {
+        dumpsLength  = ip[2];
+        dumpsLength += ip[1] << 8;
+        ip += 3;
+    }
+    else
+    {
+        dumpsLength  = ip[1];
+        dumpsLength += (ip[0] & 1) << 8;   /* 9-bit length : bit 0 of the flag byte, then the next byte */
+        ip += 2;
+    }
+    *dumpsPtr = ip;
+    ip += dumpsLength;
+    *dumpsLengthPtr = dumpsLength;
+
+    /* check */
+    if (ip > iend-3) return ERROR(srcSize_wrong); /* min : all 3 are "raw", hence no header, but at least xxLog bits per type */
+
+    /* sequences */
+    {
+        S16 norm[MaxML+1];    /* assumption : MaxML >= MaxLL and MaxOff */
+        size_t headerSize;
+
+        /* Build DTables */
+        switch(LLtype)   /* table types are compared with the blockType_t constants */
+        {
+        case bt_rle :
+            LLlog = 0;
+            FSE_buildDTable_rle(DTableLL, *ip++); break;
+        case bt_raw :
+            LLlog = LLbits;
+            FSE_buildDTable_raw(DTableLL, LLbits); break;
+        default :   /* table described by normalized counts read with FSE_readNCount() */
+            {   U32 max = MaxLL;
+                headerSize = FSE_readNCount(norm, &max, &LLlog, ip, iend-ip);
+                if (FSE_isError(headerSize)) return ERROR(GENERIC);
+                if (LLlog > LLFSELog) return ERROR(corruption_detected);
+                ip += headerSize;
+                FSE_buildDTable(DTableLL, norm, max, LLlog);
+        }   }
+
+        switch(Offtype)
+        {
+        case bt_rle :
+            Offlog = 0;
+            if (ip > iend-2) return ERROR(srcSize_wrong);   /* min : "raw", hence no header, but at least xxLog bits */
+            FSE_buildDTable_rle(DTableOffb, *ip++ & MaxOff); /* if *ip > MaxOff, data is corrupted */
+            break;
+        case bt_raw :
+            Offlog = Offbits;
+            FSE_buildDTable_raw(DTableOffb, Offbits); break;
+        default :   /* table described by normalized counts read with FSE_readNCount() */
+            {   U32 max = MaxOff;
+                headerSize = FSE_readNCount(norm, &max, &Offlog, ip, iend-ip);
+                if (FSE_isError(headerSize)) return ERROR(GENERIC);
+                if (Offlog > OffFSELog) return ERROR(corruption_detected);
+                ip += headerSize;
+                FSE_buildDTable(DTableOffb, norm, max, Offlog);
+        }   }
+
+        switch(MLtype)
+        {
+        case bt_rle :
+            MLlog = 0;
+            if (ip > iend-2) return ERROR(srcSize_wrong); /* min : "raw", hence no header, but at least xxLog bits */
+            FSE_buildDTable_rle(DTableML, *ip++); break;
+        case bt_raw :
+            MLlog = MLbits;
+            FSE_buildDTable_raw(DTableML, MLbits); break;
+        default :   /* table described by normalized counts read with FSE_readNCount() */
+            {   U32 max = MaxML;
+                headerSize = FSE_readNCount(norm, &max, &MLlog, ip, iend-ip);
+                if (FSE_isError(headerSize)) return ERROR(GENERIC);
+                if (MLlog > MLFSELog) return ERROR(corruption_detected);
+                ip += headerSize;
+                FSE_buildDTable(DTableML, norm, max, MLlog);
+    }   }   }
+
+    return ip-istart;
+}
+
+
+typedef struct {
+    size_t litLength;     /* number of literals to copy before the match */
+    size_t offset;        /* distance between the end of those literals and the start of the match */
+    size_t matchLength;   /* length of the match, MINMATCH included */
+} seq_t;
+
+typedef struct {
+    BIT_DStream_t DStream;    /* bitstream read by the 3 states below */
+    FSE_DState_t stateLL;     /* decoder state for literal lengths */
+    FSE_DState_t stateOffb;   /* decoder state for offset codes */
+    FSE_DState_t stateML;     /* decoder state for match lengths */
+    size_t prevOffset;        /* offset kept aside for sequences using offset code 0 */
+    const BYTE* dumps;        /* next byte to read within the dumps area (extended lengths) */
+    const BYTE* dumpsEnd;     /* end of the dumps area */
+} seqState_t;
+
+
+/* ZSTD_decodeSequence() :
+ * Decodes the next (litLength, offset, matchLength) triplet from seqState and stores it into *seq.
+ * On entry, *seq must still hold the previous sequence : its offset is needed when the new
+ * sequence uses offset code 0.
+ * A length code equal to MaxLL / MaxML is extended with bytes taken from the dumps area.
+ * Those reads never start at or after dumpsEnd : when the area is exhausted (corrupted data),
+ * the length is simply left without extension. */
+static void ZSTD_decodeSequence(seq_t* seq, seqState_t* seqState)
+{
+    size_t litLength;
+    size_t prevOffset;
+    size_t offset;
+    size_t matchLength;
+    const BYTE* dumps = seqState->dumps;
+    const BYTE* const de = seqState->dumpsEnd;
+
+    /* Literal length */
+    litLength = FSE_decodeSymbol(&(seqState->stateLL), &(seqState->DStream));
+    prevOffset = litLength ? seq->offset : seqState->prevOffset;
+    seqState->prevOffset = seq->offset;
+    if (litLength == MaxLL)
+    {
+        const U32 add = (dumps < de) ? *dumps++ : 0;   /* nothing left in dumps : data is corrupted, add nothing */
+        if (add < 255) litLength += add;
+        else if ((size_t)(de - dumps) >= 3)   /* 255 : the length is stored on the 3 next bytes, which must belong to dumps */
+        {
+            litLength = MEM_readLE32(dumps) & 0xFFFFFF;  /* no pb : dumps is always followed by seq tables > 1 byte */
+            dumps += 3;
+        }
+        if (dumps >= de) dumps = de-1;   /* late correction, to avoid read overflow (data is now corrupted anyway) */
+    }
+
+    /* Offset */
+    {
+        static const size_t offsetPrefix[MaxOff+1] = {  /* note : size_t faster than U32 */
+                1 /*fake*/, 1, 2, 4, 8, 16, 32, 64, 128, 256,
+                512, 1024, 2048, 4096, 8192, 16384, 32768, 65536, 131072, 262144,
+                524288, 1048576, 2097152, 4194304, 8388608, 16777216, 33554432, /*fake*/ 1, 1, 1, 1, 1 };
+        U32 offsetCode, nbBits;
+        offsetCode = FSE_decodeSymbol(&(seqState->stateOffb), &(seqState->DStream));   /* <= maxOff, by table construction */
+        if (MEM_32bits()) BIT_reloadDStream(&(seqState->DStream));
+        nbBits = offsetCode - 1;
+        if (offsetCode==0) nbBits = 0;   /* cmove */
+        offset = offsetPrefix[offsetCode] + BIT_readBits(&(seqState->DStream), nbBits);
+        if (MEM_32bits()) BIT_reloadDStream(&(seqState->DStream));
+        if (offsetCode==0) offset = prevOffset;   /* cmove */
+    }
+
+    /* MatchLength */
+    matchLength = FSE_decodeSymbol(&(seqState->stateML), &(seqState->DStream));
+    if (matchLength == MaxML)
+    {
+        const U32 add = (dumps < de) ? *dumps++ : 0;   /* nothing left in dumps : data is corrupted, add nothing */
+        if (add < 255) matchLength += add;
+        else if ((size_t)(de - dumps) >= 3)   /* 255 : the length is stored on the 3 next bytes, which must belong to dumps */
+        {
+            matchLength = MEM_readLE32(dumps) & 0xFFFFFF;  /* no pb : dumps is always followed by seq tables > 1 byte */
+            dumps += 3;
+        }
+        if (dumps >= de) dumps = de-1;   /* late correction, to avoid read overflow (data is now corrupted anyway) */
+    }
+    matchLength += MINMATCH;
+
+    /* save result */
+    seq->litLength = litLength;
+    seq->offset = offset;
+    seq->matchLength = matchLength;
+    seqState->dumps = dumps;
+}
+
+
+/* ZSTD_execSequence() :
+ * Writes one sequence at `op` : sequence.litLength literals taken from *litPtr, followed by
+ * sequence.matchLength bytes copied from `sequence.offset` bytes before the end of those literals.
+ * *litPtr is advanced past the consumed literals.
+ * All bounds are verified on lengths before any pointer is formed, so that oversized values
+ * coming from corrupted data cannot wrap around the address space and slip through the checks.
+ * @return : number of bytes written (litLength + matchLength), or an error code :
+ *           dstSize_tooSmall when the sequence does not fit before oend (literals must also end
+ *           at least 8 bytes before oend), corruption_detected when literals would be read beyond
+ *           litLimit or when the match would start before `base` */
+static size_t ZSTD_execSequence(BYTE* op,
+                                seq_t sequence,
+                                const BYTE** litPtr, const BYTE* const litLimit,
+                                BYTE* const base, BYTE* const oend)
+{
+    static const int dec32table[] = {0, 1, 2, 1, 4, 4, 4, 4};   /* added */
+    static const int dec64table[] = {8, 8, 8, 7, 8, 9,10,11};   /* subtracted */
+    const BYTE* const ostart = op;
+    size_t dstRoom;
+    BYTE* oLitEnd;
+    BYTE* oMatchEnd;
+    BYTE* oend_8;
+    const BYTE* litEnd;
+    const BYTE* match;
+
+    /* checks on output space */
+    if (op > oend) return ERROR(dstSize_tooSmall);
+    dstRoom = (size_t)(oend - op);
+    if (sequence.litLength > dstRoom) return ERROR(dstSize_tooSmall);
+    if (dstRoom - sequence.litLength < 8) return ERROR(dstSize_tooSmall);   /* last match must start at a minimum distance of 8 from oend */
+    if (sequence.matchLength > dstRoom - sequence.litLength) return ERROR(dstSize_tooSmall);   /* overwrite beyond dst buffer */
+
+    /* check on literals */
+    if ((*litPtr > litLimit) || (sequence.litLength > (size_t)(litLimit - *litPtr)))
+        return ERROR(corruption_detected);   /* overRead beyond lit buffer */
+
+    /* lengths are validated : these pointers all stay within their buffers */
+    oLitEnd = op + sequence.litLength;
+    oMatchEnd = oLitEnd + sequence.matchLength;
+    oend_8 = oend-8;
+    litEnd = *litPtr + sequence.litLength;
+
+    /* check on match : it must start within [base, oLitEnd] */
+    if (oLitEnd < base) return ERROR(corruption_detected);
+    if (sequence.offset > (size_t)(oLitEnd - base)) return ERROR(corruption_detected);
+    match = oLitEnd - sequence.offset;
+
+    /* copy Literals */
+    ZSTD_wildcopy(op, *litPtr, (ptrdiff_t)sequence.litLength);   /* note : oLitEnd <= oend-8 : no risk of overwrite beyond oend */
+    op = oLitEnd;
+    *litPtr = litEnd;   /* update for next sequence */
+
+    /* copy Match : first 8 bytes */
+    if (sequence.offset < 8)
+    {
+        /* close range match, overlap : byte by byte, then realign `match` so that 8-byte copies become valid */
+        const int dec64 = dec64table[sequence.offset];
+        op[0] = match[0];
+        op[1] = match[1];
+        op[2] = match[2];
+        op[3] = match[3];
+        match += dec32table[sequence.offset];
+        ZSTD_copy4(op+4, match);
+        match -= dec64;
+    }
+    else
+    {
+        ZSTD_copy8(op, match);
+    }
+    op += 8; match += 8;
+
+    /* copy Match : remainder */
+    if ((size_t)(oend - oMatchEnd) < (size_t)(16-MINMATCH))
+    {
+        /* match ends too close to oend for a wild copy : finish it byte by byte */
+        if (op < oend_8)
+        {
+            ZSTD_wildcopy(op, match, oend_8 - op);
+            match += oend_8 - op;
+            op = oend_8;
+        }
+        while (op < oMatchEnd) *op++ = *match++;
+    }
+    else
+    {
+        ZSTD_wildcopy(op, match, (ptrdiff_t)sequence.matchLength-8);   /* works even if matchLength < 8 */
+    }
+
+    return (size_t)(oMatchEnd - ostart);
+}
+
+/* ZSTD_decompressSequences() :
+ * Decodes the sequences section [seqStart, seqStart+seqSize) and regenerates the block into dst,
+ * taking literals from dctx->litPtr (dctx->litSize bytes). Literals left over once all sequences
+ * are executed are appended at the end.
+ * @return : number of bytes written into dst, or an error code */
+static size_t ZSTD_decompressSequences(
+                               void* ctx,
+                               void* dst, size_t maxDstSize,
+                         const void* seqStart, size_t seqSize)
+{
+    ZSTD_DCtx* const dctx = (ZSTD_DCtx*)ctx;
+    const BYTE* ip = (const BYTE*)seqStart;
+    const BYTE* const iend = ip + seqSize;
+    BYTE* const ostart = (BYTE* const)dst;
+    BYTE* const oend = ostart + maxDstSize;
+    BYTE* const base = (BYTE*) (dctx->base);
+    BYTE* op = ostart;
+    const BYTE* litPtr = dctx->litPtr;
+    const BYTE* const litEnd = litPtr + dctx->litSize;
+    const BYTE* dumps;
+    size_t dumpsLength;
+    int nbSeq;
+    seq_t sequence;
+    seqState_t seqState;
+
+    /* Build Decoding Tables */
+    {
+        const size_t headerSize = ZSTD_decodeSeqHeaders(&nbSeq, &dumps, &dumpsLength,
+                                                        dctx->LLTable, dctx->MLTable, dctx->OffTable,
+                                                        ip, iend-ip);
+        if (ZSTD_isError(headerSize)) return headerSize;
+        ip += headerSize;
+    }
+
+    /* Prepare the decoding state : both "previous offsets" start at 4 */
+    memset(&sequence, 0, sizeof(sequence));
+    sequence.offset = 4;
+    seqState.prevOffset = 4;
+    seqState.dumps = dumps;
+    seqState.dumpsEnd = dumps + dumpsLength;
+    {
+        const size_t initResult = BIT_initDStream(&(seqState.DStream), ip, iend-ip);
+        if (ERR_isError(initResult)) return ERROR(corruption_detected);
+    }
+    FSE_initDState(&(seqState.stateLL), &(seqState.DStream), dctx->LLTable);
+    FSE_initDState(&(seqState.stateOffb), &(seqState.DStream), dctx->OffTable);
+    FSE_initDState(&(seqState.stateML), &(seqState.DStream), dctx->MLTable);
+
+    /* Regen sequences */
+    while ((BIT_reloadDStream(&(seqState.DStream)) <= BIT_DStream_completed) && (nbSeq>0))
+    {
+        size_t oneSeqSize;
+        nbSeq--;
+        ZSTD_decodeSequence(&sequence, &seqState);
+        oneSeqSize = ZSTD_execSequence(op, sequence, &litPtr, litEnd, base, oend);
+        if (ZSTD_isError(oneSeqSize)) return oneSeqSize;
+        op += oneSeqSize;
+    }
+
+    /* check if reached exact end */
+    if ( !BIT_endOfDStream(&(seqState.DStream)) ) return ERROR(corruption_detected);   /* requested too much : data is corrupted */
+    if (nbSeq<0) return ERROR(corruption_detected);   /* requested too many sequences : data is corrupted */
+
+    /* last literal segment */
+    {
+        const size_t lastLLSize = litEnd - litPtr;
+        if (litPtr > litEnd) return ERROR(corruption_detected);
+        if (op+lastLLSize > oend) return ERROR(dstSize_tooSmall);
+        if (op != litPtr) memmove(op, litPtr, lastLLSize);
+        op += lastLLSize;
+    }
+
+    return op-ostart;
+}
+
+
+/* ZSTD_decompressBlock() :
+ * Decodes one compressed block : the literals section first, then the sequences section,
+ * which starts right after it.
+ * @return : number of bytes written into dst, or an error code */
+static size_t ZSTD_decompressBlock(
+                            void* ctx,
+                            void* dst, size_t maxDstSize,
+                      const void* src, size_t srcSize)
+{
+    /* blockType == blockCompressed */
+
+    /* Decode literals sub-block */
+    const size_t litSectionSize = ZSTD_decodeLiteralsBlock(ctx, src, srcSize);
+    if (ZSTD_isError(litSectionSize)) return litSectionSize;
+
+    /* Remaining bytes hold the sequences */
+    return ZSTD_decompressSequences(ctx, dst, maxDstSize,
+                                    (const BYTE*)src + litSectionSize, srcSize - litSectionSize);
+}
+
+
+/* ZSTD_decompressDCtx() :
+ * Decompresses one complete frame (magic number, then blocks up to the bt_end block) from src into dst.
+ * @return : number of bytes written into dst, or an error code
+ *           (srcSize_wrong, prefix_unknown, GENERIC for bt_rle blocks, or any block decoding error) */
+static size_t ZSTD_decompressDCtx(void* ctx, void* dst, size_t maxDstSize, const void* src, size_t srcSize)
+{
+    const BYTE* ip = (const BYTE*)src;
+    const BYTE* const iend = ip + srcSize;
+    BYTE* const ostart = (BYTE* const)dst;
+    BYTE* const oend = ostart + maxDstSize;
+    BYTE* op = ostart;
+    size_t remainingSize = srcSize;
+    U32 magicNumber;
+    blockProperties_t blockProperties;
+
+    /* Frame Header */
+    if (srcSize < ZSTD_frameHeaderSize+ZSTD_blockHeaderSize) return ERROR(srcSize_wrong);
+    magicNumber = MEM_readLE32(src);
+    if (magicNumber != ZSTD_magicNumber) return ERROR(prefix_unknown);
+    ip += ZSTD_frameHeaderSize;
+    remainingSize -= ZSTD_frameHeaderSize;
+
+    /* Loop on each block */
+    for (;;)
+    {
+        size_t decodedSize = 0;
+        const size_t cBlockSize = ZSTD_getcBlockSize(ip, iend-ip, &blockProperties);
+        if (ZSTD_isError(cBlockSize)) return cBlockSize;
+
+        ip += ZSTD_blockHeaderSize;
+        remainingSize -= ZSTD_blockHeaderSize;
+        if (cBlockSize > remainingSize) return ERROR(srcSize_wrong);
+
+        if (blockProperties.blockType == bt_compressed)
+        {
+            decodedSize = ZSTD_decompressBlock(ctx, op, oend-op, ip, cBlockSize);
+        }
+        else if (blockProperties.blockType == bt_raw)
+        {
+            decodedSize = ZSTD_copyUncompressedBlock(op, oend-op, ip, cBlockSize);
+        }
+        else if (blockProperties.blockType == bt_end)
+        {
+            /* end of frame */
+            if (remainingSize) return ERROR(srcSize_wrong);
+        }
+        else
+        {
+            return ERROR(GENERIC);   /* bt_rle : not yet supported; other values : impossible */
+        }
+
+        /* tested before decodedSize, as in the block loop of the original code */
+        if (cBlockSize == 0) break;   /* bt_end */
+
+        if (ZSTD_isError(decodedSize)) return decodedSize;
+        op += decodedSize;
+        ip += cBlockSize;
+        remainingSize -= cBlockSize;
+    }
+
+    return op-ostart;
+}
+
+static size_t ZSTD_decompress(void* dst, size_t maxDstSize, const void* src, size_t srcSize)
+{   /* One-shot entry point : decodes src into dst through ZSTD_decompressDCtx() using a stack-allocated context */
+    ZSTD_DCtx ctx;
+    ctx.base = dst;   /* only 'base' is assigned here; no other field of ctx is set before the call below */
+    return ZSTD_decompressDCtx(&ctx, dst, maxDstSize, src, srcSize);
+}
+
+static size_t ZSTD_findFrameCompressedSize(const void* src, size_t srcSize)
+{
+    const BYTE* ip = (const BYTE*)src;
+    size_t remainingSize = srcSize;
+    U32 magicNumber;
+    blockProperties_t blockProperties;
+    /* Walks the block headers of one frame without decoding any block; returns the number of source bytes the frame occupies, or an error code */
+    /* Frame Header : input must hold at least the frame header plus one block header, and must start with ZSTD_magicNumber */
+    if (srcSize < ZSTD_frameHeaderSize+ZSTD_blockHeaderSize) return ERROR(srcSize_wrong);
+    magicNumber = MEM_readLE32(src);
+    if (magicNumber != ZSTD_magicNumber) return ERROR(prefix_unknown);
+    ip += ZSTD_frameHeaderSize; remainingSize -= ZSTD_frameHeaderSize;
+
+    /* Loop on each block : read the header, then skip over the block body */
+    while (1)
+    {
+        size_t cBlockSize = ZSTD_getcBlockSize(ip, remainingSize, &blockProperties);
+        if (ZSTD_isError(cBlockSize)) return cBlockSize;
+        /* step over the block header; the announced block body must fit in the input that is left */
+        ip += ZSTD_blockHeaderSize;
+        remainingSize -= ZSTD_blockHeaderSize;
+        if (cBlockSize > remainingSize) return ERROR(srcSize_wrong);
+
+        if (cBlockSize == 0) break;   /* bt_end - NOTE(review): relies on ZSTD_getcBlockSize() reporting size 0 for the end block; confirm */
+
+        ip += cBlockSize;
+        remainingSize -= cBlockSize;
+    }
+    /* distance walked from src, which includes the last block header that was read */
+    return ip - (const BYTE*)src;
+}
+
+
+/*******************************
+*  Streaming Decompression API
+*******************************/
+
+static size_t ZSTD_resetDCtx(ZSTD_DCtx* dctx)
+{   /* Puts the streaming context back in its start state; always returns 0 */
+    dctx->expected = ZSTD_frameHeaderSize;   /* the next input chunk must be a frame header */
+    dctx->phase = 0;   /* phase 0 is the frame-header phase in ZSTD_decompressContinue() */
+    dctx->previousDstEnd = NULL;
+    dctx->base = NULL;
+    return 0;
+}
+
+static ZSTD_DCtx* ZSTD_createDCtx(void)
+{   /* Allocates a context with malloc() and resets it; returns NULL when the allocation fails */
+    ZSTD_DCtx* dctx = (ZSTD_DCtx*)malloc(sizeof(ZSTD_DCtx));
+    if (dctx==NULL) return NULL;
+    ZSTD_resetDCtx(dctx);
+    return dctx;
+}
+
+static size_t ZSTD_freeDCtx(ZSTD_DCtx* dctx)
+{   /* Releases the context with free(); always returns 0 */
+    free(dctx);
+    return 0;
+}
+
+static size_t ZSTD_nextSrcSizeToDecompress(ZSTD_DCtx* dctx)
+{   /* Returns the exact srcSize that the next ZSTD_decompressContinue() call will accept (it rejects any other value) */
+    return dctx->expected;
+}
+
+static size_t ZSTD_decompressContinue(ZSTD_DCtx* ctx, void* dst, size_t maxDstSize, const void* src, size_t srcSize)
+{
+    /* Sanity check : the caller must supply exactly ctx->expected bytes */
+    if (srcSize != ctx->expected) return ERROR(srcSize_wrong);
+    if (dst != ctx->previousDstEnd)  /* not contiguous : output does not directly follow the previous block, so restart 'base' at dst */
+        ctx->base = dst;
+    /* One step of a three-phase state machine : phase 0 = frame header, phase 1 = block header, phase 2 = block content */
+    /* Decompress : frame header (produces no output, returns 0) */
+    if (ctx->phase == 0)
+    {
+        /* Check frame magic header */
+        U32 magicNumber = MEM_readLE32(src);
+        if (magicNumber != ZSTD_magicNumber) return ERROR(prefix_unknown);
+        ctx->phase = 1;
+        ctx->expected = ZSTD_blockHeaderSize;
+        return 0;
+    }
+
+    /* Decompress : block header (produces no output, returns 0) */
+    if (ctx->phase == 1)
+    {
+        blockProperties_t bp;
+        size_t blockSize = ZSTD_getcBlockSize(src, ZSTD_blockHeaderSize, &bp);
+        if (ZSTD_isError(blockSize)) return blockSize;
+        if (bp.blockType == bt_end)
+        {
+            ctx->expected = 0;
+            ctx->phase = 0;
+        }
+        else
+        {
+            ctx->expected = blockSize;
+            ctx->bType = bp.blockType;
+            ctx->phase = 2;
+        }
+        /* end block : nothing more expected, back to phase 0; otherwise the block body of 'blockSize' bytes comes next */
+        return 0;
+    }
+
+    /* Decompress : block content (returns the number of bytes written into dst, or an error code) */
+    {
+        size_t rSize;
+        switch(ctx->bType)
+        {
+        case bt_compressed:
+            rSize = ZSTD_decompressBlock(ctx, dst, maxDstSize, src, srcSize);
+            break;
+        case bt_raw :
+            rSize = ZSTD_copyUncompressedBlock(dst, maxDstSize, src, srcSize);
+            break;
+        case bt_rle :
+            return ERROR(GENERIC);   /* not yet handled */
+            break;
+        case bt_end :   /* should never happen (filtered at phase 1) */
+            rSize = 0;
+            break;
+        default:
+            return ERROR(GENERIC);
+        }
+        ctx->phase = 1;   /* NOTE(review): rSize is not tested with ZSTD_isError() before the state below is updated, so an error code is also folded into previousDstEnd */
+        ctx->expected = ZSTD_blockHeaderSize;
+        ctx->previousDstEnd = (void*)( ((char*)dst) + rSize);
+        return rSize;
+    }
+
+}
+
+
+/* wrapper layer */
+
+unsigned ZSTDv03_isError(size_t code)
+{   /* v0.3-prefixed entry point : forwards to ZSTD_isError() */
+    return ZSTD_isError(code);
+}
+
+size_t ZSTDv03_decompress( void* dst, size_t maxOriginalSize,
+                     const void* src, size_t compressedSize)
+{   /* v0.3-prefixed entry point : forwards to ZSTD_decompress() with the arguments unchanged */
+    return ZSTD_decompress(dst, maxOriginalSize, src, compressedSize);
+}
+
+size_t ZSTDv03_findFrameCompressedSize(const void* src, size_t srcSize)
+{   /* v0.3-prefixed entry point : forwards to ZSTD_findFrameCompressedSize() */
+    return ZSTD_findFrameCompressedSize(src, srcSize);
+}
+
+ZSTDv03_Dctx* ZSTDv03_createDCtx(void)
+{   /* v0.3-prefixed entry point : returns the ZSTD_DCtx from ZSTD_createDCtx() cast to the ZSTDv03_Dctx handle type */
+    return (ZSTDv03_Dctx*)ZSTD_createDCtx();
+}
+
+size_t ZSTDv03_freeDCtx(ZSTDv03_Dctx* dctx)
+{   /* v0.3-prefixed entry point : casts the handle back to ZSTD_DCtx and forwards to ZSTD_freeDCtx() */
+    return ZSTD_freeDCtx((ZSTD_DCtx*)dctx);
+}
+
+size_t ZSTDv03_resetDCtx(ZSTDv03_Dctx* dctx)
+{   /* v0.3-prefixed entry point : casts the handle back to ZSTD_DCtx and forwards to ZSTD_resetDCtx() */
+    return ZSTD_resetDCtx((ZSTD_DCtx*)dctx);
+}
+
+size_t ZSTDv03_nextSrcSizeToDecompress(ZSTDv03_Dctx* dctx)
+{   /* v0.3-prefixed entry point : casts the handle back to ZSTD_DCtx and forwards to ZSTD_nextSrcSizeToDecompress() */
+    return ZSTD_nextSrcSizeToDecompress((ZSTD_DCtx*)dctx);
+}
+
+size_t ZSTDv03_decompressContinue(ZSTDv03_Dctx* dctx, void* dst, size_t maxDstSize, const void* src, size_t srcSize)
+{   /* v0.3-prefixed entry point : casts the handle back to ZSTD_DCtx and forwards to ZSTD_decompressContinue() */
+    return ZSTD_decompressContinue((ZSTD_DCtx*)dctx, dst, maxDstSize, src, srcSize);
+}
diff --git a/contrib/libzstd/include/zstd/legacy/zstd_v03.h b/contrib/libzstd/include/zstd/legacy/zstd_v03.h
new file mode 100644
index 00000000000..07f7597bb7c
--- /dev/null
+++ b/contrib/libzstd/include/zstd/legacy/zstd_v03.h
@@ -0,0 +1,87 @@
+/**
+ * Copyright (c) 2016-present, Yann Collet, Facebook, Inc.
+ * All rights reserved.
+ *
+ * This source code is licensed under the BSD-style license found in the
+ * LICENSE file in the root directory of this source tree. An additional grant
+ * of patent rights can be found in the PATENTS file in the same directory.
+ */
+
+#ifndef ZSTD_V03_H_298734209782
+#define ZSTD_V03_H_298734209782
+
+#if defined (__cplusplus)
+extern "C" {
+#endif
+
+/* *************************************
+*  Includes
+***************************************/
+#include <stddef.h>   /* size_t */
+
+
+/* *************************************
+*  Simple one-step function
+***************************************/
+/**
+ZSTDv03_decompress() : decompress ZSTD frames compliant with v0.3.x format
+    compressedSize : is the exact source size
+    maxOriginalSize : is the size of the 'dst' buffer, which must be already allocated.
+                      It must be equal or larger than originalSize, otherwise decompression will fail.
+    return : the number of bytes decompressed into destination buffer (originalSize)
+             or an errorCode if it fails (which can be tested using ZSTDv03_isError())
+*/
+size_t ZSTDv03_decompress( void* dst, size_t maxOriginalSize,
+                     const void* src, size_t compressedSize);
+
+/**
+ZSTDv03_findFrameCompressedSize() : get the source length of a ZSTD frame compliant with v0.3.x format
+    compressedSize : The size of the 'src' buffer, at least as large as the frame pointed to by 'src'
+    return : the number of bytes that would be read to decompress this frame
+             or an errorCode if it fails (which can be tested using ZSTDv03_isError())
+*/
+size_t ZSTDv03_findFrameCompressedSize(const void* src, size_t compressedSize);
+
+    /**
+ZSTDv03_isError() : tells if the result of ZSTDv03_decompress() is an error
+*/
+unsigned ZSTDv03_isError(size_t code);
+
+
+/* *************************************
+*  Advanced functions
+***************************************/
+typedef struct ZSTDv03_Dctx_s ZSTDv03_Dctx;
+ZSTDv03_Dctx* ZSTDv03_createDCtx(void);
+size_t ZSTDv03_freeDCtx(ZSTDv03_Dctx* dctx);
+
+size_t ZSTDv03_decompressDCtx(void* ctx,
+                              void* dst, size_t maxOriginalSize,
+                        const void* src, size_t compressedSize);   /* NOTE(review): no ZSTDv03_decompressDCtx() definition appears in the wrapper layer of zstd_v03.c - confirm it is defined before calling */
+
+/* *************************************
+*  Streaming functions
+***************************************/
+size_t ZSTDv03_resetDCtx(ZSTDv03_Dctx* dctx);
+
+size_t ZSTDv03_nextSrcSizeToDecompress(ZSTDv03_Dctx* dctx);
+size_t ZSTDv03_decompressContinue(ZSTDv03_Dctx* dctx, void* dst, size_t maxDstSize, const void* src, size_t srcSize);
+/**
+  Use the above functions alternately.
+  ZSTDv03_nextSrcSizeToDecompress() tells how many bytes to provide as 'srcSize' to ZSTDv03_decompressContinue().
+  ZSTDv03_decompressContinue() can use previously decoded blocks as history if they are located contiguously before the current block.
+  Result is the number of bytes regenerated within 'dst'.
+  It can be zero, which is not an error; it just means ZSTDv03_decompressContinue() has decoded some header.
+*/
+
+/* *************************************
+*  Prefix - version detection
+***************************************/
+#define ZSTDv03_magicNumber 0xFD2FB523   /* v0.3 */
+
+
+#if defined (__cplusplus)
+}
+#endif
+
+#endif /* ZSTD_V03_H_298734209782 */
diff --git a/contrib/libzstd/include/zstd/legacy/zstd_v04.c b/contrib/libzstd/include/zstd/legacy/zstd_v04.c
new file mode 100644
index 00000000000..8b8e23cb09c
--- /dev/null
+++ b/contrib/libzstd/include/zstd/legacy/zstd_v04.c
@@ -0,0 +1,3823 @@
+/**
+ * Copyright (c) 2016-present, Yann Collet, Facebook, Inc.
+ * All rights reserved.
+ *
+ * This source code is licensed under the BSD-style license found in the
+ * LICENSE file in the root directory of this source tree. An additional grant
+ * of patent rights can be found in the PATENTS file in the same directory.
+ */
+
+
+/*- Dependencies -*/
+#include "zstd_v04.h"
+#include "error_private.h"
+
+
+/* ******************************************************************
+   mem.h
+   low-level memory access routines
+   Copyright (C) 2013-2015, Yann Collet.
+
+   BSD 2-Clause License (http://www.opensource.org/licenses/bsd-license.php)
+
+   Redistribution and use in source and binary forms, with or without
+   modification, are permitted provided that the following conditions are
+   met:
+
+       * Redistributions of source code must retain the above copyright
+   notice, this list of conditions and the following disclaimer.
+       * Redistributions in binary form must reproduce the above
+   copyright notice, this list of conditions and the following disclaimer
+   in the documentation and/or other materials provided with the
+   distribution.
+
+   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+   OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+    You can contact the author at :
+    - FSE source repository : https://github.com/Cyan4973/FiniteStateEntropy
+    - Public forum : https://groups.google.com/forum/#!forum/lz4c
+****************************************************************** */
+#ifndef MEM_H_MODULE
+#define MEM_H_MODULE
+
+#if defined (__cplusplus)
+extern "C" {
+#endif
+
+/******************************************
+*  Includes
+******************************************/
+#include <stddef.h>    /* size_t, ptrdiff_t */
+#include <string.h>    /* memcpy */
+
+
+/******************************************
+*  Compiler-specific
+******************************************/
+#if defined(_MSC_VER)   /* Visual Studio */
+#   include <stdlib.h>  /* _byteswap_ulong */
+#   include <intrin.h>  /* _byteswap_* */
+#endif
+#if defined(__GNUC__)
+#  define MEM_STATIC static __attribute__((unused))
+#elif defined (__cplusplus) || (defined (__STDC_VERSION__) && (__STDC_VERSION__ >= 199901L) /* C99 */)
+#  define MEM_STATIC static inline
+#elif defined(_MSC_VER)
+#  define MEM_STATIC static __inline
+#else
+#  define MEM_STATIC static  /* this version may generate warnings for unused static functions; disable the relevant warning */
+#endif
+
+
+/****************************************************************
+*  Basic Types
+*****************************************************************/
+#if defined (__cplusplus) || (defined (__STDC_VERSION__) && (__STDC_VERSION__ >= 199901L) /* C99 */)
+# include <stdint.h>
+  typedef  uint8_t BYTE;
+  typedef uint16_t U16;
+  typedef  int16_t S16;
+  typedef uint32_t U32;
+  typedef  int32_t S32;
+  typedef uint64_t U64;
+  typedef  int64_t S64;
+#else
+  typedef unsigned char       BYTE;
+  typedef unsigned short      U16;
+  typedef   signed short      S16;
+  typedef unsigned int        U32;
+  typedef   signed int        S32;
+  typedef unsigned long long  U64;
+  typedef   signed long long  S64;
+#endif
+
+
+/****************************************************************
+*  Memory I/O
+*****************************************************************/
+/* MEM_FORCE_MEMORY_ACCESS
+ * By default, access to unaligned memory is controlled by `memcpy()`, which is safe and portable.
+ * Unfortunately, on some target/compiler combinations, the generated assembly is sub-optimal.
+ * The switch below allows selecting a different access method for improved performance.
+ * Method 0 (default) : use `memcpy()`. Safe and portable.
+ * Method 1 : `__packed` statement. It depends on compiler extension (ie, not portable).
+ *            This method is safe if your compiler supports it, and *generally* as fast or faster than `memcpy`.
+ * Method 2 : direct access. This method is portable but violates the C standard.
+ *            It can generate buggy code on targets generating assembly depending on alignment.
+ *            But in some circumstances, it's the only known way to get the most performance (ie GCC + ARMv6)
+ * See http://fastcompression.blogspot.fr/2015/08/accessing-unaligned-memory.html for details.
+ * Prefer these methods in priority order (0 > 1 > 2)
+ */
+#ifndef MEM_FORCE_MEMORY_ACCESS   /* can be defined externally, on command line for example */
+#  if defined(__GNUC__) && ( defined(__ARM_ARCH_6__) || defined(__ARM_ARCH_6J__) || defined(__ARM_ARCH_6K__) || defined(__ARM_ARCH_6Z__) || defined(__ARM_ARCH_6ZK__) || defined(__ARM_ARCH_6T2__) )
+#    define MEM_FORCE_MEMORY_ACCESS 2
+#  elif (defined(__INTEL_COMPILER) && !defined(WIN32)) || \
+  (defined(__GNUC__) && ( defined(__ARM_ARCH_7__) || defined(__ARM_ARCH_7A__) || defined(__ARM_ARCH_7R__) || defined(__ARM_ARCH_7M__) || defined(__ARM_ARCH_7S__) ))
+#    define MEM_FORCE_MEMORY_ACCESS 1
+#  endif
+#endif
+
+MEM_STATIC unsigned MEM_32bits(void) { return sizeof(void*)==4; }   /* 1 when pointers are 4 bytes wide, else 0 */
+MEM_STATIC unsigned MEM_64bits(void) { return sizeof(void*)==8; }   /* 1 when pointers are 8 bytes wide, else 0 */
+
+MEM_STATIC unsigned MEM_isLittleEndian(void)
+{   /* Run-time endianness probe : stores the U32 value 1 and returns its first byte in memory (1 on little-endian, 0 otherwise) */
+    const union { U32 u; BYTE c[4]; } one = { 1 };   /* don't use static : performance detrimental  */
+    return one.c[0];
+}
+
+#if defined(MEM_FORCE_MEMORY_ACCESS) && (MEM_FORCE_MEMORY_ACCESS==2)
+/* Native-endian accessors MEM_read16/32/64 and MEM_write16 : exactly one of the three implementations below is compiled, selected by MEM_FORCE_MEMORY_ACCESS */
+/* Method 2 : direct pointer dereference. Violates C standard on structure alignment.
+Only use if no other choice to achieve best performance on target platform */
+MEM_STATIC U16 MEM_read16(const void* memPtr) { return *(const U16*) memPtr; }
+MEM_STATIC U32 MEM_read32(const void* memPtr) { return *(const U32*) memPtr; }
+MEM_STATIC U64 MEM_read64(const void* memPtr) { return *(const U64*) memPtr; }
+
+MEM_STATIC void MEM_write16(void* memPtr, U16 value) { *(U16*)memPtr = value; }
+
+#elif defined(MEM_FORCE_MEMORY_ACCESS) && (MEM_FORCE_MEMORY_ACCESS==1)
+
+/* Method 1 : access through a packed union. __pack instructions are safer, but compiler specific, hence potentially problematic for some compilers */
+/* currently only defined for gcc and icc */
+typedef union { U16 u16; U32 u32; U64 u64; } __attribute__((packed)) unalign;
+
+MEM_STATIC U16 MEM_read16(const void* ptr) { return ((const unalign*)ptr)->u16; }
+MEM_STATIC U32 MEM_read32(const void* ptr) { return ((const unalign*)ptr)->u32; }
+MEM_STATIC U64 MEM_read64(const void* ptr) { return ((const unalign*)ptr)->u64; }
+
+MEM_STATIC void MEM_write16(void* memPtr, U16 value) { ((unalign*)memPtr)->u16 = value; }
+
+#else
+
+/* default method (memcpy into / out of a local), safe and standard.
+   can sometimes prove slower */
+
+MEM_STATIC U16 MEM_read16(const void* memPtr)
+{
+    U16 val; memcpy(&val, memPtr, sizeof(val)); return val;
+}
+
+MEM_STATIC U32 MEM_read32(const void* memPtr)
+{
+    U32 val; memcpy(&val, memPtr, sizeof(val)); return val;
+}
+
+MEM_STATIC U64 MEM_read64(const void* memPtr)
+{
+    U64 val; memcpy(&val, memPtr, sizeof(val)); return val;
+}
+
+MEM_STATIC void MEM_write16(void* memPtr, U16 value)
+{
+    memcpy(memPtr, &value, sizeof(value));
+}
+
+#endif /* MEM_FORCE_MEMORY_ACCESS */
+
+
+MEM_STATIC U16 MEM_readLE16(const void* memPtr)
+{   /* Reads a 16-bit little-endian value from memPtr, whatever the host byte order */
+    if (MEM_isLittleEndian())
+        return MEM_read16(memPtr);
+    else
+    {
+        const BYTE* p = (const BYTE*)memPtr;
+        return (U16)(p[0] + (p[1]<<8));   /* p[0] is the low byte, p[1] the high byte */
+    }
+}
+
+MEM_STATIC void MEM_writeLE16(void* memPtr, U16 val)
+{   /* Stores val at memPtr as a 16-bit little-endian value, whatever the host byte order */
+    if (MEM_isLittleEndian())
+    {
+        MEM_write16(memPtr, val);
+    }
+    else
+    {
+        BYTE* p = (BYTE*)memPtr;
+        p[0] = (BYTE)val;   /* low byte first */
+        p[1] = (BYTE)(val>>8);
+    }
+}
+
+MEM_STATIC U32 MEM_readLE32(const void* memPtr)
+{   /* Reads a 32-bit little-endian value from memPtr, whatever the host byte order */
+    if (MEM_isLittleEndian())
+        return MEM_read32(memPtr);
+    else
+    {
+        const BYTE* p = (const BYTE*)memPtr;
+        return (U32)((U32)p[0] + ((U32)p[1]<<8) + ((U32)p[2]<<16) + ((U32)p[3]<<24));   /* each byte is widened to U32 before it is shifted */
+    }
+}
+
+
+MEM_STATIC U64 MEM_readLE64(const void* memPtr)
+{   /* Reads a 64-bit little-endian value from memPtr, whatever the host byte order */
+    if (MEM_isLittleEndian())
+        return MEM_read64(memPtr);
+    else
+    {
+        const BYTE* p = (const BYTE*)memPtr;
+        return (U64)((U64)p[0] + ((U64)p[1]<<8) + ((U64)p[2]<<16) + ((U64)p[3]<<24)
+                     + ((U64)p[4]<<32) + ((U64)p[5]<<40) + ((U64)p[6]<<48) + ((U64)p[7]<<56));   /* each byte is widened to U64 before it is shifted */
+    }
+}
+
+
+MEM_STATIC size_t MEM_readLEST(const void* memPtr)
+{   /* Reads a little-endian value as size_t : 4 bytes when MEM_32bits() is true, 8 bytes otherwise */
+    if (MEM_32bits())
+        return (size_t)MEM_readLE32(memPtr);
+    else
+        return (size_t)MEM_readLE64(memPtr);
+}
+
+
+#if defined (__cplusplus)
+}
+#endif
+
+#endif /* MEM_H_MODULE */
+
+/*
+    zstd - standard compression library
+    Header File for static linking only
+    Copyright (C) 2014-2015, Yann Collet.
+
+    BSD 2-Clause License (http://www.opensource.org/licenses/bsd-license.php)
+
+    Redistribution and use in source and binary forms, with or without
+    modification, are permitted provided that the following conditions are
+    met:
+    * Redistributions of source code must retain the above copyright
+    notice, this list of conditions and the following disclaimer.
+    * Redistributions in binary form must reproduce the above
+    copyright notice, this list of conditions and the following disclaimer
+    in the documentation and/or other materials provided with the
+    distribution.
+    THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+    "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+    LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+    A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+    OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+    SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+    LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+    DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+    THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+    (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+    OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+    You can contact the author at :
+    - zstd source repository : https://github.com/Cyan4973/zstd
+    - zstd public forum : https://groups.google.com/forum/#!forum/lz4c
+*/
+#ifndef ZSTD_STATIC_H
+#define ZSTD_STATIC_H
+
+/* The objects defined in this file shall be considered experimental.
+ * They are not considered stable, as their prototype may change in the future.
+ * You can use them for tests, provide feedback, or if you can endure risks of future changes.
+ */
+
+#if defined (__cplusplus)
+extern "C" {
+#endif
+
+/* *************************************
+*  Types
+***************************************/
+#define ZSTD_WINDOWLOG_MAX 26
+#define ZSTD_WINDOWLOG_MIN 18
+#define ZSTD_WINDOWLOG_ABSOLUTEMIN 11
+#define ZSTD_CONTENTLOG_MAX (ZSTD_WINDOWLOG_MAX+1)
+#define ZSTD_CONTENTLOG_MIN 4
+#define ZSTD_HASHLOG_MAX 28
+#define ZSTD_HASHLOG_MIN 4
+#define ZSTD_SEARCHLOG_MAX (ZSTD_CONTENTLOG_MAX-1)
+#define ZSTD_SEARCHLOG_MIN 1
+#define ZSTD_SEARCHLENGTH_MAX 7
+#define ZSTD_SEARCHLENGTH_MIN 4
+
+/** from faster to stronger */
+typedef enum { ZSTD_fast, ZSTD_greedy, ZSTD_lazy, ZSTD_lazy2, ZSTD_btlazy2 } ZSTD_strategy;
+
+typedef struct
+{
+    U64 srcSize;       /* optional : tells how many bytes are present in the frame. Use 0 if not known. */
+    U32 windowLog;     /* largest match distance : larger == more compression, more memory needed during decompression */
+    U32 contentLog;    /* full search segment : larger == more compression, slower, more memory (useless for fast) */
+    U32 hashLog;       /* dispatch table : larger == more memory, faster */
+    U32 searchLog;     /* nb of searches : larger == more compression, slower */
+    U32 searchLength;  /* size of matches : larger == faster decompression, sometimes less compression */
+    ZSTD_strategy strategy;   /* one of the ZSTD_strategy enum values */
+} ZSTD_parameters;
+
+typedef ZSTDv04_Dctx ZSTD_DCtx;
+
+/* *************************************
+*  Advanced functions
+***************************************/
+/** ZSTD_decompress_usingDict
+*   Same as ZSTD_decompressDCtx, using a Dictionary content as prefix
+*   Note : dict can be NULL, in which case, it's equivalent to ZSTD_decompressDCtx() */
+static size_t ZSTD_decompress_usingDict(ZSTD_DCtx* ctx,
+                                             void* dst, size_t maxDstSize,
+                                       const void* src, size_t srcSize,
+                                       const void* dict,size_t dictSize);
+
+
+/* **************************************
+*  Streaming functions (direct mode)
+****************************************/
+static size_t ZSTD_resetDCtx(ZSTD_DCtx* dctx);
+static size_t ZSTD_getFrameParams(ZSTD_parameters* params, const void* src, size_t srcSize);
+static void   ZSTD_decompress_insertDictionary(ZSTD_DCtx* ctx, const void* src, size_t srcSize);
+
+static size_t ZSTD_nextSrcSizeToDecompress(ZSTD_DCtx* dctx);
+static size_t ZSTD_decompressContinue(ZSTD_DCtx* dctx, void* dst, size_t maxDstSize, const void* src, size_t srcSize);
+
+/**
+  Streaming decompression, bufferless mode
+
+  A ZSTD_DCtx object is required to track streaming operations.
+  Use ZSTD_createDCtx() / ZSTD_freeDCtx() to manage it.
+  A ZSTD_DCtx object can be re-used multiple times. Use ZSTD_resetDCtx() to return to fresh status.
+
+  First operation is to retrieve frame parameters, using ZSTD_getFrameParams().
+  This function doesn't consume its input. It needs enough input data to properly decode the frame header.
+  Objective is to retrieve *params.windowlog, to know minimum amount of memory required during decoding.
+  Result : 0 when successful, it means the ZSTD_parameters structure has been filled.
+           >0 : means there is not enough data in src. Provides the expected size to successfully decode header.
+           errorCode, which can be tested using ZSTD_isError() (For example, if it's not a ZSTD header)
+
+  Then, you can optionally insert a dictionary.
+  This operation must mimic the compressor behavior, otherwise decompression will fail or be corrupted.
+
+  Then it's possible to start decompression.
+  Use ZSTD_nextSrcSizeToDecompress() and ZSTD_decompressContinue() alternately.
+  ZSTD_nextSrcSizeToDecompress() tells how many bytes to provide as 'srcSize' to ZSTD_decompressContinue().
+  ZSTD_decompressContinue() requires this exact amount of bytes, or it will fail.
+  ZSTD_decompressContinue() needs previous data blocks during decompression, up to (1 << windowlog).
+  They should preferably be located contiguously, prior to current block. Alternatively, a round buffer is also possible.
+
+  @result of ZSTD_decompressContinue() is the number of bytes regenerated within 'dst'.
+  It can be zero, which is not an error; it just means ZSTD_decompressContinue() has decoded some header.
+
+  A frame is fully decoded when ZSTD_nextSrcSizeToDecompress() returns zero.
+  Context can then be reset to start a new decompression.
+*/
+
+
+#if defined (__cplusplus)
+}
+#endif
+
+
+#endif  /* ZSTD_STATIC_H */
+
+
+/*
+    zstd_internal - common functions to include
+    Header File for include
+    Copyright (C) 2014-2015, Yann Collet.
+
+    BSD 2-Clause License (http://www.opensource.org/licenses/bsd-license.php)
+
+    Redistribution and use in source and binary forms, with or without
+    modification, are permitted provided that the following conditions are
+    met:
+    * Redistributions of source code must retain the above copyright
+    notice, this list of conditions and the following disclaimer.
+    * Redistributions in binary form must reproduce the above
+    copyright notice, this list of conditions and the following disclaimer
+    in the documentation and/or other materials provided with the
+    distribution.
+    THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+    "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+    LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+    A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+    OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+    SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+    LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+    DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+    THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+    (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+    OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+    You can contact the author at :
+    - zstd source repository : https://github.com/Cyan4973/zstd
+    - zstd public forum : https://groups.google.com/forum/#!forum/lz4c
+*/
+#ifndef ZSTD_CCOMMON_H_MODULE
+#define ZSTD_CCOMMON_H_MODULE
+
+#if defined (__cplusplus)
+extern "C" {
+#endif
+
+/* *************************************
+*  Common macros
+***************************************/
+#define MIN(a,b) ((a)<(b) ? (a) : (b))
+#define MAX(a,b) ((a)>(b) ? (a) : (b))
+
+
+/* *************************************
+*  Common constants
+***************************************/
+#define ZSTD_MAGICNUMBER 0xFD2FB524   /* v0.4 */
+
+#define KB *(1 <<10)   /* postfix unit macro : '128 KB' expands to '128 *(1 <<10)' */
+#define MB *(1 <<20)   /* postfix unit macro, same pattern as KB */
+#define GB *(1U<<30)   /* postfix unit macro; the multiplier is unsigned here */
+
+#define BLOCKSIZE (128 KB)                 /* define, for static allocation */
+
+static const size_t ZSTD_blockHeaderSize = 3;   /* size in bytes of a block header */
+static const size_t ZSTD_frameHeaderSize_min = 5;
+#define ZSTD_frameHeaderSize_max 5         /* define, for static allocation */
+
+#define BIT7 128
+#define BIT6  64
+#define BIT5  32
+#define BIT4  16
+#define BIT1   2
+#define BIT0   1
+
+#define IS_RAW BIT0
+#define IS_RLE BIT1
+
+#define MINMATCH 4
+#define REPCODE_STARTVALUE 4
+
+#define MLbits   7
+#define LLbits   6
+#define Offbits  5
+#define MaxML  ((1<<MLbits) - 1)
+#define MaxLL  ((1<<LLbits) - 1)
+#define MaxOff ((1<<Offbits)- 1)
+#define MLFSELog   10
+#define LLFSELog   10
+#define OffFSELog   9
+#define MaxSeq MAX(MaxLL, MaxML)
+
+#define MIN_SEQUENCES_SIZE (2 /*seqNb*/ + 2 /*dumps*/ + 3 /*seqTables*/ + 1 /*bitStream*/)
+#define MIN_CBLOCK_SIZE (3 /*litCSize*/ + MIN_SEQUENCES_SIZE)
+
+typedef enum { bt_compressed, bt_raw, bt_rle, bt_end } blockType_t;
+
+
+/* ******************************************
+*  Shared functions to include for inlining
+********************************************/
+static void ZSTD_copy8(void* dst, const void* src) { memcpy(dst, src, 8); }   /* copies exactly 8 bytes from src to dst */
+
+#define COPY8(d,s) { ZSTD_copy8(d,s); d+=8; s+=8; }
+
+/*! ZSTD_wildcopy : custom version of memcpy() that copies in 8-byte steps; it can write up to 7 bytes past dst+length, and still copies 8 bytes when length <= 0 */
+static void ZSTD_wildcopy(void* dst, const void* src, ptrdiff_t length)
+{
+    const BYTE* ip = (const BYTE*)src;
+    BYTE* op = (BYTE*)dst;
+    BYTE* const oend = op + length;
+    do   /* do-while : the first 8-byte copy happens before 'length' is ever tested */
+        COPY8(op, ip)
+    while (op < oend);
+}
+
+
+#if defined (__cplusplus)
+}
+#endif
+
+
+/* ******************************************************************
+   FSE : Finite State Entropy coder
+   header file
+   Copyright (C) 2013-2015, Yann Collet.
+
+   BSD 2-Clause License (http://www.opensource.org/licenses/bsd-license.php)
+
+   Redistribution and use in source and binary forms, with or without
+   modification, are permitted provided that the following conditions are
+   met:
+
+       * Redistributions of source code must retain the above copyright
+   notice, this list of conditions and the following disclaimer.
+       * Redistributions in binary form must reproduce the above
+   copyright notice, this list of conditions and the following disclaimer
+   in the documentation and/or other materials provided with the
+   distribution.
+
+   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+   OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+   You can contact the author at :
+   - Source repository : https://github.com/Cyan4973/FiniteStateEntropy
+   - Public forum : https://groups.google.com/forum/#!forum/lz4c
+****************************************************************** */
+#ifndef FSE_H
+#define FSE_H
+
+#if defined (__cplusplus)
+extern "C" {
+#endif
+
+
+/* *****************************************
+*  Includes
+******************************************/
+#include <stddef.h>    /* size_t, ptrdiff_t */
+
+
+/* *****************************************
+*  FSE simple functions
+******************************************/
+static size_t FSE_decompress(void* dst,  size_t maxDstSize,
+                const void* cSrc, size_t cSrcSize);
+/*!
+FSE_decompress():
+    Decompress FSE data from buffer 'cSrc', of size 'cSrcSize',
+    into already allocated destination buffer 'dst', of size 'maxDstSize'.
+    return : size of regenerated data (<= maxDstSize)
+             or an error code, which can be tested using FSE_isError()
+
+    ** Important ** : FSE_decompress() doesn't decompress non-compressible nor RLE data !!!
+    Why ? : making this distinction requires a header.
+    Header management is intentionally delegated to the user layer, which can better manage special cases.
+*/
+
+
+/* *****************************************
+*  Tool functions
+******************************************/
+/* Error Management */
+static unsigned    FSE_isError(size_t code);        /* tells if a return value is an error code */
+
+
+
+/* *****************************************
+*  FSE detailed API
+******************************************/
+/*!
+FSE_compress() does the following:
+1. count symbol occurrence from source[] into table count[]
+2. normalize counters so that sum(count[]) == Power_of_2 (2^tableLog)
+3. save normalized counters to memory buffer using writeNCount()
+4. build encoding table 'CTable' from normalized counters
+5. encode the data stream using encoding table 'CTable'
+
+FSE_decompress() does the following:
+1. read normalized counters with readNCount()
+2. build decoding table 'DTable' from normalized counters
+3. decode the data stream using decoding table 'DTable'
+
+The following API allows targeting specific sub-functions for advanced tasks.
+For example, it's possible to compress several blocks using the same 'CTable',
+or to save and provide normalized distribution using external method.
+*/
+
+
+/* *** DECOMPRESSION *** */
+
+/*!
+FSE_readNCount():
+   Read compactly saved 'normalizedCounter' from 'rBuffer'.
+   return : size read from 'rBuffer'
+            or an errorCode, which can be tested using FSE_isError()
+            maxSymbolValuePtr[0] and tableLogPtr[0] will also be updated with their respective values */
+static  size_t FSE_readNCount (short* normalizedCounter, unsigned* maxSymbolValuePtr, unsigned* tableLogPtr, const void* rBuffer, size_t rBuffSize);
+
+/*!
+Constructor and Destructor of type FSE_DTable
+    Note that its size depends on 'tableLog' */
+typedef unsigned FSE_DTable;   /* don't allocate that. It's just a way to be more restrictive than void* */
+
+/*!
+FSE_buildDTable():
+   Builds 'dt', which must be already allocated, using FSE_createDTable()
+   return : 0,
+            or an errorCode, which can be tested using FSE_isError() */
+static size_t FSE_buildDTable ( FSE_DTable* dt, const short* normalizedCounter, unsigned maxSymbolValue, unsigned tableLog);
+
+/*!
+FSE_decompress_usingDTable():
+   Decompress compressed source 'cSrc' of size 'cSrcSize' using 'dt'
+   into 'dst' which must be already allocated.
+   return : size of regenerated data (necessarily <= maxDstSize)
+            or an errorCode, which can be tested using FSE_isError() */
+static  size_t FSE_decompress_usingDTable(void* dst, size_t maxDstSize, const void* cSrc, size_t cSrcSize, const FSE_DTable* dt);
+
+/*!
+Tutorial :
+----------
+(Note : these functions only decompress FSE-compressed blocks.
+ If block is uncompressed, use memcpy() instead
+ If block is a single repeated byte, use memset() instead )
+
+The first step is to obtain the normalized frequencies of symbols.
+This can be performed by FSE_readNCount() if it was saved using FSE_writeNCount().
+'normalizedCounter' must be already allocated, and have at least 'maxSymbolValuePtr[0]+1' cells of signed short.
+In practice, that means it's necessary to know 'maxSymbolValue' beforehand,
+or size the table to handle worst case situations (typically 256).
+FSE_readNCount() will provide 'tableLog' and 'maxSymbolValue'.
+The result of FSE_readNCount() is the number of bytes read from 'rBuffer'.
+Note that 'rBuffSize' must be at least 4 bytes, even if useful information is less than that.
+If there is an error, the function will return an error code, which can be tested using FSE_isError().
+
+The next step is to build the decompression tables 'FSE_DTable' from 'normalizedCounter'.
+This is performed by the function FSE_buildDTable().
+The space required by 'FSE_DTable' must be already allocated using FSE_createDTable().
+If there is an error, the function will return an error code, which can be tested using FSE_isError().
+
+'FSE_DTable' can then be used to decompress 'cSrc', with FSE_decompress_usingDTable().
+'cSrcSize' must be strictly correct, otherwise decompression will fail.
+FSE_decompress_usingDTable() result will tell how many bytes were regenerated (<=maxDstSize).
+If there is an error, the function will return an error code, which can be tested using FSE_isError(). (ex: dst buffer too small)
+*/
+
+
+#if defined (__cplusplus)
+}
+#endif
+
+#endif  /* FSE_H */
+
+
+/* ******************************************************************
+   bitstream
+   Part of NewGen Entropy library
+   header file (to include)
+   Copyright (C) 2013-2015, Yann Collet.
+
+   BSD 2-Clause License (http://www.opensource.org/licenses/bsd-license.php)
+
+   Redistribution and use in source and binary forms, with or without
+   modification, are permitted provided that the following conditions are
+   met:
+
+       * Redistributions of source code must retain the above copyright
+   notice, this list of conditions and the following disclaimer.
+       * Redistributions in binary form must reproduce the above
+   copyright notice, this list of conditions and the following disclaimer
+   in the documentation and/or other materials provided with the
+   distribution.
+
+   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+   OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+   You can contact the author at :
+   - Source repository : https://github.com/Cyan4973/FiniteStateEntropy
+   - Public forum : https://groups.google.com/forum/#!forum/lz4c
+****************************************************************** */
+#ifndef BITSTREAM_H_MODULE
+#define BITSTREAM_H_MODULE
+
+#if defined (__cplusplus)
+extern "C" {
+#endif
+
+
+/*
+*  This API consists of small unitary functions, which highly benefit from being inlined.
+*  Since link-time-optimization is not available for all compilers,
+*  these functions are defined into a .h to be included.
+*/
+
+/**********************************************
+*  bitStream decompression API (read backward)
+**********************************************/
+typedef struct   /* state of a bitstream that is read backward, from the end of the source buffer toward its start */
+{
+    size_t   bitContainer;   /* local register : chunk of the stream most recently loaded from memory */
+    unsigned bitsConsumed;   /* number of bits of bitContainer that are already consumed */
+    const char* ptr;         /* address the next reload of bitContainer is computed from */
+    const char* start;       /* first byte of the source buffer */
+} BIT_DStream_t;
+
+typedef enum { BIT_DStream_unfinished = 0,    /* register reloaded; the start of the buffer has not been reached */
+               BIT_DStream_endOfBuffer = 1,   /* read position is at the start of the buffer; some bits of the register remain unread */
+               BIT_DStream_completed = 2,     /* read position is at the start of the buffer and the register is entirely consumed */
+               BIT_DStream_overflow = 3 } BIT_DStream_status;  /* result of BIT_reloadDStream(); overflow : more bits consumed than the register holds */
+               /* values 1,2,4,8 would be better for bitmap combinations, but slow down performance a bit */
+
+MEM_STATIC size_t   BIT_initDStream(BIT_DStream_t* bitD, const void* srcBuffer, size_t srcSize);
+MEM_STATIC size_t   BIT_readBits(BIT_DStream_t* bitD, unsigned nbBits);
+MEM_STATIC BIT_DStream_status BIT_reloadDStream(BIT_DStream_t* bitD);
+MEM_STATIC unsigned BIT_endOfDStream(const BIT_DStream_t* bitD);
+
+
+/*
+* Start by invoking BIT_initDStream().
+* A chunk of the bitStream is then stored into a local register.
+* Local register size is 64-bits on 64-bits systems, 32-bits on 32-bits systems (size_t).
+* You can then retrieve bitFields stored into the local register, **in reverse order**.
+* Local register is manually filled from memory by the BIT_reloadDStream() method.
+* A reload guarantees a minimum of ((8*sizeof(size_t))-7) bits when its result is BIT_DStream_unfinished.
+* Otherwise, it can be less than that, so proceed accordingly.
+* Checking if DStream has reached its end can be performed with BIT_endOfDStream()
+*/
+
+
+/******************************************
+*  unsafe API
+******************************************/
+MEM_STATIC size_t BIT_readBitsFast(BIT_DStream_t* bitD, unsigned nbBits);
+/* faster, but works only if nbBits >= 1 */
+
+
+
+/****************************************************************
+*  Helper functions
+****************************************************************/
+MEM_STATIC unsigned BIT_highbit32 (U32 val)   /* @return : index (0-31) of the highest set bit of val; val must be != 0 */
+{
+#   if defined(_MSC_VER)   /* Visual */
+    unsigned long r=0;
+    _BitScanReverse ( &r, val );
+    return (unsigned) r;
+#   elif defined(__GNUC__) && (__GNUC__ >= 3)   /* Use GCC Intrinsic */
+    return 31 - __builtin_clz (val);   /* __builtin_clz(0) is undefined, hence the val != 0 requirement */
+#   else   /* Software version */
+    static const unsigned DeBruijnClz[32] = { 0, 9, 1, 10, 13, 21, 2, 29, 11, 14, 16, 18, 22, 25, 3, 30, 8, 12, 20, 28, 15, 17, 24, 7, 19, 27, 23, 6, 26, 5, 4, 31 };
+    U32 v = val;
+    unsigned r;
+    v |= v >> 1;    /* propagate the highest set bit into every lower position ... */
+    v |= v >> 2;
+    v |= v >> 4;
+    v |= v >> 8;
+    v |= v >> 16;   /* ... so that v only depends on the index of that bit */
+    r = DeBruijnClz[ (U32) (v * 0x07C4ACDDU) >> 27];   /* the top 5 bits of the product select the table entry */
+    return r;
+#   endif
+}
+
+
+/**********************************************************
+* bitStream decoding
+**********************************************************/
+
+/*!BIT_initDStream
+*  Initialize a BIT_DStream_t so that reading starts from the last byte of the bitStream.
+*  @bitD : a pointer to an already allocated BIT_DStream_t structure
+*  @srcBuffer must point at the beginning of a bitStream
+*  @srcSize must be the exact size of the bitStream; the last byte must be non-zero (it carries the end mark)
+*  @result : size of stream (== srcSize) or an errorCode if a problem is detected (srcSize == 0, or end mark missing)
+*/
+MEM_STATIC size_t BIT_initDStream(BIT_DStream_t* bitD, const void* srcBuffer, size_t srcSize)
+{
+    if (srcSize < 1) { memset(bitD, 0, sizeof(*bitD)); return ERROR(srcSize_wrong); }
+
+    if (srcSize >=  sizeof(size_t))   /* normal case */
+    {
+        U32 contain32;
+        bitD->start = (const char*)srcBuffer;
+        bitD->ptr   = (const char*)srcBuffer + srcSize - sizeof(size_t);   /* the register is loaded with the last sizeof(size_t) bytes */
+        bitD->bitContainer = MEM_readLEST(bitD->ptr);
+        contain32 = ((const BYTE*)srcBuffer)[srcSize-1];   /* last byte of the stream */
+        if (contain32 == 0) return ERROR(GENERIC);   /* endMark not present */
+        bitD->bitsConsumed = 8 - BIT_highbit32(contain32);   /* skip the zero bits above the highest set bit of the last byte, and that bit itself */
+    }
+    else   /* stream shorter than the register : the container is assembled byte by byte */
+    {
+        U32 contain32;
+        bitD->start = (const char*)srcBuffer;
+        bitD->ptr   = bitD->start;
+        bitD->bitContainer = *(const BYTE*)(bitD->start);
+        switch(srcSize)   /* cases 5-7 are only reachable when size_t is 8 bytes wide, where the shifts below evaluate to 32, 40 and 48 */
+        {
+            case 7: bitD->bitContainer += (size_t)(((const BYTE*)(bitD->start))[6]) << (sizeof(size_t)*8 - 16);/* fall-through */
+            case 6: bitD->bitContainer += (size_t)(((const BYTE*)(bitD->start))[5]) << (sizeof(size_t)*8 - 24);/* fall-through */
+            case 5: bitD->bitContainer += (size_t)(((const BYTE*)(bitD->start))[4]) << (sizeof(size_t)*8 - 32);/* fall-through */
+            case 4: bitD->bitContainer += (size_t)(((const BYTE*)(bitD->start))[3]) << 24; /* fall-through */
+            case 3: bitD->bitContainer += (size_t)(((const BYTE*)(bitD->start))[2]) << 16; /* fall-through */
+            case 2: bitD->bitContainer += (size_t)(((const BYTE*)(bitD->start))[1]) <<  8; /* fall-through */
+            default: break;
+        }
+        contain32 = ((const BYTE*)srcBuffer)[srcSize-1];   /* last byte of the stream */
+        if (contain32 == 0) return ERROR(GENERIC);   /* endMark not present */
+        bitD->bitsConsumed = 8 - BIT_highbit32(contain32);
+        bitD->bitsConsumed += (U32)(sizeof(size_t) - srcSize)*8;   /* the upper bytes of the container hold no data : count them as consumed */
+    }
+
+    return srcSize;
+}
+
+/*!BIT_lookBits
+ * Peek at the next nbBits bits of the local register without consuming them
+ * (neither the register nor the consumed-bit counter is modified).
+ * At most 25 bits can be requested on 32-bits systems, 57 on 64-bits systems.
+ * @return : value extracted
+ */
+MEM_STATIC size_t BIT_lookBits(BIT_DStream_t* bitD, U32 nbBits)
+{
+    const U32 regMask = sizeof(bitD->bitContainer)*8 - 1;
+    const size_t aligned = bitD->bitContainer << (bitD->bitsConsumed & regMask);   /* unread bits moved to the top of the register */
+    return (aligned >> 1) >> ((regMask-nbBits) & regMask);   /* shift split in two, so that nbBits == 0 yields 0 */
+}
+
+/*! BIT_lookBitsFast :
+*   unsafe variant of BIT_lookBits() : the result is only valid if nbBits >= 1 */
+MEM_STATIC size_t BIT_lookBitsFast(BIT_DStream_t* bitD, U32 nbBits)
+{
+    const U32 regBits = sizeof(bitD->bitContainer)*8;
+    return (bitD->bitContainer << (bitD->bitsConsumed & (regBits-1))) >> ((regBits-nbBits) & (regBits-1));
+}
+
+MEM_STATIC void BIT_skipBits(BIT_DStream_t* bitD, U32 nbBits)
+{   /* counts nbBits more bits of the local register as consumed; the register itself is not touched */
+    bitD->bitsConsumed = bitD->bitsConsumed + nbBits;
+}
+
+/*!BIT_readBits
+ * Extract the next nbBits bits from the local register and count them as consumed.
+ * The caller must not request more bits than the local register still holds.
+ * @return : extracted value.
+ */
+MEM_STATIC size_t BIT_readBits(BIT_DStream_t* bitD, U32 nbBits)
+{
+    const size_t extracted = BIT_lookBits(bitD, nbBits);   /* peek first ... */
+    BIT_skipBits(bitD, nbBits);                            /* ... then count those bits as consumed */
+    return extracted;
+}
+
+/*!BIT_readBitsFast :
+*  unsafe variant of BIT_readBits() : the result is only valid if nbBits >= 1 */
+MEM_STATIC size_t BIT_readBitsFast(BIT_DStream_t* bitD, U32 nbBits)
+{
+    const size_t extracted = BIT_lookBitsFast(bitD, nbBits);
+    BIT_skipBits(bitD, nbBits);
+    return extracted;
+}
+
+MEM_STATIC BIT_DStream_status BIT_reloadDStream(BIT_DStream_t* bitD)   /* refills the local register from memory; @return : status of the stream */
+{
+    if (bitD->bitsConsumed > (sizeof(bitD->bitContainer)*8))  /* should never happen */
+        return BIT_DStream_overflow;
+
+    if ((size_t)(bitD->ptr - bitD->start) >= sizeof(bitD->bitContainer))   /* far enough from the start : every consumed byte can be dropped */
+    {
+        bitD->ptr -= bitD->bitsConsumed >> 3;
+        bitD->bitsConsumed &= 7;
+        bitD->bitContainer = MEM_readLEST(bitD->ptr);
+        return BIT_DStream_unfinished;
+    }
+    if (bitD->ptr == bitD->start)   /* nothing left to load from memory */
+    {
+        if (bitD->bitsConsumed < sizeof(bitD->bitContainer)*8) return BIT_DStream_endOfBuffer;
+        return BIT_DStream_completed;
+    }
+    {
+        U32 nbBytes = bitD->bitsConsumed >> 3;
+        BIT_DStream_status result = BIT_DStream_unfinished;
+        if (nbBytes > (U32)(bitD->ptr - bitD->start))   /* distances are compared : 'ptr - nbBytes' must not be formed when it falls before 'start' */
+        {
+            nbBytes = (U32)(bitD->ptr - bitD->start);  /* ptr > start */
+            result = BIT_DStream_endOfBuffer;
+        }
+        bitD->ptr -= nbBytes;
+        bitD->bitsConsumed -= nbBytes*8;
+        bitD->bitContainer = MEM_readLEST(bitD->ptr);   /* reminder : srcSize > sizeof(bitD->bitContainer), otherwise ptr == start */
+        return result;
+    }
+}
+
+/*! BIT_endOfDStream
+*   @return 1 if DStream has reached its exact end (read position at buffer start, register entirely consumed), 0 otherwise */
+MEM_STATIC unsigned BIT_endOfDStream(const BIT_DStream_t* DStream)
+{
+    if (DStream->ptr != DStream->start) return 0;   /* read position has not reached the start of the buffer */
+    return DStream->bitsConsumed == sizeof(DStream->bitContainer)*8;
+}
+
+#if defined (__cplusplus)
+}
+#endif
+
+#endif /* BITSTREAM_H_MODULE */
+
+
+
+/* ******************************************************************
+   FSE : Finite State Entropy coder
+   header file for static linking (only)
+   Copyright (C) 2013-2015, Yann Collet
+
+   BSD 2-Clause License (http://www.opensource.org/licenses/bsd-license.php)
+
+   Redistribution and use in source and binary forms, with or without
+   modification, are permitted provided that the following conditions are
+   met:
+
+       * Redistributions of source code must retain the above copyright
+   notice, this list of conditions and the following disclaimer.
+       * Redistributions in binary form must reproduce the above
+   copyright notice, this list of conditions and the following disclaimer
+   in the documentation and/or other materials provided with the
+   distribution.
+
+   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+   OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+   You can contact the author at :
+   - Source repository : https://github.com/Cyan4973/FiniteStateEntropy
+   - Public forum : https://groups.google.com/forum/#!forum/lz4c
+****************************************************************** */
+#ifndef FSE_STATIC_H
+#define FSE_STATIC_H
+
+#if defined (__cplusplus)
+extern "C" {
+#endif
+
+
+/* *****************************************
+*  Static allocation
+*******************************************/
+/* FSE buffer bounds (macro arguments are parenthesized, so any expression can be passed) */
+#define FSE_NCOUNTBOUND 512
+#define FSE_BLOCKBOUND(size) ((size) + ((size)>>7))
+#define FSE_COMPRESSBOUND(size) (FSE_NCOUNTBOUND + FSE_BLOCKBOUND(size))   /* Macro version, useful for static allocation */
+
+/* It is possible to statically allocate FSE CTable/DTable as a table of unsigned using below macros */
+#define FSE_CTABLE_SIZE_U32(maxTableLog, maxSymbolValue)   (1 + (1<<((maxTableLog)-1)) + (((maxSymbolValue)+1)*2))
+#define FSE_DTABLE_SIZE_U32(maxTableLog)                   (1 + (1<<(maxTableLog)))
+
+
+/* *****************************************
+*  FSE advanced API
+*******************************************/
+static size_t FSE_buildDTable_raw (FSE_DTable* dt, unsigned nbBits);
+/* build a fake FSE_DTable, designed to read an uncompressed bitstream where each symbol uses nbBits */
+
+static size_t FSE_buildDTable_rle (FSE_DTable* dt, unsigned char symbolValue);
+/* build a fake FSE_DTable, designed to always generate the same symbolValue */
+
+
+
+/* *****************************************
+*  FSE symbol decompression API
+*******************************************/
+typedef struct   /* decoding state of one FSE stream */
+{
+    size_t      state;   /* index of the current cell in the decoding table */
+    const void* table;   /* decoding cells, located just after the header of the FSE_DTable (precise cell type may vary) */
+} FSE_DState_t;
+
+
+static void     FSE_initDState(FSE_DState_t* DStatePtr, BIT_DStream_t* bitD, const FSE_DTable* dt);
+
+static unsigned char FSE_decodeSymbol(FSE_DState_t* DStatePtr, BIT_DStream_t* bitD);
+
+static unsigned FSE_endOfDState(const FSE_DState_t* DStatePtr);
+
+/*!
+Let's now decompose FSE_decompress_usingDTable() into its unitary components.
+You will decode FSE-encoded symbols from the bitStream,
+and also any other bitFields you put in, **in reverse order**.
+
+You will need a few variables to track your bitStream. They are :
+
+BIT_DStream_t DStream;    // Stream context
+FSE_DState_t  DState;     // State context. Multiple ones are possible
+FSE_DTable*   DTablePtr;  // Decoding table, provided by FSE_buildDTable()
+
+The first thing to do is to init the bitStream.
+    errorCode = BIT_initDStream(&DStream, srcBuffer, srcSize);
+
+You should then retrieve your initial state(s)
+(in reverse flushing order if you have several ones) :
+    FSE_initDState(&DState, &DStream, DTablePtr);
+
+You can then decode your data, symbol after symbol.
+For information the maximum number of bits read by FSE_decodeSymbol() is 'tableLog'.
+Keep in mind that symbols are decoded in reverse order, like a LIFO stack (last in, first out).
+    unsigned char symbol = FSE_decodeSymbol(&DState, &DStream);
+
+You can retrieve any bitfield you eventually stored into the bitStream (in reverse order)
+Note : maximum allowed nbBits is 25, for 32-bits compatibility
+    size_t bitField = BIT_readBits(&DStream, nbBits);
+
+All above operations only read from local register (which size depends on size_t).
+Refueling the register from memory is manually performed by the reload method.
+    endSignal = BIT_reloadDStream(&DStream);
+
+BIT_reloadDStream() result tells if there is still some more data to read from DStream.
+BIT_DStream_unfinished : there is still some data left into the DStream.
+BIT_DStream_endOfBuffer : Dstream reached end of buffer. Its container may no longer be completely filled.
+BIT_DStream_completed : Dstream reached its exact end, corresponding in general to decompression completed.
+BIT_DStream_overflow : Dstream went too far. Decompression result is corrupted.
+
+When reaching end of buffer (BIT_DStream_endOfBuffer), progress slowly, notably if you decode multiple symbols per loop,
+to properly detect the exact end of stream.
+After each decoded symbol, check if DStream is fully consumed using this simple test :
+    BIT_reloadDStream(&DStream) >= BIT_DStream_completed
+
+When it's done, verify decompression is fully completed, by checking both DStream and the relevant states.
+Checking if DStream has reached its end is performed by :
+    BIT_endOfDStream(&DStream);
+Check also the states. There might be some symbols left there, if some high probability ones (>50%) are possible.
+    FSE_endOfDState(&DState);
+*/
+
+
+/* *****************************************
+*  FSE unsafe API
+*******************************************/
+static unsigned char FSE_decodeSymbolFast(FSE_DState_t* DStatePtr, BIT_DStream_t* bitD);
+/* faster, but works only if nbBits is always >= 1 (otherwise, result will be corrupted) */
+
+
+/* *****************************************
+*  Implementation of inlined functions
+*******************************************/
+/* decompression */
+
+typedef struct {     /* header copied into the first cell of an FSE_DTable */
+    U16 tableLog;    /* log2 of the number of decoding cells */
+    U16 fastMode;    /* 1 when no symbol has a normalized count >= half the table size (see 'noLarge' in FSE_buildDTable()) */
+} FSE_DTableHeader;   /* sizeof U32 */
+
+typedef struct   /* one cell of the decoding table */
+{
+    unsigned short newState;   /* base of the next state; the bits read from the bitstream are added to it */
+    unsigned char  symbol;     /* symbol produced when decoding from this cell */
+    unsigned char  nbBits;     /* number of bits to read from the bitstream to reach the next state */
+} FSE_decode_t;   /* size == U32 */
+
+MEM_STATIC void FSE_initDState(FSE_DState_t* DStatePtr, BIT_DStream_t* bitD, const FSE_DTable* dt)
+{   /* reads the initial state (tableLog bits) from bitD, then reloads bitD */
+    FSE_DTableHeader header;
+    memcpy(&header, dt, sizeof(header));   /* the header sits in the first cell of dt */
+    DStatePtr->table = &dt[1];             /* decoding cells start right after it */
+    DStatePtr->state = BIT_readBits(bitD, header.tableLog);
+    BIT_reloadDStream(bitD);
+}
+
+MEM_STATIC BYTE FSE_decodeSymbol(FSE_DState_t* DStatePtr, BIT_DStream_t* bitD)
+{
+    /* returns the symbol of the current cell, then moves to the next state using bits read from the stream */
+    const FSE_decode_t* const cells = (const FSE_decode_t*)(DStatePtr->table);
+    const FSE_decode_t cell = cells[DStatePtr->state];
+    const size_t extraBits = BIT_readBits(bitD, (U32)cell.nbBits);
+
+    DStatePtr->state = cell.newState + extraBits;
+    return cell.symbol;
+}
+
+MEM_STATIC BYTE FSE_decodeSymbolFast(FSE_DState_t* DStatePtr, BIT_DStream_t* bitD)
+{
+    /* same as FSE_decodeSymbol(), but reads with BIT_readBitsFast() : only valid if nbBits is always >= 1 */
+    const FSE_decode_t* const cells = (const FSE_decode_t*)(DStatePtr->table);
+    const FSE_decode_t cell = cells[DStatePtr->state];
+    const size_t extraBits = BIT_readBitsFast(bitD, (U32)cell.nbBits);
+
+    DStatePtr->state = cell.newState + extraBits;
+    return cell.symbol;
+}
+
+MEM_STATIC unsigned FSE_endOfDState(const FSE_DState_t* DStatePtr)
+{   /* @return : 1 when the state value is 0, 0 otherwise */
+    return !DStatePtr->state;
+}
+
+
+#if defined (__cplusplus)
+}
+#endif
+
+#endif  /* FSE_STATIC_H */
+
+/* ******************************************************************
+   FSE : Finite State Entropy coder
+   Copyright (C) 2013-2015, Yann Collet.
+
+   BSD 2-Clause License (http://www.opensource.org/licenses/bsd-license.php)
+
+   Redistribution and use in source and binary forms, with or without
+   modification, are permitted provided that the following conditions are
+   met:
+
+       * Redistributions of source code must retain the above copyright
+   notice, this list of conditions and the following disclaimer.
+       * Redistributions in binary form must reproduce the above
+   copyright notice, this list of conditions and the following disclaimer
+   in the documentation and/or other materials provided with the
+   distribution.
+
+   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+   OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+    You can contact the author at :
+    - FSE source repository : https://github.com/Cyan4973/FiniteStateEntropy
+    - Public forum : https://groups.google.com/forum/#!forum/lz4c
+****************************************************************** */
+
+#ifndef FSE_COMMONDEFS_ONLY
+
+/* **************************************************************
+*  Tuning parameters
+****************************************************************/
+/*!MEMORY_USAGE :
+*  Memory usage formula : N->2^N Bytes (examples : 10 -> 1KB; 12 -> 4KB ; 16 -> 64KB; 20 -> 1MB; etc.)
+*  Increasing memory usage improves compression ratio
+*  Reduced memory usage can improve speed, due to cache effect
+*  Recommended max value is 14, for 16KB, which nicely fits into Intel x86 L1 cache */
+#define FSE_MAX_MEMORY_USAGE 14
+#define FSE_DEFAULT_MEMORY_USAGE 13
+
+/*!FSE_MAX_SYMBOL_VALUE :
+*  Maximum symbol value authorized.
+*  Required for proper stack allocation */
+#define FSE_MAX_SYMBOL_VALUE 255
+
+
+/* **************************************************************
+*  template functions type & suffix
+****************************************************************/
+#define FSE_FUNCTION_TYPE BYTE
+#define FSE_FUNCTION_EXTENSION
+#define FSE_DECODE_TYPE FSE_decode_t
+
+
+#endif   /* !FSE_COMMONDEFS_ONLY */
+
+/* **************************************************************
+*  Compiler specifics
+****************************************************************/
+#ifdef _MSC_VER    /* Visual Studio */
+#  define FORCE_INLINE static __forceinline
+#  include <intrin.h>                    /* For Visual 2005 */
+#  pragma warning(disable : 4127)        /* disable: C4127: conditional expression is constant */
+#  pragma warning(disable : 4214)        /* disable: C4214: non-int bitfields */
+#else
+#  if defined (__cplusplus) || defined (__STDC_VERSION__) && __STDC_VERSION__ >= 199901L   /* C99 */
+#    ifdef __GNUC__
+#      define FORCE_INLINE static inline __attribute__((always_inline))
+#    else
+#      define FORCE_INLINE static inline
+#    endif
+#  else
+#    define FORCE_INLINE static
+#  endif /* __STDC_VERSION__ */
+#endif
+
+
+/* **************************************************************
+*  Dependencies
+****************************************************************/
+#include <stdlib.h>     /* malloc, free, qsort */
+#include <string.h>     /* memcpy, memset */
+#include <stdio.h>      /* printf (debug) */
+
+
+/* ***************************************************************
+*  Constants
+*****************************************************************/
+#define FSE_MAX_TABLELOG  (FSE_MAX_MEMORY_USAGE-2)   /* largest tableLog accepted by FSE_buildDTable() */
+#define FSE_MAX_TABLESIZE (1U<<FSE_MAX_TABLELOG)
+#define FSE_MAXTABLESIZE_MASK (FSE_MAX_TABLESIZE-1)
+#define FSE_DEFAULT_TABLELOG (FSE_DEFAULT_MEMORY_USAGE-2)
+#define FSE_MIN_TABLELOG 5   /* FSE_readNCount() decodes tableLog as a 4-bit value added to this minimum */
+
+#define FSE_TABLELOG_ABSOLUTE_MAX 15   /* FSE_readNCount() rejects any header announcing a larger tableLog */
+#if FSE_MAX_TABLELOG > FSE_TABLELOG_ABSOLUTE_MAX
+#error "FSE_MAX_TABLELOG > FSE_TABLELOG_ABSOLUTE_MAX is not supported"
+#endif
+
+
+/* **************************************************************
+*  Error Management
+****************************************************************/
+#define FSE_STATIC_ASSERT(c) { enum { FSE_static_assert = 1/(int)(!!(c)) }; }   /* use only *after* variable declarations */
+
+
+/* **************************************************************
+*  Complex types
+****************************************************************/
+typedef U32 DTable_max_t[FSE_DTABLE_SIZE_U32(FSE_MAX_TABLELOG)];
+
+
+/*-**************************************************************
+*  Templates
+****************************************************************/
+/*
+  designed to be included
+  for type-specific functions (template emulation in C)
+  Objective is to write these functions only once, for improved maintenance
+*/
+
+/* safety checks */
+#ifndef FSE_FUNCTION_EXTENSION
+#  error "FSE_FUNCTION_EXTENSION must be defined"
+#endif
+#ifndef FSE_FUNCTION_TYPE
+#  error "FSE_FUNCTION_TYPE must be defined"
+#endif
+
+/* Function names */
+#define FSE_CAT(X,Y) X##Y
+#define FSE_FUNCTION_NAME(X,Y) FSE_CAT(X,Y)
+#define FSE_TYPE_NAME(X,Y) FSE_CAT(X,Y)
+
+static U32 FSE_tableStep(U32 tableSize) { const U32 half = tableSize>>1; const U32 eighth = tableSize>>3; return half + eighth + 3; }   /* stride used by FSE_buildDTable() to spread symbols */
+
+
+static size_t FSE_buildDTable(FSE_DTable* dt, const short* normalizedCounter, unsigned maxSymbolValue, unsigned tableLog)
+{   /* Fills 'dt' (header cell + 2^tableLog decoding cells) from 'normalizedCounter[0..maxSymbolValue]'. @return : 0, or an error code */
+    FSE_DTableHeader DTableH;
+    void* const tdPtr = dt+1;   /* because dt is unsigned, 32-bits aligned on 32-bits */
+    FSE_DECODE_TYPE* const tableDecode = (FSE_DECODE_TYPE*) (tdPtr);
+    U16 symbolNext[FSE_MAX_SYMBOL_VALUE+1];
+    U32 tableSize, tableMask, step, highThreshold;   /* derived from tableLog, hence assigned only once it is validated */
+    S16 largeLimit;
+    U32 position = 0, noLarge = 1, s;
+
+    /* Sanity Checks : done before any shift by tableLog, which is undefined for an out-of-range value */
+    if (maxSymbolValue > FSE_MAX_SYMBOL_VALUE) return ERROR(maxSymbolValue_tooLarge);
+    if (tableLog > FSE_MAX_TABLELOG) return ERROR(tableLog_tooLarge);
+    tableSize = (U32)1 << tableLog;
+    tableMask = tableSize-1;  highThreshold = tableSize-1;   /* cells above highThreshold receive the lowprob symbols */
+    step = FSE_tableStep(tableSize);
+    largeLimit = (S16)(tableSize >> 1);   /* == 1 << (tableLog-1), without the out-of-range shift when tableLog == 0 */
+
+    /* Init, lay down lowprob symbols */
+    DTableH.tableLog = (U16)tableLog;
+    for (s=0; s<=maxSymbolValue; s++)
+    {
+        if (normalizedCounter[s]==-1)
+        {
+            if (highThreshold >= tableSize) return ERROR(GENERIC);   /* highThreshold wrapped : more lowprob symbols than cells */
+            tableDecode[highThreshold--].symbol = (FSE_FUNCTION_TYPE)s;
+            symbolNext[s] = 1;
+        }
+        else
+        {
+            if (normalizedCounter[s] >= largeLimit) noLarge=0;
+            symbolNext[s] = normalizedCounter[s];
+        }
+    }
+
+    /* Spread symbols */
+    for (s=0; s<=maxSymbolValue; s++)
+    {
+        int i;
+        for (i=0; i<normalizedCounter[s]; i++)
+        {
+            tableDecode[position].symbol = (FSE_FUNCTION_TYPE)s;
+            position = (position + step) & tableMask;
+            while (position > highThreshold) position = (position + step) & tableMask;   /* lowprob area */
+        }
+    }
+
+    if (position!=0) return ERROR(GENERIC);   /* position must reach all cells once, otherwise normalizedCounter is incorrect */
+
+    /* Build Decoding table */
+    {
+        U32 i;
+        for (i=0; i<tableSize; i++)
+        {
+            FSE_FUNCTION_TYPE symbol = (FSE_FUNCTION_TYPE)(tableDecode[i].symbol);
+            U16 nextState = symbolNext[symbol]++;   /* successive cells of a symbol get successive state values */
+            tableDecode[i].nbBits = (BYTE) (tableLog - BIT_highbit32 ((U32)nextState) );   /* bits needed to bring nextState into [tableSize, 2*tableSize) */
+            tableDecode[i].newState = (U16) ( (nextState << tableDecode[i].nbBits) - tableSize);
+        }
+    }
+
+    DTableH.fastMode = (U16)noLarge;
+    memcpy(dt, &DTableH, sizeof(DTableH));   /* the header is stored in the first cell of dt */
+    return 0;
+}
+
+
+#ifndef FSE_COMMONDEFS_ONLY
+/******************************************
+*  FSE helper functions
+******************************************/
+static unsigned FSE_isError(size_t code) { const unsigned flagged = ERR_isError(code); return flagged; }   /* forwards to ERR_isError() */
+
+
+/****************************************************************
+*  FSE NCount encoding-decoding
+****************************************************************/
+static short FSE_abs(short a)
+{   /* absolute value; the negation is evaluated as int and narrowed back to short */
+    return (short)((a >= 0) ? a : -a);
+}
+
+static size_t FSE_readNCount (short* normalizedCounter, unsigned* maxSVPtr, unsigned* tableLogPtr,
+                 const void* headerBuffer, size_t hbSize)
+{   /* Parses a normalized-count header : fills normalizedCounter[], stores the table log in *tableLogPtr; *maxSVPtr is the largest accepted symbol on entry and the last described symbol on exit. Returns the number of header bytes read, or an error code. */
+    const BYTE* const istart = (const BYTE*) headerBuffer;
+    const BYTE* const iend = istart + hbSize;
+    const BYTE* ip = istart;
+    int nbBits;
+    int remaining;
+    int threshold;
+    U32 bitStream;   /* bits loaded from ip, shifted so that the next unread bit is bit 0 */
+    int bitCount;   /* number of bits already consumed, counted from ip */
+    unsigned charnum = 0;   /* index of the next symbol to fill */
+    int previous0 = 0;   /* set when the last decoded count was 0 */
+
+    if (hbSize < 4) return ERROR(srcSize_wrong);   /* the first access below is a 32-bit read */
+    bitStream = MEM_readLE32(ip);
+    nbBits = (bitStream & 0xF) + FSE_MIN_TABLELOG;   /* extract tableLog */
+    if (nbBits > FSE_TABLELOG_ABSOLUTE_MAX) return ERROR(tableLog_tooLarge);
+    bitStream >>= 4;
+    bitCount = 4;
+    *tableLogPtr = nbBits;
+    remaining = (1<<nbBits)+1;   /* table size + 1 : decoding stops when this reaches 1 */
+    threshold = 1<<nbBits;
+    nbBits++;
+
+    while ((remaining>1) && (charnum<=*maxSVPtr))
+    {
+        if (previous0)   /* a zero count is followed by a run-length of further zero counts */
+        {
+            unsigned n0 = charnum;
+            while ((bitStream & 0xFFFF) == 0xFFFF)   /* 16 set bits : 24 more zero counts */
+            {
+                n0+=24;
+                if (ip < iend-5)   /* room to step 2 bytes and still read 32 bits */
+                {
+                    ip+=2;
+                    bitStream = MEM_readLE32(ip) >> bitCount;
+                }
+                else
+                {
+                    bitStream >>= 16;
+                    bitCount+=16;
+                }
+            }
+            while ((bitStream & 3) == 3)   /* 2-bit value 3 : 3 more zero counts */
+            {
+                n0+=3;
+                bitStream>>=2;
+                bitCount+=2;
+            }
+            n0 += bitStream & 3;   /* closing 2-bit value (0, 1 or 2) */
+            bitCount += 2;
+            if (n0 > *maxSVPtr) return ERROR(maxSymbolValue_tooSmall);
+            while (charnum < n0) normalizedCounter[charnum++] = 0;
+            if ((ip <= iend-7) || (ip + (bitCount>>3) <= iend-4))   /* advance by whole bytes only if a 32-bit read at the new ip stays inside the buffer */
+            {
+                ip += bitCount>>3;
+                bitCount &= 7;
+                bitStream = MEM_readLE32(ip) >> bitCount;
+            }
+            else
+                bitStream >>= 2;
+        }
+        {
+            const short max = (short)((2*threshold-1)-remaining);
+            short count;
+
+            if ((bitStream & (threshold-1)) < (U32)max)   /* small values are coded on nbBits-1 bits */
+            {
+                count = (short)(bitStream & (threshold-1));
+                bitCount   += nbBits-1;
+            }
+            else
+            {
+                count = (short)(bitStream & (2*threshold-1));
+                if (count >= threshold) count -= max;
+                bitCount   += nbBits;
+            }
+
+            count--;   /* extra accuracy */
+            remaining -= FSE_abs(count);
+            normalizedCounter[charnum++] = count;
+            previous0 = !count;
+            while (remaining < threshold)   /* shrink the field width as the remaining budget drops */
+            {
+                nbBits--;
+                threshold >>= 1;
+            }
+
+            {
+                if ((ip <= iend-7) || (ip + (bitCount>>3) <= iend-4))
+                {
+                    ip += bitCount>>3;
+                    bitCount &= 7;
+                }
+                else
+                {
+                    bitCount -= (int)(8 * (iend - 4 - ip));   /* near the end : pin ip to the last 4 bytes and account for the move in bitCount */
+                    ip = iend - 4;
+                }
+                bitStream = MEM_readLE32(ip) >> (bitCount & 31);
+            }
+        }
+    }
+    if (remaining != 1) return ERROR(GENERIC);   /* the magnitudes of all counts must add up exactly to the table size */
+    *maxSVPtr = charnum-1;
+
+    ip += (bitCount+7)>>3;   /* round the consumed bits up to whole bytes */
+    if ((size_t)(ip-istart) > hbSize) return ERROR(srcSize_wrong);
+    return ip-istart;
+}
+
+
+/*********************************************************
+*  Decompression (Byte symbols)
+*********************************************************/
+static size_t FSE_buildDTable_rle (FSE_DTable* dt, BYTE symbolValue)
+{
+    /* Layout : the table header lives in dt[0], the single decode cell starts at dt + 1. */
+    void* const headerPos = dt;
+    void* const cellPos = dt + 1;
+    FSE_DTableHeader* const header = (FSE_DTableHeader*)headerPos;
+    FSE_decode_t* const onlyCell = (FSE_decode_t*)cellPos;
+
+    header->tableLog = 0;
+    header->fastMode = 0;
+
+    onlyCell->symbol = symbolValue;   /* every decode yields this one symbol */
+    onlyCell->newState = 0;
+    onlyCell->nbBits = 0;   /* and reads no bits from the stream */
+    return 0;
+}
+
+
+static size_t FSE_buildDTable_raw (FSE_DTable* dt, unsigned nbBits)
+{
+    /* Layout : the table header lives in dt[0], the decode cells start at dt + 1. */
+    void* const headerPos = dt;
+    void* const cellsPos = dt + 1;
+    FSE_DTableHeader* const header = (FSE_DTableHeader*)headerPos;
+    FSE_decode_t* const cells = (FSE_decode_t*)cellsPos;
+    const unsigned lastSymbol = ((unsigned)(1 << nbBits)) - 1;   /* 2^nbBits cells, indexed 0..lastSymbol */
+    unsigned sym;
+
+    /* Sanity check : a raw table needs at least 1 bit per symbol */
+    if (nbBits < 1) return ERROR(GENERIC);
+
+    header->tableLog = (U16)nbBits;
+    header->fastMode = 1;
+
+    /* Identity table : cell `sym` decodes to symbol `sym` and always reads nbBits bits */
+    for (sym = 0; sym <= lastSymbol; sym++)
+    {
+        cells[sym].symbol = (BYTE)sym;
+        cells[sym].nbBits = (BYTE)nbBits;
+        cells[sym].newState = 0;
+    }
+
+    return 0;
+}
+
+FORCE_INLINE size_t FSE_decompress_usingDTable_generic(
+          void* dst, size_t maxDstSize,
+    const void* cSrc, size_t cSrcSize,
+    const FSE_DTable* dt, const unsigned fast)
+{   /* Decodes the bitstream cSrc into dst (at most maxDstSize bytes) with table dt, alternating between two decoder states; `fast` selects FSE_decodeSymbolFast() over FSE_decodeSymbol(). Returns the number of bytes written, or an error code. */
+    BYTE* const ostart = (BYTE*) dst;
+    BYTE* op = ostart;
+    BYTE* const omax = op + maxDstSize;
+    BYTE* const olimit = omax-3;   /* the 4-symbols loop only runs while op < olimit */
+
+    BIT_DStream_t bitD;
+    FSE_DState_t state1;
+    FSE_DState_t state2;
+    size_t errorCode;
+
+    /* Init */
+    errorCode = BIT_initDStream(&bitD, cSrc, cSrcSize);   /* replaced last arg by maxCompressed Size */
+    if (FSE_isError(errorCode)) return errorCode;
+
+    FSE_initDState(&state1, &bitD, dt);
+    FSE_initDState(&state2, &bitD, dt);
+
+#define FSE_GETSYMBOL(statePtr) fast ? FSE_decodeSymbolFast(statePtr, &bitD) : FSE_decodeSymbol(statePtr, &bitD)
+
+    /* 4 symbols per loop */
+    for ( ; (BIT_reloadDStream(&bitD)==BIT_DStream_unfinished) && (op<olimit) ; op+=4)
+    {
+        op[0] = FSE_GETSYMBOL(&state1);
+
+        if (FSE_MAX_TABLELOG*2+7 > sizeof(bitD.bitContainer)*8)    /* This test must be static */
+            BIT_reloadDStream(&bitD);
+
+        op[1] = FSE_GETSYMBOL(&state2);
+
+        if (FSE_MAX_TABLELOG*4+7 > sizeof(bitD.bitContainer)*8)    /* This test must be static */
+            { if (BIT_reloadDStream(&bitD) > BIT_DStream_unfinished) { op+=2; break; } }   /* only 2 symbols were written in this round */
+
+        op[2] = FSE_GETSYMBOL(&state1);
+
+        if (FSE_MAX_TABLELOG*2+7 > sizeof(bitD.bitContainer)*8)    /* This test must be static */
+            BIT_reloadDStream(&bitD);
+
+        op[3] = FSE_GETSYMBOL(&state2);
+    }
+
+    /* tail */
+    /* note : BIT_reloadDStream(&bitD) >= FSE_DStream_partiallyFilled; Ends at exactly BIT_DStream_completed */
+    while (1)   /* one symbol at a time, state1 then state2, re-checking the stop conditions before each */
+    {
+        if ( (BIT_reloadDStream(&bitD)>BIT_DStream_completed) || (op==omax) || (BIT_endOfDStream(&bitD) && (fast || FSE_endOfDState(&state1))) )
+            break;
+
+        *op++ = FSE_GETSYMBOL(&state1);
+
+        if ( (BIT_reloadDStream(&bitD)>BIT_DStream_completed) || (op==omax) || (BIT_endOfDStream(&bitD) && (fast || FSE_endOfDState(&state2))) )
+            break;
+
+        *op++ = FSE_GETSYMBOL(&state2);
+    }
+
+    /* end ? */
+    if (BIT_endOfDStream(&bitD) && FSE_endOfDState(&state1) && FSE_endOfDState(&state2))   /* success only when the stream and both states report their end */
+        return op-ostart;
+
+    if (op==omax) return ERROR(dstSize_tooSmall);   /* dst buffer is full, but cSrc unfinished */
+
+    return ERROR(corruption_detected);
+}
+
+
+static size_t FSE_decompress_usingDTable(void* dst, size_t originalSize,
+                            const void* cSrc, size_t cSrcSize,
+                            const FSE_DTable* dt)
+{
+    /* The table header sits at the front of `dt` : copy it out to read the fastMode flag. */
+    FSE_DTableHeader header;
+    memcpy(&header, dt, sizeof(header));
+
+    /* `fast` is passed as a literal on each path, matching the "(static)" selection intended for the generic body. */
+    if (!header.fastMode)
+        return FSE_decompress_usingDTable_generic(dst, originalSize, cSrc, cSrcSize, dt, 0);
+
+    return FSE_decompress_usingDTable_generic(dst, originalSize, cSrc, cSrcSize, dt, 1);
+}
+
+
+static size_t FSE_decompress(void* dst, size_t maxDstSize, const void* cSrc, size_t cSrcSize)
+{
+    const BYTE* const srcStart = (const BYTE*)cSrc;
+    short normCounts[FSE_MAX_SYMBOL_VALUE+1];
+    DTable_max_t decodeTable;   /* initialized by FSE_buildDTable() below; static analyzers may not see that */
+    unsigned tableLog;
+    unsigned maxSymbolValue = FSE_MAX_SYMBOL_VALUE;
+    size_t headerSize;
+    size_t buildResult;
+
+    /* input too small to be a valid FSE block */
+    if (cSrcSize<2) return ERROR(srcSize_wrong);
+
+    /* Step 1 : read the normalized counts; on success the result is the header size in bytes */
+    headerSize = FSE_readNCount (normCounts, &maxSymbolValue, &tableLog, srcStart, cSrcSize);
+    if (FSE_isError(headerSize)) return headerSize;
+    if (headerSize >= cSrcSize) return ERROR(srcSize_wrong);   /* nothing left after the header */
+
+    /* Step 2 : build the decoding table from those counts */
+    buildResult = FSE_buildDTable (decodeTable, normCounts, maxSymbolValue, tableLog);
+    if (FSE_isError(buildResult)) return buildResult;
+
+    /* Step 3 : decode what follows the header; the result is returned as-is, even if it is an error code */
+    return FSE_decompress_usingDTable (dst, maxDstSize, srcStart + headerSize, cSrcSize - headerSize, decodeTable);
+}
+
+
+
+#endif   /* FSE_COMMONDEFS_ONLY */
+
+
+/* ******************************************************************
+   Huff0 : Huffman coder, part of New Generation Entropy library
+   header file
+   Copyright (C) 2013-2015, Yann Collet.
+
+   BSD 2-Clause License (http://www.opensource.org/licenses/bsd-license.php)
+
+   Redistribution and use in source and binary forms, with or without
+   modification, are permitted provided that the following conditions are
+   met:
+
+       * Redistributions of source code must retain the above copyright
+   notice, this list of conditions and the following disclaimer.
+       * Redistributions in binary form must reproduce the above
+   copyright notice, this list of conditions and the following disclaimer
+   in the documentation and/or other materials provided with the
+   distribution.
+
+   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+   OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+   You can contact the author at :
+   - Source repository : https://github.com/Cyan4973/FiniteStateEntropy
+   - Public forum : https://groups.google.com/forum/#!forum/lz4c
+****************************************************************** */
+#ifndef HUFF0_H
+#define HUFF0_H
+
+#if defined (__cplusplus)
+extern "C" {
+#endif
+
+
+/* ****************************************
+*  Dependency
+******************************************/
+#include <stddef.h>    /* size_t */
+
+
+/* ****************************************
+*  Huff0 simple functions
+******************************************/
+static size_t HUF_decompress(void* dst,  size_t dstSize,
+                const void* cSrc, size_t cSrcSize);
+/*!
+HUF_decompress():
+    Decompress Huff0 data from buffer 'cSrc', of size 'cSrcSize',
+    into already allocated destination buffer 'dst', of size 'dstSize'.
+    'dstSize' must be the exact size of original (uncompressed) data.
+    Note : in contrast with FSE, HUF_decompress can regenerate RLE (cSrcSize==1) and uncompressed (cSrcSize==dstSize) data, because it knows size to regenerate.
+    @return : size of regenerated data (== dstSize)
+              or an error code, which can be tested using HUF_isError()
+*/
+
+
+/* ****************************************
+*  Tool functions
+******************************************/
+/* Error Management */
+static unsigned    HUF_isError(size_t code);        /* tells if a return value is an error code */
+
+
+#if defined (__cplusplus)
+}
+#endif
+
+#endif   /* HUFF0_H */
+
+
+/* ******************************************************************
+   Huff0 : Huffman coder, part of New Generation Entropy library
+   header file for static linking (only)
+   Copyright (C) 2013-2015, Yann Collet
+
+   BSD 2-Clause License (http://www.opensource.org/licenses/bsd-license.php)
+
+   Redistribution and use in source and binary forms, with or without
+   modification, are permitted provided that the following conditions are
+   met:
+
+       * Redistributions of source code must retain the above copyright
+   notice, this list of conditions and the following disclaimer.
+       * Redistributions in binary form must reproduce the above
+   copyright notice, this list of conditions and the following disclaimer
+   in the documentation and/or other materials provided with the
+   distribution.
+
+   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+   OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+   You can contact the author at :
+   - Source repository : https://github.com/Cyan4973/FiniteStateEntropy
+   - Public forum : https://groups.google.com/forum/#!forum/lz4c
+****************************************************************** */
+#ifndef HUFF0_STATIC_H
+#define HUFF0_STATIC_H
+
+#if defined (__cplusplus)
+extern "C" {
+#endif
+
+
+
+/* ****************************************
+*  Static allocation macros
+******************************************/
+/* static allocation of Huff0's DTable */
+#define HUF_DTABLE_SIZE(maxTableLog)   (1 + (1<<maxTableLog))  /* nb Cells; use unsigned short for X2, unsigned int for X4 */
+#define HUF_CREATE_STATIC_DTABLEX2(DTable, maxTableLog) \
+        unsigned short DTable[HUF_DTABLE_SIZE(maxTableLog)] = { maxTableLog }
+#define HUF_CREATE_STATIC_DTABLEX4(DTable, maxTableLog) \
+        unsigned int DTable[HUF_DTABLE_SIZE(maxTableLog)] = { maxTableLog }
+#define HUF_CREATE_STATIC_DTABLEX6(DTable, maxTableLog) \
+        unsigned int DTable[HUF_DTABLE_SIZE(maxTableLog) * 3 / 2] = { maxTableLog }
+
+
+/* ****************************************
+*  Advanced decompression functions
+******************************************/
+static size_t HUF_decompress4X2 (void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize);   /* single-symbol decoder */
+static size_t HUF_decompress4X4 (void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize);   /* double-symbols decoder */
+
+
+/* ****************************************
+*  Huff0 detailed API
+******************************************/
+/*!
+HUF_decompress() does the following:
+1. select the decompression algorithm (X2, X4, X6) based on pre-computed heuristics
+2. build the Huffman decoding table from the compact tree saved at the start of the source, using HUF_readDTableXn()
+3. decode 1 or 4 segments in parallel using HUF_decompressSXn_usingDTable
+
+*/
+static size_t HUF_readDTableX2 (unsigned short* DTable, const void* src, size_t srcSize);
+static size_t HUF_readDTableX4 (unsigned* DTable, const void* src, size_t srcSize);
+
+static size_t HUF_decompress4X2_usingDTable(void* dst, size_t maxDstSize, const void* cSrc, size_t cSrcSize, const unsigned short* DTable);
+static size_t HUF_decompress4X4_usingDTable(void* dst, size_t maxDstSize, const void* cSrc, size_t cSrcSize, const unsigned* DTable);
+
+
+#if defined (__cplusplus)
+}
+#endif
+
+#endif /* HUFF0_STATIC_H */
+
+
+
+/* ******************************************************************
+   Huff0 : Huffman coder, part of New Generation Entropy library
+   Copyright (C) 2013-2015, Yann Collet.
+
+   BSD 2-Clause License (http://www.opensource.org/licenses/bsd-license.php)
+
+   Redistribution and use in source and binary forms, with or without
+   modification, are permitted provided that the following conditions are
+   met:
+
+       * Redistributions of source code must retain the above copyright
+   notice, this list of conditions and the following disclaimer.
+       * Redistributions in binary form must reproduce the above
+   copyright notice, this list of conditions and the following disclaimer
+   in the documentation and/or other materials provided with the
+   distribution.
+
+   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+   OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+    You can contact the author at :
+    - FSE+Huff0 source repository : https://github.com/Cyan4973/FiniteStateEntropy
+****************************************************************** */
+
+/* **************************************************************
+*  Compiler specifics
+****************************************************************/
+#if defined (__cplusplus) || (defined (__STDC_VERSION__) && (__STDC_VERSION__ >= 199901L) /* C99 */)
+/* inline is defined */
+#elif defined(_MSC_VER)
+#  define inline __inline
+#else
+#  define inline /* disable inline */
+#endif
+
+
+#ifdef _MSC_VER    /* Visual Studio */
+#  pragma warning(disable : 4127)        /* disable: C4127: conditional expression is constant */
+#endif
+
+
+/* **************************************************************
+*  Includes
+****************************************************************/
+#include <stdlib.h>     /* malloc, free, qsort */
+#include <string.h>     /* memcpy, memset */
+#include <stdio.h>      /* printf (debug) */
+
+
+/* **************************************************************
+*  Constants
+****************************************************************/
+#define HUF_ABSOLUTEMAX_TABLELOG  16   /* absolute limit of HUF_MAX_TABLELOG. Beyond that value, code does not work */
+#define HUF_MAX_TABLELOG  12           /* max configured tableLog (for static allocation); can be modified up to HUF_ABSOLUTEMAX_TABLELOG */
+#define HUF_DEFAULT_TABLELOG  HUF_MAX_TABLELOG   /* tableLog by default, when not specified */
+#define HUF_MAX_SYMBOL_VALUE 255
+#if (HUF_MAX_TABLELOG > HUF_ABSOLUTEMAX_TABLELOG)
+#  error "HUF_MAX_TABLELOG is too large !"
+#endif
+
+
+/* **************************************************************
+*  Error Management
+****************************************************************/
+static unsigned HUF_isError(size_t code) { return ERR_isError(code); }   /* forwards to ERR_isError() : tells if a return value is an error code */
+#define HUF_STATIC_ASSERT(c) { enum { HUF_static_assert = 1/(int)(!!(c)) }; }   /* use only *after* variable declarations */
+
+
+
+/*-*******************************************************
+*  Huff0 : Huffman block decompression
+*********************************************************/
+typedef struct { BYTE byte; BYTE nbBits; } HUF_DEltX2;   /* single-symbol decoding : the decoded byte, and the number of bits to skip in the stream for it */
+
+typedef struct { U16 sequence; BYTE nbBits; BYTE length; } HUF_DEltX4;  /* double-symbols decoding : 1 or 2 symbols written little-endian into `sequence`, the bits they consume, and how many symbols (`length`) */
+
+typedef struct { BYTE symbol; BYTE weight; } sortedSymbol_t;   /* a symbol paired with its Huffman weight */
+
+/*! HUF_readStats
+    Read compact Huffman tree, saved by HUF_writeCTable : fills `huffWeight` with one weight per symbol and counts the symbols of each weight into `rankStats`
+    @huffWeight : destination buffer, of capacity `hwSize`; the last weight is not stored in `src`, it is deduced so that the weight total is a power of 2
+    @return : size read from `src`, or an error code; on success *nbSymbolsPtr and *tableLogPtr are set
+*/
+static size_t HUF_readStats(BYTE* huffWeight, size_t hwSize, U32* rankStats,
+                            U32* nbSymbolsPtr, U32* tableLogPtr,
+                            const void* src, size_t srcSize)
+{
+    U32 weightTotal;
+    U32 tableLog;
+    const BYTE* ip = (const BYTE*) src;
+    size_t iSize;
+    size_t oSize;
+    U32 n;
+
+    if (!srcSize) return ERROR(srcSize_wrong);
+    iSize = ip[0];   /* the first byte selects the header format */
+    /* memset(huffWeight, 0, hwSize); is not necessary, even though some analyzers complain ... */
+
+    if (iSize >= 128)  /* special header */
+    {
+        if (iSize >= (242))   /* RLE */
+        {
+            static int l[14] = { 1, 2, 3, 4, 7, 8, 15, 16, 31, 32, 63, 64, 127, 128 };   /* number of weights for header bytes 242..255 */
+            oSize = l[iSize-242];
+            memset(huffWeight, 1, hwSize);   /* every weight is 1 */
+            iSize = 0;   /* no payload follows the header byte */
+        }
+        else   /* Incompressible */
+        {
+            oSize = iSize - 127;
+            iSize = ((oSize+1)/2);   /* weights are stored raw, two 4-bit values per byte */
+            if (iSize+1 > srcSize) return ERROR(srcSize_wrong);
+            if (oSize >= hwSize) return ERROR(corruption_detected);
+            ip += 1;
+            for (n=0; n<oSize; n+=2)
+            {
+                huffWeight[n]   = ip[n/2] >> 4;
+                huffWeight[n+1] = ip[n/2] & 15;
+            }
+        }
+    }
+    else  /* header compressed with FSE (normal case) */
+    {
+        if (iSize+1 > srcSize) return ERROR(srcSize_wrong);
+        oSize = FSE_decompress(huffWeight, hwSize-1, ip+1, iSize);   /* max (hwSize-1) values decoded, as last one is implied */
+        if (FSE_isError(oSize)) return oSize;
+    }
+
+    /* collect weight stats */
+    memset(rankStats, 0, (HUF_ABSOLUTEMAX_TABLELOG + 1) * sizeof(U32));   /* rankStats must hold HUF_ABSOLUTEMAX_TABLELOG + 1 entries */
+    weightTotal = 0;
+    for (n=0; n<oSize; n++)
+    {
+        if (huffWeight[n] >= HUF_ABSOLUTEMAX_TABLELOG) return ERROR(corruption_detected);
+        rankStats[huffWeight[n]]++;
+        weightTotal += (1 << huffWeight[n]) >> 1;   /* weight w adds 2^(w-1); weight 0 adds nothing */
+    }
+    if (weightTotal == 0) return ERROR(corruption_detected);
+
+    /* get last non-null symbol weight (implied, total must be 2^n) */
+    tableLog = BIT_highbit32(weightTotal) + 1;
+    if (tableLog > HUF_ABSOLUTEMAX_TABLELOG) return ERROR(corruption_detected);
+    {
+        U32 total = 1 << tableLog;
+        U32 rest = total - weightTotal;
+        U32 verif = 1 << BIT_highbit32(rest);
+        U32 lastWeight = BIT_highbit32(rest) + 1;
+        if (verif != rest) return ERROR(corruption_detected);    /* last value must be a clean power of 2 */
+        huffWeight[oSize] = (BYTE)lastWeight;
+        rankStats[lastWeight]++;
+    }
+
+    /* check tree construction validity */
+    if ((rankStats[1] < 2) || (rankStats[1] & 1)) return ERROR(corruption_detected);   /* by construction : at least 2 elts of rank 1, must be even */
+
+    /* results */
+    *nbSymbolsPtr = (U32)(oSize+1);   /* explicit weights + the implied last one */
+    *tableLogPtr = tableLog;
+    return iSize+1;   /* payload + the header byte */
+}
+
+
+/**************************/
+/* single-symbol decoding */
+/**************************/
+
+static size_t HUF_readDTableX2 (U16* DTable, const void* src, size_t srcSize)
+{   /* Builds the single-symbol decoding table from the tree description in src. DTable[0] holds the maximum table log allowed on entry and the actual table log on exit; the cells start at DTable + 1. Returns the size read from src, or an error code. */
+    BYTE huffWeight[HUF_MAX_SYMBOL_VALUE + 1];
+    U32 rankVal[HUF_ABSOLUTEMAX_TABLELOG + 1];   /* large enough for values from 0 to 16 */
+    U32 tableLog = 0;
+    size_t iSize;
+    U32 nbSymbols = 0;
+    U32 n;
+    U32 nextRankStart;
+    void* const dtPtr = DTable + 1;
+    HUF_DEltX2* const dt = (HUF_DEltX2*)dtPtr;
+
+    HUF_STATIC_ASSERT(sizeof(HUF_DEltX2) == sizeof(U16));   /* if compilation fails here, assertion is false */
+    /* memset(huffWeight, 0, sizeof(huffWeight)); is not necessary, even though some analyzers complain ... */
+
+    iSize = HUF_readStats(huffWeight, HUF_MAX_SYMBOL_VALUE + 1, rankVal, &nbSymbols, &tableLog, src, srcSize);   /* rankVal receives the number of symbols per weight */
+    if (HUF_isError(iSize)) return iSize;
+
+    /* check result */
+    if (tableLog > DTable[0]) return ERROR(tableLog_tooLarge);   /* DTable is too small */
+    DTable[0] = (U16)tableLog;   /* maybe should separate sizeof DTable, as allocated, from used size of DTable, in case of DTable re-use */
+
+    /* Prepare ranks */
+    nextRankStart = 0;
+    for (n=1; n<=tableLog; n++)   /* turn the per-weight counts into the first cell index of each weight */
+    {
+        U32 current = nextRankStart;
+        nextRankStart += (rankVal[n] << (n-1));   /* each symbol of weight n takes 2^(n-1) cells */
+        rankVal[n] = current;
+    }
+
+    /* fill DTable */
+    for (n=0; n<nbSymbols; n++)
+    {
+        const U32 w = huffWeight[n];
+        const U32 length = (1 << w) >> 1;   /* number of cells for this symbol; 0 when w == 0 */
+        U32 i;
+        HUF_DEltX2 D;
+        D.byte = (BYTE)n; D.nbBits = (BYTE)(tableLog + 1 - w);
+        for (i = rankVal[w]; i < rankVal[w] + length; i++)
+            dt[i] = D;
+        rankVal[w] += length;   /* next symbol of the same weight starts right after */
+    }
+
+    return iSize;
+}
+
+static BYTE HUF_decodeSymbolX2(BIT_DStream_t* Dstream, const HUF_DEltX2* dt, const U32 dtLog)
+{
+    const HUF_DEltX2* const cell = dt + BIT_lookBitsFast(Dstream, dtLog);   /* peek dtLog bits (note : dtLog >= 1) to select the cell */
+    const BYTE decoded = cell->byte;
+    BIT_skipBits(Dstream, cell->nbBits);   /* consume only the bits recorded for this cell */
+    return decoded;
+}
+
+#define HUF_DECODE_SYMBOLX2_0(ptr, DStreamPtr) \
+    *ptr++ = HUF_decodeSymbolX2(DStreamPtr, dt, dtLog)   /* always decodes one symbol; relies on `dt` and `dtLog` being in scope at the point of use */
+
+#define HUF_DECODE_SYMBOLX2_1(ptr, DStreamPtr) \
+    if (MEM_64bits() || (HUF_MAX_TABLELOG<=12)) \
+        HUF_DECODE_SYMBOLX2_0(ptr, DStreamPtr)   /* decodes only when MEM_64bits() is true or HUF_MAX_TABLELOG <= 12 */
+
+#define HUF_DECODE_SYMBOLX2_2(ptr, DStreamPtr) \
+    if (MEM_64bits()) \
+        HUF_DECODE_SYMBOLX2_0(ptr, DStreamPtr)   /* decodes only when MEM_64bits() is true */
+
+static inline size_t HUF_decodeStreamX2(BYTE* p, BIT_DStream_t* const bitDPtr, BYTE* const pEnd, const HUF_DEltX2* const dt, const U32 dtLog)
+{   /* Decodes symbols from bitDPtr into [p, pEnd) with the single-symbol table dt; returns pEnd - p as measured on entry. */
+    BYTE* const pStart = p;
+
+    /* up to 4 symbols at a time */
+    while ((BIT_reloadDStream(bitDPtr) == BIT_DStream_unfinished) && (p <= pEnd-4))
+    {
+        HUF_DECODE_SYMBOLX2_2(p, bitDPtr);
+        HUF_DECODE_SYMBOLX2_1(p, bitDPtr);
+        HUF_DECODE_SYMBOLX2_2(p, bitDPtr);
+        HUF_DECODE_SYMBOLX2_0(p, bitDPtr);
+    }
+
+    /* closer to the end */
+    while ((BIT_reloadDStream(bitDPtr) == BIT_DStream_unfinished) && (p < pEnd))
+        HUF_DECODE_SYMBOLX2_0(p, bitDPtr);
+
+    /* no more data to retrieve from bitstream, hence no need to reload */
+    while (p < pEnd)
+        HUF_DECODE_SYMBOLX2_0(p, bitDPtr);
+
+    return pEnd-pStart;   /* the output range is always filled up to pEnd, whatever the state of the bitstream */
+}
+
+
+static size_t HUF_decompress4X2_usingDTable(
+          void* dst,  size_t dstSize,
+    const void* cSrc, size_t cSrcSize,
+    const U16* DTable)
+{   /* Decodes the 4 Huffman bitstreams of cSrc (located through a 6-byte jump table) into 4 consecutive segments of dst, using the single-symbol table DTable. Returns dstSize, or an error code. */
+    if (cSrcSize < 10) return ERROR(corruption_detected);   /* strict minimum : jump table + 1 byte per stream */
+    if (3 * ((dstSize+3) / 4) > dstSize) return ERROR(corruption_detected);   /* dst cannot hold the first 3 segments (dstSize 1, 2 or 5) : decoding them would write past the end of dst */
+    {
+        const BYTE* const istart = (const BYTE*) cSrc;
+        BYTE* const ostart = (BYTE*) dst;
+        BYTE* const oend = ostart + dstSize;
+        const void* const dtPtr = DTable;
+        const HUF_DEltX2* const dt = ((const HUF_DEltX2*)dtPtr) +1;   /* cells start after the DTable[0] header */
+        const U32 dtLog = DTable[0];
+        size_t errorCode;
+
+        /* Init */
+        BIT_DStream_t bitD1;
+        BIT_DStream_t bitD2;
+        BIT_DStream_t bitD3;
+        BIT_DStream_t bitD4;
+        const size_t length1 = MEM_readLE16(istart);
+        const size_t length2 = MEM_readLE16(istart+2);
+        const size_t length3 = MEM_readLE16(istart+4);
+        size_t length4;   /* not stored : whatever remains of cSrc after the first 3 streams */
+        const BYTE* const istart1 = istart + 6;  /* jumpTable */
+        const BYTE* const istart2 = istart1 + length1;
+        const BYTE* const istart3 = istart2 + length2;
+        const BYTE* const istart4 = istart3 + length3;
+        const size_t segmentSize = (dstSize+3) / 4;   /* the first 3 segments have this size, the 4th takes the rest */
+        BYTE* const opStart2 = ostart + segmentSize;
+        BYTE* const opStart3 = opStart2 + segmentSize;
+        BYTE* const opStart4 = opStart3 + segmentSize;   /* <= oend, guaranteed by the size check above */
+        BYTE* op1 = ostart;
+        BYTE* op2 = opStart2;
+        BYTE* op3 = opStart3;
+        BYTE* op4 = opStart4;
+        U32 endSignal;
+
+        length4 = cSrcSize - (length1 + length2 + length3 + 6);
+        if (length4 > cSrcSize) return ERROR(corruption_detected);   /* overflow */
+        errorCode = BIT_initDStream(&bitD1, istart1, length1);
+        if (HUF_isError(errorCode)) return errorCode;
+        errorCode = BIT_initDStream(&bitD2, istart2, length2);
+        if (HUF_isError(errorCode)) return errorCode;
+        errorCode = BIT_initDStream(&bitD3, istart3, length3);
+        if (HUF_isError(errorCode)) return errorCode;
+        errorCode = BIT_initDStream(&bitD4, istart4, length4);
+        if (HUF_isError(errorCode)) return errorCode;
+
+        /* 16-32 symbols per loop (4-8 symbols per stream) */
+        endSignal = BIT_reloadDStream(&bitD1) | BIT_reloadDStream(&bitD2) | BIT_reloadDStream(&bitD3) | BIT_reloadDStream(&bitD4);
+        for ( ; (endSignal==BIT_DStream_unfinished) && ((size_t)(oend-op4) >= 8) ; )   /* same bound as op4 < oend-7, without forming a pointer before dst when dstSize < 7 */
+        {
+            HUF_DECODE_SYMBOLX2_2(op1, &bitD1);
+            HUF_DECODE_SYMBOLX2_2(op2, &bitD2);
+            HUF_DECODE_SYMBOLX2_2(op3, &bitD3);
+            HUF_DECODE_SYMBOLX2_2(op4, &bitD4);
+            HUF_DECODE_SYMBOLX2_1(op1, &bitD1);
+            HUF_DECODE_SYMBOLX2_1(op2, &bitD2);
+            HUF_DECODE_SYMBOLX2_1(op3, &bitD3);
+            HUF_DECODE_SYMBOLX2_1(op4, &bitD4);
+            HUF_DECODE_SYMBOLX2_2(op1, &bitD1);
+            HUF_DECODE_SYMBOLX2_2(op2, &bitD2);
+            HUF_DECODE_SYMBOLX2_2(op3, &bitD3);
+            HUF_DECODE_SYMBOLX2_2(op4, &bitD4);
+            HUF_DECODE_SYMBOLX2_0(op1, &bitD1);
+            HUF_DECODE_SYMBOLX2_0(op2, &bitD2);
+            HUF_DECODE_SYMBOLX2_0(op3, &bitD3);
+            HUF_DECODE_SYMBOLX2_0(op4, &bitD4);
+
+            endSignal = BIT_reloadDStream(&bitD1) | BIT_reloadDStream(&bitD2) | BIT_reloadDStream(&bitD3) | BIT_reloadDStream(&bitD4);
+        }
+
+        /* check corruption */
+        if (op1 > opStart2) return ERROR(corruption_detected);
+        if (op2 > opStart3) return ERROR(corruption_detected);
+        if (op3 > opStart4) return ERROR(corruption_detected);
+        /* note : op4 supposed already verified within main loop */
+
+        /* finish bitStreams one by one */
+        HUF_decodeStreamX2(op1, &bitD1, opStart2, dt, dtLog);
+        HUF_decodeStreamX2(op2, &bitD2, opStart3, dt, dtLog);
+        HUF_decodeStreamX2(op3, &bitD3, opStart4, dt, dtLog);
+        HUF_decodeStreamX2(op4, &bitD4, oend,     dt, dtLog);
+
+        /* check */
+        endSignal = BIT_endOfDStream(&bitD1) & BIT_endOfDStream(&bitD2) & BIT_endOfDStream(&bitD3) & BIT_endOfDStream(&bitD4);   /* all 4 streams must report their end */
+        if (!endSignal) return ERROR(corruption_detected);
+
+        /* decoded size */
+        return dstSize;
+    }
+}
+
+
+static size_t HUF_decompress4X2 (void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize)
+{
+    HUF_CREATE_STATIC_DTABLEX2(DTable, HUF_MAX_TABLELOG);
+    const BYTE* const srcStart = (const BYTE*) cSrc;
+    size_t tableHeaderSize;
+
+    /* Step 1 : read the table description; on success the result is the number of bytes it occupies */
+    tableHeaderSize = HUF_readDTableX2 (DTable, cSrc, cSrcSize);
+    if (HUF_isError(tableHeaderSize)) return tableHeaderSize;
+    if (tableHeaderSize >= cSrcSize) return ERROR(srcSize_wrong);   /* nothing left for the bitstreams */
+
+    /* Step 2 : decode the 4 streams that follow the table description */
+    return HUF_decompress4X2_usingDTable (dst, dstSize, srcStart + tableHeaderSize, cSrcSize - tableHeaderSize, DTable);
+}
+
+
+/***************************/
+/* double-symbols decoding */
+/***************************/
+
+static void HUF_fillDTableX4Level2(HUF_DEltX4* DTable, U32 sizeLog, const U32 consumed,
+                           const U32* rankValOrigin, const int minWeight,
+                           const sortedSymbol_t* sortedSymbols, const U32 sortedListSize,
+                           U32 nbBitsBaseline, U16 baseSeq)
+{   /* Fills the sub-table DTable reached after the first symbol baseSeq, which used `consumed` bits : each cell holds either baseSeq alone (length 1) or baseSeq followed by a second symbol (length 2). */
+    HUF_DEltX4 DElt;
+    U32 rankVal[HUF_ABSOLUTEMAX_TABLELOG + 1];
+    U32 s;
+
+    /* get pre-calculated rankVal */
+    memcpy(rankVal, rankValOrigin, sizeof(rankVal));   /* local copy : entries are advanced while filling */
+
+    /* fill skipped values */
+    if (minWeight>1)
+    {
+        U32 i, skipSize = rankVal[minWeight];   /* the first rankVal[minWeight] cells get the first symbol alone */
+        MEM_writeLE16(&(DElt.sequence), baseSeq);
+        DElt.nbBits   = (BYTE)(consumed);
+        DElt.length   = 1;
+        for (i = 0; i < skipSize; i++)
+            DTable[i] = DElt;
+    }
+
+    /* fill DTable */
+    for (s=0; s<sortedListSize; s++)   /* note : sortedSymbols already skipped */
+    {
+        const U32 symbol = sortedSymbols[s].symbol;
+        const U32 weight = sortedSymbols[s].weight;
+        const U32 nbBits = nbBitsBaseline - weight;
+        const U32 length = 1 << (sizeLog-nbBits);
+        const U32 start = rankVal[weight];
+        U32 i = start;
+        const U32 end = start + length;
+
+        MEM_writeLE16(&(DElt.sequence), (U16)(baseSeq + (symbol << 8)));   /* second symbol goes in the high byte, above baseSeq */
+        DElt.nbBits = (BYTE)(nbBits + consumed);   /* bits used by both symbols together */
+        DElt.length = 2;
+        do { DTable[i++] = DElt; } while (i<end);   /* since length >= 1 */
+
+        rankVal[weight] += length;   /* next symbol of the same weight starts right after */
+    }
+}
+
+typedef U32 rankVal_t[HUF_ABSOLUTEMAX_TABLELOG][HUF_ABSOLUTEMAX_TABLELOG + 1];
+
+static void HUF_fillDTableX4(HUF_DEltX4* DTable, const U32 targetLog,
+                           const sortedSymbol_t* sortedList, const U32 sortedListSize,
+                           const U32* rankStart, rankVal_t rankValOrigin, const U32 maxWeight,
+                           const U32 nbBitsBaseline)
+{   /* Fills the whole decoding table : a cell decodes two symbols when a second one fits in the remaining bits, otherwise one */
+    U32 rankVal[HUF_ABSOLUTEMAX_TABLELOG + 1];
+    const int scaleLog = nbBitsBaseline - targetLog;   /* note : targetLog >= srcLog, hence scaleLog <= 1 */
+    const U32 minBits  = nbBitsBaseline - maxWeight;   /* bit length of the symbols having the largest weight */
+    U32 s;
+
+    memcpy(rankVal, rankValOrigin, sizeof(rankVal));   /* copies sizeof(rankVal) bytes, i.e. the first row of rankValOrigin */
+
+    /* fill DTable */
+    for (s=0; s<sortedListSize; s++)
+    {
+        const U16 symbol = sortedList[s].symbol;
+        const U32 weight = sortedList[s].weight;
+        const U32 nbBits = nbBitsBaseline - weight;
+        const U32 start = rankVal[weight];
+        const U32 length = 1 << (targetLog-nbBits);
+
+        if (targetLog-nbBits >= minBits)   /* enough room for a second symbol */
+        {
+            U32 sortedRank;
+            int minWeight = nbBits + scaleLog;
+            if (minWeight < 1) minWeight = 1;
+            sortedRank = rankStart[minWeight];   /* second-level list starts at this index of sortedList */
+            HUF_fillDTableX4Level2(DTable+start, targetLog-nbBits, nbBits,
+                           rankValOrigin[nbBits], minWeight,
+                           sortedList+sortedRank, sortedListSize-sortedRank,
+                           nbBitsBaseline, symbol);
+        }
+        else
+        {
+            U32 i;
+            const U32 end = start + length;
+            HUF_DEltX4 DElt;
+
+            MEM_writeLE16(&(DElt.sequence), symbol);   /* single-symbol cell */
+            DElt.nbBits   = (BYTE)(nbBits);
+            DElt.length   = 1;
+            for (i = start; i < end; i++)
+                DTable[i] = DElt;
+        }
+        rankVal[weight] += length;   /* next symbol of the same weight starts after this run */
+    }
+}
+
+static size_t HUF_readDTableX4 (U32* DTable, const void* src, size_t srcSize)
+{   /* Reads the Huffman table description at src and builds the double-symbol decoding table; returns the value given by HUF_readStats, or an error code */
+    BYTE weightList[HUF_MAX_SYMBOL_VALUE + 1];
+    sortedSymbol_t sortedSymbol[HUF_MAX_SYMBOL_VALUE + 1];
+    U32 rankStats[HUF_ABSOLUTEMAX_TABLELOG + 1] = { 0 };
+    U32 rankStart0[HUF_ABSOLUTEMAX_TABLELOG + 2] = { 0 };
+    U32* const rankStart = rankStart0+1;   /* shifted view : rankStart[w] aliases rankStart0[w+1] */
+    rankVal_t rankVal;
+    U32 tableLog, maxW, sizeOfSort, nbSymbols;
+    const U32 memLog = DTable[0];   /* the first cell holds the log size the table was created with */
+    size_t iSize;
+    void* dtPtr = DTable;
+    HUF_DEltX4* const dt = ((HUF_DEltX4*)dtPtr) + 1;   /* decoding cells start right after that first cell */
+
+    HUF_STATIC_ASSERT(sizeof(HUF_DEltX4) == sizeof(U32));   /* if compilation fails here, assertion is false */
+    if (memLog > HUF_ABSOLUTEMAX_TABLELOG) return ERROR(tableLog_tooLarge);
+    //memset(weightList, 0, sizeof(weightList));   /* is not necessary, even though some analyzer complain ... */
+
+    iSize = HUF_readStats(weightList, HUF_MAX_SYMBOL_VALUE + 1, rankStats, &nbSymbols, &tableLog, src, srcSize);
+    if (HUF_isError(iSize)) return iSize;
+
+    /* check result */
+    if (tableLog > memLog) return ERROR(tableLog_tooLarge);   /* DTable can't fit code depth */
+
+    /* find maxWeight */
+    for (maxW = tableLog; rankStats[maxW]==0; maxW--)
+        { if (!maxW) return ERROR(GENERIC); }  /* necessarily finds a solution before maxW==0 */
+
+    /* Get start index of each weight */
+    {
+        U32 w, nextRankStart = 0;
+        for (w=1; w<=maxW; w++)
+        {
+            U32 current = nextRankStart;
+            nextRankStart += rankStats[w];
+            rankStart[w] = current;
+        }
+        rankStart[0] = nextRankStart;   /* put all 0w symbols at the end of sorted list*/
+        sizeOfSort = nextRankStart;   /* number of symbols with a non-zero weight */
+    }
+
+    /* sort symbols by weight */
+    {
+        U32 s;
+        for (s=0; s<nbSymbols; s++)
+        {
+            U32 w = weightList[s];
+            U32 r = rankStart[w]++;   /* after the loop rankStart[w] is the end of weight w, i.e. the start of weight w+1 */
+            sortedSymbol[r].symbol = (BYTE)s;
+            sortedSymbol[r].weight = (BYTE)w;
+        }
+        rankStart[0] = 0;   /* forget 0w symbols; this is beginning of weight(1) */
+    }
+
+    /* Build rankVal */
+    {
+        const U32 minBits = tableLog+1 - maxW;
+        U32 nextRankVal = 0;
+        U32 w, consumed;
+        const int rescale = (memLog-tableLog) - 1;   /* tableLog <= memLog */
+        U32* rankVal0 = rankVal[0];   /* row 0 : start positions when no bit has been consumed yet */
+        for (w=1; w<=maxW; w++)
+        {
+            U32 current = nextRankVal;
+            nextRankVal += rankStats[w] << (w+rescale);
+            rankVal0[w] = current;
+        }
+        for (consumed = minBits; consumed <= memLog - minBits; consumed++)
+        {
+            U32* rankValPtr = rankVal[consumed];
+            for (w = 1; w <= maxW; w++)
+            {
+                rankValPtr[w] = rankVal0[w] >> consumed;   /* same positions, scaled down for a sub-table */
+            }
+        }
+    }
+
+    HUF_fillDTableX4(dt, memLog,
+                   sortedSymbol, sizeOfSort,
+                   rankStart0, rankVal, maxW,   /* rankStart0 is passed unshifted, so index w now gives the start of weight w */
+                   tableLog+1);
+
+    return iSize;
+}
+
+
+static U32 HUF_decodeSymbolX4(void* op, BIT_DStream_t* DStream, const HUF_DEltX4* dt, const U32 dtLog)
+{   /* Copies the first two bytes of the matched cell to op, consumes the cell's bits, and returns the cell's length field. */
+    const HUF_DEltX4* const cell = dt + BIT_lookBitsFast(DStream, dtLog);   /* note : dtLog >= 1 */
+    memcpy(op, cell, 2);   /* two bytes are always stored, whatever the cell's length */
+    BIT_skipBits(DStream, cell->nbBits);
+    return cell->length;
+}
+
+static U32 HUF_decodeLastSymbolX4(void* op, BIT_DStream_t* DStream, const HUF_DEltX4* dt, const U32 dtLog)
+{   /* Decodes one last output byte : stores only the first byte of the matched cell and always returns 1. */
+    const HUF_DEltX4* const cell = dt + BIT_lookBitsFast(DStream, dtLog);   /* note : dtLog >= 1 */
+    const size_t containerBits = sizeof(DStream->bitContainer) * 8;
+
+    memcpy(op, cell, 1);
+    if (cell->length == 1)
+        BIT_skipBits(DStream, cell->nbBits);
+    else if (DStream->bitsConsumed < containerBits)
+    {
+        BIT_skipBits(DStream, cell->nbBits);
+        /* ugly hack; works only because it's the last symbol. Note : can't easily extract nbBits from just this symbol */
+        if (DStream->bitsConsumed > containerBits) DStream->bitsConsumed = containerBits;
+    }
+    return 1;
+}
+
+
+#define HUF_DECODE_SYMBOLX4_0(ptr, DStreamPtr) \
+    ptr += HUF_decodeSymbolX4(ptr, DStreamPtr, dt, dtLog)   /* always decodes; `dt` and `dtLog` must exist in the caller's scope */
+
+#define HUF_DECODE_SYMBOLX4_1(ptr, DStreamPtr) \
+    if (MEM_64bits() || (HUF_MAX_TABLELOG<=12)) \
+        ptr += HUF_decodeSymbolX4(ptr, DStreamPtr, dt, dtLog)   /* decodes only when MEM_64bits() is true or HUF_MAX_TABLELOG <= 12 */
+
+#define HUF_DECODE_SYMBOLX4_2(ptr, DStreamPtr) \
+    if (MEM_64bits()) \
+        ptr += HUF_decodeSymbolX4(ptr, DStreamPtr, dt, dtLog)   /* decodes only when MEM_64bits() is true */
+
+static inline size_t HUF_decodeStreamX4(BYTE* p, BIT_DStream_t* bitDPtr, BYTE* const pEnd, const HUF_DEltX4* const dt, const U32 dtLog)
+{   /* Decodes one bitstream into [p, pEnd) and returns the number of bytes written */
+    BYTE* const pStart = p;
+
+    /* up to 8 symbols at a time */
+    while ((BIT_reloadDStream(bitDPtr) == BIT_DStream_unfinished) && (p < pEnd-7))
+    {
+        HUF_DECODE_SYMBOLX4_2(p, bitDPtr);
+        HUF_DECODE_SYMBOLX4_1(p, bitDPtr);
+        HUF_DECODE_SYMBOLX4_2(p, bitDPtr);
+        HUF_DECODE_SYMBOLX4_0(p, bitDPtr);
+    }
+
+    /* closer to the end */
+    while ((BIT_reloadDStream(bitDPtr) == BIT_DStream_unfinished) && (p <= pEnd-2))   /* a decode stores 2 bytes, so 2 must remain */
+        HUF_DECODE_SYMBOLX4_0(p, bitDPtr);
+
+    while (p <= pEnd-2)
+        HUF_DECODE_SYMBOLX4_0(p, bitDPtr);   /* no need to reload : reached the end of DStream */
+
+    if (p < pEnd)   /* exactly one byte left : use the variant that stores a single byte */
+        p += HUF_decodeLastSymbolX4(p, bitDPtr, dt, dtLog);
+
+    return p-pStart;
+}
+
+static size_t HUF_decompress4X4_usingDTable(
+          void* dst,  size_t dstSize,
+    const void* cSrc, size_t cSrcSize,
+    const U32* DTable)
+{   /* Decodes four interleaved bitstreams with an already built double-symbol table; returns dstSize, or an error code */
+    if (cSrcSize < 10) return ERROR(corruption_detected);   /* strict minimum : jump table + 1 byte per stream */
+
+    {
+        const BYTE* const istart = (const BYTE*) cSrc;
+        BYTE* const ostart = (BYTE*) dst;
+        BYTE* const oend = ostart + dstSize;
+        const void* const dtPtr = DTable;
+        const HUF_DEltX4* const dt = ((const HUF_DEltX4*)dtPtr) +1;   /* cells start after the first U32 of DTable */
+        const U32 dtLog = DTable[0];   /* that first U32 holds the table log */
+        size_t errorCode;
+
+        /* Init */
+        BIT_DStream_t bitD1;
+        BIT_DStream_t bitD2;
+        BIT_DStream_t bitD3;
+        BIT_DStream_t bitD4;
+        const size_t length1 = MEM_readLE16(istart);   /* sizes of streams 1 to 3, stored as three little-endian 16-bit values */
+        const size_t length2 = MEM_readLE16(istart+2);
+        const size_t length3 = MEM_readLE16(istart+4);
+        size_t length4;   /* not stored : whatever remains of cSrc */
+        const BYTE* const istart1 = istart + 6;  /* jumpTable */
+        const BYTE* const istart2 = istart1 + length1;
+        const BYTE* const istart3 = istart2 + length2;
+        const BYTE* const istart4 = istart3 + length3;
+        const size_t segmentSize = (dstSize+3) / 4;   /* output share of streams 1 to 3; stream 4 gets the rest up to oend */
+        BYTE* const opStart2 = ostart + segmentSize;
+        BYTE* const opStart3 = opStart2 + segmentSize;
+        BYTE* const opStart4 = opStart3 + segmentSize;
+        BYTE* op1 = ostart;
+        BYTE* op2 = opStart2;
+        BYTE* op3 = opStart3;
+        BYTE* op4 = opStart4;
+        U32 endSignal;
+
+        length4 = cSrcSize - (length1 + length2 + length3 + 6);
+        if (length4 > cSrcSize) return ERROR(corruption_detected);   /* overflow */
+        errorCode = BIT_initDStream(&bitD1, istart1, length1);
+        if (HUF_isError(errorCode)) return errorCode;
+        errorCode = BIT_initDStream(&bitD2, istart2, length2);
+        if (HUF_isError(errorCode)) return errorCode;
+        errorCode = BIT_initDStream(&bitD3, istart3, length3);
+        if (HUF_isError(errorCode)) return errorCode;
+        errorCode = BIT_initDStream(&bitD4, istart4, length4);
+        if (HUF_isError(errorCode)) return errorCode;
+
+        /* 16-32 symbols per loop (4-8 symbols per stream) */
+        endSignal = BIT_reloadDStream(&bitD1) | BIT_reloadDStream(&bitD2) | BIT_reloadDStream(&bitD3) | BIT_reloadDStream(&bitD4);
+        for ( ; (endSignal==BIT_DStream_unfinished) && (op4<(oend-7)) ; )   /* only op4 is bounded here; op1..op3 are checked after the loop */
+        {
+            HUF_DECODE_SYMBOLX4_2(op1, &bitD1);
+            HUF_DECODE_SYMBOLX4_2(op2, &bitD2);
+            HUF_DECODE_SYMBOLX4_2(op3, &bitD3);
+            HUF_DECODE_SYMBOLX4_2(op4, &bitD4);
+            HUF_DECODE_SYMBOLX4_1(op1, &bitD1);
+            HUF_DECODE_SYMBOLX4_1(op2, &bitD2);
+            HUF_DECODE_SYMBOLX4_1(op3, &bitD3);
+            HUF_DECODE_SYMBOLX4_1(op4, &bitD4);
+            HUF_DECODE_SYMBOLX4_2(op1, &bitD1);
+            HUF_DECODE_SYMBOLX4_2(op2, &bitD2);
+            HUF_DECODE_SYMBOLX4_2(op3, &bitD3);
+            HUF_DECODE_SYMBOLX4_2(op4, &bitD4);
+            HUF_DECODE_SYMBOLX4_0(op1, &bitD1);
+            HUF_DECODE_SYMBOLX4_0(op2, &bitD2);
+            HUF_DECODE_SYMBOLX4_0(op3, &bitD3);
+            HUF_DECODE_SYMBOLX4_0(op4, &bitD4);
+
+            endSignal = BIT_reloadDStream(&bitD1) | BIT_reloadDStream(&bitD2) | BIT_reloadDStream(&bitD3) | BIT_reloadDStream(&bitD4);
+        }
+
+        /* check corruption */
+        if (op1 > opStart2) return ERROR(corruption_detected);   /* a stream wrote past the start of the next segment */
+        if (op2 > opStart3) return ERROR(corruption_detected);
+        if (op3 > opStart4) return ERROR(corruption_detected);
+        /* note : op4 supposed already verified within main loop */
+
+        /* finish bitStreams one by one */
+        HUF_decodeStreamX4(op1, &bitD1, opStart2, dt, dtLog);
+        HUF_decodeStreamX4(op2, &bitD2, opStart3, dt, dtLog);
+        HUF_decodeStreamX4(op3, &bitD3, opStart4, dt, dtLog);
+        HUF_decodeStreamX4(op4, &bitD4, oend,     dt, dtLog);
+
+        /* check */
+        endSignal = BIT_endOfDStream(&bitD1) & BIT_endOfDStream(&bitD2) & BIT_endOfDStream(&bitD3) & BIT_endOfDStream(&bitD4);
+        if (!endSignal) return ERROR(corruption_detected);   /* every stream must be fully consumed */
+
+        /* decoded size */
+        return dstSize;
+    }
+}
+
+
+static size_t HUF_decompress4X4 (void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize)
+{   /* Builds the double-symbol table from the header at cSrc, then decodes the four streams that follow it. */
+    HUF_CREATE_STATIC_DTABLEX4(DTable, HUF_MAX_TABLELOG);
+    const size_t headerSize = HUF_readDTableX4 (DTable, cSrc, cSrcSize);
+
+    if (HUF_isError(headerSize)) return headerSize;
+    if (headerSize >= cSrcSize) return ERROR(srcSize_wrong);   /* nothing left after the table description */
+
+    /* the compressed streams start right after the table description */
+    return HUF_decompress4X4_usingDTable (dst, dstSize,
+                                          (const BYTE*)cSrc + headerSize, cSrcSize - headerSize,
+                                          DTable);
+}
+
+
+/**********************************/
+/* Generic decompression selector */
+/**********************************/
+
+typedef struct { U32 tableTime; U32 decode256Time; } algo_time_t;   /* cost model of one decoder : fixed table cost + cost per 256 output bytes (units not stated here) */
+static const algo_time_t algoTime[16 /* Quantization */][3 /* single, double, quad */] =
+{   /* row index Q is computed by HUF_decompress() as cSrcSize * 16 / dstSize */
+    /* single, double, quad */
+    {{0,0}, {1,1}, {2,2}},  /* Q==0 : impossible */
+    {{0,0}, {1,1}, {2,2}},  /* Q==1 : impossible */
+    {{  38,130}, {1313, 74}, {2151, 38}},   /* Q == 2 : 12-18% */
+    {{ 448,128}, {1353, 74}, {2238, 41}},   /* Q == 3 : 18-25% */
+    {{ 556,128}, {1353, 74}, {2238, 47}},   /* Q == 4 : 25-32% */
+    {{ 714,128}, {1418, 74}, {2436, 53}},   /* Q == 5 : 32-38% */
+    {{ 883,128}, {1437, 74}, {2464, 61}},   /* Q == 6 : 38-44% */
+    {{ 897,128}, {1515, 75}, {2622, 68}},   /* Q == 7 : 44-50% */
+    {{ 926,128}, {1613, 75}, {2730, 75}},   /* Q == 8 : 50-56% */
+    {{ 947,128}, {1729, 77}, {3359, 77}},   /* Q == 9 : 56-62% */
+    {{1107,128}, {2083, 81}, {4006, 84}},   /* Q ==10 : 62-69% */
+    {{1177,128}, {2379, 87}, {4785, 88}},   /* Q ==11 : 69-75% */
+    {{1242,128}, {2415, 93}, {5155, 84}},   /* Q ==12 : 75-81% */
+    {{1349,128}, {2644,106}, {5260,106}},   /* Q ==13 : 81-87% */
+    {{1455,128}, {2422,124}, {4174,124}},   /* Q ==14 : 87-93% */
+    {{ 722,128}, {1891,145}, {1936,146}},   /* Q ==15 : 93-99% */
+};
+
+typedef size_t (*decompressionAlgo)(void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize);   /* common signature of the HUF_decompress4Xn decoders */
+
+static size_t HUF_decompress (void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize)
+{
+    /* Entry point of the Huffman decoder : handles the trivial encodings itself, otherwise runs the
+     * single-symbol (4X2) or double-symbol (4X4) decoder, whichever algoTime estimates to be cheaper.
+     * @return : dstSize on success, or an error code */
+    static const decompressionAlgo decoders[3] = { HUF_decompress4X2, HUF_decompress4X4, NULL };
+    const U32 nb256 = (U32)(dstSize >> 8);   /* output size in units of 256 bytes */
+    U32 cost[3];
+    U32 quant;
+    int algo;
+
+    /* trivial cases */
+    if (dstSize == 0) return ERROR(dstSize_tooSmall);
+    if (cSrcSize > dstSize) return ERROR(corruption_detected);   /* invalid */
+    if (cSrcSize == dstSize) { memcpy(dst, cSrc, dstSize); return dstSize; }   /* not compressed */
+    if (cSrcSize == 1) { memset(dst, *(const BYTE*)cSrc, dstSize); return dstSize; }   /* RLE */
+
+    /* estimated cost of each decoder : table construction + decoding per 256 bytes */
+    quant = (U32)(cSrcSize * 16 / dstSize);   /* quant < 16 since dstSize > cSrcSize */
+    for (algo = 0; algo < 3; algo++)
+    {
+        const algo_time_t* const t = &algoTime[quant][algo];
+        cost[algo] = t->tableTime + (t->decode256Time * nb256);
+    }
+    cost[1] += cost[1] >> 4;   /* advantage to algorithms using less memory, for cache eviction */
+    cost[2] += cost[2] >> 3;   /* computed but never selected : decoders[2] is NULL */
+
+    /* only the first two decoders are candidates */
+    algo = (cost[1] < cost[0]) ? 1 : 0;
+    return decoders[algo](dst, dstSize, cSrc, cSrcSize);
+}
+
+
+
+#endif   /* ZSTD_CCOMMON_H_MODULE */
+
+
+/*
+    zstd - decompression module for v0.4 legacy format
+    Copyright (C) 2015-2016, Yann Collet.
+
+    BSD 2-Clause License (http://www.opensource.org/licenses/bsd-license.php)
+
+    Redistribution and use in source and binary forms, with or without
+    modification, are permitted provided that the following conditions are
+    met:
+    * Redistributions of source code must retain the above copyright
+    notice, this list of conditions and the following disclaimer.
+    * Redistributions in binary form must reproduce the above
+    copyright notice, this list of conditions and the following disclaimer
+    in the documentation and/or other materials provided with the
+    distribution.
+    THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+    "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+    LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+    A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+    OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+    SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+    LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+    DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+    THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+    (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+    OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+    You can contact the author at :
+    - zstd source repository : https://github.com/Cyan4973/zstd
+    - zstd public forum : https://groups.google.com/forum/#!forum/lz4c
+*/
+
+/* ***************************************************************
+*  Tuning parameters
+*****************************************************************/
+/*!
+ * HEAPMODE :
+ * Select how default decompression function ZSTD_decompress() will allocate memory,
+ * in memory stack (0), or in memory heap (1, requires malloc())
+ */
+#ifndef ZSTD_HEAPMODE
+#  define ZSTD_HEAPMODE 1
+#endif
+
+
+/* *******************************************************
+*  Includes
+*********************************************************/
+#include <stdlib.h>      /* calloc */
+#include <string.h>      /* memcpy, memmove */
+#include <stdio.h>       /* debug : printf */
+
+
+/* *******************************************************
+*  Compiler specifics
+*********************************************************/
+#ifdef _MSC_VER    /* Visual Studio */
+#  include <intrin.h>                    /* For Visual 2005 */
+#  pragma warning(disable : 4127)        /* disable: C4127: conditional expression is constant */
+#  pragma warning(disable : 4324)        /* disable: C4324: padded structure */
+#endif
+
+
+/* *************************************
+*  Local types
+***************************************/
+typedef struct
+{
+    blockType_t blockType;   /* taken from the top 2 bits of the block header by ZSTD_getcBlockSize() */
+    U32 origSize;            /* the header's size field for bt_rle blocks, 0 for every other type */
+} blockProperties_t;
+
+
+/* *******************************************************
+*  Memory operations
+**********************************************************/
+static void ZSTD_copy4(void* dst, const void* src) { memcpy(dst, src, 4); }   /* copies exactly 4 bytes; uses memcpy, so the two areas must not overlap */
+
+
+/* *************************************
+*  Error Management
+***************************************/
+
+/*! ZSTD_isError
+*   tells if a return value is an error code (forwards to ERR_isError and returns its result unchanged) */
+static unsigned ZSTD_isError(size_t code) { return ERR_isError(code); }
+
+
+/* *************************************************************
+*   Context management
+***************************************************************/
+typedef enum { ZSTDds_getFrameHeaderSize, ZSTDds_decodeFrameHeader,
+               ZSTDds_decodeBlockHeader, ZSTDds_decompressBlock } ZSTD_dStage;   /* values of the context's `stage` field */
+
+struct ZSTDv04_Dctx_s
+{
+    U32 LLTable[FSE_DTABLE_SIZE_U32(LLFSELog)];   /* three FSE decoding tables; presumably literal lengths, offsets, match lengths - confirm at the call sites */
+    U32 OffTable[FSE_DTABLE_SIZE_U32(OffFSELog)];
+    U32 MLTable[FSE_DTABLE_SIZE_U32(MLFSELog)];
+    const void* previousDstEnd;   /* these four pointers are cleared to NULL by ZSTD_resetDCtx() */
+    const void* base;
+    const void* vBase;
+    const void* dictEnd;
+    size_t expected;     /* set to ZSTD_frameHeaderSize_min by ZSTD_resetDCtx(); presumably the input size wanted by the next step */
+    size_t headerSize;   /* set by ZSTD_decodeFrameHeader_Part1() */
+    ZSTD_parameters params;   /* filled by ZSTD_getFrameParams() through ZSTD_decodeFrameHeader_Part2() */
+    blockType_t bType;
+    ZSTD_dStage stage;   /* set to ZSTDds_getFrameHeaderSize by ZSTD_resetDCtx() */
+    const BYTE* litPtr;   /* literals of the current block : set by ZSTD_decodeLiteralsBlock() to litBuffer or directly into the input */
+    size_t litSize;       /* number of literals available at litPtr */
+    BYTE litBuffer[BLOCKSIZE + 8 /* margin for wildcopy */];
+    BYTE headerBuffer[ZSTD_frameHeaderSize_max];
+};  /* typedef'd to ZSTD_DCtx within "zstd_static.h" */
+
+static size_t ZSTD_resetDCtx(ZSTD_DCtx* dctx)
+{   /* Rewinds dctx to the frame-header stage and clears its four position pointers; tables and buffers are left untouched. Always returns 0. */
+    dctx->dictEnd = NULL;
+    dctx->vBase = NULL;
+    dctx->base = NULL;
+    dctx->previousDstEnd = NULL;
+    dctx->stage = ZSTDds_getFrameHeaderSize;
+    dctx->expected = ZSTD_frameHeaderSize_min;
+    return 0;
+}
+
+static ZSTD_DCtx* ZSTD_createDCtx(void)
+{   /* Allocates a decompression context with malloc() and resets it; returns NULL when the allocation fails. */
+    ZSTD_DCtx* const ctx = (ZSTD_DCtx*)malloc(sizeof(*ctx));
+    if (ctx != NULL)
+        ZSTD_resetDCtx(ctx);
+    return ctx;
+}
+
+static size_t ZSTD_freeDCtx(ZSTD_DCtx* dctx)
+{   /* Releases a context with free(); always returns 0 */
+    free(dctx);   /* free(NULL) is a no-op, so a NULL dctx is accepted */
+    return 0;
+}
+
+
+/* *************************************************************
+*   Decompression section
+***************************************************************/
+/** ZSTD_decodeFrameHeader_Part1
+*   checks the magic number at the start of a frame and records the Frame Header size in zc->headerSize.
+*   srcSize must be == ZSTD_frameHeaderSize_min
+*   @return : the full size of the Frame Header (here always ZSTD_frameHeaderSize_min), or an error code */
+static size_t ZSTD_decodeFrameHeader_Part1(ZSTD_DCtx* zc, const void* src, size_t srcSize)
+{
+    if (srcSize != ZSTD_frameHeaderSize_min)
+        return ERROR(srcSize_wrong);
+    if (MEM_readLE32(src) != ZSTD_MAGICNUMBER)
+        return ERROR(prefix_unknown);
+    zc->headerSize = ZSTD_frameHeaderSize_min;
+    return zc->headerSize;
+}
+
+
+static size_t ZSTD_getFrameParams(ZSTD_parameters* params, const void* src, size_t srcSize)
+{   /* Parses the frame header at src into *params; returns 0 on success, ZSTD_frameHeaderSize_max when srcSize is too small, or an error code. */
+    const BYTE* const header = (const BYTE*)src;
+    if (srcSize < ZSTD_frameHeaderSize_min) return ZSTD_frameHeaderSize_max;
+    if (MEM_readLE32(header) != ZSTD_MAGICNUMBER) return ERROR(prefix_unknown);
+
+    memset(params, 0, sizeof(*params));
+    params->windowLog = (header[4] & 15) + ZSTD_WINDOWLOG_ABSOLUTEMIN;   /* low 4 bits of the byte after the magic number */
+    if ((header[4] >> 4) != 0) return ERROR(frameParameter_unsupported);   /* reserved bits */
+    return 0;
+}
+
+/** ZSTD_decodeFrameHeader_Part2
+*   parses the complete Frame Header into zc->params.
+*   srcSize must equal zc->headerSize, as recorded by ZSTD_decodeFrameHeader_Part1
+*   @return : 0, or an error code, which can be tested using ZSTD_isError() */
+static size_t ZSTD_decodeFrameHeader_Part2(ZSTD_DCtx* zc, const void* src, size_t srcSize)
+{
+    size_t paramsStatus;
+    if (zc->headerSize != srcSize) return ERROR(srcSize_wrong);
+    paramsStatus = ZSTD_getFrameParams(&(zc->params), src, srcSize);
+    if (MEM_32bits() && (zc->params.windowLog > 25)) return ERROR(frameParameter_unsupportedBy32bits);   /* tested whatever paramsStatus is */
+    return paramsStatus;
+}
+
+
+static size_t ZSTD_getcBlockSize(const void* src, size_t srcSize, blockProperties_t* bpPtr)
+{
+    const BYTE* const in = (const BYTE* const)src;
+    BYTE headerFlags;
+    U32 cSize;
+
+    if (srcSize < 3) return ERROR(srcSize_wrong);
+
+    headerFlags = *in;
+    cSize = in[2] + (in[1]<<8) + ((in[0] & 7)<<16);
+
+    bpPtr->blockType = (blockType_t)(headerFlags >> 6);
+    bpPtr->origSize = (bpPtr->blockType == bt_rle) ? cSize : 0;
+
+    if (bpPtr->blockType == bt_end) return 0;
+    if (bpPtr->blockType == bt_rle) return 1;
+    return cSize;
+}
+
+static size_t ZSTD_copyRawBlock(void* dst, size_t maxDstSize, const void* src, size_t srcSize)
+{   /* Copies an uncompressed block unchanged; returns srcSize, or an error code when dst cannot hold it. */
+    const int fits = (srcSize <= maxDstSize);
+    if (fits) memcpy(dst, src, srcSize);
+    return fits ? srcSize : ERROR(dstSize_tooSmall);
+}
+
+
+/** ZSTD_decompressLiterals
+    Decodes a Huffman-compressed literals section into dst; *maxDstSizePtr is the capacity on entry and the literals size on success.
+    @return : nb of bytes read from src, or an error code*/
+static size_t ZSTD_decompressLiterals(void* dst, size_t* maxDstSizePtr, const void* src, size_t srcSize)
+{
+    const BYTE* const header = (const BYTE*)src;
+    const size_t regenSize = (MEM_readLE32(header) & 0x1FFFFF) >> 2;     /* no buffer issue : srcSize >= MIN_CBLOCK_SIZE */
+    const size_t compSize  = (MEM_readLE32(header+2) & 0xFFFFFF) >> 5;   /* no buffer issue : srcSize >= MIN_CBLOCK_SIZE */
+    const size_t consumed  = compSize + 5;   /* 5 header bytes, then the compressed literals */
+
+    if (regenSize > *maxDstSizePtr) return ERROR(corruption_detected);
+    if (consumed > srcSize) return ERROR(corruption_detected);
+
+    if (HUF_isError(HUF_decompress(dst, regenSize, header+5, compSize)))
+        return ERROR(corruption_detected);
+    *maxDstSizePtr = regenSize;
+    return consumed;
+}
+
+
+/** ZSTD_decodeLiteralsBlock
+    Decodes the literals section at the start of a compressed block and publishes it through dctx->litPtr / dctx->litSize.
+    @return : nb of bytes read from src (< srcSize ), or an error code */
+static size_t ZSTD_decodeLiteralsBlock(ZSTD_DCtx* dctx, const void* src, size_t srcSize)   /* note : srcSize < BLOCKSIZE */
+{
+    const BYTE* const istart = (const BYTE*) src;
+
+    /* any compressed block with literals segment must be at least this size */
+    if (srcSize < MIN_CBLOCK_SIZE) return ERROR(corruption_detected);
+
+    switch(*istart & 3)   /* the low 2 bits of the first byte select the literals encoding */
+    {
+    case 0:   /* compressed */
+        {
+            size_t litSize = BLOCKSIZE;   /* in : capacity of litBuffer; out : decoded size */
+            const size_t readSize = ZSTD_decompressLiterals(dctx->litBuffer, &litSize, src, srcSize);
+            dctx->litPtr = dctx->litBuffer;
+            dctx->litSize = litSize;
+            memset(dctx->litBuffer + dctx->litSize, 0, 8);   /* zero the 8-byte margin that follows the literals */
+            return readSize;   /* works if it's an error too */
+        }
+    case IS_RAW:
+        {
+            const size_t litSize = (MEM_readLE32(istart) & 0xFFFFFF) >> 2;   /* no buffer issue : srcSize >= MIN_CBLOCK_SIZE */
+            if (litSize > srcSize-11)   /* risk of reading too far with wildcopy */
+            {
+                /* litBuffer holds BLOCKSIZE + 8 bytes : check the bound here instead of relying on the caller's srcSize */
+                if (litSize > BLOCKSIZE) return ERROR(corruption_detected);
+                if (litSize > srcSize-3) return ERROR(corruption_detected);
+                memcpy(dctx->litBuffer, istart, litSize);   /* NOTE(review): copies from istart while the direct path uses istart+3 - confirm intent */
+                dctx->litPtr = dctx->litBuffer;
+                dctx->litSize = litSize;
+                memset(dctx->litBuffer + dctx->litSize, 0, 8);
+                return litSize+3;
+            }
+            dctx->litPtr = istart+3;   /* direct reference into compressed stream */
+            dctx->litSize = litSize;
+            return litSize+3;
+        }
+    case IS_RLE:
+        {
+            const size_t litSize = (MEM_readLE32(istart) & 0xFFFFFF) >> 2;   /* no buffer issue : srcSize >= MIN_CBLOCK_SIZE */
+            if (litSize > BLOCKSIZE) return ERROR(corruption_detected);
+            memset(dctx->litBuffer, istart[3], litSize + 8);   /* the repeated byte also fills the 8-byte margin */
+            dctx->litPtr = dctx->litBuffer;
+            dctx->litSize = litSize;
+            return 4;   /* 3 header bytes + the repeated byte */
+        }
+    default:
+        return ERROR(corruption_detected);   /* forbidden nominal case */
+    }
+}
+
+
+static size_t ZSTD_decodeSeqHeaders(int* nbSeq, const BYTE** dumpsPtr, size_t* dumpsLengthPtr,
+                         FSE_DTable* DTableLL, FSE_DTable* DTableML, FSE_DTable* DTableOffb,
+                         const void* src, size_t srcSize)
+{   /* Parses the sequences section header : sequence count, dumps area, then the three FSE tables; returns the bytes read or an error code */
+    const BYTE* const istart = (const BYTE* const)src;
+    const BYTE* ip = istart;
+    const BYTE* const iend = istart + srcSize;
+    U32 LLtype, Offtype, MLtype;
+    U32 LLlog, Offlog, MLlog;
+    size_t dumpsLength;
+
+    /* check */
+    if (srcSize < 5) return ERROR(srcSize_wrong);   /* the fixed part below reads up to 5 bytes */
+
+    /* SeqHead */
+    *nbSeq = MEM_readLE16(ip); ip+=2;
+    LLtype  = *ip >> 6;         /* bits 7-6 of the flags byte */
+    Offtype = (*ip >> 4) & 3;   /* bits 5-4 */
+    MLtype  = (*ip >> 2) & 3;   /* bits 3-2 */
+    if (*ip & 2)   /* bit 1 set : dumps length stored on the next two bytes */
+    {
+        dumpsLength  = ip[2];
+        dumpsLength += ip[1] << 8;
+        ip += 3;
+    }
+    else   /* otherwise 9 bits : bit 0 of the flags byte, then one byte */
+    {
+        dumpsLength  = ip[1];
+        dumpsLength += (ip[0] & 1) << 8;
+        ip += 2;
+    }
+    *dumpsPtr = ip;   /* the dumps area starts right after the header */
+    ip += dumpsLength;
+    *dumpsLengthPtr = dumpsLength;
+
+    /* check */
+    if (ip > iend-3) return ERROR(srcSize_wrong); /* min : all 3 are "raw", hence no header, but at least xxLog bits per type */
+
+    /* sequences */
+    {
+        S16 norm[MaxML+1];    /* assumption : MaxML >= MaxLL >= MaxOff */
+        size_t headerSize;
+
+        /* Build DTables */
+        switch(LLtype)
+        {
+        case bt_rle :
+            LLlog = 0;
+            FSE_buildDTable_rle(DTableLL, *ip++); break;   /* one byte : the repeated symbol */
+        case bt_raw :
+            LLlog = LLbits;
+            FSE_buildDTable_raw(DTableLL, LLbits); break;   /* nothing to read */
+        default :   /* table description read by FSE_readNCount */
+            {   U32 max = MaxLL;
+                headerSize = FSE_readNCount(norm, &max, &LLlog, ip, iend-ip);
+                if (FSE_isError(headerSize)) return ERROR(GENERIC);
+                if (LLlog > LLFSELog) return ERROR(corruption_detected);
+                ip += headerSize;
+                FSE_buildDTable(DTableLL, norm, max, LLlog);
+        }   }
+
+        switch(Offtype)
+        {
+        case bt_rle :
+            Offlog = 0;
+            if (ip > iend-2) return ERROR(srcSize_wrong);   /* min : "raw", hence no header, but at least xxLog bits */
+            FSE_buildDTable_rle(DTableOffb, *ip++ & MaxOff); /* if *ip > MaxOff, data is corrupted */
+            break;
+        case bt_raw :
+            Offlog = Offbits;
+            FSE_buildDTable_raw(DTableOffb, Offbits); break;
+        default :
+            {   U32 max = MaxOff;
+                headerSize = FSE_readNCount(norm, &max, &Offlog, ip, iend-ip);
+                if (FSE_isError(headerSize)) return ERROR(GENERIC);
+                if (Offlog > OffFSELog) return ERROR(corruption_detected);
+                ip += headerSize;
+                FSE_buildDTable(DTableOffb, norm, max, Offlog);
+        }   }
+
+        switch(MLtype)
+        {
+        case bt_rle :
+            MLlog = 0;
+            if (ip > iend-2) return ERROR(srcSize_wrong); /* min : "raw", hence no header, but at least xxLog bits */
+            FSE_buildDTable_rle(DTableML, *ip++); break;
+        case bt_raw :
+            MLlog = MLbits;
+            FSE_buildDTable_raw(DTableML, MLbits); break;
+        default :
+            {   U32 max = MaxML;
+                headerSize = FSE_readNCount(norm, &max, &MLlog, ip, iend-ip);
+                if (FSE_isError(headerSize)) return ERROR(GENERIC);
+                if (MLlog > MLFSELog) return ERROR(corruption_detected);
+                ip += headerSize;
+                FSE_buildDTable(DTableML, norm, max, MLlog);
+    }   }   }
+
+    return ip-istart;   /* number of bytes consumed from src */
+}
+
+
+typedef struct {   /* one decoded sequence, as filled by ZSTD_decodeSequence() */
+    size_t litLength;     /* number of literals to copy */
+    size_t offset;        /* match offset */
+    size_t matchLength;   /* match length, MINMATCH already added */
+} seq_t;
+
+typedef struct {   /* running state of the sequence decoder, read and updated by ZSTD_decodeSequence() */
+    BIT_DStream_t DStream;    /* bitstream shared by the three FSE states */
+    FSE_DState_t stateLL;     /* FSE state for literal lengths */
+    FSE_DState_t stateOffb;   /* FSE state for offset codes */
+    FSE_DState_t stateML;     /* FSE state for match lengths */
+    size_t prevOffset;        /* saved offset, used when the offset code is 0 */
+    const BYTE* dumps;        /* next byte to read in the dumps area (extended lengths) */
+    const BYTE* dumpsEnd;     /* end of the dumps area */
+} seqState_t;
+
+
+/* Decodes the next sequence (literal length, offset, match length) from seqState into *seq.
+ * seq->offset is read before being overwritten : it must still hold the previous sequence's offset.
+ * Extended lengths come from the dumps area, which is never read at or beyond dumpsEnd. */
+static void ZSTD_decodeSequence(seq_t* seq, seqState_t* seqState)
+{
+    size_t litLength, prevOffset, offset, matchLength;
+    const BYTE* dumps = seqState->dumps;
+    const BYTE* const de = seqState->dumpsEnd;
+
+    /* Literal length */
+    litLength = FSE_decodeSymbol(&(seqState->stateLL), &(seqState->DStream));
+    prevOffset = litLength ? seq->offset : seqState->prevOffset;
+    if (litLength == MaxLL) {
+        /* MaxLL is an escape : the real length continues in the dumps area (bounds checked before each read) */
+        const U32 add = (dumps < de) ? *dumps++ : 0;
+        if (add < 255) litLength += add;
+        else if ((size_t)(de - dumps) >= 3) {   /* 255 announces a 3-byte little-endian length */
+            litLength = dumps[0] + (dumps[1]<<8) + (dumps[2]<<16);
+            dumps += 3;
+        }
+        if (dumps >= de) { dumps = de-1; }  /* late correction, to avoid read overflow (data is now corrupted anyway) */
+    }
+
+    /* Offset */
+    {   static const U32 offsetPrefix[MaxOff+1] = {
+                1 /*fake*/, 1, 2, 4, 8, 16, 32, 64, 128, 256,
+                512, 1024, 2048, 4096, 8192, 16384, 32768, 65536, 131072, 262144,
+                524288, 1048576, 2097152, 4194304, 8388608, 16777216, 33554432, /*fake*/ 1, 1, 1, 1, 1 };
+        U32 offsetCode, nbBits;
+        offsetCode = FSE_decodeSymbol(&(seqState->stateOffb), &(seqState->DStream));   /* <= maxOff, by table construction */
+        if (MEM_32bits()) BIT_reloadDStream(&(seqState->DStream));
+        nbBits = offsetCode - 1;   /* number of extra bits that follow the code */
+        if (offsetCode==0) nbBits = 0;   /* cmove */
+        offset = offsetPrefix[offsetCode] + BIT_readBits(&(seqState->DStream), nbBits);
+        if (MEM_32bits()) BIT_reloadDStream(&(seqState->DStream));
+        if (offsetCode==0) offset = prevOffset;   /* cmove */
+        if (offsetCode | !litLength) seqState->prevOffset = seq->offset;   /* cmove */
+    }
+
+    /* MatchLength */
+    matchLength = FSE_decodeSymbol(&(seqState->stateML), &(seqState->DStream));
+    if (matchLength == MaxML) {
+        /* same escape scheme as for the literal length */
+        const U32 add = (dumps < de) ? *dumps++ : 0;
+        if (add < 255) matchLength += add;
+        else if ((size_t)(de - dumps) >= 3) {
+            matchLength = dumps[0] + (dumps[1]<<8) + (dumps[2]<<16);
+            dumps += 3;
+        }
+        if (dumps >= de) { dumps = de-1; }  /* late correction, to avoid read overflow (data is now corrupted anyway) */
+    }
+    matchLength += MINMATCH;
+
+    /* save result */
+    seq->litLength = litLength;
+    seq->offset = offset;
+    seq->matchLength = matchLength;
+    seqState->dumps = dumps;
+}
+
+
+static size_t ZSTD_execSequence(BYTE* op,
+                                BYTE* const oend, seq_t sequence,
+                                const BYTE** litPtr, const BYTE* const litLimit,
+                                const BYTE* const base, const BYTE* const vBase, const BYTE* const dictEnd)
+{   /* Executes one sequence at op : copies sequence.litLength literals from *litPtr (and advances *litPtr), then sequence.matchLength match bytes located sequence.offset bytes back. Returns the nb of bytes written (litLength+matchLength), or an error code. */
+    static const int dec32table[] = { 0, 1, 2, 1, 4, 4, 4, 4 };   /* added to match; indexed by offset, used only when offset < 8 */
+    static const int dec64table[] = { 8, 8, 8, 7, 8, 9,10,11 };   /* subtracted from match; indexed by offset, used only when offset < 8 */
+    BYTE* const oLitEnd = op + sequence.litLength;
+    const size_t sequenceLength = sequence.litLength + sequence.matchLength;
+    BYTE* const oMatchEnd = op + sequenceLength;   /* risk : address space overflow (32-bits) */
+    BYTE* const oend_8 = oend-8;
+    const BYTE* const litEnd = *litPtr + sequence.litLength;
+    const BYTE* match = oLitEnd - sequence.offset;   /* match source : sequence.offset bytes before the end of the copied literals */
+
+    /* check */
+    if (oLitEnd > oend_8) return ERROR(dstSize_tooSmall);   /* last match must start at a minimum distance of 8 from oend */
+    if (oMatchEnd > oend) return ERROR(dstSize_tooSmall);   /* overwrite beyond dst buffer */
+    if (litEnd > litLimit) return ERROR(corruption_detected);   /* risk read beyond lit buffer */
+
+    /* copy Literals */
+    ZSTD_wildcopy(op, *litPtr, sequence.litLength);   /* note : oLitEnd <= oend-8 : no risk of overwrite beyond oend */
+    op = oLitEnd;
+    *litPtr = litEnd;   /* update for next sequence */
+
+    /* copy Match */
+    if (sequence.offset > (size_t)(oLitEnd - base))
+    {
+        /* offset beyond prefix */
+        if (sequence.offset > (size_t)(oLitEnd - vBase))
+            return ERROR(corruption_detected);   /* offset reaches before vBase */
+        match = dictEnd - (base-match);   /* same distance before dictEnd as match was before base */
+        if (match + sequence.matchLength <= dictEnd)   /* whole match lies before dictEnd */
+        {
+            memmove(oLitEnd, match, sequence.matchLength);
+            return sequenceLength;
+        }
+        /* span extDict & currentPrefixSegment */
+        {
+            size_t length1 = dictEnd - match;   /* part of the match located before dictEnd */
+            memmove(oLitEnd, match, length1);
+            op = oLitEnd + length1;
+            sequence.matchLength -= length1;
+            match = base;   /* remainder of the match is read starting at base */
+            if (op > oend_8 || sequence.matchLength < MINMATCH) {   /* finish byte by byte instead of using the 8-byte copies below */
+              while (op < oMatchEnd) *op++ = *match++;
+              return sequenceLength;
+            }
+        }
+    }
+    /* Requirement: op <= oend_8 */
+
+    /* match within prefix */
+    if (sequence.offset < 8) {
+        /* close range match, overlap */
+        const int sub2 = dec64table[sequence.offset];
+        op[0] = match[0];
+        op[1] = match[1];
+        op[2] = match[2];
+        op[3] = match[3];
+        match += dec32table[sequence.offset];
+        ZSTD_copy4(op+4, match);
+        match -= sub2;
+    } else {
+        ZSTD_copy8(op, match);
+    }
+    op += 8; match += 8;
+
+    if (oMatchEnd > oend-(16-MINMATCH))   /* match ends close to oend : wildcopy only up to oend_8, then finish byte by byte */
+    {
+        if (op < oend_8)
+        {
+            ZSTD_wildcopy(op, match, oend_8 - op);
+            match += oend_8 - op;
+            op = oend_8;
+        }
+        while (op < oMatchEnd) *op++ = *match++;
+    }
+    else
+    {
+        ZSTD_wildcopy(op, match, (ptrdiff_t)sequence.matchLength-8);   /* works even if matchLength < 8 */
+    }
+    return sequenceLength;
+}
+
+
+static size_t ZSTD_decompressSequences(
+                               ZSTD_DCtx* dctx,
+                               void* dst, size_t maxDstSize,
+                         const void* seqStart, size_t seqSize)
+{   /* Decodes the sequences section [seqStart, seqStart+seqSize) and executes each sequence into dst, taking literals from dctx->litPtr (dctx->litSize bytes); remaining literals are appended last. Returns the nb of bytes written into dst, or an error code. */
+    const BYTE* ip = (const BYTE*)seqStart;
+    const BYTE* const iend = ip + seqSize;
+    BYTE* const ostart = (BYTE* const)dst;
+    BYTE* op = ostart;
+    BYTE* const oend = ostart + maxDstSize;
+    size_t errorCode, dumpsLength;
+    const BYTE* litPtr = dctx->litPtr;
+    const BYTE* const litEnd = litPtr + dctx->litSize;
+    int nbSeq;
+    const BYTE* dumps;
+    U32* DTableLL = dctx->LLTable;
+    U32* DTableML = dctx->MLTable;
+    U32* DTableOffb = dctx->OffTable;
+    const BYTE* const base = (const BYTE*) (dctx->base);
+    const BYTE* const vBase = (const BYTE*) (dctx->vBase);
+    const BYTE* const dictEnd = (const BYTE*) (dctx->dictEnd);
+
+    /* Build Decoding Tables */
+    errorCode = ZSTD_decodeSeqHeaders(&nbSeq, &dumps, &dumpsLength,
+                                      DTableLL, DTableML, DTableOffb,
+                                      ip, iend-ip);
+    if (ZSTD_isError(errorCode)) return errorCode;
+    ip += errorCode;   /* a non-error return value is used as the nb of bytes to skip */
+
+    /* Regen sequences */
+    {
+        seq_t sequence;
+        seqState_t seqState;
+
+        memset(&sequence, 0, sizeof(sequence));
+        sequence.offset = 4;
+        seqState.dumps = dumps;
+        seqState.dumpsEnd = dumps + dumpsLength;
+        seqState.prevOffset = 4;
+        errorCode = BIT_initDStream(&(seqState.DStream), ip, iend-ip);
+        if (ERR_isError(errorCode)) return ERROR(corruption_detected);
+        FSE_initDState(&(seqState.stateLL), &(seqState.DStream), DTableLL);
+        FSE_initDState(&(seqState.stateOffb), &(seqState.DStream), DTableOffb);
+        FSE_initDState(&(seqState.stateML), &(seqState.DStream), DTableML);
+
+        for ( ; (BIT_reloadDStream(&(seqState.DStream)) <= BIT_DStream_completed) && nbSeq ; )
+        {
+            size_t oneSeqSize;
+            nbSeq--;
+            ZSTD_decodeSequence(&sequence, &seqState);
+            oneSeqSize = ZSTD_execSequence(op, oend, sequence, &litPtr, litEnd, base, vBase, dictEnd);
+            if (ZSTD_isError(oneSeqSize)) return oneSeqSize;
+            op += oneSeqSize;
+        }
+
+        /* check if reached exact end */
+        if ( !BIT_endOfDStream(&(seqState.DStream)) ) return ERROR(corruption_detected);   /* DStream should be entirely and exactly consumed; otherwise data is corrupted */
+
+        /* last literal segment */
+        {
+            size_t lastLLSize = litEnd - litPtr;
+            if (litPtr > litEnd) return ERROR(corruption_detected);
+            if (op+lastLLSize > oend) return ERROR(dstSize_tooSmall);
+            if (op != litPtr) memcpy(op, litPtr, lastLLSize);   /* copy skipped when the literals already sit at op */
+            op += lastLLSize;
+        }
+    }
+
+    return op-ostart;
+}
+
+
+static void ZSTD_checkContinuity(ZSTD_DCtx* dctx, const void* dst)
+{   /* When dst does not start where the previous output ended, re-anchors the context : dictEnd takes the previous output end, and base / previousDstEnd restart at dst. */
+    if (dst != dctx->previousDstEnd)   /* not contiguous */
+    {
+        dctx->dictEnd = dctx->previousDstEnd;
+        dctx->vBase = (const char*)dst - ((const char*)(dctx->previousDstEnd) - (const char*)(dctx->base));   /* dst minus the size of the previous segment (previousDstEnd - base) */
+        dctx->base = dst;
+        dctx->previousDstEnd = dst;
+    }
+}
+
+
+static size_t ZSTD_decompressBlock_internal(ZSTD_DCtx* dctx,
+                            void* dst, size_t maxDstSize,
+                      const void* src, size_t srcSize)
+{   /* Decompresses one compressed block : decodes the literals sub-block first, then hands the rest of src to ZSTD_decompressSequences(). Returns that function's result, or the literals decoding error code. */
+    /* blockType == blockCompressed */
+    const BYTE* ip = (const BYTE*)src;
+
+    /* Decode literals sub-block */
+    size_t litCSize = ZSTD_decodeLiteralsBlock(dctx, src, srcSize);
+    if (ZSTD_isError(litCSize)) return litCSize;
+    ip += litCSize;   /* a non-error return value is used as the nb of bytes to skip */
+    srcSize -= litCSize;
+
+    return ZSTD_decompressSequences(dctx, dst, maxDstSize, ip, srcSize);
+}
+
+
+static size_t ZSTD_decompress_usingDict(ZSTD_DCtx* ctx,
+                                 void* dst, size_t maxDstSize,
+                                 const void* src, size_t srcSize,
+                                 const void* dict, size_t dictSize)
+{   /* Decompresses one frame from src into dst, block by block, after resetting ctx; dict (may be NULL) is inserted as prior content. Returns the nb of bytes written into dst, or an error code. */
+    const BYTE* ip = (const BYTE*)src;
+    const BYTE* iend = ip + srcSize;
+    BYTE* const ostart = (BYTE* const)dst;
+    BYTE* op = ostart;
+    BYTE* const oend = ostart + maxDstSize;
+    size_t remainingSize = srcSize;
+    blockProperties_t blockProperties;
+
+    /* init */
+    ZSTD_resetDCtx(ctx);
+    if (dict)
+    {
+        ZSTD_decompress_insertDictionary(ctx, dict, dictSize);
+        ctx->dictEnd = ctx->previousDstEnd;   /* previousDstEnd was set to the end of dict by the insertion above */
+        ctx->vBase = (const char*)dst - ((const char*)(ctx->previousDstEnd) - (const char*)(ctx->base));
+        ctx->base = dst;
+    }
+    else
+    {
+        ctx->vBase = ctx->base = ctx->dictEnd = dst;
+    }
+
+    /* Frame Header */
+    {
+        size_t frameHeaderSize;
+        if (srcSize < ZSTD_frameHeaderSize_min+ZSTD_blockHeaderSize) return ERROR(srcSize_wrong);
+        frameHeaderSize = ZSTD_decodeFrameHeader_Part1(ctx, src, ZSTD_frameHeaderSize_min);
+        if (ZSTD_isError(frameHeaderSize)) return frameHeaderSize;
+        if (srcSize < frameHeaderSize+ZSTD_blockHeaderSize) return ERROR(srcSize_wrong);
+        ip += frameHeaderSize; remainingSize -= frameHeaderSize;
+        frameHeaderSize = ZSTD_decodeFrameHeader_Part2(ctx, src, frameHeaderSize);   /* from here on, the variable only carries the Part2 status */
+        if (ZSTD_isError(frameHeaderSize)) return frameHeaderSize;
+    }
+
+    /* Loop on each block */
+    while (1)
+    {
+        size_t decodedSize=0;
+        size_t cBlockSize = ZSTD_getcBlockSize(ip, iend-ip, &blockProperties);
+        if (ZSTD_isError(cBlockSize)) return cBlockSize;
+
+        ip += ZSTD_blockHeaderSize;
+        remainingSize -= ZSTD_blockHeaderSize;
+        if (cBlockSize > remainingSize) return ERROR(srcSize_wrong);
+
+        switch(blockProperties.blockType)
+        {
+        case bt_compressed:
+            decodedSize = ZSTD_decompressBlock_internal(ctx, op, oend-op, ip, cBlockSize);
+            break;
+        case bt_raw :
+            decodedSize = ZSTD_copyRawBlock(op, oend-op, ip, cBlockSize);
+            break;
+        case bt_rle :
+            return ERROR(GENERIC);   /* not yet supported */
+            break;
+        case bt_end :
+            /* end of frame */
+            if (remainingSize) return ERROR(srcSize_wrong);   /* input bytes left after the end-of-frame block header */
+            break;
+        default:
+            return ERROR(GENERIC);   /* impossible */
+        }
+        if (cBlockSize == 0) break;   /* bt_end */
+
+        if (ZSTD_isError(decodedSize)) return decodedSize;
+        op += decodedSize;
+        ip += cBlockSize;
+        remainingSize -= cBlockSize;
+    }
+
+    return op-ostart;
+}
+
+static size_t ZSTD_findFrameCompressedSize(const void* src, size_t srcSize)
+{   /* Walks the block headers of the frame starting at src, without decoding any block content. Returns the nb of bytes from src up to and including the last block header read, or an error code. */
+    const BYTE* ip = (const BYTE*)src;
+    size_t remainingSize = srcSize;
+    blockProperties_t blockProperties;
+
+    /* Frame Header */
+    if (srcSize < ZSTD_frameHeaderSize_min) return ERROR(srcSize_wrong);
+    if (MEM_readLE32(src) != ZSTD_MAGICNUMBER) return ERROR(prefix_unknown);
+    ip += ZSTD_frameHeaderSize_min; remainingSize -= ZSTD_frameHeaderSize_min;
+
+    /* Loop on each block */
+    while (1)
+    {
+        size_t cBlockSize = ZSTD_getcBlockSize(ip, remainingSize, &blockProperties);
+        if (ZSTD_isError(cBlockSize)) return cBlockSize;
+
+        ip += ZSTD_blockHeaderSize;
+        remainingSize -= ZSTD_blockHeaderSize;
+        if (cBlockSize > remainingSize) return ERROR(srcSize_wrong);   /* block content would extend beyond src */
+
+        if (cBlockSize == 0) break;   /* bt_end */
+
+        ip += cBlockSize;   /* skip block content */
+        remainingSize -= cBlockSize;
+    }
+
+    return ip - (const BYTE*)src;
+}
+
+/* ******************************
+*  Streaming Decompression API
+********************************/
+static size_t ZSTD_nextSrcSizeToDecompress(ZSTD_DCtx* dctx)
+{   /* Returns dctx->expected : the exact srcSize that the next ZSTD_decompressContinue() call must receive. */
+    return dctx->expected;
+}
+
+static size_t ZSTD_decompressContinue(ZSTD_DCtx* ctx, void* dst, size_t maxDstSize, const void* src, size_t srcSize)
+{   /* One step of streaming decompression, selected by ctx->stage. srcSize must equal ctx->expected. Returns the nb of bytes written into dst (0 for header steps), or an error code; ctx->stage and ctx->expected are updated for the next call. */
+    /* Sanity check */
+    if (srcSize != ctx->expected) return ERROR(srcSize_wrong);
+    ZSTD_checkContinuity(ctx, dst);
+
+    /* Decompress : frame header; part 1 */
+    switch (ctx->stage)
+    {
+    case ZSTDds_getFrameHeaderSize :
+        /* get frame header size */
+        if (srcSize != ZSTD_frameHeaderSize_min) return ERROR(srcSize_wrong);   /* impossible */
+        ctx->headerSize = ZSTD_decodeFrameHeader_Part1(ctx, src, ZSTD_frameHeaderSize_min);
+        if (ZSTD_isError(ctx->headerSize)) return ctx->headerSize;
+        memcpy(ctx->headerBuffer, src, ZSTD_frameHeaderSize_min);
+        if (ctx->headerSize > ZSTD_frameHeaderSize_min) return ERROR(GENERIC);   /* impossible */
+        ctx->expected = 0;   /* not necessary to copy more */
+        /* fallthrough */
+    case ZSTDds_decodeFrameHeader:
+        /* get frame header */
+        {   size_t const result = ZSTD_decodeFrameHeader_Part2(ctx, ctx->headerBuffer, ctx->headerSize);
+            if (ZSTD_isError(result)) return result;
+            ctx->expected = ZSTD_blockHeaderSize;
+            ctx->stage = ZSTDds_decodeBlockHeader;
+            return 0;
+        }
+    case ZSTDds_decodeBlockHeader:
+        /* Decode block header */
+        {   blockProperties_t bp;
+            size_t const blockSize = ZSTD_getcBlockSize(src, ZSTD_blockHeaderSize, &bp);
+            if (ZSTD_isError(blockSize)) return blockSize;
+            if (bp.blockType == bt_end)
+            {
+                ctx->expected = 0;   /* nothing more to read for this frame */
+                ctx->stage = ZSTDds_getFrameHeaderSize;
+            }
+            else
+            {
+                ctx->expected = blockSize;
+                ctx->bType = bp.blockType;   /* remembered for the ZSTDds_decompressBlock step */
+                ctx->stage = ZSTDds_decompressBlock;
+            }
+            return 0;
+        }
+    case ZSTDds_decompressBlock:
+        {
+            /* Decompress : block content */
+            size_t rSize;
+            switch(ctx->bType)
+            {
+            case bt_compressed:
+                rSize = ZSTD_decompressBlock_internal(ctx, dst, maxDstSize, src, srcSize);
+                break;
+            case bt_raw :
+                rSize = ZSTD_copyRawBlock(dst, maxDstSize, src, srcSize);
+                break;
+            case bt_rle :
+                return ERROR(GENERIC);   /* not yet handled */
+                break;
+            case bt_end :   /* should never happen (filtered at phase 1) */
+                rSize = 0;
+                break;
+            default:
+                return ERROR(GENERIC);
+            }
+            ctx->stage = ZSTDds_decodeBlockHeader;
+            ctx->expected = ZSTD_blockHeaderSize;
+            ctx->previousDstEnd = (char*)dst + rSize;   /* NOTE(review): rSize is not tested with ZSTD_isError() before this addition; the caller receives rSize as-is */
+            return rSize;
+        }
+    default:
+        return ERROR(GENERIC);   /* impossible */
+    }
+}
+
+
+static void ZSTD_decompress_insertDictionary(ZSTD_DCtx* ctx, const void* dict, size_t dictSize)
+{   /* Registers dict as the current segment of ctx : base points to dict and previousDstEnd to its end; the former previousDstEnd becomes dictEnd. Only pointers are stored, dict content is not copied. */
+    ctx->dictEnd = ctx->previousDstEnd;
+    ctx->vBase = (const char*)dict - ((const char*)(ctx->previousDstEnd) - (const char*)(ctx->base));   /* dict minus the size of the previous segment (previousDstEnd - base) */
+    ctx->base = dict;
+    ctx->previousDstEnd = (const char*)dict + dictSize;
+}
+
+
+
+/*
+    Buffered version of Zstd compression library
+    Copyright (C) 2015, Yann Collet.
+
+    BSD 2-Clause License (http://www.opensource.org/licenses/bsd-license.php)
+
+    Redistribution and use in source and binary forms, with or without
+    modification, are permitted provided that the following conditions are
+    met:
+    * Redistributions of source code must retain the above copyright
+    notice, this list of conditions and the following disclaimer.
+    * Redistributions in binary form must reproduce the above
+    copyright notice, this list of conditions and the following disclaimer
+    in the documentation and/or other materials provided with the
+    distribution.
+    THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+    "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+    LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+    A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+    OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+    SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+    LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+    DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+    THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+    (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+    OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+    You can contact the author at :
+    - zstd source repository : https://github.com/Cyan4973/zstd
+    - zstd public forum : https://groups.google.com/forum/#!forum/lz4c
+*/
+
+/* The objects defined into this file should be considered experimental.
+ * They are not labelled stable, as their prototype may change in the future.
+ * You can use them for tests, provide feedback, or if you can endure risk of future changes.
+ */
+
+/* *************************************
+*  Includes
+***************************************/
+#include <stdlib.h>
+
+
+/** ************************************************
+*  Streaming decompression
+*
+*  A ZBUFF_DCtx object is required to track streaming operation.
+*  Use ZBUFF_createDCtx() and ZBUFF_freeDCtx() to create/release resources.
+*  Use ZBUFF_decompressInit() to start a new decompression operation.
+*  ZBUFF_DCtx objects can be reused multiple times.
+*
+*  Use ZBUFF_decompressContinue() repetitively to consume your input.
+*  *srcSizePtr and *maxDstSizePtr can be any size.
+*  The function will report how many bytes were read or written by modifying *srcSizePtr and *maxDstSizePtr.
+*  Note that it may not consume the entire input, in which case it's up to the caller to call again the function with remaining input.
+*  The content of dst will be overwritten (up to *maxDstSizePtr) at each function call, so save its content if it matters or change dst .
+*  return : a hint to preferred nb of bytes to use as input for next function call (it's only a hint, to improve latency)
+*            or 0 when a frame is completely decoded
+*            or an error code, which can be tested using ZBUFF_isError().
+*
+*  Hint : recommended buffer sizes (not compulsory)
+*  output : 128 KB block size is the internal unit, it ensures it's always possible to write a full block when it's decoded.
+*  input : just follow indications from ZBUFF_decompressContinue() to minimize latency. It should always be <= 128 KB + 3 .
+* **************************************************/
+
+typedef enum { ZBUFFds_init, ZBUFFds_readHeader, ZBUFFds_loadHeader, ZBUFFds_decodeHeader,
+               ZBUFFds_read, ZBUFFds_load, ZBUFFds_flush } ZBUFF_dStage;   /* stages of the state machine run by ZBUFF_decompressContinue() */
+
+/* *** Resource management *** */
+
+#define ZSTD_frameHeaderSize_max 5   /* too magical, should come from reference */
+struct ZBUFFv04_DCtx_s {
+    ZSTD_DCtx* zc;           /* underlying decompression context */
+    ZSTD_parameters params;  /* filled by ZSTD_getFrameParams() */
+    char* inBuff;            /* input staging buffer (malloc'ed) */
+    size_t inBuffSize;
+    size_t inPos;            /* nb of bytes currently stored in inBuff */
+    char* outBuff;           /* buffer receiving decoded data (malloc'ed) */
+    size_t outBuffSize;
+    size_t outStart;         /* start of decoded data not yet flushed to the caller */
+    size_t outEnd;           /* end of decoded data within outBuff */
+    size_t hPos;             /* nb of bytes currently stored in headerBuffer */
+    const char* dict;        /* set by ZBUFF_decompressWithDictionary(); pointer only, not copied */
+    size_t dictSize;
+    ZBUFF_dStage stage;
+    unsigned char headerBuffer[ZSTD_frameHeaderSize_max];
+};   /* used below through the ZBUFFv04_DCtx / ZBUFF_DCtx type names (ZBUFFv04_DCtx is presumably typedef'd in "zstd_v04.h" - TODO confirm) */
+
+typedef ZBUFFv04_DCtx ZBUFF_DCtx;
+
+
+static ZBUFF_DCtx* ZBUFF_createDCtx(void)
+{   /* Allocates a zero-filled ZBUFF_DCtx together with its ZSTD_DCtx; returns NULL when the ZBUFF_DCtx allocation fails. */
+    ZBUFF_DCtx* zbc = (ZBUFF_DCtx*)malloc(sizeof(ZBUFF_DCtx));
+    if (zbc==NULL) return NULL;
+    memset(zbc, 0, sizeof(*zbc));
+    zbc->zc = ZSTD_createDCtx();   /* NOTE(review): this result is not checked for NULL here */
+    zbc->stage = ZBUFFds_init;
+    return zbc;
+}
+
+static size_t ZBUFF_freeDCtx(ZBUFF_DCtx* zbc)
+{   /* Releases zbc, its ZSTD_DCtx and both internal buffers. Always returns 0. */
+    if (zbc==NULL) return 0;   /* support free on null */
+    ZSTD_freeDCtx(zbc->zc);
+    free(zbc->inBuff);
+    free(zbc->outBuff);
+    free(zbc);
+    return 0;
+}
+
+
+/* *** Initialization *** */
+
+static size_t ZBUFF_decompressInit(ZBUFF_DCtx* zbc)
+{   /* Starts a new decompression operation : stage goes to ZBUFFds_readHeader and all positions are zeroed. Returns the result of ZSTD_resetDCtx(). */
+    zbc->stage = ZBUFFds_readHeader;
+    zbc->hPos = zbc->inPos = zbc->outStart = zbc->outEnd = zbc->dictSize = 0;   /* dictSize is cleared too : a dictionary registered before this call is no longer applied */
+    return ZSTD_resetDCtx(zbc->zc);
+}
+
+
+static size_t ZBUFF_decompressWithDictionary(ZBUFF_DCtx* zbc, const void* src, size_t srcSize)
+{   /* Records the dictionary [src, src+srcSize) in zbc. Only the pointer and the size are stored (no copy); they are consumed later by ZBUFF_decompressContinue(). Always returns 0. */
+    zbc->dict = (const char*)src;
+    zbc->dictSize = srcSize;
+    return 0;
+}
+
+static size_t ZBUFF_limitCopy(void* dst, size_t maxDstSize, const void* src, size_t srcSize)
+{   /* Copies MIN(maxDstSize, srcSize) bytes from src to dst and returns the nb of bytes copied. */
+    size_t length = MIN(maxDstSize, srcSize);
+    memcpy(dst, src, length);
+    return length;
+}
+
+/* *** Decompression *** */
+
+static size_t ZBUFF_decompressContinue(ZBUFF_DCtx* zbc, void* dst, size_t* maxDstSizePtr, const void* src, size_t* srcSizePtr)
+{   /* Buffered streaming decompression, driven by zbc->stage. When the loop exits normally, *srcSizePtr and *maxDstSizePtr receive the nb of bytes read from src and written to dst, and the return value is a hint for the next input size. Error codes and the "header incomplete" cases return early. */
+    const char* const istart = (const char*)src;
+    const char* ip = istart;
+    const char* const iend = istart + *srcSizePtr;
+    char* const ostart = (char*)dst;
+    char* op = ostart;
+    char* const oend = ostart + *maxDstSizePtr;
+    U32 notDone = 1;
+
+    while (notDone)
+    {
+        switch(zbc->stage)
+        {
+
+        case ZBUFFds_init :
+            return ERROR(init_missing);
+
+        case ZBUFFds_readHeader :
+            /* read header from src */
+            {   size_t const headerSize = ZSTD_getFrameParams(&(zbc->params), src, *srcSizePtr);
+                if (ZSTD_isError(headerSize)) return headerSize;
+                if (headerSize) {
+                    /* not enough input to decode header : tell how many bytes would be necessary */
+                    memcpy(zbc->headerBuffer+zbc->hPos, src, *srcSizePtr);
+                    zbc->hPos += *srcSizePtr;
+                    *maxDstSizePtr = 0;
+                    zbc->stage = ZBUFFds_loadHeader;
+                    return headerSize - zbc->hPos;
+                }
+                zbc->stage = ZBUFFds_decodeHeader;
+                break;
+            }
+
+        case ZBUFFds_loadHeader:
+            /* complete header from src */
+            {   size_t headerSize = ZBUFF_limitCopy(
+                    zbc->headerBuffer + zbc->hPos, ZSTD_frameHeaderSize_max - zbc->hPos,
+                    src, *srcSizePtr);
+                zbc->hPos += headerSize;
+                ip += headerSize;
+                headerSize = ZSTD_getFrameParams(&(zbc->params), zbc->headerBuffer, zbc->hPos);   /* variable reused : now holds the getFrameParams result */
+                if (ZSTD_isError(headerSize)) return headerSize;
+                if (headerSize) {
+                    /* not enough input to decode header : tell how many bytes would be necessary */
+                    *maxDstSizePtr = 0;
+                    return headerSize - zbc->hPos;
+            }   }
+            /* intentional fallthrough */
+
+        case ZBUFFds_decodeHeader:
+                /* apply header to create / resize buffers */
+                {   size_t const neededOutSize = (size_t)1 << zbc->params.windowLog;
+                    size_t const neededInSize = BLOCKSIZE;   /* a block is never > BLOCKSIZE */
+                    if (zbc->inBuffSize < neededInSize) {
+                        free(zbc->inBuff);
+                        zbc->inBuffSize = neededInSize;
+                        zbc->inBuff = (char*)malloc(neededInSize);
+                        if (zbc->inBuff == NULL) return ERROR(memory_allocation);
+                    }
+                    if (zbc->outBuffSize < neededOutSize) {
+                        free(zbc->outBuff);
+                        zbc->outBuffSize = neededOutSize;
+                        zbc->outBuff = (char*)malloc(neededOutSize);
+                        if (zbc->outBuff == NULL) return ERROR(memory_allocation);
+                }   }
+                if (zbc->dictSize)
+                    ZSTD_decompress_insertDictionary(zbc->zc, zbc->dict, zbc->dictSize);
+                if (zbc->hPos) {
+                    /* some data already loaded into headerBuffer : transfer into inBuff */
+                    memcpy(zbc->inBuff, zbc->headerBuffer, zbc->hPos);
+                    zbc->inPos = zbc->hPos;
+                    zbc->hPos = 0;
+                    zbc->stage = ZBUFFds_load;
+                    break;
+                }
+                zbc->stage = ZBUFFds_read;
+		/* fall-through */
+        case ZBUFFds_read:
+            {
+                size_t neededInSize = ZSTD_nextSrcSizeToDecompress(zbc->zc);
+                if (neededInSize==0)   /* end of frame */
+                {
+                    zbc->stage = ZBUFFds_init;
+                    notDone = 0;
+                    break;
+                }
+                if ((size_t)(iend-ip) >= neededInSize)
+                {
+                    /* directly decode from src */
+                    size_t decodedSize = ZSTD_decompressContinue(zbc->zc,
+                        zbc->outBuff + zbc->outStart, zbc->outBuffSize - zbc->outStart,
+                        ip, neededInSize);
+                    if (ZSTD_isError(decodedSize)) return decodedSize;
+                    ip += neededInSize;
+                    if (!decodedSize) break;   /* this was just a header */
+                    zbc->outEnd = zbc->outStart +  decodedSize;
+                    zbc->stage = ZBUFFds_flush;
+                    break;
+                }
+                if (ip==iend) { notDone = 0; break; }   /* no more input */
+                zbc->stage = ZBUFFds_load;   /* partial input available : accumulate it into inBuff */
+            }
+	    /* fall-through */
+        case ZBUFFds_load:
+            {
+                size_t neededInSize = ZSTD_nextSrcSizeToDecompress(zbc->zc);
+                size_t toLoad = neededInSize - zbc->inPos;   /* should always be <= remaining space within inBuff */
+                size_t loadedSize;
+                if (toLoad > zbc->inBuffSize - zbc->inPos) return ERROR(corruption_detected);   /* should never happen */
+                loadedSize = ZBUFF_limitCopy(zbc->inBuff + zbc->inPos, toLoad, ip, iend-ip);
+                ip += loadedSize;
+                zbc->inPos += loadedSize;
+                if (loadedSize < toLoad) { notDone = 0; break; }   /* not enough input, wait for more */
+                {
+                    size_t decodedSize = ZSTD_decompressContinue(zbc->zc,
+                        zbc->outBuff + zbc->outStart, zbc->outBuffSize - zbc->outStart,
+                        zbc->inBuff, neededInSize);
+                    if (ZSTD_isError(decodedSize)) return decodedSize;
+                    zbc->inPos = 0;   /* input is consumed */
+                    if (!decodedSize) { zbc->stage = ZBUFFds_read; break; }   /* this was just a header */
+                    zbc->outEnd = zbc->outStart +  decodedSize;
+                    zbc->stage = ZBUFFds_flush;
+                    /* ZBUFFds_flush follows */
+                }
+            }
+	    /* fall-through */
+        case ZBUFFds_flush:
+            {
+                size_t toFlushSize = zbc->outEnd - zbc->outStart;
+                size_t flushedSize = ZBUFF_limitCopy(op, oend-op, zbc->outBuff + zbc->outStart, toFlushSize);
+                op += flushedSize;
+                zbc->outStart += flushedSize;
+                if (flushedSize == toFlushSize)
+                {
+                    zbc->stage = ZBUFFds_read;
+                    if (zbc->outStart + BLOCKSIZE > zbc->outBuffSize)   /* less than BLOCKSIZE left in outBuff : restart writing from its beginning */
+                        zbc->outStart = zbc->outEnd = 0;
+                    break;
+                }
+                /* cannot flush everything */
+                notDone = 0;
+                break;
+            }
+        default: return ERROR(GENERIC);   /* impossible */
+        }
+    }
+
+    *srcSizePtr = ip-istart;
+    *maxDstSizePtr = op-ostart;
+
+    {
+        size_t nextSrcSizeHint = ZSTD_nextSrcSizeToDecompress(zbc->zc);
+        if (nextSrcSizeHint > 3) nextSrcSizeHint+= 3;   /* get the next block header while at it */
+        nextSrcSizeHint -= zbc->inPos;   /* already loaded*/
+        return nextSrcSizeHint;
+    }
+}
+
+
+/* *************************************
+*  Tool functions
+***************************************/
+unsigned ZBUFFv04_isError(size_t errorCode) { return ERR_isError(errorCode); }   /* forwards to ERR_isError() */
+const char* ZBUFFv04_getErrorName(size_t errorCode) { return ERR_getErrorName(errorCode); }   /* forwards to ERR_getErrorName() */
+
+size_t ZBUFFv04_recommendedDInSize()  { return BLOCKSIZE + 3; }   /* BLOCKSIZE plus 3 extra bytes (room for a following block header, per the hint logic of ZBUFF_decompressContinue) */
+size_t ZBUFFv04_recommendedDOutSize() { return BLOCKSIZE; }
+
+
+
+/*- ========================================================================= -*/
+
+/* final wrapping stage */
+
+size_t ZSTDv04_decompressDCtx(ZSTD_DCtx* dctx, void* dst, size_t maxDstSize, const void* src, size_t srcSize)
+{   /* Frame decompression with a caller-provided context and no dictionary. */
+    return ZSTD_decompress_usingDict(dctx, dst, maxDstSize, src, srcSize, NULL, 0);
+}
+
+size_t ZSTDv04_decompress(void* dst, size_t maxDstSize, const void* src, size_t srcSize)
+{   /* One-shot decompression through ZSTDv04_decompressDCtx(). The context is heap-allocated when ZSTD_HEAPMODE==1, otherwise it is a local variable. */
+#if defined(ZSTD_HEAPMODE) && (ZSTD_HEAPMODE==1)
+    size_t regenSize;
+    ZSTD_DCtx* dctx = ZSTD_createDCtx();
+    if (dctx==NULL) return ERROR(memory_allocation);
+    regenSize = ZSTDv04_decompressDCtx(dctx, dst, maxDstSize, src, srcSize);
+    ZSTD_freeDCtx(dctx);
+    return regenSize;
+#else
+    ZSTD_DCtx dctx;   /* not initialized here : ZSTD_decompress_usingDict() calls ZSTD_resetDCtx() on it first */
+    return ZSTDv04_decompressDCtx(&dctx, dst, maxDstSize, src, srcSize);
+#endif
+}
+
+size_t ZSTDv04_findFrameCompressedSize(const void* src, size_t srcSize)
+{   /* Public wrapper : forwards to the static ZSTD_findFrameCompressedSize(). */
+    return ZSTD_findFrameCompressedSize(src, srcSize);
+}
+
+size_t ZSTDv04_resetDCtx(ZSTDv04_Dctx* dctx) { return ZSTD_resetDCtx(dctx); }   /* public wrapper around ZSTD_resetDCtx() */
+
+size_t ZSTDv04_nextSrcSizeToDecompress(ZSTDv04_Dctx* dctx)
+{   /* Public wrapper : forwards to ZSTD_nextSrcSizeToDecompress(). */
+    return ZSTD_nextSrcSizeToDecompress(dctx);
+}
+
+size_t ZSTDv04_decompressContinue(ZSTDv04_Dctx* dctx, void* dst, size_t maxDstSize, const void* src, size_t srcSize)
+{   /* Public wrapper : forwards to ZSTD_decompressContinue(). */
+    return ZSTD_decompressContinue(dctx, dst, maxDstSize, src, srcSize);
+}
+
+
+
+ZBUFFv04_DCtx* ZBUFFv04_createDCtx(void) { return ZBUFF_createDCtx(); }   /* public wrappers : each ZBUFFv04_* function below forwards to the static ZBUFF_* function of the same name */
+size_t      ZBUFFv04_freeDCtx(ZBUFFv04_DCtx* dctx) { return ZBUFF_freeDCtx(dctx); }
+
+size_t ZBUFFv04_decompressInit(ZBUFFv04_DCtx* dctx) { return ZBUFF_decompressInit(dctx); }
+size_t ZBUFFv04_decompressWithDictionary(ZBUFFv04_DCtx* dctx, const void* src, size_t srcSize)
+{ return ZBUFF_decompressWithDictionary(dctx, src, srcSize); }
+
+size_t ZBUFFv04_decompressContinue(ZBUFFv04_DCtx* dctx, void* dst, size_t* maxDstSizePtr, const void* src, size_t* srcSizePtr)
+{   /* Public wrapper : forwards to ZBUFF_decompressContinue(). */
+    return ZBUFF_decompressContinue(dctx, dst, maxDstSizePtr, src, srcSizePtr);
+}
+
+ZSTD_DCtx* ZSTDv04_createDCtx(void) { return ZSTD_createDCtx(); }   /* public wrapper around ZSTD_createDCtx() */
+size_t ZSTDv04_freeDCtx(ZSTD_DCtx* dctx) { return ZSTD_freeDCtx(dctx); }   /* public wrapper around ZSTD_freeDCtx() */
+
+size_t ZSTDv04_getFrameParams(ZSTD_parameters* params, const void* src, size_t srcSize)
+{   /* Public wrapper : forwards to ZSTD_getFrameParams(). */
+    return ZSTD_getFrameParams(params, src, srcSize);
+}
diff --git a/contrib/libzstd/include/zstd/legacy/zstd_v04.h b/contrib/libzstd/include/zstd/legacy/zstd_v04.h
new file mode 100644
index 00000000000..1b5439d3924
--- /dev/null
+++ b/contrib/libzstd/include/zstd/legacy/zstd_v04.h
@@ -0,0 +1,136 @@
+/**
+ * Copyright (c) 2016-present, Yann Collet, Facebook, Inc.
+ * All rights reserved.
+ *
+ * This source code is licensed under the BSD-style license found in the
+ * LICENSE file in the root directory of this source tree. An additional grant
+ * of patent rights can be found in the PATENTS file in the same directory.
+ */
+
+#ifndef ZSTD_V04_H_91868324769238
+#define ZSTD_V04_H_91868324769238
+
+#if defined (__cplusplus)
+extern "C" {
+#endif
+
+/* *************************************
+*  Includes
+***************************************/
+#include <stddef.h>   /* size_t */
+
+
+/* *************************************
+*  Simple one-step function
+***************************************/
+/**
+ZSTDv04_decompress() : decompress ZSTD frames compliant with v0.4.x format
+    compressedSize : is the exact source size
+    maxOriginalSize : is the size of the 'dst' buffer, which must be already allocated.
+                      It must be equal or larger than originalSize, otherwise decompression will fail.
+    return : the number of bytes decompressed into destination buffer (originalSize)
+             or an errorCode if it fails (which can be tested using ZSTDv04_isError())
+*/
+size_t ZSTDv04_decompress( void* dst, size_t maxOriginalSize,
+                     const void* src, size_t compressedSize);
+
+/**
+ZSTDv04_findFrameCompressedSize() : get the source length of a ZSTD frame compliant with v0.4.x format
+    compressedSize : The size of the 'src' buffer, at least as large as the frame pointed to by 'src'
+    return : the number of bytes that would be read to decompress this frame
+             or an errorCode if it fails (which can be tested using ZSTDv04_isError())
+*/
+size_t ZSTDv04_findFrameCompressedSize(const void* src, size_t compressedSize);
+
+/**
+ZSTDv04_isError() : tells if the result of ZSTDv04_decompress() is an error
+*/
+unsigned ZSTDv04_isError(size_t code);
+
+
+/* *************************************
+*  Advanced functions
+***************************************/
+typedef struct ZSTDv04_Dctx_s ZSTDv04_Dctx;
+ZSTDv04_Dctx* ZSTDv04_createDCtx(void);
+size_t ZSTDv04_freeDCtx(ZSTDv04_Dctx* dctx);
+
+size_t ZSTDv04_decompressDCtx(ZSTDv04_Dctx* dctx,
+                              void* dst, size_t maxOriginalSize,
+                        const void* src, size_t compressedSize);
+
+
+/* *************************************
+*  Direct Streaming
+***************************************/
+size_t ZSTDv04_resetDCtx(ZSTDv04_Dctx* dctx);
+
+size_t ZSTDv04_nextSrcSizeToDecompress(ZSTDv04_Dctx* dctx);
+size_t ZSTDv04_decompressContinue(ZSTDv04_Dctx* dctx, void* dst, size_t maxDstSize, const void* src, size_t srcSize);
+/**
+  Use the above functions alternately.
+  ZSTDv04_nextSrcSizeToDecompress() tells how many bytes to provide as 'srcSize' to ZSTDv04_decompressContinue().
+  ZSTDv04_decompressContinue() will use previous data blocks to improve compression if they are located prior to current block.
+  Result is the number of bytes regenerated within 'dst'.
+  It can be zero, which is not an error; it just means ZSTDv04_decompressContinue() has decoded some header.
+*/
+
+
+/* *************************************
+*  Buffered Streaming
+***************************************/
+typedef struct ZBUFFv04_DCtx_s ZBUFFv04_DCtx;
+ZBUFFv04_DCtx* ZBUFFv04_createDCtx(void);
+size_t         ZBUFFv04_freeDCtx(ZBUFFv04_DCtx* dctx);
+
+size_t ZBUFFv04_decompressInit(ZBUFFv04_DCtx* dctx);
+size_t ZBUFFv04_decompressWithDictionary(ZBUFFv04_DCtx* dctx, const void* dict, size_t dictSize);
+
+size_t ZBUFFv04_decompressContinue(ZBUFFv04_DCtx* dctx, void* dst, size_t* maxDstSizePtr, const void* src, size_t* srcSizePtr);
+
+/** ************************************************
+*  Streaming decompression
+*
+*  A ZBUFFv04_DCtx object is required to track streaming operation.
+*  Use ZBUFFv04_createDCtx() and ZBUFFv04_freeDCtx() to create/release resources.
+*  Use ZBUFFv04_decompressInit() to start a new decompression operation.
+*  ZBUFFv04_DCtx objects can be reused multiple times.
+*
+*  Optionally, a reference to a static dictionary can be set, using ZBUFFv04_decompressWithDictionary()
+*  It must be the same content as the one set during compression phase.
+*  Dictionary content must remain accessible during the decompression process.
+*
+*  Use ZBUFFv04_decompressContinue() repeatedly to consume your input.
+*  *srcSizePtr and *maxDstSizePtr can be any size.
+*  The function will report how many bytes were read or written by modifying *srcSizePtr and *maxDstSizePtr.
+*  Note that it may not consume the entire input, in which case it's up to the caller to present remaining input again.
+*  The content of dst will be overwritten (up to *maxDstSizePtr) at each function call, so save its content if it matters or change dst.
+*  @return : a hint to preferred nb of bytes to use as input for next function call (it's only a hint, to improve latency)
+*            or 0 when a frame is completely decoded
+*            or an error code, which can be tested using ZBUFFv04_isError().
+*
+*  Hint : recommended buffer sizes (not compulsory) : ZBUFFv04_recommendedDInSize() / ZBUFFv04_recommendedDOutSize()
+*  output : ZBUFFv04_recommendedDOutSize()==128 KB block size is the internal unit, it ensures it's always possible to write a full block when it's decoded.
+*  input : ZBUFFv04_recommendedDInSize()==128 KB + 3; just follow indications from ZBUFFv04_decompressContinue() to minimize latency. It should always be <= 128 KB + 3 .
+* **************************************************/
+unsigned ZBUFFv04_isError(size_t errorCode);
+const char* ZBUFFv04_getErrorName(size_t errorCode);
+
+
+/** The functions below provide recommended buffer sizes for decompression operations.
+*   These sizes are not compulsory, they just tend to offer better latency */
+size_t ZBUFFv04_recommendedDInSize(void);
+size_t ZBUFFv04_recommendedDOutSize(void);
+
+
+/* *************************************
+*  Prefix - version detection
+***************************************/
+#define ZSTDv04_magicNumber 0xFD2FB524   /* v0.4 */
+
+
+#if defined (__cplusplus)
+}
+#endif
+
+#endif /* ZSTD_V04_H_91868324769238 */
diff --git a/contrib/libzstd/include/zstd/legacy/zstd_v05.c b/contrib/libzstd/include/zstd/legacy/zstd_v05.c
new file mode 100644
index 00000000000..e929618a3bf
--- /dev/null
+++ b/contrib/libzstd/include/zstd/legacy/zstd_v05.c
@@ -0,0 +1,4082 @@
+/**
+ * Copyright (c) 2016-present, Yann Collet, Facebook, Inc.
+ * All rights reserved.
+ *
+ * This source code is licensed under the BSD-style license found in the
+ * LICENSE file in the root directory of this source tree. An additional grant
+ * of patent rights can be found in the PATENTS file in the same directory.
+ */
+
+
+/*- Dependencies -*/
+#include "zstd_v05.h"
+#include "error_private.h"
+
+
+/* ******************************************************************
+   mem.h
+   low-level memory access routines
+   Copyright (C) 2013-2015, Yann Collet.
+
+   BSD 2-Clause License (http://www.opensource.org/licenses/bsd-license.php)
+
+   Redistribution and use in source and binary forms, with or without
+   modification, are permitted provided that the following conditions are
+   met:
+
+       * Redistributions of source code must retain the above copyright
+   notice, this list of conditions and the following disclaimer.
+       * Redistributions in binary form must reproduce the above
+   copyright notice, this list of conditions and the following disclaimer
+   in the documentation and/or other materials provided with the
+   distribution.
+
+   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+   OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+    You can contact the author at :
+    - FSEv05 source repository : https://github.com/Cyan4973/FiniteStateEntropy
+    - Public forum : https://groups.google.com/forum/#!forum/lz4c
+****************************************************************** */
+#ifndef MEM_H_MODULE
+#define MEM_H_MODULE
+
+#if defined (__cplusplus)
+extern "C" {
+#endif
+
+/*-****************************************
+*  Dependencies
+******************************************/
+#include <stddef.h>    /* size_t, ptrdiff_t */
+#include <string.h>    /* memcpy */
+
+
+/*-****************************************
+*  Compiler specifics
+******************************************/
+#if defined(__GNUC__)
+#  define MEM_STATIC static __attribute__((unused))
+#elif defined (__cplusplus) || (defined (__STDC_VERSION__) && (__STDC_VERSION__ >= 199901L) /* C99 */)
+#  define MEM_STATIC static inline
+#elif defined(_MSC_VER)
+#  define MEM_STATIC static __inline
+#else
+#  define MEM_STATIC static  /* this version may generate warnings for unused static functions; disable the relevant warning */
+#endif
+
+
+/*-**************************************************************
+*  Basic Types
+*****************************************************************/
+#if defined (__cplusplus) || (defined (__STDC_VERSION__) && (__STDC_VERSION__ >= 199901L) /* C99 */)
+# include <stdint.h>
+  typedef  uint8_t BYTE;
+  typedef uint16_t U16;
+  typedef  int16_t S16;
+  typedef uint32_t U32;
+  typedef  int32_t S32;
+  typedef uint64_t U64;
+  typedef  int64_t S64;
+#else
+  typedef unsigned char       BYTE;
+  typedef unsigned short      U16;
+  typedef   signed short      S16;
+  typedef unsigned int        U32;
+  typedef   signed int        S32;
+  typedef unsigned long long  U64;
+  typedef   signed long long  S64;
+#endif
+
+
+/*-**************************************************************
+*  Memory I/O
+*****************************************************************/
+/* MEM_FORCE_MEMORY_ACCESS :
+ * By default, access to unaligned memory is controlled by `memcpy()`, which is safe and portable.
+ * Unfortunately, on some target/compiler combinations, the generated assembly is sub-optimal.
+ * The switch below allows selecting a different access method for improved performance.
+ * Method 0 (default) : use `memcpy()`. Safe and portable.
+ * Method 1 : `__packed` statement. It depends on compiler extension (ie, not portable).
+ *            This method is safe if your compiler supports it, and *generally* as fast or faster than `memcpy`.
+ * Method 2 : direct access. This method is portable but violates the C standard.
+ *            It can generate buggy code on targets depending on alignment.
+ *            In some circumstances, it's the only known way to get the most performance (ie GCC + ARMv6)
+ * See http://fastcompression.blogspot.fr/2015/08/accessing-unaligned-memory.html for details.
+ * Prefer these methods in priority order (0 > 1 > 2)
+ */
+#ifndef MEM_FORCE_MEMORY_ACCESS   /* can be defined externally, on command line for example */
+#  if defined(__GNUC__) && ( defined(__ARM_ARCH_6__) || defined(__ARM_ARCH_6J__) || defined(__ARM_ARCH_6K__) || defined(__ARM_ARCH_6Z__) || defined(__ARM_ARCH_6ZK__) || defined(__ARM_ARCH_6T2__) )
+#    define MEM_FORCE_MEMORY_ACCESS 2
+#  elif (defined(__INTEL_COMPILER) && !defined(WIN32)) || \
+  (defined(__GNUC__) && ( defined(__ARM_ARCH_7__) || defined(__ARM_ARCH_7A__) || defined(__ARM_ARCH_7R__) || defined(__ARM_ARCH_7M__) || defined(__ARM_ARCH_7S__) ))
+#    define MEM_FORCE_MEMORY_ACCESS 1
+#  endif
+#endif
+
+MEM_STATIC unsigned MEM_32bits(void) { return sizeof(void*)==4; }   /* 1 when a pointer occupies 4 bytes, else 0 */
+MEM_STATIC unsigned MEM_64bits(void) { return sizeof(void*)==8; }   /* 1 when a pointer occupies 8 bytes, else 0 */
+
+MEM_STATIC unsigned MEM_isLittleEndian(void)
+{   /* run-time endianness probe : returns the lowest-addressed byte of a U32 holding 1, i.e. 1 on little-endian hosts and 0 otherwise */
+    const union { U32 u; BYTE c[4]; } one = { 1 };   /* don't use static : performance detrimental  */
+    return one.c[0];
+}
+
+#if defined(MEM_FORCE_MEMORY_ACCESS) && (MEM_FORCE_MEMORY_ACCESS==2)
+
+/* Direct-access variant (MEM_FORCE_MEMORY_ACCESS==2) : violates C standard, by lying on structure alignment.
+Only use if no other choice to achieve best performance on target platform */
+MEM_STATIC U16 MEM_read16(const void* memPtr) { return *(const U16*) memPtr; }   /* loads through a cast pointer, no alignment check */
+MEM_STATIC U32 MEM_read32(const void* memPtr) { return *(const U32*) memPtr; }
+MEM_STATIC U64 MEM_read64(const void* memPtr) { return *(const U64*) memPtr; }
+
+MEM_STATIC void MEM_write16(void* memPtr, U16 value) { *(U16*)memPtr = value; }   /* stores through a cast pointer, no alignment check */
+MEM_STATIC void MEM_write32(void* memPtr, U32 value) { *(U32*)memPtr = value; }
+MEM_STATIC void MEM_write64(void* memPtr, U64 value) { *(U64*)memPtr = value; }
+
+#elif defined(MEM_FORCE_MEMORY_ACCESS) && (MEM_FORCE_MEMORY_ACCESS==1)
+
+/* Packed-union variant (MEM_FORCE_MEMORY_ACCESS==1) : __pack instructions are safer, but compiler specific, hence potentially problematic for some compilers */
+/* currently only defined for gcc and icc */
+typedef union { U16 u16; U32 u32; U64 u64; size_t st; } __attribute__((packed)) unalign;   /* every access below goes through a member of this packed union */
+
+MEM_STATIC U16 MEM_read16(const void* ptr) { return ((const unalign*)ptr)->u16; }   /* native-order read of the u16 member */
+MEM_STATIC U32 MEM_read32(const void* ptr) { return ((const unalign*)ptr)->u32; }   /* native-order read of the u32 member */
+MEM_STATIC U64 MEM_read64(const void* ptr) { return ((const unalign*)ptr)->u64; }   /* native-order read of the u64 member */
+
+MEM_STATIC void MEM_write16(void* memPtr, U16 value) { ((unalign*)memPtr)->u16 = value; }   /* native-order write of the u16 member */
+MEM_STATIC void MEM_write32(void* memPtr, U32 value) { ((unalign*)memPtr)->u32 = value; }   /* native-order write of the u32 member */
+MEM_STATIC void MEM_write64(void* memPtr, U64 value) { ((unalign*)memPtr)->u64 = value; }   /* native-order write of the u64 member */
+
+#else
+
+/* default method, safe and standard : every access is a memcpy() of sizeof(value) bytes.
+   can sometimes prove slower */
+
+MEM_STATIC U16 MEM_read16(const void* memPtr)
+{   /* native-order 16-bit read */
+    U16 val; memcpy(&val, memPtr, sizeof(val)); return val;
+}
+
+MEM_STATIC U32 MEM_read32(const void* memPtr)
+{   /* native-order 32-bit read */
+    U32 val; memcpy(&val, memPtr, sizeof(val)); return val;
+}
+
+MEM_STATIC U64 MEM_read64(const void* memPtr)
+{   /* native-order 64-bit read */
+    U64 val; memcpy(&val, memPtr, sizeof(val)); return val;
+}
+
+MEM_STATIC void MEM_write16(void* memPtr, U16 value)
+{   /* native-order 16-bit write */
+    memcpy(memPtr, &value, sizeof(value));
+}
+
+MEM_STATIC void MEM_write32(void* memPtr, U32 value)
+{   /* native-order 32-bit write */
+    memcpy(memPtr, &value, sizeof(value));
+}
+
+MEM_STATIC void MEM_write64(void* memPtr, U64 value)
+{   /* native-order 64-bit write */
+    memcpy(memPtr, &value, sizeof(value));
+}
+
+#endif /* MEM_FORCE_MEMORY_ACCESS */
+
+
+MEM_STATIC U16 MEM_readLE16(const void* memPtr)
+{   /* reads a 16-bit little-endian value from memPtr, whatever the host byte order */
+    if (MEM_isLittleEndian())
+        return MEM_read16(memPtr);   /* host order already matches : plain native read */
+    else {
+        const BYTE* p = (const BYTE*)memPtr;
+        return (U16)(p[0] + (p[1]<<8));   /* assembled by hand : p[0] is the low byte, p[1] the high byte */
+    }
+}
+
+MEM_STATIC void MEM_writeLE16(void* memPtr, U16 val)
+{   /* stores val at memPtr as 2 bytes in little-endian order, whatever the host byte order */
+    if (MEM_isLittleEndian()) {
+        MEM_write16(memPtr, val);   /* host order already matches : plain native write */
+    } else {
+        BYTE* p = (BYTE*)memPtr;
+        p[0] = (BYTE)val;        /* low byte first */
+        p[1] = (BYTE)(val>>8);   /* then high byte */
+    }
+}
+
+MEM_STATIC U32 MEM_readLE32(const void* memPtr)
+{   /* reads a 32-bit little-endian value from memPtr, whatever the host byte order */
+    if (MEM_isLittleEndian())
+        return MEM_read32(memPtr);   /* host order already matches : plain native read */
+    else {
+        const BYTE* p = (const BYTE*)memPtr;
+        return (U32)((U32)p[0] + ((U32)p[1]<<8) + ((U32)p[2]<<16) + ((U32)p[3]<<24));   /* each byte is widened to U32 before shifting ; p[0] is least significant */
+    }
+}
+
+
+MEM_STATIC U64 MEM_readLE64(const void* memPtr)
+{   /* reads a 64-bit little-endian value from memPtr, whatever the host byte order */
+    if (MEM_isLittleEndian())
+        return MEM_read64(memPtr);   /* host order already matches : plain native read */
+    else {
+        const BYTE* p = (const BYTE*)memPtr;
+        return (U64)((U64)p[0] + ((U64)p[1]<<8) + ((U64)p[2]<<16) + ((U64)p[3]<<24)   /* each byte is widened to U64 before shifting ; p[0] is least significant */
+                     + ((U64)p[4]<<32) + ((U64)p[5]<<40) + ((U64)p[6]<<48) + ((U64)p[7]<<56));
+    }
+}
+
+
+MEM_STATIC size_t MEM_readLEST(const void* memPtr)
+{   /* little-endian read whose width follows the pointer size : 4 bytes when MEM_32bits(), 8 bytes otherwise */
+    if (MEM_32bits())
+        return (size_t)MEM_readLE32(memPtr);
+    else
+        return (size_t)MEM_readLE64(memPtr);
+}
+
+
+#if defined (__cplusplus)
+}
+#endif
+
+#endif /* MEM_H_MODULE */
+
+/*
+    zstd - standard compression library
+    Header File for static linking only
+    Copyright (C) 2014-2016, Yann Collet.
+
+    BSD 2-Clause License (http://www.opensource.org/licenses/bsd-license.php)
+
+    Redistribution and use in source and binary forms, with or without
+    modification, are permitted provided that the following conditions are
+    met:
+    * Redistributions of source code must retain the above copyright
+    notice, this list of conditions and the following disclaimer.
+    * Redistributions in binary form must reproduce the above
+    copyright notice, this list of conditions and the following disclaimer
+    in the documentation and/or other materials provided with the
+    distribution.
+    THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+    "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+    LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+    A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+    OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+    SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+    LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+    DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+    THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+    (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+    OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+    You can contact the author at :
+    - zstd homepage : http://www.zstd.net
+*/
+#ifndef ZSTD_STATIC_H
+#define ZSTD_STATIC_H
+
+/* The prototypes defined within this file are considered experimental.
+ * They should not be used in the context of a DLL, as they may change in the future.
+ * Prefer static linking if you need them, to keep control over breaking changes between versions.
+ */
+
+#if defined (__cplusplus)
+extern "C" {
+#endif
+
+
+
+/*-*************************************
+*  Types
+***************************************/
+#define ZSTDv05_WINDOWLOG_ABSOLUTEMIN 11
+
+
+/*-*************************************
+*  Advanced functions
+***************************************/
+/*- Advanced Decompression functions -*/
+
+/*! ZSTDv05_decompress_usingPreparedDCtx() :
+*   Same as ZSTDv05_decompress_usingDict, but using a reference context `preparedDCtx`, where dictionary has been loaded.
+*   It avoids reloading the dictionary each time.
+*   `preparedDCtx` must have been properly initialized using ZSTDv05_decompressBegin_usingDict().
+*   Requires 2 contexts : 1 for reference, which will not be modified, and 1 to run the decompression operation */
+size_t ZSTDv05_decompress_usingPreparedDCtx(
+                                             ZSTDv05_DCtx* dctx, const ZSTDv05_DCtx* preparedDCtx,
+                                             void* dst, size_t dstCapacity,
+                                       const void* src, size_t srcSize);
+
+
+/* **************************************
+*  Streaming functions (direct mode)
+****************************************/
+size_t ZSTDv05_decompressBegin(ZSTDv05_DCtx* dctx);
+
+/*
+  Streaming decompression, direct mode (bufferless)
+
+  A ZSTDv05_DCtx object is required to track streaming operations.
+  Use ZSTDv05_createDCtx() / ZSTDv05_freeDCtx() to manage it.
+  A ZSTDv05_DCtx object can be re-used multiple times.
+
+  First typical operation is to retrieve frame parameters, using ZSTDv05_getFrameParams().
+  This operation is independent, and just needs enough input data to properly decode the frame header.
+  Objective is to retrieve *params.windowlog, to know minimum amount of memory required during decoding.
+  Result : 0 when successful, it means the ZSTDv05_parameters structure has been filled.
+           >0 : means there is not enough data into src. Provides the expected size to successfully decode header.
+           errorCode, which can be tested using ZSTDv05_isError()
+
+  Start decompression, with ZSTDv05_decompressBegin() or ZSTDv05_decompressBegin_usingDict()
+  Alternatively, you can copy a prepared context, using ZSTDv05_copyDCtx()
+
+  Then use ZSTDv05_nextSrcSizeToDecompress() and ZSTDv05_decompressContinue() alternately.
+  ZSTDv05_nextSrcSizeToDecompress() tells how many bytes to provide as 'srcSize' to ZSTDv05_decompressContinue().
+  ZSTDv05_decompressContinue() requires this exact amount of bytes, or it will fail.
+  ZSTDv05_decompressContinue() needs previous data blocks during decompression, up to (1 << windowlog).
+  They should preferably be located contiguously, prior to current block. Alternatively, a round buffer is also possible.
+
+  @result of ZSTDv05_decompressContinue() is the number of bytes regenerated within 'dst'.
+  It can be zero, which is not an error; it just means ZSTDv05_decompressContinue() has decoded some header.
+
+  A frame is fully decoded when ZSTDv05_nextSrcSizeToDecompress() returns zero.
+  Context can then be reset to start a new decompression.
+*/
+
+
+/* **************************************
+*  Block functions
+****************************************/
+/*! Block functions produce and decode raw zstd blocks, without frame metadata.
+    The user is responsible for keeping the information required to regenerate data, such as block sizes.
+
+    A few rules to respect :
+    - Uncompressed block size must be <= 128 KB
+    - Compressing or decompressing requires a context structure
+      + Use ZSTDv05_createCCtx() and ZSTDv05_createDCtx()
+    - It is necessary to init context before starting
+      + compression : ZSTDv05_compressBegin()
+      + decompression : ZSTDv05_decompressBegin()
+      + variants _usingDict() are also allowed
+      + copyCCtx() and copyDCtx() work too
+    - When a block is considered not compressible enough, ZSTDv05_compressBlock() result will be zero.
+      In which case, nothing is produced into `dst`.
+      + User must test for such outcome and deal directly with uncompressed data
+      + ZSTDv05_decompressBlock() doesn't accept uncompressed data as input !!
+*/
+
+size_t ZSTDv05_decompressBlock(ZSTDv05_DCtx* dctx, void* dst, size_t dstCapacity, const void* src, size_t srcSize);
+
+
+
+
+#if defined (__cplusplus)
+}
+#endif
+
+#endif  /* ZSTD_STATIC_H */
+
+
+/*
+    zstd_internal - common functions to include
+    Header File for include
+    Copyright (C) 2014-2016, Yann Collet.
+
+    BSD 2-Clause License (http://www.opensource.org/licenses/bsd-license.php)
+
+    Redistribution and use in source and binary forms, with or without
+    modification, are permitted provided that the following conditions are
+    met:
+    * Redistributions of source code must retain the above copyright
+    notice, this list of conditions and the following disclaimer.
+    * Redistributions in binary form must reproduce the above
+    copyright notice, this list of conditions and the following disclaimer
+    in the documentation and/or other materials provided with the
+    distribution.
+    THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+    "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+    LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+    A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+    OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+    SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+    LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+    DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+    THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+    (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+    OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+    You can contact the author at :
+    - zstd source repository : https://github.com/Cyan4973/zstd
+*/
+#ifndef ZSTD_CCOMMON_H_MODULE
+#define ZSTD_CCOMMON_H_MODULE
+
+
+
+/*-*************************************
+*  Common macros
+***************************************/
+#define MIN(a,b) ((a)<(b) ? (a) : (b))
+#define MAX(a,b) ((a)>(b) ? (a) : (b))
+
+
+/*-*************************************
+*  Common constants
+***************************************/
+#define ZSTDv05_DICT_MAGIC  0xEC30A435
+
+#define KB *(1 <<10)
+#define MB *(1 <<20)
+#define GB *(1U<<30)
+
+#define BLOCKSIZE (128 KB)                 /* define, for static allocation */
+
+static const size_t ZSTDv05_blockHeaderSize = 3;
+static const size_t ZSTDv05_frameHeaderSize_min = 5;
+#define ZSTDv05_frameHeaderSize_max 5         /* define, for static allocation */
+
+#define BITv057 128
+#define BITv056  64
+#define BITv055  32
+#define BITv054  16
+#define BITv051   2
+#define BITv050   1
+
+#define IS_HUFv05 0
+#define IS_PCH 1
+#define IS_RAW 2
+#define IS_RLE 3
+
+#define MINMATCH 4
+#define REPCODE_STARTVALUE 1
+
+#define Litbits  8
+#define MLbits   7
+#define LLbits   6
+#define Offbits  5
+#define MaxLit ((1<<Litbits) - 1)
+#define MaxML  ((1<<MLbits) - 1)
+#define MaxLL  ((1<<LLbits) - 1)
+#define MaxOff ((1<<Offbits)- 1)
+#define MLFSEv05Log   10
+#define LLFSEv05Log   10
+#define OffFSEv05Log   9
+#define MaxSeq MAX(MaxLL, MaxML)
+
+#define FSEv05_ENCODING_RAW     0
+#define FSEv05_ENCODING_RLE     1
+#define FSEv05_ENCODING_STATIC  2
+#define FSEv05_ENCODING_DYNAMIC 3
+
+
+#define HufLog 12
+
+#define MIN_SEQUENCES_SIZE 1 /* nbSeq==0 */
+#define MIN_CBLOCK_SIZE (1 /*litCSize*/ + 1 /* RLE or RAW */ + MIN_SEQUENCES_SIZE /* nbSeq==0 */)   /* for a non-null block */
+
+#define WILDCOPY_OVERLENGTH 8
+
+typedef enum { bt_compressed, bt_raw, bt_rle, bt_end } blockType_t;
+
+
+/*-*******************************************
+*  Shared functions to include for inlining
+*********************************************/
+static void ZSTDv05_copy8(void* dst, const void* src) { memcpy(dst, src, 8); }   /* copies exactly 8 bytes from src to dst */
+
+#define COPY8(d,s) { ZSTDv05_copy8(d,s); d+=8; s+=8; }   /* copies 8 bytes, then advances both d and s by 8 (modifies its arguments) */
+
+/*! ZSTDv05_wildcopy() :
+*   custom version of memcpy(), works in 8-byte steps so it can copy up to 7 bytes too many (8 bytes if length==0) */
+MEM_STATIC void ZSTDv05_wildcopy(void* dst, const void* src, ptrdiff_t length)
+{
+    const BYTE* ip = (const BYTE*)src;
+    BYTE* op = (BYTE*)dst;
+    BYTE* const oend = op + length;   /* nominal end of the copy ; the loop may write past it */
+    do                                /* do/while : the first 8-byte chunk is copied before length is tested */
+        COPY8(op, ip)                 /* copies 8 bytes and advances op and ip by 8 */
+    while (op < oend);
+}
+
+
+/*-*******************************************
+*  Private interfaces
+*********************************************/
+typedef struct {   /* NOTE(review): field roles are inferred from names only - no code using this struct is visible in this section */
+    void* buffer;
+    U32*  offsetStart;        /* presumably each xxxStart / xxx pair is the base and current position of one stream - TODO confirm */
+    U32*  offset;
+    BYTE* offCodeStart;
+    BYTE* offCode;
+    BYTE* litStart;
+    BYTE* lit;
+    BYTE* litLengthStart;
+    BYTE* litLength;
+    BYTE* matchLengthStart;
+    BYTE* matchLength;
+    BYTE* dumpsStart;
+    BYTE* dumps;
+    /* opt */
+    U32* matchLengthFreq;     /* presumably per-symbol frequency tables, totalled in the matching xxxSum fields below - TODO confirm */
+    U32* litLengthFreq;
+    U32* litFreq;
+    U32* offCodeFreq;
+    U32  matchLengthSum;
+    U32  litLengthSum;
+    U32  litSum;
+    U32  offCodeSum;
+} seqStore_t;
+
+
+
+#endif   /* ZSTD_CCOMMON_H_MODULE */
+/* ******************************************************************
+   FSEv05 : Finite State Entropy coder
+   header file
+   Copyright (C) 2013-2015, Yann Collet.
+
+   BSD 2-Clause License (http://www.opensource.org/licenses/bsd-license.php)
+
+   Redistribution and use in source and binary forms, with or without
+   modification, are permitted provided that the following conditions are
+   met:
+
+       * Redistributions of source code must retain the above copyright
+   notice, this list of conditions and the following disclaimer.
+       * Redistributions in binary form must reproduce the above
+   copyright notice, this list of conditions and the following disclaimer
+   in the documentation and/or other materials provided with the
+   distribution.
+
+   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+   OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+   You can contact the author at :
+   - Source repository : https://github.com/Cyan4973/FiniteStateEntropy
+   - Public forum : https://groups.google.com/forum/#!forum/lz4c
+****************************************************************** */
+#ifndef FSEv05_H
+#define FSEv05_H
+
+#if defined (__cplusplus)
+extern "C" {
+#endif
+
+
+/* *****************************************
+*  Includes
+******************************************/
+#include <stddef.h>    /* size_t, ptrdiff_t */
+
+
+/*-****************************************
+*  FSEv05 simple functions
+******************************************/
+size_t FSEv05_decompress(void* dst,  size_t maxDstSize,
+                const void* cSrc, size_t cSrcSize);
+/*!
+FSEv05_decompress():
+    Decompress FSEv05 data from buffer 'cSrc', of size 'cSrcSize',
+    into already allocated destination buffer 'dst', of size 'maxDstSize'.
+    return : size of regenerated data (<= maxDstSize)
+             or an error code, which can be tested using FSEv05_isError()
+
+    ** Important ** : FSEv05_decompress() doesn't decompress non-compressible nor RLE data !!!
+    Why ? : making this distinction requires a header.
+    Header management is intentionally delegated to the user layer, which can better manage special cases.
+*/
+
+
+/* *****************************************
+*  Tool functions
+******************************************/
+/* Error Management */
+unsigned    FSEv05_isError(size_t code);        /* tells if a return value is an error code */
+const char* FSEv05_getErrorName(size_t code);   /* provides error code string (useful for debugging) */
+
+
+
+
+/* *****************************************
+*  FSEv05 detailed API
+******************************************/
+/* *** DECOMPRESSION *** */
+
+/*!
+FSEv05_readNCount():
+   Read compactly saved 'normalizedCounter' from 'rBuffer'.
+   return : size read from 'rBuffer'
+            or an errorCode, which can be tested using FSEv05_isError()
+            maxSymbolValuePtr[0] and tableLogPtr[0] will also be updated with their respective values */
+size_t FSEv05_readNCount (short* normalizedCounter, unsigned* maxSymbolValuePtr, unsigned* tableLogPtr, const void* rBuffer, size_t rBuffSize);
+
+/*!
+Constructor and Destructor of type FSEv05_DTable
+    Note that its size depends on 'tableLog' */
+typedef unsigned FSEv05_DTable;   /* don't allocate that. It's just a way to be more restrictive than void* */
+FSEv05_DTable* FSEv05_createDTable(unsigned tableLog);
+void        FSEv05_freeDTable(FSEv05_DTable* dt);
+
+/*!
+FSEv05_buildDTable():
+   Builds 'dt', which must be already allocated, using FSEv05_createDTable()
+   @return : 0,
+             or an errorCode, which can be tested using FSEv05_isError() */
+size_t FSEv05_buildDTable (FSEv05_DTable* dt, const short* normalizedCounter, unsigned maxSymbolValue, unsigned tableLog);
+
+/*!
+FSEv05_decompress_usingDTable():
+   Decompress compressed source @cSrc of size @cSrcSize using `dt`
+   into `dst` which must be already allocated.
+   @return : size of regenerated data (necessarily <= @dstCapacity)
+             or an errorCode, which can be tested using FSEv05_isError() */
+size_t FSEv05_decompress_usingDTable(void* dst, size_t dstCapacity, const void* cSrc, size_t cSrcSize, const FSEv05_DTable* dt);
+
+
+
+#if defined (__cplusplus)
+}
+#endif
+
+#endif  /* FSEv05_H */
+/* ******************************************************************
+   bitstream
+   Part of FSEv05 library
+   header file (to include)
+   Copyright (C) 2013-2016, Yann Collet.
+
+   BSD 2-Clause License (http://www.opensource.org/licenses/bsd-license.php)
+
+   Redistribution and use in source and binary forms, with or without
+   modification, are permitted provided that the following conditions are
+   met:
+
+       * Redistributions of source code must retain the above copyright
+   notice, this list of conditions and the following disclaimer.
+       * Redistributions in binary form must reproduce the above
+   copyright notice, this list of conditions and the following disclaimer
+   in the documentation and/or other materials provided with the
+   distribution.
+
+   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+   OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+   You can contact the author at :
+   - Source repository : https://github.com/Cyan4973/FiniteStateEntropy
+****************************************************************** */
+#ifndef BITv05STREAM_H_MODULE
+#define BITv05STREAM_H_MODULE
+
+#if defined (__cplusplus)
+extern "C" {
+#endif
+
+
+/*
+*  This API consists of small unitary functions, which highly benefit from being inlined.
+*  Since link-time-optimization is not available for all compilers,
+*  these functions are defined into a .h to be included.
+*/
+
+
+
+/*-********************************************
+*  bitStream decoding API (read backward)
+**********************************************/
+typedef struct
+{
+    size_t   bitContainer;   /* local register : chunk of the bitStream last loaded from ptr */
+    unsigned bitsConsumed;   /* number of bits of bitContainer already used, counted from its most significant bit */
+    const char* ptr;         /* address bitContainer was last loaded from; moves backward, toward start */
+    const char* start;       /* first byte of the bitStream buffer */
+} BITv05_DStream_t;
+
+typedef enum { BITv05_DStream_unfinished = 0,    /* register reloaded after stepping back by every fully consumed byte */
+               BITv05_DStream_endOfBuffer = 1,   /* ptr is at start of buffer; the register may still hold unread bits */
+               BITv05_DStream_completed = 2,     /* ptr is at start of buffer and every bit of the register is consumed */
+               BITv05_DStream_overflow = 3 } BITv05_DStream_status;  /* result of BITv05_reloadDStream(); overflow : more bits consumed than the register holds */
+               /* 1,2,4,8 would be better for bitmap combinations, but slows down performance a bit ... :( */
+
+MEM_STATIC size_t   BITv05_initDStream(BITv05_DStream_t* bitD, const void* srcBuffer, size_t srcSize);
+MEM_STATIC size_t   BITv05_readBits(BITv05_DStream_t* bitD, unsigned nbBits);
+MEM_STATIC BITv05_DStream_status BITv05_reloadDStream(BITv05_DStream_t* bitD);
+MEM_STATIC unsigned BITv05_endOfDStream(const BITv05_DStream_t* bitD);
+
+
+/*!
+* Start by invoking BITv05_initDStream().
+* A chunk of the bitStream is then stored into a local register.
+* Local register size is 64-bits on 64-bits systems, 32-bits on 32-bits systems (size_t).
+* You can then retrieve bitFields stored into the local register, **in reverse order**.
+* Local register is explicitly reloaded from memory by the BITv05_reloadDStream() method.
+* A reload guarantees a minimum of ((8*sizeof(size_t))-7) bits when its result is BITv05_DStream_unfinished.
+* Otherwise, it can be less than that, so proceed accordingly.
+* Checking if DStream has reached its end can be performed with BITv05_endOfDStream()
+*/
+
+
+/*-****************************************
+*  unsafe API
+******************************************/
+MEM_STATIC size_t BITv05_readBitsFast(BITv05_DStream_t* bitD, unsigned nbBits);
+/* faster, but works only if nbBits >= 1 */
+
+
+
+/*-**************************************************************
+*  Helper functions
+****************************************************************/
+MEM_STATIC unsigned BITv05_highbit32 (U32 val)   /* @return : index (0-31) of the highest set bit of val; val must be != 0 */
+{   /* no `register` on val : the specifier is removed from C++17, and this header is usable from C++ (see extern "C") */
+#   if defined(_MSC_VER)   /* Visual */
+    unsigned long r=0;
+    _BitScanReverse ( &r, val );
+    return (unsigned) r;
+#   elif defined(__GNUC__) && (__GNUC__ >= 3)   /* Use GCC Intrinsic */
+    return 31 - __builtin_clz (val);   /* __builtin_clz(0) is undefined : hence the val != 0 requirement */
+#   else   /* Software version */
+    static const unsigned DeBruijnClz[32] = { 0, 9, 1, 10, 13, 21, 2, 29, 11, 14, 16, 18, 22, 25, 3, 30, 8, 12, 20, 28, 15, 17, 24, 7, 19, 27, 23, 6, 26, 5, 4, 31 };
+    U32 v = val;
+    unsigned r;
+    v |= v >> 1;   /* smear the highest set bit into every lower position ... */
+    v |= v >> 2;
+    v |= v >> 4;
+    v |= v >> 8;
+    v |= v >> 16;
+    r = DeBruijnClz[ (U32) (v * 0x07C4ACDDU) >> 27];   /* ... then map the resulting 2^k-1 pattern through the lookup table */
+    return r;
+#   endif
+}
+
+
+
+/*-********************************************************
+* bitStream decoding
+**********************************************************/
+/*!BITv05_initDStream
+*  Initialize a BITv05_DStream_t so that reading starts from the END of the bitStream (it is read backward).
+*  @bitD : a pointer to an already allocated BITv05_DStream_t structure
+*  @srcBuffer must point at the beginning of a bitStream
+*  @srcSize must be the exact size of the bitStream (>= 1); its last byte must be non-zero (it carries the end mark)
+*  @result : size of stream (== srcSize) or an errorCode if a problem is detected (srcSize_wrong, GENERIC)
+*/
+MEM_STATIC size_t BITv05_initDStream(BITv05_DStream_t* bitD, const void* srcBuffer, size_t srcSize)
+{
+    if (srcSize < 1) { memset(bitD, 0, sizeof(*bitD)); return ERROR(srcSize_wrong); }   /* empty input : bitD is left zeroed */
+
+    if (srcSize >=  sizeof(size_t)) {  /* normal case */
+        U32 contain32;
+        bitD->start = (const char*)srcBuffer;
+        bitD->ptr   = (const char*)srcBuffer + srcSize - sizeof(size_t);   /* the last register-sized chunk is loaded first */
+        bitD->bitContainer = MEM_readLEST(bitD->ptr);
+        contain32 = ((const BYTE*)srcBuffer)[srcSize-1];   /* last byte : its highest set bit is the end mark */
+        if (contain32 == 0) return ERROR(GENERIC);   /* endMark not present */
+        bitD->bitsConsumed = 8 - BITv05_highbit32(contain32);   /* skip the zero bits above the end mark, and the mark itself */
+    } else {   /* stream shorter than the register : assemble the container byte by byte */
+        U32 contain32;
+        bitD->start = (const char*)srcBuffer;
+        bitD->ptr   = bitD->start;
+        bitD->bitContainer = *(const BYTE*)(bitD->start);
+        switch(srcSize)   /* adds bytes 1..srcSize-1 at their little-endian positions; cases 5-7 need a size_t wider than 4 bytes */
+        {
+            case 7: bitD->bitContainer += (size_t)(((const BYTE*)(bitD->start))[6]) << (sizeof(size_t)*8 - 16);/* fall-through */
+            case 6: bitD->bitContainer += (size_t)(((const BYTE*)(bitD->start))[5]) << (sizeof(size_t)*8 - 24);/* fall-through */
+            case 5: bitD->bitContainer += (size_t)(((const BYTE*)(bitD->start))[4]) << (sizeof(size_t)*8 - 32);/* fall-through */
+            case 4: bitD->bitContainer += (size_t)(((const BYTE*)(bitD->start))[3]) << 24; /* fall-through */
+            case 3: bitD->bitContainer += (size_t)(((const BYTE*)(bitD->start))[2]) << 16; /* fall-through */
+            case 2: bitD->bitContainer += (size_t)(((const BYTE*)(bitD->start))[1]) <<  8; /* fall-through */
+            default: break;
+        }
+        contain32 = ((const BYTE*)srcBuffer)[srcSize-1];   /* last byte : its highest set bit is the end mark */
+        if (contain32 == 0) return ERROR(GENERIC);   /* endMark not present */
+        bitD->bitsConsumed = 8 - BITv05_highbit32(contain32);
+        bitD->bitsConsumed += (U32)(sizeof(size_t) - srcSize)*8;   /* the absent high bytes count as already consumed */
+    }
+
+    return srcSize;
+}
+
+/*!BITv05_lookBits
+ * Provides the next nbBits bits from the local register, without consuming them :
+ * local register is not modified (bits are still present for next read/look)
+ * On 32-bits, maxNbBits==25
+ * On 64-bits, maxNbBits==57
+ * @return : value extracted
+ */
+MEM_STATIC size_t BITv05_lookBits(BITv05_DStream_t* bitD, U32 nbBits)
+{
+    const U32 bitMask = sizeof(bitD->bitContainer)*8 - 1;   /* register width in bits, minus 1 */
+    return ((bitD->bitContainer << (bitD->bitsConsumed & bitMask)) >> 1) >> ((bitMask-nbBits) & bitMask);   /* left shift drops consumed bits; the right shift is split (1, then width-1-nbBits) so that nbBits==0 yields 0 */
+}
+
+/*! BITv05_lookBitsFast :
+*   unsafe version of BITv05_lookBits(); works only if nbBits >= 1 */
+MEM_STATIC size_t BITv05_lookBitsFast(BITv05_DStream_t* bitD, U32 nbBits)
+{
+    const U32 bitMask = sizeof(bitD->bitContainer)*8 - 1;   /* register width in bits, minus 1 */
+    return (bitD->bitContainer << (bitD->bitsConsumed & bitMask)) >> (((bitMask+1)-nbBits) & bitMask);   /* single right shift by (width - nbBits); with nbBits==0 the masked shift count becomes 0, hence the restriction */
+}
+
+MEM_STATIC void BITv05_skipBits(BITv05_DStream_t* bitD, U32 nbBits)
+{   /* Account for nbBits as consumed; the register content itself is left untouched. */
+    bitD->bitsConsumed = bitD->bitsConsumed + nbBits;
+}
+
+/*!BITv05_readBits
+ * Extract the next nbBits bits from the local register and mark them as consumed.
+ * The caller must not ask for more bits than the local register still holds.
+ * @return : extracted value.
+ */
+MEM_STATIC size_t BITv05_readBits(BITv05_DStream_t* bitD, U32 nbBits)
+{
+    size_t const extracted = BITv05_lookBits(bitD, nbBits);   /* peek ... */
+    bitD->bitsConsumed += nbBits;                              /* ... then consume (same effect as BITv05_skipBits()) */
+    return extracted;
+}
+
+/*!BITv05_readBitsFast :
+*  unsafe variant of BITv05_readBits() : works only if nbBits >= 1 */
+MEM_STATIC size_t BITv05_readBitsFast(BITv05_DStream_t* bitD, U32 nbBits)
+{
+    size_t const extracted = BITv05_lookBitsFast(bitD, nbBits);   /* peek ... */
+    bitD->bitsConsumed += nbBits;                                  /* ... then consume (same effect as BITv05_skipBits()) */
+    return extracted;
+}
+
+MEM_STATIC BITv05_DStream_status BITv05_reloadDStream(BITv05_DStream_t* bitD)   /* refill the register from memory, moving ptr backward */
+{
+    if (bitD->bitsConsumed > (sizeof(bitD->bitContainer)*8))  /* should never happen */
+        return BITv05_DStream_overflow;
+
+    if (bitD->ptr >= bitD->start + sizeof(bitD->bitContainer)) {   /* at least one register width of input lies before ptr */
+        bitD->ptr -= bitD->bitsConsumed >> 3;                       /* step back by the whole bytes consumed */
+        bitD->bitsConsumed &= 7;                                    /* keep only the sub-byte remainder */
+        bitD->bitContainer = MEM_readLEST(bitD->ptr);
+        return BITv05_DStream_unfinished;
+    }
+    if (bitD->ptr == bitD->start) {   /* nothing left to load from memory */
+        if (bitD->bitsConsumed < sizeof(bitD->bitContainer)*8) return BITv05_DStream_endOfBuffer;   /* unread bits remain in the register */
+        return BITv05_DStream_completed;
+    }
+    {   /* start < ptr < start + register width : step back, but never before start */
+        U32 nbBytes = bitD->bitsConsumed >> 3;
+        BITv05_DStream_status result = BITv05_DStream_unfinished;
+        if (bitD->ptr - nbBytes < bitD->start) {
+            nbBytes = (U32)(bitD->ptr - bitD->start);  /* ptr > start */
+            result = BITv05_DStream_endOfBuffer;
+        }
+        bitD->ptr -= nbBytes;
+        bitD->bitsConsumed -= nbBytes*8;   /* only the bytes actually stepped over are given back */
+        bitD->bitContainer = MEM_readLEST(bitD->ptr);   /* reminder : srcSize > sizeof(bitD) */
+        return result;
+    }
+}
+
+/*! BITv05_endOfDStream
+*   @return 1 when DStream is exactly exhausted (ptr back at start and the whole register consumed), 0 otherwise
+*/
+MEM_STATIC unsigned BITv05_endOfDStream(const BITv05_DStream_t* DStream)
+{
+    return (DStream->bitsConsumed != sizeof(DStream->bitContainer)*8) ? 0 : (unsigned)(DStream->ptr == DStream->start);
+}
+
+#if defined (__cplusplus)
+}
+#endif
+
+#endif /* BITv05STREAM_H_MODULE */
+/* ******************************************************************
+   FSEv05 : Finite State Entropy coder
+   header file for static linking (only)
+   Copyright (C) 2013-2015, Yann Collet
+
+   BSD 2-Clause License (http://www.opensource.org/licenses/bsd-license.php)
+
+   Redistribution and use in source and binary forms, with or without
+   modification, are permitted provided that the following conditions are
+   met:
+
+       * Redistributions of source code must retain the above copyright
+   notice, this list of conditions and the following disclaimer.
+       * Redistributions in binary form must reproduce the above
+   copyright notice, this list of conditions and the following disclaimer
+   in the documentation and/or other materials provided with the
+   distribution.
+
+   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+   OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+   You can contact the author at :
+   - Source repository : https://github.com/Cyan4973/FiniteStateEntropy
+   - Public forum : https://groups.google.com/forum/#!forum/lz4c
+****************************************************************** */
+#ifndef FSEv05_STATIC_H
+#define FSEv05_STATIC_H
+
+#if defined (__cplusplus)
+extern "C" {
+#endif
+
+
+
+/* *****************************************
+*  Static allocation
+*******************************************/
+/* A FSEv05 DTable can be statically allocated as an array of unsigned : 1 header cell + (1<<maxTableLog) decode cells */
+#define FSEv05_DTABLE_SIZE_U32(maxTableLog)                   (1 + (1<<(maxTableLog)))
+
+
+/* *****************************************
+*  FSEv05 advanced API
+*******************************************/
+size_t FSEv05_buildDTable_raw (FSEv05_DTable* dt, unsigned nbBits);
+/* build a fake FSEv05_DTable, designed to read an uncompressed bitstream where each symbol uses nbBits */
+
+size_t FSEv05_buildDTable_rle (FSEv05_DTable* dt, unsigned char symbolValue);
+/* build a fake FSEv05_DTable, designed to always generate the same symbolValue */
+
+
+
+/* *****************************************
+*  FSEv05 symbol decompression API
+*******************************************/
+typedef struct
+{
+    size_t      state;   /* current state : index of the active cell in 'table' */
+    const void* table;   /* set to dt+1 by FSEv05_initDState(); precise table may vary, depending on U16 */
+} FSEv05_DState_t;
+
+
+static void     FSEv05_initDState(FSEv05_DState_t* DStatePtr, BITv05_DStream_t* bitD, const FSEv05_DTable* dt);
+
+static unsigned char FSEv05_decodeSymbol(FSEv05_DState_t* DStatePtr, BITv05_DStream_t* bitD);
+
+static unsigned FSEv05_endOfDState(const FSEv05_DState_t* DStatePtr);
+
+/*!
+Let's now decompose FSEv05_decompress_usingDTable() into its unitary components.
+You will decode FSEv05-encoded symbols from the bitStream,
+and also any other bitFields you put in, **in reverse order**.
+
+You will need a few variables to track your bitStream. They are :
+
+BITv05_DStream_t DStream;    // Stream context
+FSEv05_DState_t  DState;     // State context. Multiple ones are possible
+FSEv05_DTable*   DTablePtr;  // Decoding table, provided by FSEv05_buildDTable()
+
+The first thing to do is to init the bitStream.
+    errorCode = BITv05_initDStream(&DStream, srcBuffer, srcSize);
+
+You should then retrieve your initial state(s)
+(in reverse flushing order if you have several ones) :
+    FSEv05_initDState(&DState, &DStream, DTablePtr);   // returns void : there is no error code to check
+
+You can then decode your data, symbol after symbol.
+For information the maximum number of bits read by FSEv05_decodeSymbol() is 'tableLog'.
+Keep in mind that symbols are decoded in reverse order, like a LIFO stack (last in, first out).
+    unsigned char symbol = FSEv05_decodeSymbol(&DState, &DStream);
+
+You can retrieve any bitfield you eventually stored into the bitStream (in reverse order)
+Note : maximum allowed nbBits is 25, for 32-bits compatibility
+    size_t bitField = BITv05_readBits(&DStream, nbBits);
+
+All above operations only read from local register (which size depends on size_t).
+Refueling the register from memory is manually performed by the reload method.
+    endSignal = BITv05_reloadDStream(&DStream);
+
+BITv05_reloadDStream() result tells if there is still some more data to read from DStream.
+BITv05_DStream_unfinished : there is still some data left into the DStream.
+BITv05_DStream_endOfBuffer : Dstream reached end of buffer. Its container may no longer be completely filled.
+BITv05_DStream_completed : Dstream reached its exact end, corresponding in general to decompression completed.
+BITv05_DStream_overflow : Dstream went too far. Decompression result is corrupted.
+
+When reaching end of buffer (BITv05_DStream_endOfBuffer), progress slowly, notably if you decode multiple symbols per loop,
+to properly detect the exact end of stream.
+After each decoded symbol, check if DStream is fully consumed using this simple test :
+    BITv05_reloadDStream(&DStream) >= BITv05_DStream_completed
+
+When it's done, verify decompression is fully completed, by checking both DStream and the relevant states.
+Checking if DStream has reached its end is performed by :
+    BITv05_endOfDStream(&DStream);
+Check also the states. There might be some symbols left there, if some high probability ones (>50%) are possible.
+    FSEv05_endOfDState(&DState);
+*/
+
+
+/* *****************************************
+*  FSEv05 unsafe API
+*******************************************/
+static unsigned char FSEv05_decodeSymbolFast(FSEv05_DState_t* DStatePtr, BITv05_DStream_t* bitD);
+/* faster, but works only if nbBits is always >= 1 (otherwise, result will be corrupted) */
+
+
+/* *****************************************
+*  Implementation of inlined functions
+*******************************************/
+/* decompression */
+
+typedef struct {
+    U16 tableLog;   /* table holds (1<<tableLog) decode cells; also the bit width of the initial state */
+    U16 fastMode;   /* 1 for raw tables, or when no count reaches 1<<(tableLog-1) in FSEv05_buildDTable(); 0 for rle tables */
+} FSEv05_DTableHeader;   /* sizeof U32 */
+
+typedef struct
+{
+    unsigned short newState;   /* base of the next state; the bits read from the stream are added to it */
+    unsigned char  symbol;     /* symbol emitted when decoding from this cell */
+    unsigned char  nbBits;     /* number of bits to read from the bitStream for the transition */
+} FSEv05_decode_t;   /* size == U32 */
+
+MEM_STATIC void FSEv05_initDState(FSEv05_DState_t* DStatePtr, BITv05_DStream_t* bitD, const FSEv05_DTable* dt)
+{   /* Seed the decoder : the initial state is the next tableLog bits of the stream; cells start right after the header. */
+    const void* const headerCell = dt;   /* through void* : the first cell of dt stores a FSEv05_DTableHeader */
+    U32 const tableLog = ((const FSEv05_DTableHeader*)headerCell)->tableLog;
+    DStatePtr->state = BITv05_readBits(bitD, tableLog);
+    BITv05_reloadDStream(bitD);          /* refill after the read; the status is not used here */
+    DStatePtr->table = dt + 1;
+}
+
+MEM_STATIC BYTE FSEv05_peakSymbol(FSEv05_DState_t* DStatePtr)
+{   /* Peek : report the symbol of the current state without reading bits or changing state. */
+    const FSEv05_decode_t* const cells = (const FSEv05_decode_t*)(DStatePtr->table);
+    return cells[DStatePtr->state].symbol;
+}
+
+MEM_STATIC BYTE FSEv05_decodeSymbol(FSEv05_DState_t* DStatePtr, BITv05_DStream_t* bitD)
+{   /* Emit the symbol of the current state, then move to the next state using bits from the stream. */
+    const FSEv05_decode_t* const cells = (const FSEv05_decode_t*)(DStatePtr->table);
+    const FSEv05_decode_t cell = cells[DStatePtr->state];   /* copy : the state is overwritten below */
+    size_t const rest = BITv05_readBits(bitD, cell.nbBits);
+
+    /* next state = per-cell base + the bits just read */
+    DStatePtr->state = cell.newState + rest;
+    return cell.symbol;
+}
+
+MEM_STATIC BYTE FSEv05_decodeSymbolFast(FSEv05_DState_t* DStatePtr, BITv05_DStream_t* bitD)
+{   /* Same steps as FSEv05_decodeSymbol(), but reads with BITv05_readBitsFast() : every cell must have nbBits >= 1. */
+    const FSEv05_decode_t* const cells = (const FSEv05_decode_t*)(DStatePtr->table);
+    const FSEv05_decode_t cell = cells[DStatePtr->state];   /* copy : the state is overwritten below */
+    size_t const rest = BITv05_readBitsFast(bitD, cell.nbBits);
+
+    /* next state = per-cell base + the bits just read */
+    DStatePtr->state = cell.newState + rest;
+    return cell.symbol;
+}
+
+MEM_STATIC unsigned FSEv05_endOfDState(const FSEv05_DState_t* DStatePtr)
+{   /* Reports 1 when the decoder is in state 0, 0 otherwise. */
+    return (DStatePtr->state != 0) ? 0U : 1U;
+}
+
+
+#if defined (__cplusplus)
+}
+#endif
+
+#endif  /* FSEv05_STATIC_H */
+/* ******************************************************************
+   FSEv05 : Finite State Entropy coder
+   Copyright (C) 2013-2015, Yann Collet.
+
+   BSD 2-Clause License (http://www.opensource.org/licenses/bsd-license.php)
+
+   Redistribution and use in source and binary forms, with or without
+   modification, are permitted provided that the following conditions are
+   met:
+
+       * Redistributions of source code must retain the above copyright
+   notice, this list of conditions and the following disclaimer.
+       * Redistributions in binary form must reproduce the above
+   copyright notice, this list of conditions and the following disclaimer
+   in the documentation and/or other materials provided with the
+   distribution.
+
+   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+   OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+    You can contact the author at :
+    - FSEv05 source repository : https://github.com/Cyan4973/FiniteStateEntropy
+    - Public forum : https://groups.google.com/forum/#!forum/lz4c
+****************************************************************** */
+
+#ifndef FSEv05_COMMONDEFS_ONLY
+
+/* **************************************************************
+*  Tuning parameters
+****************************************************************/
+/*!MEMORY_USAGE :
+*  Memory usage formula : N->2^N Bytes (examples : 10 -> 1KB; 12 -> 4KB ; 16 -> 64KB; 20 -> 1MB; etc.)
+*  Increasing memory usage improves compression ratio
+*  Reduced memory usage can improve speed, due to cache effect
+*  Recommended max value is 14, for 16KB, which nicely fits into Intel x86 L1 cache */
+#define FSEv05_MAX_MEMORY_USAGE 14
+#define FSEv05_DEFAULT_MEMORY_USAGE 13
+
+/*!FSEv05_MAX_SYMBOL_VALUE :
+*  Maximum symbol value authorized.
+*  Required for proper stack allocation */
+#define FSEv05_MAX_SYMBOL_VALUE 255
+
+
+/* **************************************************************
+*  template functions type & suffix
+****************************************************************/
+#define FSEv05_FUNCTION_TYPE BYTE
+#define FSEv05_FUNCTION_EXTENSION
+#define FSEv05_DECODE_TYPE FSEv05_decode_t
+
+
+#endif   /* !FSEv05_COMMONDEFS_ONLY */
+
+/* **************************************************************
+*  Compiler specifics
+****************************************************************/
+#ifdef _MSC_VER    /* Visual Studio */
+#  define FORCE_INLINE static __forceinline
+#  include <intrin.h>                    /* For Visual 2005 */
+#  pragma warning(disable : 4127)        /* disable: C4127: conditional expression is constant */
+#  pragma warning(disable : 4214)        /* disable: C4214: non-int bitfields */
+#else
+#  if defined (__cplusplus) || defined (__STDC_VERSION__) && __STDC_VERSION__ >= 199901L   /* C99 */
+#    ifdef __GNUC__
+#      define FORCE_INLINE static inline __attribute__((always_inline))
+#    else
+#      define FORCE_INLINE static inline
+#    endif
+#  else
+#    define FORCE_INLINE static
+#  endif /* __STDC_VERSION__ */
+#endif
+
+
+/* **************************************************************
+*  Includes
+****************************************************************/
+#include <stdlib.h>     /* malloc, free, qsort */
+#include <string.h>     /* memcpy, memset */
+#include <stdio.h>      /* printf (debug) */
+
+
+
+/* ***************************************************************
+*  Constants
+*****************************************************************/
+#define FSEv05_MAX_TABLELOG  (FSEv05_MAX_MEMORY_USAGE-2)   /* presumably -2 because a decode cell (FSEv05_decode_t) is 4 bytes - TODO confirm */
+#define FSEv05_MAX_TABLESIZE (1U<<FSEv05_MAX_TABLELOG)
+#define FSEv05_MAXTABLESIZE_MASK (FSEv05_MAX_TABLESIZE-1)
+#define FSEv05_DEFAULT_TABLELOG (FSEv05_DEFAULT_MEMORY_USAGE-2)
+#define FSEv05_MIN_TABLELOG 5   /* FSEv05_readNCount() reads tableLog as a 4-bit offset from this value */
+
+#define FSEv05_TABLELOG_ABSOLUTE_MAX 15   /* bound checked by FSEv05_readNCount(); cap applied by FSEv05_createDTable() */
+#if FSEv05_MAX_TABLELOG > FSEv05_TABLELOG_ABSOLUTE_MAX
+#error "FSEv05_MAX_TABLELOG > FSEv05_TABLELOG_ABSOLUTE_MAX is not supported"
+#endif
+
+
+/* **************************************************************
+*  Error Management
+****************************************************************/
+#define FSEv05_STATIC_ASSERT(c) { enum { FSEv05_static_assert = 1/(int)(!!(c)) }; }   /* use only *after* variable declarations */
+
+
+/* **************************************************************
+*  Complex types
+****************************************************************/
+typedef U32 DTable_max_t[FSEv05_DTABLE_SIZE_U32(FSEv05_MAX_TABLELOG)];
+
+
+/* **************************************************************
+*  Templates
+****************************************************************/
+/*
+  designed to be included
+  for type-specific functions (template emulation in C)
+  Objective is to write these functions only once, for improved maintenance
+*/
+
+/* safety checks */
+#ifndef FSEv05_FUNCTION_EXTENSION
+#  error "FSEv05_FUNCTION_EXTENSION must be defined"
+#endif
+#ifndef FSEv05_FUNCTION_TYPE
+#  error "FSEv05_FUNCTION_TYPE must be defined"
+#endif
+
+/* Function names */
+#define FSEv05_CAT(X,Y) X##Y
+#define FSEv05_FUNCTION_NAME(X,Y) FSEv05_CAT(X,Y)
+#define FSEv05_TYPE_NAME(X,Y) FSEv05_CAT(X,Y)
+
+
+/* Function templates */
+static U32 FSEv05_tableStep(U32 tableSize) { U32 const half = tableSize>>1; U32 const eighth = tableSize>>3; return half + eighth + 3; }   /* stride used by FSEv05_buildDTable() to spread symbols */
+
+
+
+FSEv05_DTable* FSEv05_createDTable (unsigned tableLog)
+{   /* Allocate room for 1 header cell + (1<<tableLog) decode cells; tableLog is capped first; the malloc() result is returned unchecked. */
+    unsigned const cappedLog = (tableLog > FSEv05_TABLELOG_ABSOLUTE_MAX) ? FSEv05_TABLELOG_ABSOLUTE_MAX : tableLog;
+    return (FSEv05_DTable*)malloc( FSEv05_DTABLE_SIZE_U32(cappedLog) * sizeof (U32) );
+}
+
+void FSEv05_freeDTable (FSEv05_DTable* dt)
+{
+    free(dt);   /* counterpart of the malloc() in FSEv05_createDTable(); free(NULL) is a no-op */
+}
+
+size_t FSEv05_buildDTable(FSEv05_DTable* dt, const short* normalizedCounter, unsigned maxSymbolValue, unsigned tableLog)
+{   /* Fills dt (header cell + 1<<tableLog decode cells) from the per-symbol counts; returns 0 or an error code. */
+    FSEv05_DTableHeader DTableH;
+    void* const tdPtr = dt+1;   /* because dt is unsigned, 32-bits aligned on 32-bits */
+    FSEv05_DECODE_TYPE* const tableDecode = (FSEv05_DECODE_TYPE*) (tdPtr);
+    const U32 tableSize = 1 << tableLog;   /* NOTE(review): evaluated before the tableLog sanity check below */
+    const U32 tableMask = tableSize-1;
+    const U32 step = FSEv05_tableStep(tableSize);
+    U16 symbolNext[FSEv05_MAX_SYMBOL_VALUE+1];   /* per symbol : next state value to hand out */
+    U32 position = 0;
+    U32 highThreshold = tableSize-1;   /* cells above this index are reserved for low-probability symbols */
+    const S16 largeLimit= (S16)(1 << (tableLog-1));   /* NOTE(review): tableLog==0 makes (tableLog-1) wrap - confirm callers never pass 0 */
+    U32 noLarge = 1;   /* stays 1 while no count reaches largeLimit; becomes the header's fastMode */
+    U32 s;
+
+    /* Sanity Checks */
+    if (maxSymbolValue > FSEv05_MAX_SYMBOL_VALUE) return ERROR(maxSymbolValue_tooLarge);
+    if (tableLog > FSEv05_MAX_TABLELOG) return ERROR(tableLog_tooLarge);
+
+    /* Init, lay down lowprob symbols */
+    DTableH.tableLog = (U16)tableLog;
+    for (s=0; s<=maxSymbolValue; s++) {
+        if (normalizedCounter[s]==-1) {   /* -1 : low-probability symbol, gets one cell at the top of the table */
+            tableDecode[highThreshold--].symbol = (FSEv05_FUNCTION_TYPE)s;
+            symbolNext[s] = 1;
+        } else {
+            if (normalizedCounter[s] >= largeLimit) noLarge=0;
+            symbolNext[s] = normalizedCounter[s];
+    }   }
+
+    /* Spread symbols */
+    for (s=0; s<=maxSymbolValue; s++) {
+        int i;
+        for (i=0; i<normalizedCounter[s]; i++) {   /* runs zero times for counts of 0 and -1 */
+            tableDecode[position].symbol = (FSEv05_FUNCTION_TYPE)s;
+            position = (position + step) & tableMask;
+            while (position > highThreshold) position = (position + step) & tableMask;   /* lowprob area */
+    }   }
+
+    if (position!=0) return ERROR(GENERIC);   /* position must reach all cells once, otherwise normalizedCounter is incorrect */
+
+    /* Build Decoding table */
+    {
+        U32 i;
+        for (i=0; i<tableSize; i++) {
+            FSEv05_FUNCTION_TYPE symbol = (FSEv05_FUNCTION_TYPE)(tableDecode[i].symbol);
+            U16 nextState = symbolNext[symbol]++;   /* NOTE(review): a value of 0 here would reach BITv05_highbit32(0) - confirm it cannot happen */
+            tableDecode[i].nbBits = (BYTE) (tableLog - BITv05_highbit32 ((U32)nextState) );
+            tableDecode[i].newState = (U16) ( (nextState << tableDecode[i].nbBits) - tableSize);
+    }   }
+
+    DTableH.fastMode = (U16)noLarge;
+    memcpy(dt, &DTableH, sizeof(DTableH));   /* the header occupies the first cell of dt */
+    return 0;
+}
+
+
+#ifndef FSEv05_COMMONDEFS_ONLY
+/*-****************************************
+*  FSEv05 helper functions
+******************************************/
+unsigned FSEv05_isError(size_t code) { return ERR_isError(code); }   /* forwards to ERR_isError() */
+
+const char* FSEv05_getErrorName(size_t code) { return ERR_getErrorName(code); }   /* forwards to ERR_getErrorName() */
+
+
+/*-**************************************************************
+*  FSEv05 NCount encoding-decoding
+****************************************************************/
+static short FSEv05_abs(short a) { if (a < 0) return (short)(-a); return a; }   /* magnitude of a; the negation is done on the promoted int */
+
+
+size_t FSEv05_readNCount (short* normalizedCounter, unsigned* maxSVPtr, unsigned* tableLogPtr,
+                 const void* headerBuffer, size_t hbSize)
+{   /* Decodes tableLog then one count per symbol from headerBuffer; returns the number of bytes read, or an error code. */
+    const BYTE* const istart = (const BYTE*) headerBuffer;
+    const BYTE* const iend = istart + hbSize;
+    const BYTE* ip = istart;
+    int nbBits;      /* bit width used to read the next count; shrinks together with threshold */
+    int remaining;   /* table slots still to distribute, plus 1 (the loop stops when it reaches 1) */
+    int threshold;   /* power of 2, halved until it is <= remaining */
+    U32 bitStream;   /* 32-bit window on the input, already shifted past the bits used */
+    int bitCount;    /* number of bits of the current window already used */
+    unsigned charnum = 0;   /* next symbol to receive a count */
+    int previous0 = 0;      /* set when the last count read was 0 : a run of zeroes follows */
+
+    if (hbSize < 4) return ERROR(srcSize_wrong);   /* the first read below takes 4 bytes */
+    bitStream = MEM_readLE32(ip);
+    nbBits = (bitStream & 0xF) + FSEv05_MIN_TABLELOG;   /* extract tableLog */
+    if (nbBits > FSEv05_TABLELOG_ABSOLUTE_MAX) return ERROR(tableLog_tooLarge);
+    bitStream >>= 4;
+    bitCount = 4;
+    *tableLogPtr = nbBits;
+    remaining = (1<<nbBits)+1;
+    threshold = 1<<nbBits;
+    nbBits++;   /* a count may be read on up to tableLog+1 bits */
+
+    while ((remaining>1) && (charnum<=*maxSVPtr)) {
+        if (previous0) {   /* zero-run : find how many following symbols have a count of 0 */
+            unsigned n0 = charnum;
+            while ((bitStream & 0xFFFF) == 0xFFFF) {   /* 16 set bits : 24 more zeroes */
+                n0+=24;
+                if (ip < iend-5) {
+                    ip+=2;
+                    bitStream = MEM_readLE32(ip) >> bitCount;
+                } else {   /* ip is not advanced here : the 16 bits are consumed from the window instead */
+                    bitStream >>= 16;
+                    bitCount+=16;
+            }   }
+            while ((bitStream & 3) == 3) {   /* 2-bit field equal to 3 : 3 more zeroes, the run continues */
+                n0+=3;
+                bitStream>>=2;
+                bitCount+=2;
+            }
+            n0 += bitStream & 3;   /* last 2-bit field (0 to 2) ends the run */
+            bitCount += 2;
+            if (n0 > *maxSVPtr) return ERROR(maxSymbolValue_tooSmall);
+            while (charnum < n0) normalizedCounter[charnum++] = 0;
+            if ((ip <= iend-7) || (ip + (bitCount>>3) <= iend-4)) {   /* refill only when the next 4-byte read is expected to stay inside the buffer */
+                ip += bitCount>>3;
+                bitCount &= 7;
+                bitStream = MEM_readLE32(ip) >> bitCount;
+            }
+            else
+                bitStream >>= 2;
+        }
+        {
+            const short max = (short)((2*threshold-1)-remaining);   /* values below max are read on nbBits-1 bits */
+            short count;
+
+            if ((bitStream & (threshold-1)) < (U32)max) {
+                count = (short)(bitStream & (threshold-1));
+                bitCount   += nbBits-1;
+            } else {
+                count = (short)(bitStream & (2*threshold-1));
+                if (count >= threshold) count -= max;
+                bitCount   += nbBits;
+            }
+
+            count--;   /* extra accuracy : the stored value is count+1, so -1 can be represented */
+            remaining -= FSEv05_abs(count);   /* a count of -1 uses one slot */
+            normalizedCounter[charnum++] = count;
+            previous0 = !count;
+            while (remaining < threshold) {
+                nbBits--;
+                threshold >>= 1;
+            }
+
+            if ((ip <= iend-7) || (ip + (bitCount>>3) <= iend-4)) {
+                ip += bitCount>>3;
+                bitCount &= 7;
+            } else {   /* near the end : pin ip to the last 4 bytes and keep the surplus in bitCount */
+                bitCount -= (int)(8 * (iend - 4 - ip));
+                ip = iend - 4;
+            }
+            bitStream = MEM_readLE32(ip) >> (bitCount & 31);
+    }   }
+    if (remaining != 1) return ERROR(GENERIC);   /* the counts did not use the table exactly */
+    *maxSVPtr = charnum-1;   /* last symbol that received a count */
+
+    ip += (bitCount+7)>>3;   /* round the bits used up to whole bytes */
+    if ((size_t)(ip-istart) > hbSize) return ERROR(srcSize_wrong);
+    return ip-istart;
+}
+
+
+
+/*-*******************************************************
+*  Decompression (Byte symbols)
+*********************************************************/
+size_t FSEv05_buildDTable_rle (FSEv05_DTable* dt, BYTE symbolValue)
+{   /* Single-cell table : state 0 carries symbolValue, reads 0 bits and leads back to state 0. Always returns 0. */
+    void* const headerRaw = dt;
+    void* const cellRaw = dt + 1;
+    FSEv05_DTableHeader* const header = (FSEv05_DTableHeader*)headerRaw;
+    FSEv05_decode_t* const only = (FSEv05_decode_t*)cellRaw;
+
+    /* header : tableLog 0 (one cell), fastMode flag cleared */
+    header->tableLog = 0;
+    header->fastMode = 0;
+
+    only[0].newState = 0;
+    only[0].symbol = symbolValue;
+    only[0].nbBits = 0;
+    return 0;
+}
+
+
+size_t FSEv05_buildDTable_raw (FSEv05_DTable* dt, unsigned nbBits)
+{   /* Identity table for raw bitstreams : cell s carries (BYTE)s, reads nbBits bits and has base state 0. */
+    void* const headerRaw = dt;
+    void* const cellsRaw = dt + 1;
+    FSEv05_DTableHeader* const header = (FSEv05_DTableHeader*)headerRaw;
+    FSEv05_decode_t* const cells = (FSEv05_decode_t*)cellsRaw;
+    const unsigned cellCount = 1 << nbBits;
+    const unsigned lastSymbol = cellCount - 1;
+    unsigned sym = 0;
+
+    /* a raw table needs at least 1 bit per symbol */
+    if (nbBits < 1) return ERROR(GENERIC);
+
+    header->tableLog = (U16)nbBits;
+    header->fastMode = 1;
+
+    /* one cell per nbBits-wide value */
+    while (sym <= lastSymbol) {
+        cells[sym].newState = 0;
+        cells[sym].symbol = (BYTE)sym;
+        cells[sym].nbBits = (BYTE)nbBits;
+        sym++;
+    }
+    return 0;
+}
+
+/*! FSEv05_decompress_usingDTable_generic() :
+ *  Decodes an FSEv05 bitstream from `cSrc` into `dst` using decoding table `dt`,
+ *  alternating between two decoder states that share a single bitstream.
+ *  `fast` selects FSEv05_decodeSymbolFast() instead of FSEv05_decodeSymbol() for every symbol.
+ *  @return : number of bytes written into `dst`, or an error code :
+ *            the one reported by BITv05_initDStream(),
+ *            dstSize_tooSmall when `dst` is full while the input is not finished,
+ *            corruption_detected in every other unfinished case. */
+FORCE_INLINE size_t FSEv05_decompress_usingDTable_generic(
+          void* dst, size_t maxDstSize,
+    const void* cSrc, size_t cSrcSize,
+    const FSEv05_DTable* dt, const unsigned fast)
+{
+    BYTE* const ostart = (BYTE*) dst;
+    BYTE* op = ostart;
+    BYTE* const omax = op + maxDstSize;
+    BYTE* const olimit = omax-3;   /* the 4-symbols loop runs only while op < olimit, i.e. while 4 more bytes fit */
+
+    BITv05_DStream_t bitD;
+    FSEv05_DState_t state1;
+    FSEv05_DState_t state2;
+    size_t errorCode;
+
+    /* Init */
+    errorCode = BITv05_initDStream(&bitD, cSrc, cSrcSize);   /* replaced last arg by maxCompressed Size */
+    if (FSEv05_isError(errorCode)) return errorCode;
+
+    /* both states are initialized from the same bitstream, state1 first */
+    FSEv05_initDState(&state1, &bitD, dt);
+    FSEv05_initDState(&state2, &bitD, dt);
+
+/* decodes one symbol from the given state; relies on `fast` and `bitD` from the enclosing function */
+#define FSEv05_GETSYMBOL(statePtr) fast ? FSEv05_decodeSymbolFast(statePtr, &bitD) : FSEv05_decodeSymbol(statePtr, &bitD)
+
+    /* 4 symbols per loop */
+    for ( ; (BITv05_reloadDStream(&bitD)==BITv05_DStream_unfinished) && (op<olimit) ; op+=4) {
+        op[0] = FSEv05_GETSYMBOL(&state1);
+
+        /* extra reloads below are only compiled in when the bit container is too small
+         * to hold the worst-case number of bits for the symbols decoded between two reloads */
+        if (FSEv05_MAX_TABLELOG*2+7 > sizeof(bitD.bitContainer)*8)    /* This test must be static */
+            BITv05_reloadDStream(&bitD);
+
+        op[1] = FSEv05_GETSYMBOL(&state2);
+
+        /* reload status went past `unfinished` : account for the 2 symbols already written and finish in the tail loop */
+        if (FSEv05_MAX_TABLELOG*4+7 > sizeof(bitD.bitContainer)*8)    /* This test must be static */
+            { if (BITv05_reloadDStream(&bitD) > BITv05_DStream_unfinished) { op+=2; break; } }
+
+        op[2] = FSEv05_GETSYMBOL(&state1);
+
+        if (FSEv05_MAX_TABLELOG*2+7 > sizeof(bitD.bitContainer)*8)    /* This test must be static */
+            BITv05_reloadDStream(&bitD);
+
+        op[3] = FSEv05_GETSYMBOL(&state2);
+    }
+
+    /* tail */
+    /* note : BITv05_reloadDStream(&bitD) >= FSEv05_DStream_partiallyFilled; Ends at exactly BITv05_DStream_completed */
+    /* one symbol at a time, alternating state1 / state2; stops when the reload status exceeds `completed`,
+     * when the output is full, or when the bitstream is consumed and (fast mode, or the next state is at its end) */
+    while (1) {
+        if ( (BITv05_reloadDStream(&bitD)>BITv05_DStream_completed) || (op==omax) || (BITv05_endOfDStream(&bitD) && (fast || FSEv05_endOfDState(&state1))) )
+            break;
+
+        *op++ = FSEv05_GETSYMBOL(&state1);
+
+        if ( (BITv05_reloadDStream(&bitD)>BITv05_DStream_completed) || (op==omax) || (BITv05_endOfDStream(&bitD) && (fast || FSEv05_endOfDState(&state2))) )
+            break;
+
+        *op++ = FSEv05_GETSYMBOL(&state2);
+    }
+
+    /* end ? */
+    /* success requires the bitstream AND both states to be exactly at their end */
+    if (BITv05_endOfDStream(&bitD) && FSEv05_endOfDState(&state1) && FSEv05_endOfDState(&state2))
+        return op-ostart;
+
+    if (op==omax) return ERROR(dstSize_tooSmall);   /* dst buffer is full, but cSrc unfinished */
+
+    return ERROR(corruption_detected);
+}
+
+
+/*! FSEv05_decompress_usingDTable() :
+ *  Decodes `cSrc` into `dst` with an already built decoding table `dt`.
+ *  Reads the fastMode flag from the table header and forwards to the generic decoder.
+ *  @return : whatever FSEv05_decompress_usingDTable_generic() returns */
+size_t FSEv05_decompress_usingDTable(void* dst, size_t originalSize,
+                            const void* cSrc, size_t cSrcSize,
+                            const FSEv05_DTable* dt)
+{
+    const void* const tableRaw = dt;
+    const FSEv05_DTableHeader* const header = (const FSEv05_DTableHeader*)tableRaw;
+
+    /* `fast` is passed as a literal in each branch (static selection of the mode) */
+    return header->fastMode
+         ? FSEv05_decompress_usingDTable_generic(dst, originalSize, cSrc, cSrcSize, dt, 1)
+         : FSEv05_decompress_usingDTable_generic(dst, originalSize, cSrc, cSrcSize, dt, 0);
+}
+
+
+/*! FSEv05_decompress() :
+ *  Decodes a complete FSEv05 block : reads the normalized counters stored at the start of `cSrc`,
+ *  builds the decoding table from them, then decodes the rest of the input into `dst`.
+ *  @return : result of FSEv05_decompress_usingDTable(), or an error code
+ *            (srcSize_wrong when the input is shorter than 2 bytes or holds nothing after the header) */
+size_t FSEv05_decompress(void* dst, size_t maxDstSize, const void* cSrc, size_t cSrcSize)
+{
+    const BYTE* const srcBegin = (const BYTE*)cSrc;
+    short normCounts[FSEv05_MAX_SYMBOL_VALUE+1];
+    DTable_max_t decodeTable;   /* filled by FSEv05_buildDTable() below, before any use */
+    unsigned tableLog;
+    unsigned maxSymbolValue = FSEv05_MAX_SYMBOL_VALUE;
+    size_t headerSize;
+
+    if (cSrcSize < 2) return ERROR(srcSize_wrong);   /* too small input size */
+
+    /* header : normalized counters */
+    headerSize = FSEv05_readNCount (normCounts, &maxSymbolValue, &tableLog, srcBegin, cSrcSize);
+    if (FSEv05_isError(headerSize)) return headerSize;
+    if (headerSize >= cSrcSize) return ERROR(srcSize_wrong);   /* nothing left after the header */
+
+    {   size_t const buildResult = FSEv05_buildDTable (decodeTable, normCounts, maxSymbolValue, tableLog);
+        if (FSEv05_isError(buildResult)) return buildResult; }
+
+    /* payload starts right after the header; the result is returned as is, error code or not */
+    return FSEv05_decompress_usingDTable (dst, maxDstSize, srcBegin + headerSize, cSrcSize - headerSize, decodeTable);
+}
+
+
+
+#endif   /* FSEv05_COMMONDEFS_ONLY */
+/* ******************************************************************
+   Huff0 : Huffman coder, part of New Generation Entropy library
+   header file
+   Copyright (C) 2013-2016, Yann Collet.
+
+   BSD 2-Clause License (http://www.opensource.org/licenses/bsd-license.php)
+
+   Redistribution and use in source and binary forms, with or without
+   modification, are permitted provided that the following conditions are
+   met:
+
+       * Redistributions of source code must retain the above copyright
+   notice, this list of conditions and the following disclaimer.
+       * Redistributions in binary form must reproduce the above
+   copyright notice, this list of conditions and the following disclaimer
+   in the documentation and/or other materials provided with the
+   distribution.
+
+   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+   OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+   You can contact the author at :
+   - Source repository : https://github.com/Cyan4973/FiniteStateEntropy
+****************************************************************** */
+#ifndef HUFF0_H
+#define HUFF0_H
+
+#if defined (__cplusplus)
+extern "C" {
+#endif
+
+
+
+/* ****************************************
+*  Huff0 simple functions
+******************************************/
+size_t HUFv05_decompress(void* dst,  size_t dstSize,
+                const void* cSrc, size_t cSrcSize);
+/*!
+HUFv05_decompress():
+    Decompress Huff0 data from buffer 'cSrc', of size 'cSrcSize',
+    into already allocated destination buffer 'dst', of size 'dstSize'.
+    @dstSize : must be the **exact** size of the original (uncompressed) data.
+    Note : in contrast with FSEv05, HUFv05_decompress can regenerate
+           RLE (cSrcSize==1) and uncompressed (cSrcSize==dstSize) data,
+           because it knows the size to regenerate.
+    @return : size of regenerated data (== dstSize)
+              or an error code, which can be tested using HUFv05_isError()
+*/
+
+
+/* ****************************************
+*  Tool functions
+******************************************/
+/* Error Management : helpers to interpret the size_t value returned by HUFv05 functions */
+unsigned    HUFv05_isError(size_t code);        /* tells if a return value is an error code */
+const char* HUFv05_getErrorName(size_t code);   /* provides error code string (useful for debugging) */
+
+
+#if defined (__cplusplus)
+}
+#endif
+
+#endif   /* HUFF0_H */
+/* ******************************************************************
+   Huff0 : Huffman codec, part of New Generation Entropy library
+   header file, for static linking only
+   Copyright (C) 2013-2016, Yann Collet
+
+   BSD 2-Clause License (http://www.opensource.org/licenses/bsd-license.php)
+
+   Redistribution and use in source and binary forms, with or without
+   modification, are permitted provided that the following conditions are
+   met:
+
+       * Redistributions of source code must retain the above copyright
+   notice, this list of conditions and the following disclaimer.
+       * Redistributions in binary form must reproduce the above
+   copyright notice, this list of conditions and the following disclaimer
+   in the documentation and/or other materials provided with the
+   distribution.
+
+   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+   OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+   You can contact the author at :
+   - Source repository : https://github.com/Cyan4973/FiniteStateEntropy
+****************************************************************** */
+#ifndef HUF0_STATIC_H
+#define HUF0_STATIC_H
+
+#if defined (__cplusplus)
+extern "C" {
+#endif
+
+
+
+/* ****************************************
+*  Static allocation
+******************************************/
+/* static allocation of Huff0's DTable */
+/* HUFv05_DTABLE_SIZE() : number of cells of a DTable able to host `maxTableLog` :
+ * one leading cell plus (1 << maxTableLog) decoding cells.
+ * The argument is parenthesized so that any constant expression can be passed safely. */
+#define HUFv05_DTABLE_SIZE(maxTableLog)   (1 + (1<<(maxTableLog)))
+/* HUFv05_CREATE_STATIC_DTABLEXn() : declare an array named `DTable`, sized for `maxTableLog`,
+ * whose first cell is initialized with `maxTableLog` (remaining cells are zero-initialized).
+ * X2 uses 16-bit cells, X4 32-bit cells, X6 32-bit cells with 3/2 times as many of them. */
+#define HUFv05_CREATE_STATIC_DTABLEX2(DTable, maxTableLog) \
+        unsigned short DTable[HUFv05_DTABLE_SIZE(maxTableLog)] = { (maxTableLog) }
+#define HUFv05_CREATE_STATIC_DTABLEX4(DTable, maxTableLog) \
+        unsigned int DTable[HUFv05_DTABLE_SIZE(maxTableLog)] = { (maxTableLog) }
+#define HUFv05_CREATE_STATIC_DTABLEX6(DTable, maxTableLog) \
+        unsigned int DTable[HUFv05_DTABLE_SIZE(maxTableLog) * 3 / 2] = { (maxTableLog) }
+
+
+/* ****************************************
+*  Advanced decompression functions
+******************************************/
+/* Both read the Huffman table stored at the start of `cSrc`, then decode the remaining input as 4 streams.
+ * Return value follows the HUFv05 convention : a size, or an error code testable with HUFv05_isError(). */
+size_t HUFv05_decompress4X2 (void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize);   /* single-symbol decoder */
+size_t HUFv05_decompress4X4 (void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize);   /* double-symbols decoder */
+
+
+/* ****************************************
+*  Huff0 detailed API
+******************************************/
+/*!
+HUFv05_decompress() does the following:
+1. select the decompression algorithm (X2, X4, X6) based on pre-computed heuristics
+2. build the Huffman table from its saved (serialized) form, using HUFv05_readDTableXn()
+3. decode 1 or 4 segments in parallel using HUFv05_decompressSXn_usingDTable
+*/
+/* HUFv05_readDTableXn() : `DTable[0]` must hold, on entry, the maximum tableLog the table can host
+ * (as set up by HUFv05_CREATE_STATIC_DTABLEXn). @return : size read from `src`, or an error code. */
+size_t HUFv05_readDTableX2 (unsigned short* DTable, const void* src, size_t srcSize);
+size_t HUFv05_readDTableX4 (unsigned* DTable, const void* src, size_t srcSize);
+
+/* 4-streams decoding with an already built table; `cSrc` must point after the table description */
+size_t HUFv05_decompress4X2_usingDTable(void* dst, size_t maxDstSize, const void* cSrc, size_t cSrcSize, const unsigned short* DTable);
+size_t HUFv05_decompress4X4_usingDTable(void* dst, size_t maxDstSize, const void* cSrc, size_t cSrcSize, const unsigned* DTable);
+
+
+/* single stream variants : same contracts as the 4X counterparts, but the payload is one bitstream */
+
+size_t HUFv05_decompress1X2 (void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize);   /* single-symbol decoder */
+size_t HUFv05_decompress1X4 (void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize);   /* double-symbol decoder */
+
+/* same, with an already built table; `cSrc` must point after the table description */
+size_t HUFv05_decompress1X2_usingDTable(void* dst, size_t maxDstSize, const void* cSrc, size_t cSrcSize, const unsigned short* DTable);
+size_t HUFv05_decompress1X4_usingDTable(void* dst, size_t maxDstSize, const void* cSrc, size_t cSrcSize, const unsigned* DTable);
+
+
+
+#if defined (__cplusplus)
+}
+#endif
+
+#endif /* HUF0_STATIC_H */
+/* ******************************************************************
+   Huff0 : Huffman coder, part of New Generation Entropy library
+   Copyright (C) 2013-2015, Yann Collet.
+
+   BSD 2-Clause License (http://www.opensource.org/licenses/bsd-license.php)
+
+   Redistribution and use in source and binary forms, with or without
+   modification, are permitted provided that the following conditions are
+   met:
+
+       * Redistributions of source code must retain the above copyright
+   notice, this list of conditions and the following disclaimer.
+       * Redistributions in binary form must reproduce the above
+   copyright notice, this list of conditions and the following disclaimer
+   in the documentation and/or other materials provided with the
+   distribution.
+
+   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+   OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+    You can contact the author at :
+    - FSEv05+Huff0 source repository : https://github.com/Cyan4973/FiniteStateEntropy
+    - Public forum : https://groups.google.com/forum/#!forum/lz4c
+****************************************************************** */
+
+/* **************************************************************
+*  Compiler specifics
+****************************************************************/
+/* Make the `inline` keyword usable everywhere : native in C++ and C99 or later,
+ * mapped to __inline for Visual Studio, defined to nothing for any other compiler. */
+#if defined (__cplusplus) || (defined (__STDC_VERSION__) && (__STDC_VERSION__ >= 199901L) /* C99 */)
+/* inline is defined */
+#elif defined(_MSC_VER)
+#  define inline __inline
+#else
+#  define inline /* disable inline */
+#endif
+
+
+#ifdef _MSC_VER    /* Visual Studio */
+#  pragma warning(disable : 4127)        /* disable: C4127: conditional expression is constant */
+#endif
+
+
+/* **************************************************************
+*  Includes
+****************************************************************/
+#include <stdlib.h>     /* malloc, free, qsort */
+#include <string.h>     /* memcpy, memset */
+#include <stdio.h>      /* printf (debug) */
+
+
+/* **************************************************************
+*  Constants
+****************************************************************/
+#define HUFv05_ABSOLUTEMAX_TABLELOG  16   /* absolute limit of HUFv05_MAX_TABLELOG. Beyond that value, code does not work */
+#define HUFv05_MAX_TABLELOG  12           /* max configured tableLog (for static allocation); can be modified up to HUFv05_ABSOLUTEMAX_TABLELOG */
+#define HUFv05_DEFAULT_TABLELOG  HUFv05_MAX_TABLELOG   /* tableLog by default, when not specified */
+#define HUFv05_MAX_SYMBOL_VALUE 255       /* largest symbol value; per-symbol arrays are sized HUFv05_MAX_SYMBOL_VALUE + 1 */
+/* compile-time guard : the configured limit must stay within the absolute one */
+#if (HUFv05_MAX_TABLELOG > HUFv05_ABSOLUTEMAX_TABLELOG)
+#  error "HUFv05_MAX_TABLELOG is too large !"
+#endif
+
+
+/* **************************************************************
+*  Error Management
+****************************************************************/
+unsigned HUFv05_isError(size_t code) { return ERR_isError(code); }
+const char* HUFv05_getErrorName(size_t code) { return ERR_getErrorName(code); }
+#define HUFv05_STATIC_ASSERT(c) { enum { HUFv05_static_assert = 1/(int)(!!(c)) }; }   /* use only *after* variable declarations */
+
+
+/* *******************************************************
+*  Huff0 : Huffman block decompression
+*********************************************************/
+/* X2 decoding cell : the decoded byte, and the number of bits to skip in the bitstream */
+typedef struct { BYTE byte; BYTE nbBits; } HUFv05_DEltX2;   /* single-symbol decoding */
+
+/* X4 decoding cell : up to 2 decoded bytes stored in `sequence` (written little-endian),
+ * the number of bits they consume, and how many of the bytes are valid (`length` is 1 or 2) */
+typedef struct { U16 sequence; BYTE nbBits; BYTE length; } HUFv05_DEltX4;  /* double-symbols decoding */
+
+/* a symbol paired with its weight, as used when ordering symbols by weight */
+typedef struct { BYTE symbol; BYTE weight; } sortedSymbol_t;
+
+/*! HUFv05_readStats() :
+    Reads a compact Huffman tree description (as saved by HUFv05_writeCTable) from `src`.
+    The first byte selects how the weights are stored :
+      - < 128    : that many following bytes hold the weights, compressed with FSEv05
+      - 128..241 : (byte - 127) weights stored raw, two 4-bit weights per byte
+      - >= 242   : RLE, a table-selected number of weights all equal to 1, no extra input
+    The weight of the last symbol is not stored : it is deduced so that the total reaches a power of 2.
+    @huffWeight   : destination buffer, receives one weight per symbol (deduced one included)
+    @hwSize       : capacity of `huffWeight`, in bytes
+    @rankStats    : receives the number of symbols of each weight;
+                    (HUFv05_ABSOLUTEMAX_TABLELOG + 1) entries are cleared here
+    @nbSymbolsPtr : receives the number of symbols, deduced one included
+    @tableLogPtr  : receives the table log deduced from the sum of weights
+    @return : size read from `src` (header byte included), or an error code
+*/
+static size_t HUFv05_readStats(BYTE* huffWeight, size_t hwSize, U32* rankStats,
+                            U32* nbSymbolsPtr, U32* tableLogPtr,
+                            const void* src, size_t srcSize)
+{
+    U32 weightTotal;
+    U32 tableLog;
+    const BYTE* ip = (const BYTE*) src;
+    size_t iSize;   /* nb of input bytes following the header byte */
+    size_t oSize;   /* nb of explicitly stored weights */
+    U32 n;
+
+    if (!srcSize) return ERROR(srcSize_wrong);
+    iSize = ip[0];
+    //memset(huffWeight, 0, hwSize);   /* is not necessary, even though some analyzer complain ... */
+
+    if (iSize >= 128)  { /* special header */
+        if (iSize >= (242)) {  /* RLE */
+            static int l[14] = { 1, 2, 3, 4, 7, 8, 15, 16, 31, 32, 63, 64, 127, 128 };
+            oSize = l[iSize-242];
+            memset(huffWeight, 1, hwSize);   /* every weight is 1; the whole buffer is filled, not just oSize entries */
+            iSize = 0;                       /* no payload after the header byte */
+        }
+        else {   /* Incompressible */
+            oSize = iSize - 127;
+            iSize = ((oSize+1)/2);           /* two weights per input byte, rounded up */
+            if (iSize+1 > srcSize) return ERROR(srcSize_wrong);
+            if (oSize >= hwSize) return ERROR(corruption_detected);
+            ip += 1;
+            /* high nibble first; when oSize is odd the last low nibble lands in huffWeight[oSize],
+             * which is overwritten below by the deduced last weight */
+            for (n=0; n<oSize; n+=2) {
+                huffWeight[n]   = ip[n/2] >> 4;
+                huffWeight[n+1] = ip[n/2] & 15;
+    }   }   }
+    else  {   /* header compressed with FSEv05 (normal case) */
+        if (iSize+1 > srcSize) return ERROR(srcSize_wrong);
+        oSize = FSEv05_decompress(huffWeight, hwSize-1, ip+1, iSize);   /* max (hwSize-1) values decoded, as last one is implied */
+        if (FSEv05_isError(oSize)) return oSize;
+    }
+
+    /* collect weight stats */
+    memset(rankStats, 0, (HUFv05_ABSOLUTEMAX_TABLELOG + 1) * sizeof(U32));
+    weightTotal = 0;
+    for (n=0; n<oSize; n++) {
+        if (huffWeight[n] >= HUFv05_ABSOLUTEMAX_TABLELOG) return ERROR(corruption_detected);
+        rankStats[huffWeight[n]]++;
+        weightTotal += (1 << huffWeight[n]) >> 1;   /* weight w counts for 2^(w-1), weight 0 counts for nothing */
+    }
+    if (weightTotal == 0) return ERROR(corruption_detected);
+
+    /* get last non-null symbol weight (implied, total must be 2^n) */
+    tableLog = BITv05_highbit32(weightTotal) + 1;
+    if (tableLog > HUFv05_ABSOLUTEMAX_TABLELOG) return ERROR(corruption_detected);
+    {   /* determine last weight */
+        U32 total = 1 << tableLog;
+        U32 rest = total - weightTotal;   /* what the implied symbol must contribute to reach 2^tableLog */
+        U32 verif = 1 << BITv05_highbit32(rest);
+        U32 lastWeight = BITv05_highbit32(rest) + 1;
+        if (verif != rest) return ERROR(corruption_detected);    /* last value must be a clean power of 2 */
+        huffWeight[oSize] = (BYTE)lastWeight;
+        rankStats[lastWeight]++;
+    }
+
+    /* check tree construction validity */
+    if ((rankStats[1] < 2) || (rankStats[1] & 1)) return ERROR(corruption_detected);   /* by construction : at least 2 elts of rank 1, must be even */
+
+    /* results */
+    *nbSymbolsPtr = (U32)(oSize+1);
+    *tableLogPtr = tableLog;
+    return iSize+1;
+}
+
+
+/*-***************************/
+/*  single-symbol decoding   */
+/*-***************************/
+
+/*! HUFv05_readDTableX2() :
+ *  Reads a Huffman tree description from `src` and builds the single-symbol decoding table.
+ *  @DTable : on entry, DTable[0] holds the largest tableLog the table can host;
+ *            on success, DTable[0] is replaced by the actual tableLog
+ *            and the following cells are filled as HUFv05_DEltX2 entries.
+ *  @return : size read from `src`, or an error code
+ *            (tableLog_tooLarge when the tree needs more than DTable[0] bits) */
+size_t HUFv05_readDTableX2 (U16* DTable, const void* src, size_t srcSize)
+{
+    BYTE huffWeight[HUFv05_MAX_SYMBOL_VALUE + 1];
+    U32 rankVal[HUFv05_ABSOLUTEMAX_TABLELOG + 1];   /* large enough for values from 0 to 16 */
+    U32 tableLog = 0;
+    size_t iSize;
+    U32 nbSymbols = 0;
+    U32 n;
+    U32 nextRankStart;
+    void* const dtPtr = DTable + 1;   /* decoding cells start after the leading tableLog cell */
+    HUFv05_DEltX2* const dt = (HUFv05_DEltX2*)dtPtr;
+
+    HUFv05_STATIC_ASSERT(sizeof(HUFv05_DEltX2) == sizeof(U16));   /* if compilation fails here, assertion is false */
+    //memset(huffWeight, 0, sizeof(huffWeight));   /* is not necessary, even though some analyzer complain ... */
+
+    /* rankVal first receives the number of symbols per weight */
+    iSize = HUFv05_readStats(huffWeight, HUFv05_MAX_SYMBOL_VALUE + 1, rankVal, &nbSymbols, &tableLog, src, srcSize);
+    if (HUFv05_isError(iSize)) return iSize;
+
+    /* check result */
+    if (tableLog > DTable[0]) return ERROR(tableLog_tooLarge);   /* DTable is too small */
+    DTable[0] = (U16)tableLog;   /* maybe should separate sizeof allocated DTable, from used size of DTable, in case of re-use */
+
+    /* Prepare ranks */
+    /* turn per-weight counts into the first cell index of each weight :
+     * a symbol of weight n occupies 2^(n-1) consecutive cells */
+    nextRankStart = 0;
+    for (n=1; n<=tableLog; n++) {
+        U32 current = nextRankStart;
+        nextRankStart += (rankVal[n] << (n-1));
+        rankVal[n] = current;
+    }
+
+    /* fill DTable */
+    for (n=0; n<nbSymbols; n++) {
+        const U32 w = huffWeight[n];
+        const U32 length = (1 << w) >> 1;   /* nb of cells for this symbol; 0 when w==0, so such symbols get no cell */
+        U32 i;
+        HUFv05_DEltX2 D;
+        D.byte = (BYTE)n; D.nbBits = (BYTE)(tableLog + 1 - w);
+        for (i = rankVal[w]; i < rankVal[w] + length; i++)
+            dt[i] = D;
+        rankVal[w] += length;   /* next symbol of the same weight starts right after */
+    }
+
+    return iSize;
+}
+
+/* HUFv05_decodeSymbolX2() :
+ * Decodes one byte : peeks `dtLog` bits to select a table cell,
+ * then skips the number of bits recorded in that cell.
+ * @return : the byte stored in the selected cell */
+static BYTE HUFv05_decodeSymbolX2(BITv05_DStream_t* Dstream, const HUFv05_DEltX2* dt, const U32 dtLog)
+{
+    /* note : dtLog >= 1 */
+    const HUFv05_DEltX2* const cell = dt + BITv05_lookBitsFast(Dstream, dtLog);
+    const BYTE decoded = cell->byte;
+    BITv05_skipBits(Dstream, cell->nbBits);
+    return decoded;
+}
+
+/* Decoding helpers; all of them expect `dt` and `dtLog` to be visible where they are expanded. */
+
+/* _0 : always decodes one symbol into *ptr and advances ptr */
+#define HUFv05_DECODE_SYMBOLX2_0(ptr, DStreamPtr) \
+    *ptr++ = HUFv05_decodeSymbolX2(DStreamPtr, dt, dtLog)
+
+/* _1 : decodes one symbol only on 64-bit targets, or when HUFv05_MAX_TABLELOG <= 12.
+ * NOTE(review): expands to an unbraced `if`; use it as a full statement only. */
+#define HUFv05_DECODE_SYMBOLX2_1(ptr, DStreamPtr) \
+    if (MEM_64bits() || (HUFv05_MAX_TABLELOG<=12)) \
+        HUFv05_DECODE_SYMBOLX2_0(ptr, DStreamPtr)
+
+/* _2 : decodes one symbol only on 64-bit targets.
+ * NOTE(review): expands to an unbraced `if`; use it as a full statement only. */
+#define HUFv05_DECODE_SYMBOLX2_2(ptr, DStreamPtr) \
+    if (MEM_64bits()) \
+        HUFv05_DECODE_SYMBOLX2_0(ptr, DStreamPtr)
+
+/* HUFv05_decodeStreamX2() :
+ * Decodes symbols from `bitDPtr` into [p, pEnd) using the single-symbol table `dt`.
+ * The output range is always filled up to `pEnd`; whether the bitstream ended exactly there
+ * is not checked here and is left to the caller.
+ * @return : pEnd - p, i.e. the size of the range that was requested */
+static inline size_t HUFv05_decodeStreamX2(BYTE* p, BITv05_DStream_t* const bitDPtr, BYTE* const pEnd, const HUFv05_DEltX2* const dt, const U32 dtLog)
+{
+    BYTE* const pStart = p;
+
+    /* up to 4 symbols at a time */
+    /* the _1 and _2 variants decode conditionally, so one iteration yields between 1 and 4 symbols */
+    while ((BITv05_reloadDStream(bitDPtr) == BITv05_DStream_unfinished) && (p <= pEnd-4)) {
+        HUFv05_DECODE_SYMBOLX2_2(p, bitDPtr);
+        HUFv05_DECODE_SYMBOLX2_1(p, bitDPtr);
+        HUFv05_DECODE_SYMBOLX2_2(p, bitDPtr);
+        HUFv05_DECODE_SYMBOLX2_0(p, bitDPtr);
+    }
+
+    /* closer to the end */
+    /* one symbol per reload, while the reload status is still `unfinished` */
+    while ((BITv05_reloadDStream(bitDPtr) == BITv05_DStream_unfinished) && (p < pEnd))
+        HUFv05_DECODE_SYMBOLX2_0(p, bitDPtr);
+
+    /* no more data to retrieve from bitstream, hence no need to reload */
+    while (p < pEnd)
+        HUFv05_DECODE_SYMBOLX2_0(p, bitDPtr);
+
+    return pEnd-pStart;
+}
+
+/*! HUFv05_decompress1X2_usingDTable() :
+ *  Decodes `dstSize` bytes from the single bitstream `cSrc`, using the single-symbol table `DTable`.
+ *  @return : dstSize on success, or an error code :
+ *            dstSize_tooSmall when dstSize <= cSrcSize,
+ *            the error reported by BITv05_initDStream(),
+ *            corruption_detected when the bitstream is not exactly consumed. */
+size_t HUFv05_decompress1X2_usingDTable(
+          void* dst,  size_t dstSize,
+    const void* cSrc, size_t cSrcSize,
+    const U16* DTable)
+{
+    BYTE* const outBegin = (BYTE*)dst;
+    BYTE* const outEnd = outBegin + dstSize;
+    const void* const tableRaw = DTable;
+    const HUFv05_DEltX2* const cells = ((const HUFv05_DEltX2*)tableRaw) + 1;   /* skip the leading tableLog cell */
+    const U32 tableLog = DTable[0];
+    BITv05_DStream_t stream;
+    size_t initResult;
+
+    if (dstSize <= cSrcSize) return ERROR(dstSize_tooSmall);
+
+    initResult = BITv05_initDStream(&stream, cSrc, cSrcSize);
+    if (HUFv05_isError(initResult)) return initResult;
+
+    HUFv05_decodeStreamX2(outBegin, &stream, outEnd, cells, tableLog);
+
+    /* the output is full : the input must be exactly exhausted as well */
+    return BITv05_endOfDStream(&stream) ? dstSize : ERROR(corruption_detected);
+}
+
+/*! HUFv05_decompress1X2() :
+ *  Reads the Huffman table stored at the start of `cSrc` into a local table,
+ *  then decodes the remaining input as a single stream with the single-symbol decoder.
+ *  @return : result of HUFv05_decompress1X2_usingDTable(), or an error code
+ *            (srcSize_wrong when nothing is left after the table description) */
+size_t HUFv05_decompress1X2 (void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize)
+{
+    HUFv05_CREATE_STATIC_DTABLEX2(DTable, HUFv05_MAX_TABLELOG);
+    const BYTE* const srcBegin = (const BYTE*) cSrc;
+    size_t const headerSize = HUFv05_readDTableX2 (DTable, cSrc, cSrcSize);
+
+    if (HUFv05_isError(headerSize)) return headerSize;
+    if (headerSize >= cSrcSize) return ERROR(srcSize_wrong);
+
+    /* payload starts right after the table description */
+    return HUFv05_decompress1X2_usingDTable (dst, dstSize, srcBegin + headerSize, cSrcSize - headerSize, DTable);
+}
+
+
+/*! HUFv05_decompress4X2_usingDTable() :
+ *  Decodes `dstSize` bytes from 4 Huffman bitstreams, using the single-symbol table `DTable`.
+ *  `cSrc` starts with a 6-byte jump table (three little-endian 16-bit stream lengths);
+ *  the 4th stream takes whatever remains of `cSrcSize`.
+ *  Streams 1 to 3 each regenerate a segment of (dstSize+3)/4 bytes, stream 4 the remainder.
+ *  @return : dstSize on success, or an error code testable with HUFv05_isError()
+ *            (corruption_detected for inconsistent sizes or bitstreams not exactly consumed). */
+size_t HUFv05_decompress4X2_usingDTable(
+          void* dst,  size_t dstSize,
+    const void* cSrc, size_t cSrcSize,
+    const U16* DTable)
+{
+    /* Check */
+    /* must come first : the jump table is read by the initializers below,
+     * which would otherwise read past the end of a too short input */
+    if (cSrcSize < 10) return ERROR(corruption_detected);   /* strict minimum : jump table + 1 byte per stream */
+
+    {   const BYTE* const istart = (const BYTE*) cSrc;
+        BYTE* const ostart = (BYTE*) dst;
+        BYTE* const oend = ostart + dstSize;
+        const void* const dtPtr = DTable;
+        const HUFv05_DEltX2* const dt = ((const HUFv05_DEltX2*)dtPtr) +1;
+        const U32 dtLog = DTable[0];
+        size_t errorCode;
+
+        /* Init */
+        BITv05_DStream_t bitD1;
+        BITv05_DStream_t bitD2;
+        BITv05_DStream_t bitD3;
+        BITv05_DStream_t bitD4;
+        const size_t length1 = MEM_readLE16(istart);
+        const size_t length2 = MEM_readLE16(istart+2);
+        const size_t length3 = MEM_readLE16(istart+4);
+        size_t length4;
+        const BYTE* const istart1 = istart + 6;  /* jumpTable */
+        const BYTE* const istart2 = istart1 + length1;
+        const BYTE* const istart3 = istart2 + length2;
+        const BYTE* const istart4 = istart3 + length3;
+        const size_t segmentSize = (dstSize+3) / 4;
+        BYTE* const opStart2 = ostart + segmentSize;
+        BYTE* const opStart3 = opStart2 + segmentSize;
+        BYTE* const opStart4 = opStart3 + segmentSize;
+        BYTE* op1 = ostart;
+        BYTE* op2 = opStart2;
+        BYTE* op3 = opStart3;
+        BYTE* op4 = opStart4;
+        U32 endSignal;
+
+        length4 = cSrcSize - (length1 + length2 + length3 + 6);
+        if (length4 > cSrcSize) return ERROR(corruption_detected);   /* overflow */
+        /* the first 3 segments must fit into dst, otherwise streams 2 and 3 would write beyond oend */
+        if (3 * segmentSize > dstSize) return ERROR(corruption_detected);
+        errorCode = BITv05_initDStream(&bitD1, istart1, length1);
+        if (HUFv05_isError(errorCode)) return errorCode;
+        errorCode = BITv05_initDStream(&bitD2, istart2, length2);
+        if (HUFv05_isError(errorCode)) return errorCode;
+        errorCode = BITv05_initDStream(&bitD3, istart3, length3);
+        if (HUFv05_isError(errorCode)) return errorCode;
+        errorCode = BITv05_initDStream(&bitD4, istart4, length4);
+        if (HUFv05_isError(errorCode)) return errorCode;
+
+        /* 16-32 symbols per loop (4-8 symbols per stream) */
+        /* the 4 reload statuses are OR-ed : the loop stops as soon as one stream is no longer `unfinished` */
+        endSignal = BITv05_reloadDStream(&bitD1) | BITv05_reloadDStream(&bitD2) | BITv05_reloadDStream(&bitD3) | BITv05_reloadDStream(&bitD4);
+        for ( ; (endSignal==BITv05_DStream_unfinished) && (op4<(oend-7)) ; ) {
+            HUFv05_DECODE_SYMBOLX2_2(op1, &bitD1);
+            HUFv05_DECODE_SYMBOLX2_2(op2, &bitD2);
+            HUFv05_DECODE_SYMBOLX2_2(op3, &bitD3);
+            HUFv05_DECODE_SYMBOLX2_2(op4, &bitD4);
+            HUFv05_DECODE_SYMBOLX2_1(op1, &bitD1);
+            HUFv05_DECODE_SYMBOLX2_1(op2, &bitD2);
+            HUFv05_DECODE_SYMBOLX2_1(op3, &bitD3);
+            HUFv05_DECODE_SYMBOLX2_1(op4, &bitD4);
+            HUFv05_DECODE_SYMBOLX2_2(op1, &bitD1);
+            HUFv05_DECODE_SYMBOLX2_2(op2, &bitD2);
+            HUFv05_DECODE_SYMBOLX2_2(op3, &bitD3);
+            HUFv05_DECODE_SYMBOLX2_2(op4, &bitD4);
+            HUFv05_DECODE_SYMBOLX2_0(op1, &bitD1);
+            HUFv05_DECODE_SYMBOLX2_0(op2, &bitD2);
+            HUFv05_DECODE_SYMBOLX2_0(op3, &bitD3);
+            HUFv05_DECODE_SYMBOLX2_0(op4, &bitD4);
+            endSignal = BITv05_reloadDStream(&bitD1) | BITv05_reloadDStream(&bitD2) | BITv05_reloadDStream(&bitD3) | BITv05_reloadDStream(&bitD4);
+        }
+
+        /* check corruption */
+        /* a stream must not have written into the next segment */
+        if (op1 > opStart2) return ERROR(corruption_detected);
+        if (op2 > opStart3) return ERROR(corruption_detected);
+        if (op3 > opStart4) return ERROR(corruption_detected);
+        /* note : op4 supposed already verified within main loop */
+
+        /* finish bitStreams one by one */
+        HUFv05_decodeStreamX2(op1, &bitD1, opStart2, dt, dtLog);
+        HUFv05_decodeStreamX2(op2, &bitD2, opStart3, dt, dtLog);
+        HUFv05_decodeStreamX2(op3, &bitD3, opStart4, dt, dtLog);
+        HUFv05_decodeStreamX2(op4, &bitD4, oend,     dt, dtLog);
+
+        /* check */
+        /* every bitstream must be exactly consumed */
+        endSignal = BITv05_endOfDStream(&bitD1) & BITv05_endOfDStream(&bitD2) & BITv05_endOfDStream(&bitD3) & BITv05_endOfDStream(&bitD4);
+        if (!endSignal) return ERROR(corruption_detected);
+
+        /* decoded size */
+        return dstSize;
+    }
+}
+
+
+/*! HUFv05_decompress4X2() :
+ *  Reads the Huffman table stored at the start of `cSrc` into a local table,
+ *  then decodes the remaining input as 4 streams with the single-symbol decoder.
+ *  @return : result of HUFv05_decompress4X2_usingDTable(), or an error code
+ *            (srcSize_wrong when nothing is left after the table description) */
+size_t HUFv05_decompress4X2 (void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize)
+{
+    HUFv05_CREATE_STATIC_DTABLEX2(DTable, HUFv05_MAX_TABLELOG);
+    const BYTE* const srcBegin = (const BYTE*) cSrc;
+    size_t const headerSize = HUFv05_readDTableX2 (DTable, cSrc, cSrcSize);
+
+    if (HUFv05_isError(headerSize)) return headerSize;
+    if (headerSize >= cSrcSize) return ERROR(srcSize_wrong);
+
+    /* payload starts right after the table description */
+    return HUFv05_decompress4X2_usingDTable (dst, dstSize, srcBegin + headerSize, cSrcSize - headerSize, DTable);
+}
+
+
+/* *************************/
+/* double-symbols decoding */
+/* *************************/
+
+/* HUFv05_fillDTableX4Level2() :
+ * Fills a sub-table of (1 << sizeLog) cells for sequences whose first symbol is already known.
+ * @DTable         : first cell of the sub-table
+ * @consumed       : nb of bits used by the first symbol, added to every cell's nbBits
+ * @rankValOrigin  : start index of each weight inside the sub-table (copied, not modified)
+ * @minWeight      : when > 1, the first rankValOrigin[minWeight] cells get a single-symbol entry
+ * @sortedSymbols  : candidate second symbols (`sortedListSize` of them)
+ * @nbBitsBaseline : a second symbol of weight w uses (nbBitsBaseline - w) bits
+ * @baseSeq        : the first symbol, stored in the low byte of each cell's sequence */
+static void HUFv05_fillDTableX4Level2(HUFv05_DEltX4* DTable, U32 sizeLog, const U32 consumed,
+                           const U32* rankValOrigin, const int minWeight,
+                           const sortedSymbol_t* sortedSymbols, const U32 sortedListSize,
+                           U32 nbBitsBaseline, U16 baseSeq)
+{
+    HUFv05_DEltX4 DElt;
+    U32 rankVal[HUFv05_ABSOLUTEMAX_TABLELOG + 1];
+    U32 s;
+
+    /* get pre-calculated rankVal */
+    memcpy(rankVal, rankValOrigin, sizeof(rankVal));
+
+    /* fill skipped values */
+    /* these cells decode the first symbol alone : length 1, only `consumed` bits */
+    if (minWeight>1) {
+        U32 i, skipSize = rankVal[minWeight];
+        MEM_writeLE16(&(DElt.sequence), baseSeq);
+        DElt.nbBits   = (BYTE)(consumed);
+        DElt.length   = 1;
+        for (i = 0; i < skipSize; i++)
+            DTable[i] = DElt;
+    }
+
+    /* fill DTable */
+    for (s=0; s<sortedListSize; s++) {   /* note : sortedSymbols already skipped */
+        const U32 symbol = sortedSymbols[s].symbol;
+        const U32 weight = sortedSymbols[s].weight;
+        const U32 nbBits = nbBitsBaseline - weight;
+        const U32 length = 1 << (sizeLog-nbBits);   /* nb of cells sharing this 2-symbols sequence */
+        const U32 start = rankVal[weight];
+        U32 i = start;
+        const U32 end = start + length;
+
+        /* second symbol goes into the high byte of the sequence */
+        MEM_writeLE16(&(DElt.sequence), (U16)(baseSeq + (symbol << 8)));
+        DElt.nbBits = (BYTE)(nbBits + consumed);
+        DElt.length = 2;
+        do { DTable[i++] = DElt; } while (i<end);   /* since length >= 1 */
+
+        rankVal[weight] += length;   /* next symbol of the same weight starts right after */
+    }
+}
+
+/* rankVal_t : HUFv05_ABSOLUTEMAX_TABLELOG rows of (HUFv05_ABSOLUTEMAX_TABLELOG + 1) start indexes;
+ * HUFv05_fillDTableX4() selects the row with the nb of bits already consumed by the first symbol */
+typedef U32 rankVal_t[HUFv05_ABSOLUTEMAX_TABLELOG][HUFv05_ABSOLUTEMAX_TABLELOG + 1];
+
+/* HUFv05_fillDTableX4() :
+ * Fills the double-symbols decoding table of (1 << targetLog) cells.
+ * For each symbol of `sortedList`, either a sub-table of 2-symbols sequences is built
+ * (when enough bits remain for a second symbol), or its cells get a single-symbol entry.
+ * @rankStart      : index, inside `sortedList`, of the first symbol of each weight
+ * @rankValOrigin  : per consumed-bits rows of start indexes; row 0 gives the start cell of each weight
+ * @maxWeight      : largest weight in use; (nbBitsBaseline - maxWeight) is the shortest code length
+ * @nbBitsBaseline : a symbol of weight w uses (nbBitsBaseline - w) bits */
+static void HUFv05_fillDTableX4(HUFv05_DEltX4* DTable, const U32 targetLog,
+                           const sortedSymbol_t* sortedList, const U32 sortedListSize,
+                           const U32* rankStart, rankVal_t rankValOrigin, const U32 maxWeight,
+                           const U32 nbBitsBaseline)
+{
+    U32 rankVal[HUFv05_ABSOLUTEMAX_TABLELOG + 1];
+    const int scaleLog = nbBitsBaseline - targetLog;   /* note : targetLog >= srcLog, hence scaleLog <= 1 */
+    const U32 minBits  = nbBitsBaseline - maxWeight;
+    U32 s;
+
+    /* sizeof(rankVal) is one row : only the first row of rankValOrigin is copied */
+    memcpy(rankVal, rankValOrigin, sizeof(rankVal));
+
+    /* fill DTable */
+    for (s=0; s<sortedListSize; s++) {
+        const U16 symbol = sortedList[s].symbol;
+        const U32 weight = sortedList[s].weight;
+        const U32 nbBits = nbBitsBaseline - weight;
+        const U32 start = rankVal[weight];
+        const U32 length = 1 << (targetLog-nbBits);   /* nb of cells owned by this symbol */
+
+        if (targetLog-nbBits >= minBits) {   /* enough room for a second symbol */
+            U32 sortedRank;
+            int minWeight = nbBits + scaleLog;
+            if (minWeight < 1) minWeight = 1;
+            sortedRank = rankStart[minWeight];   /* second symbols start at the first symbol of weight minWeight */
+            HUFv05_fillDTableX4Level2(DTable+start, targetLog-nbBits, nbBits,
+                           rankValOrigin[nbBits], minWeight,
+                           sortedList+sortedRank, sortedListSize-sortedRank,
+                           nbBitsBaseline, symbol);
+        } else {
+            /* no room for a second symbol : every cell decodes this symbol alone */
+            U32 i;
+            const U32 end = start + length;
+            HUFv05_DEltX4 DElt;
+
+            MEM_writeLE16(&(DElt.sequence), symbol);
+            DElt.nbBits   = (BYTE)(nbBits);
+            DElt.length   = 1;
+            for (i = start; i < end; i++)
+                DTable[i] = DElt;
+        }
+        rankVal[weight] += length;   /* next symbol of the same weight starts right after */
+    }
+}
+
+size_t HUFv05_readDTableX4 (U32* DTable, const void* src, size_t srcSize)
+{   /* Reads symbol weights from src (via HUFv05_readStats) and builds the double-symbol decoding table. DTable[0] must hold the table log on entry. @return : the value returned by HUFv05_readStats (callers skip that many bytes of src), or an error code */
+    BYTE weightList[HUFv05_MAX_SYMBOL_VALUE + 1];
+    sortedSymbol_t sortedSymbol[HUFv05_MAX_SYMBOL_VALUE + 1];
+    U32 rankStats[HUFv05_ABSOLUTEMAX_TABLELOG + 1] = { 0 };    /* nb of symbols per weight */
+    U32 rankStart0[HUFv05_ABSOLUTEMAX_TABLELOG + 2] = { 0 };
+    U32* const rankStart = rankStart0+1;   /* rankStart[w] aliases rankStart0[w+1] */
+    rankVal_t rankVal;
+    U32 tableLog, maxW, sizeOfSort, nbSymbols;
+    const U32 memLog = DTable[0];   /* log of the space available in DTable */
+    size_t iSize;
+    void* dtPtr = DTable;
+    HUFv05_DEltX4* const dt = ((HUFv05_DEltX4*)dtPtr) + 1;   /* entries start after the header cell DTable[0] */
+
+    HUFv05_STATIC_ASSERT(sizeof(HUFv05_DEltX4) == sizeof(U32));   /* if compilation fails here, assertion is false */
+    if (memLog > HUFv05_ABSOLUTEMAX_TABLELOG) return ERROR(tableLog_tooLarge);
+    //memset(weightList, 0, sizeof(weightList));   /* is not necessary, even though some analyzer complain ... */
+
+    iSize = HUFv05_readStats(weightList, HUFv05_MAX_SYMBOL_VALUE + 1, rankStats, &nbSymbols, &tableLog, src, srcSize);
+    if (HUFv05_isError(iSize)) return iSize;
+
+    /* check result */
+    if (tableLog > memLog) return ERROR(tableLog_tooLarge);   /* DTable can't fit code depth */
+
+    /* find maxWeight */
+    for (maxW = tableLog; rankStats[maxW]==0; maxW--) {}  /* necessarily finds a solution before 0 */
+
+    /* Get start index of each weight */
+    {
+        U32 w, nextRankStart = 0;
+        for (w=1; w<=maxW; w++) {
+            U32 current = nextRankStart;
+            nextRankStart += rankStats[w];
+            rankStart[w] = current;
+        }
+        rankStart[0] = nextRankStart;   /* put all 0w symbols at the end of sorted list*/
+        sizeOfSort = nextRankStart;     /* nb of symbols with a non-zero weight */
+    }
+
+    /* sort symbols by weight */
+    {
+        U32 s;
+        for (s=0; s<nbSymbols; s++) {
+            U32 w = weightList[s];
+            U32 r = rankStart[w]++;   /* next free position for this weight; ends up at the end of rank w */
+            sortedSymbol[r].symbol = (BYTE)s;
+            sortedSymbol[r].weight = (BYTE)w;
+        }
+        rankStart[0] = 0;   /* forget 0w symbols; this is beginning of weight(1) */
+    }
+
+    /* Build rankVal */
+    {
+        const U32 minBits = tableLog+1 - maxW;   /* code length of the heaviest symbol */
+        U32 nextRankVal = 0;
+        U32 w, consumed;
+        const int rescale = (memLog-tableLog) - 1;   /* tableLog <= memLog */
+        U32* rankVal0 = rankVal[0];
+        for (w=1; w<=maxW; w++) {
+            U32 current = nextRankVal;
+            nextRankVal += rankStats[w] << (w+rescale);   /* a symbol of weight w fills 2^(w+rescale) slots of the memLog-bit table */
+            rankVal0[w] = current;
+        }
+        for (consumed = minBits; consumed <= memLog - minBits; consumed++) {   /* only rows that can host a second symbol are filled */
+            U32* rankValPtr = rankVal[consumed];
+            for (w = 1; w <= maxW; w++) {
+                rankValPtr[w] = rankVal0[w] >> consumed;
+    }   }   }
+
+    HUFv05_fillDTableX4(dt, memLog,
+                   sortedSymbol, sizeOfSort,
+                   rankStart0, rankVal, maxW,   /* rankStart0, not rankStart : the sort left rankStart[w] at the start of rank w+1, so the view shifted by one gives start indices again */
+                   tableLog+1);                 /* nbBitsBaseline : code length = tableLog+1 - weight */
+
+    return iSize;
+}
+
+
+static U32 HUFv05_decodeSymbolX4(void* op, BITv05_DStream_t* DStream, const HUFv05_DEltX4* dt, const U32 dtLog)
+{   /* Looks up one table entry, always stores 2 bytes at op, consumes the entry's bits. @return : the entry's length (nb of symbols it represents) */
+    const HUFv05_DEltX4* const entry = dt + BITv05_lookBitsFast(DStream, dtLog);   /* note : dtLog >= 1 */
+    memcpy(op, entry, 2);   /* first 2 bytes of the entry (presumably its `sequence` field) */
+    BITv05_skipBits(DStream, entry->nbBits);
+    return entry->length;
+}
+
+static U32 HUFv05_decodeLastSymbolX4(void* op, BITv05_DStream_t* DStream, const HUFv05_DEltX4* dt, const U32 dtLog)
+{   /* Decodes the final output byte : stores exactly 1 byte at op, even when the entry found represents 2 symbols. Always returns 1. */
+    const HUFv05_DEltX4* const entry = dt + BITv05_lookBitsFast(DStream, dtLog);   /* note : dtLog >= 1 */
+    const size_t containerBits = sizeof(DStream->bitContainer)*8;
+    memcpy(op, entry, 1);
+    if (entry->length == 1) {
+        BITv05_skipBits(DStream, entry->nbBits);
+    } else if (DStream->bitsConsumed < containerBits) {
+        BITv05_skipBits(DStream, entry->nbBits);
+        if (DStream->bitsConsumed > containerBits) DStream->bitsConsumed = containerBits;   /* ugly hack; works only because it's the last symbol. Note : can't easily extract nbBits from just this symbol */
+    }
+    return 1;
+}
+
+
+#define HUFv05_DECODE_SYMBOLX4_0(ptr, DStreamPtr) \
+    ptr += HUFv05_decodeSymbolX4(ptr, DStreamPtr, dt, dtLog)
+
+#define HUFv05_DECODE_SYMBOLX4_1(ptr, DStreamPtr) \
+    if (MEM_64bits() || (HUFv05_MAX_TABLELOG<=12)) \
+        ptr += HUFv05_decodeSymbolX4(ptr, DStreamPtr, dt, dtLog)
+
+#define HUFv05_DECODE_SYMBOLX4_2(ptr, DStreamPtr) \
+    if (MEM_64bits()) \
+        ptr += HUFv05_decodeSymbolX4(ptr, DStreamPtr, dt, dtLog)
+
+static inline size_t HUFv05_decodeStreamX4(BYTE* p, BITv05_DStream_t* bitDPtr, BYTE* const pEnd, const HUFv05_DEltX4* const dt, const U32 dtLog)
+{   /* Decodes one bitstream into [p, pEnd) using the double-symbol table dt. @return : nb of bytes written */
+    BYTE* const pStart = p;
+
+    /* up to 8 symbols at a time */
+    while ((BITv05_reloadDStream(bitDPtr) == BITv05_DStream_unfinished) && (p < pEnd-7)) {   /* keeps 8 bytes of room : the most that 4 decodes can write */
+        HUFv05_DECODE_SYMBOLX4_2(p, bitDPtr);
+        HUFv05_DECODE_SYMBOLX4_1(p, bitDPtr);
+        HUFv05_DECODE_SYMBOLX4_2(p, bitDPtr);
+        HUFv05_DECODE_SYMBOLX4_0(p, bitDPtr);
+    }
+
+    /* closer to the end */
+    while ((BITv05_reloadDStream(bitDPtr) == BITv05_DStream_unfinished) && (p <= pEnd-2))   /* each decode stores 2 bytes, so 2 bytes of room are required */
+        HUFv05_DECODE_SYMBOLX4_0(p, bitDPtr);
+
+    while (p <= pEnd-2)
+        HUFv05_DECODE_SYMBOLX4_0(p, bitDPtr);   /* no need to reload : reached the end of DStream */
+
+    if (p < pEnd)   /* exactly 1 byte of room left : use the variant that stores a single byte */
+        p += HUFv05_decodeLastSymbolX4(p, bitDPtr, dt, dtLog);
+
+    return p-pStart;
+}
+
+
+size_t HUFv05_decompress1X4_usingDTable(
+          void* dst,  size_t dstSize,
+    const void* cSrc, size_t cSrcSize,
+    const U32* DTable)
+{
+    /* Single-stream decoding with a ready-made double-symbol table.
+     * DTable[0] carries the table log; the decoding entries follow it.
+     * @return : dstSize on success, or an error code. */
+    BYTE* const out = (BYTE*) dst;
+    const void* const tableBase = DTable;
+    const HUFv05_DEltX4* const entries = ((const HUFv05_DEltX4*)tableBase) +1;
+    const U32 tableLog = DTable[0];
+    BITv05_DStream_t stream;
+
+    /* open the bitstream over the whole compressed input */
+    {   const size_t initResult = BITv05_initDStream(&stream, (const BYTE*) cSrc, cSrcSize);
+        if (HUFv05_isError(initResult)) return initResult;
+    }
+
+    /* regenerate dstSize bytes; the byte count returned by the decoder is not used */
+    HUFv05_decodeStreamX4(out, &stream, out + dstSize, entries, tableLog);
+
+    /* the stream must report end-of-data, otherwise the input is treated as corrupted */
+    if (!BITv05_endOfDStream(&stream)) return ERROR(corruption_detected);
+
+    /* decoded size */
+    return dstSize;
+}
+
+size_t HUFv05_decompress1X4 (void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize)
+{
+    /* Single-stream, double-symbol decoding : builds the table from the description at the
+     * start of cSrc, then decodes what follows it. @return : the decoder's result, or an error code. */
+    HUFv05_CREATE_STATIC_DTABLEX4(DTable, HUFv05_MAX_TABLELOG);
+    const size_t headerSize = HUFv05_readDTableX4 (DTable, cSrc, cSrcSize);
+
+    if (HUFv05_isError(headerSize)) return headerSize;
+    if (headerSize >= cSrcSize) return ERROR(srcSize_wrong);   /* nothing would be left to decode */
+
+    return HUFv05_decompress1X4_usingDTable (dst, dstSize,
+                (const BYTE*) cSrc + headerSize, cSrcSize - headerSize, DTable);
+}
+
+size_t HUFv05_decompress4X4_usingDTable(
+          void* dst,  size_t dstSize,
+    const void* cSrc, size_t cSrcSize,
+    const U32* DTable)
+{   /* 4-streams decoding with a ready-made double-symbol table (DTable[0] = table log, entries follow). @return : dstSize, or an error code */
+    if (cSrcSize < 10) return ERROR(corruption_detected);   /* strict minimum : jump table + 1 byte per stream */
+
+    {
+        const BYTE* const istart = (const BYTE*) cSrc;
+        BYTE* const ostart = (BYTE*) dst;
+        BYTE* const oend = ostart + dstSize;
+        const void* const dtPtr = DTable;
+        const HUFv05_DEltX4* const dt = ((const HUFv05_DEltX4*)dtPtr) +1;
+        const U32 dtLog = DTable[0];
+        size_t errorCode;
+
+        /* Init */
+        BITv05_DStream_t bitD1, bitD2, bitD3, bitD4;
+        /* jump table : three little-endian 16-bit stream sizes; the 4th stream takes whatever remains of cSrc */
+        const size_t length1 = MEM_readLE16(istart);
+        const size_t length2 = MEM_readLE16(istart+2);
+        const size_t length3 = MEM_readLE16(istart+4);
+        size_t length4;
+        const BYTE* const istart1 = istart + 6;  /* jumpTable */
+        const BYTE* const istart2 = istart1 + length1;
+        const BYTE* const istart3 = istart2 + length2;
+        const BYTE* const istart4 = istart3 + length3;
+        /* dst is cut into segments of ceil(dstSize/4) bytes, one per stream; the 4th segment ends at oend */
+        const size_t segmentSize = (dstSize+3) / 4;
+        BYTE* const opStart2 = ostart + segmentSize;
+        BYTE* const opStart3 = opStart2 + segmentSize;
+        BYTE* const opStart4 = opStart3 + segmentSize;
+        BYTE* op1 = ostart;
+        BYTE* op2 = opStart2;
+        BYTE* op3 = opStart3;
+        BYTE* op4 = opStart4;
+        U32 endSignal;
+
+        length4 = cSrcSize - (length1 + length2 + length3 + 6);
+        if (length4 > cSrcSize) return ERROR(corruption_detected);   /* overflow */
+        if (opStart4 > oend) return ERROR(corruption_detected);      /* dst cannot hold 3 full segments (dstSize 1, 2 or 5) : streams 2-3 would write past oend */
+        errorCode = BITv05_initDStream(&bitD1, istart1, length1);
+        if (HUFv05_isError(errorCode)) return errorCode;
+        errorCode = BITv05_initDStream(&bitD2, istart2, length2);
+        if (HUFv05_isError(errorCode)) return errorCode;
+        errorCode = BITv05_initDStream(&bitD3, istart3, length3);
+        if (HUFv05_isError(errorCode)) return errorCode;
+        errorCode = BITv05_initDStream(&bitD4, istart4, length4);
+        if (HUFv05_isError(errorCode)) return errorCode;
+
+        /* 16-32 symbols per loop (4-8 symbols per stream) */
+        endSignal = BITv05_reloadDStream(&bitD1) | BITv05_reloadDStream(&bitD2) | BITv05_reloadDStream(&bitD3) | BITv05_reloadDStream(&bitD4);
+        for ( ; (endSignal==BITv05_DStream_unfinished) && (op4<(oend-7)) ; ) {   /* only op4 is bounded here : op1-op3 can at worst spill into the following segment, which is checked after the loop */
+            HUFv05_DECODE_SYMBOLX4_2(op1, &bitD1);
+            HUFv05_DECODE_SYMBOLX4_2(op2, &bitD2);
+            HUFv05_DECODE_SYMBOLX4_2(op3, &bitD3);
+            HUFv05_DECODE_SYMBOLX4_2(op4, &bitD4);
+            HUFv05_DECODE_SYMBOLX4_1(op1, &bitD1);
+            HUFv05_DECODE_SYMBOLX4_1(op2, &bitD2);
+            HUFv05_DECODE_SYMBOLX4_1(op3, &bitD3);
+            HUFv05_DECODE_SYMBOLX4_1(op4, &bitD4);
+            HUFv05_DECODE_SYMBOLX4_2(op1, &bitD1);
+            HUFv05_DECODE_SYMBOLX4_2(op2, &bitD2);
+            HUFv05_DECODE_SYMBOLX4_2(op3, &bitD3);
+            HUFv05_DECODE_SYMBOLX4_2(op4, &bitD4);
+            HUFv05_DECODE_SYMBOLX4_0(op1, &bitD1);
+            HUFv05_DECODE_SYMBOLX4_0(op2, &bitD2);
+            HUFv05_DECODE_SYMBOLX4_0(op3, &bitD3);
+            HUFv05_DECODE_SYMBOLX4_0(op4, &bitD4);
+
+            endSignal = BITv05_reloadDStream(&bitD1) | BITv05_reloadDStream(&bitD2) | BITv05_reloadDStream(&bitD3) | BITv05_reloadDStream(&bitD4);
+        }
+
+        /* check corruption */
+        if (op1 > opStart2) return ERROR(corruption_detected);
+        if (op2 > opStart3) return ERROR(corruption_detected);
+        if (op3 > opStart4) return ERROR(corruption_detected);
+        /* note : op4 supposed already verified within main loop */
+
+        /* finish bitStreams one by one */
+        HUFv05_decodeStreamX4(op1, &bitD1, opStart2, dt, dtLog);
+        HUFv05_decodeStreamX4(op2, &bitD2, opStart3, dt, dtLog);
+        HUFv05_decodeStreamX4(op3, &bitD3, opStart4, dt, dtLog);
+        HUFv05_decodeStreamX4(op4, &bitD4, oend,     dt, dtLog);
+
+        /* check */
+        endSignal = BITv05_endOfDStream(&bitD1) & BITv05_endOfDStream(&bitD2) & BITv05_endOfDStream(&bitD3) & BITv05_endOfDStream(&bitD4);
+        if (!endSignal) return ERROR(corruption_detected);   /* every stream must report end-of-data */
+
+        /* decoded size */
+        return dstSize;
+    }
+}
+
+
+size_t HUFv05_decompress4X4 (void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize)
+{
+    /* 4-streams, double-symbol decoding : builds the table from the description at the
+     * start of cSrc, then decodes what follows it. @return : the decoder's result, or an error code. */
+    HUFv05_CREATE_STATIC_DTABLEX4(DTable, HUFv05_MAX_TABLELOG);
+    const size_t headerSize = HUFv05_readDTableX4 (DTable, cSrc, cSrcSize);
+
+    if (HUFv05_isError(headerSize)) return headerSize;
+    if (headerSize >= cSrcSize) return ERROR(srcSize_wrong);   /* nothing would be left to decode */
+
+    return HUFv05_decompress4X4_usingDTable (dst, dstSize,
+                (const BYTE*) cSrc + headerSize, cSrcSize - headerSize, DTable);
+}
+
+
+/* ********************************/
+/* Generic decompression selector */
+/* ********************************/
+
+typedef struct { U32 tableTime; U32 decode256Time; } algo_time_t;   /* cost model : estimated time = tableTime + decode256Time * (dstSize >> 8) */
+static const algo_time_t algoTime[16 /* Quantization */][3 /* single, double, quad */] =
+{
+    /* rows : Q = cSrcSize * 16 / dstSize; columns : single, double, quad -symbol decoders */
+    {{0,0}, {1,1}, {2,2}},  /* Q==0 : impossible */
+    {{0,0}, {1,1}, {2,2}},  /* Q==1 : impossible */
+    {{  38,130}, {1313, 74}, {2151, 38}},   /* Q == 2 : 12-18% */
+    {{ 448,128}, {1353, 74}, {2238, 41}},   /* Q == 3 : 18-25% */
+    {{ 556,128}, {1353, 74}, {2238, 47}},   /* Q == 4 : 25-32% */
+    {{ 714,128}, {1418, 74}, {2436, 53}},   /* Q == 5 : 32-38% */
+    {{ 883,128}, {1437, 74}, {2464, 61}},   /* Q == 6 : 38-44% */
+    {{ 897,128}, {1515, 75}, {2622, 68}},   /* Q == 7 : 44-50% */
+    {{ 926,128}, {1613, 75}, {2730, 75}},   /* Q == 8 : 50-56% */
+    {{ 947,128}, {1729, 77}, {3359, 77}},   /* Q == 9 : 56-62% */
+    {{1107,128}, {2083, 81}, {4006, 84}},   /* Q ==10 : 62-69% */
+    {{1177,128}, {2379, 87}, {4785, 88}},   /* Q ==11 : 69-75% */
+    {{1242,128}, {2415, 93}, {5155, 84}},   /* Q ==12 : 75-81% */
+    {{1349,128}, {2644,106}, {5260,106}},   /* Q ==13 : 81-87% */
+    {{1455,128}, {2422,124}, {4174,124}},   /* Q ==14 : 87-93% */
+    {{ 722,128}, {1891,145}, {1936,146}},   /* Q ==15 : 93-99% */
+};
+
+typedef size_t (*decompressionAlgo)(void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize);
+
+/* HUFv05_decompress() :
+ * Generic entry point : estimates, from the algoTime[] cost model, which of the
+ * 4-streams decoders (single-symbol 4X2 or double-symbol 4X4) should be faster
+ * for this compression ratio, then runs it.
+ * cSrcSize == 1 is the RLE case : dst is filled with that single byte.
+ * @return : dstSize (RLE) or the selected decoder's result, or an error code. */
+size_t HUFv05_decompress (void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize)
+{
+    /* only the decoders that can actually be selected; the quad-symbol column of algoTime[] has no decoder here */
+    static const decompressionAlgo decompress[2] = { HUFv05_decompress4X2, HUFv05_decompress4X4 };
+    const U32 D256 = (U32)(dstSize >> 8);   /* output size, in units of 256 bytes */
+    U32 Dtime[2];
+    U32 Q, algoNb = 0;
+    int n;
+
+    /* validation checks */
+    if (dstSize == 0) return ERROR(dstSize_tooSmall);
+    if (cSrcSize >= dstSize) return ERROR(corruption_detected);   /* invalid, or not compressed, but not compressed already dealt with */
+    if (cSrcSize == 1) { memset(dst, *(const BYTE*)cSrc, dstSize); return dstSize; }   /* RLE */
+
+    /* decoder timing evaluation */
+    Q = (U32)(cSrcSize * 16 / dstSize);   /* Q < 16 since dstSize > cSrcSize */
+    for (n=0; n<2; n++)
+        Dtime[n] = algoTime[Q][n].tableTime + (algoTime[Q][n].decode256Time * D256);
+
+    Dtime[1] += Dtime[1] >> 4;   /* advantage to the algorithm using less memory, for cache eviction */
+    if (Dtime[1] < Dtime[0]) algoNb = 1;
+
+    return decompress[algoNb](dst, dstSize, cSrc, cSrcSize);
+}
+/*
+    zstd - standard compression library
+    Copyright (C) 2014-2016, Yann Collet.
+
+    BSD 2-Clause License (http://www.opensource.org/licenses/bsd-license.php)
+
+    Redistribution and use in source and binary forms, with or without
+    modification, are permitted provided that the following conditions are
+    met:
+    * Redistributions of source code must retain the above copyright
+    notice, this list of conditions and the following disclaimer.
+    * Redistributions in binary form must reproduce the above
+    copyright notice, this list of conditions and the following disclaimer
+    in the documentation and/or other materials provided with the
+    distribution.
+    THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+    "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+    LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+    A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+    OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+    SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+    LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+    DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+    THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+    (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+    OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+    You can contact the author at :
+    - zstd source repository : https://github.com/Cyan4973/zstd
+*/
+
+/* ***************************************************************
+*  Tuning parameters
+*****************************************************************/
+/*!
+ * HEAPMODE :
+ * Select how default decompression function ZSTDv05_decompress() will allocate memory,
+ * in memory stack (0), or in memory heap (1, requires malloc())
+ */
+#ifndef ZSTDv05_HEAPMODE
+#  define ZSTDv05_HEAPMODE 1   /* default : heap; only applies when the macro has not been defined before this point */
+#endif
+
+
+/*-*******************************************************
+*  Dependencies
+*********************************************************/
+#include <stdlib.h>      /* calloc */
+#include <string.h>      /* memcpy, memmove */
+#include <stdio.h>       /* debug only : printf */
+
+
+/*-*******************************************************
+*  Compiler specifics
+*********************************************************/
+#ifdef _MSC_VER    /* Visual Studio */
+#  include <intrin.h>                    /* For Visual 2005 */
+#  pragma warning(disable : 4127)        /* disable: C4127: conditional expression is constant */
+#  pragma warning(disable : 4324)        /* disable: C4324: padded structure */
+#endif
+
+
+/*-*************************************
+*  Local types
+***************************************/
+typedef struct
+{
+    blockType_t blockType;   /* taken from the top 2 bits of the block header */
+    U32 origSize;            /* bt_rle blocks : the size field of the header; 0 for every other block type */
+} blockProperties_t;   /* filled by ZSTDv05_getcBlockSize() */
+
+
+/* *******************************************************
+*  Memory operations
+**********************************************************/
+static void ZSTDv05_copy4(void* dst, const void* src) { memcpy(dst, src, 4); }   /* fixed-size copy of exactly 4 bytes */
+
+
+/* *************************************
+*  Error Management
+***************************************/
+/*! ZSTDv05_isError() :
+*   tells if a return value is an error code; public wrapper that forwards to ERR_isError() */
+unsigned ZSTDv05_isError(size_t code) { return ERR_isError(code); }
+
+
+/*! ZSTDv05_getErrorName() :
+*   provides error code string (useful for debugging); public wrapper that forwards to ERR_getErrorName() */
+const char* ZSTDv05_getErrorName(size_t code) { return ERR_getErrorName(code); }
+
+
+/* *************************************************************
+*   Context management
+***************************************************************/
+typedef enum { ZSTDv05ds_getFrameHeaderSize, ZSTDv05ds_decodeFrameHeader,
+               ZSTDv05ds_decodeBlockHeader, ZSTDv05ds_decompressBlock } ZSTDv05_dStage;   /* value kept in dctx->stage; ZSTDv05_decompressBegin() starts at ZSTDv05ds_getFrameHeaderSize */
+
+struct ZSTDv05_DCtx_s
+{
+    FSEv05_DTable LLTable[FSEv05_DTABLE_SIZE_U32(LLFSEv05Log)];     /* NOTE(review): presumably the three FSE tables handed to ZSTDv05_decodeSeqHeaders() */
+    FSEv05_DTable OffTable[FSEv05_DTABLE_SIZE_U32(OffFSEv05Log)];   /*   (DTableLL / DTableOffb / DTableML) - confirm against the caller */
+    FSEv05_DTable MLTable[FSEv05_DTABLE_SIZE_U32(MLFSEv05Log)];
+    unsigned   hufTableX4[HUFv05_DTABLE_SIZE(HufLog)];   /* double-symbol Huffman table used for IS_PCH literals; cell [0] is set to HufLog by ZSTDv05_decompressBegin() */
+    const void* previousDstEnd;   /* these four pointers are reset to NULL by ZSTDv05_decompressBegin() */
+    const void* base;
+    const void* vBase;
+    const void* dictEnd;
+    size_t expected;     /* starts at ZSTDv05_frameHeaderSize_min; presumably the nb of input bytes wanted next - TODO confirm */
+    size_t headerSize;   /* set by ZSTDv05_decodeFrameHeader_Part1() */
+    ZSTDv05_parameters params;   /* filled by ZSTDv05_decodeFrameHeader_Part2() */
+    blockType_t bType;   /* used in ZSTDv05_decompressContinue(), to transfer blockType between header decoding and block decoding stages */
+    ZSTDv05_dStage stage;
+    U32 flagStaticTables;   /* must be non-zero for IS_PCH literal blocks to be accepted; cleared by ZSTDv05_decompressBegin() */
+    const BYTE* litPtr;     /* literals of the current block : litBuffer, or a position inside the source for large raw literals */
+    size_t litSize;         /* nb of literals available at litPtr */
+    BYTE litBuffer[BLOCKSIZE + WILDCOPY_OVERLENGTH];   /* the literals are followed by WILDCOPY_OVERLENGTH filler bytes */
+    BYTE headerBuffer[ZSTDv05_frameHeaderSize_max];
+};  /* typedef'd to ZSTDv05_DCtx within "zstd_static.h" */
+
+size_t ZSTDv05_sizeofDCtx (void) { return sizeof(ZSTDv05_DCtx); }   /* size of a decompression context, in bytes */
+
+size_t ZSTDv05_decompressBegin(ZSTDv05_DCtx* dctx)
+{
+    /* Puts dctx back into its initial state : the next step is reading a frame header. */
+    dctx->dictEnd = NULL;
+    dctx->vBase = dctx->base = dctx->previousDstEnd = NULL;
+    /* no static tables yet; cell [0] of the Huffman table carries its log */
+    dctx->flagStaticTables = 0;
+    dctx->hufTableX4[0] = HufLog;
+    dctx->stage = ZSTDv05ds_getFrameHeaderSize;
+    dctx->expected = ZSTDv05_frameHeaderSize_min;
+    return 0;   /* no failure path */
+}
+
+ZSTDv05_DCtx* ZSTDv05_createDCtx(void)
+{
+    /* Allocates a context with malloc() and initialises it. @return : NULL when the allocation fails */
+    ZSTDv05_DCtx* const ctx = (ZSTDv05_DCtx*)malloc(sizeof(*ctx));
+    if (ctx != NULL) ZSTDv05_decompressBegin(ctx);
+    return ctx;
+}
+
+size_t ZSTDv05_freeDCtx(ZSTDv05_DCtx* dctx)
+{   /* Releases a context with free(); a NULL dctx is harmless since free(NULL) does nothing */
+    free(dctx);
+    return 0;   /* reserved as a potential error code in the future */
+}
+
+void ZSTDv05_copyDCtx(ZSTDv05_DCtx* dstDCtx, const ZSTDv05_DCtx* srcDCtx)
+{   /* Copies the context minus the combined size of its two trailing buffers (litBuffer, headerBuffer) : no need to copy workspace */
+    const size_t workspaceSize = BLOCKSIZE+WILDCOPY_OVERLENGTH + ZSTDv05_frameHeaderSize_max;
+    memcpy(dstDCtx, srcDCtx, sizeof(ZSTDv05_DCtx) - workspaceSize);
+}
+
+
+/* *************************************************************
+*   Decompression section
+***************************************************************/
+
+/* Frame format description
+   Frame Header -  [ Block Header - Block ] - Frame End
+   1) Frame Header
+      - 4 bytes - Magic Number : ZSTDv05_MAGICNUMBER (defined within zstd_internal.h)
+      - 1 byte  - Window Descriptor
+   2) Block Header
+      - 3 bytes, starting with a 2-bits descriptor
+                 Uncompressed, Compressed, Frame End, unused
+   3) Block
+      See Block Format Description
+   4) Frame End
+      - 3 bytes, compatible with Block Header
+*/
+
+/* Block format description
+
+   Block = Literal Section - Sequences Section
+   Prerequisite : size of (compressed) block, maximum size of regenerated data
+
+   1) Literal Section
+
+   1.1) Header : 1-5 bytes
+        flags: 2 bits
+            00 compressed by Huff0
+            01 unused
+            10 is Raw (uncompressed)
+            11 is Rle
+            Note : using 01 => Huff0 with precomputed table ?
+            Note : delta map ? => compressed ?
+
+   1.1.1) Huff0-compressed literal block : 3-5 bytes
+            srcSize < 1 KB => 3 bytes (2-2-10-10) => single stream (size-format bits == 1)
+            srcSize < 1 KB => 3 bytes (2-2-10-10) => 4 streams     (size-format bits == 0)
+            srcSize < 16KB => 4 bytes (2-2-14-14)
+            else           => 5 bytes (2-2-18-18)
+            big endian convention
+
+   1.1.2) Raw (uncompressed) literal block header : 1-3 bytes
+        size :  5 bits: (IS_RAW<<6) + (0<<4) + size
+               12 bits: (IS_RAW<<6) + (2<<4) + (size>>8)
+                        size&255
+               20 bits: (IS_RAW<<6) + (3<<4) + (size>>16)
+                        size>>8&255
+                        size&255
+
+   1.1.3) Rle (repeated single byte) literal block header : 1-3 bytes
+        size :  5 bits: (IS_RLE<<6) + (0<<4) + size
+               12 bits: (IS_RLE<<6) + (2<<4) + (size>>8)
+                        size&255
+               20 bits: (IS_RLE<<6) + (3<<4) + (size>>16)
+                        size>>8&255
+                        size&255
+
+   1.1.4) Huff0-compressed literal block, using precomputed CTables : 3-5 bytes
+            srcSize < 1 KB => 3 bytes (2-2-10-10) => single stream
+            srcSize < 1 KB => 3 bytes (2-2-10-10)
+            srcSize < 16KB => 4 bytes (2-2-14-14)
+            else           => 5 bytes (2-2-18-18)
+            big endian convention
+
+        1- CTable available (stored into workspace ?)
+        2- Small input (fast heuristic ? Full comparison ? depend on clevel ?)
+
+
+   1.2) Literal block content
+
+   1.2.1) Huff0 block, using sizes from header
+        See Huff0 format
+
+   1.2.2) Huff0 block, using prepared table
+
+   1.2.3) Raw content
+
+   1.2.4) single byte
+
+
+   2) Sequences section
+      TO DO
+*/
+
+
+/** ZSTDv05_decodeFrameHeader_Part1() :
+*   checks the magic number at the start of a frame and records the Frame Header size in zc.
+*   srcSize must be == ZSTDv05_frameHeaderSize_min.
+*   @return : the full size of the Frame Header, or an error code (srcSize_wrong, prefix_unknown) */
+static size_t ZSTDv05_decodeFrameHeader_Part1(ZSTDv05_DCtx* zc, const void* src, size_t srcSize)
+{
+    if (srcSize != ZSTDv05_frameHeaderSize_min) return ERROR(srcSize_wrong);
+
+    if (MEM_readLE32(src) != ZSTDv05_MAGICNUMBER) return ERROR(prefix_unknown);
+
+    /* the header size recorded is always the minimum one */
+    zc->headerSize = ZSTDv05_frameHeaderSize_min;
+    return zc->headerSize;
+}
+
+
+size_t ZSTDv05_getFrameParams(ZSTDv05_parameters* params, const void* src, size_t srcSize)
+{   /* @return : 0 once params is filled; ZSTDv05_frameHeaderSize_max when src is too short (params untouched); or an error code */
+    const BYTE* const header = (const BYTE*)src;
+    if (srcSize < ZSTDv05_frameHeaderSize_min) return ZSTDv05_frameHeaderSize_max;
+    if (MEM_readLE32(header) != ZSTDv05_MAGICNUMBER) return ERROR(prefix_unknown);
+
+    memset(params, 0, sizeof(*params));
+    /* byte 4 : low 4 bits encode windowLog (offset from the absolute minimum), high 4 bits are reserved */
+    params->windowLog = (header[4] & 15) + ZSTDv05_WINDOWLOG_ABSOLUTEMIN;
+    return (header[4] >> 4) ? ERROR(frameParameter_unsupported) : 0;
+}
+
+/** ZSTDv05_decodeFrameHeader_Part2() :
+*   decodes the full Frame Header into zc->params.
+*   srcSize must be the size provided by ZSTDv05_decodeFrameHeader_Part1().
+*   @return : 0, or an error code, which can be tested using ZSTDv05_isError() */
+static size_t ZSTDv05_decodeFrameHeader_Part2(ZSTDv05_DCtx* zc, const void* src, size_t srcSize)
+{
+    if (srcSize != zc->headerSize) return ERROR(srcSize_wrong);
+    {   const size_t parseResult = ZSTDv05_getFrameParams(&(zc->params), src, srcSize);
+        /* a windowLog above 25 is refused on 32-bit builds; this test is evaluated before parseResult is returned */
+        if (MEM_32bits() && (zc->params.windowLog > 25)) return ERROR(frameParameter_unsupportedBy32bits);
+        return parseResult;
+    }
+}
+
+
+size_t ZSTDv05_getcBlockSize(const void* src, size_t srcSize, blockProperties_t* bpPtr)
+{
+    /* Parses a 3-byte block header : block type in the top 2 bits of byte 0,
+     * then a 19-bit size field (low 3 bits of byte 0, byte 1, byte 2).
+     * Fills *bpPtr. @return : 0 for bt_end, 1 for bt_rle, the size field otherwise;
+     * or an error code when fewer than 3 bytes are available. */
+    const BYTE* const hdr = (const BYTE*)src;
+    if (srcSize < 3) return ERROR(srcSize_wrong);
+
+    {   const blockType_t type = (blockType_t)(hdr[0] >> 6);
+        const U32 sizeField = ((hdr[0] & 7)<<16) + (hdr[1]<<8) + hdr[2];
+
+        bpPtr->blockType = type;
+        bpPtr->origSize = (type == bt_rle) ? sizeField : 0;
+
+        if (type == bt_end) return 0;
+        return (type == bt_rle) ? 1 : sizeField;
+    }
+}
+
+
+static size_t ZSTDv05_copyRawBlock(void* dst, size_t maxDstSize, const void* src, size_t srcSize)
+{   /* Copies srcSize bytes verbatim. @return : srcSize, or dstSize_tooSmall when dst cannot hold them */
+    if (maxDstSize < srcSize) return ERROR(dstSize_tooSmall);
+    memcpy(dst, src, srcSize);
+    return srcSize;
+}
+
+
+/*! ZSTDv05_decodeLiteralsBlock() : decodes the literals section at the start of a block; on success dctx->litPtr / dctx->litSize describe the literals.
+    @return : nb of bytes read from src (< srcSize ), or an error code */
+size_t ZSTDv05_decodeLiteralsBlock(ZSTDv05_DCtx* dctx,
+                          const void* src, size_t srcSize)   /* note : srcSize < BLOCKSIZE */
+{
+    const BYTE* const istart = (const BYTE*) src;
+
+    /* any compressed block with literals segment must be at least this size */
+    if (srcSize < MIN_CBLOCK_SIZE) return ERROR(corruption_detected);
+
+    switch(istart[0]>> 6)   /* top 2 bits of the first byte select the literals encoding */
+    {
+    case IS_HUFv05:
+        {   /* Huffman-compressed literals, decoded into dctx->litBuffer */
+            size_t litSize, litCSize, singleStream=0;
+            U32 lhSize = ((istart[0]) >> 4) & 3;   /* size-format field; replaced below by the header size in bytes */
+            if (srcSize < 5) return ERROR(corruption_detected);   /* srcSize >= MIN_CBLOCK_SIZE == 3; here we need up to 5 for case 3 */
+            switch(lhSize)
+            {
+            case 0: case 1: default:   /* note : default is impossible, since lhSize into [0..3] */
+                /* 2 - 2 - 10 - 10 */
+                lhSize=3;
+                singleStream = istart[0] & 16;   /* low bit of the size-format field : set for format 1 only */
+                litSize  = ((istart[0] & 15) << 6) + (istart[1] >> 2);
+                litCSize = ((istart[1] &  3) << 8) + istart[2];
+                break;
+            case 2:
+                /* 2 - 2 - 14 - 14 */
+                lhSize=4;
+                litSize  = ((istart[0] & 15) << 10) + (istart[1] << 2) + (istart[2] >> 6);
+                litCSize = ((istart[2] & 63) <<  8) + istart[3];
+                break;
+            case 3:
+                /* 2 - 2 - 18 - 18 */
+                lhSize=5;
+                litSize  = ((istart[0] & 15) << 14) + (istart[1] << 6) + (istart[2] >> 2);
+                litCSize = ((istart[2] &  3) << 16) + (istart[3] << 8) + istart[4];
+                break;
+            }
+            if (litSize > BLOCKSIZE) return ERROR(corruption_detected);          /* would not fit litBuffer */
+            if (litCSize + lhSize > srcSize) return ERROR(corruption_detected);  /* compressed literals must lie inside src */
+
+            if (HUFv05_isError(singleStream ?
+                            HUFv05_decompress1X2(dctx->litBuffer, litSize, istart+lhSize, litCSize) :
+                            HUFv05_decompress   (dctx->litBuffer, litSize, istart+lhSize, litCSize) ))
+                return ERROR(corruption_detected);
+
+            dctx->litPtr = dctx->litBuffer;
+            dctx->litSize = litSize;
+            memset(dctx->litBuffer + dctx->litSize, 0, WILDCOPY_OVERLENGTH);   /* zero the margin that follows the literals */
+            return litCSize + lhSize;
+        }
+    case IS_PCH:
+        {   /* Huffman-compressed literals decoded with the table already held in dctx->hufTableX4 */
+            size_t errorCode;
+            size_t litSize, litCSize;
+            U32 lhSize = ((istart[0]) >> 4) & 3;
+            if (lhSize != 1)  /* only case supported for now : small litSize, single stream */
+                return ERROR(corruption_detected);
+            if (!dctx->flagStaticTables)
+                return ERROR(dictionary_corrupted);
+
+            /* 2 - 2 - 10 - 10 */
+            lhSize=3;
+            litSize  = ((istart[0] & 15) << 6) + (istart[1] >> 2);   /* 10 bits at most */
+            litCSize = ((istart[1] &  3) << 8) + istart[2];
+            if (litCSize + lhSize > srcSize) return ERROR(corruption_detected);
+
+            errorCode = HUFv05_decompress1X4_usingDTable(dctx->litBuffer, litSize, istart+lhSize, litCSize, dctx->hufTableX4);
+            if (HUFv05_isError(errorCode)) return ERROR(corruption_detected);
+
+            dctx->litPtr = dctx->litBuffer;
+            dctx->litSize = litSize;
+            memset(dctx->litBuffer + dctx->litSize, 0, WILDCOPY_OVERLENGTH);   /* zero the margin that follows the literals */
+            return litCSize + lhSize;
+        }
+    case IS_RAW:
+        {   /* uncompressed literals */
+            size_t litSize;
+            U32 lhSize = ((istart[0]) >> 4) & 3;   /* for values 2 and 3, this is also the header size in bytes */
+            switch(lhSize)
+            {
+            case 0: case 1: default:   /* note : default is impossible, since lhSize into [0..3] */
+                lhSize=1;
+                litSize = istart[0] & 31;
+                break;
+            case 2:
+                litSize = ((istart[0] & 15) << 8) + istart[1];
+                break;
+            case 3:
+                litSize = ((istart[0] & 15) << 16) + (istart[1] << 8) + istart[2];
+                break;
+            }
+
+            if (lhSize+litSize+WILDCOPY_OVERLENGTH > srcSize) {  /* risk reading beyond src buffer with wildcopy */
+                if (litSize+lhSize > srcSize) return ERROR(corruption_detected);
+                memcpy(dctx->litBuffer, istart+lhSize, litSize);   /* copy, so that a margin can be provided after the literals */
+                dctx->litPtr = dctx->litBuffer;
+                dctx->litSize = litSize;
+                memset(dctx->litBuffer + dctx->litSize, 0, WILDCOPY_OVERLENGTH);
+                return lhSize+litSize;
+            }
+            /* direct reference into compressed stream */
+            dctx->litPtr = istart+lhSize;
+            dctx->litSize = litSize;
+            return lhSize+litSize;
+        }
+    case IS_RLE:
+        {   /* a single byte repeated litSize times */
+            size_t litSize;
+            U32 lhSize = ((istart[0]) >> 4) & 3;   /* for values 2 and 3, this is also the header size in bytes */
+            switch(lhSize)
+            {
+            case 0: case 1: default:   /* note : default is impossible, since lhSize into [0..3] */
+                lhSize = 1;
+                litSize = istart[0] & 31;
+                break;
+            case 2:
+                litSize = ((istart[0] & 15) << 8) + istart[1];
+                break;
+            case 3:
+                litSize = ((istart[0] & 15) << 16) + (istart[1] << 8) + istart[2];
+                if (srcSize<4) return ERROR(corruption_detected);   /* srcSize >= MIN_CBLOCK_SIZE == 3; here we need lhSize+1 = 4 */
+                break;
+            }
+            if (litSize > BLOCKSIZE) return ERROR(corruption_detected);
+            memset(dctx->litBuffer, istart[lhSize], litSize + WILDCOPY_OVERLENGTH);   /* the byte right after the header is repeated; the margin is filled with it too */
+            dctx->litPtr = dctx->litBuffer;
+            dctx->litSize = litSize;
+            return lhSize+1;   /* header + the repeated byte */
+        }
+    default:
+        return ERROR(corruption_detected);   /* impossible */
+    }
+}
+
+/* ZSTDv05_decodeSeqHeaders() : reads the sequences header from `src` (nb of sequences, "dumps" area, table descriptions) and builds the 3 decoding tables. @return : nb of bytes read from `src`, or an error code */
+size_t ZSTDv05_decodeSeqHeaders(int* nbSeq, const BYTE** dumpsPtr, size_t* dumpsLengthPtr,
+                         FSEv05_DTable* DTableLL, FSEv05_DTable* DTableML, FSEv05_DTable* DTableOffb,
+                         const void* src, size_t srcSize, U32 flagStaticTable)
+{
+    const BYTE* const istart = (const BYTE* const)src;
+    const BYTE* ip = istart;
+    const BYTE* const iend = istart + srcSize;
+    U32 LLtype, Offtype, MLtype;
+    U32 LLlog, Offlog, MLlog;
+    size_t dumpsLength;
+
+    /* check */
+    if (srcSize < MIN_SEQUENCES_SIZE)
+        return ERROR(srcSize_wrong);
+
+    /* SeqHead */
+    *nbSeq = *ip++;
+    if (*nbSeq==0) return 1;   /* no sequence : only the count byte is consumed, tables are left untouched */
+    if (*nbSeq >= 128) {   /* first byte >= 128 : the count is stored on 2 bytes */
+        if (ip >= iend) return ERROR(srcSize_wrong);
+        *nbSeq = ((nbSeq[0]-128)<<8) + *ip++;
+    }
+
+    if (ip >= iend) return ERROR(srcSize_wrong);
+    LLtype  = *ip >> 6;         /* 2 bits per stream : how its decoding table is described */
+    Offtype = (*ip >> 4) & 3;
+    MLtype  = (*ip >> 2) & 3;
+    if (*ip & 2) {   /* long form : dumpsLength stored in the 2 following bytes */
+        if (ip+3 > iend) return ERROR(srcSize_wrong);
+        dumpsLength  = ip[2];
+        dumpsLength += ip[1] << 8;
+        ip += 3;
+    } else {         /* short form : 9-bit dumpsLength (1 bit from the flag byte + next byte) */
+        if (ip+2 > iend) return ERROR(srcSize_wrong);
+        dumpsLength  = ip[1];
+        dumpsLength += (ip[0] & 1) << 8;
+        ip += 2;
+    }
+    *dumpsPtr = ip;   /* the dumps area starts right after its length field */
+    ip += dumpsLength;
+    *dumpsLengthPtr = dumpsLength;
+
+    /* check */
+    if (ip > iend-3) return ERROR(srcSize_wrong); /* min : all 3 are "raw", hence no header, but at least xxLog bits per type */
+
+    /* sequences */
+    {
+        S16 norm[MaxML+1];    /* assumption : MaxML >= MaxLL >= MaxOff */
+        size_t headerSize;
+
+        /* Build DTables */
+        switch(LLtype)
+        {
+        case FSEv05_ENCODING_RLE :
+            LLlog = 0;
+            FSEv05_buildDTable_rle(DTableLL, *ip++);   /* the single symbol value is the next byte */
+            break;
+        case FSEv05_ENCODING_RAW :
+            LLlog = LLbits;
+            FSEv05_buildDTable_raw(DTableLL, LLbits);
+            break;
+        case FSEv05_ENCODING_STATIC:
+            if (!flagStaticTable) return ERROR(corruption_detected);   /* otherwise : keep the table already present in DTableLL (LLlog is not set on this path) */
+            break;
+        case FSEv05_ENCODING_DYNAMIC :
+        default :   /* impossible */
+            {   U32 max = MaxLL;
+                headerSize = FSEv05_readNCount(norm, &max, &LLlog, ip, iend-ip);
+                if (FSEv05_isError(headerSize)) return ERROR(GENERIC);
+                if (LLlog > LLFSEv05Log) return ERROR(corruption_detected);
+                ip += headerSize;
+                FSEv05_buildDTable(DTableLL, norm, max, LLlog);
+        }   }
+
+        switch(Offtype)
+        {
+        case FSEv05_ENCODING_RLE :
+            Offlog = 0;
+            if (ip > iend-2) return ERROR(srcSize_wrong);   /* min : "raw", hence no header, but at least xxLog bits */
+            FSEv05_buildDTable_rle(DTableOffb, *ip++ & MaxOff); /* if *ip > MaxOff, data is corrupted */
+            break;
+        case FSEv05_ENCODING_RAW :
+            Offlog = Offbits;
+            FSEv05_buildDTable_raw(DTableOffb, Offbits);
+            break;
+        case FSEv05_ENCODING_STATIC:
+            if (!flagStaticTable) return ERROR(corruption_detected);   /* otherwise : keep the table already present in DTableOffb */
+            break;
+        case FSEv05_ENCODING_DYNAMIC :
+        default :   /* impossible */
+            {   U32 max = MaxOff;
+                headerSize = FSEv05_readNCount(norm, &max, &Offlog, ip, iend-ip);
+                if (FSEv05_isError(headerSize)) return ERROR(GENERIC);
+                if (Offlog > OffFSEv05Log) return ERROR(corruption_detected);
+                ip += headerSize;
+                FSEv05_buildDTable(DTableOffb, norm, max, Offlog);
+        }   }
+
+        switch(MLtype)
+        {
+        case FSEv05_ENCODING_RLE :
+            MLlog = 0;
+            if (ip > iend-2) return ERROR(srcSize_wrong); /* min : "raw", hence no header, but at least xxLog bits */
+            FSEv05_buildDTable_rle(DTableML, *ip++);
+            break;
+        case FSEv05_ENCODING_RAW :
+            MLlog = MLbits;
+            FSEv05_buildDTable_raw(DTableML, MLbits);
+            break;
+        case FSEv05_ENCODING_STATIC:
+            if (!flagStaticTable) return ERROR(corruption_detected);   /* otherwise : keep the table already present in DTableML */
+            break;
+        case FSEv05_ENCODING_DYNAMIC :
+        default :   /* impossible */
+            {   U32 max = MaxML;
+                headerSize = FSEv05_readNCount(norm, &max, &MLlog, ip, iend-ip);
+                if (FSEv05_isError(headerSize)) return ERROR(GENERIC);
+                if (MLlog > MLFSEv05Log) return ERROR(corruption_detected);
+                ip += headerSize;
+                FSEv05_buildDTable(DTableML, norm, max, MLlog);
+    }   }   }
+
+    return ip-istart;   /* ip now points at the first byte of the sequences bitstream */
+}
+
+
+typedef struct {   /* one decoded sequence : copy litLength literals, then matchLength bytes from `offset` back */
+    size_t litLength;     /* nb of literals to copy before the match */
+    size_t matchLength;   /* nb of bytes to copy from the match (MINMATCH already added by the decoder) */
+    size_t offset;        /* backward distance of the match, counted from the end of the copied literals */
+} seq_t;
+
+typedef struct {   /* running state of the sequence decoder, updated by ZSTDv05_decodeSequence() */
+    BITv05_DStream_t DStream;    /* bitstream shared by the 3 FSE states below */
+    FSEv05_DState_t stateLL;     /* FSE state : literal lengths */
+    FSEv05_DState_t stateOffb;   /* FSE state : offset codes */
+    FSEv05_DState_t stateML;     /* FSE state : match lengths */
+    size_t prevOffset;           /* older offset kept for repcode resolution */
+    const BYTE* dumps;           /* read position inside the "dumps" area (lengths too large for their FSE symbol) */
+    const BYTE* dumpsEnd;        /* end of the "dumps" area */
+} seqState_t;
+
+
+/* ZSTDv05_decodeSequence() : decodes one sequence (literal length, offset, match length) into *seq and advances seqState. On entry, *seq must still hold the previous sequence : its offset feeds repcode resolution. */
+static void ZSTDv05_decodeSequence(seq_t* seq, seqState_t* seqState)
+{
+    size_t litLength;
+    size_t prevOffset;
+    size_t offset;
+    size_t matchLength;
+    const BYTE* dumps = seqState->dumps;
+    const BYTE* const de = seqState->dumpsEnd;
+
+    /* Literal length */
+    litLength = FSEv05_peakSymbol(&(seqState->stateLL));
+    prevOffset = litLength ? seq->offset : seqState->prevOffset;   /* repcode candidate : previous sequence's offset, or the older one when there is no literal */
+    if (litLength == MaxLL) {   /* MaxLL is an escape value : the real length continues in the dumps area */
+        U32 add = *dumps++;
+        if (add < 255) litLength += add;
+        else {
+            litLength = MEM_readLE32(dumps) & 0xFFFFFF;  /* no risk : dumps is always followed by seq tables > 1 byte */
+            if (litLength&1) litLength>>=1, dumps += 3;   /* low bit set : 3-bytes length */
+            else litLength = (U16)(litLength)>>1, dumps += 2;   /* low bit clear : 2-bytes length */
+        }
+        if (dumps > de) { litLength = MaxLL+255; }  /* late correction, to avoid using uninitialized memory */
+        if (dumps >= de) { dumps = de-1; }  /* late correction, to avoid read overflow (data is now corrupted anyway) */
+    }
+
+    /* Offset */
+    {
+        static const U32 offsetPrefix[MaxOff+1] = {
+                1 /*fake*/, 1, 2, 4, 8, 16, 32, 64, 128, 256,
+                512, 1024, 2048, 4096, 8192, 16384, 32768, 65536, 131072, 262144,
+                524288, 1048576, 2097152, 4194304, 8388608, 16777216, 33554432, /*fake*/ 1, 1, 1, 1, 1 };
+        U32 offsetCode = FSEv05_peakSymbol(&(seqState->stateOffb));   /* <= maxOff, by table construction */
+        U32 nbBits = offsetCode - 1;   /* wraps when offsetCode==0 ; fixed on next line */
+        if (offsetCode==0) nbBits = 0;   /* cmove */
+        offset = offsetPrefix[offsetCode] + BITv05_readBits(&(seqState->DStream), nbBits);   /* offset = power-of-2 base + extra bits */
+        if (MEM_32bits()) BITv05_reloadDStream(&(seqState->DStream));
+        if (offsetCode==0) offset = prevOffset;   /* repcode, cmove */
+        if (offsetCode | !litLength) seqState->prevOffset = seq->offset;   /* cmove */
+        FSEv05_decodeSymbol(&(seqState->stateOffb), &(seqState->DStream));    /* update */
+    }
+
+    /* Literal length update */
+    FSEv05_decodeSymbol(&(seqState->stateLL), &(seqState->DStream));   /* update */
+    if (MEM_32bits()) BITv05_reloadDStream(&(seqState->DStream));
+
+    /* MatchLength */
+    matchLength = FSEv05_decodeSymbol(&(seqState->stateML), &(seqState->DStream));
+    if (matchLength == MaxML) {   /* MaxML is an escape value, handled like MaxLL above */
+        U32 add = *dumps++;
+        if (add < 255) matchLength += add;
+        else {
+            matchLength = MEM_readLE32(dumps) & 0xFFFFFF;  /* no pb : dumps is always followed by seq tables > 1 byte */
+            if (matchLength&1) matchLength>>=1, dumps += 3;
+            else matchLength = (U16)(matchLength)>>1, dumps += 2;
+        }
+        if (dumps > de) { matchLength = MaxML+255; }  /* late correction, to avoid using uninitialized memory */
+        if (dumps >= de) { dumps = de-1; }  /* late correction, to avoid read overflow (data is now corrupted anyway) */
+    }
+    matchLength += MINMATCH;   /* decoded value is relative to the minimum match length */
+
+    /* save result */
+    seq->litLength = litLength;
+    seq->offset = offset;
+    seq->matchLength = matchLength;
+    seqState->dumps = dumps;
+
+#if 0   /* debug */
+    {
+        static U64 totalDecoded = 0;
+        printf("pos %6u : %3u literals & match %3u bytes at distance %6u \n",
+           (U32)(totalDecoded), (U32)litLength, (U32)matchLength, (U32)offset);
+        totalDecoded += litLength + matchLength;
+    }
+#endif
+}
+
+
+static size_t ZSTDv05_execSequence(BYTE* op,
+                                BYTE* const oend, seq_t sequence,
+                                const BYTE** litPtr, const BYTE* const litLimit,
+                                const BYTE* const base, const BYTE* const vBase, const BYTE* const dictEnd)
+{
+    static const int dec32table[] = { 0, 1, 2, 1, 4, 4, 4, 4 };   /* added */
+    static const int dec64table[] = { 8, 8, 8, 7, 8, 9,10,11 };   /* substracted */
+    BYTE* const oLitEnd = op + sequence.litLength;
+    const size_t sequenceLength = sequence.litLength + sequence.matchLength;
+    BYTE* const oMatchEnd = op + sequenceLength;   /* risk : address space overflow (32-bits) */
+    BYTE* const oend_8 = oend-8;
+    const BYTE* const litEnd = *litPtr + sequence.litLength;
+    const BYTE* match = oLitEnd - sequence.offset;
+
+    /* check */
+    if (oLitEnd > oend_8) return ERROR(dstSize_tooSmall);   /* last match must start at a minimum distance of 8 from oend */
+    if (oMatchEnd > oend) return ERROR(dstSize_tooSmall);   /* overwrite beyond dst buffer */
+    if (litEnd > litLimit) return ERROR(corruption_detected);   /* risk read beyond lit buffer */
+
+    /* copy Literals */
+    ZSTDv05_wildcopy(op, *litPtr, sequence.litLength);   /* note : oLitEnd <= oend-8 : no risk of overwrite beyond oend */
+    op = oLitEnd;
+    *litPtr = litEnd;   /* update for next sequence */
+
+    /* copy Match */
+    if (sequence.offset > (size_t)(oLitEnd - base)) {
+        /* offset beyond prefix */
+        if (sequence.offset > (size_t)(oLitEnd - vBase))
+            return ERROR(corruption_detected);
+        match = dictEnd - (base-match);
+        if (match + sequence.matchLength <= dictEnd) {
+            memmove(oLitEnd, match, sequence.matchLength);
+            return sequenceLength;
+        }
+        /* span extDict & currentPrefixSegment */
+        {
+            size_t length1 = dictEnd - match;
+            memmove(oLitEnd, match, length1);
+            op = oLitEnd + length1;
+            sequence.matchLength -= length1;
+            match = base;
+            if (op > oend_8 || sequence.matchLength < MINMATCH) {
+              while (op < oMatchEnd) *op++ = *match++;
+              return sequenceLength;
+            }
+    }   }
+    /* Requirement: op <= oend_8 */
+
+    /* match within prefix */
+    if (sequence.offset < 8) {
+        /* close range match, overlap */
+        const int sub2 = dec64table[sequence.offset];
+        op[0] = match[0];
+        op[1] = match[1];
+        op[2] = match[2];
+        op[3] = match[3];
+        match += dec32table[sequence.offset];
+        ZSTDv05_copy4(op+4, match);
+        match -= sub2;
+    } else {
+        ZSTDv05_copy8(op, match);
+    }
+    op += 8; match += 8;
+
+    if (oMatchEnd > oend-(16-MINMATCH)) {
+        if (op < oend_8) {
+            ZSTDv05_wildcopy(op, match, oend_8 - op);
+            match += oend_8 - op;
+            op = oend_8;
+        }
+        while (op < oMatchEnd)
+            *op++ = *match++;
+    } else {
+        ZSTDv05_wildcopy(op, match, (ptrdiff_t)sequence.matchLength-8);   /* works even if matchLength < 8 */
+    }
+    return sequenceLength;
+}
+
+
+static size_t ZSTDv05_decompressSequences(
+                               ZSTDv05_DCtx* dctx,
+                               void* dst, size_t maxDstSize,
+                         const void* seqStart, size_t seqSize)
+{
+    const BYTE* ip = (const BYTE*)seqStart;
+    const BYTE* const iend = ip + seqSize;
+    BYTE* const ostart = (BYTE* const)dst;
+    BYTE* op = ostart;
+    BYTE* const oend = ostart + maxDstSize;
+    size_t errorCode, dumpsLength;
+    const BYTE* litPtr = dctx->litPtr;
+    const BYTE* const litEnd = litPtr + dctx->litSize;
+    int nbSeq;
+    const BYTE* dumps;
+    U32* DTableLL = dctx->LLTable;
+    U32* DTableML = dctx->MLTable;
+    U32* DTableOffb = dctx->OffTable;
+    const BYTE* const base = (const BYTE*) (dctx->base);
+    const BYTE* const vBase = (const BYTE*) (dctx->vBase);
+    const BYTE* const dictEnd = (const BYTE*) (dctx->dictEnd);
+
+    /* Build Decoding Tables */
+    errorCode = ZSTDv05_decodeSeqHeaders(&nbSeq, &dumps, &dumpsLength,
+                                      DTableLL, DTableML, DTableOffb,
+                                      ip, seqSize, dctx->flagStaticTables);
+    if (ZSTDv05_isError(errorCode)) return errorCode;
+    ip += errorCode;
+
+    /* Regen sequences */
+    if (nbSeq) {
+        seq_t sequence;
+        seqState_t seqState;
+
+        memset(&sequence, 0, sizeof(sequence));
+        sequence.offset = REPCODE_STARTVALUE;
+        seqState.dumps = dumps;
+        seqState.dumpsEnd = dumps + dumpsLength;
+        seqState.prevOffset = REPCODE_STARTVALUE;
+        errorCode = BITv05_initDStream(&(seqState.DStream), ip, iend-ip);
+        if (ERR_isError(errorCode)) return ERROR(corruption_detected);
+        FSEv05_initDState(&(seqState.stateLL), &(seqState.DStream), DTableLL);
+        FSEv05_initDState(&(seqState.stateOffb), &(seqState.DStream), DTableOffb);
+        FSEv05_initDState(&(seqState.stateML), &(seqState.DStream), DTableML);
+
+        for ( ; (BITv05_reloadDStream(&(seqState.DStream)) <= BITv05_DStream_completed) && nbSeq ; ) {
+            size_t oneSeqSize;
+            nbSeq--;
+            ZSTDv05_decodeSequence(&sequence, &seqState);
+            oneSeqSize = ZSTDv05_execSequence(op, oend, sequence, &litPtr, litEnd, base, vBase, dictEnd);
+            if (ZSTDv05_isError(oneSeqSize)) return oneSeqSize;
+            op += oneSeqSize;
+        }
+
+        /* check if reached exact end */
+        if (nbSeq) return ERROR(corruption_detected);
+    }
+
+    /* last literal segment */
+    {
+        size_t lastLLSize = litEnd - litPtr;
+        if (litPtr > litEnd) return ERROR(corruption_detected);   /* too many literals already used */
+        if (op+lastLLSize > oend) return ERROR(dstSize_tooSmall);
+        memcpy(op, litPtr, lastLLSize);
+        op += lastLLSize;
+    }
+
+    return op-ostart;
+}
+
+/* ZSTDv05_checkContinuity() : when `dst` does not start where the previous output ended, the data decoded so far becomes the external dictionary segment and `dst` becomes the new prefix start. */
+static void ZSTDv05_checkContinuity(ZSTDv05_DCtx* dctx, const void* dst)
+{
+    if (dst != dctx->previousDstEnd) {   /* not contiguous */
+        dctx->dictEnd = dctx->previousDstEnd;   /* previous output ends the dictionary segment */
+        dctx->vBase = (const char*)dst - ((const char*)(dctx->previousDstEnd) - (const char*)(dctx->base));   /* virtual start : dst minus the size of the previous segment */
+        dctx->base = dst;
+        dctx->previousDstEnd = dst;
+    }
+}
+
+/* ZSTDv05_decompressBlock_internal() : decodes one compressed block : literals sub-block first, then sequences. @return : nb of bytes written into `dst`, or an error code */
+static size_t ZSTDv05_decompressBlock_internal(ZSTDv05_DCtx* dctx,
+                            void* dst, size_t dstCapacity,
+                      const void* src, size_t srcSize)
+{   /* blockType == blockCompressed */
+    const BYTE* ip = (const BYTE*)src;
+    size_t litCSize;
+
+    if (srcSize >= BLOCKSIZE) return ERROR(srcSize_wrong);   /* a compressed block must be smaller than BLOCKSIZE */
+
+    /* Decode literals sub-block */
+    litCSize = ZSTDv05_decodeLiteralsBlock(dctx, src, srcSize);
+    if (ZSTDv05_isError(litCSize)) return litCSize;
+    ip += litCSize;        /* on success, litCSize is the nb of input bytes used by the literals sub-block */
+    srcSize -= litCSize;
+
+    return ZSTDv05_decompressSequences(dctx, dst, dstCapacity, ip, srcSize);
+}
+
+/* ZSTDv05_decompressBlock() : public entry point to decode a single compressed block ; refreshes the dictionary/prefix boundaries for `dst`, then decodes. @return : nb of bytes written into `dst`, or an error code */
+size_t ZSTDv05_decompressBlock(ZSTDv05_DCtx* dctx,
+                            void* dst, size_t dstCapacity,
+                      const void* src, size_t srcSize)
+{
+    ZSTDv05_checkContinuity(dctx, dst);
+    return ZSTDv05_decompressBlock_internal(dctx, dst, dstCapacity, src, srcSize);
+}
+
+
+/*! ZSTDv05_decompress_continueDCtx() : decodes one complete frame (header, then every block up to the end mark) from `src` into `dst`.
+*   dctx must have been properly initialized. @return : nb of bytes written into `dst`, or an error code */
+static size_t ZSTDv05_decompress_continueDCtx(ZSTDv05_DCtx* dctx,
+                                 void* dst, size_t maxDstSize,
+                                 const void* src, size_t srcSize)
+{
+    const BYTE* ip = (const BYTE*)src;
+    const BYTE* iend = ip + srcSize;
+    BYTE* const ostart = (BYTE* const)dst;
+    BYTE* op = ostart;
+    BYTE* const oend = ostart + maxDstSize;
+    size_t remainingSize = srcSize;
+    blockProperties_t blockProperties;
+
+    /* Frame Header */
+    {
+        size_t frameHeaderSize;
+        if (srcSize < ZSTDv05_frameHeaderSize_min+ZSTDv05_blockHeaderSize) return ERROR(srcSize_wrong);
+        frameHeaderSize = ZSTDv05_decodeFrameHeader_Part1(dctx, src, ZSTDv05_frameHeaderSize_min);   /* part 1 : gives the full header size */
+        if (ZSTDv05_isError(frameHeaderSize)) return frameHeaderSize;
+        if (srcSize < frameHeaderSize+ZSTDv05_blockHeaderSize) return ERROR(srcSize_wrong);
+        ip += frameHeaderSize; remainingSize -= frameHeaderSize;
+        frameHeaderSize = ZSTDv05_decodeFrameHeader_Part2(dctx, src, frameHeaderSize);   /* part 2 : only its error status is used here */
+        if (ZSTDv05_isError(frameHeaderSize)) return frameHeaderSize;
+    }
+
+    /* Loop on each block */
+    while (1)
+    {
+        size_t decodedSize=0;
+        size_t cBlockSize = ZSTDv05_getcBlockSize(ip, iend-ip, &blockProperties);
+        if (ZSTDv05_isError(cBlockSize)) return cBlockSize;
+
+        ip += ZSTDv05_blockHeaderSize;
+        remainingSize -= ZSTDv05_blockHeaderSize;
+        if (cBlockSize > remainingSize) return ERROR(srcSize_wrong);   /* block content is truncated */
+
+        switch(blockProperties.blockType)
+        {
+        case bt_compressed:
+            decodedSize = ZSTDv05_decompressBlock_internal(dctx, op, oend-op, ip, cBlockSize);
+            break;
+        case bt_raw :
+            decodedSize = ZSTDv05_copyRawBlock(op, oend-op, ip, cBlockSize);
+            break;
+        case bt_rle :
+            return ERROR(GENERIC);   /* not yet supported */
+            break;
+        case bt_end :
+            /* end of frame */
+            if (remainingSize) return ERROR(srcSize_wrong);   /* nothing may follow the end mark */
+            break;
+        default:
+            return ERROR(GENERIC);   /* impossible */
+        }
+        if (cBlockSize == 0) break;   /* bt_end */   /* NOTE(review): also taken for a zero-sized block of another type, before decodedSize is checked - confirm ZSTDv05_getcBlockSize() returns 0 only for bt_end */
+
+        if (ZSTDv05_isError(decodedSize)) return decodedSize;
+        op += decodedSize;
+        ip += cBlockSize;
+        remainingSize -= cBlockSize;
+    }
+
+    return op-ostart;
+}
+
+/* ZSTDv05_decompress_usingPreparedDCtx() : one-shot frame decompression ; `dctx` is first overwritten with a copy of `refDCtx`, which is left unmodified. @return : nb of bytes written into `dst`, or an error code */
+size_t ZSTDv05_decompress_usingPreparedDCtx(ZSTDv05_DCtx* dctx, const ZSTDv05_DCtx* refDCtx,
+                                         void* dst, size_t maxDstSize,
+                                   const void* src, size_t srcSize)
+{
+    ZSTDv05_copyDCtx(dctx, refDCtx);
+    ZSTDv05_checkContinuity(dctx, dst);
+    return ZSTDv05_decompress_continueDCtx(dctx, dst, maxDstSize, src, srcSize);
+}
+
+size_t ZSTDv05_decompress_usingDict(ZSTDv05_DCtx* dctx,
+                                 void* dst, size_t maxDstSize,
+                                 const void* src, size_t srcSize,
+                                 const void* dict, size_t dictSize)
+{   /* one-shot frame decompression with an optional dictionary (`dict` may be NULL). @return : nb of bytes written into `dst`, or an error code */
+    size_t const initResult = ZSTDv05_decompressBegin_usingDict(dctx, dict, dictSize);
+    if (ZSTDv05_isError(initResult)) return initResult;   /* a rejected dictionary must not be silently ignored */
+    ZSTDv05_checkContinuity(dctx, dst);
+    return ZSTDv05_decompress_continueDCtx(dctx, dst, maxDstSize, src, srcSize);
+}
+
+/* ZSTDv05_decompressDCtx() : one-shot frame decompression with a caller-provided context and no dictionary. @return : same as ZSTDv05_decompress_usingDict() */
+size_t ZSTDv05_decompressDCtx(ZSTDv05_DCtx* dctx, void* dst, size_t maxDstSize, const void* src, size_t srcSize)
+{
+    return ZSTDv05_decompress_usingDict(dctx, dst, maxDstSize, src, srcSize, NULL, 0);
+}
+
+size_t ZSTDv05_decompress(void* dst, size_t maxDstSize, const void* src, size_t srcSize)
+{   /* one-shot frame decompression with a temporary context : heap-allocated when ZSTDv05_HEAPMODE==1, on the stack otherwise */
+#if defined(ZSTDv05_HEAPMODE) && (ZSTDv05_HEAPMODE==1)
+    size_t regenSize;
+    ZSTDv05_DCtx* dctx = ZSTDv05_createDCtx();
+    if (dctx==NULL) return ERROR(memory_allocation);
+    regenSize = ZSTDv05_decompressDCtx(dctx, dst, maxDstSize, src, srcSize);
+    ZSTDv05_freeDCtx(dctx);   /* released whether decompression succeeded or not */
+    return regenSize;
+#else
+    ZSTDv05_DCtx dctx;   /* not initialized here : ZSTDv05_decompressDCtx() runs the "begin" step */
+    return ZSTDv05_decompressDCtx(&dctx, dst, maxDstSize, src, srcSize);
+#endif
+}
+
+size_t ZSTDv05_findFrameCompressedSize(const void *src, size_t srcSize)
+{   /* walks the block headers of the frame starting at `src`, without decoding any content. @return : compressed size of the frame in bytes, or an error code */
+    const BYTE* ip = (const BYTE*)src;
+    size_t remainingSize = srcSize;
+    blockProperties_t blockProperties;
+
+    /* Frame Header */
+    if (srcSize < ZSTDv05_frameHeaderSize_min) return ERROR(srcSize_wrong);
+    if (MEM_readLE32(src) != ZSTDv05_MAGICNUMBER) return ERROR(prefix_unknown);
+    ip += ZSTDv05_frameHeaderSize_min; remainingSize -= ZSTDv05_frameHeaderSize_min;   /* the header is skipped as exactly ZSTDv05_frameHeaderSize_min bytes */
+
+    /* Loop on each block */
+    while (1)
+    {
+        size_t cBlockSize = ZSTDv05_getcBlockSize(ip, remainingSize, &blockProperties);
+        if (ZSTDv05_isError(cBlockSize)) return cBlockSize;
+
+        ip += ZSTDv05_blockHeaderSize;
+        remainingSize -= ZSTDv05_blockHeaderSize;
+        if (cBlockSize > remainingSize) return ERROR(srcSize_wrong);   /* block content is truncated */
+
+        if (cBlockSize == 0) break;   /* bt_end */
+
+        ip += cBlockSize;   /* skip the block content */
+        remainingSize -= cBlockSize;
+    }
+
+    return ip - (const BYTE*)src;
+}
+
+/* ******************************
+*  Streaming Decompression API
+********************************/
+size_t ZSTDv05_nextSrcSizeToDecompress(ZSTDv05_DCtx* dctx)
+{   /* exact `srcSize` that the next call to ZSTDv05_decompressContinue() will accept */
+    return dctx->expected;
+}
+
+size_t ZSTDv05_decompressContinue(ZSTDv05_DCtx* dctx, void* dst, size_t maxDstSize, const void* src, size_t srcSize)
+{   /* one step of the streaming state machine ; `srcSize` must equal ZSTDv05_nextSrcSizeToDecompress(). @return : nb of bytes written into `dst` (0 for header steps), or an error code */
+    /* Sanity check */
+    if (srcSize != dctx->expected) return ERROR(srcSize_wrong);
+    ZSTDv05_checkContinuity(dctx, dst);
+
+    /* Decompress : frame header; part 1 */
+    switch (dctx->stage)
+    {
+    case ZSTDv05ds_getFrameHeaderSize :
+        /* get frame header size */
+        if (srcSize != ZSTDv05_frameHeaderSize_min) return ERROR(srcSize_wrong);   /* impossible */
+        dctx->headerSize = ZSTDv05_decodeFrameHeader_Part1(dctx, src, ZSTDv05_frameHeaderSize_min);
+        if (ZSTDv05_isError(dctx->headerSize)) return dctx->headerSize;
+        memcpy(dctx->headerBuffer, src, ZSTDv05_frameHeaderSize_min);   /* keep the header bytes for part 2 below */
+        if (dctx->headerSize > ZSTDv05_frameHeaderSize_min) return ERROR(GENERIC); /* should never happen */
+        dctx->expected = 0;   /* not necessary to copy more */
+        /* fallthrough */
+    case ZSTDv05ds_decodeFrameHeader:
+        /* get frame header */
+        {   size_t const result = ZSTDv05_decodeFrameHeader_Part2(dctx, dctx->headerBuffer, dctx->headerSize);
+            if (ZSTDv05_isError(result)) return result;
+            dctx->expected = ZSTDv05_blockHeaderSize;   /* next input : a block header */
+            dctx->stage = ZSTDv05ds_decodeBlockHeader;
+            return 0;
+        }
+    case ZSTDv05ds_decodeBlockHeader:
+        {
+            /* Decode block header */
+            blockProperties_t bp;
+            size_t blockSize = ZSTDv05_getcBlockSize(src, ZSTDv05_blockHeaderSize, &bp);
+            if (ZSTDv05_isError(blockSize)) return blockSize;
+            if (bp.blockType == bt_end) {   /* end of frame : expect nothing more and rearm for a new frame */
+                dctx->expected = 0;
+                dctx->stage = ZSTDv05ds_getFrameHeaderSize;
+            }
+            else {                          /* next input : the content of this block */
+                dctx->expected = blockSize;
+                dctx->bType = bp.blockType;
+                dctx->stage = ZSTDv05ds_decompressBlock;
+            }
+            return 0;
+        }
+    case ZSTDv05ds_decompressBlock:
+        {
+            /* Decompress : block content */
+            size_t rSize;
+            switch(dctx->bType)
+            {
+            case bt_compressed:
+                rSize = ZSTDv05_decompressBlock_internal(dctx, dst, maxDstSize, src, srcSize);
+                break;
+            case bt_raw :
+                rSize = ZSTDv05_copyRawBlock(dst, maxDstSize, src, srcSize);
+                break;
+            case bt_rle :
+                return ERROR(GENERIC);   /* not yet handled */
+                break;
+            case bt_end :   /* should never happen (filtered at phase 1) */
+                rSize = 0;
+                break;
+            default:
+                return ERROR(GENERIC);   /* impossible */
+            }
+            dctx->stage = ZSTDv05ds_decodeBlockHeader;   /* next input : a block header */
+            dctx->expected = ZSTDv05_blockHeaderSize;
+            dctx->previousDstEnd = (char*)dst + rSize;   /* NOTE(review): rSize is not checked with ZSTDv05_isError() before being added to dst - confirm callers stop using dctx after an error */
+            return rSize;
+        }
+    default:
+        return ERROR(GENERIC);   /* impossible */
+    }
+}
+
+/* ZSTDv05_refDictContent() : references (without copying) `dict` as the data preceding the next output ; whatever was referenced before becomes the external dictionary segment. */
+static void ZSTDv05_refDictContent(ZSTDv05_DCtx* dctx, const void* dict, size_t dictSize)
+{
+    dctx->dictEnd = dctx->previousDstEnd;
+    dctx->vBase = (const char*)dict - ((const char*)(dctx->previousDstEnd) - (const char*)(dctx->base));   /* virtual start : dict minus the size of the previous segment */
+    dctx->base = dict;
+    dctx->previousDstEnd = (const char*)dict + dictSize;   /* output starting right at the end of dict is treated as contiguous by ZSTDv05_checkContinuity() */
+}
+
+static size_t ZSTDv05_loadEntropy(ZSTDv05_DCtx* dctx, const void* dict, size_t dictSize)
+{   /* loads the entropy tables stored at the start of `dict` (Huffman, then offset / matchLength / litLength FSE tables) into dctx. @return : nb of bytes read from `dict`, or ERROR(dictionary_corrupted) */
+    size_t hSize, offcodeHeaderSize, matchlengthHeaderSize, errorCode, litlengthHeaderSize;
+    short offcodeNCount[MaxOff+1];
+    U32 offcodeMaxValue=MaxOff, offcodeLog;
+    short matchlengthNCount[MaxML+1];
+    unsigned matchlengthMaxValue = MaxML, matchlengthLog;
+    short litlengthNCount[MaxLL+1];
+    unsigned litlengthMaxValue = MaxLL, litlengthLog;
+
+    hSize = HUFv05_readDTableX4(dctx->hufTableX4, dict, dictSize);   /* literals Huffman table */
+    if (HUFv05_isError(hSize)) return ERROR(dictionary_corrupted);
+    dict = (const char*)dict + hSize;
+    dictSize -= hSize;
+
+    offcodeHeaderSize = FSEv05_readNCount(offcodeNCount, &offcodeMaxValue, &offcodeLog, dict, dictSize);
+    if (FSEv05_isError(offcodeHeaderSize)) return ERROR(dictionary_corrupted);
+    if (offcodeLog > OffFSEv05Log) return ERROR(dictionary_corrupted);
+    errorCode = FSEv05_buildDTable(dctx->OffTable, offcodeNCount, offcodeMaxValue, offcodeLog);
+    if (FSEv05_isError(errorCode)) return ERROR(dictionary_corrupted);
+    dict = (const char*)dict + offcodeHeaderSize;
+    dictSize -= offcodeHeaderSize;
+
+    matchlengthHeaderSize = FSEv05_readNCount(matchlengthNCount, &matchlengthMaxValue, &matchlengthLog, dict, dictSize);
+    if (FSEv05_isError(matchlengthHeaderSize)) return ERROR(dictionary_corrupted);
+    if (matchlengthLog > MLFSEv05Log) return ERROR(dictionary_corrupted);
+    errorCode = FSEv05_buildDTable(dctx->MLTable, matchlengthNCount, matchlengthMaxValue, matchlengthLog);
+    if (FSEv05_isError(errorCode)) return ERROR(dictionary_corrupted);
+    dict = (const char*)dict + matchlengthHeaderSize;
+    dictSize -= matchlengthHeaderSize;
+
+    litlengthHeaderSize = FSEv05_readNCount(litlengthNCount, &litlengthMaxValue, &litlengthLog, dict, dictSize);
+    if (FSEv05_isError(litlengthHeaderSize)) return ERROR(dictionary_corrupted);   /* must come first : litlengthLog may be unset when reading failed */
+    if (litlengthLog > LLFSEv05Log) return ERROR(dictionary_corrupted);
+    errorCode = FSEv05_buildDTable(dctx->LLTable, litlengthNCount, litlengthMaxValue, litlengthLog);
+    if (FSEv05_isError(errorCode)) return ERROR(dictionary_corrupted);
+
+    dctx->flagStaticTables = 1;   /* only set once all tables are loaded ; allows blocks to select "static" tables */
+    return hSize + offcodeHeaderSize + matchlengthHeaderSize + litlengthHeaderSize;
+}
+
+static size_t ZSTDv05_decompress_insertDictionary(ZSTDv05_DCtx* dctx, const void* dict, size_t dictSize)
+{   /* registers `dict` into dctx : entropy tables + content when it starts with ZSTDv05_DICT_MAGIC, raw content otherwise. @return : 0, or ERROR(dictionary_corrupted) */
+    size_t eSize;
+    /* a buffer shorter than the 4-bytes magic cannot be a structured dictionary : never read 4 bytes from it */
+    if ((dictSize < 4) || (MEM_readLE32(dict) != ZSTDv05_DICT_MAGIC)) {
+        /* pure content mode */
+        ZSTDv05_refDictContent(dctx, dict, dictSize);
+        return 0;
+    }
+    /* load entropy tables */
+    dict = (const char*)dict + 4;   /* skip the magic number */
+    dictSize -= 4;
+    eSize = ZSTDv05_loadEntropy(dctx, dict, dictSize);
+    if (ZSTDv05_isError(eSize)) return ERROR(dictionary_corrupted);
+
+    /* reference dictionary content */
+    dict = (const char*)dict + eSize;   /* what follows the entropy tables is plain content */
+    dictSize -= eSize;
+    ZSTDv05_refDictContent(dctx, dict, dictSize);
+
+    return 0;
+}
+
+/* ZSTDv05_decompressBegin_usingDict() : resets dctx with ZSTDv05_decompressBegin(), then registers `dict` when both `dict` and `dictSize` are non-zero. @return : 0, or an error code */
+size_t ZSTDv05_decompressBegin_usingDict(ZSTDv05_DCtx* dctx, const void* dict, size_t dictSize)
+{
+    size_t errorCode;
+    errorCode = ZSTDv05_decompressBegin(dctx);
+    if (ZSTDv05_isError(errorCode)) return errorCode;
+
+    if (dict && dictSize) {   /* NULL or empty dictionary : nothing to register */
+        errorCode = ZSTDv05_decompress_insertDictionary(dctx, dict, dictSize);
+        if (ZSTDv05_isError(errorCode)) return ERROR(dictionary_corrupted);
+    }
+
+    return 0;
+}
+
+/*
+    Buffered version of Zstd compression library
+    Copyright (C) 2015-2016, Yann Collet.
+
+    BSD 2-Clause License (http://www.opensource.org/licenses/bsd-license.php)
+
+    Redistribution and use in source and binary forms, with or without
+    modification, are permitted provided that the following conditions are
+    met:
+    * Redistributions of source code must retain the above copyright
+    notice, this list of conditions and the following disclaimer.
+    * Redistributions in binary form must reproduce the above
+    copyright notice, this list of conditions and the following disclaimer
+    in the documentation and/or other materials provided with the
+    distribution.
+    THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+    "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+    LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+    A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+    OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+    SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+    LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+    DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+    THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+    (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+    OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+    You can contact the author at :
+    - zstd source repository : https://github.com/Cyan4973/zstd
+    - zstd public forum : https://groups.google.com/forum/#!forum/lz4c
+*/
+
+/* The objects defined into this file should be considered experimental.
+ * They are not labelled stable, as their prototype may change in the future.
+ * You can use them for tests, provide feedback, or if you can endure risk of future changes.
+ */
+
+
+
+/* *************************************
+*  Constants
+***************************************/
+static size_t ZBUFFv05_blockHeaderSize = 3;   /* block header size, in bytes ; presumably mirrors ZSTDv05_blockHeaderSize - TODO confirm */
+
+
+
+/* *** Compression *** */
+
+static size_t ZBUFFv05_limitCopy(void* dst, size_t maxDstSize, const void* src, size_t srcSize)
+{   /* copies as many bytes of `src` as fit into `dst`. @return : nb of bytes copied = MIN(maxDstSize, srcSize) */
+    size_t length = MIN(maxDstSize, srcSize);
+    if (length > 0) memcpy(dst, src, length);   /* memcpy() on a NULL pointer is undefined, even for 0 bytes */
+    return length;
+}
+
+
+
+
+/** ************************************************
+*  Streaming decompression
+*
+*  A ZBUFFv05_DCtx object is required to track streaming operation.
+*  Use ZBUFFv05_createDCtx() and ZBUFFv05_freeDCtx() to create/release resources.
+*  Use ZBUFFv05_decompressInit() to start a new decompression operation.
+*  ZBUFFv05_DCtx objects can be reused multiple times.
+*
+*  Use ZBUFFv05_decompressContinue() repetitively to consume your input.
+*  *srcSizePtr and *maxDstSizePtr can be any size.
+*  The function will report how many bytes were read or written by modifying *srcSizePtr and *maxDstSizePtr.
+*  Note that it may not consume the entire input, in which case it's up to the caller to call again the function with remaining input.
+*  The content of dst will be overwritten (up to *maxDstSizePtr) at each function call, so save its content if it matters or change dst .
+*  return : a hint to preferred nb of bytes to use as input for next function call (it's only a hint, to improve latency)
+*            or 0 when a frame is completely decoded
+*            or an error code, which can be tested using ZBUFFv05_isError().
+*
+*  Hint : recommended buffer sizes (not compulsory)
+*  output : 128 KB block size is the internal unit, it ensures it's always possible to write a full block when it's decoded.
+*  input : just follow indications from ZBUFFv05_decompressContinue() to minimize latency. It should always be <= 128 KB + 3 .
+* **************************************************/
+
+typedef enum { ZBUFFv05ds_init, ZBUFFv05ds_readHeader, ZBUFFv05ds_loadHeader, ZBUFFv05ds_decodeHeader,
+               ZBUFFv05ds_read, ZBUFFv05ds_load, ZBUFFv05ds_flush } ZBUFFv05_dStage;   /* stages of ZBUFFv05_decompressContinue() ; a new context starts at ZBUFFv05ds_init, initialization moves it to ZBUFFv05ds_readHeader */
+
+/* *** Resource management *** */
+
+#define ZSTDv05_frameHeaderSize_max 5   /* too magical, should come from reference */
+struct ZBUFFv05_DCtx_s {
+    ZSTDv05_DCtx* zc;            /* underlying decoder context ; created in ZBUFFv05_createDCtx(), released in ZBUFFv05_freeDCtx() */
+    ZSTDv05_parameters params;   /* frame parameters, filled by ZSTDv05_getFrameParams() */
+    char* inBuff;                /* heap buffer, free()'d in ZBUFFv05_freeDCtx() ; presumably stages input - TODO confirm */
+    size_t inBuffSize;           /* presumably allocated size of inBuff - TODO confirm */
+    size_t inPos;                /* reset to 0 at init */
+    char* outBuff;               /* heap buffer, free()'d in ZBUFFv05_freeDCtx() ; presumably holds decoded data awaiting flush - TODO confirm */
+    size_t outBuffSize;          /* presumably allocated size of outBuff - TODO confirm */
+    size_t outStart;             /* reset to 0 at init */
+    size_t outEnd;               /* reset to 0 at init */
+    size_t hPos;                 /* reset to 0 at init ; presumably nb of bytes stored in headerBuffer - TODO confirm */
+    ZBUFFv05_dStage stage;       /* current stage of the streaming state machine */
+    unsigned char headerBuffer[ZSTDv05_frameHeaderSize_max];   /* presumably gathers a frame header split across calls - TODO confirm */
+};   /* typedef'd to ZBUFFv05_DCtx within "zstd_buffered.h" */
+
+
+ZBUFFv05_DCtx* ZBUFFv05_createDCtx(void)
+{   /* Allocates a streaming context plus its block decoder; returns NULL if either allocation fails. Release with ZBUFFv05_freeDCtx(). */
+    ZBUFFv05_DCtx* zbc = (ZBUFFv05_DCtx*)calloc(1, sizeof(ZBUFFv05_DCtx));   /* zero-filled : buffers start as NULL with size 0 */
+    if (zbc==NULL) return NULL;
+    zbc->zc = ZSTDv05_createDCtx();
+    if (zbc->zc==NULL) { free(zbc); return NULL; }   /* never hand out a context without a decoder */
+    zbc->stage = ZBUFFv05ds_init;   /* decompressContinue() refuses to run until an init function is called */
+    return zbc;
+}
+
+size_t ZBUFFv05_freeDCtx(ZBUFFv05_DCtx* zbc)
+{   /* Releases zbc together with its block decoder and both staging buffers; always returns 0. */
+    if (zbc==NULL) return 0;   /* support free on null */
+    ZSTDv05_freeDCtx(zbc->zc);
+    free(zbc->inBuff);    /* may still be NULL if no frame header was ever decoded */
+    free(zbc->outBuff);
+    free(zbc);
+    return 0;
+}
+
+
+/* *** Initialization *** */
+
+size_t ZBUFFv05_decompressInitDictionary(ZBUFFv05_DCtx* zbc, const void* dict, size_t dictSize)
+{   /* Starts a new frame on zbc using dict/dictSize; returns the result of ZSTDv05_decompressBegin_usingDict(). */
+    zbc->stage = ZBUFFv05ds_readHeader;   /* next decompressContinue() call expects a frame header */
+    zbc->hPos = zbc->inPos = zbc->outStart = zbc->outEnd = 0;   /* drop any buffered state; allocated buffers are kept for reuse */
+    return ZSTDv05_decompressBegin_usingDict(zbc->zc, dict, dictSize);
+}
+
+size_t ZBUFFv05_decompressInit(ZBUFFv05_DCtx* zbc)
+{   /* Starts a new frame on zbc without a dictionary. */
+    return ZBUFFv05_decompressInitDictionary(zbc, NULL, 0);
+}
+
+
+/* *** Decompression *** */
+
+size_t ZBUFFv05_decompressContinue(ZBUFFv05_DCtx* zbc, void* dst, size_t* maxDstSizePtr, const void* src, size_t* srcSizePtr)
+{   /* One streaming step : reads at most *srcSizePtr bytes from src, writes at most *maxDstSizePtr bytes to dst, stores the amounts actually used back into both; returns a next-input-size hint or an error code. */
+    const char* const istart = (const char*)src;
+    const char* ip = istart;   /* read cursor within src */
+    const char* const iend = istart + *srcSizePtr;
+    char* const ostart = (char*)dst;
+    char* op = ostart;   /* write cursor within dst */
+    char* const oend = ostart + *maxDstSizePtr;
+    U32 notDone = 1;
+
+    while (notDone) {
+        switch(zbc->stage)
+        {
+        case ZBUFFv05ds_init :
+            return ERROR(init_missing);   /* no frame in progress : an init function must be called first */
+
+        case ZBUFFv05ds_readHeader :
+            /* read header from src */
+            {
+                size_t headerSize = ZSTDv05_getFrameParams(&(zbc->params), src, *srcSizePtr);
+                if (ZSTDv05_isError(headerSize)) return headerSize;
+                if (headerSize) {
+                    /* not enough input to decode header : tell how many bytes would be necessary */
+                    memcpy(zbc->headerBuffer+zbc->hPos, src, *srcSizePtr);
+                    zbc->hPos += *srcSizePtr;
+                    *maxDstSizePtr = 0;
+                    zbc->stage = ZBUFFv05ds_loadHeader;
+                    return headerSize - zbc->hPos;
+                }
+                zbc->stage = ZBUFFv05ds_decodeHeader;
+                break;
+            }
+            /* not reached : every path above returns or breaks */
+        case ZBUFFv05ds_loadHeader:
+            /* complete header from src */
+            {
+                size_t headerSize = ZBUFFv05_limitCopy(
+                    zbc->headerBuffer + zbc->hPos, ZSTDv05_frameHeaderSize_max - zbc->hPos,
+                    src, *srcSizePtr);
+                zbc->hPos += headerSize;
+                ip += headerSize;
+                headerSize = ZSTDv05_getFrameParams(&(zbc->params), zbc->headerBuffer, zbc->hPos);
+                if (ZSTDv05_isError(headerSize)) return headerSize;
+                if (headerSize) {
+                    /* not enough input to decode header : tell how many bytes would be necessary */
+                    *maxDstSizePtr = 0;
+                    return headerSize - zbc->hPos;
+                }
+                // zbc->stage = ZBUFFv05ds_decodeHeader; break;   /* useless : stage follows */
+            }
+            /* fall-through */
+        case ZBUFFv05ds_decodeHeader:
+                /* apply header to create / resize buffers */
+                {
+                    size_t neededOutSize = (size_t)1 << zbc->params.windowLog;
+                    size_t neededInSize = BLOCKSIZE;   /* a block is never > BLOCKSIZE */
+                    if (zbc->inBuffSize < neededInSize) {
+                        free(zbc->inBuff);
+                        zbc->inBuffSize = neededInSize;
+                        zbc->inBuff = (char*)malloc(neededInSize);
+                        if (zbc->inBuff == NULL) { zbc->inBuffSize = 0; return ERROR(memory_allocation); }   /* size must not outlive the buffer, or a reused context would skip re-allocation */
+                    }
+                    if (zbc->outBuffSize < neededOutSize) {
+                        free(zbc->outBuff);
+                        zbc->outBuffSize = neededOutSize;
+                        zbc->outBuff = (char*)malloc(neededOutSize);
+                        if (zbc->outBuff == NULL) { zbc->outBuffSize = 0; return ERROR(memory_allocation); }   /* same reasoning as for inBuff */
+                }   }
+                if (zbc->hPos) {
+                    /* some data already loaded into headerBuffer : transfer into inBuff */
+                    memcpy(zbc->inBuff, zbc->headerBuffer, zbc->hPos);
+                    zbc->inPos = zbc->hPos;
+                    zbc->hPos = 0;
+                    zbc->stage = ZBUFFv05ds_load;
+                    break;
+                }
+                zbc->stage = ZBUFFv05ds_read;
+                /* fall-through */
+        case ZBUFFv05ds_read:
+            {
+                size_t neededInSize = ZSTDv05_nextSrcSizeToDecompress(zbc->zc);
+                if (neededInSize==0) {  /* end of frame */
+                    zbc->stage = ZBUFFv05ds_init;
+                    notDone = 0;
+                    break;
+                }
+                if ((size_t)(iend-ip) >= neededInSize) {
+                    /* directly decode from src */
+                    size_t decodedSize = ZSTDv05_decompressContinue(zbc->zc,
+                        zbc->outBuff + zbc->outStart, zbc->outBuffSize - zbc->outStart,
+                        ip, neededInSize);
+                    if (ZSTDv05_isError(decodedSize)) return decodedSize;
+                    ip += neededInSize;
+                    if (!decodedSize) break;   /* this was just a header */
+                    zbc->outEnd = zbc->outStart +  decodedSize;
+                    zbc->stage = ZBUFFv05ds_flush;
+                    break;
+                }
+                if (ip==iend) { notDone = 0; break; }   /* no more input */
+                zbc->stage = ZBUFFv05ds_load;
+            }
+            /* fall-through */
+        case ZBUFFv05ds_load:
+            {
+                size_t neededInSize = ZSTDv05_nextSrcSizeToDecompress(zbc->zc);
+                size_t toLoad = neededInSize - zbc->inPos;   /* should always be <= remaining space within inBuff */
+                size_t loadedSize;
+                if (toLoad > zbc->inBuffSize - zbc->inPos) return ERROR(corruption_detected);   /* should never happen */
+                loadedSize = ZBUFFv05_limitCopy(zbc->inBuff + zbc->inPos, toLoad, ip, iend-ip);
+                ip += loadedSize;
+                zbc->inPos += loadedSize;
+                if (loadedSize < toLoad) { notDone = 0; break; }   /* not enough input, wait for more */
+                {
+                    size_t decodedSize = ZSTDv05_decompressContinue(zbc->zc,
+                        zbc->outBuff + zbc->outStart, zbc->outBuffSize - zbc->outStart,
+                        zbc->inBuff, neededInSize);
+                    if (ZSTDv05_isError(decodedSize)) return decodedSize;
+                    zbc->inPos = 0;   /* input is consumed */
+                    if (!decodedSize) { zbc->stage = ZBUFFv05ds_read; break; }   /* this was just a header */
+                    zbc->outEnd = zbc->outStart +  decodedSize;
+                    zbc->stage = ZBUFFv05ds_flush;
+                    // break; /* ZBUFFv05ds_flush follows */
+                }
+            }
+            /* fall-through */
+        case ZBUFFv05ds_flush:
+            {
+                size_t toFlushSize = zbc->outEnd - zbc->outStart;
+                size_t flushedSize = ZBUFFv05_limitCopy(op, oend-op, zbc->outBuff + zbc->outStart, toFlushSize);
+                op += flushedSize;
+                zbc->outStart += flushedSize;
+                if (flushedSize == toFlushSize) {
+                    zbc->stage = ZBUFFv05ds_read;
+                    if (zbc->outStart + BLOCKSIZE > zbc->outBuffSize)
+                        zbc->outStart = zbc->outEnd = 0;   /* no room left for a full block : restart at the beginning of outBuff */
+                    break;
+                }
+                /* cannot flush everything */
+                notDone = 0;
+                break;
+            }
+        default: return ERROR(GENERIC);   /* impossible */
+    }   }
+
+    *srcSizePtr = ip-istart;
+    *maxDstSizePtr = op-ostart;
+
+    {   size_t nextSrcSizeHint = ZSTDv05_nextSrcSizeToDecompress(zbc->zc);
+        if (nextSrcSizeHint > ZBUFFv05_blockHeaderSize) nextSrcSizeHint+= ZBUFFv05_blockHeaderSize;   /* get next block header too */
+        nextSrcSizeHint -= zbc->inPos;   /* already loaded*/
+        return nextSrcSizeHint;
+    }
+}
+
+
+
+/* *************************************
+*  Tool functions
+***************************************/
+unsigned ZBUFFv05_isError(size_t errorCode) { return ERR_isError(errorCode); }   /* forwards to ERR_isError() */
+const char* ZBUFFv05_getErrorName(size_t errorCode) { return ERR_getErrorName(errorCode); }   /* forwards to ERR_getErrorName() */
+
+size_t ZBUFFv05_recommendedDInSize(void)  { return BLOCKSIZE + ZBUFFv05_blockHeaderSize /* block header size*/ ; }
+size_t ZBUFFv05_recommendedDOutSize(void) { return BLOCKSIZE; }   /* room for one full block */
diff --git a/contrib/libzstd/include/zstd/legacy/zstd_v05.h b/contrib/libzstd/include/zstd/legacy/zstd_v05.h
new file mode 100644
index 00000000000..8ce662fd9fc
--- /dev/null
+++ b/contrib/libzstd/include/zstd/legacy/zstd_v05.h
@@ -0,0 +1,156 @@
+/**
+ * Copyright (c) 2016-present, Yann Collet, Facebook, Inc.
+ * All rights reserved.
+ *
+ * This source code is licensed under the BSD-style license found in the
+ * LICENSE file in the root directory of this source tree. An additional grant
+ * of patent rights can be found in the PATENTS file in the same directory.
+ */
+
+#ifndef ZSTDv05_H
+#define ZSTDv05_H
+
+#if defined (__cplusplus)
+extern "C" {
+#endif
+
+/*-*************************************
+*  Dependencies
+***************************************/
+#include <stddef.h>   /* size_t */
+#include "mem.h"      /* U64, U32 */
+
+
+/* *************************************
+*  Simple functions
+***************************************/
+/*! ZSTDv05_decompress() :
+    `compressedSize` : is the _exact_ size of the compressed blob, otherwise decompression will fail.
+    `dstCapacity` must be large enough, equal or larger than originalSize.
+    @return : the number of bytes decompressed into `dst` (<= `dstCapacity`),
+              or an errorCode if it fails (which can be tested using ZSTDv05_isError()) */
+size_t ZSTDv05_decompress( void* dst, size_t dstCapacity,
+                     const void* src, size_t compressedSize);
+
+/**
+ZSTDv05_findFrameCompressedSize() : get the compressed (source) length of a ZSTD frame
+    compressedSize : The size of the 'src' buffer, at least as large as the frame pointed to by 'src'
+    return : the number of bytes that would be read to decompress this frame
+             or an errorCode if it fails (which can be tested using ZSTDv05_isError())
+*/
+size_t ZSTDv05_findFrameCompressedSize(const void* src, size_t compressedSize);
+
+/* *************************************
+*  Helper functions
+***************************************/
+/* Error Management */
+unsigned    ZSTDv05_isError(size_t code);          /*!< tells if a `size_t` function result is an error code */
+const char* ZSTDv05_getErrorName(size_t code);     /*!< provides readable string for an error code */
+
+
+/* *************************************
+*  Explicit memory management
+***************************************/
+/** Decompression context */
+typedef struct ZSTDv05_DCtx_s ZSTDv05_DCtx;
+ZSTDv05_DCtx* ZSTDv05_createDCtx(void);
+size_t ZSTDv05_freeDCtx(ZSTDv05_DCtx* dctx);      /*!< @return : errorCode */
+
+/** ZSTDv05_decompressDCtx() :
+*   Same as ZSTDv05_decompress(), but requires an already allocated ZSTDv05_DCtx (see ZSTDv05_createDCtx()) */
+size_t ZSTDv05_decompressDCtx(ZSTDv05_DCtx* ctx, void* dst, size_t dstCapacity, const void* src, size_t srcSize);
+
+
+/*-***********************
+*  Simple Dictionary API
+*************************/
+/*! ZSTDv05_decompress_usingDict() :
+*   Decompression using a pre-defined Dictionary content (see dictBuilder).
+*   Dictionary must be identical to the one used during compression, otherwise regenerated data will be corrupted.
+*   Note : dict can be NULL, in which case, it's equivalent to ZSTDv05_decompressDCtx() */
+size_t ZSTDv05_decompress_usingDict(ZSTDv05_DCtx* dctx,
+                                            void* dst, size_t dstCapacity,
+                                      const void* src, size_t srcSize,
+                                      const void* dict,size_t dictSize);
+
+/*-************************
+*  Advanced Streaming API
+***************************/
+typedef enum { ZSTDv05_fast, ZSTDv05_greedy, ZSTDv05_lazy, ZSTDv05_lazy2, ZSTDv05_btlazy2, ZSTDv05_opt, ZSTDv05_btopt } ZSTDv05_strategy;
+typedef struct {
+    U64 srcSize;
+    U32 windowLog;     /* the only useful information to retrieve */
+    U32 contentLog; U32 hashLog; U32 searchLog; U32 searchLength; U32 targetLength; ZSTDv05_strategy strategy;
+} ZSTDv05_parameters;
+size_t ZSTDv05_getFrameParams(ZSTDv05_parameters* params, const void* src, size_t srcSize);
+
+size_t ZSTDv05_decompressBegin_usingDict(ZSTDv05_DCtx* dctx, const void* dict, size_t dictSize);
+void   ZSTDv05_copyDCtx(ZSTDv05_DCtx* dstDCtx, const ZSTDv05_DCtx* srcDCtx);
+size_t ZSTDv05_nextSrcSizeToDecompress(ZSTDv05_DCtx* dctx);
+size_t ZSTDv05_decompressContinue(ZSTDv05_DCtx* dctx, void* dst, size_t dstCapacity, const void* src, size_t srcSize);
+
+
+/*-***********************
+*  ZBUFF API
+*************************/
+typedef struct ZBUFFv05_DCtx_s ZBUFFv05_DCtx;
+ZBUFFv05_DCtx* ZBUFFv05_createDCtx(void);
+size_t         ZBUFFv05_freeDCtx(ZBUFFv05_DCtx* dctx);
+
+size_t ZBUFFv05_decompressInit(ZBUFFv05_DCtx* dctx);
+size_t ZBUFFv05_decompressInitDictionary(ZBUFFv05_DCtx* dctx, const void* dict, size_t dictSize);
+
+size_t ZBUFFv05_decompressContinue(ZBUFFv05_DCtx* dctx,
+                                            void* dst, size_t* dstCapacityPtr,
+                                      const void* src, size_t* srcSizePtr);
+
+/*-***************************************************************************
+*  Streaming decompression
+*
+*  A ZBUFFv05_DCtx object is required to track streaming operations.
+*  Use ZBUFFv05_createDCtx() and ZBUFFv05_freeDCtx() to create/release resources.
+*  Use ZBUFFv05_decompressInit() to start a new decompression operation,
+*   or ZBUFFv05_decompressInitDictionary() if decompression requires a dictionary.
+*  Note that ZBUFFv05_DCtx objects can be reused multiple times.
+*
+*  Use ZBUFFv05_decompressContinue() repeatedly to consume your input.
+*  *srcSizePtr and *dstCapacityPtr can be any size.
+*  The function will report how many bytes were read or written by modifying *srcSizePtr and *dstCapacityPtr.
+*  Note that it may not consume the entire input, in which case it's up to the caller to present remaining input again.
+*  The content of @dst will be overwritten (up to *dstCapacityPtr) at each function call, so save its content if it matters or change @dst.
+*  @return : a hint to preferred nb of bytes to use as input for next function call (it's only a hint, to help latency)
+*            or 0 when a frame is completely decoded
+*            or an error code, which can be tested using ZBUFFv05_isError().
+*
+*  Hint : recommended buffer sizes (not compulsory) : ZBUFFv05_recommendedDInSize() / ZBUFFv05_recommendedDOutSize()
+*  output : ZBUFFv05_recommendedDOutSize==128 KB block size is the internal unit, it ensures it's always possible to write a full block when decoded.
+*  input  : ZBUFFv05_recommendedDInSize==128 KB + 3; just follow indications from ZBUFFv05_decompressContinue() to minimize latency. It should always be <= 128 KB + 3 .
+* *******************************************************************************/
+
+
+/* *************************************
+*  Tool functions
+***************************************/
+unsigned ZBUFFv05_isError(size_t errorCode);
+const char* ZBUFFv05_getErrorName(size_t errorCode);
+
+/** Functions below provide recommended buffer sizes for Compression or Decompression operations.
+*   These sizes are just hints, and tend to offer better latency */
+size_t ZBUFFv05_recommendedDInSize(void);
+size_t ZBUFFv05_recommendedDOutSize(void);
+
+
+
+/*-*************************************
+*  Constants
+***************************************/
+#define ZSTDv05_MAGICNUMBER 0xFD2FB525   /* v0.5 */
+
+
+
+
+#if defined (__cplusplus)
+}
+#endif
+
+#endif  /* ZSTDv05_H */
diff --git a/contrib/libzstd/include/zstd/legacy/zstd_v06.c b/contrib/libzstd/include/zstd/legacy/zstd_v06.c
index d9e89f806fd..26f0929da6f 100644
--- a/contrib/libzstd/include/zstd/legacy/zstd_v06.c
+++ b/contrib/libzstd/include/zstd/legacy/zstd_v06.c
@@ -537,7 +537,7 @@ static void ZSTDv06_copy8(void* dst, const void* src) { memcpy(dst, src, 8); }
 /*! ZSTDv06_wildcopy() :
 *   custom version of memcpy(), can copy up to 7 bytes too many (8 bytes if length==0) */
 #define WILDCOPY_OVERLENGTH 8
-MEM_STATIC void ZSTDv06_wildcopy(void* dst, const void* src, size_t length)
+MEM_STATIC void ZSTDv06_wildcopy(void* dst, const void* src, ptrdiff_t length)
 {
     const BYTE* ip = (const BYTE*)src;
     BYTE* op = (BYTE*)dst;
@@ -910,13 +910,13 @@ MEM_STATIC size_t BITv06_initDStream(BITv06_DStream_t* bitD, const void* srcBuff
         bitD->bitContainer = *(const BYTE*)(bitD->start);
         switch(srcSize)
         {
-            case 7: bitD->bitContainer += (size_t)(((const BYTE*)(srcBuffer))[6]) << (sizeof(bitD->bitContainer)*8 - 16);
-            case 6: bitD->bitContainer += (size_t)(((const BYTE*)(srcBuffer))[5]) << (sizeof(bitD->bitContainer)*8 - 24);
-            case 5: bitD->bitContainer += (size_t)(((const BYTE*)(srcBuffer))[4]) << (sizeof(bitD->bitContainer)*8 - 32);
-            case 4: bitD->bitContainer += (size_t)(((const BYTE*)(srcBuffer))[3]) << 24;
-            case 3: bitD->bitContainer += (size_t)(((const BYTE*)(srcBuffer))[2]) << 16;
-            case 2: bitD->bitContainer += (size_t)(((const BYTE*)(srcBuffer))[1]) <<  8;
-            default:;
+            case 7: bitD->bitContainer += (size_t)(((const BYTE*)(srcBuffer))[6]) << (sizeof(bitD->bitContainer)*8 - 16);/* fall-through */
+            case 6: bitD->bitContainer += (size_t)(((const BYTE*)(srcBuffer))[5]) << (sizeof(bitD->bitContainer)*8 - 24);/* fall-through */
+            case 5: bitD->bitContainer += (size_t)(((const BYTE*)(srcBuffer))[4]) << (sizeof(bitD->bitContainer)*8 - 32);/* fall-through */
+            case 4: bitD->bitContainer += (size_t)(((const BYTE*)(srcBuffer))[3]) << 24; /* fall-through */
+            case 3: bitD->bitContainer += (size_t)(((const BYTE*)(srcBuffer))[2]) << 16; /* fall-through */
+            case 2: bitD->bitContainer += (size_t)(((const BYTE*)(srcBuffer))[1]) <<  8; /* fall-through */
+            default: break;
         }
         { BYTE const lastByte = ((const BYTE*)srcBuffer)[srcSize-1];
           if (lastByte == 0) return ERROR(GENERIC);   /* endMark not present */
@@ -982,8 +982,8 @@ MEM_STATIC size_t BITv06_readBitsFast(BITv06_DStream_t* bitD, U32 nbBits)
               if status == unfinished, internal register is filled with >= (sizeof(bitD->bitContainer)*8 - 7) bits */
 MEM_STATIC BITv06_DStream_status BITv06_reloadDStream(BITv06_DStream_t* bitD)
 {
-	if (bitD->bitsConsumed > (sizeof(bitD->bitContainer)*8))  /* should never happen */
-		return BITv06_DStream_overflow;
+    if (bitD->bitsConsumed > (sizeof(bitD->bitContainer)*8))  /* should never happen */
+        return BITv06_DStream_overflow;
 
     if (bitD->ptr >= bitD->start + sizeof(bitD->bitContainer)) {
         bitD->ptr -= bitD->bitsConsumed >> 3;
@@ -1932,9 +1932,11 @@ MEM_STATIC size_t HUFv06_readStats(BYTE* huffWeight, size_t hwSize, U32* rankSta
 {
     U32 weightTotal;
     const BYTE* ip = (const BYTE*) src;
-    size_t iSize = ip[0];
+    size_t iSize;
     size_t oSize;
 
+    if (!srcSize) return ERROR(srcSize_wrong);
+    iSize = ip[0];
     //memset(huffWeight, 0, hwSize);   /* is not necessary, even though some analyzer complain ... */
 
     if (iSize >= 128)  { /* special header */
@@ -1969,6 +1971,7 @@ MEM_STATIC size_t HUFv06_readStats(BYTE* huffWeight, size_t hwSize, U32* rankSta
             rankStats[huffWeight[n]]++;
             weightTotal += (1 << huffWeight[n]) >> 1;
     }   }
+    if (weightTotal == 0) return ERROR(corruption_detected);
 
     /* get last non-null symbol weight (implied, total must be 2^n) */
     {   U32 const tableLog = BITv06_highbit32(weightTotal) + 1;
@@ -2890,7 +2893,6 @@ struct ZSTDv06_DCtx_s
     ZSTDv06_dStage stage;
     U32 flagRepeatTable;
     const BYTE* litPtr;
-    size_t litBufSize;
     size_t litSize;
     BYTE litBuffer[ZSTDv06_BLOCKSIZE_MAX + WILDCOPY_OVERLENGTH];
     BYTE headerBuffer[ZSTDv06_FRAMEHEADERSIZE_MAX];
@@ -3167,8 +3169,8 @@ size_t ZSTDv06_decodeLiteralsBlock(ZSTDv06_DCtx* dctx,
                 return ERROR(corruption_detected);
 
             dctx->litPtr = dctx->litBuffer;
-            dctx->litBufSize = ZSTDv06_BLOCKSIZE_MAX+8;
             dctx->litSize = litSize;
+            memset(dctx->litBuffer + dctx->litSize, 0, WILDCOPY_OVERLENGTH);
             return litCSize + lhSize;
         }
     case IS_PCH:
@@ -3183,13 +3185,14 @@ size_t ZSTDv06_decodeLiteralsBlock(ZSTDv06_DCtx* dctx,
             lhSize=3;
             litSize  = ((istart[0] & 15) << 6) + (istart[1] >> 2);
             litCSize = ((istart[1] &  3) << 8) + istart[2];
+            if (litCSize + lhSize > srcSize) return ERROR(corruption_detected);
 
             {   size_t const errorCode = HUFv06_decompress1X4_usingDTable(dctx->litBuffer, litSize, istart+lhSize, litCSize, dctx->hufTableX4);
                 if (HUFv06_isError(errorCode)) return ERROR(corruption_detected);
             }
             dctx->litPtr = dctx->litBuffer;
-            dctx->litBufSize = ZSTDv06_BLOCKSIZE_MAX+WILDCOPY_OVERLENGTH;
             dctx->litSize = litSize;
+            memset(dctx->litBuffer + dctx->litSize, 0, WILDCOPY_OVERLENGTH);
             return litCSize + lhSize;
         }
     case IS_RAW:
@@ -3213,13 +3216,12 @@ size_t ZSTDv06_decodeLiteralsBlock(ZSTDv06_DCtx* dctx,
                 if (litSize+lhSize > srcSize) return ERROR(corruption_detected);
                 memcpy(dctx->litBuffer, istart+lhSize, litSize);
                 dctx->litPtr = dctx->litBuffer;
-                dctx->litBufSize = ZSTDv06_BLOCKSIZE_MAX+8;
                 dctx->litSize = litSize;
+                memset(dctx->litBuffer + dctx->litSize, 0, WILDCOPY_OVERLENGTH);
                 return lhSize+litSize;
             }
             /* direct reference into compressed stream */
             dctx->litPtr = istart+lhSize;
-            dctx->litBufSize = srcSize-lhSize;
             dctx->litSize = litSize;
             return lhSize+litSize;
         }
@@ -3241,9 +3243,8 @@ size_t ZSTDv06_decodeLiteralsBlock(ZSTDv06_DCtx* dctx,
                 break;
             }
             if (litSize > ZSTDv06_BLOCKSIZE_MAX) return ERROR(corruption_detected);
-            memset(dctx->litBuffer, istart[lhSize], litSize);
+            memset(dctx->litBuffer, istart[lhSize], litSize + WILDCOPY_OVERLENGTH);
             dctx->litPtr = dctx->litBuffer;
-            dctx->litBufSize = ZSTDv06_BLOCKSIZE_MAX+WILDCOPY_OVERLENGTH;
             dctx->litSize = litSize;
             return lhSize+1;
         }
@@ -3302,10 +3303,13 @@ size_t ZSTDv06_decodeSeqHeaders(int* nbSeqPtr,
     {   int nbSeq = *ip++;
         if (!nbSeq) { *nbSeqPtr=0; return 1; }
         if (nbSeq > 0x7F) {
-            if (nbSeq == 0xFF)
+            if (nbSeq == 0xFF) {
+                if (ip+2 > iend) return ERROR(srcSize_wrong);
                 nbSeq = MEM_readLE16(ip) + LONGNBSEQ, ip+=2;
-            else
+            } else {
+                if (ip >= iend) return ERROR(srcSize_wrong);
                 nbSeq = ((nbSeq-0x80)<<8) + *ip++;
+            }
         }
         *nbSeqPtr = nbSeq;
     }
@@ -3431,7 +3435,7 @@ static void ZSTDv06_decodeSequence(seq_t* seq, seqState_t* seqState)
 
 size_t ZSTDv06_execSequence(BYTE* op,
                                 BYTE* const oend, seq_t sequence,
-                                const BYTE** litPtr, const BYTE* const litLimit_8,
+                                const BYTE** litPtr, const BYTE* const litLimit,
                                 const BYTE* const base, const BYTE* const vBase, const BYTE* const dictEnd)
 {
     BYTE* const oLitEnd = op + sequence.litLength;
@@ -3444,7 +3448,7 @@ size_t ZSTDv06_execSequence(BYTE* op,
     /* check */
     if (oLitEnd > oend_8) return ERROR(dstSize_tooSmall);   /* last match must start at a minimum distance of 8 from oend */
     if (oMatchEnd > oend) return ERROR(dstSize_tooSmall);   /* overwrite beyond dst buffer */
-    if (iLitEnd > litLimit_8) return ERROR(corruption_detected);   /* over-read beyond lit buffer */
+    if (iLitEnd > litLimit) return ERROR(corruption_detected);   /* over-read beyond lit buffer */
 
     /* copy Literals */
     ZSTDv06_wildcopy(op, *litPtr, sequence.litLength);   /* note : oLitEnd <= oend-8 : no risk of overwrite beyond oend */
@@ -3466,7 +3470,12 @@ size_t ZSTDv06_execSequence(BYTE* op,
             op = oLitEnd + length1;
             sequence.matchLength -= length1;
             match = base;
+            if (op > oend_8 || sequence.matchLength < MINMATCH) {
+              while (op < oMatchEnd) *op++ = *match++;
+              return sequenceLength;
+            }
     }   }
+    /* Requirement: op <= oend_8 */
 
     /* match within prefix */
     if (sequence.offset < 8) {
@@ -3494,7 +3503,7 @@ size_t ZSTDv06_execSequence(BYTE* op,
         }
         while (op < oMatchEnd) *op++ = *match++;
     } else {
-        ZSTDv06_wildcopy(op, match, sequence.matchLength-8);   /* works even if matchLength < 8 */
+        ZSTDv06_wildcopy(op, match, (ptrdiff_t)sequence.matchLength-8);   /* works even if matchLength < 8 */
     }
     return sequenceLength;
 }
@@ -3511,7 +3520,6 @@ static size_t ZSTDv06_decompressSequences(
     BYTE* const oend = ostart + maxDstSize;
     BYTE* op = ostart;
     const BYTE* litPtr = dctx->litPtr;
-    const BYTE* const litLimit_8 = litPtr + dctx->litBufSize - 8;
     const BYTE* const litEnd = litPtr + dctx->litSize;
     FSEv06_DTable* DTableLL = dctx->LLTable;
     FSEv06_DTable* DTableML = dctx->MLTable;
@@ -3555,7 +3563,7 @@ static size_t ZSTDv06_decompressSequences(
                        pos, (U32)sequence.litLength, (U32)sequence.matchLength, (U32)sequence.offset);
 #endif
 
-            {   size_t const oneSeqSize = ZSTDv06_execSequence(op, oend, sequence, &litPtr, litLimit_8, base, vBase, dictEnd);
+            {   size_t const oneSeqSize = ZSTDv06_execSequence(op, oend, sequence, &litPtr, litEnd, base, vBase, dictEnd);
                 if (ZSTDv06_isError(oneSeqSize)) return oneSeqSize;
                 op += oneSeqSize;
         }   }
@@ -3721,6 +3729,37 @@ size_t ZSTDv06_decompress(void* dst, size_t dstCapacity, const void* src, size_t
 #endif
 }
 
+size_t ZSTDv06_findFrameCompressedSize(const void* src, size_t srcSize)
+{   /* Returns the byte length of the compressed frame starting at src (frame header up to and including the end-of-frame block header), or an error code testable with ZSTDv06_isError(). */
+    const BYTE* ip = (const BYTE*)src;
+    size_t remainingSize = srcSize;
+    blockProperties_t blockProperties = { bt_compressed, 0 };
+
+    /* Frame Header */
+    {   size_t const frameHeaderSize = ZSTDv06_frameHeaderSize(src, srcSize);   /* pass the real input size, not the minimum, so the helper can reject a src shorter than a frame header */
+        if (ZSTDv06_isError(frameHeaderSize)) return frameHeaderSize;
+        if (MEM_readLE32(src) != ZSTDv06_MAGICNUMBER) return ERROR(prefix_unknown);
+        if (srcSize < frameHeaderSize+ZSTDv06_blockHeaderSize) return ERROR(srcSize_wrong);
+        ip += frameHeaderSize; remainingSize -= frameHeaderSize;
+    }
+
+    /* Loop on each block */
+    while (1) {
+        size_t const cBlockSize = ZSTDv06_getcBlockSize(ip, remainingSize, &blockProperties);
+        if (ZSTDv06_isError(cBlockSize)) return cBlockSize;
+
+        ip += ZSTDv06_blockHeaderSize;
+        remainingSize -= ZSTDv06_blockHeaderSize;
+        if (cBlockSize > remainingSize) return ERROR(srcSize_wrong);   /* block body would run past the end of src */
+
+        if (cBlockSize == 0) break;   /* bt_end */
+
+        ip += cBlockSize;
+        remainingSize -= cBlockSize;
+    }
+
+    return ip - (const BYTE*)src;
+}
 
 /*_******************************
 *  Streaming Decompression API
@@ -3750,7 +3789,7 @@ size_t ZSTDv06_decompressContinue(ZSTDv06_DCtx* dctx, void* dst, size_t dstCapac
             return 0;
         }
         dctx->expected = 0;   /* not necessary to copy more */
-
+	/* fall-through */
     case ZSTDds_decodeFrameHeader:
         {   size_t result;
             memcpy(dctx->headerBuffer + ZSTDv06_frameHeaderSize_min, src, dctx->expected);
@@ -3822,9 +3861,10 @@ static size_t ZSTDv06_loadEntropy(ZSTDv06_DCtx* dctx, const void* dict, size_t d
     dictSize -= hSize;
 
     {   short offcodeNCount[MaxOff+1];
-        U32 offcodeMaxValue=MaxOff, offcodeLog=OffFSELog;
+        U32 offcodeMaxValue=MaxOff, offcodeLog;
         offcodeHeaderSize = FSEv06_readNCount(offcodeNCount, &offcodeMaxValue, &offcodeLog, dict, dictSize);
         if (FSEv06_isError(offcodeHeaderSize)) return ERROR(dictionary_corrupted);
+        if (offcodeLog > OffFSELog) return ERROR(dictionary_corrupted);
         { size_t const errorCode = FSEv06_buildDTable(dctx->OffTable, offcodeNCount, offcodeMaxValue, offcodeLog);
           if (FSEv06_isError(errorCode)) return ERROR(dictionary_corrupted); }
         dict = (const char*)dict + offcodeHeaderSize;
@@ -3832,9 +3872,10 @@ static size_t ZSTDv06_loadEntropy(ZSTDv06_DCtx* dctx, const void* dict, size_t d
     }
 
     {   short matchlengthNCount[MaxML+1];
-        unsigned matchlengthMaxValue = MaxML, matchlengthLog = MLFSELog;
+        unsigned matchlengthMaxValue = MaxML, matchlengthLog;
         matchlengthHeaderSize = FSEv06_readNCount(matchlengthNCount, &matchlengthMaxValue, &matchlengthLog, dict, dictSize);
         if (FSEv06_isError(matchlengthHeaderSize)) return ERROR(dictionary_corrupted);
+        if (matchlengthLog > MLFSELog) return ERROR(dictionary_corrupted);
         { size_t const errorCode = FSEv06_buildDTable(dctx->MLTable, matchlengthNCount, matchlengthMaxValue, matchlengthLog);
           if (FSEv06_isError(errorCode)) return ERROR(dictionary_corrupted); }
         dict = (const char*)dict + matchlengthHeaderSize;
@@ -3842,9 +3883,10 @@ static size_t ZSTDv06_loadEntropy(ZSTDv06_DCtx* dctx, const void* dict, size_t d
     }
 
     {   short litlengthNCount[MaxLL+1];
-        unsigned litlengthMaxValue = MaxLL, litlengthLog = LLFSELog;
+        unsigned litlengthMaxValue = MaxLL, litlengthLog;
         litlengthHeaderSize = FSEv06_readNCount(litlengthNCount, &litlengthMaxValue, &litlengthLog, dict, dictSize);
         if (FSEv06_isError(litlengthHeaderSize)) return ERROR(dictionary_corrupted);
+        if (litlengthLog > LLFSELog) return ERROR(dictionary_corrupted);
         { size_t const errorCode = FSEv06_buildDTable(dctx->LLTable, litlengthNCount, litlengthMaxValue, litlengthLog);
           if (FSEv06_isError(errorCode)) return ERROR(dictionary_corrupted); }
     }
@@ -4066,7 +4108,7 @@ size_t ZBUFFv06_decompressContinue(ZBUFFv06_DCtx* zbd,
                     zbd->inBuff = (char*)malloc(blockSize);
                     if (zbd->inBuff == NULL) return ERROR(memory_allocation);
                 }
-                {   size_t const neededOutSize = ((size_t)1 << zbd->fParams.windowLog) + blockSize;
+                {   size_t const neededOutSize = ((size_t)1 << zbd->fParams.windowLog) + blockSize + WILDCOPY_OVERLENGTH * 2;
                     if (zbd->outBuffSize < neededOutSize) {
                         free(zbd->outBuff);
                         zbd->outBuffSize = neededOutSize;
@@ -4074,7 +4116,7 @@ size_t ZBUFFv06_decompressContinue(ZBUFFv06_DCtx* zbd,
                         if (zbd->outBuff == NULL) return ERROR(memory_allocation);
             }   }   }
             zbd->stage = ZBUFFds_read;
-
+	    /* fall-through */
         case ZBUFFds_read:
             {   size_t const neededInSize = ZSTDv06_nextSrcSizeToDecompress(zbd->zd);
                 if (neededInSize==0) {  /* end of frame */
@@ -4096,7 +4138,7 @@ size_t ZBUFFv06_decompressContinue(ZBUFFv06_DCtx* zbd,
                 if (ip==iend) { notDone = 0; break; }   /* no more input */
                 zbd->stage = ZBUFFds_load;
             }
-
+	    /* fall-through */
         case ZBUFFds_load:
             {   size_t const neededInSize = ZSTDv06_nextSrcSizeToDecompress(zbd->zd);
                 size_t const toLoad = neededInSize - zbd->inPos;   /* should always be <= remaining space within inBuff */
@@ -4117,8 +4159,9 @@ size_t ZBUFFv06_decompressContinue(ZBUFFv06_DCtx* zbd,
                     zbd->outEnd = zbd->outStart +  decodedSize;
                     zbd->stage = ZBUFFds_flush;
                     // break; /* ZBUFFds_flush follows */
-            }   }
-
+                }
+	    }
+	    /* fall-through */
         case ZBUFFds_flush:
             {   size_t const toFlushSize = zbd->outEnd - zbd->outStart;
                 size_t const flushedSize = ZBUFFv06_limitCopy(op, oend-op, zbd->outBuff + zbd->outStart, toFlushSize);
diff --git a/contrib/libzstd/include/zstd/legacy/zstd_v06.h b/contrib/libzstd/include/zstd/legacy/zstd_v06.h
index 14040abdd94..10c9c772597 100644
--- a/contrib/libzstd/include/zstd/legacy/zstd_v06.h
+++ b/contrib/libzstd/include/zstd/legacy/zstd_v06.h
@@ -41,6 +41,13 @@ extern "C" {
 ZSTDLIBv06_API size_t ZSTDv06_decompress( void* dst, size_t dstCapacity,
                                     const void* src, size_t compressedSize);
 
+/**
+ZSTDv06_findFrameCompressedSize() : get the source length of a ZSTD frame
+    compressedSize : The size of the 'src' buffer, at least as large as the frame pointed to by 'src'
+    return : the number of bytes that would be read to decompress this frame
+             or an errorCode if it fails (which can be tested using ZSTDv06_isError())
+*/
+size_t ZSTDv06_findFrameCompressedSize(const void* src, size_t compressedSize);
 
 /* *************************************
 *  Helper functions
diff --git a/contrib/libzstd/include/zstd/legacy/zstd_v07.c b/contrib/libzstd/include/zstd/legacy/zstd_v07.c
new file mode 100644
index 00000000000..6669b71cea4
--- /dev/null
+++ b/contrib/libzstd/include/zstd/legacy/zstd_v07.c
@@ -0,0 +1,4577 @@
+/**
+ * Copyright (c) 2016-present, Yann Collet, Facebook, Inc.
+ * All rights reserved.
+ *
+ * This source code is licensed under the BSD-style license found in the
+ * LICENSE file in the root directory of this source tree. An additional grant
+ * of patent rights can be found in the PATENTS file in the same directory.
+ */
+
+
+/*- Dependencies -*/
+#include <stddef.h>     /* size_t, ptrdiff_t */
+#include <string.h>     /* memcpy */
+#include <stdlib.h>     /* malloc, free, qsort */
+
+#ifndef XXH_STATIC_LINKING_ONLY
+#  define XXH_STATIC_LINKING_ONLY    /* XXH64_state_t */
+#endif
+#include "xxhash.h"                  /* XXH64_* */
+#include "zstd_v07.h"
+
+#define FSEv07_STATIC_LINKING_ONLY   /* FSEv07_MIN_TABLELOG */
+#define HUFv07_STATIC_LINKING_ONLY   /* HUFv07_TABLELOG_ABSOLUTEMAX */
+#define ZSTDv07_STATIC_LINKING_ONLY
+
+#include "error_private.h"
+
+
+#ifdef ZSTDv07_STATIC_LINKING_ONLY
+
+/* ====================================================================================
+ * The definitions in this section are considered experimental.
+ * They should never be used with a dynamic library, as they may change in the future.
+ * They are provided for advanced usages.
+ * Use them only in association with static linking.
+ * ==================================================================================== */
+
+/*--- Constants ---*/
+#define ZSTDv07_MAGIC_SKIPPABLE_START  0x184D2A50U
+
+#define ZSTDv07_WINDOWLOG_MAX_32  25
+#define ZSTDv07_WINDOWLOG_MAX_64  27
+#define ZSTDv07_WINDOWLOG_MAX    ((U32)(MEM_32bits() ? ZSTDv07_WINDOWLOG_MAX_32 : ZSTDv07_WINDOWLOG_MAX_64))
+#define ZSTDv07_WINDOWLOG_MIN     18
+#define ZSTDv07_CHAINLOG_MAX     (ZSTDv07_WINDOWLOG_MAX+1)
+#define ZSTDv07_CHAINLOG_MIN       4
+#define ZSTDv07_HASHLOG_MAX       ZSTDv07_WINDOWLOG_MAX
+#define ZSTDv07_HASHLOG_MIN       12
+#define ZSTDv07_HASHLOG3_MAX      17
+#define ZSTDv07_SEARCHLOG_MAX    (ZSTDv07_WINDOWLOG_MAX-1)
+#define ZSTDv07_SEARCHLOG_MIN      1
+#define ZSTDv07_SEARCHLENGTH_MAX   7
+#define ZSTDv07_SEARCHLENGTH_MIN   3
+#define ZSTDv07_TARGETLENGTH_MIN   4
+#define ZSTDv07_TARGETLENGTH_MAX 999
+
+#define ZSTDv07_FRAMEHEADERSIZE_MAX 18    /* for static allocation */
+static const size_t ZSTDv07_frameHeaderSize_min = 5;
+static const size_t ZSTDv07_frameHeaderSize_max = ZSTDv07_FRAMEHEADERSIZE_MAX;
+static const size_t ZSTDv07_skippableHeaderSize = 8;  /* magic number + skippable frame length */
+
+
+/* custom memory allocation functions */
+typedef void* (*ZSTDv07_allocFunction) (void* opaque, size_t size);
+typedef void  (*ZSTDv07_freeFunction) (void* opaque, void* address);
+typedef struct { ZSTDv07_allocFunction customAlloc; ZSTDv07_freeFunction customFree; void* opaque; } ZSTDv07_customMem;
+
+
+/*--- Advanced Decompression functions ---*/
+
+/*! ZSTDv07_estimateDCtxSize() :
+ *  Gives the potential amount of memory allocated to create a ZSTDv07_DCtx */
+ZSTDLIBv07_API size_t ZSTDv07_estimateDCtxSize(void);
+
+/*! ZSTDv07_createDCtx_advanced() :
+ *  Create a ZSTD decompression context using external alloc and free functions */
+ZSTDLIBv07_API ZSTDv07_DCtx* ZSTDv07_createDCtx_advanced(ZSTDv07_customMem customMem);
+
+/*! ZSTDv07_sizeofDCtx() :
+ *  Gives the amount of memory used by a given ZSTDv07_DCtx */
+ZSTDLIBv07_API size_t ZSTDv07_sizeofDCtx(const ZSTDv07_DCtx* dctx);
+
+
+/* ******************************************************************
+*  Buffer-less streaming functions (synchronous mode)
+********************************************************************/
+
+ZSTDLIBv07_API size_t ZSTDv07_decompressBegin(ZSTDv07_DCtx* dctx);
+ZSTDLIBv07_API size_t ZSTDv07_decompressBegin_usingDict(ZSTDv07_DCtx* dctx, const void* dict, size_t dictSize);
+ZSTDLIBv07_API void   ZSTDv07_copyDCtx(ZSTDv07_DCtx* dctx, const ZSTDv07_DCtx* preparedDCtx);
+
+ZSTDLIBv07_API size_t ZSTDv07_nextSrcSizeToDecompress(ZSTDv07_DCtx* dctx);
+ZSTDLIBv07_API size_t ZSTDv07_decompressContinue(ZSTDv07_DCtx* dctx, void* dst, size_t dstCapacity, const void* src, size_t srcSize);
+
+/*
+  Buffer-less streaming decompression (synchronous mode)
+
+  A ZSTDv07_DCtx object is required to track streaming operations.
+  Use ZSTDv07_createDCtx() / ZSTDv07_freeDCtx() to manage it.
+  A ZSTDv07_DCtx object can be re-used multiple times.
+
+  First optional operation is to retrieve frame parameters, using ZSTDv07_getFrameParams(), which doesn't consume the input.
+  It can provide the minimum size of rolling buffer required to properly decompress data (`windowSize`),
+  and optionally the final size of uncompressed content.
+  (Note : content size is an optional info that may not be present. 0 means : content size unknown)
+  Frame parameters are extracted from the beginning of compressed frame.
+  The amount of data to read is variable, from ZSTDv07_frameHeaderSize_min to ZSTDv07_frameHeaderSize_max (so if `srcSize` >= ZSTDv07_frameHeaderSize_max, it will always work)
+  If `srcSize` is too small for operation to succeed, function will return the minimum size it requires to produce a result.
+  Result : 0 when successful, it means the ZSTDv07_frameParams structure has been filled.
+          >0 : means there is not enough data into `src`. Provides the expected size to successfully decode header.
+           errorCode, which can be tested using ZSTDv07_isError()
+
+  Start decompression, with ZSTDv07_decompressBegin() or ZSTDv07_decompressBegin_usingDict().
+  Alternatively, you can copy a prepared context, using ZSTDv07_copyDCtx().
+
+  Then call ZSTDv07_nextSrcSizeToDecompress() and ZSTDv07_decompressContinue() alternately.
+  ZSTDv07_nextSrcSizeToDecompress() tells how many bytes to provide as 'srcSize' to ZSTDv07_decompressContinue().
+  ZSTDv07_decompressContinue() requires this exact amount of bytes, or it will fail.
+
+  @result of ZSTDv07_decompressContinue() is the number of bytes regenerated within 'dst' (necessarily <= dstCapacity).
+  It can be zero, which is not an error; it just means ZSTDv07_decompressContinue() has decoded some header.
+
+  ZSTDv07_decompressContinue() needs previous data blocks during decompression, up to `windowSize`.
+  They should preferably be located contiguously, prior to current block.
+  Alternatively, a round buffer of sufficient size is also possible. Sufficient size is determined by frame parameters.
+  ZSTDv07_decompressContinue() is very sensitive to contiguity,
+  if 2 blocks don't follow each other, make sure that either the compressor breaks contiguity at the same place,
+    or that previous contiguous segment is large enough to properly handle maximum back-reference.
+
+  A frame is fully decoded when ZSTDv07_nextSrcSizeToDecompress() returns zero.
+  Context can then be reset to start a new decompression.
+
+
+  == Special case : skippable frames ==
+
+  Skippable frames allow the integration of user-defined data into a flow of concatenated frames.
+  Skippable frames will be ignored (skipped) by a decompressor. The format of skippable frame is following:
+  a) Skippable frame ID - 4 Bytes, Little endian format, any value from 0x184D2A50 to 0x184D2A5F
+  b) Frame Size - 4 Bytes, Little endian format, unsigned 32-bits
+  c) Frame Content - any content (User Data) of length equal to Frame Size
+  For skippable frames ZSTDv07_decompressContinue() always returns 0.
+  For skippable frames ZSTDv07_getFrameParams() returns fparamsPtr->windowLog==0, which means that the frame is skippable.
+  It also returns Frame Size as fparamsPtr->frameContentSize.
+*/
+
+
+/* **************************************
+*  Block functions
+****************************************/
+/*! Block functions produce and decode raw zstd blocks, without frame metadata.
+    Frame metadata cost is typically ~18 bytes, which can be non-negligible for very small blocks (< 100 bytes).
+    The user is responsible for keeping the information required to regenerate data, such as compressed and content sizes.
+
+    A few rules to respect :
+    - Compressing and decompressing require a context structure
+      + Use ZSTDv07_createCCtx() and ZSTDv07_createDCtx()
+    - It is necessary to init context before starting
+      + compression : ZSTDv07_compressBegin()
+      + decompression : ZSTDv07_decompressBegin()
+      + variants _usingDict() are also allowed
+      + copyCCtx() and copyDCtx() work too
+    - Block size is limited, it must be <= ZSTDv07_getBlockSizeMax()
+      + If you need to compress more, cut data into multiple blocks
+      + Consider using the regular ZSTDv07_compress() instead, as frame metadata costs become negligible when source size is large.
+    - When a block is considered not compressible enough, ZSTDv07_compressBlock() result will be zero.
+      In which case, nothing is produced into `dst`.
+      + User must test for such outcome and deal directly with uncompressed data
+      + ZSTDv07_decompressBlock() doesn't accept uncompressed data as input !!!
+      + In case of multiple successive blocks, decoder must be informed of uncompressed block existence to follow proper history.
+        Use ZSTDv07_insertBlock() in such a case.
+*/
+
+#define ZSTDv07_BLOCKSIZE_ABSOLUTEMAX (128 * 1024)   /* define, for static allocation */
+ZSTDLIBv07_API size_t ZSTDv07_decompressBlock(ZSTDv07_DCtx* dctx, void* dst, size_t dstCapacity, const void* src, size_t srcSize);
+ZSTDLIBv07_API size_t ZSTDv07_insertBlock(ZSTDv07_DCtx* dctx, const void* blockStart, size_t blockSize);  /**< insert block into `dctx` history. Useful for uncompressed blocks */
+
+
+#endif   /* ZSTDv07_STATIC_LINKING_ONLY */
+
+
+/* ******************************************************************
+   mem.h
+   low-level memory access routines
+   Copyright (C) 2013-2015, Yann Collet.
+
+   BSD 2-Clause License (http://www.opensource.org/licenses/bsd-license.php)
+
+   Redistribution and use in source and binary forms, with or without
+   modification, are permitted provided that the following conditions are
+   met:
+
+       * Redistributions of source code must retain the above copyright
+   notice, this list of conditions and the following disclaimer.
+       * Redistributions in binary form must reproduce the above
+   copyright notice, this list of conditions and the following disclaimer
+   in the documentation and/or other materials provided with the
+   distribution.
+
+   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+   OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+    You can contact the author at :
+    - FSE source repository : https://github.com/Cyan4973/FiniteStateEntropy
+    - Public forum : https://groups.google.com/forum/#!forum/lz4c
+****************************************************************** */
+#ifndef MEM_H_MODULE
+#define MEM_H_MODULE
+
+#if defined (__cplusplus)
+extern "C" {
+#endif
+
+/*-****************************************
+*  Compiler specifics
+******************************************/
+#if defined(_MSC_VER)   /* Visual Studio */
+#   include <stdlib.h>  /* _byteswap_ulong */
+#   include <intrin.h>  /* _byteswap_* */
+#endif
+#if defined(__GNUC__)
+#  define MEM_STATIC static __attribute__((unused))
+#elif defined (__cplusplus) || (defined (__STDC_VERSION__) && (__STDC_VERSION__ >= 199901L) /* C99 */)
+#  define MEM_STATIC static inline
+#elif defined(_MSC_VER)
+#  define MEM_STATIC static __inline
+#else
+#  define MEM_STATIC static  /* this version may generate warnings for unused static functions; disable the relevant warning */
+#endif
+
+
+/*-**************************************************************
+*  Basic Types
+*****************************************************************/
+#if  !defined (__VMS) && (defined (__cplusplus) || (defined (__STDC_VERSION__) && (__STDC_VERSION__ >= 199901L) /* C99 */) )
+# include <stdint.h>
+  typedef  uint8_t BYTE;
+  typedef uint16_t U16;
+  typedef  int16_t S16;
+  typedef uint32_t U32;
+  typedef  int32_t S32;
+  typedef uint64_t U64;
+  typedef  int64_t S64;
+#else
+  typedef unsigned char       BYTE;
+  typedef unsigned short      U16;
+  typedef   signed short      S16;
+  typedef unsigned int        U32;
+  typedef   signed int        S32;
+  typedef unsigned long long  U64;
+  typedef   signed long long  S64;
+#endif
+
+
+/*-**************************************************************
+*  Memory I/O
+*****************************************************************/
+/* MEM_FORCE_MEMORY_ACCESS :
+ * By default, access to unaligned memory is controlled by `memcpy()`, which is safe and portable.
+ * Unfortunately, on some target/compiler combinations, the generated assembly is sub-optimal.
+ * The switch below allows selecting a different access method for improved performance.
+ * Method 0 (default) : use `memcpy()`. Safe and portable.
+ * Method 1 : `__packed` statement. It depends on compiler extension (ie, not portable).
+ *            This method is safe if your compiler supports it, and *generally* as fast or faster than `memcpy`.
+ * Method 2 : direct access. This method is portable but violates the C standard.
+ *            It can generate buggy code on targets depending on alignment.
+ *            In some circumstances, it's the only known way to get the most performance (ie GCC + ARMv6)
+ * See http://fastcompression.blogspot.fr/2015/08/accessing-unaligned-memory.html for details.
+ * Prefer these methods in priority order (0 > 1 > 2)
+ */
+#ifndef MEM_FORCE_MEMORY_ACCESS   /* can be defined externally, on command line for example */
+#  if defined(__GNUC__) && ( defined(__ARM_ARCH_6__) || defined(__ARM_ARCH_6J__) || defined(__ARM_ARCH_6K__) || defined(__ARM_ARCH_6Z__) || defined(__ARM_ARCH_6ZK__) || defined(__ARM_ARCH_6T2__) )
+#    define MEM_FORCE_MEMORY_ACCESS 2
+#  elif (defined(__INTEL_COMPILER) && !defined(WIN32)) || \
+  (defined(__GNUC__) && ( defined(__ARM_ARCH_7__) || defined(__ARM_ARCH_7A__) || defined(__ARM_ARCH_7R__) || defined(__ARM_ARCH_7M__) || defined(__ARM_ARCH_7S__) ))
+#    define MEM_FORCE_MEMORY_ACCESS 1
+#  endif
+#endif
+
+MEM_STATIC unsigned MEM_32bits(void) { return sizeof(size_t)==4; }
+MEM_STATIC unsigned MEM_64bits(void) { return sizeof(size_t)==8; }
+
+MEM_STATIC unsigned MEM_isLittleEndian(void)
+{
+    const union { U32 u; BYTE c[4]; } one = { 1 };   /* don't use static : performance detrimental  */
+    return one.c[0];
+}
+
+#if defined(MEM_FORCE_MEMORY_ACCESS) && (MEM_FORCE_MEMORY_ACCESS==2)
+
+/* violates the C standard, by lying about structure alignment.
+Only use if no other choice to achieve best performance on target platform */
+MEM_STATIC U16 MEM_read16(const void* memPtr) { return *(const U16*) memPtr; }
+MEM_STATIC U32 MEM_read32(const void* memPtr) { return *(const U32*) memPtr; }
+MEM_STATIC U64 MEM_read64(const void* memPtr) { return *(const U64*) memPtr; }
+
+MEM_STATIC void MEM_write16(void* memPtr, U16 value) { *(U16*)memPtr = value; }
+
+#elif defined(MEM_FORCE_MEMORY_ACCESS) && (MEM_FORCE_MEMORY_ACCESS==1)
+
+/* __pack instructions are safer, but compiler specific, hence potentially problematic for some compilers */
+/* currently only defined for gcc and icc */
+typedef union { U16 u16; U32 u32; U64 u64; size_t st; } __attribute__((packed)) unalign;
+
+MEM_STATIC U16 MEM_read16(const void* ptr) { return ((const unalign*)ptr)->u16; }
+MEM_STATIC U32 MEM_read32(const void* ptr) { return ((const unalign*)ptr)->u32; }
+MEM_STATIC U64 MEM_read64(const void* ptr) { return ((const unalign*)ptr)->u64; }
+
+MEM_STATIC void MEM_write16(void* memPtr, U16 value) { ((unalign*)memPtr)->u16 = value; }
+
+#else
+
+/* default method, safe and standard.
+   can sometimes prove slower */
+
+MEM_STATIC U16 MEM_read16(const void* memPtr)
+{
+    U16 val; memcpy(&val, memPtr, sizeof(val)); return val;
+}
+
+MEM_STATIC U32 MEM_read32(const void* memPtr)
+{
+    U32 val; memcpy(&val, memPtr, sizeof(val)); return val;
+}
+
+MEM_STATIC U64 MEM_read64(const void* memPtr)
+{
+    U64 val; memcpy(&val, memPtr, sizeof(val)); return val;
+}
+
+MEM_STATIC void MEM_write16(void* memPtr, U16 value)
+{
+    memcpy(memPtr, &value, sizeof(value));
+}
+
+#endif /* MEM_FORCE_MEMORY_ACCESS */
+
+MEM_STATIC U32 MEM_swap32(U32 in)
+{
+#if defined(_MSC_VER)     /* Visual Studio */
+    return _byteswap_ulong(in);
+#elif defined (__GNUC__)
+    return __builtin_bswap32(in);
+#else
+    return  ((in << 24) & 0xff000000 ) |
+            ((in <<  8) & 0x00ff0000 ) |
+            ((in >>  8) & 0x0000ff00 ) |
+            ((in >> 24) & 0x000000ff );
+#endif
+}
+
+MEM_STATIC U64 MEM_swap64(U64 in)
+{
+#if defined(_MSC_VER)     /* Visual Studio */
+    return _byteswap_uint64(in);
+#elif defined (__GNUC__)
+    return __builtin_bswap64(in);
+#else
+    return  ((in << 56) & 0xff00000000000000ULL) |
+            ((in << 40) & 0x00ff000000000000ULL) |
+            ((in << 24) & 0x0000ff0000000000ULL) |
+            ((in << 8)  & 0x000000ff00000000ULL) |
+            ((in >> 8)  & 0x00000000ff000000ULL) |
+            ((in >> 24) & 0x0000000000ff0000ULL) |
+            ((in >> 40) & 0x000000000000ff00ULL) |
+            ((in >> 56) & 0x00000000000000ffULL);
+#endif
+}
+
+
+/*=== Little endian r/w ===*/
+
+MEM_STATIC U16 MEM_readLE16(const void* memPtr)
+{
+    if (MEM_isLittleEndian())
+        return MEM_read16(memPtr);
+    else {
+        const BYTE* p = (const BYTE*)memPtr;
+        return (U16)(p[0] + (p[1]<<8));
+    }
+}
+
+MEM_STATIC void MEM_writeLE16(void* memPtr, U16 val)
+{
+    if (MEM_isLittleEndian()) {
+        MEM_write16(memPtr, val);
+    } else {
+        BYTE* p = (BYTE*)memPtr;
+        p[0] = (BYTE)val;
+        p[1] = (BYTE)(val>>8);
+    }
+}
+
+MEM_STATIC U32 MEM_readLE32(const void* memPtr)
+{
+    if (MEM_isLittleEndian())
+        return MEM_read32(memPtr);
+    else
+        return MEM_swap32(MEM_read32(memPtr));
+}
+
+
+MEM_STATIC U64 MEM_readLE64(const void* memPtr)
+{
+    if (MEM_isLittleEndian())
+        return MEM_read64(memPtr);
+    else
+        return MEM_swap64(MEM_read64(memPtr));
+}
+
+MEM_STATIC size_t MEM_readLEST(const void* memPtr)
+{
+    if (MEM_32bits())
+        return (size_t)MEM_readLE32(memPtr);
+    else
+        return (size_t)MEM_readLE64(memPtr);
+}
+
+
+
+#if defined (__cplusplus)
+}
+#endif
+
+#endif /* MEM_H_MODULE */
+/* ******************************************************************
+   bitstream
+   Part of FSE library
+   header file (to include)
+   Copyright (C) 2013-2016, Yann Collet.
+
+   BSD 2-Clause License (http://www.opensource.org/licenses/bsd-license.php)
+
+   Redistribution and use in source and binary forms, with or without
+   modification, are permitted provided that the following conditions are
+   met:
+
+       * Redistributions of source code must retain the above copyright
+   notice, this list of conditions and the following disclaimer.
+       * Redistributions in binary form must reproduce the above
+   copyright notice, this list of conditions and the following disclaimer
+   in the documentation and/or other materials provided with the
+   distribution.
+
+   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+   OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+   You can contact the author at :
+   - Source repository : https://github.com/Cyan4973/FiniteStateEntropy
+****************************************************************** */
+#ifndef BITSTREAM_H_MODULE
+#define BITSTREAM_H_MODULE
+
+#if defined (__cplusplus)
+extern "C" {
+#endif
+
+
+/*
+*  This API consists of small unitary functions, which must be inlined for best performance.
+*  Since link-time-optimization is not available for all compilers,
+*  these functions are defined into a .h to be included.
+*/
+
+
+/*=========================================
+*  Target specific
+=========================================*/
+#if defined(__BMI__) && defined(__GNUC__)
+#  include <immintrin.h>   /* support for bextr (experimental) */
+#endif
+
+/*-********************************************
+*  bitStream decoding API (read backward)
+**********************************************/
+typedef struct
+{
+    size_t   bitContainer;
+    unsigned bitsConsumed;
+    const char* ptr;
+    const char* start;
+} BITv07_DStream_t;
+
+typedef enum { BITv07_DStream_unfinished = 0,
+               BITv07_DStream_endOfBuffer = 1,
+               BITv07_DStream_completed = 2,
+               BITv07_DStream_overflow = 3 } BITv07_DStream_status;  /* result of BITv07_reloadDStream() */
+               /* 1,2,4,8 would be better for bitmap combinations, but slows down performance a bit ... :( */
+
+MEM_STATIC size_t   BITv07_initDStream(BITv07_DStream_t* bitD, const void* srcBuffer, size_t srcSize);
+MEM_STATIC size_t   BITv07_readBits(BITv07_DStream_t* bitD, unsigned nbBits);
+MEM_STATIC BITv07_DStream_status BITv07_reloadDStream(BITv07_DStream_t* bitD);
+MEM_STATIC unsigned BITv07_endOfDStream(const BITv07_DStream_t* bitD);
+
+
+/* Start by invoking BITv07_initDStream().
+*  A chunk of the bitStream is then stored into a local register.
+*  Local register size is 64-bits on 64-bits systems, 32-bits on 32-bits systems (size_t).
+*  You can then retrieve bitFields stored into the local register, **in reverse order**.
+*  Local register is explicitly reloaded from memory by the BITv07_reloadDStream() method.
+*  A reload guarantees a minimum of ((8*sizeof(bitD->bitContainer))-7) bits when its result is BITv07_DStream_unfinished.
+*  Otherwise, it can be less than that, so proceed accordingly.
+*  Checking if DStream has reached its end can be performed with BITv07_endOfDStream().
+*/
+
+
+/*-****************************************
+*  unsafe API
+******************************************/
+MEM_STATIC size_t BITv07_readBitsFast(BITv07_DStream_t* bitD, unsigned nbBits);
+/* faster, but works only if nbBits >= 1 */
+
+
+
+/*-**************************************************************
+*  Internal functions
+****************************************************************/
+MEM_STATIC unsigned BITv07_highbit32 (register U32 val)
+{
+#   if defined(_MSC_VER)   /* Visual */
+    unsigned long r=0;
+    _BitScanReverse ( &r, val );
+    return (unsigned) r;
+#   elif defined(__GNUC__) && (__GNUC__ >= 3)   /* Use GCC Intrinsic */
+    return 31 - __builtin_clz (val);
+#   else   /* Software version */
+    static const unsigned DeBruijnClz[32] = { 0, 9, 1, 10, 13, 21, 2, 29, 11, 14, 16, 18, 22, 25, 3, 30, 8, 12, 20, 28, 15, 17, 24, 7, 19, 27, 23, 6, 26, 5, 4, 31 };
+    U32 v = val;
+    v |= v >> 1;
+    v |= v >> 2;
+    v |= v >> 4;
+    v |= v >> 8;
+    v |= v >> 16;
+    return DeBruijnClz[ (U32) (v * 0x07C4ACDDU) >> 27];
+#   endif
+}
+
+
+
+/*-********************************************************
+* bitStream decoding
+**********************************************************/
+/*! BITv07_initDStream() :
+*   Initialize a BITv07_DStream_t.
+*   `bitD` : a pointer to an already allocated BITv07_DStream_t structure.
+*   `srcSize` must be the *exact* size of the bitStream, in bytes.
+*   @return : size of stream (== srcSize) or an errorCode if a problem is detected
+*/
+MEM_STATIC size_t BITv07_initDStream(BITv07_DStream_t* bitD, const void* srcBuffer, size_t srcSize)
+{
+    if (srcSize < 1) { memset(bitD, 0, sizeof(*bitD)); return ERROR(srcSize_wrong); }
+
+    if (srcSize >=  sizeof(bitD->bitContainer)) {  /* normal case */
+        bitD->start = (const char*)srcBuffer;
+        bitD->ptr   = (const char*)srcBuffer + srcSize - sizeof(bitD->bitContainer);
+        bitD->bitContainer = MEM_readLEST(bitD->ptr);
+        { BYTE const lastByte = ((const BYTE*)srcBuffer)[srcSize-1];
+          bitD->bitsConsumed = lastByte ? 8 - BITv07_highbit32(lastByte) : 0;
+          if (lastByte == 0) return ERROR(GENERIC); /* endMark not present */ }
+    } else {
+        bitD->start = (const char*)srcBuffer;
+        bitD->ptr   = bitD->start;
+        bitD->bitContainer = *(const BYTE*)(bitD->start);
+        switch(srcSize)
+        {
+            case 7: bitD->bitContainer += (size_t)(((const BYTE*)(srcBuffer))[6]) << (sizeof(bitD->bitContainer)*8 - 16);/* fall-through */
+            case 6: bitD->bitContainer += (size_t)(((const BYTE*)(srcBuffer))[5]) << (sizeof(bitD->bitContainer)*8 - 24);/* fall-through */
+            case 5: bitD->bitContainer += (size_t)(((const BYTE*)(srcBuffer))[4]) << (sizeof(bitD->bitContainer)*8 - 32);/* fall-through */
+            case 4: bitD->bitContainer += (size_t)(((const BYTE*)(srcBuffer))[3]) << 24; /* fall-through */
+            case 3: bitD->bitContainer += (size_t)(((const BYTE*)(srcBuffer))[2]) << 16; /* fall-through */
+            case 2: bitD->bitContainer += (size_t)(((const BYTE*)(srcBuffer))[1]) <<  8; /* fall-through */
+            default: break;
+        }
+        { BYTE const lastByte = ((const BYTE*)srcBuffer)[srcSize-1];
+          bitD->bitsConsumed = lastByte ? 8 - BITv07_highbit32(lastByte) : 0;
+          if (lastByte == 0) return ERROR(GENERIC); /* endMark not present */ }
+        bitD->bitsConsumed += (U32)(sizeof(bitD->bitContainer) - srcSize)*8;
+    }
+
+    return srcSize;
+}
+
+
+/*! BITv07_lookBits() :
+ *  Provides next n bits from local register.
+ *  local register is not modified.
+ *  On 32-bits, maxNbBits==24.
+ *  On 64-bits, maxNbBits==56.
+ *  @return : value extracted
+ */
+ MEM_STATIC size_t BITv07_lookBits(const BITv07_DStream_t* bitD, U32 nbBits)
+{
+    U32 const bitMask = sizeof(bitD->bitContainer)*8 - 1;
+    return ((bitD->bitContainer << (bitD->bitsConsumed & bitMask)) >> 1) >> ((bitMask-nbBits) & bitMask);
+}
+
+/*! BITv07_lookBitsFast() :
+*   unsafe version; only works if nbBits >= 1 */
+MEM_STATIC size_t BITv07_lookBitsFast(const BITv07_DStream_t* bitD, U32 nbBits)
+{
+    U32 const bitMask = sizeof(bitD->bitContainer)*8 - 1;
+    return (bitD->bitContainer << (bitD->bitsConsumed & bitMask)) >> (((bitMask+1)-nbBits) & bitMask);
+}
+
+MEM_STATIC void BITv07_skipBits(BITv07_DStream_t* bitD, U32 nbBits)
+{
+    bitD->bitsConsumed += nbBits;
+}
+
+/*! BITv07_readBits() :
+ *  Read (consume) next n bits from local register and update.
+ *  Take care not to read more bits than the local register currently contains.
+ *  @return : extracted value.
+ */
+MEM_STATIC size_t BITv07_readBits(BITv07_DStream_t* bitD, U32 nbBits)
+{
+    size_t const value = BITv07_lookBits(bitD, nbBits);
+    BITv07_skipBits(bitD, nbBits);
+    return value;
+}
+
+/*! BITv07_readBitsFast() :
+*   unsafe version; only works if nbBits >= 1 */
+MEM_STATIC size_t BITv07_readBitsFast(BITv07_DStream_t* bitD, U32 nbBits)
+{
+    size_t const value = BITv07_lookBitsFast(bitD, nbBits);
+    BITv07_skipBits(bitD, nbBits);
+    return value;
+}
+
+/*! BITv07_reloadDStream() :
+*   Refill `BITv07_DStream_t` from src buffer previously defined (see BITv07_initDStream() ).
+*   This function is safe, it guarantees it will not read beyond src buffer.
+*   @return : status of `BITv07_DStream_t` internal register.
+              if status == unfinished, internal register is filled with >= (sizeof(bitD->bitContainer)*8 - 7) bits */
+MEM_STATIC BITv07_DStream_status BITv07_reloadDStream(BITv07_DStream_t* bitD)
+{
+    if (bitD->bitsConsumed > (sizeof(bitD->bitContainer)*8))  /* should not happen => corruption detected */
+        return BITv07_DStream_overflow;
+
+    if (bitD->ptr >= bitD->start + sizeof(bitD->bitContainer)) {   /* ptr is at least one container width past start */
+        bitD->ptr -= bitD->bitsConsumed >> 3;   /* step back by the number of whole bytes consumed */
+        bitD->bitsConsumed &= 7;                /* only the sub-byte remainder stays marked as consumed */
+        bitD->bitContainer = MEM_readLEST(bitD->ptr);
+        return BITv07_DStream_unfinished;
+    }
+    if (bitD->ptr == bitD->start) {   /* cannot step back any further : nothing is reloaded */
+        if (bitD->bitsConsumed < sizeof(bitD->bitContainer)*8) return BITv07_DStream_endOfBuffer;   /* unread bits remain in the container */
+        return BITv07_DStream_completed;   /* every bit of the container has been consumed */
+    }
+    {   U32 nbBytes = bitD->bitsConsumed >> 3;   /* remaining case : ptr is closer to start than one container width */
+        BITv07_DStream_status result = BITv07_DStream_unfinished;
+        if (bitD->ptr - nbBytes < bitD->start) {
+            nbBytes = (U32)(bitD->ptr - bitD->start);  /* ptr > start : clamp the step so that ptr lands exactly on start */
+            result = BITv07_DStream_endOfBuffer;
+        }
+        bitD->ptr -= nbBytes;
+        bitD->bitsConsumed -= nbBytes*8;   /* bits of the bytes that could not be stepped over stay consumed */
+        bitD->bitContainer = MEM_readLEST(bitD->ptr);   /* reminder : srcSize > sizeof(bitD) */
+        return result;
+    }
+}
+
+/*! BITv07_endOfDStream() :
+*   @return Tells if DStream has exactly reached its end (all bits consumed).
+*/
+MEM_STATIC unsigned BITv07_endOfDStream(const BITv07_DStream_t* DStream)
+{   /* 1 only when ptr is back at start AND every bit of the container has been consumed, 0 otherwise */
+    return (DStream->bitsConsumed != sizeof(DStream->bitContainer)*8) ? 0 : (DStream->ptr == DStream->start);
+}
+
+#if defined (__cplusplus)
+}
+#endif
+
+#endif /* BITSTREAM_H_MODULE */
+/* ******************************************************************
+   FSE : Finite State Entropy codec
+   Public Prototypes declaration
+   Copyright (C) 2013-2016, Yann Collet.
+
+   BSD 2-Clause License (http://www.opensource.org/licenses/bsd-license.php)
+
+   Redistribution and use in source and binary forms, with or without
+   modification, are permitted provided that the following conditions are
+   met:
+
+       * Redistributions of source code must retain the above copyright
+   notice, this list of conditions and the following disclaimer.
+       * Redistributions in binary form must reproduce the above
+   copyright notice, this list of conditions and the following disclaimer
+   in the documentation and/or other materials provided with the
+   distribution.
+
+   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+   OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+   You can contact the author at :
+   - Source repository : https://github.com/Cyan4973/FiniteStateEntropy
+****************************************************************** */
+#ifndef FSEv07_H
+#define FSEv07_H
+
+#if defined (__cplusplus)
+extern "C" {
+#endif
+
+
+
+/*-****************************************
+*  FSE simple functions
+******************************************/
+
+/*! FSEv07_decompress():
+    Decompress FSE data from buffer 'cSrc', of size 'cSrcSize',
+    into already allocated destination buffer 'dst', of size 'dstCapacity'.
+    @return : size of regenerated data (<= dstCapacity),
+              or an error code, which can be tested using FSEv07_isError() .
+
+    ** Important ** : FSEv07_decompress() does not decompress non-compressible nor RLE data !!!
+    Why ? : making this distinction requires a header.
+    Header management is intentionally delegated to the user layer, which can better manage special cases.
+*/
+size_t FSEv07_decompress(void* dst,  size_t dstCapacity,
+                const void* cSrc, size_t cSrcSize);
+
+
+/* Error Management */
+unsigned    FSEv07_isError(size_t code);        /* tells if a return value is an error code */
+const char* FSEv07_getErrorName(size_t code);   /* provides error code string (useful for debugging) */
+
+
+/*-*****************************************
+*  FSE detailed API
+******************************************/
+/*!
+FSEv07_decompress() does the following:
+1. read normalized counters with readNCount()
+2. build decoding table 'DTable' from normalized counters
+3. decode the data stream using decoding table 'DTable'
+
+The following API allows targeting specific sub-functions for advanced tasks.
+For example, it's possible to compress several blocks using the same 'CTable',
+or to save and provide normalized distribution using external method.
+*/
+
+
+/* *** DECOMPRESSION *** */
+
+/*! FSEv07_readNCount():
+    Read compactly saved 'normalizedCounter' from 'rBuffer'.
+    @return : size read from 'rBuffer',
+              or an errorCode, which can be tested using FSEv07_isError().
+              maxSymbolValuePtr[0] and tableLogPtr[0] will also be updated with their respective values */
+size_t FSEv07_readNCount (short* normalizedCounter, unsigned* maxSymbolValuePtr, unsigned* tableLogPtr, const void* rBuffer, size_t rBuffSize);
+
+/*! Constructor and Destructor of FSEv07_DTable.
+    Note that its size depends on 'tableLog' */
+typedef unsigned FSEv07_DTable;   /* don't allocate that. It's just a way to be more restrictive than void* */
+FSEv07_DTable* FSEv07_createDTable(unsigned tableLog);
+void        FSEv07_freeDTable(FSEv07_DTable* dt);
+
+/*! FSEv07_buildDTable():
+    Builds 'dt', which must be already allocated, using FSEv07_createDTable().
+    return : 0, or an errorCode, which can be tested using FSEv07_isError() */
+size_t FSEv07_buildDTable (FSEv07_DTable* dt, const short* normalizedCounter, unsigned maxSymbolValue, unsigned tableLog);
+
+/*! FSEv07_decompress_usingDTable():
+    Decompress compressed source `cSrc` of size `cSrcSize` using `dt`
+    into `dst` which must be already allocated.
+    @return : size of regenerated data (necessarily <= `dstCapacity`),
+              or an errorCode, which can be tested using FSEv07_isError() */
+size_t FSEv07_decompress_usingDTable(void* dst, size_t dstCapacity, const void* cSrc, size_t cSrcSize, const FSEv07_DTable* dt);
+
+/*!
+Tutorial :
+----------
+(Note : these functions only decompress FSE-compressed blocks.
+ If block is uncompressed, use memcpy() instead
+ If block is a single repeated byte, use memset() instead )
+
+The first step is to obtain the normalized frequencies of symbols.
+This can be performed by FSEv07_readNCount() if it was saved using FSEv07_writeNCount().
+'normalizedCounter' must be already allocated, and have at least 'maxSymbolValuePtr[0]+1' cells of signed short.
+In practice, that means it's necessary to know 'maxSymbolValue' beforehand,
+or size the table to handle worst case situations (typically 256).
+FSEv07_readNCount() will provide 'tableLog' and 'maxSymbolValue'.
+The result of FSEv07_readNCount() is the number of bytes read from 'rBuffer'.
+Note that 'rBuffSize' must be at least 4 bytes, even if useful information is less than that.
+If there is an error, the function will return an error code, which can be tested using FSEv07_isError().
+
+The next step is to build the decompression tables 'FSEv07_DTable' from 'normalizedCounter'.
+This is performed by the function FSEv07_buildDTable().
+The space required by 'FSEv07_DTable' must be already allocated using FSEv07_createDTable().
+If there is an error, the function will return an error code, which can be tested using FSEv07_isError().
+
+`FSEv07_DTable` can then be used to decompress `cSrc`, with FSEv07_decompress_usingDTable().
+`cSrcSize` must be strictly correct, otherwise decompression will fail.
+FSEv07_decompress_usingDTable() result will tell how many bytes were regenerated (<=`dstCapacity`).
+If there is an error, the function will return an error code, which can be tested using FSEv07_isError(). (ex: dst buffer too small)
+*/
+
+
+#ifdef FSEv07_STATIC_LINKING_ONLY
+
+
+/* *****************************************
+*  Static allocation
+*******************************************/
+/* FSE buffer bounds */
+#define FSEv07_NCOUNTBOUND 512
+#define FSEv07_BLOCKBOUND(size) ((size) + ((size)>>7))   /* argument parenthesized : safe with expression arguments */
+
+/* It is possible to statically allocate FSE CTable/DTable as a table of unsigned using below macros */
+#define FSEv07_DTABLE_SIZE_U32(maxTableLog)                   (1 + (1<<(maxTableLog)))   /* 1 header cell + 2^maxTableLog cells */
+
+
+/* *****************************************
+*  FSE advanced API
+*******************************************/
+size_t FSEv07_countFast(unsigned* count, unsigned* maxSymbolValuePtr, const void* src, size_t srcSize);
+/**< same as FSEv07_count(), but blindly trusts that all byte values within src are <= *maxSymbolValuePtr  */
+
+unsigned FSEv07_optimalTableLog_internal(unsigned maxTableLog, size_t srcSize, unsigned maxSymbolValue, unsigned minus);
+/**< same as FSEv07_optimalTableLog(), which used `minus==2` */
+
+size_t FSEv07_buildDTable_raw (FSEv07_DTable* dt, unsigned nbBits);
+/**< build a fake FSEv07_DTable, designed to read an uncompressed bitstream where each symbol uses nbBits */
+
+size_t FSEv07_buildDTable_rle (FSEv07_DTable* dt, unsigned char symbolValue);
+/**< build a fake FSEv07_DTable, designed to always generate the same symbolValue */
+
+
+
+/* *****************************************
+*  FSE symbol decompression API
+*******************************************/
+typedef struct
+{
+    size_t      state;   /* current decoding state, used as an index into `table` */
+    const void* table;   /* decoding cells, set by FSEv07_initDState() to `dt + 1`; original note : precise table may vary, depending on U16 */
+} FSEv07_DState_t;
+
+
+static void     FSEv07_initDState(FSEv07_DState_t* DStatePtr, BITv07_DStream_t* bitD, const FSEv07_DTable* dt);
+
+static unsigned char FSEv07_decodeSymbol(FSEv07_DState_t* DStatePtr, BITv07_DStream_t* bitD);
+
+
+/**<
+Let's now decompose FSEv07_decompress_usingDTable() into its unitary components.
+You will decode FSE-encoded symbols from the bitStream,
+and also any other bitFields you put in, **in reverse order**.
+
+You will need a few variables to track your bitStream. They are :
+
+BITv07_DStream_t DStream;    // Stream context
+FSEv07_DState_t  DState;     // State context. Multiple ones are possible
+FSEv07_DTable*   DTablePtr;  // Decoding table, provided by FSEv07_buildDTable()
+
+The first thing to do is to init the bitStream.
+    errorCode = BITv07_initDStream(&DStream, srcBuffer, srcSize);
+
+You should then retrieve your initial state(s)
+(in reverse flushing order if you have several ones) :
+    FSEv07_initDState(&DState, &DStream, DTablePtr);   // returns void : there is no error code to check
+
+You can then decode your data, symbol after symbol.
+For information the maximum number of bits read by FSEv07_decodeSymbol() is 'tableLog'.
+Keep in mind that symbols are decoded in reverse order, like a LIFO stack (last in, first out).
+    unsigned char symbol = FSEv07_decodeSymbol(&DState, &DStream);
+
+You can retrieve any bitfield you eventually stored into the bitStream (in reverse order)
+Note : maximum allowed nbBits is 25, for 32-bits compatibility
+    size_t bitField = BITv07_readBits(&DStream, nbBits);
+
+All above operations only read from local register (which size depends on size_t).
+Refueling the register from memory is manually performed by the reload method.
+    endSignal = BITv07_reloadDStream(&DStream);
+
+BITv07_reloadDStream() result tells if there is still some more data to read from DStream.
+BITv07_DStream_unfinished : there is still some data left into the DStream.
+BITv07_DStream_endOfBuffer : Dstream reached end of buffer. Its container may no longer be completely filled.
+BITv07_DStream_completed : Dstream reached its exact end, corresponding in general to decompression completed.
+BITv07_DStream_overflow : Dstream went too far. Decompression result is corrupted.
+
+When reaching end of buffer (BITv07_DStream_endOfBuffer), progress slowly, notably if you decode multiple symbols per loop,
+to properly detect the exact end of stream.
+After each decoded symbol, check if DStream is fully consumed using this simple test :
+    BITv07_reloadDStream(&DStream) >= BITv07_DStream_completed
+
+When it's done, verify decompression is fully completed, by checking both DStream and the relevant states.
+Checking if DStream has reached its end is performed by :
+    BITv07_endOfDStream(&DStream);
+Check also the states. There might be some symbols left there, if some high probability ones (>50%) are possible.
+    FSEv07_endOfDState(&DState);
+*/
+
+
+/* *****************************************
+*  FSE unsafe API
+*******************************************/
+static unsigned char FSEv07_decodeSymbolFast(FSEv07_DState_t* DStatePtr, BITv07_DStream_t* bitD);
+/* faster, but works only if nbBits is always >= 1 (otherwise, result will be corrupted) */
+
+
+/* ======    Decompression    ====== */
+
+typedef struct {
+    U16 tableLog;   /* number of bits FSEv07_initDState() reads to obtain the initial state */
+    U16 fastMode;   /* NOTE(review): presumably selects the *Fast decoding path; confirm against the decompression code */
+} FSEv07_DTableHeader;   /* sizeof U32 */
+
+typedef struct
+{
+    unsigned short newState;   /* base of the next state; the bits read from the stream are added to it */
+    unsigned char  symbol;     /* symbol returned while the decoder is in this state */
+    unsigned char  nbBits;     /* number of bits to read from the bitstream for the transition */
+} FSEv07_decode_t;   /* size == U32 */
+
+MEM_STATIC void FSEv07_initDState(FSEv07_DState_t* DStatePtr, BITv07_DStream_t* bitD, const FSEv07_DTable* dt)
+{
+    const void* const headerCell = dt;   /* via void* : the first DTable cell is viewed as the header */
+    U32 const stateBits = ((const FSEv07_DTableHeader*)headerCell)->tableLog;
+    DStatePtr->state = BITv07_readBits(bitD, stateBits);   /* initial state is read on tableLog bits */
+    BITv07_reloadDStream(bitD);
+    DStatePtr->table = dt + 1;   /* decoding cells follow the header cell */
+}
+
+MEM_STATIC BYTE FSEv07_peekSymbol(const FSEv07_DState_t* DStatePtr)
+{   /* report the symbol attached to the current state; the state itself is left untouched */
+    const FSEv07_decode_t* const cells = (const FSEv07_decode_t*)(DStatePtr->table);
+    return cells[DStatePtr->state].symbol;
+}
+
+MEM_STATIC void FSEv07_updateState(FSEv07_DState_t* DStatePtr, BITv07_DStream_t* bitD)
+{   /* move to the next state without returning a symbol */
+    const FSEv07_decode_t* const cells = (const FSEv07_decode_t*)(DStatePtr->table);
+    FSEv07_decode_t const cell = cells[DStatePtr->state];   /* copy the cell before the state changes */
+    size_t const extra = BITv07_readBits(bitD, cell.nbBits);
+    DStatePtr->state = cell.newState + extra;   /* next state = base + bits just read */
+}
+
+MEM_STATIC BYTE FSEv07_decodeSymbol(FSEv07_DState_t* DStatePtr, BITv07_DStream_t* bitD)
+{
+    /* return the symbol of the current state, then transition using bits taken from the stream */
+    const FSEv07_decode_t* const cells = (const FSEv07_decode_t*)(DStatePtr->table);
+    FSEv07_decode_t const cell = cells[DStatePtr->state];   /* copy the cell before the state changes */
+    size_t const extra = BITv07_readBits(bitD, cell.nbBits);
+
+    DStatePtr->state = cell.newState + extra;
+    return cell.symbol;
+}
+
+/*! FSEv07_decodeSymbolFast() :
+    unsafe, only works if no symbol has a probability > 50% */
+MEM_STATIC BYTE FSEv07_decodeSymbolFast(FSEv07_DState_t* DStatePtr, BITv07_DStream_t* bitD)
+{
+    /* same steps as FSEv07_decodeSymbol(), but the transition bits come from BITv07_readBitsFast() */
+    const FSEv07_decode_t* const cells = (const FSEv07_decode_t*)(DStatePtr->table);
+    FSEv07_decode_t const cell = cells[DStatePtr->state];   /* copy the cell before the state changes */
+    size_t const extra = BITv07_readBitsFast(bitD, cell.nbBits);
+
+    DStatePtr->state = cell.newState + extra;
+    return cell.symbol;
+}
+
+
+
+#ifndef FSEv07_COMMONDEFS_ONLY
+
+/* **************************************************************
+*  Tuning parameters
+****************************************************************/
+/*!MEMORY_USAGE :
+*  Memory usage formula : N->2^N Bytes (examples : 10 -> 1KB; 12 -> 4KB ; 16 -> 64KB; 20 -> 1MB; etc.)
+*  Increasing memory usage improves compression ratio
+*  Reduced memory usage can improve speed, due to cache effect
+*  Recommended max value is 14, for 16KB, which nicely fits into Intel x86 L1 cache */
+#define FSEv07_MAX_MEMORY_USAGE 14
+#define FSEv07_DEFAULT_MEMORY_USAGE 13
+
+/*!FSEv07_MAX_SYMBOL_VALUE :
+*  Maximum symbol value authorized.
+*  Required for proper stack allocation */
+#define FSEv07_MAX_SYMBOL_VALUE 255
+
+
+/* **************************************************************
+*  template functions type & suffix
+****************************************************************/
+#define FSEv07_FUNCTION_TYPE BYTE
+#define FSEv07_FUNCTION_EXTENSION
+#define FSEv07_DECODE_TYPE FSEv07_decode_t
+
+
+#endif   /* !FSEv07_COMMONDEFS_ONLY */
+
+
+/* ***************************************************************
+*  Constants
+*****************************************************************/
+#define FSEv07_MAX_TABLELOG  (FSEv07_MAX_MEMORY_USAGE-2)
+#define FSEv07_MAX_TABLESIZE (1U<<FSEv07_MAX_TABLELOG)
+#define FSEv07_MAXTABLESIZE_MASK (FSEv07_MAX_TABLESIZE-1)
+#define FSEv07_DEFAULT_TABLELOG (FSEv07_DEFAULT_MEMORY_USAGE-2)
+#define FSEv07_MIN_TABLELOG 5
+
+#define FSEv07_TABLELOG_ABSOLUTE_MAX 15
+#if FSEv07_MAX_TABLELOG > FSEv07_TABLELOG_ABSOLUTE_MAX
+#  error "FSEv07_MAX_TABLELOG > FSEv07_TABLELOG_ABSOLUTE_MAX is not supported"
+#endif
+
+#define FSEv07_TABLESTEP(tableSize) (((tableSize)>>1) + ((tableSize)>>3) + 3)   /* argument parenthesized : safe with expression arguments */
+
+
+#endif /* FSEv07_STATIC_LINKING_ONLY */
+
+
+#if defined (__cplusplus)
+}
+#endif
+
+#endif  /* FSEv07_H */
+/* ******************************************************************
+   Huffman coder, part of New Generation Entropy library
+   header file
+   Copyright (C) 2013-2016, Yann Collet.
+
+   BSD 2-Clause License (http://www.opensource.org/licenses/bsd-license.php)
+
+   Redistribution and use in source and binary forms, with or without
+   modification, are permitted provided that the following conditions are
+   met:
+
+       * Redistributions of source code must retain the above copyright
+   notice, this list of conditions and the following disclaimer.
+       * Redistributions in binary form must reproduce the above
+   copyright notice, this list of conditions and the following disclaimer
+   in the documentation and/or other materials provided with the
+   distribution.
+
+   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+   OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+   You can contact the author at :
+   - Source repository : https://github.com/Cyan4973/FiniteStateEntropy
+****************************************************************** */
+#ifndef HUFv07_H_298734234
+#define HUFv07_H_298734234
+
+#if defined (__cplusplus)
+extern "C" {
+#endif
+
+
+
+/* *** simple functions *** */
+/**
+HUFv07_decompress() :
+    Decompress HUF data from buffer 'cSrc', of size 'cSrcSize',
+    into already allocated buffer 'dst', of minimum size 'dstSize'.
+    `dstSize` : **must** be the ***exact*** size of original (uncompressed) data.
+    Note : in contrast with FSE, HUFv07_decompress can regenerate
+           RLE (cSrcSize==1) and uncompressed (cSrcSize==dstSize) data,
+           because it knows size to regenerate.
+    @return : size of regenerated data (== dstSize),
+              or an error code, which can be tested using HUFv07_isError()
+*/
+size_t HUFv07_decompress(void* dst,  size_t dstSize,
+                const void* cSrc, size_t cSrcSize);
+
+
+/* ****************************************
+*  Tool functions
+******************************************/
+#define HUFv07_BLOCKSIZE_MAX (128 * 1024)
+
+/* Error Management */
+unsigned    HUFv07_isError(size_t code);        /**< tells if a return value is an error code */
+const char* HUFv07_getErrorName(size_t code);   /**< provides error code string (useful for debugging) */
+
+
+/* *** Advanced function *** */
+
+
+#ifdef HUFv07_STATIC_LINKING_ONLY
+
+
+/* *** Constants *** */
+#define HUFv07_TABLELOG_ABSOLUTEMAX  16   /* absolute limit of HUFv07_TABLELOG_MAX. Beyond that value, code does not work */
+#define HUFv07_TABLELOG_MAX  12           /* max configured tableLog (for static allocation); can be modified up to HUFv07_TABLELOG_ABSOLUTEMAX */
+#define HUFv07_TABLELOG_DEFAULT  11       /* tableLog by default, when not specified */
+#define HUFv07_SYMBOLVALUE_MAX 255
+#if (HUFv07_TABLELOG_MAX > HUFv07_TABLELOG_ABSOLUTEMAX)   /* compile-time guard on the configured value */
+#  error "HUFv07_TABLELOG_MAX is too large !"
+#endif
+
+
+/* ****************************************
+*  Static allocation
+******************************************/
+/* HUF buffer bounds */
+#define HUFv07_BLOCKBOUND(size) ((size) + ((size)>>8) + 8)   /* only true if incompressible pre-filtered with fast heuristic; argument parenthesized for expression arguments */
+
+/* static allocation of HUF's DTable */
+typedef U32 HUFv07_DTable;
+#define HUFv07_DTABLE_SIZE(maxTableLog)   (1 + (1<<(maxTableLog)))
+#define HUFv07_CREATE_STATIC_DTABLEX2(DTable, maxTableLog) \
+        HUFv07_DTable DTable[HUFv07_DTABLE_SIZE((maxTableLog)-1)] = { ((U32)((maxTableLog)-1)*0x1000001) }
+#define HUFv07_CREATE_STATIC_DTABLEX4(DTable, maxTableLog) \
+        HUFv07_DTable DTable[HUFv07_DTABLE_SIZE(maxTableLog)] = { ((U32)(maxTableLog)*0x1000001) }
+
+
+/* ****************************************
+*  Advanced decompression functions
+******************************************/
+size_t HUFv07_decompress4X2 (void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize);   /**< single-symbol decoder */
+size_t HUFv07_decompress4X4 (void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize);   /**< double-symbols decoder */
+
+size_t HUFv07_decompress4X_DCtx (HUFv07_DTable* dctx, void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize);   /**< decodes RLE and uncompressed */
+size_t HUFv07_decompress4X_hufOnly(HUFv07_DTable* dctx, void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize); /**< considers RLE and uncompressed as errors */
+size_t HUFv07_decompress4X2_DCtx(HUFv07_DTable* dctx, void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize);   /**< single-symbol decoder */
+size_t HUFv07_decompress4X4_DCtx(HUFv07_DTable* dctx, void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize);   /**< double-symbols decoder */
+
+size_t HUFv07_decompress1X_DCtx (HUFv07_DTable* dctx, void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize);
+size_t HUFv07_decompress1X2_DCtx(HUFv07_DTable* dctx, void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize);   /**< single-symbol decoder */
+size_t HUFv07_decompress1X4_DCtx(HUFv07_DTable* dctx, void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize);   /**< double-symbols decoder */
+
+
+/* ****************************************
+*  HUF detailed API
+******************************************/
+/*!
+The following API allows targeting specific sub-functions for advanced tasks.
+For example, it's possible to compress several blocks using the same 'CTable',
+or to save and regenerate 'CTable' using external methods.
+*/
+/* FSEv07_count() : find it within "fse.h" */
+
+/*! HUFv07_readStats() :
+    Read compact Huffman tree, saved by HUFv07_writeCTable().
+    `huffWeight` is destination buffer.
+    @return : size read from `src` , or an error Code .
+    Note : Needed by HUFv07_readCTable() and HUFv07_readDTableXn() . */
+size_t HUFv07_readStats(BYTE* huffWeight, size_t hwSize, U32* rankStats,
+                     U32* nbSymbolsPtr, U32* tableLogPtr,
+                     const void* src, size_t srcSize);
+
+
+/*
+HUFv07_decompress() does the following:
+1. select the decompression algorithm (X2, X4) based on pre-computed heuristics
+2. build Huffman table from save, using HUFv07_readDTableXn()
+3. decode 1 or 4 segments in parallel using HUFv07_decompressSXn_usingDTable
+*/
+
+/** HUFv07_selectDecoder() :
+*   Tells which decoder is likely to decode faster,
+*   based on a set of pre-determined metrics.
+*   @return : 0==HUFv07_decompress4X2, 1==HUFv07_decompress4X4 .
+*   Assumption : 0 < cSrcSize < dstSize <= 128 KB */
+U32 HUFv07_selectDecoder (size_t dstSize, size_t cSrcSize);
+
+size_t HUFv07_readDTableX2 (HUFv07_DTable* DTable, const void* src, size_t srcSize);
+size_t HUFv07_readDTableX4 (HUFv07_DTable* DTable, const void* src, size_t srcSize);
+
+size_t HUFv07_decompress4X_usingDTable(void* dst, size_t maxDstSize, const void* cSrc, size_t cSrcSize, const HUFv07_DTable* DTable);
+size_t HUFv07_decompress4X2_usingDTable(void* dst, size_t maxDstSize, const void* cSrc, size_t cSrcSize, const HUFv07_DTable* DTable);
+size_t HUFv07_decompress4X4_usingDTable(void* dst, size_t maxDstSize, const void* cSrc, size_t cSrcSize, const HUFv07_DTable* DTable);
+
+
+/* single stream variants */
+size_t HUFv07_decompress1X2 (void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize);   /* single-symbol decoder */
+size_t HUFv07_decompress1X4 (void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize);   /* double-symbol decoder */
+
+size_t HUFv07_decompress1X_usingDTable(void* dst, size_t maxDstSize, const void* cSrc, size_t cSrcSize, const HUFv07_DTable* DTable);
+size_t HUFv07_decompress1X2_usingDTable(void* dst, size_t maxDstSize, const void* cSrc, size_t cSrcSize, const HUFv07_DTable* DTable);
+size_t HUFv07_decompress1X4_usingDTable(void* dst, size_t maxDstSize, const void* cSrc, size_t cSrcSize, const HUFv07_DTable* DTable);
+
+
+#endif /* HUFv07_STATIC_LINKING_ONLY */
+
+
+#if defined (__cplusplus)
+}
+#endif
+
+#endif   /* HUFv07_H_298734234 */
+/*
+   Common functions of New Generation Entropy library
+   Copyright (C) 2016, Yann Collet.
+
+   BSD 2-Clause License (http://www.opensource.org/licenses/bsd-license.php)
+
+   Redistribution and use in source and binary forms, with or without
+   modification, are permitted provided that the following conditions are
+   met:
+
+       * Redistributions of source code must retain the above copyright
+   notice, this list of conditions and the following disclaimer.
+       * Redistributions in binary form must reproduce the above
+   copyright notice, this list of conditions and the following disclaimer
+   in the documentation and/or other materials provided with the
+   distribution.
+
+   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+   OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+    You can contact the author at :
+    - FSE+HUF source repository : https://github.com/Cyan4973/FiniteStateEntropy
+    - Public forum : https://groups.google.com/forum/#!forum/lz4c
+*************************************************************************** */
+
+
+
+/*-****************************************
+*  FSE Error Management
+******************************************/
+unsigned FSEv07_isError(size_t code) { return ERR_isError(code); }   /* thin wrapper : forwards to ERR_isError() */
+
+const char* FSEv07_getErrorName(size_t code) { return ERR_getErrorName(code); }   /* thin wrapper : forwards to ERR_getErrorName() */
+
+
+/* **************************************************************
+*  HUF Error Management
+****************************************************************/
+unsigned HUFv07_isError(size_t code) { return ERR_isError(code); }   /* thin wrapper : forwards to ERR_isError() */
+
+const char* HUFv07_getErrorName(size_t code) { return ERR_getErrorName(code); }   /* thin wrapper : forwards to ERR_getErrorName() */
+
+
+/*-**************************************************************
+*  FSE NCount encoding-decoding
+****************************************************************/
+static short FSEv07_abs(short a) { return (short)(a<0 ? -a : a); }   /* absolute value; the negation is computed as int, then cast back to short */
+
+size_t FSEv07_readNCount (short* normalizedCounter, unsigned* maxSVPtr, unsigned* tableLogPtr,
+                 const void* headerBuffer, size_t hbSize)
+{   /* Parses the bit-packed header : fills normalizedCounter[], *tableLogPtr and *maxSVPtr, returns bytes read or an error code */
+    const BYTE* const istart = (const BYTE*) headerBuffer;
+    const BYTE* const iend = istart + hbSize;
+    const BYTE* ip = istart;
+    int nbBits;                /* tableLog at first, then width in bits of the next count field */
+    int remaining;             /* counts down as symbol counts are read; must end at exactly 1 */
+    int threshold;             /* power of 2, halved together with nbBits while remaining < threshold */
+    U32 bitStream;             /* bits not decoded yet, next field in the lowest bits */
+    int bitCount;              /* bits of the 32-bit word at `ip` that are already consumed */
+    unsigned charnum = 0;      /* index of the next symbol to fill in normalizedCounter */
+    int previous0 = 0;         /* non-zero when the last count stored was 0 */
+
+    if (hbSize < 4) return ERROR(srcSize_wrong);   /* the header is read through 32-bit loads */
+    bitStream = MEM_readLE32(ip);
+    nbBits = (bitStream & 0xF) + FSEv07_MIN_TABLELOG;   /* extract tableLog */
+    if (nbBits > FSEv07_TABLELOG_ABSOLUTE_MAX) return ERROR(tableLog_tooLarge);
+    bitStream >>= 4;
+    bitCount = 4;
+    *tableLogPtr = nbBits;
+    remaining = (1<<nbBits)+1;
+    threshold = 1<<nbBits;
+    nbBits++;
+
+    while ((remaining>1) && (charnum<=*maxSVPtr)) {
+        if (previous0) {   /* a zero count is followed by a run length of further zero-count symbols */
+            unsigned n0 = charnum;
+            while ((bitStream & 0xFFFF) == 0xFFFF) {   /* 16 set bits : 24 more zero-count symbols */
+                n0+=24;
+                if (ip < iend-5) {
+                    ip+=2;
+                    bitStream = MEM_readLE32(ip) >> bitCount;
+                } else {   /* too close to the end to reload : consume from the bits already loaded */
+                    bitStream >>= 16;
+                    bitCount+=16;
+            }   }
+            while ((bitStream & 3) == 3) {   /* each 2-bit field equal to 3 : 3 more zero-count symbols */
+                n0+=3;
+                bitStream>>=2;
+                bitCount+=2;
+            }
+            n0 += bitStream & 3;   /* final 2-bit field (0..2) ends the run */
+            bitCount += 2;
+            if (n0 > *maxSVPtr) return ERROR(maxSymbolValue_tooSmall);
+            while (charnum < n0) normalizedCounter[charnum++] = 0;
+            if ((ip <= iend-7) || (ip + (bitCount>>3) <= iend-4)) {
+                ip += bitCount>>3;
+                bitCount &= 7;
+                bitStream = MEM_readLE32(ip) >> bitCount;
+            }
+            else
+                bitStream >>= 2;
+        }
+        {   short const max = (short)((2*threshold-1)-remaining);   /* low values below `max` are coded on one bit less */
+            short count;
+
+            if ((bitStream & (threshold-1)) < (U32)max) {
+                count = (short)(bitStream & (threshold-1));
+                bitCount   += nbBits-1;
+            } else {
+                count = (short)(bitStream & (2*threshold-1));
+                if (count >= threshold) count -= max;
+                bitCount   += nbBits;
+            }
+
+            count--;   /* extra accuracy : the stored value is biased by +1, so count can be -1 here */
+            remaining -= FSEv07_abs(count);
+            normalizedCounter[charnum++] = count;
+            previous0 = !count;
+            while (remaining < threshold) {   /* shrink the field width once remaining drops below threshold */
+                nbBits--;
+                threshold >>= 1;
+            }
+
+            if ((ip <= iend-7) || (ip + (bitCount>>3) <= iend-4)) {   /* room left : advance by whole bytes */
+                ip += bitCount>>3;
+                bitCount &= 7;
+            } else {   /* otherwise pin ip to the last 4 bytes and keep the excess in bitCount */
+                bitCount -= (int)(8 * (iend - 4 - ip));
+                ip = iend - 4;
+            }
+            bitStream = MEM_readLE32(ip) >> (bitCount & 31);
+    }   }   /* while ((remaining>1) && (charnum<=*maxSVPtr)) */
+    if (remaining != 1) return ERROR(GENERIC);
+    *maxSVPtr = charnum-1;
+
+    ip += (bitCount+7)>>3;   /* round the consumed bits up to whole bytes */
+    if ((size_t)(ip-istart) > hbSize) return ERROR(srcSize_wrong);
+    return ip-istart;
+}
+
+
+/*! HUFv07_readStats() :
+    Read compact Huffman tree, saved by HUFv07_writeCTable().
+    `huffWeight` (capacity `hwSize`) is destination buffer : one weight per symbol, the last one being deduced.
+    `rankStats` (HUFv07_TABLELOG_ABSOLUTEMAX+1 cells), `*nbSymbolsPtr` and `*tableLogPtr` receive the weight histogram, symbol count and table log.
+    @return : size read from `src` , or an error Code .
+    Note : Needed by HUFv07_readCTable() and HUFv07_readDTableXn() .
+*/
+size_t HUFv07_readStats(BYTE* huffWeight, size_t hwSize, U32* rankStats,
+                     U32* nbSymbolsPtr, U32* tableLogPtr,
+                     const void* src, size_t srcSize)
+{
+    U32 weightTotal;
+    const BYTE* ip = (const BYTE*) src;
+    size_t iSize;
+    size_t oSize;
+
+    if (!srcSize) return ERROR(srcSize_wrong);
+    iSize = ip[0];   /* header byte selects the representation; clearing huffWeight first is not necessary, even though some analyzers complain */
+
+    if (iSize >= 128)  { /* special header */
+        if (iSize >= (242)) {  /* RLE */
+            static const U32 l[14] = { 1, 2, 3, 4, 7, 8, 15, 16, 31, 32, 63, 64, 127, 128 };
+            oSize = l[iSize-242];
+            if (oSize >= hwSize) return ERROR(corruption_detected);   /* huffWeight[oSize] is written below : same bound as the incompressible case */
+            memset(huffWeight, 1, hwSize);
+            iSize = 0;
+        }
+        else {   /* Incompressible */
+            oSize = iSize - 127;
+            iSize = ((oSize+1)/2);   /* two 4-bit weights per source byte */
+            if (iSize+1 > srcSize) return ERROR(srcSize_wrong);
+            if (oSize >= hwSize) return ERROR(corruption_detected);
+            ip += 1;
+            {   U32 n;
+                for (n=0; n<oSize; n+=2) {
+                    huffWeight[n]   = ip[n/2] >> 4;
+                    huffWeight[n+1] = ip[n/2] & 15;   /* n+1 <= oSize < hwSize : stays inside the buffer */
+    }   }   }   }
+    else  {   /* header compressed with FSE (normal case) */
+        if (iSize+1 > srcSize) return ERROR(srcSize_wrong);
+        oSize = FSEv07_decompress(huffWeight, hwSize-1, ip+1, iSize);   /* max (hwSize-1) values decoded, as last one is implied */
+        if (FSEv07_isError(oSize)) return oSize;
+    }
+
+    /* collect weight stats */
+    memset(rankStats, 0, (HUFv07_TABLELOG_ABSOLUTEMAX + 1) * sizeof(U32));
+    weightTotal = 0;
+    {   U32 n; for (n=0; n<oSize; n++) {
+            if (huffWeight[n] >= HUFv07_TABLELOG_ABSOLUTEMAX) return ERROR(corruption_detected);
+            rankStats[huffWeight[n]]++;
+            weightTotal += (1 << huffWeight[n]) >> 1;   /* weight w contributes 2^(w-1), weight 0 contributes nothing */
+    }   }
+    if (weightTotal == 0) return ERROR(corruption_detected);
+
+    /* get last non-null symbol weight (implied, total must be 2^n) */
+    {   U32 const tableLog = BITv07_highbit32(weightTotal) + 1;
+        if (tableLog > HUFv07_TABLELOG_ABSOLUTEMAX) return ERROR(corruption_detected);
+        *tableLogPtr = tableLog;
+        /* determine last weight */
+        {   U32 const total = 1 << tableLog;
+            U32 const rest = total - weightTotal;
+            U32 const verif = 1 << BITv07_highbit32(rest);
+            U32 const lastWeight = BITv07_highbit32(rest) + 1;
+            if (verif != rest) return ERROR(corruption_detected);    /* last value must be a clean power of 2 */
+            huffWeight[oSize] = (BYTE)lastWeight;
+            rankStats[lastWeight]++;
+    }   }
+
+    /* check tree construction validity */
+    if ((rankStats[1] < 2) || (rankStats[1] & 1)) return ERROR(corruption_detected);   /* by construction : at least 2 elts of rank 1, must be even */
+    /* results */
+    *nbSymbolsPtr = (U32)(oSize+1);
+    return iSize+1;
+}
+/* ******************************************************************
+   FSE : Finite State Entropy decoder
+   Copyright (C) 2013-2015, Yann Collet.
+
+   BSD 2-Clause License (http://www.opensource.org/licenses/bsd-license.php)
+
+   Redistribution and use in source and binary forms, with or without
+   modification, are permitted provided that the following conditions are
+   met:
+
+       * Redistributions of source code must retain the above copyright
+   notice, this list of conditions and the following disclaimer.
+       * Redistributions in binary form must reproduce the above
+   copyright notice, this list of conditions and the following disclaimer
+   in the documentation and/or other materials provided with the
+   distribution.
+
+   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+   OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+    You can contact the author at :
+    - FSE source repository : https://github.com/Cyan4973/FiniteStateEntropy
+    - Public forum : https://groups.google.com/forum/#!forum/lz4c
+****************************************************************** */
+
+
+/* **************************************************************
+*  Compiler specifics
+****************************************************************/
+#ifdef _MSC_VER    /* Visual Studio */
+#  define FORCE_INLINE static __forceinline
+#  include <intrin.h>                    /* For Visual 2005 */
+#  pragma warning(disable : 4127)        /* disable: C4127: conditional expression is constant */
+#  pragma warning(disable : 4214)        /* disable: C4214: non-int bitfields */
+#else   /* every other compiler */
+#  if defined (__cplusplus) || defined (__STDC_VERSION__) && __STDC_VERSION__ >= 199901L   /* C99 */
+#    ifdef __GNUC__   /* GCC-compatible : request inlining explicitly */
+#      define FORCE_INLINE static inline __attribute__((always_inline))
+#    else   /* the inline keyword exists, but no forcing attribute is used */
+#      define FORCE_INLINE static inline
+#    endif
+#  else   /* neither C++ nor C99 : fall back to a plain static function */
+#    define FORCE_INLINE static
+#  endif /* __STDC_VERSION__ */
+#endif
+
+
+/* **************************************************************
+*  Error Management
+****************************************************************/
+#define FSEv07_isError ERR_isError   /* error detection is delegated to ERR_isError */
+#define FSEv07_STATIC_ASSERT(c) { enum { FSEv07_static_assert = 1/(int)(!!(c)) }; }   /* compile-time check : a false `c` makes the enumerator a division by zero; use only *after* variable declarations */
+
+
+/* **************************************************************
+*  Complex types
+****************************************************************/
+typedef U32 DTable_max_t[FSEv07_DTABLE_SIZE_U32(FSEv07_MAX_TABLELOG)];   /* storage for a decoding table sized for FSEv07_MAX_TABLELOG; FSEv07_decompress() declares one as a local */
+
+
+/* **************************************************************
+*  Templates
+****************************************************************/
+/*
+  designed to be included
+  for type-specific functions (template emulation in C)
+  Objective is to write these functions only once, for improved maintenance
+*/
+
+/* safety checks */
+#ifndef FSEv07_FUNCTION_EXTENSION   /* compilation stops here unless this macro is defined beforehand */
+#  error "FSEv07_FUNCTION_EXTENSION must be defined"
+#endif
+#ifndef FSEv07_FUNCTION_TYPE   /* same requirement; this one is used below as the cast type for decoded symbols */
+#  error "FSEv07_FUNCTION_TYPE must be defined"
+#endif
+
+/* Function names */
+#define FSEv07_CAT(X,Y) X##Y   /* raw token pasting */
+#define FSEv07_FUNCTION_NAME(X,Y) FSEv07_CAT(X,Y)   /* extra level, so that macro arguments are expanded before being pasted */
+#define FSEv07_TYPE_NAME(X,Y) FSEv07_CAT(X,Y)   /* same pasting helper, under a second name */
+
+
+/* Function templates */
+FSEv07_DTable* FSEv07_createDTable (unsigned tableLog)
+{   /* Allocates (without initialising) a decoding table; a tableLog above FSEv07_TABLELOG_ABSOLUTE_MAX is sized as that maximum. @return : the malloc() result, to be released with FSEv07_freeDTable(). */
+    unsigned const cappedLog = (tableLog > FSEv07_TABLELOG_ABSOLUTE_MAX) ? FSEv07_TABLELOG_ABSOLUTE_MAX : tableLog;
+    return (FSEv07_DTable*)malloc( FSEv07_DTABLE_SIZE_U32(cappedLog) * sizeof (U32) );
+}
+
+void FSEv07_freeDTable (FSEv07_DTable* dt)
+{   /* Releases a table with free(); FSEv07_createDTable() obtains its tables from malloc(). */
+    free(dt);
+}
+
+size_t FSEv07_buildDTable(FSEv07_DTable* dt, const short* normalizedCounter, unsigned maxSymbolValue, unsigned tableLog)
+{   /* Builds the FSE decoding table `dt` (header + 2^tableLog cells) from normalized counts for symbols 0..maxSymbolValue. @return : 0, or an error code. */
+    void* const tdPtr = dt+1;   /* because *dt is unsigned, 32-bits aligned on 32-bits */
+    FSEv07_DECODE_TYPE* const tableDecode = (FSEv07_DECODE_TYPE*) (tdPtr);
+    U16 symbolNext[FSEv07_MAX_SYMBOL_VALUE+1];   /* per symbol : next state value to hand out in the final pass */
+    /* NOTE(review): tableSize below (and largeLimit further down) shift by tableLog before/independently of the sanity checks; presumably callers always pass 1 <= tableLog <= FSEv07_MAX_TABLELOG - confirm */
+    U32 const maxSV1 = maxSymbolValue + 1;
+    U32 const tableSize = 1 << tableLog;
+    U32 highThreshold = tableSize-1;   /* highest cell not yet taken by a low-probability symbol */
+
+    /* Sanity Checks */
+    if (maxSymbolValue > FSEv07_MAX_SYMBOL_VALUE) return ERROR(maxSymbolValue_tooLarge);
+    if (tableLog > FSEv07_MAX_TABLELOG) return ERROR(tableLog_tooLarge);
+
+    /* Init, lay down lowprob symbols */
+    {   FSEv07_DTableHeader DTableH;
+        DTableH.tableLog = (U16)tableLog;
+        DTableH.fastMode = 1;   /* cleared below as soon as one count reaches half the table */
+        {   S16 const largeLimit= (S16)(1 << (tableLog-1));
+            U32 s;
+            for (s=0; s<maxSV1; s++) {
+                if (normalizedCounter[s]==-1) {   /* count of -1 : the symbol gets a single cell, taken from the top of the table */
+                    tableDecode[highThreshold--].symbol = (FSEv07_FUNCTION_TYPE)s;
+                    symbolNext[s] = 1;
+                } else {
+                    if (normalizedCounter[s] >= largeLimit) DTableH.fastMode=0;
+                    symbolNext[s] = normalizedCounter[s];
+        }   }   }
+        memcpy(dt, &DTableH, sizeof(DTableH));   /* header is stored in the first element of dt */
+    }
+
+    /* Spread symbols */
+    {   U32 const tableMask = tableSize-1;
+        U32 const step = FSEv07_TABLESTEP(tableSize);
+        U32 s, position = 0;
+        for (s=0; s<maxSV1; s++) {
+            int i;
+            for (i=0; i<normalizedCounter[s]; i++) {   /* counts <= 0 place nothing here */
+                tableDecode[position].symbol = (FSEv07_FUNCTION_TYPE)s;
+                position = (position + step) & tableMask;
+                while (position > highThreshold) position = (position + step) & tableMask;   /* lowprob area */
+        }   }
+
+        if (position!=0) return ERROR(GENERIC);   /* position must reach all cells once, otherwise normalizedCounter is incorrect */
+    }
+
+    /* Build Decoding table */
+    {   U32 u;
+        for (u=0; u<tableSize; u++) {
+            FSEv07_FUNCTION_TYPE const symbol = (FSEv07_FUNCTION_TYPE)(tableDecode[u].symbol);
+            U16 nextState = symbolNext[symbol]++;   /* each cell of a symbol receives the next consecutive value */
+            tableDecode[u].nbBits = (BYTE) (tableLog - BITv07_highbit32 ((U32)nextState) );
+            tableDecode[u].newState = (U16) ( (nextState << tableDecode[u].nbBits) - tableSize);
+    }   }
+
+    return 0;
+}
+
+
+
+#ifndef FSEv07_COMMONDEFS_ONLY
+
+/*-*******************************************************
+*  Decompression (Byte symbols)
+*********************************************************/
+size_t FSEv07_buildDTable_rle (FSEv07_DTable* dt, BYTE symbolValue)
+{   /* Builds the degenerate table whose only cell always yields symbolValue and reads no bit. Always returns 0. */
+    void* const headerPtr = dt;       /* first element of the table holds the header */
+    void* const cellPtr = dt + 1;     /* the decode cell sits right after it */
+    FSEv07_DTableHeader* const header = (FSEv07_DTableHeader*)headerPtr;
+    FSEv07_decode_t* const onlyCell = (FSEv07_decode_t*)cellPtr;
+
+    header->fastMode = 0;
+    header->tableLog = 0;
+
+    onlyCell->symbol = symbolValue;
+    onlyCell->nbBits = 0;
+    onlyCell->newState = 0;
+
+    return 0;
+}
+
+
+size_t FSEv07_buildDTable_raw (FSEv07_DTable* dt, unsigned nbBits)
+{   /* Builds an identity table for raw nbBits-wide values : cell v yields symbol v and always reads nbBits bits. @return : 0, or an error code. */
+    void* const headerPtr = dt;
+    void* const cellsPtr = dt + 1;
+    FSEv07_DTableHeader* const header = (FSEv07_DTableHeader*)headerPtr;
+    FSEv07_decode_t* const cells = (FSEv07_decode_t*)cellsPtr;
+    const unsigned nbCells = 1 << nbBits;   /* one cell per representable value */
+    unsigned v = 0;
+
+    /* a zero-bit table is rejected */
+    if (nbBits < 1) return ERROR(GENERIC);
+
+    /* header */
+    header->tableLog = (U16)nbBits;
+    header->fastMode = 1;
+
+    /* cells */
+    while (v < nbCells) {
+        cells[v].newState = 0;
+        cells[v].symbol = (BYTE)v;
+        cells[v].nbBits = (BYTE)nbBits;
+        v++;
+    }
+    return 0;
+}
+
+FORCE_INLINE size_t FSEv07_decompress_usingDTable_generic(
+          void* dst, size_t maxDstSize,
+    const void* cSrc, size_t cSrcSize,
+    const FSEv07_DTable* dt, const unsigned fast)
+{   /* Decodes the FSE bitstream cSrc into dst using two alternating states. `fast` selects FSEv07_decodeSymbolFast(). @return : nb of bytes written, or an error code. */
+    BYTE* const ostart = (BYTE*) dst;
+    BYTE* op = ostart;
+    BYTE* const omax = op + maxDstSize;
+    BYTE* const olimit = omax-3;   /* while op < olimit, op[0..3] all lie inside dst */
+
+    BITv07_DStream_t bitD;
+    FSEv07_DState_t state1;
+    FSEv07_DState_t state2;
+
+    /* Init */
+    { size_t const errorCode = BITv07_initDStream(&bitD, cSrc, cSrcSize);   /* replaced last arg by maxCompressed Size */
+      if (FSEv07_isError(errorCode)) return errorCode; }
+
+    FSEv07_initDState(&state1, &bitD, dt);
+    FSEv07_initDState(&state2, &bitD, dt);
+    /* The macro below expands to an unparenthesized ?: and is only used as the full right-hand side of an assignment; it is not #undef'd in this function. */
+#define FSEv07_GETSYMBOL(statePtr) fast ? FSEv07_decodeSymbolFast(statePtr, &bitD) : FSEv07_decodeSymbol(statePtr, &bitD)
+
+    /* 4 symbols per loop */
+    for ( ; (BITv07_reloadDStream(&bitD)==BITv07_DStream_unfinished) && (op<olimit) ; op+=4) {
+        op[0] = FSEv07_GETSYMBOL(&state1);
+
+        if (FSEv07_MAX_TABLELOG*2+7 > sizeof(bitD.bitContainer)*8)    /* This test must be static */
+            BITv07_reloadDStream(&bitD);
+
+        op[1] = FSEv07_GETSYMBOL(&state2);
+
+        if (FSEv07_MAX_TABLELOG*4+7 > sizeof(bitD.bitContainer)*8)    /* This test must be static */
+            { if (BITv07_reloadDStream(&bitD) > BITv07_DStream_unfinished) { op+=2; break; } }   /* only 2 symbols were written in this iteration */
+
+        op[2] = FSEv07_GETSYMBOL(&state1);
+
+        if (FSEv07_MAX_TABLELOG*2+7 > sizeof(bitD.bitContainer)*8)    /* This test must be static */
+            BITv07_reloadDStream(&bitD);
+
+        op[3] = FSEv07_GETSYMBOL(&state2);
+    }
+
+    /* tail */
+    /* note : BITv07_reloadDStream(&bitD) >= FSEv07_DStream_partiallyFilled; Ends at exactly BITv07_DStream_completed */
+    while (1) {
+        if (op>(omax-2)) return ERROR(dstSize_tooSmall);   /* up to 2 symbols may be written before the next check */
+
+        *op++ = FSEv07_GETSYMBOL(&state1);
+
+        if (BITv07_reloadDStream(&bitD)==BITv07_DStream_overflow) {
+            *op++ = FSEv07_GETSYMBOL(&state2);   /* last symbol, held by the other state */
+            break;
+        }
+
+        if (op>(omax-2)) return ERROR(dstSize_tooSmall);
+
+        *op++ = FSEv07_GETSYMBOL(&state2);
+
+        if (BITv07_reloadDStream(&bitD)==BITv07_DStream_overflow) {
+            *op++ = FSEv07_GETSYMBOL(&state1);   /* last symbol, held by the other state */
+            break;
+    }   }
+
+    return op-ostart;
+}
+
+
+size_t FSEv07_decompress_usingDTable(void* dst, size_t originalSize,
+                            const void* cSrc, size_t cSrcSize,
+                            const FSEv07_DTable* dt)
+{
+    /* The header stored in front of the table tells whether the fast symbol decoder may be used. */
+    const void* const headerPtr = dt;
+    const FSEv07_DTableHeader* const header = (const FSEv07_DTableHeader*)headerPtr;
+
+    /* select fast mode (static) : each path passes a literal flag to the generic body */
+    if (header->fastMode == 0) return FSEv07_decompress_usingDTable_generic(dst, originalSize, cSrc, cSrcSize, dt, 0);
+    return FSEv07_decompress_usingDTable_generic(dst, originalSize, cSrc, cSrcSize, dt, 1);
+}
+
+
+size_t FSEv07_decompress(void* dst, size_t maxDstSize, const void* cSrc, size_t cSrcSize)
+{   /* One-shot FSE decoding : read the normalized counts heading cSrc, build a table on the stack, then decode the remaining bytes into dst. */
+    const BYTE* const srcStart = (const BYTE*)cSrc;
+    short normCounts[FSEv07_MAX_SYMBOL_VALUE+1];
+    DTable_max_t dtable;   /* Static analyzer seems unable to understand this table will be properly initialized later */
+    unsigned maxSymbolValue = FSEv07_MAX_SYMBOL_VALUE;
+    unsigned tableLog;
+    size_t headerSize;
+    size_t buildStatus;
+
+    if (cSrcSize<2) return ERROR(srcSize_wrong);   /* too small input size */
+
+    /* step 1 : table description */
+    headerSize = FSEv07_readNCount (normCounts, &maxSymbolValue, &tableLog, srcStart, cSrcSize);
+    if (FSEv07_isError(headerSize)) return headerSize;
+    if (headerSize >= cSrcSize) return ERROR(srcSize_wrong);   /* too small input size */
+
+    /* step 2 : decoding table */
+    buildStatus = FSEv07_buildDTable (dtable, normCounts, maxSymbolValue, tableLog);
+    if (FSEv07_isError(buildStatus)) return buildStatus;
+
+    /* step 3 : payload; always return, even if it is an error code */
+    return FSEv07_decompress_usingDTable (dst, maxDstSize, srcStart + headerSize, cSrcSize - headerSize, dtable);
+}
+
+
+
+#endif   /* FSEv07_COMMONDEFS_ONLY */
+
+/* ******************************************************************
+   Huffman decoder, part of New Generation Entropy library
+   Copyright (C) 2013-2016, Yann Collet.
+
+   BSD 2-Clause License (http://www.opensource.org/licenses/bsd-license.php)
+
+   Redistribution and use in source and binary forms, with or without
+   modification, are permitted provided that the following conditions are
+   met:
+
+       * Redistributions of source code must retain the above copyright
+   notice, this list of conditions and the following disclaimer.
+       * Redistributions in binary form must reproduce the above
+   copyright notice, this list of conditions and the following disclaimer
+   in the documentation and/or other materials provided with the
+   distribution.
+
+   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+   OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+    You can contact the author at :
+    - FSE+HUF source repository : https://github.com/Cyan4973/FiniteStateEntropy
+    - Public forum : https://groups.google.com/forum/#!forum/lz4c
+****************************************************************** */
+
+/* **************************************************************
+*  Compiler specifics
+****************************************************************/
+#if defined (__cplusplus) || (defined (__STDC_VERSION__) && (__STDC_VERSION__ >= 199901L) /* C99 */)
+/* C++ and C99 onwards : the inline keyword is available, nothing to define */
+#elif defined(_MSC_VER)   /* Visual Studio outside of the case above : use its __inline spelling */
+#  define inline __inline
+#else   /* any other compiler : the keyword is removed */
+#  define inline /* disable inline */
+#endif
+
+
+#ifdef _MSC_VER    /* Visual Studio */
+#  pragma warning(disable : 4127)        /* disable: C4127: conditional expression is constant */
+#endif
+
+
+
+/* **************************************************************
+*  Error Management
+****************************************************************/
+#define HUFv07_STATIC_ASSERT(c) { enum { HUFv07_static_assert = 1/(int)(!!(c)) }; }   /* compile-time check : a false `c` makes the enumerator a division by zero; use only *after* variable declarations */
+
+
+/*-***************************/
+/*  generic DTableDesc       */
+/*-***************************/
+
+typedef struct { BYTE maxTableLog; BYTE tableType; BYTE tableLog; BYTE reserved; } DTableDesc;   /* descriptor read from / written to the first element of a HUFv07_DTable */
+/* Returns a copy of the descriptor found at the start of `table`; memcpy() is used instead of a pointer cast. */
+static DTableDesc HUFv07_getDTableDesc(const HUFv07_DTable* table)
+{
+    DTableDesc dtd;
+    memcpy(&dtd, table, sizeof(dtd));
+    return dtd;
+}
+
+
+/*-***************************/
+/*  single-symbol decoding   */
+/*-***************************/
+
+typedef struct { BYTE byte; BYTE nbBits; } HUFv07_DEltX2;   /* single-symbol decoding */
+/* Reads the Huffman weights heading `src` and fills `DTable` for single-symbol decoding. @return : nb of bytes read from `src`, or an error code. */
+size_t HUFv07_readDTableX2 (HUFv07_DTable* DTable, const void* src, size_t srcSize)
+{
+    BYTE huffWeight[HUFv07_SYMBOLVALUE_MAX + 1];
+    U32 rankVal[HUFv07_TABLELOG_ABSOLUTEMAX + 1];   /* large enough for values from 0 to 16 */
+    U32 tableLog = 0;
+    U32 nbSymbols = 0;
+    size_t iSize;
+    void* const dtPtr = DTable + 1;   /* decoding cells start right after the descriptor */
+    HUFv07_DEltX2* const dt = (HUFv07_DEltX2*)dtPtr;
+
+    HUFv07_STATIC_ASSERT(sizeof(DTableDesc) == sizeof(HUFv07_DTable));
+    //memset(huffWeight, 0, sizeof(huffWeight));   /* is not necessary, even though some analyzer complain ... */
+
+    iSize = HUFv07_readStats(huffWeight, HUFv07_SYMBOLVALUE_MAX + 1, rankVal, &nbSymbols, &tableLog, src, srcSize);   /* rankVal first receives the nb of symbols per weight */
+    if (HUFv07_isError(iSize)) return iSize;
+
+    /* Table header */
+    {   DTableDesc dtd = HUFv07_getDTableDesc(DTable);
+        if (tableLog > (U32)(dtd.maxTableLog+1)) return ERROR(tableLog_tooLarge);   /* DTable too small, huffman tree cannot fit in */
+        dtd.tableType = 0;   /* value required by the *X2_usingDTable() entry points */
+        dtd.tableLog = (BYTE)tableLog;
+        memcpy(DTable, &dtd, sizeof(dtd));
+    }
+
+    /* Prepare ranks */
+    {   U32 n, nextRankStart = 0;
+        for (n=1; n<tableLog+1; n++) {
+            U32 current = nextRankStart;
+            nextRankStart += (rankVal[n] << (n-1));   /* each symbol of weight n takes 2^(n-1) cells */
+            rankVal[n] = current;   /* from now on : index of the first free cell for weight n */
+    }   }
+
+    /* fill DTable */
+    {   U32 n;
+        for (n=0; n<nbSymbols; n++) {
+            U32 const w = huffWeight[n];
+            U32 const length = (1 << w) >> 1;   /* 0 for weight 0 : such symbols get no cell */
+            U32 i;
+            HUFv07_DEltX2 D;
+            D.byte = (BYTE)n; D.nbBits = (BYTE)(tableLog + 1 - w);
+            for (i = rankVal[w]; i < rankVal[w] + length; i++)
+                dt[i] = D;
+            rankVal[w] += length;
+    }   }
+
+    return iSize;
+}
+
+
+static BYTE HUFv07_decodeSymbolX2(BITv07_DStream_t* Dstream, const HUFv07_DEltX2* dt, const U32 dtLog)
+{   /* Decodes one symbol : the next dtLog bits index `dt`, then only the nbBits of the selected cell are skipped. */
+    size_t const val = BITv07_lookBitsFast(Dstream, dtLog); /* note : dtLog >= 1 */
+    BYTE const c = dt[val].byte;
+    BITv07_skipBits(Dstream, dt[val].nbBits);
+    return c;
+}
+
+#define HUFv07_DECODE_SYMBOLX2_0(ptr, DStreamPtr) \
+    *ptr++ = HUFv07_decodeSymbolX2(DStreamPtr, dt, dtLog)   /* always decodes; relies on `dt` and `dtLog` being in scope where the macro is expanded */
+/* _1 : decodes only on 64-bit targets or when HUFv07_TABLELOG_MAX<=12, otherwise expands to nothing (presumably a bit-container capacity concern - confirm) */
+#define HUFv07_DECODE_SYMBOLX2_1(ptr, DStreamPtr) \
+    if (MEM_64bits() || (HUFv07_TABLELOG_MAX<=12)) \
+        HUFv07_DECODE_SYMBOLX2_0(ptr, DStreamPtr)
+/* _2 : decodes only on 64-bit targets */
+#define HUFv07_DECODE_SYMBOLX2_2(ptr, DStreamPtr) \
+    if (MEM_64bits()) \
+        HUFv07_DECODE_SYMBOLX2_0(ptr, DStreamPtr)
+
+static inline size_t HUFv07_decodeStreamX2(BYTE* p, BITv07_DStream_t* const bitDPtr, BYTE* const pEnd, const HUFv07_DEltX2* const dt, const U32 dtLog)
+{   /* Decodes symbols from bitDPtr into [p, pEnd) until p reaches pEnd. @return : pEnd minus the initial p. The end-of-stream state is left for the caller to check. */
+    BYTE* const pStart = p;
+
+    /* up to 4 symbols at a time */
+    while ((BITv07_reloadDStream(bitDPtr) == BITv07_DStream_unfinished) && (p <= pEnd-4)) {
+        HUFv07_DECODE_SYMBOLX2_2(p, bitDPtr);   /* the _2 and _1 variants may expand to nothing, see their definitions */
+        HUFv07_DECODE_SYMBOLX2_1(p, bitDPtr);
+        HUFv07_DECODE_SYMBOLX2_2(p, bitDPtr);
+        HUFv07_DECODE_SYMBOLX2_0(p, bitDPtr);
+    }
+
+    /* closer to the end */
+    while ((BITv07_reloadDStream(bitDPtr) == BITv07_DStream_unfinished) && (p < pEnd))
+        HUFv07_DECODE_SYMBOLX2_0(p, bitDPtr);
+
+    /* no more data to retrieve from bitstream, hence no need to reload */
+    while (p < pEnd)
+        HUFv07_DECODE_SYMBOLX2_0(p, bitDPtr);
+
+    return pEnd-pStart;
+}
+
+static size_t HUFv07_decompress1X2_usingDTable_internal(
+          void* dst,  size_t dstSize,
+    const void* cSrc, size_t cSrcSize,
+    const HUFv07_DTable* DTable)
+{   /* Decodes exactly dstSize symbols from the single bitstream cSrc. The table type is not checked here. @return : dstSize, or an error code. */
+    BYTE* op = (BYTE*)dst;
+    BYTE* const oend = op + dstSize;
+    const void* dtPtr = DTable + 1;   /* decoding cells follow the descriptor */
+    const HUFv07_DEltX2* const dt = (const HUFv07_DEltX2*)dtPtr;
+    BITv07_DStream_t bitD;
+    DTableDesc const dtd = HUFv07_getDTableDesc(DTable);
+    U32 const dtLog = dtd.tableLog;
+
+    { size_t const errorCode = BITv07_initDStream(&bitD, cSrc, cSrcSize);
+      if (HUFv07_isError(errorCode)) return errorCode; }
+
+    HUFv07_decodeStreamX2(op, &bitD, oend, dt, dtLog);
+
+    /* check */
+    if (!BITv07_endOfDStream(&bitD)) return ERROR(corruption_detected);   /* the bitstream must be exactly consumed once dst is full */
+
+    return dstSize;
+}
+
+size_t HUFv07_decompress1X2_usingDTable(
+          void* dst,  size_t dstSize,
+    const void* cSrc, size_t cSrcSize,
+    const HUFv07_DTable* DTable)
+{   /* Checked entry point : only a table whose descriptor has tableType 0 is accepted, any other type yields ERROR(GENERIC). */
+    DTableDesc const desc = HUFv07_getDTableDesc(DTable);
+    if (desc.tableType == 0) return HUFv07_decompress1X2_usingDTable_internal(dst, dstSize, cSrc, cSrcSize, DTable);
+    return ERROR(GENERIC);
+}
+
+size_t HUFv07_decompress1X2_DCtx (HUFv07_DTable* DCtx, void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize)
+{
+    /* step 1 : rebuild the decoding table from the header heading cSrc */
+    size_t const headerSize = HUFv07_readDTableX2 (DCtx, cSrc, cSrcSize);
+    if (HUFv07_isError(headerSize)) return headerSize;
+    if (headerSize >= cSrcSize) return ERROR(srcSize_wrong);   /* the header must leave some payload */
+    /* step 2 : decode the single bitstream located after the header */
+    {   const BYTE* const payload = (const BYTE*)cSrc + headerSize;
+        return HUFv07_decompress1X2_usingDTable_internal (dst, dstSize, payload, cSrcSize - headerSize, DCtx);
+    }
+}
+
+size_t HUFv07_decompress1X2 (void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize)
+{   /* Convenience wrapper : declares a local table through HUFv07_CREATE_STATIC_DTABLEX2 and forwards to the _DCtx variant. */
+    HUFv07_CREATE_STATIC_DTABLEX2(DTable, HUFv07_TABLELOG_MAX);
+    return HUFv07_decompress1X2_DCtx (DTable, dst, dstSize, cSrc, cSrcSize);
+}
+
+
+static size_t HUFv07_decompress4X2_usingDTable_internal(
+          void* dst,  size_t dstSize,
+    const void* cSrc, size_t cSrcSize,
+    const HUFv07_DTable* DTable)
+{   /* Decodes 4 Huffman bitstreams (6-byte jump table, then the streams) into 4 consecutive segments of dst. The table type is not checked here. @return : dstSize, or an error code. */
+    /* Check */
+    if (cSrcSize < 10) return ERROR(corruption_detected);  /* strict minimum : jump table + 1 byte per stream */
+
+    {   const BYTE* const istart = (const BYTE*) cSrc;
+        BYTE* const ostart = (BYTE*) dst;
+        BYTE* const oend = ostart + dstSize;
+        const void* const dtPtr = DTable + 1;
+        const HUFv07_DEltX2* const dt = (const HUFv07_DEltX2*)dtPtr;
+
+        /* Init */
+        BITv07_DStream_t bitD1;
+        BITv07_DStream_t bitD2;
+        BITv07_DStream_t bitD3;
+        BITv07_DStream_t bitD4;
+        size_t const length1 = MEM_readLE16(istart);
+        size_t const length2 = MEM_readLE16(istart+2);
+        size_t const length3 = MEM_readLE16(istart+4);
+        size_t const length4 = cSrcSize - (length1 + length2 + length3 + 6);   /* last stream takes whatever remains; wraps around when the first three overrun cSrc */
+        const BYTE* const istart1 = istart + 6;  /* jumpTable */
+        const BYTE* const istart2 = istart1 + length1;
+        const BYTE* const istart3 = istart2 + length2;
+        const BYTE* const istart4 = istart3 + length3;
+        const size_t segmentSize = (dstSize+3) / 4;   /* output size of streams 1-3; stream 4 produces the remainder */
+        BYTE* const opStart2 = ostart + segmentSize;
+        BYTE* const opStart3 = opStart2 + segmentSize;
+        BYTE* const opStart4 = opStart3 + segmentSize;
+        BYTE* op1 = ostart;
+        BYTE* op2 = opStart2;
+        BYTE* op3 = opStart3;
+        BYTE* op4 = opStart4;
+        U32 endSignal;
+        DTableDesc const dtd = HUFv07_getDTableDesc(DTable);
+        U32 const dtLog = dtd.tableLog;
+
+        if ((length4 > cSrcSize) || (opStart4 > oend)) return ERROR(corruption_detected);   /* overflow : streams overrun cSrc, or 3 full segments do not fit in dst (dstSize of 1, 2 or 5) */
+        { size_t const errorCode = BITv07_initDStream(&bitD1, istart1, length1);
+          if (HUFv07_isError(errorCode)) return errorCode; }
+        { size_t const errorCode = BITv07_initDStream(&bitD2, istart2, length2);
+          if (HUFv07_isError(errorCode)) return errorCode; }
+        { size_t const errorCode = BITv07_initDStream(&bitD3, istart3, length3);
+          if (HUFv07_isError(errorCode)) return errorCode; }
+        { size_t const errorCode = BITv07_initDStream(&bitD4, istart4, length4);
+          if (HUFv07_isError(errorCode)) return errorCode; }
+
+        /* 16-32 symbols per loop (4-8 symbols per stream) */
+        endSignal = BITv07_reloadDStream(&bitD1) | BITv07_reloadDStream(&bitD2) | BITv07_reloadDStream(&bitD3) | BITv07_reloadDStream(&bitD4);
+        for ( ; (endSignal==BITv07_DStream_unfinished) && (op4<(oend-7)) ; ) {
+            HUFv07_DECODE_SYMBOLX2_2(op1, &bitD1);
+            HUFv07_DECODE_SYMBOLX2_2(op2, &bitD2);
+            HUFv07_DECODE_SYMBOLX2_2(op3, &bitD3);
+            HUFv07_DECODE_SYMBOLX2_2(op4, &bitD4);
+            HUFv07_DECODE_SYMBOLX2_1(op1, &bitD1);
+            HUFv07_DECODE_SYMBOLX2_1(op2, &bitD2);
+            HUFv07_DECODE_SYMBOLX2_1(op3, &bitD3);
+            HUFv07_DECODE_SYMBOLX2_1(op4, &bitD4);
+            HUFv07_DECODE_SYMBOLX2_2(op1, &bitD1);
+            HUFv07_DECODE_SYMBOLX2_2(op2, &bitD2);
+            HUFv07_DECODE_SYMBOLX2_2(op3, &bitD3);
+            HUFv07_DECODE_SYMBOLX2_2(op4, &bitD4);
+            HUFv07_DECODE_SYMBOLX2_0(op1, &bitD1);
+            HUFv07_DECODE_SYMBOLX2_0(op2, &bitD2);
+            HUFv07_DECODE_SYMBOLX2_0(op3, &bitD3);
+            HUFv07_DECODE_SYMBOLX2_0(op4, &bitD4);
+            endSignal = BITv07_reloadDStream(&bitD1) | BITv07_reloadDStream(&bitD2) | BITv07_reloadDStream(&bitD3) | BITv07_reloadDStream(&bitD4);
+        }
+
+        /* check corruption */
+        if (op1 > opStart2) return ERROR(corruption_detected);
+        if (op2 > opStart3) return ERROR(corruption_detected);
+        if (op3 > opStart4) return ERROR(corruption_detected);
+        /* note : op4 supposed already verified within main loop */
+
+        /* finish bitStreams one by one */
+        HUFv07_decodeStreamX2(op1, &bitD1, opStart2, dt, dtLog);
+        HUFv07_decodeStreamX2(op2, &bitD2, opStart3, dt, dtLog);
+        HUFv07_decodeStreamX2(op3, &bitD3, opStart4, dt, dtLog);
+        HUFv07_decodeStreamX2(op4, &bitD4, oend,     dt, dtLog);
+
+        /* check */
+        endSignal = BITv07_endOfDStream(&bitD1) & BITv07_endOfDStream(&bitD2) & BITv07_endOfDStream(&bitD3) & BITv07_endOfDStream(&bitD4);
+        if (!endSignal) return ERROR(corruption_detected);   /* every stream must be exactly consumed */
+
+        /* decoded size */
+        return dstSize;
+    }
+}
+
+
+size_t HUFv07_decompress4X2_usingDTable(
+          void* dst,  size_t dstSize,
+    const void* cSrc, size_t cSrcSize,
+    const HUFv07_DTable* DTable)
+{   /* Checked entry point : only a table whose descriptor has tableType 0 is accepted, any other type yields ERROR(GENERIC). */
+    DTableDesc const desc = HUFv07_getDTableDesc(DTable);
+    if (desc.tableType == 0) return HUFv07_decompress4X2_usingDTable_internal(dst, dstSize, cSrc, cSrcSize, DTable);
+    return ERROR(GENERIC);
+}
+
+
+size_t HUFv07_decompress4X2_DCtx (HUFv07_DTable* dctx, void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize)
+{
+    /* step 1 : rebuild the decoding table from the header heading cSrc */
+    size_t const headerSize = HUFv07_readDTableX2 (dctx, cSrc, cSrcSize);
+    if (HUFv07_isError(headerSize)) return headerSize;
+    if (headerSize >= cSrcSize) return ERROR(srcSize_wrong);   /* the header must leave some payload */
+    /* step 2 : decode the 4 bitstreams located after the header */
+    {   const BYTE* const payload = (const BYTE*)cSrc + headerSize;
+        return HUFv07_decompress4X2_usingDTable_internal (dst, dstSize, payload, cSrcSize - headerSize, dctx);
+    }
+}
+
+size_t HUFv07_decompress4X2 (void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize)
+{   /* Convenience wrapper : declares a local table through HUFv07_CREATE_STATIC_DTABLEX2 and forwards to the _DCtx variant. */
+    HUFv07_CREATE_STATIC_DTABLEX2(DTable, HUFv07_TABLELOG_MAX);
+    return HUFv07_decompress4X2_DCtx(DTable, dst, dstSize, cSrc, cSrcSize);
+}
+
+
+/* *************************/
+/* double-symbols decoding */
+/* *************************/
+typedef struct { U16 sequence; BYTE nbBits; BYTE length; } HUFv07_DEltX4;  /* double-symbols decoding */
+/* sequence : 1 or 2 symbols written as little-endian 16 bits; nbBits : bits consumed by the cell; length : nb of symbols it holds (1 or 2) */
+typedef struct { BYTE symbol; BYTE weight; } sortedSymbol_t;
+/* Fills one sub-table of 2^sizeLog cells : entries for the symbol that may follow first symbol `baseSeq`, which already consumed `consumed` bits. */
+static void HUFv07_fillDTableX4Level2(HUFv07_DEltX4* DTable, U32 sizeLog, const U32 consumed,
+                           const U32* rankValOrigin, const int minWeight,
+                           const sortedSymbol_t* sortedSymbols, const U32 sortedListSize,
+                           U32 nbBitsBaseline, U16 baseSeq)
+{
+    HUFv07_DEltX4 DElt;
+    U32 rankVal[HUFv07_TABLELOG_ABSOLUTEMAX + 1];   /* private copy : advanced while filling, the caller's row is left intact */
+
+    /* get pre-calculated rankVal */
+    memcpy(rankVal, rankValOrigin, sizeof(rankVal));
+
+    /* fill skipped values */
+    if (minWeight>1) {
+        U32 i, skipSize = rankVal[minWeight];
+        MEM_writeLE16(&(DElt.sequence), baseSeq);
+        DElt.nbBits   = (BYTE)(consumed);
+        DElt.length   = 1;   /* these cells decode the first symbol alone */
+        for (i = 0; i < skipSize; i++)
+            DTable[i] = DElt;
+    }
+
+    /* fill DTable */
+    { U32 s; for (s=0; s<sortedListSize; s++) {   /* note : sortedSymbols already skipped */
+        const U32 symbol = sortedSymbols[s].symbol;
+        const U32 weight = sortedSymbols[s].weight;
+        const U32 nbBits = nbBitsBaseline - weight;
+        const U32 length = 1 << (sizeLog-nbBits);
+        const U32 start = rankVal[weight];
+        U32 i = start;
+        const U32 end = start + length;
+
+        MEM_writeLE16(&(DElt.sequence), (U16)(baseSeq + (symbol << 8)));   /* first symbol in the low byte, second one in the high byte */
+        DElt.nbBits = (BYTE)(nbBits + consumed);
+        DElt.length = 2;
+        do { DTable[i++] = DElt; } while (i<end);   /* since length >= 1 */
+
+        rankVal[weight] += length;
+    }}
+}
+
+typedef U32 rankVal_t[HUFv07_TABLELOG_ABSOLUTEMAX][HUFv07_TABLELOG_ABSOLUTEMAX + 1];   /* one row of per-weight start positions for each nb of already consumed bits */
+/* Fills the 2^targetLog cells of `DTable` : double-symbol cells where a second symbol can fit in the remaining bits, single-symbol cells otherwise. */
+static void HUFv07_fillDTableX4(HUFv07_DEltX4* DTable, const U32 targetLog,
+                           const sortedSymbol_t* sortedList, const U32 sortedListSize,
+                           const U32* rankStart, rankVal_t rankValOrigin, const U32 maxWeight,
+                           const U32 nbBitsBaseline)
+{
+    U32 rankVal[HUFv07_TABLELOG_ABSOLUTEMAX + 1];
+    const int scaleLog = nbBitsBaseline - targetLog;   /* note : targetLog >= srcLog, hence scaleLog <= 1 */
+    const U32 minBits  = nbBitsBaseline - maxWeight;   /* nb of bits of the symbols having the largest weight */
+    U32 s;
+
+    memcpy(rankVal, rankValOrigin, sizeof(rankVal));   /* sizeof(rankVal) is one row, so only rankValOrigin[0] is copied */
+
+    /* fill DTable */
+    for (s=0; s<sortedListSize; s++) {
+        const U16 symbol = sortedList[s].symbol;
+        const U32 weight = sortedList[s].weight;
+        const U32 nbBits = nbBitsBaseline - weight;
+        const U32 start = rankVal[weight];
+        const U32 length = 1 << (targetLog-nbBits);
+
+        if (targetLog-nbBits >= minBits) {   /* enough room for a second symbol */
+            U32 sortedRank;
+            int minWeight = nbBits + scaleLog;
+            if (minWeight < 1) minWeight = 1;
+            sortedRank = rankStart[minWeight];   /* position, in the sorted list, of the first candidate second symbol */
+            HUFv07_fillDTableX4Level2(DTable+start, targetLog-nbBits, nbBits,
+                           rankValOrigin[nbBits], minWeight,
+                           sortedList+sortedRank, sortedListSize-sortedRank,
+                           nbBitsBaseline, symbol);
+        } else {
+            HUFv07_DEltX4 DElt;
+            MEM_writeLE16(&(DElt.sequence), symbol);
+            DElt.nbBits = (BYTE)(nbBits);
+            DElt.length = 1;   /* single symbol only */
+            {   U32 u;
+                const U32 end = start + length;
+                for (u = start; u < end; u++) DTable[u] = DElt;
+        }   }
+        rankVal[weight] += length;
+    }
+}
+
+size_t HUFv07_readDTableX4 (HUFv07_DTable* DTable, const void* src, size_t srcSize)
+{   /* Parses the weight header at `src` and builds the double-symbol (X4) decoding table inside `DTable`. @return : the value reported by HUFv07_readStats(), or an error code */
+    BYTE weightList[HUFv07_SYMBOLVALUE_MAX + 1];
+    sortedSymbol_t sortedSymbol[HUFv07_SYMBOLVALUE_MAX + 1];
+    U32 rankStats[HUFv07_TABLELOG_ABSOLUTEMAX + 1] = { 0 };   /* filled by HUFv07_readStats(); used below as the number of symbols of each weight */
+    U32 rankStart0[HUFv07_TABLELOG_ABSOLUTEMAX + 2] = { 0 };
+    U32* const rankStart = rankStart0+1;   /* same array seen one slot later : rankStart[w] == rankStart0[w+1] */
+    rankVal_t rankVal;
+    U32 tableLog, maxW, sizeOfSort, nbSymbols;
+    DTableDesc dtd = HUFv07_getDTableDesc(DTable);
+    U32 const maxTableLog = dtd.maxTableLog;
+    size_t iSize;
+    void* dtPtr = DTable+1;   /* force compiler to avoid strict-aliasing */
+    HUFv07_DEltX4* const dt = (HUFv07_DEltX4*)dtPtr;   /* decoding cells start right after the descriptor kept in DTable[0] */
+
+    HUFv07_STATIC_ASSERT(sizeof(HUFv07_DEltX4) == sizeof(HUFv07_DTable));   /* if compilation fails here, assertion is false */
+    if (maxTableLog > HUFv07_TABLELOG_ABSOLUTEMAX) return ERROR(tableLog_tooLarge);
+    //memset(weightList, 0, sizeof(weightList));   /* is not necessary, even though some analyzer complain ... */
+
+    iSize = HUFv07_readStats(weightList, HUFv07_SYMBOLVALUE_MAX + 1, rankStats, &nbSymbols, &tableLog, src, srcSize);
+    if (HUFv07_isError(iSize)) return iSize;   /* header could not be parsed : pass the error code on */
+
+    /* check result */
+    if (tableLog > maxTableLog) return ERROR(tableLog_tooLarge);   /* DTable can't fit code depth */
+
+    /* find maxWeight */
+    for (maxW = tableLog; rankStats[maxW]==0; maxW--) {}  /* necessarily finds a solution before 0 */
+
+    /* Get start index of each weight */
+    {   U32 w, nextRankStart = 0;
+        for (w=1; w<maxW+1; w++) {
+            U32 current = nextRankStart;
+            nextRankStart += rankStats[w];
+            rankStart[w] = current;
+        }
+        rankStart[0] = nextRankStart;   /* put all 0w symbols at the end of sorted list*/
+        sizeOfSort = nextRankStart;   /* total of rankStats[1..maxW] : weight-0 symbols are left out of the count */
+    }
+
+    /* sort symbols by weight */
+    {   U32 s;
+        for (s=0; s<nbSymbols; s++) {
+            U32 const w = weightList[s];
+            U32 const r = rankStart[w]++;   /* next free slot for this weight; once the loop is over rankStart[w] sits at the end of weight w's run */
+            sortedSymbol[r].symbol = (BYTE)s;
+            sortedSymbol[r].weight = (BYTE)w;
+        }
+        rankStart[0] = 0;   /* forget 0w symbols; this is beginning of weight(1) */
+    }
+
+    /* Build rankVal */
+    {   U32* const rankVal0 = rankVal[0];
+        {   int const rescale = (maxTableLog-tableLog) - 1;   /* tableLog <= maxTableLog */
+            U32 nextRankVal = 0;
+            U32 w;
+            for (w=1; w<maxW+1; w++) {
+                U32 current = nextRankVal;
+                nextRankVal += rankStats[w] << (w+rescale);   /* each symbol of weight w takes 1<<(w+rescale) cells */
+                rankVal0[w] = current;
+        }   }
+        {   U32 const minBits = tableLog+1 - maxW;
+            U32 consumed;
+            for (consumed = minBits; consumed < maxTableLog - minBits + 1; consumed++) {
+                U32* const rankValPtr = rankVal[consumed];
+                U32 w;
+                for (w = 1; w < maxW+1; w++) {
+                    rankValPtr[w] = rankVal0[w] >> consumed;   /* row `consumed` : the row-0 offsets shifted right by `consumed` bits */
+    }   }   }   }
+
+    HUFv07_fillDTableX4(dt, maxTableLog,
+                   sortedSymbol, sizeOfSort,
+                   rankStart0, rankVal, maxW,   /* after the sort above, rankStart0[w] is where weight w begins in sortedSymbol */
+                   tableLog+1);
+
+    dtd.tableLog = (BYTE)maxTableLog;   /* the table was filled with maxTableLog as its target width */
+    dtd.tableType = 1;   /* 1 : the value the X4 `_usingDTable` entry points check for */
+    memcpy(DTable, &dtd, sizeof(dtd));   /* store the updated descriptor back into DTable[0] */
+    return iSize;
+}
+
+
+static U32 HUFv07_decodeSymbolX4(void* op, BITv07_DStream_t* DStream, const HUFv07_DEltX4* dt, const U32 dtLog)
+{   /* Copies the first 2 bytes of the selected cell to `op`, consumes the cell's bits, returns the cell's `length` field */
+    const HUFv07_DEltX4* const cell = dt + BITv07_lookBitsFast(DStream, dtLog);   /* dtLog >= 1 expected */
+    memcpy(op, cell, 2);
+    BITv07_skipBits(DStream, cell->nbBits);
+    return cell->length;
+}
+
+static U32 HUFv07_decodeLastSymbolX4(void* op, BITv07_DStream_t* DStream, const HUFv07_DEltX4* dt, const U32 dtLog)
+{   /* Decodes the very last output byte : a single byte is written whatever the cell's length. Always returns 1 */
+    const HUFv07_DEltX4* const cell = dt + BITv07_lookBitsFast(DStream, dtLog);   /* dtLog >= 1 expected */
+    size_t const containerBits = sizeof(DStream->bitContainer)*8;
+    memcpy(op, cell, 1);
+    if (cell->length == 1) {
+        BITv07_skipBits(DStream, cell->nbBits);
+    } else if (DStream->bitsConsumed < containerBits) {   /* longer cell, only its first byte is used */
+        BITv07_skipBits(DStream, cell->nbBits);
+        /* nbBits covers the whole cell : clamp the counter to the container width. Only acceptable because nothing is decoded afterwards */
+        if (DStream->bitsConsumed > containerBits) DStream->bitsConsumed = containerBits;
+    }
+    return 1;
+}
+
+
+#define HUFv07_DECODE_SYMBOLX4_0(ptr, DStreamPtr) \
+    ptr += HUFv07_decodeSymbolX4(ptr, DStreamPtr, dt, dtLog)
+/* _0 (above) always decodes. _1 (below) decodes only when MEM_64bits() is true or HUFv07_TABLELOG_MAX <= 12 */
+#define HUFv07_DECODE_SYMBOLX4_1(ptr, DStreamPtr) \
+    if (MEM_64bits() || (HUFv07_TABLELOG_MAX<=12)) \
+        ptr += HUFv07_decodeSymbolX4(ptr, DStreamPtr, dt, dtLog)
+/* _2 decodes only when MEM_64bits() is true. All three need `dt` and `dtLog` in scope where they expand; _1/_2 expand to an unbraced `if`, so never follow them with `else` */
+#define HUFv07_DECODE_SYMBOLX4_2(ptr, DStreamPtr) \
+    if (MEM_64bits()) \
+        ptr += HUFv07_decodeSymbolX4(ptr, DStreamPtr, dt, dtLog)
+
+static inline size_t HUFv07_decodeStreamX4(BYTE* p, BITv07_DStream_t* bitDPtr, BYTE* const pEnd, const HUFv07_DEltX4* const dt, const U32 dtLog)
+{   /* Decodes from `bitDPtr` into [p, pEnd) using table `dt` of log size `dtLog`. @return : number of bytes produced (final p minus initial p) */
+    BYTE* const pStart = p;
+
+    /* up to 8 symbols at a time */
+    while ((BITv07_reloadDStream(bitDPtr) == BITv07_DStream_unfinished) && (p < pEnd-7)) {   /* at least 8 bytes of room : 4 decodes, each copying 2 bytes */
+        HUFv07_DECODE_SYMBOLX4_2(p, bitDPtr);
+        HUFv07_DECODE_SYMBOLX4_1(p, bitDPtr);
+        HUFv07_DECODE_SYMBOLX4_2(p, bitDPtr);
+        HUFv07_DECODE_SYMBOLX4_0(p, bitDPtr);
+    }
+
+    /* closer to end : up to 2 symbols at a time */
+    while ((BITv07_reloadDStream(bitDPtr) == BITv07_DStream_unfinished) && (p <= pEnd-2))   /* still room for one 2-byte copy */
+        HUFv07_DECODE_SYMBOLX4_0(p, bitDPtr);
+
+    while (p <= pEnd-2)
+        HUFv07_DECODE_SYMBOLX4_0(p, bitDPtr);   /* no need to reload : reached the end of DStream */
+
+    if (p < pEnd)   /* exactly one byte left : use the variant that copies a single byte */
+        p += HUFv07_decodeLastSymbolX4(p, bitDPtr, dt, dtLog);
+
+    return p-pStart;
+}
+
+
+static size_t HUFv07_decompress1X4_usingDTable_internal(
+          void* dst,  size_t dstSize,
+    const void* cSrc, size_t cSrcSize,
+    const HUFv07_DTable* DTable)
+{
+    /* Single-stream X4 decoding of cSrc into dst[0..dstSize) with a ready-made DTable. @return : dstSize, or an error code */
+    BITv07_DStream_t stream;
+    size_t const initResult = BITv07_initDStream(&stream, cSrc, cSrcSize);
+
+    /* the bitstream could not be opened */
+    if (HUFv07_isError(initResult))
+        return initResult;
+
+    /* decode; the byte count returned by the helper is not needed here */
+    {   DTableDesc const desc = HUFv07_getDTableDesc(DTable);
+        const void* const cells = DTable+1;   /* detour through void* : keeps strict-aliasing quiet */
+        BYTE* const out = (BYTE*) dst;
+        HUFv07_decodeStreamX4(out, &stream, out + dstSize, (const HUFv07_DEltX4*)cells, desc.tableLog);
+    }
+
+    /* the input must be fully consumed once dst is filled */
+    if (!BITv07_endOfDStream(&stream))
+        return ERROR(corruption_detected);
+
+    /* decoded size */
+    return dstSize;
+}
+
+size_t HUFv07_decompress1X4_usingDTable(
+          void* dst,  size_t dstSize,
+    const void* cSrc, size_t cSrcSize,
+    const HUFv07_DTable* DTable)
+{   /* Public single-stream entry point : refuses any table whose descriptor is not tagged as type 1 */
+    DTableDesc const desc = HUFv07_getDTableDesc(DTable);
+    return (desc.tableType == 1) ? HUFv07_decompress1X4_usingDTable_internal(dst, dstSize, cSrc, cSrcSize, DTable)
+                                 : ERROR(GENERIC);
+}
+
+size_t HUFv07_decompress1X4_DCtx (HUFv07_DTable* DCtx, void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize)
+{   /* Rebuilds the X4 table in DCtx from the header at cSrc, then decodes the remaining bytes as one stream */
+    size_t const headerSize = HUFv07_readDTableX4 (DCtx, cSrc, cSrcSize);
+
+    if (HUFv07_isError(headerSize))
+        return headerSize;
+    if (cSrcSize <= headerSize)   /* the header used up the whole input */
+        return ERROR(srcSize_wrong);
+
+    return HUFv07_decompress1X4_usingDTable_internal (dst, dstSize, (const BYTE*)cSrc + headerSize, cSrcSize - headerSize, DCtx);
+}
+
+size_t HUFv07_decompress1X4 (void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize)
+{   /* One-shot variant : decodes through a table declared here by HUFv07_CREATE_STATIC_DTABLEX4 for HUFv07_TABLELOG_MAX */
+    HUFv07_CREATE_STATIC_DTABLEX4(localTable, HUFv07_TABLELOG_MAX);
+    return HUFv07_decompress1X4_DCtx(localTable, dst, dstSize, cSrc, cSrcSize);
+}
+
+static size_t HUFv07_decompress4X4_usingDTable_internal(
+          void* dst,  size_t dstSize,
+    const void* cSrc, size_t cSrcSize,
+    const HUFv07_DTable* DTable)
+{
+    /* 4-stream X4 decoding. cSrc opens with a 6-byte jump table (three LE16 sizes; the 4th stream takes what is left).
+     * Stream k fills the k-th segment of dst, each segment being (dstSize+3)/4 bytes, the last one ending at dst+dstSize.
+     * @return : dstSize, or an error code (corruption_detected, or the code reported by BITv07_initDStream()) */
+    if (cSrcSize < 10) return ERROR(corruption_detected);   /* strict minimum : jump table + 1 byte per stream */
+
+    {   const BYTE* const istart = (const BYTE*) cSrc;
+        BYTE* const ostart = (BYTE*) dst;
+        BYTE* const oend = ostart + dstSize;
+        const void* const dtPtr = DTable+1;
+        const HUFv07_DEltX4* const dt = (const HUFv07_DEltX4*)dtPtr;
+
+        /* Init */
+        BITv07_DStream_t bitD1, bitD2, bitD3, bitD4;
+        size_t const length1 = MEM_readLE16(istart);
+        size_t const length2 = MEM_readLE16(istart+2);
+        size_t const length3 = MEM_readLE16(istart+4);
+        size_t const length4 = cSrcSize - (length1 + length2 + length3 + 6);
+        const BYTE* const istart1 = istart + 6;  /* jumpTable */
+        const BYTE* const istart2 = istart1 + length1;
+        const BYTE* const istart3 = istart2 + length2;
+        const BYTE* const istart4 = istart3 + length3;
+        size_t const segmentSize = (dstSize+3) / 4;
+        BYTE* const opStart2 = ostart + segmentSize;
+        BYTE* const opStart3 = opStart2 + segmentSize;
+        BYTE* const opStart4 = opStart3 + segmentSize;
+        BYTE* op1 = ostart;
+        BYTE* op2 = opStart2;
+        BYTE* op3 = opStart3;
+        BYTE* op4 = opStart4;
+        U32 endSignal;
+        DTableDesc const dtd = HUFv07_getDTableDesc(DTable);
+        U32 const dtLog = dtd.tableLog;   /* picked up by the HUFv07_DECODE_SYMBOLX4_* macros, together with `dt` */
+        if (segmentSize*3 > dstSize) return ERROR(corruption_detected);   /* dstSize of 1, 2 or 5 : segments 1-3 alone would run past oend */
+        if (length4 > cSrcSize) return ERROR(corruption_detected);   /* overflow */
+        { size_t const errorCode = BITv07_initDStream(&bitD1, istart1, length1);
+          if (HUFv07_isError(errorCode)) return errorCode; }
+        { size_t const errorCode = BITv07_initDStream(&bitD2, istart2, length2);
+          if (HUFv07_isError(errorCode)) return errorCode; }
+        { size_t const errorCode = BITv07_initDStream(&bitD3, istart3, length3);
+          if (HUFv07_isError(errorCode)) return errorCode; }
+        { size_t const errorCode = BITv07_initDStream(&bitD4, istart4, length4);
+          if (HUFv07_isError(errorCode)) return errorCode; }
+
+        /* 16-32 symbols per loop (4-8 symbols per stream) */
+        endSignal = BITv07_reloadDStream(&bitD1) | BITv07_reloadDStream(&bitD2) | BITv07_reloadDStream(&bitD3) | BITv07_reloadDStream(&bitD4);
+        for ( ; (endSignal==BITv07_DStream_unfinished) && (op4<(oend-7)) ; ) {   /* only the 4th cursor is bounded here; the other three are verified after the loop */
+            HUFv07_DECODE_SYMBOLX4_2(op1, &bitD1);
+            HUFv07_DECODE_SYMBOLX4_2(op2, &bitD2);
+            HUFv07_DECODE_SYMBOLX4_2(op3, &bitD3);
+            HUFv07_DECODE_SYMBOLX4_2(op4, &bitD4);
+            HUFv07_DECODE_SYMBOLX4_1(op1, &bitD1);
+            HUFv07_DECODE_SYMBOLX4_1(op2, &bitD2);
+            HUFv07_DECODE_SYMBOLX4_1(op3, &bitD3);
+            HUFv07_DECODE_SYMBOLX4_1(op4, &bitD4);
+            HUFv07_DECODE_SYMBOLX4_2(op1, &bitD1);
+            HUFv07_DECODE_SYMBOLX4_2(op2, &bitD2);
+            HUFv07_DECODE_SYMBOLX4_2(op3, &bitD3);
+            HUFv07_DECODE_SYMBOLX4_2(op4, &bitD4);
+            HUFv07_DECODE_SYMBOLX4_0(op1, &bitD1);
+            HUFv07_DECODE_SYMBOLX4_0(op2, &bitD2);
+            HUFv07_DECODE_SYMBOLX4_0(op3, &bitD3);
+            HUFv07_DECODE_SYMBOLX4_0(op4, &bitD4);
+
+            endSignal = BITv07_reloadDStream(&bitD1) | BITv07_reloadDStream(&bitD2) | BITv07_reloadDStream(&bitD3) | BITv07_reloadDStream(&bitD4);
+        }
+
+        /* check corruption */
+        if (op1 > opStart2) return ERROR(corruption_detected);
+        if (op2 > opStart3) return ERROR(corruption_detected);
+        if (op3 > opStart4) return ERROR(corruption_detected);
+        /* note : op4 supposed already verified within main loop */
+
+        /* finish bitStreams one by one */
+        HUFv07_decodeStreamX4(op1, &bitD1, opStart2, dt, dtLog);
+        HUFv07_decodeStreamX4(op2, &bitD2, opStart3, dt, dtLog);
+        HUFv07_decodeStreamX4(op3, &bitD3, opStart4, dt, dtLog);
+        HUFv07_decodeStreamX4(op4, &bitD4, oend,     dt, dtLog);
+
+        /* check */
+        { U32 const endCheck = BITv07_endOfDStream(&bitD1) & BITv07_endOfDStream(&bitD2) & BITv07_endOfDStream(&bitD3) & BITv07_endOfDStream(&bitD4);
+          if (!endCheck) return ERROR(corruption_detected); }   /* every stream has to be consumed entirely */
+
+        /* decoded size */
+        return dstSize;
+    }
+}
+
+
+size_t HUFv07_decompress4X4_usingDTable(
+          void* dst,  size_t dstSize,
+    const void* cSrc, size_t cSrcSize,
+    const HUFv07_DTable* DTable)
+{   /* Public 4-stream entry point : refuses any table whose descriptor is not tagged as type 1 */
+    DTableDesc const desc = HUFv07_getDTableDesc(DTable);
+    return (desc.tableType == 1) ? HUFv07_decompress4X4_usingDTable_internal(dst, dstSize, cSrc, cSrcSize, DTable)
+                                 : ERROR(GENERIC);
+}
+
+
+size_t HUFv07_decompress4X4_DCtx (HUFv07_DTable* dctx, void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize)
+{   /* Rebuilds the X4 table in dctx from the header at cSrc, then decodes what follows as 4 streams */
+    size_t const headerSize = HUFv07_readDTableX4 (dctx, cSrc, cSrcSize);
+
+    if (HUFv07_isError(headerSize))
+        return headerSize;
+    if (cSrcSize <= headerSize)   /* the header used up the whole input */
+        return ERROR(srcSize_wrong);
+
+    return HUFv07_decompress4X4_usingDTable_internal(dst, dstSize, (const BYTE*)cSrc + headerSize, cSrcSize - headerSize, dctx);
+}
+
+size_t HUFv07_decompress4X4 (void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize)
+{   /* One-shot variant : decodes through a table declared here by HUFv07_CREATE_STATIC_DTABLEX4 for HUFv07_TABLELOG_MAX */
+    HUFv07_CREATE_STATIC_DTABLEX4(localTable, HUFv07_TABLELOG_MAX);
+    return HUFv07_decompress4X4_DCtx(localTable, dst, dstSize, cSrc, cSrcSize);
+}
+
+
+/* ********************************/
+/* Generic decompression selector */
+/* ********************************/
+
+size_t HUFv07_decompress1X_usingDTable(void* dst, size_t maxDstSize,
+                                    const void* cSrc, size_t cSrcSize,
+                                    const HUFv07_DTable* DTable)
+{   /* Dispatch on the table's own type tag : 0 selects the X2 decoder, any other value the X4 decoder */
+    DTableDesc const desc = HUFv07_getDTableDesc(DTable);
+    if (desc.tableType == 0) return HUFv07_decompress1X2_usingDTable_internal(dst, maxDstSize, cSrc, cSrcSize, DTable);
+    return HUFv07_decompress1X4_usingDTable_internal(dst, maxDstSize, cSrc, cSrcSize, DTable);
+}
+
+size_t HUFv07_decompress4X_usingDTable(void* dst, size_t maxDstSize,
+                                    const void* cSrc, size_t cSrcSize,
+                                    const HUFv07_DTable* DTable)
+{   /* Dispatch on the table's own type tag : 0 selects the X2 decoder, any other value the X4 decoder */
+    DTableDesc const desc = HUFv07_getDTableDesc(DTable);
+    if (desc.tableType == 0) return HUFv07_decompress4X2_usingDTable_internal(dst, maxDstSize, cSrc, cSrcSize, DTable);
+    return HUFv07_decompress4X4_usingDTable_internal(dst, maxDstSize, cSrc, cSrcSize, DTable);
+}
+
+
+typedef struct { U32 tableTime; U32 decode256Time; } algo_time_t;   /* cost model of one decoder : HUFv07_selectDecoder() computes tableTime + decode256Time * (dstSize >> 8) */
+static const algo_time_t algoTime[16 /* Quantization */][3 /* single, double, quad */] =   /* row index : Q = cSrcSize * 16 / dstSize */
+{
+    /* single, double, quad */   /* HUFv07_selectDecoder() reads columns 0 and 1 only */
+    {{0,0}, {1,1}, {2,2}},  /* Q==0 : impossible */
+    {{0,0}, {1,1}, {2,2}},  /* Q==1 : impossible */
+    {{  38,130}, {1313, 74}, {2151, 38}},   /* Q == 2 : 12-18% */
+    {{ 448,128}, {1353, 74}, {2238, 41}},   /* Q == 3 : 18-25% */
+    {{ 556,128}, {1353, 74}, {2238, 47}},   /* Q == 4 : 25-32% */
+    {{ 714,128}, {1418, 74}, {2436, 53}},   /* Q == 5 : 32-38% */
+    {{ 883,128}, {1437, 74}, {2464, 61}},   /* Q == 6 : 38-44% */
+    {{ 897,128}, {1515, 75}, {2622, 68}},   /* Q == 7 : 44-50% */
+    {{ 926,128}, {1613, 75}, {2730, 75}},   /* Q == 8 : 50-56% */
+    {{ 947,128}, {1729, 77}, {3359, 77}},   /* Q == 9 : 56-62% */
+    {{1107,128}, {2083, 81}, {4006, 84}},   /* Q ==10 : 62-69% */
+    {{1177,128}, {2379, 87}, {4785, 88}},   /* Q ==11 : 69-75% */
+    {{1242,128}, {2415, 93}, {5155, 84}},   /* Q ==12 : 75-81% */
+    {{1349,128}, {2644,106}, {5260,106}},   /* Q ==13 : 81-87% */
+    {{1455,128}, {2422,124}, {4174,124}},   /* Q ==14 : 87-93% */
+    {{ 722,128}, {1891,145}, {1936,146}},   /* Q ==15 : 93-99% */
+};
+
+/** HUFv07_selectDecoder() :
+*   Tells which decoder is likely to decode faster,
+*   based on a set of pre-determined metrics.
+*   @return : 0==HUFv07_decompress4X2, 1==HUFv07_decompress4X4 .
+*   Expected : 0 < cSrcSize < dstSize <= 128 KB; any cSrcSize >= dstSize (dstSize==0 included) is rated with the last row (Q==15) */
+U32 HUFv07_selectDecoder (size_t dstSize, size_t cSrcSize)
+{
+    /* decoder timing evaluation */
+    U32 const Q = (cSrcSize >= dstSize) ? 15 : (U32)(cSrcSize * 16 / dstSize);   /* always < 16 : no out-of-range row of algoTime[], no division by 0 */
+    U32 const D256 = (U32)(dstSize >> 8);   /* output size counted in 256-byte units */
+    U32 const DTime0 = algoTime[Q][0].tableTime + (algoTime[Q][0].decode256Time * D256);
+    U32 DTime1 = algoTime[Q][1].tableTime + (algoTime[Q][1].decode256Time * D256);
+    DTime1 += DTime1 >> 3;  /* advantage to algorithm using less memory, for cache eviction */
+
+    return DTime1 < DTime0;   /* 1 only when the second estimate, after its 1/8 handicap, is still strictly lower */
+}
+
+
+typedef size_t (*decompressionAlgo)(void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize);
+
+size_t HUFv07_decompress (void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize)
+{
+    /* slot 0 : multi-streams single-symbol decoding, slot 1 : multi-streams double-symbols decoding */
+    static const decompressionAlgo decoders[2] = { HUFv07_decompress4X2, HUFv07_decompress4X4 };
+
+    if (dstSize == 0) return ERROR(dstSize_tooSmall);
+    if (cSrcSize > dstSize) return ERROR(corruption_detected);   /* more input than output : invalid */
+    if (cSrcSize == dstSize) {   /* not compressed */
+        memcpy(dst, cSrc, dstSize);
+        return dstSize;
+    }
+    if (cSrcSize == 1) {   /* RLE */
+        memset(dst, *(const BYTE*)cSrc, dstSize);
+        return dstSize;
+    }
+    return decoders[HUFv07_selectDecoder(dstSize, cSrcSize)](dst, dstSize, cSrc, cSrcSize);
+}
+
+size_t HUFv07_decompress4X_DCtx (HUFv07_DTable* dctx, void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize)
+{   /* 4-stream entry point with automatic decoder choice; stored and single-byte (RLE) inputs never reach a decoder */
+    if (dstSize == 0) return ERROR(dstSize_tooSmall);
+    if (cSrcSize > dstSize) return ERROR(corruption_detected);   /* more input than output : invalid */
+    if ((cSrcSize != dstSize) && (cSrcSize != 1)) {   /* genuine Huffman payload */
+        if (HUFv07_selectDecoder(dstSize, cSrcSize))
+            return HUFv07_decompress4X4_DCtx(dctx, dst, dstSize, cSrc, cSrcSize);
+        return HUFv07_decompress4X2_DCtx(dctx, dst, dstSize, cSrc, cSrcSize);
+    }
+    if (cSrcSize == dstSize) memcpy(dst, cSrc, dstSize);   /* not compressed */
+    else memset(dst, *(const BYTE*)cSrc, dstSize);   /* RLE */
+    return dstSize;
+}
+
+size_t HUFv07_decompress4X_hufOnly (HUFv07_DTable* dctx, void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize)
+{   /* 4-stream decoding with automatic decoder choice and no stored / RLE shortcut : such sizes are rejected as corrupted */
+    if (dstSize == 0)
+        return ERROR(dstSize_tooSmall);
+    if ((cSrcSize <= 1) || (cSrcSize >= dstSize))   /* invalid */
+        return ERROR(corruption_detected);
+
+    if (HUFv07_selectDecoder(dstSize, cSrcSize))
+        return HUFv07_decompress4X4_DCtx(dctx, dst, dstSize, cSrc, cSrcSize);
+    return HUFv07_decompress4X2_DCtx(dctx, dst, dstSize, cSrc, cSrcSize);
+}
+
+size_t HUFv07_decompress1X_DCtx (HUFv07_DTable* dctx, void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize)
+{   /* Single-stream entry point with automatic decoder choice; stored and single-byte (RLE) inputs never reach a decoder */
+    if (dstSize == 0) return ERROR(dstSize_tooSmall);
+    if (cSrcSize > dstSize) return ERROR(corruption_detected);   /* more input than output : invalid */
+    if ((cSrcSize != dstSize) && (cSrcSize != 1)) {   /* genuine Huffman payload */
+        if (HUFv07_selectDecoder(dstSize, cSrcSize))
+            return HUFv07_decompress1X4_DCtx(dctx, dst, dstSize, cSrc, cSrcSize);
+        return HUFv07_decompress1X2_DCtx(dctx, dst, dstSize, cSrc, cSrcSize);
+    }
+    if (cSrcSize == dstSize) memcpy(dst, cSrc, dstSize);   /* not compressed */
+    else memset(dst, *(const BYTE*)cSrc, dstSize);   /* RLE */
+    return dstSize;
+}
+/*
+    Common functions of Zstd compression library
+    Copyright (C) 2015-2016, Yann Collet.
+
+    BSD 2-Clause License (http://www.opensource.org/licenses/bsd-license.php)
+
+    Redistribution and use in source and binary forms, with or without
+    modification, are permitted provided that the following conditions are
+    met:
+    * Redistributions of source code must retain the above copyright
+    notice, this list of conditions and the following disclaimer.
+    * Redistributions in binary form must reproduce the above
+    copyright notice, this list of conditions and the following disclaimer
+    in the documentation and/or other materials provided with the
+    distribution.
+    THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+    "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+    LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+    A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+    OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+    SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+    LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+    DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+    THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+    (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+    OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+    You can contact the author at :
+    - zstd homepage : http://www.zstd.net/
+*/
+
+
+
+/*-****************************************
+*  ZSTD Error Management
+******************************************/
+/*! ZSTDv07_isError() :
+*   tells if a function result is an error code; the answer comes straight from ERR_isError() */
+unsigned ZSTDv07_isError(size_t code) { return ERR_isError(code); }
+
+/*! ZSTDv07_getErrorName() :
+*   provides the error string matching a function result (useful for debugging); forwards to ERR_getErrorName() */
+const char* ZSTDv07_getErrorName(size_t code) { return ERR_getErrorName(code); }
+
+
+
+/* **************************************************************
+*  ZBUFF Error Management
+****************************************************************/
+unsigned ZBUFFv07_isError(size_t errorCode) { return ERR_isError(errorCode); }   /* same test as ZSTDv07_isError() : both forward to ERR_isError() */
+
+const char* ZBUFFv07_getErrorName(size_t errorCode) { return ERR_getErrorName(errorCode); }   /* same lookup as ZSTDv07_getErrorName() */
+
+
+
+void* ZSTDv07_defaultAllocFunction(void* opaque, size_t size)
+{
+    /* Default allocator : a plain malloc() of `size` bytes.
+     * `opaque` belongs to the callback signature but has no use here. */
+    (void)opaque;
+    return malloc(size);
+}
+
+void ZSTDv07_defaultFreeFunction(void* opaque, void* address)
+{
+    /* Default deallocator : hands `address` to free(); `opaque` is ignored. */
+    free(address);
+    (void)opaque;
+}
+/*
+    zstd_internal - common functions to include
+    Header File for include
+    Copyright (C) 2014-2016, Yann Collet.
+
+    BSD 2-Clause License (http://www.opensource.org/licenses/bsd-license.php)
+
+    Redistribution and use in source and binary forms, with or without
+    modification, are permitted provided that the following conditions are
+    met:
+    * Redistributions of source code must retain the above copyright
+    notice, this list of conditions and the following disclaimer.
+    * Redistributions in binary form must reproduce the above
+    copyright notice, this list of conditions and the following disclaimer
+    in the documentation and/or other materials provided with the
+    distribution.
+    THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+    "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+    LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+    A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+    OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+    SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+    LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+    DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+    THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+    (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+    OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+    You can contact the author at :
+    - zstd homepage : https://www.zstd.net
+*/
+#ifndef ZSTDv07_CCOMMON_H_MODULE
+#define ZSTDv07_CCOMMON_H_MODULE
+
+
+/*-*************************************
+*  Common macros
+***************************************/
+#define MIN(a,b) ((a)<(b) ? (a) : (b))   /* function-like macro : the selected argument is evaluated twice, keep side effects out of the arguments */
+#define MAX(a,b) ((a)>(b) ? (a) : (b))   /* same double-evaluation caveat as MIN */
+
+
+/*-*************************************
+*  Common constants
+***************************************/
+#define ZSTDv07_OPT_NUM    (1<<12)   /* == 4096 */
+#define ZSTDv07_DICT_MAGIC  0xEC30A437   /* v0.7 */
+
+#define ZSTDv07_REP_NUM    3   /* also the number of entries of repStartValue[] */
+#define ZSTDv07_REP_INIT   ZSTDv07_REP_NUM   /* sizes ZSTDv07_optimal_t.rep[] */
+#define ZSTDv07_REP_MOVE   (ZSTDv07_REP_NUM-1)   /* == 2 */
+static const U32 repStartValue[ZSTDv07_REP_NUM] = { 1, 4, 8 };   /* presumably the initial repeat offsets -- TODO confirm against the decoder */
+
+#define KB *(1 <<10)   /* postfix multipliers : `4 KB` expands to `4 *(1 <<10)` */
+#define MB *(1 <<20)
+#define GB *(1U<<30)   /* unsigned literal, unlike KB and MB */
+
+#define BIT7 128   /* BITn == 1<<n; only bits 7, 6, 5, 4, 1 and 0 get a name here */
+#define BIT6  64
+#define BIT5  32
+#define BIT4  16
+#define BIT1   2
+#define BIT0   1
+
+#define ZSTDv07_WINDOWLOG_ABSOLUTEMIN 10
+static const size_t ZSTDv07_fcs_fieldSize[4] = { 0, 2, 4, 8 };   /* presumably the byte size of the frame-content-size field, per 2-bit selector -- TODO confirm */
+static const size_t ZSTDv07_did_fieldSize[4] = { 0, 1, 2, 4 };   /* presumably the byte size of the dictionary-ID field, per 2-bit selector -- TODO confirm */
+
+#define ZSTDv07_BLOCKHEADERSIZE 3   /* C standard doesn't allow `static const` variable to be init using another `static const` variable */
+static const size_t ZSTDv07_blockHeaderSize = ZSTDv07_BLOCKHEADERSIZE;   /* typed twin of the macro above */
+typedef enum { bt_compressed, bt_raw, bt_rle, bt_end } blockType_t;   /* type of ZSTDv07_DCtx_s.bType */
+
+#define MIN_SEQUENCES_SIZE 1 /* nbSeq==0 */
+#define MIN_CBLOCK_SIZE (1 /*litCSize*/ + 1 /* RLE or RAW */ + MIN_SEQUENCES_SIZE /* nbSeq==0 */)   /* for a non-null block */   /* == 3 */
+
+#define HufLog 12   /* sizes ZSTDv07_DCtx_s.hufTable, through HUFv07_DTABLE_SIZE(HufLog) */
+typedef enum { lbt_huffman, lbt_repeat, lbt_raw, lbt_rle } litBlockType_t;
+
+#define LONGNBSEQ 0x7F00
+
+#define MINMATCH 3
+#define EQUAL_READ32 4
+
+#define Litbits  8
+#define MaxLit ((1<<Litbits) - 1)   /* == 255 */
+#define MaxML  52   /* ML_bits[] and ML_defaultNorm[] hold MaxML+1 entries */
+#define MaxLL  35   /* LL_bits[] and LL_defaultNorm[] hold MaxLL+1 entries */
+#define MaxOff 28   /* OF_defaultNorm[] holds MaxOff+1 entries */
+#define MaxSeq MAX(MaxLL, MaxML)   /* Assumption : MaxOff < MaxLL,MaxML */
+#define MLFSELog    9   /* sizes ZSTDv07_DCtx_s.MLTable */
+#define LLFSELog    9   /* sizes ZSTDv07_DCtx_s.LLTable */
+#define OffFSELog   8   /* sizes ZSTDv07_DCtx_s.OffTable */
+
+#define FSEv07_ENCODING_RAW     0
+#define FSEv07_ENCODING_RLE     1
+#define FSEv07_ENCODING_STATIC  2
+#define FSEv07_ENCODING_DYNAMIC 3
+
+static const U32 LL_bits[MaxLL+1] = { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,   /* one entry per literal-length code 0..MaxLL; presumably its number of extra bits -- TODO confirm */
+                                      1, 1, 1, 1, 2, 2, 3, 3, 4, 6, 7, 8, 9,10,11,12,
+                                     13,14,15,16 };
+static const S16 LL_defaultNorm[MaxLL+1] = { 4, 3, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 1, 1, 1,   /* counting each -1 as 1, the entries add up to 64 == 1<<LL_defaultNormLog */
+                                             2, 2, 2, 2, 2, 2, 2, 2, 2, 3, 2, 1, 1, 1, 1, 1,
+                                            -1,-1,-1,-1 };
+static const U32 LL_defaultNormLog = 6;
+
+static const U32 ML_bits[MaxML+1] = { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,   /* one entry per match-length code 0..MaxML; presumably its number of extra bits -- TODO confirm */
+                                      0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+                                      1, 1, 1, 1, 2, 2, 3, 3, 4, 4, 5, 7, 8, 9,10,11,
+                                     12,13,14,15,16 };
+static const S16 ML_defaultNorm[MaxML+1] = { 1, 4, 3, 2, 2, 2, 2, 2, 2, 1, 1, 1, 1, 1, 1, 1,   /* counting each -1 as 1, the entries add up to 64 == 1<<ML_defaultNormLog */
+                                             1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+                                             1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,-1,-1,
+                                            -1,-1,-1,-1,-1 };
+static const U32 ML_defaultNormLog = 6;
+
+static const S16 OF_defaultNorm[MaxOff+1] = { 1, 1, 1, 1, 1, 1, 2, 2, 2, 1, 1, 1, 1, 1, 1, 1,   /* counting each -1 as 1, the entries add up to 32 == 1<<OF_defaultNormLog */
+                                              1, 1, 1, 1, 1, 1, 1, 1,-1,-1,-1,-1,-1 };
+static const U32 OF_defaultNormLog = 5;
+
+
+/*-*******************************************
+*  Shared functions to include for inlining
+*********************************************/
+static void ZSTDv07_copy8(void* dst, const void* src) { memcpy(dst, src, 8); }   /* fixed-size copy : always exactly 8 bytes */
+#define COPY8(d,s) { ZSTDv07_copy8(d,s); d+=8; s+=8; }   /* copies 8 bytes, then moves both pointers forward by 8; expands to a braced block, d and s must be modifiable pointers */
+
+/*! ZSTDv07_wildcopy() :
+*   memcpy() look-alike moving whole 8-byte steps (at least one) : may write up to 7 bytes past `length` (8 when length==0) */
+#define WILDCOPY_OVERLENGTH 8
+MEM_STATIC void ZSTDv07_wildcopy(void* dst, const void* src, ptrdiff_t length)
+{
+    BYTE* out = (BYTE*)dst;
+    const BYTE* in = (const BYTE*)src;
+    BYTE* const outLimit = out + length;
+    do {
+        ZSTDv07_copy8(out, in); out += 8; in += 8;
+    } while (out < outLimit);
+}
+
+
+/*-*******************************************
+*  Private interfaces
+*********************************************/
+typedef struct ZSTDv07_stats_s ZSTDv07_stats_t;   /* the struct itself is defined a few lines below */
+
+typedef struct {   /* element type of seqStore_t.matchTable */
+    U32 off;
+    U32 len;
+} ZSTDv07_match_t;
+
+typedef struct {   /* element type of seqStore_t.priceTable */
+    U32 price;
+    U32 off;
+    U32 mlen;
+    U32 litlen;
+    U32 rep[ZSTDv07_REP_INIT];   /* ZSTDv07_REP_INIT == ZSTDv07_REP_NUM == 3 entries */
+} ZSTDv07_optimal_t;
+
+struct ZSTDv07_stats_s { U32 unused; };   /* placeholder : its only member is named `unused` */
+
+typedef struct {   /* sequence store; each `xxxStart` member is presumably the base of the buffer walked by the matching `xxx` cursor -- TODO confirm */
+    void* buffer;
+    U32*  offsetStart;
+    U32*  offset;
+    BYTE* offCodeStart;
+    BYTE* litStart;
+    BYTE* lit;
+    U16*  litLengthStart;
+    U16*  litLength;
+    BYTE* llCodeStart;
+    U16*  matchLengthStart;
+    U16*  matchLength;
+    BYTE* mlCodeStart;
+    U32   longLengthID;   /* 0 == no longLength; 1 == Lit.longLength; 2 == Match.longLength; */
+    U32   longLengthPos;
+    /* opt */   /* the members below carry the ZSTDv07_optimal_t / ZSTDv07_match_t tables plus frequency, sum and log2 counters */
+    ZSTDv07_optimal_t* priceTable;
+    ZSTDv07_match_t* matchTable;
+    U32* matchLengthFreq;
+    U32* litLengthFreq;
+    U32* litFreq;
+    U32* offCodeFreq;
+    U32  matchLengthSum;
+    U32  matchSum;
+    U32  litLengthSum;
+    U32  litSum;
+    U32  offCodeSum;
+    U32  log2matchLengthSum;
+    U32  log2matchSum;
+    U32  log2litLengthSum;
+    U32  log2litSum;
+    U32  log2offCodeSum;
+    U32  factor;
+    U32  cachedPrice;
+    U32  cachedLitLength;
+    const BYTE* cachedLiterals;
+    ZSTDv07_stats_t stats;
+} seqStore_t;
+
+void ZSTDv07_seqToCodes(const seqStore_t* seqStorePtr, size_t const nbSeq);   /* declaration only : no definition in this part of the file */
+
+/* custom memory allocation functions */
+static const ZSTDv07_customMem defaultCustomMem = { ZSTDv07_defaultAllocFunction, ZSTDv07_defaultFreeFunction, NULL };   /* malloc()/free() based pair; the trailing NULL is presumably the `opaque` value given to both callbacks -- TODO confirm */
+
+#endif   /* ZSTDv07_CCOMMON_H_MODULE */
+/*
+    zstd - standard compression library
+    Copyright (C) 2014-2016, Yann Collet.
+
+    BSD 2-Clause License (http://www.opensource.org/licenses/bsd-license.php)
+
+    Redistribution and use in source and binary forms, with or without
+    modification, are permitted provided that the following conditions are
+    met:
+    * Redistributions of source code must retain the above copyright
+    notice, this list of conditions and the following disclaimer.
+    * Redistributions in binary form must reproduce the above
+    copyright notice, this list of conditions and the following disclaimer
+    in the documentation and/or other materials provided with the
+    distribution.
+    THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+    "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+    LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+    A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+    OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+    SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+    LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+    DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+    THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+    (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+    OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+    You can contact the author at :
+    - zstd homepage : http://www.zstd.net
+*/
+
+/* ***************************************************************
+*  Tuning parameters
+*****************************************************************/
+/*!
+ * HEAPMODE :
+ * Selects how the default decompression function ZSTDv07_decompress() allocates its memory :
+ * on the stack (0), or on the heap (1, requires malloc())
+ */
+#ifndef ZSTDv07_HEAPMODE
+#  define ZSTDv07_HEAPMODE 1   /* heap, unless ZSTDv07_HEAPMODE was already defined before this point */
+#endif
+
+
+/*-*******************************************************
+*  Compiler specifics
+*********************************************************/
+#ifdef _MSC_VER    /* Visual Studio */
+#  include <intrin.h>                    /* For Visual 2005 */
+#  pragma warning(disable : 4127)        /* disable: C4127: conditional expression is constant */
+#  pragma warning(disable : 4324)        /* disable: C4324: padded structure */
+#  pragma warning(disable : 4100)        /* disable: C4100: unreferenced formal parameter */
+#endif   /* _MSC_VER */
+
+
+/*-*************************************
+*  Macros
+***************************************/
+#define ZSTDv07_isError ERR_isError   /* for inlining */
+#define FSEv07_isError  ERR_isError
+#define HUFv07_isError  ERR_isError
+
+
+/*_*******************************************************
+*  Memory operations
+**********************************************************/
+static void ZSTDv07_copy4(void* dst, const void* src) { memcpy(dst, src, 4); }   /* copies exactly 4 bytes from src to dst; uses memcpy(), so the two 4-byte regions must not overlap */
+
+
+/*-*************************************************************
+*   Context management
+***************************************************************/
+typedef enum { ZSTDds_getFrameHeaderSize, ZSTDds_decodeFrameHeader,   /* decoding stages, stored in ZSTDv07_DCtx_s.stage */
+               ZSTDds_decodeBlockHeader, ZSTDds_decompressBlock,
+               ZSTDds_decodeSkippableHeader, ZSTDds_skipFrame } ZSTDv07_dStage;   /* ZSTDv07_decompressBegin() starts at ZSTDds_getFrameHeaderSize */
+
+struct ZSTDv07_DCtx_s   /* decompression context : entropy tables, window pointers, frame state and literals workspace */
+{
+    FSEv07_DTable LLTable[FSEv07_DTABLE_SIZE_U32(LLFSELog)];   /* FSE decoding table for literal-length codes */
+    FSEv07_DTable OffTable[FSEv07_DTABLE_SIZE_U32(OffFSELog)];   /* FSE decoding table for offset codes */
+    FSEv07_DTable MLTable[FSEv07_DTABLE_SIZE_U32(MLFSELog)];   /* FSE decoding table for match-length codes */
+    HUFv07_DTable hufTable[HUFv07_DTABLE_SIZE(HufLog)];  /* Huffman table used to decode literals; can accommodate HUFv07_decompress4X */
+    const void* previousDstEnd;   /* end of the previously written output; compared with the next dst by ZSTDv07_checkContinuity() */
+    const void* base;   /* start of the current (contiguous) output segment; matches reaching before it go to the dictEnd segment */
+    const void* vBase;   /* lowest position a match offset may reach : `base` minus the size of the previous segment */
+    const void* dictEnd;   /* end of the previous (non-contiguous) segment, used as source for matches beyond `base` */
+    size_t expected;   /* presumably the nb of input bytes the streaming API wants next; set to ZSTDv07_frameHeaderSize_min at reset - confirm in ZSTDv07_decompressContinue() */
+    U32 rep[3];   /* repeat offsets carried from one block to the next */
+    ZSTDv07_frameParams fParams;   /* filled by ZSTDv07_decodeFrameHeader() */
+    blockType_t bType;   /* used in ZSTDv07_decompressContinue(), to transfer blockType between header decoding and block decoding stages */
+    ZSTDv07_dStage stage;
+    U32 litEntropy;   /* non-zero once hufTable holds a table from a Huffman literals block; required by lbt_repeat */
+    U32 fseEntropy;   /* non-zero once the FSE tables were built by a previous block; passed on as flagRepeatTable */
+    XXH64_state_t xxhState;   /* reset in ZSTDv07_decodeFrameHeader() when the frame carries a checksum */
+    size_t headerSize;
+    U32 dictID;   /* compared with the dictID announced by the frame header */
+    const BYTE* litPtr;   /* literals of the current block : points into litBuffer, or directly into the input for raw literals */
+    ZSTDv07_customMem customMem;   /* allocator used to create, and later free, this context */
+    size_t litSize;   /* nb of literals available at litPtr */
+    BYTE litBuffer[ZSTDv07_BLOCKSIZE_ABSOLUTEMAX + WILDCOPY_OVERLENGTH];   /* workspace; must remain one of the two last members, see ZSTDv07_copyDCtx() */
+    BYTE headerBuffer[ZSTDv07_FRAMEHEADERSIZE_MAX];   /* workspace; must remain one of the two last members, see ZSTDv07_copyDCtx() */
+};  /* typedef'd to ZSTDv07_DCtx within "zstd_static.h" */
+
+int ZSTDv07_isSkipFrame(ZSTDv07_DCtx* dctx);   /* forward declaration only; the definition is not part of this section */
+
+size_t ZSTDv07_sizeofDCtx (const ZSTDv07_DCtx* dctx) { return sizeof(*dctx); }   /* size of a context in bytes; dctx is never dereferenced (sizeof operand is not evaluated), so the result is a constant */
+
+size_t ZSTDv07_estimateDCtxSize(void) { return sizeof(ZSTDv07_DCtx); }   /* nb of bytes ZSTDv07_createDCtx_advanced() requests from the allocator */
+
+size_t ZSTDv07_decompressBegin(ZSTDv07_DCtx* dctx)   /* resets dctx to its start-of-frame state (customMem is left untouched); always returns 0 */
+{
+    dctx->expected = ZSTDv07_frameHeaderSize_min;   /* first thing to read is the minimal frame header */
+    dctx->stage = ZSTDds_getFrameHeaderSize;
+    dctx->previousDstEnd = NULL;   /* no output written yet : forget any previous window / dictionary segment */
+    dctx->base = NULL;
+    dctx->vBase = NULL;
+    dctx->dictEnd = NULL;
+    dctx->hufTable[0] = (HUFv07_DTable)((HufLog)*0x1000001);   /* replicates HufLog into bits 0-7 and 24-31 of the first cell; presumably the header fields of the HUFv07 table descriptor - TODO confirm against HUFv07 */
+    dctx->litEntropy = dctx->fseEntropy = 0;   /* no entropy table can be "repeated" until a block has built one */
+    dctx->dictID = 0;
+    { int i; for (i=0; i<ZSTDv07_REP_NUM; i++) dctx->rep[i] = repStartValue[i]; }   /* repeat offsets restart from their default values */
+    return 0;
+}
+
+ZSTDv07_DCtx* ZSTDv07_createDCtx_advanced(ZSTDv07_customMem customMem)   /* allocates and resets a context with the given allocator; @return : the context, or NULL on invalid allocator / allocation failure */
+{
+    ZSTDv07_DCtx* dctx;
+
+    if (!customMem.customAlloc && !customMem.customFree)   /* neither function provided : fall back to the default allocator */
+        customMem = defaultCustomMem;
+
+    if (!customMem.customAlloc || !customMem.customFree)   /* only one of the two provided : refuse, alloc and free must come as a pair */
+        return NULL;
+
+    dctx = (ZSTDv07_DCtx*) customMem.customAlloc(customMem.opaque, sizeof(ZSTDv07_DCtx));
+    if (!dctx) return NULL;
+    memcpy(&dctx->customMem, &customMem, sizeof(ZSTDv07_customMem));   /* remembered so that ZSTDv07_freeDCtx() releases with the matching customFree */
+    ZSTDv07_decompressBegin(dctx);   /* the allocation is not zeroed here; only the fields reset by ZSTDv07_decompressBegin() are initialized */
+    return dctx;
+}
+
+ZSTDv07_DCtx* ZSTDv07_createDCtx(void)   /* creates a context using defaultCustomMem; @return : NULL if allocation fails */
+{
+    return ZSTDv07_createDCtx_advanced(defaultCustomMem);
+}
+
+size_t ZSTDv07_freeDCtx(ZSTDv07_DCtx* dctx)   /* releases a context through the customFree stored in it at creation; currently always returns 0 */
+{
+    if (dctx==NULL) return 0;   /* support free on NULL */
+    dctx->customMem.customFree(dctx->customMem.opaque, dctx);   /* dctx must not be used after this call */
+    return 0;   /* reserved as a potential error code in the future */
+}
+
+void ZSTDv07_copyDCtx(ZSTDv07_DCtx* dstDCtx, const ZSTDv07_DCtx* srcDCtx)   /* copies the state of srcDCtx into dstDCtx, leaving out the trailing workspace buffers (litBuffer, headerBuffer) */
+{
+    memcpy(dstDCtx, srcDCtx,   /* note : customMem and litPtr are part of the copied range */
+           sizeof(ZSTDv07_DCtx) - (ZSTDv07_BLOCKSIZE_ABSOLUTEMAX+WILDCOPY_OVERLENGTH + ZSTDv07_frameHeaderSize_max));  /* no need to copy workspace; NOTE(review): assumes ZSTDv07_frameHeaderSize_max == ZSTDv07_FRAMEHEADERSIZE_MAX and that both buffers stay the last struct members - confirm */
+}
+
+
+/*-*************************************************************
+*   Decompression section
+***************************************************************/
+
+/* Frame format description
+   Frame Header -  [ Block Header - Block ] - Frame End
+   1) Frame Header
+      - 4 bytes - Magic Number : ZSTDv07_MAGICNUMBER (defined within zstd.h)
+      - 1 byte  - Frame Descriptor
+   2) Block Header
+      - 3 bytes, starting with a 2-bits descriptor
+                 Uncompressed, Compressed, Frame End, unused
+   3) Block
+      See Block Format Description
+   4) Frame End
+      - 3 bytes, compatible with Block Header
+*/
+
+
+/* Frame Header :
+
+   1 byte - FrameHeaderDescription :
+   bit 0-1 : dictID (0, 1, 2 or 4 bytes)
+   bit 2   : checksumFlag
+   bit 3   : reserved (must be zero)
+   bit 4   : reserved (unused, can be any value)
+   bit 5   : Single Segment (if 1, WindowLog byte is not present)
+   bit 6-7 : FrameContentFieldSize (0, 2, 4, or 8)
+             if (Single Segment && FrameContentFieldSize==0) FrameContentFieldSize=1;   (WindowLog is skipped, so content size takes 1 byte)
+
+   Optional : WindowLog (0 or 1 byte)
+   bit 0-2 : octal Fractional (1/8th)
+   bit 3-7 : Power of 2, with 0 = 1 KB (up to 2 TB)
+
+   Optional : dictID (0, 1, 2 or 4 bytes)
+   Automatic adaptation
+   0 : no dictID
+   1 : 1 - 255
+   2 : 256 - 65535
+   4 : all other values
+
+   Optional : content size (0, 1, 2, 4 or 8 bytes)
+   0 : unknown          (fcfs==0 and swl==0)
+   1 : 0-255 bytes      (fcfs==0 and swl==1)
+   2 : 256 - 65535+256  (fcfs==1)
+   4 : 0 - 4GB-1        (fcfs==2)
+   8 : 0 - 16EB-1       (fcfs==3)
+*/
+
+
+/* Compressed Block, format description
+
+   Block = Literal Section - Sequences Section
+   Prerequisite : size of (compressed) block, maximum size of regenerated data
+
+   1) Literal Section
+
+   1.1) Header : 1-5 bytes
+        flags: 2 bits
+            00 compressed by Huff0
+            01 unused
+            10 is Raw (uncompressed)
+            11 is Rle
+            Note : using 01 => Huff0 with precomputed table ?
+            Note : delta map ? => compressed ?
+
+   1.1.1) Huff0-compressed literal block : 3-5 bytes
+            srcSize < 1 KB => 3 bytes (2-2-10-10) => single stream
+            srcSize < 1 KB => 3 bytes (2-2-10-10) => 4 streams
+            srcSize < 16KB => 4 bytes (2-2-14-14)
+            else           => 5 bytes (2-2-18-18)
+            big endian convention
+
+   1.1.2) Raw (uncompressed) literal block header : 1-3 bytes
+        size :  5 bits: (IS_RAW<<6) + (0<<4) + size
+               12 bits: (IS_RAW<<6) + (2<<4) + (size>>8)
+                        size&255
+               20 bits: (IS_RAW<<6) + (3<<4) + (size>>16)
+                        size>>8&255
+                        size&255
+
+   1.1.3) Rle (repeated single byte) literal block header : 1-3 bytes
+        size :  5 bits: (IS_RLE<<6) + (0<<4) + size
+               12 bits: (IS_RLE<<6) + (2<<4) + (size>>8)
+                        size&255
+               20 bits: (IS_RLE<<6) + (3<<4) + (size>>16)
+                        size>>8&255
+                        size&255
+
+   1.1.4) Huff0-compressed literal block, using precomputed CTables : 3-5 bytes
+            srcSize < 1 KB => 3 bytes (2-2-10-10) => single stream
+            srcSize < 1 KB => 3 bytes (2-2-10-10)
+            srcSize < 16KB => 4 bytes (2-2-14-14)
+            else           => 5 bytes (2-2-18-18)
+            big endian convention
+
+        1- CTable available (stored into workspace ?)
+        2- Small input (fast heuristic ? Full comparison ? depend on clevel ?)
+
+
+   1.2) Literal block content
+
+   1.2.1) Huff0 block, using sizes from header
+        See Huff0 format
+
+   1.2.2) Huff0 block, using prepared table
+
+   1.2.3) Raw content
+
+   1.2.4) single byte
+
+
+   2) Sequences section
+      TO DO
+*/
+
+/** ZSTDv07_frameHeaderSize() :
+*   srcSize must be >= ZSTDv07_frameHeaderSize_min, otherwise ERROR(srcSize_wrong) is returned. Only the descriptor byte src[4] is inspected.
+*   @return : total size of the Frame Header, derived from the descriptor byte (see "Frame Header" description above) */
+static size_t ZSTDv07_frameHeaderSize(const void* src, size_t srcSize)
+{
+    if (srcSize < ZSTDv07_frameHeaderSize_min) return ERROR(srcSize_wrong);
+    {   BYTE const fhd = ((const BYTE*)src)[4];   /* frame header descriptor, right after the 4-bytes magic number */
+        U32 const dictID= fhd & 3;   /* bits 0-1 : dictID field size code */
+        U32 const directMode = (fhd >> 5) & 1;   /* bit 5 : single segment, no WindowLog byte */
+        U32 const fcsId = fhd >> 6;   /* bits 6-7 : frame content size field code */
+        return ZSTDv07_frameHeaderSize_min + !directMode + ZSTDv07_did_fieldSize[dictID] + ZSTDv07_fcs_fieldSize[fcsId]   /* !directMode : +1 for the WindowLog byte */
+                + (directMode && !ZSTDv07_fcs_fieldSize[fcsId]);   /* single segment without explicit field : content size still takes 1 byte */
+    }
+}
+
+
+/** ZSTDv07_getFrameParams() :
+*   decode Frame Header, or require larger `srcSize`. Skippable frames are reported with windowSize==0 and frameContentSize==their 32-bits length field.
+*   @return : 0, `fparamsPtr` is correctly filled,
+*            >0, `srcSize` is too small, result is expected `srcSize`,
+*             or an error code (prefix_unknown, frameParameter_unsupported), which can be tested using ZSTDv07_isError() */
+size_t ZSTDv07_getFrameParams(ZSTDv07_frameParams* fparamsPtr, const void* src, size_t srcSize)
+{
+    const BYTE* ip = (const BYTE*)src;
+
+    if (srcSize < ZSTDv07_frameHeaderSize_min) return ZSTDv07_frameHeaderSize_min;   /* not an error : asks the caller for more input */
+    if (MEM_readLE32(src) != ZSTDv07_MAGICNUMBER) {
+        if ((MEM_readLE32(src) & 0xFFFFFFF0U) == ZSTDv07_MAGIC_SKIPPABLE_START) {   /* the low 4 bits of a skippable magic number are free */
+            if (srcSize < ZSTDv07_skippableHeaderSize) return ZSTDv07_skippableHeaderSize; /* magic number + skippable frame length */
+            memset(fparamsPtr, 0, sizeof(*fparamsPtr));
+            fparamsPtr->frameContentSize = MEM_readLE32((const char *)src + 4);   /* for a skippable frame, this field carries the nb of bytes to skip */
+            fparamsPtr->windowSize = 0; /* windowSize==0 means a frame is skippable */
+            return 0;
+        }
+        return ERROR(prefix_unknown);
+    }
+
+    /* ensure there is enough `srcSize` to fully read/decode frame header */
+    { size_t const fhsize = ZSTDv07_frameHeaderSize(src, srcSize);   /* cannot fail here : srcSize >= ZSTDv07_frameHeaderSize_min was checked above */
+      if (srcSize < fhsize) return fhsize; }
+
+    {   BYTE const fhdByte = ip[4];
+        size_t pos = 5;   /* read position of the next optional field */
+        U32 const dictIDSizeCode = fhdByte&3;
+        U32 const checksumFlag = (fhdByte>>2)&1;
+        U32 const directMode = (fhdByte>>5)&1;   /* single segment : no WindowLog byte, window size is derived from content size */
+        U32 const fcsID = fhdByte>>6;
+        U32 const windowSizeMax = 1U << ZSTDv07_WINDOWLOG_MAX;
+        U32 windowSize = 0;
+        U32 dictID = 0;
+        U64 frameContentSize = 0;   /* stays 0 when the header carries no content size */
+        if ((fhdByte & 0x08) != 0) return ERROR(frameParameter_unsupported);   /* reserved bits, which must be zero */
+        if (!directMode) {
+            BYTE const wlByte = ip[pos++];
+            U32 const windowLog = (wlByte >> 3) + ZSTDv07_WINDOWLOG_ABSOLUTEMIN;   /* bits 3-7 : power of 2 */
+            if (windowLog > ZSTDv07_WINDOWLOG_MAX) return ERROR(frameParameter_unsupported);
+            windowSize = (1U << windowLog);
+            windowSize += (windowSize >> 3) * (wlByte&7);   /* bits 0-2 : additional eighths of the power of 2 */
+        }
+
+        switch(dictIDSizeCode)
+        {
+            default:   /* impossible */
+            case 0 : break;
+            case 1 : dictID = ip[pos]; pos++; break;
+            case 2 : dictID = MEM_readLE16(ip+pos); pos+=2; break;
+            case 3 : dictID = MEM_readLE32(ip+pos); pos+=4; break;
+        }
+        switch(fcsID)   /* last field : pos does not need to be advanced any further */
+        {
+            default:   /* impossible */
+            case 0 : if (directMode) frameContentSize = ip[pos]; break;
+            case 1 : frameContentSize = MEM_readLE16(ip+pos)+256; break;   /* 2-bytes field is stored with a bias of 256 */
+            case 2 : frameContentSize = MEM_readLE32(ip+pos); break;
+            case 3 : frameContentSize = MEM_readLE64(ip+pos); break;
+        }
+        if (!windowSize) windowSize = (U32)frameContentSize;   /* NOTE(review): 64-bits content size is truncated to 32 bits before the limit check below - confirm this is acceptable */
+        if (windowSize > windowSizeMax) return ERROR(frameParameter_unsupported);
+        fparamsPtr->frameContentSize = frameContentSize;
+        fparamsPtr->windowSize = windowSize;
+        fparamsPtr->dictID = dictID;
+        fparamsPtr->checksumFlag = checksumFlag;
+    }
+    return 0;
+}
+
+
+/** ZSTDv07_getDecompressedSize() :
+*   compatible with legacy mode
+*   @return : decompressed size if known, 0 otherwise
+              note : 0 can mean any of the following :
+                   - decompressed size is not provided within frame header
+                   - frame header unknown / not supported
+                   - frame header not completely provided (`srcSize` too small) */
+unsigned long long ZSTDv07_getDecompressedSize(const void* src, size_t srcSize)
+{
+    {   ZSTDv07_frameParams fparams;
+        size_t const frResult = ZSTDv07_getFrameParams(&fparams, src, srcSize);
+        if (frResult!=0) return 0;   /* error code, or "need more input" : both reported as 0 (unknown) */
+        return fparams.frameContentSize;   /* note : for a skippable frame this is the skippable length field, as set by ZSTDv07_getFrameParams() */
+    }
+}
+
+
+/** ZSTDv07_decodeFrameHeader() :
+*   `srcSize` must be the size provided by ZSTDv07_frameHeaderSize(). Fills dctx->fParams, checks the dictionary ID, resets the checksum state if needed.
+*   @return : 0 if success, or an error code, which can be tested using ZSTDv07_isError() */
+static size_t ZSTDv07_decodeFrameHeader(ZSTDv07_DCtx* dctx, const void* src, size_t srcSize)
+{
+    size_t const result = ZSTDv07_getFrameParams(&(dctx->fParams), src, srcSize);   /* NOTE(review): fParams is read below before `result` is tested; on failure it may hold stale values */
+    if (dctx->fParams.dictID && (dctx->dictID != dctx->fParams.dictID)) return ERROR(dictionary_wrong);   /* a frame dictID of 0 means "not specified" : no check */
+    if (dctx->fParams.checksumFlag) XXH64_reset(&dctx->xxhState, 0);   /* checksum is computed with seed 0 */
+    return result;
+}
+
+
+typedef struct   /* block header information, filled by ZSTDv07_getcBlockSize() */
+{
+    blockType_t blockType;   /* taken from the top 2 bits of the block header */
+    U32 origSize;   /* regenerated size, only set for bt_rle blocks; 0 for every other block type */
+} blockProperties_t;
+
+/*! ZSTDv07_getcBlockSize() :
+*   Provides the size of compressed block from block header `src`, and fills `bpPtr`. @return : nb of bytes following the header (0 for bt_end, 1 for bt_rle), or ERROR(srcSize_wrong) */
+size_t ZSTDv07_getcBlockSize(const void* src, size_t srcSize, blockProperties_t* bpPtr)
+{
+    const BYTE* const in = (const BYTE* const)src;
+    U32 cSize;
+
+    if (srcSize < ZSTDv07_blockHeaderSize) return ERROR(srcSize_wrong);
+
+    bpPtr->blockType = (blockType_t)((*in) >> 6);   /* top 2 bits of first byte */
+    cSize = in[2] + (in[1]<<8) + ((in[0] & 7)<<16);   /* 19-bits size field, most significant bits first */
+    bpPtr->origSize = (bpPtr->blockType == bt_rle) ? cSize : 0;   /* for RLE blocks, the size field is the regenerated size */
+
+    if (bpPtr->blockType == bt_end) return 0;
+    if (bpPtr->blockType == bt_rle) return 1;   /* an RLE block stores a single byte */
+    return cSize;
+}
+
+
+static size_t ZSTDv07_copyRawBlock(void* dst, size_t dstCapacity, const void* src, size_t srcSize)   /* copies an uncompressed block; @return : srcSize, or ERROR(dstSize_tooSmall) */
+{
+    if (srcSize > dstCapacity) return ERROR(dstSize_tooSmall);
+    memcpy(dst, src, srcSize);   /* memcpy() : src and dst must not overlap */
+    return srcSize;
+}
+
+
+/*! ZSTDv07_decodeLiteralsBlock() : decodes the literals section of a compressed block, and sets dctx->litPtr / dctx->litSize accordingly.
+    @return : nb of bytes read from src (< srcSize ), or an error code (corruption_detected, dictionary_corrupted) */
+size_t ZSTDv07_decodeLiteralsBlock(ZSTDv07_DCtx* dctx,
+                          const void* src, size_t srcSize)   /* note : srcSize < BLOCKSIZE */
+{
+    const BYTE* const istart = (const BYTE*) src;
+
+    if (srcSize < MIN_CBLOCK_SIZE) return ERROR(corruption_detected);
+
+    switch((litBlockType_t)(istart[0]>> 6))   /* top 2 bits : literals block type */
+    {
+    case lbt_huffman:
+        {   size_t litSize, litCSize, singleStream=0;   /* litSize : regenerated size; litCSize : compressed size */
+            U32 lhSize = (istart[0] >> 4) & 3;   /* header size code, replaced below by the header size in bytes */
+            if (srcSize < 5) return ERROR(corruption_detected);   /* srcSize >= MIN_CBLOCK_SIZE == 3; here we need up to 5 for lhSize, + cSize (+nbSeq) */
+            switch(lhSize)
+            {
+            case 0: case 1: default:   /* note : default is impossible, since lhSize into [0..3] */
+                /* 2 - 2 - 10 - 10 */
+                lhSize=3;
+                singleStream = istart[0] & 16;   /* low bit of the size code : code 1 means single stream, code 0 means 4 streams */
+                litSize  = ((istart[0] & 15) << 6) + (istart[1] >> 2);
+                litCSize = ((istart[1] &  3) << 8) + istart[2];
+                break;
+            case 2:
+                /* 2 - 2 - 14 - 14 */
+                lhSize=4;
+                litSize  = ((istart[0] & 15) << 10) + (istart[1] << 2) + (istart[2] >> 6);
+                litCSize = ((istart[2] & 63) <<  8) + istart[3];
+                break;
+            case 3:
+                /* 2 - 2 - 18 - 18 */
+                lhSize=5;
+                litSize  = ((istart[0] & 15) << 14) + (istart[1] << 6) + (istart[2] >> 2);
+                litCSize = ((istart[2] &  3) << 16) + (istart[3] << 8) + istart[4];
+                break;
+            }
+            if (litSize > ZSTDv07_BLOCKSIZE_ABSOLUTEMAX) return ERROR(corruption_detected);   /* would not fit into litBuffer */
+            if (litCSize + lhSize > srcSize) return ERROR(corruption_detected);   /* compressed literals must lie within the block */
+
+            if (HUFv07_isError(singleStream ?
+                            HUFv07_decompress1X2_DCtx(dctx->hufTable, dctx->litBuffer, litSize, istart+lhSize, litCSize) :
+                            HUFv07_decompress4X_hufOnly (dctx->hufTable, dctx->litBuffer, litSize, istart+lhSize, litCSize) ))
+                return ERROR(corruption_detected);
+
+            dctx->litPtr = dctx->litBuffer;
+            dctx->litSize = litSize;
+            dctx->litEntropy = 1;   /* hufTable can now be reused by a following lbt_repeat block */
+            memset(dctx->litBuffer + dctx->litSize, 0, WILDCOPY_OVERLENGTH);   /* defined content in the slack area that wildcopy may read */
+            return litCSize + lhSize;
+        }
+    case lbt_repeat:   /* Huffman-compressed literals, reusing the table already present in dctx->hufTable */
+        {   size_t litSize, litCSize;
+            U32 lhSize = ((istart[0]) >> 4) & 3;
+            if (lhSize != 1)  /* only case supported for now : small litSize, single stream */
+                return ERROR(corruption_detected);
+            if (dctx->litEntropy==0)   /* no table available to repeat */
+                return ERROR(dictionary_corrupted);
+
+            /* 2 - 2 - 10 - 10 */
+            lhSize=3;
+            litSize  = ((istart[0] & 15) << 6) + (istart[1] >> 2);   /* 10 bits : always < ZSTDv07_BLOCKSIZE_ABSOLUTEMAX is presumed, no explicit check here */
+            litCSize = ((istart[1] &  3) << 8) + istart[2];
+            if (litCSize + lhSize > srcSize) return ERROR(corruption_detected);
+
+            {   size_t const errorCode = HUFv07_decompress1X4_usingDTable(dctx->litBuffer, litSize, istart+lhSize, litCSize, dctx->hufTable);
+                if (HUFv07_isError(errorCode)) return ERROR(corruption_detected);
+            }
+            dctx->litPtr = dctx->litBuffer;
+            dctx->litSize = litSize;
+            memset(dctx->litBuffer + dctx->litSize, 0, WILDCOPY_OVERLENGTH);
+            return litCSize + lhSize;
+        }
+    case lbt_raw:
+        {   size_t litSize;
+            U32 lhSize = ((istart[0]) >> 4) & 3;
+            switch(lhSize)
+            {
+            case 0: case 1: default:   /* note : default is impossible, since lhSize into [0..3] */
+                lhSize=1;
+                litSize = istart[0] & 31;   /* 5-bits size, 1-byte header */
+                break;
+            case 2:   /* 12-bits size; the code value (2) is also the header size in bytes */
+                litSize = ((istart[0] & 15) << 8) + istart[1];
+                break;
+            case 3:   /* 20-bits size; the code value (3) is also the header size in bytes */
+                litSize = ((istart[0] & 15) << 16) + (istart[1] << 8) + istart[2];
+                break;
+            }
+
+            if (lhSize+litSize+WILDCOPY_OVERLENGTH > srcSize) {  /* risk reading beyond src buffer with wildcopy */
+                if (litSize+lhSize > srcSize) return ERROR(corruption_detected);
+                memcpy(dctx->litBuffer, istart+lhSize, litSize);   /* litSize <= srcSize, and srcSize < BLOCKSIZE per this function's contract, so it fits litBuffer */
+                dctx->litPtr = dctx->litBuffer;
+                dctx->litSize = litSize;
+                memset(dctx->litBuffer + dctx->litSize, 0, WILDCOPY_OVERLENGTH);
+                return lhSize+litSize;
+            }
+            /* direct reference into compressed stream */
+            dctx->litPtr = istart+lhSize;   /* src must therefore stay valid while the sequences of this block are executed */
+            dctx->litSize = litSize;
+            return lhSize+litSize;
+        }
+    case lbt_rle:
+        {   size_t litSize;
+            U32 lhSize = ((istart[0]) >> 4) & 3;
+            switch(lhSize)
+            {
+            case 0: case 1: default:   /* note : default is impossible, since lhSize into [0..3] */
+                lhSize = 1;
+                litSize = istart[0] & 31;
+                break;
+            case 2:   /* header is 2 bytes, the repeated byte is istart[2] : covered by srcSize >= MIN_CBLOCK_SIZE */
+                litSize = ((istart[0] & 15) << 8) + istart[1];
+                break;
+            case 3:
+                litSize = ((istart[0] & 15) << 16) + (istart[1] << 8) + istart[2];
+                if (srcSize<4) return ERROR(corruption_detected);   /* srcSize >= MIN_CBLOCK_SIZE == 3; here we need lhSize+1 = 4 */
+                break;
+            }
+            if (litSize > ZSTDv07_BLOCKSIZE_ABSOLUTEMAX) return ERROR(corruption_detected);
+            memset(dctx->litBuffer, istart[lhSize], litSize + WILDCOPY_OVERLENGTH);   /* expand the repeated byte, slack area included */
+            dctx->litPtr = dctx->litBuffer;
+            dctx->litSize = litSize;
+            return lhSize+1;   /* header + the single repeated byte */
+        }
+    default:
+        return ERROR(corruption_detected);   /* impossible */
+    }
+}
+
+
+/*! ZSTDv07_buildSeqTable() : builds (or keeps) one FSE decoding table, depending on the encoding `type` announced in the sequences header.
+    @return : nb bytes read from src,
+              or an error code if it fails, testable with ZSTDv07_isError()
+*/
+size_t ZSTDv07_buildSeqTable(FSEv07_DTable* DTable, U32 type, U32 max, U32 maxLog,
+                                 const void* src, size_t srcSize,
+                                 const S16* defaultNorm, U32 defaultLog, U32 flagRepeatTable)
+{
+    switch(type)
+    {
+    case FSEv07_ENCODING_RLE :   /* a single symbol, stored as one byte */
+        if (!srcSize) return ERROR(srcSize_wrong);
+        if ( (*(const BYTE*)src) > max) return ERROR(corruption_detected);
+        FSEv07_buildDTable_rle(DTable, *(const BYTE*)src);   /* symbol already validated : if *src > max, data is corrupted */
+        return 1;
+    case FSEv07_ENCODING_RAW :   /* nothing stored : table is built from the predefined distribution `defaultNorm` */
+        FSEv07_buildDTable(DTable, defaultNorm, max, defaultLog);
+        return 0;
+    case FSEv07_ENCODING_STATIC:   /* nothing stored : DTable is left untouched, which requires that a previous block filled it */
+        if (!flagRepeatTable) return ERROR(corruption_detected);
+        return 0;
+    default :   /* impossible */
+    case FSEv07_ENCODING_DYNAMIC :   /* distribution is described in src */
+        {   U32 tableLog;
+            S16 norm[MaxSeq+1];
+            size_t const headerSize = FSEv07_readNCount(norm, &max, &tableLog, src, srcSize);   /* may update the local `max` */
+            if (FSEv07_isError(headerSize)) return ERROR(corruption_detected);
+            if (tableLog > maxLog) return ERROR(corruption_detected);   /* DTable was sized for maxLog */
+            FSEv07_buildDTable(DTable, norm, max, tableLog);
+            return headerSize;
+    }   }
+}
+
+
+size_t ZSTDv07_decodeSeqHeaders(int* nbSeqPtr,   /* out : nb of sequences announced by the header */
+                             FSEv07_DTable* DTableLL, FSEv07_DTable* DTableML, FSEv07_DTable* DTableOffb, U32 flagRepeatTable,
+                             const void* src, size_t srcSize)   /* src : sequences section, i.e. what follows the literals section */
+{   /* @return : nb of bytes read from src (header part only), or an error code testable with ZSTDv07_isError() */
+    const BYTE* const istart = (const BYTE* const)src;
+    const BYTE* const iend = istart + srcSize;
+    const BYTE* ip = istart;
+
+    /* check : the first nbSeq byte must be present */
+    if (srcSize < MIN_SEQUENCES_SIZE) return ERROR(srcSize_wrong);
+
+    /* SeqHead : nbSeq is stored on 1, 2 or 3 bytes */
+    {   int nbSeq = *ip++;
+        if (!nbSeq) { *nbSeqPtr=0; return 1; }   /* no sequence : the block only contains literals */
+        if (nbSeq > 0x7F) {
+            if (nbSeq == 0xFF) {
+                if ((size_t)(iend - ip) < 2) return ERROR(srcSize_wrong);   /* size comparison : does not form a pointer beyond iend */
+                nbSeq = MEM_readLE16(ip) + LONGNBSEQ, ip+=2;
+            } else {
+                if (ip >= iend) return ERROR(srcSize_wrong);
+                nbSeq = ((nbSeq-0x80)<<8) + *ip++;
+            }
+        }
+        *nbSeqPtr = nbSeq;
+    }
+
+    /* FSE table descriptors :
+     * 1 descriptor byte + at least 3 more bytes (min : all 3 are "raw", hence no header, but at least xxLog bits per type).
+     * The check is done BEFORE reading the descriptor byte : with srcSize==1 and nbSeq!=0, ip == iend here. */
+    if ((size_t)(iend - ip) < 4) return ERROR(srcSize_wrong);
+    {   U32 const LLtype  = *ip >> 6;
+        U32 const OFtype = (*ip >> 4) & 3;
+        U32 const MLtype  = (*ip >> 2) & 3;
+        ip++;
+
+        /* Build DTables */
+        {   size_t const llhSize = ZSTDv07_buildSeqTable(DTableLL, LLtype, MaxLL, LLFSELog, ip, iend-ip, LL_defaultNorm, LL_defaultNormLog, flagRepeatTable);
+            if (ZSTDv07_isError(llhSize)) return ERROR(corruption_detected);
+            ip += llhSize;
+        }
+        {   size_t const ofhSize = ZSTDv07_buildSeqTable(DTableOffb, OFtype, MaxOff, OffFSELog, ip, iend-ip, OF_defaultNorm, OF_defaultNormLog, flagRepeatTable);
+            if (ZSTDv07_isError(ofhSize)) return ERROR(corruption_detected);
+            ip += ofhSize;
+        }
+        {   size_t const mlhSize = ZSTDv07_buildSeqTable(DTableML, MLtype, MaxML, MLFSELog, ip, iend-ip, ML_defaultNorm, ML_defaultNormLog, flagRepeatTable);
+            if (ZSTDv07_isError(mlhSize)) return ERROR(corruption_detected);
+            ip += mlhSize;
+    }   }
+
+    return ip-istart;
+}
+
+
+typedef struct {   /* one decoded sequence, produced by ZSTDv07_decodeSequence() and consumed by ZSTDv07_execSequence() */
+    size_t litLength;   /* nb of literals to copy first */
+    size_t matchLength;   /* nb of bytes to copy from previously written output */
+    size_t offset;   /* distance of the match, counted backward from the end of the copied literals */
+} seq_t;
+
+typedef struct {   /* decoding state of the sequences bitstream of one block */
+    BITv07_DStream_t DStream;   /* bitstream shared by the 3 FSE states and the extra bits */
+    FSEv07_DState_t stateLL;   /* FSE state for literal-length codes */
+    FSEv07_DState_t stateOffb;   /* FSE state for offset codes */
+    FSEv07_DState_t stateML;   /* FSE state for match-length codes */
+    size_t prevOffset[ZSTDv07_REP_INIT];   /* recent offsets, most recent first; loaded from / saved to dctx->rep by ZSTDv07_decompressSequences() */
+} seqState_t;
+
+
+static seq_t ZSTDv07_decodeSequence(seqState_t* seqState)   /* decodes the next sequence from the bitstream, then updates the 3 FSE states and the recent-offsets history */
+{
+    seq_t seq;
+
+    U32 const llCode = FSEv07_peekSymbol(&(seqState->stateLL));
+    U32 const mlCode = FSEv07_peekSymbol(&(seqState->stateML));
+    U32 const ofCode = FSEv07_peekSymbol(&(seqState->stateOffb));   /* <= maxOff, by table construction */
+
+    U32 const llBits = LL_bits[llCode];   /* nb of extra bits to read for each field */
+    U32 const mlBits = ML_bits[mlCode];
+    U32 const ofBits = ofCode;   /* for offsets, the code itself is the nb of extra bits */
+    U32 const totalBits = llBits+mlBits+ofBits;
+
+    static const U32 LL_base[MaxLL+1] = {   /* base value of each literal-length code; extra bits are added to it */
+                             0,  1,  2,  3,  4,  5,  6,  7,  8,  9,   10,    11,    12,    13,    14,     15,
+                            16, 18, 20, 22, 24, 28, 32, 40, 48, 64, 0x80, 0x100, 0x200, 0x400, 0x800, 0x1000,
+                            0x2000, 0x4000, 0x8000, 0x10000 };
+
+    static const U32 ML_base[MaxML+1] = {   /* base value of each match-length code; the smallest match length is 3 */
+                             3,  4,  5,  6,  7,  8,  9, 10, 11, 12, 13,   14,    15,    16,    17,    18,
+                            19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29,   30,    31,    32,    33,    34,
+                            35, 37, 39, 41, 43, 47, 51, 59, 67, 83, 99, 0x83, 0x103, 0x203, 0x403, 0x803,
+                            0x1003, 0x2003, 0x4003, 0x8003, 0x10003 };
+
+    static const U32 OF_base[MaxOff+1] = {   /* base value of each offset code; codes 0 and 1 designate repeat offsets, handled below */
+                 0,        1,       1,       5,     0xD,     0x1D,     0x3D,     0x7D,
+                 0xFD,   0x1FD,   0x3FD,   0x7FD,   0xFFD,   0x1FFD,   0x3FFD,   0x7FFD,
+                 0xFFFD, 0x1FFFD, 0x3FFFD, 0x7FFFD, 0xFFFFD, 0x1FFFFD, 0x3FFFFD, 0x7FFFFD,
+                 0xFFFFFD, 0x1FFFFFD, 0x3FFFFFD, 0x7FFFFFD, 0xFFFFFFD };
+
+    /* sequence */
+    {   size_t offset;
+        if (!ofCode)
+            offset = 0;
+        else {
+            offset = OF_base[ofCode] + BITv07_readBits(&(seqState->DStream), ofBits);   /* <=  (ZSTDv07_WINDOWLOG_MAX-1) bits */
+            if (MEM_32bits()) BITv07_reloadDStream(&(seqState->DStream));   /* 32-bits bit container : refill after a potentially long read */
+        }
+
+        if (ofCode <= 1) {   /* repeat offset : here `offset` (0, 1 or 2) is an index into prevOffset[], not a distance */
+            if ((llCode == 0) & (offset <= 1)) offset = 1-offset;   /* when the sequence has no literals, indexes 0 and 1 are swapped */
+            if (offset) {   /* index 1 or 2 : selected offset moves to the front of the history */
+                size_t const temp = seqState->prevOffset[offset];
+                if (offset != 1) seqState->prevOffset[2] = seqState->prevOffset[1];
+                seqState->prevOffset[1] = seqState->prevOffset[0];
+                seqState->prevOffset[0] = offset = temp;
+            } else {   /* index 0 : reuse the most recent offset, history unchanged */
+                offset = seqState->prevOffset[0];
+            }
+        } else {   /* new offset : pushed at the front of the history, oldest entry dropped */
+            seqState->prevOffset[2] = seqState->prevOffset[1];
+            seqState->prevOffset[1] = seqState->prevOffset[0];
+            seqState->prevOffset[0] = offset;
+        }
+        seq.offset = offset;
+    }
+
+    seq.matchLength = ML_base[mlCode] + ((mlCode>31) ? BITv07_readBits(&(seqState->DStream), mlBits) : 0);   /* <=  16 bits */
+    if (MEM_32bits() && (mlBits+llBits>24)) BITv07_reloadDStream(&(seqState->DStream));
+
+    seq.litLength = LL_base[llCode] + ((llCode>15) ? BITv07_readBits(&(seqState->DStream), llBits) : 0);   /* <=  16 bits */
+    if (MEM_32bits() ||
+       (totalBits > 64 - 7 - (LLFSELog+MLFSELog+OffFSELog)) ) BITv07_reloadDStream(&(seqState->DStream));   /* refill when the state updates below could otherwise run out of bits */
+
+    /* ANS state update */
+    FSEv07_updateState(&(seqState->stateLL), &(seqState->DStream));   /* <=  9 bits */
+    FSEv07_updateState(&(seqState->stateML), &(seqState->DStream));   /* <=  9 bits */
+    if (MEM_32bits()) BITv07_reloadDStream(&(seqState->DStream));     /* <= 18 bits */
+    FSEv07_updateState(&(seqState->stateOffb), &(seqState->DStream)); /* <=  8 bits */
+
+    return seq;
+}
+
+
+static   /* ZSTDv07_execSequence() : copies the literals, then the match, of one sequence at `op`. Requires op <= oend and *litPtr <= litLimit. */
+size_t ZSTDv07_execSequence(BYTE* op,
+                                BYTE* const oend, seq_t sequence,
+                                const BYTE** litPtr, const BYTE* const litLimit,   /* *litPtr is advanced by sequence.litLength on success */
+                                const BYTE* const base, const BYTE* const vBase, const BYTE* const dictEnd)   /* @return : nb of bytes written (litLength+matchLength), or an error code */
+{
+    BYTE* const oLitEnd = op + sequence.litLength;   /* only used after the size checks below have passed */
+    size_t const sequenceLength = sequence.litLength + sequence.matchLength;
+    BYTE* const oMatchEnd = op + sequenceLength;   /* only used after the size checks below have passed */
+    BYTE* const oend_w = oend-WILDCOPY_OVERLENGTH;
+    const BYTE* const iLitEnd = *litPtr + sequence.litLength;
+    const BYTE* match = oLitEnd - sequence.offset;
+
+    /* check : sizes are compared instead of pointers, so that oversized lengths cannot wrap the address space (32-bits) and slip through */
+    if ((sequence.litLength + WILDCOPY_OVERLENGTH > (size_t)(oend - op)) | (sequenceLength > (size_t)(oend - op))) return ERROR(dstSize_tooSmall); /* last match must start at a minimum distance of WILDCOPY_OVERLENGTH from oend */
+    if (sequence.litLength > (size_t)(litLimit - *litPtr)) return ERROR(corruption_detected);   /* over-read beyond lit buffer */
+
+    /* copy Literals */
+    ZSTDv07_wildcopy(op, *litPtr, sequence.litLength);   /* note : since oLitEnd <= oend-WILDCOPY_OVERLENGTH, no risk of overwrite beyond oend */
+    op = oLitEnd;
+    *litPtr = iLitEnd;   /* update for next sequence */
+
+    /* copy Match */
+    if (sequence.offset > (size_t)(oLitEnd - base)) {
+        /* offset beyond prefix */
+        if (sequence.offset > (size_t)(oLitEnd - vBase)) return ERROR(corruption_detected);   /* reaches before the oldest available data */
+        match = dictEnd - (base-match);   /* translate into the previous (dictionary) segment */
+        if (match + sequence.matchLength <= dictEnd) {   /* match entirely within the previous segment */
+            memmove(oLitEnd, match, sequence.matchLength);
+            return sequenceLength;
+        }
+        /* span extDict & currentPrefixSegment */
+        {   size_t const length1 = dictEnd - match;
+            memmove(oLitEnd, match, length1);
+            op = oLitEnd + length1;
+            sequence.matchLength -= length1;
+            match = base;   /* remainder of the match continues at the start of the current segment */
+            if (op > oend_w || sequence.matchLength < MINMATCH) {   /* too close to oend, or too short, for the 8-bytes copies below : finish byte by byte */
+              while (op < oMatchEnd) *op++ = *match++;
+              return sequenceLength;
+            }
+    }   }
+    /* Requirement: op <= oend_w */
+
+    /* match within prefix */
+    if (sequence.offset < 8) {
+        /* close range match, overlap */
+        static const U32 dec32table[] = { 0, 1, 2, 1, 4, 4, 4, 4 };   /* added */
+        static const int dec64table[] = { 8, 8, 8, 7, 8, 9,10,11 };   /* subtracted */
+        int const sub2 = dec64table[sequence.offset];
+        op[0] = match[0];   /* first 4 bytes are copied one at a time, since source and destination overlap */
+        op[1] = match[1];
+        op[2] = match[2];
+        op[3] = match[3];
+        match += dec32table[sequence.offset];
+        ZSTDv07_copy4(op+4, match);
+        match -= sub2;   /* after the common `match += 8` below, op - match is >= 8, so 8-bytes copies are valid */
+    } else {
+        ZSTDv07_copy8(op, match);
+    }
+    op += 8; match += 8;
+
+    if (oMatchEnd > oend-(16-MINMATCH)) {   /* match ends close to oend : wildcopy must stop at oend_w */
+        if (op < oend_w) {
+            ZSTDv07_wildcopy(op, match, oend_w - op);
+            match += oend_w - op;
+            op = oend_w;
+        }
+        while (op < oMatchEnd) *op++ = *match++;   /* last bytes are copied one by one, never writing beyond oMatchEnd */
+    } else {
+        ZSTDv07_wildcopy(op, match, (ptrdiff_t)sequence.matchLength-8);   /* works even if matchLength < 8 */
+    }
+    return sequenceLength;
+}
+
+
+static size_t ZSTDv07_decompressSequences(   /* decodes and executes all sequences of a block, then appends the remaining literals; @return : nb of bytes written into dst, or an error code */
+                               ZSTDv07_DCtx* dctx,   /* provides the literals (litPtr, litSize), the FSE tables, the window pointers and the repeat offsets */
+                               void* dst, size_t maxDstSize,
+                         const void* seqStart, size_t seqSize)   /* sequences section of the block */
+{
+    const BYTE* ip = (const BYTE*)seqStart;
+    const BYTE* const iend = ip + seqSize;
+    BYTE* const ostart = (BYTE* const)dst;
+    BYTE* const oend = ostart + maxDstSize;
+    BYTE* op = ostart;
+    const BYTE* litPtr = dctx->litPtr;
+    const BYTE* const litEnd = litPtr + dctx->litSize;
+    FSEv07_DTable* DTableLL = dctx->LLTable;
+    FSEv07_DTable* DTableML = dctx->MLTable;
+    FSEv07_DTable* DTableOffb = dctx->OffTable;
+    const BYTE* const base = (const BYTE*) (dctx->base);
+    const BYTE* const vBase = (const BYTE*) (dctx->vBase);
+    const BYTE* const dictEnd = (const BYTE*) (dctx->dictEnd);
+    int nbSeq;
+
+    /* Build Decoding Tables */
+    {   size_t const seqHSize = ZSTDv07_decodeSeqHeaders(&nbSeq, DTableLL, DTableML, DTableOffb, dctx->fseEntropy, ip, seqSize);
+        if (ZSTDv07_isError(seqHSize)) return seqHSize;
+        ip += seqHSize;
+    }
+
+    /* Regen sequences */
+    if (nbSeq) {
+        seqState_t seqState;
+        dctx->fseEntropy = 1;   /* tables are now built : following blocks may repeat them */
+        { U32 i; for (i=0; i<ZSTDv07_REP_INIT; i++) seqState.prevOffset[i] = dctx->rep[i]; }   /* load the repeat offsets left by the previous block */
+        { size_t const errorCode = BITv07_initDStream(&(seqState.DStream), ip, iend-ip);
+          if (ERR_isError(errorCode)) return ERROR(corruption_detected); }
+        FSEv07_initDState(&(seqState.stateLL), &(seqState.DStream), DTableLL);   /* initial states are read in this order : LL, Off, ML */
+        FSEv07_initDState(&(seqState.stateOffb), &(seqState.DStream), DTableOffb);
+        FSEv07_initDState(&(seqState.stateML), &(seqState.DStream), DTableML);
+
+        for ( ; (BITv07_reloadDStream(&(seqState.DStream)) <= BITv07_DStream_completed) && nbSeq ; ) {   /* stops on bitstream overflow, or when all sequences are done */
+            nbSeq--;
+            {   seq_t const sequence = ZSTDv07_decodeSequence(&seqState);
+                size_t const oneSeqSize = ZSTDv07_execSequence(op, oend, sequence, &litPtr, litEnd, base, vBase, dictEnd);
+                if (ZSTDv07_isError(oneSeqSize)) return oneSeqSize;
+                op += oneSeqSize;
+        }   }
+
+        /* check if reached exact end */
+        if (nbSeq) return ERROR(corruption_detected);   /* bitstream ended before all announced sequences were decoded */
+        /* save reps for next block */
+        { U32 i; for (i=0; i<ZSTDv07_REP_INIT; i++) dctx->rep[i] = (U32)(seqState.prevOffset[i]); }
+    }
+
+    /* last literal segment */
+    {   size_t const lastLLSize = litEnd - litPtr;
+        /* note : no (litPtr > litEnd) test is needed here : ZSTDv07_execSequence() rejects any sequence whose literals would go beyond litEnd */
+        if (lastLLSize > (size_t)(oend-op)) return ERROR(dstSize_tooSmall);
+        memcpy(op, litPtr, lastLLSize);
+        op += lastLLSize;
+    }
+
+    return op-ostart;
+}
+
+
+static void ZSTDv07_checkContinuity(ZSTDv07_DCtx* dctx, const void* dst)
+{   /* When `dst` does not extend the previous output, the previous output becomes the external (dictionary) segment */
+    const char* const prevEnd = (const char*)(dctx->previousDstEnd);
+    if (dst == dctx->previousDstEnd) return;   /* contiguous : nothing to update */
+    dctx->vBase = (const char*)dst - (prevEnd - (const char*)(dctx->base));   /* `dst` minus the length of the previous segment */
+    dctx->dictEnd = dctx->previousDstEnd;
+    dctx->base = dst;
+    dctx->previousDstEnd = dst;
+}
+
+
+static size_t ZSTDv07_decompressBlock_internal(ZSTDv07_DCtx* dctx,
+                            void* dst, size_t dstCapacity,
+                      const void* src, size_t srcSize)
+{   /* only meaningful for blockType == blockCompressed */
+    size_t litCSize;   /* input bytes taken by the literals sub-block */
+
+    /* reject oversized input before decoding anything */
+    if (srcSize >= ZSTDv07_BLOCKSIZE_ABSOLUTEMAX) return ERROR(srcSize_wrong);
+
+    /* Stage 1 : literals sub-block */
+    litCSize = ZSTDv07_decodeLiteralsBlock(dctx, src, srcSize);
+    if (ZSTDv07_isError(litCSize)) return litCSize;
+
+    /* Stage 2 : the sequences section is whatever input follows the literals */
+    return ZSTDv07_decompressSequences(dctx, dst, dstCapacity, (const BYTE*)src + litCSize, srcSize - litCSize);
+}
+
+
+size_t ZSTDv07_decompressBlock(ZSTDv07_DCtx* dctx,
+                            void* dst, size_t dstCapacity,
+                      const void* src, size_t srcSize)
+{   /* Decodes one compressed block into `dst`. @return : nb of bytes written, or an error code */
+    size_t dSize;
+    ZSTDv07_checkContinuity(dctx, dst);
+    dSize = ZSTDv07_decompressBlock_internal(dctx, dst, dstCapacity, src, srcSize);
+    if (!ZSTDv07_isError(dSize)) dctx->previousDstEnd = (char*)dst + dSize;   /* an error code is not a length : leave history untouched on failure */
+    return dSize;
+}
+
+
+/** ZSTDv07_insertBlock() :
+    Registers [`blockStart`, `blockStart`+`blockSize`) as the latest output of `dctx`. Useful to track uncompressed blocks. @return : `blockSize` */
+ZSTDLIBv07_API size_t ZSTDv07_insertBlock(ZSTDv07_DCtx* dctx, const void* blockStart, size_t blockSize)
+{
+    ZSTDv07_checkContinuity(dctx, blockStart);   /* updates the history pointers when the block does not follow the previous output */
+    dctx->previousDstEnd = (const char*)blockStart + blockSize;
+    return blockSize;
+}
+
+
+size_t ZSTDv07_generateNxBytes(void* dst, size_t dstCapacity, BYTE byte, size_t length)
+{   /* Writes `length` copies of `byte` into `dst`. @return : `length`, or dstSize_tooSmall when it exceeds `dstCapacity` */
+    if (length > dstCapacity) return ERROR(dstSize_tooSmall);
+    if (length > 0) memset(dst, byte, length);   /* `dst` may be NULL when nothing is written : memset() on NULL is undefined */
+    return length;
+}
+
+
+/*! ZSTDv07_decompressFrame() :
+*   Decodes one frame, block by block, from `src` into `dst`; `dctx` must be properly initialized. @return : nb of bytes written, or an error code */
+static size_t ZSTDv07_decompressFrame(ZSTDv07_DCtx* dctx,
+                                 void* dst, size_t dstCapacity,
+                                 const void* src, size_t srcSize)
+{
+    const BYTE* ip = (const BYTE*)src;
+    const BYTE* const iend = ip + srcSize;
+    BYTE* const ostart = (BYTE* const)dst;
+    BYTE* const oend = ostart + dstCapacity;
+    BYTE* op = ostart;
+    size_t remainingSize = srcSize;   /* input bytes not consumed yet; tracked alongside ip */
+
+    /* check : input must hold at least a minimal frame header plus one block header */
+    if (srcSize < ZSTDv07_frameHeaderSize_min+ZSTDv07_blockHeaderSize) return ERROR(srcSize_wrong);
+
+    /* Frame Header */
+    {   size_t const frameHeaderSize = ZSTDv07_frameHeaderSize(src, ZSTDv07_frameHeaderSize_min);
+        if (ZSTDv07_isError(frameHeaderSize)) return frameHeaderSize;
+        if (srcSize < frameHeaderSize+ZSTDv07_blockHeaderSize) return ERROR(srcSize_wrong);
+        if (ZSTDv07_decodeFrameHeader(dctx, src, frameHeaderSize)) return ERROR(corruption_detected);   /* any non-zero result is reported as corruption */
+        ip += frameHeaderSize; remainingSize -= frameHeaderSize;
+    }
+
+    /* Loop on each block : left through the bt_end block, or through an error return */
+    while (1) {
+        size_t decodedSize;
+        blockProperties_t blockProperties;
+        size_t const cBlockSize = ZSTDv07_getcBlockSize(ip, iend-ip, &blockProperties);
+        if (ZSTDv07_isError(cBlockSize)) return cBlockSize;
+
+        ip += ZSTDv07_blockHeaderSize;
+        remainingSize -= ZSTDv07_blockHeaderSize;
+        if (cBlockSize > remainingSize) return ERROR(srcSize_wrong);   /* block content must be fully present in src */
+
+        switch(blockProperties.blockType)
+        {
+        case bt_compressed:
+            decodedSize = ZSTDv07_decompressBlock_internal(dctx, op, oend-op, ip, cBlockSize);
+            break;
+        case bt_raw :
+            decodedSize = ZSTDv07_copyRawBlock(op, oend-op, ip, cBlockSize);
+            break;
+        case bt_rle :
+            decodedSize = ZSTDv07_generateNxBytes(op, oend-op, *ip, blockProperties.origSize);   /* *ip is the byte to repeat, origSize the repeat count */
+            break;
+        case bt_end :
+            /* end of frame : `src` must stop exactly here */
+            if (remainingSize) return ERROR(srcSize_wrong);
+            decodedSize = 0;
+            break;
+        default:
+            return ERROR(GENERIC);   /* impossible */
+        }
+        if (blockProperties.blockType == bt_end) break;   /* bt_end */
+
+        if (ZSTDv07_isError(decodedSize)) return decodedSize;
+        if (dctx->fParams.checksumFlag) XXH64_update(&dctx->xxhState, op, decodedSize);   /* NOTE(review): the hash is updated but never compared within this function */
+        op += decodedSize;
+        ip += cBlockSize;
+        remainingSize -= cBlockSize;
+    }
+
+    return op-ostart;
+}
+
+
+/*! ZSTDv07_decompress_usingPreparedDCtx() :
+*   Same as ZSTDv07_decompress_usingDict, but using a reference context `refDCtx`, where dictionary has been loaded.
+*   It avoids reloading the dictionary each time.
+*   `refDCtx` must have been properly initialized using ZSTDv07_decompressBegin_usingDict().
+*   Requires 2 contexts : 1 for reference (refDCtx), which is only read, and 1 to run the decompression operation (dctx), which is overwritten */
+size_t ZSTDv07_decompress_usingPreparedDCtx(ZSTDv07_DCtx* dctx, const ZSTDv07_DCtx* refDCtx,
+                                         void* dst, size_t dstCapacity,
+                                   const void* src, size_t srcSize)
+{
+    ZSTDv07_copyDCtx(dctx, refDCtx);   /* start from the prepared state instead of re-loading the dictionary */
+    ZSTDv07_checkContinuity(dctx, dst);
+    return ZSTDv07_decompressFrame(dctx, dst, dstCapacity, src, srcSize);
+}
+
+
+size_t ZSTDv07_decompress_usingDict(ZSTDv07_DCtx* dctx,
+                                 void* dst, size_t dstCapacity,
+                                 const void* src, size_t srcSize,
+                                 const void* dict, size_t dictSize)
+{   /* One-shot frame decompression with an optional dictionary. @return : nb of bytes written into `dst`, or an error code */
+    { size_t const initError = ZSTDv07_decompressBegin_usingDict(dctx, dict, dictSize); if (ZSTDv07_isError(initError)) return initError; }   /* do not decode with a half-loaded dictionary */
+    ZSTDv07_checkContinuity(dctx, dst);
+    return ZSTDv07_decompressFrame(dctx, dst, dstCapacity, src, srcSize);
+}
+
+
+size_t ZSTDv07_decompressDCtx(ZSTDv07_DCtx* dctx, void* dst, size_t dstCapacity, const void* src, size_t srcSize)
+{   /* One-shot frame decompression with a caller-provided context and no dictionary */
+    return ZSTDv07_decompress_usingDict(dctx, dst, dstCapacity, src, srcSize, NULL, 0);
+}
+
+
+size_t ZSTDv07_decompress(void* dst, size_t dstCapacity, const void* src, size_t srcSize)
+{   /* One-shot frame decompression with a temporary context, placed on the heap or on the stack depending on ZSTDv07_HEAPMODE */
+#if defined(ZSTDv07_HEAPMODE) && (ZSTDv07_HEAPMODE==1)
+    size_t regenSize;
+    ZSTDv07_DCtx* const dctx = ZSTDv07_createDCtx();   /* heap mode : context is allocated here and released before returning */
+    if (dctx==NULL) return ERROR(memory_allocation);
+    regenSize = ZSTDv07_decompressDCtx(dctx, dst, dstCapacity, src, srcSize);
+    ZSTDv07_freeDCtx(dctx);
+    return regenSize;
+#else   /* stack mode */
+    ZSTDv07_DCtx dctx;   /* not initialized here; presumably set up by ZSTDv07_decompressBegin() further down the call chain - TODO confirm */
+    return ZSTDv07_decompressDCtx(&dctx, dst, dstCapacity, src, srcSize);
+#endif
+}
+
+size_t ZSTDv07_findFrameCompressedSize(const void* src, size_t srcSize)
+{   /* Walks the block headers of the frame starting at `src`. @return : compressed size of that frame, or an error code */
+    const BYTE* const start = (const BYTE*)src;
+    size_t consumed = 0;   /* nb of bytes of `src` already accounted for */
+
+    /* check : room for a minimal frame header plus one block header */
+    if (srcSize < ZSTDv07_frameHeaderSize_min+ZSTDv07_blockHeaderSize) return ERROR(srcSize_wrong);
+
+    /* Frame Header */
+    {   size_t const fhSize = ZSTDv07_frameHeaderSize(src, ZSTDv07_frameHeaderSize_min);
+        if (ZSTDv07_isError(fhSize)) return fhSize;
+        if (MEM_readLE32(src) != ZSTDv07_MAGICNUMBER) return ERROR(prefix_unknown);
+        if (srcSize < fhSize+ZSTDv07_blockHeaderSize) return ERROR(srcSize_wrong);
+        consumed = fhSize;
+    }
+
+    /* Walk block headers, up to and including the end-of-frame one */
+    for (;;) {
+        blockProperties_t bp;
+        size_t const cBlockSize = ZSTDv07_getcBlockSize(start + consumed, srcSize - consumed, &bp);
+        if (ZSTDv07_isError(cBlockSize)) return cBlockSize;
+
+        /* step over the block header */
+        consumed += ZSTDv07_blockHeaderSize;
+
+        if (bp.blockType == bt_end) break;
+
+        /* step over the block content, which must be fully present */
+        if (cBlockSize > srcSize - consumed) return ERROR(srcSize_wrong);
+
+        consumed += cBlockSize;
+    }
+
+    return consumed;
+}
+
+/*_******************************
+*  Streaming Decompression API
+********************************/
+size_t ZSTDv07_nextSrcSizeToDecompress(ZSTDv07_DCtx* dctx)
+{   /* Exact `srcSize` that the next ZSTDv07_decompressContinue() call will accept */
+    return dctx->expected;
+}
+
+int ZSTDv07_isSkipFrame(ZSTDv07_DCtx* dctx)
+{   /* Non-zero when the next input chunk is the content of a skippable frame */
+    return dctx->stage == ZSTDds_skipFrame;
+}
+
+/** ZSTDv07_decompressContinue() :
+*   @return : nb of bytes generated into `dst` (necessarily <= `dstCapacity`)
+*             or an error code, which can be tested using ZSTDv07_isError() */
+size_t ZSTDv07_decompressContinue(ZSTDv07_DCtx* dctx, void* dst, size_t dstCapacity, const void* src, size_t srcSize)
+{   /* One step of the streaming state machine : `srcSize` must equal ZSTDv07_nextSrcSizeToDecompress() */
+    /* Sanity check */
+    if (srcSize != dctx->expected) return ERROR(srcSize_wrong);
+    if (dstCapacity) ZSTDv07_checkContinuity(dctx, dst);   /* header-only steps pass no output buffer */
+
+    switch (dctx->stage)
+    {
+    case ZSTDds_getFrameHeaderSize :
+        if (srcSize != ZSTDv07_frameHeaderSize_min) return ERROR(srcSize_wrong);   /* impossible */
+        if ((MEM_readLE32(src) & 0xFFFFFFF0U) == ZSTDv07_MAGIC_SKIPPABLE_START) {
+            memcpy(dctx->headerBuffer, src, ZSTDv07_frameHeaderSize_min);
+            dctx->expected = ZSTDv07_skippableHeaderSize - ZSTDv07_frameHeaderSize_min; /* magic number + skippable frame length */
+            dctx->stage = ZSTDds_decodeSkippableHeader;
+            return 0;
+        }
+        dctx->headerSize = ZSTDv07_frameHeaderSize(src, ZSTDv07_frameHeaderSize_min);
+        if (ZSTDv07_isError(dctx->headerSize)) return dctx->headerSize;
+        memcpy(dctx->headerBuffer, src, ZSTDv07_frameHeaderSize_min);
+        if (dctx->headerSize > ZSTDv07_frameHeaderSize_min) {   /* long header : ask for its remaining bytes */
+            dctx->expected = dctx->headerSize - ZSTDv07_frameHeaderSize_min;
+            dctx->stage = ZSTDds_decodeFrameHeader;
+            return 0;
+        }
+        dctx->expected = 0;   /* not necessary to copy more */
+        /* fall-through */
+    case ZSTDds_decodeFrameHeader:
+        {   size_t result;
+            memcpy(dctx->headerBuffer + ZSTDv07_frameHeaderSize_min, src, dctx->expected);
+            result = ZSTDv07_decodeFrameHeader(dctx, dctx->headerBuffer, dctx->headerSize);
+            if (ZSTDv07_isError(result)) return result;
+            dctx->expected = ZSTDv07_blockHeaderSize;
+            dctx->stage = ZSTDds_decodeBlockHeader;
+            return 0;
+        }
+    case ZSTDds_decodeBlockHeader:
+        {   blockProperties_t bp;
+            size_t const cBlockSize = ZSTDv07_getcBlockSize(src, ZSTDv07_blockHeaderSize, &bp);
+            if (ZSTDv07_isError(cBlockSize)) return cBlockSize;
+            if (bp.blockType == bt_end) {
+                if (dctx->fParams.checksumFlag) {   /* the end header carries 22 bits of the content hash */
+                    U64 const h64 = XXH64_digest(&dctx->xxhState);
+                    U32 const h32 = (U32)(h64>>11) & ((1<<22)-1);
+                    const BYTE* const ip = (const BYTE*)src;
+                    U32 const check32 = ip[2] + (ip[1] << 8) + ((ip[0] & 0x3F) << 16);
+                    if (check32 != h32) return ERROR(checksum_wrong);
+                }
+                dctx->expected = 0;
+                dctx->stage = ZSTDds_getFrameHeaderSize;
+            } else {
+                dctx->expected = cBlockSize;
+                dctx->bType = bp.blockType;
+                dctx->stage = ZSTDds_decompressBlock;
+            }
+            return 0;
+        }
+    case ZSTDds_decompressBlock:
+        {   size_t rSize;
+            switch(dctx->bType)
+            {
+            case bt_compressed:
+                rSize = ZSTDv07_decompressBlock_internal(dctx, dst, dstCapacity, src, srcSize);
+                break;
+            case bt_raw :
+                rSize = ZSTDv07_copyRawBlock(dst, dstCapacity, src, srcSize);
+                break;
+            case bt_rle :
+                /* RLE blocks are rejected on this streaming path */
+                return ERROR(GENERIC);   /* not yet handled */
+            case bt_end :   /* should never happen (filtered at phase 1) */
+                rSize = 0;
+                break;
+            default:
+                return ERROR(GENERIC);   /* impossible */
+            }
+            dctx->stage = ZSTDds_decodeBlockHeader;
+            dctx->expected = ZSTDv07_blockHeaderSize;
+            if (ZSTDv07_isError(rSize)) return rSize;   /* checked first : an error code must not be used as a length */
+            dctx->previousDstEnd = (char*)dst + rSize;
+            if (dctx->fParams.checksumFlag) XXH64_update(&dctx->xxhState, dst, rSize);
+            return rSize;
+        }
+    case ZSTDds_decodeSkippableHeader:
+        {   memcpy(dctx->headerBuffer + ZSTDv07_frameHeaderSize_min, src, dctx->expected);
+            dctx->expected = MEM_readLE32(dctx->headerBuffer + 4);   /* length of the skippable content, read from bytes 4-7 */
+            dctx->stage = ZSTDds_skipFrame;
+            return 0;
+        }
+    case ZSTDds_skipFrame:
+        {   dctx->expected = 0;   /* skippable content is ignored */
+            dctx->stage = ZSTDds_getFrameHeaderSize;
+            return 0;
+        }
+    default:
+        return ERROR(GENERIC);   /* impossible */
+    }
+}
+
+
+static size_t ZSTDv07_refDictContent(ZSTDv07_DCtx* dctx, const void* dict, size_t dictSize)
+{   /* References `dict` (no copy is made) as the most recent history segment of `dctx`; always returns 0 */
+    dctx->dictEnd = dctx->previousDstEnd;   /* must be read before previousDstEnd is overwritten below */
+    dctx->vBase = (const char*)dict - ((const char*)(dctx->previousDstEnd) - (const char*)(dctx->base));   /* `dict` start minus the length of the previous segment */
+    dctx->base = dict;
+    dctx->previousDstEnd = (const char*)dict + dictSize;   /* output is contiguous only if it starts right after the dictionary */
+    return 0;
+}
+
+static size_t ZSTDv07_loadEntropy(ZSTDv07_DCtx* dctx, const void* const dict, size_t const dictSize)
+{   /* Loads the entropy section of a dictionary into `dctx`. @return : nb of bytes consumed from `dict`, or dictionary_corrupted */
+    const BYTE* dictPtr = (const BYTE*)dict;
+    const BYTE* const dictEnd = dictPtr + dictSize;
+    /* 1) Huffman decoding table, stored into dctx->hufTable */
+    {   size_t const hSize = HUFv07_readDTableX4(dctx->hufTable, dict, dictSize);
+        if (HUFv07_isError(hSize)) return ERROR(dictionary_corrupted);
+        dictPtr += hSize;
+    }
+    /* 2) offset codes : normalized counts, then decoding table dctx->OffTable */
+    {   short offcodeNCount[MaxOff+1];
+        U32 offcodeMaxValue=MaxOff, offcodeLog;
+        size_t const offcodeHeaderSize = FSEv07_readNCount(offcodeNCount, &offcodeMaxValue, &offcodeLog, dictPtr, dictEnd-dictPtr);
+        if (FSEv07_isError(offcodeHeaderSize)) return ERROR(dictionary_corrupted);
+        if (offcodeLog > OffFSELog) return ERROR(dictionary_corrupted);
+        { size_t const errorCode = FSEv07_buildDTable(dctx->OffTable, offcodeNCount, offcodeMaxValue, offcodeLog);
+          if (FSEv07_isError(errorCode)) return ERROR(dictionary_corrupted); }
+        dictPtr += offcodeHeaderSize;
+    }
+    /* 3) match lengths : normalized counts, then decoding table dctx->MLTable */
+    {   short matchlengthNCount[MaxML+1];
+        unsigned matchlengthMaxValue = MaxML, matchlengthLog;
+        size_t const matchlengthHeaderSize = FSEv07_readNCount(matchlengthNCount, &matchlengthMaxValue, &matchlengthLog, dictPtr, dictEnd-dictPtr);
+        if (FSEv07_isError(matchlengthHeaderSize)) return ERROR(dictionary_corrupted);
+        if (matchlengthLog > MLFSELog) return ERROR(dictionary_corrupted);
+        { size_t const errorCode = FSEv07_buildDTable(dctx->MLTable, matchlengthNCount, matchlengthMaxValue, matchlengthLog);
+          if (FSEv07_isError(errorCode)) return ERROR(dictionary_corrupted); }
+        dictPtr += matchlengthHeaderSize;
+    }
+    /* 4) literal lengths : normalized counts, then decoding table dctx->LLTable */
+    {   short litlengthNCount[MaxLL+1];
+        unsigned litlengthMaxValue = MaxLL, litlengthLog;
+        size_t const litlengthHeaderSize = FSEv07_readNCount(litlengthNCount, &litlengthMaxValue, &litlengthLog, dictPtr, dictEnd-dictPtr);
+        if (FSEv07_isError(litlengthHeaderSize)) return ERROR(dictionary_corrupted);
+        if (litlengthLog > LLFSELog) return ERROR(dictionary_corrupted);
+        { size_t const errorCode = FSEv07_buildDTable(dctx->LLTable, litlengthNCount, litlengthMaxValue, litlengthLog);
+          if (FSEv07_isError(errorCode)) return ERROR(dictionary_corrupted); }
+        dictPtr += litlengthHeaderSize;
+    }
+    /* 5) three initial repeat offsets, 4 bytes little-endian each; every one must be non-zero and < dictSize */
+    if (dictPtr+12 > dictEnd) return ERROR(dictionary_corrupted);
+    dctx->rep[0] = MEM_readLE32(dictPtr+0); if (dctx->rep[0] == 0 || dctx->rep[0] >= dictSize) return ERROR(dictionary_corrupted);
+    dctx->rep[1] = MEM_readLE32(dictPtr+4); if (dctx->rep[1] == 0 || dctx->rep[1] >= dictSize) return ERROR(dictionary_corrupted);
+    dctx->rep[2] = MEM_readLE32(dictPtr+8); if (dctx->rep[2] == 0 || dctx->rep[2] >= dictSize) return ERROR(dictionary_corrupted);
+    dictPtr += 12;
+    /* flag both table sets as loaded, so they can be reused by following blocks */
+    dctx->litEntropy = dctx->fseEntropy = 1;
+    return dictPtr - (const BYTE*)dict;
+}
+
+static size_t ZSTDv07_decompress_insertDictionary(ZSTDv07_DCtx* dctx, const void* dict, size_t dictSize)
+{   /* Loads `dict` into `dctx` : either as raw content, or as a structured dictionary (magic, ID, entropy tables, content) */
+    const char* const dictStart = (const char*)dict;
+    size_t entropySize;
+
+    /* too short for a header, or no dictionary magic : pure content mode */
+    if ((dictSize < 8) || (MEM_readLE32(dict) != ZSTDv07_DICT_MAGIC))
+        return ZSTDv07_refDictContent(dctx, dict, dictSize);
+
+    /* header : 4 bytes of magic, then 4 bytes of dictionary ID */
+    dctx->dictID = MEM_readLE32(dictStart + 4);
+
+    /* load entropy tables, which directly follow the 8-byte header */
+    entropySize = ZSTDv07_loadEntropy(dctx, dictStart + 8, dictSize - 8);
+
+    /* any failure is reported as a corrupted dictionary */
+    if (ZSTDv07_isError(entropySize)) return ERROR(dictionary_corrupted);
+
+    /* reference dictionary content : whatever remains after the tables */
+    return ZSTDv07_refDictContent(dctx, dictStart + 8 + entropySize, dictSize - 8 - entropySize);
+}
+
+
+size_t ZSTDv07_decompressBegin_usingDict(ZSTDv07_DCtx* dctx, const void* dict, size_t dictSize)
+{   /* Resets `dctx` for a new decompression, then loads `dict` when one is provided. @return : 0, or an error code */
+    { size_t const errorCode = ZSTDv07_decompressBegin(dctx);
+      if (ZSTDv07_isError(errorCode)) return errorCode; }
+
+    if (dict && dictSize) {   /* NULL or empty dictionary : nothing to load */
+        size_t const errorCode = ZSTDv07_decompress_insertDictionary(dctx, dict, dictSize);
+        if (ZSTDv07_isError(errorCode)) return ERROR(dictionary_corrupted);   /* every loading failure is reported as dictionary_corrupted */
+    }
+
+    return 0;
+}
+
+
+struct ZSTDv07_DDict_s {
+    void* dict;                 /* private copy of the dictionary, owned by the DDict */
+    size_t dictSize;            /* size of `dict`, in bytes */
+    ZSTDv07_DCtx* refContext;   /* context already initialized with `dict`, owned by the DDict */
+};  /* typedef'd to ZSTDv07_DDict */
+
+ZSTDv07_DDict* ZSTDv07_createDDict_advanced(const void* dict, size_t dictSize, ZSTDv07_customMem customMem)
+{   /* Builds a digested dictionary holding a private copy of `dict`. @return : the new DDict, or NULL on failure */
+    if (!customMem.customAlloc && !customMem.customFree)   /* no allocator given at all : use the default one */
+        customMem = defaultCustomMem;
+
+    if (!customMem.customAlloc || !customMem.customFree)   /* a half-specified allocator cannot be used */
+        return NULL;
+
+    {   ZSTDv07_DDict* const ddict = (ZSTDv07_DDict*) customMem.customAlloc(customMem.opaque, sizeof(*ddict));
+        void* const dictContent = customMem.customAlloc(customMem.opaque, dictSize);
+        ZSTDv07_DCtx* const dctx = ZSTDv07_createDCtx_advanced(customMem);
+
+        if (!dictContent || !ddict || !dctx) {   /* release only what was obtained : a custom free may not accept NULL */
+            if (dictContent) customMem.customFree(customMem.opaque, dictContent);
+            if (ddict) customMem.customFree(customMem.opaque, ddict);
+            if (dctx) ZSTDv07_freeDCtx(dctx);
+            return NULL;
+        }
+
+        if (dictSize > 0) memcpy(dictContent, dict, dictSize);   /* `dict` may be NULL when dictSize is 0 */
+        {   size_t const errorCode = ZSTDv07_decompressBegin_usingDict(dctx, dictContent, dictSize);
+            if (ZSTDv07_isError(errorCode)) {
+                customMem.customFree(customMem.opaque, dictContent);
+                customMem.customFree(customMem.opaque, ddict);
+                ZSTDv07_freeDCtx(dctx);   /* same destructor as used by ZSTDv07_freeDDict() */
+                return NULL;
+        }   }
+
+        ddict->dict = dictContent;
+        ddict->dictSize = dictSize;
+        ddict->refContext = dctx;
+        return ddict;
+    }
+}
+
+/*! ZSTDv07_createDDict() :
+*   Create a digested dictionary, ready to start decompression without startup delay.
+*   `dict` can be released after `ZSTDv07_DDict` creation (its content is copied). @return : NULL on failure */
+ZSTDv07_DDict* ZSTDv07_createDDict(const void* dict, size_t dictSize)
+{
+    ZSTDv07_customMem const allocator = { NULL, NULL, NULL };   /* all-NULL selects the default allocator in the _advanced variant */
+    return ZSTDv07_createDDict_advanced(dict, dictSize, allocator);
+}
+
+size_t ZSTDv07_freeDDict(ZSTDv07_DDict* ddict)
+{   /* Releases the DDict, its dictionary copy and its reference context; always returns 0 */
+    if (ddict==NULL) return 0;   /* support free on null, like ZBUFFv07_freeDCtx() */
+    {   ZSTDv07_customMem const cMem = ddict->refContext->customMem;   /* copied first : refContext is released next */
+        ZSTDv07_freeDCtx(ddict->refContext);
+        cMem.customFree(cMem.opaque, ddict->dict);
+        cMem.customFree(cMem.opaque, ddict);
+        return 0;
+}   }
+
+/*! ZSTDv07_decompress_usingDDict() :
+*   Decompression using a pre-digested Dictionary
+*   Use dictionary without significant overhead : the state prepared in `ddict` is copied into `dctx` rather than rebuilt. */
+ZSTDLIBv07_API size_t ZSTDv07_decompress_usingDDict(ZSTDv07_DCtx* dctx,
+                                           void* dst, size_t dstCapacity,
+                                     const void* src, size_t srcSize,
+                                     const ZSTDv07_DDict* ddict)
+{
+    return ZSTDv07_decompress_usingPreparedDCtx(dctx, ddict->refContext,   /* `ddict` must not be NULL */
+                                           dst, dstCapacity,
+                                           src, srcSize);
+}
+/*
+    Buffered version of Zstd compression library
+    Copyright (C) 2015-2016, Yann Collet.
+
+    BSD 2-Clause License (http://www.opensource.org/licenses/bsd-license.php)
+
+    Redistribution and use in source and binary forms, with or without
+    modification, are permitted provided that the following conditions are
+    met:
+    * Redistributions of source code must retain the above copyright
+    notice, this list of conditions and the following disclaimer.
+    * Redistributions in binary form must reproduce the above
+    copyright notice, this list of conditions and the following disclaimer
+    in the documentation and/or other materials provided with the
+    distribution.
+    THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+    "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+    LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+    A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+    OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+    SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+    LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+    DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+    THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+    (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+    OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+    You can contact the author at :
+    - zstd homepage : http://www.zstd.net/
+*/
+
+
+
+/*-***************************************************************************
+*  Streaming decompression howto
+*
+*  A ZBUFFv07_DCtx object is required to track streaming operations.
+*  Use ZBUFFv07_createDCtx() and ZBUFFv07_freeDCtx() to create/release resources.
+*  Use ZBUFFv07_decompressInit() to start a new decompression operation,
+*   or ZBUFFv07_decompressInitDictionary() if decompression requires a dictionary.
+*  Note that ZBUFFv07_DCtx objects can be re-init multiple times.
+*
+*  Use ZBUFFv07_decompressContinue() repetitively to consume your input.
+*  *srcSizePtr and *dstCapacityPtr can be any size.
+*  The function will report how many bytes were read or written by modifying *srcSizePtr and *dstCapacityPtr.
+*  Note that it may not consume the entire input, in which case it's up to the caller to present remaining input again.
+*  The content of @dst will be overwritten (up to *dstCapacityPtr) at each function call, so save its content if it matters, or change @dst.
+*  @return : a hint to preferred nb of bytes to use as input for next function call (it's only a hint, to help latency),
+*            or 0 when a frame is completely decoded,
+*            or an error code, which can be tested using ZBUFFv07_isError().
+*
+*  Hint : recommended buffer sizes (not compulsory) : ZBUFFv07_recommendedDInSize() and ZBUFFv07_recommendedDOutSize()
+*  output : ZBUFFv07_recommendedDOutSize==128 KB block size is the internal unit, it ensures it's always possible to write a full block when decoded.
+*  input  : ZBUFFv07_recommendedDInSize == 128KB + 3;
+*           just follow indications from ZBUFFv07_decompressContinue() to minimize latency. It should always be <= 128 KB + 3 .
+* *******************************************************************************/
+
+typedef enum { ZBUFFds_init, ZBUFFds_loadHeader,   /* init : decompressInit not called yet (or frame finished); loadHeader : gathering frame header bytes */
+               ZBUFFds_read, ZBUFFds_load, ZBUFFds_flush } ZBUFFv07_dStage;   /* read : decode straight from src; load : gather input into inBuff; flush : copy decoded bytes to dst */
+
+/* *** Resource management *** */
+struct ZBUFFv07_DCtx_s {
+    ZSTDv07_DCtx* zd;              /* underlying block-level decompression context */
+    ZSTDv07_frameParams fParams;   /* frame parameters, filled by ZSTDv07_getFrameParams() */
+    ZBUFFv07_dStage stage;         /* current state of the buffered state machine */
+    char*  inBuff;                 /* gathers an input chunk that arrives split across calls */
+    size_t inBuffSize;             /* allocated size of inBuff */
+    size_t inPos;                  /* nb of bytes currently loaded into inBuff */
+    char*  outBuff;                /* blocks are decoded here before being flushed to the caller */
+    size_t outBuffSize;            /* allocated size of outBuff */
+    size_t outStart;               /* outBuff[outStart, outEnd) holds decoded bytes not flushed yet */
+    size_t outEnd;
+    size_t blockSize;              /* MIN(windowSize, ZSTDv07_BLOCKSIZE_ABSOLUTEMAX) for the current frame */
+    BYTE headerBuffer[ZSTDv07_FRAMEHEADERSIZE_MAX];   /* accumulates the frame header across calls */
+    size_t lhSize;                 /* nb of header bytes loaded into headerBuffer */
+    ZSTDv07_customMem customMem;   /* allocator used for this object and its buffers */
+};   /* typedef'd to ZBUFFv07_DCtx within "zstd_buffered.h" */
+
+ZSTDLIBv07_API ZBUFFv07_DCtx* ZBUFFv07_createDCtx_advanced(ZSTDv07_customMem customMem);
+
+ZBUFFv07_DCtx* ZBUFFv07_createDCtx(void)
+{   /* Creates a buffered decompression context with the default allocator. @return : NULL on failure */
+    return ZBUFFv07_createDCtx_advanced(defaultCustomMem);
+}
+
+ZBUFFv07_DCtx* ZBUFFv07_createDCtx_advanced(ZSTDv07_customMem customMem)
+{   /* Creates a buffered decompression context using `customMem`. @return : NULL on failure */
+    ZBUFFv07_DCtx* ctx;
+    /* no allocator given at all : fall back to the default one */
+    if (!customMem.customAlloc && !customMem.customFree)
+        customMem = defaultCustomMem;
+    /* a half-specified allocator cannot be used */
+    if (!customMem.customAlloc || !customMem.customFree)
+        return NULL;
+
+    ctx = (ZBUFFv07_DCtx*)customMem.customAlloc(customMem.opaque, sizeof(*ctx));
+    if (ctx==NULL) return NULL;
+    memset(ctx, 0, sizeof(*ctx));   /* buffers start as NULL / size 0 : they are allocated once a frame header is known */
+    ctx->customMem = customMem;
+    ctx->zd = ZSTDv07_createDCtx_advanced(customMem);
+    if (ctx->zd == NULL) { ZBUFFv07_freeDCtx(ctx); return NULL; }
+    ctx->stage = ZBUFFds_init;   /* a decompressInit call is required before any decompressContinue */
+    return ctx;
+}
+
+size_t ZBUFFv07_freeDCtx(ZBUFFv07_DCtx* zbd)
+{   /* Releases `zbd`, its inner context and its buffers; always returns 0 */
+    if (zbd==NULL) return 0;   /* support free on null */
+    ZSTDv07_freeDCtx(zbd->zd);   /* zd can be NULL when called from a failed ZBUFFv07_createDCtx_advanced() */
+    if (zbd->inBuff) zbd->customMem.customFree(zbd->customMem.opaque, zbd->inBuff);
+    if (zbd->outBuff) zbd->customMem.customFree(zbd->customMem.opaque, zbd->outBuff);
+    zbd->customMem.customFree(zbd->customMem.opaque, zbd);   /* last : the allocator itself is stored inside zbd */
+    return 0;
+}
+
+
+/* *** Initialization *** */
+
+size_t ZBUFFv07_decompressInitDictionary(ZBUFFv07_DCtx* zbd, const void* dict, size_t dictSize)
+{   /* Starts a new buffered decompression, with an optional dictionary. @return : 0, or an error code */
+    zbd->stage = ZBUFFds_loadHeader;
+    zbd->lhSize = zbd->inPos = zbd->outStart = zbd->outEnd = 0;   /* forget any partially loaded header, input or pending output; buffers are kept */
+    return ZSTDv07_decompressBegin_usingDict(zbd->zd, dict, dictSize);
+}
+
+size_t ZBUFFv07_decompressInit(ZBUFFv07_DCtx* zbd)
+{   /* Starts a new buffered decompression without dictionary */
+    return ZBUFFv07_decompressInitDictionary(zbd, NULL, 0);
+}
+
+
+/* internal util function : copies MIN(dstCapacity, srcSize) bytes from `src` to `dst`. @return : nb of bytes copied */
+MEM_STATIC size_t ZBUFFv07_limitCopy(void* dst, size_t dstCapacity, const void* src, size_t srcSize)
+{
+    size_t const length = MIN(dstCapacity, srcSize);
+    if (length > 0) memcpy(dst, src, length);   /* buffers may be NULL when empty : memcpy() on NULL is undefined, even for 0 bytes */
+    return length;
+}
+
+
+/* *** Decompression *** */
+
+size_t ZBUFFv07_decompressContinue(ZBUFFv07_DCtx* zbd,
+                                void* dst, size_t* dstCapacityPtr,
+                          const void* src, size_t* srcSizePtr)
+{   /* Consumes input and produces output as far as possible; *srcSizePtr / *dstCapacityPtr are updated to the amounts read / written */
+    const char* const istart = (const char*)src;
+    const char* const iend = istart + *srcSizePtr;
+    const char* ip = istart;
+    char* const ostart = (char*)dst;
+    char* const oend = ostart + *dstCapacityPtr;
+    char* op = ostart;
+    U32 notDone = 1;
+
+    while (notDone) {
+        switch(zbd->stage)
+        {
+        case ZBUFFds_init :
+            return ERROR(init_missing);
+
+        case ZBUFFds_loadHeader :
+            {   size_t const hSize = ZSTDv07_getFrameParams(&(zbd->fParams), zbd->headerBuffer, zbd->lhSize);
+                if (ZSTDv07_isError(hSize)) return hSize;
+                if (hSize != 0) {   /* header incomplete : hSize is the total nb of bytes wanted */
+                    size_t const toLoad = hSize - zbd->lhSize;   /* if hSize!=0, hSize > zbd->lhSize */
+                    if (toLoad > (size_t)(iend-ip)) {   /* not enough input to load full header */
+                        if (ip != iend) memcpy(zbd->headerBuffer + zbd->lhSize, ip, iend-ip);   /* `src` may be NULL when empty */
+                        zbd->lhSize += iend-ip;
+                        *dstCapacityPtr = 0;
+                        return (hSize - zbd->lhSize) + ZSTDv07_blockHeaderSize;   /* remaining header bytes + next block header */
+                    }
+                    memcpy(zbd->headerBuffer + zbd->lhSize, ip, toLoad); zbd->lhSize = hSize; ip += toLoad;
+                    break;
+            }   }
+
+            /* Consume header */
+            {   size_t const h1Size = ZSTDv07_nextSrcSizeToDecompress(zbd->zd);  /* == ZSTDv07_frameHeaderSize_min */
+                size_t const h1Result = ZSTDv07_decompressContinue(zbd->zd, NULL, 0, zbd->headerBuffer, h1Size);
+                if (ZSTDv07_isError(h1Result)) return h1Result;
+                if (h1Size < zbd->lhSize) {   /* long header */
+                    size_t const h2Size = ZSTDv07_nextSrcSizeToDecompress(zbd->zd);
+                    size_t const h2Result = ZSTDv07_decompressContinue(zbd->zd, NULL, 0, zbd->headerBuffer+h1Size, h2Size);
+                    if (ZSTDv07_isError(h2Result)) return h2Result;
+            }   }
+
+            zbd->fParams.windowSize = MAX(zbd->fParams.windowSize, 1U << ZSTDv07_WINDOWLOG_ABSOLUTEMIN);
+
+            /* Frame header instruct buffer sizes */
+            {   size_t const blockSize = MIN(zbd->fParams.windowSize, ZSTDv07_BLOCKSIZE_ABSOLUTEMAX);
+                zbd->blockSize = blockSize;
+                if (zbd->inBuffSize < blockSize) {
+                    zbd->customMem.customFree(zbd->customMem.opaque, zbd->inBuff);
+                    zbd->inBuff = (char*)zbd->customMem.customAlloc(zbd->customMem.opaque, blockSize);
+                    zbd->inBuffSize = (zbd->inBuff == NULL) ? 0 : blockSize;   /* keep the size in sync with the buffer, even on failure */
+                    if (zbd->inBuff == NULL) return ERROR(memory_allocation);
+                }
+                {   size_t const neededOutSize = zbd->fParams.windowSize + blockSize + WILDCOPY_OVERLENGTH * 2;
+                    if (zbd->outBuffSize < neededOutSize) {
+                        zbd->customMem.customFree(zbd->customMem.opaque, zbd->outBuff);
+                        zbd->outBuff = (char*)zbd->customMem.customAlloc(zbd->customMem.opaque, neededOutSize);
+                        zbd->outBuffSize = (zbd->outBuff == NULL) ? 0 : neededOutSize;   /* same rule as inBuff */
+                        if (zbd->outBuff == NULL) return ERROR(memory_allocation);
+            }   }   }
+            zbd->stage = ZBUFFds_read;
+            /* pass-through */
+            /* fall-through */
+        case ZBUFFds_read:
+            {   size_t const neededInSize = ZSTDv07_nextSrcSizeToDecompress(zbd->zd);
+                if (neededInSize==0) {  /* end of frame */
+                    zbd->stage = ZBUFFds_init;
+                    notDone = 0;
+                    break;
+                }
+                if ((size_t)(iend-ip) >= neededInSize) {  /* decode directly from src */
+                    const int isSkipFrame = ZSTDv07_isSkipFrame(zbd->zd);
+                    size_t const decodedSize = ZSTDv07_decompressContinue(zbd->zd,
+                        zbd->outBuff + zbd->outStart, (isSkipFrame ? 0 : zbd->outBuffSize - zbd->outStart),
+                        ip, neededInSize);
+                    if (ZSTDv07_isError(decodedSize)) return decodedSize;
+                    ip += neededInSize;
+                    if (!decodedSize && !isSkipFrame) break;   /* this was just a header */
+                    zbd->outEnd = zbd->outStart +  decodedSize;
+                    zbd->stage = ZBUFFds_flush;
+                    break;
+                }
+                if (ip==iend) { notDone = 0; break; }   /* no more input */
+                zbd->stage = ZBUFFds_load;
+            }
+            /* fall-through */
+        case ZBUFFds_load:
+            {   size_t const neededInSize = ZSTDv07_nextSrcSizeToDecompress(zbd->zd);
+                size_t const toLoad = neededInSize - zbd->inPos;   /* should always be <= remaining space within inBuff */
+                size_t loadedSize;
+                if (toLoad > zbd->inBuffSize - zbd->inPos) return ERROR(corruption_detected);   /* should never happen */
+                loadedSize = ZBUFFv07_limitCopy(zbd->inBuff + zbd->inPos, toLoad, ip, iend-ip);
+                ip += loadedSize;
+                zbd->inPos += loadedSize;
+                if (loadedSize < toLoad) { notDone = 0; break; }   /* not enough input, wait for more */
+
+                /* decode loaded input */
+                {  const int isSkipFrame = ZSTDv07_isSkipFrame(zbd->zd);
+                   size_t const decodedSize = ZSTDv07_decompressContinue(zbd->zd,
+                        zbd->outBuff + zbd->outStart, zbd->outBuffSize - zbd->outStart,
+                        zbd->inBuff, neededInSize);
+                    if (ZSTDv07_isError(decodedSize)) return decodedSize;
+                    zbd->inPos = 0;   /* input is consumed */
+                    if (!decodedSize && !isSkipFrame) { zbd->stage = ZBUFFds_read; break; }   /* this was just a header */
+                    zbd->outEnd = zbd->outStart +  decodedSize;
+                    zbd->stage = ZBUFFds_flush;
+                    /* break; */
+                    /* pass-through */
+                }
+            }
+            /* fall-through */
+        case ZBUFFds_flush:
+            {   size_t const toFlushSize = zbd->outEnd - zbd->outStart;
+                size_t const flushedSize = ZBUFFv07_limitCopy(op, oend-op, zbd->outBuff + zbd->outStart, toFlushSize);
+                op += flushedSize;
+                zbd->outStart += flushedSize;
+                if (flushedSize == toFlushSize) {
+                    zbd->stage = ZBUFFds_read;
+                    if (zbd->outStart + zbd->blockSize > zbd->outBuffSize)   /* no room left for a full block : restart at the beginning of outBuff */
+                        zbd->outStart = zbd->outEnd = 0;
+                    break;
+                }
+                /* cannot flush everything : stay in flush stage until the caller provides more room */
+                notDone = 0;
+                break;
+            }
+        default: return ERROR(GENERIC);   /* impossible */
+    }   }
+
+    /* result */
+    *srcSizePtr = ip-istart;
+    *dstCapacityPtr = op-ostart;
+    {   size_t nextSrcSizeHint = ZSTDv07_nextSrcSizeToDecompress(zbd->zd);
+        nextSrcSizeHint -= zbd->inPos;   /* already loaded*/
+        return nextSrcSizeHint;
+    }
+}
+
+
+
+/* *************************************
+*  Tool functions
+***************************************/
+size_t ZBUFFv07_recommendedDInSize(void)  { return ZSTDv07_BLOCKSIZE_ABSOLUTEMAX + ZSTDv07_blockHeaderSize /* largest block plus the header of the block that follows */ ; }
+size_t ZBUFFv07_recommendedDOutSize(void) { return ZSTDv07_BLOCKSIZE_ABSOLUTEMAX; /* room for one block of maximum size */ }
diff --git a/contrib/libzstd/include/zstd/legacy/zstd_v07.h b/contrib/libzstd/include/zstd/legacy/zstd_v07.h
new file mode 100644
index 00000000000..cc95c661bc9
--- /dev/null
+++ b/contrib/libzstd/include/zstd/legacy/zstd_v07.h
@@ -0,0 +1,181 @@
+/**
+ * Copyright (c) 2016-present, Yann Collet, Facebook, Inc.
+ * All rights reserved.
+ *
+ * This source code is licensed under the BSD-style license found in the
+ * LICENSE file in the root directory of this source tree. An additional grant
+ * of patent rights can be found in the PATENTS file in the same directory.
+ */
+
+#ifndef ZSTDv07_H_235446
+#define ZSTDv07_H_235446
+
+#if defined (__cplusplus)
+extern "C" {
+#endif
+
+/*======  Dependency  ======*/
+#include <stddef.h>   /* size_t */
+
+
+/*======  Export for Windows  ======*/
+/*!
+*  ZSTDv07_DLL_EXPORT :
+*  Enable exporting of functions when building a Windows DLL
+*/
+#if defined(_WIN32) && defined(ZSTDv07_DLL_EXPORT) && (ZSTDv07_DLL_EXPORT==1)
+#  define ZSTDLIBv07_API __declspec(dllexport)
+#else
+#  define ZSTDLIBv07_API
+#endif
+
+
+/* *************************************
+*  Simple API
+***************************************/
+/*! ZSTDv07_getDecompressedSize() :
+*   @return : decompressed size if known, 0 otherwise.
+       note 1 : if `0`, follow up with ZSTDv07_getFrameParams() to know precise failure cause.
+       note 2 : decompressed size could be wrong or intentionally modified !
+                always ensure results fit within application's authorized limits */
+unsigned long long ZSTDv07_getDecompressedSize(const void* src, size_t srcSize);
+
+/*! ZSTDv07_decompress() :
+    `compressedSize` : must be _exact_ size of compressed input, otherwise decompression will fail.
+    `dstCapacity` must be equal or larger than originalSize.
+    @return : the number of bytes decompressed into `dst` (<= `dstCapacity`),
+              or an errorCode if it fails (which can be tested using ZSTDv07_isError()) */
+ZSTDLIBv07_API size_t ZSTDv07_decompress( void* dst, size_t dstCapacity,
+                                    const void* src, size_t compressedSize);
+
+/**
+ZSTDv07_findFrameCompressedSize() : get the source length of a ZSTD frame
+    compressedSize : The size of the 'src' buffer, at least as large as the frame pointed to by 'src'
+    return : the number of bytes that would be read to decompress this frame
+             or an errorCode if it fails (which can be tested using ZSTDv07_isError())
+*/
+size_t ZSTDv07_findFrameCompressedSize(const void* src, size_t compressedSize);
+
+/*======  Helper functions  ======*/
+ZSTDLIBv07_API unsigned    ZSTDv07_isError(size_t code);          /*!< tells if a `size_t` function result is an error code */
+ZSTDLIBv07_API const char* ZSTDv07_getErrorName(size_t code);     /*!< provides readable string from an error code */
+
+
+/*-*************************************
+*  Explicit memory management
+***************************************/
+/** Decompression context */
+typedef struct ZSTDv07_DCtx_s ZSTDv07_DCtx;
+ZSTDLIBv07_API ZSTDv07_DCtx* ZSTDv07_createDCtx(void);
+ZSTDLIBv07_API size_t     ZSTDv07_freeDCtx(ZSTDv07_DCtx* dctx);      /*!< @return : errorCode */
+
+/** ZSTDv07_decompressDCtx() :
+*   Same as ZSTDv07_decompress(), requires an allocated ZSTDv07_DCtx (see ZSTDv07_createDCtx()) */
+ZSTDLIBv07_API size_t ZSTDv07_decompressDCtx(ZSTDv07_DCtx* ctx, void* dst, size_t dstCapacity, const void* src, size_t srcSize);
+
+
+/*-************************
+*  Simple dictionary API
+***************************/
+/*! ZSTDv07_decompress_usingDict() :
+*   Decompression using a pre-defined Dictionary content (see dictBuilder).
+*   Dictionary must be identical to the one used during compression.
+*   Note : This function loads the dictionary, resulting in a significant startup time */
+ZSTDLIBv07_API size_t ZSTDv07_decompress_usingDict(ZSTDv07_DCtx* dctx,
+                                                   void* dst, size_t dstCapacity,
+                                             const void* src, size_t srcSize,
+                                             const void* dict,size_t dictSize);
+
+
+/*-**************************
+*  Advanced Dictionary API
+****************************/
+/*! ZSTDv07_createDDict() :
+*   Create a digested dictionary, ready to start decompression operation without startup delay.
+*   `dict` can be released after creation */
+typedef struct ZSTDv07_DDict_s ZSTDv07_DDict;
+ZSTDLIBv07_API ZSTDv07_DDict* ZSTDv07_createDDict(const void* dict, size_t dictSize);
+ZSTDLIBv07_API size_t      ZSTDv07_freeDDict(ZSTDv07_DDict* ddict);
+
+/*! ZSTDv07_decompress_usingDDict() :
+*   Decompression using a pre-digested Dictionary
+*   Faster startup than ZSTDv07_decompress_usingDict(), recommended when same dictionary is used multiple times. */
+ZSTDLIBv07_API size_t ZSTDv07_decompress_usingDDict(ZSTDv07_DCtx* dctx,
+                                                    void* dst, size_t dstCapacity,
+                                              const void* src, size_t srcSize,
+                                              const ZSTDv07_DDict* ddict);
+
+typedef struct {
+    unsigned long long frameContentSize;
+    unsigned windowSize;
+    unsigned dictID;
+    unsigned checksumFlag;
+} ZSTDv07_frameParams;   /* passed by pointer (`fparamsPtr`) to ZSTDv07_getFrameParams() */
+
+ZSTDLIBv07_API size_t ZSTDv07_getFrameParams(ZSTDv07_frameParams* fparamsPtr, const void* src, size_t srcSize);   /**< doesn't consume input */
+
+
+
+
+/* *************************************
+*  Streaming functions
+***************************************/
+typedef struct ZBUFFv07_DCtx_s ZBUFFv07_DCtx;
+ZSTDLIBv07_API ZBUFFv07_DCtx* ZBUFFv07_createDCtx(void);
+ZSTDLIBv07_API size_t      ZBUFFv07_freeDCtx(ZBUFFv07_DCtx* dctx);
+
+ZSTDLIBv07_API size_t ZBUFFv07_decompressInit(ZBUFFv07_DCtx* dctx);
+ZSTDLIBv07_API size_t ZBUFFv07_decompressInitDictionary(ZBUFFv07_DCtx* dctx, const void* dict, size_t dictSize);
+
+ZSTDLIBv07_API size_t ZBUFFv07_decompressContinue(ZBUFFv07_DCtx* dctx,
+                                            void* dst, size_t* dstCapacityPtr,
+                                      const void* src, size_t* srcSizePtr);
+
+/*-***************************************************************************
+*  Streaming decompression howto
+*
+*  A ZBUFFv07_DCtx object is required to track streaming operations.
+*  Use ZBUFFv07_createDCtx() and ZBUFFv07_freeDCtx() to create/release resources.
+*  Use ZBUFFv07_decompressInit() to start a new decompression operation,
+*   or ZBUFFv07_decompressInitDictionary() if decompression requires a dictionary.
+*  Note that ZBUFFv07_DCtx objects can be re-init multiple times.
+*
+*  Use ZBUFFv07_decompressContinue() repetitively to consume your input.
+*  *srcSizePtr and *dstCapacityPtr can be any size.
+*  The function will report how many bytes were read or written by modifying *srcSizePtr and *dstCapacityPtr.
+*  Note that it may not consume the entire input, in which case it's up to the caller to present remaining input again.
+*  The content of `dst` will be overwritten (up to *dstCapacityPtr) at each function call, so save its content if it matters, or change `dst`.
+*  @return : a hint to preferred nb of bytes to use as input for next function call (it's only a hint, to help latency),
+*            or 0 when a frame is completely decoded,
+*            or an error code, which can be tested using ZBUFFv07_isError().
+*
+*  Hint : recommended buffer sizes (not compulsory) : ZBUFFv07_recommendedDInSize() and ZBUFFv07_recommendedDOutSize()
+*  output : ZBUFFv07_recommendedDOutSize== 128 KB block size is the internal unit, it ensures it's always possible to write a full block when decoded.
+*  input  : ZBUFFv07_recommendedDInSize == 128KB + 3;
+*           just follow indications from ZBUFFv07_decompressContinue() to minimize latency. It should always be <= 128 KB + 3 .
+* *******************************************************************************/
+
+
+/* *************************************
+*  Tool functions
+***************************************/
+ZSTDLIBv07_API unsigned ZBUFFv07_isError(size_t errorCode);
+ZSTDLIBv07_API const char* ZBUFFv07_getErrorName(size_t errorCode);
+
+/** Functions below provide recommended buffer sizes for Compression or Decompression operations.
+*   These sizes are just hints, they tend to offer better latency */
+ZSTDLIBv07_API size_t ZBUFFv07_recommendedDInSize(void);
+ZSTDLIBv07_API size_t ZBUFFv07_recommendedDOutSize(void);
+
+
+/*-*************************************
+*  Constants
+***************************************/
+#define ZSTDv07_MAGICNUMBER            0xFD2FB527   /* v0.7 */
+
+
+#if defined (__cplusplus)
+}
+#endif
+
+#endif  /* ZSTDv07_H_235446 */
diff --git a/contrib/libzstd/include/zstd/zstd.h b/contrib/libzstd/include/zstd/zstd.h
index dd3f5df4c7f..58e9a5606db 100644
--- a/contrib/libzstd/include/zstd/zstd.h
+++ b/contrib/libzstd/include/zstd/zstd.h
@@ -1,4 +1,4 @@
-/**
+/*
  * Copyright (c) 2016-present, Yann Collet, Facebook, Inc.
  * All rights reserved.
  *
@@ -7,80 +7,126 @@
  * of patent rights can be found in the PATENTS file in the same directory.
  */
 
-#ifndef ZSTD_H_235446
-#define ZSTD_H_235446
-
 #if defined (__cplusplus)
 extern "C" {
 #endif
 
-/*======   Dependency   ======*/
+#ifndef ZSTD_H_235446
+#define ZSTD_H_235446
+
+/* ======   Dependency   ======*/
 #include <stddef.h>   /* size_t */
 
 
-/*======  Export for Windows  ======*/
-/*!
-*  ZSTD_DLL_EXPORT :
-*  Enable exporting of functions when building a Windows DLL
-*/
-#if defined(_WIN32) && defined(ZSTD_DLL_EXPORT) && (ZSTD_DLL_EXPORT==1)
-#  define ZSTDLIB_API __declspec(dllexport)
+/* =====   ZSTDLIB_API : control library symbols visibility   ===== */
+#ifndef ZSTDLIB_VISIBILITY
+#  if defined(__GNUC__) && (__GNUC__ >= 4)
+#    define ZSTDLIB_VISIBILITY __attribute__ ((visibility ("default")))
+#  else
+#    define ZSTDLIB_VISIBILITY
+#  endif
+#endif
+#if defined(ZSTD_DLL_EXPORT) && (ZSTD_DLL_EXPORT==1)
+#  define ZSTDLIB_API __declspec(dllexport) ZSTDLIB_VISIBILITY
+#elif defined(ZSTD_DLL_IMPORT) && (ZSTD_DLL_IMPORT==1)
+#  define ZSTDLIB_API __declspec(dllimport) ZSTDLIB_VISIBILITY /* It isn't required but allows to generate better code, saving a function pointer load from the IAT and an indirect jump.*/
 #else
-#  define ZSTDLIB_API
+#  define ZSTDLIB_API ZSTDLIB_VISIBILITY
 #endif
 
 
-/*=======   Version   =======*/
+/*******************************************************************************************************
+  Introduction
+
+  zstd, short for Zstandard, is a fast lossless compression algorithm,
+  targeting real-time compression scenarios at zlib-level and better compression ratios.
+  The zstd compression library provides in-memory compression and decompression functions.
+  The library supports compression levels from 1 up to ZSTD_maxCLevel() which is currently 22.
+  Levels >= 20, labeled `--ultra`, should be used with caution, as they require more memory.
+  Compression can be done in:
+    - a single step (described as Simple API)
+    - a single step, reusing a context (described as Explicit memory management)
+    - unbounded multiple steps (described as Streaming compression)
+  The compression ratio achievable on small data can be highly improved using a dictionary in:
+    - a single step (described as Simple dictionary API)
+    - a single step, reusing a dictionary (described as Fast dictionary API)
+
+  Advanced experimental functions can be accessed using #define ZSTD_STATIC_LINKING_ONLY before including zstd.h.
+  Advanced experimental APIs shall never be used with a dynamic library.
+  They are not "stable", their definition may change in the future. Only static linking is allowed.
+*********************************************************************************************************/
+
+/*------   Version   ------*/
 #define ZSTD_VERSION_MAJOR    1
-#define ZSTD_VERSION_MINOR    1
+#define ZSTD_VERSION_MINOR    3
 #define ZSTD_VERSION_RELEASE  0
 
+#define ZSTD_VERSION_NUMBER  (ZSTD_VERSION_MAJOR *100*100 + ZSTD_VERSION_MINOR *100 + ZSTD_VERSION_RELEASE)
+ZSTDLIB_API unsigned ZSTD_versionNumber(void);   /**< useful to check dll version */
+
 #define ZSTD_LIB_VERSION ZSTD_VERSION_MAJOR.ZSTD_VERSION_MINOR.ZSTD_VERSION_RELEASE
 #define ZSTD_QUOTE(str) #str
 #define ZSTD_EXPAND_AND_QUOTE(str) ZSTD_QUOTE(str)
 #define ZSTD_VERSION_STRING ZSTD_EXPAND_AND_QUOTE(ZSTD_LIB_VERSION)
-
-#define ZSTD_VERSION_NUMBER  (ZSTD_VERSION_MAJOR *100*100 + ZSTD_VERSION_MINOR *100 + ZSTD_VERSION_RELEASE)
-ZSTDLIB_API unsigned ZSTD_versionNumber (void);
+ZSTDLIB_API const char* ZSTD_versionString(void);   /* v1.3.0 */
 
 
-/* *************************************
+/***************************************
 *  Simple API
 ***************************************/
 /*! ZSTD_compress() :
-    Compresses `src` content as a single zstd compressed frame into already allocated `dst`.
-    Hint : compression runs faster if `dstCapacity` >=  `ZSTD_compressBound(srcSize)`.
-    @return : compressed size written into `dst` (<= `dstCapacity),
-              or an error code if it fails (which can be tested using ZSTD_isError()) */
+ *  Compresses `src` content as a single zstd compressed frame into already allocated `dst`.
+ *  Hint : compression runs faster if `dstCapacity` >=  `ZSTD_compressBound(srcSize)`.
+ *  @return : compressed size written into `dst` (<= `dstCapacity`),
+ *            or an error code if it fails (which can be tested using ZSTD_isError()). */
 ZSTDLIB_API size_t ZSTD_compress( void* dst, size_t dstCapacity,
                             const void* src, size_t srcSize,
                                   int compressionLevel);
 
 /*! ZSTD_decompress() :
-    `compressedSize` : must be the _exact_ size of a single compressed frame.
-    `dstCapacity` is an upper bound of originalSize.
-    If user cannot imply a maximum upper bound, it's better to use streaming mode to decompress data.
-    @return : the number of bytes decompressed into `dst` (<= `dstCapacity`),
-              or an errorCode if it fails (which can be tested using ZSTD_isError()) */
+ *  `compressedSize` : must be the _exact_ size of some number of compressed and/or skippable frames.
+ *  `dstCapacity` is an upper bound of originalSize to regenerate.
+ *  If user cannot imply a maximum upper bound, it's better to use streaming mode to decompress data.
+ *  @return : the number of bytes decompressed into `dst` (<= `dstCapacity`),
+ *            or an errorCode if it fails (which can be tested using ZSTD_isError()). */
 ZSTDLIB_API size_t ZSTD_decompress( void* dst, size_t dstCapacity,
                               const void* src, size_t compressedSize);
 
+/*! ZSTD_getFrameContentSize() : v1.3.0
+ *  `src` should point to the start of a ZSTD encoded frame.
+ *  `srcSize` must be at least as large as the frame header.
+ *            hint : any size >= `ZSTD_frameHeaderSize_max` is large enough.
+ *  @return : - decompressed size of the frame in `src`, if known
+ *            - ZSTD_CONTENTSIZE_UNKNOWN if the size cannot be determined
+ *            - ZSTD_CONTENTSIZE_ERROR if an error occurred (e.g. invalid magic number, srcSize too small)
+ *   note 1 : a 0 return value means the frame is valid but "empty".
+ *   note 2 : decompressed size is an optional field, it may not be present, typically in streaming mode.
+ *            When `return==ZSTD_CONTENTSIZE_UNKNOWN`, data to decompress could be any size.
+ *            In which case, it's necessary to use streaming mode to decompress data.
+ *            Optionally, application can rely on some implicit limit,
+ *            as ZSTD_decompress() only needs an upper bound of decompressed size.
+ *            (For example, data could be necessarily cut into blocks <= 16 KB).
+ *   note 3 : decompressed size is always present when compression is done with ZSTD_compress()
+ *   note 4 : decompressed size can be very large (64-bits value),
+ *            potentially larger than what local system can handle as a single memory segment.
+ *            In which case, it's necessary to use streaming mode to decompress data.
+ *   note 5 : If source is untrusted, decompressed size could be wrong or intentionally modified.
+ *            Always ensure return value fits within application's authorized limits.
+ *            Each application can set its own limits.
+ *   note 6 : This function replaces ZSTD_getDecompressedSize() */
+#define ZSTD_CONTENTSIZE_UNKNOWN (0ULL - 1)
+#define ZSTD_CONTENTSIZE_ERROR   (0ULL - 2)
+ZSTDLIB_API unsigned long long ZSTD_getFrameContentSize(const void *src, size_t srcSize);
+
 /*! ZSTD_getDecompressedSize() :
-*   'src' is the start of a zstd compressed frame.
-*   @return : content size to be decompressed, as a 64-bits value _if known_, 0 otherwise.
-*    note 1 : decompressed size is an optional field, that may not be present, especially in streaming mode.
-*             When `return==0`, data to decompress could be any size.
-*             In which case, it's necessary to use streaming mode to decompress data.
-*             Optionally, application can still use ZSTD_decompress() while relying on implied limits.
-*             (For example, data may be necessarily cut into blocks <= 16 KB).
-*    note 2 : decompressed size is always present when compression is done with ZSTD_compress()
-*    note 3 : decompressed size can be very large (64-bits value),
-*             potentially larger than what local system can handle as a single memory segment.
-*             In which case, it's necessary to use streaming mode to decompress data.
-*    note 4 : If source is untrusted, decompressed size could be wrong or intentionally modified.
-*             Always ensure result fits within application's authorized limits.
-*             Each application can set its own limits.
-*    note 5 : when `return==0`, if precise failure cause is needed, use ZSTD_getFrameParams() to know more. */
+ *  NOTE: This function is now obsolete, in favor of ZSTD_getFrameContentSize().
+ *  Both functions work the same way,
+ *  but ZSTD_getDecompressedSize() blends
+ *  "empty", "unknown" and "error" results in the same return value (0),
+ *  while ZSTD_getFrameContentSize() distinguishes them.
+ *
+ *  'src' is the start of a zstd compressed frame.
+ *  @return : content size to be decompressed, as a 64-bits value _if known and not empty_, 0 otherwise. */
 ZSTDLIB_API unsigned long long ZSTD_getDecompressedSize(const void* src, size_t srcSize);
 
 
@@ -91,34 +137,49 @@ ZSTDLIB_API unsigned    ZSTD_isError(size_t code);          /*!< tells if a `siz
 ZSTDLIB_API const char* ZSTD_getErrorName(size_t code);     /*!< provides readable string from an error code */
 
 
-/*-*************************************
+/***************************************
 *  Explicit memory management
 ***************************************/
-/** Compression context */
+/*= Compression context
+ *  When compressing many times,
+ *  it is recommended to allocate a context just once, and re-use it for each successive compression operation.
+ *  This will make workload friendlier for system's memory.
+ *  Use one context per thread for parallel execution in multi-threaded environments. */
 typedef struct ZSTD_CCtx_s ZSTD_CCtx;
 ZSTDLIB_API ZSTD_CCtx* ZSTD_createCCtx(void);
 ZSTDLIB_API size_t     ZSTD_freeCCtx(ZSTD_CCtx* cctx);
 
-/** ZSTD_compressCCtx() :
-    Same as ZSTD_compress(), requires an allocated ZSTD_CCtx (see ZSTD_createCCtx()) */
-ZSTDLIB_API size_t ZSTD_compressCCtx(ZSTD_CCtx* ctx, void* dst, size_t dstCapacity, const void* src, size_t srcSize, int compressionLevel);
+/*! ZSTD_compressCCtx() :
+ *  Same as ZSTD_compress(), requires an allocated ZSTD_CCtx (see ZSTD_createCCtx()). */
+ZSTDLIB_API size_t ZSTD_compressCCtx(ZSTD_CCtx* ctx,
+                                     void* dst, size_t dstCapacity,
+                               const void* src, size_t srcSize,
+                                     int compressionLevel);
 
-/** Decompression context */
+/*= Decompression context
+ *  When decompressing many times,
+ *  it is recommended to allocate a context only once,
+ *  and re-use it for each successive decompression operation.
+ *  This will make workload friendlier for system's memory.
+ *  Use one context per thread for parallel execution. */
 typedef struct ZSTD_DCtx_s ZSTD_DCtx;
 ZSTDLIB_API ZSTD_DCtx* ZSTD_createDCtx(void);
 ZSTDLIB_API size_t     ZSTD_freeDCtx(ZSTD_DCtx* dctx);
 
-/** ZSTD_decompressDCtx() :
-*   Same as ZSTD_decompress(), requires an allocated ZSTD_DCtx (see ZSTD_createDCtx()) */
-ZSTDLIB_API size_t ZSTD_decompressDCtx(ZSTD_DCtx* ctx, void* dst, size_t dstCapacity, const void* src, size_t srcSize);
+/*! ZSTD_decompressDCtx() :
+ *  Same as ZSTD_decompress(), requires an allocated ZSTD_DCtx (see ZSTD_createDCtx()) */
+ZSTDLIB_API size_t ZSTD_decompressDCtx(ZSTD_DCtx* ctx,
+                                       void* dst, size_t dstCapacity,
+                                 const void* src, size_t srcSize);
 
 
-/*-************************
+/**************************
 *  Simple dictionary API
 ***************************/
 /*! ZSTD_compress_usingDict() :
-*   Compression using a predefined Dictionary (see dictBuilder/zdict.h).
-*   Note : This function load the dictionary, resulting in significant startup delay. */
+ *  Compression using a predefined Dictionary (see dictBuilder/zdict.h).
+ *  Note : This function loads the dictionary, resulting in significant startup delay.
+ *  Note : When `dict == NULL || dictSize < 8` no dictionary is used. */
 ZSTDLIB_API size_t ZSTD_compress_usingDict(ZSTD_CCtx* ctx,
                                            void* dst, size_t dstCapacity,
                                      const void* src, size_t srcSize,
@@ -126,51 +187,65 @@ ZSTDLIB_API size_t ZSTD_compress_usingDict(ZSTD_CCtx* ctx,
                                            int compressionLevel);
 
 /*! ZSTD_decompress_usingDict() :
-*   Decompression using a predefined Dictionary (see dictBuilder/zdict.h).
-*   Dictionary must be identical to the one used during compression.
-*   Note : This function load the dictionary, resulting in significant startup delay */
+ *  Decompression using a predefined Dictionary (see dictBuilder/zdict.h).
+ *  Dictionary must be identical to the one used during compression.
+ *  Note : This function loads the dictionary, resulting in significant startup delay.
+ *  Note : When `dict == NULL || dictSize < 8` no dictionary is used. */
 ZSTDLIB_API size_t ZSTD_decompress_usingDict(ZSTD_DCtx* dctx,
                                              void* dst, size_t dstCapacity,
                                        const void* src, size_t srcSize,
                                        const void* dict,size_t dictSize);
 
 
-/*-**************************
-*  Fast Dictionary API
-****************************/
-/*! ZSTD_createCDict() :
-*   Create a digested dictionary, ready to start compression operation without startup delay.
-*   `dict` can be released after ZSTD_CDict creation */
+/**********************************
+ *  Bulk processing dictionary API
+ *********************************/
 typedef struct ZSTD_CDict_s ZSTD_CDict;
-ZSTDLIB_API ZSTD_CDict* ZSTD_createCDict(const void* dict, size_t dictSize, int compressionLevel);
+
+/*! ZSTD_createCDict() :
+ *  When compressing multiple messages / blocks with the same dictionary, it's recommended to load it just once.
+ *  ZSTD_createCDict() will create a digested dictionary, ready to start future compression operations without startup delay.
+ *  ZSTD_CDict can be created once and shared by multiple threads concurrently, since its usage is read-only.
+ *  `dictBuffer` can be released after ZSTD_CDict creation, since its content is copied within CDict */
+ZSTDLIB_API ZSTD_CDict* ZSTD_createCDict(const void* dictBuffer, size_t dictSize,
+                                         int compressionLevel);
+
+/*! ZSTD_freeCDict() :
+ *  Function frees memory allocated by ZSTD_createCDict(). */
 ZSTDLIB_API size_t      ZSTD_freeCDict(ZSTD_CDict* CDict);
 
 /*! ZSTD_compress_usingCDict() :
-*   Compression using a digested Dictionary.
-*   Faster startup than ZSTD_compress_usingDict(), recommended when same dictionary is used multiple times.
-*   Note that compression level is decided during dictionary creation */
+ *  Compression using a digested Dictionary.
+ *  Faster startup than ZSTD_compress_usingDict(), recommended when same dictionary is used multiple times.
+ *  Note that compression level is decided during dictionary creation.
+ *  Frame parameters are hardcoded (dictID=yes, contentSize=yes, checksum=no) */
 ZSTDLIB_API size_t ZSTD_compress_usingCDict(ZSTD_CCtx* cctx,
                                             void* dst, size_t dstCapacity,
                                       const void* src, size_t srcSize,
                                       const ZSTD_CDict* cdict);
 
-/*! ZSTD_createDDict() :
-*   Create a digested dictionary, ready to start decompression operation without startup delay.
-*   `dict` can be released after creation */
+
 typedef struct ZSTD_DDict_s ZSTD_DDict;
-ZSTDLIB_API ZSTD_DDict* ZSTD_createDDict(const void* dict, size_t dictSize);
+
+/*! ZSTD_createDDict() :
+ *  Create a digested dictionary, ready to start decompression operation without startup delay.
+ *  dictBuffer can be released after DDict creation, as its content is copied inside DDict */
+ZSTDLIB_API ZSTD_DDict* ZSTD_createDDict(const void* dictBuffer, size_t dictSize);
+
+/*! ZSTD_freeDDict() :
+ *  Function frees memory allocated with ZSTD_createDDict() */
 ZSTDLIB_API size_t      ZSTD_freeDDict(ZSTD_DDict* ddict);
 
 /*! ZSTD_decompress_usingDDict() :
-*   Decompression using a digested Dictionary
-*   Faster startup than ZSTD_decompress_usingDict(), recommended when same dictionary is used multiple times. */
+ *  Decompression using a digested Dictionary.
+ *  Faster startup than ZSTD_decompress_usingDict(), recommended when same dictionary is used multiple times. */
 ZSTDLIB_API size_t ZSTD_decompress_usingDDict(ZSTD_DCtx* dctx,
                                               void* dst, size_t dstCapacity,
                                         const void* src, size_t srcSize,
                                         const ZSTD_DDict* ddict);
 
 
-/*-**************************
+/****************************
 *  Streaming
 ****************************/
 
@@ -187,61 +262,67 @@ typedef struct ZSTD_outBuffer_s {
 } ZSTD_outBuffer;
 
 
-/*======   streaming compression   ======*/
 
 /*-***********************************************************************
-*  Streaming compression - howto
+*  Streaming compression - HowTo
 *
 *  A ZSTD_CStream object is required to track streaming operation.
 *  Use ZSTD_createCStream() and ZSTD_freeCStream() to create/release resources.
 *  ZSTD_CStream objects can be reused multiple times on consecutive compression operations.
+*  It is recommended to re-use ZSTD_CStream in situations where many streaming operations will be achieved consecutively,
+*  since it will play nicer with system's memory, by re-using already allocated memory.
+*  Use one separate ZSTD_CStream per thread for parallel execution.
 *
-*  Start by initializing ZSTD_CStream.
+*  Start a new compression by initializing ZSTD_CStream.
 *  Use ZSTD_initCStream() to start a new compression operation.
-*  Use ZSTD_initCStream_usingDict() for a compression which requires a dictionary.
+*  Use ZSTD_initCStream_usingDict() or ZSTD_initCStream_usingCDict() for a compression which requires a dictionary (experimental section)
 *
 *  Use ZSTD_compressStream() repetitively to consume input stream.
 *  The function will automatically update both `pos` fields.
 *  Note that it may not consume the entire input, in which case `pos < size`,
 *  and it's up to the caller to present again remaining data.
 *  @return : a size hint, preferred nb of bytes to use as input for next function call
-*           (it's just a hint, to help latency a little, any other value will work fine)
-*           (note : the size hint is guaranteed to be <= ZSTD_CStreamInSize() )
 *            or an error code, which can be tested using ZSTD_isError().
+*            Note 1 : it's just a hint, to help latency a little, any other value will work fine.
+*            Note 2 : size hint is guaranteed to be <= ZSTD_CStreamInSize()
 *
-*  At any moment, it's possible to flush whatever data remains within buffer, using ZSTD_flushStream().
+*  At any moment, it's possible to flush whatever data remains within internal buffer, using ZSTD_flushStream().
 *  `output->pos` will be updated.
-*  Note some content might still be left within internal buffer if `output->size` is too small.
+*  Note that some content might still be left within internal buffer if `output->size` is too small.
 *  @return : nb of bytes still present within internal buffer (0 if it's empty)
 *            or an error code, which can be tested using ZSTD_isError().
 *
 *  ZSTD_endStream() instructs to finish a frame.
 *  It will perform a flush and write frame epilogue.
 *  The epilogue is required for decoders to consider a frame completed.
-*  Similar to ZSTD_flushStream(), it may not be able to flush the full content if `output->size` is too small.
+*  ZSTD_endStream() may not be able to flush full data if `output->size` is too small.
 *  In which case, call again ZSTD_endStream() to complete the flush.
-*  @return : nb of bytes still present within internal buffer (0 if it's empty)
+*  @return : 0 if frame fully completed and fully flushed,
+*            or >0 if some data is still present within internal buffer
+*                 (value is minimum size estimation for remaining data to flush, but it could be more)
 *            or an error code, which can be tested using ZSTD_isError().
 *
 * *******************************************************************/
 
-typedef struct ZSTD_CStream_s ZSTD_CStream;
+typedef ZSTD_CCtx ZSTD_CStream;  /**< CCtx and CStream are now effectively same object (>= v1.3.0) */
+                                 /* Continue to distinguish them for compatibility with versions <= v1.2.0 */
+/*===== ZSTD_CStream management functions =====*/
 ZSTDLIB_API ZSTD_CStream* ZSTD_createCStream(void);
 ZSTDLIB_API size_t ZSTD_freeCStream(ZSTD_CStream* zcs);
 
-ZSTDLIB_API size_t ZSTD_CStreamInSize(void);    /**< recommended size for input buffer */
-ZSTDLIB_API size_t ZSTD_CStreamOutSize(void);   /**< recommended size for output buffer. Guarantee to successfully flush at least one complete compressed block in all circumstances. */
-
+/*===== Streaming compression functions =====*/
 ZSTDLIB_API size_t ZSTD_initCStream(ZSTD_CStream* zcs, int compressionLevel);
 ZSTDLIB_API size_t ZSTD_compressStream(ZSTD_CStream* zcs, ZSTD_outBuffer* output, ZSTD_inBuffer* input);
 ZSTDLIB_API size_t ZSTD_flushStream(ZSTD_CStream* zcs, ZSTD_outBuffer* output);
 ZSTDLIB_API size_t ZSTD_endStream(ZSTD_CStream* zcs, ZSTD_outBuffer* output);
 
+ZSTDLIB_API size_t ZSTD_CStreamInSize(void);    /**< recommended size for input buffer */
+ZSTDLIB_API size_t ZSTD_CStreamOutSize(void);   /**< recommended size for output buffer. Guarantee to successfully flush at least one complete compressed block in all circumstances. */
+
 
-/*======   decompression   ======*/
 
 /*-***************************************************************************
-*  Streaming decompression howto
+*  Streaming decompression - HowTo
 *
 *  A ZSTD_DStream object is required to track streaming operations.
 *  Use ZSTD_createDStream() and ZSTD_freeDStream() to create/release resources.
@@ -258,38 +339,46 @@ ZSTDLIB_API size_t ZSTD_endStream(ZSTD_CStream* zcs, ZSTD_outBuffer* output);
 *  If `output.pos < output.size`, decoder has flushed everything it could.
 *  @return : 0 when a frame is completely decoded and fully flushed,
 *            an error code, which can be tested using ZSTD_isError(),
-*            any other value > 0, which means there is still some work to do to complete the frame.
-*            The return value is a suggested next input size (just an hint, to help latency).
+*            any other value > 0, which means there is still some decoding to do to complete current frame.
+*            The return value is a suggested next input size (a hint to improve latency) that will never load more than the current frame.
 * *******************************************************************************/
 
-typedef struct ZSTD_DStream_s ZSTD_DStream;
+typedef ZSTD_DCtx ZSTD_DStream;  /**< DCtx and DStream are now effectively same object (>= v1.3.0) */
+                                 /* Continue to distinguish them for compatibility with versions <= v1.2.0 */
+/*===== ZSTD_DStream management functions =====*/
 ZSTDLIB_API ZSTD_DStream* ZSTD_createDStream(void);
 ZSTDLIB_API size_t ZSTD_freeDStream(ZSTD_DStream* zds);
 
+/*===== Streaming decompression functions =====*/
+ZSTDLIB_API size_t ZSTD_initDStream(ZSTD_DStream* zds);
+ZSTDLIB_API size_t ZSTD_decompressStream(ZSTD_DStream* zds, ZSTD_outBuffer* output, ZSTD_inBuffer* input);
+
 ZSTDLIB_API size_t ZSTD_DStreamInSize(void);    /*!< recommended size for input buffer */
 ZSTDLIB_API size_t ZSTD_DStreamOutSize(void);   /*!< recommended size for output buffer. Guarantee to successfully flush at least one complete block in all circumstances. */
 
-ZSTDLIB_API size_t ZSTD_initDStream(ZSTD_DStream* zds);
-ZSTDLIB_API size_t ZSTD_decompressStream(ZSTD_DStream* zds, ZSTD_outBuffer* output, ZSTD_inBuffer* input);
+#endif  /* ZSTD_H_235446 */
 
 
 
-#ifdef ZSTD_STATIC_LINKING_ONLY
-
-/* ====================================================================================
+/****************************************************************************************
+ * START OF ADVANCED AND EXPERIMENTAL FUNCTIONS
  * The definitions in this section are considered experimental.
- * They should never be used with a dynamic library, as they may change in the future.
- * They are provided for advanced usages.
+ * They should never be used with a dynamic library, as prototypes may change in the future.
+ * They are provided for advanced scenarios.
  * Use them only in association with static linking.
- * ==================================================================================== */
+ * ***************************************************************************************/
 
-/*--- Constants ---*/
-#define ZSTD_MAGICNUMBER            0xFD2FB528   /* v0.8 */
+#if defined(ZSTD_STATIC_LINKING_ONLY) && !defined(ZSTD_H_ZSTD_STATIC_LINKING_ONLY)
+#define ZSTD_H_ZSTD_STATIC_LINKING_ONLY
+
+/* --- Constants ---*/
+#define ZSTD_MAGICNUMBER            0xFD2FB528   /* >= v0.8.0 */
 #define ZSTD_MAGIC_SKIPPABLE_START  0x184D2A50U
+#define ZSTD_MAGIC_DICTIONARY       0xEC30A437   /* v0.7+ */
 
-#define ZSTD_WINDOWLOG_MAX_32  25
+#define ZSTD_WINDOWLOG_MAX_32  27
 #define ZSTD_WINDOWLOG_MAX_64  27
-#define ZSTD_WINDOWLOG_MAX    ((U32)(MEM_32bits() ? ZSTD_WINDOWLOG_MAX_32 : ZSTD_WINDOWLOG_MAX_64))
+#define ZSTD_WINDOWLOG_MAX    ((unsigned)(sizeof(size_t) == 4 ? ZSTD_WINDOWLOG_MAX_32 : ZSTD_WINDOWLOG_MAX_64))
 #define ZSTD_WINDOWLOG_MIN     10
 #define ZSTD_HASHLOG_MAX       ZSTD_WINDOWLOG_MAX
 #define ZSTD_HASHLOG_MIN        6
@@ -304,14 +393,16 @@ ZSTDLIB_API size_t ZSTD_decompressStream(ZSTD_DStream* zds, ZSTD_outBuffer* outp
 #define ZSTD_TARGETLENGTH_MAX 999
 
 #define ZSTD_FRAMEHEADERSIZE_MAX 18    /* for static allocation */
-static const size_t ZSTD_frameHeaderSize_prefix = 5;
-static const size_t ZSTD_frameHeaderSize_min = 6;
+#define ZSTD_FRAMEHEADERSIZE_MIN  6
+static const size_t ZSTD_frameHeaderSize_prefix = 5;  /* minimum input size to know frame header size */
 static const size_t ZSTD_frameHeaderSize_max = ZSTD_FRAMEHEADERSIZE_MAX;
+static const size_t ZSTD_frameHeaderSize_min = ZSTD_FRAMEHEADERSIZE_MIN;
 static const size_t ZSTD_skippableHeaderSize = 8;  /* magic number + skippable frame length */
 
 
-/*--- Types ---*/
-typedef enum { ZSTD_fast, ZSTD_dfast, ZSTD_greedy, ZSTD_lazy, ZSTD_lazy2, ZSTD_btlazy2, ZSTD_btopt } ZSTD_strategy;   /* from faster to stronger */
+/*--- Advanced types ---*/
+typedef enum { ZSTD_fast=1, ZSTD_dfast, ZSTD_greedy, ZSTD_lazy, ZSTD_lazy2,
+               ZSTD_btlazy2, ZSTD_btopt, ZSTD_btultra } ZSTD_strategy;   /* from faster to stronger */
 
 typedef struct {
     unsigned windowLog;      /**< largest match distance : larger == more compression, more memory needed during decompression */
@@ -324,9 +415,9 @@ typedef struct {
 } ZSTD_compressionParameters;
 
 typedef struct {
-    unsigned contentSizeFlag; /**< 1: content size will be in frame header (if known). */
-    unsigned checksumFlag;    /**< 1: will generate a 22-bits checksum at end of frame, to be used for error detection by decompressor */
-    unsigned noDictIDFlag;    /**< 1: no dict ID will be saved into frame header (if dictionary compression) */
+    unsigned contentSizeFlag; /**< 1: content size will be in frame header (when known) */
+    unsigned checksumFlag;    /**< 1: generate a 32-bits checksum at end of frame, for error detection */
+    unsigned noDictIDFlag;    /**< 1: no dictID will be saved into frame header (if dictionary compression) */
 } ZSTD_frameParameters;
 
 typedef struct {
@@ -334,125 +425,350 @@ typedef struct {
     ZSTD_frameParameters fParams;
 } ZSTD_parameters;
 
-/* custom memory allocation functions */
+typedef struct {
+    unsigned long long frameContentSize;
+    size_t windowSize;
+    unsigned dictID;
+    unsigned checksumFlag;
+} ZSTD_frameHeader;
+
+/*= Custom memory allocation functions */
 typedef void* (*ZSTD_allocFunction) (void* opaque, size_t size);
 typedef void  (*ZSTD_freeFunction) (void* opaque, void* address);
 typedef struct { ZSTD_allocFunction customAlloc; ZSTD_freeFunction customFree; void* opaque; } ZSTD_customMem;
+/* use this constant to defer to stdlib's functions */
+static const ZSTD_customMem ZSTD_defaultCMem = { NULL, NULL, NULL };
 
 
-/*-*************************************
+/***************************************
+*  Frame size functions
+***************************************/
+
+/*! ZSTD_findFrameCompressedSize() :
+ *  `src` should point to the start of a ZSTD encoded frame or skippable frame
+ *  `srcSize` must be at least as large as the frame
+ *  @return : the compressed size of the first frame starting at `src`,
+ *            suitable to pass to `ZSTD_decompress` or similar,
+ *            or an error code if input is invalid */
+ZSTDLIB_API size_t ZSTD_findFrameCompressedSize(const void* src, size_t srcSize);
+
+/*! ZSTD_findDecompressedSize() :
+ *  `src` should point to the start of a series of ZSTD encoded and/or skippable frames
+ *  `srcSize` must be the _exact_ size of this series
+ *       (i.e. there should be a frame boundary exactly at `srcSize` bytes after `src`)
+ *  @return : - decompressed size of all data in all successive frames
+ *            - if the decompressed size cannot be determined: ZSTD_CONTENTSIZE_UNKNOWN
+ *            - if an error occurred: ZSTD_CONTENTSIZE_ERROR
+ *
+ *   note 1 : decompressed size is an optional field, that may not be present, especially in streaming mode.
+ *            When `return==ZSTD_CONTENTSIZE_UNKNOWN`, data to decompress could be any size.
+ *            In which case, it's necessary to use streaming mode to decompress data.
+ *   note 2 : decompressed size is always present when compression is done with ZSTD_compress()
+ *   note 3 : decompressed size can be very large (64-bits value),
+ *            potentially larger than what local system can handle as a single memory segment.
+ *            In which case, it's necessary to use streaming mode to decompress data.
+ *   note 4 : If source is untrusted, decompressed size could be wrong or intentionally modified.
+ *            Always ensure result fits within application's authorized limits.
+ *            Each application can set its own limits.
+ *   note 5 : ZSTD_findDecompressedSize handles multiple frames, and so it must traverse the input to
+ *            read each contained frame header.  This is fast as most of the data is skipped,
+ *            however it does mean that all frame data must be present and valid. */
+ZSTDLIB_API unsigned long long ZSTD_findDecompressedSize(const void* src, size_t srcSize);
+
+/*! ZSTD_frameHeaderSize() :
+*   `src` should point to the start of a ZSTD frame
+*   `srcSize` must be >= ZSTD_frameHeaderSize_prefix.
+*   @return : size of the Frame Header */
+ZSTDLIB_API size_t ZSTD_frameHeaderSize(const void* src, size_t srcSize);
+
+
+/***************************************
+*  Context memory usage
+***************************************/
+
+/*! ZSTD_sizeof_*() :
+ *  These functions give the current memory usage of selected object.
+ *  Object memory usage can evolve if it's re-used multiple times. */
+ZSTDLIB_API size_t ZSTD_sizeof_CCtx(const ZSTD_CCtx* cctx);
+ZSTDLIB_API size_t ZSTD_sizeof_DCtx(const ZSTD_DCtx* dctx);
+ZSTDLIB_API size_t ZSTD_sizeof_CStream(const ZSTD_CStream* zcs);
+ZSTDLIB_API size_t ZSTD_sizeof_DStream(const ZSTD_DStream* zds);
+ZSTDLIB_API size_t ZSTD_sizeof_CDict(const ZSTD_CDict* cdict);
+ZSTDLIB_API size_t ZSTD_sizeof_DDict(const ZSTD_DDict* ddict);
+
+/*! ZSTD_estimate*() :
+ *  These functions make it possible to estimate memory usage
+ *  of a future {D,C}Ctx, before its creation.
+ *  ZSTD_estimateCCtxSize() will provide a budget large enough for any compression level up to selected one.
+ *  It will also consider src size to be arbitrarily "large", which is worst case.
+ *  If srcSize is known to always be small, ZSTD_estimateCCtxSize_advanced() can provide a tighter estimation.
+ *  ZSTD_estimateCCtxSize_advanced() can be used in tandem with ZSTD_getCParams() to create cParams from compressionLevel.
+ *  Note : CCtx estimation is only correct for single-threaded compression */
+ZSTDLIB_API size_t ZSTD_estimateCCtxSize(int compressionLevel);
+ZSTDLIB_API size_t ZSTD_estimateCCtxSize_advanced(ZSTD_compressionParameters cParams);
+ZSTDLIB_API size_t ZSTD_estimateDCtxSize(void);
+
+/*! ZSTD_estimate?StreamSize() :
+ *  ZSTD_estimateCStreamSize() will provide a budget large enough for any compression level up to selected one.
+ *  It will also consider src size to be arbitrarily "large", which is worst case.
+ *  If srcSize is known to always be small, ZSTD_estimateCStreamSize_advanced() can provide a tighter estimation.
+ *  ZSTD_estimateCStreamSize_advanced() can be used in tandem with ZSTD_getCParams() to create cParams from compressionLevel.
+ *  Note : CStream estimation is only correct for single-threaded compression.
+ *  ZSTD_DStream memory budget depends on window Size.
+ *  This information can be passed manually, using ZSTD_estimateDStreamSize,
+ *  or deduced from a valid frame Header, using ZSTD_estimateDStreamSize_fromFrame();
+ *  Note : if streaming is init with function ZSTD_init?Stream_usingDict(),
+ *         an internal ?Dict will be created, whose additional size is not estimated here.
+ *         In this case, get total size by adding ZSTD_estimate?DictSize */
+ZSTDLIB_API size_t ZSTD_estimateCStreamSize(int compressionLevel);
+ZSTDLIB_API size_t ZSTD_estimateCStreamSize_advanced(ZSTD_compressionParameters cParams);
+ZSTDLIB_API size_t ZSTD_estimateDStreamSize(size_t windowSize);
+ZSTDLIB_API size_t ZSTD_estimateDStreamSize_fromFrame(const void* src, size_t srcSize);
+
+/*! ZSTD_estimate?DictSize() :
+ *  ZSTD_estimateCDictSize() will bet that src size is relatively "small", and content is copied, like ZSTD_createCDict().
+ *  ZSTD_estimateCDictSize_advanced() makes it possible to control precisely compression parameters, like ZSTD_createCDict_advanced().
+ *  Note : dictionary created "byReference" are smaller */
+ZSTDLIB_API size_t ZSTD_estimateCDictSize(size_t dictSize, int compressionLevel);
+ZSTDLIB_API size_t ZSTD_estimateCDictSize_advanced(size_t dictSize, ZSTD_compressionParameters cParams, unsigned byReference);
+ZSTDLIB_API size_t ZSTD_estimateDDictSize(size_t dictSize, unsigned byReference);
+
+
+/***************************************
 *  Advanced compression functions
 ***************************************/
-/*! ZSTD_estimateCCtxSize() :
- *  Gives the amount of memory allocated for a ZSTD_CCtx given a set of compression parameters.
- *  `frameContentSize` is an optional parameter, provide `0` if unknown */
-ZSTDLIB_API size_t ZSTD_estimateCCtxSize(ZSTD_compressionParameters cParams);
-
 /*! ZSTD_createCCtx_advanced() :
  *  Create a ZSTD compression context using external alloc and free functions */
 ZSTDLIB_API ZSTD_CCtx* ZSTD_createCCtx_advanced(ZSTD_customMem customMem);
 
-/*! ZSTD_sizeofCCtx() :
- *  Gives the amount of memory used by a given ZSTD_CCtx */
-ZSTDLIB_API size_t ZSTD_sizeof_CCtx(const ZSTD_CCtx* cctx);
+/*! ZSTD_initStaticCCtx() : initialize a fixed-size zstd compression context
+ *  workspace: The memory area to emplace the context into.
+ *             Provided pointer must be 8-bytes aligned.
+ *             It must outlive context usage.
+ *  workspaceSize: Use ZSTD_estimateCCtxSize() or ZSTD_estimateCStreamSize()
+ *                 to determine how large workspace must be to support scenario.
+ * @return : pointer to ZSTD_CCtx*, or NULL if error (size too small)
+ *  Note : zstd will never resize nor malloc() when using a static cctx.
+ *         If it needs more memory than available, it will simply error out.
+ *  Note 2 : there is no corresponding "free" function.
+ *           Since workspace was allocated externally, it must be freed externally too.
+ *  Limitation 1 : currently not compatible with internal CDict creation, such as
+ *                 ZSTD_CCtx_loadDictionary() or ZSTD_initCStream_usingDict().
+ *  Limitation 2 : currently not compatible with multi-threading
+ */
+ZSTDLIB_API ZSTD_CCtx* ZSTD_initStaticCCtx(void* workspace, size_t workspaceSize);
 
+
+/* !!! To be deprecated !!! */
+typedef enum {
+    ZSTD_p_forceWindow,   /* Force back-references to remain < windowSize, even when referencing Dictionary content (default:0) */
+    ZSTD_p_forceRawDict   /* Force loading dictionary in "content-only" mode (no header analysis) */
+} ZSTD_CCtxParameter;
+/*! ZSTD_setCCtxParameter() :
+ *  Set advanced parameters, selected through enum ZSTD_CCtxParameter
+ *  @result : 0, or an error code (which can be tested with ZSTD_isError()) */
+ZSTDLIB_API size_t ZSTD_setCCtxParameter(ZSTD_CCtx* cctx, ZSTD_CCtxParameter param, unsigned value);
+
+
+/*! ZSTD_createCDict_byReference() :
+ *  Create a digested dictionary for compression
+ *  Dictionary content is simply referenced, and therefore stays in dictBuffer.
+ *  It is important that dictBuffer outlives CDict, it must remain read accessible throughout the lifetime of CDict */
+ZSTDLIB_API ZSTD_CDict* ZSTD_createCDict_byReference(const void* dictBuffer, size_t dictSize, int compressionLevel);
+
+
+typedef enum { ZSTD_dm_auto=0,        /* dictionary is "full" if it starts with ZSTD_MAGIC_DICTIONARY, otherwise it is "rawContent" */
+               ZSTD_dm_rawContent,    /* ensures dictionary is always loaded as rawContent, even if it starts with ZSTD_MAGIC_DICTIONARY */
+               ZSTD_dm_fullDict       /* refuses to load a dictionary if it does not respect Zstandard's specification */
+} ZSTD_dictMode_e;
 /*! ZSTD_createCDict_advanced() :
  *  Create a ZSTD_CDict using external alloc and free, and customized compression parameters */
 ZSTDLIB_API ZSTD_CDict* ZSTD_createCDict_advanced(const void* dict, size_t dictSize,
-                                                  ZSTD_parameters params, ZSTD_customMem customMem);
+                                                  unsigned byReference, ZSTD_dictMode_e dictMode,
+                                                  ZSTD_compressionParameters cParams,
+                                                  ZSTD_customMem customMem);
 
-/*! ZSTD_sizeof_CDict() :
- *  Gives the amount of memory used by a given ZSTD_sizeof_CDict */
-ZSTDLIB_API size_t ZSTD_sizeof_CDict(const ZSTD_CDict* cdict);
-
-/*! ZSTD_getParams() :
-*   same as ZSTD_getCParams(), but @return a full `ZSTD_parameters` object instead of a `ZSTD_compressionParameters`.
-*   All fields of `ZSTD_frameParameters` are set to default (0) */
-ZSTDLIB_API ZSTD_parameters ZSTD_getParams(int compressionLevel, unsigned long long srcSize, size_t dictSize);
+/*! ZSTD_initStaticCDict() :
+ *  Generate a digested dictionary in provided memory area.
+ *  workspace: The memory area to emplace the dictionary into.
+ *             Provided pointer must be 8-bytes aligned.
+ *             It must outlive dictionary usage.
+ *  workspaceSize: Use ZSTD_estimateCDictSize()
+ *                 to determine how large workspace must be.
+ *  cParams : use ZSTD_getCParams() to transform a compression level
+ *            into its relevant cParams.
+ * @return : pointer to ZSTD_CDict*, or NULL if error (size too small)
+ *  Note : there is no corresponding "free" function.
+ *         Since workspace was allocated externally, it must be freed externally.
+ */
+ZSTDLIB_API ZSTD_CDict* ZSTD_initStaticCDict(
+                            void* workspace, size_t workspaceSize,
+                      const void* dict, size_t dictSize,
+                            unsigned byReference, ZSTD_dictMode_e dictMode,
+                            ZSTD_compressionParameters cParams);
 
 /*! ZSTD_getCParams() :
-*   @return ZSTD_compressionParameters structure for a selected compression level and srcSize.
-*   `srcSize` value is optional, select 0 if not known */
-ZSTDLIB_API ZSTD_compressionParameters ZSTD_getCParams(int compressionLevel, unsigned long long srcSize, size_t dictSize);
+*   @return ZSTD_compressionParameters structure for a selected compression level and estimated srcSize.
+*   `estimatedSrcSize` value is optional, select 0 if not known */
+ZSTDLIB_API ZSTD_compressionParameters ZSTD_getCParams(int compressionLevel, unsigned long long estimatedSrcSize, size_t dictSize);
+
+/*! ZSTD_getParams() :
+*   same as ZSTD_getCParams(), but @return a full `ZSTD_parameters` object instead of sub-component `ZSTD_compressionParameters`.
+*   All fields of `ZSTD_frameParameters` are set to default (0) */
+ZSTDLIB_API ZSTD_parameters ZSTD_getParams(int compressionLevel, unsigned long long estimatedSrcSize, size_t dictSize);
 
 /*! ZSTD_checkCParams() :
 *   Ensure param values remain within authorized range */
 ZSTDLIB_API size_t ZSTD_checkCParams(ZSTD_compressionParameters params);
 
 /*! ZSTD_adjustCParams() :
-*   optimize params for a given `srcSize` and `dictSize`.
-*   both values are optional, select `0` if unknown. */
+ *  optimize params for a given `srcSize` and `dictSize`.
+ *  both values are optional, select `0` if unknown. */
 ZSTDLIB_API ZSTD_compressionParameters ZSTD_adjustCParams(ZSTD_compressionParameters cPar, unsigned long long srcSize, size_t dictSize);
 
 /*! ZSTD_compress_advanced() :
-*   Same as ZSTD_compress_usingDict(), with fine-tune control of each compression parameter */
-ZSTDLIB_API size_t ZSTD_compress_advanced (ZSTD_CCtx* ctx,
-                                           void* dst, size_t dstCapacity,
-                                     const void* src, size_t srcSize,
-                                     const void* dict,size_t dictSize,
-                                           ZSTD_parameters params);
+*   Same as ZSTD_compress_usingDict(), with fine-tune control over each compression parameter */
+ZSTDLIB_API size_t ZSTD_compress_advanced (ZSTD_CCtx* cctx,
+                                  void* dst, size_t dstCapacity,
+                            const void* src, size_t srcSize,
+                            const void* dict,size_t dictSize,
+                                  ZSTD_parameters params);
+
+/*! ZSTD_compress_usingCDict_advanced() :
+*   Same as ZSTD_compress_usingCDict(), with fine-tune control over frame parameters */
+ZSTDLIB_API size_t ZSTD_compress_usingCDict_advanced(ZSTD_CCtx* cctx,
+                                  void* dst, size_t dstCapacity,
+                            const void* src, size_t srcSize,
+                            const ZSTD_CDict* cdict, ZSTD_frameParameters fParams);
 
 
-/*--- Advanced Decompression functions ---*/
+/*--- Advanced decompression functions ---*/
 
-/*! ZSTD_estimateDCtxSize() :
- *  Gives the potential amount of memory allocated to create a ZSTD_DCtx */
-ZSTDLIB_API size_t ZSTD_estimateDCtxSize(void);
+/*! ZSTD_isFrame() :
+ *  Tells if the content of `buffer` starts with a valid Frame Identifier.
+ *  Note : Frame Identifier is 4 bytes. If `size < 4`, @return will always be 0.
+ *  Note 2 : Legacy Frame Identifiers are considered valid only if Legacy Support is enabled.
+ *  Note 3 : Skippable Frame Identifiers are considered valid. */
+ZSTDLIB_API unsigned ZSTD_isFrame(const void* buffer, size_t size);
 
 /*! ZSTD_createDCtx_advanced() :
  *  Create a ZSTD decompression context using external alloc and free functions */
 ZSTDLIB_API ZSTD_DCtx* ZSTD_createDCtx_advanced(ZSTD_customMem customMem);
 
-/*! ZSTD_sizeof_DCtx() :
- *  Gives the amount of memory used by a given ZSTD_DCtx */
-ZSTDLIB_API size_t ZSTD_sizeof_DCtx(const ZSTD_DCtx* dctx);
+/*! ZSTD_initStaticDCtx() : initialize a fixed-size zstd decompression context
+ *  workspace: The memory area to emplace the context into.
+ *             Provided pointer must be 8-bytes aligned.
+ *             It must outlive context usage.
+ *  workspaceSize: Use ZSTD_estimateDCtxSize() or ZSTD_estimateDStreamSize()
+ *                 to determine how large workspace must be to support scenario.
+ * @return : pointer to ZSTD_DCtx*, or NULL if error (size too small)
+ *  Note : zstd will never resize nor malloc() when using a static dctx.
+ *         If it needs more memory than available, it will simply error out.
+ *  Note 2 : static dctx is incompatible with legacy support
+ *  Note 3 : there is no corresponding "free" function.
+ *           Since workspace was allocated externally, it must be freed externally.
+ *  Limitation : currently not compatible with internal DDict creation,
+ *               such as ZSTD_initDStream_usingDict().
+ */
+ZSTDLIB_API ZSTD_DCtx* ZSTD_initStaticDCtx(void* workspace, size_t workspaceSize);
 
-/*! ZSTD_sizeof_DDict() :
- *  Gives the amount of memory used by a given ZSTD_DDict */
-ZSTDLIB_API size_t ZSTD_sizeof_DDict(const ZSTD_DDict* ddict);
+/*! ZSTD_createDDict_byReference() :
+ *  Create a digested dictionary, ready to start decompression operation without startup delay.
+ *  Dictionary content is referenced, and therefore stays in dictBuffer.
+ *  It is important that dictBuffer outlives DDict,
+ *  it must remain read accessible throughout the lifetime of DDict */
+ZSTDLIB_API ZSTD_DDict* ZSTD_createDDict_byReference(const void* dictBuffer, size_t dictSize);
+
+/*! ZSTD_createDDict_advanced() :
+ *  Create a ZSTD_DDict using external alloc and free, optionally by reference */
+ZSTDLIB_API ZSTD_DDict* ZSTD_createDDict_advanced(const void* dict, size_t dictSize,
+                                                  unsigned byReference, ZSTD_customMem customMem);
+
+/*! ZSTD_initStaticDDict() :
+ *  Generate a digested dictionary in provided memory area.
+ *  workspace: The memory area to emplace the dictionary into.
+ *             Provided pointer must be 8-bytes aligned.
+ *             It must outlive dictionary usage.
+ *  workspaceSize: Use ZSTD_estimateDDictSize()
+ *                 to determine how large workspace must be.
+ * @return : pointer to ZSTD_DDict*, or NULL if error (size too small)
+ *  Note : there is no corresponding "free" function.
+ *         Since workspace was allocated externally, it must be freed externally.
+ */
+ZSTDLIB_API ZSTD_DDict* ZSTD_initStaticDDict(void* workspace, size_t workspaceSize,
+                                             const void* dict, size_t dictSize,
+                                             unsigned byReference);
+
+/*! ZSTD_getDictID_fromDict() :
+ *  Provides the dictID stored within dictionary.
+ *  if @return == 0, the dictionary is not conformant with Zstandard specification.
+ *  It can still be loaded, but as a content-only dictionary. */
+ZSTDLIB_API unsigned ZSTD_getDictID_fromDict(const void* dict, size_t dictSize);
+
+/*! ZSTD_getDictID_fromDDict() :
+ *  Provides the dictID of the dictionary loaded into `ddict`.
+ *  If @return == 0, the dictionary is not conformant to Zstandard specification, or empty.
+ *  Non-conformant dictionaries can still be loaded, but as content-only dictionaries. */
+ZSTDLIB_API unsigned ZSTD_getDictID_fromDDict(const ZSTD_DDict* ddict);
+
+/*! ZSTD_getDictID_fromFrame() :
+ *  Provides the dictID required to decompress the frame stored within `src`.
+ *  If @return == 0, the dictID could not be decoded.
+ *  This could be for one of the following reasons :
+ *  - The frame does not require a dictionary to be decoded (most common case).
+ *  - The frame was built with dictID intentionally removed. Whatever dictionary is necessary is a hidden information.
+ *    Note : this use case also happens when using a non-conformant dictionary.
+ *  - `srcSize` is too small, and as a result, the frame header could not be decoded (only possible if `srcSize < ZSTD_FRAMEHEADERSIZE_MAX`).
+ *  - This is not a Zstandard frame.
+ *  When identifying the exact failure cause, it's possible to use ZSTD_getFrameHeader(), which will provide a more precise error code. */
+ZSTDLIB_API unsigned ZSTD_getDictID_fromFrame(const void* src, size_t srcSize);
 
 
-/* ******************************************************************
-*  Advanced Streaming functions
+/********************************************************************
+*  Advanced streaming functions
 ********************************************************************/
 
-/*======   compression   ======*/
-
+/*=====   Advanced Streaming compression functions  =====*/
 ZSTDLIB_API ZSTD_CStream* ZSTD_createCStream_advanced(ZSTD_customMem customMem);
-ZSTDLIB_API size_t ZSTD_initCStream_usingDict(ZSTD_CStream* zcs, const void* dict, size_t dictSize, int compressionLevel);
+ZSTDLIB_API ZSTD_CStream* ZSTD_initStaticCStream(void* workspace, size_t workspaceSize);    /**< same as ZSTD_initStaticCCtx() */
+ZSTDLIB_API size_t ZSTD_initCStream_srcSize(ZSTD_CStream* zcs, int compressionLevel, unsigned long long pledgedSrcSize);   /**< pledgedSrcSize must be correct, a size of 0 means unknown.  for a frame size of 0 use initCStream_advanced */
+ZSTDLIB_API size_t ZSTD_initCStream_usingDict(ZSTD_CStream* zcs, const void* dict, size_t dictSize, int compressionLevel); /**< creates an internal CDict (incompatible with static CCtx), except if dict == NULL or dictSize < 8, in which case no dict is used. */
 ZSTDLIB_API size_t ZSTD_initCStream_advanced(ZSTD_CStream* zcs, const void* dict, size_t dictSize,
-                                             ZSTD_parameters params, unsigned long long pledgedSrcSize);  /**< pledgedSrcSize is optional and can be zero == unknown */
-ZSTDLIB_API size_t ZSTD_resetCStream(ZSTD_CStream* zcs, unsigned long long pledgedSrcSize);  /**< re-use compression parameters from previous init; saves dictionary loading */
-ZSTDLIB_API size_t ZSTD_sizeof_CStream(const ZSTD_CStream* zcs);
+                                             ZSTD_parameters params, unsigned long long pledgedSrcSize);  /**< pledgedSrcSize is optional and can be 0 (meaning unknown). note: if the contentSizeFlag is set, pledgedSrcSize == 0 means the source size is actually 0 */
+ZSTDLIB_API size_t ZSTD_initCStream_usingCDict(ZSTD_CStream* zcs, const ZSTD_CDict* cdict);  /**< note : cdict will just be referenced, and must outlive compression session */
+ZSTDLIB_API size_t ZSTD_initCStream_usingCDict_advanced(ZSTD_CStream* zcs, const ZSTD_CDict* cdict, ZSTD_frameParameters fParams, unsigned long long pledgedSrcSize);  /**< same as ZSTD_initCStream_usingCDict(), with control over frame parameters */
+
+/*! ZSTD_resetCStream() :
+ *  start a new compression job, using same parameters from previous job.
+ *  This is typically useful to skip dictionary loading stage, since it will re-use it in-place.
+ *  Note that zcs must be init at least once before using ZSTD_resetCStream().
+ *  pledgedSrcSize==0 means "srcSize unknown".
+ *  If pledgedSrcSize > 0, its value must be correct, as it will be written in header, and controlled at the end.
+ *  @return : 0, or an error code (which can be tested using ZSTD_isError()) */
+ZSTDLIB_API size_t ZSTD_resetCStream(ZSTD_CStream* zcs, unsigned long long pledgedSrcSize);
 
 
-/*======   decompression   ======*/
-
-typedef enum { ZSTDdsp_maxWindowSize } ZSTD_DStreamParameter_e;
-
+/*=====   Advanced Streaming decompression functions  =====*/
+typedef enum { DStream_p_maxWindowSize } ZSTD_DStreamParameter_e;
 ZSTDLIB_API ZSTD_DStream* ZSTD_createDStream_advanced(ZSTD_customMem customMem);
-ZSTDLIB_API size_t ZSTD_initDStream_usingDict(ZSTD_DStream* zds, const void* dict, size_t dictSize);
+ZSTDLIB_API ZSTD_DStream* ZSTD_initStaticDStream(void* workspace, size_t workspaceSize);    /**< same as ZSTD_initStaticDCtx() */
 ZSTDLIB_API size_t ZSTD_setDStreamParameter(ZSTD_DStream* zds, ZSTD_DStreamParameter_e paramType, unsigned paramValue);
+ZSTDLIB_API size_t ZSTD_initDStream_usingDict(ZSTD_DStream* zds, const void* dict, size_t dictSize); /**< note: a dict will not be used if dict == NULL or dictSize < 8 */
+ZSTDLIB_API size_t ZSTD_initDStream_usingDDict(ZSTD_DStream* zds, const ZSTD_DDict* ddict);  /**< note : ddict will just be referenced, and must outlive decompression session */
 ZSTDLIB_API size_t ZSTD_resetDStream(ZSTD_DStream* zds);  /**< re-use decompression parameters from previous init; saves dictionary loading */
-ZSTDLIB_API size_t ZSTD_sizeof_DStream(const ZSTD_DStream* zds);
 
 
-/* ******************************************************************
+/*********************************************************************
 *  Buffer-less and synchronous inner streaming functions
-********************************************************************/
-/* This is an advanced API, giving full control over buffer management, for users which need direct control over memory.
+*
+*  This is an advanced API, giving full control over buffer management, for users which need direct control over memory.
 *  But it's also a complex one, with many restrictions (documented below).
-*  Prefer using normal streaming API for an easier experience */
+*  Prefer using normal streaming API for an easier experience
+********************************************************************* */
 
-ZSTDLIB_API size_t ZSTD_compressBegin(ZSTD_CCtx* cctx, int compressionLevel);
-ZSTDLIB_API size_t ZSTD_compressBegin_usingDict(ZSTD_CCtx* cctx, const void* dict, size_t dictSize, int compressionLevel);
-ZSTDLIB_API size_t ZSTD_compressBegin_advanced(ZSTD_CCtx* cctx, const void* dict, size_t dictSize, ZSTD_parameters params, unsigned long long pledgedSrcSize);
-ZSTDLIB_API size_t ZSTD_copyCCtx(ZSTD_CCtx* cctx, const ZSTD_CCtx* preparedCCtx, unsigned long long pledgedSrcSize);
+/**
+  Buffer-less streaming compression (synchronous mode)
 
-ZSTDLIB_API size_t ZSTD_compressContinue(ZSTD_CCtx* cctx, void* dst, size_t dstCapacity, const void* src, size_t srcSize);
-ZSTDLIB_API size_t ZSTD_compressEnd(ZSTD_CCtx* cctx, void* dst, size_t dstCapacity, const void* src, size_t srcSize);
-
-/*
   A ZSTD_CCtx object is required to track streaming operations.
   Use ZSTD_createCCtx() / ZSTD_freeCCtx() to manage resource.
   ZSTD_CCtx object can be re-used multiple times within successive compression operations.
@@ -475,53 +791,50 @@ ZSTDLIB_API size_t ZSTD_compressEnd(ZSTD_CCtx* cctx, void* dst, size_t dstCapaci
     In which case, it will "discard" the relevant memory section from its history.
 
   Finish a frame with ZSTD_compressEnd(), which will write the last block(s) and optional checksum.
-  It's possible to use a NULL,0 src content, in which case, it will write a final empty block to end the frame,
-  Without last block mark, frames will be considered unfinished (broken) by decoders.
+  It's possible to use srcSize==0, in which case, it will write a final empty block to end the frame.
+  Without last block mark, frames will be considered unfinished (corrupted) by decoders.
 
-  You can then reuse `ZSTD_CCtx` (ZSTD_compressBegin()) to compress some new frame.
+  `ZSTD_CCtx` object can be re-used (ZSTD_compressBegin()) to compress some new frame.
 */
 
-typedef struct {
-    unsigned long long frameContentSize;
-    unsigned windowSize;
-    unsigned dictID;
-    unsigned checksumFlag;
-} ZSTD_frameParams;
+/*=====   Buffer-less streaming compression functions  =====*/
+ZSTDLIB_API size_t ZSTD_compressBegin(ZSTD_CCtx* cctx, int compressionLevel);
+ZSTDLIB_API size_t ZSTD_compressBegin_usingDict(ZSTD_CCtx* cctx, const void* dict, size_t dictSize, int compressionLevel);
+ZSTDLIB_API size_t ZSTD_compressBegin_advanced(ZSTD_CCtx* cctx, const void* dict, size_t dictSize, ZSTD_parameters params, unsigned long long pledgedSrcSize); /**< pledgedSrcSize is optional and can be 0 (meaning unknown). note: if the contentSizeFlag is set, pledgedSrcSize == 0 means the source size is actually 0 */
+ZSTDLIB_API size_t ZSTD_compressBegin_usingCDict(ZSTD_CCtx* cctx, const ZSTD_CDict* cdict); /**< note: fails if cdict==NULL */
+ZSTDLIB_API size_t ZSTD_compressBegin_usingCDict_advanced(ZSTD_CCtx* const cctx, const ZSTD_CDict* const cdict, ZSTD_frameParameters const fParams, unsigned long long const pledgedSrcSize);   /* compression parameters are already set within cdict. pledgedSrcSize=0 means null-size */
+ZSTDLIB_API size_t ZSTD_copyCCtx(ZSTD_CCtx* cctx, const ZSTD_CCtx* preparedCCtx, unsigned long long pledgedSrcSize); /**<  note: if pledgedSrcSize can be 0, indicating unknown size.  if it is non-zero, it must be accurate.  for 0 size frames, use compressBegin_advanced */
 
-ZSTDLIB_API size_t ZSTD_getFrameParams(ZSTD_frameParams* fparamsPtr, const void* src, size_t srcSize);   /**< doesn't consume input, see details below */
+ZSTDLIB_API size_t ZSTD_compressContinue(ZSTD_CCtx* cctx, void* dst, size_t dstCapacity, const void* src, size_t srcSize);
+ZSTDLIB_API size_t ZSTD_compressEnd(ZSTD_CCtx* cctx, void* dst, size_t dstCapacity, const void* src, size_t srcSize);
 
-ZSTDLIB_API size_t ZSTD_decompressBegin(ZSTD_DCtx* dctx);
-ZSTDLIB_API size_t ZSTD_decompressBegin_usingDict(ZSTD_DCtx* dctx, const void* dict, size_t dictSize);
-ZSTDLIB_API void   ZSTD_copyDCtx(ZSTD_DCtx* dctx, const ZSTD_DCtx* preparedDCtx);
 
-ZSTDLIB_API size_t ZSTD_nextSrcSizeToDecompress(ZSTD_DCtx* dctx);
-ZSTDLIB_API size_t ZSTD_decompressContinue(ZSTD_DCtx* dctx, void* dst, size_t dstCapacity, const void* src, size_t srcSize);
 
-typedef enum { ZSTDnit_frameHeader, ZSTDnit_blockHeader, ZSTDnit_block, ZSTDnit_lastBlock, ZSTDnit_checksum, ZSTDnit_skippableFrame } ZSTD_nextInputType_e;
-ZSTDLIB_API ZSTD_nextInputType_e ZSTD_nextInputType(ZSTD_DCtx* dctx);
-
-/*
+/*-
   Buffer-less streaming decompression (synchronous mode)
 
   A ZSTD_DCtx object is required to track streaming operations.
   Use ZSTD_createDCtx() / ZSTD_freeDCtx() to manage it.
   A ZSTD_DCtx object can be re-used multiple times.
 
-  First typical operation is to retrieve frame parameters, using ZSTD_getFrameParams().
-  It fills a ZSTD_frameParams structure which provide important information to correctly decode the frame,
-  such as the minimum rolling buffer size to allocate to decompress data (`windowSize`),
-  and the dictionary ID used.
+  First typical operation is to retrieve frame parameters, using ZSTD_getFrameHeader().
+  It fills a ZSTD_frameHeader structure with important information to correctly decode the frame,
+  such as minimum rolling buffer size to allocate to decompress data (`windowSize`),
+  and the dictionary ID in use.
   (Note : content size is optional, it may not be present. 0 means : content size unknown).
   Note that these values could be wrong, either because of data malformation, or because an attacker is spoofing deliberate false information.
   As a consequence, check that values remain within valid application range, especially `windowSize`, before allocation.
-  Each application can set its own limit, depending on local restrictions. For extended interoperability, it is recommended to support at least 8 MB.
-  Frame parameters are extracted from the beginning of the compressed frame.
-  Data fragment must be large enough to ensure successful decoding, typically `ZSTD_frameHeaderSize_max` bytes.
-  @result : 0 : successful decoding, the `ZSTD_frameParams` structure is correctly filled.
+  Each application can set its own limit, depending on local restrictions.
+  For extended interoperability, it is recommended to support windowSize of at least 8 MB.
+  Frame header is extracted from the beginning of compressed frame, so providing only the frame's beginning is enough.
+  Data fragment must be large enough to ensure successful decoding.
+  `ZSTD_frameHeaderSize_max` bytes is guaranteed to always be large enough.
+  @result : 0 : successful decoding, the `ZSTD_frameHeader` structure is correctly filled.
            >0 : `srcSize` is too small, please provide at least @result bytes on next attempt.
            errorCode, which can be tested using ZSTD_isError().
 
-  Start decompression, with ZSTD_decompressBegin() or ZSTD_decompressBegin_usingDict().
+  Start decompression, with ZSTD_decompressBegin().
+  If decompression requires a dictionary, use ZSTD_decompressBegin_usingDict() or ZSTD_decompressBegin_usingDDict().
   Alternatively, you can copy a prepared context, using ZSTD_copyDCtx().
 
   Then use ZSTD_nextSrcSizeToDecompress() and ZSTD_decompressContinue() alternatively.
@@ -553,15 +866,237 @@ ZSTDLIB_API ZSTD_nextInputType_e ZSTD_nextInputType(ZSTD_DCtx* dctx);
   b) Frame Size - 4 Bytes, Little endian format, unsigned 32-bits
   c) Frame Content - any content (User Data) of length equal to Frame Size
   For skippable frames ZSTD_decompressContinue() always returns 0.
-  For skippable frames ZSTD_getFrameParams() returns fparamsPtr->windowLog==0 what means that a frame is skippable.
+  For skippable frames ZSTD_getFrameHeader() returns fparamsPtr->windowLog==0, which means that the frame is skippable.
+    Note : If fparamsPtr->frameContentSize==0, it is ambiguous: the frame might actually be a Zstd encoded frame with no content.
+           For purposes of decompression, it is valid in both cases to skip the frame using
+           ZSTD_findFrameCompressedSize to find its size in bytes.
   It also returns Frame Size as fparamsPtr->frameContentSize.
 */
 
+/*=====   Buffer-less streaming decompression functions  =====*/
+ZSTDLIB_API size_t ZSTD_getFrameHeader(ZSTD_frameHeader* zfhPtr, const void* src, size_t srcSize);   /**< doesn't consume input */
+ZSTDLIB_API size_t ZSTD_decompressBegin(ZSTD_DCtx* dctx);
+ZSTDLIB_API size_t ZSTD_decompressBegin_usingDict(ZSTD_DCtx* dctx, const void* dict, size_t dictSize);
+ZSTDLIB_API size_t ZSTD_decompressBegin_usingDDict(ZSTD_DCtx* dctx, const ZSTD_DDict* ddict);
+ZSTDLIB_API void   ZSTD_copyDCtx(ZSTD_DCtx* dctx, const ZSTD_DCtx* preparedDCtx);
 
-/* **************************************
-*  Block functions
-****************************************/
-/*! Block functions produce and decode raw zstd blocks, without frame metadata.
+ZSTDLIB_API size_t ZSTD_nextSrcSizeToDecompress(ZSTD_DCtx* dctx);
+ZSTDLIB_API size_t ZSTD_decompressContinue(ZSTD_DCtx* dctx, void* dst, size_t dstCapacity, const void* src, size_t srcSize);
+typedef enum { ZSTDnit_frameHeader, ZSTDnit_blockHeader, ZSTDnit_block, ZSTDnit_lastBlock, ZSTDnit_checksum, ZSTDnit_skippableFrame } ZSTD_nextInputType_e;
+ZSTDLIB_API ZSTD_nextInputType_e ZSTD_nextInputType(ZSTD_DCtx* dctx);
+
+
+
+/*===   New advanced API (experimental, and compression only)  ===*/
+
+/* notes on API design :
+ *   In this proposal, parameters are pushed one by one into an existing CCtx,
+ *   and then applied on all subsequent compression jobs.
+ *   When no parameter is ever provided, CCtx is created with compression level ZSTD_CLEVEL_DEFAULT.
+ *
+ *   This API is intended to replace all other experimental APIs.
+ *   It can basically do all other use cases, and even new ones.
+ *   It stands a good chance to become "stable",
+ *   after a reasonable testing period.
+ */
+
+/* note on naming convention :
+ *   Initially, the API favored names like ZSTD_setCCtxParameter() .
+ *   In this proposal, convention is changed towards ZSTD_CCtx_setParameter() .
+ *   The main driver is that it identifies more clearly the target object type.
+ *   It feels clearer in light of potential variants :
+ *   ZSTD_CDict_setParameter() (rather than ZSTD_setCDictParameter())
+ *   ZSTD_DCtx_setParameter()  (rather than ZSTD_setDCtxParameter() )
+ *   Left variant feels easier to distinguish.
+ */
+
+/* note on enum design :
+ * All enum members will be manually set to explicit values before reaching "stable API" status */
+
+typedef enum {
+    /* compression parameters */
+    ZSTD_p_compressionLevel=100, /* Update all compression parameters according to pre-defined cLevel table
+                              * Default level is ZSTD_CLEVEL_DEFAULT==3.
+                              * Special: value 0 means "do not change cLevel". */
+    ZSTD_p_windowLog,        /* Maximum allowed back-reference distance, expressed as power of 2.
+                              * Must be clamped between ZSTD_WINDOWLOG_MIN and ZSTD_WINDOWLOG_MAX.
+                              * Special: value 0 means "do not change windowLog". */
+    ZSTD_p_hashLog,          /* Size of the probe table, as a power of 2.
+                              * Resulting table size is (1 << (hashLog+2)).
+                              * Must be clamped between ZSTD_HASHLOG_MIN and ZSTD_HASHLOG_MAX.
+                              * Larger tables improve compression ratio of strategies <= dFast,
+                              * and improve speed of strategies > dFast.
+                              * Special: value 0 means "do not change hashLog". */
+    ZSTD_p_chainLog,         /* Size of the full-search table, as a power of 2.
+                              * Resulting table size is (1 << (chainLog+2)).
+                              * Larger tables result in better and slower compression.
+                              * This parameter is useless when using "fast" strategy.
+                              * Special: value 0 means "do not change chainLog". */
+    ZSTD_p_searchLog,        /* Number of search attempts, as a power of 2.
+                              * More attempts result in better and slower compression.
+                              * This parameter is useless when using "fast" and "dFast" strategies.
+                              * Special: value 0 means "do not change searchLog". */
+    ZSTD_p_minMatch,         /* Minimum size of searched matches (note : repCode matches can be smaller).
+                              * Larger values make faster compression and decompression, but decrease ratio.
+                              * Must be clamped between ZSTD_SEARCHLENGTH_MIN and ZSTD_SEARCHLENGTH_MAX.
+                              * Note that currently, for all strategies < btopt, effective minimum is 4.
+                              * Note that currently, for all strategies > fast, effective maximum is 6.
+                              * Special: value 0 means "do not change minMatchLength". */
+    ZSTD_p_targetLength,     /* Only useful for strategies >= btopt.
+                              * Length of Match considered "good enough" to stop search.
+                              * Larger values make compression stronger and slower.
+                              * Special: value 0 means "do not change targetLength". */
+    ZSTD_p_compressionStrategy, /* See ZSTD_strategy enum definition.
+                              * Cast selected strategy as unsigned for ZSTD_CCtx_setParameter() compatibility.
+                              * The higher the value of selected strategy, the more complex it is,
+                              * resulting in stronger and slower compression.
+                              * Special: value 0 means "do not change strategy". */
+
+    /* frame parameters */
+    ZSTD_p_contentSizeFlag=200, /* Content size is written into frame header _whenever known_ (default:1) */
+    ZSTD_p_checksumFlag,     /* A 32-bits checksum of content is written at end of frame (default:0) */
+    ZSTD_p_dictIDFlag,       /* When applicable, dictID of dictionary is provided in frame header (default:1) */
+
+    /* dictionary parameters (must be set before ZSTD_CCtx_loadDictionary) */
+    ZSTD_p_dictMode=300,     /* Select how dictionary content must be interpreted. Value must be from type ZSTD_dictMode_e.
+                              * default : 0==auto : dictionary will be "full" if it respects specification, otherwise it will be "rawContent" */
+    ZSTD_p_refDictContent,   /* Dictionary content will be referenced, instead of copied (default:0==byCopy).
+                              * It requires that dictionary buffer outlives its users */
+
+    /* multi-threading parameters */
+    ZSTD_p_nbThreads=400,    /* Select how many threads a compression job can spawn (default:1)
+                              * More threads improve speed, but also increase memory usage.
+                              * Can only receive a value > 1 if ZSTD_MULTITHREAD is enabled.
+                              * Special: value 0 means "do not change nbThreads" */
+    ZSTD_p_jobSize,          /* Size of a compression job. Each compression job is completed in parallel.
+                              * 0 means default, which is dynamically determined based on compression parameters.
+                              * Job size must be a minimum of overlapSize, or 1 KB, whichever is largest
+                              * The minimum size is automatically and transparently enforced */
+    ZSTD_p_overlapSizeLog,   /* Size of previous input reloaded at the beginning of each job.
+                              * 0 => no overlap, 6(default) => use 1/8th of windowSize, >=9 => use full windowSize */
+
+    /* advanced parameters - may not remain available after API update */
+    ZSTD_p_forceMaxWindow=1100, /* Force back-reference distances to remain < windowSize,
+                              * even when referencing into Dictionary content (default:0) */
+
+} ZSTD_cParameter;
+
+
+/*! ZSTD_CCtx_setParameter() :
+ *  Set one compression parameter, selected by enum ZSTD_cParameter.
+ *  Note : when `value` is an enum, cast it to unsigned for proper type checking.
+ *  @result : 0, or an error code (which can be tested with ZSTD_isError()). */
+ZSTDLIB_API size_t ZSTD_CCtx_setParameter(ZSTD_CCtx* cctx, ZSTD_cParameter param, unsigned value);
+
+/*! ZSTD_CCtx_setPledgedSrcSize() :
+ *  Total input data size to be compressed as a single frame.
+ *  This value will be controlled at the end, and result in error if not respected.
+ * @result : 0, or an error code (which can be tested with ZSTD_isError()).
+ *  Note 1 : 0 means zero, empty.
+ *           In order to mean "unknown content size", pass constant ZSTD_CONTENTSIZE_UNKNOWN.
+ *           Note that ZSTD_CONTENTSIZE_UNKNOWN is default value for new compression jobs.
+ *  Note 2 : If all data is provided and consumed in a single round,
+ *           this value is overridden by srcSize instead. */
+ZSTDLIB_API size_t ZSTD_CCtx_setPledgedSrcSize(ZSTD_CCtx* cctx, unsigned long long pledgedSrcSize);
+
+/*! ZSTD_CCtx_loadDictionary() :
+ *  Create an internal CDict from dict buffer.
+ *  Decompression will have to use same buffer.
+ * @result : 0, or an error code (which can be tested with ZSTD_isError()).
+ *  Special : Adding a NULL (or 0-size) dictionary invalidates any previous dictionary,
+ *            meaning "return to no-dictionary mode".
+ *  Note 1 : `dict` content will be copied internally,
+ *           except if ZSTD_p_refDictContent is set before loading.
+ *  Note 2 : Loading a dictionary involves building tables, which are dependent on compression parameters.
+ *           For this reason, compression parameters cannot be changed anymore after loading a dictionary.
+ *           It's also a CPU-heavy operation, with non-negligible impact on latency.
+ *  Note 3 : Dictionary will be used for all future compression jobs.
+ *           To return to "no-dictionary" situation, load a NULL dictionary */
+ZSTDLIB_API size_t ZSTD_CCtx_loadDictionary(ZSTD_CCtx* cctx, const void* dict, size_t dictSize);
+
+/*! ZSTD_CCtx_refCDict() :
+ *  Reference a prepared dictionary, to be used for all next compression jobs.
+ *  Note that compression parameters are enforced from within CDict,
+ *  and supersede any compression parameter previously set within CCtx.
+ *  The dictionary will remain valid for future compression jobs using same CCtx.
+ * @result : 0, or an error code (which can be tested with ZSTD_isError()).
+ *  Special : adding a NULL CDict means "return to no-dictionary mode".
+ *  Note 1 : Currently, only one dictionary can be managed.
+ *           Adding a new dictionary effectively "discards" any previous one.
+ *  Note 2 : CDict is just referenced, its lifetime must outlive CCtx.
+ */
+ZSTDLIB_API size_t ZSTD_CCtx_refCDict(ZSTD_CCtx* cctx, const ZSTD_CDict* cdict);
+
+/*! ZSTD_CCtx_refPrefix() :
+ *  Reference a prefix (single-usage dictionary) for next compression job.
+ *  Decompression needs the same prefix to properly regenerate data.
+ *  Prefix is **only used once**. Tables are discarded at end of compression job.
+ *  Subsequent compression jobs will be done without prefix (if none is explicitly referenced).
+ *  If there is a need to use same prefix multiple times, consider embedding it into a ZSTD_CDict instead.
+ * @result : 0, or an error code (which can be tested with ZSTD_isError()).
+ *  Special : Adding any prefix (including NULL) invalidates any previous prefix or dictionary
+ *  Note 1 : Prefix buffer is referenced. It must outlive compression job.
+ *  Note 2 : Referencing a prefix involves building tables, which are dependent on compression parameters.
+ *           It's a CPU-heavy operation, with non-negligible impact on latency.
+ *  Note 3 : it's possible to alter ZSTD_p_dictMode using ZSTD_CCtx_setParameter() */
+ZSTDLIB_API size_t ZSTD_CCtx_refPrefix(ZSTD_CCtx* cctx, const void* prefix, size_t prefixSize);
+
+
+
+typedef enum {
+    ZSTD_e_continue=0, /* collect more data, encoder transparently decides when to output result, for optimal conditions */
+    ZSTD_e_flush,      /* flush any data provided so far - frame will continue, future data can still reference previous data for better compression */
+    ZSTD_e_end         /* flush any remaining data and ends current frame. Any future compression starts a new frame. */
+} ZSTD_EndDirective;
+
+/*! ZSTD_compress_generic() :
+ *  Behave about the same as ZSTD_compressStream. To note :
+ *  - Compression parameters are pushed into CCtx before starting compression, using ZSTD_CCtx_setParameter()
+ *  - Compression parameters cannot be changed once compression is started.
+ *  - *dstPos must be <= dstCapacity, *srcPos must be <= srcSize
+ *  - *dstPos and *srcPos will be updated. They are guaranteed to remain below their respective limit.
+ *  - @return provides the minimum amount of data still to flush from internal buffers
+ *            or an error code, which can be tested using ZSTD_isError().
+ *            if @return != 0, flush is not fully completed, there is some data left within internal buffers.
+ *  - after a ZSTD_e_end directive, if internal buffer is not fully flushed,
+ *            only ZSTD_e_end or ZSTD_e_flush operations are allowed.
+ *            It is necessary to fully flush internal buffers
+ *            before starting a new compression job, or changing compression parameters.
+ */
+ZSTDLIB_API size_t ZSTD_compress_generic (ZSTD_CCtx* cctx,
+                                          ZSTD_outBuffer* output,
+                                          ZSTD_inBuffer* input,
+                                          ZSTD_EndDirective endOp);
+
+/*! ZSTD_CCtx_reset() :
+ *  Return a CCtx to clean state.
+ *  Useful after an error, or to interrupt an ongoing compression job and start a new one.
+ *  Any internal data not yet flushed is cancelled.
+ *  Dictionary (if any) is dropped.
+ *  It's possible to modify compression parameters after a reset.
+ */
+ZSTDLIB_API void ZSTD_CCtx_reset(ZSTD_CCtx* cctx);   /* Not ready yet ! */
+
+
+/*! ZSTD_compress_generic_simpleArgs() :
+ *  Same as ZSTD_compress_generic(),
+ *  but using only integral types as arguments.
+ *  Argument list is larger and less expressive than ZSTD_{in,out}Buffer,
+ *  but can be helpful for binders from dynamic languages
+ *  which have trouble handling structures containing memory pointers.
+ */
+size_t ZSTD_compress_generic_simpleArgs (
+                            ZSTD_CCtx* cctx,
+                            void* dst, size_t dstCapacity, size_t* dstPos,
+                      const void* src, size_t srcSize, size_t* srcPos,
+                            ZSTD_EndDirective endOp);
+
+
+
+/**
+    Block functions
+
+    Block functions produce and decode raw zstd blocks, without frame metadata.
     Frame metadata cost is typically ~18 bytes, which can be non-negligible for very small blocks (< 100 bytes).
     User will have to take in charge required information to regenerate data, such as compressed and content sizes.
 
@@ -569,32 +1104,33 @@ ZSTDLIB_API ZSTD_nextInputType_e ZSTD_nextInputType(ZSTD_DCtx* dctx);
     - Compressing and decompressing require a context structure
       + Use ZSTD_createCCtx() and ZSTD_createDCtx()
     - It is necessary to init context before starting
-      + compression : ZSTD_compressBegin()
-      + decompression : ZSTD_decompressBegin()
-      + variants _usingDict() are also allowed
-      + copyCCtx() and copyDCtx() work too
-    - Block size is limited, it must be <= ZSTD_getBlockSizeMax()
-      + If you need to compress more, cut data into multiple blocks
-      + Consider using the regular ZSTD_compress() instead, as frame metadata costs become negligible when source size is large.
+      + compression : any ZSTD_compressBegin*() variant, including with dictionary
+      + decompression : any ZSTD_decompressBegin*() variant, including with dictionary
+      + copyCCtx() and copyDCtx() can be used too
+    - Block size is limited, it must be <= ZSTD_getBlockSize() <= ZSTD_BLOCKSIZE_MAX
+      + If input is larger than a block size, it's necessary to split input data into multiple blocks
+      + For inputs larger than a single block size, consider using the regular ZSTD_compress() instead.
+        Frame metadata is not that costly, and quickly becomes negligible as source size grows larger.
     - When a block is considered not compressible enough, ZSTD_compressBlock() result will be zero.
       In which case, nothing is produced into `dst`.
       + User must test for such outcome and deal directly with uncompressed data
       + ZSTD_decompressBlock() doesn't accept uncompressed data as input !!!
-      + In case of multiple successive blocks, decoder must be informed of uncompressed block existence to follow proper history.
-        Use ZSTD_insertBlock() in such a case.
+      + In case of multiple successive blocks, should some of them be uncompressed,
+        decoder must be informed of their existence in order to follow proper history.
+        Use ZSTD_insertBlock() for such a case.
 */
 
-#define ZSTD_BLOCKSIZE_ABSOLUTEMAX (128 * 1024)   /* define, for static allocation */
-ZSTDLIB_API size_t ZSTD_getBlockSizeMax(ZSTD_CCtx* cctx);
+#define ZSTD_BLOCKSIZELOG_MAX 17
+#define ZSTD_BLOCKSIZE_MAX   (1<<ZSTD_BLOCKSIZELOG_MAX)   /* define, for static allocation */
+/*=====   Raw zstd block functions  =====*/
+ZSTDLIB_API size_t ZSTD_getBlockSize   (const ZSTD_CCtx* cctx);
 ZSTDLIB_API size_t ZSTD_compressBlock  (ZSTD_CCtx* cctx, void* dst, size_t dstCapacity, const void* src, size_t srcSize);
 ZSTDLIB_API size_t ZSTD_decompressBlock(ZSTD_DCtx* dctx, void* dst, size_t dstCapacity, const void* src, size_t srcSize);
 ZSTDLIB_API size_t ZSTD_insertBlock(ZSTD_DCtx* dctx, const void* blockStart, size_t blockSize);  /**< insert block into `dctx` history. Useful for uncompressed blocks */
 
 
-#endif   /* ZSTD_STATIC_LINKING_ONLY */
+#endif   /* ZSTD_H_ZSTD_STATIC_LINKING_ONLY */
 
 #if defined (__cplusplus)
 }
 #endif
-
-#endif  /* ZSTD_H_235446 */

From fd552774c0f42b9b1b8878e3dcc3aa300652b317 Mon Sep 17 00:00:00 2001
From: Alexey Milovidov <milovidov@yandex-team.ru>
Date: Thu, 24 Aug 2017 16:25:51 +0300
Subject: [PATCH 273/281] Fixed translation error [#CLICKHOUSE-2].

---
 dbms/src/Interpreters/Aggregator.cpp | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/dbms/src/Interpreters/Aggregator.cpp b/dbms/src/Interpreters/Aggregator.cpp
index 4f0772a4980..57b8756c8a7 100644
--- a/dbms/src/Interpreters/Aggregator.cpp
+++ b/dbms/src/Interpreters/Aggregator.cpp
@@ -605,7 +605,7 @@ void NO_INLINE Aggregator::executeImplCase(
 
         if (!no_more_keys)  /// Insert.
         {
-            /// Optimization for frequently duplicating keys.
+            /// Optimization for consecutive identical keys.
             if (!Method::no_consecutive_keys_optimization)
             {
                 if (i != 0 && key == prev_key)

From 16b964d04d9c3a343fe27be86135154425177fd3 Mon Sep 17 00:00:00 2001
From: Vladimir Chebotarev <chebotarev@yandex-team.ru>
Date: Wed, 23 Aug 2017 15:01:48 +0300
Subject: [PATCH 274/281] cmake: Avoiding check target conflicts.
 [#CHEBOTAREV-14]

---
 cmake/add_check.cmake | 18 ++++++++++--------
 1 file changed, 10 insertions(+), 8 deletions(-)

diff --git a/cmake/add_check.cmake b/cmake/add_check.cmake
index 1f93c5bcfd7..c6abbcdb321 100644
--- a/cmake/add_check.cmake
+++ b/cmake/add_check.cmake
@@ -1,14 +1,16 @@
 # Adding test output on failure
 enable_testing ()
 
-if (CMAKE_CONFIGURATION_TYPES)
-    add_custom_target (check COMMAND ${CMAKE_CTEST_COMMAND}
-        --force-new-ctest-process --output-on-failure --build-config "$<CONFIGURATION>"
-        WORKING_DIRECTORY ${CMAKE_BINARY_DIR})
-else ()
-    add_custom_target (check COMMAND ${CMAKE_CTEST_COMMAND}
-        --force-new-ctest-process --output-on-failure
-        WORKING_DIRECTORY ${CMAKE_BINARY_DIR})
+if (NOT TARGET check)
+    if (CMAKE_CONFIGURATION_TYPES)
+        add_custom_target (check COMMAND ${CMAKE_CTEST_COMMAND}
+            --force-new-ctest-process --output-on-failure --build-config "$<CONFIGURATION>"
+            WORKING_DIRECTORY ${CMAKE_BINARY_DIR})
+    else ()
+        add_custom_target (check COMMAND ${CMAKE_CTEST_COMMAND}
+            --force-new-ctest-process --output-on-failure
+            WORKING_DIRECTORY ${CMAKE_BINARY_DIR})
+    endif ()
 endif ()
 
 macro (add_check target)

From 87cc80c30e14c6dbe993336bf4efb353d0b2ad21 Mon Sep 17 00:00:00 2001
From: Vladimir Chebotarev <chebotarev@yandex-team.ru>
Date: Wed, 23 Aug 2017 15:05:15 +0300
Subject: [PATCH 275/281] libdaemon: Moved default graphite config name to
 constant. [#CHEBOTAREV-14]

---
 libs/libdaemon/include/daemon/BaseDaemon.h | 8 +++++---
 libs/libdaemon/src/BaseDaemon.cpp          | 2 ++
 2 files changed, 7 insertions(+), 3 deletions(-)

diff --git a/libs/libdaemon/include/daemon/BaseDaemon.h b/libs/libdaemon/include/daemon/BaseDaemon.h
index c29261930ed..870990246e5 100644
--- a/libs/libdaemon/include/daemon/BaseDaemon.h
+++ b/libs/libdaemon/include/daemon/BaseDaemon.h
@@ -54,6 +54,8 @@ class BaseDaemon : public Poco::Util::ServerApplication
     friend class SignalListener;
 
 public:
+    static constexpr char DEFAULT_GRAPHITE_CONFIG_NAME[] = "graphite";
+
     BaseDaemon();
     ~BaseDaemon();
 
@@ -107,7 +109,7 @@ public:
     /// root_path по умолчанию one_min
     /// key - лучше группировать по смыслу. Например "meminfo.cached" или "meminfo.free", "meminfo.total"
     template <class T>
-    void writeToGraphite(const std::string & key, const T & value, const std::string & config_name = "graphite", time_t timestamp = 0, const std::string & custom_root_path = "")
+    void writeToGraphite(const std::string & key, const T & value, const std::string & config_name = DEFAULT_GRAPHITE_CONFIG_NAME, time_t timestamp = 0, const std::string & custom_root_path = "")
     {
         auto writer = getGraphiteWriter(config_name);
         if (writer)
@@ -115,14 +117,14 @@ public:
     }
 
     template <class T>
-    void writeToGraphite(const GraphiteWriter::KeyValueVector<T> & key_vals, const std::string & config_name = "graphite", time_t timestamp = 0, const std::string & custom_root_path = "")
+    void writeToGraphite(const GraphiteWriter::KeyValueVector<T> & key_vals, const std::string & config_name = DEFAULT_GRAPHITE_CONFIG_NAME, time_t timestamp = 0, const std::string & custom_root_path = "")
     {
         auto writer = getGraphiteWriter(config_name);
         if (writer)
             writer->write(key_vals, timestamp, custom_root_path);
     }
 
-    GraphiteWriter * getGraphiteWriter(const std::string & config_name = "graphite")
+    GraphiteWriter * getGraphiteWriter(const std::string & config_name = DEFAULT_GRAPHITE_CONFIG_NAME)
     {
         if (graphite_writers.count(config_name))
             return graphite_writers[config_name].get();
diff --git a/libs/libdaemon/src/BaseDaemon.cpp b/libs/libdaemon/src/BaseDaemon.cpp
index e64d9236396..434545b89cc 100644
--- a/libs/libdaemon/src/BaseDaemon.cpp
+++ b/libs/libdaemon/src/BaseDaemon.cpp
@@ -73,6 +73,8 @@ using Poco::Message;
 using Poco::Util::AbstractConfiguration;
 
 
+constexpr char BaseDaemon::DEFAULT_GRAPHITE_CONFIG_NAME[];
+
 /** Для передачи информации из обработчика сигнала для обработки в другом потоке.
   * Если при получении сигнала надо делать что-нибудь серьёзное (например, вывести сообщение в лог),
   *  то передать нужную информацию через pipe в другой поток и сделать там всю работу

From 7943a21fcf68633e3f4cd4d25606e37a0354ba8b Mon Sep 17 00:00:00 2001
From: robot-metrika-test <robot-metrika-test@yandex-team.ru>
Date: Wed, 23 Aug 2017 15:09:15 +0300
Subject: [PATCH 276/281] Auto version update to [54281]

---
 dbms/cmake/version.cmake | 4 ++--
 debian/changelog         | 4 ++--
 2 files changed, 4 insertions(+), 4 deletions(-)

diff --git a/dbms/cmake/version.cmake b/dbms/cmake/version.cmake
index 2438e4e2597..e5a67e1c743 100644
--- a/dbms/cmake/version.cmake
+++ b/dbms/cmake/version.cmake
@@ -1,6 +1,6 @@
 # This strings autochanged from release_lib.sh:
-set(VERSION_DESCRIBE v1.1.54280-testing)
-set(VERSION_REVISION 54280)
+set(VERSION_DESCRIBE v1.1.54281-testing)
+set(VERSION_REVISION 54281)
 # end of autochange
 
 set (VERSION_MAJOR 1)
diff --git a/debian/changelog b/debian/changelog
index 44d45811d3a..8f6dc3b2b4a 100644
--- a/debian/changelog
+++ b/debian/changelog
@@ -1,5 +1,5 @@
-clickhouse (1.1.54280) unstable; urgency=low
+clickhouse (1.1.54281) unstable; urgency=low
 
   * Modified source code
 
- -- proller <proller@yandex-team.ru>  Fri, 18 Aug 2017 16:18:33 +0300
+ --  <robot-metrika-test@yandex-team.ru>  Wed, 23 Aug 2017 15:09:15 +0300

From b818258af9a46078848531b0e350169518a62961 Mon Sep 17 00:00:00 2001
From: Vitaliy Lyudvichenko <vludv@yandex-team.ru>
Date: Fri, 25 Aug 2017 16:47:09 +0300
Subject: [PATCH 277/281] Add ability to run an integration test env without
 test runs. [#CLICKHOUSE-2]

---
 dbms/tests/integration/helpers/cluster.py           | 10 +++++-----
 dbms/tests/integration/test_distributed_ddl/test.py | 11 +++++++++++
 2 files changed, 16 insertions(+), 5 deletions(-)
 mode change 100644 => 100755 dbms/tests/integration/test_distributed_ddl/test.py

diff --git a/dbms/tests/integration/helpers/cluster.py b/dbms/tests/integration/helpers/cluster.py
index 9a64e2cb070..e950c2eac95 100644
--- a/dbms/tests/integration/helpers/cluster.py
+++ b/dbms/tests/integration/helpers/cluster.py
@@ -266,14 +266,14 @@ class ClickHouseInstance:
 
         os.makedirs(self.path)
 
-        configs_dir = p.join(self.path, 'configs')
+        configs_dir = p.abspath(p.join(self.path, 'configs'))
         os.mkdir(configs_dir)
 
         shutil.copy(p.join(self.base_configs_dir, 'config.xml'), configs_dir)
         shutil.copy(p.join(self.base_configs_dir, 'users.xml'), configs_dir)
 
-        config_d_dir = p.join(configs_dir, 'config.d')
-        users_d_dir = p.join(configs_dir, 'users.d')
+        config_d_dir = p.abspath(p.join(configs_dir, 'config.d'))
+        users_d_dir = p.abspath(p.join(configs_dir, 'users.d'))
         os.mkdir(config_d_dir)
         os.mkdir(users_d_dir)
 
@@ -301,12 +301,12 @@ class ClickHouseInstance:
         for path in self.custom_user_config_paths:
             shutil.copy(path, users_d_dir)
 
-        db_dir = p.join(self.path, 'database')
+        db_dir = p.abspath(p.join(self.path, 'database'))
         os.mkdir(db_dir)
         if self.clickhouse_path_dir is not None:
             distutils.dir_util.copy_tree(self.clickhouse_path_dir, db_dir)
 
-        logs_dir = p.join(self.path, 'logs')
+        logs_dir = p.abspath(p.join(self.path, 'logs'))
         os.mkdir(logs_dir)
 
         depends_on = '[]'
diff --git a/dbms/tests/integration/test_distributed_ddl/test.py b/dbms/tests/integration/test_distributed_ddl/test.py
old mode 100644
new mode 100755
index 32363debfa6..0b529bb0a44
--- a/dbms/tests/integration/test_distributed_ddl/test.py
+++ b/dbms/tests/integration/test_distributed_ddl/test.py
@@ -1,8 +1,12 @@
+import os
 import os.path as p
+import sys
 import time
 import datetime
 import pytest
+from contextlib import contextmanager
 
+sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
 from helpers.cluster import ClickHouseCluster
 from helpers.network import PartitionManager, PartitionManagerDisbaler
 from helpers.test_tools import TSV
@@ -295,3 +299,10 @@ ENGINE = Distributed(cluster_without_replication, default, merge, i)
     ddl_check_query(instance, "DROP TABLE merge ON CLUSTER cluster_without_replication")
     ddl_check_query(instance, "DROP TABLE all_merge_32 ON CLUSTER cluster_without_replication")
     ddl_check_query(instance, "DROP TABLE all_merge_64 ON CLUSTER cluster_without_replication")
+
+
+if __name__ == '__main__':  # Direct run: start the cluster for manual inspection, no tests executed.
+    with contextmanager(started_cluster)() as cluster:
+        for name, instance in cluster.instances.items():
+            print name, instance.ip_address
+        raw_input("Cluster created, press Enter to destroy...")  # raw_input returns only on Enter

From 3659caad7dacf5005618ed54ac9e7ceaa87a410a Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Marek=20Vavru=C5=A1a?= <mvavrusa@cloudflare.com>
Date: Thu, 24 Aug 2017 13:44:29 -0700
Subject: [PATCH 278/281] ZooKeeper: use seeded random device to shuffle node
 list #1149

std::random_shuffle() may or may not use std::rand() which isn't
seeded, and since configuration parsing runs predictably as one
of the first things after server startup, the list of ZK nodes
is shuffled the same way on all replicas.
---
 dbms/src/Common/ZooKeeper/ZooKeeper.cpp | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/dbms/src/Common/ZooKeeper/ZooKeeper.cpp b/dbms/src/Common/ZooKeeper/ZooKeeper.cpp
index 6ba093719e6..62739331473 100644
--- a/dbms/src/Common/ZooKeeper/ZooKeeper.cpp
+++ b/dbms/src/Common/ZooKeeper/ZooKeeper.cpp
@@ -116,7 +116,9 @@ struct ZooKeeperArgs
         }
 
         /// Shuffle the hosts to distribute the load among ZooKeeper nodes.
-        std::random_shuffle(hosts_strings.begin(), hosts_strings.end());
+        std::random_device rd;
+        std::mt19937 g(rd());
+        std::shuffle(hosts_strings.begin(), hosts_strings.end(), g);
 
         for (auto & host : hosts_strings)
         {

From f995662945feadb1405399d466a26ef81ff94173 Mon Sep 17 00:00:00 2001
From: Reto Kromer <retokromer@users.noreply.github.com>
Date: Fri, 25 Aug 2017 13:34:08 +0200
Subject: [PATCH 279/281] update to macOS

---
 MacOS.md | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/MacOS.md b/MacOS.md
index 23457877567..93c04d7c2de 100644
--- a/MacOS.md
+++ b/MacOS.md
@@ -1,6 +1,6 @@
-## How to increase maxfiles on Mac OS X
+## How to increase maxfiles on macOS
 
-To increase maxfiles on MacOS, create the following file:
+To increase maxfiles on macOS, create the following file:
 
 (Note: you'll need to use sudo)
 

From f8c9059857979217b0d32d380db8d0bca5380027 Mon Sep 17 00:00:00 2001
From: Vitaliy Lyudvichenko <vludv@yandex-team.ru>
Date: Fri, 25 Aug 2017 19:19:57 +0300
Subject: [PATCH 280/281] Fixed SIGABRT in DDLWorker. [#CLICKHOUSE-3251]

---
 dbms/src/Interpreters/DDLWorker.cpp | 17 ++++++++++++++++-
 1 file changed, 16 insertions(+), 1 deletion(-)

diff --git a/dbms/src/Interpreters/DDLWorker.cpp b/dbms/src/Interpreters/DDLWorker.cpp
index 0618626632e..1035082a773 100644
--- a/dbms/src/Interpreters/DDLWorker.cpp
+++ b/dbms/src/Interpreters/DDLWorker.cpp
@@ -886,7 +886,22 @@ void DDLWorker::run()
                 if (!e.isTemporaryError())
                 {
                     LOG_DEBUG(log, "Recovering ZooKeeper session after: " << getCurrentExceptionMessage(false));
-                    zookeeper = context.getZooKeeper();
+
+                    while (!stop_flag)
+                    {
+                        try
+                        {
+                            zookeeper = context.getZooKeeper();
+                            break;
+                        }
+                        catch (...)
+                        {
+                            tryLogCurrentException(__PRETTY_FUNCTION__);
+
+                            using namespace std::chrono_literals;
+                            std::this_thread::sleep_for(5s);
+                        }
+                    }
                 }
                 else
                 {

From 738f1685b2fac2ac9fe6a41f82f5c9b30f225041 Mon Sep 17 00:00:00 2001
From: Alexey Milovidov <milovidov@yandex-team.ru>
Date: Fri, 25 Aug 2017 19:35:10 +0300
Subject: [PATCH 281/281] Fixed build after merge [#CLICKHOUSE-2].

---
 dbms/src/Common/ZooKeeper/ZooKeeper.cpp | 1 +
 1 file changed, 1 insertion(+)

diff --git a/dbms/src/Common/ZooKeeper/ZooKeeper.cpp b/dbms/src/Common/ZooKeeper/ZooKeeper.cpp
index 62739331473..803d6259dfb 100644
--- a/dbms/src/Common/ZooKeeper/ZooKeeper.cpp
+++ b/dbms/src/Common/ZooKeeper/ZooKeeper.cpp
@@ -1,3 +1,4 @@
+#include <random>
 #include <functional>
 #include <Common/ZooKeeper/ZooKeeper.h>
 #include <common/logger_useful.h>