diff --git a/.gitignore b/.gitignore
index a6509c69d76..208b03861e6 100644
--- a/.gitignore
+++ b/.gitignore
@@ -199,7 +199,6 @@ vgcore*
 *.changes
 build-stamp
 configure-stamp
-debian/changelog
 debian/*.debhelper.log
 debian/*.debhelper
 debian/*.substvars
diff --git a/.gitmodules b/.gitmodules
index b3a0823a399..e69de29bb2d 100644
--- a/.gitmodules
+++ b/.gitmodules
@@ -1,3 +0,0 @@
-[submodule "doc/presentations"]
-	path = website/presentations
-	url = https://github.com/yandex/clickhouse-presentations.git
diff --git a/CHANGELOG.md b/CHANGELOG.md
new file mode 100644
index 00000000000..8a7a389bac9
--- /dev/null
+++ b/CHANGELOG.md
@@ -0,0 +1,123 @@
+# ClickHouse release 1.1.54284
+
+* This is bugfix release for previous 1.1.54282 release. It fixes ZooKeeper nodes leak in `parts/` directory.
+
+# ClickHouse release 1.1.54282
+
+This is a bugfix release. The following bugs were fixed:
+* `DB::Exception: Assertion violation: !_path.empty()` error when inserting into a Distributed table.
+* Error when parsing inserted data in RowBinary format if the data begins with ';' character.
+* Errors during runtime compilation of certain aggregate functions (e.g. `groupArray()`).
+
+# ClickHouse release 1.1.54276
+
+## New features:
+
+* You can use an optional WITH clause in a SELECT query. Example query: `WITH 1+1 AS a SELECT a, a*a`
+* INSERT can be performed synchronously in a Distributed table: OK is returned only after all the data is saved on all the shards. This is activated by the setting insert_distributed_sync=1.
+* Added the UUID data type for working with 16-byte identifiers.
+* Added aliases of CHAR, FLOAT and other types for compatibility with the Tableau.
+* Added the functions toYYYYMM, toYYYYMMDD, and toYYYYMMDDhhmmss for converting time into numbers.
+* You can use IP addresses (together with the hostname) to identify servers for clustered DDL queries.
+* Added support for non-constant arguments and negative offsets in the function `substring(str, pos, len).`
+* Added the max_size parameter for the `groupArray(max_size)(column)` aggregate function, and optimized its performance.
+
+## Major changes:
+
+* Improved security: all server files are created with 0640 permissions.
+* Improved error messages for queries with invalid syntax.
+* Significantly reduced memory consumption and improved performance when merging large sections of MergeTree data.
+* Significantly increased the performance of data merges for the ReplacingMergeTree engine.
+* Improved performance for asynchronous inserts from a Distributed table by batching multiple source inserts. To enable this functionality, use the setting distributed_directory_monitor_batch_inserts=1.
+
+## Backward incompatible changes:
+
+* Changed the binary format of aggregate states of `groupArray(array_column)` functions for arrays.
+
+## Complete list of changes:
+
+* Added the `output_format_json_quote_denormals` setting, which enables outputting nan and inf values in JSON format.
+* Optimized thread allocation when reading from a Distributed table.
+* Settings can be modified in readonly mode if the value doesn't change.
+* Added the ability to read fractional granules of the MergeTree engine in order to meet restrictions on the block size specified in the preferred_block_size_bytes setting. The purpose is to reduce the consumption of RAM and increase cache locality when processing queries from tables with large columns.
+* Efficient use of indexes that contain expressions like `toStartOfHour(x)` for conditions like `toStartOfHour(x) op сonstexpr.`
+* Added new settings for MergeTree engines (the merge_tree section in config.xml):
+   - replicated_deduplication_window_seconds sets the size of deduplication window in seconds for Replicated tables.
+   - cleanup_delay_period sets how often to start cleanup to remove outdated data.
+   - replicated_can_become_leader can prevent a replica from becoming the leader (and assigning merges).
+* Accelerated cleanup to remove outdated data from ZooKeeper.
+* Multiple improvements and fixes for clustered DDL queries. Of particular interest is the new setting distributed_ddl_task_timeout, which limits the time to wait for a response from the servers in the cluster.
+* Improved display of stack traces in the server logs.
+* Added the "none" value for the compression method.
+* You can use multiple dictionaries_config sections in config.xml.
+* It is possible to connect to MySQL through a socket in the file system.
+* The `system.parts` table has a new column with information about the size of marks, in bytes.
+
+## Bug fixes:
+
+* Distributed tables using a Merge table now work correctly for a SELECT query with a condition on the  _table field.
+* Fixed a rare race condition in ReplicatedMergeTree when checking data parts.
+* Fixed possible freezing on "leader election" when starting a server.
+* The max_replica_delay_for_distributed_queries setting was ignored when using a local replica of the data source. This has been fixed.
+* Fixed incorrect behavior of `ALTER TABLE CLEAR COLUMN IN PARTITION` when attempting to clean a non-existing column.
+* Fixed an exception in the multiIf function when using empty arrays or strings.
+* Fixed excessive memory allocations when deserializing Native format.
+* Fixed incorrect auto-update of Trie dictionaries.
+* Fixed an exception when running queries with a GROUP BY clause from a Merge table when using SAMPLE.
+* Fixed a crash of GROUP BY when using distributed_aggregation_memory_efficient=1.
+* Now you can specify the database.table in the right side of IN and JOIN.
+* Too many threads were used for parallel aggregation. This has been fixed.
+* Fixed how the "if" function works with FixedString arguments.
+* SELECT worked incorrectly from a Distributed table for shards with a weight of 0. This has been fixed.
+* Crashes no longer occur when running `CREATE VIEW IF EXISTS.`
+* Fixed incorrect behavior when input_format_skip_unknown_fields=1 is set and there are negative numbers.
+* Fixed an infinite loop in the `dictGetHierarchy()` function if there is some invalid data in the dictionary.
+* Fixed `Syntax error: unexpected (...)` errors  when running distributed queries with subqueries in an IN or JOIN clause and Merge tables.
+* Fixed the incorrect interpretation of a SELECT query from Dictionary tables.
+* Fixed the "Cannot mremap" error when using arrays in IN and JOIN clauses with more than 2 billion elements.
+* Fixed the failover for dictionaries with MySQL as the source.
+
+## Improved workflow for developing and assembling ClickHouse:
+
+* Builds can be assembled in Arcadia.
+* You can use gcc 7 to compile ClickHouse.
+* Parallel builds using ccache+distcc are faster now.
+
+# ClickHouse release 1.1.54245
+
+## New features:
+
+* Distributed DDL (for example, `CREATE TABLE ON CLUSTER`).
+* The replicated request `ALTER TABLE CLEAR COLUMN IN PARTITION.`
+* The engine for Dictionary tables (access to dictionary data in the form of a table).
+* Dictionary database engine (this type of database automatically has Dictionary tables available for all the connected external dictionaries).
+* You can check for updates to the dictionary by sending a request to the source.
+* Qualified column names
+* Quoting identifiers using double quotation marks.
+* Sessions in the HTTP interface.
+* The OPTIMIZE query for a Replicated table can can run not only on the leader.
+
+## Backward incompatible changes:
+
+* Removed SET GLOBAL.
+
+## Minor changes:
+
+* If an alert is triggered, the full stack trace is printed into the log.
+* Relaxed the verification of the number of damaged or extra data parts at startup (there were too many false positives).
+
+## Bug fixes:
+
+* Fixed a bad connection "sticking" when inserting into a Distributed table.
+* GLOBAL IN now works for a query from a Merge table that looks at a Distributed table.
+* The incorrect number of cores was detected on a Google Compute Engine virtual machine. This has been fixed.
+* Changes in how an executable source of cached external dictionaries works.
+* Fixed the comparison of strings containing null characters.
+* Fixed the comparison of Float32 primary key fields with constants.
+* Previously, an incorrect estimate of the size of a field could lead to overly large allocations. This has been fixed.
+* Fixed a crash when querying a Nullable column added to a table using ALTER.
+* Fixed a crash when sorting by a Nullable column, if the number of rows is less than LIMIT.
+* Fixed an ORDER BY subquery consisting of only constant values.
+* Previously, a Replicated table could remain in the invalid state after a failed DROP TABLE.
+* Aliases for scalar subqueries with empty results are no longer lost.
+* Now a query that used compilation does not fail with an error if the .so file gets damaged.
diff --git a/CHANGELOG_RU.md b/CHANGELOG_RU.md
new file mode 100644
index 00000000000..6ac2620cbac
--- /dev/null
+++ b/CHANGELOG_RU.md
@@ -0,0 +1,114 @@
+# Релиз ClickHouse 1.1.54284
+
+* Релиз содержит изменения к предыдущему релизу 1.1.54282, которые исправляют утечку записей о кусках в ZooKeeper
+
+# Релиз ClickHouse 1.1.54282
+
+Релиз содержит исправления к предыдущему релизу 1.1.54276:
+* Исправлена ошибка `DB::Exception: Assertion violation: !_path.empty()` при вставке в Distributed таблицу.
+* Исправлен парсинг при вставке в формате RowBinary, если входные данные начинаются с ';'.
+* Исправлена ошибка при рантайм-компиляции некоторых агрегатных функций (например, `groupArray()`).
+
+# Релиз ClickHouse 1.1.54276
+
+## Новые возможности:
+* Добавлена опциональная секция WITH запроса SELECT. Пример запроса: `WITH 1+1 AS a SELECT a, a*a`
+* Добавлена возможность синхронной вставки в Distributed таблицу: выдается Ok только после того как все данные записались на все шарды. Активируется настройкой insert_distributed_sync=1
+* Добавлен тип данных UUID для работы с 16-байтовыми идентификаторами
+* Добавлены алиасы типов CHAR, FLOAT и т.д. для совместимости с Tableau
+* Добавлены функции toYYYYMM, toYYYYMMDD, toYYYYMMDDhhmmss для перевода времени в числа
+* Добавлена возможность использовать IP адреса (совместно с hostname) для идентификации сервера при работе с кластерными DDL запросами
+* Добавлена поддержка неконстантных аргументов и отрицательных смещений в функции `substring(str, pos, len)`
+* Добавлен параметр max_size для агрегатной функции `groupArray(max_size)(column)`, и оптимизирована её производительность
+
+## Основные изменения:
+* Улучшение безопасности: все файлы сервера создаются с правами 0640
+* Улучшены сообщения об ошибках в случае синтаксически неверных запросов
+* Значительно уменьшен расход оперативной памяти и улучшена производительность слияний больших MergeTree-кусков данных
+* Значительно увеличена производительность слияний данных для движка ReplacingMergeTree
+* Улучшена производительность асинхронных вставок из Distributed таблицы за счет объединения нескольких исходных вставок. Функциональность включается настройкой distributed_directory_monitor_batch_inserts=1.
+
+## Обратно несовместимые изменения:
+* Изменился бинарный формат агрегатных состояний функции `groupArray(array_column)` для массивов
+
+## Полный список изменений:
+* Добавлена настройка `output_format_json_quote_denormals`, включающая вывод nan и inf значений в формате JSON
+* Более оптимальное выделение потоков при чтении из Distributed таблиц
+* Разрешено задавать настройки в режиме readonly, если их значение не изменяется
+* Добавлена возможность считывать нецелые гранулы движка MergeTree для выполнения ограничений на размер блока, задаваемый настройкой preferred_block_size_bytes - для уменьшения потребления оперативной памяти и увеличения кэш-локальности при обработке запросов из таблиц со столбцами большого размера
+* Эффективное использование индекса, содержащего выражения типа `toStartOfHour(x)`, для условий вида `toStartOfHour(x) op сonstexpr`
+* Добавлены новые настройки для MergeTree движков (секция merge_tree в config.xml):
+  - replicated_deduplication_window_seconds позволяет задать интервал дедупликации вставок в Replicated-таблицы в секундах
+  - cleanup_delay_period - периодичность запуска очистки неактуальных данных
+  - replicated_can_become_leader - запретить реплике становиться лидером (и назначать мержи)
+* Ускорена очистка неактуальных данных из ZooKeeper
+* Множественные улучшения и исправления работы кластерных DDL запросов. В частности, добавлена настройка distributed_ddl_task_timeout, ограничивающая время ожидания ответов серверов кластера.
+* Улучшено отображение стэктрейсов в логах сервера
+* Добавлен метод сжатия none
+* Возможность использования нескольких секций dictionaries_config в config.xml
+* Возможность подключения к MySQL через сокет на файловой системе
+* В таблицу system.parts добавлен столбец с информацией о размере marks в байтах
+
+## Исправления багов:
+* Исправлена некорректная работа Distributed таблиц, использующих Merge таблицы, при SELECT с условием на поле _table
+* Исправлен редкий race condition в ReplicatedMergeTree при проверке кусков данных
+* Исправлено возможное зависание процедуры leader election при старте сервера
+* Исправлено игнорирование настройки max_replica_delay_for_distributed_queries при использовании локальной реплики в качестве источника данных
+* Исправлено некорректное поведение `ALTER TABLE CLEAR COLUMN IN PARTITION` при попытке очистить несуществующую колонку
+* Исправлено исключение в функции multiIf при использовании пустых массивов или строк
+* Исправлено чрезмерное выделение памяти при десериализации формата Native
+* Исправлено некорректное автообновление Trie словарей
+* Исправлено исключение при выполнении запросов с GROUP BY из Merge-таблицы при использовании SAMPLE
+* Исправлено падение GROUP BY при использовании настройки distributed_aggregation_memory_efficient=1
+* Добавлена возможность указывать database.table в правой части IN и JOIN
+* Исправлено использование слишком большого количества потоков при параллельной агрегации
+* Исправлена работа функции if с аргументами FixedString
+* Исправлена некорректная работа SELECT из Distributed-таблицы для шардов с весом 0
+* Исправлено падение запроса `CREATE VIEW IF EXISTS`
+* Исправлено некорректное поведение при input_format_skip_unknown_fields=1 в случае отрицательных чисел
+* Исправлен бесконечный цикл в функции `dictGetHierarchy()` в случае некоторых некорректных данных словаря
+* Исправлены ошибки типа `Syntax error: unexpected (...)` при выполнении распределенных запросов с подзапросами в секции IN или JOIN, в случае использования совместно с Merge таблицами
+* Исправлена неправильная интерпретация SELECT запроса из таблиц типа Dictionary
+* Исправлена ошибка "Cannot mremap" при использовании множеств в секциях IN, JOIN, содержащих более 2 млрд. элементов
+* Исправлен failover для словарей с источником MySQL
+
+## Улучшения процесса разработки и сборки ClickHouse:
+* Добавлена возмозможность сборки в Arcadia
+* Добавлена возможность сборки с помощью gcc 7
+* Ускорена параллельная сборка с помощью ccache+distcc
+
+
+# Релиз ClickHouse 1.1.54245
+
+## Новые возможности:
+* Распределённые DDL (например, `CREATE TABLE ON CLUSTER`)
+* Реплицируемый запрос `ALTER TABLE CLEAR COLUMN IN PARTITION`
+* Движок таблиц Dictionary (доступ к данным словаря в виде таблицы)
+* Движок баз данных Dictionary (в такой базе автоматически доступны Dictionary-таблицы для всех подключённых внешних словарей)
+* Возможность проверки необходимости обновления словаря путём отправки запроса в источник
+* Qualified имена столбцов
+* Квотирование идентификаторов двойными кавычками
+* Сессии в HTTP интерфейсе
+* Запрос OPTIMIZE для Replicated таблицы теперь можно выполнять не только на лидере
+
+## Обратно несовместимые изменения:
+* Убрана команда SET GLOBAL
+
+## Мелкие изменения:
+* Теперь после получения сигнала в лог печатается полный стектрейс
+* Ослаблена проверка на количество повреждённых/лишних кусков при старте (было слишком много ложных срабатываний)
+
+## Исправления багов:
+* Исправлено залипание плохого соединения при вставке в Distributed таблицу
+* GLOBAL IN теперь работает при запросе из таблицы Merge, смотрящей в Distributed
+* Теперь правильно определяется количество ядер на виртуалках Google Compute Engine
+* Исправления в работе executable источника кэшируемых внешних словарей
+* Исправлены сравнения строк, содержащих нулевые символы
+* Исправлено сравнение полей первичного ключа типа Float32 с константами
+* Раньше неправильная оценка размера поля могла приводить к слишком большим аллокациям
+* Исправлено падение при запросе Nullable столбца, добавленного в таблицу ALTER-ом
+* Исправлено падение при сортировке по Nullable столбцу, если количество строк меньше LIMIT
+* Исправлен ORDER BY подзапроса, состоящего только из константных значений
+* Раньше Replicated таблица могла остаться в невалидном состоянии после неудавшегося DROP TABLE
+* Алиасы для скалярных подзапросов с пустым результатом теперь не теряются
+* Теперь запрос, в котором использовалась компиляция, не завершается ошибкой, если .so файл повреждается
diff --git a/CMakeLists.txt b/CMakeLists.txt
index 11eb0790aed..d98476dca39 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -27,11 +27,11 @@ endif ()
 cmake_policy (SET CMP0014 OLD) # Ignore warning about CMakeLists.txt in each directory
 cmake_policy (SET CMP0012 NEW) # Don't dereference TRUE and FALSE
 
-find_program(CCACHE_FOUND ccache)
-if(CCACHE_FOUND AND NOT CMAKE_CXX_COMPILER_LAUNCHER MATCHES "ccache")
-  set_property(GLOBAL PROPERTY RULE_LAUNCH_COMPILE "ccache")
-  set_property(GLOBAL PROPERTY RULE_LAUNCH_LINK "ccache")
-endif()
+find_program (CCACHE_FOUND ccache)
+if (CCACHE_FOUND AND NOT CMAKE_CXX_COMPILER_LAUNCHER MATCHES "ccache" AND NOT CMAKE_CXX_COMPILER MATCHES "ccache")
+  set_property (GLOBAL PROPERTY RULE_LAUNCH_COMPILE "ccache")
+  set_property (GLOBAL PROPERTY RULE_LAUNCH_LINK "ccache")
+endif ()
 
 if (NOT CMAKE_BUILD_TYPE OR CMAKE_BUILD_TYPE STREQUAL "None")
     message (STATUS "CMAKE_BUILD_TYPE is not set, set to default = RELWITHDEBINFO")
diff --git a/MacOS.md b/MacOS.md
index 23457877567..93c04d7c2de 100644
--- a/MacOS.md
+++ b/MacOS.md
@@ -1,6 +1,6 @@
-## How to increase maxfiles on Mac OS X
+## How to increase maxfiles on macOS
 
-To increase maxfiles on MacOS, create the following file:
+To increase maxfiles on macOS, create the following file:
 
 (Note: you'll need to use sudo)
 
diff --git a/cmake/Modules/Finddouble-conversion.cmake b/cmake/Modules/Finddouble-conversion.cmake
index 178964b8e2e..cb01be0f25b 100644
--- a/cmake/Modules/Finddouble-conversion.cmake
+++ b/cmake/Modules/Finddouble-conversion.cmake
@@ -18,7 +18,7 @@
 #  DOUBLE_CONVERSION_INCLUDE_DIR       The location of double-conversion headers
 
 find_path(DOUBLE_CONVERSION_ROOT_DIR
-    NAMES include/double-conversion.h
+    NAMES include/double-conversion/double-conversion.h
 )
 
 find_library(DOUBLE_CONVERSION_LIBRARIES
@@ -27,7 +27,7 @@ find_library(DOUBLE_CONVERSION_LIBRARIES
 )
 
 find_path(DOUBLE_CONVERSION_INCLUDE_DIR
-    NAMES double-conversion.h
+    NAMES double-conversion/double-conversion.h
     PATHS ${DOUBLE_CONVERSION_ROOT_DIR}/include ${DOUBLE_CONVERSION_INCLUDE_PATHS}
 )
 
diff --git a/cmake/add_check.cmake b/cmake/add_check.cmake
index 1f93c5bcfd7..c6abbcdb321 100644
--- a/cmake/add_check.cmake
+++ b/cmake/add_check.cmake
@@ -1,14 +1,16 @@
 # Adding test output on failure
 enable_testing ()
 
-if (CMAKE_CONFIGURATION_TYPES)
-    add_custom_target (check COMMAND ${CMAKE_CTEST_COMMAND}
-        --force-new-ctest-process --output-on-failure --build-config "$<CONFIGURATION>"
-        WORKING_DIRECTORY ${CMAKE_BINARY_DIR})
-else ()
-    add_custom_target (check COMMAND ${CMAKE_CTEST_COMMAND}
-        --force-new-ctest-process --output-on-failure
-        WORKING_DIRECTORY ${CMAKE_BINARY_DIR})
+if (NOT TARGET check)
+    if (CMAKE_CONFIGURATION_TYPES)
+        add_custom_target (check COMMAND ${CMAKE_CTEST_COMMAND}
+            --force-new-ctest-process --output-on-failure --build-config "$<CONFIGURATION>"
+            WORKING_DIRECTORY ${CMAKE_BINARY_DIR})
+    else ()
+        add_custom_target (check COMMAND ${CMAKE_CTEST_COMMAND}
+            --force-new-ctest-process --output-on-failure
+            WORKING_DIRECTORY ${CMAKE_BINARY_DIR})
+    endif ()
 endif ()
 
 macro (add_check target)
diff --git a/cmake/print_include_directories.cmake b/cmake/print_include_directories.cmake
index ba0bba4c9df..83b8064d262 100644
--- a/cmake/print_include_directories.cmake
+++ b/cmake/print_include_directories.cmake
@@ -7,6 +7,11 @@ list(APPEND dirs ${dirs1})
 get_property (dirs1 TARGET common PROPERTY INCLUDE_DIRECTORIES)
 list(APPEND dirs ${dirs1})
 
+if (USE_INTERNAL_BOOST_LIBRARY)
+    get_property (dirs1 TARGET ${Boost_PROGRAM_OPTIONS_LIBRARY} PROPERTY INCLUDE_DIRECTORIES)
+    list(APPEND dirs ${dirs1})
+endif ()
+
 list(REMOVE_DUPLICATES dirs)
 file (WRITE ${CMAKE_CURRENT_BINARY_DIR}/include_directories.txt "")
 foreach (dir ${dirs})
diff --git a/contrib/libzstd/CMakeLists.txt b/contrib/libzstd/CMakeLists.txt
index 00d20cf2146..1900017ebe3 100644
--- a/contrib/libzstd/CMakeLists.txt
+++ b/contrib/libzstd/CMakeLists.txt
@@ -51,30 +51,42 @@ MESSAGE(STATUS "ZSTD VERSION ${LIBVER_MAJOR}.${LIBVER_MINOR}.${LIBVER_RELEASE}")
 
 SET(Sources
         ${LIBRARY_DIR}/common/entropy_common.c
-        ${LIBRARY_DIR}/common/zstd_common.c
-        ${LIBRARY_DIR}/common/xxhash.c
+        ${LIBRARY_DIR}/common/error_private.c
         ${LIBRARY_DIR}/common/fse_decompress.c
+        ${LIBRARY_DIR}/common/pool.c
+        ${LIBRARY_DIR}/common/threading.c
+        ${LIBRARY_DIR}/common/xxhash.c
+        ${LIBRARY_DIR}/common/zstd_common.c
         ${LIBRARY_DIR}/compress/fse_compress.c
         ${LIBRARY_DIR}/compress/huf_compress.c
-        ${LIBRARY_DIR}/compress/zbuff_compress.c
         ${LIBRARY_DIR}/compress/zstd_compress.c
+        ${LIBRARY_DIR}/compress/zstdmt_compress.c
         ${LIBRARY_DIR}/decompress/huf_decompress.c
-        ${LIBRARY_DIR}/decompress/zbuff_decompress.c
         ${LIBRARY_DIR}/decompress/zstd_decompress.c
+        ${LIBRARY_DIR}/deprecated/zbuff_common.c
+        ${LIBRARY_DIR}/deprecated/zbuff_compress.c
+        ${LIBRARY_DIR}/deprecated/zbuff_decompress.c
+        ${LIBRARY_DIR}/dictBuilder/cover.c
         ${LIBRARY_DIR}/dictBuilder/divsufsort.c
         ${LIBRARY_DIR}/dictBuilder/zdict.c)
 
 SET(Headers
         ${LIBRARY_DIR}/common/bitstream.h
         ${LIBRARY_DIR}/common/error_private.h
-        ${LIBRARY_DIR}/common/error_public.h
         ${LIBRARY_DIR}/common/fse.h
         ${LIBRARY_DIR}/common/huf.h
         ${LIBRARY_DIR}/common/mem.h
-        ${LIBRARY_DIR}/common/zbuff.h
+        ${LIBRARY_DIR}/common/pool.h
+        ${LIBRARY_DIR}/common/threading.h
+        ${LIBRARY_DIR}/common/xxhash.h
+        ${LIBRARY_DIR}/common/zstd_errors.h
         ${LIBRARY_DIR}/common/zstd_internal.h
-        ${LIBRARY_DIR}/zstd.h
-        ${LIBRARY_DIR}/dictBuilder/zdict.h)
+        ${LIBRARY_DIR}/compress/zstdmt_compress.h
+        ${LIBRARY_DIR}/compress/zstd_opt.h
+        ${LIBRARY_DIR}/deprecated/zbuff.h
+        ${LIBRARY_DIR}/dictBuilder/divsufsort.h
+        ${LIBRARY_DIR}/dictBuilder/zdict.h
+        ${LIBRARY_DIR}/zstd.h)
 
 SET(ZSTD_LEGACY_SUPPORT true)
 
@@ -84,11 +96,23 @@ IF (ZSTD_LEGACY_SUPPORT)
     ADD_DEFINITIONS(-D ZSTD_LEGACY_SUPPORT=1)
 
     SET(Sources ${Sources}
-            ${LIBRARY_LEGACY_DIR}/zstd_v06.c)
+            ${LIBRARY_LEGACY_DIR}/zstd_v01.c
+            ${LIBRARY_LEGACY_DIR}/zstd_v02.c
+            ${LIBRARY_LEGACY_DIR}/zstd_v03.c
+            ${LIBRARY_LEGACY_DIR}/zstd_v04.c
+            ${LIBRARY_LEGACY_DIR}/zstd_v05.c
+            ${LIBRARY_LEGACY_DIR}/zstd_v06.c
+            ${LIBRARY_LEGACY_DIR}/zstd_v07.c)
 
     SET(Headers ${Headers}
             ${LIBRARY_LEGACY_DIR}/zstd_legacy.h
-            ${LIBRARY_LEGACY_DIR}/zstd_v06.h)
+            ${LIBRARY_LEGACY_DIR}/zstd_v01.h
+            ${LIBRARY_LEGACY_DIR}/zstd_v02.h
+            ${LIBRARY_LEGACY_DIR}/zstd_v03.h
+            ${LIBRARY_LEGACY_DIR}/zstd_v04.h
+            ${LIBRARY_LEGACY_DIR}/zstd_v05.h
+            ${LIBRARY_LEGACY_DIR}/zstd_v06.h
+            ${LIBRARY_LEGACY_DIR}/zstd_v07.h)
 ENDIF (ZSTD_LEGACY_SUPPORT)
 
 ADD_LIBRARY(zstd ${Sources} ${Headers})
diff --git a/contrib/libzstd/README b/contrib/libzstd/README
index 5662f703b55..595103c2c34 100644
--- a/contrib/libzstd/README
+++ b/contrib/libzstd/README
@@ -1 +1 @@
-https://github.com/facebook/zstd/tree/v1.1.0
+https://github.com/facebook/zstd/tree/v1.3.1
diff --git a/contrib/libzstd/include/zstd/common/bitstream.h b/contrib/libzstd/include/zstd/common/bitstream.h
index e96798fe47b..07b85026c95 100644
--- a/contrib/libzstd/include/zstd/common/bitstream.h
+++ b/contrib/libzstd/include/zstd/common/bitstream.h
@@ -2,7 +2,7 @@
    bitstream
    Part of FSE library
    header file (to include)
-   Copyright (C) 2013-2016, Yann Collet.
+   Copyright (C) 2013-2017, Yann Collet.
 
    BSD 2-Clause License (http://www.opensource.org/licenses/bsd-license.php)
 
@@ -39,7 +39,6 @@
 extern "C" {
 #endif
 
-
 /*
 *  This API consists of small unitary functions, which must be inlined for best performance.
 *  Since link-time-optimization is not available for all compilers,
@@ -53,6 +52,18 @@ extern "C" {
 #include "error_private.h"  /* error codes and messages */
 
 
+/*-*************************************
+*  Debug
+***************************************/
+#if defined(BIT_DEBUG) && (BIT_DEBUG>=1)
+#  include <assert.h>
+#else
+#  ifndef assert
+#    define assert(condition) ((void)0)
+#  endif
+#endif
+
+
 /*=========================================
 *  Target specific
 =========================================*/
@@ -60,6 +71,10 @@ extern "C" {
 #  include <immintrin.h>   /* support for bextr (experimental) */
 #endif
 
+#define STREAM_ACCUMULATOR_MIN_32  25
+#define STREAM_ACCUMULATOR_MIN_64  57
+#define STREAM_ACCUMULATOR_MIN    ((U32)(MEM_32bits() ? STREAM_ACCUMULATOR_MIN_32 : STREAM_ACCUMULATOR_MIN_64))
+
 
 /*-******************************************
 *  bitStream encoding API (write forward)
@@ -71,7 +86,7 @@ extern "C" {
 typedef struct
 {
     size_t bitContainer;
-    int    bitPos;
+    unsigned bitPos;
     char*  startPtr;
     char*  ptr;
     char*  endPtr;
@@ -109,6 +124,7 @@ typedef struct
     unsigned bitsConsumed;
     const char* ptr;
     const char* start;
+    const char* limitPtr;
 } BIT_DStream_t;
 
 typedef enum { BIT_DStream_unfinished = 0,
@@ -160,7 +176,10 @@ MEM_STATIC unsigned BIT_highbit32 (register U32 val)
 #   elif defined(__GNUC__) && (__GNUC__ >= 3)   /* Use GCC Intrinsic */
     return 31 - __builtin_clz (val);
 #   else   /* Software version */
-    static const unsigned DeBruijnClz[32] = { 0, 9, 1, 10, 13, 21, 2, 29, 11, 14, 16, 18, 22, 25, 3, 30, 8, 12, 20, 28, 15, 17, 24, 7, 19, 27, 23, 6, 26, 5, 4, 31 };
+    static const unsigned DeBruijnClz[32] = { 0,  9,  1, 10, 13, 21,  2, 29,
+                                             11, 14, 16, 18, 22, 25,  3, 30,
+                                              8, 12, 20, 28, 15, 17, 24,  7,
+                                             19, 27, 23,  6, 26,  5,  4, 31 };
     U32 v = val;
     v |= v >> 1;
     v |= v >> 2;
@@ -172,31 +191,36 @@ MEM_STATIC unsigned BIT_highbit32 (register U32 val)
 }
 
 /*=====    Local Constants   =====*/
-static const unsigned BIT_mask[] = { 0, 1, 3, 7, 0xF, 0x1F, 0x3F, 0x7F, 0xFF, 0x1FF, 0x3FF, 0x7FF, 0xFFF, 0x1FFF, 0x3FFF, 0x7FFF, 0xFFFF, 0x1FFFF, 0x3FFFF, 0x7FFFF, 0xFFFFF, 0x1FFFFF, 0x3FFFFF, 0x7FFFFF,  0xFFFFFF, 0x1FFFFFF, 0x3FFFFFF };   /* up to 26 bits */
+static const unsigned BIT_mask[] = { 0, 1, 3, 7, 0xF, 0x1F, 0x3F, 0x7F,
+                                    0xFF, 0x1FF, 0x3FF, 0x7FF, 0xFFF, 0x1FFF, 0x3FFF, 0x7FFF,
+                                    0xFFFF, 0x1FFFF, 0x3FFFF, 0x7FFFF, 0xFFFFF, 0x1FFFFF, 0x3FFFFF, 0x7FFFFF,
+                                    0xFFFFFF, 0x1FFFFFF, 0x3FFFFFF };   /* up to 26 bits */
 
 
 /*-**************************************************************
 *  bitStream encoding
 ****************************************************************/
 /*! BIT_initCStream() :
- *  `dstCapacity` must be > sizeof(void*)
+ *  `dstCapacity` must be > sizeof(size_t)
  *  @return : 0 if success,
               otherwise an error code (can be tested using ERR_isError() ) */
-MEM_STATIC size_t BIT_initCStream(BIT_CStream_t* bitC, void* startPtr, size_t dstCapacity)
+MEM_STATIC size_t BIT_initCStream(BIT_CStream_t* bitC,
+                                  void* startPtr, size_t dstCapacity)
 {
     bitC->bitContainer = 0;
     bitC->bitPos = 0;
     bitC->startPtr = (char*)startPtr;
     bitC->ptr = bitC->startPtr;
-    bitC->endPtr = bitC->startPtr + dstCapacity - sizeof(bitC->ptr);
-    if (dstCapacity <= sizeof(bitC->ptr)) return ERROR(dstSize_tooSmall);
+    bitC->endPtr = bitC->startPtr + dstCapacity - sizeof(bitC->bitContainer);
+    if (dstCapacity <= sizeof(bitC->bitContainer)) return ERROR(dstSize_tooSmall);
     return 0;
 }
 
 /*! BIT_addBits() :
     can add up to 26 bits into `bitC`.
     Does not check for register overflow ! */
-MEM_STATIC void BIT_addBits(BIT_CStream_t* bitC, size_t value, unsigned nbBits)
+MEM_STATIC void BIT_addBits(BIT_CStream_t* bitC,
+                            size_t value, unsigned nbBits)
 {
     bitC->bitContainer |= (value & BIT_mask[nbBits]) << bitC->bitPos;
     bitC->bitPos += nbBits;
@@ -204,34 +228,42 @@ MEM_STATIC void BIT_addBits(BIT_CStream_t* bitC, size_t value, unsigned nbBits)
 
 /*! BIT_addBitsFast() :
  *  works only if `value` is _clean_, meaning all high bits above nbBits are 0 */
-MEM_STATIC void BIT_addBitsFast(BIT_CStream_t* bitC, size_t value, unsigned nbBits)
+MEM_STATIC void BIT_addBitsFast(BIT_CStream_t* bitC,
+                                size_t value, unsigned nbBits)
 {
+    assert((value>>nbBits) == 0);
     bitC->bitContainer |= value << bitC->bitPos;
     bitC->bitPos += nbBits;
 }
 
 /*! BIT_flushBitsFast() :
+ *  assumption : bitContainer has not overflowed
  *  unsafe version; does not check buffer overflow */
 MEM_STATIC void BIT_flushBitsFast(BIT_CStream_t* bitC)
 {
     size_t const nbBytes = bitC->bitPos >> 3;
+    assert( bitC->bitPos <= (sizeof(bitC->bitContainer)*8) );
     MEM_writeLEST(bitC->ptr, bitC->bitContainer);
     bitC->ptr += nbBytes;
+    assert(bitC->ptr <= bitC->endPtr);
     bitC->bitPos &= 7;
-    bitC->bitContainer >>= nbBytes*8;   /* if bitPos >= sizeof(bitContainer)*8 --> undefined behavior */
+    bitC->bitContainer >>= nbBytes*8;
 }
 
 /*! BIT_flushBits() :
+ *  assumption : bitContainer has not overflowed
  *  safe version; check for buffer overflow, and prevents it.
- *  note : does not signal buffer overflow. This will be revealed later on using BIT_closeCStream() */
+ *  note : does not signal buffer overflow.
+ *  overflow will be revealed later on using BIT_closeCStream() */
 MEM_STATIC void BIT_flushBits(BIT_CStream_t* bitC)
 {
     size_t const nbBytes = bitC->bitPos >> 3;
+    assert( bitC->bitPos <= (sizeof(bitC->bitContainer)*8) );
     MEM_writeLEST(bitC->ptr, bitC->bitContainer);
     bitC->ptr += nbBytes;
     if (bitC->ptr > bitC->endPtr) bitC->ptr = bitC->endPtr;
     bitC->bitPos &= 7;
-    bitC->bitContainer >>= nbBytes*8;   /* if bitPos >= sizeof(bitContainer)*8 --> undefined behavior */
+    bitC->bitContainer >>= nbBytes*8;
 }
 
 /*! BIT_closeCStream() :
@@ -241,9 +273,7 @@ MEM_STATIC size_t BIT_closeCStream(BIT_CStream_t* bitC)
 {
     BIT_addBitsFast(bitC, 1, 1);   /* endMark */
     BIT_flushBits(bitC);
-
-    if (bitC->ptr >= bitC->endPtr) return 0; /* doesn't fit within authorized budget : cancel */
-
+    if (bitC->ptr >= bitC->endPtr) return 0; /* overflow detected */
     return (bitC->ptr - bitC->startPtr) + (bitC->bitPos > 0);
 }
 
@@ -261,26 +291,39 @@ MEM_STATIC size_t BIT_initDStream(BIT_DStream_t* bitD, const void* srcBuffer, si
 {
     if (srcSize < 1) { memset(bitD, 0, sizeof(*bitD)); return ERROR(srcSize_wrong); }
 
+    bitD->start = (const char*)srcBuffer;
+    bitD->limitPtr = bitD->start + sizeof(bitD->bitContainer);
+
     if (srcSize >=  sizeof(bitD->bitContainer)) {  /* normal case */
-        bitD->start = (const char*)srcBuffer;
         bitD->ptr   = (const char*)srcBuffer + srcSize - sizeof(bitD->bitContainer);
         bitD->bitContainer = MEM_readLEST(bitD->ptr);
         { BYTE const lastByte = ((const BYTE*)srcBuffer)[srcSize-1];
-          bitD->bitsConsumed = lastByte ? 8 - BIT_highbit32(lastByte) : 0;
+          bitD->bitsConsumed = lastByte ? 8 - BIT_highbit32(lastByte) : 0;  /* ensures bitsConsumed is always set */
           if (lastByte == 0) return ERROR(GENERIC); /* endMark not present */ }
     } else {
-        bitD->start = (const char*)srcBuffer;
         bitD->ptr   = bitD->start;
         bitD->bitContainer = *(const BYTE*)(bitD->start);
         switch(srcSize)
         {
-            case 7: bitD->bitContainer += (size_t)(((const BYTE*)(srcBuffer))[6]) << (sizeof(bitD->bitContainer)*8 - 16);
-            case 6: bitD->bitContainer += (size_t)(((const BYTE*)(srcBuffer))[5]) << (sizeof(bitD->bitContainer)*8 - 24);
-            case 5: bitD->bitContainer += (size_t)(((const BYTE*)(srcBuffer))[4]) << (sizeof(bitD->bitContainer)*8 - 32);
-            case 4: bitD->bitContainer += (size_t)(((const BYTE*)(srcBuffer))[3]) << 24;
-            case 3: bitD->bitContainer += (size_t)(((const BYTE*)(srcBuffer))[2]) << 16;
-            case 2: bitD->bitContainer += (size_t)(((const BYTE*)(srcBuffer))[1]) <<  8;
-            default:;
+	    case 7: bitD->bitContainer += (size_t)(((const BYTE*)(srcBuffer))[6]) << (sizeof(bitD->bitContainer)*8 - 16);
+	            /* fall-through */
+
+	    case 6: bitD->bitContainer += (size_t)(((const BYTE*)(srcBuffer))[5]) << (sizeof(bitD->bitContainer)*8 - 24);
+	            /* fall-through */
+
+	    case 5: bitD->bitContainer += (size_t)(((const BYTE*)(srcBuffer))[4]) << (sizeof(bitD->bitContainer)*8 - 32);
+	            /* fall-through */
+
+	    case 4: bitD->bitContainer += (size_t)(((const BYTE*)(srcBuffer))[3]) << 24;
+	            /* fall-through */
+
+	    case 3: bitD->bitContainer += (size_t)(((const BYTE*)(srcBuffer))[2]) << 16;
+	            /* fall-through */
+
+	    case 2: bitD->bitContainer += (size_t)(((const BYTE*)(srcBuffer))[1]) <<  8;
+	            /* fall-through */
+
+            default: break;
         }
         { BYTE const lastByte = ((const BYTE*)srcBuffer)[srcSize-1];
           bitD->bitsConsumed = lastByte ? 8 - BIT_highbit32(lastByte) : 0;
@@ -298,7 +341,7 @@ MEM_STATIC size_t BIT_getUpperBits(size_t bitContainer, U32 const start)
 
 MEM_STATIC size_t BIT_getMiddleBits(size_t bitContainer, U32 const start, U32 const nbBits)
 {
-#if defined(__BMI__) && defined(__GNUC__)   /* experimental */
+#if defined(__BMI__) && defined(__GNUC__) && __GNUC__*1000+__GNUC_MINOR__ >= 4008  /* experimental */
 #  if defined(__x86_64__)
     if (sizeof(bitContainer)==8)
         return _bextr_u64(bitContainer, start, nbBits);
@@ -327,17 +370,18 @@ MEM_STATIC size_t BIT_getLowerBits(size_t bitContainer, U32 const nbBits)
 #if defined(__BMI__) && defined(__GNUC__)   /* experimental; fails if bitD->bitsConsumed + nbBits > sizeof(bitD->bitContainer)*8 */
     return BIT_getMiddleBits(bitD->bitContainer, (sizeof(bitD->bitContainer)*8) - bitD->bitsConsumed - nbBits, nbBits);
 #else
-    U32 const bitMask = sizeof(bitD->bitContainer)*8 - 1;
-    return ((bitD->bitContainer << (bitD->bitsConsumed & bitMask)) >> 1) >> ((bitMask-nbBits) & bitMask);
+    U32 const regMask = sizeof(bitD->bitContainer)*8 - 1;
+    return ((bitD->bitContainer << (bitD->bitsConsumed & regMask)) >> 1) >> ((regMask-nbBits) & regMask);
 #endif
 }
 
 /*! BIT_lookBitsFast() :
-*   unsafe version; only works only if nbBits >= 1 */
+ *  unsafe version; only works if nbBits >= 1 */
 MEM_STATIC size_t BIT_lookBitsFast(const BIT_DStream_t* bitD, U32 nbBits)
 {
-    U32 const bitMask = sizeof(bitD->bitContainer)*8 - 1;
-    return (bitD->bitContainer << (bitD->bitsConsumed & bitMask)) >> (((bitMask+1)-nbBits) & bitMask);
+    U32 const regMask = sizeof(bitD->bitContainer)*8 - 1;
+    assert(nbBits >= 1);
+    return (bitD->bitContainer << (bitD->bitsConsumed & regMask)) >> (((regMask+1)-nbBits) & regMask);
 }
 
 MEM_STATIC void BIT_skipBits(BIT_DStream_t* bitD, U32 nbBits)
@@ -362,21 +406,22 @@ MEM_STATIC size_t BIT_readBits(BIT_DStream_t* bitD, U32 nbBits)
 MEM_STATIC size_t BIT_readBitsFast(BIT_DStream_t* bitD, U32 nbBits)
 {
     size_t const value = BIT_lookBitsFast(bitD, nbBits);
+    assert(nbBits >= 1);
     BIT_skipBits(bitD, nbBits);
     return value;
 }
 
 /*! BIT_reloadDStream() :
-*   Refill `BIT_DStream_t` from src buffer previously defined (see BIT_initDStream() ).
+*   Refill `bitD` from buffer previously set in BIT_initDStream() .
 *   This function is safe, it guarantees it will not read beyond src buffer.
 *   @return : status of `BIT_DStream_t` internal register.
-              if status == unfinished, internal register is filled with >= (sizeof(bitD->bitContainer)*8 - 7) bits */
+              if status == BIT_DStream_unfinished, internal register is filled with >= (sizeof(bitD->bitContainer)*8 - 7) bits */
 MEM_STATIC BIT_DStream_status BIT_reloadDStream(BIT_DStream_t* bitD)
 {
-	if (bitD->bitsConsumed > (sizeof(bitD->bitContainer)*8))  /* should not happen => corruption detected */
-		return BIT_DStream_overflow;
+    if (bitD->bitsConsumed > (sizeof(bitD->bitContainer)*8))  /* overflow detected, like end of stream */
+        return BIT_DStream_overflow;
 
-    if (bitD->ptr >= bitD->start + sizeof(bitD->bitContainer)) {
+    if (bitD->ptr >= bitD->limitPtr) {
         bitD->ptr -= bitD->bitsConsumed >> 3;
         bitD->bitsConsumed &= 7;
         bitD->bitContainer = MEM_readLEST(bitD->ptr);
@@ -386,6 +431,7 @@ MEM_STATIC BIT_DStream_status BIT_reloadDStream(BIT_DStream_t* bitD)
         if (bitD->bitsConsumed < sizeof(bitD->bitContainer)*8) return BIT_DStream_endOfBuffer;
         return BIT_DStream_completed;
     }
+    /* start < ptr < limitPtr */
     {   U32 nbBytes = bitD->bitsConsumed >> 3;
         BIT_DStream_status result = BIT_DStream_unfinished;
         if (bitD->ptr - nbBytes < bitD->start) {
@@ -394,7 +440,7 @@ MEM_STATIC BIT_DStream_status BIT_reloadDStream(BIT_DStream_t* bitD)
         }
         bitD->ptr -= nbBytes;
         bitD->bitsConsumed -= nbBytes*8;
-        bitD->bitContainer = MEM_readLEST(bitD->ptr);   /* reminder : srcSize > sizeof(bitD) */
+        bitD->bitContainer = MEM_readLEST(bitD->ptr);   /* reminder : srcSize > sizeof(bitD->bitContainer), otherwise bitD->ptr == bitD->start */
         return result;
     }
 }
diff --git a/contrib/libzstd/include/zstd/common/entropy_common.c b/contrib/libzstd/include/zstd/common/entropy_common.c
index acd96699976..b37a082fee2 100644
--- a/contrib/libzstd/include/zstd/common/entropy_common.c
+++ b/contrib/libzstd/include/zstd/common/entropy_common.c
@@ -43,27 +43,21 @@
 #include "huf.h"
 
 
-/*-****************************************
-*  FSE Error Management
-******************************************/
-unsigned FSE_isError(size_t code) { return ERR_isError(code); }
+/*===   Version   ===*/
+unsigned FSE_versionNumber(void) { return FSE_VERSION_NUMBER; }
 
+
+/*===   Error Management   ===*/
+unsigned FSE_isError(size_t code) { return ERR_isError(code); }
 const char* FSE_getErrorName(size_t code) { return ERR_getErrorName(code); }
 
-
-/* **************************************************************
-*  HUF Error Management
-****************************************************************/
 unsigned HUF_isError(size_t code) { return ERR_isError(code); }
-
 const char* HUF_getErrorName(size_t code) { return ERR_getErrorName(code); }
 
 
 /*-**************************************************************
 *  FSE NCount encoding-decoding
 ****************************************************************/
-static short FSE_abs(short a) { return (short)(a<0 ? -a : a); }
-
 size_t FSE_readNCount (short* normalizedCounter, unsigned* maxSVPtr, unsigned* tableLogPtr,
                  const void* headerBuffer, size_t hbSize)
 {
@@ -117,21 +111,21 @@ size_t FSE_readNCount (short* normalizedCounter, unsigned* maxSVPtr, unsigned* t
             } else {
                 bitStream >>= 2;
         }   }
-        {   short const max = (short)((2*threshold-1)-remaining);
-            short count;
+        {   int const max = (2*threshold-1) - remaining;
+            int count;
 
             if ((bitStream & (threshold-1)) < (U32)max) {
-                count = (short)(bitStream & (threshold-1));
-                bitCount   += nbBits-1;
+                count = bitStream & (threshold-1);
+                bitCount += nbBits-1;
             } else {
-                count = (short)(bitStream & (2*threshold-1));
+                count = bitStream & (2*threshold-1);
                 if (count >= threshold) count -= max;
-                bitCount   += nbBits;
+                bitCount += nbBits;
             }
 
             count--;   /* extra accuracy */
-            remaining -= FSE_abs(count);
-            normalizedCounter[charnum++] = count;
+            remaining -= count < 0 ? -count : count;   /* -1 means +1 */
+            normalizedCounter[charnum++] = (short)count;
             previous0 = !count;
             while (remaining < threshold) {
                 nbBits--;
@@ -159,6 +153,7 @@ size_t FSE_readNCount (short* normalizedCounter, unsigned* maxSVPtr, unsigned* t
 /*! HUF_readStats() :
     Read compact Huffman tree, saved by HUF_writeCTable().
     `huffWeight` is destination buffer.
+    `rankStats` is assumed to be a table of at least HUF_TABLELOG_MAX U32.
     @return : size read from `src` , or an error Code .
     Note : Needed by HUF_readCTable() and HUF_readDTableX?() .
 */
@@ -168,9 +163,11 @@ size_t HUF_readStats(BYTE* huffWeight, size_t hwSize, U32* rankStats,
 {
     U32 weightTotal;
     const BYTE* ip = (const BYTE*) src;
-    size_t iSize = ip[0];
+    size_t iSize;
     size_t oSize;
 
+    if (!srcSize) return ERROR(srcSize_wrong);
+    iSize = ip[0];
     /* memset(huffWeight, 0, hwSize);   *//* is not necessary, even though some analyzer complain ... */
 
     if (iSize >= 128) {  /* special header */
@@ -185,23 +182,25 @@ size_t HUF_readStats(BYTE* huffWeight, size_t hwSize, U32* rankStats,
                 huffWeight[n+1] = ip[n/2] & 15;
     }   }   }
     else  {   /* header compressed with FSE (normal case) */
+        FSE_DTable fseWorkspace[FSE_DTABLE_SIZE_U32(6)];  /* 6 is max possible tableLog for HUF header (maybe even 5, to be tested) */
         if (iSize+1 > srcSize) return ERROR(srcSize_wrong);
-        oSize = FSE_decompress(huffWeight, hwSize-1, ip+1, iSize);   /* max (hwSize-1) values decoded, as last one is implied */
+        oSize = FSE_decompress_wksp(huffWeight, hwSize-1, ip+1, iSize, fseWorkspace, 6);   /* max (hwSize-1) values decoded, as last one is implied */
         if (FSE_isError(oSize)) return oSize;
     }
 
     /* collect weight stats */
-    memset(rankStats, 0, (HUF_TABLELOG_ABSOLUTEMAX + 1) * sizeof(U32));
+    memset(rankStats, 0, (HUF_TABLELOG_MAX + 1) * sizeof(U32));
     weightTotal = 0;
     {   U32 n; for (n=0; n<oSize; n++) {
-            if (huffWeight[n] >= HUF_TABLELOG_ABSOLUTEMAX) return ERROR(corruption_detected);
+            if (huffWeight[n] >= HUF_TABLELOG_MAX) return ERROR(corruption_detected);
             rankStats[huffWeight[n]]++;
             weightTotal += (1 << huffWeight[n]) >> 1;
     }   }
+    if (weightTotal == 0) return ERROR(corruption_detected);
 
     /* get last non-null symbol weight (implied, total must be 2^n) */
     {   U32 const tableLog = BIT_highbit32(weightTotal) + 1;
-        if (tableLog > HUF_TABLELOG_ABSOLUTEMAX) return ERROR(corruption_detected);
+        if (tableLog > HUF_TABLELOG_MAX) return ERROR(corruption_detected);
         *tableLogPtr = tableLog;
         /* determine last weight */
         {   U32 const total = 1 << tableLog;
diff --git a/contrib/libzstd/include/zstd/common/error_private.c b/contrib/libzstd/include/zstd/common/error_private.c
new file mode 100644
index 00000000000..2d752cd23a7
--- /dev/null
+++ b/contrib/libzstd/include/zstd/common/error_private.c
@@ -0,0 +1,47 @@
+/**
+ * Copyright (c) 2016-present, Yann Collet, Facebook, Inc.
+ * All rights reserved.
+ *
+ * This source code is licensed under the BSD-style license found in the
+ * LICENSE file in the root directory of this source tree. An additional grant
+ * of patent rights can be found in the PATENTS file in the same directory.
+ */
+
+/* The purpose of this file is to have a single list of error strings embedded in binary */
+
+#include "error_private.h"
+
+const char* ERR_getErrorString(ERR_enum code)
+{
+    static const char* const notErrorCode = "Unspecified error code";
+    switch( code )
+    {
+    case PREFIX(no_error): return "No error detected";
+    case PREFIX(GENERIC):  return "Error (generic)";
+    case PREFIX(prefix_unknown): return "Unknown frame descriptor";
+    case PREFIX(version_unsupported): return "Version not supported";
+    case PREFIX(parameter_unknown): return "Unknown parameter type";
+    case PREFIX(frameParameter_unsupported): return "Unsupported frame parameter";
+    case PREFIX(frameParameter_unsupportedBy32bits): return "Frame parameter unsupported in 32-bits mode";
+    case PREFIX(frameParameter_windowTooLarge): return "Frame requires too much memory for decoding";
+    case PREFIX(compressionParameter_unsupported): return "Compression parameter is not supported";
+    case PREFIX(compressionParameter_outOfBound): return "Compression parameter is out of bound";
+    case PREFIX(init_missing): return "Context should be init first";
+    case PREFIX(memory_allocation): return "Allocation error : not enough memory";
+    case PREFIX(stage_wrong): return "Operation not authorized at current processing stage";
+    case PREFIX(dstSize_tooSmall): return "Destination buffer is too small";
+    case PREFIX(srcSize_wrong): return "Src size is incorrect";
+    case PREFIX(corruption_detected): return "Corrupted block detected";
+    case PREFIX(checksum_wrong): return "Restored data doesn't match checksum";
+    case PREFIX(tableLog_tooLarge): return "tableLog requires too much memory : unsupported";
+    case PREFIX(maxSymbolValue_tooLarge): return "Unsupported max Symbol Value : too large";
+    case PREFIX(maxSymbolValue_tooSmall): return "Specified maxSymbolValue is too small";
+    case PREFIX(dictionary_corrupted): return "Dictionary is corrupted";
+    case PREFIX(dictionary_wrong): return "Dictionary mismatch";
+    case PREFIX(dictionaryCreation_failed): return "Cannot create Dictionary from provided samples";
+    case PREFIX(frameIndex_tooLarge): return "Frame index is too large";
+    case PREFIX(seekableIO): return "An I/O error occurred when reading/seeking";
+    case PREFIX(maxCode):
+    default: return notErrorCode;
+    }
+}
diff --git a/contrib/libzstd/include/zstd/common/error_private.h b/contrib/libzstd/include/zstd/common/error_private.h
index d27e15af8b4..1bc2e495481 100644
--- a/contrib/libzstd/include/zstd/common/error_private.h
+++ b/contrib/libzstd/include/zstd/common/error_private.h
@@ -21,7 +21,7 @@ extern "C" {
 *  Dependencies
 ******************************************/
 #include <stddef.h>        /* size_t */
-#include "error_public.h"  /* enum list */
+#include "zstd_errors.h"  /* enum list */
 
 
 /* ****************************************
@@ -62,35 +62,7 @@ ERR_STATIC ERR_enum ERR_getErrorCode(size_t code) { if (!ERR_isError(code)) retu
 *  Error Strings
 ******************************************/
 
-ERR_STATIC const char* ERR_getErrorString(ERR_enum code)
-{
-    static const char* notErrorCode = "Unspecified error code";
-    switch( code )
-    {
-    case PREFIX(no_error): return "No error detected";
-    case PREFIX(GENERIC):  return "Error (generic)";
-    case PREFIX(prefix_unknown): return "Unknown frame descriptor";
-    case PREFIX(version_unsupported): return "Version not supported";
-    case PREFIX(parameter_unknown): return "Unknown parameter type";
-    case PREFIX(frameParameter_unsupported): return "Unsupported frame parameter";
-    case PREFIX(frameParameter_unsupportedBy32bits): return "Frame parameter unsupported in 32-bits mode";
-    case PREFIX(compressionParameter_unsupported): return "Compression parameter is out of bound";
-    case PREFIX(init_missing): return "Context should be init first";
-    case PREFIX(memory_allocation): return "Allocation error : not enough memory";
-    case PREFIX(stage_wrong): return "Operation not authorized at current processing stage";
-    case PREFIX(dstSize_tooSmall): return "Destination buffer is too small";
-    case PREFIX(srcSize_wrong): return "Src size incorrect";
-    case PREFIX(corruption_detected): return "Corrupted block detected";
-    case PREFIX(checksum_wrong): return "Restored data doesn't match checksum";
-    case PREFIX(tableLog_tooLarge): return "tableLog requires too much memory : unsupported";
-    case PREFIX(maxSymbolValue_tooLarge): return "Unsupported max Symbol Value : too large";
-    case PREFIX(maxSymbolValue_tooSmall): return "Specified maxSymbolValue is too small";
-    case PREFIX(dictionary_corrupted): return "Dictionary is corrupted";
-    case PREFIX(dictionary_wrong): return "Dictionary mismatch";
-    case PREFIX(maxCode):
-    default: return notErrorCode;
-    }
-}
+const char* ERR_getErrorString(ERR_enum code);   /* error_private.c */
 
 ERR_STATIC const char* ERR_getErrorName(size_t code)
 {
diff --git a/contrib/libzstd/include/zstd/common/error_public.h b/contrib/libzstd/include/zstd/common/error_public.h
deleted file mode 100644
index d46abd2c720..00000000000
--- a/contrib/libzstd/include/zstd/common/error_public.h
+++ /dev/null
@@ -1,59 +0,0 @@
-/**
- * Copyright (c) 2016-present, Yann Collet, Facebook, Inc.
- * All rights reserved.
- *
- * This source code is licensed under the BSD-style license found in the
- * LICENSE file in the root directory of this source tree. An additional grant
- * of patent rights can be found in the PATENTS file in the same directory.
- */
-
-#ifndef ERROR_PUBLIC_H_MODULE
-#define ERROR_PUBLIC_H_MODULE
-
-#if defined (__cplusplus)
-extern "C" {
-#endif
-
-/*===== dependency =====*/
-#include <stddef.h>   /* size_t */
-
-
-/*-****************************************
-*  error codes list
-******************************************/
-typedef enum {
-  ZSTD_error_no_error,
-  ZSTD_error_GENERIC,
-  ZSTD_error_prefix_unknown,
-  ZSTD_error_version_unsupported,
-  ZSTD_error_parameter_unknown,
-  ZSTD_error_frameParameter_unsupported,
-  ZSTD_error_frameParameter_unsupportedBy32bits,
-  ZSTD_error_compressionParameter_unsupported,
-  ZSTD_error_init_missing,
-  ZSTD_error_memory_allocation,
-  ZSTD_error_stage_wrong,
-  ZSTD_error_dstSize_tooSmall,
-  ZSTD_error_srcSize_wrong,
-  ZSTD_error_corruption_detected,
-  ZSTD_error_checksum_wrong,
-  ZSTD_error_tableLog_tooLarge,
-  ZSTD_error_maxSymbolValue_tooLarge,
-  ZSTD_error_maxSymbolValue_tooSmall,
-  ZSTD_error_dictionary_corrupted,
-  ZSTD_error_dictionary_wrong,
-  ZSTD_error_maxCode
-} ZSTD_ErrorCode;
-
-/*! ZSTD_getErrorCode() :
-    convert a `size_t` function result into a `ZSTD_ErrorCode` enum type,
-    which can be used to compare directly with enum list published into "error_public.h" */
-ZSTD_ErrorCode ZSTD_getErrorCode(size_t functionResult);
-const char* ZSTD_getErrorString(ZSTD_ErrorCode code);
-
-
-#if defined (__cplusplus)
-}
-#endif
-
-#endif /* ERROR_PUBLIC_H_MODULE */
diff --git a/contrib/libzstd/include/zstd/common/fse.h b/contrib/libzstd/include/zstd/common/fse.h
index 720d54b111e..6d5d41def19 100644
--- a/contrib/libzstd/include/zstd/common/fse.h
+++ b/contrib/libzstd/include/zstd/common/fse.h
@@ -45,6 +45,32 @@ extern "C" {
 #include <stddef.h>    /* size_t, ptrdiff_t */
 
 
+/*-*****************************************
+*  FSE_PUBLIC_API : control library symbols visibility
+******************************************/
+#if defined(FSE_DLL_EXPORT) && (FSE_DLL_EXPORT==1) && defined(__GNUC__) && (__GNUC__ >= 4)
+#  define FSE_PUBLIC_API __attribute__ ((visibility ("default")))
+#elif defined(FSE_DLL_EXPORT) && (FSE_DLL_EXPORT==1)   /* Visual expected */
+#  define FSE_PUBLIC_API __declspec(dllexport)
+#elif defined(FSE_DLL_IMPORT) && (FSE_DLL_IMPORT==1)
+#  define FSE_PUBLIC_API __declspec(dllimport) /* It isn't required but allows to generate better code, saving a function pointer load from the IAT and an indirect jump.*/
+#else
+#  define FSE_PUBLIC_API
+#endif
+
+/*------   Version   ------*/
+#define FSE_VERSION_MAJOR    0
+#define FSE_VERSION_MINOR    9
+#define FSE_VERSION_RELEASE  0
+
+#define FSE_LIB_VERSION FSE_VERSION_MAJOR.FSE_VERSION_MINOR.FSE_VERSION_RELEASE
+#define FSE_QUOTE(str) #str
+#define FSE_EXPAND_AND_QUOTE(str) FSE_QUOTE(str)
+#define FSE_VERSION_STRING FSE_EXPAND_AND_QUOTE(FSE_LIB_VERSION)
+
+#define FSE_VERSION_NUMBER  (FSE_VERSION_MAJOR *100*100 + FSE_VERSION_MINOR *100 + FSE_VERSION_RELEASE)
+FSE_PUBLIC_API unsigned FSE_versionNumber(void);   /**< library version number; to be used when checking dll version */
+
 /*-****************************************
 *  FSE simple functions
 ******************************************/
@@ -56,8 +82,8 @@ extern "C" {
                      if return == 1, srcData is a single byte symbol * srcSize times. Use RLE compression instead.
                      if FSE_isError(return), compression failed (more details using FSE_getErrorName())
 */
-size_t FSE_compress(void* dst, size_t dstCapacity,
-              const void* src, size_t srcSize);
+FSE_PUBLIC_API size_t FSE_compress(void* dst, size_t dstCapacity,
+                             const void* src, size_t srcSize);
 
 /*! FSE_decompress():
     Decompress FSE data from buffer 'cSrc', of size 'cSrcSize',
@@ -69,18 +95,18 @@ size_t FSE_compress(void* dst, size_t dstCapacity,
     Why ? : making this distinction requires a header.
     Header management is intentionally delegated to the user layer, which can better manage special cases.
 */
-size_t FSE_decompress(void* dst,  size_t dstCapacity,
-                const void* cSrc, size_t cSrcSize);
+FSE_PUBLIC_API size_t FSE_decompress(void* dst,  size_t dstCapacity,
+                               const void* cSrc, size_t cSrcSize);
 
 
 /*-*****************************************
 *  Tool functions
 ******************************************/
-size_t FSE_compressBound(size_t size);       /* maximum compressed size */
+FSE_PUBLIC_API size_t FSE_compressBound(size_t size);       /* maximum compressed size */
 
 /* Error Management */
-unsigned    FSE_isError(size_t code);        /* tells if a return value is an error code */
-const char* FSE_getErrorName(size_t code);   /* provides error code string (useful for debugging) */
+FSE_PUBLIC_API unsigned    FSE_isError(size_t code);        /* tells if a return value is an error code */
+FSE_PUBLIC_API const char* FSE_getErrorName(size_t code);   /* provides error code string (useful for debugging) */
 
 
 /*-*****************************************
@@ -94,7 +120,7 @@ const char* FSE_getErrorName(size_t code);   /* provides error code string (usef
                      if return == 1, srcData is a single byte symbol * srcSize times. Use RLE compression.
                      if FSE_isError(return), it's an error code.
 */
-size_t FSE_compress2 (void* dst, size_t dstSize, const void* src, size_t srcSize, unsigned maxSymbolValue, unsigned tableLog);
+FSE_PUBLIC_API size_t FSE_compress2 (void* dst, size_t dstSize, const void* src, size_t srcSize, unsigned maxSymbolValue, unsigned tableLog);
 
 
 /*-*****************************************
@@ -127,50 +153,50 @@ or to save and provide normalized distribution using external method.
     @return : the count of the most frequent symbol (which is not identified).
               if return == srcSize, there is only one symbol.
               Can also return an error code, which can be tested with FSE_isError(). */
-size_t FSE_count(unsigned* count, unsigned* maxSymbolValuePtr, const void* src, size_t srcSize);
+FSE_PUBLIC_API size_t FSE_count(unsigned* count, unsigned* maxSymbolValuePtr, const void* src, size_t srcSize);
 
 /*! FSE_optimalTableLog():
     dynamically downsize 'tableLog' when conditions are met.
     It saves CPU time, by using smaller tables, while preserving or even improving compression ratio.
     @return : recommended tableLog (necessarily <= 'maxTableLog') */
-unsigned FSE_optimalTableLog(unsigned maxTableLog, size_t srcSize, unsigned maxSymbolValue);
+FSE_PUBLIC_API unsigned FSE_optimalTableLog(unsigned maxTableLog, size_t srcSize, unsigned maxSymbolValue);
 
 /*! FSE_normalizeCount():
     normalize counts so that sum(count[]) == Power_of_2 (2^tableLog)
     'normalizedCounter' is a table of short, of minimum size (maxSymbolValue+1).
     @return : tableLog,
               or an errorCode, which can be tested using FSE_isError() */
-size_t FSE_normalizeCount(short* normalizedCounter, unsigned tableLog, const unsigned* count, size_t srcSize, unsigned maxSymbolValue);
+FSE_PUBLIC_API size_t FSE_normalizeCount(short* normalizedCounter, unsigned tableLog, const unsigned* count, size_t srcSize, unsigned maxSymbolValue);
 
 /*! FSE_NCountWriteBound():
     Provides the maximum possible size of an FSE normalized table, given 'maxSymbolValue' and 'tableLog'.
     Typically useful for allocation purpose. */
-size_t FSE_NCountWriteBound(unsigned maxSymbolValue, unsigned tableLog);
+FSE_PUBLIC_API size_t FSE_NCountWriteBound(unsigned maxSymbolValue, unsigned tableLog);
 
 /*! FSE_writeNCount():
     Compactly save 'normalizedCounter' into 'buffer'.
     @return : size of the compressed table,
               or an errorCode, which can be tested using FSE_isError(). */
-size_t FSE_writeNCount (void* buffer, size_t bufferSize, const short* normalizedCounter, unsigned maxSymbolValue, unsigned tableLog);
+FSE_PUBLIC_API size_t FSE_writeNCount (void* buffer, size_t bufferSize, const short* normalizedCounter, unsigned maxSymbolValue, unsigned tableLog);
 
 
 /*! Constructor and Destructor of FSE_CTable.
     Note that FSE_CTable size depends on 'tableLog' and 'maxSymbolValue' */
 typedef unsigned FSE_CTable;   /* don't allocate that. It's only meant to be more restrictive than void* */
-FSE_CTable* FSE_createCTable (unsigned tableLog, unsigned maxSymbolValue);
-void        FSE_freeCTable (FSE_CTable* ct);
+FSE_PUBLIC_API FSE_CTable* FSE_createCTable (unsigned tableLog, unsigned maxSymbolValue);
+FSE_PUBLIC_API void        FSE_freeCTable (FSE_CTable* ct);
 
 /*! FSE_buildCTable():
     Builds `ct`, which must be already allocated, using FSE_createCTable().
     @return : 0, or an errorCode, which can be tested using FSE_isError() */
-size_t FSE_buildCTable(FSE_CTable* ct, const short* normalizedCounter, unsigned maxSymbolValue, unsigned tableLog);
+FSE_PUBLIC_API size_t FSE_buildCTable(FSE_CTable* ct, const short* normalizedCounter, unsigned maxSymbolValue, unsigned tableLog);
 
 /*! FSE_compress_usingCTable():
     Compress `src` using `ct` into `dst` which must be already allocated.
     @return : size of compressed data (<= `dstCapacity`),
               or 0 if compressed data could not fit into `dst`,
               or an errorCode, which can be tested using FSE_isError() */
-size_t FSE_compress_usingCTable (void* dst, size_t dstCapacity, const void* src, size_t srcSize, const FSE_CTable* ct);
+FSE_PUBLIC_API size_t FSE_compress_usingCTable (void* dst, size_t dstCapacity, const void* src, size_t srcSize, const FSE_CTable* ct);
 
 /*!
 Tutorial :
@@ -223,25 +249,25 @@ If there is an error, the function will return an ErrorCode (which can be tested
     @return : size read from 'rBuffer',
               or an errorCode, which can be tested using FSE_isError().
               maxSymbolValuePtr[0] and tableLogPtr[0] will also be updated with their respective values */
-size_t FSE_readNCount (short* normalizedCounter, unsigned* maxSymbolValuePtr, unsigned* tableLogPtr, const void* rBuffer, size_t rBuffSize);
+FSE_PUBLIC_API size_t FSE_readNCount (short* normalizedCounter, unsigned* maxSymbolValuePtr, unsigned* tableLogPtr, const void* rBuffer, size_t rBuffSize);
 
 /*! Constructor and Destructor of FSE_DTable.
     Note that its size depends on 'tableLog' */
 typedef unsigned FSE_DTable;   /* don't allocate that. It's just a way to be more restrictive than void* */
-FSE_DTable* FSE_createDTable(unsigned tableLog);
-void        FSE_freeDTable(FSE_DTable* dt);
+FSE_PUBLIC_API FSE_DTable* FSE_createDTable(unsigned tableLog);
+FSE_PUBLIC_API void        FSE_freeDTable(FSE_DTable* dt);
 
 /*! FSE_buildDTable():
     Builds 'dt', which must be already allocated, using FSE_createDTable().
     return : 0, or an errorCode, which can be tested using FSE_isError() */
-size_t FSE_buildDTable (FSE_DTable* dt, const short* normalizedCounter, unsigned maxSymbolValue, unsigned tableLog);
+FSE_PUBLIC_API size_t FSE_buildDTable (FSE_DTable* dt, const short* normalizedCounter, unsigned maxSymbolValue, unsigned tableLog);
 
 /*! FSE_decompress_usingDTable():
     Decompress compressed source `cSrc` of size `cSrcSize` using `dt`
     into `dst` which must be already allocated.
     @return : size of regenerated data (necessarily <= `dstCapacity`),
               or an errorCode, which can be tested using FSE_isError() */
-size_t FSE_decompress_usingDTable(void* dst, size_t dstCapacity, const void* cSrc, size_t cSrcSize, const FSE_DTable* dt);
+FSE_PUBLIC_API size_t FSE_decompress_usingDTable(void* dst, size_t dstCapacity, const void* cSrc, size_t cSrcSize, const FSE_DTable* dt);
 
 /*!
 Tutorial :
@@ -286,45 +312,84 @@ If there is an error, the function will return an error code, which can be teste
 #define FSE_BLOCKBOUND(size) (size + (size>>7))
 #define FSE_COMPRESSBOUND(size) (FSE_NCOUNTBOUND + FSE_BLOCKBOUND(size))   /* Macro version, useful for static allocation */
 
-/* It is possible to statically allocate FSE CTable/DTable as a table of unsigned using below macros */
+/* It is possible to statically allocate FSE CTable/DTable as a table of FSE_CTable/FSE_DTable using below macros */
 #define FSE_CTABLE_SIZE_U32(maxTableLog, maxSymbolValue)   (1 + (1<<(maxTableLog-1)) + ((maxSymbolValue+1)*2))
 #define FSE_DTABLE_SIZE_U32(maxTableLog)                   (1 + (1<<maxTableLog))
 
+/* or use the size to malloc() space directly. Pay attention to alignment restrictions though */
+#define FSE_CTABLE_SIZE(maxTableLog, maxSymbolValue)   (FSE_CTABLE_SIZE_U32(maxTableLog, maxSymbolValue) * sizeof(FSE_CTable))
+#define FSE_DTABLE_SIZE(maxTableLog)                   (FSE_DTABLE_SIZE_U32(maxTableLog) * sizeof(FSE_DTable))
+
 
 /* *****************************************
 *  FSE advanced API
 *******************************************/
+/* FSE_count_wksp() :
+ * Same as FSE_count(), but using an externally provided scratch buffer.
+ * `workSpace` size must be table of >= `1024` unsigned
+ */
+size_t FSE_count_wksp(unsigned* count, unsigned* maxSymbolValuePtr,
+                 const void* source, size_t sourceSize, unsigned* workSpace);
+
+/** FSE_countFast() :
+ *  same as FSE_count(), but blindly trusts that all byte values within src are <= *maxSymbolValuePtr
+ */
 size_t FSE_countFast(unsigned* count, unsigned* maxSymbolValuePtr, const void* src, size_t srcSize);
-/**< same as FSE_count(), but blindly trusts that all byte values within src are <= *maxSymbolValuePtr  */
+
+/* FSE_countFast_wksp() :
+ * Same as FSE_countFast(), but using an externally provided scratch buffer.
+ * `workSpace` must be a table of minimum `1024` unsigned
+ */
+size_t FSE_countFast_wksp(unsigned* count, unsigned* maxSymbolValuePtr, const void* src, size_t srcSize, unsigned* workSpace);
+
+/*! FSE_count_simple
+ * Same as FSE_countFast(), but does not use any additional memory (not even on stack).
+ * This function is unsafe, and will segfault if any value within `src` is `> *maxSymbolValuePtr` (presuming it's also the size of `count`).
+*/
+size_t FSE_count_simple(unsigned* count, unsigned* maxSymbolValuePtr, const void* src, size_t srcSize);
+
+
 
 unsigned FSE_optimalTableLog_internal(unsigned maxTableLog, size_t srcSize, unsigned maxSymbolValue, unsigned minus);
 /**< same as FSE_optimalTableLog(), which used `minus==2` */
 
+/* FSE_compress_wksp() :
+ * Same as FSE_compress2(), but using an externally allocated scratch buffer (`workSpace`).
+ * FSE_WKSP_SIZE_U32() provides the minimum size required for `workSpace` as a table of FSE_CTable.
+ */
+#define FSE_WKSP_SIZE_U32(maxTableLog, maxSymbolValue)   ( FSE_CTABLE_SIZE_U32(maxTableLog, maxSymbolValue) + ((maxTableLog > 12) ? (1 << (maxTableLog - 2)) : 1024) )
+size_t FSE_compress_wksp (void* dst, size_t dstSize, const void* src, size_t srcSize, unsigned maxSymbolValue, unsigned tableLog, void* workSpace, size_t wkspSize);
+
 size_t FSE_buildCTable_raw (FSE_CTable* ct, unsigned nbBits);
-/**< build a fake FSE_CTable, designed to not compress an input, where each symbol uses nbBits */
+/**< build a fake FSE_CTable, designed for a flat distribution, where each symbol uses nbBits */
 
 size_t FSE_buildCTable_rle (FSE_CTable* ct, unsigned char symbolValue);
 /**< build a fake FSE_CTable, designed to compress always the same symbolValue */
 
+/* FSE_buildCTable_wksp() :
+ * Same as FSE_buildCTable(), but using an externally allocated scratch buffer (`workSpace`).
+ * `wkspSize` must be >= `(1<<tableLog)`.
+ */
+size_t FSE_buildCTable_wksp(FSE_CTable* ct, const short* normalizedCounter, unsigned maxSymbolValue, unsigned tableLog, void* workSpace, size_t wkspSize);
+
 size_t FSE_buildDTable_raw (FSE_DTable* dt, unsigned nbBits);
-/**< build a fake FSE_DTable, designed to read an uncompressed bitstream where each symbol uses nbBits */
+/**< build a fake FSE_DTable, designed to read a flat distribution where each symbol uses nbBits */
 
 size_t FSE_buildDTable_rle (FSE_DTable* dt, unsigned char symbolValue);
 /**< build a fake FSE_DTable, designed to always generate the same symbolValue */
 
+size_t FSE_decompress_wksp(void* dst, size_t dstCapacity, const void* cSrc, size_t cSrcSize, FSE_DTable* workSpace, unsigned maxLog);
+/**< same as FSE_decompress(), using an externally allocated `workSpace` produced with `FSE_DTABLE_SIZE_U32(maxLog)` */
+
 
 /* *****************************************
 *  FSE symbol compression API
 *******************************************/
 /*!
    This API consists of small unitary functions, which highly benefit from being inlined.
-   You will want to enable link-time-optimization to ensure these functions are properly inlined in your binary.
-   Visual seems to do it automatically.
-   For gcc or clang, you'll need to add -flto flag at compilation and linking stages.
-   If none of these solutions is applicable, include "fse.c" directly.
+   Hence their body are included in next section.
 */
-typedef struct
-{
+typedef struct {
     ptrdiff_t   value;
     const void* stateTable;
     const void* symbolTT;
@@ -384,8 +449,7 @@ If there is an error, it returns an errorCode (which can be tested using FSE_isE
 /* *****************************************
 *  FSE symbol decompression API
 *******************************************/
-typedef struct
-{
+typedef struct {
     size_t      state;
     const void* table;   /* precise table may vary, depending on U16 */
 } FSE_DState_t;
@@ -490,9 +554,9 @@ MEM_STATIC void FSE_initCState2(FSE_CState_t* statePtr, const FSE_CTable* ct, U3
 
 MEM_STATIC void FSE_encodeSymbol(BIT_CStream_t* bitC, FSE_CState_t* statePtr, U32 symbol)
 {
-    const FSE_symbolCompressionTransform symbolTT = ((const FSE_symbolCompressionTransform*)(statePtr->symbolTT))[symbol];
+    FSE_symbolCompressionTransform const symbolTT = ((const FSE_symbolCompressionTransform*)(statePtr->symbolTT))[symbol];
     const U16* const stateTable = (const U16*)(statePtr->stateTable);
-    U32 nbBitsOut  = (U32)((statePtr->value + symbolTT.deltaNbBits) >> 16);
+    U32 const nbBitsOut  = (U32)((statePtr->value + symbolTT.deltaNbBits) >> 16);
     BIT_addBits(bitC, statePtr->value, nbBitsOut);
     statePtr->value = stateTable[ (statePtr->value >> nbBitsOut) + symbolTT.deltaFindState];
 }
@@ -503,6 +567,7 @@ MEM_STATIC void FSE_flushCState(BIT_CStream_t* bitC, const FSE_CState_t* statePt
     BIT_flushBits(bitC);
 }
 
+
 /* ======    Decompression    ====== */
 
 typedef struct {
@@ -581,14 +646,19 @@ MEM_STATIC unsigned FSE_endOfDState(const FSE_DState_t* DStatePtr)
 *  Increasing memory usage improves compression ratio
 *  Reduced memory usage can improve speed, due to cache effect
 *  Recommended max value is 14, for 16KB, which nicely fits into Intel x86 L1 cache */
-#define FSE_MAX_MEMORY_USAGE 14
-#define FSE_DEFAULT_MEMORY_USAGE 13
+#ifndef FSE_MAX_MEMORY_USAGE
+#  define FSE_MAX_MEMORY_USAGE 14
+#endif
+#ifndef FSE_DEFAULT_MEMORY_USAGE
+#  define FSE_DEFAULT_MEMORY_USAGE 13
+#endif
 
 /*!FSE_MAX_SYMBOL_VALUE :
 *  Maximum symbol value authorized.
 *  Required for proper stack allocation */
-#define FSE_MAX_SYMBOL_VALUE 255
-
+#ifndef FSE_MAX_SYMBOL_VALUE
+#  define FSE_MAX_SYMBOL_VALUE 255
+#endif
 
 /* **************************************************************
 *  template functions type & suffix
diff --git a/contrib/libzstd/include/zstd/common/fse_decompress.c b/contrib/libzstd/include/zstd/common/fse_decompress.c
index 7492a3832b7..8474a4c079b 100644
--- a/contrib/libzstd/include/zstd/common/fse_decompress.c
+++ b/contrib/libzstd/include/zstd/common/fse_decompress.c
@@ -59,7 +59,6 @@
 ****************************************************************/
 #include <stdlib.h>     /* malloc, free, qsort */
 #include <string.h>     /* memcpy, memset */
-#include <stdio.h>      /* printf (debug) */
 #include "bitstream.h"
 #define FSE_STATIC_LINKING_ONLY
 #include "fse.h"
@@ -75,12 +74,6 @@
 #define CHECK_F(f) { size_t const e = f; if (FSE_isError(e)) return e; }
 
 
-/* **************************************************************
-*  Complex types
-****************************************************************/
-typedef U32 DTable_max_t[FSE_DTABLE_SIZE_U32(FSE_MAX_TABLELOG)];
-
-
 /* **************************************************************
 *  Templates
 ****************************************************************/
@@ -300,28 +293,34 @@ size_t FSE_decompress_usingDTable(void* dst, size_t originalSize,
 }
 
 
-size_t FSE_decompress(void* dst, size_t maxDstSize, const void* cSrc, size_t cSrcSize)
+size_t FSE_decompress_wksp(void* dst, size_t dstCapacity, const void* cSrc, size_t cSrcSize, FSE_DTable* workSpace, unsigned maxLog)
 {
     const BYTE* const istart = (const BYTE*)cSrc;
     const BYTE* ip = istart;
     short counting[FSE_MAX_SYMBOL_VALUE+1];
-    DTable_max_t dt;   /* Static analyzer seems unable to understand this table will be properly initialized later */
     unsigned tableLog;
     unsigned maxSymbolValue = FSE_MAX_SYMBOL_VALUE;
 
-    if (cSrcSize<2) return ERROR(srcSize_wrong);   /* too small input size */
-
     /* normal FSE decoding mode */
-    {   size_t const NCountLength = FSE_readNCount (counting, &maxSymbolValue, &tableLog, istart, cSrcSize);
-        if (FSE_isError(NCountLength)) return NCountLength;
-        if (NCountLength >= cSrcSize) return ERROR(srcSize_wrong);   /* too small input size */
-        ip += NCountLength;
-        cSrcSize -= NCountLength;
-    }
+    size_t const NCountLength = FSE_readNCount (counting, &maxSymbolValue, &tableLog, istart, cSrcSize);
+    if (FSE_isError(NCountLength)) return NCountLength;
+    //if (NCountLength >= cSrcSize) return ERROR(srcSize_wrong);   /* too small input size; supposed to be already checked in NCountLength, only remaining case : NCountLength==cSrcSize */
+    if (tableLog > maxLog) return ERROR(tableLog_tooLarge);
+    ip += NCountLength;
+    cSrcSize -= NCountLength;
 
-    CHECK_F( FSE_buildDTable (dt, counting, maxSymbolValue, tableLog) );
+    CHECK_F( FSE_buildDTable (workSpace, counting, maxSymbolValue, tableLog) );
 
-    return FSE_decompress_usingDTable (dst, maxDstSize, ip, cSrcSize, dt);   /* always return, even if it is an error code */
+    return FSE_decompress_usingDTable (dst, dstCapacity, ip, cSrcSize, workSpace);   /* always return, even if it is an error code */
+}
+
+
+typedef FSE_DTable DTable_max_t[FSE_DTABLE_SIZE_U32(FSE_MAX_TABLELOG)];
+
+size_t FSE_decompress(void* dst, size_t dstCapacity, const void* cSrc, size_t cSrcSize)
+{
+    DTable_max_t dt;   /* Static analyzer seems unable to understand this table will be properly initialized later */
+    return FSE_decompress_wksp(dst, dstCapacity, cSrc, cSrcSize, dt, FSE_MAX_TABLELOG);
 }
 
 
diff --git a/contrib/libzstd/include/zstd/common/huf.h b/contrib/libzstd/include/zstd/common/huf.h
index 29bab4b7646..dabd359915a 100644
--- a/contrib/libzstd/include/zstd/common/huf.h
+++ b/contrib/libzstd/include/zstd/common/huf.h
@@ -43,6 +43,21 @@ extern "C" {
 #include <stddef.h>    /* size_t */
 
 
+/* *** library symbols visibility *** */
+/* Note : when linking with -fvisibility=hidden on gcc, or by default on Visual,
+ *        HUF symbols remain "private" (internal symbols for library only).
+ *        Set macro FSE_DLL_EXPORT to 1 if you want HUF symbols visible on DLL interface */
+#if defined(FSE_DLL_EXPORT) && (FSE_DLL_EXPORT==1) && defined(__GNUC__) && (__GNUC__ >= 4)
+#  define HUF_PUBLIC_API __attribute__ ((visibility ("default")))
+#elif defined(FSE_DLL_EXPORT) && (FSE_DLL_EXPORT==1)   /* Visual expected */
+#  define HUF_PUBLIC_API __declspec(dllexport)
+#elif defined(FSE_DLL_IMPORT) && (FSE_DLL_IMPORT==1)
+#  define HUF_PUBLIC_API __declspec(dllimport)  /* not required, just to generate faster code (saves a function pointer load from IAT and an indirect jump) */
+#else
+#  define HUF_PUBLIC_API
+#endif
+
+
 /* *** simple functions *** */
 /**
 HUF_compress() :
@@ -55,42 +70,68 @@ HUF_compress() :
                      if return == 1, srcData is a single repeated byte symbol (RLE compression).
                      if HUF_isError(return), compression failed (more details using HUF_getErrorName())
 */
-size_t HUF_compress(void* dst, size_t dstCapacity,
-              const void* src, size_t srcSize);
+HUF_PUBLIC_API size_t HUF_compress(void* dst, size_t dstCapacity,
+                             const void* src, size_t srcSize);
 
 /**
 HUF_decompress() :
     Decompress HUF data from buffer 'cSrc', of size 'cSrcSize',
     into already allocated buffer 'dst', of minimum size 'dstSize'.
-    `dstSize` : **must** be the ***exact*** size of original (uncompressed) data.
+    `originalSize` : **must** be the ***exact*** size of original (uncompressed) data.
     Note : in contrast with FSE, HUF_decompress can regenerate
            RLE (cSrcSize==1) and uncompressed (cSrcSize==dstSize) data,
            because it knows size to regenerate.
-    @return : size of regenerated data (== dstSize),
+    @return : size of regenerated data (== originalSize),
               or an error code, which can be tested using HUF_isError()
 */
-size_t HUF_decompress(void* dst,  size_t dstSize,
-                const void* cSrc, size_t cSrcSize);
+HUF_PUBLIC_API size_t HUF_decompress(void* dst,  size_t originalSize,
+                               const void* cSrc, size_t cSrcSize);
 
 
-/* ****************************************
-*  Tool functions
-******************************************/
-#define HUF_BLOCKSIZE_MAX (128 * 1024)
-size_t HUF_compressBound(size_t size);       /**< maximum compressed size (worst case) */
+/* ***   Tool functions *** */
+#define HUF_BLOCKSIZE_MAX (128 * 1024)                  /**< maximum input size for a single block compressed with HUF_compress */
+HUF_PUBLIC_API size_t HUF_compressBound(size_t size);   /**< maximum compressed size (worst case) */
 
 /* Error Management */
-unsigned    HUF_isError(size_t code);        /**< tells if a return value is an error code */
-const char* HUF_getErrorName(size_t code);   /**< provides error code string (useful for debugging) */
+HUF_PUBLIC_API unsigned    HUF_isError(size_t code);       /**< tells if a return value is an error code */
+HUF_PUBLIC_API const char* HUF_getErrorName(size_t code);  /**< provides error code string (useful for debugging) */
 
 
-/* *** Advanced function *** */
+/* ***   Advanced function   *** */
 
 /** HUF_compress2() :
-*   Same as HUF_compress(), but offers direct control over `maxSymbolValue` and `tableLog` */
-size_t HUF_compress2 (void* dst, size_t dstSize, const void* src, size_t srcSize, unsigned maxSymbolValue, unsigned tableLog);
+ *  Same as HUF_compress(), but offers direct control over `maxSymbolValue` and `tableLog`.
+ *  `tableLog` must be `<= HUF_TABLELOG_MAX` . */
+HUF_PUBLIC_API size_t HUF_compress2 (void* dst, size_t dstCapacity, const void* src, size_t srcSize, unsigned maxSymbolValue, unsigned tableLog);
+
+/** HUF_compress4X_wksp() :
+ *  Same as HUF_compress2(), but uses externally allocated `workSpace`.
+ *  `workspace` must have minimum alignment of 4, and be at least as large as following macro */
+#define HUF_WORKSPACE_SIZE (6 << 10)
+#define HUF_WORKSPACE_SIZE_U32 (HUF_WORKSPACE_SIZE / sizeof(U32))
+HUF_PUBLIC_API size_t HUF_compress4X_wksp (void* dst, size_t dstCapacity, const void* src, size_t srcSize, unsigned maxSymbolValue, unsigned tableLog, void* workSpace, size_t wkspSize);
+
+/**
+ *  The minimum workspace size for the `workSpace` used in
+ *  HUF_readDTableX2_wksp() and HUF_readDTableX4_wksp().
+ *
+ *  The space used depends on HUF_TABLELOG_MAX, ranging from ~1500 bytes when
+ *  HUF_TABLE_LOG_MAX=12 to ~1850 bytes when HUF_TABLE_LOG_MAX=15.
+ *  Buffer overflow errors may potentially occur if code modifications result in
+ *  a required workspace size greater than that specified in the following
+ *  macro.
+ */
+#define HUF_DECOMPRESS_WORKSPACE_SIZE (2 << 10)
+#define HUF_DECOMPRESS_WORKSPACE_SIZE_U32 (HUF_DECOMPRESS_WORKSPACE_SIZE / sizeof(U32))
 
 
+/* ******************************************************************
+ *  WARNING !!
+ *  The following section contains advanced and experimental definitions
+ *  which shall never be used in the context of dll
+ *  because they are not guaranteed to remain stable in the future.
+ *  Only consider them in association with static linking.
+ *******************************************************************/
 #ifdef HUF_STATIC_LINKING_ONLY
 
 /* *** Dependencies *** */
@@ -98,10 +139,11 @@ size_t HUF_compress2 (void* dst, size_t dstSize, const void* src, size_t srcSize
 
 
 /* *** Constants *** */
-#define HUF_TABLELOG_ABSOLUTEMAX  16   /* absolute limit of HUF_MAX_TABLELOG. Beyond that value, code does not work */
-#define HUF_TABLELOG_MAX  12           /* max configured tableLog (for static allocation); can be modified up to HUF_ABSOLUTEMAX_TABLELOG */
+#define HUF_TABLELOG_MAX      12       /* max configured tableLog (for static allocation); can be modified up to HUF_ABSOLUTEMAX_TABLELOG */
 #define HUF_TABLELOG_DEFAULT  11       /* tableLog by default, when not specified */
-#define HUF_SYMBOLVALUE_MAX 255
+#define HUF_SYMBOLVALUE_MAX  255
+
+#define HUF_TABLELOG_ABSOLUTEMAX  15   /* absolute limit of HUF_MAX_TABLELOG. Beyond that value, code does not work */
 #if (HUF_TABLELOG_MAX > HUF_TABLELOG_ABSOLUTEMAX)
 #  error "HUF_TABLELOG_MAX is too large !"
 #endif
@@ -112,12 +154,14 @@ size_t HUF_compress2 (void* dst, size_t dstSize, const void* src, size_t srcSize
 ******************************************/
 /* HUF buffer bounds */
 #define HUF_CTABLEBOUND 129
-#define HUF_BLOCKBOUND(size) (size + (size>>8) + 8)   /* only true if incompressible pre-filtered with fast heuristic */
+#define HUF_BLOCKBOUND(size) (size + (size>>8) + 8)   /* only true when incompressible is pre-filtered with fast heuristic */
 #define HUF_COMPRESSBOUND(size) (HUF_CTABLEBOUND + HUF_BLOCKBOUND(size))   /* Macro version, useful for static allocation */
 
 /* static allocation of HUF's Compression Table */
+#define HUF_CTABLE_SIZE_U32(maxSymbolValue)   ((maxSymbolValue)+1)   /* Use tables of U32, for proper alignment */
+#define HUF_CTABLE_SIZE(maxSymbolValue)       (HUF_CTABLE_SIZE_U32(maxSymbolValue) * sizeof(U32))
 #define HUF_CREATE_STATIC_CTABLE(name, maxSymbolValue) \
-    U32 name##hb[maxSymbolValue+1]; \
+    U32 name##hb[HUF_CTABLE_SIZE_U32(maxSymbolValue)]; \
     void* name##hv = &(name##hb); \
     HUF_CElt* name = (HUF_CElt*)(name##hv)   /* no final ; */
 
@@ -125,9 +169,9 @@ size_t HUF_compress2 (void* dst, size_t dstSize, const void* src, size_t srcSize
 typedef U32 HUF_DTable;
 #define HUF_DTABLE_SIZE(maxTableLog)   (1 + (1<<(maxTableLog)))
 #define HUF_CREATE_STATIC_DTABLEX2(DTable, maxTableLog) \
-        HUF_DTable DTable[HUF_DTABLE_SIZE((maxTableLog)-1)] = { ((U32)((maxTableLog)-1)*0x1000001) }
+        HUF_DTable DTable[HUF_DTABLE_SIZE((maxTableLog)-1)] = { ((U32)((maxTableLog)-1) * 0x01000001) }
 #define HUF_CREATE_STATIC_DTABLEX4(DTable, maxTableLog) \
-        HUF_DTable DTable[HUF_DTABLE_SIZE(maxTableLog)] = { ((U32)(maxTableLog)*0x1000001) }
+        HUF_DTable DTable[HUF_DTABLE_SIZE(maxTableLog)] = { ((U32)(maxTableLog) * 0x01000001) }
 
 
 /* ****************************************
@@ -138,12 +182,11 @@ size_t HUF_decompress4X4 (void* dst, size_t dstSize, const void* cSrc, size_t cS
 
 size_t HUF_decompress4X_DCtx (HUF_DTable* dctx, void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize);   /**< decodes RLE and uncompressed */
 size_t HUF_decompress4X_hufOnly(HUF_DTable* dctx, void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize); /**< considers RLE and uncompressed as errors */
+size_t HUF_decompress4X_hufOnly_wksp(HUF_DTable* dctx, void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize, void* workSpace, size_t wkspSize); /**< considers RLE and uncompressed as errors */
 size_t HUF_decompress4X2_DCtx(HUF_DTable* dctx, void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize);   /**< single-symbol decoder */
+size_t HUF_decompress4X2_DCtx_wksp(HUF_DTable* dctx, void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize, void* workSpace, size_t wkspSize);   /**< single-symbol decoder */
 size_t HUF_decompress4X4_DCtx(HUF_DTable* dctx, void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize);   /**< double-symbols decoder */
-
-size_t HUF_decompress1X_DCtx (HUF_DTable* dctx, void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize);
-size_t HUF_decompress1X2_DCtx(HUF_DTable* dctx, void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize);   /**< single-symbol decoder */
-size_t HUF_decompress1X4_DCtx(HUF_DTable* dctx, void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize);   /**< double-symbols decoder */
+size_t HUF_decompress4X4_DCtx_wksp(HUF_DTable* dctx, void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize, void* workSpace, size_t wkspSize);   /**< double-symbols decoder */
 
 
 /* ****************************************
@@ -168,6 +211,23 @@ size_t HUF_buildCTable (HUF_CElt* CTable, const unsigned* count, unsigned maxSym
 size_t HUF_writeCTable (void* dst, size_t maxDstSize, const HUF_CElt* CTable, unsigned maxSymbolValue, unsigned huffLog);
 size_t HUF_compress4X_usingCTable(void* dst, size_t dstSize, const void* src, size_t srcSize, const HUF_CElt* CTable);
 
+typedef enum {
+   HUF_repeat_none,  /**< Cannot use the previous table */
+   HUF_repeat_check, /**< Can use the previous table but it must be checked. Note : The previous table must have been constructed by HUF_compress{1, 4}X_repeat */
+   HUF_repeat_valid  /**< Can use the previous table and it is asumed to be valid */
+ } HUF_repeat;
+/** HUF_compress4X_repeat() :
+*   Same as HUF_compress4X_wksp(), but considers using hufTable if *repeat != HUF_repeat_none.
+*   If it uses hufTable it does not modify hufTable or repeat.
+*   If it doesn't, it sets *repeat = HUF_repeat_none, and it sets hufTable to the table used.
+*   If preferRepeat then the old table will always be used if valid. */
+size_t HUF_compress4X_repeat(void* dst, size_t dstSize, const void* src, size_t srcSize, unsigned maxSymbolValue, unsigned tableLog, void* workSpace, size_t wkspSize, HUF_CElt* hufTable, HUF_repeat* repeat, int preferRepeat);  /**< `workSpace` must be a table of at least HUF_WORKSPACE_SIZE_U32 unsigned */
+
+/** HUF_buildCTable_wksp() :
+ *  Same as HUF_buildCTable(), but using externally allocated scratch buffer.
+ *  `workSpace` must be aligned on 4-bytes boundaries, and be at least as large as a table of 1024 unsigned.
+ */
+size_t HUF_buildCTable_wksp (HUF_CElt* tree, const U32* count, U32 maxSymbolValue, U32 maxNbBits, void* workSpace, size_t wkspSize);
 
 /*! HUF_readStats() :
     Read compact Huffman tree, saved by HUF_writeCTable().
@@ -198,7 +258,9 @@ HUF_decompress() does the following:
 U32 HUF_selectDecoder (size_t dstSize, size_t cSrcSize);
 
 size_t HUF_readDTableX2 (HUF_DTable* DTable, const void* src, size_t srcSize);
+size_t HUF_readDTableX2_wksp (HUF_DTable* DTable, const void* src, size_t srcSize, void* workSpace, size_t wkspSize);
 size_t HUF_readDTableX4 (HUF_DTable* DTable, const void* src, size_t srcSize);
+size_t HUF_readDTableX4_wksp (HUF_DTable* DTable, const void* src, size_t srcSize, void* workSpace, size_t wkspSize);
 
 size_t HUF_decompress4X_usingDTable(void* dst, size_t maxDstSize, const void* cSrc, size_t cSrcSize, const HUF_DTable* DTable);
 size_t HUF_decompress4X2_usingDTable(void* dst, size_t maxDstSize, const void* cSrc, size_t cSrcSize, const HUF_DTable* DTable);
@@ -208,16 +270,29 @@ size_t HUF_decompress4X4_usingDTable(void* dst, size_t maxDstSize, const void* c
 /* single stream variants */
 
 size_t HUF_compress1X (void* dst, size_t dstSize, const void* src, size_t srcSize, unsigned maxSymbolValue, unsigned tableLog);
+size_t HUF_compress1X_wksp (void* dst, size_t dstSize, const void* src, size_t srcSize, unsigned maxSymbolValue, unsigned tableLog, void* workSpace, size_t wkspSize);  /**< `workSpace` must be a table of at least HUF_WORKSPACE_SIZE_U32 unsigned */
 size_t HUF_compress1X_usingCTable(void* dst, size_t dstSize, const void* src, size_t srcSize, const HUF_CElt* CTable);
+/** HUF_compress1X_repeat() :
+*   Same as HUF_compress1X_wksp(), but considers using hufTable if *repeat != HUF_repeat_none.
+*   If it uses hufTable it does not modify hufTable or repeat.
+*   If it doesn't, it sets *repeat = HUF_repeat_none, and it sets hufTable to the table used.
+*   If preferRepeat then the old table will always be used if valid. */
+size_t HUF_compress1X_repeat(void* dst, size_t dstSize, const void* src, size_t srcSize, unsigned maxSymbolValue, unsigned tableLog, void* workSpace, size_t wkspSize, HUF_CElt* hufTable, HUF_repeat* repeat, int preferRepeat);  /**< `workSpace` must be a table of at least HUF_WORKSPACE_SIZE_U32 unsigned */
 
 size_t HUF_decompress1X2 (void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize);   /* single-symbol decoder */
 size_t HUF_decompress1X4 (void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize);   /* double-symbol decoder */
 
-size_t HUF_decompress1X_usingDTable(void* dst, size_t maxDstSize, const void* cSrc, size_t cSrcSize, const HUF_DTable* DTable);
+size_t HUF_decompress1X_DCtx (HUF_DTable* dctx, void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize);
+size_t HUF_decompress1X_DCtx_wksp (HUF_DTable* dctx, void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize, void* workSpace, size_t wkspSize);
+size_t HUF_decompress1X2_DCtx(HUF_DTable* dctx, void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize);   /**< single-symbol decoder */
+size_t HUF_decompress1X2_DCtx_wksp(HUF_DTable* dctx, void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize, void* workSpace, size_t wkspSize);   /**< single-symbol decoder */
+size_t HUF_decompress1X4_DCtx(HUF_DTable* dctx, void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize);   /**< double-symbols decoder */
+size_t HUF_decompress1X4_DCtx_wksp(HUF_DTable* dctx, void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize, void* workSpace, size_t wkspSize);   /**< double-symbols decoder */
+
+size_t HUF_decompress1X_usingDTable(void* dst, size_t maxDstSize, const void* cSrc, size_t cSrcSize, const HUF_DTable* DTable);   /**< automatic selection of sing or double symbol decoder, based on DTable */
 size_t HUF_decompress1X2_usingDTable(void* dst, size_t maxDstSize, const void* cSrc, size_t cSrcSize, const HUF_DTable* DTable);
 size_t HUF_decompress1X4_usingDTable(void* dst, size_t maxDstSize, const void* cSrc, size_t cSrcSize, const HUF_DTable* DTable);
 
-
 #endif /* HUF_STATIC_LINKING_ONLY */
 
 
diff --git a/contrib/libzstd/include/zstd/common/mem.h b/contrib/libzstd/include/zstd/common/mem.h
index 681dd35d2da..b0e5bf60b43 100644
--- a/contrib/libzstd/include/zstd/common/mem.h
+++ b/contrib/libzstd/include/zstd/common/mem.h
@@ -39,7 +39,7 @@ extern "C" {
 #endif
 
 /* code only tested on 32 and 64 bits systems */
-#define MEM_STATIC_ASSERT(c)   { enum { XXH_static_assert = 1/(int)(!!(c)) }; }
+#define MEM_STATIC_ASSERT(c)   { enum { MEM_static_assert = 1/(int)(!!(c)) }; }
 MEM_STATIC void MEM_check(void) { MEM_STATIC_ASSERT((sizeof(size_t)==4) || (sizeof(size_t)==8)); }
 
 
@@ -48,21 +48,25 @@ MEM_STATIC void MEM_check(void) { MEM_STATIC_ASSERT((sizeof(size_t)==4) || (size
 *****************************************************************/
 #if  !defined (__VMS) && (defined (__cplusplus) || (defined (__STDC_VERSION__) && (__STDC_VERSION__ >= 199901L) /* C99 */) )
 # include <stdint.h>
-  typedef  uint8_t BYTE;
-  typedef uint16_t U16;
-  typedef  int16_t S16;
-  typedef uint32_t U32;
-  typedef  int32_t S32;
-  typedef uint64_t U64;
-  typedef  int64_t S64;
+  typedef   uint8_t BYTE;
+  typedef  uint16_t U16;
+  typedef   int16_t S16;
+  typedef  uint32_t U32;
+  typedef   int32_t S32;
+  typedef  uint64_t U64;
+  typedef   int64_t S64;
+  typedef  intptr_t iPtrDiff;
+  typedef uintptr_t uPtrDiff;
 #else
-  typedef unsigned char       BYTE;
+  typedef unsigned char      BYTE;
   typedef unsigned short      U16;
   typedef   signed short      S16;
   typedef unsigned int        U32;
   typedef   signed int        S32;
   typedef unsigned long long  U64;
   typedef   signed long long  S64;
+  typedef ptrdiff_t      iPtrDiff;
+  typedef size_t         uPtrDiff;
 #endif
 
 
@@ -74,19 +78,18 @@ MEM_STATIC void MEM_check(void) { MEM_STATIC_ASSERT((sizeof(size_t)==4) || (size
  * Unfortunately, on some target/compiler combinations, the generated assembly is sub-optimal.
  * The below switch allow to select different access method for improved performance.
  * Method 0 (default) : use `memcpy()`. Safe and portable.
- * Method 1 : `__packed` statement. It depends on compiler extension (ie, not portable).
+ * Method 1 : `__packed` statement. It depends on compiler extension (i.e., not portable).
  *            This method is safe if your compiler supports it, and *generally* as fast or faster than `memcpy`.
  * Method 2 : direct access. This method is portable but violate C standard.
  *            It can generate buggy code on targets depending on alignment.
- *            In some circumstances, it's the only known way to get the most performance (ie GCC + ARMv6)
+ *            In some circumstances, it's the only known way to get the most performance (i.e. GCC + ARMv6)
  * See http://fastcompression.blogspot.fr/2015/08/accessing-unaligned-memory.html for details.
  * Prefer these methods in priority order (0 > 1 > 2)
  */
 #ifndef MEM_FORCE_MEMORY_ACCESS   /* can be defined externally, on command line for example */
 #  if defined(__GNUC__) && ( defined(__ARM_ARCH_6__) || defined(__ARM_ARCH_6J__) || defined(__ARM_ARCH_6K__) || defined(__ARM_ARCH_6Z__) || defined(__ARM_ARCH_6ZK__) || defined(__ARM_ARCH_6T2__) )
 #    define MEM_FORCE_MEMORY_ACCESS 2
-#  elif defined(__INTEL_COMPILER) /*|| defined(_MSC_VER)*/ || \
-  (defined(__GNUC__) && ( defined(__ARM_ARCH_7__) || defined(__ARM_ARCH_7A__) || defined(__ARM_ARCH_7R__) || defined(__ARM_ARCH_7M__) || defined(__ARM_ARCH_7S__) ))
+#  elif defined(__INTEL_COMPILER) || defined(__GNUC__)
 #    define MEM_FORCE_MEMORY_ACCESS 1
 #  endif
 #endif
@@ -118,7 +121,7 @@ MEM_STATIC void MEM_write64(void* memPtr, U64 value) { *(U64*)memPtr = value; }
 /* __pack instructions are safer, but compiler specific, hence potentially problematic for some compilers */
 /* currently only defined for gcc and icc */
 #if defined(_MSC_VER) || (defined(__INTEL_COMPILER) && defined(WIN32))
-	__pragma( pack(push, 1) )
+    __pragma( pack(push, 1) )
     typedef union { U16 u16; U32 u32; U64 u64; size_t st; } unalign;
     __pragma( pack(pop) )
 #else
@@ -180,7 +183,7 @@ MEM_STATIC U32 MEM_swap32(U32 in)
 {
 #if defined(_MSC_VER)     /* Visual Studio */
     return _byteswap_ulong(in);
-#elif defined (__GNUC__)
+#elif defined (__GNUC__) && (__GNUC__ * 100 + __GNUC_MINOR__ >= 403)
     return __builtin_bswap32(in);
 #else
     return  ((in << 24) & 0xff000000 ) |
@@ -194,7 +197,7 @@ MEM_STATIC U64 MEM_swap64(U64 in)
 {
 #if defined(_MSC_VER)     /* Visual Studio */
     return _byteswap_uint64(in);
-#elif defined (__GNUC__)
+#elif defined (__GNUC__) && (__GNUC__ * 100 + __GNUC_MINOR__ >= 403)
     return __builtin_bswap64(in);
 #else
     return  ((in << 56) & 0xff00000000000000ULL) |
@@ -349,20 +352,6 @@ MEM_STATIC void MEM_writeBEST(void* memPtr, size_t val)
 }
 
 
-/* function safe only for comparisons */
-MEM_STATIC U32 MEM_readMINMATCH(const void* memPtr, U32 length)
-{
-    switch (length)
-    {
-    default :
-    case 4 : return MEM_read32(memPtr);
-    case 3 : if (MEM_isLittleEndian())
-                return MEM_read32(memPtr)<<8;
-             else
-                return MEM_read32(memPtr)>>8;
-    }
-}
-
 #if defined (__cplusplus)
 }
 #endif
diff --git a/contrib/libzstd/include/zstd/common/pool.c b/contrib/libzstd/include/zstd/common/pool.c
new file mode 100644
index 00000000000..749fa4f2f7b
--- /dev/null
+++ b/contrib/libzstd/include/zstd/common/pool.c
@@ -0,0 +1,206 @@
+/**
+ * Copyright (c) 2016-present, Facebook, Inc.
+ * All rights reserved.
+ *
+ * This source code is licensed under the BSD-style license found in the
+ * LICENSE file in the root directory of this source tree. An additional grant
+ * of patent rights can be found in the PATENTS file in the same directory.
+ */
+
+
+/* ======   Dependencies   ======= */
+#include <stddef.h>  /* size_t */
+#include <stdlib.h>  /* malloc, calloc, free */
+#include "pool.h"
+
+/* ======   Compiler specifics   ====== */
+#if defined(_MSC_VER)
+#  pragma warning(disable : 4204)        /* disable: C4204: non-constant aggregate initializer */
+#endif
+
+
+#ifdef ZSTD_MULTITHREAD
+
+#include "threading.h"   /* pthread adaptation */
+
+/* A job is a function and an opaque argument */
+typedef struct POOL_job_s {
+  POOL_function function;
+  void *opaque;
+} POOL_job;
+
+struct POOL_ctx_s {
+    /* Keep track of the threads */
+    pthread_t *threads;
+    size_t numThreads;
+
+    /* The queue is a circular buffer */
+    POOL_job *queue;
+    size_t queueHead;
+    size_t queueTail;
+    size_t queueSize;
+    /* The mutex protects the queue */
+    pthread_mutex_t queueMutex;
+    /* Condition variable for pushers to wait on when the queue is full */
+    pthread_cond_t queuePushCond;
+    /* Condition variables for poppers to wait on when the queue is empty */
+    pthread_cond_t queuePopCond;
+    /* Indicates if the queue is shutting down */
+    int shutdown;
+};
+
+/* POOL_thread() :
+   Work thread for the thread pool.
+   Waits for jobs and executes them.
+   @returns : NULL on failure else non-null.
+*/
+static void* POOL_thread(void* opaque) {
+    POOL_ctx* const ctx = (POOL_ctx*)opaque;
+    if (!ctx) { return NULL; }
+    for (;;) {
+        /* Lock the mutex and wait for a non-empty queue or until shutdown */
+        pthread_mutex_lock(&ctx->queueMutex);
+        while (ctx->queueHead == ctx->queueTail && !ctx->shutdown) {
+            pthread_cond_wait(&ctx->queuePopCond, &ctx->queueMutex);
+        }
+        /* empty => shutting down: so stop */
+        if (ctx->queueHead == ctx->queueTail) {
+            pthread_mutex_unlock(&ctx->queueMutex);
+            return opaque;
+        }
+        /* Pop a job off the queue */
+        {   POOL_job const job = ctx->queue[ctx->queueHead];
+            ctx->queueHead = (ctx->queueHead + 1) % ctx->queueSize;
+            /* Unlock the mutex, signal a pusher, and run the job */
+            pthread_mutex_unlock(&ctx->queueMutex);
+            pthread_cond_signal(&ctx->queuePushCond);
+            job.function(job.opaque);
+        }
+    }
+    /* Unreachable */
+}
+
+POOL_ctx *POOL_create(size_t numThreads, size_t queueSize) {
+    POOL_ctx *ctx;
+    /* Check the parameters */
+    if (!numThreads || !queueSize) { return NULL; }
+    /* Allocate the context and zero initialize */
+    ctx = (POOL_ctx *)calloc(1, sizeof(POOL_ctx));
+    if (!ctx) { return NULL; }
+    /* Initialize the job queue.
+     * It needs one extra space since one space is wasted to differentiate empty
+     * and full queues.
+     */
+    ctx->queueSize = queueSize + 1;
+    ctx->queue = (POOL_job *)malloc(ctx->queueSize * sizeof(POOL_job));
+    ctx->queueHead = 0;
+    ctx->queueTail = 0;
+    pthread_mutex_init(&ctx->queueMutex, NULL);
+    pthread_cond_init(&ctx->queuePushCond, NULL);
+    pthread_cond_init(&ctx->queuePopCond, NULL);
+    ctx->shutdown = 0;
+    /* Allocate space for the thread handles */
+    ctx->threads = (pthread_t *)malloc(numThreads * sizeof(pthread_t));
+    ctx->numThreads = 0;
+    /* Check for errors */
+    if (!ctx->threads || !ctx->queue) { POOL_free(ctx); return NULL; }
+    /* Initialize the threads */
+    {   size_t i;
+        for (i = 0; i < numThreads; ++i) {
+            if (pthread_create(&ctx->threads[i], NULL, &POOL_thread, ctx)) {
+                ctx->numThreads = i;
+                POOL_free(ctx);
+                return NULL;
+        }   }
+        ctx->numThreads = numThreads;
+    }
+    return ctx;
+}
+
+/*! POOL_join() :
+    Shutdown the queue, wake any sleeping threads, and join all of the threads.
+*/
+static void POOL_join(POOL_ctx *ctx) {
+    /* Shut down the queue */
+    pthread_mutex_lock(&ctx->queueMutex);
+    ctx->shutdown = 1;
+    pthread_mutex_unlock(&ctx->queueMutex);
+    /* Wake up sleeping threads */
+    pthread_cond_broadcast(&ctx->queuePushCond);
+    pthread_cond_broadcast(&ctx->queuePopCond);
+    /* Join all of the threads */
+    {   size_t i;
+        for (i = 0; i < ctx->numThreads; ++i) {
+            pthread_join(ctx->threads[i], NULL);
+    }   }
+}
+
+void POOL_free(POOL_ctx *ctx) {
+    if (!ctx) { return; }
+    POOL_join(ctx);
+    pthread_mutex_destroy(&ctx->queueMutex);
+    pthread_cond_destroy(&ctx->queuePushCond);
+    pthread_cond_destroy(&ctx->queuePopCond);
+    if (ctx->queue) free(ctx->queue);
+    if (ctx->threads) free(ctx->threads);
+    free(ctx);
+}
+
+size_t POOL_sizeof(POOL_ctx *ctx) {
+    if (ctx==NULL) return 0;  /* supports sizeof NULL */
+    return sizeof(*ctx)
+        + ctx->queueSize * sizeof(POOL_job)
+        + ctx->numThreads * sizeof(pthread_t);
+}
+
+void POOL_add(void *ctxVoid, POOL_function function, void *opaque) {
+    POOL_ctx *ctx = (POOL_ctx *)ctxVoid;
+    if (!ctx) { return; }
+
+    pthread_mutex_lock(&ctx->queueMutex);
+    {   POOL_job const job = {function, opaque};
+        /* Wait until there is space in the queue for the new job */
+        size_t newTail = (ctx->queueTail + 1) % ctx->queueSize;
+        while (ctx->queueHead == newTail && !ctx->shutdown) {
+          pthread_cond_wait(&ctx->queuePushCond, &ctx->queueMutex);
+          newTail = (ctx->queueTail + 1) % ctx->queueSize;
+        }
+        /* The queue is still going => there is space */
+        if (!ctx->shutdown) {
+            ctx->queue[ctx->queueTail] = job;
+            ctx->queueTail = newTail;
+        }
+    }
+    pthread_mutex_unlock(&ctx->queueMutex);
+    pthread_cond_signal(&ctx->queuePopCond);
+}
+
+#else  /* ZSTD_MULTITHREAD  not defined */
+/* No multi-threading support */
+
+/* We don't need any data, but if it is empty malloc() might return NULL. */
+struct POOL_ctx_s {
+  int data;
+};
+
+POOL_ctx *POOL_create(size_t numThreads, size_t queueSize) {
+  (void)numThreads;
+  (void)queueSize;
+  return (POOL_ctx *)malloc(sizeof(POOL_ctx));
+}
+
+void POOL_free(POOL_ctx *ctx) {
+  if (ctx) free(ctx);
+}
+
+void POOL_add(void *ctx, POOL_function function, void *opaque) {
+  (void)ctx;
+  function(opaque);
+}
+
+size_t POOL_sizeof(POOL_ctx *ctx) {
+    if (ctx==NULL) return 0;  /* supports sizeof NULL */
+    return sizeof(*ctx);
+}
+
+#endif  /* ZSTD_MULTITHREAD */
diff --git a/contrib/libzstd/include/zstd/common/pool.h b/contrib/libzstd/include/zstd/common/pool.h
new file mode 100644
index 00000000000..386cd674b7c
--- /dev/null
+++ b/contrib/libzstd/include/zstd/common/pool.h
@@ -0,0 +1,61 @@
+/**
+ * Copyright (c) 2016-present, Facebook, Inc.
+ * All rights reserved.
+ *
+ * This source code is licensed under the BSD-style license found in the
+ * LICENSE file in the root directory of this source tree. An additional grant
+ * of patent rights can be found in the PATENTS file in the same directory.
+ */
+#ifndef POOL_H
+#define POOL_H
+
+#if defined (__cplusplus)
+extern "C" {
+#endif
+
+
+#include <stddef.h>   /* size_t */
+
+typedef struct POOL_ctx_s POOL_ctx;
+
+/*! POOL_create() :
+    Create a thread pool with at most `numThreads` threads.
+    `numThreads` must be at least 1.
+    The maximum number of queued jobs before blocking is `queueSize`.
+    `queueSize` must be at least 1.
+    @return : The POOL_ctx pointer on success else NULL.
+*/
+POOL_ctx *POOL_create(size_t numThreads, size_t queueSize);
+
+/*! POOL_free() :
+    Free a thread pool returned by POOL_create().
+*/
+void POOL_free(POOL_ctx *ctx);
+
+/*! POOL_sizeof() :
+    return memory usage of pool returned by POOL_create().
+*/
+size_t POOL_sizeof(POOL_ctx *ctx);
+
+/*! POOL_function :
+    The function type that can be added to a thread pool.
+*/
+typedef void (*POOL_function)(void *);
+/*! POOL_add_function :
+    The function type for a generic thread pool add function.
+*/
+typedef void (*POOL_add_function)(void *, POOL_function, void *);
+
+/*! POOL_add() :
+    Add the job `function(opaque)` to the thread pool.
+    Possibly blocks until there is room in the queue.
+    Note : The function may be executed asynchronously, so `opaque` must live until the function has been completed.
+*/
+void POOL_add(void *ctx, POOL_function function, void *opaque);
+
+
+#if defined (__cplusplus)
+}
+#endif
+
+#endif
diff --git a/contrib/libzstd/include/zstd/common/threading.c b/contrib/libzstd/include/zstd/common/threading.c
new file mode 100644
index 00000000000..141376c5619
--- /dev/null
+++ b/contrib/libzstd/include/zstd/common/threading.c
@@ -0,0 +1,79 @@
+/**
+ * Copyright (c) 2016 Tino Reichardt
+ * All rights reserved.
+ *
+ * This source code is licensed under the BSD-style license found in the
+ * LICENSE file in the root directory of this source tree. An additional grant
+ * of patent rights can be found in the PATENTS file in the same directory.
+ *
+ * You can contact the author at:
+ * - zstdmt source repository: https://github.com/mcmilk/zstdmt
+ */
+
+/**
+ * This file will hold wrapper for systems, which do not support pthreads
+ */
+
+/* When ZSTD_MULTITHREAD is not defined, this file would become an empty translation unit.
+* Include some ISO C header code to prevent this and portably avoid related warnings.
+* (Visual C++: C4206 / GCC: -Wpedantic / Clang: -Wempty-translation-unit)
+*/
+#include <stddef.h>
+
+
+#if defined(ZSTD_MULTITHREAD) && defined(_WIN32)
+
+/**
+ * Windows minimalist Pthread Wrapper, based on :
+ * http://www.cse.wustl.edu/~schmidt/win32-cv-1.html
+ */
+
+
+/* ===  Dependencies  === */
+#include <process.h>
+#include <errno.h>
+#include "threading.h"
+
+
+/* ===  Implementation  === */
+
+static unsigned __stdcall worker(void *arg)
+{
+    pthread_t* const thread = (pthread_t*) arg;
+    thread->arg = thread->start_routine(thread->arg);
+    return 0;
+}
+
+int pthread_create(pthread_t* thread, const void* unused,
+            void* (*start_routine) (void*), void* arg)
+{
+    (void)unused;
+    thread->arg = arg;
+    thread->start_routine = start_routine;
+    thread->handle = (HANDLE) _beginthreadex(NULL, 0, worker, thread, 0, NULL);
+
+    if (!thread->handle)
+        return errno;
+    else
+        return 0;
+}
+
+int _pthread_join(pthread_t * thread, void **value_ptr)
+{
+    DWORD result;
+
+    if (!thread->handle) return 0;
+
+    result = WaitForSingleObject(thread->handle, INFINITE);
+    switch (result) {
+    case WAIT_OBJECT_0:
+        if (value_ptr) *value_ptr = thread->arg;
+        return 0;
+    case WAIT_ABANDONED:
+        return EINVAL;
+    default:
+        return GetLastError();
+    }
+}
+
+#endif   /* ZSTD_MULTITHREAD */
diff --git a/contrib/libzstd/include/zstd/common/threading.h b/contrib/libzstd/include/zstd/common/threading.h
new file mode 100644
index 00000000000..c0086139ea3
--- /dev/null
+++ b/contrib/libzstd/include/zstd/common/threading.h
@@ -0,0 +1,104 @@
+
+/**
+ * Copyright (c) 2016 Tino Reichardt
+ * All rights reserved.
+ *
+ * This source code is licensed under the BSD-style license found in the
+ * LICENSE file in the root directory of this source tree. An additional grant
+ * of patent rights can be found in the PATENTS file in the same directory.
+ *
+ * You can contact the author at:
+ * - zstdmt source repository: https://github.com/mcmilk/zstdmt
+ */
+
+#ifndef THREADING_H_938743
+#define THREADING_H_938743
+
+#if defined (__cplusplus)
+extern "C" {
+#endif
+
+#if defined(ZSTD_MULTITHREAD) && defined(_WIN32)
+
+/**
+ * Windows minimalist Pthread Wrapper, based on :
+ * http://www.cse.wustl.edu/~schmidt/win32-cv-1.html
+ */
+#ifdef WINVER
+#  undef WINVER
+#endif
+#define WINVER       0x0600
+
+#ifdef _WIN32_WINNT
+#  undef _WIN32_WINNT
+#endif
+#define _WIN32_WINNT 0x0600
+
+#ifndef WIN32_LEAN_AND_MEAN
+#  define WIN32_LEAN_AND_MEAN
+#endif
+
+#include <windows.h>
+
+/* mutex */
+#define pthread_mutex_t           CRITICAL_SECTION
+#define pthread_mutex_init(a,b)   InitializeCriticalSection((a))
+#define pthread_mutex_destroy(a)  DeleteCriticalSection((a))
+#define pthread_mutex_lock(a)     EnterCriticalSection((a))
+#define pthread_mutex_unlock(a)   LeaveCriticalSection((a))
+
+/* condition variable */
+#define pthread_cond_t             CONDITION_VARIABLE
+#define pthread_cond_init(a, b)    InitializeConditionVariable((a))
+#define pthread_cond_destroy(a)    /* No delete */
+#define pthread_cond_wait(a, b)    SleepConditionVariableCS((a), (b), INFINITE)
+#define pthread_cond_signal(a)     WakeConditionVariable((a))
+#define pthread_cond_broadcast(a)  WakeAllConditionVariable((a))
+
+/* pthread_create() and pthread_join() */
+typedef struct {
+    HANDLE handle;
+    void* (*start_routine)(void*);
+    void* arg;
+} pthread_t;
+
+int pthread_create(pthread_t* thread, const void* unused,
+                   void* (*start_routine) (void*), void* arg);
+
+#define pthread_join(a, b) _pthread_join(&(a), (b))
+int _pthread_join(pthread_t* thread, void** value_ptr);
+
+/**
+ * add here more wrappers as required
+ */
+
+
+#elif defined(ZSTD_MULTITHREAD)   /* posix assumed ; need a better detection method */
+/* ===   POSIX Systems   === */
+#  include <pthread.h>
+
+#else  /* ZSTD_MULTITHREAD not defined */
+/* No multithreading support */
+
+#define pthread_mutex_t int   /* #define rather than typedef, as sometimes pthread support is implicit, resulting in duplicated symbols */
+#define pthread_mutex_init(a,b)
+#define pthread_mutex_destroy(a)
+#define pthread_mutex_lock(a)
+#define pthread_mutex_unlock(a)
+
+#define pthread_cond_t int
+#define pthread_cond_init(a,b)
+#define pthread_cond_destroy(a)
+#define pthread_cond_wait(a,b)
+#define pthread_cond_signal(a)
+#define pthread_cond_broadcast(a)
+
+/* do not use pthread_t */
+
+#endif /* ZSTD_MULTITHREAD */
+
+#if defined (__cplusplus)
+}
+#endif
+
+#endif /* THREADING_H_938743 */
diff --git a/contrib/libzstd/include/zstd/common/xxhash.c b/contrib/libzstd/include/zstd/common/xxhash.c
index 29e4fa628f3..eb44222c5fc 100644
--- a/contrib/libzstd/include/zstd/common/xxhash.c
+++ b/contrib/libzstd/include/zstd/common/xxhash.c
@@ -104,7 +104,9 @@ static void  XXH_free  (void* p)  { free(p); }
 #include <string.h>
 static void* XXH_memcpy(void* dest, const void* src, size_t size) { return memcpy(dest,src,size); }
 
-#define XXH_STATIC_LINKING_ONLY
+#ifndef XXH_STATIC_LINKING_ONLY
+#  define XXH_STATIC_LINKING_ONLY
+#endif
 #include "xxhash.h"
 
 
diff --git a/contrib/libzstd/include/zstd/common/xxhash.h b/contrib/libzstd/include/zstd/common/xxhash.h
index 2c9b7c61bf4..9bad1f59f63 100644
--- a/contrib/libzstd/include/zstd/common/xxhash.h
+++ b/contrib/libzstd/include/zstd/common/xxhash.h
@@ -64,16 +64,12 @@ XXH64       13.8 GB/s            1.9 GB/s
 XXH32        6.8 GB/s            6.0 GB/s
 */
 
-#ifndef XXHASH_H_5627135585666179
-#define XXHASH_H_5627135585666179 1
-
 #if defined (__cplusplus)
 extern "C" {
 #endif
 
-#ifndef XXH_NAMESPACE
-#  define XXH_NAMESPACE ZSTD_  /* Zstandard specific */
-#endif
+#ifndef XXHASH_H_5627135585666179
+#define XXHASH_H_5627135585666179 1
 
 
 /* ****************************
@@ -242,6 +238,11 @@ XXH_PUBLIC_API void XXH64_copyState(XXH64_state_t* restrict dst_state, const XXH
 /* **************************
 *  Canonical representation
 ****************************/
+/* Default result type for XXH functions are primitive unsigned 32 and 64 bits.
+*  The canonical representation uses human-readable write convention, aka big-endian (large digits first).
+*  These functions allow transformation of hash result into and from its canonical format.
+*  This way, hash values can be written into a file / memory, and remain comparable on different systems and programs.
+*/
 typedef struct { unsigned char digest[4]; } XXH32_canonical_t;
 typedef struct { unsigned char digest[8]; } XXH64_canonical_t;
 
@@ -251,14 +252,9 @@ XXH_PUBLIC_API void XXH64_canonicalFromHash(XXH64_canonical_t* dst, XXH64_hash_t
 XXH_PUBLIC_API XXH32_hash_t XXH32_hashFromCanonical(const XXH32_canonical_t* src);
 XXH_PUBLIC_API XXH64_hash_t XXH64_hashFromCanonical(const XXH64_canonical_t* src);
 
-/* Default result type for XXH functions are primitive unsigned 32 and 64 bits.
-*  The canonical representation uses human-readable write convention, aka big-endian (large digits first).
-*  These functions allow transformation of hash result into and from its canonical format.
-*  This way, hash values can be written into a file / memory, and remain comparable on different systems and programs.
-*/
+#endif /* XXHASH_H_5627135585666179 */
 
 
-#ifdef XXH_STATIC_LINKING_ONLY
 
 /* ================================================================================================
    This section contains definitions which are not guaranteed to remain stable.
@@ -266,6 +262,8 @@ XXH_PUBLIC_API XXH64_hash_t XXH64_hashFromCanonical(const XXH64_canonical_t* src
    They shall only be used with static linking.
    Never use these definitions in association with dynamic linking !
 =================================================================================================== */
+#if defined(XXH_STATIC_LINKING_ONLY) && !defined(XXH_STATIC_H_3543687687345)
+#define XXH_STATIC_H_3543687687345
 
 /* These definitions are only meant to allow allocation of XXH state
    statically, on stack, or in a struct for example.
@@ -299,11 +297,9 @@ XXH_PUBLIC_API XXH64_hash_t XXH64_hashFromCanonical(const XXH64_canonical_t* src
 #    include "xxhash.c"   /* include xxhash functions as `static`, for inlining */
 #  endif
 
-#endif /* XXH_STATIC_LINKING_ONLY */
+#endif /* XXH_STATIC_LINKING_ONLY && XXH_STATIC_H_3543687687345 */
 
 
 #if defined (__cplusplus)
 }
 #endif
-
-#endif /* XXHASH_H_5627135585666179 */
diff --git a/contrib/libzstd/include/zstd/common/zstd_common.c b/contrib/libzstd/include/zstd/common/zstd_common.c
index 54bc91c8992..f6816723815 100644
--- a/contrib/libzstd/include/zstd/common/zstd_common.c
+++ b/contrib/libzstd/include/zstd/common/zstd_common.c
@@ -12,17 +12,19 @@
 /*-*************************************
 *  Dependencies
 ***************************************/
-#include <stdlib.h>         /* malloc */
+#include <stdlib.h>      /* malloc, calloc, free */
+#include <string.h>      /* memset */
 #include "error_private.h"
 #define ZSTD_STATIC_LINKING_ONLY
-#include "zstd.h"           /* declaration of ZSTD_isError, ZSTD_getErrorName, ZSTD_getErrorCode, ZSTD_getErrorString, ZSTD_versionNumber */
-#include "zbuff.h"          /* declaration of ZBUFF_isError, ZBUFF_getErrorName */
+#include "zstd.h"
 
 
 /*-****************************************
 *  Version
 ******************************************/
-unsigned ZSTD_versionNumber (void) { return ZSTD_VERSION_NUMBER; }
+unsigned ZSTD_versionNumber(void) { return ZSTD_VERSION_NUMBER; }
+
+const char* ZSTD_versionString(void) { return ZSTD_VERSION_STRING; }
 
 
 /*-****************************************
@@ -42,42 +44,37 @@ ZSTD_ErrorCode ZSTD_getErrorCode(size_t code) { return ERR_getErrorCode(code); }
 
 /*! ZSTD_getErrorString() :
 *   provides error code string from enum */
-const char* ZSTD_getErrorString(ZSTD_ErrorCode code) { return ERR_getErrorName(code); }
-
-
-/* **************************************************************
-*  ZBUFF Error Management
-****************************************************************/
-unsigned ZBUFF_isError(size_t errorCode) { return ERR_isError(errorCode); }
-
-const char* ZBUFF_getErrorName(size_t errorCode) { return ERR_getErrorName(errorCode); }
-
+const char* ZSTD_getErrorString(ZSTD_ErrorCode code) { return ERR_getErrorString(code); }
 
 
 /*=**************************************************************
 *  Custom allocator
 ****************************************************************/
-/* default uses stdlib */
-void* ZSTD_defaultAllocFunction(void* opaque, size_t size)
-{
-    void* address = malloc(size);
-    (void)opaque;
-    return address;
-}
-
-void ZSTD_defaultFreeFunction(void* opaque, void* address)
-{
-    (void)opaque;
-    free(address);
-}
-
 void* ZSTD_malloc(size_t size, ZSTD_customMem customMem)
 {
-    return customMem.customAlloc(customMem.opaque, size);
+    if (customMem.customAlloc)
+        return customMem.customAlloc(customMem.opaque, size);
+    return malloc(size);
+}
+
+void* ZSTD_calloc(size_t size, ZSTD_customMem customMem)
+{
+    if (customMem.customAlloc) {
+        /* calloc implemented as malloc+memset;
+         * not as efficient as calloc, but next best guess for custom malloc */
+        void* const ptr = customMem.customAlloc(customMem.opaque, size);
+        memset(ptr, 0, size);
+        return ptr;
+    }
+    return calloc(1, size);
 }
 
 void ZSTD_free(void* ptr, ZSTD_customMem customMem)
 {
-    if (ptr!=NULL)
-        customMem.customFree(customMem.opaque, ptr);
+    if (ptr!=NULL) {
+        if (customMem.customFree)
+            customMem.customFree(customMem.opaque, ptr);
+        else
+            free(ptr);
+    }
 }
diff --git a/contrib/libzstd/include/zstd/common/zstd_errors.h b/contrib/libzstd/include/zstd/common/zstd_errors.h
new file mode 100644
index 00000000000..19f1597aa34
--- /dev/null
+++ b/contrib/libzstd/include/zstd/common/zstd_errors.h
@@ -0,0 +1,83 @@
+/**
+ * Copyright (c) 2016-present, Yann Collet, Facebook, Inc.
+ * All rights reserved.
+ *
+ * This source code is licensed under the BSD-style license found in the
+ * LICENSE file in the root directory of this source tree. An additional grant
+ * of patent rights can be found in the PATENTS file in the same directory.
+ */
+
+#ifndef ZSTD_ERRORS_H_398273423
+#define ZSTD_ERRORS_H_398273423
+
+#if defined (__cplusplus)
+extern "C" {
+#endif
+
+/*===== dependency =====*/
+#include <stddef.h>   /* size_t */
+
+
+/* =====   ZSTDERRORLIB_API : control library symbols visibility   ===== */
+#ifndef ZSTDERRORLIB_VISIBILITY
+#  if defined(__GNUC__) && (__GNUC__ >= 4)
+#    define ZSTDERRORLIB_VISIBILITY __attribute__ ((visibility ("default")))
+#  else
+#    define ZSTDERRORLIB_VISIBILITY
+#  endif
+#endif
+#if defined(ZSTD_DLL_EXPORT) && (ZSTD_DLL_EXPORT==1)
+#  define ZSTDERRORLIB_API __declspec(dllexport) ZSTDERRORLIB_VISIBILITY
+#elif defined(ZSTD_DLL_IMPORT) && (ZSTD_DLL_IMPORT==1)
+#  define ZSTDERRORLIB_API __declspec(dllimport) ZSTDERRORLIB_VISIBILITY /* It isn't required but allows to generate better code, saving a function pointer load from the IAT and an indirect jump.*/
+#else
+#  define ZSTDERRORLIB_API ZSTDERRORLIB_VISIBILITY
+#endif
+
+/*-****************************************
+ *  error codes list
+ *  note : this API is still considered unstable
+ *         it should not be used with a dynamic library
+ *         only static linking is allowed
+ ******************************************/
+typedef enum {
+  ZSTD_error_no_error,
+  ZSTD_error_GENERIC,
+  ZSTD_error_prefix_unknown,
+  ZSTD_error_version_unsupported,
+  ZSTD_error_parameter_unknown,
+  ZSTD_error_frameParameter_unsupported,
+  ZSTD_error_frameParameter_unsupportedBy32bits,
+  ZSTD_error_frameParameter_windowTooLarge,
+  ZSTD_error_compressionParameter_unsupported,
+  ZSTD_error_compressionParameter_outOfBound,
+  ZSTD_error_init_missing,
+  ZSTD_error_memory_allocation,
+  ZSTD_error_stage_wrong,
+  ZSTD_error_dstSize_tooSmall,
+  ZSTD_error_srcSize_wrong,
+  ZSTD_error_corruption_detected,
+  ZSTD_error_checksum_wrong,
+  ZSTD_error_tableLog_tooLarge,
+  ZSTD_error_maxSymbolValue_tooLarge,
+  ZSTD_error_maxSymbolValue_tooSmall,
+  ZSTD_error_dictionary_corrupted,
+  ZSTD_error_dictionary_wrong,
+  ZSTD_error_dictionaryCreation_failed,
+  ZSTD_error_frameIndex_tooLarge,
+  ZSTD_error_seekableIO,
+  ZSTD_error_maxCode
+} ZSTD_ErrorCode;
+
+/*! ZSTD_getErrorCode() :
+    convert a `size_t` function result into a `ZSTD_ErrorCode` enum type,
+    which can be used to compare with enum list published above */
+ZSTDERRORLIB_API ZSTD_ErrorCode ZSTD_getErrorCode(size_t functionResult);
+ZSTDERRORLIB_API const char* ZSTD_getErrorString(ZSTD_ErrorCode code);
+
+
+#if defined (__cplusplus)
+}
+#endif
+
+#endif /* ZSTD_ERRORS_H_398273423 */
diff --git a/contrib/libzstd/include/zstd/common/zstd_internal.h b/contrib/libzstd/include/zstd/common/zstd_internal.h
index f40e00aabb3..f2c4e6249fb 100644
--- a/contrib/libzstd/include/zstd/common/zstd_internal.h
+++ b/contrib/libzstd/include/zstd/common/zstd_internal.h
@@ -16,9 +16,10 @@
 #ifdef _MSC_VER    /* Visual Studio */
 #  define FORCE_INLINE static __forceinline
 #  include <intrin.h>                    /* For Visual 2005 */
-#  pragma warning(disable : 4127)        /* disable: C4127: conditional expression is constant */
-#  pragma warning(disable : 4324)        /* disable: C4324: padded structure */
 #  pragma warning(disable : 4100)        /* disable: C4100: unreferenced formal parameter */
+#  pragma warning(disable : 4127)        /* disable: C4127: conditional expression is constant */
+#  pragma warning(disable : 4204)        /* disable: C4204: non-constant aggregate initializer */
+#  pragma warning(disable : 4324)        /* disable: C4324: padded structure */
 #else
 #  if defined (__cplusplus) || defined (__STDC_VERSION__) && __STDC_VERSION__ >= 199901L   /* C99 */
 #    ifdef __GNUC__
@@ -31,6 +32,16 @@
 #  endif /* __STDC_VERSION__ */
 #endif
 
+#ifdef _MSC_VER
+#  define FORCE_NOINLINE static __declspec(noinline)
+#else
+#  ifdef __GNUC__
+#    define FORCE_NOINLINE static __attribute__((__noinline__))
+#  else
+#    define FORCE_NOINLINE static
+#  endif
+#endif
+
 
 /*-*************************************
 *  Dependencies
@@ -39,11 +50,50 @@
 #include "error_private.h"
 #define ZSTD_STATIC_LINKING_ONLY
 #include "zstd.h"
+#ifndef XXH_STATIC_LINKING_ONLY
+#  define XXH_STATIC_LINKING_ONLY  /* XXH64_state_t */
+#endif
+#include "xxhash.h"                /* XXH_reset, update, digest */
+
+
+/*-*************************************
+*  Debug
+***************************************/
+#if defined(ZSTD_DEBUG) && (ZSTD_DEBUG>=1)
+#  include <assert.h>
+#else
+#  ifndef assert
+#    define assert(condition) ((void)0)
+#  endif
+#endif
+
+#define ZSTD_STATIC_ASSERT(c) { enum { ZSTD_static_assert = 1/(int)(!!(c)) }; }
+
+#if defined(ZSTD_DEBUG) && (ZSTD_DEBUG>=2)
+#  include <stdio.h>
+/* recommended values for ZSTD_DEBUG display levels :
+ * 1 : no display, enables assert() only
+ * 2 : reserved for currently active debugging path
+ * 3 : events once per object lifetime (CCtx, CDict)
+ * 4 : events once per frame
+ * 5 : events once per block
+ * 6 : events once per sequence (*very* verbose) */
+#  define DEBUGLOG(l, ...) {                         \
+                if (l<=ZSTD_DEBUG) {                 \
+                    fprintf(stderr, __FILE__ ": ");  \
+                    fprintf(stderr, __VA_ARGS__);    \
+                    fprintf(stderr, " \n");          \
+            }   }
+#else
+#  define DEBUGLOG(l, ...)      {}    /* disabled */
+#endif
 
 
 /*-*************************************
 *  shared macros
 ***************************************/
+#undef MIN
+#undef MAX
 #define MIN(a,b) ((a)<(b) ? (a) : (b))
 #define MAX(a,b) ((a)>(b) ? (a) : (b))
 #define CHECK_F(f) { size_t const errcod = f; if (ERR_isError(errcod)) return errcod; }  /* check and Forward error code */
@@ -54,7 +104,6 @@
 *  Common constants
 ***************************************/
 #define ZSTD_OPT_NUM    (1<<12)
-#define ZSTD_DICT_MAGIC  0xEC30A437   /* v0.7+ */
 
 #define ZSTD_REP_NUM      3                 /* number of repcodes */
 #define ZSTD_REP_CHECK    (ZSTD_REP_NUM)    /* number of repcodes to check by the optimal parser */
@@ -90,7 +139,6 @@ typedef enum { set_basic, set_rle, set_compressed, set_repeat } symbolEncodingTy
 #define LONGNBSEQ 0x7F00
 
 #define MINMATCH 3
-#define EQUAL_READ32 4
 
 #define Litbits  8
 #define MaxLit ((1<<Litbits) - 1)
@@ -137,7 +185,7 @@ static void ZSTD_copy8(void* dst, const void* src) { memcpy(dst, src, 8); }
 /*! ZSTD_wildcopy() :
 *   custom version of memcpy(), can copy up to 7 bytes too many (8 bytes if length==0) */
 #define WILDCOPY_OVERLENGTH 8
-MEM_STATIC void ZSTD_wildcopy(void* dst, const void* src, size_t length)
+MEM_STATIC void ZSTD_wildcopy(void* dst, const void* src, ptrdiff_t length)
 {
     const BYTE* ip = (const BYTE*)src;
     BYTE* op = (BYTE*)dst;
@@ -212,6 +260,7 @@ typedef struct {
     U32  log2litSum;
     U32  log2offCodeSum;
     U32  factor;
+    U32  staticPrices;
     U32  cachedPrice;
     U32  cachedLitLength;
     const BYTE* cachedLiterals;
@@ -219,13 +268,10 @@ typedef struct {
 
 const seqStore_t* ZSTD_getSeqStore(const ZSTD_CCtx* ctx);
 void ZSTD_seqToCodes(const seqStore_t* seqStorePtr);
-int ZSTD_isSkipFrame(ZSTD_DCtx* dctx);
 
 /* custom memory allocation functions */
-void* ZSTD_defaultAllocFunction(void* opaque, size_t size);
-void ZSTD_defaultFreeFunction(void* opaque, void* address);
-static const ZSTD_customMem defaultCustomMem = { ZSTD_defaultAllocFunction, ZSTD_defaultFreeFunction, NULL };
 void* ZSTD_malloc(size_t size, ZSTD_customMem customMem);
+void* ZSTD_calloc(size_t size, ZSTD_customMem customMem);
 void ZSTD_free(void* ptr, ZSTD_customMem customMem);
 
 
@@ -254,4 +300,35 @@ MEM_STATIC U32 ZSTD_highbit32(U32 val)
 }
 
 
+/* hidden functions */
+
+/* ZSTD_invalidateRepCodes() :
+ * ensures next compression will not use repcodes from previous block.
+ * Note : only works with regular variant;
+ *        do not use with extDict variant ! */
+void ZSTD_invalidateRepCodes(ZSTD_CCtx* cctx);
+
+
+/*! ZSTD_initCStream_internal() :
+ *  Private use only. Init streaming operation.
+ *  expects params to be valid.
+ *  must receive dict, or cdict, or none, but not both.
+ *  @return : 0, or an error code */
+size_t ZSTD_initCStream_internal(ZSTD_CStream* zcs,
+                     const void* dict, size_t dictSize,
+                     const ZSTD_CDict* cdict,
+                     ZSTD_parameters params, unsigned long long pledgedSrcSize);
+
+/*! ZSTD_compressStream_generic() :
+ *  Private use only. To be called from zstdmt_compress.c in single-thread mode. */
+size_t ZSTD_compressStream_generic(ZSTD_CStream* zcs,
+                                   ZSTD_outBuffer* output,
+                                   ZSTD_inBuffer* input,
+                                   ZSTD_EndDirective const flushMode);
+
+/*! ZSTD_getParamsFromCDict() :
+ *  as the name implies */
+ZSTD_parameters ZSTD_getParamsFromCDict(const ZSTD_CDict* cdict);
+
+
 #endif   /* ZSTD_CCOMMON_H_MODULE */
diff --git a/contrib/libzstd/include/zstd/compress/fse_compress.c b/contrib/libzstd/include/zstd/compress/fse_compress.c
index 679dbdb83be..26e8052ddcc 100644
--- a/contrib/libzstd/include/zstd/compress/fse_compress.c
+++ b/contrib/libzstd/include/zstd/compress/fse_compress.c
@@ -70,12 +70,6 @@
 #define FSE_STATIC_ASSERT(c) { enum { FSE_static_assert = 1/(int)(!!(c)) }; }   /* use only *after* variable declarations */
 
 
-/* **************************************************************
-*  Complex types
-****************************************************************/
-typedef U32 CTable_max_t[FSE_CTABLE_SIZE_U32(FSE_MAX_TABLELOG, FSE_MAX_SYMBOL_VALUE)];
-
-
 /* **************************************************************
 *  Templates
 ****************************************************************/
@@ -100,7 +94,13 @@ typedef U32 CTable_max_t[FSE_CTABLE_SIZE_U32(FSE_MAX_TABLELOG, FSE_MAX_SYMBOL_VA
 
 
 /* Function templates */
-size_t FSE_buildCTable(FSE_CTable* ct, const short* normalizedCounter, unsigned maxSymbolValue, unsigned tableLog)
+
+/* FSE_buildCTable_wksp() :
+ * Same as FSE_buildCTable(), but using an externally allocated scratch buffer (`workSpace`).
+ * wkspSize should be sized to handle worst case situation, which is `1<<max_tableLog * sizeof(FSE_FUNCTION_TYPE)`
+ * workSpace must also be properly aligned with FSE_FUNCTION_TYPE requirements
+ */
+size_t FSE_buildCTable_wksp(FSE_CTable* ct, const short* normalizedCounter, unsigned maxSymbolValue, unsigned tableLog, void* workSpace, size_t wkspSize)
 {
     U32 const tableSize = 1 << tableLog;
     U32 const tableMask = tableSize - 1;
@@ -111,10 +111,11 @@ size_t FSE_buildCTable(FSE_CTable* ct, const short* normalizedCounter, unsigned
     U32 const step = FSE_TABLESTEP(tableSize);
     U32 cumul[FSE_MAX_SYMBOL_VALUE+2];
 
-    FSE_FUNCTION_TYPE tableSymbol[FSE_MAX_TABLESIZE]; /* memset() is not necessary, even if static analyzer complain about it */
+    FSE_FUNCTION_TYPE* const tableSymbol = (FSE_FUNCTION_TYPE*)workSpace;
     U32 highThreshold = tableSize-1;
 
     /* CTable header */
+    if (((size_t)1 << tableLog) * sizeof(FSE_FUNCTION_TYPE) > wkspSize) return ERROR(tableLog_tooLarge);
     tableU16[-2] = (U16) tableLog;
     tableU16[-1] = (U16) maxSymbolValue;
 
@@ -181,6 +182,13 @@ size_t FSE_buildCTable(FSE_CTable* ct, const short* normalizedCounter, unsigned
 }
 
 
+size_t FSE_buildCTable(FSE_CTable* ct, const short* normalizedCounter, unsigned maxSymbolValue, unsigned tableLog)
+{
+    FSE_FUNCTION_TYPE tableSymbol[FSE_MAX_TABLESIZE];   /* memset() is not necessary, even if static analyzer complain about it */
+    return FSE_buildCTable_wksp(ct, normalizedCounter, maxSymbolValue, tableLog, tableSymbol, sizeof(tableSymbol));
+}
+
+
 
 #ifndef FSE_COMMONDEFS_ONLY
 
@@ -189,12 +197,10 @@ size_t FSE_buildCTable(FSE_CTable* ct, const short* normalizedCounter, unsigned
 ****************************************************************/
 size_t FSE_NCountWriteBound(unsigned maxSymbolValue, unsigned tableLog)
 {
-    size_t maxHeaderSize = (((maxSymbolValue+1) * tableLog) >> 3) + 3;
+    size_t const maxHeaderSize = (((maxSymbolValue+1) * tableLog) >> 3) + 3;
     return maxSymbolValue ? maxHeaderSize : FSE_NCOUNTBOUND;  /* maxSymbolValue==0 ? use default */
 }
 
-static short FSE_abs(short a) { return (short)(a<0 ? -a : a); }
-
 static size_t FSE_writeNCount_generic (void* header, size_t headerBufferSize,
                                        const short* normalizedCounter, unsigned maxSymbolValue, unsigned tableLog,
                                        unsigned writeIsSafe)
@@ -250,16 +256,16 @@ static size_t FSE_writeNCount_generic (void* header, size_t headerBufferSize,
                 bitStream >>= 16;
                 bitCount -= 16;
         }   }
-        {   short count = normalizedCounter[charnum++];
-            const short max = (short)((2*threshold-1)-remaining);
-            remaining -= FSE_abs(count);
-            if (remaining<1) return ERROR(GENERIC);
+        {   int count = normalizedCounter[charnum++];
+            int const max = (2*threshold-1)-remaining;
+            remaining -= count < 0 ? -count : count;
             count++;   /* +1 for extra accuracy */
             if (count>=threshold) count += max;   /* [0..max[ [max..threshold[ (...) [threshold+max 2*threshold[ */
             bitStream += count << bitCount;
             bitCount  += nbBits;
             bitCount  -= (count<max);
             previous0  = (count==1);
+            if (remaining<1) return ERROR(GENERIC);
             while (remaining<threshold) nbBits--, threshold>>=1;
         }
         if (bitCount>16) {
@@ -285,7 +291,7 @@ static size_t FSE_writeNCount_generic (void* header, size_t headerBufferSize,
 
 size_t FSE_writeNCount (void* buffer, size_t bufferSize, const short* normalizedCounter, unsigned maxSymbolValue, unsigned tableLog)
 {
-    if (tableLog > FSE_MAX_TABLELOG) return ERROR(GENERIC);   /* Unsupported */
+    if (tableLog > FSE_MAX_TABLELOG) return ERROR(tableLog_tooLarge);   /* Unsupported */
     if (tableLog < FSE_MIN_TABLELOG) return ERROR(GENERIC);   /* Unsupported */
 
     if (bufferSize < FSE_NCountWriteBound(maxSymbolValue, tableLog))
@@ -300,21 +306,20 @@ size_t FSE_writeNCount (void* buffer, size_t bufferSize, const short* normalized
 *  Counting histogram
 ****************************************************************/
 /*! FSE_count_simple
-    This function just counts byte values within `src`,
-    and store the histogram into table `count`.
-    This function is unsafe : it doesn't check that all values within `src` can fit into `count`.
+    This function counts byte values within `src`, and store the histogram into table `count`.
+    It doesn't use any additional memory.
+    But this function is unsafe : it doesn't check that all values within `src` can fit into `count`.
     For this reason, prefer using a table `count` with 256 elements.
     @return : count of most numerous element
 */
-static size_t FSE_count_simple(unsigned* count, unsigned* maxSymbolValuePtr,
-                               const void* src, size_t srcSize)
+size_t FSE_count_simple(unsigned* count, unsigned* maxSymbolValuePtr,
+                        const void* src, size_t srcSize)
 {
     const BYTE* ip = (const BYTE*)src;
     const BYTE* const end = ip + srcSize;
     unsigned maxSymbolValue = *maxSymbolValuePtr;
     unsigned max=0;
 
-
     memset(count, 0, (maxSymbolValue+1)*sizeof(*count));
     if (srcSize==0) { *maxSymbolValuePtr = 0; return 0; }
 
@@ -329,20 +334,24 @@ static size_t FSE_count_simple(unsigned* count, unsigned* maxSymbolValuePtr,
 }
 
 
-static size_t FSE_count_parallel(unsigned* count, unsigned* maxSymbolValuePtr,
+/* FSE_count_parallel_wksp() :
+ * Same as FSE_count_parallel(), but using an externally provided scratch buffer.
+ * `workSpace` size must be a minimum of `1024 * sizeof(unsigned)`` */
+static size_t FSE_count_parallel_wksp(
+                                unsigned* count, unsigned* maxSymbolValuePtr,
                                 const void* source, size_t sourceSize,
-                                unsigned checkMax)
+                                unsigned checkMax, unsigned* const workSpace)
 {
     const BYTE* ip = (const BYTE*)source;
     const BYTE* const iend = ip+sourceSize;
     unsigned maxSymbolValue = *maxSymbolValuePtr;
     unsigned max=0;
+    U32* const Counting1 = workSpace;
+    U32* const Counting2 = Counting1 + 256;
+    U32* const Counting3 = Counting2 + 256;
+    U32* const Counting4 = Counting3 + 256;
 
-
-    U32 Counting1[256] = { 0 };
-    U32 Counting2[256] = { 0 };
-    U32 Counting3[256] = { 0 };
-    U32 Counting4[256] = { 0 };
+    memset(Counting1, 0, 4*256*sizeof(unsigned));
 
     /* safety checks */
     if (!sourceSize) {
@@ -388,31 +397,51 @@ static size_t FSE_count_parallel(unsigned* count, unsigned* maxSymbolValuePtr,
             if (Counting1[s]) return ERROR(maxSymbolValue_tooSmall);
     }   }
 
-    { U32 s; for (s=0; s<=maxSymbolValue; s++) {
-        count[s] = Counting1[s] + Counting2[s] + Counting3[s] + Counting4[s];
-        if (count[s] > max) max = count[s];
-    }}
+    {   U32 s; for (s=0; s<=maxSymbolValue; s++) {
+            count[s] = Counting1[s] + Counting2[s] + Counting3[s] + Counting4[s];
+            if (count[s] > max) max = count[s];
+    }   }
 
     while (!count[maxSymbolValue]) maxSymbolValue--;
     *maxSymbolValuePtr = maxSymbolValue;
     return (size_t)max;
 }
 
+/* FSE_countFast_wksp() :
+ * Same as FSE_countFast(), but using an externally provided scratch buffer.
+ * `workSpace` size must be table of >= `1024` unsigned */
+size_t FSE_countFast_wksp(unsigned* count, unsigned* maxSymbolValuePtr,
+                     const void* source, size_t sourceSize, unsigned* workSpace)
+{
+    if (sourceSize < 1500) return FSE_count_simple(count, maxSymbolValuePtr, source, sourceSize);
+    return FSE_count_parallel_wksp(count, maxSymbolValuePtr, source, sourceSize, 0, workSpace);
+}
+
 /* fast variant (unsafe : won't check if src contains values beyond count[] limit) */
 size_t FSE_countFast(unsigned* count, unsigned* maxSymbolValuePtr,
                      const void* source, size_t sourceSize)
 {
-    if (sourceSize < 1500) return FSE_count_simple(count, maxSymbolValuePtr, source, sourceSize);
-    return FSE_count_parallel(count, maxSymbolValuePtr, source, sourceSize, 0);
+    unsigned tmpCounters[1024];
+    return FSE_countFast_wksp(count, maxSymbolValuePtr, source, sourceSize, tmpCounters);
+}
+
+/* FSE_count_wksp() :
+ * Same as FSE_count(), but using an externally provided scratch buffer.
+ * `workSpace` size must be table of >= `1024` unsigned */
+size_t FSE_count_wksp(unsigned* count, unsigned* maxSymbolValuePtr,
+                 const void* source, size_t sourceSize, unsigned* workSpace)
+{
+    if (*maxSymbolValuePtr < 255)
+        return FSE_count_parallel_wksp(count, maxSymbolValuePtr, source, sourceSize, 1, workSpace);
+    *maxSymbolValuePtr = 255;
+    return FSE_countFast_wksp(count, maxSymbolValuePtr, source, sourceSize, workSpace);
 }
 
 size_t FSE_count(unsigned* count, unsigned* maxSymbolValuePtr,
-                 const void* source, size_t sourceSize)
+                 const void* src, size_t srcSize)
 {
-    if (*maxSymbolValuePtr <255)
-        return FSE_count_parallel(count, maxSymbolValuePtr, source, sourceSize, 1);
-    *maxSymbolValuePtr = 255;
-    return FSE_countFast(count, maxSymbolValuePtr, source, sourceSize);
+    unsigned tmpCounters[1024];
+    return FSE_count_wksp(count, maxSymbolValuePtr, src, srcSize, tmpCounters);
 }
 
 
@@ -428,14 +457,10 @@ size_t FSE_count(unsigned* count, unsigned* maxSymbolValuePtr,
     `FSE_symbolCompressionTransform symbolTT[maxSymbolValue+1];`  // This size is variable
 Allocation is manual (C standard does not support variable-size structures).
 */
-
 size_t FSE_sizeof_CTable (unsigned maxSymbolValue, unsigned tableLog)
 {
-    size_t size;
-    FSE_STATIC_ASSERT((size_t)FSE_CTABLE_SIZE_U32(FSE_MAX_TABLELOG, FSE_MAX_SYMBOL_VALUE)*4 >= sizeof(CTable_max_t));   /* A compilation error here means FSE_CTABLE_SIZE_U32 is not large enough */
-    if (tableLog > FSE_MAX_TABLELOG) return ERROR(GENERIC);
-    size = FSE_CTABLE_SIZE_U32 (tableLog, maxSymbolValue) * sizeof(U32);
-    return size;
+    if (tableLog > FSE_MAX_TABLELOG) return ERROR(tableLog_tooLarge);
+    return FSE_CTABLE_SIZE_U32 (tableLog, maxSymbolValue) * sizeof(U32);
 }
 
 FSE_CTable* FSE_createCTable (unsigned maxSymbolValue, unsigned tableLog)
@@ -451,20 +476,20 @@ void FSE_freeCTable (FSE_CTable* ct) { free(ct); }
 /* provides the minimum logSize to safely represent a distribution */
 static unsigned FSE_minTableLog(size_t srcSize, unsigned maxSymbolValue)
 {
-	U32 minBitsSrc = BIT_highbit32((U32)(srcSize - 1)) + 1;
-	U32 minBitsSymbols = BIT_highbit32(maxSymbolValue) + 2;
-	U32 minBits = minBitsSrc < minBitsSymbols ? minBitsSrc : minBitsSymbols;
-	return minBits;
+    U32 minBitsSrc = BIT_highbit32((U32)(srcSize - 1)) + 1;
+    U32 minBitsSymbols = BIT_highbit32(maxSymbolValue) + 2;
+    U32 minBits = minBitsSrc < minBitsSymbols ? minBitsSrc : minBitsSymbols;
+    return minBits;
 }
 
 unsigned FSE_optimalTableLog_internal(unsigned maxTableLog, size_t srcSize, unsigned maxSymbolValue, unsigned minus)
 {
-	U32 maxBitsSrc = BIT_highbit32((U32)(srcSize - 1)) - minus;
+    U32 maxBitsSrc = BIT_highbit32((U32)(srcSize - 1)) - minus;
     U32 tableLog = maxTableLog;
-	U32 minBits = FSE_minTableLog(srcSize, maxSymbolValue);
+    U32 minBits = FSE_minTableLog(srcSize, maxSymbolValue);
     if (tableLog==0) tableLog = FSE_DEFAULT_TABLELOG;
-	if (maxBitsSrc < tableLog) tableLog = maxBitsSrc;   /* Accuracy can be reduced */
-	if (minBits > tableLog) tableLog = minBits;   /* Need a minimum to safely represent all symbol values */
+    if (maxBitsSrc < tableLog) tableLog = maxBitsSrc;   /* Accuracy can be reduced */
+    if (minBits > tableLog) tableLog = minBits;   /* Need a minimum to safely represent all symbol values */
     if (tableLog < FSE_MIN_TABLELOG) tableLog = FSE_MIN_TABLELOG;
     if (tableLog > FSE_MAX_TABLELOG) tableLog = FSE_MAX_TABLELOG;
     return tableLog;
@@ -481,12 +506,13 @@ unsigned FSE_optimalTableLog(unsigned maxTableLog, size_t srcSize, unsigned maxS
 
 static size_t FSE_normalizeM2(short* norm, U32 tableLog, const unsigned* count, size_t total, U32 maxSymbolValue)
 {
+    short const NOT_YET_ASSIGNED = -2;
     U32 s;
     U32 distributed = 0;
     U32 ToDistribute;
 
     /* Init */
-    U32 lowThreshold = (U32)(total >> tableLog);
+    U32 const lowThreshold = (U32)(total >> tableLog);
     U32 lowOne = (U32)((total * 3) >> (tableLog + 1));
 
     for (s=0; s<=maxSymbolValue; s++) {
@@ -506,7 +532,8 @@ static size_t FSE_normalizeM2(short* norm, U32 tableLog, const unsigned* count,
             total -= count[s];
             continue;
         }
-        norm[s]=-2;
+
+        norm[s]=NOT_YET_ASSIGNED;
     }
     ToDistribute = (1 << tableLog) - distributed;
 
@@ -514,7 +541,7 @@ static size_t FSE_normalizeM2(short* norm, U32 tableLog, const unsigned* count,
         /* risk of rounding to zero */
         lowOne = (U32)((total * 3) / (ToDistribute * 2));
         for (s=0; s<=maxSymbolValue; s++) {
-            if ((norm[s] == -2) && (count[s] <= lowOne)) {
+            if ((norm[s] == NOT_YET_ASSIGNED) && (count[s] <= lowOne)) {
                 norm[s] = 1;
                 distributed++;
                 total -= count[s];
@@ -534,17 +561,23 @@ static size_t FSE_normalizeM2(short* norm, U32 tableLog, const unsigned* count,
         return 0;
     }
 
-    {
-        U64 const vStepLog = 62 - tableLog;
+    if (total == 0) {
+        /* all of the symbols were low enough for the lowOne or lowThreshold */
+        for (s=0; ToDistribute > 0; s = (s+1)%(maxSymbolValue+1))
+            if (norm[s] > 0) ToDistribute--, norm[s]++;
+        return 0;
+    }
+
+    {   U64 const vStepLog = 62 - tableLog;
         U64 const mid = (1ULL << (vStepLog-1)) - 1;
         U64 const rStep = ((((U64)1<<vStepLog) * ToDistribute) + mid) / total;   /* scale on remaining */
         U64 tmpTotal = mid;
         for (s=0; s<=maxSymbolValue; s++) {
-            if (norm[s]==-2) {
-                U64 end = tmpTotal + (count[s] * rStep);
-                U32 sStart = (U32)(tmpTotal >> vStepLog);
-                U32 sEnd = (U32)(end >> vStepLog);
-                U32 weight = sEnd - sStart;
+            if (norm[s]==NOT_YET_ASSIGNED) {
+                U64 const end = tmpTotal + (count[s] * rStep);
+                U32 const sStart = (U32)(tmpTotal >> vStepLog);
+                U32 const sEnd = (U32)(end >> vStepLog);
+                U32 const weight = sEnd - sStart;
                 if (weight < 1)
                     return ERROR(GENERIC);
                 norm[s] = (short)weight;
@@ -566,7 +599,6 @@ size_t FSE_normalizeCount (short* normalizedCounter, unsigned tableLog,
     if (tableLog < FSE_minTableLog(total, maxSymbolValue)) return ERROR(GENERIC);   /* Too small tableLog, compression potentially impossible */
 
     {   U32 const rtbTable[] = {     0, 473195, 504333, 520860, 550000, 700000, 750000, 830000 };
-
         U64 const scale = 62 - tableLog;
         U64 const step = ((U64)1<<62) / total;   /* <== here, one division ! */
         U64 const vStep = 1ULL<<(scale-20);
@@ -594,7 +626,7 @@ size_t FSE_normalizeCount (short* normalizedCounter, unsigned tableLog,
         }   }
         if (-stillToDistribute >= (normalizedCounter[largest] >> 1)) {
             /* corner case, need another normalization method */
-            size_t errorCode = FSE_normalizeM2(normalizedCounter, tableLog, count, total, maxSymbolValue);
+            size_t const errorCode = FSE_normalizeM2(normalizedCounter, tableLog, count, total, maxSymbolValue);
             if (FSE_isError(errorCode)) return errorCode;
         }
         else normalizedCounter[largest] += (short)stillToDistribute;
@@ -643,17 +675,15 @@ size_t FSE_buildCTable_raw (FSE_CTable* ct, unsigned nbBits)
 
     /* Build Symbol Transformation Table */
     {   const U32 deltaNbBits = (nbBits << 16) - (1 << nbBits);
-
         for (s=0; s<=maxSymbolValue; s++) {
             symbolTT[s].deltaNbBits = deltaNbBits;
             symbolTT[s].deltaFindState = s-1;
     }   }
 
-
     return 0;
 }
 
-/* fake FSE_CTable, for rle (100% always same symbol) input */
+/* fake FSE_CTable, for rle input (always same symbol) */
 size_t FSE_buildCTable_rle (FSE_CTable* ct, BYTE symbolValue)
 {
     void* ptr = ct;
@@ -685,14 +715,13 @@ static size_t FSE_compress_usingCTable_generic (void* dst, size_t dstSize,
     const BYTE* const iend = istart + srcSize;
     const BYTE* ip=iend;
 
-
     BIT_CStream_t bitC;
     FSE_CState_t CState1, CState2;
 
     /* init */
     if (srcSize <= 2) return 0;
-    { size_t const errorCode = BIT_initCStream(&bitC, dst, dstSize);
-      if (FSE_isError(errorCode)) return 0; }
+    { size_t const initError = BIT_initCStream(&bitC, dst, dstSize);
+      if (FSE_isError(initError)) return 0; /* not enough space available to write a bitstream */ }
 
 #define FSE_FLUSHBITS(s)  (fast ? BIT_flushBitsFast(s) : BIT_flushBits(s))
 
@@ -715,7 +744,7 @@ static size_t FSE_compress_usingCTable_generic (void* dst, size_t dstSize,
     }
 
     /* 2 or 4 encoding per loop */
-    for ( ; ip>istart ; ) {
+    while ( ip>istart ) {
 
         FSE_encodeSymbol(&bitC, &CState2, *--ip);
 
@@ -741,7 +770,7 @@ size_t FSE_compress_usingCTable (void* dst, size_t dstSize,
                            const void* src, size_t srcSize,
                            const FSE_CTable* ct)
 {
-    const unsigned fast = (dstSize >= FSE_BLOCKBOUND(srcSize));
+    unsigned const fast = (dstSize >= FSE_BLOCKBOUND(srcSize));
 
     if (fast)
         return FSE_compress_usingCTable_generic(dst, dstSize, src, srcSize, ct, 1);
@@ -752,58 +781,76 @@ size_t FSE_compress_usingCTable (void* dst, size_t dstSize,
 
 size_t FSE_compressBound(size_t size) { return FSE_COMPRESSBOUND(size); }
 
-size_t FSE_compress2 (void* dst, size_t dstSize, const void* src, size_t srcSize, unsigned maxSymbolValue, unsigned tableLog)
-{
-    const BYTE* const istart = (const BYTE*) src;
-    const BYTE* ip = istart;
+#define CHECK_V_F(e, f) size_t const e = f; if (ERR_isError(e)) return f
+#define CHECK_F(f)   { CHECK_V_F(_var_err__, f); }
 
+/* FSE_compress_wksp() :
+ * Same as FSE_compress2(), but using an externally allocated scratch buffer (`workSpace`).
+ * `wkspSize` size must be `(1<<tableLog)`.
+ */
+size_t FSE_compress_wksp (void* dst, size_t dstSize, const void* src, size_t srcSize, unsigned maxSymbolValue, unsigned tableLog, void* workSpace, size_t wkspSize)
+{
     BYTE* const ostart = (BYTE*) dst;
     BYTE* op = ostart;
     BYTE* const oend = ostart + dstSize;
 
     U32   count[FSE_MAX_SYMBOL_VALUE+1];
     S16   norm[FSE_MAX_SYMBOL_VALUE+1];
-    CTable_max_t ct;
-    size_t errorCode;
+    FSE_CTable* CTable = (FSE_CTable*)workSpace;
+    size_t const CTableSize = FSE_CTABLE_SIZE_U32(tableLog, maxSymbolValue);
+    void* scratchBuffer = (void*)(CTable + CTableSize);
+    size_t const scratchBufferSize = wkspSize - (CTableSize * sizeof(FSE_CTable));
 
     /* init conditions */
-    if (srcSize <= 1) return 0;  /* Uncompressible */
+    if (wkspSize < FSE_WKSP_SIZE_U32(tableLog, maxSymbolValue)) return ERROR(tableLog_tooLarge);
+    if (srcSize <= 1) return 0;  /* Not compressible */
     if (!maxSymbolValue) maxSymbolValue = FSE_MAX_SYMBOL_VALUE;
     if (!tableLog) tableLog = FSE_DEFAULT_TABLELOG;
 
     /* Scan input and build symbol stats */
-    errorCode = FSE_count (count, &maxSymbolValue, ip, srcSize);
-    if (FSE_isError(errorCode)) return errorCode;
-    if (errorCode == srcSize) return 1;
-    if (errorCode == 1) return 0;   /* each symbol only present once */
-    if (errorCode < (srcSize >> 7)) return 0;   /* Heuristic : not compressible enough */
+    {   CHECK_V_F(maxCount, FSE_count_wksp(count, &maxSymbolValue, src, srcSize, (unsigned*)scratchBuffer) );
+        if (maxCount == srcSize) return 1;   /* only a single symbol in src : rle */
+        if (maxCount == 1) return 0;         /* each symbol present maximum once => not compressible */
+        if (maxCount < (srcSize >> 7)) return 0;   /* Heuristic : not compressible enough */
+    }
 
     tableLog = FSE_optimalTableLog(tableLog, srcSize, maxSymbolValue);
-    errorCode = FSE_normalizeCount (norm, tableLog, count, srcSize, maxSymbolValue);
-    if (FSE_isError(errorCode)) return errorCode;
+    CHECK_F( FSE_normalizeCount(norm, tableLog, count, srcSize, maxSymbolValue) );
 
     /* Write table description header */
-    errorCode = FSE_writeNCount (op, oend-op, norm, maxSymbolValue, tableLog);
-    if (FSE_isError(errorCode)) return errorCode;
-    op += errorCode;
+    {   CHECK_V_F(nc_err, FSE_writeNCount(op, oend-op, norm, maxSymbolValue, tableLog) );
+        op += nc_err;
+    }
 
     /* Compress */
-    errorCode = FSE_buildCTable (ct, norm, maxSymbolValue, tableLog);
-    if (FSE_isError(errorCode)) return errorCode;
-    errorCode = FSE_compress_usingCTable(op, oend - op, ip, srcSize, ct);
-    if (errorCode == 0) return 0;   /* not enough space for compressed data */
-    op += errorCode;
+    CHECK_F( FSE_buildCTable_wksp(CTable, norm, maxSymbolValue, tableLog, scratchBuffer, scratchBufferSize) );
+    {   CHECK_V_F(cSize, FSE_compress_usingCTable(op, oend - op, src, srcSize, CTable) );
+        if (cSize == 0) return 0;   /* not enough space for compressed data */
+        op += cSize;
+    }
 
     /* check compressibility */
-    if ( (size_t)(op-ostart) >= srcSize-1 )
-        return 0;
+    if ( (size_t)(op-ostart) >= srcSize-1 ) return 0;
 
     return op-ostart;
 }
 
-size_t FSE_compress (void* dst, size_t dstSize, const void* src, size_t srcSize)
+typedef struct {
+    FSE_CTable CTable_max[FSE_CTABLE_SIZE_U32(FSE_MAX_TABLELOG, FSE_MAX_SYMBOL_VALUE)];
+    BYTE scratchBuffer[1 << FSE_MAX_TABLELOG];
+} fseWkspMax_t;
+
+size_t FSE_compress2 (void* dst, size_t dstCapacity, const void* src, size_t srcSize, unsigned maxSymbolValue, unsigned tableLog)
 {
-    return FSE_compress2(dst, dstSize, src, (U32)srcSize, FSE_MAX_SYMBOL_VALUE, FSE_DEFAULT_TABLELOG);
+    fseWkspMax_t scratchBuffer;
+    FSE_STATIC_ASSERT(sizeof(scratchBuffer) >= FSE_WKSP_SIZE_U32(FSE_MAX_TABLELOG, FSE_MAX_SYMBOL_VALUE));   /* compilation failures here means scratchBuffer is not large enough */
+    if (tableLog > FSE_MAX_TABLELOG) return ERROR(tableLog_tooLarge);
+    return FSE_compress_wksp(dst, dstCapacity, src, srcSize, maxSymbolValue, tableLog, &scratchBuffer, sizeof(scratchBuffer));
+}
+
+size_t FSE_compress (void* dst, size_t dstCapacity, const void* src, size_t srcSize)
+{
+    return FSE_compress2(dst, dstCapacity, src, srcSize, FSE_MAX_SYMBOL_VALUE, FSE_DEFAULT_TABLELOG);
 }
 
 
diff --git a/contrib/libzstd/include/zstd/compress/huf_compress.c b/contrib/libzstd/include/zstd/compress/huf_compress.c
index b7d3d77a241..7af0789a9c5 100644
--- a/contrib/libzstd/include/zstd/compress/huf_compress.c
+++ b/contrib/libzstd/include/zstd/compress/huf_compress.c
@@ -56,6 +56,8 @@
 *  Error Management
 ****************************************************************/
 #define HUF_STATIC_ASSERT(c) { enum { HUF_static_assert = 1/(int)(!!(c)) }; }   /* use only *after* variable declarations */
+#define CHECK_V_F(e, f) size_t const e = f; if (ERR_isError(e)) return f
+#define CHECK_F(f)   { CHECK_V_F(_var_err__, f); }
 
 
 /* **************************************************************
@@ -70,31 +72,73 @@ unsigned HUF_optimalTableLog(unsigned maxTableLog, size_t srcSize, unsigned maxS
 /* *******************************************************
 *  HUF : Huffman block compression
 *********************************************************/
+/* HUF_compressWeights() :
+ * Same as FSE_compress(), but dedicated to huff0's weights compression.
+ * The use case needs much less stack memory.
+ * Note : all elements within weightTable are supposed to be <= HUF_TABLELOG_MAX.
+ */
+#define MAX_FSE_TABLELOG_FOR_HUFF_HEADER 6
+size_t HUF_compressWeights (void* dst, size_t dstSize, const void* weightTable, size_t wtSize)
+{
+    BYTE* const ostart = (BYTE*) dst;
+    BYTE* op = ostart;
+    BYTE* const oend = ostart + dstSize;
+
+    U32 maxSymbolValue = HUF_TABLELOG_MAX;
+    U32 tableLog = MAX_FSE_TABLELOG_FOR_HUFF_HEADER;
+
+    FSE_CTable CTable[FSE_CTABLE_SIZE_U32(MAX_FSE_TABLELOG_FOR_HUFF_HEADER, HUF_TABLELOG_MAX)];
+    BYTE scratchBuffer[1<<MAX_FSE_TABLELOG_FOR_HUFF_HEADER];
+
+    U32 count[HUF_TABLELOG_MAX+1];
+    S16 norm[HUF_TABLELOG_MAX+1];
+
+    /* init conditions */
+    if (wtSize <= 1) return 0;  /* Not compressible */
+
+    /* Scan input and build symbol stats */
+    {   CHECK_V_F(maxCount, FSE_count_simple(count, &maxSymbolValue, weightTable, wtSize) );
+        if (maxCount == wtSize) return 1;   /* only a single symbol in src : rle */
+        if (maxCount == 1) return 0;         /* each symbol present maximum once => not compressible */
+    }
+
+    tableLog = FSE_optimalTableLog(tableLog, wtSize, maxSymbolValue);
+    CHECK_F( FSE_normalizeCount(norm, tableLog, count, wtSize, maxSymbolValue) );
+
+    /* Write table description header */
+    {   CHECK_V_F(hSize, FSE_writeNCount(op, oend-op, norm, maxSymbolValue, tableLog) );
+        op += hSize;
+    }
+
+    /* Compress */
+    CHECK_F( FSE_buildCTable_wksp(CTable, norm, maxSymbolValue, tableLog, scratchBuffer, sizeof(scratchBuffer)) );
+    {   CHECK_V_F(cSize, FSE_compress_usingCTable(op, oend - op, weightTable, wtSize, CTable) );
+        if (cSize == 0) return 0;   /* not enough space for compressed data */
+        op += cSize;
+    }
+
+    return op-ostart;
+}
+
+
 struct HUF_CElt_s {
   U16  val;
   BYTE nbBits;
 };   /* typedef'd to HUF_CElt within "huf.h" */
 
-typedef struct nodeElt_s {
-    U32 count;
-    U16 parent;
-    BYTE byte;
-    BYTE nbBits;
-} nodeElt;
-
 /*! HUF_writeCTable() :
-    `CTable` : huffman tree to save, using huf representation.
+    `CTable` : Huffman tree to save, using huf representation.
     @return : size of saved CTable */
 size_t HUF_writeCTable (void* dst, size_t maxDstSize,
                         const HUF_CElt* CTable, U32 maxSymbolValue, U32 huffLog)
 {
-    BYTE bitsToWeight[HUF_TABLELOG_MAX + 1];
+    BYTE bitsToWeight[HUF_TABLELOG_MAX + 1];   /* precomputed conversion table */
     BYTE huffWeight[HUF_SYMBOLVALUE_MAX];
     BYTE* op = (BYTE*)dst;
     U32 n;
 
      /* check conditions */
-    if (maxSymbolValue > HUF_SYMBOLVALUE_MAX) return ERROR(GENERIC);
+    if (maxSymbolValue > HUF_SYMBOLVALUE_MAX) return ERROR(maxSymbolValue_tooLarge);
 
     /* convert to weight */
     bitsToWeight[0] = 0;
@@ -103,38 +147,33 @@ size_t HUF_writeCTable (void* dst, size_t maxDstSize,
     for (n=0; n<maxSymbolValue; n++)
         huffWeight[n] = bitsToWeight[CTable[n].nbBits];
 
-    {   size_t const size = FSE_compress(op+1, maxDstSize-1, huffWeight, maxSymbolValue);
-        if (FSE_isError(size)) return size;
-        if ((size>1) & (size < maxSymbolValue/2)) {   /* FSE compressed */
-            op[0] = (BYTE)size;
-            return size+1;
-        }
-    }
+    /* attempt weights compression by FSE */
+    {   CHECK_V_F(hSize, HUF_compressWeights(op+1, maxDstSize-1, huffWeight, maxSymbolValue) );
+        if ((hSize>1) & (hSize < maxSymbolValue/2)) {   /* FSE compressed */
+            op[0] = (BYTE)hSize;
+            return hSize+1;
+    }   }
 
-    /* raw values */
-    if (maxSymbolValue > (256-128)) return ERROR(GENERIC);   /* should not happen */
+    /* write raw values as 4-bits (max : 15) */
+    if (maxSymbolValue > (256-128)) return ERROR(GENERIC);   /* should not happen : likely means source cannot be compressed */
     if (((maxSymbolValue+1)/2) + 1 > maxDstSize) return ERROR(dstSize_tooSmall);   /* not enough space within dst buffer */
     op[0] = (BYTE)(128 /*special case*/ + (maxSymbolValue-1));
-    huffWeight[maxSymbolValue] = 0;   /* to be sure it doesn't cause issue in final combination */
+    huffWeight[maxSymbolValue] = 0;   /* to be sure it doesn't cause msan issue in final combination */
     for (n=0; n<maxSymbolValue; n+=2)
         op[(n/2)+1] = (BYTE)((huffWeight[n] << 4) + huffWeight[n+1]);
     return ((maxSymbolValue+1)/2) + 1;
-
 }
 
 
 size_t HUF_readCTable (HUF_CElt* CTable, U32 maxSymbolValue, const void* src, size_t srcSize)
 {
-    BYTE huffWeight[HUF_SYMBOLVALUE_MAX + 1];
+    BYTE huffWeight[HUF_SYMBOLVALUE_MAX + 1];   /* init not required, even though some static analyzer may complain */
     U32 rankVal[HUF_TABLELOG_ABSOLUTEMAX + 1];   /* large enough for values from 0 to 16 */
     U32 tableLog = 0;
-    size_t readSize;
     U32 nbSymbols = 0;
-    /*memset(huffWeight, 0, sizeof(huffWeight));*/   /* is not necessary, even though some analyzer complain ... */
 
     /* get symbol weights */
-    readSize = HUF_readStats(huffWeight, HUF_SYMBOLVALUE_MAX+1, rankVal, &nbSymbols, &tableLog, src, srcSize);
-    if (HUF_isError(readSize)) return readSize;
+    CHECK_V_F(readSize, HUF_readStats(huffWeight, HUF_SYMBOLVALUE_MAX+1, rankVal, &nbSymbols, &tableLog, src, srcSize));
 
     /* check result */
     if (tableLog > HUF_TABLELOG_MAX) return ERROR(tableLog_tooLarge);
@@ -155,13 +194,14 @@ size_t HUF_readCTable (HUF_CElt* CTable, U32 maxSymbolValue, const void* src, si
     }   }
 
     /* fill val */
-    {   U16 nbPerRank[HUF_TABLELOG_MAX+1] = {0};
-        U16 valPerRank[HUF_TABLELOG_MAX+1] = {0};
+    {   U16 nbPerRank[HUF_TABLELOG_MAX+2]  = {0};  /* support w=0=>n=tableLog+1 */
+        U16 valPerRank[HUF_TABLELOG_MAX+2] = {0};
         { U32 n; for (n=0; n<nbSymbols; n++) nbPerRank[CTable[n].nbBits]++; }
         /* determine stating value per rank */
+        valPerRank[tableLog+1] = 0;   /* for w==0 */
         {   U16 min = 0;
-            U32 n; for (n=HUF_TABLELOG_MAX; n>0; n--) {
-                valPerRank[n] = min;      /* get starting value within each rank */
+            U32 n; for (n=tableLog; n>0; n--) {  /* start at n=tablelog <-> w=1 */
+                valPerRank[n] = min;     /* get starting value within each rank */
                 min += nbPerRank[n];
                 min >>= 1;
         }   }
@@ -173,6 +213,13 @@ size_t HUF_readCTable (HUF_CElt* CTable, U32 maxSymbolValue, const void* src, si
 }
 
 
+typedef struct nodeElt_s {
+    U32 count;
+    U16 parent;
+    BYTE byte;
+    BYTE nbBits;
+} nodeElt;
+
 static U32 HUF_setMaxHeight(nodeElt* huffNode, U32 lastNonNull, U32 maxNbBits)
 {
     const U32 largestBits = huffNode[lastNonNull].nbBits;
@@ -219,7 +266,8 @@ static U32 HUF_setMaxHeight(nodeElt* huffNode, U32 lastNonNull, U32 maxNbBits)
                         if (highTotal <= lowTotal) break;
                 }   }
                 /* only triggered when no more rank 1 symbol left => find closest one (note : there is necessarily at least one !) */
-                while ((nBitsToDecrease<=HUF_TABLELOG_MAX) && (rankLast[nBitsToDecrease] == noSymbol))  /* HUF_MAX_TABLELOG test just to please gcc 5+; but it should not be necessary */
+                /* HUF_MAX_TABLELOG test just to please gcc 5+; but it should not be necessary */
+                while ((nBitsToDecrease<=HUF_TABLELOG_MAX) && (rankLast[nBitsToDecrease] == noSymbol))
                     nBitsToDecrease ++;
                 totalCost -= 1 << (nBitsToDecrease-1);
                 if (rankLast[nBitsToDecrease-1] == noSymbol)
@@ -278,20 +326,26 @@ static void HUF_sort(nodeElt* huffNode, const U32* count, U32 maxSymbolValue)
 }
 
 
+/** HUF_buildCTable_wksp() :
+ *  Same as HUF_buildCTable(), but using externally allocated scratch buffer.
+ *  `workSpace` must be aligned on 4-bytes boundaries, and be at least as large as a table of 1024 unsigned.
+ */
 #define STARTNODE (HUF_SYMBOLVALUE_MAX+1)
-size_t HUF_buildCTable (HUF_CElt* tree, const U32* count, U32 maxSymbolValue, U32 maxNbBits)
+typedef nodeElt huffNodeTable[2*HUF_SYMBOLVALUE_MAX+1 +1];
+size_t HUF_buildCTable_wksp (HUF_CElt* tree, const U32* count, U32 maxSymbolValue, U32 maxNbBits, void* workSpace, size_t wkspSize)
 {
-    nodeElt huffNode0[2*HUF_SYMBOLVALUE_MAX+1 +1];
-    nodeElt* huffNode = huffNode0 + 1;
+    nodeElt* const huffNode0 = (nodeElt*)workSpace;
+    nodeElt* const huffNode = huffNode0+1;
     U32 n, nonNullRank;
     int lowS, lowN;
     U16 nodeNb = STARTNODE;
     U32 nodeRoot;
 
     /* safety checks */
+    if (wkspSize < sizeof(huffNodeTable)) return ERROR(GENERIC);   /* workSpace is not large enough */
     if (maxNbBits == 0) maxNbBits = HUF_TABLELOG_DEFAULT;
     if (maxSymbolValue > HUF_SYMBOLVALUE_MAX) return ERROR(GENERIC);
-    memset(huffNode0, 0, sizeof(huffNode0));
+    memset(huffNode0, 0, sizeof(huffNodeTable));
 
     /* sort, decreasing order */
     HUF_sort(huffNode, count, maxSymbolValue);
@@ -304,7 +358,7 @@ size_t HUF_buildCTable (HUF_CElt* tree, const U32* count, U32 maxSymbolValue, U3
     huffNode[lowS].parent = huffNode[lowS-1].parent = nodeNb;
     nodeNb++; lowS-=2;
     for (n=nodeNb; n<=nodeRoot; n++) huffNode[n].count = (U32)(1U<<30);
-    huffNode0[0].count = (U32)(1U<<31);
+    huffNode0[0].count = (U32)(1U<<31);  /* fake entry, strong barrier */
 
     /* create parents */
     while (nodeNb <= nodeRoot) {
@@ -347,6 +401,34 @@ size_t HUF_buildCTable (HUF_CElt* tree, const U32* count, U32 maxSymbolValue, U3
     return maxNbBits;
 }
 
+/** HUF_buildCTable() :
+ *  Note : count is used before tree is written, so they can safely overlap
+ */
+size_t HUF_buildCTable (HUF_CElt* tree, const U32* count, U32 maxSymbolValue, U32 maxNbBits)
+{
+    huffNodeTable nodeTable;
+    return HUF_buildCTable_wksp(tree, count, maxSymbolValue, maxNbBits, nodeTable, sizeof(nodeTable));
+}
+
+static size_t HUF_estimateCompressedSize(HUF_CElt* CTable, const unsigned* count, unsigned maxSymbolValue)
+{
+    size_t nbBits = 0;
+    int s;
+    for (s = 0; s <= (int)maxSymbolValue; ++s) {
+        nbBits += CTable[s].nbBits * count[s];
+    }
+    return nbBits >> 3;
+}
+
+static int HUF_validateCTable(const HUF_CElt* CTable, const unsigned* count, unsigned maxSymbolValue) {
+  int bad = 0;
+  int s;
+  for (s = 0; s <= (int)maxSymbolValue; ++s) {
+    bad |= (count[s] != 0) & (CTable[s].nbBits == 0);
+  }
+  return !bad;
+}
+
 static void HUF_encodeSymbol(BIT_CStream_t* bitCPtr, U32 symbol, const HUF_CElt* CTable)
 {
     BIT_addBitsFast(bitCPtr, CTable[symbol].val, CTable[symbol].nbBits);
@@ -374,20 +456,23 @@ size_t HUF_compress1X_usingCTable(void* dst, size_t dstSize, const void* src, si
 
     /* init */
     if (dstSize < 8) return 0;   /* not enough space to compress */
-    { size_t const errorCode = BIT_initCStream(&bitC, op, oend-op);
-      if (HUF_isError(errorCode)) return 0; }
+    { size_t const initErr = BIT_initCStream(&bitC, op, oend-op);
+      if (HUF_isError(initErr)) return 0; }
 
     n = srcSize & ~3;  /* join to mod 4 */
     switch (srcSize & 3)
     {
         case 3 : HUF_encodeSymbol(&bitC, ip[n+ 2], CTable);
                  HUF_FLUSHBITS_2(&bitC);
+		 /* fall-through */
         case 2 : HUF_encodeSymbol(&bitC, ip[n+ 1], CTable);
                  HUF_FLUSHBITS_1(&bitC);
+		 /* fall-through */
         case 1 : HUF_encodeSymbol(&bitC, ip[n+ 0], CTable);
                  HUF_FLUSHBITS(&bitC);
-        case 0 :
-        default: ;
+		 /* fall-through */
+        case 0 : /* fall-through */
+        default: break;
     }
 
     for (; n>0; n-=4) {  /* note : n&3==0 at this stage */
@@ -418,32 +503,28 @@ size_t HUF_compress4X_usingCTable(void* dst, size_t dstSize, const void* src, si
     if (srcSize < 12) return 0;   /* no saving possible : too small input */
     op += 6;   /* jumpTable */
 
-    {   size_t const cSize = HUF_compress1X_usingCTable(op, oend-op, ip, segmentSize, CTable);
-        if (HUF_isError(cSize)) return cSize;
+    {   CHECK_V_F(cSize, HUF_compress1X_usingCTable(op, oend-op, ip, segmentSize, CTable) );
         if (cSize==0) return 0;
         MEM_writeLE16(ostart, (U16)cSize);
         op += cSize;
     }
 
     ip += segmentSize;
-    {   size_t const cSize = HUF_compress1X_usingCTable(op, oend-op, ip, segmentSize, CTable);
-        if (HUF_isError(cSize)) return cSize;
+    {   CHECK_V_F(cSize, HUF_compress1X_usingCTable(op, oend-op, ip, segmentSize, CTable) );
         if (cSize==0) return 0;
         MEM_writeLE16(ostart+2, (U16)cSize);
         op += cSize;
     }
 
     ip += segmentSize;
-    {   size_t const cSize = HUF_compress1X_usingCTable(op, oend-op, ip, segmentSize, CTable);
-        if (HUF_isError(cSize)) return cSize;
+    {   CHECK_V_F(cSize, HUF_compress1X_usingCTable(op, oend-op, ip, segmentSize, CTable) );
         if (cSize==0) return 0;
         MEM_writeLE16(ostart+4, (U16)cSize);
         op += cSize;
     }
 
     ip += segmentSize;
-    {   size_t const cSize = HUF_compress1X_usingCTable(op, oend-op, ip, iend-ip, CTable);
-        if (HUF_isError(cSize)) return cSize;
+    {   CHECK_V_F(cSize, HUF_compress1X_usingCTable(op, oend-op, ip, iend-ip, CTable) );
         if (cSize==0) return 0;
         op += cSize;
     }
@@ -452,20 +533,43 @@ size_t HUF_compress4X_usingCTable(void* dst, size_t dstSize, const void* src, si
 }
 
 
+static size_t HUF_compressCTable_internal(
+                BYTE* const ostart, BYTE* op, BYTE* const oend,
+                const void* src, size_t srcSize,
+                unsigned singleStream, const HUF_CElt* CTable)
+{
+    size_t const cSize = singleStream ?
+                         HUF_compress1X_usingCTable(op, oend - op, src, srcSize, CTable) :
+                         HUF_compress4X_usingCTable(op, oend - op, src, srcSize, CTable);
+    if (HUF_isError(cSize)) { return cSize; }
+    if (cSize==0) { return 0; }   /* uncompressible */
+    op += cSize;
+    /* check compressibility */
+    if ((size_t)(op-ostart) >= srcSize-1) { return 0; }
+    return op-ostart;
+}
+
+
+/* `workSpace` must a table of at least 1024 unsigned */
 static size_t HUF_compress_internal (
                 void* dst, size_t dstSize,
                 const void* src, size_t srcSize,
                 unsigned maxSymbolValue, unsigned huffLog,
-                unsigned singleStream)
+                unsigned singleStream,
+                void* workSpace, size_t wkspSize,
+                HUF_CElt* oldHufTable, HUF_repeat* repeat, int preferRepeat)
 {
     BYTE* const ostart = (BYTE*)dst;
     BYTE* const oend = ostart + dstSize;
     BYTE* op = ostart;
 
-    U32 count[HUF_SYMBOLVALUE_MAX+1];
-    HUF_CElt CTable[HUF_SYMBOLVALUE_MAX+1];
+    U32* count;
+    size_t const countSize = sizeof(U32) * (HUF_SYMBOLVALUE_MAX + 1);
+    HUF_CElt* CTable;
+    size_t const CTableSize = sizeof(HUF_CElt) * (HUF_SYMBOLVALUE_MAX + 1);
 
     /* checks & inits */
+    if (wkspSize < sizeof(huffNodeTable) + countSize + CTableSize) return ERROR(GENERIC);
     if (!srcSize) return 0;  /* Uncompressed (note : 1 means rle, so first byte must be correct) */
     if (!dstSize) return 0;  /* cannot fit within dst budget */
     if (srcSize > HUF_BLOCKSIZE_MAX) return ERROR(srcSize_wrong);   /* current block size limit */
@@ -473,59 +577,111 @@ static size_t HUF_compress_internal (
     if (!maxSymbolValue) maxSymbolValue = HUF_SYMBOLVALUE_MAX;
     if (!huffLog) huffLog = HUF_TABLELOG_DEFAULT;
 
+    count = (U32*)workSpace;
+    workSpace = (BYTE*)workSpace + countSize;
+    wkspSize -= countSize;
+    CTable = (HUF_CElt*)workSpace;
+    workSpace = (BYTE*)workSpace + CTableSize;
+    wkspSize -= CTableSize;
+
+    /* Heuristic : If we don't need to check the validity of the old table use the old table for small inputs */
+    if (preferRepeat && repeat && *repeat == HUF_repeat_valid) {
+        return HUF_compressCTable_internal(ostart, op, oend, src, srcSize, singleStream, oldHufTable);
+    }
+
     /* Scan input and build symbol stats */
-    {   size_t const largest = FSE_count (count, &maxSymbolValue, (const BYTE*)src, srcSize);
-        if (HUF_isError(largest)) return largest;
+    {   CHECK_V_F(largest, FSE_count_wksp (count, &maxSymbolValue, (const BYTE*)src, srcSize, (U32*)workSpace) );
         if (largest == srcSize) { *ostart = ((const BYTE*)src)[0]; return 1; }   /* single symbol, rle */
         if (largest <= (srcSize >> 7)+1) return 0;   /* Fast heuristic : not compressible enough */
     }
 
+    /* Check validity of previous table */
+    if (repeat && *repeat == HUF_repeat_check && !HUF_validateCTable(oldHufTable, count, maxSymbolValue)) {
+        *repeat = HUF_repeat_none;
+    }
+    /* Heuristic : use existing table for small inputs */
+    if (preferRepeat && repeat && *repeat != HUF_repeat_none) {
+        return HUF_compressCTable_internal(ostart, op, oend, src, srcSize, singleStream, oldHufTable);
+    }
+
     /* Build Huffman Tree */
     huffLog = HUF_optimalTableLog(huffLog, srcSize, maxSymbolValue);
-    {   size_t const maxBits = HUF_buildCTable (CTable, count, maxSymbolValue, huffLog);
-        if (HUF_isError(maxBits)) return maxBits;
+    {   CHECK_V_F(maxBits, HUF_buildCTable_wksp (CTable, count, maxSymbolValue, huffLog, workSpace, wkspSize) );
         huffLog = (U32)maxBits;
+        /* Zero the unused symbols so we can check it for validity */
+        memset(CTable + maxSymbolValue + 1, 0, CTableSize - (maxSymbolValue + 1) * sizeof(HUF_CElt));
     }
 
     /* Write table description header */
-    {   size_t const hSize = HUF_writeCTable (op, dstSize, CTable, maxSymbolValue, huffLog);
-        if (HUF_isError(hSize)) return hSize;
-        if (hSize + 12 >= srcSize) return 0;   /* not useful to try compression */
+    {   CHECK_V_F(hSize, HUF_writeCTable (op, dstSize, CTable, maxSymbolValue, huffLog) );
+        /* Check if using the previous table will be beneficial */
+        if (repeat && *repeat != HUF_repeat_none) {
+            size_t const oldSize = HUF_estimateCompressedSize(oldHufTable, count, maxSymbolValue);
+            size_t const newSize = HUF_estimateCompressedSize(CTable, count, maxSymbolValue);
+            if (oldSize <= hSize + newSize || hSize + 12 >= srcSize) {
+                return HUF_compressCTable_internal(ostart, op, oend, src, srcSize, singleStream, oldHufTable);
+            }
+        }
+        /* Use the new table */
+        if (hSize + 12ul >= srcSize) { return 0; }
         op += hSize;
+        if (repeat) { *repeat = HUF_repeat_none; }
+        if (oldHufTable) { memcpy(oldHufTable, CTable, CTableSize); } /* Save the new table */
     }
-
-    /* Compress */
-    {   size_t const cSize = (singleStream) ?
-                            HUF_compress1X_usingCTable(op, oend - op, src, srcSize, CTable) :   /* single segment */
-                            HUF_compress4X_usingCTable(op, oend - op, src, srcSize, CTable);
-        if (HUF_isError(cSize)) return cSize;
-        if (cSize==0) return 0;   /* uncompressible */
-        op += cSize;
-    }
-
-    /* check compressibility */
-    if ((size_t)(op-ostart) >= srcSize-1)
-        return 0;
-
-    return op-ostart;
+    return HUF_compressCTable_internal(ostart, op, oend, src, srcSize, singleStream, CTable);
 }
 
 
+size_t HUF_compress1X_wksp (void* dst, size_t dstSize,
+                      const void* src, size_t srcSize,
+                      unsigned maxSymbolValue, unsigned huffLog,
+                      void* workSpace, size_t wkspSize)
+{
+    return HUF_compress_internal(dst, dstSize, src, srcSize, maxSymbolValue, huffLog, 1 /* single stream */, workSpace, wkspSize, NULL, NULL, 0);
+}
+
+size_t HUF_compress1X_repeat (void* dst, size_t dstSize,
+                      const void* src, size_t srcSize,
+                      unsigned maxSymbolValue, unsigned huffLog,
+                      void* workSpace, size_t wkspSize,
+                      HUF_CElt* hufTable, HUF_repeat* repeat, int preferRepeat)
+{
+    return HUF_compress_internal(dst, dstSize, src, srcSize, maxSymbolValue, huffLog, 1 /* single stream */, workSpace, wkspSize, hufTable, repeat, preferRepeat);
+}
+
 size_t HUF_compress1X (void* dst, size_t dstSize,
                  const void* src, size_t srcSize,
                  unsigned maxSymbolValue, unsigned huffLog)
 {
-    return HUF_compress_internal(dst, dstSize, src, srcSize, maxSymbolValue, huffLog, 1);
+    unsigned workSpace[1024];
+    return HUF_compress1X_wksp(dst, dstSize, src, srcSize, maxSymbolValue, huffLog, workSpace, sizeof(workSpace));
+}
+
+size_t HUF_compress4X_wksp (void* dst, size_t dstSize,
+                      const void* src, size_t srcSize,
+                      unsigned maxSymbolValue, unsigned huffLog,
+                      void* workSpace, size_t wkspSize)
+{
+    return HUF_compress_internal(dst, dstSize, src, srcSize, maxSymbolValue, huffLog, 0 /* 4 streams */, workSpace, wkspSize, NULL, NULL, 0);
+}
+
+size_t HUF_compress4X_repeat (void* dst, size_t dstSize,
+                      const void* src, size_t srcSize,
+                      unsigned maxSymbolValue, unsigned huffLog,
+                      void* workSpace, size_t wkspSize,
+                      HUF_CElt* hufTable, HUF_repeat* repeat, int preferRepeat)
+{
+    return HUF_compress_internal(dst, dstSize, src, srcSize, maxSymbolValue, huffLog, 0 /* 4 streams */, workSpace, wkspSize, hufTable, repeat, preferRepeat);
 }
 
 size_t HUF_compress2 (void* dst, size_t dstSize,
                 const void* src, size_t srcSize,
                 unsigned maxSymbolValue, unsigned huffLog)
 {
-    return HUF_compress_internal(dst, dstSize, src, srcSize, maxSymbolValue, huffLog, 0);
+    unsigned workSpace[1024];
+    return HUF_compress4X_wksp(dst, dstSize, src, srcSize, maxSymbolValue, huffLog, workSpace, sizeof(workSpace));
 }
 
-
 size_t HUF_compress (void* dst, size_t maxDstSize, const void* src, size_t srcSize)
 {
     return HUF_compress2(dst, maxDstSize, src, (U32)srcSize, 255, HUF_TABLELOG_DEFAULT);
diff --git a/contrib/libzstd/include/zstd/compress/zbuff_compress.c b/contrib/libzstd/include/zstd/compress/zbuff_compress.c
deleted file mode 100644
index 5095b43e644..00000000000
--- a/contrib/libzstd/include/zstd/compress/zbuff_compress.c
+++ /dev/null
@@ -1,319 +0,0 @@
-/**
- * Copyright (c) 2016-present, Yann Collet, Facebook, Inc.
- * All rights reserved.
- *
- * This source code is licensed under the BSD-style license found in the
- * LICENSE file in the root directory of this source tree. An additional grant
- * of patent rights can be found in the PATENTS file in the same directory.
- */
-
-
-
-/* *************************************
-*  Dependencies
-***************************************/
-#include <stdlib.h>
-#include "error_private.h"
-#include "zstd_internal.h"  /* MIN, ZSTD_BLOCKHEADERSIZE, defaultCustomMem */
-#define ZBUFF_STATIC_LINKING_ONLY
-#include "zbuff.h"
-
-
-/* *************************************
-*  Constants
-***************************************/
-static size_t const ZBUFF_endFrameSize = ZSTD_BLOCKHEADERSIZE;
-
-
-/*-***********************************************************
-*  Streaming compression
-*
-*  A ZBUFF_CCtx object is required to track streaming operation.
-*  Use ZBUFF_createCCtx() and ZBUFF_freeCCtx() to create/release resources.
-*  Use ZBUFF_compressInit() to start a new compression operation.
-*  ZBUFF_CCtx objects can be reused multiple times.
-*
-*  Use ZBUFF_compressContinue() repetitively to consume your input.
-*  *srcSizePtr and *dstCapacityPtr can be any size.
-*  The function will report how many bytes were read or written by modifying *srcSizePtr and *dstCapacityPtr.
-*  Note that it may not consume the entire input, in which case it's up to the caller to call again the function with remaining input.
-*  The content of dst will be overwritten (up to *dstCapacityPtr) at each function call, so save its content if it matters or change dst .
-*  @return : a hint to preferred nb of bytes to use as input for next function call (it's only a hint, to improve latency)
-*            or an error code, which can be tested using ZBUFF_isError().
-*
-*  ZBUFF_compressFlush() can be used to instruct ZBUFF to compress and output whatever remains within its buffer.
-*  Note that it will not output more than *dstCapacityPtr.
-*  Therefore, some content might still be left into its internal buffer if dst buffer is too small.
-*  @return : nb of bytes still present into internal buffer (0 if it's empty)
-*            or an error code, which can be tested using ZBUFF_isError().
-*
-*  ZBUFF_compressEnd() instructs to finish a frame.
-*  It will perform a flush and write frame epilogue.
-*  Similar to ZBUFF_compressFlush(), it may not be able to output the entire internal buffer content if *dstCapacityPtr is too small.
-*  @return : nb of bytes still present into internal buffer (0 if it's empty)
-*            or an error code, which can be tested using ZBUFF_isError().
-*
-*  Hint : recommended buffer sizes (not compulsory)
-*  input : ZSTD_BLOCKSIZE_MAX (128 KB), internal unit size, it improves latency to use this value.
-*  output : ZSTD_compressBound(ZSTD_BLOCKSIZE_MAX) + ZSTD_blockHeaderSize + ZBUFF_endFrameSize : ensures it's always possible to write/flush/end a full block at best speed.
-* ***********************************************************/
-
-typedef enum { ZBUFFcs_init, ZBUFFcs_load, ZBUFFcs_flush, ZBUFFcs_final } ZBUFF_cStage;
-
-/* *** Resources *** */
-struct ZBUFF_CCtx_s {
-    ZSTD_CCtx* zc;
-    char*  inBuff;
-    size_t inBuffSize;
-    size_t inToCompress;
-    size_t inBuffPos;
-    size_t inBuffTarget;
-    size_t blockSize;
-    char*  outBuff;
-    size_t outBuffSize;
-    size_t outBuffContentSize;
-    size_t outBuffFlushedSize;
-    ZBUFF_cStage stage;
-    U32    checksum;
-    U32    frameEnded;
-    ZSTD_customMem customMem;
-};   /* typedef'd tp ZBUFF_CCtx within "zbuff.h" */
-
-ZBUFF_CCtx* ZBUFF_createCCtx(void)
-{
-    return ZBUFF_createCCtx_advanced(defaultCustomMem);
-}
-
-ZBUFF_CCtx* ZBUFF_createCCtx_advanced(ZSTD_customMem customMem)
-{
-    ZBUFF_CCtx* zbc;
-
-    if (!customMem.customAlloc && !customMem.customFree)
-        customMem = defaultCustomMem;
-
-    if (!customMem.customAlloc || !customMem.customFree)
-        return NULL;
-
-    zbc = (ZBUFF_CCtx*)customMem.customAlloc(customMem.opaque, sizeof(ZBUFF_CCtx));
-    if (zbc==NULL) return NULL;
-    memset(zbc, 0, sizeof(ZBUFF_CCtx));
-    memcpy(&zbc->customMem, &customMem, sizeof(ZSTD_customMem));
-    zbc->zc = ZSTD_createCCtx_advanced(customMem);
-    if (zbc->zc == NULL) { ZBUFF_freeCCtx(zbc); return NULL; }
-    return zbc;
-}
-
-size_t ZBUFF_freeCCtx(ZBUFF_CCtx* zbc)
-{
-    if (zbc==NULL) return 0;   /* support free on NULL */
-    ZSTD_freeCCtx(zbc->zc);
-    if (zbc->inBuff) zbc->customMem.customFree(zbc->customMem.opaque, zbc->inBuff);
-    if (zbc->outBuff) zbc->customMem.customFree(zbc->customMem.opaque, zbc->outBuff);
-    zbc->customMem.customFree(zbc->customMem.opaque, zbc);
-    return 0;
-}
-
-
-/* ======   Initialization   ====== */
-
-size_t ZBUFF_compressInit_advanced(ZBUFF_CCtx* zbc,
-                                   const void* dict, size_t dictSize,
-                                   ZSTD_parameters params, unsigned long long pledgedSrcSize)
-{
-    /* allocate buffers */
-    {   size_t const neededInBuffSize = (size_t)1 << params.cParams.windowLog;
-        if (zbc->inBuffSize < neededInBuffSize) {
-            zbc->inBuffSize = neededInBuffSize;
-            zbc->customMem.customFree(zbc->customMem.opaque, zbc->inBuff);   /* should not be necessary */
-            zbc->inBuff = (char*)zbc->customMem.customAlloc(zbc->customMem.opaque, neededInBuffSize);
-            if (zbc->inBuff == NULL) return ERROR(memory_allocation);
-        }
-        zbc->blockSize = MIN(ZSTD_BLOCKSIZE_ABSOLUTEMAX, neededInBuffSize);
-    }
-    if (zbc->outBuffSize < ZSTD_compressBound(zbc->blockSize)+1) {
-        zbc->outBuffSize = ZSTD_compressBound(zbc->blockSize)+1;
-        zbc->customMem.customFree(zbc->customMem.opaque, zbc->outBuff);   /* should not be necessary */
-        zbc->outBuff = (char*)zbc->customMem.customAlloc(zbc->customMem.opaque, zbc->outBuffSize);
-        if (zbc->outBuff == NULL) return ERROR(memory_allocation);
-    }
-
-    { size_t const errorCode = ZSTD_compressBegin_advanced(zbc->zc, dict, dictSize, params, pledgedSrcSize);
-      if (ZSTD_isError(errorCode)) return errorCode; }
-
-    zbc->inToCompress = 0;
-    zbc->inBuffPos = 0;
-    zbc->inBuffTarget = zbc->blockSize;
-    zbc->outBuffContentSize = zbc->outBuffFlushedSize = 0;
-    zbc->stage = ZBUFFcs_load;
-    zbc->checksum = params.fParams.checksumFlag > 0;
-    zbc->frameEnded = 0;
-    return 0;   /* ready to go */
-}
-
-
-size_t ZBUFF_compressInitDictionary(ZBUFF_CCtx* zbc, const void* dict, size_t dictSize, int compressionLevel)
-{
-    ZSTD_parameters const params = ZSTD_getParams(compressionLevel, 0, dictSize);
-    return ZBUFF_compressInit_advanced(zbc, dict, dictSize, params, 0);
-}
-
-size_t ZBUFF_compressInit(ZBUFF_CCtx* zbc, int compressionLevel)
-{
-    return ZBUFF_compressInitDictionary(zbc, NULL, 0, compressionLevel);
-}
-
-
-/* internal util function */
-MEM_STATIC size_t ZBUFF_limitCopy(void* dst, size_t dstCapacity, const void* src, size_t srcSize)
-{
-    size_t const length = MIN(dstCapacity, srcSize);
-    memcpy(dst, src, length);
-    return length;
-}
-
-
-/* ======   Compression   ====== */
-
-typedef enum { zbf_gather, zbf_flush, zbf_end } ZBUFF_flush_e;
-
-static size_t ZBUFF_compressContinue_generic(ZBUFF_CCtx* zbc,
-                              void* dst, size_t* dstCapacityPtr,
-                        const void* src, size_t* srcSizePtr,
-                              ZBUFF_flush_e const flush)
-{
-    U32 someMoreWork = 1;
-    const char* const istart = (const char*)src;
-    const char* const iend = istart + *srcSizePtr;
-    const char* ip = istart;
-    char* const ostart = (char*)dst;
-    char* const oend = ostart + *dstCapacityPtr;
-    char* op = ostart;
-
-    while (someMoreWork) {
-        switch(zbc->stage)
-        {
-        case ZBUFFcs_init: return ERROR(init_missing);   /* call ZBUFF_compressInit() first ! */
-
-        case ZBUFFcs_load:
-            /* complete inBuffer */
-            {   size_t const toLoad = zbc->inBuffTarget - zbc->inBuffPos;
-                size_t const loaded = ZBUFF_limitCopy(zbc->inBuff + zbc->inBuffPos, toLoad, ip, iend-ip);
-                zbc->inBuffPos += loaded;
-                ip += loaded;
-                if ( (zbc->inBuffPos==zbc->inToCompress) || (!flush && (toLoad != loaded)) ) {
-                    someMoreWork = 0; break;  /* not enough input to get a full block : stop there, wait for more */
-            }   }
-            /* compress current block (note : this stage cannot be stopped in the middle) */
-            {   void* cDst;
-                size_t cSize;
-                size_t const iSize = zbc->inBuffPos - zbc->inToCompress;
-                size_t oSize = oend-op;
-                if (oSize >= ZSTD_compressBound(iSize))
-                    cDst = op;   /* compress directly into output buffer (avoid flush stage) */
-                else
-                    cDst = zbc->outBuff, oSize = zbc->outBuffSize;
-                cSize = (flush == zbf_end) ?
-                        ZSTD_compressEnd(zbc->zc, cDst, oSize, zbc->inBuff + zbc->inToCompress, iSize) :
-                        ZSTD_compressContinue(zbc->zc, cDst, oSize, zbc->inBuff + zbc->inToCompress, iSize);
-                if (ZSTD_isError(cSize)) return cSize;
-                if (flush == zbf_end) zbc->frameEnded = 1;
-                /* prepare next block */
-                zbc->inBuffTarget = zbc->inBuffPos + zbc->blockSize;
-                if (zbc->inBuffTarget > zbc->inBuffSize)
-                    zbc->inBuffPos = 0, zbc->inBuffTarget = zbc->blockSize;   /* note : inBuffSize >= blockSize */
-                zbc->inToCompress = zbc->inBuffPos;
-                if (cDst == op) { op += cSize; break; }   /* no need to flush */
-                zbc->outBuffContentSize = cSize;
-                zbc->outBuffFlushedSize = 0;
-                zbc->stage = ZBUFFcs_flush;   /* continue to flush stage */
-            }
-
-        case ZBUFFcs_flush:
-            {   size_t const toFlush = zbc->outBuffContentSize - zbc->outBuffFlushedSize;
-                size_t const flushed = ZBUFF_limitCopy(op, oend-op, zbc->outBuff + zbc->outBuffFlushedSize, toFlush);
-                op += flushed;
-                zbc->outBuffFlushedSize += flushed;
-                if (toFlush!=flushed) { someMoreWork = 0; break; } /* dst too small to store flushed data : stop there */
-                zbc->outBuffContentSize = zbc->outBuffFlushedSize = 0;
-                zbc->stage = ZBUFFcs_load;
-                break;
-            }
-
-        case ZBUFFcs_final:
-            someMoreWork = 0;   /* do nothing */
-            break;
-
-        default:
-            return ERROR(GENERIC);   /* impossible */
-        }
-    }
-
-    *srcSizePtr = ip - istart;
-    *dstCapacityPtr = op - ostart;
-    if (zbc->frameEnded) return 0;
-    {   size_t hintInSize = zbc->inBuffTarget - zbc->inBuffPos;
-        if (hintInSize==0) hintInSize = zbc->blockSize;
-        return hintInSize;
-    }
-}
-
-size_t ZBUFF_compressContinue(ZBUFF_CCtx* zbc,
-                              void* dst, size_t* dstCapacityPtr,
-                        const void* src, size_t* srcSizePtr)
-{
-    return ZBUFF_compressContinue_generic(zbc, dst, dstCapacityPtr, src, srcSizePtr, zbf_gather);
-}
-
-
-
-/* ======   Finalize   ====== */
-
-size_t ZBUFF_compressFlush(ZBUFF_CCtx* zbc, void* dst, size_t* dstCapacityPtr)
-{
-    size_t srcSize = 0;
-    ZBUFF_compressContinue_generic(zbc, dst, dstCapacityPtr, &srcSize, &srcSize, zbf_flush);  /* use a valid src address instead of NULL */
-    return zbc->outBuffContentSize - zbc->outBuffFlushedSize;
-}
-
-
-size_t ZBUFF_compressEnd(ZBUFF_CCtx* zbc, void* dst, size_t* dstCapacityPtr)
-{
-    BYTE* const ostart = (BYTE*)dst;
-    BYTE* const oend = ostart + *dstCapacityPtr;
-    BYTE* op = ostart;
-
-    if (zbc->stage != ZBUFFcs_final) {
-        /* flush whatever remains */
-        size_t outSize = *dstCapacityPtr;
-        size_t srcSize = 0;
-        size_t const notEnded = ZBUFF_compressContinue_generic(zbc, dst, &outSize, &srcSize, &srcSize, zbf_end);  /* use a valid address instead of NULL */
-        size_t const remainingToFlush = zbc->outBuffContentSize - zbc->outBuffFlushedSize;
-        op += outSize;
-        if (remainingToFlush) {
-            *dstCapacityPtr = op-ostart;
-            return remainingToFlush + ZBUFF_endFrameSize + (zbc->checksum * 4);
-        }
-        /* create epilogue */
-        zbc->stage = ZBUFFcs_final;
-        zbc->outBuffContentSize = !notEnded ? 0 :
-            ZSTD_compressEnd(zbc->zc, zbc->outBuff, zbc->outBuffSize, NULL, 0);  /* write epilogue into outBuff */
-    }
-
-    /* flush epilogue */
-    {   size_t const toFlush = zbc->outBuffContentSize - zbc->outBuffFlushedSize;
-        size_t const flushed = ZBUFF_limitCopy(op, oend-op, zbc->outBuff + zbc->outBuffFlushedSize, toFlush);
-        op += flushed;
-        zbc->outBuffFlushedSize += flushed;
-        *dstCapacityPtr = op-ostart;
-        if (toFlush==flushed) zbc->stage = ZBUFFcs_init;  /* end reached */
-        return toFlush - flushed;
-    }
-}
-
-
-
-/* *************************************
-*  Tool functions
-***************************************/
-size_t ZBUFF_recommendedCInSize(void)  { return ZSTD_BLOCKSIZE_ABSOLUTEMAX; }
-size_t ZBUFF_recommendedCOutSize(void) { return ZSTD_compressBound(ZSTD_BLOCKSIZE_ABSOLUTEMAX) + ZSTD_blockHeaderSize + ZBUFF_endFrameSize; }
diff --git a/contrib/libzstd/include/zstd/compress/zstd_compress.c b/contrib/libzstd/include/zstd/compress/zstd_compress.c
index 94f4b5a2586..9300357f2d3 100644
--- a/contrib/libzstd/include/zstd/compress/zstd_compress.c
+++ b/contrib/libzstd/include/zstd/compress/zstd_compress.c
@@ -8,18 +8,25 @@
  */
 
 
+/*-*************************************
+*  Tuning parameters
+***************************************/
+#ifndef ZSTD_CLEVEL_DEFAULT
+#  define ZSTD_CLEVEL_DEFAULT 3
+#endif
+
+
 /*-*************************************
 *  Dependencies
 ***************************************/
 #include <string.h>         /* memset */
 #include "mem.h"
-#define XXH_STATIC_LINKING_ONLY   /* XXH64_state_t */
-#include "xxhash.h"               /* XXH_reset, update, digest */
 #define FSE_STATIC_LINKING_ONLY   /* FSE_encodeSymbol */
 #include "fse.h"
 #define HUF_STATIC_LINKING_ONLY
 #include "huf.h"
 #include "zstd_internal.h"  /* includes zstd.h */
+#include "zstdmt_compress.h"
 
 
 /*-*************************************
@@ -29,11 +36,22 @@ static const U32 g_searchStrength = 8;   /* control skip over incompressible dat
 #define HASH_READ_SIZE 8
 typedef enum { ZSTDcs_created=0, ZSTDcs_init, ZSTDcs_ongoing, ZSTDcs_ending } ZSTD_compressionStage_e;
 
+/* entropy tables always have same size */
+static size_t const hufCTable_size = HUF_CTABLE_SIZE(255);
+static size_t const litlengthCTable_size = FSE_CTABLE_SIZE(LLFSELog, MaxLL);
+static size_t const offcodeCTable_size = FSE_CTABLE_SIZE(OffFSELog, MaxOff);
+static size_t const matchlengthCTable_size = FSE_CTABLE_SIZE(MLFSELog, MaxML);
+static size_t const entropyScratchSpace_size = HUF_WORKSPACE_SIZE;
+
 
 /*-*************************************
 *  Helper functions
 ***************************************/
-size_t ZSTD_compressBound(size_t srcSize) { return FSE_compressBound(srcSize) + 12; }
+size_t ZSTD_compressBound(size_t srcSize) {
+    size_t const lowLimit = 256 KB;
+    size_t const margin = (srcSize < lowLimit) ? (lowLimit-srcSize) >> 12 : 0;  /* from 64 to 0 */
+    return srcSize + (srcSize >> 8) + margin;
+}
 
 
 /*-*************************************
@@ -50,8 +68,16 @@ static void ZSTD_resetSeqStore(seqStore_t* ssPtr)
 /*-*************************************
 *  Context memory management
 ***************************************/
-struct ZSTD_CCtx_s
-{
+typedef enum { zcss_init=0, zcss_load, zcss_flush } ZSTD_cStreamStage;
+
+struct ZSTD_CDict_s {
+    void* dictBuffer;
+    const void* dictContent;
+    size_t dictContentSize;
+    ZSTD_CCtx* refContext;
+};  /* typedef'd to ZSTD_CDict within "zstd.h" */
+
+struct ZSTD_CCtx_s {
     const BYTE* nextSrc;    /* next block here to continue on current prefix */
     const BYTE* base;       /* All regular indexes relative to this position */
     const BYTE* dictBase;   /* extDict indexes relative to this position */
@@ -60,53 +86,121 @@ struct ZSTD_CCtx_s
     U32   nextToUpdate;     /* index from which to continue dictionary update */
     U32   nextToUpdate3;    /* index from which to continue dictionary update */
     U32   hashLog3;         /* dispatch table : larger == faster, more memory */
-    U32   loadedDictEnd;
+    U32   loadedDictEnd;    /* index of end of dictionary */
+    U32   forceWindow;      /* force back-references to respect limit of 1<<wLog, even for dictionary */
     ZSTD_compressionStage_e stage;
     U32   rep[ZSTD_REP_NUM];
-    U32   savedRep[ZSTD_REP_NUM];
+    U32   repToConfirm[ZSTD_REP_NUM];
     U32   dictID;
-    ZSTD_parameters params;
+    int   compressionLevel;
+    ZSTD_parameters requestedParams;
+    ZSTD_parameters appliedParams;
     void* workSpace;
     size_t workSpaceSize;
     size_t blockSize;
-    U64 frameContentSize;
+    U64 pledgedSrcSizePlusOne;  /* this way, 0 (default) == unknown */
+    U64 consumedSrcSize;
     XXH64_state_t xxhState;
     ZSTD_customMem customMem;
+    size_t staticSize;
 
     seqStore_t seqStore;    /* sequences storage ptrs */
     U32* hashTable;
     U32* hashTable3;
     U32* chainTable;
-    HUF_CElt* hufTable;
-    U32 flagStaticTables;
-    FSE_CTable offcodeCTable  [FSE_CTABLE_SIZE_U32(OffFSELog, MaxOff)];
-    FSE_CTable matchlengthCTable[FSE_CTABLE_SIZE_U32(MLFSELog, MaxML)];
-    FSE_CTable litlengthCTable  [FSE_CTABLE_SIZE_U32(LLFSELog, MaxLL)];
+    HUF_repeat hufCTable_repeatMode;
+    HUF_CElt* hufCTable;
+    U32 fseCTables_ready;
+    FSE_CTable* offcodeCTable;
+    FSE_CTable* matchlengthCTable;
+    FSE_CTable* litlengthCTable;
+    unsigned* entropyScratchSpace;
+
+    /* streaming */
+    char*  inBuff;
+    size_t inBuffSize;
+    size_t inToCompress;
+    size_t inBuffPos;
+    size_t inBuffTarget;
+    char*  outBuff;
+    size_t outBuffSize;
+    size_t outBuffContentSize;
+    size_t outBuffFlushedSize;
+    ZSTD_cStreamStage streamStage;
+    U32    frameEnded;
+
+    /* Dictionary */
+    ZSTD_dictMode_e dictMode; /* select restricting dictionary to "rawContent" or "fullDict" only */
+    U32 dictContentByRef;
+    ZSTD_CDict* cdictLocal;
+    const ZSTD_CDict* cdict;
+    const void* prefix;
+    size_t prefixSize;
+
+    /* Multi-threading */
+    U32 nbThreads;
+    ZSTDMT_CCtx* mtctx;
 };
 
+
 ZSTD_CCtx* ZSTD_createCCtx(void)
 {
-    return ZSTD_createCCtx_advanced(defaultCustomMem);
+    return ZSTD_createCCtx_advanced(ZSTD_defaultCMem);
 }
 
 ZSTD_CCtx* ZSTD_createCCtx_advanced(ZSTD_customMem customMem)
 {
     ZSTD_CCtx* cctx;
 
-    if (!customMem.customAlloc && !customMem.customFree) customMem = defaultCustomMem;
-    if (!customMem.customAlloc || !customMem.customFree) return NULL;
+    if (!customMem.customAlloc ^ !customMem.customFree) return NULL;
 
-    cctx = (ZSTD_CCtx*) ZSTD_malloc(sizeof(ZSTD_CCtx), customMem);
+    cctx = (ZSTD_CCtx*) ZSTD_calloc(sizeof(ZSTD_CCtx), customMem);
     if (!cctx) return NULL;
-    memset(cctx, 0, sizeof(ZSTD_CCtx));
-    memcpy(&(cctx->customMem), &customMem, sizeof(customMem));
+    cctx->customMem = customMem;
+    cctx->compressionLevel = ZSTD_CLEVEL_DEFAULT;
+    ZSTD_STATIC_ASSERT(zcss_init==0);
+    ZSTD_STATIC_ASSERT(ZSTD_CONTENTSIZE_UNKNOWN==(0ULL - 1));
+    return cctx;
+}
+
+ZSTD_CCtx* ZSTD_initStaticCCtx(void *workspace, size_t workspaceSize)
+{
+    ZSTD_CCtx* cctx = (ZSTD_CCtx*) workspace;
+    if (workspaceSize <= sizeof(ZSTD_CCtx)) return NULL;  /* minimum size */
+    if ((size_t)workspace & 7) return NULL;  /* must be 8-aligned */
+    memset(workspace, 0, workspaceSize);   /* may be a bit generous, could memset be smaller ? */
+    cctx->staticSize = workspaceSize;
+    cctx->workSpace = (void*)(cctx+1);
+    cctx->workSpaceSize = workspaceSize - sizeof(ZSTD_CCtx);
+
+    /* entropy space (never moves) */
+    /* note : this code should be shared with resetCCtx, rather than copy/pasted */
+    {   void* ptr = cctx->workSpace;
+        cctx->hufCTable = (HUF_CElt*)ptr;
+        ptr = (char*)cctx->hufCTable + hufCTable_size;
+        cctx->offcodeCTable = (FSE_CTable*) ptr;
+        ptr = (char*)ptr + offcodeCTable_size;
+        cctx->matchlengthCTable = (FSE_CTable*) ptr;
+        ptr = (char*)ptr + matchlengthCTable_size;
+        cctx->litlengthCTable = (FSE_CTable*) ptr;
+        ptr = (char*)ptr + litlengthCTable_size;
+        assert(((size_t)ptr & 3) == 0);   /* ensure correct alignment */
+        cctx->entropyScratchSpace = (unsigned*) ptr;
+    }
+
     return cctx;
 }
 
 size_t ZSTD_freeCCtx(ZSTD_CCtx* cctx)
 {
     if (cctx==NULL) return 0;   /* support free on NULL */
+    if (cctx->staticSize) return ERROR(memory_allocation);   /* not compatible with static CCtx */
     ZSTD_free(cctx->workSpace, cctx->customMem);
+    cctx->workSpace = NULL;
+    ZSTD_freeCDict(cctx->cdictLocal);
+    cctx->cdictLocal = NULL;
+    ZSTDMT_freeCCtx(cctx->mtctx);
+    cctx->mtctx = NULL;
     ZSTD_free(cctx, cctx->customMem);
     return 0;   /* reserved as a potential error code in the future */
 }
@@ -114,42 +208,312 @@ size_t ZSTD_freeCCtx(ZSTD_CCtx* cctx)
 size_t ZSTD_sizeof_CCtx(const ZSTD_CCtx* cctx)
 {
     if (cctx==NULL) return 0;   /* support sizeof on NULL */
-    return sizeof(*cctx) + cctx->workSpaceSize;
+    DEBUGLOG(5, "sizeof(*cctx) : %u", (U32)sizeof(*cctx));
+    DEBUGLOG(5, "workSpaceSize : %u", (U32)cctx->workSpaceSize);
+    DEBUGLOG(5, "streaming buffers : %u", (U32)(cctx->outBuffSize + cctx->inBuffSize));
+    DEBUGLOG(5, "inner MTCTX : %u", (U32)ZSTDMT_sizeof_CCtx(cctx->mtctx));
+    return sizeof(*cctx) + cctx->workSpaceSize
+           + ZSTD_sizeof_CDict(cctx->cdictLocal)
+           + cctx->outBuffSize + cctx->inBuffSize
+           + ZSTDMT_sizeof_CCtx(cctx->mtctx);
 }
 
-const seqStore_t* ZSTD_getSeqStore(const ZSTD_CCtx* ctx)   /* hidden interface */
+size_t ZSTD_sizeof_CStream(const ZSTD_CStream* zcs)
 {
-    return &(ctx->seqStore);
+    return ZSTD_sizeof_CCtx(zcs);  /* same object */
+}
+
+/* private API call, for dictBuilder only */
+const seqStore_t* ZSTD_getSeqStore(const ZSTD_CCtx* ctx) { return &(ctx->seqStore); }
+
+static ZSTD_parameters ZSTD_getParamsFromCCtx(const ZSTD_CCtx* cctx) { return cctx->appliedParams; }
+
+/* older variant; will be deprecated */
+size_t ZSTD_setCCtxParameter(ZSTD_CCtx* cctx, ZSTD_CCtxParameter param, unsigned value)
+{
+    switch(param)
+    {
+    case ZSTD_p_forceWindow : cctx->forceWindow = value>0; cctx->loadedDictEnd = 0; return 0;
+    ZSTD_STATIC_ASSERT(ZSTD_dm_auto==0);
+    ZSTD_STATIC_ASSERT(ZSTD_dm_rawContent==1);
+    case ZSTD_p_forceRawDict : cctx->dictMode = (ZSTD_dictMode_e)(value>0); return 0;
+    default: return ERROR(parameter_unknown);
+    }
 }
 
 
-/** ZSTD_checkParams() :
-    ensure param values remain within authorized range.
+#define ZSTD_CLEVEL_CUSTOM 999
+static void ZSTD_cLevelToCParams(ZSTD_CCtx* cctx)
+{
+    if (cctx->compressionLevel==ZSTD_CLEVEL_CUSTOM) return;
+    cctx->requestedParams.cParams = ZSTD_getCParams(cctx->compressionLevel,
+                                            cctx->pledgedSrcSizePlusOne-1, 0);
+    cctx->compressionLevel = ZSTD_CLEVEL_CUSTOM;
+}
+
+#define CLAMPCHECK(val,min,max) {                       \
+    if (((val)<(min)) | ((val)>(max))) {                \
+        return ERROR(compressionParameter_outOfBound);  \
+}   }
+
+size_t ZSTD_CCtx_setParameter(ZSTD_CCtx* cctx, ZSTD_cParameter param, unsigned value)
+{
+    if (cctx->streamStage != zcss_init) return ERROR(stage_wrong);
+
+    switch(param)
+    {
+    case ZSTD_p_compressionLevel :
+        if ((int)value > ZSTD_maxCLevel()) value = ZSTD_maxCLevel();   /* cap max compression level */
+        if (value == 0) return 0;  /* special value : 0 means "don't change anything" */
+        if (cctx->cdict) return ERROR(stage_wrong);
+        cctx->compressionLevel = value;
+        return 0;
+
+    case ZSTD_p_windowLog :
+        DEBUGLOG(5, "setting ZSTD_p_windowLog = %u (cdict:%u)",
+                    value, (cctx->cdict!=NULL));
+        if (value == 0) return 0;  /* special value : 0 means "don't change anything" */
+        if (cctx->cdict) return ERROR(stage_wrong);
+        CLAMPCHECK(value, ZSTD_WINDOWLOG_MIN, ZSTD_WINDOWLOG_MAX);
+        ZSTD_cLevelToCParams(cctx);
+        cctx->requestedParams.cParams.windowLog = value;
+        return 0;
+
+    case ZSTD_p_hashLog :
+        if (value == 0) return 0;  /* special value : 0 means "don't change anything" */
+        if (cctx->cdict) return ERROR(stage_wrong);
+        CLAMPCHECK(value, ZSTD_HASHLOG_MIN, ZSTD_HASHLOG_MAX);
+        ZSTD_cLevelToCParams(cctx);
+        cctx->requestedParams.cParams.hashLog = value;
+        return 0;
+
+    case ZSTD_p_chainLog :
+        if (value == 0) return 0;  /* special value : 0 means "don't change anything" */
+        if (cctx->cdict) return ERROR(stage_wrong);
+        CLAMPCHECK(value, ZSTD_CHAINLOG_MIN, ZSTD_CHAINLOG_MAX);
+        ZSTD_cLevelToCParams(cctx);
+        cctx->requestedParams.cParams.chainLog = value;
+        return 0;
+
+    case ZSTD_p_searchLog :
+        if (value == 0) return 0;  /* special value : 0 means "don't change anything" */
+        if (cctx->cdict) return ERROR(stage_wrong);
+        CLAMPCHECK(value, ZSTD_SEARCHLOG_MIN, ZSTD_SEARCHLOG_MAX);
+        ZSTD_cLevelToCParams(cctx);
+        cctx->requestedParams.cParams.searchLog = value;
+        return 0;
+
+    case ZSTD_p_minMatch :
+        if (value == 0) return 0;  /* special value : 0 means "don't change anything" */
+        if (cctx->cdict) return ERROR(stage_wrong);
+        CLAMPCHECK(value, ZSTD_SEARCHLENGTH_MIN, ZSTD_SEARCHLENGTH_MAX);
+        ZSTD_cLevelToCParams(cctx);
+        cctx->requestedParams.cParams.searchLength = value;
+        return 0;
+
+    case ZSTD_p_targetLength :
+        if (value == 0) return 0;  /* special value : 0 means "don't change anything" */
+        if (cctx->cdict) return ERROR(stage_wrong);
+        CLAMPCHECK(value, ZSTD_TARGETLENGTH_MIN, ZSTD_TARGETLENGTH_MAX);
+        ZSTD_cLevelToCParams(cctx);
+        cctx->requestedParams.cParams.targetLength = value;
+        return 0;
+
+    case ZSTD_p_compressionStrategy :
+        if (value == 0) return 0;  /* special value : 0 means "don't change anything" */
+        if (cctx->cdict) return ERROR(stage_wrong);
+        CLAMPCHECK(value, (unsigned)ZSTD_fast, (unsigned)ZSTD_btultra);
+        ZSTD_cLevelToCParams(cctx);
+        cctx->requestedParams.cParams.strategy = (ZSTD_strategy)value;
+        return 0;
+
+    case ZSTD_p_contentSizeFlag :
+        DEBUGLOG(5, "set content size flag = %u", (value>0));
+        /* Content size written in frame header _when known_ (default:1) */
+        cctx->requestedParams.fParams.contentSizeFlag = value>0;
+        return 0;
+
+    case ZSTD_p_checksumFlag :
+        /* A 32-bits content checksum will be calculated and written at end of frame (default:0) */
+        cctx->requestedParams.fParams.checksumFlag = value>0;
+        return 0;
+
+    case ZSTD_p_dictIDFlag : /* When applicable, dictionary's dictID is provided in frame header (default:1) */
+        DEBUGLOG(5, "set dictIDFlag = %u", (value>0));
+        cctx->requestedParams.fParams.noDictIDFlag = (value==0);
+        return 0;
+
+    /* Dictionary parameters */
+    case ZSTD_p_dictMode :
+        if (cctx->cdict) return ERROR(stage_wrong);  /* must be set before loading */
+        /* restrict dictionary mode, to "rawContent" or "fullDict" only */
+        ZSTD_STATIC_ASSERT((U32)ZSTD_dm_fullDict > (U32)ZSTD_dm_rawContent);
+        if (value > (unsigned)ZSTD_dm_fullDict)
+            return ERROR(compressionParameter_outOfBound);
+        cctx->dictMode = (ZSTD_dictMode_e)value;
+        return 0;
+
+    case ZSTD_p_refDictContent :
+        if (cctx->cdict) return ERROR(stage_wrong);  /* must be set before loading */
+        /* dictionary content will be referenced, instead of copied */
+        cctx->dictContentByRef = value>0;
+        return 0;
+
+    case ZSTD_p_forceMaxWindow :  /* Force back-references to remain < windowSize,
+                                   * even when referencing into Dictionary content
+                                   * default : 0 when using a CDict, 1 when using a Prefix */
+        cctx->forceWindow = value>0;
+        cctx->loadedDictEnd = 0;
+        return 0;
+
+    case ZSTD_p_nbThreads:
+        if (value==0) return 0;
+        DEBUGLOG(5, " setting nbThreads : %u", value);
+#ifndef ZSTD_MULTITHREAD
+        if (value > 1) return ERROR(compressionParameter_unsupported);
+#endif
+        if ((value>1) && (cctx->nbThreads != value)) {
+            if (cctx->staticSize)  /* MT not compatible with static alloc */
+                return ERROR(compressionParameter_unsupported);
+            ZSTDMT_freeCCtx(cctx->mtctx);
+            cctx->nbThreads = 1;
+            cctx->mtctx = ZSTDMT_createCCtx(value);
+            if (cctx->mtctx == NULL) return ERROR(memory_allocation);
+        }
+        cctx->nbThreads = value;
+        return 0;
+
+    case ZSTD_p_jobSize:
+        if (cctx->nbThreads <= 1) return ERROR(compressionParameter_unsupported);
+        assert(cctx->mtctx != NULL);
+        return ZSTDMT_setMTCtxParameter(cctx->mtctx, ZSTDMT_p_sectionSize, value);
+
+    case ZSTD_p_overlapSizeLog:
+        DEBUGLOG(5, " setting overlap with nbThreads == %u", cctx->nbThreads);
+        if (cctx->nbThreads <= 1) return ERROR(compressionParameter_unsupported);
+        assert(cctx->mtctx != NULL);
+        return ZSTDMT_setMTCtxParameter(cctx->mtctx, ZSTDMT_p_overlapSectionLog, value);
+
+    default: return ERROR(parameter_unknown);
+    }
+}
+
+ZSTDLIB_API size_t ZSTD_CCtx_setPledgedSrcSize(ZSTD_CCtx* cctx, unsigned long long pledgedSrcSize)
+{
+    DEBUGLOG(5, " setting pledgedSrcSize to %u", (U32)pledgedSrcSize);
+    if (cctx->streamStage != zcss_init) return ERROR(stage_wrong);
+    cctx->pledgedSrcSizePlusOne = pledgedSrcSize+1;
+    return 0;
+}
+
+ZSTDLIB_API size_t ZSTD_CCtx_loadDictionary(ZSTD_CCtx* cctx, const void* dict, size_t dictSize)
+{
+    if (cctx->streamStage != zcss_init) return ERROR(stage_wrong);
+    if (cctx->staticSize) return ERROR(memory_allocation);  /* no malloc for static CCtx */
+    DEBUGLOG(5, "load dictionary of size %u", (U32)dictSize);
+    ZSTD_freeCDict(cctx->cdictLocal);  /* in case one already exists */
+    if (dict==NULL || dictSize==0) {   /* no dictionary mode */
+        cctx->cdictLocal = NULL;
+        cctx->cdict = NULL;
+    } else {
+        ZSTD_compressionParameters const cParams =
+                cctx->compressionLevel == ZSTD_CLEVEL_CUSTOM ?
+                cctx->requestedParams.cParams :
+                ZSTD_getCParams(cctx->compressionLevel, 0, dictSize);
+        cctx->cdictLocal = ZSTD_createCDict_advanced(
+                                dict, dictSize,
+                                cctx->dictContentByRef, cctx->dictMode,
+                                cParams, cctx->customMem);
+        cctx->cdict = cctx->cdictLocal;
+        if (cctx->cdictLocal == NULL)
+            return ERROR(memory_allocation);
+    }
+    return 0;
+}
+
+size_t ZSTD_CCtx_refCDict(ZSTD_CCtx* cctx, const ZSTD_CDict* cdict)
+{
+    if (cctx->streamStage != zcss_init) return ERROR(stage_wrong);
+    cctx->cdict = cdict;
+    cctx->prefix = NULL;   /* exclusive */
+    cctx->prefixSize = 0;
+    return 0;
+}
+
+size_t ZSTD_CCtx_refPrefix(ZSTD_CCtx* cctx, const void* prefix, size_t prefixSize)
+{
+    if (cctx->streamStage != zcss_init) return ERROR(stage_wrong);
+    cctx->cdict = NULL;   /* prefix discards any prior cdict */
+    cctx->prefix = prefix;
+    cctx->prefixSize = prefixSize;
+    return 0;
+}
+
+static void ZSTD_startNewCompression(ZSTD_CCtx* cctx)
+{
+    cctx->streamStage = zcss_init;
+    cctx->pledgedSrcSizePlusOne = 0;
+}
+
+/*! ZSTD_CCtx_reset() :
+ *  Also dumps dictionary */
+void ZSTD_CCtx_reset(ZSTD_CCtx* cctx)
+{
+    ZSTD_startNewCompression(cctx);
+    cctx->cdict = NULL;
+}
+
+/** ZSTD_checkCParams() :
+    control CParam values remain within authorized range.
     @return : 0, or an error code if one value is beyond authorized range */
 size_t ZSTD_checkCParams(ZSTD_compressionParameters cParams)
 {
-#   define CLAMPCHECK(val,min,max) { if ((val<min) | (val>max)) return ERROR(compressionParameter_unsupported); }
     CLAMPCHECK(cParams.windowLog, ZSTD_WINDOWLOG_MIN, ZSTD_WINDOWLOG_MAX);
     CLAMPCHECK(cParams.chainLog, ZSTD_CHAINLOG_MIN, ZSTD_CHAINLOG_MAX);
     CLAMPCHECK(cParams.hashLog, ZSTD_HASHLOG_MIN, ZSTD_HASHLOG_MAX);
     CLAMPCHECK(cParams.searchLog, ZSTD_SEARCHLOG_MIN, ZSTD_SEARCHLOG_MAX);
-    { U32 const searchLengthMin = ((cParams.strategy == ZSTD_fast) | (cParams.strategy == ZSTD_greedy)) ? ZSTD_SEARCHLENGTH_MIN+1 : ZSTD_SEARCHLENGTH_MIN;
-      U32 const searchLengthMax = (cParams.strategy == ZSTD_fast) ? ZSTD_SEARCHLENGTH_MAX : ZSTD_SEARCHLENGTH_MAX-1;
-      CLAMPCHECK(cParams.searchLength, searchLengthMin, searchLengthMax); }
+    CLAMPCHECK(cParams.searchLength, ZSTD_SEARCHLENGTH_MIN, ZSTD_SEARCHLENGTH_MAX);
     CLAMPCHECK(cParams.targetLength, ZSTD_TARGETLENGTH_MIN, ZSTD_TARGETLENGTH_MAX);
-    if ((U32)(cParams.strategy) > (U32)ZSTD_btopt) return ERROR(compressionParameter_unsupported);
+    if ((U32)(cParams.strategy) > (U32)ZSTD_btultra) return ERROR(compressionParameter_unsupported);
     return 0;
 }
 
+/** ZSTD_clampCParams() :
+ *  make CParam values within valid range.
+ *  @return : valid CParams */
+static ZSTD_compressionParameters ZSTD_clampCParams(ZSTD_compressionParameters cParams)
+{
+#   define CLAMP(val,min,max) {      \
+        if (val<min) val=min;        \
+        else if (val>max) val=max;   \
+    }
+    CLAMP(cParams.windowLog, ZSTD_WINDOWLOG_MIN, ZSTD_WINDOWLOG_MAX);
+    CLAMP(cParams.chainLog, ZSTD_CHAINLOG_MIN, ZSTD_CHAINLOG_MAX);
+    CLAMP(cParams.hashLog, ZSTD_HASHLOG_MIN, ZSTD_HASHLOG_MAX);
+    CLAMP(cParams.searchLog, ZSTD_SEARCHLOG_MIN, ZSTD_SEARCHLOG_MAX);
+    CLAMP(cParams.searchLength, ZSTD_SEARCHLENGTH_MIN, ZSTD_SEARCHLENGTH_MAX);
+    CLAMP(cParams.targetLength, ZSTD_TARGETLENGTH_MIN, ZSTD_TARGETLENGTH_MAX);
+    if ((U32)(cParams.strategy) > (U32)ZSTD_btultra) cParams.strategy = ZSTD_btultra;
+    return cParams;
+}
 
-/** ZSTD_adjustCParams() :
+/** ZSTD_cycleLog() :
+ *  condition for correct operation : hashLog > 1 */
+static U32 ZSTD_cycleLog(U32 hashLog, ZSTD_strategy strat)
+{
+    U32 const btScale = ((U32)strat >= (U32)ZSTD_btlazy2);
+    return hashLog - btScale;
+}
+
+/** ZSTD_adjustCParams_internal() :
     optimize `cPar` for a given input (`srcSize` and `dictSize`).
     mostly downsizing to reduce memory consumption and initialization.
     Both `srcSize` and `dictSize` are optional (use 0 if unknown),
     but if both are 0, no optimization can be done.
     Note : cPar is considered validated at this stage. Use ZSTD_checkParams() to ensure that. */
-ZSTD_compressionParameters ZSTD_adjustCParams(ZSTD_compressionParameters cPar, unsigned long long srcSize, size_t dictSize)
+ZSTD_compressionParameters ZSTD_adjustCParams_internal(ZSTD_compressionParameters cPar, unsigned long long srcSize, size_t dictSize)
 {
+    assert(ZSTD_checkCParams(cPar)==0);
     if (srcSize+dictSize == 0) return cPar;   /* no size information available : no adjustment */
 
     /* resize params, to use less memory when necessary */
@@ -160,19 +524,25 @@ ZSTD_compressionParameters ZSTD_adjustCParams(ZSTD_compressionParameters cPar, u
             if (cPar.windowLog > srcLog) cPar.windowLog = srcLog;
     }   }
     if (cPar.hashLog > cPar.windowLog) cPar.hashLog = cPar.windowLog;
-    {   U32 const btPlus = (cPar.strategy == ZSTD_btlazy2) | (cPar.strategy == ZSTD_btopt);
-        U32 const maxChainLog = cPar.windowLog+btPlus;
-        if (cPar.chainLog > maxChainLog) cPar.chainLog = maxChainLog; }   /* <= ZSTD_CHAINLOG_MAX */
+    {   U32 const cycleLog = ZSTD_cycleLog(cPar.chainLog, cPar.strategy);
+        if (cycleLog > cPar.windowLog) cPar.chainLog -= (cycleLog - cPar.windowLog);
+    }
 
     if (cPar.windowLog < ZSTD_WINDOWLOG_ABSOLUTEMIN) cPar.windowLog = ZSTD_WINDOWLOG_ABSOLUTEMIN;  /* required for frame header */
 
     return cPar;
 }
 
-
-size_t ZSTD_estimateCCtxSize(ZSTD_compressionParameters cParams)
+ZSTD_compressionParameters ZSTD_adjustCParams(ZSTD_compressionParameters cPar, unsigned long long srcSize, size_t dictSize)
 {
-    size_t const blockSize = MIN(ZSTD_BLOCKSIZE_ABSOLUTEMAX, (size_t)1 << cParams.windowLog);
+    cPar = ZSTD_clampCParams(cPar);
+    return ZSTD_adjustCParams_internal(cPar, srcSize, dictSize);
+}
+
+
+size_t ZSTD_estimateCCtxSize_advanced(ZSTD_compressionParameters cParams)
+{
+    size_t const blockSize = MIN(ZSTD_BLOCKSIZE_MAX, (size_t)1 << cParams.windowLog);
     U32    const divider = (cParams.searchLength==3) ? 3 : 4;
     size_t const maxNbSeq = blockSize / divider;
     size_t const tokenSpace = blockSize + 11*maxNbSeq;
@@ -181,32 +551,69 @@ size_t ZSTD_estimateCCtxSize(ZSTD_compressionParameters cParams)
     size_t const hSize = ((size_t)1) << cParams.hashLog;
     U32    const hashLog3 = (cParams.searchLength>3) ? 0 : MIN(ZSTD_HASHLOG3_MAX, cParams.windowLog);
     size_t const h3Size = ((size_t)1) << hashLog3;
+    size_t const entropySpace = hufCTable_size + litlengthCTable_size
+                              + offcodeCTable_size + matchlengthCTable_size
+                              + entropyScratchSpace_size;
     size_t const tableSpace = (chainSize + hSize + h3Size) * sizeof(U32);
 
-    size_t const optSpace = ((MaxML+1) + (MaxLL+1) + (MaxOff+1) + (1<<Litbits))*sizeof(U32)
+    size_t const optBudget = ((MaxML+1) + (MaxLL+1) + (MaxOff+1) + (1<<Litbits))*sizeof(U32)
                           + (ZSTD_OPT_NUM+1)*(sizeof(ZSTD_match_t) + sizeof(ZSTD_optimal_t));
-    size_t const neededSpace = tableSpace + (256*sizeof(U32)) /* huffTable */ + tokenSpace
-                             + ((cParams.strategy == ZSTD_btopt) ? optSpace : 0);
+    size_t const optSpace = ((cParams.strategy == ZSTD_btopt) || (cParams.strategy == ZSTD_btultra)) ? optBudget : 0;
+    size_t const neededSpace = entropySpace + tableSpace + tokenSpace + optSpace;
 
+    DEBUGLOG(5, "sizeof(ZSTD_CCtx) : %u", (U32)sizeof(ZSTD_CCtx));
+    DEBUGLOG(5, "estimate workSpace : %u", (U32)neededSpace);
     return sizeof(ZSTD_CCtx) + neededSpace;
 }
 
-
-static U32 ZSTD_equivalentParams(ZSTD_parameters param1, ZSTD_parameters param2)
+size_t ZSTD_estimateCCtxSize(int compressionLevel)
 {
-    return (param1.cParams.hashLog  == param2.cParams.hashLog)
-         & (param1.cParams.chainLog == param2.cParams.chainLog)
-         & (param1.cParams.strategy == param2.cParams.strategy)
-         & ((param1.cParams.searchLength==3) == (param2.cParams.searchLength==3));
+    ZSTD_compressionParameters const cParams = ZSTD_getCParams(compressionLevel, 0, 0);
+    return ZSTD_estimateCCtxSize_advanced(cParams);
+}
+
+size_t ZSTD_estimateCStreamSize_advanced(ZSTD_compressionParameters cParams)
+{
+    size_t const CCtxSize = ZSTD_estimateCCtxSize_advanced(cParams);
+    size_t const blockSize = MIN(ZSTD_BLOCKSIZE_MAX, (size_t)1 << cParams.windowLog);
+    size_t const inBuffSize = ((size_t)1 << cParams.windowLog) + blockSize;
+    size_t const outBuffSize = ZSTD_compressBound(blockSize) + 1;
+    size_t const streamingSize = inBuffSize + outBuffSize;
+
+    return CCtxSize + streamingSize;
+}
+
+size_t ZSTD_estimateCStreamSize(int compressionLevel) {
+    ZSTD_compressionParameters const cParams = ZSTD_getCParams(compressionLevel, 0, 0);
+    return ZSTD_estimateCStreamSize_advanced(cParams);
+}
+
+
+static U32 ZSTD_equivalentParams(ZSTD_compressionParameters cParams1,
+                                 ZSTD_compressionParameters cParams2)
+{
+    U32 bslog1 = MIN(cParams1.windowLog, ZSTD_BLOCKSIZELOG_MAX);
+    U32 bslog2 = MIN(cParams2.windowLog, ZSTD_BLOCKSIZELOG_MAX);
+    return (bslog1 == bslog2)   /* same block size */
+         & (cParams1.hashLog  == cParams2.hashLog)
+         & (cParams1.chainLog == cParams2.chainLog)
+         & (cParams1.strategy == cParams2.strategy)   /* opt parser space */
+         & ((cParams1.searchLength==3) == (cParams2.searchLength==3));  /* hashlog3 space */
 }
 
 /*! ZSTD_continueCCtx() :
-    reuse CCtx without reset (note : requires no dictionary) */
-static size_t ZSTD_continueCCtx(ZSTD_CCtx* cctx, ZSTD_parameters params, U64 frameContentSize)
+ *  reuse CCtx without reset (note : requires no dictionary) */
+static size_t ZSTD_continueCCtx(ZSTD_CCtx* cctx, ZSTD_parameters params, U64 pledgedSrcSize)
 {
     U32 const end = (U32)(cctx->nextSrc - cctx->base);
-    cctx->params = params;
-    cctx->frameContentSize = frameContentSize;
+    DEBUGLOG(5, "continue mode");
+    cctx->appliedParams = params;
+    cctx->pledgedSrcSizePlusOne = pledgedSrcSize+1;
+    cctx->consumedSrcSize = 0;
+    if (pledgedSrcSize == ZSTD_CONTENTSIZE_UNKNOWN)
+        cctx->appliedParams.fParams.contentSizeFlag = 0;
+    DEBUGLOG(5, "pledged content size : %u ; flag : %u",
+        (U32)pledgedSrcSize, cctx->appliedParams.fParams.contentSizeFlag);
     cctx->lowLimit = end;
     cctx->dictLimit = end;
     cctx->nextToUpdate = end+1;
@@ -219,64 +626,119 @@ static size_t ZSTD_continueCCtx(ZSTD_CCtx* cctx, ZSTD_parameters params, U64 fra
     return 0;
 }
 
-typedef enum { ZSTDcrp_continue, ZSTDcrp_noMemset, ZSTDcrp_fullReset } ZSTD_compResetPolicy_e;
+typedef enum { ZSTDcrp_continue, ZSTDcrp_noMemset } ZSTD_compResetPolicy_e;
+typedef enum { ZSTDb_not_buffered, ZSTDb_buffered } ZSTD_buffered_policy_e;
 
-/*! ZSTD_resetCCtx_advanced() :
-    note : 'params' must be validated */
-static size_t ZSTD_resetCCtx_advanced (ZSTD_CCtx* zc,
-                                       ZSTD_parameters params, U64 frameContentSize,
-                                       ZSTD_compResetPolicy_e const crp)
+/*! ZSTD_resetCCtx_internal() :
+    note : `params` are assumed fully validated at this stage */
+static size_t ZSTD_resetCCtx_internal(ZSTD_CCtx* zc,
+                                      ZSTD_parameters params, U64 pledgedSrcSize,
+                                      ZSTD_compResetPolicy_e const crp,
+                                      ZSTD_buffered_policy_e const zbuff)
 {
-    if (crp == ZSTDcrp_continue)
-        if (ZSTD_equivalentParams(params, zc->params))
-            return ZSTD_continueCCtx(zc, params, frameContentSize);
+    assert(!ZSTD_isError(ZSTD_checkCParams(params.cParams)));
 
-    {   size_t const blockSize = MIN(ZSTD_BLOCKSIZE_ABSOLUTEMAX, (size_t)1 << params.cParams.windowLog);
+    if (crp == ZSTDcrp_continue) {
+        if (ZSTD_equivalentParams(params.cParams, zc->appliedParams.cParams)) {
+            DEBUGLOG(5, "ZSTD_equivalentParams()==1");
+            zc->fseCTables_ready = 0;
+            zc->hufCTable_repeatMode = HUF_repeat_none;
+            return ZSTD_continueCCtx(zc, params, pledgedSrcSize);
+    }   }
+
+    {   size_t const blockSize = MIN(ZSTD_BLOCKSIZE_MAX, (size_t)1 << params.cParams.windowLog);
         U32    const divider = (params.cParams.searchLength==3) ? 3 : 4;
         size_t const maxNbSeq = blockSize / divider;
         size_t const tokenSpace = blockSize + 11*maxNbSeq;
-        size_t const chainSize = (params.cParams.strategy == ZSTD_fast) ? 0 : (1 << params.cParams.chainLog);
+        size_t const chainSize = (params.cParams.strategy == ZSTD_fast) ?
+                                0 : (1 << params.cParams.chainLog);
         size_t const hSize = ((size_t)1) << params.cParams.hashLog;
-        U32    const hashLog3 = (params.cParams.searchLength>3) ? 0 : MIN(ZSTD_HASHLOG3_MAX, params.cParams.windowLog);
+        U32    const hashLog3 = (params.cParams.searchLength>3) ?
+                                0 : MIN(ZSTD_HASHLOG3_MAX, params.cParams.windowLog);
         size_t const h3Size = ((size_t)1) << hashLog3;
         size_t const tableSpace = (chainSize + hSize + h3Size) * sizeof(U32);
+        size_t const buffOutSize = (zbuff==ZSTDb_buffered) ? ZSTD_compressBound(blockSize)+1 : 0;
+        size_t const buffInSize = (zbuff==ZSTDb_buffered) ? ((size_t)1 << params.cParams.windowLog) + blockSize : 0;
         void* ptr;
 
         /* Check if workSpace is large enough, alloc a new one if needed */
-        {   size_t const optSpace = ((MaxML+1) + (MaxLL+1) + (MaxOff+1) + (1<<Litbits))*sizeof(U32)
-                                  + (ZSTD_OPT_NUM+1)*(sizeof(ZSTD_match_t) + sizeof(ZSTD_optimal_t));
-            size_t const neededSpace = tableSpace + (256*sizeof(U32)) /* huffTable */ + tokenSpace
-                                  + ((params.cParams.strategy == ZSTD_btopt) ? optSpace : 0);
-            if (zc->workSpaceSize < neededSpace) {
+        {   size_t const entropySpace = hufCTable_size + litlengthCTable_size
+                                  + offcodeCTable_size + matchlengthCTable_size
+                                  + entropyScratchSpace_size;
+            size_t const optPotentialSpace = ((MaxML+1) + (MaxLL+1) + (MaxOff+1) + (1<<Litbits)) * sizeof(U32)
+                                  + (ZSTD_OPT_NUM+1) * (sizeof(ZSTD_match_t)+sizeof(ZSTD_optimal_t));
+            size_t const optSpace = ( (params.cParams.strategy == ZSTD_btopt)
+                                    || (params.cParams.strategy == ZSTD_btultra)) ?
+                                    optPotentialSpace : 0;
+            size_t const bufferSpace = buffInSize + buffOutSize;
+            size_t const neededSpace = entropySpace + optSpace + tableSpace
+                                     + tokenSpace + bufferSpace;
+
+            if (zc->workSpaceSize < neededSpace) {  /* too small : resize /*/
+                DEBUGLOG(5, "Need to update workSpaceSize from %uK to %uK \n",
+                            (unsigned)zc->workSpaceSize>>10,
+                            (unsigned)neededSpace>>10);
+                /* static cctx : no resize, error out */
+                if (zc->staticSize) return ERROR(memory_allocation);
+
+                zc->workSpaceSize = 0;
                 ZSTD_free(zc->workSpace, zc->customMem);
                 zc->workSpace = ZSTD_malloc(neededSpace, zc->customMem);
                 if (zc->workSpace == NULL) return ERROR(memory_allocation);
                 zc->workSpaceSize = neededSpace;
+                ptr = zc->workSpace;
+
+                /* entropy space */
+                zc->hufCTable = (HUF_CElt*)ptr;
+                ptr = (char*)zc->hufCTable + hufCTable_size;  /* note : HUF_CElt* is incomplete type, size is estimated via macro */
+                zc->offcodeCTable = (FSE_CTable*) ptr;
+                ptr = (char*)ptr + offcodeCTable_size;
+                zc->matchlengthCTable = (FSE_CTable*) ptr;
+                ptr = (char*)ptr + matchlengthCTable_size;
+                zc->litlengthCTable = (FSE_CTable*) ptr;
+                ptr = (char*)ptr + litlengthCTable_size;
+                assert(((size_t)ptr & 3) == 0);   /* ensure correct alignment */
+                zc->entropyScratchSpace = (unsigned*) ptr;
         }   }
 
-        if (crp!=ZSTDcrp_noMemset) memset(zc->workSpace, 0, tableSpace);   /* reset tables only */
-        XXH64_reset(&zc->xxhState, 0);
-        zc->hashLog3 = hashLog3;
-        zc->hashTable = (U32*)(zc->workSpace);
-        zc->chainTable = zc->hashTable + hSize;
-        zc->hashTable3 = zc->chainTable + chainSize;
-        ptr = zc->hashTable3 + h3Size;
-        zc->hufTable = (HUF_CElt*)ptr;
-        zc->flagStaticTables = 0;
-        ptr = ((U32*)ptr) + 256;  /* note : HUF_CElt* is incomplete type, size is simulated using U32 */
+        /* init params */
+        zc->appliedParams = params;
+        zc->pledgedSrcSizePlusOne = pledgedSrcSize+1;
+        zc->consumedSrcSize = 0;
+        if (pledgedSrcSize == ZSTD_CONTENTSIZE_UNKNOWN)
+            zc->appliedParams.fParams.contentSizeFlag = 0;
+        DEBUGLOG(5, "pledged content size : %u ; flag : %u",
+            (U32)pledgedSrcSize, zc->appliedParams.fParams.contentSizeFlag);
+        zc->blockSize = blockSize;
 
+        XXH64_reset(&zc->xxhState, 0);
+        zc->stage = ZSTDcs_init;
+        zc->dictID = 0;
+        zc->loadedDictEnd = 0;
+        zc->fseCTables_ready = 0;
+        zc->hufCTable_repeatMode = HUF_repeat_none;
         zc->nextToUpdate = 1;
         zc->nextSrc = NULL;
         zc->base = NULL;
         zc->dictBase = NULL;
         zc->dictLimit = 0;
         zc->lowLimit = 0;
-        zc->params = params;
-        zc->blockSize = blockSize;
-        zc->frameContentSize = frameContentSize;
         { int i; for (i=0; i<ZSTD_REP_NUM; i++) zc->rep[i] = repStartValue[i]; }
+        zc->hashLog3 = hashLog3;
+        zc->seqStore.litLengthSum = 0;
 
-        if (params.cParams.strategy == ZSTD_btopt) {
+        /* ensure entropy tables are close together at the beginning */
+        assert((void*)zc->hufCTable == zc->workSpace);
+        assert((char*)zc->offcodeCTable == (char*)zc->hufCTable + hufCTable_size);
+        assert((char*)zc->matchlengthCTable == (char*)zc->offcodeCTable + offcodeCTable_size);
+        assert((char*)zc->litlengthCTable == (char*)zc->matchlengthCTable + matchlengthCTable_size);
+        assert((char*)zc->entropyScratchSpace == (char*)zc->litlengthCTable + litlengthCTable_size);
+        ptr = (char*)zc->entropyScratchSpace + entropyScratchSpace_size;
+
+        /* opt parser space */
+        if ((params.cParams.strategy == ZSTD_btopt) || (params.cParams.strategy == ZSTD_btultra)) {
+            DEBUGLOG(5, "reserving optimal parser space");
+            assert(((size_t)ptr & 3) == 0);  /* ensure ptr is properly aligned */
             zc->seqStore.litFreq = (U32*)ptr;
             zc->seqStore.litLengthFreq = zc->seqStore.litFreq + (1<<Litbits);
             zc->seqStore.matchLengthFreq = zc->seqStore.litLengthFreq + (MaxLL+1);
@@ -286,41 +748,74 @@ static size_t ZSTD_resetCCtx_advanced (ZSTD_CCtx* zc,
             ptr = zc->seqStore.matchTable + ZSTD_OPT_NUM+1;
             zc->seqStore.priceTable = (ZSTD_optimal_t*)ptr;
             ptr = zc->seqStore.priceTable + ZSTD_OPT_NUM+1;
-            zc->seqStore.litLengthSum = 0;
         }
+
+        /* table Space */
+        if (crp!=ZSTDcrp_noMemset) memset(ptr, 0, tableSpace);   /* reset tables only */
+        assert(((size_t)ptr & 3) == 0);  /* ensure ptr is properly aligned */
+        zc->hashTable = (U32*)(ptr);
+        zc->chainTable = zc->hashTable + hSize;
+        zc->hashTable3 = zc->chainTable + chainSize;
+        ptr = zc->hashTable3 + h3Size;
+
+        /* sequences storage */
         zc->seqStore.sequencesStart = (seqDef*)ptr;
         ptr = zc->seqStore.sequencesStart + maxNbSeq;
         zc->seqStore.llCode = (BYTE*) ptr;
         zc->seqStore.mlCode = zc->seqStore.llCode + maxNbSeq;
         zc->seqStore.ofCode = zc->seqStore.mlCode + maxNbSeq;
         zc->seqStore.litStart = zc->seqStore.ofCode + maxNbSeq;
+        ptr = zc->seqStore.litStart + blockSize;
 
-        zc->stage = ZSTDcs_init;
-        zc->dictID = 0;
-        zc->loadedDictEnd = 0;
+        /* buffers */
+        zc->inBuffSize = buffInSize;
+        zc->inBuff = (char*)ptr;
+        zc->outBuffSize = buffOutSize;
+        zc->outBuff = zc->inBuff + buffInSize;
 
         return 0;
     }
 }
 
+/* ZSTD_invalidateRepCodes() :
+ * ensures next compression will not use repcodes from previous block.
+ * Note : only works with regular variant;
+ *        do not use with extDict variant ! */
+void ZSTD_invalidateRepCodes(ZSTD_CCtx* cctx) {
+    int i;
+    for (i=0; i<ZSTD_REP_NUM; i++) cctx->rep[i] = 0;
+}
 
-/*! ZSTD_copyCCtx() :
-*   Duplicate an existing context `srcCCtx` into another one `dstCCtx`.
-*   Only works during stage ZSTDcs_init (i.e. after creation, but before first call to ZSTD_compressContinue()).
-*   @return : 0, or an error code */
-size_t ZSTD_copyCCtx(ZSTD_CCtx* dstCCtx, const ZSTD_CCtx* srcCCtx, unsigned long long pledgedSrcSize)
+
+/*! ZSTD_copyCCtx_internal() :
+ *  Duplicate an existing context `srcCCtx` into another one `dstCCtx`.
+ *  Only works during stage ZSTDcs_init (i.e. after creation, but before first call to ZSTD_compressContinue()).
+ *  pledgedSrcSize=0 means "empty" if fParams.contentSizeFlag=1
+ *  @return : 0, or an error code */
+static size_t ZSTD_copyCCtx_internal(ZSTD_CCtx* dstCCtx,
+                            const ZSTD_CCtx* srcCCtx,
+                            ZSTD_frameParameters fParams,
+                            unsigned long long pledgedSrcSize,
+                            ZSTD_buffered_policy_e zbuff)
 {
+    DEBUGLOG(5, "ZSTD_copyCCtx_internal");
     if (srcCCtx->stage!=ZSTDcs_init) return ERROR(stage_wrong);
 
     memcpy(&dstCCtx->customMem, &srcCCtx->customMem, sizeof(ZSTD_customMem));
-    ZSTD_resetCCtx_advanced(dstCCtx, srcCCtx->params, pledgedSrcSize, ZSTDcrp_noMemset);
+    {   ZSTD_parameters params = srcCCtx->appliedParams;
+        params.fParams = fParams;
+        ZSTD_resetCCtx_internal(dstCCtx, params, pledgedSrcSize,
+                                ZSTDcrp_noMemset, zbuff);
+    }
 
     /* copy tables */
-    {   size_t const chainSize = (srcCCtx->params.cParams.strategy == ZSTD_fast) ? 0 : (1 << srcCCtx->params.cParams.chainLog);
-        size_t const hSize = ((size_t)1) << srcCCtx->params.cParams.hashLog;
+    {   size_t const chainSize = (srcCCtx->appliedParams.cParams.strategy == ZSTD_fast) ? 0 : (1 << srcCCtx->appliedParams.cParams.chainLog);
+        size_t const hSize =  (size_t)1 << srcCCtx->appliedParams.cParams.hashLog;
         size_t const h3Size = (size_t)1 << srcCCtx->hashLog3;
         size_t const tableSpace = (chainSize + hSize + h3Size) * sizeof(U32);
-        memcpy(dstCCtx->workSpace, srcCCtx->workSpace, tableSpace);
+        assert((U32*)dstCCtx->chainTable == (U32*)dstCCtx->hashTable + hSize);  /* chainTable must follow hashTable */
+        assert((U32*)dstCCtx->hashTable3 == (U32*)dstCCtx->chainTable + chainSize);
+        memcpy(dstCCtx->hashTable, srcCCtx->hashTable, tableSpace);   /* presumes all tables follow each other */
     }
 
     /* copy dictionary offsets */
@@ -335,20 +830,38 @@ size_t ZSTD_copyCCtx(ZSTD_CCtx* dstCCtx, const ZSTD_CCtx* srcCCtx, unsigned long
     dstCCtx->dictID       = srcCCtx->dictID;
 
     /* copy entropy tables */
-    dstCCtx->flagStaticTables = srcCCtx->flagStaticTables;
-    if (srcCCtx->flagStaticTables) {
-        memcpy(dstCCtx->hufTable, srcCCtx->hufTable, 256*4);
-        memcpy(dstCCtx->litlengthCTable, srcCCtx->litlengthCTable, sizeof(dstCCtx->litlengthCTable));
-        memcpy(dstCCtx->matchlengthCTable, srcCCtx->matchlengthCTable, sizeof(dstCCtx->matchlengthCTable));
-        memcpy(dstCCtx->offcodeCTable, srcCCtx->offcodeCTable, sizeof(dstCCtx->offcodeCTable));
+    dstCCtx->fseCTables_ready = srcCCtx->fseCTables_ready;
+    if (srcCCtx->fseCTables_ready) {
+        memcpy(dstCCtx->litlengthCTable, srcCCtx->litlengthCTable, litlengthCTable_size);
+        memcpy(dstCCtx->matchlengthCTable, srcCCtx->matchlengthCTable, matchlengthCTable_size);
+        memcpy(dstCCtx->offcodeCTable, srcCCtx->offcodeCTable, offcodeCTable_size);
+    }
+    dstCCtx->hufCTable_repeatMode = srcCCtx->hufCTable_repeatMode;
+    if (srcCCtx->hufCTable_repeatMode) {
+        memcpy(dstCCtx->hufCTable, srcCCtx->hufCTable, hufCTable_size);
     }
 
     return 0;
 }
 
+/*! ZSTD_copyCCtx() :
+ *  Duplicate an existing context `srcCCtx` into another one `dstCCtx`.
+ *  Only works during stage ZSTDcs_init (i.e. after creation, but before first call to ZSTD_compressContinue()).
+ *  pledgedSrcSize==0 means "unknown".
+*   @return : 0, or an error code */
+size_t ZSTD_copyCCtx(ZSTD_CCtx* dstCCtx, const ZSTD_CCtx* srcCCtx, unsigned long long pledgedSrcSize)
+{
+    ZSTD_frameParameters fParams = { 1 /*content*/, 0 /*checksum*/, 0 /*noDictID*/ };
+    ZSTD_buffered_policy_e const zbuff = (ZSTD_buffered_policy_e)(srcCCtx->inBuffSize>0);
+    ZSTD_STATIC_ASSERT((U32)ZSTDb_buffered==1);
+    fParams.contentSizeFlag = pledgedSrcSize>0;
+
+    return ZSTD_copyCCtx_internal(dstCCtx, srcCCtx, fParams, pledgedSrcSize, zbuff);
+}
+
 
 /*! ZSTD_reduceTable() :
-*   reduce table indexes by `reducerValue` */
+ *  reduce table indexes by `reducerValue` */
 static void ZSTD_reduceTable (U32* const table, U32 const size, U32 const reducerValue)
 {
     U32 u;
@@ -362,10 +875,10 @@ static void ZSTD_reduceTable (U32* const table, U32 const size, U32 const reduce
 *   rescale all indexes to avoid future overflow (indexes are U32) */
 static void ZSTD_reduceIndex (ZSTD_CCtx* zc, const U32 reducerValue)
 {
-    { U32 const hSize = 1 << zc->params.cParams.hashLog;
+    { U32 const hSize = 1 << zc->appliedParams.cParams.hashLog;
       ZSTD_reduceTable(zc->hashTable, hSize, reducerValue); }
 
-    { U32 const chainSize = (zc->params.cParams.strategy == ZSTD_fast) ? 0 : (1 << zc->params.cParams.chainLog);
+    { U32 const chainSize = (zc->appliedParams.cParams.strategy == ZSTD_fast) ? 0 : (1 << zc->appliedParams.cParams.chainLog);
       ZSTD_reduceTable(zc->chainTable, chainSize, reducerValue); }
 
     { U32 const h3Size = (zc->hashLog3) ? 1 << zc->hashLog3 : 0;
@@ -377,7 +890,7 @@ static void ZSTD_reduceIndex (ZSTD_CCtx* zc, const U32 reducerValue)
 *  Block entropic compression
 *********************************************************/
 
-/* See zstd_compression_format.md for detailed format description */
+/* See doc/zstd_compression_format.md for detailed format description */
 
 size_t ZSTD_noCompressBlock (void* dst, size_t dstCapacity, const void* src, size_t srcSize)
 {
@@ -403,10 +916,11 @@ static size_t ZSTD_noCompressLiterals (void* dst, size_t dstCapacity, const void
         case 2: /* 2 - 2 - 12 */
             MEM_writeLE16(ostart, (U16)((U32)set_basic + (1<<2) + (srcSize<<4)));
             break;
-        default:   /*note : should not be necessary : flSize is within {1,2,3} */
         case 3: /* 2 - 2 - 20 */
             MEM_writeLE32(ostart, (U32)((U32)set_basic + (3<<2) + (srcSize<<4)));
             break;
+        default:   /* not necessary : flSize is {1,2,3} */
+            assert(0);
     }
 
     memcpy(ostart + flSize, src, srcSize);
@@ -428,10 +942,11 @@ static size_t ZSTD_compressRleLiteralsBlock (void* dst, size_t dstCapacity, cons
         case 2: /* 2 - 2 - 12 */
             MEM_writeLE16(ostart, (U16)((U32)set_rle + (1<<2) + (srcSize<<4)));
             break;
-        default:   /*note : should not be necessary : flSize is necessarily within {1,2,3} */
         case 3: /* 2 - 2 - 20 */
             MEM_writeLE32(ostart, (U32)((U32)set_rle + (3<<2) + (srcSize<<4)));
             break;
+        default:   /* not necessary : flSize is {1,2,3} */
+            assert(0);
     }
 
     ostart[flSize] = *(const BYTE*)src;
@@ -455,24 +970,30 @@ static size_t ZSTD_compressLiterals (ZSTD_CCtx* zc,
 
     /* small ? don't even attempt compression (speed opt) */
 #   define LITERAL_NOENTROPY 63
-    {   size_t const minLitSize = zc->flagStaticTables ? 6 : LITERAL_NOENTROPY;
+    {   size_t const minLitSize = zc->hufCTable_repeatMode == HUF_repeat_valid ? 6 : LITERAL_NOENTROPY;
         if (srcSize <= minLitSize) return ZSTD_noCompressLiterals(dst, dstCapacity, src, srcSize);
     }
 
     if (dstCapacity < lhSize+1) return ERROR(dstSize_tooSmall);   /* not enough space for compression */
-    if (zc->flagStaticTables && (lhSize==3)) {
-        hType = set_repeat;
-        singleStream = 1;
-        cLitSize = HUF_compress1X_usingCTable(ostart+lhSize, dstCapacity-lhSize, src, srcSize, zc->hufTable);
-    } else {
-        cLitSize = singleStream ? HUF_compress1X(ostart+lhSize, dstCapacity-lhSize, src, srcSize, 255, 11)
-                                : HUF_compress2 (ostart+lhSize, dstCapacity-lhSize, src, srcSize, 255, 11);
+    {   HUF_repeat repeat = zc->hufCTable_repeatMode;
+        int const preferRepeat = zc->appliedParams.cParams.strategy < ZSTD_lazy ? srcSize <= 1024 : 0;
+        if (repeat == HUF_repeat_valid && lhSize == 3) singleStream = 1;
+        cLitSize = singleStream ? HUF_compress1X_repeat(ostart+lhSize, dstCapacity-lhSize, src, srcSize, 255, 11,
+                                      zc->entropyScratchSpace, entropyScratchSpace_size, zc->hufCTable, &repeat, preferRepeat)
+                                : HUF_compress4X_repeat(ostart+lhSize, dstCapacity-lhSize, src, srcSize, 255, 11,
+                                      zc->entropyScratchSpace, entropyScratchSpace_size, zc->hufCTable, &repeat, preferRepeat);
+        if (repeat != HUF_repeat_none) { hType = set_repeat; }    /* reused the existing table */
+        else { zc->hufCTable_repeatMode = HUF_repeat_check; }       /* now have a table to reuse */
     }
 
-    if ((cLitSize==0) | (cLitSize >= srcSize - minGain))
+    if ((cLitSize==0) | (cLitSize >= srcSize - minGain)) {
+        zc->hufCTable_repeatMode = HUF_repeat_none;
         return ZSTD_noCompressLiterals(dst, dstCapacity, src, srcSize);
-    if (cLitSize==1)
+    }
+    if (cLitSize==1) {
+        zc->hufCTable_repeatMode = HUF_repeat_none;
         return ZSTD_compressRleLiteralsBlock(dst, dstCapacity, src, srcSize);
+    }
 
     /* Build header */
     switch(lhSize)
@@ -487,13 +1008,14 @@ static size_t ZSTD_compressLiterals (ZSTD_CCtx* zc,
             MEM_writeLE32(ostart, lhc);
             break;
         }
-    default:   /* should not be necessary, lhSize is only {3,4,5} */
     case 5: /* 2 - 2 - 18 - 18 */
         {   U32 const lhc = hType + (3 << 2) + ((U32)srcSize<<4) + ((U32)cLitSize<<22);
             MEM_writeLE32(ostart, lhc);
             ostart[4] = (BYTE)(cLitSize >> 10);
             break;
         }
+    default:   /* not possible : lhSize is {3,4,5} */
+        assert(0);
     }
     return lhSize+cLitSize;
 }
@@ -540,11 +1062,11 @@ void ZSTD_seqToCodes(const seqStore_t* seqStorePtr)
         mlCodeTable[seqStorePtr->longLengthPos] = MaxML;
 }
 
-
-size_t ZSTD_compressSequences(ZSTD_CCtx* zc,
+MEM_STATIC size_t ZSTD_compressSequences (ZSTD_CCtx* zc,
                               void* dst, size_t dstCapacity,
                               size_t srcSize)
 {
+    const int longOffsets = zc->appliedParams.cParams.windowLog > STREAM_ACCUMULATOR_MIN;
     const seqStore_t* seqStorePtr = &(zc->seqStore);
     U32 count[MaxSeq+1];
     S16 norm[MaxSeq+1];
@@ -561,6 +1083,7 @@ size_t ZSTD_compressSequences(ZSTD_CCtx* zc,
     BYTE* op = ostart;
     size_t const nbSeq = seqStorePtr->sequences - seqStorePtr->sequencesStart;
     BYTE* seqHead;
+    BYTE scratchBuffer[1<<MAX(MLFSELog,LLFSELog)];
 
     /* Compress literals */
     {   const BYTE* const literals = seqStorePtr->litStart;
@@ -588,15 +1111,15 @@ size_t ZSTD_compressSequences(ZSTD_CCtx* zc,
 
     /* CTable for Literal Lengths */
     {   U32 max = MaxLL;
-        size_t const mostFrequent = FSE_countFast(count, &max, llCodeTable, nbSeq);
+        size_t const mostFrequent = FSE_countFast_wksp(count, &max, llCodeTable, nbSeq, zc->entropyScratchSpace);
         if ((mostFrequent == nbSeq) && (nbSeq > 2)) {
             *op++ = llCodeTable[0];
             FSE_buildCTable_rle(CTable_LitLength, (BYTE)max);
             LLtype = set_rle;
-        } else if ((zc->flagStaticTables) && (nbSeq < MAX_SEQ_FOR_STATIC_FSE)) {
+        } else if ((zc->fseCTables_ready) && (nbSeq < MAX_SEQ_FOR_STATIC_FSE)) {
             LLtype = set_repeat;
         } else if ((nbSeq < MIN_SEQ_FOR_DYNAMIC_FSE) || (mostFrequent < (nbSeq >> (LL_defaultNormLog-1)))) {
-            FSE_buildCTable(CTable_LitLength, LL_defaultNorm, MaxLL, LL_defaultNormLog);
+            FSE_buildCTable_wksp(CTable_LitLength, LL_defaultNorm, MaxLL, LL_defaultNormLog, scratchBuffer, sizeof(scratchBuffer));
             LLtype = set_basic;
         } else {
             size_t nbSeq_1 = nbSeq;
@@ -604,23 +1127,23 @@ size_t ZSTD_compressSequences(ZSTD_CCtx* zc,
             if (count[llCodeTable[nbSeq-1]]>1) { count[llCodeTable[nbSeq-1]]--; nbSeq_1--; }
             FSE_normalizeCount(norm, tableLog, count, nbSeq_1, max);
             { size_t const NCountSize = FSE_writeNCount(op, oend-op, norm, max, tableLog);   /* overflow protected */
-              if (FSE_isError(NCountSize)) return ERROR(GENERIC);
+              if (FSE_isError(NCountSize)) return NCountSize;
               op += NCountSize; }
-            FSE_buildCTable(CTable_LitLength, norm, max, tableLog);
+            FSE_buildCTable_wksp(CTable_LitLength, norm, max, tableLog, scratchBuffer, sizeof(scratchBuffer));
             LLtype = set_compressed;
     }   }
 
     /* CTable for Offsets */
     {   U32 max = MaxOff;
-        size_t const mostFrequent = FSE_countFast(count, &max, ofCodeTable, nbSeq);
+        size_t const mostFrequent = FSE_countFast_wksp(count, &max, ofCodeTable, nbSeq, zc->entropyScratchSpace);
         if ((mostFrequent == nbSeq) && (nbSeq > 2)) {
             *op++ = ofCodeTable[0];
             FSE_buildCTable_rle(CTable_OffsetBits, (BYTE)max);
             Offtype = set_rle;
-        } else if ((zc->flagStaticTables) && (nbSeq < MAX_SEQ_FOR_STATIC_FSE)) {
+        } else if ((zc->fseCTables_ready) && (nbSeq < MAX_SEQ_FOR_STATIC_FSE)) {
             Offtype = set_repeat;
         } else if ((nbSeq < MIN_SEQ_FOR_DYNAMIC_FSE) || (mostFrequent < (nbSeq >> (OF_defaultNormLog-1)))) {
-            FSE_buildCTable(CTable_OffsetBits, OF_defaultNorm, MaxOff, OF_defaultNormLog);
+            FSE_buildCTable_wksp(CTable_OffsetBits, OF_defaultNorm, MaxOff, OF_defaultNormLog, scratchBuffer, sizeof(scratchBuffer));
             Offtype = set_basic;
         } else {
             size_t nbSeq_1 = nbSeq;
@@ -628,23 +1151,23 @@ size_t ZSTD_compressSequences(ZSTD_CCtx* zc,
             if (count[ofCodeTable[nbSeq-1]]>1) { count[ofCodeTable[nbSeq-1]]--; nbSeq_1--; }
             FSE_normalizeCount(norm, tableLog, count, nbSeq_1, max);
             { size_t const NCountSize = FSE_writeNCount(op, oend-op, norm, max, tableLog);   /* overflow protected */
-              if (FSE_isError(NCountSize)) return ERROR(GENERIC);
+              if (FSE_isError(NCountSize)) return NCountSize;
               op += NCountSize; }
-            FSE_buildCTable(CTable_OffsetBits, norm, max, tableLog);
+            FSE_buildCTable_wksp(CTable_OffsetBits, norm, max, tableLog, scratchBuffer, sizeof(scratchBuffer));
             Offtype = set_compressed;
     }   }
 
     /* CTable for MatchLengths */
     {   U32 max = MaxML;
-        size_t const mostFrequent = FSE_countFast(count, &max, mlCodeTable, nbSeq);
+        size_t const mostFrequent = FSE_countFast_wksp(count, &max, mlCodeTable, nbSeq, zc->entropyScratchSpace);
         if ((mostFrequent == nbSeq) && (nbSeq > 2)) {
             *op++ = *mlCodeTable;
             FSE_buildCTable_rle(CTable_MatchLength, (BYTE)max);
             MLtype = set_rle;
-        } else if ((zc->flagStaticTables) && (nbSeq < MAX_SEQ_FOR_STATIC_FSE)) {
+        } else if ((zc->fseCTables_ready) && (nbSeq < MAX_SEQ_FOR_STATIC_FSE)) {
             MLtype = set_repeat;
         } else if ((nbSeq < MIN_SEQ_FOR_DYNAMIC_FSE) || (mostFrequent < (nbSeq >> (ML_defaultNormLog-1)))) {
-            FSE_buildCTable(CTable_MatchLength, ML_defaultNorm, MaxML, ML_defaultNormLog);
+            FSE_buildCTable_wksp(CTable_MatchLength, ML_defaultNorm, MaxML, ML_defaultNormLog, scratchBuffer, sizeof(scratchBuffer));
             MLtype = set_basic;
         } else {
             size_t nbSeq_1 = nbSeq;
@@ -652,14 +1175,14 @@ size_t ZSTD_compressSequences(ZSTD_CCtx* zc,
             if (count[mlCodeTable[nbSeq-1]]>1) { count[mlCodeTable[nbSeq-1]]--; nbSeq_1--; }
             FSE_normalizeCount(norm, tableLog, count, nbSeq_1, max);
             { size_t const NCountSize = FSE_writeNCount(op, oend-op, norm, max, tableLog);   /* overflow protected */
-              if (FSE_isError(NCountSize)) return ERROR(GENERIC);
+              if (FSE_isError(NCountSize)) return NCountSize;
               op += NCountSize; }
-            FSE_buildCTable(CTable_MatchLength, norm, max, tableLog);
+            FSE_buildCTable_wksp(CTable_MatchLength, norm, max, tableLog, scratchBuffer, sizeof(scratchBuffer));
             MLtype = set_compressed;
     }   }
 
     *seqHead = (BYTE)((LLtype<<6) + (Offtype<<4) + (MLtype<<2));
-    zc->flagStaticTables = 0;
+    zc->fseCTables_ready = 0;
 
     /* Encoding Sequences */
     {   BIT_CStream_t blockStream;
@@ -677,7 +1200,18 @@ size_t ZSTD_compressSequences(ZSTD_CCtx* zc,
         if (MEM_32bits()) BIT_flushBits(&blockStream);
         BIT_addBits(&blockStream, sequences[nbSeq-1].matchLength, ML_bits[mlCodeTable[nbSeq-1]]);
         if (MEM_32bits()) BIT_flushBits(&blockStream);
-        BIT_addBits(&blockStream, sequences[nbSeq-1].offset, ofCodeTable[nbSeq-1]);
+        if (longOffsets) {
+            U32 const ofBits = ofCodeTable[nbSeq-1];
+            int const extraBits = ofBits - MIN(ofBits, STREAM_ACCUMULATOR_MIN-1);
+            if (extraBits) {
+                BIT_addBits(&blockStream, sequences[nbSeq-1].offset, extraBits);
+                BIT_flushBits(&blockStream);
+            }
+            BIT_addBits(&blockStream, sequences[nbSeq-1].offset >> extraBits,
+                        ofBits - extraBits);
+        } else {
+            BIT_addBits(&blockStream, sequences[nbSeq-1].offset, ofCodeTable[nbSeq-1]);
+        }
         BIT_flushBits(&blockStream);
 
         {   size_t n;
@@ -699,7 +1233,17 @@ size_t ZSTD_compressSequences(ZSTD_CCtx* zc,
                 if (MEM_32bits() && ((llBits+mlBits)>24)) BIT_flushBits(&blockStream);
                 BIT_addBits(&blockStream, sequences[n].matchLength, mlBits);
                 if (MEM_32bits()) BIT_flushBits(&blockStream);                  /* (7)*/
-                BIT_addBits(&blockStream, sequences[n].offset, ofBits);         /* 31 */
+                if (longOffsets) {
+                    int const extraBits = ofBits - MIN(ofBits, STREAM_ACCUMULATOR_MIN-1);
+                    if (extraBits) {
+                        BIT_addBits(&blockStream, sequences[n].offset, extraBits);
+                        BIT_flushBits(&blockStream);                            /* (7)*/
+                    }
+                    BIT_addBits(&blockStream, sequences[n].offset >> extraBits,
+                                ofBits - extraBits);                            /* 31 */
+                } else {
+                    BIT_addBits(&blockStream, sequences[n].offset, ofBits);     /* 31 */
+                }
                 BIT_flushBits(&blockStream);                                    /* (7)*/
         }   }
 
@@ -714,12 +1258,15 @@ size_t ZSTD_compressSequences(ZSTD_CCtx* zc,
 
     /* check compressibility */
 _check_compressibility:
-    { size_t const minGain = ZSTD_minGain(srcSize);
-      size_t const maxCSize = srcSize - minGain;
-      if ((size_t)(op-ostart) >= maxCSize) return 0; }
+    {   size_t const minGain = ZSTD_minGain(srcSize);
+        size_t const maxCSize = srcSize - minGain;
+        if ((size_t)(op-ostart) >= maxCSize) {
+            zc->hufCTable_repeatMode = HUF_repeat_none;
+            return 0;
+    }   }
 
     /* confirm repcodes */
-    { int i; for (i=0; i<ZSTD_REP_NUM; i++) zc->rep[i] = zc->savedRep[i]; }
+    { int i; for (i=0; i<ZSTD_REP_NUM; i++) zc->rep[i] = zc->repToConfirm[i]; }
 
     return op - ostart;
 }
@@ -732,27 +1279,34 @@ _check_compressibility:
 */
 MEM_STATIC void ZSTD_storeSeq(seqStore_t* seqStorePtr, size_t litLength, const void* literals, U32 offsetCode, size_t matchCode)
 {
-#if 0  /* for debug */
+#if defined(ZSTD_DEBUG) && (ZSTD_DEBUG >= 6)
     static const BYTE* g_start = NULL;
-    const U32 pos = (U32)(literals - g_start);
-    if (g_start==NULL) g_start = literals;
-    //if ((pos > 1) && (pos < 50000))
-        printf("Cpos %6u :%5u literals & match %3u bytes at distance %6u \n",
+    U32 const pos = (U32)((const BYTE*)literals - g_start);
+    if (g_start==NULL) g_start = (const BYTE*)literals;
+    if ((pos > 0) && (pos < 1000000000))
+        DEBUGLOG(6, "Cpos %6u :%5u literals & match %3u bytes at distance %6u",
                pos, (U32)litLength, (U32)matchCode+MINMATCH, (U32)offsetCode);
 #endif
     /* copy Literals */
+    assert(seqStorePtr->lit + litLength <= seqStorePtr->litStart + 128 KB);
     ZSTD_wildcopy(seqStorePtr->lit, literals, litLength);
     seqStorePtr->lit += litLength;
 
     /* literal Length */
-    if (litLength>0xFFFF) { seqStorePtr->longLengthID = 1; seqStorePtr->longLengthPos = (U32)(seqStorePtr->sequences - seqStorePtr->sequencesStart); }
+    if (litLength>0xFFFF) {
+        seqStorePtr->longLengthID = 1;
+        seqStorePtr->longLengthPos = (U32)(seqStorePtr->sequences - seqStorePtr->sequencesStart);
+    }
     seqStorePtr->sequences[0].litLength = (U16)litLength;
 
     /* match offset */
     seqStorePtr->sequences[0].offset = offsetCode + 1;
 
     /* match Length */
-    if (matchCode>0xFFFF) { seqStorePtr->longLengthID = 2; seqStorePtr->longLengthPos = (U32)(seqStorePtr->sequences - seqStorePtr->sequencesStart); }
+    if (matchCode>0xFFFF) {
+        seqStorePtr->longLengthID = 2;
+        seqStorePtr->longLengthPos = (U32)(seqStorePtr->sequences - seqStorePtr->sequencesStart);
+    }
     seqStorePtr->sequences[0].matchLength = (U16)matchCode;
 
     seqStorePtr->sequences++;
@@ -773,7 +1327,14 @@ static unsigned ZSTD_NbCommonBytes (register size_t val)
 #       elif defined(__GNUC__) && (__GNUC__ >= 3)
             return (__builtin_ctzll((U64)val) >> 3);
 #       else
-            static const int DeBruijnBytePos[64] = { 0, 0, 0, 0, 0, 1, 1, 2, 0, 3, 1, 3, 1, 4, 2, 7, 0, 2, 3, 6, 1, 5, 3, 5, 1, 3, 4, 4, 2, 5, 6, 7, 7, 0, 1, 2, 3, 3, 4, 6, 2, 6, 5, 5, 3, 4, 5, 6, 7, 1, 2, 4, 6, 4, 4, 5, 7, 2, 6, 5, 7, 6, 7, 7 };
+            static const int DeBruijnBytePos[64] = { 0, 0, 0, 0, 0, 1, 1, 2,
+                                                     0, 3, 1, 3, 1, 4, 2, 7,
+                                                     0, 2, 3, 6, 1, 5, 3, 5,
+                                                     1, 3, 4, 4, 2, 5, 6, 7,
+                                                     7, 0, 1, 2, 3, 3, 4, 6,
+                                                     2, 6, 5, 5, 3, 4, 5, 6,
+                                                     7, 1, 2, 4, 6, 4, 4, 5,
+                                                     7, 2, 6, 5, 7, 6, 7, 7 };
             return DeBruijnBytePos[((U64)((val & -(long long)val) * 0x0218A392CDABBD3FULL)) >> 58];
 #       endif
         } else { /* 32 bits */
@@ -784,7 +1345,10 @@ static unsigned ZSTD_NbCommonBytes (register size_t val)
 #       elif defined(__GNUC__) && (__GNUC__ >= 3)
             return (__builtin_ctz((U32)val) >> 3);
 #       else
-            static const int DeBruijnBytePos[32] = { 0, 0, 3, 0, 3, 1, 3, 0, 3, 2, 2, 1, 3, 2, 0, 1, 3, 3, 1, 2, 2, 2, 2, 0, 3, 1, 2, 0, 1, 0, 1, 1 };
+            static const int DeBruijnBytePos[32] = { 0, 0, 3, 0, 3, 1, 3, 0,
+                                                     3, 2, 2, 1, 3, 2, 0, 1,
+                                                     3, 3, 1, 2, 2, 2, 2, 0,
+                                                     3, 1, 2, 0, 1, 0, 1, 1 };
             return DeBruijnBytePos[((U32)((val & -(S32)val) * 0x077CB531U)) >> 27];
 #       endif
         }
@@ -856,7 +1420,7 @@ static size_t ZSTD_count_2segments(const BYTE* ip, const BYTE* match, const BYTE
 ***************************************/
 static const U32 prime3bytes = 506832829U;
 static U32    ZSTD_hash3(U32 u, U32 h) { return ((u << (32-24)) * prime3bytes)  >> (32-h) ; }
-MEM_STATIC size_t ZSTD_hash3Ptr(const void* ptr, U32 h) { return ZSTD_hash3(MEM_readLE32(ptr), h); }   /* only in zstd_opt.h */
+MEM_STATIC size_t ZSTD_hash3Ptr(const void* ptr, U32 h) { return ZSTD_hash3(MEM_readLE32(ptr), h); } /* only in zstd_opt.h */
 
 static const U32 prime4bytes = 2654435761U;
 static U32    ZSTD_hash4(U32 u, U32 h) { return (u * prime4bytes) >> (32-h) ; }
@@ -898,7 +1462,7 @@ static size_t ZSTD_hashPtr(const void* p, U32 hBits, U32 mls)
 static void ZSTD_fillHashTable (ZSTD_CCtx* zc, const void* end, const U32 mls)
 {
     U32* const hashTable = zc->hashTable;
-    U32  const hBits = zc->params.cParams.hashLog;
+    U32  const hBits = zc->appliedParams.cParams.hashLog;
     const BYTE* const base = zc->base;
     const BYTE* ip = base + zc->nextToUpdate;
     const BYTE* const iend = ((const BYTE*)end) - HASH_READ_SIZE;
@@ -917,7 +1481,7 @@ void ZSTD_compressBlock_fast_generic(ZSTD_CCtx* cctx,
                                const U32 mls)
 {
     U32* const hashTable = cctx->hashTable;
-    U32  const hBits = cctx->params.cParams.hashLog;
+    U32  const hBits = cctx->appliedParams.cParams.hashLog;
     seqStore_t* seqStorePtr = &(cctx->seqStore);
     const BYTE* const base = cctx->base;
     const BYTE* const istart = (const BYTE*)src;
@@ -988,8 +1552,8 @@ void ZSTD_compressBlock_fast_generic(ZSTD_CCtx* cctx,
     }   }   }
 
     /* save reps for next block */
-    cctx->savedRep[0] = offset_1 ? offset_1 : offsetSaved;
-    cctx->savedRep[1] = offset_2 ? offset_2 : offsetSaved;
+    cctx->repToConfirm[0] = offset_1 ? offset_1 : offsetSaved;
+    cctx->repToConfirm[1] = offset_2 ? offset_2 : offsetSaved;
 
     /* Last Literals */
     {   size_t const lastLLSize = iend - anchor;
@@ -1002,10 +1566,10 @@ void ZSTD_compressBlock_fast_generic(ZSTD_CCtx* cctx,
 static void ZSTD_compressBlock_fast(ZSTD_CCtx* ctx,
                        const void* src, size_t srcSize)
 {
-    const U32 mls = ctx->params.cParams.searchLength;
+    const U32 mls = ctx->appliedParams.cParams.searchLength;
     switch(mls)
     {
-    default:
+    default: /* includes case 3 */
     case 4 :
         ZSTD_compressBlock_fast_generic(ctx, src, srcSize, 4); return;
     case 5 :
@@ -1023,7 +1587,7 @@ static void ZSTD_compressBlock_fast_extDict_generic(ZSTD_CCtx* ctx,
                                  const U32 mls)
 {
     U32* hashTable = ctx->hashTable;
-    const U32 hBits = ctx->params.cParams.hashLog;
+    const U32 hBits = ctx->appliedParams.cParams.hashLog;
     seqStore_t* seqStorePtr = &(ctx->seqStore);
     const BYTE* const base = ctx->base;
     const BYTE* const dictBase = ctx->dictBase;
@@ -1055,7 +1619,7 @@ static void ZSTD_compressBlock_fast_extDict_generic(ZSTD_CCtx* ctx,
         if ( (((U32)((dictLimit-1) - repIndex) >= 3) /* intentional underflow */ & (repIndex > lowestIndex))
            && (MEM_read32(repMatch) == MEM_read32(ip+1)) ) {
             const BYTE* repMatchEnd = repIndex < dictLimit ? dictEnd : iend;
-            mLength = ZSTD_count_2segments(ip+1+EQUAL_READ32, repMatch+EQUAL_READ32, iend, repMatchEnd, lowPrefixPtr) + EQUAL_READ32;
+            mLength = ZSTD_count_2segments(ip+1+4, repMatch+4, iend, repMatchEnd, lowPrefixPtr) + 4;
             ip++;
             ZSTD_storeSeq(seqStorePtr, ip-anchor, anchor, 0, mLength-MINMATCH);
         } else {
@@ -1067,7 +1631,7 @@ static void ZSTD_compressBlock_fast_extDict_generic(ZSTD_CCtx* ctx,
             {   const BYTE* matchEnd = matchIndex < dictLimit ? dictEnd : iend;
                 const BYTE* lowMatchPtr = matchIndex < dictLimit ? dictStart : lowPrefixPtr;
                 U32 offset;
-                mLength = ZSTD_count_2segments(ip+EQUAL_READ32, match+EQUAL_READ32, iend, matchEnd, lowPrefixPtr) + EQUAL_READ32;
+                mLength = ZSTD_count_2segments(ip+4, match+4, iend, matchEnd, lowPrefixPtr) + 4;
                 while (((ip>anchor) & (match>lowMatchPtr)) && (ip[-1] == match[-1])) { ip--; match--; mLength++; }   /* catch up */
                 offset = current - matchIndex;
                 offset_2 = offset_1;
@@ -1091,7 +1655,7 @@ static void ZSTD_compressBlock_fast_extDict_generic(ZSTD_CCtx* ctx,
                 if ( (((U32)((dictLimit-1) - repIndex2) >= 3) & (repIndex2 > lowestIndex))  /* intentional overflow */
                    && (MEM_read32(repMatch2) == MEM_read32(ip)) ) {
                     const BYTE* const repEnd2 = repIndex2 < dictLimit ? dictEnd : iend;
-                    size_t repLength2 = ZSTD_count_2segments(ip+EQUAL_READ32, repMatch2+EQUAL_READ32, iend, repEnd2, lowPrefixPtr) + EQUAL_READ32;
+                    size_t const repLength2 = ZSTD_count_2segments(ip+4, repMatch2+4, iend, repEnd2, lowPrefixPtr) + 4;
                     U32 tmpOffset = offset_2; offset_2 = offset_1; offset_1 = tmpOffset;   /* swap offset_2 <=> offset_1 */
                     ZSTD_storeSeq(seqStorePtr, 0, anchor, 0, repLength2-MINMATCH);
                     hashTable[ZSTD_hashPtr(ip, hBits, mls)] = current2;
@@ -1103,7 +1667,7 @@ static void ZSTD_compressBlock_fast_extDict_generic(ZSTD_CCtx* ctx,
     }   }   }
 
     /* save reps for next block */
-    ctx->savedRep[0] = offset_1; ctx->savedRep[1] = offset_2;
+    ctx->repToConfirm[0] = offset_1; ctx->repToConfirm[1] = offset_2;
 
     /* Last Literals */
     {   size_t const lastLLSize = iend - anchor;
@@ -1116,10 +1680,10 @@ static void ZSTD_compressBlock_fast_extDict_generic(ZSTD_CCtx* ctx,
 static void ZSTD_compressBlock_fast_extDict(ZSTD_CCtx* ctx,
                          const void* src, size_t srcSize)
 {
-    U32 const mls = ctx->params.cParams.searchLength;
+    U32 const mls = ctx->appliedParams.cParams.searchLength;
     switch(mls)
     {
-    default:
+    default: /* includes case 3 */
     case 4 :
         ZSTD_compressBlock_fast_extDict_generic(ctx, src, srcSize, 4); return;
     case 5 :
@@ -1138,9 +1702,9 @@ static void ZSTD_compressBlock_fast_extDict(ZSTD_CCtx* ctx,
 static void ZSTD_fillDoubleHashTable (ZSTD_CCtx* cctx, const void* end, const U32 mls)
 {
     U32* const hashLarge = cctx->hashTable;
-    U32  const hBitsL = cctx->params.cParams.hashLog;
+    U32  const hBitsL = cctx->appliedParams.cParams.hashLog;
     U32* const hashSmall = cctx->chainTable;
-    U32  const hBitsS = cctx->params.cParams.chainLog;
+    U32  const hBitsS = cctx->appliedParams.cParams.chainLog;
     const BYTE* const base = cctx->base;
     const BYTE* ip = base + cctx->nextToUpdate;
     const BYTE* const iend = ((const BYTE*)end) - HASH_READ_SIZE;
@@ -1160,9 +1724,9 @@ void ZSTD_compressBlock_doubleFast_generic(ZSTD_CCtx* cctx,
                                  const U32 mls)
 {
     U32* const hashLong = cctx->hashTable;
-    const U32 hBitsL = cctx->params.cParams.hashLog;
+    const U32 hBitsL = cctx->appliedParams.cParams.hashLog;
     U32* const hashSmall = cctx->chainTable;
-    const U32 hBitsS = cctx->params.cParams.chainLog;
+    const U32 hBitsS = cctx->appliedParams.cParams.chainLog;
     seqStore_t* seqStorePtr = &(cctx->seqStore);
     const BYTE* const base = cctx->base;
     const BYTE* const istart = (const BYTE*)src;
@@ -1194,7 +1758,9 @@ void ZSTD_compressBlock_doubleFast_generic(ZSTD_CCtx* cctx,
         const BYTE* match = base + matchIndexS;
         hashLong[h2] = hashSmall[h] = current;   /* update hash tables */
 
-        if ((offset_1 > 0) & (MEM_read32(ip+1-offset_1) == MEM_read32(ip+1))) { /* note : by construction, offset_1 <= current */
+        assert(offset_1 <= current);   /* supposed guaranteed by construction */
+        if ((offset_1 > 0) & (MEM_read32(ip+1-offset_1) == MEM_read32(ip+1))) {
+            /* favor repcode */
             mLength = ZSTD_count(ip+1+4, ip+1+4-offset_1, iend) + 4;
             ip++;
             ZSTD_storeSeq(seqStorePtr, ip-anchor, anchor, 0, mLength-MINMATCH);
@@ -1205,15 +1771,15 @@ void ZSTD_compressBlock_doubleFast_generic(ZSTD_CCtx* cctx,
                 offset = (U32)(ip-matchLong);
                 while (((ip>anchor) & (matchLong>lowest)) && (ip[-1] == matchLong[-1])) { ip--; matchLong--; mLength++; } /* catch up */
             } else if ( (matchIndexS > lowestIndex) && (MEM_read32(match) == MEM_read32(ip)) ) {
-                size_t const h3 = ZSTD_hashPtr(ip+1, hBitsL, 8);
-                U32 const matchIndex3 = hashLong[h3];
-                const BYTE* match3 = base + matchIndex3;
-                hashLong[h3] = current + 1;
-                if ( (matchIndex3 > lowestIndex) && (MEM_read64(match3) == MEM_read64(ip+1)) ) {
-                    mLength = ZSTD_count(ip+9, match3+8, iend) + 8;
+                size_t const hl3 = ZSTD_hashPtr(ip+1, hBitsL, 8);
+                U32 const matchIndexL3 = hashLong[hl3];
+                const BYTE* matchL3 = base + matchIndexL3;
+                hashLong[hl3] = current + 1;
+                if ( (matchIndexL3 > lowestIndex) && (MEM_read64(matchL3) == MEM_read64(ip+1)) ) {
+                    mLength = ZSTD_count(ip+9, matchL3+8, iend) + 8;
                     ip++;
-                    offset = (U32)(ip-match3);
-                    while (((ip>anchor) & (match3>lowest)) && (ip[-1] == match3[-1])) { ip--; match3--; mLength++; } /* catch up */
+                    offset = (U32)(ip-matchL3);
+                    while (((ip>anchor) & (matchL3>lowest)) && (ip[-1] == matchL3[-1])) { ip--; matchL3--; mLength++; } /* catch up */
                 } else {
                     mLength = ZSTD_count(ip+4, match+4, iend) + 4;
                     offset = (U32)(ip-match);
@@ -1257,8 +1823,8 @@ void ZSTD_compressBlock_doubleFast_generic(ZSTD_CCtx* cctx,
     }   }   }
 
     /* save reps for next block */
-    cctx->savedRep[0] = offset_1 ? offset_1 : offsetSaved;
-    cctx->savedRep[1] = offset_2 ? offset_2 : offsetSaved;
+    cctx->repToConfirm[0] = offset_1 ? offset_1 : offsetSaved;
+    cctx->repToConfirm[1] = offset_2 ? offset_2 : offsetSaved;
 
     /* Last Literals */
     {   size_t const lastLLSize = iend - anchor;
@@ -1270,10 +1836,10 @@ void ZSTD_compressBlock_doubleFast_generic(ZSTD_CCtx* cctx,
 
 static void ZSTD_compressBlock_doubleFast(ZSTD_CCtx* ctx, const void* src, size_t srcSize)
 {
-    const U32 mls = ctx->params.cParams.searchLength;
+    const U32 mls = ctx->appliedParams.cParams.searchLength;
     switch(mls)
     {
-    default:
+    default: /* includes case 3 */
     case 4 :
         ZSTD_compressBlock_doubleFast_generic(ctx, src, srcSize, 4); return;
     case 5 :
@@ -1291,9 +1857,9 @@ static void ZSTD_compressBlock_doubleFast_extDict_generic(ZSTD_CCtx* ctx,
                                  const U32 mls)
 {
     U32* const hashLong = ctx->hashTable;
-    U32  const hBitsL = ctx->params.cParams.hashLog;
+    U32  const hBitsL = ctx->appliedParams.cParams.hashLog;
     U32* const hashSmall = ctx->chainTable;
-    U32  const hBitsS = ctx->params.cParams.chainLog;
+    U32  const hBitsS = ctx->appliedParams.cParams.chainLog;
     seqStore_t* seqStorePtr = &(ctx->seqStore);
     const BYTE* const base = ctx->base;
     const BYTE* const dictBase = ctx->dictBase;
@@ -1382,8 +1948,8 @@ static void ZSTD_compressBlock_doubleFast_extDict_generic(ZSTD_CCtx* ctx,
 
         if (ip <= ilimit) {
             /* Fill Table */
-			hashSmall[ZSTD_hashPtr(base+current+2, hBitsS, mls)] = current+2;
-			hashLong[ZSTD_hashPtr(base+current+2, hBitsL, 8)] = current+2;
+            hashSmall[ZSTD_hashPtr(base+current+2, hBitsS, mls)] = current+2;
+            hashLong[ZSTD_hashPtr(base+current+2, hBitsL, 8)] = current+2;
             hashSmall[ZSTD_hashPtr(ip-2, hBitsS, mls)] = (U32)(ip-2-base);
             hashLong[ZSTD_hashPtr(ip-2, hBitsL, 8)] = (U32)(ip-2-base);
             /* check immediate repcode */
@@ -1394,7 +1960,7 @@ static void ZSTD_compressBlock_doubleFast_extDict_generic(ZSTD_CCtx* ctx,
                 if ( (((U32)((dictLimit-1) - repIndex2) >= 3) & (repIndex2 > lowestIndex))  /* intentional overflow */
                    && (MEM_read32(repMatch2) == MEM_read32(ip)) ) {
                     const BYTE* const repEnd2 = repIndex2 < dictLimit ? dictEnd : iend;
-                    size_t const repLength2 = ZSTD_count_2segments(ip+EQUAL_READ32, repMatch2+EQUAL_READ32, iend, repEnd2, lowPrefixPtr) + EQUAL_READ32;
+                    size_t const repLength2 = ZSTD_count_2segments(ip+4, repMatch2+4, iend, repEnd2, lowPrefixPtr) + 4;
                     U32 tmpOffset = offset_2; offset_2 = offset_1; offset_1 = tmpOffset;   /* swap offset_2 <=> offset_1 */
                     ZSTD_storeSeq(seqStorePtr, 0, anchor, 0, repLength2-MINMATCH);
                     hashSmall[ZSTD_hashPtr(ip, hBitsS, mls)] = current2;
@@ -1407,7 +1973,7 @@ static void ZSTD_compressBlock_doubleFast_extDict_generic(ZSTD_CCtx* ctx,
     }   }   }
 
     /* save reps for next block */
-    ctx->savedRep[0] = offset_1; ctx->savedRep[1] = offset_2;
+    ctx->repToConfirm[0] = offset_1; ctx->repToConfirm[1] = offset_2;
 
     /* Last Literals */
     {   size_t const lastLLSize = iend - anchor;
@@ -1420,10 +1986,10 @@ static void ZSTD_compressBlock_doubleFast_extDict_generic(ZSTD_CCtx* ctx,
 static void ZSTD_compressBlock_doubleFast_extDict(ZSTD_CCtx* ctx,
                          const void* src, size_t srcSize)
 {
-    U32 const mls = ctx->params.cParams.searchLength;
+    U32 const mls = ctx->appliedParams.cParams.searchLength;
     switch(mls)
     {
-    default:
+    default: /* includes case 3 */
     case 4 :
         ZSTD_compressBlock_doubleFast_extDict_generic(ctx, src, srcSize, 4); return;
     case 5 :
@@ -1446,10 +2012,10 @@ static U32 ZSTD_insertBt1(ZSTD_CCtx* zc, const BYTE* const ip, const U32 mls, co
                           U32 extDict)
 {
     U32*   const hashTable = zc->hashTable;
-    U32    const hashLog = zc->params.cParams.hashLog;
+    U32    const hashLog = zc->appliedParams.cParams.hashLog;
     size_t const h  = ZSTD_hashPtr(ip, hashLog, mls);
     U32*   const bt = zc->chainTable;
-    U32    const btLog  = zc->params.cParams.chainLog - 1;
+    U32    const btLog  = zc->appliedParams.cParams.chainLog - 1;
     U32    const btMask = (1 << btLog) - 1;
     U32 matchIndex = hashTable[h];
     size_t commonLengthSmaller=0, commonLengthLarger=0;
@@ -1458,7 +2024,7 @@ static U32 ZSTD_insertBt1(ZSTD_CCtx* zc, const BYTE* const ip, const U32 mls, co
     const U32 dictLimit = zc->dictLimit;
     const BYTE* const dictEnd = dictBase + dictLimit;
     const BYTE* const prefixStart = base + dictLimit;
-    const BYTE* match = base + matchIndex;
+    const BYTE* match;
     const U32 current = (U32)(ip-base);
     const U32 btLow = btMask >= current ? 0 : current - btMask;
     U32* smallerPtr = bt + 2*(current&btMask);
@@ -1477,8 +2043,9 @@ static U32 ZSTD_insertBt1(ZSTD_CCtx* zc, const BYTE* const ip, const U32 mls, co
     hashTable[h] = current;   /* Update Hash Table */
 
     while (nbCompares-- && (matchIndex > windowLow)) {
-        U32* nextPtr = bt + 2*(matchIndex & btMask);
+        U32* const nextPtr = bt + 2*(matchIndex & btMask);
         size_t matchLength = MIN(commonLengthSmaller, commonLengthLarger);   /* guaranteed minimum nb of common bytes */
+
 #ifdef ZSTD_C_PREDICT   /* note : can create issues when hlog small <= 11 */
         const U32* predictPtr = bt + 2*((matchIndex-1) & btMask);   /* written this way, as bt is a roll buffer */
         if (matchIndex == predictedSmall) {
@@ -1507,7 +2074,7 @@ static U32 ZSTD_insertBt1(ZSTD_CCtx* zc, const BYTE* const ip, const U32 mls, co
             match = dictBase + matchIndex;
             matchLength += ZSTD_count_2segments(ip+matchLength, match+matchLength, iend, dictEnd, prefixStart);
             if (matchIndex+matchLength >= dictLimit)
-				match = base + matchIndex;   /* to prepare for next usage of match[matchLength] */
+                match = base + matchIndex;   /* to prepare for next usage of match[matchLength] */
         }
 
         if (matchLength > bestLength) {
@@ -1550,10 +2117,10 @@ static size_t ZSTD_insertBtAndFindBestMatch (
                         U32 extDict)
 {
     U32*   const hashTable = zc->hashTable;
-    U32    const hashLog = zc->params.cParams.hashLog;
+    U32    const hashLog = zc->appliedParams.cParams.hashLog;
     size_t const h  = ZSTD_hashPtr(ip, hashLog, mls);
     U32*   const bt = zc->chainTable;
-    U32    const btLog  = zc->params.cParams.chainLog - 1;
+    U32    const btLog  = zc->appliedParams.cParams.chainLog - 1;
     U32    const btMask = (1 << btLog) - 1;
     U32 matchIndex  = hashTable[h];
     size_t commonLengthSmaller=0, commonLengthLarger=0;
@@ -1574,7 +2141,7 @@ static size_t ZSTD_insertBtAndFindBestMatch (
     hashTable[h] = current;   /* Update Hash Table */
 
     while (nbCompares-- && (matchIndex > windowLow)) {
-        U32* nextPtr = bt + 2*(matchIndex & btMask);
+        U32* const nextPtr = bt + 2*(matchIndex & btMask);
         size_t matchLength = MIN(commonLengthSmaller, commonLengthLarger);   /* guaranteed minimum nb of common bytes */
         const BYTE* match;
 
@@ -1586,7 +2153,7 @@ static size_t ZSTD_insertBtAndFindBestMatch (
             match = dictBase + matchIndex;
             matchLength += ZSTD_count_2segments(ip+matchLength, match+matchLength, iend, dictEnd, prefixStart);
             if (matchIndex+matchLength >= dictLimit)
-				match = base + matchIndex;   /* to prepare for next usage of match[matchLength] */
+                match = base + matchIndex;   /* to prepare for next usage of match[matchLength] */
         }
 
         if (matchLength > bestLength) {
@@ -1652,9 +2219,10 @@ static size_t ZSTD_BtFindBestMatch_selectMLS (
 {
     switch(matchLengthSearch)
     {
-    default :
+    default : /* includes case 3 */
     case 4 : return ZSTD_BtFindBestMatch(zc, ip, iLimit, offsetPtr, maxNbAttempts, 4);
     case 5 : return ZSTD_BtFindBestMatch(zc, ip, iLimit, offsetPtr, maxNbAttempts, 5);
+    case 7 :
     case 6 : return ZSTD_BtFindBestMatch(zc, ip, iLimit, offsetPtr, maxNbAttempts, 6);
     }
 }
@@ -1691,9 +2259,10 @@ static size_t ZSTD_BtFindBestMatch_selectMLS_extDict (
 {
     switch(matchLengthSearch)
     {
-    default :
+    default : /* includes case 3 */
     case 4 : return ZSTD_BtFindBestMatch_extDict(zc, ip, iLimit, offsetPtr, maxNbAttempts, 4);
     case 5 : return ZSTD_BtFindBestMatch_extDict(zc, ip, iLimit, offsetPtr, maxNbAttempts, 5);
+    case 7 :
     case 6 : return ZSTD_BtFindBestMatch_extDict(zc, ip, iLimit, offsetPtr, maxNbAttempts, 6);
     }
 }
@@ -1706,14 +2275,14 @@ static size_t ZSTD_BtFindBestMatch_selectMLS_extDict (
 #define NEXT_IN_CHAIN(d, mask)   chainTable[(d) & mask]
 
 /* Update chains up to ip (excluded)
-   Assumption : always within prefix (ie. not within extDict) */
+   Assumption : always within prefix (i.e. not within extDict) */
 FORCE_INLINE
 U32 ZSTD_insertAndFindFirstIndex (ZSTD_CCtx* zc, const BYTE* ip, U32 mls)
 {
     U32* const hashTable  = zc->hashTable;
-    const U32 hashLog = zc->params.cParams.hashLog;
+    const U32 hashLog = zc->appliedParams.cParams.hashLog;
     U32* const chainTable = zc->chainTable;
-    const U32 chainMask = (1 << zc->params.cParams.chainLog) - 1;
+    const U32 chainMask = (1 << zc->appliedParams.cParams.chainLog) - 1;
     const BYTE* const base = zc->base;
     const U32 target = (U32)(ip - base);
     U32 idx = zc->nextToUpdate;
@@ -1730,8 +2299,8 @@ U32 ZSTD_insertAndFindFirstIndex (ZSTD_CCtx* zc, const BYTE* ip, U32 mls)
 }
 
 
-
-FORCE_INLINE /* inlining is important to hardwire a hot branch (template emulation) */
+/* inlining is important to hardwire a hot branch (template emulation) */
+FORCE_INLINE
 size_t ZSTD_HcFindBestMatch_generic (
                         ZSTD_CCtx* zc,   /* Index table will be updated */
                         const BYTE* const ip, const BYTE* const iLimit,
@@ -1739,7 +2308,7 @@ size_t ZSTD_HcFindBestMatch_generic (
                         const U32 maxNbAttempts, const U32 mls, const U32 extDict)
 {
     U32* const chainTable = zc->chainTable;
-    const U32 chainSize = (1 << zc->params.cParams.chainLog);
+    const U32 chainSize = (1 << zc->appliedParams.cParams.chainLog);
     const U32 chainMask = chainSize-1;
     const BYTE* const base = zc->base;
     const BYTE* const dictBase = zc->dictBase;
@@ -1750,7 +2319,7 @@ size_t ZSTD_HcFindBestMatch_generic (
     const U32 current = (U32)(ip-base);
     const U32 minChain = current > chainSize ? current - chainSize : 0;
     int nbAttempts=maxNbAttempts;
-    size_t ml=EQUAL_READ32-1;
+    size_t ml=4-1;
 
     /* HC4 match finder */
     U32 matchIndex = ZSTD_insertAndFindFirstIndex (zc, ip, mls);
@@ -1765,11 +2334,15 @@ size_t ZSTD_HcFindBestMatch_generic (
         } else {
             match = dictBase + matchIndex;
             if (MEM_read32(match) == MEM_read32(ip))   /* assumption : matchIndex <= dictLimit-4 (by table construction) */
-                currentMl = ZSTD_count_2segments(ip+EQUAL_READ32, match+EQUAL_READ32, iLimit, dictEnd, prefixStart) + EQUAL_READ32;
+                currentMl = ZSTD_count_2segments(ip+4, match+4, iLimit, dictEnd, prefixStart) + 4;
         }
 
         /* save best solution */
-        if (currentMl > ml) { ml = currentMl; *offsetPtr = current - matchIndex + ZSTD_REP_MOVE; if (ip+currentMl == iLimit) break; /* best possible, and avoid read overflow*/ }
+        if (currentMl > ml) {
+            ml = currentMl;
+            *offsetPtr = current - matchIndex + ZSTD_REP_MOVE;
+            if (ip+currentMl == iLimit) break; /* best possible, avoids read overflow on next attempt */
+        }
 
         if (matchIndex <= minChain) break;
         matchIndex = NEXT_IN_CHAIN(matchIndex, chainMask);
@@ -1787,9 +2360,10 @@ FORCE_INLINE size_t ZSTD_HcFindBestMatch_selectMLS (
 {
     switch(matchLengthSearch)
     {
-    default :
+    default : /* includes case 3 */
     case 4 : return ZSTD_HcFindBestMatch_generic(zc, ip, iLimit, offsetPtr, maxNbAttempts, 4, 0);
     case 5 : return ZSTD_HcFindBestMatch_generic(zc, ip, iLimit, offsetPtr, maxNbAttempts, 5, 0);
+    case 7 :
     case 6 : return ZSTD_HcFindBestMatch_generic(zc, ip, iLimit, offsetPtr, maxNbAttempts, 6, 0);
     }
 }
@@ -1803,9 +2377,10 @@ FORCE_INLINE size_t ZSTD_HcFindBestMatch_extDict_selectMLS (
 {
     switch(matchLengthSearch)
     {
-    default :
+    default : /* includes case 3 */
     case 4 : return ZSTD_HcFindBestMatch_generic(zc, ip, iLimit, offsetPtr, maxNbAttempts, 4, 1);
     case 5 : return ZSTD_HcFindBestMatch_generic(zc, ip, iLimit, offsetPtr, maxNbAttempts, 5, 1);
+    case 7 :
     case 6 : return ZSTD_HcFindBestMatch_generic(zc, ip, iLimit, offsetPtr, maxNbAttempts, 6, 1);
     }
 }
@@ -1827,8 +2402,8 @@ void ZSTD_compressBlock_lazy_generic(ZSTD_CCtx* ctx,
     const BYTE* const ilimit = iend - 8;
     const BYTE* const base = ctx->base + ctx->dictLimit;
 
-    U32 const maxSearches = 1 << ctx->params.cParams.searchLog;
-    U32 const mls = ctx->params.cParams.searchLength;
+    U32 const maxSearches = 1 << ctx->appliedParams.cParams.searchLog;
+    U32 const mls = ctx->appliedParams.cParams.searchLength;
 
     typedef size_t (*searchMax_f)(ZSTD_CCtx* zc, const BYTE* ip, const BYTE* iLimit,
                         size_t* offsetPtr,
@@ -1853,7 +2428,7 @@ void ZSTD_compressBlock_lazy_generic(ZSTD_CCtx* ctx,
         /* check repCode */
         if ((offset_1>0) & (MEM_read32(ip+1) == MEM_read32(ip+1 - offset_1))) {
             /* repcode : we take it */
-            matchLength = ZSTD_count(ip+1+EQUAL_READ32, ip+1+EQUAL_READ32-offset_1, iend) + EQUAL_READ32;
+            matchLength = ZSTD_count(ip+1+4, ip+1+4-offset_1, iend) + 4;
             if (depth==0) goto _storeSequence;
         }
 
@@ -1864,7 +2439,7 @@ void ZSTD_compressBlock_lazy_generic(ZSTD_CCtx* ctx,
                 matchLength = ml2, start = ip, offset=offsetFound;
         }
 
-        if (matchLength < EQUAL_READ32) {
+        if (matchLength < 4) {
             ip += ((ip-anchor) >> g_searchStrength) + 1;   /* jump faster over incompressible sections */
             continue;
         }
@@ -1874,17 +2449,17 @@ void ZSTD_compressBlock_lazy_generic(ZSTD_CCtx* ctx,
         while (ip<ilimit) {
             ip ++;
             if ((offset) && ((offset_1>0) & (MEM_read32(ip) == MEM_read32(ip - offset_1)))) {
-                size_t const mlRep = ZSTD_count(ip+EQUAL_READ32, ip+EQUAL_READ32-offset_1, iend) + EQUAL_READ32;
+                size_t const mlRep = ZSTD_count(ip+4, ip+4-offset_1, iend) + 4;
                 int const gain2 = (int)(mlRep * 3);
                 int const gain1 = (int)(matchLength*3 - ZSTD_highbit32((U32)offset+1) + 1);
-                if ((mlRep >= EQUAL_READ32) && (gain2 > gain1))
+                if ((mlRep >= 4) && (gain2 > gain1))
                     matchLength = mlRep, offset = 0, start = ip;
             }
             {   size_t offset2=99999999;
                 size_t const ml2 = searchMax(ctx, ip, iend, &offset2, maxSearches, mls);
                 int const gain2 = (int)(ml2*4 - ZSTD_highbit32((U32)offset2+1));   /* raw approx */
                 int const gain1 = (int)(matchLength*4 - ZSTD_highbit32((U32)offset+1) + 4);
-                if ((ml2 >= EQUAL_READ32) && (gain2 > gain1)) {
+                if ((ml2 >= 4) && (gain2 > gain1)) {
                     matchLength = ml2, offset = offset2, start = ip;
                     continue;   /* search a better one */
             }   }
@@ -1893,30 +2468,36 @@ void ZSTD_compressBlock_lazy_generic(ZSTD_CCtx* ctx,
             if ((depth==2) && (ip<ilimit)) {
                 ip ++;
                 if ((offset) && ((offset_1>0) & (MEM_read32(ip) == MEM_read32(ip - offset_1)))) {
-                    size_t const ml2 = ZSTD_count(ip+EQUAL_READ32, ip+EQUAL_READ32-offset_1, iend) + EQUAL_READ32;
+                    size_t const ml2 = ZSTD_count(ip+4, ip+4-offset_1, iend) + 4;
                     int const gain2 = (int)(ml2 * 4);
                     int const gain1 = (int)(matchLength*4 - ZSTD_highbit32((U32)offset+1) + 1);
-                    if ((ml2 >= EQUAL_READ32) && (gain2 > gain1))
+                    if ((ml2 >= 4) && (gain2 > gain1))
                         matchLength = ml2, offset = 0, start = ip;
                 }
                 {   size_t offset2=99999999;
                     size_t const ml2 = searchMax(ctx, ip, iend, &offset2, maxSearches, mls);
                     int const gain2 = (int)(ml2*4 - ZSTD_highbit32((U32)offset2+1));   /* raw approx */
                     int const gain1 = (int)(matchLength*4 - ZSTD_highbit32((U32)offset+1) + 7);
-                    if ((ml2 >= EQUAL_READ32) && (gain2 > gain1)) {
+                    if ((ml2 >= 4) && (gain2 > gain1)) {
                         matchLength = ml2, offset = offset2, start = ip;
                         continue;
             }   }   }
             break;  /* nothing found : store previous solution */
         }
 
+        /* NOTE:
+         * start[-offset+ZSTD_REP_MOVE-1] is undefined behavior.
+         * (-offset+ZSTD_REP_MOVE-1) is unsigned, and is added to start, which
+         * overflows the pointer, which is undefined behavior.
+         */
         /* catch up */
         if (offset) {
-            while ((start>anchor) && (start>base+offset-ZSTD_REP_MOVE) && (start[-1] == start[-1-offset+ZSTD_REP_MOVE]))   /* only search for offset within prefix */
+            while ( (start > anchor)
+                 && (start > base+offset-ZSTD_REP_MOVE)
+                 && (start[-1] == (start-offset+ZSTD_REP_MOVE)[-1]) )  /* only search for offset within prefix */
                 { start--; matchLength++; }
             offset_2 = offset_1; offset_1 = (U32)(offset - ZSTD_REP_MOVE);
         }
-
         /* store sequence */
 _storeSequence:
         {   size_t const litLength = start - anchor;
@@ -1929,7 +2510,7 @@ _storeSequence:
              && ((offset_2>0)
              & (MEM_read32(ip) == MEM_read32(ip - offset_2)) )) {
             /* store sequence */
-            matchLength = ZSTD_count(ip+EQUAL_READ32, ip+EQUAL_READ32-offset_2, iend) + EQUAL_READ32;
+            matchLength = ZSTD_count(ip+4, ip+4-offset_2, iend) + 4;
             offset = offset_2; offset_2 = offset_1; offset_1 = (U32)offset; /* swap repcodes */
             ZSTD_storeSeq(seqStorePtr, 0, anchor, 0, matchLength-MINMATCH);
             ip += matchLength;
@@ -1938,8 +2519,8 @@ _storeSequence:
     }   }
 
     /* Save reps for next block */
-    ctx->savedRep[0] = offset_1 ? offset_1 : savedOffset;
-    ctx->savedRep[1] = offset_2 ? offset_2 : savedOffset;
+    ctx->repToConfirm[0] = offset_1 ? offset_1 : savedOffset;
+    ctx->repToConfirm[1] = offset_2 ? offset_2 : savedOffset;
 
     /* Last Literals */
     {   size_t const lastLLSize = iend - anchor;
@@ -1989,8 +2570,8 @@ void ZSTD_compressBlock_lazy_extDict_generic(ZSTD_CCtx* ctx,
     const BYTE* const dictEnd  = dictBase + dictLimit;
     const BYTE* const dictStart  = dictBase + ctx->lowLimit;
 
-    const U32 maxSearches = 1 << ctx->params.cParams.searchLog;
-    const U32 mls = ctx->params.cParams.searchLength;
+    const U32 maxSearches = 1 << ctx->appliedParams.cParams.searchLog;
+    const U32 mls = ctx->appliedParams.cParams.searchLength;
 
     typedef size_t (*searchMax_f)(ZSTD_CCtx* zc, const BYTE* ip, const BYTE* iLimit,
                         size_t* offsetPtr,
@@ -2018,7 +2599,7 @@ void ZSTD_compressBlock_lazy_extDict_generic(ZSTD_CCtx* ctx,
             if (MEM_read32(ip+1) == MEM_read32(repMatch)) {
                 /* repcode detected we should take it */
                 const BYTE* const repEnd = repIndex < dictLimit ? dictEnd : iend;
-                matchLength = ZSTD_count_2segments(ip+1+EQUAL_READ32, repMatch+EQUAL_READ32, iend, repEnd, prefixStart) + EQUAL_READ32;
+                matchLength = ZSTD_count_2segments(ip+1+4, repMatch+4, iend, repEnd, prefixStart) + 4;
                 if (depth==0) goto _storeSequence;
         }   }
 
@@ -2029,7 +2610,7 @@ void ZSTD_compressBlock_lazy_extDict_generic(ZSTD_CCtx* ctx,
                 matchLength = ml2, start = ip, offset=offsetFound;
         }
 
-         if (matchLength < EQUAL_READ32) {
+         if (matchLength < 4) {
             ip += ((ip-anchor) >> g_searchStrength) + 1;   /* jump faster over incompressible sections */
             continue;
         }
@@ -2048,10 +2629,10 @@ void ZSTD_compressBlock_lazy_extDict_generic(ZSTD_CCtx* ctx,
                 if (MEM_read32(ip) == MEM_read32(repMatch)) {
                     /* repcode detected */
                     const BYTE* const repEnd = repIndex < dictLimit ? dictEnd : iend;
-                    size_t const repLength = ZSTD_count_2segments(ip+EQUAL_READ32, repMatch+EQUAL_READ32, iend, repEnd, prefixStart) + EQUAL_READ32;
+                    size_t const repLength = ZSTD_count_2segments(ip+4, repMatch+4, iend, repEnd, prefixStart) + 4;
                     int const gain2 = (int)(repLength * 3);
                     int const gain1 = (int)(matchLength*3 - ZSTD_highbit32((U32)offset+1) + 1);
-                    if ((repLength >= EQUAL_READ32) && (gain2 > gain1))
+                    if ((repLength >= 4) && (gain2 > gain1))
                         matchLength = repLength, offset = 0, start = ip;
             }   }
 
@@ -2060,7 +2641,7 @@ void ZSTD_compressBlock_lazy_extDict_generic(ZSTD_CCtx* ctx,
                 size_t const ml2 = searchMax(ctx, ip, iend, &offset2, maxSearches, mls);
                 int const gain2 = (int)(ml2*4 - ZSTD_highbit32((U32)offset2+1));   /* raw approx */
                 int const gain1 = (int)(matchLength*4 - ZSTD_highbit32((U32)offset+1) + 4);
-                if ((ml2 >= EQUAL_READ32) && (gain2 > gain1)) {
+                if ((ml2 >= 4) && (gain2 > gain1)) {
                     matchLength = ml2, offset = offset2, start = ip;
                     continue;   /* search a better one */
             }   }
@@ -2078,10 +2659,10 @@ void ZSTD_compressBlock_lazy_extDict_generic(ZSTD_CCtx* ctx,
                     if (MEM_read32(ip) == MEM_read32(repMatch)) {
                         /* repcode detected */
                         const BYTE* const repEnd = repIndex < dictLimit ? dictEnd : iend;
-                        size_t repLength = ZSTD_count_2segments(ip+EQUAL_READ32, repMatch+EQUAL_READ32, iend, repEnd, prefixStart) + EQUAL_READ32;
-                        int gain2 = (int)(repLength * 4);
-                        int gain1 = (int)(matchLength*4 - ZSTD_highbit32((U32)offset+1) + 1);
-                        if ((repLength >= EQUAL_READ32) && (gain2 > gain1))
+                        size_t const repLength = ZSTD_count_2segments(ip+4, repMatch+4, iend, repEnd, prefixStart) + 4;
+                        int const gain2 = (int)(repLength * 4);
+                        int const gain1 = (int)(matchLength*4 - ZSTD_highbit32((U32)offset+1) + 1);
+                        if ((repLength >= 4) && (gain2 > gain1))
                             matchLength = repLength, offset = 0, start = ip;
                 }   }
 
@@ -2090,7 +2671,7 @@ void ZSTD_compressBlock_lazy_extDict_generic(ZSTD_CCtx* ctx,
                     size_t const ml2 = searchMax(ctx, ip, iend, &offset2, maxSearches, mls);
                     int const gain2 = (int)(ml2*4 - ZSTD_highbit32((U32)offset2+1));   /* raw approx */
                     int const gain1 = (int)(matchLength*4 - ZSTD_highbit32((U32)offset+1) + 7);
-                    if ((ml2 >= EQUAL_READ32) && (gain2 > gain1)) {
+                    if ((ml2 >= 4) && (gain2 > gain1)) {
                         matchLength = ml2, offset = offset2, start = ip;
                         continue;
             }   }   }
@@ -2122,7 +2703,7 @@ _storeSequence:
             if (MEM_read32(ip) == MEM_read32(repMatch)) {
                 /* repcode detected we should take it */
                 const BYTE* const repEnd = repIndex < dictLimit ? dictEnd : iend;
-                matchLength = ZSTD_count_2segments(ip+EQUAL_READ32, repMatch+EQUAL_READ32, iend, repEnd, prefixStart) + EQUAL_READ32;
+                matchLength = ZSTD_count_2segments(ip+4, repMatch+4, iend, repEnd, prefixStart) + 4;
                 offset = offset_2; offset_2 = offset_1; offset_1 = (U32)offset;   /* swap offset history */
                 ZSTD_storeSeq(seqStorePtr, 0, anchor, 0, matchLength-MINMATCH);
                 ip += matchLength;
@@ -2133,7 +2714,7 @@ _storeSequence:
     }   }
 
     /* Save reps for next block */
-    ctx->savedRep[0] = offset_1; ctx->savedRep[1] = offset_2;
+    ctx->repToConfirm[0] = offset_1; ctx->repToConfirm[1] = offset_2;
 
     /* Last Literals */
     {   size_t const lastLLSize = iend - anchor;
@@ -2170,7 +2751,17 @@ static void ZSTD_compressBlock_btlazy2_extDict(ZSTD_CCtx* ctx, const void* src,
 static void ZSTD_compressBlock_btopt(ZSTD_CCtx* ctx, const void* src, size_t srcSize)
 {
 #ifdef ZSTD_OPT_H_91842398743
-    ZSTD_compressBlock_opt_generic(ctx, src, srcSize);
+    ZSTD_compressBlock_opt_generic(ctx, src, srcSize, 0);
+#else
+    (void)ctx; (void)src; (void)srcSize;
+    return;
+#endif
+}
+
+static void ZSTD_compressBlock_btultra(ZSTD_CCtx* ctx, const void* src, size_t srcSize)
+{
+#ifdef ZSTD_OPT_H_91842398743
+    ZSTD_compressBlock_opt_generic(ctx, src, srcSize, 1);
 #else
     (void)ctx; (void)src; (void)srcSize;
     return;
@@ -2180,7 +2771,17 @@ static void ZSTD_compressBlock_btopt(ZSTD_CCtx* ctx, const void* src, size_t src
 static void ZSTD_compressBlock_btopt_extDict(ZSTD_CCtx* ctx, const void* src, size_t srcSize)
 {
 #ifdef ZSTD_OPT_H_91842398743
-    ZSTD_compressBlock_opt_extDict_generic(ctx, src, srcSize);
+    ZSTD_compressBlock_opt_extDict_generic(ctx, src, srcSize, 0);
+#else
+    (void)ctx; (void)src; (void)srcSize;
+    return;
+#endif
+}
+
+static void ZSTD_compressBlock_btultra_extDict(ZSTD_CCtx* ctx, const void* src, size_t srcSize)
+{
+#ifdef ZSTD_OPT_H_91842398743
+    ZSTD_compressBlock_opt_extDict_generic(ctx, src, srcSize, 1);
 #else
     (void)ctx; (void)src; (void)srcSize;
     return;
@@ -2188,42 +2789,52 @@ static void ZSTD_compressBlock_btopt_extDict(ZSTD_CCtx* ctx, const void* src, si
 }
 
 
+/* ZSTD_selectBlockCompressor() :
+ * assumption : strat is a valid strategy */
 typedef void (*ZSTD_blockCompressor) (ZSTD_CCtx* ctx, const void* src, size_t srcSize);
-
 static ZSTD_blockCompressor ZSTD_selectBlockCompressor(ZSTD_strategy strat, int extDict)
 {
-    static const ZSTD_blockCompressor blockCompressor[2][7] = {
-        { ZSTD_compressBlock_fast, ZSTD_compressBlock_doubleFast, ZSTD_compressBlock_greedy, ZSTD_compressBlock_lazy, ZSTD_compressBlock_lazy2, ZSTD_compressBlock_btlazy2, ZSTD_compressBlock_btopt },
-        { ZSTD_compressBlock_fast_extDict, ZSTD_compressBlock_doubleFast_extDict, ZSTD_compressBlock_greedy_extDict, ZSTD_compressBlock_lazy_extDict,ZSTD_compressBlock_lazy2_extDict, ZSTD_compressBlock_btlazy2_extDict, ZSTD_compressBlock_btopt_extDict }
+    static const ZSTD_blockCompressor blockCompressor[2][(unsigned)ZSTD_btultra+1] = {
+        { ZSTD_compressBlock_fast  /* default for 0 */,
+          ZSTD_compressBlock_fast, ZSTD_compressBlock_doubleFast, ZSTD_compressBlock_greedy,
+          ZSTD_compressBlock_lazy, ZSTD_compressBlock_lazy2, ZSTD_compressBlock_btlazy2,
+          ZSTD_compressBlock_btopt, ZSTD_compressBlock_btultra },
+        { ZSTD_compressBlock_fast_extDict  /* default for 0 */,
+          ZSTD_compressBlock_fast_extDict, ZSTD_compressBlock_doubleFast_extDict, ZSTD_compressBlock_greedy_extDict,
+          ZSTD_compressBlock_lazy_extDict,ZSTD_compressBlock_lazy2_extDict, ZSTD_compressBlock_btlazy2_extDict,
+          ZSTD_compressBlock_btopt_extDict, ZSTD_compressBlock_btultra_extDict }
     };
+    ZSTD_STATIC_ASSERT((unsigned)ZSTD_fast == 1);
+    assert((U32)strat >= (U32)ZSTD_fast);
+    assert((U32)strat <= (U32)ZSTD_btultra);
 
-    return blockCompressor[extDict][(U32)strat];
+    return blockCompressor[extDict!=0][(U32)strat];
 }
 
 
 static size_t ZSTD_compressBlock_internal(ZSTD_CCtx* zc, void* dst, size_t dstCapacity, const void* src, size_t srcSize)
 {
-    ZSTD_blockCompressor const blockCompressor = ZSTD_selectBlockCompressor(zc->params.cParams.strategy, zc->lowLimit < zc->dictLimit);
+    ZSTD_blockCompressor const blockCompressor = ZSTD_selectBlockCompressor(zc->appliedParams.cParams.strategy, zc->lowLimit < zc->dictLimit);
     const BYTE* const base = zc->base;
     const BYTE* const istart = (const BYTE*)src;
     const U32 current = (U32)(istart-base);
     if (srcSize < MIN_CBLOCK_SIZE+ZSTD_blockHeaderSize+1) return 0;   /* don't even attempt compression below a certain srcSize */
     ZSTD_resetSeqStore(&(zc->seqStore));
     if (current > zc->nextToUpdate + 384)
-        zc->nextToUpdate = current - MIN(192, (U32)(current - zc->nextToUpdate - 384));   /* update tree not updated after finding very long rep matches */
+        zc->nextToUpdate = current - MIN(192, (U32)(current - zc->nextToUpdate - 384));   /* limited update after finding a very long match */
     blockCompressor(zc, src, srcSize);
     return ZSTD_compressSequences(zc, dst, dstCapacity, srcSize);
 }
 
 
-/*! ZSTD_compress_generic() :
+/*! ZSTD_compress_frameChunk() :
 *   Compress a chunk of data into one or multiple blocks.
 *   All blocks will be terminated, all input will be consumed.
 *   Function will issue an error if there is not enough `dstCapacity` to hold the compressed content.
 *   Frame is supposed already started (header already produced)
 *   @return : compressed size, or an error code
 */
-static size_t ZSTD_compress_generic (ZSTD_CCtx* cctx,
+static size_t ZSTD_compress_frameChunk (ZSTD_CCtx* cctx,
                                      void* dst, size_t dstCapacity,
                                const void* src, size_t srcSize,
                                      U32 lastFrameChunk)
@@ -2233,29 +2844,30 @@ static size_t ZSTD_compress_generic (ZSTD_CCtx* cctx,
     const BYTE* ip = (const BYTE*)src;
     BYTE* const ostart = (BYTE*)dst;
     BYTE* op = ostart;
-    U32 const maxDist = 1 << cctx->params.cParams.windowLog;
+    U32 const maxDist = 1 << cctx->appliedParams.cParams.windowLog;
 
-    if (cctx->params.fParams.checksumFlag)
+    if (cctx->appliedParams.fParams.checksumFlag && srcSize)
         XXH64_update(&cctx->xxhState, src, srcSize);
 
     while (remaining) {
         U32 const lastBlock = lastFrameChunk & (blockSize >= remaining);
         size_t cSize;
 
-        if (dstCapacity < ZSTD_blockHeaderSize + MIN_CBLOCK_SIZE) return ERROR(dstSize_tooSmall);   /* not enough space to store compressed block */
+        if (dstCapacity < ZSTD_blockHeaderSize + MIN_CBLOCK_SIZE)
+            return ERROR(dstSize_tooSmall);   /* not enough space to store compressed block */
         if (remaining < blockSize) blockSize = remaining;
 
         /* preemptive overflow correction */
-        if (cctx->lowLimit > (1<<30)) {
-            U32 const btplus = (cctx->params.cParams.strategy == ZSTD_btlazy2) | (cctx->params.cParams.strategy == ZSTD_btopt);
-            U32 const chainMask = (1 << (cctx->params.cParams.chainLog - btplus)) - 1;
-            U32 const supLog = MAX(cctx->params.cParams.chainLog, 17 /* blockSize */);
-            U32 const newLowLimit = (cctx->lowLimit & chainMask) + (1 << supLog);   /* preserve position % chainSize, ensure current-repcode doesn't underflow */
-            U32 const correction = cctx->lowLimit - newLowLimit;
+        if (cctx->lowLimit > (3U<<29)) {
+            U32 const cycleMask = (1 << ZSTD_cycleLog(cctx->appliedParams.cParams.hashLog, cctx->appliedParams.cParams.strategy)) - 1;
+            U32 const current = (U32)(ip - cctx->base);
+            U32 const newCurrent = (current & cycleMask) + (1 << cctx->appliedParams.cParams.windowLog);
+            U32 const correction = current - newCurrent;
+            ZSTD_STATIC_ASSERT(ZSTD_WINDOWLOG_MAX_64 <= 30);
             ZSTD_reduceIndex(cctx, correction);
             cctx->base += correction;
             cctx->dictBase += correction;
-            cctx->lowLimit = newLowLimit;
+            cctx->lowLimit -= correction;
             cctx->dictLimit -= correction;
             if (cctx->nextToUpdate < correction) cctx->nextToUpdate = 0;
             else cctx->nextToUpdate -= correction;
@@ -2297,25 +2909,27 @@ static size_t ZSTD_compress_generic (ZSTD_CCtx* cctx,
 static size_t ZSTD_writeFrameHeader(void* dst, size_t dstCapacity,
                                     ZSTD_parameters params, U64 pledgedSrcSize, U32 dictID)
 {   BYTE* const op = (BYTE*)dst;
-    U32   const dictIDSizeCode = (dictID>0) + (dictID>=256) + (dictID>=65536);   /* 0-3 */
+    U32   const dictIDSizeCodeLength = (dictID>0) + (dictID>=256) + (dictID>=65536);   /* 0-3 */
+    U32   const dictIDSizeCode = params.fParams.noDictIDFlag ? 0 : dictIDSizeCodeLength;   /* 0-3 */
     U32   const checksumFlag = params.fParams.checksumFlag>0;
     U32   const windowSize = 1U << params.cParams.windowLog;
-    U32   const singleSegment = params.fParams.contentSizeFlag && (windowSize > (pledgedSrcSize-1));
+    U32   const singleSegment = params.fParams.contentSizeFlag && (windowSize >= pledgedSrcSize);
     BYTE  const windowLogByte = (BYTE)((params.cParams.windowLog - ZSTD_WINDOWLOG_ABSOLUTEMIN) << 3);
     U32   const fcsCode = params.fParams.contentSizeFlag ?
-                     (pledgedSrcSize>=256) + (pledgedSrcSize>=65536+256) + (pledgedSrcSize>=0xFFFFFFFFU) :   /* 0-3 */
-                      0;
+                     (pledgedSrcSize>=256) + (pledgedSrcSize>=65536+256) + (pledgedSrcSize>=0xFFFFFFFFU) : 0;  /* 0-3 */
     BYTE  const frameHeaderDecriptionByte = (BYTE)(dictIDSizeCode + (checksumFlag<<2) + (singleSegment<<5) + (fcsCode<<6) );
     size_t pos;
 
     if (dstCapacity < ZSTD_frameHeaderSize_max) return ERROR(dstSize_tooSmall);
+    DEBUGLOG(5, "ZSTD_writeFrameHeader : dictIDFlag : %u ; dictID : %u ; dictIDSizeCode : %u",
+                !params.fParams.noDictIDFlag, dictID,  dictIDSizeCode);
 
     MEM_writeLE32(dst, ZSTD_MAGICNUMBER);
     op[4] = frameHeaderDecriptionByte; pos=5;
     if (!singleSegment) op[pos++] = windowLogByte;
     switch(dictIDSizeCode)
     {
-        default:   /* impossible */
+        default:  assert(0); /* impossible */
         case 0 : break;
         case 1 : op[pos] = (BYTE)(dictID); pos++; break;
         case 2 : MEM_writeLE16(op+pos, (U16)dictID); pos+=2; break;
@@ -2323,7 +2937,7 @@ static size_t ZSTD_writeFrameHeader(void* dst, size_t dstCapacity,
     }
     switch(fcsCode)
     {
-        default:   /* impossible */
+        default:  assert(0); /* impossible */
         case 0 : if (singleSegment) op[pos++] = (BYTE)(pledgedSrcSize); break;
         case 1 : MEM_writeLE16(op+pos, (U16)(pledgedSrcSize-256)); pos+=2; break;
         case 2 : MEM_writeLE32(op+pos, (U32)(pledgedSrcSize)); pos+=4; break;
@@ -2341,10 +2955,13 @@ static size_t ZSTD_compressContinue_internal (ZSTD_CCtx* cctx,
     const BYTE* const ip = (const BYTE*) src;
     size_t fhSize = 0;
 
+    DEBUGLOG(5, "ZSTD_compressContinue_internal");
+    DEBUGLOG(5, "stage: %u", cctx->stage);
     if (cctx->stage==ZSTDcs_created) return ERROR(stage_wrong);   /* missing init (ZSTD_compressBegin) */
 
     if (frame && (cctx->stage==ZSTDcs_init)) {
-        fhSize = ZSTD_writeFrameHeader(dst, dstCapacity, cctx->params, cctx->frameContentSize, cctx->dictID);
+        fhSize = ZSTD_writeFrameHeader(dst, dstCapacity, cctx->appliedParams,
+                                cctx->pledgedSrcSizePlusOne-1, cctx->dictID);
         if (ZSTD_isError(fhSize)) return fhSize;
         dstCapacity -= fhSize;
         dst = (char*)dst + fhSize;
@@ -2372,12 +2989,15 @@ static size_t ZSTD_compressContinue_internal (ZSTD_CCtx* cctx,
 
     cctx->nextSrc = ip + srcSize;
 
-    {   size_t const cSize = frame ?
-                             ZSTD_compress_generic (cctx, dst, dstCapacity, src, srcSize, lastFrameChunk) :
+    if (srcSize) {
+        size_t const cSize = frame ?
+                             ZSTD_compress_frameChunk (cctx, dst, dstCapacity, src, srcSize, lastFrameChunk) :
                              ZSTD_compressBlock_internal (cctx, dst, dstCapacity, src, srcSize);
         if (ZSTD_isError(cSize)) return cSize;
+        cctx->consumedSrcSize += srcSize;
         return cSize + fhSize;
-    }
+    } else
+        return fhSize;
 }
 
 
@@ -2385,23 +3005,29 @@ size_t ZSTD_compressContinue (ZSTD_CCtx* cctx,
                               void* dst, size_t dstCapacity,
                         const void* src, size_t srcSize)
 {
-    return ZSTD_compressContinue_internal(cctx, dst, dstCapacity, src, srcSize, 1, 0);
+    return ZSTD_compressContinue_internal(cctx, dst, dstCapacity, src, srcSize, 1 /* frame mode */, 0 /* last chunk */);
 }
 
 
-size_t ZSTD_getBlockSizeMax(ZSTD_CCtx* cctx)
+size_t ZSTD_getBlockSize(const ZSTD_CCtx* cctx)
 {
-    return MIN (ZSTD_BLOCKSIZE_ABSOLUTEMAX, 1 << cctx->params.cParams.windowLog);
+    U32 const cLevel = cctx->compressionLevel;
+    ZSTD_compressionParameters cParams = (cLevel == ZSTD_CLEVEL_CUSTOM) ?
+                                        cctx->appliedParams.cParams :
+                                        ZSTD_getCParams(cLevel, 0, 0);
+    return MIN (ZSTD_BLOCKSIZE_MAX, 1 << cParams.windowLog);
 }
 
 size_t ZSTD_compressBlock(ZSTD_CCtx* cctx, void* dst, size_t dstCapacity, const void* src, size_t srcSize)
 {
-    size_t const blockSizeMax = ZSTD_getBlockSizeMax(cctx);
+    size_t const blockSizeMax = ZSTD_getBlockSize(cctx);
     if (srcSize > blockSizeMax) return ERROR(srcSize_wrong);
-    return ZSTD_compressContinue_internal(cctx, dst, dstCapacity, src, srcSize, 0, 0);
+    return ZSTD_compressContinue_internal(cctx, dst, dstCapacity, src, srcSize, 0 /* frame mode */, 0 /* last chunk */);
 }
 
-
+/*! ZSTD_loadDictionaryContent() :
+ *  @return : 0, or an error code
+ */
 static size_t ZSTD_loadDictionaryContent(ZSTD_CCtx* zc, const void* src, size_t srcSize)
 {
     const BYTE* const ip = (const BYTE*) src;
@@ -2413,125 +3039,197 @@ static size_t ZSTD_loadDictionaryContent(ZSTD_CCtx* zc, const void* src, size_t
     zc->dictBase = zc->base;
     zc->base += ip - zc->nextSrc;
     zc->nextToUpdate = zc->dictLimit;
-    zc->loadedDictEnd = (U32)(iend - zc->base);
+    zc->loadedDictEnd = zc->forceWindow ? 0 : (U32)(iend - zc->base);
 
     zc->nextSrc = iend;
     if (srcSize <= HASH_READ_SIZE) return 0;
 
-    switch(zc->params.cParams.strategy)
+    switch(zc->appliedParams.cParams.strategy)
     {
     case ZSTD_fast:
-        ZSTD_fillHashTable (zc, iend, zc->params.cParams.searchLength);
+        ZSTD_fillHashTable (zc, iend, zc->appliedParams.cParams.searchLength);
         break;
 
     case ZSTD_dfast:
-        ZSTD_fillDoubleHashTable (zc, iend, zc->params.cParams.searchLength);
+        ZSTD_fillDoubleHashTable (zc, iend, zc->appliedParams.cParams.searchLength);
         break;
 
     case ZSTD_greedy:
     case ZSTD_lazy:
     case ZSTD_lazy2:
-        ZSTD_insertAndFindFirstIndex (zc, iend-HASH_READ_SIZE, zc->params.cParams.searchLength);
+        if (srcSize >= HASH_READ_SIZE)
+            ZSTD_insertAndFindFirstIndex(zc, iend-HASH_READ_SIZE, zc->appliedParams.cParams.searchLength);
         break;
 
     case ZSTD_btlazy2:
     case ZSTD_btopt:
-        ZSTD_updateTree(zc, iend-HASH_READ_SIZE, iend, 1 << zc->params.cParams.searchLog, zc->params.cParams.searchLength);
+    case ZSTD_btultra:
+        if (srcSize >= HASH_READ_SIZE)
+            ZSTD_updateTree(zc, iend-HASH_READ_SIZE, iend, 1 << zc->appliedParams.cParams.searchLog, zc->appliedParams.cParams.searchLength);
         break;
 
     default:
-        return ERROR(GENERIC);   /* strategy doesn't exist; impossible */
+        assert(0);  /* not possible : not a valid strategy id */
     }
 
-    zc->nextToUpdate = zc->loadedDictEnd;
+    zc->nextToUpdate = (U32)(iend - zc->base);
+    return 0;
+}
+
+
+/* Dictionaries that assign zero probability to symbols that show up causes problems
+   when FSE encoding.  Refuse dictionaries that assign zero probability to symbols
+   that we may encounter during compression.
+   NOTE: This behavior is not standard and could be improved in the future. */
+static size_t ZSTD_checkDictNCount(short* normalizedCounter, unsigned dictMaxSymbolValue, unsigned maxSymbolValue) {
+    U32 s;
+    if (dictMaxSymbolValue < maxSymbolValue) return ERROR(dictionary_corrupted);
+    for (s = 0; s <= maxSymbolValue; ++s) {
+        if (normalizedCounter[s] == 0) return ERROR(dictionary_corrupted);
+    }
     return 0;
 }
 
 
 /* Dictionary format :
-     Magic == ZSTD_DICT_MAGIC (4 bytes)
-     HUF_writeCTable(256)
-     FSE_writeNCount(off)
-     FSE_writeNCount(ml)
-     FSE_writeNCount(ll)
-     RepOffsets
-     Dictionary content
-*/
-/*! ZSTD_loadDictEntropyStats() :
-    @return : size read from dictionary
-    note : magic number supposed already checked */
-static size_t ZSTD_loadDictEntropyStats(ZSTD_CCtx* cctx, const void* dict, size_t dictSize)
+ * See :
+ * https://github.com/facebook/zstd/blob/master/doc/zstd_compression_format.md#dictionary-format
+ */
+/*! ZSTD_loadZstdDictionary() :
+ * @return : 0, or an error code
+ *  assumptions : magic number supposed already checked
+ *                dictSize supposed > 8
+ */
+static size_t ZSTD_loadZstdDictionary(ZSTD_CCtx* cctx, const void* dict, size_t dictSize)
 {
     const BYTE* dictPtr = (const BYTE*)dict;
     const BYTE* const dictEnd = dictPtr + dictSize;
+    short offcodeNCount[MaxOff+1];
+    unsigned offcodeMaxValue = MaxOff;
+    BYTE scratchBuffer[1<<MAX(MLFSELog,LLFSELog)];
 
-    {   size_t const hufHeaderSize = HUF_readCTable(cctx->hufTable, 255, dict, dictSize);
+    dictPtr += 4;   /* skip magic number */
+    cctx->dictID = cctx->appliedParams.fParams.noDictIDFlag ? 0 :  MEM_readLE32(dictPtr);
+    dictPtr += 4;
+
+    {   size_t const hufHeaderSize = HUF_readCTable(cctx->hufCTable, 255, dictPtr, dictEnd-dictPtr);
         if (HUF_isError(hufHeaderSize)) return ERROR(dictionary_corrupted);
         dictPtr += hufHeaderSize;
     }
 
-    {   short offcodeNCount[MaxOff+1];
-        unsigned offcodeMaxValue = MaxOff, offcodeLog = OffFSELog;
+    {   unsigned offcodeLog;
         size_t const offcodeHeaderSize = FSE_readNCount(offcodeNCount, &offcodeMaxValue, &offcodeLog, dictPtr, dictEnd-dictPtr);
         if (FSE_isError(offcodeHeaderSize)) return ERROR(dictionary_corrupted);
-        CHECK_E (FSE_buildCTable(cctx->offcodeCTable, offcodeNCount, offcodeMaxValue, offcodeLog), dictionary_corrupted);
+        if (offcodeLog > OffFSELog) return ERROR(dictionary_corrupted);
+        /* Defer checking offcodeMaxValue because we need to know the size of the dictionary content */
+        CHECK_E( FSE_buildCTable_wksp(cctx->offcodeCTable, offcodeNCount, offcodeMaxValue, offcodeLog, scratchBuffer, sizeof(scratchBuffer)),
+                 dictionary_corrupted);
         dictPtr += offcodeHeaderSize;
     }
 
     {   short matchlengthNCount[MaxML+1];
-        unsigned matchlengthMaxValue = MaxML, matchlengthLog = MLFSELog;
+        unsigned matchlengthMaxValue = MaxML, matchlengthLog;
         size_t const matchlengthHeaderSize = FSE_readNCount(matchlengthNCount, &matchlengthMaxValue, &matchlengthLog, dictPtr, dictEnd-dictPtr);
         if (FSE_isError(matchlengthHeaderSize)) return ERROR(dictionary_corrupted);
-        CHECK_E (FSE_buildCTable(cctx->matchlengthCTable, matchlengthNCount, matchlengthMaxValue, matchlengthLog), dictionary_corrupted);
+        if (matchlengthLog > MLFSELog) return ERROR(dictionary_corrupted);
+        /* Every match length code must have non-zero probability */
+        CHECK_F( ZSTD_checkDictNCount(matchlengthNCount, matchlengthMaxValue, MaxML));
+        CHECK_E( FSE_buildCTable_wksp(cctx->matchlengthCTable, matchlengthNCount, matchlengthMaxValue, matchlengthLog, scratchBuffer, sizeof(scratchBuffer)),
+                 dictionary_corrupted);
         dictPtr += matchlengthHeaderSize;
     }
 
     {   short litlengthNCount[MaxLL+1];
-        unsigned litlengthMaxValue = MaxLL, litlengthLog = LLFSELog;
+        unsigned litlengthMaxValue = MaxLL, litlengthLog;
         size_t const litlengthHeaderSize = FSE_readNCount(litlengthNCount, &litlengthMaxValue, &litlengthLog, dictPtr, dictEnd-dictPtr);
         if (FSE_isError(litlengthHeaderSize)) return ERROR(dictionary_corrupted);
-        CHECK_E(FSE_buildCTable(cctx->litlengthCTable, litlengthNCount, litlengthMaxValue, litlengthLog), dictionary_corrupted);
+        if (litlengthLog > LLFSELog) return ERROR(dictionary_corrupted);
+        /* Every literal length code must have non-zero probability */
+        CHECK_F( ZSTD_checkDictNCount(litlengthNCount, litlengthMaxValue, MaxLL));
+        CHECK_E( FSE_buildCTable_wksp(cctx->litlengthCTable, litlengthNCount, litlengthMaxValue, litlengthLog, scratchBuffer, sizeof(scratchBuffer)),
+                 dictionary_corrupted);
         dictPtr += litlengthHeaderSize;
     }
 
     if (dictPtr+12 > dictEnd) return ERROR(dictionary_corrupted);
-    cctx->rep[0] = MEM_readLE32(dictPtr+0); if (cctx->rep[0] >= dictSize) return ERROR(dictionary_corrupted);
-    cctx->rep[1] = MEM_readLE32(dictPtr+4); if (cctx->rep[1] >= dictSize) return ERROR(dictionary_corrupted);
-    cctx->rep[2] = MEM_readLE32(dictPtr+8); if (cctx->rep[2] >= dictSize) return ERROR(dictionary_corrupted);
+    cctx->rep[0] = MEM_readLE32(dictPtr+0);
+    cctx->rep[1] = MEM_readLE32(dictPtr+4);
+    cctx->rep[2] = MEM_readLE32(dictPtr+8);
     dictPtr += 12;
 
-    cctx->flagStaticTables = 1;
-    return dictPtr - (const BYTE*)dict;
+    {   size_t const dictContentSize = (size_t)(dictEnd - dictPtr);
+        U32 offcodeMax = MaxOff;
+        if (dictContentSize <= ((U32)-1) - 128 KB) {
+            U32 const maxOffset = (U32)dictContentSize + 128 KB; /* The maximum offset that must be supported */
+            offcodeMax = ZSTD_highbit32(maxOffset); /* Calculate minimum offset code required to represent maxOffset */
+        }
+        /* All offset values <= dictContentSize + 128 KB must be representable */
+        CHECK_F (ZSTD_checkDictNCount(offcodeNCount, offcodeMaxValue, MIN(offcodeMax, MaxOff)));
+        /* All repCodes must be <= dictContentSize and != 0*/
+        {   U32 u;
+            for (u=0; u<3; u++) {
+                if (cctx->rep[u] == 0) return ERROR(dictionary_corrupted);
+                if (cctx->rep[u] > dictContentSize) return ERROR(dictionary_corrupted);
+        }   }
+
+        cctx->fseCTables_ready = 1;
+        cctx->hufCTable_repeatMode = HUF_repeat_valid;
+        return ZSTD_loadDictionaryContent(cctx, dictPtr, dictContentSize);
+    }
 }
 
 /** ZSTD_compress_insertDictionary() :
 *   @return : 0, or an error code */
-static size_t ZSTD_compress_insertDictionary(ZSTD_CCtx* zc, const void* dict, size_t dictSize)
+static size_t ZSTD_compress_insertDictionary(ZSTD_CCtx* cctx,
+                                       const void* dict, size_t dictSize,
+                                             ZSTD_dictMode_e dictMode)
 {
+    DEBUGLOG(5, "ZSTD_compress_insertDictionary");
     if ((dict==NULL) || (dictSize<=8)) return 0;
 
-    /* default : dict is pure content */
-    if (MEM_readLE32(dict) != ZSTD_DICT_MAGIC) return ZSTD_loadDictionaryContent(zc, dict, dictSize);
-    zc->dictID = zc->params.fParams.noDictIDFlag ? 0 :  MEM_readLE32((const char*)dict+4);
+    /* dict restricted modes */
+    if (dictMode==ZSTD_dm_rawContent)
+        return ZSTD_loadDictionaryContent(cctx, dict, dictSize);
 
-    /* known magic number : dict is parsed for entropy stats and content */
-    {   size_t const loadError = ZSTD_loadDictEntropyStats(zc, (const char*)dict+8 /* skip dictHeader */, dictSize-8);
-        size_t const eSize = loadError + 8;
-        if (ZSTD_isError(loadError)) return loadError;
-        return ZSTD_loadDictionaryContent(zc, (const char*)dict+eSize, dictSize-eSize);
+    if (MEM_readLE32(dict) != ZSTD_MAGIC_DICTIONARY) {
+        if (dictMode == ZSTD_dm_auto) {
+            DEBUGLOG(5, "raw content dictionary detected");
+            return ZSTD_loadDictionaryContent(cctx, dict, dictSize);
+        }
+        if (dictMode == ZSTD_dm_fullDict)
+            return ERROR(dictionary_wrong);
+        assert(0);   /* impossible */
     }
+
+    /* dict as full zstd dictionary */
+    return ZSTD_loadZstdDictionary(cctx, dict, dictSize);
 }
 
-
 /*! ZSTD_compressBegin_internal() :
-*   @return : 0, or an error code */
+ * @return : 0, or an error code */
 static size_t ZSTD_compressBegin_internal(ZSTD_CCtx* cctx,
                              const void* dict, size_t dictSize,
-                                   ZSTD_parameters params, U64 pledgedSrcSize)
+                             ZSTD_dictMode_e dictMode,
+                             const ZSTD_CDict* cdict,
+                                   ZSTD_parameters params, U64 pledgedSrcSize,
+                                   ZSTD_buffered_policy_e zbuff)
 {
-    ZSTD_compResetPolicy_e const crp = dictSize ? ZSTDcrp_fullReset : ZSTDcrp_continue;
-    CHECK_F(ZSTD_resetCCtx_advanced(cctx, params, pledgedSrcSize, crp));
-    return ZSTD_compress_insertDictionary(cctx, dict, dictSize);
+    DEBUGLOG(4, "ZSTD_compressBegin_internal");
+    DEBUGLOG(4, "dict ? %s", dict ? "dict" : (cdict ? "cdict" : "none"));
+    DEBUGLOG(4, "dictMode : %u", (U32)dictMode);
+    /* params are supposed to be fully validated at this point */
+    assert(!ZSTD_isError(ZSTD_checkCParams(params.cParams)));
+    assert(!((dict) && (cdict)));  /* either dict or cdict, not both */
+
+    if (cdict && cdict->dictContentSize>0) {
+        return ZSTD_copyCCtx_internal(cctx, cdict->refContext,
+                                      params.fParams, pledgedSrcSize,
+                                      zbuff);
+    }
+
+    CHECK_F( ZSTD_resetCCtx_internal(cctx, params, pledgedSrcSize,
+                                     ZSTDcrp_continue, zbuff) );
+    return ZSTD_compress_insertDictionary(cctx, dict, dictSize, dictMode);
 }
 
 
@@ -2543,20 +3241,22 @@ size_t ZSTD_compressBegin_advanced(ZSTD_CCtx* cctx,
 {
     /* compression parameters verification and optimization */
     CHECK_F(ZSTD_checkCParams(params.cParams));
-    return ZSTD_compressBegin_internal(cctx, dict, dictSize, params, pledgedSrcSize);
+    return ZSTD_compressBegin_internal(cctx, dict, dictSize, ZSTD_dm_auto, NULL,
+                                    params, pledgedSrcSize, ZSTDb_not_buffered);
 }
 
 
 size_t ZSTD_compressBegin_usingDict(ZSTD_CCtx* cctx, const void* dict, size_t dictSize, int compressionLevel)
 {
     ZSTD_parameters const params = ZSTD_getParams(compressionLevel, 0, dictSize);
-    return ZSTD_compressBegin_internal(cctx, dict, dictSize, params, 0);
+    return ZSTD_compressBegin_internal(cctx, dict, dictSize, ZSTD_dm_auto, NULL,
+                                       params, 0, ZSTDb_not_buffered);
 }
 
 
-size_t ZSTD_compressBegin(ZSTD_CCtx* zc, int compressionLevel)
+size_t ZSTD_compressBegin(ZSTD_CCtx* cctx, int compressionLevel)
 {
-    return ZSTD_compressBegin_usingDict(zc, NULL, 0, compressionLevel);
+    return ZSTD_compressBegin_usingDict(cctx, NULL, 0, compressionLevel);
 }
 
 
@@ -2569,11 +3269,12 @@ static size_t ZSTD_writeEpilogue(ZSTD_CCtx* cctx, void* dst, size_t dstCapacity)
     BYTE* op = ostart;
     size_t fhSize = 0;
 
+    DEBUGLOG(5, "ZSTD_writeEpilogue");
     if (cctx->stage == ZSTDcs_created) return ERROR(stage_wrong);  /* init missing */
 
     /* special case : empty frame */
     if (cctx->stage == ZSTDcs_init) {
-        fhSize = ZSTD_writeFrameHeader(dst, dstCapacity, cctx->params, 0, 0);
+        fhSize = ZSTD_writeFrameHeader(dst, dstCapacity, cctx->appliedParams, 0, 0);
         if (ZSTD_isError(fhSize)) return fhSize;
         dstCapacity -= fhSize;
         op += fhSize;
@@ -2589,7 +3290,7 @@ static size_t ZSTD_writeEpilogue(ZSTD_CCtx* cctx, void* dst, size_t dstCapacity)
         dstCapacity -= ZSTD_blockHeaderSize;
     }
 
-    if (cctx->params.fParams.checksumFlag) {
+    if (cctx->appliedParams.fParams.checksumFlag) {
         U32 const checksum = (U32) XXH64_digest(&cctx->xxhState);
         if (dstCapacity<4) return ERROR(dstSize_tooSmall);
         MEM_writeLE32(op, checksum);
@@ -2606,10 +3307,19 @@ size_t ZSTD_compressEnd (ZSTD_CCtx* cctx,
                    const void* src, size_t srcSize)
 {
     size_t endResult;
-    size_t const cSize = ZSTD_compressContinue_internal(cctx, dst, dstCapacity, src, srcSize, 1, 1);
+    size_t const cSize = ZSTD_compressContinue_internal(cctx,
+                                dst, dstCapacity, src, srcSize,
+                                1 /* frame mode */, 1 /* last chunk */);
     if (ZSTD_isError(cSize)) return cSize;
     endResult = ZSTD_writeEpilogue(cctx, (char*)dst + cSize, dstCapacity-cSize);
     if (ZSTD_isError(endResult)) return endResult;
+    if (cctx->appliedParams.fParams.contentSizeFlag) {  /* control src size */
+        DEBUGLOG(5, "end of frame : controlling src size");
+        if (cctx->pledgedSrcSizePlusOne != cctx->consumedSrcSize+1) {
+            DEBUGLOG(5, "error : pledgedSrcSize = %u, while realSrcSize = %u",
+                (U32)cctx->pledgedSrcSizePlusOne-1, (U32)cctx->consumedSrcSize);
+            return ERROR(srcSize_wrong);
+    }   }
     return cSize + endResult;
 }
 
@@ -2620,7 +3330,8 @@ static size_t ZSTD_compress_internal (ZSTD_CCtx* cctx,
                          const void* dict,size_t dictSize,
                                ZSTD_parameters params)
 {
-    CHECK_F(ZSTD_compressBegin_internal(cctx, dict, dictSize, params, srcSize));
+    CHECK_F( ZSTD_compressBegin_internal(cctx, dict, dictSize, ZSTD_dm_auto, NULL,
+                                         params, srcSize, ZSTDb_not_buffered) );
     return ZSTD_compressEnd(cctx, dst,  dstCapacity, src, srcSize);
 }
 
@@ -2634,9 +3345,10 @@ size_t ZSTD_compress_advanced (ZSTD_CCtx* ctx,
     return ZSTD_compress_internal(ctx, dst, dstCapacity, src, srcSize, dict, dictSize, params);
 }
 
-size_t ZSTD_compress_usingDict(ZSTD_CCtx* ctx, void* dst, size_t dstCapacity, const void* src, size_t srcSize, const void* dict, size_t dictSize, int compressionLevel)
+size_t ZSTD_compress_usingDict(ZSTD_CCtx* ctx, void* dst, size_t dstCapacity, const void* src, size_t srcSize,
+                               const void* dict, size_t dictSize, int compressionLevel)
 {
-    ZSTD_parameters params = ZSTD_getParams(compressionLevel, srcSize, dictSize);
+    ZSTD_parameters params = ZSTD_getParams(compressionLevel, srcSize, dict ? dictSize : 0);
     params.fParams.contentSizeFlag = 1;
     return ZSTD_compress_internal(ctx, dst, dstCapacity, src, srcSize, dict, dictSize, params);
 }
@@ -2651,65 +3363,122 @@ size_t ZSTD_compress(void* dst, size_t dstCapacity, const void* src, size_t srcS
     size_t result;
     ZSTD_CCtx ctxBody;
     memset(&ctxBody, 0, sizeof(ctxBody));
-    memcpy(&ctxBody.customMem, &defaultCustomMem, sizeof(ZSTD_customMem));
+    ctxBody.customMem = ZSTD_defaultCMem;
     result = ZSTD_compressCCtx(&ctxBody, dst, dstCapacity, src, srcSize, compressionLevel);
-    ZSTD_free(ctxBody.workSpace, defaultCustomMem);  /* can't free ctxBody itself, as it's on stack; free only heap content */
+    ZSTD_free(ctxBody.workSpace, ZSTD_defaultCMem);  /* can't free ctxBody itself, as it's on stack; free only heap content */
     return result;
 }
 
 
 /* =====  Dictionary API  ===== */
 
-struct ZSTD_CDict_s {
-    void* dictContent;
-    size_t dictContentSize;
-    ZSTD_CCtx* refContext;
-};  /* typedef'd tp ZSTD_CDict within "zstd.h" */
+/*! ZSTD_estimateCDictSize_advanced() :
+ *  Estimate amount of memory that will be needed to create a dictionary with following arguments */
+size_t ZSTD_estimateCDictSize_advanced(size_t dictSize, ZSTD_compressionParameters cParams, unsigned byReference)
+{
+    DEBUGLOG(5, "sizeof(ZSTD_CDict) : %u", (U32)sizeof(ZSTD_CDict));
+    DEBUGLOG(5, "CCtx estimate : %u", (U32)ZSTD_estimateCCtxSize_advanced(cParams));
+    return sizeof(ZSTD_CDict) + ZSTD_estimateCCtxSize_advanced(cParams)
+           + (byReference ? 0 : dictSize);
+}
+
+size_t ZSTD_estimateCDictSize(size_t dictSize, int compressionLevel)
+{
+    ZSTD_compressionParameters const cParams = ZSTD_getCParams(compressionLevel, 0, dictSize);
+    return ZSTD_estimateCDictSize_advanced(dictSize, cParams, 0);
+}
 
 size_t ZSTD_sizeof_CDict(const ZSTD_CDict* cdict)
 {
     if (cdict==NULL) return 0;   /* support sizeof on NULL */
-    return ZSTD_sizeof_CCtx(cdict->refContext) + cdict->dictContentSize;
+    DEBUGLOG(5, "sizeof(*cdict) : %u", (U32)sizeof(*cdict));
+    DEBUGLOG(5, "ZSTD_sizeof_CCtx : %u", (U32)ZSTD_sizeof_CCtx(cdict->refContext));
+    return ZSTD_sizeof_CCtx(cdict->refContext) + (cdict->dictBuffer ? cdict->dictContentSize : 0) + sizeof(*cdict);
 }
 
-ZSTD_CDict* ZSTD_createCDict_advanced(const void* dict, size_t dictSize, ZSTD_parameters params, ZSTD_customMem customMem)
+static ZSTD_parameters ZSTD_makeParams(ZSTD_compressionParameters cParams, ZSTD_frameParameters fParams)
 {
-    if (!customMem.customAlloc && !customMem.customFree) customMem = defaultCustomMem;
-    if (!customMem.customAlloc || !customMem.customFree) return NULL;
+    ZSTD_parameters params;
+    params.cParams = cParams;
+    params.fParams = fParams;
+    return params;
+}
 
-    {   ZSTD_CDict* const cdict = (ZSTD_CDict*) ZSTD_malloc(sizeof(ZSTD_CDict), customMem);
-        void* const dictContent = ZSTD_malloc(dictSize, customMem);
+static size_t ZSTD_initCDict_internal(
+                    ZSTD_CDict* cdict,
+              const void* dictBuffer, size_t dictSize,
+                    unsigned byReference, ZSTD_dictMode_e dictMode,
+                    ZSTD_compressionParameters cParams)
+{
+    DEBUGLOG(5, "ZSTD_initCDict_internal, mode %u", (U32)dictMode);
+    if ((byReference) || (!dictBuffer) || (!dictSize)) {
+        cdict->dictBuffer = NULL;
+        cdict->dictContent = dictBuffer;
+    } else {
+        void* const internalBuffer = ZSTD_malloc(dictSize, cdict->refContext->customMem);
+        cdict->dictBuffer = internalBuffer;
+        cdict->dictContent = internalBuffer;
+        if (!internalBuffer) return ERROR(memory_allocation);
+        memcpy(internalBuffer, dictBuffer, dictSize);
+    }
+    cdict->dictContentSize = dictSize;
+
+    {   ZSTD_frameParameters const fParams = { 0 /* contentSizeFlag */,
+                    0 /* checksumFlag */, 0 /* noDictIDFlag */ };  /* dummy */
+        ZSTD_parameters const params = ZSTD_makeParams(cParams, fParams);
+        CHECK_F( ZSTD_compressBegin_internal(cdict->refContext,
+                                        cdict->dictContent, dictSize, dictMode,
+                                        NULL,
+                                        params, ZSTD_CONTENTSIZE_UNKNOWN,
+                                        ZSTDb_not_buffered) );
+    }
+
+    return 0;
+}
+
+ZSTD_CDict* ZSTD_createCDict_advanced(const void* dictBuffer, size_t dictSize,
+                                      unsigned byReference, ZSTD_dictMode_e dictMode,
+                                      ZSTD_compressionParameters cParams, ZSTD_customMem customMem)
+{
+    DEBUGLOG(5, "ZSTD_createCDict_advanced, mode %u", (U32)dictMode);
+    if (!customMem.customAlloc ^ !customMem.customFree) return NULL;
+
+    {   ZSTD_CDict* const cdict = (ZSTD_CDict*)ZSTD_malloc(sizeof(ZSTD_CDict), customMem);
         ZSTD_CCtx* const cctx = ZSTD_createCCtx_advanced(customMem);
 
-        if (!dictContent || !cdict || !cctx) {
-            ZSTD_free(dictContent, customMem);
+        if (!cdict || !cctx) {
             ZSTD_free(cdict, customMem);
-            ZSTD_free(cctx, customMem);
+            ZSTD_freeCCtx(cctx);
+            return NULL;
+        }
+        cdict->refContext = cctx;
+
+        if (ZSTD_isError( ZSTD_initCDict_internal(cdict,
+                                        dictBuffer, dictSize,
+                                        byReference, dictMode,
+                                        cParams) )) {
+            ZSTD_freeCDict(cdict);
             return NULL;
         }
 
-        memcpy(dictContent, dict, dictSize);
-        {   size_t const errorCode = ZSTD_compressBegin_advanced(cctx, dictContent, dictSize, params, 0);
-            if (ZSTD_isError(errorCode)) {
-                ZSTD_free(dictContent, customMem);
-                ZSTD_free(cdict, customMem);
-                ZSTD_free(cctx, customMem);
-                return NULL;
-        }   }
-
-        cdict->dictContent = dictContent;
-        cdict->dictContentSize = dictSize;
-        cdict->refContext = cctx;
         return cdict;
     }
 }
 
 ZSTD_CDict* ZSTD_createCDict(const void* dict, size_t dictSize, int compressionLevel)
 {
-    ZSTD_customMem const allocator = { NULL, NULL, NULL };
-    ZSTD_parameters params = ZSTD_getParams(compressionLevel, 0, dictSize);
-    params.fParams.contentSizeFlag = 1;
-    return ZSTD_createCDict_advanced(dict, dictSize, params, allocator);
+    ZSTD_compressionParameters cParams = ZSTD_getCParams(compressionLevel, 0, dictSize);
+    return ZSTD_createCDict_advanced(dict, dictSize,
+                                     0 /* byReference */, ZSTD_dm_auto,
+                                     cParams, ZSTD_defaultCMem);
+}
+
+ZSTD_CDict* ZSTD_createCDict_byReference(const void* dict, size_t dictSize, int compressionLevel)
+{
+    ZSTD_compressionParameters cParams = ZSTD_getCParams(compressionLevel, 0, dictSize);
+    return ZSTD_createCDict_advanced(dict, dictSize,
+                                     1 /* byReference */, ZSTD_dm_auto,
+                                     cParams, ZSTD_defaultCMem);
 }
 
 size_t ZSTD_freeCDict(ZSTD_CDict* cdict)
@@ -2717,36 +3486,112 @@ size_t ZSTD_freeCDict(ZSTD_CDict* cdict)
     if (cdict==NULL) return 0;   /* support free on NULL */
     {   ZSTD_customMem const cMem = cdict->refContext->customMem;
         ZSTD_freeCCtx(cdict->refContext);
-        ZSTD_free(cdict->dictContent, cMem);
+        ZSTD_free(cdict->dictBuffer, cMem);
         ZSTD_free(cdict, cMem);
         return 0;
     }
 }
 
-size_t ZSTD_compressBegin_usingCDict(ZSTD_CCtx* cctx, const ZSTD_CDict* cdict, U64 pledgedSrcSize)
+/*! ZSTD_initStaticCDict_advanced() :
+ *  Generate a digested dictionary in provided memory area.
+ *  workspace: The memory area to emplace the dictionary into.
+ *             Provided pointer must 8-bytes aligned.
+ *             It must outlive dictionary usage.
+ *  workspaceSize: Use ZSTD_estimateCDictSize()
+ *                 to determine how large workspace must be.
+ *  cParams : use ZSTD_getCParams() to transform a compression level
+ *            into its relevants cParams.
+ * @return : pointer to ZSTD_CDict*, or NULL if error (size too small)
+ *  Note : there is no corresponding "free" function.
+ *         Since workspace was allocated externally, it must be freed externally.
+ */
+ZSTD_CDict* ZSTD_initStaticCDict(void* workspace, size_t workspaceSize,
+                           const void* dict, size_t dictSize,
+                                 unsigned byReference, ZSTD_dictMode_e dictMode,
+                                 ZSTD_compressionParameters cParams)
 {
-    if (cdict->dictContentSize) CHECK_F(ZSTD_copyCCtx(cctx, cdict->refContext, pledgedSrcSize))
-    else CHECK_F(ZSTD_compressBegin_advanced(cctx, NULL, 0, cdict->refContext->params, pledgedSrcSize));
-    return 0;
+    size_t const cctxSize = ZSTD_estimateCCtxSize_advanced(cParams);
+    size_t const neededSize = sizeof(ZSTD_CDict) + (byReference ? 0 : dictSize)
+                            + cctxSize;
+    ZSTD_CDict* const cdict = (ZSTD_CDict*) workspace;
+    void* ptr;
+    DEBUGLOG(5, "(size_t)workspace & 7 : %u", (U32)(size_t)workspace & 7);
+    if ((size_t)workspace & 7) return NULL;  /* 8-aligned */
+    DEBUGLOG(5, "(workspaceSize < neededSize) : (%u < %u) => %u",
+        (U32)workspaceSize, (U32)neededSize, (U32)(workspaceSize < neededSize));
+    if (workspaceSize < neededSize) return NULL;
+
+    if (!byReference) {
+        memcpy(cdict+1, dict, dictSize);
+        dict = cdict+1;
+        ptr = (char*)workspace + sizeof(ZSTD_CDict) + dictSize;
+    } else {
+        ptr = cdict+1;
+    }
+    cdict->refContext = ZSTD_initStaticCCtx(ptr, cctxSize);
+
+    if (ZSTD_isError( ZSTD_initCDict_internal(cdict,
+                                              dict, dictSize,
+                                              1 /* byReference */, dictMode,
+                                              cParams) ))
+        return NULL;
+
+    return cdict;
+}
+
+ZSTD_parameters ZSTD_getParamsFromCDict(const ZSTD_CDict* cdict) {
+    return ZSTD_getParamsFromCCtx(cdict->refContext);
+}
+
+/* ZSTD_compressBegin_usingCDict_advanced() :
+ * cdict must be != NULL */
+size_t ZSTD_compressBegin_usingCDict_advanced(
+    ZSTD_CCtx* const cctx, const ZSTD_CDict* const cdict,
+    ZSTD_frameParameters const fParams, unsigned long long const pledgedSrcSize)
+{
+    if (cdict==NULL) return ERROR(dictionary_wrong);
+    {   ZSTD_parameters params = cdict->refContext->appliedParams;
+        params.fParams = fParams;
+        DEBUGLOG(5, "ZSTD_compressBegin_usingCDict_advanced");
+        return ZSTD_compressBegin_internal(cctx,
+                                           NULL, 0, ZSTD_dm_auto,
+                                           cdict,
+                                           params, pledgedSrcSize,
+                                           ZSTDb_not_buffered);
+    }
+}
+
+/* ZSTD_compressBegin_usingCDict() :
+ * pledgedSrcSize=0 means "unknown"
+ * if pledgedSrcSize>0, it will enable contentSizeFlag */
+size_t ZSTD_compressBegin_usingCDict(ZSTD_CCtx* cctx, const ZSTD_CDict* cdict)
+{
+    ZSTD_frameParameters const fParams = { 0 /*content*/, 0 /*checksum*/, 0 /*noDictID*/ };
+    DEBUGLOG(5, "ZSTD_compressBegin_usingCDict : dictIDFlag == %u", !fParams.noDictIDFlag);
+    return ZSTD_compressBegin_usingCDict_advanced(cctx, cdict, fParams, 0);
+}
+
+size_t ZSTD_compress_usingCDict_advanced(ZSTD_CCtx* cctx,
+                                void* dst, size_t dstCapacity,
+                                const void* src, size_t srcSize,
+                                const ZSTD_CDict* cdict, ZSTD_frameParameters fParams)
+{
+    CHECK_F (ZSTD_compressBegin_usingCDict_advanced(cctx, cdict, fParams, srcSize));   /* will check if cdict != NULL */
+    return ZSTD_compressEnd(cctx, dst, dstCapacity, src, srcSize);
 }
 
 /*! ZSTD_compress_usingCDict() :
-*   Compression using a digested Dictionary.
-*   Faster startup than ZSTD_compress_usingDict(), recommended when same dictionary is used multiple times.
-*   Note that compression level is decided during dictionary creation */
+ *  Compression using a digested Dictionary.
+ *  Faster startup than ZSTD_compress_usingDict(), recommended when same dictionary is used multiple times.
+ *  Note that compression parameters are decided at CDict creation time
+ *  while frame parameters are hardcoded */
 size_t ZSTD_compress_usingCDict(ZSTD_CCtx* cctx,
                                 void* dst, size_t dstCapacity,
                                 const void* src, size_t srcSize,
                                 const ZSTD_CDict* cdict)
 {
-    CHECK_F(ZSTD_compressBegin_usingCDict(cctx, cdict, srcSize));
-
-    if (cdict->refContext->params.fParams.contentSizeFlag==1) {
-        cctx->params.fParams.contentSizeFlag = 1;
-        cctx->frameContentSize = srcSize;
-    }
-
-    return ZSTD_compressEnd(cctx, dst, dstCapacity, src, srcSize);
+    ZSTD_frameParameters const fParams = { 1 /*content*/, 0 /*checksum*/, 0 /*noDictID*/ };
+    return ZSTD_compress_usingCDict_advanced(cctx, dst, dstCapacity, src, srcSize, cdict, fParams);
 }
 
 
@@ -2755,212 +3600,314 @@ size_t ZSTD_compress_usingCDict(ZSTD_CCtx* cctx,
 *  Streaming
 ********************************************************************/
 
-typedef enum { zcss_init, zcss_load, zcss_flush, zcss_final } ZSTD_cStreamStage;
-
-struct ZSTD_CStream_s {
-    ZSTD_CCtx* cctx;
-    ZSTD_CDict* cdict;
-    char*  inBuff;
-    size_t inBuffSize;
-    size_t inToCompress;
-    size_t inBuffPos;
-    size_t inBuffTarget;
-    size_t blockSize;
-    char*  outBuff;
-    size_t outBuffSize;
-    size_t outBuffContentSize;
-    size_t outBuffFlushedSize;
-    ZSTD_cStreamStage stage;
-    U32    checksum;
-    U32    frameEnded;
-    ZSTD_customMem customMem;
-};   /* typedef'd to ZSTD_CStream within "zstd.h" */
-
 ZSTD_CStream* ZSTD_createCStream(void)
 {
-    return ZSTD_createCStream_advanced(defaultCustomMem);
+    return ZSTD_createCStream_advanced(ZSTD_defaultCMem);
+}
+
+ZSTD_CStream* ZSTD_initStaticCStream(void *workspace, size_t workspaceSize)
+{
+    return ZSTD_initStaticCCtx(workspace, workspaceSize);
 }
 
 ZSTD_CStream* ZSTD_createCStream_advanced(ZSTD_customMem customMem)
-{
-    ZSTD_CStream* zcs;
-
-    if (!customMem.customAlloc && !customMem.customFree) customMem = defaultCustomMem;
-    if (!customMem.customAlloc || !customMem.customFree) return NULL;
-
-    zcs = (ZSTD_CStream*)ZSTD_malloc(sizeof(ZSTD_CStream), customMem);
-    if (zcs==NULL) return NULL;
-    memset(zcs, 0, sizeof(ZSTD_CStream));
-    memcpy(&zcs->customMem, &customMem, sizeof(ZSTD_customMem));
-    zcs->cctx = ZSTD_createCCtx_advanced(customMem);
-    if (zcs->cctx == NULL) { ZSTD_freeCStream(zcs); return NULL; }
-    return zcs;
+{   /* CStream and CCtx are now same object */
+    return ZSTD_createCCtx_advanced(customMem);
 }
 
 size_t ZSTD_freeCStream(ZSTD_CStream* zcs)
 {
-    if (zcs==NULL) return 0;   /* support free on NULL */
-    {   ZSTD_customMem const cMem = zcs->customMem;
-        ZSTD_freeCCtx(zcs->cctx);
-        ZSTD_freeCDict(zcs->cdict);
-        ZSTD_free(zcs->inBuff, cMem);
-        ZSTD_free(zcs->outBuff, cMem);
-        ZSTD_free(zcs, cMem);
-        return 0;
-    }
+    return ZSTD_freeCCtx(zcs);   /* same object */
 }
 
 
+
 /*======   Initialization   ======*/
 
-size_t ZSTD_CStreamInSize(void)  { return ZSTD_BLOCKSIZE_ABSOLUTEMAX; }
-size_t ZSTD_CStreamOutSize(void) { return ZSTD_compressBound(ZSTD_BLOCKSIZE_ABSOLUTEMAX) + ZSTD_blockHeaderSize + 4 /* 32-bits hash */ ; }
+size_t ZSTD_CStreamInSize(void)  { return ZSTD_BLOCKSIZE_MAX; }
 
-size_t ZSTD_resetCStream(ZSTD_CStream* zcs, unsigned long long pledgedSrcSize)
+size_t ZSTD_CStreamOutSize(void)
 {
-    CHECK_F(ZSTD_compressBegin_usingCDict(zcs->cctx, zcs->cdict, pledgedSrcSize));
+    return ZSTD_compressBound(ZSTD_BLOCKSIZE_MAX) + ZSTD_blockHeaderSize + 4 /* 32-bits hash */ ;
+}
+
+static size_t ZSTD_resetCStream_internal(ZSTD_CStream* zcs,
+                    const void* dict, size_t dictSize, ZSTD_dictMode_e dictMode,
+                    const ZSTD_CDict* cdict,
+                    ZSTD_parameters params, unsigned long long pledgedSrcSize)
+{
+    DEBUGLOG(4, "ZSTD_resetCStream_internal");
+    /* params are supposed to be fully validated at this point */
+    assert(!ZSTD_isError(ZSTD_checkCParams(params.cParams)));
+    assert(!((dict) && (cdict)));  /* either dict or cdict, not both */
+
+    CHECK_F( ZSTD_compressBegin_internal(zcs,
+                                        dict, dictSize, dictMode,
+                                        cdict,
+                                        params, pledgedSrcSize,
+                                        ZSTDb_buffered) );
 
     zcs->inToCompress = 0;
     zcs->inBuffPos = 0;
     zcs->inBuffTarget = zcs->blockSize;
     zcs->outBuffContentSize = zcs->outBuffFlushedSize = 0;
-    zcs->stage = zcss_load;
+    zcs->streamStage = zcss_load;
     zcs->frameEnded = 0;
     return 0;   /* ready to go */
 }
 
+size_t ZSTD_resetCStream(ZSTD_CStream* zcs, unsigned long long pledgedSrcSize)
+{
+    ZSTD_parameters params = zcs->requestedParams;
+    params.fParams.contentSizeFlag = (pledgedSrcSize > 0);
+    DEBUGLOG(5, "ZSTD_resetCStream");
+    if (zcs->compressionLevel != ZSTD_CLEVEL_CUSTOM) {
+        params.cParams = ZSTD_getCParams(zcs->compressionLevel, pledgedSrcSize, 0 /* dictSize */);
+    }
+    return ZSTD_resetCStream_internal(zcs, NULL, 0, zcs->dictMode, zcs->cdict, params, pledgedSrcSize);
+}
+
+/*! ZSTD_initCStream_internal() :
+ *  Note : not static, but hidden (not exposed). Used by zstdmt_compress.c
+ *  Assumption 1 : params are valid
+ *  Assumption 2 : either dict, or cdict, is defined, not both */
+size_t ZSTD_initCStream_internal(ZSTD_CStream* zcs,
+                    const void* dict, size_t dictSize, const ZSTD_CDict* cdict,
+                    ZSTD_parameters params, unsigned long long pledgedSrcSize)
+{
+    DEBUGLOG(5, "ZSTD_initCStream_internal");
+    assert(!ZSTD_isError(ZSTD_checkCParams(params.cParams)));
+    assert(!((dict) && (cdict)));  /* either dict or cdict, not both */
+
+    if (dict && dictSize >= 8) {
+        DEBUGLOG(5, "loading dictionary of size %u", (U32)dictSize);
+        if (zcs->staticSize) {   /* static CCtx : never uses malloc */
+            /* incompatible with internal cdict creation */
+            return ERROR(memory_allocation);
+        }
+        ZSTD_freeCDict(zcs->cdictLocal);
+        zcs->cdictLocal = ZSTD_createCDict_advanced(dict, dictSize,
+                                            zcs->dictContentByRef, zcs->dictMode,
+                                            params.cParams, zcs->customMem);
+        zcs->cdict = zcs->cdictLocal;
+        if (zcs->cdictLocal == NULL) return ERROR(memory_allocation);
+    } else {
+        if (cdict) {
+            ZSTD_parameters const cdictParams = ZSTD_getParamsFromCDict(cdict);
+            params.cParams = cdictParams.cParams;  /* cParams are enforced from cdict */
+        }
+        ZSTD_freeCDict(zcs->cdictLocal);
+        zcs->cdictLocal = NULL;
+        zcs->cdict = cdict;
+    }
+
+    zcs->requestedParams = params;
+    zcs->compressionLevel = ZSTD_CLEVEL_CUSTOM;
+    return ZSTD_resetCStream_internal(zcs, NULL, 0, zcs->dictMode, zcs->cdict, params, pledgedSrcSize);
+}
+
+/* ZSTD_initCStream_usingCDict_advanced() :
+ * same as ZSTD_initCStream_usingCDict(), with control over frame parameters */
+size_t ZSTD_initCStream_usingCDict_advanced(ZSTD_CStream* zcs,
+                                            const ZSTD_CDict* cdict,
+                                            ZSTD_frameParameters fParams,
+                                            unsigned long long pledgedSrcSize)
+{   /* cannot handle NULL cdict (does not know what to do) */
+    if (!cdict) return ERROR(dictionary_wrong);
+    {   ZSTD_parameters params = ZSTD_getParamsFromCDict(cdict);
+        params.fParams = fParams;
+        return ZSTD_initCStream_internal(zcs,
+                                NULL, 0, cdict,
+                                params, pledgedSrcSize);
+    }
+}
+
+/* note : cdict must outlive compression session */
+size_t ZSTD_initCStream_usingCDict(ZSTD_CStream* zcs, const ZSTD_CDict* cdict)
+{
+    ZSTD_frameParameters const fParams = { 0 /* contentSize */, 0 /* checksum */, 0 /* hideDictID */ };
+    return ZSTD_initCStream_usingCDict_advanced(zcs, cdict, fParams, 0);  /* note : will check that cdict != NULL */
+}
+
 size_t ZSTD_initCStream_advanced(ZSTD_CStream* zcs,
                                  const void* dict, size_t dictSize,
                                  ZSTD_parameters params, unsigned long long pledgedSrcSize)
 {
-    /* allocate buffers */
-    {   size_t const neededInBuffSize = (size_t)1 << params.cParams.windowLog;
-        if (zcs->inBuffSize < neededInBuffSize) {
-            zcs->inBuffSize = neededInBuffSize;
-            ZSTD_free(zcs->inBuff, zcs->customMem);
-            zcs->inBuff = (char*) ZSTD_malloc(neededInBuffSize, zcs->customMem);
-            if (zcs->inBuff == NULL) return ERROR(memory_allocation);
-        }
-        zcs->blockSize = MIN(ZSTD_BLOCKSIZE_ABSOLUTEMAX, neededInBuffSize);
-    }
-    if (zcs->outBuffSize < ZSTD_compressBound(zcs->blockSize)+1) {
-        zcs->outBuffSize = ZSTD_compressBound(zcs->blockSize)+1;
-        ZSTD_free(zcs->outBuff, zcs->customMem);
-        zcs->outBuff = (char*) ZSTD_malloc(zcs->outBuffSize, zcs->customMem);
-        if (zcs->outBuff == NULL) return ERROR(memory_allocation);
-    }
-
-    ZSTD_freeCDict(zcs->cdict);
-    zcs->cdict = ZSTD_createCDict_advanced(dict, dictSize, params, zcs->customMem);
-    if (zcs->cdict == NULL) return ERROR(memory_allocation);
-
-    zcs->checksum = params.fParams.checksumFlag > 0;
-
-    return ZSTD_resetCStream(zcs, pledgedSrcSize);
+    CHECK_F( ZSTD_checkCParams(params.cParams) );
+    zcs->requestedParams = params;
+    zcs->compressionLevel = ZSTD_CLEVEL_CUSTOM;
+    return ZSTD_initCStream_internal(zcs, dict, dictSize, NULL, params, pledgedSrcSize);
 }
 
 size_t ZSTD_initCStream_usingDict(ZSTD_CStream* zcs, const void* dict, size_t dictSize, int compressionLevel)
 {
     ZSTD_parameters const params = ZSTD_getParams(compressionLevel, 0, dictSize);
-    return ZSTD_initCStream_advanced(zcs, dict, dictSize, params, 0);
+    zcs->compressionLevel = compressionLevel;
+    return ZSTD_initCStream_internal(zcs, dict, dictSize, NULL, params, 0);
+}
+
+size_t ZSTD_initCStream_srcSize(ZSTD_CStream* zcs, int compressionLevel, unsigned long long pledgedSrcSize)
+{
+    ZSTD_parameters params = ZSTD_getParams(compressionLevel, pledgedSrcSize, 0);
+    params.fParams.contentSizeFlag = (pledgedSrcSize>0);
+    return ZSTD_initCStream_internal(zcs, NULL, 0, NULL, params, pledgedSrcSize);
 }
 
 size_t ZSTD_initCStream(ZSTD_CStream* zcs, int compressionLevel)
 {
-    return ZSTD_initCStream_usingDict(zcs, NULL, 0, compressionLevel);
-}
-
-size_t ZSTD_sizeof_CStream(const ZSTD_CStream* zcs)
-{
-    if (zcs==NULL) return 0;   /* support sizeof on NULL */
-    return sizeof(zcs) + ZSTD_sizeof_CCtx(zcs->cctx) + ZSTD_sizeof_CDict(zcs->cdict) + zcs->outBuffSize + zcs->inBuffSize;
+    ZSTD_parameters const params = ZSTD_getParams(compressionLevel, 0, 0);
+    return ZSTD_initCStream_internal(zcs, NULL, 0, NULL, params, 0);
 }
 
 /*======   Compression   ======*/
 
-typedef enum { zsf_gather, zsf_flush, zsf_end } ZSTD_flush_e;
-
-MEM_STATIC size_t ZSTD_limitCopy(void* dst, size_t dstCapacity, const void* src, size_t srcSize)
+MEM_STATIC size_t ZSTD_limitCopy(void* dst, size_t dstCapacity,
+                           const void* src, size_t srcSize)
 {
     size_t const length = MIN(dstCapacity, srcSize);
-    memcpy(dst, src, length);
+    if (length) memcpy(dst, src, length);
     return length;
 }
 
-static size_t ZSTD_compressStream_generic(ZSTD_CStream* zcs,
-                              void* dst, size_t* dstCapacityPtr,
-                        const void* src, size_t* srcSizePtr,
-                              ZSTD_flush_e const flush)
+/** ZSTD_compressStream_generic():
+ *  internal function for all *compressStream*() variants and *compress_generic()
+ * @return : hint size for next input */
+size_t ZSTD_compressStream_generic(ZSTD_CStream* zcs,
+                                   ZSTD_outBuffer* output,
+                                   ZSTD_inBuffer* input,
+                                   ZSTD_EndDirective const flushMode)
 {
+    const char* const istart = (const char*)input->src;
+    const char* const iend = istart + input->size;
+    const char* ip = istart + input->pos;
+    char* const ostart = (char*)output->dst;
+    char* const oend = ostart + output->size;
+    char* op = ostart + output->pos;
     U32 someMoreWork = 1;
-    const char* const istart = (const char*)src;
-    const char* const iend = istart + *srcSizePtr;
-    const char* ip = istart;
-    char* const ostart = (char*)dst;
-    char* const oend = ostart + *dstCapacityPtr;
-    char* op = ostart;
+
+    /* check expectations */
+    DEBUGLOG(5, "ZSTD_compressStream_generic, flush=%u", (U32)flushMode);
+    assert(zcs->inBuff != NULL);
+    assert(zcs->inBuffSize>0);
+    assert(zcs->outBuff!= NULL);
+    assert(zcs->outBuffSize>0);
+    assert(output->pos <= output->size);
+    assert(input->pos <= input->size);
 
     while (someMoreWork) {
-        switch(zcs->stage)
+        switch(zcs->streamStage)
         {
-        case zcss_init: return ERROR(init_missing);   /* call ZBUFF_compressInit() first ! */
+        case zcss_init:
+            /* call ZSTD_initCStream() first ! */
+            return ERROR(init_missing);
 
         case zcss_load:
-            /* complete inBuffer */
+            if ( (flushMode == ZSTD_e_end)
+              && ((size_t)(oend-op) >= ZSTD_compressBound(iend-ip))  /* enough dstCapacity */
+              && (zcs->inBuffPos == 0) ) {
+                /* shortcut to compression pass directly into output buffer */
+                size_t const cSize = ZSTD_compressEnd(zcs,
+                                                op, oend-op, ip, iend-ip);
+                DEBUGLOG(4, "ZSTD_compressEnd : %u", (U32)cSize);
+                if (ZSTD_isError(cSize)) return cSize;
+                ip = iend;
+                op += cSize;
+                zcs->frameEnded = 1;
+                ZSTD_startNewCompression(zcs);
+                someMoreWork = 0; break;
+            }
+            /* complete loading into inBuffer */
             {   size_t const toLoad = zcs->inBuffTarget - zcs->inBuffPos;
-                size_t const loaded = ZSTD_limitCopy(zcs->inBuff + zcs->inBuffPos, toLoad, ip, iend-ip);
+                size_t const loaded = ZSTD_limitCopy(
+                                        zcs->inBuff + zcs->inBuffPos, toLoad,
+                                        ip, iend-ip);
                 zcs->inBuffPos += loaded;
                 ip += loaded;
-                if ( (zcs->inBuffPos==zcs->inToCompress) || (!flush && (toLoad != loaded)) ) {
-                    someMoreWork = 0; break;  /* not enough input to get a full block : stop there, wait for more */
-            }   }
+                if ( (flushMode == ZSTD_e_continue)
+                  && (zcs->inBuffPos < zcs->inBuffTarget) ) {
+                    /* not enough input to fill full block : stop here */
+                    someMoreWork = 0; break;
+                }
+                if ( (flushMode == ZSTD_e_flush)
+                  && (zcs->inBuffPos == zcs->inToCompress) ) {
+                    /* empty */
+                    someMoreWork = 0; break;
+                }
+            }
             /* compress current block (note : this stage cannot be stopped in the middle) */
+            DEBUGLOG(5, "stream compression stage (flushMode==%u)", flushMode);
             {   void* cDst;
                 size_t cSize;
                 size_t const iSize = zcs->inBuffPos - zcs->inToCompress;
                 size_t oSize = oend-op;
+                unsigned const lastBlock = (flushMode == ZSTD_e_end) && (ip==iend);
                 if (oSize >= ZSTD_compressBound(iSize))
-                    cDst = op;   /* compress directly into output buffer (avoid flush stage) */
+                    cDst = op;   /* compress into output buffer, to skip flush stage */
                 else
                     cDst = zcs->outBuff, oSize = zcs->outBuffSize;
-                cSize = (flush == zsf_end) ?
-                        ZSTD_compressEnd(zcs->cctx, cDst, oSize, zcs->inBuff + zcs->inToCompress, iSize) :
-                        ZSTD_compressContinue(zcs->cctx, cDst, oSize, zcs->inBuff + zcs->inToCompress, iSize);
+                cSize = lastBlock ?
+                        ZSTD_compressEnd(zcs, cDst, oSize,
+                                    zcs->inBuff + zcs->inToCompress, iSize) :
+                        ZSTD_compressContinue(zcs, cDst, oSize,
+                                    zcs->inBuff + zcs->inToCompress, iSize);
                 if (ZSTD_isError(cSize)) return cSize;
-                if (flush == zsf_end) zcs->frameEnded = 1;
+                zcs->frameEnded = lastBlock;
                 /* prepare next block */
                 zcs->inBuffTarget = zcs->inBuffPos + zcs->blockSize;
                 if (zcs->inBuffTarget > zcs->inBuffSize)
-                    zcs->inBuffPos = 0, zcs->inBuffTarget = zcs->blockSize;   /* note : inBuffSize >= blockSize */
+                    zcs->inBuffPos = 0, zcs->inBuffTarget = zcs->blockSize;
+                DEBUGLOG(5, "inBuffTarget:%u / inBuffSize:%u",
+                         (U32)zcs->inBuffTarget, (U32)zcs->inBuffSize);
+                if (!lastBlock)
+                    assert(zcs->inBuffTarget <= zcs->inBuffSize);
                 zcs->inToCompress = zcs->inBuffPos;
-                if (cDst == op) { op += cSize; break; }   /* no need to flush */
+                if (cDst == op) {  /* no need to flush */
+                    op += cSize;
+                    if (zcs->frameEnded) {
+                        DEBUGLOG(5, "Frame completed directly in outBuffer");
+                        someMoreWork = 0;
+                        ZSTD_startNewCompression(zcs);
+                    }
+                    break;
+                }
                 zcs->outBuffContentSize = cSize;
                 zcs->outBuffFlushedSize = 0;
-                zcs->stage = zcss_flush;   /* pass-through to flush stage */
+                zcs->streamStage = zcss_flush; /* pass-through to flush stage */
             }
-
+	    /* fall-through */
         case zcss_flush:
+            DEBUGLOG(5, "flush stage");
             {   size_t const toFlush = zcs->outBuffContentSize - zcs->outBuffFlushedSize;
-                size_t const flushed = ZSTD_limitCopy(op, oend-op, zcs->outBuff + zcs->outBuffFlushedSize, toFlush);
+                size_t const flushed = ZSTD_limitCopy(op, oend-op,
+                            zcs->outBuff + zcs->outBuffFlushedSize, toFlush);
+                DEBUGLOG(5, "toFlush: %u into %u ==> flushed: %u",
+                            (U32)toFlush, (U32)(oend-op), (U32)flushed);
                 op += flushed;
                 zcs->outBuffFlushedSize += flushed;
-                if (toFlush!=flushed) { someMoreWork = 0; break; }  /* dst too small to store flushed data : stop there */
+                if (toFlush!=flushed) {
+                    /* flush not fully completed, presumably because dst is too small */
+                    assert(op==oend);
+                    someMoreWork = 0;
+                    break;
+                }
                 zcs->outBuffContentSize = zcs->outBuffFlushedSize = 0;
-                zcs->stage = zcss_load;
+                if (zcs->frameEnded) {
+                    DEBUGLOG(5, "Frame completed on flush");
+                    someMoreWork = 0;
+                    ZSTD_startNewCompression(zcs);
+                    break;
+                }
+                zcs->streamStage = zcss_load;
                 break;
             }
 
-        case zcss_final:
-            someMoreWork = 0;   /* do nothing */
-            break;
-
-        default:
-            return ERROR(GENERIC);   /* impossible */
+        default: /* impossible */
+            assert(0);
         }
     }
 
-    *srcSizePtr = ip - istart;
-    *dstCapacityPtr = op - ostart;
+    input->pos = ip - istart;
+    output->pos = op - ostart;
     if (zcs->frameEnded) return 0;
     {   size_t hintInSize = zcs->inBuffTarget - zcs->inBuffPos;
         if (hintInSize==0) hintInSize = zcs->blockSize;
@@ -2970,14 +3917,86 @@ static size_t ZSTD_compressStream_generic(ZSTD_CStream* zcs,
 
 size_t ZSTD_compressStream(ZSTD_CStream* zcs, ZSTD_outBuffer* output, ZSTD_inBuffer* input)
 {
-    size_t sizeRead = input->size - input->pos;
-    size_t sizeWritten = output->size - output->pos;
-    size_t const result = ZSTD_compressStream_generic(zcs,
-                                                      (char*)(output->dst) + output->pos, &sizeWritten,
-                                                      (const char*)(input->src) + input->pos, &sizeRead, zsf_gather);
-    input->pos += sizeRead;
-    output->pos += sizeWritten;
-    return result;
+    /* check conditions */
+    if (output->pos > output->size) return ERROR(GENERIC);
+    if (input->pos  > input->size)  return ERROR(GENERIC);
+
+    return ZSTD_compressStream_generic(zcs, output, input, ZSTD_e_continue);
+}
+
+/*! ZSTDMT_initCStream_internal() :
+ *  Private use only. Init streaming operation.
+ *  expects params to be valid.
+ *  must receive dict, or cdict, or none, but not both.
+ *  @return : 0, or an error code */
+size_t ZSTDMT_initCStream_internal(ZSTDMT_CCtx* zcs,
+                    const void* dict, size_t dictSize, const ZSTD_CDict* cdict,
+                    ZSTD_parameters params, unsigned long long pledgedSrcSize);
+
+
+size_t ZSTD_compress_generic (ZSTD_CCtx* cctx,
+                              ZSTD_outBuffer* output,
+                              ZSTD_inBuffer* input,
+                              ZSTD_EndDirective endOp)
+{
+    /* check conditions */
+    if (output->pos > output->size) return ERROR(GENERIC);
+    if (input->pos  > input->size)  return ERROR(GENERIC);
+    assert(cctx!=NULL);
+
+    /* transparent initialization stage */
+    if (cctx->streamStage == zcss_init) {
+        const void* const prefix = cctx->prefix;
+        size_t const prefixSize = cctx->prefixSize;
+        ZSTD_parameters params = cctx->requestedParams;
+        if (cctx->compressionLevel != ZSTD_CLEVEL_CUSTOM)
+            params.cParams = ZSTD_getCParams(cctx->compressionLevel,
+                                cctx->pledgedSrcSizePlusOne-1, 0 /*dictSize*/);
+        cctx->prefix = NULL; cctx->prefixSize = 0;   /* single usage */
+        assert(prefix==NULL || cctx->cdict==NULL);   /* only one can be set */
+
+#ifdef ZSTD_MULTITHREAD
+        if (cctx->nbThreads > 1) {
+            DEBUGLOG(4, "call ZSTDMT_initCStream_internal as nbThreads=%u", cctx->nbThreads);
+            CHECK_F( ZSTDMT_initCStream_internal(cctx->mtctx, prefix, prefixSize, cctx->cdict, params, cctx->pledgedSrcSizePlusOne-1) );
+            cctx->streamStage = zcss_load;
+        } else
+#endif
+        {
+            CHECK_F( ZSTD_resetCStream_internal(cctx, prefix, prefixSize, cctx->dictMode, cctx->cdict, params, cctx->pledgedSrcSizePlusOne-1) );
+    }   }
+
+    /* compression stage */
+#ifdef ZSTD_MULTITHREAD
+    if (cctx->nbThreads > 1) {
+        size_t const flushMin = ZSTDMT_compressStream_generic(cctx->mtctx, output, input, endOp);
+        DEBUGLOG(5, "ZSTDMT_compressStream_generic : %u", (U32)flushMin);
+        if ( ZSTD_isError(flushMin)
+          || (endOp == ZSTD_e_end && flushMin == 0) ) { /* compression completed */
+            ZSTD_startNewCompression(cctx);
+        }
+        return flushMin;
+    }
+#endif
+
+    CHECK_F( ZSTD_compressStream_generic(cctx, output, input, endOp) );
+    DEBUGLOG(5, "completed ZSTD_compress_generic");
+    return cctx->outBuffContentSize - cctx->outBuffFlushedSize; /* remaining to flush */
+}
+
+size_t ZSTD_compress_generic_simpleArgs (
+                            ZSTD_CCtx* cctx,
+                            void* dst, size_t dstCapacity, size_t* dstPos,
+                      const void* src, size_t srcSize, size_t* srcPos,
+                            ZSTD_EndDirective endOp)
+{
+    ZSTD_outBuffer output = { dst, dstCapacity, *dstPos };
+    ZSTD_inBuffer  input  = { src, srcSize, *srcPos };
+    /* ZSTD_compress_generic() will check validity of dstPos and srcPos */
+    size_t const cErr = ZSTD_compress_generic(cctx, &output, &input, endOp);
+    *dstPos = output.pos;
+    *srcPos = input.pos;
+    return cErr;
 }
 
 
@@ -2987,86 +4006,59 @@ size_t ZSTD_compressStream(ZSTD_CStream* zcs, ZSTD_outBuffer* output, ZSTD_inBuf
 *   @return : amount of data remaining to flush */
 size_t ZSTD_flushStream(ZSTD_CStream* zcs, ZSTD_outBuffer* output)
 {
-    size_t srcSize = 0;
-    size_t sizeWritten = output->size - output->pos;
-    size_t const result = ZSTD_compressStream_generic(zcs,
-                                                     (char*)(output->dst) + output->pos, &sizeWritten,
-                                                     &srcSize, &srcSize, /* use a valid src address instead of NULL */
-                                                      zsf_flush);
-    output->pos += sizeWritten;
-    if (ZSTD_isError(result)) return result;
-    return zcs->outBuffContentSize - zcs->outBuffFlushedSize;   /* remaining to flush */
+    ZSTD_inBuffer input = { NULL, 0, 0 };
+    if (output->pos > output->size) return ERROR(GENERIC);
+    CHECK_F( ZSTD_compressStream_generic(zcs, output, &input, ZSTD_e_flush) );
+    return zcs->outBuffContentSize - zcs->outBuffFlushedSize;  /* remaining to flush */
 }
 
 
 size_t ZSTD_endStream(ZSTD_CStream* zcs, ZSTD_outBuffer* output)
 {
-    BYTE* const ostart = (BYTE*)(output->dst) + output->pos;
-    BYTE* const oend = (BYTE*)(output->dst) + output->size;
-    BYTE* op = ostart;
-
-    if (zcs->stage != zcss_final) {
-        /* flush whatever remains */
-        size_t srcSize = 0;
-        size_t sizeWritten = output->size - output->pos;
-        size_t const notEnded = ZSTD_compressStream_generic(zcs, ostart, &sizeWritten, &srcSize, &srcSize, zsf_end);  /* use a valid src address instead of NULL */
-        size_t const remainingToFlush = zcs->outBuffContentSize - zcs->outBuffFlushedSize;
-        op += sizeWritten;
-        if (remainingToFlush) {
-            output->pos += sizeWritten;
-            return remainingToFlush + ZSTD_BLOCKHEADERSIZE /* final empty block */ + (zcs->checksum * 4);
-        }
-        /* create epilogue */
-        zcs->stage = zcss_final;
-        zcs->outBuffContentSize = !notEnded ? 0 :
-            ZSTD_compressEnd(zcs->cctx, zcs->outBuff, zcs->outBuffSize, NULL, 0);  /* write epilogue, including final empty block, into outBuff */
-    }
-
-    /* flush epilogue */
-    {   size_t const toFlush = zcs->outBuffContentSize - zcs->outBuffFlushedSize;
-        size_t const flushed = ZSTD_limitCopy(op, oend-op, zcs->outBuff + zcs->outBuffFlushedSize, toFlush);
-        op += flushed;
-        zcs->outBuffFlushedSize += flushed;
-        output->pos += op-ostart;
-        if (toFlush==flushed) zcs->stage = zcss_init;  /* end reached */
-        return toFlush - flushed;
+    ZSTD_inBuffer input = { NULL, 0, 0 };
+    if (output->pos > output->size) return ERROR(GENERIC);
+    CHECK_F( ZSTD_compressStream_generic(zcs, output, &input, ZSTD_e_end) );
+    {   size_t const lastBlockSize = zcs->frameEnded ? 0 : ZSTD_BLOCKHEADERSIZE;
+        size_t const checksumSize = zcs->frameEnded ? 0 : zcs->appliedParams.fParams.checksumFlag * 4;
+        size_t const toFlush = zcs->outBuffContentSize - zcs->outBuffFlushedSize + lastBlockSize + checksumSize;
+        DEBUGLOG(5, "ZSTD_endStream : remaining to flush : %u",
+                (unsigned)toFlush);
+        return toFlush;
     }
 }
 
 
-
 /*-=====  Pre-defined compression levels  =====-*/
 
-#define ZSTD_DEFAULT_CLEVEL 1
 #define ZSTD_MAX_CLEVEL     22
 int ZSTD_maxCLevel(void) { return ZSTD_MAX_CLEVEL; }
 
 static const ZSTD_compressionParameters ZSTD_defaultCParameters[4][ZSTD_MAX_CLEVEL+1] = {
-{   /* "default" */
+{   /* "default" - guarantees a monotonically increasing memory budget */
     /* W,  C,  H,  S,  L, TL, strat */
     { 18, 12, 12,  1,  7, 16, ZSTD_fast    },  /* level  0 - never used */
     { 19, 13, 14,  1,  7, 16, ZSTD_fast    },  /* level  1 */
     { 19, 15, 16,  1,  6, 16, ZSTD_fast    },  /* level  2 */
-    { 20, 16, 17,  1,  5, 16, ZSTD_dfast   },  /* level  3.*/
-    { 20, 18, 18,  1,  5, 16, ZSTD_dfast   },  /* level  4.*/
-    { 20, 15, 18,  3,  5, 16, ZSTD_greedy  },  /* level  5 */
-    { 21, 16, 19,  2,  5, 16, ZSTD_lazy    },  /* level  6 */
-    { 21, 17, 20,  3,  5, 16, ZSTD_lazy    },  /* level  7 */
+    { 20, 16, 17,  1,  5, 16, ZSTD_dfast   },  /* level  3 */
+    { 20, 17, 18,  1,  5, 16, ZSTD_dfast   },  /* level  4 */
+    { 20, 17, 18,  2,  5, 16, ZSTD_greedy  },  /* level  5 */
+    { 21, 17, 19,  2,  5, 16, ZSTD_lazy    },  /* level  6 */
+    { 21, 18, 19,  3,  5, 16, ZSTD_lazy    },  /* level  7 */
     { 21, 18, 20,  3,  5, 16, ZSTD_lazy2   },  /* level  8 */
-    { 21, 20, 20,  3,  5, 16, ZSTD_lazy2   },  /* level  9 */
+    { 21, 19, 20,  3,  5, 16, ZSTD_lazy2   },  /* level  9 */
     { 21, 19, 21,  4,  5, 16, ZSTD_lazy2   },  /* level 10 */
     { 22, 20, 22,  4,  5, 16, ZSTD_lazy2   },  /* level 11 */
     { 22, 20, 22,  5,  5, 16, ZSTD_lazy2   },  /* level 12 */
     { 22, 21, 22,  5,  5, 16, ZSTD_lazy2   },  /* level 13 */
     { 22, 21, 22,  6,  5, 16, ZSTD_lazy2   },  /* level 14 */
-    { 22, 21, 21,  5,  5, 16, ZSTD_btlazy2 },  /* level 15 */
+    { 22, 21, 22,  5,  5, 16, ZSTD_btlazy2 },  /* level 15 */
     { 23, 22, 22,  5,  5, 16, ZSTD_btlazy2 },  /* level 16 */
-    { 23, 21, 22,  4,  5, 24, ZSTD_btopt   },  /* level 17 */
-    { 23, 23, 22,  6,  5, 32, ZSTD_btopt   },  /* level 18 */
+    { 23, 22, 22,  4,  5, 24, ZSTD_btopt   },  /* level 17 */
+    { 23, 22, 22,  5,  4, 32, ZSTD_btopt   },  /* level 18 */
     { 23, 23, 22,  6,  3, 48, ZSTD_btopt   },  /* level 19 */
-    { 25, 25, 23,  7,  3, 64, ZSTD_btopt   },  /* level 20 */
-    { 26, 26, 23,  7,  3,256, ZSTD_btopt   },  /* level 21 */
-    { 27, 27, 25,  9,  3,512, ZSTD_btopt   },  /* level 22 */
+    { 25, 25, 23,  7,  3, 64, ZSTD_btultra },  /* level 20 */
+    { 26, 26, 24,  7,  3,256, ZSTD_btultra },  /* level 21 */
+    { 27, 27, 25,  9,  3,512, ZSTD_btultra },  /* level 22 */
 },
 {   /* for srcSize <= 256 KB */
     /* W,  C,  H,  S,  L,  T, strat */
@@ -3090,9 +4082,9 @@ static const ZSTD_compressionParameters ZSTD_defaultCParameters[4][ZSTD_MAX_CLEV
     { 18, 19, 18,  8,  3, 64, ZSTD_btopt   },  /* level 17.*/
     { 18, 19, 18,  9,  3,128, ZSTD_btopt   },  /* level 18.*/
     { 18, 19, 18, 10,  3,256, ZSTD_btopt   },  /* level 19.*/
-    { 18, 19, 18, 11,  3,512, ZSTD_btopt   },  /* level 20.*/
-    { 18, 19, 18, 12,  3,512, ZSTD_btopt   },  /* level 21.*/
-    { 18, 19, 18, 13,  3,512, ZSTD_btopt   },  /* level 22.*/
+    { 18, 19, 18, 11,  3,512, ZSTD_btultra },  /* level 20.*/
+    { 18, 19, 18, 12,  3,512, ZSTD_btultra },  /* level 21.*/
+    { 18, 19, 18, 13,  3,512, ZSTD_btultra },  /* level 22.*/
 },
 {   /* for srcSize <= 128 KB */
     /* W,  C,  H,  S,  L,  T, strat */
@@ -3116,9 +4108,9 @@ static const ZSTD_compressionParameters ZSTD_defaultCParameters[4][ZSTD_MAX_CLEV
     { 17, 18, 17,  7,  3, 64, ZSTD_btopt   },  /* level 17.*/
     { 17, 18, 17,  7,  3,256, ZSTD_btopt   },  /* level 18.*/
     { 17, 18, 17,  8,  3,256, ZSTD_btopt   },  /* level 19.*/
-    { 17, 18, 17,  9,  3,256, ZSTD_btopt   },  /* level 20.*/
-    { 17, 18, 17, 10,  3,256, ZSTD_btopt   },  /* level 21.*/
-    { 17, 18, 17, 11,  3,512, ZSTD_btopt   },  /* level 22.*/
+    { 17, 18, 17,  9,  3,256, ZSTD_btultra },  /* level 20.*/
+    { 17, 18, 17, 10,  3,256, ZSTD_btultra },  /* level 21.*/
+    { 17, 18, 17, 11,  3,512, ZSTD_btultra },  /* level 22.*/
 },
 {   /* for srcSize <= 16 KB */
     /* W,  C,  H,  S,  L,  T, strat */
@@ -3142,39 +4134,59 @@ static const ZSTD_compressionParameters ZSTD_defaultCParameters[4][ZSTD_MAX_CLEV
     { 14, 15, 15,  6,  3,128, ZSTD_btopt   },  /* level 17.*/
     { 14, 15, 15,  6,  3,256, ZSTD_btopt   },  /* level 18.*/
     { 14, 15, 15,  7,  3,256, ZSTD_btopt   },  /* level 19.*/
-    { 14, 15, 15,  8,  3,256, ZSTD_btopt   },  /* level 20.*/
-    { 14, 15, 15,  9,  3,256, ZSTD_btopt   },  /* level 21.*/
-    { 14, 15, 15, 10,  3,256, ZSTD_btopt   },  /* level 22.*/
+    { 14, 15, 15,  8,  3,256, ZSTD_btultra },  /* level 20.*/
+    { 14, 15, 15,  9,  3,256, ZSTD_btultra },  /* level 21.*/
+    { 14, 15, 15, 10,  3,256, ZSTD_btultra },  /* level 22.*/
 },
 };
 
+#if defined(ZSTD_DEBUG) && (ZSTD_DEBUG>=1)
+/* This function just controls
+ * the monotonic memory budget increase of ZSTD_defaultCParameters[0].
+ * Run once, on first ZSTD_getCParams() usage, if ZSTD_DEBUG is enabled
+ */
+MEM_STATIC void ZSTD_check_compressionLevel_monotonicIncrease_memoryBudget(void)
+{
+    int level;
+    for (level=1; level<ZSTD_maxCLevel(); level++) {
+        ZSTD_compressionParameters const c1 = ZSTD_defaultCParameters[0][level];
+        ZSTD_compressionParameters const c2 = ZSTD_defaultCParameters[0][level+1];
+        assert(c1.windowLog <= c2.windowLog);
+#       define ZSTD_TABLECOST(h,c) ((1<<(h)) + (1<<(c)))
+        assert(ZSTD_TABLECOST(c1.hashLog, c1.chainLog) <= ZSTD_TABLECOST(c2.hashLog, c2.chainLog));
+    }
+}
+#endif
+
 /*! ZSTD_getCParams() :
 *   @return ZSTD_compressionParameters structure for a selected compression level, `srcSize` and `dictSize`.
 *   Size values are optional, provide 0 if not known or unused */
-ZSTD_compressionParameters ZSTD_getCParams(int compressionLevel, unsigned long long srcSize, size_t dictSize)
+ZSTD_compressionParameters ZSTD_getCParams(int compressionLevel, unsigned long long srcSizeHint, size_t dictSize)
 {
-    ZSTD_compressionParameters cp;
-    size_t const addedSize = srcSize ? 0 : 500;
-    U64 const rSize = srcSize+dictSize ? srcSize+dictSize+addedSize : (U64)-1;
+    size_t const addedSize = srcSizeHint ? 0 : 500;
+    U64 const rSize = srcSizeHint+dictSize ? srcSizeHint+dictSize+addedSize : (U64)-1;
     U32 const tableID = (rSize <= 256 KB) + (rSize <= 128 KB) + (rSize <= 16 KB);   /* intentional underflow for srcSizeHint == 0 */
-    if (compressionLevel <= 0) compressionLevel = ZSTD_DEFAULT_CLEVEL;   /* 0 == default; no negative compressionLevel yet */
-    if (compressionLevel > ZSTD_MAX_CLEVEL) compressionLevel = ZSTD_MAX_CLEVEL;
-    cp = ZSTD_defaultCParameters[tableID][compressionLevel];
-    if (MEM_32bits()) {   /* auto-correction, for 32-bits mode */
-        if (cp.windowLog > ZSTD_WINDOWLOG_MAX) cp.windowLog = ZSTD_WINDOWLOG_MAX;
-        if (cp.chainLog > ZSTD_CHAINLOG_MAX) cp.chainLog = ZSTD_CHAINLOG_MAX;
-        if (cp.hashLog > ZSTD_HASHLOG_MAX) cp.hashLog = ZSTD_HASHLOG_MAX;
+
+#if defined(ZSTD_DEBUG) && (ZSTD_DEBUG>=1)
+    static int g_monotonicTest = 1;
+    if (g_monotonicTest) {
+        ZSTD_check_compressionLevel_monotonicIncrease_memoryBudget();
+        g_monotonicTest=0;
     }
-    cp = ZSTD_adjustCParams(cp, srcSize, dictSize);
-    return cp;
+#endif
+
+    if (compressionLevel <= 0) compressionLevel = ZSTD_CLEVEL_DEFAULT;   /* 0 == default; no negative compressionLevel yet */
+    if (compressionLevel > ZSTD_MAX_CLEVEL) compressionLevel = ZSTD_MAX_CLEVEL;
+    { ZSTD_compressionParameters const cp = ZSTD_defaultCParameters[tableID][compressionLevel];
+      return ZSTD_adjustCParams_internal(cp, srcSizeHint, dictSize); }
 }
 
 /*! ZSTD_getParams() :
 *   same as ZSTD_getCParams(), but @return a `ZSTD_parameters` object (instead of `ZSTD_compressionParameters`).
 *   All fields of `ZSTD_frameParameters` are set to default (0) */
-ZSTD_parameters ZSTD_getParams(int compressionLevel, unsigned long long srcSize, size_t dictSize) {
+ZSTD_parameters ZSTD_getParams(int compressionLevel, unsigned long long srcSizeHint, size_t dictSize) {
     ZSTD_parameters params;
-    ZSTD_compressionParameters const cParams = ZSTD_getCParams(compressionLevel, srcSize, dictSize);
+    ZSTD_compressionParameters const cParams = ZSTD_getCParams(compressionLevel, srcSizeHint, dictSize);
     memset(&params, 0, sizeof(params));
     params.cParams = cParams;
     return params;
diff --git a/contrib/libzstd/include/zstd/compress/zstd_opt.h b/contrib/libzstd/include/zstd/compress/zstd_opt.h
index cb5872908f2..e8e98915ea3 100644
--- a/contrib/libzstd/include/zstd/compress/zstd_opt.h
+++ b/contrib/libzstd/include/zstd/compress/zstd_opt.h
@@ -1,5 +1,5 @@
 /**
- * Copyright (c) 2016-present, Yann Collet, Facebook, Inc.
+ * Copyright (c) 2016-present, Przemyslaw Skibinski, Yann Collet, Facebook, Inc.
  * All rights reserved.
  *
  * This source code is licensed under the BSD-style license found in the
@@ -15,7 +15,9 @@
 #define ZSTD_OPT_H_91842398743
 
 
-#define ZSTD_FREQ_DIV   5
+#define ZSTD_LITFREQ_ADD    2
+#define ZSTD_FREQ_DIV       4
+#define ZSTD_MAX_PRICE      (1<<30)
 
 /*-*************************************
 *  Price functions for optimal parser
@@ -30,22 +32,33 @@ FORCE_INLINE void ZSTD_setLog2Prices(seqStore_t* ssPtr)
 }
 
 
-MEM_STATIC void ZSTD_rescaleFreqs(seqStore_t* ssPtr)
+MEM_STATIC void ZSTD_rescaleFreqs(seqStore_t* ssPtr, const BYTE* src, size_t srcSize)
 {
     unsigned u;
 
     ssPtr->cachedLiterals = NULL;
     ssPtr->cachedPrice = ssPtr->cachedLitLength = 0;
+    ssPtr->staticPrices = 0;
 
     if (ssPtr->litLengthSum == 0) {
-        ssPtr->litSum = (2<<Litbits);
+        if (srcSize <= 1024) ssPtr->staticPrices = 1;
+
+        assert(ssPtr->litFreq!=NULL);
+        for (u=0; u<=MaxLit; u++)
+            ssPtr->litFreq[u] = 0;
+        for (u=0; u<srcSize; u++)
+            ssPtr->litFreq[src[u]]++;
+
+        ssPtr->litSum = 0;
         ssPtr->litLengthSum = MaxLL+1;
         ssPtr->matchLengthSum = MaxML+1;
         ssPtr->offCodeSum = (MaxOff+1);
-        ssPtr->matchSum = (2<<Litbits);
+        ssPtr->matchSum = (ZSTD_LITFREQ_ADD<<Litbits);
 
-        for (u=0; u<=MaxLit; u++)
-            ssPtr->litFreq[u] = 2;
+        for (u=0; u<=MaxLit; u++) {
+            ssPtr->litFreq[u] = 1 + (ssPtr->litFreq[u]>>ZSTD_FREQ_DIV);
+            ssPtr->litSum += ssPtr->litFreq[u];
+        }
         for (u=0; u<=MaxLL; u++)
             ssPtr->litLengthFreq[u] = 1;
         for (u=0; u<=MaxML; u++)
@@ -60,11 +73,11 @@ MEM_STATIC void ZSTD_rescaleFreqs(seqStore_t* ssPtr)
         ssPtr->litSum = 0;
 
         for (u=0; u<=MaxLit; u++) {
-            ssPtr->litFreq[u] = 1 + (ssPtr->litFreq[u]>>ZSTD_FREQ_DIV);
+            ssPtr->litFreq[u] = 1 + (ssPtr->litFreq[u]>>(ZSTD_FREQ_DIV+1));
             ssPtr->litSum += ssPtr->litFreq[u];
         }
         for (u=0; u<=MaxLL; u++) {
-            ssPtr->litLengthFreq[u] = 1 + (ssPtr->litLengthFreq[u]>>ZSTD_FREQ_DIV);
+            ssPtr->litLengthFreq[u] = 1 + (ssPtr->litLengthFreq[u]>>(ZSTD_FREQ_DIV+1));
             ssPtr->litLengthSum += ssPtr->litLengthFreq[u];
         }
         for (u=0; u<=MaxML; u++) {
@@ -72,6 +85,7 @@ MEM_STATIC void ZSTD_rescaleFreqs(seqStore_t* ssPtr)
             ssPtr->matchLengthSum += ssPtr->matchLengthFreq[u];
             ssPtr->matchSum += ssPtr->matchLengthFreq[u] * (u + 3);
         }
+        ssPtr->matchSum *= ZSTD_LITFREQ_ADD;
         for (u=0; u<=MaxOff; u++) {
             ssPtr->offCodeFreq[u] = 1 + (ssPtr->offCodeFreq[u]>>ZSTD_FREQ_DIV);
             ssPtr->offCodeSum += ssPtr->offCodeFreq[u];
@@ -86,6 +100,9 @@ FORCE_INLINE U32 ZSTD_getLiteralPrice(seqStore_t* ssPtr, U32 litLength, const BY
 {
     U32 price, u;
 
+    if (ssPtr->staticPrices)
+        return ZSTD_highbit32((U32)litLength+1) + (litLength*6);
+
     if (litLength == 0)
         return ssPtr->log2litLengthSum - ZSTD_highbit32(ssPtr->litLengthFreq[0]+1);
 
@@ -120,11 +137,17 @@ FORCE_INLINE U32 ZSTD_getLiteralPrice(seqStore_t* ssPtr, U32 litLength, const BY
 }
 
 
-FORCE_INLINE U32 ZSTD_getPrice(seqStore_t* seqStorePtr, U32 litLength, const BYTE* literals, U32 offset, U32 matchLength)
+FORCE_INLINE U32 ZSTD_getPrice(seqStore_t* seqStorePtr, U32 litLength, const BYTE* literals, U32 offset, U32 matchLength, const int ultra)
 {
     /* offset */
+    U32 price;
     BYTE const offCode = (BYTE)ZSTD_highbit32(offset+1);
-    U32 price = offCode + seqStorePtr->log2offCodeSum - ZSTD_highbit32(seqStorePtr->offCodeFreq[offCode]+1);
+
+    if (seqStorePtr->staticPrices)
+        return ZSTD_getLiteralPrice(seqStorePtr, litLength, literals) + ZSTD_highbit32((U32)matchLength+1) + 16 + offCode;
+
+    price = offCode + seqStorePtr->log2offCodeSum - ZSTD_highbit32(seqStorePtr->offCodeFreq[offCode]+1);
+    if (!ultra && offCode >= 20) price += (offCode-19)*2;
 
     /* match Length */
     {   const BYTE ML_deltaCode = 36;
@@ -141,9 +164,9 @@ MEM_STATIC void ZSTD_updatePrice(seqStore_t* seqStorePtr, U32 litLength, const B
     U32 u;
 
     /* literals */
-    seqStorePtr->litSum += litLength;
+    seqStorePtr->litSum += litLength*ZSTD_LITFREQ_ADD;
     for (u=0; u < litLength; u++)
-        seqStorePtr->litFreq[literals[u]]++;
+        seqStorePtr->litFreq[literals[u]] += ZSTD_LITFREQ_ADD;
 
     /* literal Length */
     {   const BYTE LL_deltaCode = 19;
@@ -153,10 +176,10 @@ MEM_STATIC void ZSTD_updatePrice(seqStore_t* seqStorePtr, U32 litLength, const B
     }
 
     /* match offset */
-	{   BYTE const offCode = (BYTE)ZSTD_highbit32(offset+1);
-		seqStorePtr->offCodeSum++;
-		seqStorePtr->offCodeFreq[offCode]++;
-	}
+    {   BYTE const offCode = (BYTE)ZSTD_highbit32(offset+1);
+        seqStorePtr->offCodeSum++;
+        seqStorePtr->offCodeFreq[offCode]++;
+    }
 
     /* match Length */
     {   const BYTE ML_deltaCode = 36;
@@ -171,7 +194,7 @@ MEM_STATIC void ZSTD_updatePrice(seqStore_t* seqStorePtr, U32 litLength, const B
 
 #define SET_PRICE(pos, mlen_, offset_, litlen_, price_)   \
     {                                                 \
-        while (last_pos < pos)  { opt[last_pos+1].price = 1<<30; last_pos++; } \
+        while (last_pos < pos)  { opt[last_pos+1].price = ZSTD_MAX_PRICE; last_pos++; } \
         opt[pos].mlen = mlen_;                         \
         opt[pos].off = offset_;                        \
         opt[pos].litlen = litlen_;                     \
@@ -179,9 +202,23 @@ MEM_STATIC void ZSTD_updatePrice(seqStore_t* seqStorePtr, U32 litLength, const B
     }
 
 
+/* function safe only for comparisons */
+MEM_STATIC U32 ZSTD_readMINMATCH(const void* memPtr, U32 length)
+{
+    switch (length)
+    {
+    default :
+    case 4 : return MEM_read32(memPtr);
+    case 3 : if (MEM_isLittleEndian())
+                return MEM_read32(memPtr)<<8;
+             else
+                return MEM_read32(memPtr)>>8;
+    }
+}
+
 
 /* Update hashTable3 up to ip (excluded)
-   Assumption : always within prefix (ie. not within extDict) */
+   Assumption : always within prefix (i.e. not within extDict) */
 FORCE_INLINE
 U32 ZSTD_insertAndFindFirstIndexHash3 (ZSTD_CCtx* zc, const BYTE* ip)
 {
@@ -212,12 +249,12 @@ static U32 ZSTD_insertBtAndGetAllMatches (
 {
     const BYTE* const base = zc->base;
     const U32 current = (U32)(ip-base);
-    const U32 hashLog = zc->params.cParams.hashLog;
+    const U32 hashLog = zc->appliedParams.cParams.hashLog;
     const size_t h  = ZSTD_hashPtr(ip, hashLog, mls);
     U32* const hashTable = zc->hashTable;
     U32 matchIndex  = hashTable[h];
     U32* const bt   = zc->chainTable;
-    const U32 btLog = zc->params.cParams.chainLog - 1;
+    const U32 btLog = zc->appliedParams.cParams.chainLog - 1;
     const U32 btMask= (1U << btLog) - 1;
     size_t commonLengthSmaller=0, commonLengthLarger=0;
     const BYTE* const dictBase = zc->dictBase;
@@ -245,7 +282,7 @@ static U32 ZSTD_insertBtAndGetAllMatches (
                 if (match[bestLength] == ip[bestLength]) currentMl = ZSTD_count(ip, match, iLimit);
             } else {
                 match = dictBase + matchIndex3;
-                if (MEM_readMINMATCH(match, MINMATCH) == MEM_readMINMATCH(ip, MINMATCH))    /* assumption : matchIndex3 <= dictLimit-4 (by table construction) */
+                if (ZSTD_readMINMATCH(match, MINMATCH) == ZSTD_readMINMATCH(ip, MINMATCH))    /* assumption : matchIndex3 <= dictLimit-4 (by table construction) */
                     currentMl = ZSTD_count_2segments(ip+MINMATCH, match+MINMATCH, iLimit, dictEnd, prefixStart) + MINMATCH;
             }
 
@@ -338,6 +375,7 @@ static U32 ZSTD_BtGetAllMatches_selectMLS (
     default :
     case 4 : return ZSTD_BtGetAllMatches(zc, ip, iHighLimit, maxNbAttempts, 4, matches, minMatchLen);
     case 5 : return ZSTD_BtGetAllMatches(zc, ip, iHighLimit, maxNbAttempts, 5, matches, minMatchLen);
+    case 7 :
     case 6 : return ZSTD_BtGetAllMatches(zc, ip, iHighLimit, maxNbAttempts, 6, matches, minMatchLen);
     }
 }
@@ -365,6 +403,7 @@ static U32 ZSTD_BtGetAllMatches_selectMLS_extDict (
     default :
     case 4 : return ZSTD_BtGetAllMatches_extDict(zc, ip, iHighLimit, maxNbAttempts, 4, matches, minMatchLen);
     case 5 : return ZSTD_BtGetAllMatches_extDict(zc, ip, iHighLimit, maxNbAttempts, 5, matches, minMatchLen);
+    case 7 :
     case 6 : return ZSTD_BtGetAllMatches_extDict(zc, ip, iHighLimit, maxNbAttempts, 6, matches, minMatchLen);
     }
 }
@@ -375,7 +414,7 @@ static U32 ZSTD_BtGetAllMatches_selectMLS_extDict (
 *********************************/
 FORCE_INLINE
 void ZSTD_compressBlock_opt_generic(ZSTD_CCtx* ctx,
-                                    const void* src, size_t srcSize)
+                                    const void* src, size_t srcSize, const int ultra)
 {
     seqStore_t* seqStorePtr = &(ctx->seqStore);
     const BYTE* const istart = (const BYTE*)src;
@@ -386,10 +425,10 @@ void ZSTD_compressBlock_opt_generic(ZSTD_CCtx* ctx,
     const BYTE* const base = ctx->base;
     const BYTE* const prefixStart = base + ctx->dictLimit;
 
-    const U32 maxSearches = 1U << ctx->params.cParams.searchLog;
-    const U32 sufficient_len = ctx->params.cParams.targetLength;
-    const U32 mls = ctx->params.cParams.searchLength;
-    const U32 minMatch = (ctx->params.cParams.searchLength == 3) ? 3 : 4;
+    const U32 maxSearches = 1U << ctx->appliedParams.cParams.searchLog;
+    const U32 sufficient_len = ctx->appliedParams.cParams.targetLength;
+    const U32 mls = ctx->appliedParams.cParams.searchLength;
+    const U32 minMatch = (ctx->appliedParams.cParams.searchLength == 3) ? 3 : 4;
 
     ZSTD_optimal_t* opt = seqStorePtr->priceTable;
     ZSTD_match_t* matches = seqStorePtr->matchTable;
@@ -398,10 +437,9 @@ void ZSTD_compressBlock_opt_generic(ZSTD_CCtx* ctx,
 
     /* init */
     ctx->nextToUpdate3 = ctx->nextToUpdate;
-    ZSTD_rescaleFreqs(seqStorePtr);
+    ZSTD_rescaleFreqs(seqStorePtr, (const BYTE*)src, srcSize);
     ip += (ip==prefixStart);
     { U32 i; for (i=0; i<ZSTD_REP_NUM; i++) rep[i]=ctx->rep[i]; }
-    inr = ip;
 
     /* Match Loop */
     while (ip < ilimit) {
@@ -414,9 +452,9 @@ void ZSTD_compressBlock_opt_generic(ZSTD_CCtx* ctx,
         /* check repCode */
         {   U32 i, last_i = ZSTD_REP_CHECK + (ip==anchor);
             for (i=(ip == anchor); i<last_i; i++) {
-                const S32 repCur = ((i==ZSTD_REP_MOVE_OPT) && (ip==anchor)) ? (rep[0] - 1) : rep[i];
+                const S32 repCur = (i==ZSTD_REP_MOVE_OPT) ? (rep[0] - 1) : rep[i];
                 if ( (repCur > 0) && (repCur < (S32)(ip-prefixStart))
-                    && (MEM_readMINMATCH(ip, minMatch) == MEM_readMINMATCH(ip - repCur, minMatch))) {
+                    && (ZSTD_readMINMATCH(ip, minMatch) == ZSTD_readMINMATCH(ip - repCur, minMatch))) {
                     mlen = (U32)ZSTD_count(ip+minMatch, ip+minMatch-repCur, iend) + minMatch;
                     if (mlen > sufficient_len || mlen >= ZSTD_OPT_NUM) {
                         best_mlen = mlen; best_off = i; cur = 0; last_pos = 1;
@@ -424,7 +462,7 @@ void ZSTD_compressBlock_opt_generic(ZSTD_CCtx* ctx,
                     }
                     best_off = i - (ip == anchor);
                     do {
-                        price = ZSTD_getPrice(seqStorePtr, litlen, anchor, best_off, mlen - MINMATCH);
+                        price = ZSTD_getPrice(seqStorePtr, litlen, anchor, best_off, mlen - MINMATCH, ultra);
                         if (mlen > last_pos || price < opt[mlen].price)
                             SET_PRICE(mlen, mlen, i, litlen, price);   /* note : macro modifies last_pos */
                         mlen--;
@@ -449,7 +487,7 @@ void ZSTD_compressBlock_opt_generic(ZSTD_CCtx* ctx,
             mlen = (u>0) ? matches[u-1].len+1 : best_mlen;
             best_mlen = matches[u].len;
             while (mlen <= best_mlen) {
-                price = ZSTD_getPrice(seqStorePtr, litlen, anchor, matches[u].off-1, mlen - MINMATCH);
+                price = ZSTD_getPrice(seqStorePtr, litlen, anchor, matches[u].off-1, mlen - MINMATCH, ultra);
                 if (mlen > last_pos || price < opt[mlen].price)
                     SET_PRICE(mlen, mlen, matches[u].off, litlen, price);   /* note : macro modifies last_pos */
                 mlen++;
@@ -496,12 +534,12 @@ void ZSTD_compressBlock_opt_generic(ZSTD_CCtx* ctx,
                 opt[cur].rep[0] = ((opt[cur].off==ZSTD_REP_MOVE_OPT) && (mlen != 1)) ? (opt[cur-mlen].rep[0] - 1) : (opt[cur-mlen].rep[opt[cur].off]);
            }
 
-           best_mlen = minMatch;
+            best_mlen = minMatch;
             {   U32 i, last_i = ZSTD_REP_CHECK + (mlen != 1);
                 for (i=(opt[cur].mlen != 1); i<last_i; i++) {  /* check rep */
-                    const S32 repCur = ((i==ZSTD_REP_MOVE_OPT) && (opt[cur].mlen != 1)) ? (opt[cur].rep[0] - 1) : opt[cur].rep[i];
+                    const S32 repCur = (i==ZSTD_REP_MOVE_OPT) ? (opt[cur].rep[0] - 1) : opt[cur].rep[i];
                     if ( (repCur > 0) && (repCur < (S32)(inr-prefixStart))
-                       && (MEM_readMINMATCH(inr, minMatch) == MEM_readMINMATCH(inr - repCur, minMatch))) {
+                       && (ZSTD_readMINMATCH(inr, minMatch) == ZSTD_readMINMATCH(inr - repCur, minMatch))) {
                        mlen = (U32)ZSTD_count(inr+minMatch, inr+minMatch - repCur, iend) + minMatch;
 
                        if (mlen > sufficient_len || cur + mlen >= ZSTD_OPT_NUM) {
@@ -510,21 +548,20 @@ void ZSTD_compressBlock_opt_generic(ZSTD_CCtx* ctx,
                        }
 
                        best_off = i - (opt[cur].mlen != 1);
+                       if (mlen > best_mlen) best_mlen = mlen;
 
-                       if (opt[cur].mlen == 1) {
-                            litlen = opt[cur].litlen;
-                            if (cur > litlen) {
-                                price = opt[cur - litlen].price + ZSTD_getPrice(seqStorePtr, litlen, inr-litlen, best_off, mlen - MINMATCH);
-                            } else
-                                price = ZSTD_getPrice(seqStorePtr, litlen, anchor, best_off, mlen - MINMATCH);
-                        } else {
-                            litlen = 0;
-                            price = opt[cur].price + ZSTD_getPrice(seqStorePtr, 0, NULL, best_off, mlen - MINMATCH);
-                        }
+                       do {
+                           if (opt[cur].mlen == 1) {
+                                litlen = opt[cur].litlen;
+                                if (cur > litlen) {
+                                    price = opt[cur - litlen].price + ZSTD_getPrice(seqStorePtr, litlen, inr-litlen, best_off, mlen - MINMATCH, ultra);
+                                } else
+                                    price = ZSTD_getPrice(seqStorePtr, litlen, anchor, best_off, mlen - MINMATCH, ultra);
+                            } else {
+                                litlen = 0;
+                                price = opt[cur].price + ZSTD_getPrice(seqStorePtr, 0, NULL, best_off, mlen - MINMATCH, ultra);
+                            }
 
-                        if (mlen > best_mlen) best_mlen = mlen;
-
-                        do {
                             if (cur + mlen > last_pos || price <= opt[cur + mlen].price)
                                 SET_PRICE(cur + mlen, mlen, i, litlen, price);
                             mlen--;
@@ -549,12 +586,12 @@ void ZSTD_compressBlock_opt_generic(ZSTD_CCtx* ctx,
                     if (opt[cur].mlen == 1) {
                         litlen = opt[cur].litlen;
                         if (cur > litlen)
-                            price = opt[cur - litlen].price + ZSTD_getPrice(seqStorePtr, litlen, ip+cur-litlen, matches[u].off-1, mlen - MINMATCH);
+                            price = opt[cur - litlen].price + ZSTD_getPrice(seqStorePtr, litlen, ip+cur-litlen, matches[u].off-1, mlen - MINMATCH, ultra);
                         else
-                            price = ZSTD_getPrice(seqStorePtr, litlen, anchor, matches[u].off-1, mlen - MINMATCH);
+                            price = ZSTD_getPrice(seqStorePtr, litlen, anchor, matches[u].off-1, mlen - MINMATCH, ultra);
                     } else {
                         litlen = 0;
-                        price = opt[cur].price + ZSTD_getPrice(seqStorePtr, 0, NULL, matches[u].off-1, mlen - MINMATCH);
+                        price = opt[cur].price + ZSTD_getPrice(seqStorePtr, 0, NULL, matches[u].off-1, mlen - MINMATCH, ultra);
                     }
 
                     if (cur + mlen > last_pos || (price < opt[cur + mlen].price))
@@ -600,7 +637,7 @@ _storeSequence:   /* cur, last_pos, best_mlen, best_off have to be set */
                 offset--;
             } else {
                 if (offset != 0) {
-                    best_off = ((offset==ZSTD_REP_MOVE_OPT) && (litLength==0)) ? (rep[0] - 1) : (rep[offset]);
+                    best_off = (offset==ZSTD_REP_MOVE_OPT) ? (rep[0] - 1) : (rep[offset]);
                     if (offset != 1) rep[2] = rep[1];
                     rep[1] = rep[0];
                     rep[0] = best_off;
@@ -614,7 +651,7 @@ _storeSequence:   /* cur, last_pos, best_mlen, best_off have to be set */
     }    }   /* for (cur=0; cur < last_pos; ) */
 
     /* Save reps for next block */
-    { int i; for (i=0; i<ZSTD_REP_NUM; i++) ctx->savedRep[i] = rep[i]; }
+    { int i; for (i=0; i<ZSTD_REP_NUM; i++) ctx->repToConfirm[i] = rep[i]; }
 
     /* Last Literals */
     {   size_t const lastLLSize = iend - anchor;
@@ -626,7 +663,7 @@ _storeSequence:   /* cur, last_pos, best_mlen, best_off have to be set */
 
 FORCE_INLINE
 void ZSTD_compressBlock_opt_extDict_generic(ZSTD_CCtx* ctx,
-                                     const void* src, size_t srcSize)
+                                     const void* src, size_t srcSize, const int ultra)
 {
     seqStore_t* seqStorePtr = &(ctx->seqStore);
     const BYTE* const istart = (const BYTE*)src;
@@ -641,10 +678,10 @@ void ZSTD_compressBlock_opt_extDict_generic(ZSTD_CCtx* ctx,
     const BYTE* const dictBase = ctx->dictBase;
     const BYTE* const dictEnd  = dictBase + dictLimit;
 
-    const U32 maxSearches = 1U << ctx->params.cParams.searchLog;
-    const U32 sufficient_len = ctx->params.cParams.targetLength;
-    const U32 mls = ctx->params.cParams.searchLength;
-    const U32 minMatch = (ctx->params.cParams.searchLength == 3) ? 3 : 4;
+    const U32 maxSearches = 1U << ctx->appliedParams.cParams.searchLog;
+    const U32 sufficient_len = ctx->appliedParams.cParams.targetLength;
+    const U32 mls = ctx->appliedParams.cParams.searchLength;
+    const U32 minMatch = (ctx->appliedParams.cParams.searchLength == 3) ? 3 : 4;
 
     ZSTD_optimal_t* opt = seqStorePtr->priceTable;
     ZSTD_match_t* matches = seqStorePtr->matchTable;
@@ -655,9 +692,8 @@ void ZSTD_compressBlock_opt_extDict_generic(ZSTD_CCtx* ctx,
     { U32 i; for (i=0; i<ZSTD_REP_NUM; i++) rep[i]=ctx->rep[i]; }
 
     ctx->nextToUpdate3 = ctx->nextToUpdate;
-    ZSTD_rescaleFreqs(seqStorePtr);
+    ZSTD_rescaleFreqs(seqStorePtr, (const BYTE*)src, srcSize);
     ip += (ip==prefixStart);
-    inr = ip;
 
     /* Match Loop */
     while (ip < ilimit) {
@@ -666,19 +702,18 @@ void ZSTD_compressBlock_opt_extDict_generic(ZSTD_CCtx* ctx,
         U32 current = (U32)(ip-base);
         memset(opt, 0, sizeof(ZSTD_optimal_t));
         last_pos = 0;
-        inr = ip;
         opt[0].litlen = (U32)(ip - anchor);
 
         /* check repCode */
         {   U32 i, last_i = ZSTD_REP_CHECK + (ip==anchor);
             for (i = (ip==anchor); i<last_i; i++) {
-                const S32 repCur = ((i==ZSTD_REP_MOVE_OPT) && (ip==anchor)) ? (rep[0] - 1) : rep[i];
+                const S32 repCur = (i==ZSTD_REP_MOVE_OPT) ? (rep[0] - 1) : rep[i];
                 const U32 repIndex = (U32)(current - repCur);
                 const BYTE* const repBase = repIndex < dictLimit ? dictBase : base;
                 const BYTE* const repMatch = repBase + repIndex;
                 if ( (repCur > 0 && repCur <= (S32)current)
                    && (((U32)((dictLimit-1) - repIndex) >= 3) & (repIndex>lowestIndex))  /* intentional overflow */
-                   && (MEM_readMINMATCH(ip, minMatch) == MEM_readMINMATCH(repMatch, minMatch)) ) {
+                   && (ZSTD_readMINMATCH(ip, minMatch) == ZSTD_readMINMATCH(repMatch, minMatch)) ) {
                     /* repcode detected we should take it */
                     const BYTE* const repEnd = repIndex < dictLimit ? dictEnd : iend;
                     mlen = (U32)ZSTD_count_2segments(ip+minMatch, repMatch+minMatch, iend, repEnd, prefixStart) + minMatch;
@@ -691,7 +726,7 @@ void ZSTD_compressBlock_opt_extDict_generic(ZSTD_CCtx* ctx,
                     best_off = i - (ip==anchor);
                     litlen = opt[0].litlen;
                     do {
-                        price = ZSTD_getPrice(seqStorePtr, litlen, anchor, best_off, mlen - MINMATCH);
+                        price = ZSTD_getPrice(seqStorePtr, litlen, anchor, best_off, mlen - MINMATCH, ultra);
                         if (mlen > last_pos || price < opt[mlen].price)
                             SET_PRICE(mlen, mlen, i, litlen, price);   /* note : macro modifies last_pos */
                         mlen--;
@@ -721,7 +756,7 @@ void ZSTD_compressBlock_opt_extDict_generic(ZSTD_CCtx* ctx,
             best_mlen = matches[u].len;
             litlen = opt[0].litlen;
             while (mlen <= best_mlen) {
-                price = ZSTD_getPrice(seqStorePtr, litlen, anchor, matches[u].off-1, mlen - MINMATCH);
+                price = ZSTD_getPrice(seqStorePtr, litlen, anchor, matches[u].off-1, mlen - MINMATCH, ultra);
                 if (mlen > last_pos || price < opt[mlen].price)
                     SET_PRICE(mlen, mlen, matches[u].off, litlen, price);
                 mlen++;
@@ -765,17 +800,16 @@ void ZSTD_compressBlock_opt_extDict_generic(ZSTD_CCtx* ctx,
                 opt[cur].rep[0] = ((opt[cur].off==ZSTD_REP_MOVE_OPT) && (mlen != 1)) ? (opt[cur-mlen].rep[0] - 1) : (opt[cur-mlen].rep[opt[cur].off]);
             }
 
-            best_mlen = 0;
-
+            best_mlen = minMatch;
             {   U32 i, last_i = ZSTD_REP_CHECK + (mlen != 1);
                 for (i = (mlen != 1); i<last_i; i++) {
-                    const S32 repCur = ((i==ZSTD_REP_MOVE_OPT) && (opt[cur].mlen != 1)) ? (opt[cur].rep[0] - 1) : opt[cur].rep[i];
+                    const S32 repCur = (i==ZSTD_REP_MOVE_OPT) ? (opt[cur].rep[0] - 1) : opt[cur].rep[i];
                     const U32 repIndex = (U32)(current+cur - repCur);
                     const BYTE* const repBase = repIndex < dictLimit ? dictBase : base;
                     const BYTE* const repMatch = repBase + repIndex;
                     if ( (repCur > 0 && repCur <= (S32)(current+cur))
                       && (((U32)((dictLimit-1) - repIndex) >= 3) & (repIndex>lowestIndex))  /* intentional overflow */
-                      && (MEM_readMINMATCH(inr, minMatch) == MEM_readMINMATCH(repMatch, minMatch)) ) {
+                      && (ZSTD_readMINMATCH(inr, minMatch) == ZSTD_readMINMATCH(repMatch, minMatch)) ) {
                         /* repcode detected */
                         const BYTE* const repEnd = repIndex < dictLimit ? dictEnd : iend;
                         mlen = (U32)ZSTD_count_2segments(inr+minMatch, repMatch+minMatch, iend, repEnd, prefixStart) + minMatch;
@@ -786,20 +820,20 @@ void ZSTD_compressBlock_opt_extDict_generic(ZSTD_CCtx* ctx,
                         }
 
                         best_off = i - (opt[cur].mlen != 1);
-                        if (opt[cur].mlen == 1) {
-                            litlen = opt[cur].litlen;
-                            if (cur > litlen) {
-                                price = opt[cur - litlen].price + ZSTD_getPrice(seqStorePtr, litlen, inr-litlen, best_off, mlen - MINMATCH);
-                            } else
-                                price = ZSTD_getPrice(seqStorePtr, litlen, anchor, best_off, mlen - MINMATCH);
-                        } else {
-                            litlen = 0;
-                            price = opt[cur].price + ZSTD_getPrice(seqStorePtr, 0, NULL, best_off, mlen - MINMATCH);
-                        }
-
-                        best_mlen = mlen;
+                        if (mlen > best_mlen) best_mlen = mlen;
 
                         do {
+                            if (opt[cur].mlen == 1) {
+                                litlen = opt[cur].litlen;
+                                if (cur > litlen) {
+                                    price = opt[cur - litlen].price + ZSTD_getPrice(seqStorePtr, litlen, inr-litlen, best_off, mlen - MINMATCH, ultra);
+                                } else
+                                    price = ZSTD_getPrice(seqStorePtr, litlen, anchor, best_off, mlen - MINMATCH, ultra);
+                            } else {
+                                litlen = 0;
+                                price = opt[cur].price + ZSTD_getPrice(seqStorePtr, 0, NULL, best_off, mlen - MINMATCH, ultra);
+                            }
+
                             if (cur + mlen > last_pos || price <= opt[cur + mlen].price)
                                 SET_PRICE(cur + mlen, mlen, i, litlen, price);
                             mlen--;
@@ -808,30 +842,28 @@ void ZSTD_compressBlock_opt_extDict_generic(ZSTD_CCtx* ctx,
 
             match_num = ZSTD_BtGetAllMatches_selectMLS_extDict(ctx, inr, iend, maxSearches, mls, matches, minMatch);
 
-            if (match_num > 0 && matches[match_num-1].len > sufficient_len) {
+            if (match_num > 0 && (matches[match_num-1].len > sufficient_len || cur + matches[match_num-1].len >= ZSTD_OPT_NUM)) {
                 best_mlen = matches[match_num-1].len;
                 best_off = matches[match_num-1].off;
                 last_pos = cur + 1;
                 goto _storeSequence;
             }
 
-            best_mlen = (best_mlen > minMatch) ? best_mlen : minMatch;
-
             /* set prices using matches at position = cur */
             for (u = 0; u < match_num; u++) {
                 mlen = (u>0) ? matches[u-1].len+1 : best_mlen;
-                best_mlen = (cur + matches[u].len < ZSTD_OPT_NUM) ? matches[u].len : ZSTD_OPT_NUM - cur;
+                best_mlen = matches[u].len;
 
                 while (mlen <= best_mlen) {
                     if (opt[cur].mlen == 1) {
                         litlen = opt[cur].litlen;
                         if (cur > litlen)
-                            price = opt[cur - litlen].price + ZSTD_getPrice(seqStorePtr, litlen, ip+cur-litlen, matches[u].off-1, mlen - MINMATCH);
+                            price = opt[cur - litlen].price + ZSTD_getPrice(seqStorePtr, litlen, ip+cur-litlen, matches[u].off-1, mlen - MINMATCH, ultra);
                         else
-                            price = ZSTD_getPrice(seqStorePtr, litlen, anchor, matches[u].off-1, mlen - MINMATCH);
+                            price = ZSTD_getPrice(seqStorePtr, litlen, anchor, matches[u].off-1, mlen - MINMATCH, ultra);
                     } else {
                         litlen = 0;
-                        price = opt[cur].price + ZSTD_getPrice(seqStorePtr, 0, NULL, matches[u].off-1, mlen - MINMATCH);
+                        price = opt[cur].price + ZSTD_getPrice(seqStorePtr, 0, NULL, matches[u].off-1, mlen - MINMATCH, ultra);
                     }
 
                     if (cur + mlen > last_pos || (price < opt[cur + mlen].price))
@@ -877,7 +909,7 @@ _storeSequence:   /* cur, last_pos, best_mlen, best_off have to be set */
                 offset--;
             } else {
                 if (offset != 0) {
-                    best_off = ((offset==ZSTD_REP_MOVE_OPT) && (litLength==0)) ? (rep[0] - 1) : (rep[offset]);
+                    best_off = (offset==ZSTD_REP_MOVE_OPT) ? (rep[0] - 1) : (rep[offset]);
                     if (offset != 1) rep[2] = rep[1];
                     rep[1] = rep[0];
                     rep[0] = best_off;
@@ -892,7 +924,7 @@ _storeSequence:   /* cur, last_pos, best_mlen, best_off have to be set */
     }    }   /* for (cur=0; cur < last_pos; ) */
 
     /* Save reps for next block */
-    { int i; for (i=0; i<ZSTD_REP_NUM; i++) ctx->savedRep[i] = rep[i]; }
+    { int i; for (i=0; i<ZSTD_REP_NUM; i++) ctx->repToConfirm[i] = rep[i]; }
 
     /* Last Literals */
     {   size_t lastLLSize = iend - anchor;
diff --git a/contrib/libzstd/include/zstd/compress/zstdmt_compress.c b/contrib/libzstd/include/zstd/compress/zstdmt_compress.c
new file mode 100644
index 00000000000..0cee01eacb8
--- /dev/null
+++ b/contrib/libzstd/include/zstd/compress/zstdmt_compress.c
@@ -0,0 +1,955 @@
+/**
+ * Copyright (c) 2016-present, Yann Collet, Facebook, Inc.
+ * All rights reserved.
+ *
+ * This source code is licensed under the BSD-style license found in the
+ * LICENSE file in the root directory of this source tree. An additional grant
+ * of patent rights can be found in the PATENTS file in the same directory.
+ */
+
+
+/* ======   Tuning parameters   ====== */
+#define ZSTDMT_NBTHREADS_MAX 128
+
+
+/* ======   Compiler specifics   ====== */
+#if defined(_MSC_VER)
+#  pragma warning(disable : 4204)   /* disable: C4204: non-constant aggregate initializer */
+#endif
+
+
+/* ======   Dependencies   ====== */
+#include <string.h>      /* memcpy, memset */
+#include "pool.h"        /* threadpool */
+#include "threading.h"   /* mutex */
+#include "zstd_internal.h"  /* MIN, ERROR, ZSTD_*, ZSTD_highbit32 */
+#include "zstdmt_compress.h"
+
+
+/* ======   Debug   ====== */
+#if defined(ZSTD_DEBUG) && (ZSTD_DEBUG>=2)
+
+#  include <stdio.h>
+#  include <unistd.h>
+#  include <sys/times.h>
+#  define DEBUGLOGRAW(l, ...) if (l<=ZSTD_DEBUG) { fprintf(stderr, __VA_ARGS__); }
+
+#  define DEBUG_PRINTHEX(l,p,n) {            \
+    unsigned debug_u;                        \
+    for (debug_u=0; debug_u<(n); debug_u++)  \
+        DEBUGLOGRAW(l, "%02X ", ((const unsigned char*)(p))[debug_u]); \
+    DEBUGLOGRAW(l, " \n");                   \
+}
+
+static unsigned long long GetCurrentClockTimeMicroseconds(void)
+{
+   static clock_t _ticksPerSecond = 0;
+   if (_ticksPerSecond <= 0) _ticksPerSecond = sysconf(_SC_CLK_TCK);
+
+   { struct tms junk; clock_t newTicks = (clock_t) times(&junk);
+     return ((((unsigned long long)newTicks)*(1000000))/_ticksPerSecond); }
+}
+
+#define MUTEX_WAIT_TIME_DLEVEL 6
+#define PTHREAD_MUTEX_LOCK(mutex) {               \
+    if (ZSTD_DEBUG>=MUTEX_WAIT_TIME_DLEVEL) {   \
+        unsigned long long const beforeTime = GetCurrentClockTimeMicroseconds(); \
+        pthread_mutex_lock(mutex);                \
+        {   unsigned long long const afterTime = GetCurrentClockTimeMicroseconds(); \
+            unsigned long long const elapsedTime = (afterTime-beforeTime); \
+            if (elapsedTime > 1000) {  /* or whatever threshold you like; I'm using 1 millisecond here */ \
+                DEBUGLOG(MUTEX_WAIT_TIME_DLEVEL, "Thread took %llu microseconds to acquire mutex %s \n", \
+                   elapsedTime, #mutex);          \
+        }   }                                     \
+    } else pthread_mutex_lock(mutex);             \
+}
+
+#else
+
+#  define PTHREAD_MUTEX_LOCK(m) pthread_mutex_lock(m)
+#  define DEBUG_PRINTHEX(l,p,n) {}
+
+#endif
+
+
+/* =====   Buffer Pool   ===== */
+
+typedef struct buffer_s {
+    void* start;
+    size_t size;
+} buffer_t;
+
+static const buffer_t g_nullBuffer = { NULL, 0 };
+
+typedef struct ZSTDMT_bufferPool_s {
+    unsigned totalBuffers;
+    unsigned nbBuffers;
+    ZSTD_customMem cMem;
+    buffer_t bTable[1];   /* variable size */
+} ZSTDMT_bufferPool;
+
+static ZSTDMT_bufferPool* ZSTDMT_createBufferPool(unsigned nbThreads, ZSTD_customMem cMem)
+{
+    unsigned const maxNbBuffers = 2*nbThreads + 2;
+    ZSTDMT_bufferPool* const bufPool = (ZSTDMT_bufferPool*)ZSTD_calloc(
+        sizeof(ZSTDMT_bufferPool) + (maxNbBuffers-1) * sizeof(buffer_t), cMem);
+    if (bufPool==NULL) return NULL;
+    bufPool->totalBuffers = maxNbBuffers;
+    bufPool->nbBuffers = 0;
+    bufPool->cMem = cMem;
+    return bufPool;
+}
+
+static void ZSTDMT_freeBufferPool(ZSTDMT_bufferPool* bufPool)
+{
+    unsigned u;
+    if (!bufPool) return;   /* compatibility with free on NULL */
+    for (u=0; u<bufPool->totalBuffers; u++)
+        ZSTD_free(bufPool->bTable[u].start, bufPool->cMem);
+    ZSTD_free(bufPool, bufPool->cMem);
+}
+
+/* only works at initialization, not during compression */
+static size_t ZSTDMT_sizeof_bufferPool(ZSTDMT_bufferPool* bufPool)
+{
+    size_t const poolSize = sizeof(*bufPool)
+                            + (bufPool->totalBuffers - 1) * sizeof(buffer_t);
+    unsigned u;
+    size_t totalBufferSize = 0;
+    for (u=0; u<bufPool->totalBuffers; u++)
+        totalBufferSize += bufPool->bTable[u].size;
+
+    return poolSize + totalBufferSize;
+}
+
+/** ZSTDMT_getBuffer() :
+ *  assumption : invocation from main thread only ! */
+static buffer_t ZSTDMT_getBuffer(ZSTDMT_bufferPool* pool, size_t bSize)
+{
+    if (pool->nbBuffers) {   /* try to use an existing buffer */
+        buffer_t const buf = pool->bTable[--(pool->nbBuffers)];
+        size_t const availBufferSize = buf.size;
+        if ((availBufferSize >= bSize) & (availBufferSize <= 10*bSize))
+            /* large enough, but not too much */
+            return buf;
+        /* size conditions not respected : scratch this buffer, create new one */
+        ZSTD_free(buf.start, pool->cMem);
+    }
+    /* create new buffer */
+    {   buffer_t buffer;
+        void* const start = ZSTD_malloc(bSize, pool->cMem);
+        if (start==NULL) bSize = 0;
+        buffer.start = start;   /* note : start can be NULL if malloc fails ! */
+        buffer.size = bSize;
+        return buffer;
+    }
+}
+
+/* store buffer for later re-use, up to pool capacity */
+static void ZSTDMT_releaseBuffer(ZSTDMT_bufferPool* pool, buffer_t buf)
+{
+    if (buf.start == NULL) return;   /* release on NULL */
+    if (pool->nbBuffers < pool->totalBuffers) {
+        pool->bTable[pool->nbBuffers++] = buf;   /* store for later re-use */
+        return;
+    }
+    /* Reached bufferPool capacity (should not happen) */
+    ZSTD_free(buf.start, pool->cMem);
+}
+
+
+/* =====   CCtx Pool   ===== */
+
+typedef struct {
+    unsigned totalCCtx;
+    unsigned availCCtx;
+    ZSTD_customMem cMem;
+    ZSTD_CCtx* cctx[1];   /* variable size */
+} ZSTDMT_CCtxPool;
+
+/* assumption : CCtxPool invocation only from main thread */
+
+/* note : all CCtx borrowed from the pool should be released back to the pool _before_ freeing the pool */
+static void ZSTDMT_freeCCtxPool(ZSTDMT_CCtxPool* pool)
+{
+    unsigned u;
+    for (u=0; u<pool->totalCCtx; u++)
+        ZSTD_freeCCtx(pool->cctx[u]);  /* note : compatible with free on NULL */
+    ZSTD_free(pool, pool->cMem);
+}
+
+/* ZSTDMT_createCCtxPool() :
+ * implies nbThreads >= 1 , checked by caller ZSTDMT_createCCtx() */
+static ZSTDMT_CCtxPool* ZSTDMT_createCCtxPool(unsigned nbThreads,
+                                              ZSTD_customMem cMem)
+{
+    ZSTDMT_CCtxPool* const cctxPool = (ZSTDMT_CCtxPool*) ZSTD_calloc(
+        sizeof(ZSTDMT_CCtxPool) + (nbThreads-1)*sizeof(ZSTD_CCtx*), cMem);
+    if (!cctxPool) return NULL;
+    cctxPool->cMem = cMem;
+    cctxPool->totalCCtx = nbThreads;
+    cctxPool->availCCtx = 1;   /* at least one cctx for single-thread mode */
+    cctxPool->cctx[0] = ZSTD_createCCtx_advanced(cMem);
+    if (!cctxPool->cctx[0]) { ZSTDMT_freeCCtxPool(cctxPool); return NULL; }
+    DEBUGLOG(3, "cctxPool created, with %u threads", nbThreads);
+    return cctxPool;
+}
+
+/* only works during initialization phase, not during compression */
+static size_t ZSTDMT_sizeof_CCtxPool(ZSTDMT_CCtxPool* cctxPool)
+{
+    unsigned const nbThreads = cctxPool->totalCCtx;
+    size_t const poolSize = sizeof(*cctxPool)
+                            + (nbThreads-1)*sizeof(ZSTD_CCtx*);
+    unsigned u;
+    size_t totalCCtxSize = 0;
+    for (u=0; u<nbThreads; u++)
+        totalCCtxSize += ZSTD_sizeof_CCtx(cctxPool->cctx[u]);
+
+    return poolSize + totalCCtxSize;
+}
+
+static ZSTD_CCtx* ZSTDMT_getCCtx(ZSTDMT_CCtxPool* pool)
+{
+    if (pool->availCCtx) {
+        pool->availCCtx--;
+        return pool->cctx[pool->availCCtx];
+    }
+    return ZSTD_createCCtx();   /* note : can be NULL, when creation fails ! */
+}
+
+static void ZSTDMT_releaseCCtx(ZSTDMT_CCtxPool* pool, ZSTD_CCtx* cctx)
+{
+    if (cctx==NULL) return;   /* compatibility with release on NULL */
+    if (pool->availCCtx < pool->totalCCtx)
+        pool->cctx[pool->availCCtx++] = cctx;
+    else
+        /* pool overflow : should not happen, since totalCCtx==nbThreads */
+        ZSTD_freeCCtx(cctx);
+}
+
+
+/* =====   Thread worker   ===== */
+
+typedef struct {
+    buffer_t buffer;
+    size_t filled;
+} inBuff_t;
+
+typedef struct {
+    ZSTD_CCtx* cctx;
+    buffer_t src;
+    const void* srcStart;
+    size_t   srcSize;
+    size_t   dictSize;
+    buffer_t dstBuff;
+    size_t   cSize;
+    size_t   dstFlushed;
+    unsigned firstChunk;
+    unsigned lastChunk;
+    unsigned jobCompleted;
+    unsigned jobScanned;
+    pthread_mutex_t* jobCompleted_mutex;
+    pthread_cond_t* jobCompleted_cond;
+    ZSTD_parameters params;
+    const ZSTD_CDict* cdict;
+    unsigned long long fullFrameSize;
+} ZSTDMT_jobDescription;
+
+/* ZSTDMT_compressChunk() : POOL_function type */
+void ZSTDMT_compressChunk(void* jobDescription)
+{
+    ZSTDMT_jobDescription* const job = (ZSTDMT_jobDescription*)jobDescription;
+    const void* const src = (const char*)job->srcStart + job->dictSize;
+    buffer_t const dstBuff = job->dstBuff;
+    DEBUGLOG(5, "job (first:%u) (last:%u) : dictSize %u, srcSize %u",
+                 job->firstChunk, job->lastChunk, (U32)job->dictSize, (U32)job->srcSize);
+    if (job->cdict) {  /* should only happen for first segment */
+        size_t const initError = ZSTD_compressBegin_usingCDict_advanced(job->cctx, job->cdict, job->params.fParams, job->fullFrameSize);
+        DEBUGLOG(5, "using CDict");
+        if (ZSTD_isError(initError)) { job->cSize = initError; goto _endJob; }
+    } else {  /* srcStart points at reloaded section */
+        if (!job->firstChunk) job->params.fParams.contentSizeFlag = 0;  /* ensure no srcSize control */
+        {   size_t const dictModeError = ZSTD_setCCtxParameter(job->cctx, ZSTD_p_forceRawDict, 1);  /* Force loading dictionary in "content-only" mode (no header analysis) */
+            size_t const initError = ZSTD_compressBegin_advanced(job->cctx, job->srcStart, job->dictSize, job->params, job->fullFrameSize);
+            if (ZSTD_isError(initError) || ZSTD_isError(dictModeError)) { job->cSize = initError; goto _endJob; }
+            ZSTD_setCCtxParameter(job->cctx, ZSTD_p_forceWindow, 1);
+    }   }
+    if (!job->firstChunk) {  /* flush and overwrite frame header when it's not first segment */
+        size_t const hSize = ZSTD_compressContinue(job->cctx, dstBuff.start, dstBuff.size, src, 0);
+        if (ZSTD_isError(hSize)) { job->cSize = hSize; goto _endJob; }
+        ZSTD_invalidateRepCodes(job->cctx);
+    }
+
+    DEBUGLOG(5, "Compressing : ");
+    DEBUG_PRINTHEX(4, job->srcStart, 12);
+    job->cSize = (job->lastChunk) ?
+                 ZSTD_compressEnd     (job->cctx, dstBuff.start, dstBuff.size, src, job->srcSize) :
+                 ZSTD_compressContinue(job->cctx, dstBuff.start, dstBuff.size, src, job->srcSize);
+    DEBUGLOG(5, "compressed %u bytes into %u bytes   (first:%u) (last:%u)",
+                (unsigned)job->srcSize, (unsigned)job->cSize, job->firstChunk, job->lastChunk);
+    DEBUGLOG(5, "dstBuff.size : %u ; => %s", (U32)dstBuff.size, ZSTD_getErrorName(job->cSize));
+
+_endJob:
+    PTHREAD_MUTEX_LOCK(job->jobCompleted_mutex);
+    job->jobCompleted = 1;
+    job->jobScanned = 0;
+    pthread_cond_signal(job->jobCompleted_cond);
+    pthread_mutex_unlock(job->jobCompleted_mutex);
+}
+
+
+/* ------------------------------------------ */
+/* =====   Multi-threaded compression   ===== */
+/* ------------------------------------------ */
+
+struct ZSTDMT_CCtx_s {
+    POOL_ctx* factory;
+    ZSTDMT_jobDescription* jobs;
+    ZSTDMT_bufferPool* buffPool;
+    ZSTDMT_CCtxPool* cctxPool;
+    pthread_mutex_t jobCompleted_mutex;
+    pthread_cond_t jobCompleted_cond;
+    size_t targetSectionSize;
+    size_t marginSize;
+    size_t inBuffSize;
+    size_t dictSize;
+    size_t targetDictSize;
+    inBuff_t inBuff;
+    ZSTD_parameters params;
+    XXH64_state_t xxhState;
+    unsigned nbThreads;
+    unsigned jobIDMask;
+    unsigned doneJobID;
+    unsigned nextJobID;
+    unsigned frameEnded;
+    unsigned allJobsCompleted;
+    unsigned overlapRLog;
+    unsigned long long frameContentSize;
+    size_t sectionSize;
+    ZSTD_customMem cMem;
+    ZSTD_CDict* cdictLocal;
+    const ZSTD_CDict* cdict;
+};
+
+static ZSTDMT_jobDescription* ZSTDMT_allocJobsTable(U32* nbJobsPtr, ZSTD_customMem cMem)
+{
+    U32 const nbJobsLog2 = ZSTD_highbit32(*nbJobsPtr) + 1;
+    U32 const nbJobs = 1 << nbJobsLog2;
+    *nbJobsPtr = nbJobs;
+    return (ZSTDMT_jobDescription*) ZSTD_calloc(
+                            nbJobs * sizeof(ZSTDMT_jobDescription), cMem);
+}
+
+ZSTDMT_CCtx* ZSTDMT_createCCtx_advanced(unsigned nbThreads, ZSTD_customMem cMem)
+{
+    ZSTDMT_CCtx* mtctx;
+    U32 nbJobs = nbThreads + 2;
+    DEBUGLOG(3, "ZSTDMT_createCCtx_advanced");
+
+    if ((nbThreads < 1) | (nbThreads > ZSTDMT_NBTHREADS_MAX)) return NULL;
+    if ((cMem.customAlloc!=NULL) ^ (cMem.customFree!=NULL))
+        /* invalid custom allocator */
+        return NULL;
+
+    mtctx = (ZSTDMT_CCtx*) ZSTD_calloc(sizeof(ZSTDMT_CCtx), cMem);
+    if (!mtctx) return NULL;
+    mtctx->cMem = cMem;
+    mtctx->nbThreads = nbThreads;
+    mtctx->allJobsCompleted = 1;
+    mtctx->sectionSize = 0;
+    mtctx->overlapRLog = 3;
+    mtctx->factory = POOL_create(nbThreads, 1);
+    mtctx->jobs = ZSTDMT_allocJobsTable(&nbJobs, cMem);
+    mtctx->jobIDMask = nbJobs - 1;
+    mtctx->buffPool = ZSTDMT_createBufferPool(nbThreads, cMem);
+    mtctx->cctxPool = ZSTDMT_createCCtxPool(nbThreads, cMem);
+    if (!mtctx->factory | !mtctx->jobs | !mtctx->buffPool | !mtctx->cctxPool) {
+        ZSTDMT_freeCCtx(mtctx);
+        return NULL;
+    }
+    pthread_mutex_init(&mtctx->jobCompleted_mutex, NULL);   /* Todo : check init function return */
+    pthread_cond_init(&mtctx->jobCompleted_cond, NULL);
+    DEBUGLOG(3, "mt_cctx created, for %u threads", nbThreads);
+    return mtctx;
+}
+
+ZSTDMT_CCtx* ZSTDMT_createCCtx(unsigned nbThreads)
+{
+    return ZSTDMT_createCCtx_advanced(nbThreads, ZSTD_defaultCMem);
+}
+
+/* ZSTDMT_releaseAllJobResources() :
+ * note : ensure all workers are killed first ! */
+static void ZSTDMT_releaseAllJobResources(ZSTDMT_CCtx* mtctx)
+{
+    unsigned jobID;
+    DEBUGLOG(3, "ZSTDMT_releaseAllJobResources");
+    for (jobID=0; jobID <= mtctx->jobIDMask; jobID++) {
+        ZSTDMT_releaseBuffer(mtctx->buffPool, mtctx->jobs[jobID].dstBuff);
+        mtctx->jobs[jobID].dstBuff = g_nullBuffer;
+        ZSTDMT_releaseBuffer(mtctx->buffPool, mtctx->jobs[jobID].src);
+        mtctx->jobs[jobID].src = g_nullBuffer;
+        ZSTDMT_releaseCCtx(mtctx->cctxPool, mtctx->jobs[jobID].cctx);
+        mtctx->jobs[jobID].cctx = NULL;
+    }
+    memset(mtctx->jobs, 0, (mtctx->jobIDMask+1)*sizeof(ZSTDMT_jobDescription));
+    ZSTDMT_releaseBuffer(mtctx->buffPool, mtctx->inBuff.buffer);
+    mtctx->inBuff.buffer = g_nullBuffer;
+    mtctx->allJobsCompleted = 1;
+}
+
+size_t ZSTDMT_freeCCtx(ZSTDMT_CCtx* mtctx)
+{
+    if (mtctx==NULL) return 0;   /* compatible with free on NULL */
+    POOL_free(mtctx->factory);
+    if (!mtctx->allJobsCompleted) ZSTDMT_releaseAllJobResources(mtctx); /* stop workers first */
+    ZSTDMT_freeBufferPool(mtctx->buffPool);  /* release job resources into pools first */
+    ZSTD_free(mtctx->jobs, mtctx->cMem);
+    ZSTDMT_freeCCtxPool(mtctx->cctxPool);
+    ZSTD_freeCDict(mtctx->cdictLocal);
+    pthread_mutex_destroy(&mtctx->jobCompleted_mutex);
+    pthread_cond_destroy(&mtctx->jobCompleted_cond);
+    ZSTD_free(mtctx, mtctx->cMem);
+    return 0;
+}
+
+size_t ZSTDMT_sizeof_CCtx(ZSTDMT_CCtx* mtctx)
+{
+    if (mtctx == NULL) return 0;   /* supports sizeof NULL */
+    return sizeof(*mtctx)
+        + POOL_sizeof(mtctx->factory)
+        + ZSTDMT_sizeof_bufferPool(mtctx->buffPool)
+        + (mtctx->jobIDMask+1) * sizeof(ZSTDMT_jobDescription)
+        + ZSTDMT_sizeof_CCtxPool(mtctx->cctxPool)
+        + ZSTD_sizeof_CDict(mtctx->cdictLocal);
+}
+
+size_t ZSTDMT_setMTCtxParameter(ZSTDMT_CCtx* mtctx, ZSDTMT_parameter parameter, unsigned value)
+{
+    switch(parameter)
+    {
+    case ZSTDMT_p_sectionSize :
+        mtctx->sectionSize = value;
+        return 0;
+    case ZSTDMT_p_overlapSectionLog :
+        DEBUGLOG(5, "ZSTDMT_p_overlapSectionLog : %u", value);
+        mtctx->overlapRLog = (value >= 9) ? 0 : 9 - value;
+        return 0;
+    default :
+        return ERROR(compressionParameter_unsupported);
+    }
+}
+
+
+/* ------------------------------------------ */
+/* =====   Multi-threaded compression   ===== */
+/* ------------------------------------------ */
+
+static unsigned computeNbChunks(size_t srcSize, unsigned windowLog, unsigned nbThreads) {
+    size_t const chunkSizeTarget = (size_t)1 << (windowLog + 2);
+    size_t const chunkMaxSize = chunkSizeTarget << 2;
+    size_t const passSizeMax = chunkMaxSize * nbThreads;
+    unsigned const multiplier = (unsigned)(srcSize / passSizeMax) + 1;
+    unsigned const nbChunksLarge = multiplier * nbThreads;
+    unsigned const nbChunksMax = (unsigned)(srcSize / chunkSizeTarget) + 1;
+    unsigned const nbChunksSmall = MIN(nbChunksMax, nbThreads);
+    return (multiplier>1) ? nbChunksLarge : nbChunksSmall;
+}
+
+
+size_t ZSTDMT_compress_advanced(ZSTDMT_CCtx* mtctx,
+                           void* dst, size_t dstCapacity,
+                     const void* src, size_t srcSize,
+                     const ZSTD_CDict* cdict,
+                           ZSTD_parameters const params,
+                           unsigned overlapRLog)
+{
+    size_t const overlapSize = (overlapRLog>=9) ? 0 : (size_t)1 << (params.cParams.windowLog - overlapRLog);
+    unsigned nbChunks = computeNbChunks(srcSize, params.cParams.windowLog, mtctx->nbThreads);
+    size_t const proposedChunkSize = (srcSize + (nbChunks-1)) / nbChunks;
+    size_t const avgChunkSize = ((proposedChunkSize & 0x1FFFF) < 0x7FFF) ? proposedChunkSize + 0xFFFF : proposedChunkSize;   /* avoid too small last block */
+    const char* const srcStart = (const char*)src;
+    size_t remainingSrcSize = srcSize;
+    unsigned const compressWithinDst = (dstCapacity >= ZSTD_compressBound(srcSize)) ? nbChunks : (unsigned)(dstCapacity / ZSTD_compressBound(avgChunkSize));  /* presumes avgChunkSize >= 256 KB, which should be the case */
+    size_t frameStartPos = 0, dstBufferPos = 0;
+
+    DEBUGLOG(4, "nbChunks  : %2u   (chunkSize : %u bytes)   ", nbChunks, (U32)avgChunkSize);
+    if (nbChunks==1) {   /* fallback to single-thread mode */
+        ZSTD_CCtx* const cctx = mtctx->cctxPool->cctx[0];
+        if (cdict) return ZSTD_compress_usingCDict_advanced(cctx, dst, dstCapacity, src, srcSize, cdict, params.fParams);
+        return ZSTD_compress_advanced(cctx, dst, dstCapacity, src, srcSize, NULL, 0, params);
+    }
+    assert(avgChunkSize >= 256 KB);  /* condition for ZSTD_compressBound(A) + ZSTD_compressBound(B) <= ZSTD_compressBound(A+B), which is useful to avoid allocating extra buffers */
+
+    if (nbChunks > mtctx->jobIDMask+1) {  /* enlarge job table */
+        U32 nbJobs = nbChunks;
+        ZSTD_free(mtctx->jobs, mtctx->cMem);
+        mtctx->jobIDMask = 0;
+        mtctx->jobs = ZSTDMT_allocJobsTable(&nbJobs, mtctx->cMem);
+        if (mtctx->jobs==NULL) return ERROR(memory_allocation);
+        mtctx->jobIDMask = nbJobs - 1;
+    }
+
+    {   unsigned u;
+        for (u=0; u<nbChunks; u++) {
+            size_t const chunkSize = MIN(remainingSrcSize, avgChunkSize);
+            size_t const dstBufferCapacity = ZSTD_compressBound(chunkSize);
+            buffer_t const dstAsBuffer = { (char*)dst + dstBufferPos, dstBufferCapacity };
+            buffer_t const dstBuffer = u < compressWithinDst ? dstAsBuffer : ZSTDMT_getBuffer(mtctx->buffPool, dstBufferCapacity);
+            ZSTD_CCtx* const cctx = ZSTDMT_getCCtx(mtctx->cctxPool);
+            size_t dictSize = u ? overlapSize : 0;
+
+            if ((cctx==NULL) || (dstBuffer.start==NULL)) {
+                mtctx->jobs[u].cSize = ERROR(memory_allocation);   /* job result */
+                mtctx->jobs[u].jobCompleted = 1;
+                nbChunks = u+1;   /* only wait and free u jobs, instead of initially expected nbChunks ones */
+                break;   /* let's wait for previous jobs to complete, but don't start new ones */
+            }
+
+            mtctx->jobs[u].srcStart = srcStart + frameStartPos - dictSize;
+            mtctx->jobs[u].dictSize = dictSize;
+            mtctx->jobs[u].srcSize = chunkSize;
+            mtctx->jobs[u].cdict = mtctx->nextJobID==0 ? cdict : NULL;
+            mtctx->jobs[u].fullFrameSize = srcSize;
+            mtctx->jobs[u].params = params;
+            /* do not calculate checksum within sections, but write it in header for first section */
+            if (u!=0) mtctx->jobs[u].params.fParams.checksumFlag = 0;
+            mtctx->jobs[u].dstBuff = dstBuffer;
+            mtctx->jobs[u].cctx = cctx;
+            mtctx->jobs[u].firstChunk = (u==0);
+            mtctx->jobs[u].lastChunk = (u==nbChunks-1);
+            mtctx->jobs[u].jobCompleted = 0;
+            mtctx->jobs[u].jobCompleted_mutex = &mtctx->jobCompleted_mutex;
+            mtctx->jobs[u].jobCompleted_cond = &mtctx->jobCompleted_cond;
+
+            DEBUGLOG(5, "posting job %u   (%u bytes)", u, (U32)chunkSize);
+            DEBUG_PRINTHEX(6, mtctx->jobs[u].srcStart, 12);
+            POOL_add(mtctx->factory, ZSTDMT_compressChunk, &mtctx->jobs[u]);
+
+            frameStartPos += chunkSize;
+            dstBufferPos += dstBufferCapacity;
+            remainingSrcSize -= chunkSize;
+    }   }
+
+    /* collect result */
+    {   unsigned chunkID;
+        size_t error = 0, dstPos = 0;
+        for (chunkID=0; chunkID<nbChunks; chunkID++) {
+            DEBUGLOG(5, "waiting for chunk %u ", chunkID);
+            PTHREAD_MUTEX_LOCK(&mtctx->jobCompleted_mutex);
+            while (mtctx->jobs[chunkID].jobCompleted==0) {
+                DEBUGLOG(5, "waiting for jobCompleted signal from chunk %u", chunkID);
+                pthread_cond_wait(&mtctx->jobCompleted_cond, &mtctx->jobCompleted_mutex);
+            }
+            pthread_mutex_unlock(&mtctx->jobCompleted_mutex);
+            DEBUGLOG(5, "ready to write chunk %u ", chunkID);
+
+            ZSTDMT_releaseCCtx(mtctx->cctxPool, mtctx->jobs[chunkID].cctx);
+            mtctx->jobs[chunkID].cctx = NULL;
+            mtctx->jobs[chunkID].srcStart = NULL;
+            {   size_t const cSize = mtctx->jobs[chunkID].cSize;
+                if (ZSTD_isError(cSize)) error = cSize;
+                if ((!error) && (dstPos + cSize > dstCapacity)) error = ERROR(dstSize_tooSmall);
+                if (chunkID) {   /* note : chunk 0 is written directly at dst, which is correct position */
+                    if (!error)
+                        memmove((char*)dst + dstPos, mtctx->jobs[chunkID].dstBuff.start, cSize);  /* may overlap when chunk compressed within dst */
+                    if (chunkID >= compressWithinDst) {  /* chunk compressed into its own buffer, which must be released */
+                        DEBUGLOG(5, "releasing buffer %u>=%u", chunkID, compressWithinDst);
+                        ZSTDMT_releaseBuffer(mtctx->buffPool, mtctx->jobs[chunkID].dstBuff);
+                    }
+                    mtctx->jobs[chunkID].dstBuff = g_nullBuffer;
+                }
+                dstPos += cSize ;
+            }
+        }
+        if (!error) DEBUGLOG(4, "compressed size : %u  ", (U32)dstPos);
+        return error ? error : dstPos;
+    }
+}
+
+
+size_t ZSTDMT_compressCCtx(ZSTDMT_CCtx* mtctx,
+                           void* dst, size_t dstCapacity,
+                     const void* src, size_t srcSize,
+                           int compressionLevel)
+{
+    U32 const overlapRLog = (compressionLevel >= ZSTD_maxCLevel()) ? 0 : 3;
+    ZSTD_parameters params = ZSTD_getParams(compressionLevel, srcSize, 0);
+    params.fParams.contentSizeFlag = 1;
+    return ZSTDMT_compress_advanced(mtctx, dst, dstCapacity, src, srcSize, NULL, params, overlapRLog);
+}
+
+
+/* ====================================== */
+/* =======      Streaming API     ======= */
+/* ====================================== */
+
+static void ZSTDMT_waitForAllJobsCompleted(ZSTDMT_CCtx* zcs)
+{
+    DEBUGLOG(4, "ZSTDMT_waitForAllJobsCompleted");
+    while (zcs->doneJobID < zcs->nextJobID) {
+        unsigned const jobID = zcs->doneJobID & zcs->jobIDMask;
+        PTHREAD_MUTEX_LOCK(&zcs->jobCompleted_mutex);
+        while (zcs->jobs[jobID].jobCompleted==0) {
+            DEBUGLOG(5, "waiting for jobCompleted signal from chunk %u", zcs->doneJobID);   /* we want to block when waiting for data to flush */
+            pthread_cond_wait(&zcs->jobCompleted_cond, &zcs->jobCompleted_mutex);
+        }
+        pthread_mutex_unlock(&zcs->jobCompleted_mutex);
+        zcs->doneJobID++;
+    }
+}
+
+
+/** ZSTDMT_initCStream_internal() :
+ *  internal usage only */
+size_t ZSTDMT_initCStream_internal(ZSTDMT_CCtx* zcs,
+                    const void* dict, size_t dictSize, const ZSTD_CDict* cdict,
+                    ZSTD_parameters params, unsigned long long pledgedSrcSize)
+{
+    DEBUGLOG(4, "ZSTDMT_initCStream_internal");
+    /* params are supposed to be fully validated at this point */
+    assert(!ZSTD_isError(ZSTD_checkCParams(params.cParams)));
+    assert(!((dict) && (cdict)));  /* either dict or cdict, not both */
+
+    if (zcs->nbThreads==1) {
+        DEBUGLOG(4, "single thread mode");
+        return ZSTD_initCStream_internal(zcs->cctxPool->cctx[0],
+                                dict, dictSize, cdict,
+                                params, pledgedSrcSize);
+    }
+
+    if (zcs->allJobsCompleted == 0) {   /* previous compression not correctly finished */
+        ZSTDMT_waitForAllJobsCompleted(zcs);
+        ZSTDMT_releaseAllJobResources(zcs);
+        zcs->allJobsCompleted = 1;
+    }
+
+    zcs->params = params;
+    zcs->frameContentSize = pledgedSrcSize;
+    if (dict) {
+        DEBUGLOG(4,"cdictLocal: %08X", (U32)(size_t)zcs->cdictLocal);
+        ZSTD_freeCDict(zcs->cdictLocal);
+        zcs->cdictLocal = ZSTD_createCDict_advanced(dict, dictSize,
+                                                    0 /* byRef */, ZSTD_dm_auto,   /* note : a loadPrefix becomes an internal CDict */
+                                                    params.cParams, zcs->cMem);
+        zcs->cdict = zcs->cdictLocal;
+        if (zcs->cdictLocal == NULL) return ERROR(memory_allocation);
+    } else {
+        DEBUGLOG(4,"cdictLocal: %08X", (U32)(size_t)zcs->cdictLocal);
+        ZSTD_freeCDict(zcs->cdictLocal);
+        zcs->cdictLocal = NULL;
+        zcs->cdict = cdict;
+    }
+
+    zcs->targetDictSize = (zcs->overlapRLog>=9) ? 0 : (size_t)1 << (zcs->params.cParams.windowLog - zcs->overlapRLog);
+    DEBUGLOG(4, "overlapRLog : %u ", zcs->overlapRLog);
+    DEBUGLOG(4, "overlap Size : %u KB", (U32)(zcs->targetDictSize>>10));
+    zcs->targetSectionSize = zcs->sectionSize ? zcs->sectionSize : (size_t)1 << (zcs->params.cParams.windowLog + 2);
+    zcs->targetSectionSize = MAX(ZSTDMT_SECTION_SIZE_MIN, zcs->targetSectionSize);
+    zcs->targetSectionSize = MAX(zcs->targetDictSize, zcs->targetSectionSize);
+    DEBUGLOG(4, "Section Size : %u KB", (U32)(zcs->targetSectionSize>>10));
+    zcs->marginSize = zcs->targetSectionSize >> 2;
+    zcs->inBuffSize = zcs->targetDictSize + zcs->targetSectionSize + zcs->marginSize;
+    zcs->inBuff.buffer = ZSTDMT_getBuffer(zcs->buffPool, zcs->inBuffSize);
+    if (zcs->inBuff.buffer.start == NULL) return ERROR(memory_allocation);
+    zcs->inBuff.filled = 0;
+    zcs->dictSize = 0;
+    zcs->doneJobID = 0;
+    zcs->nextJobID = 0;
+    zcs->frameEnded = 0;
+    zcs->allJobsCompleted = 0;
+    if (params.fParams.checksumFlag) XXH64_reset(&zcs->xxhState, 0);
+    return 0;
+}
+
+size_t ZSTDMT_initCStream_advanced(ZSTDMT_CCtx* mtctx,
+                                const void* dict, size_t dictSize,
+                                ZSTD_parameters params, unsigned long long pledgedSrcSize)
+{
+    DEBUGLOG(5, "ZSTDMT_initCStream_advanced");
+    return ZSTDMT_initCStream_internal(mtctx, dict, dictSize, NULL, params, pledgedSrcSize);
+}
+
+size_t ZSTDMT_initCStream_usingCDict(ZSTDMT_CCtx* mtctx,
+                               const ZSTD_CDict* cdict,
+                                     ZSTD_frameParameters fParams,
+                                     unsigned long long pledgedSrcSize)
+{
+    ZSTD_parameters params = ZSTD_getParamsFromCDict(cdict);
+    if (cdict==NULL) return ERROR(dictionary_wrong);   /* method incompatible with NULL cdict */
+    params.fParams = fParams;
+    return ZSTDMT_initCStream_internal(mtctx, NULL, 0 /*dictSize*/, cdict,
+                                        params, pledgedSrcSize);
+}
+
+
+/* ZSTDMT_resetCStream() :
+ * pledgedSrcSize is optional and can be zero == unknown */
+size_t ZSTDMT_resetCStream(ZSTDMT_CCtx* zcs, unsigned long long pledgedSrcSize)
+{
+    if (zcs->nbThreads==1)
+        return ZSTD_resetCStream(zcs->cctxPool->cctx[0], pledgedSrcSize);
+    return ZSTDMT_initCStream_internal(zcs, NULL, 0, 0, zcs->params, pledgedSrcSize);
+}
+
+size_t ZSTDMT_initCStream(ZSTDMT_CCtx* zcs, int compressionLevel) {
+    ZSTD_parameters const params = ZSTD_getParams(compressionLevel, 0, 0);
+    return ZSTDMT_initCStream_internal(zcs, NULL, 0, NULL, params, 0);
+}
+
+
+static size_t ZSTDMT_createCompressionJob(ZSTDMT_CCtx* zcs, size_t srcSize, unsigned endFrame)
+{
+    size_t const dstBufferCapacity = ZSTD_compressBound(srcSize);
+    buffer_t const dstBuffer = ZSTDMT_getBuffer(zcs->buffPool, dstBufferCapacity);
+    ZSTD_CCtx* const cctx = ZSTDMT_getCCtx(zcs->cctxPool);
+    unsigned const jobID = zcs->nextJobID & zcs->jobIDMask;
+
+    if ((cctx==NULL) || (dstBuffer.start==NULL)) {
+        zcs->jobs[jobID].jobCompleted = 1;
+        zcs->nextJobID++;
+        ZSTDMT_waitForAllJobsCompleted(zcs);
+        ZSTDMT_releaseAllJobResources(zcs);
+        return ERROR(memory_allocation);
+    }
+
+    DEBUGLOG(4, "preparing job %u to compress %u bytes with %u preload ",
+                zcs->nextJobID, (U32)srcSize, (U32)zcs->dictSize);
+    zcs->jobs[jobID].src = zcs->inBuff.buffer;
+    zcs->jobs[jobID].srcStart = zcs->inBuff.buffer.start;
+    zcs->jobs[jobID].srcSize = srcSize;
+    zcs->jobs[jobID].dictSize = zcs->dictSize;
+    assert(zcs->inBuff.filled >= srcSize + zcs->dictSize);
+    zcs->jobs[jobID].params = zcs->params;
+    /* do not calculate checksum within sections, but write it in header for first section */
+    if (zcs->nextJobID) zcs->jobs[jobID].params.fParams.checksumFlag = 0;
+    zcs->jobs[jobID].cdict = zcs->nextJobID==0 ? zcs->cdict : NULL;
+    zcs->jobs[jobID].fullFrameSize = zcs->frameContentSize;
+    zcs->jobs[jobID].dstBuff = dstBuffer;
+    zcs->jobs[jobID].cctx = cctx;
+    zcs->jobs[jobID].firstChunk = (zcs->nextJobID==0);
+    zcs->jobs[jobID].lastChunk = endFrame;
+    zcs->jobs[jobID].jobCompleted = 0;
+    zcs->jobs[jobID].dstFlushed = 0;
+    zcs->jobs[jobID].jobCompleted_mutex = &zcs->jobCompleted_mutex;
+    zcs->jobs[jobID].jobCompleted_cond = &zcs->jobCompleted_cond;
+
+    /* get a new buffer for next input */
+    if (!endFrame) {
+        size_t const newDictSize = MIN(srcSize + zcs->dictSize, zcs->targetDictSize);
+        DEBUGLOG(5, "ZSTDMT_createCompressionJob::endFrame = %u", endFrame);
+        zcs->inBuff.buffer = ZSTDMT_getBuffer(zcs->buffPool, zcs->inBuffSize);
+        if (zcs->inBuff.buffer.start == NULL) {   /* not enough memory to allocate next input buffer */
+            zcs->jobs[jobID].jobCompleted = 1;
+            zcs->nextJobID++;
+            ZSTDMT_waitForAllJobsCompleted(zcs);
+            ZSTDMT_releaseAllJobResources(zcs);
+            return ERROR(memory_allocation);
+        }
+        DEBUGLOG(5, "inBuff currently filled to %u", (U32)zcs->inBuff.filled);
+        zcs->inBuff.filled -= srcSize + zcs->dictSize - newDictSize;
+        DEBUGLOG(5, "new job : inBuff filled to %u, with %u dict and %u src",
+                    (U32)zcs->inBuff.filled, (U32)newDictSize,
+                    (U32)(zcs->inBuff.filled - newDictSize));
+        memmove(zcs->inBuff.buffer.start,
+            (const char*)zcs->jobs[jobID].srcStart + zcs->dictSize + srcSize - newDictSize,
+            zcs->inBuff.filled);
+        DEBUGLOG(5, "new inBuff pre-filled");
+        zcs->dictSize = newDictSize;
+    } else {   /* if (endFrame==1) */
+        DEBUGLOG(5, "ZSTDMT_createCompressionJob::endFrame = %u", endFrame);
+        zcs->inBuff.buffer = g_nullBuffer;
+        zcs->inBuff.filled = 0;
+        zcs->dictSize = 0;
+        zcs->frameEnded = 1;
+        if (zcs->nextJobID == 0)
+            /* single chunk exception : checksum is calculated directly within worker thread */
+            zcs->params.fParams.checksumFlag = 0;
+    }
+
+    DEBUGLOG(4, "posting job %u : %u bytes  (end:%u) (note : doneJob = %u=>%u)",
+                zcs->nextJobID,
+                (U32)zcs->jobs[jobID].srcSize,
+                zcs->jobs[jobID].lastChunk,
+                zcs->doneJobID,
+                zcs->doneJobID & zcs->jobIDMask);
+    POOL_add(zcs->factory, ZSTDMT_compressChunk, &zcs->jobs[jobID]);   /* this call is blocking when thread worker pool is exhausted */
+    zcs->nextJobID++;
+    return 0;
+}
+
+
+/* ZSTDMT_flushNextJob() :
+ * output : will be updated with amount of data flushed .
+ * blockToFlush : if >0, the function will block and wait if there is no data available to flush .
+ * @return : amount of data remaining within internal buffer, 1 if unknown but > 0, 0 if no more, or an error code */
+static size_t ZSTDMT_flushNextJob(ZSTDMT_CCtx* zcs, ZSTD_outBuffer* output, unsigned blockToFlush)
+{
+    unsigned const wJobID = zcs->doneJobID & zcs->jobIDMask;
+    if (zcs->doneJobID == zcs->nextJobID) return 0;   /* all flushed ! */
+    PTHREAD_MUTEX_LOCK(&zcs->jobCompleted_mutex);
+    while (zcs->jobs[wJobID].jobCompleted==0) {
+        DEBUGLOG(5, "waiting for jobCompleted signal from job %u", zcs->doneJobID);
+        if (!blockToFlush) { pthread_mutex_unlock(&zcs->jobCompleted_mutex); return 0; }  /* nothing ready to be flushed => skip */
+        pthread_cond_wait(&zcs->jobCompleted_cond, &zcs->jobCompleted_mutex);  /* block when nothing available to flush */
+    }
+    pthread_mutex_unlock(&zcs->jobCompleted_mutex);
+    /* compression job completed : output can be flushed */
+    {   ZSTDMT_jobDescription job = zcs->jobs[wJobID];
+        if (!job.jobScanned) {
+            if (ZSTD_isError(job.cSize)) {
+                DEBUGLOG(5, "compression error detected ");
+                ZSTDMT_waitForAllJobsCompleted(zcs);
+                ZSTDMT_releaseAllJobResources(zcs);
+                return job.cSize;
+            }
+            ZSTDMT_releaseCCtx(zcs->cctxPool, job.cctx);
+            zcs->jobs[wJobID].cctx = NULL;
+            DEBUGLOG(5, "zcs->params.fParams.checksumFlag : %u ", zcs->params.fParams.checksumFlag);
+            if (zcs->params.fParams.checksumFlag) {
+                XXH64_update(&zcs->xxhState, (const char*)job.srcStart + job.dictSize, job.srcSize);
+                if (zcs->frameEnded && (zcs->doneJobID+1 == zcs->nextJobID)) {  /* write checksum at end of last section */
+                    U32 const checksum = (U32)XXH64_digest(&zcs->xxhState);
+                    DEBUGLOG(5, "writing checksum : %08X \n", checksum);
+                    MEM_writeLE32((char*)job.dstBuff.start + job.cSize, checksum);
+                    job.cSize += 4;
+                    zcs->jobs[wJobID].cSize += 4;
+            }   }
+            ZSTDMT_releaseBuffer(zcs->buffPool, job.src);
+            zcs->jobs[wJobID].srcStart = NULL;
+            zcs->jobs[wJobID].src = g_nullBuffer;
+            zcs->jobs[wJobID].jobScanned = 1;
+        }
+        {   size_t const toWrite = MIN(job.cSize - job.dstFlushed, output->size - output->pos);
+            DEBUGLOG(5, "Flushing %u bytes from job %u ", (U32)toWrite, zcs->doneJobID);
+            memcpy((char*)output->dst + output->pos, (const char*)job.dstBuff.start + job.dstFlushed, toWrite);
+            output->pos += toWrite;
+            job.dstFlushed += toWrite;
+        }
+        if (job.dstFlushed == job.cSize) {   /* output buffer fully flushed => move to next one */
+            ZSTDMT_releaseBuffer(zcs->buffPool, job.dstBuff);
+            zcs->jobs[wJobID].dstBuff = g_nullBuffer;
+            zcs->jobs[wJobID].jobCompleted = 0;
+            zcs->doneJobID++;
+        } else {
+            zcs->jobs[wJobID].dstFlushed = job.dstFlushed;
+        }
+        /* return value : how many bytes left in buffer ; fake it to 1 if unknown but >0 */
+        if (job.cSize > job.dstFlushed) return (job.cSize - job.dstFlushed);
+        if (zcs->doneJobID < zcs->nextJobID) return 1;   /* still some buffer to flush */
+        zcs->allJobsCompleted = zcs->frameEnded;   /* frame completed and entirely flushed */
+        return 0;   /* everything flushed */
+}   }
+
+
+/** ZSTDMT_compressStream_generic() :
+ *  internal use only
+ *  assumption : output and input are valid (pos <= size)
+ * @return : minimum amount of data remaining to flush, 0 if none */
+size_t ZSTDMT_compressStream_generic(ZSTDMT_CCtx* mtctx,
+                                     ZSTD_outBuffer* output,
+                                     ZSTD_inBuffer* input,
+                                     ZSTD_EndDirective endOp)
+{
+    size_t const newJobThreshold = mtctx->dictSize + mtctx->targetSectionSize + mtctx->marginSize;
+    assert(output->pos <= output->size);
+    assert(input->pos  <= input->size);
+    if ((mtctx->frameEnded) && (endOp==ZSTD_e_continue)) {
+        /* current frame being ended. Only flush/end are allowed. Or start new frame with init */
+        return ERROR(stage_wrong);
+    }
+    if (mtctx->nbThreads==1) {
+        return ZSTD_compressStream_generic(mtctx->cctxPool->cctx[0], output, input, endOp);
+    }
+
+    /* single-pass shortcut (note : this is blocking-mode) */
+    if ( (mtctx->nextJobID==0)      /* just started */
+      && (mtctx->inBuff.filled==0)  /* nothing buffered */
+      && (endOp==ZSTD_e_end)        /* end order */
+      && (output->size - output->pos >= ZSTD_compressBound(input->size - input->pos)) ) { /* enough room */
+        size_t const cSize = ZSTDMT_compress_advanced(mtctx,
+                (char*)output->dst + output->pos, output->size - output->pos,
+                (const char*)input->src + input->pos, input->size - input->pos,
+                mtctx->cdict, mtctx->params, mtctx->overlapRLog);
+        if (ZSTD_isError(cSize)) return cSize;
+        input->pos = input->size;
+        output->pos += cSize;
+        ZSTDMT_releaseBuffer(mtctx->buffPool, mtctx->inBuff.buffer);  /* was allocated in initStream */
+        mtctx->allJobsCompleted = 1;
+        mtctx->frameEnded = 1;
+        return 0;
+    }
+
+    /* fill input buffer */
+    if ((input->src) && (mtctx->inBuff.buffer.start)) {   /* support NULL input */
+        size_t const toLoad = MIN(input->size - input->pos, mtctx->inBuffSize - mtctx->inBuff.filled);
+        DEBUGLOG(2, "inBuff:%08X;  inBuffSize=%u;  ToCopy=%u", (U32)(size_t)mtctx->inBuff.buffer.start, (U32)mtctx->inBuffSize, (U32)toLoad);
+        memcpy((char*)mtctx->inBuff.buffer.start + mtctx->inBuff.filled, (const char*)input->src + input->pos, toLoad);
+        input->pos += toLoad;
+        mtctx->inBuff.filled += toLoad;
+    }
+
+    if ( (mtctx->inBuff.filled >= newJobThreshold)  /* filled enough : let's compress */
+      && (mtctx->nextJobID <= mtctx->doneJobID + mtctx->jobIDMask) ) {   /* avoid overwriting job round buffer */
+        CHECK_F( ZSTDMT_createCompressionJob(mtctx, mtctx->targetSectionSize, 0 /* endFrame */) );
+    }
+
+    /* check for potential compressed data ready to be flushed */
+    CHECK_F( ZSTDMT_flushNextJob(mtctx, output, (mtctx->inBuff.filled == mtctx->inBuffSize) /* blockToFlush */) ); /* block if it wasn't possible to create new job due to saturation */
+
+    if (input->pos < input->size)  /* input not consumed : do not flush yet */
+        endOp = ZSTD_e_continue;
+
+    switch(endOp)
+    {
+        case ZSTD_e_flush:
+            return ZSTDMT_flushStream(mtctx, output);
+        case ZSTD_e_end:
+            return ZSTDMT_endStream(mtctx, output);
+        case ZSTD_e_continue:
+            return 1;
+        default:
+            return ERROR(GENERIC);   /* invalid endDirective */
+    }
+}
+
+
+size_t ZSTDMT_compressStream(ZSTDMT_CCtx* zcs, ZSTD_outBuffer* output, ZSTD_inBuffer* input)
+{
+    CHECK_F( ZSTDMT_compressStream_generic(zcs, output, input, ZSTD_e_continue) );
+
+    /* recommended next input size : fill current input buffer */
+    return zcs->inBuffSize - zcs->inBuff.filled;   /* note : could be zero when input buffer is fully filled and no more availability to create new job */
+}
+
+
+static size_t ZSTDMT_flushStream_internal(ZSTDMT_CCtx* zcs, ZSTD_outBuffer* output, unsigned endFrame)
+{
+    size_t const srcSize = zcs->inBuff.filled - zcs->dictSize;
+
+    if ( ((srcSize > 0) || (endFrame && !zcs->frameEnded))
+       && (zcs->nextJobID <= zcs->doneJobID + zcs->jobIDMask) ) {
+        CHECK_F( ZSTDMT_createCompressionJob(zcs, srcSize, endFrame) );
+    }
+
+    /* check if there is any data available to flush */
+    return ZSTDMT_flushNextJob(zcs, output, 1 /* blockToFlush */);
+}
+
+
+size_t ZSTDMT_flushStream(ZSTDMT_CCtx* zcs, ZSTD_outBuffer* output)
+{
+    DEBUGLOG(5, "ZSTDMT_flushStream");
+    if (zcs->nbThreads==1)
+        return ZSTD_flushStream(zcs->cctxPool->cctx[0], output);
+    return ZSTDMT_flushStream_internal(zcs, output, 0 /* endFrame */);
+}
+
+size_t ZSTDMT_endStream(ZSTDMT_CCtx* zcs, ZSTD_outBuffer* output)
+{
+    DEBUGLOG(4, "ZSTDMT_endStream");
+    if (zcs->nbThreads==1)
+        return ZSTD_endStream(zcs->cctxPool->cctx[0], output);
+    return ZSTDMT_flushStream_internal(zcs, output, 1 /* endFrame */);
+}
diff --git a/contrib/libzstd/include/zstd/compress/zstdmt_compress.h b/contrib/libzstd/include/zstd/compress/zstdmt_compress.h
new file mode 100644
index 00000000000..fad63b6d861
--- /dev/null
+++ b/contrib/libzstd/include/zstd/compress/zstdmt_compress.h
@@ -0,0 +1,114 @@
+/**
+ * Copyright (c) 2016-present, Yann Collet, Facebook, Inc.
+ * All rights reserved.
+ *
+ * This source code is licensed under the BSD-style license found in the
+ * LICENSE file in the root directory of this source tree. An additional grant
+ * of patent rights can be found in the PATENTS file in the same directory.
+ */
+
+ #ifndef ZSTDMT_COMPRESS_H
+ #define ZSTDMT_COMPRESS_H
+
+ #if defined (__cplusplus)
+ extern "C" {
+ #endif
+
+
+/* Note : All prototypes defined in this file are labelled experimental.
+ *        No guarantee of API continuity is provided on any of them.
+ *        In fact, the expectation is that these prototypes will be replaced
+ *        by ZSTD_compress_generic() API in the near future */
+
+/* ===   Dependencies   === */
+#include <stddef.h>                /* size_t */
+#define ZSTD_STATIC_LINKING_ONLY   /* ZSTD_parameters */
+#include "zstd.h"            /* ZSTD_inBuffer, ZSTD_outBuffer, ZSTDLIB_API */
+
+
+/* ===   Memory management   === */
+typedef struct ZSTDMT_CCtx_s ZSTDMT_CCtx;
+ZSTDLIB_API ZSTDMT_CCtx* ZSTDMT_createCCtx(unsigned nbThreads);
+ZSTDLIB_API ZSTDMT_CCtx* ZSTDMT_createCCtx_advanced(unsigned nbThreads,
+                                                    ZSTD_customMem cMem);
+ZSTDLIB_API size_t ZSTDMT_freeCCtx(ZSTDMT_CCtx* mtctx);
+
+ZSTDLIB_API size_t ZSTDMT_sizeof_CCtx(ZSTDMT_CCtx* mtctx);
+
+
+/* ===   Simple buffer-to-butter one-pass function   === */
+
+ZSTDLIB_API size_t ZSTDMT_compressCCtx(ZSTDMT_CCtx* mtctx,
+                                       void* dst, size_t dstCapacity,
+                                 const void* src, size_t srcSize,
+                                       int compressionLevel);
+
+
+
+/* ===   Streaming functions   === */
+
+ZSTDLIB_API size_t ZSTDMT_initCStream(ZSTDMT_CCtx* mtctx, int compressionLevel);
+ZSTDLIB_API size_t ZSTDMT_resetCStream(ZSTDMT_CCtx* mtctx, unsigned long long pledgedSrcSize);    /**< pledgedSrcSize is optional and can be zero == unknown */
+
+ZSTDLIB_API size_t ZSTDMT_compressStream(ZSTDMT_CCtx* mtctx, ZSTD_outBuffer* output, ZSTD_inBuffer* input);
+
+ZSTDLIB_API size_t ZSTDMT_flushStream(ZSTDMT_CCtx* mtctx, ZSTD_outBuffer* output);   /**< @return : 0 == all flushed; >0 : still some data to be flushed; or an error code (ZSTD_isError()) */
+ZSTDLIB_API size_t ZSTDMT_endStream(ZSTDMT_CCtx* mtctx, ZSTD_outBuffer* output);     /**< @return : 0 == all flushed; >0 : still some data to be flushed; or an error code (ZSTD_isError()) */
+
+
+/* ===   Advanced functions and parameters  === */
+
+#ifndef ZSTDMT_SECTION_SIZE_MIN
+#  define ZSTDMT_SECTION_SIZE_MIN (1U << 20)   /* 1 MB - Minimum size of each compression job */
+#endif
+
+ZSTDLIB_API size_t ZSTDMT_compress_advanced(ZSTDMT_CCtx* mtctx,
+                                           void* dst, size_t dstCapacity,
+                                     const void* src, size_t srcSize,
+                                     const ZSTD_CDict* cdict,
+                                           ZSTD_parameters const params,
+                                           unsigned overlapRLog);
+
+ZSTDLIB_API size_t ZSTDMT_initCStream_advanced(ZSTDMT_CCtx* mtctx,
+                                        const void* dict, size_t dictSize,   /* dict can be released after init, a local copy is preserved within zcs */
+                                        ZSTD_parameters params,
+                                        unsigned long long pledgedSrcSize);  /* pledgedSrcSize is optional and can be zero == unknown */
+
+ZSTDLIB_API size_t ZSTDMT_initCStream_usingCDict(ZSTDMT_CCtx* mtctx,
+                                        const ZSTD_CDict* cdict,
+                                        ZSTD_frameParameters fparams,
+                                        unsigned long long pledgedSrcSize);  /* note : zero means empty */
+
+/* ZSDTMT_parameter :
+ * List of parameters that can be set using ZSTDMT_setMTCtxParameter() */
+typedef enum {
+    ZSTDMT_p_sectionSize,        /* size of input "section". Each section is compressed in parallel. 0 means default, which is dynamically determined within compression functions */
+    ZSTDMT_p_overlapSectionLog   /* Log of overlapped section; 0 == no overlap, 6(default) == use 1/8th of window, >=9 == use full window */
+} ZSDTMT_parameter;
+
+/* ZSTDMT_setMTCtxParameter() :
+ * allow setting individual parameters, one at a time, among a list of enums defined in ZSTDMT_parameter.
+ * The function must be called typically after ZSTD_createCCtx().
+ * Parameters not explicitly reset by ZSTDMT_init*() remain the same in consecutive compression sessions.
+ * @return : 0, or an error code (which can be tested using ZSTD_isError()) */
+ZSTDLIB_API size_t ZSTDMT_setMTCtxParameter(ZSTDMT_CCtx* mtctx, ZSDTMT_parameter parameter, unsigned value);
+
+
+/*! ZSTDMT_compressStream_generic() :
+ *  Combines ZSTDMT_compressStream() with ZSTDMT_flushStream() or ZSTDMT_endStream()
+ *  depending on flush directive.
+ * @return : minimum amount of data still to be flushed
+ *           0 if fully flushed
+ *           or an error code */
+ZSTDLIB_API size_t ZSTDMT_compressStream_generic(ZSTDMT_CCtx* mtctx,
+                                                ZSTD_outBuffer* output,
+                                                ZSTD_inBuffer* input,
+                                                ZSTD_EndDirective endOp);
+
+
+
+#if defined (__cplusplus)
+}
+#endif
+
+#endif   /* ZSTDMT_COMPRESS_H */
diff --git a/contrib/libzstd/include/zstd/decompress/huf_decompress.c b/contrib/libzstd/include/zstd/decompress/huf_decompress.c
index e94fa83ccd7..2a1b70ea5ef 100644
--- a/contrib/libzstd/include/zstd/decompress/huf_decompress.c
+++ b/contrib/libzstd/include/zstd/decompress/huf_decompress.c
@@ -35,16 +35,19 @@
 /* **************************************************************
 *  Compiler specifics
 ****************************************************************/
-#if defined (__cplusplus) || (defined (__STDC_VERSION__) && (__STDC_VERSION__ >= 199901L) /* C99 */)
-/* inline is defined */
-#elif defined(_MSC_VER) || defined(__GNUC__)
-#  define inline __inline
-#else
-#  define inline /* disable inline */
-#endif
-
 #ifdef _MSC_VER    /* Visual Studio */
+#  define FORCE_INLINE static __forceinline
 #  pragma warning(disable : 4127)        /* disable: C4127: conditional expression is constant */
+#else
+#  if defined (__cplusplus) || defined (__STDC_VERSION__) && __STDC_VERSION__ >= 199901L   /* C99 */
+#    ifdef __GNUC__
+#      define FORCE_INLINE static inline __attribute__((always_inline))
+#    else
+#      define FORCE_INLINE static inline
+#    endif
+#  else
+#    define FORCE_INLINE static
+#  endif /* __STDC_VERSION__ */
 #endif
 
 
@@ -64,6 +67,12 @@
 #define HUF_STATIC_ASSERT(c) { enum { HUF_static_assert = 1/(int)(!!(c)) }; }   /* use only *after* variable declarations */
 
 
+/* **************************************************************
+*  Byte alignment for workSpace management
+****************************************************************/
+#define HUF_ALIGN(x, a) HUF_ALIGN_MASK((x), (a) - 1)
+#define HUF_ALIGN_MASK(x, mask) (((x) + (mask)) & ~(mask))
+
 /*-***************************/
 /*  generic DTableDesc       */
 /*-***************************/
@@ -84,16 +93,28 @@ static DTableDesc HUF_getDTableDesc(const HUF_DTable* table)
 
 typedef struct { BYTE byte; BYTE nbBits; } HUF_DEltX2;   /* single-symbol decoding */
 
-size_t HUF_readDTableX2 (HUF_DTable* DTable, const void* src, size_t srcSize)
+size_t HUF_readDTableX2_wksp(HUF_DTable* DTable, const void* src, size_t srcSize, void* workSpace, size_t wkspSize)
 {
-    BYTE huffWeight[HUF_SYMBOLVALUE_MAX + 1];
-    U32 rankVal[HUF_TABLELOG_ABSOLUTEMAX + 1];   /* large enough for values from 0 to 16 */
     U32 tableLog = 0;
     U32 nbSymbols = 0;
     size_t iSize;
     void* const dtPtr = DTable + 1;
     HUF_DEltX2* const dt = (HUF_DEltX2*)dtPtr;
 
+    U32* rankVal;
+    BYTE* huffWeight;
+    size_t spaceUsed32 = 0;
+
+    rankVal = (U32 *)workSpace + spaceUsed32;
+    spaceUsed32 += HUF_TABLELOG_ABSOLUTEMAX + 1;
+    huffWeight = (BYTE *)((U32 *)workSpace + spaceUsed32);
+    spaceUsed32 += HUF_ALIGN(HUF_SYMBOLVALUE_MAX + 1, sizeof(U32)) >> 2;
+
+    if ((spaceUsed32 << 2) > wkspSize)
+        return ERROR(tableLog_tooLarge);
+    workSpace = (U32 *)workSpace + spaceUsed32;
+    wkspSize -= (spaceUsed32 << 2);
+
     HUF_STATIC_ASSERT(sizeof(DTableDesc) == sizeof(HUF_DTable));
     /* memset(huffWeight, 0, sizeof(huffWeight)); */   /* is not necessary, even though some analyzer complain ... */
 
@@ -102,16 +123,16 @@ size_t HUF_readDTableX2 (HUF_DTable* DTable, const void* src, size_t srcSize)
 
     /* Table header */
     {   DTableDesc dtd = HUF_getDTableDesc(DTable);
-        if (tableLog > (U32)(dtd.maxTableLog+1)) return ERROR(tableLog_tooLarge);   /* DTable too small, huffman tree cannot fit in */
+        if (tableLog > (U32)(dtd.maxTableLog+1)) return ERROR(tableLog_tooLarge);   /* DTable too small, Huffman tree cannot fit in */
         dtd.tableType = 0;
         dtd.tableLog = (BYTE)tableLog;
         memcpy(DTable, &dtd, sizeof(dtd));
     }
 
-    /* Prepare ranks */
+    /* Calculate starting value for each rank */
     {   U32 n, nextRankStart = 0;
         for (n=1; n<tableLog+1; n++) {
-            U32 current = nextRankStart;
+            U32 const current = nextRankStart;
             nextRankStart += (rankVal[n] << (n-1));
             rankVal[n] = current;
     }   }
@@ -121,17 +142,24 @@ size_t HUF_readDTableX2 (HUF_DTable* DTable, const void* src, size_t srcSize)
         for (n=0; n<nbSymbols; n++) {
             U32 const w = huffWeight[n];
             U32 const length = (1 << w) >> 1;
-            U32 i;
+            U32 u;
             HUF_DEltX2 D;
             D.byte = (BYTE)n; D.nbBits = (BYTE)(tableLog + 1 - w);
-            for (i = rankVal[w]; i < rankVal[w] + length; i++)
-                dt[i] = D;
+            for (u = rankVal[w]; u < rankVal[w] + length; u++)
+                dt[u] = D;
             rankVal[w] += length;
     }   }
 
     return iSize;
 }
 
+size_t HUF_readDTableX2(HUF_DTable* DTable, const void* src, size_t srcSize)
+{
+    U32 workSpace[HUF_DECOMPRESS_WORKSPACE_SIZE_U32];
+    return HUF_readDTableX2_wksp(DTable, src, srcSize,
+                                 workSpace, sizeof(workSpace));
+}
+
 
 static BYTE HUF_decodeSymbolX2(BIT_DStream_t* Dstream, const HUF_DEltX2* dt, const U32 dtLog)
 {
@@ -152,7 +180,7 @@ static BYTE HUF_decodeSymbolX2(BIT_DStream_t* Dstream, const HUF_DEltX2* dt, con
     if (MEM_64bits()) \
         HUF_DECODE_SYMBOLX2_0(ptr, DStreamPtr)
 
-static inline size_t HUF_decodeStreamX2(BYTE* p, BIT_DStream_t* const bitDPtr, BYTE* const pEnd, const HUF_DEltX2* const dt, const U32 dtLog)
+FORCE_INLINE size_t HUF_decodeStreamX2(BYTE* p, BIT_DStream_t* const bitDPtr, BYTE* const pEnd, const HUF_DEltX2* const dt, const U32 dtLog)
 {
     BYTE* const pStart = p;
 
@@ -209,11 +237,13 @@ size_t HUF_decompress1X2_usingDTable(
     return HUF_decompress1X2_usingDTable_internal(dst, dstSize, cSrc, cSrcSize, DTable);
 }
 
-size_t HUF_decompress1X2_DCtx (HUF_DTable* DCtx, void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize)
+size_t HUF_decompress1X2_DCtx_wksp(HUF_DTable* DCtx, void* dst, size_t dstSize,
+                                   const void* cSrc, size_t cSrcSize,
+                                   void* workSpace, size_t wkspSize)
 {
     const BYTE* ip = (const BYTE*) cSrc;
 
-    size_t const hSize = HUF_readDTableX2 (DCtx, cSrc, cSrcSize);
+    size_t const hSize = HUF_readDTableX2_wksp(DCtx, cSrc, cSrcSize, workSpace, wkspSize);
     if (HUF_isError(hSize)) return hSize;
     if (hSize >= cSrcSize) return ERROR(srcSize_wrong);
     ip += hSize; cSrcSize -= hSize;
@@ -221,6 +251,15 @@ size_t HUF_decompress1X2_DCtx (HUF_DTable* DCtx, void* dst, size_t dstSize, cons
     return HUF_decompress1X2_usingDTable_internal (dst, dstSize, ip, cSrcSize, DCtx);
 }
 
+
+size_t HUF_decompress1X2_DCtx(HUF_DTable* DCtx, void* dst, size_t dstSize,
+                              const void* cSrc, size_t cSrcSize)
+{
+    U32 workSpace[HUF_DECOMPRESS_WORKSPACE_SIZE_U32];
+    return HUF_decompress1X2_DCtx_wksp(DCtx, dst, dstSize, cSrc, cSrcSize,
+                                       workSpace, sizeof(workSpace));
+}
+
 size_t HUF_decompress1X2 (void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize)
 {
     HUF_CREATE_STATIC_DTABLEX2(DTable, HUF_TABLELOG_MAX);
@@ -332,11 +371,14 @@ size_t HUF_decompress4X2_usingDTable(
 }
 
 
-size_t HUF_decompress4X2_DCtx (HUF_DTable* dctx, void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize)
+size_t HUF_decompress4X2_DCtx_wksp(HUF_DTable* dctx, void* dst, size_t dstSize,
+                                   const void* cSrc, size_t cSrcSize,
+                                   void* workSpace, size_t wkspSize)
 {
     const BYTE* ip = (const BYTE*) cSrc;
 
-    size_t const hSize = HUF_readDTableX2 (dctx, cSrc, cSrcSize);
+    size_t const hSize = HUF_readDTableX2_wksp (dctx, cSrc, cSrcSize,
+                                                workSpace, wkspSize);
     if (HUF_isError(hSize)) return hSize;
     if (hSize >= cSrcSize) return ERROR(srcSize_wrong);
     ip += hSize; cSrcSize -= hSize;
@@ -344,6 +386,13 @@ size_t HUF_decompress4X2_DCtx (HUF_DTable* dctx, void* dst, size_t dstSize, cons
     return HUF_decompress4X2_usingDTable_internal (dst, dstSize, ip, cSrcSize, dctx);
 }
 
+
+size_t HUF_decompress4X2_DCtx (HUF_DTable* dctx, void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize)
+{
+    U32 workSpace[HUF_DECOMPRESS_WORKSPACE_SIZE_U32];
+    return HUF_decompress4X2_DCtx_wksp(dctx, dst, dstSize, cSrc, cSrcSize,
+                                       workSpace, sizeof(workSpace));
+}
 size_t HUF_decompress4X2 (void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize)
 {
     HUF_CREATE_STATIC_DTABLEX2(DTable, HUF_TABLELOG_MAX);
@@ -358,13 +407,15 @@ typedef struct { U16 sequence; BYTE nbBits; BYTE length; } HUF_DEltX4;  /* doubl
 
 typedef struct { BYTE symbol; BYTE weight; } sortedSymbol_t;
 
+/* HUF_fillDTableX4Level2() :
+ * `rankValOrigin` must be a table of at least (HUF_TABLELOG_MAX + 1) U32 */
 static void HUF_fillDTableX4Level2(HUF_DEltX4* DTable, U32 sizeLog, const U32 consumed,
                            const U32* rankValOrigin, const int minWeight,
                            const sortedSymbol_t* sortedSymbols, const U32 sortedListSize,
                            U32 nbBitsBaseline, U16 baseSeq)
 {
     HUF_DEltX4 DElt;
-    U32 rankVal[HUF_TABLELOG_ABSOLUTEMAX + 1];
+    U32 rankVal[HUF_TABLELOG_MAX + 1];
 
     /* get pre-calculated rankVal */
     memcpy(rankVal, rankValOrigin, sizeof(rankVal));
@@ -398,14 +449,15 @@ static void HUF_fillDTableX4Level2(HUF_DEltX4* DTable, U32 sizeLog, const U32 co
     }   }
 }
 
-typedef U32 rankVal_t[HUF_TABLELOG_ABSOLUTEMAX][HUF_TABLELOG_ABSOLUTEMAX + 1];
+typedef U32 rankValCol_t[HUF_TABLELOG_MAX + 1];
+typedef rankValCol_t rankVal_t[HUF_TABLELOG_MAX];
 
 static void HUF_fillDTableX4(HUF_DEltX4* DTable, const U32 targetLog,
                            const sortedSymbol_t* sortedList, const U32 sortedListSize,
                            const U32* rankStart, rankVal_t rankValOrigin, const U32 maxWeight,
                            const U32 nbBitsBaseline)
 {
-    U32 rankVal[HUF_TABLELOG_ABSOLUTEMAX + 1];
+    U32 rankVal[HUF_TABLELOG_MAX + 1];
     const int scaleLog = nbBitsBaseline - targetLog;   /* note : targetLog >= srcLog, hence scaleLog <= 1 */
     const U32 minBits  = nbBitsBaseline - maxWeight;
     U32 s;
@@ -442,23 +494,46 @@ static void HUF_fillDTableX4(HUF_DEltX4* DTable, const U32 targetLog,
     }
 }
 
-size_t HUF_readDTableX4 (HUF_DTable* DTable, const void* src, size_t srcSize)
+size_t HUF_readDTableX4_wksp(HUF_DTable* DTable, const void* src,
+                             size_t srcSize, void* workSpace,
+                             size_t wkspSize)
 {
-    BYTE weightList[HUF_SYMBOLVALUE_MAX + 1];
-    sortedSymbol_t sortedSymbol[HUF_SYMBOLVALUE_MAX + 1];
-    U32 rankStats[HUF_TABLELOG_ABSOLUTEMAX + 1] = { 0 };
-    U32 rankStart0[HUF_TABLELOG_ABSOLUTEMAX + 2] = { 0 };
-    U32* const rankStart = rankStart0+1;
-    rankVal_t rankVal;
     U32 tableLog, maxW, sizeOfSort, nbSymbols;
     DTableDesc dtd = HUF_getDTableDesc(DTable);
     U32 const maxTableLog = dtd.maxTableLog;
     size_t iSize;
     void* dtPtr = DTable+1;   /* force compiler to avoid strict-aliasing */
     HUF_DEltX4* const dt = (HUF_DEltX4*)dtPtr;
+    U32 *rankStart;
 
-    HUF_STATIC_ASSERT(sizeof(HUF_DEltX4) == sizeof(HUF_DTable));   /* if compilation fails here, assertion is false */
-    if (maxTableLog > HUF_TABLELOG_ABSOLUTEMAX) return ERROR(tableLog_tooLarge);
+    rankValCol_t* rankVal;
+    U32* rankStats;
+    U32* rankStart0;
+    sortedSymbol_t* sortedSymbol;
+    BYTE* weightList;
+    size_t spaceUsed32 = 0;
+
+    rankVal = (rankValCol_t *)((U32 *)workSpace + spaceUsed32);
+    spaceUsed32 += (sizeof(rankValCol_t) * HUF_TABLELOG_MAX) >> 2;
+    rankStats = (U32 *)workSpace + spaceUsed32;
+    spaceUsed32 += HUF_TABLELOG_MAX + 1;
+    rankStart0 = (U32 *)workSpace + spaceUsed32;
+    spaceUsed32 += HUF_TABLELOG_MAX + 2;
+    sortedSymbol = (sortedSymbol_t *)workSpace + (spaceUsed32 * sizeof(U32)) / sizeof(sortedSymbol_t);
+    spaceUsed32 += HUF_ALIGN(sizeof(sortedSymbol_t) * (HUF_SYMBOLVALUE_MAX + 1), sizeof(U32)) >> 2;
+    weightList = (BYTE *)((U32 *)workSpace + spaceUsed32);
+    spaceUsed32 += HUF_ALIGN(HUF_SYMBOLVALUE_MAX + 1, sizeof(U32)) >> 2;
+
+    if ((spaceUsed32 << 2) > wkspSize)
+        return ERROR(tableLog_tooLarge);
+    workSpace = (U32 *)workSpace + spaceUsed32;
+    wkspSize -= (spaceUsed32 << 2);
+
+    rankStart = rankStart0 + 1;
+    memset(rankStats, 0, sizeof(U32) * (2 * HUF_TABLELOG_MAX + 2 + 1));
+
+    HUF_STATIC_ASSERT(sizeof(HUF_DEltX4) == sizeof(HUF_DTable));   /* if compiler fails here, assertion is wrong */
+    if (maxTableLog > HUF_TABLELOG_MAX) return ERROR(tableLog_tooLarge);
     /* memset(weightList, 0, sizeof(weightList)); */  /* is not necessary, even though some analyzer complain ... */
 
     iSize = HUF_readStats(weightList, HUF_SYMBOLVALUE_MAX + 1, rankStats, &nbSymbols, &tableLog, src, srcSize);
@@ -522,6 +597,12 @@ size_t HUF_readDTableX4 (HUF_DTable* DTable, const void* src, size_t srcSize)
     return iSize;
 }
 
+size_t HUF_readDTableX4(HUF_DTable* DTable, const void* src, size_t srcSize)
+{
+  U32 workSpace[HUF_DECOMPRESS_WORKSPACE_SIZE_U32];
+  return HUF_readDTableX4_wksp(DTable, src, srcSize,
+                               workSpace, sizeof(workSpace));
+}
 
 static U32 HUF_decodeSymbolX4(void* op, BIT_DStream_t* DStream, const HUF_DEltX4* dt, const U32 dtLog)
 {
@@ -540,7 +621,8 @@ static U32 HUF_decodeLastSymbolX4(void* op, BIT_DStream_t* DStream, const HUF_DE
         if (DStream->bitsConsumed < (sizeof(DStream->bitContainer)*8)) {
             BIT_skipBits(DStream, dt[val].nbBits);
             if (DStream->bitsConsumed > (sizeof(DStream->bitContainer)*8))
-                DStream->bitsConsumed = (sizeof(DStream->bitContainer)*8);   /* ugly hack; works only because it's the last symbol. Note : can't easily extract nbBits from just this symbol */
+                /* ugly hack; works only because it's the last symbol. Note : can't easily extract nbBits from just this symbol */
+                DStream->bitsConsumed = (sizeof(DStream->bitContainer)*8);
     }   }
     return 1;
 }
@@ -557,7 +639,7 @@ static U32 HUF_decodeLastSymbolX4(void* op, BIT_DStream_t* DStream, const HUF_DE
     if (MEM_64bits()) \
         ptr += HUF_decodeSymbolX4(ptr, DStreamPtr, dt, dtLog)
 
-static inline size_t HUF_decodeStreamX4(BYTE* p, BIT_DStream_t* bitDPtr, BYTE* const pEnd, const HUF_DEltX4* const dt, const U32 dtLog)
+FORCE_INLINE size_t HUF_decodeStreamX4(BYTE* p, BIT_DStream_t* bitDPtr, BYTE* const pEnd, const HUF_DEltX4* const dt, const U32 dtLog)
 {
     BYTE* const pStart = p;
 
@@ -621,11 +703,14 @@ size_t HUF_decompress1X4_usingDTable(
     return HUF_decompress1X4_usingDTable_internal(dst, dstSize, cSrc, cSrcSize, DTable);
 }
 
-size_t HUF_decompress1X4_DCtx (HUF_DTable* DCtx, void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize)
+size_t HUF_decompress1X4_DCtx_wksp(HUF_DTable* DCtx, void* dst, size_t dstSize,
+                                   const void* cSrc, size_t cSrcSize,
+                                   void* workSpace, size_t wkspSize)
 {
     const BYTE* ip = (const BYTE*) cSrc;
 
-    size_t const hSize = HUF_readDTableX4 (DCtx, cSrc, cSrcSize);
+    size_t const hSize = HUF_readDTableX4_wksp(DCtx, cSrc, cSrcSize,
+                                               workSpace, wkspSize);
     if (HUF_isError(hSize)) return hSize;
     if (hSize >= cSrcSize) return ERROR(srcSize_wrong);
     ip += hSize; cSrcSize -= hSize;
@@ -633,6 +718,15 @@ size_t HUF_decompress1X4_DCtx (HUF_DTable* DCtx, void* dst, size_t dstSize, cons
     return HUF_decompress1X4_usingDTable_internal (dst, dstSize, ip, cSrcSize, DCtx);
 }
 
+
+size_t HUF_decompress1X4_DCtx(HUF_DTable* DCtx, void* dst, size_t dstSize,
+                              const void* cSrc, size_t cSrcSize)
+{
+    U32 workSpace[HUF_DECOMPRESS_WORKSPACE_SIZE_U32];
+    return HUF_decompress1X4_DCtx_wksp(DCtx, dst, dstSize, cSrc, cSrcSize,
+                                       workSpace, sizeof(workSpace));
+}
+
 size_t HUF_decompress1X4 (void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize)
 {
     HUF_CREATE_STATIC_DTABLEX4(DTable, HUF_TABLELOG_MAX);
@@ -743,11 +837,14 @@ size_t HUF_decompress4X4_usingDTable(
 }
 
 
-size_t HUF_decompress4X4_DCtx (HUF_DTable* dctx, void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize)
+size_t HUF_decompress4X4_DCtx_wksp(HUF_DTable* dctx, void* dst, size_t dstSize,
+                                   const void* cSrc, size_t cSrcSize,
+                                   void* workSpace, size_t wkspSize)
 {
     const BYTE* ip = (const BYTE*) cSrc;
 
-    size_t hSize = HUF_readDTableX4 (dctx, cSrc, cSrcSize);
+    size_t hSize = HUF_readDTableX4_wksp(dctx, cSrc, cSrcSize,
+                                         workSpace, wkspSize);
     if (HUF_isError(hSize)) return hSize;
     if (hSize >= cSrcSize) return ERROR(srcSize_wrong);
     ip += hSize; cSrcSize -= hSize;
@@ -755,6 +852,15 @@ size_t HUF_decompress4X4_DCtx (HUF_DTable* dctx, void* dst, size_t dstSize, cons
     return HUF_decompress4X4_usingDTable_internal(dst, dstSize, ip, cSrcSize, dctx);
 }
 
+
+size_t HUF_decompress4X4_DCtx(HUF_DTable* dctx, void* dst, size_t dstSize,
+                              const void* cSrc, size_t cSrcSize)
+{
+    U32 workSpace[HUF_DECOMPRESS_WORKSPACE_SIZE_U32];
+    return HUF_decompress4X4_DCtx_wksp(dctx, dst, dstSize, cSrc, cSrcSize,
+                                       workSpace, sizeof(workSpace));
+}
+
 size_t HUF_decompress4X4 (void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize)
 {
     HUF_CREATE_STATIC_DTABLEX4(DTable, HUF_TABLELOG_MAX);
@@ -856,19 +962,32 @@ size_t HUF_decompress4X_DCtx (HUF_DTable* dctx, void* dst, size_t dstSize, const
     }
 }
 
-size_t HUF_decompress4X_hufOnly (HUF_DTable* dctx, void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize)
+size_t HUF_decompress4X_hufOnly(HUF_DTable* dctx, void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize)
+{
+    U32 workSpace[HUF_DECOMPRESS_WORKSPACE_SIZE_U32];
+    return HUF_decompress4X_hufOnly_wksp(dctx, dst, dstSize, cSrc, cSrcSize,
+                                         workSpace, sizeof(workSpace));
+}
+
+
+size_t HUF_decompress4X_hufOnly_wksp(HUF_DTable* dctx, void* dst,
+                                     size_t dstSize, const void* cSrc,
+                                     size_t cSrcSize, void* workSpace,
+                                     size_t wkspSize)
 {
     /* validation checks */
     if (dstSize == 0) return ERROR(dstSize_tooSmall);
     if ((cSrcSize >= dstSize) || (cSrcSize <= 1)) return ERROR(corruption_detected);   /* invalid */
 
     {   U32 const algoNb = HUF_selectDecoder(dstSize, cSrcSize);
-        return algoNb ? HUF_decompress4X4_DCtx(dctx, dst, dstSize, cSrc, cSrcSize) :
-                        HUF_decompress4X2_DCtx(dctx, dst, dstSize, cSrc, cSrcSize) ;
+        return algoNb ? HUF_decompress4X4_DCtx_wksp(dctx, dst, dstSize, cSrc, cSrcSize, workSpace, wkspSize):
+                        HUF_decompress4X2_DCtx_wksp(dctx, dst, dstSize, cSrc, cSrcSize, workSpace, wkspSize);
     }
 }
 
-size_t HUF_decompress1X_DCtx (HUF_DTable* dctx, void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize)
+size_t HUF_decompress1X_DCtx_wksp(HUF_DTable* dctx, void* dst, size_t dstSize,
+                                  const void* cSrc, size_t cSrcSize,
+                                  void* workSpace, size_t wkspSize)
 {
     /* validation checks */
     if (dstSize == 0) return ERROR(dstSize_tooSmall);
@@ -877,7 +996,17 @@ size_t HUF_decompress1X_DCtx (HUF_DTable* dctx, void* dst, size_t dstSize, const
     if (cSrcSize == 1) { memset(dst, *(const BYTE*)cSrc, dstSize); return dstSize; }   /* RLE */
 
     {   U32 const algoNb = HUF_selectDecoder(dstSize, cSrcSize);
-        return algoNb ? HUF_decompress1X4_DCtx(dctx, dst, dstSize, cSrc, cSrcSize) :
-                        HUF_decompress1X2_DCtx(dctx, dst, dstSize, cSrc, cSrcSize) ;
+        return algoNb ? HUF_decompress1X4_DCtx_wksp(dctx, dst, dstSize, cSrc,
+                                cSrcSize, workSpace, wkspSize):
+                        HUF_decompress1X2_DCtx_wksp(dctx, dst, dstSize, cSrc,
+                                cSrcSize, workSpace, wkspSize);
     }
 }
+
+size_t HUF_decompress1X_DCtx(HUF_DTable* dctx, void* dst, size_t dstSize,
+                             const void* cSrc, size_t cSrcSize)
+{
+    U32 workSpace[HUF_DECOMPRESS_WORKSPACE_SIZE_U32];
+    return HUF_decompress1X_DCtx_wksp(dctx, dst, dstSize, cSrc, cSrcSize,
+                                      workSpace, sizeof(workSpace));
+}
diff --git a/contrib/libzstd/include/zstd/decompress/zbuff_decompress.c b/contrib/libzstd/include/zstd/decompress/zbuff_decompress.c
deleted file mode 100644
index b20ee9705cf..00000000000
--- a/contrib/libzstd/include/zstd/decompress/zbuff_decompress.c
+++ /dev/null
@@ -1,252 +0,0 @@
-/**
- * Copyright (c) 2016-present, Yann Collet, Facebook, Inc.
- * All rights reserved.
- *
- * This source code is licensed under the BSD-style license found in the
- * LICENSE file in the root directory of this source tree. An additional grant
- * of patent rights can be found in the PATENTS file in the same directory.
- */
-
-
-
-/* *************************************
-*  Dependencies
-***************************************/
-#include <stdlib.h>
-#include "error_private.h"
-#include "zstd_internal.h"  /* MIN, ZSTD_blockHeaderSize, ZSTD_BLOCKSIZE_MAX */
-#define ZBUFF_STATIC_LINKING_ONLY
-#include "zbuff.h"
-
-
-typedef enum { ZBUFFds_init, ZBUFFds_loadHeader,
-               ZBUFFds_read, ZBUFFds_load, ZBUFFds_flush } ZBUFF_dStage;
-
-/* *** Resource management *** */
-struct ZBUFF_DCtx_s {
-    ZSTD_DCtx* zd;
-    ZSTD_frameParams fParams;
-    ZBUFF_dStage stage;
-    char*  inBuff;
-    size_t inBuffSize;
-    size_t inPos;
-    char*  outBuff;
-    size_t outBuffSize;
-    size_t outStart;
-    size_t outEnd;
-    size_t blockSize;
-    BYTE headerBuffer[ZSTD_FRAMEHEADERSIZE_MAX];
-    size_t lhSize;
-    ZSTD_customMem customMem;
-};   /* typedef'd to ZBUFF_DCtx within "zbuff.h" */
-
-
-ZBUFF_DCtx* ZBUFF_createDCtx(void)
-{
-    return ZBUFF_createDCtx_advanced(defaultCustomMem);
-}
-
-ZBUFF_DCtx* ZBUFF_createDCtx_advanced(ZSTD_customMem customMem)
-{
-    ZBUFF_DCtx* zbd;
-
-    if (!customMem.customAlloc && !customMem.customFree)
-        customMem = defaultCustomMem;
-
-    if (!customMem.customAlloc || !customMem.customFree)
-        return NULL;
-
-    zbd = (ZBUFF_DCtx*)customMem.customAlloc(customMem.opaque, sizeof(ZBUFF_DCtx));
-    if (zbd==NULL) return NULL;
-    memset(zbd, 0, sizeof(ZBUFF_DCtx));
-    memcpy(&zbd->customMem, &customMem, sizeof(ZSTD_customMem));
-    zbd->zd = ZSTD_createDCtx_advanced(customMem);
-    if (zbd->zd == NULL) { ZBUFF_freeDCtx(zbd); return NULL; }
-    zbd->stage = ZBUFFds_init;
-    return zbd;
-}
-
-size_t ZBUFF_freeDCtx(ZBUFF_DCtx* zbd)
-{
-    if (zbd==NULL) return 0;   /* support free on null */
-    ZSTD_freeDCtx(zbd->zd);
-    if (zbd->inBuff) zbd->customMem.customFree(zbd->customMem.opaque, zbd->inBuff);
-    if (zbd->outBuff) zbd->customMem.customFree(zbd->customMem.opaque, zbd->outBuff);
-    zbd->customMem.customFree(zbd->customMem.opaque, zbd);
-    return 0;
-}
-
-
-/* *** Initialization *** */
-
-size_t ZBUFF_decompressInitDictionary(ZBUFF_DCtx* zbd, const void* dict, size_t dictSize)
-{
-    zbd->stage = ZBUFFds_loadHeader;
-    zbd->lhSize = zbd->inPos = zbd->outStart = zbd->outEnd = 0;
-    return ZSTD_decompressBegin_usingDict(zbd->zd, dict, dictSize);
-}
-
-size_t ZBUFF_decompressInit(ZBUFF_DCtx* zbd)
-{
-    return ZBUFF_decompressInitDictionary(zbd, NULL, 0);
-}
-
-
-/* internal util function */
-MEM_STATIC size_t ZBUFF_limitCopy(void* dst, size_t dstCapacity, const void* src, size_t srcSize)
-{
-    size_t const length = MIN(dstCapacity, srcSize);
-    memcpy(dst, src, length);
-    return length;
-}
-
-
-/* *** Decompression *** */
-
-size_t ZBUFF_decompressContinue(ZBUFF_DCtx* zbd,
-                                void* dst, size_t* dstCapacityPtr,
-                          const void* src, size_t* srcSizePtr)
-{
-    const char* const istart = (const char*)src;
-    const char* const iend = istart + *srcSizePtr;
-    const char* ip = istart;
-    char* const ostart = (char*)dst;
-    char* const oend = ostart + *dstCapacityPtr;
-    char* op = ostart;
-    U32 someMoreWork = 1;
-
-    while (someMoreWork) {
-        switch(zbd->stage)
-        {
-        case ZBUFFds_init :
-            return ERROR(init_missing);
-
-        case ZBUFFds_loadHeader :
-            {   size_t const hSize = ZSTD_getFrameParams(&(zbd->fParams), zbd->headerBuffer, zbd->lhSize);
-                if (ZSTD_isError(hSize)) return hSize;
-                if (hSize != 0) {   /* need more input */
-                    size_t const toLoad = hSize - zbd->lhSize;   /* if hSize!=0, hSize > zbd->lhSize */
-                    if (toLoad > (size_t)(iend-ip)) {   /* not enough input to load full header */
-                        memcpy(zbd->headerBuffer + zbd->lhSize, ip, iend-ip);
-                        zbd->lhSize += iend-ip;
-                        *dstCapacityPtr = 0;
-                        return (hSize - zbd->lhSize) + ZSTD_blockHeaderSize;   /* remaining header bytes + next block header */
-                    }
-                    memcpy(zbd->headerBuffer + zbd->lhSize, ip, toLoad); zbd->lhSize = hSize; ip += toLoad;
-                    break;
-            }   }
-
-            /* Consume header */
-            {   size_t const h1Size = ZSTD_nextSrcSizeToDecompress(zbd->zd);  /* == ZSTD_frameHeaderSize_min */
-                size_t const h1Result = ZSTD_decompressContinue(zbd->zd, NULL, 0, zbd->headerBuffer, h1Size);
-                if (ZSTD_isError(h1Result)) return h1Result;   /* should not happen : already checked */
-                if (h1Size < zbd->lhSize) {   /* long header */
-                    size_t const h2Size = ZSTD_nextSrcSizeToDecompress(zbd->zd);
-                    size_t const h2Result = ZSTD_decompressContinue(zbd->zd, NULL, 0, zbd->headerBuffer+h1Size, h2Size);
-                    if (ZSTD_isError(h2Result)) return h2Result;
-            }   }
-
-            zbd->fParams.windowSize = MAX(zbd->fParams.windowSize, 1U << ZSTD_WINDOWLOG_ABSOLUTEMIN);
-
-            /* Frame header instruct buffer sizes */
-            {   size_t const blockSize = MIN(zbd->fParams.windowSize, ZSTD_BLOCKSIZE_ABSOLUTEMAX);
-                size_t const neededOutSize = zbd->fParams.windowSize + blockSize;
-                zbd->blockSize = blockSize;
-                if (zbd->inBuffSize < blockSize) {
-                    zbd->customMem.customFree(zbd->customMem.opaque, zbd->inBuff);
-                    zbd->inBuffSize = blockSize;
-                    zbd->inBuff = (char*)zbd->customMem.customAlloc(zbd->customMem.opaque, blockSize);
-                    if (zbd->inBuff == NULL) return ERROR(memory_allocation);
-                }
-                if (zbd->outBuffSize < neededOutSize) {
-                    zbd->customMem.customFree(zbd->customMem.opaque, zbd->outBuff);
-                    zbd->outBuffSize = neededOutSize;
-                    zbd->outBuff = (char*)zbd->customMem.customAlloc(zbd->customMem.opaque, neededOutSize);
-                    if (zbd->outBuff == NULL) return ERROR(memory_allocation);
-            }   }
-            zbd->stage = ZBUFFds_read;
-            /* pass-through */
-
-        case ZBUFFds_read:
-            {   size_t const neededInSize = ZSTD_nextSrcSizeToDecompress(zbd->zd);
-                if (neededInSize==0) {  /* end of frame */
-                    zbd->stage = ZBUFFds_init;
-                    someMoreWork = 0;
-                    break;
-                }
-                if ((size_t)(iend-ip) >= neededInSize) {  /* decode directly from src */
-                    const int isSkipFrame = ZSTD_isSkipFrame(zbd->zd);
-                    size_t const decodedSize = ZSTD_decompressContinue(zbd->zd,
-                        zbd->outBuff + zbd->outStart, (isSkipFrame ? 0 : zbd->outBuffSize - zbd->outStart),
-                        ip, neededInSize);
-                    if (ZSTD_isError(decodedSize)) return decodedSize;
-                    ip += neededInSize;
-                    if (!decodedSize && !isSkipFrame) break;   /* this was just a header */
-                    zbd->outEnd = zbd->outStart +  decodedSize;
-                    zbd->stage = ZBUFFds_flush;
-                    break;
-                }
-                if (ip==iend) { someMoreWork = 0; break; }   /* no more input */
-                zbd->stage = ZBUFFds_load;
-                /* pass-through */
-            }
-
-        case ZBUFFds_load:
-            {   size_t const neededInSize = ZSTD_nextSrcSizeToDecompress(zbd->zd);
-                size_t const toLoad = neededInSize - zbd->inPos;   /* should always be <= remaining space within inBuff */
-                size_t loadedSize;
-                if (toLoad > zbd->inBuffSize - zbd->inPos) return ERROR(corruption_detected);   /* should never happen */
-                loadedSize = ZBUFF_limitCopy(zbd->inBuff + zbd->inPos, toLoad, ip, iend-ip);
-                ip += loadedSize;
-                zbd->inPos += loadedSize;
-                if (loadedSize < toLoad) { someMoreWork = 0; break; }   /* not enough input, wait for more */
-
-                /* decode loaded input */
-                {  const int isSkipFrame = ZSTD_isSkipFrame(zbd->zd);
-                   size_t const decodedSize = ZSTD_decompressContinue(zbd->zd,
-                        zbd->outBuff + zbd->outStart, zbd->outBuffSize - zbd->outStart,
-                        zbd->inBuff, neededInSize);
-                    if (ZSTD_isError(decodedSize)) return decodedSize;
-                    zbd->inPos = 0;   /* input is consumed */
-                    if (!decodedSize && !isSkipFrame) { zbd->stage = ZBUFFds_read; break; }   /* this was just a header */
-                    zbd->outEnd = zbd->outStart +  decodedSize;
-                    zbd->stage = ZBUFFds_flush;
-                    /* pass-through */
-            }   }
-
-        case ZBUFFds_flush:
-            {   size_t const toFlushSize = zbd->outEnd - zbd->outStart;
-                size_t const flushedSize = ZBUFF_limitCopy(op, oend-op, zbd->outBuff + zbd->outStart, toFlushSize);
-                op += flushedSize;
-                zbd->outStart += flushedSize;
-                if (flushedSize == toFlushSize) {  /* flush completed */
-                    zbd->stage = ZBUFFds_read;
-                    if (zbd->outStart + zbd->blockSize > zbd->outBuffSize)
-                        zbd->outStart = zbd->outEnd = 0;
-                    break;
-                }
-                /* cannot flush everything */
-                someMoreWork = 0;
-                break;
-            }
-        default: return ERROR(GENERIC);   /* impossible */
-    }   }
-
-    /* result */
-    *srcSizePtr = ip-istart;
-    *dstCapacityPtr = op-ostart;
-    {   size_t nextSrcSizeHint = ZSTD_nextSrcSizeToDecompress(zbd->zd);
-        if (!nextSrcSizeHint) return (zbd->outEnd != zbd->outStart);   /* return 0 only if fully flushed too */
-        nextSrcSizeHint += ZSTD_blockHeaderSize * (ZSTD_nextInputType(zbd->zd) == ZSTDnit_block);
-        if (zbd->inPos > nextSrcSizeHint) return ERROR(GENERIC);   /* should never happen */
-        nextSrcSizeHint -= zbd->inPos;   /* already loaded*/
-        return nextSrcSizeHint;
-    }
-}
-
-
-/* *************************************
-*  Tool functions
-***************************************/
-size_t ZBUFF_recommendedDInSize(void)  { return ZSTD_BLOCKSIZE_ABSOLUTEMAX + ZSTD_blockHeaderSize /* block header size*/ ; }
-size_t ZBUFF_recommendedDOutSize(void) { return ZSTD_BLOCKSIZE_ABSOLUTEMAX; }
diff --git a/contrib/libzstd/include/zstd/decompress/zstd_decompress.c b/contrib/libzstd/include/zstd/decompress/zstd_decompress.c
index 47b5f42c781..003d703a5eb 100644
--- a/contrib/libzstd/include/zstd/decompress/zstd_decompress.c
+++ b/contrib/libzstd/include/zstd/decompress/zstd_decompress.c
@@ -34,7 +34,7 @@
 *  Frames requiring more memory will be rejected.
 */
 #ifndef ZSTD_MAXWINDOWSIZE_DEFAULT
-#  define ZSTD_MAXWINDOWSIZE_DEFAULT (257 << 20)   /* 257 MB */
+#  define ZSTD_MAXWINDOWSIZE_DEFAULT ((1 << ZSTD_WINDOWLOG_MAX) + 1)   /* defined within zstd.h */
 #endif
 
 
@@ -43,8 +43,6 @@
 *********************************************************/
 #include <string.h>      /* memcpy, memmove, memset */
 #include "mem.h"         /* low level memory routines */
-#define XXH_STATIC_LINKING_ONLY   /* XXH64_state_t */
-#include "xxhash.h"      /* XXH64_* */
 #define FSE_STATIC_LINKING_ONLY
 #include "fse.h"
 #define HUF_STATIC_LINKING_ONLY
@@ -55,9 +53,18 @@
 #  include "zstd_legacy.h"
 #endif
 
+#if defined(_MSC_VER) && !defined(_M_IA64)  /* _mm_prefetch() is not defined for ia64 */
+#  include <mmintrin.h>   /* https://msdn.microsoft.com/fr-fr/library/84szxsww(v=vs.90).aspx */
+#  define ZSTD_PREFETCH(ptr)   _mm_prefetch((const char*)ptr, _MM_HINT_T0)
+#elif defined(__GNUC__)
+#  define ZSTD_PREFETCH(ptr)   __builtin_prefetch(ptr, 0, 0)
+#else
+#  define ZSTD_PREFETCH(ptr)   /* disabled */
+#endif
+
 
 /*-*************************************
-*  Macros
+*  Errors
 ***************************************/
 #define ZSTD_isError ERR_isError   /* for inlining */
 #define FSE_isError  ERR_isError
@@ -78,23 +85,31 @@ typedef enum { ZSTDds_getFrameHeaderSize, ZSTDds_decodeFrameHeader,
                ZSTDds_decompressLastBlock, ZSTDds_checkChecksum,
                ZSTDds_decodeSkippableHeader, ZSTDds_skipFrame } ZSTD_dStage;
 
+typedef enum { zdss_init=0, zdss_loadHeader,
+               zdss_read, zdss_load, zdss_flush } ZSTD_dStreamStage;
+
+typedef struct {
+    FSE_DTable LLTable[FSE_DTABLE_SIZE_U32(LLFSELog)];
+    FSE_DTable OFTable[FSE_DTABLE_SIZE_U32(OffFSELog)];
+    FSE_DTable MLTable[FSE_DTABLE_SIZE_U32(MLFSELog)];
+    HUF_DTable hufTable[HUF_DTABLE_SIZE(HufLog)];  /* can accommodate HUF_decompress4X */
+    U32 workspace[HUF_DECOMPRESS_WORKSPACE_SIZE_U32];
+    U32 rep[ZSTD_REP_NUM];
+} ZSTD_entropyTables_t;
+
 struct ZSTD_DCtx_s
 {
     const FSE_DTable* LLTptr;
     const FSE_DTable* MLTptr;
     const FSE_DTable* OFTptr;
     const HUF_DTable* HUFptr;
-    FSE_DTable LLTable[FSE_DTABLE_SIZE_U32(LLFSELog)];
-    FSE_DTable OFTable[FSE_DTABLE_SIZE_U32(OffFSELog)];
-    FSE_DTable MLTable[FSE_DTABLE_SIZE_U32(MLFSELog)];
-    HUF_DTable hufTable[HUF_DTABLE_SIZE(HufLog)];  /* can accommodate HUF_decompress4X */
-    const void* previousDstEnd;
-    const void* base;
-    const void* vBase;
-    const void* dictEnd;
+    ZSTD_entropyTables_t entropy;
+    const void* previousDstEnd;   /* detect continuity */
+    const void* base;             /* start of current segment */
+    const void* vBase;            /* virtual start of previous segment if it was just before current one */
+    const void* dictEnd;          /* end of previous segment */
     size_t expected;
-    U32 rep[ZSTD_REP_NUM];
-    ZSTD_frameParams fParams;
+    ZSTD_frameHeader fParams;
     blockType_e bType;   /* used in ZSTD_decompressContinue(), to transfer blockType between header decoding and block decoding stages */
     ZSTD_dStage stage;
     U32 litEntropy;
@@ -104,14 +119,41 @@ struct ZSTD_DCtx_s
     U32 dictID;
     const BYTE* litPtr;
     ZSTD_customMem customMem;
-    size_t litBufSize;
     size_t litSize;
     size_t rleSize;
-    BYTE litBuffer[ZSTD_BLOCKSIZE_ABSOLUTEMAX + WILDCOPY_OVERLENGTH];
+    size_t staticSize;
+
+    /* streaming */
+    ZSTD_DDict* ddictLocal;
+    const ZSTD_DDict* ddict;
+    ZSTD_dStreamStage streamStage;
+    char*  inBuff;
+    size_t inBuffSize;
+    size_t inPos;
+    size_t maxWindowSize;
+    char*  outBuff;
+    size_t outBuffSize;
+    size_t outStart;
+    size_t outEnd;
+    size_t blockSize;
+    size_t lhSize;
+    void* legacyContext;
+    U32 previousLegacyVersion;
+    U32 legacyVersion;
+    U32 hostageByte;
+
+    /* workspace */
+    BYTE litBuffer[ZSTD_BLOCKSIZE_MAX + WILDCOPY_OVERLENGTH];
     BYTE headerBuffer[ZSTD_FRAMEHEADERSIZE_MAX];
 };  /* typedef'd to ZSTD_DCtx within "zstd.h" */
 
-size_t ZSTD_sizeof_DCtx (const ZSTD_DCtx* dctx) { if (dctx==NULL) return 0; return sizeof(ZSTD_DCtx); }  /* support sizeof on NULL */
+size_t ZSTD_sizeof_DCtx (const ZSTD_DCtx* dctx)
+{
+    if (dctx==NULL) return 0;   /* support sizeof NULL */
+    return sizeof(*dctx)
+           + ZSTD_sizeof_DDict(dctx->ddictLocal)
+           + dctx->inBuffSize + dctx->outBuffSize;
+}
 
 size_t ZSTD_estimateDCtxSize(void) { return sizeof(ZSTD_DCtx); }
 
@@ -123,67 +165,86 @@ size_t ZSTD_decompressBegin(ZSTD_DCtx* dctx)
     dctx->base = NULL;
     dctx->vBase = NULL;
     dctx->dictEnd = NULL;
-    dctx->hufTable[0] = (HUF_DTable)((HufLog)*0x1000001);  /* cover both little and big endian */
+    dctx->entropy.hufTable[0] = (HUF_DTable)((HufLog)*0x1000001);  /* cover both little and big endian */
     dctx->litEntropy = dctx->fseEntropy = 0;
     dctx->dictID = 0;
-    MEM_STATIC_ASSERT(sizeof(dctx->rep) == sizeof(repStartValue));
-    memcpy(dctx->rep, repStartValue, sizeof(repStartValue));  /* initial repcodes */
-    dctx->LLTptr = dctx->LLTable;
-    dctx->MLTptr = dctx->MLTable;
-    dctx->OFTptr = dctx->OFTable;
-    dctx->HUFptr = dctx->hufTable;
+    MEM_STATIC_ASSERT(sizeof(dctx->entropy.rep) == sizeof(repStartValue));
+    memcpy(dctx->entropy.rep, repStartValue, sizeof(repStartValue));  /* initial repcodes */
+    dctx->LLTptr = dctx->entropy.LLTable;
+    dctx->MLTptr = dctx->entropy.MLTable;
+    dctx->OFTptr = dctx->entropy.OFTable;
+    dctx->HUFptr = dctx->entropy.hufTable;
     return 0;
 }
 
+static void ZSTD_initDCtx_internal(ZSTD_DCtx* dctx)
+{
+    ZSTD_decompressBegin(dctx);   /* cannot fail */
+    dctx->staticSize = 0;
+    dctx->maxWindowSize = ZSTD_MAXWINDOWSIZE_DEFAULT;
+    dctx->ddict   = NULL;
+    dctx->ddictLocal = NULL;
+    dctx->inBuff  = NULL;
+    dctx->inBuffSize = 0;
+    dctx->outBuffSize= 0;
+    dctx->streamStage = zdss_init;
+}
+
 ZSTD_DCtx* ZSTD_createDCtx_advanced(ZSTD_customMem customMem)
 {
-    ZSTD_DCtx* dctx;
+    if (!customMem.customAlloc ^ !customMem.customFree) return NULL;
 
-    if (!customMem.customAlloc && !customMem.customFree) customMem = defaultCustomMem;
-    if (!customMem.customAlloc || !customMem.customFree) return NULL;
+    {   ZSTD_DCtx* const dctx = (ZSTD_DCtx*)ZSTD_malloc(sizeof(*dctx), customMem);
+        if (!dctx) return NULL;
+        dctx->customMem = customMem;
+        dctx->legacyContext = NULL;
+        dctx->previousLegacyVersion = 0;
+        ZSTD_initDCtx_internal(dctx);
+        return dctx;
+    }
+}
 
-    dctx = (ZSTD_DCtx*)ZSTD_malloc(sizeof(ZSTD_DCtx), customMem);
-    if (!dctx) return NULL;
-    memcpy(&dctx->customMem, &customMem, sizeof(customMem));
-    ZSTD_decompressBegin(dctx);
+ZSTD_DCtx* ZSTD_initStaticDCtx(void *workspace, size_t workspaceSize)
+{
+    ZSTD_DCtx* dctx = (ZSTD_DCtx*) workspace;
+
+    if ((size_t)workspace & 7) return NULL;  /* 8-aligned */
+    if (workspaceSize < sizeof(ZSTD_DCtx)) return NULL;  /* minimum size */
+
+    ZSTD_initDCtx_internal(dctx);
+    dctx->staticSize = workspaceSize;
+    dctx->inBuff = (char*)(dctx+1);
     return dctx;
 }
 
 ZSTD_DCtx* ZSTD_createDCtx(void)
 {
-    return ZSTD_createDCtx_advanced(defaultCustomMem);
+    return ZSTD_createDCtx_advanced(ZSTD_defaultCMem);
 }
 
 size_t ZSTD_freeDCtx(ZSTD_DCtx* dctx)
 {
     if (dctx==NULL) return 0;   /* support free on NULL */
-    ZSTD_free(dctx, dctx->customMem);
-    return 0;   /* reserved as a potential error code in the future */
+    if (dctx->staticSize) return ERROR(memory_allocation);   /* not compatible with static DCtx */
+    {   ZSTD_customMem const cMem = dctx->customMem;
+        ZSTD_freeDDict(dctx->ddictLocal);
+        dctx->ddictLocal = NULL;
+        ZSTD_free(dctx->inBuff, cMem);
+        dctx->inBuff = NULL;
+#if defined(ZSTD_LEGACY_SUPPORT) && (ZSTD_LEGACY_SUPPORT >= 1)
+        if (dctx->legacyContext)
+            ZSTD_freeLegacyStreamContext(dctx->legacyContext, dctx->previousLegacyVersion);
+#endif
+        ZSTD_free(dctx, cMem);
+        return 0;
+    }
 }
 
+/* no longer useful */
 void ZSTD_copyDCtx(ZSTD_DCtx* dstDCtx, const ZSTD_DCtx* srcDCtx)
 {
-    size_t const workSpaceSize = (ZSTD_BLOCKSIZE_ABSOLUTEMAX+WILDCOPY_OVERLENGTH) + ZSTD_frameHeaderSize_max;
-    memcpy(dstDCtx, srcDCtx, sizeof(ZSTD_DCtx) - workSpaceSize);  /* no need to copy workspace */
-}
-
-static void ZSTD_refDCtx(ZSTD_DCtx* dstDCtx, const ZSTD_DCtx* srcDCtx)
-{
-    ZSTD_decompressBegin(dstDCtx);  /* init */
-    dstDCtx->dictEnd = srcDCtx->dictEnd;
-    dstDCtx->vBase = srcDCtx->vBase;
-    dstDCtx->base = srcDCtx->base;
-    dstDCtx->previousDstEnd = srcDCtx->previousDstEnd;
-    dstDCtx->dictID = srcDCtx->dictID;
-    dstDCtx->litEntropy = srcDCtx->litEntropy;
-    dstDCtx->fseEntropy = srcDCtx->fseEntropy;
-    dstDCtx->LLTptr = srcDCtx->LLTable;
-    dstDCtx->MLTptr = srcDCtx->MLTable;
-    dstDCtx->OFTptr = srcDCtx->OFTable;
-    dstDCtx->HUFptr = srcDCtx->hufTable;
-    dstDCtx->rep[0] = srcDCtx->rep[0];
-    dstDCtx->rep[1] = srcDCtx->rep[1];
-    dstDCtx->rep[2] = srcDCtx->rep[2];
+    size_t const toCopy = (size_t)((char*)(&dstDCtx->inBuff) - (char*)dstDCtx);
+    memcpy(dstDCtx, srcDCtx, toCopy);  /* no need to copy workspace */
 }
 
 
@@ -191,12 +252,29 @@ static void ZSTD_refDCtx(ZSTD_DCtx* dstDCtx, const ZSTD_DCtx* srcDCtx)
 *   Decompression section
 ***************************************************************/
 
-/* See compression format details in : zstd_compression_format.md */
+/*! ZSTD_isFrame() :
+ *  Tells if the content of `buffer` starts with a valid Frame Identifier.
+ *  Note : Frame Identifier is 4 bytes. If `size < 4`, @return will always be 0.
+ *  Note 2 : Legacy Frame Identifiers are considered valid only if Legacy Support is enabled.
+ *  Note 3 : Skippable Frame Identifiers are considered valid. */
+unsigned ZSTD_isFrame(const void* buffer, size_t size)
+{
+    if (size < 4) return 0;
+    {   U32 const magic = MEM_readLE32(buffer);
+        if (magic == ZSTD_MAGICNUMBER) return 1;
+        if ((magic & 0xFFFFFFF0U) == ZSTD_MAGIC_SKIPPABLE_START) return 1;
+    }
+#if defined(ZSTD_LEGACY_SUPPORT) && (ZSTD_LEGACY_SUPPORT >= 1)
+    if (ZSTD_isLegacy(buffer, size)) return 1;
+#endif
+    return 0;
+}
+
 
 /** ZSTD_frameHeaderSize() :
 *   srcSize must be >= ZSTD_frameHeaderSize_prefix.
 *   @return : size of the Frame Header */
-static size_t ZSTD_frameHeaderSize(const void* src, size_t srcSize)
+size_t ZSTD_frameHeaderSize(const void* src, size_t srcSize)
 {
     if (srcSize < ZSTD_frameHeaderSize_prefix) return ERROR(srcSize_wrong);
     {   BYTE const fhd = ((const BYTE*)src)[4];
@@ -209,22 +287,24 @@ static size_t ZSTD_frameHeaderSize(const void* src, size_t srcSize)
 }
 
 
-/** ZSTD_getFrameParams() :
+/** ZSTD_getFrameHeader() :
 *   decode Frame Header, or require larger `srcSize`.
-*   @return : 0, `fparamsPtr` is correctly filled,
+*   @return : 0, `zfhPtr` is correctly filled,
 *            >0, `srcSize` is too small, result is expected `srcSize`,
 *             or an error code, which can be tested using ZSTD_isError() */
-size_t ZSTD_getFrameParams(ZSTD_frameParams* fparamsPtr, const void* src, size_t srcSize)
+size_t ZSTD_getFrameHeader(ZSTD_frameHeader* zfhPtr, const void* src, size_t srcSize)
 {
     const BYTE* ip = (const BYTE*)src;
-
     if (srcSize < ZSTD_frameHeaderSize_prefix) return ZSTD_frameHeaderSize_prefix;
+
     if (MEM_readLE32(src) != ZSTD_MAGICNUMBER) {
         if ((MEM_readLE32(src) & 0xFFFFFFF0U) == ZSTD_MAGIC_SKIPPABLE_START) {
-            if (srcSize < ZSTD_skippableHeaderSize) return ZSTD_skippableHeaderSize; /* magic number + skippable frame length */
-            memset(fparamsPtr, 0, sizeof(*fparamsPtr));
-            fparamsPtr->frameContentSize = MEM_readLE32((const char *)src + 4);
-            fparamsPtr->windowSize = 0; /* windowSize==0 means a frame is skippable */
+            /* skippable frame */
+            if (srcSize < ZSTD_skippableHeaderSize)
+                return ZSTD_skippableHeaderSize; /* magic number + frame length */
+            memset(zfhPtr, 0, sizeof(*zfhPtr));
+            zfhPtr->frameContentSize = MEM_readLE32((const char *)src + 4);
+            zfhPtr->windowSize = 0; /* windowSize==0 means a frame is skippable */
             return 0;
         }
         return ERROR(prefix_unknown);
@@ -244,11 +324,13 @@ size_t ZSTD_getFrameParams(ZSTD_frameParams* fparamsPtr, const void* src, size_t
         U32 windowSize = 0;
         U32 dictID = 0;
         U64 frameContentSize = 0;
-        if ((fhdByte & 0x08) != 0) return ERROR(frameParameter_unsupported);   /* reserved bits, which must be zero */
+        if ((fhdByte & 0x08) != 0)
+            return ERROR(frameParameter_unsupported);   /* reserved bits, must be zero */
         if (!singleSegment) {
             BYTE const wlByte = ip[pos++];
             U32 const windowLog = (wlByte >> 3) + ZSTD_WINDOWLOG_ABSOLUTEMIN;
-            if (windowLog > ZSTD_WINDOWLOG_MAX) return ERROR(frameParameter_unsupported);
+            if (windowLog > ZSTD_WINDOWLOG_MAX)
+                return ERROR(frameParameter_windowTooLarge);
             windowSize = (1U << windowLog);
             windowSize += (windowSize >> 3) * (wlByte&7);
         }
@@ -270,15 +352,91 @@ size_t ZSTD_getFrameParams(ZSTD_frameParams* fparamsPtr, const void* src, size_t
             case 3 : frameContentSize = MEM_readLE64(ip+pos); break;
         }
         if (!windowSize) windowSize = (U32)frameContentSize;
-        if (windowSize > windowSizeMax) return ERROR(frameParameter_unsupported);
-        fparamsPtr->frameContentSize = frameContentSize;
-        fparamsPtr->windowSize = windowSize;
-        fparamsPtr->dictID = dictID;
-        fparamsPtr->checksumFlag = checksumFlag;
+        if (windowSize > windowSizeMax) return ERROR(frameParameter_windowTooLarge);
+        zfhPtr->frameContentSize = frameContentSize;
+        zfhPtr->windowSize = windowSize;
+        zfhPtr->dictID = dictID;
+        zfhPtr->checksumFlag = checksumFlag;
     }
     return 0;
 }
 
+/** ZSTD_getFrameContentSize() :
+*   compatible with legacy mode
+*   @return : decompressed size of the single frame pointed to be `src` if known, otherwise
+*             - ZSTD_CONTENTSIZE_UNKNOWN if the size cannot be determined
+*             - ZSTD_CONTENTSIZE_ERROR if an error occurred (e.g. invalid magic number, srcSize too small) */
+unsigned long long ZSTD_getFrameContentSize(const void *src, size_t srcSize)
+{
+#if defined(ZSTD_LEGACY_SUPPORT) && (ZSTD_LEGACY_SUPPORT >= 1)
+    if (ZSTD_isLegacy(src, srcSize)) {
+        unsigned long long const ret = ZSTD_getDecompressedSize_legacy(src, srcSize);
+        return ret == 0 ? ZSTD_CONTENTSIZE_UNKNOWN : ret;
+    }
+#endif
+    {   ZSTD_frameHeader fParams;
+        if (ZSTD_getFrameHeader(&fParams, src, srcSize) != 0) return ZSTD_CONTENTSIZE_ERROR;
+        if (fParams.windowSize == 0) {
+            /* Either skippable or empty frame, size == 0 either way */
+            return 0;
+        } else if (fParams.frameContentSize != 0) {
+            return fParams.frameContentSize;
+        } else {
+            return ZSTD_CONTENTSIZE_UNKNOWN;
+        }
+    }
+}
+
+/** ZSTD_findDecompressedSize() :
+ *  compatible with legacy mode
+ *  `srcSize` must be the exact length of some number of ZSTD compressed and/or
+ *      skippable frames
+ *  @return : decompressed size of the frames contained */
+unsigned long long ZSTD_findDecompressedSize(const void* src, size_t srcSize)
+{
+    unsigned long long totalDstSize = 0;
+
+    while (srcSize >= ZSTD_frameHeaderSize_prefix) {
+        const U32 magicNumber = MEM_readLE32(src);
+
+        if ((magicNumber & 0xFFFFFFF0U) == ZSTD_MAGIC_SKIPPABLE_START) {
+            size_t skippableSize;
+            if (srcSize < ZSTD_skippableHeaderSize)
+                return ERROR(srcSize_wrong);
+            skippableSize = MEM_readLE32((const BYTE *)src + 4) +
+                            ZSTD_skippableHeaderSize;
+            if (srcSize < skippableSize) {
+                return ZSTD_CONTENTSIZE_ERROR;
+            }
+
+            src = (const BYTE *)src + skippableSize;
+            srcSize -= skippableSize;
+            continue;
+        }
+
+        {   unsigned long long const ret = ZSTD_getFrameContentSize(src, srcSize);
+            if (ret >= ZSTD_CONTENTSIZE_ERROR) return ret;
+
+            /* check for overflow */
+            if (totalDstSize + ret < totalDstSize) return ZSTD_CONTENTSIZE_ERROR;
+            totalDstSize += ret;
+        }
+        {   size_t const frameSrcSize = ZSTD_findFrameCompressedSize(src, srcSize);
+            if (ZSTD_isError(frameSrcSize)) {
+                return ZSTD_CONTENTSIZE_ERROR;
+            }
+
+            src = (const BYTE *)src + frameSrcSize;
+            srcSize -= frameSrcSize;
+        }
+    }
+
+    if (srcSize) {
+        return ZSTD_CONTENTSIZE_ERROR;
+    }
+
+    return totalDstSize;
+}
 
 /** ZSTD_getDecompressedSize() :
 *   compatible with legacy mode
@@ -289,26 +447,22 @@ size_t ZSTD_getFrameParams(ZSTD_frameParams* fparamsPtr, const void* src, size_t
                    - frame header not complete (`srcSize` too small) */
 unsigned long long ZSTD_getDecompressedSize(const void* src, size_t srcSize)
 {
-#if defined(ZSTD_LEGACY_SUPPORT) && (ZSTD_LEGACY_SUPPORT==1)
-    if (ZSTD_isLegacy(src, srcSize)) return ZSTD_getDecompressedSize_legacy(src, srcSize);
-#endif
-    {   ZSTD_frameParams fparams;
-        size_t const frResult = ZSTD_getFrameParams(&fparams, src, srcSize);
-        if (frResult!=0) return 0;
-        return fparams.frameContentSize;
-    }
+    unsigned long long const ret = ZSTD_getFrameContentSize(src, srcSize);
+    return ret >= ZSTD_CONTENTSIZE_ERROR ? 0 : ret;
 }
 
 
 /** ZSTD_decodeFrameHeader() :
-*   `srcSize` must be the size provided by ZSTD_frameHeaderSize().
+*   `headerSize` must be the size provided by ZSTD_frameHeaderSize().
 *   @return : 0 if success, or an error code, which can be tested using ZSTD_isError() */
-static size_t ZSTD_decodeFrameHeader(ZSTD_DCtx* dctx, const void* src, size_t srcSize)
+static size_t ZSTD_decodeFrameHeader(ZSTD_DCtx* dctx, const void* src, size_t headerSize)
 {
-    size_t const result = ZSTD_getFrameParams(&(dctx->fParams), src, srcSize);
+    size_t const result = ZSTD_getFrameHeader(&(dctx->fParams), src, headerSize);
+    if (ZSTD_isError(result)) return result;  /* invalid header */
+    if (result>0) return ERROR(srcSize_wrong);   /* headerSize too small */
     if (dctx->fParams.dictID && (dctx->dictID != dctx->fParams.dictID)) return ERROR(dictionary_wrong);
     if (dctx->fParams.checksumFlag) XXH64_reset(&dctx->xxhState, 0);
-    return result;
+    return 0;
 }
 
 
@@ -321,7 +475,8 @@ typedef struct
 
 /*! ZSTD_getcBlockSize() :
 *   Provides the size of compressed block from block header `src` */
-size_t ZSTD_getcBlockSize(const void* src, size_t srcSize, blockProperties_t* bpPtr)
+size_t ZSTD_getcBlockSize(const void* src, size_t srcSize,
+                          blockProperties_t* bpPtr)
 {
     if (srcSize < ZSTD_blockHeaderSize) return ERROR(srcSize_wrong);
     {   U32 const cBlockHeader = MEM_readLE24(src);
@@ -336,7 +491,8 @@ size_t ZSTD_getcBlockSize(const void* src, size_t srcSize, blockProperties_t* bp
 }
 
 
-static size_t ZSTD_copyRawBlock(void* dst, size_t dstCapacity, const void* src, size_t srcSize)
+static size_t ZSTD_copyRawBlock(void* dst, size_t dstCapacity,
+                          const void* src, size_t srcSize)
 {
     if (srcSize > dstCapacity) return ERROR(dstSize_tooSmall);
     memcpy(dst, src, srcSize);
@@ -344,7 +500,9 @@ static size_t ZSTD_copyRawBlock(void* dst, size_t dstCapacity, const void* src,
 }
 
 
-static size_t ZSTD_setRleBlock(void* dst, size_t dstCapacity, const void* src, size_t srcSize, size_t regenSize)
+static size_t ZSTD_setRleBlock(void* dst, size_t dstCapacity,
+                         const void* src, size_t srcSize,
+                               size_t regenSize)
 {
     if (srcSize != 1) return ERROR(srcSize_wrong);
     if (regenSize > dstCapacity) return ERROR(dstSize_tooSmall);
@@ -395,7 +553,7 @@ size_t ZSTD_decodeLiteralsBlock(ZSTD_DCtx* dctx,
                     litCSize = (lhc >> 22) + (istart[4] << 10);
                     break;
                 }
-                if (litSize > ZSTD_BLOCKSIZE_ABSOLUTEMAX) return ERROR(corruption_detected);
+                if (litSize > ZSTD_BLOCKSIZE_MAX) return ERROR(corruption_detected);
                 if (litCSize + lhSize > srcSize) return ERROR(corruption_detected);
 
                 if (HUF_isError((litEncType==set_repeat) ?
@@ -403,15 +561,17 @@ size_t ZSTD_decodeLiteralsBlock(ZSTD_DCtx* dctx,
                                         HUF_decompress1X_usingDTable(dctx->litBuffer, litSize, istart+lhSize, litCSize, dctx->HUFptr) :
                                         HUF_decompress4X_usingDTable(dctx->litBuffer, litSize, istart+lhSize, litCSize, dctx->HUFptr) ) :
                                     ( singleStream ?
-                                        HUF_decompress1X2_DCtx(dctx->hufTable, dctx->litBuffer, litSize, istart+lhSize, litCSize) :
-                                        HUF_decompress4X_hufOnly (dctx->hufTable, dctx->litBuffer, litSize, istart+lhSize, litCSize)) ))
+                                        HUF_decompress1X2_DCtx_wksp(dctx->entropy.hufTable, dctx->litBuffer, litSize, istart+lhSize, litCSize,
+                                                                    dctx->entropy.workspace, sizeof(dctx->entropy.workspace)) :
+                                        HUF_decompress4X_hufOnly_wksp(dctx->entropy.hufTable, dctx->litBuffer, litSize, istart+lhSize, litCSize,
+                                                                      dctx->entropy.workspace, sizeof(dctx->entropy.workspace)))))
                     return ERROR(corruption_detected);
 
                 dctx->litPtr = dctx->litBuffer;
-                dctx->litBufSize = ZSTD_BLOCKSIZE_ABSOLUTEMAX+WILDCOPY_OVERLENGTH;
                 dctx->litSize = litSize;
                 dctx->litEntropy = 1;
-                if (litEncType==set_compressed) dctx->HUFptr = dctx->hufTable;
+                if (litEncType==set_compressed) dctx->HUFptr = dctx->entropy.hufTable;
+                memset(dctx->litBuffer + dctx->litSize, 0, WILDCOPY_OVERLENGTH);
                 return litCSize + lhSize;
             }
 
@@ -438,13 +598,12 @@ size_t ZSTD_decodeLiteralsBlock(ZSTD_DCtx* dctx,
                     if (litSize+lhSize > srcSize) return ERROR(corruption_detected);
                     memcpy(dctx->litBuffer, istart+lhSize, litSize);
                     dctx->litPtr = dctx->litBuffer;
-                    dctx->litBufSize = ZSTD_BLOCKSIZE_ABSOLUTEMAX+8;
                     dctx->litSize = litSize;
+                    memset(dctx->litBuffer + dctx->litSize, 0, WILDCOPY_OVERLENGTH);
                     return lhSize+litSize;
                 }
                 /* direct reference into compressed stream */
                 dctx->litPtr = istart+lhSize;
-                dctx->litBufSize = srcSize-lhSize;
                 dctx->litSize = litSize;
                 return lhSize+litSize;
             }
@@ -468,10 +627,9 @@ size_t ZSTD_decodeLiteralsBlock(ZSTD_DCtx* dctx,
                     if (srcSize<4) return ERROR(corruption_detected);   /* srcSize >= MIN_CBLOCK_SIZE == 3; here we need lhSize+1 = 4 */
                     break;
                 }
-                if (litSize > ZSTD_BLOCKSIZE_ABSOLUTEMAX) return ERROR(corruption_detected);
-                memset(dctx->litBuffer, istart[lhSize], litSize);
+                if (litSize > ZSTD_BLOCKSIZE_MAX) return ERROR(corruption_detected);
+                memset(dctx->litBuffer, istart[lhSize], litSize + WILDCOPY_OVERLENGTH);
                 dctx->litPtr = dctx->litBuffer;
-                dctx->litBufSize = ZSTD_BLOCKSIZE_ABSOLUTEMAX+WILDCOPY_OVERLENGTH;
                 dctx->litSize = litSize;
                 return lhSize+1;
             }
@@ -487,176 +645,70 @@ typedef union {
     U32 alignedBy4;
 } FSE_decode_t4;
 
+/* Default FSE distribution table for Literal Lengths */
 static const FSE_decode_t4 LL_defaultDTable[(1<<LL_DEFAULTNORMLOG)+1] = {
     { { LL_DEFAULTNORMLOG, 1, 1 } }, /* header : tableLog, fastMode, fastMode */
-    { {  0,  0,  4 } },              /* 0 : base, symbol, bits */
-    { { 16,  0,  4 } },
-    { { 32,  1,  5 } },
-    { {  0,  3,  5 } },
-    { {  0,  4,  5 } },
-    { {  0,  6,  5 } },
-    { {  0,  7,  5 } },
-    { {  0,  9,  5 } },
-    { {  0, 10,  5 } },
-    { {  0, 12,  5 } },
-    { {  0, 14,  6 } },
-    { {  0, 16,  5 } },
-    { {  0, 18,  5 } },
-    { {  0, 19,  5 } },
-    { {  0, 21,  5 } },
-    { {  0, 22,  5 } },
-    { {  0, 24,  5 } },
-    { { 32, 25,  5 } },
-    { {  0, 26,  5 } },
-    { {  0, 27,  6 } },
-    { {  0, 29,  6 } },
-    { {  0, 31,  6 } },
-    { { 32,  0,  4 } },
-    { {  0,  1,  4 } },
-    { {  0,  2,  5 } },
-    { { 32,  4,  5 } },
-    { {  0,  5,  5 } },
-    { { 32,  7,  5 } },
-    { {  0,  8,  5 } },
-    { { 32, 10,  5 } },
-    { {  0, 11,  5 } },
-    { {  0, 13,  6 } },
-    { { 32, 16,  5 } },
-    { {  0, 17,  5 } },
-    { { 32, 19,  5 } },
-    { {  0, 20,  5 } },
-    { { 32, 22,  5 } },
-    { {  0, 23,  5 } },
-    { {  0, 25,  4 } },
-    { { 16, 25,  4 } },
-    { { 32, 26,  5 } },
-    { {  0, 28,  6 } },
-    { {  0, 30,  6 } },
-    { { 48,  0,  4 } },
-    { { 16,  1,  4 } },
-    { { 32,  2,  5 } },
-    { { 32,  3,  5 } },
-    { { 32,  5,  5 } },
-    { { 32,  6,  5 } },
-    { { 32,  8,  5 } },
-    { { 32,  9,  5 } },
-    { { 32, 11,  5 } },
-    { { 32, 12,  5 } },
-    { {  0, 15,  6 } },
-    { { 32, 17,  5 } },
-    { { 32, 18,  5 } },
-    { { 32, 20,  5 } },
-    { { 32, 21,  5 } },
-    { { 32, 23,  5 } },
-    { { 32, 24,  5 } },
-    { {  0, 35,  6 } },
-    { {  0, 34,  6 } },
-    { {  0, 33,  6 } },
-    { {  0, 32,  6 } },
+     /* base, symbol, bits */
+    { {  0,  0,  4 } }, { { 16,  0,  4 } }, { { 32,  1,  5 } }, { {  0,  3,  5 } },
+    { {  0,  4,  5 } }, { {  0,  6,  5 } }, { {  0,  7,  5 } }, { {  0,  9,  5 } },
+    { {  0, 10,  5 } }, { {  0, 12,  5 } }, { {  0, 14,  6 } }, { {  0, 16,  5 } },
+    { {  0, 18,  5 } }, { {  0, 19,  5 } }, { {  0, 21,  5 } }, { {  0, 22,  5 } },
+    { {  0, 24,  5 } }, { { 32, 25,  5 } }, { {  0, 26,  5 } }, { {  0, 27,  6 } },
+    { {  0, 29,  6 } }, { {  0, 31,  6 } }, { { 32,  0,  4 } }, { {  0,  1,  4 } },
+    { {  0,  2,  5 } }, { { 32,  4,  5 } }, { {  0,  5,  5 } }, { { 32,  7,  5 } },
+    { {  0,  8,  5 } }, { { 32, 10,  5 } }, { {  0, 11,  5 } }, { {  0, 13,  6 } },
+    { { 32, 16,  5 } }, { {  0, 17,  5 } }, { { 32, 19,  5 } }, { {  0, 20,  5 } },
+    { { 32, 22,  5 } }, { {  0, 23,  5 } }, { {  0, 25,  4 } }, { { 16, 25,  4 } },
+    { { 32, 26,  5 } }, { {  0, 28,  6 } }, { {  0, 30,  6 } }, { { 48,  0,  4 } },
+    { { 16,  1,  4 } }, { { 32,  2,  5 } }, { { 32,  3,  5 } }, { { 32,  5,  5 } },
+    { { 32,  6,  5 } }, { { 32,  8,  5 } }, { { 32,  9,  5 } }, { { 32, 11,  5 } },
+    { { 32, 12,  5 } }, { {  0, 15,  6 } }, { { 32, 17,  5 } }, { { 32, 18,  5 } },
+    { { 32, 20,  5 } }, { { 32, 21,  5 } }, { { 32, 23,  5 } }, { { 32, 24,  5 } },
+    { {  0, 35,  6 } }, { {  0, 34,  6 } }, { {  0, 33,  6 } }, { {  0, 32,  6 } },
 };   /* LL_defaultDTable */
 
+/* Default FSE distribution table for Match Lengths */
 static const FSE_decode_t4 ML_defaultDTable[(1<<ML_DEFAULTNORMLOG)+1] = {
     { { ML_DEFAULTNORMLOG, 1, 1 } }, /* header : tableLog, fastMode, fastMode */
-    { {  0,  0,  6 } },              /* 0 : base, symbol, bits */
-    { {  0,  1,  4 } },
-    { { 32,  2,  5 } },
-    { {  0,  3,  5 } },
-    { {  0,  5,  5 } },
-    { {  0,  6,  5 } },
-    { {  0,  8,  5 } },
-    { {  0, 10,  6 } },
-    { {  0, 13,  6 } },
-    { {  0, 16,  6 } },
-    { {  0, 19,  6 } },
-    { {  0, 22,  6 } },
-    { {  0, 25,  6 } },
-    { {  0, 28,  6 } },
-    { {  0, 31,  6 } },
-    { {  0, 33,  6 } },
-    { {  0, 35,  6 } },
-    { {  0, 37,  6 } },
-    { {  0, 39,  6 } },
-    { {  0, 41,  6 } },
-    { {  0, 43,  6 } },
-    { {  0, 45,  6 } },
-    { { 16,  1,  4 } },
-    { {  0,  2,  4 } },
-    { { 32,  3,  5 } },
-    { {  0,  4,  5 } },
-    { { 32,  6,  5 } },
-    { {  0,  7,  5 } },
-    { {  0,  9,  6 } },
-    { {  0, 12,  6 } },
-    { {  0, 15,  6 } },
-    { {  0, 18,  6 } },
-    { {  0, 21,  6 } },
-    { {  0, 24,  6 } },
-    { {  0, 27,  6 } },
-    { {  0, 30,  6 } },
-    { {  0, 32,  6 } },
-    { {  0, 34,  6 } },
-    { {  0, 36,  6 } },
-    { {  0, 38,  6 } },
-    { {  0, 40,  6 } },
-    { {  0, 42,  6 } },
-    { {  0, 44,  6 } },
-    { { 32,  1,  4 } },
-    { { 48,  1,  4 } },
-    { { 16,  2,  4 } },
-    { { 32,  4,  5 } },
-    { { 32,  5,  5 } },
-    { { 32,  7,  5 } },
-    { { 32,  8,  5 } },
-    { {  0, 11,  6 } },
-    { {  0, 14,  6 } },
-    { {  0, 17,  6 } },
-    { {  0, 20,  6 } },
-    { {  0, 23,  6 } },
-    { {  0, 26,  6 } },
-    { {  0, 29,  6 } },
-    { {  0, 52,  6 } },
-    { {  0, 51,  6 } },
-    { {  0, 50,  6 } },
-    { {  0, 49,  6 } },
-    { {  0, 48,  6 } },
-    { {  0, 47,  6 } },
-    { {  0, 46,  6 } },
+    /* base, symbol, bits */
+    { {  0,  0,  6 } }, { {  0,  1,  4 } }, { { 32,  2,  5 } }, { {  0,  3,  5 } },
+    { {  0,  5,  5 } }, { {  0,  6,  5 } }, { {  0,  8,  5 } }, { {  0, 10,  6 } },
+    { {  0, 13,  6 } }, { {  0, 16,  6 } }, { {  0, 19,  6 } }, { {  0, 22,  6 } },
+    { {  0, 25,  6 } }, { {  0, 28,  6 } }, { {  0, 31,  6 } }, { {  0, 33,  6 } },
+    { {  0, 35,  6 } }, { {  0, 37,  6 } }, { {  0, 39,  6 } }, { {  0, 41,  6 } },
+    { {  0, 43,  6 } }, { {  0, 45,  6 } }, { { 16,  1,  4 } }, { {  0,  2,  4 } },
+    { { 32,  3,  5 } }, { {  0,  4,  5 } }, { { 32,  6,  5 } }, { {  0,  7,  5 } },
+    { {  0,  9,  6 } }, { {  0, 12,  6 } }, { {  0, 15,  6 } }, { {  0, 18,  6 } },
+    { {  0, 21,  6 } }, { {  0, 24,  6 } }, { {  0, 27,  6 } }, { {  0, 30,  6 } },
+    { {  0, 32,  6 } }, { {  0, 34,  6 } }, { {  0, 36,  6 } }, { {  0, 38,  6 } },
+    { {  0, 40,  6 } }, { {  0, 42,  6 } }, { {  0, 44,  6 } }, { { 32,  1,  4 } },
+    { { 48,  1,  4 } }, { { 16,  2,  4 } }, { { 32,  4,  5 } }, { { 32,  5,  5 } },
+    { { 32,  7,  5 } }, { { 32,  8,  5 } }, { {  0, 11,  6 } }, { {  0, 14,  6 } },
+    { {  0, 17,  6 } }, { {  0, 20,  6 } }, { {  0, 23,  6 } }, { {  0, 26,  6 } },
+    { {  0, 29,  6 } }, { {  0, 52,  6 } }, { {  0, 51,  6 } }, { {  0, 50,  6 } },
+    { {  0, 49,  6 } }, { {  0, 48,  6 } }, { {  0, 47,  6 } }, { {  0, 46,  6 } },
 };   /* ML_defaultDTable */
 
+/* Default FSE distribution table for Offset Codes */
 static const FSE_decode_t4 OF_defaultDTable[(1<<OF_DEFAULTNORMLOG)+1] = {
     { { OF_DEFAULTNORMLOG, 1, 1 } }, /* header : tableLog, fastMode, fastMode */
-    { {  0,  0,  5 } },              /* 0 : base, symbol, bits */
-    { {  0,  6,  4 } },
-    { {  0,  9,  5 } },
-    { {  0, 15,  5 } },
-    { {  0, 21,  5 } },
-    { {  0,  3,  5 } },
-    { {  0,  7,  4 } },
-    { {  0, 12,  5 } },
-    { {  0, 18,  5 } },
-    { {  0, 23,  5 } },
-    { {  0,  5,  5 } },
-    { {  0,  8,  4 } },
-    { {  0, 14,  5 } },
-    { {  0, 20,  5 } },
-    { {  0,  2,  5 } },
-    { { 16,  7,  4 } },
-    { {  0, 11,  5 } },
-    { {  0, 17,  5 } },
-    { {  0, 22,  5 } },
-    { {  0,  4,  5 } },
-    { { 16,  8,  4 } },
-    { {  0, 13,  5 } },
-    { {  0, 19,  5 } },
-    { {  0,  1,  5 } },
-    { { 16,  6,  4 } },
-    { {  0, 10,  5 } },
-    { {  0, 16,  5 } },
-    { {  0, 28,  5 } },
-    { {  0, 27,  5 } },
-    { {  0, 26,  5 } },
-    { {  0, 25,  5 } },
-    { {  0, 24,  5 } },
+    /* base, symbol, bits */
+    { {  0,  0,  5 } }, { {  0,  6,  4 } },
+    { {  0,  9,  5 } }, { {  0, 15,  5 } },
+    { {  0, 21,  5 } }, { {  0,  3,  5 } },
+    { {  0,  7,  4 } }, { {  0, 12,  5 } },
+    { {  0, 18,  5 } }, { {  0, 23,  5 } },
+    { {  0,  5,  5 } }, { {  0,  8,  4 } },
+    { {  0, 14,  5 } }, { {  0, 20,  5 } },
+    { {  0,  2,  5 } }, { { 16,  7,  4 } },
+    { {  0, 11,  5 } }, { {  0, 17,  5 } },
+    { {  0, 22,  5 } }, { {  0,  4,  5 } },
+    { { 16,  8,  4 } }, { {  0, 13,  5 } },
+    { {  0, 19,  5 } }, { {  0,  1,  5 } },
+    { { 16,  6,  4 } }, { {  0, 10,  5 } },
+    { {  0, 16,  5 } }, { {  0, 28,  5 } },
+    { {  0, 27,  5 } }, { {  0, 26,  5 } },
+    { {  0, 25,  5 } }, { {  0, 24,  5 } },
 };   /* OF_defaultDTable */
 
 /*! ZSTD_buildSeqTable() :
@@ -702,6 +754,7 @@ size_t ZSTD_decodeSeqHeaders(ZSTD_DCtx* dctx, int* nbSeqPtr,
     const BYTE* const istart = (const BYTE* const)src;
     const BYTE* const iend = istart + srcSize;
     const BYTE* ip = istart;
+    DEBUGLOG(5, "ZSTD_decodeSeqHeaders");
 
     /* check */
     if (srcSize < MIN_SEQUENCES_SIZE) return ERROR(srcSize_wrong);
@@ -710,10 +763,13 @@ size_t ZSTD_decodeSeqHeaders(ZSTD_DCtx* dctx, int* nbSeqPtr,
     {   int nbSeq = *ip++;
         if (!nbSeq) { *nbSeqPtr=0; return 1; }
         if (nbSeq > 0x7F) {
-            if (nbSeq == 0xFF)
+            if (nbSeq == 0xFF) {
+                if (ip+2 > iend) return ERROR(srcSize_wrong);
                 nbSeq = MEM_readLE16(ip) + LONGNBSEQ, ip+=2;
-            else
+            } else {
+                if (ip >= iend) return ERROR(srcSize_wrong);
                 nbSeq = ((nbSeq-0x80)<<8) + *ip++;
+            }
         }
         *nbSeqPtr = nbSeq;
     }
@@ -726,19 +782,19 @@ size_t ZSTD_decodeSeqHeaders(ZSTD_DCtx* dctx, int* nbSeqPtr,
         ip++;
 
         /* Build DTables */
-        {   size_t const llhSize = ZSTD_buildSeqTable(dctx->LLTable, &dctx->LLTptr,
+        {   size_t const llhSize = ZSTD_buildSeqTable(dctx->entropy.LLTable, &dctx->LLTptr,
                                                       LLtype, MaxLL, LLFSELog,
                                                       ip, iend-ip, LL_defaultDTable, dctx->fseEntropy);
             if (ZSTD_isError(llhSize)) return ERROR(corruption_detected);
             ip += llhSize;
         }
-        {   size_t const ofhSize = ZSTD_buildSeqTable(dctx->OFTable, &dctx->OFTptr,
+        {   size_t const ofhSize = ZSTD_buildSeqTable(dctx->entropy.OFTable, &dctx->OFTptr,
                                                       OFtype, MaxOff, OffFSELog,
                                                       ip, iend-ip, OF_defaultDTable, dctx->fseEntropy);
             if (ZSTD_isError(ofhSize)) return ERROR(corruption_detected);
             ip += ofhSize;
         }
-        {   size_t const mlhSize = ZSTD_buildSeqTable(dctx->MLTable, &dctx->MLTptr,
+        {   size_t const mlhSize = ZSTD_buildSeqTable(dctx->entropy.MLTable, &dctx->MLTptr,
                                                       MLtype, MaxML, MLFSELog,
                                                       ip, iend-ip, ML_defaultDTable, dctx->fseEntropy);
             if (ZSTD_isError(mlhSize)) return ERROR(corruption_detected);
@@ -754,6 +810,7 @@ typedef struct {
     size_t litLength;
     size_t matchLength;
     size_t offset;
+    const BYTE* match;
 } seq_t;
 
 typedef struct {
@@ -762,88 +819,17 @@ typedef struct {
     FSE_DState_t stateOffb;
     FSE_DState_t stateML;
     size_t prevOffset[ZSTD_REP_NUM];
+    const BYTE* base;
+    size_t pos;
+    uPtrDiff gotoDict;
 } seqState_t;
 
 
-static seq_t ZSTD_decodeSequence(seqState_t* seqState)
-{
-    seq_t seq;
-
-    U32 const llCode = FSE_peekSymbol(&seqState->stateLL);
-    U32 const mlCode = FSE_peekSymbol(&seqState->stateML);
-    U32 const ofCode = FSE_peekSymbol(&seqState->stateOffb);   /* <= maxOff, by table construction */
-
-    U32 const llBits = LL_bits[llCode];
-    U32 const mlBits = ML_bits[mlCode];
-    U32 const ofBits = ofCode;
-    U32 const totalBits = llBits+mlBits+ofBits;
-
-    static const U32 LL_base[MaxLL+1] = {
-                             0,  1,  2,  3,  4,  5,  6,  7,  8,  9,   10,    11,    12,    13,    14,     15,
-                            16, 18, 20, 22, 24, 28, 32, 40, 48, 64, 0x80, 0x100, 0x200, 0x400, 0x800, 0x1000,
-                            0x2000, 0x4000, 0x8000, 0x10000 };
-
-    static const U32 ML_base[MaxML+1] = {
-                             3,  4,  5,  6,  7,  8,  9, 10, 11, 12, 13,   14,    15,    16,    17,    18,
-                            19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29,   30,    31,    32,    33,    34,
-                            35, 37, 39, 41, 43, 47, 51, 59, 67, 83, 99, 0x83, 0x103, 0x203, 0x403, 0x803,
-                            0x1003, 0x2003, 0x4003, 0x8003, 0x10003 };
-
-    static const U32 OF_base[MaxOff+1] = {
-                 0,        1,       1,       5,     0xD,     0x1D,     0x3D,     0x7D,
-                 0xFD,   0x1FD,   0x3FD,   0x7FD,   0xFFD,   0x1FFD,   0x3FFD,   0x7FFD,
-                 0xFFFD, 0x1FFFD, 0x3FFFD, 0x7FFFD, 0xFFFFD, 0x1FFFFD, 0x3FFFFD, 0x7FFFFD,
-                 0xFFFFFD, 0x1FFFFFD, 0x3FFFFFD, 0x7FFFFFD, 0xFFFFFFD };
-
-    /* sequence */
-    {   size_t offset;
-        if (!ofCode)
-            offset = 0;
-        else {
-            offset = OF_base[ofCode] + BIT_readBits(&seqState->DStream, ofBits);   /* <=  (ZSTD_WINDOWLOG_MAX-1) bits */
-            if (MEM_32bits()) BIT_reloadDStream(&seqState->DStream);
-        }
-
-        if (ofCode <= 1) {
-            offset += (llCode==0);
-            if (offset) {
-                size_t const temp = (offset==3) ? seqState->prevOffset[0] - 1 : seqState->prevOffset[offset];
-                if (offset != 1) seqState->prevOffset[2] = seqState->prevOffset[1];
-                seqState->prevOffset[1] = seqState->prevOffset[0];
-                seqState->prevOffset[0] = offset = temp;
-            } else {
-                offset = seqState->prevOffset[0];
-            }
-        } else {
-            seqState->prevOffset[2] = seqState->prevOffset[1];
-            seqState->prevOffset[1] = seqState->prevOffset[0];
-            seqState->prevOffset[0] = offset;
-        }
-        seq.offset = offset;
-    }
-
-    seq.matchLength = ML_base[mlCode] + ((mlCode>31) ? BIT_readBits(&seqState->DStream, mlBits) : 0);  /* <=  16 bits */
-    if (MEM_32bits() && (mlBits+llBits>24)) BIT_reloadDStream(&seqState->DStream);
-
-    seq.litLength = LL_base[llCode] + ((llCode>15) ? BIT_readBits(&seqState->DStream, llBits) : 0);    /* <=  16 bits */
-    if (MEM_32bits() ||
-       (totalBits > 64 - 7 - (LLFSELog+MLFSELog+OffFSELog)) ) BIT_reloadDStream(&seqState->DStream);
-
-    /* ANS state update */
-    FSE_updateState(&seqState->stateLL, &seqState->DStream);    /* <=  9 bits */
-    FSE_updateState(&seqState->stateML, &seqState->DStream);    /* <=  9 bits */
-    if (MEM_32bits()) BIT_reloadDStream(&seqState->DStream);    /* <= 18 bits */
-    FSE_updateState(&seqState->stateOffb, &seqState->DStream);  /* <=  8 bits */
-
-    return seq;
-}
-
-
-FORCE_INLINE
-size_t ZSTD_execSequence(BYTE* op,
-                                BYTE* const oend, seq_t sequence,
-                                const BYTE** litPtr, const BYTE* const litLimit_w,
-                                const BYTE* const base, const BYTE* const vBase, const BYTE* const dictEnd)
+FORCE_NOINLINE
+size_t ZSTD_execSequenceLast7(BYTE* op,
+                              BYTE* const oend, seq_t sequence,
+                              const BYTE** litPtr, const BYTE* const litLimit,
+                              const BYTE* const base, const BYTE* const vBase, const BYTE* const dictEnd)
 {
     BYTE* const oLitEnd = op + sequence.litLength;
     size_t const sequenceLength = sequence.litLength + sequence.matchLength;
@@ -853,15 +839,17 @@ size_t ZSTD_execSequence(BYTE* op,
     const BYTE* match = oLitEnd - sequence.offset;
 
     /* check */
-    if ((oLitEnd>oend_w) | (oMatchEnd>oend)) return ERROR(dstSize_tooSmall); /* last match must start at a minimum distance of WILDCOPY_OVERLENGTH from oend */
-    if (iLitEnd > litLimit_w) return ERROR(corruption_detected);   /* over-read beyond lit buffer */
+    if (oMatchEnd>oend) return ERROR(dstSize_tooSmall); /* last match must start at a minimum distance of WILDCOPY_OVERLENGTH from oend */
+    if (iLitEnd > litLimit) return ERROR(corruption_detected);   /* over-read beyond lit buffer */
+    if (oLitEnd <= oend_w) return ERROR(GENERIC);   /* Precondition */
 
-    /* copy Literals */
-    ZSTD_copy8(op, *litPtr);
-    if (sequence.litLength > 8)
-        ZSTD_wildcopy(op+8, (*litPtr)+8, sequence.litLength - 8);   /* note : since oLitEnd <= oend-WILDCOPY_OVERLENGTH, no risk of overwrite beyond oend */
-    op = oLitEnd;
-    *litPtr = iLitEnd;   /* update for next sequence */
+    /* copy literals */
+    if (op < oend_w) {
+        ZSTD_wildcopy(op, *litPtr, oend_w - op);
+        *litPtr += oend_w - op;
+        op = oend_w;
+    }
+    while (op < oLitEnd) *op++ = *(*litPtr)++;
 
     /* copy Match */
     if (sequence.offset > (size_t)(oLitEnd - base)) {
@@ -879,12 +867,151 @@ size_t ZSTD_execSequence(BYTE* op,
             sequence.matchLength -= length1;
             match = base;
     }   }
+    while (op < oMatchEnd) *op++ = *match++;
+    return sequenceLength;
+}
+
+
+static seq_t ZSTD_decodeSequence(seqState_t* seqState)
+{
+    seq_t seq;
+
+    U32 const llCode = FSE_peekSymbol(&seqState->stateLL);
+    U32 const mlCode = FSE_peekSymbol(&seqState->stateML);
+    U32 const ofCode = FSE_peekSymbol(&seqState->stateOffb);   /* <= maxOff, by table construction */
+
+    U32 const llBits = LL_bits[llCode];
+    U32 const mlBits = ML_bits[mlCode];
+    U32 const ofBits = ofCode;
+    U32 const totalBits = llBits+mlBits+ofBits;
+
+    static const U32 LL_base[MaxLL+1] = {
+                             0,    1,    2,     3,     4,     5,     6,      7,
+                             8,    9,   10,    11,    12,    13,    14,     15,
+                            16,   18,   20,    22,    24,    28,    32,     40,
+                            48,   64, 0x80, 0x100, 0x200, 0x400, 0x800, 0x1000,
+                            0x2000, 0x4000, 0x8000, 0x10000 };
+
+    static const U32 ML_base[MaxML+1] = {
+                             3,  4,  5,    6,     7,     8,     9,    10,
+                            11, 12, 13,   14,    15,    16,    17,    18,
+                            19, 20, 21,   22,    23,    24,    25,    26,
+                            27, 28, 29,   30,    31,    32,    33,    34,
+                            35, 37, 39,   41,    43,    47,    51,    59,
+                            67, 83, 99, 0x83, 0x103, 0x203, 0x403, 0x803,
+                            0x1003, 0x2003, 0x4003, 0x8003, 0x10003 };
+
+    static const U32 OF_base[MaxOff+1] = {
+                     0,        1,       1,       5,     0xD,     0x1D,     0x3D,     0x7D,
+                     0xFD,   0x1FD,   0x3FD,   0x7FD,   0xFFD,   0x1FFD,   0x3FFD,   0x7FFD,
+                     0xFFFD, 0x1FFFD, 0x3FFFD, 0x7FFFD, 0xFFFFD, 0x1FFFFD, 0x3FFFFD, 0x7FFFFD,
+                     0xFFFFFD, 0x1FFFFFD, 0x3FFFFFD, 0x7FFFFFD, 0xFFFFFFD };
+
+    /* sequence */
+    {   size_t offset;
+        if (!ofCode)
+            offset = 0;
+        else {
+            offset = OF_base[ofCode] + BIT_readBitsFast(&seqState->DStream, ofBits);   /* <=  (ZSTD_WINDOWLOG_MAX-1) bits */
+            if (MEM_32bits()) BIT_reloadDStream(&seqState->DStream);
+        }
+
+        if (ofCode <= 1) {
+            offset += (llCode==0);
+            if (offset) {
+                size_t temp = (offset==3) ? seqState->prevOffset[0] - 1 : seqState->prevOffset[offset];
+                temp += !temp;   /* 0 is not valid; input is corrupted; force offset to 1 */
+                if (offset != 1) seqState->prevOffset[2] = seqState->prevOffset[1];
+                seqState->prevOffset[1] = seqState->prevOffset[0];
+                seqState->prevOffset[0] = offset = temp;
+            } else {
+                offset = seqState->prevOffset[0];
+            }
+        } else {
+            seqState->prevOffset[2] = seqState->prevOffset[1];
+            seqState->prevOffset[1] = seqState->prevOffset[0];
+            seqState->prevOffset[0] = offset;
+        }
+        seq.offset = offset;
+    }
+
+    seq.matchLength = ML_base[mlCode]
+                    + ((mlCode>31) ? BIT_readBitsFast(&seqState->DStream, mlBits) : 0);  /* <=  16 bits */
+    if (MEM_32bits() && (mlBits+llBits>24)) BIT_reloadDStream(&seqState->DStream);
+
+    seq.litLength = LL_base[llCode]
+                  + ((llCode>15) ? BIT_readBitsFast(&seqState->DStream, llBits) : 0);    /* <=  16 bits */
+    if (  MEM_32bits()
+      || (totalBits > 64 - 7 - (LLFSELog+MLFSELog+OffFSELog)) )
+       BIT_reloadDStream(&seqState->DStream);
+
+    DEBUGLOG(6, "seq: litL=%u, matchL=%u, offset=%u",
+                (U32)seq.litLength, (U32)seq.matchLength, (U32)seq.offset);
+
+    /* ANS state update */
+    FSE_updateState(&seqState->stateLL, &seqState->DStream);    /* <=  9 bits */
+    FSE_updateState(&seqState->stateML, &seqState->DStream);    /* <=  9 bits */
+    if (MEM_32bits()) BIT_reloadDStream(&seqState->DStream);    /* <= 18 bits */
+    FSE_updateState(&seqState->stateOffb, &seqState->DStream);  /* <=  8 bits */
+
+    return seq;
+}
+
+
+FORCE_INLINE
+size_t ZSTD_execSequence(BYTE* op,
+                         BYTE* const oend, seq_t sequence,
+                         const BYTE** litPtr, const BYTE* const litLimit,
+                         const BYTE* const base, const BYTE* const vBase, const BYTE* const dictEnd)
+{
+    BYTE* const oLitEnd = op + sequence.litLength;
+    size_t const sequenceLength = sequence.litLength + sequence.matchLength;
+    BYTE* const oMatchEnd = op + sequenceLength;   /* risk : address space overflow (32-bits) */
+    BYTE* const oend_w = oend - WILDCOPY_OVERLENGTH;
+    const BYTE* const iLitEnd = *litPtr + sequence.litLength;
+    const BYTE* match = oLitEnd - sequence.offset;
+
+    /* check */
+    if (oMatchEnd>oend) return ERROR(dstSize_tooSmall); /* last match must start at a minimum distance of WILDCOPY_OVERLENGTH from oend */
+    if (iLitEnd > litLimit) return ERROR(corruption_detected);   /* over-read beyond lit buffer */
+    if (oLitEnd>oend_w) return ZSTD_execSequenceLast7(op, oend, sequence, litPtr, litLimit, base, vBase, dictEnd);
+
+    /* copy Literals */
+    ZSTD_copy8(op, *litPtr);
+    if (sequence.litLength > 8)
+        ZSTD_wildcopy(op+8, (*litPtr)+8, sequence.litLength - 8);   /* note : since oLitEnd <= oend-WILDCOPY_OVERLENGTH, no risk of overwrite beyond oend */
+    op = oLitEnd;
+    *litPtr = iLitEnd;   /* update for next sequence */
+
+    /* copy Match */
+    if (sequence.offset > (size_t)(oLitEnd - base)) {
+        /* offset beyond prefix -> go into extDict */
+        if (sequence.offset > (size_t)(oLitEnd - vBase))
+            return ERROR(corruption_detected);
+        match = dictEnd + (match - base);
+        if (match + sequence.matchLength <= dictEnd) {
+            memmove(oLitEnd, match, sequence.matchLength);
+            return sequenceLength;
+        }
+        /* span extDict & currentPrefixSegment */
+        {   size_t const length1 = dictEnd - match;
+            memmove(oLitEnd, match, length1);
+            op = oLitEnd + length1;
+            sequence.matchLength -= length1;
+            match = base;
+            if (op > oend_w || sequence.matchLength < MINMATCH) {
+              U32 i;
+              for (i = 0; i < sequence.matchLength; ++i) op[i] = match[i];
+              return sequenceLength;
+            }
+    }   }
+    /* Requirement: op <= oend_w && sequence.matchLength >= MINMATCH */
 
     /* match within prefix */
     if (sequence.offset < 8) {
         /* close range match, overlap */
         static const U32 dec32table[] = { 0, 1, 2, 1, 4, 4, 4, 4 };   /* added */
-        static const int dec64table[] = { 8, 8, 8, 7, 8, 9,10,11 };   /* substracted */
+        static const int dec64table[] = { 8, 8, 8, 7, 8, 9,10,11 };   /* subtracted */
         int const sub2 = dec64table[sequence.offset];
         op[0] = match[0];
         op[1] = match[1];
@@ -906,7 +1033,7 @@ size_t ZSTD_execSequence(BYTE* op,
         }
         while (op < oMatchEnd) *op++ = *match++;
     } else {
-        ZSTD_wildcopy(op, match, sequence.matchLength-8);   /* works even if matchLength < 8 */
+        ZSTD_wildcopy(op, match, (ptrdiff_t)sequence.matchLength-8);   /* works even if matchLength < 8 */
     }
     return sequenceLength;
 }
@@ -923,15 +1050,17 @@ static size_t ZSTD_decompressSequences(
     BYTE* const oend = ostart + maxDstSize;
     BYTE* op = ostart;
     const BYTE* litPtr = dctx->litPtr;
-    const BYTE* const litLimit_w = litPtr + dctx->litBufSize - WILDCOPY_OVERLENGTH;
     const BYTE* const litEnd = litPtr + dctx->litSize;
     const BYTE* const base = (const BYTE*) (dctx->base);
     const BYTE* const vBase = (const BYTE*) (dctx->vBase);
     const BYTE* const dictEnd = (const BYTE*) (dctx->dictEnd);
     int nbSeq;
+    DEBUGLOG(5, "ZSTD_decompressSequences");
 
     /* Build Decoding Tables */
     {   size_t const seqHSize = ZSTD_decodeSeqHeaders(dctx, &nbSeq, ip, seqSize);
+        DEBUGLOG(5, "ZSTD_decodeSeqHeaders: size=%u, nbSeq=%i",
+                    (U32)seqHSize, nbSeq);
         if (ZSTD_isError(seqHSize)) return seqHSize;
         ip += seqHSize;
     }
@@ -940,7 +1069,7 @@ static size_t ZSTD_decompressSequences(
     if (nbSeq) {
         seqState_t seqState;
         dctx->fseEntropy = 1;
-        { U32 i; for (i=0; i<ZSTD_REP_NUM; i++) seqState.prevOffset[i] = dctx->rep[i]; }
+        { U32 i; for (i=0; i<ZSTD_REP_NUM; i++) seqState.prevOffset[i] = dctx->entropy.rep[i]; }
         CHECK_E(BIT_initDStream(&seqState.DStream, ip, iend-ip), corruption_detected);
         FSE_initDState(&seqState.stateLL, &seqState.DStream, dctx->LLTptr);
         FSE_initDState(&seqState.stateOffb, &seqState.DStream, dctx->OFTptr);
@@ -949,15 +1078,17 @@ static size_t ZSTD_decompressSequences(
         for ( ; (BIT_reloadDStream(&(seqState.DStream)) <= BIT_DStream_completed) && nbSeq ; ) {
             nbSeq--;
             {   seq_t const sequence = ZSTD_decodeSequence(&seqState);
-                size_t const oneSeqSize = ZSTD_execSequence(op, oend, sequence, &litPtr, litLimit_w, base, vBase, dictEnd);
+                size_t const oneSeqSize = ZSTD_execSequence(op, oend, sequence, &litPtr, litEnd, base, vBase, dictEnd);
+                DEBUGLOG(6, "regenerated sequence size : %u", (U32)oneSeqSize);
                 if (ZSTD_isError(oneSeqSize)) return oneSeqSize;
                 op += oneSeqSize;
         }   }
 
         /* check if reached exact end */
+        DEBUGLOG(5, "after decode loop, remaining nbSeq : %i", nbSeq);
         if (nbSeq) return ERROR(corruption_detected);
         /* save reps for next block */
-        { U32 i; for (i=0; i<ZSTD_REP_NUM; i++) dctx->rep[i] = (U32)(seqState.prevOffset[i]); }
+        { U32 i; for (i=0; i<ZSTD_REP_NUM; i++) dctx->entropy.rep[i] = (U32)(seqState.prevOffset[i]); }
     }
 
     /* last literal segment */
@@ -971,6 +1102,297 @@ static size_t ZSTD_decompressSequences(
 }
 
 
+FORCE_INLINE seq_t ZSTD_decodeSequenceLong_generic(seqState_t* seqState, int const longOffsets)
+{
+    seq_t seq;
+
+    U32 const llCode = FSE_peekSymbol(&seqState->stateLL);
+    U32 const mlCode = FSE_peekSymbol(&seqState->stateML);
+    U32 const ofCode = FSE_peekSymbol(&seqState->stateOffb);   /* <= maxOff, by table construction */
+
+    U32 const llBits = LL_bits[llCode];
+    U32 const mlBits = ML_bits[mlCode];
+    U32 const ofBits = ofCode;
+    U32 const totalBits = llBits+mlBits+ofBits;
+
+    static const U32 LL_base[MaxLL+1] = {
+                             0,  1,    2,     3,     4,     5,     6,      7,
+                             8,  9,   10,    11,    12,    13,    14,     15,
+                            16, 18,   20,    22,    24,    28,    32,     40,
+                            48, 64, 0x80, 0x100, 0x200, 0x400, 0x800, 0x1000,
+                            0x2000, 0x4000, 0x8000, 0x10000 };
+
+    static const U32 ML_base[MaxML+1] = {
+                             3,  4,  5,    6,     7,     8,     9,    10,
+                            11, 12, 13,   14,    15,    16,    17,    18,
+                            19, 20, 21,   22,    23,    24,    25,    26,
+                            27, 28, 29,   30,    31,    32,    33,    34,
+                            35, 37, 39,   41,    43,    47,    51,    59,
+                            67, 83, 99, 0x83, 0x103, 0x203, 0x403, 0x803,
+                            0x1003, 0x2003, 0x4003, 0x8003, 0x10003 };
+
+    static const U32 OF_base[MaxOff+1] = {
+                     0,        1,       1,       5,     0xD,     0x1D,     0x3D,     0x7D,
+                     0xFD,   0x1FD,   0x3FD,   0x7FD,   0xFFD,   0x1FFD,   0x3FFD,   0x7FFD,
+                     0xFFFD, 0x1FFFD, 0x3FFFD, 0x7FFFD, 0xFFFFD, 0x1FFFFD, 0x3FFFFD, 0x7FFFFD,
+                     0xFFFFFD, 0x1FFFFFD, 0x3FFFFFD, 0x7FFFFFD, 0xFFFFFFD };
+
+    /* sequence */
+    {   size_t offset;
+        if (!ofCode)
+            offset = 0;
+        else {
+            if (longOffsets) {
+                int const extraBits = ofBits - MIN(ofBits, STREAM_ACCUMULATOR_MIN);
+                offset = OF_base[ofCode] + (BIT_readBitsFast(&seqState->DStream, ofBits - extraBits) << extraBits);
+                if (MEM_32bits() || extraBits) BIT_reloadDStream(&seqState->DStream);
+                if (extraBits) offset += BIT_readBitsFast(&seqState->DStream, extraBits);
+            } else {
+                offset = OF_base[ofCode] + BIT_readBitsFast(&seqState->DStream, ofBits);   /* <=  (ZSTD_WINDOWLOG_MAX-1) bits */
+                if (MEM_32bits()) BIT_reloadDStream(&seqState->DStream);
+            }
+        }
+
+        if (ofCode <= 1) {
+            offset += (llCode==0);
+            if (offset) {
+                size_t temp = (offset==3) ? seqState->prevOffset[0] - 1 : seqState->prevOffset[offset];
+                temp += !temp;   /* 0 is not valid; input is corrupted; force offset to 1 */
+                if (offset != 1) seqState->prevOffset[2] = seqState->prevOffset[1];
+                seqState->prevOffset[1] = seqState->prevOffset[0];
+                seqState->prevOffset[0] = offset = temp;
+            } else {
+                offset = seqState->prevOffset[0];
+            }
+        } else {
+            seqState->prevOffset[2] = seqState->prevOffset[1];
+            seqState->prevOffset[1] = seqState->prevOffset[0];
+            seqState->prevOffset[0] = offset;
+        }
+        seq.offset = offset;
+    }
+
+    seq.matchLength = ML_base[mlCode] + ((mlCode>31) ? BIT_readBitsFast(&seqState->DStream, mlBits) : 0);  /* <=  16 bits */
+    if (MEM_32bits() && (mlBits+llBits>24)) BIT_reloadDStream(&seqState->DStream);
+
+    seq.litLength = LL_base[llCode] + ((llCode>15) ? BIT_readBitsFast(&seqState->DStream, llBits) : 0);    /* <=  16 bits */
+    if (MEM_32bits() ||
+       (totalBits > 64 - 7 - (LLFSELog+MLFSELog+OffFSELog)) ) BIT_reloadDStream(&seqState->DStream);
+
+    {   size_t const pos = seqState->pos + seq.litLength;
+        seq.match = seqState->base + pos - seq.offset;    /* single memory segment */
+        if (seq.offset > pos) seq.match += seqState->gotoDict;   /* separate memory segment */
+        seqState->pos = pos + seq.matchLength;
+    }
+
+    /* ANS state update */
+    FSE_updateState(&seqState->stateLL, &seqState->DStream);    /* <=  9 bits */
+    FSE_updateState(&seqState->stateML, &seqState->DStream);    /* <=  9 bits */
+    if (MEM_32bits()) BIT_reloadDStream(&seqState->DStream);    /* <= 18 bits */
+    FSE_updateState(&seqState->stateOffb, &seqState->DStream);  /* <=  8 bits */
+
+    return seq;
+}
+
+static seq_t ZSTD_decodeSequenceLong(seqState_t* seqState, unsigned const windowSize) {
+    if (ZSTD_highbit32(windowSize) > STREAM_ACCUMULATOR_MIN) {
+        return ZSTD_decodeSequenceLong_generic(seqState, 1);
+    } else {
+        return ZSTD_decodeSequenceLong_generic(seqState, 0);
+    }
+}
+
+FORCE_INLINE
+size_t ZSTD_execSequenceLong(BYTE* op,
+                                BYTE* const oend, seq_t sequence,
+                                const BYTE** litPtr, const BYTE* const litLimit,
+                                const BYTE* const base, const BYTE* const vBase, const BYTE* const dictEnd)
+{
+    BYTE* const oLitEnd = op + sequence.litLength;
+    size_t const sequenceLength = sequence.litLength + sequence.matchLength;
+    BYTE* const oMatchEnd = op + sequenceLength;   /* risk : address space overflow (32-bits) */
+    BYTE* const oend_w = oend - WILDCOPY_OVERLENGTH;
+    const BYTE* const iLitEnd = *litPtr + sequence.litLength;
+    const BYTE* match = sequence.match;
+
+    /* check */
+#if 1
+    if (oMatchEnd>oend) return ERROR(dstSize_tooSmall); /* last match must start at a minimum distance of WILDCOPY_OVERLENGTH from oend */
+    if (iLitEnd > litLimit) return ERROR(corruption_detected);   /* over-read beyond lit buffer */
+    if (oLitEnd>oend_w) return ZSTD_execSequenceLast7(op, oend, sequence, litPtr, litLimit, base, vBase, dictEnd);
+#endif
+
+    /* copy Literals */
+    ZSTD_copy8(op, *litPtr);
+    if (sequence.litLength > 8)
+        ZSTD_wildcopy(op+8, (*litPtr)+8, sequence.litLength - 8);   /* note : since oLitEnd <= oend-WILDCOPY_OVERLENGTH, no risk of overwrite beyond oend */
+    op = oLitEnd;
+    *litPtr = iLitEnd;   /* update for next sequence */
+
+    /* copy Match */
+#if 1
+    if (sequence.offset > (size_t)(oLitEnd - base)) {
+        /* offset beyond prefix */
+        if (sequence.offset > (size_t)(oLitEnd - vBase)) return ERROR(corruption_detected);
+        if (match + sequence.matchLength <= dictEnd) {
+            memmove(oLitEnd, match, sequence.matchLength);
+            return sequenceLength;
+        }
+        /* span extDict & currentPrefixSegment */
+        {   size_t const length1 = dictEnd - match;
+            memmove(oLitEnd, match, length1);
+            op = oLitEnd + length1;
+            sequence.matchLength -= length1;
+            match = base;
+            if (op > oend_w || sequence.matchLength < MINMATCH) {
+              U32 i;
+              for (i = 0; i < sequence.matchLength; ++i) op[i] = match[i];
+              return sequenceLength;
+            }
+    }   }
+    /* Requirement: op <= oend_w && sequence.matchLength >= MINMATCH */
+#endif
+
+    /* match within prefix */
+    if (sequence.offset < 8) {
+        /* close range match, overlap */
+        static const U32 dec32table[] = { 0, 1, 2, 1, 4, 4, 4, 4 };   /* added */
+        static const int dec64table[] = { 8, 8, 8, 7, 8, 9,10,11 };   /* subtracted */
+        int const sub2 = dec64table[sequence.offset];
+        op[0] = match[0];
+        op[1] = match[1];
+        op[2] = match[2];
+        op[3] = match[3];
+        match += dec32table[sequence.offset];
+        ZSTD_copy4(op+4, match);
+        match -= sub2;
+    } else {
+        ZSTD_copy8(op, match);
+    }
+    op += 8; match += 8;
+
+    if (oMatchEnd > oend-(16-MINMATCH)) {
+        if (op < oend_w) {
+            ZSTD_wildcopy(op, match, oend_w - op);
+            match += oend_w - op;
+            op = oend_w;
+        }
+        while (op < oMatchEnd) *op++ = *match++;
+    } else {
+        ZSTD_wildcopy(op, match, (ptrdiff_t)sequence.matchLength-8);   /* works even if matchLength < 8 */
+    }
+    return sequenceLength;
+}
+
+static size_t ZSTD_decompressSequencesLong(
+                               ZSTD_DCtx* dctx,
+                               void* dst, size_t maxDstSize,
+                         const void* seqStart, size_t seqSize)
+{
+    const BYTE* ip = (const BYTE*)seqStart;
+    const BYTE* const iend = ip + seqSize;
+    BYTE* const ostart = (BYTE* const)dst;
+    BYTE* const oend = ostart + maxDstSize;
+    BYTE* op = ostart;
+    const BYTE* litPtr = dctx->litPtr;
+    const BYTE* const litEnd = litPtr + dctx->litSize;
+    const BYTE* const base = (const BYTE*) (dctx->base);
+    const BYTE* const vBase = (const BYTE*) (dctx->vBase);
+    const BYTE* const dictEnd = (const BYTE*) (dctx->dictEnd);
+    unsigned const windowSize32 = (unsigned)dctx->fParams.windowSize;
+    int nbSeq;
+
+    /* Build Decoding Tables */
+    {   size_t const seqHSize = ZSTD_decodeSeqHeaders(dctx, &nbSeq, ip, seqSize);
+        if (ZSTD_isError(seqHSize)) return seqHSize;
+        ip += seqHSize;
+    }
+
+    /* Regen sequences */
+    if (nbSeq) {
+#define STORED_SEQS 4
+#define STOSEQ_MASK (STORED_SEQS-1)
+#define ADVANCED_SEQS 4
+        seq_t sequences[STORED_SEQS];
+        int const seqAdvance = MIN(nbSeq, ADVANCED_SEQS);
+        seqState_t seqState;
+        int seqNb;
+        dctx->fseEntropy = 1;
+        { U32 i; for (i=0; i<ZSTD_REP_NUM; i++) seqState.prevOffset[i] = dctx->entropy.rep[i]; }
+        seqState.base = base;
+        seqState.pos = (size_t)(op-base);
+        seqState.gotoDict = (uPtrDiff)dictEnd - (uPtrDiff)base; /* cast to avoid undefined behaviour */
+        CHECK_E(BIT_initDStream(&seqState.DStream, ip, iend-ip), corruption_detected);
+        FSE_initDState(&seqState.stateLL, &seqState.DStream, dctx->LLTptr);
+        FSE_initDState(&seqState.stateOffb, &seqState.DStream, dctx->OFTptr);
+        FSE_initDState(&seqState.stateML, &seqState.DStream, dctx->MLTptr);
+
+        /* prepare in advance */
+        for (seqNb=0; (BIT_reloadDStream(&seqState.DStream) <= BIT_DStream_completed) && seqNb<seqAdvance; seqNb++) {
+            sequences[seqNb] = ZSTD_decodeSequenceLong(&seqState, windowSize32);
+        }
+        if (seqNb<seqAdvance) return ERROR(corruption_detected);
+
+        /* decode and decompress */
+        for ( ; (BIT_reloadDStream(&(seqState.DStream)) <= BIT_DStream_completed) && seqNb<nbSeq ; seqNb++) {
+            seq_t const sequence = ZSTD_decodeSequenceLong(&seqState, windowSize32);
+            size_t const oneSeqSize = ZSTD_execSequenceLong(op, oend, sequences[(seqNb-ADVANCED_SEQS) & STOSEQ_MASK], &litPtr, litEnd, base, vBase, dictEnd);
+            if (ZSTD_isError(oneSeqSize)) return oneSeqSize;
+            ZSTD_PREFETCH(sequence.match);
+            sequences[seqNb&STOSEQ_MASK] = sequence;
+            op += oneSeqSize;
+        }
+        if (seqNb<nbSeq) return ERROR(corruption_detected);
+
+        /* finish queue */
+        seqNb -= seqAdvance;
+        for ( ; seqNb<nbSeq ; seqNb++) {
+            size_t const oneSeqSize = ZSTD_execSequenceLong(op, oend, sequences[seqNb&STOSEQ_MASK], &litPtr, litEnd, base, vBase, dictEnd);
+            if (ZSTD_isError(oneSeqSize)) return oneSeqSize;
+            op += oneSeqSize;
+        }
+
+        /* save reps for next block */
+        { U32 i; for (i=0; i<ZSTD_REP_NUM; i++) dctx->entropy.rep[i] = (U32)(seqState.prevOffset[i]); }
+    }
+
+    /* last literal segment */
+    {   size_t const lastLLSize = litEnd - litPtr;
+        if (lastLLSize > (size_t)(oend-op)) return ERROR(dstSize_tooSmall);
+        memcpy(op, litPtr, lastLLSize);
+        op += lastLLSize;
+    }
+
+    return op-ostart;
+}
+
+
+static size_t ZSTD_decompressBlock_internal(ZSTD_DCtx* dctx,
+                            void* dst, size_t dstCapacity,
+                      const void* src, size_t srcSize)
+{   /* blockType == blockCompressed */
+    const BYTE* ip = (const BYTE*)src;
+    DEBUGLOG(5, "ZSTD_decompressBlock_internal");
+
+    if (srcSize >= ZSTD_BLOCKSIZE_MAX) return ERROR(srcSize_wrong);
+
+    /* Decode literals section */
+    {   size_t const litCSize = ZSTD_decodeLiteralsBlock(dctx, src, srcSize);
+        DEBUGLOG(5, "ZSTD_decodeLiteralsBlock : %u", (U32)litCSize);
+        if (ZSTD_isError(litCSize)) return litCSize;
+        ip += litCSize;
+        srcSize -= litCSize;
+    }
+    if (sizeof(size_t) > 4)  /* do not enable prefetching on 32-bits x86, as it's performance detrimental */
+                             /* likely because of register pressure */
+                             /* if that's the correct cause, then 32-bits ARM should be affected differently */
+                             /* it would be good to test this on ARM real hardware, to see if prefetch version improves speed */
+        if (dctx->fParams.windowSize > (1<<23))
+            return ZSTD_decompressSequencesLong(dctx, dst, dstCapacity, ip, srcSize);
+    return ZSTD_decompressSequences(dctx, dst, dstCapacity, ip, srcSize);
+}
+
+
 static void ZSTD_checkContinuity(ZSTD_DCtx* dctx, const void* dst)
 {
     if (dst != dctx->previousDstEnd) {   /* not contiguous */
@@ -981,25 +1403,6 @@ static void ZSTD_checkContinuity(ZSTD_DCtx* dctx, const void* dst)
     }
 }
 
-
-static size_t ZSTD_decompressBlock_internal(ZSTD_DCtx* dctx,
-                            void* dst, size_t dstCapacity,
-                      const void* src, size_t srcSize)
-{   /* blockType == blockCompressed */
-    const BYTE* ip = (const BYTE*)src;
-
-    if (srcSize >= ZSTD_BLOCKSIZE_ABSOLUTEMAX) return ERROR(srcSize_wrong);
-
-    /* Decode literals sub-block */
-    {   size_t const litCSize = ZSTD_decodeLiteralsBlock(dctx, src, srcSize);
-        if (ZSTD_isError(litCSize)) return litCSize;
-        ip += litCSize;
-        srcSize -= litCSize;
-    }
-    return ZSTD_decompressSequences(dctx, dst, dstCapacity, ip, srcSize);
-}
-
-
 size_t ZSTD_decompressBlock(ZSTD_DCtx* dctx,
                             void* dst, size_t dstCapacity,
                       const void* src, size_t srcSize)
@@ -1029,27 +1432,81 @@ size_t ZSTD_generateNxBytes(void* dst, size_t dstCapacity, BYTE byte, size_t len
     return length;
 }
 
+/** ZSTD_findFrameCompressedSize() :
+ *  compatible with legacy mode
+ *  `src` must point to the start of a ZSTD frame, ZSTD legacy frame, or skippable frame
+ *  `srcSize` must be at least as large as the frame contained
+ *  @return : the compressed size of the frame starting at `src` */
+size_t ZSTD_findFrameCompressedSize(const void *src, size_t srcSize)
+{
+#if defined(ZSTD_LEGACY_SUPPORT) && (ZSTD_LEGACY_SUPPORT >= 1)
+    if (ZSTD_isLegacy(src, srcSize)) return ZSTD_findFrameCompressedSizeLegacy(src, srcSize);
+#endif
+    if (srcSize >= ZSTD_skippableHeaderSize &&
+            (MEM_readLE32(src) & 0xFFFFFFF0U) == ZSTD_MAGIC_SKIPPABLE_START) {
+        return ZSTD_skippableHeaderSize + MEM_readLE32((const BYTE*)src + 4);
+    } else {
+        const BYTE* ip = (const BYTE*)src;
+        const BYTE* const ipstart = ip;
+        size_t remainingSize = srcSize;
+        ZSTD_frameHeader fParams;
+
+        size_t const headerSize = ZSTD_frameHeaderSize(ip, remainingSize);
+        if (ZSTD_isError(headerSize)) return headerSize;
+
+        /* Frame Header */
+        {   size_t const ret = ZSTD_getFrameHeader(&fParams, ip, remainingSize);
+            if (ZSTD_isError(ret)) return ret;
+            if (ret > 0) return ERROR(srcSize_wrong);
+        }
+
+        ip += headerSize;
+        remainingSize -= headerSize;
+
+        /* Loop on each block */
+        while (1) {
+            blockProperties_t blockProperties;
+            size_t const cBlockSize = ZSTD_getcBlockSize(ip, remainingSize, &blockProperties);
+            if (ZSTD_isError(cBlockSize)) return cBlockSize;
+
+            if (ZSTD_blockHeaderSize + cBlockSize > remainingSize) return ERROR(srcSize_wrong);
+
+            ip += ZSTD_blockHeaderSize + cBlockSize;
+            remainingSize -= ZSTD_blockHeaderSize + cBlockSize;
+
+            if (blockProperties.lastBlock) break;
+        }
+
+        if (fParams.checksumFlag) {   /* Frame content checksum */
+            if (remainingSize < 4) return ERROR(srcSize_wrong);
+            ip += 4;
+            remainingSize -= 4;
+        }
+
+        return ip - ipstart;
+    }
+}
 
 /*! ZSTD_decompressFrame() :
-*   `dctx` must be properly initialized */
+*   @dctx must be properly initialized */
 static size_t ZSTD_decompressFrame(ZSTD_DCtx* dctx,
                                  void* dst, size_t dstCapacity,
-                                 const void* src, size_t srcSize)
+                                 const void** srcPtr, size_t *srcSizePtr)
 {
-    const BYTE* ip = (const BYTE*)src;
+    const BYTE* ip = (const BYTE*)(*srcPtr);
     BYTE* const ostart = (BYTE* const)dst;
     BYTE* const oend = ostart + dstCapacity;
     BYTE* op = ostart;
-    size_t remainingSize = srcSize;
+    size_t remainingSize = *srcSizePtr;
 
     /* check */
-    if (srcSize < ZSTD_frameHeaderSize_min+ZSTD_blockHeaderSize) return ERROR(srcSize_wrong);
+    if (remainingSize < ZSTD_frameHeaderSize_min+ZSTD_blockHeaderSize) return ERROR(srcSize_wrong);
 
     /* Frame Header */
-    {   size_t const frameHeaderSize = ZSTD_frameHeaderSize(src, ZSTD_frameHeaderSize_prefix);
+    {   size_t const frameHeaderSize = ZSTD_frameHeaderSize(ip, ZSTD_frameHeaderSize_prefix);
         if (ZSTD_isError(frameHeaderSize)) return frameHeaderSize;
-        if (srcSize < frameHeaderSize+ZSTD_blockHeaderSize) return ERROR(srcSize_wrong);
-        CHECK_F(ZSTD_decodeFrameHeader(dctx, src, frameHeaderSize));
+        if (remainingSize < frameHeaderSize+ZSTD_blockHeaderSize) return ERROR(srcSize_wrong);
+        CHECK_F(ZSTD_decodeFrameHeader(dctx, ip, frameHeaderSize));
         ip += frameHeaderSize; remainingSize -= frameHeaderSize;
     }
 
@@ -1094,25 +1551,111 @@ static size_t ZSTD_decompressFrame(ZSTD_DCtx* dctx,
         if (remainingSize<4) return ERROR(checksum_wrong);
         checkRead = MEM_readLE32(ip);
         if (checkRead != checkCalc) return ERROR(checksum_wrong);
+        ip += 4;
         remainingSize -= 4;
     }
 
-    if (remainingSize) return ERROR(srcSize_wrong);
+    /* Allow caller to get size read */
+    *srcPtr = ip;
+    *srcSizePtr = remainingSize;
     return op-ostart;
 }
 
+static const void* ZSTD_DDictDictContent(const ZSTD_DDict* ddict);
+static size_t ZSTD_DDictDictSize(const ZSTD_DDict* ddict);
+
+static size_t ZSTD_decompressMultiFrame(ZSTD_DCtx* dctx,
+                                        void* dst, size_t dstCapacity,
+                                  const void* src, size_t srcSize,
+                                  const void *dict, size_t dictSize,
+                                  const ZSTD_DDict* ddict)
+{
+    void* const dststart = dst;
+
+    if (ddict) {
+        if (dict) {
+            /* programmer error, these two cases should be mutually exclusive */
+            return ERROR(GENERIC);
+        }
+
+        dict = ZSTD_DDictDictContent(ddict);
+        dictSize = ZSTD_DDictDictSize(ddict);
+    }
+
+    while (srcSize >= ZSTD_frameHeaderSize_prefix) {
+        U32 magicNumber;
+
+#if defined(ZSTD_LEGACY_SUPPORT) && (ZSTD_LEGACY_SUPPORT >= 1)
+        if (ZSTD_isLegacy(src, srcSize)) {
+            size_t decodedSize;
+            size_t const frameSize = ZSTD_findFrameCompressedSizeLegacy(src, srcSize);
+            if (ZSTD_isError(frameSize)) return frameSize;
+            /* legacy support is incompatible with static dctx */
+            if (dctx->staticSize) return ERROR(memory_allocation);
+
+            decodedSize = ZSTD_decompressLegacy(dst, dstCapacity, src, frameSize, dict, dictSize);
+
+            dst = (BYTE*)dst + decodedSize;
+            dstCapacity -= decodedSize;
+
+            src = (const BYTE*)src + frameSize;
+            srcSize -= frameSize;
+
+            continue;
+        }
+#endif
+
+        magicNumber = MEM_readLE32(src);
+        if (magicNumber != ZSTD_MAGICNUMBER) {
+            if ((magicNumber & 0xFFFFFFF0U) == ZSTD_MAGIC_SKIPPABLE_START) {
+                size_t skippableSize;
+                if (srcSize < ZSTD_skippableHeaderSize)
+                    return ERROR(srcSize_wrong);
+                skippableSize = MEM_readLE32((const BYTE *)src + 4) +
+                                ZSTD_skippableHeaderSize;
+                if (srcSize < skippableSize) {
+                    return ERROR(srcSize_wrong);
+                }
+
+                src = (const BYTE *)src + skippableSize;
+                srcSize -= skippableSize;
+                continue;
+            } else {
+                return ERROR(prefix_unknown);
+            }
+        }
+
+        if (ddict) {
+            /* we were called from ZSTD_decompress_usingDDict */
+            CHECK_F(ZSTD_decompressBegin_usingDDict(dctx, ddict));
+        } else {
+            /* this will initialize correctly with no dict if dict == NULL, so
+             * use this in all cases but ddict */
+            CHECK_F(ZSTD_decompressBegin_usingDict(dctx, dict, dictSize));
+        }
+        ZSTD_checkContinuity(dctx, dst);
+
+        {   const size_t res = ZSTD_decompressFrame(dctx, dst, dstCapacity,
+                                                    &src, &srcSize);
+            if (ZSTD_isError(res)) return res;
+            /* don't need to bounds check this, ZSTD_decompressFrame will have
+             * already */
+            dst = (BYTE*)dst + res;
+            dstCapacity -= res;
+        }
+    }
+
+    if (srcSize) return ERROR(srcSize_wrong); /* input not entirely consumed */
+
+    return (BYTE*)dst - (BYTE*)dststart;
+}
 
 size_t ZSTD_decompress_usingDict(ZSTD_DCtx* dctx,
                                  void* dst, size_t dstCapacity,
                            const void* src, size_t srcSize,
                            const void* dict, size_t dictSize)
 {
-#if defined(ZSTD_LEGACY_SUPPORT) && (ZSTD_LEGACY_SUPPORT==1)
-    if (ZSTD_isLegacy(src, srcSize)) return ZSTD_decompressLegacy(dst, dstCapacity, src, srcSize, dict, dictSize);
-#endif
-    ZSTD_decompressBegin_usingDict(dctx, dict, dictSize);
-    ZSTD_checkContinuity(dctx, dst);
-    return ZSTD_decompressFrame(dctx, dst, dstCapacity, src, srcSize);
+    return ZSTD_decompressMultiFrame(dctx, dst, dstCapacity, src, srcSize, dict, dictSize, NULL);
 }
 
 
@@ -1124,7 +1667,7 @@ size_t ZSTD_decompressDCtx(ZSTD_DCtx* dctx, void* dst, size_t dstCapacity, const
 
 size_t ZSTD_decompress(void* dst, size_t dstCapacity, const void* src, size_t srcSize)
 {
-#if defined(ZSTD_HEAPMODE) && (ZSTD_HEAPMODE==1)
+#if defined(ZSTD_HEAPMODE) && (ZSTD_HEAPMODE>=1)
     size_t regenSize;
     ZSTD_DCtx* const dctx = ZSTD_createDCtx();
     if (dctx==NULL) return ERROR(memory_allocation);
@@ -1148,6 +1691,7 @@ ZSTD_nextInputType_e ZSTD_nextInputType(ZSTD_DCtx* dctx) {
     switch(dctx->stage)
     {
     default:   /* should not happen */
+        assert(0);
     case ZSTDds_getFrameHeaderSize:
     case ZSTDds_decodeFrameHeader:
         return ZSTDnit_frameHeader;
@@ -1165,21 +1709,24 @@ ZSTD_nextInputType_e ZSTD_nextInputType(ZSTD_DCtx* dctx) {
     }
 }
 
-int ZSTD_isSkipFrame(ZSTD_DCtx* dctx) { return dctx->stage == ZSTDds_skipFrame; }   /* for zbuff */
+static int ZSTD_isSkipFrame(ZSTD_DCtx* dctx) { return dctx->stage == ZSTDds_skipFrame; }
 
 /** ZSTD_decompressContinue() :
-*   @return : nb of bytes generated into `dst` (necessarily <= `dstCapacity)
-*             or an error code, which can be tested using ZSTD_isError() */
+ *  srcSize : must be the exact nb of bytes expected (see ZSTD_nextSrcSizeToDecompress())
+ *  @return : nb of bytes generated into `dst` (necessarily <= `dstCapacity)
+ *            or an error code, which can be tested using ZSTD_isError() */
 size_t ZSTD_decompressContinue(ZSTD_DCtx* dctx, void* dst, size_t dstCapacity, const void* src, size_t srcSize)
 {
+    DEBUGLOG(5, "ZSTD_decompressContinue");
     /* Sanity check */
-    if (srcSize != dctx->expected) return ERROR(srcSize_wrong);
+    if (srcSize != dctx->expected) return ERROR(srcSize_wrong);   /* unauthorized */
     if (dstCapacity) ZSTD_checkContinuity(dctx, dst);
 
     switch (dctx->stage)
     {
     case ZSTDds_getFrameHeaderSize :
-        if (srcSize != ZSTD_frameHeaderSize_prefix) return ERROR(srcSize_wrong);      /* impossible */
+        if (srcSize != ZSTD_frameHeaderSize_prefix) return ERROR(srcSize_wrong);      /* unauthorized */
+        assert(src != NULL);
         if ((MEM_readLE32(src) & 0xFFFFFFF0U) == ZSTD_MAGIC_SKIPPABLE_START) {        /* skippable frame */
             memcpy(dctx->headerBuffer, src, ZSTD_frameHeaderSize_prefix);
             dctx->expected = ZSTD_skippableHeaderSize - ZSTD_frameHeaderSize_prefix;  /* magic number + skippable frame length */
@@ -1197,6 +1744,7 @@ size_t ZSTD_decompressContinue(ZSTD_DCtx* dctx, void* dst, size_t dstCapacity, c
         dctx->expected = 0;   /* not necessary to copy more */
 
     case ZSTDds_decodeFrameHeader:
+        assert(src != NULL);
         memcpy(dctx->headerBuffer + ZSTD_frameHeaderSize_prefix, src, dctx->expected);
         CHECK_F(ZSTD_decodeFrameHeader(dctx, dctx->headerBuffer, dctx->headerSize));
         dctx->expected = ZSTD_blockHeaderSize;
@@ -1224,17 +1772,19 @@ size_t ZSTD_decompressContinue(ZSTD_DCtx* dctx, void* dst, size_t dstCapacity, c
                     dctx->stage = ZSTDds_getFrameHeaderSize;
                 }
             } else {
-                dctx->expected = 3;  /* go directly to next header */
+                dctx->expected = ZSTD_blockHeaderSize;  /* jump to next header */
                 dctx->stage = ZSTDds_decodeBlockHeader;
             }
             return 0;
         }
     case ZSTDds_decompressLastBlock:
     case ZSTDds_decompressBlock:
+        DEBUGLOG(5, "case ZSTDds_decompressBlock");
         {   size_t rSize;
             switch(dctx->bType)
             {
             case bt_compressed:
+                DEBUGLOG(5, "case bt_compressed");
                 rSize = ZSTD_decompressBlock_internal(dctx, dst, dstCapacity, src, srcSize);
                 break;
             case bt_raw :
@@ -1274,7 +1824,8 @@ size_t ZSTD_decompressContinue(ZSTD_DCtx* dctx, void* dst, size_t dstCapacity, c
             return 0;
         }
     case ZSTDds_decodeSkippableHeader:
-        {   memcpy(dctx->headerBuffer + ZSTD_frameHeaderSize_prefix, src, dctx->expected);
+        {   assert(src != NULL);
+            memcpy(dctx->headerBuffer + ZSTD_frameHeaderSize_prefix, src, dctx->expected);
             dctx->expected = MEM_readLE32(dctx->headerBuffer + 4);
             dctx->stage = ZSTDds_skipFrame;
             return 0;
@@ -1299,47 +1850,61 @@ static size_t ZSTD_refDictContent(ZSTD_DCtx* dctx, const void* dict, size_t dict
     return 0;
 }
 
-static size_t ZSTD_loadEntropy(ZSTD_DCtx* dctx, const void* const dict, size_t const dictSize)
+/* ZSTD_loadEntropy() :
+ * dict : must point at beginning of a valid zstd dictionary
+ * @return : size of entropy tables read */
+static size_t ZSTD_loadEntropy(ZSTD_entropyTables_t* entropy, const void* const dict, size_t const dictSize)
 {
     const BYTE* dictPtr = (const BYTE*)dict;
     const BYTE* const dictEnd = dictPtr + dictSize;
 
-    {   size_t const hSize = HUF_readDTableX4(dctx->hufTable, dict, dictSize);
+    if (dictSize <= 8) return ERROR(dictionary_corrupted);
+    dictPtr += 8;   /* skip header = magic + dictID */
+
+
+    {   size_t const hSize = HUF_readDTableX4_wksp(
+            entropy->hufTable, dictPtr, dictEnd - dictPtr,
+            entropy->workspace, sizeof(entropy->workspace));
         if (HUF_isError(hSize)) return ERROR(dictionary_corrupted);
         dictPtr += hSize;
     }
 
     {   short offcodeNCount[MaxOff+1];
-        U32 offcodeMaxValue=MaxOff, offcodeLog=OffFSELog;
+        U32 offcodeMaxValue = MaxOff, offcodeLog;
         size_t const offcodeHeaderSize = FSE_readNCount(offcodeNCount, &offcodeMaxValue, &offcodeLog, dictPtr, dictEnd-dictPtr);
         if (FSE_isError(offcodeHeaderSize)) return ERROR(dictionary_corrupted);
-        CHECK_E(FSE_buildDTable(dctx->OFTable, offcodeNCount, offcodeMaxValue, offcodeLog), dictionary_corrupted);
+        if (offcodeLog > OffFSELog) return ERROR(dictionary_corrupted);
+        CHECK_E(FSE_buildDTable(entropy->OFTable, offcodeNCount, offcodeMaxValue, offcodeLog), dictionary_corrupted);
         dictPtr += offcodeHeaderSize;
     }
 
     {   short matchlengthNCount[MaxML+1];
-        unsigned matchlengthMaxValue = MaxML, matchlengthLog = MLFSELog;
+        unsigned matchlengthMaxValue = MaxML, matchlengthLog;
         size_t const matchlengthHeaderSize = FSE_readNCount(matchlengthNCount, &matchlengthMaxValue, &matchlengthLog, dictPtr, dictEnd-dictPtr);
         if (FSE_isError(matchlengthHeaderSize)) return ERROR(dictionary_corrupted);
-        CHECK_E(FSE_buildDTable(dctx->MLTable, matchlengthNCount, matchlengthMaxValue, matchlengthLog), dictionary_corrupted);
+        if (matchlengthLog > MLFSELog) return ERROR(dictionary_corrupted);
+        CHECK_E(FSE_buildDTable(entropy->MLTable, matchlengthNCount, matchlengthMaxValue, matchlengthLog), dictionary_corrupted);
         dictPtr += matchlengthHeaderSize;
     }
 
     {   short litlengthNCount[MaxLL+1];
-        unsigned litlengthMaxValue = MaxLL, litlengthLog = LLFSELog;
+        unsigned litlengthMaxValue = MaxLL, litlengthLog;
         size_t const litlengthHeaderSize = FSE_readNCount(litlengthNCount, &litlengthMaxValue, &litlengthLog, dictPtr, dictEnd-dictPtr);
         if (FSE_isError(litlengthHeaderSize)) return ERROR(dictionary_corrupted);
-        CHECK_E(FSE_buildDTable(dctx->LLTable, litlengthNCount, litlengthMaxValue, litlengthLog), dictionary_corrupted);
+        if (litlengthLog > LLFSELog) return ERROR(dictionary_corrupted);
+        CHECK_E(FSE_buildDTable(entropy->LLTable, litlengthNCount, litlengthMaxValue, litlengthLog), dictionary_corrupted);
         dictPtr += litlengthHeaderSize;
     }
 
     if (dictPtr+12 > dictEnd) return ERROR(dictionary_corrupted);
-    dctx->rep[0] = MEM_readLE32(dictPtr+0); if (dctx->rep[0] >= dictSize) return ERROR(dictionary_corrupted);
-    dctx->rep[1] = MEM_readLE32(dictPtr+4); if (dctx->rep[1] >= dictSize) return ERROR(dictionary_corrupted);
-    dctx->rep[2] = MEM_readLE32(dictPtr+8); if (dctx->rep[2] >= dictSize) return ERROR(dictionary_corrupted);
-    dictPtr += 12;
+    {   int i;
+        size_t const dictContentSize = (size_t)(dictEnd - (dictPtr+12));
+        for (i=0; i<3; i++) {
+            U32 const rep = MEM_readLE32(dictPtr); dictPtr += 4;
+            if (rep==0 || rep >= dictContentSize) return ERROR(dictionary_corrupted);
+            entropy->rep[i] = rep;
+    }   }
 
-    dctx->litEntropy = dctx->fseEntropy = 1;
     return dictPtr - (const BYTE*)dict;
 }
 
@@ -1347,19 +1912,18 @@ static size_t ZSTD_decompress_insertDictionary(ZSTD_DCtx* dctx, const void* dict
 {
     if (dictSize < 8) return ZSTD_refDictContent(dctx, dict, dictSize);
     {   U32 const magic = MEM_readLE32(dict);
-        if (magic != ZSTD_DICT_MAGIC) {
+        if (magic != ZSTD_MAGIC_DICTIONARY) {
             return ZSTD_refDictContent(dctx, dict, dictSize);   /* pure content mode */
     }   }
     dctx->dictID = MEM_readLE32((const char*)dict + 4);
 
     /* load entropy tables */
-    dict = (const char*)dict + 8;
-    dictSize -= 8;
-    {   size_t const eSize = ZSTD_loadEntropy(dctx, dict, dictSize);
+    {   size_t const eSize = ZSTD_loadEntropy(&dctx->entropy, dict, dictSize);
         if (ZSTD_isError(eSize)) return ERROR(dictionary_corrupted);
         dict = (const char*)dict + eSize;
         dictSize -= eSize;
     }
+    dctx->litEntropy = dctx->fseEntropy = 1;
 
     /* reference dictionary content */
     return ZSTD_refDictContent(dctx, dict, dictSize);
@@ -1376,67 +1940,212 @@ size_t ZSTD_decompressBegin_usingDict(ZSTD_DCtx* dctx, const void* dict, size_t
 /* ======   ZSTD_DDict   ====== */
 
 struct ZSTD_DDict_s {
-    void* dict;
+    void* dictBuffer;
+    const void* dictContent;
     size_t dictSize;
-    ZSTD_DCtx* refContext;
+    ZSTD_entropyTables_t entropy;
+    U32 dictID;
+    U32 entropyPresent;
+    ZSTD_customMem cMem;
 };  /* typedef'd to ZSTD_DDict within "zstd.h" */
 
-ZSTD_DDict* ZSTD_createDDict_advanced(const void* dict, size_t dictSize, ZSTD_customMem customMem)
+static const void* ZSTD_DDictDictContent(const ZSTD_DDict* ddict)
 {
-    if (!customMem.customAlloc && !customMem.customFree) customMem = defaultCustomMem;
-    if (!customMem.customAlloc || !customMem.customFree) return NULL;
+    return ddict->dictContent;
+}
+
+static size_t ZSTD_DDictDictSize(const ZSTD_DDict* ddict)
+{
+    return ddict->dictSize;
+}
+
+size_t ZSTD_decompressBegin_usingDDict(ZSTD_DCtx* dstDCtx, const ZSTD_DDict* ddict)
+{
+    CHECK_F(ZSTD_decompressBegin(dstDCtx));
+    if (ddict) {   /* support begin on NULL */
+        dstDCtx->dictID = ddict->dictID;
+        dstDCtx->base = ddict->dictContent;
+        dstDCtx->vBase = ddict->dictContent;
+        dstDCtx->dictEnd = (const BYTE*)ddict->dictContent + ddict->dictSize;
+        dstDCtx->previousDstEnd = dstDCtx->dictEnd;
+        if (ddict->entropyPresent) {
+            dstDCtx->litEntropy = 1;
+            dstDCtx->fseEntropy = 1;
+            dstDCtx->LLTptr = ddict->entropy.LLTable;
+            dstDCtx->MLTptr = ddict->entropy.MLTable;
+            dstDCtx->OFTptr = ddict->entropy.OFTable;
+            dstDCtx->HUFptr = ddict->entropy.hufTable;
+            dstDCtx->entropy.rep[0] = ddict->entropy.rep[0];
+            dstDCtx->entropy.rep[1] = ddict->entropy.rep[1];
+            dstDCtx->entropy.rep[2] = ddict->entropy.rep[2];
+        } else {
+            dstDCtx->litEntropy = 0;
+            dstDCtx->fseEntropy = 0;
+        }
+    }
+    return 0;
+}
+
+static size_t ZSTD_loadEntropy_inDDict(ZSTD_DDict* ddict)
+{
+    ddict->dictID = 0;
+    ddict->entropyPresent = 0;
+    if (ddict->dictSize < 8) return 0;
+    {   U32 const magic = MEM_readLE32(ddict->dictContent);
+        if (magic != ZSTD_MAGIC_DICTIONARY) return 0;   /* pure content mode */
+    }
+    ddict->dictID = MEM_readLE32((const char*)ddict->dictContent + 4);
+
+    /* load entropy tables */
+    CHECK_E( ZSTD_loadEntropy(&ddict->entropy, ddict->dictContent, ddict->dictSize), dictionary_corrupted );
+    ddict->entropyPresent = 1;
+    return 0;
+}
+
+
+static size_t ZSTD_initDDict_internal(ZSTD_DDict* ddict, const void* dict, size_t dictSize, unsigned byReference)
+{
+    if ((byReference) || (!dict) || (!dictSize)) {
+        ddict->dictBuffer = NULL;
+        ddict->dictContent = dict;
+    } else {
+        void* const internalBuffer = ZSTD_malloc(dictSize, ddict->cMem);
+        ddict->dictBuffer = internalBuffer;
+        ddict->dictContent = internalBuffer;
+        if (!internalBuffer) return ERROR(memory_allocation);
+        memcpy(internalBuffer, dict, dictSize);
+    }
+    ddict->dictSize = dictSize;
+    ddict->entropy.hufTable[0] = (HUF_DTable)((HufLog)*0x1000001);  /* cover both little and big endian */
+
+    /* parse dictionary content */
+    CHECK_F( ZSTD_loadEntropy_inDDict(ddict) );
+
+    return 0;
+}
+
+ZSTD_DDict* ZSTD_createDDict_advanced(const void* dict, size_t dictSize, unsigned byReference, ZSTD_customMem customMem)
+{
+    if (!customMem.customAlloc ^ !customMem.customFree) return NULL;
 
     {   ZSTD_DDict* const ddict = (ZSTD_DDict*) ZSTD_malloc(sizeof(ZSTD_DDict), customMem);
-        void* const dictContent = ZSTD_malloc(dictSize, customMem);
-        ZSTD_DCtx* const dctx = ZSTD_createDCtx_advanced(customMem);
+        if (!ddict) return NULL;
+        ddict->cMem = customMem;
 
-        if (!dictContent || !ddict || !dctx) {
-            ZSTD_free(dictContent, customMem);
-            ZSTD_free(ddict, customMem);
-            ZSTD_free(dctx, customMem);
+        if (ZSTD_isError( ZSTD_initDDict_internal(ddict, dict, dictSize, byReference) )) {
+            ZSTD_freeDDict(ddict);
             return NULL;
         }
 
-        memcpy(dictContent, dict, dictSize);
-        {   size_t const errorCode = ZSTD_decompressBegin_usingDict(dctx, dictContent, dictSize);
-            if (ZSTD_isError(errorCode)) {
-                ZSTD_free(dictContent, customMem);
-                ZSTD_free(ddict, customMem);
-                ZSTD_free(dctx, customMem);
-                return NULL;
-        }   }
-
-        ddict->dict = dictContent;
-        ddict->dictSize = dictSize;
-        ddict->refContext = dctx;
         return ddict;
     }
 }
 
 /*! ZSTD_createDDict() :
-*   Create a digested dictionary, ready to start decompression without startup delay.
-*   `dict` can be released after `ZSTD_DDict` creation */
+*   Create a digested dictionary, to start decompression without startup delay.
+*   `dict` content is copied inside DDict.
+*   Consequently, `dict` can be released after `ZSTD_DDict` creation */
 ZSTD_DDict* ZSTD_createDDict(const void* dict, size_t dictSize)
 {
     ZSTD_customMem const allocator = { NULL, NULL, NULL };
-    return ZSTD_createDDict_advanced(dict, dictSize, allocator);
+    return ZSTD_createDDict_advanced(dict, dictSize, 0, allocator);
 }
 
+/*! ZSTD_createDDict_byReference() :
+ *  Create a digested dictionary, to start decompression without startup delay.
+ *  Dictionary content is simply referenced, it will be accessed during decompression.
+ *  Warning : dictBuffer must outlive DDict (DDict must be freed before dictBuffer) */
+ZSTD_DDict* ZSTD_createDDict_byReference(const void* dictBuffer, size_t dictSize)
+{
+    ZSTD_customMem const allocator = { NULL, NULL, NULL };
+    return ZSTD_createDDict_advanced(dictBuffer, dictSize, 1, allocator);
+}
+
+
+ZSTD_DDict* ZSTD_initStaticDDict(void* workspace, size_t workspaceSize,
+                                 const void* dict, size_t dictSize,
+                                 unsigned byReference)
+{
+    size_t const neededSpace = sizeof(ZSTD_DDict) + (byReference ? 0 : dictSize);
+    ZSTD_DDict* const ddict = (ZSTD_DDict*)workspace;
+    assert(workspace != NULL);
+    assert(dict != NULL);
+    if ((size_t)workspace & 7) return NULL;  /* 8-aligned */
+    if (workspaceSize < neededSpace) return NULL;
+    if (!byReference) {
+        memcpy(ddict+1, dict, dictSize);  /* local copy */
+        dict = ddict+1;
+    }
+    if (ZSTD_isError( ZSTD_initDDict_internal(ddict, dict, dictSize, 1 /* byRef */) ))
+        return NULL;
+    return ddict;
+}
+
+
 size_t ZSTD_freeDDict(ZSTD_DDict* ddict)
 {
     if (ddict==NULL) return 0;   /* support free on NULL */
-    {   ZSTD_customMem const cMem = ddict->refContext->customMem;
-        ZSTD_freeDCtx(ddict->refContext);
-        ZSTD_free(ddict->dict, cMem);
+    {   ZSTD_customMem const cMem = ddict->cMem;
+        ZSTD_free(ddict->dictBuffer, cMem);
         ZSTD_free(ddict, cMem);
         return 0;
     }
 }
 
+/*! ZSTD_estimateDDictSize() :
+ *  Estimate amount of memory that will be needed to create a dictionary for decompression.
+ *  Note : dictionary created "byReference" are smaller */
+size_t ZSTD_estimateDDictSize(size_t dictSize, unsigned byReference)
+{
+    return sizeof(ZSTD_DDict) + (byReference ? 0 : dictSize);
+}
+
 size_t ZSTD_sizeof_DDict(const ZSTD_DDict* ddict)
 {
     if (ddict==NULL) return 0;   /* support sizeof on NULL */
-    return sizeof(*ddict) + sizeof(ddict->refContext) + ddict->dictSize;
+    return sizeof(*ddict) + (ddict->dictBuffer ? ddict->dictSize : 0) ;
+}
+
+/*! ZSTD_getDictID_fromDict() :
+ *  Provides the dictID stored within dictionary.
+ *  if @return == 0, the dictionary is not conformant with Zstandard specification.
+ *  It can still be loaded, but as a content-only dictionary. */
+unsigned ZSTD_getDictID_fromDict(const void* dict, size_t dictSize)
+{
+    if (dictSize < 8) return 0;
+    if (MEM_readLE32(dict) != ZSTD_MAGIC_DICTIONARY) return 0;
+    return MEM_readLE32((const char*)dict + 4);
+}
+
+/*! ZSTD_getDictID_fromDDict() :
+ *  Provides the dictID of the dictionary loaded into `ddict`.
+ *  If @return == 0, the dictionary is not conformant to Zstandard specification, or empty.
+ *  Non-conformant dictionaries can still be loaded, but as content-only dictionaries. */
+unsigned ZSTD_getDictID_fromDDict(const ZSTD_DDict* ddict)
+{
+    if (ddict==NULL) return 0;
+    return ZSTD_getDictID_fromDict(ddict->dictContent, ddict->dictSize);
+}
+
+/*! ZSTD_getDictID_fromFrame() :
+ *  Provides the dictID required to decompresse frame stored within `src`.
+ *  If @return == 0, the dictID could not be decoded.
+ *  This could for one of the following reasons :
+ *  - The frame does not require a dictionary (most common case).
+ *  - The frame was built with dictID intentionally removed.
+ *    Needed dictionary is a hidden information.
+ *    Note : this use case also happens when using a non-conformant dictionary.
+ *  - `srcSize` is too small, and as a result, frame header could not be decoded.
+ *    Note : possible if `srcSize < ZSTD_FRAMEHEADERSIZE_MAX`.
+ *  - This is not a Zstandard frame.
+ *  When identifying the exact failure cause, it's possible to use
+ *  ZSTD_getFrameHeader(), which will provide a more precise error code. */
+unsigned ZSTD_getDictID_fromFrame(const void* src, size_t srcSize)
+{
+    ZSTD_frameHeader zfp = { 0 , 0 , 0 , 0 };
+    size_t const hError = ZSTD_getFrameHeader(&zfp, src, srcSize);
+    if (ZSTD_isError(hError)) return 0;
+    return zfp.dictID;
 }
 
 
@@ -1448,12 +2157,10 @@ size_t ZSTD_decompress_usingDDict(ZSTD_DCtx* dctx,
                             const void* src, size_t srcSize,
                             const ZSTD_DDict* ddict)
 {
-#if defined(ZSTD_LEGACY_SUPPORT) && (ZSTD_LEGACY_SUPPORT==1)
-    if (ZSTD_isLegacy(src, srcSize)) return ZSTD_decompressLegacy(dst, dstCapacity, src, srcSize, ddict->dict, ddict->dictSize);
-#endif
-    ZSTD_refDCtx(dctx, ddict->refContext);
-    ZSTD_checkContinuity(dctx, dst);
-    return ZSTD_decompressFrame(dctx, dst, dstCapacity, src, srcSize);
+    /* pass content and size in case legacy frames are encountered */
+    return ZSTD_decompressMultiFrame(dctx, dst, dstCapacity, src, srcSize,
+                                     NULL, 0,
+                                     ddict);
 }
 
 
@@ -1461,87 +2168,42 @@ size_t ZSTD_decompress_usingDDict(ZSTD_DCtx* dctx,
 *   Streaming decompression
 *====================================*/
 
-typedef enum { zdss_init, zdss_loadHeader,
-               zdss_read, zdss_load, zdss_flush } ZSTD_dStreamStage;
-
-/* *** Resource management *** */
-struct ZSTD_DStream_s {
-    ZSTD_DCtx* dctx;
-    ZSTD_DDict* ddict;
-    ZSTD_frameParams fParams;
-    ZSTD_dStreamStage stage;
-    char*  inBuff;
-    size_t inBuffSize;
-    size_t inPos;
-    size_t maxWindowSize;
-    char*  outBuff;
-    size_t outBuffSize;
-    size_t outStart;
-    size_t outEnd;
-    size_t blockSize;
-    BYTE headerBuffer[ZSTD_FRAMEHEADERSIZE_MAX];   /* tmp buffer to store frame header */
-    size_t lhSize;
-    ZSTD_customMem customMem;
-    void* legacyContext;
-    U32 previousLegacyVersion;
-    U32 legacyVersion;
-    U32 hostageByte;
-};   /* typedef'd to ZSTD_DStream within "zstd.h" */
-
-
 ZSTD_DStream* ZSTD_createDStream(void)
 {
-    return ZSTD_createDStream_advanced(defaultCustomMem);
+    return ZSTD_createDStream_advanced(ZSTD_defaultCMem);
+}
+
+ZSTD_DStream* ZSTD_initStaticDStream(void *workspace, size_t workspaceSize)
+{
+    return ZSTD_initStaticDCtx(workspace, workspaceSize);
 }
 
 ZSTD_DStream* ZSTD_createDStream_advanced(ZSTD_customMem customMem)
 {
-    ZSTD_DStream* zds;
-
-    if (!customMem.customAlloc && !customMem.customFree) customMem = defaultCustomMem;
-    if (!customMem.customAlloc || !customMem.customFree) return NULL;
-
-    zds = (ZSTD_DStream*) ZSTD_malloc(sizeof(ZSTD_DStream), customMem);
-    if (zds==NULL) return NULL;
-    memset(zds, 0, sizeof(ZSTD_DStream));
-    memcpy(&zds->customMem, &customMem, sizeof(ZSTD_customMem));
-    zds->dctx = ZSTD_createDCtx_advanced(customMem);
-    if (zds->dctx == NULL) { ZSTD_freeDStream(zds); return NULL; }
-    zds->stage = zdss_init;
-    zds->maxWindowSize = ZSTD_MAXWINDOWSIZE_DEFAULT;
-    return zds;
+    return ZSTD_createDCtx_advanced(customMem);
 }
 
 size_t ZSTD_freeDStream(ZSTD_DStream* zds)
 {
-    if (zds==NULL) return 0;   /* support free on null */
-    {   ZSTD_customMem const cMem = zds->customMem;
-        ZSTD_freeDCtx(zds->dctx);
-        ZSTD_freeDDict(zds->ddict);
-        ZSTD_free(zds->inBuff, cMem);
-        ZSTD_free(zds->outBuff, cMem);
-#if defined(ZSTD_LEGACY_SUPPORT) && (ZSTD_LEGACY_SUPPORT >= 1)
-        if (zds->legacyContext)
-            ZSTD_freeLegacyStreamContext(zds->legacyContext, zds->previousLegacyVersion);
-#endif
-        ZSTD_free(zds, cMem);
-        return 0;
-    }
+    return ZSTD_freeDCtx(zds);
 }
 
 
 /* *** Initialization *** */
 
-size_t ZSTD_DStreamInSize(void)  { return ZSTD_BLOCKSIZE_ABSOLUTEMAX + ZSTD_blockHeaderSize; }
-size_t ZSTD_DStreamOutSize(void) { return ZSTD_BLOCKSIZE_ABSOLUTEMAX; }
+size_t ZSTD_DStreamInSize(void)  { return ZSTD_BLOCKSIZE_MAX + ZSTD_blockHeaderSize; }
+size_t ZSTD_DStreamOutSize(void) { return ZSTD_BLOCKSIZE_MAX; }
 
 size_t ZSTD_initDStream_usingDict(ZSTD_DStream* zds, const void* dict, size_t dictSize)
 {
-    zds->stage = zdss_loadHeader;
+    zds->streamStage = zdss_loadHeader;
     zds->lhSize = zds->inPos = zds->outStart = zds->outEnd = 0;
-    ZSTD_freeDDict(zds->ddict);
-    zds->ddict = ZSTD_createDDict(dict, dictSize);
-    if (zds->ddict == NULL) return ERROR(memory_allocation);
+    ZSTD_freeDDict(zds->ddictLocal);
+    if (dict && dictSize >= 8) {
+        zds->ddictLocal = ZSTD_createDDict(dict, dictSize);
+        if (zds->ddictLocal == NULL) return ERROR(memory_allocation);
+    } else zds->ddictLocal = NULL;
+    zds->ddict = zds->ddictLocal;
     zds->legacyVersion = 0;
     zds->hostageByte = 0;
     return ZSTD_frameHeaderSize_prefix;
@@ -1552,10 +2214,18 @@ size_t ZSTD_initDStream(ZSTD_DStream* zds)
     return ZSTD_initDStream_usingDict(zds, NULL, 0);
 }
 
+/* ZSTD_initDStream_usingDDict() :
+ * ddict will just be referenced, and must outlive decompression session */
+size_t ZSTD_initDStream_usingDDict(ZSTD_DStream* zds, const ZSTD_DDict* ddict)
+{
+    size_t const initResult = ZSTD_initDStream(zds);
+    zds->ddict = ddict;
+    return initResult;
+}
+
 size_t ZSTD_resetDStream(ZSTD_DStream* zds)
 {
-    if (zds->ddict == NULL) return ERROR(stage_wrong);  /* must be init at least once */
-    zds->stage = zdss_loadHeader;
+    zds->streamStage = zdss_loadHeader;
     zds->lhSize = zds->inPos = zds->outStart = zds->outEnd = 0;
     zds->legacyVersion = 0;
     zds->hostageByte = 0;
@@ -1568,7 +2238,7 @@ size_t ZSTD_setDStreamParameter(ZSTD_DStream* zds,
     switch(paramType)
     {
         default : return ERROR(parameter_unknown);
-        case ZSTDdsp_maxWindowSize : zds->maxWindowSize = paramValue; break;
+        case DStream_p_maxWindowSize : zds->maxWindowSize = paramValue ? paramValue : (U32)(-1); break;
     }
     return 0;
 }
@@ -1576,8 +2246,24 @@ size_t ZSTD_setDStreamParameter(ZSTD_DStream* zds,
 
 size_t ZSTD_sizeof_DStream(const ZSTD_DStream* zds)
 {
-    if (zds==NULL) return 0;   /* support sizeof on NULL */
-    return sizeof(*zds) + ZSTD_sizeof_DCtx(zds->dctx) + ZSTD_sizeof_DDict(zds->ddict) + zds->inBuffSize + zds->outBuffSize;
+    return ZSTD_sizeof_DCtx(zds);
+}
+
+size_t ZSTD_estimateDStreamSize(size_t windowSize)
+{
+    size_t const blockSize = MIN(windowSize, ZSTD_BLOCKSIZE_MAX);
+    size_t const inBuffSize = blockSize;  /* no block can be larger */
+    size_t const outBuffSize = windowSize + blockSize + (WILDCOPY_OVERLENGTH * 2);
+    return sizeof(ZSTD_DStream) + ZSTD_estimateDCtxSize() + inBuffSize + outBuffSize;
+}
+
+ZSTDLIB_API size_t ZSTD_estimateDStreamSize_fromFrame(const void* src, size_t srcSize)
+{
+    ZSTD_frameHeader fh;
+    size_t const err = ZSTD_getFrameHeader(&fh, src, srcSize);
+    if (ZSTD_isError(err)) return err;
+    if (err>0) return ERROR(srcSize_wrong);
+    return ZSTD_estimateDStreamSize(fh.windowSize);
 }
 
 
@@ -1601,101 +2287,148 @@ size_t ZSTD_decompressStream(ZSTD_DStream* zds, ZSTD_outBuffer* output, ZSTD_inB
     char* op = ostart;
     U32 someMoreWork = 1;
 
+    DEBUGLOG(5, "ZSTD_decompressStream");
+    DEBUGLOG(5, "input size : %u", (U32)(input->size - input->pos));
 #if defined(ZSTD_LEGACY_SUPPORT) && (ZSTD_LEGACY_SUPPORT>=1)
-    if (zds->legacyVersion)
+    if (zds->legacyVersion) {
+        /* legacy support is incompatible with static dctx */
+        if (zds->staticSize) return ERROR(memory_allocation);
         return ZSTD_decompressLegacyStream(zds->legacyContext, zds->legacyVersion, output, input);
+    }
 #endif
 
     while (someMoreWork) {
-        switch(zds->stage)
+        switch(zds->streamStage)
         {
         case zdss_init :
-            return ERROR(init_missing);
+            ZSTD_resetDStream(zds);   /* transparent reset on starting decoding a new frame */
+            /* fall-through */
 
         case zdss_loadHeader :
-            {   size_t const hSize = ZSTD_getFrameParams(&zds->fParams, zds->headerBuffer, zds->lhSize);
-                if (ZSTD_isError(hSize))
+            {   size_t const hSize = ZSTD_getFrameHeader(&zds->fParams, zds->headerBuffer, zds->lhSize);
+                if (ZSTD_isError(hSize)) {
 #if defined(ZSTD_LEGACY_SUPPORT) && (ZSTD_LEGACY_SUPPORT>=1)
-                {   U32 const legacyVersion = ZSTD_isLegacy(istart, iend-istart);
+                    U32 const legacyVersion = ZSTD_isLegacy(istart, iend-istart);
                     if (legacyVersion) {
+                        const void* const dict = zds->ddict ? zds->ddict->dictContent : NULL;
+                        size_t const dictSize = zds->ddict ? zds->ddict->dictSize : 0;
+                        /* legacy support is incompatible with static dctx */
+                        if (zds->staticSize) return ERROR(memory_allocation);
                         CHECK_F(ZSTD_initLegacyStream(&zds->legacyContext, zds->previousLegacyVersion, legacyVersion,
-                                                       zds->ddict->dict, zds->ddict->dictSize));
+                                                       dict, dictSize));
                         zds->legacyVersion = zds->previousLegacyVersion = legacyVersion;
                         return ZSTD_decompressLegacyStream(zds->legacyContext, zds->legacyVersion, output, input);
                     } else {
                         return hSize; /* error */
-                }   }
+                    }
 #else
                     return hSize;
 #endif
+                }
                 if (hSize != 0) {   /* need more input */
                     size_t const toLoad = hSize - zds->lhSize;   /* if hSize!=0, hSize > zds->lhSize */
                     if (toLoad > (size_t)(iend-ip)) {   /* not enough input to load full header */
-                        memcpy(zds->headerBuffer + zds->lhSize, ip, iend-ip);
-                        zds->lhSize += iend-ip;
+                        if (iend-ip > 0) {
+                            memcpy(zds->headerBuffer + zds->lhSize, ip, iend-ip);
+                            zds->lhSize += iend-ip;
+                        }
                         input->pos = input->size;
                         return (MAX(ZSTD_frameHeaderSize_min, hSize) - zds->lhSize) + ZSTD_blockHeaderSize;   /* remaining header bytes + next block header */
                     }
+                    assert(ip != NULL);
                     memcpy(zds->headerBuffer + zds->lhSize, ip, toLoad); zds->lhSize = hSize; ip += toLoad;
                     break;
             }   }
 
-            /* Consume header */
-            ZSTD_refDCtx(zds->dctx, zds->ddict->refContext);
-            {   size_t const h1Size = ZSTD_nextSrcSizeToDecompress(zds->dctx);  /* == ZSTD_frameHeaderSize_prefix */
-                CHECK_F(ZSTD_decompressContinue(zds->dctx, NULL, 0, zds->headerBuffer, h1Size));
-                {   size_t const h2Size = ZSTD_nextSrcSizeToDecompress(zds->dctx);
-                    CHECK_F(ZSTD_decompressContinue(zds->dctx, NULL, 0, zds->headerBuffer+h1Size, h2Size));
+            /* check for single-pass mode opportunity */
+            if (zds->fParams.frameContentSize && zds->fParams.windowSize /* skippable frame if == 0 */
+                && (U64)(size_t)(oend-op) >= zds->fParams.frameContentSize) {
+                size_t const cSize = ZSTD_findFrameCompressedSize(istart, iend-istart);
+                if (cSize <= (size_t)(iend-istart)) {
+                    size_t const decompressedSize = ZSTD_decompress_usingDDict(zds, op, oend-op, istart, cSize, zds->ddict);
+                    if (ZSTD_isError(decompressedSize)) return decompressedSize;
+                    ip = istart + cSize;
+                    op += decompressedSize;
+                    zds->expected = 0;
+                    zds->streamStage = zdss_init;
+                    someMoreWork = 0;
+                    break;
             }   }
 
+            /* Consume header (see ZSTDds_decodeFrameHeader) */
+            DEBUGLOG(4, "Consume header");
+            CHECK_F(ZSTD_decompressBegin_usingDDict(zds, zds->ddict));
+
+            if ((MEM_readLE32(zds->headerBuffer) & 0xFFFFFFF0U) == ZSTD_MAGIC_SKIPPABLE_START) {  /* skippable frame */
+                zds->expected = MEM_readLE32(zds->headerBuffer + 4);
+                zds->stage = ZSTDds_skipFrame;
+            } else {
+                CHECK_F(ZSTD_decodeFrameHeader(zds, zds->headerBuffer, zds->lhSize));
+                zds->expected = ZSTD_blockHeaderSize;
+                zds->stage = ZSTDds_decodeBlockHeader;
+            }
+
+            /* control buffer memory usage */
+            DEBUGLOG(4, "Control max buffer memory usage");
             zds->fParams.windowSize = MAX(zds->fParams.windowSize, 1U << ZSTD_WINDOWLOG_ABSOLUTEMIN);
-            if (zds->fParams.windowSize > zds->maxWindowSize) return ERROR(frameParameter_unsupported);
+            if (zds->fParams.windowSize > zds->maxWindowSize) return ERROR(frameParameter_windowTooLarge);
 
             /* Adapt buffer sizes to frame header instructions */
-            {   size_t const blockSize = MIN(zds->fParams.windowSize, ZSTD_BLOCKSIZE_ABSOLUTEMAX);
-                size_t const neededOutSize = zds->fParams.windowSize + blockSize;
+            {   size_t const blockSize = MIN(zds->fParams.windowSize, ZSTD_BLOCKSIZE_MAX);
+                size_t const neededOutSize = zds->fParams.windowSize + blockSize + WILDCOPY_OVERLENGTH * 2;
                 zds->blockSize = blockSize;
-                if (zds->inBuffSize < blockSize) {
-                    ZSTD_free(zds->inBuff, zds->customMem);
+                if ((zds->inBuffSize < blockSize) || (zds->outBuffSize < neededOutSize)) {
+                    size_t const bufferSize = blockSize + neededOutSize;
+                    DEBUGLOG(4, "inBuff  : from %u to %u",
+                                (U32)zds->inBuffSize, (U32)blockSize);
+                    DEBUGLOG(4, "outBuff : from %u to %u",
+                                (U32)zds->outBuffSize, (U32)neededOutSize);
+                    if (zds->staticSize) {  /* static DCtx */
+                        DEBUGLOG(4, "staticSize : %u", (U32)zds->staticSize);
+                        assert(zds->staticSize >= sizeof(ZSTD_DCtx));  /* controlled at init */
+                        if (bufferSize > zds->staticSize - sizeof(ZSTD_DCtx))
+                            return ERROR(memory_allocation);
+                    } else {
+                        ZSTD_free(zds->inBuff, zds->customMem);
+                        zds->inBuffSize = 0;
+                        zds->outBuffSize = 0;
+                        zds->inBuff = (char*)ZSTD_malloc(bufferSize, zds->customMem);
+                        if (zds->inBuff == NULL) return ERROR(memory_allocation);
+                    }
                     zds->inBuffSize = blockSize;
-                    zds->inBuff = (char*)ZSTD_malloc(blockSize, zds->customMem);
-                    if (zds->inBuff == NULL) return ERROR(memory_allocation);
-                }
-                if (zds->outBuffSize < neededOutSize) {
-                    ZSTD_free(zds->outBuff, zds->customMem);
+                    zds->outBuff = zds->inBuff + zds->inBuffSize;
                     zds->outBuffSize = neededOutSize;
-                    zds->outBuff = (char*)ZSTD_malloc(neededOutSize, zds->customMem);
-                    if (zds->outBuff == NULL) return ERROR(memory_allocation);
             }   }
-            zds->stage = zdss_read;
+            zds->streamStage = zdss_read;
             /* pass-through */
 
         case zdss_read:
-            {   size_t const neededInSize = ZSTD_nextSrcSizeToDecompress(zds->dctx);
+            DEBUGLOG(5, "stage zdss_read");
+            {   size_t const neededInSize = ZSTD_nextSrcSizeToDecompress(zds);
+                DEBUGLOG(5, "neededInSize = %u", (U32)neededInSize);
                 if (neededInSize==0) {  /* end of frame */
-                    zds->stage = zdss_init;
+                    zds->streamStage = zdss_init;
                     someMoreWork = 0;
                     break;
                 }
                 if ((size_t)(iend-ip) >= neededInSize) {  /* decode directly from src */
-                    const int isSkipFrame = ZSTD_isSkipFrame(zds->dctx);
-                    size_t const decodedSize = ZSTD_decompressContinue(zds->dctx,
+                    int const isSkipFrame = ZSTD_isSkipFrame(zds);
+                    size_t const decodedSize = ZSTD_decompressContinue(zds,
                         zds->outBuff + zds->outStart, (isSkipFrame ? 0 : zds->outBuffSize - zds->outStart),
                         ip, neededInSize);
                     if (ZSTD_isError(decodedSize)) return decodedSize;
                     ip += neededInSize;
                     if (!decodedSize && !isSkipFrame) break;   /* this was just a header */
                     zds->outEnd = zds->outStart + decodedSize;
-                    zds->stage = zdss_flush;
+                    zds->streamStage = zdss_flush;
                     break;
-                }
-                if (ip==iend) { someMoreWork = 0; break; }   /* no more input */
-                zds->stage = zdss_load;
-                /* pass-through */
-            }
+            }   }
+            if (ip==iend) { someMoreWork = 0; break; }   /* no more input */
+            zds->streamStage = zdss_load;
+            /* pass-through */
 
         case zdss_load:
-            {   size_t const neededInSize = ZSTD_nextSrcSizeToDecompress(zds->dctx);
+            {   size_t const neededInSize = ZSTD_nextSrcSizeToDecompress(zds);
                 size_t const toLoad = neededInSize - zds->inPos;   /* should always be <= remaining space within inBuff */
                 size_t loadedSize;
                 if (toLoad > zds->inBuffSize - zds->inPos) return ERROR(corruption_detected);   /* should never happen */
@@ -1705,17 +2438,17 @@ size_t ZSTD_decompressStream(ZSTD_DStream* zds, ZSTD_outBuffer* output, ZSTD_inB
                 if (loadedSize < toLoad) { someMoreWork = 0; break; }   /* not enough input, wait for more */
 
                 /* decode loaded input */
-                {  const int isSkipFrame = ZSTD_isSkipFrame(zds->dctx);
-                   size_t const decodedSize = ZSTD_decompressContinue(zds->dctx,
+                {  const int isSkipFrame = ZSTD_isSkipFrame(zds);
+                   size_t const decodedSize = ZSTD_decompressContinue(zds,
                         zds->outBuff + zds->outStart, zds->outBuffSize - zds->outStart,
                         zds->inBuff, neededInSize);
                     if (ZSTD_isError(decodedSize)) return decodedSize;
                     zds->inPos = 0;   /* input is consumed */
-                    if (!decodedSize && !isSkipFrame) { zds->stage = zdss_read; break; }   /* this was just a header */
+                    if (!decodedSize && !isSkipFrame) { zds->streamStage = zdss_read; break; }   /* this was just a header */
                     zds->outEnd = zds->outStart +  decodedSize;
-                    zds->stage = zdss_flush;
-                    /* pass-through */
             }   }
+            zds->streamStage = zdss_flush;
+            /* pass-through */
 
         case zdss_flush:
             {   size_t const toFlushSize = zds->outEnd - zds->outStart;
@@ -1723,37 +2456,41 @@ size_t ZSTD_decompressStream(ZSTD_DStream* zds, ZSTD_outBuffer* output, ZSTD_inB
                 op += flushedSize;
                 zds->outStart += flushedSize;
                 if (flushedSize == toFlushSize) {  /* flush completed */
-                    zds->stage = zdss_read;
+                    zds->streamStage = zdss_read;
                     if (zds->outStart + zds->blockSize > zds->outBuffSize)
                         zds->outStart = zds->outEnd = 0;
                     break;
-                }
-                /* cannot complete flush */
-                someMoreWork = 0;
-                break;
-            }
+            }   }
+            /* cannot complete flush */
+            someMoreWork = 0;
+            break;
+
         default: return ERROR(GENERIC);   /* impossible */
     }   }
 
     /* result */
     input->pos += (size_t)(ip-istart);
     output->pos += (size_t)(op-ostart);
-    {   size_t nextSrcSizeHint = ZSTD_nextSrcSizeToDecompress(zds->dctx);
+    {   size_t nextSrcSizeHint = ZSTD_nextSrcSizeToDecompress(zds);
         if (!nextSrcSizeHint) {   /* frame fully decoded */
             if (zds->outEnd == zds->outStart) {  /* output fully flushed */
                 if (zds->hostageByte) {
-                    if (input->pos >= input->size) { zds->stage = zdss_read; return 1; }  /* can't release hostage (not present) */
+                    if (input->pos >= input->size) {
+                        /* can't release hostage (not present) */
+                        zds->streamStage = zdss_read;
+                        return 1;
+                    }
                     input->pos++;  /* release hostage */
-                }
+                }   /* zds->hostageByte */
                 return 0;
-            }
+            }  /* zds->outEnd == zds->outStart */
             if (!zds->hostageByte) { /* output not fully flushed; keep last byte as hostage; will be released when all output is flushed */
                 input->pos--;   /* note : pos > 0, otherwise, impossible to finish reading last block */
                 zds->hostageByte=1;
             }
             return 1;
-        }
-        nextSrcSizeHint += ZSTD_blockHeaderSize * (ZSTD_nextInputType(zds->dctx) == ZSTDnit_block);   /* preload header of next block */
+        }  /* nextSrcSizeHint==0 */
+        nextSrcSizeHint += ZSTD_blockHeaderSize * (ZSTD_nextInputType(zds) == ZSTDnit_block);   /* preload header of next block */
         if (zds->inPos > nextSrcSizeHint) return ERROR(GENERIC);   /* should never happen */
         nextSrcSizeHint -= zds->inPos;   /* already loaded*/
         return nextSrcSizeHint;
diff --git a/contrib/libzstd/include/zstd/common/zbuff.h b/contrib/libzstd/include/zstd/deprecated/zbuff.h
similarity index 66%
rename from contrib/libzstd/include/zstd/common/zbuff.h
rename to contrib/libzstd/include/zstd/deprecated/zbuff.h
index f99e0619763..f62091976c7 100644
--- a/contrib/libzstd/include/zstd/common/zbuff.h
+++ b/contrib/libzstd/include/zstd/deprecated/zbuff.h
@@ -9,35 +9,52 @@
 
 /* ***************************************************************
 *  NOTES/WARNINGS
-*****************************************************************/
-/* The streaming API defined here will soon be deprecated by the
-* new one in 'zstd.h'; consider migrating towards newer streaming
-* API. See 'lib/README.md'.
-*****************************************************************/
+******************************************************************/
+/* The streaming API defined here is deprecated.
+ * Consider migrating towards ZSTD_compressStream() API in `zstd.h`
+ * See 'lib/README.md'.
+ *****************************************************************/
 
-#ifndef ZSTD_BUFFERED_H_23987
-#define ZSTD_BUFFERED_H_23987
 
 #if defined (__cplusplus)
 extern "C" {
 #endif
 
+#ifndef ZSTD_BUFFERED_H_23987
+#define ZSTD_BUFFERED_H_23987
+
 /* *************************************
 *  Dependencies
 ***************************************/
 #include <stddef.h>      /* size_t */
+#include "zstd.h"        /* ZSTD_CStream, ZSTD_DStream, ZSTDLIB_API */
 
 
 /* ***************************************************************
 *  Compiler specifics
 *****************************************************************/
-/* ZSTD_DLL_EXPORT :
-*  Enable exporting of functions when building a Windows DLL */
-#if defined(_WIN32) && defined(ZSTD_DLL_EXPORT) && (ZSTD_DLL_EXPORT==1)
-#  define ZSTDLIB_API __declspec(dllexport)
+/* Deprecation warnings */
+/* Should these warnings be a problem,
+   it is generally possible to disable them,
+   typically with -Wno-deprecated-declarations for gcc
+   or _CRT_SECURE_NO_WARNINGS in Visual.
+   Otherwise, it's also possible to define ZBUFF_DISABLE_DEPRECATE_WARNINGS */
+#ifdef ZBUFF_DISABLE_DEPRECATE_WARNINGS
+#  define ZBUFF_DEPRECATED(message) ZSTDLIB_API  /* disable deprecation warnings */
 #else
-#  define ZSTDLIB_API
-#endif
+#  if defined (__cplusplus) && (__cplusplus >= 201402) /* C++14 or greater */
+#    define ZBUFF_DEPRECATED(message) [[deprecated(message)]] ZSTDLIB_API
+#  elif (defined(__GNUC__) && (__GNUC__ >= 5)) || defined(__clang__)
+#    define ZBUFF_DEPRECATED(message) ZSTDLIB_API __attribute__((deprecated(message)))
+#  elif defined(__GNUC__) && (__GNUC__ >= 3)
+#    define ZBUFF_DEPRECATED(message) ZSTDLIB_API __attribute__((deprecated))
+#  elif defined(_MSC_VER)
+#    define ZBUFF_DEPRECATED(message) ZSTDLIB_API __declspec(deprecated(message))
+#  else
+#    pragma message("WARNING: You need to implement ZBUFF_DEPRECATED for this compiler")
+#    define ZBUFF_DEPRECATED(message) ZSTDLIB_API
+#  endif
+#endif /* ZBUFF_DISABLE_DEPRECATE_WARNINGS */
 
 
 /* *************************************
@@ -49,16 +66,16 @@ extern "C" {
 *  ZBUFF and ZSTD are 100% interoperable,
 *  frames created by one can be decoded by the other one */
 
-typedef struct ZBUFF_CCtx_s ZBUFF_CCtx;
-ZSTDLIB_API ZBUFF_CCtx* ZBUFF_createCCtx(void);
-ZSTDLIB_API size_t      ZBUFF_freeCCtx(ZBUFF_CCtx* cctx);
+typedef ZSTD_CStream ZBUFF_CCtx;
+ZBUFF_DEPRECATED("use ZSTD_createCStream") ZBUFF_CCtx* ZBUFF_createCCtx(void);
+ZBUFF_DEPRECATED("use ZSTD_freeCStream")   size_t      ZBUFF_freeCCtx(ZBUFF_CCtx* cctx);
 
-ZSTDLIB_API size_t ZBUFF_compressInit(ZBUFF_CCtx* cctx, int compressionLevel);
-ZSTDLIB_API size_t ZBUFF_compressInitDictionary(ZBUFF_CCtx* cctx, const void* dict, size_t dictSize, int compressionLevel);
+ZBUFF_DEPRECATED("use ZSTD_initCStream")           size_t ZBUFF_compressInit(ZBUFF_CCtx* cctx, int compressionLevel);
+ZBUFF_DEPRECATED("use ZSTD_initCStream_usingDict") size_t ZBUFF_compressInitDictionary(ZBUFF_CCtx* cctx, const void* dict, size_t dictSize, int compressionLevel);
 
-ZSTDLIB_API size_t ZBUFF_compressContinue(ZBUFF_CCtx* cctx, void* dst, size_t* dstCapacityPtr, const void* src, size_t* srcSizePtr);
-ZSTDLIB_API size_t ZBUFF_compressFlush(ZBUFF_CCtx* cctx, void* dst, size_t* dstCapacityPtr);
-ZSTDLIB_API size_t ZBUFF_compressEnd(ZBUFF_CCtx* cctx, void* dst, size_t* dstCapacityPtr);
+ZBUFF_DEPRECATED("use ZSTD_compressStream") size_t ZBUFF_compressContinue(ZBUFF_CCtx* cctx, void* dst, size_t* dstCapacityPtr, const void* src, size_t* srcSizePtr);
+ZBUFF_DEPRECATED("use ZSTD_flushStream")    size_t ZBUFF_compressFlush(ZBUFF_CCtx* cctx, void* dst, size_t* dstCapacityPtr);
+ZBUFF_DEPRECATED("use ZSTD_endStream")      size_t ZBUFF_compressEnd(ZBUFF_CCtx* cctx, void* dst, size_t* dstCapacityPtr);
 
 /*-*************************************************
 *  Streaming compression - howto
@@ -101,14 +118,14 @@ ZSTDLIB_API size_t ZBUFF_compressEnd(ZBUFF_CCtx* cctx, void* dst, size_t* dstCap
 * **************************************************/
 
 
-typedef struct ZBUFF_DCtx_s ZBUFF_DCtx;
-ZSTDLIB_API ZBUFF_DCtx* ZBUFF_createDCtx(void);
-ZSTDLIB_API size_t      ZBUFF_freeDCtx(ZBUFF_DCtx* dctx);
+typedef ZSTD_DStream ZBUFF_DCtx;
+ZBUFF_DEPRECATED("use ZSTD_createDStream") ZBUFF_DCtx* ZBUFF_createDCtx(void);
+ZBUFF_DEPRECATED("use ZSTD_freeDStream")   size_t      ZBUFF_freeDCtx(ZBUFF_DCtx* dctx);
 
-ZSTDLIB_API size_t ZBUFF_decompressInit(ZBUFF_DCtx* dctx);
-ZSTDLIB_API size_t ZBUFF_decompressInitDictionary(ZBUFF_DCtx* dctx, const void* dict, size_t dictSize);
+ZBUFF_DEPRECATED("use ZSTD_initDStream")           size_t ZBUFF_decompressInit(ZBUFF_DCtx* dctx);
+ZBUFF_DEPRECATED("use ZSTD_initDStream_usingDict") size_t ZBUFF_decompressInitDictionary(ZBUFF_DCtx* dctx, const void* dict, size_t dictSize);
 
-ZSTDLIB_API size_t ZBUFF_decompressContinue(ZBUFF_DCtx* dctx,
+ZBUFF_DEPRECATED("use ZSTD_decompressStream") size_t ZBUFF_decompressContinue(ZBUFF_DCtx* dctx,
                                             void* dst, size_t* dstCapacityPtr,
                                       const void* src, size_t* srcSizePtr);
 
@@ -141,18 +158,22 @@ ZSTDLIB_API size_t ZBUFF_decompressContinue(ZBUFF_DCtx* dctx,
 /* *************************************
 *  Tool functions
 ***************************************/
-ZSTDLIB_API unsigned ZBUFF_isError(size_t errorCode);
-ZSTDLIB_API const char* ZBUFF_getErrorName(size_t errorCode);
+ZBUFF_DEPRECATED("use ZSTD_isError")      unsigned ZBUFF_isError(size_t errorCode);
+ZBUFF_DEPRECATED("use ZSTD_getErrorName") const char* ZBUFF_getErrorName(size_t errorCode);
 
 /** Functions below provide recommended buffer sizes for Compression or Decompression operations.
 *   These sizes are just hints, they tend to offer better latency */
-ZSTDLIB_API size_t ZBUFF_recommendedCInSize(void);
-ZSTDLIB_API size_t ZBUFF_recommendedCOutSize(void);
-ZSTDLIB_API size_t ZBUFF_recommendedDInSize(void);
-ZSTDLIB_API size_t ZBUFF_recommendedDOutSize(void);
+ZBUFF_DEPRECATED("use ZSTD_CStreamInSize")  size_t ZBUFF_recommendedCInSize(void);
+ZBUFF_DEPRECATED("use ZSTD_CStreamOutSize") size_t ZBUFF_recommendedCOutSize(void);
+ZBUFF_DEPRECATED("use ZSTD_DStreamInSize")  size_t ZBUFF_recommendedDInSize(void);
+ZBUFF_DEPRECATED("use ZSTD_DStreamOutSize") size_t ZBUFF_recommendedDOutSize(void);
+
+#endif  /* ZSTD_BUFFERED_H_23987 */
 
 
 #ifdef ZBUFF_STATIC_LINKING_ONLY
+#ifndef ZBUFF_STATIC_H_30298098432
+#define ZBUFF_STATIC_H_30298098432
 
 /* ====================================================================================
  * The definitions in this section are considered experimental.
@@ -169,23 +190,23 @@ ZSTDLIB_API size_t ZBUFF_recommendedDOutSize(void);
 /*--- Custom memory allocator ---*/
 /*! ZBUFF_createCCtx_advanced() :
  *  Create a ZBUFF compression context using external alloc and free functions */
-ZSTDLIB_API ZBUFF_CCtx* ZBUFF_createCCtx_advanced(ZSTD_customMem customMem);
+ZBUFF_DEPRECATED("use ZSTD_createCStream_advanced") ZBUFF_CCtx* ZBUFF_createCCtx_advanced(ZSTD_customMem customMem);
 
 /*! ZBUFF_createDCtx_advanced() :
  *  Create a ZBUFF decompression context using external alloc and free functions */
-ZSTDLIB_API ZBUFF_DCtx* ZBUFF_createDCtx_advanced(ZSTD_customMem customMem);
+ZBUFF_DEPRECATED("use ZSTD_createDStream_advanced") ZBUFF_DCtx* ZBUFF_createDCtx_advanced(ZSTD_customMem customMem);
 
 
 /*--- Advanced Streaming Initialization ---*/
-ZSTDLIB_API size_t ZBUFF_compressInit_advanced(ZBUFF_CCtx* zbc,
+ZBUFF_DEPRECATED("use ZSTD_initDStream_usingDict") size_t ZBUFF_compressInit_advanced(ZBUFF_CCtx* zbc,
                                                const void* dict, size_t dictSize,
                                                ZSTD_parameters params, unsigned long long pledgedSrcSize);
 
-#endif /* ZBUFF_STATIC_LINKING_ONLY */
+
+#endif    /* ZBUFF_STATIC_H_30298098432 */
+#endif    /* ZBUFF_STATIC_LINKING_ONLY */
 
 
 #if defined (__cplusplus)
 }
 #endif
-
-#endif  /* ZSTD_BUFFERED_H_23987 */
diff --git a/contrib/libzstd/include/zstd/deprecated/zbuff_common.c b/contrib/libzstd/include/zstd/deprecated/zbuff_common.c
new file mode 100644
index 00000000000..9fff6eb2095
--- /dev/null
+++ b/contrib/libzstd/include/zstd/deprecated/zbuff_common.c
@@ -0,0 +1,26 @@
+/**
+ * Copyright (c) 2016-present, Yann Collet, Facebook, Inc.
+ * All rights reserved.
+ *
+ * This source code is licensed under the BSD-style license found in the
+ * LICENSE file in the root directory of this source tree. An additional grant
+ * of patent rights can be found in the PATENTS file in the same directory.
+ */
+
+/*-*************************************
+*  Dependencies
+***************************************/
+#include "error_private.h"
+#include "zbuff.h"
+
+/*-****************************************
+*  ZBUFF Error Management  (deprecated)
+******************************************/
+
+/*! ZBUFF_isError() :
+*   tells if a return value is an error code */
+unsigned ZBUFF_isError(size_t errorCode) { return ERR_isError(errorCode); }
+/*! ZBUFF_getErrorName() :
+*   provides error code string from function result (useful for debugging) */
+const char* ZBUFF_getErrorName(size_t errorCode) { return ERR_getErrorName(errorCode); }
+
diff --git a/contrib/libzstd/include/zstd/deprecated/zbuff_compress.c b/contrib/libzstd/include/zstd/deprecated/zbuff_compress.c
new file mode 100644
index 00000000000..5a37a0027ae
--- /dev/null
+++ b/contrib/libzstd/include/zstd/deprecated/zbuff_compress.c
@@ -0,0 +1,145 @@
+/**
+ * Copyright (c) 2016-present, Yann Collet, Facebook, Inc.
+ * All rights reserved.
+ *
+ * This source code is licensed under the BSD-style license found in the
+ * LICENSE file in the root directory of this source tree. An additional grant
+ * of patent rights can be found in the PATENTS file in the same directory.
+ */
+
+
+
+/* *************************************
+*  Dependencies
+***************************************/
+#define ZBUFF_STATIC_LINKING_ONLY
+#include "zbuff.h"
+
+
+/*-***********************************************************
+*  Streaming compression
+*
+*  A ZBUFF_CCtx object is required to track streaming operation.
+*  Use ZBUFF_createCCtx() and ZBUFF_freeCCtx() to create/release resources.
+*  Use ZBUFF_compressInit() to start a new compression operation.
+*  ZBUFF_CCtx objects can be reused multiple times.
+*
+*  Use ZBUFF_compressContinue() repetitively to consume your input.
+*  *srcSizePtr and *dstCapacityPtr can be any size.
+*  The function will report how many bytes were read or written by modifying *srcSizePtr and *dstCapacityPtr.
+*  Note that it may not consume the entire input, in which case it's up to the caller to call again the function with remaining input.
+*  The content of dst will be overwritten (up to *dstCapacityPtr) at each function call, so save its content if it matters or change dst .
+*  @return : a hint to preferred nb of bytes to use as input for next function call (it's only a hint, to improve latency)
+*            or an error code, which can be tested using ZBUFF_isError().
+*
+*  ZBUFF_compressFlush() can be used to instruct ZBUFF to compress and output whatever remains within its buffer.
+*  Note that it will not output more than *dstCapacityPtr.
+*  Therefore, some content might still be left into its internal buffer if dst buffer is too small.
+*  @return : nb of bytes still present into internal buffer (0 if it's empty)
+*            or an error code, which can be tested using ZBUFF_isError().
+*
+*  ZBUFF_compressEnd() instructs to finish a frame.
+*  It will perform a flush and write frame epilogue.
+*  Similar to ZBUFF_compressFlush(), it may not be able to output the entire internal buffer content if *dstCapacityPtr is too small.
+*  @return : nb of bytes still present into internal buffer (0 if it's empty)
+*            or an error code, which can be tested using ZBUFF_isError().
+*
+*  Hint : recommended buffer sizes (not compulsory)
+*  input : ZSTD_BLOCKSIZE_MAX (128 KB), internal unit size, it improves latency to use this value.
+*  output : ZSTD_compressBound(ZSTD_BLOCKSIZE_MAX) + ZSTD_blockHeaderSize + ZBUFF_endFrameSize : ensures it's always possible to write/flush/end a full block at best speed.
+* ***********************************************************/
+
+ZBUFF_CCtx* ZBUFF_createCCtx(void)
+{
+    return ZSTD_createCStream();
+}
+
+ZBUFF_CCtx* ZBUFF_createCCtx_advanced(ZSTD_customMem customMem)
+{
+    return ZSTD_createCStream_advanced(customMem);
+}
+
+size_t ZBUFF_freeCCtx(ZBUFF_CCtx* zbc)
+{
+    return ZSTD_freeCStream(zbc);
+}
+
+
+/* ======   Initialization   ====== */
+
+size_t ZBUFF_compressInit_advanced(ZBUFF_CCtx* zbc,
+                                   const void* dict, size_t dictSize,
+                                   ZSTD_parameters params, unsigned long long pledgedSrcSize)
+{
+    return ZSTD_initCStream_advanced(zbc, dict, dictSize, params, pledgedSrcSize);
+}
+
+
+size_t ZBUFF_compressInitDictionary(ZBUFF_CCtx* zbc, const void* dict, size_t dictSize, int compressionLevel)
+{
+    return ZSTD_initCStream_usingDict(zbc, dict, dictSize, compressionLevel);
+}
+
+size_t ZBUFF_compressInit(ZBUFF_CCtx* zbc, int compressionLevel)
+{
+    return ZSTD_initCStream(zbc, compressionLevel);
+}
+
+/* ======   Compression   ====== */
+
+
+size_t ZBUFF_compressContinue(ZBUFF_CCtx* zbc,
+                              void* dst, size_t* dstCapacityPtr,
+                        const void* src, size_t* srcSizePtr)
+{
+    size_t result;
+    ZSTD_outBuffer outBuff;
+    ZSTD_inBuffer inBuff;
+    outBuff.dst = dst;
+    outBuff.pos = 0;
+    outBuff.size = *dstCapacityPtr;
+    inBuff.src = src;
+    inBuff.pos = 0;
+    inBuff.size = *srcSizePtr;
+    result = ZSTD_compressStream(zbc, &outBuff, &inBuff);
+    *dstCapacityPtr = outBuff.pos;
+    *srcSizePtr = inBuff.pos;
+    return result;
+}
+
+
+
+/* ======   Finalize   ====== */
+
+size_t ZBUFF_compressFlush(ZBUFF_CCtx* zbc, void* dst, size_t* dstCapacityPtr)
+{
+    size_t result;
+    ZSTD_outBuffer outBuff;
+    outBuff.dst = dst;
+    outBuff.pos = 0;
+    outBuff.size = *dstCapacityPtr;
+    result = ZSTD_flushStream(zbc, &outBuff);
+    *dstCapacityPtr = outBuff.pos;
+    return result;
+}
+
+
+size_t ZBUFF_compressEnd(ZBUFF_CCtx* zbc, void* dst, size_t* dstCapacityPtr)
+{
+    size_t result;
+    ZSTD_outBuffer outBuff;
+    outBuff.dst = dst;
+    outBuff.pos = 0;
+    outBuff.size = *dstCapacityPtr;
+    result = ZSTD_endStream(zbc, &outBuff);
+    *dstCapacityPtr = outBuff.pos;
+    return result;
+}
+
+
+
+/* *************************************
+*  Tool functions
+***************************************/
+size_t ZBUFF_recommendedCInSize(void)  { return ZSTD_CStreamInSize(); }
+size_t ZBUFF_recommendedCOutSize(void) { return ZSTD_CStreamOutSize(); }
diff --git a/contrib/libzstd/include/zstd/deprecated/zbuff_decompress.c b/contrib/libzstd/include/zstd/deprecated/zbuff_decompress.c
new file mode 100644
index 00000000000..d9c155e08eb
--- /dev/null
+++ b/contrib/libzstd/include/zstd/deprecated/zbuff_decompress.c
@@ -0,0 +1,74 @@
+/**
+ * Copyright (c) 2016-present, Yann Collet, Facebook, Inc.
+ * All rights reserved.
+ *
+ * This source code is licensed under the BSD-style license found in the
+ * LICENSE file in the root directory of this source tree. An additional grant
+ * of patent rights can be found in the PATENTS file in the same directory.
+ */
+
+
+
+/* *************************************
+*  Dependencies
+***************************************/
+#define ZBUFF_STATIC_LINKING_ONLY
+#include "zbuff.h"
+
+
+ZBUFF_DCtx* ZBUFF_createDCtx(void)
+{
+    return ZSTD_createDStream();
+}
+
+ZBUFF_DCtx* ZBUFF_createDCtx_advanced(ZSTD_customMem customMem)
+{
+    return ZSTD_createDStream_advanced(customMem);
+}
+
+size_t ZBUFF_freeDCtx(ZBUFF_DCtx* zbd)
+{
+    return ZSTD_freeDStream(zbd);
+}
+
+
+/* *** Initialization *** */
+
+size_t ZBUFF_decompressInitDictionary(ZBUFF_DCtx* zbd, const void* dict, size_t dictSize)
+{
+    return ZSTD_initDStream_usingDict(zbd, dict, dictSize);
+}
+
+size_t ZBUFF_decompressInit(ZBUFF_DCtx* zbd)
+{
+    return ZSTD_initDStream(zbd);
+}
+
+
+/* *** Decompression *** */
+
+size_t ZBUFF_decompressContinue(ZBUFF_DCtx* zbd,
+                                void* dst, size_t* dstCapacityPtr,
+                          const void* src, size_t* srcSizePtr)
+{
+    ZSTD_outBuffer outBuff;
+    ZSTD_inBuffer inBuff;
+    size_t result;
+    outBuff.dst  = dst;
+    outBuff.pos  = 0;
+    outBuff.size = *dstCapacityPtr;
+    inBuff.src  = src;
+    inBuff.pos  = 0;
+    inBuff.size = *srcSizePtr;
+    result = ZSTD_decompressStream(zbd, &outBuff, &inBuff);
+    *dstCapacityPtr = outBuff.pos;
+    *srcSizePtr = inBuff.pos;
+    return result;
+}
+
+
+/* *************************************
+*  Tool functions
+***************************************/
+size_t ZBUFF_recommendedDInSize(void)  { return ZSTD_DStreamInSize(); }
+size_t ZBUFF_recommendedDOutSize(void) { return ZSTD_DStreamOutSize(); }
diff --git a/contrib/libzstd/include/zstd/dictBuilder/cover.c b/contrib/libzstd/include/zstd/dictBuilder/cover.c
new file mode 100644
index 00000000000..06c1b9fadb7
--- /dev/null
+++ b/contrib/libzstd/include/zstd/dictBuilder/cover.c
@@ -0,0 +1,1036 @@
+/**
+ * Copyright (c) 2016-present, Yann Collet, Facebook, Inc.
+ * All rights reserved.
+ *
+ * This source code is licensed under the BSD-style license found in the
+ * LICENSE file in the root directory of this source tree. An additional grant
+ * of patent rights can be found in the PATENTS file in the same directory.
+ */
+
+/* *****************************************************************************
+ * Constructs a dictionary using a heuristic based on the following paper:
+ *
+ * Liao, Petri, Moffat, Wirth
+ * Effective Construction of Relative Lempel-Ziv Dictionaries
+ * Published in WWW 2016.
+ *
+ * Adapted from code originally written by @ot (Giuseppe Ottaviano).
+ ******************************************************************************/
+
+/*-*************************************
+*  Dependencies
+***************************************/
+#include <stdio.h>  /* fprintf */
+#include <stdlib.h> /* malloc, free, qsort */
+#include <string.h> /* memset */
+#include <time.h>   /* clock */
+
+#include "mem.h" /* read */
+#include "pool.h"
+#include "threading.h"
+#include "zstd_internal.h" /* includes zstd.h */
+#ifndef ZDICT_STATIC_LINKING_ONLY
+#define ZDICT_STATIC_LINKING_ONLY
+#endif
+#include "zdict.h"
+
+/*-*************************************
+*  Constants
+***************************************/
+#define COVER_MAX_SAMPLES_SIZE (sizeof(size_t) == 8 ? ((U32)-1) : ((U32)1 GB))
+
+/*-*************************************
+*  Console display
+***************************************/
+static int g_displayLevel = 2;
+#define DISPLAY(...)                                                           \
+  {                                                                            \
+    fprintf(stderr, __VA_ARGS__);                                              \
+    fflush(stderr);                                                            \
+  }
+#define LOCALDISPLAYLEVEL(displayLevel, l, ...)                                \
+  if (displayLevel >= l) {                                                     \
+    DISPLAY(__VA_ARGS__);                                                      \
+  } /* 0 : no display;   1: errors;   2: default;  3: details;  4: debug */
+#define DISPLAYLEVEL(l, ...) LOCALDISPLAYLEVEL(g_displayLevel, l, __VA_ARGS__)
+
+#define LOCALDISPLAYUPDATE(displayLevel, l, ...)                               \
+  if (displayLevel >= l) {                                                     \
+    if ((clock() - g_time > refreshRate) || (displayLevel >= 4)) {             \
+      g_time = clock();                                                        \
+      DISPLAY(__VA_ARGS__);                                                    \
+    }                                                                          \
+  }
+#define DISPLAYUPDATE(l, ...) LOCALDISPLAYUPDATE(g_displayLevel, l, __VA_ARGS__)
+static const clock_t refreshRate = CLOCKS_PER_SEC * 15 / 100;
+static clock_t g_time = 0;
+
+/*-*************************************
+* Hash table
+***************************************
+* A small specialized hash map for storing activeDmers.
+* The map does not resize, so if it becomes full it will loop forever.
+* Thus, the map must be large enough to store every value.
+* The map implements linear probing and keeps its load less than 0.5.
+*/
+
+#define MAP_EMPTY_VALUE ((U32)-1)
+typedef struct COVER_map_pair_t_s {
+  U32 key;
+  U32 value;
+} COVER_map_pair_t;
+
+typedef struct COVER_map_s {
+  COVER_map_pair_t *data;
+  U32 sizeLog;
+  U32 size;
+  U32 sizeMask;
+} COVER_map_t;
+
+/**
+ * Clear the map.
+ */
+static void COVER_map_clear(COVER_map_t *map) {
+  memset(map->data, MAP_EMPTY_VALUE, map->size * sizeof(COVER_map_pair_t));
+}
+
+/**
+ * Initializes a map of the given size.
+ * Returns 1 on success and 0 on failure.
+ * The map must be destroyed with COVER_map_destroy().
+ * The map is only guaranteed to be large enough to hold size elements.
+ */
+static int COVER_map_init(COVER_map_t *map, U32 size) {
+  map->sizeLog = ZSTD_highbit32(size) + 2;
+  map->size = (U32)1 << map->sizeLog;
+  map->sizeMask = map->size - 1;
+  map->data = (COVER_map_pair_t *)malloc(map->size * sizeof(COVER_map_pair_t));
+  if (!map->data) {
+    map->sizeLog = 0;
+    map->size = 0;
+    return 0;
+  }
+  COVER_map_clear(map);
+  return 1;
+}
+
+/**
+ * Internal hash function
+ */
+static const U32 prime4bytes = 2654435761U;
+static U32 COVER_map_hash(COVER_map_t *map, U32 key) {
+  return (key * prime4bytes) >> (32 - map->sizeLog);
+}
+
+/**
+ * Helper function that returns the index that a key should be placed into.
+ */
+static U32 COVER_map_index(COVER_map_t *map, U32 key) {
+  const U32 hash = COVER_map_hash(map, key);
+  U32 i;
+  for (i = hash;; i = (i + 1) & map->sizeMask) {
+    COVER_map_pair_t *pos = &map->data[i];
+    if (pos->value == MAP_EMPTY_VALUE) {
+      return i;
+    }
+    if (pos->key == key) {
+      return i;
+    }
+  }
+}
+
+/**
+ * Returns the pointer to the value for key.
+ * If key is not in the map, it is inserted and the value is set to 0.
+ * The map must not be full.
+ */
+static U32 *COVER_map_at(COVER_map_t *map, U32 key) {
+  COVER_map_pair_t *pos = &map->data[COVER_map_index(map, key)];
+  if (pos->value == MAP_EMPTY_VALUE) {
+    pos->key = key;
+    pos->value = 0;
+  }
+  return &pos->value;
+}
+
+/**
+ * Deletes key from the map if present.
+ */
+static void COVER_map_remove(COVER_map_t *map, U32 key) {
+  U32 i = COVER_map_index(map, key);
+  COVER_map_pair_t *del = &map->data[i];
+  U32 shift = 1;
+  if (del->value == MAP_EMPTY_VALUE) {
+    return;
+  }
+  for (i = (i + 1) & map->sizeMask;; i = (i + 1) & map->sizeMask) {
+    COVER_map_pair_t *const pos = &map->data[i];
+    /* If the position is empty we are done */
+    if (pos->value == MAP_EMPTY_VALUE) {
+      del->value = MAP_EMPTY_VALUE;
+      return;
+    }
+    /* If pos can be moved to del do so */
+    if (((i - COVER_map_hash(map, pos->key)) & map->sizeMask) >= shift) {
+      del->key = pos->key;
+      del->value = pos->value;
+      del = pos;
+      shift = 1;
+    } else {
+      ++shift;
+    }
+  }
+}
+
+/**
+ * Destroyes a map that is inited with COVER_map_init().
+ */
+static void COVER_map_destroy(COVER_map_t *map) {
+  if (map->data) {
+    free(map->data);
+  }
+  map->data = NULL;
+  map->size = 0;
+}
+
+/*-*************************************
+* Context
+***************************************/
+
+typedef struct {
+  const BYTE *samples;
+  size_t *offsets;
+  const size_t *samplesSizes;
+  size_t nbSamples;
+  U32 *suffix;
+  size_t suffixSize;
+  U32 *freqs;
+  U32 *dmerAt;
+  unsigned d;
+} COVER_ctx_t;
+
+/* We need a global context for qsort... */
+static COVER_ctx_t *g_ctx = NULL;
+
+/*-*************************************
+*  Helper functions
+***************************************/
+
+/**
+ * Returns the sum of the sample sizes.
+ */
+static size_t COVER_sum(const size_t *samplesSizes, unsigned nbSamples) {
+  size_t sum = 0;
+  size_t i;
+  for (i = 0; i < nbSamples; ++i) {
+    sum += samplesSizes[i];
+  }
+  return sum;
+}
+
+/**
+ * Returns -1 if the dmer at lp is less than the dmer at rp.
+ * Return 0 if the dmers at lp and rp are equal.
+ * Returns 1 if the dmer at lp is greater than the dmer at rp.
+ */
+static int COVER_cmp(COVER_ctx_t *ctx, const void *lp, const void *rp) {
+  U32 const lhs = *(U32 const *)lp;
+  U32 const rhs = *(U32 const *)rp;
+  return memcmp(ctx->samples + lhs, ctx->samples + rhs, ctx->d);
+}
+/**
+ * Faster version for d <= 8.
+ */
+static int COVER_cmp8(COVER_ctx_t *ctx, const void *lp, const void *rp) {
+  U64 const mask = (ctx->d == 8) ? (U64)-1 : (((U64)1 << (8 * ctx->d)) - 1);
+  U64 const lhs = MEM_readLE64(ctx->samples + *(U32 const *)lp) & mask;
+  U64 const rhs = MEM_readLE64(ctx->samples + *(U32 const *)rp) & mask;
+  if (lhs < rhs) {
+    return -1;
+  }
+  return (lhs > rhs);
+}
+
+/**
+ * Same as COVER_cmp() except ties are broken by pointer value
+ * NOTE: g_ctx must be set to call this function.  A global is required because
+ * qsort doesn't take an opaque pointer.
+ */
+static int COVER_strict_cmp(const void *lp, const void *rp) {
+  int result = COVER_cmp(g_ctx, lp, rp);
+  if (result == 0) {
+    result = lp < rp ? -1 : 1;
+  }
+  return result;
+}
+/**
+ * Faster version for d <= 8.
+ */
+static int COVER_strict_cmp8(const void *lp, const void *rp) {
+  int result = COVER_cmp8(g_ctx, lp, rp);
+  if (result == 0) {
+    result = lp < rp ? -1 : 1;
+  }
+  return result;
+}
+
+/**
+ * Returns the first pointer in [first, last) whose element does not compare
+ * less than value.  If no such element exists it returns last.
+ */
+static const size_t *COVER_lower_bound(const size_t *first, const size_t *last,
+                                       size_t value) {
+  size_t count = last - first;
+  while (count != 0) {
+    size_t step = count / 2;
+    const size_t *ptr = first;
+    ptr += step;
+    if (*ptr < value) {
+      first = ++ptr;
+      count -= step + 1;
+    } else {
+      count = step;
+    }
+  }
+  return first;
+}
+
+/**
+ * Generic groupBy function.
+ * Groups an array sorted by cmp into groups with equivalent values.
+ * Calls grp for each group.
+ */
+static void
+COVER_groupBy(const void *data, size_t count, size_t size, COVER_ctx_t *ctx,
+              int (*cmp)(COVER_ctx_t *, const void *, const void *),
+              void (*grp)(COVER_ctx_t *, const void *, const void *)) {
+  const BYTE *ptr = (const BYTE *)data;
+  size_t num = 0;
+  while (num < count) {
+    const BYTE *grpEnd = ptr + size;
+    ++num;
+    while (num < count && cmp(ctx, ptr, grpEnd) == 0) {
+      grpEnd += size;
+      ++num;
+    }
+    grp(ctx, ptr, grpEnd);
+    ptr = grpEnd;
+  }
+}
+
+/*-*************************************
+*  Cover functions
+***************************************/
+
+/**
+ * Called on each group of positions with the same dmer.
+ * Counts the frequency of each dmer and saves it in the suffix array.
+ * Fills `ctx->dmerAt`.
+ */
+static void COVER_group(COVER_ctx_t *ctx, const void *group,
+                        const void *groupEnd) {
+  /* The group consists of all the positions with the same first d bytes. */
+  const U32 *grpPtr = (const U32 *)group;
+  const U32 *grpEnd = (const U32 *)groupEnd;
+  /* The dmerId is how we will reference this dmer.
+   * This allows us to map the whole dmer space to a much smaller space, the
+   * size of the suffix array.
+   */
+  const U32 dmerId = (U32)(grpPtr - ctx->suffix);
+  /* Count the number of samples this dmer shows up in */
+  U32 freq = 0;
+  /* Details */
+  const size_t *curOffsetPtr = ctx->offsets;
+  const size_t *offsetsEnd = ctx->offsets + ctx->nbSamples;
+  /* Once *grpPtr >= curSampleEnd this occurrence of the dmer is in a
+   * different sample than the last.
+   */
+  size_t curSampleEnd = ctx->offsets[0];
+  for (; grpPtr != grpEnd; ++grpPtr) {
+    /* Save the dmerId for this position so we can get back to it. */
+    ctx->dmerAt[*grpPtr] = dmerId;
+    /* Dictionaries only help for the first reference to the dmer.
+     * After that zstd can reference the match from the previous reference.
+     * So only count each dmer once for each sample it is in.
+     */
+    if (*grpPtr < curSampleEnd) {
+      continue;
+    }
+    freq += 1;
+    /* Binary search to find the end of the sample *grpPtr is in.
+     * In the common case that grpPtr + 1 == grpEnd we can skip the binary
+     * search because the loop is over.
+     */
+    if (grpPtr + 1 != grpEnd) {
+      const size_t *sampleEndPtr =
+          COVER_lower_bound(curOffsetPtr, offsetsEnd, *grpPtr);
+      curSampleEnd = *sampleEndPtr;
+      curOffsetPtr = sampleEndPtr + 1;
+    }
+  }
+  /* At this point we are never going to look at this segment of the suffix
+   * array again.  We take advantage of this fact to save memory.
+   * We store the frequency of the dmer in the first position of the group,
+   * which is dmerId.
+   */
+  ctx->suffix[dmerId] = freq;
+}
+
+/**
+ * A segment is a range in the source as well as the score of the segment.
+ */
+typedef struct {
+  U32 begin;
+  U32 end;
+  double score;
+} COVER_segment_t;
+
+/**
+ * Selects the best segment in an epoch.
+ * Segments of are scored according to the function:
+ *
+ * Let F(d) be the frequency of dmer d.
+ * Let S_i be the dmer at position i of segment S which has length k.
+ *
+ *     Score(S) = F(S_1) + F(S_2) + ... + F(S_{k-d+1})
+ *
+ * Once the dmer d is in the dictionay we set F(d) = 0.
+ */
+static COVER_segment_t COVER_selectSegment(const COVER_ctx_t *ctx, U32 *freqs,
+                                           COVER_map_t *activeDmers, U32 begin,
+                                           U32 end,
+                                           ZDICT_cover_params_t parameters) {
+  /* Constants */
+  const U32 k = parameters.k;
+  const U32 d = parameters.d;
+  const U32 dmersInK = k - d + 1;
+  /* Try each segment (activeSegment) and save the best (bestSegment) */
+  COVER_segment_t bestSegment = {0, 0, 0};
+  COVER_segment_t activeSegment;
+  /* Reset the activeDmers in the segment */
+  COVER_map_clear(activeDmers);
+  /* The activeSegment starts at the beginning of the epoch. */
+  activeSegment.begin = begin;
+  activeSegment.end = begin;
+  activeSegment.score = 0;
+  /* Slide the activeSegment through the whole epoch.
+   * Save the best segment in bestSegment.
+   */
+  while (activeSegment.end < end) {
+    /* The dmerId for the dmer at the next position */
+    U32 newDmer = ctx->dmerAt[activeSegment.end];
+    /* The entry in activeDmers for this dmerId */
+    U32 *newDmerOcc = COVER_map_at(activeDmers, newDmer);
+    /* If the dmer isn't already present in the segment add its score. */
+    if (*newDmerOcc == 0) {
+      /* The paper suggest using the L-0.5 norm, but experiments show that it
+       * doesn't help.
+       */
+      activeSegment.score += freqs[newDmer];
+    }
+    /* Add the dmer to the segment */
+    activeSegment.end += 1;
+    *newDmerOcc += 1;
+
+    /* If the window is now too large, drop the first position */
+    if (activeSegment.end - activeSegment.begin == dmersInK + 1) {
+      U32 delDmer = ctx->dmerAt[activeSegment.begin];
+      U32 *delDmerOcc = COVER_map_at(activeDmers, delDmer);
+      activeSegment.begin += 1;
+      *delDmerOcc -= 1;
+      /* If this is the last occurence of the dmer, subtract its score */
+      if (*delDmerOcc == 0) {
+        COVER_map_remove(activeDmers, delDmer);
+        activeSegment.score -= freqs[delDmer];
+      }
+    }
+
+    /* If this segment is the best so far save it */
+    if (activeSegment.score > bestSegment.score) {
+      bestSegment = activeSegment;
+    }
+  }
+  {
+    /* Trim off the zero frequency head and tail from the segment. */
+    U32 newBegin = bestSegment.end;
+    U32 newEnd = bestSegment.begin;
+    U32 pos;
+    for (pos = bestSegment.begin; pos != bestSegment.end; ++pos) {
+      U32 freq = freqs[ctx->dmerAt[pos]];
+      if (freq != 0) {
+        newBegin = MIN(newBegin, pos);
+        newEnd = pos + 1;
+      }
+    }
+    bestSegment.begin = newBegin;
+    bestSegment.end = newEnd;
+  }
+  {
+    /* Zero out the frequency of each dmer covered by the chosen segment. */
+    U32 pos;
+    for (pos = bestSegment.begin; pos != bestSegment.end; ++pos) {
+      freqs[ctx->dmerAt[pos]] = 0;
+    }
+  }
+  return bestSegment;
+}
+
+/**
+ * Check the validity of the parameters.
+ * Returns non-zero if the parameters are valid and 0 otherwise.
+ */
+static int COVER_checkParameters(ZDICT_cover_params_t parameters) {
+  /* k and d are required parameters */
+  if (parameters.d == 0 || parameters.k == 0) {
+    return 0;
+  }
+  /* d <= k */
+  if (parameters.d > parameters.k) {
+    return 0;
+  }
+  return 1;
+}
+
+/**
+ * Clean up a context initialized with `COVER_ctx_init()`.
+ */
+static void COVER_ctx_destroy(COVER_ctx_t *ctx) {
+  if (!ctx) {
+    return;
+  }
+  if (ctx->suffix) {
+    free(ctx->suffix);
+    ctx->suffix = NULL;
+  }
+  if (ctx->freqs) {
+    free(ctx->freqs);
+    ctx->freqs = NULL;
+  }
+  if (ctx->dmerAt) {
+    free(ctx->dmerAt);
+    ctx->dmerAt = NULL;
+  }
+  if (ctx->offsets) {
+    free(ctx->offsets);
+    ctx->offsets = NULL;
+  }
+}
+
+/**
+ * Prepare a context for dictionary building.
+ * The context is only dependent on the parameter `d` and can used multiple
+ * times.
+ * Returns 1 on success or zero on error.
+ * The context must be destroyed with `COVER_ctx_destroy()`.
+ */
+static int COVER_ctx_init(COVER_ctx_t *ctx, const void *samplesBuffer,
+                          const size_t *samplesSizes, unsigned nbSamples,
+                          unsigned d) {
+  const BYTE *const samples = (const BYTE *)samplesBuffer;
+  const size_t totalSamplesSize = COVER_sum(samplesSizes, nbSamples);
+  /* Checks */
+  if (totalSamplesSize < MAX(d, sizeof(U64)) ||
+      totalSamplesSize >= (size_t)COVER_MAX_SAMPLES_SIZE) {
+    DISPLAYLEVEL(1, "Total samples size is too large, maximum size is %u MB\n",
+                 (COVER_MAX_SAMPLES_SIZE >> 20));
+    return 0;
+  }
+  /* Zero the context */
+  memset(ctx, 0, sizeof(*ctx));
+  DISPLAYLEVEL(2, "Training on %u samples of total size %u\n", nbSamples,
+               (U32)totalSamplesSize);
+  ctx->samples = samples;
+  ctx->samplesSizes = samplesSizes;
+  ctx->nbSamples = nbSamples;
+  /* Partial suffix array */
+  ctx->suffixSize = totalSamplesSize - MAX(d, sizeof(U64)) + 1;
+  ctx->suffix = (U32 *)malloc(ctx->suffixSize * sizeof(U32));
+  /* Maps index to the dmerID */
+  ctx->dmerAt = (U32 *)malloc(ctx->suffixSize * sizeof(U32));
+  /* The offsets of each file */
+  ctx->offsets = (size_t *)malloc((nbSamples + 1) * sizeof(size_t));
+  if (!ctx->suffix || !ctx->dmerAt || !ctx->offsets) {
+    DISPLAYLEVEL(1, "Failed to allocate scratch buffers\n");
+    COVER_ctx_destroy(ctx);
+    return 0;
+  }
+  ctx->freqs = NULL;
+  ctx->d = d;
+
+  /* Fill offsets from the samlesSizes */
+  {
+    U32 i;
+    ctx->offsets[0] = 0;
+    for (i = 1; i <= nbSamples; ++i) {
+      ctx->offsets[i] = ctx->offsets[i - 1] + samplesSizes[i - 1];
+    }
+  }
+  DISPLAYLEVEL(2, "Constructing partial suffix array\n");
+  {
+    /* suffix is a partial suffix array.
+     * It only sorts suffixes by their first parameters.d bytes.
+     * The sort is stable, so each dmer group is sorted by position in input.
+     */
+    U32 i;
+    for (i = 0; i < ctx->suffixSize; ++i) {
+      ctx->suffix[i] = i;
+    }
+    /* qsort doesn't take an opaque pointer, so pass as a global */
+    g_ctx = ctx;
+    qsort(ctx->suffix, ctx->suffixSize, sizeof(U32),
+          (ctx->d <= 8 ? &COVER_strict_cmp8 : &COVER_strict_cmp));
+  }
+  DISPLAYLEVEL(2, "Computing frequencies\n");
+  /* For each dmer group (group of positions with the same first d bytes):
+   * 1. For each position we set dmerAt[position] = dmerID.  The dmerID is
+   *    (groupBeginPtr - suffix).  This allows us to go from position to
+   *    dmerID so we can look up values in freq.
+   * 2. We calculate how many samples the dmer occurs in and save it in
+   *    freqs[dmerId].
+   */
+  COVER_groupBy(ctx->suffix, ctx->suffixSize, sizeof(U32), ctx,
+                (ctx->d <= 8 ? &COVER_cmp8 : &COVER_cmp), &COVER_group);
+  ctx->freqs = ctx->suffix;
+  ctx->suffix = NULL;
+  return 1;
+}
+
+/**
+ * Given the prepared context build the dictionary.
+ */
+static size_t COVER_buildDictionary(const COVER_ctx_t *ctx, U32 *freqs,
+                                    COVER_map_t *activeDmers, void *dictBuffer,
+                                    size_t dictBufferCapacity,
+                                    ZDICT_cover_params_t parameters) {
+  BYTE *const dict = (BYTE *)dictBuffer;
+  size_t tail = dictBufferCapacity;
+  /* Divide the data up into epochs of equal size.
+   * We will select at least one segment from each epoch.
+   */
+  const U32 epochs = (U32)(dictBufferCapacity / parameters.k);
+  const U32 epochSize = (U32)(ctx->suffixSize / epochs);
+  size_t epoch;
+  DISPLAYLEVEL(2, "Breaking content into %u epochs of size %u\n", epochs,
+               epochSize);
+  /* Loop through the epochs until there are no more segments or the dictionary
+   * is full.
+   */
+  for (epoch = 0; tail > 0; epoch = (epoch + 1) % epochs) {
+    const U32 epochBegin = (U32)(epoch * epochSize);
+    const U32 epochEnd = epochBegin + epochSize;
+    size_t segmentSize;
+    /* Select a segment */
+    COVER_segment_t segment = COVER_selectSegment(
+        ctx, freqs, activeDmers, epochBegin, epochEnd, parameters);
+    /* Trim the segment if necessary and if it is empty then we are done */
+    segmentSize = MIN(segment.end - segment.begin + parameters.d - 1, tail);
+    if (segmentSize == 0) {
+      break;
+    }
+    /* We fill the dictionary from the back to allow the best segments to be
+     * referenced with the smallest offsets.
+     */
+    tail -= segmentSize;
+    memcpy(dict + tail, ctx->samples + segment.begin, segmentSize);
+    DISPLAYUPDATE(
+        2, "\r%u%%       ",
+        (U32)(((dictBufferCapacity - tail) * 100) / dictBufferCapacity));
+  }
+  DISPLAYLEVEL(2, "\r%79s\r", "");
+  return tail;
+}
+
+ZDICTLIB_API size_t ZDICT_trainFromBuffer_cover(
+    void *dictBuffer, size_t dictBufferCapacity, const void *samplesBuffer,
+    const size_t *samplesSizes, unsigned nbSamples,
+    ZDICT_cover_params_t parameters) {
+  BYTE *const dict = (BYTE *)dictBuffer;
+  COVER_ctx_t ctx;
+  COVER_map_t activeDmers;
+  /* Checks */
+  if (!COVER_checkParameters(parameters)) {
+    DISPLAYLEVEL(1, "Cover parameters incorrect\n");
+    return ERROR(GENERIC);
+  }
+  if (nbSamples == 0) {
+    DISPLAYLEVEL(1, "Cover must have at least one input file\n");
+    return ERROR(GENERIC);
+  }
+  if (dictBufferCapacity < ZDICT_DICTSIZE_MIN) {
+    DISPLAYLEVEL(1, "dictBufferCapacity must be at least %u\n",
+                 ZDICT_DICTSIZE_MIN);
+    return ERROR(dstSize_tooSmall);
+  }
+  /* Initialize global data */
+  g_displayLevel = parameters.zParams.notificationLevel;
+  /* Initialize context and activeDmers */
+  if (!COVER_ctx_init(&ctx, samplesBuffer, samplesSizes, nbSamples,
+                      parameters.d)) {
+    return ERROR(GENERIC);
+  }
+  if (!COVER_map_init(&activeDmers, parameters.k - parameters.d + 1)) {
+    DISPLAYLEVEL(1, "Failed to allocate dmer map: out of memory\n");
+    COVER_ctx_destroy(&ctx);
+    return ERROR(GENERIC);
+  }
+
+  DISPLAYLEVEL(2, "Building dictionary\n");
+  {
+    const size_t tail =
+        COVER_buildDictionary(&ctx, ctx.freqs, &activeDmers, dictBuffer,
+                              dictBufferCapacity, parameters);
+    const size_t dictionarySize = ZDICT_finalizeDictionary(
+        dict, dictBufferCapacity, dict + tail, dictBufferCapacity - tail,
+        samplesBuffer, samplesSizes, nbSamples, parameters.zParams);
+    if (!ZSTD_isError(dictionarySize)) {
+      DISPLAYLEVEL(2, "Constructed dictionary of size %u\n",
+                   (U32)dictionarySize);
+    }
+    COVER_ctx_destroy(&ctx);
+    COVER_map_destroy(&activeDmers);
+    return dictionarySize;
+  }
+}
+
+/**
+ * COVER_best_t is used for two purposes:
+ * 1. Synchronizing threads.
+ * 2. Saving the best parameters and dictionary.
+ *
+ * All of the methods except COVER_best_init() are thread safe if zstd is
+ * compiled with multithreaded support.
+ */
+typedef struct COVER_best_s {
+  pthread_mutex_t mutex;
+  pthread_cond_t cond;
+  size_t liveJobs;
+  void *dict;
+  size_t dictSize;
+  ZDICT_cover_params_t parameters;
+  size_t compressedSize;
+} COVER_best_t;
+
+/**
+ * Initialize the `COVER_best_t`.
+ */
+static void COVER_best_init(COVER_best_t *best) {
+  if (!best) {
+    return;
+  }
+  pthread_mutex_init(&best->mutex, NULL);
+  pthread_cond_init(&best->cond, NULL);
+  best->liveJobs = 0;
+  best->dict = NULL;
+  best->dictSize = 0;
+  best->compressedSize = (size_t)-1;
+  memset(&best->parameters, 0, sizeof(best->parameters));
+}
+
+/**
+ * Wait until liveJobs == 0.
+ */
+static void COVER_best_wait(COVER_best_t *best) {
+  if (!best) {
+    return;
+  }
+  pthread_mutex_lock(&best->mutex);
+  while (best->liveJobs != 0) {
+    pthread_cond_wait(&best->cond, &best->mutex);
+  }
+  pthread_mutex_unlock(&best->mutex);
+}
+
+/**
+ * Call COVER_best_wait() and then destroy the COVER_best_t.
+ */
+static void COVER_best_destroy(COVER_best_t *best) {
+  if (!best) {
+    return;
+  }
+  COVER_best_wait(best);
+  if (best->dict) {
+    free(best->dict);
+  }
+  pthread_mutex_destroy(&best->mutex);
+  pthread_cond_destroy(&best->cond);
+}
+
+/**
+ * Called when a thread is about to be launched.
+ * Increments liveJobs.
+ */
+static void COVER_best_start(COVER_best_t *best) {
+  if (!best) {
+    return;
+  }
+  pthread_mutex_lock(&best->mutex);
+  ++best->liveJobs;
+  pthread_mutex_unlock(&best->mutex);
+}
+
+/**
+ * Called when a thread finishes executing, both on error or success.
+ * Decrements liveJobs and signals any waiting threads if liveJobs == 0.
+ * If this dictionary is the best so far save it and its parameters.
+ */
+static void COVER_best_finish(COVER_best_t *best, size_t compressedSize,
+                              ZDICT_cover_params_t parameters, void *dict,
+                              size_t dictSize) {
+  if (!best) {
+    return;
+  }
+  {
+    size_t liveJobs;
+    pthread_mutex_lock(&best->mutex);
+    --best->liveJobs;
+    liveJobs = best->liveJobs;
+    /* If the new dictionary is better */
+    if (compressedSize < best->compressedSize) {
+      /* Allocate space if necessary */
+      if (!best->dict || best->dictSize < dictSize) {
+        if (best->dict) {
+          free(best->dict);
+        }
+        best->dict = malloc(dictSize);
+        if (!best->dict) {
+          best->compressedSize = ERROR(GENERIC);
+          best->dictSize = 0;
+          return;
+        }
+      }
+      /* Save the dictionary, parameters, and size */
+      memcpy(best->dict, dict, dictSize);
+      best->dictSize = dictSize;
+      best->parameters = parameters;
+      best->compressedSize = compressedSize;
+    }
+    pthread_mutex_unlock(&best->mutex);
+    if (liveJobs == 0) {
+      pthread_cond_broadcast(&best->cond);
+    }
+  }
+}
+
+/**
+ * Parameters for COVER_tryParameters().
+ */
+typedef struct COVER_tryParameters_data_s {
+  const COVER_ctx_t *ctx;
+  COVER_best_t *best;
+  size_t dictBufferCapacity;
+  ZDICT_cover_params_t parameters;
+} COVER_tryParameters_data_t;
+
+/**
+ * Tries a set of parameters and upates the COVER_best_t with the results.
+ * This function is thread safe if zstd is compiled with multithreaded support.
+ * It takes its parameters as an *OWNING* opaque pointer to support threading.
+ */
+static void COVER_tryParameters(void *opaque) {
+  /* Save parameters as local variables */
+  COVER_tryParameters_data_t *const data = (COVER_tryParameters_data_t *)opaque;
+  const COVER_ctx_t *const ctx = data->ctx;
+  const ZDICT_cover_params_t parameters = data->parameters;
+  size_t dictBufferCapacity = data->dictBufferCapacity;
+  size_t totalCompressedSize = ERROR(GENERIC);
+  /* Allocate space for hash table, dict, and freqs */
+  COVER_map_t activeDmers;
+  BYTE *const dict = (BYTE * const)malloc(dictBufferCapacity);
+  U32 *freqs = (U32 *)malloc(ctx->suffixSize * sizeof(U32));
+  if (!COVER_map_init(&activeDmers, parameters.k - parameters.d + 1)) {
+    DISPLAYLEVEL(1, "Failed to allocate dmer map: out of memory\n");
+    goto _cleanup;
+  }
+  if (!dict || !freqs) {
+    DISPLAYLEVEL(1, "Failed to allocate buffers: out of memory\n");
+    goto _cleanup;
+  }
+  /* Copy the frequencies because we need to modify them */
+  memcpy(freqs, ctx->freqs, ctx->suffixSize * sizeof(U32));
+  /* Build the dictionary */
+  {
+    const size_t tail = COVER_buildDictionary(ctx, freqs, &activeDmers, dict,
+                                              dictBufferCapacity, parameters);
+    dictBufferCapacity = ZDICT_finalizeDictionary(
+        dict, dictBufferCapacity, dict + tail, dictBufferCapacity - tail,
+        ctx->samples, ctx->samplesSizes, (unsigned)ctx->nbSamples,
+        parameters.zParams);
+    if (ZDICT_isError(dictBufferCapacity)) {
+      DISPLAYLEVEL(1, "Failed to finalize dictionary\n");
+      goto _cleanup;
+    }
+  }
+  /* Check total compressed size */
+  {
+    /* Pointers */
+    ZSTD_CCtx *cctx;
+    ZSTD_CDict *cdict;
+    void *dst;
+    /* Local variables */
+    size_t dstCapacity;
+    size_t i;
+    /* Allocate dst with enough space to compress the maximum sized sample */
+    {
+      size_t maxSampleSize = 0;
+      for (i = 0; i < ctx->nbSamples; ++i) {
+        maxSampleSize = MAX(ctx->samplesSizes[i], maxSampleSize);
+      }
+      dstCapacity = ZSTD_compressBound(maxSampleSize);
+      dst = malloc(dstCapacity);
+    }
+    /* Create the cctx and cdict */
+    cctx = ZSTD_createCCtx();
+    cdict = ZSTD_createCDict(dict, dictBufferCapacity,
+                             parameters.zParams.compressionLevel);
+    if (!dst || !cctx || !cdict) {
+      goto _compressCleanup;
+    }
+    /* Compress each sample and sum their sizes (or error) */
+    totalCompressedSize = 0;
+    for (i = 0; i < ctx->nbSamples; ++i) {
+      const size_t size = ZSTD_compress_usingCDict(
+          cctx, dst, dstCapacity, ctx->samples + ctx->offsets[i],
+          ctx->samplesSizes[i], cdict);
+      if (ZSTD_isError(size)) {
+        totalCompressedSize = ERROR(GENERIC);
+        goto _compressCleanup;
+      }
+      totalCompressedSize += size;
+    }
+  _compressCleanup:
+    ZSTD_freeCCtx(cctx);
+    ZSTD_freeCDict(cdict);
+    if (dst) {
+      free(dst);
+    }
+  }
+
+_cleanup:
+  COVER_best_finish(data->best, totalCompressedSize, parameters, dict,
+                    dictBufferCapacity);
+  free(data);
+  COVER_map_destroy(&activeDmers);
+  if (dict) {
+    free(dict);
+  }
+  if (freqs) {
+    free(freqs);
+  }
+}
+
+ZDICTLIB_API size_t ZDICT_optimizeTrainFromBuffer_cover(
+    void *dictBuffer, size_t dictBufferCapacity, const void *samplesBuffer,
+    const size_t *samplesSizes, unsigned nbSamples,
+    ZDICT_cover_params_t *parameters) {
+  /* constants */
+  const unsigned nbThreads = parameters->nbThreads;
+  const unsigned kMinD = parameters->d == 0 ? 6 : parameters->d;
+  const unsigned kMaxD = parameters->d == 0 ? 8 : parameters->d;
+  const unsigned kMinK = parameters->k == 0 ? 50 : parameters->k;
+  const unsigned kMaxK = parameters->k == 0 ? 2000 : parameters->k;
+  const unsigned kSteps = parameters->steps == 0 ? 40 : parameters->steps;
+  const unsigned kStepSize = MAX((kMaxK - kMinK) / kSteps, 1);
+  const unsigned kIterations =
+      (1 + (kMaxD - kMinD) / 2) * (1 + (kMaxK - kMinK) / kStepSize);
+  /* Local variables */
+  const int displayLevel = parameters->zParams.notificationLevel;
+  unsigned iteration = 1;
+  unsigned d;
+  unsigned k;
+  COVER_best_t best;
+  POOL_ctx *pool = NULL;
+  /* Checks */
+  if (kMinK < kMaxD || kMaxK < kMinK) {
+    LOCALDISPLAYLEVEL(displayLevel, 1, "Incorrect parameters\n");
+    return ERROR(GENERIC);
+  }
+  if (nbSamples == 0) {
+    DISPLAYLEVEL(1, "Cover must have at least one input file\n");
+    return ERROR(GENERIC);
+  }
+  if (dictBufferCapacity < ZDICT_DICTSIZE_MIN) {
+    DISPLAYLEVEL(1, "dictBufferCapacity must be at least %u\n",
+                 ZDICT_DICTSIZE_MIN);
+    return ERROR(dstSize_tooSmall);
+  }
+  if (nbThreads > 1) {
+    pool = POOL_create(nbThreads, 1);
+    if (!pool) {
+      return ERROR(memory_allocation);
+    }
+  }
+  /* Initialization */
+  COVER_best_init(&best);
+  /* Turn down global display level to clean up display at level 2 and below */
+  g_displayLevel = parameters->zParams.notificationLevel - 1;
+  /* Loop through d first because each new value needs a new context */
+  LOCALDISPLAYLEVEL(displayLevel, 2, "Trying %u different sets of parameters\n",
+                    kIterations);
+  for (d = kMinD; d <= kMaxD; d += 2) {
+    /* Initialize the context for this value of d */
+    COVER_ctx_t ctx;
+    LOCALDISPLAYLEVEL(displayLevel, 3, "d=%u\n", d);
+    if (!COVER_ctx_init(&ctx, samplesBuffer, samplesSizes, nbSamples, d)) {
+      LOCALDISPLAYLEVEL(displayLevel, 1, "Failed to initialize context\n");
+      COVER_best_destroy(&best);
+      POOL_free(pool);
+      return ERROR(GENERIC);
+    }
+    /* Loop through k reusing the same context */
+    for (k = kMinK; k <= kMaxK; k += kStepSize) {
+      /* Prepare the arguments */
+      COVER_tryParameters_data_t *data = (COVER_tryParameters_data_t *)malloc(
+          sizeof(COVER_tryParameters_data_t));
+      LOCALDISPLAYLEVEL(displayLevel, 3, "k=%u\n", k);
+      if (!data) {
+        LOCALDISPLAYLEVEL(displayLevel, 1, "Failed to allocate parameters\n");
+        COVER_best_destroy(&best);
+        COVER_ctx_destroy(&ctx);
+        POOL_free(pool);
+        return ERROR(GENERIC);
+      }
+      data->ctx = &ctx;
+      data->best = &best;
+      data->dictBufferCapacity = dictBufferCapacity;
+      data->parameters = *parameters;
+      data->parameters.k = k;
+      data->parameters.d = d;
+      data->parameters.steps = kSteps;
+      /* Check the parameters */
+      if (!COVER_checkParameters(data->parameters)) {
+        DISPLAYLEVEL(1, "Cover parameters incorrect\n");
+        free(data);
+        continue;
+      }
+      /* Call the function and pass ownership of data to it */
+      COVER_best_start(&best);
+      if (pool) {
+        POOL_add(pool, &COVER_tryParameters, data);
+      } else {
+        COVER_tryParameters(data);
+      }
+      /* Print status */
+      LOCALDISPLAYUPDATE(displayLevel, 2, "\r%u%%       ",
+                         (U32)((iteration * 100) / kIterations));
+      ++iteration;
+    }
+    COVER_best_wait(&best);
+    COVER_ctx_destroy(&ctx);
+  }
+  LOCALDISPLAYLEVEL(displayLevel, 2, "\r%79s\r", "");
+  /* Fill the output buffer and parameters with output of the best parameters */
+  {
+    const size_t dictSize = best.dictSize;
+    if (ZSTD_isError(best.compressedSize)) {
+      const size_t compressedSize = best.compressedSize;
+      COVER_best_destroy(&best);
+      POOL_free(pool);
+      return compressedSize;
+    }
+    *parameters = best.parameters;
+    memcpy(dictBuffer, best.dict, dictSize);
+    COVER_best_destroy(&best);
+    POOL_free(pool);
+    return dictSize;
+  }
+}
diff --git a/contrib/libzstd/include/zstd/dictBuilder/zdict.c b/contrib/libzstd/include/zstd/dictBuilder/zdict.c
index 47a82af14b4..742586eacdd 100644
--- a/contrib/libzstd/include/zstd/dictBuilder/zdict.c
+++ b/contrib/libzstd/include/zstd/dictBuilder/zdict.c
@@ -11,8 +11,9 @@
 /*-**************************************
 *  Tuning parameters
 ****************************************/
+#define MINRATIO 4   /* minimum nb of apparition to be selected in dictionary */
 #define ZDICT_MAX_SAMPLES_SIZE (2000U << 20)
-#define ZDICT_MIN_SAMPLES_SIZE 512
+#define ZDICT_MIN_SAMPLES_SIZE (ZDICT_CONTENTSIZE_MIN * MINRATIO)
 
 
 /*-**************************************
@@ -36,12 +37,11 @@
 #include <time.h>          /* clock */
 
 #include "mem.h"           /* read */
-#include "error_private.h"
 #include "fse.h"           /* FSE_normalizeCount, FSE_writeNCount */
 #define HUF_STATIC_LINKING_ONLY
-#include "huf.h"
+#include "huf.h"           /* HUF_buildCTable, HUF_writeCTable */
 #include "zstd_internal.h" /* includes zstd.h */
-#include "xxhash.h"
+#include "xxhash.h"        /* XXH64 */
 #include "divsufsort.h"
 #ifndef ZDICT_STATIC_LINKING_ONLY
 #  define ZDICT_STATIC_LINKING_ONLY
@@ -60,11 +60,8 @@
 
 #define NOISELENGTH 32
 
-#define MINRATIO 4
-static const int g_compressionLevel_default = 5;
+static const int g_compressionLevel_default = 6;
 static const U32 g_selectivity_default = 9;
-static const size_t g_provision_entropySize = 200;
-static const size_t g_min_fast_dictContent = 192;
 
 
 /*-*************************************
@@ -97,7 +94,7 @@ const char* ZDICT_getErrorName(size_t errorCode) { return ERR_getErrorName(error
 unsigned ZDICT_getDictID(const void* dictBuffer, size_t dictSize)
 {
     if (dictSize < 8) return 0;
-    if (MEM_readLE32(dictBuffer) != ZSTD_DICT_MAGIC) return 0;
+    if (MEM_readLE32(dictBuffer) != ZSTD_MAGIC_DICTIONARY) return 0;
     return MEM_readLE32((const char*)dictBuffer + 4);
 }
 
@@ -307,13 +304,13 @@ static dictItem ZDICT_analyzePos(
         } while (length >=MINMATCHLENGTH);
 
         /* look backward */
-		length = MINMATCHLENGTH;
-		while ((length >= MINMATCHLENGTH) & (start > 0)) {
-			length = ZDICT_count(b + pos, b + suffix[start - 1]);
-			if (length >= LLIMIT) length = LLIMIT - 1;
-			lengthList[length]++;
-			if (length >= MINMATCHLENGTH) start--;
-		}
+        length = MINMATCHLENGTH;
+        while ((length >= MINMATCHLENGTH) & (start > 0)) {
+            length = ZDICT_count(b + pos, b + suffix[start - 1]);
+            if (length >= LLIMIT) length = LLIMIT - 1;
+            lengthList[length]++;
+            if (length >= MINMATCHLENGTH) start--;
+        }
 
         /* largest useful length */
         memset(cumulLength, 0, sizeof(cumulLength));
@@ -364,28 +361,43 @@ static dictItem ZDICT_analyzePos(
 }
 
 
+static int isIncluded(const void* in, const void* container, size_t length)
+{
+    const char* const ip = (const char*) in;
+    const char* const into = (const char*) container;
+    size_t u;
+
+    for (u=0; u<length; u++) {  /* works because end of buffer is a noisy guard band */
+        if (ip[u] != into[u]) break;
+    }
+
+    return u==length;
+}
+
 /*! ZDICT_checkMerge
     check if dictItem can be merged, do it if possible
     @return : id of destination elt, 0 if not merged
 */
-static U32 ZDICT_checkMerge(dictItem* table, dictItem elt, U32 eltNbToSkip)
+static U32 ZDICT_tryMerge(dictItem* table, dictItem elt, U32 eltNbToSkip, const void* buffer)
 {
     const U32 tableSize = table->pos;
-    const U32 max = elt.pos + (elt.length-1);
+    const U32 eltEnd = elt.pos + elt.length;
+    const char* const buf = (const char*) buffer;
 
     /* tail overlap */
     U32 u; for (u=1; u<tableSize; u++) {
         if (u==eltNbToSkip) continue;
-        if ((table[u].pos > elt.pos) && (table[u].pos < max)) {  /* overlap */
+        if ((table[u].pos > elt.pos) && (table[u].pos <= eltEnd)) {  /* overlap, existing > new */
             /* append */
-            U32 addedLength = table[u].pos - elt.pos;
+            U32 const addedLength = table[u].pos - elt.pos;
             table[u].length += addedLength;
             table[u].pos = elt.pos;
             table[u].savings += elt.savings * addedLength / elt.length;   /* rough approx */
-            table[u].savings += elt.length / 8;    /* rough approx */
+            table[u].savings += elt.length / 8;    /* rough approx bonus */
             elt = table[u];
+            /* sort : improve rank */
             while ((u>1) && (table[u-1].savings < elt.savings))
-                table[u] = table[u-1], u--;
+            table[u] = table[u-1], u--;
             table[u] = elt;
             return u;
     }   }
@@ -393,20 +405,33 @@ static U32 ZDICT_checkMerge(dictItem* table, dictItem elt, U32 eltNbToSkip)
     /* front overlap */
     for (u=1; u<tableSize; u++) {
         if (u==eltNbToSkip) continue;
-        if ((table[u].pos + table[u].length > elt.pos) && (table[u].pos < elt.pos)) {  /* overlap */
+
+        if ((table[u].pos + table[u].length >= elt.pos) && (table[u].pos < elt.pos)) {  /* overlap, existing < new */
             /* append */
-            int addedLength = (elt.pos + elt.length) - (table[u].pos + table[u].length);
-            table[u].savings += elt.length / 8;    /* rough approx */
-            if (addedLength > 0) {   /* otherwise, already included */
+            int const addedLength = (int)eltEnd - (table[u].pos + table[u].length);
+            table[u].savings += elt.length / 8;    /* rough approx bonus */
+            if (addedLength > 0) {   /* otherwise, elt fully included into existing */
                 table[u].length += addedLength;
                 table[u].savings += elt.savings * addedLength / elt.length;   /* rough approx */
             }
+            /* sort : improve rank */
             elt = table[u];
             while ((u>1) && (table[u-1].savings < elt.savings))
                 table[u] = table[u-1], u--;
             table[u] = elt;
             return u;
-    }   }
+        }
+
+        if (MEM_read64(buf + table[u].pos) == MEM_read64(buf + elt.pos + 1)) {
+            if (isIncluded(buf + table[u].pos, buf + elt.pos + 1, table[u].length)) {
+                size_t const addedLength = MAX( (int)elt.length - (int)table[u].length , 1 );
+                table[u].pos = elt.pos;
+                table[u].savings += (U32)(elt.savings * addedLength / elt.length);
+                table[u].length = MIN(elt.length, table[u].length + 1);
+                return u;
+            }
+        }
+    }
 
     return 0;
 }
@@ -424,14 +449,14 @@ static void ZDICT_removeDictItem(dictItem* table, U32 id)
 }
 
 
-static void ZDICT_insertDictItem(dictItem* table, U32 maxSize, dictItem elt)
+static void ZDICT_insertDictItem(dictItem* table, U32 maxSize, dictItem elt, const void* buffer)
 {
     /* merge if possible */
-    U32 mergeId = ZDICT_checkMerge(table, elt, 0);
+    U32 mergeId = ZDICT_tryMerge(table, elt, 0, buffer);
     if (mergeId) {
         U32 newMerge = 1;
         while (newMerge) {
-            newMerge = ZDICT_checkMerge(table, table[mergeId], mergeId);
+            newMerge = ZDICT_tryMerge(table, table[mergeId], mergeId, buffer);
             if (newMerge) ZDICT_removeDictItem(table, mergeId);
             mergeId = newMerge;
         }
@@ -462,7 +487,7 @@ static U32 ZDICT_dictSize(const dictItem* dictList)
 }
 
 
-static size_t ZDICT_trainBuffer(dictItem* dictList, U32 dictListSize,
+static size_t ZDICT_trainBuffer_legacy(dictItem* dictList, U32 dictListSize,
                             const void* const buffer, size_t bufferSize,   /* buffer must end with noisy guard band */
                             const size_t* fileSizes, unsigned nbFiles,
                             U32 minRatio, U32 notificationLevel)
@@ -479,7 +504,7 @@ static size_t ZDICT_trainBuffer(dictItem* dictList, U32 dictListSize,
 #   define DISPLAYUPDATE(l, ...) if (notificationLevel>=l) { \
             if (ZDICT_clockSpan(displayClock) > refreshRate)  \
             { displayClock = clock(); DISPLAY(__VA_ARGS__); \
-            if (notificationLevel>=4) fflush(stdout); } }
+            if (notificationLevel>=4) fflush(stderr); } }
 
     /* init */
     DISPLAYLEVEL(2, "\r%70s\r", "");   /* clean display line */
@@ -520,7 +545,7 @@ static size_t ZDICT_trainBuffer(dictItem* dictList, U32 dictListSize,
             if (doneMarks[cursor]) { cursor++; continue; }
             solution = ZDICT_analyzePos(doneMarks, suffix, reverseSuffix[cursor], buffer, minRatio, notificationLevel);
             if (solution.length==0) { cursor++; continue; }
-            ZDICT_insertDictItem(dictList, dictListSize, solution);
+            ZDICT_insertDictItem(dictList, dictListSize, solution, buffer);
             cursor += solution.length;
             DISPLAYUPDATE(2, "\r%4.2f %% \r", (double)cursor / bufferSize * 100);
     }   }
@@ -551,7 +576,7 @@ typedef struct
 {
     ZSTD_CCtx* ref;
     ZSTD_CCtx* zc;
-    void* workPlace;   /* must be ZSTD_BLOCKSIZE_ABSOLUTEMAX allocated */
+    void* workPlace;   /* must be ZSTD_BLOCKSIZE_MAX allocated */
 } EStats_ress_t;
 
 #define MAXREPOFFSET 1024
@@ -560,15 +585,15 @@ static void ZDICT_countEStats(EStats_ress_t esr, ZSTD_parameters params,
                             U32* countLit, U32* offsetcodeCount, U32* matchlengthCount, U32* litlengthCount, U32* repOffsets,
                             const void* src, size_t srcSize, U32 notificationLevel)
 {
-    size_t const blockSizeMax = MIN (ZSTD_BLOCKSIZE_ABSOLUTEMAX, 1 << params.cParams.windowLog);
+    size_t const blockSizeMax = MIN (ZSTD_BLOCKSIZE_MAX, 1 << params.cParams.windowLog);
     size_t cSize;
 
     if (srcSize > blockSizeMax) srcSize = blockSizeMax;   /* protection vs large samples */
     {  size_t const errorCode = ZSTD_copyCCtx(esr.zc, esr.ref, 0);
             if (ZSTD_isError(errorCode)) { DISPLAYLEVEL(1, "warning : ZSTD_copyCCtx failed \n"); return; }
     }
-    cSize = ZSTD_compressBlock(esr.zc, esr.workPlace, ZSTD_BLOCKSIZE_ABSOLUTEMAX, src, srcSize);
-    if (ZSTD_isError(cSize)) { DISPLAYLEVEL(1, "warning : could not compress sample size %u \n", (U32)srcSize); return; }
+    cSize = ZSTD_compressBlock(esr.zc, esr.workPlace, ZSTD_BLOCKSIZE_MAX, src, srcSize);
+    if (ZSTD_isError(cSize)) { DISPLAYLEVEL(3, "warning : could not compress sample size %u \n", (U32)srcSize); return; }
 
     if (cSize) {  /* if == 0; block is not compressible */
         const seqStore_t* seqStorePtr = ZSTD_getSeqStore(esr.zc);
@@ -609,17 +634,6 @@ static void ZDICT_countEStats(EStats_ress_t esr, ZSTD_parameters params,
     }   }   }
 }
 
-/*
-static size_t ZDICT_maxSampleSize(const size_t* fileSizes, unsigned nbFiles)
-{
-    unsigned u;
-    size_t max=0;
-    for (u=0; u<nbFiles; u++)
-        if (max < fileSizes[u]) max = fileSizes[u];
-    return max;
-}
-*/
-
 static size_t ZDICT_totalSampleSize(const size_t* fileSizes, unsigned nbFiles)
 {
     size_t total=0;
@@ -675,26 +689,26 @@ static size_t ZDICT_analyzeEntropy(void*  dstBuffer, size_t maxDstSize,
     /* init */
     esr.ref = ZSTD_createCCtx();
     esr.zc = ZSTD_createCCtx();
-    esr.workPlace = malloc(ZSTD_BLOCKSIZE_ABSOLUTEMAX);
+    esr.workPlace = malloc(ZSTD_BLOCKSIZE_MAX);
     if (!esr.ref || !esr.zc || !esr.workPlace) {
         eSize = ERROR(memory_allocation);
         DISPLAYLEVEL(1, "Not enough memory \n");
         goto _cleanup;
     }
     if (offcodeMax>OFFCODE_MAX) { eSize = ERROR(dictionary_wrong); goto _cleanup; }   /* too large dictionary */
-    for (u=0; u<256; u++) countLit[u]=1;   /* any character must be described */
-    for (u=0; u<=offcodeMax; u++) offcodeCount[u]=1;
-    for (u=0; u<=MaxML; u++) matchLengthCount[u]=1;
-    for (u=0; u<=MaxLL; u++) litLengthCount[u]=1;
+    for (u=0; u<256; u++) countLit[u] = 1;   /* any character must be described */
+    for (u=0; u<=offcodeMax; u++) offcodeCount[u] = 1;
+    for (u=0; u<=MaxML; u++) matchLengthCount[u] = 1;
+    for (u=0; u<=MaxLL; u++) litLengthCount[u] = 1;
     memset(repOffset, 0, sizeof(repOffset));
     repOffset[1] = repOffset[4] = repOffset[8] = 1;
     memset(bestRepOffset, 0, sizeof(bestRepOffset));
-    if (compressionLevel==0) compressionLevel=g_compressionLevel_default;
+    if (compressionLevel==0) compressionLevel = g_compressionLevel_default;
     params = ZSTD_getParams(compressionLevel, averageSampleSize, dictBufferSize);
     {   size_t const beginResult = ZSTD_compressBegin_advanced(esr.ref, dictBuffer, dictBufferSize, params, 0);
-            if (ZSTD_isError(beginResult)) {
+        if (ZSTD_isError(beginResult)) {
+            DISPLAYLEVEL(1, "error : ZSTD_compressBegin_advanced() failed : %s \n", ZSTD_getErrorName(beginResult));
             eSize = ERROR(GENERIC);
-            DISPLAYLEVEL(1, "error : ZSTD_compressBegin_advanced failed \n");
             goto _cleanup;
     }   }
 
@@ -811,7 +825,6 @@ static size_t ZDICT_analyzeEntropy(void*  dstBuffer, size_t maxDstSize,
     MEM_writeLE32(dstPtr+4, repStartValue[1]);
     MEM_writeLE32(dstPtr+8, repStartValue[2]);
 #endif
-    dstPtr += 12;
     eSize += 12;
 
 _cleanup:
@@ -823,26 +836,66 @@ _cleanup:
 }
 
 
-size_t ZDICT_addEntropyTablesFromBuffer_advanced(void* dictBuffer, size_t dictContentSize, size_t dictBufferCapacity,
-                                                 const void* samplesBuffer, const size_t* samplesSizes, unsigned nbSamples,
-                                                 ZDICT_params_t params)
+
+size_t ZDICT_finalizeDictionary(void* dictBuffer, size_t dictBufferCapacity,
+                          const void* customDictContent, size_t dictContentSize,
+                          const void* samplesBuffer, const size_t* samplesSizes, unsigned nbSamples,
+                          ZDICT_params_t params)
 {
     size_t hSize;
+#define HBUFFSIZE 256   /* should prove large enough for all entropy headers */
+    BYTE header[HBUFFSIZE];
     int const compressionLevel = (params.compressionLevel <= 0) ? g_compressionLevel_default : params.compressionLevel;
     U32 const notificationLevel = params.notificationLevel;
 
+    /* check conditions */
+    if (dictBufferCapacity < dictContentSize) return ERROR(dstSize_tooSmall);
+    if (dictContentSize < ZDICT_CONTENTSIZE_MIN) return ERROR(srcSize_wrong);
+    if (dictBufferCapacity < ZDICT_DICTSIZE_MIN) return ERROR(dstSize_tooSmall);
+
     /* dictionary header */
-    MEM_writeLE32(dictBuffer, ZSTD_DICT_MAGIC);
-    {   U64 const randomID = XXH64((char*)dictBuffer + dictBufferCapacity - dictContentSize, dictContentSize, 0);
+    MEM_writeLE32(header, ZSTD_MAGIC_DICTIONARY);
+    {   U64 const randomID = XXH64(customDictContent, dictContentSize, 0);
         U32 const compliantID = (randomID % ((1U<<31)-32768)) + 32768;
         U32 const dictID = params.dictID ? params.dictID : compliantID;
-        MEM_writeLE32((char*)dictBuffer+4, dictID);
+        MEM_writeLE32(header+4, dictID);
     }
     hSize = 8;
 
     /* entropy tables */
     DISPLAYLEVEL(2, "\r%70s\r", "");   /* clean display line */
     DISPLAYLEVEL(2, "statistics ... \n");
+    {   size_t const eSize = ZDICT_analyzeEntropy(header+hSize, HBUFFSIZE-hSize,
+                                  compressionLevel,
+                                  samplesBuffer, samplesSizes, nbSamples,
+                                  customDictContent, dictContentSize,
+                                  notificationLevel);
+        if (ZDICT_isError(eSize)) return eSize;
+        hSize += eSize;
+    }
+
+    /* copy elements in final buffer ; note : src and dst buffer can overlap */
+    if (hSize + dictContentSize > dictBufferCapacity) dictContentSize = dictBufferCapacity - hSize;
+    {   size_t const dictSize = hSize + dictContentSize;
+        char* dictEnd = (char*)dictBuffer + dictSize;
+        memmove(dictEnd - dictContentSize, customDictContent, dictContentSize);
+        memcpy(dictBuffer, header, hSize);
+        return dictSize;
+    }
+}
+
+
+size_t ZDICT_addEntropyTablesFromBuffer_advanced(void* dictBuffer, size_t dictContentSize, size_t dictBufferCapacity,
+                                                 const void* samplesBuffer, const size_t* samplesSizes, unsigned nbSamples,
+                                                 ZDICT_params_t params)
+{
+    int const compressionLevel = (params.compressionLevel <= 0) ? g_compressionLevel_default : params.compressionLevel;
+    U32 const notificationLevel = params.notificationLevel;
+    size_t hSize = 8;
+
+    /* calculate entropy tables */
+    DISPLAYLEVEL(2, "\r%70s\r", "");   /* clean display line */
+    DISPLAYLEVEL(2, "statistics ... \n");
     {   size_t const eSize = ZDICT_analyzeEntropy((char*)dictBuffer+hSize, dictBufferCapacity-hSize,
                                   compressionLevel,
                                   samplesBuffer, samplesSizes, nbSamples,
@@ -852,6 +905,13 @@ size_t ZDICT_addEntropyTablesFromBuffer_advanced(void* dictBuffer, size_t dictCo
         hSize += eSize;
     }
 
+    /* add dictionary header (after entropy tables) */
+    MEM_writeLE32(dictBuffer, ZSTD_MAGIC_DICTIONARY);
+    {   U64 const randomID = XXH64((char*)dictBuffer + dictBufferCapacity - dictContentSize, dictContentSize, 0);
+        U32 const compliantID = (randomID % ((1U<<31)-32768)) + 32768;
+        U32 const dictID = params.dictID ? params.dictID : compliantID;
+        MEM_writeLE32((char*)dictBuffer+4, dictID);
+    }
 
     if (hSize + dictContentSize < dictBufferCapacity)
         memmove((char*)dictBuffer + hSize, (char*)dictBuffer + dictBufferCapacity - dictContentSize, dictContentSize);
@@ -859,14 +919,14 @@ size_t ZDICT_addEntropyTablesFromBuffer_advanced(void* dictBuffer, size_t dictCo
 }
 
 
-/*! ZDICT_trainFromBuffer_unsafe() :
+/*! ZDICT_trainFromBuffer_unsafe_legacy() :
 *   Warning : `samplesBuffer` must be followed by noisy guard band.
 *   @return : size of dictionary, or an error code which can be tested with ZDICT_isError()
 */
-size_t ZDICT_trainFromBuffer_unsafe(
+size_t ZDICT_trainFromBuffer_unsafe_legacy(
                             void* dictBuffer, size_t maxDictSize,
                             const void* samplesBuffer, const size_t* samplesSizes, unsigned nbSamples,
-                            ZDICT_params_t params)
+                            ZDICT_legacy_params_t params)
 {
     U32 const dictListSize = MAX(MAX(DICTLISTSIZE_DEFAULT, nbSamples), (U32)(maxDictSize/16));
     dictItem* const dictList = (dictItem*)malloc(dictListSize * sizeof(*dictList));
@@ -875,33 +935,35 @@ size_t ZDICT_trainFromBuffer_unsafe(
     size_t const targetDictSize = maxDictSize;
     size_t const samplesBuffSize = ZDICT_totalSampleSize(samplesSizes, nbSamples);
     size_t dictSize = 0;
-    U32 const notificationLevel = params.notificationLevel;
+    U32 const notificationLevel = params.zParams.notificationLevel;
 
     /* checks */
     if (!dictList) return ERROR(memory_allocation);
-    if (maxDictSize <= g_provision_entropySize + g_min_fast_dictContent) { free(dictList); return ERROR(dstSize_tooSmall); }
-    if (samplesBuffSize < ZDICT_MIN_SAMPLES_SIZE) { free(dictList); return 0; }   /* not enough source to create dictionary */
+    if (maxDictSize < ZDICT_DICTSIZE_MIN) { free(dictList); return ERROR(dstSize_tooSmall); }   /* requested dictionary size is too small */
+    if (samplesBuffSize < ZDICT_MIN_SAMPLES_SIZE) { free(dictList); return ERROR(dictionaryCreation_failed); }   /* not enough source to create dictionary */
 
     /* init */
     ZDICT_initDictItem(dictList);
 
     /* build dictionary */
-    ZDICT_trainBuffer(dictList, dictListSize,
-                    samplesBuffer, samplesBuffSize,
-                    samplesSizes, nbSamples,
-                    minRep, notificationLevel);
+    ZDICT_trainBuffer_legacy(dictList, dictListSize,
+                       samplesBuffer, samplesBuffSize,
+                       samplesSizes, nbSamples,
+                       minRep, notificationLevel);
 
     /* display best matches */
-    if (params.notificationLevel>= 3) {
+    if (params.zParams.notificationLevel>= 3) {
         U32 const nb = MIN(25, dictList[0].pos);
         U32 const dictContentSize = ZDICT_dictSize(dictList);
         U32 u;
-        DISPLAYLEVEL(3, "\n %u segments found, of total size %u \n", dictList[0].pos, dictContentSize);
-        DISPLAYLEVEL(3, "list %u best segments \n", nb);
-        for (u=1; u<=nb; u++) {
-            U32 pos = dictList[u].pos;
-            U32 length = dictList[u].length;
-            U32 printedLength = MIN(40, length);
+        DISPLAYLEVEL(3, "\n %u segments found, of total size %u \n", dictList[0].pos-1, dictContentSize);
+        DISPLAYLEVEL(3, "list %u best segments \n", nb-1);
+        for (u=1; u<nb; u++) {
+            U32 const pos = dictList[u].pos;
+            U32 const length = dictList[u].length;
+            U32 const printedLength = MIN(40, length);
+            if ((pos > samplesBuffSize) || ((pos + length) > samplesBuffSize))
+                return ERROR(GENERIC);   /* should never happen */
             DISPLAYLEVEL(3, "%3u:%3u bytes at pos %8u, savings %7u bytes |",
                          u, length, pos, dictList[u].savings);
             ZDICT_printHex((const char*)samplesBuffer+pos, printedLength);
@@ -911,14 +973,15 @@ size_t ZDICT_trainFromBuffer_unsafe(
 
     /* create dictionary */
     {   U32 dictContentSize = ZDICT_dictSize(dictList);
-        if (dictContentSize < targetDictSize/3) {
+        if (dictContentSize < ZDICT_CONTENTSIZE_MIN) { free(dictList); return ERROR(dictionaryCreation_failed); }   /* dictionary content too small */
+        if (dictContentSize < targetDictSize/4) {
             DISPLAYLEVEL(2, "!  warning : selected content significantly smaller than requested (%u < %u) \n", dictContentSize, (U32)maxDictSize);
+            if (samplesBuffSize < 10 * targetDictSize)
+                DISPLAYLEVEL(2, "!  consider increasing the number of samples (total size : %u MB)\n", (U32)(samplesBuffSize>>20));
             if (minRep > MINRATIO) {
                 DISPLAYLEVEL(2, "!  consider increasing selectivity to produce larger dictionary (-s%u) \n", selectivity+1);
                 DISPLAYLEVEL(2, "!  note : larger dictionaries are not necessarily better, test its efficiency on samples \n");
             }
-            if (samplesBuffSize < 10 * targetDictSize)
-                DISPLAYLEVEL(2, "!  consider increasing the number of samples (total size : %u MB)\n", (U32)(samplesBuffSize>>20));
         }
 
         if ((dictContentSize > targetDictSize*3) && (nbSamples > 2*MINRATIO) && (selectivity>1)) {
@@ -926,7 +989,7 @@ size_t ZDICT_trainFromBuffer_unsafe(
             while ((nbSamples >> proposedSelectivity) <= MINRATIO) { proposedSelectivity--; }
             DISPLAYLEVEL(2, "!  note : calculated dictionary significantly larger than requested (%u > %u) \n", dictContentSize, (U32)maxDictSize);
             DISPLAYLEVEL(2, "!  consider increasing dictionary size, or produce denser dictionary (-s%u) \n", proposedSelectivity);
-            DISPLAYLEVEL(2, "!  always test dictionary efficiency on samples \n");
+            DISPLAYLEVEL(2, "!  always test dictionary efficiency on real samples \n");
         }
 
         /* limit dictionary size */
@@ -952,7 +1015,7 @@ size_t ZDICT_trainFromBuffer_unsafe(
 
         dictSize = ZDICT_addEntropyTablesFromBuffer_advanced(dictBuffer, dictContentSize, maxDictSize,
                                                              samplesBuffer, samplesSizes, nbSamples,
-                                                             params);
+                                                             params.zParams);
     }
 
     /* clean up */
@@ -963,9 +1026,9 @@ size_t ZDICT_trainFromBuffer_unsafe(
 
 /* issue : samplesBuffer need to be followed by a noisy guard band.
 *  work around : duplicate the buffer, and add the noise */
-size_t ZDICT_trainFromBuffer_advanced(void* dictBuffer, size_t dictBufferCapacity,
-                                      const void* samplesBuffer, const size_t* samplesSizes, unsigned nbSamples,
-                                      ZDICT_params_t params)
+size_t ZDICT_trainFromBuffer_legacy(void* dictBuffer, size_t dictBufferCapacity,
+                              const void* samplesBuffer, const size_t* samplesSizes, unsigned nbSamples,
+                              ZDICT_legacy_params_t params)
 {
     size_t result;
     void* newBuff;
@@ -978,10 +1041,9 @@ size_t ZDICT_trainFromBuffer_advanced(void* dictBuffer, size_t dictBufferCapacit
     memcpy(newBuff, samplesBuffer, sBuffSize);
     ZDICT_fillNoise((char*)newBuff + sBuffSize, NOISELENGTH);   /* guard band, for end of buffer condition */
 
-    result = ZDICT_trainFromBuffer_unsafe(
-                                        dictBuffer, dictBufferCapacity,
-                                        newBuff, samplesSizes, nbSamples,
-                                        params);
+    result =
+        ZDICT_trainFromBuffer_unsafe_legacy(dictBuffer, dictBufferCapacity, newBuff,
+                                            samplesSizes, nbSamples, params);
     free(newBuff);
     return result;
 }
@@ -990,11 +1052,13 @@ size_t ZDICT_trainFromBuffer_advanced(void* dictBuffer, size_t dictBufferCapacit
 size_t ZDICT_trainFromBuffer(void* dictBuffer, size_t dictBufferCapacity,
                              const void* samplesBuffer, const size_t* samplesSizes, unsigned nbSamples)
 {
-    ZDICT_params_t params;
+    ZDICT_cover_params_t params;
     memset(&params, 0, sizeof(params));
-    return ZDICT_trainFromBuffer_advanced(dictBuffer, dictBufferCapacity,
-                                          samplesBuffer, samplesSizes, nbSamples,
-                                          params);
+    params.d = 8;
+    params.steps = 4;
+    return ZDICT_optimizeTrainFromBuffer_cover(dictBuffer, dictBufferCapacity,
+                                               samplesBuffer, samplesSizes,
+                                               nbSamples, &params);
 }
 
 size_t ZDICT_addEntropyTablesFromBuffer(void* dictBuffer, size_t dictContentSize, size_t dictBufferCapacity,
diff --git a/contrib/libzstd/include/zstd/dictBuilder/zdict.h b/contrib/libzstd/include/zstd/dictBuilder/zdict.h
index 642a43516b9..7bfbb351a1d 100644
--- a/contrib/libzstd/include/zstd/dictBuilder/zdict.h
+++ b/contrib/libzstd/include/zstd/dictBuilder/zdict.h
@@ -19,30 +19,37 @@ extern "C" {
 #include <stddef.h>  /* size_t */
 
 
-/*======  Export for Windows  ======*/
-/*!
-*  ZSTD_DLL_EXPORT :
-*  Enable exporting of functions when building a Windows DLL
-*/
-#if defined(_WIN32) && defined(ZSTD_DLL_EXPORT) && (ZSTD_DLL_EXPORT==1)
-#  define ZDICTLIB_API __declspec(dllexport)
+/* =====   ZDICTLIB_API : control library symbols visibility   ===== */
+#ifndef ZDICTLIB_VISIBILITY
+#  if defined(__GNUC__) && (__GNUC__ >= 4)
+#    define ZDICTLIB_VISIBILITY __attribute__ ((visibility ("default")))
+#  else
+#    define ZDICTLIB_VISIBILITY
+#  endif
+#endif
+#if defined(ZSTD_DLL_EXPORT) && (ZSTD_DLL_EXPORT==1)
+#  define ZDICTLIB_API __declspec(dllexport) ZDICTLIB_VISIBILITY
+#elif defined(ZSTD_DLL_IMPORT) && (ZSTD_DLL_IMPORT==1)
+#  define ZDICTLIB_API __declspec(dllimport) ZDICTLIB_VISIBILITY /* It isn't required but allows to generate better code, saving a function pointer load from the IAT and an indirect jump.*/
 #else
-#  define ZDICTLIB_API
+#  define ZDICTLIB_API ZDICTLIB_VISIBILITY
 #endif
 
 
-/*! ZDICT_trainFromBuffer() :
-    Train a dictionary from an array of samples.
-    Samples must be stored concatenated in a single flat buffer `samplesBuffer`,
-    supplied with an array of sizes `samplesSizes`, providing the size of each sample, in order.
-    The resulting dictionary will be saved into `dictBuffer`.
-    @return : size of dictionary stored into `dictBuffer` (<= `dictBufferCapacity`)
-              or an error code, which can be tested with ZDICT_isError().
-    Tips : In general, a reasonable dictionary has a size of ~ 100 KB.
-           It's obviously possible to target smaller or larger ones, just by specifying different `dictBufferCapacity`.
-           In general, it's recommended to provide a few thousands samples, but this can vary a lot.
-           It's recommended that total size of all samples be about ~x100 times the target size of dictionary.
-*/
+/*! ZDICT_trainFromBuffer():
+ * Train a dictionary from an array of samples.
+ * Uses ZDICT_optimizeTrainFromBuffer_cover() single-threaded, with d=8 and steps=4.
+ * Samples must be stored concatenated in a single flat buffer `samplesBuffer`,
+ * supplied with an array of sizes `samplesSizes`, providing the size of each sample, in order.
+ * The resulting dictionary will be saved into `dictBuffer`.
+ * @return: size of dictionary stored into `dictBuffer` (<= `dictBufferCapacity`)
+ *           or an error code, which can be tested with ZDICT_isError().
+ * Note: ZDICT_trainFromBuffer() requires about 9 bytes of memory for each input byte.
+ * Tips: In general, a reasonable dictionary has a size of ~ 100 KB.
+ *        It's obviously possible to target smaller or larger ones, just by specifying different `dictBufferCapacity`.
+ *        In general, it's recommended to provide a few thousands samples, but this can vary a lot.
+ *        It's recommended that total size of all samples be about ~x100 times the target size of dictionary.
+ */
 ZDICTLIB_API size_t ZDICT_trainFromBuffer(void* dictBuffer, size_t dictBufferCapacity,
                        const void* samplesBuffer, const size_t* samplesSizes, unsigned nbSamples);
 
@@ -64,42 +71,134 @@ ZDICTLIB_API const char* ZDICT_getErrorName(size_t errorCode);
  * ==================================================================================== */
 
 typedef struct {
-    unsigned selectivityLevel;   /* 0 means default; larger => select more => larger dictionary */
     int      compressionLevel;   /* 0 means default; target a specific zstd compression level */
     unsigned notificationLevel;  /* Write to stderr; 0 = none (default); 1 = errors; 2 = progression; 3 = details; 4 = debug; */
     unsigned dictID;             /* 0 means auto mode (32-bits random value); other : force dictID value */
-    unsigned reserved[2];        /* reserved space for future parameters */
 } ZDICT_params_t;
 
+/*! ZDICT_cover_params_t:
+ *  For all values 0 means default.
+ *  k and d are the only required parameters.
+ */
+typedef struct {
+    unsigned k;                  /* Segment size : constraint: 0 < k : Reasonable range [16, 2048+] */
+    unsigned d;                  /* dmer size : constraint: 0 < d <= k : Reasonable range [6, 16] */
+    unsigned steps;              /* Number of steps : Only used for optimization : 0 means default (32) : Higher means more parameters checked */
+    unsigned nbThreads;          /* Number of threads : constraint: 0 < nbThreads : 1 means single-threaded : Only used for optimization : Ignored if ZSTD_MULTITHREAD is not defined */
+    ZDICT_params_t zParams;
+} ZDICT_cover_params_t;
 
-/*! ZDICT_trainFromBuffer_advanced() :
-    Same as ZDICT_trainFromBuffer() with control over more parameters.
-    `parameters` is optional and can be provided with values set to 0 to mean "default".
-    @return : size of dictionary stored into `dictBuffer` (<= `dictBufferSize`),
-              or an error code, which can be tested by ZDICT_isError().
-    note : ZDICT_trainFromBuffer_advanced() will send notifications into stderr if instructed to, using notificationLevel>0.
-*/
-size_t ZDICT_trainFromBuffer_advanced(void* dictBuffer, size_t dictBufferCapacity,
+
+/*! ZDICT_trainFromBuffer_cover():
+ * Train a dictionary from an array of samples using the COVER algorithm.
+ * Samples must be stored concatenated in a single flat buffer `samplesBuffer`,
+ * supplied with an array of sizes `samplesSizes`, providing the size of each sample, in order.
+ * The resulting dictionary will be saved into `dictBuffer`.
+ * @return: size of dictionary stored into `dictBuffer` (<= `dictBufferCapacity`)
+ *           or an error code, which can be tested with ZDICT_isError().
+ * Note: ZDICT_trainFromBuffer_cover() requires about 9 bytes of memory for each input byte.
+ * Tips: In general, a reasonable dictionary has a size of ~ 100 KB.
+ *        It's obviously possible to target smaller or larger ones, just by specifying different `dictBufferCapacity`.
+ *        In general, it's recommended to provide a few thousands samples, but this can vary a lot.
+ *        It's recommended that total size of all samples be about ~x100 times the target size of dictionary.
+ */
+ZDICTLIB_API size_t ZDICT_trainFromBuffer_cover(
+    void *dictBuffer, size_t dictBufferCapacity, const void *samplesBuffer,
+    const size_t *samplesSizes, unsigned nbSamples,
+    ZDICT_cover_params_t parameters);
+
+/*! ZDICT_optimizeTrainFromBuffer_cover():
+ * The same requirements as above hold for all the parameters except `parameters`.
+ * This function tries many parameter combinations and picks the best parameters.
+ * `*parameters` is filled with the best parameters found, and the dictionary
+ * constructed with those parameters is stored in `dictBuffer`.
+ *
+ * All of the parameters d, k, steps are optional.
+ * If d is non-zero then we don't check multiple values of d, otherwise we check d = {6, 8, 10, 12, 14, 16}.
+ * if steps is zero it defaults to its default value.
+ * If k is non-zero then we don't check multiple values of k, otherwise we check steps values in [16, 2048].
+ *
+ * @return: size of dictionary stored into `dictBuffer` (<= `dictBufferCapacity`)
+ *           or an error code, which can be tested with ZDICT_isError().
+ *           On success `*parameters` contains the parameters selected.
+ * Note: ZDICT_optimizeTrainFromBuffer_cover() requires about 8 bytes of memory for each input byte and additionally another 5 bytes of memory for each byte of memory for each thread.
+ */
+ZDICTLIB_API size_t ZDICT_optimizeTrainFromBuffer_cover(
+    void *dictBuffer, size_t dictBufferCapacity, const void *samplesBuffer,
+    const size_t *samplesSizes, unsigned nbSamples,
+    ZDICT_cover_params_t *parameters);
+
+/*! ZDICT_finalizeDictionary():
+ * Given a custom content as a basis for dictionary, and a set of samples,
+ * finalize dictionary by adding headers and statistics.
+ *
+ * Samples must be stored concatenated in a flat buffer `samplesBuffer`,
+ * supplied with an array of sizes `samplesSizes`, providing the size of each sample in order.
+ *
+ * dictContentSize must be >= ZDICT_CONTENTSIZE_MIN bytes.
+ * maxDictSize must be >= dictContentSize, and must be >= ZDICT_DICTSIZE_MIN bytes.
+ *
+ * @return: size of dictionary stored into `dictBuffer` (<= `dictBufferCapacity`),
+ *           or an error code, which can be tested by ZDICT_isError().
+ * Note: ZDICT_finalizeDictionary() will push notifications into stderr if instructed to, using notificationLevel>0.
+ * Note 2: dictBuffer and dictContent can overlap
+ */
+#define ZDICT_CONTENTSIZE_MIN 128
+#define ZDICT_DICTSIZE_MIN    256
+ZDICTLIB_API size_t ZDICT_finalizeDictionary(void* dictBuffer, size_t dictBufferCapacity,
+                                const void* dictContent, size_t dictContentSize,
                                 const void* samplesBuffer, const size_t* samplesSizes, unsigned nbSamples,
                                 ZDICT_params_t parameters);
 
+typedef struct {
+    unsigned selectivityLevel;   /* 0 means default; larger => select more => larger dictionary */
+    ZDICT_params_t zParams;
+} ZDICT_legacy_params_t;
 
-/*! ZDICT_addEntropyTablesFromBuffer() :
+/*! ZDICT_trainFromBuffer_legacy():
+ * Train a dictionary from an array of samples.
+ * Samples must be stored concatenated in a single flat buffer `samplesBuffer`,
+ * supplied with an array of sizes `samplesSizes`, providing the size of each sample, in order.
+ * The resulting dictionary will be saved into `dictBuffer`.
+ * `parameters` is optional and can be provided with values set to 0 to mean "default".
+ * @return: size of dictionary stored into `dictBuffer` (<= `dictBufferCapacity`)
+ *           or an error code, which can be tested with ZDICT_isError().
+ * Tips: In general, a reasonable dictionary has a size of ~ 100 KB.
+ *        It's obviously possible to target smaller or larger ones, just by specifying different `dictBufferCapacity`.
+ *        In general, it's recommended to provide a few thousands samples, but this can vary a lot.
+ *        It's recommended that total size of all samples be about ~x100 times the target size of dictionary.
+ * Note: ZDICT_trainFromBuffer_legacy() will send notifications into stderr if instructed to, using notificationLevel>0.
+ */
+ZDICTLIB_API size_t ZDICT_trainFromBuffer_legacy(
+    void *dictBuffer, size_t dictBufferCapacity, const void *samplesBuffer,
+    const size_t *samplesSizes, unsigned nbSamples, ZDICT_legacy_params_t parameters);
 
-    Given a content-only dictionary (built using any 3rd party algorithm),
-    add entropy tables computed from an array of samples.
-    Samples must be stored concatenated in a flat buffer `samplesBuffer`,
-    supplied with an array of sizes `samplesSizes`, providing the size of each sample in order.
+/* Deprecation warnings */
+/* It is generally possible to disable deprecation warnings from compiler,
+   for example with -Wno-deprecated-declarations for gcc
+   or _CRT_SECURE_NO_WARNINGS in Visual.
+   Otherwise, it's also possible to manually define ZDICT_DISABLE_DEPRECATE_WARNINGS */
+#ifdef ZDICT_DISABLE_DEPRECATE_WARNINGS
+#  define ZDICT_DEPRECATED(message) ZDICTLIB_API   /* disable deprecation warnings */
+#else
+#  define ZDICT_GCC_VERSION (__GNUC__ * 100 + __GNUC_MINOR__)
+#  if defined (__cplusplus) && (__cplusplus >= 201402) /* C++14 or greater */
+#    define ZDICT_DEPRECATED(message) [[deprecated(message)]] ZDICTLIB_API
+#  elif (ZDICT_GCC_VERSION >= 405) || defined(__clang__)
+#    define ZDICT_DEPRECATED(message) ZDICTLIB_API __attribute__((deprecated(message)))
+#  elif (ZDICT_GCC_VERSION >= 301)
+#    define ZDICT_DEPRECATED(message) ZDICTLIB_API __attribute__((deprecated))
+#  elif defined(_MSC_VER)
+#    define ZDICT_DEPRECATED(message) ZDICTLIB_API __declspec(deprecated(message))
+#  else
+#    pragma message("WARNING: You need to implement ZDICT_DEPRECATED for this compiler")
+#    define ZDICT_DEPRECATED(message) ZDICTLIB_API
+#  endif
+#endif /* ZDICT_DISABLE_DEPRECATE_WARNINGS */
 
-    The input dictionary content must be stored *at the end* of `dictBuffer`.
-    Its size is `dictContentSize`.
-    The resulting dictionary with added entropy tables will be *written back to `dictBuffer`*,
-    starting from its beginning.
-    @return : size of dictionary stored into `dictBuffer` (<= `dictBufferCapacity`).
-*/
+ZDICT_DEPRECATED("use ZDICT_finalizeDictionary() instead")
 size_t ZDICT_addEntropyTablesFromBuffer(void* dictBuffer, size_t dictContentSize, size_t dictBufferCapacity,
-                                        const void* samplesBuffer, const size_t* samplesSizes, unsigned nbSamples);
-
+                                  const void* samplesBuffer, const size_t* samplesSizes, unsigned nbSamples);
 
 
 #endif   /* ZDICT_STATIC_LINKING_ONLY */
diff --git a/contrib/libzstd/include/zstd/legacy/zstd_legacy.h b/contrib/libzstd/include/zstd/legacy/zstd_legacy.h
index 7a7167a8036..3c9798f880e 100644
--- a/contrib/libzstd/include/zstd/legacy/zstd_legacy.h
+++ b/contrib/libzstd/include/zstd/legacy/zstd_legacy.h
@@ -7,8 +7,6 @@
  * of patent rights can be found in the PATENTS file in the same directory.
  */
 
-/// Milovidov: we had only used version 6 in ClickHouse.
-
 #ifndef ZSTD_LEGACY_H
 #define ZSTD_LEGACY_H
 
@@ -22,8 +20,33 @@ extern "C" {
 #include "mem.h"            /* MEM_STATIC */
 #include "error_private.h"  /* ERROR */
 #include "zstd.h"           /* ZSTD_inBuffer, ZSTD_outBuffer */
-#include "zstd_v06.h"
 
+#if !defined (ZSTD_LEGACY_SUPPORT) || (ZSTD_LEGACY_SUPPORT == 0)
+#  undef ZSTD_LEGACY_SUPPORT
+#  define ZSTD_LEGACY_SUPPORT 8
+#endif
+
+#if (ZSTD_LEGACY_SUPPORT <= 1)
+#  include "zstd_v01.h"
+#endif
+#if (ZSTD_LEGACY_SUPPORT <= 2)
+#  include "zstd_v02.h"
+#endif
+#if (ZSTD_LEGACY_SUPPORT <= 3)
+#  include "zstd_v03.h"
+#endif
+#if (ZSTD_LEGACY_SUPPORT <= 4)
+#  include "zstd_v04.h"
+#endif
+#if (ZSTD_LEGACY_SUPPORT <= 5)
+#  include "zstd_v05.h"
+#endif
+#if (ZSTD_LEGACY_SUPPORT <= 6)
+#  include "zstd_v06.h"
+#endif
+#if (ZSTD_LEGACY_SUPPORT <= 7)
+#  include "zstd_v07.h"
+#endif
 
 /** ZSTD_isLegacy() :
     @return : > 0 if supported by legacy decoder. 0 otherwise.
@@ -36,7 +59,27 @@ MEM_STATIC unsigned ZSTD_isLegacy(const void* src, size_t srcSize)
     magicNumberLE = MEM_readLE32(src);
     switch(magicNumberLE)
     {
+#if (ZSTD_LEGACY_SUPPORT <= 1)
+        case ZSTDv01_magicNumberLE:return 1;
+#endif
+#if (ZSTD_LEGACY_SUPPORT <= 2)
+        case ZSTDv02_magicNumber : return 2;
+#endif
+#if (ZSTD_LEGACY_SUPPORT <= 3)
+        case ZSTDv03_magicNumber : return 3;
+#endif
+#if (ZSTD_LEGACY_SUPPORT <= 4)
+        case ZSTDv04_magicNumber : return 4;
+#endif
+#if (ZSTD_LEGACY_SUPPORT <= 5)
+        case ZSTDv05_MAGICNUMBER : return 5;
+#endif
+#if (ZSTD_LEGACY_SUPPORT <= 6)
         case ZSTDv06_MAGICNUMBER : return 6;
+#endif
+#if (ZSTD_LEGACY_SUPPORT <= 7)
+        case ZSTDv07_MAGICNUMBER : return 7;
+#endif
         default : return 0;
     }
 }
@@ -45,12 +88,31 @@ MEM_STATIC unsigned ZSTD_isLegacy(const void* src, size_t srcSize)
 MEM_STATIC unsigned long long ZSTD_getDecompressedSize_legacy(const void* src, size_t srcSize)
 {
     U32 const version = ZSTD_isLegacy(src, srcSize);
+    if (version < 5) return 0;  /* no decompressed size in frame header, or not a legacy format */
+#if (ZSTD_LEGACY_SUPPORT <= 5)
+    if (version==5) {
+        ZSTDv05_parameters fParams;
+        size_t const frResult = ZSTDv05_getFrameParams(&fParams, src, srcSize);
+        if (frResult != 0) return 0;
+        return fParams.srcSize;
+    }
+#endif
+#if (ZSTD_LEGACY_SUPPORT <= 6)
     if (version==6) {
         ZSTDv06_frameParams fParams;
         size_t const frResult = ZSTDv06_getFrameParams(&fParams, src, srcSize);
         if (frResult != 0) return 0;
         return fParams.frameContentSize;
     }
+#endif
+#if (ZSTD_LEGACY_SUPPORT <= 7)
+    if (version==7) {
+        ZSTDv07_frameParams fParams;
+        size_t const frResult = ZSTDv07_getFrameParams(&fParams, src, srcSize);
+        if (frResult != 0) return 0;
+        return fParams.frameContentSize;
+    }
+#endif
     return 0;   /* should not be possible */
 }
 
@@ -63,6 +125,33 @@ MEM_STATIC size_t ZSTD_decompressLegacy(
     U32 const version = ZSTD_isLegacy(src, compressedSize);
     switch(version)
     {
+#if (ZSTD_LEGACY_SUPPORT <= 1)
+        case 1 :
+            return ZSTDv01_decompress(dst, dstCapacity, src, compressedSize);
+#endif
+#if (ZSTD_LEGACY_SUPPORT <= 2)
+        case 2 :
+            return ZSTDv02_decompress(dst, dstCapacity, src, compressedSize);
+#endif
+#if (ZSTD_LEGACY_SUPPORT <= 3)
+        case 3 :
+            return ZSTDv03_decompress(dst, dstCapacity, src, compressedSize);
+#endif
+#if (ZSTD_LEGACY_SUPPORT <= 4)
+        case 4 :
+            return ZSTDv04_decompress(dst, dstCapacity, src, compressedSize);
+#endif
+#if (ZSTD_LEGACY_SUPPORT <= 5)
+        case 5 :
+            {   size_t result;
+                ZSTDv05_DCtx* const zd = ZSTDv05_createDCtx();
+                if (zd==NULL) return ERROR(memory_allocation);
+                result = ZSTDv05_decompress_usingDict(zd, dst, dstCapacity, src, compressedSize, dict, dictSize);
+                ZSTDv05_freeDCtx(zd);
+                return result;
+            }
+#endif
+#if (ZSTD_LEGACY_SUPPORT <= 6)
         case 6 :
             {   size_t result;
                 ZSTDv06_DCtx* const zd = ZSTDv06_createDCtx();
@@ -71,19 +160,82 @@ MEM_STATIC size_t ZSTD_decompressLegacy(
                 ZSTDv06_freeDCtx(zd);
                 return result;
             }
+#endif
+#if (ZSTD_LEGACY_SUPPORT <= 7)
+        case 7 :
+            {   size_t result;
+                ZSTDv07_DCtx* const zd = ZSTDv07_createDCtx();
+                if (zd==NULL) return ERROR(memory_allocation);
+                result = ZSTDv07_decompress_usingDict(zd, dst, dstCapacity, src, compressedSize, dict, dictSize);
+                ZSTDv07_freeDCtx(zd);
+                return result;
+            }
+#endif
         default :
             return ERROR(prefix_unknown);
     }
 }
 
+MEM_STATIC size_t ZSTD_findFrameCompressedSizeLegacy(const void *src,
+                                             size_t compressedSize)
+{
+    U32 const version = ZSTD_isLegacy(src, compressedSize);
+    switch(version)
+    {
+#if (ZSTD_LEGACY_SUPPORT <= 1)
+        case 1 :
+            return ZSTDv01_findFrameCompressedSize(src, compressedSize);
+#endif
+#if (ZSTD_LEGACY_SUPPORT <= 2)
+        case 2 :
+            return ZSTDv02_findFrameCompressedSize(src, compressedSize);
+#endif
+#if (ZSTD_LEGACY_SUPPORT <= 3)
+        case 3 :
+            return ZSTDv03_findFrameCompressedSize(src, compressedSize);
+#endif
+#if (ZSTD_LEGACY_SUPPORT <= 4)
+        case 4 :
+            return ZSTDv04_findFrameCompressedSize(src, compressedSize);
+#endif
+#if (ZSTD_LEGACY_SUPPORT <= 5)
+        case 5 :
+            return ZSTDv05_findFrameCompressedSize(src, compressedSize);
+#endif
+#if (ZSTD_LEGACY_SUPPORT <= 6)
+        case 6 :
+            return ZSTDv06_findFrameCompressedSize(src, compressedSize);
+#endif
+#if (ZSTD_LEGACY_SUPPORT <= 7)
+        case 7 :
+            return ZSTDv07_findFrameCompressedSize(src, compressedSize);
+#endif
+        default :
+            return ERROR(prefix_unknown);
+    }
+}
 
 MEM_STATIC size_t ZSTD_freeLegacyStreamContext(void* legacyContext, U32 version)
 {
     switch(version)
     {
         default :
+        case 1 :
+        case 2 :
+        case 3 :
             return ERROR(version_unsupported);
+#if (ZSTD_LEGACY_SUPPORT <= 4)
+        case 4 : return ZBUFFv04_freeDCtx((ZBUFFv04_DCtx*)legacyContext);
+#endif
+#if (ZSTD_LEGACY_SUPPORT <= 5)
+        case 5 : return ZBUFFv05_freeDCtx((ZBUFFv05_DCtx*)legacyContext);
+#endif
+#if (ZSTD_LEGACY_SUPPORT <= 6)
         case 6 : return ZBUFFv06_freeDCtx((ZBUFFv06_DCtx*)legacyContext);
+#endif
+#if (ZSTD_LEGACY_SUPPORT <= 7)
+        case 7 : return ZBUFFv07_freeDCtx((ZBUFFv07_DCtx*)legacyContext);
+#endif
     }
 }
 
@@ -95,7 +247,32 @@ MEM_STATIC size_t ZSTD_initLegacyStream(void** legacyContext, U32 prevVersion, U
     switch(newVersion)
     {
         default :
+        case 1 :
+        case 2 :
+        case 3 :
             return 0;
+#if (ZSTD_LEGACY_SUPPORT <= 4)
+        case 4 :
+        {
+            ZBUFFv04_DCtx* dctx = (prevVersion != newVersion) ? ZBUFFv04_createDCtx() : (ZBUFFv04_DCtx*)*legacyContext;
+            if (dctx==NULL) return ERROR(memory_allocation);
+            ZBUFFv04_decompressInit(dctx);
+            ZBUFFv04_decompressWithDictionary(dctx, dict, dictSize);
+            *legacyContext = dctx;
+            return 0;
+        }
+#endif
+#if (ZSTD_LEGACY_SUPPORT <= 5)
+        case 5 :
+        {
+            ZBUFFv05_DCtx* dctx = (prevVersion != newVersion) ? ZBUFFv05_createDCtx() : (ZBUFFv05_DCtx*)*legacyContext;
+            if (dctx==NULL) return ERROR(memory_allocation);
+            ZBUFFv05_decompressInitDictionary(dctx, dict, dictSize);
+            *legacyContext = dctx;
+            return 0;
+        }
+#endif
+#if (ZSTD_LEGACY_SUPPORT <= 6)
         case 6 :
         {
             ZBUFFv06_DCtx* dctx = (prevVersion != newVersion) ? ZBUFFv06_createDCtx() : (ZBUFFv06_DCtx*)*legacyContext;
@@ -104,6 +281,17 @@ MEM_STATIC size_t ZSTD_initLegacyStream(void** legacyContext, U32 prevVersion, U
             *legacyContext = dctx;
             return 0;
         }
+#endif
+#if (ZSTD_LEGACY_SUPPORT <= 7)
+        case 7 :
+        {
+            ZBUFFv07_DCtx* dctx = (prevVersion != newVersion) ? ZBUFFv07_createDCtx() : (ZBUFFv07_DCtx*)*legacyContext;
+            if (dctx==NULL) return ERROR(memory_allocation);
+            ZBUFFv07_decompressInitDictionary(dctx, dict, dictSize);
+            *legacyContext = dctx;
+            return 0;
+        }
+#endif
     }
 }
 
@@ -115,7 +303,39 @@ MEM_STATIC size_t ZSTD_decompressLegacyStream(void* legacyContext, U32 version,
     switch(version)
     {
         default :
+        case 1 :
+        case 2 :
+        case 3 :
             return ERROR(version_unsupported);
+#if (ZSTD_LEGACY_SUPPORT <= 4)
+        case 4 :
+            {
+                ZBUFFv04_DCtx* dctx = (ZBUFFv04_DCtx*) legacyContext;
+                const void* src = (const char*)input->src + input->pos;
+                size_t readSize = input->size - input->pos;
+                void* dst = (char*)output->dst + output->pos;
+                size_t decodedSize = output->size - output->pos;
+                size_t const hintSize = ZBUFFv04_decompressContinue(dctx, dst, &decodedSize, src, &readSize);
+                output->pos += decodedSize;
+                input->pos += readSize;
+                return hintSize;
+            }
+#endif
+#if (ZSTD_LEGACY_SUPPORT <= 5)
+        case 5 :
+            {
+                ZBUFFv05_DCtx* dctx = (ZBUFFv05_DCtx*) legacyContext;
+                const void* src = (const char*)input->src + input->pos;
+                size_t readSize = input->size - input->pos;
+                void* dst = (char*)output->dst + output->pos;
+                size_t decodedSize = output->size - output->pos;
+                size_t const hintSize = ZBUFFv05_decompressContinue(dctx, dst, &decodedSize, src, &readSize);
+                output->pos += decodedSize;
+                input->pos += readSize;
+                return hintSize;
+            }
+#endif
+#if (ZSTD_LEGACY_SUPPORT <= 6)
         case 6 :
             {
                 ZBUFFv06_DCtx* dctx = (ZBUFFv06_DCtx*) legacyContext;
@@ -128,6 +348,21 @@ MEM_STATIC size_t ZSTD_decompressLegacyStream(void* legacyContext, U32 version,
                 input->pos += readSize;
                 return hintSize;
             }
+#endif
+#if (ZSTD_LEGACY_SUPPORT <= 7)
+        case 7 :
+            {
+                ZBUFFv07_DCtx* dctx = (ZBUFFv07_DCtx*) legacyContext;
+                const void* src = (const char*)input->src + input->pos;
+                size_t readSize = input->size - input->pos;
+                void* dst = (char*)output->dst + output->pos;
+                size_t decodedSize = output->size - output->pos;
+                size_t const hintSize = ZBUFFv07_decompressContinue(dctx, dst, &decodedSize, src, &readSize);
+                output->pos += decodedSize;
+                input->pos += readSize;
+                return hintSize;
+            }
+#endif
     }
 }
 
diff --git a/contrib/libzstd/include/zstd/legacy/zstd_v01.c b/contrib/libzstd/include/zstd/legacy/zstd_v01.c
new file mode 100644
index 00000000000..cf5354d6a9b
--- /dev/null
+++ b/contrib/libzstd/include/zstd/legacy/zstd_v01.c
@@ -0,0 +1,2126 @@
+/**
+ * Copyright (c) 2016-present, Yann Collet, Facebook, Inc.
+ * All rights reserved.
+ *
+ * This source code is licensed under the BSD-style license found in the
+ * LICENSE file in the root directory of this source tree. An additional grant
+ * of patent rights can be found in the PATENTS file in the same directory.
+ */
+
+
+/******************************************
+*  Includes
+******************************************/
+#include <stddef.h>    /* size_t, ptrdiff_t */
+#include "zstd_v01.h"
+#include "error_private.h"
+
+
+/******************************************
+*  Static allocation
+******************************************/
+/* You can statically allocate FSE CTable/DTable as a table of unsigned using below macro */
+#define FSE_DTABLE_SIZE_U32(maxTableLog)                   (1 + (1<<maxTableLog))
+
+/* You can statically allocate Huff0 DTable as a table of unsigned short using below macro */
+#define HUF_DTABLE_SIZE_U16(maxTableLog)   (1 + (1<<maxTableLog))
+#define HUF_CREATE_STATIC_DTABLE(DTable, maxTableLog) \
+        unsigned short DTable[HUF_DTABLE_SIZE_U16(maxTableLog)] = { maxTableLog }
+
+
+/******************************************
+*  Error Management
+******************************************/
+#define FSE_LIST_ERRORS(ITEM) \
+        ITEM(FSE_OK_NoError) ITEM(FSE_ERROR_GENERIC) \
+        ITEM(FSE_ERROR_tableLog_tooLarge) ITEM(FSE_ERROR_maxSymbolValue_tooLarge) ITEM(FSE_ERROR_maxSymbolValue_tooSmall) \
+        ITEM(FSE_ERROR_dstSize_tooSmall) ITEM(FSE_ERROR_srcSize_wrong)\
+        ITEM(FSE_ERROR_corruptionDetected) \
+        ITEM(FSE_ERROR_maxCode)
+
+#define FSE_GENERATE_ENUM(ENUM) ENUM,
+typedef enum { FSE_LIST_ERRORS(FSE_GENERATE_ENUM) } FSE_errorCodes;  /* enum is exposed, to detect & handle specific errors; compare function result to -enum value */
+
+
+/******************************************
+*  FSE symbol compression API
+******************************************/
+/*
+   This API consists of small unitary functions, which highly benefit from being inlined.
+   You will want to enable link-time-optimization to ensure these functions are properly inlined in your binary.
+   Visual seems to do it automatically.
+   For gcc or clang, you'll need to add -flto flag at compilation and linking stages.
+   If none of these solutions is applicable, include "fse.c" directly.
+*/
+
+typedef unsigned FSE_CTable;   /* don't allocate that. It's just a way to be more restrictive than void* */
+typedef unsigned FSE_DTable;   /* don't allocate that. It's just a way to be more restrictive than void* */
+
+typedef struct
+{
+    size_t bitContainer;
+    int    bitPos;
+    char*  startPtr;
+    char*  ptr;
+    char*  endPtr;
+} FSE_CStream_t;
+
+typedef struct
+{
+    ptrdiff_t   value;
+    const void* stateTable;
+    const void* symbolTT;
+    unsigned    stateLog;
+} FSE_CState_t;
+
+typedef struct
+{
+    size_t   bitContainer;
+    unsigned bitsConsumed;
+    const char* ptr;
+    const char* start;
+} FSE_DStream_t;
+
+typedef struct
+{
+    size_t      state;
+    const void* table;   /* precise table may vary, depending on U16 */
+} FSE_DState_t;
+
+typedef enum { FSE_DStream_unfinished = 0,
+               FSE_DStream_endOfBuffer = 1,
+               FSE_DStream_completed = 2,
+               FSE_DStream_tooFar = 3 } FSE_DStream_status;  /* result of FSE_reloadDStream() */
+               /* 1,2,4,8 would be better for bitmap combinations, but slows down performance a bit ... ?! */
+
+
+/****************************************************************
+*  Tuning parameters
+****************************************************************/
+/* MEMORY_USAGE :
+*  Memory usage formula : N->2^N Bytes (examples : 10 -> 1KB; 12 -> 4KB ; 16 -> 64KB; 20 -> 1MB; etc.)
+*  Increasing memory usage improves compression ratio
+*  Reduced memory usage can improve speed, due to cache effect
+*  Recommended max value is 14, for 16KB, which nicely fits into Intel x86 L1 cache */
+#define FSE_MAX_MEMORY_USAGE 14
+#define FSE_DEFAULT_MEMORY_USAGE 13
+
+/* FSE_MAX_SYMBOL_VALUE :
+*  Maximum symbol value authorized.
+*  Required for proper stack allocation */
+#define FSE_MAX_SYMBOL_VALUE 255
+
+
+/****************************************************************
+*  template functions type & suffix
+****************************************************************/
+#define FSE_FUNCTION_TYPE BYTE
+#define FSE_FUNCTION_EXTENSION
+
+
+/****************************************************************
+*  Byte symbol type
+****************************************************************/
+typedef struct
+{
+    unsigned short newState;
+    unsigned char  symbol;
+    unsigned char  nbBits;
+} FSE_decode_t;   /* size == U32 */
+
+
+
+/****************************************************************
+*  Compiler specifics
+****************************************************************/
+#ifdef _MSC_VER    /* Visual Studio */
+#  define FORCE_INLINE static __forceinline
+#  include <intrin.h>                    /* For Visual 2005 */
+#  pragma warning(disable : 4127)        /* disable: C4127: conditional expression is constant */
+#  pragma warning(disable : 4214)        /* disable: C4214: non-int bitfields */
+#else
+#  define GCC_VERSION (__GNUC__ * 100 + __GNUC_MINOR__)
+#  if defined (__cplusplus) || defined (__STDC_VERSION__) && __STDC_VERSION__ >= 199901L   /* C99 */
+#    ifdef __GNUC__
+#      define FORCE_INLINE static inline __attribute__((always_inline))
+#    else
+#      define FORCE_INLINE static inline
+#    endif
+#  else
+#    define FORCE_INLINE static
+#  endif /* __STDC_VERSION__ */
+#endif
+
+
+/****************************************************************
+*  Includes
+****************************************************************/
+#include <stdlib.h>     /* malloc, free, qsort */
+#include <string.h>     /* memcpy, memset */
+#include <stdio.h>      /* printf (debug) */
+
+
+#ifndef MEM_ACCESS_MODULE
+#define MEM_ACCESS_MODULE
+/****************************************************************
+*  Basic Types
+*****************************************************************/
+#if defined (__STDC_VERSION__) && __STDC_VERSION__ >= 199901L   /* C99 */
+# include <stdint.h>
+typedef  uint8_t BYTE;
+typedef uint16_t U16;
+typedef  int16_t S16;
+typedef uint32_t U32;
+typedef  int32_t S32;
+typedef uint64_t U64;
+typedef  int64_t S64;
+#else
+typedef unsigned char       BYTE;
+typedef unsigned short      U16;
+typedef   signed short      S16;
+typedef unsigned int        U32;
+typedef   signed int        S32;
+typedef unsigned long long  U64;
+typedef   signed long long  S64;
+#endif
+
+#endif   /* MEM_ACCESS_MODULE */
+
+/****************************************************************
+*  Memory I/O
+*****************************************************************/
+/* FSE_FORCE_MEMORY_ACCESS
+ * By default, access to unaligned memory is controlled by `memcpy()`, which is safe and portable.
+ * Unfortunately, on some target/compiler combinations, the generated assembly is sub-optimal.
+ * The below switch allow to select different access method for improved performance.
+ * Method 0 (default) : use `memcpy()`. Safe and portable.
+ * Method 1 : `__packed` statement. It depends on compiler extension (ie, not portable).
+ *            This method is safe if your compiler supports it, and *generally* as fast or faster than `memcpy`.
+ * Method 2 : direct access. This method is portable but violate C standard.
+ *            It can generate buggy code on targets generating assembly depending on alignment.
+ *            But in some circumstances, it's the only known way to get the most performance (ie GCC + ARMv6)
+ * See http://fastcompression.blogspot.fr/2015/08/accessing-unaligned-memory.html for details.
+ * Prefer these methods in priority order (0 > 1 > 2)
+ */
+#ifndef FSE_FORCE_MEMORY_ACCESS   /* can be defined externally, on command line for example */
+#  if defined(__GNUC__) && ( defined(__ARM_ARCH_6__) || defined(__ARM_ARCH_6J__) || defined(__ARM_ARCH_6K__) || defined(__ARM_ARCH_6Z__) || defined(__ARM_ARCH_6ZK__) || defined(__ARM_ARCH_6T2__) )
+#    define FSE_FORCE_MEMORY_ACCESS 2
+#  elif (defined(__INTEL_COMPILER) && !defined(WIN32)) || \
+  (defined(__GNUC__) && ( defined(__ARM_ARCH_7__) || defined(__ARM_ARCH_7A__) || defined(__ARM_ARCH_7R__) || defined(__ARM_ARCH_7M__) || defined(__ARM_ARCH_7S__) ))
+#    define FSE_FORCE_MEMORY_ACCESS 1
+#  endif
+#endif
+
+
+static unsigned FSE_32bits(void)
+{
+    return sizeof(void*)==4;
+}
+
+static unsigned FSE_isLittleEndian(void)
+{
+    const union { U32 i; BYTE c[4]; } one = { 1 };   /* don't use static : performance detrimental  */
+    return one.c[0];
+}
+
+#if defined(FSE_FORCE_MEMORY_ACCESS) && (FSE_FORCE_MEMORY_ACCESS==2)
+
+static U16 FSE_read16(const void* memPtr) { return *(const U16*) memPtr; }
+static U32 FSE_read32(const void* memPtr) { return *(const U32*) memPtr; }
+static U64 FSE_read64(const void* memPtr) { return *(const U64*) memPtr; }
+
+#elif defined(FSE_FORCE_MEMORY_ACCESS) && (FSE_FORCE_MEMORY_ACCESS==1)
+
+/* __pack instructions are safer, but compiler specific, hence potentially problematic for some compilers */
+/* currently only defined for gcc and icc */
+typedef union { U16 u16; U32 u32; U64 u64; } __attribute__((packed)) unalign;
+
+static U16 FSE_read16(const void* ptr) { return ((const unalign*)ptr)->u16; }
+static U32 FSE_read32(const void* ptr) { return ((const unalign*)ptr)->u32; }
+static U64 FSE_read64(const void* ptr) { return ((const unalign*)ptr)->u64; }
+
+#else
+
+static U16 FSE_read16(const void* memPtr)
+{
+    U16 val; memcpy(&val, memPtr, sizeof(val)); return val;
+}
+
+static U32 FSE_read32(const void* memPtr)
+{
+    U32 val; memcpy(&val, memPtr, sizeof(val)); return val;
+}
+
+static U64 FSE_read64(const void* memPtr)
+{
+    U64 val; memcpy(&val, memPtr, sizeof(val)); return val;
+}
+
+#endif // FSE_FORCE_MEMORY_ACCESS
+
+static U16 FSE_readLE16(const void* memPtr)
+{
+    if (FSE_isLittleEndian())
+        return FSE_read16(memPtr);
+    else
+    {
+        const BYTE* p = (const BYTE*)memPtr;
+        return (U16)(p[0] + (p[1]<<8));
+    }
+}
+
+static U32 FSE_readLE32(const void* memPtr)
+{
+    if (FSE_isLittleEndian())
+        return FSE_read32(memPtr);
+    else
+    {
+        const BYTE* p = (const BYTE*)memPtr;
+        return (U32)((U32)p[0] + ((U32)p[1]<<8) + ((U32)p[2]<<16) + ((U32)p[3]<<24));
+    }
+}
+
+
+static U64 FSE_readLE64(const void* memPtr)
+{
+    if (FSE_isLittleEndian())
+        return FSE_read64(memPtr);
+    else
+    {
+        const BYTE* p = (const BYTE*)memPtr;
+        return (U64)((U64)p[0] + ((U64)p[1]<<8) + ((U64)p[2]<<16) + ((U64)p[3]<<24)
+                     + ((U64)p[4]<<32) + ((U64)p[5]<<40) + ((U64)p[6]<<48) + ((U64)p[7]<<56));
+    }
+}
+
+static size_t FSE_readLEST(const void* memPtr)
+{
+    if (FSE_32bits())
+        return (size_t)FSE_readLE32(memPtr);
+    else
+        return (size_t)FSE_readLE64(memPtr);
+}
+
+
+
+/****************************************************************
+*  Constants
+*****************************************************************/
+#define FSE_MAX_TABLELOG  (FSE_MAX_MEMORY_USAGE-2)
+#define FSE_MAX_TABLESIZE (1U<<FSE_MAX_TABLELOG)
+#define FSE_MAXTABLESIZE_MASK (FSE_MAX_TABLESIZE-1)
+#define FSE_DEFAULT_TABLELOG (FSE_DEFAULT_MEMORY_USAGE-2)
+#define FSE_MIN_TABLELOG 5
+
+#define FSE_TABLELOG_ABSOLUTE_MAX 15
+#if FSE_MAX_TABLELOG > FSE_TABLELOG_ABSOLUTE_MAX
+#error "FSE_MAX_TABLELOG > FSE_TABLELOG_ABSOLUTE_MAX is not supported"
+#endif
+
+
+/****************************************************************
+*  Error Management
+****************************************************************/
+#define FSE_STATIC_ASSERT(c) { enum { FSE_static_assert = 1/(int)(!!(c)) }; }   /* use only *after* variable declarations */
+
+
+/****************************************************************
+*  Complex types
+****************************************************************/
+typedef struct
+{
+    int deltaFindState;
+    U32 deltaNbBits;
+} FSE_symbolCompressionTransform; /* total 8 bytes */
+
+typedef U32 DTable_max_t[FSE_DTABLE_SIZE_U32(FSE_MAX_TABLELOG)];
+
+/****************************************************************
+*  Internal functions
+****************************************************************/
+FORCE_INLINE unsigned FSE_highbit32 (register U32 val)
+{
+#   if defined(_MSC_VER)   /* Visual */
+    unsigned long r;
+    _BitScanReverse ( &r, val );
+    return (unsigned) r;
+#   elif defined(__GNUC__) && (GCC_VERSION >= 304)   /* GCC Intrinsic */
+    return 31 - __builtin_clz (val);
+#   else   /* Software version */
+    static const unsigned DeBruijnClz[32] = { 0, 9, 1, 10, 13, 21, 2, 29, 11, 14, 16, 18, 22, 25, 3, 30, 8, 12, 20, 28, 15, 17, 24, 7, 19, 27, 23, 6, 26, 5, 4, 31 };
+    U32 v = val;
+    unsigned r;
+    v |= v >> 1;
+    v |= v >> 2;
+    v |= v >> 4;
+    v |= v >> 8;
+    v |= v >> 16;
+    r = DeBruijnClz[ (U32) (v * 0x07C4ACDDU) >> 27];
+    return r;
+#   endif
+}
+
+
+/****************************************************************
+*  Templates
+****************************************************************/
+/*
+  designed to be included
+  for type-specific functions (template emulation in C)
+  Objective is to write these functions only once, for improved maintenance
+*/
+
+/* safety checks */
+#ifndef FSE_FUNCTION_EXTENSION
+#  error "FSE_FUNCTION_EXTENSION must be defined"
+#endif
+#ifndef FSE_FUNCTION_TYPE
+#  error "FSE_FUNCTION_TYPE must be defined"
+#endif
+
+/* Function names */
+#define FSE_CAT(X,Y) X##Y
+#define FSE_FUNCTION_NAME(X,Y) FSE_CAT(X,Y)
+#define FSE_TYPE_NAME(X,Y) FSE_CAT(X,Y)
+
+
+
+static U32 FSE_tableStep(U32 tableSize) { return (tableSize>>1) + (tableSize>>3) + 3; }
+
+#define FSE_DECODE_TYPE FSE_decode_t
+
+
+typedef struct {
+    U16 tableLog;
+    U16 fastMode;
+} FSE_DTableHeader;   /* sizeof U32 */
+
+static size_t FSE_buildDTable
+(FSE_DTable* dt, const short* normalizedCounter, unsigned maxSymbolValue, unsigned tableLog)
+{
+    void* ptr = dt;
+    FSE_DTableHeader* const DTableH = (FSE_DTableHeader*)ptr;
+    FSE_DECODE_TYPE* const tableDecode = (FSE_DECODE_TYPE*)(ptr) + 1;   /* because dt is unsigned, 32-bits aligned on 32-bits */
+    const U32 tableSize = 1 << tableLog;
+    const U32 tableMask = tableSize-1;
+    const U32 step = FSE_tableStep(tableSize);
+    U16 symbolNext[FSE_MAX_SYMBOL_VALUE+1];
+    U32 position = 0;
+    U32 highThreshold = tableSize-1;
+    const S16 largeLimit= (S16)(1 << (tableLog-1));
+    U32 noLarge = 1;
+    U32 s;
+
+    /* Sanity Checks */
+    if (maxSymbolValue > FSE_MAX_SYMBOL_VALUE) return (size_t)-FSE_ERROR_maxSymbolValue_tooLarge;
+    if (tableLog > FSE_MAX_TABLELOG) return (size_t)-FSE_ERROR_tableLog_tooLarge;
+
+    /* Init, lay down lowprob symbols */
+    DTableH[0].tableLog = (U16)tableLog;
+    for (s=0; s<=maxSymbolValue; s++)
+    {
+        if (normalizedCounter[s]==-1)
+        {
+            tableDecode[highThreshold--].symbol = (FSE_FUNCTION_TYPE)s;
+            symbolNext[s] = 1;
+        }
+        else
+        {
+            if (normalizedCounter[s] >= largeLimit) noLarge=0;
+            symbolNext[s] = normalizedCounter[s];
+        }
+    }
+
+    /* Spread symbols */
+    for (s=0; s<=maxSymbolValue; s++)
+    {
+        int i;
+        for (i=0; i<normalizedCounter[s]; i++)
+        {
+            tableDecode[position].symbol = (FSE_FUNCTION_TYPE)s;
+            position = (position + step) & tableMask;
+            while (position > highThreshold) position = (position + step) & tableMask;   /* lowprob area */
+        }
+    }
+
+    if (position!=0) return (size_t)-FSE_ERROR_GENERIC;   /* position must reach all cells once, otherwise normalizedCounter is incorrect */
+
+    /* Build Decoding table */
+    {
+        U32 i;
+        for (i=0; i<tableSize; i++)
+        {
+            FSE_FUNCTION_TYPE symbol = (FSE_FUNCTION_TYPE)(tableDecode[i].symbol);
+            U16 nextState = symbolNext[symbol]++;
+            tableDecode[i].nbBits = (BYTE) (tableLog - FSE_highbit32 ((U32)nextState) );
+            tableDecode[i].newState = (U16) ( (nextState << tableDecode[i].nbBits) - tableSize);
+        }
+    }
+
+    DTableH->fastMode = (U16)noLarge;
+    return 0;
+}
+
+
+/******************************************
+*  FSE byte symbol
+******************************************/
+#ifndef FSE_COMMONDEFS_ONLY
+
+static unsigned FSE_isError(size_t code) { return (code > (size_t)(-FSE_ERROR_maxCode)); }
+
+static short FSE_abs(short a)
+{
+    return a<0? -a : a;
+}
+
+
+/****************************************************************
+*  Header bitstream management
+****************************************************************/
+static size_t FSE_readNCount (short* normalizedCounter, unsigned* maxSVPtr, unsigned* tableLogPtr,
+                 const void* headerBuffer, size_t hbSize)
+{
+    const BYTE* const istart = (const BYTE*) headerBuffer;
+    const BYTE* const iend = istart + hbSize;
+    const BYTE* ip = istart;
+    int nbBits;
+    int remaining;
+    int threshold;
+    U32 bitStream;
+    int bitCount;
+    unsigned charnum = 0;
+    int previous0 = 0;
+
+    if (hbSize < 4) return (size_t)-FSE_ERROR_srcSize_wrong;
+    bitStream = FSE_readLE32(ip);
+    nbBits = (bitStream & 0xF) + FSE_MIN_TABLELOG;   /* extract tableLog */
+    if (nbBits > FSE_TABLELOG_ABSOLUTE_MAX) return (size_t)-FSE_ERROR_tableLog_tooLarge;
+    bitStream >>= 4;
+    bitCount = 4;
+    *tableLogPtr = nbBits;
+    remaining = (1<<nbBits)+1;
+    threshold = 1<<nbBits;
+    nbBits++;
+
+    while ((remaining>1) && (charnum<=*maxSVPtr))
+    {
+        if (previous0)
+        {
+            unsigned n0 = charnum;
+            while ((bitStream & 0xFFFF) == 0xFFFF)
+            {
+                n0+=24;
+                if (ip < iend-5)
+                {
+                    ip+=2;
+                    bitStream = FSE_readLE32(ip) >> bitCount;
+                }
+                else
+                {
+                    bitStream >>= 16;
+                    bitCount+=16;
+                }
+            }
+            while ((bitStream & 3) == 3)
+            {
+                n0+=3;
+                bitStream>>=2;
+                bitCount+=2;
+            }
+            n0 += bitStream & 3;
+            bitCount += 2;
+            if (n0 > *maxSVPtr) return (size_t)-FSE_ERROR_maxSymbolValue_tooSmall;
+            while (charnum < n0) normalizedCounter[charnum++] = 0;
+            if ((ip <= iend-7) || (ip + (bitCount>>3) <= iend-4))
+            {
+                ip += bitCount>>3;
+                bitCount &= 7;
+                bitStream = FSE_readLE32(ip) >> bitCount;
+            }
+            else
+                bitStream >>= 2;
+        }
+        {
+            const short max = (short)((2*threshold-1)-remaining);
+            short count;
+
+            if ((bitStream & (threshold-1)) < (U32)max)
+            {
+                count = (short)(bitStream & (threshold-1));
+                bitCount   += nbBits-1;
+            }
+            else
+            {
+                count = (short)(bitStream & (2*threshold-1));
+                if (count >= threshold) count -= max;
+                bitCount   += nbBits;
+            }
+
+            count--;   /* extra accuracy */
+            remaining -= FSE_abs(count);
+            normalizedCounter[charnum++] = count;
+            previous0 = !count;
+            while (remaining < threshold)
+            {
+                nbBits--;
+                threshold >>= 1;
+            }
+
+            {
+                if ((ip <= iend-7) || (ip + (bitCount>>3) <= iend-4))
+                {
+                    ip += bitCount>>3;
+                    bitCount &= 7;
+                }
+                else
+                {
+                    bitCount -= (int)(8 * (iend - 4 - ip));
+                    ip = iend - 4;
+                }
+                bitStream = FSE_readLE32(ip) >> (bitCount & 31);
+            }
+        }
+    }
+    if (remaining != 1) return (size_t)-FSE_ERROR_GENERIC;
+    *maxSVPtr = charnum-1;
+
+    ip += (bitCount+7)>>3;
+    if ((size_t)(ip-istart) > hbSize) return (size_t)-FSE_ERROR_srcSize_wrong;
+    return ip-istart;
+}
+
+
+/*********************************************************
+*  Decompression (Byte symbols)
+*********************************************************/
+static size_t FSE_buildDTable_rle (FSE_DTable* dt, BYTE symbolValue)
+{
+    void* ptr = dt;
+    FSE_DTableHeader* const DTableH = (FSE_DTableHeader*)ptr;
+    FSE_decode_t* const cell = (FSE_decode_t*)(ptr) + 1;   /* because dt is unsigned */
+
+    DTableH->tableLog = 0;
+    DTableH->fastMode = 0;
+
+    cell->newState = 0;
+    cell->symbol = symbolValue;
+    cell->nbBits = 0;
+
+    return 0;
+}
+
+
+static size_t FSE_buildDTable_raw (FSE_DTable* dt, unsigned nbBits)
+{
+    void* ptr = dt;
+    FSE_DTableHeader* const DTableH = (FSE_DTableHeader*)ptr;
+    FSE_decode_t* const dinfo = (FSE_decode_t*)(ptr) + 1;   /* because dt is unsigned */
+    const unsigned tableSize = 1 << nbBits;
+    const unsigned tableMask = tableSize - 1;
+    const unsigned maxSymbolValue = tableMask;
+    unsigned s;
+
+    /* Sanity checks */
+    if (nbBits < 1) return (size_t)-FSE_ERROR_GENERIC;             /* min size */
+
+    /* Build Decoding Table */
+    DTableH->tableLog = (U16)nbBits;
+    DTableH->fastMode = 1;
+    for (s=0; s<=maxSymbolValue; s++)
+    {
+        dinfo[s].newState = 0;
+        dinfo[s].symbol = (BYTE)s;
+        dinfo[s].nbBits = (BYTE)nbBits;
+    }
+
+    return 0;
+}
+
+
+/* FSE_initDStream
+ * Initialize a FSE_DStream_t.
+ * srcBuffer must point at the beginning of an FSE block.
+ * The function result is the size of the FSE_block (== srcSize).
+ * If srcSize is too small, the function will return an errorCode;
+ */
+static size_t FSE_initDStream(FSE_DStream_t* bitD, const void* srcBuffer, size_t srcSize)
+{
+    if (srcSize < 1) return (size_t)-FSE_ERROR_srcSize_wrong;
+
+    if (srcSize >=  sizeof(size_t))
+    {
+        U32 contain32;
+        bitD->start = (const char*)srcBuffer;
+        bitD->ptr   = (const char*)srcBuffer + srcSize - sizeof(size_t);
+        bitD->bitContainer = FSE_readLEST(bitD->ptr);
+        contain32 = ((const BYTE*)srcBuffer)[srcSize-1];
+        if (contain32 == 0) return (size_t)-FSE_ERROR_GENERIC;   /* stop bit not present */
+        bitD->bitsConsumed = 8 - FSE_highbit32(contain32);
+    }
+    else
+    {
+        U32 contain32;
+        bitD->start = (const char*)srcBuffer;
+        bitD->ptr   = bitD->start;
+        bitD->bitContainer = *(const BYTE*)(bitD->start);
+        switch(srcSize)
+        {
+            case 7: bitD->bitContainer += (size_t)(((const BYTE*)(bitD->start))[6]) << (sizeof(size_t)*8 - 16);
+            case 6: bitD->bitContainer += (size_t)(((const BYTE*)(bitD->start))[5]) << (sizeof(size_t)*8 - 24);
+            case 5: bitD->bitContainer += (size_t)(((const BYTE*)(bitD->start))[4]) << (sizeof(size_t)*8 - 32);
+            case 4: bitD->bitContainer += (size_t)(((const BYTE*)(bitD->start))[3]) << 24;
+            case 3: bitD->bitContainer += (size_t)(((const BYTE*)(bitD->start))[2]) << 16;
+            case 2: bitD->bitContainer += (size_t)(((const BYTE*)(bitD->start))[1]) <<  8;
+            default:;
+        }
+        contain32 = ((const BYTE*)srcBuffer)[srcSize-1];
+        if (contain32 == 0) return (size_t)-FSE_ERROR_GENERIC;   /* stop bit not present */
+        bitD->bitsConsumed = 8 - FSE_highbit32(contain32);
+        bitD->bitsConsumed += (U32)(sizeof(size_t) - srcSize)*8;
+    }
+
+    return srcSize;
+}
+
+
+/*!FSE_lookBits
+ * Provides next n bits from the bitContainer.
+ * bitContainer is not modified (bits are still present for next read/look)
+ * On 32-bits, maxNbBits==25
+ * On 64-bits, maxNbBits==57
+ * return : value extracted.
+ */
+static size_t FSE_lookBits(FSE_DStream_t* bitD, U32 nbBits)
+{
+    const U32 bitMask = sizeof(bitD->bitContainer)*8 - 1;
+    return ((bitD->bitContainer << (bitD->bitsConsumed & bitMask)) >> 1) >> ((bitMask-nbBits) & bitMask);
+}
+
+static size_t FSE_lookBitsFast(FSE_DStream_t* bitD, U32 nbBits)   /* only if nbBits >= 1 !! */
+{
+    const U32 bitMask = sizeof(bitD->bitContainer)*8 - 1;
+    return (bitD->bitContainer << (bitD->bitsConsumed & bitMask)) >> (((bitMask+1)-nbBits) & bitMask);
+}
+
+static void FSE_skipBits(FSE_DStream_t* bitD, U32 nbBits)
+{
+    bitD->bitsConsumed += nbBits;
+}
+
+
+/*!FSE_readBits
+ * Read next n bits from the bitContainer.
+ * On 32-bits, don't read more than maxNbBits==25
+ * On 64-bits, don't read more than maxNbBits==57
+ * Use the fast variant *only* if n >= 1.
+ * return : value extracted.
+ */
+static size_t FSE_readBits(FSE_DStream_t* bitD, U32 nbBits)
+{
+    size_t value = FSE_lookBits(bitD, nbBits);
+    FSE_skipBits(bitD, nbBits);
+    return value;
+}
+
+static size_t FSE_readBitsFast(FSE_DStream_t* bitD, U32 nbBits)   /* only if nbBits >= 1 !! */
+{
+    size_t value = FSE_lookBitsFast(bitD, nbBits);
+    FSE_skipBits(bitD, nbBits);
+    return value;
+}
+
+static unsigned FSE_reloadDStream(FSE_DStream_t* bitD)
+{
+    if (bitD->bitsConsumed > (sizeof(bitD->bitContainer)*8))  /* should never happen */
+        return FSE_DStream_tooFar;
+
+    if (bitD->ptr >= bitD->start + sizeof(bitD->bitContainer))
+    {
+        bitD->ptr -= bitD->bitsConsumed >> 3;
+        bitD->bitsConsumed &= 7;
+        bitD->bitContainer = FSE_readLEST(bitD->ptr);
+        return FSE_DStream_unfinished;
+    }
+    if (bitD->ptr == bitD->start)
+    {
+        if (bitD->bitsConsumed < sizeof(bitD->bitContainer)*8) return FSE_DStream_endOfBuffer;
+        return FSE_DStream_completed;
+    }
+    {
+        U32 nbBytes = bitD->bitsConsumed >> 3;
+        U32 result = FSE_DStream_unfinished;
+        if (bitD->ptr - nbBytes < bitD->start)
+        {
+            nbBytes = (U32)(bitD->ptr - bitD->start);  /* ptr > start */
+            result = FSE_DStream_endOfBuffer;
+        }
+        bitD->ptr -= nbBytes;
+        bitD->bitsConsumed -= nbBytes*8;
+        bitD->bitContainer = FSE_readLEST(bitD->ptr);   /* reminder : srcSize > sizeof(bitD) */
+        return result;
+    }
+}
+
+
+static void FSE_initDState(FSE_DState_t* DStatePtr, FSE_DStream_t* bitD, const FSE_DTable* dt)
+{
+    const void* ptr = dt;
+    const FSE_DTableHeader* const DTableH = (const FSE_DTableHeader*)ptr;
+    DStatePtr->state = FSE_readBits(bitD, DTableH->tableLog);
+    FSE_reloadDStream(bitD);
+    DStatePtr->table = dt + 1;
+}
+
+static BYTE FSE_decodeSymbol(FSE_DState_t* DStatePtr, FSE_DStream_t* bitD)
+{
+    const FSE_decode_t DInfo = ((const FSE_decode_t*)(DStatePtr->table))[DStatePtr->state];
+    const U32  nbBits = DInfo.nbBits;
+    BYTE symbol = DInfo.symbol;
+    size_t lowBits = FSE_readBits(bitD, nbBits);
+
+    DStatePtr->state = DInfo.newState + lowBits;
+    return symbol;
+}
+
+static BYTE FSE_decodeSymbolFast(FSE_DState_t* DStatePtr, FSE_DStream_t* bitD)
+{
+    const FSE_decode_t DInfo = ((const FSE_decode_t*)(DStatePtr->table))[DStatePtr->state];
+    const U32 nbBits = DInfo.nbBits;
+    BYTE symbol = DInfo.symbol;
+    size_t lowBits = FSE_readBitsFast(bitD, nbBits);
+
+    DStatePtr->state = DInfo.newState + lowBits;
+    return symbol;
+}
+
+/* FSE_endOfDStream
+   Tells if bitD has reached end of bitStream or not */
+
+static unsigned FSE_endOfDStream(const FSE_DStream_t* bitD)
+{
+    return ((bitD->ptr == bitD->start) && (bitD->bitsConsumed == sizeof(bitD->bitContainer)*8));
+}
+
+static unsigned FSE_endOfDState(const FSE_DState_t* DStatePtr)
+{
+    return DStatePtr->state == 0;
+}
+
+
+FORCE_INLINE size_t FSE_decompress_usingDTable_generic(
+          void* dst, size_t maxDstSize,
+    const void* cSrc, size_t cSrcSize,
+    const FSE_DTable* dt, const unsigned fast)
+{
+    BYTE* const ostart = (BYTE*) dst;
+    BYTE* op = ostart;
+    BYTE* const omax = op + maxDstSize;
+    BYTE* const olimit = omax-3;
+
+    FSE_DStream_t bitD;
+    FSE_DState_t state1;
+    FSE_DState_t state2;
+    size_t errorCode;
+
+    /* Init */
+    errorCode = FSE_initDStream(&bitD, cSrc, cSrcSize);   /* replaced last arg by maxCompressed Size */
+    if (FSE_isError(errorCode)) return errorCode;
+
+    FSE_initDState(&state1, &bitD, dt);
+    FSE_initDState(&state2, &bitD, dt);
+
+#define FSE_GETSYMBOL(statePtr) fast ? FSE_decodeSymbolFast(statePtr, &bitD) : FSE_decodeSymbol(statePtr, &bitD)
+
+    /* 4 symbols per loop */
+    for ( ; (FSE_reloadDStream(&bitD)==FSE_DStream_unfinished) && (op<olimit) ; op+=4)
+    {
+        op[0] = FSE_GETSYMBOL(&state1);
+
+        if (FSE_MAX_TABLELOG*2+7 > sizeof(bitD.bitContainer)*8)    /* This test must be static */
+            FSE_reloadDStream(&bitD);
+
+        op[1] = FSE_GETSYMBOL(&state2);
+
+        if (FSE_MAX_TABLELOG*4+7 > sizeof(bitD.bitContainer)*8)    /* This test must be static */
+            { if (FSE_reloadDStream(&bitD) > FSE_DStream_unfinished) { op+=2; break; } }
+
+        op[2] = FSE_GETSYMBOL(&state1);
+
+        if (FSE_MAX_TABLELOG*2+7 > sizeof(bitD.bitContainer)*8)    /* This test must be static */
+            FSE_reloadDStream(&bitD);
+
+        op[3] = FSE_GETSYMBOL(&state2);
+    }
+
+    /* tail */
+    /* note : FSE_reloadDStream(&bitD) >= FSE_DStream_partiallyFilled; Ends at exactly FSE_DStream_completed */
+    while (1)
+    {
+        if ( (FSE_reloadDStream(&bitD)>FSE_DStream_completed) || (op==omax) || (FSE_endOfDStream(&bitD) && (fast || FSE_endOfDState(&state1))) )
+            break;
+
+        *op++ = FSE_GETSYMBOL(&state1);
+
+        if ( (FSE_reloadDStream(&bitD)>FSE_DStream_completed) || (op==omax) || (FSE_endOfDStream(&bitD) && (fast || FSE_endOfDState(&state2))) )
+            break;
+
+        *op++ = FSE_GETSYMBOL(&state2);
+    }
+
+    /* end ? */
+    if (FSE_endOfDStream(&bitD) && FSE_endOfDState(&state1) && FSE_endOfDState(&state2))
+        return op-ostart;
+
+    if (op==omax) return (size_t)-FSE_ERROR_dstSize_tooSmall;   /* dst buffer is full, but cSrc unfinished */
+
+    return (size_t)-FSE_ERROR_corruptionDetected;
+}
+
+
+static size_t FSE_decompress_usingDTable(void* dst, size_t originalSize,
+                            const void* cSrc, size_t cSrcSize,
+                            const FSE_DTable* dt)
+{
+    FSE_DTableHeader DTableH;
+    memcpy(&DTableH, dt, sizeof(DTableH));   /* memcpy() into local variable, to avoid strict aliasing warning */
+
+    /* select fast mode (static) */
+    if (DTableH.fastMode) return FSE_decompress_usingDTable_generic(dst, originalSize, cSrc, cSrcSize, dt, 1);
+    return FSE_decompress_usingDTable_generic(dst, originalSize, cSrc, cSrcSize, dt, 0);
+}
+
+
+static size_t FSE_decompress(void* dst, size_t maxDstSize, const void* cSrc, size_t cSrcSize)
+{
+    const BYTE* const istart = (const BYTE*)cSrc;
+    const BYTE* ip = istart;
+    short counting[FSE_MAX_SYMBOL_VALUE+1];
+    DTable_max_t dt;   /* Static analyzer seems unable to understand this table will be properly initialized later */
+    unsigned tableLog;
+    unsigned maxSymbolValue = FSE_MAX_SYMBOL_VALUE;
+    size_t errorCode;
+
+    if (cSrcSize<2) return (size_t)-FSE_ERROR_srcSize_wrong;   /* too small input size */
+
+    /* normal FSE decoding mode */
+    errorCode = FSE_readNCount (counting, &maxSymbolValue, &tableLog, istart, cSrcSize);
+    if (FSE_isError(errorCode)) return errorCode;
+    if (errorCode >= cSrcSize) return (size_t)-FSE_ERROR_srcSize_wrong;   /* too small input size */
+    ip += errorCode;
+    cSrcSize -= errorCode;
+
+    errorCode = FSE_buildDTable (dt, counting, maxSymbolValue, tableLog);
+    if (FSE_isError(errorCode)) return errorCode;
+
+    /* always return, even if it is an error code */
+    return FSE_decompress_usingDTable (dst, maxDstSize, ip, cSrcSize, dt);
+}
+
+
+
+/* *******************************************************
+*  Huff0 : Huffman block compression
+*********************************************************/
+#define HUF_MAX_SYMBOL_VALUE 255
+#define HUF_DEFAULT_TABLELOG  12       /* used by default, when not specified */
+#define HUF_MAX_TABLELOG  12           /* max possible tableLog; for allocation purpose; can be modified */
+#define HUF_ABSOLUTEMAX_TABLELOG  16   /* absolute limit of HUF_MAX_TABLELOG. Beyond that value, code does not work */
+#if (HUF_MAX_TABLELOG > HUF_ABSOLUTEMAX_TABLELOG)
+#  error "HUF_MAX_TABLELOG is too large !"
+#endif
+
+typedef struct HUF_CElt_s {
+  U16  val;
+  BYTE nbBits;
+} HUF_CElt ;
+
+typedef struct nodeElt_s {
+    U32 count;
+    U16 parent;
+    BYTE byte;
+    BYTE nbBits;
+} nodeElt;
+
+
+/* *******************************************************
+*  Huff0 : Huffman block decompression
+*********************************************************/
+typedef struct {
+    BYTE byte;
+    BYTE nbBits;
+} HUF_DElt;
+
+static size_t HUF_readDTable (U16* DTable, const void* src, size_t srcSize)
+{
+    BYTE huffWeight[HUF_MAX_SYMBOL_VALUE + 1];
+    U32 rankVal[HUF_ABSOLUTEMAX_TABLELOG + 1];  /* large enough for values from 0 to 16 */
+    U32 weightTotal;
+    U32 maxBits;
+    const BYTE* ip = (const BYTE*) src;
+    size_t iSize;
+    size_t oSize;
+    U32 n;
+    U32 nextRankStart;
+    void* ptr = DTable+1;
+    HUF_DElt* const dt = (HUF_DElt*)ptr;
+
+    if (!srcSize) return (size_t)-FSE_ERROR_srcSize_wrong;
+    iSize = ip[0];
+
+    FSE_STATIC_ASSERT(sizeof(HUF_DElt) == sizeof(U16));   /* if compilation fails here, assertion is false */
+    //memset(huffWeight, 0, sizeof(huffWeight));   /* should not be necessary, but some analyzer complain ... */
+    if (iSize >= 128)  /* special header */
+    {
+        if (iSize >= (242))   /* RLE */
+        {
+            static int l[14] = { 1, 2, 3, 4, 7, 8, 15, 16, 31, 32, 63, 64, 127, 128 };
+            oSize = l[iSize-242];
+            memset(huffWeight, 1, sizeof(huffWeight));
+            iSize = 0;
+        }
+        else   /* Incompressible */
+        {
+            oSize = iSize - 127;
+            iSize = ((oSize+1)/2);
+            if (iSize+1 > srcSize) return (size_t)-FSE_ERROR_srcSize_wrong;
+            ip += 1;
+            for (n=0; n<oSize; n+=2)
+            {
+                huffWeight[n]   = ip[n/2] >> 4;
+                huffWeight[n+1] = ip[n/2] & 15;
+            }
+        }
+    }
+    else  /* header compressed with FSE (normal case) */
+    {
+        if (iSize+1 > srcSize) return (size_t)-FSE_ERROR_srcSize_wrong;
+        oSize = FSE_decompress(huffWeight, HUF_MAX_SYMBOL_VALUE, ip+1, iSize);   /* max 255 values decoded, last one is implied */
+        if (FSE_isError(oSize)) return oSize;
+    }
+
+    /* collect weight stats */
+    memset(rankVal, 0, sizeof(rankVal));
+    weightTotal = 0;
+    for (n=0; n<oSize; n++)
+    {
+        if (huffWeight[n] >= HUF_ABSOLUTEMAX_TABLELOG) return (size_t)-FSE_ERROR_corruptionDetected;
+        rankVal[huffWeight[n]]++;
+        weightTotal += (1 << huffWeight[n]) >> 1;
+    }
+    if (weightTotal == 0) return (size_t)-FSE_ERROR_corruptionDetected;
+
+    /* get last non-null symbol weight (implied, total must be 2^n) */
+    maxBits = FSE_highbit32(weightTotal) + 1;
+    if (maxBits > DTable[0]) return (size_t)-FSE_ERROR_tableLog_tooLarge;   /* DTable is too small */
+    DTable[0] = (U16)maxBits;
+    {
+        U32 total = 1 << maxBits;
+        U32 rest = total - weightTotal;
+        U32 verif = 1 << FSE_highbit32(rest);
+        U32 lastWeight = FSE_highbit32(rest) + 1;
+        if (verif != rest) return (size_t)-FSE_ERROR_corruptionDetected;    /* last value must be a clean power of 2 */
+        huffWeight[oSize] = (BYTE)lastWeight;
+        rankVal[lastWeight]++;
+    }
+
+    /* check tree construction validity */
+    if ((rankVal[1] < 2) || (rankVal[1] & 1)) return (size_t)-FSE_ERROR_corruptionDetected;   /* by construction : at least 2 elts of rank 1, must be even */
+
+    /* Prepare ranks */
+    nextRankStart = 0;
+    for (n=1; n<=maxBits; n++)
+    {
+        U32 current = nextRankStart;
+        nextRankStart += (rankVal[n] << (n-1));
+        rankVal[n] = current;
+    }
+
+    /* fill DTable */
+    for (n=0; n<=oSize; n++)
+    {
+        const U32 w = huffWeight[n];
+        const U32 length = (1 << w) >> 1;
+        U32 i;
+        HUF_DElt D;
+        D.byte = (BYTE)n; D.nbBits = (BYTE)(maxBits + 1 - w);
+        for (i = rankVal[w]; i < rankVal[w] + length; i++)
+            dt[i] = D;
+        rankVal[w] += length;
+    }
+
+    return iSize+1;
+}
+
+
+static BYTE HUF_decodeSymbol(FSE_DStream_t* Dstream, const HUF_DElt* dt, const U32 dtLog)
+{
+        const size_t val = FSE_lookBitsFast(Dstream, dtLog); /* note : dtLog >= 1 */
+        const BYTE c = dt[val].byte;
+        FSE_skipBits(Dstream, dt[val].nbBits);
+        return c;
+}
+
+static size_t HUF_decompress_usingDTable(   /* -3% slower when non static */
+          void* dst, size_t maxDstSize,
+    const void* cSrc, size_t cSrcSize,
+    const U16* DTable)
+{
+    BYTE* const ostart = (BYTE*) dst;
+    BYTE* op = ostart;
+    BYTE* const omax = op + maxDstSize;
+    BYTE* const olimit = omax-15;
+
+    const void* ptr = DTable;
+    const HUF_DElt* const dt = (const HUF_DElt*)(ptr)+1;
+    const U32 dtLog = DTable[0];
+    size_t errorCode;
+    U32 reloadStatus;
+
+    /* Init */
+
+    const U16* jumpTable = (const U16*)cSrc;
+    const size_t length1 = FSE_readLE16(jumpTable);
+    const size_t length2 = FSE_readLE16(jumpTable+1);
+    const size_t length3 = FSE_readLE16(jumpTable+2);
+    const size_t length4 = cSrcSize - 6 - length1 - length2 - length3;   // check coherency !!
+    const char* const start1 = (const char*)(cSrc) + 6;
+    const char* const start2 = start1 + length1;
+    const char* const start3 = start2 + length2;
+    const char* const start4 = start3 + length3;
+    FSE_DStream_t bitD1, bitD2, bitD3, bitD4;
+
+    if (length1+length2+length3+6 >= cSrcSize) return (size_t)-FSE_ERROR_srcSize_wrong;
+
+    errorCode = FSE_initDStream(&bitD1, start1, length1);
+    if (FSE_isError(errorCode)) return errorCode;
+    errorCode = FSE_initDStream(&bitD2, start2, length2);
+    if (FSE_isError(errorCode)) return errorCode;
+    errorCode = FSE_initDStream(&bitD3, start3, length3);
+    if (FSE_isError(errorCode)) return errorCode;
+    errorCode = FSE_initDStream(&bitD4, start4, length4);
+    if (FSE_isError(errorCode)) return errorCode;
+
+    reloadStatus=FSE_reloadDStream(&bitD2);
+
+    /* 16 symbols per loop */
+    for ( ; (reloadStatus<FSE_DStream_completed) && (op<olimit);  /* D2-3-4 are supposed to be synchronized and finish together */
+        op+=16, reloadStatus = FSE_reloadDStream(&bitD2) | FSE_reloadDStream(&bitD3) | FSE_reloadDStream(&bitD4), FSE_reloadDStream(&bitD1))
+    {
+#define HUF_DECODE_SYMBOL_0(n, Dstream) \
+        op[n] = HUF_decodeSymbol(&Dstream, dt, dtLog);
+
+#define HUF_DECODE_SYMBOL_1(n, Dstream) \
+        op[n] = HUF_decodeSymbol(&Dstream, dt, dtLog); \
+        if (FSE_32bits() && (HUF_MAX_TABLELOG>12)) FSE_reloadDStream(&Dstream)
+
+#define HUF_DECODE_SYMBOL_2(n, Dstream) \
+        op[n] = HUF_decodeSymbol(&Dstream, dt, dtLog); \
+        if (FSE_32bits()) FSE_reloadDStream(&Dstream)
+
+        HUF_DECODE_SYMBOL_1( 0, bitD1);
+        HUF_DECODE_SYMBOL_1( 1, bitD2);
+        HUF_DECODE_SYMBOL_1( 2, bitD3);
+        HUF_DECODE_SYMBOL_1( 3, bitD4);
+        HUF_DECODE_SYMBOL_2( 4, bitD1);
+        HUF_DECODE_SYMBOL_2( 5, bitD2);
+        HUF_DECODE_SYMBOL_2( 6, bitD3);
+        HUF_DECODE_SYMBOL_2( 7, bitD4);
+        HUF_DECODE_SYMBOL_1( 8, bitD1);
+        HUF_DECODE_SYMBOL_1( 9, bitD2);
+        HUF_DECODE_SYMBOL_1(10, bitD3);
+        HUF_DECODE_SYMBOL_1(11, bitD4);
+        HUF_DECODE_SYMBOL_0(12, bitD1);
+        HUF_DECODE_SYMBOL_0(13, bitD2);
+        HUF_DECODE_SYMBOL_0(14, bitD3);
+        HUF_DECODE_SYMBOL_0(15, bitD4);
+    }
+
+    if (reloadStatus!=FSE_DStream_completed)   /* not complete : some bitStream might be FSE_DStream_unfinished */
+        return (size_t)-FSE_ERROR_corruptionDetected;
+
+    /* tail */
+    {
+        // bitTail = bitD1;   // *much* slower : -20% !??!
+        FSE_DStream_t bitTail;
+        bitTail.ptr = bitD1.ptr;
+        bitTail.bitsConsumed = bitD1.bitsConsumed;
+        bitTail.bitContainer = bitD1.bitContainer;   // required in case of FSE_DStream_endOfBuffer
+        bitTail.start = start1;
+        for ( ; (FSE_reloadDStream(&bitTail) < FSE_DStream_completed) && (op<omax) ; op++)
+        {
+            HUF_DECODE_SYMBOL_0(0, bitTail);
+        }
+
+        if (FSE_endOfDStream(&bitTail))
+            return op-ostart;
+    }
+
+    if (op==omax) return (size_t)-FSE_ERROR_dstSize_tooSmall;   /* dst buffer is full, but cSrc unfinished */
+
+    return (size_t)-FSE_ERROR_corruptionDetected;
+}
+
+
+static size_t HUF_decompress (void* dst, size_t maxDstSize, const void* cSrc, size_t cSrcSize)
+{
+    HUF_CREATE_STATIC_DTABLE(DTable, HUF_MAX_TABLELOG);
+    const BYTE* ip = (const BYTE*) cSrc;
+    size_t errorCode;
+
+    errorCode = HUF_readDTable (DTable, cSrc, cSrcSize);
+    if (FSE_isError(errorCode)) return errorCode;
+    if (errorCode >= cSrcSize) return (size_t)-FSE_ERROR_srcSize_wrong;
+    ip += errorCode;
+    cSrcSize -= errorCode;
+
+    return HUF_decompress_usingDTable (dst, maxDstSize, ip, cSrcSize, DTable);
+}
+
+
+#endif   /* FSE_COMMONDEFS_ONLY */
+
+/*
+    zstd - standard compression library
+    Copyright (C) 2014-2015, Yann Collet.
+
+    BSD 2-Clause License (http://www.opensource.org/licenses/bsd-license.php)
+
+    Redistribution and use in source and binary forms, with or without
+    modification, are permitted provided that the following conditions are
+    met:
+    * Redistributions of source code must retain the above copyright
+    notice, this list of conditions and the following disclaimer.
+    * Redistributions in binary form must reproduce the above
+    copyright notice, this list of conditions and the following disclaimer
+    in the documentation and/or other materials provided with the
+    distribution.
+    THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+    "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+    LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+    A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+    OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+    SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+    LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+    DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+    THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+    (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+    OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+    You can contact the author at :
+    - zstd source repository : https://github.com/Cyan4973/zstd
+    - ztsd public forum : https://groups.google.com/forum/#!forum/lz4c
+*/
+
+/****************************************************************
+*  Tuning parameters
+*****************************************************************/
+/* MEMORY_USAGE :
+*  Memory usage formula : N->2^N Bytes (examples : 10 -> 1KB; 12 -> 4KB ; 16 -> 64KB; 20 -> 1MB; etc.)
+*  Increasing memory usage improves compression ratio
+*  Reduced memory usage can improve speed, due to cache effect */
+#define ZSTD_MEMORY_USAGE 17
+
+
+/**************************************
+   CPU Feature Detection
+**************************************/
+/*
+ * Automated efficient unaligned memory access detection
+ * Based on known hardware architectures
+ * This list will be updated thanks to feedbacks
+ */
+#if defined(CPU_HAS_EFFICIENT_UNALIGNED_MEMORY_ACCESS) \
+    || defined(__ARM_FEATURE_UNALIGNED) \
+    || defined(__i386__) || defined(__x86_64__) \
+    || defined(_M_IX86) || defined(_M_X64) \
+    || defined(__ARM_ARCH_7__) || defined(__ARM_ARCH_8__) \
+    || (defined(_M_ARM) && (_M_ARM >= 7))
+#  define ZSTD_UNALIGNED_ACCESS 1
+#else
+#  define ZSTD_UNALIGNED_ACCESS 0
+#endif
+
+
+/********************************************************
+*  Includes
+*********************************************************/
+#include <stdlib.h>      /* calloc */
+#include <string.h>      /* memcpy, memmove */
+#include <stdio.h>       /* debug : printf */
+
+
+/********************************************************
+*  Compiler specifics
+*********************************************************/
+#ifdef __AVX2__
+#  include <immintrin.h>   /* AVX2 intrinsics */
+#endif
+
+#ifdef _MSC_VER    /* Visual Studio */
+#  include <intrin.h>                    /* For Visual 2005 */
+#  pragma warning(disable : 4127)        /* disable: C4127: conditional expression is constant */
+#  pragma warning(disable : 4324)        /* disable: C4324: padded structure */
+#endif
+
+
+#ifndef MEM_ACCESS_MODULE
+#define MEM_ACCESS_MODULE
+/********************************************************
+*  Basic Types
+*********************************************************/
+#if defined (__STDC_VERSION__) && __STDC_VERSION__ >= 199901L   /* C99 */
+# include <stdint.h>
+typedef  uint8_t BYTE;
+typedef uint16_t U16;
+typedef  int16_t S16;
+typedef uint32_t U32;
+typedef  int32_t S32;
+typedef uint64_t U64;
+#else
+typedef unsigned char       BYTE;
+typedef unsigned short      U16;
+typedef   signed short      S16;
+typedef unsigned int        U32;
+typedef   signed int        S32;
+typedef unsigned long long  U64;
+#endif
+
+#endif   /* MEM_ACCESS_MODULE */
+
+
+/********************************************************
+*  Constants
+*********************************************************/
+static const U32 ZSTD_magicNumber = 0xFD2FB51E;   /* 3rd version : seqNb header */
+
+#define HASH_LOG (ZSTD_MEMORY_USAGE - 2)
+#define HASH_TABLESIZE (1 << HASH_LOG)
+#define HASH_MASK (HASH_TABLESIZE - 1)
+
+#define KNUTH 2654435761
+
+#define BIT7 128
+#define BIT6  64
+#define BIT5  32
+#define BIT4  16
+
+#define KB *(1 <<10)
+#define MB *(1 <<20)
+#define GB *(1U<<30)
+
+#define BLOCKSIZE (128 KB)                 /* define, for static allocation */
+
+#define WORKPLACESIZE (BLOCKSIZE*3)
+#define MINMATCH 4
+#define MLbits   7
+#define LLbits   6
+#define Offbits  5
+#define MaxML  ((1<<MLbits )-1)
+#define MaxLL  ((1<<LLbits )-1)
+#define MaxOff ((1<<Offbits)-1)
+#define LitFSELog  11
+#define MLFSELog   10
+#define LLFSELog   10
+#define OffFSELog   9
+#define MAX(a,b) ((a)<(b)?(b):(a))
+#define MaxSeq MAX(MaxLL, MaxML)
+
+#define LITERAL_NOENTROPY 63
+#define COMMAND_NOENTROPY 7   /* to remove */
+
+static const size_t ZSTD_blockHeaderSize = 3;
+static const size_t ZSTD_frameHeaderSize = 4;
+
+
+/********************************************************
+*  Memory operations
+*********************************************************/
+static unsigned ZSTD_32bits(void) { return sizeof(void*)==4; }
+
+static unsigned ZSTD_isLittleEndian(void)
+{
+    const union { U32 i; BYTE c[4]; } one = { 1 };   /* don't use static : performance detrimental  */
+    return one.c[0];
+}
+
+static U16    ZSTD_read16(const void* p) { U16 r; memcpy(&r, p, sizeof(r)); return r; }
+
+static U32    ZSTD_read32(const void* p) { U32 r; memcpy(&r, p, sizeof(r)); return r; }
+
+static void   ZSTD_copy4(void* dst, const void* src) { memcpy(dst, src, 4); }
+
+static void   ZSTD_copy8(void* dst, const void* src) { memcpy(dst, src, 8); }
+
+#define COPY8(d,s)    { ZSTD_copy8(d,s); d+=8; s+=8; }
+
+static void ZSTD_wildcopy(void* dst, const void* src, ptrdiff_t length)
+{
+    const BYTE* ip = (const BYTE*)src;
+    BYTE* op = (BYTE*)dst;
+    BYTE* const oend = op + length;
+    while (op < oend) COPY8(op, ip);
+}
+
+static U16 ZSTD_readLE16(const void* memPtr)
+{
+    if (ZSTD_isLittleEndian()) return ZSTD_read16(memPtr);
+    else
+    {
+        const BYTE* p = (const BYTE*)memPtr;
+        return (U16)((U16)p[0] + ((U16)p[1]<<8));
+    }
+}
+
+
+static U32 ZSTD_readLE32(const void* memPtr)
+{
+    if (ZSTD_isLittleEndian())
+        return ZSTD_read32(memPtr);
+    else
+    {
+        const BYTE* p = (const BYTE*)memPtr;
+        return (U32)((U32)p[0] + ((U32)p[1]<<8) + ((U32)p[2]<<16) + ((U32)p[3]<<24));
+    }
+}
+
+static U32 ZSTD_readBE32(const void* memPtr)
+{
+    const BYTE* p = (const BYTE*)memPtr;
+    return (U32)(((U32)p[0]<<24) + ((U32)p[1]<<16) + ((U32)p[2]<<8) + ((U32)p[3]<<0));
+}
+
+
+/**************************************
+*  Local structures
+***************************************/
+typedef struct ZSTD_Cctx_s ZSTD_Cctx;
+
+typedef enum { bt_compressed, bt_raw, bt_rle, bt_end } blockType_t;
+
+typedef struct
+{
+    blockType_t blockType;
+    U32 origSize;
+} blockProperties_t;
+
+typedef struct {
+    void* buffer;
+    U32*  offsetStart;
+    U32*  offset;
+    BYTE* offCodeStart;
+    BYTE* offCode;
+    BYTE* litStart;
+    BYTE* lit;
+    BYTE* litLengthStart;
+    BYTE* litLength;
+    BYTE* matchLengthStart;
+    BYTE* matchLength;
+    BYTE* dumpsStart;
+    BYTE* dumps;
+} seqStore_t;
+
+
+typedef struct ZSTD_Cctx_s
+{
+    const BYTE* base;
+    U32 current;
+    U32 nextUpdate;
+    seqStore_t seqStore;
+#ifdef __AVX2__
+    __m256i hashTable[HASH_TABLESIZE>>3];
+#else
+    U32 hashTable[HASH_TABLESIZE];
+#endif
+    BYTE buffer[WORKPLACESIZE];
+} cctxi_t;
+
+
+
+
+/**************************************
+*  Error Management
+**************************************/
+/* published entry point */
+unsigned ZSTDv01_isError(size_t code) { return ERR_isError(code); }
+
+
+/**************************************
+*  Tool functions
+**************************************/
+#define ZSTD_VERSION_MAJOR    0    /* for breaking interface changes  */
+#define ZSTD_VERSION_MINOR    1    /* for new (non-breaking) interface capabilities */
+#define ZSTD_VERSION_RELEASE  3    /* for tweaks, bug-fixes, or development */
+#define ZSTD_VERSION_NUMBER  (ZSTD_VERSION_MAJOR *100*100 + ZSTD_VERSION_MINOR *100 + ZSTD_VERSION_RELEASE)
+
+/**************************************************************
+*   Decompression code
+**************************************************************/
+
+size_t ZSTDv01_getcBlockSize(const void* src, size_t srcSize, blockProperties_t* bpPtr)
+{
+    const BYTE* const in = (const BYTE* const)src;
+    BYTE headerFlags;
+    U32 cSize;
+
+    if (srcSize < 3) return ERROR(srcSize_wrong);
+
+    headerFlags = *in;
+    cSize = in[2] + (in[1]<<8) + ((in[0] & 7)<<16);
+
+    bpPtr->blockType = (blockType_t)(headerFlags >> 6);
+    bpPtr->origSize = (bpPtr->blockType == bt_rle) ? cSize : 0;
+
+    if (bpPtr->blockType == bt_end) return 0;
+    if (bpPtr->blockType == bt_rle) return 1;
+    return cSize;
+}
+
+
+static size_t ZSTD_copyUncompressedBlock(void* dst, size_t maxDstSize, const void* src, size_t srcSize)
+{
+    if (srcSize > maxDstSize) return ERROR(dstSize_tooSmall);
+    memcpy(dst, src, srcSize);
+    return srcSize;
+}
+
+
+static size_t ZSTD_decompressLiterals(void* ctx,
+                                      void* dst, size_t maxDstSize,
+                                const void* src, size_t srcSize)
+{
+    BYTE* op = (BYTE*)dst;
+    BYTE* const oend = op + maxDstSize;
+    const BYTE* ip = (const BYTE*)src;
+    size_t errorCode;
+    size_t litSize;
+
+    /* check : minimum 2, for litSize, +1, for content */
+    if (srcSize <= 3) return ERROR(corruption_detected);
+
+    litSize = ip[1] + (ip[0]<<8);
+    litSize += ((ip[-3] >> 3) & 7) << 16;   // mmmmh....
+    op = oend - litSize;
+
+    (void)ctx;
+    if (litSize > maxDstSize) return ERROR(dstSize_tooSmall);
+    errorCode = HUF_decompress(op, litSize, ip+2, srcSize-2);
+    if (FSE_isError(errorCode)) return ERROR(GENERIC);
+    return litSize;
+}
+
+
+size_t ZSTDv01_decodeLiteralsBlock(void* ctx,
+                                void* dst, size_t maxDstSize,
+                          const BYTE** litStart, size_t* litSize,
+                          const void* src, size_t srcSize)
+{
+    const BYTE* const istart = (const BYTE* const)src;
+    const BYTE* ip = istart;
+    BYTE* const ostart = (BYTE* const)dst;
+    BYTE* const oend = ostart + maxDstSize;
+    blockProperties_t litbp;
+
+    size_t litcSize = ZSTDv01_getcBlockSize(src, srcSize, &litbp);
+    if (ZSTDv01_isError(litcSize)) return litcSize;
+    if (litcSize > srcSize - ZSTD_blockHeaderSize) return ERROR(srcSize_wrong);
+    ip += ZSTD_blockHeaderSize;
+
+    switch(litbp.blockType)
+    {
+    case bt_raw:
+        *litStart = ip;
+        ip += litcSize;
+        *litSize = litcSize;
+        break;
+    case bt_rle:
+        {
+            size_t rleSize = litbp.origSize;
+            if (rleSize>maxDstSize) return ERROR(dstSize_tooSmall);
+            if (!srcSize) return ERROR(srcSize_wrong);
+            memset(oend - rleSize, *ip, rleSize);
+            *litStart = oend - rleSize;
+            *litSize = rleSize;
+            ip++;
+            break;
+        }
+    case bt_compressed:
+        {
+            size_t decodedLitSize = ZSTD_decompressLiterals(ctx, dst, maxDstSize, ip, litcSize);
+            if (ZSTDv01_isError(decodedLitSize)) return decodedLitSize;
+            *litStart = oend - decodedLitSize;
+            *litSize = decodedLitSize;
+            ip += litcSize;
+            break;
+        }
+    case bt_end:
+    default:
+        return ERROR(GENERIC);
+    }
+
+    return ip-istart;
+}
+
+
+size_t ZSTDv01_decodeSeqHeaders(int* nbSeq, const BYTE** dumpsPtr, size_t* dumpsLengthPtr,
+                         FSE_DTable* DTableLL, FSE_DTable* DTableML, FSE_DTable* DTableOffb,
+                         const void* src, size_t srcSize)
+{
+    const BYTE* const istart = (const BYTE* const)src;
+    const BYTE* ip = istart;
+    const BYTE* const iend = istart + srcSize;
+    U32 LLtype, Offtype, MLtype;
+    U32 LLlog, Offlog, MLlog;
+    size_t dumpsLength;
+
+    /* check */
+    if (srcSize < 5) return ERROR(srcSize_wrong);
+
+    /* SeqHead */
+    *nbSeq = ZSTD_readLE16(ip); ip+=2;
+    LLtype  = *ip >> 6;
+    Offtype = (*ip >> 4) & 3;
+    MLtype  = (*ip >> 2) & 3;
+    if (*ip & 2)
+    {
+        dumpsLength  = ip[2];
+        dumpsLength += ip[1] << 8;
+        ip += 3;
+    }
+    else
+    {
+        dumpsLength  = ip[1];
+        dumpsLength += (ip[0] & 1) << 8;
+        ip += 2;
+    }
+    *dumpsPtr = ip;
+    ip += dumpsLength;
+    *dumpsLengthPtr = dumpsLength;
+
+    /* check */
+    if (ip > iend-3) return ERROR(srcSize_wrong); /* min : all 3 are "raw", hence no header, but at least xxLog bits per type */
+
+    /* sequences */
+    {
+        S16 norm[MaxML+1];    /* assumption : MaxML >= MaxLL and MaxOff */
+        size_t headerSize;
+
+        /* Build DTables */
+        switch(LLtype)
+        {
+        case bt_rle :
+            LLlog = 0;
+            FSE_buildDTable_rle(DTableLL, *ip++); break;
+        case bt_raw :
+            LLlog = LLbits;
+            FSE_buildDTable_raw(DTableLL, LLbits); break;
+        default :
+            {   U32 max = MaxLL;
+                headerSize = FSE_readNCount(norm, &max, &LLlog, ip, iend-ip);
+                if (FSE_isError(headerSize)) return ERROR(GENERIC);
+                if (LLlog > LLFSELog) return ERROR(corruption_detected);
+                ip += headerSize;
+                FSE_buildDTable(DTableLL, norm, max, LLlog);
+        }   }
+
+        switch(Offtype)
+        {
+        case bt_rle :
+            Offlog = 0;
+            if (ip > iend-2) return ERROR(srcSize_wrong); /* min : "raw", hence no header, but at least xxLog bits */
+            FSE_buildDTable_rle(DTableOffb, *ip++); break;
+        case bt_raw :
+            Offlog = Offbits;
+            FSE_buildDTable_raw(DTableOffb, Offbits); break;
+        default :
+            {   U32 max = MaxOff;
+                headerSize = FSE_readNCount(norm, &max, &Offlog, ip, iend-ip);
+                if (FSE_isError(headerSize)) return ERROR(GENERIC);
+                if (Offlog > OffFSELog) return ERROR(corruption_detected);
+                ip += headerSize;
+                FSE_buildDTable(DTableOffb, norm, max, Offlog);
+        }   }
+
+        switch(MLtype)
+        {
+        case bt_rle :
+            MLlog = 0;
+            if (ip > iend-2) return ERROR(srcSize_wrong); /* min : "raw", hence no header, but at least xxLog bits */
+            FSE_buildDTable_rle(DTableML, *ip++); break;
+        case bt_raw :
+            MLlog = MLbits;
+            FSE_buildDTable_raw(DTableML, MLbits); break;
+        default :
+            {   U32 max = MaxML;
+                headerSize = FSE_readNCount(norm, &max, &MLlog, ip, iend-ip);
+                if (FSE_isError(headerSize)) return ERROR(GENERIC);
+                if (MLlog > MLFSELog) return ERROR(corruption_detected);
+                ip += headerSize;
+                FSE_buildDTable(DTableML, norm, max, MLlog);
+    }   }   }
+
+    return ip-istart;
+}
+
+
+typedef struct {
+    size_t litLength;
+    size_t offset;
+    size_t matchLength;
+} seq_t;
+
+typedef struct {
+    FSE_DStream_t DStream;
+    FSE_DState_t stateLL;
+    FSE_DState_t stateOffb;
+    FSE_DState_t stateML;
+    size_t prevOffset;
+    const BYTE* dumps;
+    const BYTE* dumpsEnd;
+} seqState_t;
+
+
+static void ZSTD_decodeSequence(seq_t* seq, seqState_t* seqState)
+{
+    size_t litLength;
+    size_t prevOffset;
+    size_t offset;
+    size_t matchLength;
+    const BYTE* dumps = seqState->dumps;
+    const BYTE* const de = seqState->dumpsEnd;
+
+    /* Literal length */
+    litLength = FSE_decodeSymbol(&(seqState->stateLL), &(seqState->DStream));
+    prevOffset = litLength ? seq->offset : seqState->prevOffset;
+    seqState->prevOffset = seq->offset;
+    if (litLength == MaxLL)
+    {
+        U32 add = dumps<de ? *dumps++ : 0;
+        if (add < 255) litLength += add;
+        else
+        {
+            if (dumps<=(de-3))
+            {
+                litLength = ZSTD_readLE32(dumps) & 0xFFFFFF;  /* no pb : dumps is always followed by seq tables > 1 byte */
+                dumps += 3;
+            }
+        }
+    }
+
+    /* Offset */
+    {
+        U32 offsetCode, nbBits;
+        offsetCode = FSE_decodeSymbol(&(seqState->stateOffb), &(seqState->DStream));
+        if (ZSTD_32bits()) FSE_reloadDStream(&(seqState->DStream));
+        nbBits = offsetCode - 1;
+        if (offsetCode==0) nbBits = 0;   /* cmove */
+        offset = ((size_t)1 << (nbBits & ((sizeof(offset)*8)-1))) + FSE_readBits(&(seqState->DStream), nbBits);
+        if (ZSTD_32bits()) FSE_reloadDStream(&(seqState->DStream));
+        if (offsetCode==0) offset = prevOffset;
+    }
+
+    /* MatchLength */
+    matchLength = FSE_decodeSymbol(&(seqState->stateML), &(seqState->DStream));
+    if (matchLength == MaxML)
+    {
+        U32 add = dumps<de ? *dumps++ : 0;
+        if (add < 255) matchLength += add;
+        else
+        {
+            if (dumps<=(de-3))
+            {
+                matchLength = ZSTD_readLE32(dumps) & 0xFFFFFF;  /* no pb : dumps is always followed by seq tables > 1 byte */
+                dumps += 3;
+            }
+        }
+    }
+    matchLength += MINMATCH;
+
+    /* save result */
+    seq->litLength = litLength;
+    seq->offset = offset;
+    seq->matchLength = matchLength;
+    seqState->dumps = dumps;
+}
+
+
+static size_t ZSTD_execSequence(BYTE* op,
+                                seq_t sequence,
+                                const BYTE** litPtr, const BYTE* const litLimit,
+                                BYTE* const base, BYTE* const oend)
+{
+    static const int dec32table[] = {0, 1, 2, 1, 4, 4, 4, 4};   /* added */
+    static const int dec64table[] = {8, 8, 8, 7, 8, 9,10,11};   /* substracted */
+    const BYTE* const ostart = op;
+    const size_t litLength = sequence.litLength;
+    BYTE* const endMatch = op + litLength + sequence.matchLength;    /* risk : address space overflow (32-bits) */
+    const BYTE* const litEnd = *litPtr + litLength;
+
+    /* check */
+    if (endMatch > oend) return ERROR(dstSize_tooSmall);   /* overwrite beyond dst buffer */
+    if (litEnd > litLimit) return ERROR(corruption_detected);
+    if (sequence.matchLength > (size_t)(*litPtr-op))  return ERROR(dstSize_tooSmall);    /* overwrite literal segment */
+
+    /* copy Literals */
+    if (((size_t)(*litPtr - op) < 8) || ((size_t)(oend-litEnd) < 8) || (op+litLength > oend-8))
+        memmove(op, *litPtr, litLength);   /* overwrite risk */
+    else
+        ZSTD_wildcopy(op, *litPtr, litLength);
+    op += litLength;
+    *litPtr = litEnd;   /* update for next sequence */
+
+    /* check : last match must be at a minimum distance of 8 from end of dest buffer */
+    if (oend-op < 8) return ERROR(dstSize_tooSmall);
+
+    /* copy Match */
+    {
+        const U32 overlapRisk = (((size_t)(litEnd - endMatch)) < 12);
+        const BYTE* match = op - sequence.offset;            /* possible underflow at op - offset ? */
+        size_t qutt = 12;
+        U64 saved[2];
+
+        /* check */
+        if (match < base) return ERROR(corruption_detected);
+        if (sequence.offset > (size_t)base) return ERROR(corruption_detected);
+
+        /* save beginning of literal sequence, in case of write overlap */
+        if (overlapRisk)
+        {
+            if ((endMatch + qutt) > oend) qutt = oend-endMatch;
+            memcpy(saved, endMatch, qutt);
+        }
+
+        if (sequence.offset < 8)
+        {
+            const int dec64 = dec64table[sequence.offset];
+            op[0] = match[0];
+            op[1] = match[1];
+            op[2] = match[2];
+            op[3] = match[3];
+            match += dec32table[sequence.offset];
+            ZSTD_copy4(op+4, match);
+            match -= dec64;
+        } else { ZSTD_copy8(op, match); }
+        op += 8; match += 8;
+
+        if (endMatch > oend-(16-MINMATCH))
+        {
+            if (op < oend-8)
+            {
+                ZSTD_wildcopy(op, match, (oend-8) - op);
+                match += (oend-8) - op;
+                op = oend-8;
+            }
+            while (op<endMatch) *op++ = *match++;
+        }
+        else
+            ZSTD_wildcopy(op, match, (ptrdiff_t)sequence.matchLength-8);   /* works even if matchLength < 8 */
+
+        /* restore, in case of overlap */
+        if (overlapRisk) memcpy(endMatch, saved, qutt);
+    }
+
+    return endMatch-ostart;
+}
+
+typedef struct ZSTDv01_Dctx_s
+{
+    U32 LLTable[FSE_DTABLE_SIZE_U32(LLFSELog)];
+    U32 OffTable[FSE_DTABLE_SIZE_U32(OffFSELog)];
+    U32 MLTable[FSE_DTABLE_SIZE_U32(MLFSELog)];
+    void* previousDstEnd;
+    void* base;
+    size_t expected;
+    blockType_t bType;
+    U32 phase;
+} dctx_t;
+
+
+static size_t ZSTD_decompressSequences(
+                               void* ctx,
+                               void* dst, size_t maxDstSize,
+                         const void* seqStart, size_t seqSize,
+                         const BYTE* litStart, size_t litSize)
+{
+    dctx_t* dctx = (dctx_t*)ctx;
+    const BYTE* ip = (const BYTE*)seqStart;
+    const BYTE* const iend = ip + seqSize;
+    BYTE* const ostart = (BYTE* const)dst;
+    BYTE* op = ostart;
+    BYTE* const oend = ostart + maxDstSize;
+    size_t errorCode, dumpsLength;
+    const BYTE* litPtr = litStart;
+    const BYTE* const litEnd = litStart + litSize;
+    int nbSeq;
+    const BYTE* dumps;
+    U32* DTableLL = dctx->LLTable;
+    U32* DTableML = dctx->MLTable;
+    U32* DTableOffb = dctx->OffTable;
+    BYTE* const base = (BYTE*) (dctx->base);
+
+    /* Build Decoding Tables */
+    errorCode = ZSTDv01_decodeSeqHeaders(&nbSeq, &dumps, &dumpsLength,
+                                      DTableLL, DTableML, DTableOffb,
+                                      ip, iend-ip);
+    if (ZSTDv01_isError(errorCode)) return errorCode;
+    ip += errorCode;
+
+    /* Regen sequences */
+    {
+        seq_t sequence;
+        seqState_t seqState;
+
+        memset(&sequence, 0, sizeof(sequence));
+        seqState.dumps = dumps;
+        seqState.dumpsEnd = dumps + dumpsLength;
+        seqState.prevOffset = 1;
+        errorCode = FSE_initDStream(&(seqState.DStream), ip, iend-ip);
+        if (FSE_isError(errorCode)) return ERROR(corruption_detected);
+        FSE_initDState(&(seqState.stateLL), &(seqState.DStream), DTableLL);
+        FSE_initDState(&(seqState.stateOffb), &(seqState.DStream), DTableOffb);
+        FSE_initDState(&(seqState.stateML), &(seqState.DStream), DTableML);
+
+        for ( ; (FSE_reloadDStream(&(seqState.DStream)) <= FSE_DStream_completed) && (nbSeq>0) ; )
+        {
+            size_t oneSeqSize;
+            nbSeq--;
+            ZSTD_decodeSequence(&sequence, &seqState);
+            oneSeqSize = ZSTD_execSequence(op, sequence, &litPtr, litEnd, base, oend);
+            if (ZSTDv01_isError(oneSeqSize)) return oneSeqSize;
+            op += oneSeqSize;
+        }
+
+        /* check if reached exact end */
+        if ( !FSE_endOfDStream(&(seqState.DStream)) ) return ERROR(corruption_detected);   /* requested too much : data is corrupted */
+        if (nbSeq<0) return ERROR(corruption_detected);   /* requested too many sequences : data is corrupted */
+
+        /* last literal segment */
+        {
+            size_t lastLLSize = litEnd - litPtr;
+            if (op+lastLLSize > oend) return ERROR(dstSize_tooSmall);
+            if (op != litPtr) memmove(op, litPtr, lastLLSize);
+            op += lastLLSize;
+        }
+    }
+
+    return op-ostart;
+}
+
+
+static size_t ZSTD_decompressBlock(
+                            void* ctx,
+                            void* dst, size_t maxDstSize,
+                      const void* src, size_t srcSize)
+{
+    /* blockType == blockCompressed, srcSize is trusted */
+    const BYTE* ip = (const BYTE*)src;
+    const BYTE* litPtr = NULL;
+    size_t litSize = 0;
+    size_t errorCode;
+
+    /* Decode literals sub-block */
+    errorCode = ZSTDv01_decodeLiteralsBlock(ctx, dst, maxDstSize, &litPtr, &litSize, src, srcSize);
+    if (ZSTDv01_isError(errorCode)) return errorCode;
+    ip += errorCode;
+    srcSize -= errorCode;
+
+    return ZSTD_decompressSequences(ctx, dst, maxDstSize, ip, srcSize, litPtr, litSize);
+}
+
+
+size_t ZSTDv01_decompressDCtx(void* ctx, void* dst, size_t maxDstSize, const void* src, size_t srcSize)
+{
+    const BYTE* ip = (const BYTE*)src;
+    const BYTE* iend = ip + srcSize;
+    BYTE* const ostart = (BYTE* const)dst;
+    BYTE* op = ostart;
+    BYTE* const oend = ostart + maxDstSize;
+    size_t remainingSize = srcSize;
+    U32 magicNumber;
+    size_t errorCode=0;
+    blockProperties_t blockProperties;
+
+    /* Frame Header */
+    if (srcSize < ZSTD_frameHeaderSize+ZSTD_blockHeaderSize) return ERROR(srcSize_wrong);
+    magicNumber = ZSTD_readBE32(src);
+    if (magicNumber != ZSTD_magicNumber) return ERROR(prefix_unknown);
+    ip += ZSTD_frameHeaderSize; remainingSize -= ZSTD_frameHeaderSize;
+
+    /* Loop on each block */
+    while (1)
+    {
+        size_t blockSize = ZSTDv01_getcBlockSize(ip, iend-ip, &blockProperties);
+        if (ZSTDv01_isError(blockSize)) return blockSize;
+
+        ip += ZSTD_blockHeaderSize;
+        remainingSize -= ZSTD_blockHeaderSize;
+        if (blockSize > remainingSize) return ERROR(srcSize_wrong);
+
+        switch(blockProperties.blockType)
+        {
+        case bt_compressed:
+            errorCode = ZSTD_decompressBlock(ctx, op, oend-op, ip, blockSize);
+            break;
+        case bt_raw :
+            errorCode = ZSTD_copyUncompressedBlock(op, oend-op, ip, blockSize);
+            break;
+        case bt_rle :
+            return ERROR(GENERIC);   /* not yet supported */
+            break;
+        case bt_end :
+            /* end of frame */
+            if (remainingSize) return ERROR(srcSize_wrong);
+            break;
+        default:
+            return ERROR(GENERIC);
+        }
+        if (blockSize == 0) break;   /* bt_end */
+
+        if (ZSTDv01_isError(errorCode)) return errorCode;
+        op += errorCode;
+        ip += blockSize;
+        remainingSize -= blockSize;
+    }
+
+    return op-ostart;
+}
+
+size_t ZSTDv01_decompress(void* dst, size_t maxDstSize, const void* src, size_t srcSize)
+{
+    dctx_t ctx;
+    ctx.base = dst;
+    return ZSTDv01_decompressDCtx(&ctx, dst, maxDstSize, src, srcSize);
+}
+
+size_t ZSTDv01_findFrameCompressedSize(const void* src, size_t srcSize)
+{
+    const BYTE* ip = (const BYTE*)src;
+    size_t remainingSize = srcSize;
+    U32 magicNumber;
+    blockProperties_t blockProperties;
+
+    /* Frame Header */
+    if (srcSize < ZSTD_frameHeaderSize+ZSTD_blockHeaderSize) return ERROR(srcSize_wrong);
+    magicNumber = ZSTD_readBE32(src);
+    if (magicNumber != ZSTD_magicNumber) return ERROR(prefix_unknown);
+    ip += ZSTD_frameHeaderSize; remainingSize -= ZSTD_frameHeaderSize;
+
+    /* Loop on each block */
+    while (1)
+    {
+        size_t blockSize = ZSTDv01_getcBlockSize(ip, remainingSize, &blockProperties);
+        if (ZSTDv01_isError(blockSize)) return blockSize;
+
+        ip += ZSTD_blockHeaderSize;
+        remainingSize -= ZSTD_blockHeaderSize;
+        if (blockSize > remainingSize) return ERROR(srcSize_wrong);
+
+        if (blockSize == 0) break;   /* bt_end */
+
+        ip += blockSize;
+        remainingSize -= blockSize;
+    }
+
+    return ip - (const BYTE*)src;
+}
+
+/*******************************
+*  Streaming Decompression API
+*******************************/
+
+size_t ZSTDv01_resetDCtx(ZSTDv01_Dctx* dctx)
+{
+    dctx->expected = ZSTD_frameHeaderSize;
+    dctx->phase = 0;
+    dctx->previousDstEnd = NULL;
+    dctx->base = NULL;
+    return 0;
+}
+
+ZSTDv01_Dctx* ZSTDv01_createDCtx(void)
+{
+    ZSTDv01_Dctx* dctx = (ZSTDv01_Dctx*)malloc(sizeof(ZSTDv01_Dctx));
+    if (dctx==NULL) return NULL;
+    ZSTDv01_resetDCtx(dctx);
+    return dctx;
+}
+
+size_t ZSTDv01_freeDCtx(ZSTDv01_Dctx* dctx)
+{
+    free(dctx);
+    return 0;
+}
+
+size_t ZSTDv01_nextSrcSizeToDecompress(ZSTDv01_Dctx* dctx)
+{
+    return ((dctx_t*)dctx)->expected;
+}
+
+size_t ZSTDv01_decompressContinue(ZSTDv01_Dctx* dctx, void* dst, size_t maxDstSize, const void* src, size_t srcSize)
+{
+    dctx_t* ctx = (dctx_t*)dctx;
+
+    /* Sanity check */
+    if (srcSize != ctx->expected) return ERROR(srcSize_wrong);
+    if (dst != ctx->previousDstEnd)  /* not contiguous */
+        ctx->base = dst;
+
+    /* Decompress : frame header */
+    if (ctx->phase == 0)
+    {
+        /* Check frame magic header */
+        U32 magicNumber = ZSTD_readBE32(src);
+        if (magicNumber != ZSTD_magicNumber) return ERROR(prefix_unknown);
+        ctx->phase = 1;
+        ctx->expected = ZSTD_blockHeaderSize;
+        return 0;
+    }
+
+    /* Decompress : block header */
+    if (ctx->phase == 1)
+    {
+        blockProperties_t bp;
+        size_t blockSize = ZSTDv01_getcBlockSize(src, ZSTD_blockHeaderSize, &bp);
+        if (ZSTDv01_isError(blockSize)) return blockSize;
+        if (bp.blockType == bt_end)
+        {
+            ctx->expected = 0;
+            ctx->phase = 0;
+        }
+        else
+        {
+            ctx->expected = blockSize;
+            ctx->bType = bp.blockType;
+            ctx->phase = 2;
+        }
+
+        return 0;
+    }
+
+    /* Decompress : block content */
+    {
+        size_t rSize;
+        switch(ctx->bType)
+        {
+        case bt_compressed:
+            rSize = ZSTD_decompressBlock(ctx, dst, maxDstSize, src, srcSize);
+            break;
+        case bt_raw :
+            rSize = ZSTD_copyUncompressedBlock(dst, maxDstSize, src, srcSize);
+            break;
+        case bt_rle :
+            return ERROR(GENERIC);   /* not yet handled */
+            break;
+        case bt_end :   /* should never happen (filtered at phase 1) */
+            rSize = 0;
+            break;
+        default:
+            return ERROR(GENERIC);
+        }
+        ctx->phase = 1;
+        ctx->expected = ZSTD_blockHeaderSize;
+        ctx->previousDstEnd = (void*)( ((char*)dst) + rSize);
+        return rSize;
+    }
+
+}
diff --git a/contrib/libzstd/include/zstd/legacy/zstd_v01.h b/contrib/libzstd/include/zstd/legacy/zstd_v01.h
new file mode 100644
index 00000000000..13cb3acfdc3
--- /dev/null
+++ b/contrib/libzstd/include/zstd/legacy/zstd_v01.h
@@ -0,0 +1,88 @@
+/**
+ * Copyright (c) 2016-present, Yann Collet, Facebook, Inc.
+ * All rights reserved.
+ *
+ * This source code is licensed under the BSD-style license found in the
+ * LICENSE file in the root directory of this source tree. An additional grant
+ * of patent rights can be found in the PATENTS file in the same directory.
+ */
+
+#ifndef ZSTD_V01_H_28739879432
+#define ZSTD_V01_H_28739879432
+
+#if defined (__cplusplus)
+extern "C" {
+#endif
+
+/* *************************************
+*  Includes
+***************************************/
+#include <stddef.h>   /* size_t */
+
+
+/* *************************************
+*  Simple one-step function
+***************************************/
+/**
+ZSTDv01_decompress() : decompress ZSTD frames compliant with v0.1.x format
+    compressedSize : is the exact source size
+    maxOriginalSize : is the size of the 'dst' buffer, which must be already allocated.
+                      It must be equal or larger than originalSize, otherwise decompression will fail.
+    return : the number of bytes decompressed into destination buffer (originalSize)
+             or an errorCode if it fails (which can be tested using ZSTDv01_isError())
+*/
+size_t ZSTDv01_decompress( void* dst, size_t maxOriginalSize,
+                     const void* src, size_t compressedSize);
+
+/**
+ZSTDv01_getFrameSrcSize() : get the source length of a ZSTD frame compliant with v0.1.x format
+    compressedSize : The size of the 'src' buffer, at least as large as the frame pointed to by 'src'
+    return : the number of bytes that would be read to decompress this frame
+             or an errorCode if it fails (which can be tested using ZSTDv01_isError())
+*/
+size_t ZSTDv01_findFrameCompressedSize(const void* src, size_t compressedSize);
+
+/**
+ZSTDv01_isError() : tells if the result of ZSTDv01_decompress() is an error
+*/
+unsigned ZSTDv01_isError(size_t code);
+
+
+/* *************************************
+*  Advanced functions
+***************************************/
+typedef struct ZSTDv01_Dctx_s ZSTDv01_Dctx;
+ZSTDv01_Dctx* ZSTDv01_createDCtx(void);
+size_t ZSTDv01_freeDCtx(ZSTDv01_Dctx* dctx);
+
+size_t ZSTDv01_decompressDCtx(void* ctx,
+                              void* dst, size_t maxOriginalSize,
+                        const void* src, size_t compressedSize);
+
+/* *************************************
+*  Streaming functions
+***************************************/
+size_t ZSTDv01_resetDCtx(ZSTDv01_Dctx* dctx);
+
+size_t ZSTDv01_nextSrcSizeToDecompress(ZSTDv01_Dctx* dctx);
+size_t ZSTDv01_decompressContinue(ZSTDv01_Dctx* dctx, void* dst, size_t maxDstSize, const void* src, size_t srcSize);
+/**
+  Use above functions alternatively.
+  ZSTD_nextSrcSizeToDecompress() tells how much bytes to provide as 'srcSize' to ZSTD_decompressContinue().
+  ZSTD_decompressContinue() will use previous data blocks to improve compression if they are located prior to current block.
+  Result is the number of bytes regenerated within 'dst'.
+  It can be zero, which is not an error; it just means ZSTD_decompressContinue() has decoded some header.
+*/
+
+/* *************************************
+*  Prefix - version detection
+***************************************/
+#define ZSTDv01_magicNumber   0xFD2FB51E   /* Big Endian version */
+#define ZSTDv01_magicNumberLE 0x1EB52FFD   /* Little Endian version */
+
+
+#if defined (__cplusplus)
+}
+#endif
+
+#endif /* ZSTD_V01_H_28739879432 */
diff --git a/contrib/libzstd/include/zstd/legacy/zstd_v02.c b/contrib/libzstd/include/zstd/legacy/zstd_v02.c
new file mode 100644
index 00000000000..3cf8f477825
--- /dev/null
+++ b/contrib/libzstd/include/zstd/legacy/zstd_v02.c
@@ -0,0 +1,3555 @@
+/**
+ * Copyright (c) 2016-present, Yann Collet, Facebook, Inc.
+ * All rights reserved.
+ *
+ * This source code is licensed under the BSD-style license found in the
+ * LICENSE file in the root directory of this source tree. An additional grant
+ * of patent rights can be found in the PATENTS file in the same directory.
+ */
+
+
+#include <stddef.h>    /* size_t, ptrdiff_t */
+#include "zstd_v02.h"
+#include "error_private.h"
+
+
+/******************************************
+*  Compiler-specific
+******************************************/
+#if defined(_MSC_VER)   /* Visual Studio */
+#   include <stdlib.h>  /* _byteswap_ulong */
+#   include <intrin.h>  /* _byteswap_* */
+#endif
+
+
+/* ******************************************************************
+   mem.h
+   low-level memory access routines
+   Copyright (C) 2013-2015, Yann Collet.
+
+   BSD 2-Clause License (http://www.opensource.org/licenses/bsd-license.php)
+
+   Redistribution and use in source and binary forms, with or without
+   modification, are permitted provided that the following conditions are
+   met:
+
+       * Redistributions of source code must retain the above copyright
+   notice, this list of conditions and the following disclaimer.
+       * Redistributions in binary form must reproduce the above
+   copyright notice, this list of conditions and the following disclaimer
+   in the documentation and/or other materials provided with the
+   distribution.
+
+   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+   OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+    You can contact the author at :
+    - FSE source repository : https://github.com/Cyan4973/FiniteStateEntropy
+    - Public forum : https://groups.google.com/forum/#!forum/lz4c
+****************************************************************** */
+#ifndef MEM_H_MODULE
+#define MEM_H_MODULE
+
+#if defined (__cplusplus)
+extern "C" {
+#endif
+
+/******************************************
+*  Includes
+******************************************/
+#include <stddef.h>    /* size_t, ptrdiff_t */
+#include <string.h>    /* memcpy */
+
+
+/******************************************
+*  Compiler-specific
+******************************************/
+#if defined(__GNUC__)
+#  define MEM_STATIC static __attribute__((unused))
+#elif defined (__cplusplus) || (defined (__STDC_VERSION__) && (__STDC_VERSION__ >= 199901L) /* C99 */)
+#  define MEM_STATIC static inline
+#elif defined(_MSC_VER)
+#  define MEM_STATIC static __inline
+#else
+#  define MEM_STATIC static  /* this version may generate warnings for unused static functions; disable the relevant warning */
+#endif
+
+
+/****************************************************************
+*  Basic Types
+*****************************************************************/
+#if defined (__cplusplus) || (defined (__STDC_VERSION__) && (__STDC_VERSION__ >= 199901L) /* C99 */)
+# include <stdint.h>
+  typedef  uint8_t BYTE;
+  typedef uint16_t U16;
+  typedef  int16_t S16;
+  typedef uint32_t U32;
+  typedef  int32_t S32;
+  typedef uint64_t U64;
+  typedef  int64_t S64;
+#else
+  typedef unsigned char       BYTE;
+  typedef unsigned short      U16;
+  typedef   signed short      S16;
+  typedef unsigned int        U32;
+  typedef   signed int        S32;
+  typedef unsigned long long  U64;
+  typedef   signed long long  S64;
+#endif
+
+
+/****************************************************************
+*  Memory I/O
+*****************************************************************/
+/* MEM_FORCE_MEMORY_ACCESS
+ * By default, access to unaligned memory is controlled by `memcpy()`, which is safe and portable.
+ * Unfortunately, on some target/compiler combinations, the generated assembly is sub-optimal.
+ * The below switch allow to select different access method for improved performance.
+ * Method 0 (default) : use `memcpy()`. Safe and portable.
+ * Method 1 : `__packed` statement. It depends on compiler extension (ie, not portable).
+ *            This method is safe if your compiler supports it, and *generally* as fast or faster than `memcpy`.
+ * Method 2 : direct access. This method is portable but violate C standard.
+ *            It can generate buggy code on targets generating assembly depending on alignment.
+ *            But in some circumstances, it's the only known way to get the most performance (ie GCC + ARMv6)
+ * See http://fastcompression.blogspot.fr/2015/08/accessing-unaligned-memory.html for details.
+ * Prefer these methods in priority order (0 > 1 > 2)
+ */
+#ifndef MEM_FORCE_MEMORY_ACCESS   /* can be defined externally, on command line for example */
+#  if defined(__GNUC__) && ( defined(__ARM_ARCH_6__) || defined(__ARM_ARCH_6J__) || defined(__ARM_ARCH_6K__) || defined(__ARM_ARCH_6Z__) || defined(__ARM_ARCH_6ZK__) || defined(__ARM_ARCH_6T2__) )
+#    define MEM_FORCE_MEMORY_ACCESS 2
+#  elif (defined(__INTEL_COMPILER) && !defined(WIN32)) || \
+  (defined(__GNUC__) && ( defined(__ARM_ARCH_7__) || defined(__ARM_ARCH_7A__) || defined(__ARM_ARCH_7R__) || defined(__ARM_ARCH_7M__) || defined(__ARM_ARCH_7S__) ))
+#    define MEM_FORCE_MEMORY_ACCESS 1
+#  endif
+#endif
+
+MEM_STATIC unsigned MEM_32bits(void) { return sizeof(void*)==4; }
+MEM_STATIC unsigned MEM_64bits(void) { return sizeof(void*)==8; }
+
+MEM_STATIC unsigned MEM_isLittleEndian(void)
+{
+    const union { U32 u; BYTE c[4]; } one = { 1 };   /* don't use static : performance detrimental  */
+    return one.c[0];
+}
+
+#if defined(MEM_FORCE_MEMORY_ACCESS) && (MEM_FORCE_MEMORY_ACCESS==2)
+
+/* violates C standard on structure alignment.
+Only use if no other choice to achieve best performance on target platform */
+MEM_STATIC U16 MEM_read16(const void* memPtr) { return *(const U16*) memPtr; }
+MEM_STATIC U32 MEM_read32(const void* memPtr) { return *(const U32*) memPtr; }
+MEM_STATIC U64 MEM_read64(const void* memPtr) { return *(const U64*) memPtr; }
+
+MEM_STATIC void MEM_write16(void* memPtr, U16 value) { *(U16*)memPtr = value; }
+
+#elif defined(MEM_FORCE_MEMORY_ACCESS) && (MEM_FORCE_MEMORY_ACCESS==1)
+
+/* __pack instructions are safer, but compiler specific, hence potentially problematic for some compilers */
+/* currently only defined for gcc and icc */
+typedef union { U16 u16; U32 u32; U64 u64; } __attribute__((packed)) unalign;
+
+MEM_STATIC U16 MEM_read16(const void* ptr) { return ((const unalign*)ptr)->u16; }
+MEM_STATIC U32 MEM_read32(const void* ptr) { return ((const unalign*)ptr)->u32; }
+MEM_STATIC U64 MEM_read64(const void* ptr) { return ((const unalign*)ptr)->u64; }
+
+MEM_STATIC void MEM_write16(void* memPtr, U16 value) { ((unalign*)memPtr)->u16 = value; }
+
+#else
+
+/* default method, safe and standard.
+   can sometimes prove slower */
+
+MEM_STATIC U16 MEM_read16(const void* memPtr)
+{
+    U16 val; memcpy(&val, memPtr, sizeof(val)); return val;
+}
+
+MEM_STATIC U32 MEM_read32(const void* memPtr)
+{
+    U32 val; memcpy(&val, memPtr, sizeof(val)); return val;
+}
+
+MEM_STATIC U64 MEM_read64(const void* memPtr)
+{
+    U64 val; memcpy(&val, memPtr, sizeof(val)); return val;
+}
+
+MEM_STATIC void MEM_write16(void* memPtr, U16 value)
+{
+    memcpy(memPtr, &value, sizeof(value));
+}
+
+#endif // MEM_FORCE_MEMORY_ACCESS
+
+
+MEM_STATIC U16 MEM_readLE16(const void* memPtr)
+{
+    if (MEM_isLittleEndian())
+        return MEM_read16(memPtr);
+    else
+    {
+        const BYTE* p = (const BYTE*)memPtr;
+        return (U16)(p[0] + (p[1]<<8));
+    }
+}
+
+MEM_STATIC void MEM_writeLE16(void* memPtr, U16 val)
+{
+    if (MEM_isLittleEndian())
+    {
+        MEM_write16(memPtr, val);
+    }
+    else
+    {
+        BYTE* p = (BYTE*)memPtr;
+        p[0] = (BYTE)val;
+        p[1] = (BYTE)(val>>8);
+    }
+}
+
+MEM_STATIC U32 MEM_readLE32(const void* memPtr)
+{
+    if (MEM_isLittleEndian())
+        return MEM_read32(memPtr);
+    else
+    {
+        const BYTE* p = (const BYTE*)memPtr;
+        return (U32)((U32)p[0] + ((U32)p[1]<<8) + ((U32)p[2]<<16) + ((U32)p[3]<<24));
+    }
+}
+
+
+MEM_STATIC U64 MEM_readLE64(const void* memPtr)
+{
+    if (MEM_isLittleEndian())
+        return MEM_read64(memPtr);
+    else
+    {
+        const BYTE* p = (const BYTE*)memPtr;
+        return (U64)((U64)p[0] + ((U64)p[1]<<8) + ((U64)p[2]<<16) + ((U64)p[3]<<24)
+                     + ((U64)p[4]<<32) + ((U64)p[5]<<40) + ((U64)p[6]<<48) + ((U64)p[7]<<56));
+    }
+}
+
+
+MEM_STATIC size_t MEM_readLEST(const void* memPtr)
+{
+    if (MEM_32bits())
+        return (size_t)MEM_readLE32(memPtr);
+    else
+        return (size_t)MEM_readLE64(memPtr);
+}
+
+#if defined (__cplusplus)
+}
+#endif
+
+#endif /* MEM_H_MODULE */
+
+
+/* ******************************************************************
+   bitstream
+   Part of NewGen Entropy library
+   header file (to include)
+   Copyright (C) 2013-2015, Yann Collet.
+
+   BSD 2-Clause License (http://www.opensource.org/licenses/bsd-license.php)
+
+   Redistribution and use in source and binary forms, with or without
+   modification, are permitted provided that the following conditions are
+   met:
+
+       * Redistributions of source code must retain the above copyright
+   notice, this list of conditions and the following disclaimer.
+       * Redistributions in binary form must reproduce the above
+   copyright notice, this list of conditions and the following disclaimer
+   in the documentation and/or other materials provided with the
+   distribution.
+
+   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+   OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+   You can contact the author at :
+   - Source repository : https://github.com/Cyan4973/FiniteStateEntropy
+   - Public forum : https://groups.google.com/forum/#!forum/lz4c
+****************************************************************** */
+#ifndef BITSTREAM_H_MODULE
+#define BITSTREAM_H_MODULE
+
+#if defined (__cplusplus)
+extern "C" {
+#endif
+
+
+/*
+*  This API consists of small unitary functions, which highly benefit from being inlined.
+*  Since link-time-optimization is not available for all compilers,
+*  these functions are defined into a .h to be included.
+*/
+
+
+/**********************************************
+*  bitStream decompression API (read backward)
+**********************************************/
+typedef struct
+{
+    size_t   bitContainer;
+    unsigned bitsConsumed;
+    const char* ptr;
+    const char* start;
+} BIT_DStream_t;
+
+typedef enum { BIT_DStream_unfinished = 0,
+               BIT_DStream_endOfBuffer = 1,
+               BIT_DStream_completed = 2,
+               BIT_DStream_overflow = 3 } BIT_DStream_status;  /* result of BIT_reloadDStream() */
+               /* 1,2,4,8 would be better for bitmap combinations, but slows down performance a bit ... :( */
+
+MEM_STATIC size_t   BIT_initDStream(BIT_DStream_t* bitD, const void* srcBuffer, size_t srcSize);
+MEM_STATIC size_t   BIT_readBits(BIT_DStream_t* bitD, unsigned nbBits);
+MEM_STATIC BIT_DStream_status BIT_reloadDStream(BIT_DStream_t* bitD);
+MEM_STATIC unsigned BIT_endOfDStream(const BIT_DStream_t* bitD);
+
+
+/*
+* Start by invoking BIT_initDStream().
+* A chunk of the bitStream is then stored into a local register.
+* Local register size is 64-bits on 64-bits systems, 32-bits on 32-bits systems (size_t).
+* You can then retrieve bitFields stored into the local register, **in reverse order**.
+* Local register is manually filled from memory by the BIT_reloadDStream() method.
+* A reload guarantee a minimum of ((8*sizeof(size_t))-7) bits when its result is BIT_DStream_unfinished.
+* Otherwise, it can be less than that, so proceed accordingly.
+* Checking if DStream has reached its end can be performed with BIT_endOfDStream()
+*/
+
+
+/******************************************
+*  unsafe API
+******************************************/
+MEM_STATIC size_t BIT_readBitsFast(BIT_DStream_t* bitD, unsigned nbBits);
+/* faster, but works only if nbBits >= 1 */
+
+
+
+/****************************************************************
+*  Helper functions
+****************************************************************/
+MEM_STATIC unsigned BIT_highbit32 (register U32 val)
+{
+#   if defined(_MSC_VER)   /* Visual */
+    unsigned long r=0;
+    _BitScanReverse ( &r, val );
+    return (unsigned) r;
+#   elif defined(__GNUC__) && (__GNUC__ >= 3)   /* Use GCC Intrinsic */
+    return 31 - __builtin_clz (val);
+#   else   /* Software version */
+    static const unsigned DeBruijnClz[32] = { 0, 9, 1, 10, 13, 21, 2, 29, 11, 14, 16, 18, 22, 25, 3, 30, 8, 12, 20, 28, 15, 17, 24, 7, 19, 27, 23, 6, 26, 5, 4, 31 };
+    U32 v = val;
+    unsigned r;
+    v |= v >> 1;
+    v |= v >> 2;
+    v |= v >> 4;
+    v |= v >> 8;
+    v |= v >> 16;
+    r = DeBruijnClz[ (U32) (v * 0x07C4ACDDU) >> 27];
+    return r;
+#   endif
+}
+
+
+
+/**********************************************************
+* bitStream decoding
+**********************************************************/
+
+/*!BIT_initDStream
+*  Initialize a BIT_DStream_t.
+*  @bitD : a pointer to an already allocated BIT_DStream_t structure
+*  @srcBuffer must point at the beginning of a bitStream
+*  @srcSize must be the exact size of the bitStream
+*  @result : size of stream (== srcSize) or an errorCode if a problem is detected
+*/
+MEM_STATIC size_t BIT_initDStream(BIT_DStream_t* bitD, const void* srcBuffer, size_t srcSize)
+{
+    if (srcSize < 1) { memset(bitD, 0, sizeof(*bitD)); return ERROR(srcSize_wrong); }
+
+    if (srcSize >=  sizeof(size_t))   /* normal case */
+    {
+        U32 contain32;
+        bitD->start = (const char*)srcBuffer;
+        bitD->ptr   = (const char*)srcBuffer + srcSize - sizeof(size_t);
+        bitD->bitContainer = MEM_readLEST(bitD->ptr);
+        contain32 = ((const BYTE*)srcBuffer)[srcSize-1];
+        if (contain32 == 0) return ERROR(GENERIC);   /* endMark not present */
+        bitD->bitsConsumed = 8 - BIT_highbit32(contain32);
+    }
+    else
+    {
+        U32 contain32;
+        bitD->start = (const char*)srcBuffer;
+        bitD->ptr   = bitD->start;
+        bitD->bitContainer = *(const BYTE*)(bitD->start);
+        switch(srcSize)
+        {
+            case 7: bitD->bitContainer += (size_t)(((const BYTE*)(bitD->start))[6]) << (sizeof(size_t)*8 - 16);
+            case 6: bitD->bitContainer += (size_t)(((const BYTE*)(bitD->start))[5]) << (sizeof(size_t)*8 - 24);
+            case 5: bitD->bitContainer += (size_t)(((const BYTE*)(bitD->start))[4]) << (sizeof(size_t)*8 - 32);
+            case 4: bitD->bitContainer += (size_t)(((const BYTE*)(bitD->start))[3]) << 24;
+            case 3: bitD->bitContainer += (size_t)(((const BYTE*)(bitD->start))[2]) << 16;
+            case 2: bitD->bitContainer += (size_t)(((const BYTE*)(bitD->start))[1]) <<  8;
+            default:;
+        }
+        contain32 = ((const BYTE*)srcBuffer)[srcSize-1];
+        if (contain32 == 0) return ERROR(GENERIC);   /* endMark not present */
+        bitD->bitsConsumed = 8 - BIT_highbit32(contain32);
+        bitD->bitsConsumed += (U32)(sizeof(size_t) - srcSize)*8;
+    }
+
+    return srcSize;
+}
+
+/*!BIT_lookBits
+ * Provides next n bits from local register
+ * local register is not modified (bits are still present for next read/look)
+ * On 32-bits, maxNbBits==25
+ * On 64-bits, maxNbBits==57
+ * @return : value extracted
+ */
+MEM_STATIC size_t BIT_lookBits(BIT_DStream_t* bitD, U32 nbBits)
+{
+    const U32 bitMask = sizeof(bitD->bitContainer)*8 - 1;
+    return ((bitD->bitContainer << (bitD->bitsConsumed & bitMask)) >> 1) >> ((bitMask-nbBits) & bitMask);
+}
+
+/*! BIT_lookBitsFast :
+*   unsafe version; only works only if nbBits >= 1 */
+MEM_STATIC size_t BIT_lookBitsFast(BIT_DStream_t* bitD, U32 nbBits)
+{
+    const U32 bitMask = sizeof(bitD->bitContainer)*8 - 1;
+    return (bitD->bitContainer << (bitD->bitsConsumed & bitMask)) >> (((bitMask+1)-nbBits) & bitMask);
+}
+
+MEM_STATIC void BIT_skipBits(BIT_DStream_t* bitD, U32 nbBits)
+{
+    bitD->bitsConsumed += nbBits;
+}
+
+/*!BIT_readBits
+ * Read next n bits from local register.
+ * pay attention to not read more than nbBits contained into local register.
+ * @return : extracted value.
+ */
+MEM_STATIC size_t BIT_readBits(BIT_DStream_t* bitD, U32 nbBits)
+{
+    size_t value = BIT_lookBits(bitD, nbBits);
+    BIT_skipBits(bitD, nbBits);
+    return value;
+}
+
+/*!BIT_readBitsFast :
+*  unsafe version; only works only if nbBits >= 1 */
+MEM_STATIC size_t BIT_readBitsFast(BIT_DStream_t* bitD, U32 nbBits)
+{
+    size_t value = BIT_lookBitsFast(bitD, nbBits);
+    BIT_skipBits(bitD, nbBits);
+    return value;
+}
+
+MEM_STATIC BIT_DStream_status BIT_reloadDStream(BIT_DStream_t* bitD)
+{
+    if (bitD->bitsConsumed > (sizeof(bitD->bitContainer)*8))  /* should never happen */
+        return BIT_DStream_overflow;
+
+    if (bitD->ptr >= bitD->start + sizeof(bitD->bitContainer))
+    {
+        bitD->ptr -= bitD->bitsConsumed >> 3;
+        bitD->bitsConsumed &= 7;
+        bitD->bitContainer = MEM_readLEST(bitD->ptr);
+        return BIT_DStream_unfinished;
+    }
+    if (bitD->ptr == bitD->start)
+    {
+        if (bitD->bitsConsumed < sizeof(bitD->bitContainer)*8) return BIT_DStream_endOfBuffer;
+        return BIT_DStream_completed;
+    }
+    {
+        U32 nbBytes = bitD->bitsConsumed >> 3;
+        BIT_DStream_status result = BIT_DStream_unfinished;
+        if (bitD->ptr - nbBytes < bitD->start)
+        {
+            nbBytes = (U32)(bitD->ptr - bitD->start);  /* ptr > start */
+            result = BIT_DStream_endOfBuffer;
+        }
+        bitD->ptr -= nbBytes;
+        bitD->bitsConsumed -= nbBytes*8;
+        bitD->bitContainer = MEM_readLEST(bitD->ptr);   /* reminder : srcSize > sizeof(bitD) */
+        return result;
+    }
+}
+
+/*! BIT_endOfDStream
+*   @return Tells if DStream has reached its exact end
+*/
+MEM_STATIC unsigned BIT_endOfDStream(const BIT_DStream_t* DStream)
+{
+    return ((DStream->ptr == DStream->start) && (DStream->bitsConsumed == sizeof(DStream->bitContainer)*8));
+}
+
+#if defined (__cplusplus)
+}
+#endif
+
+#endif /* BITSTREAM_H_MODULE */
+/* ******************************************************************
+   Error codes and messages
+   Copyright (C) 2013-2015, Yann Collet
+
+   BSD 2-Clause License (http://www.opensource.org/licenses/bsd-license.php)
+
+   Redistribution and use in source and binary forms, with or without
+   modification, are permitted provided that the following conditions are
+   met:
+
+       * Redistributions of source code must retain the above copyright
+   notice, this list of conditions and the following disclaimer.
+       * Redistributions in binary form must reproduce the above
+   copyright notice, this list of conditions and the following disclaimer
+   in the documentation and/or other materials provided with the
+   distribution.
+
+   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+   OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+   You can contact the author at :
+   - Source repository : https://github.com/Cyan4973/FiniteStateEntropy
+   - Public forum : https://groups.google.com/forum/#!forum/lz4c
+****************************************************************** */
+#ifndef ERROR_H_MODULE
+#define ERROR_H_MODULE
+
+#if defined (__cplusplus)
+extern "C" {
+#endif
+
+
+/******************************************
+*  Compiler-specific
+******************************************/
+#if defined (__cplusplus) || (defined (__STDC_VERSION__) && (__STDC_VERSION__ >= 199901L) /* C99 */)
+#  define ERR_STATIC static inline
+#elif defined(_MSC_VER)
+#  define ERR_STATIC static __inline
+#elif defined(__GNUC__)
+#  define ERR_STATIC static __attribute__((unused))
+#else
+#  define ERR_STATIC static  /* this version may generate warnings for unused static functions; disable the relevant warning */
+#endif
+
+
+/******************************************
+*  Error Management
+******************************************/
+#define PREFIX(name) ZSTD_error_##name
+
+#define ERROR(name) (size_t)-PREFIX(name)
+
+#define ERROR_LIST(ITEM) \
+        ITEM(PREFIX(No_Error)) ITEM(PREFIX(GENERIC)) \
+        ITEM(PREFIX(dstSize_tooSmall)) ITEM(PREFIX(srcSize_wrong)) \
+        ITEM(PREFIX(prefix_unknown)) ITEM(PREFIX(corruption_detected)) \
+        ITEM(PREFIX(tableLog_tooLarge)) ITEM(PREFIX(maxSymbolValue_tooLarge)) ITEM(PREFIX(maxSymbolValue_tooSmall)) \
+        ITEM(PREFIX(maxCode))
+
+#define ERROR_GENERATE_ENUM(ENUM) ENUM,
+typedef enum { ERROR_LIST(ERROR_GENERATE_ENUM) } ERR_codes;  /* enum is exposed, to detect & handle specific errors; compare function result to -enum value */
+
+#define ERROR_CONVERTTOSTRING(STRING) #STRING,
+#define ERROR_GENERATE_STRING(EXPR) ERROR_CONVERTTOSTRING(EXPR)
+static const char* ERR_strings[] = { ERROR_LIST(ERROR_GENERATE_STRING) };
+
+ERR_STATIC unsigned ERR_isError(size_t code) { return (code > ERROR(maxCode)); }
+
+ERR_STATIC const char* ERR_getErrorName(size_t code)
+{
+    static const char* codeError = "Unspecified error code";
+    if (ERR_isError(code)) return ERR_strings[-(int)(code)];
+    return codeError;
+}
+
+
+#if defined (__cplusplus)
+}
+#endif
+
+#endif /* ERROR_H_MODULE */
+/*
+Constructor and Destructor of type FSE_CTable
+    Note that its size depends on 'tableLog' and 'maxSymbolValue' */
+typedef unsigned FSE_CTable;   /* don't allocate that. It's just a way to be more restrictive than void* */
+typedef unsigned FSE_DTable;   /* don't allocate that. It's just a way to be more restrictive than void* */
+
+
+/* ******************************************************************
+   FSE : Finite State Entropy coder
+   header file for static linking (only)
+   Copyright (C) 2013-2015, Yann Collet
+
+   BSD 2-Clause License (http://www.opensource.org/licenses/bsd-license.php)
+
+   Redistribution and use in source and binary forms, with or without
+   modification, are permitted provided that the following conditions are
+   met:
+
+       * Redistributions of source code must retain the above copyright
+   notice, this list of conditions and the following disclaimer.
+       * Redistributions in binary form must reproduce the above
+   copyright notice, this list of conditions and the following disclaimer
+   in the documentation and/or other materials provided with the
+   distribution.
+
+   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+   OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+   You can contact the author at :
+   - Source repository : https://github.com/Cyan4973/FiniteStateEntropy
+   - Public forum : https://groups.google.com/forum/#!forum/lz4c
+****************************************************************** */
+#if defined (__cplusplus)
+extern "C" {
+#endif
+
+
+/******************************************
+*  Static allocation
+******************************************/
+/* FSE buffer bounds */
+#define FSE_NCOUNTBOUND 512
+#define FSE_BLOCKBOUND(size) (size + (size>>7))
+#define FSE_COMPRESSBOUND(size) (FSE_NCOUNTBOUND + FSE_BLOCKBOUND(size))   /* Macro version, useful for static allocation */
+
+/* You can statically allocate FSE CTable/DTable as a table of unsigned using below macro */
+#define FSE_CTABLE_SIZE_U32(maxTableLog, maxSymbolValue)   (1 + (1<<(maxTableLog-1)) + ((maxSymbolValue+1)*2))
+#define FSE_DTABLE_SIZE_U32(maxTableLog)                   (1 + (1<<maxTableLog))
+
+
+/******************************************
+*  FSE advanced API
+******************************************/
+static size_t FSE_buildDTable_raw (FSE_DTable* dt, unsigned nbBits);
+/* build a fake FSE_DTable, designed to read an uncompressed bitstream where each symbol uses nbBits */
+
+static size_t FSE_buildDTable_rle (FSE_DTable* dt, unsigned char symbolValue);
+/* build a fake FSE_DTable, designed to always generate the same symbolValue */
+
+
+/******************************************
+*  FSE symbol decompression API
+******************************************/
+typedef struct
+{
+    size_t      state;
+    const void* table;   /* precise table may vary, depending on U16 */
+} FSE_DState_t;
+
+
+static void     FSE_initDState(FSE_DState_t* DStatePtr, BIT_DStream_t* bitD, const FSE_DTable* dt);
+
+static unsigned char FSE_decodeSymbol(FSE_DState_t* DStatePtr, BIT_DStream_t* bitD);
+
+static unsigned FSE_endOfDState(const FSE_DState_t* DStatePtr);
+
+/*
+Let's now decompose FSE_decompress_usingDTable() into its unitary components.
+You will decode FSE-encoded symbols from the bitStream,
+and also any other bitFields you put in, **in reverse order**.
+
+You will need a few variables to track your bitStream. They are :
+
+BIT_DStream_t DStream;    // Stream context
+FSE_DState_t  DState;     // State context. Multiple ones are possible
+FSE_DTable*   DTablePtr;  // Decoding table, provided by FSE_buildDTable()
+
+The first thing to do is to init the bitStream.
+    errorCode = BIT_initDStream(&DStream, srcBuffer, srcSize);
+
+You should then retrieve your initial state(s)
+(in reverse flushing order if you have several ones) :
+    errorCode = FSE_initDState(&DState, &DStream, DTablePtr);
+
+You can then decode your data, symbol after symbol.
+For information the maximum number of bits read by FSE_decodeSymbol() is 'tableLog'.
+Keep in mind that symbols are decoded in reverse order, like a LIFO stack (last in, first out).
+    unsigned char symbol = FSE_decodeSymbol(&DState, &DStream);
+
+You can retrieve any bitfield you eventually stored into the bitStream (in reverse order)
+Note : maximum allowed nbBits is 25, for 32-bits compatibility
+    size_t bitField = BIT_readBits(&DStream, nbBits);
+
+All above operations only read from local register (which size depends on size_t).
+Refueling the register from memory is manually performed by the reload method.
+    endSignal = FSE_reloadDStream(&DStream);
+
+BIT_reloadDStream() result tells if there is still some more data to read from DStream.
+BIT_DStream_unfinished : there is still some data left into the DStream.
+BIT_DStream_endOfBuffer : Dstream reached end of buffer. Its container may no longer be completely filled.
+BIT_DStream_completed : Dstream reached its exact end, corresponding in general to decompression completed.
+BIT_DStream_tooFar : Dstream went too far. Decompression result is corrupted.
+
+When reaching end of buffer (BIT_DStream_endOfBuffer), progress slowly, notably if you decode multiple symbols per loop,
+to properly detect the exact end of stream.
+After each decoded symbol, check if DStream is fully consumed using this simple test :
+    BIT_reloadDStream(&DStream) >= BIT_DStream_completed
+
+When it's done, verify decompression is fully completed, by checking both DStream and the relevant states.
+Checking if DStream has reached its end is performed by :
+    BIT_endOfDStream(&DStream);
+Check also the states. There might be some symbols left there, if some high probability ones (>50%) are possible.
+    FSE_endOfDState(&DState);
+*/
+
+
+/******************************************
+*  FSE unsafe API
+******************************************/
+static unsigned char FSE_decodeSymbolFast(FSE_DState_t* DStatePtr, BIT_DStream_t* bitD);
+/* faster, but works only if nbBits is always >= 1 (otherwise, result will be corrupted) */
+
+
+/******************************************
+*  Implementation of inline functions
+******************************************/
+
+/* decompression */
+
+typedef struct {
+    U16 tableLog;
+    U16 fastMode;
+} FSE_DTableHeader;   /* sizeof U32 */
+
+typedef struct
+{
+    unsigned short newState;
+    unsigned char  symbol;
+    unsigned char  nbBits;
+} FSE_decode_t;   /* size == U32 */
+
+MEM_STATIC void FSE_initDState(FSE_DState_t* DStatePtr, BIT_DStream_t* bitD, const FSE_DTable* dt)
+{
+    FSE_DTableHeader DTableH;
+    memcpy(&DTableH, dt, sizeof(DTableH));
+    DStatePtr->state = BIT_readBits(bitD, DTableH.tableLog);
+    BIT_reloadDStream(bitD);
+    DStatePtr->table = dt + 1;
+}
+
+MEM_STATIC BYTE FSE_decodeSymbol(FSE_DState_t* DStatePtr, BIT_DStream_t* bitD)
+{
+    const FSE_decode_t DInfo = ((const FSE_decode_t*)(DStatePtr->table))[DStatePtr->state];
+    const U32  nbBits = DInfo.nbBits;
+    BYTE symbol = DInfo.symbol;
+    size_t lowBits = BIT_readBits(bitD, nbBits);
+
+    DStatePtr->state = DInfo.newState + lowBits;
+    return symbol;
+}
+
+MEM_STATIC BYTE FSE_decodeSymbolFast(FSE_DState_t* DStatePtr, BIT_DStream_t* bitD)
+{
+    const FSE_decode_t DInfo = ((const FSE_decode_t*)(DStatePtr->table))[DStatePtr->state];
+    const U32 nbBits = DInfo.nbBits;
+    BYTE symbol = DInfo.symbol;
+    size_t lowBits = BIT_readBitsFast(bitD, nbBits);
+
+    DStatePtr->state = DInfo.newState + lowBits;
+    return symbol;
+}
+
+MEM_STATIC unsigned FSE_endOfDState(const FSE_DState_t* DStatePtr)
+{
+    return DStatePtr->state == 0;
+}
+
+
+#if defined (__cplusplus)
+}
+#endif
+/* ******************************************************************
+   Huff0 : Huffman coder, part of New Generation Entropy library
+   header file for static linking (only)
+   Copyright (C) 2013-2015, Yann Collet
+
+   BSD 2-Clause License (http://www.opensource.org/licenses/bsd-license.php)
+
+   Redistribution and use in source and binary forms, with or without
+   modification, are permitted provided that the following conditions are
+   met:
+
+       * Redistributions of source code must retain the above copyright
+   notice, this list of conditions and the following disclaimer.
+       * Redistributions in binary form must reproduce the above
+   copyright notice, this list of conditions and the following disclaimer
+   in the documentation and/or other materials provided with the
+   distribution.
+
+   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+   OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+   You can contact the author at :
+   - Source repository : https://github.com/Cyan4973/FiniteStateEntropy
+   - Public forum : https://groups.google.com/forum/#!forum/lz4c
+****************************************************************** */
+
+#if defined (__cplusplus)
+extern "C" {
+#endif
+
+/******************************************
+*  Static allocation macros
+******************************************/
+/* Huff0 buffer bounds */
+#define HUF_CTABLEBOUND 129
+#define HUF_BLOCKBOUND(size) (size + (size>>8) + 8)   /* only true if incompressible pre-filtered with fast heuristic */
+#define HUF_COMPRESSBOUND(size) (HUF_CTABLEBOUND + HUF_BLOCKBOUND(size))   /* Macro version, useful for static allocation */
+
+/* static allocation of Huff0's DTable */
+#define HUF_DTABLE_SIZE(maxTableLog)   (1 + (1<<maxTableLog))  /* nb Cells; use unsigned short for X2, unsigned int for X4 */
+#define HUF_CREATE_STATIC_DTABLEX2(DTable, maxTableLog) \
+        unsigned short DTable[HUF_DTABLE_SIZE(maxTableLog)] = { maxTableLog }
+#define HUF_CREATE_STATIC_DTABLEX4(DTable, maxTableLog) \
+        unsigned int DTable[HUF_DTABLE_SIZE(maxTableLog)] = { maxTableLog }
+#define HUF_CREATE_STATIC_DTABLEX6(DTable, maxTableLog) \
+        unsigned int DTable[HUF_DTABLE_SIZE(maxTableLog) * 3 / 2] = { maxTableLog }
+
+
+/******************************************
+*  Advanced functions
+******************************************/
+static size_t HUF_decompress4X2 (void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize);   /* single-symbol decoder */
+static size_t HUF_decompress4X4 (void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize);   /* double-symbols decoder */
+static size_t HUF_decompress4X6 (void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize);   /* quad-symbols decoder */
+
+
+#if defined (__cplusplus)
+}
+#endif
+
+/*
+    zstd - standard compression library
+    Header File
+    Copyright (C) 2014-2015, Yann Collet.
+
+    BSD 2-Clause License (http://www.opensource.org/licenses/bsd-license.php)
+
+    Redistribution and use in source and binary forms, with or without
+    modification, are permitted provided that the following conditions are
+    met:
+    * Redistributions of source code must retain the above copyright
+    notice, this list of conditions and the following disclaimer.
+    * Redistributions in binary form must reproduce the above
+    copyright notice, this list of conditions and the following disclaimer
+    in the documentation and/or other materials provided with the
+    distribution.
+    THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+    "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+    LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+    A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+    OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+    SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+    LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+    DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+    THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+    (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+    OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+    You can contact the author at :
+    - zstd source repository : https://github.com/Cyan4973/zstd
+    - ztsd public forum : https://groups.google.com/forum/#!forum/lz4c
+*/
+
+#if defined (__cplusplus)
+extern "C" {
+#endif
+
+/* *************************************
+*  Includes
+***************************************/
+#include <stddef.h>   /* size_t */
+
+
+/* *************************************
+*  Version
+***************************************/
+#define ZSTD_VERSION_MAJOR    0    /* for breaking interface changes  */
+#define ZSTD_VERSION_MINOR    2    /* for new (non-breaking) interface capabilities */
+#define ZSTD_VERSION_RELEASE  2    /* for tweaks, bug-fixes, or development */
+#define ZSTD_VERSION_NUMBER  (ZSTD_VERSION_MAJOR *100*100 + ZSTD_VERSION_MINOR *100 + ZSTD_VERSION_RELEASE)
+
+
+/* *************************************
+*  Advanced functions
+***************************************/
+typedef struct ZSTD_CCtx_s ZSTD_CCtx;   /* incomplete type */
+
+#if defined (__cplusplus)
+}
+#endif
+/*
+    zstd - standard compression library
+    Header File for static linking only
+    Copyright (C) 2014-2015, Yann Collet.
+
+    BSD 2-Clause License (http://www.opensource.org/licenses/bsd-license.php)
+
+    Redistribution and use in source and binary forms, with or without
+    modification, are permitted provided that the following conditions are
+    met:
+    * Redistributions of source code must retain the above copyright
+    notice, this list of conditions and the following disclaimer.
+    * Redistributions in binary form must reproduce the above
+    copyright notice, this list of conditions and the following disclaimer
+    in the documentation and/or other materials provided with the
+    distribution.
+    THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+    "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+    LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+    A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+    OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+    SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+    LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+    DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+    THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+    (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+    OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+    You can contact the author at :
+    - zstd source repository : https://github.com/Cyan4973/zstd
+    - ztsd public forum : https://groups.google.com/forum/#!forum/lz4c
+*/
+
+/* The objects defined into this file should be considered experimental.
+ * They are not labelled stable, as their prototype may change in the future.
+ * You can use them for tests, provide feedback, or if you can endure risk of future changes.
+ */
+
+#if defined (__cplusplus)
+extern "C" {
+#endif
+
+/* *************************************
+*  Streaming functions
+***************************************/
+
+typedef struct ZSTD_DCtx_s ZSTD_DCtx;
+
+/*
+  Use above functions alternatively.
+  ZSTD_nextSrcSizeToDecompress() tells how much bytes to provide as 'srcSize' to ZSTD_decompressContinue().
+  ZSTD_decompressContinue() will use previous data blocks to improve compression if they are located prior to current block.
+  Result is the number of bytes regenerated within 'dst'.
+  It can be zero, which is not an error; it just means ZSTD_decompressContinue() has decoded some header.
+*/
+
+/* *************************************
+*  Prefix - version detection
+***************************************/
+#define ZSTD_magicNumber 0xFD2FB522   /* v0.2 (current)*/
+
+
+#if defined (__cplusplus)
+}
+#endif
+/* ******************************************************************
+   FSE : Finite State Entropy coder
+   Copyright (C) 2013-2015, Yann Collet.
+
+   BSD 2-Clause License (http://www.opensource.org/licenses/bsd-license.php)
+
+   Redistribution and use in source and binary forms, with or without
+   modification, are permitted provided that the following conditions are
+   met:
+
+       * Redistributions of source code must retain the above copyright
+   notice, this list of conditions and the following disclaimer.
+       * Redistributions in binary form must reproduce the above
+   copyright notice, this list of conditions and the following disclaimer
+   in the documentation and/or other materials provided with the
+   distribution.
+
+   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+   OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+    You can contact the author at :
+    - FSE source repository : https://github.com/Cyan4973/FiniteStateEntropy
+    - Public forum : https://groups.google.com/forum/#!forum/lz4c
+****************************************************************** */
+
+#ifndef FSE_COMMONDEFS_ONLY
+
+/****************************************************************
+*  Tuning parameters
+****************************************************************/
+/* MEMORY_USAGE :
+*  Memory usage formula : N->2^N Bytes (examples : 10 -> 1KB; 12 -> 4KB ; 16 -> 64KB; 20 -> 1MB; etc.)
+*  Increasing memory usage improves compression ratio
+*  Reduced memory usage can improve speed, due to cache effect
+*  Recommended max value is 14, for 16KB, which nicely fits into Intel x86 L1 cache */
+#define FSE_MAX_MEMORY_USAGE 14
+#define FSE_DEFAULT_MEMORY_USAGE 13
+
+/* FSE_MAX_SYMBOL_VALUE :
+*  Maximum symbol value authorized.
+*  Required for proper stack allocation */
+#define FSE_MAX_SYMBOL_VALUE 255
+
+
+/****************************************************************
+*  template functions type & suffix
+****************************************************************/
+#define FSE_FUNCTION_TYPE BYTE
+#define FSE_FUNCTION_EXTENSION
+
+
+/****************************************************************
+*  Byte symbol type
+****************************************************************/
+#endif   /* !FSE_COMMONDEFS_ONLY */
+
+
+/****************************************************************
+*  Compiler specifics
+****************************************************************/
+#ifdef _MSC_VER    /* Visual Studio */
+#  define FORCE_INLINE static __forceinline
+#  include <intrin.h>                    /* For Visual 2005 */
+#  pragma warning(disable : 4127)        /* disable: C4127: conditional expression is constant */
+#  pragma warning(disable : 4214)        /* disable: C4214: non-int bitfields */
+#else
+#  if defined (__cplusplus) || defined (__STDC_VERSION__) && __STDC_VERSION__ >= 199901L   /* C99 */
+#    ifdef __GNUC__
+#      define FORCE_INLINE static inline __attribute__((always_inline))
+#    else
+#      define FORCE_INLINE static inline
+#    endif
+#  else
+#    define FORCE_INLINE static
+#  endif /* __STDC_VERSION__ */
+#endif
+
+
+/****************************************************************
+*  Includes
+****************************************************************/
+#include <stdlib.h>     /* malloc, free, qsort */
+#include <string.h>     /* memcpy, memset */
+#include <stdio.h>      /* printf (debug) */
+
+/****************************************************************
+*  Constants
+*****************************************************************/
+#define FSE_MAX_TABLELOG  (FSE_MAX_MEMORY_USAGE-2)
+#define FSE_MAX_TABLESIZE (1U<<FSE_MAX_TABLELOG)
+#define FSE_MAXTABLESIZE_MASK (FSE_MAX_TABLESIZE-1)
+#define FSE_DEFAULT_TABLELOG (FSE_DEFAULT_MEMORY_USAGE-2)
+#define FSE_MIN_TABLELOG 5
+
+#define FSE_TABLELOG_ABSOLUTE_MAX 15
+#if FSE_MAX_TABLELOG > FSE_TABLELOG_ABSOLUTE_MAX
+#error "FSE_MAX_TABLELOG > FSE_TABLELOG_ABSOLUTE_MAX is not supported"
+#endif
+
+
+/****************************************************************
+*  Error Management
+****************************************************************/
+#define FSE_STATIC_ASSERT(c) { enum { FSE_static_assert = 1/(int)(!!(c)) }; }   /* use only *after* variable declarations */
+
+
+/****************************************************************
+*  Complex types
+****************************************************************/
+typedef U32 DTable_max_t[FSE_DTABLE_SIZE_U32(FSE_MAX_TABLELOG)];
+
+
+/****************************************************************
+*  Templates
+****************************************************************/
+/*
+  designed to be included
+  for type-specific functions (template emulation in C)
+  Objective is to write these functions only once, for improved maintenance
+*/
+
+/* safety checks */
+#ifndef FSE_FUNCTION_EXTENSION
+#  error "FSE_FUNCTION_EXTENSION must be defined"
+#endif
+#ifndef FSE_FUNCTION_TYPE
+#  error "FSE_FUNCTION_TYPE must be defined"
+#endif
+
+/* Function names */
+#define FSE_CAT(X,Y) X##Y
+#define FSE_FUNCTION_NAME(X,Y) FSE_CAT(X,Y)
+#define FSE_TYPE_NAME(X,Y) FSE_CAT(X,Y)
+
+
+/* Function templates */
+
+#define FSE_DECODE_TYPE FSE_decode_t
+
+static U32 FSE_tableStep(U32 tableSize) { return (tableSize>>1) + (tableSize>>3) + 3; }
+
+static size_t FSE_buildDTable
+(FSE_DTable* dt, const short* normalizedCounter, unsigned maxSymbolValue, unsigned tableLog)
+{
+    void* ptr = dt+1;
+    FSE_DECODE_TYPE* const tableDecode = (FSE_DECODE_TYPE*)ptr;
+    FSE_DTableHeader DTableH;
+    const U32 tableSize = 1 << tableLog;
+    const U32 tableMask = tableSize-1;
+    const U32 step = FSE_tableStep(tableSize);
+    U16 symbolNext[FSE_MAX_SYMBOL_VALUE+1];
+    U32 position = 0;
+    U32 highThreshold = tableSize-1;
+    const S16 largeLimit= (S16)(1 << (tableLog-1));
+    U32 noLarge = 1;
+    U32 s;
+
+    /* Sanity Checks */
+    if (maxSymbolValue > FSE_MAX_SYMBOL_VALUE) return ERROR(maxSymbolValue_tooLarge);
+    if (tableLog > FSE_MAX_TABLELOG) return ERROR(tableLog_tooLarge);
+
+    /* Init, lay down lowprob symbols */
+    DTableH.tableLog = (U16)tableLog;
+    for (s=0; s<=maxSymbolValue; s++)
+    {
+        if (normalizedCounter[s]==-1)
+        {
+            tableDecode[highThreshold--].symbol = (FSE_FUNCTION_TYPE)s;
+            symbolNext[s] = 1;
+        }
+        else
+        {
+            if (normalizedCounter[s] >= largeLimit) noLarge=0;
+            symbolNext[s] = normalizedCounter[s];
+        }
+    }
+
+    /* Spread symbols */
+    for (s=0; s<=maxSymbolValue; s++)
+    {
+        int i;
+        for (i=0; i<normalizedCounter[s]; i++)
+        {
+            tableDecode[position].symbol = (FSE_FUNCTION_TYPE)s;
+            position = (position + step) & tableMask;
+            while (position > highThreshold) position = (position + step) & tableMask;   /* lowprob area */
+        }
+    }
+
+    if (position!=0) return ERROR(GENERIC);   /* position must reach all cells once, otherwise normalizedCounter is incorrect */
+
+    /* Build Decoding table */
+    {
+        U32 i;
+        for (i=0; i<tableSize; i++)
+        {
+            FSE_FUNCTION_TYPE symbol = (FSE_FUNCTION_TYPE)(tableDecode[i].symbol);
+            U16 nextState = symbolNext[symbol]++;
+            tableDecode[i].nbBits = (BYTE) (tableLog - BIT_highbit32 ((U32)nextState) );
+            tableDecode[i].newState = (U16) ( (nextState << tableDecode[i].nbBits) - tableSize);
+        }
+    }
+
+    DTableH.fastMode = (U16)noLarge;
+    memcpy(dt, &DTableH, sizeof(DTableH));   /* memcpy(), to avoid strict aliasing warnings */
+    return 0;
+}
+
+
+#ifndef FSE_COMMONDEFS_ONLY
+/******************************************
+*  FSE helper functions
+******************************************/
+static unsigned FSE_isError(size_t code) { return ERR_isError(code); }
+
+
+/****************************************************************
+*  FSE NCount encoding-decoding
+****************************************************************/
+static short FSE_abs(short a)
+{
+    return (short)(a<0 ? -a : a);
+}
+
+static size_t FSE_readNCount (short* normalizedCounter, unsigned* maxSVPtr, unsigned* tableLogPtr,
+                 const void* headerBuffer, size_t hbSize)
+{
+    const BYTE* const istart = (const BYTE*) headerBuffer;
+    const BYTE* const iend = istart + hbSize;
+    const BYTE* ip = istart;
+    int nbBits;
+    int remaining;
+    int threshold;
+    U32 bitStream;
+    int bitCount;
+    unsigned charnum = 0;
+    int previous0 = 0;
+
+    if (hbSize < 4) return ERROR(srcSize_wrong);
+    bitStream = MEM_readLE32(ip);
+    nbBits = (bitStream & 0xF) + FSE_MIN_TABLELOG;   /* extract tableLog */
+    if (nbBits > FSE_TABLELOG_ABSOLUTE_MAX) return ERROR(tableLog_tooLarge);
+    bitStream >>= 4;
+    bitCount = 4;
+    *tableLogPtr = nbBits;
+    remaining = (1<<nbBits)+1;
+    threshold = 1<<nbBits;
+    nbBits++;
+
+    while ((remaining>1) && (charnum<=*maxSVPtr))
+    {
+        if (previous0)
+        {
+            unsigned n0 = charnum;
+            while ((bitStream & 0xFFFF) == 0xFFFF)
+            {
+                n0+=24;
+                if (ip < iend-5)
+                {
+                    ip+=2;
+                    bitStream = MEM_readLE32(ip) >> bitCount;
+                }
+                else
+                {
+                    bitStream >>= 16;
+                    bitCount+=16;
+                }
+            }
+            while ((bitStream & 3) == 3)
+            {
+                n0+=3;
+                bitStream>>=2;
+                bitCount+=2;
+            }
+            n0 += bitStream & 3;
+            bitCount += 2;
+            if (n0 > *maxSVPtr) return ERROR(maxSymbolValue_tooSmall);
+            while (charnum < n0) normalizedCounter[charnum++] = 0;
+            if ((ip <= iend-7) || (ip + (bitCount>>3) <= iend-4))
+            {
+                ip += bitCount>>3;
+                bitCount &= 7;
+                bitStream = MEM_readLE32(ip) >> bitCount;
+            }
+            else
+                bitStream >>= 2;
+        }
+        {
+            const short max = (short)((2*threshold-1)-remaining);
+            short count;
+
+            if ((bitStream & (threshold-1)) < (U32)max)
+            {
+                count = (short)(bitStream & (threshold-1));
+                bitCount   += nbBits-1;
+            }
+            else
+            {
+                count = (short)(bitStream & (2*threshold-1));
+                if (count >= threshold) count -= max;
+                bitCount   += nbBits;
+            }
+
+            count--;   /* extra accuracy */
+            remaining -= FSE_abs(count);
+            normalizedCounter[charnum++] = count;
+            previous0 = !count;
+            while (remaining < threshold)
+            {
+                nbBits--;
+                threshold >>= 1;
+            }
+
+            {
+                if ((ip <= iend-7) || (ip + (bitCount>>3) <= iend-4))
+                {
+                    ip += bitCount>>3;
+                    bitCount &= 7;
+                }
+                else
+                {
+                    bitCount -= (int)(8 * (iend - 4 - ip));
+                    ip = iend - 4;
+                }
+                bitStream = MEM_readLE32(ip) >> (bitCount & 31);
+            }
+        }
+    }
+    if (remaining != 1) return ERROR(GENERIC);
+    *maxSVPtr = charnum-1;
+
+    ip += (bitCount+7)>>3;
+    if ((size_t)(ip-istart) > hbSize) return ERROR(srcSize_wrong);
+    return ip-istart;
+}
+
+
+/*********************************************************
+*  Decompression (Byte symbols)
+*********************************************************/
+static size_t FSE_buildDTable_rle (FSE_DTable* dt, BYTE symbolValue)
+{
+    void* ptr = dt;
+    FSE_DTableHeader* const DTableH = (FSE_DTableHeader*)ptr;
+    FSE_decode_t* const cell = (FSE_decode_t*)(ptr) + 1;   /* because dt is unsigned */
+
+    DTableH->tableLog = 0;
+    DTableH->fastMode = 0;
+
+    cell->newState = 0;
+    cell->symbol = symbolValue;
+    cell->nbBits = 0;
+
+    return 0;
+}
+
+
+static size_t FSE_buildDTable_raw (FSE_DTable* dt, unsigned nbBits)
+{
+    void* ptr = dt;
+    FSE_DTableHeader* const DTableH = (FSE_DTableHeader*)ptr;
+    FSE_decode_t* const dinfo = (FSE_decode_t*)(ptr) + 1;   /* because dt is unsigned */
+    const unsigned tableSize = 1 << nbBits;
+    const unsigned tableMask = tableSize - 1;
+    const unsigned maxSymbolValue = tableMask;
+    unsigned s;
+
+    /* Sanity checks */
+    if (nbBits < 1) return ERROR(GENERIC);         /* min size */
+
+    /* Build Decoding Table */
+    DTableH->tableLog = (U16)nbBits;
+    DTableH->fastMode = 1;
+    for (s=0; s<=maxSymbolValue; s++)
+    {
+        dinfo[s].newState = 0;
+        dinfo[s].symbol = (BYTE)s;
+        dinfo[s].nbBits = (BYTE)nbBits;
+    }
+
+    return 0;
+}
+
+FORCE_INLINE size_t FSE_decompress_usingDTable_generic(
+          void* dst, size_t maxDstSize,
+    const void* cSrc, size_t cSrcSize,
+    const FSE_DTable* dt, const unsigned fast)
+{
+    BYTE* const ostart = (BYTE*) dst;
+    BYTE* op = ostart;
+    BYTE* const omax = op + maxDstSize;
+    BYTE* const olimit = omax-3;
+
+    BIT_DStream_t bitD;
+    FSE_DState_t state1;
+    FSE_DState_t state2;
+    size_t errorCode;
+
+    /* Init */
+    errorCode = BIT_initDStream(&bitD, cSrc, cSrcSize);   /* replaced last arg by maxCompressed Size */
+    if (FSE_isError(errorCode)) return errorCode;
+
+    FSE_initDState(&state1, &bitD, dt);
+    FSE_initDState(&state2, &bitD, dt);
+
+#define FSE_GETSYMBOL(statePtr) fast ? FSE_decodeSymbolFast(statePtr, &bitD) : FSE_decodeSymbol(statePtr, &bitD)
+
+    /* 4 symbols per loop */
+    for ( ; (BIT_reloadDStream(&bitD)==BIT_DStream_unfinished) && (op<olimit) ; op+=4)
+    {
+        op[0] = FSE_GETSYMBOL(&state1);
+
+        if (FSE_MAX_TABLELOG*2+7 > sizeof(bitD.bitContainer)*8)    /* This test must be static */
+            BIT_reloadDStream(&bitD);
+
+        op[1] = FSE_GETSYMBOL(&state2);
+
+        if (FSE_MAX_TABLELOG*4+7 > sizeof(bitD.bitContainer)*8)    /* This test must be static */
+            { if (BIT_reloadDStream(&bitD) > BIT_DStream_unfinished) { op+=2; break; } }
+
+        op[2] = FSE_GETSYMBOL(&state1);
+
+        if (FSE_MAX_TABLELOG*2+7 > sizeof(bitD.bitContainer)*8)    /* This test must be static */
+            BIT_reloadDStream(&bitD);
+
+        op[3] = FSE_GETSYMBOL(&state2);
+    }
+
+    /* tail */
+    /* note : BIT_reloadDStream(&bitD) >= FSE_DStream_partiallyFilled; Ends at exactly BIT_DStream_completed */
+    while (1)
+    {
+        if ( (BIT_reloadDStream(&bitD)>BIT_DStream_completed) || (op==omax) || (BIT_endOfDStream(&bitD) && (fast || FSE_endOfDState(&state1))) )
+            break;
+
+        *op++ = FSE_GETSYMBOL(&state1);
+
+        if ( (BIT_reloadDStream(&bitD)>BIT_DStream_completed) || (op==omax) || (BIT_endOfDStream(&bitD) && (fast || FSE_endOfDState(&state2))) )
+            break;
+
+        *op++ = FSE_GETSYMBOL(&state2);
+    }
+
+    /* end ? */
+    if (BIT_endOfDStream(&bitD) && FSE_endOfDState(&state1) && FSE_endOfDState(&state2))
+        return op-ostart;
+
+    if (op==omax) return ERROR(dstSize_tooSmall);   /* dst buffer is full, but cSrc unfinished */
+
+    return ERROR(corruption_detected);
+}
+
+
+static size_t FSE_decompress_usingDTable(void* dst, size_t originalSize,
+                            const void* cSrc, size_t cSrcSize,
+                            const FSE_DTable* dt)
+{
+    FSE_DTableHeader DTableH;
+    memcpy(&DTableH, dt, sizeof(DTableH));
+
+    /* select fast mode (static) */
+    if (DTableH.fastMode) return FSE_decompress_usingDTable_generic(dst, originalSize, cSrc, cSrcSize, dt, 1);
+    return FSE_decompress_usingDTable_generic(dst, originalSize, cSrc, cSrcSize, dt, 0);
+}
+
+
+static size_t FSE_decompress(void* dst, size_t maxDstSize, const void* cSrc, size_t cSrcSize)
+{
+    const BYTE* const istart = (const BYTE*)cSrc;
+    const BYTE* ip = istart;
+    short counting[FSE_MAX_SYMBOL_VALUE+1];
+    DTable_max_t dt;   /* Static analyzer seems unable to understand this table will be properly initialized later */
+    unsigned tableLog;
+    unsigned maxSymbolValue = FSE_MAX_SYMBOL_VALUE;
+    size_t errorCode;
+
+    if (cSrcSize<2) return ERROR(srcSize_wrong);   /* too small input size */
+
+    /* normal FSE decoding mode */
+    errorCode = FSE_readNCount (counting, &maxSymbolValue, &tableLog, istart, cSrcSize);
+    if (FSE_isError(errorCode)) return errorCode;
+    if (errorCode >= cSrcSize) return ERROR(srcSize_wrong);   /* too small input size */
+    ip += errorCode;
+    cSrcSize -= errorCode;
+
+    errorCode = FSE_buildDTable (dt, counting, maxSymbolValue, tableLog);
+    if (FSE_isError(errorCode)) return errorCode;
+
+    /* always return, even if it is an error code */
+    return FSE_decompress_usingDTable (dst, maxDstSize, ip, cSrcSize, dt);
+}
+
+
+
+#endif   /* FSE_COMMONDEFS_ONLY */
+/* ******************************************************************
+   Huff0 : Huffman coder, part of New Generation Entropy library
+   Copyright (C) 2013-2015, Yann Collet.
+
+   BSD 2-Clause License (http://www.opensource.org/licenses/bsd-license.php)
+
+   Redistribution and use in source and binary forms, with or without
+   modification, are permitted provided that the following conditions are
+   met:
+
+       * Redistributions of source code must retain the above copyright
+   notice, this list of conditions and the following disclaimer.
+       * Redistributions in binary form must reproduce the above
+   copyright notice, this list of conditions and the following disclaimer
+   in the documentation and/or other materials provided with the
+   distribution.
+
+   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+   OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+    You can contact the author at :
+    - FSE+Huff0 source repository : https://github.com/Cyan4973/FiniteStateEntropy
+    - Public forum : https://groups.google.com/forum/#!forum/lz4c
+****************************************************************** */
+
+/****************************************************************
+*  Compiler specifics
+****************************************************************/
+#if defined (__cplusplus) || (defined (__STDC_VERSION__) && (__STDC_VERSION__ >= 199901L) /* C99 */)
+/* inline is defined */
+#elif defined(_MSC_VER)
+#  define inline __inline
+#else
+#  define inline /* disable inline */
+#endif
+
+
+#ifdef _MSC_VER    /* Visual Studio */
+#  pragma warning(disable : 4127)        /* disable: C4127: conditional expression is constant */
+#endif
+
+
+/****************************************************************
+*  Includes
+****************************************************************/
+#include <stdlib.h>     /* malloc, free, qsort */
+#include <string.h>     /* memcpy, memset */
+#include <stdio.h>      /* printf (debug) */
+
+/****************************************************************
+*  Error Management
+****************************************************************/
+#define HUF_STATIC_ASSERT(c) { enum { HUF_static_assert = 1/(int)(!!(c)) }; }   /* use only *after* variable declarations */
+
+
+/******************************************
+*  Helper functions
+******************************************/
+static unsigned HUF_isError(size_t code) { return ERR_isError(code); }
+
+#define HUF_ABSOLUTEMAX_TABLELOG  16   /* absolute limit of HUF_MAX_TABLELOG. Beyond that value, code does not work */
+#define HUF_MAX_TABLELOG  12           /* max configured tableLog (for static allocation); can be modified up to HUF_ABSOLUTEMAX_TABLELOG */
+#define HUF_DEFAULT_TABLELOG  HUF_MAX_TABLELOG   /* tableLog by default, when not specified */
+#define HUF_MAX_SYMBOL_VALUE 255
+#if (HUF_MAX_TABLELOG > HUF_ABSOLUTEMAX_TABLELOG)
+#  error "HUF_MAX_TABLELOG is too large !"
+#endif
+
+
+
+/*********************************************************
+*  Huff0 : Huffman block decompression
+*********************************************************/
+typedef struct { BYTE byte; BYTE nbBits; } HUF_DEltX2;   /* single-symbol decoding */
+
+typedef struct { U16 sequence; BYTE nbBits; BYTE length; } HUF_DEltX4;  /* double-symbols decoding */
+
+typedef struct { BYTE symbol; BYTE weight; } sortedSymbol_t;
+
+/*! HUF_readStats
+    Read compact Huffman tree, saved by HUF_writeCTable
+    @huffWeight : destination buffer
+    @return : size read from `src`
+*/
+static size_t HUF_readStats(BYTE* huffWeight, size_t hwSize, U32* rankStats,
+                            U32* nbSymbolsPtr, U32* tableLogPtr,
+                            const void* src, size_t srcSize)
+{
+    U32 weightTotal;
+    U32 tableLog;
+    const BYTE* ip = (const BYTE*) src;
+    size_t iSize;
+    size_t oSize;
+    U32 n;
+
+    if (!srcSize) return ERROR(srcSize_wrong);
+    iSize = ip[0];
+    //memset(huffWeight, 0, hwSize);   /* is not necessary, even though some analyzer complain ... */
+
+    if (iSize >= 128)  /* special header */
+    {
+        if (iSize >= (242))   /* RLE */
+        {
+            static int l[14] = { 1, 2, 3, 4, 7, 8, 15, 16, 31, 32, 63, 64, 127, 128 };
+            oSize = l[iSize-242];
+            memset(huffWeight, 1, hwSize);
+            iSize = 0;
+        }
+        else   /* Incompressible */
+        {
+            oSize = iSize - 127;
+            iSize = ((oSize+1)/2);
+            if (iSize+1 > srcSize) return ERROR(srcSize_wrong);
+            if (oSize >= hwSize) return ERROR(corruption_detected);
+            ip += 1;
+            for (n=0; n<oSize; n+=2)
+            {
+                huffWeight[n]   = ip[n/2] >> 4;
+                huffWeight[n+1] = ip[n/2] & 15;
+            }
+        }
+    }
+    else  /* header compressed with FSE (normal case) */
+    {
+        if (iSize+1 > srcSize) return ERROR(srcSize_wrong);
+        oSize = FSE_decompress(huffWeight, hwSize-1, ip+1, iSize);   /* max (hwSize-1) values decoded, as last one is implied */
+        if (FSE_isError(oSize)) return oSize;
+    }
+
+    /* collect weight stats */
+    memset(rankStats, 0, (HUF_ABSOLUTEMAX_TABLELOG + 1) * sizeof(U32));
+    weightTotal = 0;
+    for (n=0; n<oSize; n++)
+    {
+        if (huffWeight[n] >= HUF_ABSOLUTEMAX_TABLELOG) return ERROR(corruption_detected);
+        rankStats[huffWeight[n]]++;
+        weightTotal += (1 << huffWeight[n]) >> 1;
+    }
+    if (weightTotal == 0) return ERROR(corruption_detected);
+
+    /* get last non-null symbol weight (implied, total must be 2^n) */
+    tableLog = BIT_highbit32(weightTotal) + 1;
+    if (tableLog > HUF_ABSOLUTEMAX_TABLELOG) return ERROR(corruption_detected);
+    {
+        U32 total = 1 << tableLog;
+        U32 rest = total - weightTotal;
+        U32 verif = 1 << BIT_highbit32(rest);
+        U32 lastWeight = BIT_highbit32(rest) + 1;
+        if (verif != rest) return ERROR(corruption_detected);    /* last value must be a clean power of 2 */
+        huffWeight[oSize] = (BYTE)lastWeight;
+        rankStats[lastWeight]++;
+    }
+
+    /* check tree construction validity */
+    if ((rankStats[1] < 2) || (rankStats[1] & 1)) return ERROR(corruption_detected);   /* by construction : at least 2 elts of rank 1, must be even */
+
+    /* results */
+    *nbSymbolsPtr = (U32)(oSize+1);
+    *tableLogPtr = tableLog;
+    return iSize+1;
+}
+
+
+/**************************/
+/* single-symbol decoding */
+/**************************/
+
+static size_t HUF_readDTableX2 (U16* DTable, const void* src, size_t srcSize)
+{
+    BYTE huffWeight[HUF_MAX_SYMBOL_VALUE + 1];
+    U32 rankVal[HUF_ABSOLUTEMAX_TABLELOG + 1];   /* large enough for values from 0 to 16 */
+    U32 tableLog = 0;
+    const BYTE* ip = (const BYTE*) src;
+    size_t iSize = ip[0];
+    U32 nbSymbols = 0;
+    U32 n;
+    U32 nextRankStart;
+    void* ptr = DTable+1;
+    HUF_DEltX2* const dt = (HUF_DEltX2*)ptr;
+
+    HUF_STATIC_ASSERT(sizeof(HUF_DEltX2) == sizeof(U16));   /* if compilation fails here, assertion is false */
+    //memset(huffWeight, 0, sizeof(huffWeight));   /* is not necessary, even though some analyzer complain ... */
+
+    iSize = HUF_readStats(huffWeight, HUF_MAX_SYMBOL_VALUE + 1, rankVal, &nbSymbols, &tableLog, src, srcSize);
+    if (HUF_isError(iSize)) return iSize;
+
+    /* check result */
+    if (tableLog > DTable[0]) return ERROR(tableLog_tooLarge);   /* DTable is too small */
+    DTable[0] = (U16)tableLog;   /* maybe should separate sizeof DTable, as allocated, from used size of DTable, in case of DTable re-use */
+
+    /* Prepare ranks */
+    nextRankStart = 0;
+    for (n=1; n<=tableLog; n++)
+    {
+        U32 current = nextRankStart;
+        nextRankStart += (rankVal[n] << (n-1));
+        rankVal[n] = current;
+    }
+
+    /* fill DTable */
+    for (n=0; n<nbSymbols; n++)
+    {
+        const U32 w = huffWeight[n];
+        const U32 length = (1 << w) >> 1;
+        U32 i;
+        HUF_DEltX2 D;
+        D.byte = (BYTE)n; D.nbBits = (BYTE)(tableLog + 1 - w);
+        for (i = rankVal[w]; i < rankVal[w] + length; i++)
+            dt[i] = D;
+        rankVal[w] += length;
+    }
+
+    return iSize;
+}
+
+static BYTE HUF_decodeSymbolX2(BIT_DStream_t* Dstream, const HUF_DEltX2* dt, const U32 dtLog)
+{
+        const size_t val = BIT_lookBitsFast(Dstream, dtLog); /* note : dtLog >= 1 */
+        const BYTE c = dt[val].byte;
+        BIT_skipBits(Dstream, dt[val].nbBits);
+        return c;
+}
+
+#define HUF_DECODE_SYMBOLX2_0(ptr, DStreamPtr) \
+    *ptr++ = HUF_decodeSymbolX2(DStreamPtr, dt, dtLog)
+
+#define HUF_DECODE_SYMBOLX2_1(ptr, DStreamPtr) \
+    if (MEM_64bits() || (HUF_MAX_TABLELOG<=12)) \
+        HUF_DECODE_SYMBOLX2_0(ptr, DStreamPtr)
+
+#define HUF_DECODE_SYMBOLX2_2(ptr, DStreamPtr) \
+    if (MEM_64bits()) \
+        HUF_DECODE_SYMBOLX2_0(ptr, DStreamPtr)
+
+static inline size_t HUF_decodeStreamX2(BYTE* p, BIT_DStream_t* const bitDPtr, BYTE* const pEnd, const HUF_DEltX2* const dt, const U32 dtLog)
+{
+    BYTE* const pStart = p;
+
+    /* up to 4 symbols at a time */
+    while ((BIT_reloadDStream(bitDPtr) == BIT_DStream_unfinished) && (p <= pEnd-4))
+    {
+        HUF_DECODE_SYMBOLX2_2(p, bitDPtr);
+        HUF_DECODE_SYMBOLX2_1(p, bitDPtr);
+        HUF_DECODE_SYMBOLX2_2(p, bitDPtr);
+        HUF_DECODE_SYMBOLX2_0(p, bitDPtr);
+    }
+
+    /* closer to the end */
+    while ((BIT_reloadDStream(bitDPtr) == BIT_DStream_unfinished) && (p < pEnd))
+        HUF_DECODE_SYMBOLX2_0(p, bitDPtr);
+
+    /* no more data to retrieve from bitstream, hence no need to reload */
+    while (p < pEnd)
+        HUF_DECODE_SYMBOLX2_0(p, bitDPtr);
+
+    return pEnd-pStart;
+}
+
+
+static size_t HUF_decompress4X2_usingDTable(
+          void* dst,  size_t dstSize,
+    const void* cSrc, size_t cSrcSize,
+    const U16* DTable)
+{
+    if (cSrcSize < 10) return ERROR(corruption_detected);   /* strict minimum : jump table + 1 byte per stream */
+
+    {
+        const BYTE* const istart = (const BYTE*) cSrc;
+        BYTE* const ostart = (BYTE*) dst;
+        BYTE* const oend = ostart + dstSize;
+
+        const void* ptr = DTable;
+        const HUF_DEltX2* const dt = ((const HUF_DEltX2*)ptr) +1;
+        const U32 dtLog = DTable[0];
+        size_t errorCode;
+
+        /* Init */
+        BIT_DStream_t bitD1;
+        BIT_DStream_t bitD2;
+        BIT_DStream_t bitD3;
+        BIT_DStream_t bitD4;
+        const size_t length1 = MEM_readLE16(istart);
+        const size_t length2 = MEM_readLE16(istart+2);
+        const size_t length3 = MEM_readLE16(istart+4);
+        size_t length4;
+        const BYTE* const istart1 = istart + 6;  /* jumpTable */
+        const BYTE* const istart2 = istart1 + length1;
+        const BYTE* const istart3 = istart2 + length2;
+        const BYTE* const istart4 = istart3 + length3;
+        const size_t segmentSize = (dstSize+3) / 4;
+        BYTE* const opStart2 = ostart + segmentSize;
+        BYTE* const opStart3 = opStart2 + segmentSize;
+        BYTE* const opStart4 = opStart3 + segmentSize;
+        BYTE* op1 = ostart;
+        BYTE* op2 = opStart2;
+        BYTE* op3 = opStart3;
+        BYTE* op4 = opStart4;
+        U32 endSignal;
+
+        length4 = cSrcSize - (length1 + length2 + length3 + 6);
+        if (length4 > cSrcSize) return ERROR(corruption_detected);   /* overflow */
+        errorCode = BIT_initDStream(&bitD1, istart1, length1);
+        if (HUF_isError(errorCode)) return errorCode;
+        errorCode = BIT_initDStream(&bitD2, istart2, length2);
+        if (HUF_isError(errorCode)) return errorCode;
+        errorCode = BIT_initDStream(&bitD3, istart3, length3);
+        if (HUF_isError(errorCode)) return errorCode;
+        errorCode = BIT_initDStream(&bitD4, istart4, length4);
+        if (HUF_isError(errorCode)) return errorCode;
+
+        /* 16-32 symbols per loop (4-8 symbols per stream) */
+        endSignal = BIT_reloadDStream(&bitD1) | BIT_reloadDStream(&bitD2) | BIT_reloadDStream(&bitD3) | BIT_reloadDStream(&bitD4);
+        for ( ; (endSignal==BIT_DStream_unfinished) && (op4<(oend-7)) ; )
+        {
+            HUF_DECODE_SYMBOLX2_2(op1, &bitD1);
+            HUF_DECODE_SYMBOLX2_2(op2, &bitD2);
+            HUF_DECODE_SYMBOLX2_2(op3, &bitD3);
+            HUF_DECODE_SYMBOLX2_2(op4, &bitD4);
+            HUF_DECODE_SYMBOLX2_1(op1, &bitD1);
+            HUF_DECODE_SYMBOLX2_1(op2, &bitD2);
+            HUF_DECODE_SYMBOLX2_1(op3, &bitD3);
+            HUF_DECODE_SYMBOLX2_1(op4, &bitD4);
+            HUF_DECODE_SYMBOLX2_2(op1, &bitD1);
+            HUF_DECODE_SYMBOLX2_2(op2, &bitD2);
+            HUF_DECODE_SYMBOLX2_2(op3, &bitD3);
+            HUF_DECODE_SYMBOLX2_2(op4, &bitD4);
+            HUF_DECODE_SYMBOLX2_0(op1, &bitD1);
+            HUF_DECODE_SYMBOLX2_0(op2, &bitD2);
+            HUF_DECODE_SYMBOLX2_0(op3, &bitD3);
+            HUF_DECODE_SYMBOLX2_0(op4, &bitD4);
+
+            endSignal = BIT_reloadDStream(&bitD1) | BIT_reloadDStream(&bitD2) | BIT_reloadDStream(&bitD3) | BIT_reloadDStream(&bitD4);
+        }
+
+        /* check corruption */
+        if (op1 > opStart2) return ERROR(corruption_detected);
+        if (op2 > opStart3) return ERROR(corruption_detected);
+        if (op3 > opStart4) return ERROR(corruption_detected);
+        /* note : op4 supposed already verified within main loop */
+
+        /* finish bitStreams one by one */
+        HUF_decodeStreamX2(op1, &bitD1, opStart2, dt, dtLog);
+        HUF_decodeStreamX2(op2, &bitD2, opStart3, dt, dtLog);
+        HUF_decodeStreamX2(op3, &bitD3, opStart4, dt, dtLog);
+        HUF_decodeStreamX2(op4, &bitD4, oend,     dt, dtLog);
+
+        /* check */
+        endSignal = BIT_endOfDStream(&bitD1) & BIT_endOfDStream(&bitD2) & BIT_endOfDStream(&bitD3) & BIT_endOfDStream(&bitD4);
+        if (!endSignal) return ERROR(corruption_detected);
+
+        /* decoded size */
+        return dstSize;
+    }
+}
+
+
+static size_t HUF_decompress4X2 (void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize)
+{
+    HUF_CREATE_STATIC_DTABLEX2(DTable, HUF_MAX_TABLELOG);
+    const BYTE* ip = (const BYTE*) cSrc;
+    size_t errorCode;
+
+    errorCode = HUF_readDTableX2 (DTable, cSrc, cSrcSize);
+    if (HUF_isError(errorCode)) return errorCode;
+    if (errorCode >= cSrcSize) return ERROR(srcSize_wrong);
+    ip += errorCode;
+    cSrcSize -= errorCode;
+
+    return HUF_decompress4X2_usingDTable (dst, dstSize, ip, cSrcSize, DTable);
+}
+
+
+/***************************/
+/* double-symbols decoding */
+/***************************/
+
+static void HUF_fillDTableX4Level2(HUF_DEltX4* DTable, U32 sizeLog, const U32 consumed,
+                           const U32* rankValOrigin, const int minWeight,
+                           const sortedSymbol_t* sortedSymbols, const U32 sortedListSize,
+                           U32 nbBitsBaseline, U16 baseSeq)
+{
+    HUF_DEltX4 DElt;
+    U32 rankVal[HUF_ABSOLUTEMAX_TABLELOG + 1];
+    U32 s;
+
+    /* get pre-calculated rankVal */
+    memcpy(rankVal, rankValOrigin, sizeof(rankVal));
+
+    /* fill skipped values */
+    if (minWeight>1)
+    {
+        U32 i, skipSize = rankVal[minWeight];
+        MEM_writeLE16(&(DElt.sequence), baseSeq);
+        DElt.nbBits   = (BYTE)(consumed);
+        DElt.length   = 1;
+        for (i = 0; i < skipSize; i++)
+            DTable[i] = DElt;
+    }
+
+    /* fill DTable */
+    for (s=0; s<sortedListSize; s++)   /* note : sortedSymbols already skipped */
+    {
+        const U32 symbol = sortedSymbols[s].symbol;
+        const U32 weight = sortedSymbols[s].weight;
+        const U32 nbBits = nbBitsBaseline - weight;
+        const U32 length = 1 << (sizeLog-nbBits);
+        const U32 start = rankVal[weight];
+        U32 i = start;
+        const U32 end = start + length;
+
+        MEM_writeLE16(&(DElt.sequence), (U16)(baseSeq + (symbol << 8)));
+        DElt.nbBits = (BYTE)(nbBits + consumed);
+        DElt.length = 2;
+        do { DTable[i++] = DElt; } while (i<end);   /* since length >= 1 */
+
+        rankVal[weight] += length;
+    }
+}
+
+typedef U32 rankVal_t[HUF_ABSOLUTEMAX_TABLELOG][HUF_ABSOLUTEMAX_TABLELOG + 1];
+
+static void HUF_fillDTableX4(HUF_DEltX4* DTable, const U32 targetLog,
+                           const sortedSymbol_t* sortedList, const U32 sortedListSize,
+                           const U32* rankStart, rankVal_t rankValOrigin, const U32 maxWeight,
+                           const U32 nbBitsBaseline)
+{
+    U32 rankVal[HUF_ABSOLUTEMAX_TABLELOG + 1];
+    const int scaleLog = nbBitsBaseline - targetLog;   /* note : targetLog >= srcLog, hence scaleLog <= 1 */
+    const U32 minBits  = nbBitsBaseline - maxWeight;
+    U32 s;
+
+    memcpy(rankVal, rankValOrigin, sizeof(rankVal));
+
+    /* fill DTable */
+    for (s=0; s<sortedListSize; s++)
+    {
+        const U16 symbol = sortedList[s].symbol;
+        const U32 weight = sortedList[s].weight;
+        const U32 nbBits = nbBitsBaseline - weight;
+        const U32 start = rankVal[weight];
+        const U32 length = 1 << (targetLog-nbBits);
+
+        if (targetLog-nbBits >= minBits)   /* enough room for a second symbol */
+        {
+            U32 sortedRank;
+            int minWeight = nbBits + scaleLog;
+            if (minWeight < 1) minWeight = 1;
+            sortedRank = rankStart[minWeight];
+            HUF_fillDTableX4Level2(DTable+start, targetLog-nbBits, nbBits,
+                           rankValOrigin[nbBits], minWeight,
+                           sortedList+sortedRank, sortedListSize-sortedRank,
+                           nbBitsBaseline, symbol);
+        }
+        else
+        {
+            U32 i;
+            const U32 end = start + length;
+            HUF_DEltX4 DElt;
+
+            MEM_writeLE16(&(DElt.sequence), symbol);
+            DElt.nbBits   = (BYTE)(nbBits);
+            DElt.length   = 1;
+            for (i = start; i < end; i++)
+                DTable[i] = DElt;
+        }
+        rankVal[weight] += length;
+    }
+}
+
+static size_t HUF_readDTableX4 (U32* DTable, const void* src, size_t srcSize)
+{
+    BYTE weightList[HUF_MAX_SYMBOL_VALUE + 1];
+    sortedSymbol_t sortedSymbol[HUF_MAX_SYMBOL_VALUE + 1];
+    U32 rankStats[HUF_ABSOLUTEMAX_TABLELOG + 1] = { 0 };
+    U32 rankStart0[HUF_ABSOLUTEMAX_TABLELOG + 2] = { 0 };
+    U32* const rankStart = rankStart0+1;
+    rankVal_t rankVal;
+    U32 tableLog, maxW, sizeOfSort, nbSymbols;
+    const U32 memLog = DTable[0];
+    const BYTE* ip = (const BYTE*) src;
+    size_t iSize = ip[0];
+    void* ptr = DTable;
+    HUF_DEltX4* const dt = ((HUF_DEltX4*)ptr) + 1;
+
+    HUF_STATIC_ASSERT(sizeof(HUF_DEltX4) == sizeof(U32));   /* if compilation fails here, assertion is false */
+    if (memLog > HUF_ABSOLUTEMAX_TABLELOG) return ERROR(tableLog_tooLarge);
+    //memset(weightList, 0, sizeof(weightList));   /* is not necessary, even though some analyzer complain ... */
+
+    iSize = HUF_readStats(weightList, HUF_MAX_SYMBOL_VALUE + 1, rankStats, &nbSymbols, &tableLog, src, srcSize);
+    if (HUF_isError(iSize)) return iSize;
+
+    /* check result */
+    if (tableLog > memLog) return ERROR(tableLog_tooLarge);   /* DTable can't fit code depth */
+
+    /* find maxWeight */
+    for (maxW = tableLog; rankStats[maxW]==0; maxW--)
+        {if (!maxW) return ERROR(GENERIC); }  /* necessarily finds a solution before maxW==0 */
+
+    /* Get start index of each weight */
+    {
+        U32 w, nextRankStart = 0;
+        for (w=1; w<=maxW; w++)
+        {
+            U32 current = nextRankStart;
+            nextRankStart += rankStats[w];
+            rankStart[w] = current;
+        }
+        rankStart[0] = nextRankStart;   /* put all 0w symbols at the end of sorted list*/
+        sizeOfSort = nextRankStart;
+    }
+
+    /* sort symbols by weight */
+    {
+        U32 s;
+        for (s=0; s<nbSymbols; s++)
+        {
+            U32 w = weightList[s];
+            U32 r = rankStart[w]++;
+            sortedSymbol[r].symbol = (BYTE)s;
+            sortedSymbol[r].weight = (BYTE)w;
+        }
+        rankStart[0] = 0;   /* forget 0w symbols; this is beginning of weight(1) */
+    }
+
+    /* Build rankVal */
+    {
+        const U32 minBits = tableLog+1 - maxW;
+        U32 nextRankVal = 0;
+        U32 w, consumed;
+        const int rescale = (memLog-tableLog) - 1;   /* tableLog <= memLog */
+        U32* rankVal0 = rankVal[0];
+        for (w=1; w<=maxW; w++)
+        {
+            U32 current = nextRankVal;
+            nextRankVal += rankStats[w] << (w+rescale);
+            rankVal0[w] = current;
+        }
+        for (consumed = minBits; consumed <= memLog - minBits; consumed++)
+        {
+            U32* rankValPtr = rankVal[consumed];
+            for (w = 1; w <= maxW; w++)
+            {
+                rankValPtr[w] = rankVal0[w] >> consumed;
+            }
+        }
+    }
+
+    HUF_fillDTableX4(dt, memLog,
+                   sortedSymbol, sizeOfSort,
+                   rankStart0, rankVal, maxW,
+                   tableLog+1);
+
+    return iSize;
+}
+
+
+static U32 HUF_decodeSymbolX4(void* op, BIT_DStream_t* DStream, const HUF_DEltX4* dt, const U32 dtLog)
+{
+    const size_t val = BIT_lookBitsFast(DStream, dtLog);   /* note : dtLog >= 1 */
+    memcpy(op, dt+val, 2);
+    BIT_skipBits(DStream, dt[val].nbBits);
+    return dt[val].length;
+}
+
+static U32 HUF_decodeLastSymbolX4(void* op, BIT_DStream_t* DStream, const HUF_DEltX4* dt, const U32 dtLog)
+{
+    const size_t val = BIT_lookBitsFast(DStream, dtLog);   /* note : dtLog >= 1 */
+    memcpy(op, dt+val, 1);
+    if (dt[val].length==1) BIT_skipBits(DStream, dt[val].nbBits);
+    else
+    {
+        if (DStream->bitsConsumed < (sizeof(DStream->bitContainer)*8))
+        {
+            BIT_skipBits(DStream, dt[val].nbBits);
+            if (DStream->bitsConsumed > (sizeof(DStream->bitContainer)*8))
+                DStream->bitsConsumed = (sizeof(DStream->bitContainer)*8);   /* ugly hack; works only because it's the last symbol. Note : can't easily extract nbBits from just this symbol */
+        }
+    }
+    return 1;
+}
+
+
+#define HUF_DECODE_SYMBOLX4_0(ptr, DStreamPtr) \
+    ptr += HUF_decodeSymbolX4(ptr, DStreamPtr, dt, dtLog)
+
+#define HUF_DECODE_SYMBOLX4_1(ptr, DStreamPtr) \
+    if (MEM_64bits() || (HUF_MAX_TABLELOG<=12)) \
+        ptr += HUF_decodeSymbolX4(ptr, DStreamPtr, dt, dtLog)
+
+#define HUF_DECODE_SYMBOLX4_2(ptr, DStreamPtr) \
+    if (MEM_64bits()) \
+        ptr += HUF_decodeSymbolX4(ptr, DStreamPtr, dt, dtLog)
+
+static inline size_t HUF_decodeStreamX4(BYTE* p, BIT_DStream_t* bitDPtr, BYTE* const pEnd, const HUF_DEltX4* const dt, const U32 dtLog)
+{
+    BYTE* const pStart = p;
+
+    /* up to 8 symbols at a time */
+    while ((BIT_reloadDStream(bitDPtr) == BIT_DStream_unfinished) && (p < pEnd-7))
+    {
+        HUF_DECODE_SYMBOLX4_2(p, bitDPtr);
+        HUF_DECODE_SYMBOLX4_1(p, bitDPtr);
+        HUF_DECODE_SYMBOLX4_2(p, bitDPtr);
+        HUF_DECODE_SYMBOLX4_0(p, bitDPtr);
+    }
+
+    /* closer to the end */
+    while ((BIT_reloadDStream(bitDPtr) == BIT_DStream_unfinished) && (p <= pEnd-2))
+        HUF_DECODE_SYMBOLX4_0(p, bitDPtr);
+
+    while (p <= pEnd-2)
+        HUF_DECODE_SYMBOLX4_0(p, bitDPtr);   /* no need to reload : reached the end of DStream */
+
+    if (p < pEnd)
+        p += HUF_decodeLastSymbolX4(p, bitDPtr, dt, dtLog);
+
+    return p-pStart;
+}
+
+
+
+static size_t HUF_decompress4X4_usingDTable(
+          void* dst,  size_t dstSize,
+    const void* cSrc, size_t cSrcSize,
+    const U32* DTable)
+{
+    if (cSrcSize < 10) return ERROR(corruption_detected);   /* strict minimum : jump table + 1 byte per stream */
+
+    {
+        const BYTE* const istart = (const BYTE*) cSrc;
+        BYTE* const ostart = (BYTE*) dst;
+        BYTE* const oend = ostart + dstSize;
+
+        const void* ptr = DTable;
+        const HUF_DEltX4* const dt = ((const HUF_DEltX4*)ptr) +1;
+        const U32 dtLog = DTable[0];
+        size_t errorCode;
+
+        /* Init */
+        BIT_DStream_t bitD1;
+        BIT_DStream_t bitD2;
+        BIT_DStream_t bitD3;
+        BIT_DStream_t bitD4;
+        const size_t length1 = MEM_readLE16(istart);
+        const size_t length2 = MEM_readLE16(istart+2);
+        const size_t length3 = MEM_readLE16(istart+4);
+        size_t length4;
+        const BYTE* const istart1 = istart + 6;  /* jumpTable */
+        const BYTE* const istart2 = istart1 + length1;
+        const BYTE* const istart3 = istart2 + length2;
+        const BYTE* const istart4 = istart3 + length3;
+        const size_t segmentSize = (dstSize+3) / 4;
+        BYTE* const opStart2 = ostart + segmentSize;
+        BYTE* const opStart3 = opStart2 + segmentSize;
+        BYTE* const opStart4 = opStart3 + segmentSize;
+        BYTE* op1 = ostart;
+        BYTE* op2 = opStart2;
+        BYTE* op3 = opStart3;
+        BYTE* op4 = opStart4;
+        U32 endSignal;
+
+        length4 = cSrcSize - (length1 + length2 + length3 + 6);
+        if (length4 > cSrcSize) return ERROR(corruption_detected);   /* overflow */
+        errorCode = BIT_initDStream(&bitD1, istart1, length1);
+        if (HUF_isError(errorCode)) return errorCode;
+        errorCode = BIT_initDStream(&bitD2, istart2, length2);
+        if (HUF_isError(errorCode)) return errorCode;
+        errorCode = BIT_initDStream(&bitD3, istart3, length3);
+        if (HUF_isError(errorCode)) return errorCode;
+        errorCode = BIT_initDStream(&bitD4, istart4, length4);
+        if (HUF_isError(errorCode)) return errorCode;
+
+        /* 16-32 symbols per loop (4-8 symbols per stream) */
+        endSignal = BIT_reloadDStream(&bitD1) | BIT_reloadDStream(&bitD2) | BIT_reloadDStream(&bitD3) | BIT_reloadDStream(&bitD4);
+        for ( ; (endSignal==BIT_DStream_unfinished) && (op4<(oend-7)) ; )
+        {
+            HUF_DECODE_SYMBOLX4_2(op1, &bitD1);
+            HUF_DECODE_SYMBOLX4_2(op2, &bitD2);
+            HUF_DECODE_SYMBOLX4_2(op3, &bitD3);
+            HUF_DECODE_SYMBOLX4_2(op4, &bitD4);
+            HUF_DECODE_SYMBOLX4_1(op1, &bitD1);
+            HUF_DECODE_SYMBOLX4_1(op2, &bitD2);
+            HUF_DECODE_SYMBOLX4_1(op3, &bitD3);
+            HUF_DECODE_SYMBOLX4_1(op4, &bitD4);
+            HUF_DECODE_SYMBOLX4_2(op1, &bitD1);
+            HUF_DECODE_SYMBOLX4_2(op2, &bitD2);
+            HUF_DECODE_SYMBOLX4_2(op3, &bitD3);
+            HUF_DECODE_SYMBOLX4_2(op4, &bitD4);
+            HUF_DECODE_SYMBOLX4_0(op1, &bitD1);
+            HUF_DECODE_SYMBOLX4_0(op2, &bitD2);
+            HUF_DECODE_SYMBOLX4_0(op3, &bitD3);
+            HUF_DECODE_SYMBOLX4_0(op4, &bitD4);
+
+            endSignal = BIT_reloadDStream(&bitD1) | BIT_reloadDStream(&bitD2) | BIT_reloadDStream(&bitD3) | BIT_reloadDStream(&bitD4);
+        }
+
+        /* check corruption */
+        if (op1 > opStart2) return ERROR(corruption_detected);
+        if (op2 > opStart3) return ERROR(corruption_detected);
+        if (op3 > opStart4) return ERROR(corruption_detected);
+        /* note : op4 supposed already verified within main loop */
+
+        /* finish bitStreams one by one */
+        HUF_decodeStreamX4(op1, &bitD1, opStart2, dt, dtLog);
+        HUF_decodeStreamX4(op2, &bitD2, opStart3, dt, dtLog);
+        HUF_decodeStreamX4(op3, &bitD3, opStart4, dt, dtLog);
+        HUF_decodeStreamX4(op4, &bitD4, oend,     dt, dtLog);
+
+        /* check */
+        endSignal = BIT_endOfDStream(&bitD1) & BIT_endOfDStream(&bitD2) & BIT_endOfDStream(&bitD3) & BIT_endOfDStream(&bitD4);
+        if (!endSignal) return ERROR(corruption_detected);
+
+        /* decoded size */
+        return dstSize;
+    }
+}
+
+
+static size_t HUF_decompress4X4 (void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize)
+{
+    HUF_CREATE_STATIC_DTABLEX4(DTable, HUF_MAX_TABLELOG);
+    const BYTE* ip = (const BYTE*) cSrc;
+
+    size_t hSize = HUF_readDTableX4 (DTable, cSrc, cSrcSize);
+    if (HUF_isError(hSize)) return hSize;
+    if (hSize >= cSrcSize) return ERROR(srcSize_wrong);
+    ip += hSize;
+    cSrcSize -= hSize;
+
+    return HUF_decompress4X4_usingDTable (dst, dstSize, ip, cSrcSize, DTable);
+}
+
+
+/**********************************/
+/* quad-symbol decoding           */
+/**********************************/
+typedef struct { BYTE nbBits; BYTE nbBytes; } HUF_DDescX6;
+typedef union { BYTE byte[4]; U32 sequence; } HUF_DSeqX6;
+
+/* recursive, up to level 3; may benefit from <template>-like strategy to nest each level inline */
+static void HUF_fillDTableX6LevelN(HUF_DDescX6* DDescription, HUF_DSeqX6* DSequence, int sizeLog,
+                           const rankVal_t rankValOrigin, const U32 consumed, const int minWeight, const U32 maxWeight,
+                           const sortedSymbol_t* sortedSymbols, const U32 sortedListSize, const U32* rankStart,
+                           const U32 nbBitsBaseline, HUF_DSeqX6 baseSeq, HUF_DDescX6 DDesc)
+{
+    const int scaleLog = nbBitsBaseline - sizeLog;   /* note : targetLog >= (nbBitsBaseline-1), hence scaleLog <= 1 */
+    const int minBits  = nbBitsBaseline - maxWeight;
+    const U32 level = DDesc.nbBytes;
+    U32 rankVal[HUF_ABSOLUTEMAX_TABLELOG + 1];
+    U32 symbolStartPos, s;
+
+    /* local rankVal, will be modified */
+    memcpy(rankVal, rankValOrigin[consumed], sizeof(rankVal));
+
+    /* fill skipped values */
+    if (minWeight>1)
+    {
+        U32 i;
+        const U32 skipSize = rankVal[minWeight];
+        for (i = 0; i < skipSize; i++)
+        {
+            DSequence[i] = baseSeq;
+            DDescription[i] = DDesc;
+        }
+    }
+
+    /* fill DTable */
+    DDesc.nbBytes++;
+    symbolStartPos = rankStart[minWeight];
+    for (s=symbolStartPos; s<sortedListSize; s++)
+    {
+        const BYTE symbol = sortedSymbols[s].symbol;
+        const U32  weight = sortedSymbols[s].weight;   /* >= 1 (sorted) */
+        const int  nbBits = nbBitsBaseline - weight;   /* >= 1 (by construction) */
+        const int  totalBits = consumed+nbBits;
+        const U32  start  = rankVal[weight];
+        const U32  length = 1 << (sizeLog-nbBits);
+        baseSeq.byte[level] = symbol;
+        DDesc.nbBits = (BYTE)totalBits;
+
+        if ((level<3) && (sizeLog-totalBits >= minBits))   /* enough room for another symbol */
+        {
+            int nextMinWeight = totalBits + scaleLog;
+            if (nextMinWeight < 1) nextMinWeight = 1;
+            HUF_fillDTableX6LevelN(DDescription+start, DSequence+start, sizeLog-nbBits,
+                           rankValOrigin, totalBits, nextMinWeight, maxWeight,
+                           sortedSymbols, sortedListSize, rankStart,
+                           nbBitsBaseline, baseSeq, DDesc);   /* recursive (max : level 3) */
+        }
+        else
+        {
+            U32 i;
+            const U32 end = start + length;
+            for (i = start; i < end; i++)
+            {
+                DDescription[i] = DDesc;
+                DSequence[i] = baseSeq;
+            }
+        }
+        rankVal[weight] += length;
+    }
+}
+
+
+/* note : same preparation as X4 */
+static size_t HUF_readDTableX6 (U32* DTable, const void* src, size_t srcSize)
+{
+    BYTE weightList[HUF_MAX_SYMBOL_VALUE + 1];
+    sortedSymbol_t sortedSymbol[HUF_MAX_SYMBOL_VALUE + 1];
+    U32 rankStats[HUF_ABSOLUTEMAX_TABLELOG + 1] = { 0 };
+    U32 rankStart0[HUF_ABSOLUTEMAX_TABLELOG + 2] = { 0 };
+    U32* const rankStart = rankStart0+1;
+    U32 tableLog, maxW, sizeOfSort, nbSymbols;
+    rankVal_t rankVal;
+    const U32 memLog = DTable[0];
+    const BYTE* ip = (const BYTE*) src;
+    size_t iSize = ip[0];
+
+    if (memLog > HUF_ABSOLUTEMAX_TABLELOG) return ERROR(tableLog_tooLarge);
+    //memset(weightList, 0, sizeof(weightList));   /* is not necessary, even though some analyzer complain ... */
+
+    iSize = HUF_readStats(weightList, HUF_MAX_SYMBOL_VALUE + 1, rankStats, &nbSymbols, &tableLog, src, srcSize);
+    if (HUF_isError(iSize)) return iSize;
+
+    /* check result */
+    if (tableLog > memLog) return ERROR(tableLog_tooLarge);   /* DTable is too small */
+
+    /* find maxWeight */
+    for (maxW = tableLog; rankStats[maxW]==0; maxW--)
+        { if (!maxW) return ERROR(GENERIC); }  /* necessarily finds a solution before maxW==0 */
+
+
+    /* Get start index of each weight */
+    {
+        U32 w, nextRankStart = 0;
+        for (w=1; w<=maxW; w++)
+        {
+            U32 current = nextRankStart;
+            nextRankStart += rankStats[w];
+            rankStart[w] = current;
+        }
+        rankStart[0] = nextRankStart;   /* put all 0w symbols at the end of sorted list*/
+        sizeOfSort = nextRankStart;
+    }
+
+    /* sort symbols by weight */
+    {
+        U32 s;
+        for (s=0; s<nbSymbols; s++)
+        {
+            U32 w = weightList[s];
+            U32 r = rankStart[w]++;
+            sortedSymbol[r].symbol = (BYTE)s;
+            sortedSymbol[r].weight = (BYTE)w;
+        }
+        rankStart[0] = 0;   /* forget 0w symbols; this is beginning of weight(1) */
+    }
+
+    /* Build rankVal */
+    {
+        const U32 minBits = tableLog+1 - maxW;
+        U32 nextRankVal = 0;
+        U32 w, consumed;
+        const int rescale = (memLog-tableLog) - 1;   /* tableLog <= memLog */
+        U32* rankVal0 = rankVal[0];
+        for (w=1; w<=maxW; w++)
+        {
+            U32 current = nextRankVal;
+            nextRankVal += rankStats[w] << (w+rescale);
+            rankVal0[w] = current;
+        }
+        for (consumed = minBits; consumed <= memLog - minBits; consumed++)
+        {
+            U32* rankValPtr = rankVal[consumed];
+            for (w = 1; w <= maxW; w++)
+            {
+                rankValPtr[w] = rankVal0[w] >> consumed;
+            }
+        }
+    }
+
+
+    /* fill tables */
+    {
+        void* ptr = DTable+1;
+        HUF_DDescX6* DDescription = (HUF_DDescX6*)(ptr);
+        void* dSeqStart = DTable + 1 + ((size_t)1<<(memLog-1));
+        HUF_DSeqX6* DSequence = (HUF_DSeqX6*)(dSeqStart);
+        HUF_DSeqX6 DSeq;
+        HUF_DDescX6 DDesc;
+        DSeq.sequence = 0;
+        DDesc.nbBits = 0;
+        DDesc.nbBytes = 0;
+        HUF_fillDTableX6LevelN(DDescription, DSequence, memLog,
+                       (const U32 (*)[HUF_ABSOLUTEMAX_TABLELOG + 1])rankVal, 0, 1, maxW,
+                       sortedSymbol, sizeOfSort, rankStart0,
+                       tableLog+1, DSeq, DDesc);
+    }
+
+    return iSize;
+}
+
+
+static U32 HUF_decodeSymbolX6(void* op, BIT_DStream_t* DStream, const HUF_DDescX6* dd, const HUF_DSeqX6* ds, const U32 dtLog)
+{
+    const size_t val = BIT_lookBitsFast(DStream, dtLog);   /* note : dtLog >= 1 */
+    memcpy(op, ds+val, sizeof(HUF_DSeqX6));
+    BIT_skipBits(DStream, dd[val].nbBits);
+    return dd[val].nbBytes;
+}
+
+static U32 HUF_decodeLastSymbolsX6(void* op, const U32 maxL, BIT_DStream_t* DStream,
+                                  const HUF_DDescX6* dd, const HUF_DSeqX6* ds, const U32 dtLog)
+{
+    const size_t val = BIT_lookBitsFast(DStream, dtLog);   /* note : dtLog >= 1 */
+    U32 length = dd[val].nbBytes;
+    if (length <= maxL)
+    {
+        memcpy(op, ds+val, length);
+        BIT_skipBits(DStream, dd[val].nbBits);
+        return length;
+    }
+    memcpy(op, ds+val, maxL);
+    if (DStream->bitsConsumed < (sizeof(DStream->bitContainer)*8))
+    {
+        BIT_skipBits(DStream, dd[val].nbBits);
+        if (DStream->bitsConsumed > (sizeof(DStream->bitContainer)*8))
+            DStream->bitsConsumed = (sizeof(DStream->bitContainer)*8);   /* ugly hack; works only because it's the last symbol. Note : can't easily extract nbBits from just this symbol */
+    }
+    return maxL;
+}
+
+
+#define HUF_DECODE_SYMBOLX6_0(ptr, DStreamPtr) \
+    ptr += HUF_decodeSymbolX6(ptr, DStreamPtr, dd, ds, dtLog)
+
+#define HUF_DECODE_SYMBOLX6_1(ptr, DStreamPtr) \
+    if (MEM_64bits() || (HUF_MAX_TABLELOG<=12)) \
+        HUF_DECODE_SYMBOLX6_0(ptr, DStreamPtr)
+
+#define HUF_DECODE_SYMBOLX6_2(ptr, DStreamPtr) \
+    if (MEM_64bits()) \
+        HUF_DECODE_SYMBOLX6_0(ptr, DStreamPtr)
+
+static inline size_t HUF_decodeStreamX6(BYTE* p, BIT_DStream_t* bitDPtr, BYTE* const pEnd, const U32* DTable, const U32 dtLog)
+{
+    const void* ddPtr = DTable+1;
+    const HUF_DDescX6* dd = (const HUF_DDescX6*)(ddPtr);
+    const void* dsPtr = DTable + 1 + ((size_t)1<<(dtLog-1));
+    const HUF_DSeqX6* ds = (const HUF_DSeqX6*)(dsPtr);
+    BYTE* const pStart = p;
+
+    /* up to 16 symbols at a time */
+    while ((BIT_reloadDStream(bitDPtr) == BIT_DStream_unfinished) && (p <= pEnd-16))
+    {
+        HUF_DECODE_SYMBOLX6_2(p, bitDPtr);
+        HUF_DECODE_SYMBOLX6_1(p, bitDPtr);
+        HUF_DECODE_SYMBOLX6_2(p, bitDPtr);
+        HUF_DECODE_SYMBOLX6_0(p, bitDPtr);
+    }
+
+    /* closer to the end, up to 4 symbols at a time */
+    while ((BIT_reloadDStream(bitDPtr) == BIT_DStream_unfinished) && (p <= pEnd-4))
+        HUF_DECODE_SYMBOLX6_0(p, bitDPtr);
+
+    while (p <= pEnd-4)
+        HUF_DECODE_SYMBOLX6_0(p, bitDPtr);   /* no need to reload : reached the end of DStream */
+
+    while (p < pEnd)
+        p += HUF_decodeLastSymbolsX6(p, (U32)(pEnd-p), bitDPtr, dd, ds, dtLog);
+
+    return p-pStart;
+}
+
+
+
+static size_t HUF_decompress4X6_usingDTable(
+          void* dst,  size_t dstSize,
+    const void* cSrc, size_t cSrcSize,
+    const U32* DTable)
+{
+    if (cSrcSize < 10) return ERROR(corruption_detected);   /* strict minimum : jump table + 1 byte per stream */
+
+    {
+        const BYTE* const istart = (const BYTE*) cSrc;
+        BYTE* const ostart = (BYTE*) dst;
+        BYTE* const oend = ostart + dstSize;
+
+        const U32 dtLog = DTable[0];
+        const void* ddPtr = DTable+1;
+        const HUF_DDescX6* dd = (const HUF_DDescX6*)(ddPtr);
+        const void* dsPtr = DTable + 1 + ((size_t)1<<(dtLog-1));
+        const HUF_DSeqX6* ds = (const HUF_DSeqX6*)(dsPtr);
+        size_t errorCode;
+
+        /* Init */
+        BIT_DStream_t bitD1;
+        BIT_DStream_t bitD2;
+        BIT_DStream_t bitD3;
+        BIT_DStream_t bitD4;
+        const size_t length1 = MEM_readLE16(istart);
+        const size_t length2 = MEM_readLE16(istart+2);
+        const size_t length3 = MEM_readLE16(istart+4);
+        size_t length4;
+        const BYTE* const istart1 = istart + 6;  /* jumpTable */
+        const BYTE* const istart2 = istart1 + length1;
+        const BYTE* const istart3 = istart2 + length2;
+        const BYTE* const istart4 = istart3 + length3;
+        const size_t segmentSize = (dstSize+3) / 4;
+        BYTE* const opStart2 = ostart + segmentSize;
+        BYTE* const opStart3 = opStart2 + segmentSize;
+        BYTE* const opStart4 = opStart3 + segmentSize;
+        BYTE* op1 = ostart;
+        BYTE* op2 = opStart2;
+        BYTE* op3 = opStart3;
+        BYTE* op4 = opStart4;
+        U32 endSignal;
+
+        length4 = cSrcSize - (length1 + length2 + length3 + 6);
+        if (length4 > cSrcSize) return ERROR(corruption_detected);   /* overflow */
+        errorCode = BIT_initDStream(&bitD1, istart1, length1);
+        if (HUF_isError(errorCode)) return errorCode;
+        errorCode = BIT_initDStream(&bitD2, istart2, length2);
+        if (HUF_isError(errorCode)) return errorCode;
+        errorCode = BIT_initDStream(&bitD3, istart3, length3);
+        if (HUF_isError(errorCode)) return errorCode;
+        errorCode = BIT_initDStream(&bitD4, istart4, length4);
+        if (HUF_isError(errorCode)) return errorCode;
+
+        /* 16-64 symbols per loop (4-16 symbols per stream) */
+        endSignal = BIT_reloadDStream(&bitD1) | BIT_reloadDStream(&bitD2) | BIT_reloadDStream(&bitD3) | BIT_reloadDStream(&bitD4);
+        for ( ; (op3 <= opStart4) && (endSignal==BIT_DStream_unfinished) && (op4<=(oend-16)) ; )
+        {
+            HUF_DECODE_SYMBOLX6_2(op1, &bitD1);
+            HUF_DECODE_SYMBOLX6_2(op2, &bitD2);
+            HUF_DECODE_SYMBOLX6_2(op3, &bitD3);
+            HUF_DECODE_SYMBOLX6_2(op4, &bitD4);
+            HUF_DECODE_SYMBOLX6_1(op1, &bitD1);
+            HUF_DECODE_SYMBOLX6_1(op2, &bitD2);
+            HUF_DECODE_SYMBOLX6_1(op3, &bitD3);
+            HUF_DECODE_SYMBOLX6_1(op4, &bitD4);
+            HUF_DECODE_SYMBOLX6_2(op1, &bitD1);
+            HUF_DECODE_SYMBOLX6_2(op2, &bitD2);
+            HUF_DECODE_SYMBOLX6_2(op3, &bitD3);
+            HUF_DECODE_SYMBOLX6_2(op4, &bitD4);
+            HUF_DECODE_SYMBOLX6_0(op1, &bitD1);
+            HUF_DECODE_SYMBOLX6_0(op2, &bitD2);
+            HUF_DECODE_SYMBOLX6_0(op3, &bitD3);
+            HUF_DECODE_SYMBOLX6_0(op4, &bitD4);
+
+            endSignal = BIT_reloadDStream(&bitD1) | BIT_reloadDStream(&bitD2) | BIT_reloadDStream(&bitD3) | BIT_reloadDStream(&bitD4);
+        }
+
+        /* check corruption */
+        if (op1 > opStart2) return ERROR(corruption_detected);
+        if (op2 > opStart3) return ERROR(corruption_detected);
+        if (op3 > opStart4) return ERROR(corruption_detected);
+        /* note : op4 supposed already verified within main loop */
+
+        /* finish bitStreams one by one */
+        HUF_decodeStreamX6(op1, &bitD1, opStart2, DTable, dtLog);
+        HUF_decodeStreamX6(op2, &bitD2, opStart3, DTable, dtLog);
+        HUF_decodeStreamX6(op3, &bitD3, opStart4, DTable, dtLog);
+        HUF_decodeStreamX6(op4, &bitD4, oend,     DTable, dtLog);
+
+        /* check */
+        endSignal = BIT_endOfDStream(&bitD1) & BIT_endOfDStream(&bitD2) & BIT_endOfDStream(&bitD3) & BIT_endOfDStream(&bitD4);
+        if (!endSignal) return ERROR(corruption_detected);
+
+        /* decoded size */
+        return dstSize;
+    }
+}
+
+
+static size_t HUF_decompress4X6 (void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize)
+{
+    HUF_CREATE_STATIC_DTABLEX6(DTable, HUF_MAX_TABLELOG);
+    const BYTE* ip = (const BYTE*) cSrc;
+
+    size_t hSize = HUF_readDTableX6 (DTable, cSrc, cSrcSize);
+    if (HUF_isError(hSize)) return hSize;
+    if (hSize >= cSrcSize) return ERROR(srcSize_wrong);
+    ip += hSize;
+    cSrcSize -= hSize;
+
+    return HUF_decompress4X6_usingDTable (dst, dstSize, ip, cSrcSize, DTable);
+}
+
+
+/**********************************/
+/* Generic decompression selector */
+/**********************************/
+
+typedef struct { U32 tableTime; U32 decode256Time; } algo_time_t;
+static const algo_time_t algoTime[16 /* Quantization */][3 /* single, double, quad */] =
+{
+    /* single, double, quad */
+    {{0,0}, {1,1}, {2,2}},  /* Q==0 : impossible */
+    {{0,0}, {1,1}, {2,2}},  /* Q==1 : impossible */
+    {{  38,130}, {1313, 74}, {2151, 38}},   /* Q == 2 : 12-18% */
+    {{ 448,128}, {1353, 74}, {2238, 41}},   /* Q == 3 : 18-25% */
+    {{ 556,128}, {1353, 74}, {2238, 47}},   /* Q == 4 : 25-32% */
+    {{ 714,128}, {1418, 74}, {2436, 53}},   /* Q == 5 : 32-38% */
+    {{ 883,128}, {1437, 74}, {2464, 61}},   /* Q == 6 : 38-44% */
+    {{ 897,128}, {1515, 75}, {2622, 68}},   /* Q == 7 : 44-50% */
+    {{ 926,128}, {1613, 75}, {2730, 75}},   /* Q == 8 : 50-56% */
+    {{ 947,128}, {1729, 77}, {3359, 77}},   /* Q == 9 : 56-62% */
+    {{1107,128}, {2083, 81}, {4006, 84}},   /* Q ==10 : 62-69% */
+    {{1177,128}, {2379, 87}, {4785, 88}},   /* Q ==11 : 69-75% */
+    {{1242,128}, {2415, 93}, {5155, 84}},   /* Q ==12 : 75-81% */
+    {{1349,128}, {2644,106}, {5260,106}},   /* Q ==13 : 81-87% */
+    {{1455,128}, {2422,124}, {4174,124}},   /* Q ==14 : 87-93% */
+    {{ 722,128}, {1891,145}, {1936,146}},   /* Q ==15 : 93-99% */
+};
+
+typedef size_t (*decompressionAlgo)(void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize);
+
+static size_t HUF_decompress (void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize)
+{
+    static const decompressionAlgo decompress[3] = { HUF_decompress4X2, HUF_decompress4X4, HUF_decompress4X6 };
+    /* estimate decompression time */
+    U32 Q;
+    const U32 D256 = (U32)(dstSize >> 8);
+    U32 Dtime[3];
+    U32 algoNb = 0;
+    int n;
+
+    /* validation checks */
+    if (dstSize == 0) return ERROR(dstSize_tooSmall);
+    if (cSrcSize > dstSize) return ERROR(corruption_detected);   /* invalid */
+    if (cSrcSize == dstSize) { memcpy(dst, cSrc, dstSize); return dstSize; }   /* not compressed */
+    if (cSrcSize == 1) { memset(dst, *(const BYTE*)cSrc, dstSize); return dstSize; }   /* RLE */
+
+    /* decoder timing evaluation */
+    Q = (U32)(cSrcSize * 16 / dstSize);   /* Q < 16 since dstSize > cSrcSize */
+    for (n=0; n<3; n++)
+        Dtime[n] = algoTime[Q][n].tableTime + (algoTime[Q][n].decode256Time * D256);
+
+    Dtime[1] += Dtime[1] >> 4; Dtime[2] += Dtime[2] >> 3; /* advantage to algorithms using less memory, for cache eviction */
+
+    if (Dtime[1] < Dtime[0]) algoNb = 1;
+    if (Dtime[2] < Dtime[algoNb]) algoNb = 2;
+
+    return decompress[algoNb](dst, dstSize, cSrc, cSrcSize);
+
+    //return HUF_decompress4X2(dst, dstSize, cSrc, cSrcSize);   /* multi-streams single-symbol decoding */
+    //return HUF_decompress4X4(dst, dstSize, cSrc, cSrcSize);   /* multi-streams double-symbols decoding */
+    //return HUF_decompress4X6(dst, dstSize, cSrc, cSrcSize);   /* multi-streams quad-symbols decoding */
+}
+/*
+    zstd - standard compression library
+    Copyright (C) 2014-2015, Yann Collet.
+
+    BSD 2-Clause License (http://www.opensource.org/licenses/bsd-license.php)
+
+    Redistribution and use in source and binary forms, with or without
+    modification, are permitted provided that the following conditions are
+    met:
+    * Redistributions of source code must retain the above copyright
+    notice, this list of conditions and the following disclaimer.
+    * Redistributions in binary form must reproduce the above
+    copyright notice, this list of conditions and the following disclaimer
+    in the documentation and/or other materials provided with the
+    distribution.
+    THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+    "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+    LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+    A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+    OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+    SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+    LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+    DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+    THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+    (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+    OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+    You can contact the author at :
+    - zstd source repository : https://github.com/Cyan4973/zstd
+    - ztsd public forum : https://groups.google.com/forum/#!forum/lz4c
+*/
+
+/* ***************************************************************
+*  Tuning parameters
+*****************************************************************/
+/*!
+*  MEMORY_USAGE :
+*  Memory usage formula : N->2^N Bytes (examples : 10 -> 1KB; 12 -> 4KB ; 16 -> 64KB; 20 -> 1MB; etc.)
+*  Increasing memory usage improves compression ratio
+*  Reduced memory usage can improve speed, due to cache effect
+*/
+#define ZSTD_MEMORY_USAGE 17
+
+/*!
+ * HEAPMODE :
+ * Select how default compression functions will allocate memory for their hash table,
+ * in memory stack (0, fastest), or in memory heap (1, requires malloc())
+ * Note that compression context is fairly large, as a consequence heap memory is recommended.
+ */
+#ifndef ZSTD_HEAPMODE
+#  define ZSTD_HEAPMODE 1
+#endif /* ZSTD_HEAPMODE */
+
+/*!
+*  LEGACY_SUPPORT :
+*  decompressor can decode older formats (starting from Zstd 0.1+)
+*/
+#ifndef ZSTD_LEGACY_SUPPORT
+#  define ZSTD_LEGACY_SUPPORT 1
+#endif
+
+
+/* *******************************************************
+*  Includes
+*********************************************************/
+#include <stdlib.h>      /* calloc */
+#include <string.h>      /* memcpy, memmove */
+#include <stdio.h>       /* debug : printf */
+
+
+/* *******************************************************
+*  Compiler specifics
+*********************************************************/
+#ifdef __AVX2__
+#  include <immintrin.h>   /* AVX2 intrinsics */
+#endif
+
+#ifdef _MSC_VER    /* Visual Studio */
+#  include <intrin.h>                    /* For Visual 2005 */
+#  pragma warning(disable : 4127)        /* disable: C4127: conditional expression is constant */
+#  pragma warning(disable : 4324)        /* disable: C4324: padded structure */
+#endif
+
+
+/* *******************************************************
+*  Constants
+*********************************************************/
+#define HASH_LOG (ZSTD_MEMORY_USAGE - 2)
+#define HASH_TABLESIZE (1 << HASH_LOG)
+#define HASH_MASK (HASH_TABLESIZE - 1)
+
+#define KNUTH 2654435761
+
+#define BIT7 128
+#define BIT6  64
+#define BIT5  32
+#define BIT4  16
+#define BIT1   2
+#define BIT0   1
+
+#define KB *(1 <<10)
+#define MB *(1 <<20)
+#define GB *(1U<<30)
+
+#define BLOCKSIZE (128 KB)                 /* define, for static allocation */
+#define MIN_SEQUENCES_SIZE (2 /*seqNb*/ + 2 /*dumps*/ + 3 /*seqTables*/ + 1 /*bitStream*/)
+#define MIN_CBLOCK_SIZE (3 /*litCSize*/ + MIN_SEQUENCES_SIZE)
+#define IS_RAW BIT0
+#define IS_RLE BIT1
+
+#define WORKPLACESIZE (BLOCKSIZE*3)
+#define MINMATCH 4
+#define MLbits   7
+#define LLbits   6
+#define Offbits  5
+#define MaxML  ((1<<MLbits )-1)
+#define MaxLL  ((1<<LLbits )-1)
+#define MaxOff   31
+#define LitFSELog  11
+#define MLFSELog   10
+#define LLFSELog   10
+#define OffFSELog   9
+#define MAX(a,b) ((a)<(b)?(b):(a))
+#define MaxSeq MAX(MaxLL, MaxML)
+
+#define LITERAL_NOENTROPY 63
+#define COMMAND_NOENTROPY 7   /* to remove */
+
+static const size_t ZSTD_blockHeaderSize = 3;
+static const size_t ZSTD_frameHeaderSize = 4;
+
+
+/* *******************************************************
+*  Memory operations
+**********************************************************/
+static void   ZSTD_copy4(void* dst, const void* src) { memcpy(dst, src, 4); }
+
+static void   ZSTD_copy8(void* dst, const void* src) { memcpy(dst, src, 8); }
+
+#define COPY8(d,s) { ZSTD_copy8(d,s); d+=8; s+=8; }
+
+/*! ZSTD_wildcopy : custom version of memcpy(), can copy up to 7-8 bytes too many */
+static void ZSTD_wildcopy(void* dst, const void* src, ptrdiff_t length)
+{
+    const BYTE* ip = (const BYTE*)src;
+    BYTE* op = (BYTE*)dst;
+    BYTE* const oend = op + length;
+    do COPY8(op, ip) while (op < oend);
+}
+
+
+/* **************************************
+*  Local structures
+****************************************/
+typedef enum { bt_compressed, bt_raw, bt_rle, bt_end } blockType_t;
+
+typedef struct
+{
+    blockType_t blockType;
+    U32 origSize;
+} blockProperties_t;
+
+typedef struct {
+    void* buffer;
+    U32*  offsetStart;
+    U32*  offset;
+    BYTE* offCodeStart;
+    BYTE* offCode;
+    BYTE* litStart;
+    BYTE* lit;
+    BYTE* litLengthStart;
+    BYTE* litLength;
+    BYTE* matchLengthStart;
+    BYTE* matchLength;
+    BYTE* dumpsStart;
+    BYTE* dumps;
+} seqStore_t;
+
+
+/* *************************************
+*  Error Management
+***************************************/
+/*! ZSTD_isError
+*   tells if a return value is an error code */
+static unsigned ZSTD_isError(size_t code) { return ERR_isError(code); }
+
+
+
+/* *************************************************************
+*   Decompression section
+***************************************************************/
+struct ZSTD_DCtx_s
+{
+    U32 LLTable[FSE_DTABLE_SIZE_U32(LLFSELog)];
+    U32 OffTable[FSE_DTABLE_SIZE_U32(OffFSELog)];
+    U32 MLTable[FSE_DTABLE_SIZE_U32(MLFSELog)];
+    void* previousDstEnd;
+    void* base;
+    size_t expected;
+    blockType_t bType;
+    U32 phase;
+    const BYTE* litPtr;
+    size_t litSize;
+    BYTE litBuffer[BLOCKSIZE + 8 /* margin for wildcopy */];
+};   /* typedef'd to ZSTD_Dctx within "zstd_static.h" */
+
+
+static size_t ZSTD_getcBlockSize(const void* src, size_t srcSize, blockProperties_t* bpPtr)
+{
+    const BYTE* const in = (const BYTE* const)src;
+    BYTE headerFlags;
+    U32 cSize;
+
+    if (srcSize < 3) return ERROR(srcSize_wrong);
+
+    headerFlags = *in;
+    cSize = in[2] + (in[1]<<8) + ((in[0] & 7)<<16);
+
+    bpPtr->blockType = (blockType_t)(headerFlags >> 6);
+    bpPtr->origSize = (bpPtr->blockType == bt_rle) ? cSize : 0;
+
+    if (bpPtr->blockType == bt_end) return 0;
+    if (bpPtr->blockType == bt_rle) return 1;
+    return cSize;
+}
+
+static size_t ZSTD_copyUncompressedBlock(void* dst, size_t maxDstSize, const void* src, size_t srcSize)
+{
+    if (srcSize > maxDstSize) return ERROR(dstSize_tooSmall);
+    memcpy(dst, src, srcSize);
+    return srcSize;
+}
+
+
+/** ZSTD_decompressLiterals
+    @return : nb of bytes read from src, or an error code*/
+static size_t ZSTD_decompressLiterals(void* dst, size_t* maxDstSizePtr,
+                                const void* src, size_t srcSize)
+{
+    const BYTE* ip = (const BYTE*)src;
+
+    const size_t litSize = (MEM_readLE32(src) & 0x1FFFFF) >> 2;   /* no buffer issue : srcSize >= MIN_CBLOCK_SIZE */
+    const size_t litCSize = (MEM_readLE32(ip+2) & 0xFFFFFF) >> 5;   /* no buffer issue : srcSize >= MIN_CBLOCK_SIZE */
+
+    if (litSize > *maxDstSizePtr) return ERROR(corruption_detected);
+    if (litCSize + 5 > srcSize) return ERROR(corruption_detected);
+
+    if (HUF_isError(HUF_decompress(dst, litSize, ip+5, litCSize))) return ERROR(corruption_detected);
+
+    *maxDstSizePtr = litSize;
+    return litCSize + 5;
+}
+
+
+/** ZSTD_decodeLiteralsBlock
+    @return : nb of bytes read from src (< srcSize )*/
+static size_t ZSTD_decodeLiteralsBlock(void* ctx,
+                          const void* src, size_t srcSize)
+{
+    ZSTD_DCtx* dctx = (ZSTD_DCtx*)ctx;
+    const BYTE* const istart = (const BYTE* const)src;
+
+    /* any compressed block with literals segment must be at least this size */
+    if (srcSize < MIN_CBLOCK_SIZE) return ERROR(corruption_detected);
+
+    switch(*istart & 3)
+    {
+    default:
+    case 0:
+        {
+            size_t litSize = BLOCKSIZE;
+            const size_t readSize = ZSTD_decompressLiterals(dctx->litBuffer, &litSize, src, srcSize);
+            dctx->litPtr = dctx->litBuffer;
+            dctx->litSize = litSize;
+            memset(dctx->litBuffer + dctx->litSize, 0, 8);
+            return readSize;   /* works if it's an error too */
+        }
+    case IS_RAW:
+        {
+            const size_t litSize = (MEM_readLE32(istart) & 0xFFFFFF) >> 2;   /* no buffer issue : srcSize >= MIN_CBLOCK_SIZE */
+            if (litSize > srcSize-11)   /* risk of reading too far with wildcopy */
+            {
+                if (litSize > srcSize-3) return ERROR(corruption_detected);
+                memcpy(dctx->litBuffer, istart, litSize);
+                dctx->litPtr = dctx->litBuffer;
+                dctx->litSize = litSize;
+                memset(dctx->litBuffer + dctx->litSize, 0, 8);
+                return litSize+3;
+            }
+            /* direct reference into compressed stream */
+            dctx->litPtr = istart+3;
+            dctx->litSize = litSize;
+            return litSize+3;
+        }
+    case IS_RLE:
+        {
+            const size_t litSize = (MEM_readLE32(istart) & 0xFFFFFF) >> 2;   /* no buffer issue : srcSize >= MIN_CBLOCK_SIZE */
+            if (litSize > BLOCKSIZE) return ERROR(corruption_detected);
+            memset(dctx->litBuffer, istart[3], litSize + 8);
+            dctx->litPtr = dctx->litBuffer;
+            dctx->litSize = litSize;
+            return 4;
+        }
+    }
+}
+
+
+static size_t ZSTD_decodeSeqHeaders(int* nbSeq, const BYTE** dumpsPtr, size_t* dumpsLengthPtr,
+                         FSE_DTable* DTableLL, FSE_DTable* DTableML, FSE_DTable* DTableOffb,
+                         const void* src, size_t srcSize)
+{
+    const BYTE* const istart = (const BYTE* const)src;
+    const BYTE* ip = istart;
+    const BYTE* const iend = istart + srcSize;
+    U32 LLtype, Offtype, MLtype;
+    U32 LLlog, Offlog, MLlog;
+    size_t dumpsLength;
+
+    /* check */
+    if (srcSize < 5) return ERROR(srcSize_wrong);
+
+    /* SeqHead */
+    *nbSeq = MEM_readLE16(ip); ip+=2;
+    LLtype  = *ip >> 6;
+    Offtype = (*ip >> 4) & 3;
+    MLtype  = (*ip >> 2) & 3;
+    if (*ip & 2)
+    {
+        dumpsLength  = ip[2];
+        dumpsLength += ip[1] << 8;
+        ip += 3;
+    }
+    else
+    {
+        dumpsLength  = ip[1];
+        dumpsLength += (ip[0] & 1) << 8;
+        ip += 2;
+    }
+    *dumpsPtr = ip;
+    ip += dumpsLength;
+    *dumpsLengthPtr = dumpsLength;
+
+    /* check */
+    if (ip > iend-3) return ERROR(srcSize_wrong); /* min : all 3 are "raw", hence no header, but at least xxLog bits per type */
+
+    /* sequences */
+    {
+        S16 norm[MaxML+1];    /* assumption : MaxML >= MaxLL and MaxOff */
+        size_t headerSize;
+
+        /* Build DTables */
+        switch(LLtype)
+        {
+        case bt_rle :
+            LLlog = 0;
+            FSE_buildDTable_rle(DTableLL, *ip++); break;
+        case bt_raw :
+            LLlog = LLbits;
+            FSE_buildDTable_raw(DTableLL, LLbits); break;
+        default :
+            {   U32 max = MaxLL;
+                headerSize = FSE_readNCount(norm, &max, &LLlog, ip, iend-ip);
+                if (FSE_isError(headerSize)) return ERROR(GENERIC);
+                if (LLlog > LLFSELog) return ERROR(corruption_detected);
+                ip += headerSize;
+                FSE_buildDTable(DTableLL, norm, max, LLlog);
+        }   }
+
+        switch(Offtype)
+        {
+        case bt_rle :
+            Offlog = 0;
+            if (ip > iend-2) return ERROR(srcSize_wrong);   /* min : "raw", hence no header, but at least xxLog bits */
+            FSE_buildDTable_rle(DTableOffb, *ip++ & MaxOff); /* if *ip > MaxOff, data is corrupted */
+            break;
+        case bt_raw :
+            Offlog = Offbits;
+            FSE_buildDTable_raw(DTableOffb, Offbits); break;
+        default :
+            {   U32 max = MaxOff;
+                headerSize = FSE_readNCount(norm, &max, &Offlog, ip, iend-ip);
+                if (FSE_isError(headerSize)) return ERROR(GENERIC);
+                if (Offlog > OffFSELog) return ERROR(corruption_detected);
+                ip += headerSize;
+                FSE_buildDTable(DTableOffb, norm, max, Offlog);
+        }   }
+
+        switch(MLtype)
+        {
+        case bt_rle :
+            MLlog = 0;
+            if (ip > iend-2) return ERROR(srcSize_wrong); /* min : "raw", hence no header, but at least xxLog bits */
+            FSE_buildDTable_rle(DTableML, *ip++); break;
+        case bt_raw :
+            MLlog = MLbits;
+            FSE_buildDTable_raw(DTableML, MLbits); break;
+        default :
+            {   U32 max = MaxML;
+                headerSize = FSE_readNCount(norm, &max, &MLlog, ip, iend-ip);
+                if (FSE_isError(headerSize)) return ERROR(GENERIC);
+                if (MLlog > MLFSELog) return ERROR(corruption_detected);
+                ip += headerSize;
+                FSE_buildDTable(DTableML, norm, max, MLlog);
+    }   }   }
+
+    return ip-istart;
+}
+
+
+typedef struct {
+    size_t litLength;
+    size_t offset;
+    size_t matchLength;
+} seq_t;
+
+typedef struct {
+    BIT_DStream_t DStream;
+    FSE_DState_t stateLL;
+    FSE_DState_t stateOffb;
+    FSE_DState_t stateML;
+    size_t prevOffset;
+    const BYTE* dumps;
+    const BYTE* dumpsEnd;
+} seqState_t;
+
+
+static void ZSTD_decodeSequence(seq_t* seq, seqState_t* seqState)
+{
+    size_t litLength;
+    size_t prevOffset;
+    size_t offset;
+    size_t matchLength;
+    const BYTE* dumps = seqState->dumps;
+    const BYTE* const de = seqState->dumpsEnd;
+
+    /* Literal length */
+    litLength = FSE_decodeSymbol(&(seqState->stateLL), &(seqState->DStream));
+    prevOffset = litLength ? seq->offset : seqState->prevOffset;
+    seqState->prevOffset = seq->offset;
+    if (litLength == MaxLL)
+    {
+        U32 add = *dumps++;
+        if (add < 255) litLength += add;
+        else
+        {
+            litLength = MEM_readLE32(dumps) & 0xFFFFFF;  /* no pb : dumps is always followed by seq tables > 1 byte */
+            dumps += 3;
+        }
+        if (dumps >= de) dumps = de-1;   /* late correction, to avoid read overflow (data is now corrupted anyway) */
+    }
+
+    /* Offset */
+    {
+        static const size_t offsetPrefix[MaxOff+1] = {  /* note : size_t faster than U32 */
+                1 /*fake*/, 1, 2, 4, 8, 16, 32, 64, 128, 256,
+                512, 1024, 2048, 4096, 8192, 16384, 32768, 65536, 131072, 262144,
+                524288, 1048576, 2097152, 4194304, 8388608, 16777216, 33554432, /*fake*/ 1, 1, 1, 1, 1 };
+        U32 offsetCode, nbBits;
+        offsetCode = FSE_decodeSymbol(&(seqState->stateOffb), &(seqState->DStream));   /* <= maxOff, by table construction */
+        if (MEM_32bits()) BIT_reloadDStream(&(seqState->DStream));
+        nbBits = offsetCode - 1;
+        if (offsetCode==0) nbBits = 0;   /* cmove */
+        offset = offsetPrefix[offsetCode] + BIT_readBits(&(seqState->DStream), nbBits);
+        if (MEM_32bits()) BIT_reloadDStream(&(seqState->DStream));
+        if (offsetCode==0) offset = prevOffset;   /* cmove */
+    }
+
+    /* MatchLength */
+    matchLength = FSE_decodeSymbol(&(seqState->stateML), &(seqState->DStream));
+    if (matchLength == MaxML)
+    {
+        U32 add = *dumps++;
+        if (add < 255) matchLength += add;
+        else
+        {
+            matchLength = MEM_readLE32(dumps) & 0xFFFFFF;  /* no pb : dumps is always followed by seq tables > 1 byte */
+            dumps += 3;
+        }
+        if (dumps >= de) dumps = de-1;   /* late correction, to avoid read overflow (data is now corrupted anyway) */
+    }
+    matchLength += MINMATCH;
+
+    /* save result */
+    seq->litLength = litLength;
+    seq->offset = offset;
+    seq->matchLength = matchLength;
+    seqState->dumps = dumps;
+}
+
+
+static size_t ZSTD_execSequence(BYTE* op,
+                                seq_t sequence,
+                                const BYTE** litPtr, const BYTE* const litLimit,
+                                BYTE* const base, BYTE* const oend)
+{
+    static const int dec32table[] = {0, 1, 2, 1, 4, 4, 4, 4};   /* added */
+    static const int dec64table[] = {8, 8, 8, 7, 8, 9,10,11};   /* substracted */
+    const BYTE* const ostart = op;
+    BYTE* const oLitEnd = op + sequence.litLength;
+    BYTE* const oMatchEnd = op + sequence.litLength + sequence.matchLength;   /* risk : address space overflow (32-bits) */
+    BYTE* const oend_8 = oend-8;
+    const BYTE* const litEnd = *litPtr + sequence.litLength;
+
+    /* checks */
+    if (oLitEnd > oend_8) return ERROR(dstSize_tooSmall);   /* last match must start at a minimum distance of 8 from oend */
+    if (oMatchEnd > oend) return ERROR(dstSize_tooSmall);   /* overwrite beyond dst buffer */
+    if (litEnd > litLimit) return ERROR(corruption_detected);   /* overRead beyond lit buffer */
+
+    /* copy Literals */
+    ZSTD_wildcopy(op, *litPtr, sequence.litLength);   /* note : oLitEnd <= oend-8 : no risk of overwrite beyond oend */
+    op = oLitEnd;
+    *litPtr = litEnd;   /* update for next sequence */
+
+    /* copy Match */
+    {
+        const BYTE* match = op - sequence.offset;
+
+        /* check */
+        if (sequence.offset > (size_t)op) return ERROR(corruption_detected);   /* address space overflow test (this test seems kept by clang optimizer) */
+        //if (match > op) return ERROR(corruption_detected);   /* address space overflow test (is clang optimizer removing this test ?) */
+        if (match < base) return ERROR(corruption_detected);
+
+        /* close range match, overlap */
+        if (sequence.offset < 8)
+        {
+            const int dec64 = dec64table[sequence.offset];
+            op[0] = match[0];
+            op[1] = match[1];
+            op[2] = match[2];
+            op[3] = match[3];
+            match += dec32table[sequence.offset];
+            ZSTD_copy4(op+4, match);
+            match -= dec64;
+        }
+        else
+        {
+            ZSTD_copy8(op, match);
+        }
+        op += 8; match += 8;
+
+        if (oMatchEnd > oend-(16-MINMATCH))
+        {
+            if (op < oend_8)
+            {
+                ZSTD_wildcopy(op, match, oend_8 - op);
+                match += oend_8 - op;
+                op = oend_8;
+            }
+            while (op < oMatchEnd) *op++ = *match++;
+        }
+        else
+        {
+            ZSTD_wildcopy(op, match, (ptrdiff_t)sequence.matchLength-8);   /* works even if matchLength < 8 */
+        }
+    }
+
+    return oMatchEnd - ostart;
+}
+
+static size_t ZSTD_decompressSequences(
+                               void* ctx,
+                               void* dst, size_t maxDstSize,
+                         const void* seqStart, size_t seqSize)
+{
+    ZSTD_DCtx* dctx = (ZSTD_DCtx*)ctx;
+    const BYTE* ip = (const BYTE*)seqStart;
+    const BYTE* const iend = ip + seqSize;
+    BYTE* const ostart = (BYTE* const)dst;
+    BYTE* op = ostart;
+    BYTE* const oend = ostart + maxDstSize;
+    size_t errorCode, dumpsLength;
+    const BYTE* litPtr = dctx->litPtr;
+    const BYTE* const litEnd = litPtr + dctx->litSize;
+    int nbSeq;
+    const BYTE* dumps;
+    U32* DTableLL = dctx->LLTable;
+    U32* DTableML = dctx->MLTable;
+    U32* DTableOffb = dctx->OffTable;
+    BYTE* const base = (BYTE*) (dctx->base);
+
+    /* Build Decoding Tables */
+    errorCode = ZSTD_decodeSeqHeaders(&nbSeq, &dumps, &dumpsLength,
+                                      DTableLL, DTableML, DTableOffb,
+                                      ip, iend-ip);
+    if (ZSTD_isError(errorCode)) return errorCode;
+    ip += errorCode;
+
+    /* Regen sequences */
+    {
+        seq_t sequence;
+        seqState_t seqState;
+
+        memset(&sequence, 0, sizeof(sequence));
+        seqState.dumps = dumps;
+        seqState.dumpsEnd = dumps + dumpsLength;
+        seqState.prevOffset = 1;
+        errorCode = BIT_initDStream(&(seqState.DStream), ip, iend-ip);
+        if (ERR_isError(errorCode)) return ERROR(corruption_detected);
+        FSE_initDState(&(seqState.stateLL), &(seqState.DStream), DTableLL);
+        FSE_initDState(&(seqState.stateOffb), &(seqState.DStream), DTableOffb);
+        FSE_initDState(&(seqState.stateML), &(seqState.DStream), DTableML);
+
+        for ( ; (BIT_reloadDStream(&(seqState.DStream)) <= BIT_DStream_completed) && (nbSeq>0) ; )
+        {
+            size_t oneSeqSize;
+            nbSeq--;
+            ZSTD_decodeSequence(&sequence, &seqState);
+            oneSeqSize = ZSTD_execSequence(op, sequence, &litPtr, litEnd, base, oend);
+            if (ZSTD_isError(oneSeqSize)) return oneSeqSize;
+            op += oneSeqSize;
+        }
+
+        /* check if reached exact end */
+        if ( !BIT_endOfDStream(&(seqState.DStream)) ) return ERROR(corruption_detected);   /* requested too much : data is corrupted */
+        if (nbSeq<0) return ERROR(corruption_detected);   /* requested too many sequences : data is corrupted */
+
+        /* last literal segment */
+        {
+            size_t lastLLSize = litEnd - litPtr;
+            if (litPtr > litEnd) return ERROR(corruption_detected);
+            if (op+lastLLSize > oend) return ERROR(dstSize_tooSmall);
+            if (op != litPtr) memmove(op, litPtr, lastLLSize);
+            op += lastLLSize;
+        }
+    }
+
+    return op-ostart;
+}
+
+
+static size_t ZSTD_decompressBlock(
+                            void* ctx,
+                            void* dst, size_t maxDstSize,
+                      const void* src, size_t srcSize)
+{
+    /* blockType == blockCompressed */
+    const BYTE* ip = (const BYTE*)src;
+
+    /* Decode literals sub-block */
+    size_t litCSize = ZSTD_decodeLiteralsBlock(ctx, src, srcSize);
+    if (ZSTD_isError(litCSize)) return litCSize;
+    ip += litCSize;
+    srcSize -= litCSize;
+
+    return ZSTD_decompressSequences(ctx, dst, maxDstSize, ip, srcSize);
+}
+
+
+static size_t ZSTD_decompressDCtx(void* ctx, void* dst, size_t maxDstSize, const void* src, size_t srcSize)
+{
+    const BYTE* ip = (const BYTE*)src;
+    const BYTE* iend = ip + srcSize;
+    BYTE* const ostart = (BYTE* const)dst;
+    BYTE* op = ostart;
+    BYTE* const oend = ostart + maxDstSize;
+    size_t remainingSize = srcSize;
+    U32 magicNumber;
+    blockProperties_t blockProperties;
+
+    /* Frame Header */
+    if (srcSize < ZSTD_frameHeaderSize+ZSTD_blockHeaderSize) return ERROR(srcSize_wrong);
+    magicNumber = MEM_readLE32(src);
+    if (magicNumber != ZSTD_magicNumber) return ERROR(prefix_unknown);
+    ip += ZSTD_frameHeaderSize; remainingSize -= ZSTD_frameHeaderSize;
+
+    /* Loop on each block */
+    while (1)
+    {
+        size_t decodedSize=0;
+        size_t cBlockSize = ZSTD_getcBlockSize(ip, iend-ip, &blockProperties);
+        if (ZSTD_isError(cBlockSize)) return cBlockSize;
+
+        ip += ZSTD_blockHeaderSize;
+        remainingSize -= ZSTD_blockHeaderSize;
+        if (cBlockSize > remainingSize) return ERROR(srcSize_wrong);
+
+        switch(blockProperties.blockType)
+        {
+        case bt_compressed:
+            decodedSize = ZSTD_decompressBlock(ctx, op, oend-op, ip, cBlockSize);
+            break;
+        case bt_raw :
+            decodedSize = ZSTD_copyUncompressedBlock(op, oend-op, ip, cBlockSize);
+            break;
+        case bt_rle :
+            return ERROR(GENERIC);   /* not yet supported */
+            break;
+        case bt_end :
+            /* end of frame */
+            if (remainingSize) return ERROR(srcSize_wrong);
+            break;
+        default:
+            return ERROR(GENERIC);   /* impossible */
+        }
+        if (cBlockSize == 0) break;   /* bt_end */
+
+        if (ZSTD_isError(decodedSize)) return decodedSize;
+        op += decodedSize;
+        ip += cBlockSize;
+        remainingSize -= cBlockSize;
+    }
+
+    return op-ostart;
+}
+
+static size_t ZSTD_decompress(void* dst, size_t maxDstSize, const void* src, size_t srcSize)
+{
+    ZSTD_DCtx ctx;
+    ctx.base = dst;
+    return ZSTD_decompressDCtx(&ctx, dst, maxDstSize, src, srcSize);
+}
+
+static size_t ZSTD_findFrameCompressedSize(const void *src, size_t srcSize)
+{
+
+    const BYTE* ip = (const BYTE*)src;
+    size_t remainingSize = srcSize;
+    U32 magicNumber;
+    blockProperties_t blockProperties;
+
+    /* Frame Header */
+    if (srcSize < ZSTD_frameHeaderSize+ZSTD_blockHeaderSize) return ERROR(srcSize_wrong);
+    magicNumber = MEM_readLE32(src);
+    if (magicNumber != ZSTD_magicNumber) return ERROR(prefix_unknown);
+    ip += ZSTD_frameHeaderSize; remainingSize -= ZSTD_frameHeaderSize;
+
+    /* Loop on each block */
+    while (1)
+    {
+        size_t cBlockSize = ZSTD_getcBlockSize(ip, remainingSize, &blockProperties);
+        if (ZSTD_isError(cBlockSize)) return cBlockSize;
+
+        ip += ZSTD_blockHeaderSize;
+        remainingSize -= ZSTD_blockHeaderSize;
+        if (cBlockSize > remainingSize) return ERROR(srcSize_wrong);
+
+        if (cBlockSize == 0) break;   /* bt_end */
+
+        ip += cBlockSize;
+        remainingSize -= cBlockSize;
+    }
+
+    return ip - (const BYTE*)src;
+}
+
+/*******************************
+*  Streaming Decompression API
+*******************************/
+
+static size_t ZSTD_resetDCtx(ZSTD_DCtx* dctx)
+{
+    dctx->expected = ZSTD_frameHeaderSize;
+    dctx->phase = 0;
+    dctx->previousDstEnd = NULL;
+    dctx->base = NULL;
+    return 0;
+}
+
+static ZSTD_DCtx* ZSTD_createDCtx(void)
+{
+    ZSTD_DCtx* dctx = (ZSTD_DCtx*)malloc(sizeof(ZSTD_DCtx));
+    if (dctx==NULL) return NULL;
+    ZSTD_resetDCtx(dctx);
+    return dctx;
+}
+
+static size_t ZSTD_freeDCtx(ZSTD_DCtx* dctx)
+{
+    free(dctx);
+    return 0;
+}
+
+static size_t ZSTD_nextSrcSizeToDecompress(ZSTD_DCtx* dctx)
+{
+    return dctx->expected;
+}
+
+static size_t ZSTD_decompressContinue(ZSTD_DCtx* ctx, void* dst, size_t maxDstSize, const void* src, size_t srcSize)
+{
+    /* Sanity check */
+    if (srcSize != ctx->expected) return ERROR(srcSize_wrong);
+    if (dst != ctx->previousDstEnd)  /* not contiguous */
+        ctx->base = dst;
+
+    /* Decompress : frame header */
+    if (ctx->phase == 0)
+    {
+        /* Check frame magic header */
+        U32 magicNumber = MEM_readLE32(src);
+        if (magicNumber != ZSTD_magicNumber) return ERROR(prefix_unknown);
+        ctx->phase = 1;
+        ctx->expected = ZSTD_blockHeaderSize;
+        return 0;
+    }
+
+    /* Decompress : block header */
+    if (ctx->phase == 1)
+    {
+        blockProperties_t bp;
+        size_t blockSize = ZSTD_getcBlockSize(src, ZSTD_blockHeaderSize, &bp);
+        if (ZSTD_isError(blockSize)) return blockSize;
+        if (bp.blockType == bt_end)
+        {
+            ctx->expected = 0;
+            ctx->phase = 0;
+        }
+        else
+        {
+            ctx->expected = blockSize;
+            ctx->bType = bp.blockType;
+            ctx->phase = 2;
+        }
+
+        return 0;
+    }
+
+    /* Decompress : block content */
+    {
+        size_t rSize;
+        switch(ctx->bType)
+        {
+        case bt_compressed:
+            rSize = ZSTD_decompressBlock(ctx, dst, maxDstSize, src, srcSize);
+            break;
+        case bt_raw :
+            rSize = ZSTD_copyUncompressedBlock(dst, maxDstSize, src, srcSize);
+            break;
+        case bt_rle :
+            return ERROR(GENERIC);   /* not yet handled */
+            break;
+        case bt_end :   /* should never happen (filtered at phase 1) */
+            rSize = 0;
+            break;
+        default:
+            return ERROR(GENERIC);
+        }
+        ctx->phase = 1;
+        ctx->expected = ZSTD_blockHeaderSize;
+        ctx->previousDstEnd = (void*)( ((char*)dst) + rSize);
+        return rSize;
+    }
+
+}
+
+
+/* wrapper layer */
+
+unsigned ZSTDv02_isError(size_t code)
+{
+    return ZSTD_isError(code);
+}
+
+size_t ZSTDv02_decompress( void* dst, size_t maxOriginalSize,
+                     const void* src, size_t compressedSize)
+{
+    return ZSTD_decompress(dst, maxOriginalSize, src, compressedSize);
+}
+
+size_t ZSTDv02_findFrameCompressedSize(const void *src, size_t compressedSize)
+{
+    return ZSTD_findFrameCompressedSize(src, compressedSize);
+}
+
+ZSTDv02_Dctx* ZSTDv02_createDCtx(void)
+{
+    return (ZSTDv02_Dctx*)ZSTD_createDCtx();
+}
+
+size_t ZSTDv02_freeDCtx(ZSTDv02_Dctx* dctx)
+{
+    return ZSTD_freeDCtx((ZSTD_DCtx*)dctx);
+}
+
+size_t ZSTDv02_resetDCtx(ZSTDv02_Dctx* dctx)
+{
+    return ZSTD_resetDCtx((ZSTD_DCtx*)dctx);
+}
+
+size_t ZSTDv02_nextSrcSizeToDecompress(ZSTDv02_Dctx* dctx)
+{
+    return ZSTD_nextSrcSizeToDecompress((ZSTD_DCtx*)dctx);
+}
+
+size_t ZSTDv02_decompressContinue(ZSTDv02_Dctx* dctx, void* dst, size_t maxDstSize, const void* src, size_t srcSize)
+{
+    return ZSTD_decompressContinue((ZSTD_DCtx*)dctx, dst, maxDstSize, src, srcSize);
+}
diff --git a/contrib/libzstd/include/zstd/legacy/zstd_v02.h b/contrib/libzstd/include/zstd/legacy/zstd_v02.h
new file mode 100644
index 00000000000..d14f0293cbe
--- /dev/null
+++ b/contrib/libzstd/include/zstd/legacy/zstd_v02.h
@@ -0,0 +1,87 @@
+/**
+ * Copyright (c) 2016-present, Yann Collet, Facebook, Inc.
+ * All rights reserved.
+ *
+ * This source code is licensed under the BSD-style license found in the
+ * LICENSE file in the root directory of this source tree. An additional grant
+ * of patent rights can be found in the PATENTS file in the same directory.
+ */
+
+#ifndef ZSTD_V02_H_4174539423
+#define ZSTD_V02_H_4174539423
+
+#if defined (__cplusplus)
+extern "C" {
+#endif
+
+/* *************************************
+*  Includes
+***************************************/
+#include <stddef.h>   /* size_t */
+
+
+/* *************************************
+*  Simple one-step function
+***************************************/
+/**
+ZSTDv02_decompress() : decompress ZSTD frames compliant with v0.2.x format
+    compressedSize : is the exact source size
+    maxOriginalSize : is the size of the 'dst' buffer, which must be already allocated.
+                      It must be equal or larger than originalSize, otherwise decompression will fail.
+    return : the number of bytes decompressed into destination buffer (originalSize)
+             or an errorCode if it fails (which can be tested using ZSTDv01_isError())
+*/
+size_t ZSTDv02_decompress( void* dst, size_t maxOriginalSize,
+                     const void* src, size_t compressedSize);
+
+/**
+ZSTDv02_getFrameSrcSize() : get the source length of a ZSTD frame compliant with v0.2.x format
+    compressedSize : The size of the 'src' buffer, at least as large as the frame pointed to by 'src'
+    return : the number of bytes that would be read to decompress this frame
+             or an errorCode if it fails (which can be tested using ZSTDv02_isError())
+*/
+size_t ZSTDv02_findFrameCompressedSize(const void* src, size_t compressedSize);
+
+/**
+ZSTDv02_isError() : tells if the result of ZSTDv02_decompress() is an error
+*/
+unsigned ZSTDv02_isError(size_t code);
+
+
+/* *************************************
+*  Advanced functions
+***************************************/
+typedef struct ZSTDv02_Dctx_s ZSTDv02_Dctx;
+ZSTDv02_Dctx* ZSTDv02_createDCtx(void);
+size_t ZSTDv02_freeDCtx(ZSTDv02_Dctx* dctx);
+
+size_t ZSTDv02_decompressDCtx(void* ctx,
+                              void* dst, size_t maxOriginalSize,
+                        const void* src, size_t compressedSize);
+
+/* *************************************
+*  Streaming functions
+***************************************/
+size_t ZSTDv02_resetDCtx(ZSTDv02_Dctx* dctx);
+
+size_t ZSTDv02_nextSrcSizeToDecompress(ZSTDv02_Dctx* dctx);
+size_t ZSTDv02_decompressContinue(ZSTDv02_Dctx* dctx, void* dst, size_t maxDstSize, const void* src, size_t srcSize);
+/**
+  Use above functions alternatively.
+  ZSTD_nextSrcSizeToDecompress() tells how much bytes to provide as 'srcSize' to ZSTD_decompressContinue().
+  ZSTD_decompressContinue() will use previous data blocks to improve compression if they are located prior to current block.
+  Result is the number of bytes regenerated within 'dst'.
+  It can be zero, which is not an error; it just means ZSTD_decompressContinue() has decoded some header.
+*/
+
+/* *************************************
+*  Prefix - version detection
+***************************************/
+#define ZSTDv02_magicNumber 0xFD2FB522   /* v0.2 */
+
+
+#if defined (__cplusplus)
+}
+#endif
+
+#endif /* ZSTD_V02_H_4174539423 */
diff --git a/contrib/libzstd/include/zstd/legacy/zstd_v03.c b/contrib/libzstd/include/zstd/legacy/zstd_v03.c
new file mode 100644
index 00000000000..f438330a469
--- /dev/null
+++ b/contrib/libzstd/include/zstd/legacy/zstd_v03.c
@@ -0,0 +1,3196 @@
+/**
+ * Copyright (c) 2016-present, Yann Collet, Facebook, Inc.
+ * All rights reserved.
+ *
+ * This source code is licensed under the BSD-style license found in the
+ * LICENSE file in the root directory of this source tree. An additional grant
+ * of patent rights can be found in the PATENTS file in the same directory.
+ */
+
+
+#include <stddef.h>    /* size_t, ptrdiff_t */
+#include "zstd_v03.h"
+#include "error_private.h"
+
+
+/******************************************
+*  Compiler-specific
+******************************************/
+#if defined(_MSC_VER)   /* Visual Studio */
+#   include <stdlib.h>  /* _byteswap_ulong */
+#   include <intrin.h>  /* _byteswap_* */
+#endif
+
+
+
+/* ******************************************************************
+   mem.h
+   low-level memory access routines
+   Copyright (C) 2013-2015, Yann Collet.
+
+   BSD 2-Clause License (http://www.opensource.org/licenses/bsd-license.php)
+
+   Redistribution and use in source and binary forms, with or without
+   modification, are permitted provided that the following conditions are
+   met:
+
+       * Redistributions of source code must retain the above copyright
+   notice, this list of conditions and the following disclaimer.
+       * Redistributions in binary form must reproduce the above
+   copyright notice, this list of conditions and the following disclaimer
+   in the documentation and/or other materials provided with the
+   distribution.
+
+   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+   OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+    You can contact the author at :
+    - FSE source repository : https://github.com/Cyan4973/FiniteStateEntropy
+    - Public forum : https://groups.google.com/forum/#!forum/lz4c
+****************************************************************** */
+#ifndef MEM_H_MODULE
+#define MEM_H_MODULE
+
+#if defined (__cplusplus)
+extern "C" {
+#endif
+
+/******************************************
+*  Includes
+******************************************/
+#include <stddef.h>    /* size_t, ptrdiff_t */
+#include <string.h>    /* memcpy */
+
+
+/******************************************
+*  Compiler-specific
+******************************************/
+#if defined(__GNUC__)
+#  define MEM_STATIC static __attribute__((unused))
+#elif defined (__cplusplus) || (defined (__STDC_VERSION__) && (__STDC_VERSION__ >= 199901L) /* C99 */)
+#  define MEM_STATIC static inline
+#elif defined(_MSC_VER)
+#  define MEM_STATIC static __inline
+#else
+#  define MEM_STATIC static  /* this version may generate warnings for unused static functions; disable the relevant warning */
+#endif
+
+
+/****************************************************************
+*  Basic Types
+*****************************************************************/
+#if defined (__cplusplus) || (defined (__STDC_VERSION__) && (__STDC_VERSION__ >= 199901L) /* C99 */)
+# include <stdint.h>
+  typedef  uint8_t BYTE;
+  typedef uint16_t U16;
+  typedef  int16_t S16;
+  typedef uint32_t U32;
+  typedef  int32_t S32;
+  typedef uint64_t U64;
+  typedef  int64_t S64;
+#else
+  typedef unsigned char       BYTE;
+  typedef unsigned short      U16;
+  typedef   signed short      S16;
+  typedef unsigned int        U32;
+  typedef   signed int        S32;
+  typedef unsigned long long  U64;
+  typedef   signed long long  S64;
+#endif
+
+
+/****************************************************************
+*  Memory I/O
+*****************************************************************/
+/* MEM_FORCE_MEMORY_ACCESS
+ * By default, access to unaligned memory is controlled by `memcpy()`, which is safe and portable.
+ * Unfortunately, on some target/compiler combinations, the generated assembly is sub-optimal.
+ * The below switch allow to select different access method for improved performance.
+ * Method 0 (default) : use `memcpy()`. Safe and portable.
+ * Method 1 : `__packed` statement. It depends on compiler extension (ie, not portable).
+ *            This method is safe if your compiler supports it, and *generally* as fast or faster than `memcpy`.
+ * Method 2 : direct access. This method is portable but violate C standard.
+ *            It can generate buggy code on targets generating assembly depending on alignment.
+ *            But in some circumstances, it's the only known way to get the most performance (ie GCC + ARMv6)
+ * See http://fastcompression.blogspot.fr/2015/08/accessing-unaligned-memory.html for details.
+ * Prefer these methods in priority order (0 > 1 > 2)
+ */
+#ifndef MEM_FORCE_MEMORY_ACCESS   /* can be defined externally, on command line for example */
+#  if defined(__GNUC__) && ( defined(__ARM_ARCH_6__) || defined(__ARM_ARCH_6J__) || defined(__ARM_ARCH_6K__) || defined(__ARM_ARCH_6Z__) || defined(__ARM_ARCH_6ZK__) || defined(__ARM_ARCH_6T2__) )
+#    define MEM_FORCE_MEMORY_ACCESS 2
+#  elif (defined(__INTEL_COMPILER) && !defined(WIN32)) || \
+  (defined(__GNUC__) && ( defined(__ARM_ARCH_7__) || defined(__ARM_ARCH_7A__) || defined(__ARM_ARCH_7R__) || defined(__ARM_ARCH_7M__) || defined(__ARM_ARCH_7S__) ))
+#    define MEM_FORCE_MEMORY_ACCESS 1
+#  endif
+#endif
+
+MEM_STATIC unsigned MEM_32bits(void) { return sizeof(void*)==4; }
+MEM_STATIC unsigned MEM_64bits(void) { return sizeof(void*)==8; }
+
+MEM_STATIC unsigned MEM_isLittleEndian(void)
+{
+    const union { U32 u; BYTE c[4]; } one = { 1 };   /* don't use static : performance detrimental  */
+    return one.c[0];
+}
+
+#if defined(MEM_FORCE_MEMORY_ACCESS) && (MEM_FORCE_MEMORY_ACCESS==2)
+
+/* violates C standard on structure alignment.
+Only use if no other choice to achieve best performance on target platform */
+MEM_STATIC U16 MEM_read16(const void* memPtr) { return *(const U16*) memPtr; }
+MEM_STATIC U32 MEM_read32(const void* memPtr) { return *(const U32*) memPtr; }
+MEM_STATIC U64 MEM_read64(const void* memPtr) { return *(const U64*) memPtr; }
+
+MEM_STATIC void MEM_write16(void* memPtr, U16 value) { *(U16*)memPtr = value; }
+
+#elif defined(MEM_FORCE_MEMORY_ACCESS) && (MEM_FORCE_MEMORY_ACCESS==1)
+
+/* __pack instructions are safer, but compiler specific, hence potentially problematic for some compilers */
+/* currently only defined for gcc and icc */
+typedef union { U16 u16; U32 u32; U64 u64; } __attribute__((packed)) unalign;
+
+MEM_STATIC U16 MEM_read16(const void* ptr) { return ((const unalign*)ptr)->u16; }
+MEM_STATIC U32 MEM_read32(const void* ptr) { return ((const unalign*)ptr)->u32; }
+MEM_STATIC U64 MEM_read64(const void* ptr) { return ((const unalign*)ptr)->u64; }
+
+MEM_STATIC void MEM_write16(void* memPtr, U16 value) { ((unalign*)memPtr)->u16 = value; }
+
+#else
+
+/* default method, safe and standard.
+   can sometimes prove slower */
+
+MEM_STATIC U16 MEM_read16(const void* memPtr)
+{
+    U16 val; memcpy(&val, memPtr, sizeof(val)); return val;
+}
+
+MEM_STATIC U32 MEM_read32(const void* memPtr)
+{
+    U32 val; memcpy(&val, memPtr, sizeof(val)); return val;
+}
+
+MEM_STATIC U64 MEM_read64(const void* memPtr)
+{
+    U64 val; memcpy(&val, memPtr, sizeof(val)); return val;
+}
+
+MEM_STATIC void MEM_write16(void* memPtr, U16 value)
+{
+    memcpy(memPtr, &value, sizeof(value));
+}
+
+
+#endif // MEM_FORCE_MEMORY_ACCESS
+
+
+MEM_STATIC U16 MEM_readLE16(const void* memPtr)
+{
+    if (MEM_isLittleEndian())
+        return MEM_read16(memPtr);
+    else
+    {
+        const BYTE* p = (const BYTE*)memPtr;
+        return (U16)(p[0] + (p[1]<<8));
+    }
+}
+
+MEM_STATIC void MEM_writeLE16(void* memPtr, U16 val)
+{
+    if (MEM_isLittleEndian())
+    {
+        MEM_write16(memPtr, val);
+    }
+    else
+    {
+        BYTE* p = (BYTE*)memPtr;
+        p[0] = (BYTE)val;
+        p[1] = (BYTE)(val>>8);
+    }
+}
+
+MEM_STATIC U32 MEM_readLE32(const void* memPtr)
+{
+    if (MEM_isLittleEndian())
+        return MEM_read32(memPtr);
+    else
+    {
+        const BYTE* p = (const BYTE*)memPtr;
+        return (U32)((U32)p[0] + ((U32)p[1]<<8) + ((U32)p[2]<<16) + ((U32)p[3]<<24));
+    }
+}
+
+MEM_STATIC U64 MEM_readLE64(const void* memPtr)
+{
+    if (MEM_isLittleEndian())
+        return MEM_read64(memPtr);
+    else
+    {
+        const BYTE* p = (const BYTE*)memPtr;
+        return (U64)((U64)p[0] + ((U64)p[1]<<8) + ((U64)p[2]<<16) + ((U64)p[3]<<24)
+                     + ((U64)p[4]<<32) + ((U64)p[5]<<40) + ((U64)p[6]<<48) + ((U64)p[7]<<56));
+    }
+}
+
+
+MEM_STATIC size_t MEM_readLEST(const void* memPtr)
+{
+    if (MEM_32bits())
+        return (size_t)MEM_readLE32(memPtr);
+    else
+        return (size_t)MEM_readLE64(memPtr);
+}
+
+
+#if defined (__cplusplus)
+}
+#endif
+
+#endif /* MEM_H_MODULE */
+
+
+/* ******************************************************************
+   bitstream
+   Part of NewGen Entropy library
+   header file (to include)
+   Copyright (C) 2013-2015, Yann Collet.
+
+   BSD 2-Clause License (http://www.opensource.org/licenses/bsd-license.php)
+
+   Redistribution and use in source and binary forms, with or without
+   modification, are permitted provided that the following conditions are
+   met:
+
+       * Redistributions of source code must retain the above copyright
+   notice, this list of conditions and the following disclaimer.
+       * Redistributions in binary form must reproduce the above
+   copyright notice, this list of conditions and the following disclaimer
+   in the documentation and/or other materials provided with the
+   distribution.
+
+   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+   OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+   You can contact the author at :
+   - Source repository : https://github.com/Cyan4973/FiniteStateEntropy
+   - Public forum : https://groups.google.com/forum/#!forum/lz4c
+****************************************************************** */
+#ifndef BITSTREAM_H_MODULE
+#define BITSTREAM_H_MODULE
+
+#if defined (__cplusplus)
+extern "C" {
+#endif
+
+
+/*
+*  This API consists of small unitary functions, which highly benefit from being inlined.
+*  Since link-time-optimization is not available for all compilers,
+*  these functions are defined into a .h to be included.
+*/
+
+
+/**********************************************
+*  bitStream decompression API (read backward)
+**********************************************/
+typedef struct
+{
+    size_t   bitContainer;
+    unsigned bitsConsumed;
+    const char* ptr;
+    const char* start;
+} BIT_DStream_t;
+
+typedef enum { BIT_DStream_unfinished = 0,
+               BIT_DStream_endOfBuffer = 1,
+               BIT_DStream_completed = 2,
+               BIT_DStream_overflow = 3 } BIT_DStream_status;  /* result of BIT_reloadDStream() */
+               /* 1,2,4,8 would be better for bitmap combinations, but slows down performance a bit ... :( */
+
+MEM_STATIC size_t   BIT_initDStream(BIT_DStream_t* bitD, const void* srcBuffer, size_t srcSize);
+MEM_STATIC size_t   BIT_readBits(BIT_DStream_t* bitD, unsigned nbBits);
+MEM_STATIC BIT_DStream_status BIT_reloadDStream(BIT_DStream_t* bitD);
+MEM_STATIC unsigned BIT_endOfDStream(const BIT_DStream_t* bitD);
+
+
+/*
+* Start by invoking BIT_initDStream().
+* A chunk of the bitStream is then stored into a local register.
+* Local register size is 64-bits on 64-bits systems, 32-bits on 32-bits systems (size_t).
+* You can then retrieve bitFields stored into the local register, **in reverse order**.
+* Local register is manually filled from memory by the BIT_reloadDStream() method.
+* A reload guarantee a minimum of ((8*sizeof(size_t))-7) bits when its result is BIT_DStream_unfinished.
+* Otherwise, it can be less than that, so proceed accordingly.
+* Checking if DStream has reached its end can be performed with BIT_endOfDStream()
+*/
+
+
+/******************************************
+*  unsafe API
+******************************************/
+MEM_STATIC size_t BIT_readBitsFast(BIT_DStream_t* bitD, unsigned nbBits);
+/* faster, but works only if nbBits >= 1 */
+
+
+
+/****************************************************************
+*  Helper functions
+****************************************************************/
+MEM_STATIC unsigned BIT_highbit32 (register U32 val)
+{
+#   if defined(_MSC_VER)   /* Visual */
+    unsigned long r=0;
+    _BitScanReverse ( &r, val );
+    return (unsigned) r;
+#   elif defined(__GNUC__) && (__GNUC__ >= 3)   /* Use GCC Intrinsic */
+    return 31 - __builtin_clz (val);
+#   else   /* Software version */
+    static const unsigned DeBruijnClz[32] = { 0, 9, 1, 10, 13, 21, 2, 29, 11, 14, 16, 18, 22, 25, 3, 30, 8, 12, 20, 28, 15, 17, 24, 7, 19, 27, 23, 6, 26, 5, 4, 31 };
+    U32 v = val;
+    unsigned r;
+    v |= v >> 1;
+    v |= v >> 2;
+    v |= v >> 4;
+    v |= v >> 8;
+    v |= v >> 16;
+    r = DeBruijnClz[ (U32) (v * 0x07C4ACDDU) >> 27];
+    return r;
+#   endif
+}
+
+
+
+/**********************************************************
+* bitStream decoding
+**********************************************************/
+
+/*!BIT_initDStream
+*  Initialize a BIT_DStream_t.
+*  @bitD : a pointer to an already allocated BIT_DStream_t structure
+*  @srcBuffer must point at the beginning of a bitStream
+*  @srcSize must be the exact size of the bitStream
+*  @result : size of stream (== srcSize) or an errorCode if a problem is detected
+*/
+MEM_STATIC size_t BIT_initDStream(BIT_DStream_t* bitD, const void* srcBuffer, size_t srcSize)
+{
+    if (srcSize < 1) { memset(bitD, 0, sizeof(*bitD)); return ERROR(srcSize_wrong); }
+
+    if (srcSize >=  sizeof(size_t))   /* normal case */
+    {
+        U32 contain32;
+        bitD->start = (const char*)srcBuffer;
+        bitD->ptr   = (const char*)srcBuffer + srcSize - sizeof(size_t);
+        bitD->bitContainer = MEM_readLEST(bitD->ptr);
+        contain32 = ((const BYTE*)srcBuffer)[srcSize-1];
+        if (contain32 == 0) return ERROR(GENERIC);   /* endMark not present */
+        bitD->bitsConsumed = 8 - BIT_highbit32(contain32);
+    }
+    else
+    {
+        U32 contain32;
+        bitD->start = (const char*)srcBuffer;
+        bitD->ptr   = bitD->start;
+        bitD->bitContainer = *(const BYTE*)(bitD->start);
+        switch(srcSize)
+        {
+            case 7: bitD->bitContainer += (size_t)(((const BYTE*)(bitD->start))[6]) << (sizeof(size_t)*8 - 16);
+            case 6: bitD->bitContainer += (size_t)(((const BYTE*)(bitD->start))[5]) << (sizeof(size_t)*8 - 24);
+            case 5: bitD->bitContainer += (size_t)(((const BYTE*)(bitD->start))[4]) << (sizeof(size_t)*8 - 32);
+            case 4: bitD->bitContainer += (size_t)(((const BYTE*)(bitD->start))[3]) << 24;
+            case 3: bitD->bitContainer += (size_t)(((const BYTE*)(bitD->start))[2]) << 16;
+            case 2: bitD->bitContainer += (size_t)(((const BYTE*)(bitD->start))[1]) <<  8;
+            default:;
+        }
+        contain32 = ((const BYTE*)srcBuffer)[srcSize-1];
+        if (contain32 == 0) return ERROR(GENERIC);   /* endMark not present */
+        bitD->bitsConsumed = 8 - BIT_highbit32(contain32);
+        bitD->bitsConsumed += (U32)(sizeof(size_t) - srcSize)*8;
+    }
+
+    return srcSize;
+}
+
+/*!BIT_lookBits
+ * Provides next n bits from local register
+ * local register is not modified (bits are still present for next read/look)
+ * On 32-bits, maxNbBits==25
+ * On 64-bits, maxNbBits==57
+ * @return : value extracted
+ */
+MEM_STATIC size_t BIT_lookBits(BIT_DStream_t* bitD, U32 nbBits)
+{
+    const U32 bitMask = sizeof(bitD->bitContainer)*8 - 1;
+    return ((bitD->bitContainer << (bitD->bitsConsumed & bitMask)) >> 1) >> ((bitMask-nbBits) & bitMask);
+}
+
+/*! BIT_lookBitsFast :
+*   unsafe version; only works only if nbBits >= 1 */
+MEM_STATIC size_t BIT_lookBitsFast(BIT_DStream_t* bitD, U32 nbBits)
+{
+    const U32 bitMask = sizeof(bitD->bitContainer)*8 - 1;
+    return (bitD->bitContainer << (bitD->bitsConsumed & bitMask)) >> (((bitMask+1)-nbBits) & bitMask);
+}
+
+MEM_STATIC void BIT_skipBits(BIT_DStream_t* bitD, U32 nbBits)
+{
+    bitD->bitsConsumed += nbBits;
+}
+
+/*!BIT_readBits
+ * Read next n bits from local register.
+ * pay attention to not read more than nbBits contained into local register.
+ * @return : extracted value.
+ */
+MEM_STATIC size_t BIT_readBits(BIT_DStream_t* bitD, U32 nbBits)
+{
+    size_t value = BIT_lookBits(bitD, nbBits);
+    BIT_skipBits(bitD, nbBits);
+    return value;
+}
+
+/*!BIT_readBitsFast :
+*  unsafe version; only works only if nbBits >= 1 */
+MEM_STATIC size_t BIT_readBitsFast(BIT_DStream_t* bitD, U32 nbBits)
+{
+    size_t value = BIT_lookBitsFast(bitD, nbBits);
+    BIT_skipBits(bitD, nbBits);
+    return value;
+}
+
+MEM_STATIC BIT_DStream_status BIT_reloadDStream(BIT_DStream_t* bitD)
+{
+    if (bitD->bitsConsumed > (sizeof(bitD->bitContainer)*8))  /* should never happen */
+        return BIT_DStream_overflow;
+
+    if (bitD->ptr >= bitD->start + sizeof(bitD->bitContainer))
+    {
+        bitD->ptr -= bitD->bitsConsumed >> 3;
+        bitD->bitsConsumed &= 7;
+        bitD->bitContainer = MEM_readLEST(bitD->ptr);
+        return BIT_DStream_unfinished;
+    }
+    if (bitD->ptr == bitD->start)
+    {
+        if (bitD->bitsConsumed < sizeof(bitD->bitContainer)*8) return BIT_DStream_endOfBuffer;
+        return BIT_DStream_completed;
+    }
+    {
+        U32 nbBytes = bitD->bitsConsumed >> 3;
+        BIT_DStream_status result = BIT_DStream_unfinished;
+        if (bitD->ptr - nbBytes < bitD->start)
+        {
+            nbBytes = (U32)(bitD->ptr - bitD->start);  /* ptr > start */
+            result = BIT_DStream_endOfBuffer;
+        }
+        bitD->ptr -= nbBytes;
+        bitD->bitsConsumed -= nbBytes*8;
+        bitD->bitContainer = MEM_readLEST(bitD->ptr);   /* reminder : srcSize > sizeof(bitD) */
+        return result;
+    }
+}
+
+/*! BIT_endOfDStream
+*   @return Tells if DStream has reached its exact end
+*/
+MEM_STATIC unsigned BIT_endOfDStream(const BIT_DStream_t* DStream)
+{
+    return ((DStream->ptr == DStream->start) && (DStream->bitsConsumed == sizeof(DStream->bitContainer)*8));
+}
+
+#if defined (__cplusplus)
+}
+#endif
+
+#endif /* BITSTREAM_H_MODULE */
+/* ******************************************************************
+   Error codes and messages
+   Copyright (C) 2013-2015, Yann Collet
+
+   BSD 2-Clause License (http://www.opensource.org/licenses/bsd-license.php)
+
+   Redistribution and use in source and binary forms, with or without
+   modification, are permitted provided that the following conditions are
+   met:
+
+       * Redistributions of source code must retain the above copyright
+   notice, this list of conditions and the following disclaimer.
+       * Redistributions in binary form must reproduce the above
+   copyright notice, this list of conditions and the following disclaimer
+   in the documentation and/or other materials provided with the
+   distribution.
+
+   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+   OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+   You can contact the author at :
+   - Source repository : https://github.com/Cyan4973/FiniteStateEntropy
+   - Public forum : https://groups.google.com/forum/#!forum/lz4c
+****************************************************************** */
+#ifndef ERROR_H_MODULE
+#define ERROR_H_MODULE
+
+#if defined (__cplusplus)
+extern "C" {
+#endif
+
+
+/******************************************
+*  Compiler-specific
+******************************************/
+#if defined (__cplusplus) || (defined (__STDC_VERSION__) && (__STDC_VERSION__ >= 199901L) /* C99 */)
+#  define ERR_STATIC static inline
+#elif defined(_MSC_VER)
+#  define ERR_STATIC static __inline
+#elif defined(__GNUC__)
+#  define ERR_STATIC static __attribute__((unused))
+#else
+#  define ERR_STATIC static  /* this version may generate warnings for unused static functions; disable the relevant warning */
+#endif
+
+
+/******************************************
+*  Error Management
+******************************************/
+#define PREFIX(name) ZSTD_error_##name
+
+#define ERROR(name) (size_t)-PREFIX(name)
+
+#define ERROR_LIST(ITEM) \
+        ITEM(PREFIX(No_Error)) ITEM(PREFIX(GENERIC)) \
+        ITEM(PREFIX(dstSize_tooSmall)) ITEM(PREFIX(srcSize_wrong)) \
+        ITEM(PREFIX(prefix_unknown)) ITEM(PREFIX(corruption_detected)) \
+        ITEM(PREFIX(tableLog_tooLarge)) ITEM(PREFIX(maxSymbolValue_tooLarge)) ITEM(PREFIX(maxSymbolValue_tooSmall)) \
+        ITEM(PREFIX(maxCode))
+
+#define ERROR_GENERATE_ENUM(ENUM) ENUM,
+typedef enum { ERROR_LIST(ERROR_GENERATE_ENUM) } ERR_codes;  /* enum is exposed, to detect & handle specific errors; compare function result to -enum value */
+
+#define ERROR_CONVERTTOSTRING(STRING) #STRING,
+#define ERROR_GENERATE_STRING(EXPR) ERROR_CONVERTTOSTRING(EXPR)
+static const char* ERR_strings[] = { ERROR_LIST(ERROR_GENERATE_STRING) };
+
+ERR_STATIC unsigned ERR_isError(size_t code) { return (code > ERROR(maxCode)); }
+
+ERR_STATIC const char* ERR_getErrorName(size_t code)
+{
+    static const char* codeError = "Unspecified error code";
+    if (ERR_isError(code)) return ERR_strings[-(int)(code)];
+    return codeError;
+}
+
+
+#if defined (__cplusplus)
+}
+#endif
+
+#endif /* ERROR_H_MODULE */
+/*
+Constructor and Destructor of type FSE_CTable
+    Note that its size depends on 'tableLog' and 'maxSymbolValue' */
+typedef unsigned FSE_CTable;   /* don't allocate that. It's just a way to be more restrictive than void* */
+typedef unsigned FSE_DTable;   /* don't allocate that. It's just a way to be more restrictive than void* */
+
+
+/* ******************************************************************
+   FSE : Finite State Entropy coder
+   header file for static linking (only)
+   Copyright (C) 2013-2015, Yann Collet
+
+   BSD 2-Clause License (http://www.opensource.org/licenses/bsd-license.php)
+
+   Redistribution and use in source and binary forms, with or without
+   modification, are permitted provided that the following conditions are
+   met:
+
+       * Redistributions of source code must retain the above copyright
+   notice, this list of conditions and the following disclaimer.
+       * Redistributions in binary form must reproduce the above
+   copyright notice, this list of conditions and the following disclaimer
+   in the documentation and/or other materials provided with the
+   distribution.
+
+   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+   OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+   You can contact the author at :
+   - Source repository : https://github.com/Cyan4973/FiniteStateEntropy
+   - Public forum : https://groups.google.com/forum/#!forum/lz4c
+****************************************************************** */
+#if defined (__cplusplus)
+extern "C" {
+#endif
+
+
+/******************************************
+*  Static allocation
+******************************************/
+/* FSE buffer bounds */
+#define FSE_NCOUNTBOUND 512
+#define FSE_BLOCKBOUND(size) (size + (size>>7))
+#define FSE_COMPRESSBOUND(size) (FSE_NCOUNTBOUND + FSE_BLOCKBOUND(size))   /* Macro version, useful for static allocation */
+
+/* You can statically allocate FSE CTable/DTable as a table of unsigned using below macro */
+#define FSE_CTABLE_SIZE_U32(maxTableLog, maxSymbolValue)   (1 + (1<<(maxTableLog-1)) + ((maxSymbolValue+1)*2))
+#define FSE_DTABLE_SIZE_U32(maxTableLog)                   (1 + (1<<maxTableLog))
+
+
+/******************************************
+*  FSE advanced API
+******************************************/
+static size_t FSE_buildDTable_raw (FSE_DTable* dt, unsigned nbBits);
+/* build a fake FSE_DTable, designed to read an uncompressed bitstream where each symbol uses nbBits */
+
+static size_t FSE_buildDTable_rle (FSE_DTable* dt, unsigned char symbolValue);
+/* build a fake FSE_DTable, designed to always generate the same symbolValue */
+
+
+/******************************************
+*  FSE symbol decompression API
+******************************************/
+typedef struct
+{
+    size_t      state;
+    const void* table;   /* precise table may vary, depending on U16 */
+} FSE_DState_t;
+
+
+static void     FSE_initDState(FSE_DState_t* DStatePtr, BIT_DStream_t* bitD, const FSE_DTable* dt);
+
+static unsigned char FSE_decodeSymbol(FSE_DState_t* DStatePtr, BIT_DStream_t* bitD);
+
+static unsigned FSE_endOfDState(const FSE_DState_t* DStatePtr);
+
+/*
+Let's now decompose FSE_decompress_usingDTable() into its unitary components.
+You will decode FSE-encoded symbols from the bitStream,
+and also any other bitFields you put in, **in reverse order**.
+
+You will need a few variables to track your bitStream. They are :
+
+BIT_DStream_t DStream;    // Stream context
+FSE_DState_t  DState;     // State context. Multiple ones are possible
+FSE_DTable*   DTablePtr;  // Decoding table, provided by FSE_buildDTable()
+
+The first thing to do is to init the bitStream.
+    errorCode = BIT_initDStream(&DStream, srcBuffer, srcSize);
+
+You should then retrieve your initial state(s)
+(in reverse flushing order if you have several ones) :
+    errorCode = FSE_initDState(&DState, &DStream, DTablePtr);
+
+You can then decode your data, symbol after symbol.
+For information the maximum number of bits read by FSE_decodeSymbol() is 'tableLog'.
+Keep in mind that symbols are decoded in reverse order, like a LIFO stack (last in, first out).
+    unsigned char symbol = FSE_decodeSymbol(&DState, &DStream);
+
+You can retrieve any bitfield you eventually stored into the bitStream (in reverse order)
+Note : maximum allowed nbBits is 25, for 32-bits compatibility
+    size_t bitField = BIT_readBits(&DStream, nbBits);
+
+All above operations only read from local register (which size depends on size_t).
+Refueling the register from memory is manually performed by the reload method.
+    endSignal = FSE_reloadDStream(&DStream);
+
+BIT_reloadDStream() result tells if there is still some more data to read from DStream.
+BIT_DStream_unfinished : there is still some data left into the DStream.
+BIT_DStream_endOfBuffer : Dstream reached end of buffer. Its container may no longer be completely filled.
+BIT_DStream_completed : Dstream reached its exact end, corresponding in general to decompression completed.
+BIT_DStream_tooFar : Dstream went too far. Decompression result is corrupted.
+
+When reaching end of buffer (BIT_DStream_endOfBuffer), progress slowly, notably if you decode multiple symbols per loop,
+to properly detect the exact end of stream.
+After each decoded symbol, check if DStream is fully consumed using this simple test :
+    BIT_reloadDStream(&DStream) >= BIT_DStream_completed
+
+When it's done, verify decompression is fully completed, by checking both DStream and the relevant states.
+Checking if DStream has reached its end is performed by :
+    BIT_endOfDStream(&DStream);
+Check also the states. There might be some symbols left there, if some high probability ones (>50%) are possible.
+    FSE_endOfDState(&DState);
+*/
+
+
+/******************************************
+*  FSE unsafe API
+******************************************/
+static unsigned char FSE_decodeSymbolFast(FSE_DState_t* DStatePtr, BIT_DStream_t* bitD);
+/* faster, but works only if nbBits is always >= 1 (otherwise, result will be corrupted) */
+
+
+/******************************************
+*  Implementation of inline functions
+******************************************/
+
+/* decompression */
+
+typedef struct {
+    U16 tableLog;
+    U16 fastMode;
+} FSE_DTableHeader;   /* sizeof U32 */
+
+typedef struct
+{
+    unsigned short newState;
+    unsigned char  symbol;
+    unsigned char  nbBits;
+} FSE_decode_t;   /* size == U32 */
+
+MEM_STATIC void FSE_initDState(FSE_DState_t* DStatePtr, BIT_DStream_t* bitD, const FSE_DTable* dt)
+{
+    FSE_DTableHeader DTableH;
+    memcpy(&DTableH, dt, sizeof(DTableH));
+    DStatePtr->state = BIT_readBits(bitD, DTableH.tableLog);
+    BIT_reloadDStream(bitD);
+    DStatePtr->table = dt + 1;
+}
+
+MEM_STATIC BYTE FSE_decodeSymbol(FSE_DState_t* DStatePtr, BIT_DStream_t* bitD)
+{
+    const FSE_decode_t DInfo = ((const FSE_decode_t*)(DStatePtr->table))[DStatePtr->state];
+    const U32  nbBits = DInfo.nbBits;
+    BYTE symbol = DInfo.symbol;
+    size_t lowBits = BIT_readBits(bitD, nbBits);
+
+    DStatePtr->state = DInfo.newState + lowBits;
+    return symbol;
+}
+
+MEM_STATIC BYTE FSE_decodeSymbolFast(FSE_DState_t* DStatePtr, BIT_DStream_t* bitD)
+{
+    const FSE_decode_t DInfo = ((const FSE_decode_t*)(DStatePtr->table))[DStatePtr->state];
+    const U32 nbBits = DInfo.nbBits;
+    BYTE symbol = DInfo.symbol;
+    size_t lowBits = BIT_readBitsFast(bitD, nbBits);
+
+    DStatePtr->state = DInfo.newState + lowBits;
+    return symbol;
+}
+
+MEM_STATIC unsigned FSE_endOfDState(const FSE_DState_t* DStatePtr)
+{
+    return DStatePtr->state == 0;
+}
+
+
+#if defined (__cplusplus)
+}
+#endif
+/* ******************************************************************
+   Huff0 : Huffman coder, part of New Generation Entropy library
+   header file for static linking (only)
+   Copyright (C) 2013-2015, Yann Collet
+
+   BSD 2-Clause License (http://www.opensource.org/licenses/bsd-license.php)
+
+   Redistribution and use in source and binary forms, with or without
+   modification, are permitted provided that the following conditions are
+   met:
+
+       * Redistributions of source code must retain the above copyright
+   notice, this list of conditions and the following disclaimer.
+       * Redistributions in binary form must reproduce the above
+   copyright notice, this list of conditions and the following disclaimer
+   in the documentation and/or other materials provided with the
+   distribution.
+
+   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+   OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+   You can contact the author at :
+   - Source repository : https://github.com/Cyan4973/FiniteStateEntropy
+   - Public forum : https://groups.google.com/forum/#!forum/lz4c
+****************************************************************** */
+
+#if defined (__cplusplus)
+extern "C" {
+#endif
+
+/******************************************
+*  Static allocation macros
+******************************************/
+/* Huff0 buffer bounds */
+#define HUF_CTABLEBOUND 129
+#define HUF_BLOCKBOUND(size) (size + (size>>8) + 8)   /* only true if incompressible pre-filtered with fast heuristic */
+#define HUF_COMPRESSBOUND(size) (HUF_CTABLEBOUND + HUF_BLOCKBOUND(size))   /* Macro version, useful for static allocation */
+
+/* static allocation of Huff0's DTable */
+#define HUF_DTABLE_SIZE(maxTableLog)   (1 + (1<<maxTableLog))  /* nb Cells; use unsigned short for X2, unsigned int for X4 */
+#define HUF_CREATE_STATIC_DTABLEX2(DTable, maxTableLog) \
+        unsigned short DTable[HUF_DTABLE_SIZE(maxTableLog)] = { maxTableLog }
+#define HUF_CREATE_STATIC_DTABLEX4(DTable, maxTableLog) \
+        unsigned int DTable[HUF_DTABLE_SIZE(maxTableLog)] = { maxTableLog }
+#define HUF_CREATE_STATIC_DTABLEX6(DTable, maxTableLog) \
+        unsigned int DTable[HUF_DTABLE_SIZE(maxTableLog) * 3 / 2] = { maxTableLog }
+
+
+/******************************************
+*  Advanced functions
+******************************************/
+static size_t HUF_decompress4X2 (void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize);   /* single-symbol decoder */
+static size_t HUF_decompress4X4 (void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize);   /* double-symbols decoder */
+
+
+#if defined (__cplusplus)
+}
+#endif
+
+/*
+    zstd - standard compression library
+    Header File
+    Copyright (C) 2014-2015, Yann Collet.
+
+    BSD 2-Clause License (http://www.opensource.org/licenses/bsd-license.php)
+
+    Redistribution and use in source and binary forms, with or without
+    modification, are permitted provided that the following conditions are
+    met:
+    * Redistributions of source code must retain the above copyright
+    notice, this list of conditions and the following disclaimer.
+    * Redistributions in binary form must reproduce the above
+    copyright notice, this list of conditions and the following disclaimer
+    in the documentation and/or other materials provided with the
+    distribution.
+    THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+    "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+    LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+    A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+    OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+    SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+    LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+    DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+    THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+    (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+    OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+    You can contact the author at :
+    - zstd source repository : https://github.com/Cyan4973/zstd
+    - ztsd public forum : https://groups.google.com/forum/#!forum/lz4c
+*/
+
+#if defined (__cplusplus)
+extern "C" {
+#endif
+
+/* *************************************
+*  Includes
+***************************************/
+#include <stddef.h>   /* size_t */
+
+
+/* *************************************
+*  Version
+***************************************/
+#define ZSTD_VERSION_MAJOR    0    /* for breaking interface changes  */
+#define ZSTD_VERSION_MINOR    2    /* for new (non-breaking) interface capabilities */
+#define ZSTD_VERSION_RELEASE  2    /* for tweaks, bug-fixes, or development */
+#define ZSTD_VERSION_NUMBER  (ZSTD_VERSION_MAJOR *100*100 + ZSTD_VERSION_MINOR *100 + ZSTD_VERSION_RELEASE)
+
+
+/* *************************************
+*  Advanced functions
+***************************************/
+typedef struct ZSTD_CCtx_s ZSTD_CCtx;   /* incomplete type */
+
+#if defined (__cplusplus)
+}
+#endif
+/*
+    zstd - standard compression library
+    Header File for static linking only
+    Copyright (C) 2014-2015, Yann Collet.
+
+    BSD 2-Clause License (http://www.opensource.org/licenses/bsd-license.php)
+
+    Redistribution and use in source and binary forms, with or without
+    modification, are permitted provided that the following conditions are
+    met:
+    * Redistributions of source code must retain the above copyright
+    notice, this list of conditions and the following disclaimer.
+    * Redistributions in binary form must reproduce the above
+    copyright notice, this list of conditions and the following disclaimer
+    in the documentation and/or other materials provided with the
+    distribution.
+    THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+    "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+    LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+    A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+    OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+    SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+    LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+    DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+    THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+    (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+    OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+    You can contact the author at :
+    - zstd source repository : https://github.com/Cyan4973/zstd
+    - ztsd public forum : https://groups.google.com/forum/#!forum/lz4c
+*/
+
+/* The objects defined into this file should be considered experimental.
+ * They are not labelled stable, as their prototype may change in the future.
+ * You can use them for tests, provide feedback, or if you can endure risk of future changes.
+ */
+
+#if defined (__cplusplus)
+extern "C" {
+#endif
+
+/* *************************************
+*  Streaming functions
+***************************************/
+
+typedef struct ZSTD_DCtx_s ZSTD_DCtx;
+
+/*
+  Use above functions alternatively.
+  ZSTD_nextSrcSizeToDecompress() tells how much bytes to provide as 'srcSize' to ZSTD_decompressContinue().
+  ZSTD_decompressContinue() will use previous data blocks to improve compression if they are located prior to current block.
+  Result is the number of bytes regenerated within 'dst'.
+  It can be zero, which is not an error; it just means ZSTD_decompressContinue() has decoded some header.
+*/
+
+/* *************************************
+*  Prefix - version detection
+***************************************/
+#define ZSTD_magicNumber 0xFD2FB523   /* v0.3 */
+
+
+#if defined (__cplusplus)
+}
+#endif
+/* ******************************************************************
+   FSE : Finite State Entropy coder
+   Copyright (C) 2013-2015, Yann Collet.
+
+   BSD 2-Clause License (http://www.opensource.org/licenses/bsd-license.php)
+
+   Redistribution and use in source and binary forms, with or without
+   modification, are permitted provided that the following conditions are
+   met:
+
+       * Redistributions of source code must retain the above copyright
+   notice, this list of conditions and the following disclaimer.
+       * Redistributions in binary form must reproduce the above
+   copyright notice, this list of conditions and the following disclaimer
+   in the documentation and/or other materials provided with the
+   distribution.
+
+   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+   OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+    You can contact the author at :
+    - FSE source repository : https://github.com/Cyan4973/FiniteStateEntropy
+    - Public forum : https://groups.google.com/forum/#!forum/lz4c
+****************************************************************** */
+
+#ifndef FSE_COMMONDEFS_ONLY
+
+/****************************************************************
+*  Tuning parameters
+****************************************************************/
+/* MEMORY_USAGE :
+*  Memory usage formula : N->2^N Bytes (examples : 10 -> 1KB; 12 -> 4KB ; 16 -> 64KB; 20 -> 1MB; etc.)
+*  Increasing memory usage improves compression ratio
+*  Reduced memory usage can improve speed, due to cache effect
+*  Recommended max value is 14, for 16KB, which nicely fits into Intel x86 L1 cache */
+#define FSE_MAX_MEMORY_USAGE 14
+#define FSE_DEFAULT_MEMORY_USAGE 13
+
+/* FSE_MAX_SYMBOL_VALUE :
+*  Maximum symbol value authorized.
+*  Required for proper stack allocation */
+#define FSE_MAX_SYMBOL_VALUE 255
+
+
+/****************************************************************
+*  template functions type & suffix
+****************************************************************/
+#define FSE_FUNCTION_TYPE BYTE
+#define FSE_FUNCTION_EXTENSION
+
+
+/****************************************************************
+*  Byte symbol type
+****************************************************************/
+#endif   /* !FSE_COMMONDEFS_ONLY */
+
+
+/****************************************************************
+*  Compiler specifics
+****************************************************************/
+#ifdef _MSC_VER    /* Visual Studio */
+#  define FORCE_INLINE static __forceinline
+#  include <intrin.h>                    /* For Visual 2005 */
+#  pragma warning(disable : 4127)        /* disable: C4127: conditional expression is constant */
+#  pragma warning(disable : 4214)        /* disable: C4214: non-int bitfields */
+#else
+#  if defined (__cplusplus) || defined (__STDC_VERSION__) && __STDC_VERSION__ >= 199901L   /* C99 */
+#    ifdef __GNUC__
+#      define FORCE_INLINE static inline __attribute__((always_inline))
+#    else
+#      define FORCE_INLINE static inline
+#    endif
+#  else
+#    define FORCE_INLINE static
+#  endif /* __STDC_VERSION__ */
+#endif
+
+
+/****************************************************************
+*  Includes
+****************************************************************/
+#include <stdlib.h>     /* malloc, free, qsort */
+#include <string.h>     /* memcpy, memset */
+#include <stdio.h>      /* printf (debug) */
+
+/****************************************************************
+*  Constants
+*****************************************************************/
+#define FSE_MAX_TABLELOG  (FSE_MAX_MEMORY_USAGE-2)
+#define FSE_MAX_TABLESIZE (1U<<FSE_MAX_TABLELOG)
+#define FSE_MAXTABLESIZE_MASK (FSE_MAX_TABLESIZE-1)
+#define FSE_DEFAULT_TABLELOG (FSE_DEFAULT_MEMORY_USAGE-2)
+#define FSE_MIN_TABLELOG 5
+
+#define FSE_TABLELOG_ABSOLUTE_MAX 15
+#if FSE_MAX_TABLELOG > FSE_TABLELOG_ABSOLUTE_MAX
+#error "FSE_MAX_TABLELOG > FSE_TABLELOG_ABSOLUTE_MAX is not supported"
+#endif
+
+
+/****************************************************************
+*  Error Management
+****************************************************************/
+#define FSE_STATIC_ASSERT(c) { enum { FSE_static_assert = 1/(int)(!!(c)) }; }   /* use only *after* variable declarations */
+
+
+/****************************************************************
+*  Complex types
+****************************************************************/
+typedef U32 DTable_max_t[FSE_DTABLE_SIZE_U32(FSE_MAX_TABLELOG)];
+
+
+/****************************************************************
+*  Templates
+****************************************************************/
+/*
+  designed to be included
+  for type-specific functions (template emulation in C)
+  Objective is to write these functions only once, for improved maintenance
+*/
+
+/* safety checks */
+#ifndef FSE_FUNCTION_EXTENSION
+#  error "FSE_FUNCTION_EXTENSION must be defined"
+#endif
+#ifndef FSE_FUNCTION_TYPE
+#  error "FSE_FUNCTION_TYPE must be defined"
+#endif
+
+/* Function names */
+#define FSE_CAT(X,Y) X##Y
+#define FSE_FUNCTION_NAME(X,Y) FSE_CAT(X,Y)
+#define FSE_TYPE_NAME(X,Y) FSE_CAT(X,Y)
+
+
+/* Function templates */
+
+#define FSE_DECODE_TYPE FSE_decode_t
+
+static U32 FSE_tableStep(U32 tableSize) { return (tableSize>>1) + (tableSize>>3) + 3; }
+
+static size_t FSE_buildDTable
+(FSE_DTable* dt, const short* normalizedCounter, unsigned maxSymbolValue, unsigned tableLog)
+{
+    void* ptr = dt+1;
+    FSE_DTableHeader DTableH;
+    FSE_DECODE_TYPE* const tableDecode = (FSE_DECODE_TYPE*)ptr;
+    const U32 tableSize = 1 << tableLog;
+    const U32 tableMask = tableSize-1;
+    const U32 step = FSE_tableStep(tableSize);
+    U16 symbolNext[FSE_MAX_SYMBOL_VALUE+1];
+    U32 position = 0;
+    U32 highThreshold = tableSize-1;
+    const S16 largeLimit= (S16)(1 << (tableLog-1));
+    U32 noLarge = 1;
+    U32 s;
+
+    /* Sanity Checks */
+    if (maxSymbolValue > FSE_MAX_SYMBOL_VALUE) return ERROR(maxSymbolValue_tooLarge);
+    if (tableLog > FSE_MAX_TABLELOG) return ERROR(tableLog_tooLarge);
+
+    /* Init, lay down lowprob symbols */
+    DTableH.tableLog = (U16)tableLog;
+    for (s=0; s<=maxSymbolValue; s++)
+    {
+        if (normalizedCounter[s]==-1)
+        {
+            tableDecode[highThreshold--].symbol = (FSE_FUNCTION_TYPE)s;
+            symbolNext[s] = 1;
+        }
+        else
+        {
+            if (normalizedCounter[s] >= largeLimit) noLarge=0;
+            symbolNext[s] = normalizedCounter[s];
+        }
+    }
+
+    /* Spread symbols */
+    for (s=0; s<=maxSymbolValue; s++)
+    {
+        int i;
+        for (i=0; i<normalizedCounter[s]; i++)
+        {
+            tableDecode[position].symbol = (FSE_FUNCTION_TYPE)s;
+            position = (position + step) & tableMask;
+            while (position > highThreshold) position = (position + step) & tableMask;   /* lowprob area */
+        }
+    }
+
+    if (position!=0) return ERROR(GENERIC);   /* position must reach all cells once, otherwise normalizedCounter is incorrect */
+
+    /* Build Decoding table */
+    {
+        U32 i;
+        for (i=0; i<tableSize; i++)
+        {
+            FSE_FUNCTION_TYPE symbol = (FSE_FUNCTION_TYPE)(tableDecode[i].symbol);
+            U16 nextState = symbolNext[symbol]++;
+            tableDecode[i].nbBits = (BYTE) (tableLog - BIT_highbit32 ((U32)nextState) );
+            tableDecode[i].newState = (U16) ( (nextState << tableDecode[i].nbBits) - tableSize);
+        }
+    }
+
+    DTableH.fastMode = (U16)noLarge;
+    memcpy(dt, &DTableH, sizeof(DTableH));
+    return 0;
+}
+
+
+#ifndef FSE_COMMONDEFS_ONLY
+/******************************************
+*  FSE helper functions
+******************************************/
+static unsigned FSE_isError(size_t code) { return ERR_isError(code); }
+
+
+/****************************************************************
+*  FSE NCount encoding-decoding
+****************************************************************/
+static short FSE_abs(short a)
+{
+    return a<0 ? -a : a;
+}
+
+static size_t FSE_readNCount (short* normalizedCounter, unsigned* maxSVPtr, unsigned* tableLogPtr,
+                 const void* headerBuffer, size_t hbSize)
+{
+    const BYTE* const istart = (const BYTE*) headerBuffer;
+    const BYTE* const iend = istart + hbSize;
+    const BYTE* ip = istart;
+    int nbBits;
+    int remaining;
+    int threshold;
+    U32 bitStream;
+    int bitCount;
+    unsigned charnum = 0;
+    int previous0 = 0;
+
+    if (hbSize < 4) return ERROR(srcSize_wrong);
+    bitStream = MEM_readLE32(ip);
+    nbBits = (bitStream & 0xF) + FSE_MIN_TABLELOG;   /* extract tableLog */
+    if (nbBits > FSE_TABLELOG_ABSOLUTE_MAX) return ERROR(tableLog_tooLarge);
+    bitStream >>= 4;
+    bitCount = 4;
+    *tableLogPtr = nbBits;
+    remaining = (1<<nbBits)+1;
+    threshold = 1<<nbBits;
+    nbBits++;
+
+    while ((remaining>1) && (charnum<=*maxSVPtr))
+    {
+        if (previous0)
+        {
+            unsigned n0 = charnum;
+            while ((bitStream & 0xFFFF) == 0xFFFF)
+            {
+                n0+=24;
+                if (ip < iend-5)
+                {
+                    ip+=2;
+                    bitStream = MEM_readLE32(ip) >> bitCount;
+                }
+                else
+                {
+                    bitStream >>= 16;
+                    bitCount+=16;
+                }
+            }
+            while ((bitStream & 3) == 3)
+            {
+                n0+=3;
+                bitStream>>=2;
+                bitCount+=2;
+            }
+            n0 += bitStream & 3;
+            bitCount += 2;
+            if (n0 > *maxSVPtr) return ERROR(maxSymbolValue_tooSmall);
+            while (charnum < n0) normalizedCounter[charnum++] = 0;
+            if ((ip <= iend-7) || (ip + (bitCount>>3) <= iend-4))
+            {
+                ip += bitCount>>3;
+                bitCount &= 7;
+                bitStream = MEM_readLE32(ip) >> bitCount;
+            }
+            else
+                bitStream >>= 2;
+        }
+        {
+            const short max = (short)((2*threshold-1)-remaining);
+            short count;
+
+            if ((bitStream & (threshold-1)) < (U32)max)
+            {
+                count = (short)(bitStream & (threshold-1));
+                bitCount   += nbBits-1;
+            }
+            else
+            {
+                count = (short)(bitStream & (2*threshold-1));
+                if (count >= threshold) count -= max;
+                bitCount   += nbBits;
+            }
+
+            count--;   /* extra accuracy */
+            remaining -= FSE_abs(count);
+            normalizedCounter[charnum++] = count;
+            previous0 = !count;
+            while (remaining < threshold)
+            {
+                nbBits--;
+                threshold >>= 1;
+            }
+
+            {
+                if ((ip <= iend-7) || (ip + (bitCount>>3) <= iend-4))
+                {
+                    ip += bitCount>>3;
+                    bitCount &= 7;
+                }
+                else
+                {
+                    bitCount -= (int)(8 * (iend - 4 - ip));
+                    ip = iend - 4;
+                }
+                bitStream = MEM_readLE32(ip) >> (bitCount & 31);
+            }
+        }
+    }
+    if (remaining != 1) return ERROR(GENERIC);
+    *maxSVPtr = charnum-1;
+
+    ip += (bitCount+7)>>3;
+    if ((size_t)(ip-istart) > hbSize) return ERROR(srcSize_wrong);
+    return ip-istart;
+}
+
+
+/*********************************************************
+*  Decompression (Byte symbols)
+*********************************************************/
+static size_t FSE_buildDTable_rle (FSE_DTable* dt, BYTE symbolValue)
+{
+    void* ptr = dt;
+    FSE_DTableHeader* const DTableH = (FSE_DTableHeader*)ptr;
+    FSE_decode_t* const cell = (FSE_decode_t*)(ptr) + 1;
+
+    DTableH->tableLog = 0;
+    DTableH->fastMode = 0;
+
+    cell->newState = 0;
+    cell->symbol = symbolValue;
+    cell->nbBits = 0;
+
+    return 0;
+}
+
+
+static size_t FSE_buildDTable_raw (FSE_DTable* dt, unsigned nbBits)
+{
+    void* ptr = dt;
+    FSE_DTableHeader* const DTableH = (FSE_DTableHeader*)ptr;
+    FSE_decode_t* const dinfo = (FSE_decode_t*)(ptr) + 1;
+    const unsigned tableSize = 1 << nbBits;
+    const unsigned tableMask = tableSize - 1;
+    const unsigned maxSymbolValue = tableMask;
+    unsigned s;
+
+    /* Sanity checks */
+    if (nbBits < 1) return ERROR(GENERIC);         /* min size */
+
+    /* Build Decoding Table */
+    DTableH->tableLog = (U16)nbBits;
+    DTableH->fastMode = 1;
+    for (s=0; s<=maxSymbolValue; s++)
+    {
+        dinfo[s].newState = 0;
+        dinfo[s].symbol = (BYTE)s;
+        dinfo[s].nbBits = (BYTE)nbBits;
+    }
+
+    return 0;
+}
+
+FORCE_INLINE size_t FSE_decompress_usingDTable_generic(
+          void* dst, size_t maxDstSize,
+    const void* cSrc, size_t cSrcSize,
+    const FSE_DTable* dt, const unsigned fast)
+{
+    BYTE* const ostart = (BYTE*) dst;
+    BYTE* op = ostart;
+    BYTE* const omax = op + maxDstSize;
+    BYTE* const olimit = omax-3;
+
+    BIT_DStream_t bitD;
+    FSE_DState_t state1;
+    FSE_DState_t state2;
+    size_t errorCode;
+
+    /* Init */
+    errorCode = BIT_initDStream(&bitD, cSrc, cSrcSize);   /* replaced last arg by maxCompressed Size */
+    if (FSE_isError(errorCode)) return errorCode;
+
+    FSE_initDState(&state1, &bitD, dt);
+    FSE_initDState(&state2, &bitD, dt);
+
+#define FSE_GETSYMBOL(statePtr) fast ? FSE_decodeSymbolFast(statePtr, &bitD) : FSE_decodeSymbol(statePtr, &bitD)
+
+    /* 4 symbols per loop */
+    for ( ; (BIT_reloadDStream(&bitD)==BIT_DStream_unfinished) && (op<olimit) ; op+=4)
+    {
+        op[0] = FSE_GETSYMBOL(&state1);
+
+        if (FSE_MAX_TABLELOG*2+7 > sizeof(bitD.bitContainer)*8)    /* This test must be static */
+            BIT_reloadDStream(&bitD);
+
+        op[1] = FSE_GETSYMBOL(&state2);
+
+        if (FSE_MAX_TABLELOG*4+7 > sizeof(bitD.bitContainer)*8)    /* This test must be static */
+            { if (BIT_reloadDStream(&bitD) > BIT_DStream_unfinished) { op+=2; break; } }
+
+        op[2] = FSE_GETSYMBOL(&state1);
+
+        if (FSE_MAX_TABLELOG*2+7 > sizeof(bitD.bitContainer)*8)    /* This test must be static */
+            BIT_reloadDStream(&bitD);
+
+        op[3] = FSE_GETSYMBOL(&state2);
+    }
+
+    /* tail */
+    /* note : BIT_reloadDStream(&bitD) >= FSE_DStream_partiallyFilled; Ends at exactly BIT_DStream_completed */
+    while (1)
+    {
+        if ( (BIT_reloadDStream(&bitD)>BIT_DStream_completed) || (op==omax) || (BIT_endOfDStream(&bitD) && (fast || FSE_endOfDState(&state1))) )
+            break;
+
+        *op++ = FSE_GETSYMBOL(&state1);
+
+        if ( (BIT_reloadDStream(&bitD)>BIT_DStream_completed) || (op==omax) || (BIT_endOfDStream(&bitD) && (fast || FSE_endOfDState(&state2))) )
+            break;
+
+        *op++ = FSE_GETSYMBOL(&state2);
+    }
+
+    /* end ? */
+    if (BIT_endOfDStream(&bitD) && FSE_endOfDState(&state1) && FSE_endOfDState(&state2))
+        return op-ostart;
+
+    if (op==omax) return ERROR(dstSize_tooSmall);   /* dst buffer is full, but cSrc unfinished */
+
+    return ERROR(corruption_detected);
+}
+
+
+static size_t FSE_decompress_usingDTable(void* dst, size_t originalSize,
+                            const void* cSrc, size_t cSrcSize,
+                            const FSE_DTable* dt)
+{
+    FSE_DTableHeader DTableH;
+    memcpy(&DTableH, dt, sizeof(DTableH));
+
+    /* select fast mode (static) */
+    if (DTableH.fastMode) return FSE_decompress_usingDTable_generic(dst, originalSize, cSrc, cSrcSize, dt, 1);
+    return FSE_decompress_usingDTable_generic(dst, originalSize, cSrc, cSrcSize, dt, 0);
+}
+
+
+static size_t FSE_decompress(void* dst, size_t maxDstSize, const void* cSrc, size_t cSrcSize)
+{
+    const BYTE* const istart = (const BYTE*)cSrc;
+    const BYTE* ip = istart;
+    short counting[FSE_MAX_SYMBOL_VALUE+1];
+    DTable_max_t dt;   /* Static analyzer seems unable to understand this table will be properly initialized later */
+    unsigned tableLog;
+    unsigned maxSymbolValue = FSE_MAX_SYMBOL_VALUE;
+    size_t errorCode;
+
+    if (cSrcSize<2) return ERROR(srcSize_wrong);   /* too small input size */
+
+    /* normal FSE decoding mode */
+    errorCode = FSE_readNCount (counting, &maxSymbolValue, &tableLog, istart, cSrcSize);
+    if (FSE_isError(errorCode)) return errorCode;
+    if (errorCode >= cSrcSize) return ERROR(srcSize_wrong);   /* too small input size */
+    ip += errorCode;
+    cSrcSize -= errorCode;
+
+    errorCode = FSE_buildDTable (dt, counting, maxSymbolValue, tableLog);
+    if (FSE_isError(errorCode)) return errorCode;
+
+    /* always return, even if it is an error code */
+    return FSE_decompress_usingDTable (dst, maxDstSize, ip, cSrcSize, dt);
+}
+
+
+
+#endif   /* FSE_COMMONDEFS_ONLY */
+/* ******************************************************************
+   Huff0 : Huffman coder, part of New Generation Entropy library
+   Copyright (C) 2013-2015, Yann Collet.
+
+   BSD 2-Clause License (http://www.opensource.org/licenses/bsd-license.php)
+
+   Redistribution and use in source and binary forms, with or without
+   modification, are permitted provided that the following conditions are
+   met:
+
+       * Redistributions of source code must retain the above copyright
+   notice, this list of conditions and the following disclaimer.
+       * Redistributions in binary form must reproduce the above
+   copyright notice, this list of conditions and the following disclaimer
+   in the documentation and/or other materials provided with the
+   distribution.
+
+   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+   OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+    You can contact the author at :
+    - FSE+Huff0 source repository : https://github.com/Cyan4973/FiniteStateEntropy
+    - Public forum : https://groups.google.com/forum/#!forum/lz4c
+****************************************************************** */
+
+/****************************************************************
+*  Compiler specifics
+****************************************************************/
+#if defined (__cplusplus) || (defined (__STDC_VERSION__) && (__STDC_VERSION__ >= 199901L) /* C99 */)
+/* inline is defined */
+#elif defined(_MSC_VER)
+#  pragma warning(disable : 4127)        /* disable: C4127: conditional expression is constant */
+#  define inline __inline
+#else
+#  define inline /* disable inline */
+#endif
+
+
+/****************************************************************
+*  Includes
+****************************************************************/
+#include <stdlib.h>     /* malloc, free, qsort */
+#include <string.h>     /* memcpy, memset */
+#include <stdio.h>      /* printf (debug) */
+
+/****************************************************************
+*  Error Management
+****************************************************************/
+#define HUF_STATIC_ASSERT(c) { enum { HUF_static_assert = 1/(int)(!!(c)) }; }   /* use only *after* variable declarations */
+
+
+/******************************************
+*  Helper functions
+******************************************/
+static unsigned HUF_isError(size_t code) { return ERR_isError(code); }
+
+#define HUF_ABSOLUTEMAX_TABLELOG  16   /* absolute limit of HUF_MAX_TABLELOG. Beyond that value, code does not work */
+#define HUF_MAX_TABLELOG  12           /* max configured tableLog (for static allocation); can be modified up to HUF_ABSOLUTEMAX_TABLELOG */
+#define HUF_DEFAULT_TABLELOG  HUF_MAX_TABLELOG   /* tableLog by default, when not specified */
+#define HUF_MAX_SYMBOL_VALUE 255
+#if (HUF_MAX_TABLELOG > HUF_ABSOLUTEMAX_TABLELOG)
+#  error "HUF_MAX_TABLELOG is too large !"
+#endif
+
+
+
+/*********************************************************
+*  Huff0 : Huffman block decompression
+*********************************************************/
+typedef struct { BYTE byte; BYTE nbBits; } HUF_DEltX2;   /* single-symbol decoding */
+
+typedef struct { U16 sequence; BYTE nbBits; BYTE length; } HUF_DEltX4;  /* double-symbols decoding */
+
+typedef struct { BYTE symbol; BYTE weight; } sortedSymbol_t;
+
+/*! HUF_readStats
+    Read compact Huffman tree, saved by HUF_writeCTable
+    @huffWeight : destination buffer
+    @return : size read from `src`
+*/
+static size_t HUF_readStats(BYTE* huffWeight, size_t hwSize, U32* rankStats,
+                            U32* nbSymbolsPtr, U32* tableLogPtr,
+                            const void* src, size_t srcSize)
+{
+    U32 weightTotal;
+    U32 tableLog;
+    const BYTE* ip = (const BYTE*) src;
+    size_t iSize;
+    size_t oSize;
+    U32 n;
+
+    if (!srcSize) return ERROR(srcSize_wrong);
+    iSize = ip[0];
+    //memset(huffWeight, 0, hwSize);   /* is not necessary, even though some analyzer complain ... */
+
+    if (iSize >= 128)  /* special header */
+    {
+        if (iSize >= (242))   /* RLE */
+        {
+            static int l[14] = { 1, 2, 3, 4, 7, 8, 15, 16, 31, 32, 63, 64, 127, 128 };
+            oSize = l[iSize-242];
+            memset(huffWeight, 1, hwSize);
+            iSize = 0;
+        }
+        else   /* Incompressible */
+        {
+            oSize = iSize - 127;
+            iSize = ((oSize+1)/2);
+            if (iSize+1 > srcSize) return ERROR(srcSize_wrong);
+            if (oSize >= hwSize) return ERROR(corruption_detected);
+            ip += 1;
+            for (n=0; n<oSize; n+=2)
+            {
+                huffWeight[n]   = ip[n/2] >> 4;
+                huffWeight[n+1] = ip[n/2] & 15;
+            }
+        }
+    }
+    else  /* header compressed with FSE (normal case) */
+    {
+        if (iSize+1 > srcSize) return ERROR(srcSize_wrong);
+        oSize = FSE_decompress(huffWeight, hwSize-1, ip+1, iSize);   /* max (hwSize-1) values decoded, as last one is implied */
+        if (FSE_isError(oSize)) return oSize;
+    }
+
+    /* collect weight stats */
+    memset(rankStats, 0, (HUF_ABSOLUTEMAX_TABLELOG + 1) * sizeof(U32));
+    weightTotal = 0;
+    for (n=0; n<oSize; n++)
+    {
+        if (huffWeight[n] >= HUF_ABSOLUTEMAX_TABLELOG) return ERROR(corruption_detected);
+        rankStats[huffWeight[n]]++;
+        weightTotal += (1 << huffWeight[n]) >> 1;
+    }
+    if (weightTotal == 0) return ERROR(corruption_detected);
+
+    /* get last non-null symbol weight (implied, total must be 2^n) */
+    tableLog = BIT_highbit32(weightTotal) + 1;
+    if (tableLog > HUF_ABSOLUTEMAX_TABLELOG) return ERROR(corruption_detected);
+    {
+        U32 total = 1 << tableLog;
+        U32 rest = total - weightTotal;
+        U32 verif = 1 << BIT_highbit32(rest);
+        U32 lastWeight = BIT_highbit32(rest) + 1;
+        if (verif != rest) return ERROR(corruption_detected);    /* last value must be a clean power of 2 */
+        huffWeight[oSize] = (BYTE)lastWeight;
+        rankStats[lastWeight]++;
+    }
+
+    /* check tree construction validity */
+    if ((rankStats[1] < 2) || (rankStats[1] & 1)) return ERROR(corruption_detected);   /* by construction : at least 2 elts of rank 1, must be even */
+
+    /* results */
+    *nbSymbolsPtr = (U32)(oSize+1);
+    *tableLogPtr = tableLog;
+    return iSize+1;
+}
+
+
+/**************************/
+/* single-symbol decoding */
+/**************************/
+
+static size_t HUF_readDTableX2 (U16* DTable, const void* src, size_t srcSize)
+{
+    BYTE huffWeight[HUF_MAX_SYMBOL_VALUE + 1];
+    U32 rankVal[HUF_ABSOLUTEMAX_TABLELOG + 1];   /* large enough for values from 0 to 16 */
+    U32 tableLog = 0;
+    const BYTE* ip = (const BYTE*) src;
+    size_t iSize = ip[0];
+    U32 nbSymbols = 0;
+    U32 n;
+    U32 nextRankStart;
+    void* ptr = DTable+1;
+    HUF_DEltX2* const dt = (HUF_DEltX2*)(ptr);
+
+    HUF_STATIC_ASSERT(sizeof(HUF_DEltX2) == sizeof(U16));   /* if compilation fails here, assertion is false */
+    //memset(huffWeight, 0, sizeof(huffWeight));   /* is not necessary, even though some analyzer complain ... */
+
+    iSize = HUF_readStats(huffWeight, HUF_MAX_SYMBOL_VALUE + 1, rankVal, &nbSymbols, &tableLog, src, srcSize);
+    if (HUF_isError(iSize)) return iSize;
+
+    /* check result */
+    if (tableLog > DTable[0]) return ERROR(tableLog_tooLarge);   /* DTable is too small */
+    DTable[0] = (U16)tableLog;   /* maybe should separate sizeof DTable, as allocated, from used size of DTable, in case of DTable re-use */
+
+    /* Prepare ranks */
+    nextRankStart = 0;
+    for (n=1; n<=tableLog; n++)
+    {
+        U32 current = nextRankStart;
+        nextRankStart += (rankVal[n] << (n-1));
+        rankVal[n] = current;
+    }
+
+    /* fill DTable */
+    for (n=0; n<nbSymbols; n++)
+    {
+        const U32 w = huffWeight[n];
+        const U32 length = (1 << w) >> 1;
+        U32 i;
+        HUF_DEltX2 D;
+        D.byte = (BYTE)n; D.nbBits = (BYTE)(tableLog + 1 - w);
+        for (i = rankVal[w]; i < rankVal[w] + length; i++)
+            dt[i] = D;
+        rankVal[w] += length;
+    }
+
+    return iSize;
+}
+
+static BYTE HUF_decodeSymbolX2(BIT_DStream_t* Dstream, const HUF_DEltX2* dt, const U32 dtLog)
+{
+        const size_t val = BIT_lookBitsFast(Dstream, dtLog); /* note : dtLog >= 1 */
+        const BYTE c = dt[val].byte;
+        BIT_skipBits(Dstream, dt[val].nbBits);
+        return c;
+}
+
+#define HUF_DECODE_SYMBOLX2_0(ptr, DStreamPtr) \
+    *ptr++ = HUF_decodeSymbolX2(DStreamPtr, dt, dtLog)
+
+#define HUF_DECODE_SYMBOLX2_1(ptr, DStreamPtr) \
+    if (MEM_64bits() || (HUF_MAX_TABLELOG<=12)) \
+        HUF_DECODE_SYMBOLX2_0(ptr, DStreamPtr)
+
+#define HUF_DECODE_SYMBOLX2_2(ptr, DStreamPtr) \
+    if (MEM_64bits()) \
+        HUF_DECODE_SYMBOLX2_0(ptr, DStreamPtr)
+
+static inline size_t HUF_decodeStreamX2(BYTE* p, BIT_DStream_t* const bitDPtr, BYTE* const pEnd, const HUF_DEltX2* const dt, const U32 dtLog)
+{
+    BYTE* const pStart = p;
+
+    /* up to 4 symbols at a time */
+    while ((BIT_reloadDStream(bitDPtr) == BIT_DStream_unfinished) && (p <= pEnd-4))
+    {
+        HUF_DECODE_SYMBOLX2_2(p, bitDPtr);
+        HUF_DECODE_SYMBOLX2_1(p, bitDPtr);
+        HUF_DECODE_SYMBOLX2_2(p, bitDPtr);
+        HUF_DECODE_SYMBOLX2_0(p, bitDPtr);
+    }
+
+    /* closer to the end */
+    while ((BIT_reloadDStream(bitDPtr) == BIT_DStream_unfinished) && (p < pEnd))
+        HUF_DECODE_SYMBOLX2_0(p, bitDPtr);
+
+    /* no more data to retrieve from bitstream, hence no need to reload */
+    while (p < pEnd)
+        HUF_DECODE_SYMBOLX2_0(p, bitDPtr);
+
+    return pEnd-pStart;
+}
+
+
+static size_t HUF_decompress4X2_usingDTable(
+          void* dst,  size_t dstSize,
+    const void* cSrc, size_t cSrcSize,
+    const U16* DTable)
+{
+    if (cSrcSize < 10) return ERROR(corruption_detected);   /* strict minimum : jump table + 1 byte per stream */
+
+    {
+        const BYTE* const istart = (const BYTE*) cSrc;
+        BYTE* const ostart = (BYTE*) dst;
+        BYTE* const oend = ostart + dstSize;
+
+        const void* ptr = DTable;
+        const HUF_DEltX2* const dt = ((const HUF_DEltX2*)ptr) +1;
+        const U32 dtLog = DTable[0];
+        size_t errorCode;
+
+        /* Init */
+        BIT_DStream_t bitD1;
+        BIT_DStream_t bitD2;
+        BIT_DStream_t bitD3;
+        BIT_DStream_t bitD4;
+        const size_t length1 = MEM_readLE16(istart);
+        const size_t length2 = MEM_readLE16(istart+2);
+        const size_t length3 = MEM_readLE16(istart+4);
+        size_t length4;
+        const BYTE* const istart1 = istart + 6;  /* jumpTable */
+        const BYTE* const istart2 = istart1 + length1;
+        const BYTE* const istart3 = istart2 + length2;
+        const BYTE* const istart4 = istart3 + length3;
+        const size_t segmentSize = (dstSize+3) / 4;
+        BYTE* const opStart2 = ostart + segmentSize;
+        BYTE* const opStart3 = opStart2 + segmentSize;
+        BYTE* const opStart4 = opStart3 + segmentSize;
+        BYTE* op1 = ostart;
+        BYTE* op2 = opStart2;
+        BYTE* op3 = opStart3;
+        BYTE* op4 = opStart4;
+        U32 endSignal;
+
+        length4 = cSrcSize - (length1 + length2 + length3 + 6);
+        if (length4 > cSrcSize) return ERROR(corruption_detected);   /* overflow */
+        errorCode = BIT_initDStream(&bitD1, istart1, length1);
+        if (HUF_isError(errorCode)) return errorCode;
+        errorCode = BIT_initDStream(&bitD2, istart2, length2);
+        if (HUF_isError(errorCode)) return errorCode;
+        errorCode = BIT_initDStream(&bitD3, istart3, length3);
+        if (HUF_isError(errorCode)) return errorCode;
+        errorCode = BIT_initDStream(&bitD4, istart4, length4);
+        if (HUF_isError(errorCode)) return errorCode;
+
+        /* 16-32 symbols per loop (4-8 symbols per stream) */
+        endSignal = BIT_reloadDStream(&bitD1) | BIT_reloadDStream(&bitD2) | BIT_reloadDStream(&bitD3) | BIT_reloadDStream(&bitD4);
+        for ( ; (endSignal==BIT_DStream_unfinished) && (op4<(oend-7)) ; )
+        {
+            HUF_DECODE_SYMBOLX2_2(op1, &bitD1);
+            HUF_DECODE_SYMBOLX2_2(op2, &bitD2);
+            HUF_DECODE_SYMBOLX2_2(op3, &bitD3);
+            HUF_DECODE_SYMBOLX2_2(op4, &bitD4);
+            HUF_DECODE_SYMBOLX2_1(op1, &bitD1);
+            HUF_DECODE_SYMBOLX2_1(op2, &bitD2);
+            HUF_DECODE_SYMBOLX2_1(op3, &bitD3);
+            HUF_DECODE_SYMBOLX2_1(op4, &bitD4);
+            HUF_DECODE_SYMBOLX2_2(op1, &bitD1);
+            HUF_DECODE_SYMBOLX2_2(op2, &bitD2);
+            HUF_DECODE_SYMBOLX2_2(op3, &bitD3);
+            HUF_DECODE_SYMBOLX2_2(op4, &bitD4);
+            HUF_DECODE_SYMBOLX2_0(op1, &bitD1);
+            HUF_DECODE_SYMBOLX2_0(op2, &bitD2);
+            HUF_DECODE_SYMBOLX2_0(op3, &bitD3);
+            HUF_DECODE_SYMBOLX2_0(op4, &bitD4);
+
+            endSignal = BIT_reloadDStream(&bitD1) | BIT_reloadDStream(&bitD2) | BIT_reloadDStream(&bitD3) | BIT_reloadDStream(&bitD4);
+        }
+
+        /* check corruption */
+        if (op1 > opStart2) return ERROR(corruption_detected);
+        if (op2 > opStart3) return ERROR(corruption_detected);
+        if (op3 > opStart4) return ERROR(corruption_detected);
+        /* note : op4 supposed already verified within main loop */
+
+        /* finish bitStreams one by one */
+        HUF_decodeStreamX2(op1, &bitD1, opStart2, dt, dtLog);
+        HUF_decodeStreamX2(op2, &bitD2, opStart3, dt, dtLog);
+        HUF_decodeStreamX2(op3, &bitD3, opStart4, dt, dtLog);
+        HUF_decodeStreamX2(op4, &bitD4, oend,     dt, dtLog);
+
+        /* check */
+        endSignal = BIT_endOfDStream(&bitD1) & BIT_endOfDStream(&bitD2) & BIT_endOfDStream(&bitD3) & BIT_endOfDStream(&bitD4);
+        if (!endSignal) return ERROR(corruption_detected);
+
+        /* decoded size */
+        return dstSize;
+    }
+}
+
+
+static size_t HUF_decompress4X2 (void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize)
+{
+    HUF_CREATE_STATIC_DTABLEX2(DTable, HUF_MAX_TABLELOG);
+    const BYTE* ip = (const BYTE*) cSrc;
+    size_t errorCode;
+
+    errorCode = HUF_readDTableX2 (DTable, cSrc, cSrcSize);
+    if (HUF_isError(errorCode)) return errorCode;
+    if (errorCode >= cSrcSize) return ERROR(srcSize_wrong);
+    ip += errorCode;
+    cSrcSize -= errorCode;
+
+    return HUF_decompress4X2_usingDTable (dst, dstSize, ip, cSrcSize, DTable);
+}
+
+
+/***************************/
+/* double-symbols decoding */
+/***************************/
+
+static void HUF_fillDTableX4Level2(HUF_DEltX4* DTable, U32 sizeLog, const U32 consumed,
+                           const U32* rankValOrigin, const int minWeight,
+                           const sortedSymbol_t* sortedSymbols, const U32 sortedListSize,
+                           U32 nbBitsBaseline, U16 baseSeq)
+{
+    HUF_DEltX4 DElt;
+    U32 rankVal[HUF_ABSOLUTEMAX_TABLELOG + 1];
+    U32 s;
+
+    /* get pre-calculated rankVal */
+    memcpy(rankVal, rankValOrigin, sizeof(rankVal));
+
+    /* fill skipped values */
+    if (minWeight>1)
+    {
+        U32 i, skipSize = rankVal[minWeight];
+        MEM_writeLE16(&(DElt.sequence), baseSeq);
+        DElt.nbBits   = (BYTE)(consumed);
+        DElt.length   = 1;
+        for (i = 0; i < skipSize; i++)
+            DTable[i] = DElt;
+    }
+
+    /* fill DTable */
+    for (s=0; s<sortedListSize; s++)   /* note : sortedSymbols already skipped */
+    {
+        const U32 symbol = sortedSymbols[s].symbol;
+        const U32 weight = sortedSymbols[s].weight;
+        const U32 nbBits = nbBitsBaseline - weight;
+        const U32 length = 1 << (sizeLog-nbBits);
+        const U32 start = rankVal[weight];
+        U32 i = start;
+        const U32 end = start + length;
+
+        MEM_writeLE16(&(DElt.sequence), (U16)(baseSeq + (symbol << 8)));
+        DElt.nbBits = (BYTE)(nbBits + consumed);
+        DElt.length = 2;
+        do { DTable[i++] = DElt; } while (i<end);   /* since length >= 1 */
+
+        rankVal[weight] += length;
+    }
+}
+
+typedef U32 rankVal_t[HUF_ABSOLUTEMAX_TABLELOG][HUF_ABSOLUTEMAX_TABLELOG + 1];
+
+static void HUF_fillDTableX4(HUF_DEltX4* DTable, const U32 targetLog,
+                           const sortedSymbol_t* sortedList, const U32 sortedListSize,
+                           const U32* rankStart, rankVal_t rankValOrigin, const U32 maxWeight,
+                           const U32 nbBitsBaseline)
+{
+    U32 rankVal[HUF_ABSOLUTEMAX_TABLELOG + 1];
+    const int scaleLog = nbBitsBaseline - targetLog;   /* note : targetLog >= srcLog, hence scaleLog <= 1 */
+    const U32 minBits  = nbBitsBaseline - maxWeight;
+    U32 s;
+
+    memcpy(rankVal, rankValOrigin, sizeof(rankVal));
+
+    /* fill DTable */
+    for (s=0; s<sortedListSize; s++)
+    {
+        const U16 symbol = sortedList[s].symbol;
+        const U32 weight = sortedList[s].weight;
+        const U32 nbBits = nbBitsBaseline - weight;
+        const U32 start = rankVal[weight];
+        const U32 length = 1 << (targetLog-nbBits);
+
+        if (targetLog-nbBits >= minBits)   /* enough room for a second symbol */
+        {
+            U32 sortedRank;
+            int minWeight = nbBits + scaleLog;
+            if (minWeight < 1) minWeight = 1;
+            sortedRank = rankStart[minWeight];
+            HUF_fillDTableX4Level2(DTable+start, targetLog-nbBits, nbBits,
+                           rankValOrigin[nbBits], minWeight,
+                           sortedList+sortedRank, sortedListSize-sortedRank,
+                           nbBitsBaseline, symbol);
+        }
+        else
+        {
+            U32 i;
+            const U32 end = start + length;
+            HUF_DEltX4 DElt;
+
+            MEM_writeLE16(&(DElt.sequence), symbol);
+            DElt.nbBits   = (BYTE)(nbBits);
+            DElt.length   = 1;
+            for (i = start; i < end; i++)
+                DTable[i] = DElt;
+        }
+        rankVal[weight] += length;
+    }
+}
+
+static size_t HUF_readDTableX4 (U32* DTable, const void* src, size_t srcSize)
+{
+    BYTE weightList[HUF_MAX_SYMBOL_VALUE + 1];
+    sortedSymbol_t sortedSymbol[HUF_MAX_SYMBOL_VALUE + 1];
+    U32 rankStats[HUF_ABSOLUTEMAX_TABLELOG + 1] = { 0 };
+    U32 rankStart0[HUF_ABSOLUTEMAX_TABLELOG + 2] = { 0 };
+    U32* const rankStart = rankStart0+1;
+    rankVal_t rankVal;
+    U32 tableLog, maxW, sizeOfSort, nbSymbols;
+    const U32 memLog = DTable[0];
+    const BYTE* ip = (const BYTE*) src;
+    size_t iSize = ip[0];
+    void* ptr = DTable;
+    HUF_DEltX4* const dt = ((HUF_DEltX4*)ptr) + 1;
+
+    HUF_STATIC_ASSERT(sizeof(HUF_DEltX4) == sizeof(U32));   /* if compilation fails here, assertion is false */
+    if (memLog > HUF_ABSOLUTEMAX_TABLELOG) return ERROR(tableLog_tooLarge);
+    //memset(weightList, 0, sizeof(weightList));   /* is not necessary, even though some analyzer complain ... */
+
+    iSize = HUF_readStats(weightList, HUF_MAX_SYMBOL_VALUE + 1, rankStats, &nbSymbols, &tableLog, src, srcSize);
+    if (HUF_isError(iSize)) return iSize;
+
+    /* check result */
+    if (tableLog > memLog) return ERROR(tableLog_tooLarge);   /* DTable can't fit code depth */
+
+    /* find maxWeight */
+    for (maxW = tableLog; rankStats[maxW]==0; maxW--)
+        { if (!maxW) return ERROR(GENERIC); }  /* necessarily finds a solution before maxW==0 */
+
+    /* Get start index of each weight */
+    {
+        U32 w, nextRankStart = 0;
+        for (w=1; w<=maxW; w++)
+        {
+            U32 current = nextRankStart;
+            nextRankStart += rankStats[w];
+            rankStart[w] = current;
+        }
+        rankStart[0] = nextRankStart;   /* put all 0w symbols at the end of sorted list*/
+        sizeOfSort = nextRankStart;
+    }
+
+    /* sort symbols by weight */
+    {
+        U32 s;
+        for (s=0; s<nbSymbols; s++)
+        {
+            U32 w = weightList[s];
+            U32 r = rankStart[w]++;
+            sortedSymbol[r].symbol = (BYTE)s;
+            sortedSymbol[r].weight = (BYTE)w;
+        }
+        rankStart[0] = 0;   /* forget 0w symbols; this is beginning of weight(1) */
+    }
+
+    /* Build rankVal */
+    {
+        const U32 minBits = tableLog+1 - maxW;
+        U32 nextRankVal = 0;
+        U32 w, consumed;
+        const int rescale = (memLog-tableLog) - 1;   /* tableLog <= memLog */
+        U32* rankVal0 = rankVal[0];
+        for (w=1; w<=maxW; w++)
+        {
+            U32 current = nextRankVal;
+            nextRankVal += rankStats[w] << (w+rescale);
+            rankVal0[w] = current;
+        }
+        for (consumed = minBits; consumed <= memLog - minBits; consumed++)
+        {
+            U32* rankValPtr = rankVal[consumed];
+            for (w = 1; w <= maxW; w++)
+            {
+                rankValPtr[w] = rankVal0[w] >> consumed;
+            }
+        }
+    }
+
+    HUF_fillDTableX4(dt, memLog,
+                   sortedSymbol, sizeOfSort,
+                   rankStart0, rankVal, maxW,
+                   tableLog+1);
+
+    return iSize;
+}
+
+
+static U32 HUF_decodeSymbolX4(void* op, BIT_DStream_t* DStream, const HUF_DEltX4* dt, const U32 dtLog)
+{
+    const size_t val = BIT_lookBitsFast(DStream, dtLog);   /* note : dtLog >= 1 */
+    memcpy(op, dt+val, 2);
+    BIT_skipBits(DStream, dt[val].nbBits);
+    return dt[val].length;
+}
+
+static U32 HUF_decodeLastSymbolX4(void* op, BIT_DStream_t* DStream, const HUF_DEltX4* dt, const U32 dtLog)
+{
+    const size_t val = BIT_lookBitsFast(DStream, dtLog);   /* note : dtLog >= 1 */
+    memcpy(op, dt+val, 1);
+    if (dt[val].length==1) BIT_skipBits(DStream, dt[val].nbBits);
+    else
+    {
+        if (DStream->bitsConsumed < (sizeof(DStream->bitContainer)*8))
+        {
+            BIT_skipBits(DStream, dt[val].nbBits);
+            if (DStream->bitsConsumed > (sizeof(DStream->bitContainer)*8))
+                DStream->bitsConsumed = (sizeof(DStream->bitContainer)*8);   /* ugly hack; works only because it's the last symbol. Note : can't easily extract nbBits from just this symbol */
+        }
+    }
+    return 1;
+}
+
+
+#define HUF_DECODE_SYMBOLX4_0(ptr, DStreamPtr) \
+    ptr += HUF_decodeSymbolX4(ptr, DStreamPtr, dt, dtLog)
+
+#define HUF_DECODE_SYMBOLX4_1(ptr, DStreamPtr) \
+    if (MEM_64bits() || (HUF_MAX_TABLELOG<=12)) \
+        ptr += HUF_decodeSymbolX4(ptr, DStreamPtr, dt, dtLog)
+
+#define HUF_DECODE_SYMBOLX4_2(ptr, DStreamPtr) \
+    if (MEM_64bits()) \
+        ptr += HUF_decodeSymbolX4(ptr, DStreamPtr, dt, dtLog)
+
+static inline size_t HUF_decodeStreamX4(BYTE* p, BIT_DStream_t* bitDPtr, BYTE* const pEnd, const HUF_DEltX4* const dt, const U32 dtLog)
+{
+    BYTE* const pStart = p;
+
+    /* up to 8 symbols at a time */
+    while ((BIT_reloadDStream(bitDPtr) == BIT_DStream_unfinished) && (p < pEnd-7))
+    {
+        HUF_DECODE_SYMBOLX4_2(p, bitDPtr);
+        HUF_DECODE_SYMBOLX4_1(p, bitDPtr);
+        HUF_DECODE_SYMBOLX4_2(p, bitDPtr);
+        HUF_DECODE_SYMBOLX4_0(p, bitDPtr);
+    }
+
+    /* closer to the end */
+    while ((BIT_reloadDStream(bitDPtr) == BIT_DStream_unfinished) && (p <= pEnd-2))
+        HUF_DECODE_SYMBOLX4_0(p, bitDPtr);
+
+    while (p <= pEnd-2)
+        HUF_DECODE_SYMBOLX4_0(p, bitDPtr);   /* no need to reload : reached the end of DStream */
+
+    if (p < pEnd)
+        p += HUF_decodeLastSymbolX4(p, bitDPtr, dt, dtLog);
+
+    return p-pStart;
+}
+
+
+
+static size_t HUF_decompress4X4_usingDTable(
+          void* dst,  size_t dstSize,
+    const void* cSrc, size_t cSrcSize,
+    const U32* DTable)
+{
+    if (cSrcSize < 10) return ERROR(corruption_detected);   /* strict minimum : jump table + 1 byte per stream */
+
+    {
+        const BYTE* const istart = (const BYTE*) cSrc;
+        BYTE* const ostart = (BYTE*) dst;
+        BYTE* const oend = ostart + dstSize;
+
+        const void* ptr = DTable;
+        const HUF_DEltX4* const dt = ((const HUF_DEltX4*)ptr) +1;
+        const U32 dtLog = DTable[0];
+        size_t errorCode;
+
+        /* Init */
+        BIT_DStream_t bitD1;
+        BIT_DStream_t bitD2;
+        BIT_DStream_t bitD3;
+        BIT_DStream_t bitD4;
+        const size_t length1 = MEM_readLE16(istart);
+        const size_t length2 = MEM_readLE16(istart+2);
+        const size_t length3 = MEM_readLE16(istart+4);
+        size_t length4;
+        const BYTE* const istart1 = istart + 6;  /* jumpTable */
+        const BYTE* const istart2 = istart1 + length1;
+        const BYTE* const istart3 = istart2 + length2;
+        const BYTE* const istart4 = istart3 + length3;
+        const size_t segmentSize = (dstSize+3) / 4;
+        BYTE* const opStart2 = ostart + segmentSize;
+        BYTE* const opStart3 = opStart2 + segmentSize;
+        BYTE* const opStart4 = opStart3 + segmentSize;
+        BYTE* op1 = ostart;
+        BYTE* op2 = opStart2;
+        BYTE* op3 = opStart3;
+        BYTE* op4 = opStart4;
+        U32 endSignal;
+
+        length4 = cSrcSize - (length1 + length2 + length3 + 6);
+        if (length4 > cSrcSize) return ERROR(corruption_detected);   /* overflow */
+        errorCode = BIT_initDStream(&bitD1, istart1, length1);
+        if (HUF_isError(errorCode)) return errorCode;
+        errorCode = BIT_initDStream(&bitD2, istart2, length2);
+        if (HUF_isError(errorCode)) return errorCode;
+        errorCode = BIT_initDStream(&bitD3, istart3, length3);
+        if (HUF_isError(errorCode)) return errorCode;
+        errorCode = BIT_initDStream(&bitD4, istart4, length4);
+        if (HUF_isError(errorCode)) return errorCode;
+
+        /* 16-32 symbols per loop (4-8 symbols per stream) */
+        endSignal = BIT_reloadDStream(&bitD1) | BIT_reloadDStream(&bitD2) | BIT_reloadDStream(&bitD3) | BIT_reloadDStream(&bitD4);
+        for ( ; (endSignal==BIT_DStream_unfinished) && (op4<(oend-7)) ; )
+        {
+            HUF_DECODE_SYMBOLX4_2(op1, &bitD1);
+            HUF_DECODE_SYMBOLX4_2(op2, &bitD2);
+            HUF_DECODE_SYMBOLX4_2(op3, &bitD3);
+            HUF_DECODE_SYMBOLX4_2(op4, &bitD4);
+            HUF_DECODE_SYMBOLX4_1(op1, &bitD1);
+            HUF_DECODE_SYMBOLX4_1(op2, &bitD2);
+            HUF_DECODE_SYMBOLX4_1(op3, &bitD3);
+            HUF_DECODE_SYMBOLX4_1(op4, &bitD4);
+            HUF_DECODE_SYMBOLX4_2(op1, &bitD1);
+            HUF_DECODE_SYMBOLX4_2(op2, &bitD2);
+            HUF_DECODE_SYMBOLX4_2(op3, &bitD3);
+            HUF_DECODE_SYMBOLX4_2(op4, &bitD4);
+            HUF_DECODE_SYMBOLX4_0(op1, &bitD1);
+            HUF_DECODE_SYMBOLX4_0(op2, &bitD2);
+            HUF_DECODE_SYMBOLX4_0(op3, &bitD3);
+            HUF_DECODE_SYMBOLX4_0(op4, &bitD4);
+
+            endSignal = BIT_reloadDStream(&bitD1) | BIT_reloadDStream(&bitD2) | BIT_reloadDStream(&bitD3) | BIT_reloadDStream(&bitD4);
+        }
+
+        /* check corruption */
+        if (op1 > opStart2) return ERROR(corruption_detected);
+        if (op2 > opStart3) return ERROR(corruption_detected);
+        if (op3 > opStart4) return ERROR(corruption_detected);
+        /* note : op4 supposed already verified within main loop */
+
+        /* finish bitStreams one by one */
+        HUF_decodeStreamX4(op1, &bitD1, opStart2, dt, dtLog);
+        HUF_decodeStreamX4(op2, &bitD2, opStart3, dt, dtLog);
+        HUF_decodeStreamX4(op3, &bitD3, opStart4, dt, dtLog);
+        HUF_decodeStreamX4(op4, &bitD4, oend,     dt, dtLog);
+
+        /* check */
+        endSignal = BIT_endOfDStream(&bitD1) & BIT_endOfDStream(&bitD2) & BIT_endOfDStream(&bitD3) & BIT_endOfDStream(&bitD4);
+        if (!endSignal) return ERROR(corruption_detected);
+
+        /* decoded size */
+        return dstSize;
+    }
+}
+
+
+static size_t HUF_decompress4X4 (void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize)
+{
+    HUF_CREATE_STATIC_DTABLEX4(DTable, HUF_MAX_TABLELOG);
+    const BYTE* ip = (const BYTE*) cSrc;
+
+    size_t hSize = HUF_readDTableX4 (DTable, cSrc, cSrcSize);
+    if (HUF_isError(hSize)) return hSize;
+    if (hSize >= cSrcSize) return ERROR(srcSize_wrong);
+    ip += hSize;
+    cSrcSize -= hSize;
+
+    return HUF_decompress4X4_usingDTable (dst, dstSize, ip, cSrcSize, DTable);
+}
+
+
+/**********************************/
+/* Generic decompression selector */
+/**********************************/
+
+typedef struct { U32 tableTime; U32 decode256Time; } algo_time_t;
+static const algo_time_t algoTime[16 /* Quantization */][3 /* single, double, quad */] =
+{
+    /* single, double, quad */
+    {{0,0}, {1,1}, {2,2}},  /* Q==0 : impossible */
+    {{0,0}, {1,1}, {2,2}},  /* Q==1 : impossible */
+    {{  38,130}, {1313, 74}, {2151, 38}},   /* Q == 2 : 12-18% */
+    {{ 448,128}, {1353, 74}, {2238, 41}},   /* Q == 3 : 18-25% */
+    {{ 556,128}, {1353, 74}, {2238, 47}},   /* Q == 4 : 25-32% */
+    {{ 714,128}, {1418, 74}, {2436, 53}},   /* Q == 5 : 32-38% */
+    {{ 883,128}, {1437, 74}, {2464, 61}},   /* Q == 6 : 38-44% */
+    {{ 897,128}, {1515, 75}, {2622, 68}},   /* Q == 7 : 44-50% */
+    {{ 926,128}, {1613, 75}, {2730, 75}},   /* Q == 8 : 50-56% */
+    {{ 947,128}, {1729, 77}, {3359, 77}},   /* Q == 9 : 56-62% */
+    {{1107,128}, {2083, 81}, {4006, 84}},   /* Q ==10 : 62-69% */
+    {{1177,128}, {2379, 87}, {4785, 88}},   /* Q ==11 : 69-75% */
+    {{1242,128}, {2415, 93}, {5155, 84}},   /* Q ==12 : 75-81% */
+    {{1349,128}, {2644,106}, {5260,106}},   /* Q ==13 : 81-87% */
+    {{1455,128}, {2422,124}, {4174,124}},   /* Q ==14 : 87-93% */
+    {{ 722,128}, {1891,145}, {1936,146}},   /* Q ==15 : 93-99% */
+};
+
+typedef size_t (*decompressionAlgo)(void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize);
+
+static size_t HUF_decompress (void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize)
+{
+    static const decompressionAlgo decompress[3] = { HUF_decompress4X2, HUF_decompress4X4, NULL };
+    /* estimate decompression time */
+    U32 Q;
+    const U32 D256 = (U32)(dstSize >> 8);
+    U32 Dtime[3];
+    U32 algoNb = 0;
+    int n;
+
+    /* validation checks */
+    if (dstSize == 0) return ERROR(dstSize_tooSmall);
+    if (cSrcSize > dstSize) return ERROR(corruption_detected);   /* invalid */
+    if (cSrcSize == dstSize) { memcpy(dst, cSrc, dstSize); return dstSize; }   /* not compressed */
+    if (cSrcSize == 1) { memset(dst, *(const BYTE*)cSrc, dstSize); return dstSize; }   /* RLE */
+
+    /* decoder timing evaluation */
+    Q = (U32)(cSrcSize * 16 / dstSize);   /* Q < 16 since dstSize > cSrcSize */
+    for (n=0; n<3; n++)
+        Dtime[n] = algoTime[Q][n].tableTime + (algoTime[Q][n].decode256Time * D256);
+
+    Dtime[1] += Dtime[1] >> 4; Dtime[2] += Dtime[2] >> 3; /* advantage to algorithms using less memory, for cache eviction */
+
+    if (Dtime[1] < Dtime[0]) algoNb = 1;
+
+    return decompress[algoNb](dst, dstSize, cSrc, cSrcSize);
+
+    //return HUF_decompress4X2(dst, dstSize, cSrc, cSrcSize);   /* multi-streams single-symbol decoding */
+    //return HUF_decompress4X4(dst, dstSize, cSrc, cSrcSize);   /* multi-streams double-symbols decoding */
+    //return HUF_decompress4X6(dst, dstSize, cSrc, cSrcSize);   /* multi-streams quad-symbols decoding */
+}
+/*
+    zstd - standard compression library
+    Copyright (C) 2014-2015, Yann Collet.
+
+    BSD 2-Clause License (http://www.opensource.org/licenses/bsd-license.php)
+
+    Redistribution and use in source and binary forms, with or without
+    modification, are permitted provided that the following conditions are
+    met:
+    * Redistributions of source code must retain the above copyright
+    notice, this list of conditions and the following disclaimer.
+    * Redistributions in binary form must reproduce the above
+    copyright notice, this list of conditions and the following disclaimer
+    in the documentation and/or other materials provided with the
+    distribution.
+    THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+    "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+    LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+    A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+    OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+    SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+    LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+    DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+    THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+    (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+    OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+    You can contact the author at :
+    - zstd source repository : https://github.com/Cyan4973/zstd
+    - ztsd public forum : https://groups.google.com/forum/#!forum/lz4c
+*/
+
+/* ***************************************************************
+*  Tuning parameters
+*****************************************************************/
+/*!
+*  MEMORY_USAGE :
+*  Memory usage formula : N->2^N Bytes (examples : 10 -> 1KB; 12 -> 4KB ; 16 -> 64KB; 20 -> 1MB; etc.)
+*  Increasing memory usage improves compression ratio
+*  Reduced memory usage can improve speed, due to cache effect
+*/
+#define ZSTD_MEMORY_USAGE 17
+
+/*!
+ * HEAPMODE :
+ * Select how default compression functions will allocate memory for their hash table,
+ * in memory stack (0, fastest), or in memory heap (1, requires malloc())
+ * Note that compression context is fairly large, as a consequence heap memory is recommended.
+ */
+#ifndef ZSTD_HEAPMODE
+#  define ZSTD_HEAPMODE 1
+#endif /* ZSTD_HEAPMODE */
+
+/*!
+*  LEGACY_SUPPORT :
+*  decompressor can decode older formats (starting from Zstd 0.1+)
+*/
+#ifndef ZSTD_LEGACY_SUPPORT
+#  define ZSTD_LEGACY_SUPPORT 1
+#endif
+
+
+/* *******************************************************
+*  Includes
+*********************************************************/
+#include <stdlib.h>      /* calloc */
+#include <string.h>      /* memcpy, memmove */
+#include <stdio.h>       /* debug : printf */
+
+
+/* *******************************************************
+*  Compiler specifics
+*********************************************************/
+#ifdef __AVX2__
+#  include <immintrin.h>   /* AVX2 intrinsics */
+#endif
+
+#ifdef _MSC_VER    /* Visual Studio */
+#  include <intrin.h>                    /* For Visual 2005 */
+#  pragma warning(disable : 4127)        /* disable: C4127: conditional expression is constant */
+#  pragma warning(disable : 4324)        /* disable: C4324: padded structure */
+#else
+#  define GCC_VERSION (__GNUC__ * 100 + __GNUC_MINOR__)
+#endif
+
+
+/* *******************************************************
+*  Constants
+*********************************************************/
+#define HASH_LOG (ZSTD_MEMORY_USAGE - 2)
+#define HASH_TABLESIZE (1 << HASH_LOG)
+#define HASH_MASK (HASH_TABLESIZE - 1)
+
+#define KNUTH 2654435761
+
+#define BIT7 128
+#define BIT6  64
+#define BIT5  32
+#define BIT4  16
+#define BIT1   2
+#define BIT0   1
+
+#define KB *(1 <<10)
+#define MB *(1 <<20)
+#define GB *(1U<<30)
+
+#define BLOCKSIZE (128 KB)                 /* define, for static allocation */
+#define MIN_SEQUENCES_SIZE (2 /*seqNb*/ + 2 /*dumps*/ + 3 /*seqTables*/ + 1 /*bitStream*/)
+#define MIN_CBLOCK_SIZE (3 /*litCSize*/ + MIN_SEQUENCES_SIZE)
+#define IS_RAW BIT0
+#define IS_RLE BIT1
+
+#define WORKPLACESIZE (BLOCKSIZE*3)
+#define MINMATCH 4
+#define MLbits   7
+#define LLbits   6
+#define Offbits  5
+#define MaxML  ((1<<MLbits )-1)
+#define MaxLL  ((1<<LLbits )-1)
+#define MaxOff   31
+#define LitFSELog  11
+#define MLFSELog   10
+#define LLFSELog   10
+#define OffFSELog   9
+#define MAX(a,b) ((a)<(b)?(b):(a))
+#define MaxSeq MAX(MaxLL, MaxML)
+
+#define LITERAL_NOENTROPY 63
+#define COMMAND_NOENTROPY 7   /* to remove */
+
+static const size_t ZSTD_blockHeaderSize = 3;
+static const size_t ZSTD_frameHeaderSize = 4;
+
+
+/* *******************************************************
+*  Memory operations
+**********************************************************/
+static void   ZSTD_copy4(void* dst, const void* src) { memcpy(dst, src, 4); }
+
+static void   ZSTD_copy8(void* dst, const void* src) { memcpy(dst, src, 8); }
+
+#define COPY8(d,s) { ZSTD_copy8(d,s); d+=8; s+=8; }
+
+/*! ZSTD_wildcopy : custom version of memcpy(), can copy up to 7-8 bytes too many */
+static void ZSTD_wildcopy(void* dst, const void* src, ptrdiff_t length)
+{
+    const BYTE* ip = (const BYTE*)src;
+    BYTE* op = (BYTE*)dst;
+    BYTE* const oend = op + length;
+    do COPY8(op, ip) while (op < oend);
+}
+
+
+/* **************************************
+*  Local structures
+****************************************/
+typedef enum { bt_compressed, bt_raw, bt_rle, bt_end } blockType_t;
+
+typedef struct
+{
+    blockType_t blockType;
+    U32 origSize;
+} blockProperties_t;
+
+typedef struct {
+    void* buffer;
+    U32*  offsetStart;
+    U32*  offset;
+    BYTE* offCodeStart;
+    BYTE* offCode;
+    BYTE* litStart;
+    BYTE* lit;
+    BYTE* litLengthStart;
+    BYTE* litLength;
+    BYTE* matchLengthStart;
+    BYTE* matchLength;
+    BYTE* dumpsStart;
+    BYTE* dumps;
+} seqStore_t;
+
+
+/* *************************************
+*  Error Management
+***************************************/
+/*! ZSTD_isError
+*   tells if a return value is an error code */
+static unsigned ZSTD_isError(size_t code) { return ERR_isError(code); }
+
+
+
+/* *************************************************************
+*   Decompression section
+***************************************************************/
+struct ZSTD_DCtx_s
+{
+    U32 LLTable[FSE_DTABLE_SIZE_U32(LLFSELog)];
+    U32 OffTable[FSE_DTABLE_SIZE_U32(OffFSELog)];
+    U32 MLTable[FSE_DTABLE_SIZE_U32(MLFSELog)];
+    void* previousDstEnd;
+    void* base;
+    size_t expected;
+    blockType_t bType;
+    U32 phase;
+    const BYTE* litPtr;
+    size_t litSize;
+    BYTE litBuffer[BLOCKSIZE + 8 /* margin for wildcopy */];
+};   /* typedef'd to ZSTD_Dctx within "zstd_static.h" */
+
+
+static size_t ZSTD_getcBlockSize(const void* src, size_t srcSize, blockProperties_t* bpPtr)
+{
+    const BYTE* const in = (const BYTE* const)src;
+    BYTE headerFlags;
+    U32 cSize;
+
+    if (srcSize < 3) return ERROR(srcSize_wrong);
+
+    headerFlags = *in;
+    cSize = in[2] + (in[1]<<8) + ((in[0] & 7)<<16);
+
+    bpPtr->blockType = (blockType_t)(headerFlags >> 6);
+    bpPtr->origSize = (bpPtr->blockType == bt_rle) ? cSize : 0;
+
+    if (bpPtr->blockType == bt_end) return 0;
+    if (bpPtr->blockType == bt_rle) return 1;
+    return cSize;
+}
+
+static size_t ZSTD_copyUncompressedBlock(void* dst, size_t maxDstSize, const void* src, size_t srcSize)
+{
+    if (srcSize > maxDstSize) return ERROR(dstSize_tooSmall);
+    memcpy(dst, src, srcSize);
+    return srcSize;
+}
+
+
+/** ZSTD_decompressLiterals
+    @return : nb of bytes read from src, or an error code*/
+static size_t ZSTD_decompressLiterals(void* dst, size_t* maxDstSizePtr,
+                                const void* src, size_t srcSize)
+{
+    const BYTE* ip = (const BYTE*)src;
+
+    const size_t litSize = (MEM_readLE32(src) & 0x1FFFFF) >> 2;   /* no buffer issue : srcSize >= MIN_CBLOCK_SIZE */
+    const size_t litCSize = (MEM_readLE32(ip+2) & 0xFFFFFF) >> 5;   /* no buffer issue : srcSize >= MIN_CBLOCK_SIZE */
+
+    if (litSize > *maxDstSizePtr) return ERROR(corruption_detected);
+    if (litCSize + 5 > srcSize) return ERROR(corruption_detected);
+
+    if (HUF_isError(HUF_decompress(dst, litSize, ip+5, litCSize))) return ERROR(corruption_detected);
+
+    *maxDstSizePtr = litSize;
+    return litCSize + 5;
+}
+
+
+/** ZSTD_decodeLiteralsBlock
+    @return : nb of bytes read from src (< srcSize )*/
+static size_t ZSTD_decodeLiteralsBlock(void* ctx,
+                          const void* src, size_t srcSize)
+{
+    ZSTD_DCtx* dctx = (ZSTD_DCtx*)ctx;
+    const BYTE* const istart = (const BYTE* const)src;
+
+    /* any compressed block with literals segment must be at least this size */
+    if (srcSize < MIN_CBLOCK_SIZE) return ERROR(corruption_detected);
+
+    switch(*istart & 3)
+    {
+    default:
+    case 0:
+        {
+            size_t litSize = BLOCKSIZE;
+            const size_t readSize = ZSTD_decompressLiterals(dctx->litBuffer, &litSize, src, srcSize);
+            dctx->litPtr = dctx->litBuffer;
+            dctx->litSize = litSize;
+            memset(dctx->litBuffer + dctx->litSize, 0, 8);
+            return readSize;   /* works if it's an error too */
+        }
+    case IS_RAW:
+        {
+            const size_t litSize = (MEM_readLE32(istart) & 0xFFFFFF) >> 2;   /* no buffer issue : srcSize >= MIN_CBLOCK_SIZE */
+            if (litSize > srcSize-11)   /* risk of reading too far with wildcopy */
+            {
+                if (litSize > srcSize-3) return ERROR(corruption_detected);
+                memcpy(dctx->litBuffer, istart, litSize);
+                dctx->litPtr = dctx->litBuffer;
+                dctx->litSize = litSize;
+                memset(dctx->litBuffer + dctx->litSize, 0, 8);
+                return litSize+3;
+            }
+            /* direct reference into compressed stream */
+            dctx->litPtr = istart+3;
+            dctx->litSize = litSize;
+            return litSize+3;
+        }
+    case IS_RLE:
+        {
+            const size_t litSize = (MEM_readLE32(istart) & 0xFFFFFF) >> 2;   /* no buffer issue : srcSize >= MIN_CBLOCK_SIZE */
+            if (litSize > BLOCKSIZE) return ERROR(corruption_detected);
+            memset(dctx->litBuffer, istart[3], litSize + 8);
+            dctx->litPtr = dctx->litBuffer;
+            dctx->litSize = litSize;
+            return 4;
+        }
+    }
+}
+
+
+static size_t ZSTD_decodeSeqHeaders(int* nbSeq, const BYTE** dumpsPtr, size_t* dumpsLengthPtr,
+                         FSE_DTable* DTableLL, FSE_DTable* DTableML, FSE_DTable* DTableOffb,
+                         const void* src, size_t srcSize)
+{
+    const BYTE* const istart = (const BYTE* const)src;
+    const BYTE* ip = istart;
+    const BYTE* const iend = istart + srcSize;
+    U32 LLtype, Offtype, MLtype;
+    U32 LLlog, Offlog, MLlog;
+    size_t dumpsLength;
+
+    /* check */
+    if (srcSize < 5) return ERROR(srcSize_wrong);
+
+    /* SeqHead */
+    *nbSeq = MEM_readLE16(ip); ip+=2;
+    LLtype  = *ip >> 6;
+    Offtype = (*ip >> 4) & 3;
+    MLtype  = (*ip >> 2) & 3;
+    if (*ip & 2)
+    {
+        dumpsLength  = ip[2];
+        dumpsLength += ip[1] << 8;
+        ip += 3;
+    }
+    else
+    {
+        dumpsLength  = ip[1];
+        dumpsLength += (ip[0] & 1) << 8;
+        ip += 2;
+    }
+    *dumpsPtr = ip;
+    ip += dumpsLength;
+    *dumpsLengthPtr = dumpsLength;
+
+    /* check */
+    if (ip > iend-3) return ERROR(srcSize_wrong); /* min : all 3 are "raw", hence no header, but at least xxLog bits per type */
+
+    /* sequences */
+    {
+        S16 norm[MaxML+1];    /* assumption : MaxML >= MaxLL and MaxOff */
+        size_t headerSize;
+
+        /* Build DTables */
+        switch(LLtype)
+        {
+        case bt_rle :
+            LLlog = 0;
+            FSE_buildDTable_rle(DTableLL, *ip++); break;
+        case bt_raw :
+            LLlog = LLbits;
+            FSE_buildDTable_raw(DTableLL, LLbits); break;
+        default :
+            {   U32 max = MaxLL;
+                headerSize = FSE_readNCount(norm, &max, &LLlog, ip, iend-ip);
+                if (FSE_isError(headerSize)) return ERROR(GENERIC);
+                if (LLlog > LLFSELog) return ERROR(corruption_detected);
+                ip += headerSize;
+                FSE_buildDTable(DTableLL, norm, max, LLlog);
+        }   }
+
+        switch(Offtype)
+        {
+        case bt_rle :
+            Offlog = 0;
+            if (ip > iend-2) return ERROR(srcSize_wrong);   /* min : "raw", hence no header, but at least xxLog bits */
+            FSE_buildDTable_rle(DTableOffb, *ip++ & MaxOff); /* if *ip > MaxOff, data is corrupted */
+            break;
+        case bt_raw :
+            Offlog = Offbits;
+            FSE_buildDTable_raw(DTableOffb, Offbits); break;
+        default :
+            {   U32 max = MaxOff;
+                headerSize = FSE_readNCount(norm, &max, &Offlog, ip, iend-ip);
+                if (FSE_isError(headerSize)) return ERROR(GENERIC);
+                if (Offlog > OffFSELog) return ERROR(corruption_detected);
+                ip += headerSize;
+                FSE_buildDTable(DTableOffb, norm, max, Offlog);
+        }   }
+
+        switch(MLtype)
+        {
+        case bt_rle :
+            MLlog = 0;
+            if (ip > iend-2) return ERROR(srcSize_wrong); /* min : "raw", hence no header, but at least xxLog bits */
+            FSE_buildDTable_rle(DTableML, *ip++); break;
+        case bt_raw :
+            MLlog = MLbits;
+            FSE_buildDTable_raw(DTableML, MLbits); break;
+        default :
+            {   U32 max = MaxML;
+                headerSize = FSE_readNCount(norm, &max, &MLlog, ip, iend-ip);
+                if (FSE_isError(headerSize)) return ERROR(GENERIC);
+                if (MLlog > MLFSELog) return ERROR(corruption_detected);
+                ip += headerSize;
+                FSE_buildDTable(DTableML, norm, max, MLlog);
+    }   }   }
+
+    return ip-istart;
+}
+
+
+typedef struct {
+    size_t litLength;
+    size_t offset;
+    size_t matchLength;
+} seq_t;
+
+typedef struct {
+    BIT_DStream_t DStream;
+    FSE_DState_t stateLL;
+    FSE_DState_t stateOffb;
+    FSE_DState_t stateML;
+    size_t prevOffset;
+    const BYTE* dumps;
+    const BYTE* dumpsEnd;
+} seqState_t;
+
+
+static void ZSTD_decodeSequence(seq_t* seq, seqState_t* seqState)
+{
+    size_t litLength;
+    size_t prevOffset;
+    size_t offset;
+    size_t matchLength;
+    const BYTE* dumps = seqState->dumps;
+    const BYTE* const de = seqState->dumpsEnd;
+
+    /* Literal length */
+    litLength = FSE_decodeSymbol(&(seqState->stateLL), &(seqState->DStream));
+    prevOffset = litLength ? seq->offset : seqState->prevOffset;
+    seqState->prevOffset = seq->offset;
+    if (litLength == MaxLL)
+    {
+        U32 add = *dumps++;
+        if (add < 255) litLength += add;
+        else
+        {
+            litLength = MEM_readLE32(dumps) & 0xFFFFFF;  /* no pb : dumps is always followed by seq tables > 1 byte */
+            dumps += 3;
+        }
+        if (dumps >= de) dumps = de-1;   /* late correction, to avoid read overflow (data is now corrupted anyway) */
+    }
+
+    /* Offset */
+    {
+        static const size_t offsetPrefix[MaxOff+1] = {  /* note : size_t faster than U32 */
+                1 /*fake*/, 1, 2, 4, 8, 16, 32, 64, 128, 256,
+                512, 1024, 2048, 4096, 8192, 16384, 32768, 65536, 131072, 262144,
+                524288, 1048576, 2097152, 4194304, 8388608, 16777216, 33554432, /*fake*/ 1, 1, 1, 1, 1 };
+        U32 offsetCode, nbBits;
+        offsetCode = FSE_decodeSymbol(&(seqState->stateOffb), &(seqState->DStream));   /* <= maxOff, by table construction */
+        if (MEM_32bits()) BIT_reloadDStream(&(seqState->DStream));
+        nbBits = offsetCode - 1;
+        if (offsetCode==0) nbBits = 0;   /* cmove */
+        offset = offsetPrefix[offsetCode] + BIT_readBits(&(seqState->DStream), nbBits);
+        if (MEM_32bits()) BIT_reloadDStream(&(seqState->DStream));
+        if (offsetCode==0) offset = prevOffset;   /* cmove */
+    }
+
+    /* MatchLength */
+    matchLength = FSE_decodeSymbol(&(seqState->stateML), &(seqState->DStream));
+    if (matchLength == MaxML)
+    {
+        U32 add = *dumps++;
+        if (add < 255) matchLength += add;
+        else
+        {
+            matchLength = MEM_readLE32(dumps) & 0xFFFFFF;  /* no pb : dumps is always followed by seq tables > 1 byte */
+            dumps += 3;
+        }
+        if (dumps >= de) dumps = de-1;   /* late correction, to avoid read overflow (data is now corrupted anyway) */
+    }
+    matchLength += MINMATCH;
+
+    /* save result */
+    seq->litLength = litLength;
+    seq->offset = offset;
+    seq->matchLength = matchLength;
+    seqState->dumps = dumps;
+}
+
+
+static size_t ZSTD_execSequence(BYTE* op,
+                                seq_t sequence,
+                                const BYTE** litPtr, const BYTE* const litLimit,
+                                BYTE* const base, BYTE* const oend)
+{
+    static const int dec32table[] = {0, 1, 2, 1, 4, 4, 4, 4};   /* added */
+    static const int dec64table[] = {8, 8, 8, 7, 8, 9,10,11};   /* substracted */
+    const BYTE* const ostart = op;
+    BYTE* const oLitEnd = op + sequence.litLength;
+    BYTE* const oMatchEnd = op + sequence.litLength + sequence.matchLength;   /* risk : address space overflow (32-bits) */
+    BYTE* const oend_8 = oend-8;
+    const BYTE* const litEnd = *litPtr + sequence.litLength;
+
+    /* checks */
+    if (oLitEnd > oend_8) return ERROR(dstSize_tooSmall);   /* last match must start at a minimum distance of 8 from oend */
+    if (oMatchEnd > oend) return ERROR(dstSize_tooSmall);   /* overwrite beyond dst buffer */
+    if (litEnd > litLimit) return ERROR(corruption_detected);   /* overRead beyond lit buffer */
+
+    /* copy Literals */
+    ZSTD_wildcopy(op, *litPtr, sequence.litLength);   /* note : oLitEnd <= oend-8 : no risk of overwrite beyond oend */
+    op = oLitEnd;
+    *litPtr = litEnd;   /* update for next sequence */
+
+    /* copy Match */
+    {
+        const BYTE* match = op - sequence.offset;
+
+        /* check */
+        if (sequence.offset > (size_t)op) return ERROR(corruption_detected);   /* address space overflow test (this test seems kept by clang optimizer) */
+        //if (match > op) return ERROR(corruption_detected);   /* address space overflow test (is clang optimizer removing this test ?) */
+        if (match < base) return ERROR(corruption_detected);
+
+        /* close range match, overlap */
+        if (sequence.offset < 8)
+        {
+            const int dec64 = dec64table[sequence.offset];
+            op[0] = match[0];
+            op[1] = match[1];
+            op[2] = match[2];
+            op[3] = match[3];
+            match += dec32table[sequence.offset];
+            ZSTD_copy4(op+4, match);
+            match -= dec64;
+        }
+        else
+        {
+            ZSTD_copy8(op, match);
+        }
+        op += 8; match += 8;
+
+        if (oMatchEnd > oend-(16-MINMATCH))
+        {
+            if (op < oend_8)
+            {
+                ZSTD_wildcopy(op, match, oend_8 - op);
+                match += oend_8 - op;
+                op = oend_8;
+            }
+            while (op < oMatchEnd) *op++ = *match++;
+        }
+        else
+        {
+            ZSTD_wildcopy(op, match, (ptrdiff_t)sequence.matchLength-8);   /* works even if matchLength < 8 */
+        }
+    }
+
+    return oMatchEnd - ostart;
+}
+
+static size_t ZSTD_decompressSequences(
+                               void* ctx,
+                               void* dst, size_t maxDstSize,
+                         const void* seqStart, size_t seqSize)
+{
+    ZSTD_DCtx* dctx = (ZSTD_DCtx*)ctx;
+    const BYTE* ip = (const BYTE*)seqStart;
+    const BYTE* const iend = ip + seqSize;
+    BYTE* const ostart = (BYTE* const)dst;
+    BYTE* op = ostart;
+    BYTE* const oend = ostart + maxDstSize;
+    size_t errorCode, dumpsLength;
+    const BYTE* litPtr = dctx->litPtr;
+    const BYTE* const litEnd = litPtr + dctx->litSize;
+    int nbSeq;
+    const BYTE* dumps;
+    U32* DTableLL = dctx->LLTable;
+    U32* DTableML = dctx->MLTable;
+    U32* DTableOffb = dctx->OffTable;
+    BYTE* const base = (BYTE*) (dctx->base);
+
+    /* Build Decoding Tables */
+    errorCode = ZSTD_decodeSeqHeaders(&nbSeq, &dumps, &dumpsLength,
+                                      DTableLL, DTableML, DTableOffb,
+                                      ip, iend-ip);
+    if (ZSTD_isError(errorCode)) return errorCode;
+    ip += errorCode;
+
+    /* Regen sequences */
+    {
+        seq_t sequence;
+        seqState_t seqState;
+
+        memset(&sequence, 0, sizeof(sequence));
+        seqState.dumps = dumps;
+        seqState.dumpsEnd = dumps + dumpsLength;
+        seqState.prevOffset = sequence.offset = 4;
+        errorCode = BIT_initDStream(&(seqState.DStream), ip, iend-ip);
+        if (ERR_isError(errorCode)) return ERROR(corruption_detected);
+        FSE_initDState(&(seqState.stateLL), &(seqState.DStream), DTableLL);
+        FSE_initDState(&(seqState.stateOffb), &(seqState.DStream), DTableOffb);
+        FSE_initDState(&(seqState.stateML), &(seqState.DStream), DTableML);
+
+        for ( ; (BIT_reloadDStream(&(seqState.DStream)) <= BIT_DStream_completed) && (nbSeq>0) ; )
+        {
+            size_t oneSeqSize;
+            nbSeq--;
+            ZSTD_decodeSequence(&sequence, &seqState);
+            oneSeqSize = ZSTD_execSequence(op, sequence, &litPtr, litEnd, base, oend);
+            if (ZSTD_isError(oneSeqSize)) return oneSeqSize;
+            op += oneSeqSize;
+        }
+
+        /* check if reached exact end */
+        if ( !BIT_endOfDStream(&(seqState.DStream)) ) return ERROR(corruption_detected);   /* requested too much : data is corrupted */
+        if (nbSeq<0) return ERROR(corruption_detected);   /* requested too many sequences : data is corrupted */
+
+        /* last literal segment */
+        {
+            size_t lastLLSize = litEnd - litPtr;
+            if (litPtr > litEnd) return ERROR(corruption_detected);
+            if (op+lastLLSize > oend) return ERROR(dstSize_tooSmall);
+            if (op != litPtr) memmove(op, litPtr, lastLLSize);
+            op += lastLLSize;
+        }
+    }
+
+    return op-ostart;
+}
+
+
+static size_t ZSTD_decompressBlock(
+                            void* ctx,
+                            void* dst, size_t maxDstSize,
+                      const void* src, size_t srcSize)
+{
+    /* blockType == blockCompressed */
+    const BYTE* ip = (const BYTE*)src;
+
+    /* Decode literals sub-block */
+    size_t litCSize = ZSTD_decodeLiteralsBlock(ctx, src, srcSize);
+    if (ZSTD_isError(litCSize)) return litCSize;
+    ip += litCSize;
+    srcSize -= litCSize;
+
+    return ZSTD_decompressSequences(ctx, dst, maxDstSize, ip, srcSize);
+}
+
+
+static size_t ZSTD_decompressDCtx(void* ctx, void* dst, size_t maxDstSize, const void* src, size_t srcSize)
+{
+    const BYTE* ip = (const BYTE*)src;
+    const BYTE* iend = ip + srcSize;
+    BYTE* const ostart = (BYTE* const)dst;
+    BYTE* op = ostart;
+    BYTE* const oend = ostart + maxDstSize;
+    size_t remainingSize = srcSize;
+    U32 magicNumber;
+    blockProperties_t blockProperties;
+
+    /* Frame Header */
+    if (srcSize < ZSTD_frameHeaderSize+ZSTD_blockHeaderSize) return ERROR(srcSize_wrong);
+    magicNumber = MEM_readLE32(src);
+    if (magicNumber != ZSTD_magicNumber) return ERROR(prefix_unknown);
+    ip += ZSTD_frameHeaderSize; remainingSize -= ZSTD_frameHeaderSize;
+
+    /* Loop on each block */
+    while (1)
+    {
+        size_t decodedSize=0;
+        size_t cBlockSize = ZSTD_getcBlockSize(ip, iend-ip, &blockProperties);
+        if (ZSTD_isError(cBlockSize)) return cBlockSize;
+
+        ip += ZSTD_blockHeaderSize;
+        remainingSize -= ZSTD_blockHeaderSize;
+        if (cBlockSize > remainingSize) return ERROR(srcSize_wrong);
+
+        switch(blockProperties.blockType)
+        {
+        case bt_compressed:
+            decodedSize = ZSTD_decompressBlock(ctx, op, oend-op, ip, cBlockSize);
+            break;
+        case bt_raw :
+            decodedSize = ZSTD_copyUncompressedBlock(op, oend-op, ip, cBlockSize);
+            break;
+        case bt_rle :
+            return ERROR(GENERIC);   /* not yet supported */
+            break;
+        case bt_end :
+            /* end of frame */
+            if (remainingSize) return ERROR(srcSize_wrong);
+            break;
+        default:
+            return ERROR(GENERIC);   /* impossible */
+        }
+        if (cBlockSize == 0) break;   /* bt_end */
+
+        if (ZSTD_isError(decodedSize)) return decodedSize;
+        op += decodedSize;
+        ip += cBlockSize;
+        remainingSize -= cBlockSize;
+    }
+
+    return op-ostart;
+}
+
+static size_t ZSTD_decompress(void* dst, size_t maxDstSize, const void* src, size_t srcSize)
+{
+    ZSTD_DCtx ctx;
+    ctx.base = dst;
+    return ZSTD_decompressDCtx(&ctx, dst, maxDstSize, src, srcSize);
+}
+
+static size_t ZSTD_findFrameCompressedSize(const void* src, size_t srcSize)
+{
+    const BYTE* ip = (const BYTE*)src;
+    size_t remainingSize = srcSize;
+    U32 magicNumber;
+    blockProperties_t blockProperties;
+
+    /* Frame Header */
+    if (srcSize < ZSTD_frameHeaderSize+ZSTD_blockHeaderSize) return ERROR(srcSize_wrong);
+    magicNumber = MEM_readLE32(src);
+    if (magicNumber != ZSTD_magicNumber) return ERROR(prefix_unknown);
+    ip += ZSTD_frameHeaderSize; remainingSize -= ZSTD_frameHeaderSize;
+
+    /* Loop on each block */
+    while (1)
+    {
+        size_t cBlockSize = ZSTD_getcBlockSize(ip, remainingSize, &blockProperties);
+        if (ZSTD_isError(cBlockSize)) return cBlockSize;
+
+        ip += ZSTD_blockHeaderSize;
+        remainingSize -= ZSTD_blockHeaderSize;
+        if (cBlockSize > remainingSize) return ERROR(srcSize_wrong);
+
+        if (cBlockSize == 0) break;   /* bt_end */
+
+        ip += cBlockSize;
+        remainingSize -= cBlockSize;
+    }
+
+    return ip - (const BYTE*)src;
+}
+
+
+/*******************************
+*  Streaming Decompression API
+*******************************/
+
+static size_t ZSTD_resetDCtx(ZSTD_DCtx* dctx)
+{
+    dctx->expected = ZSTD_frameHeaderSize;
+    dctx->phase = 0;
+    dctx->previousDstEnd = NULL;
+    dctx->base = NULL;
+    return 0;
+}
+
+static ZSTD_DCtx* ZSTD_createDCtx(void)
+{
+    ZSTD_DCtx* dctx = (ZSTD_DCtx*)malloc(sizeof(ZSTD_DCtx));
+    if (dctx==NULL) return NULL;
+    ZSTD_resetDCtx(dctx);
+    return dctx;
+}
+
+static size_t ZSTD_freeDCtx(ZSTD_DCtx* dctx)
+{
+    free(dctx);
+    return 0;
+}
+
+static size_t ZSTD_nextSrcSizeToDecompress(ZSTD_DCtx* dctx)
+{
+    return dctx->expected;
+}
+
+static size_t ZSTD_decompressContinue(ZSTD_DCtx* ctx, void* dst, size_t maxDstSize, const void* src, size_t srcSize)
+{
+    /* Sanity check */
+    if (srcSize != ctx->expected) return ERROR(srcSize_wrong);
+    if (dst != ctx->previousDstEnd)  /* not contiguous */
+        ctx->base = dst;
+
+    /* Decompress : frame header */
+    if (ctx->phase == 0)
+    {
+        /* Check frame magic header */
+        U32 magicNumber = MEM_readLE32(src);
+        if (magicNumber != ZSTD_magicNumber) return ERROR(prefix_unknown);
+        ctx->phase = 1;
+        ctx->expected = ZSTD_blockHeaderSize;
+        return 0;
+    }
+
+    /* Decompress : block header */
+    if (ctx->phase == 1)
+    {
+        blockProperties_t bp;
+        size_t blockSize = ZSTD_getcBlockSize(src, ZSTD_blockHeaderSize, &bp);
+        if (ZSTD_isError(blockSize)) return blockSize;
+        if (bp.blockType == bt_end)
+        {
+            ctx->expected = 0;
+            ctx->phase = 0;
+        }
+        else
+        {
+            ctx->expected = blockSize;
+            ctx->bType = bp.blockType;
+            ctx->phase = 2;
+        }
+
+        return 0;
+    }
+
+    /* Decompress : block content */
+    {
+        size_t rSize;
+        switch(ctx->bType)
+        {
+        case bt_compressed:
+            rSize = ZSTD_decompressBlock(ctx, dst, maxDstSize, src, srcSize);
+            break;
+        case bt_raw :
+            rSize = ZSTD_copyUncompressedBlock(dst, maxDstSize, src, srcSize);
+            break;
+        case bt_rle :
+            return ERROR(GENERIC);   /* not yet handled */
+            break;
+        case bt_end :   /* should never happen (filtered at phase 1) */
+            rSize = 0;
+            break;
+        default:
+            return ERROR(GENERIC);
+        }
+        ctx->phase = 1;
+        ctx->expected = ZSTD_blockHeaderSize;
+        ctx->previousDstEnd = (void*)( ((char*)dst) + rSize);
+        return rSize;
+    }
+
+}
+
+
+/* wrapper layer */
+
+unsigned ZSTDv03_isError(size_t code)
+{
+    return ZSTD_isError(code);
+}
+
+size_t ZSTDv03_decompress( void* dst, size_t maxOriginalSize,
+                     const void* src, size_t compressedSize)
+{
+    return ZSTD_decompress(dst, maxOriginalSize, src, compressedSize);
+}
+
+size_t ZSTDv03_findFrameCompressedSize(const void* src, size_t srcSize)
+{
+    return ZSTD_findFrameCompressedSize(src, srcSize);
+}
+
+ZSTDv03_Dctx* ZSTDv03_createDCtx(void)
+{
+    return (ZSTDv03_Dctx*)ZSTD_createDCtx();
+}
+
+size_t ZSTDv03_freeDCtx(ZSTDv03_Dctx* dctx)
+{
+    return ZSTD_freeDCtx((ZSTD_DCtx*)dctx);
+}
+
+size_t ZSTDv03_resetDCtx(ZSTDv03_Dctx* dctx)
+{
+    return ZSTD_resetDCtx((ZSTD_DCtx*)dctx);
+}
+
+size_t ZSTDv03_nextSrcSizeToDecompress(ZSTDv03_Dctx* dctx)
+{
+    return ZSTD_nextSrcSizeToDecompress((ZSTD_DCtx*)dctx);
+}
+
+size_t ZSTDv03_decompressContinue(ZSTDv03_Dctx* dctx, void* dst, size_t maxDstSize, const void* src, size_t srcSize)
+{
+    return ZSTD_decompressContinue((ZSTD_DCtx*)dctx, dst, maxDstSize, src, srcSize);
+}
diff --git a/contrib/libzstd/include/zstd/legacy/zstd_v03.h b/contrib/libzstd/include/zstd/legacy/zstd_v03.h
new file mode 100644
index 00000000000..07f7597bb7c
--- /dev/null
+++ b/contrib/libzstd/include/zstd/legacy/zstd_v03.h
@@ -0,0 +1,87 @@
+/**
+ * Copyright (c) 2016-present, Yann Collet, Facebook, Inc.
+ * All rights reserved.
+ *
+ * This source code is licensed under the BSD-style license found in the
+ * LICENSE file in the root directory of this source tree. An additional grant
+ * of patent rights can be found in the PATENTS file in the same directory.
+ */
+
+#ifndef ZSTD_V03_H_298734209782
+#define ZSTD_V03_H_298734209782
+
+#if defined (__cplusplus)
+extern "C" {
+#endif
+
+/* *************************************
+*  Includes
+***************************************/
+#include <stddef.h>   /* size_t */
+
+
+/* *************************************
+*  Simple one-step function
+***************************************/
+/**
+ZSTDv03_decompress() : decompress ZSTD frames compliant with v0.3.x format
+    compressedSize : is the exact source size
+    maxOriginalSize : is the size of the 'dst' buffer, which must be already allocated.
+                      It must be equal or larger than originalSize, otherwise decompression will fail.
+    return : the number of bytes decompressed into destination buffer (originalSize)
+             or an errorCode if it fails (which can be tested using ZSTDv01_isError())
+*/
+size_t ZSTDv03_decompress( void* dst, size_t maxOriginalSize,
+                     const void* src, size_t compressedSize);
+
+/**
+ZSTDv03_getFrameSrcSize() : get the source length of a ZSTD frame compliant with v0.3.x format
+    compressedSize : The size of the 'src' buffer, at least as large as the frame pointed to by 'src'
+    return : the number of bytes that would be read to decompress this frame
+             or an errorCode if it fails (which can be tested using ZSTDv03_isError())
+*/
+size_t ZSTDv03_findFrameCompressedSize(const void* src, size_t compressedSize);
+
+    /**
+ZSTDv03_isError() : tells if the result of ZSTDv03_decompress() is an error
+*/
+unsigned ZSTDv03_isError(size_t code);
+
+
+/* *************************************
+*  Advanced functions
+***************************************/
+typedef struct ZSTDv03_Dctx_s ZSTDv03_Dctx;
+ZSTDv03_Dctx* ZSTDv03_createDCtx(void);
+size_t ZSTDv03_freeDCtx(ZSTDv03_Dctx* dctx);
+
+size_t ZSTDv03_decompressDCtx(void* ctx,
+                              void* dst, size_t maxOriginalSize,
+                        const void* src, size_t compressedSize);
+
+/* *************************************
+*  Streaming functions
+***************************************/
+size_t ZSTDv03_resetDCtx(ZSTDv03_Dctx* dctx);
+
+size_t ZSTDv03_nextSrcSizeToDecompress(ZSTDv03_Dctx* dctx);
+size_t ZSTDv03_decompressContinue(ZSTDv03_Dctx* dctx, void* dst, size_t maxDstSize, const void* src, size_t srcSize);
+/**
+  Use above functions alternatively.
+  ZSTD_nextSrcSizeToDecompress() tells how much bytes to provide as 'srcSize' to ZSTD_decompressContinue().
+  ZSTD_decompressContinue() will use previous data blocks to improve compression if they are located prior to current block.
+  Result is the number of bytes regenerated within 'dst'.
+  It can be zero, which is not an error; it just means ZSTD_decompressContinue() has decoded some header.
+*/
+
+/* *************************************
+*  Prefix - version detection
+***************************************/
+#define ZSTDv03_magicNumber 0xFD2FB523   /* v0.3 */
+
+
+#if defined (__cplusplus)
+}
+#endif
+
+#endif /* ZSTD_V03_H_298734209782 */
diff --git a/contrib/libzstd/include/zstd/legacy/zstd_v04.c b/contrib/libzstd/include/zstd/legacy/zstd_v04.c
new file mode 100644
index 00000000000..8b8e23cb09c
--- /dev/null
+++ b/contrib/libzstd/include/zstd/legacy/zstd_v04.c
@@ -0,0 +1,3823 @@
+/**
+ * Copyright (c) 2016-present, Yann Collet, Facebook, Inc.
+ * All rights reserved.
+ *
+ * This source code is licensed under the BSD-style license found in the
+ * LICENSE file in the root directory of this source tree. An additional grant
+ * of patent rights can be found in the PATENTS file in the same directory.
+ */
+
+
+/*- Dependencies -*/
+#include "zstd_v04.h"
+#include "error_private.h"
+
+
+/* ******************************************************************
+   mem.h
+   low-level memory access routines
+   Copyright (C) 2013-2015, Yann Collet.
+
+   BSD 2-Clause License (http://www.opensource.org/licenses/bsd-license.php)
+
+   Redistribution and use in source and binary forms, with or without
+   modification, are permitted provided that the following conditions are
+   met:
+
+       * Redistributions of source code must retain the above copyright
+   notice, this list of conditions and the following disclaimer.
+       * Redistributions in binary form must reproduce the above
+   copyright notice, this list of conditions and the following disclaimer
+   in the documentation and/or other materials provided with the
+   distribution.
+
+   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+   OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+    You can contact the author at :
+    - FSE source repository : https://github.com/Cyan4973/FiniteStateEntropy
+    - Public forum : https://groups.google.com/forum/#!forum/lz4c
+****************************************************************** */
+#ifndef MEM_H_MODULE
+#define MEM_H_MODULE
+
+#if defined (__cplusplus)
+extern "C" {
+#endif
+
+/******************************************
+*  Includes
+******************************************/
+#include <stddef.h>    /* size_t, ptrdiff_t */
+#include <string.h>    /* memcpy */
+
+
+/******************************************
+*  Compiler-specific
+******************************************/
+#if defined(_MSC_VER)   /* Visual Studio */
+#   include <stdlib.h>  /* _byteswap_ulong */
+#   include <intrin.h>  /* _byteswap_* */
+#endif
+#if defined(__GNUC__)
+#  define MEM_STATIC static __attribute__((unused))
+#elif defined (__cplusplus) || (defined (__STDC_VERSION__) && (__STDC_VERSION__ >= 199901L) /* C99 */)
+#  define MEM_STATIC static inline
+#elif defined(_MSC_VER)
+#  define MEM_STATIC static __inline
+#else
+#  define MEM_STATIC static  /* this version may generate warnings for unused static functions; disable the relevant warning */
+#endif
+
+
+/****************************************************************
+*  Basic Types
+*****************************************************************/
+#if defined (__cplusplus) || (defined (__STDC_VERSION__) && (__STDC_VERSION__ >= 199901L) /* C99 */)
+# include <stdint.h>
+  typedef  uint8_t BYTE;
+  typedef uint16_t U16;
+  typedef  int16_t S16;
+  typedef uint32_t U32;
+  typedef  int32_t S32;
+  typedef uint64_t U64;
+  typedef  int64_t S64;
+#else
+  typedef unsigned char       BYTE;
+  typedef unsigned short      U16;
+  typedef   signed short      S16;
+  typedef unsigned int        U32;
+  typedef   signed int        S32;
+  typedef unsigned long long  U64;
+  typedef   signed long long  S64;
+#endif
+
+
+/****************************************************************
+*  Memory I/O
+*****************************************************************/
+/* MEM_FORCE_MEMORY_ACCESS
+ * By default, access to unaligned memory is controlled by `memcpy()`, which is safe and portable.
+ * Unfortunately, on some target/compiler combinations, the generated assembly is sub-optimal.
+ * The below switch allow to select different access method for improved performance.
+ * Method 0 (default) : use `memcpy()`. Safe and portable.
+ * Method 1 : `__packed` statement. It depends on compiler extension (ie, not portable).
+ *            This method is safe if your compiler supports it, and *generally* as fast or faster than `memcpy`.
+ * Method 2 : direct access. This method is portable but violate C standard.
+ *            It can generate buggy code on targets generating assembly depending on alignment.
+ *            But in some circumstances, it's the only known way to get the most performance (ie GCC + ARMv6)
+ * See http://fastcompression.blogspot.fr/2015/08/accessing-unaligned-memory.html for details.
+ * Prefer these methods in priority order (0 > 1 > 2)
+ */
+#ifndef MEM_FORCE_MEMORY_ACCESS   /* can be defined externally, on command line for example */
+#  if defined(__GNUC__) && ( defined(__ARM_ARCH_6__) || defined(__ARM_ARCH_6J__) || defined(__ARM_ARCH_6K__) || defined(__ARM_ARCH_6Z__) || defined(__ARM_ARCH_6ZK__) || defined(__ARM_ARCH_6T2__) )
+#    define MEM_FORCE_MEMORY_ACCESS 2
+#  elif (defined(__INTEL_COMPILER) && !defined(WIN32)) || \
+  (defined(__GNUC__) && ( defined(__ARM_ARCH_7__) || defined(__ARM_ARCH_7A__) || defined(__ARM_ARCH_7R__) || defined(__ARM_ARCH_7M__) || defined(__ARM_ARCH_7S__) ))
+#    define MEM_FORCE_MEMORY_ACCESS 1
+#  endif
+#endif
+
+MEM_STATIC unsigned MEM_32bits(void) { return sizeof(void*)==4; }
+MEM_STATIC unsigned MEM_64bits(void) { return sizeof(void*)==8; }
+
+MEM_STATIC unsigned MEM_isLittleEndian(void)
+{
+    const union { U32 u; BYTE c[4]; } one = { 1 };   /* don't use static : performance detrimental  */
+    return one.c[0];
+}
+
+#if defined(MEM_FORCE_MEMORY_ACCESS) && (MEM_FORCE_MEMORY_ACCESS==2)
+
+/* violates C standard on structure alignment.
+Only use if no other choice to achieve best performance on target platform */
+MEM_STATIC U16 MEM_read16(const void* memPtr) { return *(const U16*) memPtr; }
+MEM_STATIC U32 MEM_read32(const void* memPtr) { return *(const U32*) memPtr; }
+MEM_STATIC U64 MEM_read64(const void* memPtr) { return *(const U64*) memPtr; }
+
+MEM_STATIC void MEM_write16(void* memPtr, U16 value) { *(U16*)memPtr = value; }
+
+#elif defined(MEM_FORCE_MEMORY_ACCESS) && (MEM_FORCE_MEMORY_ACCESS==1)
+
+/* __pack instructions are safer, but compiler specific, hence potentially problematic for some compilers */
+/* currently only defined for gcc and icc */
+typedef union { U16 u16; U32 u32; U64 u64; } __attribute__((packed)) unalign;
+
+MEM_STATIC U16 MEM_read16(const void* ptr) { return ((const unalign*)ptr)->u16; }
+MEM_STATIC U32 MEM_read32(const void* ptr) { return ((const unalign*)ptr)->u32; }
+MEM_STATIC U64 MEM_read64(const void* ptr) { return ((const unalign*)ptr)->u64; }
+
+MEM_STATIC void MEM_write16(void* memPtr, U16 value) { ((unalign*)memPtr)->u16 = value; }
+
+#else
+
+/* default method, safe and standard.
+   can sometimes prove slower */
+
+MEM_STATIC U16 MEM_read16(const void* memPtr)
+{
+    U16 val; memcpy(&val, memPtr, sizeof(val)); return val;
+}
+
+MEM_STATIC U32 MEM_read32(const void* memPtr)
+{
+    U32 val; memcpy(&val, memPtr, sizeof(val)); return val;
+}
+
+MEM_STATIC U64 MEM_read64(const void* memPtr)
+{
+    U64 val; memcpy(&val, memPtr, sizeof(val)); return val;
+}
+
+MEM_STATIC void MEM_write16(void* memPtr, U16 value)
+{
+    memcpy(memPtr, &value, sizeof(value));
+}
+
+#endif // MEM_FORCE_MEMORY_ACCESS
+
+
+MEM_STATIC U16 MEM_readLE16(const void* memPtr)
+{
+    if (MEM_isLittleEndian())
+        return MEM_read16(memPtr);
+    else
+    {
+        const BYTE* p = (const BYTE*)memPtr;
+        return (U16)(p[0] + (p[1]<<8));
+    }
+}
+
+MEM_STATIC void MEM_writeLE16(void* memPtr, U16 val)
+{
+    if (MEM_isLittleEndian())
+    {
+        MEM_write16(memPtr, val);
+    }
+    else
+    {
+        BYTE* p = (BYTE*)memPtr;
+        p[0] = (BYTE)val;
+        p[1] = (BYTE)(val>>8);
+    }
+}
+
+MEM_STATIC U32 MEM_readLE32(const void* memPtr)
+{
+    if (MEM_isLittleEndian())
+        return MEM_read32(memPtr);
+    else
+    {
+        const BYTE* p = (const BYTE*)memPtr;
+        return (U32)((U32)p[0] + ((U32)p[1]<<8) + ((U32)p[2]<<16) + ((U32)p[3]<<24));
+    }
+}
+
+
+MEM_STATIC U64 MEM_readLE64(const void* memPtr)
+{
+    if (MEM_isLittleEndian())
+        return MEM_read64(memPtr);
+    else
+    {
+        const BYTE* p = (const BYTE*)memPtr;
+        return (U64)((U64)p[0] + ((U64)p[1]<<8) + ((U64)p[2]<<16) + ((U64)p[3]<<24)
+                     + ((U64)p[4]<<32) + ((U64)p[5]<<40) + ((U64)p[6]<<48) + ((U64)p[7]<<56));
+    }
+}
+
+
+MEM_STATIC size_t MEM_readLEST(const void* memPtr)
+{
+    if (MEM_32bits())
+        return (size_t)MEM_readLE32(memPtr);
+    else
+        return (size_t)MEM_readLE64(memPtr);
+}
+
+
+#if defined (__cplusplus)
+}
+#endif
+
+#endif /* MEM_H_MODULE */
+
+/*
+    zstd - standard compression library
+    Header File for static linking only
+    Copyright (C) 2014-2015, Yann Collet.
+
+    BSD 2-Clause License (http://www.opensource.org/licenses/bsd-license.php)
+
+    Redistribution and use in source and binary forms, with or without
+    modification, are permitted provided that the following conditions are
+    met:
+    * Redistributions of source code must retain the above copyright
+    notice, this list of conditions and the following disclaimer.
+    * Redistributions in binary form must reproduce the above
+    copyright notice, this list of conditions and the following disclaimer
+    in the documentation and/or other materials provided with the
+    distribution.
+    THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+    "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+    LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+    A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+    OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+    SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+    LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+    DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+    THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+    (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+    OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+    You can contact the author at :
+    - zstd source repository : https://github.com/Cyan4973/zstd
+    - ztsd public forum : https://groups.google.com/forum/#!forum/lz4c
+*/
+#ifndef ZSTD_STATIC_H
+#define ZSTD_STATIC_H
+
+/* The objects defined into this file shall be considered experimental.
+ * They are not considered stable, as their prototype may change in the future.
+ * You can use them for tests, provide feedback, or if you can endure risks of future changes.
+ */
+
+#if defined (__cplusplus)
+extern "C" {
+#endif
+
+/* *************************************
+*  Types
+***************************************/
+#define ZSTD_WINDOWLOG_MAX 26
+#define ZSTD_WINDOWLOG_MIN 18
+#define ZSTD_WINDOWLOG_ABSOLUTEMIN 11
+#define ZSTD_CONTENTLOG_MAX (ZSTD_WINDOWLOG_MAX+1)
+#define ZSTD_CONTENTLOG_MIN 4
+#define ZSTD_HASHLOG_MAX 28
+#define ZSTD_HASHLOG_MIN 4
+#define ZSTD_SEARCHLOG_MAX (ZSTD_CONTENTLOG_MAX-1)
+#define ZSTD_SEARCHLOG_MIN 1
+#define ZSTD_SEARCHLENGTH_MAX 7
+#define ZSTD_SEARCHLENGTH_MIN 4
+
+/** from faster to stronger */
+typedef enum { ZSTD_fast, ZSTD_greedy, ZSTD_lazy, ZSTD_lazy2, ZSTD_btlazy2 } ZSTD_strategy;
+
+typedef struct
+{
+    U64 srcSize;       /* optional : tells how much bytes are present in the frame. Use 0 if not known. */
+    U32 windowLog;     /* largest match distance : larger == more compression, more memory needed during decompression */
+    U32 contentLog;    /* full search segment : larger == more compression, slower, more memory (useless for fast) */
+    U32 hashLog;       /* dispatch table : larger == more memory, faster */
+    U32 searchLog;     /* nb of searches : larger == more compression, slower */
+    U32 searchLength;  /* size of matches : larger == faster decompression, sometimes less compression */
+    ZSTD_strategy strategy;
+} ZSTD_parameters;
+
+typedef ZSTDv04_Dctx ZSTD_DCtx;
+
+/* *************************************
+*  Advanced functions
+***************************************/
+/** ZSTD_decompress_usingDict
+*   Same as ZSTD_decompressDCtx, using a Dictionary content as prefix
+*   Note : dict can be NULL, in which case, it's equivalent to ZSTD_decompressDCtx() */
+static size_t ZSTD_decompress_usingDict(ZSTD_DCtx* ctx,
+                                             void* dst, size_t maxDstSize,
+                                       const void* src, size_t srcSize,
+                                       const void* dict,size_t dictSize);
+
+
+/* **************************************
+*  Streaming functions (direct mode)
+****************************************/
+static size_t ZSTD_resetDCtx(ZSTD_DCtx* dctx);
+static size_t ZSTD_getFrameParams(ZSTD_parameters* params, const void* src, size_t srcSize);
+static void   ZSTD_decompress_insertDictionary(ZSTD_DCtx* ctx, const void* src, size_t srcSize);
+
+static size_t ZSTD_nextSrcSizeToDecompress(ZSTD_DCtx* dctx);
+static size_t ZSTD_decompressContinue(ZSTD_DCtx* dctx, void* dst, size_t maxDstSize, const void* src, size_t srcSize);
+
+/**
+  Streaming decompression, bufferless mode
+
+  A ZSTD_DCtx object is required to track streaming operations.
+  Use ZSTD_createDCtx() / ZSTD_freeDCtx() to manage it.
+  A ZSTD_DCtx object can be re-used multiple times. Use ZSTD_resetDCtx() to return to fresh status.
+
+  First operation is to retrieve frame parameters, using ZSTD_getFrameParams().
+  This function doesn't consume its input. It needs enough input data to properly decode the frame header.
+  Objective is to retrieve *params.windowlog, to know minimum amount of memory required during decoding.
+  Result : 0 when successful, it means the ZSTD_parameters structure has been filled.
+           >0 : means there is not enough data into src. Provides the expected size to successfully decode header.
+           errorCode, which can be tested using ZSTD_isError() (For example, if it's not a ZSTD header)
+
+  Then, you can optionally insert a dictionary.
+  This operation must mimic the compressor behavior, otherwise decompression will fail or be corrupted.
+
+  Then it's possible to start decompression.
+  Use ZSTD_nextSrcSizeToDecompress() and ZSTD_decompressContinue() alternatively.
+  ZSTD_nextSrcSizeToDecompress() tells how much bytes to provide as 'srcSize' to ZSTD_decompressContinue().
+  ZSTD_decompressContinue() requires this exact amount of bytes, or it will fail.
+  ZSTD_decompressContinue() needs previous data blocks during decompression, up to (1 << windowlog).
+  They should preferably be located contiguously, prior to current block. Alternatively, a round buffer is also possible.
+
+  @result of ZSTD_decompressContinue() is the number of bytes regenerated within 'dst'.
+  It can be zero, which is not an error; it just means ZSTD_decompressContinue() has decoded some header.
+
+  A frame is fully decoded when ZSTD_nextSrcSizeToDecompress() returns zero.
+  Context can then be reset to start a new decompression.
+*/
+
+
+#if defined (__cplusplus)
+}
+#endif
+
+
+#endif  /* ZSTD_STATIC_H */
+
+
+/*
+    zstd_internal - common functions to include
+    Header File for include
+    Copyright (C) 2014-2015, Yann Collet.
+
+    BSD 2-Clause License (http://www.opensource.org/licenses/bsd-license.php)
+
+    Redistribution and use in source and binary forms, with or without
+    modification, are permitted provided that the following conditions are
+    met:
+    * Redistributions of source code must retain the above copyright
+    notice, this list of conditions and the following disclaimer.
+    * Redistributions in binary form must reproduce the above
+    copyright notice, this list of conditions and the following disclaimer
+    in the documentation and/or other materials provided with the
+    distribution.
+    THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+    "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+    LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+    A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+    OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+    SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+    LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+    DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+    THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+    (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+    OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+    You can contact the author at :
+    - zstd source repository : https://github.com/Cyan4973/zstd
+    - ztsd public forum : https://groups.google.com/forum/#!forum/lz4c
+*/
+#ifndef ZSTD_CCOMMON_H_MODULE
+#define ZSTD_CCOMMON_H_MODULE
+
+#if defined (__cplusplus)
+extern "C" {
+#endif
+
+/* *************************************
+*  Common macros
+***************************************/
+#define MIN(a,b) ((a)<(b) ? (a) : (b))
+#define MAX(a,b) ((a)>(b) ? (a) : (b))
+
+
+/* *************************************
+*  Common constants
+***************************************/
+#define ZSTD_MAGICNUMBER 0xFD2FB524   /* v0.4 */
+
+#define KB *(1 <<10)
+#define MB *(1 <<20)
+#define GB *(1U<<30)
+
+#define BLOCKSIZE (128 KB)                 /* define, for static allocation */
+
+static const size_t ZSTD_blockHeaderSize = 3;
+static const size_t ZSTD_frameHeaderSize_min = 5;
+#define ZSTD_frameHeaderSize_max 5         /* define, for static allocation */
+
+#define BIT7 128
+#define BIT6  64
+#define BIT5  32
+#define BIT4  16
+#define BIT1   2
+#define BIT0   1
+
+#define IS_RAW BIT0
+#define IS_RLE BIT1
+
+#define MINMATCH 4
+#define REPCODE_STARTVALUE 4
+
+#define MLbits   7
+#define LLbits   6
+#define Offbits  5
+#define MaxML  ((1<<MLbits) - 1)
+#define MaxLL  ((1<<LLbits) - 1)
+#define MaxOff ((1<<Offbits)- 1)
+#define MLFSELog   10
+#define LLFSELog   10
+#define OffFSELog   9
+#define MaxSeq MAX(MaxLL, MaxML)
+
+#define MIN_SEQUENCES_SIZE (2 /*seqNb*/ + 2 /*dumps*/ + 3 /*seqTables*/ + 1 /*bitStream*/)
+#define MIN_CBLOCK_SIZE (3 /*litCSize*/ + MIN_SEQUENCES_SIZE)
+
+typedef enum { bt_compressed, bt_raw, bt_rle, bt_end } blockType_t;
+
+
+/* ******************************************
+*  Shared functions to include for inlining
+********************************************/
+static void ZSTD_copy8(void* dst, const void* src) { memcpy(dst, src, 8); }
+
+#define COPY8(d,s) { ZSTD_copy8(d,s); d+=8; s+=8; }
+
+/*! ZSTD_wildcopy : custom version of memcpy(), can copy up to 7-8 bytes too many */
+static void ZSTD_wildcopy(void* dst, const void* src, ptrdiff_t length)
+{
+    const BYTE* ip = (const BYTE*)src;
+    BYTE* op = (BYTE*)dst;
+    BYTE* const oend = op + length;
+    do
+        COPY8(op, ip)
+    while (op < oend);
+}
+
+
+#if defined (__cplusplus)
+}
+#endif
+
+
+/* ******************************************************************
+   FSE : Finite State Entropy coder
+   header file
+   Copyright (C) 2013-2015, Yann Collet.
+
+   BSD 2-Clause License (http://www.opensource.org/licenses/bsd-license.php)
+
+   Redistribution and use in source and binary forms, with or without
+   modification, are permitted provided that the following conditions are
+   met:
+
+       * Redistributions of source code must retain the above copyright
+   notice, this list of conditions and the following disclaimer.
+       * Redistributions in binary form must reproduce the above
+   copyright notice, this list of conditions and the following disclaimer
+   in the documentation and/or other materials provided with the
+   distribution.
+
+   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+   OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+   You can contact the author at :
+   - Source repository : https://github.com/Cyan4973/FiniteStateEntropy
+   - Public forum : https://groups.google.com/forum/#!forum/lz4c
+****************************************************************** */
+#ifndef FSE_H
+#define FSE_H
+
+#if defined (__cplusplus)
+extern "C" {
+#endif
+
+
+/* *****************************************
+*  Includes
+******************************************/
+#include <stddef.h>    /* size_t, ptrdiff_t */
+
+
+/* *****************************************
+*  FSE simple functions
+******************************************/
+static size_t FSE_decompress(void* dst,  size_t maxDstSize,
+                const void* cSrc, size_t cSrcSize);
+/*!
+FSE_decompress():
+    Decompress FSE data from buffer 'cSrc', of size 'cSrcSize',
+    into already allocated destination buffer 'dst', of size 'maxDstSize'.
+    return : size of regenerated data (<= maxDstSize)
+             or an error code, which can be tested using FSE_isError()
+
+    ** Important ** : FSE_decompress() doesn't decompress non-compressible nor RLE data !!!
+    Why ? : making this distinction requires a header.
+    Header management is intentionally delegated to the user layer, which can better manage special cases.
+*/
+
+
+/* *****************************************
+*  Tool functions
+******************************************/
+/* Error Management */
+static unsigned    FSE_isError(size_t code);        /* tells if a return value is an error code */
+
+
+
+/* *****************************************
+*  FSE detailed API
+******************************************/
+/*!
+FSE_compress() does the following:
+1. count symbol occurrence from source[] into table count[]
+2. normalize counters so that sum(count[]) == Power_of_2 (2^tableLog)
+3. save normalized counters to memory buffer using writeNCount()
+4. build encoding table 'CTable' from normalized counters
+5. encode the data stream using encoding table 'CTable'
+
+FSE_decompress() does the following:
+1. read normalized counters with readNCount()
+2. build decoding table 'DTable' from normalized counters
+3. decode the data stream using decoding table 'DTable'
+
+The following API allows targeting specific sub-functions for advanced tasks.
+For example, it's possible to compress several blocks using the same 'CTable',
+or to save and provide normalized distribution using external method.
+*/
+
+
+/* *** DECOMPRESSION *** */
+
+/*!
+FSE_readNCount():
+   Read compactly saved 'normalizedCounter' from 'rBuffer'.
+   return : size read from 'rBuffer'
+            or an errorCode, which can be tested using FSE_isError()
+            maxSymbolValuePtr[0] and tableLogPtr[0] will also be updated with their respective values */
+static  size_t FSE_readNCount (short* normalizedCounter, unsigned* maxSymbolValuePtr, unsigned* tableLogPtr, const void* rBuffer, size_t rBuffSize);
+
+/*!
+Constructor and Destructor of type FSE_DTable
+    Note that its size depends on 'tableLog' */
+typedef unsigned FSE_DTable;   /* don't allocate that. It's just a way to be more restrictive than void* */
+
+/*!
+FSE_buildDTable():
+   Builds 'dt', which must be already allocated, using FSE_createDTable()
+   return : 0,
+            or an errorCode, which can be tested using FSE_isError() */
+static size_t FSE_buildDTable ( FSE_DTable* dt, const short* normalizedCounter, unsigned maxSymbolValue, unsigned tableLog);
+
+/*!
+FSE_decompress_usingDTable():
+   Decompress compressed source 'cSrc' of size 'cSrcSize' using 'dt'
+   into 'dst' which must be already allocated.
+   return : size of regenerated data (necessarily <= maxDstSize)
+            or an errorCode, which can be tested using FSE_isError() */
+static  size_t FSE_decompress_usingDTable(void* dst, size_t maxDstSize, const void* cSrc, size_t cSrcSize, const FSE_DTable* dt);
+
+/*!
+Tutorial :
+----------
+(Note : these functions only decompress FSE-compressed blocks.
+ If block is uncompressed, use memcpy() instead
+ If block is a single repeated byte, use memset() instead )
+
+The first step is to obtain the normalized frequencies of symbols.
+This can be performed by FSE_readNCount() if it was saved using FSE_writeNCount().
+'normalizedCounter' must be already allocated, and have at least 'maxSymbolValuePtr[0]+1' cells of signed short.
+In practice, that means it's necessary to know 'maxSymbolValue' beforehand,
+or size the table to handle worst case situations (typically 256).
+FSE_readNCount() will provide 'tableLog' and 'maxSymbolValue'.
+The result of FSE_readNCount() is the number of bytes read from 'rBuffer'.
+Note that 'rBufferSize' must be at least 4 bytes, even if useful information is less than that.
+If there is an error, the function will return an error code, which can be tested using FSE_isError().
+
+The next step is to build the decompression tables 'FSE_DTable' from 'normalizedCounter'.
+This is performed by the function FSE_buildDTable().
+The space required by 'FSE_DTable' must be already allocated using FSE_createDTable().
+If there is an error, the function will return an error code, which can be tested using FSE_isError().
+
+'FSE_DTable' can then be used to decompress 'cSrc', with FSE_decompress_usingDTable().
+'cSrcSize' must be strictly correct, otherwise decompression will fail.
+FSE_decompress_usingDTable() result will tell how many bytes were regenerated (<=maxDstSize).
+If there is an error, the function will return an error code, which can be tested using FSE_isError(). (ex: dst buffer too small)
+*/
+
+
+#if defined (__cplusplus)
+}
+#endif
+
+#endif  /* FSE_H */
+
+
+/* ******************************************************************
+   bitstream
+   Part of NewGen Entropy library
+   header file (to include)
+   Copyright (C) 2013-2015, Yann Collet.
+
+   BSD 2-Clause License (http://www.opensource.org/licenses/bsd-license.php)
+
+   Redistribution and use in source and binary forms, with or without
+   modification, are permitted provided that the following conditions are
+   met:
+
+       * Redistributions of source code must retain the above copyright
+   notice, this list of conditions and the following disclaimer.
+       * Redistributions in binary form must reproduce the above
+   copyright notice, this list of conditions and the following disclaimer
+   in the documentation and/or other materials provided with the
+   distribution.
+
+   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+   OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+   You can contact the author at :
+   - Source repository : https://github.com/Cyan4973/FiniteStateEntropy
+   - Public forum : https://groups.google.com/forum/#!forum/lz4c
+****************************************************************** */
+#ifndef BITSTREAM_H_MODULE
+#define BITSTREAM_H_MODULE
+
+#if defined (__cplusplus)
+extern "C" {
+#endif
+
+
+/*
+*  This API consists of small unitary functions, which highly benefit from being inlined.
+*  Since link-time-optimization is not available for all compilers,
+*  these functions are defined into a .h to be included.
+*/
+
+/**********************************************
+*  bitStream decompression API (read backward)
+**********************************************/
+typedef struct
+{
+    size_t   bitContainer;
+    unsigned bitsConsumed;
+    const char* ptr;
+    const char* start;
+} BIT_DStream_t;
+
+typedef enum { BIT_DStream_unfinished = 0,
+               BIT_DStream_endOfBuffer = 1,
+               BIT_DStream_completed = 2,
+               BIT_DStream_overflow = 3 } BIT_DStream_status;  /* result of BIT_reloadDStream() */
+               /* 1,2,4,8 would be better for bitmap combinations, but slows down performance a bit ... :( */
+
+MEM_STATIC size_t   BIT_initDStream(BIT_DStream_t* bitD, const void* srcBuffer, size_t srcSize);
+MEM_STATIC size_t   BIT_readBits(BIT_DStream_t* bitD, unsigned nbBits);
+MEM_STATIC BIT_DStream_status BIT_reloadDStream(BIT_DStream_t* bitD);
+MEM_STATIC unsigned BIT_endOfDStream(const BIT_DStream_t* bitD);
+
+
+/*
+* Start by invoking BIT_initDStream().
+* A chunk of the bitStream is then stored into a local register.
+* Local register size is 64-bits on 64-bits systems, 32-bits on 32-bits systems (size_t).
+* You can then retrieve bitFields stored into the local register, **in reverse order**.
+* Local register is manually filled from memory by the BIT_reloadDStream() method.
+* A reload guarantee a minimum of ((8*sizeof(size_t))-7) bits when its result is BIT_DStream_unfinished.
+* Otherwise, it can be less than that, so proceed accordingly.
+* Checking if DStream has reached its end can be performed with BIT_endOfDStream()
+*/
+
+
+/******************************************
+*  unsafe API
+******************************************/
+MEM_STATIC size_t BIT_readBitsFast(BIT_DStream_t* bitD, unsigned nbBits);
+/* faster, but works only if nbBits >= 1 */
+
+
+
+/****************************************************************
+*  Helper functions
+****************************************************************/
+MEM_STATIC unsigned BIT_highbit32 (register U32 val)
+{
+#   if defined(_MSC_VER)   /* Visual */
+    unsigned long r=0;
+    _BitScanReverse ( &r, val );
+    return (unsigned) r;
+#   elif defined(__GNUC__) && (__GNUC__ >= 3)   /* Use GCC Intrinsic */
+    return 31 - __builtin_clz (val);
+#   else   /* Software version */
+    static const unsigned DeBruijnClz[32] = { 0, 9, 1, 10, 13, 21, 2, 29, 11, 14, 16, 18, 22, 25, 3, 30, 8, 12, 20, 28, 15, 17, 24, 7, 19, 27, 23, 6, 26, 5, 4, 31 };
+    U32 v = val;
+    unsigned r;
+    v |= v >> 1;
+    v |= v >> 2;
+    v |= v >> 4;
+    v |= v >> 8;
+    v |= v >> 16;
+    r = DeBruijnClz[ (U32) (v * 0x07C4ACDDU) >> 27];
+    return r;
+#   endif
+}
+
+
+/**********************************************************
+* bitStream decoding
+**********************************************************/
+
+/*!BIT_initDStream
+*  Initialize a BIT_DStream_t.
+*  @bitD : a pointer to an already allocated BIT_DStream_t structure
+*  @srcBuffer must point at the beginning of a bitStream
+*  @srcSize must be the exact size of the bitStream
+*  @result : size of stream (== srcSize) or an errorCode if a problem is detected
+*/
+MEM_STATIC size_t BIT_initDStream(BIT_DStream_t* bitD, const void* srcBuffer, size_t srcSize)
+{
+    if (srcSize < 1) { memset(bitD, 0, sizeof(*bitD)); return ERROR(srcSize_wrong); }
+
+    if (srcSize >=  sizeof(size_t))   /* normal case */
+    {
+        U32 contain32;
+        bitD->start = (const char*)srcBuffer;
+        bitD->ptr   = (const char*)srcBuffer + srcSize - sizeof(size_t);
+        bitD->bitContainer = MEM_readLEST(bitD->ptr);
+        contain32 = ((const BYTE*)srcBuffer)[srcSize-1];
+        if (contain32 == 0) return ERROR(GENERIC);   /* endMark not present */
+        bitD->bitsConsumed = 8 - BIT_highbit32(contain32);
+    }
+    else
+    {
+        U32 contain32;
+        bitD->start = (const char*)srcBuffer;
+        bitD->ptr   = bitD->start;
+        bitD->bitContainer = *(const BYTE*)(bitD->start);
+        switch(srcSize)
+        {
+            case 7: bitD->bitContainer += (size_t)(((const BYTE*)(bitD->start))[6]) << (sizeof(size_t)*8 - 16);/* fall-through */
+            case 6: bitD->bitContainer += (size_t)(((const BYTE*)(bitD->start))[5]) << (sizeof(size_t)*8 - 24);/* fall-through */
+            case 5: bitD->bitContainer += (size_t)(((const BYTE*)(bitD->start))[4]) << (sizeof(size_t)*8 - 32);/* fall-through */
+            case 4: bitD->bitContainer += (size_t)(((const BYTE*)(bitD->start))[3]) << 24; /* fall-through */
+            case 3: bitD->bitContainer += (size_t)(((const BYTE*)(bitD->start))[2]) << 16; /* fall-through */
+            case 2: bitD->bitContainer += (size_t)(((const BYTE*)(bitD->start))[1]) <<  8; /* fall-through */
+            default: break;
+        }
+        contain32 = ((const BYTE*)srcBuffer)[srcSize-1];
+        if (contain32 == 0) return ERROR(GENERIC);   /* endMark not present */
+        bitD->bitsConsumed = 8 - BIT_highbit32(contain32);
+        bitD->bitsConsumed += (U32)(sizeof(size_t) - srcSize)*8;
+    }
+
+    return srcSize;
+}
+
+/*!BIT_lookBits
+ * Provides next n bits from local register
+ * local register is not modified (bits are still present for next read/look)
+ * On 32-bits, maxNbBits==25
+ * On 64-bits, maxNbBits==57
+ * @return : value extracted
+ */
+MEM_STATIC size_t BIT_lookBits(BIT_DStream_t* bitD, U32 nbBits)
+{
+    const U32 bitMask = sizeof(bitD->bitContainer)*8 - 1;
+    return ((bitD->bitContainer << (bitD->bitsConsumed & bitMask)) >> 1) >> ((bitMask-nbBits) & bitMask);
+}
+
+/*! BIT_lookBitsFast :
+*   unsafe version; only works only if nbBits >= 1 */
+MEM_STATIC size_t BIT_lookBitsFast(BIT_DStream_t* bitD, U32 nbBits)
+{
+    const U32 bitMask = sizeof(bitD->bitContainer)*8 - 1;
+    return (bitD->bitContainer << (bitD->bitsConsumed & bitMask)) >> (((bitMask+1)-nbBits) & bitMask);
+}
+
+MEM_STATIC void BIT_skipBits(BIT_DStream_t* bitD, U32 nbBits)
+{
+    bitD->bitsConsumed += nbBits;
+}
+
+/*!BIT_readBits
+ * Read next n bits from local register.
+ * pay attention to not read more than nbBits contained into local register.
+ * @return : extracted value.
+ */
+MEM_STATIC size_t BIT_readBits(BIT_DStream_t* bitD, U32 nbBits)
+{
+    size_t value = BIT_lookBits(bitD, nbBits);
+    BIT_skipBits(bitD, nbBits);
+    return value;
+}
+
+/*!BIT_readBitsFast :
+*  unsafe version; only works only if nbBits >= 1 */
+MEM_STATIC size_t BIT_readBitsFast(BIT_DStream_t* bitD, U32 nbBits)
+{
+    size_t value = BIT_lookBitsFast(bitD, nbBits);
+    BIT_skipBits(bitD, nbBits);
+    return value;
+}
+
+MEM_STATIC BIT_DStream_status BIT_reloadDStream(BIT_DStream_t* bitD)
+{
+    if (bitD->bitsConsumed > (sizeof(bitD->bitContainer)*8))  /* should never happen */
+        return BIT_DStream_overflow;
+
+    if (bitD->ptr >= bitD->start + sizeof(bitD->bitContainer))
+    {
+        bitD->ptr -= bitD->bitsConsumed >> 3;
+        bitD->bitsConsumed &= 7;
+        bitD->bitContainer = MEM_readLEST(bitD->ptr);
+        return BIT_DStream_unfinished;
+    }
+    if (bitD->ptr == bitD->start)
+    {
+        if (bitD->bitsConsumed < sizeof(bitD->bitContainer)*8) return BIT_DStream_endOfBuffer;
+        return BIT_DStream_completed;
+    }
+    {
+        U32 nbBytes = bitD->bitsConsumed >> 3;
+        BIT_DStream_status result = BIT_DStream_unfinished;
+        if (bitD->ptr - nbBytes < bitD->start)
+        {
+            nbBytes = (U32)(bitD->ptr - bitD->start);  /* ptr > start */
+            result = BIT_DStream_endOfBuffer;
+        }
+        bitD->ptr -= nbBytes;
+        bitD->bitsConsumed -= nbBytes*8;
+        bitD->bitContainer = MEM_readLEST(bitD->ptr);   /* reminder : srcSize > sizeof(bitD) */
+        return result;
+    }
+}
+
+/*! BIT_endOfDStream
+*   @return Tells if DStream has reached its exact end
+*/
+MEM_STATIC unsigned BIT_endOfDStream(const BIT_DStream_t* DStream)
+{
+    return ((DStream->ptr == DStream->start) && (DStream->bitsConsumed == sizeof(DStream->bitContainer)*8));
+}
+
+#if defined (__cplusplus)
+}
+#endif
+
+#endif /* BITSTREAM_H_MODULE */
+
+
+
+/* ******************************************************************
+   FSE : Finite State Entropy coder
+   header file for static linking (only)
+   Copyright (C) 2013-2015, Yann Collet
+
+   BSD 2-Clause License (http://www.opensource.org/licenses/bsd-license.php)
+
+   Redistribution and use in source and binary forms, with or without
+   modification, are permitted provided that the following conditions are
+   met:
+
+       * Redistributions of source code must retain the above copyright
+   notice, this list of conditions and the following disclaimer.
+       * Redistributions in binary form must reproduce the above
+   copyright notice, this list of conditions and the following disclaimer
+   in the documentation and/or other materials provided with the
+   distribution.
+
+   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+   OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+   You can contact the author at :
+   - Source repository : https://github.com/Cyan4973/FiniteStateEntropy
+   - Public forum : https://groups.google.com/forum/#!forum/lz4c
+****************************************************************** */
+#ifndef FSE_STATIC_H
+#define FSE_STATIC_H
+
+#if defined (__cplusplus)
+extern "C" {
+#endif
+
+
+/* *****************************************
+*  Static allocation
+*******************************************/
+/* FSE buffer bounds */
+#define FSE_NCOUNTBOUND 512
+#define FSE_BLOCKBOUND(size) (size + (size>>7))
+#define FSE_COMPRESSBOUND(size) (FSE_NCOUNTBOUND + FSE_BLOCKBOUND(size))   /* Macro version, useful for static allocation */
+
+/* It is possible to statically allocate FSE CTable/DTable as a table of unsigned using below macros */
+#define FSE_CTABLE_SIZE_U32(maxTableLog, maxSymbolValue)   (1 + (1<<(maxTableLog-1)) + ((maxSymbolValue+1)*2))
+#define FSE_DTABLE_SIZE_U32(maxTableLog)                   (1 + (1<<maxTableLog))
+
+
+/* *****************************************
+*  FSE advanced API
+*******************************************/
+static size_t FSE_buildDTable_raw (FSE_DTable* dt, unsigned nbBits);
+/* build a fake FSE_DTable, designed to read an uncompressed bitstream where each symbol uses nbBits */
+
+static size_t FSE_buildDTable_rle (FSE_DTable* dt, unsigned char symbolValue);
+/* build a fake FSE_DTable, designed to always generate the same symbolValue */
+
+
+
+/* *****************************************
+*  FSE symbol decompression API
+*******************************************/
+typedef struct
+{
+    size_t      state;
+    const void* table;   /* precise table may vary, depending on U16 */
+} FSE_DState_t;
+
+
+static void     FSE_initDState(FSE_DState_t* DStatePtr, BIT_DStream_t* bitD, const FSE_DTable* dt);
+
+static unsigned char FSE_decodeSymbol(FSE_DState_t* DStatePtr, BIT_DStream_t* bitD);
+
+static unsigned FSE_endOfDState(const FSE_DState_t* DStatePtr);
+
+/*!
+Let's now decompose FSE_decompress_usingDTable() into its unitary components.
+You will decode FSE-encoded symbols from the bitStream,
+and also any other bitFields you put in, **in reverse order**.
+
+You will need a few variables to track your bitStream. They are :
+
+BIT_DStream_t DStream;    // Stream context
+FSE_DState_t  DState;     // State context. Multiple ones are possible
+FSE_DTable*   DTablePtr;  // Decoding table, provided by FSE_buildDTable()
+
+The first thing to do is to init the bitStream.
+    errorCode = BIT_initDStream(&DStream, srcBuffer, srcSize);
+
+You should then retrieve your initial state(s)
+(in reverse flushing order if you have several ones) :
+    errorCode = FSE_initDState(&DState, &DStream, DTablePtr);
+
+You can then decode your data, symbol after symbol.
+For information the maximum number of bits read by FSE_decodeSymbol() is 'tableLog'.
+Keep in mind that symbols are decoded in reverse order, like a LIFO stack (last in, first out).
+    unsigned char symbol = FSE_decodeSymbol(&DState, &DStream);
+
+You can retrieve any bitfield you eventually stored into the bitStream (in reverse order)
+Note : maximum allowed nbBits is 25, for 32-bits compatibility
+    size_t bitField = BIT_readBits(&DStream, nbBits);
+
+All above operations only read from local register (which size depends on size_t).
+Refueling the register from memory is manually performed by the reload method.
+    endSignal = FSE_reloadDStream(&DStream);
+
+BIT_reloadDStream() result tells if there is still some more data to read from DStream.
+BIT_DStream_unfinished : there is still some data left into the DStream.
+BIT_DStream_endOfBuffer : Dstream reached end of buffer. Its container may no longer be completely filled.
+BIT_DStream_completed : Dstream reached its exact end, corresponding in general to decompression completed.
+BIT_DStream_tooFar : Dstream went too far. Decompression result is corrupted.
+
+When reaching end of buffer (BIT_DStream_endOfBuffer), progress slowly, notably if you decode multiple symbols per loop,
+to properly detect the exact end of stream.
+After each decoded symbol, check if DStream is fully consumed using this simple test :
+    BIT_reloadDStream(&DStream) >= BIT_DStream_completed
+
+When it's done, verify decompression is fully completed, by checking both DStream and the relevant states.
+Checking if DStream has reached its end is performed by :
+    BIT_endOfDStream(&DStream);
+Check also the states. There might be some symbols left there, if some high probability ones (>50%) are possible.
+    FSE_endOfDState(&DState);
+*/
+
+
+/* *****************************************
+*  FSE unsafe API
+*******************************************/
+static unsigned char FSE_decodeSymbolFast(FSE_DState_t* DStatePtr, BIT_DStream_t* bitD);
+/* faster, but works only if nbBits is always >= 1 (otherwise, result will be corrupted) */
+
+
+/* *****************************************
+*  Implementation of inlined functions
+*******************************************/
+/* decompression */
+
+typedef struct {
+    U16 tableLog;
+    U16 fastMode;
+} FSE_DTableHeader;   /* sizeof U32 */
+
+typedef struct
+{
+    unsigned short newState;
+    unsigned char  symbol;
+    unsigned char  nbBits;
+} FSE_decode_t;   /* size == U32 */
+
+MEM_STATIC void FSE_initDState(FSE_DState_t* DStatePtr, BIT_DStream_t* bitD, const FSE_DTable* dt)
+{
+    FSE_DTableHeader DTableH;
+    memcpy(&DTableH, dt, sizeof(DTableH));
+    DStatePtr->state = BIT_readBits(bitD, DTableH.tableLog);
+    BIT_reloadDStream(bitD);
+    DStatePtr->table = dt + 1;
+}
+
+MEM_STATIC BYTE FSE_decodeSymbol(FSE_DState_t* DStatePtr, BIT_DStream_t* bitD)
+{
+    const FSE_decode_t DInfo = ((const FSE_decode_t*)(DStatePtr->table))[DStatePtr->state];
+    const U32  nbBits = DInfo.nbBits;
+    BYTE symbol = DInfo.symbol;
+    size_t lowBits = BIT_readBits(bitD, nbBits);
+
+    DStatePtr->state = DInfo.newState + lowBits;
+    return symbol;
+}
+
+MEM_STATIC BYTE FSE_decodeSymbolFast(FSE_DState_t* DStatePtr, BIT_DStream_t* bitD)
+{
+    const FSE_decode_t DInfo = ((const FSE_decode_t*)(DStatePtr->table))[DStatePtr->state];
+    const U32 nbBits = DInfo.nbBits;
+    BYTE symbol = DInfo.symbol;
+    size_t lowBits = BIT_readBitsFast(bitD, nbBits);
+
+    DStatePtr->state = DInfo.newState + lowBits;
+    return symbol;
+}
+
+MEM_STATIC unsigned FSE_endOfDState(const FSE_DState_t* DStatePtr)
+{
+    return DStatePtr->state == 0;
+}
+
+
+#if defined (__cplusplus)
+}
+#endif
+
+#endif  /* FSE_STATIC_H */
+
+/* ******************************************************************
+   FSE : Finite State Entropy coder
+   Copyright (C) 2013-2015, Yann Collet.
+
+   BSD 2-Clause License (http://www.opensource.org/licenses/bsd-license.php)
+
+   Redistribution and use in source and binary forms, with or without
+   modification, are permitted provided that the following conditions are
+   met:
+
+       * Redistributions of source code must retain the above copyright
+   notice, this list of conditions and the following disclaimer.
+       * Redistributions in binary form must reproduce the above
+   copyright notice, this list of conditions and the following disclaimer
+   in the documentation and/or other materials provided with the
+   distribution.
+
+   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+   OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+    You can contact the author at :
+    - FSE source repository : https://github.com/Cyan4973/FiniteStateEntropy
+    - Public forum : https://groups.google.com/forum/#!forum/lz4c
+****************************************************************** */
+
+#ifndef FSE_COMMONDEFS_ONLY
+
+/* **************************************************************
+*  Tuning parameters
+****************************************************************/
+/*!MEMORY_USAGE :
+*  Memory usage formula : N->2^N Bytes (examples : 10 -> 1KB; 12 -> 4KB ; 16 -> 64KB; 20 -> 1MB; etc.)
+*  Increasing memory usage improves compression ratio
+*  Reduced memory usage can improve speed, due to cache effect
+*  Recommended max value is 14, for 16KB, which nicely fits into Intel x86 L1 cache */
+#define FSE_MAX_MEMORY_USAGE 14
+#define FSE_DEFAULT_MEMORY_USAGE 13
+
+/*!FSE_MAX_SYMBOL_VALUE :
+*  Maximum symbol value authorized.
+*  Required for proper stack allocation */
+#define FSE_MAX_SYMBOL_VALUE 255
+
+
+/* **************************************************************
+*  template functions type & suffix
+****************************************************************/
+#define FSE_FUNCTION_TYPE BYTE
+#define FSE_FUNCTION_EXTENSION
+#define FSE_DECODE_TYPE FSE_decode_t
+
+
+#endif   /* !FSE_COMMONDEFS_ONLY */
+
+/* **************************************************************
+*  Compiler specifics
+****************************************************************/
+#ifdef _MSC_VER    /* Visual Studio */
+#  define FORCE_INLINE static __forceinline
+#  include <intrin.h>                    /* For Visual 2005 */
+#  pragma warning(disable : 4127)        /* disable: C4127: conditional expression is constant */
+#  pragma warning(disable : 4214)        /* disable: C4214: non-int bitfields */
+#else
+#  if defined (__cplusplus) || defined (__STDC_VERSION__) && __STDC_VERSION__ >= 199901L   /* C99 */
+#    ifdef __GNUC__
+#      define FORCE_INLINE static inline __attribute__((always_inline))
+#    else
+#      define FORCE_INLINE static inline
+#    endif
+#  else
+#    define FORCE_INLINE static
+#  endif /* __STDC_VERSION__ */
+#endif
+
+
+/* **************************************************************
+*  Dependencies
+****************************************************************/
+#include <stdlib.h>     /* malloc, free, qsort */
+#include <string.h>     /* memcpy, memset */
+#include <stdio.h>      /* printf (debug) */
+
+
+/* ***************************************************************
+*  Constants
+*****************************************************************/
+#define FSE_MAX_TABLELOG  (FSE_MAX_MEMORY_USAGE-2)
+#define FSE_MAX_TABLESIZE (1U<<FSE_MAX_TABLELOG)
+#define FSE_MAXTABLESIZE_MASK (FSE_MAX_TABLESIZE-1)
+#define FSE_DEFAULT_TABLELOG (FSE_DEFAULT_MEMORY_USAGE-2)
+#define FSE_MIN_TABLELOG 5
+
+#define FSE_TABLELOG_ABSOLUTE_MAX 15
+#if FSE_MAX_TABLELOG > FSE_TABLELOG_ABSOLUTE_MAX
+#error "FSE_MAX_TABLELOG > FSE_TABLELOG_ABSOLUTE_MAX is not supported"
+#endif
+
+
+/* **************************************************************
+*  Error Management
+****************************************************************/
+#define FSE_STATIC_ASSERT(c) { enum { FSE_static_assert = 1/(int)(!!(c)) }; }   /* use only *after* variable declarations */
+
+
+/* **************************************************************
+*  Complex types
+****************************************************************/
+typedef U32 DTable_max_t[FSE_DTABLE_SIZE_U32(FSE_MAX_TABLELOG)];
+
+
+/*-**************************************************************
+*  Templates
+****************************************************************/
+/*
+  designed to be included
+  for type-specific functions (template emulation in C)
+  Objective is to write these functions only once, for improved maintenance
+*/
+
+/* safety checks */
+#ifndef FSE_FUNCTION_EXTENSION
+#  error "FSE_FUNCTION_EXTENSION must be defined"
+#endif
+#ifndef FSE_FUNCTION_TYPE
+#  error "FSE_FUNCTION_TYPE must be defined"
+#endif
+
+/* Function names */
+#define FSE_CAT(X,Y) X##Y
+#define FSE_FUNCTION_NAME(X,Y) FSE_CAT(X,Y)
+#define FSE_TYPE_NAME(X,Y) FSE_CAT(X,Y)
+
+static U32 FSE_tableStep(U32 tableSize) { return (tableSize>>1) + (tableSize>>3) + 3; }
+
+
+static size_t FSE_buildDTable(FSE_DTable* dt, const short* normalizedCounter, unsigned maxSymbolValue, unsigned tableLog)
+{
+    FSE_DTableHeader DTableH;
+    void* const tdPtr = dt+1;   /* because dt is unsigned, 32-bits aligned on 32-bits */
+    FSE_DECODE_TYPE* const tableDecode = (FSE_DECODE_TYPE*) (tdPtr);
+    const U32 tableSize = 1 << tableLog;
+    const U32 tableMask = tableSize-1;
+    const U32 step = FSE_tableStep(tableSize);
+    U16 symbolNext[FSE_MAX_SYMBOL_VALUE+1];
+    U32 position = 0;
+    U32 highThreshold = tableSize-1;
+    const S16 largeLimit= (S16)(1 << (tableLog-1));
+    U32 noLarge = 1;
+    U32 s;
+
+    /* Sanity Checks */
+    if (maxSymbolValue > FSE_MAX_SYMBOL_VALUE) return ERROR(maxSymbolValue_tooLarge);
+    if (tableLog > FSE_MAX_TABLELOG) return ERROR(tableLog_tooLarge);
+
+    /* Init, lay down lowprob symbols */
+    DTableH.tableLog = (U16)tableLog;
+    for (s=0; s<=maxSymbolValue; s++)
+    {
+        if (normalizedCounter[s]==-1)
+        {
+            tableDecode[highThreshold--].symbol = (FSE_FUNCTION_TYPE)s;
+            symbolNext[s] = 1;
+        }
+        else
+        {
+            if (normalizedCounter[s] >= largeLimit) noLarge=0;
+            symbolNext[s] = normalizedCounter[s];
+        }
+    }
+
+    /* Spread symbols */
+    for (s=0; s<=maxSymbolValue; s++)
+    {
+        int i;
+        for (i=0; i<normalizedCounter[s]; i++)
+        {
+            tableDecode[position].symbol = (FSE_FUNCTION_TYPE)s;
+            position = (position + step) & tableMask;
+            while (position > highThreshold) position = (position + step) & tableMask;   /* lowprob area */
+        }
+    }
+
+    if (position!=0) return ERROR(GENERIC);   /* position must reach all cells once, otherwise normalizedCounter is incorrect */
+
+    /* Build Decoding table */
+    {
+        U32 i;
+        for (i=0; i<tableSize; i++)
+        {
+            FSE_FUNCTION_TYPE symbol = (FSE_FUNCTION_TYPE)(tableDecode[i].symbol);
+            U16 nextState = symbolNext[symbol]++;
+            tableDecode[i].nbBits = (BYTE) (tableLog - BIT_highbit32 ((U32)nextState) );
+            tableDecode[i].newState = (U16) ( (nextState << tableDecode[i].nbBits) - tableSize);
+        }
+    }
+
+    DTableH.fastMode = (U16)noLarge;
+    memcpy(dt, &DTableH, sizeof(DTableH));
+    return 0;
+}
+
+
+#ifndef FSE_COMMONDEFS_ONLY
+/******************************************
+*  FSE helper functions
+******************************************/
+static unsigned FSE_isError(size_t code) { return ERR_isError(code); }
+
+
+/****************************************************************
+*  FSE NCount encoding-decoding
+****************************************************************/
+static short FSE_abs(short a)
+{
+    return a<0 ? -a : a;
+}
+
+static size_t FSE_readNCount (short* normalizedCounter, unsigned* maxSVPtr, unsigned* tableLogPtr,
+                 const void* headerBuffer, size_t hbSize)
+{
+    const BYTE* const istart = (const BYTE*) headerBuffer;
+    const BYTE* const iend = istart + hbSize;
+    const BYTE* ip = istart;
+    int nbBits;
+    int remaining;
+    int threshold;
+    U32 bitStream;
+    int bitCount;
+    unsigned charnum = 0;
+    int previous0 = 0;
+
+    if (hbSize < 4) return ERROR(srcSize_wrong);
+    bitStream = MEM_readLE32(ip);
+    nbBits = (bitStream & 0xF) + FSE_MIN_TABLELOG;   /* extract tableLog */
+    if (nbBits > FSE_TABLELOG_ABSOLUTE_MAX) return ERROR(tableLog_tooLarge);
+    bitStream >>= 4;
+    bitCount = 4;
+    *tableLogPtr = nbBits;
+    remaining = (1<<nbBits)+1;
+    threshold = 1<<nbBits;
+    nbBits++;
+
+    while ((remaining>1) && (charnum<=*maxSVPtr))
+    {
+        if (previous0)
+        {
+            unsigned n0 = charnum;
+            while ((bitStream & 0xFFFF) == 0xFFFF)
+            {
+                n0+=24;
+                if (ip < iend-5)
+                {
+                    ip+=2;
+                    bitStream = MEM_readLE32(ip) >> bitCount;
+                }
+                else
+                {
+                    bitStream >>= 16;
+                    bitCount+=16;
+                }
+            }
+            while ((bitStream & 3) == 3)
+            {
+                n0+=3;
+                bitStream>>=2;
+                bitCount+=2;
+            }
+            n0 += bitStream & 3;
+            bitCount += 2;
+            if (n0 > *maxSVPtr) return ERROR(maxSymbolValue_tooSmall);
+            while (charnum < n0) normalizedCounter[charnum++] = 0;
+            if ((ip <= iend-7) || (ip + (bitCount>>3) <= iend-4))
+            {
+                ip += bitCount>>3;
+                bitCount &= 7;
+                bitStream = MEM_readLE32(ip) >> bitCount;
+            }
+            else
+                bitStream >>= 2;
+        }
+        {
+            const short max = (short)((2*threshold-1)-remaining);
+            short count;
+
+            if ((bitStream & (threshold-1)) < (U32)max)
+            {
+                count = (short)(bitStream & (threshold-1));
+                bitCount   += nbBits-1;
+            }
+            else
+            {
+                count = (short)(bitStream & (2*threshold-1));
+                if (count >= threshold) count -= max;
+                bitCount   += nbBits;
+            }
+
+            count--;   /* extra accuracy */
+            remaining -= FSE_abs(count);
+            normalizedCounter[charnum++] = count;
+            previous0 = !count;
+            while (remaining < threshold)
+            {
+                nbBits--;
+                threshold >>= 1;
+            }
+
+            {
+                if ((ip <= iend-7) || (ip + (bitCount>>3) <= iend-4))
+                {
+                    ip += bitCount>>3;
+                    bitCount &= 7;
+                }
+                else
+                {
+                    bitCount -= (int)(8 * (iend - 4 - ip));
+                    ip = iend - 4;
+                }
+                bitStream = MEM_readLE32(ip) >> (bitCount & 31);
+            }
+        }
+    }
+    if (remaining != 1) return ERROR(GENERIC);
+    *maxSVPtr = charnum-1;
+
+    ip += (bitCount+7)>>3;
+    if ((size_t)(ip-istart) > hbSize) return ERROR(srcSize_wrong);
+    return ip-istart;
+}
+
+
+/*********************************************************
+*  Decompression (Byte symbols)
+*********************************************************/
+static size_t FSE_buildDTable_rle (FSE_DTable* dt, BYTE symbolValue)
+{
+    void* ptr = dt;
+    FSE_DTableHeader* const DTableH = (FSE_DTableHeader*)ptr;
+    void* dPtr = dt + 1;
+    FSE_decode_t* const cell = (FSE_decode_t*)dPtr;
+
+    DTableH->tableLog = 0;
+    DTableH->fastMode = 0;
+
+    cell->newState = 0;
+    cell->symbol = symbolValue;
+    cell->nbBits = 0;
+
+    return 0;
+}
+
+
+static size_t FSE_buildDTable_raw (FSE_DTable* dt, unsigned nbBits)
+{
+    void* ptr = dt;
+    FSE_DTableHeader* const DTableH = (FSE_DTableHeader*)ptr;
+    void* dPtr = dt + 1;
+    FSE_decode_t* const dinfo = (FSE_decode_t*)dPtr;
+    const unsigned tableSize = 1 << nbBits;
+    const unsigned tableMask = tableSize - 1;
+    const unsigned maxSymbolValue = tableMask;
+    unsigned s;
+
+    /* Sanity checks */
+    if (nbBits < 1) return ERROR(GENERIC);         /* min size */
+
+    /* Build Decoding Table */
+    DTableH->tableLog = (U16)nbBits;
+    DTableH->fastMode = 1;
+    for (s=0; s<=maxSymbolValue; s++)
+    {
+        dinfo[s].newState = 0;
+        dinfo[s].symbol = (BYTE)s;
+        dinfo[s].nbBits = (BYTE)nbBits;
+    }
+
+    return 0;
+}
+
+FORCE_INLINE size_t FSE_decompress_usingDTable_generic(
+          void* dst, size_t maxDstSize,
+    const void* cSrc, size_t cSrcSize,
+    const FSE_DTable* dt, const unsigned fast)
+{
+    BYTE* const ostart = (BYTE*) dst;
+    BYTE* op = ostart;
+    BYTE* const omax = op + maxDstSize;
+    BYTE* const olimit = omax-3;
+
+    BIT_DStream_t bitD;
+    FSE_DState_t state1;
+    FSE_DState_t state2;
+    size_t errorCode;
+
+    /* Init */
+    errorCode = BIT_initDStream(&bitD, cSrc, cSrcSize);   /* replaced last arg by maxCompressed Size */
+    if (FSE_isError(errorCode)) return errorCode;
+
+    FSE_initDState(&state1, &bitD, dt);
+    FSE_initDState(&state2, &bitD, dt);
+
+#define FSE_GETSYMBOL(statePtr) fast ? FSE_decodeSymbolFast(statePtr, &bitD) : FSE_decodeSymbol(statePtr, &bitD)
+
+    /* 4 symbols per loop */
+    for ( ; (BIT_reloadDStream(&bitD)==BIT_DStream_unfinished) && (op<olimit) ; op+=4)
+    {
+        op[0] = FSE_GETSYMBOL(&state1);
+
+        if (FSE_MAX_TABLELOG*2+7 > sizeof(bitD.bitContainer)*8)    /* This test must be static */
+            BIT_reloadDStream(&bitD);
+
+        op[1] = FSE_GETSYMBOL(&state2);
+
+        if (FSE_MAX_TABLELOG*4+7 > sizeof(bitD.bitContainer)*8)    /* This test must be static */
+            { if (BIT_reloadDStream(&bitD) > BIT_DStream_unfinished) { op+=2; break; } }
+
+        op[2] = FSE_GETSYMBOL(&state1);
+
+        if (FSE_MAX_TABLELOG*2+7 > sizeof(bitD.bitContainer)*8)    /* This test must be static */
+            BIT_reloadDStream(&bitD);
+
+        op[3] = FSE_GETSYMBOL(&state2);
+    }
+
+    /* tail */
+    /* note : BIT_reloadDStream(&bitD) >= FSE_DStream_partiallyFilled; Ends at exactly BIT_DStream_completed */
+    while (1)
+    {
+        if ( (BIT_reloadDStream(&bitD)>BIT_DStream_completed) || (op==omax) || (BIT_endOfDStream(&bitD) && (fast || FSE_endOfDState(&state1))) )
+            break;
+
+        *op++ = FSE_GETSYMBOL(&state1);
+
+        if ( (BIT_reloadDStream(&bitD)>BIT_DStream_completed) || (op==omax) || (BIT_endOfDStream(&bitD) && (fast || FSE_endOfDState(&state2))) )
+            break;
+
+        *op++ = FSE_GETSYMBOL(&state2);
+    }
+
+    /* end ? */
+    if (BIT_endOfDStream(&bitD) && FSE_endOfDState(&state1) && FSE_endOfDState(&state2))
+        return op-ostart;
+
+    if (op==omax) return ERROR(dstSize_tooSmall);   /* dst buffer is full, but cSrc unfinished */
+
+    return ERROR(corruption_detected);
+}
+
+
+static size_t FSE_decompress_usingDTable(void* dst, size_t originalSize,
+                            const void* cSrc, size_t cSrcSize,
+                            const FSE_DTable* dt)
+{
+    FSE_DTableHeader DTableH;
+    U32 fastMode;
+
+    memcpy(&DTableH, dt, sizeof(DTableH));
+    fastMode = DTableH.fastMode;
+
+    /* select fast mode (static) */
+    if (fastMode) return FSE_decompress_usingDTable_generic(dst, originalSize, cSrc, cSrcSize, dt, 1);
+    return FSE_decompress_usingDTable_generic(dst, originalSize, cSrc, cSrcSize, dt, 0);
+}
+
+
+static size_t FSE_decompress(void* dst, size_t maxDstSize, const void* cSrc, size_t cSrcSize)
+{
+    const BYTE* const istart = (const BYTE*)cSrc;
+    const BYTE* ip = istart;
+    short counting[FSE_MAX_SYMBOL_VALUE+1];
+    DTable_max_t dt;   /* Static analyzer seems unable to understand this table will be properly initialized later */
+    unsigned tableLog;
+    unsigned maxSymbolValue = FSE_MAX_SYMBOL_VALUE;
+    size_t errorCode;
+
+    if (cSrcSize<2) return ERROR(srcSize_wrong);   /* too small input size */
+
+    /* normal FSE decoding mode */
+    errorCode = FSE_readNCount (counting, &maxSymbolValue, &tableLog, istart, cSrcSize);
+    if (FSE_isError(errorCode)) return errorCode;
+    if (errorCode >= cSrcSize) return ERROR(srcSize_wrong);   /* too small input size */
+    ip += errorCode;
+    cSrcSize -= errorCode;
+
+    errorCode = FSE_buildDTable (dt, counting, maxSymbolValue, tableLog);
+    if (FSE_isError(errorCode)) return errorCode;
+
+    /* always return, even if it is an error code */
+    return FSE_decompress_usingDTable (dst, maxDstSize, ip, cSrcSize, dt);
+}
+
+
+
+#endif   /* FSE_COMMONDEFS_ONLY */
+
+
+/* ******************************************************************
+   Huff0 : Huffman coder, part of New Generation Entropy library
+   header file
+   Copyright (C) 2013-2015, Yann Collet.
+
+   BSD 2-Clause License (http://www.opensource.org/licenses/bsd-license.php)
+
+   Redistribution and use in source and binary forms, with or without
+   modification, are permitted provided that the following conditions are
+   met:
+
+       * Redistributions of source code must retain the above copyright
+   notice, this list of conditions and the following disclaimer.
+       * Redistributions in binary form must reproduce the above
+   copyright notice, this list of conditions and the following disclaimer
+   in the documentation and/or other materials provided with the
+   distribution.
+
+   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+   OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+   You can contact the author at :
+   - Source repository : https://github.com/Cyan4973/FiniteStateEntropy
+   - Public forum : https://groups.google.com/forum/#!forum/lz4c
+****************************************************************** */
+#ifndef HUFF0_H
+#define HUFF0_H
+
+#if defined (__cplusplus)
+extern "C" {
+#endif
+
+
+/* ****************************************
+*  Dependency
+******************************************/
+#include <stddef.h>    /* size_t */
+
+
+/* ****************************************
+*  Huff0 simple functions
+******************************************/
+static size_t HUF_decompress(void* dst,  size_t dstSize,
+                const void* cSrc, size_t cSrcSize);
+/*!
+HUF_decompress():
+    Decompress Huff0 data from buffer 'cSrc', of size 'cSrcSize',
+    into already allocated destination buffer 'dst', of size 'dstSize'.
+    'dstSize' must be the exact size of original (uncompressed) data.
+    Note : in contrast with FSE, HUF_decompress can regenerate RLE (cSrcSize==1) and uncompressed (cSrcSize==dstSize) data, because it knows size to regenerate.
+    @return : size of regenerated data (== dstSize)
+              or an error code, which can be tested using HUF_isError()
+*/
+
+
+/* ****************************************
+*  Tool functions
+******************************************/
+/* Error Management */
+static unsigned    HUF_isError(size_t code);        /* tells if a return value is an error code */
+
+
+#if defined (__cplusplus)
+}
+#endif
+
+#endif   /* HUFF0_H */
+
+
+/* ******************************************************************
+   Huff0 : Huffman coder, part of New Generation Entropy library
+   header file for static linking (only)
+   Copyright (C) 2013-2015, Yann Collet
+
+   BSD 2-Clause License (http://www.opensource.org/licenses/bsd-license.php)
+
+   Redistribution and use in source and binary forms, with or without
+   modification, are permitted provided that the following conditions are
+   met:
+
+       * Redistributions of source code must retain the above copyright
+   notice, this list of conditions and the following disclaimer.
+       * Redistributions in binary form must reproduce the above
+   copyright notice, this list of conditions and the following disclaimer
+   in the documentation and/or other materials provided with the
+   distribution.
+
+   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+   OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+   You can contact the author at :
+   - Source repository : https://github.com/Cyan4973/FiniteStateEntropy
+   - Public forum : https://groups.google.com/forum/#!forum/lz4c
+****************************************************************** */
+#ifndef HUFF0_STATIC_H
+#define HUFF0_STATIC_H
+
+#if defined (__cplusplus)
+extern "C" {
+#endif
+
+
+
+/* ****************************************
+*  Static allocation macros
+******************************************/
+/* static allocation of Huff0's DTable */
+#define HUF_DTABLE_SIZE(maxTableLog)   (1 + (1<<maxTableLog))  /* nb Cells; use unsigned short for X2, unsigned int for X4 */
+#define HUF_CREATE_STATIC_DTABLEX2(DTable, maxTableLog) \
+        unsigned short DTable[HUF_DTABLE_SIZE(maxTableLog)] = { maxTableLog }
+#define HUF_CREATE_STATIC_DTABLEX4(DTable, maxTableLog) \
+        unsigned int DTable[HUF_DTABLE_SIZE(maxTableLog)] = { maxTableLog }
+#define HUF_CREATE_STATIC_DTABLEX6(DTable, maxTableLog) \
+        unsigned int DTable[HUF_DTABLE_SIZE(maxTableLog) * 3 / 2] = { maxTableLog }
+
+
+/* ****************************************
+*  Advanced decompression functions
+******************************************/
+static size_t HUF_decompress4X2 (void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize);   /* single-symbol decoder */
+static size_t HUF_decompress4X4 (void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize);   /* double-symbols decoder */
+
+
+/* ****************************************
+*  Huff0 detailed API
+******************************************/
+/*!
+HUF_decompress() does the following:
+1. select the decompression algorithm (X2, X4, X6) based on pre-computed heuristics
+2. build Huffman table from save, using HUF_readDTableXn()
+3. decode 1 or 4 segments in parallel using HUF_decompressSXn_usingDTable
+
+*/
+static size_t HUF_readDTableX2 (unsigned short* DTable, const void* src, size_t srcSize);
+static size_t HUF_readDTableX4 (unsigned* DTable, const void* src, size_t srcSize);
+
+static size_t HUF_decompress4X2_usingDTable(void* dst, size_t maxDstSize, const void* cSrc, size_t cSrcSize, const unsigned short* DTable);
+static size_t HUF_decompress4X4_usingDTable(void* dst, size_t maxDstSize, const void* cSrc, size_t cSrcSize, const unsigned* DTable);
+
+
+#if defined (__cplusplus)
+}
+#endif
+
+#endif /* HUFF0_STATIC_H */
+
+
+
+/* ******************************************************************
+   Huff0 : Huffman coder, part of New Generation Entropy library
+   Copyright (C) 2013-2015, Yann Collet.
+
+   BSD 2-Clause License (http://www.opensource.org/licenses/bsd-license.php)
+
+   Redistribution and use in source and binary forms, with or without
+   modification, are permitted provided that the following conditions are
+   met:
+
+       * Redistributions of source code must retain the above copyright
+   notice, this list of conditions and the following disclaimer.
+       * Redistributions in binary form must reproduce the above
+   copyright notice, this list of conditions and the following disclaimer
+   in the documentation and/or other materials provided with the
+   distribution.
+
+   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+   OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+    You can contact the author at :
+    - FSE+Huff0 source repository : https://github.com/Cyan4973/FiniteStateEntropy
+****************************************************************** */
+
+/* **************************************************************
+*  Compiler specifics
+****************************************************************/
+#if defined (__cplusplus) || (defined (__STDC_VERSION__) && (__STDC_VERSION__ >= 199901L) /* C99 */)
+/* inline is defined */
+#elif defined(_MSC_VER)
+#  define inline __inline
+#else
+#  define inline /* disable inline */
+#endif
+
+
+#ifdef _MSC_VER    /* Visual Studio */
+#  pragma warning(disable : 4127)        /* disable: C4127: conditional expression is constant */
+#endif
+
+
+/* **************************************************************
+*  Includes
+****************************************************************/
+#include <stdlib.h>     /* malloc, free, qsort */
+#include <string.h>     /* memcpy, memset */
+#include <stdio.h>      /* printf (debug) */
+
+
+/* **************************************************************
+*  Constants
+****************************************************************/
+#define HUF_ABSOLUTEMAX_TABLELOG  16   /* absolute limit of HUF_MAX_TABLELOG. Beyond that value, code does not work */
+#define HUF_MAX_TABLELOG  12           /* max configured tableLog (for static allocation); can be modified up to HUF_ABSOLUTEMAX_TABLELOG */
+#define HUF_DEFAULT_TABLELOG  HUF_MAX_TABLELOG   /* tableLog by default, when not specified */
+#define HUF_MAX_SYMBOL_VALUE 255
+#if (HUF_MAX_TABLELOG > HUF_ABSOLUTEMAX_TABLELOG)
+#  error "HUF_MAX_TABLELOG is too large !"
+#endif
+
+
+/* **************************************************************
+*  Error Management
+****************************************************************/
+static unsigned HUF_isError(size_t code) { return ERR_isError(code); }
+#define HUF_STATIC_ASSERT(c) { enum { HUF_static_assert = 1/(int)(!!(c)) }; }   /* use only *after* variable declarations */
+
+
+
+/*-*******************************************************
+*  Huff0 : Huffman block decompression
+*********************************************************/
+typedef struct { BYTE byte; BYTE nbBits; } HUF_DEltX2;   /* single-symbol decoding */
+
+typedef struct { U16 sequence; BYTE nbBits; BYTE length; } HUF_DEltX4;  /* double-symbols decoding */
+
+typedef struct { BYTE symbol; BYTE weight; } sortedSymbol_t;
+
+/*! HUF_readStats
+    Read compact Huffman tree, saved by HUF_writeCTable
+    @huffWeight : destination buffer
+    @return : size read from `src`
+*/
+static size_t HUF_readStats(BYTE* huffWeight, size_t hwSize, U32* rankStats,
+                            U32* nbSymbolsPtr, U32* tableLogPtr,
+                            const void* src, size_t srcSize)
+{
+    U32 weightTotal;
+    U32 tableLog;
+    const BYTE* ip = (const BYTE*) src;
+    size_t iSize;
+    size_t oSize;
+    U32 n;
+
+    if (!srcSize) return ERROR(srcSize_wrong);
+    iSize = ip[0];
+    //memset(huffWeight, 0, hwSize);   /* is not necessary, even though some analyzer complain ... */
+
+    if (iSize >= 128)  /* special header */
+    {
+        if (iSize >= (242))   /* RLE */
+        {
+            static int l[14] = { 1, 2, 3, 4, 7, 8, 15, 16, 31, 32, 63, 64, 127, 128 };
+            oSize = l[iSize-242];
+            memset(huffWeight, 1, hwSize);
+            iSize = 0;
+        }
+        else   /* Incompressible */
+        {
+            oSize = iSize - 127;
+            iSize = ((oSize+1)/2);
+            if (iSize+1 > srcSize) return ERROR(srcSize_wrong);
+            if (oSize >= hwSize) return ERROR(corruption_detected);
+            ip += 1;
+            for (n=0; n<oSize; n+=2)
+            {
+                huffWeight[n]   = ip[n/2] >> 4;
+                huffWeight[n+1] = ip[n/2] & 15;
+            }
+        }
+    }
+    else  /* header compressed with FSE (normal case) */
+    {
+        if (iSize+1 > srcSize) return ERROR(srcSize_wrong);
+        oSize = FSE_decompress(huffWeight, hwSize-1, ip+1, iSize);   /* max (hwSize-1) values decoded, as last one is implied */
+        if (FSE_isError(oSize)) return oSize;
+    }
+
+    /* collect weight stats */
+    memset(rankStats, 0, (HUF_ABSOLUTEMAX_TABLELOG + 1) * sizeof(U32));
+    weightTotal = 0;
+    for (n=0; n<oSize; n++)
+    {
+        if (huffWeight[n] >= HUF_ABSOLUTEMAX_TABLELOG) return ERROR(corruption_detected);
+        rankStats[huffWeight[n]]++;
+        weightTotal += (1 << huffWeight[n]) >> 1;
+    }
+    if (weightTotal == 0) return ERROR(corruption_detected);
+
+    /* get last non-null symbol weight (implied, total must be 2^n) */
+    tableLog = BIT_highbit32(weightTotal) + 1;
+    if (tableLog > HUF_ABSOLUTEMAX_TABLELOG) return ERROR(corruption_detected);
+    {
+        U32 total = 1 << tableLog;
+        U32 rest = total - weightTotal;
+        U32 verif = 1 << BIT_highbit32(rest);
+        U32 lastWeight = BIT_highbit32(rest) + 1;
+        if (verif != rest) return ERROR(corruption_detected);    /* last value must be a clean power of 2 */
+        huffWeight[oSize] = (BYTE)lastWeight;
+        rankStats[lastWeight]++;
+    }
+
+    /* check tree construction validity */
+    if ((rankStats[1] < 2) || (rankStats[1] & 1)) return ERROR(corruption_detected);   /* by construction : at least 2 elts of rank 1, must be even */
+
+    /* results */
+    *nbSymbolsPtr = (U32)(oSize+1);
+    *tableLogPtr = tableLog;
+    return iSize+1;
+}
+
+
+/**************************/
+/* single-symbol decoding */
+/**************************/
+
+static size_t HUF_readDTableX2 (U16* DTable, const void* src, size_t srcSize)
+{
+    BYTE huffWeight[HUF_MAX_SYMBOL_VALUE + 1];
+    U32 rankVal[HUF_ABSOLUTEMAX_TABLELOG + 1];   /* large enough for values from 0 to 16 */
+    U32 tableLog = 0;
+    size_t iSize;
+    U32 nbSymbols = 0;
+    U32 n;
+    U32 nextRankStart;
+    void* const dtPtr = DTable + 1;
+    HUF_DEltX2* const dt = (HUF_DEltX2*)dtPtr;
+
+    HUF_STATIC_ASSERT(sizeof(HUF_DEltX2) == sizeof(U16));   /* if compilation fails here, assertion is false */
+    //memset(huffWeight, 0, sizeof(huffWeight));   /* is not necessary, even though some analyzer complain ... */
+
+    iSize = HUF_readStats(huffWeight, HUF_MAX_SYMBOL_VALUE + 1, rankVal, &nbSymbols, &tableLog, src, srcSize);
+    if (HUF_isError(iSize)) return iSize;
+
+    /* check result */
+    if (tableLog > DTable[0]) return ERROR(tableLog_tooLarge);   /* DTable is too small */
+    DTable[0] = (U16)tableLog;   /* maybe should separate sizeof DTable, as allocated, from used size of DTable, in case of DTable re-use */
+
+    /* Prepare ranks */
+    nextRankStart = 0;
+    for (n=1; n<=tableLog; n++)
+    {
+        U32 current = nextRankStart;
+        nextRankStart += (rankVal[n] << (n-1));
+        rankVal[n] = current;
+    }
+
+    /* fill DTable */
+    for (n=0; n<nbSymbols; n++)
+    {
+        const U32 w = huffWeight[n];
+        const U32 length = (1 << w) >> 1;
+        U32 i;
+        HUF_DEltX2 D;
+        D.byte = (BYTE)n; D.nbBits = (BYTE)(tableLog + 1 - w);
+        for (i = rankVal[w]; i < rankVal[w] + length; i++)
+            dt[i] = D;
+        rankVal[w] += length;
+    }
+
+    return iSize;
+}
+
+static BYTE HUF_decodeSymbolX2(BIT_DStream_t* Dstream, const HUF_DEltX2* dt, const U32 dtLog)
+{
+        const size_t val = BIT_lookBitsFast(Dstream, dtLog); /* note : dtLog >= 1 */
+        const BYTE c = dt[val].byte;
+        BIT_skipBits(Dstream, dt[val].nbBits);
+        return c;
+}
+
+#define HUF_DECODE_SYMBOLX2_0(ptr, DStreamPtr) \
+    *ptr++ = HUF_decodeSymbolX2(DStreamPtr, dt, dtLog)
+
+#define HUF_DECODE_SYMBOLX2_1(ptr, DStreamPtr) \
+    if (MEM_64bits() || (HUF_MAX_TABLELOG<=12)) \
+        HUF_DECODE_SYMBOLX2_0(ptr, DStreamPtr)
+
+#define HUF_DECODE_SYMBOLX2_2(ptr, DStreamPtr) \
+    if (MEM_64bits()) \
+        HUF_DECODE_SYMBOLX2_0(ptr, DStreamPtr)
+
+static inline size_t HUF_decodeStreamX2(BYTE* p, BIT_DStream_t* const bitDPtr, BYTE* const pEnd, const HUF_DEltX2* const dt, const U32 dtLog)
+{
+    BYTE* const pStart = p;
+
+    /* up to 4 symbols at a time */
+    while ((BIT_reloadDStream(bitDPtr) == BIT_DStream_unfinished) && (p <= pEnd-4))
+    {
+        HUF_DECODE_SYMBOLX2_2(p, bitDPtr);
+        HUF_DECODE_SYMBOLX2_1(p, bitDPtr);
+        HUF_DECODE_SYMBOLX2_2(p, bitDPtr);
+        HUF_DECODE_SYMBOLX2_0(p, bitDPtr);
+    }
+
+    /* closer to the end */
+    while ((BIT_reloadDStream(bitDPtr) == BIT_DStream_unfinished) && (p < pEnd))
+        HUF_DECODE_SYMBOLX2_0(p, bitDPtr);
+
+    /* no more data to retrieve from bitstream, hence no need to reload */
+    while (p < pEnd)
+        HUF_DECODE_SYMBOLX2_0(p, bitDPtr);
+
+    return pEnd-pStart;
+}
+
+
+static size_t HUF_decompress4X2_usingDTable(
+          void* dst,  size_t dstSize,
+    const void* cSrc, size_t cSrcSize,
+    const U16* DTable)
+{
+    if (cSrcSize < 10) return ERROR(corruption_detected);   /* strict minimum : jump table + 1 byte per stream */
+
+    {
+        const BYTE* const istart = (const BYTE*) cSrc;
+        BYTE* const ostart = (BYTE*) dst;
+        BYTE* const oend = ostart + dstSize;
+        const void* const dtPtr = DTable;
+        const HUF_DEltX2* const dt = ((const HUF_DEltX2*)dtPtr) +1;
+        const U32 dtLog = DTable[0];
+        size_t errorCode;
+
+        /* Init */
+        BIT_DStream_t bitD1;
+        BIT_DStream_t bitD2;
+        BIT_DStream_t bitD3;
+        BIT_DStream_t bitD4;
+        const size_t length1 = MEM_readLE16(istart);
+        const size_t length2 = MEM_readLE16(istart+2);
+        const size_t length3 = MEM_readLE16(istart+4);
+        size_t length4;
+        const BYTE* const istart1 = istart + 6;  /* jumpTable */
+        const BYTE* const istart2 = istart1 + length1;
+        const BYTE* const istart3 = istart2 + length2;
+        const BYTE* const istart4 = istart3 + length3;
+        const size_t segmentSize = (dstSize+3) / 4;
+        BYTE* const opStart2 = ostart + segmentSize;
+        BYTE* const opStart3 = opStart2 + segmentSize;
+        BYTE* const opStart4 = opStart3 + segmentSize;
+        BYTE* op1 = ostart;
+        BYTE* op2 = opStart2;
+        BYTE* op3 = opStart3;
+        BYTE* op4 = opStart4;
+        U32 endSignal;
+
+        length4 = cSrcSize - (length1 + length2 + length3 + 6);
+        if (length4 > cSrcSize) return ERROR(corruption_detected);   /* overflow */
+        errorCode = BIT_initDStream(&bitD1, istart1, length1);
+        if (HUF_isError(errorCode)) return errorCode;
+        errorCode = BIT_initDStream(&bitD2, istart2, length2);
+        if (HUF_isError(errorCode)) return errorCode;
+        errorCode = BIT_initDStream(&bitD3, istart3, length3);
+        if (HUF_isError(errorCode)) return errorCode;
+        errorCode = BIT_initDStream(&bitD4, istart4, length4);
+        if (HUF_isError(errorCode)) return errorCode;
+
+        /* 16-32 symbols per loop (4-8 symbols per stream) */
+        endSignal = BIT_reloadDStream(&bitD1) | BIT_reloadDStream(&bitD2) | BIT_reloadDStream(&bitD3) | BIT_reloadDStream(&bitD4);
+        for ( ; (endSignal==BIT_DStream_unfinished) && (op4<(oend-7)) ; )
+        {
+            HUF_DECODE_SYMBOLX2_2(op1, &bitD1);
+            HUF_DECODE_SYMBOLX2_2(op2, &bitD2);
+            HUF_DECODE_SYMBOLX2_2(op3, &bitD3);
+            HUF_DECODE_SYMBOLX2_2(op4, &bitD4);
+            HUF_DECODE_SYMBOLX2_1(op1, &bitD1);
+            HUF_DECODE_SYMBOLX2_1(op2, &bitD2);
+            HUF_DECODE_SYMBOLX2_1(op3, &bitD3);
+            HUF_DECODE_SYMBOLX2_1(op4, &bitD4);
+            HUF_DECODE_SYMBOLX2_2(op1, &bitD1);
+            HUF_DECODE_SYMBOLX2_2(op2, &bitD2);
+            HUF_DECODE_SYMBOLX2_2(op3, &bitD3);
+            HUF_DECODE_SYMBOLX2_2(op4, &bitD4);
+            HUF_DECODE_SYMBOLX2_0(op1, &bitD1);
+            HUF_DECODE_SYMBOLX2_0(op2, &bitD2);
+            HUF_DECODE_SYMBOLX2_0(op3, &bitD3);
+            HUF_DECODE_SYMBOLX2_0(op4, &bitD4);
+
+            endSignal = BIT_reloadDStream(&bitD1) | BIT_reloadDStream(&bitD2) | BIT_reloadDStream(&bitD3) | BIT_reloadDStream(&bitD4);
+        }
+
+        /* check corruption */
+        if (op1 > opStart2) return ERROR(corruption_detected);
+        if (op2 > opStart3) return ERROR(corruption_detected);
+        if (op3 > opStart4) return ERROR(corruption_detected);
+        /* note : op4 supposed already verified within main loop */
+
+        /* finish bitStreams one by one */
+        HUF_decodeStreamX2(op1, &bitD1, opStart2, dt, dtLog);
+        HUF_decodeStreamX2(op2, &bitD2, opStart3, dt, dtLog);
+        HUF_decodeStreamX2(op3, &bitD3, opStart4, dt, dtLog);
+        HUF_decodeStreamX2(op4, &bitD4, oend,     dt, dtLog);
+
+        /* check */
+        endSignal = BIT_endOfDStream(&bitD1) & BIT_endOfDStream(&bitD2) & BIT_endOfDStream(&bitD3) & BIT_endOfDStream(&bitD4);
+        if (!endSignal) return ERROR(corruption_detected);
+
+        /* decoded size */
+        return dstSize;
+    }
+}
+
+
+static size_t HUF_decompress4X2 (void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize)
+{
+    HUF_CREATE_STATIC_DTABLEX2(DTable, HUF_MAX_TABLELOG);
+    const BYTE* ip = (const BYTE*) cSrc;
+    size_t errorCode;
+
+    errorCode = HUF_readDTableX2 (DTable, cSrc, cSrcSize);
+    if (HUF_isError(errorCode)) return errorCode;
+    if (errorCode >= cSrcSize) return ERROR(srcSize_wrong);
+    ip += errorCode;
+    cSrcSize -= errorCode;
+
+    return HUF_decompress4X2_usingDTable (dst, dstSize, ip, cSrcSize, DTable);
+}
+
+
+/***************************/
+/* double-symbols decoding */
+/***************************/
+
+static void HUF_fillDTableX4Level2(HUF_DEltX4* DTable, U32 sizeLog, const U32 consumed,
+                           const U32* rankValOrigin, const int minWeight,
+                           const sortedSymbol_t* sortedSymbols, const U32 sortedListSize,
+                           U32 nbBitsBaseline, U16 baseSeq)
+{
+    HUF_DEltX4 DElt;
+    U32 rankVal[HUF_ABSOLUTEMAX_TABLELOG + 1];
+    U32 s;
+
+    /* get pre-calculated rankVal */
+    memcpy(rankVal, rankValOrigin, sizeof(rankVal));
+
+    /* fill skipped values */
+    if (minWeight>1)
+    {
+        U32 i, skipSize = rankVal[minWeight];
+        MEM_writeLE16(&(DElt.sequence), baseSeq);
+        DElt.nbBits   = (BYTE)(consumed);
+        DElt.length   = 1;
+        for (i = 0; i < skipSize; i++)
+            DTable[i] = DElt;
+    }
+
+    /* fill DTable */
+    for (s=0; s<sortedListSize; s++)   /* note : sortedSymbols already skipped */
+    {
+        const U32 symbol = sortedSymbols[s].symbol;
+        const U32 weight = sortedSymbols[s].weight;
+        const U32 nbBits = nbBitsBaseline - weight;
+        const U32 length = 1 << (sizeLog-nbBits);
+        const U32 start = rankVal[weight];
+        U32 i = start;
+        const U32 end = start + length;
+
+        MEM_writeLE16(&(DElt.sequence), (U16)(baseSeq + (symbol << 8)));
+        DElt.nbBits = (BYTE)(nbBits + consumed);
+        DElt.length = 2;
+        do { DTable[i++] = DElt; } while (i<end);   /* since length >= 1 */
+
+        rankVal[weight] += length;
+    }
+}
+
+typedef U32 rankVal_t[HUF_ABSOLUTEMAX_TABLELOG][HUF_ABSOLUTEMAX_TABLELOG + 1];
+
+static void HUF_fillDTableX4(HUF_DEltX4* DTable, const U32 targetLog,
+                           const sortedSymbol_t* sortedList, const U32 sortedListSize,
+                           const U32* rankStart, rankVal_t rankValOrigin, const U32 maxWeight,
+                           const U32 nbBitsBaseline)
+{
+    U32 rankVal[HUF_ABSOLUTEMAX_TABLELOG + 1];
+    const int scaleLog = nbBitsBaseline - targetLog;   /* note : targetLog >= srcLog, hence scaleLog <= 1 */
+    const U32 minBits  = nbBitsBaseline - maxWeight;
+    U32 s;
+
+    memcpy(rankVal, rankValOrigin, sizeof(rankVal));
+
+    /* fill DTable */
+    for (s=0; s<sortedListSize; s++)
+    {
+        const U16 symbol = sortedList[s].symbol;
+        const U32 weight = sortedList[s].weight;
+        const U32 nbBits = nbBitsBaseline - weight;
+        const U32 start = rankVal[weight];
+        const U32 length = 1 << (targetLog-nbBits);
+
+        if (targetLog-nbBits >= minBits)   /* enough room for a second symbol */
+        {
+            U32 sortedRank;
+            int minWeight = nbBits + scaleLog;
+            if (minWeight < 1) minWeight = 1;
+            sortedRank = rankStart[minWeight];
+            HUF_fillDTableX4Level2(DTable+start, targetLog-nbBits, nbBits,
+                           rankValOrigin[nbBits], minWeight,
+                           sortedList+sortedRank, sortedListSize-sortedRank,
+                           nbBitsBaseline, symbol);
+        }
+        else
+        {
+            U32 i;
+            const U32 end = start + length;
+            HUF_DEltX4 DElt;
+
+            MEM_writeLE16(&(DElt.sequence), symbol);
+            DElt.nbBits   = (BYTE)(nbBits);
+            DElt.length   = 1;
+            for (i = start; i < end; i++)
+                DTable[i] = DElt;
+        }
+        rankVal[weight] += length;
+    }
+}
+
+static size_t HUF_readDTableX4 (U32* DTable, const void* src, size_t srcSize)
+{
+    BYTE weightList[HUF_MAX_SYMBOL_VALUE + 1];
+    sortedSymbol_t sortedSymbol[HUF_MAX_SYMBOL_VALUE + 1];
+    U32 rankStats[HUF_ABSOLUTEMAX_TABLELOG + 1] = { 0 };
+    U32 rankStart0[HUF_ABSOLUTEMAX_TABLELOG + 2] = { 0 };
+    U32* const rankStart = rankStart0+1;
+    rankVal_t rankVal;
+    U32 tableLog, maxW, sizeOfSort, nbSymbols;
+    const U32 memLog = DTable[0];
+    size_t iSize;
+    void* dtPtr = DTable;
+    HUF_DEltX4* const dt = ((HUF_DEltX4*)dtPtr) + 1;
+
+    HUF_STATIC_ASSERT(sizeof(HUF_DEltX4) == sizeof(U32));   /* if compilation fails here, assertion is false */
+    if (memLog > HUF_ABSOLUTEMAX_TABLELOG) return ERROR(tableLog_tooLarge);
+    //memset(weightList, 0, sizeof(weightList));   /* is not necessary, even though some analyzer complain ... */
+
+    iSize = HUF_readStats(weightList, HUF_MAX_SYMBOL_VALUE + 1, rankStats, &nbSymbols, &tableLog, src, srcSize);
+    if (HUF_isError(iSize)) return iSize;
+
+    /* check result */
+    if (tableLog > memLog) return ERROR(tableLog_tooLarge);   /* DTable can't fit code depth */
+
+    /* find maxWeight */
+    for (maxW = tableLog; rankStats[maxW]==0; maxW--)
+        { if (!maxW) return ERROR(GENERIC); }  /* necessarily finds a solution before maxW==0 */
+
+    /* Get start index of each weight */
+    {
+        U32 w, nextRankStart = 0;
+        for (w=1; w<=maxW; w++)
+        {
+            U32 current = nextRankStart;
+            nextRankStart += rankStats[w];
+            rankStart[w] = current;
+        }
+        rankStart[0] = nextRankStart;   /* put all 0w symbols at the end of sorted list*/
+        sizeOfSort = nextRankStart;
+    }
+
+    /* sort symbols by weight */
+    {
+        U32 s;
+        for (s=0; s<nbSymbols; s++)
+        {
+            U32 w = weightList[s];
+            U32 r = rankStart[w]++;
+            sortedSymbol[r].symbol = (BYTE)s;
+            sortedSymbol[r].weight = (BYTE)w;
+        }
+        rankStart[0] = 0;   /* forget 0w symbols; this is beginning of weight(1) */
+    }
+
+    /* Build rankVal */
+    {
+        const U32 minBits = tableLog+1 - maxW;
+        U32 nextRankVal = 0;
+        U32 w, consumed;
+        const int rescale = (memLog-tableLog) - 1;   /* tableLog <= memLog */
+        U32* rankVal0 = rankVal[0];
+        for (w=1; w<=maxW; w++)
+        {
+            U32 current = nextRankVal;
+            nextRankVal += rankStats[w] << (w+rescale);
+            rankVal0[w] = current;
+        }
+        for (consumed = minBits; consumed <= memLog - minBits; consumed++)
+        {
+            U32* rankValPtr = rankVal[consumed];
+            for (w = 1; w <= maxW; w++)
+            {
+                rankValPtr[w] = rankVal0[w] >> consumed;
+            }
+        }
+    }
+
+    HUF_fillDTableX4(dt, memLog,
+                   sortedSymbol, sizeOfSort,
+                   rankStart0, rankVal, maxW,
+                   tableLog+1);
+
+    return iSize;
+}
+
+
+static U32 HUF_decodeSymbolX4(void* op, BIT_DStream_t* DStream, const HUF_DEltX4* dt, const U32 dtLog)
+{
+    const size_t val = BIT_lookBitsFast(DStream, dtLog);   /* note : dtLog >= 1 */
+    memcpy(op, dt+val, 2);
+    BIT_skipBits(DStream, dt[val].nbBits);
+    return dt[val].length;
+}
+
+static U32 HUF_decodeLastSymbolX4(void* op, BIT_DStream_t* DStream, const HUF_DEltX4* dt, const U32 dtLog)
+{
+    const size_t val = BIT_lookBitsFast(DStream, dtLog);   /* note : dtLog >= 1 */
+    memcpy(op, dt+val, 1);
+    if (dt[val].length==1) BIT_skipBits(DStream, dt[val].nbBits);
+    else
+    {
+        if (DStream->bitsConsumed < (sizeof(DStream->bitContainer)*8))
+        {
+            BIT_skipBits(DStream, dt[val].nbBits);
+            if (DStream->bitsConsumed > (sizeof(DStream->bitContainer)*8))
+                DStream->bitsConsumed = (sizeof(DStream->bitContainer)*8);   /* ugly hack; works only because it's the last symbol. Note : can't easily extract nbBits from just this symbol */
+        }
+    }
+    return 1;
+}
+
+
+#define HUF_DECODE_SYMBOLX4_0(ptr, DStreamPtr) \
+    ptr += HUF_decodeSymbolX4(ptr, DStreamPtr, dt, dtLog)
+
+#define HUF_DECODE_SYMBOLX4_1(ptr, DStreamPtr) \
+    if (MEM_64bits() || (HUF_MAX_TABLELOG<=12)) \
+        ptr += HUF_decodeSymbolX4(ptr, DStreamPtr, dt, dtLog)
+
+#define HUF_DECODE_SYMBOLX4_2(ptr, DStreamPtr) \
+    if (MEM_64bits()) \
+        ptr += HUF_decodeSymbolX4(ptr, DStreamPtr, dt, dtLog)
+
+static inline size_t HUF_decodeStreamX4(BYTE* p, BIT_DStream_t* bitDPtr, BYTE* const pEnd, const HUF_DEltX4* const dt, const U32 dtLog)
+{
+    BYTE* const pStart = p;
+
+    /* up to 8 symbols at a time */
+    while ((BIT_reloadDStream(bitDPtr) == BIT_DStream_unfinished) && (p < pEnd-7))
+    {
+        HUF_DECODE_SYMBOLX4_2(p, bitDPtr);
+        HUF_DECODE_SYMBOLX4_1(p, bitDPtr);
+        HUF_DECODE_SYMBOLX4_2(p, bitDPtr);
+        HUF_DECODE_SYMBOLX4_0(p, bitDPtr);
+    }
+
+    /* closer to the end */
+    while ((BIT_reloadDStream(bitDPtr) == BIT_DStream_unfinished) && (p <= pEnd-2))
+        HUF_DECODE_SYMBOLX4_0(p, bitDPtr);
+
+    while (p <= pEnd-2)
+        HUF_DECODE_SYMBOLX4_0(p, bitDPtr);   /* no need to reload : reached the end of DStream */
+
+    if (p < pEnd)
+        p += HUF_decodeLastSymbolX4(p, bitDPtr, dt, dtLog);
+
+    return p-pStart;
+}
+
+static size_t HUF_decompress4X4_usingDTable(
+          void* dst,  size_t dstSize,
+    const void* cSrc, size_t cSrcSize,
+    const U32* DTable)
+{
+    if (cSrcSize < 10) return ERROR(corruption_detected);   /* strict minimum : jump table + 1 byte per stream */
+
+    {
+        const BYTE* const istart = (const BYTE*) cSrc;
+        BYTE* const ostart = (BYTE*) dst;
+        BYTE* const oend = ostart + dstSize;
+        const void* const dtPtr = DTable;
+        const HUF_DEltX4* const dt = ((const HUF_DEltX4*)dtPtr) +1;
+        const U32 dtLog = DTable[0];
+        size_t errorCode;
+
+        /* Init */
+        BIT_DStream_t bitD1;
+        BIT_DStream_t bitD2;
+        BIT_DStream_t bitD3;
+        BIT_DStream_t bitD4;
+        const size_t length1 = MEM_readLE16(istart);
+        const size_t length2 = MEM_readLE16(istart+2);
+        const size_t length3 = MEM_readLE16(istart+4);
+        size_t length4;
+        const BYTE* const istart1 = istart + 6;  /* jumpTable */
+        const BYTE* const istart2 = istart1 + length1;
+        const BYTE* const istart3 = istart2 + length2;
+        const BYTE* const istart4 = istart3 + length3;
+        const size_t segmentSize = (dstSize+3) / 4;
+        BYTE* const opStart2 = ostart + segmentSize;
+        BYTE* const opStart3 = opStart2 + segmentSize;
+        BYTE* const opStart4 = opStart3 + segmentSize;
+        BYTE* op1 = ostart;
+        BYTE* op2 = opStart2;
+        BYTE* op3 = opStart3;
+        BYTE* op4 = opStart4;
+        U32 endSignal;
+
+        length4 = cSrcSize - (length1 + length2 + length3 + 6);
+        if (length4 > cSrcSize) return ERROR(corruption_detected);   /* overflow */
+        errorCode = BIT_initDStream(&bitD1, istart1, length1);
+        if (HUF_isError(errorCode)) return errorCode;
+        errorCode = BIT_initDStream(&bitD2, istart2, length2);
+        if (HUF_isError(errorCode)) return errorCode;
+        errorCode = BIT_initDStream(&bitD3, istart3, length3);
+        if (HUF_isError(errorCode)) return errorCode;
+        errorCode = BIT_initDStream(&bitD4, istart4, length4);
+        if (HUF_isError(errorCode)) return errorCode;
+
+        /* 16-32 symbols per loop (4-8 symbols per stream) */
+        endSignal = BIT_reloadDStream(&bitD1) | BIT_reloadDStream(&bitD2) | BIT_reloadDStream(&bitD3) | BIT_reloadDStream(&bitD4);
+        for ( ; (endSignal==BIT_DStream_unfinished) && (op4<(oend-7)) ; )
+        {
+            HUF_DECODE_SYMBOLX4_2(op1, &bitD1);
+            HUF_DECODE_SYMBOLX4_2(op2, &bitD2);
+            HUF_DECODE_SYMBOLX4_2(op3, &bitD3);
+            HUF_DECODE_SYMBOLX4_2(op4, &bitD4);
+            HUF_DECODE_SYMBOLX4_1(op1, &bitD1);
+            HUF_DECODE_SYMBOLX4_1(op2, &bitD2);
+            HUF_DECODE_SYMBOLX4_1(op3, &bitD3);
+            HUF_DECODE_SYMBOLX4_1(op4, &bitD4);
+            HUF_DECODE_SYMBOLX4_2(op1, &bitD1);
+            HUF_DECODE_SYMBOLX4_2(op2, &bitD2);
+            HUF_DECODE_SYMBOLX4_2(op3, &bitD3);
+            HUF_DECODE_SYMBOLX4_2(op4, &bitD4);
+            HUF_DECODE_SYMBOLX4_0(op1, &bitD1);
+            HUF_DECODE_SYMBOLX4_0(op2, &bitD2);
+            HUF_DECODE_SYMBOLX4_0(op3, &bitD3);
+            HUF_DECODE_SYMBOLX4_0(op4, &bitD4);
+
+            endSignal = BIT_reloadDStream(&bitD1) | BIT_reloadDStream(&bitD2) | BIT_reloadDStream(&bitD3) | BIT_reloadDStream(&bitD4);
+        }
+
+        /* check corruption */
+        if (op1 > opStart2) return ERROR(corruption_detected);
+        if (op2 > opStart3) return ERROR(corruption_detected);
+        if (op3 > opStart4) return ERROR(corruption_detected);
+        /* note : op4 supposed already verified within main loop */
+
+        /* finish bitStreams one by one */
+        HUF_decodeStreamX4(op1, &bitD1, opStart2, dt, dtLog);
+        HUF_decodeStreamX4(op2, &bitD2, opStart3, dt, dtLog);
+        HUF_decodeStreamX4(op3, &bitD3, opStart4, dt, dtLog);
+        HUF_decodeStreamX4(op4, &bitD4, oend,     dt, dtLog);
+
+        /* check */
+        endSignal = BIT_endOfDStream(&bitD1) & BIT_endOfDStream(&bitD2) & BIT_endOfDStream(&bitD3) & BIT_endOfDStream(&bitD4);
+        if (!endSignal) return ERROR(corruption_detected);
+
+        /* decoded size */
+        return dstSize;
+    }
+}
+
+
+static size_t HUF_decompress4X4 (void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize)
+{
+    HUF_CREATE_STATIC_DTABLEX4(DTable, HUF_MAX_TABLELOG);
+    const BYTE* ip = (const BYTE*) cSrc;
+
+    size_t hSize = HUF_readDTableX4 (DTable, cSrc, cSrcSize);
+    if (HUF_isError(hSize)) return hSize;
+    if (hSize >= cSrcSize) return ERROR(srcSize_wrong);
+    ip += hSize;
+    cSrcSize -= hSize;
+
+    return HUF_decompress4X4_usingDTable (dst, dstSize, ip, cSrcSize, DTable);
+}
+
+
+/**********************************/
+/* Generic decompression selector */
+/**********************************/
+
+typedef struct { U32 tableTime; U32 decode256Time; } algo_time_t;
+static const algo_time_t algoTime[16 /* Quantization */][3 /* single, double, quad */] =
+{
+    /* single, double, quad */
+    {{0,0}, {1,1}, {2,2}},  /* Q==0 : impossible */
+    {{0,0}, {1,1}, {2,2}},  /* Q==1 : impossible */
+    {{  38,130}, {1313, 74}, {2151, 38}},   /* Q == 2 : 12-18% */
+    {{ 448,128}, {1353, 74}, {2238, 41}},   /* Q == 3 : 18-25% */
+    {{ 556,128}, {1353, 74}, {2238, 47}},   /* Q == 4 : 25-32% */
+    {{ 714,128}, {1418, 74}, {2436, 53}},   /* Q == 5 : 32-38% */
+    {{ 883,128}, {1437, 74}, {2464, 61}},   /* Q == 6 : 38-44% */
+    {{ 897,128}, {1515, 75}, {2622, 68}},   /* Q == 7 : 44-50% */
+    {{ 926,128}, {1613, 75}, {2730, 75}},   /* Q == 8 : 50-56% */
+    {{ 947,128}, {1729, 77}, {3359, 77}},   /* Q == 9 : 56-62% */
+    {{1107,128}, {2083, 81}, {4006, 84}},   /* Q ==10 : 62-69% */
+    {{1177,128}, {2379, 87}, {4785, 88}},   /* Q ==11 : 69-75% */
+    {{1242,128}, {2415, 93}, {5155, 84}},   /* Q ==12 : 75-81% */
+    {{1349,128}, {2644,106}, {5260,106}},   /* Q ==13 : 81-87% */
+    {{1455,128}, {2422,124}, {4174,124}},   /* Q ==14 : 87-93% */
+    {{ 722,128}, {1891,145}, {1936,146}},   /* Q ==15 : 93-99% */
+};
+
+typedef size_t (*decompressionAlgo)(void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize);
+
+static size_t HUF_decompress (void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize)
+{
+    static const decompressionAlgo decompress[3] = { HUF_decompress4X2, HUF_decompress4X4, NULL };
+    /* estimate decompression time */
+    U32 Q;
+    const U32 D256 = (U32)(dstSize >> 8);
+    U32 Dtime[3];
+    U32 algoNb = 0;
+    int n;
+
+    /* validation checks */
+    if (dstSize == 0) return ERROR(dstSize_tooSmall);
+    if (cSrcSize > dstSize) return ERROR(corruption_detected);   /* invalid */
+    if (cSrcSize == dstSize) { memcpy(dst, cSrc, dstSize); return dstSize; }   /* not compressed */
+    if (cSrcSize == 1) { memset(dst, *(const BYTE*)cSrc, dstSize); return dstSize; }   /* RLE */
+
+    /* decoder timing evaluation */
+    Q = (U32)(cSrcSize * 16 / dstSize);   /* Q < 16 since dstSize > cSrcSize */
+    for (n=0; n<3; n++)
+        Dtime[n] = algoTime[Q][n].tableTime + (algoTime[Q][n].decode256Time * D256);
+
+    Dtime[1] += Dtime[1] >> 4; Dtime[2] += Dtime[2] >> 3; /* advantage to algorithms using less memory, for cache eviction */
+
+    if (Dtime[1] < Dtime[0]) algoNb = 1;
+
+    return decompress[algoNb](dst, dstSize, cSrc, cSrcSize);
+
+    //return HUF_decompress4X2(dst, dstSize, cSrc, cSrcSize);   /* multi-streams single-symbol decoding */
+    //return HUF_decompress4X4(dst, dstSize, cSrc, cSrcSize);   /* multi-streams double-symbols decoding */
+    //return HUF_decompress4X6(dst, dstSize, cSrc, cSrcSize);   /* multi-streams quad-symbols decoding */
+}
+
+
+
+#endif   /* ZSTD_CCOMMON_H_MODULE */
+
+
+/*
+    zstd - decompression module fo v0.4 legacy format
+    Copyright (C) 2015-2016, Yann Collet.
+
+    BSD 2-Clause License (http://www.opensource.org/licenses/bsd-license.php)
+
+    Redistribution and use in source and binary forms, with or without
+    modification, are permitted provided that the following conditions are
+    met:
+    * Redistributions of source code must retain the above copyright
+    notice, this list of conditions and the following disclaimer.
+    * Redistributions in binary form must reproduce the above
+    copyright notice, this list of conditions and the following disclaimer
+    in the documentation and/or other materials provided with the
+    distribution.
+    THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+    "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+    LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+    A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+    OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+    SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+    LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+    DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+    THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+    (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+    OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+    You can contact the author at :
+    - zstd source repository : https://github.com/Cyan4973/zstd
+    - ztsd public forum : https://groups.google.com/forum/#!forum/lz4c
+*/
+
+/* ***************************************************************
+*  Tuning parameters
+*****************************************************************/
+/*!
+ * HEAPMODE :
+ * Select how default decompression function ZSTD_decompress() will allocate memory,
+ * in memory stack (0), or in memory heap (1, requires malloc())
+ */
+#ifndef ZSTD_HEAPMODE
+#  define ZSTD_HEAPMODE 1
+#endif
+
+
+/* *******************************************************
+*  Includes
+*********************************************************/
+#include <stdlib.h>      /* calloc */
+#include <string.h>      /* memcpy, memmove */
+#include <stdio.h>       /* debug : printf */
+
+
+/* *******************************************************
+*  Compiler specifics
+*********************************************************/
+#ifdef _MSC_VER    /* Visual Studio */
+#  include <intrin.h>                    /* For Visual 2005 */
+#  pragma warning(disable : 4127)        /* disable: C4127: conditional expression is constant */
+#  pragma warning(disable : 4324)        /* disable: C4324: padded structure */
+#endif
+
+
+/* *************************************
+*  Local types
+***************************************/
+typedef struct
+{
+    blockType_t blockType;
+    U32 origSize;
+} blockProperties_t;
+
+
+/* *******************************************************
+*  Memory operations
+**********************************************************/
+static void ZSTD_copy4(void* dst, const void* src) { memcpy(dst, src, 4); }
+
+
+/* *************************************
+*  Error Management
+***************************************/
+
+/*! ZSTD_isError
+*   tells if a return value is an error code */
+static unsigned ZSTD_isError(size_t code) { return ERR_isError(code); }
+
+
+/* *************************************************************
+*   Context management
+***************************************************************/
+typedef enum { ZSTDds_getFrameHeaderSize, ZSTDds_decodeFrameHeader,
+               ZSTDds_decodeBlockHeader, ZSTDds_decompressBlock } ZSTD_dStage;
+
+struct ZSTDv04_Dctx_s
+{
+    U32 LLTable[FSE_DTABLE_SIZE_U32(LLFSELog)];
+    U32 OffTable[FSE_DTABLE_SIZE_U32(OffFSELog)];
+    U32 MLTable[FSE_DTABLE_SIZE_U32(MLFSELog)];
+    const void* previousDstEnd;
+    const void* base;
+    const void* vBase;
+    const void* dictEnd;
+    size_t expected;
+    size_t headerSize;
+    ZSTD_parameters params;
+    blockType_t bType;
+    ZSTD_dStage stage;
+    const BYTE* litPtr;
+    size_t litSize;
+    BYTE litBuffer[BLOCKSIZE + 8 /* margin for wildcopy */];
+    BYTE headerBuffer[ZSTD_frameHeaderSize_max];
+};  /* typedef'd to ZSTD_DCtx within "zstd_static.h" */
+
+static size_t ZSTD_resetDCtx(ZSTD_DCtx* dctx)
+{
+    dctx->expected = ZSTD_frameHeaderSize_min;
+    dctx->stage = ZSTDds_getFrameHeaderSize;
+    dctx->previousDstEnd = NULL;
+    dctx->base = NULL;
+    dctx->vBase = NULL;
+    dctx->dictEnd = NULL;
+    return 0;
+}
+
+static ZSTD_DCtx* ZSTD_createDCtx(void)
+{
+    ZSTD_DCtx* dctx = (ZSTD_DCtx*)malloc(sizeof(ZSTD_DCtx));
+    if (dctx==NULL) return NULL;
+    ZSTD_resetDCtx(dctx);
+    return dctx;
+}
+
+static size_t ZSTD_freeDCtx(ZSTD_DCtx* dctx)
+{
+    free(dctx);
+    return 0;
+}
+
+
+/* *************************************************************
+*   Decompression section
+***************************************************************/
+/** ZSTD_decodeFrameHeader_Part1
+*   decode the 1st part of the Frame Header, which tells Frame Header size.
+*   srcSize must be == ZSTD_frameHeaderSize_min
+*   @return : the full size of the Frame Header */
+static size_t ZSTD_decodeFrameHeader_Part1(ZSTD_DCtx* zc, const void* src, size_t srcSize)
+{
+    U32 magicNumber;
+    if (srcSize != ZSTD_frameHeaderSize_min) return ERROR(srcSize_wrong);
+    magicNumber = MEM_readLE32(src);
+    if (magicNumber != ZSTD_MAGICNUMBER) return ERROR(prefix_unknown);
+    zc->headerSize = ZSTD_frameHeaderSize_min;
+    return zc->headerSize;
+}
+
+
+static size_t ZSTD_getFrameParams(ZSTD_parameters* params, const void* src, size_t srcSize)
+{
+    U32 magicNumber;
+    if (srcSize < ZSTD_frameHeaderSize_min) return ZSTD_frameHeaderSize_max;
+    magicNumber = MEM_readLE32(src);
+    if (magicNumber != ZSTD_MAGICNUMBER) return ERROR(prefix_unknown);
+    memset(params, 0, sizeof(*params));
+    params->windowLog = (((const BYTE*)src)[4] & 15) + ZSTD_WINDOWLOG_ABSOLUTEMIN;
+    if ((((const BYTE*)src)[4] >> 4) != 0) return ERROR(frameParameter_unsupported);   /* reserved bits */
+    return 0;
+}
+
+/** ZSTD_decodeFrameHeader_Part2
+*   decode the full Frame Header
+*   srcSize must be the size provided by ZSTD_decodeFrameHeader_Part1
+*   @return : 0, or an error code, which can be tested using ZSTD_isError() */
+static size_t ZSTD_decodeFrameHeader_Part2(ZSTD_DCtx* zc, const void* src, size_t srcSize)
+{
+    size_t result;
+    if (srcSize != zc->headerSize) return ERROR(srcSize_wrong);
+    result = ZSTD_getFrameParams(&(zc->params), src, srcSize);
+    if ((MEM_32bits()) && (zc->params.windowLog > 25)) return ERROR(frameParameter_unsupportedBy32bits);
+    return result;
+}
+
+
+static size_t ZSTD_getcBlockSize(const void* src, size_t srcSize, blockProperties_t* bpPtr)
+{
+    const BYTE* const in = (const BYTE* const)src;
+    BYTE headerFlags;
+    U32 cSize;
+
+    if (srcSize < 3) return ERROR(srcSize_wrong);
+
+    headerFlags = *in;
+    cSize = in[2] + (in[1]<<8) + ((in[0] & 7)<<16);
+
+    bpPtr->blockType = (blockType_t)(headerFlags >> 6);
+    bpPtr->origSize = (bpPtr->blockType == bt_rle) ? cSize : 0;
+
+    if (bpPtr->blockType == bt_end) return 0;
+    if (bpPtr->blockType == bt_rle) return 1;
+    return cSize;
+}
+
+static size_t ZSTD_copyRawBlock(void* dst, size_t maxDstSize, const void* src, size_t srcSize)
+{
+    if (srcSize > maxDstSize) return ERROR(dstSize_tooSmall);
+    memcpy(dst, src, srcSize);
+    return srcSize;
+}
+
+
+/** ZSTD_decompressLiterals
+    @return : nb of bytes read from src, or an error code*/
+static size_t ZSTD_decompressLiterals(void* dst, size_t* maxDstSizePtr,
+                                const void* src, size_t srcSize)
+{
+    const BYTE* ip = (const BYTE*)src;
+
+    const size_t litSize = (MEM_readLE32(src) & 0x1FFFFF) >> 2;   /* no buffer issue : srcSize >= MIN_CBLOCK_SIZE */
+    const size_t litCSize = (MEM_readLE32(ip+2) & 0xFFFFFF) >> 5;   /* no buffer issue : srcSize >= MIN_CBLOCK_SIZE */
+
+    if (litSize > *maxDstSizePtr) return ERROR(corruption_detected);
+    if (litCSize + 5 > srcSize) return ERROR(corruption_detected);
+
+    if (HUF_isError(HUF_decompress(dst, litSize, ip+5, litCSize))) return ERROR(corruption_detected);
+
+    *maxDstSizePtr = litSize;
+    return litCSize + 5;
+}
+
+
+/** ZSTD_decodeLiteralsBlock
+    @return : nb of bytes read from src (< srcSize ) */
+static size_t ZSTD_decodeLiteralsBlock(ZSTD_DCtx* dctx,
+                          const void* src, size_t srcSize)   /* note : srcSize < BLOCKSIZE */
+{
+    const BYTE* const istart = (const BYTE*) src;
+
+    /* any compressed block with literals segment must be at least this size */
+    if (srcSize < MIN_CBLOCK_SIZE) return ERROR(corruption_detected);
+
+    switch(*istart & 3)
+    {
+    /* compressed */
+    case 0:
+        {
+            size_t litSize = BLOCKSIZE;
+            const size_t readSize = ZSTD_decompressLiterals(dctx->litBuffer, &litSize, src, srcSize);
+            dctx->litPtr = dctx->litBuffer;
+            dctx->litSize = litSize;
+            memset(dctx->litBuffer + dctx->litSize, 0, 8);
+            return readSize;   /* works if it's an error too */
+        }
+    case IS_RAW:
+        {
+            const size_t litSize = (MEM_readLE32(istart) & 0xFFFFFF) >> 2;   /* no buffer issue : srcSize >= MIN_CBLOCK_SIZE */
+            if (litSize > srcSize-11)   /* risk of reading too far with wildcopy */
+            {
+                if (litSize > srcSize-3) return ERROR(corruption_detected);
+                memcpy(dctx->litBuffer, istart, litSize);
+                dctx->litPtr = dctx->litBuffer;
+                dctx->litSize = litSize;
+                memset(dctx->litBuffer + dctx->litSize, 0, 8);
+                return litSize+3;
+            }
+            /* direct reference into compressed stream */
+            dctx->litPtr = istart+3;
+            dctx->litSize = litSize;
+            return litSize+3;        }
+    case IS_RLE:
+        {
+            const size_t litSize = (MEM_readLE32(istart) & 0xFFFFFF) >> 2;   /* no buffer issue : srcSize >= MIN_CBLOCK_SIZE */
+            if (litSize > BLOCKSIZE) return ERROR(corruption_detected);
+            memset(dctx->litBuffer, istart[3], litSize + 8);
+            dctx->litPtr = dctx->litBuffer;
+            dctx->litSize = litSize;
+            return 4;
+        }
+    default:
+        return ERROR(corruption_detected);   /* forbidden nominal case */
+    }
+}
+
+
+static size_t ZSTD_decodeSeqHeaders(int* nbSeq, const BYTE** dumpsPtr, size_t* dumpsLengthPtr,
+                         FSE_DTable* DTableLL, FSE_DTable* DTableML, FSE_DTable* DTableOffb,
+                         const void* src, size_t srcSize)
+{
+    const BYTE* const istart = (const BYTE* const)src;
+    const BYTE* ip = istart;
+    const BYTE* const iend = istart + srcSize;
+    U32 LLtype, Offtype, MLtype;
+    U32 LLlog, Offlog, MLlog;
+    size_t dumpsLength;
+
+    /* check */
+    if (srcSize < 5) return ERROR(srcSize_wrong);
+
+    /* SeqHead */
+    *nbSeq = MEM_readLE16(ip); ip+=2;
+    LLtype  = *ip >> 6;
+    Offtype = (*ip >> 4) & 3;
+    MLtype  = (*ip >> 2) & 3;
+    if (*ip & 2)
+    {
+        dumpsLength  = ip[2];
+        dumpsLength += ip[1] << 8;
+        ip += 3;
+    }
+    else
+    {
+        dumpsLength  = ip[1];
+        dumpsLength += (ip[0] & 1) << 8;
+        ip += 2;
+    }
+    *dumpsPtr = ip;
+    ip += dumpsLength;
+    *dumpsLengthPtr = dumpsLength;
+
+    /* check */
+    if (ip > iend-3) return ERROR(srcSize_wrong); /* min : all 3 are "raw", hence no header, but at least xxLog bits per type */
+
+    /* sequences */
+    {
+        S16 norm[MaxML+1];    /* assumption : MaxML >= MaxLL >= MaxOff */
+        size_t headerSize;
+
+        /* Build DTables */
+        switch(LLtype)
+        {
+        case bt_rle :
+            LLlog = 0;
+            FSE_buildDTable_rle(DTableLL, *ip++); break;
+        case bt_raw :
+            LLlog = LLbits;
+            FSE_buildDTable_raw(DTableLL, LLbits); break;
+        default :
+            {   U32 max = MaxLL;
+                headerSize = FSE_readNCount(norm, &max, &LLlog, ip, iend-ip);
+                if (FSE_isError(headerSize)) return ERROR(GENERIC);
+                if (LLlog > LLFSELog) return ERROR(corruption_detected);
+                ip += headerSize;
+                FSE_buildDTable(DTableLL, norm, max, LLlog);
+        }   }
+
+        switch(Offtype)
+        {
+        case bt_rle :
+            Offlog = 0;
+            if (ip > iend-2) return ERROR(srcSize_wrong);   /* min : "raw", hence no header, but at least xxLog bits */
+            FSE_buildDTable_rle(DTableOffb, *ip++ & MaxOff); /* if *ip > MaxOff, data is corrupted */
+            break;
+        case bt_raw :
+            Offlog = Offbits;
+            FSE_buildDTable_raw(DTableOffb, Offbits); break;
+        default :
+            {   U32 max = MaxOff;
+                headerSize = FSE_readNCount(norm, &max, &Offlog, ip, iend-ip);
+                if (FSE_isError(headerSize)) return ERROR(GENERIC);
+                if (Offlog > OffFSELog) return ERROR(corruption_detected);
+                ip += headerSize;
+                FSE_buildDTable(DTableOffb, norm, max, Offlog);
+        }   }
+
+        switch(MLtype)
+        {
+        case bt_rle :
+            MLlog = 0;
+            if (ip > iend-2) return ERROR(srcSize_wrong); /* min : "raw", hence no header, but at least xxLog bits */
+            FSE_buildDTable_rle(DTableML, *ip++); break;
+        case bt_raw :
+            MLlog = MLbits;
+            FSE_buildDTable_raw(DTableML, MLbits); break;
+        default :
+            {   U32 max = MaxML;
+                headerSize = FSE_readNCount(norm, &max, &MLlog, ip, iend-ip);
+                if (FSE_isError(headerSize)) return ERROR(GENERIC);
+                if (MLlog > MLFSELog) return ERROR(corruption_detected);
+                ip += headerSize;
+                FSE_buildDTable(DTableML, norm, max, MLlog);
+    }   }   }
+
+    return ip-istart;
+}
+
+
+typedef struct {
+    size_t litLength;
+    size_t offset;
+    size_t matchLength;
+} seq_t;
+
+typedef struct {
+    BIT_DStream_t DStream;
+    FSE_DState_t stateLL;
+    FSE_DState_t stateOffb;
+    FSE_DState_t stateML;
+    size_t prevOffset;
+    const BYTE* dumps;
+    const BYTE* dumpsEnd;
+} seqState_t;
+
+
+static void ZSTD_decodeSequence(seq_t* seq, seqState_t* seqState)
+{
+    size_t litLength;
+    size_t prevOffset;
+    size_t offset;
+    size_t matchLength;
+    const BYTE* dumps = seqState->dumps;
+    const BYTE* const de = seqState->dumpsEnd;
+
+    /* Literal length */
+    litLength = FSE_decodeSymbol(&(seqState->stateLL), &(seqState->DStream));
+    prevOffset = litLength ? seq->offset : seqState->prevOffset;
+    if (litLength == MaxLL) {
+        U32 add = *dumps++;
+        if (add < 255) litLength += add;
+        else {
+            litLength = dumps[0] + (dumps[1]<<8) + (dumps[2]<<16);
+            dumps += 3;
+        }
+        if (dumps > de) { litLength = MaxLL+255; }  /* late correction, to avoid using uninitialized memory */
+        if (dumps >= de) { dumps = de-1; }  /* late correction, to avoid read overflow (data is now corrupted anyway) */
+    }
+
+    /* Offset */
+    {   static const U32 offsetPrefix[MaxOff+1] = {
+                1 /*fake*/, 1, 2, 4, 8, 16, 32, 64, 128, 256,
+                512, 1024, 2048, 4096, 8192, 16384, 32768, 65536, 131072, 262144,
+                524288, 1048576, 2097152, 4194304, 8388608, 16777216, 33554432, /*fake*/ 1, 1, 1, 1, 1 };
+        U32 offsetCode, nbBits;
+        offsetCode = FSE_decodeSymbol(&(seqState->stateOffb), &(seqState->DStream));   /* <= maxOff, by table construction */
+        if (MEM_32bits()) BIT_reloadDStream(&(seqState->DStream));
+        nbBits = offsetCode - 1;
+        if (offsetCode==0) nbBits = 0;   /* cmove */
+        offset = offsetPrefix[offsetCode] + BIT_readBits(&(seqState->DStream), nbBits);
+        if (MEM_32bits()) BIT_reloadDStream(&(seqState->DStream));
+        if (offsetCode==0) offset = prevOffset;   /* cmove */
+        if (offsetCode | !litLength) seqState->prevOffset = seq->offset;   /* cmove */
+    }
+
+    /* MatchLength */
+    matchLength = FSE_decodeSymbol(&(seqState->stateML), &(seqState->DStream));
+    if (matchLength == MaxML) {
+        U32 add = *dumps++;
+        if (add < 255) matchLength += add;
+        else {
+            matchLength = dumps[0] + (dumps[1]<<8) + (dumps[2]<<16);
+            dumps += 3;
+        }
+        if (dumps > de) { matchLength = MaxML+255; }  /* late correction, to avoid using uninitialized memory */
+        if (dumps >= de) { dumps = de-1; }  /* late correction, to avoid read overflow (data is now corrupted anyway) */
+    }
+    matchLength += MINMATCH;
+
+    /* save result */
+    seq->litLength = litLength;
+    seq->offset = offset;
+    seq->matchLength = matchLength;
+    seqState->dumps = dumps;
+}
+
+
+static size_t ZSTD_execSequence(BYTE* op,
+                                BYTE* const oend, seq_t sequence,
+                                const BYTE** litPtr, const BYTE* const litLimit,
+                                const BYTE* const base, const BYTE* const vBase, const BYTE* const dictEnd)
+{
+    static const int dec32table[] = { 0, 1, 2, 1, 4, 4, 4, 4 };   /* added */
+    static const int dec64table[] = { 8, 8, 8, 7, 8, 9,10,11 };   /* substracted */
+    BYTE* const oLitEnd = op + sequence.litLength;
+    const size_t sequenceLength = sequence.litLength + sequence.matchLength;
+    BYTE* const oMatchEnd = op + sequenceLength;   /* risk : address space overflow (32-bits) */
+    BYTE* const oend_8 = oend-8;
+    const BYTE* const litEnd = *litPtr + sequence.litLength;
+    const BYTE* match = oLitEnd - sequence.offset;
+
+    /* check */
+    if (oLitEnd > oend_8) return ERROR(dstSize_tooSmall);   /* last match must start at a minimum distance of 8 from oend */
+    if (oMatchEnd > oend) return ERROR(dstSize_tooSmall);   /* overwrite beyond dst buffer */
+    if (litEnd > litLimit) return ERROR(corruption_detected);   /* risk read beyond lit buffer */
+
+    /* copy Literals */
+    ZSTD_wildcopy(op, *litPtr, sequence.litLength);   /* note : oLitEnd <= oend-8 : no risk of overwrite beyond oend */
+    op = oLitEnd;
+    *litPtr = litEnd;   /* update for next sequence */
+
+    /* copy Match */
+    if (sequence.offset > (size_t)(oLitEnd - base))
+    {
+        /* offset beyond prefix */
+        if (sequence.offset > (size_t)(oLitEnd - vBase))
+            return ERROR(corruption_detected);
+        match = dictEnd - (base-match);
+        if (match + sequence.matchLength <= dictEnd)
+        {
+            memmove(oLitEnd, match, sequence.matchLength);
+            return sequenceLength;
+        }
+        /* span extDict & currentPrefixSegment */
+        {
+            size_t length1 = dictEnd - match;
+            memmove(oLitEnd, match, length1);
+            op = oLitEnd + length1;
+            sequence.matchLength -= length1;
+            match = base;
+            if (op > oend_8 || sequence.matchLength < MINMATCH) {
+              while (op < oMatchEnd) *op++ = *match++;
+              return sequenceLength;
+            }
+        }
+    }
+    /* Requirement: op <= oend_8 */
+
+    /* match within prefix */
+    if (sequence.offset < 8) {
+        /* close range match, overlap */
+        const int sub2 = dec64table[sequence.offset];
+        op[0] = match[0];
+        op[1] = match[1];
+        op[2] = match[2];
+        op[3] = match[3];
+        match += dec32table[sequence.offset];
+        ZSTD_copy4(op+4, match);
+        match -= sub2;
+    } else {
+        ZSTD_copy8(op, match);
+    }
+    op += 8; match += 8;
+
+    if (oMatchEnd > oend-(16-MINMATCH))
+    {
+        if (op < oend_8)
+        {
+            ZSTD_wildcopy(op, match, oend_8 - op);
+            match += oend_8 - op;
+            op = oend_8;
+        }
+        while (op < oMatchEnd) *op++ = *match++;
+    }
+    else
+    {
+        ZSTD_wildcopy(op, match, (ptrdiff_t)sequence.matchLength-8);   /* works even if matchLength < 8 */
+    }
+    return sequenceLength;
+}
+
+
+static size_t ZSTD_decompressSequences(
+                               ZSTD_DCtx* dctx,
+                               void* dst, size_t maxDstSize,
+                         const void* seqStart, size_t seqSize)
+{
+    const BYTE* ip = (const BYTE*)seqStart;
+    const BYTE* const iend = ip + seqSize;
+    BYTE* const ostart = (BYTE* const)dst;
+    BYTE* op = ostart;
+    BYTE* const oend = ostart + maxDstSize;
+    size_t errorCode, dumpsLength;
+    const BYTE* litPtr = dctx->litPtr;
+    const BYTE* const litEnd = litPtr + dctx->litSize;
+    int nbSeq;
+    const BYTE* dumps;
+    U32* DTableLL = dctx->LLTable;
+    U32* DTableML = dctx->MLTable;
+    U32* DTableOffb = dctx->OffTable;
+    const BYTE* const base = (const BYTE*) (dctx->base);
+    const BYTE* const vBase = (const BYTE*) (dctx->vBase);
+    const BYTE* const dictEnd = (const BYTE*) (dctx->dictEnd);
+
+    /* Build Decoding Tables */
+    errorCode = ZSTD_decodeSeqHeaders(&nbSeq, &dumps, &dumpsLength,
+                                      DTableLL, DTableML, DTableOffb,
+                                      ip, iend-ip);
+    if (ZSTD_isError(errorCode)) return errorCode;
+    ip += errorCode;
+
+    /* Regen sequences */
+    {
+        seq_t sequence;
+        seqState_t seqState;
+
+        memset(&sequence, 0, sizeof(sequence));
+        sequence.offset = 4;
+        seqState.dumps = dumps;
+        seqState.dumpsEnd = dumps + dumpsLength;
+        seqState.prevOffset = 4;
+        errorCode = BIT_initDStream(&(seqState.DStream), ip, iend-ip);
+        if (ERR_isError(errorCode)) return ERROR(corruption_detected);
+        FSE_initDState(&(seqState.stateLL), &(seqState.DStream), DTableLL);
+        FSE_initDState(&(seqState.stateOffb), &(seqState.DStream), DTableOffb);
+        FSE_initDState(&(seqState.stateML), &(seqState.DStream), DTableML);
+
+        for ( ; (BIT_reloadDStream(&(seqState.DStream)) <= BIT_DStream_completed) && nbSeq ; )
+        {
+            size_t oneSeqSize;
+            nbSeq--;
+            ZSTD_decodeSequence(&sequence, &seqState);
+            oneSeqSize = ZSTD_execSequence(op, oend, sequence, &litPtr, litEnd, base, vBase, dictEnd);
+            if (ZSTD_isError(oneSeqSize)) return oneSeqSize;
+            op += oneSeqSize;
+        }
+
+        /* check if reached exact end */
+        if ( !BIT_endOfDStream(&(seqState.DStream)) ) return ERROR(corruption_detected);   /* DStream should be entirely and exactly consumed; otherwise data is corrupted */
+
+        /* last literal segment */
+        {
+            size_t lastLLSize = litEnd - litPtr;
+            if (litPtr > litEnd) return ERROR(corruption_detected);
+            if (op+lastLLSize > oend) return ERROR(dstSize_tooSmall);
+            if (op != litPtr) memcpy(op, litPtr, lastLLSize);
+            op += lastLLSize;
+        }
+    }
+
+    return op-ostart;
+}
+
+
+static void ZSTD_checkContinuity(ZSTD_DCtx* dctx, const void* dst)
+{
+    if (dst != dctx->previousDstEnd)   /* not contiguous */
+    {
+        dctx->dictEnd = dctx->previousDstEnd;
+        dctx->vBase = (const char*)dst - ((const char*)(dctx->previousDstEnd) - (const char*)(dctx->base));
+        dctx->base = dst;
+        dctx->previousDstEnd = dst;
+    }
+}
+
+
+static size_t ZSTD_decompressBlock_internal(ZSTD_DCtx* dctx,
+                            void* dst, size_t maxDstSize,
+                      const void* src, size_t srcSize)
+{
+    /* blockType == blockCompressed */
+    const BYTE* ip = (const BYTE*)src;
+
+    /* Decode literals sub-block */
+    size_t litCSize = ZSTD_decodeLiteralsBlock(dctx, src, srcSize);
+    if (ZSTD_isError(litCSize)) return litCSize;
+    ip += litCSize;
+    srcSize -= litCSize;
+
+    return ZSTD_decompressSequences(dctx, dst, maxDstSize, ip, srcSize);
+}
+
+
+static size_t ZSTD_decompress_usingDict(ZSTD_DCtx* ctx,
+                                 void* dst, size_t maxDstSize,
+                                 const void* src, size_t srcSize,
+                                 const void* dict, size_t dictSize)
+{
+    const BYTE* ip = (const BYTE*)src;
+    const BYTE* iend = ip + srcSize;
+    BYTE* const ostart = (BYTE* const)dst;
+    BYTE* op = ostart;
+    BYTE* const oend = ostart + maxDstSize;
+    size_t remainingSize = srcSize;
+    blockProperties_t blockProperties;
+
+    /* init */
+    ZSTD_resetDCtx(ctx);
+    if (dict)
+    {
+        ZSTD_decompress_insertDictionary(ctx, dict, dictSize);
+        ctx->dictEnd = ctx->previousDstEnd;
+        ctx->vBase = (const char*)dst - ((const char*)(ctx->previousDstEnd) - (const char*)(ctx->base));
+        ctx->base = dst;
+    }
+    else
+    {
+        ctx->vBase = ctx->base = ctx->dictEnd = dst;
+    }
+
+    /* Frame Header */
+    {
+        size_t frameHeaderSize;
+        if (srcSize < ZSTD_frameHeaderSize_min+ZSTD_blockHeaderSize) return ERROR(srcSize_wrong);
+        frameHeaderSize = ZSTD_decodeFrameHeader_Part1(ctx, src, ZSTD_frameHeaderSize_min);
+        if (ZSTD_isError(frameHeaderSize)) return frameHeaderSize;
+        if (srcSize < frameHeaderSize+ZSTD_blockHeaderSize) return ERROR(srcSize_wrong);
+        ip += frameHeaderSize; remainingSize -= frameHeaderSize;
+        frameHeaderSize = ZSTD_decodeFrameHeader_Part2(ctx, src, frameHeaderSize);
+        if (ZSTD_isError(frameHeaderSize)) return frameHeaderSize;
+    }
+
+    /* Loop on each block */
+    while (1)
+    {
+        size_t decodedSize=0;
+        size_t cBlockSize = ZSTD_getcBlockSize(ip, iend-ip, &blockProperties);
+        if (ZSTD_isError(cBlockSize)) return cBlockSize;
+
+        ip += ZSTD_blockHeaderSize;
+        remainingSize -= ZSTD_blockHeaderSize;
+        if (cBlockSize > remainingSize) return ERROR(srcSize_wrong);
+
+        switch(blockProperties.blockType)
+        {
+        case bt_compressed:
+            decodedSize = ZSTD_decompressBlock_internal(ctx, op, oend-op, ip, cBlockSize);
+            break;
+        case bt_raw :
+            decodedSize = ZSTD_copyRawBlock(op, oend-op, ip, cBlockSize);
+            break;
+        case bt_rle :
+            return ERROR(GENERIC);   /* not yet supported */
+            break;
+        case bt_end :
+            /* end of frame */
+            if (remainingSize) return ERROR(srcSize_wrong);
+            break;
+        default:
+            return ERROR(GENERIC);   /* impossible */
+        }
+        if (cBlockSize == 0) break;   /* bt_end */
+
+        if (ZSTD_isError(decodedSize)) return decodedSize;
+        op += decodedSize;
+        ip += cBlockSize;
+        remainingSize -= cBlockSize;
+    }
+
+    return op-ostart;
+}
+
+static size_t ZSTD_findFrameCompressedSize(const void* src, size_t srcSize)
+{
+    const BYTE* ip = (const BYTE*)src;
+    size_t remainingSize = srcSize;
+    blockProperties_t blockProperties;
+
+    /* Frame Header */
+    if (srcSize < ZSTD_frameHeaderSize_min) return ERROR(srcSize_wrong);
+    if (MEM_readLE32(src) != ZSTD_MAGICNUMBER) return ERROR(prefix_unknown);
+    ip += ZSTD_frameHeaderSize_min; remainingSize -= ZSTD_frameHeaderSize_min;
+
+    /* Loop on each block */
+    while (1)
+    {
+        size_t cBlockSize = ZSTD_getcBlockSize(ip, remainingSize, &blockProperties);
+        if (ZSTD_isError(cBlockSize)) return cBlockSize;
+
+        ip += ZSTD_blockHeaderSize;
+        remainingSize -= ZSTD_blockHeaderSize;
+        if (cBlockSize > remainingSize) return ERROR(srcSize_wrong);
+
+        if (cBlockSize == 0) break;   /* bt_end */
+
+        ip += cBlockSize;
+        remainingSize -= cBlockSize;
+    }
+
+    return ip - (const BYTE*)src;
+}
+
+/* ******************************
+*  Streaming Decompression API
+********************************/
+static size_t ZSTD_nextSrcSizeToDecompress(ZSTD_DCtx* dctx)
+{
+    return dctx->expected;
+}
+
+static size_t ZSTD_decompressContinue(ZSTD_DCtx* ctx, void* dst, size_t maxDstSize, const void* src, size_t srcSize)
+{
+    /* Sanity check */
+    if (srcSize != ctx->expected) return ERROR(srcSize_wrong);
+    ZSTD_checkContinuity(ctx, dst);
+
+    /* Decompress : frame header; part 1 */
+    switch (ctx->stage)
+    {
+    case ZSTDds_getFrameHeaderSize :
+        /* get frame header size */
+        if (srcSize != ZSTD_frameHeaderSize_min) return ERROR(srcSize_wrong);   /* impossible */
+        ctx->headerSize = ZSTD_decodeFrameHeader_Part1(ctx, src, ZSTD_frameHeaderSize_min);
+        if (ZSTD_isError(ctx->headerSize)) return ctx->headerSize;
+        memcpy(ctx->headerBuffer, src, ZSTD_frameHeaderSize_min);
+        if (ctx->headerSize > ZSTD_frameHeaderSize_min) return ERROR(GENERIC);   /* impossible */
+        ctx->expected = 0;   /* not necessary to copy more */
+        /* fallthrough */
+    case ZSTDds_decodeFrameHeader:
+        /* get frame header */
+        {   size_t const result = ZSTD_decodeFrameHeader_Part2(ctx, ctx->headerBuffer, ctx->headerSize);
+            if (ZSTD_isError(result)) return result;
+            ctx->expected = ZSTD_blockHeaderSize;
+            ctx->stage = ZSTDds_decodeBlockHeader;
+            return 0;
+        }
+    case ZSTDds_decodeBlockHeader:
+        /* Decode block header */
+        {   blockProperties_t bp;
+            size_t const blockSize = ZSTD_getcBlockSize(src, ZSTD_blockHeaderSize, &bp);
+            if (ZSTD_isError(blockSize)) return blockSize;
+            if (bp.blockType == bt_end)
+            {
+                ctx->expected = 0;
+                ctx->stage = ZSTDds_getFrameHeaderSize;
+            }
+            else
+            {
+                ctx->expected = blockSize;
+                ctx->bType = bp.blockType;
+                ctx->stage = ZSTDds_decompressBlock;
+            }
+            return 0;
+        }
+    case ZSTDds_decompressBlock:
+        {
+            /* Decompress : block content */
+            size_t rSize;
+            switch(ctx->bType)
+            {
+            case bt_compressed:
+                rSize = ZSTD_decompressBlock_internal(ctx, dst, maxDstSize, src, srcSize);
+                break;
+            case bt_raw :
+                rSize = ZSTD_copyRawBlock(dst, maxDstSize, src, srcSize);
+                break;
+            case bt_rle :
+                return ERROR(GENERIC);   /* not yet handled */
+                break;
+            case bt_end :   /* should never happen (filtered at phase 1) */
+                rSize = 0;
+                break;
+            default:
+                return ERROR(GENERIC);
+            }
+            ctx->stage = ZSTDds_decodeBlockHeader;
+            ctx->expected = ZSTD_blockHeaderSize;
+            ctx->previousDstEnd = (char*)dst + rSize;
+            return rSize;
+        }
+    default:
+        return ERROR(GENERIC);   /* impossible */
+    }
+}
+
+
+static void ZSTD_decompress_insertDictionary(ZSTD_DCtx* ctx, const void* dict, size_t dictSize)
+{
+    ctx->dictEnd = ctx->previousDstEnd;
+    ctx->vBase = (const char*)dict - ((const char*)(ctx->previousDstEnd) - (const char*)(ctx->base));
+    ctx->base = dict;
+    ctx->previousDstEnd = (const char*)dict + dictSize;
+}
+
+
+
+/*
+    Buffered version of Zstd compression library
+    Copyright (C) 2015, Yann Collet.
+
+    BSD 2-Clause License (http://www.opensource.org/licenses/bsd-license.php)
+
+    Redistribution and use in source and binary forms, with or without
+    modification, are permitted provided that the following conditions are
+    met:
+    * Redistributions of source code must retain the above copyright
+    notice, this list of conditions and the following disclaimer.
+    * Redistributions in binary form must reproduce the above
+    copyright notice, this list of conditions and the following disclaimer
+    in the documentation and/or other materials provided with the
+    distribution.
+    THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+    "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+    LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+    A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+    OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+    SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+    LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+    DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+    THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+    (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+    OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+    You can contact the author at :
+    - zstd source repository : https://github.com/Cyan4973/zstd
+    - ztsd public forum : https://groups.google.com/forum/#!forum/lz4c
+*/
+
+/* The objects defined into this file should be considered experimental.
+ * They are not labelled stable, as their prototype may change in the future.
+ * You can use them for tests, provide feedback, or if you can endure risk of future changes.
+ */
+
+/* *************************************
+*  Includes
+***************************************/
+#include <stdlib.h>
+
+
+/** ************************************************
+*  Streaming decompression
+*
+*  A ZBUFF_DCtx object is required to track streaming operation.
+*  Use ZBUFF_createDCtx() and ZBUFF_freeDCtx() to create/release resources.
+*  Use ZBUFF_decompressInit() to start a new decompression operation.
+*  ZBUFF_DCtx objects can be reused multiple times.
+*
+*  Use ZBUFF_decompressContinue() repetitively to consume your input.
+*  *srcSizePtr and *maxDstSizePtr can be any size.
+*  The function will report how many bytes were read or written by modifying *srcSizePtr and *maxDstSizePtr.
+*  Note that it may not consume the entire input, in which case it's up to the caller to call again the function with remaining input.
+*  The content of dst will be overwritten (up to *maxDstSizePtr) at each function call, so save its content if it matters or change dst .
+*  return : a hint to preferred nb of bytes to use as input for next function call (it's only a hint, to improve latency)
+*            or 0 when a frame is completely decoded
+*            or an error code, which can be tested using ZBUFF_isError().
+*
+*  Hint : recommended buffer sizes (not compulsory)
+*  output : 128 KB block size is the internal unit, it ensures it's always possible to write a full block when it's decoded.
+*  input : just follow indications from ZBUFF_decompressContinue() to minimize latency. It should always be <= 128 KB + 3 .
+* **************************************************/
+
+typedef enum { ZBUFFds_init, ZBUFFds_readHeader, ZBUFFds_loadHeader, ZBUFFds_decodeHeader,
+               ZBUFFds_read, ZBUFFds_load, ZBUFFds_flush } ZBUFF_dStage;
+
+/* *** Resource management *** */
+
+#define ZSTD_frameHeaderSize_max 5   /* too magical, should come from reference */
+struct ZBUFFv04_DCtx_s {
+    ZSTD_DCtx* zc;
+    ZSTD_parameters params;
+    char* inBuff;
+    size_t inBuffSize;
+    size_t inPos;
+    char* outBuff;
+    size_t outBuffSize;
+    size_t outStart;
+    size_t outEnd;
+    size_t hPos;
+    const char* dict;
+    size_t dictSize;
+    ZBUFF_dStage stage;
+    unsigned char headerBuffer[ZSTD_frameHeaderSize_max];
+};   /* typedef'd to ZBUFF_DCtx within "zstd_buffered.h" */
+
+typedef ZBUFFv04_DCtx ZBUFF_DCtx;
+
+
+static ZBUFF_DCtx* ZBUFF_createDCtx(void)
+{
+    ZBUFF_DCtx* zbc = (ZBUFF_DCtx*)malloc(sizeof(ZBUFF_DCtx));
+    if (zbc==NULL) return NULL;
+    memset(zbc, 0, sizeof(*zbc));
+    zbc->zc = ZSTD_createDCtx();
+    zbc->stage = ZBUFFds_init;
+    return zbc;
+}
+
+static size_t ZBUFF_freeDCtx(ZBUFF_DCtx* zbc)
+{
+    if (zbc==NULL) return 0;   /* support free on null */
+    ZSTD_freeDCtx(zbc->zc);
+    free(zbc->inBuff);
+    free(zbc->outBuff);
+    free(zbc);
+    return 0;
+}
+
+
+/* *** Initialization *** */
+
+static size_t ZBUFF_decompressInit(ZBUFF_DCtx* zbc)
+{
+    zbc->stage = ZBUFFds_readHeader;
+    zbc->hPos = zbc->inPos = zbc->outStart = zbc->outEnd = zbc->dictSize = 0;
+    return ZSTD_resetDCtx(zbc->zc);
+}
+
+
+static size_t ZBUFF_decompressWithDictionary(ZBUFF_DCtx* zbc, const void* src, size_t srcSize)
+{
+    zbc->dict = (const char*)src;
+    zbc->dictSize = srcSize;
+    return 0;
+}
+
+static size_t ZBUFF_limitCopy(void* dst, size_t maxDstSize, const void* src, size_t srcSize)
+{
+    size_t length = MIN(maxDstSize, srcSize);
+    memcpy(dst, src, length);
+    return length;
+}
+
+/* *** Decompression *** */
+
+static size_t ZBUFF_decompressContinue(ZBUFF_DCtx* zbc, void* dst, size_t* maxDstSizePtr, const void* src, size_t* srcSizePtr)
+{
+    const char* const istart = (const char*)src;
+    const char* ip = istart;
+    const char* const iend = istart + *srcSizePtr;
+    char* const ostart = (char*)dst;
+    char* op = ostart;
+    char* const oend = ostart + *maxDstSizePtr;
+    U32 notDone = 1;
+
+    while (notDone)
+    {
+        switch(zbc->stage)
+        {
+
+        case ZBUFFds_init :
+            return ERROR(init_missing);
+
+        case ZBUFFds_readHeader :
+            /* read header from src */
+            {   size_t const headerSize = ZSTD_getFrameParams(&(zbc->params), src, *srcSizePtr);
+                if (ZSTD_isError(headerSize)) return headerSize;
+                if (headerSize) {
+                    /* not enough input to decode header : tell how many bytes would be necessary */
+                    memcpy(zbc->headerBuffer+zbc->hPos, src, *srcSizePtr);
+                    zbc->hPos += *srcSizePtr;
+                    *maxDstSizePtr = 0;
+                    zbc->stage = ZBUFFds_loadHeader;
+                    return headerSize - zbc->hPos;
+                }
+                zbc->stage = ZBUFFds_decodeHeader;
+                break;
+            }
+
+        case ZBUFFds_loadHeader:
+            /* complete header from src */
+            {   size_t headerSize = ZBUFF_limitCopy(
+                    zbc->headerBuffer + zbc->hPos, ZSTD_frameHeaderSize_max - zbc->hPos,
+                    src, *srcSizePtr);
+                zbc->hPos += headerSize;
+                ip += headerSize;
+                headerSize = ZSTD_getFrameParams(&(zbc->params), zbc->headerBuffer, zbc->hPos);
+                if (ZSTD_isError(headerSize)) return headerSize;
+                if (headerSize) {
+                    /* not enough input to decode header : tell how many bytes would be necessary */
+                    *maxDstSizePtr = 0;
+                    return headerSize - zbc->hPos;
+            }   }
+            /* intentional fallthrough */
+
+        case ZBUFFds_decodeHeader:
+                /* apply header to create / resize buffers */
+                {   size_t const neededOutSize = (size_t)1 << zbc->params.windowLog;
+                    size_t const neededInSize = BLOCKSIZE;   /* a block is never > BLOCKSIZE */
+                    if (zbc->inBuffSize < neededInSize) {
+                        free(zbc->inBuff);
+                        zbc->inBuffSize = neededInSize;
+                        zbc->inBuff = (char*)malloc(neededInSize);
+                        if (zbc->inBuff == NULL) return ERROR(memory_allocation);
+                    }
+                    if (zbc->outBuffSize < neededOutSize) {
+                        free(zbc->outBuff);
+                        zbc->outBuffSize = neededOutSize;
+                        zbc->outBuff = (char*)malloc(neededOutSize);
+                        if (zbc->outBuff == NULL) return ERROR(memory_allocation);
+                }   }
+                if (zbc->dictSize)
+                    ZSTD_decompress_insertDictionary(zbc->zc, zbc->dict, zbc->dictSize);
+                if (zbc->hPos) {
+                    /* some data already loaded into headerBuffer : transfer into inBuff */
+                    memcpy(zbc->inBuff, zbc->headerBuffer, zbc->hPos);
+                    zbc->inPos = zbc->hPos;
+                    zbc->hPos = 0;
+                    zbc->stage = ZBUFFds_load;
+                    break;
+                }
+                zbc->stage = ZBUFFds_read;
+		/* fall-through */
+        case ZBUFFds_read:
+            {
+                size_t neededInSize = ZSTD_nextSrcSizeToDecompress(zbc->zc);
+                if (neededInSize==0)   /* end of frame */
+                {
+                    zbc->stage = ZBUFFds_init;
+                    notDone = 0;
+                    break;
+                }
+                if ((size_t)(iend-ip) >= neededInSize)
+                {
+                    /* directly decode from src */
+                    size_t decodedSize = ZSTD_decompressContinue(zbc->zc,
+                        zbc->outBuff + zbc->outStart, zbc->outBuffSize - zbc->outStart,
+                        ip, neededInSize);
+                    if (ZSTD_isError(decodedSize)) return decodedSize;
+                    ip += neededInSize;
+                    if (!decodedSize) break;   /* this was just a header */
+                    zbc->outEnd = zbc->outStart +  decodedSize;
+                    zbc->stage = ZBUFFds_flush;
+                    break;
+                }
+                if (ip==iend) { notDone = 0; break; }   /* no more input */
+                zbc->stage = ZBUFFds_load;
+            }
+	    /* fall-through */
+        case ZBUFFds_load:
+            {
+                size_t neededInSize = ZSTD_nextSrcSizeToDecompress(zbc->zc);
+                size_t toLoad = neededInSize - zbc->inPos;   /* should always be <= remaining space within inBuff */
+                size_t loadedSize;
+                if (toLoad > zbc->inBuffSize - zbc->inPos) return ERROR(corruption_detected);   /* should never happen */
+                loadedSize = ZBUFF_limitCopy(zbc->inBuff + zbc->inPos, toLoad, ip, iend-ip);
+                ip += loadedSize;
+                zbc->inPos += loadedSize;
+                if (loadedSize < toLoad) { notDone = 0; break; }   /* not enough input, wait for more */
+                {
+                    size_t decodedSize = ZSTD_decompressContinue(zbc->zc,
+                        zbc->outBuff + zbc->outStart, zbc->outBuffSize - zbc->outStart,
+                        zbc->inBuff, neededInSize);
+                    if (ZSTD_isError(decodedSize)) return decodedSize;
+                    zbc->inPos = 0;   /* input is consumed */
+                    if (!decodedSize) { zbc->stage = ZBUFFds_read; break; }   /* this was just a header */
+                    zbc->outEnd = zbc->outStart +  decodedSize;
+                    zbc->stage = ZBUFFds_flush;
+                    /* ZBUFFds_flush follows */
+                }
+            }
+	    /* fall-through */
+        case ZBUFFds_flush:
+            {
+                size_t toFlushSize = zbc->outEnd - zbc->outStart;
+                size_t flushedSize = ZBUFF_limitCopy(op, oend-op, zbc->outBuff + zbc->outStart, toFlushSize);
+                op += flushedSize;
+                zbc->outStart += flushedSize;
+                if (flushedSize == toFlushSize)
+                {
+                    zbc->stage = ZBUFFds_read;
+                    if (zbc->outStart + BLOCKSIZE > zbc->outBuffSize)
+                        zbc->outStart = zbc->outEnd = 0;
+                    break;
+                }
+                /* cannot flush everything */
+                notDone = 0;
+                break;
+            }
+        default: return ERROR(GENERIC);   /* impossible */
+        }
+    }
+
+    *srcSizePtr = ip-istart;
+    *maxDstSizePtr = op-ostart;
+
+    {
+        size_t nextSrcSizeHint = ZSTD_nextSrcSizeToDecompress(zbc->zc);
+        if (nextSrcSizeHint > 3) nextSrcSizeHint+= 3;   /* get the next block header while at it */
+        nextSrcSizeHint -= zbc->inPos;   /* already loaded*/
+        return nextSrcSizeHint;
+    }
+}
+
+
+/* *************************************
+*  Tool functions
+***************************************/
+unsigned ZBUFFv04_isError(size_t errorCode) { return ERR_isError(errorCode); }
+const char* ZBUFFv04_getErrorName(size_t errorCode) { return ERR_getErrorName(errorCode); }
+
+size_t ZBUFFv04_recommendedDInSize()  { return BLOCKSIZE + 3; }
+size_t ZBUFFv04_recommendedDOutSize() { return BLOCKSIZE; }
+
+
+
+/*- ========================================================================= -*/
+
+/* final wrapping stage */
+
+size_t ZSTDv04_decompressDCtx(ZSTD_DCtx* dctx, void* dst, size_t maxDstSize, const void* src, size_t srcSize)
+{
+    return ZSTD_decompress_usingDict(dctx, dst, maxDstSize, src, srcSize, NULL, 0);
+}
+
+size_t ZSTDv04_decompress(void* dst, size_t maxDstSize, const void* src, size_t srcSize)
+{
+#if defined(ZSTD_HEAPMODE) && (ZSTD_HEAPMODE==1)
+    size_t regenSize;
+    ZSTD_DCtx* dctx = ZSTD_createDCtx();
+    if (dctx==NULL) return ERROR(memory_allocation);
+    regenSize = ZSTDv04_decompressDCtx(dctx, dst, maxDstSize, src, srcSize);
+    ZSTD_freeDCtx(dctx);
+    return regenSize;
+#else
+    ZSTD_DCtx dctx;
+    return ZSTDv04_decompressDCtx(&dctx, dst, maxDstSize, src, srcSize);
+#endif
+}
+
+size_t ZSTDv04_findFrameCompressedSize(const void* src, size_t srcSize)
+{
+    return ZSTD_findFrameCompressedSize(src, srcSize);
+}
+
+size_t ZSTDv04_resetDCtx(ZSTDv04_Dctx* dctx) { return ZSTD_resetDCtx(dctx); }
+
+size_t ZSTDv04_nextSrcSizeToDecompress(ZSTDv04_Dctx* dctx)
+{
+    return ZSTD_nextSrcSizeToDecompress(dctx);
+}
+
+size_t ZSTDv04_decompressContinue(ZSTDv04_Dctx* dctx, void* dst, size_t maxDstSize, const void* src, size_t srcSize)
+{
+    return ZSTD_decompressContinue(dctx, dst, maxDstSize, src, srcSize);
+}
+
+
+
+ZBUFFv04_DCtx* ZBUFFv04_createDCtx(void) { return ZBUFF_createDCtx(); }
+size_t      ZBUFFv04_freeDCtx(ZBUFFv04_DCtx* dctx) { return ZBUFF_freeDCtx(dctx); }
+
+size_t ZBUFFv04_decompressInit(ZBUFFv04_DCtx* dctx) { return ZBUFF_decompressInit(dctx); }
+size_t ZBUFFv04_decompressWithDictionary(ZBUFFv04_DCtx* dctx, const void* src, size_t srcSize)
+{ return ZBUFF_decompressWithDictionary(dctx, src, srcSize); }
+
+size_t ZBUFFv04_decompressContinue(ZBUFFv04_DCtx* dctx, void* dst, size_t* maxDstSizePtr, const void* src, size_t* srcSizePtr)
+{
+    return ZBUFF_decompressContinue(dctx, dst, maxDstSizePtr, src, srcSizePtr);
+}
+
+ZSTD_DCtx* ZSTDv04_createDCtx(void) { return ZSTD_createDCtx(); }
+size_t ZSTDv04_freeDCtx(ZSTD_DCtx* dctx) { return ZSTD_freeDCtx(dctx); }
+
+size_t ZSTDv04_getFrameParams(ZSTD_parameters* params, const void* src, size_t srcSize)
+{
+    return ZSTD_getFrameParams(params, src, srcSize);
+}
diff --git a/contrib/libzstd/include/zstd/legacy/zstd_v04.h b/contrib/libzstd/include/zstd/legacy/zstd_v04.h
new file mode 100644
index 00000000000..1b5439d3924
--- /dev/null
+++ b/contrib/libzstd/include/zstd/legacy/zstd_v04.h
@@ -0,0 +1,136 @@
+/**
+ * Copyright (c) 2016-present, Yann Collet, Facebook, Inc.
+ * All rights reserved.
+ *
+ * This source code is licensed under the BSD-style license found in the
+ * LICENSE file in the root directory of this source tree. An additional grant
+ * of patent rights can be found in the PATENTS file in the same directory.
+ */
+
+#ifndef ZSTD_V04_H_91868324769238
+#define ZSTD_V04_H_91868324769238
+
+#if defined (__cplusplus)
+extern "C" {
+#endif
+
+/* *************************************
+*  Includes
+***************************************/
+#include <stddef.h>   /* size_t */
+
+
+/* *************************************
+*  Simple one-step function
+***************************************/
+/**
+ZSTDv04_decompress() : decompress ZSTD frames compliant with v0.4.x format
+    compressedSize : is the exact source size
+    maxOriginalSize : is the size of the 'dst' buffer, which must be already allocated.
+                      It must be equal or larger than originalSize, otherwise decompression will fail.
+    return : the number of bytes decompressed into destination buffer (originalSize)
+             or an errorCode if it fails (which can be tested using ZSTDv01_isError())
+*/
+size_t ZSTDv04_decompress( void* dst, size_t maxOriginalSize,
+                     const void* src, size_t compressedSize);
+
+/**
+ZSTDv04_getFrameSrcSize() : get the source length of a ZSTD frame compliant with v0.4.x format
+    compressedSize : The size of the 'src' buffer, at least as large as the frame pointed to by 'src'
+    return : the number of bytes that would be read to decompress this frame
+             or an errorCode if it fails (which can be tested using ZSTDv04_isError())
+*/
+size_t ZSTDv04_findFrameCompressedSize(const void* src, size_t compressedSize);
+
+/**
+ZSTDv04_isError() : tells if the result of ZSTDv04_decompress() is an error
+*/
+unsigned ZSTDv04_isError(size_t code);
+
+
+/* *************************************
+*  Advanced functions
+***************************************/
+typedef struct ZSTDv04_Dctx_s ZSTDv04_Dctx;
+ZSTDv04_Dctx* ZSTDv04_createDCtx(void);
+size_t ZSTDv04_freeDCtx(ZSTDv04_Dctx* dctx);
+
+size_t ZSTDv04_decompressDCtx(ZSTDv04_Dctx* dctx,
+                              void* dst, size_t maxOriginalSize,
+                        const void* src, size_t compressedSize);
+
+
+/* *************************************
+*  Direct Streaming
+***************************************/
+size_t ZSTDv04_resetDCtx(ZSTDv04_Dctx* dctx);
+
+size_t ZSTDv04_nextSrcSizeToDecompress(ZSTDv04_Dctx* dctx);
+size_t ZSTDv04_decompressContinue(ZSTDv04_Dctx* dctx, void* dst, size_t maxDstSize, const void* src, size_t srcSize);
+/**
+  Use above functions alternatively.
+  ZSTD_nextSrcSizeToDecompress() tells how much bytes to provide as 'srcSize' to ZSTD_decompressContinue().
+  ZSTD_decompressContinue() will use previous data blocks to improve compression if they are located prior to current block.
+  Result is the number of bytes regenerated within 'dst'.
+  It can be zero, which is not an error; it just means ZSTD_decompressContinue() has decoded some header.
+*/
+
+
+/* *************************************
+*  Buffered Streaming
+***************************************/
+typedef struct ZBUFFv04_DCtx_s ZBUFFv04_DCtx;
+ZBUFFv04_DCtx* ZBUFFv04_createDCtx(void);
+size_t         ZBUFFv04_freeDCtx(ZBUFFv04_DCtx* dctx);
+
+size_t ZBUFFv04_decompressInit(ZBUFFv04_DCtx* dctx);
+size_t ZBUFFv04_decompressWithDictionary(ZBUFFv04_DCtx* dctx, const void* dict, size_t dictSize);
+
+size_t ZBUFFv04_decompressContinue(ZBUFFv04_DCtx* dctx, void* dst, size_t* maxDstSizePtr, const void* src, size_t* srcSizePtr);
+
+/** ************************************************
+*  Streaming decompression
+*
+*  A ZBUFF_DCtx object is required to track streaming operation.
+*  Use ZBUFF_createDCtx() and ZBUFF_freeDCtx() to create/release resources.
+*  Use ZBUFF_decompressInit() to start a new decompression operation.
+*  ZBUFF_DCtx objects can be reused multiple times.
+*
+*  Optionally, a reference to a static dictionary can be set, using ZBUFF_decompressWithDictionary()
+*  It must be the same content as the one set during compression phase.
+*  Dictionary content must remain accessible during the decompression process.
+*
+*  Use ZBUFF_decompressContinue() repetitively to consume your input.
+*  *srcSizePtr and *maxDstSizePtr can be any size.
+*  The function will report how many bytes were read or written by modifying *srcSizePtr and *maxDstSizePtr.
+*  Note that it may not consume the entire input, in which case it's up to the caller to present remaining input again.
+*  The content of dst will be overwritten (up to *maxDstSizePtr) at each function call, so save its content if it matters or change dst.
+*  @return : a hint to preferred nb of bytes to use as input for next function call (it's only a hint, to improve latency)
+*            or 0 when a frame is completely decoded
+*            or an error code, which can be tested using ZBUFF_isError().
+*
+*  Hint : recommended buffer sizes (not compulsory) : ZBUFF_recommendedDInSize / ZBUFF_recommendedDOutSize
+*  output : ZBUFF_recommendedDOutSize==128 KB block size is the internal unit, it ensures it's always possible to write a full block when it's decoded.
+*  input : ZBUFF_recommendedDInSize==128Kb+3; just follow indications from ZBUFF_decompressContinue() to minimize latency. It should always be <= 128 KB + 3 .
+* **************************************************/
+unsigned ZBUFFv04_isError(size_t errorCode);
+const char* ZBUFFv04_getErrorName(size_t errorCode);
+
+
+/** The below functions provide recommended buffer sizes for Compression or Decompression operations.
+*   These sizes are not compulsory, they just tend to offer better latency */
+size_t ZBUFFv04_recommendedDInSize(void);
+size_t ZBUFFv04_recommendedDOutSize(void);
+
+
+/* *************************************
+*  Prefix - version detection
+***************************************/
+#define ZSTDv04_magicNumber 0xFD2FB524   /* v0.4 */
+
+
+#if defined (__cplusplus)
+}
+#endif
+
+#endif /* ZSTD_V04_H_91868324769238 */
diff --git a/contrib/libzstd/include/zstd/legacy/zstd_v05.c b/contrib/libzstd/include/zstd/legacy/zstd_v05.c
new file mode 100644
index 00000000000..e929618a3bf
--- /dev/null
+++ b/contrib/libzstd/include/zstd/legacy/zstd_v05.c
@@ -0,0 +1,4082 @@
+/**
+ * Copyright (c) 2016-present, Yann Collet, Facebook, Inc.
+ * All rights reserved.
+ *
+ * This source code is licensed under the BSD-style license found in the
+ * LICENSE file in the root directory of this source tree. An additional grant
+ * of patent rights can be found in the PATENTS file in the same directory.
+ */
+
+
+/*- Dependencies -*/
+#include "zstd_v05.h"
+#include "error_private.h"
+
+
+/* ******************************************************************
+   mem.h
+   low-level memory access routines
+   Copyright (C) 2013-2015, Yann Collet.
+
+   BSD 2-Clause License (http://www.opensource.org/licenses/bsd-license.php)
+
+   Redistribution and use in source and binary forms, with or without
+   modification, are permitted provided that the following conditions are
+   met:
+
+       * Redistributions of source code must retain the above copyright
+   notice, this list of conditions and the following disclaimer.
+       * Redistributions in binary form must reproduce the above
+   copyright notice, this list of conditions and the following disclaimer
+   in the documentation and/or other materials provided with the
+   distribution.
+
+   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+   OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+    You can contact the author at :
+    - FSEv05 source repository : https://github.com/Cyan4973/FiniteStateEntropy
+    - Public forum : https://groups.google.com/forum/#!forum/lz4c
+****************************************************************** */
+#ifndef MEM_H_MODULE
+#define MEM_H_MODULE
+
+#if defined (__cplusplus)
+extern "C" {
+#endif
+
+/*-****************************************
+*  Dependencies
+******************************************/
+#include <stddef.h>    /* size_t, ptrdiff_t */
+#include <string.h>    /* memcpy */
+
+
+/*-****************************************
+*  Compiler specifics
+******************************************/
+#if defined(__GNUC__)
+#  define MEM_STATIC static __attribute__((unused))
+#elif defined (__cplusplus) || (defined (__STDC_VERSION__) && (__STDC_VERSION__ >= 199901L) /* C99 */)
+#  define MEM_STATIC static inline
+#elif defined(_MSC_VER)
+#  define MEM_STATIC static __inline
+#else
+#  define MEM_STATIC static  /* this version may generate warnings for unused static functions; disable the relevant warning */
+#endif
+
+
+/*-**************************************************************
+*  Basic Types
+*****************************************************************/
+#if defined (__cplusplus) || (defined (__STDC_VERSION__) && (__STDC_VERSION__ >= 199901L) /* C99 */)
+# include <stdint.h>
+  typedef  uint8_t BYTE;
+  typedef uint16_t U16;
+  typedef  int16_t S16;
+  typedef uint32_t U32;
+  typedef  int32_t S32;
+  typedef uint64_t U64;
+  typedef  int64_t S64;
+#else
+  typedef unsigned char       BYTE;
+  typedef unsigned short      U16;
+  typedef   signed short      S16;
+  typedef unsigned int        U32;
+  typedef   signed int        S32;
+  typedef unsigned long long  U64;
+  typedef   signed long long  S64;
+#endif
+
+
+/*-**************************************************************
+*  Memory I/O
+*****************************************************************/
+/* MEM_FORCE_MEMORY_ACCESS :
+ * By default, access to unaligned memory is controlled by `memcpy()`, which is safe and portable.
+ * Unfortunately, on some target/compiler combinations, the generated assembly is sub-optimal.
+ * The below switch allow to select different access method for improved performance.
+ * Method 0 (default) : use `memcpy()`. Safe and portable.
+ * Method 1 : `__packed` statement. It depends on compiler extension (ie, not portable).
+ *            This method is safe if your compiler supports it, and *generally* as fast or faster than `memcpy`.
+ * Method 2 : direct access. This method is portable but violate C standard.
+ *            It can generate buggy code on targets depending on alignment.
+ *            In some circumstances, it's the only known way to get the most performance (ie GCC + ARMv6)
+ * See http://fastcompression.blogspot.fr/2015/08/accessing-unaligned-memory.html for details.
+ * Prefer these methods in priority order (0 > 1 > 2)
+ */
+#ifndef MEM_FORCE_MEMORY_ACCESS   /* can be defined externally, on command line for example */
+#  if defined(__GNUC__) && ( defined(__ARM_ARCH_6__) || defined(__ARM_ARCH_6J__) || defined(__ARM_ARCH_6K__) || defined(__ARM_ARCH_6Z__) || defined(__ARM_ARCH_6ZK__) || defined(__ARM_ARCH_6T2__) )
+#    define MEM_FORCE_MEMORY_ACCESS 2
+#  elif (defined(__INTEL_COMPILER) && !defined(WIN32)) || \
+  (defined(__GNUC__) && ( defined(__ARM_ARCH_7__) || defined(__ARM_ARCH_7A__) || defined(__ARM_ARCH_7R__) || defined(__ARM_ARCH_7M__) || defined(__ARM_ARCH_7S__) ))
+#    define MEM_FORCE_MEMORY_ACCESS 1
+#  endif
+#endif
+
+MEM_STATIC unsigned MEM_32bits(void) { return sizeof(void*)==4; }
+MEM_STATIC unsigned MEM_64bits(void) { return sizeof(void*)==8; }
+
+MEM_STATIC unsigned MEM_isLittleEndian(void)
+{
+    const union { U32 u; BYTE c[4]; } one = { 1 };   /* don't use static : performance detrimental  */
+    return one.c[0];
+}
+
+#if defined(MEM_FORCE_MEMORY_ACCESS) && (MEM_FORCE_MEMORY_ACCESS==2)
+
+/* violates C standard, by lying on structure alignment.
+Only use if no other choice to achieve best performance on target platform */
+MEM_STATIC U16 MEM_read16(const void* memPtr) { return *(const U16*) memPtr; }
+MEM_STATIC U32 MEM_read32(const void* memPtr) { return *(const U32*) memPtr; }
+MEM_STATIC U64 MEM_read64(const void* memPtr) { return *(const U64*) memPtr; }
+
+MEM_STATIC void MEM_write16(void* memPtr, U16 value) { *(U16*)memPtr = value; }
+MEM_STATIC void MEM_write32(void* memPtr, U32 value) { *(U32*)memPtr = value; }
+MEM_STATIC void MEM_write64(void* memPtr, U64 value) { *(U64*)memPtr = value; }
+
+#elif defined(MEM_FORCE_MEMORY_ACCESS) && (MEM_FORCE_MEMORY_ACCESS==1)
+
+/* __pack instructions are safer, but compiler specific, hence potentially problematic for some compilers */
+/* currently only defined for gcc and icc */
+typedef union { U16 u16; U32 u32; U64 u64; size_t st; } __attribute__((packed)) unalign;
+
+MEM_STATIC U16 MEM_read16(const void* ptr) { return ((const unalign*)ptr)->u16; }
+MEM_STATIC U32 MEM_read32(const void* ptr) { return ((const unalign*)ptr)->u32; }
+MEM_STATIC U64 MEM_read64(const void* ptr) { return ((const unalign*)ptr)->u64; }
+
+MEM_STATIC void MEM_write16(void* memPtr, U16 value) { ((unalign*)memPtr)->u16 = value; }
+MEM_STATIC void MEM_write32(void* memPtr, U32 value) { ((unalign*)memPtr)->u32 = value; }
+MEM_STATIC void MEM_write64(void* memPtr, U64 value) { ((unalign*)memPtr)->u64 = value; }
+
+#else
+
+/* default method, safe and standard.
+   can sometimes prove slower */
+
+MEM_STATIC U16 MEM_read16(const void* memPtr)
+{
+    U16 val; memcpy(&val, memPtr, sizeof(val)); return val;
+}
+
+MEM_STATIC U32 MEM_read32(const void* memPtr)
+{
+    U32 val; memcpy(&val, memPtr, sizeof(val)); return val;
+}
+
+MEM_STATIC U64 MEM_read64(const void* memPtr)
+{
+    U64 val; memcpy(&val, memPtr, sizeof(val)); return val;
+}
+
+MEM_STATIC void MEM_write16(void* memPtr, U16 value)
+{
+    memcpy(memPtr, &value, sizeof(value));
+}
+
+MEM_STATIC void MEM_write32(void* memPtr, U32 value)
+{
+    memcpy(memPtr, &value, sizeof(value));
+}
+
+MEM_STATIC void MEM_write64(void* memPtr, U64 value)
+{
+    memcpy(memPtr, &value, sizeof(value));
+}
+
+#endif /* MEM_FORCE_MEMORY_ACCESS */
+
+
+MEM_STATIC U16 MEM_readLE16(const void* memPtr)
+{
+    if (MEM_isLittleEndian())
+        return MEM_read16(memPtr);
+    else {
+        const BYTE* p = (const BYTE*)memPtr;
+        return (U16)(p[0] + (p[1]<<8));
+    }
+}
+
+MEM_STATIC void MEM_writeLE16(void* memPtr, U16 val)
+{
+    if (MEM_isLittleEndian()) {
+        MEM_write16(memPtr, val);
+    } else {
+        BYTE* p = (BYTE*)memPtr;
+        p[0] = (BYTE)val;
+        p[1] = (BYTE)(val>>8);
+    }
+}
+
+MEM_STATIC U32 MEM_readLE32(const void* memPtr)
+{
+    if (MEM_isLittleEndian())
+        return MEM_read32(memPtr);
+    else {
+        const BYTE* p = (const BYTE*)memPtr;
+        return (U32)((U32)p[0] + ((U32)p[1]<<8) + ((U32)p[2]<<16) + ((U32)p[3]<<24));
+    }
+}
+
+
+MEM_STATIC U64 MEM_readLE64(const void* memPtr)
+{
+    if (MEM_isLittleEndian())
+        return MEM_read64(memPtr);
+    else {
+        const BYTE* p = (const BYTE*)memPtr;
+        return (U64)((U64)p[0] + ((U64)p[1]<<8) + ((U64)p[2]<<16) + ((U64)p[3]<<24)
+                     + ((U64)p[4]<<32) + ((U64)p[5]<<40) + ((U64)p[6]<<48) + ((U64)p[7]<<56));
+    }
+}
+
+
+MEM_STATIC size_t MEM_readLEST(const void* memPtr)
+{
+    if (MEM_32bits())
+        return (size_t)MEM_readLE32(memPtr);
+    else
+        return (size_t)MEM_readLE64(memPtr);
+}
+
+
+#if defined (__cplusplus)
+}
+#endif
+
+#endif /* MEM_H_MODULE */
+
+/*
+    zstd - standard compression library
+    Header File for static linking only
+    Copyright (C) 2014-2016, Yann Collet.
+
+    BSD 2-Clause License (http://www.opensource.org/licenses/bsd-license.php)
+
+    Redistribution and use in source and binary forms, with or without
+    modification, are permitted provided that the following conditions are
+    met:
+    * Redistributions of source code must retain the above copyright
+    notice, this list of conditions and the following disclaimer.
+    * Redistributions in binary form must reproduce the above
+    copyright notice, this list of conditions and the following disclaimer
+    in the documentation and/or other materials provided with the
+    distribution.
+    THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+    "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+    LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+    A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+    OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+    SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+    LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+    DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+    THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+    (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+    OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+    You can contact the author at :
+    - zstd homepage : http://www.zstd.net
+*/
+#ifndef ZSTD_STATIC_H
+#define ZSTD_STATIC_H
+
+/* The prototypes defined within this file are considered experimental.
+ * They should not be used in the context DLL as they may change in the future.
+ * Prefer static linking if you need them, to control breaking version changes issues.
+ */
+
+#if defined (__cplusplus)
+extern "C" {
+#endif
+
+
+
+/*-*************************************
+*  Types
+***************************************/
+#define ZSTDv05_WINDOWLOG_ABSOLUTEMIN 11
+
+
+/*-*************************************
+*  Advanced functions
+***************************************/
+/*- Advanced Decompression functions -*/
+
+/*! ZSTDv05_decompress_usingPreparedDCtx() :
+*   Same as ZSTDv05_decompress_usingDict, but using a reference context `preparedDCtx`, where dictionary has been loaded.
+*   It avoids reloading the dictionary each time.
+*   `preparedDCtx` must have been properly initialized using ZSTDv05_decompressBegin_usingDict().
+*   Requires 2 contexts : 1 for reference, which will not be modified, and 1 to run the decompression operation */
+size_t ZSTDv05_decompress_usingPreparedDCtx(
+                                             ZSTDv05_DCtx* dctx, const ZSTDv05_DCtx* preparedDCtx,
+                                             void* dst, size_t dstCapacity,
+                                       const void* src, size_t srcSize);
+
+
+/* **************************************
+*  Streaming functions (direct mode)
+****************************************/
+size_t ZSTDv05_decompressBegin(ZSTDv05_DCtx* dctx);
+
+/*
+  Streaming decompression, direct mode (bufferless)
+
+  A ZSTDv05_DCtx object is required to track streaming operations.
+  Use ZSTDv05_createDCtx() / ZSTDv05_freeDCtx() to manage it.
+  A ZSTDv05_DCtx object can be re-used multiple times.
+
+  First typical operation is to retrieve frame parameters, using ZSTDv05_getFrameParams().
+  This operation is independent, and just needs enough input data to properly decode the frame header.
+  Objective is to retrieve *params.windowlog, to know minimum amount of memory required during decoding.
+  Result : 0 when successful, it means the ZSTDv05_parameters structure has been filled.
+           >0 : means there is not enough data into src. Provides the expected size to successfully decode header.
+           errorCode, which can be tested using ZSTDv05_isError()
+
+  Start decompression, with ZSTDv05_decompressBegin() or ZSTDv05_decompressBegin_usingDict()
+  Alternatively, you can copy a prepared context, using ZSTDv05_copyDCtx()
+
+  Then use ZSTDv05_nextSrcSizeToDecompress() and ZSTDv05_decompressContinue() alternatively.
+  ZSTDv05_nextSrcSizeToDecompress() tells how much bytes to provide as 'srcSize' to ZSTDv05_decompressContinue().
+  ZSTDv05_decompressContinue() requires this exact amount of bytes, or it will fail.
+  ZSTDv05_decompressContinue() needs previous data blocks during decompression, up to (1 << windowlog).
+  They should preferably be located contiguously, prior to current block. Alternatively, a round buffer is also possible.
+
+  @result of ZSTDv05_decompressContinue() is the number of bytes regenerated within 'dst'.
+  It can be zero, which is not an error; it just means ZSTDv05_decompressContinue() has decoded some header.
+
+  A frame is fully decoded when ZSTDv05_nextSrcSizeToDecompress() returns zero.
+  Context can then be reset to start a new decompression.
+*/
+
+
+/* **************************************
+*  Block functions
+****************************************/
+/*! Block functions produce and decode raw zstd blocks, without frame metadata.
+    User will have to take in charge required information to regenerate data, such as block sizes.
+
+    A few rules to respect :
+    - Uncompressed block size must be <= 128 KB
+    - Compressing or decompressing requires a context structure
+      + Use ZSTDv05_createCCtx() and ZSTDv05_createDCtx()
+    - It is necessary to init context before starting
+      + compression : ZSTDv05_compressBegin()
+      + decompression : ZSTDv05_decompressBegin()
+      + variants _usingDict() are also allowed
+      + copyCCtx() and copyDCtx() work too
+    - When a block is considered not compressible enough, ZSTDv05_compressBlock() result will be zero.
+      In which case, nothing is produced into `dst`.
+      + User must test for such outcome and deal directly with uncompressed data
+      + ZSTDv05_decompressBlock() doesn't accept uncompressed data as input !!
+*/
+
+size_t ZSTDv05_decompressBlock(ZSTDv05_DCtx* dctx, void* dst, size_t dstCapacity, const void* src, size_t srcSize);
+
+
+
+
+#if defined (__cplusplus)
+}
+#endif
+
+#endif  /* ZSTDv05_STATIC_H */
+
+
+/*
+    zstd_internal - common functions to include
+    Header File for include
+    Copyright (C) 2014-2016, Yann Collet.
+
+    BSD 2-Clause License (http://www.opensource.org/licenses/bsd-license.php)
+
+    Redistribution and use in source and binary forms, with or without
+    modification, are permitted provided that the following conditions are
+    met:
+    * Redistributions of source code must retain the above copyright
+    notice, this list of conditions and the following disclaimer.
+    * Redistributions in binary form must reproduce the above
+    copyright notice, this list of conditions and the following disclaimer
+    in the documentation and/or other materials provided with the
+    distribution.
+    THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+    "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+    LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+    A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+    OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+    SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+    LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+    DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+    THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+    (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+    OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+    You can contact the author at :
+    - zstd source repository : https://github.com/Cyan4973/zstd
+*/
+#ifndef ZSTD_CCOMMON_H_MODULE
+#define ZSTD_CCOMMON_H_MODULE
+
+
+
+/*-*************************************
+*  Common macros
+***************************************/
+#define MIN(a,b) ((a)<(b) ? (a) : (b))
+#define MAX(a,b) ((a)>(b) ? (a) : (b))
+
+
+/*-*************************************
+*  Common constants
+***************************************/
+#define ZSTDv05_DICT_MAGIC  0xEC30A435
+
+#define KB *(1 <<10)
+#define MB *(1 <<20)
+#define GB *(1U<<30)
+
+#define BLOCKSIZE (128 KB)                 /* define, for static allocation */
+
+static const size_t ZSTDv05_blockHeaderSize = 3;
+static const size_t ZSTDv05_frameHeaderSize_min = 5;
+#define ZSTDv05_frameHeaderSize_max 5         /* define, for static allocation */
+
+#define BITv057 128
+#define BITv056  64
+#define BITv055  32
+#define BITv054  16
+#define BITv051   2
+#define BITv050   1
+
+#define IS_HUFv05 0
+#define IS_PCH 1
+#define IS_RAW 2
+#define IS_RLE 3
+
+#define MINMATCH 4
+#define REPCODE_STARTVALUE 1
+
+#define Litbits  8
+#define MLbits   7
+#define LLbits   6
+#define Offbits  5
+#define MaxLit ((1<<Litbits) - 1)
+#define MaxML  ((1<<MLbits) - 1)
+#define MaxLL  ((1<<LLbits) - 1)
+#define MaxOff ((1<<Offbits)- 1)
+#define MLFSEv05Log   10
+#define LLFSEv05Log   10
+#define OffFSEv05Log   9
+#define MaxSeq MAX(MaxLL, MaxML)
+
+#define FSEv05_ENCODING_RAW     0
+#define FSEv05_ENCODING_RLE     1
+#define FSEv05_ENCODING_STATIC  2
+#define FSEv05_ENCODING_DYNAMIC 3
+
+
+#define HufLog 12
+
+#define MIN_SEQUENCES_SIZE 1 /* nbSeq==0 */
+#define MIN_CBLOCK_SIZE (1 /*litCSize*/ + 1 /* RLE or RAW */ + MIN_SEQUENCES_SIZE /* nbSeq==0 */)   /* for a non-null block */
+
+#define WILDCOPY_OVERLENGTH 8
+
+typedef enum { bt_compressed, bt_raw, bt_rle, bt_end } blockType_t;
+
+
+/*-*******************************************
+*  Shared functions to include for inlining
+*********************************************/
+static void ZSTDv05_copy8(void* dst, const void* src) { memcpy(dst, src, 8); }
+
+#define COPY8(d,s) { ZSTDv05_copy8(d,s); d+=8; s+=8; }
+
+/*! ZSTDv05_wildcopy() :
+*   custom version of memcpy(), can copy up to 7 bytes too many (8 bytes if length==0) */
+MEM_STATIC void ZSTDv05_wildcopy(void* dst, const void* src, ptrdiff_t length)
+{
+    const BYTE* ip = (const BYTE*)src;
+    BYTE* op = (BYTE*)dst;
+    BYTE* const oend = op + length;
+    do
+        COPY8(op, ip)
+    while (op < oend);
+}
+
+
+/*-*******************************************
+*  Private interfaces
+*********************************************/
+typedef struct {
+    void* buffer;
+    U32*  offsetStart;
+    U32*  offset;
+    BYTE* offCodeStart;
+    BYTE* offCode;
+    BYTE* litStart;
+    BYTE* lit;
+    BYTE* litLengthStart;
+    BYTE* litLength;
+    BYTE* matchLengthStart;
+    BYTE* matchLength;
+    BYTE* dumpsStart;
+    BYTE* dumps;
+    /* opt */
+    U32* matchLengthFreq;
+    U32* litLengthFreq;
+    U32* litFreq;
+    U32* offCodeFreq;
+    U32  matchLengthSum;
+    U32  litLengthSum;
+    U32  litSum;
+    U32  offCodeSum;
+} seqStore_t;
+
+
+
+#endif   /* ZSTDv05_CCOMMON_H_MODULE */
+/* ******************************************************************
+   FSEv05 : Finite State Entropy coder
+   header file
+   Copyright (C) 2013-2015, Yann Collet.
+
+   BSD 2-Clause License (http://www.opensource.org/licenses/bsd-license.php)
+
+   Redistribution and use in source and binary forms, with or without
+   modification, are permitted provided that the following conditions are
+   met:
+
+       * Redistributions of source code must retain the above copyright
+   notice, this list of conditions and the following disclaimer.
+       * Redistributions in binary form must reproduce the above
+   copyright notice, this list of conditions and the following disclaimer
+   in the documentation and/or other materials provided with the
+   distribution.
+
+   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+   OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+   You can contact the author at :
+   - Source repository : https://github.com/Cyan4973/FiniteStateEntropy
+   - Public forum : https://groups.google.com/forum/#!forum/lz4c
+****************************************************************** */
+#ifndef FSEv05_H
+#define FSEv05_H
+
+#if defined (__cplusplus)
+extern "C" {
+#endif
+
+
+/* *****************************************
+*  Includes
+******************************************/
+#include <stddef.h>    /* size_t, ptrdiff_t */
+
+
+/*-****************************************
+*  FSEv05 simple functions
+******************************************/
+size_t FSEv05_decompress(void* dst,  size_t maxDstSize,
+                const void* cSrc, size_t cSrcSize);
+/*!
+FSEv05_decompress():
+    Decompress FSEv05 data from buffer 'cSrc', of size 'cSrcSize',
+    into already allocated destination buffer 'dst', of size 'maxDstSize'.
+    return : size of regenerated data (<= maxDstSize)
+             or an error code, which can be tested using FSEv05_isError()
+
+    ** Important ** : FSEv05_decompress() doesn't decompress non-compressible nor RLE data !!!
+    Why ? : making this distinction requires a header.
+    Header management is intentionally delegated to the user layer, which can better manage special cases.
+*/
+
+
+/* *****************************************
+*  Tool functions
+******************************************/
+/* Error Management */
+unsigned    FSEv05_isError(size_t code);        /* tells if a return value is an error code */
+const char* FSEv05_getErrorName(size_t code);   /* provides error code string (useful for debugging) */
+
+
+
+
+/* *****************************************
+*  FSEv05 detailed API
+******************************************/
+/* *** DECOMPRESSION *** */
+
+/*!
+FSEv05_readNCount():
+   Read compactly saved 'normalizedCounter' from 'rBuffer'.
+   return : size read from 'rBuffer'
+            or an errorCode, which can be tested using FSEv05_isError()
+            maxSymbolValuePtr[0] and tableLogPtr[0] will also be updated with their respective values */
+size_t FSEv05_readNCount (short* normalizedCounter, unsigned* maxSymbolValuePtr, unsigned* tableLogPtr, const void* rBuffer, size_t rBuffSize);
+
+/*!
+Constructor and Destructor of type FSEv05_DTable
+    Note that its size depends on 'tableLog' */
+typedef unsigned FSEv05_DTable;   /* don't allocate that. It's just a way to be more restrictive than void* */
+FSEv05_DTable* FSEv05_createDTable(unsigned tableLog);
+void        FSEv05_freeDTable(FSEv05_DTable* dt);
+
+/*!
+FSEv05_buildDTable():
+   Builds 'dt', which must be already allocated, using FSEv05_createDTable()
+   @return : 0,
+             or an errorCode, which can be tested using FSEv05_isError() */
+size_t FSEv05_buildDTable (FSEv05_DTable* dt, const short* normalizedCounter, unsigned maxSymbolValue, unsigned tableLog);
+
+/*!
+FSEv05_decompress_usingDTable():
+   Decompress compressed source @cSrc of size @cSrcSize using `dt`
+   into `dst` which must be already allocated.
+   @return : size of regenerated data (necessarily <= @dstCapacity)
+             or an errorCode, which can be tested using FSEv05_isError() */
+size_t FSEv05_decompress_usingDTable(void* dst, size_t dstCapacity, const void* cSrc, size_t cSrcSize, const FSEv05_DTable* dt);
+
+
+
+#if defined (__cplusplus)
+}
+#endif
+
+#endif  /* FSEv05_H */
+/* ******************************************************************
+   bitstream
+   Part of FSEv05 library
+   header file (to include)
+   Copyright (C) 2013-2016, Yann Collet.
+
+   BSD 2-Clause License (http://www.opensource.org/licenses/bsd-license.php)
+
+   Redistribution and use in source and binary forms, with or without
+   modification, are permitted provided that the following conditions are
+   met:
+
+       * Redistributions of source code must retain the above copyright
+   notice, this list of conditions and the following disclaimer.
+       * Redistributions in binary form must reproduce the above
+   copyright notice, this list of conditions and the following disclaimer
+   in the documentation and/or other materials provided with the
+   distribution.
+
+   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+   OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+   You can contact the author at :
+   - Source repository : https://github.com/Cyan4973/FiniteStateEntropy
+****************************************************************** */
+#ifndef BITv05STREAM_H_MODULE
+#define BITv05STREAM_H_MODULE
+
+#if defined (__cplusplus)
+extern "C" {
+#endif
+
+
+/*
+*  This API consists of small unitary functions, which highly benefit from being inlined.
+*  Since link-time-optimization is not available for all compilers,
+*  these functions are defined into a .h to be included.
+*/
+
+
+
+/*-********************************************
+*  bitStream decoding API (read backward)
+**********************************************/
+typedef struct
+{
+    size_t   bitContainer;
+    unsigned bitsConsumed;
+    const char* ptr;
+    const char* start;
+} BITv05_DStream_t;
+
+typedef enum { BITv05_DStream_unfinished = 0,
+               BITv05_DStream_endOfBuffer = 1,
+               BITv05_DStream_completed = 2,
+               BITv05_DStream_overflow = 3 } BITv05_DStream_status;  /* result of BITv05_reloadDStream() */
+               /* 1,2,4,8 would be better for bitmap combinations, but slows down performance a bit ... :( */
+
+MEM_STATIC size_t   BITv05_initDStream(BITv05_DStream_t* bitD, const void* srcBuffer, size_t srcSize);
+MEM_STATIC size_t   BITv05_readBits(BITv05_DStream_t* bitD, unsigned nbBits);
+MEM_STATIC BITv05_DStream_status BITv05_reloadDStream(BITv05_DStream_t* bitD);
+MEM_STATIC unsigned BITv05_endOfDStream(const BITv05_DStream_t* bitD);
+
+
+/*!
+* Start by invoking BITv05_initDStream().
+* A chunk of the bitStream is then stored into a local register.
+* Local register size is 64-bits on 64-bits systems, 32-bits on 32-bits systems (size_t).
+* You can then retrieve bitFields stored into the local register, **in reverse order**.
+* Local register is explicitly reloaded from memory by the BITv05_reloadDStream() method.
+* A reload guarantee a minimum of ((8*sizeof(size_t))-7) bits when its result is BITv05_DStream_unfinished.
+* Otherwise, it can be less than that, so proceed accordingly.
+* Checking if DStream has reached its end can be performed with BITv05_endOfDStream()
+*/
+
+
+/*-****************************************
+*  unsafe API
+******************************************/
+MEM_STATIC size_t BITv05_readBitsFast(BITv05_DStream_t* bitD, unsigned nbBits);
+/* faster, but works only if nbBits >= 1 */
+
+
+
+/*-**************************************************************
+*  Helper functions
+****************************************************************/
+MEM_STATIC unsigned BITv05_highbit32 (register U32 val)
+{
+#   if defined(_MSC_VER)   /* Visual */
+    unsigned long r=0;
+    _BitScanReverse ( &r, val );
+    return (unsigned) r;
+#   elif defined(__GNUC__) && (__GNUC__ >= 3)   /* Use GCC Intrinsic */
+    return 31 - __builtin_clz (val);
+#   else   /* Software version */
+    static const unsigned DeBruijnClz[32] = { 0, 9, 1, 10, 13, 21, 2, 29, 11, 14, 16, 18, 22, 25, 3, 30, 8, 12, 20, 28, 15, 17, 24, 7, 19, 27, 23, 6, 26, 5, 4, 31 };
+    U32 v = val;
+    unsigned r;
+    v |= v >> 1;
+    v |= v >> 2;
+    v |= v >> 4;
+    v |= v >> 8;
+    v |= v >> 16;
+    r = DeBruijnClz[ (U32) (v * 0x07C4ACDDU) >> 27];
+    return r;
+#   endif
+}
+
+
+
+/*-********************************************************
+* bitStream decoding
+**********************************************************/
+/*!BITv05_initDStream
+*  Initialize a BITv05_DStream_t.
+*  @bitD : a pointer to an already allocated BITv05_DStream_t structure
+*  @srcBuffer must point at the beginning of a bitStream
+*  @srcSize must be the exact size of the bitStream
+*  @result : size of stream (== srcSize) or an errorCode if a problem is detected
+*/
+MEM_STATIC size_t BITv05_initDStream(BITv05_DStream_t* bitD, const void* srcBuffer, size_t srcSize)
+{
+    if (srcSize < 1) { memset(bitD, 0, sizeof(*bitD)); return ERROR(srcSize_wrong); }
+
+    if (srcSize >=  sizeof(size_t)) {  /* normal case */
+        U32 contain32;
+        bitD->start = (const char*)srcBuffer;
+        bitD->ptr   = (const char*)srcBuffer + srcSize - sizeof(size_t);
+        bitD->bitContainer = MEM_readLEST(bitD->ptr);
+        contain32 = ((const BYTE*)srcBuffer)[srcSize-1];
+        if (contain32 == 0) return ERROR(GENERIC);   /* endMark not present */
+        bitD->bitsConsumed = 8 - BITv05_highbit32(contain32);
+    } else {
+        U32 contain32;
+        bitD->start = (const char*)srcBuffer;
+        bitD->ptr   = bitD->start;
+        bitD->bitContainer = *(const BYTE*)(bitD->start);
+        switch(srcSize)
+        {
+            case 7: bitD->bitContainer += (size_t)(((const BYTE*)(bitD->start))[6]) << (sizeof(size_t)*8 - 16);/* fall-through */
+            case 6: bitD->bitContainer += (size_t)(((const BYTE*)(bitD->start))[5]) << (sizeof(size_t)*8 - 24);/* fall-through */
+            case 5: bitD->bitContainer += (size_t)(((const BYTE*)(bitD->start))[4]) << (sizeof(size_t)*8 - 32);/* fall-through */
+            case 4: bitD->bitContainer += (size_t)(((const BYTE*)(bitD->start))[3]) << 24; /* fall-through */
+            case 3: bitD->bitContainer += (size_t)(((const BYTE*)(bitD->start))[2]) << 16; /* fall-through */
+            case 2: bitD->bitContainer += (size_t)(((const BYTE*)(bitD->start))[1]) <<  8; /* fall-through */
+            default: break;
+        }
+        contain32 = ((const BYTE*)srcBuffer)[srcSize-1];
+        if (contain32 == 0) return ERROR(GENERIC);   /* endMark not present */
+        bitD->bitsConsumed = 8 - BITv05_highbit32(contain32);
+        bitD->bitsConsumed += (U32)(sizeof(size_t) - srcSize)*8;
+    }
+
+    return srcSize;
+}
+
+/*!BITv05_lookBits
+ * Provides next n bits from local register
+ * local register is not modified (bits are still present for next read/look)
+ * On 32-bits, maxNbBits==25
+ * On 64-bits, maxNbBits==57
+ * @return : value extracted
+ */
+MEM_STATIC size_t BITv05_lookBits(BITv05_DStream_t* bitD, U32 nbBits)
+{
+    const U32 bitMask = sizeof(bitD->bitContainer)*8 - 1;
+    return ((bitD->bitContainer << (bitD->bitsConsumed & bitMask)) >> 1) >> ((bitMask-nbBits) & bitMask);
+}
+
+/*! BITv05_lookBitsFast :
+*   unsafe version; only works only if nbBits >= 1 */
+MEM_STATIC size_t BITv05_lookBitsFast(BITv05_DStream_t* bitD, U32 nbBits)
+{
+    const U32 bitMask = sizeof(bitD->bitContainer)*8 - 1;
+    return (bitD->bitContainer << (bitD->bitsConsumed & bitMask)) >> (((bitMask+1)-nbBits) & bitMask);
+}
+
+MEM_STATIC void BITv05_skipBits(BITv05_DStream_t* bitD, U32 nbBits)
+{
+    bitD->bitsConsumed += nbBits;
+}
+
+/*!BITv05_readBits
+ * Read next n bits from local register.
+ * pay attention to not read more than nbBits contained into local register.
+ * @return : extracted value.
+ */
+MEM_STATIC size_t BITv05_readBits(BITv05_DStream_t* bitD, U32 nbBits)
+{
+    size_t value = BITv05_lookBits(bitD, nbBits);
+    BITv05_skipBits(bitD, nbBits);
+    return value;
+}
+
+/*!BITv05_readBitsFast :
+*  unsafe version; only works only if nbBits >= 1 */
+MEM_STATIC size_t BITv05_readBitsFast(BITv05_DStream_t* bitD, U32 nbBits)
+{
+    size_t value = BITv05_lookBitsFast(bitD, nbBits);
+    BITv05_skipBits(bitD, nbBits);
+    return value;
+}
+
+MEM_STATIC BITv05_DStream_status BITv05_reloadDStream(BITv05_DStream_t* bitD)
+{
+    if (bitD->bitsConsumed > (sizeof(bitD->bitContainer)*8))  /* should never happen */
+        return BITv05_DStream_overflow;
+
+    if (bitD->ptr >= bitD->start + sizeof(bitD->bitContainer)) {
+        bitD->ptr -= bitD->bitsConsumed >> 3;
+        bitD->bitsConsumed &= 7;
+        bitD->bitContainer = MEM_readLEST(bitD->ptr);
+        return BITv05_DStream_unfinished;
+    }
+    if (bitD->ptr == bitD->start) {
+        if (bitD->bitsConsumed < sizeof(bitD->bitContainer)*8) return BITv05_DStream_endOfBuffer;
+        return BITv05_DStream_completed;
+    }
+    {
+        U32 nbBytes = bitD->bitsConsumed >> 3;
+        BITv05_DStream_status result = BITv05_DStream_unfinished;
+        if (bitD->ptr - nbBytes < bitD->start) {
+            nbBytes = (U32)(bitD->ptr - bitD->start);  /* ptr > start */
+            result = BITv05_DStream_endOfBuffer;
+        }
+        bitD->ptr -= nbBytes;
+        bitD->bitsConsumed -= nbBytes*8;
+        bitD->bitContainer = MEM_readLEST(bitD->ptr);   /* reminder : srcSize > sizeof(bitD) */
+        return result;
+    }
+}
+
+/*! BITv05_endOfDStream
+*   @return Tells if DStream has reached its exact end
+*/
+MEM_STATIC unsigned BITv05_endOfDStream(const BITv05_DStream_t* DStream)
+{
+    return ((DStream->ptr == DStream->start) && (DStream->bitsConsumed == sizeof(DStream->bitContainer)*8));
+}
+
+#if defined (__cplusplus)
+}
+#endif
+
+#endif /* BITv05STREAM_H_MODULE */
+/* ******************************************************************
+   FSEv05 : Finite State Entropy coder
+   header file for static linking (only)
+   Copyright (C) 2013-2015, Yann Collet
+
+   BSD 2-Clause License (http://www.opensource.org/licenses/bsd-license.php)
+
+   Redistribution and use in source and binary forms, with or without
+   modification, are permitted provided that the following conditions are
+   met:
+
+       * Redistributions of source code must retain the above copyright
+   notice, this list of conditions and the following disclaimer.
+       * Redistributions in binary form must reproduce the above
+   copyright notice, this list of conditions and the following disclaimer
+   in the documentation and/or other materials provided with the
+   distribution.
+
+   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+   OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+   You can contact the author at :
+   - Source repository : https://github.com/Cyan4973/FiniteStateEntropy
+   - Public forum : https://groups.google.com/forum/#!forum/lz4c
+****************************************************************** */
+#ifndef FSEv05_STATIC_H
+#define FSEv05_STATIC_H
+
+#if defined (__cplusplus)
+extern "C" {
+#endif
+
+
+
+/* *****************************************
+*  Static allocation
+*******************************************/
+/* It is possible to statically allocate FSEv05 CTable/DTable as a table of unsigned using below macros */
+#define FSEv05_DTABLE_SIZE_U32(maxTableLog)                   (1 + (1<<maxTableLog))
+
+
+/* *****************************************
+*  FSEv05 advanced API
+*******************************************/
+size_t FSEv05_buildDTable_raw (FSEv05_DTable* dt, unsigned nbBits);
+/* build a fake FSEv05_DTable, designed to read an uncompressed bitstream where each symbol uses nbBits */
+
+size_t FSEv05_buildDTable_rle (FSEv05_DTable* dt, unsigned char symbolValue);
+/* build a fake FSEv05_DTable, designed to always generate the same symbolValue */
+
+
+
+/* *****************************************
+*  FSEv05 symbol decompression API
+*******************************************/
+typedef struct
+{
+    size_t      state;
+    const void* table;   /* precise table may vary, depending on U16 */
+} FSEv05_DState_t;
+
+
+static void     FSEv05_initDState(FSEv05_DState_t* DStatePtr, BITv05_DStream_t* bitD, const FSEv05_DTable* dt);
+
+static unsigned char FSEv05_decodeSymbol(FSEv05_DState_t* DStatePtr, BITv05_DStream_t* bitD);
+
+static unsigned FSEv05_endOfDState(const FSEv05_DState_t* DStatePtr);
+
+/*!
+Let's now decompose FSEv05_decompress_usingDTable() into its unitary components.
+You will decode FSEv05-encoded symbols from the bitStream,
+and also any other bitFields you put in, **in reverse order**.
+
+You will need a few variables to track your bitStream. They are :
+
+BITv05_DStream_t DStream;    // Stream context
+FSEv05_DState_t  DState;     // State context. Multiple ones are possible
+FSEv05_DTable*   DTablePtr;  // Decoding table, provided by FSEv05_buildDTable()
+
+The first thing to do is to init the bitStream.
+    errorCode = BITv05_initDStream(&DStream, srcBuffer, srcSize);
+
+You should then retrieve your initial state(s)
+(in reverse flushing order if you have several ones) :
+    errorCode = FSEv05_initDState(&DState, &DStream, DTablePtr);
+
+You can then decode your data, symbol after symbol.
+For information the maximum number of bits read by FSEv05_decodeSymbol() is 'tableLog'.
+Keep in mind that symbols are decoded in reverse order, like a LIFO stack (last in, first out).
+    unsigned char symbol = FSEv05_decodeSymbol(&DState, &DStream);
+
+You can retrieve any bitfield you eventually stored into the bitStream (in reverse order)
+Note : maximum allowed nbBits is 25, for 32-bits compatibility
+    size_t bitField = BITv05_readBits(&DStream, nbBits);
+
+All above operations only read from local register (which size depends on size_t).
+Refueling the register from memory is manually performed by the reload method.
+    endSignal = FSEv05_reloadDStream(&DStream);
+
+BITv05_reloadDStream() result tells if there is still some more data to read from DStream.
+BITv05_DStream_unfinished : there is still some data left into the DStream.
+BITv05_DStream_endOfBuffer : Dstream reached end of buffer. Its container may no longer be completely filled.
+BITv05_DStream_completed : Dstream reached its exact end, corresponding in general to decompression completed.
+BITv05_DStream_tooFar : Dstream went too far. Decompression result is corrupted.
+
+When reaching end of buffer (BITv05_DStream_endOfBuffer), progress slowly, notably if you decode multiple symbols per loop,
+to properly detect the exact end of stream.
+After each decoded symbol, check if DStream is fully consumed using this simple test :
+    BITv05_reloadDStream(&DStream) >= BITv05_DStream_completed
+
+When it's done, verify decompression is fully completed, by checking both DStream and the relevant states.
+Checking if DStream has reached its end is performed by :
+    BITv05_endOfDStream(&DStream);
+Check also the states. There might be some symbols left there, if some high probability ones (>50%) are possible.
+    FSEv05_endOfDState(&DState);
+*/
+
+
+/* *****************************************
+*  FSEv05 unsafe API
+*******************************************/
+static unsigned char FSEv05_decodeSymbolFast(FSEv05_DState_t* DStatePtr, BITv05_DStream_t* bitD);
+/* faster, but works only if nbBits is always >= 1 (otherwise, result will be corrupted) */
+
+
+/* *****************************************
+*  Implementation of inlined functions
+*******************************************/
+/* decompression */
+
+typedef struct {
+    U16 tableLog;
+    U16 fastMode;
+} FSEv05_DTableHeader;   /* sizeof U32 */
+
+typedef struct
+{
+    unsigned short newState;
+    unsigned char  symbol;
+    unsigned char  nbBits;
+} FSEv05_decode_t;   /* size == U32 */
+
+MEM_STATIC void FSEv05_initDState(FSEv05_DState_t* DStatePtr, BITv05_DStream_t* bitD, const FSEv05_DTable* dt)
+{
+    const void* ptr = dt;
+    const FSEv05_DTableHeader* const DTableH = (const FSEv05_DTableHeader*)ptr;
+    DStatePtr->state = BITv05_readBits(bitD, DTableH->tableLog);
+    BITv05_reloadDStream(bitD);
+    DStatePtr->table = dt + 1;
+}
+
+MEM_STATIC BYTE FSEv05_peakSymbol(FSEv05_DState_t* DStatePtr)
+{
+    const FSEv05_decode_t DInfo = ((const FSEv05_decode_t*)(DStatePtr->table))[DStatePtr->state];
+    return DInfo.symbol;
+}
+
+MEM_STATIC BYTE FSEv05_decodeSymbol(FSEv05_DState_t* DStatePtr, BITv05_DStream_t* bitD)
+{
+    const FSEv05_decode_t DInfo = ((const FSEv05_decode_t*)(DStatePtr->table))[DStatePtr->state];
+    const U32  nbBits = DInfo.nbBits;
+    BYTE symbol = DInfo.symbol;
+    size_t lowBits = BITv05_readBits(bitD, nbBits);
+
+    DStatePtr->state = DInfo.newState + lowBits;
+    return symbol;
+}
+
+MEM_STATIC BYTE FSEv05_decodeSymbolFast(FSEv05_DState_t* DStatePtr, BITv05_DStream_t* bitD)
+{
+    const FSEv05_decode_t DInfo = ((const FSEv05_decode_t*)(DStatePtr->table))[DStatePtr->state];
+    const U32 nbBits = DInfo.nbBits;
+    BYTE symbol = DInfo.symbol;
+    size_t lowBits = BITv05_readBitsFast(bitD, nbBits);
+
+    DStatePtr->state = DInfo.newState + lowBits;
+    return symbol;
+}
+
+MEM_STATIC unsigned FSEv05_endOfDState(const FSEv05_DState_t* DStatePtr)
+{
+    return DStatePtr->state == 0;
+}
+
+
+#if defined (__cplusplus)
+}
+#endif
+
+#endif  /* FSEv05_STATIC_H */
+/* ******************************************************************
+   FSEv05 : Finite State Entropy coder
+   Copyright (C) 2013-2015, Yann Collet.
+
+   BSD 2-Clause License (http://www.opensource.org/licenses/bsd-license.php)
+
+   Redistribution and use in source and binary forms, with or without
+   modification, are permitted provided that the following conditions are
+   met:
+
+       * Redistributions of source code must retain the above copyright
+   notice, this list of conditions and the following disclaimer.
+       * Redistributions in binary form must reproduce the above
+   copyright notice, this list of conditions and the following disclaimer
+   in the documentation and/or other materials provided with the
+   distribution.
+
+   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+   OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+    You can contact the author at :
+    - FSEv05 source repository : https://github.com/Cyan4973/FiniteStateEntropy
+    - Public forum : https://groups.google.com/forum/#!forum/lz4c
+****************************************************************** */
+
+#ifndef FSEv05_COMMONDEFS_ONLY
+
+/* **************************************************************
+*  Tuning parameters
+****************************************************************/
+/*!MEMORY_USAGE :
+*  Memory usage formula : N->2^N Bytes (examples : 10 -> 1KB; 12 -> 4KB ; 16 -> 64KB; 20 -> 1MB; etc.)
+*  Increasing memory usage improves compression ratio
+*  Reduced memory usage can improve speed, due to cache effect
+*  Recommended max value is 14, for 16KB, which nicely fits into Intel x86 L1 cache */
+#define FSEv05_MAX_MEMORY_USAGE 14
+#define FSEv05_DEFAULT_MEMORY_USAGE 13
+
+/*!FSEv05_MAX_SYMBOL_VALUE :
+*  Maximum symbol value authorized.
+*  Required for proper stack allocation */
+#define FSEv05_MAX_SYMBOL_VALUE 255
+
+
+/* **************************************************************
+*  template functions type & suffix
+****************************************************************/
+#define FSEv05_FUNCTION_TYPE BYTE
+#define FSEv05_FUNCTION_EXTENSION
+#define FSEv05_DECODE_TYPE FSEv05_decode_t
+
+
+#endif   /* !FSEv05_COMMONDEFS_ONLY */
+
+/* **************************************************************
+*  Compiler specifics
+****************************************************************/
+#ifdef _MSC_VER    /* Visual Studio */
+#  define FORCE_INLINE static __forceinline
+#  include <intrin.h>                    /* For Visual 2005 */
+#  pragma warning(disable : 4127)        /* disable: C4127: conditional expression is constant */
+#  pragma warning(disable : 4214)        /* disable: C4214: non-int bitfields */
+#else
+#  if defined (__cplusplus) || defined (__STDC_VERSION__) && __STDC_VERSION__ >= 199901L   /* C99 */
+#    ifdef __GNUC__
+#      define FORCE_INLINE static inline __attribute__((always_inline))
+#    else
+#      define FORCE_INLINE static inline
+#    endif
+#  else
+#    define FORCE_INLINE static
+#  endif /* __STDC_VERSION__ */
+#endif
+
+
+/* **************************************************************
+*  Includes
+****************************************************************/
+#include <stdlib.h>     /* malloc, free, qsort */
+#include <string.h>     /* memcpy, memset */
+#include <stdio.h>      /* printf (debug) */
+
+
+
+/* ***************************************************************
+*  Constants
+*****************************************************************/
+#define FSEv05_MAX_TABLELOG  (FSEv05_MAX_MEMORY_USAGE-2)
+#define FSEv05_MAX_TABLESIZE (1U<<FSEv05_MAX_TABLELOG)
+#define FSEv05_MAXTABLESIZE_MASK (FSEv05_MAX_TABLESIZE-1)
+#define FSEv05_DEFAULT_TABLELOG (FSEv05_DEFAULT_MEMORY_USAGE-2)
+#define FSEv05_MIN_TABLELOG 5
+
+#define FSEv05_TABLELOG_ABSOLUTE_MAX 15
+#if FSEv05_MAX_TABLELOG > FSEv05_TABLELOG_ABSOLUTE_MAX
+#error "FSEv05_MAX_TABLELOG > FSEv05_TABLELOG_ABSOLUTE_MAX is not supported"
+#endif
+
+
+/* **************************************************************
+*  Error Management
+****************************************************************/
+#define FSEv05_STATIC_ASSERT(c) { enum { FSEv05_static_assert = 1/(int)(!!(c)) }; }   /* use only *after* variable declarations */
+
+
+/* **************************************************************
+*  Complex types
+****************************************************************/
+typedef U32 DTable_max_t[FSEv05_DTABLE_SIZE_U32(FSEv05_MAX_TABLELOG)];
+
+
+/* **************************************************************
+*  Templates
+****************************************************************/
+/*
+  designed to be included
+  for type-specific functions (template emulation in C)
+  Objective is to write these functions only once, for improved maintenance
+*/
+
+/* safety checks */
+#ifndef FSEv05_FUNCTION_EXTENSION
+#  error "FSEv05_FUNCTION_EXTENSION must be defined"
+#endif
+#ifndef FSEv05_FUNCTION_TYPE
+#  error "FSEv05_FUNCTION_TYPE must be defined"
+#endif
+
+/* Function names */
+#define FSEv05_CAT(X,Y) X##Y
+#define FSEv05_FUNCTION_NAME(X,Y) FSEv05_CAT(X,Y)
+#define FSEv05_TYPE_NAME(X,Y) FSEv05_CAT(X,Y)
+
+
+/* Function templates */
+static U32 FSEv05_tableStep(U32 tableSize) { return (tableSize>>1) + (tableSize>>3) + 3; }
+
+
+
+FSEv05_DTable* FSEv05_createDTable (unsigned tableLog)
+{
+    if (tableLog > FSEv05_TABLELOG_ABSOLUTE_MAX) tableLog = FSEv05_TABLELOG_ABSOLUTE_MAX;
+    return (FSEv05_DTable*)malloc( FSEv05_DTABLE_SIZE_U32(tableLog) * sizeof (U32) );
+}
+
+void FSEv05_freeDTable (FSEv05_DTable* dt)
+{
+    free(dt);
+}
+
+size_t FSEv05_buildDTable(FSEv05_DTable* dt, const short* normalizedCounter, unsigned maxSymbolValue, unsigned tableLog)
+{
+    FSEv05_DTableHeader DTableH;
+    void* const tdPtr = dt+1;   /* because dt is unsigned, 32-bits aligned on 32-bits */
+    FSEv05_DECODE_TYPE* const tableDecode = (FSEv05_DECODE_TYPE*) (tdPtr);
+    const U32 tableSize = 1 << tableLog;
+    const U32 tableMask = tableSize-1;
+    const U32 step = FSEv05_tableStep(tableSize);
+    U16 symbolNext[FSEv05_MAX_SYMBOL_VALUE+1];
+    U32 position = 0;
+    U32 highThreshold = tableSize-1;
+    const S16 largeLimit= (S16)(1 << (tableLog-1));
+    U32 noLarge = 1;
+    U32 s;
+
+    /* Sanity Checks */
+    if (maxSymbolValue > FSEv05_MAX_SYMBOL_VALUE) return ERROR(maxSymbolValue_tooLarge);
+    if (tableLog > FSEv05_MAX_TABLELOG) return ERROR(tableLog_tooLarge);
+
+    /* Init, lay down lowprob symbols */
+    DTableH.tableLog = (U16)tableLog;
+    for (s=0; s<=maxSymbolValue; s++) {
+        if (normalizedCounter[s]==-1) {
+            tableDecode[highThreshold--].symbol = (FSEv05_FUNCTION_TYPE)s;
+            symbolNext[s] = 1;
+        } else {
+            if (normalizedCounter[s] >= largeLimit) noLarge=0;
+            symbolNext[s] = normalizedCounter[s];
+    }   }
+
+    /* Spread symbols */
+    for (s=0; s<=maxSymbolValue; s++) {
+        int i;
+        for (i=0; i<normalizedCounter[s]; i++) {
+            tableDecode[position].symbol = (FSEv05_FUNCTION_TYPE)s;
+            position = (position + step) & tableMask;
+            while (position > highThreshold) position = (position + step) & tableMask;   /* lowprob area */
+    }   }
+
+    if (position!=0) return ERROR(GENERIC);   /* position must reach all cells once, otherwise normalizedCounter is incorrect */
+
+    /* Build Decoding table */
+    {
+        U32 i;
+        for (i=0; i<tableSize; i++) {
+            FSEv05_FUNCTION_TYPE symbol = (FSEv05_FUNCTION_TYPE)(tableDecode[i].symbol);
+            U16 nextState = symbolNext[symbol]++;
+            tableDecode[i].nbBits = (BYTE) (tableLog - BITv05_highbit32 ((U32)nextState) );
+            tableDecode[i].newState = (U16) ( (nextState << tableDecode[i].nbBits) - tableSize);
+    }   }
+
+    DTableH.fastMode = (U16)noLarge;
+    memcpy(dt, &DTableH, sizeof(DTableH));
+    return 0;
+}
+
+
+#ifndef FSEv05_COMMONDEFS_ONLY
+/*-****************************************
+*  FSEv05 helper functions
+******************************************/
+unsigned FSEv05_isError(size_t code) { return ERR_isError(code); }
+
+const char* FSEv05_getErrorName(size_t code) { return ERR_getErrorName(code); }
+
+
+/*-**************************************************************
+*  FSEv05 NCount encoding-decoding
+****************************************************************/
+static short FSEv05_abs(short a) { return a<0 ? -a : a; }
+
+
+size_t FSEv05_readNCount (short* normalizedCounter, unsigned* maxSVPtr, unsigned* tableLogPtr,
+                 const void* headerBuffer, size_t hbSize)
+{
+    const BYTE* const istart = (const BYTE*) headerBuffer;
+    const BYTE* const iend = istart + hbSize;
+    const BYTE* ip = istart;
+    int nbBits;
+    int remaining;
+    int threshold;
+    U32 bitStream;
+    int bitCount;
+    unsigned charnum = 0;
+    int previous0 = 0;
+
+    if (hbSize < 4) return ERROR(srcSize_wrong);
+    bitStream = MEM_readLE32(ip);
+    nbBits = (bitStream & 0xF) + FSEv05_MIN_TABLELOG;   /* extract tableLog */
+    if (nbBits > FSEv05_TABLELOG_ABSOLUTE_MAX) return ERROR(tableLog_tooLarge);
+    bitStream >>= 4;
+    bitCount = 4;
+    *tableLogPtr = nbBits;
+    remaining = (1<<nbBits)+1;
+    threshold = 1<<nbBits;
+    nbBits++;
+
+    while ((remaining>1) && (charnum<=*maxSVPtr)) {
+        if (previous0) {
+            unsigned n0 = charnum;
+            while ((bitStream & 0xFFFF) == 0xFFFF) {
+                n0+=24;
+                if (ip < iend-5) {
+                    ip+=2;
+                    bitStream = MEM_readLE32(ip) >> bitCount;
+                } else {
+                    bitStream >>= 16;
+                    bitCount+=16;
+            }   }
+            while ((bitStream & 3) == 3) {
+                n0+=3;
+                bitStream>>=2;
+                bitCount+=2;
+            }
+            n0 += bitStream & 3;
+            bitCount += 2;
+            if (n0 > *maxSVPtr) return ERROR(maxSymbolValue_tooSmall);
+            while (charnum < n0) normalizedCounter[charnum++] = 0;
+            if ((ip <= iend-7) || (ip + (bitCount>>3) <= iend-4)) {
+                ip += bitCount>>3;
+                bitCount &= 7;
+                bitStream = MEM_readLE32(ip) >> bitCount;
+            }
+            else
+                bitStream >>= 2;
+        }
+        {
+            const short max = (short)((2*threshold-1)-remaining);
+            short count;
+
+            if ((bitStream & (threshold-1)) < (U32)max) {
+                count = (short)(bitStream & (threshold-1));
+                bitCount   += nbBits-1;
+            } else {
+                count = (short)(bitStream & (2*threshold-1));
+                if (count >= threshold) count -= max;
+                bitCount   += nbBits;
+            }
+
+            count--;   /* extra accuracy */
+            remaining -= FSEv05_abs(count);
+            normalizedCounter[charnum++] = count;
+            previous0 = !count;
+            while (remaining < threshold) {
+                nbBits--;
+                threshold >>= 1;
+            }
+
+            if ((ip <= iend-7) || (ip + (bitCount>>3) <= iend-4)) {
+                ip += bitCount>>3;
+                bitCount &= 7;
+            } else {
+                bitCount -= (int)(8 * (iend - 4 - ip));
+                ip = iend - 4;
+            }
+            bitStream = MEM_readLE32(ip) >> (bitCount & 31);
+    }   }
+    if (remaining != 1) return ERROR(GENERIC);
+    *maxSVPtr = charnum-1;
+
+    ip += (bitCount+7)>>3;
+    if ((size_t)(ip-istart) > hbSize) return ERROR(srcSize_wrong);
+    return ip-istart;
+}
+
+
+
+/*-*******************************************************
+*  Decompression (Byte symbols)
+*********************************************************/
+size_t FSEv05_buildDTable_rle (FSEv05_DTable* dt, BYTE symbolValue)
+{
+    void* ptr = dt;
+    FSEv05_DTableHeader* const DTableH = (FSEv05_DTableHeader*)ptr;
+    void* dPtr = dt + 1;
+    FSEv05_decode_t* const cell = (FSEv05_decode_t*)dPtr;
+
+    DTableH->tableLog = 0;
+    DTableH->fastMode = 0;
+
+    cell->newState = 0;
+    cell->symbol = symbolValue;
+    cell->nbBits = 0;
+
+    return 0;
+}
+
+
+size_t FSEv05_buildDTable_raw (FSEv05_DTable* dt, unsigned nbBits)
+{
+    void* ptr = dt;
+    FSEv05_DTableHeader* const DTableH = (FSEv05_DTableHeader*)ptr;
+    void* dPtr = dt + 1;
+    FSEv05_decode_t* const dinfo = (FSEv05_decode_t*)dPtr;
+    const unsigned tableSize = 1 << nbBits;
+    const unsigned tableMask = tableSize - 1;
+    const unsigned maxSymbolValue = tableMask;
+    unsigned s;
+
+    /* Sanity checks */
+    if (nbBits < 1) return ERROR(GENERIC);         /* min size */
+
+    /* Build Decoding Table */
+    DTableH->tableLog = (U16)nbBits;
+    DTableH->fastMode = 1;
+    for (s=0; s<=maxSymbolValue; s++) {
+        dinfo[s].newState = 0;
+        dinfo[s].symbol = (BYTE)s;
+        dinfo[s].nbBits = (BYTE)nbBits;
+    }
+
+    return 0;
+}
+
+FORCE_INLINE size_t FSEv05_decompress_usingDTable_generic(
+          void* dst, size_t maxDstSize,
+    const void* cSrc, size_t cSrcSize,
+    const FSEv05_DTable* dt, const unsigned fast)
+{
+    BYTE* const ostart = (BYTE*) dst;
+    BYTE* op = ostart;
+    BYTE* const omax = op + maxDstSize;
+    BYTE* const olimit = omax-3;
+
+    BITv05_DStream_t bitD;
+    FSEv05_DState_t state1;
+    FSEv05_DState_t state2;
+    size_t errorCode;
+
+    /* Init */
+    errorCode = BITv05_initDStream(&bitD, cSrc, cSrcSize);   /* replaced last arg by maxCompressed Size */
+    if (FSEv05_isError(errorCode)) return errorCode;
+
+    FSEv05_initDState(&state1, &bitD, dt);
+    FSEv05_initDState(&state2, &bitD, dt);
+
+#define FSEv05_GETSYMBOL(statePtr) fast ? FSEv05_decodeSymbolFast(statePtr, &bitD) : FSEv05_decodeSymbol(statePtr, &bitD)
+
+    /* 4 symbols per loop */
+    for ( ; (BITv05_reloadDStream(&bitD)==BITv05_DStream_unfinished) && (op<olimit) ; op+=4) {
+        op[0] = FSEv05_GETSYMBOL(&state1);
+
+        if (FSEv05_MAX_TABLELOG*2+7 > sizeof(bitD.bitContainer)*8)    /* This test must be static */
+            BITv05_reloadDStream(&bitD);
+
+        op[1] = FSEv05_GETSYMBOL(&state2);
+
+        if (FSEv05_MAX_TABLELOG*4+7 > sizeof(bitD.bitContainer)*8)    /* This test must be static */
+            { if (BITv05_reloadDStream(&bitD) > BITv05_DStream_unfinished) { op+=2; break; } }
+
+        op[2] = FSEv05_GETSYMBOL(&state1);
+
+        if (FSEv05_MAX_TABLELOG*2+7 > sizeof(bitD.bitContainer)*8)    /* This test must be static */
+            BITv05_reloadDStream(&bitD);
+
+        op[3] = FSEv05_GETSYMBOL(&state2);
+    }
+
+    /* tail */
+    /* note : BITv05_reloadDStream(&bitD) >= FSEv05_DStream_partiallyFilled; Ends at exactly BITv05_DStream_completed */
+    while (1) {
+        if ( (BITv05_reloadDStream(&bitD)>BITv05_DStream_completed) || (op==omax) || (BITv05_endOfDStream(&bitD) && (fast || FSEv05_endOfDState(&state1))) )
+            break;
+
+        *op++ = FSEv05_GETSYMBOL(&state1);
+
+        if ( (BITv05_reloadDStream(&bitD)>BITv05_DStream_completed) || (op==omax) || (BITv05_endOfDStream(&bitD) && (fast || FSEv05_endOfDState(&state2))) )
+            break;
+
+        *op++ = FSEv05_GETSYMBOL(&state2);
+    }
+
+    /* end ? */
+    if (BITv05_endOfDStream(&bitD) && FSEv05_endOfDState(&state1) && FSEv05_endOfDState(&state2))
+        return op-ostart;
+
+    if (op==omax) return ERROR(dstSize_tooSmall);   /* dst buffer is full, but cSrc unfinished */
+
+    return ERROR(corruption_detected);
+}
+
+
+size_t FSEv05_decompress_usingDTable(void* dst, size_t originalSize,
+                            const void* cSrc, size_t cSrcSize,
+                            const FSEv05_DTable* dt)
+{
+    const void* ptr = dt;
+    const FSEv05_DTableHeader* DTableH = (const FSEv05_DTableHeader*)ptr;
+    const U32 fastMode = DTableH->fastMode;
+
+    /* select fast mode (static) */
+    if (fastMode) return FSEv05_decompress_usingDTable_generic(dst, originalSize, cSrc, cSrcSize, dt, 1);
+    return FSEv05_decompress_usingDTable_generic(dst, originalSize, cSrc, cSrcSize, dt, 0);
+}
+
+
+size_t FSEv05_decompress(void* dst, size_t maxDstSize, const void* cSrc, size_t cSrcSize)
+{
+    const BYTE* const istart = (const BYTE*)cSrc;
+    const BYTE* ip = istart;
+    short counting[FSEv05_MAX_SYMBOL_VALUE+1];
+    DTable_max_t dt;   /* Static analyzer seems unable to understand this table will be properly initialized later */
+    unsigned tableLog;
+    unsigned maxSymbolValue = FSEv05_MAX_SYMBOL_VALUE;
+    size_t errorCode;
+
+    if (cSrcSize<2) return ERROR(srcSize_wrong);   /* too small input size */
+
+    /* normal FSEv05 decoding mode */
+    errorCode = FSEv05_readNCount (counting, &maxSymbolValue, &tableLog, istart, cSrcSize);
+    if (FSEv05_isError(errorCode)) return errorCode;
+    if (errorCode >= cSrcSize) return ERROR(srcSize_wrong);   /* too small input size */
+    ip += errorCode;
+    cSrcSize -= errorCode;
+
+    errorCode = FSEv05_buildDTable (dt, counting, maxSymbolValue, tableLog);
+    if (FSEv05_isError(errorCode)) return errorCode;
+
+    /* always return, even if it is an error code */
+    return FSEv05_decompress_usingDTable (dst, maxDstSize, ip, cSrcSize, dt);
+}
+
+
+
+#endif   /* FSEv05_COMMONDEFS_ONLY */
+/* ******************************************************************
+   Huff0 : Huffman coder, part of New Generation Entropy library
+   header file
+   Copyright (C) 2013-2016, Yann Collet.
+
+   BSD 2-Clause License (http://www.opensource.org/licenses/bsd-license.php)
+
+   Redistribution and use in source and binary forms, with or without
+   modification, are permitted provided that the following conditions are
+   met:
+
+       * Redistributions of source code must retain the above copyright
+   notice, this list of conditions and the following disclaimer.
+       * Redistributions in binary form must reproduce the above
+   copyright notice, this list of conditions and the following disclaimer
+   in the documentation and/or other materials provided with the
+   distribution.
+
+   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+   OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+   You can contact the author at :
+   - Source repository : https://github.com/Cyan4973/FiniteStateEntropy
+****************************************************************** */
+#ifndef HUFF0_H
+#define HUFF0_H
+
+#if defined (__cplusplus)
+extern "C" {
+#endif
+
+
+
+/* ****************************************
+*  Huff0 simple functions
+******************************************/
+size_t HUFv05_decompress(void* dst,  size_t dstSize,
+                const void* cSrc, size_t cSrcSize);
+/*!
+HUFv05_decompress():
+    Decompress Huff0 data from buffer 'cSrc', of size 'cSrcSize',
+    into already allocated destination buffer 'dst', of size 'dstSize'.
+    @dstSize : must be the **exact** size of original (uncompressed) data.
+    Note : in contrast with FSEv05, HUFv05_decompress can regenerate
+           RLE (cSrcSize==1) and uncompressed (cSrcSize==dstSize) data,
+           because it knows size to regenerate.
+    @return : size of regenerated data (== dstSize)
+              or an error code, which can be tested using HUFv05_isError()
+*/
+
+
+/* ****************************************
+*  Tool functions
+******************************************/
+/* Error Management */
+unsigned    HUFv05_isError(size_t code);        /* tells if a return value is an error code */
+const char* HUFv05_getErrorName(size_t code);   /* provides error code string (useful for debugging) */
+
+
+#if defined (__cplusplus)
+}
+#endif
+
+#endif   /* HUF0_H */
+/* ******************************************************************
+   Huff0 : Huffman codec, part of New Generation Entropy library
+   header file, for static linking only
+   Copyright (C) 2013-2016, Yann Collet
+
+   BSD 2-Clause License (http://www.opensource.org/licenses/bsd-license.php)
+
+   Redistribution and use in source and binary forms, with or without
+   modification, are permitted provided that the following conditions are
+   met:
+
+       * Redistributions of source code must retain the above copyright
+   notice, this list of conditions and the following disclaimer.
+       * Redistributions in binary form must reproduce the above
+   copyright notice, this list of conditions and the following disclaimer
+   in the documentation and/or other materials provided with the
+   distribution.
+
+   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+   OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+   You can contact the author at :
+   - Source repository : https://github.com/Cyan4973/FiniteStateEntropy
+****************************************************************** */
+#ifndef HUF0_STATIC_H
+#define HUF0_STATIC_H
+
+#if defined (__cplusplus)
+extern "C" {
+#endif
+
+
+
+/* ****************************************
+*  Static allocation
+******************************************/
+/* static allocation of Huff0's DTable */
+#define HUFv05_DTABLE_SIZE(maxTableLog)   (1 + (1<<maxTableLog))
+#define HUFv05_CREATE_STATIC_DTABLEX2(DTable, maxTableLog) \
+        unsigned short DTable[HUFv05_DTABLE_SIZE(maxTableLog)] = { maxTableLog }
+#define HUFv05_CREATE_STATIC_DTABLEX4(DTable, maxTableLog) \
+        unsigned int DTable[HUFv05_DTABLE_SIZE(maxTableLog)] = { maxTableLog }
+#define HUFv05_CREATE_STATIC_DTABLEX6(DTable, maxTableLog) \
+        unsigned int DTable[HUFv05_DTABLE_SIZE(maxTableLog) * 3 / 2] = { maxTableLog }
+
+
+/* ****************************************
+*  Advanced decompression functions
+******************************************/
+size_t HUFv05_decompress4X2 (void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize);   /* single-symbol decoder */
+size_t HUFv05_decompress4X4 (void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize);   /* double-symbols decoder */
+
+
+/* ****************************************
+*  Huff0 detailed API
+******************************************/
+/*!
+HUFv05_decompress() does the following:
+1. select the decompression algorithm (X2, X4, X6) based on pre-computed heuristics
+2. build Huffman table from save, using HUFv05_readDTableXn()
+3. decode 1 or 4 segments in parallel using HUFv05_decompressSXn_usingDTable
+*/
+size_t HUFv05_readDTableX2 (unsigned short* DTable, const void* src, size_t srcSize);
+size_t HUFv05_readDTableX4 (unsigned* DTable, const void* src, size_t srcSize);
+
+size_t HUFv05_decompress4X2_usingDTable(void* dst, size_t maxDstSize, const void* cSrc, size_t cSrcSize, const unsigned short* DTable);
+size_t HUFv05_decompress4X4_usingDTable(void* dst, size_t maxDstSize, const void* cSrc, size_t cSrcSize, const unsigned* DTable);
+
+
+/* single stream variants */
+
+size_t HUFv05_decompress1X2 (void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize);   /* single-symbol decoder */
+size_t HUFv05_decompress1X4 (void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize);   /* double-symbol decoder */
+
+size_t HUFv05_decompress1X2_usingDTable(void* dst, size_t maxDstSize, const void* cSrc, size_t cSrcSize, const unsigned short* DTable);
+size_t HUFv05_decompress1X4_usingDTable(void* dst, size_t maxDstSize, const void* cSrc, size_t cSrcSize, const unsigned* DTable);
+
+
+
+#if defined (__cplusplus)
+}
+#endif
+
+#endif /* HUF0_STATIC_H */
+/* ******************************************************************
+   Huff0 : Huffman coder, part of New Generation Entropy library
+   Copyright (C) 2013-2015, Yann Collet.
+
+   BSD 2-Clause License (http://www.opensource.org/licenses/bsd-license.php)
+
+   Redistribution and use in source and binary forms, with or without
+   modification, are permitted provided that the following conditions are
+   met:
+
+       * Redistributions of source code must retain the above copyright
+   notice, this list of conditions and the following disclaimer.
+       * Redistributions in binary form must reproduce the above
+   copyright notice, this list of conditions and the following disclaimer
+   in the documentation and/or other materials provided with the
+   distribution.
+
+   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+   OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+    You can contact the author at :
+    - FSEv05+Huff0 source repository : https://github.com/Cyan4973/FiniteStateEntropy
+    - Public forum : https://groups.google.com/forum/#!forum/lz4c
+****************************************************************** */
+
+/* **************************************************************
+*  Compiler specifics
+****************************************************************/
+#if defined (__cplusplus) || (defined (__STDC_VERSION__) && (__STDC_VERSION__ >= 199901L) /* C99 */)
+/* inline is defined */
+#elif defined(_MSC_VER)
+#  define inline __inline
+#else
+#  define inline /* disable inline */
+#endif
+
+
+#ifdef _MSC_VER    /* Visual Studio */
+#  pragma warning(disable : 4127)        /* disable: C4127: conditional expression is constant */
+#endif
+
+
+/* **************************************************************
+*  Includes
+****************************************************************/
+#include <stdlib.h>     /* malloc, free, qsort */
+#include <string.h>     /* memcpy, memset */
+#include <stdio.h>      /* printf (debug) */
+
+
+/* **************************************************************
+*  Constants
+****************************************************************/
+#define HUFv05_ABSOLUTEMAX_TABLELOG  16   /* absolute limit of HUFv05_MAX_TABLELOG. Beyond that value, code does not work */
+#define HUFv05_MAX_TABLELOG  12           /* max configured tableLog (for static allocation); can be modified up to HUFv05_ABSOLUTEMAX_TABLELOG */
+#define HUFv05_DEFAULT_TABLELOG  HUFv05_MAX_TABLELOG   /* tableLog by default, when not specified */
+#define HUFv05_MAX_SYMBOL_VALUE 255
+#if (HUFv05_MAX_TABLELOG > HUFv05_ABSOLUTEMAX_TABLELOG)
+#  error "HUFv05_MAX_TABLELOG is too large !"
+#endif
+
+
+/* **************************************************************
+*  Error Management
+****************************************************************/
+unsigned HUFv05_isError(size_t code) { return ERR_isError(code); }
+const char* HUFv05_getErrorName(size_t code) { return ERR_getErrorName(code); }
+#define HUFv05_STATIC_ASSERT(c) { enum { HUFv05_static_assert = 1/(int)(!!(c)) }; }   /* use only *after* variable declarations */
+
+
+/* *******************************************************
+*  Huff0 : Huffman block decompression
+*********************************************************/
+typedef struct { BYTE byte; BYTE nbBits; } HUFv05_DEltX2;   /* single-symbol decoding */
+
+typedef struct { U16 sequence; BYTE nbBits; BYTE length; } HUFv05_DEltX4;  /* double-symbols decoding */
+
+typedef struct { BYTE symbol; BYTE weight; } sortedSymbol_t;
+
+/*! HUFv05_readStats
+    Read compact Huffman tree, saved by HUFv05_writeCTable
+    @huffWeight : destination buffer
+    @return : size read from `src`
+*/
+static size_t HUFv05_readStats(BYTE* huffWeight, size_t hwSize, U32* rankStats,
+                            U32* nbSymbolsPtr, U32* tableLogPtr,
+                            const void* src, size_t srcSize)
+{
+    U32 weightTotal;
+    U32 tableLog;
+    const BYTE* ip = (const BYTE*) src;
+    size_t iSize;
+    size_t oSize;
+    U32 n;
+
+    if (!srcSize) return ERROR(srcSize_wrong);
+    iSize = ip[0];
+    //memset(huffWeight, 0, hwSize);   /* is not necessary, even though some analyzer complain ... */
+
+    if (iSize >= 128)  { /* special header */
+        if (iSize >= (242)) {  /* RLE */
+            static int l[14] = { 1, 2, 3, 4, 7, 8, 15, 16, 31, 32, 63, 64, 127, 128 };
+            oSize = l[iSize-242];
+            memset(huffWeight, 1, hwSize);
+            iSize = 0;
+        }
+        else {   /* Incompressible */
+            oSize = iSize - 127;
+            iSize = ((oSize+1)/2);
+            if (iSize+1 > srcSize) return ERROR(srcSize_wrong);
+            if (oSize >= hwSize) return ERROR(corruption_detected);
+            ip += 1;
+            for (n=0; n<oSize; n+=2) {
+                huffWeight[n]   = ip[n/2] >> 4;
+                huffWeight[n+1] = ip[n/2] & 15;
+    }   }   }
+    else  {   /* header compressed with FSEv05 (normal case) */
+        if (iSize+1 > srcSize) return ERROR(srcSize_wrong);
+        oSize = FSEv05_decompress(huffWeight, hwSize-1, ip+1, iSize);   /* max (hwSize-1) values decoded, as last one is implied */
+        if (FSEv05_isError(oSize)) return oSize;
+    }
+
+    /* collect weight stats */
+    memset(rankStats, 0, (HUFv05_ABSOLUTEMAX_TABLELOG + 1) * sizeof(U32));
+    weightTotal = 0;
+    for (n=0; n<oSize; n++) {
+        if (huffWeight[n] >= HUFv05_ABSOLUTEMAX_TABLELOG) return ERROR(corruption_detected);
+        rankStats[huffWeight[n]]++;
+        weightTotal += (1 << huffWeight[n]) >> 1;
+    }
+    if (weightTotal == 0) return ERROR(corruption_detected);
+
+    /* get last non-null symbol weight (implied, total must be 2^n) */
+    tableLog = BITv05_highbit32(weightTotal) + 1;
+    if (tableLog > HUFv05_ABSOLUTEMAX_TABLELOG) return ERROR(corruption_detected);
+    {   /* determine last weight */
+        U32 total = 1 << tableLog;
+        U32 rest = total - weightTotal;
+        U32 verif = 1 << BITv05_highbit32(rest);
+        U32 lastWeight = BITv05_highbit32(rest) + 1;
+        if (verif != rest) return ERROR(corruption_detected);    /* last value must be a clean power of 2 */
+        huffWeight[oSize] = (BYTE)lastWeight;
+        rankStats[lastWeight]++;
+    }
+
+    /* check tree construction validity */
+    if ((rankStats[1] < 2) || (rankStats[1] & 1)) return ERROR(corruption_detected);   /* by construction : at least 2 elts of rank 1, must be even */
+
+    /* results */
+    *nbSymbolsPtr = (U32)(oSize+1);
+    *tableLogPtr = tableLog;
+    return iSize+1;
+}
+
+
+/*-***************************/
+/*  single-symbol decoding   */
+/*-***************************/
+
+size_t HUFv05_readDTableX2 (U16* DTable, const void* src, size_t srcSize)
+{
+    BYTE huffWeight[HUFv05_MAX_SYMBOL_VALUE + 1];
+    U32 rankVal[HUFv05_ABSOLUTEMAX_TABLELOG + 1];   /* large enough for values from 0 to 16 */
+    U32 tableLog = 0;
+    size_t iSize;
+    U32 nbSymbols = 0;
+    U32 n;
+    U32 nextRankStart;
+    void* const dtPtr = DTable + 1;
+    HUFv05_DEltX2* const dt = (HUFv05_DEltX2*)dtPtr;
+
+    HUFv05_STATIC_ASSERT(sizeof(HUFv05_DEltX2) == sizeof(U16));   /* if compilation fails here, assertion is false */
+    //memset(huffWeight, 0, sizeof(huffWeight));   /* is not necessary, even though some analyzer complain ... */
+
+    iSize = HUFv05_readStats(huffWeight, HUFv05_MAX_SYMBOL_VALUE + 1, rankVal, &nbSymbols, &tableLog, src, srcSize);
+    if (HUFv05_isError(iSize)) return iSize;
+
+    /* check result */
+    if (tableLog > DTable[0]) return ERROR(tableLog_tooLarge);   /* DTable is too small */
+    DTable[0] = (U16)tableLog;   /* maybe should separate sizeof allocated DTable, from used size of DTable, in case of re-use */
+
+    /* Prepare ranks */
+    nextRankStart = 0;
+    for (n=1; n<=tableLog; n++) {
+        U32 current = nextRankStart;
+        nextRankStart += (rankVal[n] << (n-1));
+        rankVal[n] = current;
+    }
+
+    /* fill DTable */
+    for (n=0; n<nbSymbols; n++) {
+        const U32 w = huffWeight[n];
+        const U32 length = (1 << w) >> 1;
+        U32 i;
+        HUFv05_DEltX2 D;
+        D.byte = (BYTE)n; D.nbBits = (BYTE)(tableLog + 1 - w);
+        for (i = rankVal[w]; i < rankVal[w] + length; i++)
+            dt[i] = D;
+        rankVal[w] += length;
+    }
+
+    return iSize;
+}
+
+static BYTE HUFv05_decodeSymbolX2(BITv05_DStream_t* Dstream, const HUFv05_DEltX2* dt, const U32 dtLog)
+{
+        const size_t val = BITv05_lookBitsFast(Dstream, dtLog); /* note : dtLog >= 1 */
+        const BYTE c = dt[val].byte;
+        BITv05_skipBits(Dstream, dt[val].nbBits);
+        return c;
+}
+
+#define HUFv05_DECODE_SYMBOLX2_0(ptr, DStreamPtr) \
+    *ptr++ = HUFv05_decodeSymbolX2(DStreamPtr, dt, dtLog)
+
+#define HUFv05_DECODE_SYMBOLX2_1(ptr, DStreamPtr) \
+    if (MEM_64bits() || (HUFv05_MAX_TABLELOG<=12)) \
+        HUFv05_DECODE_SYMBOLX2_0(ptr, DStreamPtr)
+
+#define HUFv05_DECODE_SYMBOLX2_2(ptr, DStreamPtr) \
+    if (MEM_64bits()) \
+        HUFv05_DECODE_SYMBOLX2_0(ptr, DStreamPtr)
+
+static inline size_t HUFv05_decodeStreamX2(BYTE* p, BITv05_DStream_t* const bitDPtr, BYTE* const pEnd, const HUFv05_DEltX2* const dt, const U32 dtLog)
+{
+    BYTE* const pStart = p;
+
+    /* up to 4 symbols at a time */
+    while ((BITv05_reloadDStream(bitDPtr) == BITv05_DStream_unfinished) && (p <= pEnd-4)) {
+        HUFv05_DECODE_SYMBOLX2_2(p, bitDPtr);
+        HUFv05_DECODE_SYMBOLX2_1(p, bitDPtr);
+        HUFv05_DECODE_SYMBOLX2_2(p, bitDPtr);
+        HUFv05_DECODE_SYMBOLX2_0(p, bitDPtr);
+    }
+
+    /* closer to the end */
+    while ((BITv05_reloadDStream(bitDPtr) == BITv05_DStream_unfinished) && (p < pEnd))
+        HUFv05_DECODE_SYMBOLX2_0(p, bitDPtr);
+
+    /* no more data to retrieve from bitstream, hence no need to reload */
+    while (p < pEnd)
+        HUFv05_DECODE_SYMBOLX2_0(p, bitDPtr);
+
+    return pEnd-pStart;
+}
+
+size_t HUFv05_decompress1X2_usingDTable(
+          void* dst,  size_t dstSize,
+    const void* cSrc, size_t cSrcSize,
+    const U16* DTable)
+{
+    BYTE* op = (BYTE*)dst;
+    BYTE* const oend = op + dstSize;
+    const U32 dtLog = DTable[0];
+    const void* dtPtr = DTable;
+    const HUFv05_DEltX2* const dt = ((const HUFv05_DEltX2*)dtPtr)+1;
+    BITv05_DStream_t bitD;
+
+    if (dstSize <= cSrcSize) return ERROR(dstSize_tooSmall);
+    { size_t const errorCode = BITv05_initDStream(&bitD, cSrc, cSrcSize);
+      if (HUFv05_isError(errorCode)) return errorCode; }
+
+    HUFv05_decodeStreamX2(op, &bitD, oend, dt, dtLog);
+
+    /* check */
+    if (!BITv05_endOfDStream(&bitD)) return ERROR(corruption_detected);
+
+    return dstSize;
+}
+
+size_t HUFv05_decompress1X2 (void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize)
+{
+    HUFv05_CREATE_STATIC_DTABLEX2(DTable, HUFv05_MAX_TABLELOG);
+    const BYTE* ip = (const BYTE*) cSrc;
+    size_t errorCode;
+
+    errorCode = HUFv05_readDTableX2 (DTable, cSrc, cSrcSize);
+    if (HUFv05_isError(errorCode)) return errorCode;
+    if (errorCode >= cSrcSize) return ERROR(srcSize_wrong);
+    ip += errorCode;
+    cSrcSize -= errorCode;
+
+    return HUFv05_decompress1X2_usingDTable (dst, dstSize, ip, cSrcSize, DTable);
+}
+
+
+size_t HUFv05_decompress4X2_usingDTable(
+          void* dst,  size_t dstSize,
+    const void* cSrc, size_t cSrcSize,
+    const U16* DTable)
+{
+    const BYTE* const istart = (const BYTE*) cSrc;
+    BYTE* const ostart = (BYTE*) dst;
+    BYTE* const oend = ostart + dstSize;
+    const void* const dtPtr = DTable;
+    const HUFv05_DEltX2* const dt = ((const HUFv05_DEltX2*)dtPtr) +1;
+    const U32 dtLog = DTable[0];
+    size_t errorCode;
+
+    /* Init */
+    BITv05_DStream_t bitD1;
+    BITv05_DStream_t bitD2;
+    BITv05_DStream_t bitD3;
+    BITv05_DStream_t bitD4;
+    const size_t length1 = MEM_readLE16(istart);
+    const size_t length2 = MEM_readLE16(istart+2);
+    const size_t length3 = MEM_readLE16(istart+4);
+    size_t length4;
+    const BYTE* const istart1 = istart + 6;  /* jumpTable */
+    const BYTE* const istart2 = istart1 + length1;
+    const BYTE* const istart3 = istart2 + length2;
+    const BYTE* const istart4 = istart3 + length3;
+    const size_t segmentSize = (dstSize+3) / 4;
+    BYTE* const opStart2 = ostart + segmentSize;
+    BYTE* const opStart3 = opStart2 + segmentSize;
+    BYTE* const opStart4 = opStart3 + segmentSize;
+    BYTE* op1 = ostart;
+    BYTE* op2 = opStart2;
+    BYTE* op3 = opStart3;
+    BYTE* op4 = opStart4;
+    U32 endSignal;
+
+    /* Check */
+    if (cSrcSize < 10) return ERROR(corruption_detected);   /* strict minimum : jump table + 1 byte per stream */
+
+    length4 = cSrcSize - (length1 + length2 + length3 + 6);
+    if (length4 > cSrcSize) return ERROR(corruption_detected);   /* overflow */
+    errorCode = BITv05_initDStream(&bitD1, istart1, length1);
+    if (HUFv05_isError(errorCode)) return errorCode;
+    errorCode = BITv05_initDStream(&bitD2, istart2, length2);
+    if (HUFv05_isError(errorCode)) return errorCode;
+    errorCode = BITv05_initDStream(&bitD3, istart3, length3);
+    if (HUFv05_isError(errorCode)) return errorCode;
+    errorCode = BITv05_initDStream(&bitD4, istart4, length4);
+    if (HUFv05_isError(errorCode)) return errorCode;
+
+    /* 16-32 symbols per loop (4-8 symbols per stream) */
+    endSignal = BITv05_reloadDStream(&bitD1) | BITv05_reloadDStream(&bitD2) | BITv05_reloadDStream(&bitD3) | BITv05_reloadDStream(&bitD4);
+    for ( ; (endSignal==BITv05_DStream_unfinished) && (op4<(oend-7)) ; ) {
+        HUFv05_DECODE_SYMBOLX2_2(op1, &bitD1);
+        HUFv05_DECODE_SYMBOLX2_2(op2, &bitD2);
+        HUFv05_DECODE_SYMBOLX2_2(op3, &bitD3);
+        HUFv05_DECODE_SYMBOLX2_2(op4, &bitD4);
+        HUFv05_DECODE_SYMBOLX2_1(op1, &bitD1);
+        HUFv05_DECODE_SYMBOLX2_1(op2, &bitD2);
+        HUFv05_DECODE_SYMBOLX2_1(op3, &bitD3);
+        HUFv05_DECODE_SYMBOLX2_1(op4, &bitD4);
+        HUFv05_DECODE_SYMBOLX2_2(op1, &bitD1);
+        HUFv05_DECODE_SYMBOLX2_2(op2, &bitD2);
+        HUFv05_DECODE_SYMBOLX2_2(op3, &bitD3);
+        HUFv05_DECODE_SYMBOLX2_2(op4, &bitD4);
+        HUFv05_DECODE_SYMBOLX2_0(op1, &bitD1);
+        HUFv05_DECODE_SYMBOLX2_0(op2, &bitD2);
+        HUFv05_DECODE_SYMBOLX2_0(op3, &bitD3);
+        HUFv05_DECODE_SYMBOLX2_0(op4, &bitD4);
+        endSignal = BITv05_reloadDStream(&bitD1) | BITv05_reloadDStream(&bitD2) | BITv05_reloadDStream(&bitD3) | BITv05_reloadDStream(&bitD4);
+    }
+
+    /* check corruption */
+    if (op1 > opStart2) return ERROR(corruption_detected);
+    if (op2 > opStart3) return ERROR(corruption_detected);
+    if (op3 > opStart4) return ERROR(corruption_detected);
+    /* note : op4 supposed already verified within main loop */
+
+    /* finish bitStreams one by one */
+    HUFv05_decodeStreamX2(op1, &bitD1, opStart2, dt, dtLog);
+    HUFv05_decodeStreamX2(op2, &bitD2, opStart3, dt, dtLog);
+    HUFv05_decodeStreamX2(op3, &bitD3, opStart4, dt, dtLog);
+    HUFv05_decodeStreamX2(op4, &bitD4, oend,     dt, dtLog);
+
+    /* check */
+    endSignal = BITv05_endOfDStream(&bitD1) & BITv05_endOfDStream(&bitD2) & BITv05_endOfDStream(&bitD3) & BITv05_endOfDStream(&bitD4);
+    if (!endSignal) return ERROR(corruption_detected);
+
+    /* decoded size */
+    return dstSize;
+}
+
+
+size_t HUFv05_decompress4X2 (void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize)
+{
+    HUFv05_CREATE_STATIC_DTABLEX2(DTable, HUFv05_MAX_TABLELOG);
+    const BYTE* ip = (const BYTE*) cSrc;
+    size_t errorCode;
+
+    errorCode = HUFv05_readDTableX2 (DTable, cSrc, cSrcSize);
+    if (HUFv05_isError(errorCode)) return errorCode;
+    if (errorCode >= cSrcSize) return ERROR(srcSize_wrong);
+    ip += errorCode;
+    cSrcSize -= errorCode;
+
+    return HUFv05_decompress4X2_usingDTable (dst, dstSize, ip, cSrcSize, DTable);
+}
+
+
+/* *************************/
+/* double-symbols decoding */
+/* *************************/
+
+static void HUFv05_fillDTableX4Level2(HUFv05_DEltX4* DTable, U32 sizeLog, const U32 consumed,
+                           const U32* rankValOrigin, const int minWeight,
+                           const sortedSymbol_t* sortedSymbols, const U32 sortedListSize,
+                           U32 nbBitsBaseline, U16 baseSeq)
+{
+    HUFv05_DEltX4 DElt;
+    U32 rankVal[HUFv05_ABSOLUTEMAX_TABLELOG + 1];
+    U32 s;
+
+    /* get pre-calculated rankVal */
+    memcpy(rankVal, rankValOrigin, sizeof(rankVal));
+
+    /* fill skipped values */
+    if (minWeight>1) {
+        U32 i, skipSize = rankVal[minWeight];
+        MEM_writeLE16(&(DElt.sequence), baseSeq);
+        DElt.nbBits   = (BYTE)(consumed);
+        DElt.length   = 1;
+        for (i = 0; i < skipSize; i++)
+            DTable[i] = DElt;
+    }
+
+    /* fill DTable */
+    for (s=0; s<sortedListSize; s++) {   /* note : sortedSymbols already skipped */
+        const U32 symbol = sortedSymbols[s].symbol;
+        const U32 weight = sortedSymbols[s].weight;
+        const U32 nbBits = nbBitsBaseline - weight;
+        const U32 length = 1 << (sizeLog-nbBits);
+        const U32 start = rankVal[weight];
+        U32 i = start;
+        const U32 end = start + length;
+
+        MEM_writeLE16(&(DElt.sequence), (U16)(baseSeq + (symbol << 8)));
+        DElt.nbBits = (BYTE)(nbBits + consumed);
+        DElt.length = 2;
+        do { DTable[i++] = DElt; } while (i<end);   /* since length >= 1 */
+
+        rankVal[weight] += length;
+    }
+}
+
+typedef U32 rankVal_t[HUFv05_ABSOLUTEMAX_TABLELOG][HUFv05_ABSOLUTEMAX_TABLELOG + 1];
+
+static void HUFv05_fillDTableX4(HUFv05_DEltX4* DTable, const U32 targetLog,
+                           const sortedSymbol_t* sortedList, const U32 sortedListSize,
+                           const U32* rankStart, rankVal_t rankValOrigin, const U32 maxWeight,
+                           const U32 nbBitsBaseline)
+{
+    U32 rankVal[HUFv05_ABSOLUTEMAX_TABLELOG + 1];
+    const int scaleLog = nbBitsBaseline - targetLog;   /* note : targetLog >= srcLog, hence scaleLog <= 1 */
+    const U32 minBits  = nbBitsBaseline - maxWeight;
+    U32 s;
+
+    memcpy(rankVal, rankValOrigin, sizeof(rankVal));
+
+    /* fill DTable */
+    for (s=0; s<sortedListSize; s++) {
+        const U16 symbol = sortedList[s].symbol;
+        const U32 weight = sortedList[s].weight;
+        const U32 nbBits = nbBitsBaseline - weight;
+        const U32 start = rankVal[weight];
+        const U32 length = 1 << (targetLog-nbBits);
+
+        if (targetLog-nbBits >= minBits) {   /* enough room for a second symbol */
+            U32 sortedRank;
+            int minWeight = nbBits + scaleLog;
+            if (minWeight < 1) minWeight = 1;
+            sortedRank = rankStart[minWeight];
+            HUFv05_fillDTableX4Level2(DTable+start, targetLog-nbBits, nbBits,
+                           rankValOrigin[nbBits], minWeight,
+                           sortedList+sortedRank, sortedListSize-sortedRank,
+                           nbBitsBaseline, symbol);
+        } else {
+            U32 i;
+            const U32 end = start + length;
+            HUFv05_DEltX4 DElt;
+
+            MEM_writeLE16(&(DElt.sequence), symbol);
+            DElt.nbBits   = (BYTE)(nbBits);
+            DElt.length   = 1;
+            for (i = start; i < end; i++)
+                DTable[i] = DElt;
+        }
+        rankVal[weight] += length;
+    }
+}
+
+size_t HUFv05_readDTableX4 (U32* DTable, const void* src, size_t srcSize)
+{
+    BYTE weightList[HUFv05_MAX_SYMBOL_VALUE + 1];
+    sortedSymbol_t sortedSymbol[HUFv05_MAX_SYMBOL_VALUE + 1];
+    U32 rankStats[HUFv05_ABSOLUTEMAX_TABLELOG + 1] = { 0 };
+    U32 rankStart0[HUFv05_ABSOLUTEMAX_TABLELOG + 2] = { 0 };
+    U32* const rankStart = rankStart0+1;
+    rankVal_t rankVal;
+    U32 tableLog, maxW, sizeOfSort, nbSymbols;
+    const U32 memLog = DTable[0];
+    size_t iSize;
+    void* dtPtr = DTable;
+    HUFv05_DEltX4* const dt = ((HUFv05_DEltX4*)dtPtr) + 1;
+
+    HUFv05_STATIC_ASSERT(sizeof(HUFv05_DEltX4) == sizeof(U32));   /* if compilation fails here, assertion is false */
+    if (memLog > HUFv05_ABSOLUTEMAX_TABLELOG) return ERROR(tableLog_tooLarge);
+    //memset(weightList, 0, sizeof(weightList));   /* is not necessary, even though some analyzer complain ... */
+
+    iSize = HUFv05_readStats(weightList, HUFv05_MAX_SYMBOL_VALUE + 1, rankStats, &nbSymbols, &tableLog, src, srcSize);
+    if (HUFv05_isError(iSize)) return iSize;
+
+    /* check result */
+    if (tableLog > memLog) return ERROR(tableLog_tooLarge);   /* DTable can't fit code depth */
+
+    /* find maxWeight */
+    for (maxW = tableLog; rankStats[maxW]==0; maxW--) {}  /* necessarily finds a solution before 0 */
+
+    /* Get start index of each weight */
+    {
+        U32 w, nextRankStart = 0;
+        for (w=1; w<=maxW; w++) {
+            U32 current = nextRankStart;
+            nextRankStart += rankStats[w];
+            rankStart[w] = current;
+        }
+        rankStart[0] = nextRankStart;   /* put all 0w symbols at the end of sorted list*/
+        sizeOfSort = nextRankStart;
+    }
+
+    /* sort symbols by weight */
+    {
+        U32 s;
+        for (s=0; s<nbSymbols; s++) {
+            U32 w = weightList[s];
+            U32 r = rankStart[w]++;
+            sortedSymbol[r].symbol = (BYTE)s;
+            sortedSymbol[r].weight = (BYTE)w;
+        }
+        rankStart[0] = 0;   /* forget 0w symbols; this is beginning of weight(1) */
+    }
+
+    /* Build rankVal */
+    {
+        const U32 minBits = tableLog+1 - maxW;
+        U32 nextRankVal = 0;
+        U32 w, consumed;
+        const int rescale = (memLog-tableLog) - 1;   /* tableLog <= memLog */
+        U32* rankVal0 = rankVal[0];
+        for (w=1; w<=maxW; w++) {
+            U32 current = nextRankVal;
+            nextRankVal += rankStats[w] << (w+rescale);
+            rankVal0[w] = current;
+        }
+        for (consumed = minBits; consumed <= memLog - minBits; consumed++) {
+            U32* rankValPtr = rankVal[consumed];
+            for (w = 1; w <= maxW; w++) {
+                rankValPtr[w] = rankVal0[w] >> consumed;
+    }   }   }
+
+    HUFv05_fillDTableX4(dt, memLog,
+                   sortedSymbol, sizeOfSort,
+                   rankStart0, rankVal, maxW,
+                   tableLog+1);
+
+    return iSize;
+}
+
+
+static U32 HUFv05_decodeSymbolX4(void* op, BITv05_DStream_t* DStream, const HUFv05_DEltX4* dt, const U32 dtLog)
+{
+    const size_t val = BITv05_lookBitsFast(DStream, dtLog);   /* note : dtLog >= 1 */
+    memcpy(op, dt+val, 2);
+    BITv05_skipBits(DStream, dt[val].nbBits);
+    return dt[val].length;
+}
+
+static U32 HUFv05_decodeLastSymbolX4(void* op, BITv05_DStream_t* DStream, const HUFv05_DEltX4* dt, const U32 dtLog)
+{
+    const size_t val = BITv05_lookBitsFast(DStream, dtLog);   /* note : dtLog >= 1 */
+    memcpy(op, dt+val, 1);
+    if (dt[val].length==1) BITv05_skipBits(DStream, dt[val].nbBits);
+    else {
+        if (DStream->bitsConsumed < (sizeof(DStream->bitContainer)*8)) {
+            BITv05_skipBits(DStream, dt[val].nbBits);
+            if (DStream->bitsConsumed > (sizeof(DStream->bitContainer)*8))
+                DStream->bitsConsumed = (sizeof(DStream->bitContainer)*8);   /* ugly hack; works only because it's the last symbol. Note : can't easily extract nbBits from just this symbol */
+    }   }
+    return 1;
+}
+
+
+#define HUFv05_DECODE_SYMBOLX4_0(ptr, DStreamPtr) \
+    ptr += HUFv05_decodeSymbolX4(ptr, DStreamPtr, dt, dtLog)
+
+#define HUFv05_DECODE_SYMBOLX4_1(ptr, DStreamPtr) \
+    if (MEM_64bits() || (HUFv05_MAX_TABLELOG<=12)) \
+        ptr += HUFv05_decodeSymbolX4(ptr, DStreamPtr, dt, dtLog)
+
+#define HUFv05_DECODE_SYMBOLX4_2(ptr, DStreamPtr) \
+    if (MEM_64bits()) \
+        ptr += HUFv05_decodeSymbolX4(ptr, DStreamPtr, dt, dtLog)
+
+static inline size_t HUFv05_decodeStreamX4(BYTE* p, BITv05_DStream_t* bitDPtr, BYTE* const pEnd, const HUFv05_DEltX4* const dt, const U32 dtLog)
+{
+    BYTE* const pStart = p;
+
+    /* up to 8 symbols at a time */
+    while ((BITv05_reloadDStream(bitDPtr) == BITv05_DStream_unfinished) && (p < pEnd-7)) {
+        HUFv05_DECODE_SYMBOLX4_2(p, bitDPtr);
+        HUFv05_DECODE_SYMBOLX4_1(p, bitDPtr);
+        HUFv05_DECODE_SYMBOLX4_2(p, bitDPtr);
+        HUFv05_DECODE_SYMBOLX4_0(p, bitDPtr);
+    }
+
+    /* closer to the end */
+    while ((BITv05_reloadDStream(bitDPtr) == BITv05_DStream_unfinished) && (p <= pEnd-2))
+        HUFv05_DECODE_SYMBOLX4_0(p, bitDPtr);
+
+    while (p <= pEnd-2)
+        HUFv05_DECODE_SYMBOLX4_0(p, bitDPtr);   /* no need to reload : reached the end of DStream */
+
+    if (p < pEnd)
+        p += HUFv05_decodeLastSymbolX4(p, bitDPtr, dt, dtLog);
+
+    return p-pStart;
+}
+
+
+size_t HUFv05_decompress1X4_usingDTable(
+          void* dst,  size_t dstSize,
+    const void* cSrc, size_t cSrcSize,
+    const U32* DTable)
+{
+    const BYTE* const istart = (const BYTE*) cSrc;
+    BYTE* const ostart = (BYTE*) dst;
+    BYTE* const oend = ostart + dstSize;
+
+    const U32 dtLog = DTable[0];
+    const void* const dtPtr = DTable;
+    const HUFv05_DEltX4* const dt = ((const HUFv05_DEltX4*)dtPtr) +1;
+    size_t errorCode;
+
+    /* Init */
+    BITv05_DStream_t bitD;
+    errorCode = BITv05_initDStream(&bitD, istart, cSrcSize);
+    if (HUFv05_isError(errorCode)) return errorCode;
+
+    /* finish bitStreams one by one */
+    HUFv05_decodeStreamX4(ostart, &bitD, oend,     dt, dtLog);
+
+    /* check */
+    if (!BITv05_endOfDStream(&bitD)) return ERROR(corruption_detected);
+
+    /* decoded size */
+    return dstSize;
+}
+
+size_t HUFv05_decompress1X4 (void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize)
+{
+    HUFv05_CREATE_STATIC_DTABLEX4(DTable, HUFv05_MAX_TABLELOG);
+    const BYTE* ip = (const BYTE*) cSrc;
+
+    size_t hSize = HUFv05_readDTableX4 (DTable, cSrc, cSrcSize);
+    if (HUFv05_isError(hSize)) return hSize;
+    if (hSize >= cSrcSize) return ERROR(srcSize_wrong);
+    ip += hSize;
+    cSrcSize -= hSize;
+
+    return HUFv05_decompress1X4_usingDTable (dst, dstSize, ip, cSrcSize, DTable);
+}
+
+size_t HUFv05_decompress4X4_usingDTable(
+          void* dst,  size_t dstSize,
+    const void* cSrc, size_t cSrcSize,
+    const U32* DTable)
+{
+    if (cSrcSize < 10) return ERROR(corruption_detected);   /* strict minimum : jump table + 1 byte per stream */
+
+    {
+        const BYTE* const istart = (const BYTE*) cSrc;
+        BYTE* const ostart = (BYTE*) dst;
+        BYTE* const oend = ostart + dstSize;
+        const void* const dtPtr = DTable;
+        const HUFv05_DEltX4* const dt = ((const HUFv05_DEltX4*)dtPtr) +1;
+        const U32 dtLog = DTable[0];
+        size_t errorCode;
+
+        /* Init */
+        BITv05_DStream_t bitD1;
+        BITv05_DStream_t bitD2;
+        BITv05_DStream_t bitD3;
+        BITv05_DStream_t bitD4;
+        const size_t length1 = MEM_readLE16(istart);
+        const size_t length2 = MEM_readLE16(istart+2);
+        const size_t length3 = MEM_readLE16(istart+4);
+        size_t length4;
+        const BYTE* const istart1 = istart + 6;  /* jumpTable */
+        const BYTE* const istart2 = istart1 + length1;
+        const BYTE* const istart3 = istart2 + length2;
+        const BYTE* const istart4 = istart3 + length3;
+        const size_t segmentSize = (dstSize+3) / 4;
+        BYTE* const opStart2 = ostart + segmentSize;
+        BYTE* const opStart3 = opStart2 + segmentSize;
+        BYTE* const opStart4 = opStart3 + segmentSize;
+        BYTE* op1 = ostart;
+        BYTE* op2 = opStart2;
+        BYTE* op3 = opStart3;
+        BYTE* op4 = opStart4;
+        U32 endSignal;
+
+        length4 = cSrcSize - (length1 + length2 + length3 + 6);
+        if (length4 > cSrcSize) return ERROR(corruption_detected);   /* overflow */
+        errorCode = BITv05_initDStream(&bitD1, istart1, length1);
+        if (HUFv05_isError(errorCode)) return errorCode;
+        errorCode = BITv05_initDStream(&bitD2, istart2, length2);
+        if (HUFv05_isError(errorCode)) return errorCode;
+        errorCode = BITv05_initDStream(&bitD3, istart3, length3);
+        if (HUFv05_isError(errorCode)) return errorCode;
+        errorCode = BITv05_initDStream(&bitD4, istart4, length4);
+        if (HUFv05_isError(errorCode)) return errorCode;
+
+        /* 16-32 symbols per loop (4-8 symbols per stream) */
+        endSignal = BITv05_reloadDStream(&bitD1) | BITv05_reloadDStream(&bitD2) | BITv05_reloadDStream(&bitD3) | BITv05_reloadDStream(&bitD4);
+        for ( ; (endSignal==BITv05_DStream_unfinished) && (op4<(oend-7)) ; ) {
+            HUFv05_DECODE_SYMBOLX4_2(op1, &bitD1);
+            HUFv05_DECODE_SYMBOLX4_2(op2, &bitD2);
+            HUFv05_DECODE_SYMBOLX4_2(op3, &bitD3);
+            HUFv05_DECODE_SYMBOLX4_2(op4, &bitD4);
+            HUFv05_DECODE_SYMBOLX4_1(op1, &bitD1);
+            HUFv05_DECODE_SYMBOLX4_1(op2, &bitD2);
+            HUFv05_DECODE_SYMBOLX4_1(op3, &bitD3);
+            HUFv05_DECODE_SYMBOLX4_1(op4, &bitD4);
+            HUFv05_DECODE_SYMBOLX4_2(op1, &bitD1);
+            HUFv05_DECODE_SYMBOLX4_2(op2, &bitD2);
+            HUFv05_DECODE_SYMBOLX4_2(op3, &bitD3);
+            HUFv05_DECODE_SYMBOLX4_2(op4, &bitD4);
+            HUFv05_DECODE_SYMBOLX4_0(op1, &bitD1);
+            HUFv05_DECODE_SYMBOLX4_0(op2, &bitD2);
+            HUFv05_DECODE_SYMBOLX4_0(op3, &bitD3);
+            HUFv05_DECODE_SYMBOLX4_0(op4, &bitD4);
+
+            endSignal = BITv05_reloadDStream(&bitD1) | BITv05_reloadDStream(&bitD2) | BITv05_reloadDStream(&bitD3) | BITv05_reloadDStream(&bitD4);
+        }
+
+        /* check corruption */
+        if (op1 > opStart2) return ERROR(corruption_detected);
+        if (op2 > opStart3) return ERROR(corruption_detected);
+        if (op3 > opStart4) return ERROR(corruption_detected);
+        /* note : op4 supposed already verified within main loop */
+
+        /* finish bitStreams one by one */
+        HUFv05_decodeStreamX4(op1, &bitD1, opStart2, dt, dtLog);
+        HUFv05_decodeStreamX4(op2, &bitD2, opStart3, dt, dtLog);
+        HUFv05_decodeStreamX4(op3, &bitD3, opStart4, dt, dtLog);
+        HUFv05_decodeStreamX4(op4, &bitD4, oend,     dt, dtLog);
+
+        /* check */
+        endSignal = BITv05_endOfDStream(&bitD1) & BITv05_endOfDStream(&bitD2) & BITv05_endOfDStream(&bitD3) & BITv05_endOfDStream(&bitD4);
+        if (!endSignal) return ERROR(corruption_detected);
+
+        /* decoded size */
+        return dstSize;
+    }
+}
+
+
+size_t HUFv05_decompress4X4 (void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize)
+{
+    HUFv05_CREATE_STATIC_DTABLEX4(DTable, HUFv05_MAX_TABLELOG);
+    const BYTE* ip = (const BYTE*) cSrc;
+
+    size_t hSize = HUFv05_readDTableX4 (DTable, cSrc, cSrcSize);
+    if (HUFv05_isError(hSize)) return hSize;
+    if (hSize >= cSrcSize) return ERROR(srcSize_wrong);
+    ip += hSize;
+    cSrcSize -= hSize;
+
+    return HUFv05_decompress4X4_usingDTable (dst, dstSize, ip, cSrcSize, DTable);
+}
+
+
+/* ********************************/
+/* Generic decompression selector */
+/* ********************************/
+
+typedef struct { U32 tableTime; U32 decode256Time; } algo_time_t;
+static const algo_time_t algoTime[16 /* Quantization */][3 /* single, double, quad */] =
+{
+    /* single, double, quad */
+    {{0,0}, {1,1}, {2,2}},  /* Q==0 : impossible */
+    {{0,0}, {1,1}, {2,2}},  /* Q==1 : impossible */
+    {{  38,130}, {1313, 74}, {2151, 38}},   /* Q == 2 : 12-18% */
+    {{ 448,128}, {1353, 74}, {2238, 41}},   /* Q == 3 : 18-25% */
+    {{ 556,128}, {1353, 74}, {2238, 47}},   /* Q == 4 : 25-32% */
+    {{ 714,128}, {1418, 74}, {2436, 53}},   /* Q == 5 : 32-38% */
+    {{ 883,128}, {1437, 74}, {2464, 61}},   /* Q == 6 : 38-44% */
+    {{ 897,128}, {1515, 75}, {2622, 68}},   /* Q == 7 : 44-50% */
+    {{ 926,128}, {1613, 75}, {2730, 75}},   /* Q == 8 : 50-56% */
+    {{ 947,128}, {1729, 77}, {3359, 77}},   /* Q == 9 : 56-62% */
+    {{1107,128}, {2083, 81}, {4006, 84}},   /* Q ==10 : 62-69% */
+    {{1177,128}, {2379, 87}, {4785, 88}},   /* Q ==11 : 69-75% */
+    {{1242,128}, {2415, 93}, {5155, 84}},   /* Q ==12 : 75-81% */
+    {{1349,128}, {2644,106}, {5260,106}},   /* Q ==13 : 81-87% */
+    {{1455,128}, {2422,124}, {4174,124}},   /* Q ==14 : 87-93% */
+    {{ 722,128}, {1891,145}, {1936,146}},   /* Q ==15 : 93-99% */
+};
+
+typedef size_t (*decompressionAlgo)(void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize);
+
+size_t HUFv05_decompress (void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize)
+{
+    static const decompressionAlgo decompress[3] = { HUFv05_decompress4X2, HUFv05_decompress4X4, NULL };
+    /* estimate decompression time */
+    U32 Q;
+    const U32 D256 = (U32)(dstSize >> 8);
+    U32 Dtime[3];
+    U32 algoNb = 0;
+    int n;
+
+    /* validation checks */
+    if (dstSize == 0) return ERROR(dstSize_tooSmall);
+    if (cSrcSize >= dstSize) return ERROR(corruption_detected);   /* invalid, or not compressed, but not compressed already dealt with */
+    if (cSrcSize == 1) { memset(dst, *(const BYTE*)cSrc, dstSize); return dstSize; }   /* RLE */
+
+    /* decoder timing evaluation */
+    Q = (U32)(cSrcSize * 16 / dstSize);   /* Q < 16 since dstSize > cSrcSize */
+    for (n=0; n<3; n++)
+        Dtime[n] = algoTime[Q][n].tableTime + (algoTime[Q][n].decode256Time * D256);
+
+    Dtime[1] += Dtime[1] >> 4; Dtime[2] += Dtime[2] >> 3; /* advantage to algorithms using less memory, for cache eviction */
+
+    if (Dtime[1] < Dtime[0]) algoNb = 1;
+
+    return decompress[algoNb](dst, dstSize, cSrc, cSrcSize);
+
+    //return HUFv05_decompress4X2(dst, dstSize, cSrc, cSrcSize);   /* multi-streams single-symbol decoding */
+    //return HUFv05_decompress4X4(dst, dstSize, cSrc, cSrcSize);   /* multi-streams double-symbols decoding */
+    //return HUFv05_decompress4X6(dst, dstSize, cSrc, cSrcSize);   /* multi-streams quad-symbols decoding */
+}
+/*
+    zstd - standard compression library
+    Copyright (C) 2014-2016, Yann Collet.
+
+    BSD 2-Clause License (http://www.opensource.org/licenses/bsd-license.php)
+
+    Redistribution and use in source and binary forms, with or without
+    modification, are permitted provided that the following conditions are
+    met:
+    * Redistributions of source code must retain the above copyright
+    notice, this list of conditions and the following disclaimer.
+    * Redistributions in binary form must reproduce the above
+    copyright notice, this list of conditions and the following disclaimer
+    in the documentation and/or other materials provided with the
+    distribution.
+    THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+    "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+    LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+    A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+    OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+    SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+    LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+    DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+    THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+    (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+    OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+    You can contact the author at :
+    - zstd source repository : https://github.com/Cyan4973/zstd
+*/
+
+/* ***************************************************************
+*  Tuning parameters
+*****************************************************************/
+/*!
+ * HEAPMODE :
+ * Select how default decompression function ZSTDv05_decompress() will allocate memory,
+ * in memory stack (0), or in memory heap (1, requires malloc())
+ */
+#ifndef ZSTDv05_HEAPMODE
+#  define ZSTDv05_HEAPMODE 1
+#endif
+
+
+/*-*******************************************************
+*  Dependencies
+*********************************************************/
+#include <stdlib.h>      /* calloc */
+#include <string.h>      /* memcpy, memmove */
+#include <stdio.h>       /* debug only : printf */
+
+
+/*-*******************************************************
+*  Compiler specifics
+*********************************************************/
+#ifdef _MSC_VER    /* Visual Studio */
+#  include <intrin.h>                    /* For Visual 2005 */
+#  pragma warning(disable : 4127)        /* disable: C4127: conditional expression is constant */
+#  pragma warning(disable : 4324)        /* disable: C4324: padded structure */
+#endif
+
+
+/*-*************************************
+*  Local types
+***************************************/
+typedef struct
+{
+    blockType_t blockType;
+    U32 origSize;
+} blockProperties_t;
+
+
+/* *******************************************************
+*  Memory operations
+**********************************************************/
+static void ZSTDv05_copy4(void* dst, const void* src) { memcpy(dst, src, 4); }
+
+
+/* *************************************
+*  Error Management
+***************************************/
+/*! ZSTDv05_isError() :
+*   tells if a return value is an error code */
+unsigned ZSTDv05_isError(size_t code) { return ERR_isError(code); }
+
+
+/*! ZSTDv05_getErrorName() :
+*   provides error code string (useful for debugging) */
+const char* ZSTDv05_getErrorName(size_t code) { return ERR_getErrorName(code); }
+
+
+/* *************************************************************
+*   Context management
+***************************************************************/
+typedef enum { ZSTDv05ds_getFrameHeaderSize, ZSTDv05ds_decodeFrameHeader,
+               ZSTDv05ds_decodeBlockHeader, ZSTDv05ds_decompressBlock } ZSTDv05_dStage;
+
+struct ZSTDv05_DCtx_s
+{
+    FSEv05_DTable LLTable[FSEv05_DTABLE_SIZE_U32(LLFSEv05Log)];
+    FSEv05_DTable OffTable[FSEv05_DTABLE_SIZE_U32(OffFSEv05Log)];
+    FSEv05_DTable MLTable[FSEv05_DTABLE_SIZE_U32(MLFSEv05Log)];
+    unsigned   hufTableX4[HUFv05_DTABLE_SIZE(HufLog)];
+    const void* previousDstEnd;
+    const void* base;
+    const void* vBase;
+    const void* dictEnd;
+    size_t expected;
+    size_t headerSize;
+    ZSTDv05_parameters params;
+    blockType_t bType;   /* used in ZSTDv05_decompressContinue(), to transfer blockType between header decoding and block decoding stages */
+    ZSTDv05_dStage stage;
+    U32 flagStaticTables;
+    const BYTE* litPtr;
+    size_t litSize;
+    BYTE litBuffer[BLOCKSIZE + WILDCOPY_OVERLENGTH];
+    BYTE headerBuffer[ZSTDv05_frameHeaderSize_max];
+};  /* typedef'd to ZSTDv05_DCtx within "zstd_static.h" */
+
+size_t ZSTDv05_sizeofDCtx (void) { return sizeof(ZSTDv05_DCtx); }
+
+size_t ZSTDv05_decompressBegin(ZSTDv05_DCtx* dctx)
+{
+    dctx->expected = ZSTDv05_frameHeaderSize_min;
+    dctx->stage = ZSTDv05ds_getFrameHeaderSize;
+    dctx->previousDstEnd = NULL;
+    dctx->base = NULL;
+    dctx->vBase = NULL;
+    dctx->dictEnd = NULL;
+    dctx->hufTableX4[0] = HufLog;
+    dctx->flagStaticTables = 0;
+    return 0;
+}
+
+ZSTDv05_DCtx* ZSTDv05_createDCtx(void)
+{
+    ZSTDv05_DCtx* dctx = (ZSTDv05_DCtx*)malloc(sizeof(ZSTDv05_DCtx));
+    if (dctx==NULL) return NULL;
+    ZSTDv05_decompressBegin(dctx);
+    return dctx;
+}
+
+size_t ZSTDv05_freeDCtx(ZSTDv05_DCtx* dctx)
+{
+    free(dctx);
+    return 0;   /* reserved as a potential error code in the future */
+}
+
+void ZSTDv05_copyDCtx(ZSTDv05_DCtx* dstDCtx, const ZSTDv05_DCtx* srcDCtx)
+{
+    memcpy(dstDCtx, srcDCtx,
+           sizeof(ZSTDv05_DCtx) - (BLOCKSIZE+WILDCOPY_OVERLENGTH + ZSTDv05_frameHeaderSize_max));  /* no need to copy workspace */
+}
+
+
+/* *************************************************************
+*   Decompression section
+***************************************************************/
+
+/* Frame format description
+   Frame Header -  [ Block Header - Block ] - Frame End
+   1) Frame Header
+      - 4 bytes - Magic Number : ZSTDv05_MAGICNUMBER (defined within zstd_internal.h)
+      - 1 byte  - Window Descriptor
+   2) Block Header
+      - 3 bytes, starting with a 2-bits descriptor
+                 Uncompressed, Compressed, Frame End, unused
+   3) Block
+      See Block Format Description
+   4) Frame End
+      - 3 bytes, compatible with Block Header
+*/
+
+/* Block format description
+
+   Block = Literal Section - Sequences Section
+   Prerequisite : size of (compressed) block, maximum size of regenerated data
+
+   1) Literal Section
+
+   1.1) Header : 1-5 bytes
+        flags: 2 bits
+            00 compressed by Huff0
+            01 unused
+            10 is Raw (uncompressed)
+            11 is Rle
+            Note : using 01 => Huff0 with precomputed table ?
+            Note : delta map ? => compressed ?
+
+   1.1.1) Huff0-compressed literal block : 3-5 bytes
+            srcSize < 1 KB => 3 bytes (2-2-10-10) => single stream
+            srcSize < 1 KB => 3 bytes (2-2-10-10)
+            srcSize < 16KB => 4 bytes (2-2-14-14)
+            else           => 5 bytes (2-2-18-18)
+            big endian convention
+
+   1.1.2) Raw (uncompressed) literal block header : 1-3 bytes
+        size :  5 bits: (IS_RAW<<6) + (0<<4) + size
+               12 bits: (IS_RAW<<6) + (2<<4) + (size>>8)
+                        size&255
+               20 bits: (IS_RAW<<6) + (3<<4) + (size>>16)
+                        size>>8&255
+                        size&255
+
+   1.1.3) Rle (repeated single byte) literal block header : 1-3 bytes
+        size :  5 bits: (IS_RLE<<6) + (0<<4) + size
+               12 bits: (IS_RLE<<6) + (2<<4) + (size>>8)
+                        size&255
+               20 bits: (IS_RLE<<6) + (3<<4) + (size>>16)
+                        size>>8&255
+                        size&255
+
+   1.1.4) Huff0-compressed literal block, using precomputed CTables : 3-5 bytes
+            srcSize < 1 KB => 3 bytes (2-2-10-10) => single stream
+            srcSize < 1 KB => 3 bytes (2-2-10-10)
+            srcSize < 16KB => 4 bytes (2-2-14-14)
+            else           => 5 bytes (2-2-18-18)
+            big endian convention
+
+        1- CTable available (stored into workspace ?)
+        2- Small input (fast heuristic ? Full comparison ? depend on clevel ?)
+
+
+   1.2) Literal block content
+
+   1.2.1) Huff0 block, using sizes from header
+        See Huff0 format
+
+   1.2.2) Huff0 block, using prepared table
+
+   1.2.3) Raw content
+
+   1.2.4) single byte
+
+
+   2) Sequences section
+      TO DO
+*/
+
+
+/** ZSTDv05_decodeFrameHeader_Part1() :
+*   decode the 1st part of the Frame Header, which tells Frame Header size.
+*   srcSize must be == ZSTDv05_frameHeaderSize_min.
+*   @return : the full size of the Frame Header */
+static size_t ZSTDv05_decodeFrameHeader_Part1(ZSTDv05_DCtx* zc, const void* src, size_t srcSize)
+{
+    U32 magicNumber;
+    if (srcSize != ZSTDv05_frameHeaderSize_min)
+        return ERROR(srcSize_wrong);
+    magicNumber = MEM_readLE32(src);
+    if (magicNumber != ZSTDv05_MAGICNUMBER) return ERROR(prefix_unknown);
+    zc->headerSize = ZSTDv05_frameHeaderSize_min;
+    return zc->headerSize;
+}
+
+
+size_t ZSTDv05_getFrameParams(ZSTDv05_parameters* params, const void* src, size_t srcSize)
+{
+    U32 magicNumber;
+    if (srcSize < ZSTDv05_frameHeaderSize_min) return ZSTDv05_frameHeaderSize_max;
+    magicNumber = MEM_readLE32(src);
+    if (magicNumber != ZSTDv05_MAGICNUMBER) return ERROR(prefix_unknown);
+    memset(params, 0, sizeof(*params));
+    params->windowLog = (((const BYTE*)src)[4] & 15) + ZSTDv05_WINDOWLOG_ABSOLUTEMIN;
+    if ((((const BYTE*)src)[4] >> 4) != 0) return ERROR(frameParameter_unsupported);   /* reserved bits */
+    return 0;
+}
+
+/** ZSTDv05_decodeFrameHeader_Part2() :
+*   decode the full Frame Header.
+*   srcSize must be the size provided by ZSTDv05_decodeFrameHeader_Part1().
+*   @return : 0, or an error code, which can be tested using ZSTDv05_isError() */
+static size_t ZSTDv05_decodeFrameHeader_Part2(ZSTDv05_DCtx* zc, const void* src, size_t srcSize)
+{
+    size_t result;
+    if (srcSize != zc->headerSize)
+        return ERROR(srcSize_wrong);
+    result = ZSTDv05_getFrameParams(&(zc->params), src, srcSize);
+    if ((MEM_32bits()) && (zc->params.windowLog > 25)) return ERROR(frameParameter_unsupportedBy32bits);
+    return result;
+}
+
+
+size_t ZSTDv05_getcBlockSize(const void* src, size_t srcSize, blockProperties_t* bpPtr)
+{
+    const BYTE* const in = (const BYTE* const)src;
+    BYTE headerFlags;
+    U32 cSize;
+
+    if (srcSize < 3)
+        return ERROR(srcSize_wrong);
+
+    headerFlags = *in;
+    cSize = in[2] + (in[1]<<8) + ((in[0] & 7)<<16);
+
+    bpPtr->blockType = (blockType_t)(headerFlags >> 6);
+    bpPtr->origSize = (bpPtr->blockType == bt_rle) ? cSize : 0;
+
+    if (bpPtr->blockType == bt_end) return 0;
+    if (bpPtr->blockType == bt_rle) return 1;
+    return cSize;
+}
+
+
+static size_t ZSTDv05_copyRawBlock(void* dst, size_t maxDstSize, const void* src, size_t srcSize)
+{
+    if (srcSize > maxDstSize) return ERROR(dstSize_tooSmall);
+    memcpy(dst, src, srcSize);
+    return srcSize;
+}
+
+
+/*! ZSTDv05_decodeLiteralsBlock() :
+    @return : nb of bytes read from src (< srcSize ) */
+size_t ZSTDv05_decodeLiteralsBlock(ZSTDv05_DCtx* dctx,
+                          const void* src, size_t srcSize)   /* note : srcSize < BLOCKSIZE */
+{
+    const BYTE* const istart = (const BYTE*) src;
+
+    /* any compressed block with literals segment must be at least this size */
+    if (srcSize < MIN_CBLOCK_SIZE) return ERROR(corruption_detected);
+
+    switch(istart[0]>> 6)
+    {
+    case IS_HUFv05:
+        {
+            size_t litSize, litCSize, singleStream=0;
+            U32 lhSize = ((istart[0]) >> 4) & 3;
+            if (srcSize < 5) return ERROR(corruption_detected);   /* srcSize >= MIN_CBLOCK_SIZE == 3; here we need up to 5 for case 3 */
+            switch(lhSize)
+            {
+            case 0: case 1: default:   /* note : default is impossible, since lhSize into [0..3] */
+                /* 2 - 2 - 10 - 10 */
+                lhSize=3;
+                singleStream = istart[0] & 16;
+                litSize  = ((istart[0] & 15) << 6) + (istart[1] >> 2);
+                litCSize = ((istart[1] &  3) << 8) + istart[2];
+                break;
+            case 2:
+                /* 2 - 2 - 14 - 14 */
+                lhSize=4;
+                litSize  = ((istart[0] & 15) << 10) + (istart[1] << 2) + (istart[2] >> 6);
+                litCSize = ((istart[2] & 63) <<  8) + istart[3];
+                break;
+            case 3:
+                /* 2 - 2 - 18 - 18 */
+                lhSize=5;
+                litSize  = ((istart[0] & 15) << 14) + (istart[1] << 6) + (istart[2] >> 2);
+                litCSize = ((istart[2] &  3) << 16) + (istart[3] << 8) + istart[4];
+                break;
+            }
+            if (litSize > BLOCKSIZE) return ERROR(corruption_detected);
+            if (litCSize + lhSize > srcSize) return ERROR(corruption_detected);
+
+            if (HUFv05_isError(singleStream ?
+                            HUFv05_decompress1X2(dctx->litBuffer, litSize, istart+lhSize, litCSize) :
+                            HUFv05_decompress   (dctx->litBuffer, litSize, istart+lhSize, litCSize) ))
+                return ERROR(corruption_detected);
+
+            dctx->litPtr = dctx->litBuffer;
+            dctx->litSize = litSize;
+            memset(dctx->litBuffer + dctx->litSize, 0, WILDCOPY_OVERLENGTH);
+            return litCSize + lhSize;
+        }
+    case IS_PCH:
+        {
+            size_t errorCode;
+            size_t litSize, litCSize;
+            U32 lhSize = ((istart[0]) >> 4) & 3;
+            if (lhSize != 1)  /* only case supported for now : small litSize, single stream */
+                return ERROR(corruption_detected);
+            if (!dctx->flagStaticTables)
+                return ERROR(dictionary_corrupted);
+
+            /* 2 - 2 - 10 - 10 */
+            lhSize=3;
+            litSize  = ((istart[0] & 15) << 6) + (istart[1] >> 2);
+            litCSize = ((istart[1] &  3) << 8) + istart[2];
+            if (litCSize + lhSize > srcSize) return ERROR(corruption_detected);
+
+            errorCode = HUFv05_decompress1X4_usingDTable(dctx->litBuffer, litSize, istart+lhSize, litCSize, dctx->hufTableX4);
+            if (HUFv05_isError(errorCode)) return ERROR(corruption_detected);
+
+            dctx->litPtr = dctx->litBuffer;
+            dctx->litSize = litSize;
+            memset(dctx->litBuffer + dctx->litSize, 0, WILDCOPY_OVERLENGTH);
+            return litCSize + lhSize;
+        }
+    case IS_RAW:
+        {
+            size_t litSize;
+            U32 lhSize = ((istart[0]) >> 4) & 3;
+            switch(lhSize)
+            {
+            case 0: case 1: default:   /* note : default is impossible, since lhSize into [0..3] */
+                lhSize=1;
+                litSize = istart[0] & 31;
+                break;
+            case 2:
+                litSize = ((istart[0] & 15) << 8) + istart[1];
+                break;
+            case 3:
+                litSize = ((istart[0] & 15) << 16) + (istart[1] << 8) + istart[2];
+                break;
+            }
+
+            if (lhSize+litSize+WILDCOPY_OVERLENGTH > srcSize) {  /* risk reading beyond src buffer with wildcopy */
+                if (litSize+lhSize > srcSize) return ERROR(corruption_detected);
+                memcpy(dctx->litBuffer, istart+lhSize, litSize);
+                dctx->litPtr = dctx->litBuffer;
+                dctx->litSize = litSize;
+                memset(dctx->litBuffer + dctx->litSize, 0, WILDCOPY_OVERLENGTH);
+                return lhSize+litSize;
+            }
+            /* direct reference into compressed stream */
+            dctx->litPtr = istart+lhSize;
+            dctx->litSize = litSize;
+            return lhSize+litSize;
+        }
+    case IS_RLE:
+        {
+            size_t litSize;
+            U32 lhSize = ((istart[0]) >> 4) & 3;
+            switch(lhSize)
+            {
+            case 0: case 1: default:   /* note : default is impossible, since lhSize into [0..3] */
+                lhSize = 1;
+                litSize = istart[0] & 31;
+                break;
+            case 2:
+                litSize = ((istart[0] & 15) << 8) + istart[1];
+                break;
+            case 3:
+                litSize = ((istart[0] & 15) << 16) + (istart[1] << 8) + istart[2];
+                if (srcSize<4) return ERROR(corruption_detected);   /* srcSize >= MIN_CBLOCK_SIZE == 3; here we need lhSize+1 = 4 */
+                break;
+            }
+            if (litSize > BLOCKSIZE) return ERROR(corruption_detected);
+            memset(dctx->litBuffer, istart[lhSize], litSize + WILDCOPY_OVERLENGTH);
+            dctx->litPtr = dctx->litBuffer;
+            dctx->litSize = litSize;
+            return lhSize+1;
+        }
+    default:
+        return ERROR(corruption_detected);   /* impossible */
+    }
+}
+
+
+size_t ZSTDv05_decodeSeqHeaders(int* nbSeq, const BYTE** dumpsPtr, size_t* dumpsLengthPtr,
+                         FSEv05_DTable* DTableLL, FSEv05_DTable* DTableML, FSEv05_DTable* DTableOffb,
+                         const void* src, size_t srcSize, U32 flagStaticTable)
+{
+    const BYTE* const istart = (const BYTE* const)src;
+    const BYTE* ip = istart;
+    const BYTE* const iend = istart + srcSize;
+    U32 LLtype, Offtype, MLtype;
+    U32 LLlog, Offlog, MLlog;
+    size_t dumpsLength;
+
+    /* check */
+    if (srcSize < MIN_SEQUENCES_SIZE)
+        return ERROR(srcSize_wrong);
+
+    /* SeqHead */
+    *nbSeq = *ip++;
+    if (*nbSeq==0) return 1;
+    if (*nbSeq >= 128) {
+        if (ip >= iend) return ERROR(srcSize_wrong);
+        *nbSeq = ((nbSeq[0]-128)<<8) + *ip++;
+    }
+
+    if (ip >= iend) return ERROR(srcSize_wrong);
+    LLtype  = *ip >> 6;
+    Offtype = (*ip >> 4) & 3;
+    MLtype  = (*ip >> 2) & 3;
+    if (*ip & 2) {
+        if (ip+3 > iend) return ERROR(srcSize_wrong);
+        dumpsLength  = ip[2];
+        dumpsLength += ip[1] << 8;
+        ip += 3;
+    } else {
+        if (ip+2 > iend) return ERROR(srcSize_wrong);
+        dumpsLength  = ip[1];
+        dumpsLength += (ip[0] & 1) << 8;
+        ip += 2;
+    }
+    *dumpsPtr = ip;
+    ip += dumpsLength;
+    *dumpsLengthPtr = dumpsLength;
+
+    /* check */
+    if (ip > iend-3) return ERROR(srcSize_wrong); /* min : all 3 are "raw", hence no header, but at least xxLog bits per type */
+
+    /* sequences */
+    {
+        S16 norm[MaxML+1];    /* assumption : MaxML >= MaxLL >= MaxOff */
+        size_t headerSize;
+
+        /* Build DTables */
+        switch(LLtype)
+        {
+        case FSEv05_ENCODING_RLE :
+            LLlog = 0;
+            FSEv05_buildDTable_rle(DTableLL, *ip++);
+            break;
+        case FSEv05_ENCODING_RAW :
+            LLlog = LLbits;
+            FSEv05_buildDTable_raw(DTableLL, LLbits);
+            break;
+        case FSEv05_ENCODING_STATIC:
+            if (!flagStaticTable) return ERROR(corruption_detected);
+            break;
+        case FSEv05_ENCODING_DYNAMIC :
+        default :   /* impossible */
+            {   U32 max = MaxLL;
+                headerSize = FSEv05_readNCount(norm, &max, &LLlog, ip, iend-ip);
+                if (FSEv05_isError(headerSize)) return ERROR(GENERIC);
+                if (LLlog > LLFSEv05Log) return ERROR(corruption_detected);
+                ip += headerSize;
+                FSEv05_buildDTable(DTableLL, norm, max, LLlog);
+        }   }
+
+        switch(Offtype)
+        {
+        case FSEv05_ENCODING_RLE :
+            Offlog = 0;
+            if (ip > iend-2) return ERROR(srcSize_wrong);   /* min : "raw", hence no header, but at least xxLog bits */
+            FSEv05_buildDTable_rle(DTableOffb, *ip++ & MaxOff); /* if *ip > MaxOff, data is corrupted */
+            break;
+        case FSEv05_ENCODING_RAW :
+            Offlog = Offbits;
+            FSEv05_buildDTable_raw(DTableOffb, Offbits);
+            break;
+        case FSEv05_ENCODING_STATIC:
+            if (!flagStaticTable) return ERROR(corruption_detected);
+            break;
+        case FSEv05_ENCODING_DYNAMIC :
+        default :   /* impossible */
+            {   U32 max = MaxOff;
+                headerSize = FSEv05_readNCount(norm, &max, &Offlog, ip, iend-ip);
+                if (FSEv05_isError(headerSize)) return ERROR(GENERIC);
+                if (Offlog > OffFSEv05Log) return ERROR(corruption_detected);
+                ip += headerSize;
+                FSEv05_buildDTable(DTableOffb, norm, max, Offlog);
+        }   }
+
+        switch(MLtype)
+        {
+        case FSEv05_ENCODING_RLE :
+            MLlog = 0;
+            if (ip > iend-2) return ERROR(srcSize_wrong); /* min : "raw", hence no header, but at least xxLog bits */
+            FSEv05_buildDTable_rle(DTableML, *ip++);
+            break;
+        case FSEv05_ENCODING_RAW :
+            MLlog = MLbits;
+            FSEv05_buildDTable_raw(DTableML, MLbits);
+            break;
+        case FSEv05_ENCODING_STATIC:
+            if (!flagStaticTable) return ERROR(corruption_detected);
+            break;
+        case FSEv05_ENCODING_DYNAMIC :
+        default :   /* impossible */
+            {   U32 max = MaxML;
+                headerSize = FSEv05_readNCount(norm, &max, &MLlog, ip, iend-ip);
+                if (FSEv05_isError(headerSize)) return ERROR(GENERIC);
+                if (MLlog > MLFSEv05Log) return ERROR(corruption_detected);
+                ip += headerSize;
+                FSEv05_buildDTable(DTableML, norm, max, MLlog);
+    }   }   }
+
+    return ip-istart;
+}
+
+
+typedef struct {
+    size_t litLength;
+    size_t matchLength;
+    size_t offset;
+} seq_t;
+
+typedef struct {
+    BITv05_DStream_t DStream;
+    FSEv05_DState_t stateLL;
+    FSEv05_DState_t stateOffb;
+    FSEv05_DState_t stateML;
+    size_t prevOffset;
+    const BYTE* dumps;
+    const BYTE* dumpsEnd;
+} seqState_t;
+
+
+
+static void ZSTDv05_decodeSequence(seq_t* seq, seqState_t* seqState)
+{
+    size_t litLength;
+    size_t prevOffset;
+    size_t offset;
+    size_t matchLength;
+    const BYTE* dumps = seqState->dumps;
+    const BYTE* const de = seqState->dumpsEnd;
+
+    /* Literal length */
+    litLength = FSEv05_peakSymbol(&(seqState->stateLL));
+    prevOffset = litLength ? seq->offset : seqState->prevOffset;
+    if (litLength == MaxLL) {
+        U32 add = *dumps++;
+        if (add < 255) litLength += add;
+        else {
+            litLength = MEM_readLE32(dumps) & 0xFFFFFF;  /* no risk : dumps is always followed by seq tables > 1 byte */
+            if (litLength&1) litLength>>=1, dumps += 3;
+            else litLength = (U16)(litLength)>>1, dumps += 2;
+        }
+        if (dumps > de) { litLength = MaxLL+255; }  /* late correction, to avoid using uninitialized memory */
+        if (dumps >= de) { dumps = de-1; }  /* late correction, to avoid read overflow (data is now corrupted anyway) */
+    }
+
+    /* Offset */
+    {
+        static const U32 offsetPrefix[MaxOff+1] = {
+                1 /*fake*/, 1, 2, 4, 8, 16, 32, 64, 128, 256,
+                512, 1024, 2048, 4096, 8192, 16384, 32768, 65536, 131072, 262144,
+                524288, 1048576, 2097152, 4194304, 8388608, 16777216, 33554432, /*fake*/ 1, 1, 1, 1, 1 };
+        U32 offsetCode = FSEv05_peakSymbol(&(seqState->stateOffb));   /* <= maxOff, by table construction */
+        U32 nbBits = offsetCode - 1;
+        if (offsetCode==0) nbBits = 0;   /* cmove */
+        offset = offsetPrefix[offsetCode] + BITv05_readBits(&(seqState->DStream), nbBits);
+        if (MEM_32bits()) BITv05_reloadDStream(&(seqState->DStream));
+        if (offsetCode==0) offset = prevOffset;   /* repcode, cmove */
+        if (offsetCode | !litLength) seqState->prevOffset = seq->offset;   /* cmove */
+        FSEv05_decodeSymbol(&(seqState->stateOffb), &(seqState->DStream));    /* update */
+    }
+
+    /* Literal length update */
+    FSEv05_decodeSymbol(&(seqState->stateLL), &(seqState->DStream));   /* update */
+    if (MEM_32bits()) BITv05_reloadDStream(&(seqState->DStream));
+
+    /* MatchLength */
+    matchLength = FSEv05_decodeSymbol(&(seqState->stateML), &(seqState->DStream));
+    if (matchLength == MaxML) {
+        U32 add = *dumps++;
+        if (add < 255) matchLength += add;
+        else {
+            matchLength = MEM_readLE32(dumps) & 0xFFFFFF;  /* no pb : dumps is always followed by seq tables > 1 byte */
+            if (matchLength&1) matchLength>>=1, dumps += 3;
+            else matchLength = (U16)(matchLength)>>1, dumps += 2;
+        }
+        if (dumps > de) { matchLength = MaxML+255; }  /* late correction, to avoid using uninitialized memory */
+        if (dumps >= de) { dumps = de-1; }  /* late correction, to avoid read overflow (data is now corrupted anyway) */
+    }
+    matchLength += MINMATCH;
+
+    /* save result */
+    seq->litLength = litLength;
+    seq->offset = offset;
+    seq->matchLength = matchLength;
+    seqState->dumps = dumps;
+
+#if 0   /* debug */
+    {
+        static U64 totalDecoded = 0;
+        printf("pos %6u : %3u literals & match %3u bytes at distance %6u \n",
+           (U32)(totalDecoded), (U32)litLength, (U32)matchLength, (U32)offset);
+        totalDecoded += litLength + matchLength;
+    }
+#endif
+}
+
+
+static size_t ZSTDv05_execSequence(BYTE* op,
+                                BYTE* const oend, seq_t sequence,
+                                const BYTE** litPtr, const BYTE* const litLimit,
+                                const BYTE* const base, const BYTE* const vBase, const BYTE* const dictEnd)
+{
+    static const int dec32table[] = { 0, 1, 2, 1, 4, 4, 4, 4 };   /* added */
+    static const int dec64table[] = { 8, 8, 8, 7, 8, 9,10,11 };   /* substracted */
+    BYTE* const oLitEnd = op + sequence.litLength;
+    const size_t sequenceLength = sequence.litLength + sequence.matchLength;
+    BYTE* const oMatchEnd = op + sequenceLength;   /* risk : address space overflow (32-bits) */
+    BYTE* const oend_8 = oend-8;
+    const BYTE* const litEnd = *litPtr + sequence.litLength;
+    const BYTE* match = oLitEnd - sequence.offset;
+
+    /* check */
+    if (oLitEnd > oend_8) return ERROR(dstSize_tooSmall);   /* last match must start at a minimum distance of 8 from oend */
+    if (oMatchEnd > oend) return ERROR(dstSize_tooSmall);   /* overwrite beyond dst buffer */
+    if (litEnd > litLimit) return ERROR(corruption_detected);   /* risk read beyond lit buffer */
+
+    /* copy Literals */
+    ZSTDv05_wildcopy(op, *litPtr, sequence.litLength);   /* note : oLitEnd <= oend-8 : no risk of overwrite beyond oend */
+    op = oLitEnd;
+    *litPtr = litEnd;   /* update for next sequence */
+
+    /* copy Match */
+    if (sequence.offset > (size_t)(oLitEnd - base)) {
+        /* offset beyond prefix */
+        if (sequence.offset > (size_t)(oLitEnd - vBase))
+            return ERROR(corruption_detected);
+        match = dictEnd - (base-match);
+        if (match + sequence.matchLength <= dictEnd) {
+            memmove(oLitEnd, match, sequence.matchLength);
+            return sequenceLength;
+        }
+        /* span extDict & currentPrefixSegment */
+        {
+            size_t length1 = dictEnd - match;
+            memmove(oLitEnd, match, length1);
+            op = oLitEnd + length1;
+            sequence.matchLength -= length1;
+            match = base;
+            if (op > oend_8 || sequence.matchLength < MINMATCH) {
+              while (op < oMatchEnd) *op++ = *match++;
+              return sequenceLength;
+            }
+    }   }
+    /* Requirement: op <= oend_8 */
+
+    /* match within prefix */
+    if (sequence.offset < 8) {
+        /* close range match, overlap */
+        const int sub2 = dec64table[sequence.offset];
+        op[0] = match[0];
+        op[1] = match[1];
+        op[2] = match[2];
+        op[3] = match[3];
+        match += dec32table[sequence.offset];
+        ZSTDv05_copy4(op+4, match);
+        match -= sub2;
+    } else {
+        ZSTDv05_copy8(op, match);
+    }
+    op += 8; match += 8;
+
+    if (oMatchEnd > oend-(16-MINMATCH)) {
+        if (op < oend_8) {
+            ZSTDv05_wildcopy(op, match, oend_8 - op);
+            match += oend_8 - op;
+            op = oend_8;
+        }
+        while (op < oMatchEnd)
+            *op++ = *match++;
+    } else {
+        ZSTDv05_wildcopy(op, match, (ptrdiff_t)sequence.matchLength-8);   /* works even if matchLength < 8 */
+    }
+    return sequenceLength;
+}
+
+
+static size_t ZSTDv05_decompressSequences(
+                               ZSTDv05_DCtx* dctx,
+                               void* dst, size_t maxDstSize,
+                         const void* seqStart, size_t seqSize)
+{
+    const BYTE* ip = (const BYTE*)seqStart;
+    const BYTE* const iend = ip + seqSize;
+    BYTE* const ostart = (BYTE* const)dst;
+    BYTE* op = ostart;
+    BYTE* const oend = ostart + maxDstSize;
+    size_t errorCode, dumpsLength;
+    const BYTE* litPtr = dctx->litPtr;
+    const BYTE* const litEnd = litPtr + dctx->litSize;
+    int nbSeq;
+    const BYTE* dumps;
+    U32* DTableLL = dctx->LLTable;
+    U32* DTableML = dctx->MLTable;
+    U32* DTableOffb = dctx->OffTable;
+    const BYTE* const base = (const BYTE*) (dctx->base);
+    const BYTE* const vBase = (const BYTE*) (dctx->vBase);
+    const BYTE* const dictEnd = (const BYTE*) (dctx->dictEnd);
+
+    /* Build Decoding Tables */
+    errorCode = ZSTDv05_decodeSeqHeaders(&nbSeq, &dumps, &dumpsLength,
+                                      DTableLL, DTableML, DTableOffb,
+                                      ip, seqSize, dctx->flagStaticTables);
+    if (ZSTDv05_isError(errorCode)) return errorCode;
+    ip += errorCode;
+
+    /* Regen sequences */
+    if (nbSeq) {
+        seq_t sequence;
+        seqState_t seqState;
+
+        memset(&sequence, 0, sizeof(sequence));
+        sequence.offset = REPCODE_STARTVALUE;
+        seqState.dumps = dumps;
+        seqState.dumpsEnd = dumps + dumpsLength;
+        seqState.prevOffset = REPCODE_STARTVALUE;
+        errorCode = BITv05_initDStream(&(seqState.DStream), ip, iend-ip);
+        if (ERR_isError(errorCode)) return ERROR(corruption_detected);
+        FSEv05_initDState(&(seqState.stateLL), &(seqState.DStream), DTableLL);
+        FSEv05_initDState(&(seqState.stateOffb), &(seqState.DStream), DTableOffb);
+        FSEv05_initDState(&(seqState.stateML), &(seqState.DStream), DTableML);
+
+        for ( ; (BITv05_reloadDStream(&(seqState.DStream)) <= BITv05_DStream_completed) && nbSeq ; ) {
+            size_t oneSeqSize;
+            nbSeq--;
+            ZSTDv05_decodeSequence(&sequence, &seqState);
+            oneSeqSize = ZSTDv05_execSequence(op, oend, sequence, &litPtr, litEnd, base, vBase, dictEnd);
+            if (ZSTDv05_isError(oneSeqSize)) return oneSeqSize;
+            op += oneSeqSize;
+        }
+
+        /* check if reached exact end */
+        if (nbSeq) return ERROR(corruption_detected);
+    }
+
+    /* last literal segment */
+    {
+        size_t lastLLSize = litEnd - litPtr;
+        if (litPtr > litEnd) return ERROR(corruption_detected);   /* too many literals already used */
+        if (op+lastLLSize > oend) return ERROR(dstSize_tooSmall);
+        memcpy(op, litPtr, lastLLSize);
+        op += lastLLSize;
+    }
+
+    return op-ostart;
+}
+
+
+static void ZSTDv05_checkContinuity(ZSTDv05_DCtx* dctx, const void* dst)
+{
+    if (dst != dctx->previousDstEnd) {   /* not contiguous */
+        dctx->dictEnd = dctx->previousDstEnd;
+        dctx->vBase = (const char*)dst - ((const char*)(dctx->previousDstEnd) - (const char*)(dctx->base));
+        dctx->base = dst;
+        dctx->previousDstEnd = dst;
+    }
+}
+
+
+static size_t ZSTDv05_decompressBlock_internal(ZSTDv05_DCtx* dctx,
+                            void* dst, size_t dstCapacity,
+                      const void* src, size_t srcSize)
+{   /* blockType == blockCompressed */
+    const BYTE* ip = (const BYTE*)src;
+    size_t litCSize;
+
+    if (srcSize >= BLOCKSIZE) return ERROR(srcSize_wrong);
+
+    /* Decode literals sub-block */
+    litCSize = ZSTDv05_decodeLiteralsBlock(dctx, src, srcSize);
+    if (ZSTDv05_isError(litCSize)) return litCSize;
+    ip += litCSize;
+    srcSize -= litCSize;
+
+    return ZSTDv05_decompressSequences(dctx, dst, dstCapacity, ip, srcSize);
+}
+
+
+size_t ZSTDv05_decompressBlock(ZSTDv05_DCtx* dctx,
+                            void* dst, size_t dstCapacity,
+                      const void* src, size_t srcSize)
+{
+    ZSTDv05_checkContinuity(dctx, dst);
+    return ZSTDv05_decompressBlock_internal(dctx, dst, dstCapacity, src, srcSize);
+}
+
+
+/*! ZSTDv05_decompress_continueDCtx
+*   dctx must have been properly initialized */
+static size_t ZSTDv05_decompress_continueDCtx(ZSTDv05_DCtx* dctx,
+                                 void* dst, size_t maxDstSize,
+                                 const void* src, size_t srcSize)
+{
+    const BYTE* ip = (const BYTE*)src;
+    const BYTE* iend = ip + srcSize;
+    BYTE* const ostart = (BYTE* const)dst;
+    BYTE* op = ostart;
+    BYTE* const oend = ostart + maxDstSize;
+    size_t remainingSize = srcSize;
+    blockProperties_t blockProperties;
+
+    /* Frame Header */
+    {
+        size_t frameHeaderSize;
+        if (srcSize < ZSTDv05_frameHeaderSize_min+ZSTDv05_blockHeaderSize) return ERROR(srcSize_wrong);
+        frameHeaderSize = ZSTDv05_decodeFrameHeader_Part1(dctx, src, ZSTDv05_frameHeaderSize_min);
+        if (ZSTDv05_isError(frameHeaderSize)) return frameHeaderSize;
+        if (srcSize < frameHeaderSize+ZSTDv05_blockHeaderSize) return ERROR(srcSize_wrong);
+        ip += frameHeaderSize; remainingSize -= frameHeaderSize;
+        frameHeaderSize = ZSTDv05_decodeFrameHeader_Part2(dctx, src, frameHeaderSize);
+        if (ZSTDv05_isError(frameHeaderSize)) return frameHeaderSize;
+    }
+
+    /* Loop on each block */
+    while (1)
+    {
+        size_t decodedSize=0;
+        size_t cBlockSize = ZSTDv05_getcBlockSize(ip, iend-ip, &blockProperties);
+        if (ZSTDv05_isError(cBlockSize)) return cBlockSize;
+
+        ip += ZSTDv05_blockHeaderSize;
+        remainingSize -= ZSTDv05_blockHeaderSize;
+        if (cBlockSize > remainingSize) return ERROR(srcSize_wrong);
+
+        switch(blockProperties.blockType)
+        {
+        case bt_compressed:
+            decodedSize = ZSTDv05_decompressBlock_internal(dctx, op, oend-op, ip, cBlockSize);
+            break;
+        case bt_raw :
+            decodedSize = ZSTDv05_copyRawBlock(op, oend-op, ip, cBlockSize);
+            break;
+        case bt_rle :
+            return ERROR(GENERIC);   /* not yet supported */
+            break;
+        case bt_end :
+            /* end of frame */
+            if (remainingSize) return ERROR(srcSize_wrong);
+            break;
+        default:
+            return ERROR(GENERIC);   /* impossible */
+        }
+        if (cBlockSize == 0) break;   /* bt_end */
+
+        if (ZSTDv05_isError(decodedSize)) return decodedSize;
+        op += decodedSize;
+        ip += cBlockSize;
+        remainingSize -= cBlockSize;
+    }
+
+    return op-ostart;
+}
+
+
+size_t ZSTDv05_decompress_usingPreparedDCtx(ZSTDv05_DCtx* dctx, const ZSTDv05_DCtx* refDCtx,
+                                         void* dst, size_t maxDstSize,
+                                   const void* src, size_t srcSize)
+{
+    ZSTDv05_copyDCtx(dctx, refDCtx);
+    ZSTDv05_checkContinuity(dctx, dst);
+    return ZSTDv05_decompress_continueDCtx(dctx, dst, maxDstSize, src, srcSize);
+}
+
+
+size_t ZSTDv05_decompress_usingDict(ZSTDv05_DCtx* dctx,
+                                 void* dst, size_t maxDstSize,
+                                 const void* src, size_t srcSize,
+                                 const void* dict, size_t dictSize)
+{
+    ZSTDv05_decompressBegin_usingDict(dctx, dict, dictSize);
+    ZSTDv05_checkContinuity(dctx, dst);
+    return ZSTDv05_decompress_continueDCtx(dctx, dst, maxDstSize, src, srcSize);
+}
+
+
+size_t ZSTDv05_decompressDCtx(ZSTDv05_DCtx* dctx, void* dst, size_t maxDstSize, const void* src, size_t srcSize)
+{
+    return ZSTDv05_decompress_usingDict(dctx, dst, maxDstSize, src, srcSize, NULL, 0);
+}
+
+size_t ZSTDv05_decompress(void* dst, size_t maxDstSize, const void* src, size_t srcSize)
+{
+#if defined(ZSTDv05_HEAPMODE) && (ZSTDv05_HEAPMODE==1)
+    size_t regenSize;
+    ZSTDv05_DCtx* dctx = ZSTDv05_createDCtx();
+    if (dctx==NULL) return ERROR(memory_allocation);
+    regenSize = ZSTDv05_decompressDCtx(dctx, dst, maxDstSize, src, srcSize);
+    ZSTDv05_freeDCtx(dctx);
+    return regenSize;
+#else
+    ZSTDv05_DCtx dctx;
+    return ZSTDv05_decompressDCtx(&dctx, dst, maxDstSize, src, srcSize);
+#endif
+}
+
+size_t ZSTDv05_findFrameCompressedSize(const void *src, size_t srcSize)
+{
+    const BYTE* ip = (const BYTE*)src;
+    size_t remainingSize = srcSize;
+    blockProperties_t blockProperties;
+
+    /* Frame Header */
+    if (srcSize < ZSTDv05_frameHeaderSize_min) return ERROR(srcSize_wrong);
+    if (MEM_readLE32(src) != ZSTDv05_MAGICNUMBER) return ERROR(prefix_unknown);
+    ip += ZSTDv05_frameHeaderSize_min; remainingSize -= ZSTDv05_frameHeaderSize_min;
+
+    /* Loop on each block */
+    while (1)
+    {
+        size_t cBlockSize = ZSTDv05_getcBlockSize(ip, remainingSize, &blockProperties);
+        if (ZSTDv05_isError(cBlockSize)) return cBlockSize;
+
+        ip += ZSTDv05_blockHeaderSize;
+        remainingSize -= ZSTDv05_blockHeaderSize;
+        if (cBlockSize > remainingSize) return ERROR(srcSize_wrong);
+
+        if (cBlockSize == 0) break;   /* bt_end */
+
+        ip += cBlockSize;
+        remainingSize -= cBlockSize;
+    }
+
+    return ip - (const BYTE*)src;
+}
+
+/* ******************************
+*  Streaming Decompression API
+********************************/
+size_t ZSTDv05_nextSrcSizeToDecompress(ZSTDv05_DCtx* dctx)
+{
+    return dctx->expected;
+}
+
+size_t ZSTDv05_decompressContinue(ZSTDv05_DCtx* dctx, void* dst, size_t maxDstSize, const void* src, size_t srcSize)
+{
+    /* Sanity check */
+    if (srcSize != dctx->expected) return ERROR(srcSize_wrong);
+    ZSTDv05_checkContinuity(dctx, dst);
+
+    /* Decompress : frame header; part 1 */
+    switch (dctx->stage)
+    {
+    case ZSTDv05ds_getFrameHeaderSize :
+        /* get frame header size */
+        if (srcSize != ZSTDv05_frameHeaderSize_min) return ERROR(srcSize_wrong);   /* impossible */
+        dctx->headerSize = ZSTDv05_decodeFrameHeader_Part1(dctx, src, ZSTDv05_frameHeaderSize_min);
+        if (ZSTDv05_isError(dctx->headerSize)) return dctx->headerSize;
+        memcpy(dctx->headerBuffer, src, ZSTDv05_frameHeaderSize_min);
+        if (dctx->headerSize > ZSTDv05_frameHeaderSize_min) return ERROR(GENERIC); /* should never happen */
+        dctx->expected = 0;   /* not necessary to copy more */
+        /* fallthrough */
+    case ZSTDv05ds_decodeFrameHeader:
+        /* get frame header */
+        {   size_t const result = ZSTDv05_decodeFrameHeader_Part2(dctx, dctx->headerBuffer, dctx->headerSize);
+            if (ZSTDv05_isError(result)) return result;
+            dctx->expected = ZSTDv05_blockHeaderSize;
+            dctx->stage = ZSTDv05ds_decodeBlockHeader;
+            return 0;
+        }
+    case ZSTDv05ds_decodeBlockHeader:
+        {
+            /* Decode block header */
+            blockProperties_t bp;
+            size_t blockSize = ZSTDv05_getcBlockSize(src, ZSTDv05_blockHeaderSize, &bp);
+            if (ZSTDv05_isError(blockSize)) return blockSize;
+            if (bp.blockType == bt_end) {
+                dctx->expected = 0;
+                dctx->stage = ZSTDv05ds_getFrameHeaderSize;
+            }
+            else {
+                dctx->expected = blockSize;
+                dctx->bType = bp.blockType;
+                dctx->stage = ZSTDv05ds_decompressBlock;
+            }
+            return 0;
+        }
+    case ZSTDv05ds_decompressBlock:
+        {
+            /* Decompress : block content */
+            size_t rSize;
+            switch(dctx->bType)
+            {
+            case bt_compressed:
+                rSize = ZSTDv05_decompressBlock_internal(dctx, dst, maxDstSize, src, srcSize);
+                break;
+            case bt_raw :
+                rSize = ZSTDv05_copyRawBlock(dst, maxDstSize, src, srcSize);
+                break;
+            case bt_rle :
+                return ERROR(GENERIC);   /* not yet handled */
+                break;
+            case bt_end :   /* should never happen (filtered at phase 1) */
+                rSize = 0;
+                break;
+            default:
+                return ERROR(GENERIC);   /* impossible */
+            }
+            dctx->stage = ZSTDv05ds_decodeBlockHeader;
+            dctx->expected = ZSTDv05_blockHeaderSize;
+            dctx->previousDstEnd = (char*)dst + rSize;
+            return rSize;
+        }
+    default:
+        return ERROR(GENERIC);   /* impossible */
+    }
+}
+
+
+static void ZSTDv05_refDictContent(ZSTDv05_DCtx* dctx, const void* dict, size_t dictSize)
+{
+    dctx->dictEnd = dctx->previousDstEnd;
+    dctx->vBase = (const char*)dict - ((const char*)(dctx->previousDstEnd) - (const char*)(dctx->base));
+    dctx->base = dict;
+    dctx->previousDstEnd = (const char*)dict + dictSize;
+}
+
+static size_t ZSTDv05_loadEntropy(ZSTDv05_DCtx* dctx, const void* dict, size_t dictSize)
+{
+    size_t hSize, offcodeHeaderSize, matchlengthHeaderSize, errorCode, litlengthHeaderSize;
+    short offcodeNCount[MaxOff+1];
+    U32 offcodeMaxValue=MaxOff, offcodeLog;
+    short matchlengthNCount[MaxML+1];
+    unsigned matchlengthMaxValue = MaxML, matchlengthLog;
+    short litlengthNCount[MaxLL+1];
+    unsigned litlengthMaxValue = MaxLL, litlengthLog;
+
+    hSize = HUFv05_readDTableX4(dctx->hufTableX4, dict, dictSize);
+    if (HUFv05_isError(hSize)) return ERROR(dictionary_corrupted);
+    dict = (const char*)dict + hSize;
+    dictSize -= hSize;
+
+    offcodeHeaderSize = FSEv05_readNCount(offcodeNCount, &offcodeMaxValue, &offcodeLog, dict, dictSize);
+    if (FSEv05_isError(offcodeHeaderSize)) return ERROR(dictionary_corrupted);
+    if (offcodeLog > OffFSEv05Log) return ERROR(dictionary_corrupted);
+    errorCode = FSEv05_buildDTable(dctx->OffTable, offcodeNCount, offcodeMaxValue, offcodeLog);
+    if (FSEv05_isError(errorCode)) return ERROR(dictionary_corrupted);
+    dict = (const char*)dict + offcodeHeaderSize;
+    dictSize -= offcodeHeaderSize;
+
+    matchlengthHeaderSize = FSEv05_readNCount(matchlengthNCount, &matchlengthMaxValue, &matchlengthLog, dict, dictSize);
+    if (FSEv05_isError(matchlengthHeaderSize)) return ERROR(dictionary_corrupted);
+    if (matchlengthLog > MLFSEv05Log) return ERROR(dictionary_corrupted);
+    errorCode = FSEv05_buildDTable(dctx->MLTable, matchlengthNCount, matchlengthMaxValue, matchlengthLog);
+    if (FSEv05_isError(errorCode)) return ERROR(dictionary_corrupted);
+    dict = (const char*)dict + matchlengthHeaderSize;
+    dictSize -= matchlengthHeaderSize;
+
+    litlengthHeaderSize = FSEv05_readNCount(litlengthNCount, &litlengthMaxValue, &litlengthLog, dict, dictSize);
+    if (litlengthLog > LLFSEv05Log) return ERROR(dictionary_corrupted);
+    if (FSEv05_isError(litlengthHeaderSize)) return ERROR(dictionary_corrupted);
+    errorCode = FSEv05_buildDTable(dctx->LLTable, litlengthNCount, litlengthMaxValue, litlengthLog);
+    if (FSEv05_isError(errorCode)) return ERROR(dictionary_corrupted);
+
+    dctx->flagStaticTables = 1;
+    return hSize + offcodeHeaderSize + matchlengthHeaderSize + litlengthHeaderSize;
+}
+
+static size_t ZSTDv05_decompress_insertDictionary(ZSTDv05_DCtx* dctx, const void* dict, size_t dictSize)
+{
+    size_t eSize;
+    U32 magic = MEM_readLE32(dict);
+    if (magic != ZSTDv05_DICT_MAGIC) {
+        /* pure content mode */
+        ZSTDv05_refDictContent(dctx, dict, dictSize);
+        return 0;
+    }
+    /* load entropy tables */
+    dict = (const char*)dict + 4;
+    dictSize -= 4;
+    eSize = ZSTDv05_loadEntropy(dctx, dict, dictSize);
+    if (ZSTDv05_isError(eSize)) return ERROR(dictionary_corrupted);
+
+    /* reference dictionary content */
+    dict = (const char*)dict + eSize;
+    dictSize -= eSize;
+    ZSTDv05_refDictContent(dctx, dict, dictSize);
+
+    return 0;
+}
+
+
+size_t ZSTDv05_decompressBegin_usingDict(ZSTDv05_DCtx* dctx, const void* dict, size_t dictSize)
+{
+    size_t errorCode;
+    errorCode = ZSTDv05_decompressBegin(dctx);
+    if (ZSTDv05_isError(errorCode)) return errorCode;
+
+    if (dict && dictSize) {
+        errorCode = ZSTDv05_decompress_insertDictionary(dctx, dict, dictSize);
+        if (ZSTDv05_isError(errorCode)) return ERROR(dictionary_corrupted);
+    }
+
+    return 0;
+}
+
+/*
+    Buffered version of Zstd compression library
+    Copyright (C) 2015-2016, Yann Collet.
+
+    BSD 2-Clause License (http://www.opensource.org/licenses/bsd-license.php)
+
+    Redistribution and use in source and binary forms, with or without
+    modification, are permitted provided that the following conditions are
+    met:
+    * Redistributions of source code must retain the above copyright
+    notice, this list of conditions and the following disclaimer.
+    * Redistributions in binary form must reproduce the above
+    copyright notice, this list of conditions and the following disclaimer
+    in the documentation and/or other materials provided with the
+    distribution.
+    THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+    "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+    LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+    A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+    OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+    SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+    LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+    DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+    THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+    (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+    OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+    You can contact the author at :
+    - zstd source repository : https://github.com/Cyan4973/zstd
+    - ztsd public forum : https://groups.google.com/forum/#!forum/lz4c
+*/
+
+/* The objects defined into this file should be considered experimental.
+ * They are not labelled stable, as their prototype may change in the future.
+ * You can use them for tests, provide feedback, or if you can endure risk of future changes.
+ */
+
+
+
+/* *************************************
+*  Constants
+***************************************/
+static size_t ZBUFFv05_blockHeaderSize = 3;
+
+
+
+/* *** Compression *** */
+
+static size_t ZBUFFv05_limitCopy(void* dst, size_t maxDstSize, const void* src, size_t srcSize)
+{
+    size_t length = MIN(maxDstSize, srcSize);
+    memcpy(dst, src, length);
+    return length;
+}
+
+
+
+
+/** ************************************************
+*  Streaming decompression
+*
+*  A ZBUFFv05_DCtx object is required to track streaming operation.
+*  Use ZBUFFv05_createDCtx() and ZBUFFv05_freeDCtx() to create/release resources.
+*  Use ZBUFFv05_decompressInit() to start a new decompression operation.
+*  ZBUFFv05_DCtx objects can be reused multiple times.
+*
+*  Use ZBUFFv05_decompressContinue() repetitively to consume your input.
+*  *srcSizePtr and *maxDstSizePtr can be any size.
+*  The function will report how many bytes were read or written by modifying *srcSizePtr and *maxDstSizePtr.
+*  Note that it may not consume the entire input, in which case it's up to the caller to call again the function with remaining input.
+*  The content of dst will be overwritten (up to *maxDstSizePtr) at each function call, so save its content if it matters or change dst .
+*  return : a hint to preferred nb of bytes to use as input for next function call (it's only a hint, to improve latency)
+*            or 0 when a frame is completely decoded
+*            or an error code, which can be tested using ZBUFFv05_isError().
+*
+*  Hint : recommended buffer sizes (not compulsory)
+*  output : 128 KB block size is the internal unit, it ensures it's always possible to write a full block when it's decoded.
+*  input : just follow indications from ZBUFFv05_decompressContinue() to minimize latency. It should always be <= 128 KB + 3 .
+* **************************************************/
+
+typedef enum { ZBUFFv05ds_init, ZBUFFv05ds_readHeader, ZBUFFv05ds_loadHeader, ZBUFFv05ds_decodeHeader,
+               ZBUFFv05ds_read, ZBUFFv05ds_load, ZBUFFv05ds_flush } ZBUFFv05_dStage;
+
+/* *** Resource management *** */
+
+#define ZSTDv05_frameHeaderSize_max 5   /* too magical, should come from reference */
+struct ZBUFFv05_DCtx_s {
+    ZSTDv05_DCtx* zc;
+    ZSTDv05_parameters params;
+    char* inBuff;
+    size_t inBuffSize;
+    size_t inPos;
+    char* outBuff;
+    size_t outBuffSize;
+    size_t outStart;
+    size_t outEnd;
+    size_t hPos;
+    ZBUFFv05_dStage stage;
+    unsigned char headerBuffer[ZSTDv05_frameHeaderSize_max];
+};   /* typedef'd to ZBUFFv05_DCtx within "zstd_buffered.h" */
+
+
+ZBUFFv05_DCtx* ZBUFFv05_createDCtx(void)
+{
+    ZBUFFv05_DCtx* zbc = (ZBUFFv05_DCtx*)malloc(sizeof(ZBUFFv05_DCtx));
+    if (zbc==NULL) return NULL;
+    memset(zbc, 0, sizeof(*zbc));
+    zbc->zc = ZSTDv05_createDCtx();
+    zbc->stage = ZBUFFv05ds_init;
+    return zbc;
+}
+
+size_t ZBUFFv05_freeDCtx(ZBUFFv05_DCtx* zbc)
+{
+    if (zbc==NULL) return 0;   /* support free on null */
+    ZSTDv05_freeDCtx(zbc->zc);
+    free(zbc->inBuff);
+    free(zbc->outBuff);
+    free(zbc);
+    return 0;
+}
+
+
+/* *** Initialization *** */
+
+size_t ZBUFFv05_decompressInitDictionary(ZBUFFv05_DCtx* zbc, const void* dict, size_t dictSize)
+{
+    zbc->stage = ZBUFFv05ds_readHeader;
+    zbc->hPos = zbc->inPos = zbc->outStart = zbc->outEnd = 0;
+    return ZSTDv05_decompressBegin_usingDict(zbc->zc, dict, dictSize);
+}
+
+size_t ZBUFFv05_decompressInit(ZBUFFv05_DCtx* zbc)
+{
+    return ZBUFFv05_decompressInitDictionary(zbc, NULL, 0);
+}
+
+
+/* *** Decompression *** */
+
+size_t ZBUFFv05_decompressContinue(ZBUFFv05_DCtx* zbc, void* dst, size_t* maxDstSizePtr, const void* src, size_t* srcSizePtr)
+{
+    const char* const istart = (const char*)src;
+    const char* ip = istart;
+    const char* const iend = istart + *srcSizePtr;
+    char* const ostart = (char*)dst;
+    char* op = ostart;
+    char* const oend = ostart + *maxDstSizePtr;
+    U32 notDone = 1;
+
+    while (notDone) {
+        switch(zbc->stage)
+        {
+        case ZBUFFv05ds_init :
+            return ERROR(init_missing);
+
+        case ZBUFFv05ds_readHeader :
+            /* read header from src */
+            {
+                size_t headerSize = ZSTDv05_getFrameParams(&(zbc->params), src, *srcSizePtr);
+                if (ZSTDv05_isError(headerSize)) return headerSize;
+                if (headerSize) {
+                    /* not enough input to decode header : tell how many bytes would be necessary */
+                    memcpy(zbc->headerBuffer+zbc->hPos, src, *srcSizePtr);
+                    zbc->hPos += *srcSizePtr;
+                    *maxDstSizePtr = 0;
+                    zbc->stage = ZBUFFv05ds_loadHeader;
+                    return headerSize - zbc->hPos;
+                }
+                zbc->stage = ZBUFFv05ds_decodeHeader;
+                break;
+            }
+	    /* fall-through */
+        case ZBUFFv05ds_loadHeader:
+            /* complete header from src */
+            {
+                size_t headerSize = ZBUFFv05_limitCopy(
+                    zbc->headerBuffer + zbc->hPos, ZSTDv05_frameHeaderSize_max - zbc->hPos,
+                    src, *srcSizePtr);
+                zbc->hPos += headerSize;
+                ip += headerSize;
+                headerSize = ZSTDv05_getFrameParams(&(zbc->params), zbc->headerBuffer, zbc->hPos);
+                if (ZSTDv05_isError(headerSize)) return headerSize;
+                if (headerSize) {
+                    /* not enough input to decode header : tell how many bytes would be necessary */
+                    *maxDstSizePtr = 0;
+                    return headerSize - zbc->hPos;
+                }
+                // zbc->stage = ZBUFFv05ds_decodeHeader; break;   /* useless : stage follows */
+            }
+	    /* fall-through */
+        case ZBUFFv05ds_decodeHeader:
+                /* apply header to create / resize buffers */
+                {
+                    size_t neededOutSize = (size_t)1 << zbc->params.windowLog;
+                    size_t neededInSize = BLOCKSIZE;   /* a block is never > BLOCKSIZE */
+                    if (zbc->inBuffSize < neededInSize) {
+                        free(zbc->inBuff);
+                        zbc->inBuffSize = neededInSize;
+                        zbc->inBuff = (char*)malloc(neededInSize);
+                        if (zbc->inBuff == NULL) return ERROR(memory_allocation);
+                    }
+                    if (zbc->outBuffSize < neededOutSize) {
+                        free(zbc->outBuff);
+                        zbc->outBuffSize = neededOutSize;
+                        zbc->outBuff = (char*)malloc(neededOutSize);
+                        if (zbc->outBuff == NULL) return ERROR(memory_allocation);
+                }   }
+                if (zbc->hPos) {
+                    /* some data already loaded into headerBuffer : transfer into inBuff */
+                    memcpy(zbc->inBuff, zbc->headerBuffer, zbc->hPos);
+                    zbc->inPos = zbc->hPos;
+                    zbc->hPos = 0;
+                    zbc->stage = ZBUFFv05ds_load;
+                    break;
+                }
+                zbc->stage = ZBUFFv05ds_read;
+		/* fall-through */
+        case ZBUFFv05ds_read:
+            {
+                size_t neededInSize = ZSTDv05_nextSrcSizeToDecompress(zbc->zc);
+                if (neededInSize==0) {  /* end of frame */
+                    zbc->stage = ZBUFFv05ds_init;
+                    notDone = 0;
+                    break;
+                }
+                if ((size_t)(iend-ip) >= neededInSize) {
+                    /* directly decode from src */
+                    size_t decodedSize = ZSTDv05_decompressContinue(zbc->zc,
+                        zbc->outBuff + zbc->outStart, zbc->outBuffSize - zbc->outStart,
+                        ip, neededInSize);
+                    if (ZSTDv05_isError(decodedSize)) return decodedSize;
+                    ip += neededInSize;
+                    if (!decodedSize) break;   /* this was just a header */
+                    zbc->outEnd = zbc->outStart +  decodedSize;
+                    zbc->stage = ZBUFFv05ds_flush;
+                    break;
+                }
+                if (ip==iend) { notDone = 0; break; }   /* no more input */
+                zbc->stage = ZBUFFv05ds_load;
+            }
+	    /* fall-through */
+        case ZBUFFv05ds_load:
+            {
+                size_t neededInSize = ZSTDv05_nextSrcSizeToDecompress(zbc->zc);
+                size_t toLoad = neededInSize - zbc->inPos;   /* should always be <= remaining space within inBuff */
+                size_t loadedSize;
+                if (toLoad > zbc->inBuffSize - zbc->inPos) return ERROR(corruption_detected);   /* should never happen */
+                loadedSize = ZBUFFv05_limitCopy(zbc->inBuff + zbc->inPos, toLoad, ip, iend-ip);
+                ip += loadedSize;
+                zbc->inPos += loadedSize;
+                if (loadedSize < toLoad) { notDone = 0; break; }   /* not enough input, wait for more */
+                {
+                    size_t decodedSize = ZSTDv05_decompressContinue(zbc->zc,
+                        zbc->outBuff + zbc->outStart, zbc->outBuffSize - zbc->outStart,
+                        zbc->inBuff, neededInSize);
+                    if (ZSTDv05_isError(decodedSize)) return decodedSize;
+                    zbc->inPos = 0;   /* input is consumed */
+                    if (!decodedSize) { zbc->stage = ZBUFFv05ds_read; break; }   /* this was just a header */
+                    zbc->outEnd = zbc->outStart +  decodedSize;
+                    zbc->stage = ZBUFFv05ds_flush;
+                    // break; /* ZBUFFv05ds_flush follows */
+                }
+	    }
+	    /* fall-through */
+        case ZBUFFv05ds_flush:
+            {
+                size_t toFlushSize = zbc->outEnd - zbc->outStart;
+                size_t flushedSize = ZBUFFv05_limitCopy(op, oend-op, zbc->outBuff + zbc->outStart, toFlushSize);
+                op += flushedSize;
+                zbc->outStart += flushedSize;
+                if (flushedSize == toFlushSize) {
+                    zbc->stage = ZBUFFv05ds_read;
+                    if (zbc->outStart + BLOCKSIZE > zbc->outBuffSize)
+                        zbc->outStart = zbc->outEnd = 0;
+                    break;
+                }
+                /* cannot flush everything */
+                notDone = 0;
+                break;
+            }
+        default: return ERROR(GENERIC);   /* impossible */
+    }   }
+
+    *srcSizePtr = ip-istart;
+    *maxDstSizePtr = op-ostart;
+
+    {   size_t nextSrcSizeHint = ZSTDv05_nextSrcSizeToDecompress(zbc->zc);
+        if (nextSrcSizeHint > ZBUFFv05_blockHeaderSize) nextSrcSizeHint+= ZBUFFv05_blockHeaderSize;   /* get next block header too */
+        nextSrcSizeHint -= zbc->inPos;   /* already loaded*/
+        return nextSrcSizeHint;
+    }
+}
+
+
+
+/* *************************************
+*  Tool functions
+***************************************/
+unsigned ZBUFFv05_isError(size_t errorCode) { return ERR_isError(errorCode); }
+const char* ZBUFFv05_getErrorName(size_t errorCode) { return ERR_getErrorName(errorCode); }
+
+size_t ZBUFFv05_recommendedDInSize(void)  { return BLOCKSIZE + ZBUFFv05_blockHeaderSize /* block header size*/ ; }
+size_t ZBUFFv05_recommendedDOutSize(void) { return BLOCKSIZE; }
diff --git a/contrib/libzstd/include/zstd/legacy/zstd_v05.h b/contrib/libzstd/include/zstd/legacy/zstd_v05.h
new file mode 100644
index 00000000000..8ce662fd9fc
--- /dev/null
+++ b/contrib/libzstd/include/zstd/legacy/zstd_v05.h
@@ -0,0 +1,156 @@
+/**
+ * Copyright (c) 2016-present, Yann Collet, Facebook, Inc.
+ * All rights reserved.
+ *
+ * This source code is licensed under the BSD-style license found in the
+ * LICENSE file in the root directory of this source tree. An additional grant
+ * of patent rights can be found in the PATENTS file in the same directory.
+ */
+
+#ifndef ZSTDv05_H
+#define ZSTDv05_H
+
+#if defined (__cplusplus)
+extern "C" {
+#endif
+
+/*-*************************************
+*  Dependencies
+***************************************/
+#include <stddef.h>   /* size_t */
+#include "mem.h"      /* U64, U32 */
+
+
+/* *************************************
+*  Simple functions
+***************************************/
+/*! ZSTDv05_decompress() :
+    `compressedSize` : is the _exact_ size of the compressed blob, otherwise decompression will fail.
+    `dstCapacity` must be large enough, equal or larger than originalSize.
+    @return : the number of bytes decompressed into `dst` (<= `dstCapacity`),
+              or an errorCode if it fails (which can be tested using ZSTDv05_isError()) */
+size_t ZSTDv05_decompress( void* dst, size_t dstCapacity,
+                     const void* src, size_t compressedSize);
+
+/**
+ZSTDv05_getFrameSrcSize() : get the source length of a ZSTD frame
+    compressedSize : The size of the 'src' buffer, at least as large as the frame pointed to by 'src'
+    return : the number of bytes that would be read to decompress this frame
+             or an errorCode if it fails (which can be tested using ZSTDv05_isError())
+*/
+size_t ZSTDv05_findFrameCompressedSize(const void* src, size_t compressedSize);
+
+/* *************************************
+*  Helper functions
+***************************************/
+/* Error Management */
+unsigned    ZSTDv05_isError(size_t code);          /*!< tells if a `size_t` function result is an error code */
+const char* ZSTDv05_getErrorName(size_t code);     /*!< provides readable string for an error code */
+
+
+/* *************************************
+*  Explicit memory management
+***************************************/
+/** Decompression context */
+typedef struct ZSTDv05_DCtx_s ZSTDv05_DCtx;
+ZSTDv05_DCtx* ZSTDv05_createDCtx(void);
+size_t ZSTDv05_freeDCtx(ZSTDv05_DCtx* dctx);      /*!< @return : errorCode */
+
+/** ZSTDv05_decompressDCtx() :
+*   Same as ZSTDv05_decompress(), but requires an already allocated ZSTDv05_DCtx (see ZSTDv05_createDCtx()) */
+size_t ZSTDv05_decompressDCtx(ZSTDv05_DCtx* ctx, void* dst, size_t dstCapacity, const void* src, size_t srcSize);
+
+
+/*-***********************
+*  Simple Dictionary API
+*************************/
+/*! ZSTDv05_decompress_usingDict() :
+*   Decompression using a pre-defined Dictionary content (see dictBuilder).
+*   Dictionary must be identical to the one used during compression, otherwise regenerated data will be corrupted.
+*   Note : dict can be NULL, in which case, it's equivalent to ZSTDv05_decompressDCtx() */
+size_t ZSTDv05_decompress_usingDict(ZSTDv05_DCtx* dctx,
+                                            void* dst, size_t dstCapacity,
+                                      const void* src, size_t srcSize,
+                                      const void* dict,size_t dictSize);
+
+/*-************************
+*  Advanced Streaming API
+***************************/
+typedef enum { ZSTDv05_fast, ZSTDv05_greedy, ZSTDv05_lazy, ZSTDv05_lazy2, ZSTDv05_btlazy2, ZSTDv05_opt, ZSTDv05_btopt } ZSTDv05_strategy;
+typedef struct {
+    U64 srcSize;
+    U32 windowLog;     /* the only useful information to retrieve */
+    U32 contentLog; U32 hashLog; U32 searchLog; U32 searchLength; U32 targetLength; ZSTDv05_strategy strategy;
+} ZSTDv05_parameters;
+size_t ZSTDv05_getFrameParams(ZSTDv05_parameters* params, const void* src, size_t srcSize);
+
+size_t ZSTDv05_decompressBegin_usingDict(ZSTDv05_DCtx* dctx, const void* dict, size_t dictSize);
+void   ZSTDv05_copyDCtx(ZSTDv05_DCtx* dstDCtx, const ZSTDv05_DCtx* srcDCtx);
+size_t ZSTDv05_nextSrcSizeToDecompress(ZSTDv05_DCtx* dctx);
+size_t ZSTDv05_decompressContinue(ZSTDv05_DCtx* dctx, void* dst, size_t dstCapacity, const void* src, size_t srcSize);
+
+
+/*-***********************
+*  ZBUFF API
+*************************/
+typedef struct ZBUFFv05_DCtx_s ZBUFFv05_DCtx;
+ZBUFFv05_DCtx* ZBUFFv05_createDCtx(void);
+size_t         ZBUFFv05_freeDCtx(ZBUFFv05_DCtx* dctx);
+
+size_t ZBUFFv05_decompressInit(ZBUFFv05_DCtx* dctx);
+size_t ZBUFFv05_decompressInitDictionary(ZBUFFv05_DCtx* dctx, const void* dict, size_t dictSize);
+
+size_t ZBUFFv05_decompressContinue(ZBUFFv05_DCtx* dctx,
+                                            void* dst, size_t* dstCapacityPtr,
+                                      const void* src, size_t* srcSizePtr);
+
+/*-***************************************************************************
+*  Streaming decompression
+*
+*  A ZBUFFv05_DCtx object is required to track streaming operations.
+*  Use ZBUFFv05_createDCtx() and ZBUFFv05_freeDCtx() to create/release resources.
+*  Use ZBUFFv05_decompressInit() to start a new decompression operation,
+*   or ZBUFFv05_decompressInitDictionary() if decompression requires a dictionary.
+*  Note that ZBUFFv05_DCtx objects can be reused multiple times.
+*
+*  Use ZBUFFv05_decompressContinue() repetitively to consume your input.
+*  *srcSizePtr and *dstCapacityPtr can be any size.
+*  The function will report how many bytes were read or written by modifying *srcSizePtr and *dstCapacityPtr.
+*  Note that it may not consume the entire input, in which case it's up to the caller to present remaining input again.
+*  The content of @dst will be overwritten (up to *dstCapacityPtr) at each function call, so save its content if it matters or change @dst.
+*  @return : a hint to preferred nb of bytes to use as input for next function call (it's only a hint, to help latency)
+*            or 0 when a frame is completely decoded
+*            or an error code, which can be tested using ZBUFFv05_isError().
+*
+*  Hint : recommended buffer sizes (not compulsory) : ZBUFFv05_recommendedDInSize() / ZBUFFv05_recommendedDOutSize()
+*  output : ZBUFFv05_recommendedDOutSize==128 KB block size is the internal unit, it ensures it's always possible to write a full block when decoded.
+*  input  : ZBUFFv05_recommendedDInSize==128Kb+3; just follow indications from ZBUFFv05_decompressContinue() to minimize latency. It should always be <= 128 KB + 3 .
+* *******************************************************************************/
+
+
+/* *************************************
+*  Tool functions
+***************************************/
+unsigned ZBUFFv05_isError(size_t errorCode);
+const char* ZBUFFv05_getErrorName(size_t errorCode);
+
+/** Functions below provide recommended buffer sizes for Compression or Decompression operations.
+*   These sizes are just hints, and tend to offer better latency */
+size_t ZBUFFv05_recommendedDInSize(void);
+size_t ZBUFFv05_recommendedDOutSize(void);
+
+
+
+/*-*************************************
+*  Constants
+***************************************/
+#define ZSTDv05_MAGICNUMBER 0xFD2FB525   /* v0.5 */
+
+
+
+
+#if defined (__cplusplus)
+}
+#endif
+
+#endif  /* ZSTDv0505_H */
diff --git a/contrib/libzstd/include/zstd/legacy/zstd_v06.c b/contrib/libzstd/include/zstd/legacy/zstd_v06.c
index d9e89f806fd..26f0929da6f 100644
--- a/contrib/libzstd/include/zstd/legacy/zstd_v06.c
+++ b/contrib/libzstd/include/zstd/legacy/zstd_v06.c
@@ -537,7 +537,7 @@ static void ZSTDv06_copy8(void* dst, const void* src) { memcpy(dst, src, 8); }
 /*! ZSTDv06_wildcopy() :
 *   custom version of memcpy(), can copy up to 7 bytes too many (8 bytes if length==0) */
 #define WILDCOPY_OVERLENGTH 8
-MEM_STATIC void ZSTDv06_wildcopy(void* dst, const void* src, size_t length)
+MEM_STATIC void ZSTDv06_wildcopy(void* dst, const void* src, ptrdiff_t length)
 {
     const BYTE* ip = (const BYTE*)src;
     BYTE* op = (BYTE*)dst;
@@ -910,13 +910,13 @@ MEM_STATIC size_t BITv06_initDStream(BITv06_DStream_t* bitD, const void* srcBuff
         bitD->bitContainer = *(const BYTE*)(bitD->start);
         switch(srcSize)
         {
-            case 7: bitD->bitContainer += (size_t)(((const BYTE*)(srcBuffer))[6]) << (sizeof(bitD->bitContainer)*8 - 16);
-            case 6: bitD->bitContainer += (size_t)(((const BYTE*)(srcBuffer))[5]) << (sizeof(bitD->bitContainer)*8 - 24);
-            case 5: bitD->bitContainer += (size_t)(((const BYTE*)(srcBuffer))[4]) << (sizeof(bitD->bitContainer)*8 - 32);
-            case 4: bitD->bitContainer += (size_t)(((const BYTE*)(srcBuffer))[3]) << 24;
-            case 3: bitD->bitContainer += (size_t)(((const BYTE*)(srcBuffer))[2]) << 16;
-            case 2: bitD->bitContainer += (size_t)(((const BYTE*)(srcBuffer))[1]) <<  8;
-            default:;
+            case 7: bitD->bitContainer += (size_t)(((const BYTE*)(srcBuffer))[6]) << (sizeof(bitD->bitContainer)*8 - 16);/* fall-through */
+            case 6: bitD->bitContainer += (size_t)(((const BYTE*)(srcBuffer))[5]) << (sizeof(bitD->bitContainer)*8 - 24);/* fall-through */
+            case 5: bitD->bitContainer += (size_t)(((const BYTE*)(srcBuffer))[4]) << (sizeof(bitD->bitContainer)*8 - 32);/* fall-through */
+            case 4: bitD->bitContainer += (size_t)(((const BYTE*)(srcBuffer))[3]) << 24; /* fall-through */
+            case 3: bitD->bitContainer += (size_t)(((const BYTE*)(srcBuffer))[2]) << 16; /* fall-through */
+            case 2: bitD->bitContainer += (size_t)(((const BYTE*)(srcBuffer))[1]) <<  8; /* fall-through */
+            default: break;
         }
         { BYTE const lastByte = ((const BYTE*)srcBuffer)[srcSize-1];
           if (lastByte == 0) return ERROR(GENERIC);   /* endMark not present */
@@ -982,8 +982,8 @@ MEM_STATIC size_t BITv06_readBitsFast(BITv06_DStream_t* bitD, U32 nbBits)
               if status == unfinished, internal register is filled with >= (sizeof(bitD->bitContainer)*8 - 7) bits */
 MEM_STATIC BITv06_DStream_status BITv06_reloadDStream(BITv06_DStream_t* bitD)
 {
-	if (bitD->bitsConsumed > (sizeof(bitD->bitContainer)*8))  /* should never happen */
-		return BITv06_DStream_overflow;
+    if (bitD->bitsConsumed > (sizeof(bitD->bitContainer)*8))  /* should never happen */
+        return BITv06_DStream_overflow;
 
     if (bitD->ptr >= bitD->start + sizeof(bitD->bitContainer)) {
         bitD->ptr -= bitD->bitsConsumed >> 3;
@@ -1932,9 +1932,11 @@ MEM_STATIC size_t HUFv06_readStats(BYTE* huffWeight, size_t hwSize, U32* rankSta
 {
     U32 weightTotal;
     const BYTE* ip = (const BYTE*) src;
-    size_t iSize = ip[0];
+    size_t iSize;
     size_t oSize;
 
+    if (!srcSize) return ERROR(srcSize_wrong);
+    iSize = ip[0];
     //memset(huffWeight, 0, hwSize);   /* is not necessary, even though some analyzer complain ... */
 
     if (iSize >= 128)  { /* special header */
@@ -1969,6 +1971,7 @@ MEM_STATIC size_t HUFv06_readStats(BYTE* huffWeight, size_t hwSize, U32* rankSta
             rankStats[huffWeight[n]]++;
             weightTotal += (1 << huffWeight[n]) >> 1;
     }   }
+    if (weightTotal == 0) return ERROR(corruption_detected);
 
     /* get last non-null symbol weight (implied, total must be 2^n) */
     {   U32 const tableLog = BITv06_highbit32(weightTotal) + 1;
@@ -2890,7 +2893,6 @@ struct ZSTDv06_DCtx_s
     ZSTDv06_dStage stage;
     U32 flagRepeatTable;
     const BYTE* litPtr;
-    size_t litBufSize;
     size_t litSize;
     BYTE litBuffer[ZSTDv06_BLOCKSIZE_MAX + WILDCOPY_OVERLENGTH];
     BYTE headerBuffer[ZSTDv06_FRAMEHEADERSIZE_MAX];
@@ -3167,8 +3169,8 @@ size_t ZSTDv06_decodeLiteralsBlock(ZSTDv06_DCtx* dctx,
                 return ERROR(corruption_detected);
 
             dctx->litPtr = dctx->litBuffer;
-            dctx->litBufSize = ZSTDv06_BLOCKSIZE_MAX+8;
             dctx->litSize = litSize;
+            memset(dctx->litBuffer + dctx->litSize, 0, WILDCOPY_OVERLENGTH);
             return litCSize + lhSize;
         }
     case IS_PCH:
@@ -3183,13 +3185,14 @@ size_t ZSTDv06_decodeLiteralsBlock(ZSTDv06_DCtx* dctx,
             lhSize=3;
             litSize  = ((istart[0] & 15) << 6) + (istart[1] >> 2);
             litCSize = ((istart[1] &  3) << 8) + istart[2];
+            if (litCSize + lhSize > srcSize) return ERROR(corruption_detected);
 
             {   size_t const errorCode = HUFv06_decompress1X4_usingDTable(dctx->litBuffer, litSize, istart+lhSize, litCSize, dctx->hufTableX4);
                 if (HUFv06_isError(errorCode)) return ERROR(corruption_detected);
             }
             dctx->litPtr = dctx->litBuffer;
-            dctx->litBufSize = ZSTDv06_BLOCKSIZE_MAX+WILDCOPY_OVERLENGTH;
             dctx->litSize = litSize;
+            memset(dctx->litBuffer + dctx->litSize, 0, WILDCOPY_OVERLENGTH);
             return litCSize + lhSize;
         }
     case IS_RAW:
@@ -3213,13 +3216,12 @@ size_t ZSTDv06_decodeLiteralsBlock(ZSTDv06_DCtx* dctx,
                 if (litSize+lhSize > srcSize) return ERROR(corruption_detected);
                 memcpy(dctx->litBuffer, istart+lhSize, litSize);
                 dctx->litPtr = dctx->litBuffer;
-                dctx->litBufSize = ZSTDv06_BLOCKSIZE_MAX+8;
                 dctx->litSize = litSize;
+                memset(dctx->litBuffer + dctx->litSize, 0, WILDCOPY_OVERLENGTH);
                 return lhSize+litSize;
             }
             /* direct reference into compressed stream */
             dctx->litPtr = istart+lhSize;
-            dctx->litBufSize = srcSize-lhSize;
             dctx->litSize = litSize;
             return lhSize+litSize;
         }
@@ -3241,9 +3243,8 @@ size_t ZSTDv06_decodeLiteralsBlock(ZSTDv06_DCtx* dctx,
                 break;
             }
             if (litSize > ZSTDv06_BLOCKSIZE_MAX) return ERROR(corruption_detected);
-            memset(dctx->litBuffer, istart[lhSize], litSize);
+            memset(dctx->litBuffer, istart[lhSize], litSize + WILDCOPY_OVERLENGTH);
             dctx->litPtr = dctx->litBuffer;
-            dctx->litBufSize = ZSTDv06_BLOCKSIZE_MAX+WILDCOPY_OVERLENGTH;
             dctx->litSize = litSize;
             return lhSize+1;
         }
@@ -3302,10 +3303,13 @@ size_t ZSTDv06_decodeSeqHeaders(int* nbSeqPtr,
     {   int nbSeq = *ip++;
         if (!nbSeq) { *nbSeqPtr=0; return 1; }
         if (nbSeq > 0x7F) {
-            if (nbSeq == 0xFF)
+            if (nbSeq == 0xFF) {
+                if (ip+2 > iend) return ERROR(srcSize_wrong);
                 nbSeq = MEM_readLE16(ip) + LONGNBSEQ, ip+=2;
-            else
+            } else {
+                if (ip >= iend) return ERROR(srcSize_wrong);
                 nbSeq = ((nbSeq-0x80)<<8) + *ip++;
+            }
         }
         *nbSeqPtr = nbSeq;
     }
@@ -3431,7 +3435,7 @@ static void ZSTDv06_decodeSequence(seq_t* seq, seqState_t* seqState)
 
 size_t ZSTDv06_execSequence(BYTE* op,
                                 BYTE* const oend, seq_t sequence,
-                                const BYTE** litPtr, const BYTE* const litLimit_8,
+                                const BYTE** litPtr, const BYTE* const litLimit,
                                 const BYTE* const base, const BYTE* const vBase, const BYTE* const dictEnd)
 {
     BYTE* const oLitEnd = op + sequence.litLength;
@@ -3444,7 +3448,7 @@ size_t ZSTDv06_execSequence(BYTE* op,
     /* check */
     if (oLitEnd > oend_8) return ERROR(dstSize_tooSmall);   /* last match must start at a minimum distance of 8 from oend */
     if (oMatchEnd > oend) return ERROR(dstSize_tooSmall);   /* overwrite beyond dst buffer */
-    if (iLitEnd > litLimit_8) return ERROR(corruption_detected);   /* over-read beyond lit buffer */
+    if (iLitEnd > litLimit) return ERROR(corruption_detected);   /* over-read beyond lit buffer */
 
     /* copy Literals */
     ZSTDv06_wildcopy(op, *litPtr, sequence.litLength);   /* note : oLitEnd <= oend-8 : no risk of overwrite beyond oend */
@@ -3466,7 +3470,12 @@ size_t ZSTDv06_execSequence(BYTE* op,
             op = oLitEnd + length1;
             sequence.matchLength -= length1;
             match = base;
+            if (op > oend_8 || sequence.matchLength < MINMATCH) {
+              while (op < oMatchEnd) *op++ = *match++;
+              return sequenceLength;
+            }
     }   }
+    /* Requirement: op <= oend_8 */
 
     /* match within prefix */
     if (sequence.offset < 8) {
@@ -3494,7 +3503,7 @@ size_t ZSTDv06_execSequence(BYTE* op,
         }
         while (op < oMatchEnd) *op++ = *match++;
     } else {
-        ZSTDv06_wildcopy(op, match, sequence.matchLength-8);   /* works even if matchLength < 8 */
+        ZSTDv06_wildcopy(op, match, (ptrdiff_t)sequence.matchLength-8);   /* works even if matchLength < 8 */
     }
     return sequenceLength;
 }
@@ -3511,7 +3520,6 @@ static size_t ZSTDv06_decompressSequences(
     BYTE* const oend = ostart + maxDstSize;
     BYTE* op = ostart;
     const BYTE* litPtr = dctx->litPtr;
-    const BYTE* const litLimit_8 = litPtr + dctx->litBufSize - 8;
     const BYTE* const litEnd = litPtr + dctx->litSize;
     FSEv06_DTable* DTableLL = dctx->LLTable;
     FSEv06_DTable* DTableML = dctx->MLTable;
@@ -3555,7 +3563,7 @@ static size_t ZSTDv06_decompressSequences(
                        pos, (U32)sequence.litLength, (U32)sequence.matchLength, (U32)sequence.offset);
 #endif
 
-            {   size_t const oneSeqSize = ZSTDv06_execSequence(op, oend, sequence, &litPtr, litLimit_8, base, vBase, dictEnd);
+            {   size_t const oneSeqSize = ZSTDv06_execSequence(op, oend, sequence, &litPtr, litEnd, base, vBase, dictEnd);
                 if (ZSTDv06_isError(oneSeqSize)) return oneSeqSize;
                 op += oneSeqSize;
         }   }
@@ -3721,6 +3729,37 @@ size_t ZSTDv06_decompress(void* dst, size_t dstCapacity, const void* src, size_t
 #endif
 }
 
+size_t ZSTDv06_findFrameCompressedSize(const void* src, size_t srcSize)
+{
+    const BYTE* ip = (const BYTE*)src;
+    size_t remainingSize = srcSize;
+    blockProperties_t blockProperties = { bt_compressed, 0 };
+
+    /* Frame Header */
+    {   size_t const frameHeaderSize = ZSTDv06_frameHeaderSize(src, ZSTDv06_frameHeaderSize_min);
+        if (ZSTDv06_isError(frameHeaderSize)) return frameHeaderSize;
+        if (MEM_readLE32(src) != ZSTDv06_MAGICNUMBER) return ERROR(prefix_unknown);
+        if (srcSize < frameHeaderSize+ZSTDv06_blockHeaderSize) return ERROR(srcSize_wrong);
+        ip += frameHeaderSize; remainingSize -= frameHeaderSize;
+    }
+
+    /* Loop on each block */
+    while (1) {
+        size_t const cBlockSize = ZSTDv06_getcBlockSize(ip, remainingSize, &blockProperties);
+        if (ZSTDv06_isError(cBlockSize)) return cBlockSize;
+
+        ip += ZSTDv06_blockHeaderSize;
+        remainingSize -= ZSTDv06_blockHeaderSize;
+        if (cBlockSize > remainingSize) return ERROR(srcSize_wrong);
+
+        if (cBlockSize == 0) break;   /* bt_end */
+
+        ip += cBlockSize;
+        remainingSize -= cBlockSize;
+    }
+
+    return ip - (const BYTE*)src;
+}
 
 /*_******************************
 *  Streaming Decompression API
@@ -3750,7 +3789,7 @@ size_t ZSTDv06_decompressContinue(ZSTDv06_DCtx* dctx, void* dst, size_t dstCapac
             return 0;
         }
         dctx->expected = 0;   /* not necessary to copy more */
-
+	/* fall-through */
     case ZSTDds_decodeFrameHeader:
         {   size_t result;
             memcpy(dctx->headerBuffer + ZSTDv06_frameHeaderSize_min, src, dctx->expected);
@@ -3822,9 +3861,10 @@ static size_t ZSTDv06_loadEntropy(ZSTDv06_DCtx* dctx, const void* dict, size_t d
     dictSize -= hSize;
 
     {   short offcodeNCount[MaxOff+1];
-        U32 offcodeMaxValue=MaxOff, offcodeLog=OffFSELog;
+        U32 offcodeMaxValue=MaxOff, offcodeLog;
         offcodeHeaderSize = FSEv06_readNCount(offcodeNCount, &offcodeMaxValue, &offcodeLog, dict, dictSize);
         if (FSEv06_isError(offcodeHeaderSize)) return ERROR(dictionary_corrupted);
+        if (offcodeLog > OffFSELog) return ERROR(dictionary_corrupted);
         { size_t const errorCode = FSEv06_buildDTable(dctx->OffTable, offcodeNCount, offcodeMaxValue, offcodeLog);
           if (FSEv06_isError(errorCode)) return ERROR(dictionary_corrupted); }
         dict = (const char*)dict + offcodeHeaderSize;
@@ -3832,9 +3872,10 @@ static size_t ZSTDv06_loadEntropy(ZSTDv06_DCtx* dctx, const void* dict, size_t d
     }
 
     {   short matchlengthNCount[MaxML+1];
-        unsigned matchlengthMaxValue = MaxML, matchlengthLog = MLFSELog;
+        unsigned matchlengthMaxValue = MaxML, matchlengthLog;
         matchlengthHeaderSize = FSEv06_readNCount(matchlengthNCount, &matchlengthMaxValue, &matchlengthLog, dict, dictSize);
         if (FSEv06_isError(matchlengthHeaderSize)) return ERROR(dictionary_corrupted);
+        if (matchlengthLog > MLFSELog) return ERROR(dictionary_corrupted);
         { size_t const errorCode = FSEv06_buildDTable(dctx->MLTable, matchlengthNCount, matchlengthMaxValue, matchlengthLog);
           if (FSEv06_isError(errorCode)) return ERROR(dictionary_corrupted); }
         dict = (const char*)dict + matchlengthHeaderSize;
@@ -3842,9 +3883,10 @@ static size_t ZSTDv06_loadEntropy(ZSTDv06_DCtx* dctx, const void* dict, size_t d
     }
 
     {   short litlengthNCount[MaxLL+1];
-        unsigned litlengthMaxValue = MaxLL, litlengthLog = LLFSELog;
+        unsigned litlengthMaxValue = MaxLL, litlengthLog;
         litlengthHeaderSize = FSEv06_readNCount(litlengthNCount, &litlengthMaxValue, &litlengthLog, dict, dictSize);
         if (FSEv06_isError(litlengthHeaderSize)) return ERROR(dictionary_corrupted);
+        if (litlengthLog > LLFSELog) return ERROR(dictionary_corrupted);
         { size_t const errorCode = FSEv06_buildDTable(dctx->LLTable, litlengthNCount, litlengthMaxValue, litlengthLog);
           if (FSEv06_isError(errorCode)) return ERROR(dictionary_corrupted); }
     }
@@ -4066,7 +4108,7 @@ size_t ZBUFFv06_decompressContinue(ZBUFFv06_DCtx* zbd,
                     zbd->inBuff = (char*)malloc(blockSize);
                     if (zbd->inBuff == NULL) return ERROR(memory_allocation);
                 }
-                {   size_t const neededOutSize = ((size_t)1 << zbd->fParams.windowLog) + blockSize;
+                {   size_t const neededOutSize = ((size_t)1 << zbd->fParams.windowLog) + blockSize + WILDCOPY_OVERLENGTH * 2;
                     if (zbd->outBuffSize < neededOutSize) {
                         free(zbd->outBuff);
                         zbd->outBuffSize = neededOutSize;
@@ -4074,7 +4116,7 @@ size_t ZBUFFv06_decompressContinue(ZBUFFv06_DCtx* zbd,
                         if (zbd->outBuff == NULL) return ERROR(memory_allocation);
             }   }   }
             zbd->stage = ZBUFFds_read;
-
+	    /* fall-through */
         case ZBUFFds_read:
             {   size_t const neededInSize = ZSTDv06_nextSrcSizeToDecompress(zbd->zd);
                 if (neededInSize==0) {  /* end of frame */
@@ -4096,7 +4138,7 @@ size_t ZBUFFv06_decompressContinue(ZBUFFv06_DCtx* zbd,
                 if (ip==iend) { notDone = 0; break; }   /* no more input */
                 zbd->stage = ZBUFFds_load;
             }
-
+	    /* fall-through */
         case ZBUFFds_load:
             {   size_t const neededInSize = ZSTDv06_nextSrcSizeToDecompress(zbd->zd);
                 size_t const toLoad = neededInSize - zbd->inPos;   /* should always be <= remaining space within inBuff */
@@ -4117,8 +4159,9 @@ size_t ZBUFFv06_decompressContinue(ZBUFFv06_DCtx* zbd,
                     zbd->outEnd = zbd->outStart +  decodedSize;
                     zbd->stage = ZBUFFds_flush;
                     // break; /* ZBUFFds_flush follows */
-            }   }
-
+                }
+	    }
+	    /* fall-through */
         case ZBUFFds_flush:
             {   size_t const toFlushSize = zbd->outEnd - zbd->outStart;
                 size_t const flushedSize = ZBUFFv06_limitCopy(op, oend-op, zbd->outBuff + zbd->outStart, toFlushSize);
diff --git a/contrib/libzstd/include/zstd/legacy/zstd_v06.h b/contrib/libzstd/include/zstd/legacy/zstd_v06.h
index 14040abdd94..10c9c772597 100644
--- a/contrib/libzstd/include/zstd/legacy/zstd_v06.h
+++ b/contrib/libzstd/include/zstd/legacy/zstd_v06.h
@@ -41,6 +41,13 @@ extern "C" {
 ZSTDLIBv06_API size_t ZSTDv06_decompress( void* dst, size_t dstCapacity,
                                     const void* src, size_t compressedSize);
 
+/**
+ZSTDv06_getFrameSrcSize() : get the source length of a ZSTD frame
+    compressedSize : The size of the 'src' buffer, at least as large as the frame pointed to by 'src'
+    return : the number of bytes that would be read to decompress this frame
+             or an errorCode if it fails (which can be tested using ZSTDv06_isError())
+*/
+size_t ZSTDv06_findFrameCompressedSize(const void* src, size_t compressedSize);
 
 /* *************************************
 *  Helper functions
diff --git a/contrib/libzstd/include/zstd/legacy/zstd_v07.c b/contrib/libzstd/include/zstd/legacy/zstd_v07.c
new file mode 100644
index 00000000000..6669b71cea4
--- /dev/null
+++ b/contrib/libzstd/include/zstd/legacy/zstd_v07.c
@@ -0,0 +1,4577 @@
+/**
+ * Copyright (c) 2016-present, Yann Collet, Facebook, Inc.
+ * All rights reserved.
+ *
+ * This source code is licensed under the BSD-style license found in the
+ * LICENSE file in the root directory of this source tree. An additional grant
+ * of patent rights can be found in the PATENTS file in the same directory.
+ */
+
+
+/*- Dependencies -*/
+#include <stddef.h>     /* size_t, ptrdiff_t */
+#include <string.h>     /* memcpy */
+#include <stdlib.h>     /* malloc, free, qsort */
+
+#ifndef XXH_STATIC_LINKING_ONLY
+#  define XXH_STATIC_LINKING_ONLY    /* XXH64_state_t */
+#endif
+#include "xxhash.h"                  /* XXH64_* */
+#include "zstd_v07.h"
+
+#define FSEv07_STATIC_LINKING_ONLY   /* FSEv07_MIN_TABLELOG */
+#define HUFv07_STATIC_LINKING_ONLY   /* HUFv07_TABLELOG_ABSOLUTEMAX */
+#define ZSTDv07_STATIC_LINKING_ONLY
+
+#include "error_private.h"
+
+
+#ifdef ZSTDv07_STATIC_LINKING_ONLY
+
+/* ====================================================================================
+ * The definitions in this section are considered experimental.
+ * They should never be used with a dynamic library, as they may change in the future.
+ * They are provided for advanced usages.
+ * Use them only in association with static linking.
+ * ==================================================================================== */
+
+/*--- Constants ---*/
+#define ZSTDv07_MAGIC_SKIPPABLE_START  0x184D2A50U
+
+#define ZSTDv07_WINDOWLOG_MAX_32  25
+#define ZSTDv07_WINDOWLOG_MAX_64  27
+#define ZSTDv07_WINDOWLOG_MAX    ((U32)(MEM_32bits() ? ZSTDv07_WINDOWLOG_MAX_32 : ZSTDv07_WINDOWLOG_MAX_64))
+#define ZSTDv07_WINDOWLOG_MIN     18
+#define ZSTDv07_CHAINLOG_MAX     (ZSTDv07_WINDOWLOG_MAX+1)
+#define ZSTDv07_CHAINLOG_MIN       4
+#define ZSTDv07_HASHLOG_MAX       ZSTDv07_WINDOWLOG_MAX
+#define ZSTDv07_HASHLOG_MIN       12
+#define ZSTDv07_HASHLOG3_MAX      17
+#define ZSTDv07_SEARCHLOG_MAX    (ZSTDv07_WINDOWLOG_MAX-1)
+#define ZSTDv07_SEARCHLOG_MIN      1
+#define ZSTDv07_SEARCHLENGTH_MAX   7
+#define ZSTDv07_SEARCHLENGTH_MIN   3
+#define ZSTDv07_TARGETLENGTH_MIN   4
+#define ZSTDv07_TARGETLENGTH_MAX 999
+
+#define ZSTDv07_FRAMEHEADERSIZE_MAX 18    /* for static allocation */
+static const size_t ZSTDv07_frameHeaderSize_min = 5;
+static const size_t ZSTDv07_frameHeaderSize_max = ZSTDv07_FRAMEHEADERSIZE_MAX;
+static const size_t ZSTDv07_skippableHeaderSize = 8;  /* magic number + skippable frame length */
+
+
+/* custom memory allocation functions */
+typedef void* (*ZSTDv07_allocFunction) (void* opaque, size_t size);
+typedef void  (*ZSTDv07_freeFunction) (void* opaque, void* address);
+typedef struct { ZSTDv07_allocFunction customAlloc; ZSTDv07_freeFunction customFree; void* opaque; } ZSTDv07_customMem;
+
+
+/*--- Advanced Decompression functions ---*/
+
+/*! ZSTDv07_estimateDCtxSize() :
+ *  Gives the potential amount of memory allocated to create a ZSTDv07_DCtx */
+ZSTDLIBv07_API size_t ZSTDv07_estimateDCtxSize(void);
+
+/*! ZSTDv07_createDCtx_advanced() :
+ *  Create a ZSTD decompression context using external alloc and free functions */
+ZSTDLIBv07_API ZSTDv07_DCtx* ZSTDv07_createDCtx_advanced(ZSTDv07_customMem customMem);
+
+/*! ZSTDv07_sizeofDCtx() :
+ *  Gives the amount of memory used by a given ZSTDv07_DCtx */
+ZSTDLIBv07_API size_t ZSTDv07_sizeofDCtx(const ZSTDv07_DCtx* dctx);
+
+
+/* ******************************************************************
+*  Buffer-less streaming functions (synchronous mode)
+********************************************************************/
+
+ZSTDLIBv07_API size_t ZSTDv07_decompressBegin(ZSTDv07_DCtx* dctx);
+ZSTDLIBv07_API size_t ZSTDv07_decompressBegin_usingDict(ZSTDv07_DCtx* dctx, const void* dict, size_t dictSize);
+ZSTDLIBv07_API void   ZSTDv07_copyDCtx(ZSTDv07_DCtx* dctx, const ZSTDv07_DCtx* preparedDCtx);
+
+ZSTDLIBv07_API size_t ZSTDv07_nextSrcSizeToDecompress(ZSTDv07_DCtx* dctx);
+ZSTDLIBv07_API size_t ZSTDv07_decompressContinue(ZSTDv07_DCtx* dctx, void* dst, size_t dstCapacity, const void* src, size_t srcSize);
+
+/*
+  Buffer-less streaming decompression (synchronous mode)
+
+  A ZSTDv07_DCtx object is required to track streaming operations.
+  Use ZSTDv07_createDCtx() / ZSTDv07_freeDCtx() to manage it.
+  A ZSTDv07_DCtx object can be re-used multiple times.
+
+  First optional operation is to retrieve frame parameters, using ZSTDv07_getFrameParams(), which doesn't consume the input.
+  It can provide the minimum size of rolling buffer required to properly decompress data (`windowSize`),
+  and optionally the final size of uncompressed content.
+  (Note : content size is an optional info that may not be present. 0 means : content size unknown)
+  Frame parameters are extracted from the beginning of compressed frame.
+  The amount of data to read is variable, from ZSTDv07_frameHeaderSize_min to ZSTDv07_frameHeaderSize_max (so if `srcSize` >= ZSTDv07_frameHeaderSize_max, it will always work)
+  If `srcSize` is too small for operation to succeed, function will return the minimum size it requires to produce a result.
+  Result : 0 when successful, it means the ZSTDv07_frameParams structure has been filled.
+          >0 : means there is not enough data into `src`. Provides the expected size to successfully decode header.
+           errorCode, which can be tested using ZSTDv07_isError()
+
+  Start decompression, with ZSTDv07_decompressBegin() or ZSTDv07_decompressBegin_usingDict().
+  Alternatively, you can copy a prepared context, using ZSTDv07_copyDCtx().
+
+  Then use ZSTDv07_nextSrcSizeToDecompress() and ZSTDv07_decompressContinue() alternatively.
+  ZSTDv07_nextSrcSizeToDecompress() tells how much bytes to provide as 'srcSize' to ZSTDv07_decompressContinue().
+  ZSTDv07_decompressContinue() requires this exact amount of bytes, or it will fail.
+
+  @result of ZSTDv07_decompressContinue() is the number of bytes regenerated within 'dst' (necessarily <= dstCapacity).
+  It can be zero, which is not an error; it just means ZSTDv07_decompressContinue() has decoded some header.
+
+  ZSTDv07_decompressContinue() needs previous data blocks during decompression, up to `windowSize`.
+  They should preferably be located contiguously, prior to current block.
+  Alternatively, a round buffer of sufficient size is also possible. Sufficient size is determined by frame parameters.
+  ZSTDv07_decompressContinue() is very sensitive to contiguity,
+  if 2 blocks don't follow each other, make sure that either the compressor breaks contiguity at the same place,
+    or that previous contiguous segment is large enough to properly handle maximum back-reference.
+
+  A frame is fully decoded when ZSTDv07_nextSrcSizeToDecompress() returns zero.
+  Context can then be reset to start a new decompression.
+
+
+  == Special case : skippable frames ==
+
+  Skippable frames allow the integration of user-defined data into a flow of concatenated frames.
+  Skippable frames will be ignored (skipped) by a decompressor. The format of skippable frame is following:
+  a) Skippable frame ID - 4 Bytes, Little endian format, any value from 0x184D2A50 to 0x184D2A5F
+  b) Frame Size - 4 Bytes, Little endian format, unsigned 32-bits
+  c) Frame Content - any content (User Data) of length equal to Frame Size
+  For skippable frames ZSTDv07_decompressContinue() always returns 0.
+  For skippable frames ZSTDv07_getFrameParams() returns fparamsPtr->windowLog==0 what means that a frame is skippable.
+  It also returns Frame Size as fparamsPtr->frameContentSize.
+*/
+
+
+/* **************************************
+*  Block functions
+****************************************/
+/*! Block functions produce and decode raw zstd blocks, without frame metadata.
+    Frame metadata cost is typically ~18 bytes, which can be non-negligible for very small blocks (< 100 bytes).
+    User will have to take in charge required information to regenerate data, such as compressed and content sizes.
+
+    A few rules to respect :
+    - Compressing and decompressing require a context structure
+      + Use ZSTDv07_createCCtx() and ZSTDv07_createDCtx()
+    - It is necessary to init context before starting
+      + compression : ZSTDv07_compressBegin()
+      + decompression : ZSTDv07_decompressBegin()
+      + variants _usingDict() are also allowed
+      + copyCCtx() and copyDCtx() work too
+    - Block size is limited, it must be <= ZSTDv07_getBlockSizeMax()
+      + If you need to compress more, cut data into multiple blocks
+      + Consider using the regular ZSTDv07_compress() instead, as frame metadata costs become negligible when source size is large.
+    - When a block is considered not compressible enough, ZSTDv07_compressBlock() result will be zero.
+      In which case, nothing is produced into `dst`.
+      + User must test for such outcome and deal directly with uncompressed data
+      + ZSTDv07_decompressBlock() doesn't accept uncompressed data as input !!!
+      + In case of multiple successive blocks, decoder must be informed of uncompressed block existence to follow proper history.
+        Use ZSTDv07_insertBlock() in such a case.
+*/
+
+#define ZSTDv07_BLOCKSIZE_ABSOLUTEMAX (128 * 1024)   /* define, for static allocation */
+ZSTDLIBv07_API size_t ZSTDv07_decompressBlock(ZSTDv07_DCtx* dctx, void* dst, size_t dstCapacity, const void* src, size_t srcSize);
+ZSTDLIBv07_API size_t ZSTDv07_insertBlock(ZSTDv07_DCtx* dctx, const void* blockStart, size_t blockSize);  /**< insert block into `dctx` history. Useful for uncompressed blocks */
+
+
+#endif   /* ZSTDv07_STATIC_LINKING_ONLY */
+
+
+/* ******************************************************************
+   mem.h
+   low-level memory access routines
+   Copyright (C) 2013-2015, Yann Collet.
+
+   BSD 2-Clause License (http://www.opensource.org/licenses/bsd-license.php)
+
+   Redistribution and use in source and binary forms, with or without
+   modification, are permitted provided that the following conditions are
+   met:
+
+       * Redistributions of source code must retain the above copyright
+   notice, this list of conditions and the following disclaimer.
+       * Redistributions in binary form must reproduce the above
+   copyright notice, this list of conditions and the following disclaimer
+   in the documentation and/or other materials provided with the
+   distribution.
+
+   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+   OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+    You can contact the author at :
+    - FSE source repository : https://github.com/Cyan4973/FiniteStateEntropy
+    - Public forum : https://groups.google.com/forum/#!forum/lz4c
+****************************************************************** */
+#ifndef MEM_H_MODULE
+#define MEM_H_MODULE
+
+#if defined (__cplusplus)
+extern "C" {
+#endif
+
+/*-****************************************
+*  Compiler specifics
+******************************************/
+#if defined(_MSC_VER)   /* Visual Studio */
+#   include <stdlib.h>  /* _byteswap_ulong */
+#   include <intrin.h>  /* _byteswap_* */
+#endif
+#if defined(__GNUC__)
+#  define MEM_STATIC static __attribute__((unused))
+#elif defined (__cplusplus) || (defined (__STDC_VERSION__) && (__STDC_VERSION__ >= 199901L) /* C99 */)
+#  define MEM_STATIC static inline
+#elif defined(_MSC_VER)
+#  define MEM_STATIC static __inline
+#else
+#  define MEM_STATIC static  /* this version may generate warnings for unused static functions; disable the relevant warning */
+#endif
+
+
+/*-**************************************************************
+*  Basic Types
+*****************************************************************/
+#if  !defined (__VMS) && (defined (__cplusplus) || (defined (__STDC_VERSION__) && (__STDC_VERSION__ >= 199901L) /* C99 */) )
+# include <stdint.h>
+  typedef  uint8_t BYTE;
+  typedef uint16_t U16;
+  typedef  int16_t S16;
+  typedef uint32_t U32;
+  typedef  int32_t S32;
+  typedef uint64_t U64;
+  typedef  int64_t S64;
+#else
+  typedef unsigned char       BYTE;
+  typedef unsigned short      U16;
+  typedef   signed short      S16;
+  typedef unsigned int        U32;
+  typedef   signed int        S32;
+  typedef unsigned long long  U64;
+  typedef   signed long long  S64;
+#endif
+
+
+/*-**************************************************************
+*  Memory I/O
+*****************************************************************/
+/* MEM_FORCE_MEMORY_ACCESS :
+ * By default, access to unaligned memory is controlled by `memcpy()`, which is safe and portable.
+ * Unfortunately, on some target/compiler combinations, the generated assembly is sub-optimal.
+ * The below switch allow to select different access method for improved performance.
+ * Method 0 (default) : use `memcpy()`. Safe and portable.
+ * Method 1 : `__packed` statement. It depends on compiler extension (ie, not portable).
+ *            This method is safe if your compiler supports it, and *generally* as fast or faster than `memcpy`.
+ * Method 2 : direct access. This method is portable but violate C standard.
+ *            It can generate buggy code on targets depending on alignment.
+ *            In some circumstances, it's the only known way to get the most performance (ie GCC + ARMv6)
+ * See http://fastcompression.blogspot.fr/2015/08/accessing-unaligned-memory.html for details.
+ * Prefer these methods in priority order (0 > 1 > 2)
+ */
+#ifndef MEM_FORCE_MEMORY_ACCESS   /* can be defined externally, on command line for example */
+#  if defined(__GNUC__) && ( defined(__ARM_ARCH_6__) || defined(__ARM_ARCH_6J__) || defined(__ARM_ARCH_6K__) || defined(__ARM_ARCH_6Z__) || defined(__ARM_ARCH_6ZK__) || defined(__ARM_ARCH_6T2__) )
+#    define MEM_FORCE_MEMORY_ACCESS 2
+#  elif (defined(__INTEL_COMPILER) && !defined(WIN32)) || \
+  (defined(__GNUC__) && ( defined(__ARM_ARCH_7__) || defined(__ARM_ARCH_7A__) || defined(__ARM_ARCH_7R__) || defined(__ARM_ARCH_7M__) || defined(__ARM_ARCH_7S__) ))
+#    define MEM_FORCE_MEMORY_ACCESS 1
+#  endif
+#endif
+
+MEM_STATIC unsigned MEM_32bits(void) { return sizeof(size_t)==4; }
+MEM_STATIC unsigned MEM_64bits(void) { return sizeof(size_t)==8; }
+
+MEM_STATIC unsigned MEM_isLittleEndian(void)
+{
+    const union { U32 u; BYTE c[4]; } one = { 1 };   /* don't use static : performance detrimental  */
+    return one.c[0];
+}
+
+#if defined(MEM_FORCE_MEMORY_ACCESS) && (MEM_FORCE_MEMORY_ACCESS==2)
+
+/* violates C standard, by lying on structure alignment.
+Only use if no other choice to achieve best performance on target platform */
+MEM_STATIC U16 MEM_read16(const void* memPtr) { return *(const U16*) memPtr; }
+MEM_STATIC U32 MEM_read32(const void* memPtr) { return *(const U32*) memPtr; }
+MEM_STATIC U64 MEM_read64(const void* memPtr) { return *(const U64*) memPtr; }
+
+MEM_STATIC void MEM_write16(void* memPtr, U16 value) { *(U16*)memPtr = value; }
+
+#elif defined(MEM_FORCE_MEMORY_ACCESS) && (MEM_FORCE_MEMORY_ACCESS==1)
+
+/* __pack instructions are safer, but compiler specific, hence potentially problematic for some compilers */
+/* currently only defined for gcc and icc */
+typedef union { U16 u16; U32 u32; U64 u64; size_t st; } __attribute__((packed)) unalign;
+
+MEM_STATIC U16 MEM_read16(const void* ptr) { return ((const unalign*)ptr)->u16; }
+MEM_STATIC U32 MEM_read32(const void* ptr) { return ((const unalign*)ptr)->u32; }
+MEM_STATIC U64 MEM_read64(const void* ptr) { return ((const unalign*)ptr)->u64; }
+
+MEM_STATIC void MEM_write16(void* memPtr, U16 value) { ((unalign*)memPtr)->u16 = value; }
+
+#else
+
+/* default method, safe and standard.
+   can sometimes prove slower */
+
+MEM_STATIC U16 MEM_read16(const void* memPtr)
+{
+    U16 val; memcpy(&val, memPtr, sizeof(val)); return val;
+}
+
+MEM_STATIC U32 MEM_read32(const void* memPtr)
+{
+    U32 val; memcpy(&val, memPtr, sizeof(val)); return val;
+}
+
+MEM_STATIC U64 MEM_read64(const void* memPtr)
+{
+    U64 val; memcpy(&val, memPtr, sizeof(val)); return val;
+}
+
+MEM_STATIC void MEM_write16(void* memPtr, U16 value)
+{
+    memcpy(memPtr, &value, sizeof(value));
+}
+
+#endif /* MEM_FORCE_MEMORY_ACCESS */
+
+MEM_STATIC U32 MEM_swap32(U32 in)
+{
+#if defined(_MSC_VER)     /* Visual Studio */
+    return _byteswap_ulong(in);
+#elif defined (__GNUC__)
+    return __builtin_bswap32(in);
+#else
+    return  ((in << 24) & 0xff000000 ) |
+            ((in <<  8) & 0x00ff0000 ) |
+            ((in >>  8) & 0x0000ff00 ) |
+            ((in >> 24) & 0x000000ff );
+#endif
+}
+
+MEM_STATIC U64 MEM_swap64(U64 in)
+{
+#if defined(_MSC_VER)     /* Visual Studio */
+    return _byteswap_uint64(in);
+#elif defined (__GNUC__)
+    return __builtin_bswap64(in);
+#else
+    return  ((in << 56) & 0xff00000000000000ULL) |
+            ((in << 40) & 0x00ff000000000000ULL) |
+            ((in << 24) & 0x0000ff0000000000ULL) |
+            ((in << 8)  & 0x000000ff00000000ULL) |
+            ((in >> 8)  & 0x00000000ff000000ULL) |
+            ((in >> 24) & 0x0000000000ff0000ULL) |
+            ((in >> 40) & 0x000000000000ff00ULL) |
+            ((in >> 56) & 0x00000000000000ffULL);
+#endif
+}
+
+
+/*=== Little endian r/w ===*/
+
+MEM_STATIC U16 MEM_readLE16(const void* memPtr)
+{
+    if (MEM_isLittleEndian())
+        return MEM_read16(memPtr);
+    else {
+        const BYTE* p = (const BYTE*)memPtr;
+        return (U16)(p[0] + (p[1]<<8));
+    }
+}
+
+MEM_STATIC void MEM_writeLE16(void* memPtr, U16 val)
+{
+    if (MEM_isLittleEndian()) {
+        MEM_write16(memPtr, val);
+    } else {
+        BYTE* p = (BYTE*)memPtr;
+        p[0] = (BYTE)val;
+        p[1] = (BYTE)(val>>8);
+    }
+}
+
+MEM_STATIC U32 MEM_readLE32(const void* memPtr)
+{
+    if (MEM_isLittleEndian())
+        return MEM_read32(memPtr);
+    else
+        return MEM_swap32(MEM_read32(memPtr));
+}
+
+
+MEM_STATIC U64 MEM_readLE64(const void* memPtr)
+{
+    if (MEM_isLittleEndian())
+        return MEM_read64(memPtr);
+    else
+        return MEM_swap64(MEM_read64(memPtr));
+}
+
+MEM_STATIC size_t MEM_readLEST(const void* memPtr)
+{
+    if (MEM_32bits())
+        return (size_t)MEM_readLE32(memPtr);
+    else
+        return (size_t)MEM_readLE64(memPtr);
+}
+
+
+
+#if defined (__cplusplus)
+}
+#endif
+
+#endif /* MEM_H_MODULE */
+/* ******************************************************************
+   bitstream
+   Part of FSE library
+   header file (to include)
+   Copyright (C) 2013-2016, Yann Collet.
+
+   BSD 2-Clause License (http://www.opensource.org/licenses/bsd-license.php)
+
+   Redistribution and use in source and binary forms, with or without
+   modification, are permitted provided that the following conditions are
+   met:
+
+       * Redistributions of source code must retain the above copyright
+   notice, this list of conditions and the following disclaimer.
+       * Redistributions in binary form must reproduce the above
+   copyright notice, this list of conditions and the following disclaimer
+   in the documentation and/or other materials provided with the
+   distribution.
+
+   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+   OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+   You can contact the author at :
+   - Source repository : https://github.com/Cyan4973/FiniteStateEntropy
+****************************************************************** */
+#ifndef BITSTREAM_H_MODULE
+#define BITSTREAM_H_MODULE
+
+#if defined (__cplusplus)
+extern "C" {
+#endif
+
+
+/*
+*  This API consists of small unitary functions, which must be inlined for best performance.
+*  Since link-time-optimization is not available for all compilers,
+*  these functions are defined into a .h to be included.
+*/
+
+
+/*=========================================
+*  Target specific
+=========================================*/
+#if defined(__BMI__) && defined(__GNUC__)
+#  include <immintrin.h>   /* support for bextr (experimental) */
+#endif
+
+/*-********************************************
+*  bitStream decoding API (read backward)
+**********************************************/
+typedef struct
+{
+    size_t   bitContainer;
+    unsigned bitsConsumed;
+    const char* ptr;
+    const char* start;
+} BITv07_DStream_t;
+
+typedef enum { BITv07_DStream_unfinished = 0,
+               BITv07_DStream_endOfBuffer = 1,
+               BITv07_DStream_completed = 2,
+               BITv07_DStream_overflow = 3 } BITv07_DStream_status;  /* result of BITv07_reloadDStream() */
+               /* 1,2,4,8 would be better for bitmap combinations, but slows down performance a bit ... :( */
+
+MEM_STATIC size_t   BITv07_initDStream(BITv07_DStream_t* bitD, const void* srcBuffer, size_t srcSize);
+MEM_STATIC size_t   BITv07_readBits(BITv07_DStream_t* bitD, unsigned nbBits);
+MEM_STATIC BITv07_DStream_status BITv07_reloadDStream(BITv07_DStream_t* bitD);
+MEM_STATIC unsigned BITv07_endOfDStream(const BITv07_DStream_t* bitD);
+
+
+/* Start by invoking BITv07_initDStream().
+*  A chunk of the bitStream is then stored into a local register.
+*  Local register size is 64-bits on 64-bits systems, 32-bits on 32-bits systems (size_t).
+*  You can then retrieve bitFields stored into the local register, **in reverse order**.
+*  Local register is explicitly reloaded from memory by the BITv07_reloadDStream() method.
+*  A reload guarantee a minimum of ((8*sizeof(bitD->bitContainer))-7) bits when its result is BITv07_DStream_unfinished.
+*  Otherwise, it can be less than that, so proceed accordingly.
+*  Checking if DStream has reached its end can be performed with BITv07_endOfDStream().
+*/
+
+
+/*-****************************************
+*  unsafe API
+******************************************/
+MEM_STATIC size_t BITv07_readBitsFast(BITv07_DStream_t* bitD, unsigned nbBits);
+/* faster, but works only if nbBits >= 1 */
+
+
+
+/*-**************************************************************
+*  Internal functions
+****************************************************************/
+MEM_STATIC unsigned BITv07_highbit32 (register U32 val)
+{
+#   if defined(_MSC_VER)   /* Visual */
+    unsigned long r=0;
+    _BitScanReverse ( &r, val );
+    return (unsigned) r;
+#   elif defined(__GNUC__) && (__GNUC__ >= 3)   /* Use GCC Intrinsic */
+    return 31 - __builtin_clz (val);
+#   else   /* Software version */
+    static const unsigned DeBruijnClz[32] = { 0, 9, 1, 10, 13, 21, 2, 29, 11, 14, 16, 18, 22, 25, 3, 30, 8, 12, 20, 28, 15, 17, 24, 7, 19, 27, 23, 6, 26, 5, 4, 31 };
+    U32 v = val;
+    v |= v >> 1;
+    v |= v >> 2;
+    v |= v >> 4;
+    v |= v >> 8;
+    v |= v >> 16;
+    return DeBruijnClz[ (U32) (v * 0x07C4ACDDU) >> 27];
+#   endif
+}
+
+
+
+/*-********************************************************
+* bitStream decoding
+**********************************************************/
+/*! BITv07_initDStream() :
+*   Initialize a BITv07_DStream_t.
+*   `bitD` : a pointer to an already allocated BITv07_DStream_t structure.
+*   `srcSize` must be the *exact* size of the bitStream, in bytes.
+*   @return : size of stream (== srcSize) or an errorCode if a problem is detected
+*/
+MEM_STATIC size_t BITv07_initDStream(BITv07_DStream_t* bitD, const void* srcBuffer, size_t srcSize)
+{
+    if (srcSize < 1) { memset(bitD, 0, sizeof(*bitD)); return ERROR(srcSize_wrong); }
+
+    if (srcSize >=  sizeof(bitD->bitContainer)) {  /* normal case */
+        bitD->start = (const char*)srcBuffer;
+        bitD->ptr   = (const char*)srcBuffer + srcSize - sizeof(bitD->bitContainer);
+        bitD->bitContainer = MEM_readLEST(bitD->ptr);
+        { BYTE const lastByte = ((const BYTE*)srcBuffer)[srcSize-1];
+          bitD->bitsConsumed = lastByte ? 8 - BITv07_highbit32(lastByte) : 0;
+          if (lastByte == 0) return ERROR(GENERIC); /* endMark not present */ }
+    } else {
+        bitD->start = (const char*)srcBuffer;
+        bitD->ptr   = bitD->start;
+        bitD->bitContainer = *(const BYTE*)(bitD->start);
+        switch(srcSize)
+        {
+            case 7: bitD->bitContainer += (size_t)(((const BYTE*)(srcBuffer))[6]) << (sizeof(bitD->bitContainer)*8 - 16);/* fall-through */
+            case 6: bitD->bitContainer += (size_t)(((const BYTE*)(srcBuffer))[5]) << (sizeof(bitD->bitContainer)*8 - 24);/* fall-through */
+            case 5: bitD->bitContainer += (size_t)(((const BYTE*)(srcBuffer))[4]) << (sizeof(bitD->bitContainer)*8 - 32);/* fall-through */
+            case 4: bitD->bitContainer += (size_t)(((const BYTE*)(srcBuffer))[3]) << 24; /* fall-through */
+            case 3: bitD->bitContainer += (size_t)(((const BYTE*)(srcBuffer))[2]) << 16; /* fall-through */
+            case 2: bitD->bitContainer += (size_t)(((const BYTE*)(srcBuffer))[1]) <<  8; /* fall-through */
+            default: break;
+        }
+        { BYTE const lastByte = ((const BYTE*)srcBuffer)[srcSize-1];
+          bitD->bitsConsumed = lastByte ? 8 - BITv07_highbit32(lastByte) : 0;
+          if (lastByte == 0) return ERROR(GENERIC); /* endMark not present */ }
+        bitD->bitsConsumed += (U32)(sizeof(bitD->bitContainer) - srcSize)*8;
+    }
+
+    return srcSize;
+}
+
+
+/*! BITv07_lookBits() :
+ *  Provides next n bits from local register.
+ *  local register is not modified.
+ *  On 32-bits, maxNbBits==24.
+ *  On 64-bits, maxNbBits==56.
+ *  @return : value extracted
+ */
+ MEM_STATIC size_t BITv07_lookBits(const BITv07_DStream_t* bitD, U32 nbBits)
+{
+    U32 const bitMask = sizeof(bitD->bitContainer)*8 - 1;
+    return ((bitD->bitContainer << (bitD->bitsConsumed & bitMask)) >> 1) >> ((bitMask-nbBits) & bitMask);
+}
+
+/*! BITv07_lookBitsFast() :
+*   unsafe version; only works only if nbBits >= 1 */
+MEM_STATIC size_t BITv07_lookBitsFast(const BITv07_DStream_t* bitD, U32 nbBits)
+{
+    U32 const bitMask = sizeof(bitD->bitContainer)*8 - 1;
+    return (bitD->bitContainer << (bitD->bitsConsumed & bitMask)) >> (((bitMask+1)-nbBits) & bitMask);
+}
+
+MEM_STATIC void BITv07_skipBits(BITv07_DStream_t* bitD, U32 nbBits)
+{
+    bitD->bitsConsumed += nbBits;
+}
+
+/*! BITv07_readBits() :
+ *  Read (consume) next n bits from local register and update.
+ *  Pay attention to not read more than nbBits contained into local register.
+ *  @return : extracted value.
+ */
+MEM_STATIC size_t BITv07_readBits(BITv07_DStream_t* bitD, U32 nbBits)
+{
+    size_t const value = BITv07_lookBits(bitD, nbBits);
+    BITv07_skipBits(bitD, nbBits);
+    return value;
+}
+
+/*! BITv07_readBitsFast() :
+*   unsafe version; only works only if nbBits >= 1 */
+MEM_STATIC size_t BITv07_readBitsFast(BITv07_DStream_t* bitD, U32 nbBits)
+{
+    size_t const value = BITv07_lookBitsFast(bitD, nbBits);
+    BITv07_skipBits(bitD, nbBits);
+    return value;
+}
+
+/*! BITv07_reloadDStream() :
+*   Refill `BITv07_DStream_t` from src buffer previously defined (see BITv07_initDStream() ).
+*   This function is safe, it guarantees it will not read beyond src buffer.
+*   @return : status of `BITv07_DStream_t` internal register.
+              if status == unfinished, internal register is filled with >= (sizeof(bitD->bitContainer)*8 - 7) bits */
+MEM_STATIC BITv07_DStream_status BITv07_reloadDStream(BITv07_DStream_t* bitD)
+{
+    if (bitD->bitsConsumed > (sizeof(bitD->bitContainer)*8))  /* should not happen => corruption detected */
+        return BITv07_DStream_overflow;
+
+    if (bitD->ptr >= bitD->start + sizeof(bitD->bitContainer)) {
+        bitD->ptr -= bitD->bitsConsumed >> 3;
+        bitD->bitsConsumed &= 7;
+        bitD->bitContainer = MEM_readLEST(bitD->ptr);
+        return BITv07_DStream_unfinished;
+    }
+    if (bitD->ptr == bitD->start) {
+        if (bitD->bitsConsumed < sizeof(bitD->bitContainer)*8) return BITv07_DStream_endOfBuffer;
+        return BITv07_DStream_completed;
+    }
+    {   U32 nbBytes = bitD->bitsConsumed >> 3;
+        BITv07_DStream_status result = BITv07_DStream_unfinished;
+        if (bitD->ptr - nbBytes < bitD->start) {
+            nbBytes = (U32)(bitD->ptr - bitD->start);  /* ptr > start */
+            result = BITv07_DStream_endOfBuffer;
+        }
+        bitD->ptr -= nbBytes;
+        bitD->bitsConsumed -= nbBytes*8;
+        bitD->bitContainer = MEM_readLEST(bitD->ptr);   /* reminder : srcSize > sizeof(bitD) */
+        return result;
+    }
+}
+
+/*! BITv07_endOfDStream() :
+*   @return Tells if DStream has exactly reached its end (all bits consumed).
+*/
+MEM_STATIC unsigned BITv07_endOfDStream(const BITv07_DStream_t* DStream)
+{
+    return ((DStream->ptr == DStream->start) && (DStream->bitsConsumed == sizeof(DStream->bitContainer)*8));
+}
+
+#if defined (__cplusplus)
+}
+#endif
+
+#endif /* BITSTREAM_H_MODULE */
+/* ******************************************************************
+   FSE : Finite State Entropy codec
+   Public Prototypes declaration
+   Copyright (C) 2013-2016, Yann Collet.
+
+   BSD 2-Clause License (http://www.opensource.org/licenses/bsd-license.php)
+
+   Redistribution and use in source and binary forms, with or without
+   modification, are permitted provided that the following conditions are
+   met:
+
+       * Redistributions of source code must retain the above copyright
+   notice, this list of conditions and the following disclaimer.
+       * Redistributions in binary form must reproduce the above
+   copyright notice, this list of conditions and the following disclaimer
+   in the documentation and/or other materials provided with the
+   distribution.
+
+   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+   OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+   You can contact the author at :
+   - Source repository : https://github.com/Cyan4973/FiniteStateEntropy
+****************************************************************** */
+#ifndef FSEv07_H
+#define FSEv07_H
+
+#if defined (__cplusplus)
+extern "C" {
+#endif
+
+
+
+/*-****************************************
+*  FSE simple functions
+******************************************/
+
+/*! FSEv07_decompress():
+    Decompress FSE data from buffer 'cSrc', of size 'cSrcSize',
+    into already allocated destination buffer 'dst', of size 'dstCapacity'.
+    @return : size of regenerated data (<= maxDstSize),
+              or an error code, which can be tested using FSEv07_isError() .
+
+    ** Important ** : FSEv07_decompress() does not decompress non-compressible nor RLE data !!!
+    Why ? : making this distinction requires a header.
+    Header management is intentionally delegated to the user layer, which can better manage special cases.
+*/
+size_t FSEv07_decompress(void* dst,  size_t dstCapacity,
+                const void* cSrc, size_t cSrcSize);
+
+
+/* Error Management */
+unsigned    FSEv07_isError(size_t code);        /* tells if a return value is an error code */
+const char* FSEv07_getErrorName(size_t code);   /* provides error code string (useful for debugging) */
+
+
+/*-*****************************************
+*  FSE detailed API
+******************************************/
+/*!
+FSEv07_decompress() does the following:
+1. read normalized counters with readNCount()
+2. build decoding table 'DTable' from normalized counters
+3. decode the data stream using decoding table 'DTable'
+
+The following API allows targeting specific sub-functions for advanced tasks.
+For example, it's possible to compress several blocks using the same 'CTable',
+or to save and provide normalized distribution using external method.
+*/
+
+
+/* *** DECOMPRESSION *** */
+
+/*! FSEv07_readNCount():
+    Read compactly saved 'normalizedCounter' from 'rBuffer'.
+    @return : size read from 'rBuffer',
+              or an errorCode, which can be tested using FSEv07_isError().
+              maxSymbolValuePtr[0] and tableLogPtr[0] will also be updated with their respective values */
+size_t FSEv07_readNCount (short* normalizedCounter, unsigned* maxSymbolValuePtr, unsigned* tableLogPtr, const void* rBuffer, size_t rBuffSize);
+
+/*! Constructor and Destructor of FSEv07_DTable.
+    Note that its size depends on 'tableLog' */
+typedef unsigned FSEv07_DTable;   /* don't allocate that. It's just a way to be more restrictive than void* */
+FSEv07_DTable* FSEv07_createDTable(unsigned tableLog);
+void        FSEv07_freeDTable(FSEv07_DTable* dt);
+
+/*! FSEv07_buildDTable():
+    Builds 'dt', which must be already allocated, using FSEv07_createDTable().
+    return : 0, or an errorCode, which can be tested using FSEv07_isError() */
+size_t FSEv07_buildDTable (FSEv07_DTable* dt, const short* normalizedCounter, unsigned maxSymbolValue, unsigned tableLog);
+
+/*! FSEv07_decompress_usingDTable():
+    Decompress compressed source `cSrc` of size `cSrcSize` using `dt`
+    into `dst` which must be already allocated.
+    @return : size of regenerated data (necessarily <= `dstCapacity`),
+              or an errorCode, which can be tested using FSEv07_isError() */
+size_t FSEv07_decompress_usingDTable(void* dst, size_t dstCapacity, const void* cSrc, size_t cSrcSize, const FSEv07_DTable* dt);
+
+/*!
+Tutorial :
+----------
+(Note : these functions only decompress FSE-compressed blocks.
+ If block is uncompressed, use memcpy() instead
+ If block is a single repeated byte, use memset() instead )
+
+The first step is to obtain the normalized frequencies of symbols.
+This can be performed by FSEv07_readNCount() if it was saved using FSEv07_writeNCount().
+'normalizedCounter' must be already allocated, and have at least 'maxSymbolValuePtr[0]+1' cells of signed short.
+In practice, that means it's necessary to know 'maxSymbolValue' beforehand,
+or size the table to handle worst case situations (typically 256).
+FSEv07_readNCount() will provide 'tableLog' and 'maxSymbolValue'.
+The result of FSEv07_readNCount() is the number of bytes read from 'rBuffer'.
+Note that 'rBufferSize' must be at least 4 bytes, even if useful information is less than that.
+If there is an error, the function will return an error code, which can be tested using FSEv07_isError().
+
+The next step is to build the decompression tables 'FSEv07_DTable' from 'normalizedCounter'.
+This is performed by the function FSEv07_buildDTable().
+The space required by 'FSEv07_DTable' must be already allocated using FSEv07_createDTable().
+If there is an error, the function will return an error code, which can be tested using FSEv07_isError().
+
+`FSEv07_DTable` can then be used to decompress `cSrc`, with FSEv07_decompress_usingDTable().
+`cSrcSize` must be strictly correct, otherwise decompression will fail.
+FSEv07_decompress_usingDTable() result will tell how many bytes were regenerated (<=`dstCapacity`).
+If there is an error, the function will return an error code, which can be tested using FSEv07_isError(). (ex: dst buffer too small)
+*/
+
+
+#ifdef FSEv07_STATIC_LINKING_ONLY
+
+
+/* *****************************************
+*  Static allocation
+*******************************************/
+/* FSE buffer bounds */
+#define FSEv07_NCOUNTBOUND 512
+#define FSEv07_BLOCKBOUND(size) (size + (size>>7))
+
+/* It is possible to statically allocate FSE CTable/DTable as a table of unsigned using below macros */
+#define FSEv07_DTABLE_SIZE_U32(maxTableLog)                   (1 + (1<<maxTableLog))
+
+
+/* *****************************************
+*  FSE advanced API
+*******************************************/
+size_t FSEv07_countFast(unsigned* count, unsigned* maxSymbolValuePtr, const void* src, size_t srcSize);
+/**< same as FSEv07_count(), but blindly trusts that all byte values within src are <= *maxSymbolValuePtr  */
+
+unsigned FSEv07_optimalTableLog_internal(unsigned maxTableLog, size_t srcSize, unsigned maxSymbolValue, unsigned minus);
+/**< same as FSEv07_optimalTableLog(), which used `minus==2` */
+
+size_t FSEv07_buildDTable_raw (FSEv07_DTable* dt, unsigned nbBits);
+/**< build a fake FSEv07_DTable, designed to read an uncompressed bitstream where each symbol uses nbBits */
+
+size_t FSEv07_buildDTable_rle (FSEv07_DTable* dt, unsigned char symbolValue);
+/**< build a fake FSEv07_DTable, designed to always generate the same symbolValue */
+
+
+
+/* *****************************************
+*  FSE symbol decompression API
+*******************************************/
+typedef struct
+{
+    size_t      state;
+    const void* table;   /* precise table may vary, depending on U16 */
+} FSEv07_DState_t;
+
+
+static void     FSEv07_initDState(FSEv07_DState_t* DStatePtr, BITv07_DStream_t* bitD, const FSEv07_DTable* dt);
+
+static unsigned char FSEv07_decodeSymbol(FSEv07_DState_t* DStatePtr, BITv07_DStream_t* bitD);
+
+
+/**<
+Let's now decompose FSEv07_decompress_usingDTable() into its unitary components.
+You will decode FSE-encoded symbols from the bitStream,
+and also any other bitFields you put in, **in reverse order**.
+
+You will need a few variables to track your bitStream. They are :
+
+BITv07_DStream_t DStream;    // Stream context
+FSEv07_DState_t  DState;     // State context. Multiple ones are possible
+FSEv07_DTable*   DTablePtr;  // Decoding table, provided by FSEv07_buildDTable()
+
+The first thing to do is to init the bitStream.
+    errorCode = BITv07_initDStream(&DStream, srcBuffer, srcSize);
+
+You should then retrieve your initial state(s)
+(in reverse flushing order if you have several ones) :
+    errorCode = FSEv07_initDState(&DState, &DStream, DTablePtr);
+
+You can then decode your data, symbol after symbol.
+For information the maximum number of bits read by FSEv07_decodeSymbol() is 'tableLog'.
+Keep in mind that symbols are decoded in reverse order, like a LIFO stack (last in, first out).
+    unsigned char symbol = FSEv07_decodeSymbol(&DState, &DStream);
+
+You can retrieve any bitfield you eventually stored into the bitStream (in reverse order)
+Note : maximum allowed nbBits is 25, for 32-bits compatibility
+    size_t bitField = BITv07_readBits(&DStream, nbBits);
+
+All above operations only read from local register (which size depends on size_t).
+Refueling the register from memory is manually performed by the reload method.
+    endSignal = FSEv07_reloadDStream(&DStream);
+
+BITv07_reloadDStream() result tells if there is still some more data to read from DStream.
+BITv07_DStream_unfinished : there is still some data left into the DStream.
+BITv07_DStream_endOfBuffer : Dstream reached end of buffer. Its container may no longer be completely filled.
+BITv07_DStream_completed : Dstream reached its exact end, corresponding in general to decompression completed.
+BITv07_DStream_tooFar : Dstream went too far. Decompression result is corrupted.
+
+When reaching end of buffer (BITv07_DStream_endOfBuffer), progress slowly, notably if you decode multiple symbols per loop,
+to properly detect the exact end of stream.
+After each decoded symbol, check if DStream is fully consumed using this simple test :
+    BITv07_reloadDStream(&DStream) >= BITv07_DStream_completed
+
+When it's done, verify decompression is fully completed, by checking both DStream and the relevant states.
+Checking if DStream has reached its end is performed by :
+    BITv07_endOfDStream(&DStream);
+Check also the states. There might be some symbols left there, if some high probability ones (>50%) are possible.
+    FSEv07_endOfDState(&DState);
+*/
+
+
+/* *****************************************
+*  FSE unsafe API
+*******************************************/
+static unsigned char FSEv07_decodeSymbolFast(FSEv07_DState_t* DStatePtr, BITv07_DStream_t* bitD);
+/* faster, but works only if nbBits is always >= 1 (otherwise, result will be corrupted) */
+
+
+/* ======    Decompression    ====== */
+
+typedef struct {
+    U16 tableLog;
+    U16 fastMode;
+} FSEv07_DTableHeader;   /* sizeof U32 */
+
+typedef struct
+{
+    unsigned short newState;
+    unsigned char  symbol;
+    unsigned char  nbBits;
+} FSEv07_decode_t;   /* size == U32 */
+
+MEM_STATIC void FSEv07_initDState(FSEv07_DState_t* DStatePtr, BITv07_DStream_t* bitD, const FSEv07_DTable* dt)
+{
+    const void* ptr = dt;
+    const FSEv07_DTableHeader* const DTableH = (const FSEv07_DTableHeader*)ptr;
+    DStatePtr->state = BITv07_readBits(bitD, DTableH->tableLog);
+    BITv07_reloadDStream(bitD);
+    DStatePtr->table = dt + 1;
+}
+
+MEM_STATIC BYTE FSEv07_peekSymbol(const FSEv07_DState_t* DStatePtr)
+{
+    FSEv07_decode_t const DInfo = ((const FSEv07_decode_t*)(DStatePtr->table))[DStatePtr->state];
+    return DInfo.symbol;
+}
+
+MEM_STATIC void FSEv07_updateState(FSEv07_DState_t* DStatePtr, BITv07_DStream_t* bitD)
+{
+    FSEv07_decode_t const DInfo = ((const FSEv07_decode_t*)(DStatePtr->table))[DStatePtr->state];
+    U32 const nbBits = DInfo.nbBits;
+    size_t const lowBits = BITv07_readBits(bitD, nbBits);
+    DStatePtr->state = DInfo.newState + lowBits;
+}
+
+MEM_STATIC BYTE FSEv07_decodeSymbol(FSEv07_DState_t* DStatePtr, BITv07_DStream_t* bitD)
+{
+    FSEv07_decode_t const DInfo = ((const FSEv07_decode_t*)(DStatePtr->table))[DStatePtr->state];
+    U32 const nbBits = DInfo.nbBits;
+    BYTE const symbol = DInfo.symbol;
+    size_t const lowBits = BITv07_readBits(bitD, nbBits);
+
+    DStatePtr->state = DInfo.newState + lowBits;
+    return symbol;
+}
+
+/*! FSEv07_decodeSymbolFast() :
+    unsafe, only works if no symbol has a probability > 50% */
+MEM_STATIC BYTE FSEv07_decodeSymbolFast(FSEv07_DState_t* DStatePtr, BITv07_DStream_t* bitD)
+{
+    FSEv07_decode_t const DInfo = ((const FSEv07_decode_t*)(DStatePtr->table))[DStatePtr->state];
+    U32 const nbBits = DInfo.nbBits;
+    BYTE const symbol = DInfo.symbol;
+    size_t const lowBits = BITv07_readBitsFast(bitD, nbBits);
+
+    DStatePtr->state = DInfo.newState + lowBits;
+    return symbol;
+}
+
+
+
+#ifndef FSEv07_COMMONDEFS_ONLY
+
+/* **************************************************************
+*  Tuning parameters
+****************************************************************/
+/*!MEMORY_USAGE :
+*  Memory usage formula : N->2^N Bytes (examples : 10 -> 1KB; 12 -> 4KB ; 16 -> 64KB; 20 -> 1MB; etc.)
+*  Increasing memory usage improves compression ratio
+*  Reduced memory usage can improve speed, due to cache effect
+*  Recommended max value is 14, for 16KB, which nicely fits into Intel x86 L1 cache */
+#define FSEv07_MAX_MEMORY_USAGE 14
+#define FSEv07_DEFAULT_MEMORY_USAGE 13
+
+/*!FSEv07_MAX_SYMBOL_VALUE :
+*  Maximum symbol value authorized.
+*  Required for proper stack allocation */
+#define FSEv07_MAX_SYMBOL_VALUE 255
+
+
+/* **************************************************************
+*  template functions type & suffix
+****************************************************************/
+#define FSEv07_FUNCTION_TYPE BYTE
+#define FSEv07_FUNCTION_EXTENSION
+#define FSEv07_DECODE_TYPE FSEv07_decode_t
+
+
+#endif   /* !FSEv07_COMMONDEFS_ONLY */
+
+
+/* ***************************************************************
+*  Constants
+*****************************************************************/
+#define FSEv07_MAX_TABLELOG  (FSEv07_MAX_MEMORY_USAGE-2)
+#define FSEv07_MAX_TABLESIZE (1U<<FSEv07_MAX_TABLELOG)
+#define FSEv07_MAXTABLESIZE_MASK (FSEv07_MAX_TABLESIZE-1)
+#define FSEv07_DEFAULT_TABLELOG (FSEv07_DEFAULT_MEMORY_USAGE-2)
+#define FSEv07_MIN_TABLELOG 5
+
+#define FSEv07_TABLELOG_ABSOLUTE_MAX 15
+#if FSEv07_MAX_TABLELOG > FSEv07_TABLELOG_ABSOLUTE_MAX
+#  error "FSEv07_MAX_TABLELOG > FSEv07_TABLELOG_ABSOLUTE_MAX is not supported"
+#endif
+
+#define FSEv07_TABLESTEP(tableSize) ((tableSize>>1) + (tableSize>>3) + 3)
+
+
+#endif /* FSEv07_STATIC_LINKING_ONLY */
+
+
+#if defined (__cplusplus)
+}
+#endif
+
+#endif  /* FSEv07_H */
+/* ******************************************************************
+   Huffman coder, part of New Generation Entropy library
+   header file
+   Copyright (C) 2013-2016, Yann Collet.
+
+   BSD 2-Clause License (http://www.opensource.org/licenses/bsd-license.php)
+
+   Redistribution and use in source and binary forms, with or without
+   modification, are permitted provided that the following conditions are
+   met:
+
+       * Redistributions of source code must retain the above copyright
+   notice, this list of conditions and the following disclaimer.
+       * Redistributions in binary form must reproduce the above
+   copyright notice, this list of conditions and the following disclaimer
+   in the documentation and/or other materials provided with the
+   distribution.
+
+   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+   OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+   You can contact the author at :
+   - Source repository : https://github.com/Cyan4973/FiniteStateEntropy
+****************************************************************** */
+#ifndef HUFv07_H_298734234
+#define HUFv07_H_298734234
+
+#if defined (__cplusplus)
+extern "C" {
+#endif
+
+
+
+/* *** simple functions *** */
+/**
+HUFv07_decompress() :
+    Decompress HUF data from buffer 'cSrc', of size 'cSrcSize',
+    into already allocated buffer 'dst', of minimum size 'dstSize'.
+    `dstSize` : **must** be the ***exact*** size of original (uncompressed) data.
+    Note : in contrast with FSE, HUFv07_decompress can regenerate
+           RLE (cSrcSize==1) and uncompressed (cSrcSize==dstSize) data,
+           because it knows size to regenerate.
+    @return : size of regenerated data (== dstSize),
+              or an error code, which can be tested using HUFv07_isError()
+*/
+size_t HUFv07_decompress(void* dst,  size_t dstSize,
+                const void* cSrc, size_t cSrcSize);
+
+
+/* ****************************************
+*  Tool functions
+******************************************/
+#define HUFv07_BLOCKSIZE_MAX (128 * 1024)
+
+/* Error Management */
+unsigned    HUFv07_isError(size_t code);        /**< tells if a return value is an error code */
+const char* HUFv07_getErrorName(size_t code);   /**< provides error code string (useful for debugging) */
+
+
+/* *** Advanced function *** */
+
+
+#ifdef HUFv07_STATIC_LINKING_ONLY
+
+
+/* *** Constants *** */
+#define HUFv07_TABLELOG_ABSOLUTEMAX  16   /* absolute limit of HUFv07_MAX_TABLELOG. Beyond that value, code does not work */
+#define HUFv07_TABLELOG_MAX  12           /* max configured tableLog (for static allocation); can be modified up to HUFv07_ABSOLUTEMAX_TABLELOG */
+#define HUFv07_TABLELOG_DEFAULT  11       /* tableLog by default, when not specified */
+#define HUFv07_SYMBOLVALUE_MAX 255
+#if (HUFv07_TABLELOG_MAX > HUFv07_TABLELOG_ABSOLUTEMAX)
+#  error "HUFv07_TABLELOG_MAX is too large !"
+#endif
+
+
+/* ****************************************
+*  Static allocation
+******************************************/
+/* HUF buffer bounds */
+#define HUFv07_BLOCKBOUND(size) (size + (size>>8) + 8)   /* only true if incompressible pre-filtered with fast heuristic */
+
+/* static allocation of HUF's DTable */
+typedef U32 HUFv07_DTable;
+#define HUFv07_DTABLE_SIZE(maxTableLog)   (1 + (1<<(maxTableLog)))
+#define HUFv07_CREATE_STATIC_DTABLEX2(DTable, maxTableLog) \
+        HUFv07_DTable DTable[HUFv07_DTABLE_SIZE((maxTableLog)-1)] = { ((U32)((maxTableLog)-1)*0x1000001) }
+#define HUFv07_CREATE_STATIC_DTABLEX4(DTable, maxTableLog) \
+        HUFv07_DTable DTable[HUFv07_DTABLE_SIZE(maxTableLog)] = { ((U32)(maxTableLog)*0x1000001) }
+
+
+/* ****************************************
+*  Advanced decompression functions
+******************************************/
+size_t HUFv07_decompress4X2 (void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize);   /**< single-symbol decoder */
+size_t HUFv07_decompress4X4 (void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize);   /**< double-symbols decoder */
+
+size_t HUFv07_decompress4X_DCtx (HUFv07_DTable* dctx, void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize);   /**< decodes RLE and uncompressed */
+size_t HUFv07_decompress4X_hufOnly(HUFv07_DTable* dctx, void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize); /**< considers RLE and uncompressed as errors */
+size_t HUFv07_decompress4X2_DCtx(HUFv07_DTable* dctx, void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize);   /**< single-symbol decoder */
+size_t HUFv07_decompress4X4_DCtx(HUFv07_DTable* dctx, void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize);   /**< double-symbols decoder */
+
+size_t HUFv07_decompress1X_DCtx (HUFv07_DTable* dctx, void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize);
+size_t HUFv07_decompress1X2_DCtx(HUFv07_DTable* dctx, void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize);   /**< single-symbol decoder */
+size_t HUFv07_decompress1X4_DCtx(HUFv07_DTable* dctx, void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize);   /**< double-symbols decoder */
+
+
+/* ****************************************
+*  HUF detailed API
+******************************************/
+/*!
+The following API allows targeting specific sub-functions for advanced tasks.
+For example, it's possible to compress several blocks using the same 'CTable',
+or to save and regenerate 'CTable' using external methods.
+*/
+/* FSEv07_count() : find it within "fse.h" */
+
+/*! HUFv07_readStats() :
+    Read compact Huffman tree, saved by HUFv07_writeCTable().
+    `huffWeight` is destination buffer.
+    @return : size read from `src` , or an error Code .
+    Note : Needed by HUFv07_readCTable() and HUFv07_readDTableXn() . */
+size_t HUFv07_readStats(BYTE* huffWeight, size_t hwSize, U32* rankStats,
+                     U32* nbSymbolsPtr, U32* tableLogPtr,
+                     const void* src, size_t srcSize);
+
+
+/*
+HUFv07_decompress() does the following:
+1. select the decompression algorithm (X2, X4) based on pre-computed heuristics
+2. build Huffman table from save, using HUFv07_readDTableXn()
+3. decode 1 or 4 segments in parallel using HUFv07_decompressSXn_usingDTable
+*/
+
+/** HUFv07_selectDecoder() :
+*   Tells which decoder is likely to decode faster,
+*   based on a set of pre-determined metrics.
+*   @return : 0==HUFv07_decompress4X2, 1==HUFv07_decompress4X4 .
+*   Assumption : 0 < cSrcSize < dstSize <= 128 KB */
+U32 HUFv07_selectDecoder (size_t dstSize, size_t cSrcSize);
+
+size_t HUFv07_readDTableX2 (HUFv07_DTable* DTable, const void* src, size_t srcSize);
+size_t HUFv07_readDTableX4 (HUFv07_DTable* DTable, const void* src, size_t srcSize);
+
+size_t HUFv07_decompress4X_usingDTable(void* dst, size_t maxDstSize, const void* cSrc, size_t cSrcSize, const HUFv07_DTable* DTable);
+size_t HUFv07_decompress4X2_usingDTable(void* dst, size_t maxDstSize, const void* cSrc, size_t cSrcSize, const HUFv07_DTable* DTable);
+size_t HUFv07_decompress4X4_usingDTable(void* dst, size_t maxDstSize, const void* cSrc, size_t cSrcSize, const HUFv07_DTable* DTable);
+
+
+/* single stream variants */
+size_t HUFv07_decompress1X2 (void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize);   /* single-symbol decoder */
+size_t HUFv07_decompress1X4 (void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize);   /* double-symbol decoder */
+
+size_t HUFv07_decompress1X_usingDTable(void* dst, size_t maxDstSize, const void* cSrc, size_t cSrcSize, const HUFv07_DTable* DTable);
+size_t HUFv07_decompress1X2_usingDTable(void* dst, size_t maxDstSize, const void* cSrc, size_t cSrcSize, const HUFv07_DTable* DTable);
+size_t HUFv07_decompress1X4_usingDTable(void* dst, size_t maxDstSize, const void* cSrc, size_t cSrcSize, const HUFv07_DTable* DTable);
+
+
+#endif /* HUFv07_STATIC_LINKING_ONLY */
+
+
+#if defined (__cplusplus)
+}
+#endif
+
+#endif   /* HUFv07_H_298734234 */
+/*
+   Common functions of New Generation Entropy library
+   Copyright (C) 2016, Yann Collet.
+
+   BSD 2-Clause License (http://www.opensource.org/licenses/bsd-license.php)
+
+   Redistribution and use in source and binary forms, with or without
+   modification, are permitted provided that the following conditions are
+   met:
+
+       * Redistributions of source code must retain the above copyright
+   notice, this list of conditions and the following disclaimer.
+       * Redistributions in binary form must reproduce the above
+   copyright notice, this list of conditions and the following disclaimer
+   in the documentation and/or other materials provided with the
+   distribution.
+
+   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+   OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+    You can contact the author at :
+    - FSE+HUF source repository : https://github.com/Cyan4973/FiniteStateEntropy
+    - Public forum : https://groups.google.com/forum/#!forum/lz4c
+*************************************************************************** */
+
+
+
+/*-****************************************
+*  FSE Error Management
+******************************************/
+unsigned FSEv07_isError(size_t code) { return ERR_isError(code); }
+
+const char* FSEv07_getErrorName(size_t code) { return ERR_getErrorName(code); }
+
+
+/* **************************************************************
+*  HUF Error Management
+****************************************************************/
+unsigned HUFv07_isError(size_t code) { return ERR_isError(code); }
+
+const char* HUFv07_getErrorName(size_t code) { return ERR_getErrorName(code); }
+
+
+/*-**************************************************************
+*  FSE NCount encoding-decoding
+****************************************************************/
+static short FSEv07_abs(short a) { return (short)(a<0 ? -a : a); }
+
+size_t FSEv07_readNCount (short* normalizedCounter, unsigned* maxSVPtr, unsigned* tableLogPtr,
+                 const void* headerBuffer, size_t hbSize)
+{
+    const BYTE* const istart = (const BYTE*) headerBuffer;
+    const BYTE* const iend = istart + hbSize;
+    const BYTE* ip = istart;
+    int nbBits;
+    int remaining;
+    int threshold;
+    U32 bitStream;
+    int bitCount;
+    unsigned charnum = 0;
+    int previous0 = 0;
+
+    if (hbSize < 4) return ERROR(srcSize_wrong);
+    bitStream = MEM_readLE32(ip);
+    nbBits = (bitStream & 0xF) + FSEv07_MIN_TABLELOG;   /* extract tableLog */
+    if (nbBits > FSEv07_TABLELOG_ABSOLUTE_MAX) return ERROR(tableLog_tooLarge);
+    bitStream >>= 4;
+    bitCount = 4;
+    *tableLogPtr = nbBits;
+    remaining = (1<<nbBits)+1;
+    threshold = 1<<nbBits;
+    nbBits++;
+
+    while ((remaining>1) && (charnum<=*maxSVPtr)) {
+        if (previous0) {
+            unsigned n0 = charnum;
+            while ((bitStream & 0xFFFF) == 0xFFFF) {
+                n0+=24;
+                if (ip < iend-5) {
+                    ip+=2;
+                    bitStream = MEM_readLE32(ip) >> bitCount;
+                } else {
+                    bitStream >>= 16;
+                    bitCount+=16;
+            }   }
+            while ((bitStream & 3) == 3) {
+                n0+=3;
+                bitStream>>=2;
+                bitCount+=2;
+            }
+            n0 += bitStream & 3;
+            bitCount += 2;
+            if (n0 > *maxSVPtr) return ERROR(maxSymbolValue_tooSmall);
+            while (charnum < n0) normalizedCounter[charnum++] = 0;
+            if ((ip <= iend-7) || (ip + (bitCount>>3) <= iend-4)) {
+                ip += bitCount>>3;
+                bitCount &= 7;
+                bitStream = MEM_readLE32(ip) >> bitCount;
+            }
+            else
+                bitStream >>= 2;
+        }
+        {   short const max = (short)((2*threshold-1)-remaining);
+            short count;
+
+            if ((bitStream & (threshold-1)) < (U32)max) {
+                count = (short)(bitStream & (threshold-1));
+                bitCount   += nbBits-1;
+            } else {
+                count = (short)(bitStream & (2*threshold-1));
+                if (count >= threshold) count -= max;
+                bitCount   += nbBits;
+            }
+
+            count--;   /* extra accuracy */
+            remaining -= FSEv07_abs(count);
+            normalizedCounter[charnum++] = count;
+            previous0 = !count;
+            while (remaining < threshold) {
+                nbBits--;
+                threshold >>= 1;
+            }
+
+            if ((ip <= iend-7) || (ip + (bitCount>>3) <= iend-4)) {
+                ip += bitCount>>3;
+                bitCount &= 7;
+            } else {
+                bitCount -= (int)(8 * (iend - 4 - ip));
+                ip = iend - 4;
+            }
+            bitStream = MEM_readLE32(ip) >> (bitCount & 31);
+    }   }   /* while ((remaining>1) && (charnum<=*maxSVPtr)) */
+    if (remaining != 1) return ERROR(GENERIC);
+    *maxSVPtr = charnum-1;
+
+    ip += (bitCount+7)>>3;
+    if ((size_t)(ip-istart) > hbSize) return ERROR(srcSize_wrong);
+    return ip-istart;
+}
+
+
+/*! HUFv07_readStats() :
+    Read compact Huffman tree, saved by HUFv07_writeCTable().
+    `huffWeight` is destination buffer.
+    @return : size read from `src` , or an error Code .
+    Note : Needed by HUFv07_readCTable() and HUFv07_readDTableXn() .
+*/
+size_t HUFv07_readStats(BYTE* huffWeight, size_t hwSize, U32* rankStats,
+                     U32* nbSymbolsPtr, U32* tableLogPtr,
+                     const void* src, size_t srcSize)
+{
+    U32 weightTotal;
+    const BYTE* ip = (const BYTE*) src;
+    size_t iSize;
+    size_t oSize;
+
+    if (!srcSize) return ERROR(srcSize_wrong);
+    iSize = ip[0];
+    //memset(huffWeight, 0, hwSize);   /* is not necessary, even though some analyzer complain ... */
+
+    if (iSize >= 128)  { /* special header */
+        if (iSize >= (242)) {  /* RLE */
+            static U32 l[14] = { 1, 2, 3, 4, 7, 8, 15, 16, 31, 32, 63, 64, 127, 128 };
+            oSize = l[iSize-242];
+            memset(huffWeight, 1, hwSize);
+            iSize = 0;
+        }
+        else {   /* Incompressible */
+            oSize = iSize - 127;
+            iSize = ((oSize+1)/2);
+            if (iSize+1 > srcSize) return ERROR(srcSize_wrong);
+            if (oSize >= hwSize) return ERROR(corruption_detected);
+            ip += 1;
+            {   U32 n;
+                for (n=0; n<oSize; n+=2) {
+                    huffWeight[n]   = ip[n/2] >> 4;
+                    huffWeight[n+1] = ip[n/2] & 15;
+    }   }   }   }
+    else  {   /* header compressed with FSE (normal case) */
+        if (iSize+1 > srcSize) return ERROR(srcSize_wrong);
+        oSize = FSEv07_decompress(huffWeight, hwSize-1, ip+1, iSize);   /* max (hwSize-1) values decoded, as last one is implied */
+        if (FSEv07_isError(oSize)) return oSize;
+    }
+
+    /* collect weight stats */
+    memset(rankStats, 0, (HUFv07_TABLELOG_ABSOLUTEMAX + 1) * sizeof(U32));
+    weightTotal = 0;
+    {   U32 n; for (n=0; n<oSize; n++) {
+            if (huffWeight[n] >= HUFv07_TABLELOG_ABSOLUTEMAX) return ERROR(corruption_detected);
+            rankStats[huffWeight[n]]++;
+            weightTotal += (1 << huffWeight[n]) >> 1;
+    }   }
+    if (weightTotal == 0) return ERROR(corruption_detected);
+
+    /* get last non-null symbol weight (implied, total must be 2^n) */
+    {   U32 const tableLog = BITv07_highbit32(weightTotal) + 1;
+        if (tableLog > HUFv07_TABLELOG_ABSOLUTEMAX) return ERROR(corruption_detected);
+        *tableLogPtr = tableLog;
+        /* determine last weight */
+        {   U32 const total = 1 << tableLog;
+            U32 const rest = total - weightTotal;
+            U32 const verif = 1 << BITv07_highbit32(rest);
+            U32 const lastWeight = BITv07_highbit32(rest) + 1;
+            if (verif != rest) return ERROR(corruption_detected);    /* last value must be a clean power of 2 */
+            huffWeight[oSize] = (BYTE)lastWeight;
+            rankStats[lastWeight]++;
+    }   }
+
+    /* check tree construction validity */
+    if ((rankStats[1] < 2) || (rankStats[1] & 1)) return ERROR(corruption_detected);   /* by construction : at least 2 elts of rank 1, must be even */
+
+    /* results */
+    *nbSymbolsPtr = (U32)(oSize+1);
+    return iSize+1;
+}
+/* ******************************************************************
+   FSE : Finite State Entropy decoder
+   Copyright (C) 2013-2015, Yann Collet.
+
+   BSD 2-Clause License (http://www.opensource.org/licenses/bsd-license.php)
+
+   Redistribution and use in source and binary forms, with or without
+   modification, are permitted provided that the following conditions are
+   met:
+
+       * Redistributions of source code must retain the above copyright
+   notice, this list of conditions and the following disclaimer.
+       * Redistributions in binary form must reproduce the above
+   copyright notice, this list of conditions and the following disclaimer
+   in the documentation and/or other materials provided with the
+   distribution.
+
+   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+   OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+    You can contact the author at :
+    - FSE source repository : https://github.com/Cyan4973/FiniteStateEntropy
+    - Public forum : https://groups.google.com/forum/#!forum/lz4c
+****************************************************************** */
+
+
+/* **************************************************************
+*  Compiler specifics
+****************************************************************/
+#ifdef _MSC_VER    /* Visual Studio */
+#  define FORCE_INLINE static __forceinline
+#  include <intrin.h>                    /* For Visual 2005 */
+#  pragma warning(disable : 4127)        /* disable: C4127: conditional expression is constant */
+#  pragma warning(disable : 4214)        /* disable: C4214: non-int bitfields */
+#else
+#  if defined (__cplusplus) || defined (__STDC_VERSION__) && __STDC_VERSION__ >= 199901L   /* C99 */
+#    ifdef __GNUC__
+#      define FORCE_INLINE static inline __attribute__((always_inline))
+#    else
+#      define FORCE_INLINE static inline
+#    endif
+#  else
+#    define FORCE_INLINE static
+#  endif /* __STDC_VERSION__ */
+#endif
+
+
+/* **************************************************************
+*  Error Management
+****************************************************************/
+#define FSEv07_isError ERR_isError
+#define FSEv07_STATIC_ASSERT(c) { enum { FSEv07_static_assert = 1/(int)(!!(c)) }; }   /* use only *after* variable declarations */
+
+
+/* **************************************************************
+*  Complex types
+****************************************************************/
+typedef U32 DTable_max_t[FSEv07_DTABLE_SIZE_U32(FSEv07_MAX_TABLELOG)];
+
+
+/* **************************************************************
+*  Templates
+****************************************************************/
+/*
+  designed to be included
+  for type-specific functions (template emulation in C)
+  Objective is to write these functions only once, for improved maintenance
+*/
+
+/* safety checks */
+#ifndef FSEv07_FUNCTION_EXTENSION
+#  error "FSEv07_FUNCTION_EXTENSION must be defined"
+#endif
+#ifndef FSEv07_FUNCTION_TYPE
+#  error "FSEv07_FUNCTION_TYPE must be defined"
+#endif
+
+/* Function names */
+#define FSEv07_CAT(X,Y) X##Y
+#define FSEv07_FUNCTION_NAME(X,Y) FSEv07_CAT(X,Y)
+#define FSEv07_TYPE_NAME(X,Y) FSEv07_CAT(X,Y)
+
+
+/* Function templates */
+FSEv07_DTable* FSEv07_createDTable (unsigned tableLog)
+{
+    if (tableLog > FSEv07_TABLELOG_ABSOLUTE_MAX) tableLog = FSEv07_TABLELOG_ABSOLUTE_MAX;
+    return (FSEv07_DTable*)malloc( FSEv07_DTABLE_SIZE_U32(tableLog) * sizeof (U32) );
+}
+
+void FSEv07_freeDTable (FSEv07_DTable* dt)
+{
+    free(dt);
+}
+
+size_t FSEv07_buildDTable(FSEv07_DTable* dt, const short* normalizedCounter, unsigned maxSymbolValue, unsigned tableLog)
+{
+    void* const tdPtr = dt+1;   /* because *dt is unsigned, 32-bits aligned on 32-bits */
+    FSEv07_DECODE_TYPE* const tableDecode = (FSEv07_DECODE_TYPE*) (tdPtr);
+    U16 symbolNext[FSEv07_MAX_SYMBOL_VALUE+1];
+
+    U32 const maxSV1 = maxSymbolValue + 1;
+    U32 const tableSize = 1 << tableLog;
+    U32 highThreshold = tableSize-1;
+
+    /* Sanity Checks */
+    if (maxSymbolValue > FSEv07_MAX_SYMBOL_VALUE) return ERROR(maxSymbolValue_tooLarge);
+    if (tableLog > FSEv07_MAX_TABLELOG) return ERROR(tableLog_tooLarge);
+
+    /* Init, lay down lowprob symbols */
+    {   FSEv07_DTableHeader DTableH;
+        DTableH.tableLog = (U16)tableLog;
+        DTableH.fastMode = 1;
+        {   S16 const largeLimit= (S16)(1 << (tableLog-1));
+            U32 s;
+            for (s=0; s<maxSV1; s++) {
+                if (normalizedCounter[s]==-1) {
+                    tableDecode[highThreshold--].symbol = (FSEv07_FUNCTION_TYPE)s;
+                    symbolNext[s] = 1;
+                } else {
+                    if (normalizedCounter[s] >= largeLimit) DTableH.fastMode=0;
+                    symbolNext[s] = normalizedCounter[s];
+        }   }   }
+        memcpy(dt, &DTableH, sizeof(DTableH));
+    }
+
+    /* Spread symbols */
+    {   U32 const tableMask = tableSize-1;
+        U32 const step = FSEv07_TABLESTEP(tableSize);
+        U32 s, position = 0;
+        for (s=0; s<maxSV1; s++) {
+            int i;
+            for (i=0; i<normalizedCounter[s]; i++) {
+                tableDecode[position].symbol = (FSEv07_FUNCTION_TYPE)s;
+                position = (position + step) & tableMask;
+                while (position > highThreshold) position = (position + step) & tableMask;   /* lowprob area */
+        }   }
+
+        if (position!=0) return ERROR(GENERIC);   /* position must reach all cells once, otherwise normalizedCounter is incorrect */
+    }
+
+    /* Build Decoding table */
+    {   U32 u;
+        for (u=0; u<tableSize; u++) {
+            FSEv07_FUNCTION_TYPE const symbol = (FSEv07_FUNCTION_TYPE)(tableDecode[u].symbol);
+            U16 nextState = symbolNext[symbol]++;
+            tableDecode[u].nbBits = (BYTE) (tableLog - BITv07_highbit32 ((U32)nextState) );
+            tableDecode[u].newState = (U16) ( (nextState << tableDecode[u].nbBits) - tableSize);
+    }   }
+
+    return 0;
+}
+
+
+
+#ifndef FSEv07_COMMONDEFS_ONLY
+
+/*-*******************************************************
+*  Decompression (Byte symbols)
+*********************************************************/
+size_t FSEv07_buildDTable_rle (FSEv07_DTable* dt, BYTE symbolValue)
+{
+    void* ptr = dt;
+    FSEv07_DTableHeader* const DTableH = (FSEv07_DTableHeader*)ptr;
+    void* dPtr = dt + 1;
+    FSEv07_decode_t* const cell = (FSEv07_decode_t*)dPtr;
+
+    DTableH->tableLog = 0;
+    DTableH->fastMode = 0;
+
+    cell->newState = 0;
+    cell->symbol = symbolValue;
+    cell->nbBits = 0;
+
+    return 0;
+}
+
+
+size_t FSEv07_buildDTable_raw (FSEv07_DTable* dt, unsigned nbBits)
+{
+    void* ptr = dt;
+    FSEv07_DTableHeader* const DTableH = (FSEv07_DTableHeader*)ptr;
+    void* dPtr = dt + 1;
+    FSEv07_decode_t* const dinfo = (FSEv07_decode_t*)dPtr;
+    const unsigned tableSize = 1 << nbBits;
+    const unsigned tableMask = tableSize - 1;
+    const unsigned maxSV1 = tableMask+1;
+    unsigned s;
+
+    /* Sanity checks */
+    if (nbBits < 1) return ERROR(GENERIC);         /* min size */
+
+    /* Build Decoding Table */
+    DTableH->tableLog = (U16)nbBits;
+    DTableH->fastMode = 1;
+    for (s=0; s<maxSV1; s++) {
+        dinfo[s].newState = 0;
+        dinfo[s].symbol = (BYTE)s;
+        dinfo[s].nbBits = (BYTE)nbBits;
+    }
+
+    return 0;
+}
+
+FORCE_INLINE size_t FSEv07_decompress_usingDTable_generic(
+          void* dst, size_t maxDstSize,
+    const void* cSrc, size_t cSrcSize,
+    const FSEv07_DTable* dt, const unsigned fast)
+{
+    BYTE* const ostart = (BYTE*) dst;
+    BYTE* op = ostart;
+    BYTE* const omax = op + maxDstSize;
+    BYTE* const olimit = omax-3;
+
+    BITv07_DStream_t bitD;
+    FSEv07_DState_t state1;
+    FSEv07_DState_t state2;
+
+    /* Init */
+    { size_t const errorCode = BITv07_initDStream(&bitD, cSrc, cSrcSize);   /* replaced last arg by maxCompressed Size */
+      if (FSEv07_isError(errorCode)) return errorCode; }
+
+    FSEv07_initDState(&state1, &bitD, dt);
+    FSEv07_initDState(&state2, &bitD, dt);
+
+#define FSEv07_GETSYMBOL(statePtr) fast ? FSEv07_decodeSymbolFast(statePtr, &bitD) : FSEv07_decodeSymbol(statePtr, &bitD)
+
+    /* 4 symbols per loop */
+    for ( ; (BITv07_reloadDStream(&bitD)==BITv07_DStream_unfinished) && (op<olimit) ; op+=4) {
+        op[0] = FSEv07_GETSYMBOL(&state1);
+
+        if (FSEv07_MAX_TABLELOG*2+7 > sizeof(bitD.bitContainer)*8)    /* This test must be static */
+            BITv07_reloadDStream(&bitD);
+
+        op[1] = FSEv07_GETSYMBOL(&state2);
+
+        if (FSEv07_MAX_TABLELOG*4+7 > sizeof(bitD.bitContainer)*8)    /* This test must be static */
+            { if (BITv07_reloadDStream(&bitD) > BITv07_DStream_unfinished) { op+=2; break; } }
+
+        op[2] = FSEv07_GETSYMBOL(&state1);
+
+        if (FSEv07_MAX_TABLELOG*2+7 > sizeof(bitD.bitContainer)*8)    /* This test must be static */
+            BITv07_reloadDStream(&bitD);
+
+        op[3] = FSEv07_GETSYMBOL(&state2);
+    }
+
+    /* tail */
+    /* note : BITv07_reloadDStream(&bitD) >= FSEv07_DStream_partiallyFilled; Ends at exactly BITv07_DStream_completed */
+    while (1) {
+        if (op>(omax-2)) return ERROR(dstSize_tooSmall);
+
+        *op++ = FSEv07_GETSYMBOL(&state1);
+
+        if (BITv07_reloadDStream(&bitD)==BITv07_DStream_overflow) {
+            *op++ = FSEv07_GETSYMBOL(&state2);
+            break;
+        }
+
+        if (op>(omax-2)) return ERROR(dstSize_tooSmall);
+
+        *op++ = FSEv07_GETSYMBOL(&state2);
+
+        if (BITv07_reloadDStream(&bitD)==BITv07_DStream_overflow) {
+            *op++ = FSEv07_GETSYMBOL(&state1);
+            break;
+    }   }
+
+    return op-ostart;
+}
+
+
+size_t FSEv07_decompress_usingDTable(void* dst, size_t originalSize,
+                            const void* cSrc, size_t cSrcSize,
+                            const FSEv07_DTable* dt)
+{
+    const void* ptr = dt;
+    const FSEv07_DTableHeader* DTableH = (const FSEv07_DTableHeader*)ptr;
+    const U32 fastMode = DTableH->fastMode;
+
+    /* select fast mode (static) */
+    if (fastMode) return FSEv07_decompress_usingDTable_generic(dst, originalSize, cSrc, cSrcSize, dt, 1);
+    return FSEv07_decompress_usingDTable_generic(dst, originalSize, cSrc, cSrcSize, dt, 0);
+}
+
+
+size_t FSEv07_decompress(void* dst, size_t maxDstSize, const void* cSrc, size_t cSrcSize)
+{
+    const BYTE* const istart = (const BYTE*)cSrc;
+    const BYTE* ip = istart;
+    short counting[FSEv07_MAX_SYMBOL_VALUE+1];
+    DTable_max_t dt;   /* Static analyzer seems unable to understand this table will be properly initialized later */
+    unsigned tableLog;
+    unsigned maxSymbolValue = FSEv07_MAX_SYMBOL_VALUE;
+
+    if (cSrcSize<2) return ERROR(srcSize_wrong);   /* too small input size */
+
+    /* normal FSE decoding mode */
+    {   size_t const NCountLength = FSEv07_readNCount (counting, &maxSymbolValue, &tableLog, istart, cSrcSize);
+        if (FSEv07_isError(NCountLength)) return NCountLength;
+        if (NCountLength >= cSrcSize) return ERROR(srcSize_wrong);   /* too small input size */
+        ip += NCountLength;
+        cSrcSize -= NCountLength;
+    }
+
+    { size_t const errorCode = FSEv07_buildDTable (dt, counting, maxSymbolValue, tableLog);
+      if (FSEv07_isError(errorCode)) return errorCode; }
+
+    return FSEv07_decompress_usingDTable (dst, maxDstSize, ip, cSrcSize, dt);   /* always return, even if it is an error code */
+}
+
+
+
+#endif   /* FSEv07_COMMONDEFS_ONLY */
+
+/* ******************************************************************
+   Huffman decoder, part of New Generation Entropy library
+   Copyright (C) 2013-2016, Yann Collet.
+
+   BSD 2-Clause License (http://www.opensource.org/licenses/bsd-license.php)
+
+   Redistribution and use in source and binary forms, with or without
+   modification, are permitted provided that the following conditions are
+   met:
+
+       * Redistributions of source code must retain the above copyright
+   notice, this list of conditions and the following disclaimer.
+       * Redistributions in binary form must reproduce the above
+   copyright notice, this list of conditions and the following disclaimer
+   in the documentation and/or other materials provided with the
+   distribution.
+
+   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+   OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+    You can contact the author at :
+    - FSE+HUF source repository : https://github.com/Cyan4973/FiniteStateEntropy
+    - Public forum : https://groups.google.com/forum/#!forum/lz4c
+****************************************************************** */
+
+/* **************************************************************
+*  Compiler specifics
+****************************************************************/
+#if defined (__cplusplus) || (defined (__STDC_VERSION__) && (__STDC_VERSION__ >= 199901L) /* C99 */)
+/* inline is defined */
+#elif defined(_MSC_VER)
+#  define inline __inline
+#else
+#  define inline /* disable inline */
+#endif
+
+
+#ifdef _MSC_VER    /* Visual Studio */
+#  pragma warning(disable : 4127)        /* disable: C4127: conditional expression is constant */
+#endif
+
+
+
+/* **************************************************************
+*  Error Management
+****************************************************************/
+#define HUFv07_STATIC_ASSERT(c) { enum { HUFv07_static_assert = 1/(int)(!!(c)) }; }   /* use only *after* variable declarations */
+
+
+/*-***************************/
+/*  generic DTableDesc       */
+/*-***************************/
+
+typedef struct { BYTE maxTableLog; BYTE tableType; BYTE tableLog; BYTE reserved; } DTableDesc;
+
+static DTableDesc HUFv07_getDTableDesc(const HUFv07_DTable* table)
+{
+    DTableDesc dtd;
+    memcpy(&dtd, table, sizeof(dtd));
+    return dtd;
+}
+
+
+/*-***************************/
+/*  single-symbol decoding   */
+/*-***************************/
+
+typedef struct { BYTE byte; BYTE nbBits; } HUFv07_DEltX2;   /* single-symbol decoding */
+
+size_t HUFv07_readDTableX2 (HUFv07_DTable* DTable, const void* src, size_t srcSize)
+{
+    BYTE huffWeight[HUFv07_SYMBOLVALUE_MAX + 1];
+    U32 rankVal[HUFv07_TABLELOG_ABSOLUTEMAX + 1];   /* large enough for values from 0 to 16 */
+    U32 tableLog = 0;
+    U32 nbSymbols = 0;
+    size_t iSize;
+    void* const dtPtr = DTable + 1;
+    HUFv07_DEltX2* const dt = (HUFv07_DEltX2*)dtPtr;
+
+    HUFv07_STATIC_ASSERT(sizeof(DTableDesc) == sizeof(HUFv07_DTable));
+    //memset(huffWeight, 0, sizeof(huffWeight));   /* is not necessary, even though some analyzer complain ... */
+
+    iSize = HUFv07_readStats(huffWeight, HUFv07_SYMBOLVALUE_MAX + 1, rankVal, &nbSymbols, &tableLog, src, srcSize);
+    if (HUFv07_isError(iSize)) return iSize;
+
+    /* Table header */
+    {   DTableDesc dtd = HUFv07_getDTableDesc(DTable);
+        if (tableLog > (U32)(dtd.maxTableLog+1)) return ERROR(tableLog_tooLarge);   /* DTable too small, huffman tree cannot fit in */
+        dtd.tableType = 0;
+        dtd.tableLog = (BYTE)tableLog;
+        memcpy(DTable, &dtd, sizeof(dtd));
+    }
+
+    /* Prepare ranks */
+    {   U32 n, nextRankStart = 0;
+        for (n=1; n<tableLog+1; n++) {
+            U32 current = nextRankStart;
+            nextRankStart += (rankVal[n] << (n-1));
+            rankVal[n] = current;
+    }   }
+
+    /* fill DTable */
+    {   U32 n;
+        for (n=0; n<nbSymbols; n++) {
+            U32 const w = huffWeight[n];
+            U32 const length = (1 << w) >> 1;
+            U32 i;
+            HUFv07_DEltX2 D;
+            D.byte = (BYTE)n; D.nbBits = (BYTE)(tableLog + 1 - w);
+            for (i = rankVal[w]; i < rankVal[w] + length; i++)
+                dt[i] = D;
+            rankVal[w] += length;
+    }   }
+
+    return iSize;
+}
+
+
+static BYTE HUFv07_decodeSymbolX2(BITv07_DStream_t* Dstream, const HUFv07_DEltX2* dt, const U32 dtLog)
+{
+    size_t const val = BITv07_lookBitsFast(Dstream, dtLog); /* note : dtLog >= 1 */
+    BYTE const c = dt[val].byte;
+    BITv07_skipBits(Dstream, dt[val].nbBits);
+    return c;
+}
+
+#define HUFv07_DECODE_SYMBOLX2_0(ptr, DStreamPtr) \
+    *ptr++ = HUFv07_decodeSymbolX2(DStreamPtr, dt, dtLog)
+
+#define HUFv07_DECODE_SYMBOLX2_1(ptr, DStreamPtr) \
+    if (MEM_64bits() || (HUFv07_TABLELOG_MAX<=12)) \
+        HUFv07_DECODE_SYMBOLX2_0(ptr, DStreamPtr)
+
+#define HUFv07_DECODE_SYMBOLX2_2(ptr, DStreamPtr) \
+    if (MEM_64bits()) \
+        HUFv07_DECODE_SYMBOLX2_0(ptr, DStreamPtr)
+
+static inline size_t HUFv07_decodeStreamX2(BYTE* p, BITv07_DStream_t* const bitDPtr, BYTE* const pEnd, const HUFv07_DEltX2* const dt, const U32 dtLog)
+{
+    BYTE* const pStart = p;
+
+    /* up to 4 symbols at a time */
+    while ((BITv07_reloadDStream(bitDPtr) == BITv07_DStream_unfinished) && (p <= pEnd-4)) {
+        HUFv07_DECODE_SYMBOLX2_2(p, bitDPtr);
+        HUFv07_DECODE_SYMBOLX2_1(p, bitDPtr);
+        HUFv07_DECODE_SYMBOLX2_2(p, bitDPtr);
+        HUFv07_DECODE_SYMBOLX2_0(p, bitDPtr);
+    }
+
+    /* closer to the end */
+    while ((BITv07_reloadDStream(bitDPtr) == BITv07_DStream_unfinished) && (p < pEnd))
+        HUFv07_DECODE_SYMBOLX2_0(p, bitDPtr);
+
+    /* no more data to retrieve from bitstream, hence no need to reload */
+    while (p < pEnd)
+        HUFv07_DECODE_SYMBOLX2_0(p, bitDPtr);
+
+    return pEnd-pStart;
+}
+
+static size_t HUFv07_decompress1X2_usingDTable_internal(
+          void* dst,  size_t dstSize,
+    const void* cSrc, size_t cSrcSize,
+    const HUFv07_DTable* DTable)
+{
+    BYTE* op = (BYTE*)dst;
+    BYTE* const oend = op + dstSize;
+    const void* dtPtr = DTable + 1;
+    const HUFv07_DEltX2* const dt = (const HUFv07_DEltX2*)dtPtr;
+    BITv07_DStream_t bitD;
+    DTableDesc const dtd = HUFv07_getDTableDesc(DTable);
+    U32 const dtLog = dtd.tableLog;
+
+    { size_t const errorCode = BITv07_initDStream(&bitD, cSrc, cSrcSize);
+      if (HUFv07_isError(errorCode)) return errorCode; }
+
+    HUFv07_decodeStreamX2(op, &bitD, oend, dt, dtLog);
+
+    /* check */
+    if (!BITv07_endOfDStream(&bitD)) return ERROR(corruption_detected);
+
+    return dstSize;
+}
+
+size_t HUFv07_decompress1X2_usingDTable(
+          void* dst,  size_t dstSize,
+    const void* cSrc, size_t cSrcSize,
+    const HUFv07_DTable* DTable)
+{
+    DTableDesc dtd = HUFv07_getDTableDesc(DTable);
+    if (dtd.tableType != 0) return ERROR(GENERIC);
+    return HUFv07_decompress1X2_usingDTable_internal(dst, dstSize, cSrc, cSrcSize, DTable);
+}
+
+size_t HUFv07_decompress1X2_DCtx (HUFv07_DTable* DCtx, void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize)
+{
+    const BYTE* ip = (const BYTE*) cSrc;
+
+    size_t const hSize = HUFv07_readDTableX2 (DCtx, cSrc, cSrcSize);
+    if (HUFv07_isError(hSize)) return hSize;
+    if (hSize >= cSrcSize) return ERROR(srcSize_wrong);
+    ip += hSize; cSrcSize -= hSize;
+
+    return HUFv07_decompress1X2_usingDTable_internal (dst, dstSize, ip, cSrcSize, DCtx);
+}
+
+size_t HUFv07_decompress1X2 (void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize)
+{
+    HUFv07_CREATE_STATIC_DTABLEX2(DTable, HUFv07_TABLELOG_MAX);
+    return HUFv07_decompress1X2_DCtx (DTable, dst, dstSize, cSrc, cSrcSize);
+}
+
+
+static size_t HUFv07_decompress4X2_usingDTable_internal(
+          void* dst,  size_t dstSize,
+    const void* cSrc, size_t cSrcSize,
+    const HUFv07_DTable* DTable)
+{
+    /* Check */
+    if (cSrcSize < 10) return ERROR(corruption_detected);  /* strict minimum : jump table + 1 byte per stream */
+
+    {   const BYTE* const istart = (const BYTE*) cSrc;
+        BYTE* const ostart = (BYTE*) dst;
+        BYTE* const oend = ostart + dstSize;
+        const void* const dtPtr = DTable + 1;
+        const HUFv07_DEltX2* const dt = (const HUFv07_DEltX2*)dtPtr;
+
+        /* Init */
+        BITv07_DStream_t bitD1;
+        BITv07_DStream_t bitD2;
+        BITv07_DStream_t bitD3;
+        BITv07_DStream_t bitD4;
+        size_t const length1 = MEM_readLE16(istart);
+        size_t const length2 = MEM_readLE16(istart+2);
+        size_t const length3 = MEM_readLE16(istart+4);
+        size_t const length4 = cSrcSize - (length1 + length2 + length3 + 6);
+        const BYTE* const istart1 = istart + 6;  /* jumpTable */
+        const BYTE* const istart2 = istart1 + length1;
+        const BYTE* const istart3 = istart2 + length2;
+        const BYTE* const istart4 = istart3 + length3;
+        const size_t segmentSize = (dstSize+3) / 4;
+        BYTE* const opStart2 = ostart + segmentSize;
+        BYTE* const opStart3 = opStart2 + segmentSize;
+        BYTE* const opStart4 = opStart3 + segmentSize;
+        BYTE* op1 = ostart;
+        BYTE* op2 = opStart2;
+        BYTE* op3 = opStart3;
+        BYTE* op4 = opStart4;
+        U32 endSignal;
+        DTableDesc const dtd = HUFv07_getDTableDesc(DTable);
+        U32 const dtLog = dtd.tableLog;
+
+        if (length4 > cSrcSize) return ERROR(corruption_detected);   /* overflow */
+        { size_t const errorCode = BITv07_initDStream(&bitD1, istart1, length1);
+          if (HUFv07_isError(errorCode)) return errorCode; }
+        { size_t const errorCode = BITv07_initDStream(&bitD2, istart2, length2);
+          if (HUFv07_isError(errorCode)) return errorCode; }
+        { size_t const errorCode = BITv07_initDStream(&bitD3, istart3, length3);
+          if (HUFv07_isError(errorCode)) return errorCode; }
+        { size_t const errorCode = BITv07_initDStream(&bitD4, istart4, length4);
+          if (HUFv07_isError(errorCode)) return errorCode; }
+
+        /* 16-32 symbols per loop (4-8 symbols per stream) */
+        endSignal = BITv07_reloadDStream(&bitD1) | BITv07_reloadDStream(&bitD2) | BITv07_reloadDStream(&bitD3) | BITv07_reloadDStream(&bitD4);
+        for ( ; (endSignal==BITv07_DStream_unfinished) && (op4<(oend-7)) ; ) {
+            HUFv07_DECODE_SYMBOLX2_2(op1, &bitD1);
+            HUFv07_DECODE_SYMBOLX2_2(op2, &bitD2);
+            HUFv07_DECODE_SYMBOLX2_2(op3, &bitD3);
+            HUFv07_DECODE_SYMBOLX2_2(op4, &bitD4);
+            HUFv07_DECODE_SYMBOLX2_1(op1, &bitD1);
+            HUFv07_DECODE_SYMBOLX2_1(op2, &bitD2);
+            HUFv07_DECODE_SYMBOLX2_1(op3, &bitD3);
+            HUFv07_DECODE_SYMBOLX2_1(op4, &bitD4);
+            HUFv07_DECODE_SYMBOLX2_2(op1, &bitD1);
+            HUFv07_DECODE_SYMBOLX2_2(op2, &bitD2);
+            HUFv07_DECODE_SYMBOLX2_2(op3, &bitD3);
+            HUFv07_DECODE_SYMBOLX2_2(op4, &bitD4);
+            HUFv07_DECODE_SYMBOLX2_0(op1, &bitD1);
+            HUFv07_DECODE_SYMBOLX2_0(op2, &bitD2);
+            HUFv07_DECODE_SYMBOLX2_0(op3, &bitD3);
+            HUFv07_DECODE_SYMBOLX2_0(op4, &bitD4);
+            endSignal = BITv07_reloadDStream(&bitD1) | BITv07_reloadDStream(&bitD2) | BITv07_reloadDStream(&bitD3) | BITv07_reloadDStream(&bitD4);
+        }
+
+        /* check corruption */
+        if (op1 > opStart2) return ERROR(corruption_detected);
+        if (op2 > opStart3) return ERROR(corruption_detected);
+        if (op3 > opStart4) return ERROR(corruption_detected);
+        /* note : op4 supposed already verified within main loop */
+
+        /* finish bitStreams one by one */
+        HUFv07_decodeStreamX2(op1, &bitD1, opStart2, dt, dtLog);
+        HUFv07_decodeStreamX2(op2, &bitD2, opStart3, dt, dtLog);
+        HUFv07_decodeStreamX2(op3, &bitD3, opStart4, dt, dtLog);
+        HUFv07_decodeStreamX2(op4, &bitD4, oend,     dt, dtLog);
+
+        /* check */
+        endSignal = BITv07_endOfDStream(&bitD1) & BITv07_endOfDStream(&bitD2) & BITv07_endOfDStream(&bitD3) & BITv07_endOfDStream(&bitD4);
+        if (!endSignal) return ERROR(corruption_detected);
+
+        /* decoded size */
+        return dstSize;
+    }
+}
+
+
+size_t HUFv07_decompress4X2_usingDTable(
+          void* dst,  size_t dstSize,
+    const void* cSrc, size_t cSrcSize,
+    const HUFv07_DTable* DTable)
+{
+    DTableDesc dtd = HUFv07_getDTableDesc(DTable);
+    if (dtd.tableType != 0) return ERROR(GENERIC);
+    return HUFv07_decompress4X2_usingDTable_internal(dst, dstSize, cSrc, cSrcSize, DTable);
+}
+
+
+size_t HUFv07_decompress4X2_DCtx (HUFv07_DTable* dctx, void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize)
+{
+    const BYTE* ip = (const BYTE*) cSrc;
+
+    size_t const hSize = HUFv07_readDTableX2 (dctx, cSrc, cSrcSize);
+    if (HUFv07_isError(hSize)) return hSize;
+    if (hSize >= cSrcSize) return ERROR(srcSize_wrong);
+    ip += hSize; cSrcSize -= hSize;
+
+    return HUFv07_decompress4X2_usingDTable_internal (dst, dstSize, ip, cSrcSize, dctx);
+}
+
+size_t HUFv07_decompress4X2 (void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize)
+{
+    HUFv07_CREATE_STATIC_DTABLEX2(DTable, HUFv07_TABLELOG_MAX);
+    return HUFv07_decompress4X2_DCtx(DTable, dst, dstSize, cSrc, cSrcSize);
+}
+
+
+/* *************************/
+/* double-symbols decoding */
+/* *************************/
+typedef struct { U16 sequence; BYTE nbBits; BYTE length; } HUFv07_DEltX4;  /* double-symbols decoding */
+
+typedef struct { BYTE symbol; BYTE weight; } sortedSymbol_t;
+
+static void HUFv07_fillDTableX4Level2(HUFv07_DEltX4* DTable, U32 sizeLog, const U32 consumed,
+                           const U32* rankValOrigin, const int minWeight,
+                           const sortedSymbol_t* sortedSymbols, const U32 sortedListSize,
+                           U32 nbBitsBaseline, U16 baseSeq)
+{
+    HUFv07_DEltX4 DElt;
+    U32 rankVal[HUFv07_TABLELOG_ABSOLUTEMAX + 1];
+
+    /* get pre-calculated rankVal */
+    memcpy(rankVal, rankValOrigin, sizeof(rankVal));
+
+    /* fill skipped values */
+    if (minWeight>1) {
+        U32 i, skipSize = rankVal[minWeight];
+        MEM_writeLE16(&(DElt.sequence), baseSeq);
+        DElt.nbBits   = (BYTE)(consumed);
+        DElt.length   = 1;
+        for (i = 0; i < skipSize; i++)
+            DTable[i] = DElt;
+    }
+
+    /* fill DTable */
+    { U32 s; for (s=0; s<sortedListSize; s++) {   /* note : sortedSymbols already skipped */
+        const U32 symbol = sortedSymbols[s].symbol;
+        const U32 weight = sortedSymbols[s].weight;
+        const U32 nbBits = nbBitsBaseline - weight;
+        const U32 length = 1 << (sizeLog-nbBits);
+        const U32 start = rankVal[weight];
+        U32 i = start;
+        const U32 end = start + length;
+
+        MEM_writeLE16(&(DElt.sequence), (U16)(baseSeq + (symbol << 8)));
+        DElt.nbBits = (BYTE)(nbBits + consumed);
+        DElt.length = 2;
+        do { DTable[i++] = DElt; } while (i<end);   /* since length >= 1 */
+
+        rankVal[weight] += length;
+    }}
+}
+
+typedef U32 rankVal_t[HUFv07_TABLELOG_ABSOLUTEMAX][HUFv07_TABLELOG_ABSOLUTEMAX + 1];
+
+static void HUFv07_fillDTableX4(HUFv07_DEltX4* DTable, const U32 targetLog,
+                           const sortedSymbol_t* sortedList, const U32 sortedListSize,
+                           const U32* rankStart, rankVal_t rankValOrigin, const U32 maxWeight,
+                           const U32 nbBitsBaseline)
+{
+    U32 rankVal[HUFv07_TABLELOG_ABSOLUTEMAX + 1];
+    const int scaleLog = nbBitsBaseline - targetLog;   /* note : targetLog >= srcLog, hence scaleLog <= 1 */
+    const U32 minBits  = nbBitsBaseline - maxWeight;
+    U32 s;
+
+    memcpy(rankVal, rankValOrigin, sizeof(rankVal));
+
+    /* fill DTable */
+    for (s=0; s<sortedListSize; s++) {
+        const U16 symbol = sortedList[s].symbol;
+        const U32 weight = sortedList[s].weight;
+        const U32 nbBits = nbBitsBaseline - weight;
+        const U32 start = rankVal[weight];
+        const U32 length = 1 << (targetLog-nbBits);
+
+        if (targetLog-nbBits >= minBits) {   /* enough room for a second symbol */
+            U32 sortedRank;
+            int minWeight = nbBits + scaleLog;
+            if (minWeight < 1) minWeight = 1;
+            sortedRank = rankStart[minWeight];
+            HUFv07_fillDTableX4Level2(DTable+start, targetLog-nbBits, nbBits,
+                           rankValOrigin[nbBits], minWeight,
+                           sortedList+sortedRank, sortedListSize-sortedRank,
+                           nbBitsBaseline, symbol);
+        } else {
+            HUFv07_DEltX4 DElt;
+            MEM_writeLE16(&(DElt.sequence), symbol);
+            DElt.nbBits = (BYTE)(nbBits);
+            DElt.length = 1;
+            {   U32 u;
+                const U32 end = start + length;
+                for (u = start; u < end; u++) DTable[u] = DElt;
+        }   }
+        rankVal[weight] += length;
+    }
+}
+
+size_t HUFv07_readDTableX4 (HUFv07_DTable* DTable, const void* src, size_t srcSize)
+{
+    BYTE weightList[HUFv07_SYMBOLVALUE_MAX + 1];
+    sortedSymbol_t sortedSymbol[HUFv07_SYMBOLVALUE_MAX + 1];
+    U32 rankStats[HUFv07_TABLELOG_ABSOLUTEMAX + 1] = { 0 };
+    U32 rankStart0[HUFv07_TABLELOG_ABSOLUTEMAX + 2] = { 0 };
+    U32* const rankStart = rankStart0+1;
+    rankVal_t rankVal;
+    U32 tableLog, maxW, sizeOfSort, nbSymbols;
+    DTableDesc dtd = HUFv07_getDTableDesc(DTable);
+    U32 const maxTableLog = dtd.maxTableLog;
+    size_t iSize;
+    void* dtPtr = DTable+1;   /* force compiler to avoid strict-aliasing */
+    HUFv07_DEltX4* const dt = (HUFv07_DEltX4*)dtPtr;
+
+    HUFv07_STATIC_ASSERT(sizeof(HUFv07_DEltX4) == sizeof(HUFv07_DTable));   /* if compilation fails here, assertion is false */
+    if (maxTableLog > HUFv07_TABLELOG_ABSOLUTEMAX) return ERROR(tableLog_tooLarge);
+    //memset(weightList, 0, sizeof(weightList));   /* is not necessary, even though some analyzer complain ... */
+
+    iSize = HUFv07_readStats(weightList, HUFv07_SYMBOLVALUE_MAX + 1, rankStats, &nbSymbols, &tableLog, src, srcSize);
+    if (HUFv07_isError(iSize)) return iSize;
+
+    /* check result */
+    if (tableLog > maxTableLog) return ERROR(tableLog_tooLarge);   /* DTable can't fit code depth */
+
+    /* find maxWeight */
+    for (maxW = tableLog; rankStats[maxW]==0; maxW--) {}  /* necessarily finds a solution before 0 */
+
+    /* Get start index of each weight */
+    {   U32 w, nextRankStart = 0;
+        for (w=1; w<maxW+1; w++) {
+            U32 current = nextRankStart;
+            nextRankStart += rankStats[w];
+            rankStart[w] = current;
+        }
+        rankStart[0] = nextRankStart;   /* put all 0w symbols at the end of sorted list*/
+        sizeOfSort = nextRankStart;
+    }
+
+    /* sort symbols by weight */
+    {   U32 s;
+        for (s=0; s<nbSymbols; s++) {
+            U32 const w = weightList[s];
+            U32 const r = rankStart[w]++;
+            sortedSymbol[r].symbol = (BYTE)s;
+            sortedSymbol[r].weight = (BYTE)w;
+        }
+        rankStart[0] = 0;   /* forget 0w symbols; this is beginning of weight(1) */
+    }
+
+    /* Build rankVal */
+    {   U32* const rankVal0 = rankVal[0];
+        {   int const rescale = (maxTableLog-tableLog) - 1;   /* tableLog <= maxTableLog */
+            U32 nextRankVal = 0;
+            U32 w;
+            for (w=1; w<maxW+1; w++) {
+                U32 current = nextRankVal;
+                nextRankVal += rankStats[w] << (w+rescale);
+                rankVal0[w] = current;
+        }   }
+        {   U32 const minBits = tableLog+1 - maxW;
+            U32 consumed;
+            for (consumed = minBits; consumed < maxTableLog - minBits + 1; consumed++) {
+                U32* const rankValPtr = rankVal[consumed];
+                U32 w;
+                for (w = 1; w < maxW+1; w++) {
+                    rankValPtr[w] = rankVal0[w] >> consumed;
+    }   }   }   }
+
+    HUFv07_fillDTableX4(dt, maxTableLog,
+                   sortedSymbol, sizeOfSort,
+                   rankStart0, rankVal, maxW,
+                   tableLog+1);
+
+    dtd.tableLog = (BYTE)maxTableLog;
+    dtd.tableType = 1;
+    memcpy(DTable, &dtd, sizeof(dtd));
+    return iSize;
+}
+
+
+static U32 HUFv07_decodeSymbolX4(void* op, BITv07_DStream_t* DStream, const HUFv07_DEltX4* dt, const U32 dtLog)
+{
+    const size_t val = BITv07_lookBitsFast(DStream, dtLog);   /* note : dtLog >= 1 */
+    memcpy(op, dt+val, 2);
+    BITv07_skipBits(DStream, dt[val].nbBits);
+    return dt[val].length;
+}
+
+static U32 HUFv07_decodeLastSymbolX4(void* op, BITv07_DStream_t* DStream, const HUFv07_DEltX4* dt, const U32 dtLog)
+{
+    const size_t val = BITv07_lookBitsFast(DStream, dtLog);   /* note : dtLog >= 1 */
+    memcpy(op, dt+val, 1);
+    if (dt[val].length==1) BITv07_skipBits(DStream, dt[val].nbBits);
+    else {
+        if (DStream->bitsConsumed < (sizeof(DStream->bitContainer)*8)) {
+            BITv07_skipBits(DStream, dt[val].nbBits);
+            if (DStream->bitsConsumed > (sizeof(DStream->bitContainer)*8))
+                DStream->bitsConsumed = (sizeof(DStream->bitContainer)*8);   /* ugly hack; works only because it's the last symbol. Note : can't easily extract nbBits from just this symbol */
+    }   }
+    return 1;
+}
+
+
+#define HUFv07_DECODE_SYMBOLX4_0(ptr, DStreamPtr) \
+    ptr += HUFv07_decodeSymbolX4(ptr, DStreamPtr, dt, dtLog)
+
+#define HUFv07_DECODE_SYMBOLX4_1(ptr, DStreamPtr) \
+    if (MEM_64bits() || (HUFv07_TABLELOG_MAX<=12)) \
+        ptr += HUFv07_decodeSymbolX4(ptr, DStreamPtr, dt, dtLog)
+
+#define HUFv07_DECODE_SYMBOLX4_2(ptr, DStreamPtr) \
+    if (MEM_64bits()) \
+        ptr += HUFv07_decodeSymbolX4(ptr, DStreamPtr, dt, dtLog)
+
+static inline size_t HUFv07_decodeStreamX4(BYTE* p, BITv07_DStream_t* bitDPtr, BYTE* const pEnd, const HUFv07_DEltX4* const dt, const U32 dtLog)
+{
+    BYTE* const pStart = p;
+
+    /* up to 8 symbols at a time */
+    while ((BITv07_reloadDStream(bitDPtr) == BITv07_DStream_unfinished) && (p < pEnd-7)) {
+        HUFv07_DECODE_SYMBOLX4_2(p, bitDPtr);
+        HUFv07_DECODE_SYMBOLX4_1(p, bitDPtr);
+        HUFv07_DECODE_SYMBOLX4_2(p, bitDPtr);
+        HUFv07_DECODE_SYMBOLX4_0(p, bitDPtr);
+    }
+
+    /* closer to end : up to 2 symbols at a time */
+    while ((BITv07_reloadDStream(bitDPtr) == BITv07_DStream_unfinished) && (p <= pEnd-2))
+        HUFv07_DECODE_SYMBOLX4_0(p, bitDPtr);
+
+    while (p <= pEnd-2)
+        HUFv07_DECODE_SYMBOLX4_0(p, bitDPtr);   /* no need to reload : reached the end of DStream */
+
+    if (p < pEnd)
+        p += HUFv07_decodeLastSymbolX4(p, bitDPtr, dt, dtLog);
+
+    return p-pStart;
+}
+
+
+static size_t HUFv07_decompress1X4_usingDTable_internal(
+          void* dst,  size_t dstSize,
+    const void* cSrc, size_t cSrcSize,
+    const HUFv07_DTable* DTable)
+{
+    BITv07_DStream_t bitD;
+
+    /* Init */
+    {   size_t const errorCode = BITv07_initDStream(&bitD, cSrc, cSrcSize);
+        if (HUFv07_isError(errorCode)) return errorCode;
+    }
+
+    /* decode */
+    {   BYTE* const ostart = (BYTE*) dst;
+        BYTE* const oend = ostart + dstSize;
+        const void* const dtPtr = DTable+1;   /* force compiler to not use strict-aliasing */
+        const HUFv07_DEltX4* const dt = (const HUFv07_DEltX4*)dtPtr;
+        DTableDesc const dtd = HUFv07_getDTableDesc(DTable);
+        HUFv07_decodeStreamX4(ostart, &bitD, oend, dt, dtd.tableLog);
+    }
+
+    /* check */
+    if (!BITv07_endOfDStream(&bitD)) return ERROR(corruption_detected);
+
+    /* decoded size */
+    return dstSize;
+}
+
+size_t HUFv07_decompress1X4_usingDTable(
+          void* dst,  size_t dstSize,
+    const void* cSrc, size_t cSrcSize,
+    const HUFv07_DTable* DTable)
+{
+    DTableDesc dtd = HUFv07_getDTableDesc(DTable);
+    if (dtd.tableType != 1) return ERROR(GENERIC);
+    return HUFv07_decompress1X4_usingDTable_internal(dst, dstSize, cSrc, cSrcSize, DTable);
+}
+
+size_t HUFv07_decompress1X4_DCtx (HUFv07_DTable* DCtx, void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize)
+{
+    const BYTE* ip = (const BYTE*) cSrc;
+
+    size_t const hSize = HUFv07_readDTableX4 (DCtx, cSrc, cSrcSize);
+    if (HUFv07_isError(hSize)) return hSize;
+    if (hSize >= cSrcSize) return ERROR(srcSize_wrong);
+    ip += hSize; cSrcSize -= hSize;
+
+    return HUFv07_decompress1X4_usingDTable_internal (dst, dstSize, ip, cSrcSize, DCtx);
+}
+
+size_t HUFv07_decompress1X4 (void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize)
+{
+    HUFv07_CREATE_STATIC_DTABLEX4(DTable, HUFv07_TABLELOG_MAX);
+    return HUFv07_decompress1X4_DCtx(DTable, dst, dstSize, cSrc, cSrcSize);
+}
+
+static size_t HUFv07_decompress4X4_usingDTable_internal(
+          void* dst,  size_t dstSize,
+    const void* cSrc, size_t cSrcSize,
+    const HUFv07_DTable* DTable)
+{
+    if (cSrcSize < 10) return ERROR(corruption_detected);   /* strict minimum : jump table + 1 byte per stream */
+
+    {   const BYTE* const istart = (const BYTE*) cSrc;
+        BYTE* const ostart = (BYTE*) dst;
+        BYTE* const oend = ostart + dstSize;
+        const void* const dtPtr = DTable+1;
+        const HUFv07_DEltX4* const dt = (const HUFv07_DEltX4*)dtPtr;
+
+        /* Init */
+        BITv07_DStream_t bitD1;
+        BITv07_DStream_t bitD2;
+        BITv07_DStream_t bitD3;
+        BITv07_DStream_t bitD4;
+        size_t const length1 = MEM_readLE16(istart);
+        size_t const length2 = MEM_readLE16(istart+2);
+        size_t const length3 = MEM_readLE16(istart+4);
+        size_t const length4 = cSrcSize - (length1 + length2 + length3 + 6);
+        const BYTE* const istart1 = istart + 6;  /* jumpTable */
+        const BYTE* const istart2 = istart1 + length1;
+        const BYTE* const istart3 = istart2 + length2;
+        const BYTE* const istart4 = istart3 + length3;
+        size_t const segmentSize = (dstSize+3) / 4;
+        BYTE* const opStart2 = ostart + segmentSize;
+        BYTE* const opStart3 = opStart2 + segmentSize;
+        BYTE* const opStart4 = opStart3 + segmentSize;
+        BYTE* op1 = ostart;
+        BYTE* op2 = opStart2;
+        BYTE* op3 = opStart3;
+        BYTE* op4 = opStart4;
+        U32 endSignal;
+        DTableDesc const dtd = HUFv07_getDTableDesc(DTable);
+        U32 const dtLog = dtd.tableLog;
+
+        if (length4 > cSrcSize) return ERROR(corruption_detected);   /* overflow */
+        { size_t const errorCode = BITv07_initDStream(&bitD1, istart1, length1);
+          if (HUFv07_isError(errorCode)) return errorCode; }
+        { size_t const errorCode = BITv07_initDStream(&bitD2, istart2, length2);
+          if (HUFv07_isError(errorCode)) return errorCode; }
+        { size_t const errorCode = BITv07_initDStream(&bitD3, istart3, length3);
+          if (HUFv07_isError(errorCode)) return errorCode; }
+        { size_t const errorCode = BITv07_initDStream(&bitD4, istart4, length4);
+          if (HUFv07_isError(errorCode)) return errorCode; }
+
+        /* 16-32 symbols per loop (4-8 symbols per stream) */
+        endSignal = BITv07_reloadDStream(&bitD1) | BITv07_reloadDStream(&bitD2) | BITv07_reloadDStream(&bitD3) | BITv07_reloadDStream(&bitD4);
+        for ( ; (endSignal==BITv07_DStream_unfinished) && (op4<(oend-7)) ; ) {
+            HUFv07_DECODE_SYMBOLX4_2(op1, &bitD1);
+            HUFv07_DECODE_SYMBOLX4_2(op2, &bitD2);
+            HUFv07_DECODE_SYMBOLX4_2(op3, &bitD3);
+            HUFv07_DECODE_SYMBOLX4_2(op4, &bitD4);
+            HUFv07_DECODE_SYMBOLX4_1(op1, &bitD1);
+            HUFv07_DECODE_SYMBOLX4_1(op2, &bitD2);
+            HUFv07_DECODE_SYMBOLX4_1(op3, &bitD3);
+            HUFv07_DECODE_SYMBOLX4_1(op4, &bitD4);
+            HUFv07_DECODE_SYMBOLX4_2(op1, &bitD1);
+            HUFv07_DECODE_SYMBOLX4_2(op2, &bitD2);
+            HUFv07_DECODE_SYMBOLX4_2(op3, &bitD3);
+            HUFv07_DECODE_SYMBOLX4_2(op4, &bitD4);
+            HUFv07_DECODE_SYMBOLX4_0(op1, &bitD1);
+            HUFv07_DECODE_SYMBOLX4_0(op2, &bitD2);
+            HUFv07_DECODE_SYMBOLX4_0(op3, &bitD3);
+            HUFv07_DECODE_SYMBOLX4_0(op4, &bitD4);
+
+            endSignal = BITv07_reloadDStream(&bitD1) | BITv07_reloadDStream(&bitD2) | BITv07_reloadDStream(&bitD3) | BITv07_reloadDStream(&bitD4);
+        }
+
+        /* check corruption */
+        if (op1 > opStart2) return ERROR(corruption_detected);
+        if (op2 > opStart3) return ERROR(corruption_detected);
+        if (op3 > opStart4) return ERROR(corruption_detected);
+        /* note : op4 supposed already verified within main loop */
+
+        /* finish bitStreams one by one */
+        HUFv07_decodeStreamX4(op1, &bitD1, opStart2, dt, dtLog);
+        HUFv07_decodeStreamX4(op2, &bitD2, opStart3, dt, dtLog);
+        HUFv07_decodeStreamX4(op3, &bitD3, opStart4, dt, dtLog);
+        HUFv07_decodeStreamX4(op4, &bitD4, oend,     dt, dtLog);
+
+        /* check */
+        { U32 const endCheck = BITv07_endOfDStream(&bitD1) & BITv07_endOfDStream(&bitD2) & BITv07_endOfDStream(&bitD3) & BITv07_endOfDStream(&bitD4);
+          if (!endCheck) return ERROR(corruption_detected); }
+
+        /* decoded size */
+        return dstSize;
+    }
+}
+
+
+size_t HUFv07_decompress4X4_usingDTable(
+          void* dst,  size_t dstSize,
+    const void* cSrc, size_t cSrcSize,
+    const HUFv07_DTable* DTable)
+{
+    DTableDesc dtd = HUFv07_getDTableDesc(DTable);
+    if (dtd.tableType != 1) return ERROR(GENERIC);
+    return HUFv07_decompress4X4_usingDTable_internal(dst, dstSize, cSrc, cSrcSize, DTable);
+}
+
+
+size_t HUFv07_decompress4X4_DCtx (HUFv07_DTable* dctx, void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize)
+{
+    const BYTE* ip = (const BYTE*) cSrc;
+
+    size_t hSize = HUFv07_readDTableX4 (dctx, cSrc, cSrcSize);
+    if (HUFv07_isError(hSize)) return hSize;
+    if (hSize >= cSrcSize) return ERROR(srcSize_wrong);
+    ip += hSize; cSrcSize -= hSize;
+
+    return HUFv07_decompress4X4_usingDTable_internal(dst, dstSize, ip, cSrcSize, dctx);
+}
+
+size_t HUFv07_decompress4X4 (void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize)
+{
+    HUFv07_CREATE_STATIC_DTABLEX4(DTable, HUFv07_TABLELOG_MAX);
+    return HUFv07_decompress4X4_DCtx(DTable, dst, dstSize, cSrc, cSrcSize);
+}
+
+
+/* ********************************/
+/* Generic decompression selector */
+/* ********************************/
+
+size_t HUFv07_decompress1X_usingDTable(void* dst, size_t maxDstSize,
+                                    const void* cSrc, size_t cSrcSize,
+                                    const HUFv07_DTable* DTable)
+{
+    DTableDesc const dtd = HUFv07_getDTableDesc(DTable);
+    return dtd.tableType ? HUFv07_decompress1X4_usingDTable_internal(dst, maxDstSize, cSrc, cSrcSize, DTable) :
+                           HUFv07_decompress1X2_usingDTable_internal(dst, maxDstSize, cSrc, cSrcSize, DTable);
+}
+
+size_t HUFv07_decompress4X_usingDTable(void* dst, size_t maxDstSize,
+                                    const void* cSrc, size_t cSrcSize,
+                                    const HUFv07_DTable* DTable)
+{
+    DTableDesc const dtd = HUFv07_getDTableDesc(DTable);
+    return dtd.tableType ? HUFv07_decompress4X4_usingDTable_internal(dst, maxDstSize, cSrc, cSrcSize, DTable) :
+                           HUFv07_decompress4X2_usingDTable_internal(dst, maxDstSize, cSrc, cSrcSize, DTable);
+}
+
+
+typedef struct { U32 tableTime; U32 decode256Time; } algo_time_t;
+static const algo_time_t algoTime[16 /* Quantization */][3 /* single, double, quad */] =
+{
+    /* single, double, quad */
+    {{0,0}, {1,1}, {2,2}},  /* Q==0 : impossible */
+    {{0,0}, {1,1}, {2,2}},  /* Q==1 : impossible */
+    {{  38,130}, {1313, 74}, {2151, 38}},   /* Q == 2 : 12-18% */
+    {{ 448,128}, {1353, 74}, {2238, 41}},   /* Q == 3 : 18-25% */
+    {{ 556,128}, {1353, 74}, {2238, 47}},   /* Q == 4 : 25-32% */
+    {{ 714,128}, {1418, 74}, {2436, 53}},   /* Q == 5 : 32-38% */
+    {{ 883,128}, {1437, 74}, {2464, 61}},   /* Q == 6 : 38-44% */
+    {{ 897,128}, {1515, 75}, {2622, 68}},   /* Q == 7 : 44-50% */
+    {{ 926,128}, {1613, 75}, {2730, 75}},   /* Q == 8 : 50-56% */
+    {{ 947,128}, {1729, 77}, {3359, 77}},   /* Q == 9 : 56-62% */
+    {{1107,128}, {2083, 81}, {4006, 84}},   /* Q ==10 : 62-69% */
+    {{1177,128}, {2379, 87}, {4785, 88}},   /* Q ==11 : 69-75% */
+    {{1242,128}, {2415, 93}, {5155, 84}},   /* Q ==12 : 75-81% */
+    {{1349,128}, {2644,106}, {5260,106}},   /* Q ==13 : 81-87% */
+    {{1455,128}, {2422,124}, {4174,124}},   /* Q ==14 : 87-93% */
+    {{ 722,128}, {1891,145}, {1936,146}},   /* Q ==15 : 93-99% */
+};
+
+/** HUFv07_selectDecoder() :
+*   Tells which decoder is likely to decode faster,
+*   based on a set of pre-determined metrics.
+*   @return : 0==HUFv07_decompress4X2, 1==HUFv07_decompress4X4 .
+*   Assumption : 0 < cSrcSize < dstSize <= 128 KB */
+U32 HUFv07_selectDecoder (size_t dstSize, size_t cSrcSize)
+{
+    /* decoder timing evaluation */
+    U32 const Q = (U32)(cSrcSize * 16 / dstSize);   /* Q < 16 since dstSize > cSrcSize */
+    U32 const D256 = (U32)(dstSize >> 8);
+    U32 const DTime0 = algoTime[Q][0].tableTime + (algoTime[Q][0].decode256Time * D256);
+    U32 DTime1 = algoTime[Q][1].tableTime + (algoTime[Q][1].decode256Time * D256);
+    DTime1 += DTime1 >> 3;  /* advantage to algorithm using less memory, for cache eviction */
+
+    return DTime1 < DTime0;
+}
+
+
+typedef size_t (*decompressionAlgo)(void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize);
+
+size_t HUFv07_decompress (void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize)
+{
+    static const decompressionAlgo decompress[2] = { HUFv07_decompress4X2, HUFv07_decompress4X4 };
+
+    /* validation checks */
+    if (dstSize == 0) return ERROR(dstSize_tooSmall);
+    if (cSrcSize > dstSize) return ERROR(corruption_detected);   /* invalid */
+    if (cSrcSize == dstSize) { memcpy(dst, cSrc, dstSize); return dstSize; }   /* not compressed */
+    if (cSrcSize == 1) { memset(dst, *(const BYTE*)cSrc, dstSize); return dstSize; }   /* RLE */
+
+    {   U32 const algoNb = HUFv07_selectDecoder(dstSize, cSrcSize);
+        return decompress[algoNb](dst, dstSize, cSrc, cSrcSize);
+    }
+
+    //return HUFv07_decompress4X2(dst, dstSize, cSrc, cSrcSize);   /* multi-streams single-symbol decoding */
+    //return HUFv07_decompress4X4(dst, dstSize, cSrc, cSrcSize);   /* multi-streams double-symbols decoding */
+}
+
+size_t HUFv07_decompress4X_DCtx (HUFv07_DTable* dctx, void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize)
+{
+    /* validation checks */
+    if (dstSize == 0) return ERROR(dstSize_tooSmall);
+    if (cSrcSize > dstSize) return ERROR(corruption_detected);   /* invalid */
+    if (cSrcSize == dstSize) { memcpy(dst, cSrc, dstSize); return dstSize; }   /* not compressed */
+    if (cSrcSize == 1) { memset(dst, *(const BYTE*)cSrc, dstSize); return dstSize; }   /* RLE */
+
+    {   U32 const algoNb = HUFv07_selectDecoder(dstSize, cSrcSize);
+        return algoNb ? HUFv07_decompress4X4_DCtx(dctx, dst, dstSize, cSrc, cSrcSize) :
+                        HUFv07_decompress4X2_DCtx(dctx, dst, dstSize, cSrc, cSrcSize) ;
+    }
+}
+
+size_t HUFv07_decompress4X_hufOnly (HUFv07_DTable* dctx, void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize)
+{
+    /* validation checks */
+    if (dstSize == 0) return ERROR(dstSize_tooSmall);
+    if ((cSrcSize >= dstSize) || (cSrcSize <= 1)) return ERROR(corruption_detected);   /* invalid */
+
+    {   U32 const algoNb = HUFv07_selectDecoder(dstSize, cSrcSize);
+        return algoNb ? HUFv07_decompress4X4_DCtx(dctx, dst, dstSize, cSrc, cSrcSize) :
+                        HUFv07_decompress4X2_DCtx(dctx, dst, dstSize, cSrc, cSrcSize) ;
+    }
+}
+
+size_t HUFv07_decompress1X_DCtx (HUFv07_DTable* dctx, void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize)
+{
+    /* validation checks */
+    if (dstSize == 0) return ERROR(dstSize_tooSmall);
+    if (cSrcSize > dstSize) return ERROR(corruption_detected);   /* invalid */
+    if (cSrcSize == dstSize) { memcpy(dst, cSrc, dstSize); return dstSize; }   /* not compressed */
+    if (cSrcSize == 1) { memset(dst, *(const BYTE*)cSrc, dstSize); return dstSize; }   /* RLE */
+
+    {   U32 const algoNb = HUFv07_selectDecoder(dstSize, cSrcSize);
+        return algoNb ? HUFv07_decompress1X4_DCtx(dctx, dst, dstSize, cSrc, cSrcSize) :
+                        HUFv07_decompress1X2_DCtx(dctx, dst, dstSize, cSrc, cSrcSize) ;
+    }
+}
+/*
+    Common functions of Zstd compression library
+    Copyright (C) 2015-2016, Yann Collet.
+
+    BSD 2-Clause License (http://www.opensource.org/licenses/bsd-license.php)
+
+    Redistribution and use in source and binary forms, with or without
+    modification, are permitted provided that the following conditions are
+    met:
+    * Redistributions of source code must retain the above copyright
+    notice, this list of conditions and the following disclaimer.
+    * Redistributions in binary form must reproduce the above
+    copyright notice, this list of conditions and the following disclaimer
+    in the documentation and/or other materials provided with the
+    distribution.
+    THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+    "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+    LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+    A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+    OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+    SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+    LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+    DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+    THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+    (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+    OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+    You can contact the author at :
+    - zstd homepage : http://www.zstd.net/
+*/
+
+
+
+/*-****************************************
+*  ZSTD Error Management
+******************************************/
+/*! ZSTDv07_isError() :
+*   tells if a return value is an error code */
+unsigned ZSTDv07_isError(size_t code) { return ERR_isError(code); }
+
+/*! ZSTDv07_getErrorName() :
+*   provides error code string from function result (useful for debugging) */
+const char* ZSTDv07_getErrorName(size_t code) { return ERR_getErrorName(code); }
+
+
+
+/* **************************************************************
+*  ZBUFF Error Management
+****************************************************************/
+unsigned ZBUFFv07_isError(size_t errorCode) { return ERR_isError(errorCode); }
+
+const char* ZBUFFv07_getErrorName(size_t errorCode) { return ERR_getErrorName(errorCode); }
+
+
+
+void* ZSTDv07_defaultAllocFunction(void* opaque, size_t size)
+{
+    void* address = malloc(size);
+    (void)opaque;
+    /* printf("alloc %p, %d opaque=%p \n", address, (int)size, opaque); */
+    return address;
+}
+
+void ZSTDv07_defaultFreeFunction(void* opaque, void* address)
+{
+    (void)opaque;
+    /* if (address) printf("free %p opaque=%p \n", address, opaque); */
+    free(address);
+}
+/*
+    zstd_internal - common functions to include
+    Header File for include
+    Copyright (C) 2014-2016, Yann Collet.
+
+    BSD 2-Clause License (http://www.opensource.org/licenses/bsd-license.php)
+
+    Redistribution and use in source and binary forms, with or without
+    modification, are permitted provided that the following conditions are
+    met:
+    * Redistributions of source code must retain the above copyright
+    notice, this list of conditions and the following disclaimer.
+    * Redistributions in binary form must reproduce the above
+    copyright notice, this list of conditions and the following disclaimer
+    in the documentation and/or other materials provided with the
+    distribution.
+    THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+    "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+    LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+    A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+    OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+    SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+    LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+    DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+    THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+    (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+    OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+    You can contact the author at :
+    - zstd homepage : https://www.zstd.net
+*/
+#ifndef ZSTDv07_CCOMMON_H_MODULE
+#define ZSTDv07_CCOMMON_H_MODULE
+
+
+/*-*************************************
+*  Common macros
+***************************************/
+#define MIN(a,b) ((a)<(b) ? (a) : (b))
+#define MAX(a,b) ((a)>(b) ? (a) : (b))
+
+
+/*-*************************************
+*  Common constants
+***************************************/
+#define ZSTDv07_OPT_NUM    (1<<12)
+#define ZSTDv07_DICT_MAGIC  0xEC30A437   /* v0.7 */
+
+#define ZSTDv07_REP_NUM    3
+#define ZSTDv07_REP_INIT   ZSTDv07_REP_NUM
+#define ZSTDv07_REP_MOVE   (ZSTDv07_REP_NUM-1)
+static const U32 repStartValue[ZSTDv07_REP_NUM] = { 1, 4, 8 };
+
+#define KB *(1 <<10)
+#define MB *(1 <<20)
+#define GB *(1U<<30)
+
+#define BIT7 128
+#define BIT6  64
+#define BIT5  32
+#define BIT4  16
+#define BIT1   2
+#define BIT0   1
+
+#define ZSTDv07_WINDOWLOG_ABSOLUTEMIN 10
+static const size_t ZSTDv07_fcs_fieldSize[4] = { 0, 2, 4, 8 };
+static const size_t ZSTDv07_did_fieldSize[4] = { 0, 1, 2, 4 };
+
+#define ZSTDv07_BLOCKHEADERSIZE 3   /* C standard doesn't allow `static const` variable to be init using another `static const` variable */
+static const size_t ZSTDv07_blockHeaderSize = ZSTDv07_BLOCKHEADERSIZE;
+typedef enum { bt_compressed, bt_raw, bt_rle, bt_end } blockType_t;
+
+#define MIN_SEQUENCES_SIZE 1 /* nbSeq==0 */
+#define MIN_CBLOCK_SIZE (1 /*litCSize*/ + 1 /* RLE or RAW */ + MIN_SEQUENCES_SIZE /* nbSeq==0 */)   /* for a non-null block */
+
+#define HufLog 12
+typedef enum { lbt_huffman, lbt_repeat, lbt_raw, lbt_rle } litBlockType_t;
+
+#define LONGNBSEQ 0x7F00
+
+#define MINMATCH 3
+#define EQUAL_READ32 4
+
+#define Litbits  8
+#define MaxLit ((1<<Litbits) - 1)
+#define MaxML  52
+#define MaxLL  35
+#define MaxOff 28
+#define MaxSeq MAX(MaxLL, MaxML)   /* Assumption : MaxOff < MaxLL,MaxML */
+#define MLFSELog    9
+#define LLFSELog    9
+#define OffFSELog   8
+
+#define FSEv07_ENCODING_RAW     0
+#define FSEv07_ENCODING_RLE     1
+#define FSEv07_ENCODING_STATIC  2
+#define FSEv07_ENCODING_DYNAMIC 3
+
+static const U32 LL_bits[MaxLL+1] = { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+                                      1, 1, 1, 1, 2, 2, 3, 3, 4, 6, 7, 8, 9,10,11,12,
+                                     13,14,15,16 };
+static const S16 LL_defaultNorm[MaxLL+1] = { 4, 3, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 1, 1, 1,
+                                             2, 2, 2, 2, 2, 2, 2, 2, 2, 3, 2, 1, 1, 1, 1, 1,
+                                            -1,-1,-1,-1 };
+static const U32 LL_defaultNormLog = 6;
+
+static const U32 ML_bits[MaxML+1] = { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+                                      0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+                                      1, 1, 1, 1, 2, 2, 3, 3, 4, 4, 5, 7, 8, 9,10,11,
+                                     12,13,14,15,16 };
+static const S16 ML_defaultNorm[MaxML+1] = { 1, 4, 3, 2, 2, 2, 2, 2, 2, 1, 1, 1, 1, 1, 1, 1,
+                                             1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+                                             1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,-1,-1,
+                                            -1,-1,-1,-1,-1 };
+static const U32 ML_defaultNormLog = 6;
+
+static const S16 OF_defaultNorm[MaxOff+1] = { 1, 1, 1, 1, 1, 1, 2, 2, 2, 1, 1, 1, 1, 1, 1, 1,
+                                              1, 1, 1, 1, 1, 1, 1, 1,-1,-1,-1,-1,-1 };
+static const U32 OF_defaultNormLog = 5;
+
+
+/*-*******************************************
+*  Shared functions to include for inlining
+*********************************************/
+static void ZSTDv07_copy8(void* dst, const void* src) { memcpy(dst, src, 8); }
+#define COPY8(d,s) { ZSTDv07_copy8(d,s); d+=8; s+=8; }
+
+/*! ZSTDv07_wildcopy() :
+*   custom version of memcpy(), can copy up to 7 bytes too many (8 bytes if length==0) */
+#define WILDCOPY_OVERLENGTH 8
+MEM_STATIC void ZSTDv07_wildcopy(void* dst, const void* src, ptrdiff_t length)
+{
+    const BYTE* ip = (const BYTE*)src;
+    BYTE* op = (BYTE*)dst;
+    BYTE* const oend = op + length;
+    do
+        COPY8(op, ip)
+    while (op < oend);
+}
+
+
+/*-*******************************************
+*  Private interfaces
+*********************************************/
+typedef struct ZSTDv07_stats_s ZSTDv07_stats_t;
+
+typedef struct {
+    U32 off;
+    U32 len;
+} ZSTDv07_match_t;
+
+typedef struct {
+    U32 price;
+    U32 off;
+    U32 mlen;
+    U32 litlen;
+    U32 rep[ZSTDv07_REP_INIT];
+} ZSTDv07_optimal_t;
+
+struct ZSTDv07_stats_s { U32 unused; };
+
+typedef struct {
+    void* buffer;
+    U32*  offsetStart;
+    U32*  offset;
+    BYTE* offCodeStart;
+    BYTE* litStart;
+    BYTE* lit;
+    U16*  litLengthStart;
+    U16*  litLength;
+    BYTE* llCodeStart;
+    U16*  matchLengthStart;
+    U16*  matchLength;
+    BYTE* mlCodeStart;
+    U32   longLengthID;   /* 0 == no longLength; 1 == Lit.longLength; 2 == Match.longLength; */
+    U32   longLengthPos;
+    /* opt */
+    ZSTDv07_optimal_t* priceTable;
+    ZSTDv07_match_t* matchTable;
+    U32* matchLengthFreq;
+    U32* litLengthFreq;
+    U32* litFreq;
+    U32* offCodeFreq;
+    U32  matchLengthSum;
+    U32  matchSum;
+    U32  litLengthSum;
+    U32  litSum;
+    U32  offCodeSum;
+    U32  log2matchLengthSum;
+    U32  log2matchSum;
+    U32  log2litLengthSum;
+    U32  log2litSum;
+    U32  log2offCodeSum;
+    U32  factor;
+    U32  cachedPrice;
+    U32  cachedLitLength;
+    const BYTE* cachedLiterals;
+    ZSTDv07_stats_t stats;
+} seqStore_t;
+
+void ZSTDv07_seqToCodes(const seqStore_t* seqStorePtr, size_t const nbSeq);
+
+/* custom memory allocation functions */
+static const ZSTDv07_customMem defaultCustomMem = { ZSTDv07_defaultAllocFunction, ZSTDv07_defaultFreeFunction, NULL };
+
+#endif   /* ZSTDv07_CCOMMON_H_MODULE */
+/*
+    zstd - standard compression library
+    Copyright (C) 2014-2016, Yann Collet.
+
+    BSD 2-Clause License (http://www.opensource.org/licenses/bsd-license.php)
+
+    Redistribution and use in source and binary forms, with or without
+    modification, are permitted provided that the following conditions are
+    met:
+    * Redistributions of source code must retain the above copyright
+    notice, this list of conditions and the following disclaimer.
+    * Redistributions in binary form must reproduce the above
+    copyright notice, this list of conditions and the following disclaimer
+    in the documentation and/or other materials provided with the
+    distribution.
+    THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+    "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+    LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+    A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+    OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+    SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+    LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+    DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+    THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+    (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+    OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+    You can contact the author at :
+    - zstd homepage : http://www.zstd.net
+*/
+
+/* ***************************************************************
+*  Tuning parameters
+*****************************************************************/
+/*!
+ * HEAPMODE :
+ * Select how default decompression function ZSTDv07_decompress() will allocate memory,
+ * in memory stack (0), or in memory heap (1, requires malloc())
+ */
+#ifndef ZSTDv07_HEAPMODE
+#  define ZSTDv07_HEAPMODE 1
+#endif
+
+
+/*-*******************************************************
+*  Compiler specifics
+*********************************************************/
+#ifdef _MSC_VER    /* Visual Studio */
+#  include <intrin.h>                    /* For Visual 2005 */
+#  pragma warning(disable : 4127)        /* disable: C4127: conditional expression is constant */
+#  pragma warning(disable : 4324)        /* disable: C4324: padded structure */
+#  pragma warning(disable : 4100)        /* disable: C4100: unreferenced formal parameter */
+#endif
+
+
+/*-*************************************
+*  Macros
+***************************************/
+#define ZSTDv07_isError ERR_isError   /* for inlining */
+#define FSEv07_isError  ERR_isError
+#define HUFv07_isError  ERR_isError
+
+
+/*_*******************************************************
+*  Memory operations
+**********************************************************/
+static void ZSTDv07_copy4(void* dst, const void* src) { memcpy(dst, src, 4); }
+
+
+/*-*************************************************************
+*   Context management
+***************************************************************/
+typedef enum { ZSTDds_getFrameHeaderSize, ZSTDds_decodeFrameHeader,
+               ZSTDds_decodeBlockHeader, ZSTDds_decompressBlock,
+               ZSTDds_decodeSkippableHeader, ZSTDds_skipFrame } ZSTDv07_dStage;
+
+struct ZSTDv07_DCtx_s
+{
+    FSEv07_DTable LLTable[FSEv07_DTABLE_SIZE_U32(LLFSELog)];
+    FSEv07_DTable OffTable[FSEv07_DTABLE_SIZE_U32(OffFSELog)];
+    FSEv07_DTable MLTable[FSEv07_DTABLE_SIZE_U32(MLFSELog)];
+    HUFv07_DTable hufTable[HUFv07_DTABLE_SIZE(HufLog)];  /* can accommodate HUFv07_decompress4X */
+    const void* previousDstEnd;
+    const void* base;
+    const void* vBase;
+    const void* dictEnd;
+    size_t expected;
+    U32 rep[3];
+    ZSTDv07_frameParams fParams;
+    blockType_t bType;   /* used in ZSTDv07_decompressContinue(), to transfer blockType between header decoding and block decoding stages */
+    ZSTDv07_dStage stage;
+    U32 litEntropy;
+    U32 fseEntropy;
+    XXH64_state_t xxhState;
+    size_t headerSize;
+    U32 dictID;
+    const BYTE* litPtr;
+    ZSTDv07_customMem customMem;
+    size_t litSize;
+    BYTE litBuffer[ZSTDv07_BLOCKSIZE_ABSOLUTEMAX + WILDCOPY_OVERLENGTH];
+    BYTE headerBuffer[ZSTDv07_FRAMEHEADERSIZE_MAX];
+};  /* typedef'd to ZSTDv07_DCtx within "zstd_static.h" */
+
+int ZSTDv07_isSkipFrame(ZSTDv07_DCtx* dctx);
+
+size_t ZSTDv07_sizeofDCtx (const ZSTDv07_DCtx* dctx) { return sizeof(*dctx); }
+
+size_t ZSTDv07_estimateDCtxSize(void) { return sizeof(ZSTDv07_DCtx); }
+
+size_t ZSTDv07_decompressBegin(ZSTDv07_DCtx* dctx)
+{
+    dctx->expected = ZSTDv07_frameHeaderSize_min;
+    dctx->stage = ZSTDds_getFrameHeaderSize;
+    dctx->previousDstEnd = NULL;
+    dctx->base = NULL;
+    dctx->vBase = NULL;
+    dctx->dictEnd = NULL;
+    dctx->hufTable[0] = (HUFv07_DTable)((HufLog)*0x1000001);
+    dctx->litEntropy = dctx->fseEntropy = 0;
+    dctx->dictID = 0;
+    { int i; for (i=0; i<ZSTDv07_REP_NUM; i++) dctx->rep[i] = repStartValue[i]; }
+    return 0;
+}
+
+ZSTDv07_DCtx* ZSTDv07_createDCtx_advanced(ZSTDv07_customMem customMem)
+{
+    ZSTDv07_DCtx* dctx;
+
+    if (!customMem.customAlloc && !customMem.customFree)
+        customMem = defaultCustomMem;
+
+    if (!customMem.customAlloc || !customMem.customFree)
+        return NULL;
+
+    dctx = (ZSTDv07_DCtx*) customMem.customAlloc(customMem.opaque, sizeof(ZSTDv07_DCtx));
+    if (!dctx) return NULL;
+    memcpy(&dctx->customMem, &customMem, sizeof(ZSTDv07_customMem));
+    ZSTDv07_decompressBegin(dctx);
+    return dctx;
+}
+
+ZSTDv07_DCtx* ZSTDv07_createDCtx(void)
+{
+    return ZSTDv07_createDCtx_advanced(defaultCustomMem);
+}
+
+size_t ZSTDv07_freeDCtx(ZSTDv07_DCtx* dctx)
+{
+    if (dctx==NULL) return 0;   /* support free on NULL */
+    dctx->customMem.customFree(dctx->customMem.opaque, dctx);
+    return 0;   /* reserved as a potential error code in the future */
+}
+
+void ZSTDv07_copyDCtx(ZSTDv07_DCtx* dstDCtx, const ZSTDv07_DCtx* srcDCtx)
+{
+    memcpy(dstDCtx, srcDCtx,
+           sizeof(ZSTDv07_DCtx) - (ZSTDv07_BLOCKSIZE_ABSOLUTEMAX+WILDCOPY_OVERLENGTH + ZSTDv07_frameHeaderSize_max));  /* no need to copy workspace */
+}
+
+
+/*-*************************************************************
+*   Decompression section
+***************************************************************/
+
+/* Frame format description
+   Frame Header -  [ Block Header - Block ] - Frame End
+   1) Frame Header
+      - 4 bytes - Magic Number : ZSTDv07_MAGICNUMBER (defined within zstd.h)
+      - 1 byte  - Frame Descriptor
+   2) Block Header
+      - 3 bytes, starting with a 2-bits descriptor
+                 Uncompressed, Compressed, Frame End, unused
+   3) Block
+      See Block Format Description
+   4) Frame End
+      - 3 bytes, compatible with Block Header
+*/
+
+
+/* Frame Header :
+
+   1 byte - FrameHeaderDescription :
+   bit 0-1 : dictID (0, 1, 2 or 4 bytes)
+   bit 2   : checksumFlag
+   bit 3   : reserved (must be zero)
+   bit 4   : reserved (unused, can be any value)
+   bit 5   : Single Segment (if 1, WindowLog byte is not present)
+   bit 6-7 : FrameContentFieldSize (0, 2, 4, or 8)
+             if (SkippedWindowLog && !FrameContentFieldsize) FrameContentFieldsize=1;
+
+   Optional : WindowLog (0 or 1 byte)
+   bit 0-2 : octal Fractional (1/8th)
+   bit 3-7 : Power of 2, with 0 = 1 KB (up to 2 TB)
+
+   Optional : dictID (0, 1, 2 or 4 bytes)
+   Automatic adaptation
+   0 : no dictID
+   1 : 1 - 255
+   2 : 256 - 65535
+   4 : all other values
+
+   Optional : content size (0, 1, 2, 4 or 8 bytes)
+   0 : unknown          (fcfs==0 and swl==0)
+   1 : 0-255 bytes      (fcfs==0 and swl==1)
+   2 : 256 - 65535+256  (fcfs==1)
+   4 : 0 - 4GB-1        (fcfs==2)
+   8 : 0 - 16EB-1       (fcfs==3)
+*/
+
+
+/* Compressed Block, format description
+
+   Block = Literal Section - Sequences Section
+   Prerequisite : size of (compressed) block, maximum size of regenerated data
+
+   1) Literal Section
+
+   1.1) Header : 1-5 bytes
+        flags: 2 bits
+            00 compressed by Huff0
+            01 unused
+            10 is Raw (uncompressed)
+            11 is Rle
+            Note : using 01 => Huff0 with precomputed table ?
+            Note : delta map ? => compressed ?
+
+   1.1.1) Huff0-compressed literal block : 3-5 bytes
+            srcSize < 1 KB => 3 bytes (2-2-10-10) => single stream
+            srcSize < 1 KB => 3 bytes (2-2-10-10)
+            srcSize < 16KB => 4 bytes (2-2-14-14)
+            else           => 5 bytes (2-2-18-18)
+            big endian convention
+
+   1.1.2) Raw (uncompressed) literal block header : 1-3 bytes
+        size :  5 bits: (IS_RAW<<6) + (0<<4) + size
+               12 bits: (IS_RAW<<6) + (2<<4) + (size>>8)
+                        size&255
+               20 bits: (IS_RAW<<6) + (3<<4) + (size>>16)
+                        size>>8&255
+                        size&255
+
+   1.1.3) Rle (repeated single byte) literal block header : 1-3 bytes
+        size :  5 bits: (IS_RLE<<6) + (0<<4) + size
+               12 bits: (IS_RLE<<6) + (2<<4) + (size>>8)
+                        size&255
+               20 bits: (IS_RLE<<6) + (3<<4) + (size>>16)
+                        size>>8&255
+                        size&255
+
+   1.1.4) Huff0-compressed literal block, using precomputed CTables : 3-5 bytes
+            srcSize < 1 KB => 3 bytes (2-2-10-10) => single stream
+            srcSize < 1 KB => 3 bytes (2-2-10-10)
+            srcSize < 16KB => 4 bytes (2-2-14-14)
+            else           => 5 bytes (2-2-18-18)
+            big endian convention
+
+        1- CTable available (stored into workspace ?)
+        2- Small input (fast heuristic ? Full comparison ? depend on clevel ?)
+
+
+   1.2) Literal block content
+
+   1.2.1) Huff0 block, using sizes from header
+        See Huff0 format
+
+   1.2.2) Huff0 block, using prepared table
+
+   1.2.3) Raw content
+
+   1.2.4) single byte
+
+
+   2) Sequences section
+      TO DO
+*/
+
+/** ZSTDv07_frameHeaderSize() :
+*   srcSize must be >= ZSTDv07_frameHeaderSize_min.
+*   @return : size of the Frame Header */
+static size_t ZSTDv07_frameHeaderSize(const void* src, size_t srcSize)
+{
+    if (srcSize < ZSTDv07_frameHeaderSize_min) return ERROR(srcSize_wrong);
+    {   BYTE const fhd = ((const BYTE*)src)[4];
+        U32 const dictID= fhd & 3;
+        U32 const directMode = (fhd >> 5) & 1;
+        U32 const fcsId = fhd >> 6;
+        return ZSTDv07_frameHeaderSize_min + !directMode + ZSTDv07_did_fieldSize[dictID] + ZSTDv07_fcs_fieldSize[fcsId]
+                + (directMode && !ZSTDv07_fcs_fieldSize[fcsId]);
+    }
+}
+
+
+/** ZSTDv07_getFrameParams() :
+*   decode Frame Header, or require larger `srcSize`.
+*   @return : 0, `fparamsPtr` is correctly filled,
+*            >0, `srcSize` is too small, result is expected `srcSize`,
+*             or an error code, which can be tested using ZSTDv07_isError() */
+size_t ZSTDv07_getFrameParams(ZSTDv07_frameParams* fparamsPtr, const void* src, size_t srcSize)
+{
+    const BYTE* ip = (const BYTE*)src;
+
+    if (srcSize < ZSTDv07_frameHeaderSize_min) return ZSTDv07_frameHeaderSize_min;
+    if (MEM_readLE32(src) != ZSTDv07_MAGICNUMBER) {
+        if ((MEM_readLE32(src) & 0xFFFFFFF0U) == ZSTDv07_MAGIC_SKIPPABLE_START) {
+            if (srcSize < ZSTDv07_skippableHeaderSize) return ZSTDv07_skippableHeaderSize; /* magic number + skippable frame length */
+            memset(fparamsPtr, 0, sizeof(*fparamsPtr));
+            fparamsPtr->frameContentSize = MEM_readLE32((const char *)src + 4);
+            fparamsPtr->windowSize = 0; /* windowSize==0 means a frame is skippable */
+            return 0;
+        }
+        return ERROR(prefix_unknown);
+    }
+
+    /* ensure there is enough `srcSize` to fully read/decode frame header */
+    { size_t const fhsize = ZSTDv07_frameHeaderSize(src, srcSize);
+      if (srcSize < fhsize) return fhsize; }
+
+    {   BYTE const fhdByte = ip[4];
+        size_t pos = 5;
+        U32 const dictIDSizeCode = fhdByte&3;
+        U32 const checksumFlag = (fhdByte>>2)&1;
+        U32 const directMode = (fhdByte>>5)&1;
+        U32 const fcsID = fhdByte>>6;
+        U32 const windowSizeMax = 1U << ZSTDv07_WINDOWLOG_MAX;
+        U32 windowSize = 0;
+        U32 dictID = 0;
+        U64 frameContentSize = 0;
+        if ((fhdByte & 0x08) != 0) return ERROR(frameParameter_unsupported);   /* reserved bits, which must be zero */
+        if (!directMode) {
+            BYTE const wlByte = ip[pos++];
+            U32 const windowLog = (wlByte >> 3) + ZSTDv07_WINDOWLOG_ABSOLUTEMIN;
+            if (windowLog > ZSTDv07_WINDOWLOG_MAX) return ERROR(frameParameter_unsupported);
+            windowSize = (1U << windowLog);
+            windowSize += (windowSize >> 3) * (wlByte&7);
+        }
+
+        switch(dictIDSizeCode)
+        {
+            default:   /* impossible */
+            case 0 : break;
+            case 1 : dictID = ip[pos]; pos++; break;
+            case 2 : dictID = MEM_readLE16(ip+pos); pos+=2; break;
+            case 3 : dictID = MEM_readLE32(ip+pos); pos+=4; break;
+        }
+        switch(fcsID)
+        {
+            default:   /* impossible */
+            case 0 : if (directMode) frameContentSize = ip[pos]; break;
+            case 1 : frameContentSize = MEM_readLE16(ip+pos)+256; break;
+            case 2 : frameContentSize = MEM_readLE32(ip+pos); break;
+            case 3 : frameContentSize = MEM_readLE64(ip+pos); break;
+        }
+        if (!windowSize) windowSize = (U32)frameContentSize;
+        if (windowSize > windowSizeMax) return ERROR(frameParameter_unsupported);
+        fparamsPtr->frameContentSize = frameContentSize;
+        fparamsPtr->windowSize = windowSize;
+        fparamsPtr->dictID = dictID;
+        fparamsPtr->checksumFlag = checksumFlag;
+    }
+    return 0;
+}
+
+
+/** ZSTDv07_getDecompressedSize() :
+*   compatible with legacy mode
+*   @return : decompressed size if known, 0 otherwise
+              note : 0 can mean any of the following :
+                   - decompressed size is not provided within frame header
+                   - frame header unknown / not supported
+                   - frame header not completely provided (`srcSize` too small) */
+unsigned long long ZSTDv07_getDecompressedSize(const void* src, size_t srcSize)
+{
+    {   ZSTDv07_frameParams fparams;
+        size_t const frResult = ZSTDv07_getFrameParams(&fparams, src, srcSize);
+        if (frResult!=0) return 0;
+        return fparams.frameContentSize;
+    }
+}
+
+
+/** ZSTDv07_decodeFrameHeader() :
+*   `srcSize` must be the size provided by ZSTDv07_frameHeaderSize().
+*   @return : 0 if success, or an error code, which can be tested using ZSTDv07_isError() */
+static size_t ZSTDv07_decodeFrameHeader(ZSTDv07_DCtx* dctx, const void* src, size_t srcSize)
+{
+    size_t const result = ZSTDv07_getFrameParams(&(dctx->fParams), src, srcSize);
+    if (dctx->fParams.dictID && (dctx->dictID != dctx->fParams.dictID)) return ERROR(dictionary_wrong);
+    if (dctx->fParams.checksumFlag) XXH64_reset(&dctx->xxhState, 0);
+    return result;
+}
+
+
+typedef struct
+{
+    blockType_t blockType;
+    U32 origSize;
+} blockProperties_t;
+
+/*! ZSTDv07_getcBlockSize() :
+*   Provides the size of compressed block from block header `src` */
+size_t ZSTDv07_getcBlockSize(const void* src, size_t srcSize, blockProperties_t* bpPtr)
+{
+    const BYTE* const in = (const BYTE* const)src;
+    U32 cSize;
+
+    if (srcSize < ZSTDv07_blockHeaderSize) return ERROR(srcSize_wrong);
+
+    bpPtr->blockType = (blockType_t)((*in) >> 6);
+    cSize = in[2] + (in[1]<<8) + ((in[0] & 7)<<16);
+    bpPtr->origSize = (bpPtr->blockType == bt_rle) ? cSize : 0;
+
+    if (bpPtr->blockType == bt_end) return 0;
+    if (bpPtr->blockType == bt_rle) return 1;
+    return cSize;
+}
+
+
+static size_t ZSTDv07_copyRawBlock(void* dst, size_t dstCapacity, const void* src, size_t srcSize)
+{
+    if (srcSize > dstCapacity) return ERROR(dstSize_tooSmall);
+    memcpy(dst, src, srcSize);
+    return srcSize;
+}
+
+
+/*! ZSTDv07_decodeLiteralsBlock() :
+    @return : nb of bytes read from src (< srcSize ) */
+size_t ZSTDv07_decodeLiteralsBlock(ZSTDv07_DCtx* dctx,
+                          const void* src, size_t srcSize)   /* note : srcSize < BLOCKSIZE */
+{
+    const BYTE* const istart = (const BYTE*) src;
+
+    if (srcSize < MIN_CBLOCK_SIZE) return ERROR(corruption_detected);
+
+    switch((litBlockType_t)(istart[0]>> 6))
+    {
+    case lbt_huffman:
+        {   size_t litSize, litCSize, singleStream=0;
+            U32 lhSize = (istart[0] >> 4) & 3;
+            if (srcSize < 5) return ERROR(corruption_detected);   /* srcSize >= MIN_CBLOCK_SIZE == 3; here we need up to 5 for lhSize, + cSize (+nbSeq) */
+            switch(lhSize)
+            {
+            case 0: case 1: default:   /* note : default is impossible, since lhSize into [0..3] */
+                /* 2 - 2 - 10 - 10 */
+                lhSize=3;
+                singleStream = istart[0] & 16;
+                litSize  = ((istart[0] & 15) << 6) + (istart[1] >> 2);
+                litCSize = ((istart[1] &  3) << 8) + istart[2];
+                break;
+            case 2:
+                /* 2 - 2 - 14 - 14 */
+                lhSize=4;
+                litSize  = ((istart[0] & 15) << 10) + (istart[1] << 2) + (istart[2] >> 6);
+                litCSize = ((istart[2] & 63) <<  8) + istart[3];
+                break;
+            case 3:
+                /* 2 - 2 - 18 - 18 */
+                lhSize=5;
+                litSize  = ((istart[0] & 15) << 14) + (istart[1] << 6) + (istart[2] >> 2);
+                litCSize = ((istart[2] &  3) << 16) + (istart[3] << 8) + istart[4];
+                break;
+            }
+            if (litSize > ZSTDv07_BLOCKSIZE_ABSOLUTEMAX) return ERROR(corruption_detected);
+            if (litCSize + lhSize > srcSize) return ERROR(corruption_detected);
+
+            if (HUFv07_isError(singleStream ?
+                            HUFv07_decompress1X2_DCtx(dctx->hufTable, dctx->litBuffer, litSize, istart+lhSize, litCSize) :
+                            HUFv07_decompress4X_hufOnly (dctx->hufTable, dctx->litBuffer, litSize, istart+lhSize, litCSize) ))
+                return ERROR(corruption_detected);
+
+            dctx->litPtr = dctx->litBuffer;
+            dctx->litSize = litSize;
+            dctx->litEntropy = 1;
+            memset(dctx->litBuffer + dctx->litSize, 0, WILDCOPY_OVERLENGTH);
+            return litCSize + lhSize;
+        }
+    case lbt_repeat:
+        {   size_t litSize, litCSize;
+            U32 lhSize = ((istart[0]) >> 4) & 3;
+            if (lhSize != 1)  /* only case supported for now : small litSize, single stream */
+                return ERROR(corruption_detected);
+            if (dctx->litEntropy==0)
+                return ERROR(dictionary_corrupted);
+
+            /* 2 - 2 - 10 - 10 */
+            lhSize=3;
+            litSize  = ((istart[0] & 15) << 6) + (istart[1] >> 2);
+            litCSize = ((istart[1] &  3) << 8) + istart[2];
+            if (litCSize + lhSize > srcSize) return ERROR(corruption_detected);
+
+            {   size_t const errorCode = HUFv07_decompress1X4_usingDTable(dctx->litBuffer, litSize, istart+lhSize, litCSize, dctx->hufTable);
+                if (HUFv07_isError(errorCode)) return ERROR(corruption_detected);
+            }
+            dctx->litPtr = dctx->litBuffer;
+            dctx->litSize = litSize;
+            memset(dctx->litBuffer + dctx->litSize, 0, WILDCOPY_OVERLENGTH);
+            return litCSize + lhSize;
+        }
+    case lbt_raw:
+        {   size_t litSize;
+            U32 lhSize = ((istart[0]) >> 4) & 3;
+            switch(lhSize)
+            {
+            case 0: case 1: default:   /* note : default is impossible, since lhSize into [0..3] */
+                lhSize=1;
+                litSize = istart[0] & 31;
+                break;
+            case 2:
+                litSize = ((istart[0] & 15) << 8) + istart[1];
+                break;
+            case 3:
+                litSize = ((istart[0] & 15) << 16) + (istart[1] << 8) + istart[2];
+                break;
+            }
+
+            if (lhSize+litSize+WILDCOPY_OVERLENGTH > srcSize) {  /* risk reading beyond src buffer with wildcopy */
+                if (litSize+lhSize > srcSize) return ERROR(corruption_detected);
+                memcpy(dctx->litBuffer, istart+lhSize, litSize);
+                dctx->litPtr = dctx->litBuffer;
+                dctx->litSize = litSize;
+                memset(dctx->litBuffer + dctx->litSize, 0, WILDCOPY_OVERLENGTH);
+                return lhSize+litSize;
+            }
+            /* direct reference into compressed stream */
+            dctx->litPtr = istart+lhSize;
+            dctx->litSize = litSize;
+            return lhSize+litSize;
+        }
+    case lbt_rle:
+        {   size_t litSize;
+            U32 lhSize = ((istart[0]) >> 4) & 3;
+            switch(lhSize)
+            {
+            case 0: case 1: default:   /* note : default is impossible, since lhSize into [0..3] */
+                lhSize = 1;
+                litSize = istart[0] & 31;
+                break;
+            case 2:
+                litSize = ((istart[0] & 15) << 8) + istart[1];
+                break;
+            case 3:
+                litSize = ((istart[0] & 15) << 16) + (istart[1] << 8) + istart[2];
+                if (srcSize<4) return ERROR(corruption_detected);   /* srcSize >= MIN_CBLOCK_SIZE == 3; here we need lhSize+1 = 4 */
+                break;
+            }
+            if (litSize > ZSTDv07_BLOCKSIZE_ABSOLUTEMAX) return ERROR(corruption_detected);
+            memset(dctx->litBuffer, istart[lhSize], litSize + WILDCOPY_OVERLENGTH);
+            dctx->litPtr = dctx->litBuffer;
+            dctx->litSize = litSize;
+            return lhSize+1;
+        }
+    default:
+        return ERROR(corruption_detected);   /* impossible */
+    }
+}
+
+
+/*! ZSTDv07_buildSeqTable() :
+    @return : nb bytes read from src,
+              or an error code if it fails, testable with ZSTDv07_isError()
+*/
+size_t ZSTDv07_buildSeqTable(FSEv07_DTable* DTable, U32 type, U32 max, U32 maxLog,
+                                 const void* src, size_t srcSize,
+                                 const S16* defaultNorm, U32 defaultLog, U32 flagRepeatTable)
+{
+    switch(type)
+    {
+    case FSEv07_ENCODING_RLE :
+        if (!srcSize) return ERROR(srcSize_wrong);
+        if ( (*(const BYTE*)src) > max) return ERROR(corruption_detected);
+        FSEv07_buildDTable_rle(DTable, *(const BYTE*)src);   /* if *src > max, data is corrupted */
+        return 1;
+    case FSEv07_ENCODING_RAW :
+        FSEv07_buildDTable(DTable, defaultNorm, max, defaultLog);
+        return 0;
+    case FSEv07_ENCODING_STATIC:
+        if (!flagRepeatTable) return ERROR(corruption_detected);
+        return 0;
+    default :   /* impossible */
+    case FSEv07_ENCODING_DYNAMIC :
+        {   U32 tableLog;
+            S16 norm[MaxSeq+1];
+            size_t const headerSize = FSEv07_readNCount(norm, &max, &tableLog, src, srcSize);
+            if (FSEv07_isError(headerSize)) return ERROR(corruption_detected);
+            if (tableLog > maxLog) return ERROR(corruption_detected);
+            FSEv07_buildDTable(DTable, norm, max, tableLog);
+            return headerSize;
+    }   }
+}
+
+
+size_t ZSTDv07_decodeSeqHeaders(int* nbSeqPtr,
+                             FSEv07_DTable* DTableLL, FSEv07_DTable* DTableML, FSEv07_DTable* DTableOffb, U32 flagRepeatTable,
+                             const void* src, size_t srcSize)
+{
+    const BYTE* const istart = (const BYTE* const)src;
+    const BYTE* const iend = istart + srcSize;
+    const BYTE* ip = istart;
+
+    /* check */
+    if (srcSize < MIN_SEQUENCES_SIZE) return ERROR(srcSize_wrong);
+
+    /* SeqHead */
+    {   int nbSeq = *ip++;
+        if (!nbSeq) { *nbSeqPtr=0; return 1; }
+        if (nbSeq > 0x7F) {
+            if (nbSeq == 0xFF) {
+                if (ip+2 > iend) return ERROR(srcSize_wrong);
+                nbSeq = MEM_readLE16(ip) + LONGNBSEQ, ip+=2;
+            } else {
+                if (ip >= iend) return ERROR(srcSize_wrong);
+                nbSeq = ((nbSeq-0x80)<<8) + *ip++;
+            }
+        }
+        *nbSeqPtr = nbSeq;
+    }
+
+    /* FSE table descriptors */
+    {   U32 const LLtype  = *ip >> 6;
+        U32 const OFtype = (*ip >> 4) & 3;
+        U32 const MLtype  = (*ip >> 2) & 3;
+        ip++;
+
+        /* check */
+        if (ip > iend-3) return ERROR(srcSize_wrong); /* min : all 3 are "raw", hence no header, but at least xxLog bits per type */
+
+        /* Build DTables */
+        {   size_t const llhSize = ZSTDv07_buildSeqTable(DTableLL, LLtype, MaxLL, LLFSELog, ip, iend-ip, LL_defaultNorm, LL_defaultNormLog, flagRepeatTable);
+            if (ZSTDv07_isError(llhSize)) return ERROR(corruption_detected);
+            ip += llhSize;
+        }
+        {   size_t const ofhSize = ZSTDv07_buildSeqTable(DTableOffb, OFtype, MaxOff, OffFSELog, ip, iend-ip, OF_defaultNorm, OF_defaultNormLog, flagRepeatTable);
+            if (ZSTDv07_isError(ofhSize)) return ERROR(corruption_detected);
+            ip += ofhSize;
+        }
+        {   size_t const mlhSize = ZSTDv07_buildSeqTable(DTableML, MLtype, MaxML, MLFSELog, ip, iend-ip, ML_defaultNorm, ML_defaultNormLog, flagRepeatTable);
+            if (ZSTDv07_isError(mlhSize)) return ERROR(corruption_detected);
+            ip += mlhSize;
+    }   }
+
+    return ip-istart;
+}
+
+
+typedef struct {
+    size_t litLength;
+    size_t matchLength;
+    size_t offset;
+} seq_t;
+
+typedef struct {
+    BITv07_DStream_t DStream;
+    FSEv07_DState_t stateLL;
+    FSEv07_DState_t stateOffb;
+    FSEv07_DState_t stateML;
+    size_t prevOffset[ZSTDv07_REP_INIT];
+} seqState_t;
+
+
+static seq_t ZSTDv07_decodeSequence(seqState_t* seqState)
+{
+    seq_t seq;
+
+    U32 const llCode = FSEv07_peekSymbol(&(seqState->stateLL));
+    U32 const mlCode = FSEv07_peekSymbol(&(seqState->stateML));
+    U32 const ofCode = FSEv07_peekSymbol(&(seqState->stateOffb));   /* <= maxOff, by table construction */
+
+    U32 const llBits = LL_bits[llCode];
+    U32 const mlBits = ML_bits[mlCode];
+    U32 const ofBits = ofCode;
+    U32 const totalBits = llBits+mlBits+ofBits;
+
+    static const U32 LL_base[MaxLL+1] = {
+                             0,  1,  2,  3,  4,  5,  6,  7,  8,  9,   10,    11,    12,    13,    14,     15,
+                            16, 18, 20, 22, 24, 28, 32, 40, 48, 64, 0x80, 0x100, 0x200, 0x400, 0x800, 0x1000,
+                            0x2000, 0x4000, 0x8000, 0x10000 };
+
+    static const U32 ML_base[MaxML+1] = {
+                             3,  4,  5,  6,  7,  8,  9, 10, 11, 12, 13,   14,    15,    16,    17,    18,
+                            19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29,   30,    31,    32,    33,    34,
+                            35, 37, 39, 41, 43, 47, 51, 59, 67, 83, 99, 0x83, 0x103, 0x203, 0x403, 0x803,
+                            0x1003, 0x2003, 0x4003, 0x8003, 0x10003 };
+
+    static const U32 OF_base[MaxOff+1] = {
+                 0,        1,       1,       5,     0xD,     0x1D,     0x3D,     0x7D,
+                 0xFD,   0x1FD,   0x3FD,   0x7FD,   0xFFD,   0x1FFD,   0x3FFD,   0x7FFD,
+                 0xFFFD, 0x1FFFD, 0x3FFFD, 0x7FFFD, 0xFFFFD, 0x1FFFFD, 0x3FFFFD, 0x7FFFFD,
+                 0xFFFFFD, 0x1FFFFFD, 0x3FFFFFD, 0x7FFFFFD, 0xFFFFFFD };
+
+    /* sequence */
+    {   size_t offset;
+        if (!ofCode)
+            offset = 0;
+        else {
+            offset = OF_base[ofCode] + BITv07_readBits(&(seqState->DStream), ofBits);   /* <=  (ZSTDv07_WINDOWLOG_MAX-1) bits */
+            if (MEM_32bits()) BITv07_reloadDStream(&(seqState->DStream));
+        }
+
+        if (ofCode <= 1) {
+            if ((llCode == 0) & (offset <= 1)) offset = 1-offset;
+            if (offset) {
+                size_t const temp = seqState->prevOffset[offset];
+                if (offset != 1) seqState->prevOffset[2] = seqState->prevOffset[1];
+                seqState->prevOffset[1] = seqState->prevOffset[0];
+                seqState->prevOffset[0] = offset = temp;
+            } else {
+                offset = seqState->prevOffset[0];
+            }
+        } else {
+            seqState->prevOffset[2] = seqState->prevOffset[1];
+            seqState->prevOffset[1] = seqState->prevOffset[0];
+            seqState->prevOffset[0] = offset;
+        }
+        seq.offset = offset;
+    }
+
+    seq.matchLength = ML_base[mlCode] + ((mlCode>31) ? BITv07_readBits(&(seqState->DStream), mlBits) : 0);   /* <=  16 bits */
+    if (MEM_32bits() && (mlBits+llBits>24)) BITv07_reloadDStream(&(seqState->DStream));
+
+    seq.litLength = LL_base[llCode] + ((llCode>15) ? BITv07_readBits(&(seqState->DStream), llBits) : 0);   /* <=  16 bits */
+    if (MEM_32bits() ||
+       (totalBits > 64 - 7 - (LLFSELog+MLFSELog+OffFSELog)) ) BITv07_reloadDStream(&(seqState->DStream));
+
+    /* ANS state update */
+    FSEv07_updateState(&(seqState->stateLL), &(seqState->DStream));   /* <=  9 bits */
+    FSEv07_updateState(&(seqState->stateML), &(seqState->DStream));   /* <=  9 bits */
+    if (MEM_32bits()) BITv07_reloadDStream(&(seqState->DStream));     /* <= 18 bits */
+    FSEv07_updateState(&(seqState->stateOffb), &(seqState->DStream)); /* <=  8 bits */
+
+    return seq;
+}
+
+
+static
+size_t ZSTDv07_execSequence(BYTE* op,
+                                BYTE* const oend, seq_t sequence,
+                                const BYTE** litPtr, const BYTE* const litLimit,
+                                const BYTE* const base, const BYTE* const vBase, const BYTE* const dictEnd)
+{
+    BYTE* const oLitEnd = op + sequence.litLength;
+    size_t const sequenceLength = sequence.litLength + sequence.matchLength;
+    BYTE* const oMatchEnd = op + sequenceLength;   /* risk : address space overflow (32-bits) */
+    BYTE* const oend_w = oend-WILDCOPY_OVERLENGTH;
+    const BYTE* const iLitEnd = *litPtr + sequence.litLength;
+    const BYTE* match = oLitEnd - sequence.offset;
+
+    /* check */
+    if ((oLitEnd>oend_w) | (oMatchEnd>oend)) return ERROR(dstSize_tooSmall); /* last match must start at a minimum distance of WILDCOPY_OVERLENGTH from oend */
+    if (iLitEnd > litLimit) return ERROR(corruption_detected);   /* over-read beyond lit buffer */
+
+    /* copy Literals */
+    ZSTDv07_wildcopy(op, *litPtr, sequence.litLength);   /* note : since oLitEnd <= oend-WILDCOPY_OVERLENGTH, no risk of overwrite beyond oend */
+    op = oLitEnd;
+    *litPtr = iLitEnd;   /* update for next sequence */
+
+    /* copy Match */
+    if (sequence.offset > (size_t)(oLitEnd - base)) {
+        /* offset beyond prefix */
+        if (sequence.offset > (size_t)(oLitEnd - vBase)) return ERROR(corruption_detected);
+        match = dictEnd - (base-match);
+        if (match + sequence.matchLength <= dictEnd) {
+            memmove(oLitEnd, match, sequence.matchLength);
+            return sequenceLength;
+        }
+        /* span extDict & currentPrefixSegment */
+        {   size_t const length1 = dictEnd - match;
+            memmove(oLitEnd, match, length1);
+            op = oLitEnd + length1;
+            sequence.matchLength -= length1;
+            match = base;
+            if (op > oend_w || sequence.matchLength < MINMATCH) {
+              while (op < oMatchEnd) *op++ = *match++;
+              return sequenceLength;
+            }
+    }   }
+    /* Requirement: op <= oend_w */
+
+    /* match within prefix */
+    if (sequence.offset < 8) {
+        /* close range match, overlap */
+        static const U32 dec32table[] = { 0, 1, 2, 1, 4, 4, 4, 4 };   /* added */
+        static const int dec64table[] = { 8, 8, 8, 7, 8, 9,10,11 };   /* substracted */
+        int const sub2 = dec64table[sequence.offset];
+        op[0] = match[0];
+        op[1] = match[1];
+        op[2] = match[2];
+        op[3] = match[3];
+        match += dec32table[sequence.offset];
+        ZSTDv07_copy4(op+4, match);
+        match -= sub2;
+    } else {
+        ZSTDv07_copy8(op, match);
+    }
+    op += 8; match += 8;
+
+    if (oMatchEnd > oend-(16-MINMATCH)) {
+        if (op < oend_w) {
+            ZSTDv07_wildcopy(op, match, oend_w - op);
+            match += oend_w - op;
+            op = oend_w;
+        }
+        while (op < oMatchEnd) *op++ = *match++;
+    } else {
+        ZSTDv07_wildcopy(op, match, (ptrdiff_t)sequence.matchLength-8);   /* works even if matchLength < 8 */
+    }
+    return sequenceLength;
+}
+
+
+static size_t ZSTDv07_decompressSequences(
+                               ZSTDv07_DCtx* dctx,
+                               void* dst, size_t maxDstSize,
+                         const void* seqStart, size_t seqSize)
+{
+    const BYTE* ip = (const BYTE*)seqStart;
+    const BYTE* const iend = ip + seqSize;
+    BYTE* const ostart = (BYTE* const)dst;
+    BYTE* const oend = ostart + maxDstSize;
+    BYTE* op = ostart;
+    const BYTE* litPtr = dctx->litPtr;
+    const BYTE* const litEnd = litPtr + dctx->litSize;
+    FSEv07_DTable* DTableLL = dctx->LLTable;
+    FSEv07_DTable* DTableML = dctx->MLTable;
+    FSEv07_DTable* DTableOffb = dctx->OffTable;
+    const BYTE* const base = (const BYTE*) (dctx->base);
+    const BYTE* const vBase = (const BYTE*) (dctx->vBase);
+    const BYTE* const dictEnd = (const BYTE*) (dctx->dictEnd);
+    int nbSeq;
+
+    /* Build Decoding Tables */
+    {   size_t const seqHSize = ZSTDv07_decodeSeqHeaders(&nbSeq, DTableLL, DTableML, DTableOffb, dctx->fseEntropy, ip, seqSize);
+        if (ZSTDv07_isError(seqHSize)) return seqHSize;
+        ip += seqHSize;
+    }
+
+    /* Regen sequences */
+    if (nbSeq) {
+        seqState_t seqState;
+        dctx->fseEntropy = 1;
+        { U32 i; for (i=0; i<ZSTDv07_REP_INIT; i++) seqState.prevOffset[i] = dctx->rep[i]; }
+        { size_t const errorCode = BITv07_initDStream(&(seqState.DStream), ip, iend-ip);
+          if (ERR_isError(errorCode)) return ERROR(corruption_detected); }
+        FSEv07_initDState(&(seqState.stateLL), &(seqState.DStream), DTableLL);
+        FSEv07_initDState(&(seqState.stateOffb), &(seqState.DStream), DTableOffb);
+        FSEv07_initDState(&(seqState.stateML), &(seqState.DStream), DTableML);
+
+        for ( ; (BITv07_reloadDStream(&(seqState.DStream)) <= BITv07_DStream_completed) && nbSeq ; ) {
+            nbSeq--;
+            {   seq_t const sequence = ZSTDv07_decodeSequence(&seqState);
+                size_t const oneSeqSize = ZSTDv07_execSequence(op, oend, sequence, &litPtr, litEnd, base, vBase, dictEnd);
+                if (ZSTDv07_isError(oneSeqSize)) return oneSeqSize;
+                op += oneSeqSize;
+        }   }
+
+        /* check if reached exact end */
+        if (nbSeq) return ERROR(corruption_detected);
+        /* save reps for next block */
+        { U32 i; for (i=0; i<ZSTDv07_REP_INIT; i++) dctx->rep[i] = (U32)(seqState.prevOffset[i]); }
+    }
+
+    /* last literal segment */
+    {   size_t const lastLLSize = litEnd - litPtr;
+        //if (litPtr > litEnd) return ERROR(corruption_detected);   /* too many literals already used */
+        if (lastLLSize > (size_t)(oend-op)) return ERROR(dstSize_tooSmall);
+        memcpy(op, litPtr, lastLLSize);
+        op += lastLLSize;
+    }
+
+    return op-ostart;
+}
+
+
+static void ZSTDv07_checkContinuity(ZSTDv07_DCtx* dctx, const void* dst)
+{
+    if (dst != dctx->previousDstEnd) {   /* not contiguous */
+        dctx->dictEnd = dctx->previousDstEnd;
+        dctx->vBase = (const char*)dst - ((const char*)(dctx->previousDstEnd) - (const char*)(dctx->base));
+        dctx->base = dst;
+        dctx->previousDstEnd = dst;
+    }
+}
+
+
+static size_t ZSTDv07_decompressBlock_internal(ZSTDv07_DCtx* dctx,
+                            void* dst, size_t dstCapacity,
+                      const void* src, size_t srcSize)
+{   /* blockType == blockCompressed */
+    const BYTE* ip = (const BYTE*)src;
+
+    if (srcSize >= ZSTDv07_BLOCKSIZE_ABSOLUTEMAX) return ERROR(srcSize_wrong);
+
+    /* Decode literals sub-block */
+    {   size_t const litCSize = ZSTDv07_decodeLiteralsBlock(dctx, src, srcSize);
+        if (ZSTDv07_isError(litCSize)) return litCSize;
+        ip += litCSize;
+        srcSize -= litCSize;
+    }
+    return ZSTDv07_decompressSequences(dctx, dst, dstCapacity, ip, srcSize);
+}
+
+
+size_t ZSTDv07_decompressBlock(ZSTDv07_DCtx* dctx,
+                            void* dst, size_t dstCapacity,
+                      const void* src, size_t srcSize)
+{
+    size_t dSize;
+    ZSTDv07_checkContinuity(dctx, dst);
+    dSize = ZSTDv07_decompressBlock_internal(dctx, dst, dstCapacity, src, srcSize);
+    dctx->previousDstEnd = (char*)dst + dSize;
+    return dSize;
+}
+
+
+/** ZSTDv07_insertBlock() :
+    insert `src` block into `dctx` history. Useful to track uncompressed blocks. */
+ZSTDLIBv07_API size_t ZSTDv07_insertBlock(ZSTDv07_DCtx* dctx, const void* blockStart, size_t blockSize)
+{
+    ZSTDv07_checkContinuity(dctx, blockStart);
+    dctx->previousDstEnd = (const char*)blockStart + blockSize;
+    return blockSize;
+}
+
+
+size_t ZSTDv07_generateNxBytes(void* dst, size_t dstCapacity, BYTE byte, size_t length)
+{
+    if (length > dstCapacity) return ERROR(dstSize_tooSmall);
+    memset(dst, byte, length);
+    return length;
+}
+
+
+/*! ZSTDv07_decompressFrame() :
+*   `dctx` must be properly initialized */
+static size_t ZSTDv07_decompressFrame(ZSTDv07_DCtx* dctx,
+                                 void* dst, size_t dstCapacity,
+                                 const void* src, size_t srcSize)
+{
+    const BYTE* ip = (const BYTE*)src;
+    const BYTE* const iend = ip + srcSize;
+    BYTE* const ostart = (BYTE* const)dst;
+    BYTE* const oend = ostart + dstCapacity;
+    BYTE* op = ostart;
+    size_t remainingSize = srcSize;
+
+    /* check */
+    if (srcSize < ZSTDv07_frameHeaderSize_min+ZSTDv07_blockHeaderSize) return ERROR(srcSize_wrong);
+
+    /* Frame Header */
+    {   size_t const frameHeaderSize = ZSTDv07_frameHeaderSize(src, ZSTDv07_frameHeaderSize_min);
+        if (ZSTDv07_isError(frameHeaderSize)) return frameHeaderSize;
+        if (srcSize < frameHeaderSize+ZSTDv07_blockHeaderSize) return ERROR(srcSize_wrong);
+        if (ZSTDv07_decodeFrameHeader(dctx, src, frameHeaderSize)) return ERROR(corruption_detected);
+        ip += frameHeaderSize; remainingSize -= frameHeaderSize;
+    }
+
+    /* Loop on each block */
+    while (1) {
+        size_t decodedSize;
+        blockProperties_t blockProperties;
+        size_t const cBlockSize = ZSTDv07_getcBlockSize(ip, iend-ip, &blockProperties);
+        if (ZSTDv07_isError(cBlockSize)) return cBlockSize;
+
+        ip += ZSTDv07_blockHeaderSize;
+        remainingSize -= ZSTDv07_blockHeaderSize;
+        if (cBlockSize > remainingSize) return ERROR(srcSize_wrong);
+
+        switch(blockProperties.blockType)
+        {
+        case bt_compressed:
+            decodedSize = ZSTDv07_decompressBlock_internal(dctx, op, oend-op, ip, cBlockSize);
+            break;
+        case bt_raw :
+            decodedSize = ZSTDv07_copyRawBlock(op, oend-op, ip, cBlockSize);
+            break;
+        case bt_rle :
+            decodedSize = ZSTDv07_generateNxBytes(op, oend-op, *ip, blockProperties.origSize);
+            break;
+        case bt_end :
+            /* end of frame */
+            if (remainingSize) return ERROR(srcSize_wrong);
+            decodedSize = 0;
+            break;
+        default:
+            return ERROR(GENERIC);   /* impossible */
+        }
+        if (blockProperties.blockType == bt_end) break;   /* bt_end */
+
+        if (ZSTDv07_isError(decodedSize)) return decodedSize;
+        if (dctx->fParams.checksumFlag) XXH64_update(&dctx->xxhState, op, decodedSize);
+        op += decodedSize;
+        ip += cBlockSize;
+        remainingSize -= cBlockSize;
+    }
+
+    return op-ostart;
+}
+
+
+/*! ZSTDv07_decompress_usingPreparedDCtx() :
+*   Same as ZSTDv07_decompress_usingDict, but using a reference context `preparedDCtx`, where dictionary has been loaded.
+*   It avoids reloading the dictionary each time.
+*   `preparedDCtx` must have been properly initialized using ZSTDv07_decompressBegin_usingDict().
+*   Requires 2 contexts : 1 for reference (preparedDCtx), which will not be modified, and 1 to run the decompression operation (dctx) */
+size_t ZSTDv07_decompress_usingPreparedDCtx(ZSTDv07_DCtx* dctx, const ZSTDv07_DCtx* refDCtx,
+                                         void* dst, size_t dstCapacity,
+                                   const void* src, size_t srcSize)
+{
+    ZSTDv07_copyDCtx(dctx, refDCtx);
+    ZSTDv07_checkContinuity(dctx, dst);
+    return ZSTDv07_decompressFrame(dctx, dst, dstCapacity, src, srcSize);
+}
+
+
+size_t ZSTDv07_decompress_usingDict(ZSTDv07_DCtx* dctx,
+                                 void* dst, size_t dstCapacity,
+                                 const void* src, size_t srcSize,
+                                 const void* dict, size_t dictSize)
+{
+    ZSTDv07_decompressBegin_usingDict(dctx, dict, dictSize);
+    ZSTDv07_checkContinuity(dctx, dst);
+    return ZSTDv07_decompressFrame(dctx, dst, dstCapacity, src, srcSize);
+}
+
+
+size_t ZSTDv07_decompressDCtx(ZSTDv07_DCtx* dctx, void* dst, size_t dstCapacity, const void* src, size_t srcSize)
+{
+    return ZSTDv07_decompress_usingDict(dctx, dst, dstCapacity, src, srcSize, NULL, 0);
+}
+
+
+size_t ZSTDv07_decompress(void* dst, size_t dstCapacity, const void* src, size_t srcSize)
+{
+#if defined(ZSTDv07_HEAPMODE) && (ZSTDv07_HEAPMODE==1)
+    size_t regenSize;
+    ZSTDv07_DCtx* const dctx = ZSTDv07_createDCtx();
+    if (dctx==NULL) return ERROR(memory_allocation);
+    regenSize = ZSTDv07_decompressDCtx(dctx, dst, dstCapacity, src, srcSize);
+    ZSTDv07_freeDCtx(dctx);
+    return regenSize;
+#else   /* stack mode */
+    ZSTDv07_DCtx dctx;
+    return ZSTDv07_decompressDCtx(&dctx, dst, dstCapacity, src, srcSize);
+#endif
+}
+
+size_t ZSTDv07_findFrameCompressedSize(const void* src, size_t srcSize)
+{
+    const BYTE* ip = (const BYTE*)src;
+    size_t remainingSize = srcSize;
+
+    /* check */
+    if (srcSize < ZSTDv07_frameHeaderSize_min+ZSTDv07_blockHeaderSize) return ERROR(srcSize_wrong);
+
+    /* Frame Header */
+    {   size_t const frameHeaderSize = ZSTDv07_frameHeaderSize(src, ZSTDv07_frameHeaderSize_min);
+        if (ZSTDv07_isError(frameHeaderSize)) return frameHeaderSize;
+        if (MEM_readLE32(src) != ZSTDv07_MAGICNUMBER) return ERROR(prefix_unknown);
+        if (srcSize < frameHeaderSize+ZSTDv07_blockHeaderSize) return ERROR(srcSize_wrong);
+        ip += frameHeaderSize; remainingSize -= frameHeaderSize;
+    }
+
+    /* Loop on each block */
+    while (1) {
+        blockProperties_t blockProperties;
+        size_t const cBlockSize = ZSTDv07_getcBlockSize(ip, remainingSize, &blockProperties);
+        if (ZSTDv07_isError(cBlockSize)) return cBlockSize;
+
+        ip += ZSTDv07_blockHeaderSize;
+        remainingSize -= ZSTDv07_blockHeaderSize;
+
+        if (blockProperties.blockType == bt_end) break;
+
+        if (cBlockSize > remainingSize) return ERROR(srcSize_wrong);
+
+        ip += cBlockSize;
+        remainingSize -= cBlockSize;
+    }
+
+    return ip - (const BYTE*)src;
+}
+
+/*_******************************
+*  Streaming Decompression API
+********************************/
+size_t ZSTDv07_nextSrcSizeToDecompress(ZSTDv07_DCtx* dctx)
+{
+    return dctx->expected;
+}
+
+int ZSTDv07_isSkipFrame(ZSTDv07_DCtx* dctx)
+{
+    return dctx->stage == ZSTDds_skipFrame;
+}
+
+/** ZSTDv07_decompressContinue() :
+*   @return : nb of bytes generated into `dst` (necessarily <= `dstCapacity)
+*             or an error code, which can be tested using ZSTDv07_isError() */
+size_t ZSTDv07_decompressContinue(ZSTDv07_DCtx* dctx, void* dst, size_t dstCapacity, const void* src, size_t srcSize)
+{
+    /* Sanity check */
+    if (srcSize != dctx->expected) return ERROR(srcSize_wrong);
+    if (dstCapacity) ZSTDv07_checkContinuity(dctx, dst);
+
+    switch (dctx->stage)
+    {
+    case ZSTDds_getFrameHeaderSize :
+        if (srcSize != ZSTDv07_frameHeaderSize_min) return ERROR(srcSize_wrong);   /* impossible */
+        if ((MEM_readLE32(src) & 0xFFFFFFF0U) == ZSTDv07_MAGIC_SKIPPABLE_START) {
+            memcpy(dctx->headerBuffer, src, ZSTDv07_frameHeaderSize_min);
+            dctx->expected = ZSTDv07_skippableHeaderSize - ZSTDv07_frameHeaderSize_min; /* magic number + skippable frame length */
+            dctx->stage = ZSTDds_decodeSkippableHeader;
+            return 0;
+        }
+        dctx->headerSize = ZSTDv07_frameHeaderSize(src, ZSTDv07_frameHeaderSize_min);
+        if (ZSTDv07_isError(dctx->headerSize)) return dctx->headerSize;
+        memcpy(dctx->headerBuffer, src, ZSTDv07_frameHeaderSize_min);
+        if (dctx->headerSize > ZSTDv07_frameHeaderSize_min) {
+            dctx->expected = dctx->headerSize - ZSTDv07_frameHeaderSize_min;
+            dctx->stage = ZSTDds_decodeFrameHeader;
+            return 0;
+        }
+        dctx->expected = 0;   /* not necessary to copy more */
+	/* fall-through */
+    case ZSTDds_decodeFrameHeader:
+        {   size_t result;
+            memcpy(dctx->headerBuffer + ZSTDv07_frameHeaderSize_min, src, dctx->expected);
+            result = ZSTDv07_decodeFrameHeader(dctx, dctx->headerBuffer, dctx->headerSize);
+            if (ZSTDv07_isError(result)) return result;
+            dctx->expected = ZSTDv07_blockHeaderSize;
+            dctx->stage = ZSTDds_decodeBlockHeader;
+            return 0;
+        }
+    case ZSTDds_decodeBlockHeader:
+        {   blockProperties_t bp;
+            size_t const cBlockSize = ZSTDv07_getcBlockSize(src, ZSTDv07_blockHeaderSize, &bp);
+            if (ZSTDv07_isError(cBlockSize)) return cBlockSize;
+            if (bp.blockType == bt_end) {
+                if (dctx->fParams.checksumFlag) {
+                    U64 const h64 = XXH64_digest(&dctx->xxhState);
+                    U32 const h32 = (U32)(h64>>11) & ((1<<22)-1);
+                    const BYTE* const ip = (const BYTE*)src;
+                    U32 const check32 = ip[2] + (ip[1] << 8) + ((ip[0] & 0x3F) << 16);
+                    if (check32 != h32) return ERROR(checksum_wrong);
+                }
+                dctx->expected = 0;
+                dctx->stage = ZSTDds_getFrameHeaderSize;
+            } else {
+                dctx->expected = cBlockSize;
+                dctx->bType = bp.blockType;
+                dctx->stage = ZSTDds_decompressBlock;
+            }
+            return 0;
+        }
+    case ZSTDds_decompressBlock:
+        {   size_t rSize;
+            switch(dctx->bType)
+            {
+            case bt_compressed:
+                rSize = ZSTDv07_decompressBlock_internal(dctx, dst, dstCapacity, src, srcSize);
+                break;
+            case bt_raw :
+                rSize = ZSTDv07_copyRawBlock(dst, dstCapacity, src, srcSize);
+                break;
+            case bt_rle :
+                return ERROR(GENERIC);   /* not yet handled */
+                break;
+            case bt_end :   /* should never happen (filtered at phase 1) */
+                rSize = 0;
+                break;
+            default:
+                return ERROR(GENERIC);   /* impossible */
+            }
+            dctx->stage = ZSTDds_decodeBlockHeader;
+            dctx->expected = ZSTDv07_blockHeaderSize;
+            dctx->previousDstEnd = (char*)dst + rSize;
+            if (ZSTDv07_isError(rSize)) return rSize;
+            if (dctx->fParams.checksumFlag) XXH64_update(&dctx->xxhState, dst, rSize);
+            return rSize;
+        }
+    case ZSTDds_decodeSkippableHeader:
+        {   memcpy(dctx->headerBuffer + ZSTDv07_frameHeaderSize_min, src, dctx->expected);
+            dctx->expected = MEM_readLE32(dctx->headerBuffer + 4);
+            dctx->stage = ZSTDds_skipFrame;
+            return 0;
+        }
+    case ZSTDds_skipFrame:
+        {   dctx->expected = 0;
+            dctx->stage = ZSTDds_getFrameHeaderSize;
+            return 0;
+        }
+    default:
+        return ERROR(GENERIC);   /* impossible */
+    }
+}
+
+
+static size_t ZSTDv07_refDictContent(ZSTDv07_DCtx* dctx, const void* dict, size_t dictSize)
+{
+    dctx->dictEnd = dctx->previousDstEnd;
+    dctx->vBase = (const char*)dict - ((const char*)(dctx->previousDstEnd) - (const char*)(dctx->base));
+    dctx->base = dict;
+    dctx->previousDstEnd = (const char*)dict + dictSize;
+    return 0;
+}
+
+static size_t ZSTDv07_loadEntropy(ZSTDv07_DCtx* dctx, const void* const dict, size_t const dictSize)
+{
+    const BYTE* dictPtr = (const BYTE*)dict;
+    const BYTE* const dictEnd = dictPtr + dictSize;
+
+    {   size_t const hSize = HUFv07_readDTableX4(dctx->hufTable, dict, dictSize);
+        if (HUFv07_isError(hSize)) return ERROR(dictionary_corrupted);
+        dictPtr += hSize;
+    }
+
+    {   short offcodeNCount[MaxOff+1];
+        U32 offcodeMaxValue=MaxOff, offcodeLog;
+        size_t const offcodeHeaderSize = FSEv07_readNCount(offcodeNCount, &offcodeMaxValue, &offcodeLog, dictPtr, dictEnd-dictPtr);
+        if (FSEv07_isError(offcodeHeaderSize)) return ERROR(dictionary_corrupted);
+        if (offcodeLog > OffFSELog) return ERROR(dictionary_corrupted);
+        { size_t const errorCode = FSEv07_buildDTable(dctx->OffTable, offcodeNCount, offcodeMaxValue, offcodeLog);
+          if (FSEv07_isError(errorCode)) return ERROR(dictionary_corrupted); }
+        dictPtr += offcodeHeaderSize;
+    }
+
+    {   short matchlengthNCount[MaxML+1];
+        unsigned matchlengthMaxValue = MaxML, matchlengthLog;
+        size_t const matchlengthHeaderSize = FSEv07_readNCount(matchlengthNCount, &matchlengthMaxValue, &matchlengthLog, dictPtr, dictEnd-dictPtr);
+        if (FSEv07_isError(matchlengthHeaderSize)) return ERROR(dictionary_corrupted);
+        if (matchlengthLog > MLFSELog) return ERROR(dictionary_corrupted);
+        { size_t const errorCode = FSEv07_buildDTable(dctx->MLTable, matchlengthNCount, matchlengthMaxValue, matchlengthLog);
+          if (FSEv07_isError(errorCode)) return ERROR(dictionary_corrupted); }
+        dictPtr += matchlengthHeaderSize;
+    }
+
+    {   short litlengthNCount[MaxLL+1];
+        unsigned litlengthMaxValue = MaxLL, litlengthLog;
+        size_t const litlengthHeaderSize = FSEv07_readNCount(litlengthNCount, &litlengthMaxValue, &litlengthLog, dictPtr, dictEnd-dictPtr);
+        if (FSEv07_isError(litlengthHeaderSize)) return ERROR(dictionary_corrupted);
+        if (litlengthLog > LLFSELog) return ERROR(dictionary_corrupted);
+        { size_t const errorCode = FSEv07_buildDTable(dctx->LLTable, litlengthNCount, litlengthMaxValue, litlengthLog);
+          if (FSEv07_isError(errorCode)) return ERROR(dictionary_corrupted); }
+        dictPtr += litlengthHeaderSize;
+    }
+
+    if (dictPtr+12 > dictEnd) return ERROR(dictionary_corrupted);
+    dctx->rep[0] = MEM_readLE32(dictPtr+0); if (dctx->rep[0] == 0 || dctx->rep[0] >= dictSize) return ERROR(dictionary_corrupted);
+    dctx->rep[1] = MEM_readLE32(dictPtr+4); if (dctx->rep[1] == 0 || dctx->rep[1] >= dictSize) return ERROR(dictionary_corrupted);
+    dctx->rep[2] = MEM_readLE32(dictPtr+8); if (dctx->rep[2] == 0 || dctx->rep[2] >= dictSize) return ERROR(dictionary_corrupted);
+    dictPtr += 12;
+
+    dctx->litEntropy = dctx->fseEntropy = 1;
+    return dictPtr - (const BYTE*)dict;
+}
+
+static size_t ZSTDv07_decompress_insertDictionary(ZSTDv07_DCtx* dctx, const void* dict, size_t dictSize)
+{
+    if (dictSize < 8) return ZSTDv07_refDictContent(dctx, dict, dictSize);
+    {   U32 const magic = MEM_readLE32(dict);
+        if (magic != ZSTDv07_DICT_MAGIC) {
+            return ZSTDv07_refDictContent(dctx, dict, dictSize);   /* pure content mode */
+    }   }
+    dctx->dictID = MEM_readLE32((const char*)dict + 4);
+
+    /* load entropy tables */
+    dict = (const char*)dict + 8;
+    dictSize -= 8;
+    {   size_t const eSize = ZSTDv07_loadEntropy(dctx, dict, dictSize);
+        if (ZSTDv07_isError(eSize)) return ERROR(dictionary_corrupted);
+        dict = (const char*)dict + eSize;
+        dictSize -= eSize;
+    }
+
+    /* reference dictionary content */
+    return ZSTDv07_refDictContent(dctx, dict, dictSize);
+}
+
+
+size_t ZSTDv07_decompressBegin_usingDict(ZSTDv07_DCtx* dctx, const void* dict, size_t dictSize)
+{
+    { size_t const errorCode = ZSTDv07_decompressBegin(dctx);
+      if (ZSTDv07_isError(errorCode)) return errorCode; }
+
+    if (dict && dictSize) {
+        size_t const errorCode = ZSTDv07_decompress_insertDictionary(dctx, dict, dictSize);
+        if (ZSTDv07_isError(errorCode)) return ERROR(dictionary_corrupted);
+    }
+
+    return 0;
+}
+
+
+struct ZSTDv07_DDict_s {
+    void* dict;
+    size_t dictSize;
+    ZSTDv07_DCtx* refContext;
+};  /* typedef'd tp ZSTDv07_CDict within zstd.h */
+
+ZSTDv07_DDict* ZSTDv07_createDDict_advanced(const void* dict, size_t dictSize, ZSTDv07_customMem customMem)
+{
+    if (!customMem.customAlloc && !customMem.customFree)
+        customMem = defaultCustomMem;
+
+    if (!customMem.customAlloc || !customMem.customFree)
+        return NULL;
+
+    {   ZSTDv07_DDict* const ddict = (ZSTDv07_DDict*) customMem.customAlloc(customMem.opaque, sizeof(*ddict));
+        void* const dictContent = customMem.customAlloc(customMem.opaque, dictSize);
+        ZSTDv07_DCtx* const dctx = ZSTDv07_createDCtx_advanced(customMem);
+
+        if (!dictContent || !ddict || !dctx) {
+            customMem.customFree(customMem.opaque, dictContent);
+            customMem.customFree(customMem.opaque, ddict);
+            customMem.customFree(customMem.opaque, dctx);
+            return NULL;
+        }
+
+        memcpy(dictContent, dict, dictSize);
+        {   size_t const errorCode = ZSTDv07_decompressBegin_usingDict(dctx, dictContent, dictSize);
+            if (ZSTDv07_isError(errorCode)) {
+                customMem.customFree(customMem.opaque, dictContent);
+                customMem.customFree(customMem.opaque, ddict);
+                customMem.customFree(customMem.opaque, dctx);
+                return NULL;
+        }   }
+
+        ddict->dict = dictContent;
+        ddict->dictSize = dictSize;
+        ddict->refContext = dctx;
+        return ddict;
+    }
+}
+
+/*! ZSTDv07_createDDict() :
+*   Create a digested dictionary, ready to start decompression without startup delay.
+*   `dict` can be released after `ZSTDv07_DDict` creation */
+ZSTDv07_DDict* ZSTDv07_createDDict(const void* dict, size_t dictSize)
+{
+    ZSTDv07_customMem const allocator = { NULL, NULL, NULL };
+    return ZSTDv07_createDDict_advanced(dict, dictSize, allocator);
+}
+
+size_t ZSTDv07_freeDDict(ZSTDv07_DDict* ddict)
+{
+    ZSTDv07_freeFunction const cFree = ddict->refContext->customMem.customFree;
+    void* const opaque = ddict->refContext->customMem.opaque;
+    ZSTDv07_freeDCtx(ddict->refContext);
+    cFree(opaque, ddict->dict);
+    cFree(opaque, ddict);
+    return 0;
+}
+
+/*! ZSTDv07_decompress_usingDDict() :
+*   Decompression using a pre-digested Dictionary
+*   Use dictionary without significant overhead. */
+ZSTDLIBv07_API size_t ZSTDv07_decompress_usingDDict(ZSTDv07_DCtx* dctx,
+                                           void* dst, size_t dstCapacity,
+                                     const void* src, size_t srcSize,
+                                     const ZSTDv07_DDict* ddict)
+{
+    return ZSTDv07_decompress_usingPreparedDCtx(dctx, ddict->refContext,
+                                           dst, dstCapacity,
+                                           src, srcSize);
+}
+/*
+    Buffered version of Zstd compression library
+    Copyright (C) 2015-2016, Yann Collet.
+
+    BSD 2-Clause License (http://www.opensource.org/licenses/bsd-license.php)
+
+    Redistribution and use in source and binary forms, with or without
+    modification, are permitted provided that the following conditions are
+    met:
+    * Redistributions of source code must retain the above copyright
+    notice, this list of conditions and the following disclaimer.
+    * Redistributions in binary form must reproduce the above
+    copyright notice, this list of conditions and the following disclaimer
+    in the documentation and/or other materials provided with the
+    distribution.
+    THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+    "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+    LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+    A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+    OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+    SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+    LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+    DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+    THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+    (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+    OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+    You can contact the author at :
+    - zstd homepage : http://www.zstd.net/
+*/
+
+
+
+/*-***************************************************************************
+*  Streaming decompression howto
+*
+*  A ZBUFFv07_DCtx object is required to track streaming operations.
+*  Use ZBUFFv07_createDCtx() and ZBUFFv07_freeDCtx() to create/release resources.
+*  Use ZBUFFv07_decompressInit() to start a new decompression operation,
+*   or ZBUFFv07_decompressInitDictionary() if decompression requires a dictionary.
+*  Note that ZBUFFv07_DCtx objects can be re-init multiple times.
+*
+*  Use ZBUFFv07_decompressContinue() repetitively to consume your input.
+*  *srcSizePtr and *dstCapacityPtr can be any size.
+*  The function will report how many bytes were read or written by modifying *srcSizePtr and *dstCapacityPtr.
+*  Note that it may not consume the entire input, in which case it's up to the caller to present remaining input again.
+*  The content of @dst will be overwritten (up to *dstCapacityPtr) at each function call, so save its content if it matters, or change @dst.
+*  @return : a hint to preferred nb of bytes to use as input for next function call (it's only a hint, to help latency),
+*            or 0 when a frame is completely decoded,
+*            or an error code, which can be tested using ZBUFFv07_isError().
+*
+*  Hint : recommended buffer sizes (not compulsory) : ZBUFFv07_recommendedDInSize() and ZBUFFv07_recommendedDOutSize()
+*  output : ZBUFFv07_recommendedDOutSize==128 KB block size is the internal unit, it ensures it's always possible to write a full block when decoded.
+*  input  : ZBUFFv07_recommendedDInSize == 128KB + 3;
+*           just follow indications from ZBUFFv07_decompressContinue() to minimize latency. It should always be <= 128 KB + 3 .
+* *******************************************************************************/
+
+typedef enum { ZBUFFds_init, ZBUFFds_loadHeader,
+               ZBUFFds_read, ZBUFFds_load, ZBUFFds_flush } ZBUFFv07_dStage;
+
+/* *** Resource management *** */
+struct ZBUFFv07_DCtx_s {
+    ZSTDv07_DCtx* zd;
+    ZSTDv07_frameParams fParams;
+    ZBUFFv07_dStage stage;
+    char*  inBuff;
+    size_t inBuffSize;
+    size_t inPos;
+    char*  outBuff;
+    size_t outBuffSize;
+    size_t outStart;
+    size_t outEnd;
+    size_t blockSize;
+    BYTE headerBuffer[ZSTDv07_FRAMEHEADERSIZE_MAX];
+    size_t lhSize;
+    ZSTDv07_customMem customMem;
+};   /* typedef'd to ZBUFFv07_DCtx within "zstd_buffered.h" */
+
+ZSTDLIBv07_API ZBUFFv07_DCtx* ZBUFFv07_createDCtx_advanced(ZSTDv07_customMem customMem);
+
+ZBUFFv07_DCtx* ZBUFFv07_createDCtx(void)
+{
+    return ZBUFFv07_createDCtx_advanced(defaultCustomMem);
+}
+
+ZBUFFv07_DCtx* ZBUFFv07_createDCtx_advanced(ZSTDv07_customMem customMem)
+{
+    ZBUFFv07_DCtx* zbd;
+
+    if (!customMem.customAlloc && !customMem.customFree)
+        customMem = defaultCustomMem;
+
+    if (!customMem.customAlloc || !customMem.customFree)
+        return NULL;
+
+    zbd = (ZBUFFv07_DCtx*)customMem.customAlloc(customMem.opaque, sizeof(ZBUFFv07_DCtx));
+    if (zbd==NULL) return NULL;
+    memset(zbd, 0, sizeof(ZBUFFv07_DCtx));
+    memcpy(&zbd->customMem, &customMem, sizeof(ZSTDv07_customMem));
+    zbd->zd = ZSTDv07_createDCtx_advanced(customMem);
+    if (zbd->zd == NULL) { ZBUFFv07_freeDCtx(zbd); return NULL; }
+    zbd->stage = ZBUFFds_init;
+    return zbd;
+}
+
+size_t ZBUFFv07_freeDCtx(ZBUFFv07_DCtx* zbd)
+{
+    if (zbd==NULL) return 0;   /* support free on null */
+    ZSTDv07_freeDCtx(zbd->zd);
+    if (zbd->inBuff) zbd->customMem.customFree(zbd->customMem.opaque, zbd->inBuff);
+    if (zbd->outBuff) zbd->customMem.customFree(zbd->customMem.opaque, zbd->outBuff);
+    zbd->customMem.customFree(zbd->customMem.opaque, zbd);
+    return 0;
+}
+
+
+/* *** Initialization *** */
+
+size_t ZBUFFv07_decompressInitDictionary(ZBUFFv07_DCtx* zbd, const void* dict, size_t dictSize)
+{
+    zbd->stage = ZBUFFds_loadHeader;
+    zbd->lhSize = zbd->inPos = zbd->outStart = zbd->outEnd = 0;
+    return ZSTDv07_decompressBegin_usingDict(zbd->zd, dict, dictSize);
+}
+
+size_t ZBUFFv07_decompressInit(ZBUFFv07_DCtx* zbd)
+{
+    return ZBUFFv07_decompressInitDictionary(zbd, NULL, 0);
+}
+
+
+/* internal util function */
+MEM_STATIC size_t ZBUFFv07_limitCopy(void* dst, size_t dstCapacity, const void* src, size_t srcSize)
+{
+    size_t const length = MIN(dstCapacity, srcSize);
+    memcpy(dst, src, length);
+    return length;
+}
+
+
+/* *** Decompression *** */
+
+size_t ZBUFFv07_decompressContinue(ZBUFFv07_DCtx* zbd,
+                                void* dst, size_t* dstCapacityPtr,
+                          const void* src, size_t* srcSizePtr)
+{
+    const char* const istart = (const char*)src;
+    const char* const iend = istart + *srcSizePtr;
+    const char* ip = istart;
+    char* const ostart = (char*)dst;
+    char* const oend = ostart + *dstCapacityPtr;
+    char* op = ostart;
+    U32 notDone = 1;
+
+    while (notDone) {
+        switch(zbd->stage)
+        {
+        case ZBUFFds_init :
+            return ERROR(init_missing);
+
+        case ZBUFFds_loadHeader :
+            {   size_t const hSize = ZSTDv07_getFrameParams(&(zbd->fParams), zbd->headerBuffer, zbd->lhSize);
+                if (ZSTDv07_isError(hSize)) return hSize;
+                if (hSize != 0) {
+                    size_t const toLoad = hSize - zbd->lhSize;   /* if hSize!=0, hSize > zbd->lhSize */
+                    if (toLoad > (size_t)(iend-ip)) {   /* not enough input to load full header */
+                        memcpy(zbd->headerBuffer + zbd->lhSize, ip, iend-ip);
+                        zbd->lhSize += iend-ip;
+                        *dstCapacityPtr = 0;
+                        return (hSize - zbd->lhSize) + ZSTDv07_blockHeaderSize;   /* remaining header bytes + next block header */
+                    }
+                    memcpy(zbd->headerBuffer + zbd->lhSize, ip, toLoad); zbd->lhSize = hSize; ip += toLoad;
+                    break;
+            }   }
+
+            /* Consume header */
+            {   size_t const h1Size = ZSTDv07_nextSrcSizeToDecompress(zbd->zd);  /* == ZSTDv07_frameHeaderSize_min */
+                size_t const h1Result = ZSTDv07_decompressContinue(zbd->zd, NULL, 0, zbd->headerBuffer, h1Size);
+                if (ZSTDv07_isError(h1Result)) return h1Result;
+                if (h1Size < zbd->lhSize) {   /* long header */
+                    size_t const h2Size = ZSTDv07_nextSrcSizeToDecompress(zbd->zd);
+                    size_t const h2Result = ZSTDv07_decompressContinue(zbd->zd, NULL, 0, zbd->headerBuffer+h1Size, h2Size);
+                    if (ZSTDv07_isError(h2Result)) return h2Result;
+            }   }
+
+            zbd->fParams.windowSize = MAX(zbd->fParams.windowSize, 1U << ZSTDv07_WINDOWLOG_ABSOLUTEMIN);
+
+            /* Frame header instruct buffer sizes */
+            {   size_t const blockSize = MIN(zbd->fParams.windowSize, ZSTDv07_BLOCKSIZE_ABSOLUTEMAX);
+                zbd->blockSize = blockSize;
+                if (zbd->inBuffSize < blockSize) {
+                    zbd->customMem.customFree(zbd->customMem.opaque, zbd->inBuff);
+                    zbd->inBuffSize = blockSize;
+                    zbd->inBuff = (char*)zbd->customMem.customAlloc(zbd->customMem.opaque, blockSize);
+                    if (zbd->inBuff == NULL) return ERROR(memory_allocation);
+                }
+                {   size_t const neededOutSize = zbd->fParams.windowSize + blockSize + WILDCOPY_OVERLENGTH * 2;
+                    if (zbd->outBuffSize < neededOutSize) {
+                        zbd->customMem.customFree(zbd->customMem.opaque, zbd->outBuff);
+                        zbd->outBuffSize = neededOutSize;
+                        zbd->outBuff = (char*)zbd->customMem.customAlloc(zbd->customMem.opaque, neededOutSize);
+                        if (zbd->outBuff == NULL) return ERROR(memory_allocation);
+            }   }   }
+            zbd->stage = ZBUFFds_read;
+            /* pass-through */
+	    /* fall-through */
+        case ZBUFFds_read:
+            {   size_t const neededInSize = ZSTDv07_nextSrcSizeToDecompress(zbd->zd);
+                if (neededInSize==0) {  /* end of frame */
+                    zbd->stage = ZBUFFds_init;
+                    notDone = 0;
+                    break;
+                }
+                if ((size_t)(iend-ip) >= neededInSize) {  /* decode directly from src */
+                    const int isSkipFrame = ZSTDv07_isSkipFrame(zbd->zd);
+                    size_t const decodedSize = ZSTDv07_decompressContinue(zbd->zd,
+                        zbd->outBuff + zbd->outStart, (isSkipFrame ? 0 : zbd->outBuffSize - zbd->outStart),
+                        ip, neededInSize);
+                    if (ZSTDv07_isError(decodedSize)) return decodedSize;
+                    ip += neededInSize;
+                    if (!decodedSize && !isSkipFrame) break;   /* this was just a header */
+                    zbd->outEnd = zbd->outStart +  decodedSize;
+                    zbd->stage = ZBUFFds_flush;
+                    break;
+                }
+                if (ip==iend) { notDone = 0; break; }   /* no more input */
+                zbd->stage = ZBUFFds_load;
+            }
+	    /* fall-through */
+        case ZBUFFds_load:
+            {   size_t const neededInSize = ZSTDv07_nextSrcSizeToDecompress(zbd->zd);
+                size_t const toLoad = neededInSize - zbd->inPos;   /* should always be <= remaining space within inBuff */
+                size_t loadedSize;
+                if (toLoad > zbd->inBuffSize - zbd->inPos) return ERROR(corruption_detected);   /* should never happen */
+                loadedSize = ZBUFFv07_limitCopy(zbd->inBuff + zbd->inPos, toLoad, ip, iend-ip);
+                ip += loadedSize;
+                zbd->inPos += loadedSize;
+                if (loadedSize < toLoad) { notDone = 0; break; }   /* not enough input, wait for more */
+
+                /* decode loaded input */
+                {  const int isSkipFrame = ZSTDv07_isSkipFrame(zbd->zd);
+                   size_t const decodedSize = ZSTDv07_decompressContinue(zbd->zd,
+                        zbd->outBuff + zbd->outStart, zbd->outBuffSize - zbd->outStart,
+                        zbd->inBuff, neededInSize);
+                    if (ZSTDv07_isError(decodedSize)) return decodedSize;
+                    zbd->inPos = 0;   /* input is consumed */
+                    if (!decodedSize && !isSkipFrame) { zbd->stage = ZBUFFds_read; break; }   /* this was just a header */
+                    zbd->outEnd = zbd->outStart +  decodedSize;
+                    zbd->stage = ZBUFFds_flush;
+                    /* break; */
+                    /* pass-through */
+                }
+	    }
+	    /* fall-through */
+        case ZBUFFds_flush:
+            {   size_t const toFlushSize = zbd->outEnd - zbd->outStart;
+                size_t const flushedSize = ZBUFFv07_limitCopy(op, oend-op, zbd->outBuff + zbd->outStart, toFlushSize);
+                op += flushedSize;
+                zbd->outStart += flushedSize;
+                if (flushedSize == toFlushSize) {
+                    zbd->stage = ZBUFFds_read;
+                    if (zbd->outStart + zbd->blockSize > zbd->outBuffSize)
+                        zbd->outStart = zbd->outEnd = 0;
+                    break;
+                }
+                /* cannot flush everything */
+                notDone = 0;
+                break;
+            }
+        default: return ERROR(GENERIC);   /* impossible */
+    }   }
+
+    /* result */
+    *srcSizePtr = ip-istart;
+    *dstCapacityPtr = op-ostart;
+    {   size_t nextSrcSizeHint = ZSTDv07_nextSrcSizeToDecompress(zbd->zd);
+        nextSrcSizeHint -= zbd->inPos;   /* already loaded*/
+        return nextSrcSizeHint;
+    }
+}
+
+
+
+/* *************************************
+*  Tool functions
+***************************************/
+size_t ZBUFFv07_recommendedDInSize(void)  { return ZSTDv07_BLOCKSIZE_ABSOLUTEMAX + ZSTDv07_blockHeaderSize /* block header size*/ ; }
+size_t ZBUFFv07_recommendedDOutSize(void) { return ZSTDv07_BLOCKSIZE_ABSOLUTEMAX; }
diff --git a/contrib/libzstd/include/zstd/legacy/zstd_v07.h b/contrib/libzstd/include/zstd/legacy/zstd_v07.h
new file mode 100644
index 00000000000..cc95c661bc9
--- /dev/null
+++ b/contrib/libzstd/include/zstd/legacy/zstd_v07.h
@@ -0,0 +1,181 @@
+/**
+ * Copyright (c) 2016-present, Yann Collet, Facebook, Inc.
+ * All rights reserved.
+ *
+ * This source code is licensed under the BSD-style license found in the
+ * LICENSE file in the root directory of this source tree. An additional grant
+ * of patent rights can be found in the PATENTS file in the same directory.
+ */
+
+#ifndef ZSTDv07_H_235446
+#define ZSTDv07_H_235446
+
+#if defined (__cplusplus)
+extern "C" {
+#endif
+
+/*======  Dependency  ======*/
+#include <stddef.h>   /* size_t */
+
+
+/*======  Export for Windows  ======*/
+/*!
+*  ZSTDv07_DLL_EXPORT :
+*  Enable exporting of functions when building a Windows DLL
+*/
+#if defined(_WIN32) && defined(ZSTDv07_DLL_EXPORT) && (ZSTDv07_DLL_EXPORT==1)
+#  define ZSTDLIBv07_API __declspec(dllexport)
+#else
+#  define ZSTDLIBv07_API
+#endif
+
+
+/* *************************************
+*  Simple API
+***************************************/
+/*! ZSTDv07_getDecompressedSize() :
+*   @return : decompressed size if known, 0 otherwise.
+       note 1 : if `0`, follow up with ZSTDv07_getFrameParams() to know precise failure cause.
+       note 2 : decompressed size could be wrong or intentionally modified !
+                always ensure results fit within application's authorized limits */
+unsigned long long ZSTDv07_getDecompressedSize(const void* src, size_t srcSize);
+
+/*! ZSTDv07_decompress() :
+    `compressedSize` : must be _exact_ size of compressed input, otherwise decompression will fail.
+    `dstCapacity` must be equal or larger than originalSize.
+    @return : the number of bytes decompressed into `dst` (<= `dstCapacity`),
+              or an errorCode if it fails (which can be tested using ZSTDv07_isError()) */
+ZSTDLIBv07_API size_t ZSTDv07_decompress( void* dst, size_t dstCapacity,
+                                    const void* src, size_t compressedSize);
+
+/**
+ZSTDv07_getFrameSrcSize() : get the source length of a ZSTD frame
+    compressedSize : The size of the 'src' buffer, at least as large as the frame pointed to by 'src'
+    return : the number of bytes that would be read to decompress this frame
+             or an errorCode if it fails (which can be tested using ZSTDv07_isError())
+*/
+size_t ZSTDv07_findFrameCompressedSize(const void* src, size_t compressedSize);
+
+/*======  Helper functions  ======*/
+ZSTDLIBv07_API unsigned    ZSTDv07_isError(size_t code);          /*!< tells if a `size_t` function result is an error code */
+ZSTDLIBv07_API const char* ZSTDv07_getErrorName(size_t code);     /*!< provides readable string from an error code */
+
+
+/*-*************************************
+*  Explicit memory management
+***************************************/
+/** Decompression context */
+typedef struct ZSTDv07_DCtx_s ZSTDv07_DCtx;
+ZSTDLIBv07_API ZSTDv07_DCtx* ZSTDv07_createDCtx(void);
+ZSTDLIBv07_API size_t     ZSTDv07_freeDCtx(ZSTDv07_DCtx* dctx);      /*!< @return : errorCode */
+
+/** ZSTDv07_decompressDCtx() :
+*   Same as ZSTDv07_decompress(), requires an allocated ZSTDv07_DCtx (see ZSTDv07_createDCtx()) */
+ZSTDLIBv07_API size_t ZSTDv07_decompressDCtx(ZSTDv07_DCtx* ctx, void* dst, size_t dstCapacity, const void* src, size_t srcSize);
+
+
+/*-************************
+*  Simple dictionary API
+***************************/
+/*! ZSTDv07_decompress_usingDict() :
+*   Decompression using a pre-defined Dictionary content (see dictBuilder).
+*   Dictionary must be identical to the one used during compression.
+*   Note : This function load the dictionary, resulting in a significant startup time */
+ZSTDLIBv07_API size_t ZSTDv07_decompress_usingDict(ZSTDv07_DCtx* dctx,
+                                                   void* dst, size_t dstCapacity,
+                                             const void* src, size_t srcSize,
+                                             const void* dict,size_t dictSize);
+
+
+/*-**************************
+*  Advanced Dictionary API
+****************************/
+/*! ZSTDv07_createDDict() :
+*   Create a digested dictionary, ready to start decompression operation without startup delay.
+*   `dict` can be released after creation */
+typedef struct ZSTDv07_DDict_s ZSTDv07_DDict;
+ZSTDLIBv07_API ZSTDv07_DDict* ZSTDv07_createDDict(const void* dict, size_t dictSize);
+ZSTDLIBv07_API size_t      ZSTDv07_freeDDict(ZSTDv07_DDict* ddict);
+
+/*! ZSTDv07_decompress_usingDDict() :
+*   Decompression using a pre-digested Dictionary
+*   Faster startup than ZSTDv07_decompress_usingDict(), recommended when same dictionary is used multiple times. */
+ZSTDLIBv07_API size_t ZSTDv07_decompress_usingDDict(ZSTDv07_DCtx* dctx,
+                                                    void* dst, size_t dstCapacity,
+                                              const void* src, size_t srcSize,
+                                              const ZSTDv07_DDict* ddict);
+
+typedef struct {
+    unsigned long long frameContentSize;
+    unsigned windowSize;
+    unsigned dictID;
+    unsigned checksumFlag;
+} ZSTDv07_frameParams;
+
+ZSTDLIBv07_API size_t ZSTDv07_getFrameParams(ZSTDv07_frameParams* fparamsPtr, const void* src, size_t srcSize);   /**< doesn't consume input */
+
+
+
+
+/* *************************************
+*  Streaming functions
+***************************************/
+typedef struct ZBUFFv07_DCtx_s ZBUFFv07_DCtx;
+ZSTDLIBv07_API ZBUFFv07_DCtx* ZBUFFv07_createDCtx(void);
+ZSTDLIBv07_API size_t      ZBUFFv07_freeDCtx(ZBUFFv07_DCtx* dctx);
+
+ZSTDLIBv07_API size_t ZBUFFv07_decompressInit(ZBUFFv07_DCtx* dctx);
+ZSTDLIBv07_API size_t ZBUFFv07_decompressInitDictionary(ZBUFFv07_DCtx* dctx, const void* dict, size_t dictSize);
+
+ZSTDLIBv07_API size_t ZBUFFv07_decompressContinue(ZBUFFv07_DCtx* dctx,
+                                            void* dst, size_t* dstCapacityPtr,
+                                      const void* src, size_t* srcSizePtr);
+
+/*-***************************************************************************
+*  Streaming decompression howto
+*
+*  A ZBUFFv07_DCtx object is required to track streaming operations.
+*  Use ZBUFFv07_createDCtx() and ZBUFFv07_freeDCtx() to create/release resources.
+*  Use ZBUFFv07_decompressInit() to start a new decompression operation,
+*   or ZBUFFv07_decompressInitDictionary() if decompression requires a dictionary.
+*  Note that ZBUFFv07_DCtx objects can be re-init multiple times.
+*
+*  Use ZBUFFv07_decompressContinue() repetitively to consume your input.
+*  *srcSizePtr and *dstCapacityPtr can be any size.
+*  The function will report how many bytes were read or written by modifying *srcSizePtr and *dstCapacityPtr.
+*  Note that it may not consume the entire input, in which case it's up to the caller to present remaining input again.
+*  The content of `dst` will be overwritten (up to *dstCapacityPtr) at each function call, so save its content if it matters, or change `dst`.
+*  @return : a hint to preferred nb of bytes to use as input for next function call (it's only a hint, to help latency),
+*            or 0 when a frame is completely decoded,
+*            or an error code, which can be tested using ZBUFFv07_isError().
+*
+*  Hint : recommended buffer sizes (not compulsory) : ZBUFFv07_recommendedDInSize() and ZBUFFv07_recommendedDOutSize()
+*  output : ZBUFFv07_recommendedDOutSize== 128 KB block size is the internal unit, it ensures it's always possible to write a full block when decoded.
+*  input  : ZBUFFv07_recommendedDInSize == 128KB + 3;
+*           just follow indications from ZBUFFv07_decompressContinue() to minimize latency. It should always be <= 128 KB + 3 .
+* *******************************************************************************/
+
+
+/* *************************************
+*  Tool functions
+***************************************/
+ZSTDLIBv07_API unsigned ZBUFFv07_isError(size_t errorCode);
+ZSTDLIBv07_API const char* ZBUFFv07_getErrorName(size_t errorCode);
+
+/** Functions below provide recommended buffer sizes for Compression or Decompression operations.
+*   These sizes are just hints, they tend to offer better latency */
+ZSTDLIBv07_API size_t ZBUFFv07_recommendedDInSize(void);
+ZSTDLIBv07_API size_t ZBUFFv07_recommendedDOutSize(void);
+
+
+/*-*************************************
+*  Constants
+***************************************/
+#define ZSTDv07_MAGICNUMBER            0xFD2FB527   /* v0.7 */
+
+
+#if defined (__cplusplus)
+}
+#endif
+
+#endif  /* ZSTDv07_H_235446 */
diff --git a/contrib/libzstd/include/zstd/zstd.h b/contrib/libzstd/include/zstd/zstd.h
index dd3f5df4c7f..58e9a5606db 100644
--- a/contrib/libzstd/include/zstd/zstd.h
+++ b/contrib/libzstd/include/zstd/zstd.h
@@ -1,4 +1,4 @@
-/**
+/*
  * Copyright (c) 2016-present, Yann Collet, Facebook, Inc.
  * All rights reserved.
  *
@@ -7,80 +7,126 @@
  * of patent rights can be found in the PATENTS file in the same directory.
  */
 
-#ifndef ZSTD_H_235446
-#define ZSTD_H_235446
-
 #if defined (__cplusplus)
 extern "C" {
 #endif
 
-/*======   Dependency   ======*/
+#ifndef ZSTD_H_235446
+#define ZSTD_H_235446
+
+/* ======   Dependency   ======*/
 #include <stddef.h>   /* size_t */
 
 
-/*======  Export for Windows  ======*/
-/*!
-*  ZSTD_DLL_EXPORT :
-*  Enable exporting of functions when building a Windows DLL
-*/
-#if defined(_WIN32) && defined(ZSTD_DLL_EXPORT) && (ZSTD_DLL_EXPORT==1)
-#  define ZSTDLIB_API __declspec(dllexport)
+/* =====   ZSTDLIB_API : control library symbols visibility   ===== */
+#ifndef ZSTDLIB_VISIBILITY
+#  if defined(__GNUC__) && (__GNUC__ >= 4)
+#    define ZSTDLIB_VISIBILITY __attribute__ ((visibility ("default")))
+#  else
+#    define ZSTDLIB_VISIBILITY
+#  endif
+#endif
+#if defined(ZSTD_DLL_EXPORT) && (ZSTD_DLL_EXPORT==1)
+#  define ZSTDLIB_API __declspec(dllexport) ZSTDLIB_VISIBILITY
+#elif defined(ZSTD_DLL_IMPORT) && (ZSTD_DLL_IMPORT==1)
+#  define ZSTDLIB_API __declspec(dllimport) ZSTDLIB_VISIBILITY /* It isn't required but allows to generate better code, saving a function pointer load from the IAT and an indirect jump.*/
 #else
-#  define ZSTDLIB_API
+#  define ZSTDLIB_API ZSTDLIB_VISIBILITY
 #endif
 
 
-/*=======   Version   =======*/
+/*******************************************************************************************************
+  Introduction
+
+  zstd, short for Zstandard, is a fast lossless compression algorithm,
+  targeting real-time compression scenarios at zlib-level and better compression ratios.
+  The zstd compression library provides in-memory compression and decompression functions.
+  The library supports compression levels from 1 up to ZSTD_maxCLevel() which is currently 22.
+  Levels >= 20, labeled `--ultra`, should be used with caution, as they require more memory.
+  Compression can be done in:
+    - a single step (described as Simple API)
+    - a single step, reusing a context (described as Explicit memory management)
+    - unbounded multiple steps (described as Streaming compression)
+  The compression ratio achievable on small data can be highly improved using a dictionary in:
+    - a single step (described as Simple dictionary API)
+    - a single step, reusing a dictionary (described as Fast dictionary API)
+
+  Advanced experimental functions can be accessed using #define ZSTD_STATIC_LINKING_ONLY before including zstd.h.
+  Advanced experimental APIs shall never be used with a dynamic library.
+  They are not "stable", their definition may change in the future. Only static linking is allowed.
+*********************************************************************************************************/
+
+/*------   Version   ------*/
 #define ZSTD_VERSION_MAJOR    1
-#define ZSTD_VERSION_MINOR    1
+#define ZSTD_VERSION_MINOR    3
 #define ZSTD_VERSION_RELEASE  0
 
+#define ZSTD_VERSION_NUMBER  (ZSTD_VERSION_MAJOR *100*100 + ZSTD_VERSION_MINOR *100 + ZSTD_VERSION_RELEASE)
+ZSTDLIB_API unsigned ZSTD_versionNumber(void);   /**< useful to check dll version */
+
 #define ZSTD_LIB_VERSION ZSTD_VERSION_MAJOR.ZSTD_VERSION_MINOR.ZSTD_VERSION_RELEASE
 #define ZSTD_QUOTE(str) #str
 #define ZSTD_EXPAND_AND_QUOTE(str) ZSTD_QUOTE(str)
 #define ZSTD_VERSION_STRING ZSTD_EXPAND_AND_QUOTE(ZSTD_LIB_VERSION)
-
-#define ZSTD_VERSION_NUMBER  (ZSTD_VERSION_MAJOR *100*100 + ZSTD_VERSION_MINOR *100 + ZSTD_VERSION_RELEASE)
-ZSTDLIB_API unsigned ZSTD_versionNumber (void);
+ZSTDLIB_API const char* ZSTD_versionString(void);   /* v1.3.0 */
 
 
-/* *************************************
+/***************************************
 *  Simple API
 ***************************************/
 /*! ZSTD_compress() :
-    Compresses `src` content as a single zstd compressed frame into already allocated `dst`.
-    Hint : compression runs faster if `dstCapacity` >=  `ZSTD_compressBound(srcSize)`.
-    @return : compressed size written into `dst` (<= `dstCapacity),
-              or an error code if it fails (which can be tested using ZSTD_isError()) */
+ *  Compresses `src` content as a single zstd compressed frame into already allocated `dst`.
+ *  Hint : compression runs faster if `dstCapacity` >=  `ZSTD_compressBound(srcSize)`.
+ *  @return : compressed size written into `dst` (<= `dstCapacity),
+ *            or an error code if it fails (which can be tested using ZSTD_isError()). */
 ZSTDLIB_API size_t ZSTD_compress( void* dst, size_t dstCapacity,
                             const void* src, size_t srcSize,
                                   int compressionLevel);
 
 /*! ZSTD_decompress() :
-    `compressedSize` : must be the _exact_ size of a single compressed frame.
-    `dstCapacity` is an upper bound of originalSize.
-    If user cannot imply a maximum upper bound, it's better to use streaming mode to decompress data.
-    @return : the number of bytes decompressed into `dst` (<= `dstCapacity`),
-              or an errorCode if it fails (which can be tested using ZSTD_isError()) */
+ *  `compressedSize` : must be the _exact_ size of some number of compressed and/or skippable frames.
+ *  `dstCapacity` is an upper bound of originalSize to regenerate.
+ *  If user cannot imply a maximum upper bound, it's better to use streaming mode to decompress data.
+ *  @return : the number of bytes decompressed into `dst` (<= `dstCapacity`),
+ *            or an errorCode if it fails (which can be tested using ZSTD_isError()). */
 ZSTDLIB_API size_t ZSTD_decompress( void* dst, size_t dstCapacity,
                               const void* src, size_t compressedSize);
 
+/*! ZSTD_getFrameContentSize() : v1.3.0
+ *  `src` should point to the start of a ZSTD encoded frame.
+ *  `srcSize` must be at least as large as the frame header.
+ *            hint : any size >= `ZSTD_frameHeaderSize_max` is large enough.
+ *  @return : - decompressed size of the frame in `src`, if known
+ *            - ZSTD_CONTENTSIZE_UNKNOWN if the size cannot be determined
+ *            - ZSTD_CONTENTSIZE_ERROR if an error occurred (e.g. invalid magic number, srcSize too small)
+ *   note 1 : a 0 return value means the frame is valid but "empty".
+ *   note 2 : decompressed size is an optional field, it may not be present, typically in streaming mode.
+ *            When `return==ZSTD_CONTENTSIZE_UNKNOWN`, data to decompress could be any size.
+ *            In which case, it's necessary to use streaming mode to decompress data.
+ *            Optionally, application can rely on some implicit limit,
+ *            as ZSTD_decompress() only needs an upper bound of decompressed size.
+ *            (For example, data could be necessarily cut into blocks <= 16 KB).
+ *   note 3 : decompressed size is always present when compression is done with ZSTD_compress()
+ *   note 4 : decompressed size can be very large (64-bits value),
+ *            potentially larger than what local system can handle as a single memory segment.
+ *            In which case, it's necessary to use streaming mode to decompress data.
+ *   note 5 : If source is untrusted, decompressed size could be wrong or intentionally modified.
+ *            Always ensure return value fits within application's authorized limits.
+ *            Each application can set its own limits.
+ *   note 6 : This function replaces ZSTD_getDecompressedSize() */
+#define ZSTD_CONTENTSIZE_UNKNOWN (0ULL - 1)
+#define ZSTD_CONTENTSIZE_ERROR   (0ULL - 2)
+ZSTDLIB_API unsigned long long ZSTD_getFrameContentSize(const void *src, size_t srcSize);
+
 /*! ZSTD_getDecompressedSize() :
-*   'src' is the start of a zstd compressed frame.
-*   @return : content size to be decompressed, as a 64-bits value _if known_, 0 otherwise.
-*    note 1 : decompressed size is an optional field, that may not be present, especially in streaming mode.
-*             When `return==0`, data to decompress could be any size.
-*             In which case, it's necessary to use streaming mode to decompress data.
-*             Optionally, application can still use ZSTD_decompress() while relying on implied limits.
-*             (For example, data may be necessarily cut into blocks <= 16 KB).
-*    note 2 : decompressed size is always present when compression is done with ZSTD_compress()
-*    note 3 : decompressed size can be very large (64-bits value),
-*             potentially larger than what local system can handle as a single memory segment.
-*             In which case, it's necessary to use streaming mode to decompress data.
-*    note 4 : If source is untrusted, decompressed size could be wrong or intentionally modified.
-*             Always ensure result fits within application's authorized limits.
-*             Each application can set its own limits.
-*    note 5 : when `return==0`, if precise failure cause is needed, use ZSTD_getFrameParams() to know more. */
+ *  NOTE: This function is now obsolete, in favor of ZSTD_getFrameContentSize().
+ *  Both functions work the same way,
+ *  but ZSTD_getDecompressedSize() blends
+ *  "empty", "unknown" and "error" results in the same return value (0),
+ *  while ZSTD_getFrameContentSize() distinguishes them.
+ *
+ *  'src' is the start of a zstd compressed frame.
+ *  @return : content size to be decompressed, as a 64-bits value _if known and not empty_, 0 otherwise. */
 ZSTDLIB_API unsigned long long ZSTD_getDecompressedSize(const void* src, size_t srcSize);
 
 
@@ -91,34 +137,49 @@ ZSTDLIB_API unsigned    ZSTD_isError(size_t code);          /*!< tells if a `siz
 ZSTDLIB_API const char* ZSTD_getErrorName(size_t code);     /*!< provides readable string from an error code */
 
 
-/*-*************************************
+/***************************************
 *  Explicit memory management
 ***************************************/
-/** Compression context */
+/*= Compression context
+ *  When compressing many times,
+ *  it is recommended to allocate a context just once, and re-use it for each successive compression operation.
+ *  This will make workload friendlier for system's memory.
+ *  Use one context per thread for parallel execution in multi-threaded environments. */
 typedef struct ZSTD_CCtx_s ZSTD_CCtx;
 ZSTDLIB_API ZSTD_CCtx* ZSTD_createCCtx(void);
 ZSTDLIB_API size_t     ZSTD_freeCCtx(ZSTD_CCtx* cctx);
 
-/** ZSTD_compressCCtx() :
-    Same as ZSTD_compress(), requires an allocated ZSTD_CCtx (see ZSTD_createCCtx()) */
-ZSTDLIB_API size_t ZSTD_compressCCtx(ZSTD_CCtx* ctx, void* dst, size_t dstCapacity, const void* src, size_t srcSize, int compressionLevel);
+/*! ZSTD_compressCCtx() :
+ *  Same as ZSTD_compress(), requires an allocated ZSTD_CCtx (see ZSTD_createCCtx()). */
+ZSTDLIB_API size_t ZSTD_compressCCtx(ZSTD_CCtx* ctx,
+                                     void* dst, size_t dstCapacity,
+                               const void* src, size_t srcSize,
+                                     int compressionLevel);
 
-/** Decompression context */
+/*= Decompression context
+ *  When decompressing many times,
+ *  it is recommended to allocate a context only once,
+ *  and re-use it for each successive compression operation.
+ *  This will make workload friendlier for system's memory.
+ *  Use one context per thread for parallel execution. */
 typedef struct ZSTD_DCtx_s ZSTD_DCtx;
 ZSTDLIB_API ZSTD_DCtx* ZSTD_createDCtx(void);
 ZSTDLIB_API size_t     ZSTD_freeDCtx(ZSTD_DCtx* dctx);
 
-/** ZSTD_decompressDCtx() :
-*   Same as ZSTD_decompress(), requires an allocated ZSTD_DCtx (see ZSTD_createDCtx()) */
-ZSTDLIB_API size_t ZSTD_decompressDCtx(ZSTD_DCtx* ctx, void* dst, size_t dstCapacity, const void* src, size_t srcSize);
+/*! ZSTD_decompressDCtx() :
+ *  Same as ZSTD_decompress(), requires an allocated ZSTD_DCtx (see ZSTD_createDCtx()) */
+ZSTDLIB_API size_t ZSTD_decompressDCtx(ZSTD_DCtx* ctx,
+                                       void* dst, size_t dstCapacity,
+                                 const void* src, size_t srcSize);
 
 
-/*-************************
+/**************************
 *  Simple dictionary API
 ***************************/
 /*! ZSTD_compress_usingDict() :
-*   Compression using a predefined Dictionary (see dictBuilder/zdict.h).
-*   Note : This function load the dictionary, resulting in significant startup delay. */
+ *  Compression using a predefined Dictionary (see dictBuilder/zdict.h).
+ *  Note : This function loads the dictionary, resulting in significant startup delay.
+ *  Note : When `dict == NULL || dictSize < 8` no dictionary is used. */
 ZSTDLIB_API size_t ZSTD_compress_usingDict(ZSTD_CCtx* ctx,
                                            void* dst, size_t dstCapacity,
                                      const void* src, size_t srcSize,
@@ -126,51 +187,65 @@ ZSTDLIB_API size_t ZSTD_compress_usingDict(ZSTD_CCtx* ctx,
                                            int compressionLevel);
 
 /*! ZSTD_decompress_usingDict() :
-*   Decompression using a predefined Dictionary (see dictBuilder/zdict.h).
-*   Dictionary must be identical to the one used during compression.
-*   Note : This function load the dictionary, resulting in significant startup delay */
+ *  Decompression using a predefined Dictionary (see dictBuilder/zdict.h).
+ *  Dictionary must be identical to the one used during compression.
+ *  Note : This function loads the dictionary, resulting in significant startup delay.
+ *  Note : When `dict == NULL || dictSize < 8` no dictionary is used. */
 ZSTDLIB_API size_t ZSTD_decompress_usingDict(ZSTD_DCtx* dctx,
                                              void* dst, size_t dstCapacity,
                                        const void* src, size_t srcSize,
                                        const void* dict,size_t dictSize);
 
 
-/*-**************************
-*  Fast Dictionary API
-****************************/
-/*! ZSTD_createCDict() :
-*   Create a digested dictionary, ready to start compression operation without startup delay.
-*   `dict` can be released after ZSTD_CDict creation */
+/**********************************
+ *  Bulk processing dictionary API
+ *********************************/
 typedef struct ZSTD_CDict_s ZSTD_CDict;
-ZSTDLIB_API ZSTD_CDict* ZSTD_createCDict(const void* dict, size_t dictSize, int compressionLevel);
+
+/*! ZSTD_createCDict() :
+ *  When compressing multiple messages / blocks with the same dictionary, it's recommended to load it just once.
+ *  ZSTD_createCDict() will create a digested dictionary, ready to start future compression operations without startup delay.
+ *  ZSTD_CDict can be created once and shared by multiple threads concurrently, since its usage is read-only.
+ *  `dictBuffer` can be released after ZSTD_CDict creation, since its content is copied within CDict */
+ZSTDLIB_API ZSTD_CDict* ZSTD_createCDict(const void* dictBuffer, size_t dictSize,
+                                         int compressionLevel);
+
+/*! ZSTD_freeCDict() :
+ *  Function frees memory allocated by ZSTD_createCDict(). */
 ZSTDLIB_API size_t      ZSTD_freeCDict(ZSTD_CDict* CDict);
 
 /*! ZSTD_compress_usingCDict() :
-*   Compression using a digested Dictionary.
-*   Faster startup than ZSTD_compress_usingDict(), recommended when same dictionary is used multiple times.
-*   Note that compression level is decided during dictionary creation */
+ *  Compression using a digested Dictionary.
+ *  Faster startup than ZSTD_compress_usingDict(), recommended when same dictionary is used multiple times.
+ *  Note that compression level is decided during dictionary creation.
+ *  Frame parameters are hardcoded (dictID=yes, contentSize=yes, checksum=no) */
 ZSTDLIB_API size_t ZSTD_compress_usingCDict(ZSTD_CCtx* cctx,
                                             void* dst, size_t dstCapacity,
                                       const void* src, size_t srcSize,
                                       const ZSTD_CDict* cdict);
 
-/*! ZSTD_createDDict() :
-*   Create a digested dictionary, ready to start decompression operation without startup delay.
-*   `dict` can be released after creation */
+
 typedef struct ZSTD_DDict_s ZSTD_DDict;
-ZSTDLIB_API ZSTD_DDict* ZSTD_createDDict(const void* dict, size_t dictSize);
+
+/*! ZSTD_createDDict() :
+ *  Create a digested dictionary, ready to start decompression operation without startup delay.
+ *  dictBuffer can be released after DDict creation, as its content is copied inside DDict */
+ZSTDLIB_API ZSTD_DDict* ZSTD_createDDict(const void* dictBuffer, size_t dictSize);
+
+/*! ZSTD_freeDDict() :
+ *  Function frees memory allocated with ZSTD_createDDict() */
 ZSTDLIB_API size_t      ZSTD_freeDDict(ZSTD_DDict* ddict);
 
 /*! ZSTD_decompress_usingDDict() :
-*   Decompression using a digested Dictionary
-*   Faster startup than ZSTD_decompress_usingDict(), recommended when same dictionary is used multiple times. */
+ *  Decompression using a digested Dictionary.
+ *  Faster startup than ZSTD_decompress_usingDict(), recommended when same dictionary is used multiple times. */
 ZSTDLIB_API size_t ZSTD_decompress_usingDDict(ZSTD_DCtx* dctx,
                                               void* dst, size_t dstCapacity,
                                         const void* src, size_t srcSize,
                                         const ZSTD_DDict* ddict);
 
 
-/*-**************************
+/****************************
 *  Streaming
 ****************************/
 
@@ -187,61 +262,67 @@ typedef struct ZSTD_outBuffer_s {
 } ZSTD_outBuffer;
 
 
-/*======   streaming compression   ======*/
 
 /*-***********************************************************************
-*  Streaming compression - howto
+*  Streaming compression - HowTo
 *
 *  A ZSTD_CStream object is required to track streaming operation.
 *  Use ZSTD_createCStream() and ZSTD_freeCStream() to create/release resources.
 *  ZSTD_CStream objects can be reused multiple times on consecutive compression operations.
+*  It is recommended to re-use ZSTD_CStream in situations where many streaming operations will be achieved consecutively,
+*  since it will play nicer with system's memory, by re-using already allocated memory.
+*  Use one separate ZSTD_CStream per thread for parallel execution.
 *
-*  Start by initializing ZSTD_CStream.
+*  Start a new compression by initializing ZSTD_CStream.
 *  Use ZSTD_initCStream() to start a new compression operation.
-*  Use ZSTD_initCStream_usingDict() for a compression which requires a dictionary.
+*  Use ZSTD_initCStream_usingDict() or ZSTD_initCStream_usingCDict() for a compression which requires a dictionary (experimental section)
 *
 *  Use ZSTD_compressStream() repetitively to consume input stream.
 *  The function will automatically update both `pos` fields.
 *  Note that it may not consume the entire input, in which case `pos < size`,
 *  and it's up to the caller to present again remaining data.
 *  @return : a size hint, preferred nb of bytes to use as input for next function call
-*           (it's just a hint, to help latency a little, any other value will work fine)
-*           (note : the size hint is guaranteed to be <= ZSTD_CStreamInSize() )
 *            or an error code, which can be tested using ZSTD_isError().
+*            Note 1 : it's just a hint, to help latency a little, any other value will work fine.
+*            Note 2 : size hint is guaranteed to be <= ZSTD_CStreamInSize()
 *
-*  At any moment, it's possible to flush whatever data remains within buffer, using ZSTD_flushStream().
+*  At any moment, it's possible to flush whatever data remains within internal buffer, using ZSTD_flushStream().
 *  `output->pos` will be updated.
-*  Note some content might still be left within internal buffer if `output->size` is too small.
+*  Note that some content might still be left within internal buffer if `output->size` is too small.
 *  @return : nb of bytes still present within internal buffer (0 if it's empty)
 *            or an error code, which can be tested using ZSTD_isError().
 *
 *  ZSTD_endStream() instructs to finish a frame.
 *  It will perform a flush and write frame epilogue.
 *  The epilogue is required for decoders to consider a frame completed.
-*  Similar to ZSTD_flushStream(), it may not be able to flush the full content if `output->size` is too small.
+*  ZSTD_endStream() may not be able to flush full data if `output->size` is too small.
 *  In which case, call again ZSTD_endStream() to complete the flush.
-*  @return : nb of bytes still present within internal buffer (0 if it's empty)
+*  @return : 0 if frame fully completed and fully flushed,
+             or >0 if some data is still present within internal buffer
+                  (value is minimum size estimation for remaining data to flush, but it could be more)
 *            or an error code, which can be tested using ZSTD_isError().
 *
 * *******************************************************************/
 
-typedef struct ZSTD_CStream_s ZSTD_CStream;
+typedef ZSTD_CCtx ZSTD_CStream;  /**< CCtx and CStream are now effectively same object (>= v1.3.0) */
+                                 /* Continue to distinguish them for compatibility with versions <= v1.2.0 */
+/*===== ZSTD_CStream management functions =====*/
 ZSTDLIB_API ZSTD_CStream* ZSTD_createCStream(void);
 ZSTDLIB_API size_t ZSTD_freeCStream(ZSTD_CStream* zcs);
 
-ZSTDLIB_API size_t ZSTD_CStreamInSize(void);    /**< recommended size for input buffer */
-ZSTDLIB_API size_t ZSTD_CStreamOutSize(void);   /**< recommended size for output buffer. Guarantee to successfully flush at least one complete compressed block in all circumstances. */
-
+/*===== Streaming compression functions =====*/
 ZSTDLIB_API size_t ZSTD_initCStream(ZSTD_CStream* zcs, int compressionLevel);
 ZSTDLIB_API size_t ZSTD_compressStream(ZSTD_CStream* zcs, ZSTD_outBuffer* output, ZSTD_inBuffer* input);
 ZSTDLIB_API size_t ZSTD_flushStream(ZSTD_CStream* zcs, ZSTD_outBuffer* output);
 ZSTDLIB_API size_t ZSTD_endStream(ZSTD_CStream* zcs, ZSTD_outBuffer* output);
 
+ZSTDLIB_API size_t ZSTD_CStreamInSize(void);    /**< recommended size for input buffer */
+ZSTDLIB_API size_t ZSTD_CStreamOutSize(void);   /**< recommended size for output buffer. Guarantee to successfully flush at least one complete compressed block in all circumstances. */
+
 
-/*======   decompression   ======*/
 
 /*-***************************************************************************
-*  Streaming decompression howto
+*  Streaming decompression - HowTo
 *
 *  A ZSTD_DStream object is required to track streaming operations.
 *  Use ZSTD_createDStream() and ZSTD_freeDStream() to create/release resources.
@@ -258,38 +339,46 @@ ZSTDLIB_API size_t ZSTD_endStream(ZSTD_CStream* zcs, ZSTD_outBuffer* output);
 *  If `output.pos < output.size`, decoder has flushed everything it could.
 *  @return : 0 when a frame is completely decoded and fully flushed,
 *            an error code, which can be tested using ZSTD_isError(),
-*            any other value > 0, which means there is still some work to do to complete the frame.
-*            The return value is a suggested next input size (just an hint, to help latency).
+*            any other value > 0, which means there is still some decoding to do to complete current frame.
+*            The return value is a suggested next input size (a hint to improve latency) that will never load more than the current frame.
 * *******************************************************************************/
 
-typedef struct ZSTD_DStream_s ZSTD_DStream;
+typedef ZSTD_DCtx ZSTD_DStream;  /**< DCtx and DStream are now effectively same object (>= v1.3.0) */
+                                 /* Continue to distinguish them for compatibility with versions <= v1.2.0 */
+/*===== ZSTD_DStream management functions =====*/
 ZSTDLIB_API ZSTD_DStream* ZSTD_createDStream(void);
 ZSTDLIB_API size_t ZSTD_freeDStream(ZSTD_DStream* zds);
 
+/*===== Streaming decompression functions =====*/
+ZSTDLIB_API size_t ZSTD_initDStream(ZSTD_DStream* zds);
+ZSTDLIB_API size_t ZSTD_decompressStream(ZSTD_DStream* zds, ZSTD_outBuffer* output, ZSTD_inBuffer* input);
+
 ZSTDLIB_API size_t ZSTD_DStreamInSize(void);    /*!< recommended size for input buffer */
 ZSTDLIB_API size_t ZSTD_DStreamOutSize(void);   /*!< recommended size for output buffer. Guarantee to successfully flush at least one complete block in all circumstances. */
 
-ZSTDLIB_API size_t ZSTD_initDStream(ZSTD_DStream* zds);
-ZSTDLIB_API size_t ZSTD_decompressStream(ZSTD_DStream* zds, ZSTD_outBuffer* output, ZSTD_inBuffer* input);
+#endif  /* ZSTD_H_235446 */
 
 
 
-#ifdef ZSTD_STATIC_LINKING_ONLY
-
-/* ====================================================================================
+/****************************************************************************************
+ * START OF ADVANCED AND EXPERIMENTAL FUNCTIONS
  * The definitions in this section are considered experimental.
- * They should never be used with a dynamic library, as they may change in the future.
- * They are provided for advanced usages.
+ * They should never be used with a dynamic library, as prototypes may change in the future.
+ * They are provided for advanced scenarios.
  * Use them only in association with static linking.
- * ==================================================================================== */
+ * ***************************************************************************************/
 
-/*--- Constants ---*/
-#define ZSTD_MAGICNUMBER            0xFD2FB528   /* v0.8 */
+#if defined(ZSTD_STATIC_LINKING_ONLY) && !defined(ZSTD_H_ZSTD_STATIC_LINKING_ONLY)
+#define ZSTD_H_ZSTD_STATIC_LINKING_ONLY
+
+/* --- Constants ---*/
+#define ZSTD_MAGICNUMBER            0xFD2FB528   /* >= v0.8.0 */
 #define ZSTD_MAGIC_SKIPPABLE_START  0x184D2A50U
+#define ZSTD_MAGIC_DICTIONARY       0xEC30A437   /* v0.7+ */
 
-#define ZSTD_WINDOWLOG_MAX_32  25
+#define ZSTD_WINDOWLOG_MAX_32  27
 #define ZSTD_WINDOWLOG_MAX_64  27
-#define ZSTD_WINDOWLOG_MAX    ((U32)(MEM_32bits() ? ZSTD_WINDOWLOG_MAX_32 : ZSTD_WINDOWLOG_MAX_64))
+#define ZSTD_WINDOWLOG_MAX    ((unsigned)(sizeof(size_t) == 4 ? ZSTD_WINDOWLOG_MAX_32 : ZSTD_WINDOWLOG_MAX_64))
 #define ZSTD_WINDOWLOG_MIN     10
 #define ZSTD_HASHLOG_MAX       ZSTD_WINDOWLOG_MAX
 #define ZSTD_HASHLOG_MIN        6
@@ -304,14 +393,16 @@ ZSTDLIB_API size_t ZSTD_decompressStream(ZSTD_DStream* zds, ZSTD_outBuffer* outp
 #define ZSTD_TARGETLENGTH_MAX 999
 
 #define ZSTD_FRAMEHEADERSIZE_MAX 18    /* for static allocation */
-static const size_t ZSTD_frameHeaderSize_prefix = 5;
-static const size_t ZSTD_frameHeaderSize_min = 6;
+#define ZSTD_FRAMEHEADERSIZE_MIN  6
+static const size_t ZSTD_frameHeaderSize_prefix = 5;  /* minimum input size to know frame header size */
 static const size_t ZSTD_frameHeaderSize_max = ZSTD_FRAMEHEADERSIZE_MAX;
+static const size_t ZSTD_frameHeaderSize_min = ZSTD_FRAMEHEADERSIZE_MIN;
 static const size_t ZSTD_skippableHeaderSize = 8;  /* magic number + skippable frame length */
 
 
-/*--- Types ---*/
-typedef enum { ZSTD_fast, ZSTD_dfast, ZSTD_greedy, ZSTD_lazy, ZSTD_lazy2, ZSTD_btlazy2, ZSTD_btopt } ZSTD_strategy;   /* from faster to stronger */
+/*--- Advanced types ---*/
+typedef enum { ZSTD_fast=1, ZSTD_dfast, ZSTD_greedy, ZSTD_lazy, ZSTD_lazy2,
+               ZSTD_btlazy2, ZSTD_btopt, ZSTD_btultra } ZSTD_strategy;   /* from faster to stronger */
 
 typedef struct {
     unsigned windowLog;      /**< largest match distance : larger == more compression, more memory needed during decompression */
@@ -324,9 +415,9 @@ typedef struct {
 } ZSTD_compressionParameters;
 
 typedef struct {
-    unsigned contentSizeFlag; /**< 1: content size will be in frame header (if known). */
-    unsigned checksumFlag;    /**< 1: will generate a 22-bits checksum at end of frame, to be used for error detection by decompressor */
-    unsigned noDictIDFlag;    /**< 1: no dict ID will be saved into frame header (if dictionary compression) */
+    unsigned contentSizeFlag; /**< 1: content size will be in frame header (when known) */
+    unsigned checksumFlag;    /**< 1: generate a 32-bits checksum at end of frame, for error detection */
+    unsigned noDictIDFlag;    /**< 1: no dictID will be saved into frame header (if dictionary compression) */
 } ZSTD_frameParameters;
 
 typedef struct {
@@ -334,125 +425,350 @@ typedef struct {
     ZSTD_frameParameters fParams;
 } ZSTD_parameters;
 
-/* custom memory allocation functions */
+typedef struct {
+    unsigned long long frameContentSize;
+    size_t windowSize;
+    unsigned dictID;
+    unsigned checksumFlag;
+} ZSTD_frameHeader;
+
+/*= Custom memory allocation functions */
 typedef void* (*ZSTD_allocFunction) (void* opaque, size_t size);
 typedef void  (*ZSTD_freeFunction) (void* opaque, void* address);
 typedef struct { ZSTD_allocFunction customAlloc; ZSTD_freeFunction customFree; void* opaque; } ZSTD_customMem;
+/* use this constant to defer to stdlib's functions */
+static const ZSTD_customMem ZSTD_defaultCMem = { NULL, NULL, NULL };
 
 
-/*-*************************************
+/***************************************
+*  Frame size functions
+***************************************/
+
+/*! ZSTD_findFrameCompressedSize() :
+ *  `src` should point to the start of a ZSTD encoded frame or skippable frame
+ *  `srcSize` must be at least as large as the frame
+ *  @return : the compressed size of the first frame starting at `src`,
+ *            suitable to pass to `ZSTD_decompress` or similar,
+ *            or an error code if input is invalid */
+ZSTDLIB_API size_t ZSTD_findFrameCompressedSize(const void* src, size_t srcSize);
+
+/*! ZSTD_findDecompressedSize() :
+ *  `src` should point the start of a series of ZSTD encoded and/or skippable frames
+ *  `srcSize` must be the _exact_ size of this series
+ *       (i.e. there should be a frame boundary exactly at `srcSize` bytes after `src`)
+ *  @return : - decompressed size of all data in all successive frames
+ *            - if the decompressed size cannot be determined: ZSTD_CONTENTSIZE_UNKNOWN
+ *            - if an error occurred: ZSTD_CONTENTSIZE_ERROR
+ *
+ *   note 1 : decompressed size is an optional field, that may not be present, especially in streaming mode.
+ *            When `return==ZSTD_CONTENTSIZE_UNKNOWN`, data to decompress could be any size.
+ *            In which case, it's necessary to use streaming mode to decompress data.
+ *   note 2 : decompressed size is always present when compression is done with ZSTD_compress()
+ *   note 3 : decompressed size can be very large (64-bits value),
+ *            potentially larger than what local system can handle as a single memory segment.
+ *            In which case, it's necessary to use streaming mode to decompress data.
+ *   note 4 : If source is untrusted, decompressed size could be wrong or intentionally modified.
+ *            Always ensure result fits within application's authorized limits.
+ *            Each application can set its own limits.
+ *   note 5 : ZSTD_findDecompressedSize handles multiple frames, and so it must traverse the input to
+ *            read each contained frame header.  This is fast as most of the data is skipped,
+ *            however it does mean that all frame data must be present and valid. */
+ZSTDLIB_API unsigned long long ZSTD_findDecompressedSize(const void* src, size_t srcSize);
+
+/*! ZSTD_frameHeaderSize() :
+*   `src` should point to the start of a ZSTD frame
+*   `srcSize` must be >= ZSTD_frameHeaderSize_prefix.
+*   @return : size of the Frame Header */
+ZSTDLIB_API size_t ZSTD_frameHeaderSize(const void* src, size_t srcSize);
+
+
+/***************************************
+*  Context memory usage
+***************************************/
+
+/*! ZSTD_sizeof_*() :
+ *  These functions give the current memory usage of selected object.
+ *  Object memory usage can evolve if it's re-used multiple times. */
+ZSTDLIB_API size_t ZSTD_sizeof_CCtx(const ZSTD_CCtx* cctx);
+ZSTDLIB_API size_t ZSTD_sizeof_DCtx(const ZSTD_DCtx* dctx);
+ZSTDLIB_API size_t ZSTD_sizeof_CStream(const ZSTD_CStream* zcs);
+ZSTDLIB_API size_t ZSTD_sizeof_DStream(const ZSTD_DStream* zds);
+ZSTDLIB_API size_t ZSTD_sizeof_CDict(const ZSTD_CDict* cdict);
+ZSTDLIB_API size_t ZSTD_sizeof_DDict(const ZSTD_DDict* ddict);
+
+/*! ZSTD_estimate*() :
+ *  These functions make it possible to estimate memory usage
+ *  of a future {D,C}Ctx, before its creation.
+ *  ZSTD_estimateCCtxSize() will provide a budget large enough for any compression level up to selected one.
+ *  It will also consider src size to be arbitrarily "large", which is worst case.
+ *  If srcSize is known to always be small, ZSTD_estimateCCtxSize_advanced() can provide a tighter estimation.
+ *  ZSTD_estimateCCtxSize_advanced() can be used in tandem with ZSTD_getCParams() to create cParams from compressionLevel.
+ *  Note : CCtx estimation is only correct for single-threaded compression */
+ZSTDLIB_API size_t ZSTD_estimateCCtxSize(int compressionLevel);
+ZSTDLIB_API size_t ZSTD_estimateCCtxSize_advanced(ZSTD_compressionParameters cParams);
+ZSTDLIB_API size_t ZSTD_estimateDCtxSize(void);
+
+/*! ZSTD_estimate?StreamSize() :
+ *  ZSTD_estimateCStreamSize() will provide a budget large enough for any compression level up to selected one.
+ *  It will also consider src size to be arbitrarily "large", which is worst case.
+ *  If srcSize is known to always be small, ZSTD_estimateCStreamSize_advanced() can provide a tighter estimation.
+ *  ZSTD_estimateCStreamSize_advanced() can be used in tandem with ZSTD_getCParams() to create cParams from compressionLevel.
+ *  Note : CStream estimation is only correct for single-threaded compression.
+ *  ZSTD_DStream memory budget depends on window Size.
+ *  This information can be passed manually, using ZSTD_estimateDStreamSize,
+ *  or deducted from a valid frame Header, using ZSTD_estimateDStreamSize_fromFrame();
+ *  Note : if streaming is init with function ZSTD_init?Stream_usingDict(),
+ *         an internal ?Dict will be created, which additional size is not estimated here.
+ *         In this case, get total size by adding ZSTD_estimate?DictSize */
+ZSTDLIB_API size_t ZSTD_estimateCStreamSize(int compressionLevel);
+ZSTDLIB_API size_t ZSTD_estimateCStreamSize_advanced(ZSTD_compressionParameters cParams);
+ZSTDLIB_API size_t ZSTD_estimateDStreamSize(size_t windowSize);
+ZSTDLIB_API size_t ZSTD_estimateDStreamSize_fromFrame(const void* src, size_t srcSize);
+
+/*! ZSTD_estimate?DictSize() :
+ *  ZSTD_estimateCDictSize() will bet that src size is relatively "small", and content is copied, like ZSTD_createCDict().
+ *  ZSTD_estimateCStreamSize_advanced() makes it possible to control precisely compression parameters, like ZSTD_createCDict_advanced().
+ *  Note : dictionary created "byReference" are smaller */
+ZSTDLIB_API size_t ZSTD_estimateCDictSize(size_t dictSize, int compressionLevel);
+ZSTDLIB_API size_t ZSTD_estimateCDictSize_advanced(size_t dictSize, ZSTD_compressionParameters cParams, unsigned byReference);
+ZSTDLIB_API size_t ZSTD_estimateDDictSize(size_t dictSize, unsigned byReference);
+
+
+/***************************************
 *  Advanced compression functions
 ***************************************/
-/*! ZSTD_estimateCCtxSize() :
- *  Gives the amount of memory allocated for a ZSTD_CCtx given a set of compression parameters.
- *  `frameContentSize` is an optional parameter, provide `0` if unknown */
-ZSTDLIB_API size_t ZSTD_estimateCCtxSize(ZSTD_compressionParameters cParams);
-
 /*! ZSTD_createCCtx_advanced() :
  *  Create a ZSTD compression context using external alloc and free functions */
 ZSTDLIB_API ZSTD_CCtx* ZSTD_createCCtx_advanced(ZSTD_customMem customMem);
 
-/*! ZSTD_sizeofCCtx() :
- *  Gives the amount of memory used by a given ZSTD_CCtx */
-ZSTDLIB_API size_t ZSTD_sizeof_CCtx(const ZSTD_CCtx* cctx);
+/*! ZSTD_initStaticCCtx() : initialize a fixed-size zstd compression context
+ *  workspace: The memory area to emplace the context into.
+ *             Provided pointer must 8-bytes aligned.
+ *             It must outlive context usage.
+ *  workspaceSize: Use ZSTD_estimateCCtxSize() or ZSTD_estimateCStreamSize()
+ *                 to determine how large workspace must be to support scenario.
+ * @return : pointer to ZSTD_CCtx*, or NULL if error (size too small)
+ *  Note : zstd will never resize nor malloc() when using a static cctx.
+ *         If it needs more memory than available, it will simply error out.
+ *  Note 2 : there is no corresponding "free" function.
+ *           Since workspace was allocated externally, it must be freed externally too.
+ *  Limitation 1 : currently not compatible with internal CDict creation, such as
+ *                 ZSTD_CCtx_loadDictionary() or ZSTD_initCStream_usingDict().
+ *  Limitation 2 : currently not compatible with multi-threading
+ */
+ZSTDLIB_API ZSTD_CCtx* ZSTD_initStaticCCtx(void* workspace, size_t workspaceSize);
 
+
+/* !!! To be deprecated !!! */
+typedef enum {
+    ZSTD_p_forceWindow,   /* Force back-references to remain < windowSize, even when referencing Dictionary content (default:0) */
+    ZSTD_p_forceRawDict   /* Force loading dictionary in "content-only" mode (no header analysis) */
+} ZSTD_CCtxParameter;
+/*! ZSTD_setCCtxParameter() :
+ *  Set advanced parameters, selected through enum ZSTD_CCtxParameter
+ *  @result : 0, or an error code (which can be tested with ZSTD_isError()) */
+ZSTDLIB_API size_t ZSTD_setCCtxParameter(ZSTD_CCtx* cctx, ZSTD_CCtxParameter param, unsigned value);
+
+
+/*! ZSTD_createCDict_byReference() :
+ *  Create a digested dictionary for compression
+ *  Dictionary content is simply referenced, and therefore stays in dictBuffer.
+ *  It is important that dictBuffer outlives CDict, it must remain read accessible throughout the lifetime of CDict */
+ZSTDLIB_API ZSTD_CDict* ZSTD_createCDict_byReference(const void* dictBuffer, size_t dictSize, int compressionLevel);
+
+
+typedef enum { ZSTD_dm_auto=0,        /* dictionary is "full" if it starts with ZSTD_MAGIC_DICTIONARY, otherwise it is "rawContent" */
+               ZSTD_dm_rawContent,    /* ensures dictionary is always loaded as rawContent, even if it starts with ZSTD_MAGIC_DICTIONARY */
+               ZSTD_dm_fullDict       /* refuses to load a dictionary if it does not respect Zstandard's specification */
+} ZSTD_dictMode_e;
 /*! ZSTD_createCDict_advanced() :
  *  Create a ZSTD_CDict using external alloc and free, and customized compression parameters */
 ZSTDLIB_API ZSTD_CDict* ZSTD_createCDict_advanced(const void* dict, size_t dictSize,
-                                                  ZSTD_parameters params, ZSTD_customMem customMem);
+                                                  unsigned byReference, ZSTD_dictMode_e dictMode,
+                                                  ZSTD_compressionParameters cParams,
+                                                  ZSTD_customMem customMem);
 
-/*! ZSTD_sizeof_CDict() :
- *  Gives the amount of memory used by a given ZSTD_sizeof_CDict */
-ZSTDLIB_API size_t ZSTD_sizeof_CDict(const ZSTD_CDict* cdict);
-
-/*! ZSTD_getParams() :
-*   same as ZSTD_getCParams(), but @return a full `ZSTD_parameters` object instead of a `ZSTD_compressionParameters`.
-*   All fields of `ZSTD_frameParameters` are set to default (0) */
-ZSTDLIB_API ZSTD_parameters ZSTD_getParams(int compressionLevel, unsigned long long srcSize, size_t dictSize);
+/*! ZSTD_initStaticCDict_advanced() :
+ *  Generate a digested dictionary in provided memory area.
+ *  workspace: The memory area to emplace the dictionary into.
+ *             Provided pointer must 8-bytes aligned.
+ *             It must outlive dictionary usage.
+ *  workspaceSize: Use ZSTD_estimateCDictSize()
+ *                 to determine how large workspace must be.
+ *  cParams : use ZSTD_getCParams() to transform a compression level
+ *            into its relevants cParams.
+ * @return : pointer to ZSTD_CDict*, or NULL if error (size too small)
+ *  Note : there is no corresponding "free" function.
+ *         Since workspace was allocated externally, it must be freed externally.
+ */
+ZSTDLIB_API ZSTD_CDict* ZSTD_initStaticCDict(
+                            void* workspace, size_t workspaceSize,
+                      const void* dict, size_t dictSize,
+                            unsigned byReference, ZSTD_dictMode_e dictMode,
+                            ZSTD_compressionParameters cParams);
 
 /*! ZSTD_getCParams() :
-*   @return ZSTD_compressionParameters structure for a selected compression level and srcSize.
-*   `srcSize` value is optional, select 0 if not known */
-ZSTDLIB_API ZSTD_compressionParameters ZSTD_getCParams(int compressionLevel, unsigned long long srcSize, size_t dictSize);
+*   @return ZSTD_compressionParameters structure for a selected compression level and estimated srcSize.
+*   `estimatedSrcSize` value is optional, select 0 if not known */
+ZSTDLIB_API ZSTD_compressionParameters ZSTD_getCParams(int compressionLevel, unsigned long long estimatedSrcSize, size_t dictSize);
+
+/*! ZSTD_getParams() :
+*   same as ZSTD_getCParams(), but @return a full `ZSTD_parameters` object instead of sub-component `ZSTD_compressionParameters`.
+*   All fields of `ZSTD_frameParameters` are set to default (0) */
+ZSTDLIB_API ZSTD_parameters ZSTD_getParams(int compressionLevel, unsigned long long estimatedSrcSize, size_t dictSize);
 
 /*! ZSTD_checkCParams() :
 *   Ensure param values remain within authorized range */
 ZSTDLIB_API size_t ZSTD_checkCParams(ZSTD_compressionParameters params);
 
 /*! ZSTD_adjustCParams() :
-*   optimize params for a given `srcSize` and `dictSize`.
-*   both values are optional, select `0` if unknown. */
+ *  optimize params for a given `srcSize` and `dictSize`.
+ *  both values are optional, select `0` if unknown. */
 ZSTDLIB_API ZSTD_compressionParameters ZSTD_adjustCParams(ZSTD_compressionParameters cPar, unsigned long long srcSize, size_t dictSize);
 
 /*! ZSTD_compress_advanced() :
-*   Same as ZSTD_compress_usingDict(), with fine-tune control of each compression parameter */
-ZSTDLIB_API size_t ZSTD_compress_advanced (ZSTD_CCtx* ctx,
-                                           void* dst, size_t dstCapacity,
-                                     const void* src, size_t srcSize,
-                                     const void* dict,size_t dictSize,
-                                           ZSTD_parameters params);
+*   Same as ZSTD_compress_usingDict(), with fine-tune control over each compression parameter */
+ZSTDLIB_API size_t ZSTD_compress_advanced (ZSTD_CCtx* cctx,
+                                  void* dst, size_t dstCapacity,
+                            const void* src, size_t srcSize,
+                            const void* dict,size_t dictSize,
+                                  ZSTD_parameters params);
+
+/*! ZSTD_compress_usingCDict_advanced() :
+*   Same as ZSTD_compress_usingCDict(), with fine-tune control over frame parameters */
+ZSTDLIB_API size_t ZSTD_compress_usingCDict_advanced(ZSTD_CCtx* cctx,
+                                  void* dst, size_t dstCapacity,
+                            const void* src, size_t srcSize,
+                            const ZSTD_CDict* cdict, ZSTD_frameParameters fParams);
 
 
-/*--- Advanced Decompression functions ---*/
+/*--- Advanced decompression functions ---*/
 
-/*! ZSTD_estimateDCtxSize() :
- *  Gives the potential amount of memory allocated to create a ZSTD_DCtx */
-ZSTDLIB_API size_t ZSTD_estimateDCtxSize(void);
+/*! ZSTD_isFrame() :
+ *  Tells if the content of `buffer` starts with a valid Frame Identifier.
+ *  Note : Frame Identifier is 4 bytes. If `size < 4`, @return will always be 0.
+ *  Note 2 : Legacy Frame Identifiers are considered valid only if Legacy Support is enabled.
+ *  Note 3 : Skippable Frame Identifiers are considered valid. */
+ZSTDLIB_API unsigned ZSTD_isFrame(const void* buffer, size_t size);
 
 /*! ZSTD_createDCtx_advanced() :
  *  Create a ZSTD decompression context using external alloc and free functions */
 ZSTDLIB_API ZSTD_DCtx* ZSTD_createDCtx_advanced(ZSTD_customMem customMem);
 
-/*! ZSTD_sizeof_DCtx() :
- *  Gives the amount of memory used by a given ZSTD_DCtx */
-ZSTDLIB_API size_t ZSTD_sizeof_DCtx(const ZSTD_DCtx* dctx);
+/*! ZSTD_initStaticDCtx() : initialize a fixed-size zstd decompression context
+ *  workspace: The memory area to emplace the context into.
+ *             Provided pointer must 8-bytes aligned.
+ *             It must outlive context usage.
+ *  workspaceSize: Use ZSTD_estimateDCtxSize() or ZSTD_estimateDStreamSize()
+ *                 to determine how large workspace must be to support scenario.
+ * @return : pointer to ZSTD_DCtx*, or NULL if error (size too small)
+ *  Note : zstd will never resize nor malloc() when using a static dctx.
+ *         If it needs more memory than available, it will simply error out.
+ *  Note 2 : static dctx is incompatible with legacy support
+ *  Note 3 : there is no corresponding "free" function.
+ *           Since workspace was allocated externally, it must be freed externally.
+ *  Limitation : currently not compatible with internal DDict creation,
+ *               such as ZSTD_initDStream_usingDict().
+ */
+ZSTDLIB_API ZSTD_DCtx* ZSTD_initStaticDCtx(void* workspace, size_t workspaceSize);
 
-/*! ZSTD_sizeof_DDict() :
- *  Gives the amount of memory used by a given ZSTD_DDict */
-ZSTDLIB_API size_t ZSTD_sizeof_DDict(const ZSTD_DDict* ddict);
+/*! ZSTD_createDDict_byReference() :
+ *  Create a digested dictionary, ready to start decompression operation without startup delay.
+ *  Dictionary content is referenced, and therefore stays in dictBuffer.
+ *  It is important that dictBuffer outlives DDict,
+ *  it must remain read accessible throughout the lifetime of DDict */
+ZSTDLIB_API ZSTD_DDict* ZSTD_createDDict_byReference(const void* dictBuffer, size_t dictSize);
+
+/*! ZSTD_createDDict_advanced() :
+ *  Create a ZSTD_DDict using external alloc and free, optionally by reference */
+ZSTDLIB_API ZSTD_DDict* ZSTD_createDDict_advanced(const void* dict, size_t dictSize,
+                                                  unsigned byReference, ZSTD_customMem customMem);
+
+/*! ZSTD_initStaticDDict() :
+ *  Generate a digested dictionary in provided memory area.
+ *  workspace: The memory area to emplace the dictionary into.
+ *             Provided pointer must 8-bytes aligned.
+ *             It must outlive dictionary usage.
+ *  workspaceSize: Use ZSTD_estimateDDictSize()
+ *                 to determine how large workspace must be.
+ * @return : pointer to ZSTD_DDict*, or NULL if error (size too small)
+ *  Note : there is no corresponding "free" function.
+ *         Since workspace was allocated externally, it must be freed externally.
+ */
+ZSTDLIB_API ZSTD_DDict* ZSTD_initStaticDDict(void* workspace, size_t workspaceSize,
+                                             const void* dict, size_t dictSize,
+                                             unsigned byReference);
+
+/*! ZSTD_getDictID_fromDict() :
+ *  Provides the dictID stored within dictionary.
+ *  if @return == 0, the dictionary is not conformant with Zstandard specification.
+ *  It can still be loaded, but as a content-only dictionary. */
+ZSTDLIB_API unsigned ZSTD_getDictID_fromDict(const void* dict, size_t dictSize);
+
+/*! ZSTD_getDictID_fromDDict() :
+ *  Provides the dictID of the dictionary loaded into `ddict`.
+ *  If @return == 0, the dictionary is not conformant to Zstandard specification, or empty.
+ *  Non-conformant dictionaries can still be loaded, but as content-only dictionaries. */
+ZSTDLIB_API unsigned ZSTD_getDictID_fromDDict(const ZSTD_DDict* ddict);
+
+/*! ZSTD_getDictID_fromFrame() :
+ *  Provides the dictID required to decompressed the frame stored within `src`.
+ *  If @return == 0, the dictID could not be decoded.
+ *  This could for one of the following reasons :
+ *  - The frame does not require a dictionary to be decoded (most common case).
+ *  - The frame was built with dictID intentionally removed. Whatever dictionary is necessary is a hidden information.
+ *    Note : this use case also happens when using a non-conformant dictionary.
+ *  - `srcSize` is too small, and as a result, the frame header could not be decoded (only possible if `srcSize < ZSTD_FRAMEHEADERSIZE_MAX`).
+ *  - This is not a Zstandard frame.
+ *  When identifying the exact failure cause, it's possible to use ZSTD_getFrameHeader(), which will provide a more precise error code. */
+ZSTDLIB_API unsigned ZSTD_getDictID_fromFrame(const void* src, size_t srcSize);
 
 
-/* ******************************************************************
-*  Advanced Streaming functions
+/********************************************************************
+*  Advanced streaming functions
 ********************************************************************/
 
-/*======   compression   ======*/
-
+/*=====   Advanced Streaming compression functions  =====*/
 ZSTDLIB_API ZSTD_CStream* ZSTD_createCStream_advanced(ZSTD_customMem customMem);
-ZSTDLIB_API size_t ZSTD_initCStream_usingDict(ZSTD_CStream* zcs, const void* dict, size_t dictSize, int compressionLevel);
+ZSTDLIB_API ZSTD_CStream* ZSTD_initStaticCStream(void* workspace, size_t workspaceSize);    /**< same as ZSTD_initStaticCCtx() */
+ZSTDLIB_API size_t ZSTD_initCStream_srcSize(ZSTD_CStream* zcs, int compressionLevel, unsigned long long pledgedSrcSize);   /**< pledgedSrcSize must be correct, a size of 0 means unknown.  for a frame size of 0 use initCStream_advanced */
+ZSTDLIB_API size_t ZSTD_initCStream_usingDict(ZSTD_CStream* zcs, const void* dict, size_t dictSize, int compressionLevel); /**< creates of an internal CDict (incompatible with static CCtx), except if dict == NULL or dictSize < 8, in which case no dict is used. */
 ZSTDLIB_API size_t ZSTD_initCStream_advanced(ZSTD_CStream* zcs, const void* dict, size_t dictSize,
-                                             ZSTD_parameters params, unsigned long long pledgedSrcSize);  /**< pledgedSrcSize is optional and can be zero == unknown */
-ZSTDLIB_API size_t ZSTD_resetCStream(ZSTD_CStream* zcs, unsigned long long pledgedSrcSize);  /**< re-use compression parameters from previous init; saves dictionary loading */
-ZSTDLIB_API size_t ZSTD_sizeof_CStream(const ZSTD_CStream* zcs);
+                                             ZSTD_parameters params, unsigned long long pledgedSrcSize);  /**< pledgedSrcSize is optional and can be 0 (meaning unknown). note: if the contentSizeFlag is set, pledgedSrcSize == 0 means the source size is actually 0 */
+ZSTDLIB_API size_t ZSTD_initCStream_usingCDict(ZSTD_CStream* zcs, const ZSTD_CDict* cdict);  /**< note : cdict will just be referenced, and must outlive compression session */
+ZSTDLIB_API size_t ZSTD_initCStream_usingCDict_advanced(ZSTD_CStream* zcs, const ZSTD_CDict* cdict, ZSTD_frameParameters fParams, unsigned long long pledgedSrcSize);  /**< same as ZSTD_initCStream_usingCDict(), with control over frame parameters */
+
+/*! ZSTD_resetCStream() :
+ *  start a new compression job, using same parameters from previous job.
+ *  This is typically useful to skip dictionary loading stage, since it will re-use it in-place..
+ *  Note that zcs must be init at least once before using ZSTD_resetCStream().
+ *  pledgedSrcSize==0 means "srcSize unknown".
+ *  If pledgedSrcSize > 0, its value must be correct, as it will be written in header, and controlled at the end.
+ *  @return : 0, or an error code (which can be tested using ZSTD_isError()) */
+ZSTDLIB_API size_t ZSTD_resetCStream(ZSTD_CStream* zcs, unsigned long long pledgedSrcSize);
 
 
-/*======   decompression   ======*/
-
-typedef enum { ZSTDdsp_maxWindowSize } ZSTD_DStreamParameter_e;
-
+/*=====   Advanced Streaming decompression functions  =====*/
+typedef enum { DStream_p_maxWindowSize } ZSTD_DStreamParameter_e;
 ZSTDLIB_API ZSTD_DStream* ZSTD_createDStream_advanced(ZSTD_customMem customMem);
-ZSTDLIB_API size_t ZSTD_initDStream_usingDict(ZSTD_DStream* zds, const void* dict, size_t dictSize);
+ZSTDLIB_API ZSTD_DStream* ZSTD_initStaticDStream(void* workspace, size_t workspaceSize);    /**< same as ZSTD_initStaticDCtx() */
 ZSTDLIB_API size_t ZSTD_setDStreamParameter(ZSTD_DStream* zds, ZSTD_DStreamParameter_e paramType, unsigned paramValue);
+ZSTDLIB_API size_t ZSTD_initDStream_usingDict(ZSTD_DStream* zds, const void* dict, size_t dictSize); /**< note: a dict will not be used if dict == NULL or dictSize < 8 */
+ZSTDLIB_API size_t ZSTD_initDStream_usingDDict(ZSTD_DStream* zds, const ZSTD_DDict* ddict);  /**< note : ddict will just be referenced, and must outlive decompression session */
 ZSTDLIB_API size_t ZSTD_resetDStream(ZSTD_DStream* zds);  /**< re-use decompression parameters from previous init; saves dictionary loading */
-ZSTDLIB_API size_t ZSTD_sizeof_DStream(const ZSTD_DStream* zds);
 
 
-/* ******************************************************************
+/*********************************************************************
 *  Buffer-less and synchronous inner streaming functions
-********************************************************************/
-/* This is an advanced API, giving full control over buffer management, for users which need direct control over memory.
+*
+*  This is an advanced API, giving full control over buffer management, for users which need direct control over memory.
 *  But it's also a complex one, with many restrictions (documented below).
-*  Prefer using normal streaming API for an easier experience */
+*  Prefer using normal streaming API for an easier experience
+********************************************************************* */
 
-ZSTDLIB_API size_t ZSTD_compressBegin(ZSTD_CCtx* cctx, int compressionLevel);
-ZSTDLIB_API size_t ZSTD_compressBegin_usingDict(ZSTD_CCtx* cctx, const void* dict, size_t dictSize, int compressionLevel);
-ZSTDLIB_API size_t ZSTD_compressBegin_advanced(ZSTD_CCtx* cctx, const void* dict, size_t dictSize, ZSTD_parameters params, unsigned long long pledgedSrcSize);
-ZSTDLIB_API size_t ZSTD_copyCCtx(ZSTD_CCtx* cctx, const ZSTD_CCtx* preparedCCtx, unsigned long long pledgedSrcSize);
+/**
+  Buffer-less streaming compression (synchronous mode)
 
-ZSTDLIB_API size_t ZSTD_compressContinue(ZSTD_CCtx* cctx, void* dst, size_t dstCapacity, const void* src, size_t srcSize);
-ZSTDLIB_API size_t ZSTD_compressEnd(ZSTD_CCtx* cctx, void* dst, size_t dstCapacity, const void* src, size_t srcSize);
-
-/*
   A ZSTD_CCtx object is required to track streaming operations.
   Use ZSTD_createCCtx() / ZSTD_freeCCtx() to manage resource.
   ZSTD_CCtx object can be re-used multiple times within successive compression operations.
@@ -475,53 +791,50 @@ ZSTDLIB_API size_t ZSTD_compressEnd(ZSTD_CCtx* cctx, void* dst, size_t dstCapaci
     In which case, it will "discard" the relevant memory section from its history.
 
   Finish a frame with ZSTD_compressEnd(), which will write the last block(s) and optional checksum.
-  It's possible to use a NULL,0 src content, in which case, it will write a final empty block to end the frame,
-  Without last block mark, frames will be considered unfinished (broken) by decoders.
+  It's possible to use srcSize==0, in which case, it will write a final empty block to end the frame.
+  Without last block mark, frames will be considered unfinished (corrupted) by decoders.
 
-  You can then reuse `ZSTD_CCtx` (ZSTD_compressBegin()) to compress some new frame.
+  `ZSTD_CCtx` object can be re-used (ZSTD_compressBegin()) to compress some new frame.
 */
 
-typedef struct {
-    unsigned long long frameContentSize;
-    unsigned windowSize;
-    unsigned dictID;
-    unsigned checksumFlag;
-} ZSTD_frameParams;
+/*=====   Buffer-less streaming compression functions  =====*/
+ZSTDLIB_API size_t ZSTD_compressBegin(ZSTD_CCtx* cctx, int compressionLevel);
+ZSTDLIB_API size_t ZSTD_compressBegin_usingDict(ZSTD_CCtx* cctx, const void* dict, size_t dictSize, int compressionLevel);
+ZSTDLIB_API size_t ZSTD_compressBegin_advanced(ZSTD_CCtx* cctx, const void* dict, size_t dictSize, ZSTD_parameters params, unsigned long long pledgedSrcSize); /**< pledgedSrcSize is optional and can be 0 (meaning unknown). note: if the contentSizeFlag is set, pledgedSrcSize == 0 means the source size is actually 0 */
+ZSTDLIB_API size_t ZSTD_compressBegin_usingCDict(ZSTD_CCtx* cctx, const ZSTD_CDict* cdict); /**< note: fails if cdict==NULL */
+ZSTDLIB_API size_t ZSTD_compressBegin_usingCDict_advanced(ZSTD_CCtx* const cctx, const ZSTD_CDict* const cdict, ZSTD_frameParameters const fParams, unsigned long long const pledgedSrcSize);   /* compression parameters are already set within cdict. pledgedSrcSize=0 means null-size */
+ZSTDLIB_API size_t ZSTD_copyCCtx(ZSTD_CCtx* cctx, const ZSTD_CCtx* preparedCCtx, unsigned long long pledgedSrcSize); /**<  note: if pledgedSrcSize can be 0, indicating unknown size.  if it is non-zero, it must be accurate.  for 0 size frames, use compressBegin_advanced */
 
-ZSTDLIB_API size_t ZSTD_getFrameParams(ZSTD_frameParams* fparamsPtr, const void* src, size_t srcSize);   /**< doesn't consume input, see details below */
+ZSTDLIB_API size_t ZSTD_compressContinue(ZSTD_CCtx* cctx, void* dst, size_t dstCapacity, const void* src, size_t srcSize);
+ZSTDLIB_API size_t ZSTD_compressEnd(ZSTD_CCtx* cctx, void* dst, size_t dstCapacity, const void* src, size_t srcSize);
 
-ZSTDLIB_API size_t ZSTD_decompressBegin(ZSTD_DCtx* dctx);
-ZSTDLIB_API size_t ZSTD_decompressBegin_usingDict(ZSTD_DCtx* dctx, const void* dict, size_t dictSize);
-ZSTDLIB_API void   ZSTD_copyDCtx(ZSTD_DCtx* dctx, const ZSTD_DCtx* preparedDCtx);
 
-ZSTDLIB_API size_t ZSTD_nextSrcSizeToDecompress(ZSTD_DCtx* dctx);
-ZSTDLIB_API size_t ZSTD_decompressContinue(ZSTD_DCtx* dctx, void* dst, size_t dstCapacity, const void* src, size_t srcSize);
 
-typedef enum { ZSTDnit_frameHeader, ZSTDnit_blockHeader, ZSTDnit_block, ZSTDnit_lastBlock, ZSTDnit_checksum, ZSTDnit_skippableFrame } ZSTD_nextInputType_e;
-ZSTDLIB_API ZSTD_nextInputType_e ZSTD_nextInputType(ZSTD_DCtx* dctx);
-
-/*
+/*-
   Buffer-less streaming decompression (synchronous mode)
 
   A ZSTD_DCtx object is required to track streaming operations.
   Use ZSTD_createDCtx() / ZSTD_freeDCtx() to manage it.
   A ZSTD_DCtx object can be re-used multiple times.
 
-  First typical operation is to retrieve frame parameters, using ZSTD_getFrameParams().
-  It fills a ZSTD_frameParams structure which provide important information to correctly decode the frame,
-  such as the minimum rolling buffer size to allocate to decompress data (`windowSize`),
-  and the dictionary ID used.
+  First typical operation is to retrieve frame parameters, using ZSTD_getFrameHeader().
+  It fills a ZSTD_frameHeader structure with important information to correctly decode the frame,
+  such as minimum rolling buffer size to allocate to decompress data (`windowSize`),
+  and the dictionary ID in use.
   (Note : content size is optional, it may not be present. 0 means : content size unknown).
   Note that these values could be wrong, either because of data malformation, or because an attacker is spoofing deliberate false information.
   As a consequence, check that values remain within valid application range, especially `windowSize`, before allocation.
-  Each application can set its own limit, depending on local restrictions. For extended interoperability, it is recommended to support at least 8 MB.
-  Frame parameters are extracted from the beginning of the compressed frame.
-  Data fragment must be large enough to ensure successful decoding, typically `ZSTD_frameHeaderSize_max` bytes.
-  @result : 0 : successful decoding, the `ZSTD_frameParams` structure is correctly filled.
+  Each application can set its own limit, depending on local restrictions.
+  For extended interoperability, it is recommended to support windowSize of at least 8 MB.
+  Frame header is extracted from the beginning of compressed frame, so providing only the frame's beginning is enough.
+  Data fragment must be large enough to ensure successful decoding.
+  `ZSTD_frameHeaderSize_max` bytes is guaranteed to always be large enough.
+  @result : 0 : successful decoding, the `ZSTD_frameHeader` structure is correctly filled.
            >0 : `srcSize` is too small, please provide at least @result bytes on next attempt.
            errorCode, which can be tested using ZSTD_isError().
 
-  Start decompression, with ZSTD_decompressBegin() or ZSTD_decompressBegin_usingDict().
+  Start decompression, with ZSTD_decompressBegin().
+  If decompression requires a dictionary, use ZSTD_decompressBegin_usingDict() or ZSTD_decompressBegin_usingDDict().
   Alternatively, you can copy a prepared context, using ZSTD_copyDCtx().
 
   Then use ZSTD_nextSrcSizeToDecompress() and ZSTD_decompressContinue() alternatively.
@@ -553,15 +866,237 @@ ZSTDLIB_API ZSTD_nextInputType_e ZSTD_nextInputType(ZSTD_DCtx* dctx);
   b) Frame Size - 4 Bytes, Little endian format, unsigned 32-bits
   c) Frame Content - any content (User Data) of length equal to Frame Size
   For skippable frames ZSTD_decompressContinue() always returns 0.
-  For skippable frames ZSTD_getFrameParams() returns fparamsPtr->windowLog==0 what means that a frame is skippable.
+  For skippable frames ZSTD_getFrameHeader() returns fparamsPtr->windowLog==0 what means that a frame is skippable.
+    Note : If fparamsPtr->frameContentSize==0, it is ambiguous: the frame might actually be a Zstd encoded frame with no content.
+           For purposes of decompression, it is valid in both cases to skip the frame using
+           ZSTD_findFrameCompressedSize to find its size in bytes.
   It also returns Frame Size as fparamsPtr->frameContentSize.
 */
 
+/*=====   Buffer-less streaming decompression functions  =====*/
+ZSTDLIB_API size_t ZSTD_getFrameHeader(ZSTD_frameHeader* zfhPtr, const void* src, size_t srcSize);   /**< doesn't consume input */
+ZSTDLIB_API size_t ZSTD_decompressBegin(ZSTD_DCtx* dctx);
+ZSTDLIB_API size_t ZSTD_decompressBegin_usingDict(ZSTD_DCtx* dctx, const void* dict, size_t dictSize);
+ZSTDLIB_API size_t ZSTD_decompressBegin_usingDDict(ZSTD_DCtx* dctx, const ZSTD_DDict* ddict);
+ZSTDLIB_API void   ZSTD_copyDCtx(ZSTD_DCtx* dctx, const ZSTD_DCtx* preparedDCtx);
 
-/* **************************************
-*  Block functions
-****************************************/
-/*! Block functions produce and decode raw zstd blocks, without frame metadata.
+ZSTDLIB_API size_t ZSTD_nextSrcSizeToDecompress(ZSTD_DCtx* dctx);
+ZSTDLIB_API size_t ZSTD_decompressContinue(ZSTD_DCtx* dctx, void* dst, size_t dstCapacity, const void* src, size_t srcSize);
+typedef enum { ZSTDnit_frameHeader, ZSTDnit_blockHeader, ZSTDnit_block, ZSTDnit_lastBlock, ZSTDnit_checksum, ZSTDnit_skippableFrame } ZSTD_nextInputType_e;
+ZSTDLIB_API ZSTD_nextInputType_e ZSTD_nextInputType(ZSTD_DCtx* dctx);
+
+
+
+/*===   New advanced API (experimental, and compression only)  ===*/
+
+/* notes on API design :
+ *   In this proposal, parameters are pushed one by one into an existing CCtx,
+ *   and then applied on all subsequent compression jobs.
+ *   When no parameter is ever provided, CCtx is created with compression level ZSTD_CLEVEL_DEFAULT.
+ *
+ *   This API is intended to replace all others experimental API.
+ *   It can basically do all other use cases, and even new ones.
+ *   It stands a good chance to become "stable",
+ *   after a reasonable testing period.
+ */
+
+/* note on naming convention :
+ *   Initially, the API favored names like ZSTD_setCCtxParameter() .
+ *   In this proposal, convention is changed towards ZSTD_CCtx_setParameter() .
+ *   The main driver is that it identifies more clearly the target object type.
+ *   It feels clearer in light of potential variants :
+ *   ZSTD_CDict_setParameter() (rather than ZSTD_setCDictParameter())
+ *   ZSTD_DCtx_setParameter()  (rather than ZSTD_setDCtxParameter() )
+ *   Left variant feels easier to distinguish.
+ */
+
+/* note on enum design :
+ * All enum will be manually set to explicit values before reaching "stable API" status */
+
+typedef enum {
+    /* compression parameters */
+    ZSTD_p_compressionLevel=100, /* Update all compression parameters according to pre-defined cLevel table
+                              * Default level is ZSTD_CLEVEL_DEFAULT==3.
+                              * Special: value 0 means "do not change cLevel". */
+    ZSTD_p_windowLog,        /* Maximum allowed back-reference distance, expressed as power of 2.
+                              * Must be clamped between ZSTD_WINDOWLOG_MIN and ZSTD_WINDOWLOG_MAX.
+                              * Special: value 0 means "do not change windowLog". */
+    ZSTD_p_hashLog,          /* Size of the probe table, as a power of 2.
+                              * Resulting table size is (1 << (hashLog+2)).
+                              * Must be clamped between ZSTD_HASHLOG_MIN and ZSTD_HASHLOG_MAX.
+                              * Larger tables improve compression ratio of strategies <= dFast,
+                              * and improve speed of strategies > dFast.
+                              * Special: value 0 means "do not change hashLog". */
+    ZSTD_p_chainLog,         /* Size of the full-search table, as a power of 2.
+                              * Resulting table size is (1 << (chainLog+2)).
+                              * Larger tables result in better and slower compression.
+                              * This parameter is useless when using "fast" strategy.
+                              * Special: value 0 means "do not change chainLog". */
+    ZSTD_p_searchLog,        /* Number of search attempts, as a power of 2.
+                              * More attempts result in better and slower compression.
+                              * This parameter is useless when using "fast" and "dFast" strategies.
+                              * Special: value 0 means "do not change searchLog". */
+    ZSTD_p_minMatch,         /* Minimum size of searched matches (note : repCode matches can be smaller).
+                              * Larger values make faster compression and decompression, but decrease ratio.
+                              * Must be clamped between ZSTD_SEARCHLENGTH_MIN and ZSTD_SEARCHLENGTH_MAX.
+                              * Note that currently, for all strategies < btopt, effective minimum is 4.
+                              * Note that currently, for all strategies > fast, effective maximum is 6.
+                              * Special: value 0 means "do not change minMatchLength". */
+    ZSTD_p_targetLength,     /* Only useful for strategies >= btopt.
+                              * Length of Match considered "good enough" to stop search.
+                              * Larger values make compression stronger and slower.
+                              * Special: value 0 means "do not change targetLength". */
+    ZSTD_p_compressionStrategy, /* See ZSTD_strategy enum definition.
+                              * Cast selected strategy as unsigned for ZSTD_CCtx_setParameter() compatibility.
+                              * The higher the value of selected strategy, the more complex it is,
+                              * resulting in stronger and slower compression.
+                              * Special: value 0 means "do not change strategy". */
+
+    /* frame parameters */
+    ZSTD_p_contentSizeFlag=200, /* Content size is written into frame header _whenever known_ (default:1) */
+    ZSTD_p_checksumFlag,     /* A 32-bits checksum of content is written at end of frame (default:0) */
+    ZSTD_p_dictIDFlag,       /* When applicable, dictID of dictionary is provided in frame header (default:1) */
+
+    /* dictionary parameters (must be set before ZSTD_CCtx_loadDictionary) */
+    ZSTD_p_dictMode=300,     /* Select how dictionary content must be interpreted. Value must be from type ZSTD_dictMode_e.
+                              * default : 0==auto : dictionary will be "full" if it respects specification, otherwise it will be "rawContent" */
+    ZSTD_p_refDictContent,   /* Dictionary content will be referenced, instead of copied (default:0==byCopy).
+                              * It requires that dictionary buffer outlives its users */
+
+    /* multi-threading parameters */
+    ZSTD_p_nbThreads=400,    /* Select how many threads a compression job can spawn (default:1)
+                              * More threads improve speed, but also increase memory usage.
+                              * Can only receive a value > 1 if ZSTD_MULTITHREAD is enabled.
+                              * Special: value 0 means "do not change nbThreads" */
+    ZSTD_p_jobSize,          /* Size of a compression job. Each compression job is completed in parallel.
+                              * 0 means default, which is dynamically determined based on compression parameters.
+                              * Job size must be a minimum of overlapSize, or 1 KB, whichever is largest
+                              * The minimum size is automatically and transparently enforced */
+    ZSTD_p_overlapSizeLog,   /* Size of previous input reloaded at the beginning of each job.
+                              * 0 => no overlap, 6(default) => use 1/8th of windowSize, >=9 => use full windowSize */
+
+    /* advanced parameters - may not remain available after API update */
+    ZSTD_p_forceMaxWindow=1100, /* Force back-reference distances to remain < windowSize,
+                              * even when referencing into Dictionary content (default:0) */
+
+} ZSTD_cParameter;
+
+
+/*! ZSTD_CCtx_setParameter() :
+ *  Set one compression parameter, selected by enum ZSTD_cParameter.
+ *  Note : when `value` is an enum, cast it to unsigned for proper type checking.
+ *  @result : 0, or an error code (which can be tested with ZSTD_isError()). */
+ZSTDLIB_API size_t ZSTD_CCtx_setParameter(ZSTD_CCtx* cctx, ZSTD_cParameter param, unsigned value);
+
+/*! ZSTD_CCtx_setPledgedSrcSize() :
+ *  Total input data size to be compressed as a single frame.
+ *  This value will be controlled at the end, and result in error if not respected.
+ * @result : 0, or an error code (which can be tested with ZSTD_isError()).
+ *  Note 1 : 0 means zero, empty.
+ *           In order to mean "unknown content size", pass constant ZSTD_CONTENTSIZE_UNKNOWN.
+ *           Note that ZSTD_CONTENTSIZE_UNKNOWN is default value for new compression jobs.
+ *  Note 2 : If all data is provided and consumed in a single round,
+ *           this value is overriden by srcSize instead. */
+ZSTDLIB_API size_t ZSTD_CCtx_setPledgedSrcSize(ZSTD_CCtx* cctx, unsigned long long pledgedSrcSize);
+
+/*! ZSTD_CCtx_loadDictionary() :
+ *  Create an internal CDict from dict buffer.
+ *  Decompression will have to use same buffer.
+ * @result : 0, or an error code (which can be tested with ZSTD_isError()).
+ *  Special : Adding a NULL (or 0-size) dictionary invalidates any previous dictionary,
+ *            meaning "return to no-dictionary mode".
+ *  Note 1 : `dict` content will be copied internally,
+ *           except if ZSTD_p_refDictContent is set before loading.
+ *  Note 2 : Loading a dictionary involves building tables, which are dependent on compression parameters.
+ *           For this reason, compression parameters cannot be changed anymore after loading a dictionary.
+ *           It's also a CPU-heavy operation, with non-negligible impact on latency.
+ *  Note 3 : Dictionary will be used for all future compression jobs.
+ *           To return to "no-dictionary" situation, load a NULL dictionary */
+ZSTDLIB_API size_t ZSTD_CCtx_loadDictionary(ZSTD_CCtx* cctx, const void* dict, size_t dictSize);
+
+/*! ZSTD_CCtx_refCDict() :
+ *  Reference a prepared dictionary, to be used for all next compression jobs.
+ *  Note that compression parameters are enforced from within CDict,
+ *  and supercede any compression parameter previously set within CCtx.
+ *  The dictionary will remain valid for future compression jobs using same CCtx.
+ * @result : 0, or an error code (which can be tested with ZSTD_isError()).
+ *  Special : adding a NULL CDict means "return to no-dictionary mode".
+ *  Note 1 : Currently, only one dictionary can be managed.
+ *           Adding a new dictionary effectively "discards" any previous one.
+ *  Note 2 : CDict is just referenced, its lifetime must outlive CCtx.
+ */
+ZSTDLIB_API size_t ZSTD_CCtx_refCDict(ZSTD_CCtx* cctx, const ZSTD_CDict* cdict);
+
+/*! ZSTD_CCtx_refPrefix() :
+ *  Reference a prefix (single-usage dictionary) for next compression job.
+ *  Decompression need same prefix to properly regenerate data.
+ *  Prefix is **only used once**. Tables are discarded at end of compression job.
+ *  Subsequent compression jobs will be done without prefix (if none is explicitly referenced).
+ *  If there is a need to use same prefix multiple times, consider embedding it into a ZSTD_CDict instead.
+ * @result : 0, or an error code (which can be tested with ZSTD_isError()).
+ *  Special : Adding any prefix (including NULL) invalidates any previous prefix or dictionary
+ *  Note 1 : Prefix buffer is referenced. It must outlive compression job.
+ *  Note 2 : Referencing a prefix involves building tables, which are dependent on compression parameters.
+ *           It's a CPU-heavy operation, with non-negligible impact on latency.
+ *  Note 3 : it's possible to alter ZSTD_p_dictMode using ZSTD_CCtx_setParameter() */
+ZSTDLIB_API size_t ZSTD_CCtx_refPrefix(ZSTD_CCtx* cctx, const void* prefix, size_t prefixSize);
+
+
+
+typedef enum {
+    ZSTD_e_continue=0, /* collect more data, encoder transparently decides when to output result, for optimal conditions */
+    ZSTD_e_flush,      /* flush any data provided so far - frame will continue, future data can still reference previous data for better compression */
+    ZSTD_e_end         /* flush any remaining data and ends current frame. Any future compression starts a new frame. */
+} ZSTD_EndDirective;
+
+/*! ZSTD_compress_generic() :
+ *  Behave about the same as ZSTD_compressStream. To note :
+ *  - Compression parameters are pushed into CCtx before starting compression, using ZSTD_CCtx_setParameter()
+ *  - Compression parameters cannot be changed once compression is started.
+ *  - *dstPos must be <= dstCapacity, *srcPos must be <= srcSize
+ *  - *dspPos and *srcPos will be updated. They are guaranteed to remain below their respective limit.
+ *  - @return provides the minimum amount of data still to flush from internal buffers
+ *            or an error code, which can be tested using ZSTD_isError().
+ *            if @return != 0, flush is not fully completed, there is some data left within internal buffers.
+ *  - after a ZSTD_e_end directive, if internal buffer is not fully flushed,
+ *            only ZSTD_e_end or ZSTD_e_flush operations are allowed.
+ *            It is necessary to fully flush internal buffers
+ *            before starting a new compression job, or changing compression parameters.
+ */
+ZSTDLIB_API size_t ZSTD_compress_generic (ZSTD_CCtx* cctx,
+                                          ZSTD_outBuffer* output,
+                                          ZSTD_inBuffer* input,
+                                          ZSTD_EndDirective endOp);
+
+/*! ZSTD_CCtx_reset() :
+ *  Return a CCtx to clean state.
+ *  Useful after an error, or to interrupt an ongoing compression job and start a new one.
+ *  Any internal data not yet flushed is cancelled.
+ *  Dictionary (if any) is dropped.
+ *  It's possible to modify compression parameters after a reset.
+ */
+ZSTDLIB_API void ZSTD_CCtx_reset(ZSTD_CCtx* cctx);   /* Not ready yet ! */
+
+
+/*! ZSTD_compress_generic_simpleArgs() :
+ *  Same as ZSTD_compress_generic(),
+ *  but using only integral types as arguments.
+ *  Argument list is larger and less expressive than ZSTD_{in,out}Buffer,
+ *  but can be helpful for binders from dynamic languages
+ *  which have troubles handling structures containing memory pointers.
+ */
+size_t ZSTD_compress_generic_simpleArgs (
+                            ZSTD_CCtx* cctx,
+                            void* dst, size_t dstCapacity, size_t* dstPos,
+                      const void* src, size_t srcSize, size_t* srcPos,
+                            ZSTD_EndDirective endOp);
+
+
+
+/**
+    Block functions
+
+    Block functions produce and decode raw zstd blocks, without frame metadata.
     Frame metadata cost is typically ~18 bytes, which can be non-negligible for very small blocks (< 100 bytes).
     User will have to take in charge required information to regenerate data, such as compressed and content sizes.
 
@@ -569,32 +1104,33 @@ ZSTDLIB_API ZSTD_nextInputType_e ZSTD_nextInputType(ZSTD_DCtx* dctx);
     - Compressing and decompressing require a context structure
       + Use ZSTD_createCCtx() and ZSTD_createDCtx()
     - It is necessary to init context before starting
-      + compression : ZSTD_compressBegin()
-      + decompression : ZSTD_decompressBegin()
-      + variants _usingDict() are also allowed
-      + copyCCtx() and copyDCtx() work too
-    - Block size is limited, it must be <= ZSTD_getBlockSizeMax()
-      + If you need to compress more, cut data into multiple blocks
-      + Consider using the regular ZSTD_compress() instead, as frame metadata costs become negligible when source size is large.
+      + compression : any ZSTD_compressBegin*() variant, including with dictionary
+      + decompression : any ZSTD_decompressBegin*() variant, including with dictionary
+      + copyCCtx() and copyDCtx() can be used too
+    - Block size is limited, it must be <= ZSTD_getBlockSize() <= ZSTD_BLOCKSIZE_MAX
+      + If input is larger than a block size, it's necessary to split input data into multiple blocks
+      + For inputs larger than a single block size, consider using the regular ZSTD_compress() instead.
+        Frame metadata is not that costly, and quickly becomes negligible as source size grows larger.
     - When a block is considered not compressible enough, ZSTD_compressBlock() result will be zero.
       In which case, nothing is produced into `dst`.
       + User must test for such outcome and deal directly with uncompressed data
       + ZSTD_decompressBlock() doesn't accept uncompressed data as input !!!
-      + In case of multiple successive blocks, decoder must be informed of uncompressed block existence to follow proper history.
-        Use ZSTD_insertBlock() in such a case.
+      + In case of multiple successive blocks, should some of them be uncompressed,
+        decoder must be informed of their existence in order to follow proper history.
+        Use ZSTD_insertBlock() for such a case.
 */
 
-#define ZSTD_BLOCKSIZE_ABSOLUTEMAX (128 * 1024)   /* define, for static allocation */
-ZSTDLIB_API size_t ZSTD_getBlockSizeMax(ZSTD_CCtx* cctx);
+#define ZSTD_BLOCKSIZELOG_MAX 17
+#define ZSTD_BLOCKSIZE_MAX   (1<<ZSTD_BLOCKSIZELOG_MAX)   /* define, for static allocation */
+/*=====   Raw zstd block functions  =====*/
+ZSTDLIB_API size_t ZSTD_getBlockSize   (const ZSTD_CCtx* cctx);
 ZSTDLIB_API size_t ZSTD_compressBlock  (ZSTD_CCtx* cctx, void* dst, size_t dstCapacity, const void* src, size_t srcSize);
 ZSTDLIB_API size_t ZSTD_decompressBlock(ZSTD_DCtx* dctx, void* dst, size_t dstCapacity, const void* src, size_t srcSize);
 ZSTDLIB_API size_t ZSTD_insertBlock(ZSTD_DCtx* dctx, const void* blockStart, size_t blockSize);  /**< insert block into `dctx` history. Useful for uncompressed blocks */
 
 
-#endif   /* ZSTD_STATIC_LINKING_ONLY */
+#endif   /* ZSTD_H_ZSTD_STATIC_LINKING_ONLY */
 
 #if defined (__cplusplus)
 }
 #endif
-
-#endif  /* ZSTD_H_235446 */
diff --git a/dbms/cmake/version.cmake b/dbms/cmake/version.cmake
index 4c3c9c34107..e5a67e1c743 100644
--- a/dbms/cmake/version.cmake
+++ b/dbms/cmake/version.cmake
@@ -1,6 +1,6 @@
 # This strings autochanged from release_lib.sh:
-set(VERSION_DESCRIBE v1.1.54275-testing)
-set(VERSION_REVISION 54275)
+set(VERSION_DESCRIBE v1.1.54281-testing)
+set(VERSION_REVISION 54281)
 # end of autochange
 
 set (VERSION_MAJOR 1)
diff --git a/dbms/src/AggregateFunctions/AggregateFunctionAvg.h b/dbms/src/AggregateFunctions/AggregateFunctionAvg.h
index 9cdff36c4bc..a073b279583 100644
--- a/dbms/src/AggregateFunctions/AggregateFunctionAvg.h
+++ b/dbms/src/AggregateFunctions/AggregateFunctionAvg.h
@@ -23,7 +23,7 @@ struct AggregateFunctionAvgData
 
 /// Calculates arithmetic mean of numbers.
 template <typename T>
-class AggregateFunctionAvg final : public IUnaryAggregateFunction<AggregateFunctionAvgData<typename NearestFieldType<T>::Type>, AggregateFunctionAvg<T> >
+class AggregateFunctionAvg final : public IUnaryAggregateFunction<AggregateFunctionAvgData<typename NearestFieldType<T>::Type>, AggregateFunctionAvg<T>>
 {
 public:
     String getName() const override { return "avg"; }
diff --git a/dbms/src/AggregateFunctions/AggregateFunctionBitwise.cpp b/dbms/src/AggregateFunctions/AggregateFunctionBitwise.cpp
new file mode 100644
index 00000000000..da3a28f6015
--- /dev/null
+++ b/dbms/src/AggregateFunctions/AggregateFunctionBitwise.cpp
@@ -0,0 +1,39 @@
+#include <AggregateFunctions/AggregateFunctionFactory.h>
+#include <AggregateFunctions/AggregateFunctionBitwise.h>
+#include <AggregateFunctions/Helpers.h>
+
+namespace DB
+{
+
+namespace
+{
+
+template <template <typename> class Data>
+AggregateFunctionPtr createAggregateFunctionBitwise(const std::string & name, const DataTypes & argument_types, const Array & parameters)
+{
+    if (argument_types.size() != 1)
+        throw Exception("Incorrect number of arguments for aggregate function " + name, ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH);
+
+    AggregateFunctionPtr res(createWithUnsignedIntegerType<AggregateFunctionBitwise, Data>(*argument_types[0]));
+
+    if (!res)
+        throw Exception("Illegal type " + argument_types[0]->getName() + " of argument for aggregate function " + name, ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT);
+
+    return res;
+}
+
+}
+
+void registerAggregateFunctionsBitwise(AggregateFunctionFactory & factory)
+{
+    factory.registerFunction("groupBitOr", createAggregateFunctionBitwise<AggregateFunctionGroupBitOrData>);
+    factory.registerFunction("groupBitAnd", createAggregateFunctionBitwise<AggregateFunctionGroupBitAndData>);
+    factory.registerFunction("groupBitXor", createAggregateFunctionBitwise<AggregateFunctionGroupBitXorData>);
+
+    /// Aliases for compatibility with MySQL.
+    factory.registerFunction("BIT_OR", createAggregateFunctionBitwise<AggregateFunctionGroupBitOrData>, AggregateFunctionFactory::CaseInsensitive);
+    factory.registerFunction("BIT_AND", createAggregateFunctionBitwise<AggregateFunctionGroupBitAndData>, AggregateFunctionFactory::CaseInsensitive);
+    factory.registerFunction("BIT_XOR", createAggregateFunctionBitwise<AggregateFunctionGroupBitXorData>, AggregateFunctionFactory::CaseInsensitive);
+}
+
+}
diff --git a/dbms/src/AggregateFunctions/AggregateFunctionBitwise.h b/dbms/src/AggregateFunctions/AggregateFunctionBitwise.h
new file mode 100644
index 00000000000..281a7c122ea
--- /dev/null
+++ b/dbms/src/AggregateFunctions/AggregateFunctionBitwise.h
@@ -0,0 +1,87 @@
+#pragma once
+
+#include <IO/WriteHelpers.h>
+#include <IO/ReadHelpers.h>
+
+#include <DataTypes/DataTypesNumber.h>
+#include <Columns/ColumnVector.h>
+
+#include <AggregateFunctions/IUnaryAggregateFunction.h>
+
+
+namespace DB
+{
+
+
+template <typename T>
+struct AggregateFunctionGroupBitOrData
+{
+    T value = 0;
+    static const char * name() { return "groupBitOr"; }
+    void update(T x) { value |= x; }
+};
+
+template <typename T>
+struct AggregateFunctionGroupBitAndData
+{
+    T value = -1; /// Two's complement arithmetic, sign extension.
+    static const char * name() { return "groupBitAnd"; }
+    void update(T x) { value &= x; }
+};
+
+template <typename T>
+struct AggregateFunctionGroupBitXorData
+{
+    T value = 0;
+    static const char * name() { return "groupBitXor"; }
+    void update(T x) { value ^= x; }
+};
+
+
+/// Counts bitwise operation on numbers.
+template <typename T, typename Data>
+class AggregateFunctionBitwise final : public IUnaryAggregateFunction<Data, AggregateFunctionBitwise<T, Data>>
+{
+public:
+    String getName() const override { return Data::name(); }
+
+    DataTypePtr getReturnType() const override
+    {
+        return std::make_shared<DataTypeNumber<T>>();
+    }
+
+    void setArgument(const DataTypePtr & argument)
+    {
+        if (!argument->behavesAsNumber())
+            throw Exception("Illegal type " + argument->getName() + " of argument for aggregate function " + getName(),
+                ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT);
+    }
+
+    void addImpl(AggregateDataPtr place, const IColumn & column, size_t row_num, Arena *) const
+    {
+        this->data(place).update(static_cast<const ColumnVector<T> &>(column).getData()[row_num]);
+    }
+
+    void merge(AggregateDataPtr place, ConstAggregateDataPtr rhs, Arena * arena) const override
+    {
+        this->data(place).update(this->data(rhs).value);
+    }
+
+    void serialize(ConstAggregateDataPtr place, WriteBuffer & buf) const override
+    {
+        writeBinary(this->data(place).value, buf);
+    }
+
+    void deserialize(AggregateDataPtr place, ReadBuffer & buf, Arena *) const override
+    {
+        readBinary(this->data(place).value, buf);
+    }
+
+    void insertResultInto(ConstAggregateDataPtr place, IColumn & to) const override
+    {
+        static_cast<ColumnVector<T> &>(to).getData().push_back(this->data(place).value);
+    }
+};
+
+
+}
diff --git a/dbms/src/AggregateFunctions/AggregateFunctionFactory.cpp b/dbms/src/AggregateFunctions/AggregateFunctionFactory.cpp
index 19a01635065..6a541ff28b4 100644
--- a/dbms/src/AggregateFunctions/AggregateFunctionFactory.cpp
+++ b/dbms/src/AggregateFunctions/AggregateFunctionFactory.cpp
@@ -1,13 +1,17 @@
 #include <AggregateFunctions/AggregateFunctionFactory.h>
-#include <IO/WriteBuffer.h>
-#include <IO/WriteHelpers.h>
+
 #include <DataTypes/DataTypeAggregateFunction.h>
 #include <DataTypes/DataTypeArray.h>
 #include <DataTypes/DataTypeNullable.h>
+#include <IO/WriteBuffer.h>
+#include <IO/WriteHelpers.h>
+#include <Interpreters/Context.h>
+
 #include <Common/StringUtils.h>
-#include <Poco/String.h>
 #include <Common/typeid_cast.h>
 
+#include <Poco/String.h>
+
 
 namespace DB
 {
diff --git a/dbms/src/AggregateFunctions/AggregateFunctionFactory.h b/dbms/src/AggregateFunctions/AggregateFunctionFactory.h
index be44a7a9e87..4e5423396bf 100644
--- a/dbms/src/AggregateFunctions/AggregateFunctionFactory.h
+++ b/dbms/src/AggregateFunctions/AggregateFunctionFactory.h
@@ -1,14 +1,22 @@
 #pragma once
 
-#include <unordered_map>
 #include <AggregateFunctions/IAggregateFunction.h>
+
 #include <ext/singleton.h>
 
+#include <functional>
+#include <memory>
+#include <string>
+#include <unordered_map>
+#include <vector>
+
 
 namespace DB
 {
 
+class Context;
 class IDataType;
+
 using DataTypePtr = std::shared_ptr<IDataType>;
 using DataTypes = std::vector<DataTypePtr>;
 
@@ -19,22 +27,8 @@ class AggregateFunctionFactory final : public ext::singleton<AggregateFunctionFa
 {
     friend class StorageSystemFunctions;
 
-private:
-    /// No std::function, for smaller object size and less indirection.
-    using Creator = AggregateFunctionPtr(*)(const String & name, const DataTypes & argument_types, const Array & parameters);
-    using AggregateFunctions = std::unordered_map<String, Creator>;
-
 public:
-
-    AggregateFunctionPtr get(
-        const String & name,
-        const DataTypes & argument_types,
-        const Array & parameters = {},
-        int recursion_level = 0) const;
-
-    AggregateFunctionPtr tryGet(const String & name, const DataTypes & argument_types, const Array & parameters = {}) const;
-
-    bool isAggregateFunctionName(const String & name, int recursion_level = 0) const;
+    using Creator = std::function<AggregateFunctionPtr(const String &, const DataTypes &, const Array &)>;
 
     /// For compatibility with SQL, it's possible to specify that certain aggregate function name is case insensitive.
     enum CaseSensitiveness
@@ -43,11 +37,29 @@ public:
         CaseInsensitive
     };
 
-    /// Register an aggregate function by its name.
-    void registerFunction(const String & name, Creator creator, CaseSensitiveness case_sensitiveness = CaseSensitive);
+    /// Register a function by its name.
+    /// No locking, you must register all functions before usage of get.
+    void registerFunction(
+        const String & name,
+        Creator creator,
+        CaseSensitiveness case_sensitiveness = CaseSensitive);
+
+    /// Throws an exception if not found.
+    AggregateFunctionPtr get(
+        const String & name,
+        const DataTypes & argument_types,
+        const Array & parameters = {},
+        int recursion_level = 0) const;
+
+    /// Returns nullptr if not found.
+    AggregateFunctionPtr tryGet(
+        const String & name,
+        const DataTypes & argument_types,
+        const Array & parameters = {}) const;
+
+    bool isAggregateFunctionName(const String & name, int recursion_level = 0) const;
 
 private:
-
     AggregateFunctionPtr getImpl(
         const String & name,
         const DataTypes & argument_types,
@@ -55,6 +67,8 @@ private:
         int recursion_level) const;
 
 private:
+    using AggregateFunctions = std::unordered_map<String, Creator>;
+
     AggregateFunctions aggregate_functions;
 
     /// Case insensitive aggregate functions will be additionally added here with lowercased name.
diff --git a/dbms/src/AggregateFunctions/AggregateFunctionGroupArray.cpp b/dbms/src/AggregateFunctions/AggregateFunctionGroupArray.cpp
index 12151e4b771..6019cb3bf06 100644
--- a/dbms/src/AggregateFunctions/AggregateFunctionGroupArray.cpp
+++ b/dbms/src/AggregateFunctions/AggregateFunctionGroupArray.cpp
@@ -24,9 +24,9 @@ inline AggregateFunctionPtr createAggregateFunctionGroupArrayImpl(const DataType
         return AggregateFunctionPtr(res);
 
     if (typeid_cast<const DataTypeString *>(argument_type.get()))
-        return std::make_shared<GroupArrayGeneralListImpl<NodeString, has_limit::value>>(std::forward<TArgs>(args)...);
+        return std::make_shared<GroupArrayGeneralListImpl<GroupArrayListNodeString, has_limit::value>>(std::forward<TArgs>(args)...);
 
-    return std::make_shared<GroupArrayGeneralListImpl<NodeGeneral, has_limit::value>>(std::forward<TArgs>(args)...);
+    return std::make_shared<GroupArrayGeneralListImpl<GroupArrayListNodeGeneral, has_limit::value>>(std::forward<TArgs>(args)...);
 };
 
 
diff --git a/dbms/src/AggregateFunctions/AggregateFunctionGroupArray.h b/dbms/src/AggregateFunctions/AggregateFunctionGroupArray.h
index b7db1f393e5..c766d8c02d6 100644
--- a/dbms/src/AggregateFunctions/AggregateFunctionGroupArray.h
+++ b/dbms/src/AggregateFunctions/AggregateFunctionGroupArray.h
@@ -31,9 +31,6 @@ namespace ErrorCodes
 }
 
 
-namespace
-{
-
 /// A particular case is an implementation for numeric types.
 template <typename T>
 struct GroupArrayNumericData
@@ -146,12 +143,10 @@ public:
 /// General case
 
 
-/// Nodes used to implement linked list for stoarge of groupArray states
-struct NodeString;
-struct NodeGeneral;
+/// Nodes used to implement a linked list for storage of groupArray states
 
 template <typename Node>
-struct NodeBase
+struct GroupArrayListNodeBase
 {
     Node * next;
     UInt64 size; // size of payload
@@ -159,7 +154,7 @@ struct NodeBase
     /// Returns pointer to actual payload
     char * data()
     {
-        static_assert(sizeof(NodeBase) == sizeof(Node));
+        static_assert(sizeof(GroupArrayListNodeBase) == sizeof(Node));
         return reinterpret_cast<char *>(this) + sizeof(Node);
     }
 
@@ -189,9 +184,9 @@ struct NodeBase
     }
 };
 
-struct NodeString : public NodeBase<NodeString>
+struct GroupArrayListNodeString : public GroupArrayListNodeBase<GroupArrayListNodeString>
 {
-    using Node = NodeString;
+    using Node = GroupArrayListNodeString;
 
     /// Create node from string
     static Node * allocate(const IColumn & column, size_t row_num, Arena * arena)
@@ -212,9 +207,9 @@ struct NodeString : public NodeBase<NodeString>
     }
 };
 
-struct NodeGeneral : public NodeBase<NodeGeneral>
+struct GroupArrayListNodeGeneral : public GroupArrayListNodeBase<GroupArrayListNodeGeneral>
 {
-    using Node = NodeGeneral;
+    using Node = GroupArrayListNodeGeneral;
 
     static Node * allocate(const IColumn & column, size_t row_num, Arena * arena)
     {
@@ -267,7 +262,7 @@ public:
     void setParameters(const Array & params) override
     {
         if (!limit_num_elems && !params.empty())
-            throw Exception("This instatintion of " + getName() + "aggregate function doesn't accept any parameters. It is a bug.", ErrorCodes::LOGICAL_ERROR);
+            throw Exception("This instantiation of " + getName() + "aggregate function doesn't accept any parameters. It is a bug.", ErrorCodes::LOGICAL_ERROR);
     }
 
     void setArgument(const DataTypePtr & argument)
@@ -395,7 +390,7 @@ public:
 
         auto & column_data = column_array.getData();
 
-        if (std::is_same<Node, NodeString>::value)
+        if (std::is_same<Node, GroupArrayListNodeString>::value)
         {
             auto & string_offsets = static_cast<ColumnString &>(column_data).getOffsets();
             string_offsets.reserve(string_offsets.size() + data(place).elems);
@@ -415,9 +410,6 @@ public:
     }
 };
 
-}
-
-
 #undef AGGREGATE_FUNCTION_GROUP_ARRAY_MAX_ARRAY_SIZE
 
 }
diff --git a/dbms/src/AggregateFunctions/AggregateFunctionMinMaxAny.h b/dbms/src/AggregateFunctions/AggregateFunctionMinMaxAny.h
index c27c8dfd568..42b2760e1f7 100644
--- a/dbms/src/AggregateFunctions/AggregateFunctionMinMaxAny.h
+++ b/dbms/src/AggregateFunctions/AggregateFunctionMinMaxAny.h
@@ -680,7 +680,7 @@ struct AggregateFunctionAnyHeavyData : Data
 
 
 template <typename Data>
-class AggregateFunctionsSingleValue final : public IUnaryAggregateFunction<Data, AggregateFunctionsSingleValue<Data> >
+class AggregateFunctionsSingleValue final : public IUnaryAggregateFunction<Data, AggregateFunctionsSingleValue<Data>>
 {
 private:
     DataTypePtr type;
diff --git a/dbms/src/AggregateFunctions/AggregateFunctionQuantile.h b/dbms/src/AggregateFunctions/AggregateFunctionQuantile.h
index eb7ae69aca0..32f4650a55b 100644
--- a/dbms/src/AggregateFunctions/AggregateFunctionQuantile.h
+++ b/dbms/src/AggregateFunctions/AggregateFunctionQuantile.h
@@ -34,7 +34,7 @@ struct AggregateFunctionQuantileData
   */
 template <typename ArgumentFieldType, bool returns_float = true>
 class AggregateFunctionQuantile final
-    : public IUnaryAggregateFunction<AggregateFunctionQuantileData<ArgumentFieldType>, AggregateFunctionQuantile<ArgumentFieldType, returns_float> >
+    : public IUnaryAggregateFunction<AggregateFunctionQuantileData<ArgumentFieldType>, AggregateFunctionQuantile<ArgumentFieldType, returns_float>>
 {
 private:
     using Sample = typename AggregateFunctionQuantileData<ArgumentFieldType>::Sample;
@@ -108,7 +108,7 @@ public:
   */
 template <typename ArgumentFieldType, bool returns_float = true>
 class AggregateFunctionQuantiles final
-    : public IUnaryAggregateFunction<AggregateFunctionQuantileData<ArgumentFieldType>, AggregateFunctionQuantiles<ArgumentFieldType, returns_float> >
+    : public IUnaryAggregateFunction<AggregateFunctionQuantileData<ArgumentFieldType>, AggregateFunctionQuantiles<ArgumentFieldType, returns_float>>
 {
 private:
     using Sample = typename AggregateFunctionQuantileData<ArgumentFieldType>::Sample;
diff --git a/dbms/src/AggregateFunctions/AggregateFunctionQuantileTiming.h b/dbms/src/AggregateFunctions/AggregateFunctionQuantileTiming.h
index 9f4bb9eeda2..d07c3a7fcca 100644
--- a/dbms/src/AggregateFunctions/AggregateFunctionQuantileTiming.h
+++ b/dbms/src/AggregateFunctions/AggregateFunctionQuantileTiming.h
@@ -790,7 +790,7 @@ public:
 
 
 template <typename ArgumentFieldType>
-class AggregateFunctionQuantileTiming final : public IUnaryAggregateFunction<QuantileTiming, AggregateFunctionQuantileTiming<ArgumentFieldType> >
+class AggregateFunctionQuantileTiming final : public IUnaryAggregateFunction<QuantileTiming, AggregateFunctionQuantileTiming<ArgumentFieldType>>
 {
 private:
     double level;
@@ -910,7 +910,7 @@ public:
   * Returns an array of results.
   */
 template <typename ArgumentFieldType>
-class AggregateFunctionQuantilesTiming final : public IUnaryAggregateFunction<QuantileTiming, AggregateFunctionQuantilesTiming<ArgumentFieldType> >
+class AggregateFunctionQuantilesTiming final : public IUnaryAggregateFunction<QuantileTiming, AggregateFunctionQuantilesTiming<ArgumentFieldType>>
 {
 private:
     QuantileLevels<double> levels;
diff --git a/dbms/src/AggregateFunctions/AggregateFunctionStatistics.h b/dbms/src/AggregateFunctions/AggregateFunctionStatistics.h
index 96cfdcda3b8..4ab999dd904 100644
--- a/dbms/src/AggregateFunctions/AggregateFunctionStatistics.h
+++ b/dbms/src/AggregateFunctions/AggregateFunctionStatistics.h
@@ -112,7 +112,7 @@ private:
 template<typename T, typename Op>
 class AggregateFunctionVariance final
     : public IUnaryAggregateFunction<AggregateFunctionVarianceData<T, Op>,
-        AggregateFunctionVariance<T, Op> >
+        AggregateFunctionVariance<T, Op>>
 {
 public:
     String getName() const override { return Op::name; }
@@ -155,12 +155,9 @@ public:
     }
 };
 
-namespace
-{
-
 /** Implementing the varSamp function.
   */
-struct VarSampImpl
+struct AggregateFunctionVarSampImpl
 {
     static constexpr auto name = "varSamp";
 
@@ -175,19 +172,19 @@ struct VarSampImpl
 
 /** Implementing the stddevSamp function.
   */
-struct StdDevSampImpl
+struct AggregateFunctionStdDevSampImpl
 {
     static constexpr auto name = "stddevSamp";
 
     static inline Float64 apply(Float64 m2, UInt64 count)
     {
-        return sqrt(VarSampImpl::apply(m2, count));
+        return sqrt(AggregateFunctionVarSampImpl::apply(m2, count));
     }
 };
 
 /** Implementing the varPop function.
   */
-struct VarPopImpl
+struct AggregateFunctionVarPopImpl
 {
     static constexpr auto name = "varPop";
 
@@ -204,18 +201,16 @@ struct VarPopImpl
 
 /** Implementing the stddevPop function.
   */
-struct StdDevPopImpl
+struct AggregateFunctionStdDevPopImpl
 {
     static constexpr auto name = "stddevPop";
 
     static inline Float64 apply(Float64 m2, UInt64 count)
     {
-        return sqrt(VarPopImpl::apply(m2, count));
+        return sqrt(AggregateFunctionVarPopImpl::apply(m2, count));
     }
 };
 
-}
-
 /** If `compute_marginal_moments` flag is set this class provides the successor
   * CovarianceData support of marginal moments for calculating the correlation.
   */
@@ -376,7 +371,7 @@ template<typename T, typename U, typename Op, bool compute_marginal_moments = fa
 class AggregateFunctionCovariance final
     : public IBinaryAggregateFunction<
         CovarianceData<T, U, Op, compute_marginal_moments>,
-        AggregateFunctionCovariance<T, U, Op, compute_marginal_moments> >
+        AggregateFunctionCovariance<T, U, Op, compute_marginal_moments>>
 {
 public:
     String getName() const override { return Op::name; }
@@ -423,12 +418,9 @@ public:
     }
 };
 
-namespace
-{
-
 /** Implementing the covarSamp function.
   */
-struct CovarSampImpl
+struct AggregateFunctionCovarSampImpl
 {
     static constexpr auto name = "covarSamp";
 
@@ -443,7 +435,7 @@ struct CovarSampImpl
 
 /** Implementing the covarPop function.
   */
-struct CovarPopImpl
+struct AggregateFunctionCovarPopImpl
 {
     static constexpr auto name = "covarPop";
 
@@ -460,7 +452,7 @@ struct CovarPopImpl
 
 /** `corr` function implementation.
   */
-struct CorrImpl
+struct AggregateFunctionCorrImpl
 {
     static constexpr auto name = "corr";
 
@@ -473,27 +465,25 @@ struct CorrImpl
     }
 };
 
-}
+template<typename T>
+using AggregateFunctionVarSamp = AggregateFunctionVariance<T, AggregateFunctionVarSampImpl>;
 
 template<typename T>
-using AggregateFunctionVarSamp = AggregateFunctionVariance<T, VarSampImpl>;
+using AggregateFunctionStdDevSamp = AggregateFunctionVariance<T, AggregateFunctionStdDevSampImpl>;
 
 template<typename T>
-using AggregateFunctionStdDevSamp = AggregateFunctionVariance<T, StdDevSampImpl>;
+using AggregateFunctionVarPop = AggregateFunctionVariance<T, AggregateFunctionVarPopImpl>;
 
 template<typename T>
-using AggregateFunctionVarPop = AggregateFunctionVariance<T, VarPopImpl>;
-
-template<typename T>
-using AggregateFunctionStdDevPop = AggregateFunctionVariance<T, StdDevPopImpl>;
+using AggregateFunctionStdDevPop = AggregateFunctionVariance<T, AggregateFunctionStdDevPopImpl>;
 
 template<typename T, typename U>
-using AggregateFunctionCovarSamp = AggregateFunctionCovariance<T, U, CovarSampImpl>;
+using AggregateFunctionCovarSamp = AggregateFunctionCovariance<T, U, AggregateFunctionCovarSampImpl>;
 
 template<typename T, typename U>
-using AggregateFunctionCovarPop = AggregateFunctionCovariance<T, U, CovarPopImpl>;
+using AggregateFunctionCovarPop = AggregateFunctionCovariance<T, U, AggregateFunctionCovarPopImpl>;
 
 template<typename T, typename U>
-using AggregateFunctionCorr = AggregateFunctionCovariance<T, U, CorrImpl, true>;
+using AggregateFunctionCorr = AggregateFunctionCovariance<T, U, AggregateFunctionCorrImpl, true>;
 
 }
diff --git a/dbms/src/AggregateFunctions/AggregateFunctionSum.h b/dbms/src/AggregateFunctions/AggregateFunctionSum.h
index 10bbc7d8d8b..4bb6a711c4f 100644
--- a/dbms/src/AggregateFunctions/AggregateFunctionSum.h
+++ b/dbms/src/AggregateFunctions/AggregateFunctionSum.h
@@ -15,15 +15,13 @@ namespace DB
 template <typename T>
 struct AggregateFunctionSumData
 {
-    T sum;
-
-    AggregateFunctionSumData() : sum(0) {}
+    T sum{};
 };
 
 
 /// Counts the sum of the numbers.
 template <typename T>
-class AggregateFunctionSum final : public IUnaryAggregateFunction<AggregateFunctionSumData<typename NearestFieldType<T>::Type>, AggregateFunctionSum<T> >
+class AggregateFunctionSum final : public IUnaryAggregateFunction<AggregateFunctionSumData<typename NearestFieldType<T>::Type>, AggregateFunctionSum<T>>
 {
 public:
     String getName() const override { return "sum"; }
diff --git a/dbms/src/AggregateFunctions/AggregateFunctionUniq.h b/dbms/src/AggregateFunctions/AggregateFunctionUniq.h
index b5b954b62a9..9c36b12a2cc 100644
--- a/dbms/src/AggregateFunctions/AggregateFunctionUniq.h
+++ b/dbms/src/AggregateFunctions/AggregateFunctionUniq.h
@@ -122,7 +122,7 @@ struct BaseUniqCombinedData
     using Key = UInt32;
     using Set = CombinedCardinalityEstimator<
         Key,
-        HashSet<Key, TrivialHash, HashTableGrower<> >,
+        HashSet<Key, TrivialHash, HashTableGrower<>>,
         16,
         14,
         17,
@@ -141,7 +141,7 @@ struct BaseUniqCombinedData<String, mode>
     using Key = UInt64;
     using Set = CombinedCardinalityEstimator<
         Key,
-        HashSet<Key, TrivialHash, HashTableGrower<> >,
+        HashSet<Key, TrivialHash, HashTableGrower<>>,
         16,
         14,
         17,
@@ -252,7 +252,7 @@ struct OneAdder;
 template <typename T, typename Data>
 struct OneAdder<T, Data, typename std::enable_if<
     std::is_same<Data, AggregateFunctionUniqUniquesHashSetData>::value ||
-    std::is_same<Data, AggregateFunctionUniqHLL12Data<T> >::value>::type>
+    std::is_same<Data, AggregateFunctionUniqHLL12Data<T>>::value>::type>
 {
     template <typename T2 = T>
     static void addImpl(Data & data, const IColumn & column, size_t row_num,
@@ -273,10 +273,10 @@ struct OneAdder<T, Data, typename std::enable_if<
 
 template <typename T, typename Data>
 struct OneAdder<T, Data, typename std::enable_if<
-    std::is_same<Data, AggregateFunctionUniqCombinedRawData<T> >::value ||
-    std::is_same<Data, AggregateFunctionUniqCombinedLinearCountingData<T> >::value ||
-    std::is_same<Data, AggregateFunctionUniqCombinedBiasCorrectedData<T> >::value ||
-    std::is_same<Data, AggregateFunctionUniqCombinedData<T> >::value>::type>
+    std::is_same<Data, AggregateFunctionUniqCombinedRawData<T>>::value ||
+    std::is_same<Data, AggregateFunctionUniqCombinedLinearCountingData<T>>::value ||
+    std::is_same<Data, AggregateFunctionUniqCombinedBiasCorrectedData<T>>::value ||
+    std::is_same<Data, AggregateFunctionUniqCombinedData<T>>::value>::type>
 {
     template <typename T2 = T>
     static void addImpl(Data & data, const IColumn & column, size_t row_num,
@@ -297,7 +297,7 @@ struct OneAdder<T, Data, typename std::enable_if<
 
 template <typename T, typename Data>
 struct OneAdder<T, Data, typename std::enable_if<
-    std::is_same<Data, AggregateFunctionUniqExactData<T> >::value>::type>
+    std::is_same<Data, AggregateFunctionUniqExactData<T>>::value>::type>
 {
     template <typename T2 = T>
     static void addImpl(Data & data, const IColumn & column, size_t row_num,
@@ -326,7 +326,7 @@ struct OneAdder<T, Data, typename std::enable_if<
 
 /// Calculates the number of different values approximately or exactly.
 template <typename T, typename Data>
-class AggregateFunctionUniq final : public IUnaryAggregateFunction<Data, AggregateFunctionUniq<T, Data> >
+class AggregateFunctionUniq final : public IUnaryAggregateFunction<Data, AggregateFunctionUniq<T, Data>>
 {
 public:
     String getName() const override { return Data::getName(); }
diff --git a/dbms/src/AggregateFunctions/AggregateFunctionUniqUpTo.h b/dbms/src/AggregateFunctions/AggregateFunctionUniqUpTo.h
index d289bbcdc1b..3c40ad98c51 100644
--- a/dbms/src/AggregateFunctions/AggregateFunctionUniqUpTo.h
+++ b/dbms/src/AggregateFunctions/AggregateFunctionUniqUpTo.h
@@ -116,7 +116,7 @@ struct AggregateFunctionUniqUpToData<String> : AggregateFunctionUniqUpToData<UIn
 constexpr UInt8 uniq_upto_max_threshold = 100;
 
 template <typename T>
-class AggregateFunctionUniqUpTo final : public IUnaryAggregateFunction<AggregateFunctionUniqUpToData<T>, AggregateFunctionUniqUpTo<T> >
+class AggregateFunctionUniqUpTo final : public IUnaryAggregateFunction<AggregateFunctionUniqUpToData<T>, AggregateFunctionUniqUpTo<T>>
 {
 private:
     UInt8 threshold = 5;    /// Default value if the parameter is not specified.
diff --git a/dbms/src/AggregateFunctions/Helpers.h b/dbms/src/AggregateFunctions/Helpers.h
index d57218888da..a07fa630032 100644
--- a/dbms/src/AggregateFunctions/Helpers.h
+++ b/dbms/src/AggregateFunctions/Helpers.h
@@ -18,18 +18,18 @@ namespace DB
 template <template <typename> class AggregateFunctionTemplate>
 static IAggregateFunction * createWithNumericType(const IDataType & argument_type)
 {
-         if (typeid_cast<const DataTypeUInt8    *>(&argument_type)) return new AggregateFunctionTemplate<UInt8>;
-    else if (typeid_cast<const DataTypeUInt16   *>(&argument_type)) return new AggregateFunctionTemplate<UInt16>;
-    else if (typeid_cast<const DataTypeUInt32   *>(&argument_type)) return new AggregateFunctionTemplate<UInt32>;
-    else if (typeid_cast<const DataTypeUInt64   *>(&argument_type)) return new AggregateFunctionTemplate<UInt64>;
-    else if (typeid_cast<const DataTypeInt8     *>(&argument_type)) return new AggregateFunctionTemplate<Int8>;
-    else if (typeid_cast<const DataTypeInt16    *>(&argument_type)) return new AggregateFunctionTemplate<Int16>;
-    else if (typeid_cast<const DataTypeInt32    *>(&argument_type)) return new AggregateFunctionTemplate<Int32>;
-    else if (typeid_cast<const DataTypeInt64    *>(&argument_type)) return new AggregateFunctionTemplate<Int64>;
-    else if (typeid_cast<const DataTypeFloat32  *>(&argument_type)) return new AggregateFunctionTemplate<Float32>;
-    else if (typeid_cast<const DataTypeFloat64  *>(&argument_type)) return new AggregateFunctionTemplate<Float64>;
-    else if (typeid_cast<const DataTypeEnum8    *>(&argument_type)) return new AggregateFunctionTemplate<UInt8>;
-    else if (typeid_cast<const DataTypeEnum16   *>(&argument_type)) return new AggregateFunctionTemplate<UInt16>;
+         if (typeid_cast<const DataTypeUInt8 *>(&argument_type)) return new AggregateFunctionTemplate<UInt8>;
+    else if (typeid_cast<const DataTypeUInt16 *>(&argument_type)) return new AggregateFunctionTemplate<UInt16>;
+    else if (typeid_cast<const DataTypeUInt32 *>(&argument_type)) return new AggregateFunctionTemplate<UInt32>;
+    else if (typeid_cast<const DataTypeUInt64 *>(&argument_type)) return new AggregateFunctionTemplate<UInt64>;
+    else if (typeid_cast<const DataTypeInt8 *>(&argument_type)) return new AggregateFunctionTemplate<Int8>;
+    else if (typeid_cast<const DataTypeInt16 *>(&argument_type)) return new AggregateFunctionTemplate<Int16>;
+    else if (typeid_cast<const DataTypeInt32 *>(&argument_type)) return new AggregateFunctionTemplate<Int32>;
+    else if (typeid_cast<const DataTypeInt64 *>(&argument_type)) return new AggregateFunctionTemplate<Int64>;
+    else if (typeid_cast<const DataTypeFloat32 *>(&argument_type)) return new AggregateFunctionTemplate<Float32>;
+    else if (typeid_cast<const DataTypeFloat64 *>(&argument_type)) return new AggregateFunctionTemplate<Float64>;
+    else if (typeid_cast<const DataTypeEnum8 *>(&argument_type)) return new AggregateFunctionTemplate<UInt8>;
+    else if (typeid_cast<const DataTypeEnum16 *>(&argument_type)) return new AggregateFunctionTemplate<UInt16>;
     else
         return nullptr;
 }
@@ -37,18 +37,18 @@ static IAggregateFunction * createWithNumericType(const IDataType & argument_typ
 template <template <typename, typename> class AggregateFunctionTemplate, class Data>
 static IAggregateFunction * createWithNumericType(const IDataType & argument_type)
 {
-         if (typeid_cast<const DataTypeUInt8    *>(&argument_type)) return new AggregateFunctionTemplate<UInt8, Data>;
-    else if (typeid_cast<const DataTypeUInt16   *>(&argument_type)) return new AggregateFunctionTemplate<UInt16, Data>;
-    else if (typeid_cast<const DataTypeUInt32   *>(&argument_type)) return new AggregateFunctionTemplate<UInt32, Data>;
-    else if (typeid_cast<const DataTypeUInt64   *>(&argument_type)) return new AggregateFunctionTemplate<UInt64, Data>;
-    else if (typeid_cast<const DataTypeInt8     *>(&argument_type)) return new AggregateFunctionTemplate<Int8, Data>;
-    else if (typeid_cast<const DataTypeInt16    *>(&argument_type)) return new AggregateFunctionTemplate<Int16, Data>;
-    else if (typeid_cast<const DataTypeInt32    *>(&argument_type)) return new AggregateFunctionTemplate<Int32, Data>;
-    else if (typeid_cast<const DataTypeInt64    *>(&argument_type)) return new AggregateFunctionTemplate<Int64, Data>;
-    else if (typeid_cast<const DataTypeFloat32  *>(&argument_type)) return new AggregateFunctionTemplate<Float32, Data>;
-    else if (typeid_cast<const DataTypeFloat64  *>(&argument_type)) return new AggregateFunctionTemplate<Float64, Data>;
-    else if (typeid_cast<const DataTypeEnum8    *>(&argument_type)) return new AggregateFunctionTemplate<UInt8, Data>;
-    else if (typeid_cast<const DataTypeEnum16   *>(&argument_type)) return new AggregateFunctionTemplate<UInt16, Data>;
+         if (typeid_cast<const DataTypeUInt8 *>(&argument_type)) return new AggregateFunctionTemplate<UInt8, Data>;
+    else if (typeid_cast<const DataTypeUInt16 *>(&argument_type)) return new AggregateFunctionTemplate<UInt16, Data>;
+    else if (typeid_cast<const DataTypeUInt32 *>(&argument_type)) return new AggregateFunctionTemplate<UInt32, Data>;
+    else if (typeid_cast<const DataTypeUInt64 *>(&argument_type)) return new AggregateFunctionTemplate<UInt64, Data>;
+    else if (typeid_cast<const DataTypeInt8 *>(&argument_type)) return new AggregateFunctionTemplate<Int8, Data>;
+    else if (typeid_cast<const DataTypeInt16 *>(&argument_type)) return new AggregateFunctionTemplate<Int16, Data>;
+    else if (typeid_cast<const DataTypeInt32 *>(&argument_type)) return new AggregateFunctionTemplate<Int32, Data>;
+    else if (typeid_cast<const DataTypeInt64 *>(&argument_type)) return new AggregateFunctionTemplate<Int64, Data>;
+    else if (typeid_cast<const DataTypeFloat32 *>(&argument_type)) return new AggregateFunctionTemplate<Float32, Data>;
+    else if (typeid_cast<const DataTypeFloat64 *>(&argument_type)) return new AggregateFunctionTemplate<Float64, Data>;
+    else if (typeid_cast<const DataTypeEnum8 *>(&argument_type)) return new AggregateFunctionTemplate<UInt8, Data>;
+    else if (typeid_cast<const DataTypeEnum16 *>(&argument_type)) return new AggregateFunctionTemplate<UInt16, Data>;
     else
         return nullptr;
 }
@@ -56,18 +56,18 @@ static IAggregateFunction * createWithNumericType(const IDataType & argument_typ
 template <template <typename, typename> class AggregateFunctionTemplate, class Data, typename ... TArgs>
 static IAggregateFunction * createWithNumericType(const IDataType & argument_type, TArgs && ... args)
 {
-         if (typeid_cast<const DataTypeUInt8    *>(&argument_type)) return new AggregateFunctionTemplate<UInt8, Data>(std::forward<TArgs>(args)...);
-    else if (typeid_cast<const DataTypeUInt16   *>(&argument_type)) return new AggregateFunctionTemplate<UInt16, Data>(std::forward<TArgs>(args)...);
-    else if (typeid_cast<const DataTypeUInt32   *>(&argument_type)) return new AggregateFunctionTemplate<UInt32, Data>(std::forward<TArgs>(args)...);
-    else if (typeid_cast<const DataTypeUInt64   *>(&argument_type)) return new AggregateFunctionTemplate<UInt64, Data>(std::forward<TArgs>(args)...);
-    else if (typeid_cast<const DataTypeInt8     *>(&argument_type)) return new AggregateFunctionTemplate<Int8, Data>(std::forward<TArgs>(args)...);
-    else if (typeid_cast<const DataTypeInt16    *>(&argument_type)) return new AggregateFunctionTemplate<Int16, Data>(std::forward<TArgs>(args)...);
-    else if (typeid_cast<const DataTypeInt32    *>(&argument_type)) return new AggregateFunctionTemplate<Int32, Data>(std::forward<TArgs>(args)...);
-    else if (typeid_cast<const DataTypeInt64    *>(&argument_type)) return new AggregateFunctionTemplate<Int64, Data>(std::forward<TArgs>(args)...);
-    else if (typeid_cast<const DataTypeFloat32  *>(&argument_type)) return new AggregateFunctionTemplate<Float32, Data>(std::forward<TArgs>(args)...);
-    else if (typeid_cast<const DataTypeFloat64  *>(&argument_type)) return new AggregateFunctionTemplate<Float64, Data>(std::forward<TArgs>(args)...);
-    else if (typeid_cast<const DataTypeEnum8    *>(&argument_type)) return new AggregateFunctionTemplate<UInt8, Data>(std::forward<TArgs>(args)...);
-    else if (typeid_cast<const DataTypeEnum16   *>(&argument_type)) return new AggregateFunctionTemplate<UInt16, Data>(std::forward<TArgs>(args)...);
+         if (typeid_cast<const DataTypeUInt8 *>(&argument_type)) return new AggregateFunctionTemplate<UInt8, Data>(std::forward<TArgs>(args)...);
+    else if (typeid_cast<const DataTypeUInt16 *>(&argument_type)) return new AggregateFunctionTemplate<UInt16, Data>(std::forward<TArgs>(args)...);
+    else if (typeid_cast<const DataTypeUInt32 *>(&argument_type)) return new AggregateFunctionTemplate<UInt32, Data>(std::forward<TArgs>(args)...);
+    else if (typeid_cast<const DataTypeUInt64 *>(&argument_type)) return new AggregateFunctionTemplate<UInt64, Data>(std::forward<TArgs>(args)...);
+    else if (typeid_cast<const DataTypeInt8 *>(&argument_type)) return new AggregateFunctionTemplate<Int8, Data>(std::forward<TArgs>(args)...);
+    else if (typeid_cast<const DataTypeInt16 *>(&argument_type)) return new AggregateFunctionTemplate<Int16, Data>(std::forward<TArgs>(args)...);
+    else if (typeid_cast<const DataTypeInt32 *>(&argument_type)) return new AggregateFunctionTemplate<Int32, Data>(std::forward<TArgs>(args)...);
+    else if (typeid_cast<const DataTypeInt64 *>(&argument_type)) return new AggregateFunctionTemplate<Int64, Data>(std::forward<TArgs>(args)...);
+    else if (typeid_cast<const DataTypeFloat32 *>(&argument_type)) return new AggregateFunctionTemplate<Float32, Data>(std::forward<TArgs>(args)...);
+    else if (typeid_cast<const DataTypeFloat64 *>(&argument_type)) return new AggregateFunctionTemplate<Float64, Data>(std::forward<TArgs>(args)...);
+    else if (typeid_cast<const DataTypeEnum8 *>(&argument_type)) return new AggregateFunctionTemplate<UInt8, Data>(std::forward<TArgs>(args)...);
+    else if (typeid_cast<const DataTypeEnum16 *>(&argument_type)) return new AggregateFunctionTemplate<UInt16, Data>(std::forward<TArgs>(args)...);
     else
         return nullptr;
 }
@@ -75,18 +75,30 @@ static IAggregateFunction * createWithNumericType(const IDataType & argument_typ
 template <template <typename, typename> class AggregateFunctionTemplate, template <typename> class Data>
 static IAggregateFunction * createWithNumericType(const IDataType & argument_type)
 {
-         if (typeid_cast<const DataTypeUInt8     *>(&argument_type))    return new AggregateFunctionTemplate<UInt8, Data<UInt8> >;
-    else if (typeid_cast<const DataTypeUInt16     *>(&argument_type))    return new AggregateFunctionTemplate<UInt16, Data<UInt16> >;
-    else if (typeid_cast<const DataTypeUInt32     *>(&argument_type))    return new AggregateFunctionTemplate<UInt32, Data<UInt32> >;
-    else if (typeid_cast<const DataTypeUInt64     *>(&argument_type))    return new AggregateFunctionTemplate<UInt64, Data<UInt64> >;
-    else if (typeid_cast<const DataTypeInt8     *>(&argument_type))    return new AggregateFunctionTemplate<Int8, Data<Int8> >;
-    else if (typeid_cast<const DataTypeInt16     *>(&argument_type))    return new AggregateFunctionTemplate<Int16, Data<Int16> >;
-    else if (typeid_cast<const DataTypeInt32     *>(&argument_type))    return new AggregateFunctionTemplate<Int32, Data<Int32> >;
-    else if (typeid_cast<const DataTypeInt64     *>(&argument_type))    return new AggregateFunctionTemplate<Int64, Data<Int64> >;
-    else if (typeid_cast<const DataTypeFloat32 *>(&argument_type))    return new AggregateFunctionTemplate<Float32, Data<Float32> >;
-    else if (typeid_cast<const DataTypeFloat64 *>(&argument_type))    return new AggregateFunctionTemplate<Float64, Data<Float64> >;
-    else if (typeid_cast<const DataTypeEnum8     *>(&argument_type))    return new AggregateFunctionTemplate<UInt8, Data<UInt8> >;
-    else if (typeid_cast<const DataTypeEnum16     *>(&argument_type))    return new AggregateFunctionTemplate<UInt16, Data<UInt16> >;
+         if (typeid_cast<const DataTypeUInt8 *>(&argument_type)) return new AggregateFunctionTemplate<UInt8, Data<UInt8>>;
+    else if (typeid_cast<const DataTypeUInt16 *>(&argument_type)) return new AggregateFunctionTemplate<UInt16, Data<UInt16>>;
+    else if (typeid_cast<const DataTypeUInt32 *>(&argument_type)) return new AggregateFunctionTemplate<UInt32, Data<UInt32>>;
+    else if (typeid_cast<const DataTypeUInt64 *>(&argument_type)) return new AggregateFunctionTemplate<UInt64, Data<UInt64>>;
+    else if (typeid_cast<const DataTypeInt8 *>(&argument_type)) return new AggregateFunctionTemplate<Int8, Data<Int8>>;
+    else if (typeid_cast<const DataTypeInt16 *>(&argument_type)) return new AggregateFunctionTemplate<Int16, Data<Int16>>;
+    else if (typeid_cast<const DataTypeInt32 *>(&argument_type)) return new AggregateFunctionTemplate<Int32, Data<Int32>>;
+    else if (typeid_cast<const DataTypeInt64 *>(&argument_type)) return new AggregateFunctionTemplate<Int64, Data<Int64>>;
+    else if (typeid_cast<const DataTypeFloat32 *>(&argument_type)) return new AggregateFunctionTemplate<Float32, Data<Float32>>;
+    else if (typeid_cast<const DataTypeFloat64 *>(&argument_type)) return new AggregateFunctionTemplate<Float64, Data<Float64>>;
+    else if (typeid_cast<const DataTypeEnum8 *>(&argument_type)) return new AggregateFunctionTemplate<UInt8, Data<UInt8>>;
+    else if (typeid_cast<const DataTypeEnum16 *>(&argument_type)) return new AggregateFunctionTemplate<UInt16, Data<UInt16>>;
+    else
+        return nullptr;
+}
+
+
+template <template <typename, typename> class AggregateFunctionTemplate, template <typename> class Data>
+static IAggregateFunction * createWithUnsignedIntegerType(const IDataType & argument_type)
+{
+         if (typeid_cast<const DataTypeUInt8 *>(&argument_type)) return new AggregateFunctionTemplate<UInt8, Data<UInt8>>;
+    else if (typeid_cast<const DataTypeUInt16 *>(&argument_type)) return new AggregateFunctionTemplate<UInt16, Data<UInt16>>;
+    else if (typeid_cast<const DataTypeUInt32 *>(&argument_type)) return new AggregateFunctionTemplate<UInt32, Data<UInt32>>;
+    else if (typeid_cast<const DataTypeUInt64 *>(&argument_type)) return new AggregateFunctionTemplate<UInt64, Data<UInt64>>;
     else
         return nullptr;
 }
@@ -97,18 +109,18 @@ static IAggregateFunction * createWithNumericType(const IDataType & argument_typ
 template <typename FirstType, template <typename, typename> class AggregateFunctionTemplate>
 static IAggregateFunction * createWithTwoNumericTypesSecond(const IDataType & second_type)
 {
-         if (typeid_cast<const DataTypeUInt8     *>(&second_type))    return new AggregateFunctionTemplate<FirstType, UInt8>;
-    else if (typeid_cast<const DataTypeUInt16     *>(&second_type))    return new AggregateFunctionTemplate<FirstType, UInt16>;
-    else if (typeid_cast<const DataTypeUInt32     *>(&second_type))    return new AggregateFunctionTemplate<FirstType, UInt32>;
-    else if (typeid_cast<const DataTypeUInt64     *>(&second_type))    return new AggregateFunctionTemplate<FirstType, UInt64>;
-    else if (typeid_cast<const DataTypeInt8     *>(&second_type))    return new AggregateFunctionTemplate<FirstType, Int8>;
-    else if (typeid_cast<const DataTypeInt16     *>(&second_type))    return new AggregateFunctionTemplate<FirstType, Int16>;
-    else if (typeid_cast<const DataTypeInt32     *>(&second_type))    return new AggregateFunctionTemplate<FirstType, Int32>;
-    else if (typeid_cast<const DataTypeInt64     *>(&second_type))    return new AggregateFunctionTemplate<FirstType, Int64>;
-    else if (typeid_cast<const DataTypeFloat32     *>(&second_type))    return new AggregateFunctionTemplate<FirstType, Float32>;
-    else if (typeid_cast<const DataTypeFloat64     *>(&second_type))    return new AggregateFunctionTemplate<FirstType, Float64>;
-    else if (typeid_cast<const DataTypeEnum8     *>(&second_type))    return new AggregateFunctionTemplate<FirstType, UInt8>;
-    else if (typeid_cast<const DataTypeEnum16     *>(&second_type))    return new AggregateFunctionTemplate<FirstType, UInt16>;
+         if (typeid_cast<const DataTypeUInt8 *>(&second_type)) return new AggregateFunctionTemplate<FirstType, UInt8>;
+    else if (typeid_cast<const DataTypeUInt16 *>(&second_type)) return new AggregateFunctionTemplate<FirstType, UInt16>;
+    else if (typeid_cast<const DataTypeUInt32 *>(&second_type)) return new AggregateFunctionTemplate<FirstType, UInt32>;
+    else if (typeid_cast<const DataTypeUInt64 *>(&second_type)) return new AggregateFunctionTemplate<FirstType, UInt64>;
+    else if (typeid_cast<const DataTypeInt8 *>(&second_type)) return new AggregateFunctionTemplate<FirstType, Int8>;
+    else if (typeid_cast<const DataTypeInt16 *>(&second_type)) return new AggregateFunctionTemplate<FirstType, Int16>;
+    else if (typeid_cast<const DataTypeInt32 *>(&second_type)) return new AggregateFunctionTemplate<FirstType, Int32>;
+    else if (typeid_cast<const DataTypeInt64 *>(&second_type)) return new AggregateFunctionTemplate<FirstType, Int64>;
+    else if (typeid_cast<const DataTypeFloat32 *>(&second_type)) return new AggregateFunctionTemplate<FirstType, Float32>;
+    else if (typeid_cast<const DataTypeFloat64 *>(&second_type)) return new AggregateFunctionTemplate<FirstType, Float64>;
+    else if (typeid_cast<const DataTypeEnum8 *>(&second_type)) return new AggregateFunctionTemplate<FirstType, UInt8>;
+    else if (typeid_cast<const DataTypeEnum16 *>(&second_type)) return new AggregateFunctionTemplate<FirstType, UInt16>;
     else
         return nullptr;
 }
@@ -116,18 +128,18 @@ static IAggregateFunction * createWithTwoNumericTypesSecond(const IDataType & se
 template <template <typename, typename> class AggregateFunctionTemplate>
 static IAggregateFunction * createWithTwoNumericTypes(const IDataType & first_type, const IDataType & second_type)
 {
-         if (typeid_cast<const DataTypeUInt8     *>(&first_type))    return createWithTwoNumericTypesSecond<UInt8, AggregateFunctionTemplate>(second_type);
-    else if (typeid_cast<const DataTypeUInt16     *>(&first_type))    return createWithTwoNumericTypesSecond<UInt16, AggregateFunctionTemplate>(second_type);
-    else if (typeid_cast<const DataTypeUInt32     *>(&first_type))    return createWithTwoNumericTypesSecond<UInt32, AggregateFunctionTemplate>(second_type);
-    else if (typeid_cast<const DataTypeUInt64     *>(&first_type))    return createWithTwoNumericTypesSecond<UInt64, AggregateFunctionTemplate>(second_type);
-    else if (typeid_cast<const DataTypeInt8     *>(&first_type))    return createWithTwoNumericTypesSecond<Int8, AggregateFunctionTemplate>(second_type);
-    else if (typeid_cast<const DataTypeInt16     *>(&first_type))    return createWithTwoNumericTypesSecond<Int16, AggregateFunctionTemplate>(second_type);
-    else if (typeid_cast<const DataTypeInt32     *>(&first_type))    return createWithTwoNumericTypesSecond<Int32, AggregateFunctionTemplate>(second_type);
-    else if (typeid_cast<const DataTypeInt64     *>(&first_type))    return createWithTwoNumericTypesSecond<Int64, AggregateFunctionTemplate>(second_type);
-    else if (typeid_cast<const DataTypeFloat32     *>(&first_type))    return createWithTwoNumericTypesSecond<Float32, AggregateFunctionTemplate>(second_type);
-    else if (typeid_cast<const DataTypeFloat64     *>(&first_type))    return createWithTwoNumericTypesSecond<Float64, AggregateFunctionTemplate>(second_type);
-    else if (typeid_cast<const DataTypeEnum8     *>(&first_type))    return createWithTwoNumericTypesSecond<UInt8, AggregateFunctionTemplate>(second_type);
-    else if (typeid_cast<const DataTypeEnum16     *>(&first_type))    return createWithTwoNumericTypesSecond<UInt16, AggregateFunctionTemplate>(second_type);
+         if (typeid_cast<const DataTypeUInt8 *>(&first_type)) return createWithTwoNumericTypesSecond<UInt8, AggregateFunctionTemplate>(second_type);
+    else if (typeid_cast<const DataTypeUInt16 *>(&first_type)) return createWithTwoNumericTypesSecond<UInt16, AggregateFunctionTemplate>(second_type);
+    else if (typeid_cast<const DataTypeUInt32 *>(&first_type)) return createWithTwoNumericTypesSecond<UInt32, AggregateFunctionTemplate>(second_type);
+    else if (typeid_cast<const DataTypeUInt64 *>(&first_type)) return createWithTwoNumericTypesSecond<UInt64, AggregateFunctionTemplate>(second_type);
+    else if (typeid_cast<const DataTypeInt8 *>(&first_type)) return createWithTwoNumericTypesSecond<Int8, AggregateFunctionTemplate>(second_type);
+    else if (typeid_cast<const DataTypeInt16 *>(&first_type)) return createWithTwoNumericTypesSecond<Int16, AggregateFunctionTemplate>(second_type);
+    else if (typeid_cast<const DataTypeInt32 *>(&first_type)) return createWithTwoNumericTypesSecond<Int32, AggregateFunctionTemplate>(second_type);
+    else if (typeid_cast<const DataTypeInt64 *>(&first_type)) return createWithTwoNumericTypesSecond<Int64, AggregateFunctionTemplate>(second_type);
+    else if (typeid_cast<const DataTypeFloat32 *>(&first_type)) return createWithTwoNumericTypesSecond<Float32, AggregateFunctionTemplate>(second_type);
+    else if (typeid_cast<const DataTypeFloat64 *>(&first_type)) return createWithTwoNumericTypesSecond<Float64, AggregateFunctionTemplate>(second_type);
+    else if (typeid_cast<const DataTypeEnum8 *>(&first_type)) return createWithTwoNumericTypesSecond<UInt8, AggregateFunctionTemplate>(second_type);
+    else if (typeid_cast<const DataTypeEnum16 *>(&first_type)) return createWithTwoNumericTypesSecond<UInt16, AggregateFunctionTemplate>(second_type);
     else
         return nullptr;
 }
diff --git a/dbms/src/AggregateFunctions/IAggregateFunction.h b/dbms/src/AggregateFunctions/IAggregateFunction.h
index 895844c1b7f..98ad8fe396e 100644
--- a/dbms/src/AggregateFunctions/IAggregateFunction.h
+++ b/dbms/src/AggregateFunctions/IAggregateFunction.h
@@ -41,6 +41,9 @@ namespace ErrorCodes
   * The data resulting from the aggregation (intermediate computing states) is stored in other objects
   *  (which can be created in some pool),
   *  and IAggregateFunction is the external interface for manipulating them.
+  *
+  * NOTE: If you add a new aggregate function, don't forget to add it to Interpreters/SpecializedAggregator.h
+  *  so that the new function works with runtime compilation.
   */
 class IAggregateFunction
 {
diff --git a/dbms/src/AggregateFunctions/ReservoirSampler.h b/dbms/src/AggregateFunctions/ReservoirSampler.h
index 381cf62f3b9..1f3c909e2a9 100644
--- a/dbms/src/AggregateFunctions/ReservoirSampler.h
+++ b/dbms/src/AggregateFunctions/ReservoirSampler.h
@@ -47,7 +47,7 @@ struct NanLikeValueConstructor<ResultType, false>
     }
 };
 
-template<typename T, ReservoirSamplerOnEmpty::Enum OnEmpty = ReservoirSamplerOnEmpty::THROW, typename Comparer = std::less<T> >
+template<typename T, ReservoirSamplerOnEmpty::Enum OnEmpty = ReservoirSamplerOnEmpty::THROW, typename Comparer = std::less<T>>
 class ReservoirSampler
 {
 public:
diff --git a/dbms/src/AggregateFunctions/registerAggregateFunctions.cpp b/dbms/src/AggregateFunctions/registerAggregateFunctions.cpp
index 5c8646fd583..e37ddeade76 100644
--- a/dbms/src/AggregateFunctions/registerAggregateFunctions.cpp
+++ b/dbms/src/AggregateFunctions/registerAggregateFunctions.cpp
@@ -23,6 +23,7 @@ void registerAggregateFunctionSum(AggregateFunctionFactory & factory);
 void registerAggregateFunctionsUniq(AggregateFunctionFactory & factory);
 void registerAggregateFunctionUniqUpTo(AggregateFunctionFactory & factory);
 void registerAggregateFunctionTopK(AggregateFunctionFactory & factory);
+void registerAggregateFunctionsBitwise(AggregateFunctionFactory & factory);
 void registerAggregateFunctionDebug(AggregateFunctionFactory & factory);
 
 
@@ -48,6 +49,7 @@ void registerAggregateFunctions()
     registerAggregateFunctionsUniq(factory);
     registerAggregateFunctionUniqUpTo(factory);
     registerAggregateFunctionTopK(factory);
+    registerAggregateFunctionsBitwise(factory);
     registerAggregateFunctionDebug(factory);
 }
 
diff --git a/dbms/src/Client/Connection.cpp b/dbms/src/Client/Connection.cpp
index d66571a6641..8054699c7ec 100644
--- a/dbms/src/Client/Connection.cpp
+++ b/dbms/src/Client/Connection.cpp
@@ -351,8 +351,8 @@ void Connection::sendQuery(
     block_in.reset();
     block_out.reset();
 
-    /// If server version is new enough, send empty block which meand end of data.
-    if (server_revision >= DBMS_MIN_REVISION_WITH_TEMPORARY_TABLES && !with_pending_data)
+    /// Send empty block which means end of data.
+    if (!with_pending_data)
     {
         sendData(Block());
         out->next();
@@ -384,9 +384,7 @@ void Connection::sendData(const Block & block, const String & name)
     }
 
     writeVarUInt(Protocol::Client::Data, *out);
-
-    if (server_revision >= DBMS_MIN_REVISION_WITH_TEMPORARY_TABLES)
-        writeStringBinary(name, *out);
+    writeStringBinary(name, *out);
 
     size_t prev_bytes = out->count();
 
@@ -405,9 +403,7 @@ void Connection::sendPreparedData(ReadBuffer & input, size_t size, const String
     /// NOTE 'Throttler' is not used in this method (could use, but it's not important right now).
 
     writeVarUInt(Protocol::Client::Data, *out);
-
-    if (server_revision >= DBMS_MIN_REVISION_WITH_TEMPORARY_TABLES)
-        writeStringBinary(name, *out);
+    writeStringBinary(name, *out);
 
     if (0 == size)
         copyData(input, *out);
@@ -419,13 +415,6 @@ void Connection::sendPreparedData(ReadBuffer & input, size_t size, const String
 
 void Connection::sendExternalTablesData(ExternalTablesData & data)
 {
-    /// If working with older server, don't send any info.
-    if (server_revision < DBMS_MIN_REVISION_WITH_TEMPORARY_TABLES)
-    {
-        out->next();
-        return;
-    }
-
     if (data.empty())
     {
         /// Send empty block, which means end of data transfer.
@@ -552,9 +541,7 @@ Block Connection::receiveData()
     initBlockInput();
 
     String external_table_name;
-
-    if (server_revision >= DBMS_MIN_REVISION_WITH_TEMPORARY_TABLES)
-        readStringBinary(external_table_name, *in);
+    readStringBinary(external_table_name, *in);
 
     size_t prev_bytes = in->count();
 
diff --git a/dbms/src/Common/ConfigProcessor.cpp b/dbms/src/Common/ConfigProcessor.cpp
index 4c08df0602c..eec492bae77 100644
--- a/dbms/src/Common/ConfigProcessor.cpp
+++ b/dbms/src/Common/ConfigProcessor.cpp
@@ -75,7 +75,7 @@ using NodeListPtr = Poco::AutoPtr<Poco::XML::NodeList>;
 static ElementIdentifier getElementIdentifier(Node * element)
 {
     NamedNodeMapPtr attrs = element->attributes();
-    std::vector<std::pair<std::string, std::string> > attrs_kv;
+    std::vector<std::pair<std::string, std::string>> attrs_kv;
     for (size_t i = 0; i < attrs->length(); ++i)
     {
         Node * node = attrs->item(i);
diff --git a/dbms/src/Common/ConfigProcessor.h b/dbms/src/Common/ConfigProcessor.h
index ef859c9121b..95c1d0a3c4d 100644
--- a/dbms/src/Common/ConfigProcessor.h
+++ b/dbms/src/Common/ConfigProcessor.h
@@ -29,7 +29,7 @@ using XMLDocumentPtr = Poco::AutoPtr<Poco::XML::Document>;
 class ConfigProcessor
 {
 public:
-    using Substitutions = std::vector<std::pair<std::string, std::string> >;
+    using Substitutions = std::vector<std::pair<std::string, std::string>>;
 
     /// Set log_to_console to true if the logging subsystem is not initialized yet.
     ConfigProcessor(bool throw_on_bad_incl = false, bool log_to_console = false, const Substitutions & substitutions = Substitutions());
diff --git a/dbms/src/Common/ExternalTable.h b/dbms/src/Common/ExternalTable.h
index 9d7ce88e3b6..75e22309839 100644
--- a/dbms/src/Common/ExternalTable.h
+++ b/dbms/src/Common/ExternalTable.h
@@ -35,7 +35,7 @@ public:
     std::string format;     /// Name of the data storage format
 
     /// Description of the table structure: (column name, data type name)
-    std::vector<std::pair<std::string, std::string> > structure;
+    std::vector<std::pair<std::string, std::string>> structure;
 
     std::unique_ptr<ReadBuffer> read_buffer;
     Block sample_block;
diff --git a/dbms/src/Common/HashTable/HashTable.h b/dbms/src/Common/HashTable/HashTable.h
index 5480cedcba5..d62043fbce2 100644
--- a/dbms/src/Common/HashTable/HashTable.h
+++ b/dbms/src/Common/HashTable/HashTable.h
@@ -293,6 +293,7 @@ protected:
         return place_value;
     }
 
+
     /// Find an empty cell, starting with the specified position and further along the collision resolution chain.
     size_t ALWAYS_INLINE findEmptyCell(const Key & x, size_t hash_value, size_t place_value) const
     {
@@ -422,6 +423,50 @@ protected:
     }
 
 
+    template <typename Derived, bool is_const>
+    class iterator_base
+    {
+        using Container = typename std::conditional<is_const, const Self, Self>::type;
+        using cell_type = typename std::conditional<is_const, const Cell, Cell>::type;
+
+        Container * container;
+        cell_type * ptr;
+
+        friend class HashTable;
+
+    public:
+        iterator_base() {}
+        iterator_base(Container * container_, cell_type * ptr_) : container(container_), ptr(ptr_) {}
+
+        bool operator== (const iterator_base & rhs) const { return ptr == rhs.ptr; }
+        bool operator!= (const iterator_base & rhs) const { return ptr != rhs.ptr; }
+
+        Derived & operator++()
+        {
+            if (unlikely(ptr->isZero(*container)))
+                ptr = container->buf;
+            else
+                ++ptr;
+
+            while (ptr < container->buf + container->grower.bufSize() && ptr->isZero(*container))
+                ++ptr;
+
+            return static_cast<Derived &>(*this);
+        }
+
+        auto & operator* () const { return ptr->getValue(); }
+        auto * operator->() const { return &ptr->getValue(); }
+
+        auto getPtr() const { return ptr; }
+        size_t getHash() const { return ptr->getHash(*container); }
+
+        size_t getCollisionChainLength() const
+        {
+            return container->grower.place((ptr - container->buf) - container->grower.place(getHash()));
+        }
+    };
+
+
 public:
     using key_type = Key;
     using value_type = typename Cell::value_type;
@@ -499,74 +544,17 @@ public:
         bool is_initialized = false;
     };
 
-    class iterator
+
+    class iterator : public iterator_base<iterator, false>
     {
-        Self * container;
-        Cell * ptr;
-
-        friend class HashTable;
-
     public:
-        iterator() {}
-        iterator(Self * container_, Cell * ptr_) : container(container_), ptr(ptr_) {}
-
-        bool operator== (const iterator & rhs) const { return ptr == rhs.ptr; }
-        bool operator!= (const iterator & rhs) const { return ptr != rhs.ptr; }
-
-        iterator & operator++()
-        {
-            if (unlikely(ptr->isZero(*container)))
-                ptr = container->buf;
-            else
-                ++ptr;
-
-            while (ptr < container->buf + container->grower.bufSize() && ptr->isZero(*container))
-                ++ptr;
-
-            return *this;
-        }
-
-        value_type & operator* () const { return ptr->getValue(); }
-        value_type * operator->() const { return &ptr->getValue(); }
-
-        Cell * getPtr() const { return ptr; }
-        size_t getHash() const { return ptr->getHash(*container); }
+        using iterator_base<iterator, false>::iterator_base;
     };
 
-
-    class const_iterator
+    class const_iterator : public iterator_base<const_iterator, true>
     {
-        const Self * container;
-        const Cell * ptr;
-
-        friend class HashTable;
-
     public:
-        const_iterator() {}
-        const_iterator(const Self * container_, const Cell * ptr_) : container(container_), ptr(ptr_) {}
-        const_iterator(const iterator & rhs) : container(rhs.container), ptr(rhs.ptr) {}
-
-        bool operator== (const const_iterator & rhs) const { return ptr == rhs.ptr; }
-        bool operator!= (const const_iterator & rhs) const { return ptr != rhs.ptr; }
-
-        const_iterator & operator++()
-        {
-            if (unlikely(ptr->isZero(*container)))
-                ptr = container->buf;
-            else
-                ++ptr;
-
-            while (ptr < container->buf + container->grower.bufSize() && ptr->isZero(*container))
-                ++ptr;
-
-            return *this;
-        }
-
-        const value_type & operator* () const { return ptr->getValue(); }
-        const value_type * operator->() const { return &ptr->getValue(); }
-
-        const Cell * getPtr() const { return ptr; }
-        size_t getHash() const { return ptr->getHash(*container); }
+        using iterator_base<const_iterator, true>::iterator_base;
     };
 
 
diff --git a/dbms/src/Common/HashTable/TwoLevelHashMap.h b/dbms/src/Common/HashTable/TwoLevelHashMap.h
index 00213ca8e6f..a6eab28b915 100644
--- a/dbms/src/Common/HashTable/TwoLevelHashMap.h
+++ b/dbms/src/Common/HashTable/TwoLevelHashMap.h
@@ -19,7 +19,7 @@ public:
     using mapped_type = typename Cell::Mapped;
     using value_type = typename Cell::value_type;
 
-    using TwoLevelHashTable<Key, Cell, Hash, Grower, Allocator, HashMapTable<Key, Cell, Hash, Grower, Allocator> >::TwoLevelHashTable;
+    using TwoLevelHashTable<Key, Cell, Hash, Grower, Allocator, HashMapTable<Key, Cell, Hash, Grower, Allocator>>::TwoLevelHashTable;
 
     mapped_type & ALWAYS_INLINE operator[](Key x)
     {
diff --git a/dbms/src/Common/LRUCache.h b/dbms/src/Common/LRUCache.h
index fe35ee3b3f9..3acfdcc9b7e 100644
--- a/dbms/src/Common/LRUCache.h
+++ b/dbms/src/Common/LRUCache.h
@@ -28,7 +28,7 @@ struct TrivialWeightFunction
 /// Cache starts to evict entries when their total weight exceeds max_size and when expiration time of these
 /// entries is due.
 /// Value weight should not change after insertion.
-template <typename TKey, typename TMapped, typename HashFunction = std::hash<TMapped>, typename WeightFunction = TrivialWeightFunction<TMapped> >
+template <typename TKey, typename TMapped, typename HashFunction = std::hash<TMapped>, typename WeightFunction = TrivialWeightFunction<TMapped>>
 class LRUCache
 {
 public:
diff --git a/dbms/src/Common/PoolWithFailoverBase.h b/dbms/src/Common/PoolWithFailoverBase.h
index 47ad7a7374a..6d436051685 100644
--- a/dbms/src/Common/PoolWithFailoverBase.h
+++ b/dbms/src/Common/PoolWithFailoverBase.h
@@ -71,7 +71,7 @@ public:
         TryResult() = default;
 
         explicit TryResult(Entry entry_)
-            : entry(std::move(entry))
+            : entry(std::move(entry_))
             , is_usable(true)
             , is_up_to_date(true)
         {
diff --git a/dbms/src/Common/Throttler.h b/dbms/src/Common/Throttler.h
index 0b242b25110..9104d39da92 100644
--- a/dbms/src/Common/Throttler.h
+++ b/dbms/src/Common/Throttler.h
@@ -27,10 +27,11 @@ namespace ErrorCodes
 class Throttler
 {
 public:
-    Throttler(size_t max_speed_, size_t limit_, const char * limit_exceeded_exception_message_)
-        : max_speed(max_speed_), limit(limit_), limit_exceeded_exception_message(limit_exceeded_exception_message_) {}
+    Throttler(size_t max_speed_, size_t limit_, const char * limit_exceeded_exception_message_,
+              const std::shared_ptr<Throttler> & parent = nullptr)
+        : max_speed(max_speed_), limit(limit_), limit_exceeded_exception_message(limit_exceeded_exception_message_), parent(parent) {}
 
-    void add(size_t amount)
+    void add(const size_t amount)
     {
         size_t new_count;
         UInt64 elapsed_ns = 0;
@@ -70,6 +71,9 @@ public:
                 nanosleep(&sleep_ts, nullptr);    /// NOTE Returns early in case of a signal. This is considered normal.
             }
         }
+
+        if (parent)
+            parent->add(amount);
     }
 
 private:
@@ -79,6 +83,9 @@ private:
     const char * limit_exceeded_exception_message = nullptr;
     Stopwatch watch {CLOCK_MONOTONIC_COARSE};
     std::mutex mutex;
+
+    /// Used to implement a hierarchy of throttlers
+    std::shared_ptr<Throttler> parent;
 };
 
 
diff --git a/dbms/src/Common/ZooKeeper/ZooKeeper.cpp b/dbms/src/Common/ZooKeeper/ZooKeeper.cpp
index 6ba093719e6..29f42ac2a30 100644
--- a/dbms/src/Common/ZooKeeper/ZooKeeper.cpp
+++ b/dbms/src/Common/ZooKeeper/ZooKeeper.cpp
@@ -1,3 +1,4 @@
+#include <random>
 #include <functional>
 #include <Common/ZooKeeper/ZooKeeper.h>
 #include <common/logger_useful.h>
@@ -70,11 +71,12 @@ void ZooKeeper::processCallback(zhandle_t * zh, int type, int state, const char
         destroyContext(context);
 }
 
-void ZooKeeper::init(const std::string & hosts_, int32_t session_timeout_ms_)
+void ZooKeeper::init(const std::string & hosts_, const std::string & identity_, int32_t session_timeout_ms_)
 {
     log = &Logger::get("ZooKeeper");
     zoo_set_debug_level(ZOO_LOG_LEVEL_ERROR);
     hosts = hosts_;
+    identity = identity_;
     session_timeout_ms = session_timeout_ms_;
 
     impl = zookeeper_init(hosts.c_str(), nullptr, session_timeout_ms, nullptr, nullptr, 0);
@@ -83,12 +85,23 @@ void ZooKeeper::init(const std::string & hosts_, int32_t session_timeout_ms_)
     if (!impl)
         throw KeeperException("Fail to initialize zookeeper. Hosts are " + hosts);
 
-    default_acl = &ZOO_OPEN_ACL_UNSAFE;
+    if (!identity.empty())
+    {
+        auto code = zoo_add_auth(impl, "digest", identity.c_str(), static_cast<int>(identity.size()), 0, 0);
+        if (code != ZOK)
+            throw KeeperException("Zookeeper authentication failed. Hosts are  " + hosts, code);
+
+        default_acl = &ZOO_CREATOR_ALL_ACL;
+    }
+    else
+        default_acl = &ZOO_OPEN_ACL_UNSAFE;
+
+    LOG_TRACE(log, "initialized, hosts: " << hosts);
 }
 
-ZooKeeper::ZooKeeper(const std::string & hosts, int32_t session_timeout_ms)
+ZooKeeper::ZooKeeper(const std::string & hosts, const std::string & identity, int32_t session_timeout_ms)
 {
-    init(hosts, session_timeout_ms);
+    init(hosts, identity, session_timeout_ms);
 }
 
 struct ZooKeeperArgs
@@ -99,6 +112,7 @@ struct ZooKeeperArgs
         config.keys(config_name, keys);
 
         std::vector<std::string> hosts_strings;
+        std::string root;
 
         session_timeout_ms = DEFAULT_SESSION_TIMEOUT;
         for (const auto & key : keys)
@@ -106,17 +120,29 @@ struct ZooKeeperArgs
             if (startsWith(key, "node"))
             {
                 hosts_strings.push_back(
-                    config.getString(config_name + "." + key + ".host") + ":" + config.getString(config_name + "." + key + ".port", "2181"));
+                    config.getString(config_name + "." + key + ".host") + ":"
+                    + config.getString(config_name + "." + key + ".port", "2181")
+                );
             }
             else if (key == "session_timeout_ms")
             {
                 session_timeout_ms = config.getInt(config_name + "." + key);
             }
+            else if (key == "identity")
+            {
+                identity = config.getString(config_name + "." + key);
+            }
+            else if (key == "root")
+            {
+                root = config.getString(config_name + "." + key);
+            }
             else throw KeeperException(std::string("Unknown key ") + key + " in config file");
         }
 
         /// Shuffle the hosts to distribute the load among ZooKeeper nodes.
-        std::random_shuffle(hosts_strings.begin(), hosts_strings.end());
+        std::random_device rd;
+        std::mt19937 g(rd());
+        std::shuffle(hosts_strings.begin(), hosts_strings.end(), g);
 
         for (auto & host : hosts_strings)
         {
@@ -124,16 +150,24 @@ struct ZooKeeperArgs
                 hosts += ",";
             hosts += host;
         }
+
+        if (!root.empty())
+        {
+            if (root.front() != '/')
+                throw KeeperException(std::string("Root path in config file should start with '/', but got ") + root);
+            hosts += root;
+        }
     }
 
     std::string hosts;
-    size_t session_timeout_ms;
+    std::string identity;
+    int session_timeout_ms;
 };
 
 ZooKeeper::ZooKeeper(const Poco::Util::AbstractConfiguration & config, const std::string & config_name)
 {
     ZooKeeperArgs args(config, config_name);
-    init(args.hosts, args.session_timeout_ms);
+    init(args.hosts, args.identity, args.session_timeout_ms);
 }
 
 WatchCallback ZooKeeper::callbackForEvent(const EventPtr & event)
@@ -707,7 +741,7 @@ ZooKeeper::~ZooKeeper()
 
 ZooKeeperPtr ZooKeeper::startNewSession() const
 {
-    return std::make_shared<ZooKeeper>(hosts, session_timeout_ms);
+    return std::make_shared<ZooKeeper>(hosts, identity, session_timeout_ms);
 }
 
 Op::Create::Create(const std::string & path_, const std::string & value_, ACLPtr acl_, int32_t flags_)
diff --git a/dbms/src/Common/ZooKeeper/ZooKeeper.h b/dbms/src/Common/ZooKeeper/ZooKeeper.h
index fa8dd1d8c29..40f84bce4a1 100644
--- a/dbms/src/Common/ZooKeeper/ZooKeeper.h
+++ b/dbms/src/Common/ZooKeeper/ZooKeeper.h
@@ -54,7 +54,7 @@ class ZooKeeper
 public:
     using Ptr = std::shared_ptr<ZooKeeper>;
 
-    ZooKeeper(const std::string & hosts, int32_t session_timeout_ms = DEFAULT_SESSION_TIMEOUT);
+    ZooKeeper(const std::string & hosts, const std::string & identity = "", int32_t session_timeout_ms = DEFAULT_SESSION_TIMEOUT);
 
     /** Config of the form:
         <zookeeper>
@@ -67,6 +67,10 @@ public:
                 <port>2181</port>
             </node>
             <session_timeout_ms>30000</session_timeout_ms>
+            <!-- Optional. Chroot suffix. Should exist. -->
+            <root>/path/to/zookeeper/node</root>
+            <!-- Optional. Zookeeper digest ACL string. -->
+            <identity>user:password</identity>
         </zookeeper>
     */
     ZooKeeper(const Poco::Util::AbstractConfiguration & config, const std::string & config_name);
@@ -353,7 +357,7 @@ private:
     friend struct WatchContext;
     friend class EphemeralNodeHolder;
 
-    void init(const std::string & hosts, int32_t session_timeout_ms);
+    void init(const std::string & hosts, const std::string & identity, int32_t session_timeout_ms);
     void removeChildrenRecursive(const std::string & path);
     void tryRemoveChildrenRecursive(const std::string & path);
 
@@ -397,6 +401,7 @@ private:
     MultiFuture asyncMultiImpl(const zkutil::Ops & ops_, bool throw_exception);
 
     std::string hosts;
+    std::string identity;
     int32_t session_timeout_ms;
 
     std::mutex mutex;
diff --git a/dbms/src/Common/ZooKeeper/tests/zkutil_test_commands.cpp b/dbms/src/Common/ZooKeeper/tests/zkutil_test_commands.cpp
index d9718c6f7f2..0a00fa98405 100644
--- a/dbms/src/Common/ZooKeeper/tests/zkutil_test_commands.cpp
+++ b/dbms/src/Common/ZooKeeper/tests/zkutil_test_commands.cpp
@@ -11,7 +11,7 @@ int main()
 {
     try
     {
-        ZooKeeper zk("mtfilter01t:2181,metrika-test:2181,mtweb01t:2181", 5000);
+        ZooKeeper zk("mtfilter01t:2181,metrika-test:2181,mtweb01t:2181", "", 5000);
         Strings children;
 
         std::cout << "create path" << std::endl;
diff --git a/dbms/src/Common/tests/CMakeLists.txt b/dbms/src/Common/tests/CMakeLists.txt
index 072d5547301..dd5f67c997b 100644
--- a/dbms/src/Common/tests/CMakeLists.txt
+++ b/dbms/src/Common/tests/CMakeLists.txt
@@ -57,3 +57,7 @@ target_link_libraries (array_cache dbms)
 
 add_executable (space_saving space_saving.cpp)
 target_link_libraries (space_saving dbms)
+
+add_executable (integer_hash_tables_and_hashes integer_hash_tables_and_hashes.cpp)
+target_include_directories (integer_hash_tables_and_hashes BEFORE PRIVATE ${SPARCEHASH_INCLUDE_DIR})
+target_link_libraries (integer_hash_tables_and_hashes dbms)
diff --git a/dbms/src/Common/tests/hash_table.cpp b/dbms/src/Common/tests/hash_table.cpp
index 5093b9ebc48..86289d440a3 100644
--- a/dbms/src/Common/tests/hash_table.cpp
+++ b/dbms/src/Common/tests/hash_table.cpp
@@ -11,7 +11,7 @@
 int main(int argc, char ** argv)
 {
     {
-        using Cont = HashSet<int, DefaultHash<int>, HashTableGrower<1> >;
+        using Cont = HashSet<int, DefaultHash<int>, HashTableGrower<1>>;
         Cont cont;
 
         cont.insert(1);
@@ -36,7 +36,7 @@ int main(int argc, char ** argv)
     }
 
     {
-        using Cont = HashMap<int, std::string, DefaultHash<int>, HashTableGrower<1> >;
+        using Cont = HashMap<int, std::string, DefaultHash<int>, HashTableGrower<1>>;
         Cont cont;
 
         cont.insert(Cont::value_type(1, "Hello, world!"));
diff --git a/dbms/src/Common/tests/hopscotch-map/LICENSE b/dbms/src/Common/tests/hopscotch-map/LICENSE
new file mode 100644
index 00000000000..86b99c1e5ec
--- /dev/null
+++ b/dbms/src/Common/tests/hopscotch-map/LICENSE
@@ -0,0 +1,21 @@
+MIT License
+
+Copyright (c) 2016 Tessil
+
+Permission is hereby granted, free of charge, to any person obtaining a copy
+of this software and associated documentation files (the "Software"), to deal
+in the Software without restriction, including without limitation the rights
+to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+copies of the Software, and to permit persons to whom the Software is
+furnished to do so, subject to the following conditions:
+
+The above copyright notice and this permission notice shall be included in all
+copies or substantial portions of the Software.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+SOFTWARE.
diff --git a/dbms/src/Common/tests/hopscotch-map/README.md b/dbms/src/Common/tests/hopscotch-map/README.md
new file mode 100644
index 00000000000..f8e96247b4b
--- /dev/null
+++ b/dbms/src/Common/tests/hopscotch-map/README.md
@@ -0,0 +1,303 @@
+[![Build Status](https://travis-ci.org/Tessil/hopscotch-map.svg?branch=master)](https://travis-ci.org/Tessil/hopscotch-map) [![Build status](https://ci.appveyor.com/api/projects/status/e97rjkcn3qwrhpvf/branch/master?svg=true)](https://ci.appveyor.com/project/Tessil/hopscotch-map/branch/master)
+
+## A C++ implementation of a fast hash map using hopscotch hashing
+
+The hopscotch-map library is a C++ implementation of a fast hash map and hash set using open-addressing and hopscotch hashing to resolve collisions. It is a cache-friendly data structure offering better performances than `std::unordered_map` in most cases and is closely similar to `google::dense_hash_map` while using less memory and providing more functionalities.
+
+The library provides four classes: `tsl::hopscotch_map`, `tsl::hopscotch_set`, `tsl::hopscotch_sc_map` and `tsl::hopscotch_sc_set`. The `tsl::hopscotch_sc_map` and `tsl::hopscotch_sc_set` classes have an additional requirement for the key, must be `LessThanComparable`, but provide a better upper bound, see [details](https://github.com/Tessil/hopscotch-map#deny-of-service-dos-attack) in example. Nonetheless, `tsl::hopscotch_map` and `tsl::hopscotch_set` should be sufficient in most cases and should be your default pick as they perform better in general.
+
+An overview of hopscotch hashing and some implementation details may be found [here](https://tessil.github.io/2016/08/29/hopscotch-hashing.html).
+
+A **benchmark** of `tsl::hopscotch_map` against other hash maps may be found [there](https://tessil.github.io/2016/08/29/benchmark-hopscotch-map.html).
+
+**Note**: By default the library uses a power of two for the size of its buckets array to take advantage of the [fast modulo](https://en.wikipedia.org/wiki/Modulo_operation#Performance_issues). For good performance, it requires the hash table to have a well-distributed hash function. If you encounter performance issues check the [GrowthPolicy](https://github.com/Tessil/hopscotch-map#growth-policy) section to change the default behaviour or change your hash function.
+
+### Key features
+- Header-only library, just include [src/](src/) to your include path and you are ready to go.
+- Fast hash table, see [benchmark](https://tessil.github.io/2016/08/29/benchmark-hopscotch-map.html) for some numbers.
+- Support for move-only and non-default constructible key/value.
+- Support for heterogeneous lookups (e.g. if you have a map that uses `std::unique_ptr<int>` as key, you could use an `int*` or a `std::uintptr_t` as key parameter to `find`, see [example](https://github.com/Tessil/hopscotch-map#heterogeneous-lookups)).
+- No need to reserve any sentinel value from the keys.
+- Possibility to store the hash value on insert for faster rehash and lookup if the hash or the key equal functions are expensive to compute (see the [StoreHash](https://tessil.github.io/hopscotch-map/doc/html/classtsl_1_1hopscotch__map.html#details) template parameter).
+- If the hash is known before a lookup, it is possible to pass it as parameter to speed-up the lookup.
+- The `tsl::hopscotch_sc_map` and `tsl::hopscotch_sc_set` provide a worst-case of O(log n) on lookup and delete making these classes resistant to hash table Deny of Service (DoS) attacks (see [details](https://github.com/Tessil/hopscotch-map#deny-of-service-dos-attack) in example).
+- API closely similar to `std::unordered_map` and `std::unordered_set`.
+
+### Differences compare to `std::unordered_map`
+`tsl::hopscotch_map` tries to have an interface similar to `std::unordered_map`, but some differences exist.
+- Iterator invalidation on insert doesn't behave in the same way (see [API](https://tessil.github.io/hopscotch-map/doc/html/classtsl_1_1hopscotch__map.html#details) for details).
+- References and pointers to keys or values in the map are invalidated in the same way as iterators to these keys-values on insert.
+- The size of the bucket array in the map grows by a factor of two, the size will always be a power of two, which may be a too steep growth rate for some purposes. The growth policy is modifiable (see the [`GrowthPolicy`](https://github.com/Tessil/hopscotch-map#growth-policy) template parameter) but it may reduce the speed of the hash map.
+- For iterators, `operator*()` and `operator->()` return a reference and a pointer to `const std::pair<Key, T>` instead of `std::pair<const Key, T>` making the value `T` not modifiable. To modify the value you have to call the `value()` method of the iterator to get a mutable reference. Example:
+```c++
+tsl::hopscotch_map<int, int> map = {{1, 1}, {2, 1}, {3, 1}};
+for(auto it = map.begin(); it != map.end(); ++it) {
+    //it->second = 2; // Illegal
+    it.value() = 2; // Ok
+}
+```
+- Move-only types must have a nothrow move constructor (with open addressing, it is not possible to keep the strong exception guarantee on rehash if the move constructor may throw).
+- No support for some buckets related methods (like bucket_size, bucket, ...).
+
+These differences also apply between `std::unordered_set` and `tsl::hopscotch_set`.
+
+Thread-safety and exceptions guarantees are the same as `std::unordered_map/set`.
+
+### Differences compare to `google::dense_hash_map`
+`tsl::hopscotch_map` has comparable performances to `google::dense_hash_map` (see [benchmark](https://tessil.github.io/2016/08/29/benchmark-hopscotch-map.html)), but come with some advantages.
+- There is no need to reserve sentinel values for the key as it is required by `google::dense_hash_map` where you need to have a sentinel for empty and deleted keys.
+- The type of the value in the map doesn't need a default constructor.
+- The key and the value of the map don't need a copy constructor/operator, move-only types are supported.
+- It uses less memory for its speed as it can sustain a load factor of 0.95 (which is the default value in the library compare to the 0.5 of `google::dense_hash_map`) while keeping good performances.
+
+### Growth policy
+
+By default `tsl::hopscotch_map/set` uses `tsl::power_of_two_growth_policy` as `GrowthPolicy`. This policy keeps the size of the map to a power of two by doubling the size of the map when a rehash is required. It allows the map to avoid the usage of the slow modulo operation, instead of <code>hash % 2<sup>n</sup></code>, it uses <code>hash & (2<sup>n</sup> - 1)</code>.
+
+This may cause a lot of collisions with a poor hash function as the modulo just masks the most significant bits.
+
+If you encounter poor performances, check `overflow_size()`. If it is not zero, you may have a lot of collisions due to a common pattern in the least significant bits. Either change the hash function for something more uniform or use `tsl::prime_growth_policy` which keeps the size of the map to a prime size.
+
+You can also use `tsl::mod_growth_policy` if you want a more configurable growth rate or you could even define your own policy (see [API](https://tessil.github.io/hopscotch-map/doc/html/classtsl_1_1hopscotch__map.html#details)).
+
+A bad distribution may lead to a runtime complexity of O(n) for lookups. Unfortunately it is sometimes difficult to guard yourself against it (e.g. DoS attack on the hash map). If needed, check `tsl::hopscotch_sc_map/set` which offer a worst-case scenario of O(log n) on lookups, see [details](https://github.com/Tessil/hopscotch-map#deny-of-service-dos-attack) in example.
+
+### Installation
+To use hopscotch-map, just add the [src/](src/) directory to your include path. It is a **header-only** library.
+
+The code should work with any C++11 standard-compliant compiler and has been tested with GCC 4.8.4, Clang 3.5.0 and Visual Studio 2015.
+
+To run the tests you will need the Boost Test library and CMake. 
+
+```bash
+git clone https://github.com/Tessil/hopscotch-map.git
+cd hopscotch-map
+mkdir build
+cd build
+cmake ..
+make
+./test_hopscotch_map 
+```
+
+
+### Usage
+The API can be found [here](https://tessil.github.io/hopscotch-map/doc/html/). 
+
+All methods are not documented yet, but they replicate the behaviour of the ones in `std::unordered_map` and `std::unordered_set`, except if specified otherwise.
+
+### Example
+```c++
+#include <cstdint>
+#include <iostream>
+#include <string>
+#include "hopscotch_map.h"
+#include "hopscotch_set.h"
+
+int main() {
+    tsl::hopscotch_map<std::string, int> map = {{"a", 1}, {"b", 2}};
+    map["c"] = 3;
+    map["d"] = 4;
+    
+    map.insert({"e", 5});
+    map.erase("b");
+    
+    for(auto it = map.begin(); it != map.end(); ++it) {
+        //it->second += 2; // Not valid.
+        it.value() += 2;
+    }
+    
+    // {d, 6} {a, 3} {e, 7} {c, 5}
+    for(const auto& key_value : map) {
+        std::cout << "{" << key_value.first << ", " << key_value.second << "}" << std::endl;
+    }
+    
+    
+    
+    
+    /*
+     * Calculating the hash and comparing two std::string may be slow. 
+     * We can store the hash of each std::string in the hash map to make 
+     * the inserts and lookups faster by setting StoreHash to true.
+     */ 
+    tsl::hopscotch_map<std::string, int, std::hash<std::string>, 
+                       std::equal_to<std::string>,
+                       std::allocator<std::pair<std::string, int>>,
+                       30, true> map2;
+                       
+    map2["a"] = 1;
+    map2["b"] = 2;
+    
+    // {a, 1} {b, 2}
+    for(const auto& key_value : map2) {
+        std::cout << "{" << key_value.first << ", " << key_value.second << "}" << std::endl;
+    }
+    
+    
+    
+    
+    tsl::hopscotch_set<int> set;
+    set.insert({1, 9, 0});
+    set.insert({2, -1, 9});
+    
+    // {0} {1} {2} {9} {-1}
+    for(const auto& key : set) {
+        std::cout << "{" << key << "}" << std::endl;
+    }
+} 
+```
+
+#### Heterogeneous lookups
+
+Heterogeneous overloads allow the usage of other types than `Key` for lookup and erase operations as long as the used types are hashable and comparable to `Key`.
+
+To activate the heterogeneous overloads in `tsl::hopscotch_map/set`, the qualified-id `KeyEqual::is_transparent` must be valid. It works the same way as for [`std::map::find`](http://en.cppreference.com/w/cpp/container/map/find). You can either use [`std::equal_to<>`](http://en.cppreference.com/w/cpp/utility/functional/equal_to_void) or define your own function object.
+
+Both `KeyEqual` and `Hash` will need to be able to deal with the different types.
+
+```c++
+#include <functional>
+#include <iostream>
+#include <string>
+#include "hopscotch_map.h"
+
+
+struct employee {
+    employee(int id, std::string name) : m_id(id), m_name(std::move(name)) {
+    }
+    
+    friend bool operator==(const employee& empl, int empl_id) {
+        return empl.m_id == empl_id;
+    }
+    
+    friend bool operator==(int empl_id, const employee& empl) {
+        return empl_id == empl.m_id;
+    }
+    
+    friend bool operator==(const employee& empl1, const employee& empl2) {
+        return empl1.m_id == empl2.m_id;
+    }
+    
+    
+    int m_id;
+    std::string m_name;
+};
+
+struct hash_employee {
+    std::size_t operator()(const employee& empl) const {
+        return std::hash<int>()(empl.m_id);
+    }
+    
+    std::size_t operator()(int id) const {
+        return std::hash<int>()(id);
+    }
+};
+
+struct equal_employee {
+    using is_transparent = void;
+    
+    bool operator()(const employee& empl, int empl_id) const {
+        return empl.m_id == empl_id;
+    }
+    
+    bool operator()(int empl_id, const employee& empl) const {
+        return empl_id == empl.m_id;
+    }
+    
+    bool operator()(const employee& empl1, const employee& empl2) const {
+        return empl1.m_id == empl2.m_id;
+    }
+};
+
+
+int main() {
+    // Use std::equal_to<> which will automatically deduce and forward the parameters
+    tsl::hopscotch_map<employee, int, hash_employee, std::equal_to<>> map; 
+    map.insert({employee(1, "John Doe"), 2001});
+    map.insert({employee(2, "Jane Doe"), 2002});
+    map.insert({employee(3, "John Smith"), 2003});
+
+    // John Smith 2003
+    auto it = map.find(3);
+    if(it != map.end()) {
+        std::cout << it->first.m_name << " " << it->second << std::endl;
+    }
+
+    map.erase(1);
+
+
+
+    // Use a custom KeyEqual which has an is_transparent member type
+    tsl::hopscotch_map<employee, int, hash_employee, equal_employee> map2;
+    map2.insert({employee(4, "Johnny Doe"), 2004});
+
+    // 2004
+    std::cout << map2.at(4) << std::endl;
+} 
+```
+
+#### Deny of Service (DoS) attack
+In addition to `tsl::hopscotch_map` and `tsl::hopscotch_set`, the library provides two more "secure" options: `tsl::hopscotch_sc_map` and `tsl::hopscotch_sc_set`. 
+
+These two additions have a worst-case runtime of O(log n) for lookups and deletions and an amortized worst case of O(log n) for insertions (amortized due to the possibility of rehash which would be in O(n)). Even if the hash function maps all the elements to the same bucket, the O(log n) would still hold.
+
+This provides a security against hash table Deny of Service attacks. 
+
+To achieve this, the "secure" versions use a binary search tree for the overflown elements (see [implementation details](https://tessil.github.io/2016/08/29/hopscotch-hashing.html)) and thus need the elements to be `LessThanComparable`. An additional `Compare` template parameter is needed.
+
+```c++
+#include <chrono>
+#include <cstdint>
+#include <iostream>
+#include "hopscotch_map.h"
+#include "hopscotch_sc_map.h"
+
+/*
+ * Poor hash function which always returns 1 to simulate
+ * a Deny of Service attack.
+ */
+struct dos_attack_simulation_hash {
+    std::size_t operator()(int id) const {
+        return 1;
+    }
+};
+
+int main() {
+    /*
+     * Slow due to the hash function, insertions are done in O(n).
+     */
+    tsl::hopscotch_map<int, int, dos_attack_simulation_hash> map;
+    
+    auto start = std::chrono::high_resolution_clock::now();
+    for(int i=0; i < 10000; i++) {
+        map.insert({i, 0});
+    }
+    auto end = std::chrono::high_resolution_clock::now();
+    
+    // 110 ms
+    auto duration = std::chrono::duration_cast<std::chrono::milliseconds>(end-start);
+    std::cout << duration.count() << " ms" << std::endl;
+    
+    
+    
+    
+    /*
+     * Faster. Even with the poor hash function, insertions end-up to
+     * be O(log n) in average (and O(n) when a rehash occurs).
+     */
+    tsl::hopscotch_sc_map<int, int, dos_attack_simulation_hash> map_secure;
+    
+    start = std::chrono::high_resolution_clock::now();
+    for(int i=0; i < 10000; i++) {
+        map_secure.insert({i, 0});
+    }
+    end = std::chrono::high_resolution_clock::now();
+    
+    // 2 ms
+    duration = std::chrono::duration_cast<std::chrono::milliseconds>(end-start);
+    std::cout << duration.count() << " ms" << std::endl;
+} 
+```
+
+### License
+
+The code is licensed under the MIT license, see the [LICENSE file](LICENSE) for details.
diff --git a/dbms/src/Common/tests/hopscotch-map/src/hopscotch_hash.h b/dbms/src/Common/tests/hopscotch-map/src/hopscotch_hash.h
new file mode 100644
index 00000000000..2840c8880b5
--- /dev/null
+++ b/dbms/src/Common/tests/hopscotch-map/src/hopscotch_hash.h
@@ -0,0 +1,1873 @@
+/**
+ * MIT License
+ * 
+ * Copyright (c) 2017 Tessil
+ * 
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ * 
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ * 
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#ifndef TSL_HOPSCOTCH_HASH_H
+#define TSL_HOPSCOTCH_HASH_H
+
+
+#include <algorithm>
+#include <array>
+#include <cassert>
+#include <cmath>
+#include <climits>
+#include <cstddef>
+#include <cstdint>
+#include <exception>
+#include <functional>
+#include <initializer_list>
+#include <iterator>
+#include <limits>
+#include <memory>
+#include <ratio>
+#include <stdexcept>
+#include <tuple>
+#include <type_traits>
+#include <utility>
+#include <vector>
+
+#if (defined(__GNUC__) && (__GNUC__ == 4) && (__GNUC_MINOR__ < 9))
+#define TSL_NO_RANGE_ERASE_WITH_CONST_ITERATOR
+#endif
+
+
+
+/*
+ * Only activate tsl_assert if TSL_DEBUG is defined. 
+ * This way we avoid the performance hit when NDEBUG is not defined with assert as tsl_assert is used a lot
+ * (people usually compile with "-O3" and not "-O3 -DNDEBUG").
+ */
+#ifndef tsl_assert
+    #ifdef TSL_DEBUG
+    #define tsl_assert(expr) assert(expr)
+    #else
+    #define tsl_assert(expr) (static_cast<void>(0))
+    #endif
+#endif
+
+namespace tsl {
+
+/**
+ * Grow the map by a factor of two keeping bucket_count to a power of two. It allows
+ * the map to use a mask operation instead of a modulo operation to map a hash to a bucket.
+ */
+class power_of_two_growth_policy {
+public:
+    /**
+     * Called on map creation and rehash. The number of buckets requested is passed by parameter.
+     * This number is a minimum, the policy may update this value with a higher value if needed.
+     */
+    power_of_two_growth_policy(std::size_t& min_bucket_count_in_out) {
+        if(min_bucket_count_in_out > max_bucket_count()) {
+            throw std::length_error("The map exceeds its maxmimum size.");
+        }
+        
+        static_assert(MIN_BUCKETS_SIZE > 0, "");
+        const std::size_t min_bucket_count = MIN_BUCKETS_SIZE;
+        
+        min_bucket_count_in_out = std::max(min_bucket_count, min_bucket_count_in_out);
+        min_bucket_count_in_out = round_up_to_power_of_two(min_bucket_count_in_out);
+        m_mask = min_bucket_count_in_out - 1;
+    }
+    
+    /**
+     * Return the bucket [0, bucket_count()) to which the hash belongs.
+     */
+    std::size_t bucket_for_hash(std::size_t hash) const {
+        return hash & m_mask;
+    }
+    
+    /**
+     * Return the bucket count to uses when the bucket array grows on rehash.
+     */
+    std::size_t next_bucket_count() const {
+        if((m_mask + 1) > max_bucket_count()/2) {
+            throw std::length_error("The map exceeds its maxmimum size.");
+        }
+        
+        return (m_mask + 1) * 2;
+    }
+    
+    /**
+     * Return the maximum number of buckets supported by the policy.
+     */
+    std::size_t max_bucket_count() const {
+        return std::numeric_limits<std::size_t>::max()/2 + 1;
+    }
+    
+private:
+    static std::size_t round_up_to_power_of_two(std::size_t value) {
+        if(value == 0) {
+            return 1;
+        }
+        
+        if(is_power_of_two(value)) {
+            return value;
+        }
+            
+        --value;
+        for(std::size_t i = 1; i < sizeof(std::size_t) * CHAR_BIT; i *= 2) {
+            value |= value >> i;
+        }
+        
+        return value + 1;
+    }
+    
+    static constexpr bool is_power_of_two(std::size_t value) {
+        return value != 0 && (value & (value - 1)) == 0;
+    }
+    
+private:
+    static const std::size_t MIN_BUCKETS_SIZE = 2;
+    
+    std::size_t m_mask;
+};
+
+/**
+ * Grow the map by GrowthFactor::num/GrowthFactor::den and use a modulo to map a hash
+ * to a bucket. Slower but it can be usefull if you want a slower growth.
+ */
+template<class GrowthFactor = std::ratio<3, 2>>
+class mod_growth_policy {
+public:
+    mod_growth_policy(std::size_t& min_bucket_count_in_out) {
+        if(min_bucket_count_in_out > max_bucket_count()) {
+            throw std::length_error("The map exceeds its maxmimum size.");
+        }
+        
+        static_assert(MIN_BUCKETS_SIZE > 0, "");
+        const std::size_t min_bucket_count = MIN_BUCKETS_SIZE;
+        
+        min_bucket_count_in_out = std::max(min_bucket_count, min_bucket_count_in_out);
+        m_bucket_count = min_bucket_count_in_out;
+    }
+    
+    std::size_t bucket_for_hash(std::size_t hash) const {
+        tsl_assert(m_bucket_count != 0);
+        return hash % m_bucket_count;
+    }
+    
+    std::size_t next_bucket_count() const {
+        if(m_bucket_count == max_bucket_count()) {
+            throw std::length_error("The map exceeds its maxmimum size.");
+        }
+        
+        const double next_bucket_count = std::ceil(double(m_bucket_count) * REHASH_SIZE_MULTIPLICATION_FACTOR);
+        if(!std::isnormal(next_bucket_count)) {
+            throw std::length_error("The map exceeds its maxmimum size.");
+        }
+        
+        if(next_bucket_count > double(max_bucket_count())) {
+            return max_bucket_count();
+        }
+        else {
+            return std::size_t(next_bucket_count);
+        }
+    }
+    
+    std::size_t max_bucket_count() const {
+        return MAX_BUCKET_COUNT;
+    }
+    
+private:
+    static const std::size_t MIN_BUCKETS_SIZE = 2;
+    static constexpr double REHASH_SIZE_MULTIPLICATION_FACTOR = 1.0*GrowthFactor::num/GrowthFactor::den;
+    static const std::size_t MAX_BUCKET_COUNT = 
+            std::size_t(double(
+                    std::numeric_limits<std::size_t>::max()/REHASH_SIZE_MULTIPLICATION_FACTOR
+            ));
+            
+    static_assert(REHASH_SIZE_MULTIPLICATION_FACTOR >= 1.1, "Growth factor should be >= 1.1.");
+    
+    std::size_t m_bucket_count;
+};
+
+
+
+namespace detail_hopscotch_hash {
+
+static constexpr const std::array<std::size_t, 39> PRIMES = {{
+    5ul, 17ul, 29ul, 37ul, 53ul, 67ul, 79ul, 97ul, 131ul, 193ul, 257ul, 389ul, 521ul, 769ul, 1031ul, 1543ul, 2053ul, 
+    3079ul, 6151ul, 12289ul, 24593ul, 49157ul, 98317ul, 196613ul, 393241ul, 786433ul, 1572869ul, 3145739ul, 
+    6291469ul, 12582917ul, 25165843ul, 50331653ul, 100663319ul, 201326611ul, 402653189ul, 805306457ul, 
+    1610612741ul, 3221225473ul, 4294967291ul
+}};
+
+template<unsigned int IPrime>
+static std::size_t mod(std::size_t hash) { return hash % PRIMES[IPrime]; }
+
+// MOD_PRIME[iprime](hash) returns hash % PRIMES[iprime]. This table allows for faster modulo as the
+// compiler can optimize the modulo code better with a constant known at the compilation.
+static constexpr const std::array<std::size_t(*)(std::size_t), 39> MOD_PRIME = {{ 
+    &mod<0>, &mod<1>, &mod<2>, &mod<3>, &mod<4>, &mod<5>, &mod<6>, &mod<7>, &mod<8>, &mod<9>, &mod<10>, 
+    &mod<11>, &mod<12>, &mod<13>, &mod<14>, &mod<15>, &mod<16>, &mod<17>, &mod<18>, &mod<19>, &mod<20>, 
+    &mod<21>, &mod<22>, &mod<23>, &mod<24>, &mod<25>, &mod<26>, &mod<27>, &mod<28>, &mod<29>, &mod<30>, 
+    &mod<31>, &mod<32>, &mod<33>, &mod<34>, &mod<35>, &mod<36>, &mod<37> , &mod<38>
+}};
+
+}
+
+/**
+ * Grow the map by using prime numbers as size. Slower than tsl::power_of_two_growth_policy in general 
+ * but will probably distribute the values around better in the buckets with a poor hash function.
+ */
+class prime_growth_policy {
+public:
+    prime_growth_policy(std::size_t& min_bucket_count_in_out) {
+        auto it_prime = std::lower_bound(tsl::detail_hopscotch_hash::PRIMES.begin(), 
+                                         tsl::detail_hopscotch_hash::PRIMES.end(), min_bucket_count_in_out);
+        if(it_prime == tsl::detail_hopscotch_hash::PRIMES.end()) {
+            throw std::length_error("The map exceeds its maxmimum size.");
+        }
+        
+        m_iprime = static_cast<unsigned int>(std::distance(tsl::detail_hopscotch_hash::PRIMES.begin(), it_prime));
+        min_bucket_count_in_out = *it_prime;
+    }
+    
+    std::size_t bucket_for_hash(std::size_t hash) const {
+        return bucket_for_hash_iprime(hash, m_iprime);
+    }
+    
+    std::size_t next_bucket_count() const {
+        if(m_iprime + 1 >= tsl::detail_hopscotch_hash::PRIMES.size()) {
+            throw std::length_error("The map exceeds its maxmimum size.");
+        }
+        
+        return tsl::detail_hopscotch_hash::PRIMES[m_iprime + 1];
+    }   
+    
+    std::size_t max_bucket_count() const {
+        return tsl::detail_hopscotch_hash::PRIMES.back();
+    }
+    
+private:  
+    std::size_t bucket_for_hash_iprime(std::size_t hash, unsigned int iprime) const {
+        tsl_assert(iprime < tsl::detail_hopscotch_hash::MOD_PRIME.size());
+        return tsl::detail_hopscotch_hash::MOD_PRIME[iprime](hash);
+    }
+    
+private:
+    unsigned int m_iprime;
+};
+
+
+namespace detail_hopscotch_hash {
+    
+    
+    
+template<typename T>
+struct make_void {
+    using type = void;
+};
+
+
+template<typename T, typename = void>
+struct has_is_transparent : std::false_type {
+};
+
+template<typename T>
+struct has_is_transparent<T, typename make_void<typename T::is_transparent>::type> : std::true_type {
+};
+
+
+template<typename T, typename = void>
+struct has_key_compare : std::false_type {
+};
+
+template<typename T>
+struct has_key_compare<T, typename make_void<typename T::key_compare>::type> : std::true_type {
+};
+
+
+
+
+
+/*
+ * smallest_type_for_min_bits::type returns the smallest type that can fit MinBits.
+ */
+static const size_t SMALLEST_TYPE_MAX_BITS_SUPPORTED = 64;
+template<unsigned int MinBits, typename Enable = void>
+class smallest_type_for_min_bits {
+};
+
+template<unsigned int MinBits>
+class smallest_type_for_min_bits<MinBits, typename std::enable_if<(MinBits > 0) && (MinBits <= 8)>::type> {
+public:
+    using type = std::uint_least8_t;
+};
+
+template<unsigned int MinBits>
+class smallest_type_for_min_bits<MinBits, typename std::enable_if<(MinBits > 8) && (MinBits <= 16)>::type> {
+public:
+    using type = std::uint_least16_t;
+};
+
+template<unsigned int MinBits>
+class smallest_type_for_min_bits<MinBits, typename std::enable_if<(MinBits > 16) && (MinBits <= 32)>::type> {
+public:
+    using type = std::uint_least32_t;
+};
+
+template<unsigned int MinBits>
+class smallest_type_for_min_bits<MinBits, typename std::enable_if<(MinBits > 32) && (MinBits <= 64)>::type> {
+public:
+    using type = std::uint_least64_t;
+};
+        
+
+
+/*
+ * Each bucket may store up to three elements:
+ * - An aligned storage to store a value_type object with placement-new.
+ * - An (optional) hash of the value in the bucket.
+ * - An unsigned integer of type neighborhood_bitmap used to tell us which buckets in the neighborhood of the 
+ *   current bucket contain a value with a hash belonging to the current bucket. 
+ * 
+ * For a bucket 'b' a bit 'i' (counting from 0 and from the least significant bit to the most significant) 
+ * set to 1 means that the bucket 'b+i' contains a value with a hash belonging to bucket 'b'.
+ * The bits used for that, start from the third least significant bit.
+ * 
+ * The least significant bit is set to 1 if there is a value in the bucket storage.
+ * The second least significant bit is set to 1 if there is an overflow. More than NeighborhoodSize values 
+ * give the same hash, all overflow values are stored in the m_overflow_elements list of the map.
+ */
+static const std::size_t NB_RESERVED_BITS_IN_NEIGHBORHOOD = 2; 
+
+
+template<bool StoreHash>
+class hopscotch_bucket_hash {
+public:    
+    using hash_type = std::false_type;
+    
+    bool bucket_hash_equal(std::size_t /*hash*/) const noexcept {
+        return true;
+    }
+    
+    std::size_t truncated_bucket_hash() const noexcept {
+        assert(false);
+        return 0;
+    }
+    
+protected:    
+    void copy_hash(const hopscotch_bucket_hash& ) noexcept {
+    }
+    
+    void set_hash(std::size_t /*hash*/) noexcept {
+    }
+};
+
+template<>
+class hopscotch_bucket_hash<true> {
+public:
+    using hash_type = std::uint_least32_t;
+    static_assert(sizeof(hash_type) <= sizeof(std::size_t), "");
+    
+    bool bucket_hash_equal(std::size_t hash) const noexcept {
+        return m_hash == hash_type(hash);
+    }
+    
+    std::size_t truncated_bucket_hash() const noexcept {
+        return m_hash;
+    }
+    
+protected:    
+    void copy_hash(const hopscotch_bucket_hash& bucket) noexcept {
+        m_hash = bucket.m_hash;
+    }
+    
+    void set_hash(std::size_t hash) noexcept {
+        m_hash = hash_type(hash);
+    }
+    
+private:    
+    hash_type m_hash;
+};
+
+template<typename ValueType, unsigned int NeighborhoodSize, bool StoreHash>
+class hopscotch_bucket: public hopscotch_bucket_hash<StoreHash> {
+private:
+    static const size_t MIN_NEIGHBORHOOD_SIZE = 4;
+    static const size_t MAX_NEIGHBORHOOD_SIZE = SMALLEST_TYPE_MAX_BITS_SUPPORTED - NB_RESERVED_BITS_IN_NEIGHBORHOOD; 
+    
+    
+    static_assert(NeighborhoodSize >= 4, "NeighborhoodSize should be >= 4.");
+    // We can't put a variable in the message, ensure coherence
+    static_assert(MIN_NEIGHBORHOOD_SIZE == 4, ""); 
+    
+    static_assert(NeighborhoodSize <= 62, "NeighborhoodSize should be <= 62.");
+    // We can't put a variable in the message, ensure coherence
+    static_assert(MAX_NEIGHBORHOOD_SIZE == 62, ""); 
+    
+    
+    static_assert(!StoreHash || NeighborhoodSize <= 30, 
+                  "NeighborhoodSize should be <= 30 if StoreHash is true.");
+    // We can't put a variable in the message, ensure coherence
+    static_assert(MAX_NEIGHBORHOOD_SIZE - 32 == 30, "");
+    
+    using bucket_hash = hopscotch_bucket_hash<StoreHash>;
+    
+public:
+    using value_type = ValueType;
+    using neighborhood_bitmap = 
+                typename smallest_type_for_min_bits<NeighborhoodSize + NB_RESERVED_BITS_IN_NEIGHBORHOOD>::type;
+
+
+    hopscotch_bucket() noexcept: bucket_hash(), m_neighborhood_infos(0) {
+        tsl_assert(empty());
+    }
+    
+    
+    hopscotch_bucket(const hopscotch_bucket& bucket) 
+        noexcept(std::is_nothrow_copy_constructible<value_type>::value): bucket_hash(bucket), 
+                                                                         m_neighborhood_infos(0) 
+    {
+        if(!bucket.empty()) {
+            ::new (static_cast<void*>(std::addressof(m_value))) value_type(bucket.value());
+        }
+        
+        m_neighborhood_infos = bucket.m_neighborhood_infos;
+    }
+    
+    hopscotch_bucket(hopscotch_bucket&& bucket)
+        noexcept(std::is_nothrow_move_constructible<value_type>::value) : bucket_hash(std::move(bucket)),
+                                                                          m_neighborhood_infos(0) 
+    {
+        if(!bucket.empty()) {
+            ::new (static_cast<void*>(std::addressof(m_value))) value_type(std::move(bucket.value()));
+        }
+        
+        m_neighborhood_infos = bucket.m_neighborhood_infos;
+    }
+     
+    hopscotch_bucket& operator=(const hopscotch_bucket& bucket) 
+        noexcept(std::is_nothrow_copy_constructible<value_type>::value) 
+    {
+        if(this != &bucket) {
+            remove_value();
+            
+            bucket_hash::operator=(bucket);
+            if(!bucket.empty()) {
+                ::new (static_cast<void*>(std::addressof(m_value))) value_type(bucket.value());
+            }
+            
+            m_neighborhood_infos = bucket.m_neighborhood_infos;
+        }
+        
+        return *this;
+    }
+    
+    hopscotch_bucket& operator=(hopscotch_bucket&& ) = delete;
+     
+    ~hopscotch_bucket() noexcept {
+        if(!empty()) {
+            destroy_value();
+        }
+    }
+    
+    neighborhood_bitmap neighborhood_infos() const noexcept {
+        return neighborhood_bitmap(m_neighborhood_infos >> NB_RESERVED_BITS_IN_NEIGHBORHOOD);
+    }
+    
+    void set_overflow(bool has_overflow) noexcept {
+        if(has_overflow) {
+            m_neighborhood_infos = neighborhood_bitmap(m_neighborhood_infos | 2);
+        }
+        else {
+            m_neighborhood_infos = neighborhood_bitmap(m_neighborhood_infos & ~2);
+        }
+    }
+    
+    bool has_overflow() const noexcept {
+        return (m_neighborhood_infos & 2) != 0;
+    }
+    
+    bool empty() const noexcept {
+        return (m_neighborhood_infos & 1) == 0;
+    }
+    
+    void toggle_neighbor_presence(std::size_t ineighbor) noexcept {
+        tsl_assert(ineighbor <= NeighborhoodSize);
+        m_neighborhood_infos = neighborhood_bitmap(
+                                    m_neighborhood_infos ^ (1ull << (ineighbor + NB_RESERVED_BITS_IN_NEIGHBORHOOD)));
+    }
+    
+    bool check_neighbor_presence(std::size_t ineighbor) const noexcept {
+        tsl_assert(ineighbor <= NeighborhoodSize);
+        if(((m_neighborhood_infos >> (ineighbor + NB_RESERVED_BITS_IN_NEIGHBORHOOD)) & 1) == 1) {
+            return true;
+        }
+        
+        return false;
+    }
+    
+    value_type& value() noexcept {
+        tsl_assert(!empty());
+        return *reinterpret_cast<value_type*>(std::addressof(m_value));
+    }
+    
+    const value_type& value() const noexcept {
+        tsl_assert(!empty());
+        return *reinterpret_cast<const value_type*>(std::addressof(m_value));
+    }
+    
+    template<typename... Args>
+    void set_value_of_empty_bucket(std::size_t hash, Args&&... value_type_args) {
+        tsl_assert(empty());
+        
+        ::new (static_cast<void*>(std::addressof(m_value))) value_type(std::forward<Args>(value_type_args)...);
+        set_empty(false);
+        this->set_hash(hash);
+    }
+    
+    void swap_value_into_empty_bucket(hopscotch_bucket& empty_bucket) {
+        tsl_assert(empty_bucket.empty());
+        if(!empty()) {
+            ::new (static_cast<void*>(std::addressof(empty_bucket.m_value))) value_type(std::move(value()));
+            empty_bucket.copy_hash(*this);
+            empty_bucket.set_empty(false);
+            
+            destroy_value();
+            set_empty(true);
+        }
+    }
+    
+    void remove_value() noexcept {
+        if(!empty()) {
+            destroy_value();
+            set_empty(true);
+        }
+    }
+    
+    void clear() noexcept {
+        if(!empty()) {
+            destroy_value();
+        }
+        
+        m_neighborhood_infos = 0;
+        tsl_assert(empty());
+    }
+    
+    static std::size_t max_size() noexcept {
+        if(StoreHash) {
+            return std::numeric_limits<typename bucket_hash::hash_type>::max();
+        }
+        else {
+            return std::numeric_limits<std::size_t>::max();
+        }
+    }
+    
+private:
+    void set_empty(bool is_empty) noexcept {
+        if(is_empty) {
+            m_neighborhood_infos = neighborhood_bitmap(m_neighborhood_infos & ~1);
+        }
+        else {
+            m_neighborhood_infos = neighborhood_bitmap(m_neighborhood_infos | 1);
+        }
+    }
+    
+    void destroy_value() noexcept {
+        try {
+            tsl_assert(!empty());
+            
+            value().~value_type();
+        }
+        catch(...) {
+            std::terminate();
+        }
+    }
+    
+private:
+    using storage = typename std::aligned_storage<sizeof(value_type), alignof(value_type)>::type;
+    
+    neighborhood_bitmap m_neighborhood_infos;
+    storage m_value;
+};
+
+
+/**
+ * Internal common class used by hopscotch_(sc)_map and hopscotch_(sc)_set.
+ * 
+ * ValueType is what will be stored by hopscotch_hash (usually std::pair<Key, T> for map and Key for set).
+ * 
+ * KeySelect should be a FunctionObject which takes a ValueType in parameter and returns a reference to the key.
+ * 
+ * ValueSelect should be a FunctionObject which takes a ValueType in parameter and returns a reference to the value.
+ * ValueSelect should be void if there is no value (in set for example).
+ * 
+ * OverflowContainer will be used as containers for overflown elements. Usually it should be a list<ValueType>
+ * or a set<Key>/map<Key, T>.
+ */
+template<class ValueType,
+         class KeySelect,
+         class ValueSelect,
+         class Hash,
+         class KeyEqual,
+         class Allocator,
+         unsigned int NeighborhoodSize,
+         bool StoreHash,
+         class GrowthPolicy,
+         class OverflowContainer>
+class hopscotch_hash: private Hash, private KeyEqual, private GrowthPolicy {
+private:
+    template<typename U>
+    using has_mapped_type = typename std::integral_constant<bool, !std::is_same<U, void>::value>;
+    
+public:
+    template<bool is_const>
+    class hopscotch_iterator;
+    
+    using key_type = typename KeySelect::key_type;
+    using value_type = ValueType;
+    using size_type = std::size_t;
+    using difference_type = std::ptrdiff_t;
+    using hasher = Hash;
+    using key_equal = KeyEqual;
+    using allocator_type = Allocator;
+    using reference = value_type&;
+    using const_reference = const value_type&;
+    using pointer = value_type*;
+    using const_pointer = const value_type*;
+    using iterator = hopscotch_iterator<false>;
+    using const_iterator = hopscotch_iterator<true>;
+    
+private:
+    using hopscotch_bucket = tsl::detail_hopscotch_hash::hopscotch_bucket<ValueType, NeighborhoodSize, StoreHash>;
+    using neighborhood_bitmap = typename hopscotch_bucket::neighborhood_bitmap;
+    
+    using buckets_allocator = typename std::allocator_traits<allocator_type>::template rebind_alloc<hopscotch_bucket>;
+    using buckets_container_type = std::vector<hopscotch_bucket, buckets_allocator>;  
+    
+    using overflow_container_type = OverflowContainer;
+    
+    static_assert(std::is_same<typename overflow_container_type::value_type, ValueType>::value, 
+                  "OverflowContainer should have ValueType as type.");
+    
+    static_assert(std::is_same<typename overflow_container_type::allocator_type, Allocator>::value, 
+                  "Invalid allocator, not the same type as the value_type.");
+    
+    
+    using iterator_buckets = typename buckets_container_type::iterator; 
+    using const_iterator_buckets = typename buckets_container_type::const_iterator;
+    
+    using iterator_overflow = typename overflow_container_type::iterator; 
+    using const_iterator_overflow = typename overflow_container_type::const_iterator; 
+    
+public:    
+    /**
+     * The 'operator*()' and 'operator->()' methods return a const reference and const pointer respectively to the 
+     * stored value type.
+     * 
+     * In case of a map, to get a modifiable reference to the value associated to a key (the '.second' in the 
+     * stored pair), you have to call 'value()'.
+     */
+    template<bool is_const>
+    class hopscotch_iterator {
+        friend class hopscotch_hash;
+    private:
+        using iterator_bucket = typename std::conditional<is_const, 
+                                                            typename hopscotch_hash::const_iterator_buckets, 
+                                                            typename hopscotch_hash::iterator_buckets>::type;
+        using iterator_overflow = typename std::conditional<is_const, 
+                                                            typename hopscotch_hash::const_iterator_overflow, 
+                                                            typename hopscotch_hash::iterator_overflow>::type;
+    
+        
+        hopscotch_iterator(iterator_bucket buckets_iterator, iterator_bucket buckets_end_iterator, 
+                           iterator_overflow overflow_iterator) noexcept : 
+            m_buckets_iterator(buckets_iterator), m_buckets_end_iterator(buckets_end_iterator),
+            m_overflow_iterator(overflow_iterator)
+        {
+        }
+        
+    public:
+        using iterator_category = std::forward_iterator_tag;
+        using value_type = const typename hopscotch_hash::value_type;
+        using difference_type = std::ptrdiff_t;
+        using reference = value_type&;
+        using pointer = value_type*;
+        
+        
+        hopscotch_iterator() noexcept {
+        }
+        
+        hopscotch_iterator(const hopscotch_iterator<false>& other) noexcept :
+            m_buckets_iterator(other.m_buckets_iterator), m_buckets_end_iterator(other.m_buckets_end_iterator),
+            m_overflow_iterator(other.m_overflow_iterator)
+        {
+        }
+        
+        const typename hopscotch_hash::key_type& key() const {
+            if(m_buckets_iterator != m_buckets_end_iterator) {
+                return KeySelect()(m_buckets_iterator->value());
+            }
+            
+            return KeySelect()(*m_overflow_iterator);
+        }
+
+        template<class U = ValueSelect, typename std::enable_if<has_mapped_type<U>::value>::type* = nullptr>
+        typename std::conditional<
+                        is_const, 
+                        const typename U::value_type&, 
+                        typename U::value_type&>::type value() const
+        {
+            if(m_buckets_iterator != m_buckets_end_iterator) {
+                return U()(m_buckets_iterator->value());
+            }
+            
+            return U()(*m_overflow_iterator);
+        }
+        
+        reference operator*() const { 
+            if(m_buckets_iterator != m_buckets_end_iterator) {
+                return m_buckets_iterator->value();
+            }
+            
+            return *m_overflow_iterator;
+        }
+        
+        pointer operator->() const { 
+            if(m_buckets_iterator != m_buckets_end_iterator) {
+                return std::addressof(m_buckets_iterator->value()); 
+            }
+            
+            return std::addressof(*m_overflow_iterator); 
+        }
+        
+        hopscotch_iterator& operator++() {
+            if(m_buckets_iterator == m_buckets_end_iterator) {
+                ++m_overflow_iterator;
+                return *this;
+            }
+            
+            do {
+                ++m_buckets_iterator;
+            } while(m_buckets_iterator != m_buckets_end_iterator && m_buckets_iterator->empty());
+            
+            return *this; 
+        }
+        
+        hopscotch_iterator operator++(int) {
+            hopscotch_iterator tmp(*this);
+            ++*this;
+            
+            return tmp;
+        }
+        
+        friend bool operator==(const hopscotch_iterator& lhs, const hopscotch_iterator& rhs) { 
+            return lhs.m_buckets_iterator == rhs.m_buckets_iterator && 
+                   lhs.m_overflow_iterator == rhs.m_overflow_iterator; 
+        }
+        
+        friend bool operator!=(const hopscotch_iterator& lhs, const hopscotch_iterator& rhs) { 
+            return !(lhs == rhs); 
+        }
+        
+    private:
+        iterator_bucket m_buckets_iterator;
+        iterator_bucket m_buckets_end_iterator;
+        iterator_overflow m_overflow_iterator;
+    };
+    
+
+    
+public:
+    template<class OC = OverflowContainer, typename std::enable_if<!has_key_compare<OC>::value>::type* = nullptr>
+    hopscotch_hash(size_type bucket_count, 
+                  const Hash& hash,
+                  const KeyEqual& equal,
+                  const Allocator& alloc,
+                  float max_load_factor) :  Hash(hash),
+                                            KeyEqual(equal),
+                                            GrowthPolicy(bucket_count),
+                                            m_buckets(alloc), 
+                                            m_overflow_elements(alloc),
+                                            m_nb_elements(0)
+    {
+        if(bucket_count > max_bucket_count()) {
+            throw std::length_error("The map exceeds its maxmimum size.");
+        }
+        
+        static_assert(NeighborhoodSize - 1 > 0, "");
+        m_buckets.resize(bucket_count + NeighborhoodSize - 1);
+        
+        
+        this->max_load_factor(max_load_factor);
+    }
+    
+    template<class OC = OverflowContainer, typename std::enable_if<has_key_compare<OC>::value>::type* = nullptr>
+    hopscotch_hash(size_type bucket_count, 
+                  const Hash& hash,
+                  const KeyEqual& equal,
+                  const Allocator& alloc,
+                  float max_load_factor,
+                  const typename OC::key_compare& comp) : Hash(hash),
+                                                          KeyEqual(equal),
+                                                          GrowthPolicy(bucket_count),
+                                                          m_buckets(alloc), 
+                                                          m_overflow_elements(comp, alloc),
+                                                          m_nb_elements(0)
+    {
+        
+        if(bucket_count > max_bucket_count()) {
+            throw std::length_error("The map exceeds its maxmimum size.");
+        }
+        
+        static_assert(NeighborhoodSize - 1 > 0, "");
+        
+        // Can't directly construct with the appropriate size in the initializer 
+        // as m_buckets(bucket_count, alloc) is not supported by GCC 4.8
+        m_buckets.resize(bucket_count + NeighborhoodSize - 1);
+        
+        
+        this->max_load_factor(max_load_factor);
+    }
+    
+    hopscotch_hash(const hopscotch_hash& other) = default;
+    
+    hopscotch_hash(hopscotch_hash&& other) 
+                        noexcept(
+                            std::is_nothrow_move_constructible<Hash>::value &&
+                            std::is_nothrow_move_constructible<KeyEqual>::value &&
+                            std::is_nothrow_move_constructible<GrowthPolicy>::value &&
+                            std::is_nothrow_move_constructible<buckets_container_type>::value &&
+                            std::is_nothrow_move_constructible<overflow_container_type>::value
+                        )
+                        : Hash(std::move(static_cast<Hash&>(other))),
+                          KeyEqual(std::move(static_cast<KeyEqual&>(other))),
+                          GrowthPolicy(std::move(static_cast<GrowthPolicy&>(other))),
+                          m_buckets(std::move(other.m_buckets)),
+                          m_overflow_elements(std::move(other.m_overflow_elements)),
+                          m_nb_elements(other.m_nb_elements),
+                          m_max_load_factor(other.m_max_load_factor),
+                          m_load_threshold(other.m_load_threshold),
+                          m_min_load_factor_rehash_threshold(other.m_min_load_factor_rehash_threshold)
+    {
+        other.clear();
+    }
+    
+    hopscotch_hash& operator=(const hopscotch_hash& other) = default;
+    
+    hopscotch_hash& operator=(hopscotch_hash&& other) {
+        other.swap(*this);
+        other.clear();
+        
+        return *this;
+    }
+    
+    allocator_type get_allocator() const {
+        return m_buckets.get_allocator();
+    }
+    
+    
+    /*
+     * Iterators
+     */
+    iterator begin() noexcept {
+        auto begin = m_buckets.begin();
+        while(begin != m_buckets.end() && begin->empty()) {
+            ++begin;
+        }
+        
+        return iterator(begin, m_buckets.end(), m_overflow_elements.begin());
+    }
+    
+    const_iterator begin() const noexcept {
+        return cbegin();
+    }
+    
+    const_iterator cbegin() const noexcept {
+        auto begin = m_buckets.cbegin();
+        while(begin != m_buckets.cend() && begin->empty()) {
+            ++begin;
+        }
+        
+        return const_iterator(begin, m_buckets.cend(), m_overflow_elements.cbegin());
+    }
+    
+    iterator end() noexcept {
+        return iterator(m_buckets.end(), m_buckets.end(), m_overflow_elements.end());
+    }
+    
+    const_iterator end() const noexcept {
+        return cend();
+    }
+    
+    const_iterator cend() const noexcept {
+        return const_iterator(m_buckets.cend(), m_buckets.cend(), m_overflow_elements.cend());
+    }
+    
+    
+    /*
+     * Capacity
+     */
+    bool empty() const noexcept {
+        return m_nb_elements == 0;
+    }
+    
+    size_type size() const noexcept {
+        return m_nb_elements;
+    }
+    
+    size_type max_size() const noexcept {
+        return hopscotch_bucket::max_size();
+    }
+    
+    /*
+     * Modifiers
+     */
+    void clear() noexcept {
+        for(auto& bucket : m_buckets) {
+            bucket.clear();
+        }
+        
+        m_overflow_elements.clear();
+        m_nb_elements = 0;
+    }
+    
+    
+    std::pair<iterator, bool> insert(const value_type& value) { 
+        return insert_impl(value); 
+    }
+        
+    template<class P, typename std::enable_if<std::is_constructible<value_type, P&&>::value>::type* = nullptr>
+    std::pair<iterator, bool> insert(P&& value) { 
+        return emplace(std::forward<P>(value)); 
+    }
+    
+    std::pair<iterator, bool> insert(value_type&& value) { 
+        return insert_impl(std::move(value)); 
+    }
+    
+    
+    iterator insert(const_iterator hint, const value_type& value) { 
+        if(hint != cend() && compare_keys(KeySelect()(*hint), KeySelect()(value))) { 
+            return mutable_iterator(hint); 
+        }
+        
+        return insert(value).first; 
+    }
+        
+    template<class P, typename std::enable_if<std::is_constructible<value_type, P&&>::value>::type* = nullptr>
+    iterator insert(const_iterator hint, P&& value) {
+        return emplace_hint(hint, std::forward<P>(value)); 
+    }
+    
+    iterator insert(const_iterator hint, value_type&& value) { 
+        if(hint != cend() && compare_keys(KeySelect()(*hint), KeySelect()(value))) { 
+            return mutable_iterator(hint); 
+        }
+        
+        return insert(std::move(value)).first; 
+    }
+    
+    
+    template<class InputIt>
+    void insert(InputIt first, InputIt last) {
+        if(std::is_base_of<std::forward_iterator_tag, 
+                           typename std::iterator_traits<InputIt>::iterator_category>::value) 
+        {
+            const auto nb_elements_insert = std::distance(first, last);
+            const std::size_t nb_elements_in_buckets = m_nb_elements - m_overflow_elements.size();
+            const std::size_t nb_free_buckets = m_load_threshold - nb_elements_in_buckets;
+            tsl_assert(m_nb_elements >= m_overflow_elements.size());
+            tsl_assert(m_load_threshold >= nb_elements_in_buckets);
+            
+            if(nb_elements_insert > 0 && nb_free_buckets < std::size_t(nb_elements_insert)) {
+                reserve(nb_elements_in_buckets + std::size_t(nb_elements_insert));
+            }
+        }
+        
+        for(; first != last; ++first) {
+            insert(*first);
+        }
+    }
+    
+    
+    template<class M>
+    std::pair<iterator, bool> insert_or_assign(const key_type& k, M&& obj) { 
+        return insert_or_assign_impl(k, std::forward<M>(obj)); 
+    }
+
+    template<class M>
+    std::pair<iterator, bool> insert_or_assign(key_type&& k, M&& obj) { 
+        return insert_or_assign_impl(std::move(k), std::forward<M>(obj)); 
+    }
+    
+    
+    template<class M>
+    iterator insert_or_assign(const_iterator hint, const key_type& k, M&& obj) {
+        if(hint != cend() && compare_keys(KeySelect()(*hint), k)) { 
+            auto it = mutable_iterator(hint); 
+            it.value() = std::forward<M>(obj);
+            
+            return it;
+        }
+        
+        return insert_or_assign(k, std::forward<M>(obj)).first;
+    }
+    
+    template<class M>
+    iterator insert_or_assign(const_iterator hint, key_type&& k, M&& obj) {
+        if(hint != cend() && compare_keys(KeySelect()(*hint), k)) {
+            auto it = mutable_iterator(hint); 
+            it.value() = std::forward<M>(obj);
+            
+            return it;
+        }
+        
+        return insert_or_assign(std::move(k), std::forward<M>(obj)).first;
+    }
+    
+    
+    template<class... Args>
+    std::pair<iterator, bool> emplace(Args&&... args) {
+        return insert(value_type(std::forward<Args>(args)...));
+    }
+    
+    template<class... Args>
+    iterator emplace_hint(const_iterator hint, Args&&... args) {
+        return insert(hint, value_type(std::forward<Args>(args)...));        
+    }
+    
+    template<class... Args>
+    std::pair<iterator, bool> try_emplace(const key_type& k, Args&&... args) { 
+        return try_emplace_impl(k, std::forward<Args>(args)...);
+    }
+    
+    template<class... Args>
+    std::pair<iterator, bool> try_emplace(key_type&& k, Args&&... args) {
+        return try_emplace_impl(std::move(k), std::forward<Args>(args)...);
+    }
+    
+    template<class... Args>
+    iterator try_emplace(const_iterator hint, const key_type& k, Args&&... args) { 
+        if(hint != cend() && compare_keys(KeySelect()(*hint), k)) { 
+            return mutable_iterator(hint); 
+        }
+        
+        return try_emplace(k, std::forward<Args>(args)...).first;
+    }
+    
+    template<class... Args>
+    iterator try_emplace(const_iterator hint, key_type&& k, Args&&... args) {
+        if(hint != cend() && compare_keys(KeySelect()(*hint), k)) { 
+            return mutable_iterator(hint); 
+        }
+        
+        return try_emplace(std::move(k), std::forward<Args>(args)...).first;
+    }
+    
+    
+    
+    iterator erase(iterator pos) {
+        return erase(const_iterator(pos));
+    }
+    
+    iterator erase(const_iterator pos) {
+        const std::size_t ibucket_for_hash = bucket_for_hash(hash_key(pos.key()));
+        
+        if(pos.m_buckets_iterator != pos.m_buckets_end_iterator) {
+            auto it_bucket = m_buckets.begin() + std::distance(m_buckets.cbegin(), pos.m_buckets_iterator);
+            erase_from_bucket(it_bucket, ibucket_for_hash);
+            
+            return ++iterator(it_bucket, m_buckets.end(), m_overflow_elements.begin()); 
+        }
+        else {
+            auto it_next_overflow = erase_from_overflow(pos.m_overflow_iterator, ibucket_for_hash);
+            return iterator(m_buckets.end(), m_buckets.end(), it_next_overflow);
+        }
+    }
+    
+    iterator erase(const_iterator first, const_iterator last) {
+        if(first == last) {
+            return mutable_iterator(first);
+        }
+        
+        auto to_delete = erase(first);
+        while(to_delete != last) {
+            to_delete = erase(to_delete);
+        }
+        
+        return to_delete;
+    }
+    
+    template<class K>
+    size_type erase(const K& key) {
+        return erase(key, hash_key(key));
+    }
+    
+    template<class K>
+    size_type erase(const K& key, std::size_t hash) {
+        const std::size_t ibucket_for_hash = bucket_for_hash(hash);
+        
+        auto it_find = find_in_buckets(key, hash, m_buckets.begin() + ibucket_for_hash);
+        if(it_find != m_buckets.end()) {
+            erase_from_bucket(it_find, ibucket_for_hash);
+
+            return 1;
+        }
+        
+        if(m_buckets[ibucket_for_hash].has_overflow()) {
+            auto it_overflow = find_in_overflow(key);
+            if(it_overflow != m_overflow_elements.end()) {
+                erase_from_overflow(it_overflow, ibucket_for_hash);
+                
+                return 1;
+            }
+        }
+        
+        return 0;
+    }
+    
+    void swap(hopscotch_hash& other) {
+        using std::swap;
+        
+        swap(static_cast<Hash&>(*this), static_cast<Hash&>(other));
+        swap(static_cast<KeyEqual&>(*this), static_cast<KeyEqual&>(other));
+        swap(static_cast<GrowthPolicy&>(*this), static_cast<GrowthPolicy&>(other));
+        swap(m_buckets, other.m_buckets);
+        swap(m_overflow_elements, other.m_overflow_elements);
+        swap(m_nb_elements, other.m_nb_elements);
+        swap(m_max_load_factor, other.m_max_load_factor);
+        swap(m_load_threshold, other.m_load_threshold);
+        swap(m_min_load_factor_rehash_threshold, other.m_min_load_factor_rehash_threshold);
+    }
+    
+    
+    /*
+     * Lookup
+     */
+    template<class K, class U = ValueSelect, typename std::enable_if<has_mapped_type<U>::value>::type* = nullptr>
+    typename U::value_type& at(const K& key) {
+        return at(key, hash_key(key));
+    }
+    
+    template<class K, class U = ValueSelect, typename std::enable_if<has_mapped_type<U>::value>::type* = nullptr>
+    typename U::value_type& at(const K& key, std::size_t hash) {
+        return const_cast<typename U::value_type&>(static_cast<const hopscotch_hash*>(this)->at(key, hash));
+    }
+    
+    
+    template<class K, class U = ValueSelect, typename std::enable_if<has_mapped_type<U>::value>::type* = nullptr>
+    const typename U::value_type& at(const K& key) const {
+        return at(key, hash_key(key));
+    }
+    
+    template<class K, class U = ValueSelect, typename std::enable_if<has_mapped_type<U>::value>::type* = nullptr>
+    const typename U::value_type& at(const K& key, std::size_t hash) const {
+        using T = typename U::value_type;
+        
+        const T* value = find_value_impl(key, hash, m_buckets.begin() + bucket_for_hash(hash));
+        if(value == nullptr) {
+            throw std::out_of_range("Couldn't find key.");
+        }
+        else {
+            return *value;
+        }
+    }
+    
+    
+    template<class K, class U = ValueSelect, typename std::enable_if<has_mapped_type<U>::value>::type* = nullptr>
+    typename U::value_type& operator[](K&& key) {
+        using T = typename U::value_type;
+        
+        const std::size_t hash = hash_key(key);
+        const std::size_t ibucket_for_hash = bucket_for_hash(hash);
+        
+        T* value = find_value_impl(key, hash, m_buckets.begin() + ibucket_for_hash);
+        if(value != nullptr) {
+            return *value;
+        }
+        else {
+            return insert_impl(ibucket_for_hash, hash, std::piecewise_construct, 
+                                                       std::forward_as_tuple(std::forward<K>(key)), 
+                                                       std::forward_as_tuple()).first.value();
+        }
+    }
+    
+    
+    template<class K>
+    size_type count(const K& key) const {
+        return count(key, hash_key(key));
+    }
+    
+    template<class K>
+    size_type count(const K& key, std::size_t hash) const {
+        return count_impl(key, hash, m_buckets.cbegin() + bucket_for_hash(hash));
+    }
+    
+    
+    template<class K>
+    iterator find(const K& key) {
+        return find(key, hash_key(key));
+    }
+    
+    template<class K>
+    iterator find(const K& key, std::size_t hash) {
+        return find_impl(key, hash, m_buckets.begin() + bucket_for_hash(hash));
+    }
+    
+    
+    template<class K>
+    const_iterator find(const K& key) const {
+        return find(key, hash_key(key));
+    }
+    
+    template<class K>
+    const_iterator find(const K& key, std::size_t hash) const {
+        return find_impl(key, hash, m_buckets.begin() + bucket_for_hash(hash));
+    }
+    
+    
+    template<class K>
+    std::pair<iterator, iterator> equal_range(const K& key) {
+        return equal_range(key, hash_key(key));
+    }
+    
+    template<class K>
+    std::pair<iterator, iterator> equal_range(const K& key, std::size_t hash) {
+        iterator it = find(key, hash);
+        return std::make_pair(it, (it == end())?it:std::next(it));
+    }
+    
+    
+    template<class K>
+    std::pair<const_iterator, const_iterator> equal_range(const K& key) const {
+        return equal_range(key, hash_key(key));
+    }
+    
+    template<class K>
+    std::pair<const_iterator, const_iterator> equal_range(const K& key, std::size_t hash) const {
+        const_iterator it = find(key, hash);
+        return std::make_pair(it, (it == cend())?it:std::next(it));
+    }
+    
+    /*
+     * Bucket interface 
+     */
+    size_type bucket_count() const {
+        /*
+         * So that the last bucket can have NeighborhoodSize neighbors, the size of the bucket array is a little
+         * bigger than the real number of buckets. We could use some of the buckets at the beginning, but
+         * it is easier this way and we avoid weird behaviour with iterators.
+         */
+        return m_buckets.size() - NeighborhoodSize + 1; 
+    }
+    
+    size_type max_bucket_count() const {
+        const std::size_t max_bucket_count = std::min(GrowthPolicy::max_bucket_count(), m_buckets.max_size());
+        return max_bucket_count - NeighborhoodSize + 1;
+    }
+    
+    
+    /*
+     *  Hash policy 
+     */
+    float load_factor() const {
+        return float(m_nb_elements)/float(bucket_count());
+    }
+    
+    float max_load_factor() const {
+        return m_max_load_factor;
+    }
+    
+    void max_load_factor(float ml) {
+        m_max_load_factor = ml;
+        m_load_threshold = size_type(float(bucket_count())*m_max_load_factor);
+        m_min_load_factor_rehash_threshold = size_type(bucket_count()*MIN_LOAD_FACTOR_FOR_REHASH);
+    }
+    
+    void rehash(size_type count) {
+        count = std::max(count, size_type(std::ceil(float(size())/max_load_factor())));
+        rehash_impl(count);
+    }
+    
+    void reserve(size_type count) {
+        rehash(size_type(std::ceil(float(count)/max_load_factor())));
+    }
+    
+    
+    /*
+     * Observers
+     */
+    hasher hash_function() const {
+        return static_cast<Hash>(*this);
+    }
+    
+    key_equal key_eq() const {
+        return static_cast<KeyEqual>(*this);
+    }
+    
+    /*
+     * Other
+     */
+    iterator mutable_iterator(const_iterator pos) {
+        if(pos.m_buckets_iterator != pos.m_buckets_end_iterator) {
+            // Get a non-const iterator
+            auto it = m_buckets.begin() + std::distance(m_buckets.cbegin(), pos.m_buckets_iterator);
+            return iterator(it, m_buckets.end(), m_overflow_elements.begin());
+        }
+        else {
+            // Get a non-const iterator
+            auto it = mutable_overflow_iterator(pos.m_overflow_iterator);
+            
+            return iterator(m_buckets.end(), m_buckets.end(), it);
+        }
+    }
+    
+    size_type overflow_size() const noexcept {
+        return m_overflow_elements.size();
+    }
+    
+    template<class U = OverflowContainer, typename std::enable_if<has_key_compare<U>::value>::type* = nullptr>
+    typename U::key_compare key_comp() const {
+        return m_overflow_elements.key_comp();
+    }
+    
+    
+private:
+    template<class K>
+    std::size_t hash_key(const K& key) const {
+        return Hash::operator()(key);
+    }
+    
+    template<class K1, class K2>
+    bool compare_keys(const K1& key1, const K2& key2) const {
+        return KeyEqual::operator()(key1, key2);
+    }
+    
+    std::size_t bucket_for_hash(std::size_t hash) const {
+        return GrowthPolicy::bucket_for_hash(hash);
+    }
+    
+    
+    static_assert(std::is_nothrow_move_constructible<value_type>::value || 
+                  std::is_copy_constructible<value_type>::value, 
+                  "value_type must be either copy constructible or nothrow move constructible.");
+    
+    template<typename U = value_type, 
+             typename std::enable_if<std::is_nothrow_move_constructible<U>::value>::type* = nullptr>
+    void rehash_impl(size_type count) {
+        hopscotch_hash new_map = new_hopscotch_hash(count);
+        
+        if(!m_overflow_elements.empty()) {
+            new_map.m_overflow_elements.swap(m_overflow_elements);
+            new_map.m_nb_elements += new_map.m_overflow_elements.size();
+            
+            for(const value_type& value : new_map.m_overflow_elements) {
+                const std::size_t ibucket_for_hash = new_map.bucket_for_hash(new_map.hash_key(KeySelect()(value)));
+                new_map.m_buckets[ibucket_for_hash].set_overflow(true);
+            }
+        }
+        
+        try {
+            for(auto it_bucket = m_buckets.begin(); it_bucket != m_buckets.end(); ++it_bucket) {
+                if(it_bucket->empty()) {
+                    continue;
+                }
+                
+                const std::size_t hash = USE_STORED_HASH_ON_REHASH?
+                                            it_bucket->truncated_bucket_hash():
+                                            new_map.hash_key(KeySelect()(it_bucket->value()));
+                const std::size_t ibucket_for_hash = new_map.bucket_for_hash(hash);
+                
+                new_map.insert_impl(ibucket_for_hash, hash, std::move(it_bucket->value()));
+                
+                
+                erase_from_bucket(it_bucket, bucket_for_hash(hash));
+            }
+        } 
+        /*
+         * The call to insert_impl may throw an exception if an element is added to the overflow
+         * list. Rollback the elements in this case.
+         */
+        catch(...) {
+            m_overflow_elements.swap(new_map.m_overflow_elements);
+            
+            for(auto it_bucket = new_map.m_buckets.begin(); it_bucket != new_map.m_buckets.end(); ++it_bucket) {
+                if(it_bucket->empty()) {
+                    continue;
+                }
+                
+                const std::size_t hash = USE_STORED_HASH_ON_REHASH?
+                                            it_bucket->truncated_bucket_hash():
+                                            hash_key(KeySelect()(it_bucket->value()));
+                const std::size_t ibucket_for_hash = bucket_for_hash(hash);
+                
+                // The elements we insert were not in the overflow list before the switch.
+                // They will not be go in the overflow list if we rollback the switch.
+                insert_impl(ibucket_for_hash, hash, std::move(it_bucket->value()));
+            }
+            
+            throw;
+        }
+        
+        new_map.swap(*this);
+    }
+    
+    template<typename U = value_type, 
+             typename std::enable_if<std::is_copy_constructible<U>::value && 
+                                     !std::is_nothrow_move_constructible<U>::value>::type* = nullptr>
+    void rehash_impl(size_type count) {
+        hopscotch_hash new_map = new_hopscotch_hash(count);
+                
+        for(const hopscotch_bucket& bucket: m_buckets) {
+            if(bucket.empty()) {
+                continue;
+            }
+            
+            const std::size_t hash = USE_STORED_HASH_ON_REHASH?
+                                         bucket.truncated_bucket_hash():
+                                         new_map.hash_key(KeySelect()(bucket.value()));
+            const std::size_t ibucket_for_hash = new_map.bucket_for_hash(hash);
+            
+            new_map.insert_impl(ibucket_for_hash, hash, bucket.value());
+        }
+        
+        for(const value_type& value: m_overflow_elements) {
+            const std::size_t hash = new_map.hash_key(KeySelect()(value));
+            const std::size_t ibucket_for_hash = new_map.bucket_for_hash(hash);
+            
+            new_map.insert_impl(ibucket_for_hash, hash, value);
+        }
+            
+        new_map.swap(*this);
+    }
+    
+#ifdef TSL_NO_RANGE_ERASE_WITH_CONST_ITERATOR
+    iterator_overflow mutable_overflow_iterator(const_iterator_overflow it) {
+        return std::next(m_overflow_elements.begin(), std::distance(m_overflow_elements.cbegin(), it));        
+    }
+#else            
+    iterator_overflow mutable_overflow_iterator(const_iterator_overflow it) {
+        return m_overflow_elements.erase(it, it);       
+    }
+#endif    
+
+    // iterator is in overflow list
+    iterator_overflow erase_from_overflow(const_iterator_overflow pos, std::size_t ibucket_for_hash) {
+#ifdef TSL_NO_RANGE_ERASE_WITH_CONST_ITERATOR        
+        auto it_next = m_overflow_elements.erase(mutable_overflow_iterator(pos));
+#else
+        auto it_next = m_overflow_elements.erase(pos);
+#endif
+        m_nb_elements--;
+        
+        
+        // Check if we can remove the overflow flag
+        tsl_assert(m_buckets[ibucket_for_hash].has_overflow());
+        for(const value_type& value: m_overflow_elements) {
+            const std::size_t bucket_for_value = bucket_for_hash(hash_key(KeySelect()(value)));
+            if(bucket_for_value == ibucket_for_hash) {
+                return it_next;
+            }
+        }
+        
+        m_buckets[ibucket_for_hash].set_overflow(false);
+        return it_next;
+    }
+    
+    // iterator is in bucket
+    void erase_from_bucket(iterator_buckets pos, std::size_t ibucket_for_hash) noexcept {
+        const std::size_t ibucket_for_pos = std::distance(m_buckets.begin(), pos);
+        tsl_assert(ibucket_for_pos >= ibucket_for_hash);
+        
+        m_buckets[ibucket_for_pos].remove_value();
+        m_buckets[ibucket_for_hash].toggle_neighbor_presence(ibucket_for_pos - ibucket_for_hash);
+        m_nb_elements--;
+    }
+    
+
+    
+    template<class K, class M>
+    std::pair<iterator, bool> insert_or_assign_impl(K&& key, M&& obj) {
+        auto it = try_emplace_impl(std::forward<K>(key), std::forward<M>(obj));
+        if(!it.second) {
+            it.first.value() = std::forward<M>(obj);
+        }
+        
+        return it;
+    }
+    
+    template<typename P, class... Args>
+    std::pair<iterator, bool> try_emplace_impl(P&& key, Args&&... args_value) {
+        const std::size_t hash = hash_key(key);
+        const std::size_t ibucket_for_hash = bucket_for_hash(hash);
+        
+        // Check if already presents
+        auto it_find = find_impl(key, hash, m_buckets.begin() + ibucket_for_hash);
+        if(it_find != end()) {
+            return std::make_pair(it_find, false);
+        }
+        
+        return insert_impl(ibucket_for_hash, hash, std::piecewise_construct, 
+                                                   std::forward_as_tuple(std::forward<P>(key)), 
+                                                   std::forward_as_tuple(std::forward<Args>(args_value)...));
+    }
+    
+    template<typename P>
+    std::pair<iterator, bool> insert_impl(P&& value) {
+        const std::size_t hash = hash_key(KeySelect()(value));
+        const std::size_t ibucket_for_hash = bucket_for_hash(hash);
+        
+        // Check if already presents
+        auto it_find = find_impl(KeySelect()(value), hash, m_buckets.begin() + ibucket_for_hash);
+        if(it_find != end()) {
+            return std::make_pair(it_find, false);
+        }
+        
+        
+        return insert_impl(ibucket_for_hash, hash, std::forward<P>(value));
+    }
+    
+    template<typename... Args>
+    std::pair<iterator, bool> insert_impl(std::size_t ibucket_for_hash, std::size_t hash, Args&&... value_type_args) {
+        if((m_nb_elements - m_overflow_elements.size()) >= m_load_threshold) {
+            rehash(GrowthPolicy::next_bucket_count());
+            ibucket_for_hash = bucket_for_hash(hash);
+        }
+        
+        std::size_t ibucket_empty = find_empty_bucket(ibucket_for_hash);
+        if(ibucket_empty < m_buckets.size()) {
+            do {
+                tsl_assert(ibucket_empty >= ibucket_for_hash);
+                
+                // Empty bucket is in range of NeighborhoodSize, use it
+                if(ibucket_empty - ibucket_for_hash < NeighborhoodSize) {
+                    auto it = insert_in_bucket(ibucket_empty, ibucket_for_hash, 
+                                               hash, std::forward<Args>(value_type_args)...);
+                    return std::make_pair(iterator(it, m_buckets.end(), m_overflow_elements.begin()), true);
+                }
+            }
+            // else, try to swap values to get a closer empty bucket
+            while(swap_empty_bucket_closer(ibucket_empty));
+        }
+            
+        // Load factor is too low or a rehash will not change the neighborhood, put the value in overflow list
+        if(size() < m_min_load_factor_rehash_threshold || !will_neighborhood_change_on_rehash(ibucket_for_hash)) {
+            auto it_insert = insert_in_overflow(std::forward<Args>(value_type_args)...);
+            
+            m_buckets[ibucket_for_hash].set_overflow(true);
+            m_nb_elements++;
+            
+            return std::make_pair(iterator(m_buckets.end(), m_buckets.end(), it_insert), true);
+        }
+    
+        rehash(GrowthPolicy::next_bucket_count());
+        
+        ibucket_for_hash = bucket_for_hash(hash);
+        return insert_impl(ibucket_for_hash, hash, std::forward<Args>(value_type_args)...);
+    }    
+    
+    /*
+     * Return true if a rehash will change the position of a key-value in the neighborhood of 
+     * ibucket_neighborhood_check. In this case a rehash is needed instead of puting the value in overflow list.
+     */
+    bool will_neighborhood_change_on_rehash(size_t ibucket_neighborhood_check) const {
+        std::size_t expand_bucket_count = GrowthPolicy::next_bucket_count();
+        GrowthPolicy expand_growth_policy(expand_bucket_count);
+        
+        for(size_t ibucket = ibucket_neighborhood_check; 
+            ibucket < m_buckets.size() && (ibucket - ibucket_neighborhood_check) < NeighborhoodSize; 
+            ++ibucket)
+        {
+            tsl_assert(!m_buckets[ibucket].empty());
+            
+            const size_t hash = USE_STORED_HASH_ON_REHASH?
+                                    m_buckets[ibucket].truncated_bucket_hash():
+                                    hash_key(KeySelect()(m_buckets[ibucket].value()));
+            if(bucket_for_hash(hash) != expand_growth_policy.bucket_for_hash(hash)) {
+                return true;
+            }
+        }
+        
+        return false;
+    }
+    
+    /*
+     * Return the index of an empty bucket in m_buckets.
+     * If none, the returned index equals m_buckets.size()
+     */
+    std::size_t find_empty_bucket(std::size_t ibucket_start) const {
+        const std::size_t limit = std::min(ibucket_start + MAX_PROBES_FOR_EMPTY_BUCKET, m_buckets.size());
+        for(; ibucket_start < limit; ibucket_start++) {
+            if(m_buckets[ibucket_start].empty()) {
+                return ibucket_start;
+            }
+        }
+        
+        return m_buckets.size();
+    }
+    
+    /*
+     * Insert value in ibucket_empty where value originally belongs to ibucket_for_hash
+     * 
+     * Return bucket iterator to ibucket_empty
+     */
+    template<typename... Args>
+    iterator_buckets insert_in_bucket(std::size_t ibucket_empty, std::size_t ibucket_for_hash,
+                                      std::size_t hash, Args&&... value_type_args) 
+    {
+        tsl_assert(ibucket_empty >= ibucket_for_hash );
+        tsl_assert(m_buckets[ibucket_empty].empty());
+        m_buckets[ibucket_empty].set_value_of_empty_bucket(hash, std::forward<Args>(value_type_args)...);
+        
+        tsl_assert(!m_buckets[ibucket_for_hash].empty());
+        m_buckets[ibucket_for_hash].toggle_neighbor_presence(ibucket_empty - ibucket_for_hash);
+        m_nb_elements++;
+        
+        return m_buckets.begin() + ibucket_empty;
+    }
+    
+    /*
+     * Try to swap the bucket ibucket_empty_in_out with a bucket preceding it while keeping the neighborhood 
+     * conditions correct.
+     * 
+     * If a swap was possible, the position of ibucket_empty_in_out will be closer to 0 and true will re returned.
+     */
+    bool swap_empty_bucket_closer(std::size_t& ibucket_empty_in_out) {
+        tsl_assert(ibucket_empty_in_out >= NeighborhoodSize);
+        const std::size_t neighborhood_start = ibucket_empty_in_out - NeighborhoodSize + 1;
+        
+        for(std::size_t to_check = neighborhood_start; to_check < ibucket_empty_in_out; to_check++) {
+            neighborhood_bitmap neighborhood_infos = m_buckets[to_check].neighborhood_infos();
+            std::size_t to_swap = to_check;
+            
+            while(neighborhood_infos != 0 && to_swap < ibucket_empty_in_out) {
+                if((neighborhood_infos & 1) == 1) {
+                    tsl_assert(m_buckets[ibucket_empty_in_out].empty());
+                    tsl_assert(!m_buckets[to_swap].empty());
+                    
+                    m_buckets[to_swap].swap_value_into_empty_bucket(m_buckets[ibucket_empty_in_out]);
+                    
+                    tsl_assert(!m_buckets[to_check].check_neighbor_presence(ibucket_empty_in_out - to_check));
+                    tsl_assert(m_buckets[to_check].check_neighbor_presence(to_swap - to_check));
+                    
+                    m_buckets[to_check].toggle_neighbor_presence(ibucket_empty_in_out - to_check);
+                    m_buckets[to_check].toggle_neighbor_presence(to_swap - to_check);
+                    
+                    
+                    ibucket_empty_in_out = to_swap;
+                    
+                    return true;
+                }
+                
+                to_swap++;
+                neighborhood_infos = neighborhood_bitmap(neighborhood_infos >> 1);
+            }
+        }
+        
+        return false;
+    }
+    
+    
+    
+    template<class K, class U = ValueSelect, typename std::enable_if<has_mapped_type<U>::value>::type* = nullptr>
+    typename U::value_type* find_value_impl(const K& key, std::size_t hash, iterator_buckets it_bucket) {
+        return const_cast<typename U::value_type*>(
+                    static_cast<const hopscotch_hash*>(this)->find_value_impl(key, hash, it_bucket));
+    }
+    
+    /*
+     * Avoid the creation of an iterator to just get the value for operator[] and at() in maps. Faster this way.
+     *
+     * Return null if no value for key (TODO use std::optional when available).
+     */
+    template<class K, class U = ValueSelect, typename std::enable_if<has_mapped_type<U>::value>::type* = nullptr>
+    const typename U::value_type* find_value_impl(const K& key, std::size_t hash, 
+                                                  const_iterator_buckets it_bucket) const 
+    {
+        auto it_find = find_in_buckets(key, hash, it_bucket);
+        if(it_find != m_buckets.cend()) {
+            return std::addressof(ValueSelect()(it_find->value()));
+        }
+        
+        if(it_bucket->has_overflow()) {
+            auto it_overflow = find_in_overflow(key);
+            if(it_overflow != m_overflow_elements.end()) {
+                return std::addressof(ValueSelect()(*it_overflow));
+            }
+        }
+        
+        return nullptr;
+    }
+    
+    template<class K>
+    size_type count_impl(const K& key, std::size_t hash, const_iterator_buckets it_bucket) const {
+        if(find_in_buckets(key, hash, it_bucket) != m_buckets.cend()) {
+            return 1;
+        }
+        else if(it_bucket->has_overflow() && find_in_overflow(key) != m_overflow_elements.cend()) {
+            return 1;
+        }
+        else {
+            return 0;
+        }
+    }
+    
+    template<class K>
+    iterator find_impl(const K& key, std::size_t hash, iterator_buckets it_bucket) {
+        auto it = find_in_buckets(key, hash, it_bucket);
+        if(it != m_buckets.end()) {
+            return iterator(it, m_buckets.end(), m_overflow_elements.begin());
+        }
+        
+        if(!it_bucket->has_overflow()) {
+            return end();
+        }
+        
+        return iterator(m_buckets.end(), m_buckets.end(), find_in_overflow(key));
+    }
+    
+    template<class K>
+    const_iterator find_impl(const K& key, std::size_t hash, const_iterator_buckets it_bucket) const {
+        auto it = find_in_buckets(key, hash, it_bucket);
+        if(it != m_buckets.cend()) {
+            return const_iterator(it, m_buckets.cend(), m_overflow_elements.cbegin());
+        }
+        
+        if(!it_bucket->has_overflow()) {
+            return cend();
+        }
+
+        
+        return const_iterator(m_buckets.cend(), m_buckets.cend(), find_in_overflow(key));
+    }
+    
+    template<class K>
+    iterator_buckets find_in_buckets(const K& key, std::size_t hash, iterator_buckets it_bucket) {   
+        auto it_find = static_cast<const hopscotch_hash*>(this)->find_in_buckets(key, hash, it_bucket); 
+        return m_buckets.begin() + std::distance(m_buckets.cbegin(), it_find);
+    }
+
+    
+    template<class K>
+    const_iterator_buckets find_in_buckets(const K& key, std::size_t hash, const_iterator_buckets it_bucket) const {      
+        (void) hash; // Avoid warning of unused variable when StoreHash is false;
+
+        // TODO Try to optimize the function. 
+        // I tried to use ffs and  __builtin_ffs functions but I could not reduce the time the function
+        // takes with -march=native
+        
+        neighborhood_bitmap neighborhood_infos = it_bucket->neighborhood_infos();
+        while(neighborhood_infos != 0) {
+            if((neighborhood_infos & 1) == 1) {
+                // Check StoreHash before calling bucket_hash_equal. Functionally it doesn't change anythin. 
+                // If StoreHash is false, bucket_hash_equal is a no-op. Avoiding the call is there to help 
+                // GCC optimizes `hash` parameter away, it seems to not be able to do without this hint.
+                if((!StoreHash || it_bucket->bucket_hash_equal(hash)) && 
+                    compare_keys(KeySelect()(it_bucket->value()), key)) 
+                {
+                    return it_bucket;
+                }
+            }
+            
+            ++it_bucket;
+            neighborhood_infos = neighborhood_bitmap(neighborhood_infos >> 1);
+        }
+        
+        return m_buckets.end();
+    }
+    
+
+    
+    template<class K, class U = OverflowContainer, typename std::enable_if<!has_key_compare<U>::value>::type* = nullptr>
+    iterator_overflow find_in_overflow(const K& key) {
+        return std::find_if(m_overflow_elements.begin(), m_overflow_elements.end(), 
+                            [&](const value_type& value) { 
+                                return compare_keys(key, KeySelect()(value)); 
+                            });
+    }
+    
+    template<class K, class U = OverflowContainer, typename std::enable_if<!has_key_compare<U>::value>::type* = nullptr>
+    const_iterator_overflow find_in_overflow(const K& key) const {
+        return std::find_if(m_overflow_elements.cbegin(), m_overflow_elements.cend(), 
+                            [&](const value_type& value) { 
+                                return compare_keys(key, KeySelect()(value)); 
+                            });
+    }
+    
+    template<class K, class U = OverflowContainer, typename std::enable_if<has_key_compare<U>::value>::type* = nullptr>
+    iterator_overflow find_in_overflow(const K& key) {
+        return m_overflow_elements.find(key);
+    }
+    
+    template<class K, class U = OverflowContainer, typename std::enable_if<has_key_compare<U>::value>::type* = nullptr>
+    const_iterator_overflow find_in_overflow(const K& key) const {
+        return m_overflow_elements.find(key);
+    }
+    
+    
+    
+    template<class... Args, class U = OverflowContainer, typename std::enable_if<!has_key_compare<U>::value>::type* = nullptr>
+    iterator_overflow insert_in_overflow(Args&&... value_type_args) {
+        return m_overflow_elements.emplace(m_overflow_elements.end(), std::forward<Args>(value_type_args)...);
+    }
+    
+    template<class... Args, class U = OverflowContainer, typename std::enable_if<has_key_compare<U>::value>::type* = nullptr>
+    iterator_overflow insert_in_overflow(Args&&... value_type_args) {
+        return m_overflow_elements.emplace(std::forward<Args>(value_type_args)...).first;
+    }
+    
+    
+    
+    template<class U = OverflowContainer, typename std::enable_if<!has_key_compare<U>::value>::type* = nullptr>
+    hopscotch_hash new_hopscotch_hash(size_type bucket_count) {
+        return hopscotch_hash(bucket_count, static_cast<Hash&>(*this), static_cast<KeyEqual&>(*this), 
+                              get_allocator(), m_max_load_factor);
+    }
+    
+    template<class U = OverflowContainer, typename std::enable_if<has_key_compare<U>::value>::type* = nullptr>
+    hopscotch_hash new_hopscotch_hash(size_type bucket_count) {
+        return hopscotch_hash(bucket_count, static_cast<Hash&>(*this), static_cast<KeyEqual&>(*this), 
+                              get_allocator(), m_max_load_factor, m_overflow_elements.key_comp());
+    }
+    
+public:    
+    static const size_type DEFAULT_INIT_BUCKETS_SIZE = 16;
+    static constexpr float DEFAULT_MAX_LOAD_FACTOR = (NeighborhoodSize <= 30)?0.8f:0.9f;
+    
+private:    
+    static const std::size_t MAX_PROBES_FOR_EMPTY_BUCKET = 12*NeighborhoodSize;
+    static constexpr float MIN_LOAD_FACTOR_FOR_REHASH = 0.1f;
+    
+    static const bool USE_STORED_HASH_ON_REHASH = 
+                StoreHash && std::is_same<GrowthPolicy, tsl::power_of_two_growth_policy>::value;
+    
+private:    
+    buckets_container_type m_buckets;
+    overflow_container_type m_overflow_elements;
+    
+    size_type m_nb_elements;
+    
+    float m_max_load_factor;
+    size_type m_load_threshold;
+    size_type m_min_load_factor_rehash_threshold;
+};
+
+} // end namespace detail_hopscotch_hash
+
+
+} // end namespace tsl
+
+#endif
diff --git a/dbms/src/Common/tests/hopscotch-map/src/hopscotch_map.h b/dbms/src/Common/tests/hopscotch-map/src/hopscotch_map.h
new file mode 100644
index 00000000000..e7b9c2c3d3f
--- /dev/null
+++ b/dbms/src/Common/tests/hopscotch-map/src/hopscotch_map.h
@@ -0,0 +1,666 @@
+/**
+ * MIT License
+ * 
+ * Copyright (c) 2017 Tessil
+ * 
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ * 
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ * 
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#ifndef TSL_HOPSCOTCH_MAP_H
+#define TSL_HOPSCOTCH_MAP_H
+
+
+#include <algorithm>
+#include <cstddef>
+#include <functional>
+#include <initializer_list>
+#include <list>
+#include <memory>
+#include <type_traits>
+#include <utility>
+#include "hopscotch_hash.h"
+
+
+namespace tsl {
+
+/**
+ * Implementation of a hash map using the hopscotch hashing algorithm.
+ * 
+ * The Key and the value T must be either nothrow move-constructible, copy-constuctible or both.
+ * 
+ * The size of the neighborhood (NeighborhoodSize) must be > 0 and <= 62 if StoreHash is false.
+ * When StoreHash is true, 32-bits of the hash will be stored alongside the neighborhood limiting
+ * the NeighborhoodSize to <= 30. There is no memory usage difference between 
+ * 'NeighborhoodSize 62; StoreHash false' and 'NeighborhoodSize 30; StoreHash true'.
+ * 
+ * Storing the hash may improve performance on insert during the rehash process if the hash takes time
+ * to compute. It may also improve read performance if the KeyEqual function takes time (or incurs a cache-miss).
+ * If used with simple Hash and KeyEqual it may slow things down.
+ * 
+ * StoreHash can only be set if the GrowthPolicy is set to tsl::power_of_two_growth_policy.
+ * 
+ * GrowthPolicy defines how the map grows and consequently how a hash value is mapped to a bucket. 
+ * By default the map uses tsl::power_of_two_growth_policy. This policy keeps the number of buckets 
+ * to a power of two and uses a mask to map the hash to a bucket instead of the slow modulo.
+ * You may define your own growth policy, check tsl::power_of_two_growth_policy for the interface.
+ * 
+ * If the destructors of Key or T throw an exception, behaviour of the class is undefined.
+ * 
+ * Iterators invalidation:
+ *  - clear, operator=, reserve, rehash: always invalidate the iterators.
+ *  - insert, emplace, emplace_hint, operator[]: if there is an effective insert, invalidate the iterators 
+ *    if a displacement is needed to resolve a collision (which mean that most of the time, 
+ *    insert will invalidate the iterators). Or if there is a rehash.
+ *  - erase: iterator on the erased element is the only one which become invalid.
+ */
+template<class Key, 
+         class T, 
+         class Hash = std::hash<Key>,
+         class KeyEqual = std::equal_to<Key>,
+         class Allocator = std::allocator<std::pair<Key, T>>,
+         unsigned int NeighborhoodSize = 62,
+         bool StoreHash = false,
+         class GrowthPolicy = tsl::power_of_two_growth_policy>
+class hopscotch_map {
+private:    
+    template<typename U>
+    using has_is_transparent = tsl::detail_hopscotch_hash::has_is_transparent<U>;
+    
+    class KeySelect {
+    public:
+        using key_type = Key;
+        
+        const key_type& operator()(const std::pair<Key, T>& key_value) const {
+            return key_value.first;
+        }
+        
+        key_type& operator()(std::pair<Key, T>& key_value) {
+            return key_value.first;
+        }
+    };  
+    
+    class ValueSelect {
+    public:
+        using value_type = T;
+        
+        const value_type& operator()(const std::pair<Key, T>& key_value) const {
+            return key_value.second;
+        }
+        
+        value_type& operator()(std::pair<Key, T>& key_value) {
+            return key_value.second;
+        }
+    };
+    
+    
+    using overflow_container_type = std::list<std::pair<Key, T>, Allocator>;
+    using ht = detail_hopscotch_hash::hopscotch_hash<std::pair<Key, T>, KeySelect, ValueSelect,
+                                                     Hash, KeyEqual, 
+                                                     Allocator, NeighborhoodSize, 
+                                                     StoreHash, GrowthPolicy,
+                                                     overflow_container_type>;
+    
+public:
+    using key_type = typename ht::key_type;
+    using mapped_type = T;
+    using value_type = typename ht::value_type;
+    using size_type = typename ht::size_type;
+    using difference_type = typename ht::difference_type;
+    using hasher = typename ht::hasher;
+    using key_equal = typename ht::key_equal;
+    using allocator_type = typename ht::allocator_type;
+    using reference = typename ht::reference;
+    using const_reference = typename ht::const_reference;
+    using pointer = typename ht::pointer;
+    using const_pointer = typename ht::const_pointer;
+    using iterator = typename ht::iterator;
+    using const_iterator = typename ht::const_iterator;
+    
+    
+    
+    /*
+     * Constructors
+     */
+    hopscotch_map() : hopscotch_map(ht::DEFAULT_INIT_BUCKETS_SIZE) {
+    }
+    
+    explicit hopscotch_map(size_type bucket_count, 
+                        const Hash& hash = Hash(),
+                        const KeyEqual& equal = KeyEqual(),
+                        const Allocator& alloc = Allocator()) : 
+                        m_ht(bucket_count, hash, equal, alloc, ht::DEFAULT_MAX_LOAD_FACTOR)
+    {
+    }
+    
+    hopscotch_map(size_type bucket_count,
+                  const Allocator& alloc) : hopscotch_map(bucket_count, Hash(), KeyEqual(), alloc)
+    {
+    }
+    
+    hopscotch_map(size_type bucket_count,
+                  const Hash& hash,
+                  const Allocator& alloc) : hopscotch_map(bucket_count, hash, KeyEqual(), alloc)
+    {
+    }
+    
+    explicit hopscotch_map(const Allocator& alloc) : hopscotch_map(ht::DEFAULT_INIT_BUCKETS_SIZE, alloc) {
+    }
+    
+    template<class InputIt>
+    hopscotch_map(InputIt first, InputIt last,
+                size_type bucket_count = ht::DEFAULT_INIT_BUCKETS_SIZE,
+                const Hash& hash = Hash(),
+                const KeyEqual& equal = KeyEqual(),
+                const Allocator& alloc = Allocator()) : hopscotch_map(bucket_count, hash, equal, alloc)
+    {
+        insert(first, last);
+    }
+    
+    template<class InputIt>
+    hopscotch_map(InputIt first, InputIt last,
+                size_type bucket_count,
+                const Allocator& alloc) : hopscotch_map(first, last, bucket_count, Hash(), KeyEqual(), alloc)
+    {
+    }
+    
+    template<class InputIt>
+    hopscotch_map(InputIt first, InputIt last,
+                size_type bucket_count,
+                const Hash& hash,
+                const Allocator& alloc) : hopscotch_map(first, last, bucket_count, hash, KeyEqual(), alloc)
+    {
+    }
+
+    hopscotch_map(std::initializer_list<value_type> init,
+                    size_type bucket_count = ht::DEFAULT_INIT_BUCKETS_SIZE,
+                    const Hash& hash = Hash(),
+                    const KeyEqual& equal = KeyEqual(),
+                    const Allocator& alloc = Allocator()) : 
+                    hopscotch_map(init.begin(), init.end(), bucket_count, hash, equal, alloc)
+    {
+    }
+
+    hopscotch_map(std::initializer_list<value_type> init,
+                    size_type bucket_count,
+                    const Allocator& alloc) : 
+                    hopscotch_map(init.begin(), init.end(), bucket_count, Hash(), KeyEqual(), alloc)
+    {
+    }
+
+    hopscotch_map(std::initializer_list<value_type> init,
+                    size_type bucket_count,
+                    const Hash& hash,
+                    const Allocator& alloc) : 
+                    hopscotch_map(init.begin(), init.end(), bucket_count, hash, KeyEqual(), alloc)
+    {
+    }
+
+    
+    hopscotch_map& operator=(std::initializer_list<value_type> ilist) {
+        m_ht.clear();
+        
+        m_ht.reserve(ilist.size());
+        m_ht.insert(ilist.begin(), ilist.end());
+        
+        return *this;
+    }
+    
+    allocator_type get_allocator() const { return m_ht.get_allocator(); }
+    
+    
+    /*
+     * Iterators
+     */
+    iterator begin() noexcept { return m_ht.begin(); }
+    const_iterator begin() const noexcept { return m_ht.begin(); }
+    const_iterator cbegin() const noexcept { return m_ht.cbegin(); }
+    
+    iterator end() noexcept { return m_ht.end(); }
+    const_iterator end() const noexcept { return m_ht.end(); }
+    const_iterator cend() const noexcept { return m_ht.cend(); }
+    
+    
+    /*
+     * Capacity
+     */
+    bool empty() const noexcept { return m_ht.empty(); }
+    size_type size() const noexcept { return m_ht.size(); }
+    size_type max_size() const noexcept { return m_ht.max_size(); }
+    
+    /*
+     * Modifiers
+     */
+    void clear() noexcept { m_ht.clear(); }
+    
+    
+    
+    
+    std::pair<iterator, bool> insert(const value_type& value) { 
+        return m_ht.insert(value); 
+    }
+        
+    template<class P, typename std::enable_if<std::is_constructible<value_type, P&&>::value>::type* = nullptr>
+    std::pair<iterator, bool> insert(P&& value) { 
+        return m_ht.insert(std::forward<P>(value)); 
+    }
+    
+    std::pair<iterator, bool> insert(value_type&& value) { 
+        return m_ht.insert(std::move(value)); 
+    }
+    
+    
+    iterator insert(const_iterator hint, const value_type& value) { 
+        return m_ht.insert(hint, value); 
+    }
+        
+    template<class P, typename std::enable_if<std::is_constructible<value_type, P&&>::value>::type* = nullptr>
+    iterator insert(const_iterator hint, P&& value) { 
+        return m_ht.insert(hint, std::forward<P>(value));
+    }
+    
+    iterator insert(const_iterator hint, value_type&& value) { 
+        return m_ht.insert(hint, std::move(value)); 
+    }
+    
+    
+    template<class InputIt>
+    void insert(InputIt first, InputIt last) { 
+        m_ht.insert(first, last); 
+    }
+    
+    void insert(std::initializer_list<value_type> ilist) { 
+        m_ht.insert(ilist.begin(), ilist.end()); 
+    }
+
+    
+    
+    
+    template<class M>
+    std::pair<iterator, bool> insert_or_assign(const key_type& k, M&& obj) { 
+        return m_ht.insert_or_assign(k, std::forward<M>(obj)); 
+    }
+
+    template<class M>
+    std::pair<iterator, bool> insert_or_assign(key_type&& k, M&& obj) { 
+        return m_ht.insert_or_assign(std::move(k), std::forward<M>(obj)); 
+    }
+    
+    template<class M>
+    iterator insert_or_assign(const_iterator hint, const key_type& k, M&& obj) {
+        return m_ht.insert_or_assign(hint, k, std::forward<M>(obj));
+    }
+    
+    template<class M>
+    iterator insert_or_assign(const_iterator hint, key_type&& k, M&& obj) {
+        return m_ht.insert_or_assign(hint, std::move(k), std::forward<M>(obj));
+    }
+    
+    
+    
+    
+    /**
+     * Due to the way elements are stored, emplace will need to move or copy the key-value once.
+     * The method is equivalent to insert(value_type(std::forward<Args>(args)...));
+     * 
+     * Mainly here for compatibility with the std::unordered_map interface.
+     */
+    template<class... Args>
+    std::pair<iterator, bool> emplace(Args&&... args) { 
+        return m_ht.emplace(std::forward<Args>(args)...); 
+    }
+    
+    
+    
+    
+    /**
+     * Due to the way elements are stored, emplace_hint will need to move or copy the key-value once.
+     * The method is equivalent to insert(hint, value_type(std::forward<Args>(args)...));
+     * 
+     * Mainly here for compatibility with the std::unordered_map interface.
+     */
+    template<class... Args>
+    iterator emplace_hint(const_iterator hint, Args&&... args) {
+        return m_ht.emplace_hint(hint, std::forward<Args>(args)...);
+    }
+    
+    
+    
+    
+    template<class... Args>
+    std::pair<iterator, bool> try_emplace(const key_type& k, Args&&... args) { 
+        return m_ht.try_emplace(k, std::forward<Args>(args)...);
+    }
+    
+    template<class... Args>
+    std::pair<iterator, bool> try_emplace(key_type&& k, Args&&... args) {
+        return m_ht.try_emplace(std::move(k), std::forward<Args>(args)...);
+    }
+    
+    template<class... Args>
+    iterator try_emplace(const_iterator hint, const key_type& k, Args&&... args) {
+        return m_ht.try_emplace(hint, k, std::forward<Args>(args)...);
+    }
+    
+    template<class... Args>
+    iterator try_emplace(const_iterator hint, key_type&& k, Args&&... args) {
+        return m_ht.try_emplace(hint, std::move(k), std::forward<Args>(args)...);
+    }
+    
+    
+
+    
+    iterator erase(iterator pos) { return m_ht.erase(pos); }
+    iterator erase(const_iterator pos) { return m_ht.erase(pos); }
+    iterator erase(const_iterator first, const_iterator last) { return m_ht.erase(first, last); }
+    size_type erase(const key_type& key) { return m_ht.erase(key); }
+    
+    /**
+     * Use the hash value 'precalculated_hash' instead of hashing the key. The hash value should be the same
+     * as hash_function()(key). Usefull to speed-up the lookup to the value if you already have the hash.
+     */    
+    size_type erase(const key_type& key, std::size_t precalculated_hash) { 
+        return m_ht.erase(key, precalculated_hash); 
+    }
+    
+    /**
+     * This overload only participates in the overload resolution if the typedef KeyEqual::is_transparent exists. 
+     * If so, K must be hashable and comparable to Key.
+     */
+    template<class K, class KE = KeyEqual, typename std::enable_if<has_is_transparent<KE>::value>::type* = nullptr> 
+    size_type erase(const K& key) { return m_ht.erase(key); }
+    
+    /**
+     * @copydoc erase(const K& key)
+     * 
+     * Use the hash value 'precalculated_hash' instead of hashing the key. The hash value should be the same
+     * as hash_function()(key). Usefull to speed-up the lookup to the value if you already have the hash.
+     */    
+    template<class K, class KE = KeyEqual, typename std::enable_if<has_is_transparent<KE>::value>::type* = nullptr> 
+    size_type erase(const K& key, std::size_t precalculated_hash) { 
+        return m_ht.erase(key, precalculated_hash); 
+    }
+    
+    
+    
+    
+    void swap(hopscotch_map& other) { other.m_ht.swap(m_ht); }
+    
+    /*
+     * Lookup
+     */
+    T& at(const Key& key) { return m_ht.at(key); }
+    
+    /**
+     * Use the hash value 'precalculated_hash' instead of hashing the key. The hash value should be the same
+     * as hash_function()(key). Usefull to speed-up the lookup if you already have the hash.
+     */
+    T& at(const Key& key, std::size_t precalculated_hash) { return m_ht.at(key, precalculated_hash); }
+    
+    
+    const T& at(const Key& key) const { return m_ht.at(key); }
+    
+    /**
+     * @copydoc at(const Key& key, std::size_t precalculated_hash)
+     */
+    const T& at(const Key& key, std::size_t precalculated_hash) const { return m_ht.at(key, precalculated_hash); }
+    
+    
+    /**
+     * This overload only participates in the overload resolution if the typedef KeyEqual::is_transparent exists. 
+     * If so, K must be hashable and comparable to Key.
+     */
+    template<class K, class KE = KeyEqual, typename std::enable_if<has_is_transparent<KE>::value>::type* = nullptr> 
+    T& at(const K& key) { return m_ht.at(key); }
+
+    /**
+     * @copydoc at(const K& key)
+     * 
+     * Use the hash value 'precalculated_hash' instead of hashing the key. The hash value should be the same
+     * as hash_function()(key). Usefull to speed-up the lookup if you already have the hash.
+     */    
+    template<class K, class KE = KeyEqual, typename std::enable_if<has_is_transparent<KE>::value>::type* = nullptr> 
+    T& at(const K& key, std::size_t precalculated_hash) { return m_ht.at(key, precalculated_hash); }
+    
+    
+    /**
+     * @copydoc at(const K& key)
+     */
+    template<class K, class KE = KeyEqual, typename std::enable_if<has_is_transparent<KE>::value>::type* = nullptr> 
+    const T& at(const K& key) const { return m_ht.at(key); }
+    
+    /**
+     * @copydoc at(const K& key, std::size_t precalculated_hash)
+     */    
+    template<class K, class KE = KeyEqual, typename std::enable_if<has_is_transparent<KE>::value>::type* = nullptr> 
+    const T& at(const K& key, std::size_t precalculated_hash) const { return m_ht.at(key, precalculated_hash); }
+    
+    
+    
+    
+    T& operator[](const Key& key) { return m_ht[key]; }    
+    T& operator[](Key&& key) { return m_ht[std::move(key)]; }
+    
+    
+    
+    
+    size_type count(const Key& key) const { return m_ht.count(key); }
+    
+    /**
+     * Use the hash value 'precalculated_hash' instead of hashing the key. The hash value should be the same
+     * as hash_function()(key). Usefull to speed-up the lookup if you already have the hash.
+     */
+    size_type count(const Key& key, std::size_t precalculated_hash) const { 
+        return m_ht.count(key, precalculated_hash); 
+    }
+    
+    /**
+     * This overload only participates in the overload resolution if the typedef KeyEqual::is_transparent exists. 
+     * If so, K must be hashable and comparable to Key.
+     */
+    template<class K, class KE = KeyEqual, typename std::enable_if<has_is_transparent<KE>::value>::type* = nullptr> 
+    size_type count(const K& key) const { return m_ht.count(key); }
+    
+    /**
+     * @copydoc count(const K& key) const
+     * 
+     * Use the hash value 'precalculated_hash' instead of hashing the key. The hash value should be the same
+     * as hash_function()(key). Usefull to speed-up the lookup if you already have the hash.
+     */     
+    template<class K, class KE = KeyEqual, typename std::enable_if<has_is_transparent<KE>::value>::type* = nullptr> 
+    size_type count(const K& key, std::size_t precalculated_hash) const { return m_ht.count(key, precalculated_hash); }
+    
+    
+    
+    
+    iterator find(const Key& key) { return m_ht.find(key); }
+    
+    /**
+     * Use the hash value 'precalculated_hash' instead of hashing the key. The hash value should be the same
+     * as hash_function()(key). Usefull to speed-up the lookup if you already have the hash.
+     */
+    iterator find(const Key& key, std::size_t precalculated_hash) { return m_ht.find(key, precalculated_hash); }
+    
+    const_iterator find(const Key& key) const { return m_ht.find(key); }
+    
+    /**
+     * @copydoc find(const Key& key, std::size_t precalculated_hash)
+     */
+    const_iterator find(const Key& key, std::size_t precalculated_hash) const { 
+        return m_ht.find(key, precalculated_hash);
+    }
+    
+    /**
+     * This overload only participates in the overload resolution if the typedef KeyEqual::is_transparent exists. 
+     * If so, K must be hashable and comparable to Key.
+     */
+    template<class K, class KE = KeyEqual, typename std::enable_if<has_is_transparent<KE>::value>::type* = nullptr> 
+    iterator find(const K& key) { return m_ht.find(key); }
+    
+    /**
+     * @copydoc find(const K& key)
+     * 
+     * Use the hash value 'precalculated_hash' instead of hashing the key. The hash value should be the same
+     * as hash_function()(key). Usefull to speed-up the lookup if you already have the hash.
+     */
+    template<class K, class KE = KeyEqual, typename std::enable_if<has_is_transparent<KE>::value>::type* = nullptr> 
+    iterator find(const K& key, std::size_t precalculated_hash) { return m_ht.find(key, precalculated_hash); }
+    
+    /**
+     * @copydoc find(const K& key)
+     */
+    template<class K, class KE = KeyEqual, typename std::enable_if<has_is_transparent<KE>::value>::type* = nullptr> 
+    const_iterator find(const K& key) const { return m_ht.find(key); }
+    
+    /**
+     * @copydoc find(const K& key)
+     * 
+     * Use the hash value 'precalculated_hash' instead of hashing the key. The hash value should be the same
+     * as hash_function()(key). Usefull to speed-up the lookup if you already have the hash.
+     */
+    template<class K, class KE = KeyEqual, typename std::enable_if<has_is_transparent<KE>::value>::type* = nullptr> 
+    const_iterator find(const K& key, std::size_t precalculated_hash) const { 
+        return m_ht.find(key, precalculated_hash); 
+    }
+    
+    
+    
+    
+    std::pair<iterator, iterator> equal_range(const Key& key) { return m_ht.equal_range(key); }
+    
+    /**
+     * Use the hash value 'precalculated_hash' instead of hashing the key. The hash value should be the same
+     * as hash_function()(key). Usefull to speed-up the lookup if you already have the hash.
+     */
+    std::pair<iterator, iterator> equal_range(const Key& key, std::size_t precalculated_hash) { 
+        return m_ht.equal_range(key, precalculated_hash); 
+    }
+    
+    std::pair<const_iterator, const_iterator> equal_range(const Key& key) const { return m_ht.equal_range(key); }
+    
+    /**
+     * @copydoc equal_range(const Key& key, std::size_t precalculated_hash)
+     */
+    std::pair<const_iterator, const_iterator> equal_range(const Key& key, std::size_t precalculated_hash) const { 
+        return m_ht.equal_range(key, precalculated_hash); 
+    }
+
+    /**
+     * This overload only participates in the overload resolution if the typedef KeyEqual::is_transparent exists. 
+     * If so, K must be hashable and comparable to Key.
+     */
+    template<class K, class KE = KeyEqual, typename std::enable_if<has_is_transparent<KE>::value>::type* = nullptr> 
+    std::pair<iterator, iterator> equal_range(const K& key) { return m_ht.equal_range(key); }
+    
+    
+    /**
+     * @copydoc equal_range(const K& key)
+     * 
+     * Use the hash value 'precalculated_hash' instead of hashing the key. The hash value should be the same
+     * as hash_function()(key). Usefull to speed-up the lookup if you already have the hash.
+     */
+    template<class K, class KE = KeyEqual, typename std::enable_if<has_is_transparent<KE>::value>::type* = nullptr> 
+    std::pair<iterator, iterator> equal_range(const K& key, std::size_t precalculated_hash) { 
+        return m_ht.equal_range(key, precalculated_hash); 
+    }
+    
+    /**
+     * @copydoc equal_range(const K& key)
+     */
+    template<class K, class KE = KeyEqual, typename std::enable_if<has_is_transparent<KE>::value>::type* = nullptr> 
+    std::pair<const_iterator, const_iterator> equal_range(const K& key) const { return m_ht.equal_range(key); }
+    
+    /**
+     * @copydoc equal_range(const K& key, std::size_t precalculated_hash)
+     */    
+    template<class K, class KE = KeyEqual, typename std::enable_if<has_is_transparent<KE>::value>::type* = nullptr> 
+    std::pair<const_iterator, const_iterator> equal_range(const K& key, std::size_t precalculated_hash) const { 
+        return m_ht.equal_range(key, precalculated_hash); 
+    }
+    
+    
+    
+    
+    /*
+     * Bucket interface 
+     */
+    size_type bucket_count() const { return m_ht.bucket_count(); }
+    size_type max_bucket_count() const { return m_ht.max_bucket_count(); }
+    
+    
+    /*
+     *  Hash policy 
+     */
+    float load_factor() const { return m_ht.load_factor(); }
+    float max_load_factor() const { return m_ht.max_load_factor(); }
+    void max_load_factor(float ml) { m_ht.max_load_factor(ml); }
+    
+    void rehash(size_type count) { m_ht.rehash(count); }
+    void reserve(size_type count) { m_ht.reserve(count); }
+    
+    
+    /*
+     * Observers
+     */
+    hasher hash_function() const { return m_ht.hash_function(); }
+    key_equal key_eq() const { return m_ht.key_eq(); }
+    
+    /*
+     * Other
+     */
+    
+    /**
+     * Convert a const_iterator to an iterator.
+     */
+    iterator mutable_iterator(const_iterator pos) {
+        return m_ht.mutable_iterator(pos);
+    }
+    
+    size_type overflow_size() const noexcept { return m_ht.overflow_size(); }
+    
+    friend bool operator==(const hopscotch_map& lhs, const hopscotch_map& rhs) {
+        if(lhs.size() != rhs.size()) {
+            return false;
+        }
+        
+        for(const auto& element_lhs : lhs) {
+            const auto it_element_rhs = rhs.find(element_lhs.first);
+            if(it_element_rhs == rhs.cend() || element_lhs.second != it_element_rhs->second) {
+                return false;
+            }
+        }
+        
+        return true;
+    }
+
+    friend bool operator!=(const hopscotch_map& lhs, const hopscotch_map& rhs) {
+        return !operator==(lhs, rhs);
+    }
+
+    friend void swap(hopscotch_map& lhs, hopscotch_map& rhs) {
+        lhs.swap(rhs);
+    }
+
+
+    
+private:
+    ht m_ht;
+};
+
+} // end namespace tsl
+
+#endif
diff --git a/dbms/src/Common/tests/hopscotch-map/src/hopscotch_sc_map.h b/dbms/src/Common/tests/hopscotch-map/src/hopscotch_sc_map.h
new file mode 100644
index 00000000000..6af85f3867c
--- /dev/null
+++ b/dbms/src/Common/tests/hopscotch-map/src/hopscotch_sc_map.h
@@ -0,0 +1,663 @@
+/**
+ * MIT License
+ * 
+ * Copyright (c) 2017 Tessil
+ * 
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ * 
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ * 
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#ifndef TSL_HOPSCOTCH_SC_MAP_H
+#define TSL_HOPSCOTCH_SC_MAP_H 
+
+
+#include <algorithm>
+#include <cstddef>
+#include <functional>
+#include <initializer_list>
+#include <map>
+#include <memory>
+#include <type_traits>
+#include <utility>
+#include "hopscotch_hash.h"
+
+
+namespace tsl {
+
+    
+/**
+ * Similar to tsl::hopscotch_map but instead of using a list for overflowing elements it uses
+ * a binary search tree. It thus needs an additional template parameter Compare. Compare should
+ * be arithmetically coherent with KeyEqual.
+ * 
+ * The binary search tree allows the map to have a worst-case scenario of O(log n) for search 
+ * and delete, even if the hash function maps all the elements to the same bucket. 
+ * For insert, the amortized worst case is O(log n), but the worst case is O(n) in case of rehash.
+ * 
+ * This makes the map resistant to DoS attacks (but doesn't preclude you to have a good hash function,
+ * as an element in the bucket array is faster to retrieve than in the tree).
+ * 
+ * @copydoc hopscotch_map
+ */
+template<class Key, 
+         class T, 
+         class Hash = std::hash<Key>,
+         class KeyEqual = std::equal_to<Key>,
+         class Compare = std::less<Key>,
+         class Allocator = std::allocator<std::pair<const Key, T>>,
+         unsigned int NeighborhoodSize = 62,
+         bool StoreHash = false,
+         class GrowthPolicy = tsl::power_of_two_growth_policy>
+class hopscotch_sc_map {
+private:
+    template<typename U>
+    using has_is_transparent = tsl::detail_hopscotch_hash::has_is_transparent<U>;
+    
+    class KeySelect {
+    public:
+        using key_type = Key;
+        
+        const key_type& operator()(const std::pair<const Key, T>& key_value) const {
+            return key_value.first;
+        }
+        
+        const key_type& operator()(std::pair<const Key, T>& key_value) {
+            return key_value.first;
+        }
+    };  
+    
+    class ValueSelect {
+    public:
+        using value_type = T;
+        
+        const value_type& operator()(const std::pair<const Key, T>& key_value) const {
+            return key_value.second;
+        }
+        
+        value_type& operator()(std::pair<Key, T>& key_value) {
+            return key_value.second;
+        }
+    };
+    
+    
+    // TODO Not optimal as we have to use std::pair<const Key, T> as ValueType which forbid 
+    // us to move the key in the bucket array, we have to use copy. Optimize.
+    using overflow_container_type = std::map<Key, T, Compare, Allocator>;
+    using ht = detail_hopscotch_hash::hopscotch_hash<std::pair<const Key, T>, KeySelect, ValueSelect,
+                                                     Hash, KeyEqual, 
+                                                     Allocator, NeighborhoodSize, 
+                                                     StoreHash, GrowthPolicy,
+                                                     overflow_container_type>;
+    
+public:
+    using key_type = typename ht::key_type;
+    using mapped_type = T;
+    using value_type = typename ht::value_type;
+    using size_type = typename ht::size_type;
+    using difference_type = typename ht::difference_type;
+    using hasher = typename ht::hasher;
+    using key_equal = typename ht::key_equal;
+    using key_compare = Compare;
+    using allocator_type = typename ht::allocator_type;
+    using reference = typename ht::reference;
+    using const_reference = typename ht::const_reference;
+    using pointer = typename ht::pointer;
+    using const_pointer = typename ht::const_pointer;
+    using iterator = typename ht::iterator;
+    using const_iterator = typename ht::const_iterator;
+    
+    
+    /*
+     * Constructors
+     */
+    hopscotch_sc_map() : hopscotch_sc_map(ht::DEFAULT_INIT_BUCKETS_SIZE) {
+    }
+    
+    explicit hopscotch_sc_map(size_type bucket_count, 
+                        const Hash& hash = Hash(),
+                        const KeyEqual& equal = KeyEqual(),
+                        const Allocator& alloc = Allocator(),
+                        const Compare& comp = Compare()) : 
+                        m_ht(bucket_count, hash, equal, alloc, ht::DEFAULT_MAX_LOAD_FACTOR, comp)
+    {
+    }
+    
+    hopscotch_sc_map(size_type bucket_count,
+                  const Allocator& alloc) : hopscotch_sc_map(bucket_count, Hash(), KeyEqual(), alloc)
+    {
+    }
+    
+    hopscotch_sc_map(size_type bucket_count,
+                  const Hash& hash,
+                  const Allocator& alloc) : hopscotch_sc_map(bucket_count, hash, KeyEqual(), alloc)
+    {
+    }
+    
+    explicit hopscotch_sc_map(const Allocator& alloc) : hopscotch_sc_map(ht::DEFAULT_INIT_BUCKETS_SIZE, alloc) {
+    }
+    
+    template<class InputIt>
+    hopscotch_sc_map(InputIt first, InputIt last,
+                size_type bucket_count = ht::DEFAULT_INIT_BUCKETS_SIZE,
+                const Hash& hash = Hash(),
+                const KeyEqual& equal = KeyEqual(),
+                const Allocator& alloc = Allocator()) : hopscotch_sc_map(bucket_count, hash, equal, alloc)
+    {
+        insert(first, last);
+    }
+    
+    template<class InputIt>
+    hopscotch_sc_map(InputIt first, InputIt last,
+                size_type bucket_count,
+                const Allocator& alloc) : hopscotch_sc_map(first, last, bucket_count, Hash(), KeyEqual(), alloc)
+    {
+    }
+    
+    template<class InputIt>
+    hopscotch_sc_map(InputIt first, InputIt last,
+                size_type bucket_count,
+                const Hash& hash,
+                const Allocator& alloc) : hopscotch_sc_map(first, last, bucket_count, hash, KeyEqual(), alloc)
+    {
+    }
+
+    hopscotch_sc_map(std::initializer_list<value_type> init,
+                    size_type bucket_count = ht::DEFAULT_INIT_BUCKETS_SIZE,
+                    const Hash& hash = Hash(),
+                    const KeyEqual& equal = KeyEqual(),
+                    const Allocator& alloc = Allocator()) : 
+                    hopscotch_sc_map(init.begin(), init.end(), bucket_count, hash, equal, alloc)
+    {
+    }
+
+    hopscotch_sc_map(std::initializer_list<value_type> init,
+                    size_type bucket_count,
+                    const Allocator& alloc) : 
+                    hopscotch_sc_map(init.begin(), init.end(), bucket_count, Hash(), KeyEqual(), alloc)
+    {
+    }
+
+    hopscotch_sc_map(std::initializer_list<value_type> init,
+                    size_type bucket_count,
+                    const Hash& hash,
+                    const Allocator& alloc) : 
+                    hopscotch_sc_map(init.begin(), init.end(), bucket_count, hash, KeyEqual(), alloc)
+    {
+    }
+
+    
+    hopscotch_sc_map& operator=(std::initializer_list<value_type> ilist) {
+        m_ht.clear();
+        
+        m_ht.reserve(ilist.size());
+        m_ht.insert(ilist.begin(), ilist.end());
+        
+        return *this;
+    }
+    
+    allocator_type get_allocator() const { return m_ht.get_allocator(); }
+    
+    
+    /*
+     * Iterators
+     */
+    iterator begin() noexcept { return m_ht.begin(); }
+    const_iterator begin() const noexcept { return m_ht.begin(); }
+    const_iterator cbegin() const noexcept { return m_ht.cbegin(); }
+    
+    iterator end() noexcept { return m_ht.end(); }
+    const_iterator end() const noexcept { return m_ht.end(); }
+    const_iterator cend() const noexcept { return m_ht.cend(); }
+    
+    
+    /*
+     * Capacity
+     */
+    bool empty() const noexcept { return m_ht.empty(); }
+    size_type size() const noexcept { return m_ht.size(); }
+    size_type max_size() const noexcept { return m_ht.max_size(); }
+    
+    /*
+     * Modifiers
+     */
+    void clear() noexcept { m_ht.clear(); }
+    
+    
+    
+    
+    std::pair<iterator, bool> insert(const value_type& value) { 
+        return m_ht.insert(value); 
+    }
+        
+    template<class P, typename std::enable_if<std::is_constructible<value_type, P&&>::value>::type* = nullptr>
+    std::pair<iterator, bool> insert(P&& value) { 
+        return m_ht.insert(std::forward<P>(value)); 
+    }
+    
+    std::pair<iterator, bool> insert(value_type&& value) { 
+        return m_ht.insert(std::move(value)); 
+    }
+    
+    
+    iterator insert(const_iterator hint, const value_type& value) { 
+        return m_ht.insert(hint, value); 
+    }
+        
+    template<class P, typename std::enable_if<std::is_constructible<value_type, P&&>::value>::type* = nullptr>
+    iterator insert(const_iterator hint, P&& value) { 
+        return m_ht.insert(hint, std::forward<P>(value));
+    }
+    
+    iterator insert(const_iterator hint, value_type&& value) { 
+        return m_ht.insert(hint, std::move(value)); 
+    }
+    
+    
+    template<class InputIt>
+    void insert(InputIt first, InputIt last) { 
+        m_ht.insert(first, last); 
+    }
+    
+    void insert(std::initializer_list<value_type> ilist) { 
+        m_ht.insert(ilist.begin(), ilist.end()); 
+    }
+
+    
+    
+    
+    template<class M>
+    std::pair<iterator, bool> insert_or_assign(const key_type& k, M&& obj) { 
+        return m_ht.insert_or_assign(k, std::forward<M>(obj)); 
+    }
+
+    template<class M>
+    std::pair<iterator, bool> insert_or_assign(key_type&& k, M&& obj) { 
+        return m_ht.insert_or_assign(std::move(k), std::forward<M>(obj)); 
+    }
+    
+    template<class M>
+    iterator insert_or_assign(const_iterator hint, const key_type& k, M&& obj) {
+        return m_ht.insert_or_assign(hint, k, std::forward<M>(obj));
+    }
+    
+    template<class M>
+    iterator insert_or_assign(const_iterator hint, key_type&& k, M&& obj) {
+        return m_ht.insert_or_assign(hint, std::move(k), std::forward<M>(obj));
+    }
+    
+    
+    
+    /**
+     * Due to the way elements are stored, emplace will need to move or copy the key-value once.
+     * The method is equivalent to insert(value_type(std::forward<Args>(args)...));
+     * 
+     * Mainly here for compatibility with the std::unordered_map interface.
+     */
+    template<class... Args>
+    std::pair<iterator, bool> emplace(Args&&... args) { 
+        return m_ht.emplace(std::forward<Args>(args)...); 
+    }
+    
+    
+    
+    
+    /**
+     * Due to the way elements are stored, emplace_hint will need to move or copy the key-value once.
+     * The method is equivalent to insert(hint, value_type(std::forward<Args>(args)...));
+     * 
+     * Mainly here for compatibility with the std::unordered_map interface.
+     */
+    template<class... Args>
+    iterator emplace_hint(const_iterator hint, Args&&... args) {
+        return m_ht.emplace_hint(hint, std::forward<Args>(args)...);
+    }
+    
+    
+    
+    
+    template<class... Args>
+    std::pair<iterator, bool> try_emplace(const key_type& k, Args&&... args) { 
+        return m_ht.try_emplace(k, std::forward<Args>(args)...);
+    }
+    
+    template<class... Args>
+    std::pair<iterator, bool> try_emplace(key_type&& k, Args&&... args) {
+        return m_ht.try_emplace(std::move(k), std::forward<Args>(args)...);
+    }
+    
+    template<class... Args>
+    iterator try_emplace(const_iterator hint, const key_type& k, Args&&... args) {
+        return m_ht.try_emplace(hint, k, std::forward<Args>(args)...);
+    }
+    
+    template<class... Args>
+    iterator try_emplace(const_iterator hint, key_type&& k, Args&&... args) {
+        return m_ht.try_emplace(hint, std::move(k), std::forward<Args>(args)...);
+    }
+    
+    
+
+    
+    iterator erase(iterator pos) { return m_ht.erase(pos); }
+    iterator erase(const_iterator pos) { return m_ht.erase(pos); }
+    iterator erase(const_iterator first, const_iterator last) { return m_ht.erase(first, last); }
+    size_type erase(const key_type& key) { return m_ht.erase(key); }
+    
+    /**
+     * Use the hash value 'precalculated_hash' instead of hashing the key. The hash value should be the same
+     * as hash_function()(key). Usefull to speed-up the lookup to the value if you already have the hash.
+     */    
+    size_type erase(const key_type& key, std::size_t precalculated_hash) { 
+        return m_ht.erase(key, precalculated_hash); 
+    }
+    
+    /**
+     * This overload only participates in the overload resolution if the typedef KeyEqual::is_transparent 
+     * and Compare::is_transparent exist. 
+     * If so, K must be hashable and comparable to Key.
+     */
+    template<class K, class KE = KeyEqual, class CP = Compare, 
+             typename std::enable_if<has_is_transparent<KE>::value && has_is_transparent<CP>::value>::type* = nullptr> 
+    size_type erase(const K& key) { return m_ht.erase(key); }
+    
+    /**
+     * @copydoc erase(const K& key)
+     * 
+     * Use the hash value 'precalculated_hash' instead of hashing the key. The hash value should be the same
+     * as hash_function()(key). Usefull to speed-up the lookup to the value if you already have the hash.
+     */
+    template<class K, class KE = KeyEqual, class CP = Compare, 
+             typename std::enable_if<has_is_transparent<KE>::value && has_is_transparent<CP>::value>::type* = nullptr> 
+    size_type erase(const K& key, std::size_t precalculated_hash) { return m_ht.erase(key, precalculated_hash); }
+    
+    
+    
+    
+    void swap(hopscotch_sc_map& other) { other.m_ht.swap(m_ht); }
+    
+    /*
+     * Lookup
+     */
+    T& at(const Key& key) { return m_ht.at(key); }
+    
+    /**
+     * Use the hash value 'precalculated_hash' instead of hashing the key. The hash value should be the same
+     * as hash_function()(key). Usefull to speed-up the lookup if you already have the hash.
+     */    
+    T& at(const Key& key, std::size_t precalculated_hash) { return m_ht.at(key, precalculated_hash); }
+    
+    const T& at(const Key& key) const { return m_ht.at(key); }
+    
+    /**
+     * @copydoc at(const Key& key, std::size_t precalculated_hash)
+     */    
+    const T& at(const Key& key, std::size_t precalculated_hash) const { return m_ht.at(key, precalculated_hash); }
+    
+    /**
+     * This overload only participates in the overload resolution if the typedef KeyEqual::is_transparent 
+     * and Compare::is_transparent exist. 
+     * If so, K must be hashable and comparable to Key.
+     */
+    template<class K, class KE = KeyEqual, class CP = Compare, 
+             typename std::enable_if<has_is_transparent<KE>::value && has_is_transparent<CP>::value>::type* = nullptr> 
+    T& at(const K& key) { return m_ht.at(key); }
+    
+    /**
+     * @copydoc at(const K& key)
+     * 
+     * Use the hash value 'precalculated_hash' instead of hashing the key. The hash value should be the same
+     * as hash_function()(key). Usefull to speed-up the lookup if you already have the hash.
+     */    
+    template<class K, class KE = KeyEqual, class CP = Compare, 
+             typename std::enable_if<has_is_transparent<KE>::value && has_is_transparent<CP>::value>::type* = nullptr> 
+    T& at(const K& key, std::size_t precalculated_hash) { return m_ht.at(key, precalculated_hash); }
+    
+    /**
+     * @copydoc at(const K& key)
+     */
+    template<class K, class KE = KeyEqual, class CP = Compare, 
+             typename std::enable_if<has_is_transparent<KE>::value && has_is_transparent<CP>::value>::type* = nullptr> 
+    const T& at(const K& key) const { return m_ht.at(key); }
+    
+    /**
+     * @copydoc at(const K& key, std::size_t precalculated_hash)
+     */    
+    template<class K, class KE = KeyEqual, class CP = Compare, 
+             typename std::enable_if<has_is_transparent<KE>::value && has_is_transparent<CP>::value>::type* = nullptr> 
+    const T& at(const K& key, std::size_t precalculated_hash) const { return m_ht.at(key, precalculated_hash); }
+    
+    
+    
+    
+    T& operator[](const Key& key) { return m_ht[key]; }    
+    T& operator[](Key&& key) { return m_ht[std::move(key)]; }
+    
+    
+    
+    
+    size_type count(const Key& key) const { return m_ht.count(key); }
+    
+    /**
+     * Use the hash value 'precalculated_hash' instead of hashing the key. The hash value should be the same
+     * as hash_function()(key). Usefull to speed-up the lookup if you already have the hash.
+     */
+    size_type count(const Key& key, std::size_t precalculated_hash) const { return m_ht.count(key, precalculated_hash); }
+    
+    /**
+     * This overload only participates in the overload resolution if the typedef KeyEqual::is_transparent 
+     * and Compare::is_transparent exist. 
+     * If so, K must be hashable and comparable to Key.
+     */
+    template<class K, class KE = KeyEqual, class CP = Compare, 
+             typename std::enable_if<has_is_transparent<KE>::value && has_is_transparent<CP>::value>::type* = nullptr> 
+    size_type count(const K& key) const { return m_ht.count(key); }
+    
+    /**
+     * @copydoc count(const K& key) const
+     * 
+     * Use the hash value 'precalculated_hash' instead of hashing the key. The hash value should be the same
+     * as hash_function()(key). Usefull to speed-up the lookup if you already have the hash.
+     */     
+    template<class K, class KE = KeyEqual, class CP = Compare, 
+             typename std::enable_if<has_is_transparent<KE>::value && has_is_transparent<CP>::value>::type* = nullptr> 
+    size_type count(const K& key, std::size_t precalculated_hash) const { return m_ht.count(key, precalculated_hash); }
+    
+    
+    
+    
+    iterator find(const Key& key) { return m_ht.find(key); }
+    
+    /**
+     * Use the hash value 'precalculated_hash' instead of hashing the key. The hash value should be the same
+     * as hash_function()(key). Usefull to speed-up the lookup if you already have the hash.
+     */
+    iterator find(const Key& key, std::size_t precalculated_hash) { return m_ht.find(key, precalculated_hash); }
+    
+    const_iterator find(const Key& key) const { return m_ht.find(key); }
+    
+    /**
+     * @copydoc find(const Key& key, std::size_t precalculated_hash)
+     */
+    const_iterator find(const Key& key, std::size_t precalculated_hash) const { return m_ht.find(key, precalculated_hash); }
+    
+    /**
+     * This overload only participates in the overload resolution if the typedef KeyEqual::is_transparent 
+     * and Compare::is_transparent exist. 
+     * If so, K must be hashable and comparable to Key.
+     */
+    template<class K, class KE = KeyEqual, class CP = Compare, 
+             typename std::enable_if<has_is_transparent<KE>::value && has_is_transparent<CP>::value>::type* = nullptr> 
+    iterator find(const K& key) { return m_ht.find(key); }
+    
+    /**
+     * @copydoc find(const K& key)
+     * 
+     * Use the hash value 'precalculated_hash' instead of hashing the key. The hash value should be the same
+     * as hash_function()(key). Usefull to speed-up the lookup if you already have the hash.
+     */
+    template<class K, class KE = KeyEqual, class CP = Compare, 
+             typename std::enable_if<has_is_transparent<KE>::value && has_is_transparent<CP>::value>::type* = nullptr> 
+    iterator find(const K& key, std::size_t precalculated_hash) { return m_ht.find(key, precalculated_hash); }
+    
+    /**
+     * @copydoc find(const K& key)
+     */
+    template<class K, class KE = KeyEqual, class CP = Compare, 
+             typename std::enable_if<has_is_transparent<KE>::value && has_is_transparent<CP>::value>::type* = nullptr> 
+    const_iterator find(const K& key) const { return m_ht.find(key); }
+    
+    /**
+     * @copydoc find(const K& key, std::size_t precalculated_hash)
+     */
+    template<class K, class KE = KeyEqual, class CP = Compare, 
+             typename std::enable_if<has_is_transparent<KE>::value && has_is_transparent<CP>::value>::type* = nullptr> 
+    const_iterator find(const K& key, std::size_t precalculated_hash) const { return m_ht.find(key, precalculated_hash); }
+    
+    
+    
+    
+    std::pair<iterator, iterator> equal_range(const Key& key) { return m_ht.equal_range(key); }
+    
+    /**
+     * Use the hash value 'precalculated_hash' instead of hashing the key. The hash value should be the same
+     * as hash_function()(key). Usefull to speed-up the lookup if you already have the hash.
+     */
+    std::pair<iterator, iterator> equal_range(const Key& key, std::size_t precalculated_hash) { 
+        return m_ht.equal_range(key, precalculated_hash); 
+    }
+    
+    std::pair<const_iterator, const_iterator> equal_range(const Key& key) const { return m_ht.equal_range(key); }
+    
+    /**
+     * @copydoc equal_range(const Key& key, std::size_t precalculated_hash)
+     */
+    std::pair<const_iterator, const_iterator> equal_range(const Key& key, std::size_t precalculated_hash) const { 
+        return m_ht.equal_range(key, precalculated_hash); 
+    }
+
+    /**
+     * This overload only participates in the overload resolution if the typedef KeyEqual::is_transparent 
+     * and Compare::is_transparent exist. 
+     * If so, K must be hashable and comparable to Key.
+     */
+    template<class K, class KE = KeyEqual, class CP = Compare, 
+             typename std::enable_if<has_is_transparent<KE>::value && has_is_transparent<CP>::value>::type* = nullptr> 
+    std::pair<iterator, iterator> equal_range(const K& key) { return m_ht.equal_range(key); }
+    
+    /**
+     * @copydoc equal_range(const K& key)
+     * 
+     * Use the hash value 'precalculated_hash' instead of hashing the key. The hash value should be the same
+     * as hash_function()(key). Usefull to speed-up the lookup if you already have the hash.
+     */    
+    template<class K, class KE = KeyEqual, class CP = Compare, 
+             typename std::enable_if<has_is_transparent<KE>::value && has_is_transparent<CP>::value>::type* = nullptr> 
+    std::pair<iterator, iterator> equal_range(const K& key, std::size_t precalculated_hash) { 
+        return m_ht.equal_range(key, precalculated_hash); 
+    }
+    
+    /**
+     * @copydoc equal_range(const K& key)
+     */
+    template<class K, class KE = KeyEqual, class CP = Compare, 
+             typename std::enable_if<has_is_transparent<KE>::value && has_is_transparent<CP>::value>::type* = nullptr> 
+    std::pair<const_iterator, const_iterator> equal_range(const K& key) const { return m_ht.equal_range(key); }
+    
+    /**
+     * @copydoc equal_range(const K& key, std::size_t precalculated_hash)
+     */    
+    template<class K, class KE = KeyEqual, class CP = Compare, 
+             typename std::enable_if<has_is_transparent<KE>::value && has_is_transparent<CP>::value>::type* = nullptr> 
+    std::pair<const_iterator, const_iterator> equal_range(const K& key, std::size_t precalculated_hash) const { 
+        return m_ht.equal_range(key, precalculated_hash); 
+    }
+    
+    
+    
+    
+    /*
+     * Bucket interface 
+     */
+    size_type bucket_count() const { return m_ht.bucket_count(); }
+    size_type max_bucket_count() const { return m_ht.max_bucket_count(); }
+    
+    
+    /*
+     *  Hash policy 
+     */
+    float load_factor() const { return m_ht.load_factor(); }
+    float max_load_factor() const { return m_ht.max_load_factor(); }
+    void max_load_factor(float ml) { m_ht.max_load_factor(ml); }
+    
+    void rehash(size_type count) { m_ht.rehash(count); }
+    void reserve(size_type count) { m_ht.reserve(count); }
+    
+    
+    /*
+     * Observers
+     */
+    hasher hash_function() const { return m_ht.hash_function(); }
+    key_equal key_eq() const { return m_ht.key_eq(); }
+    key_compare key_comp() const { return m_ht.key_comp(); }
+    
+    /*
+     * Other
+     */
+    
+    /**
+     * Convert a const_iterator to an iterator.
+     */
+    iterator mutable_iterator(const_iterator pos) {
+        return m_ht.mutable_iterator(pos);
+    }
+    
+    size_type overflow_size() const noexcept { return m_ht.overflow_size(); }
+    
+    friend bool operator==(const hopscotch_sc_map& lhs, const hopscotch_sc_map& rhs) {
+        if(lhs.size() != rhs.size()) {
+            return false;
+        }
+        
+        for(const auto& element_lhs : lhs) {
+            const auto it_element_rhs = rhs.find(element_lhs.first);
+            if(it_element_rhs == rhs.cend() || element_lhs.second != it_element_rhs->second) {
+                return false;
+            }
+        }
+        
+        return true;
+    }
+
+    friend bool operator!=(const hopscotch_sc_map& lhs, const hopscotch_sc_map& rhs) {
+        return !operator==(lhs, rhs);
+    }
+
+    friend void swap(hopscotch_sc_map& lhs, hopscotch_sc_map& rhs) {
+        lhs.swap(rhs);
+    }
+
+
+    
+private:
+    ht m_ht;
+};
+  
+
+} // end namespace tsl
+
+
+#endif
diff --git a/dbms/src/Common/tests/hopscotch-map/src/hopscotch_sc_set.h b/dbms/src/Common/tests/hopscotch-map/src/hopscotch_sc_set.h
new file mode 100644
index 00000000000..29eb1d5e3b7
--- /dev/null
+++ b/dbms/src/Common/tests/hopscotch-map/src/hopscotch_sc_set.h
@@ -0,0 +1,518 @@
+/**
+ * MIT License
+ * 
+ * Copyright (c) 2017 Tessil
+ * 
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ * 
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ * 
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#ifndef TSL_HOPSCOTCH_SC_SET_H
+#define TSL_HOPSCOTCH_SC_SET_H 
+
+
+#include <algorithm>
+#include <cstddef>
+#include <functional>
+#include <initializer_list>
+#include <memory>
+#include <set>
+#include <type_traits>
+#include <utility>
+#include "hopscotch_hash.h"
+
+
+namespace tsl {
+    
+    
+/**
+ * Similar to tsl::hopscotch_set but instead of using a list for overflowing elements it uses
+ * a binary search tree. It thus needs an additional template parameter Compare. Compare should
+ * be arithmetically coherent with KeyEqual.
+ * 
+ * The binary search tree allows the set to have a worst-case scenario of O(log n) for search 
+ * and delete, even if the hash function maps all the elements to the same bucket. 
+ * For insert, the amortized worst case is O(log n), but the worst case is O(n) in case of rehash.
+ * 
+ * This makes the set resistant to DoS attacks (but doesn't preclude you to have a good hash function,
+ * as an element in the bucket array is faster to retrieve than in the tree).
+ * 
+ * @copydoc hopscotch_set
+ */
+template<class Key, 
+         class Hash = std::hash<Key>,
+         class KeyEqual = std::equal_to<Key>,
+         class Compare = std::less<Key>,
+         class Allocator = std::allocator<Key>,
+         unsigned int NeighborhoodSize = 62,
+         bool StoreHash = false,
+         class GrowthPolicy = tsl::power_of_two_growth_policy>
+class hopscotch_sc_set {
+private:    
+    template<typename U>
+    using has_is_transparent = tsl::detail_hopscotch_hash::has_is_transparent<U>;
+    
+    class KeySelect {
+    public:
+        using key_type = Key;
+        
+        const key_type& operator()(const Key& key) const {
+            return key;
+        }
+        
+        key_type& operator()(Key& key) {
+            return key;
+        }
+    };
+    
+    
+    using overflow_container_type = std::set<Key, Compare, Allocator>;
+    using ht = tsl::detail_hopscotch_hash::hopscotch_hash<Key, KeySelect, void,
+                                                     Hash, KeyEqual, 
+                                                     Allocator, NeighborhoodSize, 
+                                                     StoreHash, GrowthPolicy,
+                                                     overflow_container_type>;
+            
+public:
+    using key_type = typename ht::key_type;
+    using value_type = typename ht::value_type;
+    using size_type = typename ht::size_type;
+    using difference_type = typename ht::difference_type;
+    using hasher = typename ht::hasher;
+    using key_equal = typename ht::key_equal;
+    using key_compare = Compare;
+    using allocator_type = typename ht::allocator_type;
+    using reference = typename ht::reference;
+    using const_reference = typename ht::const_reference;
+    using pointer = typename ht::pointer;
+    using const_pointer = typename ht::const_pointer;
+    using iterator = typename ht::iterator;
+    using const_iterator = typename ht::const_iterator;
+
+    
+    /*
+     * Constructors
+     */
+    hopscotch_sc_set() : hopscotch_sc_set(ht::DEFAULT_INIT_BUCKETS_SIZE) {
+    }
+    
+    explicit hopscotch_sc_set(size_type bucket_count, 
+                        const Hash& hash = Hash(),
+                        const KeyEqual& equal = KeyEqual(),
+                        const Allocator& alloc = Allocator(),
+                        const Compare& comp = Compare()) : 
+                        m_ht(bucket_count, hash, equal, alloc, ht::DEFAULT_MAX_LOAD_FACTOR, comp)
+    {
+    }
+    
+    hopscotch_sc_set(size_type bucket_count,
+                  const Allocator& alloc) : hopscotch_sc_set(bucket_count, Hash(), KeyEqual(), alloc)
+    {
+    }
+    
+    hopscotch_sc_set(size_type bucket_count,
+                  const Hash& hash,
+                  const Allocator& alloc) : hopscotch_sc_set(bucket_count, hash, KeyEqual(), alloc)
+    {
+    }
+    
+    explicit hopscotch_sc_set(const Allocator& alloc) : hopscotch_sc_set(ht::DEFAULT_INIT_BUCKETS_SIZE, alloc) {
+    }
+    
+    template<class InputIt>
+    hopscotch_sc_set(InputIt first, InputIt last,
+                size_type bucket_count = ht::DEFAULT_INIT_BUCKETS_SIZE,
+                const Hash& hash = Hash(),
+                const KeyEqual& equal = KeyEqual(),
+                const Allocator& alloc = Allocator()) : hopscotch_sc_set(bucket_count, hash, equal, alloc)
+    {
+        insert(first, last);
+    }
+    
+    template<class InputIt>
+    hopscotch_sc_set(InputIt first, InputIt last,
+                size_type bucket_count,
+                const Allocator& alloc) : hopscotch_sc_set(first, last, bucket_count, Hash(), KeyEqual(), alloc)
+    {
+    }
+    
+    template<class InputIt>
+    hopscotch_sc_set(InputIt first, InputIt last,
+                size_type bucket_count,
+                const Hash& hash,
+                const Allocator& alloc) : hopscotch_sc_set(first, last, bucket_count, hash, KeyEqual(), alloc)
+    {
+    }
+
+    hopscotch_sc_set(std::initializer_list<value_type> init,
+                    size_type bucket_count = ht::DEFAULT_INIT_BUCKETS_SIZE,
+                    const Hash& hash = Hash(),
+                    const KeyEqual& equal = KeyEqual(),
+                    const Allocator& alloc = Allocator()) : 
+                    hopscotch_sc_set(init.begin(), init.end(), bucket_count, hash, equal, alloc)
+    {
+    }
+
+    hopscotch_sc_set(std::initializer_list<value_type> init,
+                    size_type bucket_count,
+                    const Allocator& alloc) : 
+                    hopscotch_sc_set(init.begin(), init.end(), bucket_count, Hash(), KeyEqual(), alloc)
+    {
+    }
+
+    hopscotch_sc_set(std::initializer_list<value_type> init,
+                    size_type bucket_count,
+                    const Hash& hash,
+                    const Allocator& alloc) : 
+                    hopscotch_sc_set(init.begin(), init.end(), bucket_count, hash, KeyEqual(), alloc)
+    {
+    }
+
+    
+    hopscotch_sc_set& operator=(std::initializer_list<value_type> ilist) {
+        m_ht.clear();
+        
+        m_ht.reserve(ilist.size());
+        m_ht.insert(ilist.begin(), ilist.end());
+        
+        return *this;
+    }
+    
+    allocator_type get_allocator() const { return m_ht.get_allocator(); }
+    
+    
+    /*
+     * Iterators
+     */
+    iterator begin() noexcept { return m_ht.begin(); }
+    const_iterator begin() const noexcept { return m_ht.begin(); }
+    const_iterator cbegin() const noexcept { return m_ht.cbegin(); }
+    
+    iterator end() noexcept { return m_ht.end(); }
+    const_iterator end() const noexcept { return m_ht.end(); }
+    const_iterator cend() const noexcept { return m_ht.cend(); }
+    
+    
+    /*
+     * Capacity
+     */
+    bool empty() const noexcept { return m_ht.empty(); }
+    size_type size() const noexcept { return m_ht.size(); }
+    size_type max_size() const noexcept { return m_ht.max_size(); }
+    
+    /*
+     * Modifiers
+     */
+    void clear() noexcept { m_ht.clear(); }
+    
+    
+    
+    
+    std::pair<iterator, bool> insert(const value_type& value) { return m_ht.insert(value); }
+    std::pair<iterator, bool> insert(value_type&& value) { return m_ht.insert(std::move(value)); }
+    
+    iterator insert(const_iterator hint, const value_type& value) { return m_ht.insert(hint, value); }
+    iterator insert(const_iterator hint, value_type&& value) { return m_ht.insert(hint, std::move(value)); }
+    
+    template<class InputIt>
+    void insert(InputIt first, InputIt last) { m_ht.insert(first, last); }
+    void insert(std::initializer_list<value_type> ilist) { m_ht.insert(ilist.begin(), ilist.end()); }
+
+    
+    
+    
+    /**
+     * Due to the way elements are stored, emplace will need to move or copy the key-value once.
+     * The method is equivalent to insert(value_type(std::forward<Args>(args)...));
+     * 
+     * Mainly here for compatibility with the std::unordered_map interface.
+     */
+    template<class... Args>
+    std::pair<iterator, bool> emplace(Args&&... args) { return m_ht.emplace(std::forward<Args>(args)...); }
+    
+    
+    
+    
+    /**
+     * Due to the way elements are stored, emplace_hint will need to move or copy the key-value once.
+     * The method is equivalent to insert(hint, value_type(std::forward<Args>(args)...));
+     * 
+     * Mainly here for compatibility with the std::unordered_map interface.
+     */
+    template<class... Args>
+    iterator emplace_hint(const_iterator hint, Args&&... args) {
+        return m_ht.emplace_hint(hint, std::forward<Args>(args)...);
+    }
+
+    
+    
+    
+    iterator erase(iterator pos) { return m_ht.erase(pos); }
+    iterator erase(const_iterator pos) { return m_ht.erase(pos); }
+    iterator erase(const_iterator first, const_iterator last) { return m_ht.erase(first, last); }
+    size_type erase(const key_type& key) { return m_ht.erase(key); }
+    
+    /**
+     * Use the hash value 'precalculated_hash' instead of hashing the key. The hash value should be the same
+     * as hash_function()(key). Usefull to speed-up the lookup to the value if you already have the hash.
+     */    
+    size_type erase(const key_type& key, std::size_t precalculated_hash) { 
+        return m_ht.erase(key, precalculated_hash); 
+    }
+    
+    /**
+     * This overload only participates in the overload resolution if the typedef KeyEqual::is_transparent 
+     * and Compare::is_transparent exist. 
+     * If so, K must be hashable and comparable to Key.
+     */
+    template<class K, class KE = KeyEqual, class CP = Compare, 
+             typename std::enable_if<has_is_transparent<KE>::value && has_is_transparent<CP>::value>::type* = nullptr> 
+    size_type erase(const K& key) { return m_ht.erase(key); }
+    
+    /**
+     * @copydoc erase(const K& key)
+     * 
+     * Use the hash value 'precalculated_hash' instead of hashing the key. The hash value should be the same
+     * as hash_function()(key). Usefull to speed-up the lookup to the value if you already have the hash.
+     */
+    template<class K, class KE = KeyEqual, class CP = Compare, 
+             typename std::enable_if<has_is_transparent<KE>::value && has_is_transparent<CP>::value>::type* = nullptr> 
+    size_type erase(const K& key, std::size_t precalculated_hash) { return m_ht.erase(key, precalculated_hash); }
+    
+    
+    
+    
+    void swap(hopscotch_sc_set& other) { other.m_ht.swap(m_ht); }
+    
+    
+    /*
+     * Lookup
+     */
+    size_type count(const Key& key) const { return m_ht.count(key); }
+    
+    /**
+     * Use the hash value 'precalculated_hash' instead of hashing the key. The hash value should be the same
+     * as hash_function()(key). Usefull to speed-up the lookup if you already have the hash.
+     */
+    size_type count(const Key& key, std::size_t precalculated_hash) const { return m_ht.count(key, precalculated_hash); }
+    
+    /**
+     * This overload only participates in the overload resolution if the typedef KeyEqual::is_transparent 
+     * and Compare::is_transparent exist. 
+     * If so, K must be hashable and comparable to Key.
+     */
+    template<class K, class KE = KeyEqual, class CP = Compare, 
+             typename std::enable_if<has_is_transparent<KE>::value && has_is_transparent<CP>::value>::type* = nullptr> 
+    size_type count(const K& key) const { return m_ht.count(key); }
+    
+    /**
+     * @copydoc count(const K& key) const
+     * 
+     * Use the hash value 'precalculated_hash' instead of hashing the key. The hash value should be the same
+     * as hash_function()(key). Usefull to speed-up the lookup if you already have the hash.
+     */
+    template<class K, class KE = KeyEqual, class CP = Compare, 
+             typename std::enable_if<has_is_transparent<KE>::value && has_is_transparent<CP>::value>::type* = nullptr> 
+    size_type count(const K& key, std::size_t precalculated_hash) const { return m_ht.count(key, precalculated_hash); }
+    
+    
+    
+    
+    iterator find(const Key& key) { return m_ht.find(key); }
+    
+    /**
+     * Use the hash value 'precalculated_hash' instead of hashing the key. The hash value should be the same
+     * as hash_function()(key). Usefull to speed-up the lookup if you already have the hash.
+     */
+    iterator find(const Key& key, std::size_t precalculated_hash) { return m_ht.find(key, precalculated_hash); }
+    
+    const_iterator find(const Key& key) const { return m_ht.find(key); }
+    
+    /**
+     * @copydoc find(const Key& key, std::size_t precalculated_hash)
+     */
+    const_iterator find(const Key& key, std::size_t precalculated_hash) const { return m_ht.find(key, precalculated_hash); }
+        
+    /**
+     * This overload only participates in the overload resolution if the typedef KeyEqual::is_transparent 
+     * and Compare::is_transparent exist. 
+     * If so, K must be hashable and comparable to Key.
+     */
+    template<class K, class KE = KeyEqual, class CP = Compare, 
+             typename std::enable_if<has_is_transparent<KE>::value && has_is_transparent<CP>::value>::type* = nullptr> 
+    iterator find(const K& key) { return m_ht.find(key); }
+    
+    /**
+     * @copydoc find(const K& key)
+     * 
+     * Use the hash value 'precalculated_hash' instead of hashing the key. The hash value should be the same
+     * as hash_function()(key). Usefull to speed-up the lookup if you already have the hash.
+     */
+    template<class K, class KE = KeyEqual, class CP = Compare, 
+             typename std::enable_if<has_is_transparent<KE>::value && has_is_transparent<CP>::value>::type* = nullptr> 
+    iterator find(const K& key, std::size_t precalculated_hash) { return m_ht.find(key, precalculated_hash); }
+    
+    /**
+     * @copydoc find(const K& key)
+     */
+    template<class K, class KE = KeyEqual, class CP = Compare, 
+             typename std::enable_if<has_is_transparent<KE>::value && has_is_transparent<CP>::value>::type* = nullptr> 
+    const_iterator find(const K& key) const { return m_ht.find(key); }
+    
+    /**
+     * @copydoc find(const K& key)
+     * 
+     * Use the hash value 'precalculated_hash' instead of hashing the key. The hash value should be the same
+     * as hash_function()(key). Usefull to speed-up the lookup if you already have the hash.
+     */
+    template<class K, class KE = KeyEqual, class CP = Compare, 
+             typename std::enable_if<has_is_transparent<KE>::value && has_is_transparent<CP>::value>::type* = nullptr> 
+    const_iterator find(const K& key, std::size_t precalculated_hash) const { return m_ht.find(key, precalculated_hash); }
+    
+    
+    
+    
+    std::pair<iterator, iterator> equal_range(const Key& key) { return m_ht.equal_range(key); }
+    
+    /**
+     * Use the hash value 'precalculated_hash' instead of hashing the key. The hash value should be the same
+     * as hash_function()(key). Usefull to speed-up the lookup if you already have the hash.
+     */
+    std::pair<iterator, iterator> equal_range(const Key& key, std::size_t precalculated_hash) { 
+        return m_ht.equal_range(key, precalculated_hash);
+    }
+    
+    std::pair<const_iterator, const_iterator> equal_range(const Key& key) const { return m_ht.equal_range(key); }
+    
+    /**
+     * @copydoc equal_range(const Key& key, std::size_t precalculated_hash)
+     */
+    std::pair<const_iterator, const_iterator> equal_range(const Key& key, std::size_t precalculated_hash) const { 
+        return m_ht.equal_range(key, precalculated_hash); 
+    }
+    
+    /**
+     * This overload only participates in the overload resolution if the typedef KeyEqual::is_transparent 
+     * and Compare::is_transparent exist. 
+     * If so, K must be hashable and comparable to Key.
+     */
+    template<class K, class KE = KeyEqual, class CP = Compare, 
+             typename std::enable_if<has_is_transparent<KE>::value && has_is_transparent<CP>::value>::type* = nullptr> 
+    std::pair<iterator, iterator> equal_range(const K& key) { return m_ht.equal_range(key); }
+    
+    /**
+     * @copydoc equal_range(const K& key)
+     * 
+     * Use the hash value 'precalculated_hash' instead of hashing the key. The hash value should be the same
+     * as hash_function()(key). Usefull to speed-up the lookup if you already have the hash.
+     */
+    template<class K, class KE = KeyEqual, class CP = Compare, 
+             typename std::enable_if<has_is_transparent<KE>::value && has_is_transparent<CP>::value>::type* = nullptr> 
+    std::pair<iterator, iterator> equal_range(const K& key, std::size_t precalculated_hash) { 
+        return m_ht.equal_range(key, precalculated_hash); 
+    }    
+    
+    /**
+     * @copydoc equal_range(const K& key)
+     */
+    template<class K, class KE = KeyEqual, class CP = Compare, 
+             typename std::enable_if<has_is_transparent<KE>::value && has_is_transparent<CP>::value>::type* = nullptr> 
+    std::pair<const_iterator, const_iterator> equal_range(const K& key) const { return m_ht.equal_range(key); }    
+    
+    /**
+     * @copydoc equal_range(const K& key, std::size_t precalculated_hash)
+     */
+    template<class K, class KE = KeyEqual, class CP = Compare, 
+             typename std::enable_if<has_is_transparent<KE>::value && has_is_transparent<CP>::value>::type* = nullptr> 
+    std::pair<const_iterator, const_iterator> equal_range(const K& key, std::size_t precalculated_hash) const { 
+        return m_ht.equal_range(key, precalculated_hash); 
+    }
+    
+    
+    
+
+    /*
+     * Bucket interface 
+     */
+    size_type bucket_count() const { return m_ht.bucket_count(); }
+    size_type max_bucket_count() const { return m_ht.max_bucket_count(); }
+    
+    
+    /*
+     *  Hash policy 
+     */
+    float load_factor() const { return m_ht.load_factor(); }
+    float max_load_factor() const { return m_ht.max_load_factor(); }
+    void max_load_factor(float ml) { m_ht.max_load_factor(ml); }
+    
+    void rehash(size_type count) { m_ht.rehash(count); }
+    void reserve(size_type count) { m_ht.reserve(count); }
+    
+    
+    /*
+     * Observers
+     */
+    hasher hash_function() const { return m_ht.hash_function(); }
+    key_equal key_eq() const { return m_ht.key_eq(); }
+    key_compare key_comp() const { return m_ht.key_comp(); }
+    
+    
+    /*
+     * Other
+     */
+    
+    /**
+     * Convert a const_iterator to an iterator.
+     */
+    iterator mutable_iterator(const_iterator pos) {
+        return m_ht.mutable_iterator(pos);
+    }
+    
+    size_type overflow_size() const noexcept { return m_ht.overflow_size(); }
+    
+    friend bool operator==(const hopscotch_sc_set& lhs, const hopscotch_sc_set& rhs) {
+        if(lhs.size() != rhs.size()) {
+            return false;
+        }
+        
+        for(const auto& element_lhs : lhs) {
+            const auto it_element_rhs = rhs.find(element_lhs);
+            if(it_element_rhs == rhs.cend()) {
+                return false;
+            }
+        }
+        
+        return true;
+    }
+
+    friend bool operator!=(const hopscotch_sc_set& lhs, const hopscotch_sc_set& rhs) {
+        return !operator==(lhs, rhs);
+    }
+
+    friend void swap(hopscotch_sc_set& lhs, hopscotch_sc_set& rhs) {
+        lhs.swap(rhs);
+    }
+    
+private:
+    ht m_ht;    
+};
+  
+
+} // end namespace tsl
+
+
+#endif 
diff --git a/dbms/src/Common/tests/hopscotch-map/src/hopscotch_set.h b/dbms/src/Common/tests/hopscotch-map/src/hopscotch_set.h
new file mode 100644
index 00000000000..72be57fa7e3
--- /dev/null
+++ b/dbms/src/Common/tests/hopscotch-map/src/hopscotch_set.h
@@ -0,0 +1,513 @@
+/**
+ * MIT License
+ * 
+ * Copyright (c) 2017 Tessil
+ * 
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ * 
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ * 
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#ifndef TSL_HOPSCOTCH_SET_H
+#define TSL_HOPSCOTCH_SET_H
+
+
+#include <algorithm>
+#include <cstddef>
+#include <functional>
+#include <initializer_list>
+#include <list>
+#include <memory>
+#include <type_traits>
+#include <utility>
+#include "hopscotch_hash.h"
+
+
+namespace tsl {
+
+/**
+ * Implementation of a hash set using the hopscotch hashing algorithm.
+ * 
+ * The Key must be either nothrow move-constructible, copy-constuctible or both.
+ * 
+ * The size of the neighborhood (NeighborhoodSize) must be > 0 and <= 62 if StoreHash is false.
+ * When StoreHash is true, 32-bits of the hash will be stored alongside the neighborhood limiting
+ * the NeighborhoodSize to <= 30. There is no memory usage difference between 
+ * 'NeighborhoodSize 62; StoreHash false' and 'NeighborhoodSize 30; StoreHash true'.
+ * 
+ * Storing the hash may improve performance on insert during the rehash process if the hash takes time
+ * to compute. It may also improve read performance if the KeyEqual function takes time (or incurs a cache-miss).
+ * If used with simple Hash and KeyEqual it may slow things down.
+ * 
+ * StoreHash can only be set if the GrowthPolicy is set to tsl::power_of_two_growth_policy.
+ * 
+ * GrowthPolicy defines how the set grows and consequently how a hash value is mapped to a bucket. 
+ * By default the set uses tsl::power_of_two_growth_policy. This policy keeps the number of buckets 
+ * to a power of two and uses a mask to set the hash to a bucket instead of the slow modulo.
+ * You may define your own growth policy, check tsl::power_of_two_growth_policy for the interface.
+ * 
+ * If the destructor of Key throws an exception, behaviour of the class is undefined.
+ * 
+ * Iterators invalidation:
+ *  - clear, operator=, reserve, rehash: always invalidate the iterators.
+ *  - insert, emplace, emplace_hint, operator[]: if there is an effective insert, invalidate the iterators 
+ *    if a displacement is needed to resolve a collision (which mean that most of the time, 
+ *    insert will invalidate the iterators). Or if there is a rehash.
+ *  - erase: iterator on the erased element is the only one which become invalid.
+ */
+template<class Key, 
+         class Hash = std::hash<Key>,
+         class KeyEqual = std::equal_to<Key>,
+         class Allocator = std::allocator<Key>,
+         unsigned int NeighborhoodSize = 62,
+         bool StoreHash = false,
+         class GrowthPolicy = tsl::power_of_two_growth_policy>
+class hopscotch_set {
+private:    
+    template<typename U>
+    using has_is_transparent = tsl::detail_hopscotch_hash::has_is_transparent<U>;
+    
+    class KeySelect {
+    public:
+        using key_type = Key;
+        
+        const key_type& operator()(const Key& key) const {
+            return key;
+        }
+        
+        key_type& operator()(Key& key) {
+            return key;
+        }
+    };
+    
+    
+    using overflow_container_type = std::list<Key, Allocator>;
+    using ht = detail_hopscotch_hash::hopscotch_hash<Key, KeySelect, void,
+                                                     Hash, KeyEqual, 
+                                                     Allocator, NeighborhoodSize, 
+                                                     StoreHash, GrowthPolicy,
+                                                     overflow_container_type>;
+            
+public:
+    using key_type = typename ht::key_type;
+    using value_type = typename ht::value_type;
+    using size_type = typename ht::size_type;
+    using difference_type = typename ht::difference_type;
+    using hasher = typename ht::hasher;
+    using key_equal = typename ht::key_equal;
+    using allocator_type = typename ht::allocator_type;
+    using reference = typename ht::reference;
+    using const_reference = typename ht::const_reference;
+    using pointer = typename ht::pointer;
+    using const_pointer = typename ht::const_pointer;
+    using iterator = typename ht::iterator;
+    using const_iterator = typename ht::const_iterator;
+
+    
+    /*
+     * Constructors
+     */
+    hopscotch_set() : hopscotch_set(ht::DEFAULT_INIT_BUCKETS_SIZE) {
+    }
+    
+    explicit hopscotch_set(size_type bucket_count, 
+                        const Hash& hash = Hash(),
+                        const KeyEqual& equal = KeyEqual(),
+                        const Allocator& alloc = Allocator()) : 
+                        m_ht(bucket_count, hash, equal, alloc, ht::DEFAULT_MAX_LOAD_FACTOR)
+    {
+    }
+    
+    hopscotch_set(size_type bucket_count,
+                  const Allocator& alloc) : hopscotch_set(bucket_count, Hash(), KeyEqual(), alloc)
+    {
+    }
+    
+    hopscotch_set(size_type bucket_count,
+                  const Hash& hash,
+                  const Allocator& alloc) : hopscotch_set(bucket_count, hash, KeyEqual(), alloc)
+    {
+    }
+    
+    explicit hopscotch_set(const Allocator& alloc) : hopscotch_set(ht::DEFAULT_INIT_BUCKETS_SIZE, alloc) {
+    }
+    
+    template<class InputIt>
+    hopscotch_set(InputIt first, InputIt last,
+                size_type bucket_count = ht::DEFAULT_INIT_BUCKETS_SIZE,
+                const Hash& hash = Hash(),
+                const KeyEqual& equal = KeyEqual(),
+                const Allocator& alloc = Allocator()) : hopscotch_set(bucket_count, hash, equal, alloc)
+    {
+        insert(first, last);
+    }
+    
+    template<class InputIt>
+    hopscotch_set(InputIt first, InputIt last,
+                size_type bucket_count,
+                const Allocator& alloc) : hopscotch_set(first, last, bucket_count, Hash(), KeyEqual(), alloc)
+    {
+    }
+    
+    template<class InputIt>
+    hopscotch_set(InputIt first, InputIt last,
+                size_type bucket_count,
+                const Hash& hash,
+                const Allocator& alloc) : hopscotch_set(first, last, bucket_count, hash, KeyEqual(), alloc)
+    {
+    }
+
+    hopscotch_set(std::initializer_list<value_type> init,
+                    size_type bucket_count = ht::DEFAULT_INIT_BUCKETS_SIZE,
+                    const Hash& hash = Hash(),
+                    const KeyEqual& equal = KeyEqual(),
+                    const Allocator& alloc = Allocator()) : 
+                    hopscotch_set(init.begin(), init.end(), bucket_count, hash, equal, alloc)
+    {
+    }
+
+    hopscotch_set(std::initializer_list<value_type> init,
+                    size_type bucket_count,
+                    const Allocator& alloc) : 
+                    hopscotch_set(init.begin(), init.end(), bucket_count, Hash(), KeyEqual(), alloc)
+    {
+    }
+
+    hopscotch_set(std::initializer_list<value_type> init,
+                    size_type bucket_count,
+                    const Hash& hash,
+                    const Allocator& alloc) : 
+                    hopscotch_set(init.begin(), init.end(), bucket_count, hash, KeyEqual(), alloc)
+    {
+    }
+
+    
+    hopscotch_set& operator=(std::initializer_list<value_type> ilist) {
+        m_ht.clear();
+        
+        m_ht.reserve(ilist.size());
+        m_ht.insert(ilist.begin(), ilist.end());
+        
+        return *this;
+    }
+    
+    allocator_type get_allocator() const { return m_ht.get_allocator(); }
+    
+    
+    /*
+     * Iterators
+     */
+    iterator begin() noexcept { return m_ht.begin(); }
+    const_iterator begin() const noexcept { return m_ht.begin(); }
+    const_iterator cbegin() const noexcept { return m_ht.cbegin(); }
+    
+    iterator end() noexcept { return m_ht.end(); }
+    const_iterator end() const noexcept { return m_ht.end(); }
+    const_iterator cend() const noexcept { return m_ht.cend(); }
+    
+    
+    /*
+     * Capacity
+     */
+    bool empty() const noexcept { return m_ht.empty(); }
+    size_type size() const noexcept { return m_ht.size(); }
+    size_type max_size() const noexcept { return m_ht.max_size(); }
+    
+    /*
+     * Modifiers
+     */
+    void clear() noexcept { m_ht.clear(); }
+    
+    
+    
+    
+    std::pair<iterator, bool> insert(const value_type& value) { return m_ht.insert(value); }
+    std::pair<iterator, bool> insert(value_type&& value) { return m_ht.insert(std::move(value)); }
+    
+    iterator insert(const_iterator hint, const value_type& value) { return m_ht.insert(hint, value); }
+    iterator insert(const_iterator hint, value_type&& value) { return m_ht.insert(hint, std::move(value)); }
+    
+    template<class InputIt>
+    void insert(InputIt first, InputIt last) { m_ht.insert(first, last); }
+    void insert(std::initializer_list<value_type> ilist) { m_ht.insert(ilist.begin(), ilist.end()); }
+
+    
+    
+    
+    /**
+     * Due to the way elements are stored, emplace will need to move or copy the key-value once.
+     * The method is equivalent to insert(value_type(std::forward<Args>(args)...));
+     * 
+     * Mainly here for compatibility with the std::unordered_map interface.
+     */
+    template<class... Args>
+    std::pair<iterator, bool> emplace(Args&&... args) { return m_ht.emplace(std::forward<Args>(args)...); }
+    
+    
+    
+    
+    /**
+     * Due to the way elements are stored, emplace_hint will need to move or copy the key-value once.
+     * The method is equivalent to insert(hint, value_type(std::forward<Args>(args)...));
+     * 
+     * Mainly here for compatibility with the std::unordered_map interface.
+     */
+    template<class... Args>
+    iterator emplace_hint(const_iterator hint, Args&&... args) {
+        return m_ht.emplace_hint(hint, std::forward<Args>(args)...);
+    }
+
+    
+    
+    
+    iterator erase(iterator pos) { return m_ht.erase(pos); }
+    iterator erase(const_iterator pos) { return m_ht.erase(pos); }
+    iterator erase(const_iterator first, const_iterator last) { return m_ht.erase(first, last); }
+    size_type erase(const key_type& key) { return m_ht.erase(key); }
+    
+    /**
+     * Use the hash value 'precalculated_hash' instead of hashing the key. The hash value should be the same
+     * as hash_function()(key). Usefull to speed-up the lookup to the value if you already have the hash.
+     */    
+    size_type erase(const key_type& key, std::size_t precalculated_hash) { 
+        return m_ht.erase(key, precalculated_hash); 
+    }
+    
+    /**
+     * This overload only participates in the overload resolution if the typedef KeyEqual::is_transparent exists. 
+     * If so, K must be hashable and comparable to Key.
+     */
+    template<class K, class KE = KeyEqual, typename std::enable_if<has_is_transparent<KE>::value>::type* = nullptr> 
+    size_type erase(const K& key) { return m_ht.erase(key); }
+    
+    /**
+     * @copydoc erase(const K& key)
+     * 
+     * Use the hash value 'precalculated_hash' instead of hashing the key. The hash value should be the same
+     * as hash_function()(key). Usefull to speed-up the lookup to the value if you already have the hash.
+     */    
+    template<class K, class KE = KeyEqual, typename std::enable_if<has_is_transparent<KE>::value>::type* = nullptr> 
+    size_type erase(const K& key, std::size_t precalculated_hash) { 
+        return m_ht.erase(key, precalculated_hash); 
+    }
+    
+    
+    
+    
+    void swap(hopscotch_set& other) { other.m_ht.swap(m_ht); }
+    
+    
+    /*
+     * Lookup
+     */
+    size_type count(const Key& key) const { return m_ht.count(key); }
+    
+    /**
+     * Use the hash value 'precalculated_hash' instead of hashing the key. The hash value should be the same
+     * as hash_function()(key). Usefull to speed-up the lookup if you already have the hash.
+     */
+    size_type count(const Key& key, std::size_t precalculated_hash) const { return m_ht.count(key, precalculated_hash); }
+    
+    /**
+     * This overload only participates in the overload resolution if the typedef KeyEqual::is_transparent exists. 
+     * If so, K must be hashable and comparable to Key.
+     */
+    template<class K, class KE = KeyEqual, typename std::enable_if<has_is_transparent<KE>::value>::type* = nullptr> 
+    size_type count(const K& key) const { return m_ht.count(key); }
+    
+    /**
+     * @copydoc count(const K& key) const
+     * 
+     * Use the hash value 'precalculated_hash' instead of hashing the key. The hash value should be the same
+     * as hash_function()(key). Usefull to speed-up the lookup if you already have the hash.
+     */
+    template<class K, class KE = KeyEqual, typename std::enable_if<has_is_transparent<KE>::value>::type* = nullptr> 
+    size_type count(const K& key, std::size_t precalculated_hash) const { return m_ht.count(key, precalculated_hash); }
+    
+    
+    
+    
+    iterator find(const Key& key) { return m_ht.find(key); }
+    
+    /**
+     * Use the hash value 'precalculated_hash' instead of hashing the key. The hash value should be the same
+     * as hash_function()(key). Usefull to speed-up the lookup if you already have the hash.
+     */
+    iterator find(const Key& key, std::size_t precalculated_hash) { return m_ht.find(key, precalculated_hash); }
+    
+    const_iterator find(const Key& key) const { return m_ht.find(key); }
+    
+    /**
+     * @copydoc find(const Key& key, std::size_t precalculated_hash)
+     */
+    const_iterator find(const Key& key, std::size_t precalculated_hash) const { return m_ht.find(key, precalculated_hash); }
+    
+    /**
+     * This overload only participates in the overload resolution if the typedef KeyEqual::is_transparent exists. 
+     * If so, K must be hashable and comparable to Key.
+     */
+    template<class K, class KE = KeyEqual, typename std::enable_if<has_is_transparent<KE>::value>::type* = nullptr> 
+    iterator find(const K& key) { return m_ht.find(key); }
+    
+    /**
+     * @copydoc find(const K& key)
+     * 
+     * Use the hash value 'precalculated_hash' instead of hashing the key. The hash value should be the same
+     * as hash_function()(key). Usefull to speed-up the lookup if you already have the hash.
+     */
+    template<class K, class KE = KeyEqual, typename std::enable_if<has_is_transparent<KE>::value>::type* = nullptr> 
+    iterator find(const K& key, std::size_t precalculated_hash) { return m_ht.find(key, precalculated_hash); }
+    
+    /**
+     * @copydoc find(const K& key)
+     */
+    template<class K, class KE = KeyEqual, typename std::enable_if<has_is_transparent<KE>::value>::type* = nullptr> 
+    const_iterator find(const K& key) const { return m_ht.find(key); }
+    
+    /**
+     * @copydoc find(const K& key)
+     * 
+     * Use the hash value 'precalculated_hash' instead of hashing the key. The hash value should be the same
+     * as hash_function()(key). Usefull to speed-up the lookup if you already have the hash.
+     */
+    template<class K, class KE = KeyEqual, typename std::enable_if<has_is_transparent<KE>::value>::type* = nullptr> 
+    const_iterator find(const K& key, std::size_t precalculated_hash) const { return m_ht.find(key, precalculated_hash); }
+    
+    
+    
+    
+    std::pair<iterator, iterator> equal_range(const Key& key) { return m_ht.equal_range(key); }
+    
+    /**
+     * Use the hash value 'precalculated_hash' instead of hashing the key. The hash value should be the same
+     * as hash_function()(key). Usefull to speed-up the lookup if you already have the hash.
+     */
+    std::pair<iterator, iterator> equal_range(const Key& key, std::size_t precalculated_hash) { 
+        return m_ht.equal_range(key, precalculated_hash); 
+    }
+    
+    std::pair<const_iterator, const_iterator> equal_range(const Key& key) const { return m_ht.equal_range(key); }
+    
+    /**
+     * @copydoc equal_range(const Key& key, std::size_t precalculated_hash)
+     */
+    std::pair<const_iterator, const_iterator> equal_range(const Key& key, std::size_t precalculated_hash) const { 
+        return m_ht.equal_range(key, precalculated_hash); 
+    }
+    
+    /**
+     * This overload only participates in the overload resolution if the typedef KeyEqual::is_transparent exists. 
+     * If so, K must be hashable and comparable to Key.
+     */
+    template<class K, class KE = KeyEqual, typename std::enable_if<has_is_transparent<KE>::value>::type* = nullptr> 
+    std::pair<iterator, iterator> equal_range(const K& key) { return m_ht.equal_range(key); }
+    
+    /**
+     * @copydoc equal_range(const K& key)
+     * 
+     * Use the hash value 'precalculated_hash' instead of hashing the key. The hash value should be the same
+     * as hash_function()(key). Usefull to speed-up the lookup if you already have the hash.
+     */    
+    template<class K, class KE = KeyEqual, typename std::enable_if<has_is_transparent<KE>::value>::type* = nullptr> 
+    std::pair<iterator, iterator> equal_range(const K& key, std::size_t precalculated_hash) { 
+        return m_ht.equal_range(key, precalculated_hash); 
+    }
+    
+    /**
+     * @copydoc equal_range(const K& key)
+     */
+    template<class K, class KE = KeyEqual, typename std::enable_if<has_is_transparent<KE>::value>::type* = nullptr> 
+    std::pair<const_iterator, const_iterator> equal_range(const K& key) const { return m_ht.equal_range(key); }
+
+    /**
+     * @copydoc equal_range(const K& key, std::size_t precalculated_hash)
+     */    
+    template<class K, class KE = KeyEqual, typename std::enable_if<has_is_transparent<KE>::value>::type* = nullptr> 
+    std::pair<const_iterator, const_iterator> equal_range(const K& key, std::size_t precalculated_hash) const { 
+        return m_ht.equal_range(key, precalculated_hash); 
+    }
+    
+    
+    
+
+    /*
+     * Bucket interface 
+     */
+    size_type bucket_count() const { return m_ht.bucket_count(); }
+    size_type max_bucket_count() const { return m_ht.max_bucket_count(); }
+    
+    
+    /*
+     *  Hash policy 
+     */
+    float load_factor() const { return m_ht.load_factor(); }
+    float max_load_factor() const { return m_ht.max_load_factor(); }
+    void max_load_factor(float ml) { m_ht.max_load_factor(ml); }
+    
+    void rehash(size_type count) { m_ht.rehash(count); }
+    void reserve(size_type count) { m_ht.reserve(count); }
+    
+    
+    /*
+     * Observers
+     */
+    hasher hash_function() const { return m_ht.hash_function(); }
+    key_equal key_eq() const { return m_ht.key_eq(); }
+    
+    
+    /*
+     * Other
+     */
+    
+    /**
+     * Convert a const_iterator to an iterator.
+     */
+    iterator mutable_iterator(const_iterator pos) {
+        return m_ht.mutable_iterator(pos);
+    }
+    
+    size_type overflow_size() const noexcept { return m_ht.overflow_size(); }
+    
+    friend bool operator==(const hopscotch_set& lhs, const hopscotch_set& rhs) {
+        if(lhs.size() != rhs.size()) {
+            return false;
+        }
+        
+        for(const auto& element_lhs : lhs) {
+            const auto it_element_rhs = rhs.find(element_lhs);
+            if(it_element_rhs == rhs.cend()) {
+                return false;
+            }
+        }
+        
+        return true;
+    }
+
+    friend bool operator!=(const hopscotch_set& lhs, const hopscotch_set& rhs) {
+        return !operator==(lhs, rhs);
+    }
+
+    friend void swap(hopscotch_set& lhs, hopscotch_set& rhs) {
+        lhs.swap(rhs);
+    }
+    
+private:
+    ht m_ht;    
+};
+
+} // end namespace tsl
+
+#endif
diff --git a/dbms/src/Common/tests/int_hashes_perf.cpp b/dbms/src/Common/tests/int_hashes_perf.cpp
index be1cb2e5b93..990292e4e66 100644
--- a/dbms/src/Common/tests/int_hashes_perf.cpp
+++ b/dbms/src/Common/tests/int_hashes_perf.cpp
@@ -306,16 +306,16 @@ int main(int argc, char ** argv)
 
     setAffinity();
 
-    if (!method || method == 0) test<identity>        (n, &data[0], "0: identity");
-    if (!method || method == 1) test<intHash32>        (n, &data[0], "1: intHash32");
-    if (!method || method == 2) test<intHash64>        (n, &data[0], "2: intHash64");
-    if (!method || method == 3) test<hash3>            (n, &data[0], "3: two rounds");
-    if (!method || method == 4) test<hash4>            (n, &data[0], "4: two rounds and two variables");
-    if (!method || method == 5) test<hash5>            (n, &data[0], "5: two rounds with less ops");
-    if (!method || method == 6) test<murmurMix>        (n, &data[0], "6: murmur64 mixer");
-    if (!method || method == 7) test<mulShift>        (n, &data[0], "7: mulShift");
-    if (!method || method == 8) test<tabulation>    (n, &data[0], "8: tabulation");
-    if (!method || method == 9) test<crc32Hash>        (n, &data[0], "9: crc32");
+    if (!method || method == 0) test<identity>  (n, &data[0], "0: identity");
+    if (!method || method == 1) test<intHash32> (n, &data[0], "1: intHash32");
+    if (!method || method == 2) test<intHash64> (n, &data[0], "2: intHash64");
+    if (!method || method == 3) test<hash3>     (n, &data[0], "3: two rounds");
+    if (!method || method == 4) test<hash4>     (n, &data[0], "4: two rounds and two variables");
+    if (!method || method == 5) test<hash5>     (n, &data[0], "5: two rounds with less ops");
+    if (!method || method == 6) test<murmurMix> (n, &data[0], "6: murmur64 mixer");
+    if (!method || method == 7) test<mulShift>  (n, &data[0], "7: mulShift");
+    if (!method || method == 8) test<tabulation>(n, &data[0], "8: tabulation");
+    if (!method || method == 9) test<crc32Hash> (n, &data[0], "9: crc32");
 
     return 0;
 }
diff --git a/dbms/src/Common/tests/integer_hash_tables_and_hashes.cpp b/dbms/src/Common/tests/integer_hash_tables_and_hashes.cpp
new file mode 100644
index 00000000000..dfcf4259832
--- /dev/null
+++ b/dbms/src/Common/tests/integer_hash_tables_and_hashes.cpp
@@ -0,0 +1,390 @@
+#include <iostream>
+#include <iomanip>
+#include <vector>
+
+#include <unordered_map>
+
+#include <sparsehash/dense_hash_map>
+#include <sparsehash/sparse_hash_map>
+
+#include <Common/Stopwatch.h>
+
+//#define DBMS_HASH_MAP_COUNT_COLLISIONS
+//#define DBMS_HASH_MAP_DEBUG_RESIZES
+
+#include <Core/Types.h>
+#include <IO/ReadBufferFromFile.h>
+#include <IO/CompressedReadBuffer.h>
+#include <Common/HashTable/HashMap.h>
+#include <Common/SipHash.h>
+
+#include "hopscotch-map/src/hopscotch_map.h"
+
+
+using Key = UInt64;
+using Value = UInt64;
+
+
+
+/// Various hash functions to test
+
+namespace Hashes
+{
+    struct IdentityHash
+    {
+        size_t operator()(Key x) const
+        {
+            return x;
+        }
+    };
+
+    /// Actually this is even worse than IdentityHash.
+    struct SimpleMultiplyHash
+    {
+        size_t operator()(Key x) const
+        {
+            return x * 0xff51afd7ed558ccdULL;
+        }
+    };
+
+    struct MultiplyAndMixHash
+    {
+        size_t operator()(Key x) const
+        {
+            x *= 0xff51afd7ed558ccdULL;
+            x ^= x >> 33;
+            return x;
+        }
+    };
+
+    struct MixMultiplyMixHash
+    {
+        size_t operator()(Key x) const
+        {
+            x ^= x >> 33;
+            x *= 0xff51afd7ed558ccdULL;
+            x ^= x >> 33;
+            return x;
+        }
+    };
+
+    struct MurMurMixHash
+    {
+        size_t operator()(Key x) const
+        {
+            x ^= x >> 33;
+            x *= 0xff51afd7ed558ccdULL;
+            x ^= x >> 33;
+            x *= 0xc4ceb9fe1a85ec53ULL;
+            x ^= x >> 33;
+            return x;
+        }
+    };
+
+    /// Pretty bad, only for illustration purposes.
+    struct MixAllBitsHash
+    {
+        size_t operator()(Key x) const
+        {
+            x ^= x >> 1;
+            x ^= x >> 2;
+            x ^= x >> 4;
+            x ^= x >> 8;
+            x ^= x >> 16;
+            x ^= x >> 32;
+            return x;
+        }
+    };
+
+    struct IntHash32
+    {
+        size_t operator()(Key x) const
+        {
+            x = (~x) + (x << 18);
+            x = x ^ ((x >> 31) | (x << 33));
+            x = x * 21;
+            x = x ^ ((x >> 11) | (x << 53));
+            x = x + (x << 6);
+            x = x ^ ((x >> 22) | (x << 42));
+
+            return x;
+        }
+    };
+
+    struct ArcadiaNumericHash
+    {
+        size_t operator()(Key x) const
+        {
+            x += ~(x << 32);
+            x ^= (x >> 22);
+            x += ~(x << 13);
+            x ^= (x >> 8);
+            x += (x << 3);
+            x ^= (x >> 15);
+            x += ~(x << 27);
+            x ^= (x >> 31);
+
+            return x;
+        }
+    };
+
+    struct MurMurButDifferentHash
+    {
+        size_t operator()(Key x) const
+        {
+            x ^= x >> 23;
+            x *= 0x2127599bf4325c37ULL;
+            x ^= x >> 47;
+            x *= 0xb492b66fbe98f273ULL;
+            x ^= x >> 33;
+            return x;
+        }
+    };
+
+    struct TwoRoundsTwoVarsHash
+    {
+        size_t operator()(Key x) const
+        {
+            UInt64 a = x;
+            UInt64 b = x;
+
+            a ^= a >> 23;
+            b ^= b >> 15;
+
+            a *= 0x2127599bf4325c37ULL;
+            b *= 0xb492b66fbe98f273ULL;
+
+            a ^= a >> 47;
+            b ^= b >> 33;
+
+            return a ^ b;
+        }
+    };
+
+    struct TwoRoundsLessOpsHash
+    {
+        size_t operator()(Key x) const
+        {
+            x *= 0xb492b66fbe98f273ULL;
+            x ^= x >> 23;
+            x *= 0x2127599bf4325c37ULL;
+            x ^= x >> 47;
+
+            return x;
+        }
+    };
+
+    #if __SSE4_2__
+    #include <nmmintrin.h>
+    #endif
+
+    struct CRC32Hash
+    {
+        size_t operator()(Key x) const
+        {
+    #if __SSE4_2__
+            return _mm_crc32_u64(-1ULL, x);
+    #else
+            /// On other platforms we do not have CRC32. NOTE This can be confusing.
+            return intHash64(x);
+    #endif
+        }
+    };
+
+    struct CityHash
+    {
+        size_t operator()(Key x) const
+        {
+            return CityHash_v1_0_2::CityHash64(reinterpret_cast<const char *>(&x), sizeof(x));
+        }
+    };
+
+    struct SipHash
+    {
+        size_t operator()(Key x) const
+        {
+            ::SipHash hash;
+            hash.update(reinterpret_cast<const char *>(&x), sizeof(x));
+            return hash.get64();
+        }
+    };
+
+    struct MulShiftHash
+    {
+        size_t operator()(Key x) const
+        {
+            static UInt64 random[2][256] =
+            {
+                {
+                    0xb9979dc11a7ab921, 0x069177f0cca1cb81, 0xa77b7458984cdca6, 0x20cdddcd60ebf956, 0x54a7e16ccc85c618, 0x5a7b32512add86d0, 0xd4024d92207929e0, 0x927506e044a87bcb, 0x0ee2fafa66d27f8d, 0x34e5d597062edc13, 0x9d9e5971e4ef679f, 0x1373fa521c455462, 0xae20d7a035922d67, 0x2acceeaa1c38dea1, 0xa8d7318d831b2291, 0x237a092ca188a58d, 0xf3aab9c253e94340, 0x59c067c9247ad798, 0x9a2a3b0fc175faea, 0xb4ff42232b62408a, 0xc33f6f6e14b2ad59, 0xd8354b02517bb45e, 0x6cd3324afa8e1fa8, 0x491f3efd10beb8ce, 0xec5511355b29b707, 0x11a0e525b3f8a1d9, 0xaecb034c53727bb3, 0xaad2f1a0f7b1bb15, 0xdc318ef665bf079b, 0xecaed1b2e1808d2d, 0x38ccecd71d8548f5, 0x4a29ee777e1e8984, 0x58d683d104a9a9ac, 0x1c86c3ae03b0d968, 0x6ae480de1f434851, 0xdbb254502c123636, 0x7c8514ca7feb2780, 0x209ce2dacac250c4, 0xec256290581d4242, 0xba54af4e004cd657, 0xef09683b8da8cfd4, 0xef4b60e1cc22c00f, 0x8bb9849ecb4f0f4e, 0x64186f57b4e7e197, 0xf86d9122f253282c, 0xe545dcc26d8232fc, 0x7a8eeab5b064887e, 0xd77895d6c7baf87b, 0x07bf805fc0fcef52, 0x362d110a780e166b, 0x1b0a76b85b924a27, 0x49904133b1fb35c4, 0x76328bab42b9f5b8, 0xcfd15d94b28db6d0, 0x8908beb1031eb7b3, 0xbe05c1d38c789639, 0x9a38f06e2a4a5e2e, 0x9c49fa7b05cbd5e7, 0xf31aa060547fc13b, 0xafcfd8714b2a4082, 0x71a117f24d3d8cbe, 0x201a2c30737c1243, 0x9915450b0b26957e, 0xbe5865d67bd08830, 0x1efd55f709543d56, 0x69305e7e4c08948e, 0xd19d4fe1a47b1598, 0x36e4246123e542b7, 0x4cf0640cc2b66fdf, 0xf4301d669c78d4f9, 0xe1e7025ea57b3a3e, 0x6d59685c09510c07, 0xd90b29fbe9680c7a, 0x5575a24ba532391e, 0xdebab7c32a11d297, 0x09010bd6a6b295c8, 0x5b745d475a2cc5f8, 0x1a17df8fa5d2fb0e, 0x9eb9c9a1c56caa5d, 0x8759a2d4b80f3104, 0x1a6732736d219e48, 0xb9a934bedbbb1b64, 0xe88fc304f4f41aa2, 0xb650530683f9aecf, 0xc6de4393e1754ec5, 0x455dd664a10d1975, 0x295dffd0c8accda1, 0x4b0e49b50e1c3cd1, 0xf9b16d40c7bcb99d, 0x73f924a20a6b9bba, 0x59d4473fddb90104, 0x16a1b347c260d1b0, 0x0b6c06c9546458b7, 0x808f3c2d9e112207, 0xd142ca3cdf2a92c8, 0x36b4ea106906393d, 0x3ad7d15985207a3f, 0x5b3236e568021603, 0x95ff5ce56c0c9e4f, 0x35b8973c8634cac8, 0xd09591efe1cea59e, 0x9a852d896596d1c5, 0x5b8d231f680d8003, 0xaf56403d17f64962, 0x27f9e41e098024b4, 0x276adf3aacd76289, 0x0f828c068e8e4552, 0x45b28f9c40591b8c, 0xf543929acc2fd3a1, 0x7f5c232270f8675c, 0x96bf54ca56992a6b, 0x831ceab94de9a77f, 0x884d660886d85f97, 0xa441f1401555b4cd, 0x2f7a37e0df322ca7, 0xcfd621bcaf7e8968, 0x2f4c4f54612f78bf, 0x81ad69be79a99942, 0x452169980a69690a, 0xfe7ecd6fdfffc7db, 0x6750d09c96c0723e, 0xa96e8b7c1f87d0b7, 0x9355142350227cfb, 0xd04b2c03a5fd752c, 0xaa884bf5cbaef0c9, 0x47439b6a210da3c7, 0x08366f260246b77d, 0xada9a09bde98ac47,
+                    0xf285854404e11a2e, 0x1f6b5f0db9344910, 0x43635afd969b49a1, 0xf6e336e4883a399c, 0xb3dc4cd8d0589e84, 0xeda5c2c0fc7742f4, 0x10b3b8fb622b125b, 0x9f109b5ab6eeed6e, 0x69277c89abd4cd78, 0xea221cf76ab7fc86, 0xfa0ca125e8daf891, 0xdac858a3c5e11c00, 0xafab19a748a947ae, 0xeb2b69a5a4da6489, 0x601eb5a12ac4565b, 0xf782900ee4cd7365, 0xa1dc2fd083f4546c, 0xf8723698955f8c1a, 0x1ef4282aebf3d945, 0x653da3bf71e624bd, 0xbe04b29a9baf7d5a, 0xb18f73a34543666f, 0x90c901c604dce6e2, 0x3208dbe059534b49, 0xacb11eee833467d5, 0x3d07083a9d5be0d6, 0x6bd915dd0fef2557, 0xabee35defb430211, 0xb3180e083638dc85, 0x2be0361e7febfce0, 0x4d97cf883fabdc41, 0xf1be01ddae8c4048, 0xb7d0b4f0332d2992, 0x72141ded3dd6f55a, 0x9123d943f30926d9, 0x7eeca6bc6be7313c, 0x95ab321d7ee5c72c, 0x21421b21f9d81288, 0x19233206a680c160, 0xf09a9d339c9d311c, 0x05133ec82d1ecd3b, 0x71f02fcd10beb60a, 0x5e61ad5eeb1ceeee, 0x087c9fbd97613ab5, 0xfe250587d99f3438, 0x5e871afeec0fa630, 0x4f88441aa4eedd4d, 0x1a42dd66f7452619, 0xf58e8d8f81b957ef, 0x66986b54ecf07347, 0x2382340318cfb238, 0x7d1c4f93c49b788c, 0xd8d9fa627f06932a, 0x6aa7c64c4873e3d5, 0x27efff555b10bb4e, 0x784a1565db94187f, 0x4271a9c1fcd0772f, 0xd234d11377eb40b0, 0xcaef5e6aa3977628, 0x64009936900c3fb0, 0xf0ae881430c514c5, 0x0cd32d01c5798aa4, 0xc45b39cce35de6e4, 0x1307b9f4d832f787, 0xceeee5e813135402, 0xc930a0867296e8a3, 0xe4b52a4e4f3d9783, 0xe4e7934e0d97202f, 0x51d1e9340b2459ce, 0x99cc0f90dcfe55b9, 0xf4f27b19cdedfe55, 0x9debe48d7c4cb666, 0xccd8bda6061dd33a, 0x3d1b011a2f870b00, 0xbb4bf9865127ebc3, 0xa149ab768fb8f044, 0x3119a981452666bd, 0x65db8aa27ea6d215, 0x1c6d1bd4e5f373ce, 0xa17f5382b2217d61, 0x9599f49e8a4c3636, 0x745bea7bbfdb01c0, 0x3827fcecc93556d5, 0x23467c41001df3a1, 0x6b055e5393e6df45, 0x845fe338233e4e8f, 0x2924d70bee302e1e, 0x00ff993181bdc604, 0x491f3a3eb8502037, 0x5d76d4307fb75616, 0xd02fc60a95d3e5ef, 0x807b31765ef80cfd, 0x94d6208ae964ca56, 0x01985a747cedaf30, 0xc93ba934dd080643, 0xff2171253b5f3f29, 0x661b5bf4c5c90793, 0x3e16f8961257ea4d, 0x6234adf4978717be, 0x0fb6ebda63b582d7, 0xe93085e5a1bfd52a, 0x356873f8edf17b1b, 0x6bd859a750a2cd58, 0x413c4f77cf56e540, 0x416f62eca618e666, 0x3fdbb2157558f5fc, 0x3446b31c6c14a00c, 0xba534ff625129ce8, 0x2579bbcd151f4a9c, 0xb13327991b337d1d, 0xe548cfe328d2e52a, 0x8685e6bd4abe8bc5, 0xc912ce5f3fe32b12, 0x544f4c648136ef2a, 0xac9dc00a55c71ae7, 0xb9c2048993ef7255, 0xdbfb82775439009b, 0x7124703dd90168cd, 0xfbf35a0d9e650617, 0x2c757dcd616aea51, 0x2c465d4aceb02eab, 0x28cb9d0f78dc8657, 0x87cfaeaa76051bc8, 0x0aeaf0c08f5e04a1, 0x7c60c6ac4e1bcdaa, 0x5904b7dcc3ac2e1f, 0x17625e6c1972f71c, 0x64dc0d8c46a96c16,
+                },
+                {
+                    0xb2b37152266d808f, 0xd1688ce5907fa9bb, 0xb4fbde50315ebb85, 0x94e1d1a40be0f814, 0xe5ef4ab98cebd353, 0xf908528b3ec218e6, 0xacc740ba335dceda, 0x19f10e26e4c68a48, 0x840a9672a2f5a2b5, 0xc0cf3b2996e6d1e9, 0x548a34dffc0570c1, 0xdc031240feff5c48, 0x92297ff0b800820e, 0x0c11352081d2a81e, 0x8fd8d29c79d55a69, 0x66488d4729e78c30, 0x941d465e2582b6fb, 0xe76b9689bba28cc5, 0x4103bec4b7c08c40, 0x4e4b4b76d188000c, 0x844db5402e959fc8, 0x2e134ee324e2f088, 0x625fa933c1fa4d1a, 0x1d4f0121515d0ce0, 0xb1d0d19a00d95196, 0xc38fe9c8032d86ac, 0xd6d71c1464cf5365, 0xef8d00bb18b455e8, 0x7536cd4fc7f65501, 0x97b4b02c30117dce, 0x7411e5084e20b6b9, 0x988167ff559bec81, 0xf91372f1a162a022, 0xc638ef90a723246c, 0x6761b912a7d129a9, 0x56927af0806a2486, 0xae697de7c569d46c, 0xf0587bfdf442de4e, 0x02b83e162ef75c67, 0x712f8e20cd8d62cd, 0x443a2b5a16f137ca, 0x5faca36e7dc9b149, 0x3fefa267d527a8d5, 0xa8bbd36ee9c73ceb, 0x451a8aa3972efdbd, 0x0b5a7ddaf5d22b7f, 0x37a58d60fb0033f5, 0xd57f6057b724127a, 0xa36b778ce2959583, 0xfd843a9d8800b0b4, 0x057f720e9582b96e, 0x4304286582ffe050, 0xa3fe58e3d150bad8, 0x3b1cc25468a8cf65, 0x5a343b883d439ecd, 0xefebddaf9fccf271, 0xdee57a385349bdc0, 0x685936c14e3c96ad, 0x54efe66dcb28d0fa, 0x3b3a5bbb4873165e, 0x6a10806224cc7c73, 0x8b81d349a6653e23, 0xd3c77a6c5254a1c1, 0xe426ed8badd6ae84, 0x159ed3abb8289790, 0xc34a0abddf1c3868, 0x7e0ffe203c7b27d3, 0x26a12e8a39321471, 0x058ef4beca30a376, 0xa329f59e34edcd3c, 0x72b0a270dcf9a332, 0x166ceb4a84f8b7e6, 0xa280d2c77091a912, 0xec16fa92ca834210, 0x87cb7bde305e2e8c, 0x637fbc31719a722e, 0xf1d6b723b62c74b9, 0xa79f73228d226e72, 0xdde9e7d9b6111fc5, 0x7ceb45bc84699f40, 0xa91bfc1705e0d732, 0x306af3eed572452d, 0x4a9433f2005ed515, 0x9f00376f44600cec, 0xefd838b045fb86ef, 0x65098de0679c0513, 0x51ccc18e00e21e0e, 0xd462c4e789677c32, 0x95a177c568ffc03e, 0xb12376a070d7b2d7, 0x664ae720214ec675, 0xcf15fe99a80aebd0, 0xa7d007e79eec58ab, 0x2c6b911aa9872e3d, 0x77f746e811360633, 0xd0830a895f8f557b, 0xfbefdc19fccb70e2, 0x02e4ca8f22349a2f, 0xcc8e04e0283f929c, 0xeb743ce2f3f2a830, 0x966611978470f470, 0xd46e0a5f306e76d0, 0xad2c8ed5bcd22c46, 0xf1eba8f4d933b891, 0x058a7875bb427a22, 0x45107f3da4aa3e16, 0xe81284e4b7d9db22, 0xe004099ab8178db8, 0x53e36cc21df27075, 0xd9a17d119adf78de, 0xe95c27a78e7824e7, 0xb584e128467f80dc, 0xd44ff1193314853c, 0xf0124a034d852e19, 0xc9077af6da8a9ce9, 0x57a082ea0d3f7acd, 0x14cd16087a7bf2a7, 0xeb64143d63df7307, 0xe15943fecae64d35, 0x978bb411cfc5f25c, 0x76a96f8f617871b7, 0xd7d9eddcbfae7bd1, 0x61bcfec611e3ed1c, 0xf2af9d3e527ecf80, 0x8182c0a8ee728f30, 0x7ca964bcddcbe009, 0x99c70ed92ad37d47, 0x52ad14b9a60bebdc,
+                    0x651a92a09a0f6cd9, 0xf081fe7ada03ec1f, 0xc2e558216dc44e49, 0x28d1e01b15889c1b, 0xd24d7e86573816dd, 0x965aa3b1404a1b4b, 0x39776cd5c65d62da, 0x5a1afb7ffaff7228, 0x993d7e2bccb5b123, 0x8af2f371498b991f, 0xec8f424fd6b122f9, 0xf7a7a05f114f446d, 0xf1729f142472c92c, 0x1e47d5d4dbd7d8d2, 0x3c79a550342c0bb7, 0x70d8ff005cc97454, 0x565f5c2a091373b5, 0xb2f07256c7f2de3b, 0xcc0e305a4fa50bba, 0x90d7259bcd4d4dfd, 0x57a21147c7b8d4b4, 0xb4e904c414344400, 0x1b9843cef6b7de4e, 0xc55ed544783d7077, 0x951fd36278c4af42, 0x5596691ec2a6c112, 0xa96366b16e85bae6, 0xdce8b9304db2cf16, 0x4196a4a77dad5d51, 0xc035d6a8486f503c, 0xda814111a183c18b, 0x850d476c24afcdeb, 0x8b1fb1911d79a756, 0x3fbb083dc9ff1546, 0xd9ea3048315a4840, 0x1bbaf883b34fb0e8, 0xc7812b9babc4f1aa, 0xeee79e6e139bfd6d, 0x0f61f802cad137ab, 0xe6702e761b466507, 0x00fea66009d13ddd, 0x27865b24fdc1543a, 0xe53c0cadcbd4ba6d, 0x081dead3238fe2fe, 0xc4ad52df5a17e06b, 0xdf196982de8820b4, 0x93056ac04ef6a5d0, 0xe3b0e29b5b394f0b, 0xd22124f2f63464f3, 0xcfdc91988a08c919, 0x142e82bacd825129, 0xa295a43e93e4e702, 0x1b344f1ae78d7438, 0x1ad39d6fce7b2de1, 0xf38a7608b3c52ae3, 0x0411864df1f01a7b, 0x8da709efa240a816, 0x4db801db3b0405a4, 0x82e53d875886c89c, 0x1f8a6580748a29c4, 0xb5f3c5da7b0c656d, 0xe6308e06103bb9da, 0x1c044a02583c3b23, 0x7824fafc7744ded1, 0x120ae3a85640de45, 0xbe7841a70dbe7f85, 0x13b1bedabd2227e0, 0x543f4d8601666ca0, 0x140acc8931524f45, 0x07d8d511f4deaf51, 0xe4218d90e617fb4c, 0xbff96e733749a010, 0x6cb6990dbdfed3a6, 0xe651708ed78c4008, 0x2d86b38cd518d5fe, 0x41316f458fcf5a20, 0x74cf6939366c2013, 0x298a2a81db72ba3a, 0xbdeb78b42a71be11, 0x7bf37745b51a4e93, 0x71d074c050fd7f32, 0xd7ae1af287ef3c83, 0x6545c0921918a9c4, 0x81b80a0be366c29b, 0xb5f96c0c829ff255, 0xedc1e2598be87178, 0xd930d6eafb62c208, 0x1b4ae0fd3bc45e3f, 0xae4eba5234af88a3, 0xef250aa0f82c9251, 0xf842992589d21959, 0x76d99f8cc618151d, 0xc0d053337ad3d0da, 0x87569479dbbdcc16, 0xc0a31211852f8ac5, 0xf09022e229d40060, 0x2a4acf18d2d9a943, 0x95b9dd630a124a30, 0xe3d441a7c183a2df, 0xb70592e18a0d31d5, 0x45dfe88bf06ec04f, 0x9a8982b8dabc5814, 0x9853c63295eb60a1, 0xcc6ab672b45261c5, 0x1d59b5ef15f998e5, 0xab052dc26ca65d9b, 0xc29094316db6ad02, 0x90e2463bc2a67bd5, 0x5d9658394413a531, 0xbbae171ee3e4997a, 0x7baf87e1759cbfee, 0x269b8ee8cb2d9c69, 0x98d272695f943d3c, 0xc2aa69caf54a47ae, 0xfb6d9caf908685a0, 0x36aab7dfb5ef3444, 0x9a28ca1db0e037e0, 0xd54c3b5005923402, 0x124addbcdcfa5bc1, 0x277b48aaa7bec0d9, 0x76d3a563d86fa26e, 0x0545a2263944b662, 0xdfd9e108d234c7a9, 0xa7f406b6d42d6ec8, 0x9becf8e91d0daf3e, 0xc53f653fc3f42bd7, 0x66a70e50c0535454, 0x2ff4d545d6a21306,
+                }
+            };
+
+            x ^= x >> 33;
+            x *= random[0][x & 0xFF];
+            x ^= x >> 33;
+            x *= random[1][x & 0xFF];
+            x ^= x >> 33;
+
+            return x;
+        }
+    };
+
+    struct TabulationHash
+    {
+        size_t operator()(Key x) const
+        {
+            static UInt64 random[8][256] =
+            {
+    {
+    0x4f3cecaf24409b1b, 0x6ad9f166d91c6613, 0x54bf75358aea8524, 0x3fbc1de24a079a6e, 0x57ea94a0259aec73, 0xf9174938aeb467ca, 0x6f5deec36e40d25e, 0x534addc1aa10643b, 0x94734451bc5c5ec5, 0x4d72432932c6c7b7, 0x56ddb2f1203575ef, 0x8f7f217ead5654b0, 0xd7c0eac16d4aa24e, 0x84e4265047714b2d, 0x449769b97c43e1e1, 0xfed98b3f4c5b7698, 0x48bb913b09ea35c1, 0xe69cfcd6052df551, 0x483636e39bab623d, 0xc108de147d29545a, 0xeafb7485ba1f8e40, 0xd4a3891e24ab7233, 0x941f1975d079ba30, 0x8776cd48b75bbf4c, 0x42c5f14b72ec0eef, 0x19a18efdff3a84b5, 0xec4078cb31112625, 0x3063155b2e1cace2, 0x4d0fd702d00c53d5, 0xc80ad41b4e104360, 0x67e9d8d12617d417, 0x1a6de5d7f6a3958b, 0x9520893617a19775, 0x8842a4072f85e7b1, 0xa066c6eec0f4c288, 0x91c753eb152561fb, 0x1eacdc853dbdc4d3, 0x38e8e61d8cc61e0e, 0x22117333fc2eb16e, 0xed909eb368ecc36b, 0x2b67fc646f0b73d2, 0x7c28b15e21c0a93a, 0x13f8de9d1bad4d5f, 0xd96a893cf9da4125, 0xd3bbc92ad05fb53e, 0xd13aaad8d8075799, 0x18f003d700064040, 0xbb47fd3c38570068, 0xcd3db144d1a1f6d4, 0xebb33f814155f734, 0x740c6c7f4d91ac30, 0x9e3ae55cefe1f46e, 0x678c8b1c10da8b96, 0x37510cf678751024, 0x4e9e97713eb900d6, 0xb11271b9b1617fe7, 0xf2b35c453dfcfc22, 0xf5c2a8307ec8d153, 0x14089b8b1462447b, 0x5a350397786bb472, 0xfff7cd246e11a821, 0xe5649ebd197aa820, 0x5b7b9b407888c0f8, 0x617e4610c8e466fa, 0x928f11ee454fbeb6, 0x72e6a8006953074e, 0xb695a3dadef3ae60, 0xa26906a7e0140bbd, 0xf856fd404e987a6f, 0x95cddade446d74c7, 0xf7cd44918454ca8d, 0x34b17066e9a0f88d, 0x3481e736e1fd2fbb, 0xdf09f9dab79514ba, 0xf9352d79440c48ae, 0xae71591dcae8b4b2, 0x2f1bfdb07031c1ee, 0x50f106bd520bb92d, 0xbc39d67db555d325, 0x0768dcf245299385, 0x2d0f0564d3ee2403, 0x0b4075b353804510, 0x3722374ac591e9d2, 0x561d767a57b72214, 0xd6e1775b179939ea, 0xedaf88090284c61e, 0xc897695662d2c7d2, 0xe2bdcab72b125c05, 0x37c0d44b70a6e565, 0x3c5be744f87454d9, 0xd8d969301f77195a, 0x3445efae8fea16b9, 0x338b465dd8e11413, 0x4bcde36d984527ac, 0x0b803b6976171f05, 0x51a7386146462803, 0xc2897c1a69b6f1c1, 0x5ab092594684a5b4, 0xd3f4afafab7fc3ee, 0x744c31a06c5c0460, 0x49f01a31eefe4021, 0xaa62ba4831a87a0c, 0x61052036b02cb121, 0xc4bb5582ed8d7e4a, 0x25b6b517dcf0d7fa, 0x703b1897ff50a997, 0x83e5b7ef256dead8, 0x8da8e0ea74709e3e, 0xd5e2be449fd500a8, 0x0c386f813d21c684, 0x4bfee7a5bc728b8e, 0x5f0aca2baba81c31, 0x7ddfcfe8a6e897de, 0x90b69d33b9af5fbd, 0xfddedc985978876b, 0xc8f93bbdde12f7f2, 0x45d7a1087f4739f0, 0x75ea324184f1e5b8, 0x97b78ad1ec2ef95f, 0x192e5bf5ab92521b, 0x5175eefdc3fadec7, 0xa23a36c97bac48b2, 0x6e1c21a7c232880e, 0xf8010ca06ce802ea, 0x6e9822f71a8123cb, 0xdcb21616d2941bc5, 0x6f2929e3943e0777, 0x5a6ae6bc66eb2f1d, 0xf20a3c93c0fa6172, 0xcc1f91002d11567d, 0xa558d8dd4fec490b, 0xbe0ca1be73f5d533, 0x66dd4427dc66bbe1, 0x6a27341a10cd985f, 0x8d12380bcbd9bec3, 0xfefaaf20d5b9139f, 0x8641d578cdc4e199, 0x5f21e78ab4d25c23, 0xef52fdd1026d6f8a, 0x842f108fa0fcb69d, 0x9bc3723415c2724b, 0xc9615a1d92ab9500, 0x7c0850b85e7535cd, 0x8e93357abaf1ed24, 0xdf6fb36799938890, 0xe2082b8202e65187, 0x11170a177d855ab0, 0x751564db24effa8d, 0xa74b7769d222649f, 0x12a999982fd65d64, 0xc380a14a49b93b32, 0x5b9e932a4f264ee3, 0x1ca4323b708a0aec, 0x28fd5d5a327c91d9, 0x31289307419c9f1b, 0x20bda201fd63649d, 0xd1d1c8c8163836d6, 0x9cdae51d6993d0a3, 0x3a6fbe9579c1da3a, 0x27072cbc6d3a4825, 0x94b7107387607e61, 0xb1b0b605f39f115c, 0xb2557d474dc4e952, 0x8794c509cf6e26b1, 0xc1a8398cb2e98c9c, 0x92d0f486b7ac4afd, 0x9082064213d7c39b, 0x7e200cefde5ed88d, 0x904b5a00dd597b29, 0xe84c40f0bf571c92, 0x196b4a47fe185361, 0x2fb3e66b0fa8c185, 0x581a7f90e8646a69, 0x4b74f5f56f087180, 0x2e94fe98e0b1cc0a, 0x198f7bfbc692eda7, 0xa4d0b7908707016e, 0x0c90ee281eff932d, 0x879a1cf4c24d189a, 0x2d66cea5314bd480, 0x23346e49480e7e83, 0x82d63e12f56ea02c, 0x4ce802d35f8f46e2, 0xd143fe931608d5f5, 0x66c466071c6718a2, 0x59a5ab0a39dd53b2, 0x4805529d2a394dff, 0x631151511fde3379, 0xab98f8154c24669d, 0xb3a05b6b742dcdf2, 0x9d9827d3e071e26f, 0x9c157833c16216fa, 0xa9c8584ed28c8023, 0x94ca7cddf52bbc28, 0xc34d111216c15159, 0x68abebbb05b62206, 0x4786f8c9094da769, 0xb9c218515d329fe9, 0xd997a2ad91f01905, 0x81aaa629f3b2bce0, 0xefef8a896238229a, 0x6af252f60dd72940, 0x4492c36d5f165eac, 0x9dd50a2ef4d5f9a1,
+    0xdfbf94a3550e6ecd, 0x9288b91b1caf05e7, 0x9c0ad10f9c67d06e, 0xdd4acdcb36db48c5, 0x7570a07af93f329d, 0xa31f26e2fffef103, 0x2886476327948381, 0xbddd48e33441c988, 0xc887b91661bf6e9b, 0xfc21127e01445e2f, 0xf0a8e3af7c953713, 0x47da9d0482ee366b, 0xc3d6ca5719939732, 0xedeadea909a6fa76, 0x26fcdad40d586205, 0xa1dd97b354af5ed4, 0xcc8aa75425740654, 0x4a0c5113156178f9, 0x981abd8fcf766b13, 0x99f31176a6cb712a, 0x49ac51e92982f34e, 0xdfeaa1da9373b16f, 0xa31666230da09aa3, 0xdc1eaceaf1afa2e2, 0x4326313df2d59166, 0x8836b6746f79236a, 0xe166e915f72d2540, 0x09cf7b4b78b2637e, 0x55369c7b01071949, 0x86ff67741a6c3cb6, 0x76503dcd4383b8cd, 0x36731b7b7cf8df2e, 0x51a2d6aad260273b, 0xf6d7cbb0db4253cb, 0x1230054a3bd28926, 0x4eb266aae155fc98, 0x9a2a7f60a9dac0e9, 0x79290b7b9035b3d4, 0x3a424cdbbe2aaa45, 0xcc46944623341c59, 0x7c185b60bade4199, 0x6ae892b88237cebc, 0xdd8ec8066e75ae58, 0x69a8aa059ff444a0, 0x987e27eae07462d5, 0x539f69c3d236e3c3, 0xc6bcae3c97941ed6, 0x29e853acc29b3eb5, 0x1faeb880153ba613, 0xd35bb34905b04517, 0xabcc7468231c83ca, 0x286d214a37fb65b3,
+    },
+    {
+    0x55baa482d6094d30, 0xa775277f382073a4, 0x73bae2387e32c5f8, 0x4a6416aaca027e0c, 0x0ad38404a8ca17e5, 0x37ca2344345f1a5d, 0x9c910f03937fc178, 0xd99efaccd8d0f15a, 0x557c9d7538915f7e, 0x500ee4fb62acc9e5, 0x3f1eb011057383c9, 0xaa4fe82c1926b108, 0x84e24186cfadd3fb, 0x0862cef2d38de5f0, 0xf03099ce7d642f66, 0x143b22300078711e, 0x04d44b5b3444abb0, 0x2c46d930d09accf6, 0xc5c8cfe5da8a7e49, 0x9c0dfe1f0c791972, 0xe287ed0f98ffb25c, 0xe91eb5daf941701b, 0xf4989aba25810a10, 0xa23853aed0869787, 0xc008036099dbab0e, 0x752dd9e217b5c4fd, 0x763c0b933129e776, 0x7b474798a5a16875, 0x31c3cf01e8ac88ae, 0xa3075c68c217fbbd, 0x12b53ba28ba2ef53, 0x62d619c3d366944e, 0x8d1aa326f3f96bb6, 0xe4ba4927c89184c3, 0x863b2374081f1e4f, 0xa2f5b1f264772e64, 0x7d8085a2941f5b09, 0xc19447f7e00838ca, 0x1fc50c73bd36adff, 0xc10e2d3ea18d12a2, 0xfb9d34ae20b0977a, 0x809ae69b8558a6d5, 0xf643d21e8543a777, 0x1223ae6c2809d3c5, 0x5ff0047e9b20a9d6, 0x814623fd6a99203e, 0xc912bd1a4b7ba3e1, 0xf4683a10792dce3a, 0x6157694e70c50676, 0xae3916927aae4a7c, 0x0973002f06279ed7, 0x0f4cd1c254c5e26e, 0x7d7ba6bb160200c3, 0x3c3bd06bd3fa92d9, 0x1e559440993b113a, 0xfca7ed369bf393eb, 0x6caa82117c2f3ca6, 0x4d848224ec4a0dfd, 0xfde9e96a3023bfc4, 0x3bf9ed37ebcad940, 0xcbf6587df4678baa, 0x98d4968ce7ec8779, 0x2644f85c1e0fbe76, 0xc984308f877374a2, 0xd53e563c2f8511a5, 0xd6dd5155704369f7, 0x853bdf051a2a6a2d, 0x21d9a29d02e502b4, 0x65150c59e806b5a9, 0xa1943a083f94e255, 0xdd5422dee2ebf9d2, 0x7a418ce99f1795b8, 0x98b0e22488a10741, 0xcabcf61f9966df2d, 0x58e14076e68fca44, 0x314d7ddf395cccdf, 0xcc403b46e733f565, 0xf15b35f8d71d8a1b, 0xf5f1b4f972ab688c, 0x1b3b1dea29c68904, 0x2c96354f3f1da731, 0x00f045692ffcf9f2, 0x03ace2077bc44510, 0x7da8d3d81d6d03f5, 0x5cd83bc6f5d9f8ad, 0x7c04887a3721286e, 0x1bd151e4125406cc, 0x888c7ad9ebd8ba2f, 0x143f637fe24a0da0, 0x6b3779a345052dc8, 0xbef717c4f597384e, 0xcbaffc196a0aebed, 0xc7c4c469a8d9c20a, 0x4e260ce8512295c8, 0xe0c9b47f50817abe, 0x0dd940d3900eaabe, 0x91d566526665dcf7, 0x470d9079c9c2fc39, 0x388867f7a6d48949, 0xd0c3ba55686d3d6c, 0x9880ba4fa1a0765d, 0x31178f63e3c86654, 0x1ad2daa404e3ce35, 0xcb816899daf1d758, 0x943662411dd67f38, 0xff9592c29d8c8150, 0x123d9c7fe4bf5702, 0xae43ea51c75b1793, 0x9d3e98a5ae2bc556, 0x601f8a748a24ab05, 0x3b090e60d6837c99, 0x6236755d48edd559, 0xefff911ef6b222b5, 0xdd567e4aafa31faa, 0x98bcb00b24a47474, 0x7cb94e6af25671c5, 0x5e821e019dde465e, 0x7defecfae68e913b, 0x915811752db3d4e7, 0x8be3d8cb181a5e8f, 0x95915979542f5ef1, 0x49708c505e34e6e5, 0xda4c55a2f6d1dc53, 0x2474d9abbfdc58c0, 0xc83b01a3120290ea, 0xd35cfa7833fb3e6a, 0x26dceb5adf4d2e72, 0xb4a95e8d3921a995, 0x05f9eea1de04e25c, 0xe1b3f728f77a0ea7, 0xd0e72bcdb5a43289, 0x532a242c481e5582, 0xa1af8ca39bd360b1, 0x92f5d69ceab3c9a3, 0x4d3effdcb8889428, 0x1a41253f9e794758, 0x8ddaf31b28db2834, 0x0e183ef313da9b9e, 0xd9b3f3a53daeee98, 0x39d555f28e821b18, 0xc5209431aed2ea3a, 0x70836f760f213e2c, 0xc8eb3a1a5b250610, 0xee225c27d0fae90c, 0xa3d1bfdb2f236eec, 0xe4ea1d3c6ab29fb1, 0x3e4113121d92c559, 0xd6f76fcb55b71cc9, 0x9b2c2b439c547467, 0xd4c0eaa7c33240f8, 0xbfaf66aa8c19832f, 0x9d3b1f5a4b810871, 0x01635ec6f0d79ff2, 0x60ac5b58598a106c, 0x487dd435e33b6846, 0x60bffd49a79cc594, 0x7b94ee39d7958a63, 0x146e9a412ad1259f, 0x7f78dd55015459f7, 0xf9b4611ed19dcc7b, 0x02099fed0debcb39, 0xfa2c53462f353c9f, 0x4df9be5b185dac4e, 0xdff08e1bb6197444, 0x68b8eef25269c9d1, 0x29375f899bfdf95c, 0x717425fb6cf4f3e7, 0xd05924d6516744ea, 0x7767e15c3f0ae746, 0xef09e001fbd96e10, 0x3ede191e2efd92b4, 0x2870f7b6584ed8db, 0x5b865889a51ac221, 0xc48bec3f1fb51b24, 0x221b7a0a04040567, 0xea1db24529ff5064, 0x1258c51e195e4518, 0x84789f760d2611c0, 0xff729a0328c4f026, 0xf600dfa2d51a3a17, 0xf90dc307cb46a03a, 0x9a7fdb17130b19d8, 0xbc52f4ea57b7260b, 0x40a6a8b285f3ab9e, 0xeb3cffe125e5ec4d, 0x7403e102ed00cedb, 0x73f6ef7ce2d178b7, 0xcffa6b38980c2e97, 0xd0e796c209ee19d2, 0x592bf15bd1da7c94, 0x39d26577581c4aac, 0x5396af489dd6bc53, 0x93258b2aa667f2e1, 0x848a94686c6f7296, 0x03e62afec9dee865, 0x07f2fd48c7715faa, 0xc15419583397837f, 0x672d8bce0e659a4e, 0x30773e75d88a6236, 0x87e89b73fd6b2338, 0x44b36eeaa3cf1782, 0x463715a899473e12, 0xacbbbf9ef34f1993, 0xe69201e06e0366bd,
+    0x4bb133053a2711e8, 0x3d8a3aa545b44de0, 0x76c4052be97df9b4, 0x764d1f80780c0dc3, 0x2f857803d304866b, 0x6e4bcb1fe6dba6f0, 0x203bda623fffa785, 0x4d4ebd02bbfd90f0, 0x4bb68df8e27003ad, 0xa8f2c6e2fcdd7a3c, 0xd671f8629f1e6a49, 0x8c5bc906af678f51, 0xc15daa98abab5323, 0x5e452b96ea600fd3, 0x8639b76f2b8701d0, 0x38d0efba497d2ed2, 0xe58138b534ec6b00, 0xc2ae5081bea98827, 0x94804a7d23f42a72, 0x2aeb670b22f6b6bd, 0xf82c45c2f9056dcf, 0x90688fa9103a686d, 0x033d4d30d160e812, 0x882a3ece84fec7b2, 0x3e6a772e2759e4da, 0x5e8c67943a040727, 0x32d55a6250431795, 0x7fc3e1139a64263d, 0xe43b8cc338035f25, 0x2dc5cf5c635e6b83, 0xa28357ae64955e46, 0xdd5be08f32c7243d, 0xe444c100eab4a18a, 0x879f71ed16556a1f, 0x4946e311697b872c, 0x3b03ec5646dc6737, 0x50dfa7300f6d9cc4, 0x8e7f48c57f42e905, 0x3649232984345f09, 0x8f8455e58304e686, 0x1ddecfecdd046180, 0x3bc6a932474b2a09, 0x350c7f543f5ccce4, 0x657e3a0eb2152346, 0xf71988c12b0bcd16, 0xa3e5210ed52837b5, 0x1cc15b50de742d38, 0x6d951d94d09d05e0, 0x0b62b88308358306, 0x7d8d89a552fc7b2c, 0x2ca923216a4670b5, 0x9b9ed6833377a5c8,
+    },
+    {
+    0xbb80ca3eebdae7b4, 0xf5fc0d7464541a53, 0x9a2c38654912cf7a, 0x021a17fd20db7bfd, 0xbc058265765c6a1d, 0x8b50b42412f8dd1e, 0x61dac7b7556a005f, 0x0df84fd583671469, 0x58165564a89bf07c, 0xff19c3a56cfe3267, 0x858ab38bb08616d3, 0x645f43110df8c740, 0x30348b0389c8d7c9, 0xf7d9a363581d8eaa, 0x671246f994cd2296, 0xa806a2677b012b0a, 0xa3c10c4870db951c, 0x56034551fe978bc9, 0xb06ee3c60f36cb7e, 0x632d0d010f3dc94d, 0x75d975398748a7c2, 0x6b90477488a987b5, 0x9fe5b16871658519, 0xf04888c6a874558a, 0xb138a4d0d84d3cab, 0xb11a09eff6ff47cc, 0x4eede1a796b9f4b0, 0xbb484f86d18aaecf, 0x2a30f47cfb79fa2c, 0x81ae01d95ac19546, 0x994abf9277a54039, 0x8ccdf00781ccd895, 0x7a0f04ea61661d2e, 0x89c3a67131ccc7d7, 0x3c45517546a7ba31, 0x22fd6f368fb15fa9, 0x9de7fbe2b23b406c, 0x9c781d12f148508a, 0xc3a45b7d9f3b9a66, 0xdc9e5ffdfc3dd94f, 0x2e7571001e32d2c1, 0x50e37ff45132e76c, 0x470f8344ec798a14, 0x4935da7cd2ab3c1d, 0xbae60625dbb8865a, 0x17c9776beba8739b, 0x43639b7937f12895, 0x6be16198b6f962c3, 0x4d91d6ef47a9a3db, 0x2dbdead3f22822f2, 0x3c81b7f0cfaaba7f, 0xef3a37046d27f6f3, 0x31d68e3a049ac326, 0xa8c44e710a99aa40, 0x93be20d0a0572ffe, 0xa38e5189e299539d, 0x3bee2001ad51ba22, 0x46edbadd327d48d7, 0xba8bfd9e4105d214, 0xb60dae7c54d82759, 0x7d93ed61474ac119, 0xfe57a9875ef3388c, 0xf27d7420dbc63872, 0x5f026dd089f44ca7, 0x04ccb7959ff3d70e, 0xf1c48fb27f2396ce, 0x9be866e8a8598eda, 0x6ac8d846c3e15a03, 0xf67e5ccd61c63b36, 0xdf30033839e8d09b, 0xfc1af6737d5bd799, 0xd670eba2d160fb3a, 0xe2e38d82c480922d, 0xd9c110c0b7454bb7, 0xcdd114994b88c8b4, 0x99a7cf833cec3774, 0x66f2e51c71fa8474, 0x5ab5e8bc8b4b961c, 0x8079e6a7a2c87c6b, 0x346759d7e0e87e26, 0x3afb97b58da05ee7, 0x29c3b79b60a81743, 0x673a121ba778462b, 0x58bb6651c3676e58, 0x21ca8f3b3717e87a, 0x48eb7f1e72e2d6fb, 0xf1fc8baf3eacf652, 0x6b60026b8bc860ef, 0x8d63de031fa90638, 0x1e5e8d6237d49d97, 0x38136b31c0b6215d, 0xe4577bee2ba3ac08, 0x82f23249ef3f3240, 0xfa7d83521c2cd147, 0x0b0c9b7d0a3e865e, 0xf91c445ceb6d8de8, 0x20c4aad83cc1588a, 0xfa18db09e7aa57b0, 0x9462bde17785da6b, 0x58de5861f9e1e0f0, 0xe1ea5249d05bc6b9, 0x64122980de6a88dd, 0x2dbf98533c8dda27, 0xf4a3f494cbeb0826, 0x0da98b56ec88bb70, 0x0f3023729baf8a54, 0x2de585c4fbd9726b, 0x2e75bbaea4864189, 0xd882334004de7a85, 0xb2902adfd01a3826, 0x97c3aa5b950b920f, 0x46842894ea9be2b1, 0x21eca237fe98a7ad, 0x3b1b381c09d5fde3, 0xcd80bbf3dc0a4f51, 0x81a22e80d737c423, 0xdfa4b9c3efa2690c, 0xb6640950f3512262, 0x8ab28a948a71fd7f, 0x1dcc9fa50918eae2, 0x2fcddc6cb9c448d8, 0x2af05f8c76f15e5b, 0xcd239ecc0d4ab1c2, 0x842916ee4c7bc90c, 0xa0e0067d3a2ca83e, 0x9c057b0a000c604e, 0xd66eaba1537d6547, 0x0fe6b8cfc9d6b2db, 0x7bca27e7f4463505, 0x795772f9d573c8de, 0xe7bd9d26f2c5a659, 0xc647ade7c3e886e1, 0x555b27d9e409c407, 0xbc92740673659089, 0x9937bc604b77b522, 0xecce084bccc0c015, 0xf040a0322db51be2, 0x48774313fa139741, 0xb0ef318171f2b231, 0xe1a8325d453addcf, 0xcff3ab63cf4e5300, 0x53d93408ab26653e, 0xb62ec45defb84ad3, 0xefa0678ac0ae466f, 0x3b77863961b7dd21, 0x9494470fdb1023d1, 0x7fc8a6630e395ffc, 0x34b2823c1fed2f01, 0x52bac1ec78029fc6, 0xb5b3dc553d87f40c, 0x8123455bf05c58d6, 0x99b48302e79f7585, 0x13599a21e6e5dbc6, 0xfa9ff5b4d2450b6e, 0xfeeff34ef2cec325, 0xd06b665f50c684a8, 0xd79c8e679afe4e42, 0x396e4fa2fd3d0219, 0x2470437d8c870bc1, 0xc409103bdac24398, 0x716685e54881c123, 0xa682b9ee57ceaba4, 0x5b072e58695c9d2b, 0x10bc4ff5df48b7f4, 0xf34e0ebbedb97531, 0x67b217d3fef4a126, 0xdebd2bfbb4d93189, 0xac02afd9ed15866d, 0x58b5a2be17103744, 0x52d898777aa0eb3f, 0x5ffc2287b414c08d, 0xcc4b3bd481c388b9, 0x9178a0523ce86cf3, 0x4052d981085790f6, 0x9dcc9bf08a7c81d1, 0x63c090d225c1ca65, 0x10f5fd1d5c89636f, 0x86fbfb7d0d9fe33d, 0xbf0ce7f29378ada3, 0x285b12f15fca3646, 0x48f4bb45a0ac2d95, 0xd03da469351ed891, 0x6445027b3770e1df, 0x619121108e91a7e5, 0xae25db3c091700f8, 0xa420a3820bce4cb3, 0xac925ff9d73c8aeb, 0x938a9d2d71e5de1b, 0xb117d661029ee7c3, 0xd26c8298488fe9a5, 0xaaa6efe17a616f72, 0x0d762246d5447e63, 0x8ca2e2259cb0807f, 0xd4ee85540367fde5, 0xf48c06b4576464ca, 0x8939ce11f242d079, 0x61f661397ee42812, 0xb9c933b237d256eb, 0xef6462b82accd495, 0xc841b19f709ee355, 0x40a83507a842a821, 0x81ebbef507433d5d, 0xdbf8e966e48b8425, 0x6fe079d5474e7efe,
+    0x95164bf3a6925ba4, 0xc606b1562c361bb6, 0x5e4478e57790321a, 0x72681d0c09b894d9, 0xa76b0fba11901df2, 0x31440946e0469413, 0xc007a32eb76038c7, 0xab2153b29a4f0afc, 0x1f260a34d801d133, 0xd478c19f40b0bc11, 0xbd507cd60448b636, 0x8bda5189b7e59cf5, 0xffe796e29958ea74, 0xc4497c21ed10d1f6, 0x0969bce43ac75030, 0xcfc56de77cb6a984, 0xc562800010f354eb, 0xd5412ac41f5e7a68, 0x3adeb5267c1c4c5c, 0xe4f9374056ed7da6, 0xc0145376822450fa, 0xb49b18b4d61d9c87, 0xffef71bf89364602, 0x3eae710f3dc5a3a7, 0xa3d8d45b2acbeee2, 0xe0629b6961ad0ae1, 0x7cbdd9e4aed684c3, 0xad7465e6350cf367, 0x4bb75782cd44f10e, 0x0b779338049be08b, 0x9f4d62c6a7f950e5, 0x7b77447b20b2c5c9, 0x1c202fb8a66a3b97, 0x6a42dbcbda5bd100, 0x30092bd58ff3f2b5, 0x4932990285689a17, 0x4eb3bf0664008056, 0x7349d0f71c402501, 0x1ac5c7890be1648f, 0x804e58480e1d9f03, 0x7745715195b33cc1, 0x7acaab2ae047de17, 0xbef967dd731932e8, 0xf39b36adf14f5f39, 0x54de75433d72679d, 0xa024427954257afc, 0xdc1f2602b79f60f6, 0x6945f75ca9b85afd, 0x79be4d5842f6e76c, 0x8587b10172ed4972, 0xfd94c25d96f769fb, 0x58fc9b5d7e109c27,
+    },
+    {
+    0xa88a7951f6af7c26, 0x854223da3e20c283, 0x2b2e5c4b70cfae4c, 0x63d43562ba946eec, 0x3a222d1e4bd67ede, 0xecd349cc5df57d11, 0xd8813655f95d202b, 0x8da9c54f840b6f23, 0xe19c356b4c7b0e4b, 0x33f801964d3e714d, 0x35ec2635f57d0e59, 0x2c9ad2755c85c608, 0xcd9005585557a085, 0xaf5a46b900bc2133, 0x9586a1d04597e820, 0x092aa13847e86510, 0x255145c434b7e9ed, 0x3be538ef4cd81013, 0x75c1a51b0c0a12af, 0x97c6bc4285339205, 0x71527e929440d1a8, 0xd89efb27b6632cc3, 0xe37e45bcf5418690, 0x9c19764bdb37ef75, 0xed923b87b16de861, 0x285c43a3fae7fd2c, 0xbda2e10ae12bf7ac, 0xf1d3e73d1c39c17f, 0x5f476ec09a2b58c9, 0x9ed9986bc0418584, 0x428210cdf58a1cb6, 0x99445397fabf0c25, 0xca1937c189d93e58, 0x16fd348ffd5d396a, 0x818f411019500b44, 0x8a950a743d53ad01, 0xccc6eea71c3bdea5, 0xd4c55e5e176b9c4a, 0x3220310c80c91a0c, 0xbc91c8af3025a7fd, 0xa52ac42dce9de1d7, 0xf1d40470602653c2, 0xa27709831970c604, 0xd9f77ac5ffa4546f, 0x33e08cda8037240f, 0x96291c5479dbb9c8, 0xcf93a6ec7a12e2aa, 0x3c414a99ace70849, 0x122e90df962e9ee4, 0xf29546a83900111b, 0xec5663205fbfcf62, 0xe54ba5b1804c55eb, 0x29a082609d1b4607, 0x159d3fcdb03a178f, 0x432d87c0f1810558, 0xd671f5b26b5d3965, 0x54ce81d17eb43632, 0xf82d6c60b41a7546, 0x1164d69b71895a9f, 0xe83730e7cf0d4fd5, 0x1ec8d8088556425a, 0x9fe70630541a2576, 0xd0b459959e7aa521, 0xb5f8fd1ceb0d643d, 0x3f264f978c5656e9, 0xf43dcafceba77ce3, 0x75cac4b48a08888c, 0x921d45dd4616f967, 0xc213d9e1ced5f9f1, 0x19663c87a2146214, 0x7c27f197147eff89, 0x7dc892acb09d55a8, 0x9e5229b1d98ceae3, 0x4edf950bd9d0bc8c, 0x4bae80af91b8f299, 0xd4033b0de084e625, 0x087bc700c8b74054, 0x28e1a48622969996, 0x7f7cd7de54d8b186, 0x88c3d303be96ab69, 0x9b45d7933cf40caa, 0x6df9872317ad2f89, 0x024a9ec77017d4e5, 0x165ba6c512af3185, 0xcf84b4c2e74de3f1, 0xbc697622ec5647ae, 0x706da3e4a61fe96f, 0xd9d3f9db778740f1, 0xf9c0e5eca2f2bcff, 0x2d524da481d9272e, 0x16bba40a6cce9ba5, 0x750e05ec675e93da, 0xdc93851adb541514, 0x9f1c7da1e981d778, 0x190070f1a5cf108f, 0x2c888733a270fae7, 0x39456e2ae70645ab, 0xd4c9d08cdcb1f71c, 0xc8196c614ca3e3c4, 0xd2b3cf7103fae0e9, 0x8836fd84df92bcc0, 0x6d50b0d36d72027b, 0xd11a6103e091d19b, 0x31b7b9817144abc1, 0x7f7052bea785c0f1, 0xe427d056c016e098, 0xde52125ca177b382, 0x97e2625bd4b40471, 0xcaac8129ff6244ea, 0xe2d1883196dc692a, 0x5b2b56d6cc7f1e5b, 0x6fcf8aa25135af49, 0xe6101fb2e7485b12, 0x43f699984ef3c842, 0x9063be433e05bb12, 0x3dc329422371eaa0, 0x599eb4840a097253, 0x89352561c10c16d8, 0x635bb8b0d0f2b10d, 0xd782844495ae23cb, 0xc3600266328f4a5d, 0x4abcc70311e5544a, 0xd7255c63db4e07f3, 0x39f379dcb004d6de, 0x82b2de870ad576b6, 0xf77baccb37f7a560, 0xe69c1c3b10f96c4e, 0xce668e8e4b8bdfb7, 0x5861502a49b34f17, 0x478844ec4a8d5d66, 0x29c8670c21feb8b1, 0x07b62ef8da3d3f98, 0x03198d5e29bb4811, 0x1d7cc509bc687944, 0x0f4bf147856e231b, 0x9bbf2c791c6f62d6, 0x97ea02cecb758f59, 0x519c16e96124ca95, 0x4590342c9e56d7e9, 0x6c3162803d692c2f, 0x77d690a788f2c008, 0x5b26afe0c406bcb0, 0xd2edea6ed5cbdea3, 0x23312eaddd21053e, 0x25b89ec08450bfb1, 0xdc6c41322241964c, 0x84b621796cddbc43, 0x7aa80d38b629966a, 0xfaddb1d7897daebf, 0x52c9251f7c37b125, 0x660c122e18c6a660, 0xed69ac30d66ce018, 0x417d46843cf3717a, 0x2eb3c3e35b5d50a2, 0x5236d099920e9c43, 0x82e41f78bb052814, 0xa6c70570b1dc8f20, 0x7c6c07e98c8192d2, 0xcfc74b2d5b86c5dc, 0xf1c4bebb6ea53836, 0xeee8de556fe348d8, 0xbeb43fa9f9ad4d1b, 0xd7bb5de4e32801ca, 0x331badb0d8fb1447, 0xcc484b71eb42660e, 0x08bb5bccd9b6cb79, 0x061a30335f58167a, 0x30ae0e1ee7d10f15, 0x0575c89117a048ac, 0x4cb57a8db1042199, 0xeb535a2899c5d90d, 0x6ccb63317da9b90e, 0xc2e045cc5a36ab33, 0xd72daa0318985a3d, 0xa83e48709485bcd3, 0x88bff618e9ce00e9, 0xd130f20674e94346, 0x9c604d925b7c78e9, 0x57f9630b7996c471, 0xf4167ed7bf51929d, 0xb47df98e06143ad1, 0xa91de782989a17a1, 0xfa450c2a983a516c, 0xd9f1ffc1142a974e, 0xb12fa960ef7acb7a, 0x9d1145c686991175, 0xc843bf8185df5cf5, 0x999f7f2a410af422, 0xdc14ac5eec660cf6, 0xa7305a02180595a1, 0xb026a76f4950433d, 0xaf56dbdf42def30a, 0xc9f77d35c81844d5, 0x3e3dbe0b889f855e, 0xb7c6251ff5793431, 0x29970f12adff286d, 0x0f4256a46c9509a6, 0x861398aa76312757, 0x80117a92346c98e8, 0xcb941de6d36d9a8d, 0xf96763bbf815159d, 0x9dee36d76883f2fa, 0xce293626a8588322, 0xf2cd528f36deb9fd,
+    0xee498833047760ff, 0x1735ef925ff3d8c9, 0x900e8a7581ba1243, 0xee66bb7219040439, 0x6916ba9919ee8980, 0xa5bf86f79c70dbe2, 0x6a4d218937106d41, 0x75b536d8a0e5934f, 0x15bfc0732c3a95d8, 0x60d202befa86f934, 0x8fce130f3a4721ea, 0x1dd04bc6bcddfe7b, 0x6253416ae2d529ab, 0x7942ac94c61a1103, 0xe30727ae935ad303, 0x198c70f055a0e11b, 0xc02d61abd4e871f8, 0x2d7fa01655e098c7, 0x9a18702441c63e27, 0x89f16573478cacbe, 0xf61b4d689888d5ba, 0x05854fce331ed312, 0x25adcf465754d6c7, 0x70e46a973b538cb5, 0xa567e24814e5eadf, 0x91819099ee2f7f3e, 0xa3fd1834aedf96f0, 0xa66c001124586041, 0x1de1d70453fd3ffd, 0xf08f8c9a3e638f4d, 0x5e7d8f4f27e06fde, 0x753ef15506e33866, 0x70f4901b22561095, 0xbf657c8be01ac8f1, 0xe3de164e00ab04ea, 0x88fa9dacc90e5e8e, 0x807af1339e75e9b1, 0xc2051e7c2d63f73f, 0x2aec04cbe0b6122a, 0x1f75fa1370b3c282, 0x8408940d0d46a3ec, 0xa7f7bf86e49f72df, 0x41ee278d9e440b0c, 0xff2158d4506077ec, 0x577a7ead4560808c, 0x7528a71b079c5f6d, 0x5508b7bf4ee8a355, 0x36cb40725f579ddf, 0x6f3628b1c380efdc, 0x53603471e5bde573, 0xb64003bd645a150b, 0x057df07f11dfd440,
+    },
+    {
+    0x9a77c7345cda77e2, 0x1ca85a2de1696bd9, 0xa5d9f1910f778ee8, 0x0f0568887c6835c4, 0x24d690187d3653b4, 0x16910eb48bbdf5d0, 0x91ad593d66ec3534, 0x9c84391fd555b7ea, 0xb7fbb2192c63fe68, 0x7cfed94f758f1072, 0x18dcb3b59a47ebc5, 0xa55f240d78072f6e, 0xe9c6c21cca2f8688, 0xe19eac5f19e46ac1, 0xb2682eda034d9cac, 0xfe3158c2b0cccc30, 0xd4893a7d8dfcc06f, 0x45758e32476450ce, 0x566595a64c4095db, 0x1c4d3bdf1b7f3f6b, 0x5a4a224a87582225, 0x67b4874cfa50d87f, 0x4afa51637244ed08, 0x972730da8b35e3d8, 0xc69ccdcf7676fa8f, 0x38a300f1a694a5a0, 0x58e578aba836edd9, 0x66ed06828b3cec53, 0x90fb57d1165837e0, 0xf8a28b42413bf900, 0xef361c212eee9d10, 0x584b1ffa73cc8cfa, 0xcae9e0a934eb7d6d, 0x54167f5166af683e, 0x0c86f4cc8b19543c, 0x6e895e81dead892f, 0x4d3712e2ce5aade8, 0xc1f214e71386fc5f, 0x5a919420900ce0b1, 0xe0d695ae0e37816f, 0xf531bb6a312b2844, 0x2ed6482bf280f375, 0x6f2cc06de0ed8798, 0x67ac3ea472c443cd, 0xf78f10414e69031f, 0x64a6fe0d3752f133, 0xfc579f78ad9c5fa9, 0xff7a4136a5887609, 0x83e6fa63dd05fcfc, 0x68bfbd0be612a1c0, 0xca9425599a7540e9, 0x95726b8f12208486, 0x79044fd8de167316, 0xd7a9dd6654df09a0, 0x7c5e2c454fdd3efe, 0x987a944a6698f80a, 0x14180a87e1e47dc1, 0x6e20e89e26bde6a3, 0x4729dc95fa0662f9, 0xa821bba5ce926e50, 0x25fda875eb4a26fc, 0x420e8ab43d767572, 0xbd72b87e93afe4e0, 0xc613165495fb12dd, 0x6c911c526cebf7b3, 0xd5375557a5e06a20, 0x7827cec995f9a783, 0xaf5bc58a7a6256f9, 0xa804f99b2986c91a, 0x090093da8f2d134d, 0x23a07269e2b3bdc0, 0xa0d112abebc21ead, 0xea60ceb3baede59b, 0x8edcb89e1449ebd5, 0x027227159f41556a, 0x818a8f349b46f3ca, 0x53c2fe4592c6d0b7, 0xad4c079cdf7f0f8f, 0x29ca4e889b080e1c, 0x0b24ef9197d62236, 0x8407220fcb49ff43, 0xf1ec64ff67d2a53d, 0x3257f1ab7b151e38, 0x473dafcfc804d842, 0x4840a949d38ee2e3, 0xd545611202ed1527, 0x2629c7420b29415c, 0x98dfbf9765f495f8, 0xa89e3299b7ff0dd2, 0x0acdbaea06493258, 0xd9fc11c216a06144, 0xa742f8c6441ceaaf, 0xa9bb953a7f594f36, 0xa5d25771ab29777c, 0x07467df7ae8651b1, 0x761c1e03fcea8625, 0x4fc0ceb9103e332e, 0x194f9eb3126b3469, 0xaaf201319cba0588, 0xdd357625e56d8d4f, 0x4c59e1e05c9c6e3c, 0x5de60410b28490cd, 0xf7b5fca38dbe10e1, 0x3cc4669d484a0e58, 0xaf5e4a1497b92520, 0xaba9ae98f8eb01ed, 0x23dcb749536e6d96, 0x176cdcb765585dfd, 0x9fdcc03eb6cc8aee, 0x7ef90df8d70ac032, 0x869ea534767effef, 0x3192464580f307c9, 0x0f364ffd58da0220, 0x5241ceea28126c87, 0x93a33f579362d687, 0x0ba283ed8c936244, 0xdfeaef961188fc6a, 0xc5fd5d7d19be65aa, 0x6cf7691fe8b97941, 0xb098cfdce8f83ce3, 0x2d2efd8e86efc5fb, 0xc847895f32509250, 0x7d5a439ca9805332, 0x6c73f8404c1d1b78, 0x63537939023b3534, 0x8f8c41a3790edbba, 0xbc07c56e04f83c0a, 0xd7776d8ceaa173b2, 0xb0958a6df751ea82, 0x72a49d76e3fdaaaa, 0x9a8755ed0d97c66a, 0xcc0212498aa9cf50, 0x49e15b32e397894c, 0xe7c9c8e6bbd3da89, 0x10320696fd4d50ca, 0x2e8e5e50b61234de, 0x65c4cba41a7c3d6c, 0x611009b13d54d04f, 0xf8a3147d1f5f8703, 0xa5e55f0497be92f0, 0x40972e0f332a88ce, 0x993e6b327b8b1b75, 0x3106bd73d7354afb, 0x38d21d37d5a32391, 0x23f2e73b6b532c54, 0xaa59ee2a4c0c0ae4, 0xa872464eebe07296, 0xe17597ea6d64ca1e, 0x7e81473b38c3a278, 0xfe779897a3a0817c, 0x6d39781d430c4199, 0x32f38244bdf00c87, 0x9170f5844adc6a17, 0xe7702538eab72626, 0xfc59dcf970709ec3, 0x4f5a5b877dffbb7e, 0x522bb999b577b0ae, 0x2f88f8601b6f03ec, 0x9359cf049c509223, 0xfbc59ca041039433, 0x0a130f79dc2c18dd, 0x2828b4b5c02cacac, 0xd323880c43c9088f, 0xa9d0fe1a5b95c24c, 0x4709d409107ce76b, 0xe3a966ae902a7db8, 0xe2d8b08e6ce7a27b, 0x87342131b457b8a2, 0xec10987c7b4e5237, 0x0686be69892b3d8b, 0xa10e34f060579ac4, 0x25b4f4d8893ef395, 0xfdf80694a47c1318, 0x4a7aef81eb0f0ccc, 0x4cdd7f1f2e26765c, 0xcdcd19799d02f601, 0x2a73302b9a783cec, 0x41c90bcd5a4048a5, 0x903cbaa13552c646, 0xf21e5360d91f720c, 0x61eb5b8837518aea, 0xc329fc22f184568c, 0x8c7bae6e28c5a811, 0x487b2e663c332db1, 0xcfe6e9d0990c269c, 0x2d470bff7ee5ba04, 0xdf7a75e3b806df45, 0xaec2713615ccfc13, 0xe9141f72150dda5f, 0xa43273d143e08a43, 0xe99cfbfa3c6c6ad5, 0x9c79062ab5592dcf, 0x4df4a40c0f04e044, 0x52dfa998e2dcd87a, 0x747314f727da3567, 0x5514e3fa427ea9cc, 0x4da2a99d17b80a97, 0x1d399dae1e0500f7, 0x48c22762a4b945c2, 0x90cc863a18f4d095, 0xfbc1d454dc9bd7a9, 0x5f20c7f4035e7272, 0x44169fc6292aa860, 0x8bfa97163371ffad,
+    0x0e4293b569cb4d08, 0xb859b978360af10a, 0x07f6a9aab3b807c8, 0x010aaf41927d14ee, 0xd79589856a744970, 0x6baf36d0195c5065, 0xb810cc0627c17bda, 0x53a7dac8761bfde4, 0x4f754280adde73a9, 0xc4220c627d177648, 0xa5b40676ec727b56, 0x188beac8841560f9, 0x5efc63712cb61fcc, 0xe08979b6232ed50c, 0xcc3cb06cd0d7bebd, 0x5d3aba8695f9b103, 0x8b585f9d522fa83a, 0xaf7abb7baf0a4de8, 0xc1485c2ad8193e3a, 0x920d19b1f34ff0c5, 0xda68cfd7d17036ae, 0xa4bf1ca38d152d89, 0xb7f8901716ce010b, 0xa07214f75395d901, 0x752d62b076fd1fd2, 0x997e7d2bbc6fb233, 0x32c2642bf5bba727, 0xb7565ca330a2f23a, 0xcf30d62aa921e747, 0xc35e14e2fe9d2f9b, 0xf041eb4dce789e7f, 0x1ede4e7bb4ef9106, 0x4c7037fd27336b12, 0xb3cc7c62a252d53f, 0x4eaa8a8db5134ba5, 0x386609a59b8caad9, 0x24572e5c57a6a540, 0x952f057e75b2d9ea, 0xc451a53b0896ad06, 0xced825ff76147b56, 0x157888dbbff3c714, 0xfe327864760f6a02, 0xe14812fbf2c9f4bd, 0x2681e1b1ee53bae9, 0x9cd66fd424ab5257, 0x2d75400927ec4c76, 0x99f40c24efe79cce, 0x76cdc64580b62737, 0xf10725bd0cfe4778, 0x77fb36ae946625df, 0x77ef10083f683fd0, 0x05eeb4a1ef54449e,
+    },
+    {
+    0x2a56217db4e684e9, 0xefd9be7c8ed05934, 0xd5bffad1bd8f125c, 0x03fb5363e1edfb14, 0x1cc6cc64c4d0ae1f, 0x0a42031ee4292ddc, 0xf301f9059c9f18df, 0x1af5471bd5244a32, 0x7c7ea2e05b9caf0d, 0xfd14b6b8f328dbbc, 0x3c8434c59f2ac51c, 0xb2c4539718146cf3, 0x7c57e402dfa9d30b, 0xe32ea9139bbaa8fb, 0xec7fe2183e1047e1, 0x8eb981ad72a62dd6, 0x433a2771754ec560, 0x5b4c387fe5449a87, 0x25090e65b33c7fa5, 0xfc479b37a179ebb8, 0xd4bdfa34464c1ef3, 0x5e561efcb0eed040, 0x39a48202df8f4c7f, 0x598f1a20ab01585e, 0xaa2d4a27ec04a943, 0xf8c313551fe9b622, 0x4ae1ceea76c17a7c, 0x6fe875a68dcc86e2, 0x447ad01c13e542c1, 0xe400f20550810995, 0x96d49f014cd98fca, 0x5ed385299c7b4dfe, 0xf76b155e53afbfeb, 0x5e04d2ee1866a1d3, 0x7f2947d0cc9fbdaf, 0x32cd48c6924917b2, 0xeda920681759dac8, 0xf128a276a5875d34, 0x7b61d4508840d642, 0x56fcc9b9ec0c5b19, 0x286ee601e0677a6b, 0xc2d38be64e18d90e, 0x97b0f73b230f6c6b, 0xe51c164458c48dab, 0x5e8c90d219b3f05a, 0xd4f9042b2a886a9c, 0x4973f70976bec492, 0x8d125fc3576e1f4a, 0x7d04238d15b03dbc, 0x1ad3e0fb8cbf01ac, 0x552889b6f89b226f, 0xd3be2ccfe71bad98, 0x194c18a44225502b, 0x3c1342293f7d8758, 0x3c55a71842d822e7, 0x429ef302612a85f5, 0x8778fef06d83323c, 0xd4a6dccde826dc6f, 0xd08961f8bcd0aae7, 0x8f7bddf23e6e5943, 0x336b311a93c6c5e1, 0x0ca66bd597df1ea0, 0x305b2bd1379da391, 0x44ba8703ddaa9264, 0x19df8550c98bb110, 0x654e939254612508, 0x86c299ab6ab25a0e, 0x7b0890ae521e8533, 0xbc43e5ab2fe50a30, 0xd92ea375251cc568, 0x148e8f2917940c46, 0x6cd9dc6ff544fe7e, 0x8b1e28ab642e561f, 0x660776023bc2cf22, 0x34c2dfa5b20f690e, 0x8696dcd5d31e8443, 0xcc4fab161a9769f9, 0x1666e5268191f4b8, 0x190f9860f9715459, 0x9b96062d8ace3903, 0xf9769277ef1e35cc, 0x84da71550dd5e12e, 0x28e9114a9d82f629, 0xf0123e1a100c2b32, 0xca7b2c16287ff884, 0x9e033549734d526c, 0x1047bdb6d289b437, 0x7d5e90dbb8dbfdb8, 0xe7f5db555bea587a, 0xe0813e5683b7c932, 0x7f0cf9dec9364053, 0x4b51e815d18ba7b0, 0x5442aae2b67f3f75, 0x3e016e48f53685a7, 0x418b19bf6b7dae9a, 0xa5cd71e10d2b07bf, 0x77910d9f7ea5986f, 0xbca898a323103afb, 0x9a1a705c170bc1c6, 0xd3af2645987bf970, 0x01e992f0723bf52f, 0x587600e55581ac01, 0x67d16733b2effc05, 0x4d9b139f2edbb511, 0x7254a7b70ccbd629, 0xeb34ca5036cbdca7, 0xef01c6006ea8deb2, 0x52825250d2fbae30, 0x783058f42afbe1d5, 0x5c8777aa43452402, 0xfadb9609e3097035, 0x26a60ccf9c9a6bcc, 0x9c0f764a411520a8, 0xa452a6ef30600db1, 0x07ff2a1a7dd635b7, 0x7421a0fccb49019a, 0xcd366f636226d1a9, 0xc3789659bc65500b, 0x7eadf2fc78431397, 0x39ac21bfaa8fb1c1, 0x1bd1044a1056e136, 0xd8d03bf161c45d8b, 0xa511107ae70ef91f, 0x4578577bf3401ad7, 0x7c747dd3185ac6f9, 0xae0f70b1e3d09eaf, 0x34315f4df64ec30f, 0x18b14e9991d7e75d, 0x2fb793a55cc3d6a1, 0xdeec732db958e157, 0x14822e15ce378be5, 0xa7890eb4399904ff, 0x03635ac694453df4, 0x3db6ebf58882a657, 0xb48e4695b2ba4450, 0xa3737c721d865244, 0x02d14362965a4abf, 0xff08ef4806d880aa, 0x75325bbce2624d55, 0x25c08941a17f15a1, 0x5a9bda82aedf1631, 0x86a6454b0ad89de0, 0x3431e81e69f3272f, 0xedd40f8308cf1760, 0xe02e37d2a28bf538, 0xe9a937eef9393b02, 0x1624ce92539df4e8, 0xabd1b62b1f30ee74, 0xa9cbcba7fa08005f, 0x0cb2ef53a66f43e9, 0x9e4380060bca9250, 0x8d0e1a93db1ce300, 0x38c355289c9e2599, 0x505e684e514832c3, 0x4a2e806b8d273941, 0x9cbeddcbffbe7c68, 0xdc5679abed54d763, 0x415002250f8db2f2, 0x7068cc805ee3fcc2, 0x331e549068e34679, 0x2cf7b17f7c1c1429, 0xf56b5b8e160e6f39, 0xa1a52ce0472d370a, 0xa019766ba1516e03, 0x50d65e1a44c81bc7, 0x2b821396f26b4f8b, 0xca68a52ad5b4d3ee, 0x5b8c38f8751ed2a1, 0x5d7f679a79227e10, 0x554296b41e0465aa, 0x9c93098b0649659b, 0x6e4b0c5f0b38c0f0, 0xb98a0ed0d12d0683, 0x3df67a148a059c8b, 0xc540796f70d1da65, 0x30e979d9a14cf535, 0x2a01c7abbdddcd67, 0x56b9828e1afab21a, 0x8ba914926bf7b789, 0x73ce0e23ec8277aa, 0x88946c2ed16236bb, 0x78b3f48c210ff713, 0x353f05a0cd499072, 0x4ca2b54469718a85, 0x7d49332ae3aa820c, 0xf91dc24e6b26d0a9, 0xa44f11be64887621, 0x350a66d62dcdc0ef, 0x0aef379c4bc78b26, 0x8db49dc3b772f90c, 0x1a6fb16f766f8569, 0xa5e6512c63882797, 0xd20a287e698666e4, 0x67368df4deee5bff, 0x4712556de85489d5, 0x9e7b838a28c71245, 0xe66f3915761df924, 0x451318769b9f2e7b, 0x7a3fdc9968ed3682, 0x6983aae7ce79736e, 0x38d4d5317189ddef, 0x599cf389921caf79, 0xc79bdbb20f0a655d, 0x4f8b4bcce704f85a,
+    0xc348cd9a28c98ff6, 0x6e320be1b5a41d45, 0x8fef58da19bc2f6a, 0x9194b77af87e49e0, 0xc67e195b0c368ea6, 0xad14b1accc6c322f, 0x668645f2798545a7, 0xf6e30036843fcc30, 0x886410567bbc5d8f, 0x818d5b833ca5f60c, 0xafb2568c92411155, 0x7d14aaf154c7e21e, 0x3bc86219a3d8bbc7, 0xc10ebbc3b69e9412, 0xf7b5f7ff0c119182, 0x03bc3b8a327daa8f, 0xcba179fe94ffbabf, 0x59ab5d718f499bac, 0xae52c530c3d2ccd4, 0x9542aaa796d65f7b, 0x82676e2203703ed6, 0xd42ec1caa36c91a6, 0xf998d9cb425d1bea, 0x2ea9111019bfde4c, 0x47c73357f1537f18, 0xbecb28ccb3d5ad74, 0x2ee54b2939756235, 0x926750d275c4ca4a, 0xb0876ebdf17df92f, 0x7e46ab036486d51f, 0xf82281750609d41b, 0xb4bd4ce265f7acce, 0x964c14b31bc4ff70, 0x6d164994e041a402, 0xadfdaeb0d3f5ae7a, 0xce2165e87f4d6f83, 0x6847c6a461ecab8f, 0x81d0538a06450fa7, 0xa421f73ef5306975, 0xd5be7ce8e0cd329e, 0x7dba1cc002b8b151, 0xec22e7c23e5f55c8, 0x9c9d1fb4ec0e97e5, 0x49f970017912e3a0, 0xaed66e59557a741a, 0x0d0d16ee28d6c13d, 0x66af528b4dd156ba, 0xe14636d09a64c4c7, 0xd9aefe8a0b2b52bc, 0x52d78a74baf8f8db, 0x195318f6527513e3, 0xa3506be0fd193599,
+    },
+    {
+    0x693c954182b4741c, 0x68f2b504c79f50f4, 0x9ea2d3fcd31219be, 0xdeb379709610abaf, 0x252c789c065bc767, 0xd53f8b1079a53490, 0xf9a295c7237f72d4, 0x16b079440a1ee6e5, 0x95b90c538974ec37, 0x9f7e700f6115fc58, 0x226cae7b140ad1d5, 0x079694f207e3eeb5, 0xb5d9ca4428146a90, 0x602e940309e3812b, 0x9c997f99ee478ab5, 0x57ce949fa068ed62, 0x2bceddb522fb218e, 0xf6cda5f368302b3b, 0xbc3415845cb3f0ec, 0x6b6b6c1124c65f20, 0xa70c0cf3a07fddf2, 0xa82d3063e783ecee, 0xe45f2bcb8cc21ecf, 0x31a06bb1c2c4c87b, 0x10cb087ac4218670, 0xcee2cfc8aab38285, 0x32dc35d2c9e66a9c, 0x54d190bb546a7407, 0xab83af6298bfbd45, 0x3299f79be4d69bd5, 0x4232803bab2cbc3c, 0x365fc10f694dadc7, 0xab6461f35ca74fd7, 0x4d79db0109543f2d, 0xc20a65a40c1a7c75, 0xc480935ea03366f2, 0x69051ec1905745de, 0xc434caaf41d67fb4, 0x43600d4b75af4651, 0x26a0263f6670d3bf, 0x22a56bf53b14e42f, 0x27903ae759cd1bf0, 0x4d93a0ffe7823e59, 0x51ba7d675e503778, 0x31d1591bc39e1ee6, 0x47abcfb03f7fbe6a, 0xc374032ffbb2529d, 0xc947a9a6d22c8a77, 0x4cbdb8657757deb6, 0x3ddac2d6494105e6, 0x6a71d630068b6937, 0xe91d52f7c517c4a0, 0xca5b5b34e3d39a30, 0xe85aebe2a54f3b0c, 0xa6c6e4855b9bc019, 0x180684b73cf3bf1c, 0x899c91feca383c61, 0x2a86d2bbc21d072c, 0xe360fc3e5090918e, 0x7fe7617c889df4cd, 0xf623d4c6855984bc, 0x853e73111dd8b7a1, 0x44019810ef8f4e5d, 0xa3a3da1cb9b0d365, 0x623d252bfad4920e, 0xd0a7e63d2c3c1716, 0x0ba0707098019a41, 0x5e53c2e2c983c23e, 0x450b52be3d49d69e, 0x9ab447f195a55292, 0x102d78a093d7f1df, 0x3f7c94cf01499b19, 0x33e76d65080e3507, 0x8359da72871602eb, 0x5ef38d12d198cd49, 0x086093b0a719e03e, 0x79d43fc327975daa, 0x40603f81e5322103, 0xa3d0c309f83d6668, 0x7bd49ef8dab4bf13, 0xe8c42b598fbc95d0, 0x40e3f510fd4ee26f, 0xfc2336b4a41d7551, 0x4f05c2ce8cfdec8e, 0xac2751b38bea1d1c, 0xe144cb59ecf184ef, 0x2d9b520a145c8c16, 0x5406f27989bc96f6, 0x23852a7bae978b47, 0xd524b42073f547c0, 0x576b9e7e58ceb7bc, 0xd9fa306c26bc5f59, 0x98fb2ccfdb313b82, 0xdd9af8268276b52e, 0x0ceb16a78c1976ce, 0x36af4961f8495a94, 0x73806309402da06b, 0x95039f7c76076df2, 0x6f7e82fae9851c79, 0x109333447490d749, 0x1f7b22431e0def0f, 0x7dc4611f05707f17, 0x5813730f34eefad7, 0xdb1fcb04572ae4a2, 0x407d37ae2379c94d, 0x4954946ac085f469, 0xf458acbf19e6c1dc, 0x2be788825f25be5c, 0xb0a0c5df6f9288f4, 0x6ea1bbd73d1ae82e, 0x6d5e97db179b28b9, 0x75316e71e46cd0e8, 0x45e01681319d4af7, 0x6040c6b875672aac, 0x7410ff82c09dcd91, 0x7d0089bcb5a48794, 0xa523a1c12fa0802c, 0xc706705b526a1e20, 0xd9f04815c0a6456c, 0x5f4636135077785e, 0x739dc5f1763648ba, 0x7904fcb8dd9bc1f4, 0x1a0ada96d0e5c541, 0x536030bc09131cb2, 0x5ca7a3fd6cd28982, 0x5a547f3cd3550cfe, 0x2ba4bb1c218f771f, 0x0ca35e7f151cf0c9, 0x7df42783cbb3c2a1, 0x6e595392af3971a3, 0x58cec5b3c93fd809, 0xe0542bc9c4bf2efc, 0x0ed113ff0880017c, 0xcb83af5016ca1a5c, 0xe6bcdf6f4858fca3, 0xc62a665e28b71345, 0x7a54581d47c2c867, 0x4caad9fc4b10e1f5, 0x40f2f726071d98d5, 0xc2359aa4ed087f16, 0x9b9aec929bc2c626, 0xd8fd364849d25a77, 0x8c0c8dcb6f8737cc, 0x94a189e5e3927e4f, 0x27f8743006e6c34e, 0xd5a7ee0aa548f683, 0x063e2b21d7d094e9, 0xd4213f38104a5537, 0x71b6218a8230c5ba, 0x10b229265323b4d2, 0x22234374a2734482, 0x569bb9d4b9a86b85, 0xaab0651b418d3b46, 0x8571f8d5871bbf1a, 0x48f130a098213091, 0xdf03437620ff0b3b, 0x3cecf68a577637e7, 0x6a696cfe69915e8b, 0x8dbeb76a195c4bce, 0x035186a897f2a454, 0xfc2c666ace71b74d, 0xd8eb7d118a1f8f80, 0x83047174cd5de1e4, 0x23ba618cb482fa3d, 0x26898e552ea4231b, 0xc04fe13b89da5b34, 0x0cfe71a23468a6ac, 0x58cafe5f25162c0b, 0xdcd2af6d1599e02f, 0x7935c18e94667b84, 0x11816440f6daf103, 0xdb45ed1052ac7f5a, 0x7f800876907b2cf2, 0x47ad23232c79a575, 0x31c14a166b1ecfa1, 0x2016835b15bc7dcb, 0xe36e6f0dc3163bf7, 0x8ac4fd23c1434c1e, 0xea8658a15b3dc7fe, 0x9e097194ae34823a, 0x4c512342a1df82f3, 0xbc466d229b632dda, 0x6f58bf666deb6ac1, 0x1cf0ce404ac53c42, 0x0d7b776466bf4144, 0xd11ec613cb7fd08e, 0xb41380a877ab9a37, 0x0c3f31d6ca1f15da, 0x5c4367d3b7153bb0, 0x008f82043cdb673b, 0xcbc376c830250dfa, 0xd69657607aa62bf1, 0x76c76e0eb1d2e2a9, 0xbda67585c841a49a, 0x935a6956f82ea2d4, 0xf2ca4947064cdf9a, 0x394ae4a2cd6c5e31, 0xffc5b5acf2dd77fd, 0xd2c1e4b30eb836e1, 0xa26050157ce432b6, 0xb4118e6f65668938, 0x4ba5e3cedc2f33b9, 0x5320d4d82a7f3a52, 0x6500f53bfe4e40d4,
+    0x74b48ff4c216ff96, 0x24c60a293dc8ea12, 0x8be1ac3d6efc61ae, 0x467684a7fdae0f6f, 0x8fe8c9a92a7f547b, 0x7824e120587a07b1, 0x2fcec2a5b63c3bf4, 0x1b73d74a5e67ba18, 0xb4581385dc6d258e, 0xf45e65a2291cca42, 0xce06c8be6f931f25, 0x83256e283e8982f7, 0xb9ae8d52823e0688, 0x6dbd2c9b6581a333, 0x84a84ee825e9ebab, 0x512149c44e3a5fa5, 0x4080391afd375a5e, 0x1b56667fcc708c58, 0x758f5051d80f4990, 0x3c7252fd2f439ab6, 0x22c150a13f828513, 0xfb4e2a09669176d2, 0x3762920064169341, 0xc78b23529c6bd59d, 0x76a6d00007ce11f8, 0x1ffd518cc0483314, 0x20a6c708a2ec7bb3, 0x24b4ddd0f6d1b755, 0x298018f86e982755, 0xb0f82e23a1e85822, 0xecac046a868165c2, 0x5fdf358d794a07c7, 0xb72398853a968370, 0x7418df33c3ff125e, 0x172bcb5300b3aab7, 0x2a4048baec8aeabe, 0x7ea98016f7c85ec1, 0x1afc507dc66113a8, 0xde2cbcb467b19ba1, 0xf5a60fe30f9e61e5, 0x25cccf868b6b0e3b, 0xce8a8992473f0d92, 0x2bed9615c305de79, 0x99cd4a3a30b13dcd, 0x264a0526e87720ef, 0x7c7b33cb6639c9af, 0x8b19555ad810fc7c, 0xdac5e39d6f01a264, 0x6fabe6f54bfe61cf, 0xee542526eaea7792, 0xf7feffe02cab8d9f, 0xc21130a67aaf401f,
+    },
+    {
+    0x8c9519953a0631c2, 0x865d637bb43b22be, 0xacb65a491f886c05, 0xfc4d38f8192d9757, 0x8d395f9f6dc42f0c, 0x53087dcadb04465e, 0x5a986f1d06d0fdd7, 0x60ecef2322d82ca6, 0xecb5726c2d321cf0, 0xc35d5a4bef2702b1, 0x5dd2b609c322b708, 0xbcf3b95dc3d729ca, 0x1d9170b9e1453366, 0x186553051a4e25be, 0x2a53d48fb82f77ee, 0xf46c760a61c999c1, 0xd0cc0a9782d8fb6c, 0x527f4ba4e3b5aeaf, 0x4bbe8f93317c3443, 0x045620b59ed19311, 0xc7b0c55124f293df, 0x4109b240dd664668, 0xe3eb86d84935282e, 0xdb084f7c80268067, 0x77453a7266c1d524, 0x64f313320afdd17e, 0x8bae4a0bda3e3e66, 0x2de67df4d33aa930, 0xa3068a663be947d2, 0xcea196429334afa7, 0xe7e0cea87cfab510, 0x741c10a48562fa44, 0x14494d5a04a6c202, 0x40e7a06890d50be4, 0xe1aa5f52250a65b5, 0xf4a25f6b7e5a6194, 0x565c00e7636c7c52, 0xf75c7fb672be9b3f, 0xe6afa72a659ee1bf, 0x6b64f6e131206bbc, 0xc890512940bbd56f, 0x2534086dc1a69681, 0xb00843f54690b56e, 0xcc5516083f0bdb0c, 0xa95c2e34fdc988c9, 0x849b826becbbf7f3, 0xebeb852f8b802332, 0x961f2b23e86a263c, 0xd72a88270464d6f2, 0x6763243e613eeff2, 0x254cdfdd48b072a1, 0xac3ab9855e94f5f0, 0x9fb7af5dbfd5dbb9, 0xc94e6163f1c0a8cf, 0xf288005dd9ade2e1, 0x87ef56d822a9de2d, 0x8ecab7bed5a7e85e, 0x83ea1bcff015e925, 0x1271137d38e2ff89, 0xa27a077e369d6a32, 0xb9f8913c07ce52b7, 0x5fe99b17468419b0, 0x9fdd39945a89b502, 0x794bc51777290510, 0xe852021031f3314c, 0xf67cff6fd964bac3, 0x7ac582022256c0c0, 0x77604f6943584c22, 0x323a2ce1c1306628, 0x08dd88c98350de11, 0xb8342e4f577818ca, 0xa5bc1d61a6d047c1, 0xddaa89ea44bbbf88, 0xc1a542d3e1620265, 0x8b764ba499a45119, 0x22d9c1904111885d, 0x101ae7d9ebba120f, 0x82ca6f603e96e10e, 0xd05317bd6f339e88, 0x54a4848959d20898, 0x00b0acad87d94f8e, 0xf2ab9bac6fdc358c, 0x95ca172cb6bf18f7, 0x08291eaf145b6f9c, 0xa485afdec21bf027, 0x2c25fdf6dc219361, 0x1a5318cefb7c2036, 0xcfcfafedd3ab62d8, 0x4c7ed76ef4b8ccd6, 0x473466ac0ac16385, 0x2c04553a07032b8a, 0xd31e4a32538843c7, 0x12dd65e138f6644d, 0x161130caf6577e8b, 0x63e5b4ff2c5441b1, 0xa8b0a84d06a58ed6, 0x125dac5910d5ea2b, 0xa97c12ae44459255, 0xb3cc802562a5fd6c, 0x4409c1575626adaa, 0xa067d92c8e8b8dbb, 0xfededf949bad949a, 0xb9d172a91a3a9009, 0x2ec5d64517692699, 0xc8412695f56eb641, 0x7ad115e4cb00eddd, 0x32c2242ed438b420, 0xa429ceede7138969, 0x7f6db9822cf384e9, 0x2dc39b0bca1f2932, 0xde92a487585eaca6, 0x87c87d0858a70e1e, 0xd2419508213e36f4, 0x18bbfbb774311ea8, 0xb625e058422964c0, 0x4334b1e764f683ad, 0xde817e4a6a1f7368, 0x39329302a8c2c2a9, 0xaa4c1e6c6f785843, 0x8dc7ef86d9d2d0f7, 0x597965f6f249653a, 0xff346827822aa645, 0x368a3637d29d39f2, 0x93775a1b92fc4b06, 0x310b2ad79909ac04, 0x689a60e616b90265, 0x85e4b485dab67c13, 0xd90f3f8b9d54d5d5, 0x7e14be6743d94267, 0x48faebe7430aef01, 0xae5b11c885ecdfb5, 0x52eb6c5cef5587c3, 0x17023e5091ba5663, 0x5bdd13b983a987aa, 0x8a398958b5466a69, 0x6dc644518b81ab09, 0xfb4bb0dd2d533832, 0x1d4baa7ded0c80ed, 0x97d94a33f3f1dc18, 0x4eaa82471d6d2466, 0xdc80226d2e5a87b8, 0x6e7946ed519df314, 0x5e78f110eb7b9c3d, 0xae1445dc0b05b119, 0xaf3a2b4ad6a7e4f3, 0x2b3fab4aff638a6f, 0x482ee0a0ee944717, 0x6c638acb12e6ef9a, 0xf59ffb896ccd4720, 0x4c3e1c2d1a151201, 0xb6f9b34cec313119, 0x4c44261e36facdd5, 0xc59d9c212c3ea06c, 0x9388664dc1d30ca2, 0x29b9d9b5eabffb2c, 0x7808d154b0ec1b3a, 0xc4aaf60b740308fa, 0x6f7aca6f0741b2b5, 0x2edf4f889c09621d, 0x1fa820ee83261292, 0x7a162fd7a082003c, 0x870c2de2f130ff5a, 0xc32d6a90658eb347, 0x0b5a6e242082dc01, 0xf15db35e61e3e4f9, 0x05cb98edaeddde3b, 0xf42a568ba8650943, 0x7a70bb17d9251446, 0x1192a280cb3f6e41, 0x1e793c64522c4580, 0x29a80c9474c9b619, 0x4ccd8e988ea18381, 0xb8838a1136fbfc48, 0x108a0978ea3b5737, 0xe8d5e36bf6eebbc7, 0x62b9f51dbf53b7e6, 0x6996fcba08c05853, 0x500713ada63dec92, 0x55e48c824b0341da, 0xa76a0dee1b82ae93, 0x95706b1d4d8ec8ec, 0x2b100b833913b0a5, 0x2dcc7ccc931f4a69, 0xf198714708999d3e, 0x36d8a256f6be869f, 0x74791453a9159591, 0x3e0e78593608f415, 0xc18f5072b7288cc1, 0x1e045ab5b4deeb8b, 0x55ce3a1e68cc7318, 0xadb069bc0ea75ce6, 0x5f0c72e82faa5d12, 0xb0dcec4f4a5b32eb, 0xb2af1e588a6b3d69, 0x7133d52a1259083f, 0xaf5cbeb8c7d7b228, 0xe00640c93ba3c4e0, 0x5264e4f06b24e751, 0xf4b8ad26a7e50c2d, 0x034dcdb8121f9b91, 0x8d18c62ff6c0b2c9, 0x04c378dbb5c92962, 0x7c7c041b704a39bb, 0x8c7cac2bd6fab046,
+    0xe830751a18b1fb5e, 0x9e30fb31b333ccec, 0x4fba9d374256093e, 0x598628d4b2871fde, 0xd6854917cc217ab4, 0x4da3839966d614cd, 0x6c2ee98d9f0a6bf9, 0xa643c8991753f6e4, 0x4a7982d0be1d0930, 0x441b590a0694d4f4, 0xac70c5107d531b97, 0xbb9e36477a76bbd2, 0x921ccfb831039d8e, 0x61f6991dc827545c, 0x6c5afe13298cf2ad, 0xecf28b9022ab3a75, 0x11e0265d86c2d913, 0x51b4aeded81317ec, 0x5bdccaa59f2cbeb8, 0xb9c76e9f66388e78, 0xa6babe827af99e38, 0x7c92e55ca21c6159, 0xe49bad2924782213, 0x3c2f72423ad5f50d, 0xb3755cfa70e505b4, 0x9f55bc675f2dd8d0, 0xf2891d2b3c912007, 0xbfe5cf184e166eff, 0x0e43e71fdd72d966, 0xf56228bcd5c95ca0, 0x80fa47660411d1a6, 0x92166503e32b5c2f, 0x542096f618073022, 0x5dd3a8ea782205c5, 0xb520095d8dff2a5e, 0x045b81afc2f56ade, 0xb85681ed6f1de692, 0xb9a75fdde941cf34, 0x58e17def17bb5d6b, 0xd4b11a833adbd178, 0x787ab0355e2fda17, 0x38e5bda322c1a58a, 0x0c1bf5f6457d6d33, 0x93172c3a82e1c498, 0xf3d6f541b2b86965, 0x4e7f9e55316d0a31, 0xc2e824b016aab50b, 0x1d6c62558ea1c109, 0x5370f2b9133e09ee, 0x43137d4fa9a8437f, 0xe5239ad79830662b, 0x4e109cd3220f67dd,
+    },
+            };
+            size_t res = 0;
+
+            for (size_t i = 0; i < 8; ++i)
+            {
+                res ^= random[i][UInt8(x)];
+                x >>= 8;
+            }
+
+            return res;
+        }
+    };
+}
+
+
+template <typename Key, typename Mapped, typename Hash>
+using HopscotchMap = tsl::hopscotch_map<Key, Mapped, Hash>;
+
+
+
+template <template <typename...> class Map, typename Hash>
+void NO_INLINE test(const Key * data, size_t size, std::function<void(Map<Key, Value, Hash>&)> init = {})
+{
+    if (std::is_same<Map<Key, Value, Hash>, HopscotchMap<Key, Value, Hashes::IdentityHash>>::value
+        || std::is_same<Map<Key, Value, Hash>, HopscotchMap<Key, Value, Hashes::SimpleMultiplyHash>>::value
+        || std::is_same<Map<Key, Value, Hash>, HopscotchMap<Key, Value, Hashes::MixAllBitsHash>>::value)
+    {
+        std::cerr << __PRETTY_FUNCTION__ << ":\nDisqualified\n";
+        return;
+    }
+
+    Stopwatch watch;
+
+    Map<Key, Value, Hash> map;
+    if (init)
+        init(map);
+
+    for (auto end = data + size; data < end; ++data)
+        ++map[*data];
+
+    watch.stop();
+    std::cerr << __PRETTY_FUNCTION__
+        << ":\nElapsed: " << watch.elapsedSeconds()
+        << " (" << size / watch.elapsedSeconds() << " elem/sec.)"
+        << ", map size: " << map.size() << "\n";
+}
+
+template <template <typename...> class Map, typename Init>
+void NO_INLINE testForEachHash(const Key * data, size_t size, Init && init)
+{
+    test<Map, Hashes::IdentityHash>(data, size, init);
+    test<Map, Hashes::SimpleMultiplyHash>(data, size, init);
+    test<Map, Hashes::MultiplyAndMixHash>(data, size, init);
+    test<Map, Hashes::MixMultiplyMixHash>(data, size, init);
+    test<Map, Hashes::MurMurMixHash>(data, size, init);
+    test<Map, Hashes::MixAllBitsHash>(data, size, init);
+    test<Map, Hashes::IntHash32>(data, size, init);
+    test<Map, Hashes::ArcadiaNumericHash>(data, size, init);
+    test<Map, Hashes::MurMurButDifferentHash>(data, size, init);
+    test<Map, Hashes::TwoRoundsTwoVarsHash>(data, size, init);
+    test<Map, Hashes::TwoRoundsLessOpsHash>(data, size, init);
+    test<Map, Hashes::CRC32Hash>(data, size, init);
+    test<Map, Hashes::MulShiftHash>(data, size, init);
+    test<Map, Hashes::TabulationHash>(data, size, init);
+    test<Map, Hashes::CityHash>(data, size, init);
+    test<Map, Hashes::SipHash>(data, size, init);
+}
+
+void NO_INLINE testForEachMapAndHash(const Key * data, size_t size)
+{
+    auto nothing = [](auto & map){};
+
+    testForEachHash<HashMap>(data, size, nothing);
+    testForEachHash<std::unordered_map>(data, size, nothing);
+    testForEachHash<google::dense_hash_map>(data, size, [](auto & map){ map.set_empty_key(-1); });
+    testForEachHash<google::sparse_hash_map>(data, size, nothing);
+    testForEachHash<HopscotchMap>(data, size, nothing);
+}
+
+
+
+int main(int argc, char ** argv)
+{
+    size_t n = atoi(argv[1]);
+//    size_t m = atoi(argv[2]);
+
+    std::cerr << std::fixed << std::setprecision(3);
+
+    std::vector<Key> data(n);
+
+    std::cerr << "sizeof(Key) = " << sizeof(Key) << ", sizeof(Value) = " << sizeof(Value) << std::endl;
+
+    {
+        Stopwatch watch;
+        DB::ReadBufferFromFileDescriptor in1(STDIN_FILENO);
+        DB::CompressedReadBuffer in2(in1);
+
+        in2.readStrict(reinterpret_cast<char*>(&data[0]), sizeof(data[0]) * n);
+
+        watch.stop();
+        std::cerr
+            << "Vector. Size: " << n
+            << ", elapsed: " << watch.elapsedSeconds()
+            << " (" << n / watch.elapsedSeconds() << " elem/sec.)"
+            << std::endl;
+    }
+
+    /** Actually we should not run multiple test within same invocation of binary,
+      *  because order of test could alter test results (due to state of allocator and various minor reasons),
+      *  but in this case it's Ok.
+      */
+
+    testForEachMapAndHash(data.data(), data.size());
+    return 0;
+}
diff --git a/dbms/src/Common/tests/lru_cache.cpp b/dbms/src/Common/tests/lru_cache.cpp
index dea6bb6e14a..c311b88494c 100644
--- a/dbms/src/Common/tests/lru_cache.cpp
+++ b/dbms/src/Common/tests/lru_cache.cpp
@@ -31,7 +31,7 @@ while (0)
 
 void run()
 {
-    const std::vector<std::function<bool()> > tests =
+    const std::vector<std::function<bool()>> tests =
     {
         test1,
         test2,
diff --git a/dbms/src/Common/tests/parallel_aggregation.cpp b/dbms/src/Common/tests/parallel_aggregation.cpp
index 763674abd68..1082a7cf564 100644
--- a/dbms/src/Common/tests/parallel_aggregation.cpp
+++ b/dbms/src/Common/tests/parallel_aggregation.cpp
@@ -57,7 +57,7 @@ using Mutex = std::mutex;
     Key,
     HashTableCellWithLock<
         Key,
-        HashMapCell<Key, Value, DefaultHash<Key> > >,
+        HashMapCell<Key, Value, DefaultHash<Key>> >,
     DefaultHash<Key>,
     HashTableGrower<21>,
     HashTableAllocator>;*/
diff --git a/dbms/src/Core/Defines.h b/dbms/src/Core/Defines.h
index 08184f55f84..10ba7030836 100644
--- a/dbms/src/Core/Defines.h
+++ b/dbms/src/Core/Defines.h
@@ -61,9 +61,6 @@
 /// Name suffix for the column containing the array offsets.
 #define ARRAY_SIZES_COLUMN_NAME_SUFFIX ".size"
 
-#define DBMS_MIN_REVISION_WITH_TEMPORARY_TABLES 50264
-#define DBMS_MIN_REVISION_WITH_TOTAL_ROWS_IN_PROGRESS 51554
-#define DBMS_MIN_REVISION_WITH_BLOCK_INFO 51903
 #define DBMS_MIN_REVISION_WITH_CLIENT_INFO 54032
 #define DBMS_MIN_REVISION_WITH_SERVER_TIMEZONE 54058
 #define DBMS_MIN_REVISION_WITH_QUOTA_KEY_IN_CLIENT_INFO 54060
diff --git a/dbms/src/Core/Field.h b/dbms/src/Core/Field.h
index dd96612e827..affa2dcec96 100644
--- a/dbms/src/Core/Field.h
+++ b/dbms/src/Core/Field.h
@@ -37,8 +37,8 @@ STRONG_TYPEDEF(TupleBackend, Tuple); /// Array and Tuple are different types wit
   *  is not generalized,
   *  but somewhat more efficient, and simpler.
   *
-  * Used to represent a unit value of one of several types in the RAM.
-  * Warning! Preferably, instead of single values, store the pieces of the columns. See Column.h
+  * Used to represent a single value of one of several types in memory.
+  * Warning! Prefer to use chunks of columns instead of single values. See Column.h
   */
 class Field
 {
diff --git a/dbms/src/Core/Progress.cpp b/dbms/src/Core/Progress.cpp
index c96038ba290..540be33f6f1 100644
--- a/dbms/src/Core/Progress.cpp
+++ b/dbms/src/Core/Progress.cpp
@@ -17,9 +17,7 @@ void Progress::read(ReadBuffer & in, UInt64 server_revision)
 
     readVarUInt(new_rows, in);
     readVarUInt(new_bytes, in);
-
-    if (server_revision >= DBMS_MIN_REVISION_WITH_TOTAL_ROWS_IN_PROGRESS)
-        readVarUInt(new_total_rows, in);
+    readVarUInt(new_total_rows, in);
 
     rows = new_rows;
     bytes = new_bytes;
@@ -31,9 +29,7 @@ void Progress::write(WriteBuffer & out, UInt64 client_revision) const
 {
     writeVarUInt(rows.load(), out);
     writeVarUInt(bytes.load(), out);
-
-    if (client_revision >= DBMS_MIN_REVISION_WITH_TOTAL_ROWS_IN_PROGRESS)
-        writeVarUInt(total_rows.load(), out);
+    writeVarUInt(total_rows.load(), out);
 }
 
 
diff --git a/dbms/src/Core/Types.h b/dbms/src/Core/Types.h
index 63621b701ff..9ffe643e675 100644
--- a/dbms/src/Core/Types.h
+++ b/dbms/src/Core/Types.h
@@ -42,7 +42,7 @@ template <typename T> struct IsNullable { static constexpr bool value = false; }
 template <typename T> struct IsNullable<Nullable<T>> { static constexpr bool value = true; };
 
 template <typename T> struct IsNumber { static constexpr bool value = false; };
-template <typename T> struct IsNumber<Nullable<T> > { static constexpr bool value = IsNumber<T>::value; };
+template <typename T> struct IsNumber<Nullable<T>> { static constexpr bool value = IsNumber<T>::value; };
 
 template <> struct IsNumber<UInt8>   { static constexpr bool value = true; };
 template <> struct IsNumber<UInt16>  { static constexpr bool value = true; };
diff --git a/dbms/src/Core/tests/string_pool.cpp b/dbms/src/Core/tests/string_pool.cpp
index 79b0a9c824b..83aae929f32 100644
--- a/dbms/src/Core/tests/string_pool.cpp
+++ b/dbms/src/Core/tests/string_pool.cpp
@@ -214,7 +214,7 @@ int main(int argc, char ** argv)
             << std::endl;
 
         size_t i = 0;
-        for (RefsHashMap::const_iterator it = set.begin(); i < elems_show && it != set.end(); ++it, ++i)
+        for (auto it = set.begin(); i < elems_show && it != set.end(); ++it, ++i)
         {
             devnull.write(it->first.data, it->first.size);
             devnull << std::endl;
@@ -241,7 +241,7 @@ int main(int argc, char ** argv)
             << std::endl;
 
         size_t i = 0;
-        for (RefsHashMap::const_iterator it = set.begin(); i < elems_show && it != set.end(); ++it, ++i)
+        for (auto it = set.begin(); i < elems_show && it != set.end(); ++it, ++i)
         {
             devnull.write(it->first.data, it->first.size);
             devnull << std::endl;
diff --git a/dbms/src/DataStreams/DistinctBlockInputStream.cpp b/dbms/src/DataStreams/DistinctBlockInputStream.cpp
index 21fd85c4098..f27c1ee0f62 100644
--- a/dbms/src/DataStreams/DistinctBlockInputStream.cpp
+++ b/dbms/src/DataStreams/DistinctBlockInputStream.cpp
@@ -115,7 +115,7 @@ void DistinctBlockInputStream::buildFilter(
         /// Make a key.
         typename Method::Key key = state.getKey(columns, columns.size(), i, key_sizes);
 
-        typename Method::Data::iterator it = method.data.find(key);
+        typename Method::Data::iterator it;
         bool inserted;
         method.data.emplace(key, it, inserted);
 
diff --git a/dbms/src/DataStreams/NativeBlockInputStream.cpp b/dbms/src/DataStreams/NativeBlockInputStream.cpp
index b153cf2bd5f..a65cd6719e5 100644
--- a/dbms/src/DataStreams/NativeBlockInputStream.cpp
+++ b/dbms/src/DataStreams/NativeBlockInputStream.cpp
@@ -106,7 +106,7 @@ Block NativeBlockInputStream::readImpl()
     }
 
     /// Additional information about the block.
-    if (server_revision >= DBMS_MIN_REVISION_WITH_BLOCK_INFO)
+    if (server_revision > 0)
         res.info.read(istr);
 
     /// Dimensions
diff --git a/dbms/src/DataStreams/NativeBlockInputStream.h b/dbms/src/DataStreams/NativeBlockInputStream.h
index 2efbf7cd3cc..b1df20a782a 100644
--- a/dbms/src/DataStreams/NativeBlockInputStream.h
+++ b/dbms/src/DataStreams/NativeBlockInputStream.h
@@ -60,8 +60,7 @@ struct IndexForNativeFormat
 class NativeBlockInputStream : public IProfilingBlockInputStream
 {
 public:
-    /** If a non-zero server_revision is specified, additional block information may be expected and read,
-      * depending on what is supported for the specified revision.
+    /** If a non-zero server_revision is specified, additional block information may be expected and read.
       *
       * `index` is not required parameter. If set, only parts of columns specified in the index will be read.
       */
diff --git a/dbms/src/DataStreams/NativeBlockOutputStream.cpp b/dbms/src/DataStreams/NativeBlockOutputStream.cpp
index 763ffabb7ee..b9cfc0fbac7 100644
--- a/dbms/src/DataStreams/NativeBlockOutputStream.cpp
+++ b/dbms/src/DataStreams/NativeBlockOutputStream.cpp
@@ -116,7 +116,7 @@ void NativeBlockOutputStream::writeData(const IDataType & type, const ColumnPtr
 void NativeBlockOutputStream::write(const Block & block)
 {
     /// Additional information about the block.
-    if (client_revision >= DBMS_MIN_REVISION_WITH_BLOCK_INFO)
+    if (client_revision > 0)
         block.info.write(ostr);
 
     /// Dimensions
diff --git a/dbms/src/DataStreams/NativeBlockOutputStream.h b/dbms/src/DataStreams/NativeBlockOutputStream.h
index 16ba2415cc7..d76cb827863 100644
--- a/dbms/src/DataStreams/NativeBlockOutputStream.h
+++ b/dbms/src/DataStreams/NativeBlockOutputStream.h
@@ -20,8 +20,7 @@ class CompressedWriteBuffer;
 class NativeBlockOutputStream : public IBlockOutputStream
 {
 public:
-    /** If non-zero client_revision is specified, additional block information can be written,
-      *  depending on what is supported for the specified revision.
+    /** If non-zero client_revision is specified, additional block information can be written.
       */
     NativeBlockOutputStream(
         WriteBuffer & ostr_, UInt64 client_revision_ = 0,
diff --git a/dbms/src/DataStreams/RemoteBlockInputStream.h b/dbms/src/DataStreams/RemoteBlockInputStream.h
index e81c4b71db3..0117ac37703 100644
--- a/dbms/src/DataStreams/RemoteBlockInputStream.h
+++ b/dbms/src/DataStreams/RemoteBlockInputStream.h
@@ -109,7 +109,7 @@ private:
     Tables external_tables;
     QueryProcessingStage::Enum stage;
 
-    /// Threads for reading from temporary tables and following sending of data
+    /// Streams for reading from temporary tables and following sending of data
     /// to remote servers for GLOBAL-subqueries
     std::vector<ExternalTablesData> external_tables_data;
     std::mutex external_tables_mutex;
diff --git a/dbms/src/DataTypes/DataTypeTraits.h b/dbms/src/DataTypes/DataTypeTraits.h
index d083e67793d..afa0b48b480 100644
--- a/dbms/src/DataTypes/DataTypeTraits.h
+++ b/dbms/src/DataTypes/DataTypeTraits.h
@@ -213,12 +213,12 @@ struct DataTypeProduct
 };
 
 template <typename T1, typename T2>
-struct DataTypeProduct<T1, Nullable<T2> >
+struct DataTypeProduct<T1, Nullable<T2>>
 {
     using Type = typename ToEnrichedDataType<
         typename NumberTraits::TypeProduct<
             typename ToEnrichedNumericType<T1>::Type,
-            typename NumberTraits::EmbedType<Nullable<typename T2::FieldType> >::Type
+            typename NumberTraits::EmbedType<Nullable<typename T2::FieldType>>::Type
         >::Type
     >::Type;
 };
diff --git a/dbms/src/DataTypes/NumberTraits.h b/dbms/src/DataTypes/NumberTraits.h
index 67ddfc056fb..9fb04fd46a2 100644
--- a/dbms/src/DataTypes/NumberTraits.h
+++ b/dbms/src/DataTypes/NumberTraits.h
@@ -503,16 +503,16 @@ template <> struct EmbedType<Float32> { using Type = Enriched::Float32<HasNoNull
 template <> struct EmbedType<Float64> { using Type = Enriched::Float64<HasNoNull>; };
 
 template <> struct EmbedType<Null> { using Type = Enriched::Void<HasNull>; };
-template <> struct EmbedType<Nullable<Int8> > { using Type = Enriched::Int8<HasNull>; };
-template <> struct EmbedType<Nullable<Int16> > { using Type = Enriched::Int16<HasNull>; };
-template <> struct EmbedType<Nullable<Int32> > { using Type = Enriched::Int32<HasNull>; };
-template <> struct EmbedType<Nullable<Int64> > { using Type = Enriched::Int64<HasNull>; };
-template <> struct EmbedType<Nullable<UInt8> > { using Type = Enriched::UInt8<HasNull>; };
-template <> struct EmbedType<Nullable<UInt16> > { using Type = Enriched::UInt16<HasNull>; };
-template <> struct EmbedType<Nullable<UInt32> > { using Type = Enriched::UInt32<HasNull>; };
-template <> struct EmbedType<Nullable<UInt64> > { using Type = Enriched::UInt64<HasNull>; };
-template <> struct EmbedType<Nullable<Float32> > { using Type = Enriched::Float32<HasNull>; };
-template <> struct EmbedType<Nullable<Float64> > { using Type = Enriched::Float64<HasNull>; };
+template <> struct EmbedType<Nullable<Int8>> { using Type = Enriched::Int8<HasNull>; };
+template <> struct EmbedType<Nullable<Int16>> { using Type = Enriched::Int16<HasNull>; };
+template <> struct EmbedType<Nullable<Int32>> { using Type = Enriched::Int32<HasNull>; };
+template <> struct EmbedType<Nullable<Int64>> { using Type = Enriched::Int64<HasNull>; };
+template <> struct EmbedType<Nullable<UInt8>> { using Type = Enriched::UInt8<HasNull>; };
+template <> struct EmbedType<Nullable<UInt16>> { using Type = Enriched::UInt16<HasNull>; };
+template <> struct EmbedType<Nullable<UInt32>> { using Type = Enriched::UInt32<HasNull>; };
+template <> struct EmbedType<Nullable<UInt64>> { using Type = Enriched::UInt64<HasNull>; };
+template <> struct EmbedType<Nullable<Float32>> { using Type = Enriched::Float32<HasNull>; };
+template <> struct EmbedType<Nullable<Float64>> { using Type = Enriched::Float64<HasNull>; };
 
 /// Get an ordinary type from an enriched type.
 template <typename TType>
diff --git a/dbms/src/Dictionaries/DictionarySourceFactory.cpp b/dbms/src/Dictionaries/DictionarySourceFactory.cpp
index 436f89fa525..a75618dbdac 100644
--- a/dbms/src/Dictionaries/DictionarySourceFactory.cpp
+++ b/dbms/src/Dictionaries/DictionarySourceFactory.cpp
@@ -3,7 +3,6 @@
 #include <Core/Block.h>
 #include <Dictionaries/DictionaryStructure.h>
 #include <Dictionaries/FileDictionarySource.h>
-#include <Dictionaries/MySQLDictionarySource.h>
 #include <Dictionaries/ClickHouseDictionarySource.h>
 #include <Dictionaries/ExecutableDictionarySource.h>
 #include <Dictionaries/HTTPDictionarySource.h>
diff --git a/dbms/src/Dictionaries/Embedded/RegionsHierarchies.cpp b/dbms/src/Dictionaries/Embedded/RegionsHierarchies.cpp
index e64cf3edf30..1a61f7a0478 100644
--- a/dbms/src/Dictionaries/Embedded/RegionsHierarchies.cpp
+++ b/dbms/src/Dictionaries/Embedded/RegionsHierarchies.cpp
@@ -1,20 +1,19 @@
 #include <Dictionaries/Embedded/RegionsHierarchies.h>
-#include <Poco/DirectoryIterator.h>
-#include <Poco/Util/Application.h>
-#include <Poco/Util/AbstractConfiguration.h>
+
 #include <common/logger_useful.h>
 
+#include <Poco/DirectoryIterator.h>
+
 
 static constexpr auto config_key = "path_to_regions_hierarchy_file";
 
 
-RegionsHierarchies::RegionsHierarchies()
-: RegionsHierarchies(Poco::Util::Application::instance().config().getString(config_key))
+void RegionsHierarchies::reload(const Poco::Util::AbstractConfiguration & config)
 {
+    reload(config.getString(config_key));
 }
 
-
-RegionsHierarchies::RegionsHierarchies(const std::string & path)
+void RegionsHierarchies::reload(const std::string & path)
 {
     Logger * log = &Logger::get("RegionsHierarchies");
 
@@ -47,10 +46,12 @@ RegionsHierarchies::RegionsHierarchies(const std::string & path)
                 std::forward_as_tuple(dir_it->path()));
         }
     }
+
+    reload();
 }
 
 
-bool RegionsHierarchies::isConfigured()
+bool RegionsHierarchies::isConfigured(const Poco::Util::AbstractConfiguration & config)
 {
-    return Poco::Util::Application::instance().config().has(config_key);
+    return config.has(config_key);
 }
diff --git a/dbms/src/Dictionaries/Embedded/RegionsHierarchies.h b/dbms/src/Dictionaries/Embedded/RegionsHierarchies.h
index 5b0be5cfb66..7dfc0bfb555 100644
--- a/dbms/src/Dictionaries/Embedded/RegionsHierarchies.h
+++ b/dbms/src/Dictionaries/Embedded/RegionsHierarchies.h
@@ -1,7 +1,10 @@
 #pragma once
 
 #include <Dictionaries/Embedded/RegionsHierarchy.h>
+
+#include <Poco/Util/AbstractConfiguration.h>
 #include <Poco/Exception.h>
+
 #include <unordered_map>
 
 
@@ -24,11 +27,11 @@ public:
       * For example, if /opt/geo/regions_hierarchy.txt is specified,
       *  then the /opt/geo/regions_hierarchy_ua.txt file will also be loaded, if any, it will be accessible by the `ua` key.
       */
-    RegionsHierarchies();
-    explicit RegionsHierarchies(const std::string & path_to_regions_hierarchy_file);
+    void reload(const Poco::Util::AbstractConfiguration & config);
+    void reload(const std::string & directory);
 
     /// Has corresponding section in configuration file.
-    static bool isConfigured();
+    static bool isConfigured(const Poco::Util::AbstractConfiguration & config);
 
 
     /** Reloads, if necessary, all hierarchies of regions.
diff --git a/dbms/src/Dictionaries/Embedded/RegionsHierarchy.cpp b/dbms/src/Dictionaries/Embedded/RegionsHierarchy.cpp
index 30e06ede014..928d0e39cd5 100644
--- a/dbms/src/Dictionaries/Embedded/RegionsHierarchy.cpp
+++ b/dbms/src/Dictionaries/Embedded/RegionsHierarchy.cpp
@@ -12,13 +12,6 @@
 #include <IO/WriteHelpers.h>
 
 
-static constexpr auto config_key = "path_to_regions_hierarchy_file";
-
-
-RegionsHierarchy::RegionsHierarchy()
-{
-    path = Poco::Util::Application::instance().config().getString(config_key);
-}
 
 RegionsHierarchy::RegionsHierarchy(const std::string & path_)
 {
diff --git a/dbms/src/Dictionaries/Embedded/RegionsHierarchy.h b/dbms/src/Dictionaries/Embedded/RegionsHierarchy.h
index 0b819e51333..4c3c0905bf7 100644
--- a/dbms/src/Dictionaries/Embedded/RegionsHierarchy.h
+++ b/dbms/src/Dictionaries/Embedded/RegionsHierarchy.h
@@ -3,7 +3,6 @@
 #include <vector>
 #include <boost/noncopyable.hpp>
 #include <common/Types.h>
-#include <ext/singleton.h>
 
 
 #define REGION_TYPE_CITY       6
@@ -61,7 +60,6 @@ private:
     std::string path;
 
 public:
-    RegionsHierarchy();
     RegionsHierarchy(const std::string & path_);
 
     /// Reloads, if necessary, the hierarchy of regions. Not threadsafe.
@@ -142,13 +140,3 @@ public:
         return populations[region];
     }
 };
-
-
-class RegionsHierarchySingleton : public ext::singleton<RegionsHierarchySingleton>, public RegionsHierarchy
-{
-friend class ext::singleton<RegionsHierarchySingleton>;
-protected:
-    RegionsHierarchySingleton()
-    {
-    }
-};
diff --git a/dbms/src/Dictionaries/Embedded/RegionsNames.cpp b/dbms/src/Dictionaries/Embedded/RegionsNames.cpp
index ef9f7d6e8d0..ebaebede43e 100644
--- a/dbms/src/Dictionaries/Embedded/RegionsNames.cpp
+++ b/dbms/src/Dictionaries/Embedded/RegionsNames.cpp
@@ -28,10 +28,9 @@ std::string RegionsNames::dumpSupportedLanguagesNames()
     return res;
 }
 
-void RegionsNames::reload()
+void RegionsNames::reload(const Poco::Util::AbstractConfiguration & config)
 {
-    std::string directory = Poco::Util::Application::instance().config().getString(config_key);
-    reload(directory);
+    reload(config.getString(config_key));
 }
 
 void RegionsNames::reload(const std::string & directory)
@@ -110,7 +109,7 @@ void RegionsNames::reload(const std::string & directory)
 }
 
 
-bool RegionsNames::isConfigured()
+bool RegionsNames::isConfigured(const Poco::Util::AbstractConfiguration & config)
 {
-    return Poco::Util::Application::instance().config().has(config_key);
+    return config.has(config_key);
 }
diff --git a/dbms/src/Dictionaries/Embedded/RegionsNames.h b/dbms/src/Dictionaries/Embedded/RegionsNames.h
index c1399cca8ad..52b145d1718 100644
--- a/dbms/src/Dictionaries/Embedded/RegionsNames.h
+++ b/dbms/src/Dictionaries/Embedded/RegionsNames.h
@@ -1,10 +1,13 @@
 #pragma once
 
+#include <Poco/Util/AbstractConfiguration.h>
+#include <Poco/Exception.h>
+
+#include <common/Types.h>
+#include <common/StringRef.h>
+
 #include <string>
 #include <vector>
-#include <Poco/Exception.h>
-#include <common/Types.h>
-#include <common/StringRef.h>
 
 
 /** A class that allows you to recognize by region id its text name in one of the supported languages: ru, en, ua, by, kz, tr.
@@ -67,11 +70,11 @@ private:
 public:
     /** Reboot, if necessary, the names of regions.
       */
-    void reload();
+    void reload(const Poco::Util::AbstractConfiguration & config);
     void reload(const std::string & directory);
 
     /// Has corresponding section in configuration file.
-    static bool isConfigured();
+    static bool isConfigured(const Poco::Util::AbstractConfiguration & config);
 
 
     StringRef getRegionName(RegionID region_id, Language language = Language::RU) const
diff --git a/dbms/src/Dictionaries/Embedded/TechDataHierarchy.cpp b/dbms/src/Dictionaries/Embedded/TechDataHierarchy.cpp
index 07baa4ebf19..921b7f5e9ee 100644
--- a/dbms/src/Dictionaries/Embedded/TechDataHierarchy.cpp
+++ b/dbms/src/Dictionaries/Embedded/TechDataHierarchy.cpp
@@ -10,10 +10,9 @@
 static constexpr auto config_key = "mysql_metrica";
 
 
-void TechDataHierarchy::reload()
+void TechDataHierarchy::reload(const Poco::Util::AbstractConfiguration & config)
 {
     Logger * log = &Logger::get("TechDataHierarchy");
-
     LOG_DEBUG(log, "Loading tech data hierarchy.");
 
     mysqlxx::PoolWithFailover pool(config_key);
@@ -53,9 +52,9 @@ void TechDataHierarchy::reload()
 }
 
 
-bool TechDataHierarchy::isConfigured()
+bool TechDataHierarchy::isConfigured(const Poco::Util::AbstractConfiguration & config)
 {
-    return Poco::Util::Application::instance().config().has(config_key);
+    return config.has(config_key);
 }
 
 #endif
diff --git a/dbms/src/Dictionaries/Embedded/TechDataHierarchy.h b/dbms/src/Dictionaries/Embedded/TechDataHierarchy.h
index cdfd9f1a526..c86d188aa92 100644
--- a/dbms/src/Dictionaries/Embedded/TechDataHierarchy.h
+++ b/dbms/src/Dictionaries/Embedded/TechDataHierarchy.h
@@ -1,8 +1,12 @@
 #pragma once
 
-#include <ext/singleton.h>
+#include <Poco/Util/AbstractConfiguration.h>
+#include <Poco/Exception.h>
+
 #include <common/Types.h>
 
+#include <ext/singleton.h>
+
 
 /** @brief Class that lets you know if a search engine or operating system belongs
   * another search engine or operating system, respectively.
@@ -15,10 +19,10 @@ private:
     UInt8 se_parent[256] {};
 
 public:
-    void reload();
+    void reload(const Poco::Util::AbstractConfiguration & config);
 
     /// Has corresponding section in configuration file.
-    static bool isConfigured();
+    static bool isConfigured(const Poco::Util::AbstractConfiguration & config);
 
 
     /// The "belongs" relation.
diff --git a/dbms/src/Functions/FunctionFactory.cpp b/dbms/src/Functions/FunctionFactory.cpp
index c8819fbbae8..4b681b29263 100644
--- a/dbms/src/Functions/FunctionFactory.cpp
+++ b/dbms/src/Functions/FunctionFactory.cpp
@@ -1,7 +1,11 @@
-#include <Poco/String.h>
 #include <Functions/FunctionFactory.h>
+
+#include <Interpreters/Context.h>
+
 #include <Common/Exception.h>
 
+#include <Poco/String.h>
+
 
 namespace DB
 {
@@ -9,10 +13,14 @@ namespace DB
 namespace ErrorCodes
 {
     extern const int UNKNOWN_FUNCTION;
+    extern const int LOGICAL_ERROR;
 }
 
 
-void FunctionFactory::registerFunction(const String & name, Creator creator, CaseSensitiveness case_sensitiveness)
+void FunctionFactory::registerFunction(const
+    std::string & name,
+    Creator creator,
+    CaseSensitiveness case_sensitiveness)
 {
     if (!functions.emplace(name, creator).second)
         throw Exception("FunctionFactory: the function name '" + name + "' is not unique",
diff --git a/dbms/src/Functions/FunctionFactory.h b/dbms/src/Functions/FunctionFactory.h
index 9702e233a44..3f4693080a1 100644
--- a/dbms/src/Functions/FunctionFactory.h
+++ b/dbms/src/Functions/FunctionFactory.h
@@ -1,25 +1,19 @@
 #pragma once
 
-#include <string>
-#include <memory>
-#include <unordered_map>
+#include <Functions/IFunction.h>
+
 #include <ext/singleton.h>
 
-#include <Common/Exception.h>
-#include <Core/Types.h>
+#include <functional>
+#include <memory>
+#include <string>
+#include <unordered_map>
 
 
 namespace DB
 {
 
 class Context;
-class IFunction;
-using FunctionPtr = std::shared_ptr<IFunction>;
-
-namespace ErrorCodes
-{
-    extern const int LOGICAL_ERROR;
-}
 
 
 /** Creates function by name.
@@ -30,12 +24,8 @@ class FunctionFactory : public ext::singleton<FunctionFactory>
 {
     friend class StorageSystemFunctions;
 
-private:
-    using Creator = FunctionPtr(*)(const Context & context);    /// Not std::function, for lower object size and less indirection.
-    using Functions = std::unordered_map<String, Creator>;
-
-    Functions functions;
-    Functions case_insensitive_functions;
+public:
+    using Creator = std::function<FunctionPtr(const Context &)>;
 
     /// For compatibility with SQL, it's possible to specify that certain function name is case insensitive.
     enum CaseSensitiveness
@@ -44,18 +34,30 @@ private:
         CaseInsensitive
     };
 
-public:
-    FunctionPtr get(const String & name, const Context & context) const;    /// Throws an exception if not found.
-    FunctionPtr tryGet(const String & name, const Context & context) const; /// Returns nullptr if not found.
-
-    /// No locking, you must register all functions before usage of get, tryGet.
-    void registerFunction(const String & name, Creator creator, CaseSensitiveness case_sensitiveness = CaseSensitive);
+    /// Register a function by its name.
+    /// No locking, you must register all functions before usage of get.
+    void registerFunction(
+        const std::string & name,
+        Creator creator,
+        CaseSensitiveness case_sensitiveness = CaseSensitive);
 
     template <typename Function>
     void registerFunction()
     {
-        registerFunction(String(Function::name), &Function::create);
+        registerFunction(Function::name, &Function::create);
     }
+
+    /// Throws an exception if not found.
+    FunctionPtr get(const std::string & name, const Context & context) const;
+
+    /// Returns nullptr if not found.
+    FunctionPtr tryGet(const std::string & name, const Context & context) const;
+
+private:
+    using Functions = std::unordered_map<std::string, Creator>;
+
+    Functions functions;
+    Functions case_insensitive_functions;
 };
 
 }
diff --git a/dbms/src/Functions/FunctionsString.cpp b/dbms/src/Functions/FunctionsString.cpp
index 975f5dc6777..e1cdc8dc6a1 100644
--- a/dbms/src/Functions/FunctionsString.cpp
+++ b/dbms/src/Functions/FunctionsString.cpp
@@ -871,10 +871,18 @@ public:
                 throw Exception("Third argument provided for function substring could not be negative.", ErrorCodes::ARGUMENT_OUT_OF_BOUND);
         }
 
-        if (const ColumnString * col = checkAndGetColumn<ColumnString>(&*column_string))
-            executeForSource(column_start, column_length, column_start_const, column_length_const, start_value, length_value, block, result, StringSource(*col));
-        else if (const ColumnFixedString * col = checkAndGetColumn<ColumnFixedString>(&*column_string))
-            executeForSource(column_start, column_length, column_start_const, column_length_const, start_value, length_value, block, result, FixedStringSource(*col));
+        if (const ColumnString * col = checkAndGetColumn<ColumnString>(column_string.get()))
+            executeForSource(column_start, column_length, column_start_const, column_length_const, start_value, length_value,
+                             block, result, StringSource(*col));
+        else if (const ColumnFixedString * col = checkAndGetColumn<ColumnFixedString>(column_string.get()))
+            executeForSource(column_start, column_length, column_start_const, column_length_const, start_value, length_value,
+                             block, result, FixedStringSource(*col));
+        else if (const ColumnConst * col = checkAndGetColumnConst<ColumnString>(column_string.get()))
+            executeForSource(column_start, column_length, column_start_const, column_length_const, start_value, length_value,
+                             block, result, ConstSource<StringSource>(*col));
+        else if (const ColumnConst * col = checkAndGetColumnConst<ColumnFixedString>(column_string.get()))
+            executeForSource(column_start, column_length, column_start_const, column_length_const, start_value, length_value,
+                             block, result, ConstSource<FixedStringSource>(*col));
         else
             throw Exception(
                 "Illegal column " + block.getByPosition(arguments[0]).column->getName() + " of first argument of function " + getName(),
@@ -945,7 +953,7 @@ public:
         if (start >= 0x8000000000000000ULL || length >= 0x8000000000000000ULL)
             throw Exception("Too large values of 2nd or 3rd argument provided for function substring.", ErrorCodes::ARGUMENT_OUT_OF_BOUND);
 
-        if (const ColumnString * col = checkAndGetColumn<ColumnString>(&*column_string))
+        if (const ColumnString * col = checkAndGetColumn<ColumnString>(column_string.get()))
         {
             std::shared_ptr<ColumnString> col_res = std::make_shared<ColumnString>();
             block.getByPosition(result).column = col_res;
diff --git a/dbms/src/Functions/FunctionsStringArray.h b/dbms/src/Functions/FunctionsStringArray.h
index be44cd1d47f..bd9222699cf 100644
--- a/dbms/src/Functions/FunctionsStringArray.h
+++ b/dbms/src/Functions/FunctionsStringArray.h
@@ -20,6 +20,7 @@ namespace DB
 namespace ErrorCodes
 {
     extern const int NUMBER_OF_ARGUMENTS_DOESNT_MATCH;
+    extern const int BAD_ARGUMENTS;
 }
 
 
@@ -210,6 +211,9 @@ public:
                 ErrorCodes::ILLEGAL_COLUMN);
 
         sep = col->getValue<String>();
+
+        if (sep.empty())
+            throw Exception("Illegal separator for function " + getName() + ". Must be not empty.", ErrorCodes::BAD_ARGUMENTS);
     }
 
     /// Returns the position of the argument that is the column of strings
diff --git a/dbms/src/Functions/GatherUtils.h b/dbms/src/Functions/GatherUtils.h
index d0f90733c95..48d8a3112c5 100644
--- a/dbms/src/Functions/GatherUtils.h
+++ b/dbms/src/Functions/GatherUtils.h
@@ -107,16 +107,16 @@ struct NumericArraySource
     Slice getSliceFromRight(size_t offset) const
     {
         size_t elem_size = offsets[row_num] - prev_offset;
-        if (offset >= elem_size)
-            return {&elements[prev_offset], 0};
+        if (offset > elem_size)
+            return {&elements[prev_offset], elem_size};
         return {&elements[offsets[row_num] - offset], offset};
     }
 
     Slice getSliceFromRight(size_t offset, size_t length) const
     {
         size_t elem_size = offsets[row_num] - prev_offset;
-        if (offset >= elem_size)
-            return {&elements[prev_offset], 0};
+        if (offset > elem_size)
+            return {&elements[prev_offset], elem_size};
         return {&elements[offsets[row_num] - offset], std::min(length, offset)};
     }
 };
@@ -246,16 +246,16 @@ struct StringSource
     Slice getSliceFromRight(size_t offset) const
     {
         size_t elem_size = offsets[row_num] - prev_offset - 1;
-        if (offset >= elem_size)
-            return {&elements[prev_offset], 0};
+        if (offset > elem_size)
+            return {&elements[prev_offset], elem_size};
         return {&elements[prev_offset + elem_size - offset], offset};
     }
 
     Slice getSliceFromRight(size_t offset, size_t length) const
     {
         size_t elem_size = offsets[row_num] - prev_offset - 1;
-        if (offset >= elem_size)
-            return {&elements[prev_offset], 0};
+        if (offset > elem_size)
+            return {&elements[prev_offset], elem_size};
         return {&elements[prev_offset + elem_size - offset], std::min(length, offset)};
     }
 };
@@ -321,15 +321,15 @@ struct FixedStringSource
 
     Slice getSliceFromRight(size_t offset) const
     {
-        if (offset >= string_size)
-            return {pos, 0};
+        if (offset > string_size)
+            return {pos, string_size};
         return {pos + string_size - offset, offset};
     }
 
     Slice getSliceFromRight(size_t offset, size_t length) const
     {
-        if (offset >= string_size)
-            return {pos, 0};
+        if (offset > string_size)
+            return {pos, string_size};
         return {pos + string_size - offset, std::min(length, offset)};
     }
 };
@@ -560,16 +560,16 @@ struct GenericArraySource
     Slice getSliceFromRight(size_t offset) const
     {
         size_t elem_size = offsets[row_num] - prev_offset;
-        if (offset >= elem_size)
-            return {&elements, prev_offset, 0};
+        if (offset > elem_size)
+            return {&elements, prev_offset, elem_size};
         return {&elements, offsets[row_num] - offset, offset};
     }
 
     Slice getSliceFromRight(size_t offset, size_t length) const
     {
         size_t elem_size = offsets[row_num] - prev_offset;
-        if (offset >= elem_size)
-            return {&elements, prev_offset, 0};
+        if (offset > elem_size)
+            return {&elements, prev_offset, elem_size};
         return {&elements, offsets[row_num] - offset, std::min(length, offset)};
     }
 };
diff --git a/dbms/src/IO/WriteBufferFromVector.h b/dbms/src/IO/WriteBufferFromVector.h
index 966148300ae..bea92121bfe 100644
--- a/dbms/src/IO/WriteBufferFromVector.h
+++ b/dbms/src/IO/WriteBufferFromVector.h
@@ -14,7 +14,7 @@ namespace DB
 /** Initialized by vector. Writes data to it. When the vector is finished, it doubles its size.
   * CharType - char or unsigned char.
   */
-template <typename VectorType = std::vector<char> >
+template <typename VectorType = std::vector<char>>
 class WriteBufferFromVector : public WriteBuffer
 {
 private:
diff --git a/dbms/src/IO/tests/read_buffer_aio.cpp b/dbms/src/IO/tests/read_buffer_aio.cpp
index e5a3e69113d..56fbeab5d8b 100644
--- a/dbms/src/IO/tests/read_buffer_aio.cpp
+++ b/dbms/src/IO/tests/read_buffer_aio.cpp
@@ -67,7 +67,7 @@ void run()
     std::string buf5;
     prepare4(filename5, buf5);
 
-    const std::vector<std::function<bool()> > tests =
+    const std::vector<std::function<bool()>> tests =
     {
         std::bind(test1, std::ref(filename)),
         std::bind(test2, std::ref(filename), std::ref(buf)),
diff --git a/dbms/src/IO/tests/write_buffer_aio.cpp b/dbms/src/IO/tests/write_buffer_aio.cpp
index 1eb8d539ce8..5394d67a36d 100644
--- a/dbms/src/IO/tests/write_buffer_aio.cpp
+++ b/dbms/src/IO/tests/write_buffer_aio.cpp
@@ -32,7 +32,7 @@ bool test10();
 
 void run()
 {
-    const std::vector<std::function<bool()> > tests =
+    const std::vector<std::function<bool()>> tests =
     {
         test1,
         test2,
diff --git a/dbms/src/Interpreters/Aggregator.cpp b/dbms/src/Interpreters/Aggregator.cpp
index 4f0772a4980..4cfd5967a1d 100644
--- a/dbms/src/Interpreters/Aggregator.cpp
+++ b/dbms/src/Interpreters/Aggregator.cpp
@@ -356,7 +356,8 @@ void Aggregator::compileIfPossible(AggregatedDataVariants::Type type)
       *  at the end of which `on_ready` callback is called.
       */
     SharedLibraryPtr lib = params.compiler->getOrCount(key, params.min_count_to_compile,
-        "-include " INTERNAL_COMPILER_HEADERS "/dbms/src/Interpreters/SpecializedAggregator.h",
+        "-include " INTERNAL_COMPILER_HEADERS "/dbms/src/Interpreters/SpecializedAggregator.h "
+        "-Wno-unused-function",
         get_code, on_ready);
 
     /// If the result is already ready.
@@ -605,7 +606,7 @@ void NO_INLINE Aggregator::executeImplCase(
 
         if (!no_more_keys)  /// Insert.
         {
-            /// Optimization for frequently duplicating keys.
+            /// Optimization for consecutive identical keys.
             if (!Method::no_consecutive_keys_optimization)
             {
                 if (i != 0 && key == prev_key)
diff --git a/dbms/src/Interpreters/ClusterProxy/executeQuery.cpp b/dbms/src/Interpreters/ClusterProxy/executeQuery.cpp
index cf1c6e6db36..cc48d346f95 100644
--- a/dbms/src/Interpreters/ClusterProxy/executeQuery.cpp
+++ b/dbms/src/Interpreters/ClusterProxy/executeQuery.cpp
@@ -6,6 +6,7 @@
 #include <Interpreters/IInterpreter.h>
 #include <Parsers/queryToString.h>
 #include <DataStreams/RemoteBlockInputStream.h>
+#include <Interpreters/ProcessList.h>
 
 
 namespace DB
@@ -39,13 +40,24 @@ BlockInputStreams executeQuery(
     Context new_context(context);
     new_context.setSettings(new_settings);
 
+    ThrottlerPtr user_level_throttler;
+    if (settings.limits.max_network_bandwidth_for_user)
+        if (auto process_list_element = context.getProcessListElement())
+            if (auto user_process_list = process_list_element->user_process_list)
+                user_level_throttler = user_process_list->user_throttler;
+
     /// Network bandwidth limit, if needed.
     ThrottlerPtr throttler;
     if (settings.limits.max_network_bandwidth || settings.limits.max_network_bytes)
+    {
         throttler = std::make_shared<Throttler>(
-            settings.limits.max_network_bandwidth,
-            settings.limits.max_network_bytes,
-            "Limit for bytes to send or receive over network exceeded.");
+                settings.limits.max_network_bandwidth,
+                settings.limits.max_network_bytes,
+                "Limit for bytes to send or receive over network exceeded.",
+                user_level_throttler);
+    }
+    else if (settings.limits.max_network_bandwidth_for_user)
+        throttler = user_level_throttler;
 
     for (const auto & shard_info : cluster->getShardsInfo())
         stream_factory.createForShard(shard_info, query, query_ast, new_context, throttler, res);
diff --git a/dbms/src/Interpreters/Context.cpp b/dbms/src/Interpreters/Context.cpp
index c011734bdb1..f894fe12563 100644
--- a/dbms/src/Interpreters/Context.cpp
+++ b/dbms/src/Interpreters/Context.cpp
@@ -104,6 +104,8 @@ struct ContextShared
     String path;                                            /// Path to the data directory, with a slash at the end.
     String tmp_path;                                        /// The path to the temporary files that occur when processing the request.
     String flags_path;                                      /// Path to the directory with some control flags for server maintenance.
+    ConfigurationPtr config;                                /// Global configuration settings.
+
     Databases databases;                                    /// List of databases and tables in them.
     FormatFactory format_factory;                           /// Formats.
     mutable std::shared_ptr<EmbeddedDictionaries> embedded_dictionaries;    /// Metrica's dictionaeis. Have lazy initialization.
@@ -483,6 +485,23 @@ void Context::setFlagsPath(const String & path)
     shared->flags_path = path;
 }
 
+void Context::setConfig(const ConfigurationPtr & config)
+{
+    auto lock = getLock();
+    shared->config = config;
+}
+
+ConfigurationPtr Context::getConfig() const
+{
+    auto lock = getLock();
+    return shared->config;
+}
+
+Poco::Util::AbstractConfiguration & Context::getConfigRef() const
+{
+    auto lock = getLock();
+    return shared->config ? *shared->config : Poco::Util::Application::instance().config();
+}
 
 void Context::setUsersConfig(const ConfigurationPtr & config)
 {
@@ -498,7 +517,6 @@ ConfigurationPtr Context::getUsersConfig()
     return shared->users_config;
 }
 
-
 void Context::calculateUserSettings()
 {
     auto lock = getLock();
@@ -1020,7 +1038,7 @@ const EmbeddedDictionaries & Context::getEmbeddedDictionariesImpl(const bool thr
     std::lock_guard<std::mutex> lock(shared->embedded_dictionaries_mutex);
 
     if (!shared->embedded_dictionaries)
-        shared->embedded_dictionaries = std::make_shared<EmbeddedDictionaries>(throw_on_error);
+        shared->embedded_dictionaries = std::make_shared<EmbeddedDictionaries>(*this->global_context, throw_on_error);
 
     return *shared->embedded_dictionaries;
 }
@@ -1076,6 +1094,11 @@ ProcessList::Element * Context::getProcessListElement()
     return process_list_elem;
 }
 
+const ProcessList::Element * Context::getProcessListElement() const
+{
+    return process_list_elem;
+}
+
 
 void Context::setUncompressedCache(size_t max_size_in_bytes)
 {
@@ -1207,7 +1230,10 @@ std::pair<String, UInt16> Context::getInterserverIOAddress() const
 
 UInt16 Context::getTCPPort() const
 {
-    return Poco::Util::Application::instance().config().getInt("tcp_port");
+    auto lock = getLock();
+
+    auto & config = getConfigRef();
+    return config.getInt("tcp_port");
 }
 
 
@@ -1234,7 +1260,7 @@ Clusters & Context::getClusters() const
         std::lock_guard<std::mutex> lock(shared->clusters_mutex);
         if (!shared->clusters)
         {
-            auto & config = shared->clusters_config ? *shared->clusters_config : Poco::Util::Application::instance().config();
+            auto & config = shared->clusters_config ? *shared->clusters_config : getConfigRef();
             shared->clusters = std::make_unique<Clusters>(config, settings);
         }
     }
@@ -1280,7 +1306,7 @@ QueryLog & Context::getQueryLog()
         if (!global_context)
             throw Exception("Logical error: no global context for query log", ErrorCodes::LOGICAL_ERROR);
 
-        auto & config = Poco::Util::Application::instance().config();
+        auto & config = getConfigRef();
 
         String database = config.getString("query_log.database", "system");
         String table = config.getString("query_log.table", "query_log");
@@ -1299,7 +1325,7 @@ PartLog * Context::getPartLog(const String & database, const String & table)
 {
     auto lock = getLock();
 
-    auto & config = Poco::Util::Application::instance().config();
+    auto & config = getConfigRef();
     if (!config.has("part_log"))
         return nullptr;
 
@@ -1340,7 +1366,7 @@ CompressionMethod Context::chooseCompressionMethod(size_t part_size, double part
     if (!shared->compression_method_selector)
     {
         constexpr auto config_name = "compression";
-        auto & config = Poco::Util::Application::instance().config();
+        auto & config = getConfigRef();
 
         if (config.has(config_name))
             shared->compression_method_selector = std::make_unique<CompressionMethodSelector>(config, "compression");
@@ -1358,7 +1384,7 @@ const MergeTreeSettings & Context::getMergeTreeSettings()
 
     if (!shared->merge_tree_settings)
     {
-        auto & config = Poco::Util::Application::instance().config();
+        auto & config = getConfigRef();
         shared->merge_tree_settings = std::make_unique<MergeTreeSettings>();
         shared->merge_tree_settings->loadFromConfig("merge_tree", config);
     }
diff --git a/dbms/src/Interpreters/Context.h b/dbms/src/Interpreters/Context.h
index f2e5855bc6b..69875a3d1ad 100644
--- a/dbms/src/Interpreters/Context.h
+++ b/dbms/src/Interpreters/Context.h
@@ -128,12 +128,16 @@ public:
 
     using ConfigurationPtr = Poco::AutoPtr<Poco::Util::AbstractConfiguration>;
 
+    /// Global application configuration settings.
+    void setConfig(const ConfigurationPtr & config);
+    ConfigurationPtr getConfig() const;
+    Poco::Util::AbstractConfiguration & getConfigRef() const;
+
     /** Take the list of users, quotas and configuration profiles from this config.
       * The list of users is completely replaced.
       * The accumulated quota values are not reset if the quota is not deleted.
       */
     void setUsersConfig(const ConfigurationPtr & config);
-
     ConfigurationPtr getUsersConfig();
 
     /// Must be called before getClientInfo.
@@ -265,6 +269,7 @@ public:
     void setProcessListElement(ProcessListElement * elem);
     /// Can return nullptr if the query was not inserted into the ProcessList.
     ProcessListElement * getProcessListElement();
+    const ProcessListElement * getProcessListElement() const;
 
     /// List all queries.
     ProcessList & getProcessList();
diff --git a/dbms/src/Interpreters/DDLWorker.cpp b/dbms/src/Interpreters/DDLWorker.cpp
index d77e1f275d5..fee78388266 100644
--- a/dbms/src/Interpreters/DDLWorker.cpp
+++ b/dbms/src/Interpreters/DDLWorker.cpp
@@ -506,7 +506,8 @@ bool DDLWorker::tryExecuteQuery(const String & query, const DDLTask & task, Exec
 
     try
     {
-        executeQuery(istr, ostr, false, context, nullptr);
+        Context local_context(context);
+        executeQuery(istr, ostr, false, local_context, nullptr);
     }
     catch (...)
     {
@@ -862,7 +863,7 @@ void DDLWorker::run()
             tryLogCurrentException(log, "Terminating. Cannot initialize DDL queue.");
             return;
         }
-    } while (!initialized);
+    } while (!initialized && !stop_flag);
 
     while (!stop_flag)
     {
@@ -886,7 +887,22 @@ void DDLWorker::run()
                 if (!e.isTemporaryError())
                 {
                     LOG_DEBUG(log, "Recovering ZooKeeper session after: " << getCurrentExceptionMessage(false));
-                    zookeeper = context.getZooKeeper();
+
+                    while (!stop_flag)
+                    {
+                        try
+                        {
+                            zookeeper = context.getZooKeeper();
+                            break;
+                        }
+                        catch (...)
+                        {
+                            tryLogCurrentException(__PRETTY_FUNCTION__);
+
+                            using namespace std::chrono_literals;
+                            std::this_thread::sleep_for(5s);
+                        }
+                    }
                 }
                 else
                 {
diff --git a/dbms/src/Interpreters/EmbeddedDictionaries.cpp b/dbms/src/Interpreters/EmbeddedDictionaries.cpp
index 4d1f11d0167..5a3471a554d 100644
--- a/dbms/src/Interpreters/EmbeddedDictionaries.cpp
+++ b/dbms/src/Interpreters/EmbeddedDictionaries.cpp
@@ -1,12 +1,15 @@
-#include <common/logger_useful.h>
-#include <Poco/Util/Application.h>
-#include <Interpreters/EmbeddedDictionaries.h>
 #include <Dictionaries/Embedded/RegionsHierarchies.h>
-#include <Dictionaries/Embedded/TechDataHierarchy.h>
 #include <Dictionaries/Embedded/RegionsNames.h>
+#include <Dictionaries/Embedded/TechDataHierarchy.h>
+#include <Interpreters/Context.h>
+#include <Interpreters/EmbeddedDictionaries.h>
+
 #include <Common/setThreadName.h>
 #include <Common/Exception.h>
 #include <Common/config.h>
+#include <common/logger_useful.h>
+
+#include <Poco/Util/Application.h>
 
 
 namespace DB
@@ -26,12 +29,14 @@ void EmbeddedDictionaries::handleException(const bool throw_on_error) const
 template <typename Dictionary>
 bool EmbeddedDictionaries::reloadDictionary(MultiVersion<Dictionary> & dictionary, const bool throw_on_error)
 {
-    if (Dictionary::isConfigured() && (!is_fast_start_stage || !dictionary.get()))
+    const auto & config = context.getConfigRef();
+
+    if (Dictionary::isConfigured(config) && (!is_fast_start_stage || !dictionary.get()))
     {
         try
         {
             auto new_dictionary = std::make_unique<Dictionary>();
-            new_dictionary->reload();
+            new_dictionary->reload(config);
             dictionary.set(new_dictionary.release());
         }
         catch (...)
@@ -100,20 +105,16 @@ void EmbeddedDictionaries::reloadPeriodically()
 }
 
 
-EmbeddedDictionaries::EmbeddedDictionaries(const bool throw_on_error, const int reload_period_)
-    : reload_period(reload_period_), log(&Logger::get("EmbeddedDictionaries"))
+EmbeddedDictionaries::EmbeddedDictionaries(Context & context_, const bool throw_on_error)
+    : log(&Logger::get("EmbeddedDictionaries"))
+    , context(context_)
+    , reload_period(context_.getConfigRef().getInt("builtin_dictionaries_reload_interval", 3600))
 {
     reloadImpl(throw_on_error);
     reloading_thread = std::thread([this] { reloadPeriodically(); });
 }
 
 
-EmbeddedDictionaries::EmbeddedDictionaries(const bool throw_on_error)
-    : EmbeddedDictionaries(throw_on_error,
-        Poco::Util::Application::instance().config().getInt("builtin_dictionaries_reload_interval", 3600))
-{}
-
-
 EmbeddedDictionaries::~EmbeddedDictionaries()
 {
     destroy.set();
diff --git a/dbms/src/Interpreters/EmbeddedDictionaries.h b/dbms/src/Interpreters/EmbeddedDictionaries.h
index d71b4feb927..bb14d7391a1 100644
--- a/dbms/src/Interpreters/EmbeddedDictionaries.h
+++ b/dbms/src/Interpreters/EmbeddedDictionaries.h
@@ -23,6 +23,9 @@ class Context;
 class EmbeddedDictionaries
 {
 private:
+    Poco::Logger * log;
+    Context & context;
+
     MultiVersion<RegionsHierarchies> regions_hierarchies;
     MultiVersion<TechDataHierarchy> tech_data_hierarchy;
     MultiVersion<RegionsNames> regions_names;
@@ -35,8 +38,6 @@ private:
     std::thread reloading_thread;
     Poco::Event destroy;
 
-    Poco::Logger * log;
-
 
     void handleException(const bool throw_on_error) const;
 
@@ -54,9 +55,7 @@ private:
 
 public:
     /// Every reload_period seconds directories are updated inside a separate thread.
-    EmbeddedDictionaries(const bool throw_on_error, const int reload_period_);
-
-    EmbeddedDictionaries(const bool throw_on_error);
+    EmbeddedDictionaries(Context & context, const bool throw_on_error);
 
     ~EmbeddedDictionaries();
 
diff --git a/dbms/src/Interpreters/ExpressionAnalyzer.cpp b/dbms/src/Interpreters/ExpressionAnalyzer.cpp
index bce55a70a89..e3cafeb0e3e 100644
--- a/dbms/src/Interpreters/ExpressionAnalyzer.cpp
+++ b/dbms/src/Interpreters/ExpressionAnalyzer.cpp
@@ -943,12 +943,8 @@ void ExpressionAnalyzer::addASTAliases(ASTPtr & ast, int ignore_levels)
             ++subquery_index;
         }
 
-        std::cerr << ast->getColumnName() << "\n";
-
         ast->setAlias(alias);
         aliases[alias] = ast;
-
-        std::cerr << ast->getAliasOrColumnName() << "\n";
     }
 }
 
@@ -1009,8 +1005,10 @@ void ExpressionAnalyzer::normalizeTreeImpl(
     /// rewrite rules that act when you go from top to bottom.
     bool replaced = false;
 
-    ASTFunction * func_node = typeid_cast<ASTFunction *>(ast.get());
-    if (func_node)
+    ASTIdentifier * identifier_node = nullptr;
+    ASTFunction * func_node = nullptr;
+
+    if ((func_node = typeid_cast<ASTFunction *>(ast.get())))
     {
         /** Is there a column in the table whose name fully matches the function entry?
           * For example, in the table there is a column "domain(URL)", and we requested domain(URL).
@@ -1048,13 +1046,13 @@ void ExpressionAnalyzer::normalizeTreeImpl(
             }
         }
     }
-    else if (ASTIdentifier * node = typeid_cast<ASTIdentifier *>(ast.get()))
+    else if ((identifier_node = typeid_cast<ASTIdentifier *>(ast.get())))
     {
-        if (node->kind == ASTIdentifier::Column)
+        if (identifier_node->kind == ASTIdentifier::Column)
         {
             /// If it is an alias, but not a parent alias (for constructs like "SELECT column + 1 AS column").
-            Aliases::const_iterator jt = aliases.find(node->name);
-            if (jt != aliases.end() && current_alias != node->name)
+            Aliases::const_iterator jt = aliases.find(identifier_node->name);
+            if (jt != aliases.end() && current_alias != identifier_node->name)
             {
                 /// Let's replace it with the corresponding tree node.
                 if (current_asts.count(jt->second.get()))
@@ -1116,7 +1114,7 @@ void ExpressionAnalyzer::normalizeTreeImpl(
         return;
     }
 
-    /// Recurring calls. Don't go into subqueries.
+    /// Recurring calls. Don't go into subqueries. Don't go into components of compound identifiers.
     /// We also do not go to the left argument of lambda expressions, so as not to replace the formal parameters
     ///  on aliases in expressions of the form 123 AS x, arrayMap(x -> 1, [2]).
 
@@ -1134,6 +1132,9 @@ void ExpressionAnalyzer::normalizeTreeImpl(
             normalizeTreeImpl(child, finished_asts, current_asts, current_alias, level + 1);
         }
     }
+    else if (identifier_node)
+    {
+    }
     else
     {
         for (auto & child : ast->children)
@@ -2736,10 +2737,6 @@ void ExpressionAnalyzer::collectUsedColumns()
             columns_added_by_join.erase(it++);
     }
 
-/*    for (const auto & name_type : columns_added_by_join)
-        std::cerr << "JOINed column (required, not key): " << name_type.name << std::endl;
-    std::cerr << std::endl;*/
-
     /// Insert the columns required for the ARRAY JOIN calculation into the required columns list.
     NameSet array_join_sources;
     for (const auto & result_source : array_join_result_to_source)
@@ -2835,15 +2832,6 @@ void ExpressionAnalyzer::collectJoinedColumns(NameSet & joined_columns, NamesAnd
             joined_columns_name_type.emplace_back(col.name, col.type);
         }
     }
-
-/*    for (const auto & name : join_key_names_left)
-        std::cerr << "JOIN key (left): " << name << std::endl;
-    for (const auto & name : join_key_names_right)
-        std::cerr << "JOIN key (right): " << name << std::endl;
-    std::cerr << std::endl;
-    for (const auto & name : joined_columns)
-        std::cerr << "JOINed column: " << name << std::endl;
-    std::cerr << std::endl;*/
 }
 
 
diff --git a/dbms/src/Interpreters/ExternalDictionaries.cpp b/dbms/src/Interpreters/ExternalDictionaries.cpp
index 4d3dc137131..3b6ffb84428 100644
--- a/dbms/src/Interpreters/ExternalDictionaries.cpp
+++ b/dbms/src/Interpreters/ExternalDictionaries.cpp
@@ -1,4 +1,5 @@
 #include <Interpreters/ExternalDictionaries.h>
+#include <Interpreters/Context.h>
 #include <Dictionaries/DictionaryFactory.h>
 #include <Dictionaries/DictionaryStructure.h>
 #include <Dictionaries/IDictionarySource.h>
@@ -95,7 +96,7 @@ std::set<std::string> getDictionariesConfigPaths(const Poco::Util::AbstractConfi
 
 void ExternalDictionaries::reloadImpl(const bool throw_on_error)
 {
-    const auto config_paths = getDictionariesConfigPaths(Poco::Util::Application::instance().config());
+    const auto config_paths = getDictionariesConfigPaths(context.getConfigRef());
 
     for (const auto & config_path : config_paths)
     {
diff --git a/dbms/src/Interpreters/InterpreterCreateQuery.cpp b/dbms/src/Interpreters/InterpreterCreateQuery.cpp
index b17ba8dd66f..80536a76a15 100644
--- a/dbms/src/Interpreters/InterpreterCreateQuery.cpp
+++ b/dbms/src/Interpreters/InterpreterCreateQuery.cpp
@@ -445,8 +445,20 @@ String InterpreterCreateQuery::setEngine(
         String as_database_name = create.as_database.empty() ? context.getCurrentDatabase() : create.as_database;
         String as_table_name = create.as_table;
 
-        storage_name = as_storage->getName();
-        create.storage = typeid_cast<const ASTCreateQuery &>(*context.getCreateQuery(as_database_name, as_table_name)).storage;
+        auto as_create_ptr = context.getCreateQuery(as_database_name, as_table_name);
+        auto & as_create = typeid_cast<const ASTCreateQuery &>(*as_create_ptr);
+
+        if (!create.storage)
+        {
+            if (as_create.is_view || as_create.is_materialized_view)
+                create.storage = as_create.inner_storage;
+            else
+                create.storage = as_create.storage;
+
+            storage_name = typeid_cast<const ASTFunction &>(*create.storage).name;
+        }
+        else
+            storage_name = as_storage->getName();
     }
     else if (create.is_temporary)
         set_engine("Memory");
diff --git a/dbms/src/Interpreters/Limits.h b/dbms/src/Interpreters/Limits.h
index 6851257dabe..0ae83f01dcf 100644
--- a/dbms/src/Interpreters/Limits.h
+++ b/dbms/src/Interpreters/Limits.h
@@ -98,6 +98,8 @@ struct Limits
     M(SettingUInt64, max_network_bandwidth, 0) \
     /** The maximum number of bytes to receive or transmit over the network, as part of the query. */ \
     M(SettingUInt64, max_network_bytes, 0) \
+    /** The maximum speed of data exchange over the network for the user in bytes per second. 0 - not bounded. */ \
+    M(SettingUInt64, max_network_bandwidth_for_user, 0)
 
 #define DECLARE(TYPE, NAME, DEFAULT) \
     TYPE NAME {DEFAULT};
diff --git a/dbms/src/Interpreters/ProcessList.cpp b/dbms/src/Interpreters/ProcessList.cpp
index e4283df201a..99b256e77e9 100644
--- a/dbms/src/Interpreters/ProcessList.cpp
+++ b/dbms/src/Interpreters/ProcessList.cpp
@@ -101,6 +101,14 @@ ProcessList::EntryPtr ProcessList::insert(
                 total_memory_tracker.setDescription("(total)");
                 user_process_list.user_memory_tracker.setNext(&total_memory_tracker);
             }
+
+            if (settings.limits.max_network_bandwidth_for_user && !user_process_list.user_throttler)
+            {
+                user_process_list.user_throttler = std::make_shared<Throttler>(settings.limits.max_network_bandwidth_for_user, 0,
+                    "Network bandwidth limit for a user exceeded.");
+            }
+
+            res->get().user_process_list = &user_process_list;
         }
     }
 
@@ -124,21 +132,22 @@ ProcessListEntry::~ProcessListEntry()
     /// This removes the memory_tracker of one request.
     parent.cont.erase(it);
 
-    ProcessList::UserToQueries::iterator user_process_list = parent.user_to_queries.find(user);
+    auto user_process_list = parent.user_to_queries.find(user);
     if (user_process_list != parent.user_to_queries.end())
     {
         /// In case the request is canceled, the data about it is deleted from the map at the time of cancellation, and not here.
         if (!is_cancelled && !query_id.empty())
         {
-            ProcessListForUser::QueryToElement::iterator element = user_process_list->second.queries.find(query_id);
+            auto element = user_process_list->second.queries.find(query_id);
             if (element != user_process_list->second.queries.end())
                 user_process_list->second.queries.erase(element);
         }
 
         /// This removes the memory_tracker from the user. At this time, the memory_tracker that references it does not live.
 
-        /// If there are no more queries for the user, then we delete the record.
+        /// If there are no more queries for the user, then we delete the entry.
         /// This also clears the MemoryTracker for the user, and a message about the memory consumption is output to the log.
+        /// This also clears network bandwidth Throttler, so it will not count periods of inactivity.
         /// Sometimes it is important to reset the MemoryTracker, because it may accumulate skew
         ///  due to the fact that there are cases when memory can be allocated while processing the request, but released later.
         if (user_process_list->second.queries.empty())
diff --git a/dbms/src/Interpreters/ProcessList.h b/dbms/src/Interpreters/ProcessList.h
index da284667e95..c853f4a06bc 100644
--- a/dbms/src/Interpreters/ProcessList.h
+++ b/dbms/src/Interpreters/ProcessList.h
@@ -14,6 +14,7 @@
 #include <Interpreters/ClientInfo.h>
 #include <Common/CurrentMetrics.h>
 #include <DataStreams/BlockIO.h>
+#include <Common/Throttler.h>
 
 
 namespace CurrentMetrics
@@ -29,6 +30,7 @@ using StoragePtr = std::shared_ptr<IStorage>;
 using Tables = std::map<String, StoragePtr>;
 struct Settings;
 class IAST;
+struct ProcessListForUser;
 
 
 /** List of currently executing queries.
@@ -76,6 +78,9 @@ struct ProcessListElement
     /// Temporary tables could be registered here. Modify under mutex.
     Tables temporary_tables;
 
+    /// Be careful using it. For example, queries field could be modified concurrently.
+    const ProcessListForUser * user_process_list = nullptr;
+
 protected:
 
     mutable std::mutex query_streams_mutex;
@@ -169,6 +174,9 @@ struct ProcessListForUser
 
     /// Limit and counter for memory of all simultaneously running queries of single user.
     MemoryTracker user_memory_tracker;
+
+    /// Count network usage for all simultaneously running queries of single user.
+    ThrottlerPtr user_throttler;
 };
 
 
@@ -214,10 +222,15 @@ private:
     mutable std::mutex mutex;
     mutable Poco::Condition have_space;        /// Number of currently running queries has become less than maximum.
 
+    /// List of queries
     Container cont;
     size_t cur_size;        /// In C++03 or C++11 and old ABI, std::list::size is not O(1).
     size_t max_size;        /// 0 means no limit. Otherwise, when limit exceeded, an exception is thrown.
+
+    /// Stores per-user info: queries, statistics and limits
     UserToQueries user_to_queries;
+
+    /// Stores info about queries grouped by their priority
     QueryPriorities priorities;
 
     /// Limit and counter for memory of all simultaneously running queries.
diff --git a/dbms/src/Interpreters/Set.cpp b/dbms/src/Interpreters/Set.cpp
index 5662886648a..4939e245f71 100644
--- a/dbms/src/Interpreters/Set.cpp
+++ b/dbms/src/Interpreters/Set.cpp
@@ -83,7 +83,7 @@ void NO_INLINE Set::insertFromBlockImplCase(
         /// Obtain a key to insert to the set
         typename Method::Key key = state.getKey(key_columns, keys_size, i, key_sizes);
 
-        typename Method::Data::iterator it = method.data.find(key);
+        typename Method::Data::iterator it;
         bool inserted;
         method.data.emplace(key, it, inserted);
 
diff --git a/dbms/src/Interpreters/Settings.h b/dbms/src/Interpreters/Settings.h
index 81b96ef53e4..a0eb9639060 100644
--- a/dbms/src/Interpreters/Settings.h
+++ b/dbms/src/Interpreters/Settings.h
@@ -274,7 +274,7 @@ struct Settings
      * Zero means do not take delay into account. \
      */ \
     \
-    M(SettingUInt64, max_replica_delay_for_distributed_queries, 0) \
+    M(SettingUInt64, max_replica_delay_for_distributed_queries, 300) \
    /** Suppose max_replica_delay_for_distributed_queries is set and all replicas for the queried table are stale. \
      * If this setting is enabled, the query will be performed anyway, otherwise the error will be reported. \
      */ \
diff --git a/dbms/src/Interpreters/SpecializedAggregator.h b/dbms/src/Interpreters/SpecializedAggregator.h
index acfefe0f44d..c582e72b4fe 100644
--- a/dbms/src/Interpreters/SpecializedAggregator.h
+++ b/dbms/src/Interpreters/SpecializedAggregator.h
@@ -1,21 +1,30 @@
 #include <Interpreters/Aggregator.h>
 
-#include <AggregateFunctions/AggregateFunctionCount.h>
-#include <AggregateFunctions/AggregateFunctionSum.h>
-#include <AggregateFunctions/AggregateFunctionAvg.h>
-#include <AggregateFunctions/AggregateFunctionMinMaxAny.h>
 #include <AggregateFunctions/AggregateFunctionArgMinMax.h>
+#include <AggregateFunctions/AggregateFunctionArray.h>
+#include <AggregateFunctions/AggregateFunctionAvg.h>
+#include <AggregateFunctions/AggregateFunctionCount.h>
+#include <AggregateFunctions/AggregateFunctionForEach.h>
+#include <AggregateFunctions/AggregateFunctionGroupArray.h>
+#include <AggregateFunctions/AggregateFunctionGroupArrayInsertAt.h>
+#include <AggregateFunctions/AggregateFunctionGroupUniqArray.h>
+#include <AggregateFunctions/AggregateFunctionIf.h>
+#include <AggregateFunctions/AggregateFunctionMerge.h>
+#include <AggregateFunctions/AggregateFunctionMinMaxAny.h>
+#include <AggregateFunctions/AggregateFunctionNull.h>
+#include <AggregateFunctions/AggregateFunctionQuantileDeterministic.h>
+#include <AggregateFunctions/AggregateFunctionQuantileExact.h>
+#include <AggregateFunctions/AggregateFunctionQuantileExactWeighted.h>
+#include <AggregateFunctions/AggregateFunctionQuantile.h>
+#include <AggregateFunctions/AggregateFunctionQuantileTDigest.h>
+#include <AggregateFunctions/AggregateFunctionQuantileTiming.h>
+#include <AggregateFunctions/AggregateFunctionSequenceMatch.h>
+#include <AggregateFunctions/AggregateFunctionState.h>
+#include <AggregateFunctions/AggregateFunctionStatistics.h>
+#include <AggregateFunctions/AggregateFunctionSum.h>
+#include <AggregateFunctions/AggregateFunctionTopK.h>
 #include <AggregateFunctions/AggregateFunctionUniq.h>
 #include <AggregateFunctions/AggregateFunctionUniqUpTo.h>
-#include <AggregateFunctions/AggregateFunctionGroupArray.h>
-#include <AggregateFunctions/AggregateFunctionGroupUniqArray.h>
-#include <AggregateFunctions/AggregateFunctionQuantile.h>
-#include <AggregateFunctions/AggregateFunctionQuantileTiming.h>
-#include <AggregateFunctions/AggregateFunctionIf.h>
-#include <AggregateFunctions/AggregateFunctionArray.h>
-#include <AggregateFunctions/AggregateFunctionState.h>
-#include <AggregateFunctions/AggregateFunctionMerge.h>
-#include <AggregateFunctions/AggregateFunctionNull.h>
 
 
 namespace DB
diff --git a/dbms/src/Interpreters/evaluateConstantExpression.cpp b/dbms/src/Interpreters/evaluateConstantExpression.cpp
index 2839e01bb26..24ea8924c38 100644
--- a/dbms/src/Interpreters/evaluateConstantExpression.cpp
+++ b/dbms/src/Interpreters/evaluateConstantExpression.cpp
@@ -60,7 +60,7 @@ ASTPtr evaluateConstantExpressionAsLiteral(ASTPtr & node, const Context & contex
 }
 
 
-ASTPtr evaluateConstantExpressionOrIdentidierAsLiteral(ASTPtr & node, const Context & context)
+ASTPtr evaluateConstantExpressionOrIdentifierAsLiteral(ASTPtr & node, const Context & context)
 {
     if (const ASTIdentifier * id = typeid_cast<const ASTIdentifier *>(node.get()))
         return std::make_shared<ASTLiteral>(node->range, Field(id->name));
diff --git a/dbms/src/Interpreters/evaluateConstantExpression.h b/dbms/src/Interpreters/evaluateConstantExpression.h
index 52dbed9f83d..ed3befdf02b 100644
--- a/dbms/src/Interpreters/evaluateConstantExpression.h
+++ b/dbms/src/Interpreters/evaluateConstantExpression.h
@@ -29,6 +29,6 @@ std::shared_ptr<IAST> evaluateConstantExpressionAsLiteral(std::shared_ptr<IAST>
   * Also, if AST is identifier, then return string literal with its name.
   * Useful in places where some name may be specified as identifier, or as result of a constant expression.
   */
-std::shared_ptr<IAST> evaluateConstantExpressionOrIdentidierAsLiteral(std::shared_ptr<IAST> & node, const Context & context);
+std::shared_ptr<IAST> evaluateConstantExpressionOrIdentifierAsLiteral(std::shared_ptr<IAST> & node, const Context & context);
 
 }
diff --git a/dbms/src/Interpreters/loadMetadata.cpp b/dbms/src/Interpreters/loadMetadata.cpp
index dac4ef0c911..80b670f4c3f 100644
--- a/dbms/src/Interpreters/loadMetadata.cpp
+++ b/dbms/src/Interpreters/loadMetadata.cpp
@@ -137,7 +137,6 @@ void loadMetadataSystem(Context & context)
 
         auto system_database = std::make_shared<DatabaseOrdinary>(SYSTEM_DATABASE, global_path + "metadata/" SYSTEM_DATABASE);
         context.addDatabase(SYSTEM_DATABASE, system_database);
-
     }
 
 }
diff --git a/dbms/src/Interpreters/tests/CMakeLists.txt b/dbms/src/Interpreters/tests/CMakeLists.txt
index 233b4a8e5b7..48793cbcf8d 100644
--- a/dbms/src/Interpreters/tests/CMakeLists.txt
+++ b/dbms/src/Interpreters/tests/CMakeLists.txt
@@ -14,10 +14,6 @@ add_executable (hash_map hash_map.cpp)
 target_include_directories (hash_map BEFORE PRIVATE ${SPARCEHASH_INCLUDE_DIR})
 target_link_libraries (hash_map dbms)
 
-add_executable (hash_map2 hash_map2.cpp)
-target_include_directories (hash_map2 BEFORE PRIVATE ${SPARCEHASH_INCLUDE_DIR})
-target_link_libraries (hash_map2 dbms)
-
 add_executable (hash_map3 hash_map3.cpp)
 target_link_libraries (hash_map3 dbms)
 
diff --git a/dbms/src/Interpreters/tests/hash_map.cpp b/dbms/src/Interpreters/tests/hash_map.cpp
index 0df2a73548a..6bf083a5a03 100644
--- a/dbms/src/Interpreters/tests/hash_map.cpp
+++ b/dbms/src/Interpreters/tests/hash_map.cpp
@@ -244,8 +244,8 @@ int main(int argc, char ** argv)
     {
         Stopwatch watch;
 
-        std::unordered_map<Key, Value, DefaultHash<Key> > map;
-        std::unordered_map<Key, Value, DefaultHash<Key> >::iterator it;
+        std::unordered_map<Key, Value, DefaultHash<Key>> map;
+        std::unordered_map<Key, Value, DefaultHash<Key>>::iterator it;
         for (size_t i = 0; i < n; ++i)
         {
             it = map.insert(std::make_pair(data[i], std::move(value))).first;
@@ -264,8 +264,8 @@ int main(int argc, char ** argv)
     {
         Stopwatch watch;
 
-        google::dense_hash_map<Key, Value, DefaultHash<Key> > map;
-        google::dense_hash_map<Key, Value, DefaultHash<Key> >::iterator it;
+        google::dense_hash_map<Key, Value, DefaultHash<Key>> map;
+        google::dense_hash_map<Key, Value, DefaultHash<Key>>::iterator it;
         map.set_empty_key(-1ULL);
         for (size_t i = 0; i < n; ++i)
         {
@@ -285,8 +285,8 @@ int main(int argc, char ** argv)
     {
         Stopwatch watch;
 
-        google::sparse_hash_map<Key, Value, DefaultHash<Key> > map;
-        google::sparse_hash_map<Key, Value, DefaultHash<Key> >::iterator it;
+        google::sparse_hash_map<Key, Value, DefaultHash<Key>> map;
+        google::sparse_hash_map<Key, Value, DefaultHash<Key>>::iterator it;
         for (size_t i = 0; i < n; ++i)
         {
             map.insert(std::make_pair(data[i], std::move(value)));
diff --git a/dbms/src/Interpreters/tests/hash_map2.cpp b/dbms/src/Interpreters/tests/hash_map2.cpp
deleted file mode 100644
index 8a8027c726a..00000000000
--- a/dbms/src/Interpreters/tests/hash_map2.cpp
+++ /dev/null
@@ -1,189 +0,0 @@
-#include <iostream>
-#include <iomanip>
-#include <vector>
-
-#include <unordered_map>
-
-#include <sparsehash/dense_hash_map>
-#include <sparsehash/sparse_hash_map>
-
-#include <Common/Stopwatch.h>
-
-//#define DBMS_HASH_MAP_COUNT_COLLISIONS
-#define DBMS_HASH_MAP_DEBUG_RESIZES
-
-#include <Core/Types.h>
-#include <IO/ReadBufferFromFile.h>
-#include <IO/CompressedReadBuffer.h>
-#include <Common/HashTable/HashMap.h>
-
-
-using Key = UInt64;
-using Value = UInt64;
-
-struct CellWithoutZeroWithSavedHash : public HashMapCell<Key, Value, DefaultHash<Key> >
-{
-//    size_t saved_hash;
-
-    static constexpr bool need_zero_value_storage = false;
-
-    CellWithoutZeroWithSavedHash() : HashMapCell() {}
-    CellWithoutZeroWithSavedHash(const Key & key_, const State & state) : HashMapCell(key_, state) {}
-    CellWithoutZeroWithSavedHash(const value_type & value_, const State & state) : HashMapCell(value_, state) {}
-
-/*    bool keyEquals(const Key & key_) const { return value.first == key_; }
-    bool keyEquals(const CellWithoutZeroWithSavedHash & other) const { return saved_hash == other.saved_hash && value.first == other.value.first; }
-
-    void setHash(size_t hash_value) { saved_hash = hash_value; }
-    size_t getHash(const DefaultHash<Key> & hash) const { return saved_hash; }*/
-};
-
-struct Grower : public HashTableGrower<>
-{
-    /// The state of this structure is enough to get the buffer size of the hash table.
-
-    /// Specifies the initial size of the hash table.
-    static const size_t initial_size_degree = 16;
-    Grower() { size_degree = initial_size_degree; }
-
-//    size_t max_fill = (1ULL << initial_size_degree) * 0.9;
-
-    /// The size of the hash table in the cells.
-    size_t bufSize() const               { return 1ULL << size_degree; }
-
-    size_t maxFill() const               { return 1ULL << (size_degree - 1); }
-//    size_t maxFill() const             { return max_fill; }
-
-    size_t mask() const                  { return bufSize() - 1; }
-
-    /// From the hash value, get the cell number in the hash table.
-    size_t place(size_t x) const         { return x & mask(); }
-
-    /// The next cell in the collision resolution chain.
-    size_t next(size_t pos) const        { ++pos; return pos & mask(); }
-
-    /// Whether the hash table is sufficiently full. You need to increase the size of the hash table, or remove something unnecessary from it.
-    bool overflow(size_t elems) const    { return elems > maxFill(); }
-
-    /// Increase the size of the hash table.
-    void increaseSize()
-    {
-        size_degree += size_degree >= 23 ? 1 : 2;
-//        max_fill = (1ULL << size_degree) * 0.9;
-    }
-
-    /// Set the buffer size by the number of elements in the hash table. Used when deserializing a hash table.
-    void set(size_t num_elems)
-    {
-        throw Poco::Exception(__PRETTY_FUNCTION__);
-    }
-};
-
-
-int main(int argc, char ** argv)
-{
-    size_t n = atoi(argv[1]);
-    size_t m = atoi(argv[2]);
-
-    std::vector<Key> data(n);
-
-    std::cerr << "sizeof(Key) = " << sizeof(Key) << ", sizeof(Value) = " << sizeof(Value) << std::endl;
-
-    {
-        Stopwatch watch;
-        DB::ReadBufferFromFileDescriptor in1(STDIN_FILENO);
-        DB::CompressedReadBuffer in2(in1);
-
-        in2.readStrict(reinterpret_cast<char*>(&data[0]), sizeof(data[0]) * n);
-
-        watch.stop();
-        std::cerr << std::fixed << std::setprecision(2)
-            << "Vector. Size: " << n
-            << ", elapsed: " << watch.elapsedSeconds()
-            << " (" << n / watch.elapsedSeconds() << " elem/sec.)"
-            << std::endl;
-    }
-
-    if (m == 1)
-    {
-        Stopwatch watch;
-
-//        using Map = HashMap<Key, Value>;
-
-        /// Due to `WithoutZero`, it's faster by 0.7% (if not fits into L3-cache) - 2.3% (if fits into L3-cache).
-        using Map = HashMapTable<Key, CellWithoutZeroWithSavedHash, DefaultHash<Key>, Grower>;
-
-        Map map;
-        Map::iterator it;
-        bool inserted;
-
-        for (size_t i = 0; i < n; ++i)
-        {
-            map.emplace(data[i], it, inserted);
-            if (inserted)
-                it->second = 0;
-            ++it->second;
-        }
-
-        watch.stop();
-        std::cerr << std::fixed << std::setprecision(2)
-            << "HashMap. Size: " << map.size()
-            << ", elapsed: " << watch.elapsedSeconds()
-            << " (" << n / watch.elapsedSeconds() << " elem/sec.)"
-#ifdef DBMS_HASH_MAP_COUNT_COLLISIONS
-            << ", collisions: " << map.getCollisions()
-#endif
-            << std::endl;
-    }
-
-    if (m == 2)
-    {
-        Stopwatch watch;
-
-        std::unordered_map<Key, Value, DefaultHash<Key> > map;
-        for (size_t i = 0; i < n; ++i)
-            ++map[data[i]];
-
-        watch.stop();
-        std::cerr << std::fixed << std::setprecision(2)
-            << "std::unordered_map. Size: " << map.size()
-            << ", elapsed: " << watch.elapsedSeconds()
-            << " (" << n / watch.elapsedSeconds() << " elem/sec.)"
-            << std::endl;
-    }
-
-    if (m == 3)
-    {
-        Stopwatch watch;
-
-        google::dense_hash_map<Key, Value, DefaultHash<Key> > map;
-        map.set_empty_key(-1ULL);
-        for (size_t i = 0; i < n; ++i)
-              ++map[data[i]];
-
-        watch.stop();
-        std::cerr << std::fixed << std::setprecision(2)
-            << "google::dense_hash_map. Size: " << map.size()
-            << ", elapsed: " << watch.elapsedSeconds()
-            << " (" << n / watch.elapsedSeconds() << " elem/sec.)"
-            << std::endl;
-    }
-
-    if (m == 4)
-    {
-        Stopwatch watch;
-
-        google::sparse_hash_map<Key, Value, DefaultHash<Key> > map;
-        for (size_t i = 0; i < n; ++i)
-            ++map[data[i]];
-
-        watch.stop();
-        std::cerr << std::fixed << std::setprecision(2)
-            << "google::sparse_hash_map. Size: " << map.size()
-            << ", elapsed: " << watch.elapsedSeconds()
-            << " (" << n / watch.elapsedSeconds() << " elem/sec.)"
-            << std::endl;
-    }
-
-    return 0;
-}
diff --git a/dbms/src/Interpreters/tests/hash_map_string.cpp b/dbms/src/Interpreters/tests/hash_map_string.cpp
index c73a5151339..817d03876e4 100644
--- a/dbms/src/Interpreters/tests/hash_map_string.cpp
+++ b/dbms/src/Interpreters/tests/hash_map_string.cpp
@@ -434,7 +434,7 @@ int main(int argc, char ** argv)
     {
         Stopwatch watch;
 
-        std::unordered_map<Key, Value, DefaultHash<Key> > map;
+        std::unordered_map<Key, Value, DefaultHash<Key>> map;
         for (size_t i = 0; i < n; ++i)
             ++map[data[i]];
 
@@ -450,7 +450,7 @@ int main(int argc, char ** argv)
     {
         Stopwatch watch;
 
-        google::dense_hash_map<Key, Value, DefaultHash<Key> > map;
+        google::dense_hash_map<Key, Value, DefaultHash<Key>> map;
         map.set_empty_key(Key("\0", 1));
         for (size_t i = 0; i < n; ++i)
               ++map[data[i]];
@@ -467,7 +467,7 @@ int main(int argc, char ** argv)
     {
         Stopwatch watch;
 
-        google::sparse_hash_map<Key, Value, DefaultHash<Key> > map;
+        google::sparse_hash_map<Key, Value, DefaultHash<Key>> map;
         for (size_t i = 0; i < n; ++i)
             ++map[data[i]];
 
diff --git a/dbms/src/Interpreters/tests/hash_map_string_2.cpp b/dbms/src/Interpreters/tests/hash_map_string_2.cpp
index 4fb21fae998..3958592e3d5 100644
--- a/dbms/src/Interpreters/tests/hash_map_string_2.cpp
+++ b/dbms/src/Interpreters/tests/hash_map_string_2.cpp
@@ -636,20 +636,20 @@ int main(int argc, char ** argv)
     }
 
     if (!m || m == 1) bench<StringRef_Compare1_Ptrs>                (data, "StringRef_Compare1_Ptrs");
-    if (!m || m == 2) bench<StringRef_Compare1_Index>                (data, "StringRef_Compare1_Index");
+    if (!m || m == 2) bench<StringRef_Compare1_Index>               (data, "StringRef_Compare1_Index");
     if (!m || m == 3) bench<StringRef_CompareMemcmp>                (data, "StringRef_CompareMemcmp");
-    if (!m || m == 4) bench<StringRef_Compare8_1_byUInt64>            (data, "StringRef_Compare8_1_byUInt64");
-    if (!m || m == 5) bench<StringRef_Compare16_1_byMemcmp>            (data, "StringRef_Compare16_1_byMemcmp");
+    if (!m || m == 4) bench<StringRef_Compare8_1_byUInt64>          (data, "StringRef_Compare8_1_byUInt64");
+    if (!m || m == 5) bench<StringRef_Compare16_1_byMemcmp>         (data, "StringRef_Compare16_1_byMemcmp");
     if (!m || m == 6) bench<StringRef_Compare16_1_byUInt64_logicAnd>(data, "StringRef_Compare16_1_byUInt64_logicAnd");
-    if (!m || m == 7) bench<StringRef_Compare16_1_byUInt64_bitAnd>    (data, "StringRef_Compare16_1_byUInt64_bitAnd");
+    if (!m || m == 7) bench<StringRef_Compare16_1_byUInt64_bitAnd>  (data, "StringRef_Compare16_1_byUInt64_bitAnd");
 #if __SSE4_1__
-    if (!m || m == 8) bench<StringRef_Compare16_1_byIntSSE>            (data, "StringRef_Compare16_1_byIntSSE");
-    if (!m || m == 9) bench<StringRef_Compare16_1_byFloatSSE>        (data, "StringRef_Compare16_1_byFloatSSE");
-    if (!m || m == 10) bench<StringRef_Compare16_1_bySSE4>            (data, "StringRef_Compare16_1_bySSE4");
-    if (!m || m == 11) bench<StringRef_Compare16_1_bySSE4_wide>        (data, "StringRef_Compare16_1_bySSE4_wide");
-    if (!m || m == 12) bench<StringRef_Compare16_1_bySSE_wide>        (data, "StringRef_Compare16_1_bySSE_wide");
+    if (!m || m == 8) bench<StringRef_Compare16_1_byIntSSE>         (data, "StringRef_Compare16_1_byIntSSE");
+    if (!m || m == 9) bench<StringRef_Compare16_1_byFloatSSE>       (data, "StringRef_Compare16_1_byFloatSSE");
+    if (!m || m == 10) bench<StringRef_Compare16_1_bySSE4>          (data, "StringRef_Compare16_1_bySSE4");
+    if (!m || m == 11) bench<StringRef_Compare16_1_bySSE4_wide>     (data, "StringRef_Compare16_1_bySSE4_wide");
+    if (!m || m == 12) bench<StringRef_Compare16_1_bySSE_wide>      (data, "StringRef_Compare16_1_bySSE_wide");
 #endif
-    if (!m || m == 100) bench<StringRef_CompareAlwaysTrue>            (data, "StringRef_CompareAlwaysTrue");
+    if (!m || m == 100) bench<StringRef_CompareAlwaysTrue>          (data, "StringRef_CompareAlwaysTrue");
     if (!m || m == 101) bench<StringRef_CompareAlmostAlwaysTrue>    (data, "StringRef_CompareAlmostAlwaysTrue");
 
     /// 10 > 8, 9
diff --git a/dbms/src/Interpreters/tests/hash_map_string_3.cpp b/dbms/src/Interpreters/tests/hash_map_string_3.cpp
index dfebabdcbbf..7f637d765e3 100644
--- a/dbms/src/Interpreters/tests/hash_map_string_3.cpp
+++ b/dbms/src/Interpreters/tests/hash_map_string_3.cpp
@@ -29,7 +29,7 @@ for file in MobilePhoneModel PageCharset Params URLDomain UTMSource Referer URL
   echo
   BEST_METHOD=0
   BEST_RESULT=0
-  for method in {1..10}; do
+  for method in {1..11}; do
    echo -ne $file $size $method '';
    TOTAL_ELEMS=0
    for i in {0..1000}; do
@@ -139,6 +139,26 @@ struct FastHash64
 };
 
 
+struct FNV1a
+{
+    size_t operator() (StringRef x) const
+    {
+        size_t res = 0xcbf29ce484222325ULL;
+
+        const char * pos = x.data;
+        const char * end = x.data + x.size;
+
+        for (; pos < end; ++pos)
+        {
+            res *= 1099511628211ULL;
+            res ^= *pos;
+        }
+
+        return res;
+    }
+};
+
+
 #if __SSE4_1__
 
 struct CrapWow
@@ -462,20 +482,21 @@ int main(int argc, char ** argv)
             << std::endl;
     }
 
-    if (!m || m == 1) bench<StringRef_CompareMemcmp, StringRefHash64>(data, "StringRef_CityHash64");
-    if (!m || m == 2) bench<StringRef_CompareMemcmp, FastHash64>    (data, "StringRef_FastHash64");
-    if (!m || m == 3) bench<StringRef_CompareMemcmp, SimpleHash>    (data, "StringRef_SimpleHash");
+    if (!m || m == 1) bench<StringRef, StringRefHash64>(data, "StringRef_CityHash64");
+    if (!m || m == 2) bench<StringRef, FastHash64>     (data, "StringRef_FastHash64");
+    if (!m || m == 3) bench<StringRef, SimpleHash>     (data, "StringRef_SimpleHash");
+    if (!m || m == 4) bench<StringRef, FNV1a>          (data, "StringRef_FNV1a");
 
 #if __SSE4_1__
-    if (!m || m == 4) bench<StringRef_CompareMemcmp, CrapWow>        (data, "StringRef_CrapWow");
-    if (!m || m == 5) bench<StringRef_CompareMemcmp, CRC32Hash>        (data, "StringRef_CRC32Hash");
-    if (!m || m == 6) bench<StringRef_CompareMemcmp, CRC32ILPHash>    (data, "StringRef_CRC32ILPHash");
+    if (!m || m == 5) bench<StringRef, CrapWow>        (data, "StringRef_CrapWow");
+    if (!m || m == 6) bench<StringRef, CRC32Hash>      (data, "StringRef_CRC32Hash");
+    if (!m || m == 7) bench<StringRef, CRC32ILPHash>   (data, "StringRef_CRC32ILPHash");
 #endif
 
-    if (!m || m == 7) bench<StringRef_CompareMemcmp, VerySimpleHash>(data, "StringRef_VerySimpleHash");
-    if (!m || m == 8) bench<StringRef_CompareMemcmp, FarmHash64>(data, "StringRef_FarmHash64");
-    if (!m || m == 9) bench<StringRef_CompareMemcmp, MetroHash64<metrohash64_1>>(data, "StringRef_MetroHash64_1");
-    if (!m || m == 10) bench<StringRef_CompareMemcmp, MetroHash64<metrohash64_2>>(data, "StringRef_MetroHash64_2");
+    if (!m || m == 8) bench<StringRef, VerySimpleHash> (data, "StringRef_VerySimpleHash");
+    if (!m || m == 9) bench<StringRef, FarmHash64>     (data, "StringRef_FarmHash64");
+    if (!m || m == 10) bench<StringRef, MetroHash64<metrohash64_1>>(data, "StringRef_MetroHash64_1");
+    if (!m || m == 11) bench<StringRef, MetroHash64<metrohash64_2>>(data, "StringRef_MetroHash64_2");
 
     return 0;
 }
diff --git a/dbms/src/Interpreters/tests/two_level_hash_map.cpp b/dbms/src/Interpreters/tests/two_level_hash_map.cpp
index 6c3b85a7626..ff579edfe7b 100644
--- a/dbms/src/Interpreters/tests/two_level_hash_map.cpp
+++ b/dbms/src/Interpreters/tests/two_level_hash_map.cpp
@@ -49,9 +49,9 @@ int main(int argc, char ** argv)
     {
         Stopwatch watch;
 
-        std::cerr << sizeof(HashMapCell<Key, Value, DefaultHash<Key> >) << std::endl;
+        std::cerr << sizeof(HashMapCell<Key, Value, DefaultHash<Key>>) << std::endl;
 
-        using Map = TwoLevelHashTable<Key, HashMapCell<Key, Value, DefaultHash<Key> >, DefaultHash<Key>, HashTableGrower<8>, HashTableAllocator>;
+        using Map = TwoLevelHashTable<Key, HashMapCell<Key, Value, DefaultHash<Key>>, DefaultHash<Key>, HashTableGrower<8>, HashTableAllocator>;
 
         Map map;
         Map::iterator it;
@@ -86,7 +86,7 @@ int main(int argc, char ** argv)
     {
         Stopwatch watch;
 
-        using Map = TwoLevelHashTable<Key, HashMapCell<Key, Value, DefaultHash<Key> >, DefaultHash<Key>, HashTableGrower<8>, HashTableAllocator>;
+        using Map = TwoLevelHashTable<Key, HashMapCell<Key, Value, DefaultHash<Key>>, DefaultHash<Key>, HashTableGrower<8>, HashTableAllocator>;
         //using Map = HashMap<Key, Value, UniquesHashSetDefaultHash>;
 
         Map map;
diff --git a/dbms/src/Parsers/ParserCreateQuery.cpp b/dbms/src/Parsers/ParserCreateQuery.cpp
index f4f939ff01e..25037bd67b9 100644
--- a/dbms/src/Parsers/ParserCreateQuery.cpp
+++ b/dbms/src/Parsers/ParserCreateQuery.cpp
@@ -323,12 +323,10 @@ bool ParserCreateQuery::parseImpl(Pos & pos, ASTPtr & node, Expected & expected)
         /// AS SELECT ...
         if (!s_as.ignore(pos, expected))
             return false;
-        Pos before_select = pos;
-        if (!s_select.ignore(pos, expected))
-            return false;
-        pos = before_select;
+
         ParserSelectQuery select_p;
-        select_p.parse(pos, select, expected);
+        if (!select_p.parse(pos, select, expected))
+            return false;
     }
 
     auto query = std::make_shared<ASTCreateQuery>(StringRange(begin, pos));
diff --git a/dbms/src/Parsers/ParserInsertQuery.cpp b/dbms/src/Parsers/ParserInsertQuery.cpp
index 1bb43d47f3d..709e9e79233 100644
--- a/dbms/src/Parsers/ParserInsertQuery.cpp
+++ b/dbms/src/Parsers/ParserInsertQuery.cpp
@@ -80,22 +80,23 @@ bool ParserInsertQuery::parseImpl(Pos & pos, ASTPtr & node, Expected & expected)
         if (!name_p.parse(pos, format, expected))
             return false;
 
-        if (pos->type == TokenType::Semicolon)
-            throw Exception("You have excessive ';' symbol before data for INSERT.\n"
-                "Example:\n\n"
-                "INSERT INTO t (x, y) FORMAT TabSeparated\n"
-                "1\tHello\n"
-                "2\tWorld\n"
-                "\n"
-                "Note that there is no ';' in first line.", ErrorCodes::SYNTAX_ERROR);
-
-        /// Data starts after the first newline, if there is one, or after all the whitespace characters, otherwise.
-
         data = name_pos->end;
 
+        if (data < end && *data == ';')
+            throw Exception("You have excessive ';' symbol before data for INSERT.\n"
+                                    "Example:\n\n"
+                                    "INSERT INTO t (x, y) FORMAT TabSeparated\n"
+                                    ";\tHello\n"
+                                    "2\tWorld\n"
+                                    "\n"
+                                    "Note that there is no ';' just after format name, "
+                                    "you need to put at least one whitespace symbol before the data.", ErrorCodes::SYNTAX_ERROR);
+
         while (data < end && (*data == ' ' || *data == '\t' || *data == '\f'))
             ++data;
 
+        /// Data starts after the first newline, if there is one, or after all the whitespace characters, otherwise.
+
         if (data < end && *data == '\r')
             ++data;
 
diff --git a/dbms/src/Server/HTTPHandler.cpp b/dbms/src/Server/HTTPHandler.cpp
index 57ba1491e3d..8a421b94ff2 100644
--- a/dbms/src/Server/HTTPHandler.cpp
+++ b/dbms/src/Server/HTTPHandler.cpp
@@ -130,9 +130,10 @@ static Poco::Net::HTTPResponse::HTTPStatus exceptionCodeToHTTPStatus(int excepti
 }
 
 
-static std::chrono::steady_clock::duration parseSessionTimeout(const HTMLForm & params)
+static std::chrono::steady_clock::duration parseSessionTimeout(
+    const Poco::Util::AbstractConfiguration & config,
+    const HTMLForm & params)
 {
-    const auto & config = Poco::Util::Application::instance().config();
     unsigned session_timeout = config.getInt("default_session_timeout", 60);
 
     if (params.has("session_timeout"))
@@ -245,7 +246,7 @@ void HTTPHandler::processQuery(
     if (session_is_set)
     {
         session_id = params.get("session_id");
-        session_timeout = parseSessionTimeout(params);
+        session_timeout = parseSessionTimeout(server.config(), params);
         std::string session_check = params.get("session_check", "");
 
         session = context.acquireSession(session_id, session_timeout, session_check == "1");
diff --git a/dbms/src/Server/MetricsTransmitter.cpp b/dbms/src/Server/MetricsTransmitter.cpp
index 8b65fc72f1c..bf0ae0e4186 100644
--- a/dbms/src/Server/MetricsTransmitter.cpp
+++ b/dbms/src/Server/MetricsTransmitter.cpp
@@ -1,15 +1,21 @@
 #include "MetricsTransmitter.h"
 
-#include <Poco/Util/Application.h>
-#include <Poco/Util/LayeredConfiguration.h>
-#include <daemon/BaseDaemon.h>
+#include <Interpreters/AsynchronousMetrics.h>
+#include <Interpreters/Context.h>
+
 #include <Common/CurrentMetrics.h>
 #include <Common/Exception.h>
 #include <Common/setThreadName.h>
-#include <Interpreters/AsynchronousMetrics.h>
+
+#include <daemon/BaseDaemon.h>
+
+#include <Poco/Util/Application.h>
+#include <Poco/Util/LayeredConfiguration.h>
+
 
 namespace DB
 {
+
 MetricsTransmitter::~MetricsTransmitter()
 {
     try
@@ -32,7 +38,7 @@ MetricsTransmitter::~MetricsTransmitter()
 
 void MetricsTransmitter::run()
 {
-    auto & config = Poco::Util::Application::instance().config();
+    const auto & config = context.getConfigRef();
     auto interval = config.getInt(config_name + ".interval", 60);
 
     const std::string thread_name = "MericsTrns " + std::to_string(interval) + "s";
@@ -63,7 +69,7 @@ void MetricsTransmitter::run()
 
 void MetricsTransmitter::transmit(std::vector<ProfileEvents::Count> & prev_counters)
 {
-    auto & config = Poco::Util::Application::instance().config();
+    const auto & config = context.getConfigRef();
     auto async_metrics_values = async_metrics.getValues();
 
     GraphiteWriter::KeyValueVector<ssize_t> key_vals{};
diff --git a/dbms/src/Server/MetricsTransmitter.h b/dbms/src/Server/MetricsTransmitter.h
index 4bee6ad0c7d..e85113ad141 100644
--- a/dbms/src/Server/MetricsTransmitter.h
+++ b/dbms/src/Server/MetricsTransmitter.h
@@ -10,7 +10,10 @@
 
 namespace DB
 {
+
 class AsynchronousMetrics;
+class Context;
+
 
 /**    Automatically sends
   * - difference of ProfileEvents;
@@ -21,8 +24,12 @@ class AsynchronousMetrics;
 class MetricsTransmitter
 {
 public:
-    MetricsTransmitter(const AsynchronousMetrics & async_metrics, const std::string & config_name)
-        : async_metrics{async_metrics}, config_name{config_name}
+    MetricsTransmitter(Context & context_,
+                       const AsynchronousMetrics & async_metrics_,
+                       const std::string & config_name_)
+        : context(context_)
+        , async_metrics(async_metrics_)
+        , config_name(config_name_)
     {
     }
     ~MetricsTransmitter();
@@ -31,6 +38,8 @@ private:
     void run();
     void transmit(std::vector<ProfileEvents::Count> & prev_counters);
 
+    Context & context;
+
     const AsynchronousMetrics & async_metrics;
     const std::string config_name;
 
@@ -43,4 +52,5 @@ private:
     static constexpr auto current_metrics_path_prefix = "ClickHouse.Metrics.";
     static constexpr auto asynchronous_metrics_path_prefix = "ClickHouse.AsynchronousMetrics.";
 };
+
 }
diff --git a/dbms/src/Server/Server.cpp b/dbms/src/Server/Server.cpp
index 4ad0b9ec719..e7820e296de 100644
--- a/dbms/src/Server/Server.cpp
+++ b/dbms/src/Server/Server.cpp
@@ -530,7 +530,8 @@ int Server::main(const std::vector<std::string> & args)
         std::vector<std::unique_ptr<MetricsTransmitter>> metrics_transmitters;
         for (const auto & graphite_key : DB::getMultipleKeysFromConfig(config(), "", "graphite"))
         {
-            metrics_transmitters.emplace_back(std::make_unique<MetricsTransmitter>(async_metrics, graphite_key));
+            metrics_transmitters.emplace_back(std::make_unique<MetricsTransmitter>(
+                *global_context, async_metrics, graphite_key));
         }
 
         SessionCleaner session_cleaner(*global_context);
diff --git a/dbms/src/Server/TCPHandler.cpp b/dbms/src/Server/TCPHandler.cpp
index d93d60bd61e..011454a5159 100644
--- a/dbms/src/Server/TCPHandler.cpp
+++ b/dbms/src/Server/TCPHandler.cpp
@@ -144,8 +144,7 @@ void TCPHandler::runImpl()
                 continue;
 
             /// Get blocks of temporary tables
-            if (client_revision >= DBMS_MIN_REVISION_WITH_TEMPORARY_TABLES)
-                readData(global_settings);
+            readData(global_settings);
 
             /// Reset the input stream, as we received an empty block while receiving external table data.
             /// So, the stream has been marked as cancelled and we can't read from it anymore.
@@ -416,8 +415,7 @@ void TCPHandler::sendTotals()
             initBlockOutput();
 
             writeVarUInt(Protocol::Server::Totals, *out);
-            if (client_revision >= DBMS_MIN_REVISION_WITH_TEMPORARY_TABLES)
-                writeStringBinary("", *out);
+            writeStringBinary("", *out);
 
             state.block_out->write(totals);
             state.maybe_compressed_out->next();
@@ -438,8 +436,7 @@ void TCPHandler::sendExtremes()
             initBlockOutput();
 
             writeVarUInt(Protocol::Server::Extremes, *out);
-            if (client_revision >= DBMS_MIN_REVISION_WITH_TEMPORARY_TABLES)
-                writeStringBinary("", *out);
+            writeStringBinary("", *out);
 
             state.block_out->write(extremes);
             state.maybe_compressed_out->next();
@@ -612,8 +609,7 @@ bool TCPHandler::receiveData()
 
     /// The name of the temporary table for writing data, default to empty string
     String external_table_name;
-    if (client_revision >= DBMS_MIN_REVISION_WITH_TEMPORARY_TABLES)
-        readStringBinary(external_table_name, *in);
+    readStringBinary(external_table_name, *in);
 
     /// Read one block from the network and write it down
     Block block = state.block_in->read();
@@ -717,8 +713,7 @@ void TCPHandler::sendData(Block & block)
     initBlockOutput();
 
     writeVarUInt(Protocol::Server::Data, *out);
-    if (client_revision >= DBMS_MIN_REVISION_WITH_TEMPORARY_TABLES)
-        writeStringBinary("", *out);
+    writeStringBinary("", *out);
 
     state.block_out->write(block);
     state.maybe_compressed_out->next();
diff --git a/dbms/src/Server/config.xml b/dbms/src/Server/config.xml
index baf563c0a2c..dfd3f2433dd 100644
--- a/dbms/src/Server/config.xml
+++ b/dbms/src/Server/config.xml
@@ -111,9 +111,17 @@
 
          Time zone is specified as identifier from IANA time zone database, like UTC or Africa/Abidjan.
          If not specified, system time zone at server startup is used.
+
+         Please note, that server could display time zone alias instead of specified name.
+         Example: W-SU is an alias for Europe/Moscow and Zulu is an alias for UTC.
     -->
     <!-- <timezone>Europe/Moscow</timezone> -->
 
+    <!-- You can specify umask here (see "man umask"). Server will apply it on startup.
+         Number is always parsed as octal. Default umask is 027 (other users cannot read logs, data files, etc; group can only read).
+    -->
+    <!-- <umask>022</umask> -->
+
     <!-- Configuration of clusters that could be used in Distributed tables.
          https://clickhouse.yandex/reference_en.html#Distributed
       -->
diff --git a/dbms/src/Storages/Distributed/DirectoryMonitor.cpp b/dbms/src/Storages/Distributed/DirectoryMonitor.cpp
index b531953cde2..03831ba8428 100644
--- a/dbms/src/Storages/Distributed/DirectoryMonitor.cpp
+++ b/dbms/src/Storages/Distributed/DirectoryMonitor.cpp
@@ -57,16 +57,14 @@ namespace
             if (!user_pw_end || !colon)
                 throw Exception{
                     "Shard address '" + address + "' does not match to 'user[:password]@host:port#default_database' pattern",
-                    ErrorCodes::INCORRECT_FILE_NAME
-                };
+                    ErrorCodes::INCORRECT_FILE_NAME};
 
             const bool has_pw = colon < user_pw_end;
             const char * host_end = has_pw ? strchr(user_pw_end + 1, ':') : colon;
             if (!host_end)
                 throw Exception{
                     "Shard address '" + address + "' does not contain port",
-                    ErrorCodes::INCORRECT_FILE_NAME
-                };
+                    ErrorCodes::INCORRECT_FILE_NAME};
 
             const char * has_db = strchr(address.data(), '#');
             const char * port_end = has_db ? has_db : address_end;
diff --git a/dbms/src/Storages/Distributed/DistributedBlockOutputStream.cpp b/dbms/src/Storages/Distributed/DistributedBlockOutputStream.cpp
index 046a4b500df..9a9d6f76743 100644
--- a/dbms/src/Storages/Distributed/DistributedBlockOutputStream.cpp
+++ b/dbms/src/Storages/Distributed/DistributedBlockOutputStream.cpp
@@ -367,7 +367,8 @@ void DistributedBlockOutputStream::writeAsyncImpl(const Block & block, const siz
         for (const auto & address : cluster->getShardsAddresses()[shard_id])
             if (!address.is_local)
                 dir_names.push_back(address.toStringFull());
-        writeToShard(block, dir_names);
+        if (!dir_names.empty())
+            writeToShard(block, dir_names);
     }
 }
 
diff --git a/dbms/src/Storages/MergeTree/DataPartsExchange.cpp b/dbms/src/Storages/MergeTree/DataPartsExchange.cpp
index b0d98f44ae9..c7eb2abbaa7 100644
--- a/dbms/src/Storages/MergeTree/DataPartsExchange.cpp
+++ b/dbms/src/Storages/MergeTree/DataPartsExchange.cpp
@@ -268,9 +268,7 @@ MergeTreeData::MutableDataPartPtr Fetcher::fetchPartImpl(
     new_data_part->info = MergeTreePartInfo::fromPartName(part_name);
     MergeTreePartInfo::parseMinMaxDatesFromPartName(part_name, new_data_part->min_date, new_data_part->max_date);
     new_data_part->modification_time = time(nullptr);
-    new_data_part->loadColumns(true);
-    new_data_part->loadChecksums(true);
-    new_data_part->loadIndex();
+    new_data_part->loadColumnsChecksumsIndex(true, false);
     new_data_part->is_sharded = false;
     new_data_part->checksums.checkEqual(checksums, false);
 
diff --git a/dbms/src/Storages/MergeTree/MergeTreeBaseBlockInputStream.cpp b/dbms/src/Storages/MergeTree/MergeTreeBaseBlockInputStream.cpp
index ad288f0be5c..592a97135da 100644
--- a/dbms/src/Storages/MergeTree/MergeTreeBaseBlockInputStream.cpp
+++ b/dbms/src/Storages/MergeTree/MergeTreeBaseBlockInputStream.cpp
@@ -246,8 +246,11 @@ Block MergeTreeBaseBlockInputStream::readFromPart()
                     if (pre_range_reader)
                     {
                         /// Have to read rows from last partly read granula.
-                        auto & range = ranges_to_read.back();
-                        task->current_range_reader = reader->readRange(range.begin, range.end);
+                        if (!ranges_to_read.empty())
+                        {
+                            auto & range = ranges_to_read.back();
+                            task->current_range_reader = reader->readRange(range.begin, range.end);
+                        }
                         /// But can just skip them.
                         task->number_of_rows_to_skip = rows_was_read_in_last_range;
                     }
diff --git a/dbms/src/Storages/MergeTree/MergeTreeData.cpp b/dbms/src/Storages/MergeTree/MergeTreeData.cpp
index f675eaee193..bd6de8d05c7 100644
--- a/dbms/src/Storages/MergeTree/MergeTreeData.cpp
+++ b/dbms/src/Storages/MergeTree/MergeTreeData.cpp
@@ -302,7 +302,7 @@ void MergeTreeData::loadDataParts(bool skip_sanity_checks)
     Poco::DirectoryIterator end;
     for (Poco::DirectoryIterator it(full_path); it != end; ++it)
     {
-        /// Skip temporary directories older than one day.
+        /// Skip temporary directories.
         if (startsWith(it.name(), "tmp"))
             continue;
 
@@ -326,10 +326,7 @@ void MergeTreeData::loadDataParts(bool skip_sanity_checks)
 
         try
         {
-            part->loadColumns(require_part_metadata);
-            part->loadChecksums(require_part_metadata);
-            part->loadIndex();
-            part->checkNotBroken(require_part_metadata);
+            part->loadColumnsChecksumsIndex(require_part_metadata, true);
         }
         catch (const Exception & e)
         {
@@ -1033,7 +1030,16 @@ MergeTreeData::AlterDataPartTransactionPtr MergeTreeData::alterDataPart(
             part->size_in_bytes,
             static_cast<double>(part->size_in_bytes) / this->getTotalActiveSizeInBytes());
         ExpressionBlockInputStream in(part_in, expression);
-        MergedColumnOnlyOutputStream out(*this, full_path + part->name + '/', true, compression_method, false);
+
+        /** Don't write offsets for arrays, because ALTER never change them
+         *  (MODIFY COLUMN could only change types of elements but never modify array sizes).
+          * Also note that they does not participate in 'rename_map'.
+          * Also note, that for columns, that are parts of Nested,
+          *  temporary column name ('converting_column_name') created in 'createConvertExpression' method
+          *  will have old name of shared offsets for arrays.
+          */
+        MergedColumnOnlyOutputStream out(*this, full_path + part->name + '/', true, compression_method, true /* skip_offsets */);
+
         in.readPrefix();
         out.writePrefix();
 
@@ -1298,7 +1304,7 @@ void MergeTreeData::replaceParts(const DataPartsVector & remove, const DataParts
 
     for (const DataPartPtr & part : remove)
     {
-        part->remove_time = clear_without_timeout ? 0 : time(0);
+        part->remove_time = clear_without_timeout ? 0 : time(nullptr);
 
         if (data_parts.erase(part))
             removePartContributionToColumnSizes(part);
@@ -1544,11 +1550,7 @@ MergeTreeData::MutableDataPartPtr MergeTreeData::loadPartAndFixMetadata(const St
     if (Poco::File(full_part_path + "columns.txt").exists())
         Poco::File(full_part_path + "columns.txt").remove();
 
-    part->loadColumns(false);
-    part->loadChecksums(false);
-    part->loadIndex();
-    part->checkNotBroken(false);
-
+    part->loadColumnsChecksumsIndex(false, true);
     part->modification_time = Poco::File(full_part_path).getLastModified().epochTime();
 
     /// If the checksums file is not present, calculate the checksums and write them to disk.
diff --git a/dbms/src/Storages/MergeTree/MergeTreeDataPart.cpp b/dbms/src/Storages/MergeTree/MergeTreeDataPart.cpp
index 6d5de63053a..22f871e9c13 100644
--- a/dbms/src/Storages/MergeTree/MergeTreeDataPart.cpp
+++ b/dbms/src/Storages/MergeTree/MergeTreeDataPart.cpp
@@ -324,9 +324,7 @@ String MergeTreeDataPart::getColumnNameWithMinumumCompressedSize() const
     }
 
     if (!minimum_size_column)
-        throw Exception{
-            "Could not find a column of minimum size in MergeTree",
-            ErrorCodes::LOGICAL_ERROR};
+        throw Exception("Could not find a column of minimum size in MergeTree, part " + getFullPath(), ErrorCodes::LOGICAL_ERROR);
 
     return *minimum_size_column;
 }
@@ -529,6 +527,17 @@ void MergeTreeDataPart::renameAddPrefix(bool to_detached, const String & prefix)
     renameTo(dst_name());
 }
 
+
+void MergeTreeDataPart::loadColumnsChecksumsIndex(bool require_columns_checksums, bool check_consistency)
+{
+    loadColumns(require_columns_checksums);
+    loadChecksums(require_columns_checksums);
+    loadIndex();
+    if (check_consistency)
+        checkConsistency(require_columns_checksums);
+}
+
+
 void MergeTreeDataPart::loadIndex()
 {
     /// Size - in number of marks.
@@ -627,7 +636,7 @@ void MergeTreeDataPart::loadColumns(bool require)
     columns.readText(file);
 }
 
-void MergeTreeDataPart::checkNotBroken(bool require_part_metadata)
+void MergeTreeDataPart::checkConsistency(bool require_part_metadata)
 {
     String path = getFullPath();
 
diff --git a/dbms/src/Storages/MergeTree/MergeTreeDataPart.h b/dbms/src/Storages/MergeTree/MergeTreeDataPart.h
index d7c30b9ad22..3d6211f78a7 100644
--- a/dbms/src/Storages/MergeTree/MergeTreeDataPart.h
+++ b/dbms/src/Storages/MergeTree/MergeTreeDataPart.h
@@ -180,19 +180,12 @@ struct MergeTreeDataPart
     /// Renames a part by appending a prefix to the name. To_detached - also moved to the detached directory.
     void renameAddPrefix(bool to_detached, const String & prefix) const;
 
-    /// Loads index file. Also calculates this->size if size=0
-    void loadIndex();
-
-    /// If checksums.txt exists, reads files' checksums (and sizes) from it
-    void loadChecksums(bool require);
-
     /// Populates columns_to_size map (compressed size).
     void accumulateColumnSizes(ColumnToSize & column_to_size) const;
 
-    /// Reads columns names and types from columns.txt
-    void loadColumns(bool require);
-
-    void checkNotBroken(bool require_part_metadata);
+    /// Initialize columns (from columns.txt if exists, or create from column files if not).
+    /// Load checksums from checksums.txt if exists. Load index if required.
+    void loadColumnsChecksumsIndex(bool require_columns_checksums, bool check_consistency);
 
     /// Checks that .bin and .mrk files exist
     bool hasColumnFiles(const String & column) const;
@@ -200,6 +193,18 @@ struct MergeTreeDataPart
     /// For data in RAM ('index')
     size_t getIndexSizeInBytes() const;
     size_t getIndexSizeInAllocatedBytes() const;
+
+private:
+    /// Reads columns names and types from columns.txt
+    void loadColumns(bool require);
+
+    /// If checksums.txt exists, reads files' checksums (and sizes) from it
+    void loadChecksums(bool require);
+
+    /// Loads index file. Also calculates this->size if size=0
+    void loadIndex();
+
+    void checkConsistency(bool require_part_metadata);
 };
 
 }
diff --git a/dbms/src/Storages/MergeTree/MergeTreeDataSelectExecutor.cpp b/dbms/src/Storages/MergeTree/MergeTreeDataSelectExecutor.cpp
index ee85110d30d..d3177c41875 100644
--- a/dbms/src/Storages/MergeTree/MergeTreeDataSelectExecutor.cpp
+++ b/dbms/src/Storages/MergeTree/MergeTreeDataSelectExecutor.cpp
@@ -145,7 +145,7 @@ BlockInputStreams MergeTreeDataSelectExecutor::read(
     MergeTreeData::DataPartsVector parts = data.getDataPartsVector();
 
     /// If query contains restrictions on the virtual column `_part` or `_part_index`, select only parts suitable for it.
-    /// The virtual column `_sample_factor - 1 / <used sampling factor>` can be requested in the query.
+    /// The virtual column `_sample_factor` (which is equal to 1 / used sample rate) can be requested in the query.
     Names virt_column_names;
     Names real_column_names;
 
diff --git a/dbms/src/Storages/MergeTree/MergeTreeSettings.h b/dbms/src/Storages/MergeTree/MergeTreeSettings.h
index b922f75870b..5f89b888492 100644
--- a/dbms/src/Storages/MergeTree/MergeTreeSettings.h
+++ b/dbms/src/Storages/MergeTree/MergeTreeSettings.h
@@ -22,7 +22,7 @@ struct MergeTreeSettings
     /** Merge settings. */
 
     /// Maximum in total size of parts to merge, when there are maximum (minimum) free threads in background pool (or entries in replication queue).
-    size_t max_bytes_to_merge_at_max_space_in_pool = 100ULL * 1024 * 1024 * 1024;
+    size_t max_bytes_to_merge_at_max_space_in_pool = 150ULL * 1024 * 1024 * 1024;
     size_t max_bytes_to_merge_at_min_space_in_pool = 1024 * 1024;
 
     /// How many tasks of merging parts are allowed simultaneously in ReplicatedMergeTree queue.
diff --git a/dbms/src/Storages/MergeTree/PKCondition.h b/dbms/src/Storages/MergeTree/PKCondition.h
index 98c1d32cec0..5c48df5becc 100644
--- a/dbms/src/Storages/MergeTree/PKCondition.h
+++ b/dbms/src/Storages/MergeTree/PKCondition.h
@@ -20,7 +20,7 @@ class IFunction;
 using FunctionPtr = std::shared_ptr<IFunction>;
 
 
-/** Range with open or closed ends; Perhaps unlimited.
+/** Range with open or closed ends; possibly unbounded.
   */
 struct Range
 {
@@ -31,12 +31,12 @@ private:
 public:
     Field left;                       /// the left border, if any
     Field right;                      /// the right border, if any
-    bool left_bounded = false;        /// limited to the left
-    bool right_bounded = false;       /// limited to the right
+    bool left_bounded = false;        /// bounded at the left
+    bool right_bounded = false;       /// bounded at the right
     bool left_included = false;       /// includes the left border, if any
     bool right_included = false;      /// includes the right border, if any
 
-    /// The whole set.
+    /// The whole unversum.
     Range() {}
 
     /// One point.
@@ -148,7 +148,7 @@ public:
         /// r to the right of me.
         if (r.left_bounded
             && right_bounded
-            && (less(right, r.left)                            /// ...} {...
+            && (less(right, r.left)                          /// ...} {...
                 || ((!right_included || !r.left_included)    /// ...) [... or ...] (...
                     && equals(r.left, right))))
             return false;
@@ -193,15 +193,15 @@ public:
 /** Condition on the index.
   *
   * Consists of the conditions for the key belonging to all possible ranges or sets,
-  *  as well as logical links AND/OR/NOT above these conditions.
+  *  as well as logical operators AND/OR/NOT above these conditions.
   *
   * Constructs a reverse polish notation from these conditions
-  *  and can calculate (interpret) its feasibility over key ranges.
+  *  and can calculate (interpret) its satisfiability over key ranges.
   */
 class PKCondition
 {
 public:
-    /// Does not include the SAMPLE section. all_columns - the set of all columns of the table.
+    /// Does not take into account the SAMPLE section. all_columns - the set of all columns of the table.
     PKCondition(
         const SelectQueryInfo & query_info,
         const Context & context,
diff --git a/dbms/src/Storages/MergeTree/ReplicatedMergeTreeCleanupThread.cpp b/dbms/src/Storages/MergeTree/ReplicatedMergeTreeCleanupThread.cpp
index 11638044935..3283ca1d988 100644
--- a/dbms/src/Storages/MergeTree/ReplicatedMergeTreeCleanupThread.cpp
+++ b/dbms/src/Storages/MergeTree/ReplicatedMergeTreeCleanupThread.cpp
@@ -247,7 +247,7 @@ void ReplicatedMergeTreeCleanupThread::getBlocksSortedByTime(zkutil::ZooKeeperPt
         if (!status.exists)
             throw zkutil::KeeperException("A block node was suddenly deleted", ZNONODE);
 
-        cached_block_stats->emplace(elem.first, status.stat);
+        cached_block_stats->emplace(elem.first, RequiredStat(status.stat));
         timed_blocks.emplace_back(elem.first, RequiredStat(status.stat));
     }
 
diff --git a/dbms/src/Storages/MergeTree/ShardedPartitionUploader.cpp b/dbms/src/Storages/MergeTree/ShardedPartitionUploader.cpp
index 55950a5a93d..b51180b51c4 100644
--- a/dbms/src/Storages/MergeTree/ShardedPartitionUploader.cpp
+++ b/dbms/src/Storages/MergeTree/ShardedPartitionUploader.cpp
@@ -107,9 +107,7 @@ void Service::processQuery(const Poco::Net::HTMLForm & params, ReadBuffer & body
     data_part->info = MergeTreePartInfo::fromPartName(part_name);
     MergeTreePartInfo::parseMinMaxDatesFromPartName(part_name, data_part->min_date, data_part->max_date);
     data_part->modification_time = time(nullptr);
-    data_part->loadColumns(true);
-    data_part->loadChecksums(true);
-    data_part->loadIndex();
+    data_part->loadColumnsChecksumsIndex(true, false);
     data_part->is_sharded = false;
     data_part->checksums.checkEqual(checksums, false);
 
diff --git a/dbms/src/Storages/StorageFactory.cpp b/dbms/src/Storages/StorageFactory.cpp
index e33e93605c7..b0240553ae5 100644
--- a/dbms/src/Storages/StorageFactory.cpp
+++ b/dbms/src/Storages/StorageFactory.cpp
@@ -195,7 +195,7 @@ static void appendGraphitePattern(const Context & context,
 static void setGraphitePatternsFromConfig(const Context & context,
     const String & config_element, Graphite::Params & params)
 {
-    const Poco::Util::AbstractConfiguration & config = Poco::Util::Application::instance().config();
+    const auto & config = context.getConfigRef();
 
     if (!config.has(config_element))
         throw Exception("No '" + config_element + "' element in configuration file",
@@ -321,7 +321,7 @@ StoragePtr StorageFactory::get(
         if (args.empty() || args.size() > 2)
             throw Exception(error_msg, ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH);
 
-        args[0] = evaluateConstantExpressionOrIdentidierAsLiteral(args[0], local_context);
+        args[0] = evaluateConstantExpressionOrIdentifierAsLiteral(args[0], local_context);
         String format_name = static_cast<const ASTLiteral &>(*args[0]).value.safeGet<String>();
 
         int source_fd = -1;
@@ -352,7 +352,7 @@ StoragePtr StorageFactory::get(
                     source_fd = static_cast<int>(literal->value.get<UInt64>());
             }
 
-            args[1] = evaluateConstantExpressionOrIdentidierAsLiteral(args[1], local_context);
+            args[1] = evaluateConstantExpressionOrIdentifierAsLiteral(args[1], local_context);
             source_path = static_cast<const ASTLiteral &>(*args[1]).value.safeGet<String>();
         }
 
@@ -457,7 +457,7 @@ StoragePtr StorageFactory::get(
                 " - name of source database and regexp for table names.",
                 ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH);
 
-        args[0] = evaluateConstantExpressionOrIdentidierAsLiteral(args[0], local_context);
+        args[0] = evaluateConstantExpressionOrIdentifierAsLiteral(args[0], local_context);
         args[1] = evaluateConstantExpressionAsLiteral(args[1], local_context);
 
         String source_database         = static_cast<const ASTLiteral &>(*args[0]).value.safeGet<String>();
@@ -497,8 +497,8 @@ StoragePtr StorageFactory::get(
 
         String cluster_name = getClusterName(*args[0]);
 
-        args[1] = evaluateConstantExpressionOrIdentidierAsLiteral(args[1], local_context);
-        args[2] = evaluateConstantExpressionOrIdentidierAsLiteral(args[2], local_context);
+        args[1] = evaluateConstantExpressionOrIdentifierAsLiteral(args[1], local_context);
+        args[2] = evaluateConstantExpressionOrIdentifierAsLiteral(args[2], local_context);
 
         String remote_database     = static_cast<const ASTLiteral &>(*args[1]).value.safeGet<String>();
         String remote_table     = static_cast<const ASTLiteral &>(*args[2]).value.safeGet<String>();
@@ -550,8 +550,8 @@ StoragePtr StorageFactory::get(
                 " destination_database, destination_table, num_buckets, min_time, max_time, min_rows, max_rows, min_bytes, max_bytes.",
                 ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH);
 
-        args[0] = evaluateConstantExpressionOrIdentidierAsLiteral(args[0], local_context);
-        args[1] = evaluateConstantExpressionOrIdentidierAsLiteral(args[1], local_context);
+        args[0] = evaluateConstantExpressionOrIdentifierAsLiteral(args[0], local_context);
+        args[1] = evaluateConstantExpressionOrIdentifierAsLiteral(args[1], local_context);
 
         String destination_database = static_cast<const ASTLiteral &>(*args[0]).value.safeGet<String>();
         String destination_table     = static_cast<const ASTLiteral &>(*args[1]).value.safeGet<String>();
@@ -598,8 +598,8 @@ StoragePtr StorageFactory::get(
             throw Exception(error_message_argument_number_mismatch,
                 ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH);
 
-        args[0] = evaluateConstantExpressionOrIdentidierAsLiteral(args[0], local_context);
-        args[1] = evaluateConstantExpressionOrIdentidierAsLiteral(args[1], local_context);
+        args[0] = evaluateConstantExpressionOrIdentifierAsLiteral(args[0], local_context);
+        args[1] = evaluateConstantExpressionOrIdentifierAsLiteral(args[1], local_context);
 
         String destination_database = static_cast<const ASTLiteral &>(*args[0]).value.safeGet<String>();
         String destination_table    = static_cast<const ASTLiteral &>(*args[1]).value.safeGet<String>();
diff --git a/dbms/src/Storages/StorageLog.cpp b/dbms/src/Storages/StorageLog.cpp
index 6c9cf9f95ed..25cd948ed5a 100644
--- a/dbms/src/Storages/StorageLog.cpp
+++ b/dbms/src/Storages/StorageLog.cpp
@@ -185,7 +185,7 @@ private:
         }
     };
 
-    using MarksForColumns = std::vector<std::pair<size_t, Mark> >;
+    using MarksForColumns = std::vector<std::pair<size_t, Mark>>;
 
     using FileStreams = std::map<std::string, std::unique_ptr<Stream>>;
     FileStreams streams;
diff --git a/dbms/src/Storages/StorageMaterializedView.cpp b/dbms/src/Storages/StorageMaterializedView.cpp
index ba861f04a06..08429ed5e5a 100644
--- a/dbms/src/Storages/StorageMaterializedView.cpp
+++ b/dbms/src/Storages/StorageMaterializedView.cpp
@@ -19,6 +19,7 @@ namespace DB
 namespace ErrorCodes
 {
     extern const int LOGICAL_ERROR;
+    extern const int INCORRECT_QUERY;
 }
 
 
@@ -29,7 +30,7 @@ static void extractDependentTable(const ASTSelectQuery & query, String & select_
     if (!query_table)
         return;
 
-    if (const ASTIdentifier * ast_id = typeid_cast<const ASTIdentifier *>(query_table.get()))
+    if (auto ast_id = typeid_cast<const ASTIdentifier *>(query_table.get()))
     {
         auto query_database = query.database();
 
@@ -41,7 +42,7 @@ static void extractDependentTable(const ASTSelectQuery & query, String & select_
         select_database_name = typeid_cast<const ASTIdentifier &>(*query_database).name;
         select_table_name = ast_id->name;
     }
-    else if (const ASTSelectQuery * ast_select = typeid_cast<const ASTSelectQuery *>(query_table.get()))
+    else if (auto ast_select = typeid_cast<const ASTSelectQuery *>(query_table.get()))
     {
         extractDependentTable(*ast_select, select_database_name, select_table_name);
     }
@@ -66,13 +67,18 @@ StorageMaterializedView::StorageMaterializedView(
     database_name(database_name_), context(context_), columns(columns_)
 {
     ASTCreateQuery & create = typeid_cast<ASTCreateQuery &>(*query_);
+
+    if (!create.select)
+        throw Exception("SELECT query is not specified for " + getName(), ErrorCodes::INCORRECT_QUERY);
+
+    if (!create.inner_storage)
+        throw Exception("ENGINE of MaterializedView should be specified explicitly", ErrorCodes::INCORRECT_QUERY);
+
     ASTSelectQuery & select = typeid_cast<ASTSelectQuery &>(*create.select);
 
     /// If the internal query does not specify a database, retrieve it from the context and write it to the query.
     select.setDatabaseIfNeeded(database_name);
 
-    inner_query = create.select;
-
     extractDependentTable(select, select_database_name, select_table_name);
 
     if (!select_table_name.empty())
@@ -80,7 +86,8 @@ StorageMaterializedView::StorageMaterializedView(
             DatabaseAndTableName(select_database_name, select_table_name),
             DatabaseAndTableName(database_name, table_name));
 
-    auto inner_table_name = getInnerTableName();
+    String inner_table_name = getInnerTableName();
+    inner_query = create.select;
 
     /// If there is an ATTACH request, then the internal table must already be connected.
     if (!attach_)
@@ -91,18 +98,7 @@ StorageMaterializedView::StorageMaterializedView(
         manual_create_query->table = inner_table_name;
         manual_create_query->columns = create.columns;
         manual_create_query->children.push_back(manual_create_query->columns);
-
-        /// If you do not specify a storage type in the query, try retrieving it from SELECT query.
-        if (!create.inner_storage)
-        {
-            /// TODO also try to extract `params` to create a table
-            auto func = std::make_shared<ASTFunction>();
-            func->name = context.getTable(select_database_name, select_table_name)->getName();
-            manual_create_query->storage = func;
-        }
-        else
-            manual_create_query->storage = create.inner_storage;
-
+        manual_create_query->storage = create.inner_storage;
         manual_create_query->children.push_back(manual_create_query->storage);
 
         /// Execute the query.
diff --git a/dbms/src/Storages/StorageReplicatedMergeTree.cpp b/dbms/src/Storages/StorageReplicatedMergeTree.cpp
index 603c6b3c1b5..76546d1cb9b 100644
--- a/dbms/src/Storages/StorageReplicatedMergeTree.cpp
+++ b/dbms/src/Storages/StorageReplicatedMergeTree.cpp
@@ -1419,9 +1419,10 @@ void StorageReplicatedMergeTree::executeClearColumnInPartition(const LogEntry &
 
     auto entry_part_info = MergeTreePartInfo::fromPartName(entry.new_part_name);
 
-    /// We don't change table structure, only data in some parts, disable reading from them
+    /// We don't change table structure, only data in some parts
+    /// To disable reading from these parts, we will sequentially acquire write lock for each part inside alterDataPart()
+    /// If we will lock the whole table here, a deadlock can occur. For example, if use use Buffer table (CLICKHOUSE-3238)
     auto lock_read_structure = lockStructure(false);
-    auto lock_write_data = lockDataForAlter();
 
     auto zookeeper = getZooKeeper();
 
@@ -3708,7 +3709,7 @@ StorageReplicatedMergeTree::gatherReplicaSpaceInfo(const WeightedZooKeeperPaths
 
     ThreadPool pool(task_info_list.size());
 
-    using Tasks = std::vector<std::packaged_task<size_t()> >;
+    using Tasks = std::vector<std::packaged_task<size_t()>>;
     Tasks tasks(task_info_list.size());
 
     try
@@ -3790,11 +3791,26 @@ void StorageReplicatedMergeTree::clearOldPartsAndRemoveFromZK(Logger * log_)
     if (!count)
         return;
 
+    /// Part names that were successfully deleted from filesystem and should be deleted from ZooKeeper
+    Strings part_names;
+    auto remove_from_zookeeper = [&] ()
+    {
+        LOG_DEBUG(log, "Removed " << part_names.size() << " old parts from filesystem. Removing them from ZooKeeper.");
+
+        try
+        {
+            removePartsFromZooKeeper(zookeeper, part_names);
+        }
+        catch (...)
+        {
+            LOG_ERROR(log, "There is a problem with deleting parts from ZooKeeper: " << getCurrentExceptionMessage(false));
+        }
+    };
+
     try
     {
-        LOG_DEBUG(log, "Removing " << parts.size() << " old parts from file system");
+        LOG_DEBUG(log, "Removing " << parts.size() << " old parts from filesystem");
 
-        Strings part_names;
         while (!parts.empty())
         {
             MergeTreeData::DataPartPtr & part = parts.back();
@@ -3802,41 +3818,81 @@ void StorageReplicatedMergeTree::clearOldPartsAndRemoveFromZK(Logger * log_)
             part_names.emplace_back(part->name);
             parts.pop_back();
         }
-
-        LOG_DEBUG(log, "Removed " << part_names.size() << " old parts from file system. Removing them from ZooKeeper.");
-
-        try
-        {
-            removePartsFromZooKeeper(zookeeper, part_names);
-        }
-        catch (const zkutil::KeeperException & e)
-        {
-            LOG_ERROR(log, "There is a problem with deleting parts from ZooKeeper: " << getCurrentExceptionMessage(false));
-        }
     }
     catch (...)
     {
         tryLogCurrentException(__PRETTY_FUNCTION__);
+
+        /// Finalize deletion of parts already deleted from filesystem, rollback remaining parts
         data.addOldParts(parts);
+        remove_from_zookeeper();
+
         throw;
     }
 
+    /// Finalize deletion
+    remove_from_zookeeper();
+
     LOG_DEBUG(log, "Removed " << count << " old parts");
 }
 
 
+static int32_t tryMultiWithRetries(zkutil::ZooKeeperPtr & zookeeper, zkutil::Ops & ops) noexcept
+{
+    int32_t code;
+    try
+    {
+        code = zookeeper->tryMultiWithRetries(ops);
+    }
+    catch (const zkutil::KeeperException & e)
+    {
+        code = e.code;
+    }
+
+    return code;
+}
+
+
 void StorageReplicatedMergeTree::removePartsFromZooKeeper(zkutil::ZooKeeperPtr & zookeeper, const Strings & part_names)
 {
     zkutil::Ops ops;
+    auto it_first_node_in_batch = part_names.cbegin();
 
     for (auto it = part_names.cbegin(); it != part_names.cend(); ++it)
     {
         removePartFromZooKeeper(*it, ops);
 
-        if (ops.size() >= zkutil::MULTI_BATCH_SIZE || next(it) == part_names.cend())
+        auto it_next = std::next(it);
+        if (ops.size() >= zkutil::MULTI_BATCH_SIZE || it_next == part_names.cend())
         {
-            zookeeper->tryMulti(ops);
+            /// It is Ok to use multi with retries to delete nodes, because new nodes with the same names cannot appear here
+            auto code = tryMultiWithRetries(zookeeper, ops);
             ops.clear();
+
+            if (code == ZNONODE)
+            {
+                /// Fallback
+                LOG_DEBUG(log, "There are no some part nodes in ZooKeeper, will remove part nodes sequentially");
+
+                for (auto it_in_batch = it_first_node_in_batch; it_in_batch != it_next; ++it_in_batch)
+                {
+                    zkutil::Ops cur_ops;
+                    removePartFromZooKeeper(*it_in_batch, cur_ops);
+                    auto cur_code = tryMultiWithRetries(zookeeper, cur_ops);
+
+                    if (cur_code == ZNONODE)
+                        LOG_DEBUG(log, "There is no part " << *it_in_batch << " in ZooKeeper, it was only in filesystem");
+                    else if (cur_code != ZOK)
+                        LOG_WARNING(log, "Cannot remove part " << *it_in_batch << " from ZooKeeper: " << ::zerror(cur_code));
+                }
+            }
+            else if (code != ZOK)
+            {
+                LOG_WARNING(log, "There was a problem with deleting " << (it_next - it_first_node_in_batch)
+                    << " nodes from ZooKeeper: " << ::zerror(code));
+            }
+
+            it_first_node_in_batch = it_next;
         }
     }
 }
diff --git a/dbms/src/Storages/System/StorageSystemGraphite.cpp b/dbms/src/Storages/System/StorageSystemGraphite.cpp
index 016aeaa304d..109d6baafa7 100644
--- a/dbms/src/Storages/System/StorageSystemGraphite.cpp
+++ b/dbms/src/Storages/System/StorageSystemGraphite.cpp
@@ -1,11 +1,12 @@
 #include <Storages/System/StorageSystemGraphite.h>
 
-#include <Core/Field.h>
 #include <Columns/ColumnString.h>
 #include <Columns/ColumnsNumber.h>
+#include <Core/Field.h>
 #include <DataStreams/OneBlockInputStream.h>
 #include <DataTypes/DataTypeString.h>
 #include <DataTypes/DataTypesNumber.h>
+#include <Interpreters/Context.h>
 
 #include <Poco/Util/Application.h>
 
@@ -63,9 +64,10 @@ static Pattern readOnePattern(
     return pattern;
 }
 
-static std::vector<Pattern> readPatterns(const std::string & section)
+static std::vector<Pattern> readPatterns(
+    const AbstractConfiguration & config,
+    const std::string & section)
 {
-    const AbstractConfiguration & config = Application::instance().config();
     AbstractConfiguration::Keys keys;
     std::vector<Pattern> result;
     size_t count = 0;
@@ -92,9 +94,8 @@ static std::vector<Pattern> readPatterns(const std::string & section)
     return result;
 }
 
-static Strings getAllGraphiteSections()
+static Strings getAllGraphiteSections(const AbstractConfiguration & config)
 {
-    const AbstractConfiguration & config = Application::instance().config();
     Strings result;
 
     AbstractConfiguration::Keys keys;
@@ -180,10 +181,12 @@ BlockInputStreams StorageSystemGraphite::read(
     col_is_default.column = std::make_shared<ColumnUInt8>();
     block.insert(col_is_default);
 
-    Strings sections = getAllGraphiteSections();
+    const auto & config = context.getConfigRef();
+
+    Strings sections = getAllGraphiteSections(config);
     for (const auto & section : sections)
     {
-        const auto patterns = readPatterns(section);
+        const auto patterns = readPatterns(config, section);
         for (const auto & pattern : patterns)
         {
             for (const auto & ret : pattern.retentions)
diff --git a/dbms/src/TableFunctions/TableFunctionFactory.cpp b/dbms/src/TableFunctions/TableFunctionFactory.cpp
index 267c3c202a2..a3355fd32e9 100644
--- a/dbms/src/TableFunctions/TableFunctionFactory.cpp
+++ b/dbms/src/TableFunctions/TableFunctionFactory.cpp
@@ -1,7 +1,8 @@
-#include <Common/Exception.h>
+#include <TableFunctions/TableFunctionFactory.h>
+
 #include <Interpreters/Context.h>
 
-#include <TableFunctions/TableFunctionFactory.h>
+#include <Common/Exception.h>
 
 
 namespace DB
@@ -11,9 +12,17 @@ namespace ErrorCodes
 {
     extern const int READONLY;
     extern const int UNKNOWN_FUNCTION;
+    extern const int LOGICAL_ERROR;
 }
 
 
+void TableFunctionFactory::registerFunction(const std::string & name, Creator creator)
+{
+    if (!functions.emplace(name, std::move(creator)).second)
+        throw Exception("TableFunctionFactory: the table function name '" + name + "' is not unique",
+            ErrorCodes::LOGICAL_ERROR);
+}
+
 TableFunctionPtr TableFunctionFactory::get(
     const std::string & name,
     const Context & context) const
@@ -24,6 +33,7 @@ TableFunctionPtr TableFunctionFactory::get(
     auto it = functions.find(name);
     if (it == functions.end())
         throw Exception("Unknown table function " + name, ErrorCodes::UNKNOWN_FUNCTION);
+
     return it->second();
 }
 
diff --git a/dbms/src/TableFunctions/TableFunctionFactory.h b/dbms/src/TableFunctions/TableFunctionFactory.h
index fd58b9e625e..de782630386 100644
--- a/dbms/src/TableFunctions/TableFunctionFactory.h
+++ b/dbms/src/TableFunctions/TableFunctionFactory.h
@@ -1,46 +1,50 @@
 #pragma once
 
-#include <unordered_map>
-#include <ext/singleton.h>
-#include <Core/Types.h>
-#include <Common/Exception.h>
 #include <TableFunctions/ITableFunction.h>
 
+#include <ext/singleton.h>
+
+#include <functional>
+#include <memory>
+#include <string>
+#include <unordered_map>
+
 
 namespace DB
 {
 
-namespace ErrorCodes
-{
-    extern const int LOGICAL_ERROR;
-}
+class Context;
 
 
 /** Lets you get a table function by its name.
   */
-class TableFunctionFactory : public ext::singleton<TableFunctionFactory>
+class TableFunctionFactory final: public ext::singleton<TableFunctionFactory>
 {
-private:
-    /// No std::function, for smaller object size and less indirection.
-    using Creator = TableFunctionPtr(*)();
-    using TableFunctions = std::unordered_map<String, Creator>;
-
-    TableFunctions functions;
-
 public:
-    TableFunctionPtr get(
-        const String & name,
-        const Context & context) const;
+    using Creator = std::function<TableFunctionPtr()>;
 
-    /// Register a table function by its name.
+    /// Register a function by its name.
     /// No locking, you must register all functions before usage of get.
+    void registerFunction(const std::string & name, Creator creator);
+
     template <typename Function>
     void registerFunction()
     {
-        if (!functions.emplace(std::string(Function::name), []{ return TableFunctionPtr(std::make_unique<Function>()); }).second)
-            throw Exception("TableFunctionFactory: the table function name '" + String(Function::name) + "' is not unique",
-                ErrorCodes::LOGICAL_ERROR);
+        auto creator = [] () -> TableFunctionPtr {
+            return std::make_shared<Function>();
+        };
+        registerFunction(Function::name, std::move(creator));
     }
+
+    /// Throws an exception if not found.
+    TableFunctionPtr get(
+        const std::string & name,
+        const Context & context) const;
+
+private:
+    using TableFunctions = std::unordered_map<std::string, Creator>;
+
+    TableFunctions functions;
 };
 
 }
diff --git a/dbms/src/TableFunctions/TableFunctionMerge.cpp b/dbms/src/TableFunctions/TableFunctionMerge.cpp
index 2c50759fcde..d6e2c7d4dc7 100644
--- a/dbms/src/TableFunctions/TableFunctionMerge.cpp
+++ b/dbms/src/TableFunctions/TableFunctionMerge.cpp
@@ -71,7 +71,7 @@ StoragePtr TableFunctionMerge::execute(const ASTPtr & ast_function, const Contex
             " - name of source database and regexp for table names.",
             ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH);
 
-    args[0] = evaluateConstantExpressionOrIdentidierAsLiteral(args[0], context);
+    args[0] = evaluateConstantExpressionOrIdentifierAsLiteral(args[0], context);
     args[1] = evaluateConstantExpressionAsLiteral(args[1], context);
 
     String source_database = static_cast<const ASTLiteral &>(*args[0]).value.safeGet<String>();
diff --git a/dbms/src/TableFunctions/TableFunctionNumbers.cpp b/dbms/src/TableFunctions/TableFunctionNumbers.cpp
index 8e2187ec675..ee141f68549 100644
--- a/dbms/src/TableFunctions/TableFunctionNumbers.cpp
+++ b/dbms/src/TableFunctions/TableFunctionNumbers.cpp
@@ -32,7 +32,7 @@ StoragePtr TableFunctionNumbers::execute(const ASTPtr & ast_function, const Cont
         throw Exception("Table function 'numbers' requires exactly one argument: amount of numbers.",
             ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH);
 
-    args[0] = evaluateConstantExpressionOrIdentidierAsLiteral(args[0], context);
+    args[0] = evaluateConstantExpressionOrIdentifierAsLiteral(args[0], context);
 
     UInt64 limit = static_cast<const ASTLiteral &>(*args[0]).value.safeGet<UInt64>();
 
diff --git a/dbms/src/TableFunctions/TableFunctionRemote.cpp b/dbms/src/TableFunctions/TableFunctionRemote.cpp
index 9700ceb6fc2..d1240c93832 100644
--- a/dbms/src/TableFunctions/TableFunctionRemote.cpp
+++ b/dbms/src/TableFunctions/TableFunctionRemote.cpp
@@ -219,7 +219,7 @@ StoragePtr TableFunctionRemote::execute(const ASTPtr & ast_function, const Conte
     description = getStringLiteral(*args[arg_num], "Hosts pattern");
     ++arg_num;
 
-    args[arg_num] = evaluateConstantExpressionOrIdentidierAsLiteral(args[arg_num], context);
+    args[arg_num] = evaluateConstantExpressionOrIdentifierAsLiteral(args[arg_num], context);
     remote_database = static_cast<const ASTLiteral &>(*args[arg_num]).value.safeGet<String>();
     ++arg_num;
 
@@ -235,7 +235,7 @@ StoragePtr TableFunctionRemote::execute(const ASTPtr & ast_function, const Conte
         if (arg_num >= args.size())
             throw Exception(err, ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH);
 
-        args[arg_num] = evaluateConstantExpressionOrIdentidierAsLiteral(args[arg_num], context);
+        args[arg_num] = evaluateConstantExpressionOrIdentifierAsLiteral(args[arg_num], context);
         remote_table = static_cast<const ASTLiteral &>(*args[arg_num]).value.safeGet<String>();
         ++arg_num;
     }
diff --git a/dbms/src/TableFunctions/TableFunctionShardByHash.cpp b/dbms/src/TableFunctions/TableFunctionShardByHash.cpp
index 583990f4cd8..edcda344fe6 100644
--- a/dbms/src/TableFunctions/TableFunctionShardByHash.cpp
+++ b/dbms/src/TableFunctions/TableFunctionShardByHash.cpp
@@ -57,8 +57,8 @@ StoragePtr TableFunctionShardByHash::execute(const ASTPtr & ast_function, const
     cluster_name = getClusterName(*args[0]);
     key = getStringLiteral(*args[1], "Key to hash");
 
-    args[2] = evaluateConstantExpressionOrIdentidierAsLiteral(args[2], context);
-    args[3] = evaluateConstantExpressionOrIdentidierAsLiteral(args[3], context);
+    args[2] = evaluateConstantExpressionOrIdentifierAsLiteral(args[2], context);
+    args[3] = evaluateConstantExpressionOrIdentifierAsLiteral(args[3], context);
 
     remote_database = static_cast<const ASTLiteral &>(*args[2]).value.safeGet<String>();
     remote_table = static_cast<const ASTLiteral &>(*args[3]).value.safeGet<String>();
diff --git a/dbms/tests/integration/helpers/cluster.py b/dbms/tests/integration/helpers/cluster.py
index 9a64e2cb070..57f165d9099 100644
--- a/dbms/tests/integration/helpers/cluster.py
+++ b/dbms/tests/integration/helpers/cluster.py
@@ -12,6 +12,7 @@ from dicttoxml import dicttoxml
 import xml.dom.minidom
 
 import docker
+from docker.errors import ContainerError
 
 from .client import Client, CommandRequest
 
@@ -28,13 +29,15 @@ class ClickHouseCluster:
     these directories will contain logs, database files, docker-compose config, ClickHouse configs etc.
     """
 
-    def __init__(self, base_path, name=None, base_configs_dir=None, server_bin_path=None, client_bin_path=None):
+    def __init__(self, base_path, name=None, base_configs_dir=None, server_bin_path=None, client_bin_path=None,
+                 zookeeper_config_path=None):
         self.base_dir = p.dirname(base_path)
         self.name = name if name is not None else ''
 
         self.base_configs_dir = base_configs_dir or os.environ.get('CLICKHOUSE_TESTS_BASE_CONFIG_DIR', '/etc/clickhouse-server/')
         self.server_bin_path = server_bin_path or os.environ.get('CLICKHOUSE_TESTS_SERVER_BIN_PATH', '/usr/bin/clickhouse')
         self.client_bin_path = client_bin_path or os.environ.get('CLICKHOUSE_TESTS_CLIENT_BIN_PATH', '/usr/bin/clickhouse-client')
+        self.zookeeper_config_path = p.join(self.base_dir, zookeeper_config_path) if zookeeper_config_path else p.join(HELPERS_DIR, 'zookeeper_config.xml')
 
         self.project_name = pwd.getpwuid(os.getuid()).pw_name + p.basename(self.base_dir) + self.name
         # docker-compose removes everything non-alphanumeric from project names so we do it too.
@@ -42,6 +45,8 @@ class ClickHouseCluster:
         self.instances_dir = p.join(self.base_dir, '_instances' + ('' if not self.name else '_' + self.name))
 
         self.base_cmd = ['docker-compose', '--project-directory', self.base_dir, '--project-name', self.project_name]
+        self.base_zookeeper_cmd = None
+        self.pre_zookkeeper_commands = []
         self.instances = {}
         self.with_zookeeper = False
 
@@ -68,13 +73,15 @@ class ClickHouseCluster:
 
         instance = ClickHouseInstance(
             self, self.base_dir, name, config_dir, main_configs, user_configs, macroses, with_zookeeper,
-            self.base_configs_dir, self.server_bin_path, clickhouse_path_dir, hostname=hostname)
+            self.zookeeper_config_path, self.base_configs_dir, self.server_bin_path, clickhouse_path_dir, hostname=hostname)
 
         self.instances[name] = instance
         self.base_cmd.extend(['--file', instance.docker_compose_path])
         if with_zookeeper and not self.with_zookeeper:
             self.with_zookeeper = True
             self.base_cmd.extend(['--file', p.join(HELPERS_DIR, 'docker_compose_zookeeper.yml')])
+            self.base_zookeeper_cmd = ['docker-compose', '--project-directory', self.base_dir, '--project-name',
+                                       self.project_name, '--file', p.join(HELPERS_DIR, 'docker_compose_zookeeper.yml')]
 
         return instance
 
@@ -102,10 +109,15 @@ class ClickHouseCluster:
         for instance in self.instances.values():
             instance.create_dir(destroy_dir=destroy_dirs)
 
-        subprocess.check_call(self.base_cmd + ['up', '-d'])
-
         self.docker_client = docker.from_env()
 
+        if self.with_zookeeper and self.base_zookeeper_cmd:
+            subprocess.check_call(self.base_zookeeper_cmd + ['up', '-d', '--no-recreate'])
+            for command in self.pre_zookkeeper_commands:
+                self.run_zookeeper_client_command(command, repeats=5)
+
+        subprocess.check_call(self.base_cmd + ['up', '-d', '--no-recreate'])
+
         start_deadline = time.time() + 20.0 # seconds
         for instance in self.instances.itervalues():
             instance.docker_client = self.docker_client
@@ -123,7 +135,7 @@ class ClickHouseCluster:
     def shutdown(self, kill=True):
         if kill:
             subprocess.check_call(self.base_cmd + ['kill'])
-        subprocess.check_call(self.base_cmd + ['down', '--volumes'])
+        subprocess.check_call(self.base_cmd + ['down', '--volumes', '--remove-orphans'])
         self.is_up = False
 
         self.docker_client = None
@@ -134,6 +146,22 @@ class ClickHouseCluster:
             instance.client = None
 
 
+    def run_zookeeper_client_command(self, command, zoo_node = 'zoo1', repeats=1, sleep_for=1):
+        cli_cmd = 'zkCli.sh  ' + command
+        zoo_name = self.get_instance_docker_id(zoo_node)
+        network_mode = 'container:' + zoo_name
+        for i in range(0, repeats - 1):
+            try:
+                return self.docker_client.containers.run('zookeeper', cli_cmd, remove=True, network_mode=network_mode)
+            except ContainerError:
+                time.sleep(sleep_for)
+
+        return self.docker_client.containers.run('zookeeper', cli_cmd, remove=True, network_mode=network_mode)
+
+    def add_zookeeper_startup_command(self, command):
+        self.pre_zookkeeper_commands.append(command)
+
+
 DOCKER_COMPOSE_TEMPLATE = '''
 version: '2'
 services:
@@ -157,7 +185,7 @@ services:
 class ClickHouseInstance:
     def __init__(
             self, cluster, base_path, name, custom_config_dir, custom_main_configs, custom_user_configs, macroses,
-            with_zookeeper, base_configs_dir, server_bin_path, clickhouse_path_dir, hostname=None):
+            with_zookeeper, zookeeper_config_path, base_configs_dir, server_bin_path, clickhouse_path_dir, hostname=None):
 
         self.name = name
         self.base_cmd = cluster.base_cmd[:]
@@ -171,6 +199,7 @@ class ClickHouseInstance:
         self.clickhouse_path_dir = p.abspath(p.join(base_path, clickhouse_path_dir)) if clickhouse_path_dir else None
         self.macroses = macroses if macroses is not None else {}
         self.with_zookeeper = with_zookeeper
+        self.zookeeper_config_path = zookeeper_config_path
 
         self.base_configs_dir = base_configs_dir
         self.server_bin_path = server_bin_path
@@ -266,14 +295,14 @@ class ClickHouseInstance:
 
         os.makedirs(self.path)
 
-        configs_dir = p.join(self.path, 'configs')
+        configs_dir = p.abspath(p.join(self.path, 'configs'))
         os.mkdir(configs_dir)
 
         shutil.copy(p.join(self.base_configs_dir, 'config.xml'), configs_dir)
         shutil.copy(p.join(self.base_configs_dir, 'users.xml'), configs_dir)
 
-        config_d_dir = p.join(configs_dir, 'config.d')
-        users_d_dir = p.join(configs_dir, 'users.d')
+        config_d_dir = p.abspath(p.join(configs_dir, 'config.d'))
+        users_d_dir = p.abspath(p.join(configs_dir, 'users.d'))
         os.mkdir(config_d_dir)
         os.mkdir(users_d_dir)
 
@@ -287,7 +316,7 @@ class ClickHouseInstance:
 
         # Put ZooKeeper config
         if self.with_zookeeper:
-            shutil.copy(p.join(HELPERS_DIR, 'zookeeper_config.xml'), config_d_dir)
+            shutil.copy(self.zookeeper_config_path, config_d_dir)
 
         # Copy config dir
         if self.custom_config_dir:
@@ -301,12 +330,12 @@ class ClickHouseInstance:
         for path in self.custom_user_config_paths:
             shutil.copy(path, users_d_dir)
 
-        db_dir = p.join(self.path, 'database')
+        db_dir = p.abspath(p.join(self.path, 'database'))
         os.mkdir(db_dir)
         if self.clickhouse_path_dir is not None:
             distutils.dir_util.copy_tree(self.clickhouse_path_dir, db_dir)
 
-        logs_dir = p.join(self.path, 'logs')
+        logs_dir = p.abspath(p.join(self.path, 'logs'))
         os.mkdir(logs_dir)
 
         depends_on = '[]'
diff --git a/dbms/tests/integration/test_distributed_ddl/test.py b/dbms/tests/integration/test_distributed_ddl/test.py
old mode 100644
new mode 100755
index 32363debfa6..0b529bb0a44
--- a/dbms/tests/integration/test_distributed_ddl/test.py
+++ b/dbms/tests/integration/test_distributed_ddl/test.py
@@ -1,8 +1,12 @@
+import os
 import os.path as p
+import sys
 import time
 import datetime
 import pytest
+from contextlib import contextmanager
 
+sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
 from helpers.cluster import ClickHouseCluster
 from helpers.network import PartitionManager, PartitionManagerDisbaler
 from helpers.test_tools import TSV
@@ -295,3 +299,10 @@ ENGINE = Distributed(cluster_without_replication, default, merge, i)
     ddl_check_query(instance, "DROP TABLE merge ON CLUSTER cluster_without_replication")
     ddl_check_query(instance, "DROP TABLE all_merge_32 ON CLUSTER cluster_without_replication")
     ddl_check_query(instance, "DROP TABLE all_merge_64 ON CLUSTER cluster_without_replication")
+
+
+if __name__ == '__main__':
+    with contextmanager(started_cluster)() as cluster:
+       for name, instance in cluster.instances.items():
+           print name, instance.ip_address
+       raw_input("Cluster created, press any key to destroy...")
diff --git a/dbms/tests/integration/test_insert_into_distributed/configs/remote_servers.xml b/dbms/tests/integration/test_insert_into_distributed/configs/remote_servers.xml
index 54a7fd95a79..84b98cc0223 100644
--- a/dbms/tests/integration/test_insert_into_distributed/configs/remote_servers.xml
+++ b/dbms/tests/integration/test_insert_into_distributed/configs/remote_servers.xml
@@ -8,5 +8,13 @@
                 </replica>
             </shard>
         </test_cluster>
+        <test_local_cluster>
+            <shard>
+                <replica>
+                    <host>localhost</host>
+                    <port>9000</port>
+                </replica>
+            </shard>
+        </test_local_cluster>
     </remote_servers>
 </yandex>
diff --git a/dbms/tests/integration/test_insert_into_distributed/test.py b/dbms/tests/integration/test_insert_into_distributed/test.py
index 69c7b62f9ee..414ac2babc8 100644
--- a/dbms/tests/integration/test_insert_into_distributed/test.py
+++ b/dbms/tests/integration/test_insert_into_distributed/test.py
@@ -14,6 +14,11 @@ instance_test_inserts_batching = cluster.add_instance(
     main_configs=['configs/remote_servers.xml'], user_configs=['configs/enable_distributed_inserts_batching.xml'])
 remote = cluster.add_instance('remote', user_configs=['configs/forbid_background_merges.xml'])
 
+instance_test_inserts_local_cluster = cluster.add_instance(
+    'instance_test_inserts_local_cluster',
+    main_configs=['configs/remote_servers.xml'])
+
+
 @pytest.fixture(scope="module")
 def started_cluster():
     try:
@@ -27,6 +32,11 @@ CREATE TABLE distributed (x UInt32) ENGINE = Distributed('test_cluster', 'defaul
         remote.query("CREATE TABLE local2 (d Date, x UInt32, s String) ENGINE = MergeTree(d, x, 8192)")
         instance_test_inserts_batching.query('''
 CREATE TABLE distributed (d Date, x UInt32) ENGINE = Distributed('test_cluster', 'default', 'local2')
+''')
+
+        instance_test_inserts_local_cluster.query("CREATE TABLE local (d Date, x UInt32) ENGINE = MergeTree(d, x, 8192)")
+        instance_test_inserts_local_cluster.query('''
+CREATE TABLE distributed_on_local (d Date, x UInt32) ENGINE = Distributed('test_local_cluster', 'default', 'local')
 ''')
 
         yield cluster
@@ -105,3 +115,10 @@ def test_inserts_batching(started_cluster):
 20000101_20000101_5_5_0	[6,9]
 '''
     assert TSV(result) == TSV(expected)
+
+
+def test_inserts_local(started_cluster):
+    instance = instance_test_inserts_local_cluster
+    instance.query("INSERT INTO distributed_on_local VALUES ('2000-01-01', 1)")
+    time.sleep(0.5)
+    assert instance.query("SELECT count(*) FROM local").strip() == '1'
diff --git a/dbms/tests/integration/test_zookeeper_config/__init__.py b/dbms/tests/integration/test_zookeeper_config/__init__.py
new file mode 100644
index 00000000000..e69de29bb2d
diff --git a/dbms/tests/integration/test_zookeeper_config/configs/remote_servers.xml b/dbms/tests/integration/test_zookeeper_config/configs/remote_servers.xml
new file mode 100644
index 00000000000..01865e33a85
--- /dev/null
+++ b/dbms/tests/integration/test_zookeeper_config/configs/remote_servers.xml
@@ -0,0 +1,17 @@
+<yandex>
+    <remote_servers>
+        <test_cluster>
+            <shard>
+                <replica>
+                    <host>node1</host>
+                    <port>9000</port>
+                </replica>
+
+                <replica>
+                    <host>node2</host>
+                    <port>9000</port>
+                </replica>
+            </shard>
+        </test_cluster>
+    </remote_servers>
+</yandex>
diff --git a/dbms/tests/integration/test_zookeeper_config/configs/zookeeper_config_root_a.xml b/dbms/tests/integration/test_zookeeper_config/configs/zookeeper_config_root_a.xml
new file mode 100644
index 00000000000..606a1e87763
--- /dev/null
+++ b/dbms/tests/integration/test_zookeeper_config/configs/zookeeper_config_root_a.xml
@@ -0,0 +1,18 @@
+<yandex>
+    <zookeeper>
+        <node index="1">
+            <host>zoo1</host>
+            <port>2181</port>
+        </node>
+        <node index="2">
+            <host>zoo2</host>
+            <port>2181</port>
+        </node>
+        <node index="3">
+            <host>zoo3</host>
+            <port>2181</port>
+        </node>
+        <session_timeout_ms>3000</session_timeout_ms>
+        <root>/root_a</root>
+    </zookeeper>
+</yandex>
diff --git a/dbms/tests/integration/test_zookeeper_config/configs/zookeeper_config_root_b.xml b/dbms/tests/integration/test_zookeeper_config/configs/zookeeper_config_root_b.xml
new file mode 100644
index 00000000000..8c934604eee
--- /dev/null
+++ b/dbms/tests/integration/test_zookeeper_config/configs/zookeeper_config_root_b.xml
@@ -0,0 +1,18 @@
+<yandex>
+    <zookeeper>
+        <node index="1">
+            <host>zoo1</host>
+            <port>2181</port>
+        </node>
+        <node index="2">
+            <host>zoo2</host>
+            <port>2181</port>
+        </node>
+        <node index="3">
+            <host>zoo3</host>
+            <port>2181</port>
+        </node>
+        <session_timeout_ms>3000</session_timeout_ms>
+        <root>/root_b</root>
+    </zookeeper>
+</yandex>
diff --git a/dbms/tests/integration/test_zookeeper_config/configs/zookeeper_config_with_password.xml b/dbms/tests/integration/test_zookeeper_config/configs/zookeeper_config_with_password.xml
new file mode 100644
index 00000000000..327953062c7
--- /dev/null
+++ b/dbms/tests/integration/test_zookeeper_config/configs/zookeeper_config_with_password.xml
@@ -0,0 +1,18 @@
+<yandex>
+    <zookeeper>
+        <node index="1">
+            <host>zoo1</host>
+            <port>2181</port>
+        </node>
+        <node index="2">
+            <host>zoo2</host>
+            <port>2181</port>
+        </node>
+        <node index="3">
+            <host>zoo3</host>
+            <port>2181</port>
+        </node>
+        <session_timeout_ms>3000</session_timeout_ms>
+        <identity>user:password</identity>
+    </zookeeper>
+</yandex>
diff --git a/dbms/tests/integration/test_zookeeper_config/test.py b/dbms/tests/integration/test_zookeeper_config/test.py
new file mode 100644
index 00000000000..d3962b147ef
--- /dev/null
+++ b/dbms/tests/integration/test_zookeeper_config/test.py
@@ -0,0 +1,96 @@
+from helpers.cluster import ClickHouseCluster
+import pytest
+
+
+def test_chroot_with_same_root():
+
+    cluster_1 = ClickHouseCluster(__file__, zookeeper_config_path='configs/zookeeper_config_root_a.xml')
+    cluster_2 = ClickHouseCluster(__file__, zookeeper_config_path='configs/zookeeper_config_root_a.xml')
+
+    node1 = cluster_1.add_instance('node1', config_dir='configs', with_zookeeper=True)
+    node2 = cluster_2.add_instance('node2', config_dir='configs', with_zookeeper=True)
+    nodes = [node1, node2]
+
+    cluster_1.add_zookeeper_startup_command('create /root_a ""')
+    cluster_1.add_zookeeper_startup_command('ls / ')
+
+    try:
+        cluster_1.start()
+
+        try:
+            cluster_2.start(destroy_dirs=False)
+            for i, node in enumerate(nodes):
+                node.query('''
+                CREATE TABLE simple (date Date, id UInt32) 
+                ENGINE = ReplicatedMergeTree('/clickhouse/tables/0/simple', '{replica}', date, id, 8192);
+                '''.format(replica=node.name))
+                node.query("INSERT INTO simple VALUES ({0}, {0})".format(i))
+
+            assert node1.query('select count() from simple').strip() == '2'
+            assert node2.query('select count() from simple').strip() == '2'
+
+        finally:
+            cluster_2.shutdown()
+
+    finally:
+        cluster_1.shutdown()
+
+
+def test_chroot_with_different_root():
+
+    cluster_1 = ClickHouseCluster(__file__, zookeeper_config_path='configs/zookeeper_config_root_a.xml')
+    cluster_2 = ClickHouseCluster(__file__, zookeeper_config_path='configs/zookeeper_config_root_b.xml')
+
+    node1 = cluster_1.add_instance('node1', config_dir='configs', with_zookeeper=True)
+    node2 = cluster_2.add_instance('node2', config_dir='configs', with_zookeeper=True)
+    nodes = [node1, node2]
+
+    cluster_1.add_zookeeper_startup_command('create /root_a ""')
+    cluster_1.add_zookeeper_startup_command('create /root_b ""')
+    cluster_1.add_zookeeper_startup_command('ls / ')
+
+    try:
+        cluster_1.start()
+
+        try:
+            cluster_2.start(destroy_dirs=False)
+
+            for i, node in enumerate(nodes):
+                node.query('''
+                CREATE TABLE simple (date Date, id UInt32) 
+                ENGINE = ReplicatedMergeTree('/clickhouse/tables/0/simple', '{replica}', date, id, 8192);
+                '''.format(replica=node.name))
+                node.query("INSERT INTO simple VALUES ({0}, {0})".format(i))
+
+            assert node1.query('select count() from simple').strip() == '1'
+            assert node2.query('select count() from simple').strip() == '1'
+
+        finally:
+            cluster_2.shutdown()
+
+    finally:
+        cluster_1.shutdown()
+
+
+def test_identity():
+
+    cluster_1 = ClickHouseCluster(__file__, zookeeper_config_path='configs/zookeeper_config_with_password.xml')
+    cluster_2 = ClickHouseCluster(__file__)
+
+    node1 = cluster_1.add_instance('node1', config_dir='configs', with_zookeeper=True)
+    node2 = cluster_2.add_instance('node2', config_dir='configs', with_zookeeper=True)
+
+    try:
+        cluster_1.start()
+
+        # node1.query('''
+        # CREATE TABLE simple (date Date, id UInt32)
+        # ENGINE = ReplicatedMergeTree('/clickhouse/tables/0/simple', '{replica}', date, id, 8192);
+        # '''.format(replica=node1.name))
+
+        with pytest.raises(Exception):
+            cluster_2.start(destroy_dirs=False)
+
+    finally:
+        cluster_1.shutdown()
+        cluster_2.shutdown()
diff --git a/dbms/tests/queries/0_stateless/00101_materialized_views_and_insert_without_explicit_database.reference b/dbms/tests/queries/0_stateless/00101_materialized_views_and_insert_without_explicit_database.reference
index 8fb767d89d5..37514bc429b 100644
--- a/dbms/tests/queries/0_stateless/00101_materialized_views_and_insert_without_explicit_database.reference
+++ b/dbms/tests/queries/0_stateless/00101_materialized_views_and_insert_without_explicit_database.reference
@@ -1,2 +1,6 @@
 2014-01-02	0	0	0000-00-00 00:00:00	2014-01-02 03:04:06
 1	2014-01-02 03:04:06
+0
+0
+0
+0
diff --git a/dbms/tests/queries/0_stateless/00101_materialized_views_and_insert_without_explicit_database.sql b/dbms/tests/queries/0_stateless/00101_materialized_views_and_insert_without_explicit_database.sql
index a3f475d2f25..772ac54d830 100644
--- a/dbms/tests/queries/0_stateless/00101_materialized_views_and_insert_without_explicit_database.sql
+++ b/dbms/tests/queries/0_stateless/00101_materialized_views_and_insert_without_explicit_database.sql
@@ -11,3 +11,28 @@ SELECT * FROM default.test_view;
 
 DROP TABLE default.test_table;
 DROP TABLE default.test_view;
+
+-- Check only sophisticated constructors and desctructors:
+
+USE test;
+DROP TABLE IF EXISTS tmp;
+DROP TABLE IF EXISTS tmp_mv;
+DROP TABLE IF EXISTS tmp_mv2;
+DROP TABLE IF EXISTS `.inner.tmp_mv`;
+DROP TABLE IF EXISTS `.inner.tmp_mv2`;
+
+CREATE TABLE tmp (date Date, name String) ENGINE = Memory;
+CREATE MATERIALIZED VIEW tmp_mv ENGINE = AggregatingMergeTree(date, (date, name), 8192) AS SELECT date, name, countState() AS cc FROM tmp GROUP BY date, name;
+CREATE TABLE tmp_mv2 AS tmp_mv;
+CREATE TABLE tmp_mv3 AS tmp_mv ENGINE = Memory;
+CREATE MATERIALIZED VIEW tmp_mv4 ENGINE = AggregatingMergeTree(date, date, 8192) POPULATE AS SELECT DISTINCT * FROM tmp_mv;
+
+DROP TABLE tmp_mv;
+DROP TABLE tmp_mv2;
+DROP TABLE tmp_mv3;
+DROP TABLE tmp_mv4;
+
+EXISTS TABLE `.inner.tmp_mv`;
+EXISTS TABLE `.inner.tmp_mv2`;
+EXISTS TABLE `.inner.tmp_mv3`;
+EXISTS TABLE `.inner.tmp_mv4`;
diff --git a/dbms/tests/queries/0_stateless/00180_attach_materialized_view.sql b/dbms/tests/queries/0_stateless/00180_attach_materialized_view.sql
index 81b162ee4dd..787cea6caf7 100644
--- a/dbms/tests/queries/0_stateless/00180_attach_materialized_view.sql
+++ b/dbms/tests/queries/0_stateless/00180_attach_materialized_view.sql
@@ -3,10 +3,10 @@ DROP TABLE IF EXISTS test.mv;
 DROP TABLE IF EXISTS test.`.inner.mv`;
 
 CREATE TABLE test.t (x UInt8) ENGINE = Null;
-CREATE MATERIALIZED VIEW test.mv AS SELECT * FROM test.t;
+CREATE MATERIALIZED VIEW test.mv ENGINE = Null AS SELECT * FROM test.t;
 
 DETACH TABLE test.mv;
-ATTACH MATERIALIZED VIEW test.mv AS SELECT * FROM test.t;
+ATTACH MATERIALIZED VIEW test.mv ENGINE = Null AS SELECT * FROM test.t;
 
 DROP TABLE test.t;
 DROP TABLE test.mv;
diff --git a/dbms/tests/queries/0_stateless/00289_buffer_test.reference b/dbms/tests/queries/0_stateless/00289_buffer_test.reference
deleted file mode 100644
index a6d67848e91..00000000000
--- a/dbms/tests/queries/0_stateless/00289_buffer_test.reference
+++ /dev/null
@@ -1,2 +0,0 @@
-20000 1 20000 200010000 20000
-20000 1 20000 200010000 20000
diff --git a/dbms/tests/queries/0_stateless/00289_buffer_test.sh b/dbms/tests/queries/0_stateless/00289_buffer_test.sh
deleted file mode 100755
index 3cf5580ee85..00000000000
--- a/dbms/tests/queries/0_stateless/00289_buffer_test.sh
+++ /dev/null
@@ -1,53 +0,0 @@
-#!/usr/bin/env bash
-
-clickhouse-client -n --query="
-    DROP TABLE IF EXISTS test.dst;
-    DROP TABLE IF EXISTS test.buffer;
-
-    CREATE TABLE test.dst (x UInt64, d Date DEFAULT today()) ENGINE = MergeTree(d, x, 8192);
-    CREATE TABLE test.buffer (x UInt64, d Date DEFAULT today()) ENGINE = Buffer(test, dst, 16, 1, 10, 100, 1000, 10000, 100000);
-    ";
-
-seq    1 1000 | sed -r -e 's/^(.+)$/INSERT INTO test.buffer (x) VALUES (\1);/' | clickhouse-client -n &
-seq 1001 2000 | sed -r -e 's/^(.+)$/INSERT INTO test.buffer (x) VALUES (\1);/' | clickhouse-client -n &
-seq 2001 3000 | sed -r -e 's/^(.+)$/INSERT INTO test.buffer (x) VALUES (\1);/' | clickhouse-client -n &
-seq 3001 4000 | sed -r -e 's/^(.+)$/INSERT INTO test.buffer (x) VALUES (\1);/' | clickhouse-client -n &
-seq 4001 5000 | sed -r -e 's/^(.+)$/INSERT INTO test.buffer (x) VALUES (\1);/' | clickhouse-client -n &
-seq 5001 6000 | sed -r -e 's/^(.+)$/INSERT INTO test.buffer (x) VALUES (\1);/' | clickhouse-client -n &
-seq 6001 7000 | sed -r -e 's/^(.+)$/INSERT INTO test.buffer (x) VALUES (\1);/' | clickhouse-client -n &
-seq 7001 8000 | sed -r -e 's/^(.+)$/INSERT INTO test.buffer (x) VALUES (\1);/' | clickhouse-client -n &
-seq 8001 9000 | sed -r -e 's/^(.+)$/INSERT INTO test.buffer (x) VALUES (\1);/' | clickhouse-client -n &
-seq 9001 10000 | sed -r -e 's/^(.+)$/INSERT INTO test.buffer (x) VALUES (\1);/' | clickhouse-client -n &
-seq 10001 11000 | sed -r -e 's/^(.+)$/INSERT INTO test.buffer (x) VALUES (\1);/' | clickhouse-client -n &
-seq 11001 12000 | sed -r -e 's/^(.+)$/INSERT INTO test.buffer (x) VALUES (\1);/' | clickhouse-client -n &
-seq 12001 13000 | sed -r -e 's/^(.+)$/INSERT INTO test.buffer (x) VALUES (\1);/' | clickhouse-client -n &
-seq 13001 14000 | sed -r -e 's/^(.+)$/INSERT INTO test.buffer (x) VALUES (\1);/' | clickhouse-client -n &
-seq 14001 15000 | sed -r -e 's/^(.+)$/INSERT INTO test.buffer (x) VALUES (\1);/' | clickhouse-client -n &
-seq 15001 16000 | sed -r -e 's/^(.+)$/INSERT INTO test.buffer (x) VALUES (\1);/' | clickhouse-client -n &
-seq 16001 17000 | sed -r -e 's/^(.+)$/INSERT INTO test.buffer (x) VALUES (\1);/' | clickhouse-client -n &
-seq 17001 18000 | sed -r -e 's/^(.+)$/INSERT INTO test.buffer (x) VALUES (\1);/' | clickhouse-client -n &
-seq 18001 19000 | sed -r -e 's/^(.+)$/INSERT INTO test.buffer (x) VALUES (\1);/' | clickhouse-client -n &
-seq 19001 20000 | sed -r -e 's/^(.+)$/INSERT INTO test.buffer (x) VALUES (\1);/' | clickhouse-client -n &
-
-wait
-
-# To avoid race conditions (see comments in StorageBuffer.cpp)
-function retry()
-{
-    RES=$(clickhouse-client --query="$1")
-    if [[ $RES != "20000    1    20000    200010000    20000" ]]; then
-        sleep 10;
-        RES=$(clickhouse-client --query="$1");
-    fi;
-
-    echo $RES;
-}
-
-retry "SELECT count(), min(x), max(x), sum(x), uniqExact(x) FROM test.buffer;";
-clickhouse-client --query="OPTIMIZE TABLE test.buffer;";
-retry "SELECT count(), min(x), max(x), sum(x), uniqExact(x) FROM test.dst;";
-
-clickhouse-client -n --query="
-    DROP TABLE test.dst;
-    DROP TABLE test.buffer;
-    ";
diff --git a/dbms/tests/queries/0_stateless/00488_non_ascii_column_names.reference b/dbms/tests/queries/0_stateless/00488_non_ascii_column_names.reference
new file mode 100644
index 00000000000..77bcc007aa3
--- /dev/null
+++ b/dbms/tests/queries/0_stateless/00488_non_ascii_column_names.reference
@@ -0,0 +1,2 @@
+hello
+hello	world
diff --git a/dbms/tests/queries/0_stateless/00494_alias_substitution_bug.reference b/dbms/tests/queries/0_stateless/00494_alias_substitution_bug.reference
new file mode 100644
index 00000000000..6ed281c757a
--- /dev/null
+++ b/dbms/tests/queries/0_stateless/00494_alias_substitution_bug.reference
@@ -0,0 +1,2 @@
+1
+1
diff --git a/dbms/tests/queries/0_stateless/00494_alias_substitution_bug.sql b/dbms/tests/queries/0_stateless/00494_alias_substitution_bug.sql
new file mode 100644
index 00000000000..81c4b1a39a4
--- /dev/null
+++ b/dbms/tests/queries/0_stateless/00494_alias_substitution_bug.sql
@@ -0,0 +1,5 @@
+DROP TABLE IF EXISTS test.nested;
+CREATE TABLE test.nested (n Nested(x UInt8)) ENGINE = Memory;
+INSERT INTO test.nested VALUES ([1, 2]);
+SELECT 1 AS x FROM remote('127.0.0.1', test.nested) ARRAY JOIN n.x;
+DROP TABLE test.nested;
diff --git a/dbms/tests/queries/0_stateless/00495_reading_const_zero_column.reference b/dbms/tests/queries/0_stateless/00495_reading_const_zero_column.reference
new file mode 100644
index 00000000000..e69de29bb2d
diff --git a/dbms/tests/queries/0_stateless/00495_reading_const_zero_column.sql b/dbms/tests/queries/0_stateless/00495_reading_const_zero_column.sql
new file mode 100644
index 00000000000..ad94b9c06b3
--- /dev/null
+++ b/dbms/tests/queries/0_stateless/00495_reading_const_zero_column.sql
@@ -0,0 +1,5 @@
+drop table if exists one_table;
+create table one_table (date Date, one UInt64) engine = MergeTree(date, (date, one), 8192);
+insert into one_table select today(), toUInt64(1) from system.numbers limit 100000;
+SET preferred_block_size_bytes = 8192;
+select isNull(one) from one_table where isNull(one);
diff --git a/dbms/tests/queries/0_stateless/00496_substring_negative_offset.reference b/dbms/tests/queries/0_stateless/00496_substring_negative_offset.reference
new file mode 100644
index 00000000000..b592f370dea
--- /dev/null
+++ b/dbms/tests/queries/0_stateless/00496_substring_negative_offset.reference
@@ -0,0 +1,40 @@
+abc
+abc
+abc
+bc
+c
+
+abc
+bc
+c
+
+abc
+abc
+abc
+bc
+c
+
+abc
+bc
+c
+
+abc
+abc
+abc
+bc
+c
+
+abc
+bc
+c
+
+abc
+abc
+abc
+bc
+c
+
+abc
+bc
+c
+
diff --git a/dbms/tests/queries/0_stateless/00496_substring_negative_offset.sql b/dbms/tests/queries/0_stateless/00496_substring_negative_offset.sql
new file mode 100644
index 00000000000..170af8f79b4
--- /dev/null
+++ b/dbms/tests/queries/0_stateless/00496_substring_negative_offset.sql
@@ -0,0 +1,4 @@
+SELECT substring('abc', number - 5) FROM system.numbers LIMIT 10;
+SELECT substring(materialize('abc'), number - 5) FROM system.numbers LIMIT 10;
+SELECT substring(toFixedString('abc', 3), number - 5) FROM system.numbers LIMIT 10;
+SELECT substring(materialize(toFixedString('abc', 3)), number - 5) FROM system.numbers LIMIT 10;
diff --git a/dbms/tests/queries/0_stateless/00497_whitespaces_in_insert.reference b/dbms/tests/queries/0_stateless/00497_whitespaces_in_insert.reference
new file mode 100644
index 00000000000..9055e59470d
--- /dev/null
+++ b/dbms/tests/queries/0_stateless/00497_whitespaces_in_insert.reference
@@ -0,0 +1,9 @@
+59
+59
+32
+59
+32
+59
+
+;
+;
diff --git a/dbms/tests/queries/0_stateless/00497_whitespaces_in_insert.sh b/dbms/tests/queries/0_stateless/00497_whitespaces_in_insert.sh
new file mode 100755
index 00000000000..09d1aa3de88
--- /dev/null
+++ b/dbms/tests/queries/0_stateless/00497_whitespaces_in_insert.sh
@@ -0,0 +1,28 @@
+#!/bin/bash
+
+clickhouse-client -q "DROP TABLE IF EXISTS test.ws";
+clickhouse-client -q "CREATE TABLE test.ws (i UInt8) ENGINE = Memory";
+
+clickhouse-client -q "INSERT INTO test.ws FORMAT RowBinary ;";
+clickhouse-client -q "INSERT INTO test.ws FORMAT RowBinary 	; ";
+clickhouse-client -q "INSERT INTO test.ws FORMAT RowBinary
+; ";
+echo -n ";" | clickhouse-client -q "INSERT INTO test.ws FORMAT RowBinary";
+
+clickhouse-client --max_threads=1 -q "SELECT * FROM test.ws";
+clickhouse-client -q "DROP TABLE test.ws";
+
+
+clickhouse-client -q "SELECT ''";
+
+
+clickhouse-client -q "CREATE TABLE test.ws (s String) ENGINE = Memory";
+clickhouse-client -q "INSERT INTO test.ws FORMAT TSV	;
+";
+echo ";" | clickhouse-client -q "INSERT INTO test.ws FORMAT TSV"
+if clickhouse-client -q "INSERT INTO test.ws FORMAT TSV;" 1>/dev/null 2>/dev/null; then
+    echo ERROR;
+fi
+clickhouse-client --max_threads=1 -q "SELECT * FROM test.ws";
+
+clickhouse-client -q "DROP TABLE test.ws";
diff --git a/dbms/tests/queries/0_stateless/00498_bitwise_aggregate_functions.reference b/dbms/tests/queries/0_stateless/00498_bitwise_aggregate_functions.reference
new file mode 100644
index 00000000000..fb5cfb57d39
--- /dev/null
+++ b/dbms/tests/queries/0_stateless/00498_bitwise_aggregate_functions.reference
@@ -0,0 +1,4 @@
+0	[0,4,8,12,16]	28	0	16
+1	[1,5,9,13,17]	29	1	17
+2	[2,6,10,14,18]	30	2	18
+3	[3,7,11,15,19]	31	3	19
diff --git a/dbms/tests/queries/0_stateless/00498_bitwise_aggregate_functions.sql b/dbms/tests/queries/0_stateless/00498_bitwise_aggregate_functions.sql
new file mode 100644
index 00000000000..07cd0b3630f
--- /dev/null
+++ b/dbms/tests/queries/0_stateless/00498_bitwise_aggregate_functions.sql
@@ -0,0 +1 @@
+SELECT number % 4 AS k, groupArray(number), groupBitOr(number), groupBitAnd(number), groupBitXor(number) FROM (SELECT * FROM system.numbers LIMIT 20) GROUP BY k ORDER BY k;
diff --git a/debian/.pbuilderrc b/debian/.pbuilderrc
new file mode 100644
index 00000000000..d640d54871e
--- /dev/null
+++ b/debian/.pbuilderrc
@@ -0,0 +1,137 @@
+# ubuntu:
+# prepare:
+# ln -s gutsy /usr/share/debootstrap/scripts/artful
+# echo "3.0 (native)" > debian/source/format
+# build ubuntu:
+# sudo DIST=trusty pbuilder create --configfile debian/.pbuilderrc && DIST=trusty pdebuild --configfile debian/.pbuilderrc
+# sudo DIST=zesty  pbuilder create --configfile debian/.pbuilderrc && DIST=zesty  pdebuild --configfile debian/.pbuilderrc
+# sudo DIST=artful pbuilder create --configfile debian/.pbuilderrc && DIST=artful pdebuild --configfile debian/.pbuilderrc
+# build debian:
+# sudo DIST=experimental pbuilder create --configfile debian/.pbuilderrc && DIST=experimental pdebuild --configfile debian/.pbuilderrc
+# sudo DIST=testing      pbuilder create --configfile debian/.pbuilderrc && DIST=testing      pdebuild --configfile debian/.pbuilderrc
+# sudo DIST=stable       pbuilder create --configfile debian/.pbuilderrc && DIST=stable       pdebuild --configfile debian/.pbuilderrc
+# BROKEN:
+# sudo DIST=xenial       pbuilder create --configfile debian/.pbuilderrc && DIST=xenial       pdebuild --configfile debian/.pbuilderrc
+# sudo DIST=unstable     pbuilder create --configfile debian/.pbuilderrc && DIST=unstable     pdebuild --configfile debian/.pbuilderrc
+# TODO:
+# sudo DIST=zesty        ARCH=i386  pbuilder create --configfile debian/.pbuilderrc && DIST=zesty        ARCH=i386  pdebuild --configfile debian/.pbuilderrc
+
+# from https://wiki.debian.org/PbuilderTricks :
+
+# Codenames for Debian suites according to their alias. Update these when
+# needed.
+UNSTABLE_CODENAME="sid"
+TESTING_CODENAME="buster"
+STABLE_CODENAME="stretch"
+STABLE_BACKPORTS_SUITE="$STABLE_CODENAME-backports"
+
+# List of Debian suites.
+DEBIAN_SUITES=($UNSTABLE_CODENAME $TESTING_CODENAME $STABLE_CODENAME $STABLE_BACKPORTS_SUITE
+    "experimental" "unstable" "testing" "stable")
+
+# List of Ubuntu suites. Update these when needed.
+UBUNTU_SUITES=("artful" "zesty" "xenial" "trusty")
+
+# Mirrors to use. Update these to your preferred mirror.
+DEBIAN_MIRROR="deb.debian.org"
+#UBUNTU_MIRROR="mirrors.kernel.org"
+
+UBUNTU_MIRROR="mirror.yandex.ru"
+#DEBIAN_MIRROR="mirror.yandex.ru"
+
+# Optionally use the changelog of a package to determine the suite to use if
+# none set.
+if [ -z "${DIST}" ] && [ -r "debian/changelog" ]; then
+    DIST=$(dpkg-parsechangelog --show-field=Distribution)
+fi
+
+# Optionally set a default distribution if none is used. Note that you can set
+# your own default (i.e. ${DIST:="unstable"}).
+: ${DIST:="$(lsb_release --short --codename)"}
+
+# Optionally change Debian codenames in $DIST to their aliases.
+case "$DIST" in
+    $UNSTABLE_CODENAME)
+        DIST="unstable"
+        ;;
+    $TESTING_CODENAME)
+        DIST="testing"
+        ;;
+    $STABLE_CODENAME)
+        DIST="stable"
+        ;;
+esac
+
+# Optionally set the architecture to the host architecture if none set. Note
+# that you can set your own default (i.e. ${ARCH:="i386"}).
+: ${ARCH:="$(dpkg --print-architecture)"}
+
+NAME="$DIST"
+if [ -n "${ARCH}" ]; then
+    NAME="$NAME-$ARCH"
+    DEBOOTSTRAPOPTS=("--arch" "$ARCH" "${DEBOOTSTRAPOPTS[@]}")
+fi
+
+BASETGZ="/var/cache/pbuilder/$NAME-base.tgz"
+DISTRIBUTION="$DIST"
+BUILDRESULT="/var/cache/pbuilder/$NAME/result/"
+APTCACHE="/var/cache/pbuilder/$NAME/aptcache/"
+BUILDPLACE="/var/cache/pbuilder/build/"
+
+if $(echo ${DEBIAN_SUITES[@]} | grep -q $DIST); then
+    # Debian configuration
+    OSNAME=debian
+    MIRRORSITE="http://$DEBIAN_MIRROR/$OSNAME/"
+    COMPONENTS="main contrib non-free"
+    if $(echo "$STABLE_CODENAME stable" | grep -q $DIST); then
+        OTHERMIRROR="$OTHERMIRROR | deb $MIRRORSITE $STABLE_BACKPORTS_SUITE $COMPONENTS"
+    fi
+    # APTKEYRINGS=/usr/share/keyrings/debian-archive-keyring.gpg
+    # sudo apt install debian-archive-keyring
+    DEBOOTSTRAPOPTS+=( '--keyring' '/usr/share/keyrings/debian-archive-keyring.gpg' )
+elif $(echo ${UBUNTU_SUITES[@]} | grep -q $DIST); then
+    # Ubuntu configuration
+    OSNAME=ubuntu
+    MIRRORSITE="http://$UBUNTU_MIRROR/$OSNAME/"
+    COMPONENTS="main restricted universe multiverse"
+
+    case "$DIST" in
+       "trusty" | "xenial" )
+           OTHERMIRROR+="deb http://ppa.launchpad.net/ubuntu-toolchain-r/test/$OSNAME $DIST main"
+           ALLOWUNTRUSTED=yes
+           ;;
+    esac
+
+    # deb http://apt.llvm.org/zesty/ llvm-toolchain-zesty-5.0 main
+else
+    echo "Unknown distribution: $DIST"
+    exit 1
+fi
+
+echo "using $NAME $OSNAME $DIST $ARCH $LOGNAME"
+
+case "$DIST" in
+    "trusty")
+        # ccache broken
+        ;;
+    *)
+        CCACHEDIR=/var/cache/pbuilder/ccache
+        ;;
+esac
+
+case "$DIST" in
+    "trusty" | "artful" | "experimental" | "unstable" )
+        export DEB_CC=gcc-7
+        export DEB_CXX=g++-7
+        ;;
+esac
+
+export CCACHE_PREFIX=
+
+export DEB_BUILD_OPTIONS=parallel=`nproc`
+
+# Floating bug with permissions:
+sudo mkdir -p /var/cache/pbuilder/ccache
+sudo chmod -R a+rwx /var/cache/pbuilder/ccache
+
+# echo "DEBOOTSTRAPOPTS = ${DEBOOTSTRAPOPTS[@]}"
diff --git a/debian/changelog b/debian/changelog
new file mode 100644
index 00000000000..8f6dc3b2b4a
--- /dev/null
+++ b/debian/changelog
@@ -0,0 +1,5 @@
+clickhouse (1.1.54281) unstable; urgency=low
+
+  * Modified source code
+
+ --  <robot-metrika-test@yandex-team.ru>  Wed, 23 Aug 2017 15:09:15 +0300
diff --git a/debian/control b/debian/control
index a91d321d09d..6eb44d4bda4 100644
--- a/debian/control
+++ b/debian/control
@@ -3,8 +3,8 @@ Priority: optional
 Maintainer: Alexey Milovidov <milovidov@yandex-team.ru>
 Build-Depends: debhelper (>= 9),
                cmake,
-               gcc-6, g++-6,
-               default-libmysqlclient-dev | libmysqlclient-dev,
+               gcc-7 | gcc-6, g++-7 | g++-6,
+               libmariadbclient-dev | default-libmysqlclient-dev | libmysqlclient-dev,
                libicu-dev,
                libltdl-dev,
                libreadline-dev,
diff --git a/debian/rules b/debian/rules
index ae7875f6b83..c74140a2c51 100755
--- a/debian/rules
+++ b/debian/rules
@@ -19,22 +19,28 @@ DEB_HOST_MULTIARCH ?= $(shell dpkg-architecture -qDEB_HOST_MULTIARCH)
 #endif
 #DEB_BUILD_OPTIONS+=parallel=$(THREADS_COUNT)
 
-DEB_CC ?= gcc-6
-DEB_CXX ?= g++-6
-DEB_CLANG ?= $(shell which clang-6.0 || which clang-5.0 || which clang-4.0 || which clang || which clang-3.9 || which clang-3.8)
-# CMAKE_FLAGS_ADD += -DINTERNAL_COMPILER_EXECUTABLE=$(basename $(DEB_CLANG)) # TODO: this is actual only if you will also change clang name in copy_clang_binaries.sh
+CMAKE_FLAGS += -DENABLE_TESTS=0
 
-DEB_BUILD_GNU_TYPE := $(shell dpkg-architecture -qDEB_BUILD_GNU_TYPE)
-DEB_HOST_GNU_TYPE  := $(shell dpkg-architecture -qDEB_HOST_GNU_TYPE)
+DEB_CLANG ?= $(shell which clang-6.0 || which clang-5.0 || which clang-4.0 || which clang || which clang-3.9 || which clang-3.8)
+# CMAKE_FLAGS += -DINTERNAL_COMPILER_EXECUTABLE=$(basename $(DEB_CLANG)) # TODO: this is actual only if you will also change clang name in copy_clang_binaries.sh
+
+#DEB_CC ?= gcc-6
+#DEB_CXX ?= g++-6
+
+ifdef DEB_CXX
+    DEB_BUILD_GNU_TYPE := $(shell dpkg-architecture -qDEB_BUILD_GNU_TYPE)
+    DEB_HOST_GNU_TYPE  := $(shell dpkg-architecture -qDEB_HOST_GNU_TYPE)
 ifeq ($(DEB_BUILD_GNU_TYPE),$(DEB_HOST_GNU_TYPE))
-CC := $(DEB_CC)
-CXX := $(DEB_CXX)
+        CC := $(DEB_CC)
+        CXX := $(DEB_CXX)
 else
-CC := $(DEB_HOST_GNU_TYPE)-$(DEB_CC)
-CXX := $(DEB_HOST_GNU_TYPE)-$(DEB_CXX)
+        CC := $(DEB_HOST_GNU_TYPE)-$(DEB_CC)
+        CXX := $(DEB_HOST_GNU_TYPE)-$(DEB_CXX)
+endif
 endif
 
-CMAKE_FLAGS ?= -DCMAKE_CXX_COMPILER=`which $(CXX)` -DCMAKE_C_COMPILER=`which $(CC)` -DENABLE_TESTS=0 $(CMAKE_FLAGS_ADD)
+CMAKE_FLAGS += -DCMAKE_CXX_COMPILER=`which $(CXX)` -DCMAKE_C_COMPILER=`which $(CC)`
+
 ifndef DH_VERBOSE
     CMAKE_FLAGS += -DCMAKE_VERBOSE_MAKEFILE=0
 endif
@@ -76,9 +82,10 @@ override_dh_install:
 
 	# In case building clickhouse-server, adding to package binary of clang, ld and header files - for dynamic compilation.
 	mkdir -p $(DESTDIR)/usr/share/clickhouse/bin $(DESTDIR)/usr/share/clickhouse/headers
+ifeq ($(USE_INTERNAL_COMPILER),1)
 	CLANG=$(DEB_CLANG) debian/copy_clang_binaries.sh $(DESTDIR)/usr/share/clickhouse/bin/
 	CLANG=$(DEB_CLANG) ./copy_headers.sh . $(DESTDIR)/usr/share/clickhouse/headers
-
+endif
 
 	# fake metrika files when private dir is empty
 	mkdir -p debian/tmp/etc/clickhouse-server/metrika
diff --git a/docs/en/table_engines/replication.rst b/docs/en/table_engines/replication.rst
index 44b86bc1bb1..a3d543f7059 100644
--- a/docs/en/table_engines/replication.rst
+++ b/docs/en/table_engines/replication.rst
@@ -159,7 +159,7 @@ If the data differs on various replicas, first sync it, or delete this data on a
 
 Rename the existing MergeTree table, then create a ReplicatedMergeTree table with the old name.
 Move the data from the old table to the 'detached' subdirectory inside the directory with the new table data (``/var/lib/clickhouse/data/db_name/table_name/``).
-Then run ALTER TABLE ATTACH PART on one of the replicas to add these data parts to the working set.
+Then run ALTER TABLE ATTACH PARTITION on one of the replicas to add these data parts to the working set.
 
 If exactly the same parts exist on the other replicas, they are added to the working set on them. If not, the parts are downloaded from the replica that has them.
 
diff --git a/docs/ru/table_engines/replication.rst b/docs/ru/table_engines/replication.rst
index 83f97928dea..24bc1b619d1 100644
--- a/docs/ru/table_engines/replication.rst
+++ b/docs/ru/table_engines/replication.rst
@@ -163,7 +163,7 @@ ReplicatedSummingMergeTree
 
 Переименуйте имеющуюся MergeTree таблицу, затем создайте со старым именем таблицу типа ReplicatedMergeTree.
 Перенесите данные из старой таблицы в поддиректорию detached в директории с данными новой таблицы (``/var/lib/clickhouse/data/db_name/table_name/``).
-Затем добавьте эти куски данных в рабочий набор с помощью выполнения запросов ALTER TABLE ATTACH PART на одной из реплик.
+Затем добавьте эти куски данных в рабочий набор с помощью выполнения запросов ALTER TABLE ATTACH PARTITION на одной из реплик.
 
 Преобразование из ReplicatedMergeTree в MergeTree
 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
diff --git a/libs/libdaemon/cmake/find_unwind.cmake b/libs/libdaemon/cmake/find_unwind.cmake
index 0c55715b60c..80b2c746c7c 100644
--- a/libs/libdaemon/cmake/find_unwind.cmake
+++ b/libs/libdaemon/cmake/find_unwind.cmake
@@ -1,7 +1,7 @@
 include (CMakePushCheckState)
 cmake_push_check_state ()
 
-if (CMAKE_SYSTEM MATCHES "Linux" AND NOT ARM)
+if (CMAKE_SYSTEM MATCHES "Linux" AND NOT ARCH_ARM)
     option (USE_INTERNAL_UNWIND_LIBRARY "Set to FALSE to use system unwind library instead of bundled" ${NOT_UNBUNDLED})
 else ()
     option (USE_INTERNAL_UNWIND_LIBRARY "Set to FALSE to use system unwind library instead of bundled" OFF)
@@ -34,7 +34,7 @@ endif ()
 
 if (UNWIND_LIBRARY AND UNWIND_INCLUDE_DIR)
     set (USE_UNWIND 1)
-elseif (CMAKE_SYSTEM MATCHES "Linux" AND NOT ARM)
+elseif (CMAKE_SYSTEM MATCHES "Linux" AND NOT ARCH_ARM)
     set (USE_INTERNAL_UNWIND_LIBRARY 1)
     set (UNWIND_INCLUDE_DIR "${ClickHouse_SOURCE_DIR}/contrib/libunwind/include")
     set (UNWIND_LIBRARY unwind)
diff --git a/libs/libdaemon/include/daemon/BaseDaemon.h b/libs/libdaemon/include/daemon/BaseDaemon.h
index 33a6889d610..870990246e5 100644
--- a/libs/libdaemon/include/daemon/BaseDaemon.h
+++ b/libs/libdaemon/include/daemon/BaseDaemon.h
@@ -54,6 +54,8 @@ class BaseDaemon : public Poco::Util::ServerApplication
     friend class SignalListener;
 
 public:
+    static constexpr char DEFAULT_GRAPHITE_CONFIG_NAME[] = "graphite";
+
     BaseDaemon();
     ~BaseDaemon();
 
@@ -67,7 +69,7 @@ public:
     void buildLoggers();
 
     /// Определяет параметр командной строки
-    void defineOptions(Poco::Util::OptionSet& _options) override;
+    void defineOptions(Poco::Util::OptionSet & _options) override;
 
     /// Заставляет демон завершаться, если хотя бы одна задача завершилась неудачно
     void exitOnTaskError();
@@ -107,7 +109,7 @@ public:
     /// root_path по умолчанию one_min
     /// key - лучше группировать по смыслу. Например "meminfo.cached" или "meminfo.free", "meminfo.total"
     template <class T>
-    void writeToGraphite(const std::string & key, const T & value, const std::string & config_name = "graphite", time_t timestamp = 0, const std::string & custom_root_path = "")
+    void writeToGraphite(const std::string & key, const T & value, const std::string & config_name = DEFAULT_GRAPHITE_CONFIG_NAME, time_t timestamp = 0, const std::string & custom_root_path = "")
     {
         auto writer = getGraphiteWriter(config_name);
         if (writer)
@@ -115,14 +117,14 @@ public:
     }
 
     template <class T>
-    void writeToGraphite(const GraphiteWriter::KeyValueVector<T> & key_vals, const std::string & config_name = "graphite", time_t timestamp = 0, const std::string & custom_root_path = "")
+    void writeToGraphite(const GraphiteWriter::KeyValueVector<T> & key_vals, const std::string & config_name = DEFAULT_GRAPHITE_CONFIG_NAME, time_t timestamp = 0, const std::string & custom_root_path = "")
     {
         auto writer = getGraphiteWriter(config_name);
         if (writer)
             writer->write(key_vals, timestamp, custom_root_path);
     }
 
-    GraphiteWriter * getGraphiteWriter(const std::string & config_name = "graphite")
+    GraphiteWriter * getGraphiteWriter(const std::string & config_name = DEFAULT_GRAPHITE_CONFIG_NAME)
     {
         if (graphite_writers.count(config_name))
             return graphite_writers[config_name].get();
diff --git a/libs/libdaemon/src/BaseDaemon.cpp b/libs/libdaemon/src/BaseDaemon.cpp
index eb2ec6bdab8..434545b89cc 100644
--- a/libs/libdaemon/src/BaseDaemon.cpp
+++ b/libs/libdaemon/src/BaseDaemon.cpp
@@ -29,6 +29,7 @@
 #include <sys/time.h>
 #include <sys/resource.h>
 #include <iostream>
+#include <sstream>
 #include <memory>
 #include <Poco/Observer.h>
 #include <Poco/Logger.h>
@@ -72,6 +73,8 @@ using Poco::Message;
 using Poco::Util::AbstractConfiguration;
 
 
+constexpr char BaseDaemon::DEFAULT_GRAPHITE_CONFIG_NAME[];
+
 /** Для передачи информации из обработчика сигнала для обработки в другом потоке.
   * Если при получении сигнала надо делать что-нибудь серьёзное (например, вывести сообщение в лог),
   *  то передать нужную информацию через pipe в другой поток и сделать там всю работу
@@ -691,7 +694,7 @@ std::string BaseDaemon::getDefaultCorePath() const
     return "/opt/cores/";
 }
 
-void BaseDaemon::initialize(Application& self)
+void BaseDaemon::initialize(Application & self)
 {
     task_manager.reset(new Poco::TaskManager);
     ServerApplication::initialize(self);
@@ -740,6 +743,18 @@ void BaseDaemon::initialize(Application& self)
         tzset();
     }
 
+    /// This must be done before creation of any files (including logs).
+    if (config().has("umask"))
+    {
+        std::string umask_str = config().getString("umask");
+        mode_t umask_num = 0;
+        std::stringstream stream;
+        stream << umask_str;
+        stream >> std::oct >> umask_num;
+
+        umask(umask_num);
+    }
+
     std::string log_path = config().getString("logger.log", "");
     if (!log_path.empty())
         log_path = Poco::Path(log_path).setFileName("").toString();
diff --git a/libs/libmysqlxx/cmake/find_mysqlclient.cmake b/libs/libmysqlxx/cmake/find_mysqlclient.cmake
index ae77349e444..1e6d6decbb6 100644
--- a/libs/libmysqlxx/cmake/find_mysqlclient.cmake
+++ b/libs/libmysqlxx/cmake/find_mysqlclient.cmake
@@ -22,9 +22,9 @@ if (ENABLE_MYSQL)
     find_path (MYSQL_INCLUDE_DIR NAMES mysql/mysql.h PATHS ${MYSQL_INCLUDE_PATHS})
 
     if (USE_STATIC_LIBRARIES)
-        find_library (STATIC_MYSQLCLIENT_LIB libmysqlclient.a PATHS ${MYSQL_LIB_PATHS})
+        find_library (STATIC_MYSQLCLIENT_LIB mariadbclient mysqlclient PATHS ${MYSQL_LIB_PATHS})
     else ()
-        find_library (MYSQLCLIENT_LIB mysqlclient PATHS ${MYSQL_LIB_PATHS})
+        find_library (MYSQLCLIENT_LIB mariadbclient mysqlclient PATHS ${MYSQL_LIB_PATHS})
     endif ()
 
     if (MYSQL_INCLUDE_DIR AND (STATIC_MYSQLCLIENT_LIB OR MYSQLCLIENT_LIB))
diff --git a/libs/libmysqlxx/include/mysqlxx/Transaction.h b/libs/libmysqlxx/include/mysqlxx/Transaction.h
index 542cd8e12e7..2b3a26ef63a 100644
--- a/libs/libmysqlxx/include/mysqlxx/Transaction.h
+++ b/libs/libmysqlxx/include/mysqlxx/Transaction.h
@@ -22,8 +22,14 @@ public:
 
     virtual ~Transaction()
     {
-        if (!finished)
-            rollback();
+        try
+        {
+            if (!finished)
+                rollback();
+        }
+        catch (...)
+        {
+        }
     }
 
     void commit()
diff --git a/release b/release
index d85357fc185..255f973eb91 100755
--- a/release
+++ b/release
@@ -7,12 +7,16 @@ cd $CURDIR
 
 source "./release_lib.sh"
 
-CONTROL=debian/control
-CHLOG=debian/changelog
-CHDATE=$(LC_ALL=C date -R | sed -e 's/,/\\,/g') # Replace comma to '\,'
+DEB_CC=gcc-6
+DEB_CXX=g++-6
 
+CONTROL=debian/control
 DEBUILD_NOSIGN_OPTIONS="-us -uc"
 
+if [ -z "$REVISION" ] ; then
+    get_revision_author
+fi
+
 while [[ $1 == --* ]]
 do
     if [[ $1 == '--test' ]]; then
@@ -35,9 +39,6 @@ do
     fi
 done
 
-if [ -z "$REVISION" ] ; then
-    get_revision_author
-fi
 
 # Build options
 if [ -n "$SANITIZER" ]
@@ -60,12 +61,12 @@ if [ -z "$THREAD_COUNT" ] ; then
     THREAD_COUNT=`nproc || grep -c ^processor /proc/cpuinfo`
 fi
 
-CMAKE_FLAGS_ADD+=" $LIBTCMALLOC_OPTS -DCMAKE_BUILD_TYPE=$CMAKE_BUILD_TYPE"
+CMAKE_FLAGS+=" $LIBTCMALLOC_OPTS -DCMAKE_BUILD_TYPE=$CMAKE_BUILD_TYPE"
 
 REVISION+=$VERSION_POSTFIX
 echo -e "\nCurrent revision is $REVISION"
 
-gen_changelog "$REVISION" "$CHDATE" "$AUTHOR" "$CHLOG"
+gen_changelog "$REVISION" "" "$AUTHOR" ""
 
 # Build (only binary packages).
-debuild -e PATH -e SSH_AUTH_SOCK -e DEB_BUILD_OPTIONS=parallel=$THREAD_COUNT -e DEB_CC -e DEB_CXX -e DEB_CLANG -e CMAKE_FLAGS_ADD="$CMAKE_FLAGS_ADD" -b ${DEBUILD_NOSIGN_OPTIONS} ${DEBUILD_NODEPS_OPTIONS}
+debuild -e PATH -e SSH_AUTH_SOCK -e DEB_BUILD_OPTIONS=parallel=$THREAD_COUNT -e DEB_CC=$DEB_CC -e DEB_CXX=$DEB_CXX -e USE_INTERNAL_COMPILER=1 -e CMAKE_FLAGS="$CMAKE_FLAGS" -b ${DEBUILD_NOSIGN_OPTIONS} ${DEBUILD_NODEPS_OPTIONS}
diff --git a/release_lib.sh b/release_lib.sh
index abc392d9e1c..72c73ddbd5d 100644
--- a/release_lib.sh
+++ b/release_lib.sh
@@ -1,11 +1,14 @@
+
+set +e
+
 function get_revision {
     BASEDIR=$(dirname "${BASH_SOURCE[0]}")
     grep "set(VERSION_REVISION" ${BASEDIR}/dbms/cmake/version.cmake | sed 's/^.*VERSION_REVISION \(.*\))$/\1/'
 }
 
-# remove me after fixing all testing-building scripts
-function make_control {
-    true
+function get_author {
+    AUTHOR=$(git config --get user.name || echo ${USER})
+    echo $AUTHOR
 }
 
 # Generate revision number.
@@ -46,7 +49,9 @@ function gen_revision_author {
 
             git_describe=`git describe`
             sed -i -- "s/VERSION_REVISION .*)/VERSION_REVISION $REVISION)/g;s/VERSION_DESCRIBE .*)/VERSION_DESCRIBE $git_describe)/g" dbms/cmake/version.cmake
-            git commit -m "$auto_message [$REVISION]" dbms/cmake/version.cmake
+
+            gen_changelog "$REVISION" "" "$AUTHOR" ""
+            git commit -m "$auto_message [$REVISION]" dbms/cmake/version.cmake debian/changelog
             #git push
 
             # Second tag for correct version information in version.cmake inside tag
@@ -76,7 +81,7 @@ function gen_revision_author {
 
 function get_revision_author {
     REVISION=$(get_revision)
-    AUTHOR=$(git config --get user.name || echo ${USER})
+    AUTHOR=$(get_author)
     export REVISION
     export AUTHOR
 }
@@ -90,6 +95,17 @@ function gen_changelog {
     CHDATE="$2"
     AUTHOR="$3"
     CHLOG="$4"
+    if [ -z "REVISION" ] ; then
+        get_revision_author
+    fi
+
+    if [ -z "$CHLOG" ] ; then
+        CHLOG=debian/changelog
+    fi
+
+    if [ -z "$CHDATE" ] ; then
+        CHDATE=$(LC_ALL=C date -R | sed -e 's/,/\\,/g') # Replace comma to '\,'
+    fi
 
     sed \
         -e "s/[@]REVISION[@]/$REVISION/g" \
@@ -98,29 +114,3 @@ function gen_changelog {
         -e "s/[@]EMAIL[@]/$(whoami)@yandex-team.ru/g" \
         < $CHLOG.in > $CHLOG
 }
-
-# Upload to Metrica repository
-# working directory - where script is itself
-function upload_debs {
-    REVISION="$1"
-    # Determine the repository, in which you need to upload the packages. It corresponds to the version of Ubuntu.
-    source /etc/lsb-release
-
-    if [ "$DISTRIB_CODENAME" == "precise" ]; then
-        REPO="metrika"
-    elif [ "$DISTRIB_CODENAME" == "trusty" ]; then
-        REPO="metrika-trusty"
-    elif [ "$DISTRIB_CODENAME" == "xenial" ]; then
-        REPO="metrika-xenial"
-    else
-        echo -e "\n\e[0;31mUnknown Ubuntu version $DISTRIB_CODENAME \e[0;0m\n"
-    fi
-
-    # Upload to Metrica repository.
-
-    cd ../
-    DUPLOAD_CONF=dupload.conf
-    cat src/debian/dupload.conf.in | sed -e "s/[@]AUTHOR[@]/$(whoami)/g" > $DUPLOAD_CONF
-
-    dupload metrika-yandex_1.1."$REVISION"_amd64.changes -t $REPO -c --nomail
-}
diff --git a/website/presentations b/website/presentations
deleted file mode 160000
index cc6a20a9711..00000000000
--- a/website/presentations
+++ /dev/null
@@ -1 +0,0 @@
-Subproject commit cc6a20a9711dc7083c2a8ea79c7127d6fa87faee
diff --git a/website/release.sh b/website/release.sh
index 43dbc7eff78..e0606455f46 100755
--- a/website/release.sh
+++ b/website/release.sh
@@ -14,6 +14,8 @@ REMOTE_NAME="registry.yandex.net/${FULL_NAME}"
 DOCKER_HASH="$2"
 if [[ -z "$1" ]]
 then
+    git clone https://github.com/yandex/clickhouse-presentations.git presentations || true
+    git --work-tree=$(readlink -f presentations) --git-dir=$(readlink -f presentations)/.git pull
     gulp clean
     gulp build
     docker build -t "${FULL_NAME}" "${BASE_DIR}"