From c3b9e32e9f5ebf99ee3cedc8d6ecd80d60a2a382 Mon Sep 17 00:00:00 2001
From: BayoNet
Date: Mon, 25 Sep 2017 14:21:45 +0300
Subject: [PATCH 01/63] 1. "Aggregate functions" chapter is restructured.
 2. Combinator -ForEach for aggregate functions is described.
 3. Description of system.parts is actualized.
 4. groupArrayInsertAt function is described.

---
 docs/ru/agg_functions/index.rst          | 329 +----------------------
 docs/ru/operations/settings/settings.rst |   9 +
 docs/ru/query_language/queries.rst       |   3 +
 docs/ru/system_tables/system.parts.rst   |  41 ++-
 4 files changed, 50 insertions(+), 332 deletions(-)

diff --git a/docs/ru/agg_functions/index.rst b/docs/ru/agg_functions/index.rst
index 50570bde1b1..a702a0654e3 100644
--- a/docs/ru/agg_functions/index.rst
+++ b/docs/ru/agg_functions/index.rst
@@ -1,325 +1,18 @@
+.. _aggregate_functions:
+
 Агрегатные функции
 ==================
-count()
--------
-Считает количество строк. Принимает ноль аргументов, возвращает UInt64.
-Не поддерживается синтаксис ``COUNT(DISTINCT x)`` - для этого есть отдельная агрегатная функция ``uniq``.
+Агрегатные функции работают в `привычном `_ для специалистов по базам данных смысле.

-Запрос вида ``SELECT count() FROM table`` не оптимизируется, так как количество записей в таблице нигде не хранится отдельно - из таблицы будет выбран какой-нибудь достаточно маленький столбец, и будет посчитано количество значений в нём.
+ClickHouse поддерживает также:

-any(x)
-------
-Выбирает первое попавшееся значение.
-Порядок выполнения запроса может быть произвольным и даже каждый раз разным, поэтому результат данной функции недетерминирован.
-Для получения детерминированного результата, можно использовать функции min или max вместо any.
+ * :ref:`Параметрические агрегатные функции `, которые помимо столбцов принимают и другие параметры.
+ * :ref:`Комбинаторы `, которые изменяют поведение агрегатных функций.

-В некоторых случаях, вы всё-таки можете рассчитывать на порядок выполнения запроса. Это - случаи, когда SELECT идёт из подзапроса, в котором используется ORDER BY.

-При наличии в запросе ``SELECT`` секции ``GROUP BY`` или хотя бы одной агрегатной функции, ClickHouse (в отличие от, например, MySQL) требует, чтобы все выражения в секциях ``SELECT``, ``HAVING``, ``ORDER BY`` вычислялись из ключей или из агрегатных функций. То есть, каждый выбираемый из таблицы столбец, должен использоваться либо в ключах, либо внутри агрегатных функций. Чтобы получить поведение, как в MySQL, вы можете поместить остальные столбцы в агрегатную функцию ``any``.

-anyLast(x)
-----------
-Выбирает последнее попавшееся значение.
-Результат так же недетерминирован, как и для функции ``any``.

-min(x)
-------
-Вычисляет минимум.

-max(x)
-------
-Вычисляет максимум.

-argMin(arg, val)
-----------------
-Вычисляет значение arg при минимальном значении val. Если есть несколько разных значений arg для минимальных значений val, то выдаётся первое попавшееся из таких значений.

-argMax(arg, val)
-----------------
-Вычисляет значение arg при максимальном значении val. Если есть несколько разных значений arg для максимальных значений val, то выдаётся первое попавшееся из таких значений.

-sum(x)
-------
-Вычисляет сумму.
-Работает только для чисел.

-avg(x)
-------
-Вычисляет среднее.
-Работает только для чисел.
-Результат всегда - Float64.

-uniq(x)
--------
-Приближённо вычисляет количество различных значений аргумента. Работает для чисел, строк, дат, дат-с-временем, для нескольких аргументов и аргументов-кортежей.
- -Используется алгоритм типа adaptive sampling: в качестве состояния вычислений используется выборка значений хэшей элементов, размером до 65536. -Алгоритм является очень точным для множеств небольшой кардинальности (до 65536) и очень эффективным по CPU (при расчёте не слишком большого количества таких функций, использование ``uniq`` почти так же быстро, как использование других агрегатных функций). - -Результат детерминирован (не зависит от порядка выполнения запроса). - -uniqCombined(x) ---------------- -Приближённо вычисляет количество различных значений аргумента. Работает для чисел, строк, дат, дат-с-временем, для нескольких аргументов и аргументов-кортежей. - -Используется комбинация трёх алгоритмов: массив, хэш-таблица и `HyperLogLog `_ с таблицей коррекции погрешности. Расход памяти в несколько раз меньше, чем у функции ``uniq``, а точность в несколько раз выше. Скорость работы чуть ниже, чем у функции ``uniq``, но иногда может быть даже выше - в случае распределённых запросов, в которых по сети передаётся большое количество состояний агрегации. Максимальный размер состояния составляет 96 KiB (HyperLogLog из 217 6-битовых ячеек). - -Результат детерминирован (не зависит от порядка выполнения запроса). - -Функция ``uniqCombined`` является хорошим выбором по умолчанию для подсчёта количества различных значений. - -uniqHLL12(x) ------------- -Приближённо вычисляет количество различных значений аргумента, используя алгоритм `HyperLogLog `_. -Используется 212 5-битовых ячеек. Размер состояния чуть больше 2.5 КБ. - -Результат детерминирован (не зависит от порядка выполнения запроса). - -В большинстве случаев, используйте функцию ``uniq`` или ``uniqCombined``. - -uniqExact(x) ------------- -Вычисляет количество различных значений аргумента, точно. -Не стоит бояться приближённых расчётов. Поэтому, используйте лучше функцию ``uniq``. -Функцию ``uniqExact`` следует использовать, если вам точно нужен точный результат. - -Функция ``uniqExact`` расходует больше оперативки, чем функция ``uniq``, так как размер состояния неограниченно растёт по мере роста количества различных значений. - -groupArray(x), groupArray(max_size)(x) --------------------------------------- -Составляет массив из значений аргумента. -Значения в массив могут быть добавлены в любом (недетерминированном) порядке. - -Вторая версия (с параметром ``max_size``) ограничивает размер результирующего массива ``max_size`` элементами. -Например, ``groupArray(1)(x)`` эквивалентно ``[any(x)]``. - -В некоторых случаях, вы всё же можете рассчитывать на порядок выполнения запроса. Это — случаи, когда ``SELECT`` идёт из подзапроса, в котором используется ``ORDER BY``. - -groupUniqArray(x) ------------------ -Составляет массив из различных значений аргумента. Расход оперативки такой же, как у функции ``uniqExact``. - -quantile(level)(x) ------------------- -Приближённо вычисляет квантиль уровня level. level - константа, число с плавающей запятой от 0 до 1. -Рекомендуется использовать значения level в диапазоне 0.01..0.99. -Не используйте значения level, равные 0 или 1 - для таких случаев есть функции min и max. - -В этой функции, равно как и во всех функциях для расчёта квантилей, параметр level может быть не указан. В таком случае, он принимается равным 0.5 - то есть, функция будет вычислять медиану. - -Работает для чисел, дат, дат-с-временем. -Для чисел возвращает Float64, для дат - дату, для дат-с-временем - дату-с-временем. - -Используется `reservoir sampling `_ с размером резервуара до 8192. 
-При необходимости, результат выдаётся с линейной аппроксимацией из двух соседних значений. -Этот алгоритм обеспечивает весьма низкую точность расчёта. Смотрите также функции ``quantileTiming``, ``quantileTDigest``, ``quantileExact``. - -Результат зависит от порядка выполнения запроса, и является недетерминированным. - -При использовании нескольких функций ``quantile`` (и аналогичных) с разными уровнями в запросе, внутренние состояния не объединяются (то есть, запрос работает менее эффективно, чем мог бы). В этом случае, используйте функцию ``quantiles`` (и аналогичные). - -quantileDeterministic(level)(x, determinator) ---------------------------------------------- -Работает аналогично функции ``quantile``, но, в отличие от неё, результат является детерминированным и не зависит от порядка выполнения запроса. - -Для этого, функция принимает второй аргумент - «детерминатор». Это некоторое число, хэш от которого используется вместо генератора случайных чисел в алгоритме reservoir sampling. Для правильной работы функции, одно и то же значение детерминатора не должно встречаться слишком часто. В качестве детерминатора вы можете использовать идентификатор события, идентификатор посетителя и т. п. - -Не используйте эту функцию для рассчёта таймингов. Для этого есть более подходящая функции - ``quantileTiming``. - -quantileTiming(level)(x) ------------------------- -Вычисляет квантиль уровня level с фиксированной точностью. -Работает для чисел. Предназначена для расчёта квантилей от времени загрузки страницы в миллисекундах. - -Если значение больше 30000 (соответствует времени загрузки страницы большем 30 секундам) - результат приравнивается к 30000. - -Если всего значений не больше примерно 5670, то вычисление точное. - -Иначе: - * если время меньше 1024 мс., то вычисление точное. - * иначе вычисление идёт с округлением до числа, кратного 16 мс. - -При передаче в функцию отрицательных значений, поведение не определено. - -Возвращаемое значение имеет тип Float32. Когда в функцию не было передано ни одного значения (при использовании ``quantileTimingIf``), возвращается nan. Это сделано, чтобы отличать такие случаи от нулей. Смотрите замечание о сортировке NaN-ов в разделе «Секция ORDER BY». - -Результат детерминирован (не зависит от порядка выполнения запроса). - -Для своей задачи (расчёт квантилей времени загрузки страниц), использование этой функции эффективнее и результат точнее, чем для функции ``quantile``. - -quantileTimingWeighted(level)(x, weight) ----------------------------------------- -Отличается от функции medianTiming наличием второго аргумента - «веса». Вес - неотрицательное целое число. -Результат считается так же, как если бы в функцию `medianTiming`` значение x было передано weight количество раз. - -quantileExact(level)(x) ------------------------ -Вычисляет квантиль уровня level точно. Для этого, все переданные значения складываются в массив, который затем частично сортируется. Поэтому, функция потребляет O(n) памяти, где n - количество переданных значений. Впрочем, для случая маленького количества значений, функция весьма эффективна. - -quantileExactWeighted(level)(x, weight) ---------------------------------------- -Вычисляет квантиль уровня level точно. При этом, каждое значение учитывается с весом weight - как будто оно присутствует weight раз. Аргументы функции можно рассматривать как гистограммы, где значению x соответствует «столбик» гистограммы высоты weight, а саму функцию можно рассматривать как суммирование гистограмм. - -В качестве алгоритма используется хэш-таблица. 
Из-за этого, в случае, если передаваемые значения часто повторяются, функция потребляет меньше оперативки, чем ``quantileExact``. Вы можете использовать эту функцию вместо ``quantileExact``, указав в качестве веса число 1. - -quantileTDigest(level)(x) -------------------------- -Вычисляет квантиль уровня level приближённо, с использованием алгоритма `t-digest `_. Максимальная погрешность составляет 1%. Расход памяти на состояние пропорционален логарифму от количества переданных значений. - -Производительность функции ниже ``quantile``, ``quantileTiming``. По соотношению размера состояния и точности, функция существенно лучше, чем ``quantile``. - -Результат зависит от порядка выполнения запроса, и является недетерминированным. - -median ------- -Для всех quantile-функций, также присутствуют соответствующие median-функции: ``median``, ``medianDeterministic``, ``medianTiming``, ``medianTimingWeighted``, ``medianExact``, ``medianExactWeighted``, ``medianTDigest``. Они являются синонимами и их поведение ничем не отличается. - -quantiles(level1, level2, ...)(x) ---------------------------------- -Для всех quantile-функций, также присутствуют соответствующие quantiles-функции: ``quantiles``, ``quantilesDeterministic``, ``quantilesTiming``, ``quantilesTimingWeighted``, ``quantilesExact``, ``quantilesExactWeighted``, ``quantilesTDigest``. Эти функции за один проход вычисляют все квантили перечисленных уровней и возвращают массив вычисленных значений. - -varSamp(x) ----------- -Вычисляет величину ``Σ((x - x̅)2) / (n - 1)``, где n - размер выборки, x̅ - среднее значение x. - -Она представляет собой несмещённую оценку дисперсии случайной величины, если переданные в функцию значения являются выборкой этой случайной величины. - -Возвращает Float64. В случае, когда ``n <= 1``, возвращается +∞. - -varPop(x) ---------- -Вычисляет величину ``Σ((x - x̅)2) / n``, где n - размер выборки, x̅ - среднее значение x. - -То есть, дисперсию для множества значений. Возвращает Float64. - -stddevSamp(x) -------------- -Результат равен квадратному корню от ``varSamp(x)``. - - -stddevPop(x) ------------- -Результат равен квадратному корню от ``varPop(x)``. - - -covarSamp(x, y) ---------------- -Вычисляет величину ``Σ((x - x̅)(y - y̅)) / (n - 1)``. - -Возвращает Float64. В случае, когда ``n <= 1``, возвращается +∞. - -covarPop(x, y) --------------- -Вычисляет величину ``Σ((x - x̅)(y - y̅)) / n``. - -corr(x, y) ----------- -Вычисляет коэффициент корреляции Пирсона: ``Σ((x - x̅)(y - y̅)) / sqrt(Σ((x - x̅)2) * Σ((y - y̅)2))``. - -Параметрические агрегатные функции -================================== -Некоторые агрегатные функции могут принимать не только столбцы-аргументы (по которым производится свёртка), но и набор параметров - констант для инициализации. Синтаксис - две пары круглых скобок вместо одной. Первая - для параметров, вторая - для аргументов. - -sequenceMatch(pattern)(time, cond1, cond2, ...) ------------------------------------------------ -Сопоставление с образцом для цепочки событий. - -``pattern`` - строка, содержащая шаблон для сопоставления. Шаблон похож на регулярное выражение. - -``time`` - время события, тип DateTime - -``cond1``, ``cond2`` ... - от одного до 32 аргументов типа UInt8 - признаков, было ли выполнено некоторое условие для события. - -Функция собирает в оперативке последовательность событий. Затем производит проверку на соответствие этой последовательности шаблону. -Возвращает UInt8 - 0, если шаблон не подходит и 1, если шаблон подходит. 
- -Пример: ``sequenceMatch('(?1).*(?2)')(EventTime, URL LIKE '%company%', URL LIKE '%cart%')`` - -- была ли цепочка событий, в которой посещение страницы с адресом, содержащим company было раньше по времени посещения страницы с адресом, содержащим cart. - -Это вырожденный пример. Его можно записать с помощью других агрегатных функций: - -.. code-block:: text - - minIf(EventTime, URL LIKE '%company%') < maxIf(EventTime, URL LIKE '%cart%'). - -Но в более сложных случаях, такого решения нет. - -Синтаксис шаблонов: - -``(?1)`` - ссылка на условие (вместо 1 - любой номер); - -``.*`` - произвольное количество любых событий; - -``(?t>=1800)`` - условие на время; - -за указанное время допускается любое количество любых событий; - -вместо >= могут использоваться операторы <, >, <=; - -вместо 1800 может быть любое число; - -События, произошедшие в одну секунду, могут оказаться в цепочке в произвольном порядке. От этого может зависеть результат работы функции. - -sequenceCount(pattern)(time, cond1, cond2, ...) ------------------------------------------------ -Аналогично функции sequenceMatch, но возвращает не факт наличия цепочки событий, а UInt64 - количество найденных цепочек. -Цепочки ищутся без перекрытия. То есть, следующая цепочка может начаться только после окончания предыдущей. - -uniqUpTo(N)(x) --------------- -Вычисляет количество различных значений аргумента, если оно меньше или равно N. -В случае, если количество различных значений аргумента больше N, возвращает N + 1. - -Рекомендуется использовать для маленьких N - до 10. Максимальное значение N - 100. - -Для состояния агрегатной функции используется количество оперативки равное 1 + N * размер одного значения байт. -Для строк запоминается некриптографический хэш, имеющий размер 8 байт. То есть, для строк вычисление приближённое. - -Функция также работает для нескольких аргументов. - -Работает максимально быстро за исключением патологических случаев, когда используется большое значение N и количество уникальных значений чуть меньше N. - -Пример применения: - -.. code-block:: text - - Задача: показывать в отчёте только поисковые фразы, по которым было хотя бы 5 уникальных посетителей. - Решение: пишем в запросе GROUP BY SearchPhrase HAVING uniqUpTo(4)(UserID) >= 5 - -Комбинаторы агрегатных функций -============================== -К имени агрегатной функции может быть приписан некоторый суффикс. При этом, работа агрегатной функции некоторым образом модифицируется. -Существуют комбинаторы If и Array. Смотрите разделы ниже. - -Комбинатор -If. Условные агрегатные функции -------------------------------------------- -К имени любой агрегатной функции может быть приписан суффикс -If. В этом случае, агрегатная функция принимает ещё один дополнительный аргумент - условие (типа UInt8). Агрегатная функция будет обрабатывать только те строки, для которых условие сработало. Если условие ни разу не сработало - возвращается некоторое значение по умолчанию (обычно - нули, пустые строки). - -Примеры: ``sumIf(column, cond)``, ``countIf(cond)``, ``avgIf(x, cond)``, ``quantilesTimingIf(level1, level2)(x, cond)``, ``argMinIf(arg, val, cond)`` и т. п. - -С помощью условных агрегатных функций, вы можете вычислить агрегаты сразу для нескольких условий, не используя подзапросы и ``JOIN``-ы. -Например, в Яндекс.Метрике, условные агрегатные функции используются для реализации функциональности сравнения сегментов. - -Комбинатор -Array. 
Агрегатные функции для аргументов-массивов
--------------------------------------------------------------
-К имени любой агрегатной функции может быть приписан суффикс -Array. В этом случае, агрегатная функция вместо аргументов типов T принимает аргументы типов Array(T) (массивы). Если агрегатная функция принимает несколько аргументов, то это должны быть массивы одинаковых длин. При обработке массивов, агрегатная функция работает, как исходная агрегатная функция по всем элементам массивов.

-Пример 1: ``sumArray(arr)`` - просуммировать все элементы всех массивов arr. В данном примере можно было бы написать проще: ``sum(arraySum(arr))``.

-Пример 2: ``uniqArray(arr)`` - посчитать количество уникальных элементов всех массивов arr. Это можно было бы сделать проще: ``uniq(arrayJoin(arr))``, но не всегда есть возможность добавить arrayJoin в запрос.

-Комбинаторы -If и -Array можно сочетать. При этом, должен сначала идти Array, а потом If. Примеры: ``uniqArrayIf(arr, cond)``, ``quantilesTimingArrayIf(level1, level2)(arr, cond)``. Из-за такого порядка получается, что аргумент cond не должен быть массивом.

-Комбинатор -State.
-------------------
-В случае применения этого комбинатора, агрегатная функция возвращает не готовое значение (например, в случае функции uniq - количество уникальных значений), а промежуточное состояние агрегации (например, в случае функции ``uniq`` - хэш-таблицу для рассчёта количества уникальных значений), которое имеет тип AggregateFunction(...) и может использоваться для дальнейшей обработки или может быть сохранено в таблицу для последующей доагрегации - смотрите разделы «AggregatingMergeTree» и «функции для работы с промежуточными состояниями агрегации».

-Комбинатор -Merge.
-------------------
-В случае применения этого комбинатора, агрегатная функция будет принимать в качестве аргумента промежуточное состояние агрегации, доагрегировать (объединять вместе) эти состояния, и возвращать готовое значение.

-Комбинатор -MergeState.
------------------------
-Выполняет слияние промежуточных состояний агрегации, аналогично комбинатору -Merge, но возвращает не готовое значение, а промежуточное состояние агрегации, аналогично комбинатору -State.
+.. toctree::
+
+   reference
+   parametric_functions
+   combinators
\ No newline at end of file
diff --git a/docs/ru/operations/settings/settings.rst b/docs/ru/operations/settings/settings.rst
index 740403cfc44..a0adedf8f74 100644
--- a/docs/ru/operations/settings/settings.rst
+++ b/docs/ru/operations/settings/settings.rst
@@ -67,6 +67,15 @@ force_primary_key
 При ``force_primary_key=1`` ClickHouse проверяет, есть ли в запросе условие на первичный ключ, которое может использоваться для отсечения диапазонов данных. Если подходящего условия нет - кидается исключение. Подробнее про диапазоны данных в таблицах MergeTree читайте в разделе :ref:`table_engines-mergetree`.

+.. _settings_settings_fsync_metadata:
+
+fsync_metadata
+--------------
+
+Включить или отключить fsync при записи .sql файлов. По умолчанию включено.
+
+Имеет смысл выключать, если на сервере миллионы мелких таблиц-чанков, которые постоянно создаются и уничтожаются.
+
 input_format_allow_errors_num
 -----------------------------
 Устанавливает максимальное количество допустимых ошибок при чтении из текстовых форматов (CSV, TSV и т.п.).
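Примерный сценарий использования (набросок; предполагается, что ``fsync_metadata``, как и прочие настройки, можно переопределить для текущей сессии запросом ``SET``):

.. code-block:: sql

    SET fsync_metadata = 0  -- отключаем fsync при записи .sql файлов для текущей сессии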
diff --git a/docs/ru/query_language/queries.rst b/docs/ru/query_language/queries.rst
index 13ac431b20f..1073b0cae76 100644
--- a/docs/ru/query_language/queries.rst
+++ b/docs/ru/query_language/queries.rst
@@ -201,6 +201,9 @@ RENAME
 Все таблицы переименовываются под глобальной блокировкой. Переименовывание таблицы является лёгкой операцией. Если вы указали после TO другую базу данных, то таблица будет перенесена в эту базу данных. При этом, директории с базами данных должны быть расположены в одной файловой системе (иначе возвращается ошибка).

+.. _query_language_queries_alter:
+
 ALTER
 ~~~~~
 Запрос ``ALTER`` поддерживается только для таблиц типа ``*MergeTree``, а также ``Merge`` и ``Distributed``. Запрос имеет несколько вариантов.
diff --git a/docs/ru/system_tables/system.parts.rst b/docs/ru/system_tables/system.parts.rst
index 1bc4195028a..e425c89ce50 100644
--- a/docs/ru/system_tables/system.parts.rst
+++ b/docs/ru/system_tables/system.parts.rst
@@ -1,20 +1,33 @@
 system.parts
 ------------
-Содержит информацию о кусках таблиц семейства MergeTree.
+
+Содержит информацию о кусках таблиц семейства :ref:`table_engines-mergetree`.
+
+Каждая строка описывает один кусок данных.

 Столбцы:

-.. code-block:: text
+.. csv-table::
+    :header: "Имя", "Тип", "Описание"
+    :widths: 15, 15, 70

-    database String - имя базы данных, в которой находится таблица, к которой относится кусок
-    table String - имя таблицы, к которой относится кусок
-    engine String - имя движка таблицы, без параметров
-    partition String - имя партиции - имеет формат YYYYMM
-    name String - имя куска
-    replicated UInt8 - относится ли кусок к реплицируемым данным
-    active UInt8 - используется ли кусок в таблице, или же он уже не нужен и скоро будет удалён - неактивные куски остаются после слияния
-    marks UInt64 - количество засечек - умножьте на гранулированность индекса (обычно 8192), чтобы получить примерное количество строк в куске
-    bytes UInt64 - количество байт в сжатом виде
-    modification_time DateTime - время модификации директории с куском - обычно соответствует времени создания куска
-    remove_time DateTime - только для неактивных кусков - время, когда кусок стал неактивным
-    refcount UInt32 - количество мест, в котором кусок используется - значение больше 2 говорит о том, что этот кусок участвует в запросах или в слияниях
+    "partition", "String", "Имя партиции. Формат YYYYMM. О том, что такое партиция, можно узнать из описания запроса :ref:`query_language_queries_alter`."
+    "name", "String", "Имя куска."
+    "active", "UInt8", "Признак активности. Если кусок активен, то он используется таблицей, в противном случае он будет удалён. Неактивные куски остаются после слияний."
+    "marks", "UInt64", "Количество засечек. Чтобы получить примерное количество строк в куске, умножьте ``marks`` на гранулированность индекса (обычно 8192)."
+    "marks_size", "UInt64", "Размер файла с засечками."
+    "rows", "UInt64", "Количество строк."
+    "bytes", "UInt64", "Количество байт в сжатом виде."
+    "modification_time", "DateTime", "Время модификации директории с куском. Обычно соответствует времени создания куска."
+    "remove_time", "DateTime", "Время, когда кусок стал неактивным."
+    "refcount", "UInt32", "Количество мест, в которых кусок используется. Значение больше 2 говорит о том, что кусок участвует в запросах или в слияниях."
+    "min_date", "Date", "Минимальное значение ключа даты в куске."
+    "max_date", "Date", "Максимальное значение ключа даты в куске."
+    "min_block_number", "UInt64", "Минимальный номер кусков, из которых состоит текущий после слияния."
+ "max_block_number", "UInt64", "Максимальный номер куска из которых состоит текущий после слияния." + "level", "UInt32", "Глубина дерева слияний. Если слияний не было, то ``level=0``." + "primary_key_bytes_in_memory", "UInt64", "Объем памяти (в байтах), занимаемой значениями первичных ключей." + "primary_key_bytes_in_memory_allocated", "UInt64", "Выделенный с резервом объем памяти (в байтах) для размещения первичных ключей." + "database", "String", "Имя базы данных." + "table", "String", "Имя таблицы." + "engine", "String", "Имя движка таблицы, без параметров." From e14b23f52ee5185006d1679faf43f493fbedc620 Mon Sep 17 00:00:00 2001 From: KochetovNicolai Date: Tue, 26 Sep 2017 11:49:41 +0300 Subject: [PATCH 02/63] Update third-party_client_libraries.rst Added ch-encode and ch-insert libraries. --- docs/en/interfaces/third-party_client_libraries.rst | 2 ++ 1 file changed, 2 insertions(+) diff --git a/docs/en/interfaces/third-party_client_libraries.rst b/docs/en/interfaces/third-party_client_libraries.rst index 6e8a8fd4420..23c487f9074 100644 --- a/docs/en/interfaces/third-party_client_libraries.rst +++ b/docs/en/interfaces/third-party_client_libraries.rst @@ -17,6 +17,8 @@ There exist third-party client libraries for ClickHouse: - `go-clickhouse `_ - `mailru\go-clickhouse `_ - `golang-clickhouse `_ + - `ch-encode `_ + - `ch-insert `_ * NodeJs - `clickhouse (NodeJs) `_ - `node-clickhouse `_ From 9fbfde65ec50352bef4692845ca99827d58f2f83 Mon Sep 17 00:00:00 2001 From: KochetovNicolai Date: Tue, 26 Sep 2017 11:58:30 +0300 Subject: [PATCH 03/63] Update third-party_client_libraries.rst --- docs/en/interfaces/third-party_client_libraries.rst | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/docs/en/interfaces/third-party_client_libraries.rst b/docs/en/interfaces/third-party_client_libraries.rst index 23c487f9074..0ee86db23f7 100644 --- a/docs/en/interfaces/third-party_client_libraries.rst +++ b/docs/en/interfaces/third-party_client_libraries.rst @@ -17,8 +17,7 @@ There exist third-party client libraries for ClickHouse: - `go-clickhouse `_ - `mailru\go-clickhouse `_ - `golang-clickhouse `_ - - `ch-encode `_ - - `ch-insert `_ + - `ch-insert `_ + `ch-encode `_ for data insertion * NodeJs - `clickhouse (NodeJs) `_ - `node-clickhouse `_ From c8dc25815f0c335f8378d3397c9aed3fcc758e6e Mon Sep 17 00:00:00 2001 From: Nikolai Kochetov Date: Tue, 26 Sep 2017 19:07:00 +0300 Subject: [PATCH 04/63] fixed pointinPolygon test, clang build --- dbms/src/Functions/GeoUtils.h | 2 +- .../0_stateless/00500_point_in_polygon.sql | 98 +++++++++---------- 2 files changed, 50 insertions(+), 50 deletions(-) diff --git a/dbms/src/Functions/GeoUtils.h b/dbms/src/Functions/GeoUtils.h index d11944bae74..f5a4b4651cc 100644 --- a/dbms/src/Functions/GeoUtils.h +++ b/dbms/src/Functions/GeoUtils.h @@ -643,7 +643,7 @@ std::string serialize(Polygon && polygon) auto serializeFloat = [&buffer](float value) { buffer.write(reinterpret_cast(&value), sizeof(value)); }; auto serializeSize = [&buffer](size_t size) { buffer.write(reinterpret_cast(&size), sizeof(size)); }; - auto serializeRing = [& buffer, & serializeFloat, & serializeSize](const RingType & ring) + auto serializeRing = [& serializeFloat, & serializeSize](const RingType & ring) { serializeSize(ring.size()); for (const auto & point : ring) diff --git a/dbms/tests/queries/0_stateless/00500_point_in_polygon.sql b/dbms/tests/queries/0_stateless/00500_point_in_polygon.sql index fd6af3c1084..af146dd8549 100644 --- 
a/dbms/tests/queries/0_stateless/00500_point_in_polygon.sql +++ b/dbms/tests/queries/0_stateless/00500_point_in_polygon.sql @@ -1,75 +1,75 @@ SELECT pointInPolygonFranklin(tuple(2.0,1.0), [tuple(0.0,0.0), tuple(3.0,3.0), tuple(3.0,0.0), tuple(0.0,0.0)]); SELECT pointInPolygonFranklin(tuple(1.0,2.0), [tuple(0.0,0.0), tuple(3.0,3.0), tuple(3.0,0.0), tuple(0.0,0.0)]); SELECT pointInPolygonFranklin(tuple(4.0,1.0), [tuple(0.0,0.0), tuple(3.0,3.0), tuple(3.0,0.0), tuple(0.0,0.0)]); -SELECT pointInPolygon(tuple(2.0,1.0), [tuple(0.0,0.0), tuple(3.0,3.0), tuple(3.0,0.0), tuple(0.0,0.0)]); -SELECT pointInPolygon(tuple(1.0,2.0), [tuple(0.0,0.0), tuple(3.0,3.0), tuple(3.0,0.0), tuple(0.0,0.0)]); -SELECT pointInPolygon(tuple(4.0,1.0), [tuple(0.0,0.0), tuple(3.0,3.0), tuple(3.0,0.0), tuple(0.0,0.0)]); +SELECT pointInPolygonCrossing(tuple(2.0,1.0), [tuple(0.0,0.0), tuple(3.0,3.0), tuple(3.0,0.0), tuple(0.0,0.0)]); +SELECT pointInPolygonCrossing(tuple(1.0,2.0), [tuple(0.0,0.0), tuple(3.0,3.0), tuple(3.0,0.0), tuple(0.0,0.0)]); +SELECT pointInPolygonCrossing(tuple(4.0,1.0), [tuple(0.0,0.0), tuple(3.0,3.0), tuple(3.0,0.0), tuple(0.0,0.0)]); SELECT pointInPolygonWinding(tuple(2.0,1.0), [tuple(0.0,0.0), tuple(3.0,3.0), tuple(3.0,0.0), tuple(0.0,0.0)]); SELECT pointInPolygonWinding(tuple(1.0,2.0), [tuple(0.0,0.0), tuple(3.0,3.0), tuple(3.0,0.0), tuple(0.0,0.0)]); SELECT pointInPolygonWinding(tuple(4.0,1.0), [tuple(0.0,0.0), tuple(3.0,3.0), tuple(3.0,0.0), tuple(0.0,0.0)]); SELECT 'inner'; -SELECT pointInPolygonWithGrid((3., 3.), [(6, 0), (8, 4), (5, 8), (0, 2), (6, 0)]); +SELECT pointInPolygon((3., 3.), [(6, 0), (8, 4), (5, 8), (0, 2), (6, 0)]); SELECT 'outer'; -SELECT pointInPolygonWithGrid((0.1, 0.1), [(6., 0.), (8., 4.), (5., 8.), (0., 2.), (6., 0.)]); +SELECT pointInPolygon((0.1, 0.1), [(6., 0.), (8., 4.), (5., 8.), (0., 2.), (6., 0.)]); SELECT 'single line'; -SELECT pointInPolygonWithGrid((4.1, 0.1), [(6., 0.), (8., 4.), (5., 8.), (0., 2.), (6., 0.)]); -SELECT pointInPolygonWithGrid((4.9, 0.9), [(6., 0.), (8., 4.), (5., 8.), (0., 2.), (6., 0.)]); +SELECT pointInPolygon((4.1, 0.1), [(6., 0.), (8., 4.), (5., 8.), (0., 2.), (6., 0.)]); +SELECT pointInPolygon((4.9, 0.9), [(6., 0.), (8., 4.), (5., 8.), (0., 2.), (6., 0.)]); SELECT 'shifted grid'; -SELECT pointInPolygonWithGrid((0., 0.), [(6., 1.), (8., 4.), (5., 8.), (1., 2.), (6., 1.)]); -SELECT pointInPolygonWithGrid((6., 5.), [(6., 1.), (8., 4.), (5., 8.), (1., 2.), (6., 1.)]); +SELECT pointInPolygon((0., 0.), [(6., 1.), (8., 4.), (5., 8.), (1., 2.), (6., 1.)]); +SELECT pointInPolygon((6., 5.), [(6., 1.), (8., 4.), (5., 8.), (1., 2.), (6., 1.)]); SELECT 'pair of lines, single polygon'; -SELECT pointInPolygonWithGrid((0.1, 0.1), [(0., 0.), (8., 7.), (7., 8.), (0., 0.)]); -SELECT pointInPolygonWithGrid((0.9, 0.1), [(0., 0.), (8., 7.), (7., 8.), (0., 0.)]); -SELECT pointInPolygonWithGrid((0.1, 0.9), [(0., 0.), (8., 7.), (7., 8.), (0., 0.)]); -SELECT pointInPolygonWithGrid((2.2, 2.2), [(0., 0.), (8., 7.), (7., 8.), (0., 0.)]); -SELECT pointInPolygonWithGrid((2.1, 2.9), [(0., 0.), (8., 7.), (7., 8.), (0., 0.)]); -SELECT pointInPolygonWithGrid((2.9, 2.1), [(0., 0.), (8., 7.), (7., 8.), (0., 0.)]); +SELECT pointInPolygon((0.1, 0.1), [(0., 0.), (8., 7.), (7., 8.), (0., 0.)]); +SELECT pointInPolygon((0.9, 0.1), [(0., 0.), (8., 7.), (7., 8.), (0., 0.)]); +SELECT pointInPolygon((0.1, 0.9), [(0., 0.), (8., 7.), (7., 8.), (0., 0.)]); +SELECT pointInPolygon((2.2, 2.2), [(0., 0.), (8., 7.), (7., 8.), (0., 0.)]); +SELECT pointInPolygon((2.1, 2.9), [(0., 0.), (8., 7.), (7., 
8.), (0., 0.)]); +SELECT pointInPolygon((2.9, 2.1), [(0., 0.), (8., 7.), (7., 8.), (0., 0.)]); SELECT 'pair of lines, different polygons'; -SELECT pointInPolygonWithGrid((0.1, 0.1), [(0.5, 0.), (1.0, 0.), (8.0, 7.5), (7.5, 8.0), (0., 1.), (0., 0.5), (4.5, 5.5), (5.5, 4.5), (0.5, 0.0)]); -SELECT pointInPolygonWithGrid((1., 1.), [(0.5, 0.), (1.0, 0.), (8.0, 7.5), (7.5, 8.0), (0., 1.), (0., 0.5), (4.5, 5.5), (5.5, 4.5), (0.5, 0.0)]); -SELECT pointInPolygonWithGrid((0.7, 0.1), [(0.5, 0.), (1.0, 0.), (8.0, 7.5), (7.5, 8.0), (0., 1.), (0., 0.5), (4.5, 5.5), (5.5, 4.5), (0.5, 0.0)]); -SELECT pointInPolygonWithGrid((0.1, 0.7), [(0.5, 0.), (1.0, 0.), (8.0, 7.5), (7.5, 8.0), (0., 1.), (0., 0.5), (4.5, 5.5), (5.5, 4.5), (0.5, 0.0)]); -SELECT pointInPolygonWithGrid((1.1, 0.1), [(0.5, 0.), (1.0, 0.), (8.0, 7.5), (7.5, 8.0), (0., 1.), (0., 0.5), (4.5, 5.5), (5.5, 4.5), (0.5, 0.0)]); -SELECT pointInPolygonWithGrid((0.1, 1.1), [(0.5, 0.), (1.0, 0.), (8.0, 7.5), (7.5, 8.0), (0., 1.), (0., 0.5), (4.5, 5.5), (5.5, 4.5), (0.5, 0.0)]); -SELECT pointInPolygonWithGrid((5.0, 5.0), [(0.5, 0.), (1.0, 0.), (8.0, 7.5), (7.5, 8.0), (0., 1.), (0., 0.5), (4.5, 5.5), (5.5, 4.5), (0.5, 0.0)]); -SELECT pointInPolygonWithGrid((7.9, 7.9), [(0.5, 0.), (1.0, 0.), (8.0, 7.5), (7.5, 8.0), (0., 1.), (0., 0.5), (4.5, 5.5), (5.5, 4.5), (0.5, 0.0)]); +SELECT pointInPolygon((0.1, 0.1), [(0.5, 0.), (1.0, 0.), (8.0, 7.5), (7.5, 8.0), (0., 1.), (0., 0.5), (4.5, 5.5), (5.5, 4.5), (0.5, 0.0)]); +SELECT pointInPolygon((1., 1.), [(0.5, 0.), (1.0, 0.), (8.0, 7.5), (7.5, 8.0), (0., 1.), (0., 0.5), (4.5, 5.5), (5.5, 4.5), (0.5, 0.0)]); +SELECT pointInPolygon((0.7, 0.1), [(0.5, 0.), (1.0, 0.), (8.0, 7.5), (7.5, 8.0), (0., 1.), (0., 0.5), (4.5, 5.5), (5.5, 4.5), (0.5, 0.0)]); +SELECT pointInPolygon((0.1, 0.7), [(0.5, 0.), (1.0, 0.), (8.0, 7.5), (7.5, 8.0), (0., 1.), (0., 0.5), (4.5, 5.5), (5.5, 4.5), (0.5, 0.0)]); +SELECT pointInPolygon((1.1, 0.1), [(0.5, 0.), (1.0, 0.), (8.0, 7.5), (7.5, 8.0), (0., 1.), (0., 0.5), (4.5, 5.5), (5.5, 4.5), (0.5, 0.0)]); +SELECT pointInPolygon((0.1, 1.1), [(0.5, 0.), (1.0, 0.), (8.0, 7.5), (7.5, 8.0), (0., 1.), (0., 0.5), (4.5, 5.5), (5.5, 4.5), (0.5, 0.0)]); +SELECT pointInPolygon((5.0, 5.0), [(0.5, 0.), (1.0, 0.), (8.0, 7.5), (7.5, 8.0), (0., 1.), (0., 0.5), (4.5, 5.5), (5.5, 4.5), (0.5, 0.0)]); +SELECT pointInPolygon((7.9, 7.9), [(0.5, 0.), (1.0, 0.), (8.0, 7.5), (7.5, 8.0), (0., 1.), (0., 0.5), (4.5, 5.5), (5.5, 4.5), (0.5, 0.0)]); SELECT 'complex polygon'; -SELECT pointInPolygonWithGrid((0.05, 0.05), [(0., 1.), (0.2, 0.5), (0.6, 0.5), (0.8, 0.8), (0.8, 0.3), (0.1, 0.3), (0.1, 0.1), (0.8, 0.1), (1.0, 0.0), (8.0, 7.0), (7.0, 8.0), (0., 1.)]); -SELECT pointInPolygonWithGrid((0.15, 0.15), [(0., 1.), (0.2, 0.5), (0.6, 0.5), (0.8, 0.8), (0.8, 0.3), (0.1, 0.3), (0.1, 0.1), (0.8, 0.1), (1.0, 0.0), (8.0, 7.0), (7.0, 8.0), (0., 1.)]); -SELECT pointInPolygonWithGrid((0.3, 0.4), [(0., 1.), (0.2, 0.5), (0.6, 0.5), (0.8, 0.8), (0.8, 0.3), (0.1, 0.3), (0.1, 0.1), (0.8, 0.1), (1.0, 0.0), (8.0, 7.0), (7.0, 8.0), (0., 1.)]); -SELECT pointInPolygonWithGrid((0.4, 0.7), [(0., 1.), (0.2, 0.5), (0.6, 0.5), (0.8, 0.8), (0.8, 0.3), (0.1, 0.3), (0.1, 0.1), (0.8, 0.1), (1.0, 0.0), (8.0, 7.0), (7.0, 8.0), (0., 1.)]); -SELECT pointInPolygonWithGrid((0.7, 0.6), [(0., 1.), (0.2, 0.5), (0.6, 0.5), (0.8, 0.8), (0.8, 0.3), (0.1, 0.3), (0.1, 0.1), (0.8, 0.1), (1.0, 0.0), (8.0, 7.0), (7.0, 8.0), (0., 1.)]); -SELECT pointInPolygonWithGrid((0.9, 0.1), [(0., 1.), (0.2, 0.5), (0.6, 0.5), (0.8, 0.8), (0.8, 0.3), (0.1, 0.3), (0.1, 0.1), (0.8, 0.1), 
(1.0, 0.0), (8.0, 7.0), (7.0, 8.0), (0., 1.)]); +SELECT pointInPolygon((0.05, 0.05), [(0., 1.), (0.2, 0.5), (0.6, 0.5), (0.8, 0.8), (0.8, 0.3), (0.1, 0.3), (0.1, 0.1), (0.8, 0.1), (1.0, 0.0), (8.0, 7.0), (7.0, 8.0), (0., 1.)]); +SELECT pointInPolygon((0.15, 0.15), [(0., 1.), (0.2, 0.5), (0.6, 0.5), (0.8, 0.8), (0.8, 0.3), (0.1, 0.3), (0.1, 0.1), (0.8, 0.1), (1.0, 0.0), (8.0, 7.0), (7.0, 8.0), (0., 1.)]); +SELECT pointInPolygon((0.3, 0.4), [(0., 1.), (0.2, 0.5), (0.6, 0.5), (0.8, 0.8), (0.8, 0.3), (0.1, 0.3), (0.1, 0.1), (0.8, 0.1), (1.0, 0.0), (8.0, 7.0), (7.0, 8.0), (0., 1.)]); +SELECT pointInPolygon((0.4, 0.7), [(0., 1.), (0.2, 0.5), (0.6, 0.5), (0.8, 0.8), (0.8, 0.3), (0.1, 0.3), (0.1, 0.1), (0.8, 0.1), (1.0, 0.0), (8.0, 7.0), (7.0, 8.0), (0., 1.)]); +SELECT pointInPolygon((0.7, 0.6), [(0., 1.), (0.2, 0.5), (0.6, 0.5), (0.8, 0.8), (0.8, 0.3), (0.1, 0.3), (0.1, 0.1), (0.8, 0.1), (1.0, 0.0), (8.0, 7.0), (7.0, 8.0), (0., 1.)]); +SELECT pointInPolygon((0.9, 0.1), [(0., 1.), (0.2, 0.5), (0.6, 0.5), (0.8, 0.8), (0.8, 0.3), (0.1, 0.3), (0.1, 0.1), (0.8, 0.1), (1.0, 0.0), (8.0, 7.0), (7.0, 8.0), (0., 1.)]); SELECT 'polygon with holes'; -SELECT pointInPolygonWithGrid((1., 1.), [(4., 0.), (8., 4.), (4., 8.), (0., 4.)], [(3., 3.), (3., 5.), (5., 5.), (5., 3.)]); -SELECT pointInPolygonWithGrid((2.5, 2.5), [(4., 0.), (8., 4.), (4., 8.), (0., 4.)], [(3., 3.), (3., 5.), (5., 5.), (5., 3.)]); -SELECT pointInPolygonWithGrid((4., 4.), [(4., 0.), (8., 4.), (4., 8.), (0., 4.)], [(3., 3.), (3., 5.), (5., 5.), (5., 3.)]); -SELECT pointInPolygonWithGrid((4., 2.), [(4., 0.), (8., 4.), (4., 8.), (0., 4.)], [(3., 3.), (3., 5.), (5., 5.), (5., 3.)]); -SELECT pointInPolygonWithGrid((9., 9.), [(4., 0.), (8., 4.), (4., 8.), (0., 4.)], [(3., 3.), (3., 5.), (5., 5.), (5., 3.)]); +SELECT pointInPolygon((1., 1.), [(4., 0.), (8., 4.), (4., 8.), (0., 4.)], [(3., 3.), (3., 5.), (5., 5.), (5., 3.)]); +SELECT pointInPolygon((2.5, 2.5), [(4., 0.), (8., 4.), (4., 8.), (0., 4.)], [(3., 3.), (3., 5.), (5., 5.), (5., 3.)]); +SELECT pointInPolygon((4., 4.), [(4., 0.), (8., 4.), (4., 8.), (0., 4.)], [(3., 3.), (3., 5.), (5., 5.), (5., 3.)]); +SELECT pointInPolygon((4., 2.), [(4., 0.), (8., 4.), (4., 8.), (0., 4.)], [(3., 3.), (3., 5.), (5., 5.), (5., 3.)]); +SELECT pointInPolygon((9., 9.), [(4., 0.), (8., 4.), (4., 8.), (0., 4.)], [(3., 3.), (3., 5.), (5., 5.), (5., 3.)]); -SELECT pointInPolygonWithGrid((0.5, 1.5), [(0., 0.), (7., 0.), (7., 3.), (0., 3.)], [(1., 1.), (2., 1.), (2., 2.), (1., 2.)], [(3., 1.), (4., 1.), (4., 2.), (3., 2.)], [(5., 1.), (6., 1.), (6., 2.), (5., 2.)]); -SELECT pointInPolygonWithGrid((1.5, 1.5), [(0., 0.), (7., 0.), (7., 3.), (0., 3.)], [(1., 1.), (2., 1.), (2., 2.), (1., 2.)], [(3., 1.), (4., 1.), (4., 2.), (3., 2.)], [(5., 1.), (6., 1.), (6., 2.), (5., 2.)]); -SELECT pointInPolygonWithGrid((2.5, 1.5), [(0., 0.), (7., 0.), (7., 3.), (0., 3.)], [(1., 1.), (2., 1.), (2., 2.), (1., 2.)], [(3., 1.), (4., 1.), (4., 2.), (3., 2.)], [(5., 1.), (6., 1.), (6., 2.), (5., 2.)]); -SELECT pointInPolygonWithGrid((3.5, 1.5), [(0., 0.), (7., 0.), (7., 3.), (0., 3.)], [(1., 1.), (2., 1.), (2., 2.), (1., 2.)], [(3., 1.), (4., 1.), (4., 2.), (3., 2.)], [(5., 1.), (6., 1.), (6., 2.), (5., 2.)]); -SELECT pointInPolygonWithGrid((4.5, 1.5), [(0., 0.), (7., 0.), (7., 3.), (0., 3.)], [(1., 1.), (2., 1.), (2., 2.), (1., 2.)], [(3., 1.), (4., 1.), (4., 2.), (3., 2.)], [(5., 1.), (6., 1.), (6., 2.), (5., 2.)]); -SELECT pointInPolygonWithGrid((5.5, 1.5), [(0., 0.), (7., 0.), (7., 3.), (0., 3.)], [(1., 1.), (2., 1.), (2., 2.), (1., 
2.)], [(3., 1.), (4., 1.), (4., 2.), (3., 2.)], [(5., 1.), (6., 1.), (6., 2.), (5., 2.)]); -SELECT pointInPolygonWithGrid((6.5, 1.5), [(0., 0.), (7., 0.), (7., 3.), (0., 3.)], [(1., 1.), (2., 1.), (2., 2.), (1., 2.)], [(3., 1.), (4., 1.), (4., 2.), (3., 2.)], [(5., 1.), (6., 1.), (6., 2.), (5., 2.)]); +SELECT pointInPolygon((0.5, 1.5), [(0., 0.), (7., 0.), (7., 3.), (0., 3.)], [(1., 1.), (2., 1.), (2., 2.), (1., 2.)], [(3., 1.), (4., 1.), (4., 2.), (3., 2.)], [(5., 1.), (6., 1.), (6., 2.), (5., 2.)]); +SELECT pointInPolygon((1.5, 1.5), [(0., 0.), (7., 0.), (7., 3.), (0., 3.)], [(1., 1.), (2., 1.), (2., 2.), (1., 2.)], [(3., 1.), (4., 1.), (4., 2.), (3., 2.)], [(5., 1.), (6., 1.), (6., 2.), (5., 2.)]); +SELECT pointInPolygon((2.5, 1.5), [(0., 0.), (7., 0.), (7., 3.), (0., 3.)], [(1., 1.), (2., 1.), (2., 2.), (1., 2.)], [(3., 1.), (4., 1.), (4., 2.), (3., 2.)], [(5., 1.), (6., 1.), (6., 2.), (5., 2.)]); +SELECT pointInPolygon((3.5, 1.5), [(0., 0.), (7., 0.), (7., 3.), (0., 3.)], [(1., 1.), (2., 1.), (2., 2.), (1., 2.)], [(3., 1.), (4., 1.), (4., 2.), (3., 2.)], [(5., 1.), (6., 1.), (6., 2.), (5., 2.)]); +SELECT pointInPolygon((4.5, 1.5), [(0., 0.), (7., 0.), (7., 3.), (0., 3.)], [(1., 1.), (2., 1.), (2., 2.), (1., 2.)], [(3., 1.), (4., 1.), (4., 2.), (3., 2.)], [(5., 1.), (6., 1.), (6., 2.), (5., 2.)]); +SELECT pointInPolygon((5.5, 1.5), [(0., 0.), (7., 0.), (7., 3.), (0., 3.)], [(1., 1.), (2., 1.), (2., 2.), (1., 2.)], [(3., 1.), (4., 1.), (4., 2.), (3., 2.)], [(5., 1.), (6., 1.), (6., 2.), (5., 2.)]); +SELECT pointInPolygon((6.5, 1.5), [(0., 0.), (7., 0.), (7., 3.), (0., 3.)], [(1., 1.), (2., 1.), (2., 2.), (1., 2.)], [(3., 1.), (4., 1.), (4., 2.), (3., 2.)], [(5., 1.), (6., 1.), (6., 2.), (5., 2.)]); SELECT 'polygons with reversed direction'; -SELECT pointInPolygonWithGrid((4.1, .1), [(6., 0.), (0., 2.), (5., 8.), (8., 4.)]); -SELECT pointInPolygonWithGrid((4.1, .9), [(6., 0.), (0., 2.), (5., 8.), (8., 4.)]); +SELECT pointInPolygon((4.1, .1), [(6., 0.), (0., 2.), (5., 8.), (8., 4.)]); +SELECT pointInPolygon((4.1, .9), [(6., 0.), (0., 2.), (5., 8.), (8., 4.)]); -SELECT pointInPolygonWithGrid((1., 1.), [(4., 0.), (8., 4.), (4., 8.), (0., 4.)], [(3., 3.), (5., 3.), (5., 5.), (3., 5.)]); -SELECT pointInPolygonWithGrid((2.5, 2.5), [(4., 0.), (8., 4.), (4., 8.), (0., 4.)], [(3., 3.), (5., 3.), (5., 5.), (3., 5.)]); -SELECT pointInPolygonWithGrid((4., 4.), [(4., 0.), (8., 4.), (4., 8.), (0., 4.)], [(3., 3.), (5., 3.), (5., 5.), (3., 5.)]); -SELECT pointInPolygonWithGrid((4., 2.), [(4., 0.), (8., 4.), (4., 8.), (0., 4.)], [(3., 3.), (5., 3.), (5., 5.), (3., 5.)]); -SELECT pointInPolygonWithGrid((9., 9.), [(4., 0.), (8., 4.), (4., 8.), (0., 4.)],[(3., 3.), (5., 3.), (5., 5.), (3., 5.)]); +SELECT pointInPolygon((1., 1.), [(4., 0.), (8., 4.), (4., 8.), (0., 4.)], [(3., 3.), (5., 3.), (5., 5.), (3., 5.)]); +SELECT pointInPolygon((2.5, 2.5), [(4., 0.), (8., 4.), (4., 8.), (0., 4.)], [(3., 3.), (5., 3.), (5., 5.), (3., 5.)]); +SELECT pointInPolygon((4., 4.), [(4., 0.), (8., 4.), (4., 8.), (0., 4.)], [(3., 3.), (5., 3.), (5., 5.), (3., 5.)]); +SELECT pointInPolygon((4., 2.), [(4., 0.), (8., 4.), (4., 8.), (0., 4.)], [(3., 3.), (5., 3.), (5., 5.), (3., 5.)]); +SELECT pointInPolygon((9., 9.), [(4., 0.), (8., 4.), (4., 8.), (0., 4.)],[(3., 3.), (5., 3.), (5., 5.), (3., 5.)]); SELECT 'eps for complex polygon in grid'; -SELECT pointInPolygonWithGrid((0., 0.), [(0., 1.), (0.2, 0.5), (0.6, 0.5), (0.8, 0.8), (0.8, 0.3), (0.1, 0.3), (0.1, 0.1), (0.8, 0.1), (1., 0.), (-6., -7.), (-7., -6.), (0., 1.)]) +SELECT 
pointInPolygon((0., 0.), [(0., 1.), (0.2, 0.5), (0.6, 0.5), (0.8, 0.8), (0.8, 0.3), (0.1, 0.3), (0.1, 0.1), (0.8, 0.1), (1., 0.), (-6., -7.), (-7., -6.), (0., 1.)])

From c8ffca746e2d98c5d3c0985582be6a0633593d05 Mon Sep 17 00:00:00 2001
From: Nikolai Kochetov
Date: Tue, 26 Sep 2017 18:25:24 +0300
Subject: [PATCH 05/63] fixed default Nullable implementation for
 Const(Nullable) columns [#CLICKHOUSE-3339]

---
 dbms/src/Functions/FunctionHelpers.cpp | 19 ++++++++++++++++---
 dbms/src/Functions/IFunction.cpp       | 15 ++++++++++-----
 2 files changed, 26 insertions(+), 8 deletions(-)

diff --git a/dbms/src/Functions/FunctionHelpers.cpp b/dbms/src/Functions/FunctionHelpers.cpp
index 2a21c4601dd..d55e8b3ba39 100644
--- a/dbms/src/Functions/FunctionHelpers.cpp
+++ b/dbms/src/Functions/FunctionHelpers.cpp
@@ -4,6 +4,8 @@
 #include 
 #include 
 #include 
+#include 
+#include "FunctionsArithmetic.h"
 
 namespace DB
 
@@ -95,10 +97,21 @@ Block createBlockWithNestedColumns(const Block & block, ColumnNumbers args, size
 {
     ++j;
 
-    if (col.column->isNullable())
+    if (col.type->isNullable())
     {
-        auto nullable_col = static_cast<const ColumnNullable *>(col.column.get());
-        const ColumnPtr & nested_col = nullable_col->getNestedColumn();
+        bool is_const = col.column->isConst();
+        auto const_col = static_cast<const ColumnConst *>(col.column.get());
+
+        if (is_const && !const_col->isNullable())
+            throw Exception("Column at position " + toString(i + 1) + " with type " + col.type->getName() +
+                " should be nullable, but got " + const_col->getName(), ErrorCodes::LOGICAL_ERROR);
+
+        auto nullable_col = static_cast<const ColumnNullable *>(
+            is_const ? &const_col->getDataColumn() : col.column.get());
+
+        ColumnPtr nested_col = nullable_col->getNestedColumn();
+        if (is_const)
+            nested_col = std::make_shared<ColumnConst>(nested_col, const_col->size());
 
         auto nullable_type = static_cast<const DataTypeNullable *>(col.type.get());
         const DataTypePtr & nested_type = nullable_type->getNestedType();
diff --git a/dbms/src/Functions/IFunction.cpp b/dbms/src/Functions/IFunction.cpp
index b2c00d62d77..f6c7ad7b3b5 100644
--- a/dbms/src/Functions/IFunction.cpp
+++ b/dbms/src/Functions/IFunction.cpp
@@ -204,12 +204,17 @@ bool defaultImplementationForNulls(
     const ColumnWithTypeAndName & source_col = temporary_block.getByPosition(result);
     ColumnWithTypeAndName & dest_col = block.getByPosition(result);
 
-    /// Initialize the result column.
-    ColumnPtr null_map = std::make_shared<ColumnUInt8>(block.rows(), 0);
-    dest_col.column = std::make_shared<ColumnNullable>(source_col.column, null_map);
+    if (source_col.column->isConst())
+        dest_col.column = source_col.column;
+    else
+    {
+        /// Initialize the result column.
+        ColumnPtr null_map = std::make_shared<ColumnUInt8>(block.rows(), 0);
+        dest_col.column = std::make_shared<ColumnNullable>(source_col.column, null_map);
 
-    /// Deduce the null map of the result from the null maps of the nullable columns.
-    createNullMap(block, args, result);
+        /// Deduce the null map of the result from the null maps of the nullable columns.
+        createNullMap(block, args, result);
+    }
 
     return true;
 }

From 1592691e47ad37cfcd41b761a005199e80ca7baf Mon Sep 17 00:00:00 2001
From: Nikolai Kochetov
Date: Tue, 26 Sep 2017 18:56:25 +0300
Subject: [PATCH 06/63] fixed default Nullable implementation for
 Const(Nullable) columns

---
 dbms/src/Functions/FunctionHelpers.cpp | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/dbms/src/Functions/FunctionHelpers.cpp b/dbms/src/Functions/FunctionHelpers.cpp
index d55e8b3ba39..ab4e415c6d6 100644
--- a/dbms/src/Functions/FunctionHelpers.cpp
+++ b/dbms/src/Functions/FunctionHelpers.cpp
@@ -102,7 +102,7 @@ Block createBlockWithNestedColumns(const Block & block, ColumnNumbers args, size
         bool is_const = col.column->isConst();
         auto const_col = static_cast<const ColumnConst *>(col.column.get());
 
-        if (is_const && !const_col->isNullable())
+        if (is_const && !const_col->getDataColumn().isNullable())
             throw Exception("Column at position " + toString(i + 1) + " with type " + col.type->getName() +
                 " should be nullable, but got " + const_col->getName(), ErrorCodes::LOGICAL_ERROR);
 
From 937d3f388309b98d17330bc2fff581dbb6943914 Mon Sep 17 00:00:00 2001
From: Nikolai Kochetov
Date: Tue, 26 Sep 2017 19:01:12 +0300
Subject: [PATCH 07/63] added test [#CLICKHOUSE-3339]

---
 .../queries/0_stateless/00503_cast_const_nullable.reference  | 2 ++
 dbms/tests/queries/0_stateless/00503_cast_const_nullable.sql | 2 ++
 2 files changed, 4 insertions(+)
 create mode 100644 dbms/tests/queries/0_stateless/00503_cast_const_nullable.reference
 create mode 100644 dbms/tests/queries/0_stateless/00503_cast_const_nullable.sql

diff --git a/dbms/tests/queries/0_stateless/00503_cast_const_nullable.reference b/dbms/tests/queries/0_stateless/00503_cast_const_nullable.reference
new file mode 100644
index 00000000000..6ed281c757a
--- /dev/null
+++ b/dbms/tests/queries/0_stateless/00503_cast_const_nullable.reference
@@ -0,0 +1,2 @@
+1
+1
diff --git a/dbms/tests/queries/0_stateless/00503_cast_const_nullable.sql b/dbms/tests/queries/0_stateless/00503_cast_const_nullable.sql
new file mode 100644
index 00000000000..c901af29713
--- /dev/null
+++ b/dbms/tests/queries/0_stateless/00503_cast_const_nullable.sql
@@ -0,0 +1,2 @@
+SELECT CAST(1 AS Nullable(UInt8)) AS id WHERE id = CAST(1 AS Nullable(UInt8));
+SELECT CAST(1 AS Nullable(UInt8)) AS id WHERE id = 1;

From 2f80c150db134a4467eb94bca2b748e694c695dd Mon Sep 17 00:00:00 2001
From: Alexey Zatelepin
Date: Fri, 25 Aug 2017 23:41:45 +0300
Subject: [PATCH 08/63] save MergeTree data format version and parse part
 names depending on it [#CLICKHOUSE-3000]

---
 .../Storages/MergeTree/ActiveDataPartSet.cpp  |  7 +-
 .../Storages/MergeTree/ActiveDataPartSet.h    |  6 +-
 dbms/src/Storages/MergeTree/MergeTreeData.cpp | 35 +++++++--
 dbms/src/Storages/MergeTree/MergeTreeData.h   |  2 +
 .../MergeTree/MergeTreeDataFormatVersion.h    | 10 +++
 .../MergeTree/MergeTreeDataMerger.cpp         | 28 ++++---
 .../Storages/MergeTree/MergeTreeDataPart.cpp  | 26 +++++--
 .../Storages/MergeTree/MergeTreeDataPart.h    |  7 +-
 .../MergeTree/MergeTreeDataWriter.cpp         | 37 +++++----
 .../Storages/MergeTree/MergeTreePartInfo.cpp  | 76 ++++++++++++-----
 .../Storages/MergeTree/MergeTreePartInfo.h    | 12 +--
 .../ReplicatedMergeTreeBlockOutputStream.cpp  |  7 +-
 .../ReplicatedMergeTreePartCheckThread.cpp    |  4 +-
 .../MergeTree/ReplicatedMergeTreeQueue.cpp    | 12 +--
 .../MergeTree/ReplicatedMergeTreeQueue.h      |  8 +-
 dbms/src/Storages/StorageMergeTree.cpp        |  4 +-
 .../Storages/StorageReplicatedMergeTree.cpp   | 61 ++++++++-------
 .../src/Storages/StorageReplicatedMergeTree.h | 15 ++--
dbms/src/Storages/tests/part_name.cpp | 3 +- 19 files changed, 238 insertions(+), 122 deletions(-) create mode 100644 dbms/src/Storages/MergeTree/MergeTreeDataFormatVersion.h diff --git a/dbms/src/Storages/MergeTree/ActiveDataPartSet.cpp b/dbms/src/Storages/MergeTree/ActiveDataPartSet.cpp index 80ed6f7e8e5..a56a0592bd2 100644 --- a/dbms/src/Storages/MergeTree/ActiveDataPartSet.cpp +++ b/dbms/src/Storages/MergeTree/ActiveDataPartSet.cpp @@ -4,7 +4,8 @@ namespace DB { -ActiveDataPartSet::ActiveDataPartSet(const Strings & names) +ActiveDataPartSet::ActiveDataPartSet(MergeTreeDataFormatVersion format_version_, const Strings & names) + : format_version(format_version_) { for (const auto & name : names) addImpl(name); @@ -20,7 +21,7 @@ void ActiveDataPartSet::add(const String & name) void ActiveDataPartSet::addImpl(const String & name) { - auto part_info = MergeTreePartInfo::fromPartName(name); + auto part_info = MergeTreePartInfo::fromPartName(name, format_version); if (!getContainingPartImpl(part_info).empty()) return; @@ -53,7 +54,7 @@ void ActiveDataPartSet::addImpl(const String & name) String ActiveDataPartSet::getContainingPart(const String & part_name) const { std::lock_guard lock(mutex); - return getContainingPartImpl(MergeTreePartInfo::fromPartName(part_name)); + return getContainingPartImpl(MergeTreePartInfo::fromPartName(part_name, format_version)); } diff --git a/dbms/src/Storages/MergeTree/ActiveDataPartSet.h b/dbms/src/Storages/MergeTree/ActiveDataPartSet.h index 1e6067adb16..ee2a2d10431 100644 --- a/dbms/src/Storages/MergeTree/ActiveDataPartSet.h +++ b/dbms/src/Storages/MergeTree/ActiveDataPartSet.h @@ -18,8 +18,8 @@ namespace DB class ActiveDataPartSet { public: - ActiveDataPartSet() {} - ActiveDataPartSet(const Strings & names); + ActiveDataPartSet(MergeTreeDataFormatVersion format_version_) : format_version(format_version_) {} + ActiveDataPartSet(MergeTreeDataFormatVersion format_version_, const Strings & names); void add(const String & name); @@ -31,6 +31,8 @@ public: size_t size() const; private: + MergeTreeDataFormatVersion format_version; + mutable std::mutex mutex; std::map part_info_to_name; diff --git a/dbms/src/Storages/MergeTree/MergeTreeData.cpp b/dbms/src/Storages/MergeTree/MergeTreeData.cpp index 29ea1e8a51a..e226798a236 100644 --- a/dbms/src/Storages/MergeTree/MergeTreeData.cpp +++ b/dbms/src/Storages/MergeTree/MergeTreeData.cpp @@ -61,6 +61,7 @@ namespace ErrorCodes { extern const int MEMORY_LIMIT_EXCEEDED; extern const int SYNTAX_ERROR; + extern const int CORRUPTED_DATA; } @@ -130,6 +131,23 @@ MergeTreeData::MergeTreeData( /// Creating directories, if not exist. 
Poco::File(full_path).createDirectories(); Poco::File(full_path + "detached").createDirectory(); + + String version_file_path = full_path + "format_version.txt"; + if (!attach) + { + format_version = 0; + WriteBufferFromFile buf(version_file_path); + writeIntText(format_version.toUnderType(), buf); + } + else if (Poco::File(version_file_path).exists()) + { + ReadBufferFromFile buf(version_file_path); + readIntText(format_version, buf); + if (!buf.eof()) + throw Exception("Bad version file: " + version_file_path, ErrorCodes::CORRUPTED_DATA); + } + else + format_version = 0; } @@ -362,7 +380,7 @@ void MergeTreeData::loadDataParts(bool skip_sanity_checks) for (const String & file_name : part_file_names) { MergeTreePartInfo part_info; - if (!MergeTreePartInfo::tryParsePartName(file_name, &part_info)) + if (!MergeTreePartInfo::tryParsePartName(file_name, &part_info, format_version)) continue; MutableDataPartPtr part = std::make_shared(*this, file_name, part_info); @@ -414,7 +432,7 @@ void MergeTreeData::loadDataParts(bool skip_sanity_checks) continue; MergeTreePartInfo contained_part_info; - if (!MergeTreePartInfo::tryParsePartName(contained_name, &contained_part_info)) + if (!MergeTreePartInfo::tryParsePartName(contained_name, &contained_part_info, format_version)) continue; if (part->info.contains(contained_part_info)) @@ -1249,8 +1267,11 @@ MergeTreeData::DataPartsVector MergeTreeData::renameTempPartAndReplace( if (increment) part->info.min_block = part->info.max_block = increment->get(); - String new_name = MergeTreePartInfo::getPartName( - part->getMinDate(), part->getMaxDate(), part->info.min_block, part->info.max_block, part->info.level); + String new_name; + if (format_version == 0) + new_name = part->info.getPartNameV0(part->getMinDate(), part->getMaxDate()); + else + new_name = part->info.getPartName(); LOG_TRACE(log, "Renaming temporary part " << part->relative_path << " to " << new_name << "."); @@ -1526,7 +1547,7 @@ void MergeTreeData::delayInsertIfNeeded(Poco::Event * until) MergeTreeData::DataPartPtr MergeTreeData::getActiveContainingPart(const String & part_name) { - auto part_info = MergeTreePartInfo::fromPartName(part_name); + auto part_info = MergeTreePartInfo::fromPartName(part_name, format_version); std::lock_guard lock(data_parts_mutex); @@ -1553,7 +1574,7 @@ MergeTreeData::DataPartPtr MergeTreeData::getActiveContainingPart(const String & MergeTreeData::DataPartPtr MergeTreeData::getPartIfExists(const String & part_name) { - auto part_info = MergeTreePartInfo::fromPartName(part_name); + auto part_info = MergeTreePartInfo::fromPartName(part_name, format_version); std::lock_guard lock(all_data_parts_mutex); auto it = all_data_parts.lower_bound(part_info); @@ -1721,7 +1742,7 @@ size_t MergeTreeData::getPartitionSize(const std::string & partition_id) const for (Poco::DirectoryIterator it(full_path); it != end; ++it) { MergeTreePartInfo part_info; - if (!MergeTreePartInfo::tryParsePartName(it.name(), &part_info)) + if (!MergeTreePartInfo::tryParsePartName(it.name(), &part_info, format_version)) continue; if (part_info.partition_id != partition_id) continue; diff --git a/dbms/src/Storages/MergeTree/MergeTreeData.h b/dbms/src/Storages/MergeTree/MergeTreeData.h index 4e25e38cf77..fba39411f3a 100644 --- a/dbms/src/Storages/MergeTree/MergeTreeData.h +++ b/dbms/src/Storages/MergeTree/MergeTreeData.h @@ -466,6 +466,8 @@ public: /// For determining the partition id of inserted blocks. 
String getPartitionIDFromData(const Row & partition); + MergeTreeDataFormatVersion format_version; + Context & context; const String date_column_name; const ASTPtr sampling_expression; diff --git a/dbms/src/Storages/MergeTree/MergeTreeDataFormatVersion.h b/dbms/src/Storages/MergeTree/MergeTreeDataFormatVersion.h new file mode 100644 index 00000000000..7d723412e6f --- /dev/null +++ b/dbms/src/Storages/MergeTree/MergeTreeDataFormatVersion.h @@ -0,0 +1,10 @@ +#pragma once + +#include + +namespace DB +{ + +STRONG_TYPEDEF(UInt32, MergeTreeDataFormatVersion); + +} diff --git a/dbms/src/Storages/MergeTree/MergeTreeDataMerger.cpp b/dbms/src/Storages/MergeTree/MergeTreeDataMerger.cpp index e0961430e58..261a06acea1 100644 --- a/dbms/src/Storages/MergeTree/MergeTreeDataMerger.cpp +++ b/dbms/src/Storages/MergeTree/MergeTreeDataMerger.cpp @@ -84,16 +84,20 @@ void MergeTreeDataMerger::FuturePart::assign(MergeTreeData::DataPartsVector part part_info.max_block = parts.back()->info.max_block; part_info.level = max_level + 1; - DayNum_t min_date = DayNum_t(std::numeric_limits::max()); - DayNum_t max_date = DayNum_t(std::numeric_limits::min()); - for (const auto & part : parts) + if (parts.front()->storage.format_version == 0) { - min_date = std::min(min_date, part->getMinDate()); - max_date = std::max(max_date, part->getMaxDate()); - } + DayNum_t min_date = DayNum_t(std::numeric_limits::max()); + DayNum_t max_date = DayNum_t(std::numeric_limits::min()); + for (const auto & part : parts) + { + min_date = std::min(min_date, part->getMinDate()); + max_date = std::max(max_date, part->getMaxDate()); + } - name = MergeTreePartInfo::getPartName( - min_date, max_date, part_info.min_block, part_info.max_block, part_info.level); + name = part_info.getPartNameV0(min_date, max_date); + } + else + name = part_info.getPartName(); } MergeTreeDataMerger::MergeTreeDataMerger(MergeTreeData & data_, const BackgroundProcessingPool & pool_) @@ -1069,9 +1073,11 @@ MergeTreeData::PerShardDataParts MergeTreeDataMerger::reshardPartition( size_t shard_no = entry.first; MergeTreeData::MutableDataPartPtr & part_from_shard = entry.second; - std::string new_name = MergeTreePartInfo::getPartName( - part_from_shard->getMinDate(), part_from_shard->getMaxDate(), - part_from_shard->info.min_block, part_from_shard->info.max_block, part_from_shard->info.level); + std::string new_name; + if (data.format_version == 0) + new_name = part_from_shard->info.getPartNameV0(part_from_shard->getMinDate(), part_from_shard->getMaxDate()); + else + new_name = part_from_shard->info.getPartName(); std::string new_relative_path = "reshard/" + toString(shard_no) + "/" + new_name; part_from_shard->renameTo(new_relative_path); diff --git a/dbms/src/Storages/MergeTree/MergeTreeDataPart.cpp b/dbms/src/Storages/MergeTree/MergeTreeDataPart.cpp index eb1adf9fe33..333b43b73fd 100644 --- a/dbms/src/Storages/MergeTree/MergeTreeDataPart.cpp +++ b/dbms/src/Storages/MergeTree/MergeTreeDataPart.cpp @@ -337,6 +337,11 @@ void MinMaxIndex::merge(const MinMaxIndex & other) } +MergeTreeDataPart::MergeTreeDataPart(MergeTreeData & storage_, const String & name_) + : storage(storage_), name(name_), info(MergeTreePartInfo::fromPartName(name_, storage.format_version)) +{ +} + /// Returns the size of .bin file for column `name` if found, zero otherwise. 
size_t MergeTreeDataPart::getColumnCompressedSize(const String & name) const { @@ -657,16 +662,21 @@ void MergeTreeDataPart::loadIndex() void MergeTreeDataPart::loadPartitionAndMinMaxIndex() { - DayNum_t min_date; - DayNum_t max_date; - MergeTreePartInfo::parseMinMaxDatesFromPartName(name, min_date, max_date); + if (storage.format_version == 0) + { + DayNum_t min_date; + DayNum_t max_date; + MergeTreePartInfo::parseMinMaxDatesFromPartName(name, min_date, max_date); - const auto & date_lut = DateLUT::instance(); - partition = Row(1, static_cast(date_lut.toNumYYYYMM(min_date))); + const auto & date_lut = DateLUT::instance(); + partition = Row(1, static_cast(date_lut.toNumYYYYMM(min_date))); - minmax_idx.min_column_values = Row(1, static_cast(min_date)); - minmax_idx.max_column_values = Row(1, static_cast(max_date)); - minmax_idx.initialized = true; + minmax_idx.min_column_values = Row(1, static_cast(min_date)); + minmax_idx.max_column_values = Row(1, static_cast(max_date)); + minmax_idx.initialized = true; + } + else + throw Exception("TODO", ErrorCodes::LOGICAL_ERROR); } void MergeTreeDataPart::loadChecksums(bool require) diff --git a/dbms/src/Storages/MergeTree/MergeTreeDataPart.h b/dbms/src/Storages/MergeTree/MergeTreeDataPart.h index 4baa7e7844f..f3172e0c35e 100644 --- a/dbms/src/Storages/MergeTree/MergeTreeDataPart.h +++ b/dbms/src/Storages/MergeTree/MergeTreeDataPart.h @@ -104,16 +104,13 @@ struct MergeTreeDataPart using Checksums = MergeTreeDataPartChecksums; using Checksum = MergeTreeDataPartChecksums::Checksum; - MergeTreeDataPart(MergeTreeData & storage_, const String & name_) - : storage(storage_), name(name_), info(MergeTreePartInfo::fromPartName(name_)) - { - } - MergeTreeDataPart(MergeTreeData & storage_, const String & name_, const MergeTreePartInfo & info_) : storage(storage_), name(name_), info(info_) { } + MergeTreeDataPart(MergeTreeData & storage_, const String & name_); + /// Returns checksum of column's binary file. 
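// The two naming schemes chosen above, side by side (illustrative sketch; the
// DateLUT calls reused here appear elsewhere in this patch series):
const auto & lut = DateLUT::instance();
DayNum_t min_date = lut.toDayNum(lut.YYYYMMDDToDate(20170901));
DayNum_t max_date = lut.toDayNum(lut.YYYYMMDDToDate(20170930));
MergeTreePartInfo example_info("201709", 1, 5, 2);
String name_v0  = example_info.getPartNameV0(min_date, max_date);  // "20170901_20170930_1_5_2"
String name_new = example_info.getPartName();                      // "201709_1_5_2"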
const Checksum * tryGetBinChecksum(const String & name) const; diff --git a/dbms/src/Storages/MergeTree/MergeTreeDataWriter.cpp b/dbms/src/Storages/MergeTree/MergeTreeDataWriter.cpp index 2a4d6f346a5..d210744d7a5 100644 --- a/dbms/src/Storages/MergeTree/MergeTreeDataWriter.cpp +++ b/dbms/src/Storages/MergeTree/MergeTreeDataWriter.cpp @@ -134,22 +134,29 @@ MergeTreeData::MutableDataPartPtr MergeTreeDataWriter::writeTempPart(BlockWithPa MinMaxIndex minmax_idx; minmax_idx.update(block, data.minmax_idx_columns); - DayNum_t min_date(minmax_idx.min_column_values[data.minmax_idx_date_column_pos].get()); - DayNum_t max_date(minmax_idx.max_column_values[data.minmax_idx_date_column_pos].get()); - - const auto & date_lut = DateLUT::instance(); - - DayNum_t min_month = date_lut.toFirstDayNumOfMonth(DayNum_t(min_date)); - DayNum_t max_month = date_lut.toFirstDayNumOfMonth(DayNum_t(max_date)); - - if (min_month != max_month) - throw Exception("Logical error: part spans more than one month."); - - String part_name = MergeTreePartInfo::getPartName(min_date, max_date, temp_index, temp_index, 0); - String new_partition_id = data.getPartitionIDFromData(block_with_partition.partition); - MergeTreeData::MutableDataPartPtr new_data_part = std::make_shared( - data, part_name, MergeTreePartInfo(new_partition_id, temp_index, temp_index, 0)); + + MergeTreePartInfo new_part_info(new_partition_id, temp_index, temp_index, 0); + String part_name; + if (data.format_version == 0) + { + DayNum_t min_date(minmax_idx.min_column_values[data.minmax_idx_date_column_pos].get()); + DayNum_t max_date(minmax_idx.max_column_values[data.minmax_idx_date_column_pos].get()); + + const auto & date_lut = DateLUT::instance(); + + DayNum_t min_month = date_lut.toFirstDayNumOfMonth(DayNum_t(min_date)); + DayNum_t max_month = date_lut.toFirstDayNumOfMonth(DayNum_t(max_date)); + + if (min_month != max_month) + throw Exception("Logical error: part spans more than one month."); + + part_name = new_part_info.getPartNameV0(min_date, max_date); + } + else + part_name = new_part_info.getPartName(); + + MergeTreeData::MutableDataPartPtr new_data_part = std::make_shared(data, part_name, new_part_info); new_data_part->partition = std::move(block_with_partition.partition); new_data_part->minmax_idx = std::move(minmax_idx); new_data_part->relative_path = TMP_PREFIX + part_name; diff --git a/dbms/src/Storages/MergeTree/MergeTreePartInfo.cpp b/dbms/src/Storages/MergeTree/MergeTreePartInfo.cpp index 910c9a95ba4..8131bd92e69 100644 --- a/dbms/src/Storages/MergeTree/MergeTreePartInfo.cpp +++ b/dbms/src/Storages/MergeTree/MergeTreePartInfo.cpp @@ -12,29 +12,51 @@ namespace ErrorCodes } -MergeTreePartInfo MergeTreePartInfo::fromPartName(const String & dir_name) +MergeTreePartInfo MergeTreePartInfo::fromPartName(const String & dir_name, MergeTreeDataFormatVersion format_version) { MergeTreePartInfo part_info; - if (!tryParsePartName(dir_name, &part_info)) + if (!tryParsePartName(dir_name, &part_info, format_version)) throw Exception("Unexpected part name: " + dir_name, ErrorCodes::BAD_DATA_PART_NAME); return part_info; } -bool MergeTreePartInfo::tryParsePartName(const String & dir_name, MergeTreePartInfo * part_info) + +bool MergeTreePartInfo::tryParsePartName(const String & dir_name, MergeTreePartInfo * part_info, MergeTreeDataFormatVersion format_version) { - UInt32 min_yyyymmdd = 0; - UInt32 max_yyyymmdd = 0; + ReadBufferFromString in(dir_name); + + String partition_id; + if (format_version == 0) + { + UInt32 min_yyyymmdd = 0; + UInt32 max_yyyymmdd = 0; + if 
(!tryReadIntText(min_yyyymmdd, in) + || !checkChar('_', in) + || !tryReadIntText(max_yyyymmdd, in) + || !checkChar('_', in)) + { + return false; + } + partition_id = toString(min_yyyymmdd / 100); + } + else + { + while (!in.eof()) + { + char c; + readChar(c, in); + if (c == '_') + break; + + partition_id.push_back(c); + } + } + Int64 min_block_num = 0; Int64 max_block_num = 0; UInt32 level = 0; - ReadBufferFromString in(dir_name); - - if (!tryReadIntText(min_yyyymmdd, in) - || !checkChar('_', in) - || !tryReadIntText(max_yyyymmdd, in) - || !checkChar('_', in) - || !tryReadIntText(min_block_num, in) + if (!tryReadIntText(min_block_num, in) || !checkChar('_', in) || !tryReadIntText(max_block_num, in) || !checkChar('_', in) @@ -46,7 +68,7 @@ bool MergeTreePartInfo::tryParsePartName(const String & dir_name, MergeTreePartI if (part_info) { - part_info->partition_id = dir_name.substr(0, strlen("YYYYMM")); + part_info->partition_id = std::move(partition_id); part_info->min_block = min_block_num; part_info->max_block = max_block_num; part_info->level = level; @@ -83,15 +105,31 @@ void MergeTreePartInfo::parseMinMaxDatesFromPartName(const String & dir_name, Da } -bool MergeTreePartInfo::contains(const String & outer_part_name, const String & inner_part_name) +bool MergeTreePartInfo::contains(const String & outer_part_name, const String & inner_part_name, MergeTreeDataFormatVersion format_version) { - MergeTreePartInfo outer = fromPartName(outer_part_name); - MergeTreePartInfo inner = fromPartName(inner_part_name); + MergeTreePartInfo outer = fromPartName(outer_part_name, format_version); + MergeTreePartInfo inner = fromPartName(inner_part_name, format_version); return outer.contains(inner); } -String MergeTreePartInfo::getPartName(DayNum_t left_date, DayNum_t right_date, Int64 left_id, Int64 right_id, UInt64 level) +String MergeTreePartInfo::getPartName() const +{ + WriteBufferFromOwnString wb; + + writeString(partition_id, wb); + writeChar('_', wb); + writeIntText(min_block, wb); + writeChar('_', wb); + writeIntText(max_block, wb); + writeChar('_', wb); + writeIntText(level, wb); + + return wb.str(); +} + + +String MergeTreePartInfo::getPartNameV0(DayNum_t left_date, DayNum_t right_date) const { const auto & date_lut = DateLUT::instance(); @@ -106,9 +144,9 @@ String MergeTreePartInfo::getPartName(DayNum_t left_date, DayNum_t right_date, I writeChar('_', wb); writeIntText(right_date_id, wb); writeChar('_', wb); - writeIntText(left_id, wb); + writeIntText(min_block, wb); writeChar('_', wb); - writeIntText(right_id, wb); + writeIntText(max_block, wb); writeChar('_', wb); writeIntText(level, wb); diff --git a/dbms/src/Storages/MergeTree/MergeTreePartInfo.h b/dbms/src/Storages/MergeTree/MergeTreePartInfo.h index abdbfce289a..4bc660c84f1 100644 --- a/dbms/src/Storages/MergeTree/MergeTreePartInfo.h +++ b/dbms/src/Storages/MergeTree/MergeTreePartInfo.h @@ -1,6 +1,7 @@ #pragma once #include +#include #include @@ -37,15 +38,16 @@ struct MergeTreePartInfo && level >= rhs.level; } - static MergeTreePartInfo fromPartName(const String & part_name); + String getPartName() const; + String getPartNameV0(DayNum_t left_date, DayNum_t right_date) const; - static bool tryParsePartName(const String & dir_name, MergeTreePartInfo * part_info); + static MergeTreePartInfo fromPartName(const String & part_name, MergeTreeDataFormatVersion format_version); - static void parseMinMaxDatesFromPartName(const String & dir_name, DayNum_t & min_date, DayNum_t & max_date); + static bool tryParsePartName(const String & dir_name, 
MergeTreePartInfo * part_info, MergeTreeDataFormatVersion format_version); - static bool contains(const String & outer_part_name, const String & inner_part_name); + static void parseMinMaxDatesFromPartName(const String & part_name, DayNum_t & min_date, DayNum_t & max_date); - static String getPartName(DayNum_t left_date, DayNum_t right_date, Int64 left_id, Int64 right_id, UInt64 level); + static bool contains(const String & outer_part_name, const String & inner_part_name, MergeTreeDataFormatVersion format_version); }; } diff --git a/dbms/src/Storages/MergeTree/ReplicatedMergeTreeBlockOutputStream.cpp b/dbms/src/Storages/MergeTree/ReplicatedMergeTreeBlockOutputStream.cpp index a160d6674d0..7b6b5d51d63 100644 --- a/dbms/src/Storages/MergeTree/ReplicatedMergeTreeBlockOutputStream.cpp +++ b/dbms/src/Storages/MergeTree/ReplicatedMergeTreeBlockOutputStream.cpp @@ -175,8 +175,11 @@ void ReplicatedMergeTreeBlockOutputStream::commitPart(zkutil::ZooKeeperPtr & zoo part->info.max_block = block_number; part->info.level = 0; - String part_name = MergeTreePartInfo::getPartName( - part->getMinDate(), part->getMaxDate(), block_number, block_number, 0); + String part_name; + if (storage.data.format_version == 0) + part_name = part->info.getPartNameV0(part->getMinDate(), part->getMaxDate()); + else + part_name = part->info.getPartName(); part->name = part_name; diff --git a/dbms/src/Storages/MergeTree/ReplicatedMergeTreePartCheckThread.cpp b/dbms/src/Storages/MergeTree/ReplicatedMergeTreePartCheckThread.cpp index f4745077963..f8ca88b9109 100644 --- a/dbms/src/Storages/MergeTree/ReplicatedMergeTreePartCheckThread.cpp +++ b/dbms/src/Storages/MergeTree/ReplicatedMergeTreePartCheckThread.cpp @@ -86,7 +86,7 @@ void ReplicatedMergeTreePartCheckThread::searchForMissingPart(const String & par } /// If the part is not in ZooKeeper, we'll check if it's at least somewhere. 
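// Usage sketch of the parsing API after this change (illustrative values):
MergeTreePartInfo parsed;
bool ok_v0 = MergeTreePartInfo::tryParsePartName(
    "20170901_20170930_1_5_2", &parsed, MergeTreeDataFormatVersion(0));
// for format 0 the partition id is derived from the first date: "201709"
bool ok_new = MergeTreePartInfo::tryParsePartName(
    "201709_1_5_2", &parsed, MergeTreeDataFormatVersion(1));
// for newer formats everything before the first '_' becomes partition_id,
// then min_block, max_block and level follow: 1, 5, 2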
- auto part_info = MergeTreePartInfo::fromPartName(part_name); + auto part_info = MergeTreePartInfo::fromPartName(part_name, storage.data.format_version); /** The logic is as follows: * - if some live or inactive replica has such a part, or a part covering it @@ -122,7 +122,7 @@ void ReplicatedMergeTreePartCheckThread::searchForMissingPart(const String & par Strings parts = zookeeper->getChildren(storage.zookeeper_path + "/replicas/" + replica + "/parts"); for (const String & part_on_replica : parts) { - auto part_on_replica_info = MergeTreePartInfo::fromPartName(part_on_replica); + auto part_on_replica_info = MergeTreePartInfo::fromPartName(part_on_replica, storage.data.format_version); if (part_on_replica_info.contains(part_info)) { diff --git a/dbms/src/Storages/MergeTree/ReplicatedMergeTreeQueue.cpp b/dbms/src/Storages/MergeTree/ReplicatedMergeTreeQueue.cpp index 9f0a4498762..db8ddbd0e5f 100644 --- a/dbms/src/Storages/MergeTree/ReplicatedMergeTreeQueue.cpp +++ b/dbms/src/Storages/MergeTree/ReplicatedMergeTreeQueue.cpp @@ -421,7 +421,7 @@ void ReplicatedMergeTreeQueue::removeGetsAndMergesInRange(zkutil::ZooKeeperPtr z for (Queue::iterator it = queue.begin(); it != queue.end();) { if (((*it)->type == LogEntry::GET_PART || (*it)->type == LogEntry::MERGE_PARTS) && - MergeTreePartInfo::contains(part_name, (*it)->new_part_name)) + MergeTreePartInfo::contains(part_name, (*it)->new_part_name, format_version)) { if ((*it)->currently_executing) to_wait.push_back(*it); @@ -460,14 +460,14 @@ ReplicatedMergeTreeQueue::Queue ReplicatedMergeTreeQueue::getConflictsForClearCo { if (elem->type == LogEntry::MERGE_PARTS || elem->type == LogEntry::GET_PART || elem->type == LogEntry::ATTACH_PART) { - if (MergeTreePartInfo::contains(entry.new_part_name, elem->new_part_name)) + if (MergeTreePartInfo::contains(entry.new_part_name, elem->new_part_name, format_version)) conflicts.emplace_back(elem); } if (elem->type == LogEntry::CLEAR_COLUMN) { - auto cur_part = MergeTreePartInfo::fromPartName(elem->new_part_name); - auto part = MergeTreePartInfo::fromPartName(entry.new_part_name); + auto cur_part = MergeTreePartInfo::fromPartName(elem->new_part_name, format_version); + auto part = MergeTreePartInfo::fromPartName(entry.new_part_name, format_version); if (part.partition_id == cur_part.partition_id) conflicts.emplace_back(elem); @@ -523,12 +523,12 @@ bool ReplicatedMergeTreeQueue::isNotCoveredByFuturePartsImpl(const String & new_ /// A more complex check is whether another part is currently created by other action that will cover this part. /// NOTE The above is redundant, but left for a more convenient message in the log. - auto result_part = MergeTreePartInfo::fromPartName(new_part_name); + auto result_part = MergeTreePartInfo::fromPartName(new_part_name, format_version); /// It can slow down when the size of `future_parts` is large. But it can not be large, since `BackgroundProcessingPool` is limited. 
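// The containment relation used throughout this commit, reconstructed as a sketch
// from the comparisons visible in MergeTreePartInfo above: a part covers another
// iff it belongs to the same partition and its block range and level envelop it.
static bool containsSketch(const MergeTreePartInfo & outer, const MergeTreePartInfo & inner)
{
    return outer.partition_id == inner.partition_id
        && outer.min_block <= inner.min_block
        && outer.max_block >= inner.max_block
        && outer.level >= inner.level;
}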
for (const auto & future_part_name : future_parts) { - auto future_part = MergeTreePartInfo::fromPartName(future_part_name); + auto future_part = MergeTreePartInfo::fromPartName(future_part_name, format_version); if (future_part.contains(result_part)) { diff --git a/dbms/src/Storages/MergeTree/ReplicatedMergeTreeQueue.h b/dbms/src/Storages/MergeTree/ReplicatedMergeTreeQueue.h index 93b515a4e4c..925885bf80f 100644 --- a/dbms/src/Storages/MergeTree/ReplicatedMergeTreeQueue.h +++ b/dbms/src/Storages/MergeTree/ReplicatedMergeTreeQueue.h @@ -37,6 +37,8 @@ private: using InsertsByTime = std::set; + MergeTreeDataFormatVersion format_version; + String zookeeper_path; String replica_path; String logger_name; @@ -121,7 +123,11 @@ private: }; public: - ReplicatedMergeTreeQueue() {} + ReplicatedMergeTreeQueue(MergeTreeDataFormatVersion format_version_) + : format_version(format_version_) + , virtual_parts(format_version) + { + } void initialize(const String & zookeeper_path_, const String & replica_path_, const String & logger_name_, const MergeTreeData::DataParts & parts, zkutil::ZooKeeperPtr zookeeper); diff --git a/dbms/src/Storages/StorageMergeTree.cpp b/dbms/src/Storages/StorageMergeTree.cpp index 2ccbc8a59ca..9353cc66bea 100644 --- a/dbms/src/Storages/StorageMergeTree.cpp +++ b/dbms/src/Storages/StorageMergeTree.cpp @@ -497,12 +497,12 @@ void StorageMergeTree::attachPartition(const ASTPtr & query, const Field & field else { LOG_DEBUG(log, "Looking for parts for partition " << partition_id << " in " << source_dir); - ActiveDataPartSet active_parts; + ActiveDataPartSet active_parts(data.format_version); for (Poco::DirectoryIterator it = Poco::DirectoryIterator(full_path + source_dir); it != Poco::DirectoryIterator(); ++it) { const String & name = it.name(); MergeTreePartInfo part_info; - if (!MergeTreePartInfo::tryParsePartName(name, &part_info) + if (!MergeTreePartInfo::tryParsePartName(name, &part_info, data.format_version) || part_info.partition_id != partition_id) { continue; diff --git a/dbms/src/Storages/StorageReplicatedMergeTree.cpp b/dbms/src/Storages/StorageReplicatedMergeTree.cpp index 96fe887613f..efb9fc08c96 100644 --- a/dbms/src/Storages/StorageReplicatedMergeTree.cpp +++ b/dbms/src/Storages/StorageReplicatedMergeTree.cpp @@ -201,7 +201,8 @@ StorageReplicatedMergeTree::StorageReplicatedMergeTree( settings_, database_name_ + "." + table_name, true, attach, [this] (const std::string & name) { enqueuePartForCheck(name); }, [this] () { clearOldPartsAndRemoveFromZK(); }), - reader(data), writer(data), merger(data, context.getBackgroundPool()), fetcher(data), sharded_partition_uploader_client(*this), + reader(data), writer(data), merger(data, context.getBackgroundPool()), queue(data.format_version), + fetcher(data), sharded_partition_uploader_client(*this), shutdown_event(false), part_check_thread(*this), log(&Logger::get(database_name + "." + table_name + " (StorageReplicatedMergeTree)")) { @@ -700,7 +701,7 @@ void StorageReplicatedMergeTree::createReplica() /// Add to the queue jobs to receive all the active parts that the reference/master replica has. 
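// ActiveDataPartSet now needs the format version because it parses part names
// internally to decide which parts cover which. Hypothetical usage (the add()
// call is an assumption; only the constructor change is shown by this patch):
ActiveDataPartSet active_parts(data.format_version);
active_parts.add("201709_1_1_0");
active_parts.add("201709_2_2_0");
active_parts.add("201709_1_2_1");          // a merged part covering both of the above
Strings names = active_parts.getParts();   // yields only "201709_1_2_1"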
Strings parts = zookeeper->getChildren(source_path + "/parts"); - ActiveDataPartSet active_parts_set(parts); + ActiveDataPartSet active_parts_set(data.format_version, parts); Strings active_parts = active_parts_set.getParts(); for (const String & name : active_parts) @@ -1260,7 +1261,7 @@ bool StorageReplicatedMergeTree::executeLogEntry(const LogEntry & entry) { ops.emplace_back(std::make_unique(quorum_path, quorum_stat.version)); - auto part_info = MergeTreePartInfo::fromPartName(entry.new_part_name); + auto part_info = MergeTreePartInfo::fromPartName(entry.new_part_name, data.format_version); if (part_info.min_block != part_info.max_block) throw Exception("Logical error: log entry with quorum for part covering more than one block number", @@ -1379,7 +1380,7 @@ void StorageReplicatedMergeTree::executeDropRange(const StorageReplicatedMergeTr LOG_DEBUG(log, (entry.detach ? "Detaching" : "Removing") << " parts."); size_t removed_parts = 0; - auto entry_part_info = MergeTreePartInfo::fromPartName(entry.new_part_name); + auto entry_part_info = MergeTreePartInfo::fromPartName(entry.new_part_name, data.format_version); /// Delete the parts contained in the range to be deleted. /// It's important that no old parts remain (after the merge), because otherwise, @@ -1425,7 +1426,7 @@ void StorageReplicatedMergeTree::executeClearColumnInPartition(const LogEntry & /// So, if conflicts are found, throw an exception and will retry execution later queue.disableMergesAndFetchesInRange(entry); - auto entry_part_info = MergeTreePartInfo::fromPartName(entry.new_part_name); + auto entry_part_info = MergeTreePartInfo::fromPartName(entry.new_part_name, data.format_version); /// We don't change table structure, only data in some parts /// To disable reading from these parts, we will sequentially acquire write lock for each part inside alterDataPart() @@ -1599,7 +1600,7 @@ namespace ReplicatedMergeTreeQuorumEntry quorum_entry; quorum_entry.fromString(quorum_node_value); - auto part_info = MergeTreePartInfo::fromPartName(quorum_entry.part_name); + auto part_info = MergeTreePartInfo::fromPartName(quorum_entry.part_name, data.format_version); if (part_info.min_block != part_info.max_block) throw Exception("Logical error: part written with quorum covers more than one block numbers", ErrorCodes::LOGICAL_ERROR); @@ -1613,7 +1614,7 @@ namespace String quorum_last_part; if (zookeeper->tryGet(zookeeper_path + "/quorum/last_part", quorum_last_part) && quorum_last_part.empty() == false) { - auto part_info = MergeTreePartInfo::fromPartName(quorum_last_part); + auto part_info = MergeTreePartInfo::fromPartName(quorum_last_part, data.format_version); if (part_info.min_block != part_info.max_block) throw Exception("Logical error: part written with quorum covers more than one block numbers", ErrorCodes::LOGICAL_ERROR); @@ -1970,9 +1971,11 @@ String StorageReplicatedMergeTree::findReplicaHavingCoveringPart(const LogEntry Strings parts = zookeeper->getChildren(zookeeper_path + "/replicas/" + replica + "/parts"); for (const String & part_on_replica : parts) { - if (part_on_replica == entry.new_part_name || MergeTreePartInfo::contains(part_on_replica, entry.new_part_name)) + if (part_on_replica == entry.new_part_name + || MergeTreePartInfo::contains(part_on_replica, entry.new_part_name, data.format_version)) { - if (largest_part_found.empty() || MergeTreePartInfo::contains(part_on_replica, largest_part_found)) + if (largest_part_found.empty() + || MergeTreePartInfo::contains(part_on_replica, largest_part_found, data.format_version)) { 
largest_part_found = part_on_replica; } @@ -2116,7 +2119,6 @@ bool StorageReplicatedMergeTree::fetchPart(const String & part_name, const Strin MergeTreeData::MutableDataPartPtr part = fetcher.fetchPart( part_name, replica_path, address.host, address.replication_port, to_detached); - if (!to_detached) { zkutil::Ops ops; @@ -2314,13 +2316,13 @@ BlockInputStreams StorageReplicatedMergeTree::read( { ReplicatedMergeTreeQuorumEntry quorum_entry; quorum_entry.fromString(quorum_str); - auto part_info = MergeTreePartInfo::fromPartName(quorum_entry.part_name); + auto part_info = MergeTreePartInfo::fromPartName(quorum_entry.part_name, data.format_version); max_block_number_to_read = part_info.min_block - 1; } } else { - auto part_info = MergeTreePartInfo::fromPartName(last_part); + auto part_info = MergeTreePartInfo::fromPartName(last_part, data.format_version); max_block_number_to_read = part_info.max_block; } } @@ -2567,16 +2569,22 @@ void StorageReplicatedMergeTree::alter(const AlterCommands & params, /// The name of an imaginary part covering all possible parts in the specified partition with numbers in the range from zero to specified right bound. -static String getFakePartNameCoveringPartRange(const String & partition_id, UInt64 left, UInt64 right) +static String getFakePartNameCoveringPartRange( + MergeTreeDataFormatVersion format_version, const String & partition_id, UInt64 left, UInt64 right) { - /// The date range is all month long. - const auto & lut = DateLUT::instance(); - time_t start_time = lut.YYYYMMDDToDate(parse(partition_id + "01")); - DayNum_t left_date = lut.toDayNum(start_time); - DayNum_t right_date = DayNum_t(static_cast(left_date) + lut.daysInMonth(start_time) - 1); - /// Artificial high level is choosen, to make this part "covering" all parts inside. - return MergeTreePartInfo::getPartName(left_date, right_date, left, right, 999999999); + MergeTreePartInfo part_info(partition_id, left, right, 999999999); + if (format_version == 0) + { + /// The date range is all month long. 
+ const auto & lut = DateLUT::instance(); + time_t start_time = lut.YYYYMMDDToDate(parse(partition_id + "01")); + DayNum_t left_date = lut.toDayNum(start_time); + DayNum_t right_date = DayNum_t(static_cast(left_date) + lut.daysInMonth(start_time) - 1); + return part_info.getPartNameV0(left_date, right_date); + } + else + return part_info.getPartName(); } @@ -2606,7 +2614,7 @@ String StorageReplicatedMergeTree::getFakePartNameCoveringAllPartsInPartition(co return {}; --right; - return getFakePartNameCoveringPartRange(partition_id, left, right); + return getFakePartNameCoveringPartRange(data.format_version, partition_id, left, right); } @@ -2720,14 +2728,14 @@ void StorageReplicatedMergeTree::attachPartition(const ASTPtr & query, const Fie else { LOG_DEBUG(log, "Looking for parts for partition " << partition_id << " in " << source_dir); - ActiveDataPartSet active_parts; + ActiveDataPartSet active_parts(data.format_version); std::set part_names; for (Poco::DirectoryIterator it = Poco::DirectoryIterator(full_path + source_dir); it != Poco::DirectoryIterator(); ++it) { String name = it.name(); MergeTreePartInfo part_info; - if (!MergeTreePartInfo::tryParsePartName(name, &part_info)) + if (!MergeTreePartInfo::tryParsePartName(name, &part_info, data.format_version)) continue; if (part_info.partition_id != partition_id) continue; @@ -3228,7 +3236,8 @@ void StorageReplicatedMergeTree::fetchPartition(const Field & partition, const S for (Poco::DirectoryIterator dir_it{data.getFullPath() + "detached/"}; dir_it != dir_end; ++dir_it) { MergeTreePartInfo part_info; - if (MergeTreePartInfo::tryParsePartName(dir_it.name(), &part_info) && part_info.partition_id == partition_id) + if (MergeTreePartInfo::tryParsePartName(dir_it.name(), &part_info, data.format_version) + && part_info.partition_id == partition_id) throw Exception("Detached partition " + partition_id + " already exists.", ErrorCodes::PARTITION_ALREADY_EXISTS); } @@ -3308,7 +3317,7 @@ void StorageReplicatedMergeTree::fetchPartition(const Field & partition, const S throw Exception("Too much retries to fetch parts from " + best_replica_path, ErrorCodes::TOO_MUCH_RETRIES_TO_FETCH_PARTS); Strings parts = getZooKeeper()->getChildren(best_replica_path + "/parts"); - ActiveDataPartSet active_parts_set(parts); + ActiveDataPartSet active_parts_set(data.format_version, parts); Strings parts_to_fetch; if (missing_parts.empty()) @@ -3319,7 +3328,7 @@ void StorageReplicatedMergeTree::fetchPartition(const Field & partition, const S Strings parts_to_fetch_partition; for (const String & part : parts_to_fetch) { - if (MergeTreePartInfo::fromPartName(part).partition_id == partition_id) + if (MergeTreePartInfo::fromPartName(part, data.format_version).partition_id == partition_id) parts_to_fetch_partition.push_back(part); } diff --git a/dbms/src/Storages/StorageReplicatedMergeTree.h b/dbms/src/Storages/StorageReplicatedMergeTree.h index 25562c778f1..ce0489124de 100644 --- a/dbms/src/Storages/StorageReplicatedMergeTree.h +++ b/dbms/src/Storages/StorageReplicatedMergeTree.h @@ -243,13 +243,6 @@ private: String replica_name; String replica_path; - /** The queue of what needs to be done on this replica to catch up with everyone. It is taken from ZooKeeper (/replicas/me/queue/). - * In ZK entries in chronological order. Here it is not necessary. - */ - ReplicatedMergeTreeQueue queue; - std::atomic last_queue_update_start_time{0}; - std::atomic last_queue_update_finish_time{0}; - /** /replicas/me/is_active. 
*/ zkutil::EphemeralNodeHolderPtr replica_is_active_node; @@ -275,6 +268,14 @@ private: MergeTreeDataWriter writer; MergeTreeDataMerger merger; + /** The queue of what needs to be done on this replica to catch up with everyone. It is taken from ZooKeeper (/replicas/me/queue/). + * In ZK entries in chronological order. Here it is not necessary. + */ + ReplicatedMergeTreeQueue queue; + std::atomic last_queue_update_start_time{0}; + std::atomic last_queue_update_finish_time{0}; + + DataPartsExchange::Fetcher fetcher; RemoteDiskSpaceMonitor::Client disk_space_monitor_client; ShardedPartitionUploader::Client sharded_partition_uploader_client; diff --git a/dbms/src/Storages/tests/part_name.cpp b/dbms/src/Storages/tests/part_name.cpp index 39ec4656ff0..c6ce3bf5719 100644 --- a/dbms/src/Storages/tests/part_name.cpp +++ b/dbms/src/Storages/tests/part_name.cpp @@ -9,7 +9,8 @@ int main(int argc, char ** argv) for (DayNum_t date = today; DayNum_t(date + 10) > today; --date) { - std::string name = DB::MergeTreePartInfo::getPartName(date, date, 0, 0, 0); + DB::MergeTreePartInfo part_info("partition", 0, 0, 0); + std::string name = part_info.getPartNameV0(date, date); std::cerr << name << '\n'; time_t time = DateLUT::instance().YYYYMMDDToDate(DB::parse(name)); From e9f93028b8026ce8db5de903b5a25eeff4bb2979 Mon Sep 17 00:00:00 2001 From: Alexey Zatelepin Date: Wed, 30 Aug 2017 22:03:19 +0300 Subject: [PATCH 09/63] serialize partition and minmax index values [#CLICKHOUSE-3000] --- .../Storages/MergeTree/DataPartsExchange.cpp | 1 - dbms/src/Storages/MergeTree/MergeTreeData.cpp | 8 ++- dbms/src/Storages/MergeTree/MergeTreeData.h | 1 + .../MergeTree/MergeTreeDataMerger.cpp | 20 +------ .../Storages/MergeTree/MergeTreeDataPart.h | 1 - .../MergeTree/MergeTreeDataWriter.cpp | 11 +--- .../MergeTree/MergedBlockOutputStream.cpp | 55 +++++++++++++------ .../MergeTree/MergedBlockOutputStream.h | 11 ++-- .../MergeTree/ShardedPartitionUploader.cpp | 1 - 9 files changed, 54 insertions(+), 55 deletions(-) diff --git a/dbms/src/Storages/MergeTree/DataPartsExchange.cpp b/dbms/src/Storages/MergeTree/DataPartsExchange.cpp index f914da23ffc..0793d624e20 100644 --- a/dbms/src/Storages/MergeTree/DataPartsExchange.cpp +++ b/dbms/src/Storages/MergeTree/DataPartsExchange.cpp @@ -266,7 +266,6 @@ MergeTreeData::MutableDataPartPtr Fetcher::fetchPartImpl( new_data_part->modification_time = time(nullptr); new_data_part->loadColumnsChecksumsIndexes(true, false); - new_data_part->is_sharded = false; new_data_part->checksums.checkEqual(checksums, false); return new_data_part; diff --git a/dbms/src/Storages/MergeTree/MergeTreeData.cpp b/dbms/src/Storages/MergeTree/MergeTreeData.cpp index e226798a236..65a3a6c1df2 100644 --- a/dbms/src/Storages/MergeTree/MergeTreeData.cpp +++ b/dbms/src/Storages/MergeTree/MergeTreeData.cpp @@ -221,8 +221,14 @@ void MergeTreeData::initPartitionKey() partition_expr_ast = parseQuery( parser, partition_expr_str.data(), partition_expr_str.data() + partition_expr_str.length(), "partition expression"); partition_expr = ExpressionAnalyzer(partition_expr_ast, context, nullptr, getColumnsList()).getActions(false); + partition_expr_columns.clear(); + partition_expr_column_types.clear(); for (const ASTPtr & ast : partition_expr_ast->children) - partition_expr_columns.emplace_back(ast->getColumnName()); + { + String col_name = ast->getColumnName(); + partition_expr_columns.emplace_back(col_name); + partition_expr_column_types.emplace_back(partition_expr->getSampleBlock().getByName(col_name).type); + } const 
NamesAndTypesList & minmax_idx_columns_with_types = partition_expr->getRequiredColumnsWithTypes(); minmax_idx_expr = std::make_shared(minmax_idx_columns_with_types, context.getSettingsRef()); diff --git a/dbms/src/Storages/MergeTree/MergeTreeData.h b/dbms/src/Storages/MergeTree/MergeTreeData.h index fba39411f3a..4ea4446f6f1 100644 --- a/dbms/src/Storages/MergeTree/MergeTreeData.h +++ b/dbms/src/Storages/MergeTree/MergeTreeData.h @@ -485,6 +485,7 @@ public: ASTPtr partition_expr_ast; ExpressionActionsPtr partition_expr; Names partition_expr_columns; + DataTypes partition_expr_column_types; ExpressionActionsPtr minmax_idx_expr; Names minmax_idx_columns; diff --git a/dbms/src/Storages/MergeTree/MergeTreeDataMerger.cpp b/dbms/src/Storages/MergeTree/MergeTreeDataMerger.cpp index 261a06acea1..510aecdfa37 100644 --- a/dbms/src/Storages/MergeTree/MergeTreeDataMerger.cpp +++ b/dbms/src/Storages/MergeTree/MergeTreeDataMerger.cpp @@ -739,22 +739,15 @@ MergeTreeData::MutableDataPartPtr MergeTreeDataMerger::mergePartsToTemporaryPart << merge_entry->bytes_read_uncompressed / 1000000.0 / elapsed_seconds << " MB/sec."); } - new_data_part->columns = all_columns; if (merge_alg != MergeAlgorithm::Vertical) - new_data_part->checksums = to.writeSuffixAndGetChecksums(); + to.writeSuffixAndFinalizePart(new_data_part); else - new_data_part->checksums = to.writeSuffixAndGetChecksums(all_columns, &checksums_gathered_columns); - new_data_part->index.swap(to.getIndex()); + to.writeSuffixAndFinalizePart(new_data_part, &all_columns, &checksums_gathered_columns); /// For convenience, even CollapsingSortedBlockInputStream can not return zero rows. if (0 == to.marksCount()) throw Exception("Empty part after merge", ErrorCodes::LOGICAL_ERROR); - new_data_part->size = to.marksCount(); - new_data_part->modification_time = time(nullptr); - new_data_part->size_in_bytes = MergeTreeData::DataPart::calcTotalSize(new_part_tmp_path); - new_data_part->is_sharded = false; - return new_data_part; } @@ -1056,14 +1049,7 @@ MergeTreeData::PerShardDataParts MergeTreeDataMerger::reshardPartition( } MergeTreeData::MutableDataPartPtr & data_part = per_shard_data_parts.at(shard_no); - - data_part->columns = column_names_and_types; - data_part->checksums = output_stream->writeSuffixAndGetChecksums(); - data_part->index.swap(output_stream->getIndex()); - data_part->size = output_stream->marksCount(); - data_part->modification_time = time(nullptr); - data_part->size_in_bytes = MergeTreeData::DataPart::calcTotalSize(output_stream->getPartPath()); - data_part->is_sharded = true; + output_stream->writeSuffixAndFinalizePart(data_part); data_part->shard_no = shard_no; } diff --git a/dbms/src/Storages/MergeTree/MergeTreeDataPart.h b/dbms/src/Storages/MergeTree/MergeTreeDataPart.h index f3172e0c35e..e6814b8a11d 100644 --- a/dbms/src/Storages/MergeTree/MergeTreeDataPart.h +++ b/dbms/src/Storages/MergeTree/MergeTreeDataPart.h @@ -158,7 +158,6 @@ struct MergeTreeDataPart bool is_temp = false; /// For resharding. - bool is_sharded = false; size_t shard_no = 0; /// Primary key (correspond to primary.idx file). 
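// What initPartitionKey (earlier in this commit) collects for the implicit
// month-based key, assuming a table whose date column is named EventDate:
//   partition_expr_ast          -> toYYYYMM(EventDate)
//   partition_expr_columns      -> { "toYYYYMM(EventDate)" }
//   partition_expr_column_types -> { DataTypeUInt32 }   (toYYYYMM yields UInt32)
// These types drive the serialization of partition.dat introduced below.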
diff --git a/dbms/src/Storages/MergeTree/MergeTreeDataWriter.cpp b/dbms/src/Storages/MergeTree/MergeTreeDataWriter.cpp index d210744d7a5..5538eb22451 100644 --- a/dbms/src/Storages/MergeTree/MergeTreeDataWriter.cpp +++ b/dbms/src/Storages/MergeTree/MergeTreeDataWriter.cpp @@ -124,8 +124,6 @@ MergeTreeData::MutableDataPartPtr MergeTreeDataWriter::writeTempPart(BlockWithPa { Block & block = block_with_partition.block; - size_t part_size = (block.rows() + data.index_granularity - 1) / data.index_granularity; - static const String TMP_PREFIX = "tmp_insert_"; /// This will generate unique name in scope of current server process. @@ -205,14 +203,7 @@ MergeTreeData::MutableDataPartPtr MergeTreeDataWriter::writeTempPart(BlockWithPa out.writePrefix(); out.writeWithPermutation(block, perm_ptr); - MergeTreeData::DataPart::Checksums checksums = out.writeSuffixAndGetChecksums(); - - new_data_part->size = part_size; - new_data_part->modification_time = time(nullptr); - new_data_part->columns = columns; - new_data_part->checksums = checksums; - new_data_part->index.swap(out.getIndex()); - new_data_part->size_in_bytes = MergeTreeData::DataPart::calcTotalSize(new_data_part->getFullPath()); + out.writeSuffixAndFinalizePart(new_data_part); ProfileEvents::increment(ProfileEvents::MergeTreeDataWriterRows, block.rows()); ProfileEvents::increment(ProfileEvents::MergeTreeDataWriterUncompressedBytes, block.bytes()); diff --git a/dbms/src/Storages/MergeTree/MergedBlockOutputStream.cpp b/dbms/src/Storages/MergeTree/MergedBlockOutputStream.cpp index 062dfe4ab33..b264ed4caae 100644 --- a/dbms/src/Storages/MergeTree/MergedBlockOutputStream.cpp +++ b/dbms/src/Storages/MergeTree/MergedBlockOutputStream.cpp @@ -385,10 +385,14 @@ void MergedBlockOutputStream::writeSuffix() throw Exception("Method writeSuffix is not supported by MergedBlockOutputStream", ErrorCodes::NOT_IMPLEMENTED); } -MergeTreeData::DataPart::Checksums MergedBlockOutputStream::writeSuffixAndGetChecksums( - const NamesAndTypesList & total_column_list, - MergeTreeData::DataPart::Checksums * additional_column_checksums) +void MergedBlockOutputStream::writeSuffixAndFinalizePart( + MergeTreeData::MutableDataPartPtr & new_part, + const NamesAndTypesList * total_column_list, + MergeTreeData::DataPart::Checksums * additional_column_checksums) { + if (!total_column_list) + total_column_list = &columns_list; + /// Finish write and get checksums. MergeTreeData::DataPart::Checksums checksums; @@ -415,14 +419,36 @@ MergeTreeData::DataPart::Checksums MergedBlockOutputStream::writeSuffixAndGetChe { /// A part is empty - all records are deleted. 
Poco::File(part_path).remove(true); - checksums.files.clear(); - return checksums; + return; + } + + if (!storage.partition_expr_columns.empty()) + { + WriteBufferFromFile out(part_path + "partition.dat"); + HashingWriteBuffer out_hashing(out); + for (size_t i = 0; i < new_part->partition.size(); ++i) + storage.partition_expr_column_types[i]->serializeBinary(new_part->partition[i], out_hashing); + checksums.files["partition.dat"].file_size = out_hashing.count(); + checksums.files["partition.dat"].file_hash = out_hashing.getHash(); + } + + for (size_t i = 0; i < storage.minmax_idx_columns.size(); ++i) + { + String file_name = "minmax_" + escapeForFileName(storage.minmax_idx_columns[i]) + ".idx"; + const DataTypePtr & type = storage.minmax_idx_column_types[i]; + + WriteBufferFromFile out(part_path + file_name); + HashingWriteBuffer out_hashing(out); + type->serializeBinary(new_part->minmax_idx.min_column_values[i], out_hashing); + type->serializeBinary(new_part->minmax_idx.max_column_values[i], out_hashing); + checksums.files[file_name].file_size = out_hashing.count(); + checksums.files[file_name].file_hash = out_hashing.getHash(); } { /// Write a file with a description of columns. WriteBufferFromFile out(part_path + "columns.txt", 4096); - total_column_list.writeText(out); + total_column_list->writeText(out); } { @@ -431,17 +457,12 @@ MergeTreeData::DataPart::Checksums MergedBlockOutputStream::writeSuffixAndGetChe checksums.write(out); } - return checksums; -} - -MergeTreeData::DataPart::Checksums MergedBlockOutputStream::writeSuffixAndGetChecksums() -{ - return writeSuffixAndGetChecksums(columns_list, nullptr); -} - -MergeTreeData::DataPart::Index & MergedBlockOutputStream::getIndex() -{ - return index_columns; + new_part->size = marks_count; + new_part->modification_time = time(nullptr); + new_part->columns = *total_column_list; + new_part->index.swap(index_columns); + new_part->checksums = checksums; + new_part->size_in_bytes = MergeTreeData::DataPart::calcTotalSize(new_part->getFullPath()); } size_t MergedBlockOutputStream::marksCount() diff --git a/dbms/src/Storages/MergeTree/MergedBlockOutputStream.h b/dbms/src/Storages/MergeTree/MergedBlockOutputStream.h index f8081d0aa17..b0da59b96d0 100644 --- a/dbms/src/Storages/MergeTree/MergedBlockOutputStream.h +++ b/dbms/src/Storages/MergeTree/MergedBlockOutputStream.h @@ -122,13 +122,10 @@ public: void writeSuffix() override; - MergeTreeData::DataPart::Checksums writeSuffixAndGetChecksums( - const NamesAndTypesList & total_column_list, - MergeTreeData::DataPart::Checksums * additional_column_checksums = nullptr); - - MergeTreeData::DataPart::Checksums writeSuffixAndGetChecksums(); - - MergeTreeData::DataPart::Index & getIndex(); + void writeSuffixAndFinalizePart( + MergeTreeData::MutableDataPartPtr & new_part, + const NamesAndTypesList * total_columns_list = nullptr, + MergeTreeData::DataPart::Checksums * additional_column_checksums = nullptr); /// How many marks are already written. 
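// The checksummed-file pattern used above for partition.dat and minmax_*.idx,
// isolated as a sketch (the file name "example.dat" is hypothetical): stream the
// payload through HashingWriteBuffer so its size and hash land in checksums.txt,
// where loadChecksums() can verify the file on the next load.
WriteBufferFromFile plain_out(part_path + "example.dat");
HashingWriteBuffer hashing_out(plain_out);
writeIntText(42, hashing_out);
checksums.files["example.dat"].file_size = hashing_out.count();
checksums.files["example.dat"].file_hash = hashing_out.getHash();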
size_t marksCount(); diff --git a/dbms/src/Storages/MergeTree/ShardedPartitionUploader.cpp b/dbms/src/Storages/MergeTree/ShardedPartitionUploader.cpp index 73f0da0912d..82385a45c71 100644 --- a/dbms/src/Storages/MergeTree/ShardedPartitionUploader.cpp +++ b/dbms/src/Storages/MergeTree/ShardedPartitionUploader.cpp @@ -105,7 +105,6 @@ void Service::processQuery(const Poco::Net::HTMLForm & params, ReadBuffer & body data_part->modification_time = time(nullptr); data_part->loadColumnsChecksumsIndexes(true, false); - data_part->is_sharded = false; data_part->checksums.checkEqual(checksums, false); /// Now store permanently the received part. From 69069219321ff1015a5f3ccec48efc7d066f8f71 Mon Sep 17 00:00:00 2001 From: Alexey Zatelepin Date: Wed, 30 Aug 2017 23:23:29 +0300 Subject: [PATCH 10/63] load partition and minmax index from disk [#CLICKHOUSE-3000] --- dbms/src/IO/BufferWithOwnMemory.h | 4 +- dbms/src/IO/ReadBufferFromFile.h | 2 + dbms/src/IO/ReadBufferFromFileBase.h | 1 + dbms/src/IO/ReadBufferFromFileDescriptor.h | 2 + .../Storages/MergeTree/MergeTreeDataPart.cpp | 77 +++++++++++++++---- 5 files changed, 70 insertions(+), 16 deletions(-) diff --git a/dbms/src/IO/BufferWithOwnMemory.h b/dbms/src/IO/BufferWithOwnMemory.h index 43040efd042..d0874feecb1 100644 --- a/dbms/src/IO/BufferWithOwnMemory.h +++ b/dbms/src/IO/BufferWithOwnMemory.h @@ -44,12 +44,12 @@ struct Memory : boost::noncopyable, Allocator dealloc(); } - Memory(Memory && rhs) + Memory(Memory && rhs) noexcept { *this = std::move(rhs); } - Memory & operator=(Memory && rhs) + Memory & operator=(Memory && rhs) noexcept { std::swap(m_capacity, rhs.m_capacity); std::swap(m_size, rhs.m_size); diff --git a/dbms/src/IO/ReadBufferFromFile.h b/dbms/src/IO/ReadBufferFromFile.h index 90f7d293351..087e612c6ee 100644 --- a/dbms/src/IO/ReadBufferFromFile.h +++ b/dbms/src/IO/ReadBufferFromFile.h @@ -32,6 +32,8 @@ public: ReadBufferFromFile(int fd, const std::string & original_file_name = {}, size_t buf_size = DBMS_DEFAULT_BUFFER_SIZE, int flags = -1, char * existing_memory = nullptr, size_t alignment = 0); + ReadBufferFromFile(ReadBufferFromFile &&) = default; + ~ReadBufferFromFile() override; /// Close file before destruction of object. 
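// Commit 10 begins by making the IO buffers movable: loading part metadata wants a
// helper that returns a ReadBufferFromFile by value, which needs the defaulted move
// constructors added below, and Memory's moves are noexcept so containers of buffers
// move rather than copy on reallocation. A compile-time sketch of that guarantee
// (requires <type_traits>):
static_assert(std::is_nothrow_move_constructible<Memory>::value,
              "Memory is expected to be nothrow-movable after this change");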
diff --git a/dbms/src/IO/ReadBufferFromFileBase.h b/dbms/src/IO/ReadBufferFromFileBase.h
index 1f4952c8c89..dbbc667edff 100644
--- a/dbms/src/IO/ReadBufferFromFileBase.h
+++ b/dbms/src/IO/ReadBufferFromFileBase.h
@@ -18,6 +18,7 @@ class ReadBufferFromFileBase : public BufferWithOwnMemory<ReadBuffer>
 {
 public:
     ReadBufferFromFileBase(size_t buf_size, char * existing_memory, size_t alignment);
+    ReadBufferFromFileBase(ReadBufferFromFileBase &&) = default;
     virtual ~ReadBufferFromFileBase();
     off_t seek(off_t off, int whence = SEEK_SET);
     virtual off_t getPositionInFile() = 0;
diff --git a/dbms/src/IO/ReadBufferFromFileDescriptor.h b/dbms/src/IO/ReadBufferFromFileDescriptor.h
index e5bd3e05142..1331878912e 100644
--- a/dbms/src/IO/ReadBufferFromFileDescriptor.h
+++ b/dbms/src/IO/ReadBufferFromFileDescriptor.h
@@ -26,6 +26,8 @@ public:
     ReadBufferFromFileDescriptor(int fd_, size_t buf_size = DBMS_DEFAULT_BUFFER_SIZE, char * existing_memory = nullptr, size_t alignment = 0)
         : ReadBufferFromFileBase(buf_size, existing_memory, alignment), fd(fd_), pos_in_file(0) {}

+    ReadBufferFromFileDescriptor(ReadBufferFromFileDescriptor &&) = default;
+
     int getFD() const override
     {
         return fd;
diff --git a/dbms/src/Storages/MergeTree/MergeTreeDataPart.cpp b/dbms/src/Storages/MergeTree/MergeTreeDataPart.cpp
index 333b43b73fd..ff8f9f4b2f4 100644
--- a/dbms/src/Storages/MergeTree/MergeTreeDataPart.cpp
+++ b/dbms/src/Storages/MergeTree/MergeTreeDataPart.cpp
@@ -614,6 +614,12 @@ void MergeTreeDataPart::loadColumnsChecksumsIndexes(bool require_columns_checksums, bool check_consistency)
 }

+static ReadBufferFromFile openForReading(const String & path)
+{
+    return ReadBufferFromFile(path, std::min(static_cast<size_t>(DBMS_DEFAULT_BUFFER_SIZE), Poco::File(path).getSize()));
+}
+
+
 void MergeTreeDataPart::loadIndex()
 {
     /// Size - in number of marks.
@@ -640,8 +646,7 @@ void MergeTreeDataPart::loadIndex() } String index_path = getFullPath() + "primary.idx"; - ReadBufferFromFile index_file(index_path, - std::min(static_cast(DBMS_DEFAULT_BUFFER_SIZE), Poco::File(index_path).getSize())); + ReadBufferFromFile index_file = openForReading(index_path); for (size_t i = 0; i < size; ++i) for (size_t j = 0; j < key_size; ++j) @@ -676,7 +681,28 @@ void MergeTreeDataPart::loadPartitionAndMinMaxIndex() minmax_idx.initialized = true; } else - throw Exception("TODO", ErrorCodes::LOGICAL_ERROR); + { + if (!storage.partition_expr_column_types.empty()) + { + ReadBufferFromFile file = openForReading(getFullPath() + "partition.dat"); + partition.resize(storage.partition_expr_column_types.size()); + for (size_t i = 0; i < storage.partition_expr_column_types.size(); ++i) + storage.partition_expr_column_types[i]->deserializeBinary(partition[i], file); + } + + size_t minmax_idx_size = storage.minmax_idx_column_types.size(); + minmax_idx.min_column_values.resize(minmax_idx_size); + minmax_idx.max_column_values.resize(minmax_idx_size); + for (size_t i = 0; i < minmax_idx_size; ++i) + { + String file_name = getFullPath() + "minmax_" + escapeForFileName(storage.minmax_idx_columns[i]) + ".idx"; + ReadBufferFromFile file = openForReading(file_name); + const DataTypePtr & type = storage.minmax_idx_column_types[i]; + type->deserializeBinary(minmax_idx.min_column_values[i], file); + type->deserializeBinary(minmax_idx.max_column_values[i], file); + } + minmax_idx.initialized = true; + } } void MergeTreeDataPart::loadChecksums(bool require) @@ -689,7 +715,7 @@ void MergeTreeDataPart::loadChecksums(bool require) return; } - ReadBufferFromFile file(path, std::min(static_cast(DBMS_DEFAULT_BUFFER_SIZE), Poco::File(path).getSize())); + ReadBufferFromFile file = openForReading(path); if (checksums.read(file)) assertEOF(file); } @@ -727,7 +753,7 @@ void MergeTreeDataPart::loadColumns(bool require) return; } - ReadBufferFromFile file(path, std::min(static_cast(DBMS_DEFAULT_BUFFER_SIZE), Poco::File(path).getSize())); + ReadBufferFromFile file = openForReading(path); columns.readText(file); } @@ -751,22 +777,45 @@ void MergeTreeDataPart::checkConsistency(bool require_part_metadata) } } + if (storage.format_version > 0) + { + if (!storage.partition_expr_columns.empty() && !checksums.files.count("partition.dat")) + throw Exception("No checksum for partition.dat", ErrorCodes::NO_FILE_IN_DATA_PART); + + for (const String & col_name : storage.minmax_idx_columns) + { + if (!checksums.files.count("minmax_" + escapeForFileName(col_name) + ".idx")) + throw Exception("No minmax idx file checksum for column " + col_name, ErrorCodes::NO_FILE_IN_DATA_PART); + } + } + checksums.checkSizes(path); } else { - if (!storage.sort_descr.empty()) + auto check_file_not_empty = [&path](const String & file_path) { - /// Check that the primary key is not empty. - Poco::File index_file(path + "primary.idx"); + Poco::File file(file_path); + if (!file.exists() || file.getSize() == 0) + throw Exception("Part " + path + " is broken: " + file_path + " is empty", ErrorCodes::BAD_SIZE_OF_FILE_IN_DATA_PART); + return file.getSize(); + }; - if (!index_file.exists() || index_file.getSize() == 0) - throw Exception("Part " + path + " is broken: primary key is empty.", ErrorCodes::BAD_SIZE_OF_FILE_IN_DATA_PART); + /// Check that the primary key index is not empty. 
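// Resulting on-disk layout of a part in the new format, as read back above
// (illustrative listing; one minmax file per column used by the partition key):
//   201709_1_5_2/
//     partition.dat           -- binary values of the partition expression
//     minmax_EventDate.idx    -- binary min, then max, for the column
//     primary.idx             -- primary key marks
//     columns.txt             -- list of columns with their types
//     checksums.txt           -- sizes and hashes of the files above
//     <column>.bin, <column>.mrk per column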
+ if (!storage.sort_descr.empty()) + check_file_not_empty(path + "primary.idx"); + + if (storage.format_version > 0) + { + if (!storage.partition_expr_columns.empty()) + check_file_not_empty(path + "partition.dat"); + + for (const String & col_name : storage.minmax_idx_columns) + check_file_not_empty(path + "minmax_" + escapeForFileName(col_name) + ".idx"); } /// Check that all marks are nonempty and have the same size. - - auto check_marks = [](const std::string & path, const NamesAndTypesList & columns, const std::string & extension) + auto check_marks = [&path](const NamesAndTypesList & columns, const std::string & extension) { ssize_t marks_size = -1; for (const NameAndTypePair & it : columns) @@ -794,8 +843,8 @@ void MergeTreeDataPart::checkConsistency(bool require_part_metadata) } }; - check_marks(path, columns, ".mrk"); - check_marks(path, columns, ".null.mrk"); + check_marks(columns, ".mrk"); + check_marks(columns, ".null.mrk"); } } From 71ebf67f2e0d759a5a1ef32aa78919ba83d49a32 Mon Sep 17 00:00:00 2001 From: Alexey Zatelepin Date: Thu, 31 Aug 2017 18:40:34 +0300 Subject: [PATCH 11/63] encapsulate partition and minmax_idx load/store code [#CLICKHOUSE-3000] --- .../Storages/MergeTree/MergeTreeDataMerger.h | 2 +- .../Storages/MergeTree/MergeTreeDataPart.cpp | 130 +++++++++++------- .../Storages/MergeTree/MergeTreeDataPart.h | 65 ++++++--- .../MergeTree/MergeTreeDataSelectExecutor.cpp | 2 +- .../MergeTree/MergeTreeDataWriter.cpp | 8 +- .../MergeTree/MergedBlockOutputStream.cpp | 24 +--- 6 files changed, 140 insertions(+), 91 deletions(-) diff --git a/dbms/src/Storages/MergeTree/MergeTreeDataMerger.h b/dbms/src/Storages/MergeTree/MergeTreeDataMerger.h index e71be37eaa5..65c9974b443 100644 --- a/dbms/src/Storages/MergeTree/MergeTreeDataMerger.h +++ b/dbms/src/Storages/MergeTree/MergeTreeDataMerger.h @@ -27,7 +27,7 @@ public: MergeTreePartInfo part_info; MergeTreeData::DataPartsVector parts; - const Row & getPartition() const { return parts.front()->partition; } + const MergeTreeDataPart::Partition & getPartition() const { return parts.front()->partition; } FuturePart() = default; explicit FuturePart(MergeTreeData::DataPartsVector parts_) diff --git a/dbms/src/Storages/MergeTree/MergeTreeDataPart.cpp b/dbms/src/Storages/MergeTree/MergeTreeDataPart.cpp index ff8f9f4b2f4..30064d457de 100644 --- a/dbms/src/Storages/MergeTree/MergeTreeDataPart.cpp +++ b/dbms/src/Storages/MergeTree/MergeTreeDataPart.cpp @@ -6,6 +6,7 @@ #include #include #include +#include #include #include #include @@ -285,12 +286,75 @@ const MergeTreeDataPartChecksums::Checksum * MergeTreeDataPart::tryGetBinChecksu } -void MinMaxIndex::update(const Block & block, const Names & column_names) +static ReadBufferFromFile openForReading(const String & path) +{ + return ReadBufferFromFile(path, std::min(static_cast(DBMS_DEFAULT_BUFFER_SIZE), Poco::File(path).getSize())); +} + + +void MergeTreeDataPart::Partition::load(const MergeTreeData & storage, const String & part_path) +{ + if (storage.partition_expr_column_types.empty()) + return; + + ReadBufferFromFile file = openForReading(part_path + "partition.dat"); + value.resize(storage.partition_expr_column_types.size()); + for (size_t i = 0; i < storage.partition_expr_column_types.size(); ++i) + storage.partition_expr_column_types[i]->deserializeBinary(value[i], file); +} + +void MergeTreeDataPart::Partition::store(const MergeTreeData & storage, const String & part_path, Checksums & checksums) const +{ + if (storage.partition_expr_columns.empty()) + return; + + WriteBufferFromFile 
out(part_path + "partition.dat"); + HashingWriteBuffer out_hashing(out); + for (size_t i = 0; i < value.size(); ++i) + storage.partition_expr_column_types[i]->serializeBinary(value[i], out_hashing); + checksums.files["partition.dat"].file_size = out_hashing.count(); + checksums.files["partition.dat"].file_hash = out_hashing.getHash(); +} + + +void MergeTreeDataPart::MinMaxIndex::load(const MergeTreeData & storage, const String & part_path) +{ + size_t minmax_idx_size = storage.minmax_idx_column_types.size(); + min_values.resize(minmax_idx_size); + max_values.resize(minmax_idx_size); + for (size_t i = 0; i < minmax_idx_size; ++i) + { + String file_name = part_path + "minmax_" + escapeForFileName(storage.minmax_idx_columns[i]) + ".idx"; + ReadBufferFromFile file = openForReading(file_name); + const DataTypePtr & type = storage.minmax_idx_column_types[i]; + type->deserializeBinary(min_values[i], file); + type->deserializeBinary(max_values[i], file); + } + initialized = true; +} + +void MergeTreeDataPart::MinMaxIndex::store(const MergeTreeData & storage, const String & part_path, Checksums & checksums) const +{ + for (size_t i = 0; i < storage.minmax_idx_columns.size(); ++i) + { + String file_name = "minmax_" + escapeForFileName(storage.minmax_idx_columns[i]) + ".idx"; + const DataTypePtr & type = storage.minmax_idx_column_types[i]; + + WriteBufferFromFile out(part_path + file_name); + HashingWriteBuffer out_hashing(out); + type->serializeBinary(min_values[i], out_hashing); + type->serializeBinary(max_values[i], out_hashing); + checksums.files[file_name].file_size = out_hashing.count(); + checksums.files[file_name].file_hash = out_hashing.getHash(); + } +} + +void MergeTreeDataPart::MinMaxIndex::update(const Block & block, const Names & column_names) { if (!initialized) { - min_column_values.resize(column_names.size()); - max_column_values.resize(column_names.size()); + min_values.resize(column_names.size()); + max_values.resize(column_names.size()); } for (size_t i = 0; i < column_names.size(); ++i) @@ -302,36 +366,36 @@ void MinMaxIndex::update(const Block & block, const Names & column_names) if (!initialized) { - min_column_values[i] = Field(min_value); - max_column_values[i] = Field(max_value); + min_values[i] = Field(min_value); + max_values[i] = Field(max_value); } else { - min_column_values[i] = std::min(min_column_values[i], min_value); - max_column_values[i] = std::max(max_column_values[i], max_value); + min_values[i] = std::min(min_values[i], min_value); + max_values[i] = std::max(max_values[i], max_value); } } initialized = true; } -void MinMaxIndex::merge(const MinMaxIndex & other) +void MergeTreeDataPart::MinMaxIndex::merge(const MinMaxIndex & other) { if (!other.initialized) return; if (!initialized) { - min_column_values.assign(other.min_column_values); - max_column_values.assign(other.max_column_values); + min_values.assign(other.min_values); + max_values.assign(other.max_values); initialized = true; } else { - for (size_t i = 0; i < min_column_values.size(); ++i) + for (size_t i = 0; i < min_values.size(); ++i) { - min_column_values[i] = std::min(min_column_values[i], other.min_column_values[i]); - max_column_values[i] = std::max(max_column_values[i], other.max_column_values[i]); + min_values[i] = std::min(min_values[i], other.min_values[i]); + max_values[i] = std::max(max_values[i], other.max_values[i]); } } } @@ -446,7 +510,7 @@ String MergeTreeDataPart::getNameWithPrefix() const DayNum_t MergeTreeDataPart::getMinDate() const { if (storage.minmax_idx_date_column_pos != -1) - 
return DayNum_t(minmax_idx.min_column_values[storage.minmax_idx_date_column_pos].get()); + return DayNum_t(minmax_idx.min_values[storage.minmax_idx_date_column_pos].get()); else return DayNum_t(); } @@ -455,7 +519,7 @@ DayNum_t MergeTreeDataPart::getMinDate() const DayNum_t MergeTreeDataPart::getMaxDate() const { if (storage.minmax_idx_date_column_pos != -1) - return DayNum_t(minmax_idx.max_column_values[storage.minmax_idx_date_column_pos].get()); + return DayNum_t(minmax_idx.max_values[storage.minmax_idx_date_column_pos].get()); else return DayNum_t(); } @@ -614,12 +678,6 @@ void MergeTreeDataPart::loadColumnsChecksumsIndexes(bool require_columns_checksu } -static ReadBufferFromFile openForReading(const String & path) -{ - return ReadBufferFromFile(path, std::min(static_cast(DBMS_DEFAULT_BUFFER_SIZE), Poco::File(path).getSize())); -} - - void MergeTreeDataPart::loadIndex() { /// Size - in number of marks. @@ -674,34 +732,14 @@ void MergeTreeDataPart::loadPartitionAndMinMaxIndex() MergeTreePartInfo::parseMinMaxDatesFromPartName(name, min_date, max_date); const auto & date_lut = DateLUT::instance(); - partition = Row(1, static_cast(date_lut.toNumYYYYMM(min_date))); - - minmax_idx.min_column_values = Row(1, static_cast(min_date)); - minmax_idx.max_column_values = Row(1, static_cast(max_date)); - minmax_idx.initialized = true; + partition = Partition(date_lut.toNumYYYYMM(min_date)); + minmax_idx = MinMaxIndex(min_date, max_date); } else { - if (!storage.partition_expr_column_types.empty()) - { - ReadBufferFromFile file = openForReading(getFullPath() + "partition.dat"); - partition.resize(storage.partition_expr_column_types.size()); - for (size_t i = 0; i < storage.partition_expr_column_types.size(); ++i) - storage.partition_expr_column_types[i]->deserializeBinary(partition[i], file); - } - - size_t minmax_idx_size = storage.minmax_idx_column_types.size(); - minmax_idx.min_column_values.resize(minmax_idx_size); - minmax_idx.max_column_values.resize(minmax_idx_size); - for (size_t i = 0; i < minmax_idx_size; ++i) - { - String file_name = getFullPath() + "minmax_" + escapeForFileName(storage.minmax_idx_columns[i]) + ".idx"; - ReadBufferFromFile file = openForReading(file_name); - const DataTypePtr & type = storage.minmax_idx_column_types[i]; - type->deserializeBinary(minmax_idx.min_column_values[i], file); - type->deserializeBinary(minmax_idx.max_column_values[i], file); - } - minmax_idx.initialized = true; + String full_path = getFullPath(); + partition.load(storage, full_path); + minmax_idx.load(storage, full_path); } } diff --git a/dbms/src/Storages/MergeTree/MergeTreeDataPart.h b/dbms/src/Storages/MergeTree/MergeTreeDataPart.h index e6814b8a11d..c705023cf0a 100644 --- a/dbms/src/Storages/MergeTree/MergeTreeDataPart.h +++ b/dbms/src/Storages/MergeTree/MergeTreeDataPart.h @@ -80,21 +80,6 @@ struct MergeTreeDataPartChecksums }; -/// Index that for each part stores min and max values of a set of columns. This allows quickly excluding -/// parts based on conditions on these columns imposed by a query. -/// Currently this index is built using only columns required by partition expression, but in principle it -/// can be built using any set of columns. 
-struct MinMaxIndex -{ - void update(const Block & block, const Names & column_names); - void merge(const MinMaxIndex & other); - - bool initialized = false; - Row min_column_values; - Row max_column_values; -}; - - class MergeTreeData; @@ -142,8 +127,6 @@ struct MergeTreeDataPart String name; MergeTreePartInfo info; - Row partition; - /// A directory path (realative to storage's path) where part data is actually stored /// Examples: 'detached/tmp_fetch_', 'tmp_', '' mutable String relative_path; @@ -166,6 +149,54 @@ struct MergeTreeDataPart using Index = Columns; Index index; + struct Partition + { + Row value; + + public: + Partition() = default; + explicit Partition(Row value_) : value(std::move(value_)) {} + + /// For month-based partitioning. + explicit Partition(UInt32 yyyymm) : value(1, static_cast(yyyymm)) {} + + void load(const MergeTreeData & storage, const String & part_path); + void store(const MergeTreeData & storage, const String & part_path, Checksums & checksums) const; + + void assign(const Partition & other) { value.assign(other.value); } + + }; + + Partition partition; + + /// Index that for each part stores min and max values of a set of columns. This allows quickly excluding + /// parts based on conditions on these columns imposed by a query. + /// Currently this index is built using only columns required by partition expression, but in principle it + /// can be built using any set of columns. + struct MinMaxIndex + { + Row min_values; + Row max_values; + bool initialized = false; + + public: + MinMaxIndex() = default; + + /// For month-based partitioning. + MinMaxIndex(DayNum_t min_date, DayNum_t max_date) + : min_values(1, static_cast(min_date)) + , max_values(1, static_cast(max_date)) + , initialized(true) + { + } + + void load(const MergeTreeData & storage, const String & part_path); + void store(const MergeTreeData & storage, const String & part_path, Checksums & checksums) const; + + void update(const Block & block, const Names & column_names); + void merge(const MinMaxIndex & other); + }; + MinMaxIndex minmax_idx; Checksums checksums; diff --git a/dbms/src/Storages/MergeTree/MergeTreeDataSelectExecutor.cpp b/dbms/src/Storages/MergeTree/MergeTreeDataSelectExecutor.cpp index 04f912a5372..ef2324cd853 100644 --- a/dbms/src/Storages/MergeTree/MergeTreeDataSelectExecutor.cpp +++ b/dbms/src/Storages/MergeTree/MergeTreeDataSelectExecutor.cpp @@ -234,7 +234,7 @@ BlockInputStreams MergeTreeDataSelectExecutor::read( if (!minmax_idx_condition.mayBeTrueInRange( data.minmax_idx_columns.size(), - &part->minmax_idx.min_column_values[0], &part->minmax_idx.max_column_values[0], + &part->minmax_idx.min_values[0], &part->minmax_idx.max_values[0], data.minmax_idx_column_types)) continue; diff --git a/dbms/src/Storages/MergeTree/MergeTreeDataWriter.cpp b/dbms/src/Storages/MergeTree/MergeTreeDataWriter.cpp index 5538eb22451..1d4853450b5 100644 --- a/dbms/src/Storages/MergeTree/MergeTreeDataWriter.cpp +++ b/dbms/src/Storages/MergeTree/MergeTreeDataWriter.cpp @@ -129,7 +129,7 @@ MergeTreeData::MutableDataPartPtr MergeTreeDataWriter::writeTempPart(BlockWithPa /// This will generate unique name in scope of current server process. 
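// Usage sketch of the encapsulated types above (signatures as introduced by this
// commit; `block`, `storage`, `part_path` and `checksums` are assumed context):
MergeTreeDataPart::MinMaxIndex idx;
idx.update(block, storage.minmax_idx_columns);   // widen min/max by one inserted block
idx.store(storage, part_path, checksums);        // persist as minmax_*.idx files
MergeTreeDataPart::Partition month(201709u);     // month-based partition value
month.store(storage, part_path, checksums);      // persist as partition.dat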
Int64 temp_index = data.insert_increment.get(); - MinMaxIndex minmax_idx; + MergeTreeDataPart::MinMaxIndex minmax_idx; minmax_idx.update(block, data.minmax_idx_columns); String new_partition_id = data.getPartitionIDFromData(block_with_partition.partition); @@ -138,8 +138,8 @@ MergeTreeData::MutableDataPartPtr MergeTreeDataWriter::writeTempPart(BlockWithPa String part_name; if (data.format_version == 0) { - DayNum_t min_date(minmax_idx.min_column_values[data.minmax_idx_date_column_pos].get()); - DayNum_t max_date(minmax_idx.max_column_values[data.minmax_idx_date_column_pos].get()); + DayNum_t min_date(minmax_idx.min_values[data.minmax_idx_date_column_pos].get()); + DayNum_t max_date(minmax_idx.max_values[data.minmax_idx_date_column_pos].get()); const auto & date_lut = DateLUT::instance(); @@ -155,7 +155,7 @@ MergeTreeData::MutableDataPartPtr MergeTreeDataWriter::writeTempPart(BlockWithPa part_name = new_part_info.getPartName(); MergeTreeData::MutableDataPartPtr new_data_part = std::make_shared(data, part_name, new_part_info); - new_data_part->partition = std::move(block_with_partition.partition); + new_data_part->partition = MergeTreeDataPart::Partition(std::move(block_with_partition.partition)); new_data_part->minmax_idx = std::move(minmax_idx); new_data_part->relative_path = TMP_PREFIX + part_name; new_data_part->is_temp = true; diff --git a/dbms/src/Storages/MergeTree/MergedBlockOutputStream.cpp b/dbms/src/Storages/MergeTree/MergedBlockOutputStream.cpp index b264ed4caae..aadc9bb712e 100644 --- a/dbms/src/Storages/MergeTree/MergedBlockOutputStream.cpp +++ b/dbms/src/Storages/MergeTree/MergedBlockOutputStream.cpp @@ -422,28 +422,8 @@ void MergedBlockOutputStream::writeSuffixAndFinalizePart( return; } - if (!storage.partition_expr_columns.empty()) - { - WriteBufferFromFile out(part_path + "partition.dat"); - HashingWriteBuffer out_hashing(out); - for (size_t i = 0; i < new_part->partition.size(); ++i) - storage.partition_expr_column_types[i]->serializeBinary(new_part->partition[i], out_hashing); - checksums.files["partition.dat"].file_size = out_hashing.count(); - checksums.files["partition.dat"].file_hash = out_hashing.getHash(); - } - - for (size_t i = 0; i < storage.minmax_idx_columns.size(); ++i) - { - String file_name = "minmax_" + escapeForFileName(storage.minmax_idx_columns[i]) + ".idx"; - const DataTypePtr & type = storage.minmax_idx_column_types[i]; - - WriteBufferFromFile out(part_path + file_name); - HashingWriteBuffer out_hashing(out); - type->serializeBinary(new_part->minmax_idx.min_column_values[i], out_hashing); - type->serializeBinary(new_part->minmax_idx.max_column_values[i], out_hashing); - checksums.files[file_name].file_size = out_hashing.count(); - checksums.files[file_name].file_hash = out_hashing.getHash(); - } + new_part->partition.store(storage, part_path, checksums); + new_part->minmax_idx.store(storage, part_path, checksums); { /// Write a file with a description of columns. 
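Note: the bodies of MinMaxIndex::update() and merge() are not shown in this patch. A minimal sketch of what update() has to do, assuming the standard IColumn::getExtremes() interface (the actual implementation may differ in details):

    /// Sketch only: widen the per-column [min, max] ranges with the values
    /// present in the block that is being written to the part.
    void MergeTreeDataPart::MinMaxIndex::update(const Block & block, const Names & column_names)
    {
        if (!initialized)
        {
            min_values.resize(column_names.size());
            max_values.resize(column_names.size());
        }

        for (size_t i = 0; i < column_names.size(); ++i)
        {
            Field min_value;
            Field max_value;
            block.getByName(column_names[i]).column->getExtremes(min_value, max_value);

            if (!initialized)
            {
                min_values[i] = min_value;
                max_values[i] = max_value;
            }
            else
            {
                min_values[i] = std::min(min_values[i], min_value);
                max_values[i] = std::max(max_values[i], max_value);
            }
        }

        initialized = true;
    }

With this encapsulation the writer only needs the two calls visible in the hunks above: minmax_idx.update(block, data.minmax_idx_columns) while writing, and minmax_idx.store(...) when the part is finalized.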
From 25bb444d5ac4447fbe827605bc025cd83bd2a454 Mon Sep 17 00:00:00 2001 From: Alexey Zatelepin Date: Fri, 1 Sep 2017 23:33:17 +0300 Subject: [PATCH 12/63] absence of partitioning works [#CLICKHOUSE-3000] --- dbms/src/Storages/MergeTree/MergeTreeData.cpp | 44 +++++++++++++------ dbms/src/Storages/MergeTree/MergeTreeData.h | 3 +- .../Storages/MergeTree/MergeTreeDataPart.cpp | 8 ++-- .../MergeTree/MergeTreeDataSelectExecutor.cpp | 25 +++++++---- .../MergeTree/MergeTreeDataWriter.cpp | 2 +- 5 files changed, 52 insertions(+), 30 deletions(-) diff --git a/dbms/src/Storages/MergeTree/MergeTreeData.cpp b/dbms/src/Storages/MergeTree/MergeTreeData.cpp index 65a3a6c1df2..501e8bf6ea2 100644 --- a/dbms/src/Storages/MergeTree/MergeTreeData.cpp +++ b/dbms/src/Storages/MergeTree/MergeTreeData.cpp @@ -126,7 +126,15 @@ MergeTreeData::MergeTreeData( throw Exception("Primary key could be empty only for UnsortedMergeTree", ErrorCodes::BAD_ARGUMENTS); initPrimaryKey(); - initPartitionKey(); + + ASTPtr partition_expr_ast; + { + String partition_expr_str = "toYYYYMM(" + date_column_name + ")"; + ParserNotEmptyExpressionList parser(/* allow_alias_without_as_keyword = */ false); + partition_expr_ast = parseQuery( + parser, partition_expr_str.data(), partition_expr_str.data() + partition_expr_str.length(), "partition expression"); + } + initPartitionKey(partition_expr_ast); /// Creating directories, if not exist. Poco::File(full_path).createDirectories(); @@ -214,27 +222,30 @@ void MergeTreeData::initPrimaryKey() } -void MergeTreeData::initPartitionKey() +void MergeTreeData::initPartitionKey(const ASTPtr & partition_expr_ast) { - String partition_expr_str = "toYYYYMM(" + date_column_name + ")"; - ParserNotEmptyExpressionList parser(/* allow_alias_without_as_keyword = */ false); - partition_expr_ast = parseQuery( - parser, partition_expr_str.data(), partition_expr_str.data() + partition_expr_str.length(), "partition expression"); + if (!partition_expr_ast || partition_expr_ast->children.empty()) + return; + partition_expr = ExpressionAnalyzer(partition_expr_ast, context, nullptr, getColumnsList()).getActions(false); - partition_expr_columns.clear(); - partition_expr_column_types.clear(); for (const ASTPtr & ast : partition_expr_ast->children) { String col_name = ast->getColumnName(); partition_expr_columns.emplace_back(col_name); - partition_expr_column_types.emplace_back(partition_expr->getSampleBlock().getByName(col_name).type); + + const ColumnWithTypeAndName & element = partition_expr->getSampleBlock().getByName(col_name); + + if (element.column && element.column->isConst()) + throw Exception("Partition key cannot contain constants", ErrorCodes::ILLEGAL_COLUMN); + if (element.type->isNullable()) + throw Exception("Partition key cannot contain nullable columns", ErrorCodes::ILLEGAL_COLUMN); + + partition_expr_column_types.emplace_back(element.type); } + /// Add all columns used in the partition key to the min-max index. 
const NamesAndTypesList & minmax_idx_columns_with_types = partition_expr->getRequiredColumnsWithTypes(); minmax_idx_expr = std::make_shared(minmax_idx_columns_with_types, context.getSettingsRef()); - minmax_idx_columns.clear(); - minmax_idx_column_types.clear(); - minmax_idx_sort_descr.clear(); for (const NameAndTypePair & column : minmax_idx_columns_with_types) { minmax_idx_columns.emplace_back(column.name); @@ -242,6 +253,7 @@ void MergeTreeData::initPartitionKey() minmax_idx_sort_descr.emplace_back(column.name, 1, 1); } + /// Try to find the date column in columns used by the partition key (a common case). bool encountered_date_column = false; for (size_t i = 0; i < minmax_idx_column_types.size(); ++i) { @@ -1780,9 +1792,13 @@ String MergeTreeData::getPartitionIDFromQuery(const Field & partition) String MergeTreeData::getPartitionIDFromData(const Row & partition) { - /// Month-partitioning specific, TODO: generalize. - if (partition.size() != 1) + if (partition.size() != partition_expr_columns.size()) throw Exception("Invalid partition key size: " + toString(partition.size()), ErrorCodes::LOGICAL_ERROR); + + if (partition.empty()) + return "all"; + + /// Month-partitioning specific, TODO: generalize. if (partition[0].getType() != Field::Types::UInt64) throw Exception(String("Invalid partition key type: ") + partition[0].getTypeName(), ErrorCodes::LOGICAL_ERROR); return toString(partition[0].get()); diff --git a/dbms/src/Storages/MergeTree/MergeTreeData.h b/dbms/src/Storages/MergeTree/MergeTreeData.h index 4ea4446f6f1..7f537c68425 100644 --- a/dbms/src/Storages/MergeTree/MergeTreeData.h +++ b/dbms/src/Storages/MergeTree/MergeTreeData.h @@ -482,7 +482,6 @@ public: Block primary_key_sample; DataTypes primary_key_data_types; - ASTPtr partition_expr_ast; ExpressionActionsPtr partition_expr; Names partition_expr_columns; DataTypes partition_expr_column_types; @@ -552,7 +551,7 @@ private: void initPrimaryKey(); - void initPartitionKey(); + void initPartitionKey(const ASTPtr & partition_key_ast); /// Expression for column type conversion. /// If no conversions are needed, out_expression=nullptr. 
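Note: taken together, the MergeTreeData.cpp and MergeTreeData.h hunks above make the partition key optional: initPartitionKey() returns early on an empty expression list, leaving partition_expr null, and getPartitionIDFromData() maps the resulting empty partition Row to the reserved ID "all". An illustrative sketch of the writer-side effect (not code from the patch):

    /// Unpartitioned table: the partition key has zero columns, so every
    /// inserted block carries an empty partition Row.
    Row partition;                                                   /// empty
    String partition_id = data.getPartitionIDFromData(partition);   /// "all"
    /// With format_version > 0, part names then look like "all_1_1_0"
    /// rather than "201709_1_1_0".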
diff --git a/dbms/src/Storages/MergeTree/MergeTreeDataPart.cpp b/dbms/src/Storages/MergeTree/MergeTreeDataPart.cpp index 30064d457de..00909da14d3 100644 --- a/dbms/src/Storages/MergeTree/MergeTreeDataPart.cpp +++ b/dbms/src/Storages/MergeTree/MergeTreeDataPart.cpp @@ -294,7 +294,7 @@ static ReadBufferFromFile openForReading(const String & path) void MergeTreeDataPart::Partition::load(const MergeTreeData & storage, const String & part_path) { - if (storage.partition_expr_column_types.empty()) + if (!storage.partition_expr) return; ReadBufferFromFile file = openForReading(part_path + "partition.dat"); @@ -305,7 +305,7 @@ void MergeTreeDataPart::Partition::load(const MergeTreeData & storage, const Str void MergeTreeDataPart::Partition::store(const MergeTreeData & storage, const String & part_path, Checksums & checksums) const { - if (storage.partition_expr_columns.empty()) + if (!storage.partition_expr) return; WriteBufferFromFile out(part_path + "partition.dat"); @@ -817,7 +817,7 @@ void MergeTreeDataPart::checkConsistency(bool require_part_metadata) if (storage.format_version > 0) { - if (!storage.partition_expr_columns.empty() && !checksums.files.count("partition.dat")) + if (storage.partition_expr && !checksums.files.count("partition.dat")) throw Exception("No checksum for partition.dat", ErrorCodes::NO_FILE_IN_DATA_PART); for (const String & col_name : storage.minmax_idx_columns) @@ -845,7 +845,7 @@ void MergeTreeDataPart::checkConsistency(bool require_part_metadata) if (storage.format_version > 0) { - if (!storage.partition_expr_columns.empty()) + if (storage.partition_expr) check_file_not_empty(path + "partition.dat"); for (const String & col_name : storage.minmax_idx_columns) diff --git a/dbms/src/Storages/MergeTree/MergeTreeDataSelectExecutor.cpp b/dbms/src/Storages/MergeTree/MergeTreeDataSelectExecutor.cpp index ef2324cd853..985202d6dd9 100644 --- a/dbms/src/Storages/MergeTree/MergeTreeDataSelectExecutor.cpp +++ b/dbms/src/Storages/MergeTree/MergeTreeDataSelectExecutor.cpp @@ -17,6 +17,7 @@ #endif #include /// For calculations related to sampling coefficients. +#include #include #include @@ -213,13 +214,18 @@ BlockInputStreams MergeTreeDataSelectExecutor::read( throw Exception(exception_message.str(), ErrorCodes::INDEX_NOT_USED); } - PKCondition minmax_idx_condition( + std::experimental::optional minmax_idx_condition; + if (data.minmax_idx_expr) + { + minmax_idx_condition.emplace( query_info, context, available_real_and_virtual_columns, data.minmax_idx_sort_descr, data.minmax_idx_expr); - if (settings.force_index_by_date && minmax_idx_condition.alwaysUnknownOrTrue()) - throw Exception("Index by date (" + data.date_column_name + ") is not used and setting 'force_index_by_date' is set.", - ErrorCodes::INDEX_NOT_USED); + if (settings.force_index_by_date && minmax_idx_condition->alwaysUnknownOrTrue()) + throw Exception( + "Index by date (" + data.date_column_name + ") is not used and setting 'force_index_by_date' is set.", + ErrorCodes::INDEX_NOT_USED); + } /// Select the parts in which there can be data that satisfy `minmax_idx_condition` and that match the condition on `_part`, /// as well as `max_block_number_to_read`. 
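Note: the hunk above replaces an unconditionally constructed PKCondition with std::experimental::optional, so that tables without a min-max index simply skip the check. The same guard pattern in isolation, as a self-contained illustration (Condition and the values below are invented for the demo):

    #include <experimental/optional>
    #include <iostream>

    struct Condition
    {
        int lower;
        bool mayBeTrue(int value) const { return value >= lower; }
    };

    int main()
    {
        std::experimental::optional<Condition> cond;

        bool table_has_minmax_index = true;   /// assumption for the demo
        if (table_has_minmax_index)
            cond.emplace(Condition{10});      /// mirrors minmax_idx_condition.emplace(...)

        for (int part_max : {5, 15, 25})
        {
            /// A part is skipped only when the condition exists and rules it
            /// out, exactly like the guarded mayBeTrueInRange() check in the
            /// following hunk.
            if (cond && !cond->mayBeTrue(part_max))
                continue;
            std::cout << "reading part with max = " << part_max << "\n";
        }
    }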
@@ -232,10 +238,10 @@ BlockInputStreams MergeTreeDataSelectExecutor::read( if (part_values.find(part->name) == part_values.end()) continue; - if (!minmax_idx_condition.mayBeTrueInRange( - data.minmax_idx_columns.size(), - &part->minmax_idx.min_values[0], &part->minmax_idx.max_values[0], - data.minmax_idx_column_types)) + if (minmax_idx_condition && !minmax_idx_condition->mayBeTrueInRange( + data.minmax_idx_columns.size(), + &part->minmax_idx.min_values[0], &part->minmax_idx.max_values[0], + data.minmax_idx_column_types)) continue; if (max_block_number_to_read && part->info.max_block > max_block_number_to_read) @@ -474,7 +480,8 @@ BlockInputStreams MergeTreeDataSelectExecutor::read( } LOG_DEBUG(log, "Key condition: " << key_condition.toString()); - LOG_DEBUG(log, "MinMax index condition: " << minmax_idx_condition.toString()); + if (minmax_idx_condition) + LOG_DEBUG(log, "MinMax index condition: " << minmax_idx_condition->toString()); /// PREWHERE ExpressionActionsPtr prewhere_actions; diff --git a/dbms/src/Storages/MergeTree/MergeTreeDataWriter.cpp b/dbms/src/Storages/MergeTree/MergeTreeDataWriter.cpp index 1d4853450b5..4b071357490 100644 --- a/dbms/src/Storages/MergeTree/MergeTreeDataWriter.cpp +++ b/dbms/src/Storages/MergeTree/MergeTreeDataWriter.cpp @@ -71,7 +71,7 @@ BlocksWithPartition MergeTreeDataWriter::splitBlockIntoParts(const Block & block data.check(block, true); block.checkNumberOfRows(); - if (data.partition_expr_columns.empty()) /// Table is not partitioned. + if (!data.partition_expr) /// Table is not partitioned. { result.emplace_back(Block(block), Row()); return result; From f0995f274155805935be5609b6b0a9b2fb39ee72 Mon Sep 17 00:00:00 2001 From: Alexey Zatelepin Date: Tue, 5 Sep 2017 15:12:55 +0300 Subject: [PATCH 13/63] partition serialization into id and text [#CLICKHOUSE-3000] --- dbms/src/Storages/MergeTree/MergeTreeData.cpp | 21 ++---- dbms/src/Storages/MergeTree/MergeTreeData.h | 3 - .../Storages/MergeTree/MergeTreeDataPart.cpp | 71 +++++++++++++++++++ .../Storages/MergeTree/MergeTreeDataPart.h | 4 ++ .../MergeTree/MergeTreeDataWriter.cpp | 6 +- .../Storages/System/StorageSystemParts.cpp | 6 +- 6 files changed, 87 insertions(+), 24 deletions(-) diff --git a/dbms/src/Storages/MergeTree/MergeTreeData.cpp b/dbms/src/Storages/MergeTree/MergeTreeData.cpp index 501e8bf6ea2..8bdf87e6f07 100644 --- a/dbms/src/Storages/MergeTree/MergeTreeData.cpp +++ b/dbms/src/Storages/MergeTree/MergeTreeData.cpp @@ -14,6 +14,7 @@ #include #include #include +#include #include #include #include @@ -22,17 +23,17 @@ #include #include #include -#include #include #include -#include #include #include #include #include #include #include -#include +#include + +#include #include #include @@ -1790,20 +1791,6 @@ String MergeTreeData::getPartitionIDFromQuery(const Field & partition) return partition_id; } -String MergeTreeData::getPartitionIDFromData(const Row & partition) -{ - if (partition.size() != partition_expr_columns.size()) - throw Exception("Invalid partition key size: " + toString(partition.size()), ErrorCodes::LOGICAL_ERROR); - - if (partition.empty()) - return "all"; - - /// Month-partitioning specific, TODO: generalize. 
- if (partition[0].getType() != Field::Types::UInt64) - throw Exception(String("Invalid partition key type: ") + partition[0].getTypeName(), ErrorCodes::LOGICAL_ERROR); - return toString(partition[0].get()); -} - void MergeTreeData::Transaction::rollback() { if (data && (!parts_to_remove_on_rollback.empty() || !parts_to_add_on_rollback.empty())) diff --git a/dbms/src/Storages/MergeTree/MergeTreeData.h b/dbms/src/Storages/MergeTree/MergeTreeData.h index 7f537c68425..3b653153e34 100644 --- a/dbms/src/Storages/MergeTree/MergeTreeData.h +++ b/dbms/src/Storages/MergeTree/MergeTreeData.h @@ -463,9 +463,6 @@ public: /// For ATTACH/DETACH/DROP/RESHARD PARTITION. String getPartitionIDFromQuery(const Field & partition); - /// For determining the partition id of inserted blocks. - String getPartitionIDFromData(const Row & partition); - MergeTreeDataFormatVersion format_version; Context & context; diff --git a/dbms/src/Storages/MergeTree/MergeTreeDataPart.cpp b/dbms/src/Storages/MergeTree/MergeTreeDataPart.cpp index 00909da14d3..65907f7e5cd 100644 --- a/dbms/src/Storages/MergeTree/MergeTreeDataPart.cpp +++ b/dbms/src/Storages/MergeTree/MergeTreeDataPart.cpp @@ -8,12 +8,16 @@ #include #include #include +#include #include #include #include +#include +#include #include #include #include +#include #include #include @@ -292,6 +296,73 @@ static ReadBufferFromFile openForReading(const String & path) } +String MergeTreeDataPart::Partition::getID(const MergeTreeData & storage) const +{ + if (value.size() != storage.partition_expr_columns.size()) + throw Exception("Invalid partition key size: " + toString(value.size()), ErrorCodes::LOGICAL_ERROR); + + if (value.empty()) + return "all"; + + /// In case all partition fields are represented by integral types, try to produce a human-readable partition id. + /// Otherwise use a hex-encoded hash. 
+ bool are_all_integral = true; + for (const Field & field : value) + { + if (field.getType() != Field::Types::UInt64 && field.getType() != Field::Types::Int64) + { + are_all_integral = false; + break; + } + } + + String result; + + if (are_all_integral) + { + FieldVisitorToString to_string_visitor; + for (size_t i = 0; i < value.size(); ++i) + { + if (i > 0) + result += '-'; + + if (typeid_cast(storage.partition_expr_column_types[i].get())) + result += toString(DateLUT::instance().toNumYYYYMMDD(DayNum_t(value[i].get()))); + else + result += applyVisitor(to_string_visitor, value[i]); + } + + return result; + } + + SipHash hash; + FieldVisitorHash hashing_visitor(hash); + for (const Field & field : value) + applyVisitor(hashing_visitor, field); + + char hash_data[16]; + hash.get128(hash_data); + result.resize(32); + for (size_t i = 0; i < 16; ++i) + writeHexByteLowercase(hash_data[i], &result[2 * i]); + + return result; +} + +void MergeTreeDataPart::Partition::serializeTextQuoted(const MergeTreeData & storage, WriteBuffer & out) const +{ + for (size_t i = 0; i < storage.partition_expr_column_types.size(); ++i) + { + if (i > 0) + writeCString(", ", out); + + const DataTypePtr & type = storage.partition_expr_column_types[i]; + ColumnPtr column = type->createColumn(); + column->insert(value[i]); + type->serializeTextQuoted(*column, 0, out); + } +} + void MergeTreeDataPart::Partition::load(const MergeTreeData & storage, const String & part_path) { if (!storage.partition_expr) diff --git a/dbms/src/Storages/MergeTree/MergeTreeDataPart.h b/dbms/src/Storages/MergeTree/MergeTreeDataPart.h index c705023cf0a..f03344c2fe6 100644 --- a/dbms/src/Storages/MergeTree/MergeTreeDataPart.h +++ b/dbms/src/Storages/MergeTree/MergeTreeDataPart.h @@ -160,6 +160,10 @@ struct MergeTreeDataPart /// For month-based partitioning. 
explicit Partition(UInt32 yyyymm) : value(1, static_cast(yyyymm)) {} + String getID(const MergeTreeData & storage) const; + + void serializeTextQuoted(const MergeTreeData & storage, WriteBuffer & out) const; + void load(const MergeTreeData & storage, const String & part_path); void store(const MergeTreeData & storage, const String & part_path, Checksums & checksums) const; diff --git a/dbms/src/Storages/MergeTree/MergeTreeDataWriter.cpp b/dbms/src/Storages/MergeTree/MergeTreeDataWriter.cpp index 4b071357490..296d0b7265c 100644 --- a/dbms/src/Storages/MergeTree/MergeTreeDataWriter.cpp +++ b/dbms/src/Storages/MergeTree/MergeTreeDataWriter.cpp @@ -132,9 +132,9 @@ MergeTreeData::MutableDataPartPtr MergeTreeDataWriter::writeTempPart(BlockWithPa MergeTreeDataPart::MinMaxIndex minmax_idx; minmax_idx.update(block, data.minmax_idx_columns); - String new_partition_id = data.getPartitionIDFromData(block_with_partition.partition); + MergeTreeDataPart::Partition partition(std::move(block_with_partition.partition)); - MergeTreePartInfo new_part_info(new_partition_id, temp_index, temp_index, 0); + MergeTreePartInfo new_part_info(partition.getID(data), temp_index, temp_index, 0); String part_name; if (data.format_version == 0) { @@ -155,7 +155,7 @@ MergeTreeData::MutableDataPartPtr MergeTreeDataWriter::writeTempPart(BlockWithPa part_name = new_part_info.getPartName(); MergeTreeData::MutableDataPartPtr new_data_part = std::make_shared(data, part_name, new_part_info); - new_data_part->partition = MergeTreeDataPart::Partition(std::move(block_with_partition.partition)); + new_data_part->partition = std::move(partition); new_data_part->minmax_idx = std::move(minmax_idx); new_data_part->relative_path = TMP_PREFIX + part_name; new_data_part->is_temp = true; diff --git a/dbms/src/Storages/System/StorageSystemParts.cpp b/dbms/src/Storages/System/StorageSystemParts.cpp index b3835740f33..e1e568db1d1 100644 --- a/dbms/src/Storages/System/StorageSystemParts.cpp +++ b/dbms/src/Storages/System/StorageSystemParts.cpp @@ -202,7 +202,11 @@ BlockInputStreams StorageSystemParts::read( for (const MergeTreeData::DataPartPtr & part : all_parts) { size_t i = 0; - block.getByPosition(i++).column->insert(part->info.partition_id); + { + WriteBufferFromOwnString out; + part->partition.serializeTextQuoted(*data, out); + block.getByPosition(i++).column->insert(out.str()); + } block.getByPosition(i++).column->insert(part->name); block.getByPosition(i++).column->insert(static_cast(active_parts.count(part))); block.getByPosition(i++).column->insert(static_cast(part->size)); From 68cb59227725457df9074189d693232eca317d95 Mon Sep 17 00:00:00 2001 From: Alexey Zatelepin Date: Wed, 6 Sep 2017 23:34:26 +0300 Subject: [PATCH 14/63] allow arbitrary partition literals in ALTER PARTITION queries [#CLICKHOUSE-3000] --- dbms/src/Common/AutoArray.h | 2 +- dbms/src/Core/ErrorCodes.cpp | 2 +- .../Interpreters/InterpreterAlterQuery.cpp | 31 +++---- dbms/src/Interpreters/InterpreterAlterQuery.h | 14 ++-- .../Interpreters/InterpreterOptimizeQuery.cpp | 4 +- dbms/src/Parsers/ASTAlterQuery.h | 3 +- dbms/src/Parsers/ASTOptimizeQuery.h | 26 ++++-- dbms/src/Parsers/ASTPartition.cpp | 44 ++++++++++ dbms/src/Parsers/ASTPartition.h | 29 +++++++ dbms/src/Parsers/ExpressionElementParsers.cpp | 4 +- dbms/src/Parsers/ParserAlterQuery.cpp | 21 +++-- dbms/src/Parsers/ParserOptimizeQuery.cpp | 14 ++-- dbms/src/Parsers/ParserPartition.cpp | 81 ++++++++++++++++++ dbms/src/Parsers/ParserPartition.h | 17 ++++ dbms/src/Parsers/TokenIterator.h | 1 + 
dbms/src/Storages/IStorage.h | 14 ++-- dbms/src/Storages/MergeTree/MergeTreeData.cpp | 83 ++++++++++++++++--- dbms/src/Storages/MergeTree/MergeTreeData.h | 4 +- .../Storages/MergeTree/MergeTreeDataPart.cpp | 16 +++- dbms/src/Storages/StorageBuffer.cpp | 8 +- dbms/src/Storages/StorageBuffer.h | 2 +- dbms/src/Storages/StorageDistributed.cpp | 5 +- dbms/src/Storages/StorageDistributed.h | 2 +- dbms/src/Storages/StorageMaterializedView.cpp | 4 +- dbms/src/Storages/StorageMaterializedView.h | 2 +- dbms/src/Storages/StorageMergeTree.cpp | 34 +++++--- dbms/src/Storages/StorageMergeTree.h | 13 ++- .../Storages/StorageReplicatedMergeTree.cpp | 51 ++++++------ .../src/Storages/StorageReplicatedMergeTree.h | 14 ++-- dbms/src/Storages/StorageTrivialBuffer.cpp | 4 +- dbms/src/Storages/StorageTrivialBuffer.h | 2 +- 31 files changed, 404 insertions(+), 147 deletions(-) create mode 100644 dbms/src/Parsers/ASTPartition.cpp create mode 100644 dbms/src/Parsers/ASTPartition.h create mode 100644 dbms/src/Parsers/ParserPartition.cpp create mode 100644 dbms/src/Parsers/ParserPartition.h diff --git a/dbms/src/Common/AutoArray.h b/dbms/src/Common/AutoArray.h index 6f8d67539f3..a972559543f 100644 --- a/dbms/src/Common/AutoArray.h +++ b/dbms/src/Common/AutoArray.h @@ -48,7 +48,7 @@ public: setEmpty(); } - AutoArray(size_t size_) + explicit AutoArray(size_t size_) { init(size_, false); } diff --git a/dbms/src/Core/ErrorCodes.cpp b/dbms/src/Core/ErrorCodes.cpp index 8fbb1956941..6afa1361a5a 100644 --- a/dbms/src/Core/ErrorCodes.cpp +++ b/dbms/src/Core/ErrorCodes.cpp @@ -253,7 +253,7 @@ namespace ErrorCodes extern const int INVALID_NESTED_NAME = 245; extern const int CORRUPTED_DATA = 246; extern const int INCORRECT_MARK = 247; - extern const int INVALID_PARTITION_NAME = 248; + extern const int INVALID_PARTITION_VALUE = 248; extern const int NOT_ENOUGH_BLOCK_NUMBERS = 250; extern const int NO_SUCH_REPLICA = 251; extern const int TOO_MUCH_PARTS = 252; diff --git a/dbms/src/Interpreters/InterpreterAlterQuery.cpp b/dbms/src/Interpreters/InterpreterAlterQuery.cpp index 2834fb4636a..378f19eddad 100644 --- a/dbms/src/Interpreters/InterpreterAlterQuery.cpp +++ b/dbms/src/Interpreters/InterpreterAlterQuery.cpp @@ -60,19 +60,19 @@ BlockIO InterpreterAlterQuery::execute() switch (command.type) { case PartitionCommand::DROP_PARTITION: - table->dropPartition(query_ptr, command.partition, command.detach, context.getSettingsRef()); + table->dropPartition(query_ptr, command.partition, command.detach, context); break; case PartitionCommand::ATTACH_PARTITION: - table->attachPartition(query_ptr, command.partition, command.part, context.getSettingsRef()); + table->attachPartition(command.partition, command.part, context); break; case PartitionCommand::FETCH_PARTITION: - table->fetchPartition(command.partition, command.from, context.getSettingsRef()); + table->fetchPartition(command.partition, command.from, context); break; case PartitionCommand::FREEZE_PARTITION: - table->freezePartition(command.partition, command.with_name, context.getSettingsRef()); + table->freezePartition(command.partition, command.with_name, context); break; case PartitionCommand::RESHARD_PARTITION: @@ -82,7 +82,7 @@ BlockIO InterpreterAlterQuery::execute() break; case PartitionCommand::CLEAR_COLUMN: - table->clearColumnInPartition(query_ptr, command.partition, command.column_name, context.getSettingsRef()); + table->clearColumnInPartition(command.partition, command.column_name, context); break; } } @@ -136,10 +136,9 @@ void InterpreterAlterQuery::parseAlter( if 
(!params.clear_column) throw Exception("Can't DROP COLUMN from partition. It is possible only CLEAR COLUMN in partition", ErrorCodes::BAD_ARGUMENTS); - const Field & partition = typeid_cast(*(params.partition)).value; const Field & column_name = typeid_cast(*(params.column)).name; - out_partition_commands.emplace_back(PartitionCommand::clearColumn(partition, column_name)); + out_partition_commands.emplace_back(PartitionCommand::clearColumn(params.partition, column_name)); } else { @@ -185,30 +184,22 @@ void InterpreterAlterQuery::parseAlter( } else if (params.type == ASTAlterQuery::DROP_PARTITION) { - const Field & partition = dynamic_cast(*params.partition).value; - out_partition_commands.emplace_back(PartitionCommand::dropPartition(partition, params.detach)); + out_partition_commands.emplace_back(PartitionCommand::dropPartition(params.partition, params.detach)); } else if (params.type == ASTAlterQuery::ATTACH_PARTITION) { - const Field & partition = dynamic_cast(*params.partition).value; - out_partition_commands.emplace_back(PartitionCommand::attachPartition(partition, params.part)); + out_partition_commands.emplace_back(PartitionCommand::attachPartition(params.partition, params.part)); } else if (params.type == ASTAlterQuery::FETCH_PARTITION) { - const Field & partition = dynamic_cast(*params.partition).value; - out_partition_commands.emplace_back(PartitionCommand::fetchPartition(partition, params.from)); + out_partition_commands.emplace_back(PartitionCommand::fetchPartition(params.partition, params.from)); } else if (params.type == ASTAlterQuery::FREEZE_PARTITION) { - const Field & partition = dynamic_cast(*params.partition).value; - out_partition_commands.emplace_back(PartitionCommand::freezePartition(partition, params.with_name)); + out_partition_commands.emplace_back(PartitionCommand::freezePartition(params.partition, params.with_name)); } else if (params.type == ASTAlterQuery::RESHARD_PARTITION) { - Field partition; - if (params.partition) - partition = dynamic_cast(*params.partition).value; - WeightedZooKeeperPaths weighted_zookeeper_paths; const ASTs & ast_weighted_zookeeper_paths = typeid_cast(*params.weighted_zookeeper_paths).children; @@ -223,7 +214,7 @@ void InterpreterAlterQuery::parseAlter( coordinator = dynamic_cast(*params.coordinator).value; out_partition_commands.emplace_back(PartitionCommand::reshardPartitions( - partition, weighted_zookeeper_paths, params.sharding_key_expr, + params.partition, weighted_zookeeper_paths, params.sharding_key_expr, params.do_copy, coordinator)); } else diff --git a/dbms/src/Interpreters/InterpreterAlterQuery.h b/dbms/src/Interpreters/InterpreterAlterQuery.h index 4fd718cd781..438554692af 100644 --- a/dbms/src/Interpreters/InterpreterAlterQuery.h +++ b/dbms/src/Interpreters/InterpreterAlterQuery.h @@ -35,7 +35,7 @@ private: Type type; - Field partition; + ASTPtr partition; Field column_name; bool detach = false; /// true for DETACH PARTITION. 
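Note: the switch from Field to ASTPtr in PartitionCommand is what enables arbitrary partition literals: a compound value such as PARTITION (201709, 'us') cannot be interpreted as a single Field without knowing the table's partition key types, so the parsed AST is carried verbatim and resolved later by MergeTreeData::getPartitionIDFromQuery(). A hypothetical caller (the helper name is invented; the dropPartition() signature is the one introduced below in IStorage.h):

    /// The partition AST travels untouched from the parser to the storage,
    /// which resolves it against its own partition key types.
    void dropPartitionExample(IStorage & table, const ASTPtr & query,
                              const ASTPtr & partition, const Context & context)
    {
        table.dropPartition(query, partition, /* detach = */ false, context);
    }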
@@ -52,7 +52,7 @@ private: /// For FREEZE PARTITION String with_name; - static PartitionCommand dropPartition(const Field & partition, bool detach) + static PartitionCommand dropPartition(const ASTPtr & partition, bool detach) { PartitionCommand res; res.type = DROP_PARTITION; @@ -61,7 +61,7 @@ private: return res; } - static PartitionCommand clearColumn(const Field & partition, const Field & column_name) + static PartitionCommand clearColumn(const ASTPtr & partition, const Field & column_name) { PartitionCommand res; res.type = CLEAR_COLUMN; @@ -70,7 +70,7 @@ private: return res; } - static PartitionCommand attachPartition(const Field & partition, bool part) + static PartitionCommand attachPartition(const ASTPtr & partition, bool part) { PartitionCommand res; res.type = ATTACH_PARTITION; @@ -79,7 +79,7 @@ private: return res; } - static PartitionCommand fetchPartition(const Field & partition, const String & from) + static PartitionCommand fetchPartition(const ASTPtr & partition, const String & from) { PartitionCommand res; res.type = FETCH_PARTITION; @@ -88,7 +88,7 @@ private: return res; } - static PartitionCommand freezePartition(const Field & partition, const String & with_name) + static PartitionCommand freezePartition(const ASTPtr & partition, const String & with_name) { PartitionCommand res; res.type = FREEZE_PARTITION; @@ -97,7 +97,7 @@ private: return res; } - static PartitionCommand reshardPartitions(const Field & partition_, + static PartitionCommand reshardPartitions(const ASTPtr & partition_, const WeightedZooKeeperPaths & weighted_zookeeper_paths_, const ASTPtr & sharding_key_expr_, bool do_copy_, const Field & coordinator_) { diff --git a/dbms/src/Interpreters/InterpreterOptimizeQuery.cpp b/dbms/src/Interpreters/InterpreterOptimizeQuery.cpp index c9bce30271c..8504ffe3821 100644 --- a/dbms/src/Interpreters/InterpreterOptimizeQuery.cpp +++ b/dbms/src/Interpreters/InterpreterOptimizeQuery.cpp @@ -18,12 +18,12 @@ BlockIO InterpreterOptimizeQuery::execute() { const ASTOptimizeQuery & ast = typeid_cast(*query_ptr); - if (ast.final && ast.partition.empty()) + if (ast.final && !ast.partition) throw Exception("FINAL flag for OPTIMIZE query is meaningful only with specified PARTITION", ErrorCodes::BAD_ARGUMENTS); StoragePtr table = context.getTable(ast.database, ast.table); auto table_lock = table->lockStructure(true, __PRETTY_FUNCTION__); - table->optimize(query_ptr, ast.partition, ast.final, ast.deduplicate, context.getSettings()); + table->optimize(query_ptr, ast.partition, ast.final, ast.deduplicate, context); return {}; } diff --git a/dbms/src/Parsers/ASTAlterQuery.h b/dbms/src/Parsers/ASTAlterQuery.h index f49bc4c1435..8c9bcd09d82 100644 --- a/dbms/src/Parsers/ASTAlterQuery.h +++ b/dbms/src/Parsers/ASTAlterQuery.h @@ -60,9 +60,10 @@ public: */ ASTPtr primary_key; - /** In DROP PARTITION and RESHARD PARTITION queries, the name of the partition is stored here. + /** In DROP PARTITION and RESHARD PARTITION queries, the value or ID of the partition is stored here. */ ASTPtr partition; + bool detach = false; /// true for DETACH PARTITION bool part = false; /// true for ATTACH PART diff --git a/dbms/src/Parsers/ASTOptimizeQuery.h b/dbms/src/Parsers/ASTOptimizeQuery.h index c395dc18ee7..3caae258b41 100644 --- a/dbms/src/Parsers/ASTOptimizeQuery.h +++ b/dbms/src/Parsers/ASTOptimizeQuery.h @@ -16,7 +16,7 @@ public: String table; /// The partition to optimize can be specified. 
- String partition; + ASTPtr partition; /// A flag can be specified - perform optimization "to the end" instead of one step. bool final; /// Do deduplicate (default: false) @@ -26,9 +26,21 @@ public: ASTOptimizeQuery(const StringRange range_) : IAST(range_) {} /** Get the text that identifies this element. */ - String getID() const override { return "OptimizeQuery_" + database + "_" + table + "_" + partition + (final ? "_final" : "") + (deduplicate ? "_deduplicate" : ""); }; + String getID() const override { return "OptimizeQuery_" + database + "_" + table + (final ? "_final" : "") + (deduplicate ? "_deduplicate" : ""); }; - ASTPtr clone() const override { return std::make_shared(*this); } + ASTPtr clone() const override + { + auto res = std::make_shared(*this); + res->children.clear(); + + if (partition) + { + res->partition = partition->clone(); + res->children.push_back(res->partition); + } + + return res; + } protected: void formatImpl(const FormatSettings & settings, FormatState & state, FormatStateStacked frame) const override @@ -36,9 +48,11 @@ protected: settings.ostr << (settings.hilite ? hilite_keyword : "") << "OPTIMIZE TABLE " << (settings.hilite ? hilite_none : "") << (!database.empty() ? backQuoteIfNeed(database) + "." : "") << backQuoteIfNeed(table); - if (!partition.empty()) - settings.ostr << (settings.hilite ? hilite_keyword : "") << " PARTITION " << (settings.hilite ? hilite_none : "") - << partition; + if (partition) + { + settings.ostr << (settings.hilite ? hilite_keyword : "") << " PARTITION " << (settings.hilite ? hilite_none : ""); + partition->formatImpl(settings, state, frame); + } if (final) settings.ostr << (settings.hilite ? hilite_keyword : "") << " FINAL" << (settings.hilite ? hilite_none : ""); diff --git a/dbms/src/Parsers/ASTPartition.cpp b/dbms/src/Parsers/ASTPartition.cpp new file mode 100644 index 00000000000..6f0a256ec3b --- /dev/null +++ b/dbms/src/Parsers/ASTPartition.cpp @@ -0,0 +1,44 @@ +#include +#include + +namespace DB +{ + +String ASTPartition::getID() const +{ + if (value) + return "Partition"; + else + return "Partition_ID_" + id; +} + +ASTPtr ASTPartition::clone() const +{ + auto res = std::make_shared(*this); + res->children.clear(); + + if (value) + { + res->value = value->clone(); + res->children.push_back(res->value); + } + + return res; +} + +void ASTPartition::formatImpl(const FormatSettings & settings, FormatState & state, FormatStateStacked frame) const +{ + if (value) + { + value->formatImpl(settings, state, frame); + } + else + { + settings.ostr << (settings.hilite ? hilite_keyword : "") << "ID " << (settings.hilite ? hilite_none : ""); + WriteBufferFromOwnString id_buf; + writeQuoted(id, id_buf); + settings.ostr << id_buf.str(); + } +} + +} diff --git a/dbms/src/Parsers/ASTPartition.h b/dbms/src/Parsers/ASTPartition.h new file mode 100644 index 00000000000..9f78d56fca1 --- /dev/null +++ b/dbms/src/Parsers/ASTPartition.h @@ -0,0 +1,29 @@ +#pragma once + +#include +#include + + +namespace DB +{ + +/// Either a (possibly compound) expression representing a partition value or a partition ID. +class ASTPartition : public IAST +{ +public: + ASTPtr value; + StringRef fields_str; /// The extent of comma-separated partition expression fields without parentheses. 
+ size_t fields_count = 0; + + String id; + + ASTPartition() = default; + ASTPartition(StringRange range_) : IAST(range_) {} + String getID() const override; + ASTPtr clone() const override; + +protected: + void formatImpl(const FormatSettings & settings, FormatState & state, FormatStateStacked frame) const override; +}; + +} diff --git a/dbms/src/Parsers/ExpressionElementParsers.cpp b/dbms/src/Parsers/ExpressionElementParsers.cpp index c2203ae4f25..49d1e6da9c9 100644 --- a/dbms/src/Parsers/ExpressionElementParsers.cpp +++ b/dbms/src/Parsers/ExpressionElementParsers.cpp @@ -498,6 +498,8 @@ bool ParserStringLiteral::parseImpl(Pos & pos, ASTPtr & node, Expected & expecte if (pos->type != TokenType::StringLiteral) return false; + Pos begin = pos; + String s; ReadBufferFromMemory in(pos->begin, pos->size()); @@ -518,7 +520,7 @@ bool ParserStringLiteral::parseImpl(Pos & pos, ASTPtr & node, Expected & expecte } ++pos; - node = std::make_shared(StringRange(pos->begin, pos->end), s); + node = std::make_shared(StringRange(begin, pos), s); return true; } diff --git a/dbms/src/Parsers/ParserAlterQuery.cpp b/dbms/src/Parsers/ParserAlterQuery.cpp index 3f2c4334523..7b36f80d6bf 100644 --- a/dbms/src/Parsers/ParserAlterQuery.cpp +++ b/dbms/src/Parsers/ParserAlterQuery.cpp @@ -2,8 +2,8 @@ #include #include #include +#include #include -#include #include #include @@ -46,8 +46,7 @@ bool ParserAlterQuery::parseImpl(Pos & pos, ASTPtr & node, Expected & expected) ParserIdentifier table_parser; ParserCompoundIdentifier parser_name; ParserCompoundColumnDeclaration parser_col_decl; - ParserLiteral parser_literal; - ParserUnsignedInteger parser_uint; + ParserPartition parser_partition; ParserStringLiteral parser_string_literal; ASTPtr table; @@ -106,7 +105,7 @@ bool ParserAlterQuery::parseImpl(Pos & pos, ASTPtr & node, Expected & expected) } else if (s_drop_partition.ignore(pos, expected)) { - if (!parser_literal.parse(pos, params.partition, expected)) + if (!parser_partition.parse(pos, params.partition, expected)) return false; params.type = ASTAlterQuery::DROP_PARTITION; @@ -130,13 +129,13 @@ bool ParserAlterQuery::parseImpl(Pos & pos, ASTPtr & node, Expected & expected) if (s_in_partition.ignore(pos, expected)) { - if (!parser_literal.parse(pos, params.partition, expected)) + if (!parser_partition.parse(pos, params.partition, expected)) return false; } } else if (s_detach_partition.ignore(pos, expected)) { - if (!parser_literal.parse(pos, params.partition, expected)) + if (!parser_partition.parse(pos, params.partition, expected)) return false; params.type = ASTAlterQuery::DROP_PARTITION; @@ -144,14 +143,14 @@ bool ParserAlterQuery::parseImpl(Pos & pos, ASTPtr & node, Expected & expected) } else if (s_attach_partition.ignore(pos, expected)) { - if (!parser_literal.parse(pos, params.partition, expected)) + if (!parser_partition.parse(pos, params.partition, expected)) return false; params.type = ASTAlterQuery::ATTACH_PARTITION; } else if (s_attach_part.ignore(pos, expected)) { - if (!parser_literal.parse(pos, params.partition, expected)) + if (!parser_string_literal.parse(pos, params.partition, expected)) return false; params.part = true; @@ -159,7 +158,7 @@ bool ParserAlterQuery::parseImpl(Pos & pos, ASTPtr & node, Expected & expected) } else if (s_fetch_partition.ignore(pos, expected)) { - if (!parser_literal.parse(pos, params.partition, expected)) + if (!parser_partition.parse(pos, params.partition, expected)) return false; if (!s_from.ignore(pos, expected)) @@ -174,7 +173,7 @@ bool 
ParserAlterQuery::parseImpl(Pos & pos, ASTPtr & node, Expected & expected) } else if (s_freeze_partition.ignore(pos, expected)) { - if (!parser_literal.parse(pos, params.partition, expected)) + if (!parser_partition.parse(pos, params.partition, expected)) return false; /// WITH NAME 'name' - place local backup to directory with specified name @@ -225,7 +224,7 @@ bool ParserAlterQuery::parseImpl(Pos & pos, ASTPtr & node, Expected & expected) if (s_partition.ignore(pos, expected)) { - if (!parser_uint.parse(pos, params.partition, expected)) + if (!parser_partition.parse(pos, params.partition, expected)) return false; } diff --git a/dbms/src/Parsers/ParserOptimizeQuery.cpp b/dbms/src/Parsers/ParserOptimizeQuery.cpp index eb4580755bf..90e9146210a 100644 --- a/dbms/src/Parsers/ParserOptimizeQuery.cpp +++ b/dbms/src/Parsers/ParserOptimizeQuery.cpp @@ -1,8 +1,9 @@ -#include -#include - -#include #include +#include +#include + +#include +#include #include #include @@ -22,7 +23,7 @@ bool ParserOptimizeQuery::parseImpl(Pos & pos, ASTPtr & node, Expected & expecte ParserKeyword s_deduplicate("DEDUPLICATE"); ParserToken s_dot(TokenType::Dot); ParserIdentifier name_p; - ParserLiteral partition_p; + ParserPartition partition_p; ASTPtr database; ASTPtr table; @@ -62,8 +63,7 @@ bool ParserOptimizeQuery::parseImpl(Pos & pos, ASTPtr & node, Expected & expecte query->database = typeid_cast(*database).name; if (table) query->table = typeid_cast(*table).name; - if (partition) - query->partition = applyVisitor(FieldVisitorToString(), typeid_cast(*partition).value); + query->partition = partition; query->final = final; query->deduplicate = deduplicate; diff --git a/dbms/src/Parsers/ParserPartition.cpp b/dbms/src/Parsers/ParserPartition.cpp new file mode 100644 index 00000000000..9ecd03d9cd8 --- /dev/null +++ b/dbms/src/Parsers/ParserPartition.cpp @@ -0,0 +1,81 @@ +#include +#include +#include +#include +#include +#include +#include +#include + +namespace DB +{ + +bool ParserPartition::parseImpl(Pos & pos, ASTPtr & node, Expected & expected) +{ + ParserKeyword s_id("ID"); + ParserStringLiteral parser_string_literal; + ParserExpression parser_expr; + + Pos begin = pos; + + auto partition = std::make_shared(); + + if (s_id.ignore(pos, expected)) + { + ASTPtr partition_id; + if (!parser_string_literal.parse(pos, partition_id, expected)) + return false; + + partition->id = dynamic_cast(*partition_id).value.get(); + } + else + { + ASTPtr value; + if (!parser_expr.parse(pos, value, expected)) + return false; + + size_t fields_count; + StringRef fields_str; + + const auto * tuple_ast = typeid_cast(value.get()); + if (tuple_ast && tuple_ast->name == "tuple") + { + const auto * arguments_ast = dynamic_cast(tuple_ast->arguments.get()); + if (arguments_ast) + fields_count = arguments_ast->children.size(); + else + fields_count = 0; + + Pos left_paren = begin; + Pos right_paren = pos; + + while (left_paren != right_paren && left_paren->type != TokenType::OpeningRoundBracket) + ++left_paren; + if (left_paren->type != TokenType::OpeningRoundBracket) + return false; + + while (right_paren != left_paren && right_paren->type != TokenType::ClosingRoundBracket) + --right_paren; + if (right_paren->type != TokenType::ClosingRoundBracket) + return false; + + fields_str = StringRef(left_paren->end, right_paren->begin - left_paren->end); + } + else + { + fields_count = 1; + fields_str = StringRef(begin->begin, pos->begin - begin->begin); + } + + partition->value = value; + partition->children.push_back(value); + partition->fields_str = 
fields_str; + partition->fields_count = fields_count; + } + + partition->range = StringRange(begin, pos); + node = partition; + return true; +} + +} diff --git a/dbms/src/Parsers/ParserPartition.h b/dbms/src/Parsers/ParserPartition.h new file mode 100644 index 00000000000..2bb7048fd81 --- /dev/null +++ b/dbms/src/Parsers/ParserPartition.h @@ -0,0 +1,17 @@ +#pragma once + +#include + +namespace DB +{ + +/// Parse either a partition value as a (possibly compound) literal or a partition ID. +/// Produce ASTPartition. +class ParserPartition : public IParserBase +{ +protected: + const char * getName() const override { return "partition"; } + bool parseImpl(Pos & pos, ASTPtr & node, Expected & expected) override; +}; + +} diff --git a/dbms/src/Parsers/TokenIterator.h b/dbms/src/Parsers/TokenIterator.h index 71d2edfef42..8430d7282e0 100644 --- a/dbms/src/Parsers/TokenIterator.h +++ b/dbms/src/Parsers/TokenIterator.h @@ -69,6 +69,7 @@ public: bool operator< (const TokenIterator & rhs) const { return index < rhs.index; } bool operator<= (const TokenIterator & rhs) const { return index <= rhs.index; } bool operator== (const TokenIterator & rhs) const { return index == rhs.index; } + bool operator!= (const TokenIterator & rhs) const { return index != rhs.index; } bool isValid() { return get().type < TokenType::EndOfStream; } diff --git a/dbms/src/Storages/IStorage.h b/dbms/src/Storages/IStorage.h index 473792376c6..9d3e72d64ea 100644 --- a/dbms/src/Storages/IStorage.h +++ b/dbms/src/Storages/IStorage.h @@ -221,35 +221,35 @@ public: } /** Execute CLEAR COLUMN ... IN PARTITION query which removes column from given partition. */ - virtual void clearColumnInPartition(const ASTPtr & query, const Field & partition, const Field & column_name, const Settings & settings) + virtual void clearColumnInPartition(const ASTPtr & partition, const Field & column_name, const Context & context) { throw Exception("Method dropColumnFromPartition is not supported by storage " + getName(), ErrorCodes::NOT_IMPLEMENTED); } /** Run the query (DROP|DETACH) PARTITION. */ - virtual void dropPartition(const ASTPtr & query, const Field & partition, bool detach, const Settings & settings) + virtual void dropPartition(const ASTPtr & query, const ASTPtr & partition, bool detach, const Context & context) { throw Exception("Method dropPartition is not supported by storage " + getName(), ErrorCodes::NOT_IMPLEMENTED); } /** Run the ATTACH request (PART|PARTITION). */ - virtual void attachPartition(const ASTPtr & query, const Field & partition, bool part, const Settings & settings) + virtual void attachPartition(const ASTPtr & partition, bool part, const Context & context) { throw Exception("Method attachPartition is not supported by storage " + getName(), ErrorCodes::NOT_IMPLEMENTED); } /** Run the FETCH PARTITION query. */ - virtual void fetchPartition(const Field & partition, const String & from, const Settings & settings) + virtual void fetchPartition(const ASTPtr & partition, const String & from, const Context & context) { throw Exception("Method fetchPartition is not supported by storage " + getName(), ErrorCodes::NOT_IMPLEMENTED); } /** Run the FREEZE PARTITION request. 
That is, create a local backup (snapshot) of data using the `localBackup` function (see localBackup.h) */ - virtual void freezePartition(const Field & partition, const String & with_name, const Settings & settings) + virtual void freezePartition(const ASTPtr & partition, const String & with_name, const Context & context) { throw Exception("Method freezePartition is not supported by storage " + getName(), ErrorCodes::NOT_IMPLEMENTED); } @@ -258,7 +258,7 @@ public: */ virtual void reshardPartitions( const ASTPtr & query, const String & database_name, - const Field & partition, + const ASTPtr & partition, const WeightedZooKeeperPaths & weighted_zookeeper_paths, const ASTPtr & sharding_key_expr, bool do_copy, const Field & coordinator, const Context & context) @@ -269,7 +269,7 @@ public: /** Perform any background work. For example, combining parts in a MergeTree type table. * Returns whether any work has been done. */ - virtual bool optimize(const ASTPtr & query, const String & partition_id, bool final, bool deduplicate, const Settings & settings) + virtual bool optimize(const ASTPtr & query, const ASTPtr & partition, bool final, bool deduplicate, const Context & context) { throw Exception("Method optimize is not supported by storage " + getName(), ErrorCodes::NOT_IMPLEMENTED); } diff --git a/dbms/src/Storages/MergeTree/MergeTreeData.cpp b/dbms/src/Storages/MergeTree/MergeTreeData.cpp index 8bdf87e6f07..2e6d36ab66d 100644 --- a/dbms/src/Storages/MergeTree/MergeTreeData.cpp +++ b/dbms/src/Storages/MergeTree/MergeTreeData.cpp @@ -6,13 +6,19 @@ #include #include #include +#include +#include +#include #include #include #include +#include #include #include #include #include +#include +#include #include #include #include @@ -63,6 +69,7 @@ namespace ErrorCodes extern const int MEMORY_LIMIT_EXCEEDED; extern const int SYNTAX_ERROR; extern const int CORRUPTED_DATA; + extern const int INVALID_PARTITION_VALUE; } @@ -1714,8 +1721,21 @@ void MergeTreeData::removePartContributionToColumnSizes(const DataPartPtr & part } -void MergeTreeData::freezePartition(const std::string & prefix, const String & with_name) +void MergeTreeData::freezePartition(const ASTPtr & partition_ast, const String & with_name, const Context & context) { + String prefix; + if (format_version == 0) + { + const auto & partition = dynamic_cast(*partition_ast); + /// Month-partitioning specific - allow partition ID can be passed in the partition value. + if (const auto * partition_lit = dynamic_cast(partition.value.get())) + prefix = partition_lit->value.getType() == Field::Types::UInt64 + ? toString(partition_lit->value.get()) + : partition_lit->value.safeGet(); + } + else + prefix = getPartitionIDFromQuery(partition_ast, context); + LOG_DEBUG(log, "Freezing parts with prefix " + prefix); String clickhouse_path = Poco::Path(context.getPath()).makeAbsolute().toString(); @@ -1777,18 +1797,61 @@ size_t MergeTreeData::getPartitionSize(const std::string & partition_id) const return size; } -String MergeTreeData::getPartitionIDFromQuery(const Field & partition) +String MergeTreeData::getPartitionIDFromQuery(const ASTPtr & partition_ast, const Context & context) { - /// Month-partitioning specific, TODO: generalize. - String partition_id = partition.getType() == Field::Types::UInt64 - ? 
toString(partition.get()) - : partition.safeGet(); + const auto & partition = typeid_cast(*partition_ast); - if (partition_id.size() != 6 || !std::all_of(partition_id.begin(), partition_id.end(), isNumericASCII)) - throw Exception("Invalid partition format: " + partition_id + ". Partition should consist of 6 digits: YYYYMM", - ErrorCodes::INVALID_PARTITION_NAME); + if (!partition.value) + return partition.id; - return partition_id; + if (format_version == 0) + { + /// Month-partitioning specific - allow partition ID can be passed in the partition value. + const auto * partition_lit = typeid_cast(partition.value.get()); + if (partition_lit && partition_lit->value.getType() == Field::Types::String) + { + String partition_id = partition_lit->value.get(); + if (partition_id.size() != 6 || !std::all_of(partition_id.begin(), partition_id.end(), isNumericASCII)) + throw Exception( + "Invalid partition format: " + partition_id + ". Partition should consist of 6 digits: YYYYMM", + ErrorCodes::INVALID_PARTITION_VALUE); + return partition_id; + } + } + + /// Re-parse partition key fields using the information about expected field types. + + size_t fields_count = partition_expr_column_types.size(); + if (partition.fields_count != fields_count) + throw Exception( + "Wrong number of fields in the partition expression: " + toString(partition.fields_count) + + ", must be: " + toString(fields_count), + ErrorCodes::INVALID_PARTITION_VALUE); + + Row partition_row(fields_count); + + if (fields_count) + { + ReadBufferFromMemory left_paren_buf("(", 1); + ReadBufferFromMemory fields_buf(partition.fields_str.data, partition.fields_str.size); + ReadBufferFromMemory right_paren_buf(")", 1); + ConcatReadBuffer buf({&left_paren_buf, &fields_buf, &right_paren_buf}); + + ValuesRowInputStream input_stream(buf, context, /* interpret_expressions = */true); + Block block; + for (size_t i = 0; i < fields_count; ++i) + block.insert(ColumnWithTypeAndName(partition_expr_column_types[i], partition_expr_columns[i])); + + if (!input_stream.read(block)) + throw Exception( + "Could not parse partition value: `" + partition.fields_str.toString() + "`", + ErrorCodes::INVALID_PARTITION_VALUE); + + for (size_t i = 0; i < fields_count; ++i) + block.getByPosition(i).column->get(0, partition_row[i]); + } + + return MergeTreeDataPart::Partition(std::move(partition_row)).getID(*this); } void MergeTreeData::Transaction::rollback() diff --git a/dbms/src/Storages/MergeTree/MergeTreeData.h b/dbms/src/Storages/MergeTree/MergeTreeData.h index 3b653153e34..d5cfd4e2ead 100644 --- a/dbms/src/Storages/MergeTree/MergeTreeData.h +++ b/dbms/src/Storages/MergeTree/MergeTreeData.h @@ -410,7 +410,7 @@ public: * Backup is created in directory clickhouse_dir/shadow/i/, where i - incremental number, * or if 'with_name' is specified - backup is created in directory with specified name. */ - void freezePartition(const std::string & prefix, const String & with_name); + void freezePartition(const ASTPtr & partition, const String & with_name, const Context & context); /// Returns the size of partition in bytes. size_t getPartitionSize(const std::string & partition_id) const; @@ -461,7 +461,7 @@ public: } /// For ATTACH/DETACH/DROP/RESHARD PARTITION. 
- String getPartitionIDFromQuery(const Field & partition); + String getPartitionIDFromQuery(const ASTPtr & partition, const Context & context); MergeTreeDataFormatVersion format_version; diff --git a/dbms/src/Storages/MergeTree/MergeTreeDataPart.cpp b/dbms/src/Storages/MergeTree/MergeTreeDataPart.cpp index 65907f7e5cd..673bf977a83 100644 --- a/dbms/src/Storages/MergeTree/MergeTreeDataPart.cpp +++ b/dbms/src/Storages/MergeTree/MergeTreeDataPart.cpp @@ -351,7 +351,18 @@ String MergeTreeDataPart::Partition::getID(const MergeTreeData & storage) const void MergeTreeDataPart::Partition::serializeTextQuoted(const MergeTreeData & storage, WriteBuffer & out) const { - for (size_t i = 0; i < storage.partition_expr_column_types.size(); ++i) + size_t key_size = storage.partition_expr_column_types.size(); + + if (key_size == 0) + { + writeCString("tuple()", out); + return; + } + + if (key_size > 1) + writeChar('(', out); + + for (size_t i = 0; i < key_size; ++i) { if (i > 0) writeCString(", ", out); @@ -361,6 +372,9 @@ void MergeTreeDataPart::Partition::serializeTextQuoted(const MergeTreeData & sto column->insert(value[i]); type->serializeTextQuoted(*column, 0, out); } + + if (key_size > 1) + writeChar(')', out); } void MergeTreeDataPart::Partition::load(const MergeTreeData & storage, const String & part_path) diff --git a/dbms/src/Storages/StorageBuffer.cpp b/dbms/src/Storages/StorageBuffer.cpp index 1ceb2d8ac1e..90026d72d21 100644 --- a/dbms/src/Storages/StorageBuffer.cpp +++ b/dbms/src/Storages/StorageBuffer.cpp @@ -346,7 +346,7 @@ void StorageBuffer::shutdown() try { - optimize(nullptr /*query*/, {} /*partition_id*/, false /*final*/, false /*deduplicate*/, context.getSettings()); + optimize(nullptr /*query*/, {} /*partition*/, false /*final*/, false /*deduplicate*/, context); } catch (...) { @@ -365,9 +365,9 @@ void StorageBuffer::shutdown() * * This kind of race condition make very hard to implement proper tests. */ -bool StorageBuffer::optimize(const ASTPtr & query, const String & partition_id, bool final, bool deduplicate, const Settings & settings) +bool StorageBuffer::optimize(const ASTPtr & query, const ASTPtr & partition, bool final, bool deduplicate, const Context & context) { - if (!partition_id.empty()) + if (partition) throw Exception("Partition cannot be specified when optimizing table of type Buffer", ErrorCodes::NOT_IMPLEMENTED); if (final) @@ -593,7 +593,7 @@ void StorageBuffer::alter(const AlterCommands & params, const String & database_ auto lock = lockStructureForAlter(__PRETTY_FUNCTION__); /// So that no blocks of the old structure remain. - optimize({} /*query*/, {} /*partition_id*/, false /*final*/, false /*deduplicate*/, context.getSettings()); + optimize({} /*query*/, {} /*partition_id*/, false /*final*/, false /*deduplicate*/, context); params.apply(*columns, materialized_columns, alias_columns, column_defaults); diff --git a/dbms/src/Storages/StorageBuffer.h b/dbms/src/Storages/StorageBuffer.h index d1c588c71e0..0e67dc4fb02 100644 --- a/dbms/src/Storages/StorageBuffer.h +++ b/dbms/src/Storages/StorageBuffer.h @@ -69,7 +69,7 @@ public: void startup() override; /// Flush all buffers into the subordinate table and stop background thread. 
void shutdown() override; - bool optimize(const ASTPtr & query, const String & partition_id, bool final, bool deduplicate, const Settings & settings) override; + bool optimize(const ASTPtr & query, const ASTPtr & partition, bool final, bool deduplicate, const Context & context) override; void rename(const String & new_path_to_db, const String & new_database_name, const String & new_table_name) override { name = new_table_name; } diff --git a/dbms/src/Storages/StorageDistributed.cpp b/dbms/src/Storages/StorageDistributed.cpp index 7eb6e0b117e..141c7cccc71 100644 --- a/dbms/src/Storages/StorageDistributed.cpp +++ b/dbms/src/Storages/StorageDistributed.cpp @@ -270,7 +270,7 @@ void StorageDistributed::shutdown() void StorageDistributed::reshardPartitions( const ASTPtr & query, const String & database_name, - const Field & partition, + const ASTPtr & partition, const WeightedZooKeeperPaths & weighted_zookeeper_paths, const ASTPtr & sharding_key_expr, bool do_copy, const Field & coordinator, const Context & context) @@ -322,8 +322,7 @@ void StorageDistributed::reshardPartitions( ASTAlterQuery::Parameters & parameters = alter_query.parameters.back(); parameters.type = ASTAlterQuery::RESHARD_PARTITION; - if (!partition.isNull()) - parameters.partition = std::make_shared(StringRange(), partition); + parameters.partition = partition->clone(); ASTPtr expr_list = std::make_shared(); for (const auto & entry : weighted_zookeeper_paths) diff --git a/dbms/src/Storages/StorageDistributed.h b/dbms/src/Storages/StorageDistributed.h index cd986803434..6c6fd662eff 100644 --- a/dbms/src/Storages/StorageDistributed.h +++ b/dbms/src/Storages/StorageDistributed.h @@ -76,7 +76,7 @@ public: void reshardPartitions( const ASTPtr & query, const String & database_name, - const Field & partition, + const ASTPtr & partition, const WeightedZooKeeperPaths & weighted_zookeeper_paths, const ASTPtr & sharding_key_expr, bool do_copy, const Field & coordinator, const Context & context) override; diff --git a/dbms/src/Storages/StorageMaterializedView.cpp b/dbms/src/Storages/StorageMaterializedView.cpp index 60ef05b707b..c59b84125f8 100644 --- a/dbms/src/Storages/StorageMaterializedView.cpp +++ b/dbms/src/Storages/StorageMaterializedView.cpp @@ -166,9 +166,9 @@ void StorageMaterializedView::drop() } } -bool StorageMaterializedView::optimize(const ASTPtr & query, const String & partition_id, bool final, bool deduplicate, const Settings & settings) +bool StorageMaterializedView::optimize(const ASTPtr & query, const ASTPtr & partition, bool final, bool deduplicate, const Context & context) { - return getInnerTable()->optimize(query, partition_id, final, deduplicate, settings); + return getInnerTable()->optimize(query, partition, final, deduplicate, context); } StoragePtr StorageMaterializedView::getInnerTable() const diff --git a/dbms/src/Storages/StorageMaterializedView.h b/dbms/src/Storages/StorageMaterializedView.h index 113a23bf3c1..efc6927e26e 100644 --- a/dbms/src/Storages/StorageMaterializedView.h +++ b/dbms/src/Storages/StorageMaterializedView.h @@ -35,7 +35,7 @@ public: BlockOutputStreamPtr write(const ASTPtr & query, const Settings & settings) override; void drop() override; - bool optimize(const ASTPtr & query, const String & partition_id, bool final, bool deduplicate, const Settings & settings) override; + bool optimize(const ASTPtr & query, const ASTPtr & partition, bool final, bool deduplicate, const Context & context) override; BlockInputStreams read( const Names & column_names, diff --git 
a/dbms/src/Storages/StorageMergeTree.cpp b/dbms/src/Storages/StorageMergeTree.cpp index 9353cc66bea..4b0c2d1fc3a 100644 --- a/dbms/src/Storages/StorageMergeTree.cpp +++ b/dbms/src/Storages/StorageMergeTree.cpp @@ -10,6 +10,7 @@ #include #include #include +#include #include #include #include @@ -396,7 +397,7 @@ bool StorageMergeTree::mergeTask() } -void StorageMergeTree::clearColumnInPartition(const ASTPtr & query, const Field & partition, const Field & column_name, const Settings &) +void StorageMergeTree::clearColumnInPartition(const ASTPtr & partition, const Field & column_name, const Context & context) { /// Asks to complete merges and does not allow them to start. /// This protects against "revival" of data for a removed partition after completion of merge. @@ -405,7 +406,7 @@ void StorageMergeTree::clearColumnInPartition(const ASTPtr & query, const Field /// We don't change table structure, only data in some parts, parts are locked inside alterDataPart() function auto lock_read_structure = lockStructure(false, __PRETTY_FUNCTION__); - String partition_id = data.getPartitionIDFromQuery(partition); + String partition_id = data.getPartitionIDFromQuery(partition, context); MergeTreeData::DataParts parts = data.getDataParts(); std::vector transactions; @@ -446,7 +447,17 @@ void StorageMergeTree::clearColumnInPartition(const ASTPtr & query, const Field } -void StorageMergeTree::dropPartition(const ASTPtr & query, const Field & partition, bool detach, const Settings & settings) +bool StorageMergeTree::optimize( + const ASTPtr & query, const ASTPtr & partition, bool final, bool deduplicate, const Context & context) +{ + String partition_id; + if (partition) + partition_id = data.getPartitionIDFromQuery(partition, context); + return merge(context.getSettingsRef().min_bytes_to_use_direct_io, true, partition_id, final, deduplicate); +} + + +void StorageMergeTree::dropPartition(const ASTPtr & query, const ASTPtr & partition, bool detach, const Context & context) { /// Asks to complete merges and does not allow them to start. /// This protects against "revival" of data for a removed partition after completion of merge. @@ -454,7 +465,7 @@ void StorageMergeTree::dropPartition(const ASTPtr & query, const Field & partiti /// Waits for completion of merge and does not start new ones. auto lock = lockForAlter(__PRETTY_FUNCTION__); - String partition_id = data.getPartitionIDFromQuery(partition); + String partition_id = data.getPartitionIDFromQuery(partition, context); size_t removed_parts = 0; MergeTreeData::DataParts parts = data.getDataParts(); @@ -473,18 +484,18 @@ void StorageMergeTree::dropPartition(const ASTPtr & query, const Field & partiti data.replaceParts({part}, {}, false); } - LOG_INFO(log, (detach ? "Detached " : "Removed ") << removed_parts << " parts inside " << applyVisitor(FieldVisitorToString(), partition) << "."); + LOG_INFO(log, (detach ? "Detached " : "Removed ") << removed_parts << " parts inside partition ID " << partition_id << "."); } -void StorageMergeTree::attachPartition(const ASTPtr & query, const Field & field, bool part, const Settings & settings) +void StorageMergeTree::attachPartition(const ASTPtr & partition, bool part, const Context & context) { String partition_id; if (part) - partition_id = field.getType() == Field::Types::UInt64 ? 
toString(field.get<UInt64>()) : field.safeGet<String>(); + partition_id = typeid_cast<const ASTLiteral &>(*partition).value.safeGet<String>(); else - partition_id = data.getPartitionIDFromQuery(field); + partition_id = data.getPartitionIDFromQuery(partition, context); String source_dir = "detached/"; @@ -532,12 +543,9 @@ void StorageMergeTree::attachPartition(const ASTPtr & query, const Field & field } -void StorageMergeTree::freezePartition(const Field & partition, const String & with_name, const Settings & settings) +void StorageMergeTree::freezePartition(const ASTPtr & partition, const String & with_name, const Context & context) { - /// The prefix can be arbitrary. Not necessarily a month - you can specify only a year. - data.freezePartition(partition.getType() == Field::Types::UInt64 - ? toString(partition.get<UInt64>()) - : partition.safeGet<String>(), with_name); + data.freezePartition(partition, with_name, context); } } diff --git a/dbms/src/Storages/StorageMergeTree.h b/dbms/src/Storages/StorageMergeTree.h index bc0f3070f97..fefe84a71f7 100644 --- a/dbms/src/Storages/StorageMergeTree.h +++ b/dbms/src/Storages/StorageMergeTree.h @@ -61,15 +61,12 @@ public: /** Perform the next step in combining the parts. */ - bool optimize(const ASTPtr & query, const String & partition_id, bool final, bool deduplicate, const Settings & settings) override - { - return merge(settings.min_bytes_to_use_direct_io, true, partition_id, final, deduplicate); - } + bool optimize(const ASTPtr & query, const ASTPtr & partition, bool final, bool deduplicate, const Context & context) override; - void dropPartition(const ASTPtr & query, const Field & partition, bool detach, const Settings & settings) override; - void clearColumnInPartition(const ASTPtr & query, const Field & partition, const Field & column_name, const Settings & settings) override; - void attachPartition(const ASTPtr & query, const Field & partition, bool part, const Settings & settings) override; - void freezePartition(const Field & partition, const String & with_name, const Settings & settings) override; + void dropPartition(const ASTPtr & query, const ASTPtr & partition, bool detach, const Context & context) override; + void clearColumnInPartition(const ASTPtr & partition, const Field & column_name, const Context & context) override; + void attachPartition(const ASTPtr & partition, bool part, const Context & context) override; + void freezePartition(const ASTPtr & partition, const String & with_name, const Context & context) override; void drop() override; diff --git a/dbms/src/Storages/StorageReplicatedMergeTree.cpp b/dbms/src/Storages/StorageReplicatedMergeTree.cpp index efb9fc08c96..13874669452 100644 --- a/dbms/src/Storages/StorageReplicatedMergeTree.cpp +++ b/dbms/src/Storages/StorageReplicatedMergeTree.cpp @@ -16,6 +16,7 @@ #include #include #include +#include #include #include @@ -2348,13 +2349,13 @@ BlockOutputStreamPtr StorageReplicatedMergeTree::write(const ASTPtr & query, con } -bool StorageReplicatedMergeTree::optimize(const ASTPtr & query, const String & partition_id, bool final, bool deduplicate, const Settings & settings) +bool StorageReplicatedMergeTree::optimize(const ASTPtr & query, const ASTPtr & partition, bool final, bool deduplicate, const Context & context) { assertNotReadonly(); if (!is_leader_node) { - sendRequestToLeaderReplica(query, settings); + sendRequestToLeaderReplica(query, context.getSettingsRef()); return true; } @@ -2375,13 +2376,14 @@ bool StorageReplicatedMergeTree::optimize(const ASTPtr & query, const String & p MergeTreeDataMerger::FuturePart future_merged_part;
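/// Note on the branch that follows: with no PARTITION clause the merger is free to pick any
/// suitable range of adjacent parts (bounded by max_bytes_to_merge_at_max_space_in_pool);
/// with a PARTITION argument, the expression is first resolved to a partition ID via
/// getPartitionIDFromQuery() and selection is restricted to all parts of that one partition
/// (selectAllPartsToMergeWithinPartition), which is how OPTIMIZE ... PARTITION FINAL can
/// rewrite a partition as a single part.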
bool selected = false; - if (partition_id.empty()) + if (!partition) { selected = merger.selectPartsToMerge( future_merged_part, false, data.settings.max_bytes_to_merge_at_max_space_in_pool, can_merge); } else { + String partition_id = data.getPartitionIDFromQuery(partition, context); selected = merger.selectAllPartsToMergeWithinPartition(future_merged_part, disk_space, can_merge, partition_id, final); } @@ -2619,13 +2621,13 @@ String StorageReplicatedMergeTree::getFakePartNameCoveringAllPartsInPartition(co void StorageReplicatedMergeTree::clearColumnInPartition( - const ASTPtr & query, const Field & partition, const Field & column_name, const Settings & settings) + const ASTPtr & partition, const Field & column_name, const Context & context) { assertNotReadonly(); /// We don't block merges, so anyone can manage this task (not only leader) - String partition_id = data.getPartitionIDFromQuery(partition); + String partition_id = data.getPartitionIDFromQuery(partition, context); String fake_part_name = getFakePartNameCoveringAllPartsInPartition(partition_id); if (fake_part_name.empty()) @@ -2646,26 +2648,26 @@ void StorageReplicatedMergeTree::clearColumnInPartition( entry.znode_name = log_znode_path.substr(log_znode_path.find_last_of('/') + 1); /// If necessary, wait until the operation is performed on itself or on all replicas. - if (settings.replication_alter_partitions_sync != 0) + if (context.getSettingsRef().replication_alter_partitions_sync != 0) { - if (settings.replication_alter_partitions_sync == 1) + if (context.getSettingsRef().replication_alter_partitions_sync == 1) waitForReplicaToProcessLogEntry(replica_name, entry); else waitForAllReplicasToProcessLogEntry(entry); } } -void StorageReplicatedMergeTree::dropPartition(const ASTPtr & query, const Field & partition, bool detach, const Settings & settings) +void StorageReplicatedMergeTree::dropPartition(const ASTPtr & query, const ASTPtr & partition, bool detach, const Context & context) { assertNotReadonly(); if (!is_leader_node) { - sendRequestToLeaderReplica(query, settings); + sendRequestToLeaderReplica(query, context.getSettingsRef()); return; } - String partition_id = data.getPartitionIDFromQuery(partition); + String partition_id = data.getPartitionIDFromQuery(partition, context); String fake_part_name = getFakePartNameCoveringAllPartsInPartition(partition_id); if (fake_part_name.empty()) @@ -2696,9 +2698,9 @@ void StorageReplicatedMergeTree::dropPartition(const ASTPtr & query, const Field entry.znode_name = log_znode_path.substr(log_znode_path.find_last_of('/') + 1); /// If necessary, wait until the operation is performed on itself or on all replicas. 
- if (settings.replication_alter_partitions_sync != 0) + if (context.getSettingsRef().replication_alter_partitions_sync != 0) { - if (settings.replication_alter_partitions_sync == 1) + if (context.getSettingsRef().replication_alter_partitions_sync == 1) waitForReplicaToProcessLogEntry(replica_name, entry); else waitForAllReplicasToProcessLogEntry(entry); @@ -2706,16 +2708,16 @@ void StorageReplicatedMergeTree::dropPartition(const ASTPtr & query, const Field } -void StorageReplicatedMergeTree::attachPartition(const ASTPtr & query, const Field & field, bool attach_part, const Settings & settings) +void StorageReplicatedMergeTree::attachPartition(const ASTPtr & partition, bool attach_part, const Context & context) { assertNotReadonly(); String partition_id; if (attach_part) - partition_id = field.safeGet<String>(); + partition_id = typeid_cast<const ASTLiteral &>(*partition).value.safeGet<String>(); else - partition_id = data.getPartitionIDFromQuery(field); + partition_id = data.getPartitionIDFromQuery(partition, context); String source_dir = "detached/"; @@ -3219,9 +3221,9 @@ void StorageReplicatedMergeTree::getReplicaDelays(time_t & out_absolute_delay, t } -void StorageReplicatedMergeTree::fetchPartition(const Field & partition, const String & from_, const Settings & settings) +void StorageReplicatedMergeTree::fetchPartition(const ASTPtr & partition, const String & from_, const Context & context) { - String partition_id = data.getPartitionIDFromQuery(partition); + String partition_id = data.getPartitionIDFromQuery(partition, context); String from = from_; if (from.back() == '/') @@ -3373,20 +3375,15 @@ void StorageReplicatedMergeTree::fetchPartition(const Field & partition, const S } -void StorageReplicatedMergeTree::freezePartition(const Field & partition, const String & with_name, const Settings & settings) +void StorageReplicatedMergeTree::freezePartition(const ASTPtr & partition, const String & with_name, const Context & context) { - /// The prefix can be arbitrary. Not necessarily a month - you can specify only a year. - String prefix = partition.getType() == Field::Types::UInt64 - ? toString(partition.get<UInt64>()) - : partition.safeGet<String>(); - - data.freezePartition(prefix, with_name); + data.freezePartition(partition, with_name, context); } void StorageReplicatedMergeTree::reshardPartitions( const ASTPtr & query, const String & database_name, - const Field & partition, + const ASTPtr & partition, const WeightedZooKeeperPaths & weighted_zookeeper_paths, const ASTPtr & sharding_key_expr, bool do_copy, const Field & coordinator, const Context & context) @@ -3466,8 +3463,8 @@ void StorageReplicatedMergeTree::reshardPartitions( throw Exception{"Shard paths must be distinct", ErrorCodes::DUPLICATE_SHARD_PATHS}; } - bool include_all = partition.isNull(); - String partition_id = !partition.isNull() ? data.getPartitionIDFromQuery(partition) : String(); + bool include_all = !partition; + String partition_id = partition ? data.getPartitionIDFromQuery(partition, context) : String(); /// Make a list of local partitions that need to be resharded.
std::set unique_partition_list; diff --git a/dbms/src/Storages/StorageReplicatedMergeTree.h b/dbms/src/Storages/StorageReplicatedMergeTree.h index ce0489124de..8fc9a9ce754 100644 --- a/dbms/src/Storages/StorageReplicatedMergeTree.h +++ b/dbms/src/Storages/StorageReplicatedMergeTree.h @@ -133,19 +133,19 @@ public: BlockOutputStreamPtr write(const ASTPtr & query, const Settings & settings) override; - bool optimize(const ASTPtr & query, const String & partition_id, bool final, bool deduplicate, const Settings & settings) override; + bool optimize(const ASTPtr & query, const ASTPtr & partition, bool final, bool deduplicate, const Context & context) override; void alter(const AlterCommands & params, const String & database_name, const String & table_name, const Context & context) override; - void clearColumnInPartition(const ASTPtr & query, const Field & partition, const Field & column_name, const Settings & settings) override; - void dropPartition(const ASTPtr & query, const Field & partition, bool detach, const Settings & settings) override; - void attachPartition(const ASTPtr & query, const Field & partition, bool part, const Settings & settings) override; - void fetchPartition(const Field & partition, const String & from, const Settings & settings) override; - void freezePartition(const Field & partition, const String & with_name, const Settings & settings) override; + void clearColumnInPartition(const ASTPtr & partition, const Field & column_name, const Context & context) override; + void dropPartition(const ASTPtr & query, const ASTPtr & partition, bool detach, const Context & context) override; + void attachPartition(const ASTPtr & partition, bool part, const Context & context) override; + void fetchPartition(const ASTPtr & partition, const String & from, const Context & context) override; + void freezePartition(const ASTPtr & partition, const String & with_name, const Context & context) override; void reshardPartitions( const ASTPtr & query, const String & database_name, - const Field & partition, + const ASTPtr & partition, const WeightedZooKeeperPaths & weighted_zookeeper_paths, const ASTPtr & sharding_key_expr, bool do_copy, const Field & coordinator, const Context & context) override; diff --git a/dbms/src/Storages/StorageTrivialBuffer.cpp b/dbms/src/Storages/StorageTrivialBuffer.cpp index 06e693b3494..271af6e1d8f 100644 --- a/dbms/src/Storages/StorageTrivialBuffer.cpp +++ b/dbms/src/Storages/StorageTrivialBuffer.cpp @@ -380,9 +380,9 @@ void StorageTrivialBuffer::shutdown() * * This kind of race condition make very hard to implement proper tests. */ -bool StorageTrivialBuffer::optimize(const ASTPtr & query, const String & partition_id, bool final, bool deduplicate, const Settings & settings) +bool StorageTrivialBuffer::optimize(const ASTPtr & query, const ASTPtr & partition, bool final, bool deduplicate, const Context & context) { - if (!partition_id.empty()) + if (partition) throw Exception("Partition cannot be specified when optimizing table of type TrivialBuffer", ErrorCodes::NOT_IMPLEMENTED); diff --git a/dbms/src/Storages/StorageTrivialBuffer.h b/dbms/src/Storages/StorageTrivialBuffer.h index c0e4753b24a..b1ac18360ab 100644 --- a/dbms/src/Storages/StorageTrivialBuffer.h +++ b/dbms/src/Storages/StorageTrivialBuffer.h @@ -69,7 +69,7 @@ public: void startup() override; /// Writes all the blocks in buffer into the destination table. Stop flushing thread. 
void shutdown() override; - bool optimize(const ASTPtr & query, const String & partition_id, bool final, bool deduplicate, const Settings & settings) override; + bool optimize(const ASTPtr & query, const ASTPtr & partition, bool final, bool deduplicate, const Context & context) override; void rename(const String & new_path_to_db, const String & new_database_name, const String & new_table_name) override { name = new_table_name; } From ceddecbc102c3b75ff29003374bcbaf39da9c872 Mon Sep 17 00:00:00 2001 From: Alexey Zatelepin Date: Thu, 7 Sep 2017 16:22:25 +0300 Subject: [PATCH 15/63] implement missing getExtremes() functions [#CLICKHOUSE-3000] --- dbms/src/Columns/ColumnArray.cpp | 19 +++++++++++++++++ dbms/src/Columns/ColumnFixedString.cpp | 21 +++++++++++++++++++ dbms/src/Columns/ColumnString.cpp | 21 +++++++++++++++++++ .../0_stateless/00307_format_xml.reference | 18 +++++++++------- .../00378_json_quote_64bit_integers.reference | 16 +++++++------- 5 files changed, 79 insertions(+), 16 deletions(-) diff --git a/dbms/src/Columns/ColumnArray.cpp b/dbms/src/Columns/ColumnArray.cpp index bc835df09db..1b85dae1741 100644 --- a/dbms/src/Columns/ColumnArray.cpp +++ b/dbms/src/Columns/ColumnArray.cpp @@ -341,6 +341,25 @@ void ColumnArray::getExtremes(Field & min, Field & max) const { min = Array(); max = Array(); + + size_t col_size = size(); + + if (col_size == 0) + return; + + size_t min_idx = 0; + size_t max_idx = 0; + + for (size_t i = 1; i < col_size; ++i) + { + if (compareAt(i, min_idx, *this, /* nan_direction_hint = */ 1) < 0) + min_idx = i; + if (compareAt(i, max_idx, *this, /* nan_direction_hint = */ -1) > 0) + max_idx = i; + } + + get(min_idx, min); + get(max_idx, max); } diff --git a/dbms/src/Columns/ColumnFixedString.cpp b/dbms/src/Columns/ColumnFixedString.cpp index ad846d45603..61ab24fee10 100644 --- a/dbms/src/Columns/ColumnFixedString.cpp +++ b/dbms/src/Columns/ColumnFixedString.cpp @@ -289,6 +289,27 @@ void ColumnFixedString::getExtremes(Field & min, Field & max) const { min = String(); max = String(); + + size_t col_size = size(); + + if (col_size == 0) + return; + + size_t min_idx = 0; + size_t max_idx = 0; + + less less_op(*this); + + for (size_t i = 1; i < col_size; ++i) + { + if (less_op(i, min_idx)) + min_idx = i; + if (less_op(max_idx, i)) + max_idx = i; + } + + get(min_idx, min); + get(max_idx, max); } } diff --git a/dbms/src/Columns/ColumnString.cpp b/dbms/src/Columns/ColumnString.cpp index 17fb9b53667..5a04db5d893 100644 --- a/dbms/src/Columns/ColumnString.cpp +++ b/dbms/src/Columns/ColumnString.cpp @@ -265,6 +265,27 @@ void ColumnString::getExtremes(Field & min, Field & max) const { min = String(); max = String(); + + size_t col_size = size(); + + if (col_size == 0) + return; + + size_t min_idx = 0; + size_t max_idx = 0; + + less less_op(*this); + + for (size_t i = 1; i < col_size; ++i) + { + if (less_op(i, min_idx)) + min_idx = i; + if (less_op(max_idx, i)) + max_idx = i; + } + + get(min_idx, min); + get(max_idx, max); } diff --git a/dbms/tests/queries/0_stateless/00307_format_xml.reference b/dbms/tests/queries/0_stateless/00307_format_xml.reference index c439e6db932..2d9badc5a3e 100644 --- a/dbms/tests/queries/0_stateless/00307_format_xml.reference +++ b/dbms/tests/queries/0_stateless/00307_format_xml.reference @@ -36,18 +36,20 @@ - - + Hello & world + Hello +<World> - - 2001-02-03 04:05:06 + 0123456789 + Hello & world2001-02-03 04:05:06 - - + Hello & world + Hello +<World> - - 2001-02-03 04:05:06 + 0123456789 + Hello & world2001-02-03 04:05:06 1 diff --git 
a/dbms/tests/queries/0_stateless/00378_json_quote_64bit_integers.reference b/dbms/tests/queries/0_stateless/00378_json_quote_64bit_integers.reference index 8240d0b21f6..22395188fe6 100644 --- a/dbms/tests/queries/0_stateless/00378_json_quote_64bit_integers.reference +++ b/dbms/tests/queries/0_stateless/00378_json_quote_64bit_integers.reference @@ -64,7 +64,7 @@ "ip": "9223372036854775807", "in": "-9223372036854775808", "up": "18446744073709551615", - "arr": [], + "arr": ["0"], "tuple": ["0","0"] }, "max": @@ -74,7 +74,7 @@ "ip": "9223372036854775807", "in": "-9223372036854775808", "up": "18446744073709551615", - "arr": [], + "arr": ["0"], "tuple": ["0","0"] } }, @@ -123,8 +123,8 @@ "extremes": { - "min": ["0","0","9223372036854775807","-9223372036854775808","18446744073709551615",[],["0","0"]], - "max": ["0","0","9223372036854775807","-9223372036854775808","18446744073709551615",[],["0","0"]] + "min": ["0","0","9223372036854775807","-9223372036854775808","18446744073709551615",["0"],["0","0"]], + "max": ["0","0","9223372036854775807","-9223372036854775808","18446744073709551615",["0"],["0","0"]] }, "rows": 1 @@ -196,7 +196,7 @@ "ip": 9223372036854775807, "in": -9223372036854775808, "up": 18446744073709551615, - "arr": [], + "arr": [0], "tuple": [0,0] }, "max": @@ -206,7 +206,7 @@ "ip": 9223372036854775807, "in": -9223372036854775808, "up": 18446744073709551615, - "arr": [], + "arr": [0], "tuple": [0,0] } }, @@ -255,8 +255,8 @@ "extremes": { - "min": [0,0,9223372036854775807,-9223372036854775808,18446744073709551615,[],[0,0]], - "max": [0,0,9223372036854775807,-9223372036854775808,18446744073709551615,[],[0,0]] + "min": [0,0,9223372036854775807,-9223372036854775808,18446744073709551615,[0],[0,0]], + "max": [0,0,9223372036854775807,-9223372036854775808,18446744073709551615,[0],[0,0]] }, "rows": 1 From 909b46de0fe2fb365daef36560872978005df87a Mon Sep 17 00:00:00 2001 From: Alexey Zatelepin Date: Thu, 7 Sep 2017 19:21:06 +0300 Subject: [PATCH 16/63] add MergeTreeDataFormatVersion constant [#CLICKHOUSE-3000] --- dbms/src/Storages/MergeTree/MergeTreeData.cpp | 6 +++--- dbms/src/Storages/MergeTree/MergeTreeDataFormatVersion.h | 2 ++ dbms/src/Storages/MergeTree/MergeTreeDataMerger.cpp | 4 ++-- dbms/src/Storages/MergeTree/MergeTreeDataPart.cpp | 6 +++--- dbms/src/Storages/MergeTree/MergeTreeDataWriter.cpp | 2 +- dbms/src/Storages/MergeTree/MergeTreePartInfo.cpp | 2 +- .../MergeTree/ReplicatedMergeTreeBlockOutputStream.cpp | 2 +- dbms/src/Storages/StorageReplicatedMergeTree.cpp | 2 +- 8 files changed, 14 insertions(+), 12 deletions(-) diff --git a/dbms/src/Storages/MergeTree/MergeTreeData.cpp b/dbms/src/Storages/MergeTree/MergeTreeData.cpp index 2e6d36ab66d..8be15b6ae92 100644 --- a/dbms/src/Storages/MergeTree/MergeTreeData.cpp +++ b/dbms/src/Storages/MergeTree/MergeTreeData.cpp @@ -1294,7 +1294,7 @@ MergeTreeData::DataPartsVector MergeTreeData::renameTempPartAndReplace( part->info.min_block = part->info.max_block = increment->get(); String new_name; - if (format_version == 0) + if (format_version < MERGE_TREE_DATA_MIN_FORMAT_VERSION_WITH_CUSTOM_PARTITIONING) new_name = part->info.getPartNameV0(part->getMinDate(), part->getMaxDate()); else new_name = part->info.getPartName(); @@ -1724,7 +1724,7 @@ void MergeTreeData::removePartContributionToColumnSizes(const DataPartPtr & part void MergeTreeData::freezePartition(const ASTPtr & partition_ast, const String & with_name, const Context & context) { String prefix; - if (format_version == 0) + if (format_version < 
MERGE_TREE_DATA_MIN_FORMAT_VERSION_WITH_CUSTOM_PARTITIONING) { const auto & partition = dynamic_cast(*partition_ast); /// Month-partitioning specific - allow partition ID can be passed in the partition value. @@ -1804,7 +1804,7 @@ String MergeTreeData::getPartitionIDFromQuery(const ASTPtr & partition_ast, cons if (!partition.value) return partition.id; - if (format_version == 0) + if (format_version < MERGE_TREE_DATA_MIN_FORMAT_VERSION_WITH_CUSTOM_PARTITIONING) { /// Month-partitioning specific - allow partition ID can be passed in the partition value. const auto * partition_lit = typeid_cast(partition.value.get()); diff --git a/dbms/src/Storages/MergeTree/MergeTreeDataFormatVersion.h b/dbms/src/Storages/MergeTree/MergeTreeDataFormatVersion.h index 7d723412e6f..74f26fcdf45 100644 --- a/dbms/src/Storages/MergeTree/MergeTreeDataFormatVersion.h +++ b/dbms/src/Storages/MergeTree/MergeTreeDataFormatVersion.h @@ -7,4 +7,6 @@ namespace DB STRONG_TYPEDEF(UInt32, MergeTreeDataFormatVersion); +const MergeTreeDataFormatVersion MERGE_TREE_DATA_MIN_FORMAT_VERSION_WITH_CUSTOM_PARTITIONING {1}; + } diff --git a/dbms/src/Storages/MergeTree/MergeTreeDataMerger.cpp b/dbms/src/Storages/MergeTree/MergeTreeDataMerger.cpp index 510aecdfa37..77b7a8aa6da 100644 --- a/dbms/src/Storages/MergeTree/MergeTreeDataMerger.cpp +++ b/dbms/src/Storages/MergeTree/MergeTreeDataMerger.cpp @@ -84,7 +84,7 @@ void MergeTreeDataMerger::FuturePart::assign(MergeTreeData::DataPartsVector part part_info.max_block = parts.back()->info.max_block; part_info.level = max_level + 1; - if (parts.front()->storage.format_version == 0) + if (parts.front()->storage.format_version < MERGE_TREE_DATA_MIN_FORMAT_VERSION_WITH_CUSTOM_PARTITIONING) { DayNum_t min_date = DayNum_t(std::numeric_limits::max()); DayNum_t max_date = DayNum_t(std::numeric_limits::min()); @@ -1060,7 +1060,7 @@ MergeTreeData::PerShardDataParts MergeTreeDataMerger::reshardPartition( MergeTreeData::MutableDataPartPtr & part_from_shard = entry.second; std::string new_name; - if (data.format_version == 0) + if (data.format_version < MERGE_TREE_DATA_MIN_FORMAT_VERSION_WITH_CUSTOM_PARTITIONING) new_name = part_from_shard->info.getPartNameV0(part_from_shard->getMinDate(), part_from_shard->getMaxDate()); else new_name = part_from_shard->info.getPartName(); diff --git a/dbms/src/Storages/MergeTree/MergeTreeDataPart.cpp b/dbms/src/Storages/MergeTree/MergeTreeDataPart.cpp index 673bf977a83..9a79975e1dc 100644 --- a/dbms/src/Storages/MergeTree/MergeTreeDataPart.cpp +++ b/dbms/src/Storages/MergeTree/MergeTreeDataPart.cpp @@ -810,7 +810,7 @@ void MergeTreeDataPart::loadIndex() void MergeTreeDataPart::loadPartitionAndMinMaxIndex() { - if (storage.format_version == 0) + if (storage.format_version < MERGE_TREE_DATA_MIN_FORMAT_VERSION_WITH_CUSTOM_PARTITIONING) { DayNum_t min_date; DayNum_t max_date; @@ -900,7 +900,7 @@ void MergeTreeDataPart::checkConsistency(bool require_part_metadata) } } - if (storage.format_version > 0) + if (storage.format_version >= MERGE_TREE_DATA_MIN_FORMAT_VERSION_WITH_CUSTOM_PARTITIONING) { if (storage.partition_expr && !checksums.files.count("partition.dat")) throw Exception("No checksum for partition.dat", ErrorCodes::NO_FILE_IN_DATA_PART); @@ -928,7 +928,7 @@ void MergeTreeDataPart::checkConsistency(bool require_part_metadata) if (!storage.sort_descr.empty()) check_file_not_empty(path + "primary.idx"); - if (storage.format_version > 0) + if (storage.format_version >= MERGE_TREE_DATA_MIN_FORMAT_VERSION_WITH_CUSTOM_PARTITIONING) { if (storage.partition_expr) 
check_file_not_empty(path + "partition.dat"); diff --git a/dbms/src/Storages/MergeTree/MergeTreeDataWriter.cpp b/dbms/src/Storages/MergeTree/MergeTreeDataWriter.cpp index 296d0b7265c..b71fedbab86 100644 --- a/dbms/src/Storages/MergeTree/MergeTreeDataWriter.cpp +++ b/dbms/src/Storages/MergeTree/MergeTreeDataWriter.cpp @@ -136,7 +136,7 @@ MergeTreeData::MutableDataPartPtr MergeTreeDataWriter::writeTempPart(BlockWithPa MergeTreePartInfo new_part_info(partition.getID(data), temp_index, temp_index, 0); String part_name; - if (data.format_version == 0) + if (data.format_version < MERGE_TREE_DATA_MIN_FORMAT_VERSION_WITH_CUSTOM_PARTITIONING) { DayNum_t min_date(minmax_idx.min_values[data.minmax_idx_date_column_pos].get()); DayNum_t max_date(minmax_idx.max_values[data.minmax_idx_date_column_pos].get()); diff --git a/dbms/src/Storages/MergeTree/MergeTreePartInfo.cpp b/dbms/src/Storages/MergeTree/MergeTreePartInfo.cpp index 8131bd92e69..338dcf2249d 100644 --- a/dbms/src/Storages/MergeTree/MergeTreePartInfo.cpp +++ b/dbms/src/Storages/MergeTree/MergeTreePartInfo.cpp @@ -26,7 +26,7 @@ bool MergeTreePartInfo::tryParsePartName(const String & dir_name, MergeTreePartI ReadBufferFromString in(dir_name); String partition_id; - if (format_version == 0) + if (format_version < MERGE_TREE_DATA_MIN_FORMAT_VERSION_WITH_CUSTOM_PARTITIONING) { UInt32 min_yyyymmdd = 0; UInt32 max_yyyymmdd = 0; diff --git a/dbms/src/Storages/MergeTree/ReplicatedMergeTreeBlockOutputStream.cpp b/dbms/src/Storages/MergeTree/ReplicatedMergeTreeBlockOutputStream.cpp index 7b6b5d51d63..52bfa125bde 100644 --- a/dbms/src/Storages/MergeTree/ReplicatedMergeTreeBlockOutputStream.cpp +++ b/dbms/src/Storages/MergeTree/ReplicatedMergeTreeBlockOutputStream.cpp @@ -176,7 +176,7 @@ void ReplicatedMergeTreeBlockOutputStream::commitPart(zkutil::ZooKeeperPtr & zoo part->info.level = 0; String part_name; - if (storage.data.format_version == 0) + if (storage.data.format_version < MERGE_TREE_DATA_MIN_FORMAT_VERSION_WITH_CUSTOM_PARTITIONING) part_name = part->info.getPartNameV0(part->getMinDate(), part->getMaxDate()); else part_name = part->info.getPartName(); diff --git a/dbms/src/Storages/StorageReplicatedMergeTree.cpp b/dbms/src/Storages/StorageReplicatedMergeTree.cpp index 13874669452..d9e9229b916 100644 --- a/dbms/src/Storages/StorageReplicatedMergeTree.cpp +++ b/dbms/src/Storages/StorageReplicatedMergeTree.cpp @@ -2576,7 +2576,7 @@ static String getFakePartNameCoveringPartRange( { /// Artificial high level is choosen, to make this part "covering" all parts inside. MergeTreePartInfo part_info(partition_id, left, right, 999999999); - if (format_version == 0) + if (format_version < MERGE_TREE_DATA_MIN_FORMAT_VERSION_WITH_CUSTOM_PARTITIONING) { /// The date range is all month long. 
const auto & lut = DateLUT::instance(); From a10ba0cd9125897d0ba2c13001c2c6b747082980 Mon Sep 17 00:00:00 2001 From: Alexey Zatelepin Date: Fri, 8 Sep 2017 16:17:38 +0300 Subject: [PATCH 17/63] serialize partition key in Replicated tables metadata [#CLICKHOUSE-3000] --- dbms/src/Storages/MergeTree/MergeTreeData.cpp | 49 +++++-------- dbms/src/Storages/MergeTree/MergeTreeData.h | 4 +- .../MergeTree/MergeTreeDataSelectExecutor.cpp | 18 ++++- dbms/src/Storages/StorageMergeTree.cpp | 2 +- dbms/src/Storages/StorageMergeTree.h | 2 +- .../Storages/StorageReplicatedMergeTree.cpp | 70 ++++++++++++++++--- .../src/Storages/StorageReplicatedMergeTree.h | 4 +- 7 files changed, 101 insertions(+), 48 deletions(-) diff --git a/dbms/src/Storages/MergeTree/MergeTreeData.cpp b/dbms/src/Storages/MergeTree/MergeTreeData.cpp index 8be15b6ae92..f27abd3d6af 100644 --- a/dbms/src/Storages/MergeTree/MergeTreeData.cpp +++ b/dbms/src/Storages/MergeTree/MergeTreeData.cpp @@ -80,8 +80,9 @@ MergeTreeData::MergeTreeData( const NamesAndTypesList & alias_columns_, const ColumnDefaults & column_defaults_, Context & context_, - ASTPtr & primary_expr_ast_, - const String & date_column_name_, const ASTPtr & sampling_expression_, + const ASTPtr & primary_expr_ast_, + const String & date_column_name_, + const ASTPtr & sampling_expression_, size_t index_granularity_, const MergingParams & merging_params_, const MergeTreeSettings & settings_, @@ -91,10 +92,11 @@ MergeTreeData::MergeTreeData( BrokenPartCallback broken_part_callback_, PartsCleanCallback parts_clean_callback_) : ITableDeclaration{materialized_columns_, alias_columns_, column_defaults_}, context(context_), - date_column_name(date_column_name_), sampling_expression(sampling_expression_), + sampling_expression(sampling_expression_), index_granularity(index_granularity_), merging_params(merging_params_), - settings(settings_), primary_expr_ast(primary_expr_ast_ ? primary_expr_ast_->clone() : nullptr), + settings(settings_), + primary_expr_ast(primary_expr_ast_), require_part_metadata(require_part_metadata_), database_name(database_), table_name(table_), full_path(full_path_), columns(columns_), @@ -102,29 +104,6 @@ MergeTreeData::MergeTreeData( parts_clean_callback(parts_clean_callback_ ? parts_clean_callback_ : [this](){ clearOldParts(); }), log_name(log_name_), log(&Logger::get(log_name + " (Data)")) { - /// Check that the date column exists and is of type Date. - const auto check_date_exists = [this] (const NamesAndTypesList & columns) - { - for (const auto & column : columns) - { - if (column.name == date_column_name) - { - if (!typeid_cast(column.type.get())) - throw Exception("Date column (" + date_column_name + ") for storage of MergeTree family must have type Date." - " Provided column of type " + column.type->getName() + "." 
- " You may have separate column with type " + column.type->getName() + ".", ErrorCodes::BAD_TYPE_OF_FIELD); - return true; - } - } - - return false; - }; - - if (!check_date_exists(*columns) && !check_date_exists(materialized_columns)) - throw Exception{ - "Date column (" + date_column_name + ") does not exist in table declaration.", - ErrorCodes::NO_SUCH_COLUMN_IN_TABLE}; - checkNoMultidimensionalArrays(*columns, attach); checkNoMultidimensionalArrays(materialized_columns, attach); @@ -135,14 +114,24 @@ MergeTreeData::MergeTreeData( initPrimaryKey(); - ASTPtr partition_expr_ast; + try { - String partition_expr_str = "toYYYYMM(" + date_column_name + ")"; + String partition_expr_str = "toYYYYMM(" + date_column_name_ + ")"; ParserNotEmptyExpressionList parser(/* allow_alias_without_as_keyword = */ false); partition_expr_ast = parseQuery( parser, partition_expr_str.data(), partition_expr_str.data() + partition_expr_str.length(), "partition expression"); + + initPartitionKey(partition_expr_ast); + + if (minmax_idx_date_column_pos == -1) + throw Exception("Could not find Date column in the partition key", ErrorCodes::BAD_TYPE_OF_FIELD); + } + catch (Exception & e) + { + /// Better error message. + e.addMessage("(while initializing MergeTree partition key from date column `" + date_column_name_ + "`)"); + throw; } - initPartitionKey(partition_expr_ast); /// Creating directories, if not exist. Poco::File(full_path).createDirectories(); diff --git a/dbms/src/Storages/MergeTree/MergeTreeData.h b/dbms/src/Storages/MergeTree/MergeTreeData.h index d5cfd4e2ead..1f72e95a689 100644 --- a/dbms/src/Storages/MergeTree/MergeTreeData.h +++ b/dbms/src/Storages/MergeTree/MergeTreeData.h @@ -236,7 +236,7 @@ public: const NamesAndTypesList & alias_columns_, const ColumnDefaults & column_defaults_, Context & context_, - ASTPtr & primary_expr_ast_, + const ASTPtr & primary_expr_ast_, const String & date_column_name_, const ASTPtr & sampling_expression_, /// nullptr, if sampling is not supported. 
size_t index_granularity_, @@ -466,7 +466,6 @@ public: MergeTreeDataFormatVersion format_version; Context & context; - const String date_column_name; const ASTPtr sampling_expression; const size_t index_granularity; @@ -479,6 +478,7 @@ public: Block primary_key_sample; DataTypes primary_key_data_types; + ASTPtr partition_expr_ast; ExpressionActionsPtr partition_expr; Names partition_expr_columns; DataTypes partition_expr_column_types; diff --git a/dbms/src/Storages/MergeTree/MergeTreeDataSelectExecutor.cpp b/dbms/src/Storages/MergeTree/MergeTreeDataSelectExecutor.cpp index 985202d6dd9..bd6e54623ac 100644 --- a/dbms/src/Storages/MergeTree/MergeTreeDataSelectExecutor.cpp +++ b/dbms/src/Storages/MergeTree/MergeTreeDataSelectExecutor.cpp @@ -222,9 +222,21 @@ BlockInputStreams MergeTreeDataSelectExecutor::read( data.minmax_idx_sort_descr, data.minmax_idx_expr); if (settings.force_index_by_date && minmax_idx_condition->alwaysUnknownOrTrue()) - throw Exception( - "Index by date (" + data.date_column_name + ") is not used and setting 'force_index_by_date' is set.", - ErrorCodes::INDEX_NOT_USED); + { + String msg = "MinMax index by columns ("; + bool first = true; + for (const String & col : data.minmax_idx_columns) + { + if (first) + first = false; + else + msg += ", "; + msg += col; + } + msg += ") is not used and setting 'force_index_by_date' is set"; + + throw Exception(msg, ErrorCodes::INDEX_NOT_USED); + } } /// Select the parts in which there can be data that satisfy `minmax_idx_condition` and that match the condition on `_part`, diff --git a/dbms/src/Storages/StorageMergeTree.cpp b/dbms/src/Storages/StorageMergeTree.cpp index 4b0c2d1fc3a..f211e281370 100644 --- a/dbms/src/Storages/StorageMergeTree.cpp +++ b/dbms/src/Storages/StorageMergeTree.cpp @@ -40,7 +40,7 @@ StorageMergeTree::StorageMergeTree( const ColumnDefaults & column_defaults_, bool attach, Context & context_, - ASTPtr & primary_expr_ast_, + const ASTPtr & primary_expr_ast_, const String & date_column_name_, const ASTPtr & sampling_expression_, /// nullptr, if sampling is not supported. size_t index_granularity_, diff --git a/dbms/src/Storages/StorageMergeTree.h b/dbms/src/Storages/StorageMergeTree.h index fefe84a71f7..194fbb6cd07 100644 --- a/dbms/src/Storages/StorageMergeTree.h +++ b/dbms/src/Storages/StorageMergeTree.h @@ -130,7 +130,7 @@ private: const ColumnDefaults & column_defaults_, bool attach, Context & context_, - ASTPtr & primary_expr_ast_, + const ASTPtr & primary_expr_ast_, const String & date_column_name_, const ASTPtr & sampling_expression_, /// nullptr, if sampling is not supported. 
size_t index_granularity_, diff --git a/dbms/src/Storages/StorageReplicatedMergeTree.cpp b/dbms/src/Storages/StorageReplicatedMergeTree.cpp index d9e9229b916..13fc1e9cb98 100644 --- a/dbms/src/Storages/StorageReplicatedMergeTree.cpp +++ b/dbms/src/Storages/StorageReplicatedMergeTree.cpp @@ -182,7 +182,7 @@ StorageReplicatedMergeTree::StorageReplicatedMergeTree( const NamesAndTypesList & alias_columns_, const ColumnDefaults & column_defaults_, Context & context_, - ASTPtr & primary_expr_ast_, + const ASTPtr & primary_expr_ast_, const String & date_column_name_, const ASTPtr & sampling_expression_, size_t index_granularity_, @@ -304,7 +304,7 @@ StoragePtr StorageReplicatedMergeTree::create( const NamesAndTypesList & alias_columns_, const ColumnDefaults & column_defaults_, Context & context_, - ASTPtr & primary_expr_ast_, + const ASTPtr & primary_expr_ast_, const String & date_column_name_, const ASTPtr & sampling_expression_, size_t index_granularity_, @@ -388,12 +388,24 @@ namespace void write(WriteBuffer & out) const { out << "metadata format version: 1" << "\n" - << "date column: " << data.date_column_name << "\n" - << "sampling expression: " << formattedAST(data.sampling_expression) << "\n" + << "date column: "; + + if (data.format_version < MERGE_TREE_DATA_MIN_FORMAT_VERSION_WITH_CUSTOM_PARTITIONING) + out << data.minmax_idx_columns[data.minmax_idx_date_column_pos] << "\n"; + else + out << "\n"; + + out << "sampling expression: " << formattedAST(data.sampling_expression) << "\n" << "index granularity: " << data.index_granularity << "\n" << "mode: " << static_cast(data.merging_params.mode) << "\n" << "sign column: " << data.merging_params.sign_column << "\n" << "primary key: " << formattedAST(data.primary_expr_ast) << "\n"; + + if (data.format_version >= MERGE_TREE_DATA_MIN_FORMAT_VERSION_WITH_CUSTOM_PARTITIONING) + { + out << "data format version: " << data.format_version.toUnderType() << "\n"; + out << "partition key: " << formattedAST(data.partition_expr_ast) << "\n"; + } } String toString() const @@ -410,12 +422,21 @@ namespace in >> "metadata format version: 1"; in >> "\ndate column: "; - String read_date_column_name; - in >> read_date_column_name; + String read_date_column; + in >> read_date_column; - if (read_date_column_name != data.date_column_name) - throw Exception("Existing table metadata in ZooKeeper differs in date index column." - " Stored in ZooKeeper: " + read_date_column_name + ", local: " + data.date_column_name, + if (data.format_version < MERGE_TREE_DATA_MIN_FORMAT_VERSION_WITH_CUSTOM_PARTITIONING) + { + const String & local_date_column = data.minmax_idx_columns[data.minmax_idx_date_column_pos]; + if (local_date_column != read_date_column) + throw Exception("Existing table metadata in ZooKeeper differs in date index column." + " Stored in ZooKeeper: " + read_date_column + ", local: " + local_date_column, + ErrorCodes::METADATA_MISMATCH); + } + else if (!read_date_column.empty()) + throw Exception( + "Existing table metadata in ZooKeeper differs in date index column." 
+ " Stored in ZooKeeper: " + read_date_column + ", local is custom-partitioned.", ErrorCodes::METADATA_MISMATCH); in >> "\nsampling expression: "; @@ -469,6 +490,37 @@ namespace ErrorCodes::METADATA_MISMATCH); in >> "\n"; + MergeTreeDataFormatVersion read_data_format_version; + if (in.eof()) + read_data_format_version = 0; + else + { + in >> "data format version: "; + in >> read_data_format_version.toUnderType(); + } + + if (read_data_format_version != data.format_version) + throw Exception("Existing table metadata in ZooKeeper differs in data format version." + " Stored in ZooKeeper: " + DB::toString(read_data_format_version.toUnderType()) + + ", local: " + DB::toString(data.format_version.toUnderType()), + ErrorCodes::METADATA_MISMATCH); + + if (data.format_version >= MERGE_TREE_DATA_MIN_FORMAT_VERSION_WITH_CUSTOM_PARTITIONING) + { + in >> "\npartition key: "; + String read_partition_key; + String local_partition_key = formattedAST(data.partition_expr_ast); + in >> read_partition_key; + + if (read_partition_key != local_partition_key) + throw Exception( + "Existing table metadata in ZooKeeper differs in partition key expression." + " Stored in ZooKeeper: " + read_partition_key + ", local: " + local_partition_key, + ErrorCodes::METADATA_MISMATCH); + + in >> "\n"; + } + assertEOF(in); } diff --git a/dbms/src/Storages/StorageReplicatedMergeTree.h b/dbms/src/Storages/StorageReplicatedMergeTree.h index 8fc9a9ce754..8bdd864d074 100644 --- a/dbms/src/Storages/StorageReplicatedMergeTree.h +++ b/dbms/src/Storages/StorageReplicatedMergeTree.h @@ -87,7 +87,7 @@ public: const NamesAndTypesList & alias_columns_, const ColumnDefaults & column_defaults_, Context & context_, - ASTPtr & primary_expr_ast_, + const ASTPtr & primary_expr_ast_, const String & date_column_name_, const ASTPtr & sampling_expression_, /// nullptr, if sampling is not supported. size_t index_granularity_, @@ -334,7 +334,7 @@ private: const NamesAndTypesList & alias_columns_, const ColumnDefaults & column_defaults_, Context & context_, - ASTPtr & primary_expr_ast_, + const ASTPtr & primary_expr_ast_, const String & date_column_name_, const ASTPtr & sampling_expression_, size_t index_granularity_, From 728a23f60e27f57b11e5ce96fde9cb786dab2ecd Mon Sep 17 00:00:00 2001 From: Alexey Zatelepin Date: Fri, 8 Sep 2017 21:11:09 +0300 Subject: [PATCH 18/63] allow creation of custom-partitioned tables with experimental syntax [#CLICKHOUSE-3000] --- dbms/src/Interpreters/Settings.h | 10 +++- dbms/src/Storages/MergeTree/MergeTreeData.cpp | 47 +++++++++++++------ dbms/src/Storages/MergeTree/MergeTreeData.h | 7 ++- dbms/src/Storages/StorageFactory.cpp | 23 +++++---- dbms/src/Storages/StorageMergeTree.cpp | 5 +- dbms/src/Storages/StorageMergeTree.h | 6 ++- .../Storages/StorageReplicatedMergeTree.cpp | 10 ++-- .../src/Storages/StorageReplicatedMergeTree.h | 6 ++- 8 files changed, 76 insertions(+), 38 deletions(-) diff --git a/dbms/src/Interpreters/Settings.h b/dbms/src/Interpreters/Settings.h index a0eb9639060..16afbbe1fba 100644 --- a/dbms/src/Interpreters/Settings.h +++ b/dbms/src/Interpreters/Settings.h @@ -293,7 +293,15 @@ struct Settings */ \ M(SettingUInt64, insert_distributed_timeout, 0) \ /* Timeout for DDL query responses from all hosts in cluster. Negative value means infinite. 
*/ \ - M(SettingInt64, distributed_ddl_task_timeout, 120) + M(SettingInt64, distributed_ddl_task_timeout, 120) \ + \ + /** If true, and the date parameter of MergeTree engines is an expression (not a column name), \ + * it will be interpreted as the partitioning expression, allowing custom partitions. \ + * IMPORTANT: Don't use this setting just yet. \ + * It is for testing purposes, the syntax will likely change soon and the server will not be able \ + * to load the tables created this way. You have been warned. \ + */ \ + M(SettingBool, experimental_merge_tree_allow_custom_partitions, false) /// Possible limits for query execution. diff --git a/dbms/src/Storages/MergeTree/MergeTreeData.cpp b/dbms/src/Storages/MergeTree/MergeTreeData.cpp index f27abd3d6af..fc0f03a362b 100644 --- a/dbms/src/Storages/MergeTree/MergeTreeData.cpp +++ b/dbms/src/Storages/MergeTree/MergeTreeData.cpp @@ -70,6 +70,7 @@ namespace ErrorCodes extern const int SYNTAX_ERROR; extern const int CORRUPTED_DATA; extern const int INVALID_PARTITION_VALUE; + extern const int METADATA_MISMATCH; } @@ -81,7 +82,8 @@ MergeTreeData::MergeTreeData( const ColumnDefaults & column_defaults_, Context & context_, const ASTPtr & primary_expr_ast_, - const String & date_column_name_, + const String & date_column_name, + const ASTPtr & partition_expr_ast_, const ASTPtr & sampling_expression_, size_t index_granularity_, const MergingParams & merging_params_, @@ -97,6 +99,7 @@ MergeTreeData::MergeTreeData( merging_params(merging_params_), settings(settings_), primary_expr_ast(primary_expr_ast_), + partition_expr_ast(partition_expr_ast_), require_part_metadata(require_part_metadata_), database_name(database_), table_name(table_), full_path(full_path_), columns(columns_), @@ -114,23 +117,32 @@ MergeTreeData::MergeTreeData( initPrimaryKey(); - try + MergeTreeDataFormatVersion min_format_version(0); + if (!date_column_name.empty()) { - String partition_expr_str = "toYYYYMM(" + date_column_name_ + ")"; - ParserNotEmptyExpressionList parser(/* allow_alias_without_as_keyword = */ false); - partition_expr_ast = parseQuery( - parser, partition_expr_str.data(), partition_expr_str.data() + partition_expr_str.length(), "partition expression"); + try + { + String partition_expr_str = "toYYYYMM(" + date_column_name + ")"; + ParserNotEmptyExpressionList parser(/* allow_alias_without_as_keyword = */ false); + partition_expr_ast = parseQuery( + parser, partition_expr_str.data(), partition_expr_str.data() + partition_expr_str.length(), "partition expression"); - initPartitionKey(partition_expr_ast); + initPartitionKey(); - if (minmax_idx_date_column_pos == -1) - throw Exception("Could not find Date column in the partition key", ErrorCodes::BAD_TYPE_OF_FIELD); + if (minmax_idx_date_column_pos == -1) + throw Exception("Could not find Date column", ErrorCodes::BAD_TYPE_OF_FIELD); + } + catch (Exception & e) + { + /// Better error message. + e.addMessage("(while initializing MergeTree partition key from date column `" + date_column_name + "`)"); + throw; + } } - catch (Exception & e) + else { - /// Better error message. - e.addMessage("(while initializing MergeTree partition key from date column `" + date_column_name_ + "`)"); - throw; + initPartitionKey(); + min_format_version = MERGE_TREE_DATA_MIN_FORMAT_VERSION_WITH_CUSTOM_PARTITIONING; } /// Creating directories, if not exist. 
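The net effect of this constructor hunk together with the format_version.txt handling in the next hunk is easy to lose in the diff, so a condensed paraphrase may help. This is a sketch for review purposes, not a drop-in replacement; read_version_file_or_zero() is a stand-in for the WriteBufferFromFile/ReadBufferFromFile code in the hunk below:

    // Tables declared with a Date column keep the legacy monthly partitioning
    // (format version 0); a custom partitioning expression requires at least
    // MERGE_TREE_DATA_MIN_FORMAT_VERSION_WITH_CUSTOM_PARTITIONING (1, introduced in PATCH 16/63).
    MergeTreeDataFormatVersion min_format_version(0);
    if (date_column_name.empty())
        min_format_version = MERGE_TREE_DATA_MIN_FORMAT_VERSION_WITH_CUSTOM_PARTITIONING;

    if (!attach)
        format_version = min_format_version;            // CREATE: persisted to format_version.txt
    else
        format_version = read_version_file_or_zero();   // ATTACH: trust what is on disk

    // Refuse to load old-format data into a table that needs the new format.
    if (format_version < min_format_version)
        throw Exception(
            "MergeTree data format version on disk doesn't support custom partitioning",
            ErrorCodes::METADATA_MISMATCH);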
@@ -140,7 +152,7 @@ MergeTreeData::MergeTreeData( String version_file_path = full_path + "format_version.txt"; if (!attach) { - format_version = 0; + format_version = min_format_version; WriteBufferFromFile buf(version_file_path); writeIntText(format_version.toUnderType(), buf); } @@ -153,6 +165,11 @@ MergeTreeData::MergeTreeData( } else format_version = 0; + + if (format_version < min_format_version) + throw Exception( + "MergeTree data format version on disk doesn't support custom partitioning", + ErrorCodes::METADATA_MISMATCH); } @@ -219,7 +236,7 @@ void MergeTreeData::initPrimaryKey() } -void MergeTreeData::initPartitionKey(const ASTPtr & partition_expr_ast) +void MergeTreeData::initPartitionKey() { if (!partition_expr_ast || partition_expr_ast->children.empty()) return; diff --git a/dbms/src/Storages/MergeTree/MergeTreeData.h b/dbms/src/Storages/MergeTree/MergeTreeData.h index 1f72e95a689..ea020df55a6 100644 --- a/dbms/src/Storages/MergeTree/MergeTreeData.h +++ b/dbms/src/Storages/MergeTree/MergeTreeData.h @@ -227,6 +227,8 @@ public: /// Correctness of names and paths is not checked. /// /// primary_expr_ast - expression used for sorting; empty for UnsortedMergeTree. + /// date_column_name - if not empty, the name of the Date column used for partitioning by month. + /// Otherwise, partition_expr_ast is used for partitioning. /// index_granularity - how many rows correspond to one primary key value. /// require_part_metadata - should checksums.txt and columns.txt exist in the part directory. /// attach - whether the existing table is attached or the new table is created. @@ -237,7 +239,8 @@ public: const ColumnDefaults & column_defaults_, Context & context_, const ASTPtr & primary_expr_ast_, - const String & date_column_name_, + const String & date_column_name, + const ASTPtr & partition_expr_ast_, const ASTPtr & sampling_expression_, /// nullptr, if sampling is not supported. size_t index_granularity_, const MergingParams & merging_params_, @@ -548,7 +551,7 @@ private: void initPrimaryKey(); - void initPartitionKey(const ASTPtr & partition_key_ast); + void initPartitionKey(); /// Expression for column type conversion. /// If no conversions are needed, out_expression=nullptr. diff --git a/dbms/src/Storages/StorageFactory.cpp b/dbms/src/Storages/StorageFactory.cpp index b0240553ae5..c70a21235cb 100644 --- a/dbms/src/Storages/StorageFactory.cpp +++ b/dbms/src/Storages/StorageFactory.cpp @@ -59,18 +59,18 @@ namespace ErrorCodes } -/** For StorageMergeTree: get the primary key as an ASTExpressionList. +/** For StorageMergeTree: get the key expression AST as an ASTExpressionList. * It can be specified in the tuple: (CounterID, Date), * or as one column: CounterID. */ -static ASTPtr extractPrimaryKey(const ASTPtr & node) +static ASTPtr extractKeyExpressionList(const ASTPtr & node) { - const ASTFunction * primary_expr_func = typeid_cast(&*node); + const ASTFunction * expr_func = typeid_cast(&*node); - if (primary_expr_func && primary_expr_func->name == "tuple") + if (expr_func && expr_func->name == "tuple") { /// Primary key is specified in tuple. - return primary_expr_func->children.at(0); + return expr_func->children.at(0); } else { @@ -822,6 +822,7 @@ For further info please read the documentation: https://clickhouse.yandex/ /// For all. 
String date_column_name; + ASTPtr partition_expr_ast; ASTPtr primary_expr_list; ASTPtr sampling_expression; UInt64 index_granularity; @@ -904,15 +905,17 @@ For further info please read the documentation: https://clickhouse.yandex/ args.erase(args.begin() + 1); } - /// Now only three parameters remain - date, primary_key, index_granularity. + /// Now only three parameters remain - date (or partitioning expression), primary_key, index_granularity. - if (auto ast = typeid_cast(&*args[0])) + if (auto ast = typeid_cast(args[0].get())) date_column_name = ast->name; + else if (local_context.getSettingsRef().experimental_merge_tree_allow_custom_partitions) + partition_expr_ast = extractKeyExpressionList(args[0]); else throw Exception(String("Date column name must be an unquoted string") + verbose_help, ErrorCodes::BAD_ARGUMENTS); if (merging_params.mode != MergeTreeData::MergingParams::Unsorted) - primary_expr_list = extractPrimaryKey(args[1]); + primary_expr_list = extractKeyExpressionList(args[1]); auto ast = typeid_cast(&*args.back()); if (ast && ast->value.getType() == Field::Types::UInt64) @@ -924,7 +927,7 @@ For further info please read the documentation: https://clickhouse.yandex/ return StorageReplicatedMergeTree::create( zookeeper_path, replica_name, attach, data_path, database_name, table_name, columns, materialized_columns, alias_columns, column_defaults, - context, primary_expr_list, date_column_name, + context, primary_expr_list, date_column_name, partition_expr_ast, sampling_expression, index_granularity, merging_params, has_force_restore_data_flag, context.getMergeTreeSettings()); @@ -932,7 +935,7 @@ For further info please read the documentation: https://clickhouse.yandex/ return StorageMergeTree::create( data_path, database_name, table_name, columns, materialized_columns, alias_columns, column_defaults, attach, - context, primary_expr_list, date_column_name, + context, primary_expr_list, date_column_name, partition_expr_ast, sampling_expression, index_granularity, merging_params, has_force_restore_data_flag, context.getMergeTreeSettings()); diff --git a/dbms/src/Storages/StorageMergeTree.cpp b/dbms/src/Storages/StorageMergeTree.cpp index f211e281370..a544f5bde26 100644 --- a/dbms/src/Storages/StorageMergeTree.cpp +++ b/dbms/src/Storages/StorageMergeTree.cpp @@ -41,7 +41,8 @@ StorageMergeTree::StorageMergeTree( bool attach, Context & context_, const ASTPtr & primary_expr_ast_, - const String & date_column_name_, + const String & date_column_name, + const ASTPtr & partition_expr_ast_, const ASTPtr & sampling_expression_, /// nullptr, if sampling is not supported. size_t index_granularity_, const MergeTreeData::MergingParams & merging_params_, @@ -53,7 +54,7 @@ StorageMergeTree::StorageMergeTree( data(database_name, table_name, full_path, columns_, materialized_columns_, alias_columns_, column_defaults_, - context_, primary_expr_ast_, date_column_name_, + context_, primary_expr_ast_, date_column_name, partition_expr_ast_, sampling_expression_, index_granularity_, merging_params_, settings_, database_name_ + "." + table_name, false, attach), reader(data), writer(data), merger(data, context.getBackgroundPool()), diff --git a/dbms/src/Storages/StorageMergeTree.h b/dbms/src/Storages/StorageMergeTree.h index 194fbb6cd07..9fdcd1339df 100644 --- a/dbms/src/Storages/StorageMergeTree.h +++ b/dbms/src/Storages/StorageMergeTree.h @@ -117,7 +117,8 @@ private: * consisting of the specified columns. 
* * primary_expr_ast - expression for sorting; - * date_column_name - the name of the column with the date; + * date_column_name - if not empty, the name of the column with the date used for partitioning by month; + otherwise, partition_expr_ast is used as the partitioning expression; * index_granularity - fow how many rows one index value is written. */ StorageMergeTree( @@ -131,7 +132,8 @@ private: bool attach, Context & context_, const ASTPtr & primary_expr_ast_, - const String & date_column_name_, + const String & date_column_name, + const ASTPtr & partition_expr_ast_, const ASTPtr & sampling_expression_, /// nullptr, if sampling is not supported. size_t index_granularity_, const MergeTreeData::MergingParams & merging_params_, diff --git a/dbms/src/Storages/StorageReplicatedMergeTree.cpp b/dbms/src/Storages/StorageReplicatedMergeTree.cpp index 13fc1e9cb98..4125de90068 100644 --- a/dbms/src/Storages/StorageReplicatedMergeTree.cpp +++ b/dbms/src/Storages/StorageReplicatedMergeTree.cpp @@ -183,7 +183,8 @@ StorageReplicatedMergeTree::StorageReplicatedMergeTree( const ColumnDefaults & column_defaults_, Context & context_, const ASTPtr & primary_expr_ast_, - const String & date_column_name_, + const String & date_column_name, + const ASTPtr & partition_expr_ast_, const ASTPtr & sampling_expression_, size_t index_granularity_, const MergeTreeData::MergingParams & merging_params_, @@ -197,7 +198,7 @@ StorageReplicatedMergeTree::StorageReplicatedMergeTree( data(database_name, table_name, full_path, columns_, materialized_columns_, alias_columns_, column_defaults_, - context_, primary_expr_ast_, date_column_name_, + context_, primary_expr_ast_, date_column_name, partition_expr_ast_, sampling_expression_, index_granularity_, merging_params_, settings_, database_name_ + "." + table_name, true, attach, [this] (const std::string & name) { enqueuePartForCheck(name); }, @@ -305,7 +306,8 @@ StoragePtr StorageReplicatedMergeTree::create( const ColumnDefaults & column_defaults_, Context & context_, const ASTPtr & primary_expr_ast_, - const String & date_column_name_, + const String & date_column_name, + const ASTPtr & partition_expr_ast_, const ASTPtr & sampling_expression_, size_t index_granularity_, const MergeTreeData::MergingParams & merging_params_, @@ -316,7 +318,7 @@ StoragePtr StorageReplicatedMergeTree::create( zookeeper_path_, replica_name_, attach, path_, database_name_, name_, columns_, materialized_columns_, alias_columns_, column_defaults_, - context_, primary_expr_ast_, date_column_name_, + context_, primary_expr_ast_, date_column_name, partition_expr_ast_, sampling_expression_, index_granularity_, merging_params_, has_force_restore_data_flag_, settings_); StoragePtr res_ptr = res; diff --git a/dbms/src/Storages/StorageReplicatedMergeTree.h b/dbms/src/Storages/StorageReplicatedMergeTree.h index 8bdd864d074..1099e80693a 100644 --- a/dbms/src/Storages/StorageReplicatedMergeTree.h +++ b/dbms/src/Storages/StorageReplicatedMergeTree.h @@ -88,7 +88,8 @@ public: const ColumnDefaults & column_defaults_, Context & context_, const ASTPtr & primary_expr_ast_, - const String & date_column_name_, + const String & date_column_name, + const ASTPtr & partition_expr_ast_, const ASTPtr & sampling_expression_, /// nullptr, if sampling is not supported. 
size_t index_granularity_, const MergeTreeData::MergingParams & merging_params_, @@ -335,7 +336,8 @@ private: const ColumnDefaults & column_defaults_, Context & context_, const ASTPtr & primary_expr_ast_, - const String & date_column_name_, + const String & date_column_name, + const ASTPtr & partition_expr_ast_, const ASTPtr & sampling_expression_, size_t index_granularity_, const MergeTreeData::MergingParams & merging_params_, From 1be62b567ed51103b18dcef4d3d6f5fae10c9c2b Mon Sep 17 00:00:00 2001 From: Alexey Zatelepin Date: Mon, 11 Sep 2017 20:55:41 +0300 Subject: [PATCH 19/63] safeguards to protect against distinct partition values with the same partition_id [#CLICKHOUSE-3000] --- dbms/src/Storages/MergeTree/MergeTreeData.cpp | 72 ++++++---- dbms/src/Storages/MergeTree/MergeTreeData.h | 6 +- .../MergeTree/MergeTreeDataMerger.cpp | 8 ++ .../Storages/MergeTree/MergeTreeDataMerger.h | 2 +- .../Storages/MergeTree/MergeTreeDataPart.cpp | 121 ++--------------- .../Storages/MergeTree/MergeTreeDataPart.h | 25 +--- .../MergeTree/MergeTreeDataWriter.cpp | 2 +- .../Storages/MergeTree/MergeTreePartition.cpp | 128 ++++++++++++++++++ .../Storages/MergeTree/MergeTreePartition.h | 36 +++++ dbms/src/Storages/StorageDistributed.cpp | 3 +- 10 files changed, 238 insertions(+), 165 deletions(-) create mode 100644 dbms/src/Storages/MergeTree/MergeTreePartition.cpp create mode 100644 dbms/src/Storages/MergeTree/MergeTreePartition.h diff --git a/dbms/src/Storages/MergeTree/MergeTreeData.cpp b/dbms/src/Storages/MergeTree/MergeTreeData.cpp index fc0f03a362b..80d9006db27 100644 --- a/dbms/src/Storages/MergeTree/MergeTreeData.cpp +++ b/dbms/src/Storages/MergeTree/MergeTreeData.cpp @@ -1293,6 +1293,15 @@ MergeTreeData::DataPartsVector MergeTreeData::renameTempPartAndReplace( { std::lock_guard lock(data_parts_mutex); + if (DataPartPtr existing_part_in_partition = getAnyPartInPartition(part->info.partition_id, lock)) + { + if (part->partition.value != existing_part_in_partition->partition.value) + throw Exception( + "Partition value mismatch between two parts with the same partition ID. Existing part: " + + existing_part_in_partition->name + ", newly added part: " + part->name, + ErrorCodes::CORRUPTED_DATA); + } + /** It is important that obtaining new block number and adding that block to parts set is done atomically. * Otherwise there is race condition - merge of blocks could happen in interval that doesn't yet contain new part. */ @@ -1407,18 +1416,6 @@ void MergeTreeData::replaceParts(const DataPartsVector & remove, const DataParts } } -void MergeTreeData::attachPart(const DataPartPtr & part) -{ - std::lock_guard lock(data_parts_mutex); - std::lock_guard lock_all(all_data_parts_mutex); - - if (!all_data_parts.insert(part).second) - throw Exception("Part " + part->name + " is already attached", ErrorCodes::DUPLICATE_DATA_PART); - - data_parts.insert(part); - addPartContributionToColumnSizes(part); -} - void MergeTreeData::renameAndDetachPart(const DataPartPtr & part, const String & prefix, bool restore_covered, bool move_to_detached) { LOG_INFO(log, "Renaming " << part->relative_path << " to " << prefix << part->name << " and detaching it."); @@ -1733,7 +1730,7 @@ void MergeTreeData::freezePartition(const ASTPtr & partition_ast, const String & if (format_version < MERGE_TREE_DATA_MIN_FORMAT_VERSION_WITH_CUSTOM_PARTITIONING) { const auto & partition = dynamic_cast(*partition_ast); - /// Month-partitioning specific - allow partition ID can be passed in the partition value. 
+ /// Month-partitioning specific - partition ID can be passed in the partition value. if (const auto * partition_lit = dynamic_cast(partition.value.get())) prefix = partition_lit->value.getType() == Field::Types::UInt64 ? toString(partition_lit->value.get()) @@ -1803,17 +1800,17 @@ size_t MergeTreeData::getPartitionSize(const std::string & partition_id) const return size; } -String MergeTreeData::getPartitionIDFromQuery(const ASTPtr & partition_ast, const Context & context) +String MergeTreeData::getPartitionIDFromQuery(const ASTPtr & ast, const Context & context) { - const auto & partition = typeid_cast(*partition_ast); + const auto & partition_ast = typeid_cast(*ast); - if (!partition.value) - return partition.id; + if (!partition_ast.value) + return partition_ast.id; if (format_version < MERGE_TREE_DATA_MIN_FORMAT_VERSION_WITH_CUSTOM_PARTITIONING) { - /// Month-partitioning specific - allow partition ID can be passed in the partition value. - const auto * partition_lit = typeid_cast(partition.value.get()); + /// Month-partitioning specific - partition ID can be passed in the partition value. + const auto * partition_lit = typeid_cast(partition_ast.value.get()); if (partition_lit && partition_lit->value.getType() == Field::Types::String) { String partition_id = partition_lit->value.get(); @@ -1828,9 +1825,9 @@ String MergeTreeData::getPartitionIDFromQuery(const ASTPtr & partition_ast, cons /// Re-parse partition key fields using the information about expected field types. size_t fields_count = partition_expr_column_types.size(); - if (partition.fields_count != fields_count) + if (partition_ast.fields_count != fields_count) throw Exception( - "Wrong number of fields in the partition expression: " + toString(partition.fields_count) + + "Wrong number of fields in the partition expression: " + toString(partition_ast.fields_count) + ", must be: " + toString(fields_count), ErrorCodes::INVALID_PARTITION_VALUE); @@ -1839,7 +1836,7 @@ String MergeTreeData::getPartitionIDFromQuery(const ASTPtr & partition_ast, cons if (fields_count) { ReadBufferFromMemory left_paren_buf("(", 1); - ReadBufferFromMemory fields_buf(partition.fields_str.data, partition.fields_str.size); + ReadBufferFromMemory fields_buf(partition_ast.fields_str.data, partition_ast.fields_str.size); ReadBufferFromMemory right_paren_buf(")", 1); ConcatReadBuffer buf({&left_paren_buf, &fields_buf, &right_paren_buf}); @@ -1850,14 +1847,31 @@ String MergeTreeData::getPartitionIDFromQuery(const ASTPtr & partition_ast, cons if (!input_stream.read(block)) throw Exception( - "Could not parse partition value: `" + partition.fields_str.toString() + "`", + "Could not parse partition value: `" + partition_ast.fields_str.toString() + "`", ErrorCodes::INVALID_PARTITION_VALUE); for (size_t i = 0; i < fields_count; ++i) block.getByPosition(i).column->get(0, partition_row[i]); } - return MergeTreeDataPart::Partition(std::move(partition_row)).getID(*this); + MergeTreePartition partition(std::move(partition_row)); + String partition_id = partition.getID(*this); + + { + std::lock_guard data_parts_lock(data_parts_mutex); + DataPartPtr existing_part_in_partition = getAnyPartInPartition(partition_id, data_parts_lock); + if (existing_part_in_partition && existing_part_in_partition->partition.value != partition.value) + { + WriteBufferFromOwnString buf; + writeCString("Parsed partition value: ", buf); + partition.serializeTextQuoted(*this, buf); + writeCString(" doesn't match partition value for an existing part with the same partition ID: ", buf); + 
writeString(existing_part_in_partition->name, buf); + throw Exception(buf.str(), ErrorCodes::INVALID_PARTITION_VALUE); + } + } + + return partition_id; } void MergeTreeData::Transaction::rollback() @@ -1888,5 +1902,15 @@ void MergeTreeData::Transaction::rollback() } } +MergeTreeData::DataPartPtr MergeTreeData::getAnyPartInPartition( + const String & partition_id, std::lock_guard & data_parts_lock) +{ + auto min_block = std::numeric_limits::min(); + MergeTreePartInfo dummy_part_info(partition_id, min_block, min_block, 0); + auto it = data_parts.lower_bound(dummy_part_info); + if (it != data_parts.end() && (*it)->info.partition_id == partition_id) + return *it; + return {}; +} } diff --git a/dbms/src/Storages/MergeTree/MergeTreeData.h b/dbms/src/Storages/MergeTree/MergeTreeData.h index ea020df55a6..189c1b4618d 100644 --- a/dbms/src/Storages/MergeTree/MergeTreeData.h +++ b/dbms/src/Storages/MergeTree/MergeTreeData.h @@ -337,9 +337,6 @@ public: /// clearOldParts (ignoring old_parts_lifetime). void replaceParts(const DataPartsVector & remove, const DataPartsVector & add, bool clear_without_timeout); - /// Adds new part to the list of known parts and to the working set. - void attachPart(const DataPartPtr & part); - /// Renames the part to detached/_ and forgets about it. The data won't be deleted in /// clearOldParts. /// If restore_covered is true, adds to the working set inactive parts, which were merged into the deleted part. @@ -568,6 +565,9 @@ private: /// Adds or subtracts the contribution of the part to compressed column sizes. void addPartContributionToColumnSizes(const DataPartPtr & part); void removePartContributionToColumnSizes(const DataPartPtr & part); + + /// If there is no part in the partition with ID `partition_id`, returns empty ptr. + DataPartPtr getAnyPartInPartition(const String & partition_id, std::lock_guard & data_parts_lock); }; } diff --git a/dbms/src/Storages/MergeTree/MergeTreeDataMerger.cpp b/dbms/src/Storages/MergeTree/MergeTreeDataMerger.cpp index 77b7a8aa6da..05e097d7adc 100644 --- a/dbms/src/Storages/MergeTree/MergeTreeDataMerger.cpp +++ b/dbms/src/Storages/MergeTree/MergeTreeDataMerger.cpp @@ -73,6 +73,14 @@ void MergeTreeDataMerger::FuturePart::assign(MergeTreeData::DataPartsVector part if (parts_.empty()) return; + for (size_t i = 0; i < parts_.size(); ++i) + { + if (parts_[i]->partition.value != parts_[0]->partition.value) + throw Exception( + "Attempting to merge parts " + parts_[i]->name + " and " + parts_[0]->name + " that are in different partitions", + ErrorCodes::LOGICAL_ERROR); + } + parts = std::move(parts_); UInt32 max_level = 0; diff --git a/dbms/src/Storages/MergeTree/MergeTreeDataMerger.h b/dbms/src/Storages/MergeTree/MergeTreeDataMerger.h index 65c9974b443..2a3e65120c8 100644 --- a/dbms/src/Storages/MergeTree/MergeTreeDataMerger.h +++ b/dbms/src/Storages/MergeTree/MergeTreeDataMerger.h @@ -27,7 +27,7 @@ public: MergeTreePartInfo part_info; MergeTreeData::DataPartsVector parts; - const MergeTreeDataPart::Partition & getPartition() const { return parts.front()->partition; } + const MergeTreePartition & getPartition() const { return parts.front()->partition; } FuturePart() = default; explicit FuturePart(MergeTreeData::DataPartsVector parts_) diff --git a/dbms/src/Storages/MergeTree/MergeTreeDataPart.cpp b/dbms/src/Storages/MergeTree/MergeTreeDataPart.cpp index 9a79975e1dc..60a356d0944 100644 --- a/dbms/src/Storages/MergeTree/MergeTreeDataPart.cpp +++ b/dbms/src/Storages/MergeTree/MergeTreeDataPart.cpp @@ -8,16 +8,12 @@ #include #include #include 
-#include #include #include #include -#include -#include #include #include #include -#include #include #include @@ -38,6 +34,7 @@ namespace ErrorCodes extern const int NO_FILE_IN_DATA_PART; extern const int EXPECTED_END_OF_FILE; extern const int BAD_SIZE_OF_FILE_IN_DATA_PART; + extern const int CORRUPTED_DATA; extern const int FORMAT_VERSION_TOO_OLD; extern const int UNKNOWN_FORMAT; extern const int UNEXPECTED_FILE_IN_DATA_PART; @@ -295,113 +292,6 @@ static ReadBufferFromFile openForReading(const String & path) return ReadBufferFromFile(path, std::min(static_cast(DBMS_DEFAULT_BUFFER_SIZE), Poco::File(path).getSize())); } - -String MergeTreeDataPart::Partition::getID(const MergeTreeData & storage) const -{ - if (value.size() != storage.partition_expr_columns.size()) - throw Exception("Invalid partition key size: " + toString(value.size()), ErrorCodes::LOGICAL_ERROR); - - if (value.empty()) - return "all"; - - /// In case all partition fields are represented by integral types, try to produce a human-readable partition id. - /// Otherwise use a hex-encoded hash. - bool are_all_integral = true; - for (const Field & field : value) - { - if (field.getType() != Field::Types::UInt64 && field.getType() != Field::Types::Int64) - { - are_all_integral = false; - break; - } - } - - String result; - - if (are_all_integral) - { - FieldVisitorToString to_string_visitor; - for (size_t i = 0; i < value.size(); ++i) - { - if (i > 0) - result += '-'; - - if (typeid_cast(storage.partition_expr_column_types[i].get())) - result += toString(DateLUT::instance().toNumYYYYMMDD(DayNum_t(value[i].get()))); - else - result += applyVisitor(to_string_visitor, value[i]); - } - - return result; - } - - SipHash hash; - FieldVisitorHash hashing_visitor(hash); - for (const Field & field : value) - applyVisitor(hashing_visitor, field); - - char hash_data[16]; - hash.get128(hash_data); - result.resize(32); - for (size_t i = 0; i < 16; ++i) - writeHexByteLowercase(hash_data[i], &result[2 * i]); - - return result; -} - -void MergeTreeDataPart::Partition::serializeTextQuoted(const MergeTreeData & storage, WriteBuffer & out) const -{ - size_t key_size = storage.partition_expr_column_types.size(); - - if (key_size == 0) - { - writeCString("tuple()", out); - return; - } - - if (key_size > 1) - writeChar('(', out); - - for (size_t i = 0; i < key_size; ++i) - { - if (i > 0) - writeCString(", ", out); - - const DataTypePtr & type = storage.partition_expr_column_types[i]; - ColumnPtr column = type->createColumn(); - column->insert(value[i]); - type->serializeTextQuoted(*column, 0, out); - } - - if (key_size > 1) - writeChar(')', out); -} - -void MergeTreeDataPart::Partition::load(const MergeTreeData & storage, const String & part_path) -{ - if (!storage.partition_expr) - return; - - ReadBufferFromFile file = openForReading(part_path + "partition.dat"); - value.resize(storage.partition_expr_column_types.size()); - for (size_t i = 0; i < storage.partition_expr_column_types.size(); ++i) - storage.partition_expr_column_types[i]->deserializeBinary(value[i], file); -} - -void MergeTreeDataPart::Partition::store(const MergeTreeData & storage, const String & part_path, Checksums & checksums) const -{ - if (!storage.partition_expr) - return; - - WriteBufferFromFile out(part_path + "partition.dat"); - HashingWriteBuffer out_hashing(out); - for (size_t i = 0; i < value.size(); ++i) - storage.partition_expr_column_types[i]->serializeBinary(value[i], out_hashing); - checksums.files["partition.dat"].file_size = out_hashing.count(); - 
checksums.files["partition.dat"].file_hash = out_hashing.getHash(); -} - - void MergeTreeDataPart::MinMaxIndex::load(const MergeTreeData & storage, const String & part_path) { size_t minmax_idx_size = storage.minmax_idx_column_types.size(); @@ -817,7 +707,7 @@ void MergeTreeDataPart::loadPartitionAndMinMaxIndex() MergeTreePartInfo::parseMinMaxDatesFromPartName(name, min_date, max_date); const auto & date_lut = DateLUT::instance(); - partition = Partition(date_lut.toNumYYYYMM(min_date)); + partition = MergeTreePartition(date_lut.toNumYYYYMM(min_date)); minmax_idx = MinMaxIndex(min_date, max_date); } else @@ -826,6 +716,13 @@ void MergeTreeDataPart::loadPartitionAndMinMaxIndex() partition.load(storage, full_path); minmax_idx.load(storage, full_path); } + + String calculated_partition_id = partition.getID(storage); + if (calculated_partition_id != info.partition_id) + throw Exception( + "While loading part " + getFullPath() + ": calculated partition ID: " + calculated_partition_id + + " differs from partition ID in part name: " + info.partition_id, + ErrorCodes::CORRUPTED_DATA); } void MergeTreeDataPart::loadChecksums(bool require) diff --git a/dbms/src/Storages/MergeTree/MergeTreeDataPart.h b/dbms/src/Storages/MergeTree/MergeTreeDataPart.h index f03344c2fe6..c7d94c4ccb8 100644 --- a/dbms/src/Storages/MergeTree/MergeTreeDataPart.h +++ b/dbms/src/Storages/MergeTree/MergeTreeDataPart.h @@ -4,6 +4,7 @@ #include #include #include +#include #include #include @@ -149,29 +150,7 @@ struct MergeTreeDataPart using Index = Columns; Index index; - struct Partition - { - Row value; - - public: - Partition() = default; - explicit Partition(Row value_) : value(std::move(value_)) {} - - /// For month-based partitioning. - explicit Partition(UInt32 yyyymm) : value(1, static_cast(yyyymm)) {} - - String getID(const MergeTreeData & storage) const; - - void serializeTextQuoted(const MergeTreeData & storage, WriteBuffer & out) const; - - void load(const MergeTreeData & storage, const String & part_path); - void store(const MergeTreeData & storage, const String & part_path, Checksums & checksums) const; - - void assign(const Partition & other) { value.assign(other.value); } - - }; - - Partition partition; + MergeTreePartition partition; /// Index that for each part stores min and max values of a set of columns. This allows quickly excluding /// parts based on conditions on these columns imposed by a query. 
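The partition ID consistency check added to loadPartitionAndMinMaxIndex() above relies on the ID being a pure function of the partition value. As a rough illustration of the mapping (the example values are taken from the tests added later in this series, not from this hunk):

    partition expression    partition value       partition ID
    tuple()                 ()                    all
    toMonday(d)             '1999-12-27'          19991227                            (a Date renders as YYYYMMDD)
    (d, x)                  ('2000-01-01', 1)     20000101-1                          (integral fields joined with '-')
    tuple(s)                'aaa'                 9b50856126a8a6064f11f027d455bf58    (hex-encoded SipHash128)

Once hashing is involved, distinct values can in principle collide on the same ID, which is why this patch also adds the CORRUPTED_DATA safeguards when parts are added or loaded.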
diff --git a/dbms/src/Storages/MergeTree/MergeTreeDataWriter.cpp b/dbms/src/Storages/MergeTree/MergeTreeDataWriter.cpp index b71fedbab86..bcaee5f9d43 100644 --- a/dbms/src/Storages/MergeTree/MergeTreeDataWriter.cpp +++ b/dbms/src/Storages/MergeTree/MergeTreeDataWriter.cpp @@ -132,7 +132,7 @@ MergeTreeData::MutableDataPartPtr MergeTreeDataWriter::writeTempPart(BlockWithPa MergeTreeDataPart::MinMaxIndex minmax_idx; minmax_idx.update(block, data.minmax_idx_columns); - MergeTreeDataPart::Partition partition(std::move(block_with_partition.partition)); + MergeTreePartition partition(std::move(block_with_partition.partition)); MergeTreePartInfo new_part_info(partition.getID(data), temp_index, temp_index, 0); String part_name; diff --git a/dbms/src/Storages/MergeTree/MergeTreePartition.cpp b/dbms/src/Storages/MergeTree/MergeTreePartition.cpp new file mode 100644 index 00000000000..4bf7bb352de --- /dev/null +++ b/dbms/src/Storages/MergeTree/MergeTreePartition.cpp @@ -0,0 +1,128 @@ +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include + +namespace DB +{ + +static ReadBufferFromFile openForReading(const String & path) +{ + return ReadBufferFromFile(path, std::min(static_cast(DBMS_DEFAULT_BUFFER_SIZE), Poco::File(path).getSize())); +} + +String MergeTreePartition::getID(const MergeTreeData & storage) const +{ + if (value.size() != storage.partition_expr_columns.size()) + throw Exception("Invalid partition key size: " + toString(value.size()), ErrorCodes::LOGICAL_ERROR); + + if (value.empty()) + return "all"; + + /// In case all partition fields are represented by integral types, try to produce a human-readable partition id. + /// Otherwise use a hex-encoded hash. + + bool are_all_integral = true; + for (const Field & field : value) + { + if (field.getType() != Field::Types::UInt64 && field.getType() != Field::Types::Int64) + { + are_all_integral = false; + break; + } + } + + String result; + + if (are_all_integral) + { + FieldVisitorToString to_string_visitor; + for (size_t i = 0; i < value.size(); ++i) + { + if (i > 0) + result += '-'; + + if (typeid_cast(storage.partition_expr_column_types[i].get())) + result += toString(DateLUT::instance().toNumYYYYMMDD(DayNum_t(value[i].get()))); + else + result += applyVisitor(to_string_visitor, value[i]); + } + + return result; + } + + SipHash hash; + FieldVisitorHash hashing_visitor(hash); + for (const Field & field : value) + applyVisitor(hashing_visitor, field); + + char hash_data[16]; + hash.get128(hash_data); + result.resize(32); + for (size_t i = 0; i < 16; ++i) + writeHexByteLowercase(hash_data[i], &result[2 * i]); + + return result; +} + +void MergeTreePartition::serializeTextQuoted(const MergeTreeData & storage, WriteBuffer & out) const +{ + size_t key_size = storage.partition_expr_column_types.size(); + + if (key_size == 0) + { + writeCString("tuple()", out); + return; + } + + if (key_size > 1) + writeChar('(', out); + + for (size_t i = 0; i < key_size; ++i) + { + if (i > 0) + writeCString(", ", out); + + const DataTypePtr & type = storage.partition_expr_column_types[i]; + ColumnPtr column = type->createColumn(); + column->insert(value[i]); + type->serializeTextQuoted(*column, 0, out); + } + + if (key_size > 1) + writeChar(')', out); +} + +void MergeTreePartition::load(const MergeTreeData & storage, const String & part_path) +{ + if (!storage.partition_expr) + return; + + ReadBufferFromFile file = openForReading(part_path + "partition.dat"); + 
value.resize(storage.partition_expr_column_types.size()); + for (size_t i = 0; i < storage.partition_expr_column_types.size(); ++i) + storage.partition_expr_column_types[i]->deserializeBinary(value[i], file); +} + +void MergeTreePartition::store(const MergeTreeData & storage, const String & part_path, MergeTreeDataPartChecksums & checksums) const +{ + if (!storage.partition_expr) + return; + + WriteBufferFromFile out(part_path + "partition.dat"); + HashingWriteBuffer out_hashing(out); + for (size_t i = 0; i < value.size(); ++i) + storage.partition_expr_column_types[i]->serializeBinary(value[i], out_hashing); + checksums.files["partition.dat"].file_size = out_hashing.count(); + checksums.files["partition.dat"].file_hash = out_hashing.getHash(); +} + +} diff --git a/dbms/src/Storages/MergeTree/MergeTreePartition.h b/dbms/src/Storages/MergeTree/MergeTreePartition.h new file mode 100644 index 00000000000..65263520e83 --- /dev/null +++ b/dbms/src/Storages/MergeTree/MergeTreePartition.h @@ -0,0 +1,36 @@ +#pragma once + +#include +#include +#include + +namespace DB +{ + +class MergeTreeData; +struct MergeTreeDataPartChecksums; + +struct MergeTreePartition +{ + Row value; + +public: + MergeTreePartition() = default; + + explicit MergeTreePartition(Row value_) : value(std::move(value_)) {} + + /// For month-based partitioning. + explicit MergeTreePartition(UInt32 yyyymm) : value(1, static_cast(yyyymm)) {} + + String getID(const MergeTreeData & storage) const; + + void serializeTextQuoted(const MergeTreeData & storage, WriteBuffer & out) const; + + void load(const MergeTreeData & storage, const String & part_path); + void store(const MergeTreeData & storage, const String & part_path, MergeTreeDataPartChecksums & checksums) const; + + void assign(const MergeTreePartition & other) { value.assign(other.value); } +}; + + +} diff --git a/dbms/src/Storages/StorageDistributed.cpp b/dbms/src/Storages/StorageDistributed.cpp index 141c7cccc71..4db43cb17bf 100644 --- a/dbms/src/Storages/StorageDistributed.cpp +++ b/dbms/src/Storages/StorageDistributed.cpp @@ -322,7 +322,8 @@ void StorageDistributed::reshardPartitions( ASTAlterQuery::Parameters & parameters = alter_query.parameters.back(); parameters.type = ASTAlterQuery::RESHARD_PARTITION; - parameters.partition = partition->clone(); + if (partition) + parameters.partition = partition->clone(); ASTPtr expr_list = std::make_shared(); for (const auto & entry : weighted_zookeeper_paths) From 4160bcfdf5b69b8db5e810acc79c2b82538c90f1 Mon Sep 17 00:00:00 2001 From: Alexey Zatelepin Date: Mon, 11 Sep 2017 20:33:01 +0300 Subject: [PATCH 20/63] add custom partitioning tests [#CLICKHOUSE-3000] --- .../00502_custom_partitioning_local.reference | 50 ++++++++ .../00502_custom_partitioning_local.sql | 94 +++++++++++++++ ...artitioning_replicated_zookeeper.reference | 50 ++++++++ ...stom_partitioning_replicated_zookeeper.sql | 108 ++++++++++++++++++ 4 files changed, 302 insertions(+) create mode 100644 dbms/tests/queries/0_stateless/00502_custom_partitioning_local.reference create mode 100644 dbms/tests/queries/0_stateless/00502_custom_partitioning_local.sql create mode 100644 dbms/tests/queries/0_stateless/00502_custom_partitioning_replicated_zookeeper.reference create mode 100644 dbms/tests/queries/0_stateless/00502_custom_partitioning_replicated_zookeeper.sql diff --git a/dbms/tests/queries/0_stateless/00502_custom_partitioning_local.reference b/dbms/tests/queries/0_stateless/00502_custom_partitioning_local.reference new file mode 100644 index 00000000000..57de310b212 --- 
/dev/null +++ b/dbms/tests/queries/0_stateless/00502_custom_partitioning_local.reference @@ -0,0 +1,50 @@ +*** Not partitioned *** +Parts before OPTIMIZE: +tuple() all_1_1_0 +tuple() all_2_2_0 +Parts after OPTIMIZE: +tuple() all_1_2_1 +Sum before DETACH PARTITION: +15 +Sum after DETACH PARTITION: +*** Partitioned by week *** +Parts before OPTIMIZE: +\'1999-12-27\' 19991227_1_1_0 +\'2000-01-03\' 20000103_2_2_0 +\'2000-01-03\' 20000103_3_3_0 +Parts after OPTIMIZE: +\'1999-12-27\' 19991227_1_1_0 +\'2000-01-03\' 20000103_2_3_1 +Sum before DROP PARTITION: +15 +Sum after DROP PARTITION: +12 +*** Partitioned by a (Date, UInt8) tuple *** +Parts before OPTIMIZE: +(\'2000-01-01\', 1) 20000101-1_1_1_0 +(\'2000-01-01\', 1) 20000101-1_5_5_0 +(\'2000-01-01\', 2) 20000101-2_2_2_0 +(\'2000-01-02\', 1) 20000102-1_3_3_0 +(\'2000-01-02\', 1) 20000102-1_4_4_0 +Parts after OPTIMIZE: +(\'2000-01-01\', 1) 20000101-1_1_5_1 +(\'2000-01-01\', 2) 20000101-2_2_2_0 +(\'2000-01-02\', 1) 20000102-1_3_4_1 +Sum before DETACH PARTITION: +15 +Sum after DETACH PARTITION: +9 +*** Partitioned by String *** +Parts before OPTIMIZE: +\'bbb\' 7d878f3d88441d2b3dc371e2a3050f6d_2_2_0 +\'bbb\' 7d878f3d88441d2b3dc371e2a3050f6d_3_3_0 +\'aaa\' 9b50856126a8a6064f11f027d455bf58_1_1_0 +\'aaa\' 9b50856126a8a6064f11f027d455bf58_4_4_0 +Parts after OPTIMIZE: +\'bbb\' 7d878f3d88441d2b3dc371e2a3050f6d_2_2_0 +\'bbb\' 7d878f3d88441d2b3dc371e2a3050f6d_3_3_0 +\'aaa\' 9b50856126a8a6064f11f027d455bf58_1_4_1 +Sum before DROP PARTITION: +15 +Sum after DROP PARTITION: +8 diff --git a/dbms/tests/queries/0_stateless/00502_custom_partitioning_local.sql b/dbms/tests/queries/0_stateless/00502_custom_partitioning_local.sql new file mode 100644 index 00000000000..81c4b0171ee --- /dev/null +++ b/dbms/tests/queries/0_stateless/00502_custom_partitioning_local.sql @@ -0,0 +1,94 @@ +-- IMPORTANT: Don't use this setting just yet. +-- It is for testing purposes, the syntax will likely change soon and the server will not be able +-- to load the tables created this way. You have been warned. 
+SET experimental_merge_tree_allow_custom_partitions = 1; + +SELECT '*** Not partitioned ***'; + +DROP TABLE IF EXISTS test.not_partitioned; +CREATE TABLE test.not_partitioned(x UInt8) ENGINE = MergeTree(tuple(), x, 8192); + +INSERT INTO test.not_partitioned VALUES (1), (2), (3); +INSERT INTO test.not_partitioned VALUES (4), (5); + +SELECT 'Parts before OPTIMIZE:'; +SELECT partition, name FROM system.parts WHERE database = 'test' AND table = 'not_partitioned' AND active ORDER BY name; +OPTIMIZE TABLE test.not_partitioned PARTITION tuple() FINAL; +SELECT 'Parts after OPTIMIZE:'; +SELECT partition, name FROM system.parts WHERE database = 'test' AND table = 'not_partitioned' AND active ORDER BY name; + +SELECT 'Sum before DETACH PARTITION:'; +SELECT sum(x) FROM test.not_partitioned; +ALTER TABLE test.not_partitioned DETACH PARTITION ID 'all'; +SELECT 'Sum after DETACH PARTITION:'; +SELECT sum(x) FROM test.not_partitioned; + +DROP TABLE test.not_partitioned; + +SELECT '*** Partitioned by week ***'; + +DROP TABLE IF EXISTS test.partitioned_by_week; +CREATE TABLE test.partitioned_by_week(d Date, x UInt8) ENGINE = MergeTree(toMonday(d), x, 8192); + +-- 2000-01-03 belongs to a different week than 2000-01-01 and 2000-01-02 +INSERT INTO test.partitioned_by_week VALUES ('2000-01-01', 1), ('2000-01-02', 2), ('2000-01-03', 3); +INSERT INTO test.partitioned_by_week VALUES ('2000-01-03', 4), ('2000-01-03', 5); + +SELECT 'Parts before OPTIMIZE:'; +SELECT partition, name FROM system.parts WHERE database = 'test' AND table = 'partitioned_by_week' AND active ORDER BY name; +OPTIMIZE TABLE test.partitioned_by_week PARTITION '2000-01-03' FINAL; +SELECT 'Parts after OPTIMIZE:'; +SELECT partition, name FROM system.parts WHERE database = 'test' AND table = 'partitioned_by_week' AND active ORDER BY name; + +SELECT 'Sum before DROP PARTITION:'; +SELECT sum(x) FROM test.partitioned_by_week; +ALTER TABLE test.partitioned_by_week DROP PARTITION '1999-12-27'; +SELECT 'Sum after DROP PARTITION:'; +SELECT sum(x) FROM test.partitioned_by_week; + +DROP TABLE test.partitioned_by_week; + +SELECT '*** Partitioned by a (Date, UInt8) tuple ***'; + +DROP TABLE IF EXISTS test.partitioned_by_tuple; +CREATE TABLE test.partitioned_by_tuple(d Date, x UInt8, y UInt8) ENGINE = MergeTree((d, x), x, 8192); + +INSERT INTO test.partitioned_by_tuple VALUES ('2000-01-01', 1, 1), ('2000-01-01', 2, 2), ('2000-01-02', 1, 3); +INSERT INTO test.partitioned_by_tuple VALUES ('2000-01-02', 1, 4), ('2000-01-01', 1, 5); + +SELECT 'Parts before OPTIMIZE:'; +SELECT partition, name FROM system.parts WHERE database = 'test' AND table = 'partitioned_by_tuple' AND active ORDER BY name; +OPTIMIZE TABLE test.partitioned_by_tuple PARTITION ('2000-01-01', 1) FINAL; +OPTIMIZE TABLE test.partitioned_by_tuple PARTITION ('2000-01-02', 1) FINAL; +SELECT 'Parts after OPTIMIZE:'; +SELECT partition, name FROM system.parts WHERE database = 'test' AND table = 'partitioned_by_tuple' AND active ORDER BY name; + +SELECT 'Sum before DETACH PARTITION:'; +SELECT sum(y) FROM test.partitioned_by_tuple; +ALTER TABLE test.partitioned_by_tuple DETACH PARTITION ID '20000101-1'; +SELECT 'Sum after DETACH PARTITION:'; +SELECT sum(y) FROM test.partitioned_by_tuple; + +DROP TABLE test.partitioned_by_tuple; + +SELECT '*** Partitioned by String ***'; + +DROP TABLE IF EXISTS test.partitioned_by_string; +CREATE TABLE test.partitioned_by_string(s String, x UInt8) ENGINE = MergeTree(tuple(s), x, 8192); + +INSERT INTO test.partitioned_by_string VALUES ('aaa', 1), ('aaa', 2), ('bbb', 3); 
+INSERT INTO test.partitioned_by_string VALUES ('bbb', 4), ('aaa', 5); + +SELECT 'Parts before OPTIMIZE:'; +SELECT partition, name FROM system.parts WHERE database = 'test' AND table = 'partitioned_by_string' AND active ORDER BY name; +OPTIMIZE TABLE test.partitioned_by_string PARTITION 'aaa' FINAL; +SELECT 'Parts after OPTIMIZE:'; +SELECT partition, name FROM system.parts WHERE database = 'test' AND table = 'partitioned_by_string' AND active ORDER BY name; + +SELECT 'Sum before DROP PARTITION:'; +SELECT sum(x) FROM test.partitioned_by_string; +ALTER TABLE test.partitioned_by_string DROP PARTITION 'bbb'; +SELECT 'Sum after DROP PARTITION:'; +SELECT sum(x) FROM test.partitioned_by_string; + +DROP TABLE test.partitioned_by_string; diff --git a/dbms/tests/queries/0_stateless/00502_custom_partitioning_replicated_zookeeper.reference b/dbms/tests/queries/0_stateless/00502_custom_partitioning_replicated_zookeeper.reference new file mode 100644 index 00000000000..c4ba61ce205 --- /dev/null +++ b/dbms/tests/queries/0_stateless/00502_custom_partitioning_replicated_zookeeper.reference @@ -0,0 +1,50 @@ +*** Not partitioned *** +Parts before OPTIMIZE: +tuple() all_0_0_0 +tuple() all_1_1_0 +Parts after OPTIMIZE: +tuple() all_0_1_1 +Sum before DETACH PARTITION: +15 +Sum after DETACH PARTITION: +*** Partitioned by week *** +Parts before OPTIMIZE: +\'1999-12-27\' 19991227_0_0_0 +\'2000-01-03\' 20000103_0_0_0 +\'2000-01-03\' 20000103_1_1_0 +Parts after OPTIMIZE: +\'1999-12-27\' 19991227_0_0_0 +\'2000-01-03\' 20000103_0_1_1 +Sum before DROP PARTITION: +15 +Sum after DROP PARTITION: +12 +*** Partitioned by a (Date, UInt8) tuple *** +Parts before OPTIMIZE: +(\'2000-01-01\', 1) 20000101-1_0_0_0 +(\'2000-01-01\', 1) 20000101-1_1_1_0 +(\'2000-01-01\', 2) 20000101-2_0_0_0 +(\'2000-01-02\', 1) 20000102-1_0_0_0 +(\'2000-01-02\', 1) 20000102-1_1_1_0 +Parts after OPTIMIZE: +(\'2000-01-01\', 1) 20000101-1_0_1_1 +(\'2000-01-01\', 2) 20000101-2_0_0_0 +(\'2000-01-02\', 1) 20000102-1_0_1_1 +Sum before DETACH PARTITION: +15 +Sum after DETACH PARTITION: +9 +*** Partitioned by String *** +Parts before OPTIMIZE: +\'bbb\' 7d878f3d88441d2b3dc371e2a3050f6d_0_0_0 +\'bbb\' 7d878f3d88441d2b3dc371e2a3050f6d_1_1_0 +\'aaa\' 9b50856126a8a6064f11f027d455bf58_0_0_0 +\'aaa\' 9b50856126a8a6064f11f027d455bf58_1_1_0 +Parts after OPTIMIZE: +\'bbb\' 7d878f3d88441d2b3dc371e2a3050f6d_0_0_0 +\'bbb\' 7d878f3d88441d2b3dc371e2a3050f6d_1_1_0 +\'aaa\' 9b50856126a8a6064f11f027d455bf58_0_1_1 +Sum before DROP PARTITION: +15 +Sum after DROP PARTITION: +8 diff --git a/dbms/tests/queries/0_stateless/00502_custom_partitioning_replicated_zookeeper.sql b/dbms/tests/queries/0_stateless/00502_custom_partitioning_replicated_zookeeper.sql new file mode 100644 index 00000000000..022b7047cec --- /dev/null +++ b/dbms/tests/queries/0_stateless/00502_custom_partitioning_replicated_zookeeper.sql @@ -0,0 +1,108 @@ +-- IMPORTANT: Don't use this setting just yet. +-- It is for testing purposes, the syntax will likely change soon and the server will not be able +-- to load the tables created this way. You have been warned. 
+SET experimental_merge_tree_allow_custom_partitions = 1; + +SET replication_alter_partitions_sync = 2; + +SELECT '*** Not partitioned ***'; + +DROP TABLE IF EXISTS test.not_partitioned_replica1; +DROP TABLE IF EXISTS test.not_partitioned_replica2; +CREATE TABLE test.not_partitioned_replica1(x UInt8) ENGINE = ReplicatedMergeTree('/clickhouse/tables/test/not_partitioned', '1', tuple(), x, 8192); +CREATE TABLE test.not_partitioned_replica2(x UInt8) ENGINE = ReplicatedMergeTree('/clickhouse/tables/test/not_partitioned', '2', tuple(), x, 8192); + +INSERT INTO test.not_partitioned_replica1 VALUES (1), (2), (3); +INSERT INTO test.not_partitioned_replica1 VALUES (4), (5); + +SELECT 'Parts before OPTIMIZE:'; +SELECT partition, name FROM system.parts WHERE database = 'test' AND table = 'not_partitioned_replica1' AND active ORDER BY name; +OPTIMIZE TABLE test.not_partitioned_replica1 PARTITION tuple() FINAL; +SELECT 'Parts after OPTIMIZE:'; +SELECT partition, name FROM system.parts WHERE database = 'test' AND table = 'not_partitioned_replica2' AND active ORDER BY name; + +SELECT 'Sum before DETACH PARTITION:'; +SELECT sum(x) FROM test.not_partitioned_replica2; +ALTER TABLE test.not_partitioned_replica1 DETACH PARTITION ID 'all'; +SELECT 'Sum after DETACH PARTITION:'; +SELECT sum(x) FROM test.not_partitioned_replica2; + +DROP TABLE test.not_partitioned_replica1; +DROP TABLE test.not_partitioned_replica2; + +SELECT '*** Partitioned by week ***'; + +DROP TABLE IF EXISTS test.partitioned_by_week_replica1; +DROP TABLE IF EXISTS test.partitioned_by_week_replica2; +CREATE TABLE test.partitioned_by_week_replica1(d Date, x UInt8) ENGINE = ReplicatedMergeTree('/clickhouse/tables/test/partitioned_by_week', '1', toMonday(d), x, 8192); +CREATE TABLE test.partitioned_by_week_replica2(d Date, x UInt8) ENGINE = ReplicatedMergeTree('/clickhouse/tables/test/partitioned_by_week', '2', toMonday(d), x, 8192); + +-- 2000-01-03 belongs to a different week than 2000-01-01 and 2000-01-02 +INSERT INTO test.partitioned_by_week_replica1 VALUES ('2000-01-01', 1), ('2000-01-02', 2), ('2000-01-03', 3); +INSERT INTO test.partitioned_by_week_replica1 VALUES ('2000-01-03', 4), ('2000-01-03', 5); + +SELECT 'Parts before OPTIMIZE:'; -- Select parts on the first replica to avoid waiting for replication. +SELECT partition, name FROM system.parts WHERE database = 'test' AND table = 'partitioned_by_week_replica1' AND active ORDER BY name; +OPTIMIZE TABLE test.partitioned_by_week_replica1 PARTITION '2000-01-03' FINAL; +SELECT 'Parts after OPTIMIZE:'; -- After OPTIMIZE with replication_alter_partitions_sync=2 replicas must be in sync. 
+SELECT partition, name FROM system.parts WHERE database = 'test' AND table = 'partitioned_by_week_replica2' AND active ORDER BY name; + +SELECT 'Sum before DROP PARTITION:'; +SELECT sum(x) FROM test.partitioned_by_week_replica2; +ALTER TABLE test.partitioned_by_week_replica1 DROP PARTITION '1999-12-27'; +SELECT 'Sum after DROP PARTITION:'; +SELECT sum(x) FROM test.partitioned_by_week_replica2; + +DROP TABLE test.partitioned_by_week_replica1; +DROP TABLE test.partitioned_by_week_replica2; + +SELECT '*** Partitioned by a (Date, UInt8) tuple ***'; + +DROP TABLE IF EXISTS test.partitioned_by_tuple_replica1; +DROP TABLE IF EXISTS test.partitioned_by_tuple_replica2; +CREATE TABLE test.partitioned_by_tuple_replica1(d Date, x UInt8, y UInt8) ENGINE = ReplicatedMergeTree('/clickhouse/tables/test/partitioned_by_tuple', '1', (d, x), x, 8192); +CREATE TABLE test.partitioned_by_tuple_replica2(d Date, x UInt8, y UInt8) ENGINE = ReplicatedMergeTree('/clickhouse/tables/test/partitioned_by_tuple', '2', (d, x), x, 8192); + +INSERT INTO test.partitioned_by_tuple_replica1 VALUES ('2000-01-01', 1, 1), ('2000-01-01', 2, 2), ('2000-01-02', 1, 3); +INSERT INTO test.partitioned_by_tuple_replica1 VALUES ('2000-01-02', 1, 4), ('2000-01-01', 1, 5); + +SELECT 'Parts before OPTIMIZE:'; +SELECT partition, name FROM system.parts WHERE database = 'test' AND table = 'partitioned_by_tuple_replica1' AND active ORDER BY name; +OPTIMIZE TABLE test.partitioned_by_tuple_replica1 PARTITION ('2000-01-01', 1) FINAL; +OPTIMIZE TABLE test.partitioned_by_tuple_replica1 PARTITION ('2000-01-02', 1) FINAL; +SELECT 'Parts after OPTIMIZE:'; +SELECT partition, name FROM system.parts WHERE database = 'test' AND table = 'partitioned_by_tuple_replica2' AND active ORDER BY name; + +SELECT 'Sum before DETACH PARTITION:'; +SELECT sum(y) FROM test.partitioned_by_tuple_replica2; +ALTER TABLE test.partitioned_by_tuple_replica1 DETACH PARTITION ID '20000101-1'; +SELECT 'Sum after DETACH PARTITION:'; +SELECT sum(y) FROM test.partitioned_by_tuple_replica2; + +DROP TABLE test.partitioned_by_tuple_replica1; +DROP TABLE test.partitioned_by_tuple_replica2; + +SELECT '*** Partitioned by String ***'; + +DROP TABLE IF EXISTS test.partitioned_by_string_replica1; +DROP TABLE IF EXISTS test.partitioned_by_string_replica2; +CREATE TABLE test.partitioned_by_string_replica1(s String, x UInt8) ENGINE = ReplicatedMergeTree('/clickhouse/tables/test/partitioned_by_string', '1', tuple(s), x, 8192); +CREATE TABLE test.partitioned_by_string_replica2(s String, x UInt8) ENGINE = ReplicatedMergeTree('/clickhouse/tables/test/partitioned_by_string', '2', tuple(s), x, 8192); + +INSERT INTO test.partitioned_by_string_replica1 VALUES ('aaa', 1), ('aaa', 2), ('bbb', 3); +INSERT INTO test.partitioned_by_string_replica1 VALUES ('bbb', 4), ('aaa', 5); + +SELECT 'Parts before OPTIMIZE:'; +SELECT partition, name FROM system.parts WHERE database = 'test' AND table = 'partitioned_by_string_replica1' AND active ORDER BY name; +OPTIMIZE TABLE test.partitioned_by_string_replica2 PARTITION 'aaa' FINAL; +SELECT 'Parts after OPTIMIZE:'; +SELECT partition, name FROM system.parts WHERE database = 'test' AND table = 'partitioned_by_string_replica2' AND active ORDER BY name; + +SELECT 'Sum before DROP PARTITION:'; +SELECT sum(x) FROM test.partitioned_by_string_replica2; +ALTER TABLE test.partitioned_by_string_replica1 DROP PARTITION 'bbb'; +SELECT 'Sum after DROP PARTITION:'; +SELECT sum(x) FROM test.partitioned_by_string_replica2; + +DROP TABLE test.partitioned_by_string_replica1; +DROP TABLE 
test.partitioned_by_string_replica2; From 61f0b3275709cd2461759b35909fae68127ecddc Mon Sep 17 00:00:00 2001 From: Alexey Zatelepin Date: Tue, 12 Sep 2017 22:20:56 +0300 Subject: [PATCH 21/63] forbid ALTER of partition key columns for now [#CLICKHOUSE-3000] --- dbms/src/Storages/MergeTree/MergeTreeData.cpp | 23 +++++++++++-------- 1 file changed, 13 insertions(+), 10 deletions(-) diff --git a/dbms/src/Storages/MergeTree/MergeTreeData.cpp b/dbms/src/Storages/MergeTree/MergeTreeData.cpp index 80d9006db27..140b2ab839b 100644 --- a/dbms/src/Storages/MergeTree/MergeTreeData.cpp +++ b/dbms/src/Storages/MergeTree/MergeTreeData.cpp @@ -771,26 +771,29 @@ void MergeTreeData::checkAlter(const AlterCommands & commands) /// Set of columns that shouldn't be altered. NameSet columns_alter_forbidden; - /// Primary or partition key columns can be ALTERed only if they are used in the key as-is + /// Primary key columns can be ALTERed only if they are used in the key as-is /// (and not as a part of some expression) and if the ALTER only affects column metadata. NameSet columns_alter_metadata_only; - auto add_key_columns = [&](const ExpressionActionsPtr & expr) + if (partition_expr) { - if (!expr) - return; + /// Forbid altering partition key columns because it can change partition ID format. + /// TODO: in some cases (e.g. adding an Enum value) a partition key column can still be ALTERed. + /// We should allow it. + for (const String & col : partition_expr->getRequiredColumns()) + columns_alter_forbidden.insert(col); + } - for (const ExpressionAction & action : expr->getActions()) + if (primary_expr) + { + for (const ExpressionAction & action : primary_expr->getActions()) { auto action_columns = action.getNeededColumns(); columns_alter_forbidden.insert(action_columns.begin(), action_columns.end()); } - for (const String & col : expr->getRequiredColumns()) + for (const String & col : primary_expr->getRequiredColumns()) columns_alter_metadata_only.insert(col); - }; - - add_key_columns(partition_expr); - add_key_columns(primary_expr); + } /// We don't process sampling_expression separately because it must be among the primary key columns. if (!merging_params.sign_column.empty()) From f1a8b9bfa1b70646eca1b4ef3f7486e713f3514e Mon Sep 17 00:00:00 2001 From: Alexey Zatelepin Date: Wed, 13 Sep 2017 19:22:04 +0300 Subject: [PATCH 22/63] add comments [#CLICKHOUSE-3000] --- dbms/src/Storages/MergeTree/MergeTreeData.h | 13 ++++++++++--- .../Storages/MergeTree/MergeTreeDataFormatVersion.h | 1 + dbms/src/Storages/MergeTree/MergeTreePartition.cpp | 12 ++++++++---- dbms/src/Storages/MergeTree/MergeTreePartition.h | 1 + 4 files changed, 20 insertions(+), 7 deletions(-) diff --git a/dbms/src/Storages/MergeTree/MergeTreeData.h b/dbms/src/Storages/MergeTree/MergeTreeData.h index 189c1b4618d..a01740e62a8 100644 --- a/dbms/src/Storages/MergeTree/MergeTreeData.h +++ b/dbms/src/Storages/MergeTree/MergeTreeData.h @@ -46,10 +46,10 @@ namespace ErrorCodes /// The date column is specified. For each part min and max dates are remembered. /// Essentially it is an index too. /// -/// Data is partitioned by month. Parts belonging to different months are not merged - for the ease of -/// administration (data sync and backup). +/// Data is partitioned by the value of the partitioning expression. +/// Parts belonging to different partitions are not merged - for the ease of administration (data sync and backup). 
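+/// (For example, with a monthly partitioning expression a part in partition 200001 is never merged with a part in partition 200002.)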
/// -/// File structure: +/// File structure of old-style month-partitioned tables (format_version = 0): /// Part directory - / min-date _ max-date _ min-id _ max-id _ level / /// Inside the part directory: /// checksums.txt - contains the list of all files along with their sizes and checksums. /// columns.txt - contains the list of all columns in human-readable format. /// [Column].bin - contains compressed column data. /// [Column].mrk - marks, pointing to seek positions allowing to skip n * k rows. /// +/// File structure of tables with custom partitioning (format_version >= 1): +/// Part directory - / partition-id _ min-id _ max-id _ level / +/// Inside the part directory: +/// The same files as for month-partitioned tables, plus +/// partition.dat - contains the value of the partitioning expression +/// minmax_[Column].idx - MinMax indexes (see MergeTreeDataPart::MinMaxIndex class) for the columns required by the partitioning expression. +/// /// Several modes are implemented. Modes determine additional actions during merge: /// - Ordinary - don't do anything special /// - Collapsing - collapse pairs of rows with the opposite values of sign_columns for the same values diff --git a/dbms/src/Storages/MergeTree/MergeTreeDataFormatVersion.h b/dbms/src/Storages/MergeTree/MergeTreeDataFormatVersion.h index 74f26fcdf45..e08bfd44656 100644 --- a/dbms/src/Storages/MergeTree/MergeTreeDataFormatVersion.h +++ b/dbms/src/Storages/MergeTree/MergeTreeDataFormatVersion.h @@ -1,5 +1,6 @@ #pragma once +#include #include namespace DB diff --git a/dbms/src/Storages/MergeTree/MergeTreePartition.cpp b/dbms/src/Storages/MergeTree/MergeTreePartition.cpp index 4bf7bb352de..e33896a3cdf 100644 --- a/dbms/src/Storages/MergeTree/MergeTreePartition.cpp +++ b/dbms/src/Storages/MergeTree/MergeTreePartition.cpp @@ -19,17 +19,18 @@ static ReadBufferFromFile openForReading(const String & path) return ReadBufferFromFile(path, std::min(static_cast(DBMS_DEFAULT_BUFFER_SIZE), Poco::File(path).getSize())); } +/// NOTE: This ID is used to create part names which are then persisted in ZK and as directory names on the file system. +/// So if you want to change this method, be sure to guarantee compatibility with existing table data. String MergeTreePartition::getID(const MergeTreeData & storage) const { if (value.size() != storage.partition_expr_columns.size()) throw Exception("Invalid partition key size: " + toString(value.size()), ErrorCodes::LOGICAL_ERROR); if (value.empty()) - return "all"; + return "all"; /// It is tempting to use an empty string here. But that would break directory structure in ZK. - /// In case all partition fields are represented by integral types, try to produce a human-readable partition id. + /// In case all partition fields are represented by integral types, try to produce a human-readable ID. /// Otherwise use a hex-encoded hash. - bool are_all_integral = true; for (const Field & field : value) { @@ -51,9 +52,12 @@ String MergeTreePartition::getID(const MergeTreeData & storage) const result += '-'; if (typeid_cast(storage.partition_expr_column_types[i].get())) - result += toString(DateLUT::instance().toNumYYYYMMDD(DayNum_t(value[i].get()))); + result += toString(DateLUT::instance().toNumYYYYMMDD(DayNum_t(value[i].safeGet()))); else result += applyVisitor(to_string_visitor, value[i]); + + /// It is tempting to output DateTime as YYYYMMDDhhmmss, but that would make partition ID + /// timezone-dependent.
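+        /// (For example, a (Date, UInt8) key with the value ('2000-01-01', 1) yields the ID
+        /// "20000101-1" here, while a non-integral key such as a String is handled by the
+        /// hex-encoded SipHash fallback instead.)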
} return result; } diff --git a/dbms/src/Storages/MergeTree/MergeTreePartition.h b/dbms/src/Storages/MergeTree/MergeTreePartition.h index 65263520e83..d32b5f4401a 100644 --- a/dbms/src/Storages/MergeTree/MergeTreePartition.h +++ b/dbms/src/Storages/MergeTree/MergeTreePartition.h @@ -10,6 +10,7 @@ namespace DB class MergeTreeData; struct MergeTreeDataPartChecksums; +/// This class represents a partition value of a single part and encapsulates its loading/storing logic. struct MergeTreePartition { Row value; From bfd87add7dd9f1e9fd6377d4ccbfb736ee761f90 Mon Sep 17 00:00:00 2001 From: Alexey Zatelepin Date: Thu, 14 Sep 2017 14:52:22 +0300 Subject: [PATCH 23/63] improve getExtremes(), fix for ColumnNullable [#CLICKHOUSE-2] --- dbms/src/Columns/ColumnArray.cpp | 2 +- dbms/src/Columns/ColumnFixedString.cpp | 2 +- dbms/src/Columns/ColumnNullable.cpp | 4 ++-- dbms/src/Columns/ColumnString.cpp | 2 +- dbms/src/Columns/ColumnVector.cpp | 3 +-- 5 files changed, 6 insertions(+), 7 deletions(-) diff --git a/dbms/src/Columns/ColumnArray.cpp b/dbms/src/Columns/ColumnArray.cpp index 1b85dae1741..4752f734ec3 100644 --- a/dbms/src/Columns/ColumnArray.cpp +++ b/dbms/src/Columns/ColumnArray.cpp @@ -354,7 +354,7 @@ void ColumnArray::getExtremes(Field & min, Field & max) const { if (compareAt(i, min_idx, *this, /* nan_direction_hint = */ 1) < 0) min_idx = i; - if (compareAt(i, max_idx, *this, /* nan_direction_hint = */ -1) > 0) + else if (compareAt(i, max_idx, *this, /* nan_direction_hint = */ -1) > 0) max_idx = i; } diff --git a/dbms/src/Columns/ColumnFixedString.cpp b/dbms/src/Columns/ColumnFixedString.cpp index 61ab24fee10..0f75e8f0ff4 100644 --- a/dbms/src/Columns/ColumnFixedString.cpp +++ b/dbms/src/Columns/ColumnFixedString.cpp @@ -304,7 +304,7 @@ void ColumnFixedString::getExtremes(Field & min, Field & max) const { if (less_op(i, min_idx)) min_idx = i; - if (less_op(max_idx, i)) + else if (less_op(max_idx, i)) max_idx = i; } diff --git a/dbms/src/Columns/ColumnNullable.cpp b/dbms/src/Columns/ColumnNullable.cpp index 37edce2840d..66b35e80d7b 100644 --- a/dbms/src/Columns/ColumnNullable.cpp +++ b/dbms/src/Columns/ColumnNullable.cpp @@ -359,6 +359,7 @@ void getExtremesFromNullableContent(const ColumnVector & col, const NullMap & cur_min = x; cur_max = x; has_not_null = true; + has_not_nan = !isNaN(x); continue; } @@ -375,8 +376,7 @@ void getExtremesFromNullableContent(const ColumnVector & col, const NullMap & if (x < cur_min) cur_min = x; - - if (x > cur_max) + else if (x > cur_max) cur_max = x; } diff --git a/dbms/src/Columns/ColumnString.cpp b/dbms/src/Columns/ColumnString.cpp index 5a04db5d893..1c16840cc2d 100644 --- a/dbms/src/Columns/ColumnString.cpp +++ b/dbms/src/Columns/ColumnString.cpp @@ -280,7 +280,7 @@ void ColumnString::getExtremes(Field & min, Field & max) const { if (less_op(i, min_idx)) min_idx = i; - if (less_op(max_idx, i)) + else if (less_op(max_idx, i)) max_idx = i; } diff --git a/dbms/src/Columns/ColumnVector.cpp b/dbms/src/Columns/ColumnVector.cpp index a8651160d81..ec6b1c59d00 100644 --- a/dbms/src/Columns/ColumnVector.cpp +++ b/dbms/src/Columns/ColumnVector.cpp @@ -301,8 +301,7 @@ void ColumnVector::getExtremes(Field & min, Field & max) const if (x < cur_min) cur_min = x; - - if (x > cur_max) + else if (x > cur_max) cur_max = x; } From 6e57272052e35e0d1901f2d6df65a95155032bdc Mon Sep 17 00:00:00 2001 From: Alexey Zatelepin Date: Mon, 18 Sep 2017 23:49:21 +0300 Subject: [PATCH 24/63] don't use partition value as a prefix for FREEZE PARTITION [#CLICKHOUSE-3000] ---
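In rough terms, the matching rule implemented below is the following (a minimal illustrative sketch, not code from this patch; the helper name and signature are invented for the example):

#include <experimental/optional>
#include <string>

/// Old-format tables treat the partition literal as a prefix of the partition ID
/// (e.g. FREEZE PARTITION '2000' matches the month partitions 200001..200012), while
/// tables with custom partitioning must resolve it to an exact partition ID.
static bool partition_matches(const std::string & part_partition_id,
                              const std::experimental::optional<std::string> & prefix,
                              const std::string & partition_id)
{
    if (prefix)
        return part_partition_id.compare(0, prefix->size(), *prefix) == 0; /// i.e. startsWith
    return part_partition_id == partition_id;
}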
dbms/src/Storages/MergeTree/MergeTreeData.cpp | 45 ++++++++++++------- 1 file changed, 30 insertions(+), 15 deletions(-) diff --git a/dbms/src/Storages/MergeTree/MergeTreeData.cpp b/dbms/src/Storages/MergeTree/MergeTreeData.cpp index 140b2ab839b..7d0c96b9431 100644 --- a/dbms/src/Storages/MergeTree/MergeTreeData.cpp +++ b/dbms/src/Storages/MergeTree/MergeTreeData.cpp @@ -46,6 +46,7 @@ #include #include #include +#include namespace ProfileEvents @@ -1729,20 +1730,26 @@ void MergeTreeData::removePartContributionToColumnSizes(const DataPartPtr & part void MergeTreeData::freezePartition(const ASTPtr & partition_ast, const String & with_name, const Context & context) { - String prefix; + std::experimental::optional prefix; + String partition_id; if (format_version < MERGE_TREE_DATA_MIN_FORMAT_VERSION_WITH_CUSTOM_PARTITIONING) { const auto & partition = dynamic_cast(*partition_ast); - /// Month-partitioning specific - partition ID can be passed in the partition value. + /// Month-partitioning specific - partition value can represent a prefix of the partition to freeze. if (const auto * partition_lit = dynamic_cast(partition.value.get())) prefix = partition_lit->value.getType() == Field::Types::UInt64 ? toString(partition_lit->value.get()) : partition_lit->value.safeGet(); + else + partition_id = getPartitionIDFromQuery(partition_ast, context); } else - prefix = getPartitionIDFromQuery(partition_ast, context); + partition_id = getPartitionIDFromQuery(partition_ast, context); - LOG_DEBUG(log, "Freezing parts with prefix " + prefix); + if (prefix) + LOG_DEBUG(log, "Freezing parts with prefix " + prefix.value()); + else + LOG_DEBUG(log, "Freezing parts with partition ID " + partition_id); String clickhouse_path = Poco::Path(context.getPath()).makeAbsolute().toString(); String shadow_path = clickhouse_path + "shadow/"; @@ -1759,19 +1766,27 @@ void MergeTreeData::freezePartition(const ASTPtr & partition_ast, const String & Poco::DirectoryIterator end; for (Poco::DirectoryIterator it(full_path); it != end; ++it) { - if (startsWith(it.name(), prefix)) + MergeTreePartInfo part_info; + if (!MergeTreePartInfo::tryParsePartName(it.name(), &part_info, format_version)) + continue; + if (prefix) { - LOG_DEBUG(log, "Freezing part " << it.name()); - - String part_absolute_path = it.path().absolute().toString(); - if (!startsWith(part_absolute_path, clickhouse_path)) - throw Exception("Part path " + part_absolute_path + " is not inside " + clickhouse_path, ErrorCodes::LOGICAL_ERROR); - - String backup_part_absolute_path = part_absolute_path; - backup_part_absolute_path.replace(0, clickhouse_path.size(), backup_path); - localBackup(part_absolute_path, backup_part_absolute_path); - ++parts_processed; + if (!startsWith(part_info.partition_id, prefix.value())) + continue; } + else if (part_info.partition_id != partition_id) + continue; + + LOG_DEBUG(log, "Freezing part " << it.name()); + + String part_absolute_path = it.path().absolute().toString(); + if (!startsWith(part_absolute_path, clickhouse_path)) + throw Exception("Part path " + part_absolute_path + " is not inside " + clickhouse_path, ErrorCodes::LOGICAL_ERROR); + + String backup_part_absolute_path = part_absolute_path; + backup_part_absolute_path.replace(0, clickhouse_path.size(), backup_path); + localBackup(part_absolute_path, backup_part_absolute_path); + ++parts_processed; } LOG_DEBUG(log, "Freezed " << parts_processed << " parts"); From 5250650f65054002a44f25bc0e1ded8d004aa9b3 Mon Sep 17 00:00:00 2001 From: proller Date: Tue, 26 Sep 2017 23:29:34 +0300 
Subject: [PATCH 25/63] Fixes (#1286) * Revert "Simplification [#CLICKHOUSE-2]." This reverts commit 98ad6a5db3239ac6567ee96e90973b935dcac39b. * Fix error: comparison of unsigned expression < 0 is always false * Fix float division by zero * Fix float division by zero * Disable PointInPolygon function if compiling with old clang * Fix warning: dbms/src/Functions/FunctionsCoding.h:336:21: error: comparison of constant -1 with expression of type 'const char' is always true [-Werror,-Wtautological-constant-out-of-range-compare] if (num != -1) * Fix --- dbms/src/DataStreams/ColumnGathererStream.cpp | 9 ++++++--- dbms/src/Functions/FunctionsCoding.h | 2 +- dbms/src/Functions/FunctionsGeo.cpp | 5 ++++- dbms/src/Functions/GeoUtils.h | 8 ++++++++ libs/libglibc-compatibility/glibc-compatibility.c | 7 +++++-- 5 files changed, 24 insertions(+), 7 deletions(-) diff --git a/dbms/src/DataStreams/ColumnGathererStream.cpp b/dbms/src/DataStreams/ColumnGathererStream.cpp index 760af89e0b2..568df30180e 100644 --- a/dbms/src/DataStreams/ColumnGathererStream.cpp +++ b/dbms/src/DataStreams/ColumnGathererStream.cpp @@ -124,12 +124,15 @@ void ColumnGathererStream::readSuffixImpl() return; double seconds = profile_info.total_stopwatch.elapsedSeconds(); + std::stringstream speed; + if (seconds) + speed << ", " << profile_info.rows / seconds << " rows/sec., " + << profile_info.bytes / 1048576.0 / seconds << " MiB/sec."; LOG_TRACE(log, std::fixed << std::setprecision(2) << "Gathered column " << name << " (" << static_cast(profile_info.bytes) / profile_info.rows << " bytes/elem.)" - << " in " << seconds << " sec., " - << profile_info.rows / seconds << " rows/sec., " - << profile_info.bytes / 1048576.0 / seconds << " MiB/sec."); + << " in " << seconds << " sec." + << speed.str()); } } diff --git a/dbms/src/Functions/FunctionsCoding.h b/dbms/src/Functions/FunctionsCoding.h index 4429c639285..4ae5d279eed 100644 --- a/dbms/src/Functions/FunctionsCoding.h +++ b/dbms/src/Functions/FunctionsCoding.h @@ -333,7 +333,7 @@ public: { const auto num = unhex(ch); - if (num != -1) + if (num != '\xff') { val <<= 4; val |= num; diff --git a/dbms/src/Functions/FunctionsGeo.cpp b/dbms/src/Functions/FunctionsGeo.cpp index 4fd87542098..8240cf6e68a 100644 --- a/dbms/src/Functions/FunctionsGeo.cpp +++ b/dbms/src/Functions/FunctionsGeo.cpp @@ -31,6 +31,7 @@ namespace ErrorCodes extern const int ILLEGAL_TYPE_OF_ARGUMENT; } +#if USE_POINT_IN_POLYGON namespace FunctionPointInPolygonDetail { @@ -271,16 +272,18 @@ template <> const char * FunctionPointInPolygon::name = "pointInPolygonFranklin"; template <> const char * FunctionPointInPolygon::name = "pointInPolygon"; - +#endif void registerFunctionsGeo(FunctionFactory & factory) { factory.registerFunction(); factory.registerFunction(); +#if USE_POINT_IN_POLYGON factory.registerFunction>(); factory.registerFunction>(); factory.registerFunction>(); factory.registerFunction>(); +#endif } } diff --git a/dbms/src/Functions/GeoUtils.h b/dbms/src/Functions/GeoUtils.h index f5a4b4651cc..64cf97c3c71 100644 --- a/dbms/src/Functions/GeoUtils.h +++ b/dbms/src/Functions/GeoUtils.h @@ -19,6 +19,11 @@ #pragma GCC diagnostic pop #endif +#if __clang__ && __clang_major__ <= 4 +#else +#define USE_POINT_IN_POLYGON 1 +#endif + #include #include #include @@ -78,6 +83,7 @@ UInt64 getMultiPolygonAllocatedBytes(const MultiPolygon & multi_polygon) return size; } +#if USE_POINT_IN_POLYGON template class PointInPolygonWithGrid { @@ -577,6 +583,8 @@ ColumnPtr pointInPolygon(const IColumn & x, const IColumn & y, 
PointInPolygonImp return Impl::call(x, y, impl); } +#endif + /// Total angle (signed) between neighbor vectors in linestring. Zero if linestring.size() < 2. template float calcLinestringRotation(const Linestring & points) diff --git a/libs/libglibc-compatibility/glibc-compatibility.c b/libs/libglibc-compatibility/glibc-compatibility.c index 9ebb435cafe..0f6ce057132 100644 --- a/libs/libglibc-compatibility/glibc-compatibility.c +++ b/libs/libglibc-compatibility/glibc-compatibility.c @@ -43,11 +43,14 @@ int __gai_sigqueue(int sig, const union sigval val, pid_t caller_pid) #if __GLIBC__ > 2 || (__GLIBC__ == 2 && __GLIBC_MINOR__ >= 16) long int __fdelt_chk(long int d) +{ + if (d < 0) + abort(); #else unsigned long int __fdelt_chk(unsigned long int d) -#endif { - if (d < 0 || d >= FD_SETSIZE) +#endif + if (d >= FD_SETSIZE) abort(); return d / __NFDBITS; } From c46ce859a183bcd89441c1a16462ada150f6ae8d Mon Sep 17 00:00:00 2001 From: proller Date: Tue, 26 Sep 2017 20:35:58 +0000 Subject: [PATCH 26/63] Fix tests on clean storage --- dbms/tests/queries/0_stateless/99999_prepare.reference | 0 dbms/tests/queries/0_stateless/99999_prepare.sql | 1 + 2 files changed, 1 insertion(+) create mode 100644 dbms/tests/queries/0_stateless/99999_prepare.reference create mode 100644 dbms/tests/queries/0_stateless/99999_prepare.sql diff --git a/dbms/tests/queries/0_stateless/99999_prepare.reference b/dbms/tests/queries/0_stateless/99999_prepare.reference new file mode 100644 index 00000000000..e69de29bb2d diff --git a/dbms/tests/queries/0_stateless/99999_prepare.sql b/dbms/tests/queries/0_stateless/99999_prepare.sql new file mode 100644 index 00000000000..e68c2efea8d --- /dev/null +++ b/dbms/tests/queries/0_stateless/99999_prepare.sql @@ -0,0 +1 @@ +CREATE DATABASE IF NOT EXISTS test; From d469e5eddd3be807a411eb196233af8f464e4279 Mon Sep 17 00:00:00 2001 From: proller Date: Tue, 26 Sep 2017 23:55:22 +0300 Subject: [PATCH 27/63] Fix no-shard tests --- ..._group_array.reference => 00113_shard_group_array.reference} | 0 .../{00113_group_array.sql => 00113_shard_group_array.sql} | 0 dbms/tests/queries/0_stateless/00284_external_aggregation.sql | 2 +- ...491_shard_distributed_and_aliases_in_where_having.reference} | 0 ... 
=> 00491_shard_distributed_and_aliases_in_where_having.sql} | 0 5 files changed, 1 insertion(+), 1 deletion(-) rename dbms/tests/queries/0_stateless/{00113_group_array.reference => 00113_shard_group_array.reference} (100%) rename dbms/tests/queries/0_stateless/{00113_group_array.sql => 00113_shard_group_array.sql} (100%) rename dbms/tests/queries/0_stateless/{00491_distributed_and_aliases_in_where_having.reference => 00491_shard_distributed_and_aliases_in_where_having.reference} (100%) rename dbms/tests/queries/0_stateless/{00491_distributed_and_aliases_in_where_having.sql => 00491_shard_distributed_and_aliases_in_where_having.sql} (100%) diff --git a/dbms/tests/queries/0_stateless/00113_group_array.reference b/dbms/tests/queries/0_stateless/00113_shard_group_array.reference similarity index 100% rename from dbms/tests/queries/0_stateless/00113_group_array.reference rename to dbms/tests/queries/0_stateless/00113_shard_group_array.reference diff --git a/dbms/tests/queries/0_stateless/00113_group_array.sql b/dbms/tests/queries/0_stateless/00113_shard_group_array.sql similarity index 100% rename from dbms/tests/queries/0_stateless/00113_group_array.sql rename to dbms/tests/queries/0_stateless/00113_shard_group_array.sql diff --git a/dbms/tests/queries/0_stateless/00284_external_aggregation.sql b/dbms/tests/queries/0_stateless/00284_external_aggregation.sql index 0595b81a0f7..75d2c0b9bc2 100644 --- a/dbms/tests/queries/0_stateless/00284_external_aggregation.sql +++ b/dbms/tests/queries/0_stateless/00284_external_aggregation.sql @@ -1,5 +1,5 @@ SET max_bytes_before_external_group_by = 100000000; -SET max_memory_usage = 200000000; +SET max_memory_usage = 201000000; SELECT sum(k), sum(c) FROM (SELECT number AS k, count() AS c FROM (SELECT * FROM system.numbers LIMIT 10000000) GROUP BY k); SELECT sum(k), sum(c), max(u) FROM (SELECT number AS k, count() AS c, uniqArray(range(number % 16)) AS u FROM (SELECT * FROM system.numbers LIMIT 1000000) GROUP BY k); diff --git a/dbms/tests/queries/0_stateless/00491_distributed_and_aliases_in_where_having.reference b/dbms/tests/queries/0_stateless/00491_shard_distributed_and_aliases_in_where_having.reference similarity index 100% rename from dbms/tests/queries/0_stateless/00491_distributed_and_aliases_in_where_having.reference rename to dbms/tests/queries/0_stateless/00491_shard_distributed_and_aliases_in_where_having.reference diff --git a/dbms/tests/queries/0_stateless/00491_distributed_and_aliases_in_where_having.sql b/dbms/tests/queries/0_stateless/00491_shard_distributed_and_aliases_in_where_having.sql similarity index 100% rename from dbms/tests/queries/0_stateless/00491_distributed_and_aliases_in_where_having.sql rename to dbms/tests/queries/0_stateless/00491_shard_distributed_and_aliases_in_where_having.sql From 8f3a244d2622236e4a6950cbfc03a17168c7f37e Mon Sep 17 00:00:00 2001 From: proller Date: Wed, 27 Sep 2017 17:52:24 +0300 Subject: [PATCH 28/63] Fix internal compiler with some new clang packages --- dbms/src/Interpreters/Compiler.cpp | 1 + 1 file changed, 1 insertion(+) diff --git a/dbms/src/Interpreters/Compiler.cpp b/dbms/src/Interpreters/Compiler.cpp index 4f5dcfc3308..fde9e717690 100644 --- a/dbms/src/Interpreters/Compiler.cpp +++ b/dbms/src/Interpreters/Compiler.cpp @@ -228,6 +228,7 @@ void Compiler::compile( " -isystem " INTERNAL_COMPILER_HEADERS_ROOT "/usr/include/x86_64-linux-gnu/" " -isystem " INTERNAL_COMPILER_HEADERS_ROOT "/usr/include/x86_64-linux-gnu/c++/*/" " -isystem " INTERNAL_COMPILER_HEADERS_ROOT "/usr/local/lib/clang/*/include/" 
+ " -isystem " INTERNAL_COMPILER_HEADERS_ROOT "/usr/lib/clang/*/include/" #endif " -I " INTERNAL_COMPILER_HEADERS "/dbms/src/" " -I " INTERNAL_COMPILER_HEADERS "/contrib/libcityhash/include/" From 07c964ef3b497b6ee0c35be85b983d6979725312 Mon Sep 17 00:00:00 2001 From: Vladimir Chebotarev Date: Thu, 28 Sep 2017 22:42:01 +0300 Subject: [PATCH 29/63] contrib: Fixed unwind build under GCC 7. [#METR-21516] See https://gcc.gnu.org/bugzilla/show_bug.cgi?id=81712 . --- contrib/libunwind/CMakeLists.txt | 9 +- contrib/libunwind/README | 2 +- .../{include/config.h => config/config.h.in} | 11 +- .../libunwind-common.h.in} | 10 +- .../libunwind.h => config/libunwind.h.in} | 0 .../tdep/libunwind_i.h.in} | 0 contrib/libunwind/include/config.h.in | 230 ------ contrib/libunwind/include/dwarf-eh.h | 4 +- contrib/libunwind/include/dwarf.h | 7 - contrib/libunwind/include/libunwind-arm.h | 7 +- .../libunwind/include/libunwind-common.h.in | 10 +- contrib/libunwind/include/libunwind_i.h | 18 +- contrib/libunwind/include/stamp-h1 | 1 - .../libunwind/include/tdep-arm/libunwind_i.h | 7 +- .../include/tdep-tilegx/libunwind_i.h | 4 - contrib/libunwind/include/tdep/jmpbuf.h | 2 +- contrib/libunwind/src/Makefile.am | 749 ++++++++++++++++++ contrib/libunwind/src/aarch64/Ginit.c | 6 +- contrib/libunwind/src/aarch64/Ginit_local.c | 15 +- contrib/libunwind/src/aarch64/Gregs.c | 11 +- contrib/libunwind/src/aarch64/Gresume.c | 21 +- contrib/libunwind/src/aarch64/Gstep.c | 60 +- contrib/libunwind/src/aarch64/unwind_i.h | 2 + contrib/libunwind/src/arm/Ginit.c | 12 +- contrib/libunwind/src/arm/Ginit_local.c | 15 +- contrib/libunwind/src/arm/Gis_signal_frame.c | 87 -- contrib/libunwind/src/arm/Gos-freebsd.c | 129 +++ contrib/libunwind/src/arm/Gos-linux.c | 182 +++++ .../Gcreate_addr_space.c => arm/Gos-other.c} | 46 +- contrib/libunwind/src/arm/Gregs.c | 4 +- contrib/libunwind/src/arm/Gstep.c | 113 +-- contrib/libunwind/src/arm/Gtrace.c | 9 +- .../Lis_signal_frame.c => arm/Los-freebsd.c} | 2 +- .../src/{dwarf/Lstep.c => arm/Los-linux.c} | 2 +- .../Lcreate_addr_space.c => arm/Los-other.c} | 2 +- contrib/libunwind/src/arm/getcontext.S | 9 +- contrib/libunwind/src/arm/offsets.h | 6 + .../src/coredump/_UCD_access_reg_freebsd.c | 23 +- contrib/libunwind/src/coredump/_UCD_destroy.c | 2 + .../src/coredump/libunwind-coredump.pc | 11 - contrib/libunwind/src/dwarf/Gfde.c | 2 +- .../libunwind/src/dwarf/Gfind_proc_info-lsb.c | 31 +- .../libunwind/src/dwarf/Gfind_unwind_table.c | 2 +- contrib/libunwind/src/dwarf/Gparser.c | 30 +- contrib/libunwind/src/elfxx.c | 16 +- contrib/libunwind/src/hppa/Ginit.c | 2 +- contrib/libunwind/src/hppa/Ginit_local.c | 15 +- contrib/libunwind/src/ia64/Gapply_reg_state.c | 4 +- contrib/libunwind/src/ia64/Ginit.c | 2 +- .../libunwind/src/ia64/Greg_states_iterate.c | 4 +- contrib/libunwind/src/ia64/Gscript.c | 4 +- contrib/libunwind/src/libunwind-generic.pc | 11 - contrib/libunwind/src/mi/Gdyn-extract.c | 1 + contrib/libunwind/src/mi/Gget_proc_name.c | 4 + contrib/libunwind/src/mi/Gset_cache_size.c | 8 + .../libunwind/src/mi/Gset_caching_policy.c | 2 +- contrib/libunwind/src/mips/Ginit.c | 2 +- contrib/libunwind/src/mips/Ginit_local.c | 15 +- contrib/libunwind/src/mips/Gregs.c | 2 + contrib/libunwind/src/os-freebsd.c | 2 +- contrib/libunwind/src/ppc/Ginit_local.c | 15 +- .../libunwind/src/ppc32/Gapply_reg_state.c | 37 + contrib/libunwind/src/ppc32/Ginit.c | 2 +- .../Gstep.c => ppc32/Greg_states_iterate.c} | 24 +- .../libunwind/src/ppc32/Lapply_reg_state.c | 5 + .../libunwind/src/ppc32/Lreg_states_iterate.c | 5 + 
.../libunwind/src/ppc64/Gapply_reg_state.c | 37 + contrib/libunwind/src/ppc64/Ginit.c | 2 +- .../libunwind/src/ppc64/Greg_states_iterate.c | 37 + .../libunwind/src/ppc64/Lapply_reg_state.c | 5 + .../libunwind/src/ppc64/Lreg_states_iterate.c | 5 + .../libunwind/src/ptrace/_UPT_access_fpreg.c | 16 + .../libunwind/src/ptrace/_UPT_access_reg.c | 45 +- .../libunwind/src/ptrace/_UPT_reg_offset.c | 4 + .../libunwind/src/ptrace/libunwind-ptrace.pc | 11 - .../libunwind/src/setjmp/libunwind-setjmp.pc | 11 - contrib/libunwind/src/sh/Ginit.c | 2 +- contrib/libunwind/src/sh/Ginit_local.c | 15 +- contrib/libunwind/src/sh/Gregs.c | 4 +- contrib/libunwind/src/tilegx/Ginit.c | 2 +- contrib/libunwind/src/tilegx/Ginit_local.c | 15 +- contrib/libunwind/src/tilegx/Gregs.c | 12 +- contrib/libunwind/src/unwind/libunwind.pc | 11 - contrib/libunwind/src/x86/Ginit.c | 2 +- contrib/libunwind/src/x86/Ginit_local.c | 15 +- contrib/libunwind/src/x86/Gos-linux.c | 25 +- contrib/libunwind/src/x86/unwind_i.h | 3 + contrib/libunwind/src/x86_64/Ginit.c | 55 +- contrib/libunwind/src/x86_64/Ginit_local.c | 15 +- contrib/libunwind/src/x86_64/Gos-linux.c | 2 +- contrib/libunwind/src/x86_64/init.h | 1 + libs/libdaemon/src/BaseDaemon.cpp | 2 +- 92 files changed, 1805 insertions(+), 636 deletions(-) rename contrib/libunwind/{include/config.h => config/config.h.in} (96%) rename contrib/libunwind/{include/libunwind-common.h => config/libunwind-common.h.in} (97%) rename contrib/libunwind/{include/libunwind.h => config/libunwind.h.in} (100%) rename contrib/libunwind/{include/tdep/libunwind_i.h => config/tdep/libunwind_i.h.in} (100%) delete mode 100644 contrib/libunwind/include/config.h.in delete mode 100644 contrib/libunwind/include/stamp-h1 create mode 100644 contrib/libunwind/src/Makefile.am delete mode 100644 contrib/libunwind/src/arm/Gis_signal_frame.c create mode 100644 contrib/libunwind/src/arm/Gos-freebsd.c create mode 100644 contrib/libunwind/src/arm/Gos-linux.c rename contrib/libunwind/src/{ppc/Gcreate_addr_space.c => arm/Gos-other.c} (65%) rename contrib/libunwind/src/{x86_64/Lis_signal_frame.c => arm/Los-freebsd.c} (78%) rename contrib/libunwind/src/{dwarf/Lstep.c => arm/Los-linux.c} (82%) rename contrib/libunwind/src/{ppc/Lcreate_addr_space.c => arm/Los-other.c} (77%) delete mode 100644 contrib/libunwind/src/coredump/libunwind-coredump.pc delete mode 100644 contrib/libunwind/src/libunwind-generic.pc create mode 100644 contrib/libunwind/src/ppc32/Gapply_reg_state.c rename contrib/libunwind/src/{dwarf/Gstep.c => ppc32/Greg_states_iterate.c} (72%) create mode 100644 contrib/libunwind/src/ppc32/Lapply_reg_state.c create mode 100644 contrib/libunwind/src/ppc32/Lreg_states_iterate.c create mode 100644 contrib/libunwind/src/ppc64/Gapply_reg_state.c create mode 100644 contrib/libunwind/src/ppc64/Greg_states_iterate.c create mode 100644 contrib/libunwind/src/ppc64/Lapply_reg_state.c create mode 100644 contrib/libunwind/src/ppc64/Lreg_states_iterate.c delete mode 100644 contrib/libunwind/src/ptrace/libunwind-ptrace.pc delete mode 100644 contrib/libunwind/src/setjmp/libunwind-setjmp.pc delete mode 100644 contrib/libunwind/src/unwind/libunwind.pc diff --git a/contrib/libunwind/CMakeLists.txt b/contrib/libunwind/CMakeLists.txt index 1a1b1e79bc3..47032be92bb 100644 --- a/contrib/libunwind/CMakeLists.txt +++ b/contrib/libunwind/CMakeLists.txt @@ -45,7 +45,6 @@ src/dwarf/Lfde.c src/dwarf/Lfind_proc_info-lsb.c src/dwarf/Lparser.c src/dwarf/Lpe.c -src/dwarf/Lstep.c src/dwarf/global.c src/elf64.c @@ -53,9 +52,17 @@ src/os-linux.c 
src/x86_64/Los-linux.c ) +find_file (HAVE_ATOMIC_OPS_H "atomic_ops.h") +configure_file (config/config.h.in ${CMAKE_CURRENT_BINARY_DIR}/config/config.h) +configure_file (config/libunwind.h.in ${CMAKE_CURRENT_BINARY_DIR}/config/libunwind.h) +configure_file (config/libunwind-common.h.in ${CMAKE_CURRENT_BINARY_DIR}/config/libunwind-common.h) +configure_file (config/tdep/libunwind_i.h.in ${CMAKE_CURRENT_BINARY_DIR}/config/tdep/libunwind_i.h) + target_compile_definitions (unwind PRIVATE HAVE_CONFIG_H=1 _XOPEN_SOURCE _GNU_SOURCE) target_compile_options (unwind PRIVATE -Wno-visibility -Wno-header-guard) target_include_directories (unwind PUBLIC include) target_include_directories (unwind PRIVATE include/tdep) +target_include_directories (unwind PUBLIC ${CMAKE_CURRENT_BINARY_DIR}/config) +target_include_directories (unwind PRIVATE ${CMAKE_CURRENT_BINARY_DIR}/config/tdep) target_include_directories (unwind PRIVATE src) diff --git a/contrib/libunwind/README b/contrib/libunwind/README index 08df15b5621..9687c3c0338 100644 --- a/contrib/libunwind/README +++ b/contrib/libunwind/README @@ -1,2 +1,2 @@ Source: https://github.com/libunwind/libunwind -Revision: 2934cf40529e0261801a4142fabae449a65effd0 +Revision: 60ddc67196eafb5cafd0d89e461c9d700a697d6d diff --git a/contrib/libunwind/include/config.h b/contrib/libunwind/config/config.h.in similarity index 96% rename from contrib/libunwind/include/config.h rename to contrib/libunwind/config/config.h.in index 672aa4ada7f..7dcf38ed012 100644 --- a/contrib/libunwind/include/config.h +++ b/contrib/libunwind/config/config.h.in @@ -17,7 +17,7 @@ /* #undef HAVE_ASM_PTRACE_OFFSETS_H */ /* Define to 1 if you have the header file. */ -/* #undef HAVE_ATOMIC_OPS_H */ +#cmakedefine HAVE_ATOMIC_OPS_H /* Define to 1 if you have the header file. */ #define HAVE_BYTESWAP_H 1 @@ -34,6 +34,10 @@ you don't. */ #define HAVE_DECL_PTRACE_POKEUSER 1 +/* Define to 1 if you have the declaration of `PTRACE_SETREGSET', and to 0 if + you don't. */ +#define HAVE_DECL_PTRACE_SETREGSET 1 + /* Define to 1 if you have the declaration of `PTRACE_SINGLESTEP', and to 0 if you don't. */ #define HAVE_DECL_PTRACE_SINGLESTEP 1 @@ -183,9 +187,10 @@ #define HAVE__BUILTIN___CLEAR_CACHE 1 /* Define to 1 if __thread keyword is supported by the C compiler. */ -#define HAVE___THREAD 1 +/* #undef HAVE___THREAD */ -/* Define to the sub-directory where libtool stores uninstalled libraries. */ +/* Define to the sub-directory in which libtool stores uninstalled libraries. 
+ */ #define LT_OBJDIR ".libs/" /* Name of package */ diff --git a/contrib/libunwind/include/libunwind-common.h b/contrib/libunwind/config/libunwind-common.h.in similarity index 97% rename from contrib/libunwind/include/libunwind-common.h rename to contrib/libunwind/config/libunwind-common.h.in index d4fbf270a28..ceacdb42128 100644 --- a/contrib/libunwind/include/libunwind-common.h +++ b/contrib/libunwind/config/libunwind-common.h.in @@ -86,6 +86,12 @@ typedef enum } unw_caching_policy_t; +typedef enum + { + UNW_INIT_SIGNAL_FRAME = 1, /* We know this is a signal frame */ + } +unw_init_local2_flags_t; + typedef int unw_regnum_t; /* The unwind cursor starts at the youngest (most deeply nested) frame @@ -219,7 +225,7 @@ unw_save_loc_t; #define unw_destroy_addr_space UNW_OBJ(destroy_addr_space) #define unw_get_accessors UNW_ARCH_OBJ(get_accessors) #define unw_init_local UNW_OBJ(init_local) -#define unw_init_local_signal UNW_OBJ(init_local_signal) +#define unw_init_local2 UNW_OBJ(init_local2) #define unw_init_remote UNW_OBJ(init_remote) #define unw_step UNW_OBJ(step) #define unw_resume UNW_OBJ(resume) @@ -250,7 +256,7 @@ extern int unw_set_cache_size (unw_addr_space_t, size_t, int); extern const char *unw_regname (unw_regnum_t); extern int unw_init_local (unw_cursor_t *, unw_context_t *); -extern int unw_init_local_signal (unw_cursor_t *, unw_context_t *); +extern int unw_init_local2 (unw_cursor_t *, unw_context_t *, int); extern int unw_init_remote (unw_cursor_t *, unw_addr_space_t, void *); extern int unw_step (unw_cursor_t *); extern int unw_resume (unw_cursor_t *); diff --git a/contrib/libunwind/include/libunwind.h b/contrib/libunwind/config/libunwind.h.in similarity index 100% rename from contrib/libunwind/include/libunwind.h rename to contrib/libunwind/config/libunwind.h.in diff --git a/contrib/libunwind/include/tdep/libunwind_i.h b/contrib/libunwind/config/tdep/libunwind_i.h.in similarity index 100% rename from contrib/libunwind/include/tdep/libunwind_i.h rename to contrib/libunwind/config/tdep/libunwind_i.h.in diff --git a/contrib/libunwind/include/config.h.in b/contrib/libunwind/include/config.h.in deleted file mode 100644 index 5eb05c90784..00000000000 --- a/contrib/libunwind/include/config.h.in +++ /dev/null @@ -1,230 +0,0 @@ -/* include/config.h.in. Generated from configure.ac by autoheader. */ - -/* Block signals before mutex operations */ -#undef CONFIG_BLOCK_SIGNALS - -/* Enable Debug Frame */ -#undef CONFIG_DEBUG_FRAME - -/* Support for Microsoft ABI extensions */ -#undef CONFIG_MSABI_SUPPORT - -/* Define to 1 if you want every memory access validated */ -#undef CONSERVATIVE_CHECKS - -/* Define to 1 if you have the header file. */ -#undef HAVE_ASM_PTRACE_OFFSETS_H - -/* Define to 1 if you have the header file. */ -#undef HAVE_ATOMIC_OPS_H - -/* Define to 1 if you have the header file. */ -#undef HAVE_BYTESWAP_H - -/* Define to 1 if you have the declaration of `PTRACE_CONT', and to 0 if you - don't. */ -#undef HAVE_DECL_PTRACE_CONT - -/* Define to 1 if you have the declaration of `PTRACE_POKEDATA', and to 0 if - you don't. */ -#undef HAVE_DECL_PTRACE_POKEDATA - -/* Define to 1 if you have the declaration of `PTRACE_POKEUSER', and to 0 if - you don't. */ -#undef HAVE_DECL_PTRACE_POKEUSER - -/* Define to 1 if you have the declaration of `PTRACE_SINGLESTEP', and to 0 if - you don't. */ -#undef HAVE_DECL_PTRACE_SINGLESTEP - -/* Define to 1 if you have the declaration of `PTRACE_SYSCALL', and to 0 if - you don't. 
*/ -#undef HAVE_DECL_PTRACE_SYSCALL - -/* Define to 1 if you have the declaration of `PTRACE_TRACEME', and to 0 if - you don't. */ -#undef HAVE_DECL_PTRACE_TRACEME - -/* Define to 1 if you have the declaration of `PT_CONTINUE', and to 0 if you - don't. */ -#undef HAVE_DECL_PT_CONTINUE - -/* Define to 1 if you have the declaration of `PT_GETFPREGS', and to 0 if you - don't. */ -#undef HAVE_DECL_PT_GETFPREGS - -/* Define to 1 if you have the declaration of `PT_GETREGS', and to 0 if you - don't. */ -#undef HAVE_DECL_PT_GETREGS - -/* Define to 1 if you have the declaration of `PT_IO', and to 0 if you don't. - */ -#undef HAVE_DECL_PT_IO - -/* Define to 1 if you have the declaration of `PT_STEP', and to 0 if you - don't. */ -#undef HAVE_DECL_PT_STEP - -/* Define to 1 if you have the declaration of `PT_SYSCALL', and to 0 if you - don't. */ -#undef HAVE_DECL_PT_SYSCALL - -/* Define to 1 if you have the declaration of `PT_TRACE_ME', and to 0 if you - don't. */ -#undef HAVE_DECL_PT_TRACE_ME - -/* Define to 1 if you have the header file. */ -#undef HAVE_DLFCN_H - -/* Define to 1 if you have the `dlmodinfo' function. */ -#undef HAVE_DLMODINFO - -/* Define to 1 if you have the `dl_iterate_phdr' function. */ -#undef HAVE_DL_ITERATE_PHDR - -/* Define to 1 if you have the `dl_phdr_removals_counter' function. */ -#undef HAVE_DL_PHDR_REMOVALS_COUNTER - -/* Define to 1 if you have the header file. */ -#undef HAVE_ELF_H - -/* Define to 1 if you have the header file. */ -#undef HAVE_ENDIAN_H - -/* Define to 1 if you have the header file. */ -#undef HAVE_EXECINFO_H - -/* Define to 1 if you have the `getunwind' function. */ -#undef HAVE_GETUNWIND - -/* Define to 1 if you have the header file. */ -#undef HAVE_IA64INTRIN_H - -/* Define to 1 if you have the header file. */ -#undef HAVE_INTTYPES_H - -/* Define to 1 if you have the `uca' library (-luca). */ -#undef HAVE_LIBUCA - -/* Define to 1 if you have the header file. */ -#undef HAVE_LINK_H - -/* Define if you have liblzma */ -#undef HAVE_LZMA - -/* Define to 1 if you have the header file. */ -#undef HAVE_MEMORY_H - -/* Define to 1 if you have the `mincore' function. */ -#undef HAVE_MINCORE - -/* Define to 1 if you have the header file. */ -#undef HAVE_SIGNAL_H - -/* Define to 1 if you have the header file. */ -#undef HAVE_STDINT_H - -/* Define to 1 if you have the header file. */ -#undef HAVE_STDLIB_H - -/* Define to 1 if you have the header file. */ -#undef HAVE_STRINGS_H - -/* Define to 1 if you have the header file. */ -#undef HAVE_STRING_H - -/* Define to 1 if `dlpi_subs' is a member of `struct dl_phdr_info'. */ -#undef HAVE_STRUCT_DL_PHDR_INFO_DLPI_SUBS - -/* Define to 1 if the system has the type `struct elf_prstatus'. */ -#undef HAVE_STRUCT_ELF_PRSTATUS - -/* Define to 1 if the system has the type `struct prstatus'. */ -#undef HAVE_STRUCT_PRSTATUS - -/* Defined if __sync atomics are available */ -#undef HAVE_SYNC_ATOMICS - -/* Define to 1 if you have the header file. */ -#undef HAVE_SYS_ELF_H - -/* Define to 1 if you have the header file. */ -#undef HAVE_SYS_ENDIAN_H - -/* Define to 1 if you have the header file. */ -#undef HAVE_SYS_LINK_H - -/* Define to 1 if you have the header file. */ -#undef HAVE_SYS_PROCFS_H - -/* Define to 1 if you have the header file. */ -#undef HAVE_SYS_PTRACE_H - -/* Define to 1 if you have the header file. */ -#undef HAVE_SYS_STAT_H - -/* Define to 1 if you have the header file. */ -#undef HAVE_SYS_TYPES_H - -/* Define to 1 if you have the header file. 
*/ -#undef HAVE_SYS_UC_ACCESS_H - -/* Define to 1 if you have the `ttrace' function. */ -#undef HAVE_TTRACE - -/* Define to 1 if you have the header file. */ -#undef HAVE_UNISTD_H - -/* Defined if __builtin_unreachable() is available */ -#undef HAVE__BUILTIN_UNREACHABLE - -/* Defined if __builtin___clear_cache() is available */ -#undef HAVE__BUILTIN___CLEAR_CACHE - -/* Define to 1 if __thread keyword is supported by the C compiler. */ -#undef HAVE___THREAD - -/* Define to the sub-directory where libtool stores uninstalled libraries. */ -#undef LT_OBJDIR - -/* Name of package */ -#undef PACKAGE - -/* Define to the address where bug reports for this package should be sent. */ -#undef PACKAGE_BUGREPORT - -/* Define to the full name of this package. */ -#undef PACKAGE_NAME - -/* Define to the full name and version of this package. */ -#undef PACKAGE_STRING - -/* Define to the one symbol short name of this package. */ -#undef PACKAGE_TARNAME - -/* Define to the home page for this package. */ -#undef PACKAGE_URL - -/* Define to the version of this package. */ -#undef PACKAGE_VERSION - -/* The size of `off_t', as computed by sizeof. */ -#undef SIZEOF_OFF_T - -/* Define to 1 if you have the ANSI C header files. */ -#undef STDC_HEADERS - -/* Version number of package */ -#undef VERSION - -/* Define to empty if `const' does not conform to ANSI C. */ -#undef const - -/* Define to `__inline__' or `__inline' if that's what the C compiler - calls it, or to nothing if 'inline' is not supported under any name. */ -#ifndef __cplusplus -#undef inline -#endif - -/* Define to `unsigned int' if does not define. */ -#undef size_t diff --git a/contrib/libunwind/include/dwarf-eh.h b/contrib/libunwind/include/dwarf-eh.h index e81aaef88ad..e03750760c5 100644 --- a/contrib/libunwind/include/dwarf-eh.h +++ b/contrib/libunwind/include/dwarf-eh.h @@ -106,16 +106,16 @@ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. */ #define DW_EH_VERSION 1 /* The version we're implementing */ -struct dwarf_eh_frame_hdr +struct __attribute__((packed)) dwarf_eh_frame_hdr { unsigned char version; unsigned char eh_frame_ptr_enc; unsigned char fde_count_enc; unsigned char table_enc; + Elf_W (Addr) eh_frame; /* The rest of the header is variable-length and consists of the following members: - encoded_t eh_frame_ptr; encoded_t fde_count; struct { diff --git a/contrib/libunwind/include/dwarf.h b/contrib/libunwind/include/dwarf.h index e8ffa66cb48..db2a76972d1 100644 --- a/contrib/libunwind/include/dwarf.h +++ b/contrib/libunwind/include/dwarf.h @@ -37,13 +37,6 @@ struct elf_dyn_info; # include "config.h" #endif -#ifdef HAVE___THREAD - /* For now, turn off per-thread caching. It uses up too much TLS - memory per thread even when the thread never uses libunwind at - all. 
*/ -# undef HAVE___THREAD -#endif - #ifndef UNW_REMOTE_ONLY #if defined(HAVE_LINK_H) #include diff --git a/contrib/libunwind/include/libunwind-arm.h b/contrib/libunwind/include/libunwind-arm.h index f208487a999..6709b7abaee 100644 --- a/contrib/libunwind/include/libunwind-arm.h +++ b/contrib/libunwind/include/libunwind-arm.h @@ -265,7 +265,7 @@ unw_tdep_context_t; #ifndef __thumb__ #define unw_tdep_getcontext(uc) (({ \ unw_tdep_context_t *unw_ctx = (uc); \ - register unsigned long *unw_base asm ("r0") = unw_ctx->regs; \ + register unsigned long *unw_base __asm__ ("r0") = unw_ctx->regs; \ __asm__ __volatile__ ( \ "stmia %[base], {r0-r15}" \ : : [base] "r" (unw_base) : "memory"); \ @@ -273,11 +273,12 @@ unw_tdep_context_t; #else /* __thumb__ */ #define unw_tdep_getcontext(uc) (({ \ unw_tdep_context_t *unw_ctx = (uc); \ - register unsigned long *unw_base asm ("r0") = unw_ctx->regs; \ + register unsigned long *unw_base __asm__ ("r0") = unw_ctx->regs; \ __asm__ __volatile__ ( \ ".align 2\nbx pc\nnop\n.code 32\n" \ "stmia %[base], {r0-r15}\n" \ - "orr %[base], pc, #1\nbx %[base]" \ + "orr %[base], pc, #1\nbx %[base]\n" \ + ".code 16\n" \ : [base] "+r" (unw_base) : : "memory", "cc"); \ }), 0) #endif diff --git a/contrib/libunwind/include/libunwind-common.h.in b/contrib/libunwind/include/libunwind-common.h.in index 0a9537ebf1d..9811f4915e6 100644 --- a/contrib/libunwind/include/libunwind-common.h.in +++ b/contrib/libunwind/include/libunwind-common.h.in @@ -86,6 +86,12 @@ typedef enum } unw_caching_policy_t; +typedef enum + { + UNW_INIT_SIGNAL_FRAME = 1, /* We know this is a signal frame */ + } +unw_init_local2_flags_t; + typedef int unw_regnum_t; /* The unwind cursor starts at the youngest (most deeply nested) frame @@ -219,7 +225,7 @@ unw_save_loc_t; #define unw_destroy_addr_space UNW_OBJ(destroy_addr_space) #define unw_get_accessors UNW_ARCH_OBJ(get_accessors) #define unw_init_local UNW_OBJ(init_local) -#define unw_init_local_signal UNW_OBJ(init_local_signal) +#define unw_init_local2 UNW_OBJ(init_local2) #define unw_init_remote UNW_OBJ(init_remote) #define unw_step UNW_OBJ(step) #define unw_resume UNW_OBJ(resume) @@ -250,7 +256,7 @@ extern int unw_set_cache_size (unw_addr_space_t, size_t, int); extern const char *unw_regname (unw_regnum_t); extern int unw_init_local (unw_cursor_t *, unw_context_t *); -extern int unw_init_local_signal (unw_cursor_t *, unw_context_t *); +extern int unw_init_local2 (unw_cursor_t *, unw_context_t *, int); extern int unw_init_remote (unw_cursor_t *, unw_addr_space_t, void *); extern int unw_step (unw_cursor_t *); extern int unw_resume (unw_cursor_t *); diff --git a/contrib/libunwind/include/libunwind_i.h b/contrib/libunwind/include/libunwind_i.h index ee2ea2fbec3..36cf7a14de7 100644 --- a/contrib/libunwind/include/libunwind_i.h +++ b/contrib/libunwind/include/libunwind_i.h @@ -37,11 +37,10 @@ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. */ #include "compiler.h" -#ifdef HAVE___THREAD - /* For now, turn off per-thread caching. It uses up too much TLS - memory per thread even when the thread never uses libunwind at - all. */ -# undef HAVE___THREAD +#if defined(HAVE___THREAD) && HAVE___THREAD +#define UNWI_DEFAULT_CACHING_POLICY UNW_CACHE_PER_THREAD +#else +#define UNWI_DEFAULT_CACHING_POLICY UNW_CACHE_GLOBAL #endif /* Platform-independent libunwind-internal declarations. */ @@ -69,6 +68,15 @@ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
*/ # include #elif defined(HAVE_SYS_ENDIAN_H) # include +# if defined(_LITTLE_ENDIAN) && !defined(__LITTLE_ENDIAN) +# define __LITTLE_ENDIAN _LITTLE_ENDIAN +# endif +# if defined(_BIG_ENDIAN) && !defined(__BIG_ENDIAN) +# define __BIG_ENDIAN _BIG_ENDIAN +# endif +# if defined(_BYTE_ORDER) && !defined(__BYTE_ORDER) +# define __BYTE_ORDER _BYTE_ORDER +# endif #else # define __LITTLE_ENDIAN 1234 # define __BIG_ENDIAN 4321 diff --git a/contrib/libunwind/include/stamp-h1 b/contrib/libunwind/include/stamp-h1 deleted file mode 100644 index b330768e9bf..00000000000 --- a/contrib/libunwind/include/stamp-h1 +++ /dev/null @@ -1 +0,0 @@ -timestamp for include/config.h diff --git a/contrib/libunwind/include/tdep-arm/libunwind_i.h b/contrib/libunwind/include/tdep-arm/libunwind_i.h index 9996b2f2cf2..2602f41c4f7 100644 --- a/contrib/libunwind/include/tdep-arm/libunwind_i.h +++ b/contrib/libunwind/include/tdep-arm/libunwind_i.h @@ -38,6 +38,7 @@ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. */ typedef enum { + UNW_ARM_FRAME_SYSCALL = -3, /* r7 saved in r12, sp offset zero */ UNW_ARM_FRAME_STANDARD = -2, /* regular r7, sp +/- offset */ UNW_ARM_FRAME_SIGRETURN = -1, /* special sigreturn frame */ UNW_ARM_FRAME_OTHER = 0, /* not cacheable (special or unrecognised) */ @@ -48,7 +49,7 @@ unw_tdep_frame_type_t; typedef struct { uint32_t virtual_address; - int32_t frame_type : 2; /* unw_tdep_frame_type_t classification */ + int32_t frame_type : 3; /* unw_tdep_frame_type_t classification */ int32_t last_frame : 1; /* non-zero if last frame in chain */ int32_t cfa_reg_sp : 1; /* cfa dwarf base register is sp vs. r7 */ int32_t cfa_reg_offset : 30; /* cfa is at this offset from base register value */ @@ -86,7 +87,9 @@ struct cursor ARM_SCF_LINUX_SIGFRAME, /* non-RT signal frame, kernel >=2.6.18 */ ARM_SCF_LINUX_RT_SIGFRAME, /* RT signal frame, kernel >=2.6.18 */ ARM_SCF_LINUX_OLD_SIGFRAME, /* non-RT signal frame, kernel < 2.6.18 */ - ARM_SCF_LINUX_OLD_RT_SIGFRAME /* RT signal frame, kernel < 2.6.18 */ + ARM_SCF_LINUX_OLD_RT_SIGFRAME, /* RT signal frame, kernel < 2.6.18 */ + ARM_SCF_FREEBSD_SIGFRAME, /* FreeBSD sigframe */ + ARM_SCF_FREEBSD_SYSCALL, /* FreeBSD syscall stub */ } sigcontext_format; unw_word_t sigcontext_addr; diff --git a/contrib/libunwind/include/tdep-tilegx/libunwind_i.h b/contrib/libunwind/include/tdep-tilegx/libunwind_i.h index 4a598f84700..2cfed456a70 100644 --- a/contrib/libunwind/include/tdep-tilegx/libunwind_i.h +++ b/contrib/libunwind/include/tdep-tilegx/libunwind_i.h @@ -36,10 +36,6 @@ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. */ #include "mempool.h" #include "dwarf.h" -#ifdef HAVE___THREAD -# undef HAVE___THREAD -#endif - typedef struct { /* no Tilegx-specific fast trace */ diff --git a/contrib/libunwind/include/tdep/jmpbuf.h b/contrib/libunwind/include/tdep/jmpbuf.h index 4eae183e5bd..13093a0cdc3 100644 --- a/contrib/libunwind/include/tdep/jmpbuf.h +++ b/contrib/libunwind/include/tdep/jmpbuf.h @@ -5,7 +5,7 @@ #if defined __aarch64__ # include "tdep-aarch64/jmpbuf.h" -#if defined __arm__ +#elif defined __arm__ # include "tdep-arm/jmpbuf.h" #elif defined __hppa__ # include "tdep-hppa/jmpbuf.h" diff --git a/contrib/libunwind/src/Makefile.am b/contrib/libunwind/src/Makefile.am new file mode 100644 index 00000000000..7514ab551aa --- /dev/null +++ b/contrib/libunwind/src/Makefile.am @@ -0,0 +1,749 @@ +SOVERSION=8:1:0 # See comments at end of file. 
+SETJMP_SO_VERSION=0:0:0 +COREDUMP_SO_VERSION=0:0:0 +# +# Don't link with start-files since we don't use any constructors/destructors: +# +COMMON_SO_LDFLAGS = $(LDFLAGS_NOSTARTFILES) + +lib_LIBRARIES = +lib_LTLIBRARIES = +if !REMOTE_ONLY +lib_LTLIBRARIES += libunwind.la +if BUILD_PTRACE +lib_LTLIBRARIES += libunwind-ptrace.la +endif +if BUILD_COREDUMP +lib_LTLIBRARIES += libunwind-coredump.la +endif +endif + +noinst_HEADERS = +noinst_LTLIBRARIES = + +pkgconfigdir = $(libdir)/pkgconfig +pkgconfig_DATA = libunwind-generic.pc + +if !REMOTE_ONLY +pkgconfig_DATA += unwind/libunwind.pc +endif + +if BUILD_PTRACE +pkgconfig_DATA += ptrace/libunwind-ptrace.pc +endif + +if BUILD_SETJMP +pkgconfig_DATA += setjmp/libunwind-setjmp.pc +endif + +if BUILD_COREDUMP +pkgconfig_DATA += coredump/libunwind-coredump.pc +endif + +### libunwind-ptrace: +libunwind_ptrace_la_SOURCES = \ + ptrace/_UPT_elf.c \ + ptrace/_UPT_accessors.c ptrace/_UPT_access_fpreg.c \ + ptrace/_UPT_access_mem.c ptrace/_UPT_access_reg.c \ + ptrace/_UPT_create.c ptrace/_UPT_destroy.c \ + ptrace/_UPT_find_proc_info.c ptrace/_UPT_get_dyn_info_list_addr.c \ + ptrace/_UPT_put_unwind_info.c ptrace/_UPT_get_proc_name.c \ + ptrace/_UPT_reg_offset.c ptrace/_UPT_resume.c +noinst_HEADERS += ptrace/_UPT_internal.h + +### libunwind-coredump: +libunwind_coredump_la_SOURCES = \ + coredump/_UCD_accessors.c \ + coredump/_UCD_create.c \ + coredump/_UCD_destroy.c \ + coredump/_UCD_access_mem.c \ + coredump/_UCD_elf_map_image.c \ + coredump/_UCD_find_proc_info.c \ + coredump/_UCD_get_proc_name.c \ + \ + coredump/_UPT_elf.c \ + coredump/_UPT_access_fpreg.c \ + coredump/_UPT_get_dyn_info_list_addr.c \ + coredump/_UPT_put_unwind_info.c \ + coredump/_UPT_resume.c +libunwind_coredump_la_LDFLAGS = $(COMMON_SO_LDFLAGS) \ + -version-info $(COREDUMP_SO_VERSION) +libunwind_coredump_la_LIBADD = $(LIBLZMA) +noinst_HEADERS += coredump/_UCD_internal.h coredump/_UCD_lib.h + +### libunwind-setjmp: +libunwind_setjmp_la_LDFLAGS = $(COMMON_SO_LDFLAGS) \ + -version-info $(SETJMP_SO_VERSION) + +if USE_ELF32 +LIBUNWIND_ELF = libunwind-elf32.la +endif +if USE_ELF64 +LIBUNWIND_ELF = libunwind-elf64.la +endif +if USE_ELFXX +LIBUNWIND_ELF = libunwind-elfxx.la +endif + +libunwind_setjmp_la_LIBADD = $(LIBUNWIND_ELF) \ + libunwind-$(arch).la \ + libunwind.la -lc +libunwind_setjmp_la_SOURCES = setjmp/longjmp.c \ + setjmp/siglongjmp.c +noinst_HEADERS += setjmp/setjmp_i.h + +### libunwind: +libunwind_la_LIBADD = + +# List of arch-independent files needed by both local-only and generic +# libraries: +libunwind_la_SOURCES_common = \ + $(libunwind_la_SOURCES_os) \ + mi/init.c mi/flush_cache.c mi/mempool.c mi/strerror.c + +# List of arch-independent files needed by generic library (libunwind-$ARCH): +libunwind_la_SOURCES_generic = \ + mi/Gdyn-extract.c mi/Gdyn-remote.c mi/Gfind_dynamic_proc_info.c \ + mi/Gget_accessors.c \ + mi/Gget_proc_info_by_ip.c mi/Gget_proc_name.c \ + mi/Gput_dynamic_unwind_info.c mi/Gdestroy_addr_space.c \ + mi/Gget_reg.c mi/Gset_reg.c \ + mi/Gget_fpreg.c mi/Gset_fpreg.c \ + mi/Gset_caching_policy.c \ + mi/Gset_cache_size.c + +if SUPPORT_CXX_EXCEPTIONS +libunwind_la_SOURCES_local_unwind = \ + unwind/Backtrace.c unwind/DeleteException.c \ + unwind/FindEnclosingFunction.c unwind/ForcedUnwind.c \ + unwind/GetBSP.c unwind/GetCFA.c unwind/GetDataRelBase.c \ + unwind/GetGR.c unwind/GetIP.c unwind/GetLanguageSpecificData.c \ + unwind/GetRegionStart.c unwind/GetTextRelBase.c \ + unwind/RaiseException.c unwind/Resume.c \ + unwind/Resume_or_Rethrow.c unwind/SetGR.c unwind/SetIP.c 
\ + unwind/GetIPInfo.c + +# _ReadULEB()/_ReadSLEB() are needed for Intel C++ 8.0 compatibility +libunwind_la_SOURCES_os_linux_local = mi/_ReadULEB.c mi/_ReadSLEB.c +endif + +# List of arch-independent files needed by local-only library (libunwind): +libunwind_la_SOURCES_local_nounwind = \ + $(libunwind_la_SOURCES_os_local) \ + mi/backtrace.c \ + mi/dyn-cancel.c mi/dyn-info-list.c mi/dyn-register.c \ + mi/Ldyn-extract.c mi/Lfind_dynamic_proc_info.c \ + mi/Lget_accessors.c \ + mi/Lget_proc_info_by_ip.c mi/Lget_proc_name.c \ + mi/Lput_dynamic_unwind_info.c mi/Ldestroy_addr_space.c \ + mi/Lget_reg.c mi/Lset_reg.c \ + mi/Lget_fpreg.c mi/Lset_fpreg.c \ + mi/Lset_caching_policy.c \ + mi/Lset_cache_size.c + +libunwind_la_SOURCES_local = \ + $(libunwind_la_SOURCES_local_nounwind) \ + $(libunwind_la_SOURCES_local_unwind) + +noinst_HEADERS += os-linux.h +libunwind_la_SOURCES_os_linux = os-linux.c + +libunwind_la_SOURCES_os_hpux = os-hpux.c + +libunwind_la_SOURCES_os_freebsd = os-freebsd.c + +libunwind_la_SOURCES_os_qnx = os-qnx.c + +libunwind_dwarf_common_la_SOURCES = dwarf/global.c + +libunwind_dwarf_local_la_SOURCES = \ + dwarf/Lexpr.c dwarf/Lfde.c dwarf/Lparser.c dwarf/Lpe.c \ + dwarf/Lfind_proc_info-lsb.c \ + dwarf/Lfind_unwind_table.c +libunwind_dwarf_local_la_LIBADD = libunwind-dwarf-common.la + +libunwind_dwarf_generic_la_SOURCES = \ + dwarf/Gexpr.c dwarf/Gfde.c dwarf/Gparser.c dwarf/Gpe.c \ + dwarf/Gfind_proc_info-lsb.c \ + dwarf/Gfind_unwind_table.c +libunwind_dwarf_generic_la_LIBADD = libunwind-dwarf-common.la + +if USE_DWARF + noinst_LTLIBRARIES += libunwind-dwarf-common.la libunwind-dwarf-generic.la +if !REMOTE_ONLY + noinst_LTLIBRARIES += libunwind-dwarf-local.la +endif + libunwind_la_LIBADD += libunwind-dwarf-local.la +endif + +noinst_HEADERS += elf32.h elf64.h elfxx.h + +libunwind_elf32_la_SOURCES = elf32.c +libunwind_elf64_la_SOURCES = elf64.c +libunwind_elfxx_la_SOURCES = elfxx.c +libunwind_elf32_la_LIBADD = $(LIBLZMA) +libunwind_elf64_la_LIBADD = $(LIBLZMA) +libunwind_elfxx_la_LIBADD = $(LIBLZMA) + +noinst_LTLIBRARIES += $(LIBUNWIND_ELF) +libunwind_la_LIBADD += $(LIBUNWIND_ELF) + +# The list of files that go into libunwind and libunwind-aarch64: +noinst_HEADERS += aarch64/init.h aarch64/offsets.h aarch64/unwind_i.h +libunwind_la_SOURCES_aarch64_common = $(libunwind_la_SOURCES_common) \ + aarch64/is_fpreg.c aarch64/regname.c + +# The list of files that go into libunwind: +libunwind_la_SOURCES_aarch64 = $(libunwind_la_SOURCES_aarch64_common) \ + $(libunwind_la_SOURCES_local) \ + aarch64/Lapply_reg_state.c aarch64/Lreg_states_iterate.c \ + aarch64/Lcreate_addr_space.c aarch64/Lget_proc_info.c \ + aarch64/Lget_save_loc.c aarch64/Lglobal.c aarch64/Linit.c \ + aarch64/Linit_local.c aarch64/Linit_remote.c \ + aarch64/Lis_signal_frame.c aarch64/Lregs.c aarch64/Lresume.c \ + aarch64/Lstash_frame.c aarch64/Lstep.c aarch64/Ltrace.c \ + aarch64/getcontext.S + +libunwind_aarch64_la_SOURCES_aarch64 = $(libunwind_la_SOURCES_aarch64_common) \ + $(libunwind_la_SOURCES_generic) \ + aarch64/Gapply_reg_state.c aarch64/Greg_states_iterate.c \ + aarch64/Gcreate_addr_space.c aarch64/Gget_proc_info.c \ + aarch64/Gget_save_loc.c aarch64/Gglobal.c aarch64/Ginit.c \ + aarch64/Ginit_local.c aarch64/Ginit_remote.c \ + aarch64/Gis_signal_frame.c aarch64/Gregs.c aarch64/Gresume.c \ + aarch64/Gstash_frame.c aarch64/Gstep.c aarch64/Gtrace.c + +# The list of files that go into libunwind and libunwind-arm: +noinst_HEADERS += arm/init.h arm/offsets.h arm/unwind_i.h +libunwind_la_SOURCES_arm_common = 
$(libunwind_la_SOURCES_common) \ + arm/is_fpreg.c arm/regname.c + +# The list of files that go into libunwind: +libunwind_la_SOURCES_arm = $(libunwind_la_SOURCES_arm_common) \ + $(libunwind_la_SOURCES_arm_os_local) \ + $(libunwind_la_SOURCES_local) \ + arm/getcontext.S \ + arm/Lapply_reg_state.c arm/Lreg_states_iterate.c \ + arm/Lcreate_addr_space.c arm/Lget_proc_info.c arm/Lget_save_loc.c \ + arm/Lglobal.c arm/Linit.c arm/Linit_local.c arm/Linit_remote.c \ + arm/Lregs.c arm/Lresume.c arm/Lstep.c \ + arm/Lex_tables.c arm/Lstash_frame.c arm/Ltrace.c + +# The list of files that go into libunwind-arm: +libunwind_arm_la_SOURCES_arm = $(libunwind_la_SOURCES_arm_common) \ + $(libunwind_la_SOURCES_arm_os) \ + $(libunwind_la_SOURCES_generic) \ + arm/Gapply_reg_state.c arm/Greg_states_iterate.c \ + arm/Gcreate_addr_space.c arm/Gget_proc_info.c arm/Gget_save_loc.c \ + arm/Gglobal.c arm/Ginit.c arm/Ginit_local.c arm/Ginit_remote.c \ + arm/Gregs.c arm/Gresume.c arm/Gstep.c \ + arm/Gex_tables.c arm/Gstash_frame.c arm/Gtrace.c + +# The list of files that go both into libunwind and libunwind-ia64: +noinst_HEADERS += ia64/init.h ia64/offsets.h ia64/regs.h \ + ia64/ucontext_i.h ia64/unwind_decoder.h ia64/unwind_i.h +libunwind_la_SOURCES_ia64_common = $(libunwind_la_SOURCES_common) \ + ia64/regname.c + +# The list of files that go into libunwind: +libunwind_la_SOURCES_ia64 = $(libunwind_la_SOURCES_ia64_common) \ + $(libunwind_la_SOURCES_local) \ + \ + ia64/dyn_info_list.S ia64/getcontext.S \ + \ + ia64/Lapply_reg_state.c ia64/Lreg_states_iterate.c \ + ia64/Lcreate_addr_space.c ia64/Lget_proc_info.c ia64/Lget_save_loc.c \ + ia64/Lglobal.c ia64/Linit.c ia64/Linit_local.c ia64/Linit_remote.c \ + ia64/Linstall_cursor.S ia64/Lis_signal_frame.c ia64/Lparser.c \ + ia64/Lrbs.c ia64/Lregs.c ia64/Lresume.c ia64/Lscript.c ia64/Lstep.c \ + ia64/Ltables.c ia64/Lfind_unwind_table.c + +# The list of files that go into libunwind-ia64: +libunwind_ia64_la_SOURCES_ia64 = $(libunwind_la_SOURCES_ia64_common) \ + $(libunwind_la_SOURCES_generic) \ + ia64/Gapply_reg_state.c ia64/Greg_states_iterate.c \ + ia64/Gcreate_addr_space.c ia64/Gget_proc_info.c ia64/Gget_save_loc.c \ + ia64/Gglobal.c ia64/Ginit.c ia64/Ginit_local.c ia64/Ginit_remote.c \ + ia64/Ginstall_cursor.S ia64/Gis_signal_frame.c ia64/Gparser.c \ + ia64/Grbs.c ia64/Gregs.c ia64/Gresume.c ia64/Gscript.c ia64/Gstep.c \ + ia64/Gtables.c ia64/Gfind_unwind_table.c + +# The list of files that go both into libunwind and libunwind-hppa: +noinst_HEADERS += hppa/init.h hppa/offsets.h hppa/unwind_i.h +libunwind_la_SOURCES_hppa_common = $(libunwind_la_SOURCES_common) \ + hppa/regname.c + +# The list of files that go into libunwind: +libunwind_la_SOURCES_hppa = $(libunwind_la_SOURCES_hppa_common) \ + $(libunwind_la_SOURCES_local) \ + hppa/getcontext.S hppa/setcontext.S \ + hppa/Lapply_reg_state.c hppa/Lreg_states_iterate.c \ + hppa/Lcreate_addr_space.c hppa/Lget_save_loc.c hppa/Lglobal.c \ + hppa/Linit.c hppa/Linit_local.c hppa/Linit_remote.c \ + hppa/Lis_signal_frame.c hppa/Lget_proc_info.c hppa/Lregs.c \ + hppa/Lresume.c hppa/Lstep.c + +# The list of files that go into libunwind-hppa: +libunwind_hppa_la_SOURCES_hppa = $(libunwind_la_SOURCES_hppa_common) \ + $(libunwind_la_SOURCES_generic) \ + hppa/Gapply_reg_state.c hppa/Greg_states_iterate.c \ + hppa/Gcreate_addr_space.c hppa/Gget_save_loc.c hppa/Gglobal.c \ + hppa/Ginit.c hppa/Ginit_local.c hppa/Ginit_remote.c \ + hppa/Gis_signal_frame.c hppa/Gget_proc_info.c hppa/Gregs.c \ + hppa/Gresume.c hppa/Gstep.c + +# The list of files 
that go info libunwind and libunwind-mips: +noinst_HEADERS += mips/init.h mips/offsets.h mips/unwind_i.h +libunwind_la_SOURCES_mips_common = $(libunwind_la_SOURCES_common) \ + mips/is_fpreg.c mips/regname.c + +# The list of files that go into libunwind: +libunwind_la_SOURCES_mips = $(libunwind_la_SOURCES_mips_common) \ + $(libunwind_la_SOURCES_local) \ + mips/getcontext.S \ + mips/Lapply_reg_state.c mips/Lreg_states_iterate.c \ + mips/Lcreate_addr_space.c mips/Lget_proc_info.c mips/Lget_save_loc.c \ + mips/Lglobal.c mips/Linit.c mips/Linit_local.c mips/Linit_remote.c \ + mips/Lis_signal_frame.c mips/Lregs.c mips/Lresume.c mips/Lstep.c + +libunwind_mips_la_SOURCES_mips = $(libunwind_la_SOURCES_mips_common) \ + $(libunwind_la_SOURCES_generic) \ + mips/Gapply_reg_state.c mips/Greg_states_iterate.c \ + mips/Gcreate_addr_space.c mips/Gget_proc_info.c mips/Gget_save_loc.c \ + mips/Gglobal.c mips/Ginit.c mips/Ginit_local.c mips/Ginit_remote.c \ + mips/Gis_signal_frame.c mips/Gregs.c mips/Gresume.c mips/Gstep.c + +# The list of files that go info libunwind and libunwind-tilegx: +noinst_HEADERS += tilegx/init.h tilegx/offsets.h tilegx/unwind_i.h +libunwind_la_SOURCES_tilegx_common = $(libunwind_la_SOURCES_common) \ + tilegx/is_fpreg.c tilegx/regname.c + +# The list of files that go into libunwind: +libunwind_la_SOURCES_tilegx = $(libunwind_la_SOURCES_tilegx_common) \ + $(libunwind_la_SOURCES_local) \ + tilegx/getcontext.S \ + tilegx/Lapply_reg_state.c tilegx/Lreg_states_iterate.c \ + tilegx/Lcreate_addr_space.c tilegx/Lget_proc_info.c tilegx/Lget_save_loc.c \ + tilegx/Lglobal.c tilegx/Linit.c tilegx/Linit_local.c tilegx/Linit_remote.c \ + tilegx/Lis_signal_frame.c tilegx/Lregs.c tilegx/Lresume.c tilegx/Lstep.c + +libunwind_tilegx_la_SOURCES_tilegx = $(libunwind_la_SOURCES_tilegx_common) \ + $(libunwind_la_SOURCES_generic) \ + tilegx/Gapply_reg_state.c tilegx/Greg_states_iterate.c \ + tilegx/Gcreate_addr_space.c tilegx/Gget_proc_info.c tilegx/Gget_save_loc.c \ + tilegx/Gglobal.c tilegx/Ginit.c tilegx/Ginit_local.c tilegx/Ginit_remote.c \ + tilegx/Gis_signal_frame.c tilegx/Gregs.c tilegx/Gresume.c tilegx/Gstep.c + + +# The list of files that go both into libunwind and libunwind-x86: +noinst_HEADERS += x86/init.h x86/offsets.h x86/unwind_i.h +libunwind_la_SOURCES_x86_common = $(libunwind_la_SOURCES_common) \ + x86/is_fpreg.c x86/regname.c + +# The list of files that go into libunwind: +libunwind_la_SOURCES_x86 = $(libunwind_la_SOURCES_x86_common) \ + $(libunwind_la_SOURCES_x86_os_local) \ + $(libunwind_la_SOURCES_local) \ + x86/Lapply_reg_state.c x86/Lreg_states_iterate.c \ + x86/Lcreate_addr_space.c x86/Lget_save_loc.c x86/Lglobal.c \ + x86/Linit.c x86/Linit_local.c x86/Linit_remote.c \ + x86/Lget_proc_info.c x86/Lregs.c \ + x86/Lresume.c x86/Lstep.c + +# The list of files that go into libunwind-x86: +libunwind_x86_la_SOURCES_x86 = $(libunwind_la_SOURCES_x86_common) \ + $(libunwind_la_SOURCES_x86_os) \ + $(libunwind_la_SOURCES_generic) \ + x86/Gapply_reg_state.c x86/Greg_states_iterate.c \ + x86/Gcreate_addr_space.c x86/Gget_save_loc.c x86/Gglobal.c \ + x86/Ginit.c x86/Ginit_local.c x86/Ginit_remote.c \ + x86/Gget_proc_info.c x86/Gregs.c \ + x86/Gresume.c x86/Gstep.c + +# The list of files that go both into libunwind and libunwind-x86_64: +noinst_HEADERS += x86_64/offsets.h \ + x86_64/init.h x86_64/unwind_i.h x86_64/ucontext_i.h +libunwind_la_SOURCES_x86_64_common = $(libunwind_la_SOURCES_common) \ + x86_64/is_fpreg.c x86_64/regname.c + +# The list of files that go into libunwind: 
+libunwind_la_SOURCES_x86_64 = $(libunwind_la_SOURCES_x86_64_common) \ + $(libunwind_la_SOURCES_x86_64_os_local) \ + $(libunwind_la_SOURCES_local) \ + x86_64/setcontext.S \ + x86_64/Lapply_reg_state.c x86_64/Lreg_states_iterate.c \ + x86_64/Lcreate_addr_space.c x86_64/Lget_save_loc.c x86_64/Lglobal.c \ + x86_64/Linit.c x86_64/Linit_local.c x86_64/Linit_remote.c \ + x86_64/Lget_proc_info.c x86_64/Lregs.c x86_64/Lresume.c \ + x86_64/Lstash_frame.c x86_64/Lstep.c x86_64/Ltrace.c x86_64/getcontext.S + +# The list of files that go into libunwind-x86_64: +libunwind_x86_64_la_SOURCES_x86_64 = $(libunwind_la_SOURCES_x86_64_common) \ + $(libunwind_la_SOURCES_x86_64_os) \ + $(libunwind_la_SOURCES_generic) \ + x86_64/Gapply_reg_state.c x86_64/Greg_states_iterate.c \ + x86_64/Gcreate_addr_space.c x86_64/Gget_save_loc.c x86_64/Gglobal.c \ + x86_64/Ginit.c x86_64/Ginit_local.c x86_64/Ginit_remote.c \ + x86_64/Gget_proc_info.c x86_64/Gregs.c x86_64/Gresume.c \ + x86_64/Gstash_frame.c x86_64/Gstep.c x86_64/Gtrace.c + +# The list of local files that go to Power 64 and 32: +libunwind_la_SOURCES_ppc = \ + ppc/Lget_proc_info.c ppc/Lget_save_loc.c ppc/Linit_local.c \ + ppc/Linit_remote.c ppc/Lis_signal_frame.c + +# The list of generic files that go to Power 64 and 32: +libunwind_ppc_la_SOURCES_ppc_generic = \ + ppc/Gget_proc_info.c ppc/Gget_save_loc.c ppc/Ginit_local.c \ + ppc/Ginit_remote.c ppc/Gis_signal_frame.c + +# The list of files that go both into libunwind and libunwind-ppc32: +noinst_HEADERS += ppc32/init.h ppc32/unwind_i.h ppc32/ucontext_i.h +libunwind_la_SOURCES_ppc32_common = $(libunwind_la_SOURCES_common) \ + ppc32/is_fpreg.c ppc32/regname.c ppc32/get_func_addr.c + +# The list of files that go into libunwind: +libunwind_la_SOURCES_ppc32 = $(libunwind_la_SOURCES_ppc32_common) \ + $(libunwind_la_SOURCES_local) \ + $(libunwind_la_SOURCES_ppc) \ + ppc32/Lapply_reg_state.c ppc32/Lreg_states_iterate.c \ + ppc32/Lcreate_addr_space.c \ + ppc32/Lglobal.c ppc32/Linit.c \ + ppc32/Lregs.c ppc32/Lresume.c ppc32/Lstep.c + +# The list of files that go into libunwind-ppc32: +libunwind_ppc32_la_SOURCES_ppc32 = $(libunwind_la_SOURCES_ppc32_common) \ + $(libunwind_la_SOURCES_generic) \ + $(libunwind_ppc_la_SOURCES_ppc_generic) \ + ppc32/Gapply_reg_state.c ppc32/Greg_states_iterate.c \ + ppc32/Gcreate_addr_space.c \ + ppc32/Gglobal.c ppc32/Ginit.c \ + ppc32/Gregs.c ppc32/Gresume.c ppc32/Gstep.c + +# The list of files that go both into libunwind and libunwind-ppc64: +noinst_HEADERS += ppc64/init.h ppc64/unwind_i.h ppc64/ucontext_i.h +libunwind_la_SOURCES_ppc64_common = $(libunwind_la_SOURCES_common) \ + ppc64/is_fpreg.c ppc64/regname.c ppc64/get_func_addr.c + +# The list of files that go into libunwind: +libunwind_la_SOURCES_ppc64 = $(libunwind_la_SOURCES_ppc64_common) \ + $(libunwind_la_SOURCES_local) \ + $(libunwind_la_SOURCES_ppc) \ + ppc64/Lapply_reg_state.c ppc64/Lreg_states_iterate.c \ + ppc64/Lcreate_addr_space.c \ + ppc64/Lglobal.c ppc64/Linit.c \ + ppc64/Lregs.c ppc64/Lresume.c ppc64/Lstep.c + +# The list of files that go into libunwind-ppc64: +libunwind_ppc64_la_SOURCES_ppc64 = $(libunwind_la_SOURCES_ppc64_common) \ + $(libunwind_la_SOURCES_generic) \ + $(libunwind_ppc_la_SOURCES_ppc_generic) \ + ppc64/Gapply_reg_state.c ppc64/Greg_states_iterate.c \ + ppc64/Gcreate_addr_space.c \ + ppc64/Gglobal.c ppc64/Ginit.c \ + ppc64/Gregs.c ppc64/Gresume.c ppc64/Gstep.c + +# The list of files that go into libunwind and libunwind-sh: +noinst_HEADERS += sh/init.h sh/offsets.h sh/unwind_i.h +libunwind_la_SOURCES_sh_common 
= $(libunwind_la_SOURCES_common) \ + sh/is_fpreg.c sh/regname.c + +# The list of files that go into libunwind: +libunwind_la_SOURCES_sh = $(libunwind_la_SOURCES_sh_common) \ + $(libunwind_la_SOURCES_local) \ + sh/Lapply_reg_state.c sh/Lreg_states_iterate.c \ + sh/Lcreate_addr_space.c sh/Lget_proc_info.c sh/Lget_save_loc.c \ + sh/Lglobal.c sh/Linit.c sh/Linit_local.c sh/Linit_remote.c \ + sh/Lis_signal_frame.c sh/Lregs.c sh/Lresume.c sh/Lstep.c + +libunwind_sh_la_SOURCES_sh = $(libunwind_la_SOURCES_sh_common) \ + $(libunwind_la_SOURCES_generic) \ + sh/Gapply_reg_state.c sh/Greg_states_iterate.c \ + sh/Gcreate_addr_space.c sh/Gget_proc_info.c sh/Gget_save_loc.c \ + sh/Gglobal.c sh/Ginit.c sh/Ginit_local.c sh/Ginit_remote.c \ + sh/Gis_signal_frame.c sh/Gregs.c sh/Gresume.c sh/Gstep.c + +if REMOTE_ONLY +install-exec-hook: +# Nothing to do here.... +else +# +# This is not ideal, but I know of no other way to install an +# alias for a library. For the shared version, we have to do +# a file check before creating the link, because it isn't going +# to be there if the user configured with --disable-shared. +# +install-exec-hook: + if test -f $(DESTDIR)$(libdir)/libunwind-$(arch).a; then \ + cd $(DESTDIR)$(libdir) && $(LN_S) -f libunwind-$(arch).a libunwind-generic.a; \ + fi + if test -f $(DESTDIR)$(libdir)/libunwind-$(arch).so; then \ + cd $(DESTDIR)$(libdir) && $(LN_S) -f libunwind-$(arch).so \ + libunwind-generic.so; \ + fi +endif + +if OS_LINUX + libunwind_la_SOURCES_os = $(libunwind_la_SOURCES_os_linux) + libunwind_la_SOURCES_os_local = $(libunwind_la_SOURCES_os_linux_local) + libunwind_la_SOURCES_x86_os = x86/Gos-linux.c + libunwind_x86_la_SOURCES_os = x86/getcontext-linux.S + libunwind_la_SOURCES_x86_os_local = x86/Los-linux.c + libunwind_la_SOURCES_x86_64_os = x86_64/Gos-linux.c + libunwind_la_SOURCES_x86_64_os_local = x86_64/Los-linux.c + libunwind_la_SOURCES_arm_os = arm/Gos-linux.c + libunwind_coredump_la_SOURCES += coredump/_UCD_access_reg_linux.c +endif + +if OS_HPUX + libunwind_la_SOURCES_os = $(libunwind_la_SOURCES_os_hpux) + libunwind_la_SOURCES_os_local = $(libunwind_la_SOURCES_os_hpux_local) +endif + +if OS_FREEBSD + libunwind_la_SOURCES_os = $(libunwind_la_SOURCES_os_freebsd) + libunwind_la_SOURCES_os_local = $(libunwind_la_SOURCES_os_freebsd_local) + libunwind_la_SOURCES_x86_os = x86/Gos-freebsd.c + libunwind_x86_la_SOURCES_os = x86/getcontext-freebsd.S + libunwind_la_SOURCES_x86_os_local = x86/Los-freebsd.c + libunwind_la_SOURCES_x86_64_os = x86_64/Gos-freebsd.c + libunwind_la_SOURCES_x86_64_os_local = x86_64/Los-freebsd.c + libunwind_la_SOURCES_arm_os = arm/Gos-freebsd.c + libunwind_la_SOURCES_arm_os_local = arm/Los-freebsd.c + libunwind_coredump_la_SOURCES += coredump/_UCD_access_reg_freebsd.c +endif + +if OS_QNX + libunwind_la_SOURCES_os = $(libunwind_la_SOURCES_os_qnx) + libunwind_la_SOURCES_os_local = $(libunwind_la_SOURCES_os_qnx_local) + libunwind_la_SOURCES_arm_os = arm/Gos-other.c + libunwind_la_SOURCES_arm_os_local = arm/Los-other.c +endif + +if ARCH_AARCH64 + lib_LTLIBRARIES += libunwind-aarch64.la + libunwind_la_SOURCES = $(libunwind_la_SOURCES_aarch64) + libunwind_aarch64_la_SOURCES = $(libunwind_aarch64_la_SOURCES_aarch64) + libunwind_aarch64_la_LDFLAGS = $(COMMON_SO_LDFLAGS) -version-info $(SOVERSION) + libunwind_aarch64_la_LIBADD = libunwind-dwarf-generic.la + libunwind_aarch64_la_LIBADD += libunwind-elf64.la +if !REMOTE_ONLY + libunwind_aarch64_la_LIBADD += libunwind.la -lc +endif + libunwind_setjmp_la_SOURCES += aarch64/siglongjmp.S +else +if ARCH_ARM + 
lib_LTLIBRARIES += libunwind-arm.la + libunwind_la_SOURCES = $(libunwind_la_SOURCES_arm) + libunwind_arm_la_SOURCES = $(libunwind_arm_la_SOURCES_arm) + libunwind_arm_la_LDFLAGS = $(COMMON_SO_LDFLAGS) -version-info $(SOVERSION) + libunwind_arm_la_LIBADD = libunwind-dwarf-generic.la + libunwind_arm_la_LIBADD += libunwind-elf32.la +if !REMOTE_ONLY + libunwind_arm_la_LIBADD += libunwind.la -lc +endif + libunwind_setjmp_la_SOURCES += arm/siglongjmp.S +else +if ARCH_IA64 + BUILT_SOURCES = Gcursor_i.h Lcursor_i.h +mk_Gcursor_i.s: $(srcdir)/ia64/mk_Gcursor_i.c + $(COMPILE) -S "$(srcdir)/ia64/mk_Gcursor_i.c" -o mk_Gcursor_i.s +mk_Lcursor_i.s: $(srcdir)/ia64/mk_Lcursor_i.c + $(COMPILE) -S "$(srcdir)/ia64/mk_Lcursor_i.c" -o mk_Lcursor_i.s +Gcursor_i.h: mk_Gcursor_i.s + "$(srcdir)/ia64/mk_cursor_i" mk_Gcursor_i.s > Gcursor_i.h +Lcursor_i.h: mk_Lcursor_i.s + "$(srcdir)/ia64/mk_cursor_i" mk_Lcursor_i.s > Lcursor_i.h + + lib_LTLIBRARIES += libunwind-ia64.la + libunwind_la_SOURCES = $(libunwind_la_SOURCES_ia64) + libunwind_ia64_la_SOURCES = $(libunwind_ia64_la_SOURCES_ia64) + libunwind_ia64_la_LDFLAGS = $(COMMON_SO_LDFLAGS) -version-info $(SOVERSION) + libunwind_ia64_la_LIBADD = libunwind-elf64.la +if !REMOTE_ONLY + libunwind_ia64_la_LIBADD += libunwind.la -lc +endif + libunwind_setjmp_la_SOURCES += ia64/setjmp.S ia64/sigsetjmp.S \ + ia64/longjmp.S ia64/siglongjmp.S +else +if ARCH_HPPA + lib_LTLIBRARIES += libunwind-hppa.la + libunwind_la_SOURCES = $(libunwind_la_SOURCES_hppa) + libunwind_hppa_la_SOURCES = $(libunwind_hppa_la_SOURCES_hppa) + libunwind_hppa_la_LDFLAGS = $(COMMON_SO_LDFLAGS) -version-info $(SOVERSION) + libunwind_hppa_la_LIBADD = libunwind-dwarf-generic.la + libunwind_hppa_la_LIBADD += libunwind-elf32.la +if !REMOTE_ONLY + libunwind_hppa_la_LIBADD += libunwind.la -lc +endif + libunwind_setjmp_la_SOURCES += hppa/siglongjmp.S +else +if ARCH_MIPS + lib_LTLIBRARIES += libunwind-mips.la + libunwind_la_SOURCES = $(libunwind_la_SOURCES_mips) + libunwind_mips_la_SOURCES = $(libunwind_mips_la_SOURCES_mips) + libunwind_mips_la_LDFLAGS = $(COMMON_SO_LDFLAGS) -version-info $(SOVERSION) + libunwind_mips_la_LIBADD = libunwind-dwarf-generic.la + libunwind_mips_la_LIBADD += libunwind-elfxx.la +if !REMOTE_ONLY + libunwind_mips_la_LIBADD += libunwind.la -lc +endif + libunwind_setjmp_la_SOURCES += mips/siglongjmp.S +else +if ARCH_TILEGX + lib_LTLIBRARIES += libunwind-tilegx.la + libunwind_la_SOURCES = $(libunwind_la_SOURCES_tilegx) + libunwind_tilegx_la_SOURCES = $(libunwind_tilegx_la_SOURCES_tilegx) + libunwind_tilegx_la_LDFLAGS = $(COMMON_SO_LDFLAGS) -version-info $(SOVERSION) + libunwind_tilegx_la_LIBADD = libunwind-dwarf-generic.la + libunwind_tilegx_la_LIBADD += libunwind-elfxx.la +if !REMOTE_ONLY + libunwind_tilegx_la_LIBADD += libunwind.la -lc +endif + libunwind_setjmp_la_SOURCES += tilegx/siglongjmp.S +else +if ARCH_X86 + lib_LTLIBRARIES += libunwind-x86.la + libunwind_la_SOURCES = $(libunwind_la_SOURCES_x86) $(libunwind_x86_la_SOURCES_os) + libunwind_x86_la_SOURCES = $(libunwind_x86_la_SOURCES_x86) + libunwind_x86_la_LDFLAGS = $(COMMON_SO_LDFLAGS) -version-info $(SOVERSION) + libunwind_x86_la_LIBADD = libunwind-dwarf-generic.la + libunwind_x86_la_LIBADD += libunwind-elf32.la +if !REMOTE_ONLY + libunwind_x86_la_LIBADD += libunwind.la -lc +endif + libunwind_setjmp_la_SOURCES += x86/longjmp.S x86/siglongjmp.S +else +if ARCH_X86_64 + lib_LTLIBRARIES += libunwind-x86_64.la + libunwind_la_SOURCES = $(libunwind_la_SOURCES_x86_64) + libunwind_x86_64_la_SOURCES = $(libunwind_x86_64_la_SOURCES_x86_64) + 
libunwind_x86_64_la_LDFLAGS = $(COMMON_SO_LDFLAGS) -version-info $(SOVERSION) + libunwind_x86_64_la_LIBADD = libunwind-dwarf-generic.la + libunwind_x86_64_la_LIBADD += libunwind-elf64.la +if !REMOTE_ONLY + libunwind_x86_64_la_LIBADD += libunwind.la -lc +endif + libunwind_setjmp_la_SOURCES += x86_64/longjmp.S x86_64/siglongjmp.S +else +if ARCH_PPC32 + lib_LTLIBRARIES += libunwind-ppc32.la + libunwind_la_SOURCES = $(libunwind_la_SOURCES_ppc32) + libunwind_ppc32_la_SOURCES = $(libunwind_ppc32_la_SOURCES_ppc32) + libunwind_ppc32_la_LDFLAGS = $(COMMON_SO_LDFLAGS) -version-info $(SOVERSION) + libunwind_ppc32_la_LIBADD = libunwind-dwarf-generic.la + libunwind_ppc32_la_LIBADD += libunwind-elf32.la +if !REMOTE_ONLY + libunwind_ppc32_la_LIBADD += libunwind.la -lc +endif + libunwind_setjmp_la_SOURCES += ppc/longjmp.S ppc/siglongjmp.S +else +if ARCH_PPC64 + lib_LTLIBRARIES += libunwind-ppc64.la + libunwind_la_SOURCES = $(libunwind_la_SOURCES_ppc64) + libunwind_ppc64_la_SOURCES = $(libunwind_ppc64_la_SOURCES_ppc64) + libunwind_ppc64_la_LDFLAGS = $(COMMON_SO_LDFLAGS) -version-info $(SOVERSION) + libunwind_ppc64_la_LIBADD = libunwind-dwarf-generic.la + libunwind_ppc64_la_LIBADD += libunwind-elf64.la +if !REMOTE_ONLY + libunwind_ppc64_la_LIBADD += libunwind.la -lc +endif + libunwind_setjmp_la_SOURCES += ppc/longjmp.S ppc/siglongjmp.S +else +if ARCH_SH + lib_LTLIBRARIES += libunwind-sh.la + libunwind_la_SOURCES = $(libunwind_la_SOURCES_sh) + libunwind_sh_la_SOURCES = $(libunwind_sh_la_SOURCES_sh) + libunwind_sh_la_LDFLAGS = $(COMMON_SO_LDFLAGS) -version-info $(SOVERSION) + libunwind_sh_la_LIBADD = libunwind-dwarf-generic.la + libunwind_sh_la_LIBADD += libunwind-elf32.la +if !REMOTE_ONLY + libunwind_sh_la_LIBADD += libunwind.la -lc +endif + libunwind_setjmp_la_SOURCES += sh/siglongjmp.S + +endif # ARCH_SH +endif # ARCH_PPC64 +endif # ARCH_PPC32 +endif # ARCH_X86_64 +endif # ARCH_X86 +endif # ARCH_TILEGX +endif # ARCH_MIPS +endif # ARCH_HPPA +endif # ARCH_IA64 +endif # ARCH_ARM +endif # ARCH_AARCH64 + +# libunwind-setjmp depends on libunwind-$(arch). Therefore must be added +# at the end. +if BUILD_SETJMP +lib_LTLIBRARIES += libunwind-setjmp.la +endif + +# +# Don't link with standard libraries, because those may mention +# libunwind already. +# +libunwind_la_LDFLAGS = $(COMMON_SO_LDFLAGS) -XCClinker -nostdlib \ + $(LDFLAGS_STATIC_LIBCXA) -version-info $(SOVERSION) +libunwind_la_LIBADD += -lc $(LIBCRTS) +libunwind_la_LIBADD += $(LIBLZMA) + +AM_CPPFLAGS = -I$(top_srcdir)/include -I$(top_srcdir)/include/tdep-$(arch) -I. +AM_CCASFLAGS = $(AM_CPPFLAGS) +noinst_HEADERS += unwind/unwind-internal.h + +EXTRA_DIST = $(libunwind_la_SOURCES_aarch64) \ + $(libunwind_la_SOURCES_arm) \ + $(libunwind_la_SOURCES_hppa) \ + $(libunwind_la_SOURCES_ia64) \ + $(libunwind_la_SOURCES_mips) \ + $(libunwind_la_SOURCES_sh) \ + $(libunwind_la_SOURCES_x86) \ + $(libunwind_la_SOURCES_os_freebsd) \ + $(libunwind_la_SOURCES_os_linux) \ + $(libunwind_la_SOURCES_os_hpux) \ + $(libunwind_la_SOURCES_os_qnx) \ + $(libunwind_la_SOURCES_common) \ + $(libunwind_la_SOURCES_local) \ + $(libunwind_la_SOURCES_generic) \ + $(libunwind_aarch64_la_SOURCES_aarch64) \ + $(libunwind_arm_la_SOURCES_arm) \ + $(libunwind_hppa_la_SOURCES_hppa) \ + $(libunwind_ia64_la_SOURCES_ia64) \ + $(libunwind_mips_la_SOURCES_mips) \ + $(libunwind_sh_la_SOURCES_sh) \ + $(libunwind_x86_la_SOURCES_x86) \ + $(libunwind_x86_64_la_SOURCES_x86_64) + +MAINTAINERCLEANFILES = Makefile.in + +# The -version-info flag accepts an argument of the form +# `current[:revision[:age]]'. 
So, passing `-version-info 3:12:1' sets +# current to 3, revision to 12, and age to 1. + +# If either revision or age are omitted, they default to 0. Also note +# that age must be less than or equal to the current interface number. + +# Here are a set of rules to help you update your library version +# information: + +# 1. Start with version information of `0:0:0' for each libtool +# library. + +# 2. Update the version information only immediately before a public +# release of your software. More frequent updates are unnecessary, +# and only guarantee that the current interface number gets larger +# faster. + +# 3. If the library source code has changed at all since the last +# update, then increment revision (`c:r:a' becomes `c:r+1:a'). + +# 4. If any interfaces have been added, removed, or changed since the +# last update, increment current, and set revision to 0. + +# 5. If any interfaces have been added since the last public release, +# then increment age. + +# 6. If any interfaces have been removed since the last public +# release, then set age to 0. diff --git a/contrib/libunwind/src/aarch64/Ginit.c b/contrib/libunwind/src/aarch64/Ginit.c index b9181ef061f..ab3999f307c 100644 --- a/contrib/libunwind/src/aarch64/Ginit.c +++ b/contrib/libunwind/src/aarch64/Ginit.c @@ -43,8 +43,10 @@ PROTECTED unw_addr_space_t unw_local_addr_space = &local_addr_space; static inline void * uc_addr (ucontext_t *uc, int reg) { - if (reg >= UNW_AARCH64_X0 && reg <= UNW_AARCH64_V31) + if (reg >= UNW_AARCH64_X0 && reg < UNW_AARCH64_V0) return &uc->uc_mcontext.regs[reg]; + else if (reg >= UNW_AARCH64_V0 && reg <= UNW_AARCH64_V31) + return &GET_FPCTX(uc)->vregs[reg - UNW_AARCH64_V0]; else return NULL; } @@ -172,7 +174,7 @@ HIDDEN void aarch64_local_addr_space_init (void) { memset (&local_addr_space, 0, sizeof (local_addr_space)); - local_addr_space.caching_policy = UNW_CACHE_GLOBAL; + local_addr_space.caching_policy = UNWI_DEFAULT_CACHING_POLICY; local_addr_space.acc.find_proc_info = dwarf_find_proc_info; local_addr_space.acc.put_unwind_info = put_unwind_info; local_addr_space.acc.get_dyn_info_list_addr = get_dyn_info_list_addr; diff --git a/contrib/libunwind/src/aarch64/Ginit_local.c b/contrib/libunwind/src/aarch64/Ginit_local.c index 45b1b30083e..d284224a369 100644 --- a/contrib/libunwind/src/aarch64/Ginit_local.c +++ b/contrib/libunwind/src/aarch64/Ginit_local.c @@ -59,9 +59,20 @@ unw_init_local (unw_cursor_t *cursor, unw_context_t *uc) } PROTECTED int -unw_init_local_signal (unw_cursor_t *cursor, unw_context_t *uc) +unw_init_local2 (unw_cursor_t *cursor, ucontext_t *uc, int flag) { - return unw_init_local_common(cursor, uc, 0); + if (!flag) + { + return unw_init_local_common(cursor, uc, 1); + } + else if (flag == UNW_INIT_SIGNAL_FRAME) + { + return unw_init_local_common(cursor, uc, 0); + } + else + { + return -UNW_EINVAL; + } } #endif /* !UNW_REMOTE_ONLY */ diff --git a/contrib/libunwind/src/aarch64/Gregs.c b/contrib/libunwind/src/aarch64/Gregs.c index 6288275bcd9..a8843734459 100644 --- a/contrib/libunwind/src/aarch64/Gregs.c +++ b/contrib/libunwind/src/aarch64/Gregs.c @@ -55,6 +55,9 @@ tdep_access_reg (struct cursor *c, unw_regnum_t reg, unw_word_t *valp, loc = c->dwarf.loc[reg]; break; + case UNW_AARCH64_X30: + if (write) + c->dwarf.ip = *valp; /* update the IP cache */ case UNW_AARCH64_X4: case UNW_AARCH64_X5: case UNW_AARCH64_X6: @@ -81,7 +84,6 @@ tdep_access_reg (struct cursor *c, unw_regnum_t reg, unw_word_t *valp, case UNW_AARCH64_X27: case UNW_AARCH64_X28: case UNW_AARCH64_X29: - case 
UNW_AARCH64_X30: case UNW_AARCH64_PC: case UNW_AARCH64_PSTATE: loc = c->dwarf.loc[reg]; @@ -108,6 +110,9 @@ HIDDEN int tdep_access_fpreg (struct cursor *c, unw_regnum_t reg, unw_fpreg_t *valp, int write) { - Debug (1, "bad register number %u\n", reg); - return -UNW_EBADREG; + dwarf_loc_t loc = c->dwarf.loc[reg]; + if (write) + return dwarf_putfp (&c->dwarf, loc, *valp); + else + return dwarf_getfp (&c->dwarf, loc, valp); } diff --git a/contrib/libunwind/src/aarch64/Gresume.c b/contrib/libunwind/src/aarch64/Gresume.c index d9acfa7ccc9..65517a252d1 100644 --- a/contrib/libunwind/src/aarch64/Gresume.c +++ b/contrib/libunwind/src/aarch64/Gresume.c @@ -40,7 +40,7 @@ aarch64_local_resume (unw_addr_space_t as, unw_cursor_t *cursor, void *arg) { /* Since there are no signals involved here we restore EH and non scratch registers only. */ - unsigned long regs[15]; + unsigned long regs[24]; regs[0] = uc->uc_mcontext.regs[0]; regs[1] = uc->uc_mcontext.regs[1]; regs[2] = uc->uc_mcontext.regs[2]; @@ -55,7 +55,16 @@ aarch64_local_resume (unw_addr_space_t as, unw_cursor_t *cursor, void *arg) regs[11] = uc->uc_mcontext.regs[26]; regs[12] = uc->uc_mcontext.regs[27]; regs[13] = uc->uc_mcontext.regs[28]; - regs[14] = uc->uc_mcontext.regs[30]; /* LR */ + regs[14] = uc->uc_mcontext.regs[29]; /* FP */ + regs[15] = uc->uc_mcontext.regs[30]; /* LR */ + regs[16] = GET_FPCTX(uc)->vregs[8]; + regs[17] = GET_FPCTX(uc)->vregs[9]; + regs[18] = GET_FPCTX(uc)->vregs[10]; + regs[19] = GET_FPCTX(uc)->vregs[11]; + regs[20] = GET_FPCTX(uc)->vregs[12]; + regs[21] = GET_FPCTX(uc)->vregs[13]; + regs[22] = GET_FPCTX(uc)->vregs[14]; + regs[23] = GET_FPCTX(uc)->vregs[15]; unsigned long sp = uc->uc_mcontext.sp; struct regs_overlay { @@ -72,7 +81,11 @@ aarch64_local_resume (unw_addr_space_t as, unw_cursor_t *cursor, void *arg) "ldp x23, x24, [x4,64]\n" "ldp x25, x26, [x4,80]\n" "ldp x27, x28, [x4,96]\n" - "ldr x30, [x4,112]\n" + "ldp x29, x30, [x4,112]\n" + "ldp d8, d9, [x4,128]\n" + "ldp d10, d11, [x4,144]\n" + "ldp d12, d13, [x4,160]\n" + "ldp d14, d15, [x4,176]\n" "mov sp, x5\n" "ret \n" : @@ -147,7 +160,7 @@ establish_machine_state (struct cursor *c) Debug (8, "copying out cursor state\n"); - for (reg = 0; reg <= UNW_AARCH64_PSTATE; ++reg) + for (reg = 0; reg <= UNW_AARCH64_V31; ++reg) { Debug (16, "copying %s %d\n", unw_regname (reg), reg); if (unw_is_fpreg (reg)) diff --git a/contrib/libunwind/src/aarch64/Gstep.c b/contrib/libunwind/src/aarch64/Gstep.c index 44fbb04c8c5..e38ff9794f5 100644 --- a/contrib/libunwind/src/aarch64/Gstep.c +++ b/contrib/libunwind/src/aarch64/Gstep.c @@ -27,6 +27,30 @@ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
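
The aarch64 Gstep.c hunk just below adds is_plt_entry(), which reads two
64-bit words at the IP and mask-compares them; on little-endian AArch64 each
word packs two 32-bit instructions, low half first. A self-checking sketch
(illustrative, reusing the four example instructions quoted in the patch
comment):

    #include <assert.h>
    #include <stdint.h>

    int
    main (void)
    {
      /* Little-endian: the insn at ip sits in the low half of w0.  */
      uint64_t w0 = ((uint64_t) 0xf9433611 << 32) | 0xb0000570; /* ldr | adrp */
      uint64_t w1 = ((uint64_t) 0xd61f0220 << 32) | 0x9119a210; /* br  | add  */

      /* Same masks as is_plt_entry(): adrp x16 / ldr x17 / add x16 / br x17 */
      assert ((w0 & 0xff0000009f000000ULL) == 0xf900000090000000ULL);
      assert ((w1 & 0xffffffffff000000ULL) == 0xd61f022091000000ULL);
      return 0;
    }
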
*/ #include "unwind_i.h" #include "offsets.h" +/* Recognise PLT entries such as: + 40ddf0: b0000570 adrp x16, 4ba000 <_GLOBAL_OFFSET_TABLE_+0x2a8> + 40ddf4: f9433611 ldr x17, [x16,#1640] + 40ddf8: 9119a210 add x16, x16, #0x668 + 40ddfc: d61f0220 br x17 */ +static int +is_plt_entry (struct dwarf_cursor *c) +{ + unw_word_t w0, w1; + unw_accessors_t *a; + int ret; + + a = unw_get_accessors (c->as); + if ((ret = (*a->access_mem) (c->as, c->ip, &w0, 0, c->as_arg)) < 0 + || (ret = (*a->access_mem) (c->as, c->ip + 8, &w1, 0, c->as_arg)) < 0) + return 0; + + ret = (((w0 & 0xff0000009f000000) == 0xf900000090000000) + && ((w1 & 0xffffffffff000000) == 0xd61f022091000000)); + + Debug (14, "ip=0x%lx => 0x%016lx 0x%016lx, ret = %d\n", c->ip, w0, w1, ret); + return ret; +} + PROTECTED int unw_handle_signal_frame (unw_cursor_t *cursor) { @@ -101,6 +125,7 @@ unw_handle_signal_frame (unw_cursor_t *cursor) dwarf_get (&c->dwarf, c->dwarf.loc[UNW_AARCH64_PC], &c->dwarf.ip); c->dwarf.pi_valid = 0; + c->dwarf.use_prev_instr = 0; return 1; } @@ -125,7 +150,40 @@ unw_step (unw_cursor_t *cursor) return ret; if (unlikely (ret < 0)) - return 0; + { + /* DWARF failed. */ + if (is_plt_entry (&c->dwarf)) + { + Debug (2, "found plt entry\n"); + c->frame_info.frame_type = UNW_AARCH64_FRAME_STANDARD; + } + else + { + Debug (2, "fallback\n"); + c->frame_info.frame_type = UNW_AARCH64_FRAME_GUESSED; + } + /* Use link register (X30). */ + c->frame_info.cfa_reg_offset = 0; + c->frame_info.cfa_reg_sp = 0; + c->frame_info.fp_cfa_offset = -1; + c->frame_info.lr_cfa_offset = -1; + c->frame_info.sp_cfa_offset = -1; + c->dwarf.loc[UNW_AARCH64_PC] = c->dwarf.loc[UNW_AARCH64_X30]; + c->dwarf.loc[UNW_AARCH64_X30] = DWARF_NULL_LOC; + if (!DWARF_IS_NULL_LOC (c->dwarf.loc[UNW_AARCH64_PC])) + { + ret = dwarf_get (&c->dwarf, c->dwarf.loc[UNW_AARCH64_PC], &c->dwarf.ip); + if (ret < 0) + { + Debug (2, "failed to get pc from link register: %d\n", ret); + return ret; + } + Debug (2, "link register (x30) = 0x%016lx\n", c->dwarf.ip); + ret = 1; + } + else + c->dwarf.ip = 0; + } return (c->dwarf.ip == 0) ? 
0 : 1; } diff --git a/contrib/libunwind/src/aarch64/unwind_i.h b/contrib/libunwind/src/aarch64/unwind_i.h index 79b342cdab9..3d324c2b08b 100644 --- a/contrib/libunwind/src/aarch64/unwind_i.h +++ b/contrib/libunwind/src/aarch64/unwind_i.h @@ -59,4 +59,6 @@ extern int aarch64_local_resume (unw_addr_space_t as, unw_cursor_t *cursor, } while (0) #endif +#define GET_FPCTX(uc) ((struct fpsimd_context *)(&uc->uc_mcontext.__reserved)) + #endif /* unwind_i_h */ diff --git a/contrib/libunwind/src/arm/Ginit.c b/contrib/libunwind/src/arm/Ginit.c index 1ed3dbfc508..2d0b2ca8db1 100644 --- a/contrib/libunwind/src/arm/Ginit.c +++ b/contrib/libunwind/src/arm/Ginit.c @@ -126,6 +126,11 @@ static int access_mem (unw_addr_space_t as, unw_word_t addr, unw_word_t *val, int write, void *arg) { + /* validate address */ + const struct cursor *c = (const struct cursor *) arg; + if (c && validate_mem(addr)) + return -1; + if (write) { Debug (16, "mem[%x] <- %x\n", addr, *val); @@ -133,11 +138,6 @@ access_mem (unw_addr_space_t as, unw_word_t addr, unw_word_t *val, int write, } else { - /* validate address */ - const struct cursor *c = (const struct cursor *) arg; - if (c && validate_mem(addr)) - return -1; - *val = *(unw_word_t *) addr; Debug (16, "mem[%x] -> %x\n", addr, *val); } @@ -220,7 +220,7 @@ HIDDEN void arm_local_addr_space_init (void) { memset (&local_addr_space, 0, sizeof (local_addr_space)); - local_addr_space.caching_policy = UNW_CACHE_GLOBAL; + local_addr_space.caching_policy = UNWI_DEFAULT_CACHING_POLICY; local_addr_space.acc.find_proc_info = arm_find_proc_info; local_addr_space.acc.put_unwind_info = arm_put_unwind_info; local_addr_space.acc.get_dyn_info_list_addr = get_dyn_info_list_addr; diff --git a/contrib/libunwind/src/arm/Ginit_local.c b/contrib/libunwind/src/arm/Ginit_local.c index f74d55e7358..65941c369e4 100644 --- a/contrib/libunwind/src/arm/Ginit_local.c +++ b/contrib/libunwind/src/arm/Ginit_local.c @@ -59,9 +59,20 @@ unw_init_local (unw_cursor_t *cursor, unw_context_t *uc) } PROTECTED int -unw_init_local_signal (unw_cursor_t *cursor, unw_context_t *uc) +unw_init_local2 (unw_cursor_t *cursor, unw_context_t *uc, int flag) { - return unw_init_local_common(cursor, uc, 0); + if (!flag) + { + return unw_init_local_common(cursor, uc, 1); + } + else if (flag == UNW_INIT_SIGNAL_FRAME) + { + return unw_init_local_common(cursor, uc, 0); + } + else + { + return -UNW_EINVAL; + } } #endif /* !UNW_REMOTE_ONLY */ diff --git a/contrib/libunwind/src/arm/Gis_signal_frame.c b/contrib/libunwind/src/arm/Gis_signal_frame.c deleted file mode 100644 index e8efe7f4a85..00000000000 --- a/contrib/libunwind/src/arm/Gis_signal_frame.c +++ /dev/null @@ -1,87 +0,0 @@ -/* libunwind - a platform-independent unwind library - Copyright (C) 2008 CodeSourcery - Copyright 2011 Linaro Limited - -This file is part of libunwind. - -Permission is hereby granted, free of charge, to any person obtaining -a copy of this software and associated documentation files (the -"Software"), to deal in the Software without restriction, including -without limitation the rights to use, copy, modify, merge, publish, -distribute, sublicense, and/or sell copies of the Software, and to -permit persons to whom the Software is furnished to do so, subject to -the following conditions: - -The above copyright notice and this permission notice shall be -included in all copies or substantial portions of the Software. 
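
Across the aarch64, arm, hppa, mips and ppc hunks, this patch replaces
unw_init_local_signal() with unw_init_local2(cursor, uc, flag), where flag is
0 for an ordinary context and UNW_INIT_SIGNAL_FRAME for a context delivered
to a signal handler. A usage sketch (not part of the patch; assumes a target
where unw_context_t is layout-compatible with ucontext_t, as on Linux):

    #define UNW_LOCAL_ONLY
    #include <libunwind.h>
    #include <signal.h>
    #include <stdio.h>
    #include <string.h>

    static void
    handler (int sig, siginfo_t *si, void *uc)
    {
      unw_cursor_t cursor;
      unw_word_t ip;

      (void) sig; (void) si;
      /* The third sigaction argument is the interrupted context.  */
      if (unw_init_local2 (&cursor, (unw_context_t *) uc,
                           UNW_INIT_SIGNAL_FRAME) < 0)
        return;
      do
        {
          unw_get_reg (&cursor, UNW_REG_IP, &ip);
          /* demo only: fprintf is not async-signal-safe */
          fprintf (stderr, "ip=%#lx\n", (unsigned long) ip);
        }
      while (unw_step (&cursor) > 0);
    }

    int
    main (void)
    {
      struct sigaction sa;

      memset (&sa, 0, sizeof sa);
      sa.sa_sigaction = handler;
      sa.sa_flags = SA_SIGINFO;
      sigaction (SIGUSR1, &sa, NULL);
      raise (SIGUSR1);
      return 0;
    }
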
- -THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, -EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF -MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND -NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE -LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION -OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION -WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. */ - -#include -#include "unwind_i.h" - -#ifdef __linux__ -#define ARM_NR_sigreturn 119 -#define ARM_NR_rt_sigreturn 173 -#define ARM_NR_OABI_SYSCALL_BASE 0x900000 - -/* ARM EABI sigreturn (the syscall number is loaded into r7) */ -#define MOV_R7_SIGRETURN (0xe3a07000UL | ARM_NR_sigreturn) -#define MOV_R7_RT_SIGRETURN (0xe3a07000UL | ARM_NR_rt_sigreturn) - -/* ARM OABI sigreturn (using SWI) */ -#define ARM_SIGRETURN \ - (0xef000000UL | ARM_NR_sigreturn | ARM_NR_OABI_SYSCALL_BASE) -#define ARM_RT_SIGRETURN \ - (0xef000000UL | ARM_NR_rt_sigreturn | ARM_NR_OABI_SYSCALL_BASE) - -/* Thumb sigreturn (two insns, syscall number is loaded into r7) */ -#define THUMB_SIGRETURN (0xdf00UL << 16 | 0x2700 | ARM_NR_sigreturn) -#define THUMB_RT_SIGRETURN (0xdf00UL << 16 | 0x2700 | ARM_NR_rt_sigreturn) -#endif /* __linux__ */ - -/* Returns 1 in case of a non-RT signal frame and 2 in case of a RT signal - frame. */ -PROTECTED int -unw_is_signal_frame (unw_cursor_t *cursor) -{ -#ifdef __linux__ - struct cursor *c = (struct cursor *) cursor; - unw_word_t w0, ip; - unw_addr_space_t as; - unw_accessors_t *a; - void *arg; - int ret; - - as = c->dwarf.as; - a = unw_get_accessors (as); - arg = c->dwarf.as_arg; - - ip = c->dwarf.ip; - - if ((ret = (*a->access_mem) (as, ip, &w0, 0, arg)) < 0) - return ret; - - /* Return 1 if the IP points to a non-RT sigreturn sequence. */ - if (w0 == MOV_R7_SIGRETURN || w0 == ARM_SIGRETURN || w0 == THUMB_SIGRETURN) - return 1; - /* Return 2 if the IP points to a RT sigreturn sequence. */ - else if (w0 == MOV_R7_RT_SIGRETURN || w0 == ARM_RT_SIGRETURN - || w0 == THUMB_RT_SIGRETURN) - return 2; - - return 0; -#elif defined(__QNX__) - /* Not supported yet */ - return 0; -#else - printf ("%s: implement me\n", __FUNCTION__); - return -UNW_ENOINFO; -#endif -} diff --git a/contrib/libunwind/src/arm/Gos-freebsd.c b/contrib/libunwind/src/arm/Gos-freebsd.c new file mode 100644 index 00000000000..3b9d2c3eb52 --- /dev/null +++ b/contrib/libunwind/src/arm/Gos-freebsd.c @@ -0,0 +1,129 @@ +/* libunwind - a platform-independent unwind library + Copyright (C) 2008 CodeSourcery + Copyright 2011 Linaro Limited + Copyright (C) 2012 Tommi Rantala + Copyright 2015 The FreeBSD Foundation + + Portions of this software were developed by Konstantin Belousov + under sponsorship from the FreeBSD Foundation. + +This file is part of libunwind. + +Permission is hereby granted, free of charge, to any person obtaining +a copy of this software and associated documentation files (the +"Software"), to deal in the Software without restriction, including +without limitation the rights to use, copy, modify, merge, publish, +distribute, sublicense, and/or sell copies of the Software, and to +permit persons to whom the Software is furnished to do so, subject to +the following conditions: + +The above copyright notice and this permission notice shall be +included in all copies or substantial portions of the Software. 
+ +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, +EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF +MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND +NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE +LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION +OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION +WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. */ + +#include +#include +#include "unwind_i.h" +#include "offsets.h" +#include "ex_tables.h" + +PROTECTED int +unw_handle_signal_frame (unw_cursor_t *cursor) +{ + struct cursor *c = (struct cursor *) cursor; + int ret, fmt; + unw_word_t sc_addr, sp, sp_addr = c->dwarf.cfa; + struct dwarf_loc sp_loc = DWARF_LOC (sp_addr, 0); + + if ((ret = dwarf_get (&c->dwarf, sp_loc, &sp)) < 0) + return -UNW_EUNSPEC; + fmt = unw_is_signal_frame(cursor); + + c->dwarf.pi_valid = 0; + + if (fmt == UNW_ARM_FRAME_SYSCALL) + { + c->sigcontext_format = ARM_SCF_FREEBSD_SYSCALL; + c->frame_info.frame_type = UNW_ARM_FRAME_SYSCALL; + c->frame_info.cfa_reg_offset = 0; + c->dwarf.loc[UNW_ARM_R7] = c->dwarf.loc[UNW_ARM_R12]; + dwarf_get (&c->dwarf, c->dwarf.loc[UNW_ARM_R14], &c->dwarf.ip); + return 1; + } + + c->sigcontext_format = ARM_SCF_FREEBSD_SIGFRAME; + sc_addr = sp_addr; + + /* Save the SP and PC to be able to return execution at this point + later in time (unw_resume). */ + c->sigcontext_sp = c->dwarf.cfa; + c->sigcontext_pc = c->dwarf.ip; + + c->sigcontext_addr = sc_addr; + c->frame_info.frame_type = UNW_ARM_FRAME_SIGRETURN; + c->frame_info.cfa_reg_offset = sc_addr - sp_addr; + + /* Update the dwarf cursor. + Set the location of the registers to the corresponding addresses of the + uc_mcontext / sigcontext structure contents. */ +#define ROFF(n) (FREEBSD_SC_UCONTEXT_OFF + FREEBSD_UC_MCONTEXT_OFF + \ + FREEBSD_MC_R0_OFF + (n) * 4) +#define SL(n) \ + c->dwarf.loc[UNW_ARM_R ## n] = DWARF_LOC (sc_addr + ROFF(n), 0); + SL(0); SL(1); SL(2); SL(3); SL(4); SL(5); SL(6); SL(7); + SL(8); SL(9); SL(10); SL(11); SL(12); SL(13); SL(14); SL(15); +#undef SL +#undef ROFF + + /* Set SP/CFA and PC/IP. */ + dwarf_get (&c->dwarf, c->dwarf.loc[UNW_ARM_R13], &c->dwarf.cfa); + dwarf_get (&c->dwarf, c->dwarf.loc[UNW_ARM_R15], &c->dwarf.ip); + + return 1; +} + +/* Returns 1 in case of a non-RT signal frame and 2 in case of a RT signal + frame. 
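
The FreeBSD unw_is_signal_frame() that follows matches the signal trampoline
by comparing raw instruction words. Decoded (my annotation; the mnemonics are
illustrative), the four-word sequence appears to be:

    #include <assert.h>
    #include <stdint.h>

    int
    main (void)
    {
      /* FreeBSD/arm signal trampoline, ARM encodings:                  */
      static const uint32_t sigcode[4] = {
        0xe1a0000d,  /* mov r0, sp         -- point r0 at the frame     */
        0xe2800040,  /* add r0, r0, #0x40  -- 0x40 = ucontext offset    */
        0xe59f700c,  /* ldr r7, [pc, #12]  -- load the syscall number   */
        0xef0001a1,  /* swi 417            -- SYS_sigreturn on FreeBSD  */
      };
      assert (sigcode[3] == (0xef000000u | 417));  /* 417 == 0x1a1 */
      return 0;
    }

The 0x40 offset matches FREEBSD_SC_UCONTEXT_OFF added to arm/offsets.h later
in this patch.
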
*/ +PROTECTED int +unw_is_signal_frame (unw_cursor_t *cursor) +{ + struct cursor *c = (struct cursor *) cursor; + unw_word_t w0, w1, w2, w3, ip; + unw_addr_space_t as; + unw_accessors_t *a; + void *arg; + int ret; + + as = c->dwarf.as; + a = unw_get_accessors (as); + arg = c->dwarf.as_arg; + + ip = c->dwarf.ip; + + if ((ret = (*a->access_mem) (as, ip, &w0, 0, arg)) < 0) + return ret; + if ((ret = (*a->access_mem) (as, ip + 4, &w1, 0, arg)) < 0) + return ret; + if ((ret = (*a->access_mem) (as, ip + 8, &w2, 0, arg)) < 0) + return ret; + if ((ret = (*a->access_mem) (as, ip + 12, &w3, 0, arg)) < 0) + return ret; + + if (w0 == 0xe1a0000d && w1 == 0xe2800040 && w2 == 0xe59f700c && + w3 == 0xef0001a1) + return UNW_ARM_FRAME_SIGRETURN; + + if ((ret = (*a->access_mem) (as, ip - 4, &w0, 0, arg)) < 0) + return ret; + if (w0 == 0xef000000) + return UNW_ARM_FRAME_SYSCALL; + + return 0; +} diff --git a/contrib/libunwind/src/arm/Gos-linux.c b/contrib/libunwind/src/arm/Gos-linux.c new file mode 100644 index 00000000000..585c2014fde --- /dev/null +++ b/contrib/libunwind/src/arm/Gos-linux.c @@ -0,0 +1,182 @@ +/* libunwind - a platform-independent unwind library + Copyright (C) 2008 CodeSourcery + Copyright 2011 Linaro Limited + Copyright (C) 2012 Tommi Rantala + +This file is part of libunwind. + +Permission is hereby granted, free of charge, to any person obtaining +a copy of this software and associated documentation files (the +"Software"), to deal in the Software without restriction, including +without limitation the rights to use, copy, modify, merge, publish, +distribute, sublicense, and/or sell copies of the Software, and to +permit persons to whom the Software is furnished to do so, subject to +the following conditions: + +The above copyright notice and this permission notice shall be +included in all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, +EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF +MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND +NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE +LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION +OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION +WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. */ + +#include +#include +#include "unwind_i.h" +#include "offsets.h" + +PROTECTED int +unw_handle_signal_frame (unw_cursor_t *cursor) +{ + struct cursor *c = (struct cursor *) cursor; + int ret; + unw_word_t sc_addr, sp, sp_addr = c->dwarf.cfa; + struct dwarf_loc sp_loc = DWARF_LOC (sp_addr, 0); + + if ((ret = dwarf_get (&c->dwarf, sp_loc, &sp)) < 0) + return -UNW_EUNSPEC; + + /* Obtain signal frame type (non-RT or RT). */ + ret = unw_is_signal_frame (cursor); + + /* Save the SP and PC to be able to return execution at this point + later in time (unw_resume). */ + c->sigcontext_sp = c->dwarf.cfa; + c->sigcontext_pc = c->dwarf.ip; + + /* Since kernel version 2.6.18 the non-RT signal frame starts with a + ucontext while the RT signal frame starts with a siginfo, followed + by a sigframe whose first element is an ucontext. + Prior 2.6.18 the non-RT signal frame starts with a sigcontext while + the RT signal frame starts with two pointers followed by a siginfo + and an ucontext. The first pointer points to the start of the siginfo + structure and the second one to the ucontext structure. */ + + if (ret == 1) + { + /* Handle non-RT signal frames. 
Check if the first word on the stack + is the magic number. */ + if (sp == 0x5ac3c35a) + { + c->sigcontext_format = ARM_SCF_LINUX_SIGFRAME; + sc_addr = sp_addr + LINUX_UC_MCONTEXT_OFF; + } + else + { + c->sigcontext_format = ARM_SCF_LINUX_OLD_SIGFRAME; + sc_addr = sp_addr; + } + } + else if (ret == 2) + { + /* Handle RT signal frames. Check if the first word on the stack is a + pointer to the siginfo structure. */ + if (sp == sp_addr + 8) + { + c->sigcontext_format = ARM_SCF_LINUX_OLD_RT_SIGFRAME; + sc_addr = sp_addr + 8 + sizeof (siginfo_t) + LINUX_UC_MCONTEXT_OFF; + } + else + { + c->sigcontext_format = ARM_SCF_LINUX_RT_SIGFRAME; + sc_addr = sp_addr + sizeof (siginfo_t) + LINUX_UC_MCONTEXT_OFF; + } + } + else + return -UNW_EUNSPEC; + + c->sigcontext_addr = sc_addr; + c->frame_info.frame_type = UNW_ARM_FRAME_SIGRETURN; + c->frame_info.cfa_reg_offset = sc_addr - sp_addr; + + /* Update the dwarf cursor. + Set the location of the registers to the corresponding addresses of the + uc_mcontext / sigcontext structure contents. */ + c->dwarf.loc[UNW_ARM_R0] = DWARF_LOC (sc_addr + LINUX_SC_R0_OFF, 0); + c->dwarf.loc[UNW_ARM_R1] = DWARF_LOC (sc_addr + LINUX_SC_R1_OFF, 0); + c->dwarf.loc[UNW_ARM_R2] = DWARF_LOC (sc_addr + LINUX_SC_R2_OFF, 0); + c->dwarf.loc[UNW_ARM_R3] = DWARF_LOC (sc_addr + LINUX_SC_R3_OFF, 0); + c->dwarf.loc[UNW_ARM_R4] = DWARF_LOC (sc_addr + LINUX_SC_R4_OFF, 0); + c->dwarf.loc[UNW_ARM_R5] = DWARF_LOC (sc_addr + LINUX_SC_R5_OFF, 0); + c->dwarf.loc[UNW_ARM_R6] = DWARF_LOC (sc_addr + LINUX_SC_R6_OFF, 0); + c->dwarf.loc[UNW_ARM_R7] = DWARF_LOC (sc_addr + LINUX_SC_R7_OFF, 0); + c->dwarf.loc[UNW_ARM_R8] = DWARF_LOC (sc_addr + LINUX_SC_R8_OFF, 0); + c->dwarf.loc[UNW_ARM_R9] = DWARF_LOC (sc_addr + LINUX_SC_R9_OFF, 0); + c->dwarf.loc[UNW_ARM_R10] = DWARF_LOC (sc_addr + LINUX_SC_R10_OFF, 0); + c->dwarf.loc[UNW_ARM_R11] = DWARF_LOC (sc_addr + LINUX_SC_FP_OFF, 0); + c->dwarf.loc[UNW_ARM_R12] = DWARF_LOC (sc_addr + LINUX_SC_IP_OFF, 0); + c->dwarf.loc[UNW_ARM_R13] = DWARF_LOC (sc_addr + LINUX_SC_SP_OFF, 0); + c->dwarf.loc[UNW_ARM_R14] = DWARF_LOC (sc_addr + LINUX_SC_LR_OFF, 0); + c->dwarf.loc[UNW_ARM_R15] = DWARF_LOC (sc_addr + LINUX_SC_PC_OFF, 0); + + /* Set SP/CFA and PC/IP. */ + dwarf_get (&c->dwarf, c->dwarf.loc[UNW_ARM_R13], &c->dwarf.cfa); + dwarf_get (&c->dwarf, c->dwarf.loc[UNW_ARM_R15], &c->dwarf.ip); + + c->dwarf.pi_valid = 0; + + return 1; +} + +#define ARM_NR_sigreturn 119 +#define ARM_NR_rt_sigreturn 173 +#define ARM_NR_OABI_SYSCALL_BASE 0x900000 + +/* ARM EABI sigreturn (the syscall number is loaded into r7) */ +#define MOV_R7_SIGRETURN (0xe3a07000UL | ARM_NR_sigreturn) +#define MOV_R7_RT_SIGRETURN (0xe3a07000UL | ARM_NR_rt_sigreturn) + +/* ARM OABI sigreturn (using SWI) */ +#define ARM_SIGRETURN \ + (0xef000000UL | ARM_NR_sigreturn | ARM_NR_OABI_SYSCALL_BASE) +#define ARM_RT_SIGRETURN \ + (0xef000000UL | ARM_NR_rt_sigreturn | ARM_NR_OABI_SYSCALL_BASE) + +/* Thumb sigreturn (two insns, syscall number is loaded into r7) */ +#define THUMB_SIGRETURN (0xdf00UL << 16 | 0x2700 | ARM_NR_sigreturn) +#define THUMB_RT_SIGRETURN (0xdf00UL << 16 | 0x2700 | ARM_NR_rt_sigreturn) + +/* Thumb2 sigreturn (mov.w r7, $SYS_ify(rt_sigreturn/sigreturn)) */ +#define THUMB2_SIGRETURN (((0x0700 | ARM_NR_sigreturn) << 16) | \ + 0xf04f) +#define THUMB2_RT_SIGRETURN (((0x0700 | ARM_NR_rt_sigreturn) << 16) | \ + 0xf04f) +/* TODO: with different toolchains, there are a lot more possibilities */ + +/* Returns 1 in case of a non-RT signal frame and 2 in case of a RT signal + frame. 
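
For reference, the Linux sigreturn patterns defined below are plain syscall
numbers or'ed into fixed opcodes; a small self-check (illustrative only):

    #include <assert.h>

    int
    main (void)
    {
      /* ARM EABI: mov r7, #<NR>, immediate or'ed into 0xe3a07000.      */
      assert ((0xe3a07000UL | 119) == 0xe3a07077UL);  /* sigreturn      */
      assert ((0xe3a07000UL | 173) == 0xe3a070adUL);  /* rt_sigreturn   */
      /* Thumb: svc 0 (0xdf00) in the high half, mov r7, #<NR> (0x27xx)
         in the low half of the word read at the IP.                    */
      assert (((0xdf00UL << 16) | 0x2700 | 119) == 0xdf002777UL);
      return 0;
    }
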
*/ +PROTECTED int +unw_is_signal_frame (unw_cursor_t *cursor) +{ + struct cursor *c = (struct cursor *) cursor; + unw_word_t w0, ip; + unw_addr_space_t as; + unw_accessors_t *a; + void *arg; + int ret; + + as = c->dwarf.as; + a = unw_get_accessors (as); + arg = c->dwarf.as_arg; + + /* The least bit denotes thumb/arm mode. Do not read there. */ + ip = c->dwarf.ip & ~0x1; + + if ((ret = (*a->access_mem) (as, ip, &w0, 0, arg)) < 0) + return ret; + + /* Return 1 if the IP points to a non-RT sigreturn sequence. */ + if (w0 == MOV_R7_SIGRETURN || w0 == ARM_SIGRETURN || w0 == THUMB_SIGRETURN + || w0 == THUMB2_SIGRETURN) + return 1; + /* Return 2 if the IP points to a RT sigreturn sequence. */ + else if (w0 == MOV_R7_RT_SIGRETURN || w0 == ARM_RT_SIGRETURN + || w0 == THUMB_RT_SIGRETURN || w0 == THUMB2_RT_SIGRETURN) + return 2; + + return 0; +} diff --git a/contrib/libunwind/src/ppc/Gcreate_addr_space.c b/contrib/libunwind/src/arm/Gos-other.c similarity index 65% rename from contrib/libunwind/src/ppc/Gcreate_addr_space.c rename to contrib/libunwind/src/arm/Gos-other.c index 21ec10fcd8c..66f38b1dace 100644 --- a/contrib/libunwind/src/ppc/Gcreate_addr_space.c +++ b/contrib/libunwind/src/arm/Gos-other.c @@ -1,8 +1,7 @@ /* libunwind - a platform-independent unwind library - Copyright (C) 2006-2007 IBM - Contributed by - Corey Ashford - Jose Flavio Aguilar Paulino + Copyright (C) 2008 CodeSourcery + Copyright 2011 Linaro Limited + Copyright (C) 2012 Tommi Rantala This file is part of libunwind. @@ -25,30 +24,25 @@ LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. */ -#include +#include +#include +#include "unwind_i.h" +#include "offsets.h" -#include - -PROTECTED unw_addr_space_t -unw_create_addr_space (unw_accessors_t *a, int byte_order) +PROTECTED int +unw_handle_signal_frame (unw_cursor_t *cursor) { -#ifdef UNW_LOCAL_ONLY - return NULL; + return -UNW_EUNSPEC; +} + +PROTECTED int +unw_is_signal_frame (unw_cursor_t *cursor) +{ +#if defined(__QNX__) + /* Not supported yet */ + return 0; #else - unw_addr_space_t as = malloc (sizeof (*as)); - - if (!as) - return NULL; - - memset (as, 0, sizeof (*as)); - - as->acc = *a; - - /* - * Linux ppc64 supports only big-endian. 
- */ - if (byte_order != 0 && byte_order != __BIG_ENDIAN) - return NULL; - return as; + printf ("%s: implement me\n", __FUNCTION__); + return -UNW_ENOINFO; #endif } diff --git a/contrib/libunwind/src/arm/Gregs.c b/contrib/libunwind/src/arm/Gregs.c index 688771f31e4..0d52f0b2225 100644 --- a/contrib/libunwind/src/arm/Gregs.c +++ b/contrib/libunwind/src/arm/Gregs.c @@ -32,6 +32,9 @@ tdep_access_reg (struct cursor *c, unw_regnum_t reg, unw_word_t *valp, switch (reg) { + case UNW_ARM_R15: + if (write) + c->dwarf.ip = *valp; /* update the IP cache */ case UNW_ARM_R0: case UNW_ARM_R1: case UNW_ARM_R2: @@ -46,7 +49,6 @@ tdep_access_reg (struct cursor *c, unw_regnum_t reg, unw_word_t *valp, case UNW_ARM_R11: case UNW_ARM_R12: case UNW_ARM_R14: - case UNW_ARM_R15: loc = c->dwarf.loc[reg - UNW_ARM_R0]; break; diff --git a/contrib/libunwind/src/arm/Gstep.c b/contrib/libunwind/src/arm/Gstep.c index 37e6c12f115..6679455ece2 100644 --- a/contrib/libunwind/src/arm/Gstep.c +++ b/contrib/libunwind/src/arm/Gstep.c @@ -45,8 +45,14 @@ arm_exidx_step (struct cursor *c) /* mark PC unsaved */ c->dwarf.loc[UNW_ARM_R15] = DWARF_NULL_LOC; - if ((ret = tdep_find_proc_info (&c->dwarf, c->dwarf.ip, 1)) < 0) - return ret; + /* check dynamic info first --- it overrides everything else */ + ret = unwi_find_dynamic_proc_info (c->dwarf.as, c->dwarf.ip, &c->dwarf.pi, 1, + c->dwarf.as_arg); + if (ret == -UNW_ENOINFO) + { + if ((ret = tdep_find_proc_info (&c->dwarf, c->dwarf.ip, 1)) < 0) + return ret; + } if (c->dwarf.pi.format != UNW_INFO_FORMAT_ARM_EXIDX) return -UNW_ENOINFO; @@ -73,99 +79,6 @@ arm_exidx_step (struct cursor *c) return (c->dwarf.ip == 0) ? 0 : 1; } -PROTECTED int -unw_handle_signal_frame (unw_cursor_t *cursor) -{ - struct cursor *c = (struct cursor *) cursor; - int ret; - unw_word_t sc_addr, sp, sp_addr = c->dwarf.cfa; - struct dwarf_loc sp_loc = DWARF_LOC (sp_addr, 0); - - if ((ret = dwarf_get (&c->dwarf, sp_loc, &sp)) < 0) - return -UNW_EUNSPEC; - - /* Obtain signal frame type (non-RT or RT). */ - ret = unw_is_signal_frame (cursor); - - /* Save the SP and PC to be able to return execution at this point - later in time (unw_resume). */ - c->sigcontext_sp = c->dwarf.cfa; - c->sigcontext_pc = c->dwarf.ip; - - /* Since kernel version 2.6.18 the non-RT signal frame starts with a - ucontext while the RT signal frame starts with a siginfo, followed - by a sigframe whose first element is an ucontext. - Prior 2.6.18 the non-RT signal frame starts with a sigcontext while - the RT signal frame starts with two pointers followed by a siginfo - and an ucontext. The first pointer points to the start of the siginfo - structure and the second one to the ucontext structure. */ - - if (ret == 1) - { - /* Handle non-RT signal frames. Check if the first word on the stack - is the magic number. */ - if (sp == 0x5ac3c35a) - { - c->sigcontext_format = ARM_SCF_LINUX_SIGFRAME; - sc_addr = sp_addr + LINUX_UC_MCONTEXT_OFF; - } - else - { - c->sigcontext_format = ARM_SCF_LINUX_OLD_SIGFRAME; - sc_addr = sp_addr; - } - } - else if (ret == 2) - { - /* Handle RT signal frames. Check if the first word on the stack is a - pointer to the siginfo structure. 
*/ - if (sp == sp_addr + 8) - { - c->sigcontext_format = ARM_SCF_LINUX_OLD_RT_SIGFRAME; - sc_addr = sp_addr + 8 + sizeof (siginfo_t) + LINUX_UC_MCONTEXT_OFF; - } - else - { - c->sigcontext_format = ARM_SCF_LINUX_RT_SIGFRAME; - sc_addr = sp_addr + sizeof (siginfo_t) + LINUX_UC_MCONTEXT_OFF; - } - } - else - return -UNW_EUNSPEC; - - c->sigcontext_addr = sc_addr; - c->frame_info.frame_type = UNW_ARM_FRAME_SIGRETURN; - c->frame_info.cfa_reg_offset = sc_addr - sp_addr; - - /* Update the dwarf cursor. - Set the location of the registers to the corresponding addresses of the - uc_mcontext / sigcontext structure contents. */ - c->dwarf.loc[UNW_ARM_R0] = DWARF_LOC (sc_addr + LINUX_SC_R0_OFF, 0); - c->dwarf.loc[UNW_ARM_R1] = DWARF_LOC (sc_addr + LINUX_SC_R1_OFF, 0); - c->dwarf.loc[UNW_ARM_R2] = DWARF_LOC (sc_addr + LINUX_SC_R2_OFF, 0); - c->dwarf.loc[UNW_ARM_R3] = DWARF_LOC (sc_addr + LINUX_SC_R3_OFF, 0); - c->dwarf.loc[UNW_ARM_R4] = DWARF_LOC (sc_addr + LINUX_SC_R4_OFF, 0); - c->dwarf.loc[UNW_ARM_R5] = DWARF_LOC (sc_addr + LINUX_SC_R5_OFF, 0); - c->dwarf.loc[UNW_ARM_R6] = DWARF_LOC (sc_addr + LINUX_SC_R6_OFF, 0); - c->dwarf.loc[UNW_ARM_R7] = DWARF_LOC (sc_addr + LINUX_SC_R7_OFF, 0); - c->dwarf.loc[UNW_ARM_R8] = DWARF_LOC (sc_addr + LINUX_SC_R8_OFF, 0); - c->dwarf.loc[UNW_ARM_R9] = DWARF_LOC (sc_addr + LINUX_SC_R9_OFF, 0); - c->dwarf.loc[UNW_ARM_R10] = DWARF_LOC (sc_addr + LINUX_SC_R10_OFF, 0); - c->dwarf.loc[UNW_ARM_R11] = DWARF_LOC (sc_addr + LINUX_SC_FP_OFF, 0); - c->dwarf.loc[UNW_ARM_R12] = DWARF_LOC (sc_addr + LINUX_SC_IP_OFF, 0); - c->dwarf.loc[UNW_ARM_R13] = DWARF_LOC (sc_addr + LINUX_SC_SP_OFF, 0); - c->dwarf.loc[UNW_ARM_R14] = DWARF_LOC (sc_addr + LINUX_SC_LR_OFF, 0); - c->dwarf.loc[UNW_ARM_R15] = DWARF_LOC (sc_addr + LINUX_SC_PC_OFF, 0); - - /* Set SP/CFA and PC/IP. */ - dwarf_get (&c->dwarf, c->dwarf.loc[UNW_ARM_R13], &c->dwarf.cfa); - dwarf_get (&c->dwarf, c->dwarf.loc[UNW_ARM_R15], &c->dwarf.ip); - - c->dwarf.pi_valid = 0; - - return 1; -} - PROTECTED int unw_step (unw_cursor_t *cursor) { @@ -210,14 +123,18 @@ unw_step (unw_cursor_t *cursor) /* Fall back on APCS frame parsing. Note: This won't work in case the ARM EABI is used. */ +#ifdef __FreeBSD__ + if (0) +#else if (unlikely (ret < 0)) +#endif { if (UNW_TRY_METHOD(UNW_ARM_METHOD_FRAME)) { + Debug (13, "dwarf_step() failed (ret=%d), trying frame-chain\n", ret); ret = UNW_ESUCCESS; /* DWARF unwinding failed, try to follow APCS/optimized APCS frame chain */ unw_word_t instr, i; - Debug (13, "dwarf_step() failed (ret=%d), trying frame-chain\n", ret); dwarf_loc_t ip_loc, fp_loc; unw_word_t frame; /* Mark all registers unsaved, since we don't know where @@ -260,7 +177,7 @@ unw_step (unw_cursor_t *cursor) c->dwarf.loc[UNW_ARM_R12] = ip_loc; c->dwarf.loc[UNW_ARM_R11] = fp_loc; c->dwarf.pi_valid = 0; - Debug(15, "ip=%lx\n", c->dwarf.ip); + Debug(15, "ip=%x\n", c->dwarf.ip); } else { @@ -268,5 +185,5 @@ unw_step (unw_cursor_t *cursor) } } } - return ret == -UNW_ENOINFO ? 0 : 1; + return ret == -UNW_ENOINFO ? 0 : ret; } diff --git a/contrib/libunwind/src/arm/Gtrace.c b/contrib/libunwind/src/arm/Gtrace.c index 135563a38f4..2f277520b36 100644 --- a/contrib/libunwind/src/arm/Gtrace.c +++ b/contrib/libunwind/src/arm/Gtrace.c @@ -503,7 +503,7 @@ tdep_trace (unw_cursor_t *cursor, void **buffer, int *size) case UNW_ARM_FRAME_SIGRETURN: cfa = cfa + f->cfa_reg_offset; /* cfa now points to ucontext_t. 
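
Note the changed tail of arm unw_step() above: errors other than
-UNW_ENOINFO now propagate to the caller instead of being collapsed into 1.
Callers should therefore check for negative results; a typical local-unwind
loop (sketch, not from the patch):

    #define UNW_LOCAL_ONLY
    #include <libunwind.h>
    #include <stdio.h>

    int
    main (void)
    {
      unw_context_t uc;
      unw_cursor_t cursor;
      unw_word_t ip;
      int ret;

      unw_getcontext (&uc);
      unw_init_local (&cursor, &uc);
      while ((ret = unw_step (&cursor)) > 0)   /* 0 = done, < 0 = error */
        {
          unw_get_reg (&cursor, UNW_REG_IP, &ip);
          printf ("ip=%#lx\n", (unsigned long) ip);
        }
      if (ret < 0)
        fprintf (stderr, "unwind failed: %d\n", ret);
      return 0;
    }
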
*/ - +#if defined(__linux__) ACCESS_MEM_FAST(ret, c->validate, d, cfa + LINUX_SC_PC_OFF, pc); if (likely(ret >= 0)) ACCESS_MEM_FAST(ret, c->validate, d, cfa + LINUX_SC_R7_OFF, r7); @@ -513,6 +513,9 @@ tdep_trace (unw_cursor_t *cursor, void **buffer, int *size) doesn't save the link register in the prologue, e.g. kill. */ if (likely(ret >= 0)) ACCESS_MEM_FAST(ret, c->validate, d, cfa + LINUX_SC_LR_OFF, lr); +#elif defined(__FreeBSD__) + printf("XXX\n"); +#endif /* Resume stack at signal restoration point. The stack is not necessarily continuous here, especially with sigaltstack(). */ @@ -522,6 +525,10 @@ tdep_trace (unw_cursor_t *cursor, void **buffer, int *size) d->use_prev_instr = 0; break; + case UNW_ARM_FRAME_SYSCALL: + printf("XXX1\n"); + break; + default: /* We cannot trace through this frame, give up and tell the caller we had to stop. Data collected so far may still be diff --git a/contrib/libunwind/src/x86_64/Lis_signal_frame.c b/contrib/libunwind/src/arm/Los-freebsd.c similarity index 78% rename from contrib/libunwind/src/x86_64/Lis_signal_frame.c rename to contrib/libunwind/src/arm/Los-freebsd.c index b9a7c4f51ad..a75a205df19 100644 --- a/contrib/libunwind/src/x86_64/Lis_signal_frame.c +++ b/contrib/libunwind/src/arm/Los-freebsd.c @@ -1,5 +1,5 @@ #define UNW_LOCAL_ONLY #include #if defined(UNW_LOCAL_ONLY) && !defined(UNW_REMOTE_ONLY) -#include "Gis_signal_frame.c" +#include "Gos-freebsd.c" #endif diff --git a/contrib/libunwind/src/dwarf/Lstep.c b/contrib/libunwind/src/arm/Los-linux.c similarity index 82% rename from contrib/libunwind/src/dwarf/Lstep.c rename to contrib/libunwind/src/arm/Los-linux.c index c1ac3c7547f..3cc18aabcc3 100644 --- a/contrib/libunwind/src/dwarf/Lstep.c +++ b/contrib/libunwind/src/arm/Los-linux.c @@ -1,5 +1,5 @@ #define UNW_LOCAL_ONLY #include #if defined(UNW_LOCAL_ONLY) && !defined(UNW_REMOTE_ONLY) -#include "Gstep.c" +#include "Gos-linux.c" #endif diff --git a/contrib/libunwind/src/ppc/Lcreate_addr_space.c b/contrib/libunwind/src/arm/Los-other.c similarity index 77% rename from contrib/libunwind/src/ppc/Lcreate_addr_space.c rename to contrib/libunwind/src/arm/Los-other.c index 0f2dc6be901..a75a205df19 100644 --- a/contrib/libunwind/src/ppc/Lcreate_addr_space.c +++ b/contrib/libunwind/src/arm/Los-other.c @@ -1,5 +1,5 @@ #define UNW_LOCAL_ONLY #include #if defined(UNW_LOCAL_ONLY) && !defined(UNW_REMOTE_ONLY) -#include "Gcreate_addr_space.c" +#include "Gos-freebsd.c" #endif diff --git a/contrib/libunwind/src/arm/getcontext.S b/contrib/libunwind/src/arm/getcontext.S index c52992bfb92..7e18784477d 100644 --- a/contrib/libunwind/src/arm/getcontext.S +++ b/contrib/libunwind/src/arm/getcontext.S @@ -35,8 +35,15 @@ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. */ _Uarm_getcontext: stmfd sp!, {r0, r1} @ store r0 +#if defined(__linux__) str r0, [r0, #LINUX_UC_MCONTEXT_OFF + LINUX_SC_R0_OFF] add r0, r0, #LINUX_UC_MCONTEXT_OFF + LINUX_SC_R0_OFF +#elif defined(__FreeBSD__) + str r0, [r0, #FREEBSD_UC_MCONTEXT_OFF + FREEBSD_MC_R0_OFF] + add r0, r0, #FREEBSD_UC_MCONTEXT_OFF + FREEBSD_MC_R0_OFF +#else +#error Fix me +#endif @ store r1 to r12 stmib r0, {r1-r12} @ reconstruct r13 at call site, then store @@ -50,7 +57,7 @@ _Uarm_getcontext: str r1, [r0, #15 * 4] ldmfd sp!, {r0, r1} bx lr -#ifdef __linux__ +#if defined(__linux__) || defined(__FreeBSD__) /* We do not need executable stack. 
*/ .section .note.GNU-stack,"",%progbits #endif diff --git a/contrib/libunwind/src/arm/offsets.h b/contrib/libunwind/src/arm/offsets.h index a63847be417..621701106c4 100644 --- a/contrib/libunwind/src/arm/offsets.h +++ b/contrib/libunwind/src/arm/offsets.h @@ -34,3 +34,9 @@ #define LINUX_SC_PC_OFF 0x48 #define LINUX_SC_CPSR_OFF 0x4C #define LINUX_SC_FAULTADDR_OFF 0x50 + +/* FreeBSD-specific definitions: */ + +#define FREEBSD_SC_UCONTEXT_OFF 0x40 +#define FREEBSD_UC_MCONTEXT_OFF 0x10 +#define FREEBSD_MC_R0_OFF 0 diff --git a/contrib/libunwind/src/coredump/_UCD_access_reg_freebsd.c b/contrib/libunwind/src/coredump/_UCD_access_reg_freebsd.c index 585b7e298b6..0e3a83bdc6c 100644 --- a/contrib/libunwind/src/coredump/_UCD_access_reg_freebsd.c +++ b/contrib/libunwind/src/coredump/_UCD_access_reg_freebsd.c @@ -76,7 +76,7 @@ _UCD_access_reg (unw_addr_space_t as, default: Debug(0, "bad regnum:%d\n", regnum); return -UNW_EINVAL; - }; + } #elif defined(UNW_TARGET_X86_64) switch (regnum) { case UNW_X86_64_RAX: @@ -109,7 +109,26 @@ _UCD_access_reg (unw_addr_space_t as, default: Debug(0, "bad regnum:%d\n", regnum); return -UNW_EINVAL; - }; + } +#elif defined(UNW_TARGET_ARM) + if (regnum >= UNW_ARM_R0 && regnum <= UNW_ARM_R12) { + *valp = ui->prstatus->pr_reg.r[regnum]; + } else { + switch (regnum) { + case UNW_ARM_R13: + *valp = ui->prstatus->pr_reg.r_sp; + break; + case UNW_ARM_R14: + *valp = ui->prstatus->pr_reg.r_lr; + break; + case UNW_ARM_R15: + *valp = ui->prstatus->pr_reg.r_pc; + break; + default: + Debug(0, "bad regnum:%d\n", regnum); + return -UNW_EINVAL; + } + } #else #error Port me #endif diff --git a/contrib/libunwind/src/coredump/_UCD_destroy.c b/contrib/libunwind/src/coredump/_UCD_destroy.c index 5aff989ccc1..ddc36ec8986 100644 --- a/contrib/libunwind/src/coredump/_UCD_destroy.c +++ b/contrib/libunwind/src/coredump/_UCD_destroy.c @@ -44,7 +44,9 @@ _UCD_destroy (struct UCD_info *ui) close(phdr->backing_fd); } + free(ui->phdrs); free(ui->note_phdr); + free(ui->threads); free(ui); } diff --git a/contrib/libunwind/src/coredump/libunwind-coredump.pc b/contrib/libunwind/src/coredump/libunwind-coredump.pc deleted file mode 100644 index 6d6e81c7042..00000000000 --- a/contrib/libunwind/src/coredump/libunwind-coredump.pc +++ /dev/null @@ -1,11 +0,0 @@ -prefix=/usr/local -exec_prefix=${prefix} -libdir=${exec_prefix}/lib -includedir=${prefix}/include - -Name: libunwind-coredump -Description: libunwind coredump library -Version: 1.2 -Requires: libunwind-generic libunwind -Libs: -L${libdir} -lunwind-coredump -Cflags: -I${includedir} diff --git a/contrib/libunwind/src/dwarf/Gfde.c b/contrib/libunwind/src/dwarf/Gfde.c index 55d8da8422d..49484eee131 100644 --- a/contrib/libunwind/src/dwarf/Gfde.c +++ b/contrib/libunwind/src/dwarf/Gfde.c @@ -32,7 +32,7 @@ is_cie_id (unw_word_t val, int is_debug_frame) 0xffffffffffffffff (for 64-bit ELF). However, .eh_frame uses 0. 
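
The is_cie_id() rewrite just below is purely cosmetic: unary minus on an
unsigned one already produces the all-ones value of that type. A two-line
self-check (illustrative):

    #include <assert.h>
    #include <stdint.h>

    int
    main (void)
    {
      assert (- (uint32_t) 1 == (uint32_t) -1);  /* both 0xffffffff */
      assert (- (uint64_t) 1 == (uint64_t) -1);
      return 0;
    }
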
*/ if (is_debug_frame) - return (val == - (uint32_t) 1 || val == - (uint64_t) 1); + return (val == (uint32_t)(-1) || val == (uint64_t)(-1)); else return (val == 0); } diff --git a/contrib/libunwind/src/dwarf/Gfind_proc_info-lsb.c b/contrib/libunwind/src/dwarf/Gfind_proc_info-lsb.c index 3e66bc1a96c..d8cbc3e09bc 100644 --- a/contrib/libunwind/src/dwarf/Gfind_proc_info-lsb.c +++ b/contrib/libunwind/src/dwarf/Gfind_proc_info-lsb.c @@ -199,7 +199,7 @@ locate_debug_info (unw_addr_space_t as, unw_word_t addr, const char *dlname, name = (char*) dlname; err = load_debug_frame (name, &buf, &bufsize, as == unw_local_addr_space); - + if (!err) { fdesc = malloc (sizeof (struct unw_debug_frame_list)); @@ -210,10 +210,10 @@ locate_debug_info (unw_addr_space_t as, unw_word_t addr, const char *dlname, fdesc->debug_frame_size = bufsize; fdesc->index = NULL; fdesc->next = as->debug_frames; - + as->debug_frames = fdesc; } - + return fdesc; } @@ -235,10 +235,10 @@ debug_frame_tab_append (struct debug_frame_tab *tab, tab->size *= 2; tab->tab = realloc (tab->tab, sizeof (struct table_entry) * tab->size); } - + tab->tab[length].fde_offset = fde_offset; tab->tab[length].start_ip_offset = start_ip; - + tab->length = length + 1; } @@ -256,7 +256,7 @@ static int debug_frame_tab_compare (const void *a, const void *b) { const struct table_entry *fa = a, *fb = b; - + if (fa->start_ip_offset > fb->start_ip_offset) return 1; else if (fa->start_ip_offset < fb->start_ip_offset) @@ -522,7 +522,7 @@ dwarf_callback (struct dl_phdr_info *info, size_t size, void *ptr) else if (phdr->p_type == PT_DYNAMIC) p_dynamic = phdr; } - + if (!p_text) return 0; @@ -537,14 +537,14 @@ dwarf_callback (struct dl_phdr_info *info, size_t size, void *ptr) eh_frame = dwarf_find_eh_frame_section (info); if (eh_frame) { - unsigned char *p = (unsigned char *) &synth_eh_frame_hdr; Debug (1, "using synthetic .eh_frame_hdr section for %s\n", info->dlpi_name); - /* synth_eh_frame_hdr.version */ p[0] = DW_EH_VERSION; - /* synth_eh_frame_hdr.eh_frame_ptr_enc */ p[1] = DW_EH_PE_absptr | ((sizeof(Elf_W (Addr)) == 4) ? DW_EH_PE_udata4 : DW_EH_PE_udata8); - /* synth_eh_frame_hdr.fde_count_enc */ p[2] = DW_EH_PE_omit; - /* synth_eh_frame_hdr.table_enc */ p[3] = DW_EH_PE_omit; - *(Elf_W (Addr) *)(&p[4]) = eh_frame; + synth_eh_frame_hdr.version = DW_EH_VERSION; + synth_eh_frame_hdr.eh_frame_ptr_enc = DW_EH_PE_absptr | + ((sizeof(Elf_W (Addr)) == 4) ? DW_EH_PE_udata4 : DW_EH_PE_udata8); + synth_eh_frame_hdr.fde_count_enc = DW_EH_PE_omit; + synth_eh_frame_hdr.table_enc = DW_EH_PE_omit; + synth_eh_frame_hdr.eh_frame = eh_frame; hdr = &synth_eh_frame_hdr; } } @@ -581,7 +581,7 @@ dwarf_callback (struct dl_phdr_info *info, size_t size, void *ptr) } a = unw_get_accessors (unw_local_addr_space); - addr = (unw_word_t) (uintptr_t) (hdr + 1); + addr = (unw_word_t) (uintptr_t) (&hdr->eh_frame); /* (Optionally) read eh_frame_ptr: */ if ((ret = dwarf_read_encoded_pointer (unw_local_addr_space, a, @@ -618,12 +618,13 @@ dwarf_callback (struct dl_phdr_info *info, size_t size, void *ptr) /* XXX we know how to build a local binary search table for .debug_frame, so we could do that here too. 
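
The dwarf_callback() hunk here, and the matching Gfind_unwind_table.c hunk
below, replace `(hdr + 1)` with `&hdr->eh_frame`. Pointer arithmetic past the
header assumes a packed layout; once the header is an in-memory struct with
an address-sized member, alignment padding can shift that member, so taking
its address is the robust form. A toy illustration (struct hdr is
hypothetical, not the real dwarf_eh_frame_hdr):

    #include <stddef.h>
    #include <stdint.h>
    #include <stdio.h>

    struct hdr
    {
      uint8_t version, ptr_enc, fde_count_enc, table_enc;
      uint64_t eh_frame;   /* 8-byte aligned: 4 bytes of padding before it */
    };

    int
    main (void)
    {
      /* Arithmetic past the 4 leading bytes lands at offset 4; the
         member actually lives at offset 8 on typical ABIs.            */
      printf ("offsetof(eh_frame) = %zu (not 4)\n",
              offsetof (struct hdr, eh_frame));
      return 0;
    }
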
*/ - cb_data->single_fde = 1; found = linear_search (unw_local_addr_space, ip, eh_frame_start, eh_frame_end, fde_count, pi, need_unwind_info, NULL); if (found != 1) found = 0; + else + cb_data->single_fde = 1; } else { diff --git a/contrib/libunwind/src/dwarf/Gfind_unwind_table.c b/contrib/libunwind/src/dwarf/Gfind_unwind_table.c index 215948e09f3..c171eeca9a9 100644 --- a/contrib/libunwind/src/dwarf/Gfind_unwind_table.c +++ b/contrib/libunwind/src/dwarf/Gfind_unwind_table.c @@ -139,7 +139,7 @@ dwarf_find_unwind_table (struct elf_dyn_info *edi, unw_addr_space_t as, } a = unw_get_accessors (unw_local_addr_space); - addr = to_unw_word (hdr + 1); + addr = to_unw_word (&hdr->eh_frame); /* Fill in a dummy proc_info structure. We just need to fill in enough to ensure that dwarf_read_encoded_pointer() can do it's diff --git a/contrib/libunwind/src/dwarf/Gparser.c b/contrib/libunwind/src/dwarf/Gparser.c index efbb5e07429..9d405e76696 100644 --- a/contrib/libunwind/src/dwarf/Gparser.c +++ b/contrib/libunwind/src/dwarf/Gparser.c @@ -278,7 +278,7 @@ run_cfi_program (struct dwarf_cursor *c, dwarf_state_record_t *sr, ret = -UNW_ENOMEM; break; } - memcpy (&(*rs_stack)->state, &sr->rs_current, sizeof (sr->rs_current)) + memcpy (&(*rs_stack)->state, &sr->rs_current, sizeof (sr->rs_current)); Debug (15, "CFA_remember_state\n"); break; @@ -289,8 +289,10 @@ run_cfi_program (struct dwarf_cursor *c, dwarf_state_record_t *sr, ret = -UNW_EINVAL; break; } - memcpy (&sr->rs_current, &(*rs_stack)->state, sizeof (sr->rs_current)); - pop_rstate_stack(rs_stack); + if (*ip < end_ip) { + memcpy (&sr->rs_current, &(*rs_stack)->state, sizeof (sr->rs_current)); + pop_rstate_stack(rs_stack); + } Debug (15, "CFA_restore_state\n"); break; @@ -606,7 +608,17 @@ get_rs_cache (unw_addr_space_t as, intrmask_t *saved_maskp) if (caching == UNW_CACHE_NONE) return NULL; +#if defined(HAVE___THREAD) && HAVE___THREAD + if (likely (caching == UNW_CACHE_PER_THREAD)) + { + static __thread struct dwarf_rs_cache tls_cache __attribute__((tls_model("initial-exec"))); + Debug (16, "using TLS cache\n"); + cache = &tls_cache; + } + else +#else if (likely (caching == UNW_CACHE_GLOBAL)) +#endif { Debug (16, "acquiring lock\n"); lock_acquire (&cache->lock, *saved_maskp); @@ -615,6 +627,8 @@ get_rs_cache (unw_addr_space_t as, intrmask_t *saved_maskp) if ((atomic_read (&as->cache_generation) != atomic_read (&cache->generation)) || !cache->hash) { + /* cache_size is only set in the global_cache, copy it over before flushing */ + cache->log_size = as->global_cache.log_size; if (dwarf_flush_rs_cache (cache) < 0) return NULL; cache->generation = as->cache_generation; @@ -679,7 +693,7 @@ rs_new (struct dwarf_rs_cache *cache, struct dwarf_cursor * c) unsigned short head; head = cache->rr_head; - cache->rr_head = (head + 1) & (cache->log_size - 1); + cache->rr_head = (head + 1) & (DWARF_UNW_CACHE_SIZE(cache->log_size) - 1); /* remove the old rs from the hash table (if it's there): */ if (cache->links[head].ip) @@ -885,7 +899,7 @@ find_reg_state (struct dwarf_cursor *c, dwarf_state_record_t *sr) int ret = 0; intrmask_t saved_mask; - if ((cache = get_rs_cache(c->as, &saved_mask)) && + if ((cache = get_rs_cache(c->as, &saved_mask)) && (rs = rs_lookup(cache, c))) { /* update hint; no locking needed: single-word writes are atomic */ @@ -951,7 +965,7 @@ dwarf_make_proc_info (struct dwarf_cursor *c) needed for unw_resume */ dwarf_state_record_t sr; int ret; - + /* Lookup it up the slow way... 
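
Two fixes in the Gparser.c hunks above deserve a note: the missing semicolon
after the CFA_remember_state memcpy is restored (a plain compile fix), and
rs_new() now wraps the round-robin head with the cache *size* minus one
rather than its log2. A minimal sketch of the mask bug, assuming
DWARF_UNW_CACHE_SIZE(n) expands to (1u << (n)):

    #include <assert.h>

    #define CACHE_SIZE(log_size) (1u << (log_size))

    int
    main (void)
    {
      unsigned log_size = 11;   /* e.g. a 2048-entry cache */
      unsigned head = 5;

      /* Buggy mask: (5 + 1) & (11 - 1) == 6 & 10 == 2 -- the head jumps
         backwards long before the 2048-entry cache actually wraps.     */
      assert (((head + 1) & (log_size - 1)) == 2);

      /* Fixed mask: wrap only at the real capacity.                    */
      assert (((head + 1) & (CACHE_SIZE (log_size) - 1)) == 6);
      return 0;
    }
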
*/ ret = fetch_proc_info (c, c->ip, 0); if (ret >= 0) @@ -1018,11 +1032,11 @@ dwarf_reg_states_iterate(struct dwarf_cursor *c, case UNW_INFO_FORMAT_REMOTE_TABLE: ret = dwarf_reg_states_table_iterate(c, cb, token); break; - + case UNW_INFO_FORMAT_DYNAMIC: ret = dwarf_reg_states_dynamic_iterate (c, cb, token); break; - + default: Debug (1, "Unexpected unwind-info format %d\n", c->pi.format); ret = -UNW_EINVAL; diff --git a/contrib/libunwind/src/elfxx.c b/contrib/libunwind/src/elfxx.c index 685cf2f534c..48a08cdc383 100644 --- a/contrib/libunwind/src/elfxx.c +++ b/contrib/libunwind/src/elfxx.c @@ -386,6 +386,8 @@ elf_w (load_debuglink) (const char* file, struct elf_image *ei, int is_local) { int ret; Elf_W (Shdr) *shdr; + Elf_W (Ehdr) *prev_image = ei->image; + off_t prev_size = ei->size; if (!ei->image) { @@ -420,7 +422,6 @@ elf_w (load_debuglink) (const char* file, struct elf_image *ei, int is_local) if (memchr (linkbuf, 0, shdr->sh_size) == NULL) return 0; - munmap (ei->image, ei->size); ei->image = NULL; Debug(1, "Found debuglink section, following %s\n", linkbuf); @@ -456,6 +457,19 @@ elf_w (load_debuglink) (const char* file, struct elf_image *ei, int is_local) ret = elf_w (load_debuglink) (newname, ei, -1); } + if (ret == -1) + { + /* No debuglink file found even though .gnu_debuglink existed */ + ei->image = prev_image; + ei->size = prev_size; + + return 0; + } + else + { + munmap (prev_image, prev_size); + } + return ret; } } diff --git a/contrib/libunwind/src/hppa/Ginit.c b/contrib/libunwind/src/hppa/Ginit.c index 89ad51caca6..28779c39ba5 100644 --- a/contrib/libunwind/src/hppa/Ginit.c +++ b/contrib/libunwind/src/hppa/Ginit.c @@ -179,7 +179,7 @@ HIDDEN void hppa_local_addr_space_init (void) { memset (&local_addr_space, 0, sizeof (local_addr_space)); - local_addr_space.caching_policy = UNW_CACHE_GLOBAL; + local_addr_space.caching_policy = UNWI_DEFAULT_CACHING_POLICY; local_addr_space.acc.find_proc_info = dwarf_find_proc_info; local_addr_space.acc.put_unwind_info = put_unwind_info; local_addr_space.acc.get_dyn_info_list_addr = get_dyn_info_list_addr; diff --git a/contrib/libunwind/src/hppa/Ginit_local.c b/contrib/libunwind/src/hppa/Ginit_local.c index 94583d9da75..5c59f48f880 100644 --- a/contrib/libunwind/src/hppa/Ginit_local.c +++ b/contrib/libunwind/src/hppa/Ginit_local.c @@ -58,9 +58,20 @@ unw_init_local (unw_cursor_t *cursor, ucontext_t *uc) } PROTECTED int -unw_init_local_signal (unw_cursor_t *cursor, ucontext_t *uc) +unw_init_local2 (unw_cursor_t *cursor, ucontext_t *uc, int flag) { - return unw_init_local_common(cursor, uc, 0); + if (!flag) + { + return unw_init_local_common(cursor, uc, 1); + } + else if (flag == UNW_INIT_SIGNAL_FRAME) + { + return unw_init_local_common(cursor, uc, 0); + } + else + { + return -UNW_EINVAL; + } } #endif /* !UNW_REMOTE_ONLY */ diff --git a/contrib/libunwind/src/ia64/Gapply_reg_state.c b/contrib/libunwind/src/ia64/Gapply_reg_state.c index eec93046f56..a049b57ecae 100644 --- a/contrib/libunwind/src/ia64/Gapply_reg_state.c +++ b/contrib/libunwind/src/ia64/Gapply_reg_state.c @@ -33,5 +33,7 @@ unw_apply_reg_state (unw_cursor_t *cursor, { struct cursor *c = (struct cursor *) cursor; - return dwarf_apply_reg_state (&c->dwarf, (dwarf_reg_state_t *)reg_states_data); + // Needs dwarf support on ia64 + // return dwarf_apply_reg_state (&c->dwarf, (dwarf_reg_state_t *)reg_states_data); + return -UNW_EINVAL; } diff --git a/contrib/libunwind/src/ia64/Ginit.c b/contrib/libunwind/src/ia64/Ginit.c index 7b64f0c1e1e..395450ec9ab 100644 --- 
a/contrib/libunwind/src/ia64/Ginit.c +++ b/contrib/libunwind/src/ia64/Ginit.c @@ -361,7 +361,7 @@ ia64_local_addr_space_init (void) #elif defined(__hpux) local_addr_space.abi = ABI_HPUX; #endif - local_addr_space.caching_policy = UNW_CACHE_GLOBAL; + local_addr_space.caching_policy = UNWI_DEFAULT_CACHING_POLICY; local_addr_space.acc.find_proc_info = tdep_find_proc_info; local_addr_space.acc.put_unwind_info = put_unwind_info; local_addr_space.acc.get_dyn_info_list_addr = get_dyn_info_list_addr; diff --git a/contrib/libunwind/src/ia64/Greg_states_iterate.c b/contrib/libunwind/src/ia64/Greg_states_iterate.c index a39837a1781..49908b2cab2 100644 --- a/contrib/libunwind/src/ia64/Greg_states_iterate.c +++ b/contrib/libunwind/src/ia64/Greg_states_iterate.c @@ -33,5 +33,7 @@ unw_reg_states_iterate (unw_cursor_t *cursor, { struct cursor *c = (struct cursor *) cursor; - return dwarf_reg_states_iterate (&c->dwarf, cb, token); + // Needs dwarf support on ia64 + // return dwarf_reg_states_iterate (&c->dwarf, cb, token); + return -UNW_EINVAL; } diff --git a/contrib/libunwind/src/ia64/Gscript.c b/contrib/libunwind/src/ia64/Gscript.c index e96e89e0e83..526aeaf299e 100644 --- a/contrib/libunwind/src/ia64/Gscript.c +++ b/contrib/libunwind/src/ia64/Gscript.c @@ -45,7 +45,7 @@ enum ia64_script_insn_opcode IA64_INSN_MOVE_SCRATCH_NO_NAT /* like above, but clear NaT info */ }; -#ifdef HAVE___THREAD +#if defined(HAVE___THREAD) && HAVE___THREAD static __thread struct ia64_script_cache ia64_per_thread_cache = { #ifdef HAVE_ATOMIC_OPS_H @@ -105,7 +105,7 @@ get_script_cache (unw_addr_space_t as, intrmask_t *saved_maskp) if (!spin_trylock_irqsave (&cache->busy, *saved_maskp)) return NULL; #else -# ifdef HAVE___THREAD +# if defined(HAVE___THREAD) && HAVE___THREAD if (as->caching_policy == UNW_CACHE_PER_THREAD) cache = &ia64_per_thread_cache; # endif diff --git a/contrib/libunwind/src/libunwind-generic.pc b/contrib/libunwind/src/libunwind-generic.pc deleted file mode 100644 index 714fcd75e90..00000000000 --- a/contrib/libunwind/src/libunwind-generic.pc +++ /dev/null @@ -1,11 +0,0 @@ -prefix=/usr/local -exec_prefix=${prefix} -libdir=${exec_prefix}/lib -includedir=${prefix}/include - -Name: libunwind-generic -Description: libunwind generic library -Version: 1.2 -Requires: libunwind -Libs: -L${libdir} -lunwind-generic -Cflags: -I${includedir} diff --git a/contrib/libunwind/src/mi/Gdyn-extract.c b/contrib/libunwind/src/mi/Gdyn-extract.c index c8ae7a03dcb..5f7682e650d 100644 --- a/contrib/libunwind/src/mi/Gdyn-extract.c +++ b/contrib/libunwind/src/mi/Gdyn-extract.c @@ -49,6 +49,7 @@ unwi_extract_dynamic_proc_info (unw_addr_space_t as, unw_word_t ip, case UNW_INFO_FORMAT_TABLE: case UNW_INFO_FORMAT_REMOTE_TABLE: + case UNW_INFO_FORMAT_ARM_EXIDX: case UNW_INFO_FORMAT_IP_OFFSET: #ifdef tdep_search_unwind_table /* call platform-specific search routine: */ diff --git a/contrib/libunwind/src/mi/Gget_proc_name.c b/contrib/libunwind/src/mi/Gget_proc_name.c index 5376f82cc76..41ed9394da4 100644 --- a/contrib/libunwind/src/mi/Gget_proc_name.c +++ b/contrib/libunwind/src/mi/Gget_proc_name.c @@ -104,11 +104,15 @@ unw_get_proc_name (unw_cursor_t *cursor, char *buf, size_t buf_len, int error; ip = tdep_get_ip (c); +#if !defined(__ia64__) if (c->dwarf.use_prev_instr) --ip; +#endif error = get_proc_name (tdep_get_as (c), ip, buf, buf_len, offp, tdep_get_as_arg (c)); +#if !defined(__ia64__) if (c->dwarf.use_prev_instr && offp != NULL && error == 0) *offp += 1; +#endif return error; } diff --git a/contrib/libunwind/src/mi/Gset_cache_size.c 
b/contrib/libunwind/src/mi/Gset_cache_size.c index 2f06deb3b14..a0d8b5bf13c 100644 --- a/contrib/libunwind/src/mi/Gset_cache_size.c +++ b/contrib/libunwind/src/mi/Gset_cache_size.c @@ -38,6 +38,12 @@ unw_set_cache_size (unw_addr_space_t as, size_t size, int flag) if (flag != 0) return -1; + /* Currently not supported for per-thread cache due to memory leak */ + /* A pthread-key destructor would work, but is not signal safe */ +#if defined(HAVE___THREAD) && HAVE___THREAD + return -1; +#endif + /* Round up to next power of two, slowly but portably */ while(power < size) { @@ -48,10 +54,12 @@ unw_set_cache_size (unw_addr_space_t as, size_t size, int flag) break; } +#if !defined(__ia64__) if (log_size == as->global_cache.log_size) return 0; /* no change */ as->global_cache.log_size = log_size; +#endif /* Ensure caches are empty (and initialized). */ unw_flush_cache (as, 0, 0); diff --git a/contrib/libunwind/src/mi/Gset_caching_policy.c b/contrib/libunwind/src/mi/Gset_caching_policy.c index 45ba1001323..9df9eb82e5c 100644 --- a/contrib/libunwind/src/mi/Gset_caching_policy.c +++ b/contrib/libunwind/src/mi/Gset_caching_policy.c @@ -31,7 +31,7 @@ unw_set_caching_policy (unw_addr_space_t as, unw_caching_policy_t policy) if (!tdep_init_done) tdep_init (); -#ifndef HAVE___THREAD +#if !(defined(HAVE___THREAD) && HAVE___THREAD) if (policy == UNW_CACHE_PER_THREAD) policy = UNW_CACHE_GLOBAL; #endif diff --git a/contrib/libunwind/src/mips/Ginit.c b/contrib/libunwind/src/mips/Ginit.c index 83b100fb8ec..077a386c6e8 100644 --- a/contrib/libunwind/src/mips/Ginit.c +++ b/contrib/libunwind/src/mips/Ginit.c @@ -195,7 +195,7 @@ mips_local_addr_space_init (void) # error Unsupported ABI #endif local_addr_space.addr_size = sizeof (void *); - local_addr_space.caching_policy = UNW_CACHE_GLOBAL; + local_addr_space.caching_policy = UNWI_DEFAULT_CACHING_POLICY; local_addr_space.acc.find_proc_info = dwarf_find_proc_info; local_addr_space.acc.put_unwind_info = put_unwind_info; local_addr_space.acc.get_dyn_info_list_addr = get_dyn_info_list_addr; diff --git a/contrib/libunwind/src/mips/Ginit_local.c b/contrib/libunwind/src/mips/Ginit_local.c index d24e9ea5150..a29b6d09b7a 100644 --- a/contrib/libunwind/src/mips/Ginit_local.c +++ b/contrib/libunwind/src/mips/Ginit_local.c @@ -57,9 +57,20 @@ unw_init_local(unw_cursor_t *cursor, ucontext_t *uc) } PROTECTED int -unw_init_local_signal(unw_cursor_t *cursor, ucontext_t *uc) +unw_init_local2 (unw_cursor_t *cursor, ucontext_t *uc, int flag) { - return unw_init_local_common(cursor, uc, 0); + if (!flag) + { + return unw_init_local_common(cursor, uc, 1); + } + else if (flag == UNW_INIT_SIGNAL_FRAME) + { + return unw_init_local_common(cursor, uc, 0); + } + else + { + return -UNW_EINVAL; + } } #endif /* !UNW_REMOTE_ONLY */ diff --git a/contrib/libunwind/src/mips/Gregs.c b/contrib/libunwind/src/mips/Gregs.c index 269777673f8..95194022d2b 100644 --- a/contrib/libunwind/src/mips/Gregs.c +++ b/contrib/libunwind/src/mips/Gregs.c @@ -70,6 +70,8 @@ tdep_access_reg (struct cursor *c, unw_regnum_t reg, unw_word_t *valp, break; case UNW_MIPS_PC: + if (write) + c->dwarf.ip = *valp; /* update the IP cache */ loc = c->dwarf.loc[reg]; break; diff --git a/contrib/libunwind/src/os-freebsd.c b/contrib/libunwind/src/os-freebsd.c index a96877d9bb3..2e59731e5d2 100644 --- a/contrib/libunwind/src/os-freebsd.c +++ b/contrib/libunwind/src/os-freebsd.c @@ -56,7 +56,7 @@ get_pid_by_tid(int tid) size_t len, len1; char *buf; struct kinfo_proc *kv; - int i, pid; + unsigned i, pid; len = 0; mib[0] = CTL_KERN; diff 
--git a/contrib/libunwind/src/ppc/Ginit_local.c b/contrib/libunwind/src/ppc/Ginit_local.c index 6c83b3b78a0..a05d4c8aaaa 100644 --- a/contrib/libunwind/src/ppc/Ginit_local.c +++ b/contrib/libunwind/src/ppc/Ginit_local.c @@ -69,9 +69,20 @@ unw_init_local(unw_cursor_t *cursor, ucontext_t *uc) } PROTECTED int -unw_init_local_signal(unw_cursor_t *cursor, ucontext_t *uc) +unw_init_local2 (unw_cursor_t *cursor, ucontext_t *uc, int flag) { - return unw_init_local_common(cursor, uc, 0); + if (!flag) + { + return unw_init_local_common(cursor, uc, 1); + } + else if (flag == UNW_INIT_SIGNAL_FRAME) + { + return unw_init_local_common(cursor, uc, 0); + } + else + { + return -UNW_EINVAL; + } } #endif /* !UNW_REMOTE_ONLY */ diff --git a/contrib/libunwind/src/ppc32/Gapply_reg_state.c b/contrib/libunwind/src/ppc32/Gapply_reg_state.c new file mode 100644 index 00000000000..eec93046f56 --- /dev/null +++ b/contrib/libunwind/src/ppc32/Gapply_reg_state.c @@ -0,0 +1,37 @@ +/* libunwind - a platform-independent unwind library + Copyright (c) 2002-2003 Hewlett-Packard Development Company, L.P. + Contributed by David Mosberger-Tang + + Modified for x86_64 by Max Asbock + +This file is part of libunwind. + +Permission is hereby granted, free of charge, to any person obtaining +a copy of this software and associated documentation files (the +"Software"), to deal in the Software without restriction, including +without limitation the rights to use, copy, modify, merge, publish, +distribute, sublicense, and/or sell copies of the Software, and to +permit persons to whom the Software is furnished to do so, subject to +the following conditions: + +The above copyright notice and this permission notice shall be +included in all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, +EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF +MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND +NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE +LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION +OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION +WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
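
Given the Gset_cache_size.c / Gset_caching_policy.c hunks above, requesting
per-thread caching remains a one-liner for callers and silently degrades to a
global cache where __thread support is unavailable. A sketch (illustrative):

    #define UNW_LOCAL_ONLY
    #include <libunwind.h>

    int
    main (void)
    {
      /* With the hunk above this silently falls back to UNW_CACHE_GLOBAL
         when __thread support is missing.                               */
      return unw_set_caching_policy (unw_local_addr_space,
                                     UNW_CACHE_PER_THREAD);
    }
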
*/ + +#include "unwind_i.h" + +PROTECTED int +unw_apply_reg_state (unw_cursor_t *cursor, + void *reg_states_data) +{ + struct cursor *c = (struct cursor *) cursor; + + return dwarf_apply_reg_state (&c->dwarf, (dwarf_reg_state_t *)reg_states_data); +} diff --git a/contrib/libunwind/src/ppc32/Ginit.c b/contrib/libunwind/src/ppc32/Ginit.c index f2e6e823679..dc599b9d0e4 100644 --- a/contrib/libunwind/src/ppc32/Ginit.c +++ b/contrib/libunwind/src/ppc32/Ginit.c @@ -201,7 +201,7 @@ HIDDEN void ppc32_local_addr_space_init (void) { memset (&local_addr_space, 0, sizeof (local_addr_space)); - local_addr_space.caching_policy = UNW_CACHE_GLOBAL; + local_addr_space.caching_policy = UNWI_DEFAULT_CACHING_POLICY; local_addr_space.acc.find_proc_info = dwarf_find_proc_info; local_addr_space.acc.put_unwind_info = put_unwind_info; local_addr_space.acc.get_dyn_info_list_addr = get_dyn_info_list_addr; diff --git a/contrib/libunwind/src/dwarf/Gstep.c b/contrib/libunwind/src/ppc32/Greg_states_iterate.c similarity index 72% rename from contrib/libunwind/src/dwarf/Gstep.c rename to contrib/libunwind/src/ppc32/Greg_states_iterate.c index 59138e6f783..a39837a1781 100644 --- a/contrib/libunwind/src/dwarf/Gstep.c +++ b/contrib/libunwind/src/ppc32/Greg_states_iterate.c @@ -1,6 +1,8 @@ /* libunwind - a platform-independent unwind library - Copyright (c) 2003-2005 Hewlett-Packard Development Company, L.P. - Contributed by David Mosberger-Tang + Copyright (c) 2002-2003 Hewlett-Packard Development Company, L.P. + Contributed by David Mosberger-Tang + + Modified for x86_64 by Max Asbock This file is part of libunwind. @@ -23,19 +25,13 @@ LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. */ -#include "dwarf.h" -#include "libunwind_i.h" +#include "unwind_i.h" -HIDDEN int -dwarf_step (struct dwarf_cursor *c) +PROTECTED int +unw_reg_states_iterate (unw_cursor_t *cursor, + unw_reg_states_callback cb, void *token) { - int ret; + struct cursor *c = (struct cursor *) cursor; - if ((ret = dwarf_find_save_locs (c)) >= 0) { - c->pi_valid = 0; - ret = (c->ip == 0) ? 0 : 1; - } - - Debug (15, "returning %d\n", ret); - return ret; + return dwarf_reg_states_iterate (&c->dwarf, cb, token); } diff --git a/contrib/libunwind/src/ppc32/Lapply_reg_state.c b/contrib/libunwind/src/ppc32/Lapply_reg_state.c new file mode 100644 index 00000000000..7ebada480e5 --- /dev/null +++ b/contrib/libunwind/src/ppc32/Lapply_reg_state.c @@ -0,0 +1,5 @@ +#define UNW_LOCAL_ONLY +#include +#if defined(UNW_LOCAL_ONLY) && !defined(UNW_REMOTE_ONLY) +#include "Gapply_reg_state.c" +#endif diff --git a/contrib/libunwind/src/ppc32/Lreg_states_iterate.c b/contrib/libunwind/src/ppc32/Lreg_states_iterate.c new file mode 100644 index 00000000000..f1eb1e79dcd --- /dev/null +++ b/contrib/libunwind/src/ppc32/Lreg_states_iterate.c @@ -0,0 +1,5 @@ +#define UNW_LOCAL_ONLY +#include +#if defined(UNW_LOCAL_ONLY) && !defined(UNW_REMOTE_ONLY) +#include "Greg_states_iterate.c" +#endif diff --git a/contrib/libunwind/src/ppc64/Gapply_reg_state.c b/contrib/libunwind/src/ppc64/Gapply_reg_state.c new file mode 100644 index 00000000000..eec93046f56 --- /dev/null +++ b/contrib/libunwind/src/ppc64/Gapply_reg_state.c @@ -0,0 +1,37 @@ +/* libunwind - a platform-independent unwind library + Copyright (c) 2002-2003 Hewlett-Packard Development Company, L.P. 
+ Contributed by David Mosberger-Tang + + Modified for x86_64 by Max Asbock + +This file is part of libunwind. + +Permission is hereby granted, free of charge, to any person obtaining +a copy of this software and associated documentation files (the +"Software"), to deal in the Software without restriction, including +without limitation the rights to use, copy, modify, merge, publish, +distribute, sublicense, and/or sell copies of the Software, and to +permit persons to whom the Software is furnished to do so, subject to +the following conditions: + +The above copyright notice and this permission notice shall be +included in all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, +EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF +MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND +NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE +LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION +OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION +WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. */ + +#include "unwind_i.h" + +PROTECTED int +unw_apply_reg_state (unw_cursor_t *cursor, + void *reg_states_data) +{ + struct cursor *c = (struct cursor *) cursor; + + return dwarf_apply_reg_state (&c->dwarf, (dwarf_reg_state_t *)reg_states_data); +} diff --git a/contrib/libunwind/src/ppc64/Ginit.c b/contrib/libunwind/src/ppc64/Ginit.c index 3211cf4df17..287ecf4923d 100644 --- a/contrib/libunwind/src/ppc64/Ginit.c +++ b/contrib/libunwind/src/ppc64/Ginit.c @@ -214,7 +214,7 @@ ppc64_local_addr_space_init (void) #else local_addr_space.abi = UNW_PPC64_ABI_ELFv1; #endif - local_addr_space.caching_policy = UNW_CACHE_GLOBAL; + local_addr_space.caching_policy = UNWI_DEFAULT_CACHING_POLICY; local_addr_space.acc.find_proc_info = dwarf_find_proc_info; local_addr_space.acc.put_unwind_info = put_unwind_info; local_addr_space.acc.get_dyn_info_list_addr = get_dyn_info_list_addr; diff --git a/contrib/libunwind/src/ppc64/Greg_states_iterate.c b/contrib/libunwind/src/ppc64/Greg_states_iterate.c new file mode 100644 index 00000000000..a39837a1781 --- /dev/null +++ b/contrib/libunwind/src/ppc64/Greg_states_iterate.c @@ -0,0 +1,37 @@ +/* libunwind - a platform-independent unwind library + Copyright (c) 2002-2003 Hewlett-Packard Development Company, L.P. + Contributed by David Mosberger-Tang + + Modified for x86_64 by Max Asbock + +This file is part of libunwind. + +Permission is hereby granted, free of charge, to any person obtaining +a copy of this software and associated documentation files (the +"Software"), to deal in the Software without restriction, including +without limitation the rights to use, copy, modify, merge, publish, +distribute, sublicense, and/or sell copies of the Software, and to +permit persons to whom the Software is furnished to do so, subject to +the following conditions: + +The above copyright notice and this permission notice shall be +included in all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, +EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF +MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND +NONINFRINGEMENT. 
IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
+LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.  */
+
+#include "unwind_i.h"
+
+PROTECTED int
+unw_reg_states_iterate (unw_cursor_t *cursor,
+                        unw_reg_states_callback cb, void *token)
+{
+  struct cursor *c = (struct cursor *) cursor;
+
+  return dwarf_reg_states_iterate (&c->dwarf, cb, token);
+}
diff --git a/contrib/libunwind/src/ppc64/Lapply_reg_state.c b/contrib/libunwind/src/ppc64/Lapply_reg_state.c
new file mode 100644
index 00000000000..7ebada480e5
--- /dev/null
+++ b/contrib/libunwind/src/ppc64/Lapply_reg_state.c
@@ -0,0 +1,5 @@
+#define UNW_LOCAL_ONLY
+#include <libunwind.h>
+#if defined(UNW_LOCAL_ONLY) && !defined(UNW_REMOTE_ONLY)
+#include "Gapply_reg_state.c"
+#endif
diff --git a/contrib/libunwind/src/ppc64/Lreg_states_iterate.c b/contrib/libunwind/src/ppc64/Lreg_states_iterate.c
new file mode 100644
index 00000000000..f1eb1e79dcd
--- /dev/null
+++ b/contrib/libunwind/src/ppc64/Lreg_states_iterate.c
@@ -0,0 +1,5 @@
+#define UNW_LOCAL_ONLY
+#include <libunwind.h>
+#if defined(UNW_LOCAL_ONLY) && !defined(UNW_REMOTE_ONLY)
+#include "Greg_states_iterate.c"
+#endif
diff --git a/contrib/libunwind/src/ptrace/_UPT_access_fpreg.c b/contrib/libunwind/src/ptrace/_UPT_access_fpreg.c
index e90ec47d081..2b92462fa92 100644
--- a/contrib/libunwind/src/ptrace/_UPT_access_fpreg.c
+++ b/contrib/libunwind/src/ptrace/_UPT_access_fpreg.c
@@ -75,6 +75,18 @@ _UPT_access_fpreg (unw_addr_space_t as, unw_regnum_t reg, unw_fpreg_t *val,
   pid_t pid = ui->pid;
   fpregset_t fpreg;

+#if defined(__amd64__)
+  if (1) /* XXXKIB */
+    return -UNW_EBADREG;
+#elif defined(__i386__)
+  if ((unsigned) reg < UNW_X86_ST0 || (unsigned) reg > UNW_X86_ST7)
+    return -UNW_EBADREG;
+#elif defined(__arm__)
+  if ((unsigned) reg < UNW_ARM_F0 || (unsigned) reg > UNW_ARM_F7)
+    return -UNW_EBADREG;
+#else
+#error Fix me
+#endif
   if ((unsigned) reg >= ARRAY_SIZE (_UPT_reg_offset))
     return -UNW_EBADREG;

@@ -85,6 +97,8 @@
   memcpy(&fpreg.fpr_xacc[reg], val, sizeof(unw_fpreg_t));
 #elif defined(__i386__)
   memcpy(&fpreg.fpr_acc[reg], val, sizeof(unw_fpreg_t));
+#elif defined(__arm__)
+  memcpy(&fpreg.fpr[reg], val, sizeof(unw_fpreg_t));
 #else
 #error Fix me
 #endif
@@ -95,6 +109,8 @@
   memcpy(val, &fpreg.fpr_xacc[reg], sizeof(unw_fpreg_t));
 #elif defined(__i386__)
   memcpy(val, &fpreg.fpr_acc[reg], sizeof(unw_fpreg_t));
+#elif defined(__arm__)
+  memcpy(val, &fpreg.fpr[reg], sizeof(unw_fpreg_t));
 #else
 #error Fix me
 #endif
diff --git a/contrib/libunwind/src/ptrace/_UPT_access_reg.c b/contrib/libunwind/src/ptrace/_UPT_access_reg.c
index ae71608b3df..ce25c783b04 100644
--- a/contrib/libunwind/src/ptrace/_UPT_access_reg.c
+++ b/contrib/libunwind/src/ptrace/_UPT_access_reg.c
@@ -34,7 +34,50 @@ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
*/ # include "tdep-ia64/rse.h" #endif -#if HAVE_DECL_PTRACE_POKEUSER || HAVE_TTRACE +#if HAVE_DECL_PTRACE_SETREGSET +#include +int +_UPT_access_reg (unw_addr_space_t as, unw_regnum_t reg, unw_word_t *val, + int write, void *arg) +{ + struct UPT_info *ui = arg; + pid_t pid = ui->pid; + gregset_t regs; + char *r; + struct iovec loc; + +#if UNW_DEBUG + Debug(16, "using getregset: reg: %s [%u], val: %lx, write: %u\n", + unw_regname(reg), (unsigned) reg, (long) val, write); + + if (write) + Debug (16, "%s [%u] <- %lx\n", unw_regname (reg), (unsigned) reg, (long) *val); +#endif + if ((unsigned) reg >= ARRAY_SIZE (_UPT_reg_offset)) + { + errno = EINVAL; + goto badreg; + } + + loc.iov_base = ®s; + loc.iov_len = sizeof(regs); + + r = (char *)®s + _UPT_reg_offset[reg]; + if (ptrace (PTRACE_GETREGSET, pid, NT_PRSTATUS, &loc) == -1) + goto badreg; + if (write) { + memcpy(r, val, sizeof(unw_word_t)); + if (ptrace(PTRACE_SETREGSET, pid, NT_PRSTATUS, &loc) == -1) + goto badreg; + } else + memcpy(val, r, sizeof(unw_word_t)); + return 0; + +badreg: + Debug (1, "bad register %s [%u] (error: %s)\n", unw_regname(reg), reg, strerror (errno)); + return -UNW_EBADREG; +} +#elif HAVE_DECL_PTRACE_POKEUSER || HAVE_TTRACE int _UPT_access_reg (unw_addr_space_t as, unw_regnum_t reg, unw_word_t *val, int write, void *arg) diff --git a/contrib/libunwind/src/ptrace/_UPT_reg_offset.c b/contrib/libunwind/src/ptrace/_UPT_reg_offset.c index 033594db0aa..c82d1c98872 100644 --- a/contrib/libunwind/src/ptrace/_UPT_reg_offset.c +++ b/contrib/libunwind/src/ptrace/_UPT_reg_offset.c @@ -484,6 +484,7 @@ const int _UPT_reg_offset[UNW_REG_LAST + 1] = #endif #elif defined(UNW_TARGET_ARM) +#if defined(__linux__) || defined(__FreeBSD__) [UNW_ARM_R0] = 0x00, [UNW_ARM_R1] = 0x04, [UNW_ARM_R2] = 0x08, @@ -500,6 +501,9 @@ const int _UPT_reg_offset[UNW_REG_LAST + 1] = [UNW_ARM_R13] = 0x34, [UNW_ARM_R14] = 0x38, [UNW_ARM_R15] = 0x3c, +#else +#error Fix me +#endif #elif defined(UNW_TARGET_MIPS) [UNW_MIPS_R0] = 0, [UNW_MIPS_R1] = 1, diff --git a/contrib/libunwind/src/ptrace/libunwind-ptrace.pc b/contrib/libunwind/src/ptrace/libunwind-ptrace.pc deleted file mode 100644 index df77448cf87..00000000000 --- a/contrib/libunwind/src/ptrace/libunwind-ptrace.pc +++ /dev/null @@ -1,11 +0,0 @@ -prefix=/usr/local -exec_prefix=${prefix} -libdir=${exec_prefix}/lib -includedir=${prefix}/include - -Name: libunwind-ptrace -Description: libunwind ptrace library -Version: 1.2 -Requires: libunwind-generic libunwind -Libs: -L${libdir} -lunwind-ptrace -Cflags: -I${includedir} diff --git a/contrib/libunwind/src/setjmp/libunwind-setjmp.pc b/contrib/libunwind/src/setjmp/libunwind-setjmp.pc deleted file mode 100644 index 680251db27a..00000000000 --- a/contrib/libunwind/src/setjmp/libunwind-setjmp.pc +++ /dev/null @@ -1,11 +0,0 @@ -prefix=/usr/local -exec_prefix=${prefix} -libdir=${exec_prefix}/lib -includedir=${prefix}/include - -Name: libunwind-setjmp -Description: libunwind setjmp library -Version: 1.2 -Requires: libunwind -Libs: -L${libdir} -lunwind-setjmp -Cflags: -I${includedir} diff --git a/contrib/libunwind/src/sh/Ginit.c b/contrib/libunwind/src/sh/Ginit.c index b380db1da62..0bfac4907ee 100644 --- a/contrib/libunwind/src/sh/Ginit.c +++ b/contrib/libunwind/src/sh/Ginit.c @@ -171,7 +171,7 @@ HIDDEN void sh_local_addr_space_init (void) { memset (&local_addr_space, 0, sizeof (local_addr_space)); - local_addr_space.caching_policy = UNW_CACHE_GLOBAL; + local_addr_space.caching_policy = UNWI_DEFAULT_CACHING_POLICY; local_addr_space.acc.find_proc_info = 
dwarf_find_proc_info; local_addr_space.acc.put_unwind_info = put_unwind_info; local_addr_space.acc.get_dyn_info_list_addr = get_dyn_info_list_addr; diff --git a/contrib/libunwind/src/sh/Ginit_local.c b/contrib/libunwind/src/sh/Ginit_local.c index 598f708a36f..36d1329b6ff 100644 --- a/contrib/libunwind/src/sh/Ginit_local.c +++ b/contrib/libunwind/src/sh/Ginit_local.c @@ -59,9 +59,20 @@ unw_init_local (unw_cursor_t *cursor, unw_context_t *uc) } PROTECTED int -unw_init_local_signal (unw_cursor_t *cursor, unw_context_t *uc) +unw_init_local2 (unw_cursor_t *cursor, ucontext_t *uc, int flag) { - return unw_init_local_common(cursor, uc, 0); + if (!flag) + { + return unw_init_local_common(cursor, uc, 1); + } + else if (flag == UNW_INIT_SIGNAL_FRAME) + { + return unw_init_local_common(cursor, uc, 0); + } + else + { + return -UNW_EINVAL; + } } #endif /* !UNW_REMOTE_ONLY */ diff --git a/contrib/libunwind/src/sh/Gregs.c b/contrib/libunwind/src/sh/Gregs.c index fb4ca740037..7d8e8e93da0 100644 --- a/contrib/libunwind/src/sh/Gregs.c +++ b/contrib/libunwind/src/sh/Gregs.c @@ -33,6 +33,9 @@ tdep_access_reg (struct cursor *c, unw_regnum_t reg, unw_word_t *valp, switch (reg) { + case UNW_SH_PC: + if (write) + c->dwarf.ip = *valp; /* update the IP cache */ case UNW_SH_R0: case UNW_SH_R1: case UNW_SH_R2: @@ -48,7 +51,6 @@ tdep_access_reg (struct cursor *c, unw_regnum_t reg, unw_word_t *valp, case UNW_SH_R12: case UNW_SH_R13: case UNW_SH_R14: - case UNW_SH_PC: case UNW_SH_PR: loc = c->dwarf.loc[reg]; break; diff --git a/contrib/libunwind/src/tilegx/Ginit.c b/contrib/libunwind/src/tilegx/Ginit.c index df3ffcaa643..a0bb69d7719 100644 --- a/contrib/libunwind/src/tilegx/Ginit.c +++ b/contrib/libunwind/src/tilegx/Ginit.c @@ -152,7 +152,7 @@ tilegx_local_addr_space_init (void) local_addr_space.abi = UNW_TILEGX_ABI_N64; local_addr_space.addr_size = sizeof (void *); - local_addr_space.caching_policy = UNW_CACHE_GLOBAL; + local_addr_space.caching_policy = UNWI_DEFAULT_CACHING_POLICY; local_addr_space.acc.find_proc_info = dwarf_find_proc_info; local_addr_space.acc.put_unwind_info = put_unwind_info; local_addr_space.acc.get_dyn_info_list_addr = get_dyn_info_list_addr; diff --git a/contrib/libunwind/src/tilegx/Ginit_local.c b/contrib/libunwind/src/tilegx/Ginit_local.c index 800dc00fb8c..6aa679f05df 100644 --- a/contrib/libunwind/src/tilegx/Ginit_local.c +++ b/contrib/libunwind/src/tilegx/Ginit_local.c @@ -61,9 +61,20 @@ unw_init_local (unw_cursor_t *cursor, ucontext_t *uc) } PROTECTED int -unw_init_local_signal (unw_cursor_t *cursor, ucontext_t *uc) +unw_init_local2 (unw_cursor_t *cursor, ucontext_t *uc, int flag) { - return unw_init_local_common(cursor, uc, 0); + if (!flag) + { + return unw_init_local_common(cursor, uc, 1); + } + else if (flag == UNW_INIT_SIGNAL_FRAME) + { + return unw_init_local_common(cursor, uc, 0); + } + else + { + return -UNW_EINVAL; + } } #endif /* !UNW_REMOTE_ONLY */ diff --git a/contrib/libunwind/src/tilegx/Gregs.c b/contrib/libunwind/src/tilegx/Gregs.c index 53e7bf4ce80..565c6f4432a 100644 --- a/contrib/libunwind/src/tilegx/Gregs.c +++ b/contrib/libunwind/src/tilegx/Gregs.c @@ -52,7 +52,17 @@ tdep_access_reg (struct cursor *c, unw_regnum_t reg, unw_word_t *valp, } if (write) - return dwarf_put (&c->dwarf, loc, *valp); + { + if (ci->dwarf.use_prev_instr == 0) { + if (reg == UNW_TILEGX_PC) + c->dwarf.ip = *valp; /* update the IP cache */ + } + else { + if (reg == UNW_TILEGX_R55) + c->dwarf.ip = *valp; /* update the IP cache */ + } + return dwarf_put (&c->dwarf, loc, *valp); + } else return dwarf_get 
(&c->dwarf, loc, valp); } diff --git a/contrib/libunwind/src/unwind/libunwind.pc b/contrib/libunwind/src/unwind/libunwind.pc deleted file mode 100644 index 987d55c22b3..00000000000 --- a/contrib/libunwind/src/unwind/libunwind.pc +++ /dev/null @@ -1,11 +0,0 @@ -prefix=/usr/local -exec_prefix=${prefix} -libdir=${exec_prefix}/lib -includedir=${prefix}/include - -Name: libunwind -Description: libunwind base library -Version: 1.2 -Libs: -L${libdir} -lunwind -Libs.private: -llzma -Cflags: -I${includedir} diff --git a/contrib/libunwind/src/x86/Ginit.c b/contrib/libunwind/src/x86/Ginit.c index b05a08edba3..876990fe1cd 100644 --- a/contrib/libunwind/src/x86/Ginit.c +++ b/contrib/libunwind/src/x86/Ginit.c @@ -228,7 +228,7 @@ HIDDEN void x86_local_addr_space_init (void) { memset (&local_addr_space, 0, sizeof (local_addr_space)); - local_addr_space.caching_policy = UNW_CACHE_GLOBAL; + local_addr_space.caching_policy = UNWI_DEFAULT_CACHING_POLICY; local_addr_space.acc.find_proc_info = dwarf_find_proc_info; local_addr_space.acc.put_unwind_info = put_unwind_info; local_addr_space.acc.get_dyn_info_list_addr = get_dyn_info_list_addr; diff --git a/contrib/libunwind/src/x86/Ginit_local.c b/contrib/libunwind/src/x86/Ginit_local.c index 025c84cb9d2..88c52de3848 100644 --- a/contrib/libunwind/src/x86/Ginit_local.c +++ b/contrib/libunwind/src/x86/Ginit_local.c @@ -60,9 +60,20 @@ unw_init_local (unw_cursor_t *cursor, ucontext_t *uc) } PROTECTED int -unw_init_local_signal (unw_cursor_t *cursor, ucontext_t *uc) +unw_init_local2 (unw_cursor_t *cursor, ucontext_t *uc, int flag) { - return unw_init_local_common(cursor, uc, 0); + if (!flag) + { + return unw_init_local_common(cursor, uc, 1); + } + else if (flag == UNW_INIT_SIGNAL_FRAME) + { + return unw_init_local_common(cursor, uc, 0); + } + else + { + return -UNW_EINVAL; + } } #endif /* !UNW_REMOTE_ONLY */ diff --git a/contrib/libunwind/src/x86/Gos-linux.c b/contrib/libunwind/src/x86/Gos-linux.c index 17aebc2974a..37a22b97b2f 100644 --- a/contrib/libunwind/src/x86/Gos-linux.c +++ b/contrib/libunwind/src/x86/Gos-linux.c @@ -52,7 +52,7 @@ unw_is_signal_frame (unw_cursor_t *cursor) __restore_rt: 0xb8 0xad 0x00 0x00 0x00 movl 0xad,%eax 0xcd 0x80 int 0x80 - 0x00 + 0x00 if SA_SIGINFO is specified. */ @@ -296,7 +296,7 @@ x86_local_resume (unw_addr_space_t as, unw_cursor_t *cursor, void *arg) struct sigcontext *sc = (struct sigcontext *) c->sigcontext_addr; Debug (8, "resuming at ip=%x via sigreturn(%p)\n", c->dwarf.ip, sc); - sigreturn (sc); + x86_sigreturn (sc); } else { @@ -305,4 +305,25 @@ x86_local_resume (unw_addr_space_t as, unw_cursor_t *cursor, void *arg) } return -UNW_EINVAL; } + +/* sigreturn() is a no-op on x86 glibc. 
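+   The handler below therefore performs the job by hand: it copies the
+   preserved registers saved in the cursor back into the signal frame's
+   mcontext and invokes the kernel's rt_sigreturn directly, so execution
+   resumes at the interrupted context.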
*/ +HIDDEN void +x86_sigreturn (unw_cursor_t *cursor) +{ + struct cursor *c = (struct cursor *) cursor; + struct sigcontext *sc = (struct sigcontext *) c->sigcontext_addr; + mcontext_t *sc_mcontext = &((struct ucontext*)sc)->uc_mcontext; + /* Copy in saved uc - all preserved regs are at the start of sigcontext */ + memcpy(sc_mcontext, &c->uc->uc_mcontext, + DWARF_NUM_PRESERVED_REGS * sizeof(unw_word_t)); + + Debug (8, "resuming at ip=%llx via sigreturn(%p)\n", + (unsigned long long) c->dwarf.ip, sc); + __asm__ __volatile__ ("mov %0, %%esp;" + "mov %1, %%eax;" + "syscall" + :: "r"(sc), "i"(SYS_rt_sigreturn) + : "memory"); + abort(); +} #endif diff --git a/contrib/libunwind/src/x86/unwind_i.h b/contrib/libunwind/src/x86/unwind_i.h index cd52824226a..d2aed609e57 100644 --- a/contrib/libunwind/src/x86/unwind_i.h +++ b/contrib/libunwind/src/x86/unwind_i.h @@ -52,6 +52,7 @@ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. */ #define x86_scratch_loc UNW_OBJ(scratch_loc) #define x86_get_scratch_loc UNW_OBJ(get_scratch_loc) #define x86_r_uc_addr UNW_OBJ(r_uc_addr) +#define x86_sigreturn UNW_OBJ(sigreturn) extern void x86_local_addr_space_init (void); extern int x86_local_resume (unw_addr_space_t as, unw_cursor_t *cursor, @@ -60,4 +61,6 @@ extern dwarf_loc_t x86_scratch_loc (struct cursor *c, unw_regnum_t reg); extern dwarf_loc_t x86_get_scratch_loc (struct cursor *c, unw_regnum_t reg); extern void *x86_r_uc_addr (ucontext_t *uc, int reg); +extern void x86_sigreturn (unw_cursor_t *cursor); + #endif /* unwind_i_h */ diff --git a/contrib/libunwind/src/x86_64/Ginit.c b/contrib/libunwind/src/x86_64/Ginit.c index 782757622e5..c66d59ba685 100644 --- a/contrib/libunwind/src/x86_64/Ginit.c +++ b/contrib/libunwind/src/x86_64/Ginit.c @@ -72,10 +72,57 @@ get_dyn_info_list_addr (unw_addr_space_t as, unw_word_t *dyn_info_list_addr, #define PAGE_SIZE 4096 #define PAGE_START(a) ((a) & ~(PAGE_SIZE-1)) +static int mem_validate_pipe[2] = {-1, -1}; + +static inline void +open_pipe (void) +{ + /* ignore errors for closing invalid fd's */ + close (mem_validate_pipe[0]); + close (mem_validate_pipe[1]); + + pipe2 (mem_validate_pipe, O_CLOEXEC | O_NONBLOCK); +} + +ALWAYS_INLINE +static int +write_validate (void *addr) +{ + int ret = -1; + ssize_t bytes = 0; + + do + { + char buf; + bytes = read (mem_validate_pipe[0], &buf, 1); + } + while ( errno == EINTR ); + + int valid_read = (bytes > 0 || errno == EAGAIN || errno == EWOULDBLOCK); + if (!valid_read) + { + // re-open closed pipe + open_pipe (); + } + + do + { + ret = write (mem_validate_pipe[1], addr, 1); + } + while ( errno == EINTR ); + + return ret; +} + static int (*mem_validate_func) (void *addr, size_t len); static int msync_validate (void *addr, size_t len) { - return msync (addr, len, MS_ASYNC); + if (msync (addr, len, MS_ASYNC) != 0) + { + return -1; + } + + return write_validate (addr); } #ifdef HAVE_MINCORE @@ -96,7 +143,7 @@ static int mincore_validate (void *addr, size_t len) if (!(mvec[i] & 1)) return -1; } - return 0; + return write_validate (addr); } #endif @@ -107,6 +154,8 @@ static int mincore_validate (void *addr, size_t len) HIDDEN void tdep_init_mem_validate (void) { + open_pipe (); + #ifdef HAVE_MINCORE unsigned char present = 1; unw_word_t addr = PAGE_START((unw_word_t)&present); @@ -273,7 +322,7 @@ HIDDEN void x86_64_local_addr_space_init (void) { memset (&local_addr_space, 0, sizeof (local_addr_space)); - local_addr_space.caching_policy = UNW_CACHE_GLOBAL; + local_addr_space.caching_policy = UNWI_DEFAULT_CACHING_POLICY; 
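  /* Illustrative sketch, not part of the original patch: write_validate()
     above probes a possibly-unmapped address by asking the kernel to copy one
     byte from it into a non-blocking pipe, so a bad pointer yields EFAULT
     rather than a SIGSEGV inside the unwinder.  A minimal standalone variant
     (names are illustrative; assumes <unistd.h>, <fcntl.h> and _GNU_SOURCE
     for pipe2) could look like:

         static int probe_fds[2] = { -1, -1 };

         static int validate_mem (void *addr)
         {
           char buf;
           if (probe_fds[1] == -1 && pipe2 (probe_fds, O_CLOEXEC | O_NONBLOCK) != 0)
             return -1;                      // cannot create the probe pipe
           read (probe_fds[0], &buf, 1);     // drain so the pipe cannot fill up
           return write (probe_fds[1], addr, 1) == 1 ? 0 : -1;  // EFAULT => bad addr
         }
  */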
local_addr_space.acc.find_proc_info = dwarf_find_proc_info; local_addr_space.acc.put_unwind_info = put_unwind_info; local_addr_space.acc.get_dyn_info_list_addr = get_dyn_info_list_addr; diff --git a/contrib/libunwind/src/x86_64/Ginit_local.c b/contrib/libunwind/src/x86_64/Ginit_local.c index 2d2b1754b7a..7696f11caf9 100644 --- a/contrib/libunwind/src/x86_64/Ginit_local.c +++ b/contrib/libunwind/src/x86_64/Ginit_local.c @@ -62,9 +62,20 @@ unw_init_local (unw_cursor_t *cursor, ucontext_t *uc) } PROTECTED int -unw_init_local_signal (unw_cursor_t *cursor, ucontext_t *uc) +unw_init_local2 (unw_cursor_t *cursor, ucontext_t *uc, int flag) { - return unw_init_local_common(cursor, uc, 0); + if (!flag) + { + return unw_init_local_common(cursor, uc, 1); + } + else if (flag == UNW_INIT_SIGNAL_FRAME) + { + return unw_init_local_common(cursor, uc, 0); + } + else + { + return -UNW_EINVAL; + } } #endif /* !UNW_REMOTE_ONLY */ diff --git a/contrib/libunwind/src/x86_64/Gos-linux.c b/contrib/libunwind/src/x86_64/Gos-linux.c index a4f80cad360..0a3c21221fb 100644 --- a/contrib/libunwind/src/x86_64/Gos-linux.c +++ b/contrib/libunwind/src/x86_64/Gos-linux.c @@ -138,7 +138,7 @@ x86_64_sigreturn (unw_cursor_t *cursor) { struct cursor *c = (struct cursor *) cursor; struct sigcontext *sc = (struct sigcontext *) c->sigcontext_addr; - mcontext_t *sc_mcontext = &((struct ucontext*)sc)->uc_mcontext; + mcontext_t *sc_mcontext = &((ucontext_t*)sc)->uc_mcontext; /* Copy in saved uc - all preserved regs are at the start of sigcontext */ memcpy(sc_mcontext, &c->uc->uc_mcontext, DWARF_NUM_PRESERVED_REGS * sizeof(unw_word_t)); diff --git a/contrib/libunwind/src/x86_64/init.h b/contrib/libunwind/src/x86_64/init.h index 3ceab791474..a7a996f1272 100644 --- a/contrib/libunwind/src/x86_64/init.h +++ b/contrib/libunwind/src/x86_64/init.h @@ -83,6 +83,7 @@ common_init (struct cursor *c, unsigned use_prev_instr) c->dwarf.pi_is_dynamic = 0; c->dwarf.hint = 0; c->dwarf.prev_rs = 0; + c->dwarf.eh_valid_mask = 0; return 0; } diff --git a/libs/libdaemon/src/BaseDaemon.cpp b/libs/libdaemon/src/BaseDaemon.cpp index 375f8dfa9cb..c25f8036f8b 100644 --- a/libs/libdaemon/src/BaseDaemon.cpp +++ b/libs/libdaemon/src/BaseDaemon.cpp @@ -192,7 +192,7 @@ size_t backtraceLibUnwind(void ** out_frames, size_t max_frames, ucontext_t & co unw_cursor_t cursor; - if (unw_init_local_signal(&cursor, &context) < 0) + if (unw_init_local2(&cursor, &context, UNW_INIT_SIGNAL_FRAME) < 0) return 0; size_t i = 0; From 5fdede6d8555e735e270447067ae8b16a2aca5e1 Mon Sep 17 00:00:00 2001 From: robot-metrika-test Date: Fri, 29 Sep 2017 01:08:47 +0300 Subject: [PATCH 30/63] Auto version update to [54294] --- dbms/cmake/version.cmake | 4 ++-- debian/changelog | 4 ++-- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/dbms/cmake/version.cmake b/dbms/cmake/version.cmake index 52b6e9f7b31..f8aceaeac86 100644 --- a/dbms/cmake/version.cmake +++ b/dbms/cmake/version.cmake @@ -1,6 +1,6 @@ # This strings autochanged from release_lib.sh: -set(VERSION_DESCRIBE v1.1.54292-testing) -set(VERSION_REVISION 54292) +set(VERSION_DESCRIBE v1.1.54294-testing) +set(VERSION_REVISION 54294) # end of autochange set (VERSION_MAJOR 1) diff --git a/debian/changelog b/debian/changelog index 72c8190bf58..1872c8a5e9b 100644 --- a/debian/changelog +++ b/debian/changelog @@ -1,5 +1,5 @@ -clickhouse (1.1.54292) unstable; urgency=low +clickhouse (1.1.54294) unstable; urgency=low * Modified source code - -- Wed, 20 Sep 2017 21:05:46 +0300 + -- Fri, 29 Sep 2017 01:08:46 +0300 From 
03bad49a0ef9e9d622b190965e1450763432cd36 Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Fri, 29 Sep 2017 01:46:52 +0300 Subject: [PATCH 31/63] Added changelog for version 54292 [#CLICKHOUSE-3]. --- CHANGELOG_RU.md | 17 +++++++++++++++++ 1 file changed, 17 insertions(+) diff --git a/CHANGELOG_RU.md b/CHANGELOG_RU.md index 4003edde1fd..97dce55a89e 100644 --- a/CHANGELOG_RU.md +++ b/CHANGELOG_RU.md @@ -1,3 +1,20 @@ +# Релиз ClickHouse 1.1.54292 + +## Новые возможности: +* Добавлена функция `pointInPolygon` для работы с координатами на плоскости. +* Добавлена агрегатная функция `sumMap`, обеспечивающая суммирование массивов аналогично `SummingMergeTree`. +* Добавлена функция `trunc`. Увеличена производительность функций округления `round`, `floor`, `ceil`, `roundToExp2`. Исправлена логика работы функций округления. Изменена логика работы функции `roundToExp2` для дробных и отрицательных чисел. +* Ослаблена зависимость исполняемого файла ClickHouse от версии libc. Один и тот же исполняемый файл ClickHouse может запускаться и работать на широком множестве Linux систем. Замечание: зависимость всё ещё присутствует при использовании скомпилированных запросов (настройка `compile = 1`, по-умолчанию не используется). +* Уменьшено время динамической компиляции запросов. + +## Исправления ошибок: +* Исправлена ошибка, которая могла приводить к сообщениям `part ... intersects previous part` и нарушению консистентности реплик. +* Исправлена ошибка, приводящая к блокировке при завершении работы сервера, если в это время ZooKeeper недоступен. +* Удалено избыточное логгирование при восстановлении реплик. +* Исправлена ошибка в реализации UNION ALL. +* Исправлена ошибка в функции concat, возникающая в случае, если первый столбец блока имеет тип Array. +* Исправлено отображение прогресса в таблице system.merges. 
+ # Релиз ClickHouse 1.1.54289 ## Новые возможности: From 4e4355a210ac6394b7c84b3e48fa79d4be7b29c8 Mon Sep 17 00:00:00 2001 From: proller Date: Sat, 30 Sep 2017 14:04:56 +0300 Subject: [PATCH 32/63] Prevent infinite recursion in faultSignalHandler (#CLICKHOUSE-3304) (#1288) * Prevent infinite recursion in faultSignalHandler (#CLICKHOUSE-3304) 21 0x00000000012a0cbd in faultSignalHandler (sig=, info=0x7ffcacfd5730, context=0x7ffcacfd5600) at /home/robot-metrika-test/jenkins/workspace/clickhouse-packages-build@2/sources/libs/libdaemon/src/BaseDaemon.cpp:166 22 23 0x00007fc770f90c37 in raise () from /lib/x86_64-linux-gnu/libc.so.6 24 0x00007fc770f94028 in abort () from /lib/x86_64-linux-gnu/libc.so.6 25 0x00000000012a0d5f in terminate_handler () at /home/robot-metrika-test/jenkins/workspace/clickhouse-packages-build@2/sources/libs/libdaemon/src/BaseDaemon.cpp:404 26 0x00000000037c8836 in __cxxabiv1::__terminate(void (*)()) () 27 0x000000000382d379 in __cxa_call_terminate () 28 0x00000000037d475d in __gxx_personality_v0 () 29 0x00000000038331a3 in _Unwind_RaiseException_Phase2 () 30 0x00000000038339c7 in _Unwind_Resume () 31 0x00000000012a0cbd in faultSignalHandler (sig=, info=0x7ffcacfd6ef0, context=0x7ffcacfd6dc0) at /home/robot-metrika-test/jenkins/workspace/clickhouse-packages-build@2/sources/libs/libdaemon/src/BaseDaemon.cpp:166 32 33 0x00007fc770f90c37 in raise () from /lib/x86_64-linux-gnu/libc.so.6 34 0x00007fc770f94028 in abort () from /lib/x86_64-linux-gnu/libc.so.6 35 0x00000000012a0d5f in terminate_handler () at /home/robot-metrika-test/jenkins/workspace/clickhouse-packages-build@2/sources/libs/libdaemon/src/BaseDaemon.cpp:404 36 0x00000000037c8836 in __cxxabiv1::__terminate(void (*)()) () 37 0x000000000382d379 in __cxa_call_terminate () 38 0x00000000037d475d in __gxx_personality_v0 () 39 0x00000000038331a3 in _Unwind_RaiseException_Phase2 () 40 0x00000000038339c7 in _Unwind_Resume () 41 0x00000000012a0cbd in faultSignalHandler (sig=, info=0x7ffcacfd86b0, context=0x7ffcacfd8580) at /home/robot-metrika-test/jenkins/workspace/clickhouse-packages-build@2/sources/libs/libdaemon/src/BaseDaemon.cpp:166 * Requested changes --- libs/libdaemon/src/BaseDaemon.cpp | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/libs/libdaemon/src/BaseDaemon.cpp b/libs/libdaemon/src/BaseDaemon.cpp index 375f8dfa9cb..533a4f9838b 100644 --- a/libs/libdaemon/src/BaseDaemon.cpp +++ b/libs/libdaemon/src/BaseDaemon.cpp @@ -161,10 +161,16 @@ static void terminateRequestedSignalHandler(int sig, siginfo_t * info, void * co } +thread_local bool already_signal_handled = false; + /** Обработчик некоторых сигналов. Выводит информацию в лог (если получится). */ static void faultSignalHandler(int sig, siginfo_t * info, void * context) { + if (already_signal_handled) + return; + already_signal_handled = true; + char buf[buf_size]; DB::WriteBufferFromFileDescriptor out(signal_pipe.write_fd, buf_size, buf); From 97277ed691ae479db9a06165d250033cba6fb215 Mon Sep 17 00:00:00 2001 From: Vitaliy Lyudvichenko Date: Mon, 25 Sep 2017 22:45:15 +0300 Subject: [PATCH 33/63] Return 411 code in case of empty body without Content-Length head. 
[#CLICKHOUSE-3333] --- dbms/src/Core/ErrorCodes.cpp | 1 + dbms/src/Server/HTTPHandler.cpp | 22 +++++++++++-------- .../0_stateless/00501_http_head.reference | 2 -- .../queries/0_stateless/00501_http_head.sh | 5 +++-- 4 files changed, 17 insertions(+), 13 deletions(-) diff --git a/dbms/src/Core/ErrorCodes.cpp b/dbms/src/Core/ErrorCodes.cpp index 6afa1361a5a..40e76c82ca1 100644 --- a/dbms/src/Core/ErrorCodes.cpp +++ b/dbms/src/Core/ErrorCodes.cpp @@ -383,6 +383,7 @@ namespace ErrorCodes extern const int DATA_TYPE_CANNOT_HAVE_ARGUMENTS = 378; extern const int UNKNOWN_STATUS_OF_DISTRIBUTED_DDL_TASK = 379; extern const int CANNOT_KILL = 380; + extern const int HTTP_LENGTH_REQUIRED = 381; extern const int KEEPER_EXCEPTION = 999; extern const int POCO_EXCEPTION = 1000; diff --git a/dbms/src/Server/HTTPHandler.cpp b/dbms/src/Server/HTTPHandler.cpp index ac7ef62c5ba..7936ebeeb53 100644 --- a/dbms/src/Server/HTTPHandler.cpp +++ b/dbms/src/Server/HTTPHandler.cpp @@ -84,6 +84,7 @@ namespace ErrorCodes extern const int REQUIRED_PASSWORD; extern const int INVALID_SESSION_TIMEOUT; + extern const int HTTP_LENGTH_REQUIRED; } @@ -127,6 +128,8 @@ static Poco::Net::HTTPResponse::HTTPStatus exceptionCodeToHTTPStatus(int excepti else if (exception_code == ErrorCodes::SOCKET_TIMEOUT || exception_code == ErrorCodes::CANNOT_OPEN_FILE) return HTTPResponse::HTTP_SERVICE_UNAVAILABLE; + else if (exception_code == ErrorCodes::HTTP_LENGTH_REQUIRED) + return HTTPResponse::HTTP_LENGTH_REQUIRED; return HTTPResponse::HTTP_INTERNAL_SERVER_ERROR; } @@ -379,14 +382,7 @@ void HTTPHandler::processQuery( std::unique_ptr in_param = std::make_unique(query_param); - std::unique_ptr in_post_raw; - /// A grubby workaround for CLICKHOUSE-3333 problem. This condition should detect POST query with empty body. - /// In that case Poco doesn't work properly and returns HTTPInputStream which just listen TCP connection. - /// NOTE: if Poco are updated, this heuristic might not work properly. - if (typeid_cast(&istr) == nullptr) - in_post_raw = std::make_unique(istr); - else - in_post_raw = std::make_unique(String()); // will read empty body. + std::unique_ptr in_post_raw = std::make_unique(istr); /// Request body can be compressed using algorithm specified in the Content-Encoding header. std::unique_ptr in_post; @@ -567,7 +563,8 @@ void HTTPHandler::trySendExceptionToClient(const std::string & s, int exception_ /// to avoid reading part of the current request body in the next request. if (request.getMethod() == Poco::Net::HTTPRequest::HTTP_POST && response.getKeepAlive() - && !request.stream().eof()) + && !request.stream().eof() + && exception_code != ErrorCodes::HTTP_LENGTH_REQUIRED) { request.stream().ignore(std::numeric_limits::max()); } @@ -642,6 +639,13 @@ void HTTPHandler::handleRequest(Poco::Net::HTTPServerRequest & request, Poco::Ne HTMLForm params(request); with_stacktrace = params.getParsed("stacktrace", false); + /// Workaround. Poco does not detect 411 Length Required case. 
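+    /// (RFC 7231 defines 411 Length Required: the client must resend with a length.
+    /// A bare `curl -X POST` with no body, as exercised by 00501_http_head.sh, now
+    /// fails fast here instead of blocking on an empty HTTPInputStream.)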
+ if (request.getMethod() == Poco::Net::HTTPRequest::HTTP_POST && !request.getChunkedTransferEncoding() && + !request.hasContentLength()) + { + throw Exception("There is neither Transfer-Encoding header nor Content-Length header", ErrorCodes::HTTP_LENGTH_REQUIRED); + } + processQuery(request, params, response, used_output); LOG_INFO(log, "Done processing query"); } diff --git a/dbms/tests/queries/0_stateless/00501_http_head.reference b/dbms/tests/queries/0_stateless/00501_http_head.reference index 80d87d6f7ab..9727c63b2d8 100644 --- a/dbms/tests/queries/0_stateless/00501_http_head.reference +++ b/dbms/tests/queries/0_stateless/00501_http_head.reference @@ -10,5 +10,3 @@ Content-Type: text/tab-separated-values; charset=UTF-8 Transfer-Encoding: chunked Keep-Alive: timeout=3 -1 -1 diff --git a/dbms/tests/queries/0_stateless/00501_http_head.sh b/dbms/tests/queries/0_stateless/00501_http_head.sh index 1510df71293..f6e7e10c6a8 100755 --- a/dbms/tests/queries/0_stateless/00501_http_head.sh +++ b/dbms/tests/queries/0_stateless/00501_http_head.sh @@ -3,5 +3,6 @@ ( curl -s --head "${CLICKHOUSE_URL:=http://localhost:8123/}?query=SELECT%201"; curl -s --head "${CLICKHOUSE_URL:=http://localhost:8123/}?query=select+*+from+system.numbers+limit+1000000" ) | grep -v "Date:" -curl -sS -X POST "http://127.0.0.1:8123?query=SELECT+1" -curl -sS -X POST "http://127.0.0.1:8123?query=SELECT+1" --data '' +if [[ `curl -sS -X POST -I "http://127.0.0.1:8123?query=SELECT+1" | grep -c '411 Length Required'` -ne 1 ]]; then + echo FAIL +fi From f08545b9b8255c809de92f444deaab13e6617c57 Mon Sep 17 00:00:00 2001 From: Vitaliy Lyudvichenko Date: Tue, 3 Oct 2017 19:21:25 +0300 Subject: [PATCH 34/63] Stop tests execution on KeyboardInterrupt. [#CLICKHOUSE-2] --- dbms/tests/clickhouse-test | 3 +++ 1 file changed, 3 insertions(+) diff --git a/dbms/tests/clickhouse-test b/dbms/tests/clickhouse-test index 4619b4f3848..8a2d547cd0c 100755 --- a/dbms/tests/clickhouse-test +++ b/dbms/tests/clickhouse-test @@ -232,6 +232,9 @@ def main(args): os.remove(stdout_file) if os.path.exists(stderr_file): os.remove(stderr_file) + except KeyboardInterrupt as e: + print(colored("Break tests execution", "red")) + raise e except: (exc_type, exc_value) = sys.exc_info()[:2] error = et.Element("error", attrib = {"type": exc_type.__name__, "message": str(exc_value)}) From 097357799d6104f8fc6d0a0ca356ba80152c0992 Mon Sep 17 00:00:00 2001 From: proller Date: Tue, 3 Oct 2017 21:31:32 +0300 Subject: [PATCH 35/63] Tests: --random option; set some env variables for .sh tests --- dbms/tests/clickhouse-test | 11 ++++++++++- dbms/tests/queries/0_stateless/00501_http_head.sh | 8 +++++--- 2 files changed, 15 insertions(+), 4 deletions(-) diff --git a/dbms/tests/clickhouse-test b/dbms/tests/clickhouse-test index 8a2d547cd0c..c55b8d13186 100755 --- a/dbms/tests/clickhouse-test +++ b/dbms/tests/clickhouse-test @@ -18,6 +18,7 @@ from datetime import datetime from time import sleep from errno import ESRCH from termcolor import colored +from random import random OP_SQUARE_BRACKET = colored("[", attrs=['bold']) @@ -89,6 +90,11 @@ def main(args): failures_total = 0 + os.environ.setdefault("CLICKHOUSE_CLIENT", args.client) + os.environ.setdefault("CLICKHOUSE_BINARY", "clickhouse") + os.environ.setdefault("CLICKHOUSE_URL", "http://localhost:8123/") + os.environ.setdefault("CLICKHOUSE_CONFIG", "/etc/clickhouse-server/config.xml") + for suite in sorted(os.listdir(base_dir)): if SERVER_DIED: break @@ -109,6 +115,8 @@ def main(args): # Reverse sort order: we want run newest test 
first. # And not reverse subtests def key_func(item): + if args.random: + return random() prefix, suffix = item.split('_', 1) return -int(prefix), suffix for case in sorted(filter(lambda case: re.search(args.test, case) if args.test else True, os.listdir(suite_dir)), key=key_func): @@ -262,7 +270,8 @@ if __name__ == '__main__': parser.add_argument('-o', '--output', help = 'Output xUnit compliant test report directory') parser.add_argument('-t', '--timeout', type = int, default = 600, help = 'Timeout for each test case in seconds') parser.add_argument('test', nargs = '?', help = 'Optional test case name regex') - parser.add_argument('--stop', action = 'store_true', default = None, dest = 'stop', help = 'Stop on network errors ') + parser.add_argument('--stop', action = 'store_true', default = None, dest = 'stop', help = 'Stop on network errors') + parser.add_argument('--random', action = 'store_true', default = None, dest = 'random', help = 'Randomize tests order') group = parser.add_mutually_exclusive_group(required = False) group.add_argument('--zookeeper', action = 'store_true', default = None, dest = 'zookeeper', help = 'Run zookeeper related tests') diff --git a/dbms/tests/queries/0_stateless/00501_http_head.sh b/dbms/tests/queries/0_stateless/00501_http_head.sh index f6e7e10c6a8..75093295f21 100755 --- a/dbms/tests/queries/0_stateless/00501_http_head.sh +++ b/dbms/tests/queries/0_stateless/00501_http_head.sh @@ -1,8 +1,10 @@ #!/usr/bin/env bash -( curl -s --head "${CLICKHOUSE_URL:=http://localhost:8123/}?query=SELECT%201"; - curl -s --head "${CLICKHOUSE_URL:=http://localhost:8123/}?query=select+*+from+system.numbers+limit+1000000" ) | grep -v "Date:" +CLICKHOUSE_URL=${CLICKHOUSE_URL:=http://localhost:8123/} -if [[ `curl -sS -X POST -I "http://127.0.0.1:8123?query=SELECT+1" | grep -c '411 Length Required'` -ne 1 ]]; then +( curl -s --head "${CLICKHOUSE_URL}?query=SELECT%201"; + curl -s --head "${CLICKHOUSE_URL}?query=select+*+from+system.numbers+limit+1000000" ) | grep -v "Date:" + +if [[ `curl -sS -X POST -I "${CLICKHOUSE_URL}?query=SELECT+1" | grep -c '411 Length Required'` -ne 1 ]]; then echo FAIL fi From 6e8f4c6fecf17d8b2fa6175e043e856461c60181 Mon Sep 17 00:00:00 2001 From: Vitaly Samigullin Date: Tue, 3 Oct 2017 18:33:52 +0300 Subject: [PATCH 36/63] Locale fixed for client in docker --- docker/client/Dockerfile | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/docker/client/Dockerfile b/docker/client/Dockerfile index aab26246d29..d96eb4046b8 100644 --- a/docker/client/Dockerfile +++ b/docker/client/Dockerfile @@ -8,8 +8,13 @@ RUN apt-get update && \ mkdir -p /etc/apt/sources.list.d && \ echo $repository | tee /etc/apt/sources.list.d/clickhouse.list && \ apt-get update && \ - apt-get install --allow-unauthenticated -y clickhouse-client=$version && \ + apt-get install --allow-unauthenticated -y clickhouse-client=$version locales && \ rm -rf /var/lib/apt/lists/* /var/cache/debconf && \ apt-get clean +RUN locale-gen en_US.UTF-8 +ENV LANG en_US.UTF-8 +ENV LANGUAGE en_US:en +ENV LC_ALL en_US.UTF-8 + ENTRYPOINT ["/usr/bin/clickhouse-client"] From 4f990bcad6a637c6f63ea20abc9d7f22bdd34509 Mon Sep 17 00:00:00 2001 From: Andrey Dudin Date: Tue, 3 Oct 2017 18:32:39 +0300 Subject: [PATCH 37/63] Update cli.rst Add example of clickhouse-client conf (en) --- docs/en/interfaces/cli.rst | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/docs/en/interfaces/cli.rst b/docs/en/interfaces/cli.rst index b7a34b04f30..98c53543fc6 100644 --- a/docs/en/interfaces/cli.rst +++ 
b/docs/en/interfaces/cli.rst @@ -48,6 +48,15 @@ By default, files are searched for in this order: Settings are only taken from the first file found. +Example of config file: + +.. code-block:: xml + + + username + password + + You can also specify any settings that will be used for processing queries. For example, ``clickhouse-client --max_threads=1``. For more information, see the section "Settings". The client can be used in interactive and non-interactive (batch) mode. From 51e003bed65cb74b27064d9417e144760ad10294 Mon Sep 17 00:00:00 2001 From: Andrey Dudin Date: Tue, 3 Oct 2017 18:34:55 +0300 Subject: [PATCH 38/63] Update cli.rst Ru docs --- docs/ru/interfaces/cli.rst | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/docs/ru/interfaces/cli.rst b/docs/ru/interfaces/cli.rst index d2df0e88738..a5cf322fa51 100644 --- a/docs/ru/interfaces/cli.rst +++ b/docs/ru/interfaces/cli.rst @@ -45,6 +45,15 @@ /etc/clickhouse-client/config.xml Настройки берутся только из первого найденного файла. +Пример файла конфигурации: + +.. code-block:: xml + + + username + password + + Также вы можете указать любые настроки, которые будут использованы для обработки запросов. Например, clickhouse-client --max_threads=1. Подробнее см. раздел "Настройки". Клиент может быть использован в интерактивном и неинтерактивном (batch) режиме. From ebb4a6a0ed549034eec61a94d85ee092addc73c7 Mon Sep 17 00:00:00 2001 From: proller Date: Thu, 28 Sep 2017 22:43:31 +0300 Subject: [PATCH 39/63] Start ssl'ing (#CLICKHOUSE-3032) --- dbms/src/Client/Connection.cpp | 21 ++++++----- dbms/src/Client/Connection.h | 7 +++- dbms/src/Client/ConnectionPool.h | 9 +++-- dbms/src/Client/MultiplexedConnections.cpp | 8 ++--- dbms/src/Core/Protocol.h | 10 ++++++ dbms/src/Interpreters/Cluster.cpp | 6 ++-- dbms/src/Server/Client.cpp | 9 ++++- dbms/src/Server/Server.cpp | 42 ++++++++++++++++++++++ dbms/src/Server/config.xml | 6 +++- 9 files changed, 96 insertions(+), 22 deletions(-) diff --git a/dbms/src/Client/Connection.cpp b/dbms/src/Client/Connection.cpp index 8054699c7ec..4120bed14fc 100644 --- a/dbms/src/Client/Connection.cpp +++ b/dbms/src/Client/Connection.cpp @@ -1,6 +1,7 @@ #include #include +#include #include @@ -53,13 +54,14 @@ void Connection::connect() LOG_TRACE(log_wrapper.get(), "Connecting. Database: " << (default_database.empty() ? "(not specified)" : default_database) << ". User: " << user); - socket.connect(resolved_address, connect_timeout); - socket.setReceiveTimeout(receive_timeout); - socket.setSendTimeout(send_timeout); - socket.setNoDelay(true); + socket = encryption ? 
std::make_unique<Poco::Net::SecureStreamSocket>() : std::make_unique<Poco::Net::StreamSocket>();
+    socket->connect(resolved_address, connect_timeout);
+    socket->setReceiveTimeout(receive_timeout);
+    socket->setSendTimeout(send_timeout);
+    socket->setNoDelay(true);

-    in = std::make_shared<ReadBufferFromPocoSocket>(socket);
-    out = std::make_shared<WriteBufferFromPocoSocket>(socket);
+    in = std::make_shared<ReadBufferFromPocoSocket>(*socket);
+    out = std::make_shared<WriteBufferFromPocoSocket>(*socket);

     connected = true;

@@ -93,7 +95,8 @@ void Connection::disconnect()
 {
     //LOG_TRACE(log_wrapper.get(), "Disconnecting");

-    socket.close();
+    socket->close();
+    socket = nullptr;
     in = nullptr;
     out = nullptr;
     connected = false;
@@ -233,7 +236,7 @@ bool Connection::ping()
 {
     // LOG_TRACE(log_wrapper.get(), "Ping");

-    TimeoutSetter timeout_setter(socket, sync_request_timeout);
+    TimeoutSetter timeout_setter(*socket, sync_request_timeout);
     try
     {
         UInt64 pong = 0;
@@ -273,7 +276,7 @@ TablesStatusResponse Connection::getTablesStatus(const TablesStatusRequest & req
     if (!connected)
         connect();

-    TimeoutSetter timeout_setter(socket, sync_request_timeout);
+    TimeoutSetter timeout_setter(*socket, sync_request_timeout);

     writeVarUInt(Protocol::Client::TablesStatusRequest, *out);
     request.write(*out, server_revision);
diff --git a/dbms/src/Client/Connection.h b/dbms/src/Client/Connection.h
index 1ae955e7515..0f17af11746 100644
--- a/dbms/src/Client/Connection.h
+++ b/dbms/src/Client/Connection.h
@@ -54,6 +54,7 @@ public:
         const String & user_, const String & password_,
         const String & client_name_ = "client",
         Protocol::Compression::Enum compression_ = Protocol::Compression::Enable,
+        Protocol::Encryption::Enum encryption_ = Protocol::Encryption::Disable,
         Poco::Timespan connect_timeout_ = Poco::Timespan(DBMS_DEFAULT_CONNECT_TIMEOUT_SEC, 0),
         Poco::Timespan receive_timeout_ = Poco::Timespan(DBMS_DEFAULT_RECEIVE_TIMEOUT_SEC, 0),
         Poco::Timespan send_timeout_ = Poco::Timespan(DBMS_DEFAULT_SEND_TIMEOUT_SEC, 0),
@@ -63,6 +64,7 @@
         user(user_), password(password_), resolved_address(host, port),
         client_name(client_name_),
         compression(compression_),
+        encryption(encryption_),
         connect_timeout(connect_timeout_), receive_timeout(receive_timeout_),
         send_timeout(send_timeout_), sync_request_timeout(sync_request_timeout_),
         log_wrapper(*this)
@@ -80,6 +82,7 @@ public:
         const String & default_database_,
         const String & user_, const String & password_,
         const String & client_name_ = "client",
         Protocol::Compression::Enum compression_ = Protocol::Compression::Enable,
+        Protocol::Encryption::Enum encryption_ = Protocol::Encryption::Disable,
         Poco::Timespan connect_timeout_ = Poco::Timespan(DBMS_DEFAULT_CONNECT_TIMEOUT_SEC, 0),
         Poco::Timespan receive_timeout_ = Poco::Timespan(DBMS_DEFAULT_RECEIVE_TIMEOUT_SEC, 0),
         Poco::Timespan send_timeout_ = Poco::Timespan(DBMS_DEFAULT_SEND_TIMEOUT_SEC, 0),
@@ -91,6 +94,7 @@
         resolved_address(resolved_address_),
         client_name(client_name_),
         compression(compression_),
+        encryption(encryption_),
         connect_timeout(connect_timeout_), receive_timeout(receive_timeout_),
         send_timeout(send_timeout_), sync_request_timeout(sync_request_timeout_),
         log_wrapper(*this)
@@ -211,12 +215,13 @@
     UInt64 server_revision = 0;
     String server_timezone;

-    Poco::Net::StreamSocket socket;
+    std::unique_ptr<Poco::Net::StreamSocket> socket;
     std::shared_ptr<ReadBuffer> in;
     std::shared_ptr<WriteBuffer> out;

     String query_id;
     UInt64 compression;        /// Enable data compression for communication.
+    UInt64 encryption;         /// Enable data encryption for communication.

     /// What compression algorithm to use while sending data for INSERT queries and external tables.
CompressionMethod network_compression_method = CompressionMethod::LZ4; diff --git a/dbms/src/Client/ConnectionPool.h b/dbms/src/Client/ConnectionPool.h index 056c315089f..35a37ff5eea 100644 --- a/dbms/src/Client/ConnectionPool.h +++ b/dbms/src/Client/ConnectionPool.h @@ -50,13 +50,14 @@ public: const String & user_, const String & password_, const String & client_name_ = "client", Protocol::Compression::Enum compression_ = Protocol::Compression::Enable, + Protocol::Encryption::Enum encryption_ = Protocol::Encryption::Disable, Poco::Timespan connect_timeout_ = Poco::Timespan(DBMS_DEFAULT_CONNECT_TIMEOUT_SEC, 0), Poco::Timespan receive_timeout_ = Poco::Timespan(DBMS_DEFAULT_RECEIVE_TIMEOUT_SEC, 0), Poco::Timespan send_timeout_ = Poco::Timespan(DBMS_DEFAULT_SEND_TIMEOUT_SEC, 0)) : Base(max_connections_, &Logger::get("ConnectionPool (" + host_ + ":" + toString(port_) + ")")), host(host_), port(port_), default_database(default_database_), user(user_), password(password_), resolved_address(host_, port_), - client_name(client_name_), compression(compression_), + client_name(client_name_), compression(compression_), encryption(encryption_), connect_timeout(connect_timeout_), receive_timeout(receive_timeout_), send_timeout(send_timeout_) { } @@ -67,13 +68,14 @@ public: const String & user_, const String & password_, const String & client_name_ = "client", Protocol::Compression::Enum compression_ = Protocol::Compression::Enable, + Protocol::Encryption::Enum encryption_ = Protocol::Encryption::Disable, Poco::Timespan connect_timeout_ = Poco::Timespan(DBMS_DEFAULT_CONNECT_TIMEOUT_SEC, 0), Poco::Timespan receive_timeout_ = Poco::Timespan(DBMS_DEFAULT_RECEIVE_TIMEOUT_SEC, 0), Poco::Timespan send_timeout_ = Poco::Timespan(DBMS_DEFAULT_SEND_TIMEOUT_SEC, 0)) : Base(max_connections_, &Logger::get("ConnectionPool (" + host_ + ":" + toString(port_) + ")")), host(host_), port(port_), default_database(default_database_), user(user_), password(password_), resolved_address(resolved_address_), - client_name(client_name_), compression(compression_), + client_name(client_name_), compression(compression_), encryption(encryption_), connect_timeout(connect_timeout_), receive_timeout(receive_timeout_), send_timeout(send_timeout_) { } @@ -104,7 +106,7 @@ protected: return std::make_shared( host, port, resolved_address, default_database, user, password, - client_name, compression, + client_name, compression, encryption, connect_timeout, receive_timeout, send_timeout); } @@ -122,6 +124,7 @@ private: String client_name; Protocol::Compression::Enum compression; /// Whether to compress data when interacting with the server. + Protocol::Encryption::Enum encryption; /// Whether to encrypt data when interacting with the server. 
Poco::Timespan connect_timeout; Poco::Timespan receive_timeout; diff --git a/dbms/src/Client/MultiplexedConnections.cpp b/dbms/src/Client/MultiplexedConnections.cpp index 30d1e6c1e34..f13587d79c9 100644 --- a/dbms/src/Client/MultiplexedConnections.cpp +++ b/dbms/src/Client/MultiplexedConnections.cpp @@ -20,7 +20,7 @@ MultiplexedConnections::MultiplexedConnections(Connection & connection, const Se ReplicaState replica_state; replica_state.connection = &connection; replica_states.push_back(replica_state); - fd_to_replica_state_idx.emplace(connection.socket.impl()->sockfd(), 0); + fd_to_replica_state_idx.emplace(connection.socket->impl()->sockfd(), 0); active_connection_count = 1; } @@ -47,7 +47,7 @@ MultiplexedConnections::MultiplexedConnections( replica_state.connection = connection; replica_states.push_back(std::move(replica_state)); - fd_to_replica_state_idx.emplace(connection->socket.impl()->sockfd(), i); + fd_to_replica_state_idx.emplace(connection->socket->impl()->sockfd(), i); } active_connection_count = connections.size(); @@ -280,7 +280,7 @@ MultiplexedConnections::ReplicaState & MultiplexedConnections::getReplicaForRead { Connection * connection = state.connection; if ((connection != nullptr) && connection->hasReadBufferPendingData()) - read_list.push_back(connection->socket); + read_list.push_back(*connection->socket); } /// If no data was found, then we check if there are any connections @@ -294,7 +294,7 @@ MultiplexedConnections::ReplicaState & MultiplexedConnections::getReplicaForRead { Connection * connection = state.connection; if (connection != nullptr) - read_list.push_back(connection->socket); + read_list.push_back(*connection->socket); } int n = Poco::Net::Socket::select(read_list, write_list, except_list, settings.receive_timeout); diff --git a/dbms/src/Core/Protocol.h b/dbms/src/Core/Protocol.h index ecc4caf14f1..44141a5e1e9 100644 --- a/dbms/src/Core/Protocol.h +++ b/dbms/src/Core/Protocol.h @@ -117,6 +117,16 @@ namespace Protocol Enable = 1, }; } + + /// Whether the ssl must be used. 
+    namespace Encryption
+    {
+        enum Enum
+        {
+            Disable = 0,
+            Enable = 1,
+        };
+    }
 }

}
diff --git a/dbms/src/Interpreters/Cluster.cpp b/dbms/src/Interpreters/Cluster.cpp
index 304a0dd32d6..4d3b28a8caa 100644
--- a/dbms/src/Interpreters/Cluster.cpp
+++ b/dbms/src/Interpreters/Cluster.cpp
@@ -222,7 +222,7 @@ Cluster::Cluster(Poco::Util::AbstractConfiguration & config, const Settings & se
                 settings.distributed_connections_pool_size,
                 address.host_name, address.port, address.resolved_address,
                 address.default_database, address.user, address.password,
-                "server", Protocol::Compression::Enable,
+                "server", Protocol::Compression::Enable, Protocol::Encryption::Disable,
                 saturate(settings.connect_timeout, settings.limits.max_execution_time),
                 saturate(settings.receive_timeout, settings.limits.max_execution_time),
                 saturate(settings.send_timeout, settings.limits.max_execution_time)));
@@ -301,7 +301,7 @@ Cluster::Cluster(Poco::Util::AbstractConfiguration & config, const Settings & se
                     settings.distributed_connections_pool_size,
                     replica.host_name, replica.port, replica.resolved_address,
                     replica.default_database, replica.user, replica.password,
-                    "server", Protocol::Compression::Enable,
+                    "server", Protocol::Compression::Enable, Protocol::Encryption::Disable,
                     saturate(settings.connect_timeout_with_failover_ms, settings.limits.max_execution_time),
                     saturate(settings.receive_timeout, settings.limits.max_execution_time),
                     saturate(settings.send_timeout, settings.limits.max_execution_time)));
@@ -354,7 +354,7 @@ Cluster::Cluster(const Settings & settings, const std::vector(host, port, default_database, user, password, "client", compression,
+            encryption,
             Poco::Timespan(config().getInt("connect_timeout", DBMS_DEFAULT_CONNECT_TIMEOUT_SEC), 0),
             Poco::Timespan(config().getInt("receive_timeout", DBMS_DEFAULT_RECEIVE_TIMEOUT_SEC), 0),
             Poco::Timespan(config().getInt("send_timeout", DBMS_DEFAULT_SEND_TIMEOUT_SEC), 0));
@@ -1246,13 +1250,14 @@ public:
             ("config-file,c", boost::program_options::value<std::string>(), "config-file path")
             ("host,h", boost::program_options::value<std::string>()->default_value("localhost"), "server host")
             ("port", boost::program_options::value<int>()->default_value(9000), "server port")
+            ("secure,s", "secure")
             ("user,u", boost::program_options::value<std::string>(), "user")
             ("password", boost::program_options::value<std::string>(), "password")
             ("query,q", boost::program_options::value<std::string>(), "query")
             ("database,d", boost::program_options::value<std::string>(), "database")
             ("pager", boost::program_options::value<std::string>(), "pager")
             ("multiline,m", "multiline")
             ("multiquery,n", "multiquery")
             ("format,f", boost::program_options::value<std::string>(), "default output format")
             ("vertical,E", "vertical output format, same as --format=Vertical or FORMAT Vertical or \\G at end of command")
             ("time,t", "print query execution time to stderr in non-interactive mode (for benchmarks)")
@@ -1346,6 +1351,8 @@
         if (options.count("port") && !options["port"].defaulted())
             config().setInt("port", options["port"].as<int>());
+        if (options.count("secure"))
+            config().setBool("secure", true);
         if (options.count("user"))
             config().setString("user", options["user"].as<std::string>());
         if (options.count("password"))
diff --git a/dbms/src/Server/Server.cpp b/dbms/src/Server/Server.cpp
index 53534463359..893efe7801d 100644
--- a/dbms/src/Server/Server.cpp
+++ b/dbms/src/Server/Server.cpp
@@ -396,6 +396,7 @@ int Server::main(const std::vector<std::string> & args)
         /// TCP
         if (config().has("tcp_port"))
         {
+            std::call_once(ssl_init_once, SSLInit);
             Poco::Net::SocketAddress tcp_address =
make_socket_address(listen_host, config().getInt("tcp_port")); Poco::Net::ServerSocket tcp_socket(tcp_address); tcp_socket.setReceiveTimeout(settings.receive_timeout); @@ -409,6 +410,26 @@ int Server::main(const std::vector & args) LOG_INFO(log, "Listening tcp: " + tcp_address.toString()); } + /// TCP + if (config().has("tcps_port")) + { +#if Poco_NetSSL_FOUND + Poco::Net::SocketAddress tcp_address = make_socket_address(listen_host, config().getInt("tcps_port")); + Poco::Net::SecureServerSocket tcp_socket(tcp_address); + tcp_socket.setReceiveTimeout(settings.receive_timeout); + tcp_socket.setSendTimeout(settings.send_timeout); + servers.emplace_back(new Poco::Net::TCPServer( + new TCPHandlerFactory(*this), + server_pool, + tcp_socket, + new Poco::Net::TCPServerParams)); + LOG_INFO(log, "Listening tcps: " + tcp_address.toString()); +#else + throw Exception{"tcps protocol disabled because poco library built without NetSSL support.", + ErrorCodes::SUPPORT_IS_DISABLED}; +#endif + } + /// At least one of TCP and HTTP servers must be created. if (servers.empty()) throw Exception("No 'tcp_port' and 'http_port' is specified in configuration file.", ErrorCodes::NO_ELEMENTS_IN_CONFIG); @@ -428,6 +449,27 @@ int Server::main(const std::vector & args) LOG_INFO(log, "Listening interserver: " + interserver_address.toString()); } + /// Interserver IO HTTP + if (config().has("interserver_https_port")) + { +#if Poco_NetSSL_FOUND + std::call_once(ssl_init_once, SSLInit); + Poco::Net::SocketAddress interserver_address = make_socket_address(listen_host, config().getInt("interserver_https_port")); + Poco::Net::SecureServerSocket interserver_io_http_socket(interserver_address); + interserver_io_http_socket.setReceiveTimeout(settings.receive_timeout); + interserver_io_http_socket.setSendTimeout(settings.send_timeout); + servers.emplace_back(new Poco::Net::HTTPServer( + new InterserverIOHTTPHandlerFactory(*this, "InterserverIOHTTPHandler-factory"), + server_pool, + interserver_io_http_socket, + http_params)); + + LOG_INFO(log, "Listening interserver https: " + interserver_address.toString()); +#else + throw Exception{"https protocol disabled because poco library built without NetSSL support.", + ErrorCodes::SUPPORT_IS_DISABLED}; +#endif + } } catch (const Poco::Net::NetException & e) { diff --git a/dbms/src/Server/config.xml b/dbms/src/Server/config.xml index dfd3f2433dd..f67cf0a18cd 100644 --- a/dbms/src/Server/config.xml +++ b/dbms/src/Server/config.xml @@ -14,7 +14,7 @@ - + @@ -48,8 +48,12 @@ 9000 + + 9443 + 9009 + 9449 - 9443 + 9440 9009 - 9449 + 9443 + + + true + true + sslv2,sslv3 + true + + + + RejectCertificateHandler + + + diff --git a/dbms/src/Server/config.xml b/dbms/src/Server/config.xml index 85b494fb777..47106ba2ba2 100644 --- a/dbms/src/Server/config.xml +++ b/dbms/src/Server/config.xml @@ -16,7 +16,7 @@ --> - + /etc/clickhouse-server/server.crt /etc/clickhouse-server/server.key @@ -28,7 +28,8 @@ sslv2,sslv3 true - + + true true sslv2,sslv3 @@ -53,7 +54,6 @@ 9009 - 9443 - + @@ -50,7 +50,7 @@ 9000 - 9440 + 9440 9009 diff --git a/dbms/tests/queries/0_stateless/00505_tcp_ssl.sh b/dbms/tests/queries/0_stateless/00505_tcp_ssl.sh index 245012af730..eb45528813d 100755 --- a/dbms/tests/queries/0_stateless/00505_tcp_ssl.sh +++ b/dbms/tests/queries/0_stateless/00505_tcp_ssl.sh @@ -2,8 +2,8 @@ # Not default server config needed -tcps_port=`clickhouse extract-from-config -c /etc/clickhouse-server/config.xml -k tcps_port 2>/dev/null` -if [ -z ${tcps_port} ]; then +tcp_ssl_port=`clickhouse extract-from-config -c 
/etc/clickhouse-server/config.xml -k tcp_ssl_port 2>/dev/null` +if [ -z ${tcp_ssl_port} ]; then # Secure port disabled. Fake result echo 1 echo 2 From a4191614ed533c3dc3c8d2635b601f7c6897cf40 Mon Sep 17 00:00:00 2001 From: proller Date: Mon, 2 Oct 2017 19:58:43 +0300 Subject: [PATCH 46/63] fixes --- dbms/src/Server/Client.cpp | 4 ++-- dbms/src/Server/Server.cpp | 4 ++-- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/dbms/src/Server/Client.cpp b/dbms/src/Server/Client.cpp index 99d584193cd..51889ebbf25 100644 --- a/dbms/src/Server/Client.cpp +++ b/dbms/src/Server/Client.cpp @@ -375,7 +375,7 @@ private: void connect() { - Protocol::Encryption::Enum encryption = config().getBool("secure", false) + Protocol::Encryption::Enum encryption = config().getBool("ssl", false) ? Protocol::Encryption::Enable : Protocol::Encryption::Disable; @@ -1251,7 +1251,7 @@ public: ("config-file,c", boost::program_options::value(), "config-file path") ("host,h", boost::program_options::value()->default_value("localhost"), "server host") ("port", boost::program_options::value()->default_value(9000), "server port") - ("secure,s", "secure") + ("ssl,s", "ssl") ("user,u", boost::program_options::value(), "user") ("password", boost::program_options::value(), "password") ("query,q", boost::program_options::value(), "query") diff --git a/dbms/src/Server/Server.cpp b/dbms/src/Server/Server.cpp index 5447f190574..edc29725c08 100644 --- a/dbms/src/Server/Server.cpp +++ b/dbms/src/Server/Server.cpp @@ -423,9 +423,9 @@ int Server::main(const std::vector & args) server_pool, tcp_socket, new Poco::Net::TCPServerParams)); - LOG_INFO(log, "Listening tcps: " + tcp_address.toString()); + LOG_INFO(log, "Listening tcp_ssl: " + tcp_address.toString()); #else - throw Exception{"tcps protocol disabled because poco library built without NetSSL support.", + throw Exception{"tcp_ssl protocol disabled because poco library built without NetSSL support.", ErrorCodes::SUPPORT_IS_DISABLED}; #endif } From 7feb94357bd6a6215943c7b8175e1439793501f2 Mon Sep 17 00:00:00 2001 From: proller Date: Tue, 3 Oct 2017 17:52:08 +0300 Subject: [PATCH 47/63] use enum class --- dbms/src/Client/Connection.cpp | 4 ++-- dbms/src/Client/Connection.h | 12 ++++++------ dbms/src/Client/ConnectionPool.h | 12 ++++++------ dbms/src/Core/Protocol.h | 23 +++++++++-------------- dbms/src/DataStreams/IBlockInputStream.h | 1 + dbms/src/Server/Client.cpp | 6 +++--- dbms/src/Server/TCPHandler.cpp | 2 +- dbms/src/Server/TCPHandler.h | 2 +- 8 files changed, 29 insertions(+), 33 deletions(-) diff --git a/dbms/src/Client/Connection.cpp b/dbms/src/Client/Connection.cpp index 3e13b906a74..8d3ab2a4f13 100644 --- a/dbms/src/Client/Connection.cpp +++ b/dbms/src/Client/Connection.cpp @@ -54,7 +54,7 @@ void Connection::connect() LOG_TRACE(log_wrapper.get(), "Connecting. Database: " << (default_database.empty() ? "(not specified)" : default_database) << ". User: " << user); - socket = encryption ? std::make_unique() : std::make_unique(); + socket = static_cast(encryption) ? 
std::make_unique() : std::make_unique(); socket->connect(resolved_address, connect_timeout); socket->setReceiveTimeout(receive_timeout); socket->setSendTimeout(send_timeout); @@ -346,7 +346,7 @@ void Connection::sendQuery( writeStringBinary("", *out); writeVarUInt(stage, *out); - writeVarUInt(compression, *out); + writeVarUInt(static_cast(compression), *out); writeStringBinary(query, *out); diff --git a/dbms/src/Client/Connection.h b/dbms/src/Client/Connection.h index 07110159e8e..9b349729320 100644 --- a/dbms/src/Client/Connection.h +++ b/dbms/src/Client/Connection.h @@ -53,8 +53,8 @@ public: Connection(const String & host_, UInt16 port_, const String & default_database_, const String & user_, const String & password_, const String & client_name_ = "client", - Protocol::Compression::Enum compression_ = Protocol::Compression::Enable, - Protocol::Encryption::Enum encryption_ = Protocol::Encryption::Disable, + Protocol::Compression compression_ = Protocol::Compression::Enable, + Protocol::Encryption encryption_ = Protocol::Encryption::Disable, Poco::Timespan connect_timeout_ = Poco::Timespan(DBMS_DEFAULT_CONNECT_TIMEOUT_SEC, 0), Poco::Timespan receive_timeout_ = Poco::Timespan(DBMS_DEFAULT_RECEIVE_TIMEOUT_SEC, 0), Poco::Timespan send_timeout_ = Poco::Timespan(DBMS_DEFAULT_SEND_TIMEOUT_SEC, 0), @@ -81,8 +81,8 @@ public: const String & default_database_, const String & user_, const String & password_, const String & client_name_ = "client", - Protocol::Compression::Enum compression_ = Protocol::Compression::Enable, - Protocol::Encryption::Enum encryption_ = Protocol::Encryption::Disable, + Protocol::Compression compression_ = Protocol::Compression::Enable, + Protocol::Encryption encryption_ = Protocol::Encryption::Disable, Poco::Timespan connect_timeout_ = Poco::Timespan(DBMS_DEFAULT_CONNECT_TIMEOUT_SEC, 0), Poco::Timespan receive_timeout_ = Poco::Timespan(DBMS_DEFAULT_RECEIVE_TIMEOUT_SEC, 0), Poco::Timespan send_timeout_ = Poco::Timespan(DBMS_DEFAULT_SEND_TIMEOUT_SEC, 0), @@ -220,8 +220,8 @@ private: std::shared_ptr out; String query_id; - bool compression; /// Enable data compression for communication. - bool encryption; /// Enable data encryption for communication. + Protocol::Compression compression; /// Enable data compression for communication. + Protocol::Encryption encryption; /// Enable data encryption for communication. /// What compression algorithm to use while sending data for INSERT queries and external tables. 
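/* An aside on the connect path shown above, sketching how the encrypted and
   plain transports share one code path. This is a minimal sketch, assuming
   Poco's NetSSL classes are available; the helper name makeSocket is
   illustrative and not part of the patch. Poco::Net::SecureStreamSocket
   derives from Poco::Net::StreamSocket, so the rest of Connection can stay
   agnostic of the transport and only the construction site branches on
   Protocol::Encryption. */
#include <memory>
#include <Poco/Net/StreamSocket.h>
#include <Poco/Net/SecureStreamSocket.h>

std::unique_ptr<Poco::Net::StreamSocket> makeSocket(Protocol::Encryption encryption)
{
    if (encryption == Protocol::Encryption::Enable)
        return std::make_unique<Poco::Net::SecureStreamSocket>(); /// TLS-wrapped TCP
    return std::make_unique<Poco::Net::StreamSocket>();           /// plain TCP
}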
CompressionMethod network_compression_method = CompressionMethod::LZ4; diff --git a/dbms/src/Client/ConnectionPool.h b/dbms/src/Client/ConnectionPool.h index 35a37ff5eea..1e78f4a19e1 100644 --- a/dbms/src/Client/ConnectionPool.h +++ b/dbms/src/Client/ConnectionPool.h @@ -49,8 +49,8 @@ public: const String & default_database_, const String & user_, const String & password_, const String & client_name_ = "client", - Protocol::Compression::Enum compression_ = Protocol::Compression::Enable, - Protocol::Encryption::Enum encryption_ = Protocol::Encryption::Disable, + Protocol::Compression compression_ = Protocol::Compression::Enable, + Protocol::Encryption encryption_ = Protocol::Encryption::Disable, Poco::Timespan connect_timeout_ = Poco::Timespan(DBMS_DEFAULT_CONNECT_TIMEOUT_SEC, 0), Poco::Timespan receive_timeout_ = Poco::Timespan(DBMS_DEFAULT_RECEIVE_TIMEOUT_SEC, 0), Poco::Timespan send_timeout_ = Poco::Timespan(DBMS_DEFAULT_SEND_TIMEOUT_SEC, 0)) @@ -67,8 +67,8 @@ public: const String & default_database_, const String & user_, const String & password_, const String & client_name_ = "client", - Protocol::Compression::Enum compression_ = Protocol::Compression::Enable, - Protocol::Encryption::Enum encryption_ = Protocol::Encryption::Disable, + Protocol::Compression compression_ = Protocol::Compression::Enable, + Protocol::Encryption encryption_ = Protocol::Encryption::Disable, Poco::Timespan connect_timeout_ = Poco::Timespan(DBMS_DEFAULT_CONNECT_TIMEOUT_SEC, 0), Poco::Timespan receive_timeout_ = Poco::Timespan(DBMS_DEFAULT_RECEIVE_TIMEOUT_SEC, 0), Poco::Timespan send_timeout_ = Poco::Timespan(DBMS_DEFAULT_SEND_TIMEOUT_SEC, 0)) @@ -123,8 +123,8 @@ private: Poco::Net::SocketAddress resolved_address; String client_name; - Protocol::Compression::Enum compression; /// Whether to compress data when interacting with the server. - Protocol::Encryption::Enum encryption; /// Whether to encrypt data when interacting with the server. + Protocol::Compression compression; /// Whether to compress data when interacting with the server. + Protocol::Encryption encryption; /// Whether to encrypt data when interacting with the server. Poco::Timespan connect_timeout; Poco::Timespan receive_timeout; diff --git a/dbms/src/Core/Protocol.h b/dbms/src/Core/Protocol.h index 44141a5e1e9..6a3b2050921 100644 --- a/dbms/src/Core/Protocol.h +++ b/dbms/src/Core/Protocol.h @@ -109,24 +109,19 @@ namespace Protocol } /// Whether the compression must be used. - namespace Compression + enum class Compression { - enum Enum - { - Disable = 0, - Enable = 1, - }; - } + Disable = 0, + Enable = 1, + }; /// Whether the ssl must be used. - namespace Encryption + enum class Encryption { - enum Enum - { - Disable = 0, - Enable = 1, - }; - } + Disable = 0, + Enable = 1, + }; + } } diff --git a/dbms/src/DataStreams/IBlockInputStream.h b/dbms/src/DataStreams/IBlockInputStream.h index 8e4b6d4abc0..8a874ea127b 100644 --- a/dbms/src/DataStreams/IBlockInputStream.h +++ b/dbms/src/DataStreams/IBlockInputStream.h @@ -28,6 +28,7 @@ struct Progress; namespace ErrorCodes { extern const int OUTPUT_IS_NOT_SORTED; + extern const int NOT_IMPLEMENTED; } diff --git a/dbms/src/Server/Client.cpp b/dbms/src/Server/Client.cpp index 51889ebbf25..7261944eb97 100644 --- a/dbms/src/Server/Client.cpp +++ b/dbms/src/Server/Client.cpp @@ -375,17 +375,17 @@ private: void connect() { - Protocol::Encryption::Enum encryption = config().getBool("ssl", false) + auto encryption = config().getBool("ssl", false) ? 
Protocol::Encryption::Enable : Protocol::Encryption::Disable; String host = config().getString("host", "localhost"); - UInt16 port = config().getInt("port", config().getInt(encryption ? "tcp_ssl_port" : "tcp_port", encryption ? DBMS_DEFAULT_SECURE_PORT : DBMS_DEFAULT_PORT)); + UInt16 port = config().getInt("port", config().getInt(static_cast(encryption) ? "tcp_ssl_port" : "tcp_port", static_cast(encryption) ? DBMS_DEFAULT_SECURE_PORT : DBMS_DEFAULT_PORT)); String default_database = config().getString("database", ""); String user = config().getString("user", ""); String password = config().getString("password", ""); - Protocol::Compression::Enum compression = config().getBool("compression", true) + auto compression = config().getBool("compression", true) ? Protocol::Compression::Enable : Protocol::Compression::Disable; diff --git a/dbms/src/Server/TCPHandler.cpp b/dbms/src/Server/TCPHandler.cpp index 7ca2a1cbf8b..8523913639c 100644 --- a/dbms/src/Server/TCPHandler.cpp +++ b/dbms/src/Server/TCPHandler.cpp @@ -597,7 +597,7 @@ void TCPHandler::receiveQuery() state.stage = QueryProcessingStage::Enum(stage); readVarUInt(compression, *in); - state.compression = Protocol::Compression::Enum(compression); + state.compression = static_cast(compression); readStringBinary(state.query, *in); } diff --git a/dbms/src/Server/TCPHandler.h b/dbms/src/Server/TCPHandler.h index 846a5474561..5706976721f 100644 --- a/dbms/src/Server/TCPHandler.h +++ b/dbms/src/Server/TCPHandler.h @@ -31,7 +31,7 @@ struct QueryState String query_id; QueryProcessingStage::Enum stage = QueryProcessingStage::Complete; - Protocol::Compression::Enum compression = Protocol::Compression::Disable; + Protocol::Compression compression = Protocol::Compression::Disable; /// From where to read data for INSERT. 
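/* An aside on the enum class migration in this patch: scoped enums do not
   convert implicitly to and from integers, which is why the Connection and
   TCPHandler hunks add explicit casts around the wire format. A minimal
   sketch of the round trip, assuming the surrounding file's includes
   (IO/VarInt.h for writeVarUInt/readVarUInt) are in scope; using UInt64 as
   the intermediate type is an assumption chosen to match those helpers. */
void writeCompression(Protocol::Compression compression, WriteBuffer & out)
{
    writeVarUInt(static_cast<UInt64>(compression), out);  /// explicit cast required for enum class
}

Protocol::Compression readCompression(ReadBuffer & in)
{
    UInt64 value = 0;
    readVarUInt(value, in);                            /// read the raw varint
    return static_cast<Protocol::Compression>(value);  /// cast back to the scoped enum
}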
std::shared_ptr maybe_compressed_in; From 88c88c3f4a978140213292438d21d508611a5d5a Mon Sep 17 00:00:00 2001 From: proller Date: Tue, 3 Oct 2017 20:16:10 +0300 Subject: [PATCH 48/63] Fix --- dbms/src/Server/Client.cpp | 4 ++-- dbms/tests/queries/0_stateless/00505_tcp_ssl.sh | 4 ++-- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/dbms/src/Server/Client.cpp b/dbms/src/Server/Client.cpp index 7261944eb97..b3909f9d2c5 100644 --- a/dbms/src/Server/Client.cpp +++ b/dbms/src/Server/Client.cpp @@ -1352,8 +1352,8 @@ public: if (options.count("port") && !options["port"].defaulted()) config().setInt("port", options["port"].as()); - if (options.count("secure")) - config().setBool("secure", true); + if (options.count("ssl")) + config().setBool("ssl", true); if (options.count("user")) config().setString("user", options["user"].as()); if (options.count("password")) diff --git a/dbms/tests/queries/0_stateless/00505_tcp_ssl.sh b/dbms/tests/queries/0_stateless/00505_tcp_ssl.sh index eb45528813d..9e2e13d0dd0 100755 --- a/dbms/tests/queries/0_stateless/00505_tcp_ssl.sh +++ b/dbms/tests/queries/0_stateless/00505_tcp_ssl.sh @@ -9,6 +9,6 @@ if [ -z ${tcp_ssl_port} ]; then echo 2 else # Auto port detect - clickhouse-client --secure -q "SELECT 1"; - clickhouse-client --secure --port=9440 -q "SELECT 2"; + clickhouse-client --ssl -q "SELECT 1"; + clickhouse-client --ssl --port=9440 -q "SELECT 2"; fi From 96d1ab89da451911eb54eccf1017eb5f94068a34 Mon Sep 17 00:00:00 2001 From: alexey-milovidov Date: Tue, 3 Oct 2017 22:11:38 +0300 Subject: [PATCH 49/63] Update Server.cpp --- dbms/src/Server/Server.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/dbms/src/Server/Server.cpp b/dbms/src/Server/Server.cpp index edc29725c08..09f3b6410d0 100644 --- a/dbms/src/Server/Server.cpp +++ b/dbms/src/Server/Server.cpp @@ -410,7 +410,7 @@ int Server::main(const std::vector & args) LOG_INFO(log, "Listening tcp: " + tcp_address.toString()); } - /// TCP + /// TCP with SSL if (config().has("tcp_ssl_port")) { #if Poco_NetSSL_FOUND From 62fca4f4512dcbb8c210bb8e042ffb9a78a30625 Mon Sep 17 00:00:00 2001 From: Amy Krishnevsky Date: Wed, 4 Oct 2017 11:55:31 +0300 Subject: [PATCH 50/63] update CHANGELOG.md --- CHANGELOG.md | 17 +++++++++++++++++ 1 file changed, 17 insertions(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index a926e5cdc4b..ef7ff17d2ee 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,3 +1,20 @@ +# ClickHouse release 1.1.54292 + +## New features: +* Added the `pointInPolygon` function for working with coordinates on a coordinate plane. +* Added the `sumMap` aggregate function for calculating the sum of arrays, similar to `SummingMergeTree`. +* Added the `trunc` function. Improved performance of the rounding functions (`round`, `floor`, `ceil`, `roundToExp2`) and corrected the logic of how they work. Changed the logic of the `roundToExp2` function for fractions and negative numbers. +* The ClickHouse executable file is now less dependent on the libc version. The same ClickHouse executable file can run on a wide variety of Linux systems. Note: There is still a dependency when using compiled queries (with the setting `compile = 1`, which is not used by default). +* Reduced the time needed for dynamic compilation of queries. + +## Bug fixes: +* Fixed an error that sometimes produced `part ... intersects previous part` messages and weakened replica consistency. +* Fixed an error that caused the server to lock up if ZooKeeper was unavailable during shutdown. 
+* Removed excessive logging when restoring replicas. +* Fixed an error in the `UNION ALL` implementation. +* Fixed an error in the `concat` function that occurred if the first column in a block had the `Array` type. +* Progress is now displayed correctly in the `system.merges` table. + # ClickHouse release 1.1.54289 ## New features: From 723f36ec6dbf0e264ebda04401febf82faaee2c1 Mon Sep 17 00:00:00 2001 From: Vitaliy Lyudvichenko Date: Wed, 4 Oct 2017 18:15:40 +0300 Subject: [PATCH 51/63] Fixed integration tests after introduction of 'clickhouse server' commands. [#CLICKHOUSE-2] --- dbms/tests/integration/helpers/cluster.py | 1 + 1 file changed, 1 insertion(+) diff --git a/dbms/tests/integration/helpers/cluster.py b/dbms/tests/integration/helpers/cluster.py index 57f165d9099..b20749a08d3 100644 --- a/dbms/tests/integration/helpers/cluster.py +++ b/dbms/tests/integration/helpers/cluster.py @@ -176,6 +176,7 @@ services: - {logs_dir}:/var/log/clickhouse-server/ entrypoint: - /usr/bin/clickhouse + - server - --config-file=/etc/clickhouse-server/config.xml - --log-file=/var/log/clickhouse-server/clickhouse-server.log depends_on: {depends_on} From d498e8e152f38c0f81a02b97f6b56c86af731751 Mon Sep 17 00:00:00 2001 From: proller Date: Wed, 4 Oct 2017 18:18:05 +0300 Subject: [PATCH 52/63] Try fix work with not connected sockets (#1320) * Lazy fd_to_replica_state_idx init; Fix work with not connected sockets * Style * const --- dbms/src/Client/Connection.cpp | 4 ++-- dbms/src/Client/MultiplexedConnections.cpp | 13 ++++++++++--- 2 files changed, 12 insertions(+), 5 deletions(-) diff --git a/dbms/src/Client/Connection.cpp b/dbms/src/Client/Connection.cpp index 8d3ab2a4f13..fc9315210b2 100644 --- a/dbms/src/Client/Connection.cpp +++ b/dbms/src/Client/Connection.cpp @@ -95,11 +95,11 @@ void Connection::disconnect() { //LOG_TRACE(log_wrapper.get(), "Disconnecting"); + in = nullptr; + out = nullptr; // can write to socket if (socket) socket->close(); socket = nullptr; - in = nullptr; - out = nullptr; connected = false; } diff --git a/dbms/src/Client/MultiplexedConnections.cpp b/dbms/src/Client/MultiplexedConnections.cpp index f13587d79c9..fb2baa105a6 100644 --- a/dbms/src/Client/MultiplexedConnections.cpp +++ b/dbms/src/Client/MultiplexedConnections.cpp @@ -20,7 +20,6 @@ MultiplexedConnections::MultiplexedConnections(Connection & connection, const Se ReplicaState replica_state; replica_state.connection = &connection; replica_states.push_back(replica_state); - fd_to_replica_state_idx.emplace(connection.socket->impl()->sockfd(), 0); active_connection_count = 1; } @@ -36,7 +35,6 @@ MultiplexedConnections::MultiplexedConnections( return; replica_states.reserve(connections.size()); - fd_to_replica_state_idx.reserve(connections.size()); for (size_t i = 0; i < connections.size(); ++i) { Connection * connection = &(*connections[i]); @@ -47,7 +45,6 @@ replica_state.connection = connection; replica_states.push_back(std::move(replica_state)); - fd_to_replica_state_idx.emplace(connection->socket->impl()->sockfd(), i); } active_connection_count = connections.size(); @@ -304,6 +301,16 @@ MultiplexedConnections::ReplicaState & MultiplexedConnections::getReplicaForRead } auto & socket = read_list[rand() % read_list.size()]; + if (fd_to_replica_state_idx.empty()) + { + fd_to_replica_state_idx.reserve(replica_states.size()); + size_t replica_state_number = 0; + for (const auto & replica_state : replica_states) + { +
fd_to_replica_state_idx.emplace(replica_state.connection->socket->impl()->sockfd(), replica_state_number); + ++replica_state_number; + } + } return replica_states[fd_to_replica_state_idx.at(socket.impl()->sockfd())]; } From 95ebdd53c067569c5d7d445bfa1cbc8ea7768735 Mon Sep 17 00:00:00 2001 From: robot-metrika-test Date: Thu, 5 Oct 2017 12:22:56 +0300 Subject: [PATCH 53/63] Auto version update to [54297] --- dbms/cmake/version.cmake | 4 ++-- debian/changelog | 4 ++-- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/dbms/cmake/version.cmake b/dbms/cmake/version.cmake index f8aceaeac86..cbaed040a12 100644 --- a/dbms/cmake/version.cmake +++ b/dbms/cmake/version.cmake @@ -1,6 +1,6 @@ # This strings autochanged from release_lib.sh: -set(VERSION_DESCRIBE v1.1.54294-testing) -set(VERSION_REVISION 54294) +set(VERSION_DESCRIBE v1.1.54297-testing) +set(VERSION_REVISION 54297) # end of autochange set (VERSION_MAJOR 1) diff --git a/debian/changelog b/debian/changelog index 1872c8a5e9b..403d0960054 100644 --- a/debian/changelog +++ b/debian/changelog @@ -1,5 +1,5 @@ -clickhouse (1.1.54294) unstable; urgency=low +clickhouse (1.1.54297) unstable; urgency=low * Modified source code - -- Fri, 29 Sep 2017 01:08:46 +0300 + -- Thu, 05 Oct 2017 12:22:56 +0300 From 704fee875041a209f9f5e63382daa8515d3c51ef Mon Sep 17 00:00:00 2001 From: proller Date: Fri, 6 Oct 2017 18:58:24 +0300 Subject: [PATCH 54/63] Tests: External: rename --use_http => --no_http and fix (#1324) --- .../generate_and_test.py | 49 +++++++++++-------- 1 file changed, 29 insertions(+), 20 deletions(-) diff --git a/dbms/tests/external_dictionaries/generate_and_test.py b/dbms/tests/external_dictionaries/generate_and_test.py index 097b152b56e..0815527b525 100755 --- a/dbms/tests/external_dictionaries/generate_and_test.py +++ b/dbms/tests/external_dictionaries/generate_and_test.py @@ -48,39 +48,43 @@ def generate_structure(args): [ 'file_flat', 0, True ], [ 'clickhouse_flat', 0, True ], [ 'executable_flat', 0, True ], - [ 'http_flat', 0, True ], [ 'file_hashed', 0, True ], [ 'clickhouse_hashed', 0, True ], [ 'executable_hashed', 0, True ], - [ 'http_hashed', 0, True ], [ 'clickhouse_cache', 0, True ], [ 'executable_cache', 0, True ], - [ 'http_cache', 0, True ], # Complex key dictionaries with (UInt8, UInt8) key [ 'file_complex_integers_key_hashed', 1, False ], [ 'clickhouse_complex_integers_key_hashed', 1, False ], [ 'executable_complex_integers_key_hashed', 1, False ], - [ 'http_complex_integers_key_hashed', 1, False ], [ 'clickhouse_complex_integers_key_cache', 1, False ], [ 'executable_complex_integers_key_cache', 1, False ], - [ 'http_complex_integers_key_cache', 1, False ], # Complex key dictionaries with (String, UInt8) key [ 'file_complex_mixed_key_hashed', 2, False ], [ 'clickhouse_complex_mixed_key_hashed', 2, False ], [ 'executable_complex_mixed_key_hashed', 2, False ], - [ 'http_complex_mixed_key_hashed', 2, False ], [ 'clickhouse_complex_mixed_key_cache', 2, False ], [ 'executable_complex_mixed_key_hashed', 2, False ], - [ 'http_complex_mixed_key_hashed', 2, False ], ]) - if args.use_https: + if not args.no_http: + dictionaries.extend([ + [ 'http_flat', 0, True ], + [ 'http_hashed', 0, True ], + [ 'http_cache', 0, True ], + [ 'http_complex_integers_key_hashed', 1, False ], + [ 'http_complex_integers_key_cache', 1, False ], + [ 'http_complex_mixed_key_hashed', 2, False ], + [ 'http_complex_mixed_key_hashed', 2, False ], + ]) + + if not args.no_https: dictionaries.extend([ [ 'https_flat', 0, True ], [ 'https_hashed', 0, True ], 
@@ -456,39 +460,44 @@ def generate_dictionaries(args): [ source_file % (generated_prefix + files[0]), layout_flat], [ source_clickhouse, layout_flat ], [ source_executable % (generated_prefix + files[0]), layout_flat ], - [ source_http % (files[0]), layout_flat ], [ source_file % (generated_prefix + files[0]), layout_hashed], [ source_clickhouse, layout_hashed ], [ source_executable % (generated_prefix + files[0]), layout_hashed ], - [ source_http % (files[0]), layout_hashed ], [ source_clickhouse, layout_cache ], [ source_executable_cache % (generated_prefix + files[0]), layout_cache ], - [ source_http % (files[0]), layout_cache ], # Complex key dictionaries with (UInt8, UInt8) key [ source_file % (generated_prefix + files[1]), layout_complex_key_hashed], [ source_clickhouse, layout_complex_key_hashed ], [ source_executable % (generated_prefix + files[1]), layout_complex_key_hashed ], - [ source_http % (files[1]), layout_complex_key_hashed ], [ source_clickhouse, layout_complex_key_cache ], [ source_executable_cache % (generated_prefix + files[1]), layout_complex_key_cache ], - [ source_http % (files[1]), layout_complex_key_cache ], # Complex key dictionaries with (String, UInt8) key [ source_file % (generated_prefix + files[2]), layout_complex_key_hashed], [ source_clickhouse, layout_complex_key_hashed ], [ source_executable % (generated_prefix + files[2]), layout_complex_key_hashed ], - [ source_http % (files[2]), layout_complex_key_hashed ], [ source_clickhouse, layout_complex_key_cache ], [ source_executable_cache % (generated_prefix + files[2]), layout_complex_key_cache ], - [ source_http % (files[2]), layout_complex_key_cache ], ] - if args.use_https: + + if not args.no_http: + sources_and_layouts.extend([ + [ source_http % (files[0]), layout_flat ], + [ source_http % (files[0]), layout_hashed ], + [ source_http % (files[0]), layout_cache ], + [ source_http % (files[1]), layout_complex_key_hashed ], + [ source_http % (files[1]), layout_complex_key_cache ], + [ source_http % (files[2]), layout_complex_key_hashed ], + [ source_http % (files[2]), layout_complex_key_cache ], + ]) + + if not args.no_https: sources_and_layouts.extend([ [ source_https % (files[0]), layout_flat ], [ source_https % (files[0]), layout_hashed ], @@ -549,13 +558,13 @@ def generate_dictionaries(args): def run_tests(args): - if args.use_http: + if not args.no_http: http_server = subprocess.Popen(["python", "http_server.py", "--port", str(args.http_port), "--host", args.http_host]); @atexit.register def http_killer(): http_server.kill() - if args.use_https: + if not args.no_https: https_server = subprocess.Popen(["python", "http_server.py", "--port", str(args.https_port), "--host", args.https_host, '--https']); @atexit.register def https_killer(): @@ -731,11 +740,11 @@ if __name__ == '__main__': parser.add_argument('--mongo_host', default = 'localhost', help = 'mongo server host') parser.add_argument('--use_mongo_user', action='store_true', help = 'Test mongodb with user-pass') - parser.add_argument('--use_http', default = True, help = 'Use http dictionaries') + parser.add_argument('--no_http', action='store_true', help = 'Dont use http dictionaries') parser.add_argument('--http_port', default = 58000, help = 'http server port') parser.add_argument('--http_host', default = 'localhost', help = 'http server host') parser.add_argument('--http_path', default = '/generated/', help = 'http server path') - parser.add_argument('--use_https', default = True, help = 'Use https dictionaries') + 
parser.add_argument('--no_https', action='store_true', help = 'Dont use https dictionaries') parser.add_argument('--https_port', default = 58443, help = 'https server port') parser.add_argument('--https_host', default = 'localhost', help = 'https server host') parser.add_argument('--https_path', default = '/generated/', help = 'https server path') From 19042816c4a1536cfab5eec51de427c6102d0cb9 Mon Sep 17 00:00:00 2001 From: proller Date: Fri, 6 Oct 2017 19:26:32 +0300 Subject: [PATCH 55/63] Fix compile with boost 1.65.1+ and clang 3.8 ( https://bugs.freebsd.org/bugzilla/show_bug.cgi?id=222439 ) (#1325) --- dbms/src/Storages/MergeTree/MergeTreeDataSelectExecutor.cpp | 1 + 1 file changed, 1 insertion(+) diff --git a/dbms/src/Storages/MergeTree/MergeTreeDataSelectExecutor.cpp b/dbms/src/Storages/MergeTree/MergeTreeDataSelectExecutor.cpp index bd6e54623ac..1d17475d0ef 100644 --- a/dbms/src/Storages/MergeTree/MergeTreeDataSelectExecutor.cpp +++ b/dbms/src/Storages/MergeTree/MergeTreeDataSelectExecutor.cpp @@ -12,6 +12,7 @@ static constexpr bool is_integer = true; static constexpr int radix = 2; static constexpr int digits = 8 * sizeof(char) * 2; + static constexpr __uint128_t min () { return 0; } // used in boost 1.65.1+ }; } #endif From 3a7f4f04e248be7b44338e27b20aa50023eec559 Mon Sep 17 00:00:00 2001 From: Vladimir Chebotarev Date: Sat, 7 Oct 2017 15:03:44 +0300 Subject: [PATCH 56/63] libglibc-compatibility: Added longjmp tests and fixed longjmp problem. [#METR-21516] --- libs/libglibc-compatibility/CMakeLists.txt | 5 +++- .../glibc-compatibility.c | 29 +++++++++++++++++-- libs/libglibc-compatibility/musl/longjmp.s | 9 ++---- libs/libglibc-compatibility/musl/setjmp.s | 16 ++++++++++ .../tests/CMakeLists.txt | 6 ++++ libs/libglibc-compatibility/tests/longjmp.c | 23 +++++++++++++++ .../libglibc-compatibility/tests/siglongjmp.c | 23 +++++++++++++++ 7 files changed, 102 insertions(+), 9 deletions(-) create mode 100644 libs/libglibc-compatibility/musl/setjmp.s create mode 100644 libs/libglibc-compatibility/tests/CMakeLists.txt create mode 100644 libs/libglibc-compatibility/tests/longjmp.c create mode 100644 libs/libglibc-compatibility/tests/siglongjmp.c diff --git a/libs/libglibc-compatibility/CMakeLists.txt b/libs/libglibc-compatibility/CMakeLists.txt index 9521dba9eab..b4fb9fc424e 100644 --- a/libs/libglibc-compatibility/CMakeLists.txt +++ b/libs/libglibc-compatibility/CMakeLists.txt @@ -1,2 +1,5 @@ +# N.B. This library works on x86_64 only. enable_language (ASM) -add_library (glibc-compatibility glibc-compatibility.c musl/pipe2.c musl/fallocate.c musl/longjmp.s musl/vasprintf.c musl/lgamma.c) +add_library (glibc-compatibility glibc-compatibility.c musl/pipe2.c musl/fallocate.c musl/longjmp.s musl/setjmp.s musl/vasprintf.c musl/lgamma.c) + +add_subdirectory (tests) diff --git a/libs/libglibc-compatibility/glibc-compatibility.c b/libs/libglibc-compatibility/glibc-compatibility.c index 0f6ce057132..594517bd9e0 100644 --- a/libs/libglibc-compatibility/glibc-compatibility.c +++ b/libs/libglibc-compatibility/glibc-compatibility.c @@ -67,13 +67,38 @@ int __poll_chk(struct pollfd * fds, nfds_t nfds, int timeout, size_t fdslen) #include -void longjmp(jmp_buf env, int val); + +void __attribute__ ((__noreturn__)) musl_longjmp(jmp_buf env, int val); +int musl_setjmp(jmp_buf env); /// NOTE This disables some of FORTIFY_SOURCE functionality. 
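/* A usage sketch of the contract reimplemented below, assuming plain POSIX
   semantics: sigsetjmp(env, 1) saves the current signal mask inside the jump
   buffer, and the matching siglongjmp must restore that mask before control
   resumes at the jump target. That is exactly the __mask_was_saved /
   __saved_mask bookkeeping handled in the following hunk. */
#include <setjmp.h>
#include <signal.h>

static sigjmp_buf env;

int run(void)
{
    sigset_t to_block;
    sigemptyset(&to_block);
    sigaddset(&to_block, SIGUSR1);

    if (sigsetjmp(env, 1) == 0)                  /* nonzero second arg: mask is saved */
    {
        sigprocmask(SIG_BLOCK, &to_block, NULL); /* change the mask... */
        siglongjmp(env, 42);                     /* ...then jump back */
    }
    /* SIGUSR1 is unblocked again here: siglongjmp restored the saved mask. */
    return 0;
}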
void __longjmp_chk(jmp_buf env, int val) { - return longjmp(env, val); + if (env->__mask_was_saved != 0) + { + sigprocmask(SIG_SETMASK, &env->__saved_mask, NULL); + } + musl_longjmp(env, val); +} + +int _setjmp(jmp_buf env) +{ + return musl_setjmp(env); +} + +int sigsetjmp(sigjmp_buf env, int save_sigmask) +{ + if (save_sigmask) + { + env->__mask_was_saved = 1; + sigprocmask(SIG_BLOCK, NULL, &env->__saved_mask); + } + else + { + env->__mask_was_saved = 0; + } + return musl_setjmp(env); } #include diff --git a/libs/libglibc-compatibility/musl/longjmp.s b/libs/libglibc-compatibility/musl/longjmp.s index e175a4b9606..2ed7642906d 100644 --- a/libs/libglibc-compatibility/musl/longjmp.s +++ b/libs/libglibc-compatibility/musl/longjmp.s @@ -1,10 +1,7 @@ /* Copyright 2011-2012 Nicholas J. Kain, licensed under standard MIT license */ -.global _longjmp -.global longjmp -.type _longjmp,@function -.type longjmp,@function -_longjmp: -longjmp: +.global musl_longjmp +.type musl_longjmp,@function +musl_longjmp: mov %rsi,%rax /* val will be longjmp return */ test %rax,%rax jnz 1f diff --git a/libs/libglibc-compatibility/musl/setjmp.s b/libs/libglibc-compatibility/musl/setjmp.s new file mode 100644 index 00000000000..c612db84bd1 --- /dev/null +++ b/libs/libglibc-compatibility/musl/setjmp.s @@ -0,0 +1,16 @@ +/* Copyright 2011-2012 Nicholas J. Kain, licensed under standard MIT license */ +.global musl_setjmp +.type musl_setjmp,@function +musl_setjmp: + mov %rbx,(%rdi) /* rdi is jmp_buf, move registers onto it */ + mov %rbp,8(%rdi) + mov %r12,16(%rdi) + mov %r13,24(%rdi) + mov %r14,32(%rdi) + mov %r15,40(%rdi) + lea 8(%rsp),%rdx /* this is our rsp WITHOUT current ret addr */ + mov %rdx,48(%rdi) + mov (%rsp),%rdx /* save return addr ptr for new rip */ + mov %rdx,56(%rdi) + xor %rax,%rax /* always return 0 */ + ret diff --git a/libs/libglibc-compatibility/tests/CMakeLists.txt b/libs/libglibc-compatibility/tests/CMakeLists.txt new file mode 100644 index 00000000000..e2a8dfd75be --- /dev/null +++ b/libs/libglibc-compatibility/tests/CMakeLists.txt @@ -0,0 +1,6 @@ +foreach (T longjmp siglongjmp) + add_executable (${T} ${T}.c) + target_link_libraries (${T} glibc-compatibility) + set_target_properties (${T} PROPERTIES LINKER_LANGUAGE CXX) + add_check (${T}) +endforeach () diff --git a/libs/libglibc-compatibility/tests/longjmp.c b/libs/libglibc-compatibility/tests/longjmp.c new file mode 100644 index 00000000000..aa96e6276b4 --- /dev/null +++ b/libs/libglibc-compatibility/tests/longjmp.c @@ -0,0 +1,23 @@ +#include <setjmp.h> + +int main() +{ + jmp_buf env; + int val; + volatile int count = 0; + val = setjmp(env); + ++count; + if (count == 1 && val != 0) + { + return 1; + } + if (count == 2 && val == 42) + { + return 0; + } + if (count == 1) + { + longjmp(env, 42); + } + return 1; +} diff --git a/libs/libglibc-compatibility/tests/siglongjmp.c b/libs/libglibc-compatibility/tests/siglongjmp.c new file mode 100644 index 00000000000..e4befb34259 --- /dev/null +++ b/libs/libglibc-compatibility/tests/siglongjmp.c @@ -0,0 +1,23 @@ +#include <setjmp.h> + +int main() +{ + sigjmp_buf env; + int val; + volatile int count = 0; + val = sigsetjmp(env, 0); + ++count; + if (count == 1 && val != 0) + { + return 1; + } + if (count == 2 && val == 42) + { + return 0; + } + if (count == 1) + { + siglongjmp(env, 42); + } + return 1; +} From 31d2ed298254f87963e7fd9c60b9f82f2e34b5aa Mon Sep 17 00:00:00 2001 From: Vladimir Chebotarev Date: Sat, 7 Oct 2017 20:44:17 +0300 Subject: [PATCH 57/63] libglibc-compatibility: Changed musl longjmp to work with glibc setjmp.
[#METR-21516] --- libs/libglibc-compatibility/CMakeLists.txt | 3 +- .../glibc-compatibility.c | 29 +------------- libs/libglibc-compatibility/musl/longjmp.s | 39 ++++++++++--------- libs/libglibc-compatibility/musl/setjmp.s | 16 -------- 4 files changed, 24 insertions(+), 63 deletions(-) delete mode 100644 libs/libglibc-compatibility/musl/setjmp.s diff --git a/libs/libglibc-compatibility/CMakeLists.txt b/libs/libglibc-compatibility/CMakeLists.txt index b4fb9fc424e..a567387c065 100644 --- a/libs/libglibc-compatibility/CMakeLists.txt +++ b/libs/libglibc-compatibility/CMakeLists.txt @@ -1,5 +1,4 @@ -# N.B. This library works on x86_64 only. enable_language (ASM) -add_library (glibc-compatibility glibc-compatibility.c musl/pipe2.c musl/fallocate.c musl/longjmp.s musl/setjmp.s musl/vasprintf.c musl/lgamma.c) +add_library (glibc-compatibility glibc-compatibility.c musl/pipe2.c musl/fallocate.c musl/longjmp.s musl/vasprintf.c musl/lgamma.c) add_subdirectory (tests) diff --git a/libs/libglibc-compatibility/glibc-compatibility.c b/libs/libglibc-compatibility/glibc-compatibility.c index 594517bd9e0..5f9c1378378 100644 --- a/libs/libglibc-compatibility/glibc-compatibility.c +++ b/libs/libglibc-compatibility/glibc-compatibility.c @@ -67,38 +67,13 @@ int __poll_chk(struct pollfd * fds, nfds_t nfds, int timeout, size_t fdslen) #include - -void __attribute__ ((__noreturn__)) musl_longjmp(jmp_buf env, int val); -int musl_setjmp(jmp_buf env); +void musl_glibc_longjmp(jmp_buf env, int val); /// NOTE This disables some of FORTIFY_SOURCE functionality. void __longjmp_chk(jmp_buf env, int val) { - if (env->__mask_was_saved != 0) - { - sigprocmask(SIG_SETMASK, &env->__saved_mask, NULL); - } - musl_longjmp(env, val); -} - -int _setjmp(jmp_buf env) -{ - return musl_setjmp(env); -} - -int sigsetjmp(sigjmp_buf env, int save_sigmask) -{ - if (save_sigmask) - { - env->__mask_was_saved = 1; - sigprocmask(SIG_BLOCK, NULL, &env->__saved_mask); - } - else - { - env->__mask_was_saved = 0; - } - return musl_setjmp(env); + musl_glibc_longjmp(env, val); } #include diff --git a/libs/libglibc-compatibility/musl/longjmp.s b/libs/libglibc-compatibility/musl/longjmp.s index 2ed7642906d..2d1e26f2f99 100644 --- a/libs/libglibc-compatibility/musl/longjmp.s +++ b/libs/libglibc-compatibility/musl/longjmp.s @@ -1,19 +1,22 @@ /* Copyright 2011-2012 Nicholas J. 
Kain, licensed under standard MIT license */ -.global musl_longjmp -.type musl_longjmp,@function -musl_longjmp: - mov %rsi,%rax /* val will be longjmp return */ - test %rax,%rax - jnz 1f - inc %rax /* if val==0, val=1 per longjmp semantics */ -1: - mov (%rdi),%rbx /* rdi is the jmp_buf, restore regs from it */ - mov 8(%rdi),%rbp - mov 16(%rdi),%r12 - mov 24(%rdi),%r13 - mov 32(%rdi),%r14 - mov 40(%rdi),%r15 - mov 48(%rdi),%rdx /* this ends up being the stack pointer */ - mov %rdx,%rsp - mov 56(%rdi),%rdx /* this is the instruction pointer */ - jmp *%rdx /* goto saved address without altering rsp */ +.global musl_glibc_longjmp +.type musl_glibc_longjmp,@function +musl_glibc_longjmp: + mov 0x30(%rdi),%r8 + mov 0x8(%rdi),%r9 + mov 0x38(%rdi),%rdx + ror $0x11,%r8 + xor %fs:0x30,%r8 /* this ends up being the stack pointer */ + ror $0x11,%r9 + xor %fs:0x30,%r9 + ror $0x11,%rdx + xor %fs:0x30,%rdx /* this is the instruction pointer */ + mov (%rdi),%rbx /* rdi is the jmp_buf, restore regs from it */ + mov 0x10(%rdi),%r12 + mov 0x18(%rdi),%r13 + mov 0x20(%rdi),%r14 + mov 0x28(%rdi),%r15 + mov %esi,%eax + mov %r8,%rsp + mov %r9,%rbp + jmpq *%rdx /* goto saved address without altering rsp */ diff --git a/libs/libglibc-compatibility/musl/setjmp.s b/libs/libglibc-compatibility/musl/setjmp.s deleted file mode 100644 index c612db84bd1..00000000000 --- a/libs/libglibc-compatibility/musl/setjmp.s +++ /dev/null @@ -1,16 +0,0 @@ -/* Copyright 2011-2012 Nicholas J. Kain, licensed under standard MIT license */ -.global musl_setjmp -.type musl_setjmp,@function -musl_setjmp: - mov %rbx,(%rdi) /* rdi is jmp_buf, move registers onto it */ - mov %rbp,8(%rdi) - mov %r12,16(%rdi) - mov %r13,24(%rdi) - mov %r14,32(%rdi) - mov %r15,40(%rdi) - lea 8(%rsp),%rdx /* this is our rsp WITHOUT current ret addr */ - mov %rdx,48(%rdi) - mov (%rsp),%rdx /* save return addr ptr for new rip */ - mov %rdx,56(%rdi) - xor %rax,%rax /* always return 0 */ - ret From 6628b5d30800d6419b647fac29040b8236c49d71 Mon Sep 17 00:00:00 2001 From: robot-metrika-test Date: Sat, 7 Oct 2017 22:01:53 +0300 Subject: [PATCH 58/63] Auto version update to [54299] --- dbms/cmake/version.cmake | 4 ++-- debian/changelog | 4 ++-- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/dbms/cmake/version.cmake b/dbms/cmake/version.cmake index cbaed040a12..58f6c0dfb59 100644 --- a/dbms/cmake/version.cmake +++ b/dbms/cmake/version.cmake @@ -1,6 +1,6 @@ # This strings autochanged from release_lib.sh: -set(VERSION_DESCRIBE v1.1.54297-testing) -set(VERSION_REVISION 54297) +set(VERSION_DESCRIBE v1.1.54299-testing) +set(VERSION_REVISION 54299) # end of autochange set (VERSION_MAJOR 1) diff --git a/debian/changelog b/debian/changelog index 403d0960054..5c16f15c2d4 100644 --- a/debian/changelog +++ b/debian/changelog @@ -1,5 +1,5 @@ -clickhouse (1.1.54297) unstable; urgency=low +clickhouse (1.1.54299) unstable; urgency=low * Modified source code - -- Thu, 05 Oct 2017 12:22:56 +0300 + -- Sat, 07 Oct 2017 22:01:52 +0300 From 28bb5e25cf8c9ddbbb4330c2c262a1918d65e18f Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Marek=20Vavrus=CC=8Ca?= Date: Thu, 28 Sep 2017 00:01:54 -0700 Subject: [PATCH 59/63] AggregateFunctionTopK: read alphaMap for generic * the alpha_map vector always (de)serialises the actual version (could empty sometimes) * AggregateFunctionTopK generic variant deserialises it as well instead of ignoring it * AggregateFunctionTopK generic variant clears the array before deserialising refs #1283 --- .../AggregateFunctions/AggregateFunctionTopK.h | 4 ++++ 
dbms/src/Common/SpaceSaving.h | 16 +++++++++++++++- 2 files changed, 19 insertions(+), 1 deletion(-) diff --git a/dbms/src/AggregateFunctions/AggregateFunctionTopK.h b/dbms/src/AggregateFunctions/AggregateFunctionTopK.h index 64938d97f38..814b3564eb8 100644 --- a/dbms/src/AggregateFunctions/AggregateFunctionTopK.h +++ b/dbms/src/AggregateFunctions/AggregateFunctionTopK.h @@ -195,7 +195,9 @@ public: { auto & set = this->data(place).value; set.resize(reserved); + set.clear(); + // Specialised here because there's no deserialiser for StringRef size_t count = 0; readVarUInt(count, buf); for (size_t i = 0; i < count; ++i) { @@ -206,6 +208,8 @@ public: set.insert(ref, count, error); arena->rollback(ref.size); } + + set.readAlphaMap(buf); } void addImpl(AggregateDataPtr place, const IColumn & column, size_t row_num, Arena * arena) const diff --git a/dbms/src/Common/SpaceSaving.h b/dbms/src/Common/SpaceSaving.h index 6e5f39725aa..ebf8659b32a 100644 --- a/dbms/src/Common/SpaceSaving.h +++ b/dbms/src/Common/SpaceSaving.h @@ -130,6 +130,11 @@ public: return m_capacity; } + void clear() + { + return destroyElements(); + } + void resize(size_t new_capacity) { counter_list.reserve(new_capacity); @@ -255,6 +260,8 @@ public: writeVarUInt(size(), wb); for (auto counter : counter_list) counter->write(wb); + + writeVarUInt(alpha_map.size(), wb); for (auto alpha : alpha_map) writeVarUInt(alpha, wb); } @@ -273,7 +280,14 @@ public: push(counter); } - for (size_t i = 0; i < nextAlphaSize(m_capacity); ++i) + readAlphaMap(rb); + } + + void readAlphaMap(ReadBuffer & rb) + { + size_t alpha_size = 0; + readVarUInt(alpha_size, rb); + for (size_t i = 0; i < alpha_size; ++i) { UInt64 alpha = 0; readVarUInt(alpha, rb); From 6e5ea5cc10f7c07786faff0c593f347aa1d0d75b Mon Sep 17 00:00:00 2001 From: alexey-milovidov Date: Mon, 9 Oct 2017 01:09:00 +0300 Subject: [PATCH 60/63] Update AggregateFunctionTopK.h --- dbms/src/AggregateFunctions/AggregateFunctionTopK.h | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/dbms/src/AggregateFunctions/AggregateFunctionTopK.h b/dbms/src/AggregateFunctions/AggregateFunctionTopK.h index 814b3564eb8..656d25d0235 100644 --- a/dbms/src/AggregateFunctions/AggregateFunctionTopK.h +++ b/dbms/src/AggregateFunctions/AggregateFunctionTopK.h @@ -197,10 +197,11 @@ public: set.resize(reserved); set.clear(); - // Specialised here because there's no deserialiser for StringRef + // Specialized here because there's no deserialiser for StringRef size_t count = 0; readVarUInt(count, buf); - for (size_t i = 0; i < count; ++i) { + for (size_t i = 0; i < count; ++i) + { auto ref = readStringBinaryInto(*arena, buf); UInt64 count, error; readVarUInt(count, buf); @@ -215,7 +216,8 @@ public: void addImpl(AggregateDataPtr place, const IColumn & column, size_t row_num, Arena * arena) const { auto & set = this->data(place).value; - if (set.capacity() != reserved) { + if (set.capacity() != reserved) + { set.resize(reserved); } From 2dfb3fa4633d0479aa87b6405530fda129adcfd0 Mon Sep 17 00:00:00 2001 From: alexey-milovidov Date: Mon, 9 Oct 2017 01:10:13 +0300 Subject: [PATCH 61/63] Update AggregateFunctionTopK.h --- dbms/src/AggregateFunctions/AggregateFunctionTopK.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/dbms/src/AggregateFunctions/AggregateFunctionTopK.h b/dbms/src/AggregateFunctions/AggregateFunctionTopK.h index 656d25d0235..be76c718f14 100644 --- a/dbms/src/AggregateFunctions/AggregateFunctionTopK.h +++ b/dbms/src/AggregateFunctions/AggregateFunctionTopK.h @@ -194,8 
+194,8 @@ public: void deserialize(AggregateDataPtr place, ReadBuffer & buf, Arena * arena) const override { auto & set = this->data(place).value; - set.resize(reserved); set.clear(); + set.resize(reserved); // Specialized here because there's no deserialiser for StringRef size_t count = 0; From 975e8575c78892c6217792221ce67a6c37306c05 Mon Sep 17 00:00:00 2001 From: proller Date: Mon, 9 Oct 2017 20:22:15 +0300 Subject: [PATCH 62/63] Add test (failed on version 1.1.54292) (#1328) * Tests: External: rename --use_http => --no_http and fix * Add test (failed on version 1.1.54292) --- .../0_stateless/00507_nullable.reference | 2 + .../queries/0_stateless/00507_nullable.sql | 41 +++++++++++++++++++ 2 files changed, 43 insertions(+) create mode 100644 dbms/tests/queries/0_stateless/00507_nullable.reference create mode 100644 dbms/tests/queries/0_stateless/00507_nullable.sql diff --git a/dbms/tests/queries/0_stateless/00507_nullable.reference b/dbms/tests/queries/0_stateless/00507_nullable.reference new file mode 100644 index 00000000000..1c42f9991e2 --- /dev/null +++ b/dbms/tests/queries/0_stateless/00507_nullable.reference @@ -0,0 +1,2 @@ +1970-01-02 2 0 FAILED 2 +still alive diff --git a/dbms/tests/queries/0_stateless/00507_nullable.sql b/dbms/tests/queries/0_stateless/00507_nullable.sql new file mode 100644 index 00000000000..46e7aef5614 --- /dev/null +++ b/dbms/tests/queries/0_stateless/00507_nullable.sql @@ -0,0 +1,41 @@ +CREATE DATABASE IF NOT EXISTS test; +DROP TABLE IF EXISTS test.runs; +DROP TABLE IF EXISTS test.tests; + +CREATE TABLE test.runs +( + date Date, + id UInt64, + t_id UInt64, + status Enum8('OK' = 1, 'FAILED' = 2, 'SKIPPED' = 3, 'DISCOVERED' = 4), + run_id UInt64 DEFAULT id +) ENGINE = MergeTree(date, (t_id, id), 8192); + +CREATE TABLE test.tests +( + date Date, + id UInt64, + path Nullable(String), + suite_id Nullable(String) +) ENGINE = MergeTree(date, id, 8192); + +INSERT INTO test.tests (date, id) VALUES (1,1); +INSERT INTO test.runs (date, id) VALUES (1,1); +INSERT INTO test.runs (date, id, status) VALUES (1,2, 'FAILED'); +INSERT INTO test.tests (date, id, path) VALUES (1,2 ,'rtline1'); + +SELECT * +FROM test.runs AS r +WHERE (r.status = 'FAILED') AND ( +( + SELECT path + FROM test.tests AS t + WHERE t.id = r.id + LIMIT 1 +) LIKE 'rtline%') +LIMIT 1; + +SELECT 'still alive'; + +DROP TABLE test.runs; +DROP TABLE test.tests; From 56f7f1df72b6048936664d87223783c0fd2a3612 Mon Sep 17 00:00:00 2001 From: proller Date: Mon, 9 Oct 2017 20:41:27 +0300 Subject: [PATCH 63/63] fix cmake --- libs/libglibc-compatibility/tests/CMakeLists.txt | 2 ++ 1 file changed, 2 insertions(+) diff --git a/libs/libglibc-compatibility/tests/CMakeLists.txt b/libs/libglibc-compatibility/tests/CMakeLists.txt index e2a8dfd75be..70a9e056226 100644 --- a/libs/libglibc-compatibility/tests/CMakeLists.txt +++ b/libs/libglibc-compatibility/tests/CMakeLists.txt @@ -1,3 +1,5 @@ +include (${ClickHouse_SOURCE_DIR}/cmake/add_check.cmake) + foreach (T longjmp siglongjmp) add_executable (${T} ${T}.c) target_link_libraries (${T} glibc-compatibility)
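Taken together, the TopK fixes in patches 59-61 above come down to keeping SpaceSaving's write() and read() symmetric: the writer now records the actual alpha_map size, and the reader (readAlphaMap) consumes exactly that many values instead of deriving a count from the capacity. Below is a minimal sketch of that invariant, assuming a plain std::vector<UInt64> container; the include paths and the DB namespace are assumptions matching the ClickHouse source tree, and the function names are illustrative.

#include <vector>
#include <IO/ReadHelpers.h>
#include <IO/WriteHelpers.h>

using namespace DB;

/// The writer records the element count up front, so the reader never guesses.
void writeAlphaMap(const std::vector<UInt64> & alpha_map, WriteBuffer & wb)
{
    writeVarUInt(alpha_map.size(), wb);
    for (UInt64 alpha : alpha_map)
        writeVarUInt(alpha, wb);
}

/// The reader clears stale state and consumes exactly the recorded count.
void readAlphaMap(std::vector<UInt64> & alpha_map, ReadBuffer & rb)
{
    UInt64 size = 0;
    readVarUInt(size, rb);
    alpha_map.clear();
    alpha_map.reserve(size);
    for (UInt64 i = 0; i < size; ++i)
    {
        UInt64 alpha = 0;
        readVarUInt(alpha, rb);
        alpha_map.push_back(alpha);
    }
}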