From 0f039f83590339ab636e9b2e59c25ed22227d246 Mon Sep 17 00:00:00 2001 From: Kuba Kaflik Date: Tue, 21 Mar 2023 09:09:57 +0100 Subject: [PATCH 01/52] Add Google Cloud Storage S3 compatible table function --- src/TableFunctions/TableFunctionS3.h | 14 ++++++++++++++ 1 file changed, 14 insertions(+) diff --git a/src/TableFunctions/TableFunctionS3.h b/src/TableFunctions/TableFunctionS3.h index 859da9e9201..a2e93476448 100644 --- a/src/TableFunctions/TableFunctionS3.h +++ b/src/TableFunctions/TableFunctionS3.h @@ -93,4 +93,18 @@ private: } +class TableFunctionGCS : public TableFunctionS3 +{ +public: + static constexpr auto name = "gcs"; + std::string getName() const override + { + return name; + } +private: + const char * getStorageTypeName() const override { return "GCS"; } +}; + +} + #endif From e6ddfc3486985393040222aec24ea70a4c60e7b8 Mon Sep 17 00:00:00 2001 From: Kuba Kaflik Date: Tue, 21 Mar 2023 09:51:37 +0100 Subject: [PATCH 02/52] Update GCS table function docs --- docs/en/sql-reference/table-functions/gcs.md | 184 +++++++++++++++++++ src/TableFunctions/TableFunctionS3.h | 8 + 2 files changed, 192 insertions(+) create mode 100644 docs/en/sql-reference/table-functions/gcs.md diff --git a/docs/en/sql-reference/table-functions/gcs.md b/docs/en/sql-reference/table-functions/gcs.md new file mode 100644 index 00000000000..8427a2db224 --- /dev/null +++ b/docs/en/sql-reference/table-functions/gcs.md @@ -0,0 +1,184 @@ +--- +slug: /en/sql-reference/table-functions/gcs +sidebar_position: 45 +sidebar_label: s3 +keywords: [gcs, bucket] +--- + +# gcs Table Function + +Provides a table-like interface to select/insert files in [Google Cloud Storage](https://cloud.google.com/storage/). + +**Syntax** + +``` sql +gcs(path [,hmac_key, hmac_secret] [,format] [,structure] [,compression]) +``` + +:::tip GCS +The GCS Table Function integrates with Google Cloud Storage by using the GCS XML API and HMAC keys. See the [Google interoperability docs]( https://cloud.google.com/storage/docs/interoperability) for more details about the endpoint and HMAC. + +::: + +**Arguments** + +- `path` — Bucket url with path to file. Supports following wildcards in readonly mode: `*`, `?`, `{abc,def}` and `{N..M}` where `N`, `M` — numbers, `'abc'`, `'def'` — strings. For more information see [here](../../engines/table-engines/integrations/gcs.md#wildcards-in-path). + + :::note GCS + The GCS path is in this format as the endpoint for the Google XML API is different than the JSON API: + ``` + https://storage.googleapis.com/// + ``` + and not ~~https://storage.cloud.google.com~~. + ::: + +- `format` — The [format](../../interfaces/formats.md#formats) of the file. +- `structure` — Structure of the table. Format `'column1_name column1_type, column2_name column2_type, ...'`. +- `compression` — Parameter is optional. Supported values: `none`, `gzip/gz`, `brotli/br`, `xz/LZMA`, `zstd/zst`. By default, it will autodetect compression by file extension. + +**Returned value** + +A table with the specified structure for reading or writing data in the specified file. 
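The examples below omit credentials. For a bucket that is not publicly readable, the HMAC key and secret go right after the path, as in the syntax above. A minimal sketch of that form, using placeholder credentials and the same sample schema as the examples:

``` sql
SELECT count()
FROM gcs(
    'https://storage.googleapis.com/my-test-bucket-768/data.csv',
    'hmac_key_placeholder',      -- hypothetical HMAC access key
    'hmac_secret_placeholder',   -- hypothetical HMAC secret
    'CSV',
    'column1 UInt32, column2 UInt32, column3 UInt32');
```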
+ +**Examples** + +Selecting the first two rows from the table from S3 file `https://clickhouse-public-datasets.s3.amazonaws.com/my-test-bucket-768/data.csv`: + +``` sql +SELECT * +FROM gcs('https://clickhouse-public-datasets.s3.amazonaws.com/my-test-bucket-768/data.csv', 'CSV', 'column1 UInt32, column2 UInt32, column3 UInt32') +LIMIT 2; +``` + +``` text +┌─column1─┬─column2─┬─column3─┐ +│ 1 │ 2 │ 3 │ +│ 3 │ 2 │ 1 │ +└─────────┴─────────┴─────────┘ +``` + +The similar but from file with `gzip` compression: + +``` sql +SELECT * +FROM gcs('https://clickhouse-public-datasets.s3.amazonaws.com/my-test-bucket-768/data.csv.gz', 'CSV', 'column1 UInt32, column2 UInt32, column3 UInt32', 'gzip') +LIMIT 2; +``` + +``` text +┌─column1─┬─column2─┬─column3─┐ +│ 1 │ 2 │ 3 │ +│ 3 │ 2 │ 1 │ +└─────────┴─────────┴─────────┘ +``` + +## Usage + +Suppose that we have several files with following URIs on S3: + +- 'https://clickhouse-public-datasets.s3.amazonaws.com/my-test-bucket-768/some_prefix/some_file_1.csv' +- 'https://clickhouse-public-datasets.s3.amazonaws.com/my-test-bucket-768/some_prefix/some_file_2.csv' +- 'https://clickhouse-public-datasets.s3.amazonaws.com/my-test-bucket-768/some_prefix/some_file_3.csv' +- 'https://clickhouse-public-datasets.s3.amazonaws.com/my-test-bucket-768/some_prefix/some_file_4.csv' +- 'https://clickhouse-public-datasets.s3.amazonaws.com/my-test-bucket-768/another_prefix/some_file_1.csv' +- 'https://clickhouse-public-datasets.s3.amazonaws.com/my-test-bucket-768/another_prefix/some_file_2.csv' +- 'https://clickhouse-public-datasets.s3.amazonaws.com/my-test-bucket-768/another_prefix/some_file_3.csv' +- 'https://clickhouse-public-datasets.s3.amazonaws.com/my-test-bucket-768/another_prefix/some_file_4.csv' + +Count the amount of rows in files ending with numbers from 1 to 3: + +``` sql +SELECT count(*) +FROM gcs('https://clickhouse-public-datasets.s3.amazonaws.com/my-test-bucket-768/{some,another}_prefix/some_file_{1..3}.csv', 'CSV', 'name String, value UInt32') +``` + +``` text +┌─count()─┐ +│ 18 │ +└─────────┘ +``` + +Count the total amount of rows in all files in these two directories: + +``` sql +SELECT count(*) +FROM gcs('https://clickhouse-public-datasets.s3.amazonaws.com/my-test-bucket-768/{some,another}_prefix/*', 'CSV', 'name String, value UInt32') +``` + +``` text +┌─count()─┐ +│ 24 │ +└─────────┘ +``` + +:::warning +If your listing of files contains number ranges with leading zeros, use the construction with braces for each digit separately or use `?`. +::: + +Count the total amount of rows in files named `file-000.csv`, `file-001.csv`, … , `file-999.csv`: + +``` sql +SELECT count(*) +FROM gcs('https://clickhouse-public-datasets.s3.amazonaws.com/my-test-bucket-768/big_prefix/file-{000..999}.csv', 'CSV', 'name String, value UInt32'); +``` + +``` text +┌─count()─┐ +│ 12 │ +└─────────┘ +``` + +Insert data into file `test-data.csv.gz`: + +``` sql +INSERT INTO FUNCTION gcs('https://clickhouse-public-datasets.s3.amazonaws.com/my-test-bucket-768/test-data.csv.gz', 'CSV', 'name String, value UInt32', 'gzip') +VALUES ('test-data', 1), ('test-data-2', 2); +``` + +Insert data into file `test-data.csv.gz` from existing table: + +``` sql +INSERT INTO FUNCTION gcs('https://clickhouse-public-datasets.s3.amazonaws.com/my-test-bucket-768/test-data.csv.gz', 'CSV', 'name String, value UInt32', 'gzip') +SELECT name, value FROM existing_table; +``` + +Glob ** can be used for recursive directory traversal. 
Consider the below example, it will fetch all files from `my-test-bucket-768` directory recursively: + +``` sql +SELECT * FROM gcs('https://clickhouse-public-datasets.s3.amazonaws.com/my-test-bucket-768/**', 'CSV', 'name String, value UInt32', 'gzip'); +``` + +The below get data from all `test-data.csv.gz` files from any folder inside `my-test-bucket` directory recursively: + +``` sql +SELECT * FROM gcs('https://clickhouse-public-datasets.s3.amazonaws.com/my-test-bucket-768/**/test-data.csv.gz', 'CSV', 'name String, value UInt32', 'gzip'); +``` + +## Partitioned Write + +If you specify `PARTITION BY` expression when inserting data into `S3` table, a separate file is created for each partition value. Splitting the data into separate files helps to improve reading operations efficiency. + +**Examples** + +1. Using partition ID in a key creates separate files: + +```sql +INSERT INTO TABLE FUNCTION + gcs('http://bucket.amazonaws.com/my_bucket/file_{_partition_id}.csv', 'CSV', 'a String, b UInt32, c UInt32') + PARTITION BY a VALUES ('x', 2, 3), ('x', 4, 5), ('y', 11, 12), ('y', 13, 14), ('z', 21, 22), ('z', 23, 24); +``` +As a result, the data is written into three files: `file_x.csv`, `file_y.csv`, and `file_z.csv`. + +2. Using partition ID in a bucket name creates files in different buckets: + +```sql +INSERT INTO TABLE FUNCTION + gcs('http://bucket.amazonaws.com/my_bucket_{_partition_id}/file.csv', 'CSV', 'a UInt32, b UInt32, c UInt32') + PARTITION BY a VALUES (1, 2, 3), (1, 4, 5), (10, 11, 12), (10, 13, 14), (20, 21, 22), (20, 23, 24); +``` +As a result, the data is written into three files in different buckets: `my_bucket_1/file.csv`, `my_bucket_10/file.csv`, and `my_bucket_20/file.csv`. + +**See Also** + +- [S3 table function](s3.md) +- [S3 engine](../../engines/table-engines/integrations/s3.md) diff --git a/src/TableFunctions/TableFunctionS3.h b/src/TableFunctions/TableFunctionS3.h index a2e93476448..ed8cd3bd41a 100644 --- a/src/TableFunctions/TableFunctionS3.h +++ b/src/TableFunctions/TableFunctionS3.h @@ -97,6 +97,14 @@ class TableFunctionGCS : public TableFunctionS3 { public: static constexpr auto name = "gcs"; + static constexpr auto signature = " - url\n" + " - url, format\n" + " - url, format, structure\n" + " - url, hmac_key, hmac_secret\n" + " - url, format, structure, compression_method\n" + " - url, hmac_key, hmac_secret, format\n" + " - url, hmac_key, hmac_secret, format, structure\n" + " - url, hmac_key, hmac_secret, format, structure, compression_method"; std::string getName() const override { return name; From e2c32c3bc072e2290620d975ada42c37bcabcc52 Mon Sep 17 00:00:00 2001 From: Kuba Kaflik Date: Tue, 21 Mar 2023 13:46:37 +0100 Subject: [PATCH 03/52] Update GCS table function docs --- docs/en/sql-reference/table-functions/gcs.md | 40 ++++++++++---------- 1 file changed, 20 insertions(+), 20 deletions(-) diff --git a/docs/en/sql-reference/table-functions/gcs.md b/docs/en/sql-reference/table-functions/gcs.md index 8427a2db224..dcf49a5108b 100644 --- a/docs/en/sql-reference/table-functions/gcs.md +++ b/docs/en/sql-reference/table-functions/gcs.md @@ -42,11 +42,11 @@ A table with the specified structure for reading or writing data in the specifie **Examples** -Selecting the first two rows from the table from S3 file `https://clickhouse-public-datasets.s3.amazonaws.com/my-test-bucket-768/data.csv`: +Selecting the first two rows from the table from GCS file `https://storage.googleapis.com/my-test-bucket-768/data.csv`: ``` sql SELECT * -FROM 
gcs('https://clickhouse-public-datasets.s3.amazonaws.com/my-test-bucket-768/data.csv', 'CSV', 'column1 UInt32, column2 UInt32, column3 UInt32') +FROM gcs('https://storage.googleapis.com/my-test-bucket-768/data.csv', 'CSV', 'column1 UInt32, column2 UInt32, column3 UInt32') LIMIT 2; ``` @@ -61,7 +61,7 @@ The similar but from file with `gzip` compression: ``` sql SELECT * -FROM gcs('https://clickhouse-public-datasets.s3.amazonaws.com/my-test-bucket-768/data.csv.gz', 'CSV', 'column1 UInt32, column2 UInt32, column3 UInt32', 'gzip') +FROM gcs('https://storage.googleapis.com/my-test-bucket-768/data.csv.gz', 'CSV', 'column1 UInt32, column2 UInt32, column3 UInt32', 'gzip') LIMIT 2; ``` @@ -74,22 +74,22 @@ LIMIT 2; ## Usage -Suppose that we have several files with following URIs on S3: +Suppose that we have several files with following URIs on GCS: -- 'https://clickhouse-public-datasets.s3.amazonaws.com/my-test-bucket-768/some_prefix/some_file_1.csv' -- 'https://clickhouse-public-datasets.s3.amazonaws.com/my-test-bucket-768/some_prefix/some_file_2.csv' -- 'https://clickhouse-public-datasets.s3.amazonaws.com/my-test-bucket-768/some_prefix/some_file_3.csv' -- 'https://clickhouse-public-datasets.s3.amazonaws.com/my-test-bucket-768/some_prefix/some_file_4.csv' -- 'https://clickhouse-public-datasets.s3.amazonaws.com/my-test-bucket-768/another_prefix/some_file_1.csv' -- 'https://clickhouse-public-datasets.s3.amazonaws.com/my-test-bucket-768/another_prefix/some_file_2.csv' -- 'https://clickhouse-public-datasets.s3.amazonaws.com/my-test-bucket-768/another_prefix/some_file_3.csv' -- 'https://clickhouse-public-datasets.s3.amazonaws.com/my-test-bucket-768/another_prefix/some_file_4.csv' +- 'https://storage.googleapis.com/my-test-bucket-768/some_prefix/some_file_1.csv' +- 'https://storage.googleapis.com/my-test-bucket-768/some_prefix/some_file_2.csv' +- 'https://storage.googleapis.com/my-test-bucket-768/some_prefix/some_file_3.csv' +- 'https://storage.googleapis.com/my-test-bucket-768/some_prefix/some_file_4.csv' +- 'https://storage.googleapis.com/my-test-bucket-768/another_prefix/some_file_1.csv' +- 'https://storage.googleapis.com/my-test-bucket-768/another_prefix/some_file_2.csv' +- 'https://storage.googleapis.com/my-test-bucket-768/another_prefix/some_file_3.csv' +- 'https://storage.googleapis.com/my-test-bucket-768/another_prefix/some_file_4.csv' Count the amount of rows in files ending with numbers from 1 to 3: ``` sql SELECT count(*) -FROM gcs('https://clickhouse-public-datasets.s3.amazonaws.com/my-test-bucket-768/{some,another}_prefix/some_file_{1..3}.csv', 'CSV', 'name String, value UInt32') +FROM gcs('https://storage.googleapis.com/my-test-bucket-768/{some,another}_prefix/some_file_{1..3}.csv', 'CSV', 'name String, value UInt32') ``` ``` text @@ -102,7 +102,7 @@ Count the total amount of rows in all files in these two directories: ``` sql SELECT count(*) -FROM gcs('https://clickhouse-public-datasets.s3.amazonaws.com/my-test-bucket-768/{some,another}_prefix/*', 'CSV', 'name String, value UInt32') +FROM gcs('https://storage.googleapis.com/my-test-bucket-768/{some,another}_prefix/*', 'CSV', 'name String, value UInt32') ``` ``` text @@ -119,7 +119,7 @@ Count the total amount of rows in files named `file-000.csv`, `file-001.csv`, ``` sql SELECT count(*) -FROM gcs('https://clickhouse-public-datasets.s3.amazonaws.com/my-test-bucket-768/big_prefix/file-{000..999}.csv', 'CSV', 'name String, value UInt32'); +FROM gcs('https://storage.googleapis.com/my-test-bucket-768/big_prefix/file-{000..999}.csv', 'CSV', 'name 
String, value UInt32'); ``` ``` text @@ -131,32 +131,32 @@ FROM gcs('https://clickhouse-public-datasets.s3.amazonaws.com/my-test-bucket-768 Insert data into file `test-data.csv.gz`: ``` sql -INSERT INTO FUNCTION gcs('https://clickhouse-public-datasets.s3.amazonaws.com/my-test-bucket-768/test-data.csv.gz', 'CSV', 'name String, value UInt32', 'gzip') +INSERT INTO FUNCTION gcs('https://storage.googleapis.com/my-test-bucket-768/test-data.csv.gz', 'CSV', 'name String, value UInt32', 'gzip') VALUES ('test-data', 1), ('test-data-2', 2); ``` Insert data into file `test-data.csv.gz` from existing table: ``` sql -INSERT INTO FUNCTION gcs('https://clickhouse-public-datasets.s3.amazonaws.com/my-test-bucket-768/test-data.csv.gz', 'CSV', 'name String, value UInt32', 'gzip') +INSERT INTO FUNCTION gcs('https://storage.googleapis.com/my-test-bucket-768/test-data.csv.gz', 'CSV', 'name String, value UInt32', 'gzip') SELECT name, value FROM existing_table; ``` Glob ** can be used for recursive directory traversal. Consider the below example, it will fetch all files from `my-test-bucket-768` directory recursively: ``` sql -SELECT * FROM gcs('https://clickhouse-public-datasets.s3.amazonaws.com/my-test-bucket-768/**', 'CSV', 'name String, value UInt32', 'gzip'); +SELECT * FROM gcs('https://storage.googleapis.com/my-test-bucket-768/**', 'CSV', 'name String, value UInt32', 'gzip'); ``` The below get data from all `test-data.csv.gz` files from any folder inside `my-test-bucket` directory recursively: ``` sql -SELECT * FROM gcs('https://clickhouse-public-datasets.s3.amazonaws.com/my-test-bucket-768/**/test-data.csv.gz', 'CSV', 'name String, value UInt32', 'gzip'); +SELECT * FROM gcs('https://storage.googleapis.com/my-test-bucket-768/**/test-data.csv.gz', 'CSV', 'name String, value UInt32', 'gzip'); ``` ## Partitioned Write -If you specify `PARTITION BY` expression when inserting data into `S3` table, a separate file is created for each partition value. Splitting the data into separate files helps to improve reading operations efficiency. +If you specify `PARTITION BY` expression when inserting data into `GCS` table, a separate file is created for each partition value. Splitting the data into separate files helps to improve reading operations efficiency. **Examples** From d0a54ab21b2107ee6893a7480533c79c2919fd75 Mon Sep 17 00:00:00 2001 From: Kuba Kaflik Date: Tue, 21 Mar 2023 14:45:58 +0100 Subject: [PATCH 04/52] Update GCS table function docs --- docs/en/sql-reference/table-functions/gcs.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/en/sql-reference/table-functions/gcs.md b/docs/en/sql-reference/table-functions/gcs.md index dcf49a5108b..bfa7f36fa48 100644 --- a/docs/en/sql-reference/table-functions/gcs.md +++ b/docs/en/sql-reference/table-functions/gcs.md @@ -22,7 +22,7 @@ The GCS Table Function integrates with Google Cloud Storage by using the GCS XML **Arguments** -- `path` — Bucket url with path to file. Supports following wildcards in readonly mode: `*`, `?`, `{abc,def}` and `{N..M}` where `N`, `M` — numbers, `'abc'`, `'def'` — strings. For more information see [here](../../engines/table-engines/integrations/gcs.md#wildcards-in-path). +- `path` — Bucket url with path to file. Supports following wildcards in readonly mode: `*`, `?`, `{abc,def}` and `{N..M}` where `N`, `M` — numbers, `'abc'`, `'def'` — strings. 
:::note GCS The GCS path is in this format as the endpoint for the Google XML API is different than the JSON API: From 576efc1da3384148664202acef1cdbd27ddc8a08 Mon Sep 17 00:00:00 2001 From: Kuba Kaflik Date: Wed, 22 Mar 2023 06:58:09 +0100 Subject: [PATCH 05/52] register GCP function in factory --- src/TableFunctions/TableFunctionS3.cpp | 5 +++++ src/TableFunctions/registerTableFunctions.cpp | 1 + src/TableFunctions/registerTableFunctions.h | 1 + 3 files changed, 7 insertions(+) diff --git a/src/TableFunctions/TableFunctionS3.cpp b/src/TableFunctions/TableFunctionS3.cpp index f082b192ee0..6f4e6acec8a 100644 --- a/src/TableFunctions/TableFunctionS3.cpp +++ b/src/TableFunctions/TableFunctionS3.cpp @@ -183,6 +183,11 @@ void registerTableFunctionOSS(TableFunctionFactory & factory) factory.registerFunction(); } +void registerTableFunctionGCS(TableFunctionFactory & factory) +{ + factory.registerFunction(); +} + } #endif diff --git a/src/TableFunctions/registerTableFunctions.cpp b/src/TableFunctions/registerTableFunctions.cpp index 7b2b989e724..c692173e689 100644 --- a/src/TableFunctions/registerTableFunctions.cpp +++ b/src/TableFunctions/registerTableFunctions.cpp @@ -28,6 +28,7 @@ void registerTableFunctions() registerTableFunctionS3Cluster(factory); registerTableFunctionCOS(factory); registerTableFunctionOSS(factory); + registerTableFunctionGCS(factory); registerTableFunctionHudi(factory); registerTableFunctionDeltaLake(factory); #if USE_AVRO diff --git a/src/TableFunctions/registerTableFunctions.h b/src/TableFunctions/registerTableFunctions.h index 911aae199e2..af1b7129ec4 100644 --- a/src/TableFunctions/registerTableFunctions.h +++ b/src/TableFunctions/registerTableFunctions.h @@ -25,6 +25,7 @@ void registerTableFunctionS3(TableFunctionFactory & factory); void registerTableFunctionS3Cluster(TableFunctionFactory & factory); void registerTableFunctionCOS(TableFunctionFactory & factory); void registerTableFunctionOSS(TableFunctionFactory & factory); +void registerTableFunctionGCS(TableFunctionFactory & factory); void registerTableFunctionHudi(TableFunctionFactory & factory); void registerTableFunctionDeltaLake(TableFunctionFactory & factory); #if USE_AVRO From ae100defa279c8e3343482551041758f6a7c925c Mon Sep 17 00:00:00 2001 From: Nikolay Degterinsky Date: Thu, 20 Apr 2023 15:51:10 +0000 Subject: [PATCH 06/52] Add Array data type to MongoDB --- src/Processors/Sources/MongoDBSource.cpp | 204 +++++++++++++++++- src/Processors/Sources/MongoDBSource.h | 9 + .../integration/test_storage_mongodb/test.py | 75 +++++++ 3 files changed, 284 insertions(+), 4 deletions(-) diff --git a/src/Processors/Sources/MongoDBSource.cpp b/src/Processors/Sources/MongoDBSource.cpp index a8bfefdf8a6..8ebedc3e877 100644 --- a/src/Processors/Sources/MongoDBSource.cpp +++ b/src/Processors/Sources/MongoDBSource.cpp @@ -6,7 +6,9 @@ #include #include #include +#include +#include #include #include #include @@ -17,6 +19,9 @@ #include #include +#include +#include + // only after poco // naming conflict: // Poco/MongoDB/BSONWriter.h:54: void writeCString(const std::string & value); @@ -33,6 +38,11 @@ namespace ErrorCodes extern const int MONGODB_ERROR; } +namespace +{ + void prepareMongoDBArrayInfo( + std::unordered_map & array_info, size_t column_idx, const DataTypePtr data_type); +} std::unique_ptr createCursor(const std::string & database, const std::string & collection, const Block & sample_block_to_select) { @@ -58,6 +68,10 @@ MongoDBSource::MongoDBSource( , max_block_size{max_block_size_} { description.init(sample_block); + 
+ for (const auto idx : collections::range(0, description.sample_block.columns())) + if (description.types[idx].first == ExternalResultDescription::ValueType::vtArray) + prepareMongoDBArrayInfo(array_info, idx, description.sample_block.getByPosition(idx).type); } @@ -68,6 +82,7 @@ namespace { using ValueType = ExternalResultDescription::ValueType; using ObjectId = Poco::MongoDB::ObjectId; + using MongoArray = Poco::MongoDB::Array; template void insertNumber(IColumn & column, const Poco::MongoDB::Element & value, const std::string & name) @@ -103,7 +118,129 @@ namespace } } - void insertValue(IColumn & column, const ValueType type, const Poco::MongoDB::Element & value, const std::string & name) + template + Field getNumber(const Poco::MongoDB::Element & value, const std::string & name) + { + switch (value.type()) + { + case Poco::MongoDB::ElementTraits::TypeId: + return static_cast(static_cast &>(value).value()); + case Poco::MongoDB::ElementTraits::TypeId: + return static_cast(static_cast &>(value).value()); + case Poco::MongoDB::ElementTraits::TypeId: + return static_cast(static_cast &>(value).value()); + case Poco::MongoDB::ElementTraits::TypeId: + return static_cast(static_cast &>(value).value()); + case Poco::MongoDB::ElementTraits::TypeId: + return Field(); + case Poco::MongoDB::ElementTraits::TypeId: + return parse(static_cast &>(value).value()); + default: + throw Exception(ErrorCodes::TYPE_MISMATCH, "Type mismatch, expected a number, got type id = {} for column {}", + toString(value.type()), name); + } + } + + void prepareMongoDBArrayInfo( + std::unordered_map & array_info, size_t column_idx, const DataTypePtr data_type) + { + const auto * array_type = typeid_cast(data_type.get()); + auto nested = array_type->getNestedType(); + + size_t count_dimensions = 1; + while (isArray(nested)) + { + ++count_dimensions; + nested = typeid_cast(nested.get())->getNestedType(); + } + + Field default_value = nested->getDefault(); + if (nested->isNullable()) + nested = static_cast(nested.get())->getNestedType(); + + WhichDataType which(nested); + std::function parser; + + if (which.isUInt8()) + parser = [](const Poco::MongoDB::Element & value, const std::string & name) -> Field { return getNumber(value, name); }; + else if (which.isUInt16()) + parser = [](const Poco::MongoDB::Element & value, const std::string & name) -> Field { return getNumber(value, name); }; + else if (which.isUInt32()) + parser = [](const Poco::MongoDB::Element & value, const std::string & name) -> Field { return getNumber(value, name); }; + else if (which.isUInt64()) + parser = [](const Poco::MongoDB::Element & value, const std::string & name) -> Field { return getNumber(value, name); }; + else if (which.isInt8()) + parser = [](const Poco::MongoDB::Element & value, const std::string & name) -> Field { return getNumber(value, name); }; + else if (which.isInt16()) + parser = [](const Poco::MongoDB::Element & value, const std::string & name) -> Field { return getNumber(value, name); }; + else if (which.isInt32()) + parser = [](const Poco::MongoDB::Element & value, const std::string & name) -> Field { return getNumber(value, name); }; + else if (which.isInt64()) + parser = [](const Poco::MongoDB::Element & value, const std::string & name) -> Field { return getNumber(value, name); }; + else if (which.isFloat32()) + parser = [](const Poco::MongoDB::Element & value, const std::string & name) -> Field { return getNumber(value, name); }; + else if (which.isFloat64()) + parser = [](const Poco::MongoDB::Element & value, const 
std::string & name) -> Field { return getNumber(value, name); }; + else if (which.isString() || which.isFixedString()) + parser = [](const Poco::MongoDB::Element & value, const std::string & name) -> Field + { + if (value.type() == Poco::MongoDB::ElementTraits::TypeId) + { + String string_id = value.toString(); + return Field(string_id.data(), string_id.size()); + } + else if (value.type() == Poco::MongoDB::ElementTraits::TypeId) + { + String string = static_cast &>(value).value(); + return Field(string.data(), string.size()); + } + + throw Exception(ErrorCodes::TYPE_MISMATCH, "Type mismatch, expected String, got type id = {} for column {}", + toString(value.type()), name); + }; + else if (which.isDate()) + parser = [](const Poco::MongoDB::Element & value, const std::string & name) -> Field + { + if (value.type() != Poco::MongoDB::ElementTraits::TypeId) + throw Exception(ErrorCodes::TYPE_MISMATCH, "Type mismatch, expected Timestamp, got type id = {} for column {}", + toString(value.type()), name); + + return static_cast(DateLUT::instance().toDayNum( + static_cast &>(value).value().epochTime())); + }; + else if (which.isDateTime()) + parser = [](const Poco::MongoDB::Element & value, const std::string & name) -> Field + { + if (value.type() != Poco::MongoDB::ElementTraits::TypeId) + throw Exception(ErrorCodes::TYPE_MISMATCH, "Type mismatch, expected Timestamp, got type id = {} for column {}", + toString(value.type()), name); + + return static_cast(static_cast &>(value).value().epochTime()); + }; + else if (which.isUUID()) + parser = [](const Poco::MongoDB::Element & value, const std::string & name) -> Field + { + if (value.type() != Poco::MongoDB::ElementTraits::TypeId) + throw Exception(ErrorCodes::TYPE_MISMATCH, "Type mismatch, expected String (UUID), got type id = {} for column {}", + toString(value.type()), name); + + String string = static_cast &>(value).value(); + return parse(string); + }; + else + throw Exception(ErrorCodes::BAD_ARGUMENTS, "Type conversion to {} is not supported", nested->getName()); + + array_info[column_idx] = {count_dimensions, default_value, parser}; + + } + + void insertValue( + IColumn & column, + const ValueType type, + const Poco::MongoDB::Element & value, + const std::string & name, + std::unordered_map & array_info, + size_t idx) { switch (type) { @@ -192,8 +329,67 @@ namespace toString(value.type()), name); break; } + case ValueType::vtArray: + { + if (value.type() != Poco::MongoDB::ElementTraits::TypeId) + throw Exception(ErrorCodes::TYPE_MISMATCH, "Type mismatch, expected Array, got type id = {} for column {}", + toString(value.type()), name); + + size_t max_dimension = 0, expected_dimensions = array_info[idx].num_dimensions; + const auto parse_value = array_info[idx].parser; + std::vector dimensions(expected_dimensions + 1); + + auto array = static_cast &>(value).value(); + + std::vector> arrays; + arrays.emplace_back(&value, 0); + + while (!arrays.empty()) + { + size_t dimension = arrays.size(); + max_dimension = std::max(max_dimension, dimension); + + auto [element, i] = arrays.back(); + + auto parent = static_cast &>(*element).value(); + + if (i >= parent->size()) + { + dimensions[dimension].emplace_back(Array(dimensions[dimension + 1].begin(), dimensions[dimension + 1].end())); + dimensions[dimension + 1].clear(); + + arrays.pop_back(); + continue; + } + + Poco::MongoDB::Element::Ptr child = parent->get(static_cast(i)); + + if (child->type() == Poco::MongoDB::ElementTraits::TypeId) + { + if (dimension + 1 > expected_dimensions) + throw 
Exception(ErrorCodes::BAD_ARGUMENTS, "Got more dimensions than expected"); + + arrays.back().second += 1; + arrays.emplace_back(child.get(), 0); + } + else + { + dimensions[dimension].emplace_back(parse_value(*child, name)); + } + } + + if (max_dimension < expected_dimensions) + throw Exception(ErrorCodes::BAD_ARGUMENTS, + "Got less dimensions than expected. ({} instead of {})", max_dimension, expected_dimensions); + + // TODO: default value + + assert_cast(column).insert(Array(dimensions[1].begin(), dimensions[1].end())); + break; + + } default: - throw Exception(ErrorCodes::UNKNOWN_TYPE, "Value of unsupported type:{}", column.getName()); + throw Exception(ErrorCodes::UNKNOWN_TYPE, "Value of unsupported type: {}", column.getName()); } } @@ -252,11 +448,11 @@ Chunk MongoDBSource::generate() if (is_nullable) { ColumnNullable & column_nullable = assert_cast(*columns[idx]); - insertValue(column_nullable.getNestedColumn(), description.types[idx].first, *value, name); + insertValue(column_nullable.getNestedColumn(), description.types[idx].first, *value, name, array_info, idx); column_nullable.getNullMapData().emplace_back(0); } else - insertValue(*columns[idx], description.types[idx].first, *value, name); + insertValue(*columns[idx], description.types[idx].first, *value, name, array_info, idx); } } } diff --git a/src/Processors/Sources/MongoDBSource.h b/src/Processors/Sources/MongoDBSource.h index d03a7a45477..ec73f00f378 100644 --- a/src/Processors/Sources/MongoDBSource.h +++ b/src/Processors/Sources/MongoDBSource.h @@ -19,6 +19,13 @@ namespace MongoDB namespace DB { +struct MongoDBArrayInfo +{ + size_t num_dimensions; + Field default_value; + std::function parser; +}; + void authenticate(Poco::MongoDB::Connection & connection, const std::string & database, const std::string & user, const std::string & password); std::unique_ptr createCursor(const std::string & database, const std::string & collection, const Block & sample_block_to_select); @@ -45,6 +52,8 @@ private: const UInt64 max_block_size; ExternalResultDescription description; bool all_read = false; + + std::unordered_map array_info; }; } diff --git a/tests/integration/test_storage_mongodb/test.py b/tests/integration/test_storage_mongodb/test.py index 74b2b15fda0..cf843ddd489 100644 --- a/tests/integration/test_storage_mongodb/test.py +++ b/tests/integration/test_storage_mongodb/test.py @@ -70,6 +70,81 @@ def test_simple_select(started_cluster): simple_mongo_table.drop() +@pytest.mark.parametrize("started_cluster", [False], indirect=["started_cluster"]) +def test_arrays(started_cluster): + mongo_connection = get_mongo_connection(started_cluster) + db = mongo_connection["test"] + db.add_user("root", "clickhouse") + simple_mongo_table = db["simple_table"] + data = [] + for i in range(0, 100): + data.append({ + "key": i, + "arr_int64": [- (i + 1), - (i + 2), - (i + 3)], + "arr_int32": [- (i + 1), - (i + 2), - (i + 3)], + "arr_int16": [- (i + 1), - (i + 2), - (i + 3)], + "arr_int8": [- (i + 1), - (i + 2), - (i + 3)], + "arr_uint64": [i + 1, i + 2, i + 3], + "arr_uint32": [i + 1, i + 2, i + 3], + "arr_uint16": [i + 1, i + 2, i + 3], + "arr_uint8": [i + 1, i + 2, i + 3], + "arr_float32": [i + 1.125, i + 2.5, i + 3.750], + "arr_float64": [i + 1.125, i + 2.5, i + 3.750], + "arr_date": ['2023-11-01', '2023-06-19'], + "arr_datetime": ['2023-03-31 06:03:12', '2023-02-01 12:46:34'], + "arr_string": [str(i + 1), str(i + 2), str(i + 3)], + "arr_uuid": ['f0e77736-91d1-48ce-8f01-15123ca1c7ed', '93376a07-c044-4281-a76e-ad27cf6973c5'], + "arr_arr_bool": 
[[True, False, True]] + }) + + simple_mongo_table.insert_many(data) + + node = started_cluster.instances["node"] + node.query( + "CREATE TABLE simple_mongo_table(" + "key UInt64," + "arr_int64 Array(Int64)," + "arr_int32 Array(Int32)," + "arr_int16 Array(Int16)," + "arr_int8 Array(Int8)," + "arr_uint64 Array(UInt64)," + "arr_uint32 Array(UInt32)," + "arr_uint16 Array(UInt16)," + "arr_uint8 Array(UInt8)," + "arr_float32 Array(Float32)," + "arr_float64 Array(Float64)," + "arr_date Array(Date)," + "arr_datetime Array(DateTime)," + "arr_string Array(String)," + "arr_uuid Array(UUID)," + "arr_arr_bool Array(Array(Bool))" + ") ENGINE = MongoDB('mongo1:27017', 'test', 'simple_table', 'root', 'clickhouse')" + ) + + assert node.query("SELECT COUNT() FROM simple_mongo_table") == "100\n" + + for column_name in ["arr_int64", "arr_int32", "arr_int16", "arr_int8"]: + assert ( + node.query(f"SELECT {column_name} from simple_mongo_table where key = 42") + == "[-43,-44,-45]\n" + ) + + for column_name in ["arr_uint64", "arr_uint32", "arr_uint16", "arr_uint8"]: + assert ( + node.query(f"SELECT {column_name} from simple_mongo_table where key = 42") + == "[43,44,45]\n" + ) + + for column_name in ["arr_float32", "arr_float64"]: + assert ( + node.query(f"SELECT {column_name} from simple_mongo_table where key = 42") + == "[43,44,45]\n" + ) + + node.query("DROP TABLE simple_mongo_table") + simple_mongo_table.drop() + + @pytest.mark.parametrize("started_cluster", [False], indirect=["started_cluster"]) def test_complex_data_type(started_cluster): mongo_connection = get_mongo_connection(started_cluster) From 2d0812e3c745c1c589a489c824db2f21785733fb Mon Sep 17 00:00:00 2001 From: Nikolai Kochetov Date: Mon, 24 Apr 2023 14:55:31 +0000 Subject: [PATCH 07/52] Refactor ColumnLowCardinality::cutAndCompact to avoid calling IColumn::assumeMutable. --- src/Columns/ColumnLowCardinality.cpp | 27 ++++++++++++--------------- src/Columns/ColumnLowCardinality.h | 8 +++++--- 2 files changed, 17 insertions(+), 18 deletions(-) diff --git a/src/Columns/ColumnLowCardinality.cpp b/src/Columns/ColumnLowCardinality.cpp index 11d02b023d6..4f9ab8215be 100644 --- a/src/Columns/ColumnLowCardinality.cpp +++ b/src/Columns/ColumnLowCardinality.cpp @@ -485,13 +485,8 @@ void ColumnLowCardinality::setSharedDictionary(const ColumnPtr & column_unique) ColumnLowCardinality::MutablePtr ColumnLowCardinality::cutAndCompact(size_t start, size_t length) const { auto sub_positions = IColumn::mutate(idx.getPositions()->cut(start, length)); - /// Create column with new indexes and old dictionary. - /// Dictionary is shared, but will be recreated after compactInplace call. - auto column = ColumnLowCardinality::create(getDictionary().assumeMutable(), std::move(sub_positions)); - /// Will create new dictionary. 
- column->compactInplace(); - - return column; + auto new_column_unique = Dictionary::compact(dictionary.getColumnUnique(), sub_positions); + return ColumnLowCardinality::create(std::move(new_column_unique), std::move(sub_positions)); } void ColumnLowCardinality::compactInplace() @@ -589,7 +584,7 @@ size_t ColumnLowCardinality::Index::getSizeOfIndexType(const IColumn & column, s column.getName()); } -void ColumnLowCardinality::Index::attachPositions(ColumnPtr positions_) +void ColumnLowCardinality::Index::attachPositions(MutableColumnPtr positions_) { positions = std::move(positions_); updateSizeOfType(); @@ -820,21 +815,23 @@ void ColumnLowCardinality::Dictionary::setShared(const ColumnPtr & column_unique shared = true; } -void ColumnLowCardinality::Dictionary::compact(ColumnPtr & positions) +void ColumnLowCardinality::Dictionary::compact(MutableColumnPtr & positions) { - auto new_column_unique = column_unique->cloneEmpty(); + column_unique = compact(getColumnUnique(), positions); + shared = false; +} - auto & unique = getColumnUnique(); +MutableColumnPtr ColumnLowCardinality::Dictionary::compact(const IColumnUnique & unique, MutableColumnPtr & positions) +{ + auto new_column_unique = unique.cloneEmpty(); auto & new_unique = static_cast(*new_column_unique); - auto indexes = mapUniqueIndex(positions->assumeMutableRef()); + auto indexes = mapUniqueIndex(*positions); auto sub_keys = unique.getNestedColumn()->index(*indexes, 0); auto new_indexes = new_unique.uniqueInsertRangeFrom(*sub_keys, 0, sub_keys->size()); positions = IColumn::mutate(new_indexes->index(*positions, 0)); - column_unique = std::move(new_column_unique); - - shared = false; + return new_column_unique; } ColumnPtr ColumnLowCardinality::cloneWithDefaultOnNull() const diff --git a/src/Columns/ColumnLowCardinality.h b/src/Columns/ColumnLowCardinality.h index e7f4b92d733..df707039b03 100644 --- a/src/Columns/ColumnLowCardinality.h +++ b/src/Columns/ColumnLowCardinality.h @@ -301,8 +301,8 @@ public: void checkSizeOfType(); - ColumnPtr detachPositions() { return std::move(positions); } - void attachPositions(ColumnPtr positions_); + MutableColumnPtr detachPositions() { return IColumn::mutate(std::move(positions)); } + void attachPositions(MutableColumnPtr positions_); void countKeys(ColumnUInt64::Container & counts) const; @@ -350,7 +350,9 @@ private: bool isShared() const { return shared; } /// Create new dictionary with only keys that are mentioned in positions. 
- void compact(ColumnPtr & positions); + void compact(MutableColumnPtr & positions); + + static MutableColumnPtr compact(const IColumnUnique & column_unique, MutableColumnPtr & positions); private: WrappedPtr column_unique; From 38622d07703804570643cc3ab0b5c764efbf675c Mon Sep 17 00:00:00 2001 From: Anton Popov Date: Mon, 24 Apr 2023 18:21:49 +0000 Subject: [PATCH 08/52] add settings to delay or throw in case of too many mutations --- src/Common/ErrorCodes.cpp | 1 + src/Common/ProfileEvents.cpp | 3 + src/Core/Settings.h | 2 + src/Storages/MergeTree/MergeTreeData.cpp | 49 +++++++++++++ src/Storages/MergeTree/MergeTreeData.h | 9 ++- src/Storages/MergeTree/MergeTreeSettings.h | 4 ++ .../MergeTree/ReplicatedMergeTreeQueue.cpp | 20 ++++-- .../MergeTree/ReplicatedMergeTreeQueue.h | 2 + src/Storages/StorageMergeTree.cpp | 27 +++++++- src/Storages/StorageMergeTree.h | 2 + src/Storages/StorageReplicatedMergeTree.cpp | 12 +++- src/Storages/StorageReplicatedMergeTree.h | 2 + .../02724_delay_mutations.reference | 8 +++ .../0_stateless/02724_delay_mutations.sh | 59 ++++++++++++++++ .../02724_limit_num_mutations.reference | 9 +++ .../0_stateless/02724_limit_num_mutations.sh | 69 +++++++++++++++++++ 16 files changed, 269 insertions(+), 9 deletions(-) create mode 100644 tests/queries/0_stateless/02724_delay_mutations.reference create mode 100755 tests/queries/0_stateless/02724_delay_mutations.sh create mode 100644 tests/queries/0_stateless/02724_limit_num_mutations.reference create mode 100755 tests/queries/0_stateless/02724_limit_num_mutations.sh diff --git a/src/Common/ErrorCodes.cpp b/src/Common/ErrorCodes.cpp index 9abf3bba8ff..d570eab8f18 100644 --- a/src/Common/ErrorCodes.cpp +++ b/src/Common/ErrorCodes.cpp @@ -650,6 +650,7 @@ M(679, IO_URING_SUBMIT_ERROR) \ M(690, MIXED_ACCESS_PARAMETER_TYPES) \ M(691, UNKNOWN_ELEMENT_OF_ENUM) \ + M(692, TOO_MANY_MUTATIONS) \ \ M(999, KEEPER_EXCEPTION) \ M(1000, POCO_EXCEPTION) \ diff --git a/src/Common/ProfileEvents.cpp b/src/Common/ProfileEvents.cpp index da096085d5b..387eafdc145 100644 --- a/src/Common/ProfileEvents.cpp +++ b/src/Common/ProfileEvents.cpp @@ -102,6 +102,9 @@ M(DelayedInserts, "Number of times the INSERT of a block to a MergeTree table was throttled due to high number of active data parts for partition.") \ M(RejectedInserts, "Number of times the INSERT of a block to a MergeTree table was rejected with 'Too many parts' exception due to high number of active data parts for partition.") \ M(DelayedInsertsMilliseconds, "Total number of milliseconds spent while the INSERT of a block to a MergeTree table was throttled due to high number of active data parts for partition.") \ + M(DelayedMutations, "Number of times the mutation of a MergeTree table was throttled due to high number of unfinished mutations for table.") \ + M(RejectedMutations, "Number of times the mutation of a MergeTree table was rejected with 'Too many mutations' exception due to high number of unfinished mutations for table.") \ + M(DelayedMutationsMilliseconds, "Total number of milliseconds spent while the mutation of a MergeTree table was throttled due to high number of unfinished mutations for table.") \ M(DistributedDelayedInserts, "Number of times the INSERT of a block to a Distributed table was throttled due to high number of pending bytes.") \ M(DistributedRejectedInserts, "Number of times the INSERT of a block to a Distributed table was rejected with 'Too many bytes' exception due to high number of pending bytes.") \ M(DistributedDelayedInsertsMilliseconds, "Total number of 
milliseconds spent while the INSERT of a block to a Distributed table was throttled due to high number of pending bytes.") \ diff --git a/src/Core/Settings.h b/src/Core/Settings.h index 26409e98763..8fd2af5fa23 100644 --- a/src/Core/Settings.h +++ b/src/Core/Settings.h @@ -275,6 +275,8 @@ class IColumn; \ M(UInt64, parts_to_delay_insert, 0, "If the destination table contains at least that many active parts in a single partition, artificially slow down insert into table.", 0) \ M(UInt64, parts_to_throw_insert, 0, "If more than this number active parts in a single partition of the destination table, throw 'Too many parts ...' exception.", 0) \ + M(UInt64, number_of_mutations_to_delay, 0, "If the mutated table contains at least that many unfinished mutations, artificially slow down mutations of table. 0 - disabled", 0) \ + M(UInt64, number_of_mutations_to_throw, 0, "If the mutated table contains at least that many unfinished mutations, throw 'Too many mutations ...' exception. 0 - disabled", 0) \ M(Bool, insert_distributed_sync, false, "If setting is enabled, insert query into distributed waits until data will be sent to all nodes in cluster.", 0) \ M(UInt64, insert_distributed_timeout, 0, "Timeout for insert query into distributed. Setting is used only with insert_distributed_sync enabled. Zero value means no timeout.", 0) \ M(Int64, distributed_ddl_task_timeout, 180, "Timeout for DDL query responses from all hosts in cluster. If a ddl request has not been performed on all hosts, a response will contain a timeout error and a request will be executed in an async mode. Negative value means infinite. Zero means async mode.", 0) \ diff --git a/src/Storages/MergeTree/MergeTreeData.cpp b/src/Storages/MergeTree/MergeTreeData.cpp index f5f12660223..e9e3548f66f 100644 --- a/src/Storages/MergeTree/MergeTreeData.cpp +++ b/src/Storages/MergeTree/MergeTreeData.cpp @@ -114,6 +114,9 @@ namespace ProfileEvents extern const Event MergedIntoWideParts; extern const Event MergedIntoCompactParts; extern const Event MergedIntoInMemoryParts; + extern const Event RejectedMutations; + extern const Event DelayedMutations; + extern const Event DelayedMutationsMilliseconds; } namespace CurrentMetrics @@ -171,6 +174,7 @@ namespace ErrorCodes extern const int SERIALIZATION_ERROR; extern const int NETWORK_ERROR; extern const int SOCKET_TIMEOUT; + extern const int TOO_MANY_MUTATIONS; } @@ -4296,6 +4300,51 @@ void MergeTreeData::delayInsertOrThrowIfNeeded(Poco::Event * until, const Contex std::this_thread::sleep_for(std::chrono::milliseconds(static_cast(delay_milliseconds))); } +void MergeTreeData::delayMutationOrThrowIfNeeded(Poco::Event * until, const ContextPtr & query_context) const +{ + const auto settings = getSettings(); + const auto & query_settings = query_context->getSettingsRef(); + + size_t num_mutations_to_delay = query_settings.number_of_mutations_to_delay + ? query_settings.number_of_mutations_to_delay + : settings->number_of_mutations_to_delay; + + size_t num_mutations_to_throw = query_settings.number_of_mutations_to_throw + ? 
query_settings.number_of_mutations_to_throw + : settings->number_of_mutations_to_throw; + + if (!num_mutations_to_delay && !num_mutations_to_throw) + return; + + size_t num_unfinished_mutations = getNumberOfUnfinishedMutations(); + if (num_mutations_to_throw && num_unfinished_mutations >= num_mutations_to_throw) + { + ProfileEvents::increment(ProfileEvents::RejectedMutations); + throw Exception(ErrorCodes::TOO_MANY_MUTATIONS, + "Too many unfinished mutations ({}) in table {}", + num_unfinished_mutations, getLogName()); + } + + if (num_mutations_to_delay && num_unfinished_mutations >= num_mutations_to_delay) + { + if (!num_mutations_to_throw) + num_mutations_to_throw = num_mutations_to_delay * 2; + + size_t mutations_over_threshold = num_unfinished_mutations - num_mutations_to_delay; + size_t allowed_mutations_over_threshold = num_mutations_to_throw - num_mutations_to_delay; + + double delay_factor = std::min(static_cast(mutations_over_threshold) / allowed_mutations_over_threshold, 1.0); + size_t delay_milliseconds = static_cast(std::lerp(settings->min_delay_to_mutate_ms, settings->max_delay_to_mutate_ms, delay_factor)); + + ProfileEvents::increment(ProfileEvents::DelayedMutations); + ProfileEvents::increment(ProfileEvents::DelayedMutationsMilliseconds, delay_milliseconds); + + if (until) + until->tryWait(delay_milliseconds); + else + std::this_thread::sleep_for(std::chrono::milliseconds(delay_milliseconds)); + } +} MergeTreeData::DataPartPtr MergeTreeData::getActiveContainingPart( const MergeTreePartInfo & part_info, MergeTreeData::DataPartState state, DataPartsLock & /*lock*/) const diff --git a/src/Storages/MergeTree/MergeTreeData.h b/src/Storages/MergeTree/MergeTreeData.h index b03b7d4a71e..cc5deb7c786 100644 --- a/src/Storages/MergeTree/MergeTreeData.h +++ b/src/Storages/MergeTree/MergeTreeData.h @@ -540,7 +540,6 @@ public: /// Makes sense only for ordinary MergeTree engines because for them block numbering doesn't depend on partition. std::optional getMinPartDataVersion() const; - /// Returns all detached parts DetachedPartsInfo getDetachedParts() const; @@ -551,11 +550,17 @@ public: MutableDataPartsVector tryLoadPartsToAttach(const ASTPtr & partition, bool attach_part, ContextPtr context, PartsTemporaryRename & renamed_parts); - /// If the table contains too many active parts, sleep for a while to give them time to merge. /// If until is non-null, wake up from the sleep earlier if the event happened. void delayInsertOrThrowIfNeeded(Poco::Event * until, const ContextPtr & query_context) const; + /// If the table contains too many unfinished mutations, sleep for a while to give them time to execute. + /// If until is non-null, wake up from the sleep earlier if the event happened. + void delayMutationOrThrowIfNeeded(Poco::Event * until, const ContextPtr & query_context) const; + + /// Returns number of unfinished mutations (is_done = 0). + virtual size_t getNumberOfUnfinishedMutations() const = 0; + /// Renames temporary part to a permanent part and adds it to the parts set. /// It is assumed that the part does not intersect with existing parts. /// Adds the part in the PreActive state (the part will be added to the active set later with out_transaction->commit()). 
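The delay is interpolated linearly (std::lerp) between min_delay_to_mutate_ms and max_delay_to_mutate_ms as the number of unfinished mutations climbs from the delay threshold towards the throw threshold, beyond which TOO_MANY_MUTATIONS is raised. A hedged sketch of how these MergeTree-level settings could be applied, with an illustrative table and values only:

```sql
-- Mutations start being slowed down once 2 are unfinished
-- and are rejected with TOO_MANY_MUTATIONS once 10 are unfinished.
CREATE TABLE t_throttled_mutations (id UInt64, v UInt64)
ENGINE = MergeTree ORDER BY id
SETTINGS
    number_of_mutations_to_delay = 2,
    number_of_mutations_to_throw = 10,
    min_delay_to_mutate_ms = 10,
    max_delay_to_mutate_ms = 1000;

ALTER TABLE t_throttled_mutations UPDATE v = v + 1 WHERE 1;  -- delayed or rejected depending on the backlog
```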
diff --git a/src/Storages/MergeTree/MergeTreeSettings.h b/src/Storages/MergeTree/MergeTreeSettings.h index ad55c9d47f3..b7b94359ccf 100644 --- a/src/Storages/MergeTree/MergeTreeSettings.h +++ b/src/Storages/MergeTree/MergeTreeSettings.h @@ -83,6 +83,10 @@ struct Settings; M(UInt64, max_delay_to_insert, 1, "Max delay of inserting data into MergeTree table in seconds, if there are a lot of unmerged parts in single partition.", 0) \ M(UInt64, min_delay_to_insert_ms, 10, "Min delay of inserting data into MergeTree table in milliseconds, if there are a lot of unmerged parts in single partition.", 0) \ M(UInt64, max_parts_in_total, 100000, "If more than this number active parts in all partitions in total, throw 'Too many parts ...' exception.", 0) \ + M(UInt64, number_of_mutations_to_delay, 0, "If table has at least that many unfinished mutations, artificially slow down mutations of table. Disabled if set to 0", 0) \ + M(UInt64, number_of_mutations_to_throw, 0, "If table has at least that many unfinished mutations, throw 'Too many mutations' exception. Disabled if set to 0", 0) \ + M(UInt64, min_delay_to_mutate_ms, 10, "Min delay of mutating MergeTree table in milliseconds, if there are a lot of unfinished mutations", 0) \ + M(UInt64, max_delay_to_mutate_ms, 1000, "Max delay of mutating MergeTree table in milliseconds, if there are a lot of unfinished mutations", 0) \ \ /* Part removal settings. */ \ M(UInt64, simultaneous_parts_removal_limit, 0, "Maximum number of parts to remove during one CleanupThread iteration (0 means unlimited).", 0) \ diff --git a/src/Storages/MergeTree/ReplicatedMergeTreeQueue.cpp b/src/Storages/MergeTree/ReplicatedMergeTreeQueue.cpp index 1006bd5ab49..1762c7aabe9 100644 --- a/src/Storages/MergeTree/ReplicatedMergeTreeQueue.cpp +++ b/src/Storages/MergeTree/ReplicatedMergeTreeQueue.cpp @@ -1727,18 +1727,30 @@ size_t ReplicatedMergeTreeQueue::countMutations() const return mutations_by_znode.size(); } - size_t ReplicatedMergeTreeQueue::countFinishedMutations() const { std::lock_guard lock(state_mutex); size_t count = 0; - for (const auto & pair : mutations_by_znode) + for (const auto & [_, status] : mutations_by_znode) { - const auto & mutation = pair.second; - if (!mutation.is_done) + if (!status.is_done) break; + ++count; + } + return count; +} + +size_t ReplicatedMergeTreeQueue::countUnfinishedMutations() const +{ + std::lock_guard lock(state_mutex); + + size_t count = 0; + for (const auto & [_, status] : mutations_by_znode | std::views::reverse) + { + if (status.is_done) + break; ++count; } diff --git a/src/Storages/MergeTree/ReplicatedMergeTreeQueue.h b/src/Storages/MergeTree/ReplicatedMergeTreeQueue.h index 72796ddd4eb..368f2d4bc1f 100644 --- a/src/Storages/MergeTree/ReplicatedMergeTreeQueue.h +++ b/src/Storages/MergeTree/ReplicatedMergeTreeQueue.h @@ -386,6 +386,8 @@ public: /// Count the total number of active mutations that are finished (is_done = true). size_t countFinishedMutations() const; + /// Count the total number of active mutations that are not finished (is_done = false). 
+ size_t countUnfinishedMutations() const; /// Returns functor which used by MergeTreeMergerMutator to select parts for merge ReplicatedMergeTreeMergePredicate getMergePredicate(zkutil::ZooKeeperPtr & zookeeper, PartitionIdsHint && partition_ids_hint); diff --git a/src/Storages/StorageMergeTree.cpp b/src/Storages/StorageMergeTree.cpp index 5513603bca6..5592004d599 100644 --- a/src/Storages/StorageMergeTree.cpp +++ b/src/Storages/StorageMergeTree.cpp @@ -3,6 +3,7 @@ #include "Storages/MergeTree/IMergeTreeDataPart.h" #include +#include #include #include @@ -313,7 +314,11 @@ void StorageMergeTree::alter( StorageInMemoryMetadata new_metadata = getInMemoryMetadata(); StorageInMemoryMetadata old_metadata = getInMemoryMetadata(); + auto maybe_mutation_commands = commands.getMutationCommands(new_metadata, local_context->getSettingsRef().materialize_ttl_after_modify, local_context); + if (!maybe_mutation_commands.empty()) + delayMutationOrThrowIfNeeded(nullptr, local_context); + Int64 mutation_version = -1; commands.apply(new_metadata, local_context); @@ -321,7 +326,6 @@ void StorageMergeTree::alter( if (commands.isSettingsAlter()) { changeSettings(new_metadata.settings_changes, table_lock_holder); - DatabaseCatalog::instance().getDatabase(table_id.database_name)->alterTable(local_context, table_id, new_metadata); } else @@ -587,11 +591,12 @@ void StorageMergeTree::setMutationCSN(const String & mutation_id, CSN csn) void StorageMergeTree::mutate(const MutationCommands & commands, ContextPtr query_context) { + delayMutationOrThrowIfNeeded(nullptr, query_context); + /// Validate partition IDs (if any) before starting mutation getPartitionIdsAffectedByCommands(commands, query_context); Int64 version = startMutation(commands, query_context); - if (query_context->getSettingsRef().mutations_sync > 0 || query_context->getCurrentTransaction()) waitForMutation(version); } @@ -1332,6 +1337,24 @@ bool StorageMergeTree::scheduleDataProcessingJob(BackgroundJobsAssignee & assign return scheduled; } +size_t StorageMergeTree::getNumberOfUnfinishedMutations() const +{ + size_t count = 0; + for (const auto & [version, _] : current_mutations_by_version | std::views::reverse) + { + auto status = getIncompleteMutationsStatus(version); + if (!status) + continue; + + if (status->is_done) + break; + + ++count; + } + + return count; +} + UInt64 StorageMergeTree::getCurrentMutationVersion( const DataPartPtr & part, std::unique_lock & /*currently_processing_in_background_mutex_lock*/) const diff --git a/src/Storages/StorageMergeTree.h b/src/Storages/StorageMergeTree.h index 6f8acf9965a..78bd6e3f374 100644 --- a/src/Storages/StorageMergeTree.h +++ b/src/Storages/StorageMergeTree.h @@ -113,6 +113,8 @@ public: bool scheduleDataProcessingJob(BackgroundJobsAssignee & assignee) override; + size_t getNumberOfUnfinishedMutations() const override; + MergeTreeDeduplicationLog * getDeduplicationLog() { return deduplication_log.get(); } private: diff --git a/src/Storages/StorageReplicatedMergeTree.cpp b/src/Storages/StorageReplicatedMergeTree.cpp index 9b4972ade59..7bca3cbf581 100644 --- a/src/Storages/StorageReplicatedMergeTree.cpp +++ b/src/Storages/StorageReplicatedMergeTree.cpp @@ -5215,7 +5215,10 @@ void StorageReplicatedMergeTree::alter( alter_entry->create_time = time(nullptr); auto maybe_mutation_commands = commands.getMutationCommands( - *current_metadata, query_context->getSettingsRef().materialize_ttl_after_modify, query_context); + *current_metadata, + query_context->getSettingsRef().materialize_ttl_after_modify, + 
            query_context);
+
         bool have_mutation = !maybe_mutation_commands.empty();
         alter_entry->have_mutation = have_mutation;
@@ -5226,6 +5229,7 @@ void StorageReplicatedMergeTree::alter(
         PartitionBlockNumbersHolder partition_block_numbers_holder;
         if (have_mutation)
         {
+            delayMutationOrThrowIfNeeded(&partial_shutdown_event, query_context);
             const String mutations_path(fs::path(zookeeper_path) / "mutations");
 
             ReplicatedMergeTreeMutationEntry mutation_entry;
@@ -6406,6 +6410,8 @@ void StorageReplicatedMergeTree::mutate(const MutationCommands & commands, Conte
     /// After all needed parts are mutated (i.e. all active parts have the mutation version greater than
     /// the version of this mutation), the mutation is considered done and can be deleted.
 
+    delayMutationOrThrowIfNeeded(&partial_shutdown_event, query_context);
+
     ReplicatedMergeTreeMutationEntry mutation_entry;
     mutation_entry.source_replica = replica_name;
     mutation_entry.commands = commands;
@@ -8036,6 +8042,10 @@ String StorageReplicatedMergeTree::getTableSharedID() const
     return toString(table_shared_id);
 }
 
+size_t StorageReplicatedMergeTree::getNumberOfUnfinishedMutations() const
+{
+    return queue.countUnfinishedMutations();
+}
 
 void StorageReplicatedMergeTree::createTableSharedID() const
 {
diff --git a/src/Storages/StorageReplicatedMergeTree.h b/src/Storages/StorageReplicatedMergeTree.h
index ade4e4f0b4b..e81be299144 100644
--- a/src/Storages/StorageReplicatedMergeTree.h
+++ b/src/Storages/StorageReplicatedMergeTree.h
@@ -311,6 +311,8 @@ public:
     // Return table id, common for different replicas
     String getTableSharedID() const override;
 
+    size_t getNumberOfUnfinishedMutations() const override;
+
     /// Returns the same as getTableSharedID(), but extracts it from a create query.
     static std::optional tryGetTableSharedIDFromCreateQuery(const IAST & create_query, const ContextPtr & global_context);
 
diff --git a/tests/queries/0_stateless/02724_delay_mutations.reference b/tests/queries/0_stateless/02724_delay_mutations.reference
new file mode 100644
index 00000000000..16bd972a06d
--- /dev/null
+++ b/tests/queries/0_stateless/02724_delay_mutations.reference
@@ -0,0 +1,8 @@
+1 2
+4
+1 6
+0
+ALTER TABLE t_delay_mutations UPDATE v = 3 WHERE 1; 0 0
+ALTER TABLE t_delay_mutations UPDATE v = 4 WHERE 1; 0 0
+ALTER TABLE t_delay_mutations UPDATE v = 5 WHERE 1; 1 1
+ALTER TABLE t_delay_mutations UPDATE v = 6 WHERE 1; 1 1
diff --git a/tests/queries/0_stateless/02724_delay_mutations.sh b/tests/queries/0_stateless/02724_delay_mutations.sh
new file mode 100755
index 00000000000..f349e29253a
--- /dev/null
+++ b/tests/queries/0_stateless/02724_delay_mutations.sh
@@ -0,0 +1,59 @@
+#!/usr/bin/env bash
+
+CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)
+# shellcheck source=../shell_config.sh
+. "$CURDIR"/../shell_config.sh
+
+# shellcheck source=./mergetree_mutations.lib
+. "$CURDIR"/mergetree_mutations.lib
+
+${CLICKHOUSE_CLIENT} -n --query "
+DROP TABLE IF EXISTS t_delay_mutations SYNC;
+
+CREATE TABLE t_delay_mutations (id UInt64, v UInt64)
+ENGINE = MergeTree ORDER BY id
+SETTINGS
+    number_of_mutations_to_delay = 2,
+    number_of_mutations_to_throw = 10,
+    min_delay_to_mutate_ms = 10,
+    max_delay_to_mutate_ms = 1000;
+
+SET mutations_sync = 0;
+SYSTEM STOP MERGES t_delay_mutations;
+
+INSERT INTO t_delay_mutations VALUES (1, 2);
+
+ALTER TABLE t_delay_mutations UPDATE v = 3 WHERE 1;
+ALTER TABLE t_delay_mutations UPDATE v = 4 WHERE 1;
+
+ALTER TABLE t_delay_mutations UPDATE v = 5 WHERE 1;
+ALTER TABLE t_delay_mutations UPDATE v = 6 WHERE 1;
+
+SELECT * FROM t_delay_mutations ORDER BY id;
+SELECT count() FROM system.mutations WHERE database = currentDatabase() AND table = 't_delay_mutations' AND NOT is_done;
+"
+
+${CLICKHOUSE_CLIENT} --query "SYSTEM START MERGES t_delay_mutations"
+wait_for_mutation "t_delay_mutations" "mutation_5.txt"
+
+${CLICKHOUSE_CLIENT} -n --query "
+SELECT * FROM t_delay_mutations ORDER BY id;
+SELECT count() FROM system.mutations WHERE database = currentDatabase() AND table = 't_delay_mutations' AND NOT is_done;
+
+DROP TABLE IF EXISTS t_delay_mutations SYNC;
+"
+
+${CLICKHOUSE_CLIENT} -n --query "
+SYSTEM FLUSH LOGS;
+
+SELECT
+    query,
+    ProfileEvents['DelayedMutations'],
+    ProfileEvents['DelayedMutationsMilliseconds'] BETWEEN 10 AND 1000
+FROM system.query_log
+WHERE
+    type = 'QueryFinish' AND
+    current_database = '$CLICKHOUSE_DATABASE' AND
+    query ILIKE 'ALTER TABLE t_delay_mutations UPDATE%'
+ORDER BY query;
+"
diff --git a/tests/queries/0_stateless/02724_limit_num_mutations.reference b/tests/queries/0_stateless/02724_limit_num_mutations.reference
new file mode 100644
index 00000000000..ecd1ce23ca2
--- /dev/null
+++ b/tests/queries/0_stateless/02724_limit_num_mutations.reference
@@ -0,0 +1,9 @@
+1 2
+2
+CREATE TABLE default.t_limit_mutations\n(\n    `id` UInt64,\n    `v` UInt64\n)\nENGINE = ReplicatedMergeTree(\'/clickhouse/tables/t_limit_mutations/\', \'1\')\nORDER BY id\nSETTINGS number_of_mutations_to_throw = 2, index_granularity = 8192
+1 2
+4
+CREATE TABLE default.t_limit_mutations\n(\n    `id` UInt64,\n    `v` String\n)\nENGINE = ReplicatedMergeTree(\'/clickhouse/tables/t_limit_mutations/\', \'1\')\nORDER BY id\nSETTINGS number_of_mutations_to_throw = 2, index_granularity = 8192
+1 6
+0
+CREATE TABLE default.t_limit_mutations\n(\n    `id` UInt64,\n    `v` String\n)\nENGINE = ReplicatedMergeTree(\'/clickhouse/tables/t_limit_mutations/\', \'1\')\nORDER BY id\nSETTINGS number_of_mutations_to_throw = 2, index_granularity = 8192
diff --git a/tests/queries/0_stateless/02724_limit_num_mutations.sh b/tests/queries/0_stateless/02724_limit_num_mutations.sh
new file mode 100755
index 00000000000..98bfdbbb551
--- /dev/null
+++ b/tests/queries/0_stateless/02724_limit_num_mutations.sh
@@ -0,0 +1,69 @@
+#!/usr/bin/env bash
+
+CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)
+# shellcheck source=../shell_config.sh
+. "$CURDIR"/../shell_config.sh
+
+# shellcheck source=./mergetree_mutations.lib
+. "$CURDIR"/mergetree_mutations.lib
+
+function wait_for_alter()
+{
+    type=$1
+    for i in {1..100}; do
+        sleep 0.1
+        ${CLICKHOUSE_CLIENT} --query "SHOW CREATE TABLE t_limit_mutations" | grep -q "\`v\` $type" && break;
+
+        if [[ $i -eq 100 ]]; then
+            echo "Timed out while waiting for alter to execute"
+        fi
+    done
+}
+
+${CLICKHOUSE_CLIENT} -n --query "
+DROP TABLE IF EXISTS t_limit_mutations SYNC;
+
+CREATE TABLE t_limit_mutations (id UInt64, v UInt64)
+ENGINE = ReplicatedMergeTree('/clickhouse/tables/t_limit_mutations/', '1') ORDER BY id
+SETTINGS number_of_mutations_to_throw = 2;
+
+SET mutations_sync = 0;
+SYSTEM STOP MERGES t_limit_mutations;
+
+INSERT INTO t_limit_mutations VALUES (1, 2);
+
+ALTER TABLE t_limit_mutations UPDATE v = 3 WHERE 1;
+ALTER TABLE t_limit_mutations UPDATE v = 4 WHERE 1;
+
+ALTER TABLE t_limit_mutations UPDATE v = 5 WHERE 1; -- { serverError TOO_MANY_MUTATIONS }
+ALTER TABLE t_limit_mutations MODIFY COLUMN v String; -- { serverError TOO_MANY_MUTATIONS }
+
+SELECT * FROM t_limit_mutations ORDER BY id;
+SELECT count() FROM system.mutations WHERE database = currentDatabase() AND table = 't_limit_mutations' AND NOT is_done;
+SHOW CREATE TABLE t_limit_mutations;
+"
+
+${CLICKHOUSE_CLIENT} -n --query "
+ALTER TABLE t_limit_mutations UPDATE v = 6 WHERE 1 SETTINGS number_of_mutations_to_throw = 100;
+ALTER TABLE t_limit_mutations MODIFY COLUMN v String SETTINGS number_of_mutations_to_throw = 100, alter_sync = 0;
+"
+
+wait_for_alter "String"
+
+${CLICKHOUSE_CLIENT} -n --query "
+SELECT * FROM t_limit_mutations ORDER BY id;
+SELECT count() FROM system.mutations WHERE database = currentDatabase() AND table = 't_limit_mutations' AND NOT is_done;
+SHOW CREATE TABLE t_limit_mutations;
+"
+
+${CLICKHOUSE_CLIENT} --query "SYSTEM START MERGES t_limit_mutations"
+
+wait_for_mutation "t_limit_mutations" "0000000003"
+
+${CLICKHOUSE_CLIENT} -n --query "
+SELECT * FROM t_limit_mutations ORDER BY id;
+SELECT count() FROM system.mutations WHERE database = currentDatabase() AND table = 't_limit_mutations' AND NOT is_done;
+SHOW CREATE TABLE t_limit_mutations;
+
+DROP TABLE IF EXISTS t_limit_mutations SYNC;
+"
From 30f1bef6e8d1a2086d8ef240601cf99e0e47887e Mon Sep 17 00:00:00 2001
From: Alexey Milovidov
Date: Thu, 27 Apr 2023 16:57:42 +0300
Subject: [PATCH 09/52] Update TableFunctionS3.h

---
 src/TableFunctions/TableFunctionS3.h | 44 ----------------------------
 1 file changed, 44 deletions(-)

diff --git a/src/TableFunctions/TableFunctionS3.h b/src/TableFunctions/TableFunctionS3.h
index ed8cd3bd41a..70c4a020669 100644
--- a/src/TableFunctions/TableFunctionS3.h
+++ b/src/TableFunctions/TableFunctionS3.h
@@ -67,52 +67,8 @@ protected:
     ColumnsDescription structure_hint;
 };
 
-class TableFunctionCOS : public TableFunctionS3
-{
-public:
-    static constexpr auto name = "cosn";
-    std::string getName() const override
-    {
-        return name;
-    }
-private:
-    const char * getStorageTypeName() const override { return "COSN"; }
-};
-
-class TableFunctionOSS : public TableFunctionS3
-{
-public:
-    static constexpr auto name = "oss";
-    std::string getName() const override
-    {
-        return name;
-    }
-private:
-    const char * getStorageTypeName() const override { return "OSS"; }
-};
-
 }
 
-class TableFunctionGCS : public TableFunctionS3
-{
-public:
-    static constexpr auto name = "gcs";
-    static constexpr auto signature = " - url\n"
-                                      " - url, format\n"
-                                      " - url, format, structure\n"
-                                      " - url, hmac_key, hmac_secret\n"
-                                      " - url, format, structure, compression_method\n"
-                                      " - url, hmac_key, hmac_secret, format\n"
-                                      " - url, hmac_key, hmac_secret, format, structure\n"
-                                      " - url, hmac_key, hmac_secret, format, structure, compression_method";
-    std::string getName() const override
-    {
-        return name;
-    }
-private:
-    const char * getStorageTypeName() const override { return "GCS"; }
-};
-
 }
 
 #endif
From aec8f17614371f8dc4ea161098736adafeddc70b Mon Sep 17 00:00:00 2001
From: Alexey Milovidov
Date: Thu, 27 Apr 2023 16:58:18 +0300
Subject: [PATCH 10/52] Update TableFunctionS3.cpp

---
 src/TableFunctions/TableFunctionS3.cpp | 47 +++++++++++++++++++++++---
 1 file changed, 42 insertions(+), 5 deletions(-)

diff --git a/src/TableFunctions/TableFunctionS3.cpp b/src/TableFunctions/TableFunctionS3.cpp
index 4153c6a81c9..841f5a91bc8 100644
--- a/src/TableFunctions/TableFunctionS3.cpp
+++ b/src/TableFunctions/TableFunctionS3.cpp
@@ -215,6 +215,48 @@ StoragePtr TableFunctionS3::executeImpl(const ASTPtr & /*ast_function*/, Context
 
 }
 
+class TableFunctionGCS : public TableFunctionS3
+{
+public:
+    static constexpr auto name = "gcs";
+    std::string getName() const override
+    {
+        return name;
+    }
+private:
+    const char * getStorageTypeName() const override { return "GCS"; }
+};
+
+class TableFunctionCOS : public TableFunctionS3
+{
+public:
+    static constexpr auto name = "cosn";
+    std::string getName() const override
+    {
+        return name;
+    }
+private:
+    const char * getStorageTypeName() const override { return "COSN"; }
+};
+
+class TableFunctionOSS : public TableFunctionS3
+{
+public:
+    static constexpr auto name = "oss";
+    std::string getName() const override
+    {
+        return name;
+    }
+private:
+    const char * getStorageTypeName() const override { return "OSS"; }
+};
+
+
+void registerTableFunctionGCS(TableFunctionFactory & factory)
+{
+    factory.registerFunction();
+}
+
 void registerTableFunctionS3(TableFunctionFactory & factory)
 {
     factory.registerFunction();
@@ -230,11 +272,6 @@ void registerTableFunctionOSS(TableFunctionFactory & factory)
 {
     factory.registerFunction();
 }
-void registerTableFunctionGCS(TableFunctionFactory & factory)
-{
-    factory.registerFunction();
-}
-
 }
 
 #endif
From 536720605797f37a2870c79855a633cca3cd5faa Mon Sep 17 00:00:00 2001
From: Alexey Milovidov
Date: Thu, 27 Apr 2023 23:24:36 +0300
Subject: [PATCH 11/52] Update TableFunctionS3.h

---
 src/TableFunctions/TableFunctionS3.h | 2 --
 1 file changed, 2 deletions(-)

diff --git a/src/TableFunctions/TableFunctionS3.h b/src/TableFunctions/TableFunctionS3.h
index 70c4a020669..4724684712b 100644
--- a/src/TableFunctions/TableFunctionS3.h
+++ b/src/TableFunctions/TableFunctionS3.h
@@ -69,6 +69,4 @@ protected:
 
 }
 
-}
-
 #endif
From 2da33b96eba3698dd347d7a0c5d487cfa031f85d Mon Sep 17 00:00:00 2001
From: xmy
Date: Fri, 28 Apr 2023 10:31:49 +0800
Subject: [PATCH 12/52] Allow Int* type argument for groupBitAnd/GroupBitOr/groupBitXor

---
 .../aggregate-functions/reference/groupbitand.md | 4 ++--
 .../aggregate-functions/reference/groupbitor.md  | 4 ++--
 .../aggregate-functions/reference/groupbitxor.md | 4 ++--
 .../AggregateFunctionBitwise.cpp                 | 2 +-
 .../AggregateFunctionSequenceMatch.cpp           | 2 +-
 .../AggregateFunctionWindowFunnel.cpp            | 2 +-
 src/AggregateFunctions/Helpers.h                 | 14 ++++++++++++--
 7 files changed, 21 insertions(+), 11 deletions(-)

diff --git a/docs/en/sql-reference/aggregate-functions/reference/groupbitand.md b/docs/en/sql-reference/aggregate-functions/reference/groupbitand.md
index f89e3796aaa..5fd5029751a 100644
--- a/docs/en/sql-reference/aggregate-functions/reference/groupbitand.md
+++ b/docs/en/sql-reference/aggregate-functions/reference/groupbitand.md
@@ -13,11 +13,11 @@ groupBitAnd(expr)
 
 **Arguments**
 
-`expr` – An expression that results in `UInt*` type.
+`expr` – An expression that results in `UInt*` or `Int*` type.
 
 **Return value**
 
-Value of the `UInt*` type.
+Value of the `UInt*` or `Int*` type.
 
 **Example**
 
diff --git a/docs/en/sql-reference/aggregate-functions/reference/groupbitor.md b/docs/en/sql-reference/aggregate-functions/reference/groupbitor.md
index 75b34d9c5a3..08a5c15da46 100644
--- a/docs/en/sql-reference/aggregate-functions/reference/groupbitor.md
+++ b/docs/en/sql-reference/aggregate-functions/reference/groupbitor.md
@@ -13,11 +13,11 @@ groupBitOr(expr)
 
 **Arguments**
 
-`expr` – An expression that results in `UInt*` type.
+`expr` – An expression that results in `UInt*` or `Int*` type.
 
 **Returned value**
 
-Value of the `UInt*` type.
+Value of the `UInt*` or `Int*` type.
 
 **Example**
 
diff --git a/docs/en/sql-reference/aggregate-functions/reference/groupbitxor.md b/docs/en/sql-reference/aggregate-functions/reference/groupbitxor.md
index ca6fb9f8352..f33e375953c 100644
--- a/docs/en/sql-reference/aggregate-functions/reference/groupbitxor.md
+++ b/docs/en/sql-reference/aggregate-functions/reference/groupbitxor.md
@@ -13,11 +13,11 @@ groupBitXor(expr)
 
 **Arguments**
 
-`expr` – An expression that results in `UInt*` type.
+`expr` – An expression that results in `UInt*` or `Int*` type.
 
 **Return value**
 
-Value of the `UInt*` type.
+Value of the `UInt*` or `Int*` type.
 
 **Example**
 
diff --git a/src/AggregateFunctions/AggregateFunctionBitwise.cpp b/src/AggregateFunctions/AggregateFunctionBitwise.cpp
index b87e899a685..82cb3b327f0 100644
--- a/src/AggregateFunctions/AggregateFunctionBitwise.cpp
+++ b/src/AggregateFunctions/AggregateFunctionBitwise.cpp
@@ -27,7 +27,7 @@ AggregateFunctionPtr createAggregateFunctionBitwise(const std::string & name, co
             "is illegal, because it cannot be used in bitwise operations",
             argument_types[0]->getName(), name);
 
-    AggregateFunctionPtr res(createWithUnsignedIntegerType(*argument_types[0], argument_types[0]));
+    AggregateFunctionPtr res(createWithOptionSignedIntegerType(*argument_types[0], argument_types[0]));
 
     if (!res)
         throw Exception(ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT,
diff --git a/src/AggregateFunctions/AggregateFunctionSequenceMatch.cpp b/src/AggregateFunctions/AggregateFunctionSequenceMatch.cpp
index 3dd9a8b658d..f2fe9014ceb 100644
--- a/src/AggregateFunctions/AggregateFunctionSequenceMatch.cpp
+++ b/src/AggregateFunctions/AggregateFunctionSequenceMatch.cpp
@@ -53,7 +53,7 @@ AggregateFunctionPtr createAggregateFunctionSequenceBase(
 
     String pattern = params.front().safeGet();
 
-    AggregateFunctionPtr res(createWithUnsignedIntegerType(*argument_types[0], argument_types, params, pattern));
+    AggregateFunctionPtr res(createWithOptionSignedIntegerType(*argument_types[0], argument_types, params, pattern));
     if (res)
         return res;
 
diff --git a/src/AggregateFunctions/AggregateFunctionWindowFunnel.cpp b/src/AggregateFunctions/AggregateFunctionWindowFunnel.cpp
index d80d683fd04..71c675f7a3b 100644
--- a/src/AggregateFunctions/AggregateFunctionWindowFunnel.cpp
+++ b/src/AggregateFunctions/AggregateFunctionWindowFunnel.cpp
@@ -48,7 +48,7 @@ createAggregateFunctionWindowFunnel(const std::string & name, const DataTypes &
                 cond_arg->getName(), toString(i + 1), name);
     }
 
-    AggregateFunctionPtr res(createWithUnsignedIntegerType(*arguments[0], arguments, params));
+    AggregateFunctionPtr res(createWithOptionSignedIntegerType(*arguments[0], arguments, params));
     WhichDataType which(arguments.front().get());
     if (res)
         return res;
diff --git a/src/AggregateFunctions/Helpers.h b/src/AggregateFunctions/Helpers.h
index 19904dd9215..4131279a897 100644
--- a/src/AggregateFunctions/Helpers.h
+++ b/src/AggregateFunctions/Helpers.h
@@ -87,8 +87,8 @@ static IAggregateFunction * createWithNumericType(const IDataType & argument_typ
     return nullptr;
 }
 
-template