diff --git a/cmake/find/blob_storage.cmake b/cmake/find/blob_storage.cmake
index 74a907da7db..ec1b97f4695 100644
--- a/cmake/find/blob_storage.cmake
+++ b/cmake/find/blob_storage.cmake
@@ -1,14 +1,16 @@
 option (ENABLE_AZURE_BLOB_STORAGE "Enable Azure blob storage" ${ENABLE_LIBRARIES})
 
-option(USE_INTERNAL_AZURE_BLOB_STORAGE_LIBRARY
-    "Set to FALSE to use system Azure SDK instead of bundled (OFF currently not implemented)"
-    ON)
-
 if (ENABLE_AZURE_BLOB_STORAGE)
     set(USE_AZURE_BLOB_STORAGE 1)
     set(AZURE_BLOB_STORAGE_LIBRARY azure_sdk)
+else()
+    return()
 endif()
 
+option(USE_INTERNAL_AZURE_BLOB_STORAGE_LIBRARY
+    "Set to FALSE to use system Azure SDK instead of bundled (OFF currently not implemented)"
+    ON)
+
 if ((NOT EXISTS "${ClickHouse_SOURCE_DIR}/contrib/azure/sdk"
         OR NOT EXISTS "${ClickHouse_SOURCE_DIR}/contrib/azure/cmake-modules")
         AND USE_INTERNAL_AZURE_BLOB_STORAGE_LIBRARY)
diff --git a/cmake/find/ccache.cmake b/cmake/find/ccache.cmake
index 95ec3d8a034..9acc0423f67 100644
--- a/cmake/find/ccache.cmake
+++ b/cmake/find/ccache.cmake
@@ -31,6 +31,7 @@ if (CCACHE_FOUND AND NOT COMPILER_MATCHES_CCACHE)
 
    if (CCACHE_VERSION VERSION_GREATER "3.2.0" OR NOT CMAKE_CXX_COMPILER_ID STREQUAL "Clang")
       message(STATUS "Using ${CCACHE_FOUND} ${CCACHE_VERSION}")
+      set(LAUNCHER ${CCACHE_FOUND})
 
       # debian (debhelpers) set SOURCE_DATE_EPOCH environment variable, that is
       # filled from the debian/changelog or current time.
@@ -39,13 +40,8 @@ if (CCACHE_FOUND AND NOT COMPILER_MATCHES_CCACHE)
       #   of the manifest, which do not allow to use previous cache,
       # - 4.2+ ccache ignores SOURCE_DATE_EPOCH for every file w/o __DATE__/__TIME__
       #
-      # So for:
-      # - 4.2+ does not require any sloppiness
-      # - 4.0+ will ignore SOURCE_DATE_EPOCH environment variable.
-      if (CCACHE_VERSION VERSION_GREATER_EQUAL "4.2")
-         message(STATUS "ccache is 4.2+ no quirks for SOURCE_DATE_EPOCH required")
-         set(LAUNCHER ${CCACHE_FOUND})
-      elseif (CCACHE_VERSION VERSION_GREATER_EQUAL "4.0")
+      # Exclude SOURCE_DATE_EPOCH env for ccache versions between [4.0, 4.2).
+      if (CCACHE_VERSION VERSION_GREATER_EQUAL "4.0" AND CCACHE_VERSION VERSION_LESS "4.2")
          message(STATUS "Ignore SOURCE_DATE_EPOCH for ccache")
          set(LAUNCHER env -u SOURCE_DATE_EPOCH ${CCACHE_FOUND})
       endif()
diff --git a/docs/en/interfaces/http.md b/docs/en/interfaces/http.md
index 38e729fde0b..a49143bf599 100644
--- a/docs/en/interfaces/http.md
+++ b/docs/en/interfaces/http.md
@@ -186,7 +186,7 @@ $ echo "SELECT 1" | gzip -c | \
 ```
 
 ``` bash
-# Receiving compressed data from the server
+# Receiving compressed data archive from the server
 $ curl -vsS "http://localhost:8123/?enable_http_compression=1" \
     -H 'Accept-Encoding: gzip' --output result.gz -d 'SELECT number FROM system.numbers LIMIT 3'
 $ zcat result.gz
@@ -195,6 +195,15 @@ $ zcat result.gz
 2
 ```
 
+```bash
+# Receiving compressed data from the server and using the gunzip to receive decompressed data
+$ curl -sS "http://localhost:8123/?enable_http_compression=1" \
+    -H 'Accept-Encoding: gzip' -d 'SELECT number FROM system.numbers LIMIT 3' | gunzip -
+0
+1
+2
+```
+
 ## Default Database {#default-database}
 
 You can use the ‘database’ URL parameter or the ‘X-ClickHouse-Database’ header to specify the default database.
diff --git a/docs/en/introduction/adopters.md b/docs/en/introduction/adopters.md
index 87c5a6f7aec..c2660653907 100644
--- a/docs/en/introduction/adopters.md
+++ b/docs/en/introduction/adopters.md
@@ -60,8 +60,10 @@ toc_title: Adopters
 | <a href="https://www.exness.com/" class="favicon">Exness</a> | Trading | Metrics, Logging | — | — | [Talk in Russian, May 2019](https://youtu.be/_rpU-TvSfZ8?t=3215) |
 | <a href="https://www.eventbunker.io/" class="favicon">EventBunker.io</a> | Serverless Data Processing | — | — | — | [Tweet, April 2021](https://twitter.com/Halil_D_/status/1379839133472985091) |
 | <a href="https://fastnetmon.com/" class="favicon">FastNetMon</a> | DDoS Protection | Main Product | | — | [Official website](https://fastnetmon.com/docs-fnm-advanced/fastnetmon-advanced-traffic-persistency/) |
+| <a href="https://www.firebolt.io/" class="favicon">Firebolt</a> | Analytics | Main product | - | - | [YouTube Tech Talk](https://www.youtube.com/watch?v=9rW9uEJ15tU) |
 | <a href="https://www.flipkart.com/" class="favicon">Flipkart</a> | e-Commerce | — | — | — | [Talk in English, July 2020](https://youtu.be/GMiXCMFDMow?t=239) |
 | <a href="https://fun.co/rp" class="favicon">FunCorp</a> | Games | | — | 14 bn records/day as of Jan 2021 | [Article](https://www.altinity.com/blog/migrating-from-redshift-to-clickhouse) |
+| <a href="https://futurragroup.com/" class="favicon">Futurra Group</a> | Analytics | — | — | — | [Article in Russian, December 2021](https://dou.ua/forums/topic/35587/) |
 | <a href="https://geniee.co.jp" class="favicon">Geniee</a> | Ad network | Main product | — | — | [Blog post in Japanese, July 2017](https://tech.geniee.co.jp/entry/2017/07/20/160100) |
 | <a href="https://www.genotek.ru/" class="favicon">Genotek</a> | Bioinformatics | Main product | — | — | [Video, August 2020](https://youtu.be/v3KyZbz9lEE) |
 | <a href="https://gigapipe.com/" class="favicon">Gigapipe</a> | Managed ClickHouse | Main product | — | — | [Official website](https://gigapipe.com/) |
@@ -70,6 +72,7 @@ toc_title: Adopters
 | <a href="https://www.grouparoo.com" class="favicon">Grouparoo</a> | Data Warehouse Integrations | Main product | — | — | [Official Website, November 2021](https://www.grouparoo.com/integrations) |
 | <a href="https://www.huya.com/" class="favicon">HUYA</a> | Video Streaming | Analytics | — | — | [Slides in Chinese, October 2018](https://github.com/ClickHouse/clickhouse-presentations/blob/master/meetup19/7.%20ClickHouse万亿数据分析实践%20李本旺(sundy-li)%20虎牙.pdf) |
 | <a href="https://www.hydrolix.io/" class="favicon">Hydrolix</a> | Cloud data platform | Main product | — | — | [Documentation](https://docs.hydrolix.io/guide/query) |
+| <a href="https://hystax.com" class="favicon">Hystax</a> | Cloud Operations | Observability Analytics | - | - | [Blog](https://hystax.com/clickhouse-for-real-time-cost-saving-analytics-how-to-stop-hammering-screws-and-use-an-electric-screwdriver/) |
 | <a href="https://www.the-ica.com/" class="favicon">ICA</a> | FinTech | Risk Management | — | — | [Blog Post in English, Sep 2020](https://altinity.com/blog/clickhouse-vs-redshift-performance-for-fintech-risk-management?utm_campaign=ClickHouse%20vs%20RedShift&utm_content=143520807&utm_medium=social&utm_source=twitter&hss_channel=tw-3894792263) |
 | <a href="https://www.idealista.com" class="favicon">Idealista</a> | Real Estate | Analytics | — | — | [Blog Post in English, April 2019](https://clickhouse.com/blog/en/clickhouse-meetup-in-madrid-on-april-2-2019) |
 | <a href="https://infobaleen.com" class="favicon">Infobaleen</a> | AI markting tool  | Analytics | — | — | [Official site](https://infobaleen.com) |
@@ -81,14 +84,18 @@ toc_title: Adopters
 | <a href="https://ippon.tech" class="favicon">Ippon Technologies</a> | Technology Consulting | — | — | — | [Talk in English, July 2020](https://youtu.be/GMiXCMFDMow?t=205) |
 | <a href="https://www.ivi.ru/" class="favicon">Ivi</a> | Online Cinema | Analytics, Monitoring | — | — | [Article in Russian, Jan 2018](https://habr.com/en/company/ivi/blog/347408/) |
 | <a href="https://jinshuju.net" class="favicon">Jinshuju 金数据</a> | BI Analytics | Main product | — | — | [Slides in Chinese, October 2019](https://github.com/ClickHouse/clickhouse-presentations/blob/master/meetup24/3.%20金数据数据架构调整方案Public.pdf) |
-| <a href="https://jitsu.com" class="favicon">Jitsu</a> | Cloud Software | Data Pipeline | — | — | [Documentation](https://jitsu.com/docs/destinations-configuration/clickhouse-destination), [Hacker News](https://news.ycombinator.com/item?id=29106082) |
+| <a href="https://jitsu.com" class="favicon">Jitsu</a> | Cloud Software | Data Pipeline | — | — | [Documentation](https://jitsu.com/docs/destinations-configuration/clickhouse-destination), [Hacker News post](https://news.ycombinator.com/item?id=29106082) |
+| <a href="https://juicefs.com/" class="favicon">JuiceFS</a> | Storage | Shopping Cart | - | - | [Blog](https://juicefs.com/blog/en/posts/shopee-clickhouse-with-juicefs/) |
 | <a href="https://www.kakaocorp.com/" class="favicon">kakaocorp</a> | Internet company | — | — | — | [if(kakao)2020](https://tv.kakao.com/channel/3693125/cliplink/414129353), [if(kakao)2021](https://if.kakao.com/session/24) |
 | <a href="https://www.kodiakdata.com/" class="favicon">Kodiak Data</a> | Clouds | Main product | — | — | [Slides in Engish, April 2018](https://github.com/ClickHouse/clickhouse-presentations/blob/master/meetup13/kodiak_data.pdf) |
 | <a href="https://kontur.ru" class="favicon">Kontur</a> | Software Development | Metrics | — | — | [Talk in Russian, November 2018](https://www.youtube.com/watch?v=U4u4Bd0FtrY) |
 | <a href="https://www.kuaishou.com/" class="favicon">Kuaishou</a> | Video | — | — | — | [ClickHouse Meetup, October 2018](https://clickhouse.com/blog/en/2018/clickhouse-community-meetup-in-beijing-on-october-28-2018/) |
 | <a href="https://www.kgk-global.com/en/" class="favicon">KGK Global</a> | Vehicle monitoring | — | — | — | [Press release, June 2021](https://zoom.cnews.ru/news/item/530921) |
+| <a href="https://www.lancom-systems.com/" class="favicon">LANCOM Systems</a> | Network Solutions | Traffic analysis | - | - | [ClickHouse Operator for Kubernetes](https://www.lancom-systems.com/), [Hacker News post] (https://news.ycombinator.com/item?id=29413660) |
 | <a href="https://www.lbl.gov" class="favicon">Lawrence Berkeley National Laboratory</a> | Research | Traffic analysis | 5 servers | 55 TiB | [Slides in English, April 2019](https://www.smitasin.com/presentations/2019-04-17_DOE-NSM.pdf) |
+| <a href="https://www.lever.co/" class="favicon">Lever</a> | Talent Management | Recruiting | - | - | [Hacker News post](https://news.ycombinator.com/item?id=29558544) |
 | <a href="https://lifestreet.com/" class="favicon">LifeStreet</a> | Ad network | Main product | 75 servers (3 replicas) | 5.27 PiB | [Blog post in Russian, February 2017](https://habr.com/en/post/322620/) |
+| <a href="https://lookforsale.ru/" class="favicon">Lookforsale</a> | E-Commerce | — | — | — | [Job Posting, December 2021](https://telegram.me/javascript_jobs/587318) |
 | <a href="https://mcs.mail.ru/" class="favicon">Mail.ru Cloud Solutions</a> | Cloud services | Main product | — | — | [Article in Russian](https://mcs.mail.ru/help/db-create/clickhouse#) |
 | <a href="https://maxilect.com/" class="favicon">MAXILECT</a> | Ad Tech, Blockchain, ML, AI | — | — | — | [Job advertisement, 2021](https://www.linkedin.com/feed/update/urn:li:activity:6780842017229430784/) |
 | <a href="https://tech.mymarilyn.ru" class="favicon">Marilyn</a> | Advertising | Statistics | — | — | [Talk in Russian, June 2017](https://www.youtube.com/watch?v=iXlIgx2khwc) |
@@ -106,6 +113,7 @@ toc_title: Adopters
 | <a href="https://ok.ru" class="favicon">Ok.ru</a> | Social Network | — | 72 servers | 810 TB compressed, 50bn rows/day, 1.5 TB/day | [SmartData conference, October 2021](https://assets.ctfassets.net/oxjq45e8ilak/4JPHkbJenLgZhBGGyyonFP/57472ec6987003ec4078d0941740703b/____________________ClickHouse_______________________.pdf) |
 | <a href="https://omnicomm.ru/" class="favicon">Omnicomm</a> | Transportation Monitoring | — | — | — | [Facebook post, October 2021](https://www.facebook.com/OmnicommTeam/posts/2824479777774500) |
 | <a href="https://www.oneapm.com/" class="favicon">OneAPM</a> | Monitoring and Data Analysis | Main product | — | — | [Slides in Chinese, October 2018](https://github.com/ClickHouse/clickhouse-presentations/blob/master/meetup19/8.%20clickhouse在OneAPM的应用%20杜龙.pdf) |
+| <a href="https://opensee.io/" class="favicon">Opensee</a> | Financial Analytics | Main product | - | - | [Blog](https://opensee.io/news/from-moscow-to-wall-street-the-remarkable-journey-of-clickhouse/) |
 | <a href="https://www.opentargets.org/" class="favicon">Open Targets</a> | Genome Research | Genome Search | — | — | [Tweet, October 2021](https://twitter.com/OpenTargets/status/1452570865342758913?s=20), [Blog](https://blog.opentargets.org/graphql/) |
 | <a href="https://corp.ozon.com/" class="favicon">OZON</a> | E-commerce | — | — | — | [Official website](https://job.ozon.ru/vacancy/razrabotchik-clickhouse-ekspluatatsiya-40991870/) |
 | <a href="https://panelbear.com/" class="favicon">Panelbear | Analytics | Monitoring and Analytics | — | — | [Tech Stack, November 2020](https://panelbear.com/blog/tech-stack/) |
@@ -118,6 +126,7 @@ toc_title: Adopters
 | <a href="https://prana-system.com/en/" class="favicon">PRANA</a> | Industrial predictive analytics | Main product | — | — | [News (russian), Feb 2021](https://habr.com/en/news/t/541392/) |
 | <a href="https://www.qingcloud.com/" class="favicon">QINGCLOUD</a> | Cloud services | Main product | — | — | [Slides in Chinese, October 2018](https://github.com/ClickHouse/clickhouse-presentations/blob/master/meetup19/4.%20Cloud%20%2B%20TSDB%20for%20ClickHouse%20张健%20QingCloud.pdf) |
 | <a href="https://qrator.net" class="favicon">Qrator</a> | DDoS protection | Main product | — | — | [Blog Post, March 2019](https://blog.qrator.net/en/clickhouse-ddos-mitigation_37/) |
+| <a href="https://rvision.pro/en/" class="favicon">R-Vision</a> | Information Security | — | — | — | [Article in Russian, December 2021](https://www.anti-malware.ru/reviews/R-Vision-SENSE-15) |
 | <a href="https://www.rbinternational.com/" class="favicon">Raiffeisenbank</a> | Banking | Analytics | — | — | [Lecture in Russian, December 2020](https://cs.hse.ru/announcements/421965599.html) |
 | <a href="https://rambler.ru" class="favicon">Rambler</a> | Internet services | Analytics | — | — | [Talk in Russian, April 2018](https://medium.com/@ramblertop/разработка-api-clickhouse-для-рамблер-топ-100-f4c7e56f3141) |
 | <a href="https://replicahq.com" class="favicon">Replica</a> | Urban Planning | Analytics | — | — | [Job advertisement](https://boards.greenhouse.io/replica/jobs/5547732002?gh_jid=5547732002) |
@@ -153,6 +162,7 @@ toc_title: Adopters
 | <a href="https://www.tinybird.co/" class="favicon">Tinybird</a> | Real-time Data Products | Data processing | — | — | [Official website](https://www.tinybird.co/) |
 | <a href="https://trafficstars.com/" class="favicon">Traffic Stars</a> | AD network | — | 300 servers in Europe/US | 1.8 PiB, 700 000 insert rps (as of 2021) | [Slides in Russian, May 2018](https://github.com/ClickHouse/clickhouse-presentations/blob/master/meetup15/lightning/ninja.pdf) |
 | <a href="https://www.uber.com" class="favicon">Uber</a> | Taxi | Logging | — | — | [Slides, February 2020](https://presentations.clickhouse.com/meetup40/uber.pdf) |
+| <a href="https://usetech.com/" class="favicon">UseTech</a> | Software Development | — | — | — | [Job Posting, December 2021](https://vk.com/wall136266658_2418) |
 | <a href="https://hello.utmstat.com/" class="favicon">UTMSTAT</a> | Analytics | Main product | — | — | [Blog post, June 2020](https://vc.ru/tribuna/133956-striming-dannyh-iz-servisa-skvoznoy-analitiki-v-clickhouse) |
 | <a href="https://vercel.com/" class="favicon">Vercel</a> | Traffic and Performance Analytics | — | — | — | Direct reference, October 2021 |
 | <a href="https://vk.com" class="favicon">VKontakte</a> | Social Network | Statistics, Logging | — | — | [Slides in Russian, August 2018](https://github.com/ClickHouse/clickhouse-presentations/blob/master/meetup17/3_vk.pdf) |
@@ -168,7 +178,8 @@ toc_title: Adopters
 | <a href="https://cloud.yandex.ru/services/managed-clickhouse" class="favicon">Yandex Cloud</a> | Public Cloud | Main product | — | — | [Talk in Russian, December 2019](https://www.youtube.com/watch?v=pgnak9e_E0o) |
 | <a href="https://cloud.yandex.ru/services/datalens" class="favicon">Yandex DataLens</a> | Business Intelligence | Main product | — | — | [Slides in Russian, December 2019](https://presentations.clickhouse.com/meetup38/datalens.pdf) |
 | <a href="https://market.yandex.ru/" class="favicon">Yandex Market</a> | e-Commerce | Metrics, Logging | — | — | [Talk in Russian, January 2019](https://youtu.be/_l1qP0DyBcA?t=478) |
-| <a href="https://metrica.yandex.com" class="favicon">Yandex Metrica</a> | Web analytics | Main product | 630 servers in one cluster, 360 servers in another cluster, 1862 servers in one department | 133 PiB / 8.31 PiB / 120 trillion records | [Slides, February 2020](https://presentations.clickhouse.com/meetup40/introduction/#13) |
+| <a href="https://metrica.yandex.com" class="favicon">Yandex Metrica</a> | Web analytics | Macin product | 630 servers in one cluster, 360 servers in another cluster, 1862 servers in one department | 133 PiB / 8.31 PiB / 120 trillion records | [Slides, February 2020](https://presentations.clickhouse.com/meetup40/introduction/#13) |
+| <a href="https://www.yellowfinbi.com" class="favicon"><COMPANYNAME></a> | Analytics | Main product | - | - | [Integration](https://www.yellowfinbi.com/campaign/yellowfin-9-whats-new#el-30219e0e) |
 | <a href="https://www.yotascale.com/" class="favicon">Yotascale</a> | Cloud | Data pipeline | — | 2 bn records/day | [LinkedIn (Accomplishments)](https://www.linkedin.com/in/adilsaleem/) |
 | <a href="https://www.your-analytics.org/" class="favicon">Your Analytics</a> | Product Analytics | Main Product | — | - | [Tweet, November 2021](https://twitter.com/mikenikles/status/1459737241165565953) |
 | <a href="https://zagravagames.com/en/" class="favicon">Zagrava Trading</a> | — | — | — | — | [Job offer, May 2021](https://twitter.com/datastackjobs/status/1394707267082063874) |
@@ -178,9 +189,5 @@ toc_title: Adopters
 | <a href="https://promo.croc.ru/digitalworker" class="favicon">Цифровой Рабочий</a> | Industrial IoT, Analytics | — | — | — | [Blog post in Russian, March 2021](https://habr.com/en/company/croc/blog/548018/) |
 | <a href="https://shop.okraina.ru/" class="favicon">ООО «МПЗ Богородский»</a> | Agriculture | — | — | — | [Article in Russian, November 2020](https://cloud.yandex.ru/cases/okraina) |
 | <a href="https://domclick.ru/" class="favicon">ДомКлик</a> | Real Estate | — | — | — | [Article in Russian, October 2021](https://habr.com/ru/company/domclick/blog/585936/) |
-| <a href="https://futurragroup.com/" class="favicon">Futurra Group</a> | Analytics | — | — | — | [Article in Russian, December 2021](https://dou.ua/forums/topic/35587/) |
-| <a href="https://usetech.com/" class="favicon">UseTech</a> | Software Development | — | — | — | [Job Posting, December 2021](https://vk.com/wall136266658_2418) |
-| <a href="https://lookforsale.ru/" class="favicon">Lookforsale</a> | E-Commerce | — | — | — | [Job Posting, December 2021](https://telegram.me/javascript_jobs/587318) |
-| <a href="https://rvision.pro/en/" class="favicon">R-Vision</a> | Information Security | — | — | — | [Article in Russian, December 2021](https://www.anti-malware.ru/reviews/R-Vision-SENSE-15) |
 
 [Original article](https://clickhouse.com/docs/en/introduction/adopters/) <!--hide-->
diff --git a/docs/en/sql-reference/statements/alter/projection.md b/docs/en/sql-reference/statements/alter/projection.md
index 96cd8f5d607..c7ebc83c496 100644
--- a/docs/en/sql-reference/statements/alter/projection.md
+++ b/docs/en/sql-reference/statements/alter/projection.md
@@ -9,11 +9,12 @@ The following operations with [projections](../../../engines/table-engines/merge
 
 -   `ALTER TABLE [db].name ADD PROJECTION name ( SELECT <COLUMN LIST EXPR> [GROUP BY] [ORDER BY] )` - Adds projection description to tables metadata.
 
--   `ALTER TABLE [db].name DROP PROJECTION name` - Removes projection description from tables metadata and deletes projection files from disk.
+-   `ALTER TABLE [db].name DROP PROJECTION name` - Removes projection description from tables metadata and deletes projection files from disk. Implemented as a [mutation](../../../sql-reference/statements/alter/index.md#mutations).
 
 -   `ALTER TABLE [db.]table MATERIALIZE PROJECTION name IN PARTITION partition_name` - The query rebuilds the projection `name` in the partition `partition_name`. Implemented as a [mutation](../../../sql-reference/statements/alter/index.md#mutations).
 
--   `ALTER TABLE [db.]table CLEAR PROJECTION name IN PARTITION partition_name` - Deletes projection files from disk without removing description.
+-   `ALTER TABLE [db.]table CLEAR PROJECTION name IN PARTITION partition_name` - Deletes projection files from disk without removing description. Implemented as a [mutation](../../../sql-reference/statements/alter/index.md#mutations).
+
 
 The commands `ADD`, `DROP` and `CLEAR` are lightweight in a sense that they only change metadata or remove files.
 
diff --git a/docs/ru/sql-reference/functions/nlp-functions.md b/docs/ru/sql-reference/functions/nlp-functions.md
index 250403ab127..992a7d6ccf3 100644
--- a/docs/ru/sql-reference/functions/nlp-functions.md
+++ b/docs/ru/sql-reference/functions/nlp-functions.md
@@ -3,10 +3,10 @@ toc_priority: 67
 toc_title: NLP
 ---
 
-# [экспериментально] Функции для работы с ествественным языком {#nlp-functions}
+# [экспериментально] Функции для работы с естественным языком {#nlp-functions}
 
 !!! warning "Предупреждение"
-    Сейчас использование функций для работы с ествественным языком является экспериментальной возможностью. Чтобы использовать данные функции, включите настройку `allow_experimental_nlp_functions = 1`.
+    Сейчас использование функций для работы с естественным языком является экспериментальной возможностью. Чтобы использовать данные функции, включите настройку `allow_experimental_nlp_functions = 1`.
 
 ## stem {#stem}
 
@@ -84,7 +84,7 @@ SELECT lemmatize('en', 'wolves');
 
 Находит синонимы к заданному слову. Представлены два типа расширений словарей: `plain` и `wordnet`.
 
-Для работы расширения типа `plain` необходимо указать путь до простого текстового файла, где каждая строка соотвествует одному набору синонимов. Слова в данной строке должны быть разделены с помощью пробела или знака табуляции.
+Для работы расширения типа `plain` необходимо указать путь до простого текстового файла, где каждая строка соответствует одному набору синонимов. Слова в данной строке должны быть разделены с помощью пробела или знака табуляции.
 
 Для работы расширения типа `plain` необходимо указать путь до WordNet тезауруса. Тезаурус должен содержать WordNet sense index.
 
diff --git a/programs/benchmark/Benchmark.cpp b/programs/benchmark/Benchmark.cpp
index 1c276a83768..35ffb97b8e2 100644
--- a/programs/benchmark/Benchmark.cpp
+++ b/programs/benchmark/Benchmark.cpp
@@ -342,6 +342,9 @@ private:
             }
         }
 
+        /// Now we don't block the Ctrl+C signal and second signal will terminate the program without waiting.
+        interrupt_listener.unblock();
+
         pool.wait();
         total_watch.stop();
 
@@ -586,7 +589,6 @@ public:
 #ifndef __clang__
 #pragma GCC optimize("-fno-var-tracking-assignments")
 #endif
-#pragma GCC diagnostic ignored "-Wmissing-declarations"
 
 int mainEntryClickHouseBenchmark(int argc, char ** argv)
 {
diff --git a/programs/local/LocalServer.cpp b/programs/local/LocalServer.cpp
index 53e295b7fbb..aa4747636c9 100644
--- a/programs/local/LocalServer.cpp
+++ b/programs/local/LocalServer.cpp
@@ -313,11 +313,11 @@ void LocalServer::cleanup()
 
 std::string LocalServer::getInitialCreateTableQuery()
 {
-    if (!config().has("table-structure"))
+    if (!config().has("table-structure") && !config().has("table-file"))
         return {};
 
     auto table_name = backQuoteIfNeed(config().getString("table-name", "table"));
-    auto table_structure = config().getString("table-structure");
+    auto table_structure = config().getString("table-structure", "auto");
     auto data_format = backQuoteIfNeed(config().getString("table-data-format", "TSV"));
 
     String table_file;
@@ -332,7 +332,12 @@ std::string LocalServer::getInitialCreateTableQuery()
         table_file = quoteString(config().getString("table-file"));
     }
 
-    return fmt::format("CREATE TABLE {} ({}) ENGINE = File({}, {});",
+    if (table_structure == "auto")
+        table_structure = "";
+    else
+        table_structure = "(" + table_structure + ")";
+
+    return fmt::format("CREATE TABLE {} {} ENGINE = File({}, {});",
                        table_name, table_structure, data_format, table_file);
 }
 
@@ -422,7 +427,7 @@ try
 #else
     is_interactive = stdin_is_a_tty
         && (config().hasOption("interactive")
-            || (!config().has("query") && !config().has("table-structure") && queries_files.empty()));
+            || (!config().has("query") && !config().has("table-structure") && queries_files.empty() && !config().has("table-file")));
 #endif
     if (!is_interactive)
     {
diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt
index 7124961821e..0fe66314114 100644
--- a/src/CMakeLists.txt
+++ b/src/CMakeLists.txt
@@ -526,6 +526,14 @@ if (USE_BZIP2)
     target_include_directories (clickhouse_common_io SYSTEM BEFORE PRIVATE ${BZIP2_INCLUDE_DIR})
 endif()
 
+if(USE_SIMDJSON)
+    dbms_target_link_libraries(PRIVATE simdjson)
+endif()
+
+if(USE_RAPIDJSON)
+    dbms_target_include_directories(SYSTEM PRIVATE ${RAPIDJSON_INCLUDE_DIR})
+endif()
+
 dbms_target_link_libraries(PUBLIC consistent-hashing)
 
 include ("${ClickHouse_SOURCE_DIR}/cmake/add_check.cmake")
diff --git a/src/Common/ErrorCodes.cpp b/src/Common/ErrorCodes.cpp
index f2e5e018e1b..16f85fcae61 100644
--- a/src/Common/ErrorCodes.cpp
+++ b/src/Common/ErrorCodes.cpp
@@ -604,6 +604,7 @@
     M(633, QUERY_IS_NOT_SUPPORTED_IN_WINDOW_VIEW) \
     M(634, MONGODB_ERROR) \
     M(635, CANNOT_POLL) \
+    M(636, CANNOT_EXTRACT_TABLE_STRUCTURE) \
     \
     M(999, KEEPER_EXCEPTION) \
     M(1000, POCO_EXCEPTION) \
diff --git a/src/Common/SymbolIndex.cpp b/src/Common/SymbolIndex.cpp
index 568f633975b..32c1a15337c 100644
--- a/src/Common/SymbolIndex.cpp
+++ b/src/Common/SymbolIndex.cpp
@@ -86,7 +86,7 @@ namespace
 /// https://stackoverflow.com/questions/32088140/multiple-string-tables-in-elf-object
 
 
-void updateResources(std::string_view name, const void * address, SymbolIndex::Resources & resources)
+void updateResources(ElfW(Addr) base_address, std::string_view object_name, std::string_view name, const void * address, SymbolIndex::Resources & resources)
 {
     const char * char_address = static_cast<const char *>(address);
 
@@ -97,18 +97,23 @@ void updateResources(std::string_view name, const void * address, SymbolIndex::R
             name = name.substr((name[0] == '_') + strlen("binary_"));
             name = name.substr(0, name.size() - strlen("_start"));
 
-            resources.emplace(name, std::string_view{char_address, 0}); // NOLINT
+            resources.emplace(name, SymbolIndex::ResourcesBlob{
+                base_address,
+                object_name,
+                std::string_view{char_address, 0}, // NOLINT
+            });
         }
         else if (name.ends_with("_end"))
         {
             name = name.substr((name[0] == '_') + strlen("binary_"));
             name = name.substr(0, name.size() - strlen("_end"));
 
-            if (auto it = resources.find(name); it != resources.end() && it->second.empty())
+            auto it = resources.find(name);
+            if (it != resources.end() && it->second.base_address == base_address && it->second.data.empty())
             {
-                const char * start = it->second.data();
+                const char * start = it->second.data.data();
                 assert(char_address >= start);
-                it->second = std::string_view{start, static_cast<size_t>(char_address - start)};
+                it->second.data = std::string_view{start, static_cast<size_t>(char_address - start)};
             }
         }
     }
@@ -153,10 +158,12 @@ void collectSymbolsFromProgramHeaders(
         size_t sym_cnt = 0;
         for (const auto * it = dyn_begin; it->d_tag != DT_NULL; ++it)
         {
+            ElfW(Addr) base_address = correct_address(info->dlpi_addr, it->d_un.d_ptr);
+
             // TODO: this branch leads to invalid address of the hash table. Need further investigation.
             // if (it->d_tag == DT_HASH)
             // {
-            //     const ElfW(Word) * hash = reinterpret_cast<const ElfW(Word) *>(correct_address(info->dlpi_addr, it->d_un.d_ptr));
+            //     const ElfW(Word) * hash = reinterpret_cast<const ElfW(Word) *>(base_address);
             //     sym_cnt = hash[1];
             //     break;
             // }
@@ -167,7 +174,7 @@ void collectSymbolsFromProgramHeaders(
                 const uint32_t * buckets = nullptr;
                 const uint32_t * hashval = nullptr;
 
-                const ElfW(Word) * hash = reinterpret_cast<const ElfW(Word) *>(correct_address(info->dlpi_addr, it->d_un.d_ptr));
+                const ElfW(Word) * hash = reinterpret_cast<const ElfW(Word) *>(base_address);
 
                 buckets = hash + 4 + (hash[2] * sizeof(size_t) / 4);
 
@@ -196,9 +203,11 @@ void collectSymbolsFromProgramHeaders(
         const char * strtab = nullptr;
         for (const auto * it = dyn_begin; it->d_tag != DT_NULL; ++it)
         {
+            ElfW(Addr) base_address = correct_address(info->dlpi_addr, it->d_un.d_ptr);
+
             if (it->d_tag == DT_STRTAB)
             {
-                strtab = reinterpret_cast<const char *>(correct_address(info->dlpi_addr, it->d_un.d_ptr));
+                strtab = reinterpret_cast<const char *>(base_address);
                 break;
             }
         }
@@ -208,10 +217,12 @@ void collectSymbolsFromProgramHeaders(
 
         for (const auto * it = dyn_begin; it->d_tag != DT_NULL; ++it)
         {
+            ElfW(Addr) base_address = correct_address(info->dlpi_addr, it->d_un.d_ptr);
+
             if (it->d_tag == DT_SYMTAB)
             {
                 /* Get the pointer to the first entry of the symbol table */
-                const ElfW(Sym) * elf_sym = reinterpret_cast<const ElfW(Sym) *>(correct_address(info->dlpi_addr, it->d_un.d_ptr));
+                const ElfW(Sym) * elf_sym = reinterpret_cast<const ElfW(Sym) *>(base_address);
 
                 /* Iterate over the symbol table */
                 for (ElfW(Word) sym_index = 0; sym_index < ElfW(Word)(sym_cnt); ++sym_index)
@@ -236,7 +247,7 @@ void collectSymbolsFromProgramHeaders(
                         symbols.push_back(symbol);
 
                     /// But resources can be represented by a pair of empty symbols (indicating their boundaries).
-                    updateResources(symbol.name, symbol.address_begin, resources);
+                    updateResources(base_address, info->dlpi_name, symbol.name, symbol.address_begin, resources);
                 }
 
                 break;
@@ -299,7 +310,7 @@ void collectSymbolsFromELFSymbolTable(
         if (symbol_table_entry->st_size)
             symbols.push_back(symbol);
 
-        updateResources(symbol.name, symbol.address_begin, resources);
+        updateResources(info->dlpi_addr, info->dlpi_name, symbol.name, symbol.address_begin, resources);
     }
 }
 
diff --git a/src/Common/SymbolIndex.h b/src/Common/SymbolIndex.h
index 7c542980099..1331cf81cf7 100644
--- a/src/Common/SymbolIndex.h
+++ b/src/Common/SymbolIndex.h
@@ -51,7 +51,7 @@ public:
     std::string_view getResource(String name) const
     {
         if (auto it = data.resources.find(name); it != data.resources.end())
-            return it->second;
+            return it->second.data;
         return {};
     }
 
@@ -59,7 +59,17 @@ public:
     String getBuildID() const { return data.build_id; }
     String getBuildIDHex() const;
 
-    using Resources = std::unordered_map<std::string_view /* symbol name */, std::string_view /* blob */>;
+    struct ResourcesBlob
+    {
+        /// Symbol can be presented in multiple shared objects,
+        /// base_address will be used to compare only symbols from the same SO.
+        ElfW(Addr) base_address;
+        /// Just a human name of the SO.
+        std::string_view object_name;
+        /// Data blob.
+        std::string_view data;
+    };
+    using Resources = std::unordered_map<std::string_view /* symbol name */, ResourcesBlob>;
 
     struct Data
     {
diff --git a/src/Common/ZooKeeper/ZooKeeper.cpp b/src/Common/ZooKeeper/ZooKeeper.cpp
index f05a10b8815..c8753c8edaf 100644
--- a/src/Common/ZooKeeper/ZooKeeper.cpp
+++ b/src/Common/ZooKeeper/ZooKeeper.cpp
@@ -26,6 +26,7 @@ namespace ErrorCodes
 {
     extern const int LOGICAL_ERROR;
     extern const int NOT_IMPLEMENTED;
+    extern const int BAD_ARGUMENTS;
 }
 }
 
@@ -1133,4 +1134,54 @@ Coordination::RequestPtr makeCheckRequest(const std::string & path, int version)
     return request;
 }
 
+std::string normalizeZooKeeperPath(std::string zookeeper_path, bool check_starts_with_slash, Poco::Logger * log)
+{
+    if (!zookeeper_path.empty() && zookeeper_path.back() == '/')
+        zookeeper_path.resize(zookeeper_path.size() - 1);
+    /// If zookeeper chroot prefix is used, path should start with '/', because chroot concatenates without it.
+    if (!zookeeper_path.empty() && zookeeper_path.front() != '/')
+    {
+        /// Do not allow this for new tables, print warning for tables created in old versions
+        if (check_starts_with_slash)
+            throw DB::Exception(DB::ErrorCodes::BAD_ARGUMENTS, "ZooKeeper path must starts with '/', got '{}'", zookeeper_path);
+        if (log)
+            LOG_WARNING(log, "ZooKeeper path ('{}') does not start with '/'. It will not be supported in future releases");
+        zookeeper_path = "/" + zookeeper_path;
+    }
+
+    return zookeeper_path;
+}
+
+String extractZooKeeperName(const String & path)
+{
+    static constexpr auto default_zookeeper_name = "default";
+    if (path.empty())
+        throw DB::Exception("ZooKeeper path should not be empty", DB::ErrorCodes::BAD_ARGUMENTS);
+    if (path[0] == '/')
+        return default_zookeeper_name;
+    auto pos = path.find(":/");
+    if (pos != String::npos && pos < path.find('/'))
+    {
+        auto zookeeper_name = path.substr(0, pos);
+        if (zookeeper_name.empty())
+            throw DB::Exception("Zookeeper path should start with '/' or '<auxiliary_zookeeper_name>:/'", DB::ErrorCodes::BAD_ARGUMENTS);
+        return zookeeper_name;
+    }
+    return default_zookeeper_name;
+}
+
+String extractZooKeeperPath(const String & path, bool check_starts_with_slash, Poco::Logger * log)
+{
+    if (path.empty())
+        throw DB::Exception("ZooKeeper path should not be empty", DB::ErrorCodes::BAD_ARGUMENTS);
+    if (path[0] == '/')
+        return normalizeZooKeeperPath(path, check_starts_with_slash, log);
+    auto pos = path.find(":/");
+    if (pos != String::npos && pos < path.find('/'))
+    {
+        return normalizeZooKeeperPath(path.substr(pos + 1, String::npos), check_starts_with_slash, log);
+    }
+    return normalizeZooKeeperPath(path, check_starts_with_slash, log);
+}
+
 }
diff --git a/src/Common/ZooKeeper/ZooKeeper.h b/src/Common/ZooKeeper/ZooKeeper.h
index 8e015b1f331..371f93f6df3 100644
--- a/src/Common/ZooKeeper/ZooKeeper.h
+++ b/src/Common/ZooKeeper/ZooKeeper.h
@@ -379,4 +379,11 @@ private:
 };
 
 using EphemeralNodeHolderPtr = EphemeralNodeHolder::Ptr;
+
+String normalizeZooKeeperPath(std::string zookeeper_path, bool check_starts_with_slash, Poco::Logger * log = nullptr);
+
+String extractZooKeeperName(const String & path);
+
+String extractZooKeeperPath(const String & path, bool check_starts_with_slash, Poco::Logger * log = nullptr);
+
 }
diff --git a/src/Core/Settings.h b/src/Core/Settings.h
index 952009047d4..6e53fa4342c 100644
--- a/src/Core/Settings.h
+++ b/src/Core/Settings.h
@@ -596,6 +596,8 @@ class IColumn;
     M(Int64, input_format_orc_row_batch_size, 100'000, "Batch size when reading ORC stripes.", 0) \
     M(Bool, input_format_parquet_import_nested, false, "Allow to insert array of structs into Nested table in Parquet input format.", 0) \
     M(Bool, input_format_allow_seeks, true, "Allow seeks while reading in ORC/Parquet/Arrow input formats", 0) \
+    M(UInt64, input_format_msgpack_number_of_columns, 0, "The number of columns in inserted MsgPack data. Used for automatic schema inference from data.", 0) \
+    M(UInt64, input_format_max_rows_to_read_for_schema_inference, 100, "The maximum rows of data to read for automatic schema inference", 0) \
     \
     M(DateTimeInputFormat, date_time_input_format, FormatSettings::DateTimeInputFormat::Basic, "Method to read DateTime from text input formats. Possible values: 'basic' and 'best_effort'.", 0) \
     M(DateTimeOutputFormat, date_time_output_format, FormatSettings::DateTimeOutputFormat::Simple, "Method to write DateTime to text output. Possible values: 'simple', 'iso', 'unix_timestamp'.", 0) \
@@ -661,6 +663,7 @@ class IColumn;
     M(Bool, output_format_arrow_low_cardinality_as_dictionary, false, "Enable output LowCardinality type as Dictionary Arrow type", 0) \
     \
     M(EnumComparingMode, format_capn_proto_enum_comparising_mode, FormatSettings::EnumComparingMode::BY_VALUES, "How to map ClickHouse Enum and CapnProto Enum", 0)\
+
 // End of FORMAT_FACTORY_SETTINGS
 // Please add settings non-related to formats into the COMMON_SETTINGS above.
 
diff --git a/src/DataTypes/IDataType.h b/src/DataTypes/IDataType.h
index e74df5c327a..85644b6f6ca 100644
--- a/src/DataTypes/IDataType.h
+++ b/src/DataTypes/IDataType.h
@@ -377,6 +377,8 @@ struct WhichDataType
     constexpr bool isNullable() const { return idx == TypeIndex::Nullable; }
     constexpr bool isFunction() const { return idx == TypeIndex::Function; }
     constexpr bool isAggregateFunction() const { return idx == TypeIndex::AggregateFunction; }
+
+    constexpr bool isLowCarnality() const { return idx == TypeIndex::LowCardinality; }
 };
 
 /// IDataType helpers (alternative for IDataType virtual methods with single point of truth)
diff --git a/src/Databases/DatabaseOnDisk.cpp b/src/Databases/DatabaseOnDisk.cpp
index e9944b592ed..165bad950f5 100644
--- a/src/Databases/DatabaseOnDisk.cpp
+++ b/src/Databases/DatabaseOnDisk.cpp
@@ -76,10 +76,16 @@ std::pair<String, StoragePtr> createTableFromAST(
         /// - the database has not been loaded yet;
         /// - the code is simpler, since the query is already brought to a suitable form.
         if (!ast_create_query.columns_list || !ast_create_query.columns_list->columns)
-            throw Exception("Missing definition of columns.", ErrorCodes::EMPTY_LIST_OF_COLUMNS_PASSED);
-
-        columns = InterpreterCreateQuery::getColumnsDescription(*ast_create_query.columns_list->columns, context, true);
-        constraints = InterpreterCreateQuery::getConstraintsDescription(ast_create_query.columns_list->constraints);
+        {
+            if (!StorageFactory::instance().checkIfStorageSupportsSchemaInterface(ast_create_query.storage->engine->name))
+                throw Exception("Missing definition of columns.", ErrorCodes::EMPTY_LIST_OF_COLUMNS_PASSED);
+            /// Leave columns empty.
+        }
+        else
+        {
+            columns = InterpreterCreateQuery::getColumnsDescription(*ast_create_query.columns_list->columns, context, true);
+            constraints = InterpreterCreateQuery::getConstraintsDescription(ast_create_query.columns_list->constraints);
+        }
     }
 
     return
diff --git a/src/Formats/CapnProtoUtils.cpp b/src/Formats/CapnProtoUtils.cpp
index ecfa5df8351..bed46a97c1b 100644
--- a/src/Formats/CapnProtoUtils.cpp
+++ b/src/Formats/CapnProtoUtils.cpp
@@ -7,6 +7,8 @@
 #include <DataTypes/DataTypeLowCardinality.h>
 #include <DataTypes/DataTypeNullable.h>
 #include <DataTypes/DataTypeTuple.h>
+#include <DataTypes/DataTypesNumber.h>
+#include <DataTypes/DataTypeString.h>
 #include <DataTypes/IDataType.h>
 #include <boost/algorithm/string.hpp>
 #include <boost/algorithm/string/join.hpp>
@@ -26,6 +28,7 @@ namespace ErrorCodes
     extern const int FILE_DOESNT_EXIST;
     extern const int UNKNOWN_EXCEPTION;
     extern const int INCORRECT_DATA;
+    extern const int CANNOT_EXTRACT_TABLE_STRUCTURE;
 }
 
 capnp::StructSchema CapnProtoSchemaParser::getMessageSchema(const FormatSchemaInfo & schema_info)
@@ -427,6 +430,113 @@ void checkCapnProtoSchemaStructure(const capnp::StructSchema & schema, const Blo
     }
 }
 
+template <typename ValueType>
+static DataTypePtr getEnumDataTypeFromEnumerants(const capnp::EnumSchema::EnumerantList & enumerants)
+{
+    std::vector<std::pair<String, ValueType>> values;
+    for (auto enumerant : enumerants)
+        values.emplace_back(enumerant.getProto().getName(), ValueType(enumerant.getOrdinal()));
+    return std::make_shared<DataTypeEnum<ValueType>>(std::move(values));
+}
+
+static DataTypePtr getEnumDataTypeFromEnumSchema(const capnp::EnumSchema & enum_schema)
+{
+    auto enumerants = enum_schema.getEnumerants();
+    if (enumerants.size() < 128)
+        return getEnumDataTypeFromEnumerants<Int8>(enumerants);
+    if (enumerants.size() < 32768)
+        return getEnumDataTypeFromEnumerants<Int16>(enumerants);
+
+    throw Exception(ErrorCodes::CANNOT_EXTRACT_TABLE_STRUCTURE, "ClickHouse supports only 8 and 16-but Enums");
+}
+
+static DataTypePtr getDataTypeFromCapnProtoType(const capnp::Type & capnp_type)
+{
+    switch (capnp_type.which())
+    {
+        case capnp::schema::Type::INT8:
+            return std::make_shared<DataTypeInt8>();
+        case capnp::schema::Type::INT16:
+            return std::make_shared<DataTypeInt16>();
+        case capnp::schema::Type::INT32:
+            return std::make_shared<DataTypeInt32>();
+        case capnp::schema::Type::INT64:
+            return std::make_shared<DataTypeInt64>();
+        case capnp::schema::Type::BOOL: [[fallthrough]];
+        case capnp::schema::Type::UINT8:
+            return std::make_shared<DataTypeUInt8>();
+        case capnp::schema::Type::UINT16:
+            return std::make_shared<DataTypeUInt16>();
+        case capnp::schema::Type::UINT32:
+            return std::make_shared<DataTypeUInt32>();
+        case capnp::schema::Type::UINT64:
+            return std::make_shared<DataTypeUInt64>();
+        case capnp::schema::Type::FLOAT32:
+            return std::make_shared<DataTypeFloat32>();
+        case capnp::schema::Type::FLOAT64:
+            return std::make_shared<DataTypeFloat64>();
+        case capnp::schema::Type::DATA: [[fallthrough]];
+        case capnp::schema::Type::TEXT:
+            return std::make_shared<DataTypeString>();
+        case capnp::schema::Type::ENUM:
+            return getEnumDataTypeFromEnumSchema(capnp_type.asEnum());
+        case capnp::schema::Type::LIST:
+        {
+            auto list_schema = capnp_type.asList();
+            auto nested_type = getDataTypeFromCapnProtoType(list_schema.getElementType());
+            return std::make_shared<DataTypeArray>(nested_type);
+        }
+        case capnp::schema::Type::STRUCT:
+        {
+            auto struct_schema = capnp_type.asStruct();
+
+            /// Check if it can be Nullable.
+            if (checkIfStructIsNamedUnion(struct_schema))
+            {
+                auto fields = struct_schema.getUnionFields();
+                if (fields.size() != 2 || (!fields[0].getType().isVoid() && !fields[1].getType().isVoid()))
+                    throw Exception(ErrorCodes::CANNOT_EXTRACT_TABLE_STRUCTURE, "Unions are not supported");
+                auto value_type = fields[0].getType().isVoid() ? fields[1].getType() : fields[0].getType();
+                if (value_type.isStruct() || value_type.isList())
+                    throw Exception(ErrorCodes::CANNOT_EXTRACT_TABLE_STRUCTURE, "Tuples and Lists cannot be inside Nullable");
+
+                auto nested_type = getDataTypeFromCapnProtoType(value_type);
+                return std::make_shared<DataTypeNullable>(nested_type);
+            }
+
+            if (checkIfStructContainsUnnamedUnion(struct_schema))
+                throw Exception(ErrorCodes::CANNOT_EXTRACT_TABLE_STRUCTURE, "Unnamed union is not supported");
+
+            /// Treat Struct as Tuple.
+            DataTypes nested_types;
+            Names nested_names;
+            for (auto field : struct_schema.getNonUnionFields())
+            {
+                nested_names.push_back(field.getProto().getName());
+                nested_types.push_back(getDataTypeFromCapnProtoType(field.getType()));
+            }
+            return std::make_shared<DataTypeTuple>(std::move(nested_types), std::move(nested_names));
+        }
+        default:
+            throw Exception(ErrorCodes::CANNOT_EXTRACT_TABLE_STRUCTURE, "Unsupported CapnProtoType: {}", getCapnProtoFullTypeName(capnp_type));
+    }
+}
+
+NamesAndTypesList capnProtoSchemaToCHSchema(const capnp::StructSchema & schema)
+{
+    if (checkIfStructContainsUnnamedUnion(schema))
+        throw Exception(ErrorCodes::CANNOT_EXTRACT_TABLE_STRUCTURE, "Unnamed union is not supported");
+
+    NamesAndTypesList names_and_types;
+    for (auto field : schema.getNonUnionFields())
+    {
+        auto name = field.getProto().getName();
+        auto type = getDataTypeFromCapnProtoType(field.getType());
+        names_and_types.emplace_back(name, type);
+    }
+    return names_and_types;
+}
+
 }
 
 #endif
diff --git a/src/Formats/CapnProtoUtils.h b/src/Formats/CapnProtoUtils.h
index 93ca0a5e616..51c152de17f 100644
--- a/src/Formats/CapnProtoUtils.h
+++ b/src/Formats/CapnProtoUtils.h
@@ -38,6 +38,7 @@ capnp::DynamicValue::Reader getReaderByColumnName(const capnp::DynamicStruct::Re
 
 void checkCapnProtoSchemaStructure(const capnp::StructSchema & schema, const Block & header, FormatSettings::EnumComparingMode mode);
 
+NamesAndTypesList capnProtoSchemaToCHSchema(const capnp::StructSchema & schema);
 }
 
 #endif
diff --git a/src/Formats/EscapingRuleUtils.cpp b/src/Formats/EscapingRuleUtils.cpp
index d956d9e6bfb..0a7747fc864 100644
--- a/src/Formats/EscapingRuleUtils.cpp
+++ b/src/Formats/EscapingRuleUtils.cpp
@@ -1,7 +1,16 @@
 #include <Formats/EscapingRuleUtils.h>
+#include <Formats/JSONEachRowUtils.h>
+#include <Formats/ReadSchemaUtils.h>
 #include <DataTypes/Serializations/SerializationNullable.h>
+#include <DataTypes/DataTypeString.h>
+#include <DataTypes/DataTypesNumber.h>
+#include <DataTypes/DataTypeNullable.h>
 #include <IO/ReadHelpers.h>
 #include <IO/WriteHelpers.h>
+#include <Poco/JSON/Parser.h>
+#include <Parsers/TokenIterator.h>
+#include <Parsers/ExpressionListParsers.h>
+#include <Interpreters/evaluateConstantExpression.h>
 
 namespace DB
 {
@@ -9,6 +18,7 @@ namespace DB
 namespace ErrorCodes
 {
     extern const int BAD_ARGUMENTS;
+    extern const int LOGICAL_ERROR;
 }
 
 FormatSettings::EscapingRule stringToEscapingRule(const String & escaping_rule)
@@ -193,30 +203,145 @@ void writeStringByEscapingRule(const String & value, WriteBuffer & out, FormatSe
     }
 }
 
-String readStringByEscapingRule(ReadBuffer & buf, FormatSettings::EscapingRule escaping_rule, const FormatSettings & format_settings)
+template <bool read_string>
+String readByEscapingRule(ReadBuffer & buf, FormatSettings::EscapingRule escaping_rule, const FormatSettings & format_settings)
 {
     String result;
     switch (escaping_rule)
     {
         case FormatSettings::EscapingRule::Quoted:
-            readQuotedString(result, buf);
+            if constexpr (read_string)
+                readQuotedString(result, buf);
+            else
+                readQuotedFieldIntoString(result, buf);
             break;
         case FormatSettings::EscapingRule::JSON:
-            readJSONString(result, buf);
+            if constexpr (read_string)
+                readJSONString(result, buf);
+            else
+                readJSONFieldIntoString(result, buf);
             break;
         case FormatSettings::EscapingRule::Raw:
             readString(result, buf);
             break;
         case FormatSettings::EscapingRule::CSV:
-            readCSVString(result, buf, format_settings.csv);
+            if constexpr (read_string)
+                readCSVString(result, buf, format_settings.csv);
+            else
+                readCSVField(result, buf, format_settings.csv);
             break;
         case FormatSettings::EscapingRule::Escaped:
             readEscapedString(result, buf);
             break;
         default:
-            throw Exception(ErrorCodes::BAD_ARGUMENTS, "Cannot read string with {} escaping rule", escapingRuleToString(escaping_rule));
+            throw Exception(ErrorCodes::BAD_ARGUMENTS, "Cannot read value with {} escaping rule", escapingRuleToString(escaping_rule));
     }
     return result;
 }
 
+String readFieldByEscapingRule(ReadBuffer & buf, FormatSettings::EscapingRule escaping_rule, const FormatSettings & format_settings)
+{
+    return readByEscapingRule<false>(buf, escaping_rule, format_settings);
+}
+
+String readStringByEscapingRule(ReadBuffer & buf, FormatSettings::EscapingRule escaping_rule, const FormatSettings & format_settings)
+{
+    return readByEscapingRule<true>(buf, escaping_rule, format_settings);
+}
+
+static bool evaluateConstantExpressionFromString(const StringRef & field, DataTypePtr & type, ContextPtr context)
+{
+    if (!context)
+        throw Exception(ErrorCodes::LOGICAL_ERROR, "You must provide context to evaluate constant expression");
+
+    ParserExpression parser;
+    Expected expected;
+    Tokens tokens(field.data, field.data + field.size);
+    IParser::Pos token_iterator(tokens, context->getSettingsRef().max_parser_depth);
+    ASTPtr ast;
+
+    /// FIXME: Our parser cannot parse maps in the form of '{key : value}' that is used in text formats.
+    bool parsed = parser.parse(token_iterator, ast, expected);
+    if (!parsed)
+        return false;
+
+    try
+    {
+        std::pair<Field, DataTypePtr> result = evaluateConstantExpression(ast, context);
+        type = generalizeDataType(result.second);
+        return true;
+    }
+    catch (...)
+    {
+        return false;
+    }
+}
+
+DataTypePtr determineDataTypeByEscapingRule(const String & field, const FormatSettings & format_settings, FormatSettings::EscapingRule escaping_rule, ContextPtr context)
+{
+    switch (escaping_rule)
+    {
+        case FormatSettings::EscapingRule::Quoted:
+        {
+            DataTypePtr type;
+            bool parsed = evaluateConstantExpressionFromString(field, type, context);
+            return parsed ? type : nullptr;
+        }
+        case FormatSettings::EscapingRule::JSON:
+            return getDataTypeFromJSONField(field);
+        case FormatSettings::EscapingRule::CSV:
+        {
+            if (field.empty() || field == format_settings.csv.null_representation)
+                return nullptr;
+
+            if (field == format_settings.bool_false_representation || field == format_settings.bool_true_representation)
+                return std::make_shared<DataTypeUInt8>();
+
+            DataTypePtr type;
+            bool parsed;
+            if (field[0] == '\'' || field[0] == '"')
+            {
+                /// Try to evaluate expression inside quotes.
+                parsed = evaluateConstantExpressionFromString(StringRef(field.data() + 1, field.size() - 2), type, context);
+                /// If it's a number in quotes we determine it as a string.
+                if (parsed && type && isNumber(removeNullable(type)))
+                    return makeNullable(std::make_shared<DataTypeString>());
+            }
+            else
+                parsed = evaluateConstantExpressionFromString(field, type, context);
+
+            /// If we couldn't parse an expression, determine it as a string.
+            return parsed ? type : makeNullable(std::make_shared<DataTypeString>());
+        }
+        case FormatSettings::EscapingRule::Raw: [[fallthrough]];
+        case FormatSettings::EscapingRule::Escaped:
+            /// TODO: Try to use some heuristics here to determine the type of data.
+            return field.empty() ? nullptr : makeNullable(std::make_shared<DataTypeString>());
+        default:
+            throw Exception(ErrorCodes::BAD_ARGUMENTS, "Cannot determine the type for value with {} escaping rule", escapingRuleToString(escaping_rule));
+    }
+}
+
+DataTypes determineDataTypesByEscapingRule(const std::vector<String> & fields, const FormatSettings & format_settings, FormatSettings::EscapingRule escaping_rule, ContextPtr context)
+{
+    DataTypes data_types;
+    data_types.reserve(fields.size());
+    for (const auto & field : fields)
+        data_types.push_back(determineDataTypeByEscapingRule(field, format_settings, escaping_rule, context));
+    return data_types;
+}
+
+DataTypePtr getDefaultDataTypeForEscapingRule(FormatSettings::EscapingRule escaping_rule)
+{
+    switch (escaping_rule)
+    {
+        case FormatSettings::EscapingRule::CSV: [[fallthrough]];
+        case FormatSettings::EscapingRule::Escaped: [[fallthrough]];
+        case FormatSettings::EscapingRule::Raw:
+            return makeNullable(std::make_shared<DataTypeString>());
+        default:
+            return nullptr;
+    }
+}
+
 }
diff --git a/src/Formats/EscapingRuleUtils.h b/src/Formats/EscapingRuleUtils.h
index 02f027db74d..10147b29ad6 100644
--- a/src/Formats/EscapingRuleUtils.h
+++ b/src/Formats/EscapingRuleUtils.h
@@ -4,6 +4,7 @@
 #include <DataTypes/IDataType.h>
 #include <DataTypes/Serializations/ISerialization.h>
 #include <IO/ReadBuffer.h>
+#include <Interpreters/Context.h>
 
 namespace DB
 {
@@ -33,5 +34,24 @@ void serializeFieldByEscapingRule(
 void writeStringByEscapingRule(const String & value, WriteBuffer & out, FormatSettings::EscapingRule escaping_rule, const FormatSettings & format_settings);
 
 String readStringByEscapingRule(ReadBuffer & buf, FormatSettings::EscapingRule escaping_rule, const FormatSettings & format_settings);
+String readFieldByEscapingRule(ReadBuffer & buf, FormatSettings::EscapingRule escaping_rule, const FormatSettings & format_settings);
+
+/// Try to determine the type of the field written by a specific escaping rule.
+/// If cannot, return nullptr.
+/// - For Quoted escaping rule we can interpret a single field as a constant
+///   expression and get it's type by evaluation this expression.
+/// - For JSON escaping rule we can use JSON parser to parse a single field
+///   and then convert JSON type of this field to ClickHouse type.
+/// - For CSV escaping rule we can do the next:
+///    - If the field is an unquoted string, then we could try to evaluate it
+///      as a constant expression, and if it fails, treat it as a String.
+///    - If the field is a string in quotes, then we can try to evaluate
+///      expression inside quotes as a constant expression, and if it fails or
+///      the result is a number (we don't parse numbers in quotes) we treat it as a String.
+/// - For TSV and TSVRaw we treat each field as a String (TODO: try to use some tweaks and heuristics here)
+DataTypePtr determineDataTypeByEscapingRule(const String & field, const FormatSettings & format_settings, FormatSettings::EscapingRule escaping_rule, ContextPtr context = nullptr);
+DataTypes determineDataTypesByEscapingRule(const std::vector<String> & fields, const FormatSettings & format_settings, FormatSettings::EscapingRule escaping_rule, ContextPtr context = nullptr);
+
+DataTypePtr getDefaultDataTypeForEscapingRule(FormatSettings::EscapingRule escaping_rule);
 
 }
diff --git a/src/Formats/FormatFactory.cpp b/src/Formats/FormatFactory.cpp
index 467fdfddafe..2068de0d01c 100644
--- a/src/Formats/FormatFactory.cpp
+++ b/src/Formats/FormatFactory.cpp
@@ -14,9 +14,6 @@
 #include <Poco/URI.h>
 #include <Common/Exception.h>
 
-#include <IO/BufferWithOwnMemory.h>
-#include <IO/ReadHelpers.h>
-
 namespace DB
 {
 
@@ -120,6 +117,8 @@ FormatSettings getFormatSettings(ContextPtr context, const Settings & settings)
     format_settings.defaults_for_omitted_fields = settings.input_format_defaults_for_omitted_fields;
     format_settings.capn_proto.enum_comparing_mode = settings.format_capn_proto_enum_comparising_mode;
     format_settings.seekable_read = settings.input_format_allow_seeks;
+    format_settings.msgpack.number_of_columns = settings.input_format_msgpack_number_of_columns;
+    format_settings.max_rows_to_read_for_schema_inference = settings.input_format_max_rows_to_read_for_schema_inference;
 
     /// Validate avro_schema_registry_url with RemoteHostFilter when non-empty and in Server context
     if (format_settings.schema.is_server)
@@ -201,7 +200,6 @@ InputFormatPtr FormatFactory::getInput(
     return format;
 }
 
-
 InputFormatPtr FormatFactory::getInputFormat(
     const String & name,
     ReadBuffer & buf,
@@ -342,6 +340,32 @@ String FormatFactory::getContentType(
     return format->getContentType();
 }
 
+SchemaReaderPtr FormatFactory::getSchemaReader(
+    const String & name,
+    ReadBuffer & buf,
+    ContextPtr context,
+    const std::optional<FormatSettings> & _format_settings) const
+{
+    const auto & schema_reader_creator = dict.at(name).schema_reader_creator;
+    if (!schema_reader_creator)
+        throw Exception("FormatFactory: Format " + name + " doesn't support schema inference.", ErrorCodes::LOGICAL_ERROR);
+
+    auto format_settings = _format_settings ? *_format_settings : getFormatSettings(context);
+    return schema_reader_creator(buf, format_settings, context);
+}
+
+ExternalSchemaReaderPtr FormatFactory::getExternalSchemaReader(
+    const String & name,
+    ContextPtr context,
+    const std::optional<FormatSettings> & _format_settings) const
+{
+    const auto & external_schema_reader_creator = dict.at(name).external_schema_reader_creator;
+    if (!external_schema_reader_creator)
+        throw Exception("FormatFactory: Format " + name + " doesn't support schema inference.", ErrorCodes::LOGICAL_ERROR);
+
+    auto format_settings = _format_settings ? *_format_settings : getFormatSettings(context);
+    return external_schema_reader_creator(format_settings);
+}
 
 void FormatFactory::registerInputFormat(const String & name, InputCreator input_creator)
 {
@@ -375,6 +399,21 @@ void FormatFactory::registerFileSegmentationEngine(const String & name, FileSegm
     target = std::move(file_segmentation_engine);
 }
 
+void FormatFactory::registerSchemaReader(const String & name, SchemaReaderCreator schema_reader_creator)
+{
+    auto & target = dict[name].schema_reader_creator;
+    if (target)
+        throw Exception("FormatFactory: Schema reader " + name + " is already registered", ErrorCodes::LOGICAL_ERROR);
+    target = std::move(schema_reader_creator);
+}
+
+void FormatFactory::registerExternalSchemaReader(const String & name, ExternalSchemaReaderCreator external_schema_reader_creator)
+{
+    auto & target = dict[name].external_schema_reader_creator;
+    if (target)
+        throw Exception("FormatFactory: Schema reader " + name + " is already registered", ErrorCodes::LOGICAL_ERROR);
+    target = std::move(external_schema_reader_creator);
+}
 
 void FormatFactory::markOutputFormatSupportsParallelFormatting(const String & name)
 {
@@ -412,6 +451,23 @@ bool FormatFactory::isOutputFormat(const String & name) const
     return it != dict.end() && it->second.output_creator;
 }
 
+bool FormatFactory::checkIfFormatHasSchemaReader(const String & name)
+{
+    const auto & target = getCreators(name);
+    return bool(target.schema_reader_creator);
+}
+
+bool FormatFactory::checkIfFormatHasExternalSchemaReader(const String & name)
+{
+    const auto & target = getCreators(name);
+    return bool(target.external_schema_reader_creator);
+}
+
+bool FormatFactory::checkIfFormatHasAnySchemaReader(const String & name)
+{
+    return checkIfFormatHasSchemaReader(name) || checkIfFormatHasExternalSchemaReader(name);
+}
+
 FormatFactory & FormatFactory::instance()
 {
     static FormatFactory ret;
diff --git a/src/Formats/FormatFactory.h b/src/Formats/FormatFactory.h
index ea285c47996..a62b32da0cc 100644
--- a/src/Formats/FormatFactory.h
+++ b/src/Formats/FormatFactory.h
@@ -4,7 +4,9 @@
 #include <Columns/IColumn.h>
 #include <Formats/FormatSettings.h>
 #include <Interpreters/Context_fwd.h>
+#include <IO/BufferWithOwnMemory.h>
 #include <base/types.h>
+#include <Core/NamesAndTypes.h>
 
 #include <boost/noncopyable.hpp>
 
@@ -31,6 +33,11 @@ class IOutputFormat;
 struct RowInputFormatParams;
 struct RowOutputFormatParams;
 
+class ISchemaReader;
+class IExternalSchemaReader;
+using SchemaReaderPtr = std::shared_ptr<ISchemaReader>;
+using ExternalSchemaReaderPtr = std::shared_ptr<IExternalSchemaReader>;
+
 using InputFormatPtr = std::shared_ptr<IInputFormat>;
 using OutputFormatPtr = std::shared_ptr<IOutputFormat>;
 
@@ -85,11 +92,16 @@ private:
     /// The checker should return true if parallel parsing should be disabled.
     using NonTrivialPrefixAndSuffixChecker = std::function<bool(ReadBuffer & buf)>;
 
+    using SchemaReaderCreator = std::function<SchemaReaderPtr(ReadBuffer & in, const FormatSettings & settings, ContextPtr context)>;
+    using ExternalSchemaReaderCreator = std::function<ExternalSchemaReaderPtr(const FormatSettings & settings)>;
+
     struct Creators
     {
         InputCreator input_creator;
         OutputCreator output_creator;
         FileSegmentationEngine file_segmentation_engine;
+        SchemaReaderCreator schema_reader_creator;
+        ExternalSchemaReaderCreator external_schema_reader_creator;
         bool supports_parallel_formatting{false};
         bool is_column_oriented{false};
         NonTrivialPrefixAndSuffixChecker non_trivial_prefix_and_suffix_checker;
@@ -138,6 +150,17 @@ public:
         ContextPtr context,
         const std::optional<FormatSettings> & format_settings = std::nullopt) const;
 
+    SchemaReaderPtr getSchemaReader(
+        const String & name,
+        ReadBuffer & buf,
+        ContextPtr context,
+        const std::optional<FormatSettings> & format_settings = std::nullopt) const;
+
+    ExternalSchemaReaderPtr getExternalSchemaReader(
+        const String & name,
+        ContextPtr context,
+        const std::optional<FormatSettings> & format_settings = std::nullopt) const;
+
     void registerFileSegmentationEngine(const String & name, FileSegmentationEngine file_segmentation_engine);
 
     void registerNonTrivialPrefixAndSuffixChecker(const String & name, NonTrivialPrefixAndSuffixChecker non_trivial_prefix_and_suffix_checker);
@@ -146,11 +169,19 @@ public:
     void registerInputFormat(const String & name, InputCreator input_creator);
     void registerOutputFormat(const String & name, OutputCreator output_creator);
 
+    /// Register schema readers for format its name.
+    void registerSchemaReader(const String & name, SchemaReaderCreator schema_reader_creator);
+    void registerExternalSchemaReader(const String & name, ExternalSchemaReaderCreator external_schema_reader_creator);
+
     void markOutputFormatSupportsParallelFormatting(const String & name);
     void markFormatAsColumnOriented(const String & name);
 
     bool checkIfFormatIsColumnOriented(const String & name);
 
+    bool checkIfFormatHasSchemaReader(const String & name);
+    bool checkIfFormatHasExternalSchemaReader(const String & name);
+    bool checkIfFormatHasAnySchemaReader(const String & name);
+
     const FormatsDictionary & getAllFormats() const
     {
         return dict;
@@ -163,6 +194,7 @@ private:
     FormatsDictionary dict;
 
     const Creators & getCreators(const String & name) const;
+
 };
 
 }
diff --git a/src/Formats/FormatSettings.h b/src/Formats/FormatSettings.h
index d9af07fdc9c..6298e959c3e 100644
--- a/src/Formats/FormatSettings.h
+++ b/src/Formats/FormatSettings.h
@@ -33,6 +33,7 @@ struct FormatSettings
     bool defaults_for_omitted_fields = true;
 
     bool seekable_read = true;
+    UInt64 max_rows_to_read_for_schema_inference = 100;
 
     enum class DateTimeInputFormat
     {
@@ -217,6 +218,11 @@ struct FormatSettings
     {
         EnumComparingMode enum_comparing_mode = EnumComparingMode::BY_VALUES;
     } capn_proto;
+
+    struct
+    {
+        UInt64 number_of_columns = 0;
+    } msgpack;
 };
 
 }
diff --git a/src/Formats/JSONEachRowUtils.cpp b/src/Formats/JSONEachRowUtils.cpp
index b55e9f59cc7..c63b8453634 100644
--- a/src/Formats/JSONEachRowUtils.cpp
+++ b/src/Formats/JSONEachRowUtils.cpp
@@ -1,7 +1,17 @@
 #include <IO/ReadHelpers.h>
 #include <Formats/JSONEachRowUtils.h>
+#include <Formats/ReadSchemaUtils.h>
 #include <IO/ReadBufferFromString.h>
 #include <DataTypes/Serializations/SerializationNullable.h>
+#include <DataTypes/DataTypeNullable.h>
+#include <DataTypes/DataTypesNumber.h>
+#include <DataTypes/DataTypeString.h>
+#include <DataTypes/DataTypeArray.h>
+#include <DataTypes/DataTypeTuple.h>
+#include <DataTypes/DataTypeMap.h>
+#include <Functions/SimdJSONParser.h>
+#include <Functions/RapidJSONParser.h>
+#include <Functions/DummyJSONParser.h>
 
 #include <base/find_symbols.h>
 
@@ -26,7 +36,7 @@ static std::pair<bool, size_t> fileSegmentationEngineJSONEachRowImpl(ReadBuffer
     while (loadAtPosition(in, memory, pos) && (balance || memory.size() + static_cast<size_t>(pos - in.position()) < min_chunk_size || number_of_rows < min_rows))
     {
         const auto current_object_size = memory.size() + static_cast<size_t>(pos - in.position());
-        if (current_object_size > 10 * min_chunk_size)
+        if (min_chunk_size != 0 && current_object_size > 10 * min_chunk_size)
             throw ParsingException("Size of JSON object is extremely large. Expected not greater than " +
             std::to_string(min_chunk_size) + " bytes, but current is " + std::to_string(current_object_size) +
             " bytes per row. Increase the value setting 'min_chunk_bytes_for_parallel_parsing' or check your data manually, most likely JSON is malformed", ErrorCodes::INCORRECT_DATA);
@@ -92,6 +102,122 @@ static std::pair<bool, size_t> fileSegmentationEngineJSONEachRowImpl(ReadBuffer
     return {loadAtPosition(in, memory, pos), number_of_rows};
 }
 
+template <const char opening_bracket, const char closing_bracket>
+static String readJSONEachRowLineIntoStringImpl(ReadBuffer & in)
+{
+    Memory memory;
+    fileSegmentationEngineJSONEachRowImpl<opening_bracket, closing_bracket>(in, memory, 0, 1);
+    return String(memory.data(), memory.size());
+}
+
+template <class Element>
+DataTypePtr getDataTypeFromJSONFieldImpl(const Element & field)
+{
+    if (field.isNull())
+        return nullptr;
+
+    if (field.isBool())
+        return makeNullable(std::make_shared<DataTypeUInt8>());
+
+    if (field.isInt64() || field.isUInt64() || field.isDouble())
+        return makeNullable(std::make_shared<DataTypeFloat64>());
+
+    if (field.isString())
+        return makeNullable(std::make_shared<DataTypeString>());
+
+    if (field.isArray())
+    {
+        auto array = field.getArray();
+
+        /// Return nullptr in case of empty array because we cannot determine nested type.
+        if (array.size() == 0)
+            return nullptr;
+
+        DataTypes nested_data_types;
+        /// If this array contains fields with different types we will treat it as Tuple.
+        bool is_tuple = false;
+        for (const auto element : array)
+        {
+            auto type = getDataTypeFromJSONFieldImpl(element);
+            if (!type)
+                return nullptr;
+
+            if (!nested_data_types.empty() && type->getName() != nested_data_types.back()->getName())
+                is_tuple = true;
+
+            nested_data_types.push_back(std::move(type));
+        }
+
+        if (is_tuple)
+            return std::make_shared<DataTypeTuple>(nested_data_types);
+
+        return std::make_shared<DataTypeArray>(nested_data_types.back());
+    }
+
+    if (field.isObject())
+    {
+        auto object = field.getObject();
+        DataTypePtr value_type;
+        for (const auto key_value_pair : object)
+        {
+            auto type = getDataTypeFromJSONFieldImpl(key_value_pair.second);
+            if (!type)
+                return nullptr;
+
+            if (value_type && value_type->getName() != type->getName())
+                return nullptr;
+
+            value_type = type;
+        }
+        return std::make_shared<DataTypeMap>(std::make_shared<DataTypeString>(), value_type);
+    }
+
+    throw Exception{ErrorCodes::INCORRECT_DATA, "Unexpected JSON type"};
+}
+
+auto getJSONParserAndElement()
+{
+#if USE_SIMDJSON
+    return std::pair<SimdJSONParser, SimdJSONParser::Element>();
+#elif USE_RAPIDJSON
+    return std::pair<RapidJSONParser, RapidJSONParser::Element>();
+#else
+    return std::pair<DummyJSONParser, DummyJSONParser::Element>();
+#endif
+}
+
+DataTypePtr getDataTypeFromJSONField(const String & field)
+{
+    auto [parser, element] = getJSONParserAndElement();
+    bool parsed = parser.parse(field, element);
+    if (!parsed)
+        throw Exception(ErrorCodes::INCORRECT_DATA, "Cannot parse JSON object");
+
+    return getDataTypeFromJSONFieldImpl(element);
+}
+
+template <class Extractor, const char opening_bracket, const char closing_bracket>
+static DataTypes determineColumnDataTypesFromJSONEachRowDataImpl(ReadBuffer & in, bool /*json_strings*/, Extractor & extractor)
+{
+    String line = readJSONEachRowLineIntoStringImpl<opening_bracket, closing_bracket>(in);
+    auto [parser, element] = getJSONParserAndElement();
+    bool parsed = parser.parse(line, element);
+    if (!parsed)
+        throw Exception(ErrorCodes::INCORRECT_DATA, "Cannot parse JSON object");
+
+    auto fields = extractor.extract(element);
+
+    DataTypes data_types;
+    data_types.reserve(fields.size());
+    for (const auto & field : fields)
+        data_types.push_back(getDataTypeFromJSONFieldImpl(field));
+
+    /// TODO: For JSONStringsEachRow/JSONCompactStringsEach all types will be strings.
+    ///       Should we try to parse data inside strings somehow in this case?
+
+    return data_types;
+}
+
 std::pair<bool, size_t> fileSegmentationEngineJSONEachRow(ReadBuffer & in, DB::Memory<> & memory, size_t min_chunk_size)
 {
     return fileSegmentationEngineJSONEachRowImpl<'{', '}'>(in, memory, min_chunk_size, 1);
@@ -102,6 +228,60 @@ std::pair<bool, size_t> fileSegmentationEngineJSONCompactEachRow(ReadBuffer & in
     return fileSegmentationEngineJSONEachRowImpl<'[', ']'>(in, memory, min_chunk_size, min_rows);
 }
 
+struct JSONEachRowFieldsExtractor
+{
+    template <class Element>
+    std::vector<Element> extract(const Element & element)
+    {
+        /// {..., "<column_name>" : <value>, ...}
+        auto object = element.getObject();
+        std::vector<Element> fields;
+        fields.reserve(object.size());
+        column_names.reserve(object.size());
+        for (const auto & key_value_pair : object)
+        {
+            column_names.emplace_back(key_value_pair.first);
+            fields.push_back(key_value_pair.second);
+        }
+
+        return fields;
+    }
+
+    std::vector<String> column_names;
+};
+
+std::unordered_map<String, DataTypePtr> readRowAndGetNamesAndDataTypesForJSONEachRow(ReadBuffer & in, bool json_strings)
+{
+    JSONEachRowFieldsExtractor extractor;
+    auto data_types = determineColumnDataTypesFromJSONEachRowDataImpl<JSONEachRowFieldsExtractor, '{', '}'>(in, json_strings, extractor);
+    std::unordered_map<String, DataTypePtr> result;
+    for (size_t i = 0; i != extractor.column_names.size(); ++i)
+        result[extractor.column_names[i]] = data_types[i];
+    return result;
+}
+
+struct JSONCompactEachRowFieldsExtractor
+{
+    template <class Element>
+    std::vector<Element> extract(const Element & element)
+    {
+        /// [..., <value>, ...]
+        auto array = element.getArray();
+        std::vector<Element> fields;
+        fields.reserve(array.size());
+        for (size_t i = 0; i != array.size(); ++i)
+            fields.push_back(array[i]);
+        return fields;
+    }
+};
+
+DataTypes readRowAndGetDataTypesForJSONCompactEachRow(ReadBuffer & in, bool json_strings)
+{
+    JSONCompactEachRowFieldsExtractor extractor;
+    return determineColumnDataTypesFromJSONEachRowDataImpl<JSONCompactEachRowFieldsExtractor, '[', ']'>(in, json_strings, extractor);
+}
+
+
 bool nonTrivialPrefixAndSuffixCheckerJSONEachRowImpl(ReadBuffer & buf)
 {
     /// For JSONEachRow we can safely skip whitespace characters
diff --git a/src/Formats/JSONEachRowUtils.h b/src/Formats/JSONEachRowUtils.h
index 4a049aa1abd..6f71baa8b40 100644
--- a/src/Formats/JSONEachRowUtils.h
+++ b/src/Formats/JSONEachRowUtils.h
@@ -11,6 +11,21 @@ namespace DB
 std::pair<bool, size_t> fileSegmentationEngineJSONEachRow(ReadBuffer & in, DB::Memory<> & memory, size_t min_chunk_size);
 std::pair<bool, size_t> fileSegmentationEngineJSONCompactEachRow(ReadBuffer & in, DB::Memory<> & memory, size_t min_chunk_size, size_t min_rows);
 
+
+/// Parse JSON from string and convert it's type to ClickHouse type. Make the result type always Nullable.
+/// JSON array with different nested types is treated as Tuple.
+/// If cannot convert (for example when field contains null), return nullptr.
+DataTypePtr getDataTypeFromJSONField(const String & field);
+
+/// Read row in JSONEachRow format and try to determine type for each field.
+/// Return map {column_name : type}.
+/// If cannot determine the type of some field, return nullptr for it.
+std::unordered_map<String, DataTypePtr> readRowAndGetNamesAndDataTypesForJSONEachRow(ReadBuffer & in, bool json_strings);
+
+/// Read row in JSONCompactEachRow format and try to determine type for each field.
+/// If cannot determine the type of some field, return nullptr for it.
+DataTypes readRowAndGetDataTypesForJSONCompactEachRow(ReadBuffer & in, bool json_strings);
+
 bool nonTrivialPrefixAndSuffixCheckerJSONEachRowImpl(ReadBuffer & buf);
 
 bool readFieldImpl(ReadBuffer & in, IColumn & column, const DataTypePtr & type, const SerializationPtr & serialization, const String & column_name, const FormatSettings & format_settings, bool yield_strings);
diff --git a/src/Formats/ParsedTemplateFormatString.cpp b/src/Formats/ParsedTemplateFormatString.cpp
index 4966420f05b..8d1b987d01a 100644
--- a/src/Formats/ParsedTemplateFormatString.cpp
+++ b/src/Formats/ParsedTemplateFormatString.cpp
@@ -14,14 +14,14 @@ namespace ErrorCodes
     extern const int INVALID_TEMPLATE_FORMAT;
 }
 
-ParsedTemplateFormatString::ParsedTemplateFormatString(const FormatSchemaInfo & schema, const ColumnIdxGetter & idx_by_name)
+ParsedTemplateFormatString::ParsedTemplateFormatString(const FormatSchemaInfo & schema, const ColumnIdxGetter & idx_by_name, bool allow_indexes)
 {
     ReadBufferFromFile schema_file(schema.absoluteSchemaPath(), 4096);
     String format_string;
     readStringUntilEOF(format_string, schema_file);
     try
     {
-        parse(format_string, idx_by_name);
+        parse(format_string, idx_by_name, allow_indexes);
     }
     catch (DB::Exception & e)
     {
@@ -33,7 +33,7 @@ ParsedTemplateFormatString::ParsedTemplateFormatString(const FormatSchemaInfo &
 }
 
 
-void ParsedTemplateFormatString::parse(const String & format_string, const ColumnIdxGetter & idx_by_name)
+void ParsedTemplateFormatString::parse(const String & format_string, const ColumnIdxGetter & idx_by_name, bool allow_indexes)
 {
     enum ParserState
     {
@@ -100,6 +100,8 @@ void ParsedTemplateFormatString::parse(const String & format_string, const Colum
                     column_idx = strtoull(column_names.back().c_str(), &col_idx_end, 10);
                     if (col_idx_end != column_names.back().c_str() + column_names.back().size() || errno)
                         column_idx = idx_by_name(column_names.back());
+                    else if (!allow_indexes)
+                        throw Exception(ErrorCodes::INVALID_TEMPLATE_FORMAT, "Indexes instead of names are not allowed");
                 }
                 format_idx_to_column_idx.emplace_back(column_idx);
                 break;
diff --git a/src/Formats/ParsedTemplateFormatString.h b/src/Formats/ParsedTemplateFormatString.h
index ba0ebdf5aa8..c5617d0f0ef 100644
--- a/src/Formats/ParsedTemplateFormatString.h
+++ b/src/Formats/ParsedTemplateFormatString.h
@@ -31,9 +31,9 @@ struct ParsedTemplateFormatString
     typedef std::function<std::optional<size_t>(const String &)> ColumnIdxGetter;
 
     ParsedTemplateFormatString() = default;
-    ParsedTemplateFormatString(const FormatSchemaInfo & schema, const ColumnIdxGetter & idx_by_name);
+    ParsedTemplateFormatString(const FormatSchemaInfo & schema, const ColumnIdxGetter & idx_by_name, bool allow_indexes = true);
 
-    void parse(const String & format_string, const ColumnIdxGetter & idx_by_name);
+    void parse(const String & format_string, const ColumnIdxGetter & idx_by_name, bool allow_indexes = true);
 
     static const char * readMayBeQuotedColumnNameInto(const char * pos, size_t size, String & s);
     size_t columnsCount() const;
diff --git a/src/Formats/ProtobufSerializer.cpp b/src/Formats/ProtobufSerializer.cpp
index 5232b76b7fe..b59db12a16c 100644
--- a/src/Formats/ProtobufSerializer.cpp
+++ b/src/Formats/ProtobufSerializer.cpp
@@ -24,6 +24,7 @@
 #   include <DataTypes/DataTypeMap.h>
 #   include <DataTypes/DataTypeNullable.h>
 #   include <DataTypes/DataTypeTuple.h>
+#   include <DataTypes/DataTypeString.h>
 #   include <DataTypes/Serializations/SerializationDecimal.h>
 #   include <DataTypes/Serializations/SerializationFixedString.h>
 #   include <Formats/ProtobufReader.h>
@@ -56,6 +57,7 @@ namespace ErrorCodes
     extern const int PROTOBUF_FIELD_NOT_REPEATED;
     extern const int PROTOBUF_BAD_CAST;
     extern const int LOGICAL_ERROR;
+    extern const int BAD_ARGUMENTS;
 }
 
 namespace
@@ -3017,10 +3019,8 @@ namespace
                             {
                                 std::vector<std::string_view> column_names_used;
                                 column_names_used.reserve(used_column_indices_in_nested.size());
-
                                 for (size_t i : used_column_indices_in_nested)
                                     column_names_used.emplace_back(nested_column_names[i]);
-
                                 auto field_serializer = std::make_unique<ProtobufSerializerFlattenedNestedAsArrayOfNestedMessages>(
                                     std::move(column_names_used), field_descriptor, std::move(nested_message_serializer), get_root_desc_function);
                                 transformColumnIndices(used_column_indices_in_nested, nested_column_indices);
@@ -3230,8 +3230,105 @@ namespace
         std::function<String(size_t)> get_root_desc_function;
         std::shared_ptr<ProtobufSerializer *> root_serializer_ptr;
     };
-}
 
+    template <typename Type>
+    DataTypePtr getEnumDataType(const google::protobuf::EnumDescriptor * enum_descriptor)
+    {
+        std::vector<std::pair<String, Type>> values;
+        for (int i = 0; i != enum_descriptor->value_count(); ++i)
+        {
+            const auto * enum_value_descriptor = enum_descriptor->value(i);
+            values.emplace_back(enum_value_descriptor->name(), enum_value_descriptor->number());
+        }
+        return std::make_shared<DataTypeEnum<Type>>(std::move(values));
+    }
+
+    NameAndTypePair getNameAndDataTypeFromField(const google::protobuf::FieldDescriptor * field_descriptor, bool allow_repeat = true)
+    {
+        if (allow_repeat && field_descriptor->is_map())
+        {
+            auto name_and_type = getNameAndDataTypeFromField(field_descriptor, false);
+            const auto * tuple_type = assert_cast<const DataTypeTuple *>(name_and_type.type.get());
+            return {name_and_type.name, std::make_shared<DataTypeMap>(tuple_type->getElements())};
+        }
+
+        if (allow_repeat && field_descriptor->is_repeated())
+        {
+            auto name_and_type = getNameAndDataTypeFromField(field_descriptor, false);
+            return {name_and_type.name, std::make_shared<DataTypeArray>(name_and_type.type)};
+        }
+
+        switch (field_descriptor->type())
+        {
+            case FieldTypeId::TYPE_SFIXED32: [[fallthrough]];
+            case FieldTypeId::TYPE_SINT32: [[fallthrough]];
+            case FieldTypeId::TYPE_INT32:
+                return {field_descriptor->name(), std::make_shared<DataTypeInt32>()};
+            case FieldTypeId::TYPE_SFIXED64: [[fallthrough]];
+            case FieldTypeId::TYPE_SINT64: [[fallthrough]];
+            case FieldTypeId::TYPE_INT64:
+                return {field_descriptor->name(), std::make_shared<DataTypeInt64>()};
+            case FieldTypeId::TYPE_BOOL:
+                return {field_descriptor->name(), std::make_shared<DataTypeUInt8>()};
+            case FieldTypeId::TYPE_FLOAT:
+                return {field_descriptor->name(), std::make_shared<DataTypeFloat32>()};
+            case FieldTypeId::TYPE_DOUBLE:
+                return {field_descriptor->name(), std::make_shared<DataTypeFloat64>()};
+            case FieldTypeId::TYPE_UINT32: [[fallthrough]];
+            case FieldTypeId::TYPE_FIXED32:
+                return {field_descriptor->name(), std::make_shared<DataTypeUInt32>()};
+            case FieldTypeId::TYPE_UINT64: [[fallthrough]];
+            case FieldTypeId::TYPE_FIXED64:
+                return {field_descriptor->name(), std::make_shared<DataTypeUInt64>()};
+            case FieldTypeId::TYPE_BYTES: [[fallthrough]];
+            case FieldTypeId::TYPE_STRING:
+                return {field_descriptor->name(), std::make_shared<DataTypeString>()};
+            case FieldTypeId::TYPE_ENUM:
+            {
+                const auto * enum_descriptor = field_descriptor->enum_type();
+                if (enum_descriptor->value_count() == 0)
+                    throw Exception("Empty enum field", ErrorCodes::BAD_ARGUMENTS);
+                int max_abs = std::abs(enum_descriptor->value(0)->number());
+                for (int i = 1; i != enum_descriptor->value_count(); ++i)
+                {
+                    if (std::abs(enum_descriptor->value(i)->number()) > max_abs)
+                        max_abs = std::abs(enum_descriptor->value(i)->number());
+                }
+                if (max_abs < 128)
+                    return {field_descriptor->name(), getEnumDataType<Int8>(enum_descriptor)};
+                else if (max_abs < 32768)
+                    return {field_descriptor->name(), getEnumDataType<Int16>(enum_descriptor)};
+                else
+                    throw Exception("ClickHouse supports only 8-bit and 16-bit enums", ErrorCodes::BAD_ARGUMENTS);
+            }
+            case FieldTypeId::TYPE_GROUP: [[fallthrough]];
+            case FieldTypeId::TYPE_MESSAGE:
+            {
+                const auto * message_descriptor = field_descriptor->message_type();
+                if (message_descriptor->field_count() == 1)
+                {
+                    const auto * nested_field_descriptor = message_descriptor->field(0);
+                    auto nested_name_and_type = getNameAndDataTypeFromField(nested_field_descriptor);
+                    return {field_descriptor->name() + "_" + nested_name_and_type.name, nested_name_and_type.type};
+                }
+                else
+                {
+                    DataTypes nested_types;
+                    Strings nested_names;
+                    for (int i = 0; i != message_descriptor->field_count(); ++i)
+                    {
+                        auto nested_name_and_type = getNameAndDataTypeFromField(message_descriptor->field(i));
+                        nested_types.push_back(nested_name_and_type.type);
+                        nested_names.push_back(nested_name_and_type.name);
+                    }
+                    return {field_descriptor->name(), std::make_shared<DataTypeTuple>(std::move(nested_types), std::move(nested_names))};
+                }
+            }
+        }
+
+        __builtin_unreachable();
+    }
+}
 
 std::unique_ptr<ProtobufSerializer> ProtobufSerializer::create(
     const Strings & column_names,
@@ -3254,5 +3351,14 @@ std::unique_ptr<ProtobufSerializer> ProtobufSerializer::create(
     std::vector<size_t> missing_column_indices;
     return ProtobufSerializerBuilder(writer).buildMessageSerializer(column_names, data_types, missing_column_indices, message_descriptor, with_length_delimiter);
 }
+
+NamesAndTypesList protobufSchemaToCHSchema(const google::protobuf::Descriptor * message_descriptor)
+{
+    NamesAndTypesList schema;
+    for (int i = 0; i != message_descriptor->field_count(); ++i)
+        schema.push_back(getNameAndDataTypeFromField(message_descriptor->field(i)));
+    return schema;
+}
+
 }
 #endif
diff --git a/src/Formats/ProtobufSerializer.h b/src/Formats/ProtobufSerializer.h
index 3eaca6a18d6..d9bed913517 100644
--- a/src/Formats/ProtobufSerializer.h
+++ b/src/Formats/ProtobufSerializer.h
@@ -4,6 +4,7 @@
 
 #if USE_PROTOBUF
 #   include <Columns/IColumn.h>
+#include <Core/NamesAndTypes.h>
 
 
 namespace google::protobuf { class Descriptor; }
@@ -48,5 +49,7 @@ public:
         ProtobufWriter & writer);
 };
 
+NamesAndTypesList protobufSchemaToCHSchema(const google::protobuf::Descriptor * message_descriptor);
+
 }
 #endif
diff --git a/src/Formats/ReadSchemaUtils.cpp b/src/Formats/ReadSchemaUtils.cpp
new file mode 100644
index 00000000000..37067eae64f
--- /dev/null
+++ b/src/Formats/ReadSchemaUtils.cpp
@@ -0,0 +1,112 @@
+#include <DataTypes/DataTypesNumber.h>
+#include <DataTypes/DataTypeNullable.h>
+#include <DataTypes/DataTypeArray.h>
+#include <DataTypes/DataTypeTuple.h>
+#include <DataTypes/DataTypeMap.h>
+#include <DataTypes/DataTypeLowCardinality.h>
+#include <Formats/ReadSchemaUtils.h>
+#include <Processors/Formats/ISchemaReader.h>
+#include <Common/assert_cast.h>
+
+namespace DB
+{
+
+namespace ErrorCodes
+{
+    extern const int CANNOT_EXTRACT_TABLE_STRUCTURE;
+    extern const int BAD_ARGUMENTS;
+}
+
+ColumnsDescription readSchemaFromFormat(const String & format_name, const std::optional<FormatSettings> & format_settings, ReadBufferCreator read_buffer_creator, ContextPtr context)
+{
+    NamesAndTypesList names_and_types;
+    if (FormatFactory::instance().checkIfFormatHasExternalSchemaReader(format_name))
+    {
+        auto external_schema_reader = FormatFactory::instance().getExternalSchemaReader(format_name, context, format_settings);
+        try
+        {
+            names_and_types = external_schema_reader->readSchema();
+        }
+        catch (const DB::Exception & e)
+        {
+            throw Exception(ErrorCodes::CANNOT_EXTRACT_TABLE_STRUCTURE, "Cannot extract table structure from {} format file. Error: {}", format_name, e.message());
+        }
+    }
+    else if (FormatFactory::instance().checkIfFormatHasSchemaReader(format_name))
+    {
+        auto read_buf = read_buffer_creator();
+        if (read_buf->eof())
+            throw Exception(ErrorCodes::CANNOT_EXTRACT_TABLE_STRUCTURE, "Cannot extract table structure from {} format file, file is empty", format_name);
+
+        auto schema_reader = FormatFactory::instance().getSchemaReader(format_name, *read_buf, context, format_settings);
+        try
+        {
+            names_and_types = schema_reader->readSchema();
+        }
+        catch (const DB::Exception & e)
+        {
+            throw Exception(ErrorCodes::CANNOT_EXTRACT_TABLE_STRUCTURE, "Cannot extract table structure from {} format file. Error: {}", format_name, e.message());
+        }
+    }
+    else
+        throw Exception(ErrorCodes::BAD_ARGUMENTS, "{} file format doesn't support schema inference", format_name);
+
+    return ColumnsDescription(names_and_types);
+}
+
+DataTypePtr generalizeDataType(DataTypePtr type)
+{
+    WhichDataType which(type);
+
+    if (which.isNothing())
+        return nullptr;
+
+    if (which.isNullable())
+    {
+        const auto * nullable_type = assert_cast<const DataTypeNullable *>(type.get());
+        return generalizeDataType(nullable_type->getNestedType());
+    }
+
+    if (isNumber(type))
+        return makeNullable(std::make_shared<DataTypeFloat64>());
+
+    if (which.isArray())
+    {
+        const auto * array_type = assert_cast<const DataTypeArray *>(type.get());
+        auto nested_type = generalizeDataType(array_type->getNestedType());
+        return nested_type ? std::make_shared<DataTypeArray>(nested_type) : nullptr;
+    }
+
+    if (which.isTuple())
+    {
+        const auto * tuple_type = assert_cast<const DataTypeTuple *>(type.get());
+        DataTypes nested_types;
+        for (const auto & element : tuple_type->getElements())
+        {
+            auto nested_type = generalizeDataType(element);
+            if (!nested_type)
+                return nullptr;
+            nested_types.push_back(nested_type);
+        }
+        return std::make_shared<DataTypeTuple>(std::move(nested_types));
+    }
+
+    if (which.isMap())
+    {
+        const auto * map_type = assert_cast<const DataTypeMap *>(type.get());
+        auto key_type = removeNullable(generalizeDataType(map_type->getKeyType()));
+        auto value_type = generalizeDataType(map_type->getValueType());
+        return key_type && value_type ? std::make_shared<DataTypeMap>(key_type, value_type) : nullptr;
+    }
+
+    if (which.isLowCarnality())
+    {
+        const auto * lc_type = assert_cast<const DataTypeLowCardinality *>(type.get());
+        auto nested_type = generalizeDataType(lc_type->getDictionaryType());
+        return nested_type ? std::make_shared<DataTypeLowCardinality>(nested_type) : nullptr;
+    }
+
+    return makeNullable(type);
+}
+
+}
diff --git a/src/Formats/ReadSchemaUtils.h b/src/Formats/ReadSchemaUtils.h
new file mode 100644
index 00000000000..fb43acc3cd6
--- /dev/null
+++ b/src/Formats/ReadSchemaUtils.h
@@ -0,0 +1,30 @@
+#pragma once
+
+#include <Storages/ColumnsDescription.h>
+#include <Formats/FormatFactory.h>
+
+namespace DB
+{
+
+/// Try to determine the schema of the data in specifying format.
+/// For formats that have an external schema reader, it will
+/// use it and won't create a read buffer.
+/// For formats that have a schema reader from the data,
+/// read buffer will be created by the provided creator and
+/// the schema will be extracted from the data.
+/// If format doesn't have any schema reader or a schema reader
+/// couldn't determine the schema, an exception will be thrown.
+using ReadBufferCreator = std::function<std::unique_ptr<ReadBuffer>()>;
+ColumnsDescription readSchemaFromFormat(const String & format_name, const std::optional<FormatSettings> & format_settings, ReadBufferCreator read_buffer_creator, ContextPtr context);
+
+/// Convert type to the most general type:
+/// - IntN, UIntN, FloatN, Decimal -> Float64
+/// - Type -> Nullable(type)
+/// - Array(Type) -> Array(Nullable(Type))
+/// - Tuple(Type1, ..., TypeN) -> Tuple(Nullable(Type1), ..., Nullable(TypeN))
+/// - Map(KeyType, ValueType) -> Map(KeyType, Nullable(ValueType))
+/// - LowCardinality(Type) -> LowCardinality(Nullable(Type))
+/// If type is Nothing or one of the nested types is Nothing, return nullptr.
+DataTypePtr generalizeDataType(DataTypePtr type);
+
+}
diff --git a/src/Formats/config_formats.h.in b/src/Formats/config_formats.h.in
index f6497b4830b..427abc7d1ce 100644
--- a/src/Formats/config_formats.h.in
+++ b/src/Formats/config_formats.h.in
@@ -10,4 +10,3 @@
 #cmakedefine01 USE_ARROW
 #cmakedefine01 USE_PROTOBUF
 #cmakedefine01 USE_MSGPACK
-
diff --git a/src/Formats/registerFormats.cpp b/src/Formats/registerFormats.cpp
index 7425c6898de..1349c9e3323 100644
--- a/src/Formats/registerFormats.cpp
+++ b/src/Formats/registerFormats.cpp
@@ -81,6 +81,28 @@ void registerInputFormatCapnProto(FormatFactory & factory);
 void registerNonTrivialPrefixAndSuffixCheckerJSONEachRow(FormatFactory & factory);
 void registerNonTrivialPrefixAndSuffixCheckerJSONAsString(FormatFactory & factory);
 
+void registerArrowSchemaReader(FormatFactory & factory);
+void registerParquetSchemaReader(FormatFactory & factory);
+void registerORCSchemaReader(FormatFactory & factory);
+void registerTSVSchemaReader(FormatFactory & factory);
+void registerCSVSchemaReader(FormatFactory & factory);
+void registerJSONCompactEachRowSchemaReader(FormatFactory & factory);
+void registerJSONEachRowSchemaReader(FormatFactory & factory);
+void registerNativeSchemaReader(FormatFactory & factory);
+void registerRowBinaryWithNamesAndTypesSchemaReader(FormatFactory & factory);
+void registerAvroSchemaReader(FormatFactory & factory);
+void registerProtobufSchemaReader(FormatFactory & factory);
+void registerLineAsStringSchemaReader(FormatFactory & factory);
+void registerJSONAsStringSchemaReader(FormatFactory & factory);
+void registerRawBLOBSchemaReader(FormatFactory & factory);
+void registerMsgPackSchemaReader(FormatFactory & factory);
+void registerCapnProtoSchemaReader(FormatFactory & factory);
+void registerCustomSeparatedSchemaReader(FormatFactory & factory);
+void registerRegexpSchemaReader(FormatFactory & factory);
+void registerTSKVSchemaReader(FormatFactory & factory);
+void registerValuesSchemaReader(FormatFactory & factory);
+void registerTemplateSchemaReader(FormatFactory & factory);
+
 void registerFormats()
 {
     auto & factory = FormatFactory::instance();
@@ -152,6 +174,28 @@ void registerFormats()
 
     registerNonTrivialPrefixAndSuffixCheckerJSONEachRow(factory);
     registerNonTrivialPrefixAndSuffixCheckerJSONAsString(factory);
+
+    registerArrowSchemaReader(factory);
+    registerParquetSchemaReader(factory);
+    registerORCSchemaReader(factory);
+    registerTSVSchemaReader(factory);
+    registerCSVSchemaReader(factory);
+    registerJSONCompactEachRowSchemaReader(factory);
+    registerJSONEachRowSchemaReader(factory);
+    registerNativeSchemaReader(factory);
+    registerRowBinaryWithNamesAndTypesSchemaReader(factory);
+    registerAvroSchemaReader(factory);
+    registerProtobufSchemaReader(factory);
+    registerLineAsStringSchemaReader(factory);
+    registerJSONAsStringSchemaReader(factory);
+    registerRawBLOBSchemaReader(factory);
+    registerMsgPackSchemaReader(factory);
+    registerCapnProtoSchemaReader(factory);
+    registerCustomSeparatedSchemaReader(factory);
+    registerRegexpSchemaReader(factory);
+    registerTSKVSchemaReader(factory);
+    registerValuesSchemaReader(factory);
+    registerTemplateSchemaReader(factory);
 }
 
 }
diff --git a/src/Functions/FunctionsConversion.h b/src/Functions/FunctionsConversion.h
index 8018fa8e726..62e62b5f5dc 100644
--- a/src/Functions/FunctionsConversion.h
+++ b/src/Functions/FunctionsConversion.h
@@ -1835,6 +1835,8 @@ public:
     size_t getNumberOfArguments() const override { return 0; }
 
     bool useDefaultImplementationForConstants() const override { return true; }
+    bool canBeExecutedOnDefaultArguments() const override { return false; }
+
     ColumnNumbers getArgumentsThatAreAlwaysConstant() const override { return {1}; }
 
     DataTypePtr getReturnTypeImpl(const ColumnsWithTypeAndName & arguments) const override
diff --git a/src/IO/BrotliReadBuffer.cpp b/src/IO/BrotliReadBuffer.cpp
index b66bbf45054..77069746153 100644
--- a/src/IO/BrotliReadBuffer.cpp
+++ b/src/IO/BrotliReadBuffer.cpp
@@ -39,7 +39,7 @@ BrotliReadBuffer::BrotliReadBuffer(std::unique_ptr<ReadBuffer> in_, size_t buf_s
         , in_data(nullptr)
         , out_capacity(0)
         , out_data(nullptr)
-        , eof(false)
+        , eof_flag(false)
 {
 }
 
@@ -47,7 +47,7 @@ BrotliReadBuffer::~BrotliReadBuffer() = default;
 
 bool BrotliReadBuffer::nextImpl()
 {
-    if (eof)
+    if (eof_flag)
         return false;
 
     if (!in_available)
@@ -74,7 +74,7 @@ bool BrotliReadBuffer::nextImpl()
     {
         if (in->eof())
         {
-            eof = true;
+            eof_flag = true;
             return !working_buffer.empty();
         }
         else
diff --git a/src/IO/BrotliReadBuffer.h b/src/IO/BrotliReadBuffer.h
index 0fa999d1de5..44a7dc7ddbd 100644
--- a/src/IO/BrotliReadBuffer.h
+++ b/src/IO/BrotliReadBuffer.h
@@ -32,7 +32,7 @@ private:
     size_t out_capacity;
     uint8_t  * out_data;
 
-    bool eof;
+    bool eof_flag;
 };
 
 }
diff --git a/src/IO/Bzip2ReadBuffer.cpp b/src/IO/Bzip2ReadBuffer.cpp
index df9a8d5b369..c2060612757 100644
--- a/src/IO/Bzip2ReadBuffer.cpp
+++ b/src/IO/Bzip2ReadBuffer.cpp
@@ -42,7 +42,7 @@ Bzip2ReadBuffer::Bzip2ReadBuffer(std::unique_ptr<ReadBuffer> in_, size_t buf_siz
         : BufferWithOwnMemory<ReadBuffer>(buf_size, existing_memory, alignment)
         , in(std::move(in_))
         , bz(std::make_unique<Bzip2StateWrapper>())
-        , eof(false)
+        , eof_flag(false)
 {
 }
 
@@ -50,7 +50,7 @@ Bzip2ReadBuffer::~Bzip2ReadBuffer() = default;
 
 bool Bzip2ReadBuffer::nextImpl()
 {
-    if (eof)
+    if (eof_flag)
         return false;
 
     if (!bz->stream.avail_in)
@@ -72,7 +72,7 @@ bool Bzip2ReadBuffer::nextImpl()
     {
         if (in->eof())
         {
-            eof = true;
+            eof_flag = true;
             return !working_buffer.empty();
         }
         else
@@ -91,7 +91,7 @@ bool Bzip2ReadBuffer::nextImpl()
 
     if (in->eof())
     {
-        eof = true;
+        eof_flag = true;
         throw Exception(ErrorCodes::UNEXPECTED_END_OF_FILE, "Unexpected end of bzip2 archive");
     }
 
diff --git a/src/IO/Bzip2ReadBuffer.h b/src/IO/Bzip2ReadBuffer.h
index dc113800683..de1e61ee388 100644
--- a/src/IO/Bzip2ReadBuffer.h
+++ b/src/IO/Bzip2ReadBuffer.h
@@ -26,7 +26,7 @@ private:
     class Bzip2StateWrapper;
     std::unique_ptr<Bzip2StateWrapper> bz;
 
-    bool eof;
+    bool eof_flag;
 };
 
 }
diff --git a/src/IO/LZMAInflatingReadBuffer.cpp b/src/IO/LZMAInflatingReadBuffer.cpp
index f2df6bdca6a..80da7421fc3 100644
--- a/src/IO/LZMAInflatingReadBuffer.cpp
+++ b/src/IO/LZMAInflatingReadBuffer.cpp
@@ -7,7 +7,7 @@ namespace ErrorCodes
     extern const int LZMA_STREAM_DECODER_FAILED;
 }
 LZMAInflatingReadBuffer::LZMAInflatingReadBuffer(std::unique_ptr<ReadBuffer> in_, size_t buf_size, char * existing_memory, size_t alignment)
-    : BufferWithOwnMemory<ReadBuffer>(buf_size, existing_memory, alignment), in(std::move(in_)), eof(false)
+    : BufferWithOwnMemory<ReadBuffer>(buf_size, existing_memory, alignment), in(std::move(in_)), eof_flag(false)
 {
     lstr = LZMA_STREAM_INIT;
     lstr.allocator = nullptr;
@@ -36,7 +36,7 @@ LZMAInflatingReadBuffer::~LZMAInflatingReadBuffer()
 
 bool LZMAInflatingReadBuffer::nextImpl()
 {
-    if (eof)
+    if (eof_flag)
         return false;
 
     lzma_action action = LZMA_RUN;
@@ -64,7 +64,7 @@ bool LZMAInflatingReadBuffer::nextImpl()
     {
         if (in->eof())
         {
-            eof = true;
+            eof_flag = true;
             return !working_buffer.empty();
         }
         else
diff --git a/src/IO/LZMAInflatingReadBuffer.h b/src/IO/LZMAInflatingReadBuffer.h
index 18922f64516..2d676eeeeb3 100644
--- a/src/IO/LZMAInflatingReadBuffer.h
+++ b/src/IO/LZMAInflatingReadBuffer.h
@@ -25,7 +25,7 @@ private:
     std::unique_ptr<ReadBuffer> in;
     lzma_stream lstr;
 
-    bool eof;
+    bool eof_flag;
 };
 
 }
diff --git a/src/IO/Lz4InflatingReadBuffer.cpp b/src/IO/Lz4InflatingReadBuffer.cpp
index 22bce94cad2..61e912d440c 100644
--- a/src/IO/Lz4InflatingReadBuffer.cpp
+++ b/src/IO/Lz4InflatingReadBuffer.cpp
@@ -32,7 +32,7 @@ Lz4InflatingReadBuffer::~Lz4InflatingReadBuffer()
 
 bool Lz4InflatingReadBuffer::nextImpl()
 {
-    if (eof)
+    if (eof_flag)
         return false;
 
     if (!in_available)
@@ -66,7 +66,7 @@ bool Lz4InflatingReadBuffer::nextImpl()
 
     if (in->eof())
     {
-        eof = true;
+        eof_flag = true;
         return !working_buffer.empty();
     }
 
diff --git a/src/IO/Lz4InflatingReadBuffer.h b/src/IO/Lz4InflatingReadBuffer.h
index 0462d85adf7..d4d81f8765c 100644
--- a/src/IO/Lz4InflatingReadBuffer.h
+++ b/src/IO/Lz4InflatingReadBuffer.h
@@ -35,7 +35,7 @@ private:
     size_t in_available;
     size_t out_available;
 
-    bool eof = false;
+    bool eof_flag = false;
 };
 
 }
diff --git a/src/IO/ReadHelpers.cpp b/src/IO/ReadHelpers.cpp
index b0a6838b81e..48811a41edd 100644
--- a/src/IO/ReadHelpers.cpp
+++ b/src/IO/ReadHelpers.cpp
@@ -702,6 +702,25 @@ void readCSVString(String & s, ReadBuffer & buf, const FormatSettings::CSV & set
     readCSVStringInto(s, buf, settings);
 }
 
+void readCSVField(String & s, ReadBuffer & buf, const FormatSettings::CSV & settings)
+{
+    s.clear();
+    bool add_quote = false;
+    char quote = '\'';
+
+    if (!buf.eof() && (*buf.position() == '\'' || *buf.position() == '"'))
+    {
+        quote = *buf.position();
+        s.push_back(quote);
+        add_quote = true;
+    }
+
+    readCSVStringInto(s, buf, settings);
+
+    if (add_quote)
+        s.push_back(quote);
+}
+
 template void readCSVStringInto<PaddedPODArray<UInt8>>(PaddedPODArray<UInt8> & s, ReadBuffer & buf, const FormatSettings::CSV & settings);
 
 
@@ -1212,6 +1231,19 @@ void skipToNextRowOrEof(PeekableReadBuffer & buf, const String & row_after_delim
     }
 }
 
+// Use PeekableReadBuffer to copy field to string after parsing.
+template <typename ParseFunc>
+static void readParsedValueIntoString(String & s, ReadBuffer & buf, ParseFunc parse_func)
+{
+    PeekableReadBuffer peekable_buf(buf);
+    peekable_buf.setCheckpoint();
+    parse_func(peekable_buf);
+    peekable_buf.makeContinuousMemoryFromCheckpointToPos();
+    auto * end = peekable_buf.position();
+    peekable_buf.rollbackToCheckpoint();
+    s.append(peekable_buf.position(), end);
+    peekable_buf.position() = end;
+}
 
 template <char opening_bracket, char closing_bracket>
 static void readQuotedFieldInBrackets(String & s, ReadBuffer & buf)
@@ -1266,7 +1298,11 @@ void readQuotedFieldIntoString(String & s, ReadBuffer & buf)
     /// - Number: integer, float, decimal.
 
     if (*buf.position() == '\'')
-        readQuotedString(s, buf);
+    {
+        s.push_back('\'');
+        readQuotedStringInto<false>(s, buf);
+        s.push_back('\'');
+    }
     else if (*buf.position() == '[')
         readQuotedFieldInBrackets<'[', ']'>(s, buf);
     else if (*buf.position() == '(')
@@ -1290,18 +1326,19 @@ void readQuotedFieldIntoString(String & s, ReadBuffer & buf)
     else
     {
         /// It's an integer, float or decimal. They all can be parsed as float.
-        /// Use PeekableReadBuffer to copy field to string after parsing.
-        PeekableReadBuffer peekable_buf(buf);
-        peekable_buf.setCheckpoint();
-        Float64 tmp;
-        readFloatText(tmp, peekable_buf);
-        peekable_buf.makeContinuousMemoryFromCheckpointToPos();
-        auto * end = peekable_buf.position();
-        peekable_buf.rollbackToCheckpoint();
-        s.append(peekable_buf.position(), end);
-        peekable_buf.position() = end;
+        auto parse_func = [](ReadBuffer & in)
+        {
+            Float64 tmp;
+            readFloatText(tmp, in);
+        };
+        readParsedValueIntoString(s, buf, parse_func);
     }
 }
 
+void readJSONFieldIntoString(String & s, ReadBuffer & buf)
+{
+    auto parse_func = [](ReadBuffer & in) { skipJSONField(in, "json_field"); };
+    readParsedValueIntoString(s, buf, parse_func);
+}
 
 }
diff --git a/src/IO/ReadHelpers.h b/src/IO/ReadHelpers.h
index b2ad4035cdc..6d1023947a5 100644
--- a/src/IO/ReadHelpers.h
+++ b/src/IO/ReadHelpers.h
@@ -563,6 +563,8 @@ void readStringUntilWhitespace(String & s, ReadBuffer & buf);
   */
 void readCSVString(String & s, ReadBuffer & buf, const FormatSettings::CSV & settings);
 
+/// Differ from readCSVString in that it doesn't remove quotes around field if any.
+void readCSVField(String & s, ReadBuffer & buf, const FormatSettings::CSV & settings);
 
 /// Read and append result to array of characters.
 template <typename Vector>
@@ -1381,4 +1383,7 @@ struct PcgDeserializer
 
 void readQuotedFieldIntoString(String & s, ReadBuffer & buf);
 
+void readJSONFieldIntoString(String & s, ReadBuffer & buf);
+
 }
+
diff --git a/src/IO/ZlibInflatingReadBuffer.cpp b/src/IO/ZlibInflatingReadBuffer.cpp
index 472399dea3d..28426e920ef 100644
--- a/src/IO/ZlibInflatingReadBuffer.cpp
+++ b/src/IO/ZlibInflatingReadBuffer.cpp
@@ -16,7 +16,7 @@ ZlibInflatingReadBuffer::ZlibInflatingReadBuffer(
         size_t alignment)
     : BufferWithOwnMemory<ReadBuffer>(buf_size, existing_memory, alignment)
     , in(std::move(in_))
-    , eof(false)
+    , eof_flag(false)
 {
     zstr.zalloc = nullptr;
     zstr.zfree = nullptr;
@@ -54,7 +54,7 @@ bool ZlibInflatingReadBuffer::nextImpl()
     do
     {
         /// if we already found eof, we shouldn't do anything
-        if (eof)
+        if (eof_flag)
             return false;
 
         /// if there is no available bytes in zstr, move ptr to next available data
@@ -83,7 +83,7 @@ bool ZlibInflatingReadBuffer::nextImpl()
             /// * false if there is no data in working buffer
             if (in->eof())
             {
-                eof = true;
+                eof_flag = true;
                 return !working_buffer.empty();
             }
             /// If it is not end of file, we need to reset zstr and return true, because we still have some data to read
diff --git a/src/IO/ZlibInflatingReadBuffer.h b/src/IO/ZlibInflatingReadBuffer.h
index b8c141e9b9b..905ab0cd3fc 100644
--- a/src/IO/ZlibInflatingReadBuffer.h
+++ b/src/IO/ZlibInflatingReadBuffer.h
@@ -33,7 +33,7 @@ private:
 
     std::unique_ptr<ReadBuffer> in;
     z_stream zstr;
-    bool eof;
+    bool eof_flag;
 };
 
 }
diff --git a/src/IO/ZstdInflatingReadBuffer.cpp b/src/IO/ZstdInflatingReadBuffer.cpp
index ce89f09f955..6f244dc5a75 100644
--- a/src/IO/ZstdInflatingReadBuffer.cpp
+++ b/src/IO/ZstdInflatingReadBuffer.cpp
@@ -31,7 +31,7 @@ bool ZstdInflatingReadBuffer::nextImpl()
     do
     {
         // If it is known that end of file was reached, return false
-        if (eof)
+        if (eof_flag)
             return false;
 
         /// If end was reached, get next part
@@ -64,7 +64,7 @@ bool ZstdInflatingReadBuffer::nextImpl()
         /// If end of file is reached, fill eof variable and return true if there is some data in buffer, otherwise return false
         if (in->eof())
         {
-            eof = true;
+            eof_flag = true;
             return !working_buffer.empty();
         }
         /// It is possible, that input buffer is not at eof yet, but nothing was decompressed in current iteration.
diff --git a/src/IO/ZstdInflatingReadBuffer.h b/src/IO/ZstdInflatingReadBuffer.h
index e6e2dad0ad5..ec80b860e0e 100644
--- a/src/IO/ZstdInflatingReadBuffer.h
+++ b/src/IO/ZstdInflatingReadBuffer.h
@@ -31,7 +31,7 @@ private:
     ZSTD_DCtx * dctx;
     ZSTD_inBuffer input;
     ZSTD_outBuffer output;
-    bool eof = false;
+    bool eof_flag = false;
 };
 
 }
diff --git a/src/Interpreters/InterpreterCreateQuery.cpp b/src/Interpreters/InterpreterCreateQuery.cpp
index 8f003e75a07..7ddb0c8c26e 100644
--- a/src/Interpreters/InterpreterCreateQuery.cpp
+++ b/src/Interpreters/InterpreterCreateQuery.cpp
@@ -637,13 +637,14 @@ InterpreterCreateQuery::TableProperties InterpreterCreateQuery::getTableProperti
         /// Table function without columns list.
         auto table_function = TableFunctionFactory::instance().get(create.as_table_function, getContext());
         properties.columns = table_function->getActualTableStructure(getContext());
-        assert(!properties.columns.empty());
     }
     else if (create.is_dictionary)
     {
         return {};
     }
-    else
+    /// We can have queries like "CREATE TABLE <table> ENGINE=<engine>" if <engine>
+    /// supports schema inference (will determine table structure in it's constructor).
+    else if (!StorageFactory::instance().checkIfStorageSupportsSchemaInterface(create.storage->engine->name))
         throw Exception("Incorrect CREATE query: required list of column descriptions or AS section or SELECT.", ErrorCodes::INCORRECT_QUERY);
 
     /// Even if query has list of columns, canonicalize it (unfold Nested columns).
@@ -1083,7 +1084,10 @@ bool InterpreterCreateQuery::doCreateTable(ASTCreateQuery & create,
     {
         const auto & factory = TableFunctionFactory::instance();
         auto table_func = factory.get(create.as_table_function, getContext());
-        res = table_func->execute(create.as_table_function, getContext(), create.getTable(), properties.columns);
+        /// In case of CREATE AS table_function() query we should use global context
+        /// in storage creation because there will be no query context on server startup
+        /// and because storage lifetime is bigger than query context lifetime.
+        res = table_func->execute(create.as_table_function, getContext(), create.getTable(), properties.columns, /*use_global_context=*/true);
         res->renameInMemory({create.getDatabase(), create.getTable(), create.uuid});
     }
     else
diff --git a/src/Parsers/ASTCreateQuery.cpp b/src/Parsers/ASTCreateQuery.cpp
index 3e77bee19a9..e61a0f55142 100644
--- a/src/Parsers/ASTCreateQuery.cpp
+++ b/src/Parsers/ASTCreateQuery.cpp
@@ -359,7 +359,7 @@ void ASTCreateQuery::formatQueryImpl(const FormatSettings & settings, FormatStat
 
     if (as_table_function)
     {
-        if (columns_list)
+        if (columns_list && !columns_list->empty())
         {
             frame.expression_list_always_start_on_new_line = true;
             settings.ostr << (settings.one_line ? " (" : "\n(");
@@ -375,7 +375,7 @@ void ASTCreateQuery::formatQueryImpl(const FormatSettings & settings, FormatStat
 
     frame.expression_list_always_start_on_new_line = true;
 
-    if (columns_list && !as_table_function)
+    if (columns_list && !columns_list->empty() && !as_table_function)
     {
         settings.ostr << (settings.one_line ? " (" : "\n(");
         FormatStateStacked frame_nested = frame;
diff --git a/src/Parsers/ASTCreateQuery.h b/src/Parsers/ASTCreateQuery.h
index 93fced7dba5..2e35731acad 100644
--- a/src/Parsers/ASTCreateQuery.h
+++ b/src/Parsers/ASTCreateQuery.h
@@ -50,6 +50,12 @@ public:
     ASTPtr clone() const override;
 
     void formatImpl(const FormatSettings & s, FormatState & state, FormatStateStacked frame) const override;
+
+    bool empty()
+    {
+        return (!columns || columns->children.empty()) && (!indices || indices->children.empty()) && (!constraints || constraints->children.empty())
+            && (!projections || projections->children.empty());
+    }
 };
 
 
diff --git a/src/Parsers/ParserCreateQuery.cpp b/src/Parsers/ParserCreateQuery.cpp
index dbbea986404..7f47e1efb49 100644
--- a/src/Parsers/ParserCreateQuery.cpp
+++ b/src/Parsers/ParserCreateQuery.cpp
@@ -557,34 +557,43 @@ bool ParserCreateTableQuery::parseImpl(Pos & pos, ASTPtr & node, Expected & expe
             }
         }
     }
+    /** Create queries without list of columns:
+      *  - CREATE|ATTACH TABLE ... AS ...
+      *  - CREATE|ATTACH TABLE ... ENGINE = engine
+      */
     else
     {
         storage_p.parse(pos, storage, expected);
 
-        if (!s_as.ignore(pos, expected))
-            return false;
-
-        if (!select_p.parse(pos, select, expected)) /// AS SELECT ...
+        /// CREATE|ATTACH TABLE ... AS ...
+        if (s_as.ignore(pos, expected))
         {
-            /// ENGINE can not be specified for table functions.
-            if (storage || !table_function_p.parse(pos, as_table_function, expected))
+            if (!select_p.parse(pos, select, expected)) /// AS SELECT ...
             {
-                /// AS [db.]table
-                if (!name_p.parse(pos, as_table, expected))
-                    return false;
-
-                if (s_dot.ignore(pos, expected))
+                /// ENGINE can not be specified for table functions.
+                if (storage || !table_function_p.parse(pos, as_table_function, expected))
                 {
-                    as_database = as_table;
+                    /// AS [db.]table
                     if (!name_p.parse(pos, as_table, expected))
                         return false;
-                }
 
-                /// Optional - ENGINE can be specified.
-                if (!storage)
-                    storage_p.parse(pos, storage, expected);
+                    if (s_dot.ignore(pos, expected))
+                    {
+                        as_database = as_table;
+                        if (!name_p.parse(pos, as_table, expected))
+                            return false;
+                    }
+
+                    /// Optional - ENGINE can be specified.
+                    if (!storage)
+                        storage_p.parse(pos, storage, expected);
+                }
             }
         }
+        else if (!storage)
+        {
+            return false;
+        }
     }
     auto comment = parseComment(pos, expected);
 
diff --git a/src/Parsers/ParserCreateQuery.h b/src/Parsers/ParserCreateQuery.h
index bc1ebd65639..33aafb40d83 100644
--- a/src/Parsers/ParserCreateQuery.h
+++ b/src/Parsers/ParserCreateQuery.h
@@ -361,6 +361,8 @@ protected:
   * Or:
   * CREATE|ATTACH TABLE [IF NOT EXISTS] [db.]name [UUID 'uuid'] [ON CLUSTER cluster] AS ENGINE = engine SELECT ...
   *
+  * Or (for engines that supports schema inference):
+  * CREATE|ATTACH TABLE [IF NOT EXISTS] [db.]name [UUID 'uuid'] [ON CLUSTER cluster] ENGINE = engine
   */
 class ParserCreateTableQuery : public IParserBase
 {
diff --git a/src/Processors/Formats/ISchemaReader.cpp b/src/Processors/Formats/ISchemaReader.cpp
new file mode 100644
index 00000000000..096e39a2893
--- /dev/null
+++ b/src/Processors/Formats/ISchemaReader.cpp
@@ -0,0 +1,160 @@
+#include <Processors/Formats/ISchemaReader.h>
+#include <Formats/ReadSchemaUtils.h>
+#include <DataTypes/DataTypeString.h>
+
+namespace DB
+{
+
+namespace ErrorCodes
+{
+    extern const int CANNOT_EXTRACT_TABLE_STRUCTURE;
+}
+
+IRowSchemaReader::IRowSchemaReader(ReadBuffer & in_, size_t max_rows_to_read_, DataTypePtr default_type_)
+    : ISchemaReader(in_), max_rows_to_read(max_rows_to_read_), default_type(default_type_)
+{
+}
+
+NamesAndTypesList IRowSchemaReader::readSchema()
+{
+    DataTypes data_types = readRowAndGetDataTypes();
+    for (size_t row = 1; row < max_rows_to_read; ++row)
+    {
+        DataTypes new_data_types = readRowAndGetDataTypes();
+        if (new_data_types.empty())
+            /// We reached eof.
+            break;
+
+        if (new_data_types.size() != data_types.size())
+            throw Exception(ErrorCodes::CANNOT_EXTRACT_TABLE_STRUCTURE, "Rows have different amount of values");
+
+        for (size_t i = 0; i != data_types.size(); ++i)
+        {
+            /// We couldn't determine the type of this column in a new row, just skip it.
+            if (!new_data_types[i])
+                continue;
+
+            /// If we couldn't determine the type of column yet, just set the new type.
+            if (!data_types[i])
+                data_types[i] = new_data_types[i];
+            /// If the new type and the previous type for this column are different,
+            /// we will use default type if we have it or throw an exception.
+            else if (data_types[i]->getName() != new_data_types[i]->getName())
+            {
+                if (default_type)
+                    data_types[i] = default_type;
+                else
+                    throw Exception(
+                        ErrorCodes::CANNOT_EXTRACT_TABLE_STRUCTURE,
+                        "Automatically defined type {} for column {} in row {} differs from type defined by previous rows: {}", new_data_types[i]->getName(), i + 1, row, data_types[i]->getName());
+            }
+        }
+    }
+
+    /// Check that we read at list one column.
+    if (data_types.empty())
+        throw Exception(ErrorCodes::CANNOT_EXTRACT_TABLE_STRUCTURE, "Cannot read rows from the data");
+
+    /// If column names weren't set, use default names 'c1', 'c2', ...
+    if (column_names.empty())
+    {
+        column_names.reserve(data_types.size());
+        for (size_t i = 0; i != data_types.size(); ++i)
+            column_names.push_back("c" + std::to_string(i + 1));
+    }
+    /// If column names were set, check that the number of names match the number of types.
+    else if (column_names.size() != data_types.size())
+        throw Exception(
+            ErrorCodes::CANNOT_EXTRACT_TABLE_STRUCTURE,
+            "The number of column names {} differs with the number of types {}", column_names.size(), data_types.size());
+
+    NamesAndTypesList result;
+    for (size_t i = 0; i != data_types.size(); ++i)
+    {
+        /// Check that we could determine the type of this column.
+        if (!data_types[i])
+        {
+            if (!default_type)
+                throw Exception(
+                    ErrorCodes::CANNOT_EXTRACT_TABLE_STRUCTURE,
+                    "Cannot determine table structure by first {} rows of data, because some columns contain only Nulls. To increase the maximum "
+                    "number of rows to read for structure determination, use setting input_format_max_rows_to_read_for_schema_inference",
+                    max_rows_to_read);
+
+            data_types[i] = default_type;
+        }
+        result.emplace_back(column_names[i], data_types[i]);
+    }
+
+    return result;
+}
+
+IRowWithNamesSchemaReader::IRowWithNamesSchemaReader(ReadBuffer & in_, size_t max_rows_to_read_, DataTypePtr default_type_)
+    : ISchemaReader(in_), max_rows_to_read(max_rows_to_read_), default_type(default_type_)
+{
+}
+
+NamesAndTypesList IRowWithNamesSchemaReader::readSchema()
+{
+    auto names_and_types = readRowAndGetNamesAndDataTypes();
+    for (size_t row = 1; row < max_rows_to_read; ++row)
+    {
+        auto new_names_and_types = readRowAndGetNamesAndDataTypes();
+        if (new_names_and_types.empty())
+            /// We reached eof.
+            break;
+
+        for (const auto & [name, new_type] : new_names_and_types)
+        {
+            auto it = names_and_types.find(name);
+            /// If we didn't see this column before, just add it.
+            if (it == names_and_types.end())
+            {
+                names_and_types[name] = new_type;
+                continue;
+            }
+
+            auto & type = it->second;
+            /// If we couldn't determine the type of column yet, just set the new type.
+            if (!type)
+                type = new_type;
+            /// If the new type and the previous type for this column are different,
+            /// we will use default type if we have it or throw an exception.
+            else if (new_type && type->getName() != new_type->getName())
+            {
+                if (default_type)
+                    type = default_type;
+                else
+                    throw Exception(
+                        ErrorCodes::CANNOT_EXTRACT_TABLE_STRUCTURE,
+                        "Automatically defined type {} for column {} in row {} differs from type defined by previous rows: {}", type->getName(), name, row, new_type->getName());
+            }
+        }
+    }
+
+    /// Check that we read at list one column.
+    if (names_and_types.empty())
+        throw Exception(ErrorCodes::CANNOT_EXTRACT_TABLE_STRUCTURE, "Cannot read rows from the data");
+
+    NamesAndTypesList result;
+    for (auto & [name, type] : names_and_types)
+    {
+        /// Check that we could determine the type of this column.
+        if (!type)
+        {
+            if (!default_type)
+                throw Exception(
+                    ErrorCodes::CANNOT_EXTRACT_TABLE_STRUCTURE,
+                    "Cannot determine table structure by first {} rows of data, because some columns contain only Nulls. To increase the maximum "
+                    "number of rows to read for structure determination, use setting input_format_max_rows_to_read_for_schema_inference",
+                    max_rows_to_read);
+
+            type = default_type;
+        }
+        result.emplace_back(name, type);
+    }
+
+    return result;
+}
+
+}
diff --git a/src/Processors/Formats/ISchemaReader.h b/src/Processors/Formats/ISchemaReader.h
new file mode 100644
index 00000000000..67a8eb88d61
--- /dev/null
+++ b/src/Processors/Formats/ISchemaReader.h
@@ -0,0 +1,87 @@
+#pragma once
+
+#include <Core/NamesAndTypes.h>
+#include <DataTypes/IDataType.h>
+#include <Formats/FormatSettings.h>
+#include <IO/ReadBuffer.h>
+
+namespace DB
+{
+
+/// Base class for schema inference for the data in some specific format.
+/// It reads some data from read buffer and try to determine the schema
+/// from read data.
+class ISchemaReader
+{
+public:
+    ISchemaReader(ReadBuffer & in_) : in(in_) {}
+
+    virtual NamesAndTypesList readSchema() = 0;
+
+    virtual ~ISchemaReader() = default;
+
+protected:
+    ReadBuffer & in;
+};
+
+/// Base class for schema inference for formats that read data row by row.
+/// It reads data row by row (up to max_rows_to_read), determines types of columns
+/// for each row and compare them with types from the previous rows. If some column
+/// contains values with different types in different rows, the default type will be
+/// used for this column or the exception will be thrown (if default type is not set).
+class IRowSchemaReader : public ISchemaReader
+{
+public:
+    IRowSchemaReader(ReadBuffer & in_, size_t max_rows_to_read_, DataTypePtr default_type_ = nullptr);
+    NamesAndTypesList readSchema() override;
+
+protected:
+    /// Read one row and determine types of columns in it.
+    /// Return types in the same order in which the values were in the row.
+    /// If it's impossible to determine the type for some column, return nullptr for it.
+    /// Return empty list if can't read more data.
+    virtual DataTypes readRowAndGetDataTypes() = 0;
+
+    void setColumnNames(const std::vector<String> & names) { column_names = names; }
+
+private:
+    size_t max_rows_to_read;
+    DataTypePtr default_type;
+    std::vector<String> column_names;
+};
+
+/// Base class for schema inference for formats that read data row by row and each
+/// row contains column names and values (ex: JSONEachRow, TSKV).
+/// Differ from IRowSchemaReader in that after reading a row we get
+/// a map {column_name : type} and some columns may be missed in a single row
+/// (in this case we will use types from the previous rows for missed columns).
+class IRowWithNamesSchemaReader : public ISchemaReader
+{
+public:
+    IRowWithNamesSchemaReader(ReadBuffer & in_, size_t max_rows_to_read_, DataTypePtr default_type_ = nullptr);
+    NamesAndTypesList readSchema() override;
+
+protected:
+    /// Read one row and determine types of columns in it.
+    /// Return map {column_name : type}.
+    /// If it's impossible to determine the type for some column, return nullptr for it.
+    /// Return empty map is can't read more data.
+    virtual std::unordered_map<String, DataTypePtr> readRowAndGetNamesAndDataTypes() = 0;
+
+private:
+    size_t max_rows_to_read;
+    DataTypePtr default_type;
+};
+
+/// Base class for schema inference for formats that don't need any data to
+/// determine the schema: formats with constant schema (ex: JSONAsString, LineAsString)
+/// and formats that use external format schema (ex: Protobuf, CapnProto).
+class IExternalSchemaReader
+{
+public:
+    virtual NamesAndTypesList readSchema() = 0;
+
+    virtual ~IExternalSchemaReader() = default;
+};
+
+}
diff --git a/src/Processors/Formats/Impl/ArrowBlockInputFormat.cpp b/src/Processors/Formats/Impl/ArrowBlockInputFormat.cpp
index 1f6b530d72f..4af2c651c39 100644
--- a/src/Processors/Formats/Impl/ArrowBlockInputFormat.cpp
+++ b/src/Processors/Formats/Impl/ArrowBlockInputFormat.cpp
@@ -85,31 +85,38 @@ void ArrowBlockInputFormat::resetParser()
     record_batch_current = 0;
 }
 
+static std::shared_ptr<arrow::RecordBatchReader> createStreamReader(ReadBuffer & in)
+{
+    auto stream_reader_status = arrow::ipc::RecordBatchStreamReader::Open(std::make_unique<ArrowInputStreamFromReadBuffer>(in));
+    if (!stream_reader_status.ok())
+        throw Exception(ErrorCodes::UNKNOWN_EXCEPTION,
+                        "Error while opening a table: {}", stream_reader_status.status().ToString());
+    return *stream_reader_status;
+}
+
+static std::shared_ptr<arrow::ipc::RecordBatchFileReader> createFileReader(ReadBuffer & in, const FormatSettings & format_settings, std::atomic<int> & is_stopped)
+{
+    auto arrow_file = asArrowFile(in, format_settings, is_stopped);
+    if (is_stopped)
+        return nullptr;
+
+    auto file_reader_status = arrow::ipc::RecordBatchFileReader::Open(std::move(arrow_file));
+    if (!file_reader_status.ok())
+        throw Exception(ErrorCodes::UNKNOWN_EXCEPTION,
+            "Error while opening a table: {}", file_reader_status.status().ToString());
+    return *file_reader_status;
+}
+
+
 void ArrowBlockInputFormat::prepareReader()
 {
-    std::shared_ptr<arrow::Schema> schema;
-
     if (stream)
-    {
-        auto stream_reader_status = arrow::ipc::RecordBatchStreamReader::Open(std::make_unique<ArrowInputStreamFromReadBuffer>(*in));
-        if (!stream_reader_status.ok())
-            throw Exception(ErrorCodes::UNKNOWN_EXCEPTION,
-                "Error while opening a table: {}", stream_reader_status.status().ToString());
-        stream_reader = *stream_reader_status;
-        schema = stream_reader->schema();
-    }
+        stream_reader = createStreamReader(*in);
     else
     {
-        auto arrow_file = asArrowFile(*in, format_settings, is_stopped);
-        if (is_stopped)
+        file_reader = createFileReader(*in, format_settings, is_stopped);
+        if (!file_reader)
             return;
-
-        auto file_reader_status = arrow::ipc::RecordBatchFileReader::Open(std::move(arrow_file));
-        if (!file_reader_status.ok())
-            throw Exception(ErrorCodes::UNKNOWN_EXCEPTION,
-                "Error while opening a table: {}", file_reader_status.status().ToString());
-        file_reader = *file_reader_status;
-        schema = file_reader->schema();
     }
 
     arrow_column_to_ch_column = std::make_unique<ArrowColumnToCHColumn>(getPort().getHeader(), "Arrow", format_settings.arrow.import_nested);
@@ -122,6 +129,27 @@ void ArrowBlockInputFormat::prepareReader()
     record_batch_current = 0;
 }
 
+ArrowSchemaReader::ArrowSchemaReader(ReadBuffer & in_, bool stream_, const FormatSettings & format_settings_)
+    : ISchemaReader(in_), stream(stream_), format_settings(format_settings_)
+{
+}
+
+NamesAndTypesList ArrowSchemaReader::readSchema()
+{
+    std::shared_ptr<arrow::Schema> schema;
+
+    if (stream)
+        schema = createStreamReader(in)->schema();
+    else
+    {
+        std::atomic<int> is_stopped = 0;
+        schema = createFileReader(in, format_settings, is_stopped)->schema();
+    }
+
+    auto header = ArrowColumnToCHColumn::arrowSchemaToCHHeader(*schema, stream ? "ArrowStream" : "Arrow");
+    return header.getNamesAndTypesList();
+}
+
 void registerInputFormatArrow(FormatFactory & factory)
 {
     factory.registerInputFormat(
@@ -145,6 +173,20 @@ void registerInputFormatArrow(FormatFactory & factory)
         });
 }
 
+void registerArrowSchemaReader(FormatFactory & factory)
+{
+    factory.registerSchemaReader(
+        "Arrow",
+        [](ReadBuffer & buf, const FormatSettings & settings, ContextPtr)
+        {
+            return std::make_shared<ArrowSchemaReader>(buf, false, settings);
+        });
+    factory.registerSchemaReader(
+        "ArrowStream",
+        [](ReadBuffer & buf, const FormatSettings & settings, ContextPtr)
+        {
+            return std::make_shared<ArrowSchemaReader>(buf, true, settings);
+        });}
 }
 #else
 
@@ -154,6 +196,8 @@ class FormatFactory;
 void registerInputFormatArrow(FormatFactory &)
 {
 }
+
+void registerArrowSchemaReader(FormatFactory &) {}
 }
 
 #endif
diff --git a/src/Processors/Formats/Impl/ArrowBlockInputFormat.h b/src/Processors/Formats/Impl/ArrowBlockInputFormat.h
index bb8a000477c..62cbf949fc2 100644
--- a/src/Processors/Formats/Impl/ArrowBlockInputFormat.h
+++ b/src/Processors/Formats/Impl/ArrowBlockInputFormat.h
@@ -4,6 +4,7 @@
 #if USE_ARROW
 
 #include <Processors/Formats/IInputFormat.h>
+#include <Processors/Formats/ISchemaReader.h>
 #include <Formats/FormatSettings.h>
 
 namespace arrow { class RecordBatchReader; }
@@ -51,6 +52,18 @@ private:
     std::atomic<int> is_stopped{0};
 };
 
+class ArrowSchemaReader : public ISchemaReader
+{
+public:
+    ArrowSchemaReader(ReadBuffer & in_, bool stream_, const FormatSettings & format_settings_);
+
+    NamesAndTypesList readSchema() override;
+
+private:
+    bool stream;
+    const FormatSettings format_settings;
+};
+
 }
 
 #endif
diff --git a/src/Processors/Formats/Impl/ArrowColumnToCHColumn.cpp b/src/Processors/Formats/Impl/ArrowColumnToCHColumn.cpp
index 272907022a1..aa181ea0b8b 100644
--- a/src/Processors/Formats/Impl/ArrowColumnToCHColumn.cpp
+++ b/src/Processors/Formats/Impl/ArrowColumnToCHColumn.cpp
@@ -239,10 +239,8 @@ static ColumnWithTypeAndName readColumnWithTimestampData(std::shared_ptr<arrow::
 }
 
 template <typename DecimalType, typename DecimalArray>
-static ColumnWithTypeAndName readColumnWithDecimalData(std::shared_ptr<arrow::ChunkedArray> & arrow_column, const String & column_name)
+static ColumnWithTypeAndName readColumnWithDecimalDataImpl(std::shared_ptr<arrow::ChunkedArray> & arrow_column, const String & column_name, DataTypePtr internal_type)
 {
-    const auto * arrow_decimal_type = static_cast<arrow::DecimalType *>(arrow_column->type().get());
-    auto internal_type = std::make_shared<DataTypeDecimal<DecimalType>>(arrow_decimal_type->precision(), arrow_decimal_type->scale());
     auto internal_column = internal_type->createColumn();
     auto & column = assert_cast<ColumnDecimal<DecimalType> &>(*internal_column);
     auto & column_data = column.getData();
@@ -259,6 +257,21 @@ static ColumnWithTypeAndName readColumnWithDecimalData(std::shared_ptr<arrow::Ch
     return {std::move(internal_column), std::move(internal_type), column_name};
 }
 
+template <typename DecimalArray>
+static ColumnWithTypeAndName readColumnWithDecimalData(std::shared_ptr<arrow::ChunkedArray> & arrow_column, const String & column_name)
+{
+    const auto * arrow_decimal_type = static_cast<arrow::DecimalType *>(arrow_column->type().get());
+    size_t precision = arrow_decimal_type->precision();
+    auto internal_type = createDecimal<DataTypeDecimal>(precision, arrow_decimal_type->scale());
+    if (precision <= DecimalUtils::max_precision<Decimal32>)
+        return readColumnWithDecimalDataImpl<Decimal32, DecimalArray>(arrow_column, column_name, internal_type);
+    else if (precision <= DecimalUtils::max_precision<Decimal64>)
+        return readColumnWithDecimalDataImpl<Decimal64, DecimalArray>(arrow_column, column_name, internal_type);
+    else if (precision <= DecimalUtils::max_precision<Decimal128>)
+        return readColumnWithDecimalDataImpl<Decimal128, DecimalArray>(arrow_column, column_name, internal_type);
+    return readColumnWithDecimalDataImpl<Decimal256, DecimalArray>(arrow_column, column_name, internal_type);
+}
+
 /// Creates a null bytemap from arrow's null bitmap
 static ColumnPtr readByteMapFromArrowColumn(std::shared_ptr<arrow::ChunkedArray> & arrow_column)
 {
@@ -328,12 +341,13 @@ static ColumnWithTypeAndName readColumnFromArrowColumn(
     const std::string & column_name,
     const std::string & format_name,
     bool is_nullable,
-    std::unordered_map<String, std::shared_ptr<ColumnWithTypeAndName>> & dictionary_values)
+    std::unordered_map<String, std::shared_ptr<ColumnWithTypeAndName>> & dictionary_values,
+    bool read_ints_as_dates)
 {
     if (!is_nullable && arrow_column->null_count() && arrow_column->type()->id() != arrow::Type::LIST
         && arrow_column->type()->id() != arrow::Type::MAP && arrow_column->type()->id() != arrow::Type::STRUCT)
     {
-        auto nested_column = readColumnFromArrowColumn(arrow_column, column_name, format_name, true, dictionary_values);
+        auto nested_column = readColumnFromArrowColumn(arrow_column, column_name, format_name, true, dictionary_values, read_ints_as_dates);
         auto nullmap_column = readByteMapFromArrowColumn(arrow_column);
         auto nullable_type = std::make_shared<DataTypeNullable>(std::move(nested_column.type));
         auto nullable_column = ColumnNullable::create(std::move(nested_column.column), std::move(nullmap_column));
@@ -358,25 +372,27 @@ static ColumnWithTypeAndName readColumnFromArrowColumn(
         case arrow::Type::UINT16:
         {
             auto column = readColumnWithNumericData<UInt16>(arrow_column, column_name);
-            column.type = std::make_shared<DataTypeDate>();
+            if (read_ints_as_dates)
+                column.type = std::make_shared<DataTypeDate>();
             return column;
         }
         case arrow::Type::UINT32:
         {
             auto column = readColumnWithNumericData<UInt32>(arrow_column, column_name);
-            column.type = std::make_shared<DataTypeDateTime>();
+            if (read_ints_as_dates)
+                column.type = std::make_shared<DataTypeDateTime>();
             return column;
         }
         case arrow::Type::TIMESTAMP:
             return readColumnWithTimestampData(arrow_column, column_name);
         case arrow::Type::DECIMAL128:
-            return readColumnWithDecimalData<Decimal128, arrow::Decimal128Array>(arrow_column, column_name);
+            return readColumnWithDecimalData<arrow::Decimal128Array>(arrow_column, column_name);
         case arrow::Type::DECIMAL256:
-            return readColumnWithDecimalData<Decimal256, arrow::Decimal256Array>(arrow_column, column_name);
+            return readColumnWithDecimalData<arrow::Decimal256Array>(arrow_column, column_name);
         case arrow::Type::MAP:
         {
             auto arrow_nested_column = getNestedArrowColumn(arrow_column);
-            auto nested_column = readColumnFromArrowColumn(arrow_nested_column, column_name, format_name, false, dictionary_values);
+            auto nested_column = readColumnFromArrowColumn(arrow_nested_column, column_name, format_name, false, dictionary_values, read_ints_as_dates);
             auto offsets_column = readOffsetsFromArrowListColumn(arrow_column);
 
             const auto * tuple_column = assert_cast<const ColumnTuple *>(nested_column.column.get());
@@ -388,7 +404,7 @@ static ColumnWithTypeAndName readColumnFromArrowColumn(
         case arrow::Type::LIST:
         {
             auto arrow_nested_column = getNestedArrowColumn(arrow_column);
-            auto nested_column = readColumnFromArrowColumn(arrow_nested_column, column_name, format_name, false, dictionary_values);
+            auto nested_column = readColumnFromArrowColumn(arrow_nested_column, column_name, format_name, false, dictionary_values, read_ints_as_dates);
             auto offsets_column = readOffsetsFromArrowListColumn(arrow_column);
             auto array_column = ColumnArray::create(std::move(nested_column.column), std::move(offsets_column));
             auto array_type = std::make_shared<DataTypeArray>(nested_column.type);
@@ -413,7 +429,7 @@ static ColumnWithTypeAndName readColumnFromArrowColumn(
             for (int i = 0; i != arrow_struct_type->num_fields(); ++i)
             {
                 auto nested_arrow_column = std::make_shared<arrow::ChunkedArray>(nested_arrow_columns[i]);
-                auto element = readColumnFromArrowColumn(nested_arrow_column, arrow_struct_type->field(i)->name(), format_name, false, dictionary_values);
+                auto element = readColumnFromArrowColumn(nested_arrow_column, arrow_struct_type->field(i)->name(), format_name, false, dictionary_values, read_ints_as_dates);
                 tuple_elements.emplace_back(std::move(element.column));
                 tuple_types.emplace_back(std::move(element.type));
                 tuple_names.emplace_back(std::move(element.name));
@@ -436,7 +452,7 @@ static ColumnWithTypeAndName readColumnFromArrowColumn(
                     dict_array.emplace_back(dict_chunk.dictionary());
                 }
                 auto arrow_dict_column = std::make_shared<arrow::ChunkedArray>(dict_array);
-                auto dict_column = readColumnFromArrowColumn(arrow_dict_column, column_name, format_name, false, dictionary_values);
+                auto dict_column = readColumnFromArrowColumn(arrow_dict_column, column_name, format_name, false, dictionary_values, read_ints_as_dates);
 
                 /// We should convert read column to ColumnUnique.
                 auto tmp_lc_column = DataTypeLowCardinality(dict_column.type).createColumn();
@@ -483,7 +499,7 @@ static void checkStatus(const arrow::Status & status, const String & column_name
         throw Exception{ErrorCodes::UNKNOWN_EXCEPTION, "Error with a {} column '{}': {}.", format_name, column_name, status.ToString()};
 }
 
-static Block arrowSchemaToCHHeader(const arrow::Schema & schema, const std::string & format_name)
+Block ArrowColumnToCHColumn::arrowSchemaToCHHeader(const arrow::Schema & schema, const std::string & format_name)
 {
     ColumnsWithTypeAndName sample_columns;
     for (const auto & field : schema.fields())
@@ -493,24 +509,21 @@ static Block arrowSchemaToCHHeader(const arrow::Schema & schema, const std::stri
         std::unique_ptr<arrow::ArrayBuilder> array_builder;
         arrow::Status status = MakeBuilder(pool, field->type(), &array_builder);
         checkStatus(status, field->name(), format_name);
+
         std::shared_ptr<arrow::Array> arrow_array;
         status = array_builder->Finish(&arrow_array);
         checkStatus(status, field->name(), format_name);
+
         arrow::ArrayVector array_vector = {arrow_array};
         auto arrow_column = std::make_shared<arrow::ChunkedArray>(array_vector);
         std::unordered_map<std::string, std::shared_ptr<ColumnWithTypeAndName>> dict_values;
-        ColumnWithTypeAndName sample_column = readColumnFromArrowColumn(arrow_column, field->name(), format_name, false, dict_values);
+        ColumnWithTypeAndName sample_column = readColumnFromArrowColumn(arrow_column, field->name(), format_name, false, dict_values, false);
+
         sample_columns.emplace_back(std::move(sample_column));
     }
     return Block(std::move(sample_columns));
 }
 
-ArrowColumnToCHColumn::ArrowColumnToCHColumn(
-    const arrow::Schema & schema, const std::string & format_name_, bool import_nested_)
-    : header(arrowSchemaToCHHeader(schema, format_name_)), format_name(format_name_), import_nested(import_nested_)
-{
-}
-
 ArrowColumnToCHColumn::ArrowColumnToCHColumn(
     const Block & header_, const std::string & format_name_, bool import_nested_)
     : header(header_), format_name(format_name_), import_nested(import_nested_)
@@ -553,7 +566,7 @@ void ArrowColumnToCHColumn::arrowColumnsToCHChunk(Chunk & res, NameToColumnPtr &
                 if (!nested_tables.contains(nested_table_name))
                 {
                     std::shared_ptr<arrow::ChunkedArray> arrow_column = name_to_column_ptr[nested_table_name];
-                    ColumnsWithTypeAndName cols = {readColumnFromArrowColumn(arrow_column, nested_table_name, format_name, false, dictionary_values)};
+                    ColumnsWithTypeAndName cols = {readColumnFromArrowColumn(arrow_column, nested_table_name, format_name, false, dictionary_values, true)};
                     Block block(cols);
                     nested_tables[nested_table_name] = std::make_shared<Block>(Nested::flatten(block));
                 }
@@ -573,7 +586,7 @@ void ArrowColumnToCHColumn::arrowColumnsToCHChunk(Chunk & res, NameToColumnPtr &
         if (read_from_nested)
             column = nested_tables[nested_table_name]->getByName(header_column.name);
         else
-            column = readColumnFromArrowColumn(arrow_column, header_column.name, format_name, false, dictionary_values);
+            column = readColumnFromArrowColumn(arrow_column, header_column.name, format_name, false, dictionary_values, true);
 
         try
         {
diff --git a/src/Processors/Formats/Impl/ArrowColumnToCHColumn.h b/src/Processors/Formats/Impl/ArrowColumnToCHColumn.h
index 46976093f0b..58f8f1536b5 100644
--- a/src/Processors/Formats/Impl/ArrowColumnToCHColumn.h
+++ b/src/Processors/Formats/Impl/ArrowColumnToCHColumn.h
@@ -23,16 +23,14 @@ public:
 
     ArrowColumnToCHColumn(const Block & header_, const std::string & format_name_, bool import_nested_);
 
-    /// Constructor that create header by arrow schema. It will be useful for inserting
-    /// data from file without knowing table structure.
-    ArrowColumnToCHColumn(const arrow::Schema & schema, const std::string & format_name, bool import_nested_);
-
     void arrowTableToCHChunk(Chunk & res, std::shared_ptr<arrow::Table> & table);
 
     void arrowColumnsToCHChunk(Chunk & res, NameToColumnPtr & name_to_column_ptr);
 
+    static Block arrowSchemaToCHHeader(const arrow::Schema & schema, const std::string & format_name);
+
 private:
-    const Block header;
+    const Block & header;
     const std::string format_name;
     bool import_nested;
 
diff --git a/src/Processors/Formats/Impl/AvroRowInputFormat.cpp b/src/Processors/Formats/Impl/AvroRowInputFormat.cpp
index 11e56ecbe0c..a372df41344 100644
--- a/src/Processors/Formats/Impl/AvroRowInputFormat.cpp
+++ b/src/Processors/Formats/Impl/AvroRowInputFormat.cpp
@@ -815,6 +815,92 @@ const AvroDeserializer & AvroConfluentRowInputFormat::getOrCreateDeserializer(Sc
     return it->second;
 }
 
+AvroSchemaReader::AvroSchemaReader(ReadBuffer & in_, bool confluent_, const FormatSettings & format_settings_)
+    : ISchemaReader(in_), confluent(confluent_), format_settings(format_settings_)
+{
+}
+
+NamesAndTypesList AvroSchemaReader::readSchema()
+{
+    avro::NodePtr root_node;
+    if (confluent)
+    {
+        UInt32 schema_id = readConfluentSchemaId(in);
+        root_node = getConfluentSchemaRegistry(format_settings)->getSchema(schema_id).root();
+    }
+    else
+    {
+        auto file_reader_ptr = std::make_unique<avro::DataFileReaderBase>(std::make_unique<InputStreamReadBufferAdapter>(in));
+        root_node = file_reader_ptr->dataSchema().root();
+    }
+
+    if (root_node->type() != avro::Type::AVRO_RECORD)
+        throw Exception("Root schema must be a record", ErrorCodes::TYPE_MISMATCH);
+
+    NamesAndTypesList names_and_types;
+    for (size_t i = 0; i != root_node->leaves(); ++i)
+        names_and_types.emplace_back(root_node->nameAt(i), avroNodeToDataType(root_node->leafAt(i)));
+
+    return names_and_types;
+}
+
+DataTypePtr AvroSchemaReader::avroNodeToDataType(avro::NodePtr node)
+{
+    switch (node->type())
+    {
+        case avro::Type::AVRO_INT:
+            return {std::make_shared<DataTypeInt32>()};
+        case avro::Type::AVRO_LONG:
+            return std::make_shared<DataTypeInt64>();
+        case avro::Type::AVRO_BOOL:
+            return std::make_shared<DataTypeUInt8>();
+        case avro::Type::AVRO_FLOAT:
+            return std::make_shared<DataTypeFloat32>();
+        case avro::Type::AVRO_DOUBLE:
+            return std::make_shared<DataTypeFloat64>();
+        case avro::Type::AVRO_STRING:
+            return std::make_shared<DataTypeString>();
+        case avro::Type::AVRO_BYTES:
+            return std::make_shared<DataTypeFloat32>();
+        case avro::Type::AVRO_ENUM:
+        {
+            if (node->names() < 128)
+            {
+                EnumValues<Int8>::Values values;
+                for (size_t i = 0; i != node->names(); ++i)
+                    values.emplace_back(node->nameAt(i), i);
+                return std::make_shared<DataTypeEnum8>(std::move(values));
+            }
+            else if (node->names() < 32768)
+            {
+                EnumValues<Int16>::Values values;
+                for (size_t i = 0; i != node->names(); ++i)
+                    values.emplace_back(node->nameAt(i), i);
+                return std::make_shared<DataTypeEnum16>(std::move(values));
+            }
+
+            throw Exception(ErrorCodes::ILLEGAL_COLUMN, "ClickHouse supports only 8 and 16-bit Enum.");
+        }
+        case avro::Type::AVRO_FIXED:
+            return std::make_shared<DataTypeFixedString>(node->fixedSize());
+        case avro::Type::AVRO_ARRAY:
+            return std::make_shared<DataTypeArray>(avroNodeToDataType(node->leafAt(0)));
+        case avro::Type::AVRO_NULL:
+            return std::make_shared<DataTypeNothing>();
+        case avro::Type::AVRO_UNION:
+            if (node->leaves() == 2 && (node->leafAt(0)->type() == avro::Type::AVRO_NULL || node->leafAt(1)->type() == avro::Type::AVRO_NULL))
+            {
+                size_t nested_leaf_index = node->leafAt(0)->type() == avro::Type::AVRO_NULL ? 1 : 0;
+                return makeNullable(avroNodeToDataType(node->leafAt(nested_leaf_index)));
+            }
+            throw Exception(ErrorCodes::ILLEGAL_COLUMN, "Avro type  UNION is not supported for inserting.");
+        case avro::Type::AVRO_SYMBOLIC:
+            return avroNodeToDataType(avro::resolveSymbol(node));
+        default:
+            throw Exception(ErrorCodes::ILLEGAL_COLUMN, "Avro column {} is not supported for inserting.");
+    }
+}
+
 void registerInputFormatAvro(FormatFactory & factory)
 {
     factory.registerInputFormat("Avro", [](
@@ -836,6 +922,21 @@ void registerInputFormatAvro(FormatFactory & factory)
     });
 }
 
+void registerAvroSchemaReader(FormatFactory & factory)
+{
+    factory.registerSchemaReader("Avro", [](ReadBuffer & buf, const FormatSettings & settings, ContextPtr)
+    {
+           return std::make_shared<AvroSchemaReader>(buf, false, settings);
+    });
+
+    factory.registerSchemaReader("AvroConfluent", [](ReadBuffer & buf, const FormatSettings & settings, ContextPtr)
+    {
+        return std::make_shared<AvroSchemaReader>(buf, true, settings);
+    });
+
+}
+
+
 }
 
 #else
@@ -846,6 +947,8 @@ class FormatFactory;
 void registerInputFormatAvro(FormatFactory &)
 {
 }
+
+void registerAvroSchemaReader(FormatFactory &) {}
 }
 
 #endif
diff --git a/src/Processors/Formats/Impl/AvroRowInputFormat.h b/src/Processors/Formats/Impl/AvroRowInputFormat.h
index 73237369e56..46e571d87ec 100644
--- a/src/Processors/Formats/Impl/AvroRowInputFormat.h
+++ b/src/Processors/Formats/Impl/AvroRowInputFormat.h
@@ -13,6 +13,7 @@
 #include <Formats/FormatSettings.h>
 #include <Formats/FormatSchemaInfo.h>
 #include <Processors/Formats/IRowInputFormat.h>
+#include <Processors/Formats/ISchemaReader.h>
 
 #include <avro/DataFile.hh>
 #include <avro/Decoder.hh>
@@ -160,6 +161,20 @@ private:
     FormatSettings format_settings;
 };
 
+class AvroSchemaReader : public ISchemaReader
+{
+public:
+    AvroSchemaReader(ReadBuffer & in_, bool confluent_, const FormatSettings & format_settings_);
+
+    NamesAndTypesList readSchema() override;
+
+private:
+    DataTypePtr avroNodeToDataType(avro::NodePtr node);
+
+    bool confluent;
+    const FormatSettings format_settings;
+};
+
 }
 
 #endif
diff --git a/src/Processors/Formats/Impl/BinaryRowInputFormat.cpp b/src/Processors/Formats/Impl/BinaryRowInputFormat.cpp
index 0506c539c0f..b356967a544 100644
--- a/src/Processors/Formats/Impl/BinaryRowInputFormat.cpp
+++ b/src/Processors/Formats/Impl/BinaryRowInputFormat.cpp
@@ -5,7 +5,6 @@
 #include <Formats/registerWithNamesAndTypes.h>
 #include <DataTypes/DataTypeFactory.h>
 
-
 namespace DB
 {
 
@@ -15,11 +14,23 @@ namespace ErrorCodes
 }
 
 BinaryRowInputFormat::BinaryRowInputFormat(ReadBuffer & in_, Block header, Params params_, bool with_names_, bool with_types_, const FormatSettings & format_settings_)
-    : RowInputFormatWithNamesAndTypes(std::move(header), in_, std::move(params_), with_names_, with_types_, format_settings_)
+    : RowInputFormatWithNamesAndTypes(
+        std::move(header),
+        in_,
+        std::move(params_),
+        with_names_,
+        with_types_,
+        format_settings_,
+        std::make_unique<BinaryFormatReader>(in_, format_settings_))
 {
 }
 
-std::vector<String> BinaryRowInputFormat::readHeaderRow()
+
+BinaryFormatReader::BinaryFormatReader(ReadBuffer & in_, const FormatSettings & format_settings_) : FormatWithNamesAndTypesReader(in_, format_settings_)
+{
+}
+
+std::vector<String> BinaryFormatReader::readHeaderRow()
 {
     std::vector<String> fields;
     String field;
@@ -31,13 +42,13 @@ std::vector<String> BinaryRowInputFormat::readHeaderRow()
     return fields;
 }
 
-std::vector<String> BinaryRowInputFormat::readNames()
+std::vector<String> BinaryFormatReader::readNames()
 {
     readVarUInt(read_columns, *in);
     return readHeaderRow();
 }
 
-std::vector<String> BinaryRowInputFormat::readTypes()
+std::vector<String> BinaryFormatReader::readTypes()
 {
     auto types = readHeaderRow();
     for (const auto & type_name : types)
@@ -45,31 +56,37 @@ std::vector<String> BinaryRowInputFormat::readTypes()
     return types;
 }
 
-bool BinaryRowInputFormat::readField(IColumn & column, const DataTypePtr & /*type*/, const SerializationPtr & serialization, bool /*is_last_file_column*/, const String & /*column_name*/)
+bool BinaryFormatReader::readField(IColumn & column, const DataTypePtr & /*type*/, const SerializationPtr & serialization, bool /*is_last_file_column*/, const String & /*column_name*/)
 {
     serialization->deserializeBinary(column, *in);
     return true;
 }
 
-void BinaryRowInputFormat::skipHeaderRow()
+void BinaryFormatReader::skipHeaderRow()
 {
     String tmp;
     for (size_t i = 0; i < read_columns; ++i)
         readStringBinary(tmp, *in);
 }
 
-void BinaryRowInputFormat::skipNames()
+void BinaryFormatReader::skipNames()
 {
     readVarUInt(read_columns, *in);
     skipHeaderRow();
 }
 
-void BinaryRowInputFormat::skipTypes()
+void BinaryFormatReader::skipTypes()
 {
+    if (read_columns == 0)
+    {
+        /// It's possible only when with_names = false and with_types = true
+        readVarUInt(read_columns, *in);
+    }
+
     skipHeaderRow();
 }
 
-void BinaryRowInputFormat::skipField(size_t file_column)
+void BinaryFormatReader::skipField(size_t file_column)
 {
     if (file_column >= read_data_types.size())
         throw Exception(ErrorCodes::CANNOT_SKIP_UNKNOWN_FIELD, "Cannot skip unknown field in RowBinaryWithNames format, because it's type is unknown");
@@ -77,6 +94,11 @@ void BinaryRowInputFormat::skipField(size_t file_column)
     read_data_types[file_column]->getDefaultSerialization()->deserializeBinary(field, *in);
 }
 
+BinaryWithNamesAndTypesSchemaReader::BinaryWithNamesAndTypesSchemaReader(ReadBuffer & in_, const FormatSettings & format_settings_)
+    : FormatWithNamesAndTypesSchemaReader(in_, 0, true, true, &reader), reader(in_, format_settings_)
+{
+}
+
 void registerInputFormatRowBinary(FormatFactory & factory)
 {
     auto register_func = [&](const String & format_name, bool with_names, bool with_types)
@@ -94,4 +116,13 @@ void registerInputFormatRowBinary(FormatFactory & factory)
     registerWithNamesAndTypes("RowBinary", register_func);
 }
 
+void registerRowBinaryWithNamesAndTypesSchemaReader(FormatFactory & factory)
+{
+    factory.registerSchemaReader("RowBinaryWithNamesAndTypes", [](ReadBuffer & buf, const FormatSettings & settings, ContextPtr)
+    {
+        return std::make_shared<BinaryWithNamesAndTypesSchemaReader>(buf, settings);
+    });
+}
+
+
 }
diff --git a/src/Processors/Formats/Impl/BinaryRowInputFormat.h b/src/Processors/Formats/Impl/BinaryRowInputFormat.h
index 61d6df77522..d98e75bf621 100644
--- a/src/Processors/Formats/Impl/BinaryRowInputFormat.h
+++ b/src/Processors/Formats/Impl/BinaryRowInputFormat.h
@@ -1,15 +1,19 @@
 #pragma once
 
 #include <Core/Block.h>
-#include <Processors/Formats/IRowInputFormat.h>
 #include <Processors/Formats/RowInputFormatWithNamesAndTypes.h>
+#include <Processors/Formats/ISchemaReader.h>
 
 
 namespace DB
 {
 
-class ReadBuffer;
+namespace ErrorCodes
+{
+    extern const int NOT_IMPLEMENTED;
+}
 
+class ReadBuffer;
 
 /** A stream for inputting data in a binary line-by-line format.
   */
@@ -24,9 +28,15 @@ public:
     /// in this format we cannot provide any DiagnosticInfo, because here we have
     /// just binary data.
     std::string getDiagnosticInfo() override { return {}; }
+};
+
+class BinaryFormatReader : public FormatWithNamesAndTypesReader
+{
+public:
+    BinaryFormatReader(ReadBuffer & in_, const FormatSettings & format_settings_);
 
-private:
     bool readField(IColumn & column, const DataTypePtr & type, const SerializationPtr & serialization, bool is_last_file_column, const String & column_name) override;
+
     void skipField(size_t file_column) override;
 
     void skipNames() override;
@@ -37,9 +47,24 @@ private:
     std::vector<String> readTypes() override;
     std::vector<String> readHeaderRow();
 
+private:
     /// Data types read from input data.
     DataTypes read_data_types;
-    UInt64 read_columns = 0;
+    UInt64 read_columns;
+};
+
+class BinaryWithNamesAndTypesSchemaReader : public FormatWithNamesAndTypesSchemaReader
+{
+public:
+    BinaryWithNamesAndTypesSchemaReader(ReadBuffer & in_, const FormatSettings & format_settings_);
+
+private:
+    DataTypes readRowAndGetDataTypes() override
+    {
+        throw Exception{ErrorCodes::NOT_IMPLEMENTED, "Method readRowAndGetDataTypes is not implemented"};
+    }
+
+    BinaryFormatReader reader;
 };
 
 }
diff --git a/src/Processors/Formats/Impl/CSVRowInputFormat.cpp b/src/Processors/Formats/Impl/CSVRowInputFormat.cpp
index 9de2b908b1e..735a549d0a6 100644
--- a/src/Processors/Formats/Impl/CSVRowInputFormat.cpp
+++ b/src/Processors/Formats/Impl/CSVRowInputFormat.cpp
@@ -5,13 +5,16 @@
 #include <Formats/verbosePrintString.h>
 #include <Formats/registerWithNamesAndTypes.h>
 #include <Formats/FormatFactory.h>
+#include <Formats/ReadSchemaUtils.h>
+#include <Formats/EscapingRuleUtils.h>
 #include <Processors/Formats/Impl/CSVRowInputFormat.h>
 #include <DataTypes/Serializations/SerializationNullable.h>
-#include <DataTypes/DataTypeNothing.h>
+#include <DataTypes/DataTypeNullable.h>
+#include <DataTypes/DataTypeString.h>
+
 
 namespace DB
 {
-
 namespace ErrorCodes
 {
     extern const int BAD_ARGUMENTS;
@@ -26,7 +29,14 @@ CSVRowInputFormat::CSVRowInputFormat(
     bool with_names_,
     bool with_types_,
     const FormatSettings & format_settings_)
-    : RowInputFormatWithNamesAndTypes(header_, in_, params_, with_names_, with_types_, format_settings_)
+    : RowInputFormatWithNamesAndTypes(
+        header_,
+        in_,
+        params_,
+        with_names_,
+        with_types_,
+        format_settings_,
+        std::make_unique<CSVFormatReader>(in_, format_settings_))
 {
     const String bad_delimiters = " \t\"'.UL";
     if (bad_delimiters.find(format_settings.csv.delimiter) != String::npos)
@@ -36,6 +46,11 @@ CSVRowInputFormat::CSVRowInputFormat(
             ErrorCodes::BAD_ARGUMENTS);
 }
 
+void CSVRowInputFormat::syncAfterError()
+{
+    skipToNextLineOrEOF(*in);
+}
+
 static void skipEndOfLine(ReadBuffer & in)
 {
     /// \n (Unix) or \r\n (DOS/Windows) or \n\r (Mac OS Classic)
@@ -52,8 +67,10 @@ static void skipEndOfLine(ReadBuffer & in)
         if (!in.eof() && *in.position() == '\n')
             ++in.position();
         else
-            throw Exception("Cannot parse CSV format: found \\r (CR) not followed by \\n (LF)."
-                " Line must end by \\n (LF) or \\r\\n (CR LF) or \\n\\r.", ErrorCodes::INCORRECT_DATA);
+            throw Exception(
+                "Cannot parse CSV format: found \\r (CR) not followed by \\n (LF)."
+                " Line must end by \\n (LF) or \\r\\n (CR LF) or \\n\\r.",
+                ErrorCodes::INCORRECT_DATA);
     }
     else if (!in.eof())
         throw Exception("Expected end of line", ErrorCodes::INCORRECT_DATA);
@@ -62,32 +79,38 @@ static void skipEndOfLine(ReadBuffer & in)
 /// Skip `whitespace` symbols allowed in CSV.
 static inline void skipWhitespacesAndTabs(ReadBuffer & in)
 {
-    while (!in.eof()
-            && (*in.position() == ' '
-                || *in.position() == '\t'))
+    while (!in.eof() && (*in.position() == ' ' || *in.position() == '\t'))
         ++in.position();
 }
 
-void CSVRowInputFormat::skipFieldDelimiter()
+CSVFormatReader::CSVFormatReader(ReadBuffer & in_, const FormatSettings & format_settings_) : FormatWithNamesAndTypesReader(in_, format_settings_)
+{
+}
+
+void CSVFormatReader::skipFieldDelimiter()
 {
     skipWhitespacesAndTabs(*in);
     assertChar(format_settings.csv.delimiter, *in);
 }
 
-String CSVRowInputFormat::readFieldIntoString()
+template <bool read_string>
+String CSVFormatReader::readCSVFieldIntoString()
 {
     skipWhitespacesAndTabs(*in);
     String field;
-    readCSVString(field, *in, format_settings.csv);
+    if constexpr (read_string)
+        readCSVString(field, *in, format_settings.csv);
+    else
+        readCSVField(field, *in, format_settings.csv);
     return field;
 }
 
-void CSVRowInputFormat::skipField()
+void CSVFormatReader::skipField()
 {
-    readFieldIntoString();
+    readCSVFieldIntoString<true>();
 }
 
-void CSVRowInputFormat::skipRowEndDelimiter()
+void CSVFormatReader::skipRowEndDelimiter()
 {
     skipWhitespacesAndTabs(*in);
 
@@ -105,33 +128,32 @@ void CSVRowInputFormat::skipRowEndDelimiter()
     skipEndOfLine(*in);
 }
 
-void CSVRowInputFormat::skipHeaderRow()
+void CSVFormatReader::skipHeaderRow()
 {
     do
     {
         skipField();
         skipWhitespacesAndTabs(*in);
-    }
-    while (checkChar(format_settings.csv.delimiter, *in));
+    } while (checkChar(format_settings.csv.delimiter, *in));
 
     skipRowEndDelimiter();
 }
 
-std::vector<String> CSVRowInputFormat::readHeaderRow()
+template <bool is_header>
+std::vector<String> CSVFormatReader::readRowImpl()
 {
     std::vector<String> fields;
     do
     {
-        fields.push_back(readFieldIntoString());
+        fields.push_back(readCSVFieldIntoString<is_header>());
         skipWhitespacesAndTabs(*in);
-    }
-    while (checkChar(format_settings.csv.delimiter, *in));
+    } while (checkChar(format_settings.csv.delimiter, *in));
 
     skipRowEndDelimiter();
     return fields;
 }
 
-bool CSVRowInputFormat::parseFieldDelimiterWithDiagnosticInfo(WriteBuffer & out)
+bool CSVFormatReader::parseFieldDelimiterWithDiagnosticInfo(WriteBuffer & out)
 {
     const char delimiter = format_settings.csv.delimiter;
 
@@ -144,7 +166,8 @@ bool CSVRowInputFormat::parseFieldDelimiterWithDiagnosticInfo(WriteBuffer & out)
     {
         if (*in->position() == '\n' || *in->position() == '\r')
         {
-            out << "ERROR: Line feed found where delimiter (" << delimiter << ") is expected."
+            out << "ERROR: Line feed found where delimiter (" << delimiter
+                << ") is expected."
                    " It's like your file has less columns than expected.\n"
                    "And if your file has the right number of columns, maybe it has unescaped quotes in values.\n";
         }
@@ -160,7 +183,7 @@ bool CSVRowInputFormat::parseFieldDelimiterWithDiagnosticInfo(WriteBuffer & out)
     return true;
 }
 
-bool CSVRowInputFormat::parseRowEndWithDiagnosticInfo(WriteBuffer & out)
+bool CSVFormatReader::parseRowEndWithDiagnosticInfo(WriteBuffer & out)
 {
     skipWhitespacesAndTabs(*in);
 
@@ -191,23 +214,21 @@ bool CSVRowInputFormat::parseRowEndWithDiagnosticInfo(WriteBuffer & out)
     return true;
 }
 
-void CSVRowInputFormat::syncAfterError()
-{
-    skipToNextLineOrEOF(*in);
-}
-
-bool CSVRowInputFormat::readField(IColumn & column, const DataTypePtr & type, const SerializationPtr & serialization, bool is_last_file_column, const String & /*column_name*/)
+bool CSVFormatReader::readField(
+    IColumn & column,
+    const DataTypePtr & type,
+    const SerializationPtr & serialization,
+    bool is_last_file_column,
+    const String & /*column_name*/)
 {
     skipWhitespacesAndTabs(*in);
 
     const bool at_delimiter = !in->eof() && *in->position() == format_settings.csv.delimiter;
-    const bool at_last_column_line_end = is_last_file_column
-                                         && (in->eof() || *in->position() == '\n' || *in->position() == '\r');
+    const bool at_last_column_line_end = is_last_file_column && (in->eof() || *in->position() == '\n' || *in->position() == '\r');
 
     /// Note: Tuples are serialized in CSV as separate columns, but with empty_as_default or null_as_default
     /// only one empty or NULL column will be expected
-    if (format_settings.csv.empty_as_default
-        && (at_delimiter || at_last_column_line_end))
+    if (format_settings.csv.empty_as_default && (at_delimiter || at_last_column_line_end))
     {
         /// Treat empty unquoted column value as default value, if
         /// specified in the settings. Tuple columns might seem
@@ -231,6 +252,31 @@ bool CSVRowInputFormat::readField(IColumn & column, const DataTypePtr & type, co
     }
 }
 
+
+CSVSchemaReader::CSVSchemaReader(ReadBuffer & in_, bool with_names_, bool with_types_, const FormatSettings & format_setting_, ContextPtr context_)
+    : FormatWithNamesAndTypesSchemaReader(
+        in_,
+        format_setting_.max_rows_to_read_for_schema_inference,
+        with_names_,
+        with_types_,
+        &reader,
+        getDefaultDataTypeForEscapingRule(FormatSettings::EscapingRule::CSV))
+    , reader(in_, format_setting_)
+    , context(context_)
+{
+}
+
+
+DataTypes CSVSchemaReader::readRowAndGetDataTypes()
+{
+    if (in.eof())
+        return {};
+
+    auto fields = reader.readRow();
+    return determineDataTypesByEscapingRule(fields, reader.getFormatSettings(), FormatSettings::EscapingRule::CSV, context);
+}
+
+
 void registerInputFormatCSV(FormatFactory & factory)
 {
     auto register_func = [&](const String & format_name, bool with_names, bool with_types)
@@ -326,4 +372,17 @@ void registerFileSegmentationEngineCSV(FormatFactory & factory)
     registerWithNamesAndTypes("CSV", register_func);
 }
 
+void registerCSVSchemaReader(FormatFactory & factory)
+{
+    auto register_func = [&](const String & format_name, bool with_names, bool with_types)
+    {
+        factory.registerSchemaReader(format_name, [with_names, with_types](ReadBuffer & buf, const FormatSettings & settings, ContextPtr context)
+        {
+            return std::make_shared<CSVSchemaReader>(buf, with_names, with_types, settings, context);
+        });
+    };
+
+    registerWithNamesAndTypes("CSV", register_func);
+}
+
 }
diff --git a/src/Processors/Formats/Impl/CSVRowInputFormat.h b/src/Processors/Formats/Impl/CSVRowInputFormat.h
index d7c557b58d8..d723647595e 100644
--- a/src/Processors/Formats/Impl/CSVRowInputFormat.h
+++ b/src/Processors/Formats/Impl/CSVRowInputFormat.h
@@ -5,6 +5,7 @@
 
 #include <Core/Block.h>
 #include <Processors/Formats/RowInputFormatWithNamesAndTypes.h>
+#include <Processors/Formats/ISchemaReader.h>
 #include <Formats/FormatSettings.h>
 
 
@@ -28,6 +29,12 @@ public:
 private:
     bool allowSyncAfterError() const override { return true; }
     void syncAfterError() override;
+};
+
+class CSVFormatReader : public FormatWithNamesAndTypesReader
+{
+public:
+    CSVFormatReader(ReadBuffer & in_, const FormatSettings & format_settings_);
 
     bool parseFieldDelimiterWithDiagnosticInfo(WriteBuffer & out) override;
     bool parseRowEndWithDiagnosticInfo(WriteBuffer & out) override;
@@ -42,17 +49,34 @@ private:
     void skipField(size_t /*file_column*/) override { skipField(); }
     void skipField();
 
-    void skipHeaderRow() ;
+    void skipHeaderRow();
     void skipNames() override { skipHeaderRow(); }
     void skipTypes() override { skipHeaderRow(); }
     void skipFieldDelimiter() override;
     void skipRowEndDelimiter() override;
 
-    std::vector<String> readHeaderRow();
     std::vector<String> readNames() override { return readHeaderRow(); }
     std::vector<String> readTypes() override { return readHeaderRow(); }
+    std::vector<String> readHeaderRow() { return readRowImpl<true>(); }
+    std::vector<String> readRow() { return readRowImpl<false>(); }
 
-    String readFieldIntoString();
+    template <bool is_header>
+    std::vector<String> readRowImpl();
+
+    template <bool read_string>
+    String readCSVFieldIntoString();
+};
+
+class CSVSchemaReader : public FormatWithNamesAndTypesSchemaReader
+{
+public:
+    CSVSchemaReader(ReadBuffer & in_, bool with_names_, bool with_types_, const FormatSettings & format_setting_, ContextPtr context_);
+
+private:
+    DataTypes readRowAndGetDataTypes() override;
+
+    CSVFormatReader reader;
+    ContextPtr context;
 };
 
 }
diff --git a/src/Processors/Formats/Impl/CapnProtoRowInputFormat.cpp b/src/Processors/Formats/Impl/CapnProtoRowInputFormat.cpp
index 4d000bb1f35..311f4742335 100644
--- a/src/Processors/Formats/Impl/CapnProtoRowInputFormat.cpp
+++ b/src/Processors/Formats/Impl/CapnProtoRowInputFormat.cpp
@@ -273,6 +273,7 @@ bool CapnProtoRowInputFormat::readRow(MutableColumns & columns, RowReadExtension
 #endif
 
     auto root_reader = msg.getRoot<capnp::DynamicStruct>(root);
+
     for (size_t i = 0; i != columns.size(); ++i)
     {
         auto value = getReaderByColumnName(root_reader, column_names[i]);
@@ -282,6 +283,24 @@ bool CapnProtoRowInputFormat::readRow(MutableColumns & columns, RowReadExtension
     return true;
 }
 
+CapnProtoSchemaReader::CapnProtoSchemaReader(const FormatSettings & format_settings_) : format_settings(format_settings_)
+{
+}
+
+NamesAndTypesList CapnProtoSchemaReader::readSchema()
+{
+    auto schema_info = FormatSchemaInfo(
+        format_settings.schema.format_schema,
+        "CapnProto",
+        true,
+        format_settings.schema.is_server,
+        format_settings.schema.format_schema_path);
+
+    auto schema_parser = CapnProtoSchemaParser();
+    auto schema = schema_parser.getMessageSchema(schema_info);
+    return capnProtoSchemaToCHSchema(schema);
+}
+
 void registerInputFormatCapnProto(FormatFactory & factory)
 {
     factory.registerInputFormat(
@@ -293,6 +312,14 @@ void registerInputFormatCapnProto(FormatFactory & factory)
         });
 }
 
+void registerCapnProtoSchemaReader(FormatFactory & factory)
+{
+    factory.registerExternalSchemaReader("CapnProto", [](const FormatSettings & settings)
+    {
+       return std::make_shared<CapnProtoSchemaReader>(settings);
+    });
+}
+
 }
 
 #else
@@ -301,6 +328,7 @@ namespace DB
 {
     class FormatFactory;
     void registerInputFormatCapnProto(FormatFactory &) {}
+    void registerCapnProtoSchemaReader(FormatFactory &) {}
 }
 
 #endif // USE_CAPNP
diff --git a/src/Processors/Formats/Impl/CapnProtoRowInputFormat.h b/src/Processors/Formats/Impl/CapnProtoRowInputFormat.h
index 4c0f34d70a3..053de14d1a4 100644
--- a/src/Processors/Formats/Impl/CapnProtoRowInputFormat.h
+++ b/src/Processors/Formats/Impl/CapnProtoRowInputFormat.h
@@ -6,6 +6,7 @@
 #include <Core/Block.h>
 #include <Formats/CapnProtoUtils.h>
 #include <Processors/Formats/IRowInputFormat.h>
+#include <Processors/Formats/ISchemaReader.h>
 
 namespace DB
 {
@@ -38,6 +39,17 @@ private:
     Names column_names;
 };
 
+class CapnProtoSchemaReader : public IExternalSchemaReader
+{
+public:
+    explicit CapnProtoSchemaReader(const FormatSettings & format_settings_);
+
+    NamesAndTypesList readSchema() override;
+
+private:
+    const FormatSettings format_settings;
+};
+
 }
 
 #endif // USE_CAPNP
diff --git a/src/Processors/Formats/Impl/CustomSeparatedRowInputFormat.cpp b/src/Processors/Formats/Impl/CustomSeparatedRowInputFormat.cpp
index 56aa4345777..d2e0d6e21a9 100644
--- a/src/Processors/Formats/Impl/CustomSeparatedRowInputFormat.cpp
+++ b/src/Processors/Formats/Impl/CustomSeparatedRowInputFormat.cpp
@@ -31,7 +31,7 @@ CustomSeparatedRowInputFormat::CustomSeparatedRowInputFormat(
     bool ignore_spaces_,
     const FormatSettings & format_settings_)
     : CustomSeparatedRowInputFormat(
-        header_, std::make_unique<PeekableReadBuffer>(in_buf_), params_, with_names_, with_types_, ignore_spaces_, format_settings_)
+        header_, std::make_unique<PeekableReadBuffer>(in_buf_), params_, with_names_, with_types_, ignore_spaces_, updateFormatSettings(format_settings_))
 {
 }
 
@@ -43,10 +43,15 @@ CustomSeparatedRowInputFormat::CustomSeparatedRowInputFormat(
     bool with_types_,
     bool ignore_spaces_,
     const FormatSettings & format_settings_)
-    : RowInputFormatWithNamesAndTypes(header_, *buf_, params_, with_names_, with_types_, updateFormatSettings(format_settings_))
+    : RowInputFormatWithNamesAndTypes(
+        header_,
+        *buf_,
+        params_,
+        with_names_,
+        with_types_,
+        format_settings_,
+        std::make_unique<CustomSeparatedFormatReader>(*buf_, ignore_spaces_, format_settings_))
     , buf(std::move(buf_))
-    , ignore_spaces(ignore_spaces_)
-    , escaping_rule(format_settings_.custom.escaping_rule)
 {
     /// In case of CustomSeparatedWithNames(AndTypes) formats and enabled setting input_format_with_names_use_header we don't know
     /// the exact number of columns in data (because it can contain unknown columns). So, if field_delimiter and row_after_delimiter are
@@ -61,43 +66,76 @@ CustomSeparatedRowInputFormat::CustomSeparatedRowInputFormat(
     }
 }
 
-void CustomSeparatedRowInputFormat::skipPrefixBeforeHeader()
+
+bool CustomSeparatedRowInputFormat::allowSyncAfterError() const
+{
+    return !format_settings.custom.row_after_delimiter.empty() || !format_settings.custom.row_between_delimiter.empty();
+}
+
+void CustomSeparatedRowInputFormat::syncAfterError()
+{
+    skipToNextRowOrEof(*buf, format_settings.custom.row_after_delimiter, format_settings.custom.row_between_delimiter, ignore_spaces);
+    end_of_stream = buf->eof();
+    /// It can happen that buf->position() is not at the beginning of row
+    /// if some delimiters is similar to row_format.delimiters.back() and row_between_delimiter.
+    /// It will cause another parsing error.
+}
+
+void CustomSeparatedRowInputFormat::setReadBuffer(ReadBuffer & in_)
+{
+    buf = std::make_unique<PeekableReadBuffer>(in_);
+    RowInputFormatWithNamesAndTypes::setReadBuffer(*buf);
+}
+
+CustomSeparatedFormatReader::CustomSeparatedFormatReader(
+    PeekableReadBuffer & buf_, bool ignore_spaces_, const FormatSettings & format_settings_)
+    : FormatWithNamesAndTypesReader(buf_, format_settings_), buf(&buf_), ignore_spaces(ignore_spaces_)
+{
+}
+
+void CustomSeparatedRowInputFormat::resetParser()
+{
+    RowInputFormatWithNamesAndTypes::resetParser();
+    buf->reset();
+}
+
+void CustomSeparatedFormatReader::skipPrefixBeforeHeader()
 {
     skipSpaces();
     assertString(format_settings.custom.result_before_delimiter, *buf);
 }
 
-void CustomSeparatedRowInputFormat::skipRowStartDelimiter()
+void CustomSeparatedFormatReader::skipRowStartDelimiter()
 {
     skipSpaces();
     assertString(format_settings.custom.row_before_delimiter, *buf);
 }
 
-void CustomSeparatedRowInputFormat::skipFieldDelimiter()
+void CustomSeparatedFormatReader::skipFieldDelimiter()
 {
     skipSpaces();
     assertString(format_settings.custom.field_delimiter, *buf);
 }
 
-void CustomSeparatedRowInputFormat::skipRowEndDelimiter()
+void CustomSeparatedFormatReader::skipRowEndDelimiter()
 {
     skipSpaces();
     assertString(format_settings.custom.row_after_delimiter, *buf);
 }
 
-void CustomSeparatedRowInputFormat::skipRowBetweenDelimiter()
+void CustomSeparatedFormatReader::skipRowBetweenDelimiter()
 {
     skipSpaces();
     assertString(format_settings.custom.row_between_delimiter, *buf);
 }
 
-void CustomSeparatedRowInputFormat::skipField()
+void CustomSeparatedFormatReader::skipField()
 {
     skipSpaces();
-    skipFieldByEscapingRule(*buf, escaping_rule, format_settings);
+    skipFieldByEscapingRule(*buf, format_settings.custom.escaping_rule, format_settings);
 }
 
-bool CustomSeparatedRowInputFormat::checkEndOfRow()
+bool CustomSeparatedFormatReader::checkEndOfRow()
 {
     PeekableReadBufferCheckpoint checkpoint{*buf, true};
 
@@ -118,43 +156,66 @@ bool CustomSeparatedRowInputFormat::checkEndOfRow()
     return checkForSuffixImpl(true);
 }
 
-std::vector<String> CustomSeparatedRowInputFormat::readHeaderRow()
+template <bool is_header>
+String CustomSeparatedFormatReader::readFieldIntoString(bool is_first)
+{
+    if (!is_first)
+        skipFieldDelimiter();
+    skipSpaces();
+    if constexpr (is_header)
+        return readStringByEscapingRule(*buf, format_settings.custom.escaping_rule, format_settings);
+    else
+        return readFieldByEscapingRule(*buf, format_settings.custom.escaping_rule, format_settings);
+}
+
+template <bool is_header>
+std::vector<String> CustomSeparatedFormatReader::readRowImpl()
 {
     std::vector<String> values;
     skipRowStartDelimiter();
-    do
+
+    if (columns == 0)
     {
-        if (!values.empty())
-            skipFieldDelimiter();
-        skipSpaces();
-        values.push_back(readStringByEscapingRule(*buf, escaping_rule, format_settings));
+        do
+        {
+            values.push_back(readFieldIntoString<is_header>(values.empty()));
+        } while (!checkEndOfRow());
+        columns = values.size();
+    }
+    else
+    {
+        for (size_t i = 0; i != columns; ++i)
+            values.push_back(readFieldIntoString<is_header>(i == 0));
     }
-    while (!checkEndOfRow());
 
     skipRowEndDelimiter();
     return values;
 }
 
-void CustomSeparatedRowInputFormat::skipHeaderRow()
+void CustomSeparatedFormatReader::skipHeaderRow()
 {
-    size_t columns = getPort().getHeader().columns();
     skipRowStartDelimiter();
-    for (size_t i = 0; i != columns; ++i)
+    bool first = true;
+    do
     {
-        skipField();
-        if (i + 1 != columns)
+        if (!first)
             skipFieldDelimiter();
+        first = false;
+
+        skipField();
     }
+    while (!checkEndOfRow());
+
     skipRowEndDelimiter();
 }
 
-bool CustomSeparatedRowInputFormat::readField(IColumn & column, const DataTypePtr & type, const SerializationPtr & serialization, bool, const String &)
+bool CustomSeparatedFormatReader::readField(IColumn & column, const DataTypePtr & type, const SerializationPtr & serialization, bool, const String &)
 {
     skipSpaces();
-    return deserializeFieldByEscapingRule(type, serialization, column, *buf, escaping_rule, format_settings);
+    return deserializeFieldByEscapingRule(type, serialization, column, *buf, format_settings.custom.escaping_rule, format_settings);
 }
 
-bool CustomSeparatedRowInputFormat::checkForSuffixImpl(bool check_eof)
+bool CustomSeparatedFormatReader::checkForSuffixImpl(bool check_eof)
 {
     skipSpaces();
     if (format_settings.custom.result_after_delimiter.empty())
@@ -177,7 +238,7 @@ bool CustomSeparatedRowInputFormat::checkForSuffixImpl(bool check_eof)
     return false;
 }
 
-bool CustomSeparatedRowInputFormat::tryParseSuffixWithDiagnosticInfo(WriteBuffer & out)
+bool CustomSeparatedFormatReader::tryParseSuffixWithDiagnosticInfo(WriteBuffer & out)
 {
     PeekableReadBufferCheckpoint checkpoint{*buf};
     if (checkForSuffixImpl(false))
@@ -192,7 +253,7 @@ bool CustomSeparatedRowInputFormat::tryParseSuffixWithDiagnosticInfo(WriteBuffer
     return true;
 }
 
-bool CustomSeparatedRowInputFormat::checkForSuffix()
+bool CustomSeparatedFormatReader::checkForSuffix()
 {
     PeekableReadBufferCheckpoint checkpoint{*buf};
     if (checkForSuffixImpl(true))
@@ -201,51 +262,60 @@ bool CustomSeparatedRowInputFormat::checkForSuffix()
     return false;
 }
 
-
-bool CustomSeparatedRowInputFormat::allowSyncAfterError() const
-{
-    return !format_settings.custom.row_after_delimiter.empty() || !format_settings.custom.row_between_delimiter.empty();
-}
-
-void CustomSeparatedRowInputFormat::syncAfterError()
-{
-    skipToNextRowOrEof(*buf, format_settings.custom.row_after_delimiter, format_settings.custom.row_between_delimiter, ignore_spaces);
-    end_of_stream = buf->eof();
-    /// It can happen that buf->position() is not at the beginning of row
-    /// if some delimiters is similar to row_format.delimiters.back() and row_between_delimiter.
-    /// It will cause another parsing error.
-}
-
-bool CustomSeparatedRowInputFormat::parseRowStartWithDiagnosticInfo(WriteBuffer & out)
+bool CustomSeparatedFormatReader::parseRowStartWithDiagnosticInfo(WriteBuffer & out)
 {
     return parseDelimiterWithDiagnosticInfo(out, *buf, format_settings.custom.row_before_delimiter, "delimiter before first field", ignore_spaces);
 }
 
-bool CustomSeparatedRowInputFormat::parseFieldDelimiterWithDiagnosticInfo(WriteBuffer & out)
+bool CustomSeparatedFormatReader::parseFieldDelimiterWithDiagnosticInfo(WriteBuffer & out)
 {
     return parseDelimiterWithDiagnosticInfo(out, *buf, format_settings.custom.field_delimiter, "delimiter between fields", ignore_spaces);
 }
 
-bool CustomSeparatedRowInputFormat::parseRowEndWithDiagnosticInfo(WriteBuffer & out)
+bool CustomSeparatedFormatReader::parseRowEndWithDiagnosticInfo(WriteBuffer & out)
 {
     return parseDelimiterWithDiagnosticInfo(out, *buf, format_settings.custom.row_after_delimiter, "delimiter after last field", ignore_spaces);
 }
 
-bool CustomSeparatedRowInputFormat::parseRowBetweenDelimiterWithDiagnosticInfo(WriteBuffer & out)
+bool CustomSeparatedFormatReader::parseRowBetweenDelimiterWithDiagnosticInfo(WriteBuffer & out)
 {
     return parseDelimiterWithDiagnosticInfo(out, *buf, format_settings.custom.row_between_delimiter, "delimiter between rows", ignore_spaces);
 }
 
-void CustomSeparatedRowInputFormat::resetParser()
+void CustomSeparatedFormatReader::setReadBuffer(ReadBuffer & in_)
 {
-    RowInputFormatWithNamesAndTypes::resetParser();
-    buf->reset();
+    buf = assert_cast<PeekableReadBuffer *>(&in_);
+    FormatWithNamesAndTypesReader::setReadBuffer(in_);
 }
 
-void CustomSeparatedRowInputFormat::setReadBuffer(ReadBuffer & in_)
+CustomSeparatedSchemaReader::CustomSeparatedSchemaReader(
+    ReadBuffer & in_, bool with_names_, bool with_types_, bool ignore_spaces_, const FormatSettings & format_setting_, ContextPtr context_)
+    : FormatWithNamesAndTypesSchemaReader(
+        buf,
+        format_setting_.max_rows_to_read_for_schema_inference,
+        with_names_,
+        with_types_,
+        &reader,
+        getDefaultDataTypeForEscapingRule(format_setting_.custom.escaping_rule))
+    , buf(in_)
+    , reader(buf, ignore_spaces_, updateFormatSettings(format_setting_))
+    , context(context_)
 {
-    buf = std::make_unique<PeekableReadBuffer>(in_);
-    IInputFormat::setReadBuffer(*buf);
+}
+
+DataTypes CustomSeparatedSchemaReader::readRowAndGetDataTypes()
+{
+    if (reader.checkForSuffix())
+        return {};
+
+    if (!first_row || with_names || with_types)
+        reader.skipRowBetweenDelimiter();
+
+    if (first_row)
+        first_row = false;
+
+    auto fields = reader.readRow();
+    return determineDataTypesByEscapingRule(fields, reader.getFormatSettings(), reader.getEscapingRule(), context);
 }
 
 void registerInputFormatCustomSeparated(FormatFactory & factory)
@@ -267,4 +337,20 @@ void registerInputFormatCustomSeparated(FormatFactory & factory)
     }
 }
 
+void registerCustomSeparatedSchemaReader(FormatFactory & factory)
+{
+    for (bool ignore_spaces : {false, true})
+    {
+        auto register_func = [&](const String & format_name, bool with_names, bool with_types)
+        {
+            factory.registerSchemaReader(format_name, [with_names, with_types, ignore_spaces](ReadBuffer & buf, const FormatSettings & settings, ContextPtr context)
+            {
+                return std::make_shared<CustomSeparatedSchemaReader>(buf, with_names, with_types, ignore_spaces, settings, context);
+            });
+        };
+
+        registerWithNamesAndTypes(ignore_spaces ? "CustomSeparatedIgnoreSpaces" : "CustomSeparated", register_func);
+    }
+}
+
 }
diff --git a/src/Processors/Formats/Impl/CustomSeparatedRowInputFormat.h b/src/Processors/Formats/Impl/CustomSeparatedRowInputFormat.h
index 6b572ca1417..d38d5bf0da4 100644
--- a/src/Processors/Formats/Impl/CustomSeparatedRowInputFormat.h
+++ b/src/Processors/Formats/Impl/CustomSeparatedRowInputFormat.h
@@ -19,7 +19,6 @@ public:
 
     void resetParser() override;
     String getName() const override { return "CustomSeparatedRowInputFormat"; }
-
     void setReadBuffer(ReadBuffer & in_) override;
 
 private:
@@ -28,6 +27,19 @@ private:
         std::unique_ptr<PeekableReadBuffer> in_buf_,
         const Params & params_,
         bool with_names_, bool with_types_, bool ignore_spaces_, const FormatSettings & format_settings_);
+
+    bool allowSyncAfterError() const override;
+    void syncAfterError() override;
+
+    std::unique_ptr<PeekableReadBuffer> buf;
+    bool ignore_spaces;
+};
+
+class CustomSeparatedFormatReader : public FormatWithNamesAndTypesReader
+{
+public:
+    CustomSeparatedFormatReader(PeekableReadBuffer & buf_, bool ignore_spaces_, const FormatSettings & format_settings_);
+
     using EscapingRule = FormatSettings::EscapingRule;
 
     bool readField(IColumn & column, const DataTypePtr & type, const SerializationPtr & serialization, bool is_last_file_column, const String & column_name) override;
@@ -46,9 +58,6 @@ private:
 
     bool checkForSuffix() override;
 
-    bool allowSyncAfterError() const override;
-    void syncAfterError() override;
-
     bool parseRowStartWithDiagnosticInfo(WriteBuffer & out) override;
     bool parseFieldDelimiterWithDiagnosticInfo(WriteBuffer & out) override;
     bool parseRowEndWithDiagnosticInfo(WriteBuffer & out) override;
@@ -57,15 +66,41 @@ private:
 
     std::vector<String> readNames() override { return readHeaderRow(); }
     std::vector<String> readTypes() override { return readHeaderRow(); }
-    std::vector<String> readHeaderRow();
+    std::vector<String> readHeaderRow() {return readRowImpl<true>(); }
+
+    std::vector<String> readRow() { return readRowImpl<false>(); }
 
     bool checkEndOfRow();
     bool checkForSuffixImpl(bool check_eof);
     inline void skipSpaces() { if (ignore_spaces) skipWhitespaceIfAny(*buf); }
 
-    std::unique_ptr<PeekableReadBuffer> buf;
+    EscapingRule getEscapingRule() { return format_settings.custom.escaping_rule; }
+
+    void setReadBuffer(ReadBuffer & in_) override;
+private:
+    template <bool is_header>
+    std::vector<String> readRowImpl();
+
+    template <bool read_string>
+    String readFieldIntoString(bool is_first);
+
+    PeekableReadBuffer * buf;
     bool ignore_spaces;
-    EscapingRule escaping_rule;
+    size_t columns = 0;
+};
+
+class CustomSeparatedSchemaReader : public FormatWithNamesAndTypesSchemaReader
+{
+public:
+    CustomSeparatedSchemaReader(ReadBuffer & in_, bool with_names_, bool with_types_, bool ignore_spaces_, const FormatSettings & format_setting_, ContextPtr context_);
+
+private:
+    DataTypes readRowAndGetDataTypes() override;
+
+    PeekableReadBuffer buf;
+    CustomSeparatedFormatReader reader;
+    ContextPtr context;
+    bool first_row = true;
 };
 
 }
diff --git a/src/Processors/Formats/Impl/JSONAsStringRowInputFormat.cpp b/src/Processors/Formats/Impl/JSONAsStringRowInputFormat.cpp
index 476985c2509..56ba975dea1 100644
--- a/src/Processors/Formats/Impl/JSONAsStringRowInputFormat.cpp
+++ b/src/Processors/Formats/Impl/JSONAsStringRowInputFormat.cpp
@@ -202,4 +202,12 @@ void registerNonTrivialPrefixAndSuffixCheckerJSONAsString(FormatFactory & factor
     factory.registerNonTrivialPrefixAndSuffixChecker("JSONAsString", nonTrivialPrefixAndSuffixCheckerJSONEachRowImpl);
 }
 
+void registerJSONAsStringSchemaReader(FormatFactory & factory)
+{
+    factory.registerExternalSchemaReader("JSONAsString", [](const FormatSettings &)
+    {
+        return std::make_shared<JSONAsStringExternalSchemaReader>();
+    });
+}
+
 }
diff --git a/src/Processors/Formats/Impl/JSONAsStringRowInputFormat.h b/src/Processors/Formats/Impl/JSONAsStringRowInputFormat.h
index d86142af795..ea6e9a1ed2f 100644
--- a/src/Processors/Formats/Impl/JSONAsStringRowInputFormat.h
+++ b/src/Processors/Formats/Impl/JSONAsStringRowInputFormat.h
@@ -1,8 +1,10 @@
 #pragma once
 
 #include <Processors/Formats/IRowInputFormat.h>
+#include <Processors/Formats/ISchemaReader.h>
 #include <Formats/FormatFactory.h>
 #include <IO/PeekableReadBuffer.h>
+#include <DataTypes/DataTypeString.h>
 
 namespace DB
 {
@@ -39,4 +41,13 @@ private:
     bool allow_new_rows = true;
 };
 
+class JSONAsStringExternalSchemaReader : public IExternalSchemaReader
+{
+public:
+    NamesAndTypesList readSchema() override
+    {
+        return {{"json", std::make_shared<DataTypeString>()}};
+    }
+};
+
 }
diff --git a/src/Processors/Formats/Impl/JSONCompactEachRowRowInputFormat.cpp b/src/Processors/Formats/Impl/JSONCompactEachRowRowInputFormat.cpp
index 88fb411ffbd..263702ad20f 100644
--- a/src/Processors/Formats/Impl/JSONCompactEachRowRowInputFormat.cpp
+++ b/src/Processors/Formats/Impl/JSONCompactEachRowRowInputFormat.cpp
@@ -1,6 +1,7 @@
 #include <Processors/Formats/Impl/JSONCompactEachRowRowInputFormat.h>
 
 #include <IO/ReadHelpers.h>
+#include <IO/PeekableReadBuffer.h>
 #include <IO/Operators.h>
 #include <Formats/FormatFactory.h>
 #include <Formats/verbosePrintString.h>
@@ -8,16 +9,13 @@
 #include <Formats/registerWithNamesAndTypes.h>
 #include <DataTypes/NestedUtils.h>
 #include <DataTypes/Serializations/SerializationNullable.h>
+#include <DataTypes/DataTypeFactory.h>
+#include <DataTypes/DataTypeNullable.h>
+#include <Poco/JSON/Parser.h>
 
 namespace DB
 {
 
-namespace ErrorCodes
-{
-    extern const int INCORRECT_DATA;
-}
-
-
 JSONCompactEachRowRowInputFormat::JSONCompactEachRowRowInputFormat(
     const Block & header_,
     ReadBuffer & in_,
@@ -26,24 +24,40 @@ JSONCompactEachRowRowInputFormat::JSONCompactEachRowRowInputFormat(
     bool with_types_,
     bool yield_strings_,
     const FormatSettings & format_settings_)
-    : RowInputFormatWithNamesAndTypes(header_, in_, std::move(params_), with_names_, with_types_, format_settings_)
-    , yield_strings(yield_strings_)
+    : RowInputFormatWithNamesAndTypes(
+        header_,
+        in_,
+        std::move(params_),
+        with_names_,
+        with_types_,
+        format_settings_,
+        std::make_unique<JSONCompactEachRowFormatReader>(in_, yield_strings_, format_settings_))
 {
 }
 
-void JSONCompactEachRowRowInputFormat::skipRowStartDelimiter()
+void JSONCompactEachRowRowInputFormat::syncAfterError()
+{
+    skipToUnescapedNextLineOrEOF(*in);
+}
+
+JSONCompactEachRowFormatReader::JSONCompactEachRowFormatReader(ReadBuffer & in_, bool yield_strings_, const FormatSettings & format_settings_)
+    : FormatWithNamesAndTypesReader(in_, format_settings_), yield_strings(yield_strings_)
+{
+}
+
+void JSONCompactEachRowFormatReader::skipRowStartDelimiter()
 {
     skipWhitespaceIfAny(*in);
     assertChar('[', *in);
 }
 
-void JSONCompactEachRowRowInputFormat::skipFieldDelimiter()
+void JSONCompactEachRowFormatReader::skipFieldDelimiter()
 {
     skipWhitespaceIfAny(*in);
     assertChar(',', *in);
 }
 
-void JSONCompactEachRowRowInputFormat::skipRowEndDelimiter()
+void JSONCompactEachRowFormatReader::skipRowEndDelimiter()
 {
     skipWhitespaceIfAny(*in);
     assertChar(']', *in);
@@ -55,29 +69,18 @@ void JSONCompactEachRowRowInputFormat::skipRowEndDelimiter()
     skipWhitespaceIfAny(*in);
 }
 
-String JSONCompactEachRowRowInputFormat::readFieldIntoString()
+void JSONCompactEachRowFormatReader::skipField()
 {
     skipWhitespaceIfAny(*in);
-    String field;
-    readJSONString(field, *in);
-    return field;
+    skipJSONField(*in, "skipped_field");
 }
 
-void JSONCompactEachRowRowInputFormat::skipField(size_t file_column)
-{
-    skipWhitespaceIfAny(*in);
-    skipJSONField(*in, column_mapping->names_of_columns[file_column]);
-}
-
-void JSONCompactEachRowRowInputFormat::skipHeaderRow()
+void JSONCompactEachRowFormatReader::skipHeaderRow()
 {
     skipRowStartDelimiter();
-    size_t i = 0;
     do
     {
-        if (i >= column_mapping->names_of_columns.size())
-            throw Exception(ErrorCodes::INCORRECT_DATA, "The number of columns in a row differs from the number of column names");
-        skipField(i++);
+        skipField();
         skipWhitespaceIfAny(*in);
     }
     while (checkChar(',', *in));
@@ -85,13 +88,16 @@ void JSONCompactEachRowRowInputFormat::skipHeaderRow()
     skipRowEndDelimiter();
 }
 
-std::vector<String> JSONCompactEachRowRowInputFormat::readHeaderRow()
+std::vector<String> JSONCompactEachRowFormatReader::readHeaderRow()
 {
     skipRowStartDelimiter();
     std::vector<String> fields;
+    String field;
     do
     {
-        fields.push_back(readFieldIntoString());
+        skipWhitespaceIfAny(*in);
+        readJSONString(field, *in);
+        fields.push_back(field);
         skipWhitespaceIfAny(*in);
     }
     while (checkChar(',', *in));
@@ -100,18 +106,13 @@ std::vector<String> JSONCompactEachRowRowInputFormat::readHeaderRow()
     return fields;
 }
 
-bool JSONCompactEachRowRowInputFormat::readField(IColumn & column, const DataTypePtr & type, const SerializationPtr & serialization, bool /*is_last_file_column*/, const String & column_name)
+bool JSONCompactEachRowFormatReader::readField(IColumn & column, const DataTypePtr & type, const SerializationPtr & serialization, bool /*is_last_file_column*/, const String & column_name)
 {
     skipWhitespaceIfAny(*in);
     return readFieldImpl(*in, column, type, serialization, column_name, format_settings, yield_strings);
 }
 
-void JSONCompactEachRowRowInputFormat::syncAfterError()
-{
-    skipToUnescapedNextLineOrEOF(*in);
-}
-
-bool JSONCompactEachRowRowInputFormat::parseRowStartWithDiagnosticInfo(WriteBuffer & out)
+bool JSONCompactEachRowFormatReader::parseRowStartWithDiagnosticInfo(WriteBuffer & out)
 {
     skipWhitespaceIfAny(*in);
     if (!checkChar('[', *in))
@@ -123,7 +124,7 @@ bool JSONCompactEachRowRowInputFormat::parseRowStartWithDiagnosticInfo(WriteBuff
     return true;
 }
 
-bool JSONCompactEachRowRowInputFormat::parseFieldDelimiterWithDiagnosticInfo(WriteBuffer & out)
+bool JSONCompactEachRowFormatReader::parseFieldDelimiterWithDiagnosticInfo(WriteBuffer & out)
 {
     try
     {
@@ -150,7 +151,7 @@ bool JSONCompactEachRowRowInputFormat::parseFieldDelimiterWithDiagnosticInfo(Wri
     return true;
 }
 
-bool JSONCompactEachRowRowInputFormat::parseRowEndWithDiagnosticInfo(WriteBuffer & out)
+bool JSONCompactEachRowFormatReader::parseRowEndWithDiagnosticInfo(WriteBuffer & out)
 {
     skipWhitespaceIfAny(*in);
 
@@ -180,6 +181,20 @@ bool JSONCompactEachRowRowInputFormat::parseRowEndWithDiagnosticInfo(WriteBuffer
     return true;
 }
 
+JSONCompactEachRowRowSchemaReader::JSONCompactEachRowRowSchemaReader(ReadBuffer & in_, bool with_names_, bool with_types_, bool yield_strings_, const FormatSettings & format_settings_)
+    : FormatWithNamesAndTypesSchemaReader(in_, format_settings_.max_rows_to_read_for_schema_inference, with_names_, with_types_, &reader), reader(in_, yield_strings_, format_settings_)
+{
+}
+
+DataTypes JSONCompactEachRowRowSchemaReader::readRowAndGetDataTypes()
+{
+    skipWhitespaceIfAny(in);
+    if (in.eof())
+        return {};
+
+    return readRowAndGetDataTypesForJSONCompactEachRow(in, reader.yieldStrings());
+}
+
 void registerInputFormatJSONCompactEachRow(FormatFactory & factory)
 {
     for (bool yield_strings : {true, false})
@@ -200,6 +215,21 @@ void registerInputFormatJSONCompactEachRow(FormatFactory & factory)
     }
 }
 
+void registerJSONCompactEachRowSchemaReader(FormatFactory & factory)
+{
+    for (bool json_strings : {false, true})
+    {
+        auto register_func = [&](const String & format_name, bool with_names, bool with_types)
+        {
+            factory.registerSchemaReader(format_name, [=](ReadBuffer & buf, const FormatSettings & settings, ContextPtr)
+            {
+                return std::make_shared<JSONCompactEachRowRowSchemaReader>(buf, with_names, with_types, json_strings, settings);
+            });
+        };
+        registerWithNamesAndTypes(json_strings ? "JSONCompactStringsEachRow" : "JSONCompactEachRow", register_func);
+    }
+}
+
 void registerFileSegmentationEngineJSONCompactEachRow(FormatFactory & factory)
 {
     auto register_func = [&](const String & format_name, bool with_names, bool with_types)
diff --git a/src/Processors/Formats/Impl/JSONCompactEachRowRowInputFormat.h b/src/Processors/Formats/Impl/JSONCompactEachRowRowInputFormat.h
index e01a4f49b30..0551aa8b64e 100644
--- a/src/Processors/Formats/Impl/JSONCompactEachRowRowInputFormat.h
+++ b/src/Processors/Formats/Impl/JSONCompactEachRowRowInputFormat.h
@@ -2,6 +2,7 @@
 
 #include <Core/Block.h>
 #include <Processors/Formats/RowInputFormatWithNamesAndTypes.h>
+#include <Processors/Formats/ISchemaReader.h>
 #include <Formats/FormatSettings.h>
 #include <Common/HashTable/HashMap.h>
 
@@ -10,6 +11,7 @@ namespace DB
 
 class ReadBuffer;
 
+
 /** A stream for reading data in a bunch of formats:
  *  - JSONCompactEachRow
  *  - JSONCompactEachRowWithNamesAndTypes
@@ -34,6 +36,13 @@ public:
 private:
     bool allowSyncAfterError() const override { return true; }
     void syncAfterError() override;
+};
+
+class JSONCompactEachRowFormatReader : public FormatWithNamesAndTypesReader
+{
+public:
+    JSONCompactEachRowFormatReader(ReadBuffer & in_, bool yield_strings_, const FormatSettings & format_settings_);
+
 
     bool parseRowStartWithDiagnosticInfo(WriteBuffer & out) override;
     bool parseFieldDelimiterWithDiagnosticInfo(WriteBuffer & out) override;
@@ -45,7 +54,8 @@ private:
 
     bool readField(IColumn & column, const DataTypePtr & type, const SerializationPtr & serialization, bool is_last_file_column, const String & column_name) override;
 
-    void skipField(size_t file_column) override;
+    void skipField(size_t /*column_index*/) override { skipField(); }
+    void skipField();
     void skipHeaderRow();
     void skipNames() override { skipHeaderRow(); }
     void skipTypes() override { skipHeaderRow(); }
@@ -56,9 +66,21 @@ private:
     std::vector<String> readHeaderRow();
     std::vector<String> readNames() override { return readHeaderRow(); }
     std::vector<String> readTypes() override { return readHeaderRow(); }
-    String readFieldIntoString();
 
+    bool yieldStrings() const { return yield_strings; }
+private:
     bool yield_strings;
 };
 
+class JSONCompactEachRowRowSchemaReader : public FormatWithNamesAndTypesSchemaReader
+{
+public:
+    JSONCompactEachRowRowSchemaReader(ReadBuffer & in_, bool with_names_, bool with_types_, bool yield_strings_, const FormatSettings & format_settings_);
+
+private:
+    DataTypes readRowAndGetDataTypes() override;
+
+    JSONCompactEachRowFormatReader reader;
+};
+
 }
diff --git a/src/Processors/Formats/Impl/JSONEachRowRowInputFormat.cpp b/src/Processors/Formats/Impl/JSONEachRowRowInputFormat.cpp
index 28481313974..75beca955b9 100644
--- a/src/Processors/Formats/Impl/JSONEachRowRowInputFormat.cpp
+++ b/src/Processors/Formats/Impl/JSONEachRowRowInputFormat.cpp
@@ -6,6 +6,7 @@
 #include <Formats/FormatFactory.h>
 #include <DataTypes/NestedUtils.h>
 #include <DataTypes/Serializations/SerializationNullable.h>
+#include <DataTypes/getLeastSupertype.h>
 
 namespace DB
 {
@@ -286,11 +287,7 @@ void JSONEachRowRowInputFormat::readPrefix()
     skipBOMIfExists(*in);
 
     skipWhitespaceIfAny(*in);
-    if (!in->eof() && *in->position() == '[')
-    {
-        ++in->position();
-        data_in_square_brackets = true;
-    }
+    data_in_square_brackets = checkChar('[', *in);
 }
 
 void JSONEachRowRowInputFormat::readSuffix()
@@ -309,6 +306,28 @@ void JSONEachRowRowInputFormat::readSuffix()
     assertEOF(*in);
 }
 
+JSONEachRowSchemaReader::JSONEachRowSchemaReader(ReadBuffer & in_, bool json_strings_, const FormatSettings & format_settings)
+    : IRowWithNamesSchemaReader(in_, format_settings.max_rows_to_read_for_schema_inference), json_strings(json_strings_)
+{
+}
+
+
+std::unordered_map<String, DataTypePtr> JSONEachRowSchemaReader::readRowAndGetNamesAndDataTypes()
+{
+    if (first_row)
+    {
+        skipBOMIfExists(in);
+        skipWhitespaceIfAny(in);
+        checkChar('[', in);
+        first_row = false;
+    }
+
+    skipWhitespaceIfAny(in);
+    if (in.eof())
+        return {};
+
+    return readRowAndGetNamesAndDataTypesForJSONEachRow(in, json_strings);
+}
 
 void registerInputFormatJSONEachRow(FormatFactory & factory)
 {
@@ -343,4 +362,17 @@ void registerNonTrivialPrefixAndSuffixCheckerJSONEachRow(FormatFactory & factory
     factory.registerNonTrivialPrefixAndSuffixChecker("JSONStringsEachRow", nonTrivialPrefixAndSuffixCheckerJSONEachRowImpl);
 }
 
+void registerJSONEachRowSchemaReader(FormatFactory & factory)
+{
+    factory.registerSchemaReader("JSONEachRow", [](ReadBuffer & buf, const FormatSettings & settings, ContextPtr)
+    {
+        return std::make_unique<JSONEachRowSchemaReader>(buf, false, settings);
+    });
+
+    factory.registerSchemaReader("JSONStringsEachRow", [](ReadBuffer & buf, const FormatSettings & settings, ContextPtr)
+    {
+        return std::make_unique<JSONEachRowSchemaReader>(buf, true, settings);
+    });
+}
+
 }
diff --git a/src/Processors/Formats/Impl/JSONEachRowRowInputFormat.h b/src/Processors/Formats/Impl/JSONEachRowRowInputFormat.h
index 9810f2dc765..323909a7730 100644
--- a/src/Processors/Formats/Impl/JSONEachRowRowInputFormat.h
+++ b/src/Processors/Formats/Impl/JSONEachRowRowInputFormat.h
@@ -2,6 +2,7 @@
 
 #include <Core/Block.h>
 #include <Processors/Formats/IRowInputFormat.h>
+#include <Processors/Formats/ISchemaReader.h>
 #include <Formats/FormatSettings.h>
 #include <Common/HashTable/HashMap.h>
 
@@ -84,4 +85,16 @@ private:
     bool yield_strings;
 };
 
+class JSONEachRowSchemaReader : public IRowWithNamesSchemaReader
+{
+public:
+    JSONEachRowSchemaReader(ReadBuffer & in_, bool json_strings, const FormatSettings & format_settings);
+
+private:
+    std::unordered_map<String, DataTypePtr> readRowAndGetNamesAndDataTypes() override;
+
+    bool json_strings;
+    bool first_row = true;
+};
+
 }
diff --git a/src/Processors/Formats/Impl/LineAsStringRowInputFormat.cpp b/src/Processors/Formats/Impl/LineAsStringRowInputFormat.cpp
index 1a05f61d36b..5983f3170e5 100644
--- a/src/Processors/Formats/Impl/LineAsStringRowInputFormat.cpp
+++ b/src/Processors/Formats/Impl/LineAsStringRowInputFormat.cpp
@@ -72,4 +72,13 @@ void registerInputFormatLineAsString(FormatFactory & factory)
         return std::make_shared<LineAsStringRowInputFormat>(sample, buf, params);
     });
 }
+
+void registerLineAsStringSchemaReader(FormatFactory & factory)
+{
+    factory.registerExternalSchemaReader("LineAsString", [](
+            const FormatSettings &)
+    {
+        return std::make_shared<LinaAsStringSchemaReader>();
+    });
+}
 }
diff --git a/src/Processors/Formats/Impl/LineAsStringRowInputFormat.h b/src/Processors/Formats/Impl/LineAsStringRowInputFormat.h
index 1a6c6247558..c4c17c47dbe 100644
--- a/src/Processors/Formats/Impl/LineAsStringRowInputFormat.h
+++ b/src/Processors/Formats/Impl/LineAsStringRowInputFormat.h
@@ -1,7 +1,9 @@
 #pragma once
 
 #include <Processors/Formats/IRowInputFormat.h>
+#include <Processors/Formats/ISchemaReader.h>
 #include <Formats/FormatFactory.h>
+#include <DataTypes/DataTypeString.h>
 
 namespace DB
 {
@@ -26,4 +28,13 @@ private:
     void readLineObject(IColumn & column);
 };
 
+class LinaAsStringSchemaReader : public IExternalSchemaReader
+{
+public:
+    NamesAndTypesList readSchema() override
+    {
+        return {{"line", std::make_shared<DataTypeString>()}};
+    }
+};
+
 }
diff --git a/src/Processors/Formats/Impl/MsgPackRowInputFormat.cpp b/src/Processors/Formats/Impl/MsgPackRowInputFormat.cpp
index 60db32d879a..c56af536e15 100644
--- a/src/Processors/Formats/Impl/MsgPackRowInputFormat.cpp
+++ b/src/Processors/Formats/Impl/MsgPackRowInputFormat.cpp
@@ -11,6 +11,7 @@
 #include <DataTypes/DataTypeNullable.h>
 #include <DataTypes/DataTypeMap.h>
 #include <DataTypes/DataTypeLowCardinality.h>
+#include <DataTypes/DataTypeString.h>
 
 #include <Columns/ColumnArray.h>
 #include <Columns/ColumnNullable.h>
@@ -26,6 +27,8 @@ namespace ErrorCodes
 {
     extern const int ILLEGAL_COLUMN;
     extern const int INCORRECT_DATA;
+    extern const int BAD_ARGUMENTS;
+    extern const int UNEXPECTED_END_OF_FILE;
 }
 
 MsgPackRowInputFormat::MsgPackRowInputFormat(const Block & header_, ReadBuffer & in_, Params params_)
@@ -369,7 +372,108 @@ bool MsgPackRowInputFormat::readRow(MutableColumns & columns, RowReadExtension &
 void MsgPackRowInputFormat::setReadBuffer(ReadBuffer & in_)
 {
     buf = std::make_unique<PeekableReadBuffer>(in_);
-    IInputFormat::setReadBuffer(*buf);
+    IInputFormat::setReadBuffer(in_);
+}
+
+MsgPackSchemaReader::MsgPackSchemaReader(ReadBuffer & in_, const FormatSettings & format_settings_)
+    : IRowSchemaReader(buf, format_settings_.max_rows_to_read_for_schema_inference), buf(in_), number_of_columns(format_settings_.msgpack.number_of_columns)
+{
+    if (!number_of_columns)
+        throw Exception(ErrorCodes::BAD_ARGUMENTS, "You must specify setting input_format_msgpack_number_of_columns to extract table schema from MsgPack data");
+}
+
+
+msgpack::object_handle MsgPackSchemaReader::readObject()
+{
+    if (buf.eof())
+        throw Exception(ErrorCodes::UNEXPECTED_END_OF_FILE, "Unexpected eof while parsing msgpack object");
+
+    PeekableReadBufferCheckpoint checkpoint{buf};
+    size_t offset = 0;
+    bool need_more_data = true;
+    msgpack::object_handle object_handle;
+    while (need_more_data)
+    {
+        offset = 0;
+        try
+        {
+            object_handle = msgpack::unpack(buf.position(), buf.buffer().end() - buf.position(), offset);
+            need_more_data = false;
+        }
+        catch (msgpack::insufficient_bytes &)
+        {
+            buf.position() = buf.buffer().end();
+            if (buf.eof())
+                throw Exception("Unexpected end of file while parsing msgpack object", ErrorCodes::UNEXPECTED_END_OF_FILE);
+            buf.position() = buf.buffer().end();
+            buf.makeContinuousMemoryFromCheckpointToPos();
+            buf.rollbackToCheckpoint();
+        }
+    }
+    buf.position() += offset;
+    return object_handle;
+}
+
+DataTypePtr MsgPackSchemaReader::getDataType(const msgpack::object & object)
+{
+    switch (object.type)
+    {
+        case msgpack::type::object_type::POSITIVE_INTEGER: [[fallthrough]];
+        case msgpack::type::object_type::NEGATIVE_INTEGER:
+            return makeNullable(std::make_shared<DataTypeInt64>());
+        case msgpack::type::object_type::FLOAT32:
+            return makeNullable(std::make_shared<DataTypeFloat32>());
+        case msgpack::type::object_type::FLOAT64:
+            return makeNullable(std::make_shared<DataTypeFloat64>());
+        case msgpack::type::object_type::BOOLEAN:
+            return makeNullable(std::make_shared<DataTypeUInt8>());
+        case msgpack::type::object_type::BIN: [[fallthrough]];
+        case msgpack::type::object_type::STR:
+            return makeNullable(std::make_shared<DataTypeString>());
+        case msgpack::type::object_type::ARRAY:
+        {
+            msgpack::object_array object_array = object.via.array;
+            if (object_array.size)
+            {
+                auto nested_type = getDataType(object_array.ptr[0]);
+                if (nested_type)
+                    return std::make_shared<DataTypeArray>(getDataType(object_array.ptr[0]));
+            }
+            return nullptr;
+        }
+        case msgpack::type::object_type::MAP:
+        {
+            msgpack::object_map object_map = object.via.map;
+            if (object_map.size)
+            {
+                auto key_type = removeNullable(getDataType(object_map.ptr[0].key));
+                auto value_type = getDataType(object_map.ptr[0].val);
+                if (key_type && value_type)
+                    return std::make_shared<DataTypeMap>(key_type, value_type);
+            }
+            return nullptr;
+        }
+        case msgpack::type::object_type::NIL:
+            return nullptr;
+        default:
+            throw Exception(ErrorCodes::BAD_ARGUMENTS, "Msgpack type is not supported");
+    }
+}
+
+DataTypes MsgPackSchemaReader::readRowAndGetDataTypes()
+{
+    if (buf.eof())
+        return {};
+
+    DataTypes data_types;
+    data_types.reserve(number_of_columns);
+    for (size_t i = 0; i != number_of_columns; ++i)
+    {
+        auto object_handle = readObject();
+        data_types.push_back(getDataType(object_handle.get()));
+    }
+
+    return data_types;
 }
 
 void registerInputFormatMsgPack(FormatFactory & factory)
@@ -384,6 +488,14 @@ void registerInputFormatMsgPack(FormatFactory & factory)
     });
 }
 
+void registerMsgPackSchemaReader(FormatFactory & factory)
+{
+    factory.registerSchemaReader("MsgPack", [](ReadBuffer & buf, const FormatSettings & settings, ContextPtr)
+    {
+        return std::make_shared<MsgPackSchemaReader>(buf, settings);
+    });
+}
+
 }
 
 #else
@@ -394,6 +506,10 @@ class FormatFactory;
 void registerInputFormatMsgPack(FormatFactory &)
 {
 }
+
+void registerMsgPackSchemaReader(FormatFactory &)
+{
+}
 }
 
 #endif
diff --git a/src/Processors/Formats/Impl/MsgPackRowInputFormat.h b/src/Processors/Formats/Impl/MsgPackRowInputFormat.h
index bb3887695eb..dd5655c80fc 100644
--- a/src/Processors/Formats/Impl/MsgPackRowInputFormat.h
+++ b/src/Processors/Formats/Impl/MsgPackRowInputFormat.h
@@ -6,6 +6,7 @@
 #if USE_MSGPACK
 
 #include <Processors/Formats/IRowInputFormat.h>
+#include <Processors/Formats/ISchemaReader.h>
 #include <Formats/FormatFactory.h>
 #include <IO/PeekableReadBuffer.h>
 #include <msgpack.hpp>
@@ -76,6 +77,20 @@ private:
     const DataTypes data_types;
 };
 
+class MsgPackSchemaReader : public IRowSchemaReader
+{
+public:
+    MsgPackSchemaReader(ReadBuffer & in_, const FormatSettings & format_settings_);
+
+private:
+    msgpack::object_handle readObject();
+    DataTypePtr getDataType(const msgpack::object & object);
+    DataTypes readRowAndGetDataTypes() override;
+
+    PeekableReadBuffer buf;
+    UInt64 number_of_columns;
+};
+
 }
 
 #endif
diff --git a/src/Processors/Formats/Impl/NativeFormat.cpp b/src/Processors/Formats/Impl/NativeFormat.cpp
index 07cf4670981..19e2ede6b65 100644
--- a/src/Processors/Formats/Impl/NativeFormat.cpp
+++ b/src/Processors/Formats/Impl/NativeFormat.cpp
@@ -1,8 +1,10 @@
 #include <Formats/NativeReader.h>
 #include <Formats/NativeWriter.h>
+
 #include <Formats/FormatFactory.h>
 #include <Processors/Formats/IInputFormat.h>
 #include <Processors/Formats/IOutputFormat.h>
+#include <Processors/Formats/ISchemaReader.h>
 #include <Processors/Transforms/AggregatingTransform.h>
 
 
@@ -82,6 +84,20 @@ private:
     NativeWriter writer;
 };
 
+class NativeSchemaReader : public ISchemaReader
+{
+public:
+    explicit NativeSchemaReader(ReadBuffer & in_) : ISchemaReader(in_) {}
+
+    NamesAndTypesList readSchema() override
+    {
+        auto reader = NativeReader(in, 0);
+        auto block = reader.read();
+        return block.getNamesAndTypesList();
+    }
+};
+
+
 void registerInputFormatNative(FormatFactory & factory)
 {
     factory.registerInputFormat("Native", [](
@@ -106,4 +122,14 @@ void registerOutputFormatNative(FormatFactory & factory)
     });
 }
 
+
+void registerNativeSchemaReader(FormatFactory & factory)
+{
+    factory.registerSchemaReader("Native", [](ReadBuffer & buf, const FormatSettings &, ContextPtr)
+    {
+        return std::make_shared<NativeSchemaReader>(buf);
+    });
+}
+
+
 }
diff --git a/src/Processors/Formats/Impl/ORCBlockInputFormat.cpp b/src/Processors/Formats/Impl/ORCBlockInputFormat.cpp
index 87eec459aa3..9a787e5a614 100644
--- a/src/Processors/Formats/Impl/ORCBlockInputFormat.cpp
+++ b/src/Processors/Formats/Impl/ORCBlockInputFormat.cpp
@@ -87,9 +87,14 @@ static size_t countIndicesForType(std::shared_ptr<arrow::DataType> type)
     return 1;
 }
 
-void ORCBlockInputFormat::prepareReader()
+static void getFileReaderAndSchema(
+    ReadBuffer & in,
+    std::unique_ptr<arrow::adapters::orc::ORCFileReader> & file_reader,
+    std::shared_ptr<arrow::Schema> & schema,
+    const FormatSettings & format_settings,
+    std::atomic<int> & is_stopped)
 {
-    auto arrow_file = asArrowFile(*in, format_settings, is_stopped);
+    auto arrow_file = asArrowFile(in, format_settings, is_stopped);
     if (is_stopped)
         return;
 
@@ -101,7 +106,15 @@ void ORCBlockInputFormat::prepareReader()
     auto read_schema_result = file_reader->ReadSchema();
     if (!read_schema_result.ok())
         throw Exception(read_schema_result.status().ToString(), ErrorCodes::BAD_ARGUMENTS);
-    std::shared_ptr<arrow::Schema> schema = std::move(read_schema_result).ValueOrDie();
+    schema = std::move(read_schema_result).ValueOrDie();
+}
+
+void ORCBlockInputFormat::prepareReader()
+{
+    std::shared_ptr<arrow::Schema> schema;
+    getFileReaderAndSchema(*in, file_reader, schema, format_settings, is_stopped);
+    if (is_stopped)
+        return;
 
     arrow_column_to_ch_column = std::make_unique<ArrowColumnToCHColumn>(getPort().getHeader(), "ORC", format_settings.orc.import_nested);
 
@@ -128,7 +141,21 @@ void ORCBlockInputFormat::prepareReader()
     }
 }
 
-void registerInputFormatORC(FormatFactory &factory)
+ORCSchemaReader::ORCSchemaReader(ReadBuffer & in_, const FormatSettings & format_settings_) : ISchemaReader(in_), format_settings(format_settings_)
+{
+}
+
+NamesAndTypesList ORCSchemaReader::readSchema()
+{
+    std::unique_ptr<arrow::adapters::orc::ORCFileReader> file_reader;
+    std::shared_ptr<arrow::Schema> schema;
+    std::atomic<int> is_stopped = 0;
+    getFileReaderAndSchema(in, file_reader, schema, format_settings, is_stopped);
+    auto header = ArrowColumnToCHColumn::arrowSchemaToCHHeader(*schema, "ORC");
+    return header.getNamesAndTypesList();
+}
+
+void registerInputFormatORC(FormatFactory & factory)
 {
     factory.registerInputFormat(
             "ORC",
@@ -142,6 +169,17 @@ void registerInputFormatORC(FormatFactory &factory)
     factory.markFormatAsColumnOriented("ORC");
 }
 
+void registerORCSchemaReader(FormatFactory & factory)
+{
+    factory.registerSchemaReader(
+        "ORC",
+        [](ReadBuffer & buf, const FormatSettings & settings, ContextPtr)
+        {
+            return std::make_shared<ORCSchemaReader>(buf, settings);
+        }
+        );
+}
+
 }
 #else
 
@@ -151,6 +189,10 @@ namespace DB
     void registerInputFormatORC(FormatFactory &)
     {
     }
+
+    void registerORCSchemaReader(FormatFactory &)
+    {
+    }
 }
 
 #endif
diff --git a/src/Processors/Formats/Impl/ORCBlockInputFormat.h b/src/Processors/Formats/Impl/ORCBlockInputFormat.h
index c7dc1c4a710..9b55747f552 100644
--- a/src/Processors/Formats/Impl/ORCBlockInputFormat.h
+++ b/src/Processors/Formats/Impl/ORCBlockInputFormat.h
@@ -3,6 +3,7 @@
 #if USE_ORC
 
 #include <Processors/Formats/IInputFormat.h>
+#include <Processors/Formats/ISchemaReader.h>
 #include <Formats/FormatSettings.h>
 
 #include <arrow/adapters/orc/adapter.h>
@@ -54,5 +55,16 @@ private:
     std::atomic<int> is_stopped{0};
 };
 
+class ORCSchemaReader : public ISchemaReader
+{
+public:
+    ORCSchemaReader(ReadBuffer & in_, const FormatSettings & format_settings_);
+
+    NamesAndTypesList readSchema() override;
+
+private:
+    const FormatSettings format_settings;
+};
+
 }
 #endif
diff --git a/src/Processors/Formats/Impl/ORCBlockOutputFormat.cpp b/src/Processors/Formats/Impl/ORCBlockOutputFormat.cpp
index 4c8f6ab2c54..651b9545c81 100644
--- a/src/Processors/Formats/Impl/ORCBlockOutputFormat.cpp
+++ b/src/Processors/Formats/Impl/ORCBlockOutputFormat.cpp
@@ -87,6 +87,7 @@ ORC_UNIQUE_PTR<orc::Type> ORCBlockOutputFormat::getORCType(const DataTypePtr & t
         {
             return orc::createPrimitiveType(orc::TypeKind::DOUBLE);
         }
+        case TypeIndex::Date32: [[fallthrough]];
         case TypeIndex::Date:
         {
             return orc::createPrimitiveType(orc::TypeKind::DATE);
@@ -292,6 +293,7 @@ void ORCBlockOutputFormat::writeColumn(
             writeNumbers<UInt16, orc::LongVectorBatch>(orc_column, column, null_bytemap, [](const UInt16 & value){ return value; });
             break;
         }
+        case TypeIndex::Date32: [[fallthrough]];
         case TypeIndex::Int32:
         {
             writeNumbers<Int32, orc::LongVectorBatch>(orc_column, column, null_bytemap, [](const Int32 & value){ return value; });
diff --git a/src/Processors/Formats/Impl/ParquetBlockInputFormat.cpp b/src/Processors/Formats/Impl/ParquetBlockInputFormat.cpp
index 0b6cd006300..1d303014d31 100644
--- a/src/Processors/Formats/Impl/ParquetBlockInputFormat.cpp
+++ b/src/Processors/Formats/Impl/ParquetBlockInputFormat.cpp
@@ -94,19 +94,30 @@ static size_t countIndicesForType(std::shared_ptr<arrow::DataType> type)
     return 1;
 }
 
+static void getFileReaderAndSchema(
+    ReadBuffer & in,
+    std::unique_ptr<parquet::arrow::FileReader> & file_reader,
+    std::shared_ptr<arrow::Schema> & schema,
+    const FormatSettings & format_settings,
+    std::atomic<int> & is_stopped)
+{
+    auto arrow_file = asArrowFile(in, format_settings, is_stopped);
+    if (is_stopped)
+        return;
+    THROW_ARROW_NOT_OK(parquet::arrow::OpenFile(std::move(arrow_file), arrow::default_memory_pool(), &file_reader));
+    THROW_ARROW_NOT_OK(file_reader->GetSchema(&schema));
+}
+
 void ParquetBlockInputFormat::prepareReader()
 {
-    auto arrow_file = asArrowFile(*in, format_settings, is_stopped);
+    std::shared_ptr<arrow::Schema> schema;
+    getFileReaderAndSchema(*in, file_reader, schema, format_settings, is_stopped);
     if (is_stopped)
         return;
 
-    THROW_ARROW_NOT_OK(parquet::arrow::OpenFile(std::move(arrow_file), arrow::default_memory_pool(), &file_reader));
     row_group_total = file_reader->num_row_groups();
     row_group_current = 0;
 
-    std::shared_ptr<arrow::Schema> schema;
-    THROW_ARROW_NOT_OK(file_reader->GetSchema(&schema));
-
     arrow_column_to_ch_column = std::make_unique<ArrowColumnToCHColumn>(getPort().getHeader(), "Parquet", format_settings.parquet.import_nested);
 
     std::unordered_set<String> nested_table_names;
@@ -130,7 +141,21 @@ void ParquetBlockInputFormat::prepareReader()
     }
 }
 
-void registerInputFormatParquet(FormatFactory &factory)
+ParquetSchemaReader::ParquetSchemaReader(ReadBuffer & in_, const FormatSettings & format_settings_) : ISchemaReader(in_), format_settings(format_settings_)
+{
+}
+
+NamesAndTypesList ParquetSchemaReader::readSchema()
+{
+    std::unique_ptr<parquet::arrow::FileReader> file_reader;
+    std::shared_ptr<arrow::Schema> schema;
+    std::atomic<int> is_stopped = 0;
+    getFileReaderAndSchema(in, file_reader, schema, format_settings, is_stopped);
+    auto header = ArrowColumnToCHColumn::arrowSchemaToCHHeader(*schema, "Parquet");
+    return header.getNamesAndTypesList();
+}
+
+void registerInputFormatParquet(FormatFactory & factory)
 {
     factory.registerInputFormat(
             "Parquet",
@@ -144,6 +169,17 @@ void registerInputFormatParquet(FormatFactory &factory)
     factory.markFormatAsColumnOriented("Parquet");
 }
 
+void registerParquetSchemaReader(FormatFactory & factory)
+{
+    factory.registerSchemaReader(
+        "Parquet",
+        [](ReadBuffer & buf, const FormatSettings & settings, ContextPtr)
+        {
+            return std::make_shared<ParquetSchemaReader>(buf, settings);
+        }
+        );
+}
+
 }
 
 #else
@@ -154,6 +190,8 @@ class FormatFactory;
 void registerInputFormatParquet(FormatFactory &)
 {
 }
+
+void registerParquetSchemaReader(FormatFactory &) {}
 }
 
 #endif
diff --git a/src/Processors/Formats/Impl/ParquetBlockInputFormat.h b/src/Processors/Formats/Impl/ParquetBlockInputFormat.h
index 3e04c523442..dbc99c08a35 100644
--- a/src/Processors/Formats/Impl/ParquetBlockInputFormat.h
+++ b/src/Processors/Formats/Impl/ParquetBlockInputFormat.h
@@ -3,6 +3,7 @@
 #if USE_PARQUET
 
 #include <Processors/Formats/IInputFormat.h>
+#include <Processors/Formats/ISchemaReader.h>
 #include <Formats/FormatSettings.h>
 
 namespace parquet::arrow { class FileReader; }
@@ -44,6 +45,17 @@ private:
     std::atomic<int> is_stopped{0};
 };
 
+class ParquetSchemaReader : public ISchemaReader
+{
+public:
+    ParquetSchemaReader(ReadBuffer & in_, const FormatSettings & format_settings_);
+
+    NamesAndTypesList readSchema() override;
+
+private:
+    const FormatSettings format_settings;
+};
+
 }
 
 #endif
diff --git a/src/Processors/Formats/Impl/ProtobufRowInputFormat.cpp b/src/Processors/Formats/Impl/ProtobufRowInputFormat.cpp
index df7b7102739..66da27e8829 100644
--- a/src/Processors/Formats/Impl/ProtobufRowInputFormat.cpp
+++ b/src/Processors/Formats/Impl/ProtobufRowInputFormat.cpp
@@ -73,6 +73,34 @@ void registerInputFormatProtobuf(FormatFactory & factory)
     }
 }
 
+ProtobufSchemaReader::ProtobufSchemaReader(const FormatSettings & format_settings)
+    : schema_info(
+          format_settings.schema.format_schema,
+          "Protobuf",
+          true,
+          format_settings.schema.is_server,
+          format_settings.schema.format_schema_path)
+{
+}
+
+NamesAndTypesList ProtobufSchemaReader::readSchema()
+{
+    const auto * message_descriptor = ProtobufSchemas::instance().getMessageTypeForFormatSchema(schema_info);
+    return protobufSchemaToCHSchema(message_descriptor);
+}
+
+void registerProtobufSchemaReader(FormatFactory & factory)
+{
+    factory.registerExternalSchemaReader("Protobuf", [](const FormatSettings & settings)
+    {
+        return std::make_shared<ProtobufSchemaReader>(settings);
+    });
+    factory.registerExternalSchemaReader("ProtobufSingle", [](const FormatSettings & settings)
+    {
+        return std::make_shared<ProtobufSchemaReader>(settings);
+    });
+}
+
 }
 
 #else
@@ -81,6 +109,8 @@ namespace DB
 {
 class FormatFactory;
 void registerInputFormatProtobuf(FormatFactory &) {}
+
+void registerProtobufSchemaReader(FormatFactory &) {}
 }
 
 #endif
diff --git a/src/Processors/Formats/Impl/ProtobufRowInputFormat.h b/src/Processors/Formats/Impl/ProtobufRowInputFormat.h
index 6f465e3f0b8..d7d16d36ddf 100644
--- a/src/Processors/Formats/Impl/ProtobufRowInputFormat.h
+++ b/src/Processors/Formats/Impl/ProtobufRowInputFormat.h
@@ -3,7 +3,9 @@
 #include "config_formats.h"
 
 #if USE_PROTOBUF
+#    include <Formats/FormatSchemaInfo.h>
 #    include <Processors/Formats/IRowInputFormat.h>
+#    include <Processors/Formats/ISchemaReader.h>
 
 namespace DB
 {
@@ -42,5 +44,16 @@ private:
     std::unique_ptr<ProtobufSerializer> serializer;
 };
 
+class ProtobufSchemaReader : public IExternalSchemaReader
+{
+public:
+    explicit ProtobufSchemaReader(const FormatSettings & format_settings);
+
+    NamesAndTypesList readSchema() override;
+
+private:
+    FormatSchemaInfo schema_info;
+};
+
 }
 #endif
diff --git a/src/Processors/Formats/Impl/RawBLOBRowInputFormat.cpp b/src/Processors/Formats/Impl/RawBLOBRowInputFormat.cpp
index 34424fffd34..91b1cc60fae 100644
--- a/src/Processors/Formats/Impl/RawBLOBRowInputFormat.cpp
+++ b/src/Processors/Formats/Impl/RawBLOBRowInputFormat.cpp
@@ -51,5 +51,14 @@ void registerInputFormatRawBLOB(FormatFactory & factory)
     });
 }
 
+void registerRawBLOBSchemaReader(FormatFactory & factory)
+{
+    factory.registerExternalSchemaReader("RawBLOB", [](
+            const FormatSettings &)
+    {
+        return std::make_shared<RawBLOBSchemaReader>();
+    });
+}
+
 }
 
diff --git a/src/Processors/Formats/Impl/RawBLOBRowInputFormat.h b/src/Processors/Formats/Impl/RawBLOBRowInputFormat.h
index 343af9f4068..367ca04f9d8 100644
--- a/src/Processors/Formats/Impl/RawBLOBRowInputFormat.h
+++ b/src/Processors/Formats/Impl/RawBLOBRowInputFormat.h
@@ -1,6 +1,8 @@
 #pragma once
 
 #include <Processors/Formats/IRowInputFormat.h>
+#include <Processors/Formats/ISchemaReader.h>
+#include <DataTypes/DataTypeString.h>
 
 
 namespace DB
@@ -22,5 +24,14 @@ private:
     bool readRow(MutableColumns & columns, RowReadExtension &) override;
 };
 
+class RawBLOBSchemaReader: public IExternalSchemaReader
+{
+public:
+    NamesAndTypesList readSchema() override
+    {
+        return {{"raw_blob", std::make_shared<DataTypeString>()}};
+    }
+};
+
 }
 
diff --git a/src/Processors/Formats/Impl/RegexpRowInputFormat.cpp b/src/Processors/Formats/Impl/RegexpRowInputFormat.cpp
index 279ae89aba5..90db6f6f0ec 100644
--- a/src/Processors/Formats/Impl/RegexpRowInputFormat.cpp
+++ b/src/Processors/Formats/Impl/RegexpRowInputFormat.cpp
@@ -14,18 +14,7 @@ namespace ErrorCodes
     extern const int LOGICAL_ERROR;
 }
 
-RegexpRowInputFormat::RegexpRowInputFormat(ReadBuffer & in_, const Block & header_, Params params_, const FormatSettings & format_settings_)
-    : RegexpRowInputFormat(std::make_unique<PeekableReadBuffer>(in_), header_, params_, format_settings_)
-{
-}
-
-RegexpRowInputFormat::RegexpRowInputFormat(
-        std::unique_ptr<PeekableReadBuffer> buf_, const Block & header_, Params params_, const FormatSettings & format_settings_)
-        : IRowInputFormat(header_, *buf_, std::move(params_))
-        , buf(std::move(buf_))
-        , format_settings(format_settings_)
-        , escaping_rule(format_settings_.regexp.escaping_rule)
-        , regexp(format_settings_.regexp.regexp)
+RegexpFieldExtractor::RegexpFieldExtractor(const FormatSettings & format_settings) : regexp(format_settings.regexp.regexp), skip_unmatched(format_settings.regexp.skip_unmatched)
 {
     size_t fields_count = regexp.NumberOfCapturingGroups();
     matched_fields.resize(fields_count);
@@ -40,6 +29,50 @@ RegexpRowInputFormat::RegexpRowInputFormat(
     }
 }
 
+bool RegexpFieldExtractor::parseRow(PeekableReadBuffer & buf)
+{
+    PeekableReadBufferCheckpoint checkpoint{buf};
+
+    size_t line_size = 0;
+
+    do
+    {
+        char * pos = find_first_symbols<'\n', '\r'>(buf.position(), buf.buffer().end());
+        line_size += pos - buf.position();
+        buf.position() = pos;
+    } while (buf.position() == buf.buffer().end() && !buf.eof());
+
+    buf.makeContinuousMemoryFromCheckpointToPos();
+    buf.rollbackToCheckpoint();
+
+    bool match = RE2::FullMatchN(re2::StringPiece(buf.position(), line_size), regexp, re2_arguments_ptrs.data(), re2_arguments_ptrs.size());
+
+    if (!match && !skip_unmatched)
+        throw Exception("Line \"" + std::string(buf.position(), line_size) + "\" doesn't match the regexp.", ErrorCodes::INCORRECT_DATA);
+
+    buf.position() += line_size;
+    checkChar('\r', buf);
+    if (!buf.eof() && !checkChar('\n', buf))
+        throw Exception("No \\n after \\r at the end of line.", ErrorCodes::INCORRECT_DATA);
+
+    return match;
+}
+
+RegexpRowInputFormat::RegexpRowInputFormat(
+    ReadBuffer & in_, const Block & header_, Params params_, const FormatSettings & format_settings_)
+    : RegexpRowInputFormat(std::make_unique<PeekableReadBuffer>(in_), header_, params_, format_settings_)
+{
+}
+
+RegexpRowInputFormat::RegexpRowInputFormat(
+        std::unique_ptr<PeekableReadBuffer> buf_, const Block & header_, Params params_, const FormatSettings & format_settings_)
+        : IRowInputFormat(header_, *buf_, std::move(params_))
+        , buf(std::move(buf_))
+        , format_settings(format_settings_)
+        , escaping_rule(format_settings_.regexp.escaping_rule)
+        , field_extractor(RegexpFieldExtractor(format_settings_))
+{
+}
 
 void RegexpRowInputFormat::resetParser()
 {
@@ -50,7 +83,8 @@ void RegexpRowInputFormat::resetParser()
 bool RegexpRowInputFormat::readField(size_t index, MutableColumns & columns)
 {
     const auto & type = getPort().getHeader().getByPosition(index).type;
-    ReadBuffer field_buf(const_cast<char *>(matched_fields[index].data()), matched_fields[index].size(), 0);
+    auto matched_field = field_extractor.getField(index);
+    ReadBuffer field_buf(const_cast<char *>(matched_field.data()), matched_field.size(), 0);
     try
     {
         return deserializeFieldByEscapingRule(type, serializations[index], *columns[index], field_buf, escaping_rule, format_settings);
@@ -64,7 +98,7 @@ bool RegexpRowInputFormat::readField(size_t index, MutableColumns & columns)
 
 void RegexpRowInputFormat::readFieldsFromMatch(MutableColumns & columns, RowReadExtension & ext)
 {
-    if (matched_fields.size() != columns.size())
+    if (field_extractor.getMatchedFieldsSize() != columns.size())
         throw Exception("The number of matched fields in line doesn't match the number of columns.", ErrorCodes::INCORRECT_DATA);
 
     ext.read_columns.assign(columns.size(), false);
@@ -79,39 +113,8 @@ bool RegexpRowInputFormat::readRow(MutableColumns & columns, RowReadExtension &
     if (buf->eof())
         return false;
 
-    PeekableReadBufferCheckpoint checkpoint{*buf};
-
-    size_t line_size = 0;
-
-    do
-    {
-        char * pos = find_first_symbols<'\n', '\r'>(buf->position(), buf->buffer().end());
-        line_size += pos - buf->position();
-        buf->position() = pos;
-    } while (buf->position() == buf->buffer().end() && !buf->eof());
-
-    buf->makeContinuousMemoryFromCheckpointToPos();
-    buf->rollbackToCheckpoint();
-
-    bool match = RE2::FullMatchN(re2::StringPiece(buf->position(), line_size), regexp, re2_arguments_ptrs.data(), re2_arguments_ptrs.size());
-    bool read_line = true;
-
-    if (!match)
-    {
-        if (!format_settings.regexp.skip_unmatched)
-            throw Exception("Line \"" + std::string(buf->position(), line_size) + "\" doesn't match the regexp.", ErrorCodes::INCORRECT_DATA);
-        read_line = false;
-    }
-
-    if (read_line)
+    if (field_extractor.parseRow(*buf))
         readFieldsFromMatch(columns, ext);
-
-    buf->position() += line_size;
-
-    checkChar('\r', *buf);
-    if (!buf->eof() && !checkChar('\n', *buf))
-        throw Exception("No \\n after \\r at the end of line.", ErrorCodes::INCORRECT_DATA);
-
     return true;
 }
 
@@ -121,6 +124,36 @@ void RegexpRowInputFormat::setReadBuffer(ReadBuffer & in_)
     IInputFormat::setReadBuffer(*buf);
 }
 
+RegexpSchemaReader::RegexpSchemaReader(ReadBuffer & in_, const FormatSettings & format_settings_, ContextPtr context_)
+    : IRowSchemaReader(
+        buf,
+        format_settings_.max_rows_to_read_for_schema_inference,
+        getDefaultDataTypeForEscapingRule(format_settings_.regexp.escaping_rule))
+    , format_settings(format_settings_)
+    , field_extractor(format_settings)
+    , buf(in_)
+    , context(context_)
+{
+}
+
+DataTypes RegexpSchemaReader::readRowAndGetDataTypes()
+{
+    if (buf.eof())
+        return {};
+
+    field_extractor.parseRow(buf);
+
+    DataTypes data_types;
+    data_types.reserve(field_extractor.getMatchedFieldsSize());
+    for (size_t i = 0; i != field_extractor.getMatchedFieldsSize(); ++i)
+    {
+        String field(field_extractor.getField(i));
+        data_types.push_back(determineDataTypeByEscapingRule(field, format_settings, format_settings.regexp.escaping_rule, context));
+    }
+
+    return data_types;
+}
+
 void registerInputFormatRegexp(FormatFactory & factory)
 {
     factory.registerInputFormat("Regexp", [](
@@ -172,4 +205,12 @@ void registerFileSegmentationEngineRegexp(FormatFactory & factory)
     factory.registerFileSegmentationEngine("Regexp", &fileSegmentationEngineRegexpImpl);
 }
 
+void registerRegexpSchemaReader(FormatFactory & factory)
+{
+    factory.registerSchemaReader("Regexp", [](ReadBuffer & buf, const FormatSettings & settings, ContextPtr context)
+    {
+        return std::make_shared<RegexpSchemaReader>(buf, settings, context);
+    });
+}
+
 }
diff --git a/src/Processors/Formats/Impl/RegexpRowInputFormat.h b/src/Processors/Formats/Impl/RegexpRowInputFormat.h
index dbce31a9b49..dffd2f82e02 100644
--- a/src/Processors/Formats/Impl/RegexpRowInputFormat.h
+++ b/src/Processors/Formats/Impl/RegexpRowInputFormat.h
@@ -6,6 +6,7 @@
 #include <vector>
 #include <Core/Block.h>
 #include <Processors/Formats/IRowInputFormat.h>
+#include <Processors/Formats/ISchemaReader.h>
 #include <Formats/FormatSettings.h>
 #include <Formats/FormatFactory.h>
 #include <IO/PeekableReadBuffer.h>
@@ -16,6 +17,29 @@ namespace DB
 
 class ReadBuffer;
 
+/// Class for extracting row fields from data by regexp.
+class RegexpFieldExtractor
+{
+public:
+    RegexpFieldExtractor(const FormatSettings & format_settings);
+
+    /// Return true if row was successfully parsed and row fields were extracted.
+    bool parseRow(PeekableReadBuffer & buf);
+
+    re2::StringPiece getField(size_t index) { return matched_fields[index]; }
+    size_t getMatchedFieldsSize() const { return matched_fields.size(); }
+    size_t getNumberOfGroups() const { return regexp.NumberOfCapturingGroups(); }
+
+private:
+    const RE2 regexp;
+    // The vector of fields extracted from line using regexp.
+    std::vector<re2::StringPiece> matched_fields;
+    // These two vectors are needed to use RE2::FullMatchN (function for extracting fields).
+    std::vector<RE2::Arg> re2_arguments;
+    std::vector<RE2::Arg *> re2_arguments_ptrs;
+    bool skip_unmatched;
+};
+
 /// Regexp input format.
 /// This format applies regular expression from format_regexp setting for every line of file
 /// (the lines must be separated by newline character ('\n') or DOS-style newline ("\r\n")).
@@ -25,7 +49,6 @@ class ReadBuffer;
 
 class RegexpRowInputFormat : public IRowInputFormat
 {
-    using EscapingRule = FormatSettings::EscapingRule;
 public:
     RegexpRowInputFormat(ReadBuffer & in_, const Block & header_, Params params_, const FormatSettings & format_settings_);
 
@@ -36,6 +59,8 @@ public:
 private:
     RegexpRowInputFormat(std::unique_ptr<PeekableReadBuffer> buf_, const Block & header_, Params params_, const FormatSettings & format_settings_);
 
+    using EscapingRule = FormatSettings::EscapingRule;
+
     bool readRow(MutableColumns & columns, RowReadExtension & ext) override;
 
     bool readField(size_t index, MutableColumns & columns);
@@ -44,13 +69,22 @@ private:
     std::unique_ptr<PeekableReadBuffer> buf;
     const FormatSettings format_settings;
     const EscapingRule escaping_rule;
+    RegexpFieldExtractor field_extractor;
+};
 
-    const RE2 regexp;
-    // The vector of fields extracted from line using regexp.
-    std::vector<re2::StringPiece> matched_fields;
-    // These two vectors are needed to use RE2::FullMatchN (function for extracting fields).
-    std::vector<RE2::Arg> re2_arguments;
-    std::vector<RE2::Arg *> re2_arguments_ptrs;
+class RegexpSchemaReader : public IRowSchemaReader
+{
+public:
+    RegexpSchemaReader(ReadBuffer & in_, const FormatSettings & format_settings, ContextPtr context_);
+
+private:
+    DataTypes readRowAndGetDataTypes() override;
+
+    using EscapingRule = FormatSettings::EscapingRule;
+    const FormatSettings format_settings;
+    RegexpFieldExtractor field_extractor;
+    PeekableReadBuffer buf;
+    ContextPtr context;
 };
 
 }
diff --git a/src/Processors/Formats/Impl/TSKVRowInputFormat.cpp b/src/Processors/Formats/Impl/TSKVRowInputFormat.cpp
index eef97e15dd5..8a56c2ed5c7 100644
--- a/src/Processors/Formats/Impl/TSKVRowInputFormat.cpp
+++ b/src/Processors/Formats/Impl/TSKVRowInputFormat.cpp
@@ -1,7 +1,10 @@
 #include <IO/ReadHelpers.h>
 #include <Processors/Formats/Impl/TSKVRowInputFormat.h>
 #include <Formats/FormatFactory.h>
+#include <Formats/EscapingRuleUtils.h>
 #include <DataTypes/Serializations/SerializationNullable.h>
+#include <DataTypes/DataTypeString.h>
+#include <DataTypes/DataTypeNullable.h>
 
 
 namespace DB
@@ -211,6 +214,59 @@ void TSKVRowInputFormat::resetParser()
     name_buf.clear();
 }
 
+TSKVSchemaReader::TSKVSchemaReader(ReadBuffer & in_, const FormatSettings & format_settings_)
+    : IRowWithNamesSchemaReader(
+        in_,
+        format_settings_.max_rows_to_read_for_schema_inference,
+        getDefaultDataTypeForEscapingRule(FormatSettings::EscapingRule::Escaped))
+    , format_settings(format_settings_)
+{
+}
+
+std::unordered_map<String, DataTypePtr> TSKVSchemaReader::readRowAndGetNamesAndDataTypes()
+{
+    if (first_row)
+    {
+        skipBOMIfExists(in);
+        first_row = false;
+    }
+
+    if (in.eof())
+        return {};
+
+    if (*in.position() == '\n')
+    {
+        ++in.position();
+        return {};
+    }
+
+    std::unordered_map<String, DataTypePtr> names_and_types;
+    StringRef name_ref;
+    String name_tmp;
+    String value;
+    do
+    {
+        bool has_value = readName(in, name_ref, name_tmp);
+        if (has_value)
+        {
+            readEscapedString(value, in);
+            names_and_types[String(name_ref)] = determineDataTypeByEscapingRule(value, format_settings, FormatSettings::EscapingRule::Escaped);
+        }
+        else
+        {
+            /// The only thing that can go without value is `tskv` fragment that is ignored.
+            if (!(name_ref.size == 4 && 0 == memcmp(name_ref.data, "tskv", 4)))
+                throw Exception("Found field without value while parsing TSKV format: " + name_ref.toString(), ErrorCodes::INCORRECT_DATA);
+        }
+
+    }
+    while (checkChar('\t', in));
+
+    assertChar('\n', in);
+
+    return names_and_types;
+}
+
 void registerInputFormatTSKV(FormatFactory & factory)
 {
     factory.registerInputFormat("TSKV", [](
@@ -222,5 +278,12 @@ void registerInputFormatTSKV(FormatFactory & factory)
         return std::make_shared<TSKVRowInputFormat>(buf, sample, std::move(params), settings);
     });
 }
+void registerTSKVSchemaReader(FormatFactory & factory)
+{
+    factory.registerSchemaReader("TSKV", [](ReadBuffer & buf, const FormatSettings & settings, ContextPtr)
+    {
+        return std::make_shared<TSKVSchemaReader>(buf, settings);
+    });
+}
 
 }
diff --git a/src/Processors/Formats/Impl/TSKVRowInputFormat.h b/src/Processors/Formats/Impl/TSKVRowInputFormat.h
index 7d732bae691..6aef50a0f84 100644
--- a/src/Processors/Formats/Impl/TSKVRowInputFormat.h
+++ b/src/Processors/Formats/Impl/TSKVRowInputFormat.h
@@ -2,6 +2,7 @@
 
 #include <Core/Block.h>
 #include <Processors/Formats/IRowInputFormat.h>
+#include <Processors/Formats/ISchemaReader.h>
 #include <Formats/FormatSettings.h>
 #include <Common/HashTable/HashMap.h>
 
@@ -52,4 +53,16 @@ private:
     /// for row like ..., non-nullable column name=\N, ...
 };
 
+class TSKVSchemaReader : public IRowWithNamesSchemaReader
+{
+public:
+    TSKVSchemaReader(ReadBuffer & in_, const FormatSettings & format_settings_);
+
+private:
+    std::unordered_map<String, DataTypePtr> readRowAndGetNamesAndDataTypes() override;
+
+    const FormatSettings format_settings;
+    bool first_row = true;
+};
+
 }
diff --git a/src/Processors/Formats/Impl/TabSeparatedRowInputFormat.cpp b/src/Processors/Formats/Impl/TabSeparatedRowInputFormat.cpp
index 1e6d238b202..bb844ec68ea 100644
--- a/src/Processors/Formats/Impl/TabSeparatedRowInputFormat.cpp
+++ b/src/Processors/Formats/Impl/TabSeparatedRowInputFormat.cpp
@@ -1,13 +1,15 @@
 #include <IO/ReadHelpers.h>
 #include <IO/Operators.h>
-#include <IO/BufferWithOwnMemory.h>
 
-#include <Processors/Formats/Impl/TabSeparatedRowInputFormat.h>
-#include <Formats/verbosePrintString.h>
-#include <Formats/FormatFactory.h>
-#include <Formats/registerWithNamesAndTypes.h>
-#include <DataTypes/DataTypeLowCardinality.h>
+#include <DataTypes/DataTypeFactory.h>
+#include <DataTypes/DataTypeNullable.h>
 #include <DataTypes/Serializations/SerializationNullable.h>
+#include <Formats/FormatFactory.h>
+#include <Formats/ReadSchemaUtils.h>
+#include <Formats/registerWithNamesAndTypes.h>
+#include <Formats/verbosePrintString.h>
+#include <Formats/EscapingRuleUtils.h>
+#include <Processors/Formats/Impl/TabSeparatedRowInputFormat.h>
 
 namespace DB
 {
@@ -38,40 +40,50 @@ TabSeparatedRowInputFormat::TabSeparatedRowInputFormat(
     bool with_types_,
     bool is_raw_,
     const FormatSettings & format_settings_)
-    : RowInputFormatWithNamesAndTypes(header_, in_, params_, with_names_, with_types_, format_settings_), is_raw(is_raw_)
+    : RowInputFormatWithNamesAndTypes(header_, in_, params_, with_names_, with_types_, format_settings_, std::make_unique<TabSeparatedFormatReader>(in_, format_settings_, is_raw_))
 {
 }
 
-void TabSeparatedRowInputFormat::skipFieldDelimiter()
+TabSeparatedFormatReader::TabSeparatedFormatReader(ReadBuffer & in_, const FormatSettings & format_settings_, bool is_raw_)
+    : FormatWithNamesAndTypesReader(in_, format_settings_), is_raw(is_raw_)
+{
+}
+
+void TabSeparatedFormatReader::skipFieldDelimiter()
 {
     assertChar('\t', *in);
 }
 
-void TabSeparatedRowInputFormat::skipRowEndDelimiter()
+void TabSeparatedFormatReader::skipRowEndDelimiter()
 {
     if (in->eof())
         return;
 
-    if (unlikely(row_num <= 1))
+    if (unlikely(first_row))
+    {
         checkForCarriageReturn(*in);
+        first_row = false;
+    }
 
     assertChar('\n', *in);
 }
 
-String TabSeparatedRowInputFormat::readFieldIntoString()
+String TabSeparatedFormatReader::readFieldIntoString()
 {
     String field;
-    readEscapedString(field, *in);
+    if (is_raw)
+        readString(field, *in);
+    else
+        readEscapedString(field, *in);
     return field;
 }
 
-void TabSeparatedRowInputFormat::skipField()
+void TabSeparatedFormatReader::skipField()
 {
-    NullOutput null_sink;
-    readEscapedStringInto(null_sink, *in);
+    readFieldIntoString();
 }
 
-void TabSeparatedRowInputFormat::skipHeaderRow()
+void TabSeparatedFormatReader::skipHeaderRow()
 {
     do
     {
@@ -82,7 +94,7 @@ void TabSeparatedRowInputFormat::skipHeaderRow()
     skipRowEndDelimiter();
 }
 
-std::vector<String> TabSeparatedRowInputFormat::readHeaderRow()
+std::vector<String> TabSeparatedFormatReader::readRow()
 {
     std::vector<String> fields;
     do
@@ -95,7 +107,7 @@ std::vector<String> TabSeparatedRowInputFormat::readHeaderRow()
     return fields;
 }
 
-bool TabSeparatedRowInputFormat::readField(IColumn & column, const DataTypePtr & type,
+bool TabSeparatedFormatReader::readField(IColumn & column, const DataTypePtr & type,
     const SerializationPtr & serialization, bool is_last_file_column, const String & /*column_name*/)
 {
     const bool at_delimiter = !is_last_file_column && !in->eof() && *in->position() == '\t';
@@ -118,6 +130,7 @@ bool TabSeparatedRowInputFormat::readField(IColumn & column, const DataTypePtr &
         return true;
     }
 
+
     if (as_nullable)
         return SerializationNullable::deserializeTextEscapedImpl(column, *in, format_settings, serialization);
 
@@ -125,7 +138,7 @@ bool TabSeparatedRowInputFormat::readField(IColumn & column, const DataTypePtr &
     return true;
 }
 
-bool TabSeparatedRowInputFormat::parseFieldDelimiterWithDiagnosticInfo(WriteBuffer & out)
+bool TabSeparatedFormatReader::parseFieldDelimiterWithDiagnosticInfo(WriteBuffer & out)
 {
     try
     {
@@ -156,7 +169,7 @@ bool TabSeparatedRowInputFormat::parseFieldDelimiterWithDiagnosticInfo(WriteBuff
     return true;
 }
 
-bool TabSeparatedRowInputFormat::parseRowEndWithDiagnosticInfo(WriteBuffer & out)
+bool TabSeparatedFormatReader::parseRowEndWithDiagnosticInfo(WriteBuffer & out)
 {
     if (in->eof())
         return true;
@@ -190,7 +203,7 @@ bool TabSeparatedRowInputFormat::parseRowEndWithDiagnosticInfo(WriteBuffer & out
     return true;
 }
 
-void TabSeparatedRowInputFormat::checkNullValueForNonNullable(DataTypePtr type)
+void TabSeparatedFormatReader::checkNullValueForNonNullable(DataTypePtr type)
 {
     bool can_be_parsed_as_null = type->isNullable() || type->isLowCardinalityNullable() || format_settings.null_as_default;
 
@@ -218,6 +231,28 @@ void TabSeparatedRowInputFormat::syncAfterError()
     skipToUnescapedNextLineOrEOF(*in);
 }
 
+TabSeparatedSchemaReader::TabSeparatedSchemaReader(
+    ReadBuffer & in_, bool with_names_, bool with_types_, bool is_raw_, const FormatSettings & format_settings_)
+    : FormatWithNamesAndTypesSchemaReader(
+        in_,
+        format_settings_.max_rows_to_read_for_schema_inference,
+        with_names_,
+        with_types_,
+        &reader,
+        getDefaultDataTypeForEscapingRule(is_raw_ ? FormatSettings::EscapingRule::Raw : FormatSettings::EscapingRule::Escaped))
+    , reader(in_, format_settings_, is_raw_)
+{
+}
+
+DataTypes TabSeparatedSchemaReader::readRowAndGetDataTypes()
+{
+    if (in.eof())
+        return {};
+
+    auto fields = reader.readRow();
+    return determineDataTypesByEscapingRule(fields, reader.getFormatSettings(), reader.getEscapingRule());
+}
+
 void registerInputFormatTabSeparated(FormatFactory & factory)
 {
     for (bool is_raw : {false, true})
@@ -239,6 +274,23 @@ void registerInputFormatTabSeparated(FormatFactory & factory)
     }
 }
 
+void registerTSVSchemaReader(FormatFactory & factory)
+{
+    for (bool is_raw : {false, true})
+    {
+        auto register_func = [&](const String & format_name, bool with_names, bool with_types)
+        {
+            factory.registerSchemaReader(format_name, [with_names, with_types, is_raw](ReadBuffer & buf, const FormatSettings & settings, ContextPtr)
+            {
+                return std::make_shared<TabSeparatedSchemaReader>(buf, with_names, with_types, is_raw, settings);
+            });
+        };
+
+        registerWithNamesAndTypes(is_raw ? "TabSeparatedRaw" : "TabSeparated", register_func);
+        registerWithNamesAndTypes(is_raw ? "TSVRaw" : "TSV", register_func);
+    }
+}
+
 static std::pair<bool, size_t> fileSegmentationEngineTabSeparatedImpl(ReadBuffer & in, DB::Memory<> & memory, size_t min_chunk_size, bool is_raw, size_t min_rows)
 {
     bool need_more_data = true;
diff --git a/src/Processors/Formats/Impl/TabSeparatedRowInputFormat.h b/src/Processors/Formats/Impl/TabSeparatedRowInputFormat.h
index 6e2e283e792..1f2bfc255b8 100644
--- a/src/Processors/Formats/Impl/TabSeparatedRowInputFormat.h
+++ b/src/Processors/Formats/Impl/TabSeparatedRowInputFormat.h
@@ -3,6 +3,7 @@
 #include <Core/Block.h>
 #include <Formats/FormatSettings.h>
 #include <Processors/Formats/RowInputFormatWithNamesAndTypes.h>
+#include <Processors/Formats/ISchemaReader.h>
 
 
 namespace DB
@@ -24,6 +25,13 @@ public:
 private:
     bool allowSyncAfterError() const override { return true; }
     void syncAfterError() override;
+    bool isGarbageAfterField(size_t, ReadBuffer::Position pos) override { return *pos != '\n' && *pos != '\t'; }
+};
+
+class TabSeparatedFormatReader : public FormatWithNamesAndTypesReader
+{
+public:
+    TabSeparatedFormatReader(ReadBuffer & in_, const FormatSettings & format_settings, bool is_raw_);
 
     bool readField(IColumn & column, const DataTypePtr & type,
                    const SerializationPtr & serialization, bool is_last_file_column, const String & column_name) override;
@@ -36,18 +44,34 @@ private:
     void skipFieldDelimiter() override;
     void skipRowEndDelimiter() override;
 
-    std::vector<String> readHeaderRow();
-    std::vector<String> readNames() override { return readHeaderRow(); }
-    std::vector<String> readTypes() override { return readHeaderRow(); }
+    std::vector<String> readRow();
+    std::vector<String> readNames() override { return readRow(); }
+    std::vector<String> readTypes() override { return readRow(); }
     String readFieldIntoString();
 
     void checkNullValueForNonNullable(DataTypePtr type) override;
 
     bool parseFieldDelimiterWithDiagnosticInfo(WriteBuffer & out) override;
     bool parseRowEndWithDiagnosticInfo(WriteBuffer & out) override;
-    bool isGarbageAfterField(size_t, ReadBuffer::Position pos) override { return *pos != '\n' && *pos != '\t'; }
+    FormatSettings::EscapingRule getEscapingRule()
+    {
+        return is_raw ? FormatSettings::EscapingRule::Raw : FormatSettings::EscapingRule::Escaped;
+    }
 
+private:
     bool is_raw;
+    bool first_row = true;
+};
+
+class TabSeparatedSchemaReader : public FormatWithNamesAndTypesSchemaReader
+{
+public:
+    TabSeparatedSchemaReader(ReadBuffer & in_, bool with_names_, bool with_types_, bool is_raw_, const FormatSettings & format_settings);
+
+private:
+    DataTypes readRowAndGetDataTypes() override;
+
+    TabSeparatedFormatReader reader;
 };
 
 }
diff --git a/src/Processors/Formats/Impl/TabSeparatedRowOutputFormat.cpp b/src/Processors/Formats/Impl/TabSeparatedRowOutputFormat.cpp
index 5d87f5a0b14..37bd8daa502 100644
--- a/src/Processors/Formats/Impl/TabSeparatedRowOutputFormat.cpp
+++ b/src/Processors/Formats/Impl/TabSeparatedRowOutputFormat.cpp
@@ -22,7 +22,10 @@ void TabSeparatedRowOutputFormat::writeLine(const std::vector<String> & values)
 {
     for (size_t i = 0; i < values.size(); ++i)
     {
-        writeEscapedString(values[i], out);
+        if (is_raw)
+            writeString(values[i], out);
+        else
+            writeEscapedString(values[i], out);
         if (i + 1 == values.size())
             writeRowEndDelimiter();
         else
diff --git a/src/Processors/Formats/Impl/TemplateRowInputFormat.cpp b/src/Processors/Formats/Impl/TemplateRowInputFormat.cpp
index fccf6eb10df..06d6ba06bcc 100644
--- a/src/Processors/Formats/Impl/TemplateRowInputFormat.cpp
+++ b/src/Processors/Formats/Impl/TemplateRowInputFormat.cpp
@@ -4,7 +4,6 @@
 #include <Formats/EscapingRuleUtils.h>
 #include <IO/Operators.h>
 #include <DataTypes/DataTypeNothing.h>
-#include <Interpreters/Context.h>
 #include <DataTypes/Serializations/SerializationNullable.h>
 
 namespace DB
@@ -12,13 +11,19 @@ namespace DB
 
 namespace ErrorCodes
 {
-extern const int ATTEMPT_TO_READ_AFTER_EOF;
-extern const int CANNOT_READ_ALL_DATA;
-extern const int CANNOT_PARSE_ESCAPE_SEQUENCE;
-extern const int CANNOT_PARSE_QUOTED_STRING;
-extern const int SYNTAX_ERROR;
+    extern const int ATTEMPT_TO_READ_AFTER_EOF;
+    extern const int CANNOT_READ_ALL_DATA;
+    extern const int CANNOT_PARSE_ESCAPE_SEQUENCE;
+    extern const int CANNOT_PARSE_QUOTED_STRING;
+    extern const int SYNTAX_ERROR;
 }
 
+[[noreturn]] static void throwUnexpectedEof(size_t row_num)
+{
+    throw ParsingException("Unexpected EOF while parsing row " + std::to_string(row_num) + ". "
+                           "Maybe last row has wrong format or input doesn't contain specified suffix before EOF.",
+                           ErrorCodes::CANNOT_READ_ALL_DATA);
+}
 
 TemplateRowInputFormat::TemplateRowInputFormat(
     const Block & header_,
@@ -41,37 +46,13 @@ TemplateRowInputFormat::TemplateRowInputFormat(const Block & header_, std::uniqu
     : RowInputFormatWithDiagnosticInfo(header_, *buf_, params_), buf(std::move(buf_)), data_types(header_.getDataTypes()),
       settings(std::move(settings_)), ignore_spaces(ignore_spaces_),
       format(std::move(format_)), row_format(std::move(row_format_)),
-      default_csv_delimiter(settings.csv.delimiter), row_between_delimiter(std::move(row_between_delimiter_))
+      default_csv_delimiter(settings.csv.delimiter), row_between_delimiter(row_between_delimiter_),
+      format_reader(std::make_unique<TemplateFormatReader>(*buf, ignore_spaces_, format, row_format, row_between_delimiter, settings))
 {
-    /// Validate format string for result set
-    bool has_data = false;
-    for (size_t i = 0; i < format.columnsCount(); ++i)
-    {
-        if (format.format_idx_to_column_idx[i])
-        {
-            if (*format.format_idx_to_column_idx[i] != 0)
-                format.throwInvalidFormat("Invalid input part", i);
-            if (has_data)
-                format.throwInvalidFormat("${data} can occur only once", i);
-            if (format.escaping_rules[i] != EscapingRule::None)
-                format.throwInvalidFormat("${data} must have empty or None deserialization type", i);
-            has_data = true;
-            format_data_idx = i;
-        }
-        else
-        {
-            if (format.escaping_rules[i] == EscapingRule::XML)
-                format.throwInvalidFormat("XML deserialization is not supported", i);
-        }
-    }
-
     /// Validate format string for rows
     std::vector<UInt8> column_in_format(header_.columns(), false);
     for (size_t i = 0; i < row_format.columnsCount(); ++i)
     {
-        if (row_format.escaping_rules[i] == EscapingRule::XML)
-            row_format.throwInvalidFormat("XML deserialization is not supported", i);
-
         if (row_format.format_idx_to_column_idx[i])
         {
             if (header_.columns() <= *row_format.format_idx_to_column_idx[i])
@@ -94,69 +75,7 @@ TemplateRowInputFormat::TemplateRowInputFormat(const Block & header_, std::uniqu
 
 void TemplateRowInputFormat::readPrefix()
 {
-    size_t last_successfully_parsed_idx = 0;
-    try
-    {
-        tryReadPrefixOrSuffix<void>(last_successfully_parsed_idx, format_data_idx);
-    }
-    catch (Exception & e)
-    {
-        format.throwInvalidFormat(e.message() + " While parsing prefix", last_successfully_parsed_idx);
-    }
-}
-
-/// Asserts delimiters and skips fields in prefix or suffix.
-/// tryReadPrefixOrSuffix<bool>(...) is used in checkForSuffix() to avoid throwing an exception after read of each row
-/// (most likely false will be returned on first call of checkString(...))
-template <typename ReturnType>
-ReturnType TemplateRowInputFormat::tryReadPrefixOrSuffix(size_t & input_part_beg, size_t input_part_end)
-{
-    static constexpr bool throw_exception = std::is_same_v<ReturnType, void>;
-
-    skipSpaces();
-    if constexpr (throw_exception)
-        assertString(format.delimiters[input_part_beg], *buf);
-    else
-    {
-        if (likely(!checkString(format.delimiters[input_part_beg], *buf)))
-            return ReturnType(false);
-    }
-
-    while (input_part_beg < input_part_end)
-    {
-        skipSpaces();
-        if constexpr (throw_exception)
-            skipField(format.escaping_rules[input_part_beg]);
-        else
-        {
-            try
-            {
-                skipField(format.escaping_rules[input_part_beg]);
-            }
-            catch (const Exception & e)
-            {
-                if (e.code() != ErrorCodes::ATTEMPT_TO_READ_AFTER_EOF &&
-                    e.code() != ErrorCodes::CANNOT_PARSE_ESCAPE_SEQUENCE &&
-                    e.code() != ErrorCodes::CANNOT_PARSE_QUOTED_STRING)
-                    throw;
-                /// If it's parsing error, then suffix is not found
-                return ReturnType(false);
-            }
-        }
-        ++input_part_beg;
-
-        skipSpaces();
-        if constexpr (throw_exception)
-            assertString(format.delimiters[input_part_beg], *buf);
-        else
-        {
-            if (likely(!checkString(format.delimiters[input_part_beg], *buf)))
-                return ReturnType(false);
-        }
-    }
-
-    if constexpr (!throw_exception)
-        return ReturnType(true);
+    format_reader->readPrefix();
 }
 
 bool TemplateRowInputFormat::readRow(MutableColumns & columns, RowReadExtension & extra)
@@ -165,9 +84,7 @@ bool TemplateRowInputFormat::readRow(MutableColumns & columns, RowReadExtension
     if (unlikely(end_of_stream))
         return false;
 
-    skipSpaces();
-
-    if (unlikely(checkForSuffix()))
+    if (unlikely(format_reader->checkForSuffix()))
     {
         end_of_stream = true;
         return false;
@@ -176,27 +93,24 @@ bool TemplateRowInputFormat::readRow(MutableColumns & columns, RowReadExtension
     updateDiagnosticInfo();
 
     if (likely(row_num != 1))
-        assertString(row_between_delimiter, *buf);
+        format_reader->skipRowBetweenDelimiter();
 
     extra.read_columns.assign(columns.size(), false);
 
     for (size_t i = 0; i < row_format.columnsCount(); ++i)
     {
-        skipSpaces();
-        assertString(row_format.delimiters[i], *buf);
-        skipSpaces();
+        format_reader->skipDelimiter(i);
+
         if (row_format.format_idx_to_column_idx[i])
         {
             size_t col_idx = *row_format.format_idx_to_column_idx[i];
             extra.read_columns[col_idx] = deserializeField(data_types[col_idx], serializations[col_idx], *columns[col_idx], i);
         }
         else
-            skipField(row_format.escaping_rules[i]);
-
+            format_reader->skipField(row_format.escaping_rules[i]);
     }
 
-    skipSpaces();
-    assertString(row_format.delimiters.back(), *buf);
+    format_reader->skipRowEndDelimiter();
 
     for (const auto & idx : always_default_columns)
         data_types[idx]->insertDefaultInto(*columns[idx]);
@@ -219,65 +133,21 @@ bool TemplateRowInputFormat::deserializeField(const DataTypePtr & type,
     catch (Exception & e)
     {
         if (e.code() == ErrorCodes::ATTEMPT_TO_READ_AFTER_EOF)
-            throwUnexpectedEof();
+            throwUnexpectedEof(row_num);
         throw;
     }
 }
 
-void TemplateRowInputFormat::skipField(TemplateRowInputFormat::EscapingRule escaping_rule)
-{
-    try
-    {
-        skipFieldByEscapingRule(*buf, escaping_rule, settings);
-    }
-    catch (Exception & e)
-    {
-        if (e.code() == ErrorCodes::ATTEMPT_TO_READ_AFTER_EOF)
-            throwUnexpectedEof();
-        throw;
-    }
-}
-
-/// Returns true if all rows have been read i.e. there are only suffix and spaces (if ignore_spaces == true) before EOF.
-/// Otherwise returns false
-bool TemplateRowInputFormat::checkForSuffix()
-{
-    PeekableReadBufferCheckpoint checkpoint{*buf};
-    bool suffix_found = false;
-    size_t last_successfully_parsed_idx = format_data_idx + 1;
-    try
-    {
-        suffix_found = tryReadPrefixOrSuffix<bool>(last_successfully_parsed_idx, format.columnsCount());
-    }
-    catch (const Exception & e)
-    {
-        if (e.code() != ErrorCodes::ATTEMPT_TO_READ_AFTER_EOF &&
-            e.code() != ErrorCodes::CANNOT_PARSE_ESCAPE_SEQUENCE &&
-            e.code() != ErrorCodes::CANNOT_PARSE_QUOTED_STRING)
-            throw;
-    }
-
-    if (unlikely(suffix_found))
-    {
-        skipSpaces();
-        if (buf->eof())
-            return true;
-    }
-
-    buf->rollbackToCheckpoint();
-    return false;
-}
-
 bool TemplateRowInputFormat::parseRowAndPrintDiagnosticInfo(MutableColumns & columns, WriteBuffer & out)
 {
     out << "Suffix does not match: ";
-    size_t last_successfully_parsed_idx = format_data_idx + 1;
+    size_t last_successfully_parsed_idx = format_reader->getFormatDataIdx() + 1;
     const ReadBuffer::Position row_begin_pos = buf->position();
     bool caught = false;
     try
     {
         PeekableReadBufferCheckpoint checkpoint{*buf, true};
-        tryReadPrefixOrSuffix<void>(last_successfully_parsed_idx, format.columnsCount());
+        format_reader->tryReadPrefixOrSuffix<void>(last_successfully_parsed_idx, format.columnsCount());
     }
     catch (Exception & e)
     {
@@ -309,7 +179,7 @@ bool TemplateRowInputFormat::parseRowAndPrintDiagnosticInfo(MutableColumns & col
         if (!parseDelimiterWithDiagnosticInfo(out, *buf, row_format.delimiters[i], "delimiter before field " + std::to_string(i), ignore_spaces))
             return false;
 
-        skipSpaces();
+        format_reader->skipSpaces();
         if (row_format.format_idx_to_column_idx[i])
         {
             const auto & header = getPort().getHeader();
@@ -364,7 +234,7 @@ void TemplateRowInputFormat::tryDeserializeField(const DataTypePtr & type, IColu
     if (index)
         deserializeField(type, serializations[*index], column, file_column);
     else
-        skipField(row_format.escaping_rules[file_column]);
+        format_reader->skipField(row_format.escaping_rules[file_column]);
 }
 
 bool TemplateRowInputFormat::isGarbageAfterField(size_t, ReadBuffer::Position)
@@ -387,13 +257,6 @@ void TemplateRowInputFormat::syncAfterError()
     /// It will cause another parsing error.
 }
 
-void TemplateRowInputFormat::throwUnexpectedEof()
-{
-    throw ParsingException("Unexpected EOF while parsing row " + std::to_string(row_num) + ". "
-                    "Maybe last row has wrong format or input doesn't contain specified suffix before EOF.",
-                    ErrorCodes::CANNOT_READ_ALL_DATA);
-}
-
 void TemplateRowInputFormat::resetParser()
 {
     RowInputFormatWithDiagnosticInfo::resetParser();
@@ -407,6 +270,268 @@ void TemplateRowInputFormat::setReadBuffer(ReadBuffer & in_)
     IInputFormat::setReadBuffer(*buf);
 }
 
+TemplateFormatReader::TemplateFormatReader(
+    PeekableReadBuffer & buf_,
+    bool ignore_spaces_,
+    const ParsedTemplateFormatString & format_,
+    const ParsedTemplateFormatString & row_format_,
+    std::string row_between_delimiter_,
+    const FormatSettings & format_settings_)
+    : buf(&buf_)
+    , ignore_spaces(ignore_spaces_)
+    , format(format_)
+    , row_format(row_format_)
+    , row_between_delimiter(row_between_delimiter_)
+    , format_settings(format_settings_)
+{
+    /// Validate format string for result set
+    bool has_data = false;
+    for (size_t i = 0; i < format.columnsCount(); ++i)
+    {
+        if (format.format_idx_to_column_idx[i])
+        {
+            if (*format.format_idx_to_column_idx[i] != 0)
+                format.throwInvalidFormat("Invalid input part", i);
+            if (has_data)
+                format.throwInvalidFormat("${data} can occur only once", i);
+            if (format.escaping_rules[i] != EscapingRule::None)
+                format.throwInvalidFormat("${data} must have empty or None deserialization type", i);
+            has_data = true;
+            format_data_idx = i;
+        }
+        else
+        {
+            if (format.escaping_rules[i] == EscapingRule::XML)
+                format.throwInvalidFormat("XML deserialization is not supported", i);
+        }
+    }
+
+    /// Validate format string for rows
+    for (size_t i = 0; i < row_format.columnsCount(); ++i)
+    {
+        if (row_format.escaping_rules[i] == EscapingRule::XML)
+            row_format.throwInvalidFormat("XML deserialization is not supported", i);
+    }
+}
+
+void TemplateFormatReader::readPrefix()
+{
+    size_t last_successfully_parsed_idx = 0;
+    try
+    {
+        tryReadPrefixOrSuffix<void>(last_successfully_parsed_idx, format_data_idx);
+    }
+    catch (Exception & e)
+    {
+        format.throwInvalidFormat(e.message() + " While parsing prefix", last_successfully_parsed_idx);
+    }
+}
+
+void TemplateFormatReader::skipField(EscapingRule escaping_rule)
+{
+    try
+    {
+        skipFieldByEscapingRule(*buf, escaping_rule, format_settings);
+    }
+    catch (Exception & e)
+    {
+        if (e.code() == ErrorCodes::ATTEMPT_TO_READ_AFTER_EOF)
+            throwUnexpectedEof(row_num);
+        throw;
+    }
+}
+
+/// Asserts delimiters and skips fields in prefix or suffix.
+/// tryReadPrefixOrSuffix<bool>(...) is used in checkForSuffix() to avoid throwing an exception after read of each row
+/// (most likely false will be returned on first call of checkString(...))
+template <typename ReturnType>
+ReturnType TemplateFormatReader::tryReadPrefixOrSuffix(size_t & input_part_beg, size_t input_part_end)
+{
+    static constexpr bool throw_exception = std::is_same_v<ReturnType, void>;
+
+    skipSpaces();
+    if constexpr (throw_exception)
+        assertString(format.delimiters[input_part_beg], *buf);
+    else
+    {
+        if (likely(!checkString(format.delimiters[input_part_beg], *buf)))
+            return ReturnType(false);
+    }
+
+    while (input_part_beg < input_part_end)
+    {
+        skipSpaces();
+        if constexpr (throw_exception)
+            skipField(format.escaping_rules[input_part_beg]);
+        else
+        {
+            try
+            {
+                skipField(format.escaping_rules[input_part_beg]);
+            }
+            catch (const Exception & e)
+            {
+                if (e.code() != ErrorCodes::ATTEMPT_TO_READ_AFTER_EOF &&
+                    e.code() != ErrorCodes::CANNOT_PARSE_ESCAPE_SEQUENCE &&
+                    e.code() != ErrorCodes::CANNOT_PARSE_QUOTED_STRING)
+                    throw;
+                /// If it's parsing error, then suffix is not found
+                return ReturnType(false);
+            }
+        }
+        ++input_part_beg;
+
+        skipSpaces();
+        if constexpr (throw_exception)
+            assertString(format.delimiters[input_part_beg], *buf);
+        else
+        {
+            if (likely(!checkString(format.delimiters[input_part_beg], *buf)))
+                return ReturnType(false);
+        }
+    }
+
+    if constexpr (!throw_exception)
+        return ReturnType(true);
+}
+
+/// Returns true if all rows have been read i.e. there are only suffix and spaces (if ignore_spaces == true) before EOF.
+/// Otherwise returns false
+bool TemplateFormatReader::checkForSuffix()
+{
+    PeekableReadBufferCheckpoint checkpoint{*buf};
+    bool suffix_found = false;
+    size_t last_successfully_parsed_idx = format_data_idx + 1;
+    try
+    {
+        suffix_found = tryReadPrefixOrSuffix<bool>(last_successfully_parsed_idx, format.columnsCount());
+    }
+    catch (const Exception & e)
+    {
+        if (e.code() != ErrorCodes::ATTEMPT_TO_READ_AFTER_EOF &&
+            e.code() != ErrorCodes::CANNOT_PARSE_ESCAPE_SEQUENCE &&
+            e.code() != ErrorCodes::CANNOT_PARSE_QUOTED_STRING)
+            throw;
+    }
+
+    if (unlikely(suffix_found))
+    {
+        skipSpaces();
+        if (buf->eof())
+            return true;
+    }
+
+    buf->rollbackToCheckpoint();
+    return false;
+}
+
+void TemplateFormatReader::skipDelimiter(size_t index)
+{
+    skipSpaces();
+    assertString(row_format.delimiters[index], *buf);
+    skipSpaces();
+}
+
+void TemplateFormatReader::skipRowEndDelimiter()
+{
+    ++row_num;
+    skipSpaces();
+    assertString(row_format.delimiters.back(), *buf);
+    skipSpaces();
+}
+
+void TemplateFormatReader::skipRowBetweenDelimiter()
+{
+    skipSpaces();
+    assertString(row_between_delimiter, *buf);
+    skipSpaces();
+}
+
+TemplateSchemaReader::TemplateSchemaReader(
+    ReadBuffer & in_,
+    bool ignore_spaces_,
+    const ParsedTemplateFormatString & format_,
+    const ParsedTemplateFormatString & row_format_,
+    std::string row_between_delimiter,
+    const FormatSettings & format_settings_,
+    ContextPtr context_)
+    : IRowSchemaReader(buf, format_settings_.max_rows_to_read_for_schema_inference)
+    , buf(in_)
+    , format(format_)
+    , row_format(row_format_)
+    , format_settings(format_settings_)
+    , context(context_)
+    , format_reader(buf, ignore_spaces_, format, row_format, row_between_delimiter, format_settings)
+{
+    setColumnNames(row_format.column_names);
+}
+
+DataTypes TemplateSchemaReader::readRowAndGetDataTypes()
+{
+    if (first_row)
+        format_reader.readPrefix();
+
+    if (format_reader.checkForSuffix())
+        return {};
+
+    if (first_row)
+        first_row = false;
+    else
+        format_reader.skipRowBetweenDelimiter();
+
+    DataTypes data_types;
+    data_types.reserve(row_format.columnsCount());
+    String field;
+    for (size_t i = 0; i != row_format.columnsCount(); ++i)
+    {
+        format_reader.skipDelimiter(i);
+        if (row_format.escaping_rules[i] == FormatSettings::EscapingRule::CSV)
+            format_settings.csv.delimiter = row_format.delimiters[i + 1].empty() ? format_settings.csv.delimiter : row_format.delimiters[i + 1].front();
+
+        field = readFieldByEscapingRule(buf, row_format.escaping_rules[i], format_settings);
+        data_types.push_back(determineDataTypeByEscapingRule(field, format_settings, row_format.escaping_rules[i], context));
+    }
+
+    format_reader.skipRowEndDelimiter();
+    return data_types;
+}
+
+static ParsedTemplateFormatString fillResultSetFormat(const FormatSettings & settings)
+{
+    ParsedTemplateFormatString resultset_format;
+    if (settings.template_settings.resultset_format.empty())
+    {
+        /// Default format string: "${data}"
+        resultset_format.delimiters.resize(2);
+        resultset_format.escaping_rules.emplace_back(ParsedTemplateFormatString::EscapingRule::None);
+        resultset_format.format_idx_to_column_idx.emplace_back(0);
+        resultset_format.column_names.emplace_back("data");
+    }
+    else
+    {
+        /// Read format string from file
+        resultset_format = ParsedTemplateFormatString(
+            FormatSchemaInfo(settings.template_settings.resultset_format, "Template", false,
+                             settings.schema.is_server, settings.schema.format_schema_path),
+            [&](const String & partName) -> std::optional<size_t>
+            {
+                if (partName == "data")
+                    return 0;
+                throw Exception("Unknown input part " + partName,
+                                ErrorCodes::SYNTAX_ERROR);
+            });
+    }
+    return resultset_format;
+}
+
+static ParsedTemplateFormatString fillRowFormat(const FormatSettings & settings, ParsedTemplateFormatString::ColumnIdxGetter idx_getter, bool allow_indexes)
+{
+    return ParsedTemplateFormatString(
+        FormatSchemaInfo(
+            settings.template_settings.row_format, "Template", false, settings.schema.is_server, settings.schema.format_schema_path),
+        idx_getter, allow_indexes);
+}
+
 void registerInputFormatTemplate(FormatFactory & factory)
 {
     for (bool ignore_spaces : {false, true})
@@ -417,39 +542,34 @@ void registerInputFormatTemplate(FormatFactory & factory)
                 IRowInputFormat::Params params,
                 const FormatSettings & settings)
         {
-            ParsedTemplateFormatString resultset_format;
-            if (settings.template_settings.resultset_format.empty())
+            auto idx_getter = [&](const String & colName) -> std::optional<size_t>
             {
-                /// Default format string: "${data}"
-                resultset_format.delimiters.resize(2);
-                resultset_format.escaping_rules.emplace_back(ParsedTemplateFormatString::EscapingRule::None);
-                resultset_format.format_idx_to_column_idx.emplace_back(0);
-                resultset_format.column_names.emplace_back("data");
-            }
-            else
-            {
-                /// Read format string from file
-                resultset_format = ParsedTemplateFormatString(
-                        FormatSchemaInfo(settings.template_settings.resultset_format, "Template", false,
-                                         settings.schema.is_server, settings.schema.format_schema_path),
-                        [&](const String & partName) -> std::optional<size_t>
-                        {
-                            if (partName == "data")
-                                return 0;
-                            throw Exception("Unknown input part " + partName,
-                                            ErrorCodes::SYNTAX_ERROR);
-                        });
-            }
+                return sample.getPositionByName(colName);
+            };
 
-            ParsedTemplateFormatString row_format = ParsedTemplateFormatString(
-                    FormatSchemaInfo(settings.template_settings.row_format, "Template", false,
-                                     settings.schema.is_server, settings.schema.format_schema_path),
-                    [&](const String & colName) -> std::optional<size_t>
-                    {
-                        return sample.getPositionByName(colName);
-                    });
+            return std::make_shared<TemplateRowInputFormat>(
+                sample,
+                buf,
+                params,
+                settings,
+                ignore_spaces,
+                fillResultSetFormat(settings),
+                fillRowFormat(settings, idx_getter, true),
+                settings.template_settings.row_between_delimiter);
+        });
+    }
+}
 
-            return std::make_shared<TemplateRowInputFormat>(sample, buf, params, settings, ignore_spaces, resultset_format, row_format, settings.template_settings.row_between_delimiter);
+void registerTemplateSchemaReader(FormatFactory & factory)
+{
+    for (bool ignore_spaces : {false, true})
+    {
+        factory.registerSchemaReader(ignore_spaces ? "TemplateIgnoreSpaces" : "Template", [ignore_spaces](ReadBuffer & buf, const FormatSettings & settings, ContextPtr context)
+        {
+            size_t index = 0;
+            auto idx_getter = [&](const String &) -> std::optional<size_t> { return index++; };
+            auto row_format = fillRowFormat(settings, idx_getter, false);
+            return std::make_shared<TemplateSchemaReader>(buf, ignore_spaces, fillResultSetFormat(settings), row_format, settings.template_settings.row_between_delimiter, settings, context);
         });
     }
 }
diff --git a/src/Processors/Formats/Impl/TemplateRowInputFormat.h b/src/Processors/Formats/Impl/TemplateRowInputFormat.h
index 61cd97413bf..755ad6cb39b 100644
--- a/src/Processors/Formats/Impl/TemplateRowInputFormat.h
+++ b/src/Processors/Formats/Impl/TemplateRowInputFormat.h
@@ -2,15 +2,19 @@
 
 #include <Core/Block.h>
 #include <Processors/Formats/RowInputFormatWithDiagnosticInfo.h>
+#include <Processors/Formats/ISchemaReader.h>
 #include <Formats/FormatSettings.h>
 #include <Formats/ParsedTemplateFormatString.h>
 #include <IO/ReadHelpers.h>
 #include <IO/PeekableReadBuffer.h>
+#include <Interpreters/Context.h>
 
 
 namespace DB
 {
 
+class TemplateFormatReader;
+
 class TemplateRowInputFormat : public RowInputFormatWithDiagnosticInfo
 {
     using EscapingRule = FormatSettings::EscapingRule;
@@ -40,14 +44,6 @@ private:
     bool deserializeField(const DataTypePtr & type,
         const SerializationPtr & serialization, IColumn & column, size_t file_column);
 
-    void skipField(EscapingRule escaping_rule);
-    inline void skipSpaces() { if (ignore_spaces) skipWhitespaceIfAny(*buf); }
-
-    template <typename ReturnType = void>
-    ReturnType tryReadPrefixOrSuffix(size_t & input_part_beg, size_t input_part_end);
-    bool checkForSuffix();
-    [[noreturn]] void throwUnexpectedEof();
-
     bool parseRowAndPrintDiagnosticInfo(MutableColumns & columns, WriteBuffer & out) override;
     void tryDeserializeField(const DataTypePtr & type, IColumn & column, size_t file_column) override;
 
@@ -63,12 +59,76 @@ private:
     const ParsedTemplateFormatString format;
     const ParsedTemplateFormatString row_format;
 
-    size_t format_data_idx;
     bool end_of_stream = false;
     std::vector<size_t> always_default_columns;
     const char default_csv_delimiter;
 
     const std::string row_between_delimiter;
+
+    std::unique_ptr<TemplateFormatReader> format_reader;
+};
+
+class TemplateFormatReader
+{
+    using EscapingRule = FormatSettings::EscapingRule;
+
+public:
+    TemplateFormatReader(
+        PeekableReadBuffer & buf_,
+        bool ignore_spaces_,
+        const ParsedTemplateFormatString & format_,
+        const ParsedTemplateFormatString & row_format_,
+        std::string row_between_delimiter,
+        const FormatSettings & format_settings_);
+
+    void readPrefix();
+    void skipField(EscapingRule escaping_rule);
+    inline void skipSpaces() { if (ignore_spaces) skipWhitespaceIfAny(*buf); }
+
+    template <typename ReturnType = void>
+    ReturnType tryReadPrefixOrSuffix(size_t & input_part_beg, size_t input_part_end);
+    bool checkForSuffix();
+
+    void setReadBuffer(PeekableReadBuffer & buf_) { buf = &buf_; }
+
+    void skipDelimiter(size_t index);
+    void skipRowEndDelimiter();
+    void skipRowBetweenDelimiter();
+
+    size_t getFormatDataIdx() const { return format_data_idx; }
+
+private:
+    PeekableReadBuffer * buf;
+    bool ignore_spaces;
+    const ParsedTemplateFormatString & format;
+    const ParsedTemplateFormatString & row_format;
+    const std::string row_between_delimiter;
+    const FormatSettings & format_settings;
+    size_t format_data_idx;
+    size_t row_num;
+};
+
+class TemplateSchemaReader : public IRowSchemaReader
+{
+public:
+    TemplateSchemaReader(ReadBuffer & in_,
+                         bool ignore_spaces_,
+                         const ParsedTemplateFormatString & format_,
+                         const ParsedTemplateFormatString & row_format_,
+                         std::string row_between_delimiter,
+                         const FormatSettings & format_settings_,
+                         ContextPtr context_);
+
+    DataTypes readRowAndGetDataTypes() override;
+
+private:
+    PeekableReadBuffer buf;
+    const ParsedTemplateFormatString format;
+    const ParsedTemplateFormatString row_format;
+    FormatSettings format_settings;
+    ContextPtr context;
+    TemplateFormatReader format_reader;
+    bool first_row = true;
 };
 
 bool parseDelimiterWithDiagnosticInfo(WriteBuffer & out, ReadBuffer & buf, const String & delimiter, const String & description, bool skip_spaces);
diff --git a/src/Processors/Formats/Impl/ValuesBlockInputFormat.cpp b/src/Processors/Formats/Impl/ValuesBlockInputFormat.cpp
index adf6d2e8a25..b58be3f5526 100644
--- a/src/Processors/Formats/Impl/ValuesBlockInputFormat.cpp
+++ b/src/Processors/Formats/Impl/ValuesBlockInputFormat.cpp
@@ -5,6 +5,7 @@
 #include <Parsers/TokenIterator.h>
 #include <Processors/Formats/Impl/ValuesBlockInputFormat.h>
 #include <Formats/FormatFactory.h>
+#include <Formats/ReadSchemaUtils.h>
 #include <Core/Block.h>
 #include <base/find_symbols.h>
 #include <Common/typeid_cast.h>
@@ -15,6 +16,7 @@
 #include <DataTypes/DataTypeArray.h>
 #include <DataTypes/DataTypeMap.h>
 
+#include <base/logger_useful.h>
 
 namespace DB
 {
@@ -286,6 +288,50 @@ namespace
     }
 }
 
+/// Can be used in fileSegmentationEngine for parallel parsing of Values
+static bool skipToNextRow(PeekableReadBuffer * buf, size_t min_chunk_bytes, int balance)
+{
+    skipWhitespaceIfAny(*buf);
+    if (buf->eof() || *buf->position() == ';')
+        return false;
+    bool quoted = false;
+
+    size_t chunk_begin_buf_count = buf->count();
+    while (!buf->eof() && (balance || buf->count() - chunk_begin_buf_count < min_chunk_bytes))
+    {
+        buf->position() = find_first_symbols<'\\', '\'', ')', '('>(buf->position(), buf->buffer().end());
+        if (buf->position() == buf->buffer().end())
+            continue;
+        if (*buf->position() == '\\')
+        {
+            ++buf->position();
+            if (!buf->eof())
+                ++buf->position();
+        }
+        else if (*buf->position() == '\'')
+        {
+            quoted ^= true;
+            ++buf->position();
+        }
+        else if (*buf->position() == ')')
+        {
+            ++buf->position();
+            if (!quoted)
+                --balance;
+        }
+        else if (*buf->position() == '(')
+        {
+            ++buf->position();
+            if (!quoted)
+                ++balance;
+        }
+    }
+
+    if (!buf->eof() && *buf->position() == ',')
+        ++buf->position();
+    return true;
+}
+
 bool ValuesBlockInputFormat::parseExpression(IColumn & column, size_t column_idx)
 {
     const Block & header = getPort().getHeader();
@@ -293,7 +339,7 @@ bool ValuesBlockInputFormat::parseExpression(IColumn & column, size_t column_idx
     auto settings = context->getSettingsRef();
 
     /// We need continuous memory containing the expression to use Lexer
-    skipToNextRow(0, 1);
+    skipToNextRow(buf.get(), 0, 1);
     buf->makeContinuousMemoryFromCheckpointToPos();
     buf->rollbackToCheckpoint();
 
@@ -437,50 +483,6 @@ bool ValuesBlockInputFormat::parseExpression(IColumn & column, size_t column_idx
     return true;
 }
 
-/// Can be used in fileSegmentationEngine for parallel parsing of Values
-bool ValuesBlockInputFormat::skipToNextRow(size_t min_chunk_bytes, int balance)
-{
-    skipWhitespaceIfAny(*buf);
-    if (buf->eof() || *buf->position() == ';')
-        return false;
-    bool quoted = false;
-
-    size_t chunk_begin_buf_count = buf->count();
-    while (!buf->eof() && (balance || buf->count() - chunk_begin_buf_count < min_chunk_bytes))
-    {
-        buf->position() = find_first_symbols<'\\', '\'', ')', '('>(buf->position(), buf->buffer().end());
-        if (buf->position() == buf->buffer().end())
-            continue;
-        if (*buf->position() == '\\')
-        {
-            ++buf->position();
-            if (!buf->eof())
-                ++buf->position();
-        }
-        else if (*buf->position() == '\'')
-        {
-            quoted ^= true;
-            ++buf->position();
-        }
-        else if (*buf->position() == ')')
-        {
-            ++buf->position();
-            if (!quoted)
-                --balance;
-        }
-        else if (*buf->position() == '(')
-        {
-            ++buf->position();
-            if (!quoted)
-                ++balance;
-        }
-    }
-
-    if (!buf->eof() && *buf->position() == ',')
-        ++buf->position();
-    return true;
-}
-
 void ValuesBlockInputFormat::assertDelimiterAfterValue(size_t column_idx)
 {
     if (unlikely(!checkDelimiterAfterValue(column_idx)))
@@ -559,6 +561,63 @@ void ValuesBlockInputFormat::setReadBuffer(ReadBuffer & in_)
     IInputFormat::setReadBuffer(*buf);
 }
 
+ValuesSchemaReader::ValuesSchemaReader(ReadBuffer & in_, const FormatSettings & format_settings_, ContextPtr context_)
+    : IRowSchemaReader(buf, format_settings_.max_rows_to_read_for_schema_inference), buf(in_), context(context_)
+{
+}
+
+DataTypes ValuesSchemaReader::readRowAndGetDataTypes()
+{
+    if (first_row)
+    {
+        skipBOMIfExists(buf);
+        first_row = false;
+    }
+
+    skipWhitespaceIfAny(buf);
+    if (buf.eof())
+        return {};
+
+    assertChar('(', buf);
+    PeekableReadBufferCheckpoint checkpoint(buf);
+    skipToNextRow(&buf, 0, 1);
+    buf.makeContinuousMemoryFromCheckpointToPos();
+    buf.rollbackToCheckpoint();
+
+    Tokens tokens(buf.position(), buf.buffer().end());
+    IParser::Pos token_iterator(tokens, context->getSettingsRef().max_parser_depth);
+
+    DataTypes data_types;
+    bool finish = false;
+    while (!finish)
+    {
+        Expected expected;
+        ASTPtr ast;
+
+        bool parsed = parser.parse(token_iterator, ast, expected);
+        /// Consider delimiter after value (',' or ')') as part of expression
+        parsed &= token_iterator->type == TokenType::Comma || token_iterator->type == TokenType::ClosingRoundBracket;
+
+        if (!parsed)
+            throw Exception(ErrorCodes::SYNTAX_ERROR, "Cannot parse expression here: {}, token: {}",
+                            String(buf.position(), std::min(SHOW_CHARS_ON_SYNTAX_ERROR, buf.buffer().end() - buf.position())), String(token_iterator.get().begin, token_iterator.get().end));
+
+        std::pair<Field, DataTypePtr> result = evaluateConstantExpression(ast, context);
+        data_types.push_back(generalizeDataType(result.second));
+
+        if (token_iterator->type == TokenType::ClosingRoundBracket)
+            finish = true;
+        ++token_iterator;
+        buf.position() = const_cast<char *>(token_iterator->begin);
+    }
+
+    skipWhitespaceIfAny(buf);
+    if (!buf.eof() && *buf.position() == ',')
+        ++buf.position();
+
+    return data_types;
+}
+
 void registerInputFormatValues(FormatFactory & factory)
 {
     factory.registerInputFormat("Values", [](
@@ -571,4 +630,12 @@ void registerInputFormatValues(FormatFactory & factory)
     });
 }
 
+void registerValuesSchemaReader(FormatFactory & factory)
+{
+    factory.registerSchemaReader("Values", [](ReadBuffer & buf, const FormatSettings & settings, ContextPtr context)
+    {
+        return std::make_shared<ValuesSchemaReader>(buf, settings, context);
+    });
+}
+
 }
diff --git a/src/Processors/Formats/Impl/ValuesBlockInputFormat.h b/src/Processors/Formats/Impl/ValuesBlockInputFormat.h
index 5bbd4bea5ba..e1521955472 100644
--- a/src/Processors/Formats/Impl/ValuesBlockInputFormat.h
+++ b/src/Processors/Formats/Impl/ValuesBlockInputFormat.h
@@ -7,6 +7,7 @@
 #include <Parsers/ExpressionListParsers.h>
 #include <Processors/Formats/IInputFormat.h>
 #include <Processors/Formats/IRowInputFormat.h>
+#include <Processors/Formats/ISchemaReader.h>
 #include <Processors/Formats/Impl/ConstantExpressionTemplate.h>
 
 namespace DB
@@ -68,8 +69,6 @@ private:
     void readPrefix();
     void readSuffix();
 
-    bool skipToNextRow(size_t min_chunk_bytes = 0, int balance = 0);
-
     std::unique_ptr<PeekableReadBuffer> buf;
 
     const RowInputFormatParams params;
@@ -95,4 +94,18 @@ private:
     BlockMissingValues block_missing_values;
 };
 
+class ValuesSchemaReader : public IRowSchemaReader
+{
+public:
+    ValuesSchemaReader(ReadBuffer & in_, const FormatSettings & format_settings, ContextPtr context_);
+
+private:
+    DataTypes readRowAndGetDataTypes() override;
+
+    PeekableReadBuffer buf;
+    ContextPtr context;
+    ParserExpression parser;
+    bool first_row = true;
+};
+
 }
diff --git a/src/Processors/Formats/RowInputFormatWithNamesAndTypes.cpp b/src/Processors/Formats/RowInputFormatWithNamesAndTypes.cpp
index 87fa5ec1c4a..7720b01dc74 100644
--- a/src/Processors/Formats/RowInputFormatWithNamesAndTypes.cpp
+++ b/src/Processors/Formats/RowInputFormatWithNamesAndTypes.cpp
@@ -1,5 +1,7 @@
 #include <Processors/Formats/RowInputFormatWithNamesAndTypes.h>
+#include <Processors/Formats/ISchemaReader.h>
 #include <DataTypes/DataTypeNothing.h>
+#include <DataTypes/DataTypeFactory.h>
 #include <IO/ReadHelpers.h>
 #include <IO/Operators.h>
 
@@ -9,6 +11,7 @@ namespace DB
 namespace ErrorCodes
 {
     extern const int INCORRECT_DATA;
+    extern const int CANNOT_EXTRACT_TABLE_STRUCTURE;
 }
 
 RowInputFormatWithNamesAndTypes::RowInputFormatWithNamesAndTypes(
@@ -17,8 +20,13 @@ RowInputFormatWithNamesAndTypes::RowInputFormatWithNamesAndTypes(
     const Params & params_,
     bool with_names_,
     bool with_types_,
-    const FormatSettings & format_settings_)
-    : RowInputFormatWithDiagnosticInfo(header_, in_, params_), format_settings(format_settings_), with_names(with_names_), with_types(with_types_)
+    const FormatSettings & format_settings_,
+    std::unique_ptr<FormatWithNamesAndTypesReader> format_reader_)
+    : RowInputFormatWithDiagnosticInfo(header_, in_, params_)
+    , format_settings(format_settings_)
+    , with_names(with_names_)
+    , with_types(with_types_)
+    , format_reader(std::move(format_reader_))
 {
     const auto & sample = getPort().getHeader();
     size_t num_columns = sample.columns();
@@ -88,7 +96,7 @@ void RowInputFormatWithNamesAndTypes::readPrefix()
     }
 
     /// Skip prefix before names and types.
-    skipPrefixBeforeHeader();
+    format_reader->skipPrefixBeforeHeader();
 
     /// This is a bit of abstraction leakage, but we need it in parallel parsing:
     /// we check if this InputFormat is working with the "real" beginning of the data.
@@ -97,7 +105,7 @@ void RowInputFormatWithNamesAndTypes::readPrefix()
         if (format_settings.with_names_use_header)
         {
             std::vector<bool> read_columns(data_types.size(), false);
-            auto column_names = readNames();
+            auto column_names = format_reader->readNames();
             for (const auto & name : column_names)
                 addInputColumn(name, read_columns);
 
@@ -110,7 +118,7 @@ void RowInputFormatWithNamesAndTypes::readPrefix()
         else
         {
             setupAllColumnsByTableSchema();
-            skipNames();
+            format_reader->skipNames();
         }
     }
     else if (!column_mapping->is_set)
@@ -119,10 +127,10 @@ void RowInputFormatWithNamesAndTypes::readPrefix()
     if (with_types)
     {
         /// Skip delimiter between names and types.
-        skipRowBetweenDelimiter();
+        format_reader->skipRowBetweenDelimiter();
         if (format_settings.with_types_use_header)
         {
-            auto types = readTypes();
+            auto types = format_reader->readTypes();
             if (types.size() != column_mapping->column_indexes_for_input_fields.size())
                 throw Exception(
                     ErrorCodes::INCORRECT_DATA,
@@ -143,7 +151,7 @@ void RowInputFormatWithNamesAndTypes::readPrefix()
             }
         }
         else
-            skipTypes();
+            format_reader->skipTypes();
     }
 }
 
@@ -161,7 +169,7 @@ bool RowInputFormatWithNamesAndTypes::readRow(MutableColumns & columns, RowReadE
     if (unlikely(end_of_stream))
         return false;
 
-    if (unlikely(checkForSuffix()))
+    if (unlikely(format_reader->checkForSuffix()))
     {
         end_of_stream = true;
         return false;
@@ -170,9 +178,9 @@ bool RowInputFormatWithNamesAndTypes::readRow(MutableColumns & columns, RowReadE
     updateDiagnosticInfo();
 
     if (likely(row_num != 1 || (getCurrentUnitNumber() == 0 && (with_names || with_types))))
-        skipRowBetweenDelimiter();
+        format_reader->skipRowBetweenDelimiter();
 
-    skipRowStartDelimiter();
+    format_reader->skipRowStartDelimiter();
 
     ext.read_columns.resize(data_types.size());
     for (size_t file_column = 0; file_column < column_mapping->column_indexes_for_input_fields.size(); ++file_column)
@@ -180,20 +188,20 @@ bool RowInputFormatWithNamesAndTypes::readRow(MutableColumns & columns, RowReadE
         const auto & column_index = column_mapping->column_indexes_for_input_fields[file_column];
         const bool is_last_file_column = file_column + 1 == column_mapping->column_indexes_for_input_fields.size();
         if (column_index)
-            ext.read_columns[*column_index] = readField(
+            ext.read_columns[*column_index] = format_reader->readField(
                 *columns[*column_index],
                 data_types[*column_index],
                 serializations[*column_index],
                 is_last_file_column,
                 column_mapping->names_of_columns[file_column]);
         else
-            skipField(file_column);
+            format_reader->skipField(file_column);
 
         if (!is_last_file_column)
-            skipFieldDelimiter();
+            format_reader->skipFieldDelimiter();
     }
 
-    skipRowEndDelimiter();
+    format_reader->skipRowEndDelimiter();
 
     insertDefaultsForNotSeenColumns(columns, ext);
 
@@ -218,13 +226,13 @@ void RowInputFormatWithNamesAndTypes::tryDeserializeField(const DataTypePtr & ty
     const auto & index = column_mapping->column_indexes_for_input_fields[file_column];
     if (index)
     {
-        checkNullValueForNonNullable(type);
+        format_reader->checkNullValueForNonNullable(type);
         const bool is_last_file_column = file_column + 1 == column_mapping->column_indexes_for_input_fields.size();
-        readField(column, type, serializations[*index], is_last_file_column, column_mapping->names_of_columns[file_column]);
+        format_reader->readField(column, type, serializations[*index], is_last_file_column, column_mapping->names_of_columns[file_column]);
     }
     else
     {
-        skipField(file_column);
+        format_reader->skipField(file_column);
     }
 }
 
@@ -236,13 +244,13 @@ bool RowInputFormatWithNamesAndTypes::parseRowAndPrintDiagnosticInfo(MutableColu
         return false;
     }
 
-    if (!tryParseSuffixWithDiagnosticInfo(out))
+    if (!format_reader->tryParseSuffixWithDiagnosticInfo(out))
         return false;
 
-    if (likely(row_num != 1) && !parseRowBetweenDelimiterWithDiagnosticInfo(out))
+    if (likely(row_num != 1) && !format_reader->parseRowBetweenDelimiterWithDiagnosticInfo(out))
         return false;
 
-    if (!parseRowStartWithDiagnosticInfo(out))
+    if (!format_reader->parseRowStartWithDiagnosticInfo(out))
         return false;
 
     for (size_t file_column = 0; file_column < column_mapping->column_indexes_for_input_fields.size(); ++file_column)
@@ -266,22 +274,68 @@ bool RowInputFormatWithNamesAndTypes::parseRowAndPrintDiagnosticInfo(MutableColu
         /// Delimiters
         if (file_column + 1 != column_mapping->column_indexes_for_input_fields.size())
         {
-            if (!parseFieldDelimiterWithDiagnosticInfo(out))
+            if (!format_reader->parseFieldDelimiterWithDiagnosticInfo(out))
                 return false;
         }
     }
 
-    return parseRowEndWithDiagnosticInfo(out);
+    return format_reader->parseRowEndWithDiagnosticInfo(out);
 }
 
-
-void registerFileSegmentationEngineForFormatWithNamesAndTypes(
-    FormatFactory & factory, const String & base_format_name, FormatFactory::FileSegmentationEngine segmentation_engine)
+bool RowInputFormatWithNamesAndTypes::isGarbageAfterField(size_t index, ReadBuffer::Position pos)
 {
-    factory.registerFileSegmentationEngine(base_format_name, segmentation_engine);
-    factory.registerFileSegmentationEngine(base_format_name + "WithNames", segmentation_engine);
-    factory.registerFileSegmentationEngine(base_format_name + "WithNamesAndTypes", segmentation_engine);
+    return format_reader->isGarbageAfterField(index, pos);
 }
 
+void RowInputFormatWithNamesAndTypes::setReadBuffer(ReadBuffer & in_)
+{
+    format_reader->setReadBuffer(in_);
+    IInputFormat::setReadBuffer(in_);
+}
+
+FormatWithNamesAndTypesSchemaReader::FormatWithNamesAndTypesSchemaReader(
+    ReadBuffer & in_,
+    size_t max_rows_to_read_,
+    bool with_names_,
+    bool with_types_,
+    FormatWithNamesAndTypesReader * format_reader_,
+    DataTypePtr default_type_)
+    : IRowSchemaReader(in_, max_rows_to_read_, default_type_), with_names(with_names_), with_types(with_types_), format_reader(format_reader_)
+{
+}
+
+NamesAndTypesList FormatWithNamesAndTypesSchemaReader::readSchema()
+{
+    if (with_names || with_types)
+        skipBOMIfExists(in);
+
+    format_reader->skipPrefixBeforeHeader();
+
+    Names names;
+    if (with_names)
+        names = format_reader->readNames();
+
+    if (with_types)
+    {
+        format_reader->skipRowBetweenDelimiter();
+        std::vector<String> data_type_names = format_reader->readTypes();
+        if (data_type_names.size() != names.size())
+            throw Exception(
+                ErrorCodes::CANNOT_EXTRACT_TABLE_STRUCTURE,
+                "The number of column names {} differs with the number of types {}", names.size(), data_type_names.size());
+
+        NamesAndTypesList result;
+        for (size_t i = 0; i != data_type_names.size(); ++i)
+            result.emplace_back(names[i], DataTypeFactory::instance().get(data_type_names[i]));
+        return result;
+    }
+
+    if (!names.empty())
+        setColumnNames(names);
+
+    /// We should determine types by reading rows with data. Use the implementation from IRowSchemaReader.
+    return IRowSchemaReader::readSchema();
+}
 
 }
+
diff --git a/src/Processors/Formats/RowInputFormatWithNamesAndTypes.h b/src/Processors/Formats/RowInputFormatWithNamesAndTypes.h
index cd7cd34d7e6..25ffc8d6de2 100644
--- a/src/Processors/Formats/RowInputFormatWithNamesAndTypes.h
+++ b/src/Processors/Formats/RowInputFormatWithNamesAndTypes.h
@@ -1,12 +1,15 @@
 #pragma once
 
 #include <Processors/Formats/RowInputFormatWithDiagnosticInfo.h>
+#include <Processors/Formats/ISchemaReader.h>
 #include <Formats/FormatSettings.h>
 #include <Formats/FormatFactory.h>
 
 namespace DB
 {
 
+class FormatWithNamesAndTypesReader;
+
 /// Base class for input formats with -WithNames and -WithNamesAndTypes suffixes.
 /// It accepts 2 parameters in constructor - with_names and with_types and implements
 /// input format depending on them:
@@ -20,7 +23,7 @@ namespace DB
 /// then reads/skips types. So you can this invariant.
 class RowInputFormatWithNamesAndTypes : public RowInputFormatWithDiagnosticInfo
 {
-public:
+protected:
     /** with_names - in the first line the header with column names
       * with_types - in the second line the header with column names
       */
@@ -28,44 +31,14 @@ public:
         const Block & header_,
         ReadBuffer & in_,
         const Params & params_,
-        bool with_names_, bool with_types_, const FormatSettings & format_settings_);
+        bool with_names_,
+        bool with_types_,
+        const FormatSettings & format_settings_,
+        std::unique_ptr<FormatWithNamesAndTypesReader> format_reader_);
 
     void resetParser() override;
-
-protected:
-    /// Read single field from input. Return false if there was no real value and we inserted default value.
-    virtual bool readField(IColumn & column, const DataTypePtr & type, const SerializationPtr & serialization, bool is_last_file_column, const String & column_name) = 0;
-
-    /// Skip single field, it's used to skip unknown columns.
-    virtual void skipField(size_t file_column) = 0;
-    /// Skip the whole row with names.
-    virtual void skipNames() = 0;
-    /// Skip the whole row with types.
-    virtual void skipTypes() = 0;
-
-    /// Skip delimiters, if any.
-    virtual void skipPrefixBeforeHeader() {}
-    virtual void skipRowStartDelimiter() {}
-    virtual void skipFieldDelimiter() {}
-    virtual void skipRowEndDelimiter() {}
-    virtual void skipRowBetweenDelimiter() {}
-
-    /// Check suffix.
-    virtual bool checkForSuffix() { return in->eof(); }
-
-    /// Methods for parsing with diagnostic info.
-    virtual void checkNullValueForNonNullable(DataTypePtr) {}
-    virtual bool parseRowStartWithDiagnosticInfo(WriteBuffer &) { return true; }
-    virtual bool parseFieldDelimiterWithDiagnosticInfo(WriteBuffer &) { return true; }
-    virtual bool parseRowEndWithDiagnosticInfo(WriteBuffer &) { return true;}
-    virtual bool parseRowBetweenDelimiterWithDiagnosticInfo(WriteBuffer &) { return true;}
-    virtual bool tryParseSuffixWithDiagnosticInfo(WriteBuffer &) { return true; }
-    bool isGarbageAfterField(size_t, ReadBuffer::Position) override {return false; }
-
-    /// Read row with names and return the list of them.
-    virtual std::vector<String> readNames() = 0;
-    /// Read row with types and return the list of them.
-    virtual std::vector<String> readTypes() = 0;
+    bool isGarbageAfterField(size_t index, ReadBuffer::Position pos) override;
+    void setReadBuffer(ReadBuffer & in_) override;
 
     const FormatSettings format_settings;
     DataTypes data_types;
@@ -84,10 +57,90 @@ private:
 
     bool with_names;
     bool with_types;
+    std::unique_ptr<FormatWithNamesAndTypesReader> format_reader;
     std::unordered_map<String, size_t> column_indexes_by_names;
 };
 
-void registerFileSegmentationEngineForFormatWithNamesAndTypes(
-    FormatFactory & factory, const String & base_format_name, FormatFactory::FileSegmentationEngine segmentation_engine);
+/// Base class for parsing data in input formats with -WithNames and -WithNamesAndTypes suffixes.
+/// Used for reading/skipping names/types/delimiters in specific format.
+class FormatWithNamesAndTypesReader
+{
+public:
+    explicit FormatWithNamesAndTypesReader(ReadBuffer & in_, const FormatSettings & format_settings_) : in(&in_), format_settings(format_settings_) {}
+
+    /// Read single field from input. Return false if there was no real value and we inserted default value.
+    virtual bool readField(IColumn & column, const DataTypePtr & type, const SerializationPtr & serialization, bool is_last_file_column, const String & column_name) = 0;
+
+    /// Methods for parsing with diagnostic info.
+    virtual void checkNullValueForNonNullable(DataTypePtr) {}
+    virtual bool parseRowStartWithDiagnosticInfo(WriteBuffer &) { return true; }
+    virtual bool parseFieldDelimiterWithDiagnosticInfo(WriteBuffer &) { return true; }
+    virtual bool parseRowEndWithDiagnosticInfo(WriteBuffer &) { return true;}
+    virtual bool parseRowBetweenDelimiterWithDiagnosticInfo(WriteBuffer &) { return true;}
+    virtual bool tryParseSuffixWithDiagnosticInfo(WriteBuffer &) { return true; }
+    virtual bool isGarbageAfterField(size_t, ReadBuffer::Position) { return false; }
+
+    /// Read row with names and return the list of them.
+    virtual std::vector<String> readNames() = 0;
+    /// Read row with types and return the list of them.
+    virtual std::vector<String> readTypes() = 0;
+
+    /// Skip single field, it's used to skip unknown columns.
+    virtual void skipField(size_t file_column) = 0;
+    /// Skip the whole row with names.
+    virtual void skipNames() = 0;
+    /// Skip the whole row with types.
+    virtual void skipTypes() = 0;
+
+    /// Skip delimiters, if any.
+    virtual void skipPrefixBeforeHeader() {}
+    virtual void skipRowStartDelimiter() {}
+    virtual void skipFieldDelimiter() {}
+    virtual void skipRowEndDelimiter() {}
+    virtual void skipRowBetweenDelimiter() {}
+
+    /// Check suffix.
+    virtual bool checkForSuffix() { return in->eof(); }
+
+    const FormatSettings & getFormatSettings() const { return format_settings; }
+
+    virtual void setReadBuffer(ReadBuffer & in_) { in = &in_; }
+
+    virtual ~FormatWithNamesAndTypesReader() = default;
+
+protected:
+    ReadBuffer * in;
+    const FormatSettings format_settings;
+};
+
+/// Base class for schema inference for formats with -WithNames and -WithNamesAndTypes suffixes.
+/// For formats with -WithNamesAndTypes suffix the schema will be determined by first two rows.
+/// For formats with -WithNames suffix the names of columns will be determined by the first row
+/// and types of columns by the rows with data.
+/// For formats without suffixes default column names will be used
+/// and types will be determined by the rows with data.
+class FormatWithNamesAndTypesSchemaReader : public IRowSchemaReader
+{
+public:
+    FormatWithNamesAndTypesSchemaReader(
+        ReadBuffer & in,
+        size_t max_rows_to_read_,
+        bool with_names_,
+        bool with_types_,
+        FormatWithNamesAndTypesReader * format_reader_,
+        DataTypePtr default_type_ = nullptr);
+
+    NamesAndTypesList readSchema() override;
+
+protected:
+    virtual DataTypes readRowAndGetDataTypes() override = 0;
+
+    bool with_names;
+    bool with_types;
+
+private:
+    FormatWithNamesAndTypesReader * format_reader;
+};
 
 }
+
diff --git a/src/Storages/ExternalDataSourceConfiguration.h b/src/Storages/ExternalDataSourceConfiguration.h
index 502f8b800e3..f33fd938092 100644
--- a/src/Storages/ExternalDataSourceConfiguration.h
+++ b/src/Storages/ExternalDataSourceConfiguration.h
@@ -88,7 +88,7 @@ struct URLBasedDataSourceConfiguration
     String url;
     String format;
     String compression_method = "auto";
-    String structure;
+    String structure = "auto";
 
     std::vector<std::pair<String, Field>> headers;
     String http_method;
diff --git a/src/Storages/HDFS/StorageHDFS.cpp b/src/Storages/HDFS/StorageHDFS.cpp
index 2105228abf6..f22f6f66ced 100644
--- a/src/Storages/HDFS/StorageHDFS.cpp
+++ b/src/Storages/HDFS/StorageHDFS.cpp
@@ -14,7 +14,6 @@
 #include <Processors/Formats/IInputFormat.h>
 #include <Processors/Transforms/AddingDefaultsTransform.h>
 
-#include <IO/WriteHelpers.h>
 #include <IO/ReadHelpers.h>
 
 #include <Interpreters/Context.h>
@@ -29,6 +28,8 @@
 #include <Storages/HDFS/WriteBufferFromHDFS.h>
 #include <Storages/PartitionedSink.h>
 
+
+#include <Formats/ReadSchemaUtils.h>
 #include <Formats/FormatFactory.h>
 #include <Functions/FunctionsConversion.h>
 
@@ -51,10 +52,70 @@ namespace ErrorCodes
 {
     extern const int NUMBER_OF_ARGUMENTS_DOESNT_MATCH;
     extern const int ACCESS_DENIED;
+    extern const int CANNOT_EXTRACT_TABLE_STRUCTURE;
 }
+namespace
+{
+    /* Recursive directory listing with matched paths as a result.
+     * Have the same method in StorageFile.
+     */
+    Strings LSWithRegexpMatching(const String & path_for_ls, const HDFSFSPtr & fs, const String & for_match)
+    {
+        const size_t first_glob = for_match.find_first_of("*?{");
 
-static Strings listFilesWithRegexpMatching(const String & path_for_ls, const HDFSFSPtr & fs, const String & for_match);
+        const size_t end_of_path_without_globs = for_match.substr(0, first_glob).rfind('/');
+        const String suffix_with_globs = for_match.substr(end_of_path_without_globs);   /// begin with '/'
+        const String prefix_without_globs = path_for_ls + for_match.substr(1, end_of_path_without_globs); /// ends with '/'
 
+        const size_t next_slash = suffix_with_globs.find('/', 1);
+        re2::RE2 matcher(makeRegexpPatternFromGlobs(suffix_with_globs.substr(0, next_slash)));
+
+        HDFSFileInfo ls;
+        ls.file_info = hdfsListDirectory(fs.get(), prefix_without_globs.data(), &ls.length);
+        Strings result;
+        for (int i = 0; i < ls.length; ++i)
+        {
+            const String full_path = String(ls.file_info[i].mName);
+            const size_t last_slash = full_path.rfind('/');
+            const String file_name = full_path.substr(last_slash);
+            const bool looking_for_directory = next_slash != std::string::npos;
+            const bool is_directory = ls.file_info[i].mKind == 'D';
+            /// Condition with type of current file_info means what kind of path is it in current iteration of ls
+            if (!is_directory && !looking_for_directory)
+            {
+                if (re2::RE2::FullMatch(file_name, matcher))
+                {
+                    result.push_back(String(ls.file_info[i].mName));
+                }
+            }
+            else if (is_directory && looking_for_directory)
+            {
+                if (re2::RE2::FullMatch(file_name, matcher))
+                {
+                    Strings result_part = LSWithRegexpMatching(fs::path(full_path) / "", fs, suffix_with_globs.substr(next_slash));
+                    /// Recursion depth is limited by pattern. '*' works only for depth = 1, for depth = 2 pattern path is '*/*'. So we do not need additional check.
+                    std::move(result_part.begin(), result_part.end(), std::back_inserter(result));
+                }
+            }
+        }
+
+        return result;
+    }
+
+    std::pair<String, String> getPathFromUriAndUriWithoutPath(const String & uri)
+    {
+        const size_t begin_of_path = uri.find('/', uri.find("//") + 2);
+        return {uri.substr(begin_of_path), uri.substr(0, begin_of_path)};
+    }
+
+    std::vector<String> getPathsList(const String & path_from_uri, const String & uri_without_path, ContextPtr context)
+    {
+        HDFSBuilderWrapper builder = createHDFSBuilder(uri_without_path + "/", context->getGlobalContext()->getConfigRef());
+        HDFSFSPtr fs = createHDFSFS(builder.get());
+
+        return LSWithRegexpMatching("/", fs, path_from_uri);
+    }
+}
 
 StorageHDFS::StorageHDFS(
     const String & uri_,
@@ -79,25 +140,52 @@ StorageHDFS::StorageHDFS(
     checkHDFSURL(uri);
 
     StorageInMemoryMetadata storage_metadata;
-    storage_metadata.setColumns(columns_);
+
+    if (columns_.empty())
+    {
+        auto columns = getTableStructureFromData(format_name, uri, compression_method, context_);
+        storage_metadata.setColumns(columns);
+    }
+    else
+        storage_metadata.setColumns(columns_);
+
     storage_metadata.setConstraints(constraints_);
     storage_metadata.setComment(comment);
     setInMemoryMetadata(storage_metadata);
 }
 
+ColumnsDescription StorageHDFS::getTableStructureFromData(
+    const String & format,
+    const String & uri,
+    const String & compression_method,
+    ContextPtr ctx)
+{
+    auto read_buffer_creator = [&]()
+    {
+        const auto [path_from_uri, uri_without_path] = getPathFromUriAndUriWithoutPath(uri);
+        auto paths = getPathsList(path_from_uri, uri, ctx);
+        if (paths.empty())
+            throw Exception(
+                ErrorCodes::CANNOT_EXTRACT_TABLE_STRUCTURE,
+                "Cannot extract table structure from {} format file, because there are no files in HDFS with provided path. You must "
+                "specify table structure manually",
+                format);
+
+        auto compression = chooseCompressionMethod(paths[0], compression_method);
+        return wrapReadBufferWithCompressionMethod(
+            std::make_unique<ReadBufferFromHDFS>(uri_without_path, paths[0], ctx->getGlobalContext()->getConfigRef()), compression);
+    };
+
+    return readSchemaFromFormat(format, std::nullopt, read_buffer_creator, ctx);
+}
+
 class HDFSSource::DisclosedGlobIterator::Impl
 {
 public:
     Impl(ContextPtr context_, const String & uri)
     {
-        const size_t begin_of_path = uri.find('/', uri.find("//") + 2);
-        const String path_from_uri = uri.substr(begin_of_path);
-        const String uri_without_path = uri.substr(0, begin_of_path); /// ends without '/'
-
-        HDFSBuilderWrapper builder = createHDFSBuilder(uri_without_path + "/", context_->getGlobalContext()->getConfigRef());
-        HDFSFSPtr fs = createHDFSFS(builder.get());
-
-        uris = listFilesWithRegexpMatching("/", fs, path_from_uri);
+        const auto [path_from_uri, uri_without_path] = getPathFromUriAndUriWithoutPath(uri);
+        uris = getPathsList(path_from_uri, uri_without_path, context_);
         for (auto & elem : uris)
             elem = uri_without_path + elem;
         uris_iter = uris.begin();
@@ -339,51 +427,6 @@ private:
 };
 
 
-/* Recursive directory listing with matched paths as a result.
- * Have the same method in StorageFile.
- */
-Strings listFilesWithRegexpMatching(const String & path_for_ls, const HDFSFSPtr & fs, const String & for_match)
-{
-    const size_t first_glob = for_match.find_first_of("*?{");
-
-    const size_t end_of_path_without_globs = for_match.substr(0, first_glob).rfind('/');
-    const String suffix_with_globs = for_match.substr(end_of_path_without_globs);   /// begin with '/'
-    const String prefix_without_globs = path_for_ls + for_match.substr(1, end_of_path_without_globs); /// ends with '/'
-
-    const size_t next_slash = suffix_with_globs.find('/', 1);
-    re2::RE2 matcher(makeRegexpPatternFromGlobs(suffix_with_globs.substr(0, next_slash)));
-
-    HDFSFileInfo ls;
-    ls.file_info = hdfsListDirectory(fs.get(), prefix_without_globs.data(), &ls.length);
-    Strings result;
-    for (int i = 0; i < ls.length; ++i)
-    {
-        const String full_path = String(ls.file_info[i].mName);
-        const size_t last_slash = full_path.rfind('/');
-        const String file_name = full_path.substr(last_slash);
-        const bool looking_for_directory = next_slash != std::string::npos;
-        const bool is_directory = ls.file_info[i].mKind == 'D';
-        /// Condition with type of current file_info means what kind of path is it in current iteration of ls
-        if (!is_directory && !looking_for_directory)
-        {
-            if (re2::RE2::FullMatch(file_name, matcher))
-            {
-                result.push_back(String(ls.file_info[i].mName));
-            }
-        }
-        else if (is_directory && looking_for_directory)
-        {
-            if (re2::RE2::FullMatch(file_name, matcher))
-            {
-                Strings result_part = listFilesWithRegexpMatching(fs::path(full_path) / "", fs, suffix_with_globs.substr(next_slash));
-                /// Recursion depth is limited by pattern. '*' works only for depth = 1, for depth = 2 pattern path is '*/*'. So we do not need additional check.
-                std::move(result_part.begin(), result_part.end(), std::back_inserter(result));
-            }
-        }
-    }
-    return result;
-}
-
 bool StorageHDFS::isColumnOriented() const
 {
     return format_name != "Distributed" && FormatFactory::instance().checkIfFormatIsColumnOriented(format_name);
@@ -400,6 +443,7 @@ Pipe StorageHDFS::read(
 {
     bool need_path_column = false;
     bool need_file_column = false;
+
     for (const auto & column : column_names)
     {
         if (column == "_path")
@@ -528,6 +572,7 @@ void registerStorageHDFS(StorageFactory & factory)
     },
     {
         .supports_sort_order = true, // for partition by
+        .supports_schema_inference = true,
         .source_access_type = AccessType::HDFS,
     });
 }
diff --git a/src/Storages/HDFS/StorageHDFS.h b/src/Storages/HDFS/StorageHDFS.h
index 3e2f7a43127..9e845d8fd74 100644
--- a/src/Storages/HDFS/StorageHDFS.h
+++ b/src/Storages/HDFS/StorageHDFS.h
@@ -31,7 +31,7 @@ public:
         size_t max_block_size,
         unsigned num_streams) override;
 
-    SinkToStoragePtr write(const ASTPtr & query, const StorageMetadataPtr & /*metadata_snapshot*/, ContextPtr context) override;
+    SinkToStoragePtr write(const ASTPtr & query, const StorageMetadataPtr & /*metadata_snapshot*/, ContextPtr /*context*/) override;
 
     void truncate(
         const ASTPtr & query,
@@ -49,6 +49,12 @@ public:
     /// format to read only them. Note: this hack cannot be done with ordinary formats like TSV.
     bool isColumnOriented() const;
 
+    static ColumnsDescription getTableStructureFromData(
+        const String & format,
+        const String & uri,
+        const String & compression_method,
+        ContextPtr ctx);
+
 protected:
     friend class HDFSSource;
     StorageHDFS(
diff --git a/src/Storages/MergeTree/MergeTreeData.cpp b/src/Storages/MergeTree/MergeTreeData.cpp
index bc4a3659ae0..d3093643c31 100644
--- a/src/Storages/MergeTree/MergeTreeData.cpp
+++ b/src/Storages/MergeTree/MergeTreeData.cpp
@@ -225,7 +225,6 @@ MergeTreeData::MergeTreeData(
     {
         try
         {
-
             checkPartitionKeyAndInitMinMax(metadata_.partition_key);
             setProperties(metadata_, metadata_, attach);
             if (minmax_idx_date_column_pos == -1)
diff --git a/src/Storages/MergeTree/registerStorageMergeTree.cpp b/src/Storages/MergeTree/registerStorageMergeTree.cpp
index cb52c8b86c0..fc3eff7459b 100644
--- a/src/Storages/MergeTree/registerStorageMergeTree.cpp
+++ b/src/Storages/MergeTree/registerStorageMergeTree.cpp
@@ -35,6 +35,7 @@ namespace ErrorCodes
     extern const int NO_ELEMENTS_IN_CONFIG;
     extern const int UNKNOWN_STORAGE;
     extern const int NO_REPLICA_NAME_GIVEN;
+    extern const int CANNOT_EXTRACT_TABLE_STRUCTURE;
 }
 
 
@@ -258,6 +259,34 @@ If you use the Replicated version of engines, see https://clickhouse.com/docs/en
     return help;
 }
 
+static ColumnsDescription getColumnsDescriptionFromZookeeper(const String & raw_zookeeper_path, ContextMutablePtr context)
+{
+    String zookeeper_name = zkutil::extractZooKeeperName(raw_zookeeper_path);
+    String zookeeper_path = zkutil::extractZooKeeperPath(raw_zookeeper_path, true);
+
+    if (!context->hasZooKeeper() && !context->hasAuxiliaryZooKeeper(zookeeper_name))
+        throw Exception{ErrorCodes::CANNOT_EXTRACT_TABLE_STRUCTURE, "Cannot get replica structure without zookeeper, you must specify the structure manually"};
+
+    zkutil::ZooKeeperPtr zookeeper;
+    try
+    {
+        if (zookeeper_name == StorageReplicatedMergeTree::getDefaultZooKeeperName())
+            zookeeper = context->getZooKeeper();
+        else
+            zookeeper = context->getAuxiliaryZooKeeper(zookeeper_name);
+    }
+    catch (...)
+    {
+        throw Exception{ErrorCodes::CANNOT_EXTRACT_TABLE_STRUCTURE, "Cannot get replica structure from zookeeper, because cannot get zookeeper: {}. You must specify structure manually", getCurrentExceptionMessage(false)};
+    }
+
+    if (!zookeeper->exists(zookeeper_path + "/replicas"))
+        throw Exception{ErrorCodes::CANNOT_EXTRACT_TABLE_STRUCTURE, "Cannot get replica structure, because there no other replicas in zookeeper. You must specify the structure manually"};
+
+    Coordination::Stat columns_stat;
+    return ColumnsDescription::parse(zookeeper->get(fs::path(zookeeper_path) / "columns", &columns_stat));
+}
+
 
 static StoragePtr create(const StorageFactory::Arguments & args)
 {
@@ -638,7 +667,14 @@ static StoragePtr create(const StorageFactory::Arguments & args)
     String date_column_name;
 
     StorageInMemoryMetadata metadata;
-    metadata.setColumns(args.columns);
+
+    ColumnsDescription columns;
+    if (args.columns.empty() && replicated)
+        columns = getColumnsDescriptionFromZookeeper(zookeeper_path, args.getContext());
+    else
+        columns = args.columns;
+
+    metadata.setColumns(columns);
     metadata.setComment(args.comment);
 
     std::unique_ptr<MergeTreeSettings> storage_settings;
@@ -705,12 +741,12 @@ static StoragePtr create(const StorageFactory::Arguments & args)
 
         if (args.query.columns_list && args.query.columns_list->indices)
             for (auto & index : args.query.columns_list->indices->children)
-                metadata.secondary_indices.push_back(IndexDescription::getIndexFromAST(index, args.columns, args.getContext()));
+                metadata.secondary_indices.push_back(IndexDescription::getIndexFromAST(index, columns, args.getContext()));
 
         if (args.query.columns_list && args.query.columns_list->projections)
             for (auto & projection_ast : args.query.columns_list->projections->children)
             {
-                auto projection = ProjectionDescription::getProjectionFromAST(projection_ast, args.columns, args.getContext());
+                auto projection = ProjectionDescription::getProjectionFromAST(projection_ast, columns, args.getContext());
                 metadata.projections.add(std::move(projection));
             }
 
@@ -720,10 +756,10 @@ static StoragePtr create(const StorageFactory::Arguments & args)
                 constraints.push_back(constraint);
         metadata.constraints = ConstraintsDescription(constraints);
 
-        auto column_ttl_asts = args.columns.getColumnTTLs();
+        auto column_ttl_asts = columns.getColumnTTLs();
         for (const auto & [name, ast] : column_ttl_asts)
         {
-            auto new_ttl_entry = TTLDescription::getTTLFromAST(ast, args.columns, args.getContext(), metadata.primary_key);
+            auto new_ttl_entry = TTLDescription::getTTLFromAST(ast, columns, args.getContext(), metadata.primary_key);
             metadata.column_ttls_by_name[name] = new_ttl_entry;
         }
 
@@ -850,6 +886,7 @@ void registerStorageMergeTree(StorageFactory & factory)
 
     features.supports_replication = true;
     features.supports_deduplication = true;
+    features.supports_schema_inference = true;
 
     factory.registerStorage("ReplicatedMergeTree", create, features);
     factory.registerStorage("ReplicatedCollapsingMergeTree", create, features);
diff --git a/src/Storages/StorageBuffer.cpp b/src/Storages/StorageBuffer.cpp
index 87a8ea2315d..0cc401aa93c 100644
--- a/src/Storages/StorageBuffer.cpp
+++ b/src/Storages/StorageBuffer.cpp
@@ -126,7 +126,13 @@ StorageBuffer::StorageBuffer(
     , bg_pool(getContext()->getBufferFlushSchedulePool())
 {
     StorageInMemoryMetadata storage_metadata;
-    storage_metadata.setColumns(columns_);
+    if (columns_.empty())
+    {
+        auto dest_table = DatabaseCatalog::instance().getTable(destination_id, context_);
+        storage_metadata.setColumns(dest_table->getInMemoryMetadataPtr()->getColumns());
+    }
+    else
+        storage_metadata.setColumns(columns_);
     storage_metadata.setConstraints(constraints_);
     storage_metadata.setComment(comment);
     setInMemoryMetadata(storage_metadata);
@@ -1167,6 +1173,7 @@ void registerStorageBuffer(StorageFactory & factory)
     },
     {
         .supports_parallel_insert = true,
+        .supports_schema_inference = true,
     });
 }
 
diff --git a/src/Storages/StorageDistributed.cpp b/src/Storages/StorageDistributed.cpp
index ddf363e3957..19869b77106 100644
--- a/src/Storages/StorageDistributed.cpp
+++ b/src/Storages/StorageDistributed.cpp
@@ -13,6 +13,7 @@
 #include <Storages/Distributed/DistributedSink.h>
 #include <Storages/StorageFactory.h>
 #include <Storages/AlterCommands.h>
+#include <Storages/getStructureOfRemoteTable.h>
 
 #include <Columns/ColumnConst.h>
 
@@ -24,7 +25,6 @@
 #include <Common/randomSeed.h>
 #include <Common/formatReadable.h>
 
-#include <Parsers/ASTDropQuery.h>
 #include <Parsers/ASTExpressionList.h>
 #include <Parsers/ASTFunction.h>
 #include <Parsers/ASTIdentifier.h>
@@ -42,7 +42,6 @@
 #include <Interpreters/ClusterProxy/executeQuery.h>
 #include <Interpreters/Cluster.h>
 #include <Interpreters/ExpressionAnalyzer.h>
-#include <Interpreters/InterpreterAlterQuery.h>
 #include <Interpreters/InterpreterDescribeQuery.h>
 #include <Interpreters/InterpreterSelectQuery.h>
 #include <Interpreters/JoinedTables.h>
@@ -63,7 +62,6 @@
 #include <Processors/Sources/RemoteSource.h>
 #include <Processors/Sinks/EmptySink.h>
 
-#include <Core/Field.h>
 #include <Core/Settings.h>
 
 #include <IO/ReadHelpers.h>
@@ -71,8 +69,6 @@
 #include <IO/Operators.h>
 #include <IO/ConnectionTimeoutsContext.h>
 
-#include <Poco/DirectoryIterator.h>
-
 #include <memory>
 #include <filesystem>
 #include <optional>
@@ -329,7 +325,16 @@ StorageDistributed::StorageDistributed(
     , rng(randomSeed())
 {
     StorageInMemoryMetadata storage_metadata;
-    storage_metadata.setColumns(columns_);
+    if (columns_.empty())
+    {
+        StorageID id = StorageID::createEmpty();
+        id.table_name = remote_table;
+        id.database_name = remote_database;
+        storage_metadata.setColumns(getStructureOfRemoteTable(*getCluster(), id, getContext(), remote_table_function_ptr));
+    }
+    else
+        storage_metadata.setColumns(columns_);
+
     storage_metadata.setConstraints(constraints_);
     storage_metadata.setComment(comment);
     setInMemoryMetadata(storage_metadata);
@@ -1398,6 +1403,7 @@ void registerStorageDistributed(StorageFactory & factory)
     {
         .supports_settings = true,
         .supports_parallel_insert = true,
+        .supports_schema_inference = true,
         .source_access_type = AccessType::REMOTE,
     });
 }
diff --git a/src/Storages/StorageFactory.h b/src/Storages/StorageFactory.h
index 20db1a44897..6ffa6327176 100644
--- a/src/Storages/StorageFactory.h
+++ b/src/Storages/StorageFactory.h
@@ -66,6 +66,7 @@ public:
         bool supports_deduplication = false;
         /// See also IStorage::supportsParallelInsert()
         bool supports_parallel_insert = false;
+        bool supports_schema_inference = false;
         AccessType source_access_type = AccessType::NONE;
     };
 
@@ -98,6 +99,7 @@ public:
         .supports_replication = false,
         .supports_deduplication = false,
         .supports_parallel_insert = false,
+        .supports_schema_inference = false,
         .source_access_type = AccessType::NONE,
     });
 
@@ -126,6 +128,12 @@ public:
 
     AccessType getSourceAccessType(const String & table_engine) const;
 
+    bool checkIfStorageSupportsSchemaInterface(const String & storage_name)
+    {
+        if (storages.contains(storage_name))
+            return storages[storage_name].features.supports_schema_inference;
+        return false;
+    }
 private:
     Storages storages;
 };
diff --git a/src/Storages/StorageFile.cpp b/src/Storages/StorageFile.cpp
index 13a70af2ada..a479f982c70 100644
--- a/src/Storages/StorageFile.cpp
+++ b/src/Storages/StorageFile.cpp
@@ -15,8 +15,9 @@
 #include <IO/WriteBufferFromFile.h>
 #include <IO/WriteHelpers.h>
 
-#include <Formats/FormatFactory.h>
 #include <DataTypes/DataTypeString.h>
+#include <Formats/FormatFactory.h>
+#include <Formats/ReadSchemaUtils.h>
 #include <Processors/Sinks/SinkToStorage.h>
 #include <Processors/Transforms/AddingDefaultsTransform.h>
 
@@ -38,6 +39,7 @@
 #include <Processors/Sources/SourceWithProgress.h>
 #include <Processors/Formats/IOutputFormat.h>
 #include <Processors/Formats/IInputFormat.h>
+#include <Processors/Formats/ISchemaReader.h>
 #include <Processors/Sources/NullSource.h>
 #include <QueryPipeline/Pipe.h>
 #include <Processors/Executors/PullingPipelineExecutor.h>
@@ -63,6 +65,7 @@ namespace ErrorCodes
     extern const int INCOMPATIBLE_COLUMNS;
     extern const int CANNOT_STAT;
     extern const int LOGICAL_ERROR;
+    extern const int CANNOT_EXTRACT_TABLE_STRUCTURE;
 }
 
 namespace
@@ -135,6 +138,56 @@ void checkCreationIsAllowed(ContextPtr context_global, const std::string & db_di
         throw Exception("File must not be a directory", ErrorCodes::INCORRECT_FILE_NAME);
 }
 
+std::unique_ptr<ReadBuffer> createReadBuffer(
+    const String & current_path,
+    bool use_table_fd,
+    const String & storage_name,
+    int table_fd,
+    const String & compression_method,
+    ContextPtr context)
+{
+    std::unique_ptr<ReadBuffer> nested_buffer;
+    CompressionMethod method;
+
+    struct stat file_stat{};
+
+    if (use_table_fd)
+    {
+        /// Check if file descriptor allows random reads (and reading it twice).
+        if (0 != fstat(table_fd, &file_stat))
+            throwFromErrno("Cannot stat table file descriptor, inside " + storage_name, ErrorCodes::CANNOT_STAT);
+
+        if (S_ISREG(file_stat.st_mode))
+            nested_buffer = std::make_unique<ReadBufferFromFileDescriptorPRead>(table_fd);
+        else
+            nested_buffer = std::make_unique<ReadBufferFromFileDescriptor>(table_fd);
+
+        method = chooseCompressionMethod("", compression_method);
+    }
+    else
+    {
+        /// Check if file descriptor allows random reads (and reading it twice).
+        if (0 != stat(current_path.c_str(), &file_stat))
+            throwFromErrno("Cannot stat file " + current_path, ErrorCodes::CANNOT_STAT);
+
+        if (S_ISREG(file_stat.st_mode))
+            nested_buffer = std::make_unique<ReadBufferFromFilePRead>(current_path, context->getSettingsRef().max_read_buffer_size);
+        else
+            nested_buffer = std::make_unique<ReadBufferFromFile>(current_path, context->getSettingsRef().max_read_buffer_size);
+
+        method = chooseCompressionMethod(current_path, compression_method);
+    }
+
+    /// For clickhouse-local add progress callback to display progress bar.
+    if (context->getApplicationType() == Context::ApplicationType::LOCAL)
+    {
+        auto & in = static_cast<ReadBufferFromFileDescriptor &>(*nested_buffer);
+        in.setProgressCallback(context);
+    }
+
+    return wrapReadBufferWithCompressionMethod(std::move(nested_buffer), method);
+}
+
 }
 
 Strings StorageFile::getPathsList(const String & table_path, const String & user_files_path, ContextPtr context, size_t & total_bytes_to_read)
@@ -164,6 +217,42 @@ Strings StorageFile::getPathsList(const String & table_path, const String & user
     return paths;
 }
 
+
+ColumnsDescription StorageFile::getTableStructureFromData(
+    const String & format,
+    const std::vector<String> & paths,
+    const String & compression_method,
+    const std::optional<FormatSettings> & format_settings,
+    ContextPtr context)
+{
+    if (format == "Distributed")
+    {
+        if (paths.empty())
+            throw Exception(
+                "Cannot get table structure from file, because no files match specified name", ErrorCodes::INCORRECT_FILE_NAME);
+
+        auto source = StorageDistributedDirectoryMonitor::createSourceFromFile(paths[0]);
+        return ColumnsDescription(source->getOutputs().front().getHeader().getNamesAndTypesList());
+    }
+
+    auto read_buffer_creator = [&]()
+    {
+        String path;
+        auto it = std::find_if(paths.begin(), paths.end(), [](const String & p){ return std::filesystem::exists(p); });
+        if (it == paths.end())
+            throw Exception(
+                ErrorCodes::CANNOT_EXTRACT_TABLE_STRUCTURE,
+                "Cannot extract table structure from {} format file, because there are no files with provided path. You must specify "
+                "table structure manually",
+                format);
+
+        path = *it;
+        return createReadBuffer(path, false, "File", -1, compression_method, context);
+    };
+
+    return readSchemaFromFormat(format, format_settings, read_buffer_creator, context);
+}
+
 bool StorageFile::isColumnOriented() const
 {
     return format_name != "Distributed" && FormatFactory::instance().checkIfFormatIsColumnOriented(format_name);
@@ -182,10 +271,13 @@ StorageFile::StorageFile(int table_fd_, CommonArguments args)
         throw Exception("Using file descriptor as source of storage isn't allowed for server daemons", ErrorCodes::DATABASE_ACCESS_DENIED);
     if (args.format_name == "Distributed")
         throw Exception("Distributed format is allowed only with explicit file path", ErrorCodes::INCORRECT_FILE_NAME);
+    if (args.columns.empty())
+        throw Exception("Automatic schema inference is not allowed when using file descriptor as source of storage", ErrorCodes::CANNOT_EXTRACT_TABLE_STRUCTURE);
 
     is_db_table = false;
     use_table_fd = true;
     table_fd = table_fd_;
+    setStorageMetadata(args);
 }
 
 StorageFile::StorageFile(const std::string & table_path_, const std::string & user_files_path, CommonArguments args)
@@ -194,22 +286,7 @@ StorageFile::StorageFile(const std::string & table_path_, const std::string & us
     is_db_table = false;
     paths = getPathsList(table_path_, user_files_path, args.getContext(), total_bytes_to_read);
     path_for_partitioned_write = table_path_;
-
-    if (args.format_name == "Distributed")
-    {
-        if (paths.empty())
-            throw Exception("Cannot get table structure from file, because no files match specified name", ErrorCodes::INCORRECT_FILE_NAME);
-
-        auto & first_path = paths[0];
-        Block header = StorageDistributedDirectoryMonitor::createSourceFromFile(first_path)->getOutputs().front().getHeader();
-
-        StorageInMemoryMetadata storage_metadata;
-        auto columns = ColumnsDescription(header.getNamesAndTypesList());
-        if (!args.columns.empty() && columns != args.columns)
-            throw Exception("Table structure and file structure are different", ErrorCodes::INCOMPATIBLE_COLUMNS);
-        storage_metadata.setColumns(columns);
-        setInMemoryMetadata(storage_metadata);
-    }
+    setStorageMetadata(args);
 }
 
 StorageFile::StorageFile(const std::string & relative_table_dir_path, CommonArguments args)
@@ -225,6 +302,8 @@ StorageFile::StorageFile(const std::string & relative_table_dir_path, CommonArgu
     paths = {getTablePath(table_dir_path, format_name)};
     if (fs::exists(paths[0]))
         total_bytes_to_read = fs::file_size(paths[0]);
+
+    setStorageMetadata(args);
 }
 
 StorageFile::StorageFile(CommonArguments args)
@@ -233,9 +312,21 @@ StorageFile::StorageFile(CommonArguments args)
     , format_settings(args.format_settings)
     , compression_method(args.compression_method)
     , base_path(args.getContext()->getPath())
+{
+}
+
+void StorageFile::setStorageMetadata(CommonArguments args)
 {
     StorageInMemoryMetadata storage_metadata;
-    if (args.format_name != "Distributed")
+
+    if (args.format_name == "Distributed" || args.columns.empty())
+    {
+        auto columns = getTableStructureFromData(format_name, paths, compression_method, format_settings, args.getContext());
+        if (!args.columns.empty() && args.columns != columns)
+            throw Exception("Table structure and file structure are different", ErrorCodes::INCOMPATIBLE_COLUMNS);
+        storage_metadata.setColumns(columns);
+    }
+    else
         storage_metadata.setColumns(args.columns);
 
     storage_metadata.setConstraints(args.constraints);
@@ -350,46 +441,7 @@ public:
                     }
                 }
 
-                std::unique_ptr<ReadBuffer> nested_buffer;
-                CompressionMethod method;
-
-                struct stat file_stat{};
-
-                if (storage->use_table_fd)
-                {
-                    /// Check if file descriptor allows random reads (and reading it twice).
-                    if (0 != fstat(storage->table_fd, &file_stat))
-                        throwFromErrno("Cannot stat table file descriptor, inside " + storage->getName(), ErrorCodes::CANNOT_STAT);
-
-                    if (S_ISREG(file_stat.st_mode))
-                        nested_buffer = std::make_unique<ReadBufferFromFileDescriptorPRead>(storage->table_fd);
-                    else
-                        nested_buffer = std::make_unique<ReadBufferFromFileDescriptor>(storage->table_fd);
-
-                    method = chooseCompressionMethod("", storage->compression_method);
-                }
-                else
-                {
-                    /// Check if file descriptor allows random reads (and reading it twice).
-                    if (0 != stat(current_path.c_str(), &file_stat))
-                        throwFromErrno("Cannot stat file " + current_path, ErrorCodes::CANNOT_STAT);
-
-                    if (S_ISREG(file_stat.st_mode))
-                        nested_buffer = std::make_unique<ReadBufferFromFilePRead>(current_path, context->getSettingsRef().max_read_buffer_size);
-                    else
-                        nested_buffer = std::make_unique<ReadBufferFromFile>(current_path, context->getSettingsRef().max_read_buffer_size);
-
-                    method = chooseCompressionMethod(current_path, storage->compression_method);
-                }
-
-                /// For clickhouse-local add progress callback to display progress bar.
-                if (context->getApplicationType() == Context::ApplicationType::LOCAL)
-                {
-                    auto & in = static_cast<ReadBufferFromFileDescriptor &>(*nested_buffer);
-                    in.setProgressCallback(context);
-                }
-
-                read_buf = wrapReadBufferWithCompressionMethod(std::move(nested_buffer), method);
+                read_buf = createReadBuffer(current_path, storage->use_table_fd, storage->getName(), storage->table_fd, storage->compression_method, context);
 
                 auto get_block_for_format = [&]() -> Block
                 {
@@ -853,7 +905,8 @@ void registerStorageFile(StorageFactory & factory)
 {
     StorageFactory::StorageFeatures storage_features{
         .supports_settings = true,
-        .source_access_type = AccessType::FILE
+        .supports_schema_inference = true,
+        .source_access_type = AccessType::FILE,
     };
 
     factory.registerStorage(
diff --git a/src/Storages/StorageFile.h b/src/Storages/StorageFile.h
index f48d1c285da..6b015976589 100644
--- a/src/Storages/StorageFile.h
+++ b/src/Storages/StorageFile.h
@@ -1,6 +1,7 @@
 #pragma once
 
 #include <Storages/IStorage.h>
+
 #include <base/logger_useful.h>
 
 #include <atomic>
@@ -70,6 +71,13 @@ public:
 
     bool supportsPartitionBy() const override { return true; }
 
+    static ColumnsDescription getTableStructureFromData(
+        const String & format,
+        const std::vector<String> & paths,
+        const String & compression_method,
+        const std::optional<FormatSettings> & format_settings,
+        ContextPtr context);
+
 protected:
     friend class StorageFileSource;
     friend class StorageFileSink;
@@ -86,6 +94,8 @@ protected:
 private:
     explicit StorageFile(CommonArguments args);
 
+    void setStorageMetadata(CommonArguments args);
+
     std::string format_name;
     // We use format settings from global context + CREATE query for File table
     // function -- in this case, format_settings is set.
diff --git a/src/Storages/StorageMerge.cpp b/src/Storages/StorageMerge.cpp
index f82f9d21217..bdb7ddb744a 100644
--- a/src/Storages/StorageMerge.cpp
+++ b/src/Storages/StorageMerge.cpp
@@ -43,6 +43,7 @@ namespace ErrorCodes
     extern const int NUMBER_OF_ARGUMENTS_DOESNT_MATCH;
     extern const int SAMPLING_NOT_SUPPORTED;
     extern const int ALTER_OF_COLUMN_IS_FORBIDDEN;
+    extern const int CANNOT_EXTRACT_TABLE_STRUCTURE;
 }
 
 StorageMerge::StorageMerge(
@@ -61,7 +62,7 @@ StorageMerge::StorageMerge(
     , database_is_regexp(database_is_regexp_)
 {
     StorageInMemoryMetadata storage_metadata;
-    storage_metadata.setColumns(columns_);
+    storage_metadata.setColumns(columns_.empty() ? getColumnsDescriptionFromSourceTables() : columns_);
     storage_metadata.setComment(comment);
     setInMemoryMetadata(storage_metadata);
 }
@@ -82,11 +83,19 @@ StorageMerge::StorageMerge(
     , database_is_regexp(database_is_regexp_)
 {
     StorageInMemoryMetadata storage_metadata;
-    storage_metadata.setColumns(columns_);
+    storage_metadata.setColumns(columns_.empty() ? getColumnsDescriptionFromSourceTables() : columns_);
     storage_metadata.setComment(comment);
     setInMemoryMetadata(storage_metadata);
 }
 
+ColumnsDescription StorageMerge::getColumnsDescriptionFromSourceTables() const
+{
+    auto table = getFirstTable([](auto && t) { return t; });
+    if (!table)
+        throw Exception{ErrorCodes::CANNOT_EXTRACT_TABLE_STRUCTURE, "There are no tables satisfied provided regexp, you must specify table structure manually"};
+    return table->getInMemoryMetadataPtr()->getColumns();
+}
+
 template <typename F>
 StoragePtr StorageMerge::getFirstTable(F && predicate) const
 {
@@ -762,7 +771,6 @@ void StorageMerge::convertingSourceStream(
 
 IStorage::ColumnSizeByName StorageMerge::getColumnSizes() const
 {
-
     auto first_materialized_mysql = getFirstTable([](const StoragePtr & table) { return table && table->getName() == "MaterializedMySQL"; });
     if (!first_materialized_mysql)
         return {};
@@ -816,6 +824,9 @@ void registerStorageMerge(StorageFactory & factory)
 
         return StorageMerge::create(
             args.table_id, args.columns, args.comment, source_database_name_or_regexp, is_regexp, table_name_regexp, args.getContext());
+    },
+    {
+        .supports_schema_inference = true
     });
 }
 
diff --git a/src/Storages/StorageMerge.h b/src/Storages/StorageMerge.h
index 56adeab9279..ad3075efd08 100644
--- a/src/Storages/StorageMerge.h
+++ b/src/Storages/StorageMerge.h
@@ -132,6 +132,8 @@ protected:
 
     static SelectQueryInfo getModifiedQueryInfo(
         const SelectQueryInfo & query_info, ContextPtr modified_context, const StorageID & current_storage_id, bool is_merge_engine);
+
+    ColumnsDescription getColumnsDescriptionFromSourceTables() const;
 };
 
 }
diff --git a/src/Storages/StorageReplicatedMergeTree.cpp b/src/Storages/StorageReplicatedMergeTree.cpp
index 0eab161195c..b8d7ca09ae0 100644
--- a/src/Storages/StorageReplicatedMergeTree.cpp
+++ b/src/Storages/StorageReplicatedMergeTree.cpp
@@ -3,7 +3,6 @@
 #include "Common/hex.h"
 #include <Common/Macros.h>
 #include <Common/StringUtils/StringUtils.h>
-#include <Common/ThreadPool.h>
 #include <Common/ZooKeeper/KeeperException.h>
 #include <Common/ZooKeeper/Types.h>
 #include <Common/escapeForFileName.h>
@@ -20,7 +19,6 @@
 #include <Storages/MergeTree/MergeTreeBackgroundExecutor.h>
 #include <Storages/MergeTree/MergedBlockOutputStream.h>
 #include <Storages/MergeTree/PinnedPartUUIDs.h>
-#include <Storages/MergeTree/PartitionPruner.h>
 #include <Storages/MergeTree/ReplicatedMergeTreeTableMetadata.h>
 #include <Storages/MergeTree/ReplicatedMergeTreeSink.h>
 #include <Storages/MergeTree/ReplicatedMergeTreeQuorumEntry.h>
@@ -35,7 +33,6 @@
 #include <Storages/MergeTree/LeaderElection.h>
 
 
-#include <Databases/IDatabase.h>
 #include <Databases/DatabaseOnDisk.h>
 
 #include <Parsers/formatAST.h>
@@ -45,7 +42,6 @@
 #include <Parsers/ASTLiteral.h>
 #include <Parsers/queryToString.h>
 #include <Parsers/ASTCheckQuery.h>
-#include <Parsers/ASTSetQuery.h>
 
 #include <Processors/QueryPlan/QueryPlan.h>
 #include <Processors/Sources/RemoteSource.h>
@@ -68,7 +64,6 @@
 
 #include <Poco/DirectoryIterator.h>
 
-#include <base/range.h>
 #include <base/scope_guard.h>
 #include <base/scope_guard_safe.h>
 
@@ -194,56 +189,6 @@ zkutil::ZooKeeperPtr StorageReplicatedMergeTree::getZooKeeper() const
     return res;
 }
 
-static std::string normalizeZooKeeperPath(std::string zookeeper_path, bool check_starts_with_slash, Poco::Logger * log = nullptr)
-{
-    if (!zookeeper_path.empty() && zookeeper_path.back() == '/')
-        zookeeper_path.resize(zookeeper_path.size() - 1);
-    /// If zookeeper chroot prefix is used, path should start with '/', because chroot concatenates without it.
-    if (!zookeeper_path.empty() && zookeeper_path.front() != '/')
-    {
-        /// Do not allow this for new tables, print warning for tables created in old versions
-        if (check_starts_with_slash)
-            throw Exception(ErrorCodes::BAD_ARGUMENTS, "ZooKeeper path must starts with '/', got '{}'", zookeeper_path);
-        if (log)
-            LOG_WARNING(log, "ZooKeeper path ('{}') does not start with '/'. It will not be supported in future releases");
-        zookeeper_path = "/" + zookeeper_path;
-    }
-
-    return zookeeper_path;
-}
-
-static String extractZooKeeperName(const String & path)
-{
-    static constexpr auto default_zookeeper_name = "default";
-    if (path.empty())
-        throw Exception("ZooKeeper path should not be empty", ErrorCodes::BAD_ARGUMENTS);
-    if (path[0] == '/')
-        return default_zookeeper_name;
-    auto pos = path.find(":/");
-    if (pos != String::npos && pos < path.find('/'))
-    {
-        auto zookeeper_name = path.substr(0, pos);
-        if (zookeeper_name.empty())
-            throw Exception("Zookeeper path should start with '/' or '<auxiliary_zookeeper_name>:/'", ErrorCodes::BAD_ARGUMENTS);
-        return zookeeper_name;
-    }
-    return default_zookeeper_name;
-}
-
-static String extractZooKeeperPath(const String & path, bool check_starts_with_slash, Poco::Logger * log = nullptr)
-{
-    if (path.empty())
-        throw Exception("ZooKeeper path should not be empty", ErrorCodes::BAD_ARGUMENTS);
-    if (path[0] == '/')
-        return normalizeZooKeeperPath(path, check_starts_with_slash, log);
-    auto pos = path.find(":/");
-    if (pos != String::npos && pos < path.find('/'))
-    {
-        return normalizeZooKeeperPath(path.substr(pos + 1, String::npos), check_starts_with_slash, log);
-    }
-    return normalizeZooKeeperPath(path, check_starts_with_slash, log);
-}
-
 static MergeTreePartInfo makeDummyDropRangeForMovePartitionOrAttachPartitionFrom(const String & partition_id)
 {
     /// NOTE We don't have special log entry type for MOVE PARTITION/ATTACH PARTITION FROM,
@@ -287,8 +232,8 @@ StorageReplicatedMergeTree::StorageReplicatedMergeTree(
                     true,                   /// require_part_metadata
                     attach,
                     [this] (const std::string & name) { enqueuePartForCheck(name); })
-    , zookeeper_name(extractZooKeeperName(zookeeper_path_))
-    , zookeeper_path(extractZooKeeperPath(zookeeper_path_, /* check_starts_with_slash */ !attach, log))
+    , zookeeper_name(zkutil::extractZooKeeperName(zookeeper_path_))
+    , zookeeper_path(zkutil::extractZooKeeperPath(zookeeper_path_, /* check_starts_with_slash */ !attach, log))
     , replica_name(replica_name_)
     , replica_path(fs::path(zookeeper_path) / "replicas" / replica_name_)
     , reader(*this)
@@ -5563,8 +5508,8 @@ void StorageReplicatedMergeTree::fetchPartition(
     info.table_id = getStorageID();
     info.table_id.uuid = UUIDHelpers::Nil;
     auto expand_from = query_context->getMacros()->expand(from_, info);
-    String auxiliary_zookeeper_name = extractZooKeeperName(expand_from);
-    String from = extractZooKeeperPath(expand_from, /* check_starts_with_slash */ true);
+    String auxiliary_zookeeper_name = zkutil::extractZooKeeperName(expand_from);
+    String from = zkutil::extractZooKeeperPath(expand_from, /* check_starts_with_slash */ true);
     if (from.empty())
         throw Exception("ZooKeeper path should not be empty", ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT);
 
@@ -6640,7 +6585,7 @@ void StorageReplicatedMergeTree::movePartitionToShard(
     if (!move_part)
         throw Exception("MOVE PARTITION TO SHARD is not supported, use MOVE PART instead", ErrorCodes::NOT_IMPLEMENTED);
 
-    if (normalizeZooKeeperPath(zookeeper_path, /* check_starts_with_slash */ true) == normalizeZooKeeperPath(to, /* check_starts_with_slash */ true))
+    if (zkutil::normalizeZooKeeperPath(zookeeper_path, /* check_starts_with_slash */ true) == zkutil::normalizeZooKeeperPath(to, /* check_starts_with_slash */ true))
         throw Exception("Source and destination are the same", ErrorCodes::BAD_ARGUMENTS);
 
     auto zookeeper = getZooKeeper();
diff --git a/src/Storages/StorageReplicatedMergeTree.h b/src/Storages/StorageReplicatedMergeTree.h
index efff3d6a2d7..5d8b92ec217 100644
--- a/src/Storages/StorageReplicatedMergeTree.h
+++ b/src/Storages/StorageReplicatedMergeTree.h
@@ -280,6 +280,8 @@ public:
     // Return table id, common for different replicas
     String getTableSharedID() const;
 
+    static const String getDefaultZooKeeperName() { return default_zookeeper_name; }
+
 private:
     std::atomic_bool are_restoring_replica {false};
 
diff --git a/src/Storages/StorageS3.cpp b/src/Storages/StorageS3.cpp
index 3a03ac3906c..3d988472b54 100644
--- a/src/Storages/StorageS3.cpp
+++ b/src/Storages/StorageS3.cpp
@@ -3,7 +3,6 @@
 
 #if USE_AWS_S3
 
-#include <Columns/ColumnString.h>
 #include <Common/isValidUTF8.h>
 
 #include <Functions/FunctionsConversion.h>
@@ -25,9 +24,9 @@
 
 #include <IO/ReadBufferFromS3.h>
 #include <IO/WriteBufferFromS3.h>
-#include <IO/WriteHelpers.h>
 
 #include <Formats/FormatFactory.h>
+#include <Formats/ReadSchemaUtils.h>
 
 #include <Processors/Transforms/AddingDefaultsTransform.h>
 #include <Processors/Formats/IOutputFormat.h>
@@ -70,6 +69,7 @@ namespace ErrorCodes
     extern const int S3_ERROR;
     extern const int UNEXPECTED_EXPRESSION;
     extern const int CANNOT_OPEN_FILE;
+    extern const int CANNOT_EXTRACT_TABLE_STRUCTURE;
 }
 
 class IOutputFormat;
@@ -480,13 +480,39 @@ StorageS3::StorageS3(
 {
     context_->getGlobalContext()->getRemoteHostFilter().checkURL(uri_.uri);
     StorageInMemoryMetadata storage_metadata;
-    storage_metadata.setColumns(columns_);
+
+    updateClientAndAuthSettings(context_, client_auth);
+    if (columns_.empty())
+    {
+        auto columns = getTableStructureFromDataImpl(format_name, client_auth, max_single_read_retries_, compression_method, distributed_processing_, format_settings, context_);
+        storage_metadata.setColumns(columns);
+    }
+    else
+        storage_metadata.setColumns(columns_);
+
     storage_metadata.setConstraints(constraints_);
     storage_metadata.setComment(comment);
     setInMemoryMetadata(storage_metadata);
-    updateClientAndAuthSettings(context_, client_auth);
 }
 
+std::shared_ptr<StorageS3Source::IteratorWrapper> StorageS3::createFileIterator(const ClientAuthentication & client_auth, bool distributed_processing, ContextPtr local_context)
+{
+    std::shared_ptr<StorageS3Source::IteratorWrapper> iterator_wrapper{nullptr};
+    if (distributed_processing)
+    {
+        return std::make_shared<StorageS3Source::IteratorWrapper>(
+            [callback = local_context->getReadTaskCallback()]() -> String {
+                return callback();
+        });
+    }
+
+    /// Iterate through disclosed globs and make a source for each file
+    auto glob_iterator = std::make_shared<StorageS3Source::DisclosedGlobIterator>(*client_auth.client, client_auth.uri);
+    return std::make_shared<StorageS3Source::IteratorWrapper>([glob_iterator]()
+    {
+        return glob_iterator->next();
+    });
+}
 
 Pipe StorageS3::read(
     const Names & column_names,
@@ -510,23 +536,7 @@ Pipe StorageS3::read(
             need_file_column = true;
     }
 
-    std::shared_ptr<StorageS3Source::IteratorWrapper> iterator_wrapper{nullptr};
-    if (distributed_processing)
-    {
-        iterator_wrapper = std::make_shared<StorageS3Source::IteratorWrapper>(
-            [callback = local_context->getReadTaskCallback()]() -> String {
-                return callback();
-        });
-    }
-    else
-    {
-        /// Iterate through disclosed globs and make a source for each file
-        auto glob_iterator = std::make_shared<StorageS3Source::DisclosedGlobIterator>(*client_auth.client, client_auth.uri);
-        iterator_wrapper = std::make_shared<StorageS3Source::IteratorWrapper>([glob_iterator]()
-        {
-            return glob_iterator->next();
-        });
-    }
+    std::shared_ptr<StorageS3Source::IteratorWrapper> iterator_wrapper = createFileIterator(client_auth, distributed_processing, local_context);
 
     for (size_t i = 0; i < num_streams; ++i)
     {
@@ -707,6 +717,51 @@ StorageS3Configuration StorageS3::getConfiguration(ASTs & engine_args, ContextPt
     return configuration;
 }
 
+ColumnsDescription StorageS3::getTableStructureFromData(
+    const String & format,
+    const S3::URI & uri,
+    const String & access_key_id,
+    const String & secret_access_key,
+    UInt64 max_connections,
+    UInt64 max_single_read_retries,
+    const String & compression_method,
+    bool distributed_processing,
+    const std::optional<FormatSettings> & format_settings,
+    ContextPtr ctx)
+{
+    ClientAuthentication client_auth{uri, access_key_id, secret_access_key, max_connections, {}, {}};
+    updateClientAndAuthSettings(ctx, client_auth);
+    return getTableStructureFromDataImpl(format, client_auth, max_single_read_retries, compression_method, distributed_processing, format_settings, ctx);
+}
+
+ColumnsDescription StorageS3::getTableStructureFromDataImpl(
+    const String & format,
+    const ClientAuthentication & client_auth,
+    UInt64 max_single_read_retries,
+    const String & compression_method,
+    bool distributed_processing,
+    const std::optional<FormatSettings> & format_settings,
+    ContextPtr ctx)
+{
+    auto read_buffer_creator = [&]()
+    {
+        auto file_iterator = createFileIterator(client_auth, distributed_processing, ctx);
+        String current_key = (*file_iterator)();
+        if (current_key.empty())
+            throw Exception(
+                ErrorCodes::CANNOT_EXTRACT_TABLE_STRUCTURE,
+                "Cannot extract table structure from {} format file, because there are no files with provided path in S3. You must specify "
+                "table structure manually",
+                format);
+
+        return wrapReadBufferWithCompressionMethod(
+            std::make_unique<ReadBufferFromS3>(client_auth.client, client_auth.uri.bucket, current_key, max_single_read_retries, ctx->getReadSettings()),
+            chooseCompressionMethod(current_key, compression_method));
+    };
+
+    return readSchemaFromFormat(format, format_settings, read_buffer_creator, ctx);
+}
+
 
 void registerStorageS3Impl(const String & name, StorageFactory & factory)
 {
@@ -775,6 +830,7 @@ void registerStorageS3Impl(const String & name, StorageFactory & factory)
     {
         .supports_settings = true,
         .supports_sort_order = true, // for partition by
+        .supports_schema_inference = true,
         .source_access_type = AccessType::S3,
     });
 }
diff --git a/src/Storages/StorageS3.h b/src/Storages/StorageS3.h
index 248238379dc..0690040915d 100644
--- a/src/Storages/StorageS3.h
+++ b/src/Storages/StorageS3.h
@@ -147,8 +147,19 @@ public:
 
     static StorageS3Configuration getConfiguration(ASTs & engine_args, ContextPtr local_context);
 
-private:
+    static ColumnsDescription getTableStructureFromData(
+        const String & format,
+        const S3::URI & uri,
+        const String & access_key_id,
+        const String & secret_access_key,
+        UInt64 max_connections,
+        UInt64 max_single_read_retries,
+        const String & compression_method,
+        bool distributed_processing,
+        const std::optional<FormatSettings> & format_settings,
+        ContextPtr ctx);
 
+private:
     friend class StorageS3Cluster;
     friend class TableFunctionS3Cluster;
 
@@ -175,6 +186,17 @@ private:
     ASTPtr partition_by;
 
     static void updateClientAndAuthSettings(ContextPtr, ClientAuthentication &);
+
+    static std::shared_ptr<StorageS3Source::IteratorWrapper> createFileIterator(const ClientAuthentication & client_auth, bool distributed_processing, ContextPtr local_context);
+
+    static ColumnsDescription getTableStructureFromDataImpl(
+        const String & format,
+        const ClientAuthentication & client_auth,
+        UInt64 max_single_read_retries,
+        const String & compression_method,
+        bool distributed_processing,
+        const std::optional<FormatSettings> & format_settings,
+        ContextPtr ctx);
 };
 
 }
diff --git a/src/Storages/StorageURL.cpp b/src/Storages/StorageURL.cpp
index 0eec77ac8e7..471b460d349 100644
--- a/src/Storages/StorageURL.cpp
+++ b/src/Storages/StorageURL.cpp
@@ -13,8 +13,9 @@
 #include <IO/ConnectionTimeoutsContext.h>
 
 #include <Formats/FormatFactory.h>
-#include <Processors/Formats/IOutputFormat.h>
+#include <Formats/ReadSchemaUtils.h>
 #include <Processors/Formats/IInputFormat.h>
+#include <Processors/Formats/IOutputFormat.h>
 
 #include <Common/parseRemoteDescription.h>
 #include <Processors/Transforms/AddingDefaultsTransform.h>
@@ -40,7 +41,7 @@ namespace ErrorCodes
 
 IStorageURLBase::IStorageURLBase(
     const String & uri_,
-    ContextPtr /*context_*/,
+    ContextPtr context_,
     const StorageID & table_id_,
     const String & format_name_,
     const std::optional<FormatSettings> & format_settings_,
@@ -61,12 +62,48 @@ IStorageURLBase::IStorageURLBase(
     , partition_by(partition_by_)
 {
     StorageInMemoryMetadata storage_metadata;
-    storage_metadata.setColumns(columns_);
+    if (columns_.empty())
+    {
+        auto columns = getTableStructureFromData(format_name, uri, compression_method, headers, format_settings, context_);
+        storage_metadata.setColumns(columns);
+    }
+    else
+        storage_metadata.setColumns(columns_);
     storage_metadata.setConstraints(constraints_);
     storage_metadata.setComment(comment);
     setInMemoryMetadata(storage_metadata);
 }
 
+ColumnsDescription IStorageURLBase::getTableStructureFromData(
+    const String & format,
+    const String & uri,
+    const String & compression_method,
+    const ReadWriteBufferFromHTTP::HTTPHeaderEntries & headers,
+    const std::optional<FormatSettings> & format_settings,
+    ContextPtr context)
+{
+    auto read_buffer_creator = [&]()
+    {
+        auto parsed_uri = Poco::URI(uri);
+        return wrapReadBufferWithCompressionMethod(
+            std::make_unique<ReadWriteBufferFromHTTP>(
+                parsed_uri,
+                Poco::Net::HTTPRequest::HTTP_GET,
+                nullptr,
+                ConnectionTimeouts::getHTTPTimeouts(context),
+                Poco::Net::HTTPBasicCredentials{},
+                context->getSettingsRef().max_http_get_redirects,
+                DBMS_DEFAULT_BUFFER_SIZE,
+                context->getReadSettings(),
+                headers,
+                ReadWriteBufferFromHTTP::Range{},
+                context->getRemoteHostFilter()),
+            chooseCompressionMethod(parsed_uri.getPath(), compression_method));
+    };
+
+    return readSchemaFromFormat(format, format_settings, read_buffer_creator, context);
+}
+
 namespace
 {
     ReadWriteBufferFromHTTP::HTTPHeaderEntries getHeaders(
@@ -642,6 +679,7 @@ void registerStorageURL(StorageFactory & factory)
     },
     {
         .supports_settings = true,
+        .supports_schema_inference = true,
         .source_access_type = AccessType::URL,
     });
 }
diff --git a/src/Storages/StorageURL.h b/src/Storages/StorageURL.h
index cf72352a183..790f01135d3 100644
--- a/src/Storages/StorageURL.h
+++ b/src/Storages/StorageURL.h
@@ -41,6 +41,14 @@ public:
 
     bool supportsPartitionBy() const override { return true; }
 
+    static ColumnsDescription getTableStructureFromData(
+        const String & format,
+        const String & uri,
+        const String & compression_method,
+        const ReadWriteBufferFromHTTP::HTTPHeaderEntries & headers,
+        const std::optional<FormatSettings> & format_settings,
+        ContextPtr context);
+
 protected:
     IStorageURLBase(
         const String & uri_,
diff --git a/src/TableFunctions/ITableFunction.cpp b/src/TableFunctions/ITableFunction.cpp
index fa7f6e52220..42b24abdbbe 100644
--- a/src/TableFunctions/ITableFunction.cpp
+++ b/src/TableFunctions/ITableFunction.cpp
@@ -15,25 +15,23 @@ namespace DB
 {
 
 StoragePtr ITableFunction::execute(const ASTPtr & ast_function, ContextPtr context, const std::string & table_name,
-                                   ColumnsDescription cached_columns) const
+                                   ColumnsDescription cached_columns, bool use_global_context) const
 {
     ProfileEvents::increment(ProfileEvents::TableFunctionExecute);
     context->checkAccess(AccessType::CREATE_TEMPORARY_TABLE | StorageFactory::instance().getSourceAccessType(getStorageTypeName()));
 
+    auto context_to_use = use_global_context ? context->getGlobalContext() : context;
+
     if (cached_columns.empty())
         return executeImpl(ast_function, context, table_name, std::move(cached_columns));
 
-    /// We have table structure, so it's CREATE AS table_function().
-    /// We should use global context here because there will be no query context on server startup
-    /// and because storage lifetime is bigger than query context lifetime.
-    auto global_context = context->getGlobalContext();
     if (hasStaticStructure() && cached_columns == getActualTableStructure(context))
-        return executeImpl(ast_function, global_context, table_name, std::move(cached_columns));
+        return executeImpl(ast_function, context_to_use, table_name, std::move(cached_columns));
 
     auto this_table_function = shared_from_this();
     auto get_storage = [=]() -> StoragePtr
     {
-        return this_table_function->executeImpl(ast_function, global_context, table_name, cached_columns);
+        return this_table_function->executeImpl(ast_function, context_to_use, table_name, cached_columns);
     };
 
     /// It will request actual table structure and create underlying storage lazily
diff --git a/src/TableFunctions/ITableFunction.h b/src/TableFunctions/ITableFunction.h
index 56147ffd598..93cf5057e88 100644
--- a/src/TableFunctions/ITableFunction.h
+++ b/src/TableFunctions/ITableFunction.h
@@ -54,7 +54,7 @@ public:
 
     /// Create storage according to the query.
     StoragePtr
-    execute(const ASTPtr & ast_function, ContextPtr context, const std::string & table_name, ColumnsDescription cached_columns_ = {}) const;
+    execute(const ASTPtr & ast_function, ContextPtr context, const std::string & table_name, ColumnsDescription cached_columns_ = {}, bool use_global_context = false) const;
 
     virtual ~ITableFunction() = default;
 
diff --git a/src/TableFunctions/ITableFunctionFileLike.cpp b/src/TableFunctions/ITableFunctionFileLike.cpp
index 699ad698bd8..4395c318983 100644
--- a/src/TableFunctions/ITableFunctionFileLike.cpp
+++ b/src/TableFunctions/ITableFunctionFileLike.cpp
@@ -1,4 +1,3 @@
-#include <TableFunctions/ITableFunction.h>
 #include <TableFunctions/ITableFunctionFileLike.h>
 #include <TableFunctions/parseColumnsListForTableFunction.h>
 
@@ -6,16 +5,16 @@
 #include <Parsers/ASTLiteral.h>
 
 #include <Common/Exception.h>
-#include <Common/typeid_cast.h>
 
 #include <Storages/StorageFile.h>
 #include <Storages/Distributed/DirectoryMonitor.h>
 
-#include <Interpreters/Context.h>
 #include <Interpreters/evaluateConstantExpression.h>
 
 #include <Processors/ISource.h>
 
+#include <Formats/FormatFactory.h>
+
 namespace DB
 {
 
@@ -23,10 +22,27 @@ namespace ErrorCodes
 {
     extern const int LOGICAL_ERROR;
     extern const int NUMBER_OF_ARGUMENTS_DOESNT_MATCH;
-    extern const int INCORRECT_FILE_NAME;
     extern const int BAD_ARGUMENTS;
 }
 
+namespace
+{
+    void checkIfFormatSupportsAutoStructure(const String & name, const String & format)
+    {
+        if (name == "file" && format == "Distributed")
+            return;
+
+        if (FormatFactory::instance().checkIfFormatHasAnySchemaReader(format))
+            return;
+
+        throw Exception(
+            "Table function '" + name
+                + "' allows automatic structure determination only for formats that support schema inference and for Distributed format in table function "
+                  "'file'",
+            ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH);
+    }
+}
+
 void ITableFunctionFileLike::parseArguments(const ASTPtr & ast_function, ContextPtr context)
 {
     /// Parse args
@@ -46,21 +62,23 @@ void ITableFunctionFileLike::parseArguments(const ASTPtr & ast_function, Context
     filename = args[0]->as<ASTLiteral &>().value.safeGet<String>();
     format = args[1]->as<ASTLiteral &>().value.safeGet<String>();
 
-    if (args.size() == 2 && getName() == "file")
+    if (args.size() == 2)
     {
-        if (format == "Distributed")
-            return;
-        throw Exception("Table function '" + getName() + "' allows 2 arguments only for Distributed format.", ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH);
+        checkIfFormatSupportsAutoStructure(getName(), format);
+        return;
     }
 
     if (args.size() != 3 && args.size() != 4)
-        throw Exception("Table function '" + getName() + "' requires 3 or 4 arguments: filename, format, structure and compression method (default auto).",
+        throw Exception("Table function '" + getName() + "' requires 2, 3 or 4 arguments: filename, format, structure (default auto) and compression method (default auto)",
             ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH);
 
     structure = args[2]->as<ASTLiteral &>().value.safeGet<String>();
+    if (structure == "auto")
+        checkIfFormatSupportsAutoStructure(getName(), format);
+
     if (structure.empty())
         throw Exception(ErrorCodes::BAD_ARGUMENTS,
-            "Table structure is empty for table function '{}'",
+            "Table structure is empty for table function '{}'. If you want to use automatic schema inference, use 'auto'",
             ast_function->formatForErrorMessage());
 
     if (args.size() == 4)
@@ -69,25 +87,12 @@ void ITableFunctionFileLike::parseArguments(const ASTPtr & ast_function, Context
 
 StoragePtr ITableFunctionFileLike::executeImpl(const ASTPtr & /*ast_function*/, ContextPtr context, const std::string & table_name, ColumnsDescription /*cached_columns*/) const
 {
-    auto columns = getActualTableStructure(context);
+    ColumnsDescription columns;
+    if (structure != "auto")
+        columns = parseColumnsListFromString(structure, context);
     StoragePtr storage = getStorage(filename, format, columns, context, table_name, compression_method);
     storage->startup();
     return storage;
 }
 
-ColumnsDescription ITableFunctionFileLike::getActualTableStructure(ContextPtr context) const
-{
-    if (structure.empty())
-    {
-        assert(getName() == "file" && format == "Distributed");
-        size_t total_bytes_to_read = 0;
-        Strings paths = StorageFile::getPathsList(filename, context->getUserFilesPath(), context, total_bytes_to_read);
-        if (paths.empty())
-            throw Exception("Cannot get table structure from file, because no files match specified name", ErrorCodes::INCORRECT_FILE_NAME);
-        auto source = StorageDistributedDirectoryMonitor::createSourceFromFile(paths[0]);
-        return ColumnsDescription{source->getOutputs().front().getHeader().getNamesAndTypesList()};
-    }
-    return parseColumnsListFromString(structure, context);
-}
-
 }
diff --git a/src/TableFunctions/ITableFunctionFileLike.h b/src/TableFunctions/ITableFunctionFileLike.h
index 2069f02b0dd..2ceafdee229 100644
--- a/src/TableFunctions/ITableFunctionFileLike.h
+++ b/src/TableFunctions/ITableFunctionFileLike.h
@@ -8,7 +8,7 @@ class ColumnsDescription;
 class Context;
 
 /*
- * function(source, format, structure) - creates a temporary storage from formatted source
+ * function(source, format, structure[, compression_method]) - creates a temporary storage from formatted source
  */
 class ITableFunctionFileLike : public ITableFunction
 {
@@ -18,7 +18,7 @@ protected:
 
     String filename;
     String format;
-    String structure;
+    String structure = "auto";
     String compression_method = "auto";
 
 private:
@@ -28,8 +28,7 @@ private:
         const String & source, const String & format, const ColumnsDescription & columns, ContextPtr global_context,
         const std::string & table_name, const String & compression_method) const = 0;
 
-    ColumnsDescription getActualTableStructure(ContextPtr context) const override;
-
-    bool hasStaticStructure() const override { return true; }
+    bool hasStaticStructure() const override { return structure != "auto"; }
 };
+
 }
diff --git a/src/TableFunctions/TableFunctionFile.cpp b/src/TableFunctions/TableFunctionFile.cpp
index d8bdb3b45c4..71aba5494e8 100644
--- a/src/TableFunctions/TableFunctionFile.cpp
+++ b/src/TableFunctions/TableFunctionFile.cpp
@@ -1,4 +1,5 @@
 #include <TableFunctions/TableFunctionFile.h>
+#include <TableFunctions/parseColumnsListForTableFunction.h>
 
 #include "registerTableFunctions.h"
 #include <Access/Common/AccessFlags.h>
@@ -9,11 +10,13 @@
 
 namespace DB
 {
+
 StoragePtr TableFunctionFile::getStorage(const String & source,
     const String & format_, const ColumnsDescription & columns,
     ContextPtr global_context, const std::string & table_name,
     const std::string & compression_method_) const
 {
+    LOG_DEBUG(&Poco::Logger::get("TableFunctionFile"), "getStorage");
     // For `file` table function, we are going to use format settings from the
     // query context.
     StorageFile::CommonArguments args{
@@ -30,8 +33,21 @@ StoragePtr TableFunctionFile::getStorage(const String & source,
     return StorageFile::create(source, global_context->getUserFilesPath(), args);
 }
 
+ColumnsDescription TableFunctionFile::getActualTableStructure(ContextPtr context) const
+{
+    if (structure == "auto")
+    {
+        size_t total_bytes_to_read = 0;
+        Strings paths = StorageFile::getPathsList(filename, context->getUserFilesPath(), context, total_bytes_to_read);
+        return StorageFile::getTableStructureFromData(format, paths, compression_method, std::nullopt, context);
+    }
+
+    return parseColumnsListFromString(structure, context);
+}
+
 void registerTableFunctionFile(TableFunctionFactory & factory)
 {
     factory.registerFunction<TableFunctionFile>();
 }
+
 }
diff --git a/src/TableFunctions/TableFunctionFile.h b/src/TableFunctions/TableFunctionFile.h
index 460656a7218..f26e4a9c06d 100644
--- a/src/TableFunctions/TableFunctionFile.h
+++ b/src/TableFunctions/TableFunctionFile.h
@@ -6,7 +6,7 @@
 namespace DB
 {
 
-/* file(path, format, structure) - creates a temporary storage from file
+/* file(path, format[, structure, compression]) - creates a temporary storage from file
  *
  * The file must be in the clickhouse data directory.
  * The relative path begins with the clickhouse data directory.
@@ -20,9 +20,13 @@ public:
         return name;
     }
 
+    ColumnsDescription getActualTableStructure(ContextPtr context) const override;
+
 private:
     StoragePtr getStorage(
         const String & source, const String & format_, const ColumnsDescription & columns, ContextPtr global_context,
         const std::string & table_name, const std::string & compression_method_) const override;
     const char * getStorageTypeName() const override { return "File"; }
-};}
+};
+
+}
diff --git a/src/TableFunctions/TableFunctionHDFS.cpp b/src/TableFunctions/TableFunctionHDFS.cpp
index 245674b0e06..b626f563977 100644
--- a/src/TableFunctions/TableFunctionHDFS.cpp
+++ b/src/TableFunctions/TableFunctionHDFS.cpp
@@ -6,9 +6,11 @@
 #include <Storages/ColumnsDescription.h>
 #include <TableFunctions/TableFunctionFactory.h>
 #include <TableFunctions/TableFunctionHDFS.h>
+#include <TableFunctions/parseColumnsListForTableFunction.h>
 
 namespace DB
 {
+
 StoragePtr TableFunctionHDFS::getStorage(
     const String & source, const String & format_, const ColumnsDescription & columns, ContextPtr global_context,
     const std::string & table_name, const String & compression_method_) const
@@ -24,12 +26,18 @@ StoragePtr TableFunctionHDFS::getStorage(
         compression_method_);
 }
 
+ColumnsDescription TableFunctionHDFS::getActualTableStructure(ContextPtr context) const
+{
+    if (structure == "auto")
+        return StorageHDFS::getTableStructureFromData(format, filename, compression_method, context);
+
+    return parseColumnsListFromString(structure, context);
+}
 
-#if USE_HDFS
 void registerTableFunctionHDFS(TableFunctionFactory & factory)
 {
     factory.registerFunction<TableFunctionHDFS>();
 }
-#endif
+
 }
 #endif
diff --git a/src/TableFunctions/TableFunctionHDFS.h b/src/TableFunctions/TableFunctionHDFS.h
index 70bdc67efc8..74139818209 100644
--- a/src/TableFunctions/TableFunctionHDFS.h
+++ b/src/TableFunctions/TableFunctionHDFS.h
@@ -12,7 +12,7 @@ namespace DB
 
 class Context;
 
-/* hdfs(URI, format, structure) - creates a temporary storage from hdfs files
+/* hdfs(URI, format[, structure, compression]) - creates a temporary storage from hdfs files
  *
  */
 class TableFunctionHDFS : public ITableFunctionFileLike
@@ -24,6 +24,8 @@ public:
         return name;
     }
 
+    ColumnsDescription getActualTableStructure(ContextPtr context) const override;
+
 private:
     StoragePtr getStorage(
         const String & source, const String & format_, const ColumnsDescription & columns, ContextPtr global_context,
diff --git a/src/TableFunctions/TableFunctionRemote.h b/src/TableFunctions/TableFunctionRemote.h
index 845c36182dc..976397ddc45 100644
--- a/src/TableFunctions/TableFunctionRemote.h
+++ b/src/TableFunctions/TableFunctionRemote.h
@@ -27,6 +27,7 @@ public:
     bool needStructureConversion() const override { return false; }
 
 private:
+
     StoragePtr executeImpl(const ASTPtr & ast_function, ContextPtr context, const std::string & table_name, ColumnsDescription cached_columns) const override;
     const char * getStorageTypeName() const override { return "Distributed"; }
 
diff --git a/src/TableFunctions/TableFunctionS3.cpp b/src/TableFunctions/TableFunctionS3.cpp
index e26c282c622..c4be01c6b5c 100644
--- a/src/TableFunctions/TableFunctionS3.cpp
+++ b/src/TableFunctions/TableFunctionS3.cpp
@@ -10,6 +10,7 @@
 #include <TableFunctions/parseColumnsListForTableFunction.h>
 #include <Parsers/ASTLiteral.h>
 #include <Storages/StorageS3.h>
+#include <Formats/FormatFactory.h>
 #include "registerTableFunctions.h"
 
 
@@ -28,6 +29,7 @@ void TableFunctionS3::parseArguments(const ASTPtr & ast_function, ContextPtr con
 
     const auto message = fmt::format(
         "The signature of table function {} could be the following:\n" \
+        " - url, format\n" \
         " - url, format, structure\n" \
         " - url, format, structure, compression_method\n" \
         " - url, access_key_id, secret_access_key, format, structure\n" \
@@ -69,17 +71,32 @@ void TableFunctionS3::parseArguments(const ASTPtr & ast_function, ContextPtr con
         /// Size -> argument indexes
         static auto size_to_args = std::map<size_t, std::map<String, size_t>>
         {
+            {2, {{"format", 1}}},
             {3, {{"format", 1}, {"structure", 2}}},
-            {4, {{"format", 1}, {"structure", 2}, {"compression_method", 3}}},
             {5, {{"access_key_id", 1}, {"secret_access_key", 2}, {"format", 3}, {"structure", 4}}},
             {6, {{"access_key_id", 1}, {"secret_access_key", 2}, {"format", 3}, {"structure", 4}, {"compression_method", 5}}}
         };
 
+        std::map<String, size_t> args_to_idx;
+        /// For 4 arguments we support 2 possible variants:
+        /// s3(source, format, structure, compression_method) and s3(source, access_key_id, access_key_id, format)
+        /// We can distinguish them by looking at the 4-th argument: check if it's a format name or not.
+        if (args.size() == 4)
+        {
+            auto last_arg = args[3]->as<ASTLiteral &>().value.safeGet<String>();
+            if (FormatFactory::instance().getAllFormats().contains(last_arg))
+                args_to_idx = {{"access_key_id", 1}, {"access_key_id", 2}, {"format", 3}};
+            else
+                args_to_idx = {{"format", 1}, {"structure", 2}, {"compression_method", 3}};
+        }
+        else
+        {
+            args_to_idx = size_to_args[args.size()];
+        }
+
         /// This argument is always the first
         configuration.url = args[0]->as<ASTLiteral &>().value.safeGet<String>();
 
-        auto & args_to_idx = size_to_args[args.size()];
-
         if (args_to_idx.contains("format"))
             configuration.format = args[args_to_idx["format"]]->as<ASTLiteral &>().value.safeGet<String>();
 
@@ -101,6 +118,21 @@ void TableFunctionS3::parseArguments(const ASTPtr & ast_function, ContextPtr con
 
 ColumnsDescription TableFunctionS3::getActualTableStructure(ContextPtr context) const
 {
+    if (s3_configuration->structure == "auto")
+    {
+        return StorageS3::getTableStructureFromData(
+            s3_configuration->format,
+            S3::URI(Poco::URI(s3_configuration->url)),
+            s3_configuration->access_key_id,
+            s3_configuration->secret_access_key,
+            context->getSettingsRef().s3_max_connections,
+            context->getSettingsRef().s3_max_single_read_retries,
+            s3_configuration->compression_method,
+            false,
+            std::nullopt,
+            context);
+    }
+
     return parseColumnsListFromString(s3_configuration->structure, context);
 }
 
@@ -113,6 +145,10 @@ StoragePtr TableFunctionS3::executeImpl(const ASTPtr & /*ast_function*/, Context
     UInt64 max_single_part_upload_size = context->getSettingsRef().s3_max_single_part_upload_size;
     UInt64 max_connections = context->getSettingsRef().s3_max_connections;
 
+    ColumnsDescription columns;
+    if (s3_configuration->structure != "auto")
+        columns = parseColumnsListFromString(s3_configuration->structure, context);
+
     StoragePtr storage = StorageS3::create(
         s3_uri,
         s3_configuration->access_key_id,
diff --git a/src/TableFunctions/TableFunctionS3.h b/src/TableFunctions/TableFunctionS3.h
index 8d4c1391236..374e653072e 100644
--- a/src/TableFunctions/TableFunctionS3.h
+++ b/src/TableFunctions/TableFunctionS3.h
@@ -13,7 +13,7 @@ namespace DB
 
 class Context;
 
-/* s3(source, [access_key_id, secret_access_key,] format, structure) - creates a temporary storage for a file in S3
+/* s3(source, [access_key_id, secret_access_key,] format, structure[, compression]) - creates a temporary storage for a file in S3
  */
 class TableFunctionS3 : public ITableFunction
 {
@@ -23,7 +23,7 @@ public:
     {
         return name;
     }
-    bool hasStaticStructure() const override { return true; }
+    bool hasStaticStructure() const override { return s3_configuration->structure != "auto"; }
 
 protected:
     StoragePtr executeImpl(
diff --git a/src/TableFunctions/TableFunctionURL.cpp b/src/TableFunctions/TableFunctionURL.cpp
index c3ea30f800f..7c4d7b4a444 100644
--- a/src/TableFunctions/TableFunctionURL.cpp
+++ b/src/TableFunctions/TableFunctionURL.cpp
@@ -2,11 +2,11 @@
 
 #include "registerTableFunctions.h"
 #include <Access/Common/AccessFlags.h>
-#include <Poco/URI.h>
 #include <Parsers/ASTFunction.h>
 #include <Storages/ColumnsDescription.h>
 #include <Storages/StorageURL.h>
 #include <TableFunctions/TableFunctionFactory.h>
+#include <TableFunctions/parseColumnsListForTableFunction.h>
 #include <Storages/StorageExternalDistributed.h>
 
 
@@ -59,20 +59,10 @@ void TableFunctionURL::parseArguments(const ASTPtr & ast_function, ContextPtr co
     }
 }
 
-
 StoragePtr TableFunctionURL::getStorage(
     const String & source, const String & format_, const ColumnsDescription & columns, ContextPtr global_context,
     const std::string & table_name, const String & compression_method_) const
 {
-    ReadWriteBufferFromHTTP::HTTPHeaderEntries headers;
-    for (const auto & [header, value] : configuration.headers)
-    {
-        auto value_literal = value.safeGet<String>();
-        if (header == "Range")
-            throw Exception(ErrorCodes::BAD_ARGUMENTS, "Range headers are not allowed");
-        headers.emplace_back(std::make_pair(header, value_literal));
-    }
-
     return StorageURL::create(
         source,
         StorageID(getDatabaseName(), table_name),
@@ -83,10 +73,31 @@ StoragePtr TableFunctionURL::getStorage(
         String{},
         global_context,
         compression_method_,
-        headers,
+        getHeaders(),
         configuration.http_method);
 }
 
+ReadWriteBufferFromHTTP::HTTPHeaderEntries TableFunctionURL::getHeaders() const
+{
+    ReadWriteBufferFromHTTP::HTTPHeaderEntries headers;
+    for (const auto & [header, value] : configuration.headers)
+    {
+        auto value_literal = value.safeGet<String>();
+        if (header == "Range")
+            throw Exception(ErrorCodes::BAD_ARGUMENTS, "Range headers are not allowed");
+        headers.emplace_back(std::make_pair(header, value_literal));
+    }
+    return headers;
+}
+
+ColumnsDescription TableFunctionURL::getActualTableStructure(ContextPtr context) const
+{
+    if (structure == "auto")
+        return StorageURL::getTableStructureFromData(format, filename, compression_method, getHeaders(), std::nullopt, context);
+
+    return parseColumnsListFromString(structure, context);
+}
+
 void registerTableFunctionURL(TableFunctionFactory & factory)
 {
     factory.registerFunction<TableFunctionURL>();
diff --git a/src/TableFunctions/TableFunctionURL.h b/src/TableFunctions/TableFunctionURL.h
index 9425112acb2..798a37dc478 100644
--- a/src/TableFunctions/TableFunctionURL.h
+++ b/src/TableFunctions/TableFunctionURL.h
@@ -2,6 +2,7 @@
 
 #include <TableFunctions/ITableFunctionFileLike.h>
 #include <Storages/ExternalDataSourceConfiguration.h>
+#include <IO/ReadWriteBufferFromHTTP.h>
 
 
 namespace DB
@@ -9,7 +10,7 @@ namespace DB
 
 class Context;
 
-/* url(source, format, structure) - creates a temporary storage from url
+/* url(source, format[, structure, compression]) - creates a temporary storage from url
  */
 class TableFunctionURL : public ITableFunctionFileLike
 {
@@ -20,6 +21,8 @@ public:
         return name;
     }
 
+    ColumnsDescription getActualTableStructure(ContextPtr context) const override;
+
 protected:
     void parseArguments(const ASTPtr & ast_function, ContextPtr context) override;
 
@@ -29,6 +32,8 @@ private:
         const std::string & table_name, const String & compression_method_) const override;
     const char * getStorageTypeName() const override { return "URL"; }
 
+    ReadWriteBufferFromHTTP::HTTPHeaderEntries getHeaders() const;
+
     URLBasedDataSourceConfiguration configuration;
 };
 
diff --git a/tests/ci/keeper_jepsen_check.py b/tests/ci/keeper_jepsen_check.py
index 2c2b8b4783f..b7acc92b0f3 100644
--- a/tests/ci/keeper_jepsen_check.py
+++ b/tests/ci/keeper_jepsen_check.py
@@ -122,7 +122,7 @@ if __name__ == "__main__":
 
     logging.info("Start at PR number %s, commit sha %s labels %s", pr_info.number, pr_info.sha, pr_info.labels)
 
-    if pr_info.number != 0 and 'jepsen-test' not in pr_info.labels():
+    if pr_info.number != 0 and 'jepsen-test' not in pr_info.labels:
         logging.info("Not jepsen test label in labels list, skipping")
         sys.exit(0)
 
diff --git a/tests/ci/run_check.py b/tests/ci/run_check.py
index 692cda18f20..2085a5c1e2b 100644
--- a/tests/ci/run_check.py
+++ b/tests/ci/run_check.py
@@ -68,6 +68,7 @@ TRUSTED_CONTRIBUTORS = {e.lower() for e in [
     "YiuRULE",
     "zlobober",     # Developer of YT
     "ilejn",        # Arenadata, responsible for Kerberized Kafka
+    "thomoco",      # ClickHouse
 ]}
 
 
diff --git a/tests/integration/test_storage_hdfs/test.py b/tests/integration/test_storage_hdfs/test.py
index 33ce94a7a29..f317fb5429a 100644
--- a/tests/integration/test_storage_hdfs/test.py
+++ b/tests/integration/test_storage_hdfs/test.py
@@ -310,6 +310,7 @@ def test_seekable_formats(started_cluster):
     result = node1.query(f"SELECT count() FROM {table_function}")
     assert(int(result) == 5000000)
 
+
 def test_read_table_with_default(started_cluster):
     hdfs_api = started_cluster.hdfs_api
 
@@ -322,6 +323,22 @@ def test_read_table_with_default(started_cluster):
         "select * from hdfs('hdfs://hdfs1:9000/simple_table_function', 'TSVWithNames', 'n UInt32, m UInt32 DEFAULT n * 2') FORMAT TSVWithNames") == output
 
 
+def test_schema_inference(started_cluster):
+    node1.query(f"insert into table function hdfs('hdfs://hdfs1:9000/native', 'Native', 'a Int32, b String') SELECT number, randomString(100) FROM numbers(5000000)")
+
+    result = node1.query(f"desc hdfs('hdfs://hdfs1:9000/native', 'Native')")
+    assert result == "a\tInt32\t\t\t\t\t\nb\tString\t\t\t\t\t\n"
+
+    result = node1.query(f"select count(*) from hdfs('hdfs://hdfs1:9000/native', 'Native')")
+    assert(int(result) == 5000000)
+
+    node1.query(f"create table schema_inference engine=HDFS('hdfs://hdfs1:9000/native', 'Native')")
+    result = node1.query(f"desc schema_inference")
+    assert result == "a\tInt32\t\t\t\t\t\nb\tString\t\t\t\t\t\n"
+
+    result = node1.query(f"select count(*) from schema_inference")
+    assert(int(result) == 5000000)
+
 
 def test_hdfsCluster(started_cluster):
     hdfs_api = started_cluster.hdfs_api
diff --git a/tests/integration/test_storage_rabbitmq/test.py b/tests/integration/test_storage_rabbitmq/test.py
index 2c2a9e41509..a3d99159cb2 100644
--- a/tests/integration/test_storage_rabbitmq/test.py
+++ b/tests/integration/test_storage_rabbitmq/test.py
@@ -35,6 +35,17 @@ def rabbitmq_check_result(result, check=False, ref_file='test_rabbitmq_json.refe
         else:
             return TSV(result) == TSV(reference)
 
+def wait_rabbitmq_to_start(rabbitmq_docker_id, timeout=180):
+    start = time.time()
+    while time.time() - start < timeout:
+        try:
+            if instance.cluster.check_rabbitmq_is_available(rabbitmq_docker_id):
+                logging.debug("RabbitMQ is available")
+                return
+            time.sleep(0.5)
+        except Exception as ex:
+            logging.debug("Can't connect to RabbitMQ " + str(ex))
+            time.sleep(0.5)
 
 def kill_rabbitmq(rabbitmq_id):
     p = subprocess.Popen(('docker', 'stop', rabbitmq_id), stdout=subprocess.PIPE)
@@ -45,7 +56,7 @@ def kill_rabbitmq(rabbitmq_id):
 def revive_rabbitmq(rabbitmq_id):
     p = subprocess.Popen(('docker', 'start', rabbitmq_id), stdout=subprocess.PIPE)
     p.communicate()
-    return p.returncode == 0
+    wait_rabbitmq_to_start(rabbitmq_id)
 
 
 # Fixtures
diff --git a/tests/integration/test_storage_s3/configs/named_collections.xml b/tests/integration/test_storage_s3/configs/named_collections.xml
index dfcbeeb2d4a..efadedc1bde 100644
--- a/tests/integration/test_storage_s3/configs/named_collections.xml
+++ b/tests/integration/test_storage_s3/configs/named_collections.xml
@@ -15,5 +15,10 @@
             <access_key_id>minio</access_key_id>
             <secret_access_key>minio123</secret_access_key>
         </s3_orc>
+        <s3_native>
+            <url>http://minio1:9001/root/test_native</url>
+            <access_key_id>minio</access_key_id>
+            <secret_access_key>minio123</secret_access_key>
+        </s3_native>
     </named_collections>
 </clickhouse>
diff --git a/tests/integration/test_storage_s3/test.py b/tests/integration/test_storage_s3/test.py
index f3c4b1dd0cf..885a37f875c 100644
--- a/tests/integration/test_storage_s3/test.py
+++ b/tests/integration/test_storage_s3/test.py
@@ -126,7 +126,7 @@ def run_query(instance, query, stdin=None, settings=None):
     pytest.param("'wrongid','wrongkey',", False, 'xz', id="xz"),
     pytest.param("'wrongid','wrongkey',", False, 'zstd', id="zstd")
 ])
-def test_put(started_cluster, maybe_auth, positive, compression):
+def _test_put(started_cluster, maybe_auth, positive, compression):
     # type: (ClickHouseCluster) -> None
 
     bucket = started_cluster.minio_bucket if not maybe_auth else started_cluster.minio_restricted_bucket
@@ -148,7 +148,7 @@ def test_put(started_cluster, maybe_auth, positive, compression):
         assert values_csv == get_s3_file_content(started_cluster, bucket, filename)
 
 
-def test_partition_by(started_cluster):
+def _test_partition_by(started_cluster):
     bucket = started_cluster.minio_bucket
     instance = started_cluster.instances["dummy"]  # type: ClickHouseInstance
     table_format = "column1 UInt32, column2 UInt32, column3 UInt32"
@@ -173,7 +173,7 @@ def test_partition_by(started_cluster):
     assert "78,43,45\n" == get_s3_file_content(started_cluster, bucket, "test2_45.csv")
 
 
-def test_partition_by_string_column(started_cluster):
+def _test_partition_by_string_column(started_cluster):
     bucket = started_cluster.minio_bucket
     instance = started_cluster.instances["dummy"]  # type: ClickHouseInstance
     table_format = "col_num UInt32, col_str String"
@@ -191,7 +191,7 @@ def test_partition_by_string_column(started_cluster):
     assert '78,"你好"\n' == get_s3_file_content(started_cluster, bucket, "test_你好.csv")
 
 
-def test_partition_by_const_column(started_cluster):
+def _test_partition_by_const_column(started_cluster):
     bucket = started_cluster.minio_bucket
     instance = started_cluster.instances["dummy"]  # type: ClickHouseInstance
     table_format = "column1 UInt32, column2 UInt32, column3 UInt32"
@@ -212,7 +212,7 @@ def test_partition_by_const_column(started_cluster):
     "space",
     "plus"
 ])
-def test_get_file_with_special(started_cluster, special):
+def _test_get_file_with_special(started_cluster, special):
     symbol = {"space": " ", "plus": "+"}[special]
     urlsafe_symbol = {"space": "%20", "plus": "%2B"}[special]
     auth = "'minio','minio123',"
@@ -239,7 +239,7 @@ def test_get_file_with_special(started_cluster, special):
     "plus",
     "plus2"
 ])
-def test_get_path_with_special(started_cluster, special):
+def _test_get_path_with_special(started_cluster, special):
     symbol = {"space": "%20", "plus": "%2B", "plus2": "%2B"}[special]
     safe_symbol = {"space": "%20", "plus": "+", "plus2": "%2B"}[special]
     auth = "'minio','minio123',"
@@ -253,7 +253,7 @@ def test_get_path_with_special(started_cluster, special):
 @pytest.mark.parametrize("auth", [
     pytest.param("'minio','minio123',", id="minio")
 ])
-def test_empty_put(started_cluster, auth):
+def _test_empty_put(started_cluster, auth):
     # type: (ClickHouseCluster, str) -> None
 
     bucket = started_cluster.minio_bucket
@@ -291,7 +291,7 @@ def test_empty_put(started_cluster, auth):
     pytest.param("'minio','minio123',", True, id="auth_positive"),
     pytest.param("'wrongid','wrongkey',", False, id="negative"),
 ])
-def test_put_csv(started_cluster, maybe_auth, positive):
+def _test_put_csv(started_cluster, maybe_auth, positive):
     # type: (ClickHouseCluster, bool, str) -> None
 
     bucket = started_cluster.minio_bucket if not maybe_auth else started_cluster.minio_restricted_bucket
@@ -313,7 +313,7 @@ def test_put_csv(started_cluster, maybe_auth, positive):
 
 
 # Test put and get with S3 server redirect.
-def test_put_get_with_redirect(started_cluster):
+def _test_put_get_with_redirect(started_cluster):
     # type: (ClickHouseCluster) -> None
 
     bucket = started_cluster.minio_bucket
@@ -340,7 +340,7 @@ def test_put_get_with_redirect(started_cluster):
 
 
 # Test put with restricted S3 server redirect.
-def test_put_with_zero_redirect(started_cluster):
+def _test_put_with_zero_redirect(started_cluster):
     # type: (ClickHouseCluster) -> None
 
     bucket = started_cluster.minio_bucket
@@ -367,7 +367,7 @@ def test_put_with_zero_redirect(started_cluster):
         assert exception_raised
 
 
-def test_put_get_with_globs(started_cluster):
+def _test_put_get_with_globs(started_cluster):
     # type: (ClickHouseCluster) -> None
     unique_prefix = random.randint(1,10000)
     bucket = started_cluster.minio_bucket
@@ -399,7 +399,7 @@ def test_put_get_with_globs(started_cluster):
     pytest.param("'wrongid','wrongkey'", False, id="negative"),
     # ("'minio','minio123',",True), Redirect with credentials not working with nginx.
 ])
-def test_multipart_put(started_cluster, maybe_auth, positive):
+def _test_multipart_put(started_cluster, maybe_auth, positive):
     # type: (ClickHouseCluster) -> None
 
     bucket = started_cluster.minio_bucket if not maybe_auth else started_cluster.minio_restricted_bucket
@@ -439,7 +439,7 @@ def test_multipart_put(started_cluster, maybe_auth, positive):
         assert csv_data == get_s3_file_content(started_cluster, bucket, filename)
 
 
-def test_remote_host_filter(started_cluster):
+def _test_remote_host_filter(started_cluster):
     instance = started_cluster.instances["restricted_dummy"]
     format = "column1 UInt32, column2 UInt32, column3 UInt32"
 
@@ -457,7 +457,7 @@ def test_remote_host_filter(started_cluster):
     pytest.param("''", id="1_argument"),
     pytest.param("'','','','','',''", id="6_arguments"),
 ])
-def test_wrong_s3_syntax(started_cluster, s3_storage_args):
+def _test_wrong_s3_syntax(started_cluster, s3_storage_args):
     instance = started_cluster.instances["dummy"]  # type: ClickHouseInstance
     expected_err_msg = "Code: 42"  # NUMBER_OF_ARGUMENTS_DOESNT_MATCH
 
@@ -466,7 +466,7 @@ def test_wrong_s3_syntax(started_cluster, s3_storage_args):
 
 
 # https://en.wikipedia.org/wiki/One_Thousand_and_One_Nights
-def test_s3_glob_scheherazade(started_cluster):
+def _test_s3_glob_scheherazade(started_cluster):
     bucket = started_cluster.minio_bucket
     instance = started_cluster.instances["dummy"]  # type: ClickHouseInstance
     table_format = "column1 UInt32, column2 UInt32, column3 UInt32"
@@ -535,7 +535,7 @@ def replace_config(old, new):
     config.close()
 
 
-def test_custom_auth_headers(started_cluster):
+def _test_custom_auth_headers(started_cluster):
     table_format = "column1 UInt32, column2 UInt32, column3 UInt32"
     filename = "test.csv"
     get_query = "select * from s3('http://resolver:8080/{bucket}/{file}', 'CSV', '{table_format}')".format(
@@ -566,7 +566,7 @@ def test_custom_auth_headers(started_cluster):
     instance.query("DROP TABLE test")
 
 
-def test_custom_auth_headers_exclusion(started_cluster):
+def _test_custom_auth_headers_exclusion(started_cluster):
     table_format = "column1 UInt32, column2 UInt32, column3 UInt32"
     filename = "test.csv"
     get_query = f"SELECT * FROM s3('http://resolver:8080/{started_cluster.minio_restricted_bucket}/restricteddirectory/{filename}', 'CSV', '{table_format}')"
@@ -580,7 +580,7 @@ def test_custom_auth_headers_exclusion(started_cluster):
     assert 'Forbidden Error' in ei.value.stderr
 
 
-def test_infinite_redirect(started_cluster):
+def _test_infinite_redirect(started_cluster):
     bucket = "redirected"
     table_format = "column1 UInt32, column2 UInt32, column3 UInt32"
     filename = "test.csv"
@@ -598,7 +598,7 @@ def test_infinite_redirect(started_cluster):
     pytest.param("bin", "gzip", id="bin"),
     pytest.param("gz", "auto", id="gz"),
 ])
-def test_storage_s3_get_gzip(started_cluster, extension, method):
+def _test_storage_s3_get_gzip(started_cluster, extension, method):
     bucket = started_cluster.minio_bucket
     instance = started_cluster.instances["dummy"]
     filename = f"test_get_gzip.{extension}"
@@ -638,7 +638,7 @@ def test_storage_s3_get_gzip(started_cluster, extension, method):
     run_query(instance, f"DROP TABLE {name}")
 
 
-def test_storage_s3_get_unstable(started_cluster):
+def _test_storage_s3_get_unstable(started_cluster):
     bucket = started_cluster.minio_bucket
     instance = started_cluster.instances["dummy"]
     table_format = "column1 Int64, column2 Int64, column3 Int64, column4 Int64"
@@ -647,7 +647,7 @@ def test_storage_s3_get_unstable(started_cluster):
     assert result.splitlines() == ["500001,500000,0"]
 
 
-def test_storage_s3_put_uncompressed(started_cluster):
+def _test_storage_s3_put_uncompressed(started_cluster):
     bucket = started_cluster.minio_bucket
     instance = started_cluster.instances["dummy"]
     filename = "test_put_uncompressed.bin"
@@ -684,7 +684,7 @@ def test_storage_s3_put_uncompressed(started_cluster):
     pytest.param("bin", "gzip", id="bin"),
     pytest.param("gz", "auto", id="gz")
 ])
-def test_storage_s3_put_gzip(started_cluster, extension, method):
+def _test_storage_s3_put_gzip(started_cluster, extension, method):
     bucket = started_cluster.minio_bucket
     instance = started_cluster.instances["dummy"]
     filename = f"test_put_gzip.{extension}"
@@ -721,7 +721,7 @@ def test_storage_s3_put_gzip(started_cluster, extension, method):
     assert sum([ int(i.split(',')[1]) for i in uncompressed_content.splitlines() ]) == 708
 
 
-def test_truncate_table(started_cluster):
+def _test_truncate_table(started_cluster):
     bucket = started_cluster.minio_bucket
     instance = started_cluster.instances["dummy"]  # type: ClickHouseInstance
     name = "truncate"
@@ -745,7 +745,7 @@ def test_truncate_table(started_cluster):
     assert instance.query("SELECT * FROM {}".format(name)) == ""
 
 
-def test_predefined_connection_configuration(started_cluster):
+def _test_predefined_connection_configuration(started_cluster):
     bucket = started_cluster.minio_bucket
     instance = started_cluster.instances["dummy"]  # type: ClickHouseInstance
     name = "test_table"
@@ -762,7 +762,7 @@ def test_predefined_connection_configuration(started_cluster):
 
 
 result = ""
-def test_url_reconnect_in_the_middle(started_cluster):
+def _test_url_reconnect_in_the_middle(started_cluster):
     bucket = started_cluster.minio_bucket
     instance = started_cluster.instances["dummy"]
     table_format = "id String, data String"
@@ -783,7 +783,7 @@ def test_url_reconnect_in_the_middle(started_cluster):
                 f"""select sum(cityHash64(x)) from (select toUInt64(id) + sleep(0.1) as x from
                 url('http://{started_cluster.minio_host}:{started_cluster.minio_port}/{bucket}/{filename}', 'TSV', '{table_format}')
                 settings http_max_tries = 10, http_retry_max_backoff_ms=2000, http_send_timeout=1, http_receive_timeout=1)""")
-            assert(int(result), 3914219105369203805)
+            assert(int(result) == 3914219105369203805)
 
         thread = threading.Thread(target=select)
         thread.start()
@@ -796,10 +796,10 @@ def test_url_reconnect_in_the_middle(started_cluster):
 
         thread.join()
 
-        assert(int(result), 3914219105369203805)
+        assert(int(result) == 3914219105369203805)
 
 
-def test_seekable_formats(started_cluster):
+def _test_seekable_formats(started_cluster):
     bucket = started_cluster.minio_bucket
     instance = started_cluster.instances["dummy"]  # type: ClickHouseInstance
 
@@ -821,7 +821,7 @@ def test_seekable_formats(started_cluster):
     assert(int(result[:3]) < 200)
 
 
-def test_seekable_formats_url(started_cluster):
+def _test_seekable_formats_url(started_cluster):
     bucket = started_cluster.minio_bucket
     instance = started_cluster.instances["dummy"]
 
@@ -865,3 +865,53 @@ def test_insert_with_path_with_globs(started_cluster):
 
     table_function_3 = f"s3('http://minio1:9001/root/test_parquet*', 'minio', 'minio123', 'Parquet', 'a Int32, b String')"
     instance.query_and_get_error(f"insert into table function {table_function_3} SELECT number, randomString(100) FROM numbers(500)")
+
+
+def test_s3_schema_inference(started_cluster):
+    bucket = started_cluster.minio_bucket
+    instance = started_cluster.instances["dummy"]
+
+    instance.query(f"insert into table function s3(s3_native, structure='a Int32, b String', format='Native') select number, randomString(100) from numbers(5000000)")
+    result = instance.query(f"desc s3(s3_native, format='Native')")
+    assert result == "a\tInt32\t\t\t\t\t\nb\tString\t\t\t\t\t\n"
+
+    result = instance.query(f"select count(*) from s3(s3_native, format='Native')")
+    assert(int(result) == 5000000)
+
+    instance.query(f"create table schema_inference engine=S3(s3_native, format='Native')")
+    result = instance.query(f"desc schema_inference")
+    assert result == "a\tInt32\t\t\t\t\t\nb\tString\t\t\t\t\t\n"
+
+    result = instance.query(f"select count(*) from schema_inference")
+    assert(int(result) == 5000000)
+
+    
+    table_function = f"url('http://{started_cluster.minio_host}:{started_cluster.minio_port}/{bucket}/test_native', 'Native')"
+    result = instance.query(f"desc {table_function}")
+    assert result == "a\tInt32\t\t\t\t\t\nb\tString\t\t\t\t\t\n"
+
+    result = instance.query(f"select count(*) from {table_function}")
+    assert(int(result) == 5000000)
+
+    instance.query(f"create table schema_inference_2 engine=URL('http://{started_cluster.minio_host}:{started_cluster.minio_port}/{bucket}/test_native', 'Native')")
+    result = instance.query(f"desc schema_inference_2")
+    assert result == "a\tInt32\t\t\t\t\t\nb\tString\t\t\t\t\t\n"
+
+    result = instance.query(f"select count(*) from schema_inference_2")
+    assert(int(result) == 5000000)
+
+
+def test_empty_file(started_cluster):
+    bucket = started_cluster.minio_bucket
+    instance = started_cluster.instances["dummy"]
+
+    name = "empty"
+    url = f'http://{started_cluster.minio_ip}:{MINIO_INTERNAL_PORT}/{bucket}/{name}'
+
+    minio = started_cluster.minio_client
+    minio.put_object(bucket, name, io.BytesIO(b""), 0)
+
+    table_function = f"s3('{url}', 'CSV', 'id Int32')"
+    result = instance.query(f"SELECT count() FROM {table_function}")
+    assert(int(result) == 0)
+
diff --git a/tests/queries/0_stateless/00646_url_engine.python b/tests/queries/0_stateless/00646_url_engine.python
index 85ae3e776ed..4f47e819328 100644
--- a/tests/queries/0_stateless/00646_url_engine.python
+++ b/tests/queries/0_stateless/00646_url_engine.python
@@ -156,6 +156,7 @@ def test_select(table_name="", schema="str String,numuint UInt32,numint Int32,do
     if table_name:
         get_ch_answer("drop table if exists {}".format(table_name))
 
+
 def test_insert(table_name="", schema="str String,numuint UInt32,numint Int32,double Float64", requests_insert=[], requests_select=[], answers=[]):
     with open(CSV_DATA, 'w') as f: # flush test file
         f.write('')
diff --git a/tests/queries/0_stateless/00825_protobuf_format_enum_mapping.sh b/tests/queries/0_stateless/00825_protobuf_format_enum_mapping.sh
index 2731e4bcce3..8d9e2689e26 100755
--- a/tests/queries/0_stateless/00825_protobuf_format_enum_mapping.sh
+++ b/tests/queries/0_stateless/00825_protobuf_format_enum_mapping.sh
@@ -25,15 +25,15 @@ SELECT * FROM enum_mapping_protobuf_00825;
 EOF
 
 BINARY_FILE_PATH=$(mktemp "$CURDIR/00825_protobuf_format_enum_mapping.XXXXXX.binary")
-$CLICKHOUSE_CLIENT --query "SELECT * FROM enum_mapping_protobuf_00825 FORMAT Protobuf SETTINGS format_schema = '$SCHEMADIR/00825_protobuf_format_enum_mapping:Message'" > "$BINARY_FILE_PATH"
+$CLICKHOUSE_CLIENT --query "SELECT * FROM enum_mapping_protobuf_00825 FORMAT Protobuf SETTINGS format_schema = '$SCHEMADIR/00825_protobuf_format_enum_mapping:EnumMessage'" > "$BINARY_FILE_PATH"
 
 # Check the output in the protobuf format
 echo
-$CURDIR/helpers/protobuf_length_delimited_encoder.py --decode_and_check --format_schema "$SCHEMADIR/00825_protobuf_format_enum_mapping:Message" --input "$BINARY_FILE_PATH"
+$CURDIR/helpers/protobuf_length_delimited_encoder.py --decode_and_check --format_schema "$SCHEMADIR/00825_protobuf_format_enum_mapping:EnumMessage" --input "$BINARY_FILE_PATH"
 
 # Check the input in the protobuf format (now the table contains the same data twice).
 echo
-$CLICKHOUSE_CLIENT --query "INSERT INTO enum_mapping_protobuf_00825 FORMAT Protobuf SETTINGS format_schema='$SCHEMADIR/00825_protobuf_format_enum_mapping:Message'" < "$BINARY_FILE_PATH"
+$CLICKHOUSE_CLIENT --query "INSERT INTO enum_mapping_protobuf_00825 FORMAT Protobuf SETTINGS format_schema='$SCHEMADIR/00825_protobuf_format_enum_mapping:EnumMessage'" < "$BINARY_FILE_PATH"
 $CLICKHOUSE_CLIENT --query "SELECT * FROM enum_mapping_protobuf_00825"
 
 rm "$BINARY_FILE_PATH"
diff --git a/tests/queries/0_stateless/00825_protobuf_format_enum_mapping.yaodzJ.binary b/tests/queries/0_stateless/00825_protobuf_format_enum_mapping.yaodzJ.binary
new file mode 100644
index 00000000000..4b7b97a300f
Binary files /dev/null and b/tests/queries/0_stateless/00825_protobuf_format_enum_mapping.yaodzJ.binary differ
diff --git a/tests/queries/0_stateless/02149_external_schema_inference.reference b/tests/queries/0_stateless/02149_external_schema_inference.reference
new file mode 100644
index 00000000000..875659c7fb6
--- /dev/null
+++ b/tests/queries/0_stateless/02149_external_schema_inference.reference
@@ -0,0 +1,168 @@
+Protobuf
+
+a_b_c	Array(Array(Array(Int32)))					
+
+a	String					
+b_c	Array(Array(Float64))					
+
+x	Enum8(\'FIRST\' = 0, \'SECOND\' = 1, \'TEN\' = 10, \'HUNDRED\' = 100)					
+
+a	Map(String, UInt32)					
+
+x_y_z	Array(Array(Int32))					
+
+uuid	String					
+name	String					
+surname	String					
+gender	Enum8(\'female\' = 0, \'male\' = 1)					
+birthDate	UInt32					
+photo	String					
+phoneNumber	String					
+isOnline	UInt8					
+visitTime	UInt32					
+age	UInt32					
+zodiacSign	Enum8(\'aries\' = 0, \'taurus\' = 1, \'gemini\' = 2, \'cancer\' = 3, \'leo\' = 4, \'virgo\' = 5, \'libra\' = 6, \'scorpius\' = 7, \'sagittarius\' = 8, \'capricorn\' = 9, \'aquarius\' = 10, \'pisces\' = 11)					
+songs	Array(String)					
+color	Array(UInt32)					
+hometown	String					
+location	Array(Float32)					
+pi	Float64					
+lotteryWin	Float64					
+someRatio	Float32					
+temperature	Float32					
+randomBigNumber	Int64					
+measureUnits	Array(Tuple(unit String, coef Float32))					
+nestiness_a_b_c	Tuple(d UInt32, e Array(UInt32))					
+
+location	Array(Int32)					
+pi	Float32					
+uuid	String					
+newFieldBool	UInt8					
+name	String					
+gender	Enum8(\'male\' = 0, \'female\' = 1)					
+zodiacSign	Int32					
+birthDate	Int64					
+age	String					
+isOnline	Enum8(\'offline\' = 0, \'online\' = 1)					
+someRatio	Float64					
+visitTime	UInt64					
+newMessage	Tuple(empty Array(Tuple()), z Float32)					
+randomBigNumber	Int64					
+newFieldInt	Array(Int32)					
+color	Array(Float32)					
+lotteryWin	UInt64					
+surname	String					
+phoneNumber	UInt64					
+temperature	Int32					
+newFieldStr	String					
+measureUnits_unit	Array(String)					
+measureUnits_coef	Array(Float32)					
+nestiness_a_b_c_d	UInt32					
+nestiness_a_b_c_e	Array(UInt32)					
+
+uuid	String					
+name	String					
+surname	String					
+gender	String					
+birthDate	String					
+phoneNumber	String					
+isOnline	String					
+visitTime	String					
+age	String					
+zodiacSign	String					
+songs	Array(String)					
+color	Array(String)					
+hometown	String					
+location	Array(String)					
+pi	String					
+lotteryWin	String					
+someRatio	String					
+temperature	String					
+randomBigNumber	String					
+measureUnits	Tuple(unit Array(String), coef Array(String))					
+nestiness_a_b_c	Tuple(d String, e Array(String))					
+
+uuid	String					
+name	String					
+surname	String					
+gender	Enum8(\'female\' = 0, \'male\' = 1)					
+birthDate	UInt32					
+photo	String					
+phoneNumber	String					
+isOnline	UInt8					
+visitTime	UInt32					
+age	UInt32					
+zodiacSign	Enum8(\'aries\' = 0, \'taurus\' = 1, \'gemini\' = 2, \'cancer\' = 3, \'leo\' = 4, \'virgo\' = 5, \'libra\' = 6, \'scorpius\' = 7, \'sagittarius\' = 8, \'capricorn\' = 9, \'aquarius\' = 10, \'pisces\' = 11)					
+songs	Array(String)					
+color	Array(UInt32)					
+hometown	String					
+location	Array(Float32)					
+pi	Float64					
+lotteryWin	Float64					
+someRatio	Float32					
+temperature	Float32					
+randomBigNumber	Int64					
+measureunits	Tuple(coef Array(Float32), unit Array(String))					
+nestiness_a_b_c	Tuple(d UInt32, e Array(UInt32))					
+newFieldStr	String					
+newFieldInt	Int32					
+newBool	UInt8					
+
+identifier	String					
+modules	Array(Tuple(module_id UInt32, supply UInt32, temp UInt32, nodes Array(Tuple(node_id UInt32, opening_time UInt32, closing_time UInt32, current UInt32, coords_y Float32))))					
+
+Capnproto
+
+value	Enum8(\'one\' = 0, \'two\' = 1, \'tHrEe\' = 2)					
+
+value	UInt64					
+list1	Array(UInt64)					
+list2	Array(Array(Array(UInt64)))					
+
+lc1	String					
+lc2	Nullable(String)					
+lc3	Array(Nullable(String))					
+
+value	UInt64					
+nested	Tuple(a Tuple(b UInt64, c Array(Array(UInt64))), d Array(Tuple(e Array(Array(Tuple(f UInt64, g UInt64))), h Array(Tuple(k Array(UInt64))))))					
+
+nested	Tuple(value Array(UInt64), array Array(Array(UInt64)), tuple Array(Tuple(one UInt64, two UInt64)))					
+
+a	Tuple(b UInt64, c Tuple(d UInt64, e Tuple(f UInt64)))					
+
+nullable	Nullable(UInt64)					
+array	Array(Nullable(UInt64))					
+tuple	Tuple(nullable Nullable(UInt64))					
+
+int8	Int8					
+uint8	UInt8					
+int16	Int16					
+uint16	UInt16					
+int32	Int32					
+uint32	UInt32					
+int64	Int64					
+uint64	UInt64					
+float32	Float32					
+float64	Float64					
+string	String					
+fixed	String					
+data	String					
+date	UInt16					
+datetime	UInt32					
+datetime64	Int64					
+
+value	UInt64					
+tuple1	Tuple(one UInt64, two Tuple(three UInt64, four UInt64))					
+tuple2	Tuple(nested1 Tuple(nested2 Tuple(x UInt64)))					
+
+RawBLOB
+
+raw_blob	String					
+
+LineAsString
+
+line	String					
+
+JSONAsString
+
+json	String					
diff --git a/tests/queries/0_stateless/02149_external_schema_inference.sh b/tests/queries/0_stateless/02149_external_schema_inference.sh
new file mode 100755
index 00000000000..df2b9a43565
--- /dev/null
+++ b/tests/queries/0_stateless/02149_external_schema_inference.sh
@@ -0,0 +1,91 @@
+#!/usr/bin/env bash
+# Tags: no-parallel, no-fasttest
+
+CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)
+# shellcheck source=../shell_config.sh
+. "$CURDIR"/../shell_config.sh
+
+
+USER_FILES_PATH=$(clickhouse-client --query "select _path,_file from file('nonexist.txt', 'CSV', 'val1 char')" 2>&1 | grep Exception | awk '{gsub("/nonexist.txt","",$9); print $9}')
+FILE_NAME=test_02149.data
+DATA_FILE=$USER_FILES_PATH/$FILE_NAME
+
+touch $DATA_FILE
+
+SCHEMADIR=$(clickhouse-client --query "select * from file('$FILE_NAME', 'CapnProto', 'val1 char') settings format_schema='nonexist:Message'" 2>&1 | grep Exception | grep -oP "file \K.*(?=/nonexist.capnp)")
+CLIENT_SCHEMADIR=$CURDIR/format_schemas
+SERVER_SCHEMADIR=test_02149
+mkdir -p $SCHEMADIR/$SERVER_SCHEMADIR
+cp -r $CLIENT_SCHEMADIR/* $SCHEMADIR/$SERVER_SCHEMADIR/
+
+echo -e "Protobuf\n"
+$CLICKHOUSE_CLIENT -q "desc file('$FILE_NAME', 'Protobuf') settings format_schema='$SERVER_SCHEMADIR/00825_protobuf_format_array_3dim:ABC'"
+
+echo
+$CLICKHOUSE_CLIENT -q "desc file('$FILE_NAME', 'Protobuf') settings format_schema='$SERVER_SCHEMADIR/00825_protobuf_format_array_of_arrays:AA'"
+
+echo
+$CLICKHOUSE_CLIENT -q "desc file('$FILE_NAME', 'Protobuf') settings format_schema='$SERVER_SCHEMADIR/00825_protobuf_format_enum_mapping.proto:EnumMessage'"
+
+echo
+$CLICKHOUSE_CLIENT -q "desc file('$FILE_NAME', 'Protobuf') settings format_schema='$SERVER_SCHEMADIR/00825_protobuf_format_map:Message'"
+
+echo
+$CLICKHOUSE_CLIENT -q "desc file('$FILE_NAME', 'Protobuf') settings format_schema='$SERVER_SCHEMADIR/00825_protobuf_format_nested_in_nested:MessageType'"
+
+echo
+$CLICKHOUSE_CLIENT -q "desc file('$FILE_NAME', 'Protobuf') settings format_schema='$SERVER_SCHEMADIR/00825_protobuf_format_persons:Person'"
+
+echo
+$CLICKHOUSE_CLIENT -q "desc file('$FILE_NAME', 'Protobuf') settings format_schema='$SERVER_SCHEMADIR/00825_protobuf_format_persons:AltPerson'"
+
+echo
+$CLICKHOUSE_CLIENT -q "desc file('$FILE_NAME', 'Protobuf') settings format_schema='$SERVER_SCHEMADIR/00825_protobuf_format_persons:StrPerson'"
+
+echo
+$CLICKHOUSE_CLIENT -q "desc file('$FILE_NAME', 'Protobuf') settings format_schema='$SERVER_SCHEMADIR/00825_protobuf_format_persons_syntax2:Syntax2Person'"
+
+echo
+$CLICKHOUSE_CLIENT -q "desc file('$FILE_NAME', 'Protobuf') settings format_schema='$SERVER_SCHEMADIR/00825_protobuf_format_skipped_column_in_nested:UpdateMessage'"
+
+
+echo -e "\nCapnproto\n"
+$CLICKHOUSE_CLIENT -q "desc file('$FILE_NAME', 'CapnProto') settings format_schema='$SERVER_SCHEMADIR/02030_capnp_enum:Message'"
+
+echo
+$CLICKHOUSE_CLIENT -q "desc file('$FILE_NAME', 'CapnProto') settings format_schema='$SERVER_SCHEMADIR/02030_capnp_lists:Message'"
+
+echo
+$CLICKHOUSE_CLIENT -q "desc file('$FILE_NAME', 'CapnProto') settings format_schema='$SERVER_SCHEMADIR/02030_capnp_low_cardinality:Message'"
+
+echo
+$CLICKHOUSE_CLIENT -q "desc file('$FILE_NAME', 'CapnProto') settings format_schema='$SERVER_SCHEMADIR/02030_capnp_nested_lists_and_tuples:Message'"
+
+echo
+$CLICKHOUSE_CLIENT -q "desc file('$FILE_NAME', 'CapnProto') settings format_schema='$SERVER_SCHEMADIR/02030_capnp_nested_table:Message'"
+
+echo
+$CLICKHOUSE_CLIENT -q "desc file('$FILE_NAME', 'CapnProto') settings format_schema='$SERVER_SCHEMADIR/02030_capnp_nested_tuples:Message'"
+
+echo
+$CLICKHOUSE_CLIENT -q "desc file('$FILE_NAME', 'CapnProto') settings format_schema='$SERVER_SCHEMADIR/02030_capnp_nullable:Message'"
+
+echo
+$CLICKHOUSE_CLIENT -q "desc file('$FILE_NAME', 'CapnProto') settings format_schema='$SERVER_SCHEMADIR/02030_capnp_simple_types:Message'"
+
+echo
+$CLICKHOUSE_CLIENT -q "desc file('$FILE_NAME', 'CapnProto') settings format_schema='$SERVER_SCHEMADIR/02030_capnp_tuples:Message'"
+
+echo -e "\nRawBLOB\n"
+$CLICKHOUSE_CLIENT -q "desc file('$FILE_NAME', 'RawBLOB')"
+
+echo -e "\nLineAsString\n"
+$CLICKHOUSE_CLIENT -q "desc file('$FILE_NAME', 'LineAsString')"
+
+echo -e "\nJSONAsString\n"
+$CLICKHOUSE_CLIENT -q "desc file('$FILE_NAME', 'JSONAsString')"
+
+
+
+rm -rf ${SCHEMADIR:?}/$SERVER_SCHEMADIR
+rm $DATA_FILE
diff --git a/tests/queries/0_stateless/02149_schema_inference.reference b/tests/queries/0_stateless/02149_schema_inference.reference
new file mode 100644
index 00000000000..f46e3bee101
--- /dev/null
+++ b/tests/queries/0_stateless/02149_schema_inference.reference
@@ -0,0 +1,170 @@
+TSV
+c1	Nullable(String)					
+c2	Nullable(String)					
+c3	Nullable(String)					
+c4	Nullable(String)					
+42	Some string	[1, 2, 3, 4]	(1, 2, 3)
+42	abcd	[]	(4, 5, 6)
+TSVWithNames
+number	Nullable(String)					
+string	Nullable(String)					
+array	Nullable(String)					
+tuple	Nullable(String)					
+42	Some string	[1, 2, 3, 4]	(1, 2, 3)
+42	abcd	[]	(4, 5, 6)
+CSV
+c1	Nullable(Float64)					
+c2	Nullable(String)					
+c3	Array(Tuple(Array(Nullable(Float64)), Nullable(String)))					
+c4	Array(Nullable(Float64))					
+\N	Some string	[([1,2.3],'String'),([],NULL)]	[1,NULL,3]
+42	\N	[([1,2.3],'String'),([3],'abcd')]	[4,5,6]
+c1	Nullable(String)					
+c2	Nullable(String)					
+42	String
+String	42
+c1	Nullable(String)					
+c2	Nullable(String)					
+\N	[NULL, NULL]
+\N	[]
+CSVWithNames
+a	Nullable(Float64)					
+b	Nullable(String)					
+c	Array(Tuple(Array(Nullable(Float64)), Nullable(String)))					
+d	Array(Nullable(Float64))					
+\N	Some string	[([1,2.3],'String'),([],NULL)]	[1,NULL,3]
+42	\N	[([1,2.3],'String'),([3],'abcd')]	[4,5,6]
+JSONCompactEachRow
+c1	Nullable(Float64)					
+c2	Array(Tuple(Nullable(Float64), Nullable(String)))					
+c3	Map(String, Nullable(Float64))					
+c4	Nullable(UInt8)					
+42.42	[(1,'String'),(2,'abcd')]	{'key':42,'key2':24}	1
+c1	Nullable(Float64)					
+c2	Array(Tuple(Nullable(Float64), Nullable(String)))					
+c3	Map(String, Nullable(Float64))					
+c4	Nullable(UInt8)					
+\N	[(1,'String'),(2,NULL)]	{'key':NULL,'key2':24}	\N
+32	[(2,'String 2'),(3,'hello')]	{'key3':4242,'key4':2424}	1
+JSONCompactEachRowWithNames
+a	Nullable(Float64)					
+b	Array(Tuple(Nullable(Float64), Nullable(String)))					
+c	Map(String, Nullable(Float64))					
+d	Nullable(UInt8)					
+42.42	[(1,'String'),(2,'abcd')]	{'key':42,'key2':24}	1
+JSONEachRow
+d	Nullable(UInt8)					
+b	Array(Tuple(Nullable(Float64), Nullable(String)))					
+c	Map(String, Nullable(Float64))					
+a	Nullable(Float64)					
+1	[(1,'String'),(2,'abcd')]	{'key':42,'key2':24}	42.42
+d	Nullable(UInt8)					
+b	Array(Tuple(Nullable(Float64), Nullable(String)))					
+c	Map(String, Nullable(Float64))					
+a	Nullable(Float64)					
+\N	[(1,'String'),(2,NULL)]	{'key':NULL,'key2':24}	\N
+1	[(2,'String 2'),(3,'hello')]	{'key3':4242,'key4':2424}	32
+b	Nullable(String)					
+c	Array(Nullable(Float64))					
+a	Nullable(Float64)					
+s1	[]	1
+\N	[2]	2
+\N	[]	\N
+\N	[]	\N
+\N	[3]	\N
+TSKV
+b	Nullable(String)					
+c	Nullable(String)					
+a	Nullable(String)					
+s1	\N	1
+}	[2]	2
+\N	\N	\N
+\N	\N	\N
+\N	[3]	\N
+Values
+c1	Nullable(Float64)					
+c2	Nullable(String)					
+c3	Array(Nullable(Float64))					
+c4	Tuple(Nullable(Float64), Nullable(String))					
+c5	Tuple(Array(Nullable(Float64)), Array(Tuple(Nullable(Float64), Nullable(String))))					
+42.42	Some string	[1,2,3]	(1,'2')	([1,2],[(3,'4'),(5,'6')])
+c1	Nullable(Float64)					
+c2	Nullable(String)					
+c3	Array(Nullable(Float64))					
+c4	Tuple(Nullable(Float64), Nullable(Float64))					
+c5	Tuple(Array(Nullable(Float64)), Array(Tuple(Nullable(Float64), Nullable(String))))					
+42.42	\N	[1,NULL,3]	(1,NULL)	([1,2],[(3,'4'),(5,'6')])
+\N	Some string	[10]	(1,2)	([],[])
+Regexp
+c1	Nullable(String)					
+c2	Nullable(String)					
+c3	Nullable(String)					
+42	Some string 1	[([1, 2, 3], String 1), ([], String 1)]
+2	Some string 2	[([4, 5, 6], String 2), ([], String 2)]
+312	Some string 3	[([1, 2, 3], String 2), ([], String 2)]
+c1	Nullable(Float64)					
+c2	Nullable(String)					
+c3	Array(Tuple(Array(Nullable(Float64)), Nullable(String)))					
+42	Some string 1	[([1,2,3],'String 1'),([],'String 1')]
+3	Some string 2	[([3,5,1],'String 2'),([],'String 2')]
+244	Some string 3	[([],'String 3'),([],'String 3')]
+c1	Nullable(Float64)					
+c2	Nullable(String)					
+c3	Array(Tuple(Array(Nullable(Float64)), Nullable(String)))					
+42	Some string 1	[([1,2,3],'String 1'),([],'String 1')]
+2	Some string 2	[([],'String 2'),([],'String 2')]
+43	Some string 3	[([1,5,3],'String 3'),([],'String 3')]
+c1	Nullable(Float64)					
+c2	Nullable(String)					
+c3	Array(Tuple(Array(Nullable(Float64)), Nullable(String)))					
+42	Some string 1	[([1,2,3],'String 1'),([1],'String 1')]
+52	Some string 2	[([],'String 2'),([1],'String 2')]
+24	Some string 3	[([1,2,3],'String 3'),([1],'String 3')]
+CustomSeparated
+c1	Nullable(Float64)					
+c2	Nullable(String)					
+c3	Array(Tuple(Array(Nullable(Float64)), Nullable(String)))					
+42.42	Some string 1	[([1,2,3],'String 1'),([1],'String 1')]
+42	Some string 2	[([],'String 2'),([],'String 2')]
+\N	Some string 3	[([1,2,3],'String 3'),([1],'String 3')]
+c1	Nullable(Float64)					
+c2	Nullable(String)					
+c3	Array(Tuple(Array(Nullable(Float64)), Nullable(String)))					
+42.42	Some string 1	[([1,2,3],'String 1'),([1],'String 1')]
+42	Some string 2	[([],'String 2'),([],'String 2')]
+\N	Some string 3	[([1,2,3],'String 3'),([1],'String 3')]
+c1	Nullable(Float64)					
+c2	Nullable(String)					
+c3	Array(Tuple(Array(Nullable(Float64)), Nullable(String)))					
+42.42	Some string 1	[([1,2,3],'String 1'),([1],'String 1')]
+42	Some string 2	[([],'String 2'),([],'String 2')]
+\N	Some string 3	[([1,2,3],'String 3'),([1],'String 3')]
+Template
+column_1	Nullable(Float64)					
+column_2	Nullable(String)					
+column_3	Array(Tuple(Array(Nullable(Float64)), Nullable(String)))					
+42.42	Some string 1	[([1,2,3],'String 1'),([1],'String 1')]
+42	Some string 2	[([],'String 2'),([],'String 2')]
+\N	Some string 3	[([1,2,3],'String 3'),([1],'String 3')]
+column_1	Nullable(Float64)					
+column_2	Nullable(String)					
+column_3	Array(Tuple(Array(Nullable(Float64)), Nullable(String)))					
+42.42	Some string 1	[([1,2,3],'String 1'),([1],'String 1')]
+42	Some string 2	[([],'String 2'),([],'String 2')]
+\N	Some string 3	[([1,2,3],'String 3'),([1],'String 3')]
+column_1	Nullable(Float64)					
+column_2	Nullable(String)					
+column_3	Array(Tuple(Array(Nullable(Float64)), Nullable(String)))					
+42.42	Some string 1	[([1,2,3],'String 1'),([1],'String 1')]
+42	Some string 2	[([],'String 2'),([],'String 2')]
+\N	Some string 3	[([1,2,3],'String 3'),([1],'String 3')]
+MsgPack
+c1	Nullable(Int64)					
+c2	Nullable(Int64)					
+c3	Nullable(Float32)					
+c4	Nullable(String)					
+c5	Array(Array(Nullable(Int64)))					
+c6	Map(Int64, Array(Nullable(Int64)))					
+\N	0	0	Str: 0	[[0,1],[0]]	{0:[0,1]}
+1	\N	1	Str: 1	[[1,2],[1]]	{1:[1,2]}
+\N	2	2	Str: 2	[[2,3],[2]]	{2:[2,3]}
diff --git a/tests/queries/0_stateless/02149_schema_inference.sh b/tests/queries/0_stateless/02149_schema_inference.sh
new file mode 100755
index 00000000000..1ccec240627
--- /dev/null
+++ b/tests/queries/0_stateless/02149_schema_inference.sh
@@ -0,0 +1,251 @@
+#!/usr/bin/env bash
+# Tags: no-parallel, no-fasttest
+
+CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)
+# shellcheck source=../shell_config.sh
+. "$CURDIR"/../shell_config.sh
+
+
+USER_FILES_PATH=$(clickhouse-client --query "select _path,_file from file('nonexist.txt', 'CSV', 'val1 char')" 2>&1 | grep Exception | awk '{gsub("/nonexist.txt","",$9); print $9}')
+FILE_NAME=test_02149.data
+DATA_FILE=${USER_FILES_PATH:?}/$FILE_NAME
+
+touch $DATA_FILE
+
+SCHEMADIR=$(clickhouse-client --query "select * from file('$FILE_NAME', 'Template', 'val1 char') settings format_template_row='nonexist'" 2>&1 | grep Exception | grep -oP "file \K.*(?=/nonexist)")
+
+echo "TSV"
+
+echo -e "42\tSome string\t[1, 2, 3, 4]\t(1, 2, 3)
+42\tabcd\t[]\t(4, 5, 6)" > $DATA_FILE
+
+$CLICKHOUSE_CLIENT -q "desc file('$FILE_NAME', 'TSV')"
+$CLICKHOUSE_CLIENT -q "select * from file('$FILE_NAME', 'TSV')"
+
+echo "TSVWithNames"
+
+echo -e "number\tstring\tarray\ttuple
+42\tSome string\t[1, 2, 3, 4]\t(1, 2, 3)
+42\tabcd\t[]\t(4, 5, 6)" > $DATA_FILE
+
+$CLICKHOUSE_CLIENT -q "desc file('$FILE_NAME', 'TSVWithNames')"
+$CLICKHOUSE_CLIENT -q "select * from file('$FILE_NAME', 'TSVWithNames')"
+
+echo "CSV"
+
+echo -e "\N,\"Some string\",\"[([1, 2.3], 'String'), ([], NULL)]\",\"[1, NULL, 3]\"
+42,\N,\"[([1, 2.3], 'String'), ([3.], 'abcd')]\",\"[4, 5, 6]\"" > $DATA_FILE
+
+$CLICKHOUSE_CLIENT -q "desc file('$FILE_NAME', 'CSV')"
+$CLICKHOUSE_CLIENT -q "select * from file('$FILE_NAME', 'CSV')"
+
+echo -e "42,\"String\"
+\"String\",42" > $DATA_FILE
+
+$CLICKHOUSE_CLIENT -q "desc file('$FILE_NAME', 'CSV')"
+$CLICKHOUSE_CLIENT -q "select * from file('$FILE_NAME', 'CSV')"
+
+echo -e "\N,\"[NULL, NULL]\"
+\N,[]" > $DATA_FILE
+
+$CLICKHOUSE_CLIENT -q "desc file('$FILE_NAME', 'CSV')"
+$CLICKHOUSE_CLIENT -q "select * from file('$FILE_NAME', 'CSV')"
+
+echo "CSVWithNames"
+
+echo -e "a,b,c,d
+\N,\"Some string\",\"[([1, 2.3], 'String'), ([], NULL)]\",\"[1, NULL, 3]\"
+42,\N,\"[([1, 2.3], 'String'), ([3.], 'abcd')]\",\"[4, 5, 6]\"" > $DATA_FILE
+
+$CLICKHOUSE_CLIENT -q "desc file('$FILE_NAME', 'CSVWithNames')"
+$CLICKHOUSE_CLIENT -q "select * from file('$FILE_NAME', 'CSVWithNames')"
+
+echo "JSONCompactEachRow"
+
+echo -e "[42.42, [[1, \"String\"], [2, \"abcd\"]], {\"key\" : 42, \"key2\" : 24}, true]" > $DATA_FILE
+$CLICKHOUSE_CLIENT -q "desc file('$FILE_NAME', 'JSONCompactEachRow')"
+$CLICKHOUSE_CLIENT -q "select * from file('$FILE_NAME', 'JSONCompactEachRow')"
+
+echo -e "[null, [[1, \"String\"], [2, null]], {\"key\" : null, \"key2\" : 24}, null]
+[32, [[2, \"String 2\"], [3, \"hello\"]], {\"key3\" : 4242, \"key4\" : 2424}, true]"  > $DATA_FILE
+$CLICKHOUSE_CLIENT -q "desc file('$FILE_NAME', 'JSONCompactEachRow')"
+$CLICKHOUSE_CLIENT -q "select * from file('$FILE_NAME', 'JSONCompactEachRow')"
+
+echo "JSONCompactEachRowWithNames"
+
+echo -e "[\"a\", \"b\", \"c\", \"d\"]
+[42.42, [[1, \"String\"], [2, \"abcd\"]], {\"key\" : 42, \"key2\" : 24}, true]" > $DATA_FILE
+$CLICKHOUSE_CLIENT -q "desc file('$FILE_NAME', 'JSONCompactEachRowWithNames')"
+$CLICKHOUSE_CLIENT -q "select * from file('$FILE_NAME', 'JSONCompactEachRowWithNames')"
+
+
+echo "JSONEachRow"
+echo -e '{"a" : 42.42, "b" : [[1, "String"], [2, "abcd"]], "c" : {"key" : 42, "key2" : 24}, "d" : true}' > $DATA_FILE
+$CLICKHOUSE_CLIENT -q "desc file('$FILE_NAME', 'JSONEachRow')"
+$CLICKHOUSE_CLIENT -q "select * from file('$FILE_NAME', 'JSONEachRow')"
+
+echo -e '{"a" : null, "b" : [[1, "String"], [2, null]], "c" : {"key" : null, "key2" : 24}, "d" : null}
+{"a" : 32, "b" : [[2, "String 2"], [3, "hello"]], "c" : {"key3" : 4242, "key4" : 2424}, "d" : true}'  > $DATA_FILE
+$CLICKHOUSE_CLIENT -q "desc file('$FILE_NAME', 'JSONEachRow')"
+$CLICKHOUSE_CLIENT -q "select * from file('$FILE_NAME', 'JSONEachRow')"
+
+echo -e '{"a" : 1, "b" : "s1", "c" : null}
+{"c" : [2], "a" : 2, "b" : null}
+{}
+{"a" : null}
+{"c" : [3], "a" : null}'  > $DATA_FILE
+$CLICKHOUSE_CLIENT -q "desc file('$FILE_NAME', 'JSONEachRow')"
+$CLICKHOUSE_CLIENT -q "select * from file('$FILE_NAME', 'JSONEachRow')"
+
+
+echo "TSKV"
+
+echo -e 'a=1\tb=s1\tc=\N
+c=[2]\ta=2\tb=\N}
+
+a=\N
+c=[3]\ta=\N'  > $DATA_FILE
+$CLICKHOUSE_CLIENT -q "desc file('$FILE_NAME', 'TSKV')"
+$CLICKHOUSE_CLIENT -q "select * from file('$FILE_NAME', 'TSKV')"
+
+
+echo "Values"
+
+echo -e "(42.42, 'Some string', [1, 2, 3], (1, '2'), ([1, 2], [(3, '4'), (5, '6')]))" > $DATA_FILE
+$CLICKHOUSE_CLIENT -q "desc file('$FILE_NAME', 'Values')"
+$CLICKHOUSE_CLIENT -q "select * from file('$FILE_NAME', 'Values')"
+
+echo -e "(42.42, NULL, [1, NULL, 3], (1, NULL), ([1, 2], [(3, '4'), (5, '6')])), (NULL, 'Some string', [10], (1, 2), ([], []))" > $DATA_FILE
+$CLICKHOUSE_CLIENT -q "desc file('$FILE_NAME', 'Values')"
+$CLICKHOUSE_CLIENT -q "select * from file('$FILE_NAME', 'Values')"
+
+
+echo "Regexp"
+
+REGEXP="^Line: value_1=(.+?), value_2=(.+?), value_3=(.+?)"
+
+echo "Line: value_1=42, value_2=Some string 1, value_3=[([1, 2, 3], String 1), ([], String 1)]
+Line: value_1=2, value_2=Some string 2, value_3=[([4, 5, 6], String 2), ([], String 2)]
+Line: value_1=312, value_2=Some string 3, value_3=[([1, 2, 3], String 2), ([], String 2)]" > $DATA_FILE
+$CLICKHOUSE_CLIENT -q "desc file('$FILE_NAME', 'Regexp') settings format_regexp='$REGEXP', format_regexp_escaping_rule='Escaped'"
+$CLICKHOUSE_CLIENT -q "select * from file('$FILE_NAME', 'Regexp') settings format_regexp='$REGEXP', format_regexp_escaping_rule='Escaped'"
+
+
+echo "Line: value_1=42, value_2=\"Some string 1\", value_3=\"[([1, 2, 3], 'String 1'), ([], 'String 1')]\"
+Line: value_1=3, value_2=\"Some string 2\", value_3=\"[([3, 5, 1], 'String 2'), ([], 'String 2')]\"
+Line: value_1=244, value_2=\"Some string 3\", value_3=\"[([], 'String 3'), ([], 'String 3')]\"" > $DATA_FILE
+
+$CLICKHOUSE_CLIENT -q "desc file('$FILE_NAME', 'Regexp') settings format_regexp='$REGEXP', format_regexp_escaping_rule='CSV'"
+$CLICKHOUSE_CLIENT -q "select * from file('$FILE_NAME', 'Regexp') settings format_regexp='$REGEXP', format_regexp_escaping_rule='CSV'"
+
+
+echo "Line: value_1=42, value_2='Some string 1', value_3=[([1, 2, 3], 'String 1'), ([], 'String 1')]
+Line: value_1=2, value_2='Some string 2', value_3=[([], 'String 2'), ([], 'String 2')]
+Line: value_1=43, value_2='Some string 3', value_3=[([1, 5, 3], 'String 3'), ([], 'String 3')]" > $DATA_FILE
+$CLICKHOUSE_CLIENT -q "desc file('$FILE_NAME', 'Regexp') settings format_regexp='$REGEXP', format_regexp_escaping_rule='Quoted'"
+$CLICKHOUSE_CLIENT -q "select * from file('$FILE_NAME', 'Regexp') settings format_regexp='$REGEXP', format_regexp_escaping_rule='Quoted'"
+
+
+echo "Line: value_1=42, value_2=\"Some string 1\", value_3=[[[1, 2, 3], \"String 1\"], [[1], \"String 1\"]]
+Line: value_1=52, value_2=\"Some string 2\", value_3=[[[], \"String 2\"], [[1], \"String 2\"]]
+Line: value_1=24, value_2=\"Some string 3\", value_3=[[[1, 2, 3], \"String 3\"], [[1], \"String 3\"]]" > $DATA_FILE
+$CLICKHOUSE_CLIENT -q "desc file('$FILE_NAME', 'Regexp') settings format_regexp='$REGEXP', format_regexp_escaping_rule='JSON'"
+$CLICKHOUSE_CLIENT -q "select * from file('$FILE_NAME', 'Regexp') settings format_regexp='$REGEXP', format_regexp_escaping_rule='JSON'"
+
+
+echo "CustomSeparated"
+
+CUSTOM_SETTINGS="SETTINGS format_custom_row_before_delimiter='<row_before_delimiter>', format_custom_row_after_delimiter='<row_after_delimiter>\n', format_custom_row_between_delimiter='<row_between_delimiter>\n', format_custom_result_before_delimiter='<result_before_delimiter>\n', format_custom_result_after_delimiter='<result_after_delimiter>\n', format_custom_field_delimiter='<field_delimiter>'"
+
+echo -e "<result_before_delimiter>
+<row_before_delimiter>42.42<field_delimiter>\"Some string 1\"<field_delimiter>\"[([1, 2, 3], 'String 1'), ([1], 'String 1')]\"<row_after_delimiter>
+<row_between_delimiter>
+<row_before_delimiter>42<field_delimiter>\"Some string 2\"<field_delimiter>\"[([], 'String 2'), ([], 'String 2')]\"<row_after_delimiter>
+<row_between_delimiter>
+<row_before_delimiter>\N<field_delimiter>\"Some string 3\"<field_delimiter>\"[([1, 2, 3], 'String 3'), ([1], 'String 3')]\"<row_after_delimiter>
+<result_after_delimiter>" > $DATA_FILE
+
+$CLICKHOUSE_CLIENT -q "desc file('$FILE_NAME', 'CustomSeparated') $CUSTOM_SETTINGS, format_custom_escaping_rule='CSV'"
+$CLICKHOUSE_CLIENT -q "select * from file('$FILE_NAME', 'CustomSeparated') $CUSTOM_SETTINGS, format_custom_escaping_rule='CSV'"
+
+echo -e "<result_before_delimiter>
+<row_before_delimiter>42.42<field_delimiter>'Some string 1'<field_delimiter>[([1, 2, 3], 'String 1'), ([1], 'String 1')]<row_after_delimiter>
+<row_between_delimiter>
+<row_before_delimiter>42<field_delimiter>'Some string 2'<field_delimiter>[([], 'String 2'), ([], 'String 2')]<row_after_delimiter>
+<row_between_delimiter>
+<row_before_delimiter>NULL<field_delimiter>'Some string 3'<field_delimiter>[([1, 2, 3], 'String 3'), ([1], 'String 3')]<row_after_delimiter>
+<result_after_delimiter>" > $DATA_FILE
+
+$CLICKHOUSE_CLIENT -q "desc file('$FILE_NAME', 'CustomSeparated') $CUSTOM_SETTINGS, format_custom_escaping_rule='Quoted'"
+$CLICKHOUSE_CLIENT -q "select * from file('$FILE_NAME', 'CustomSeparated') $CUSTOM_SETTINGS, format_custom_escaping_rule='Quoted'"
+
+echo -e "<result_before_delimiter>
+<row_before_delimiter>42.42<field_delimiter>\"Some string 1\"<field_delimiter>[[[1, 2, 3], \"String 1\"], [[1], \"String 1\"]]<row_after_delimiter>
+<row_between_delimiter>
+<row_before_delimiter>42<field_delimiter>\"Some string 2\"<field_delimiter>[[[], \"String 2\"], [[], \"String 2\"]]<row_after_delimiter>
+<row_between_delimiter>
+<row_before_delimiter>null<field_delimiter>\"Some string 3\"<field_delimiter>[[[1, 2, 3], \"String 3\"], [[1], \"String 3\"]]<row_after_delimiter>
+<result_after_delimiter>" > $DATA_FILE
+
+$CLICKHOUSE_CLIENT -q "desc file('$FILE_NAME', 'CustomSeparated') $CUSTOM_SETTINGS, format_custom_escaping_rule='JSON'"
+$CLICKHOUSE_CLIENT -q "select * from file('$FILE_NAME', 'CustomSeparated') $CUSTOM_SETTINGS, format_custom_escaping_rule='JSON'"
+
+
+echo "Template"
+
+echo -e "<result_before_delimiter>
+\${data}<result_after_delimiter>" > $SCHEMADIR/resultset_format_02149
+
+echo -e "<row_before_delimiter>\${column_1:CSV}<field_delimiter_1>\${column_2:CSV}<field_delimiter_2>\${column_3:CSV}<row_after_delimiter>" > $SCHEMADIR/row_format_02149
+
+TEMPLATE_SETTINGS="SETTINGS format_template_rows_between_delimiter='<row_between_delimiter>\n', format_template_row='row_format_02149', format_template_resultset='resultset_format_02149'"
+
+echo -e "<result_before_delimiter>
+<row_before_delimiter>42.42<field_delimiter_1>\"Some string 1\"<field_delimiter_2>\"[([1, 2, 3], 'String 1'), ([1], 'String 1')]\"<row_after_delimiter>
+<row_between_delimiter>
+<row_before_delimiter>42<field_delimiter_1>\"Some string 2\"<field_delimiter_2>\"[([], 'String 2'), ([], 'String 2')]\"<row_after_delimiter>
+<row_between_delimiter>
+<row_before_delimiter>\N<field_delimiter_1>\"Some string 3\"<field_delimiter_2>\"[([1, 2, 3], 'String 3'), ([1], 'String 3')]\"<row_after_delimiter>
+<result_after_delimiter>" > $DATA_FILE
+
+$CLICKHOUSE_CLIENT -q "desc file('$FILE_NAME', 'Template') $TEMPLATE_SETTINGS"
+$CLICKHOUSE_CLIENT -q "select * from file('$FILE_NAME', 'Template') $TEMPLATE_SETTINGS"
+
+echo -e "<row_before_delimiter>\${column_1:Quoted}<field_delimiter_1>\${column_2:Quoted}<field_delimiter_2>\${column_3:Quoted}<row_after_delimiter>" > $SCHEMADIR/row_format_02149
+
+echo -e "<result_before_delimiter>
+<row_before_delimiter>42.42<field_delimiter_1>'Some string 1'<field_delimiter_2>[([1, 2, 3], 'String 1'), ([1], 'String 1')]<row_after_delimiter>
+<row_between_delimiter>
+<row_before_delimiter>42<field_delimiter_1>'Some string 2'<field_delimiter_2>[([], 'String 2'), ([], 'String 2')]<row_after_delimiter>
+<row_between_delimiter>
+<row_before_delimiter>NULL<field_delimiter_1>'Some string 3'<field_delimiter_2>[([1, 2, 3], 'String 3'), ([1], 'String 3')]<row_after_delimiter>
+<result_after_delimiter>" > $DATA_FILE
+
+$CLICKHOUSE_CLIENT -q "desc file('$FILE_NAME', 'Template') $TEMPLATE_SETTINGS"
+$CLICKHOUSE_CLIENT -q "select * from file('$FILE_NAME', 'Template') $TEMPLATE_SETTINGS"
+
+echo -e "<row_before_delimiter>\${column_1:JSON}<field_delimiter_1>\${column_2:JSON}<field_delimiter_2>\${column_3:JSON}<row_after_delimiter>" > $SCHEMADIR/row_format_02149
+
+echo -e "<result_before_delimiter>
+<row_before_delimiter>42.42<field_delimiter_1>\"Some string 1\"<field_delimiter_2>[[[1, 2, 3], \"String 1\"], [[1], \"String 1\"]]<row_after_delimiter>
+<row_between_delimiter>
+<row_before_delimiter>42<field_delimiter_1>\"Some string 2\"<field_delimiter_2>[[[], \"String 2\"], [[], \"String 2\"]]<row_after_delimiter>
+<row_between_delimiter>
+<row_before_delimiter>null<field_delimiter_1>\"Some string 3\"<field_delimiter_2>[[[1, 2, 3], \"String 3\"], [[1], \"String 3\"]]<row_after_delimiter>
+<result_after_delimiter>" > $DATA_FILE
+
+$CLICKHOUSE_CLIENT -q "desc file('$FILE_NAME', 'Template') $TEMPLATE_SETTINGS"
+$CLICKHOUSE_CLIENT -q "select * from file('$FILE_NAME', 'Template') $TEMPLATE_SETTINGS"
+
+
+echo "MsgPack"
+
+$CLICKHOUSE_CLIENT -q "select toInt32(number % 2 ? number : NULL) as int, toUInt64(number % 2 ? NULL : number) as uint, toFloat32(number) as float, concat('Str: ', toString(number)) as str, [[number, number + 1], [number]] as arr, map(number, [number, number + 1]) as map from numbers(3) format MsgPack" > $DATA_FILE
+
+$CLICKHOUSE_CLIENT -q "desc file('$FILE_NAME', 'MsgPack') settings input_format_msgpack_number_of_columns=6"
+$CLICKHOUSE_CLIENT -q "select * from file('$FILE_NAME', 'MsgPack') settings input_format_msgpack_number_of_columns=6"
+
+
+rm $SCHEMADIR/resultset_format_02149 $SCHEMADIR/row_format_02149
+rm $DATA_FILE
+
diff --git a/tests/queries/0_stateless/02149_schema_inference_create_table_syntax.reference b/tests/queries/0_stateless/02149_schema_inference_create_table_syntax.reference
new file mode 100644
index 00000000000..dae12318ce0
--- /dev/null
+++ b/tests/queries/0_stateless/02149_schema_inference_create_table_syntax.reference
@@ -0,0 +1,40 @@
+0	Str: 0	[0,1]
+1	Str: 1	[1,2]
+2	Str: 2	[2,3]
+3	Str: 3	[3,4]
+4	Str: 4	[4,5]
+5	Str: 5	[5,6]
+6	Str: 6	[6,7]
+7	Str: 7	[7,8]
+8	Str: 8	[8,9]
+9	Str: 9	[9,10]
+0	0	[0,1]
+1	1	[1,2]
+2	2	[2,3]
+3	3	[3,4]
+4	4	[4,5]
+5	5	[5,6]
+6	6	[6,7]
+7	7	[7,8]
+8	8	[8,9]
+9	9	[9,10]
+0	0	[0,1]
+1	1	[1,2]
+2	2	[2,3]
+3	3	[3,4]
+4	4	[4,5]
+5	5	[5,6]
+6	6	[6,7]
+7	7	[7,8]
+8	8	[8,9]
+9	9	[9,10]
+0	0	[0,1]
+1	1	[1,2]
+2	2	[2,3]
+3	3	[3,4]
+4	4	[4,5]
+5	5	[5,6]
+6	6	[6,7]
+7	7	[7,8]
+8	8	[8,9]
+9	9	[9,10]
diff --git a/tests/queries/0_stateless/02149_schema_inference_create_table_syntax.sh b/tests/queries/0_stateless/02149_schema_inference_create_table_syntax.sh
new file mode 100755
index 00000000000..f00f2531dd0
--- /dev/null
+++ b/tests/queries/0_stateless/02149_schema_inference_create_table_syntax.sh
@@ -0,0 +1,40 @@
+#!/usr/bin/env bash
+# Tags: no-parallel, no-fasttest
+
+CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)
+# shellcheck source=../shell_config.sh
+. "$CURDIR"/../shell_config.sh
+
+
+USER_FILES_PATH=$(clickhouse-client --query "select _path,_file from file('nonexist.txt', 'CSV', 'val1 char')" 2>&1 | grep Exception | awk '{gsub("/nonexist.txt","",$9); print $9}')
+mkdir $USER_FILES_PATH/test_02149
+FILE_NAME=test_02149/data.Parquet
+DATA_FILE=$USER_FILES_PATH/$FILE_NAME
+
+$CLICKHOUSE_CLIENT -q "select number as num, concat('Str: ', toString(number)) as str, [number, number + 1] as arr from numbers(10) format Parquet" > $DATA_FILE
+
+$CLICKHOUSE_CLIENT -q "drop table if exists test_02149"
+$CLICKHOUSE_CLIENT -q "create table test_02149 engine=File('Parquet', '$FILE_NAME')"
+$CLICKHOUSE_CLIENT -q "select * from test_02149"
+$CLICKHOUSE_CLIENT -q "drop table test_02149"
+
+$CLICKHOUSE_CLIENT -q "create table test_02149 (x UInt32, s String, a Array(UInt32)) engine=Memory"
+$CLICKHOUSE_CLIENT -q "insert into test_02149 select number, toString(number), [number, number + 1] from numbers(10)"
+
+$CLICKHOUSE_CLIENT -q "drop table if exists test_merge"
+$CLICKHOUSE_CLIENT -q "create table test_merge engine=Merge(currentDatabase(), 'test_02149')"
+$CLICKHOUSE_CLIENT -q "select * from test_merge"
+$CLICKHOUSE_CLIENT -q "drop table test_merge"
+
+$CLICKHOUSE_CLIENT -q "drop table if exists test_distributed"
+$CLICKHOUSE_CLIENT -q "create table test_distributed engine=Distributed(test_shard_localhost, currentDatabase(), 'test_02149')"
+$CLICKHOUSE_CLIENT -q "select * from test_distributed"
+$CLICKHOUSE_CLIENT -q "drop table test_distributed"
+
+$CLICKHOUSE_CLIENT -q "drop table if exists test_buffer"
+$CLICKHOUSE_CLIENT -q "create table test_buffer engine=Buffer(currentDatabase(), 'test_02149', 16, 10, 100, 10000, 1000000, 10000000, 100000000)"
+$CLICKHOUSE_CLIENT -q "select * from test_buffer"
+$CLICKHOUSE_CLIENT -q "drop table test_buffer"
+
+rm -rf ${USER_FILES_PATH:?}/test_02149
+
diff --git a/tests/queries/0_stateless/02149_schema_inference_formats_with_schema.reference b/tests/queries/0_stateless/02149_schema_inference_formats_with_schema.reference
new file mode 100644
index 00000000000..d3d2d86d696
--- /dev/null
+++ b/tests/queries/0_stateless/02149_schema_inference_formats_with_schema.reference
@@ -0,0 +1,435 @@
+Arrow
+int8	Int8					
+uint8	UInt8					
+int16	Int16					
+uint16	UInt16					
+int32	Int32					
+uint32	UInt32					
+int64	Int64					
+uint64	UInt64					
+0	0	0	0	0	0	0	0
+-1	1	-1	1	-1	1	-1	1
+float32	Float32					
+float64	Float64					
+decimal32	Decimal(9, 5)					
+decimal64	Decimal(18, 5)					
+0	0	0	0
+1.2	0.7692307692307692	3.33333	333.33333
+date	UInt16					
+date32	Date32					
+0	1970-01-01
+1	1970-01-02
+str	String					
+fixed_string	String					
+Str: 0	100
+Str: 1	200
+array	Array(UInt64)					
+tuple	Tuple(`tuple.0` UInt64, `tuple.1` String)					
+map	Map(String, UInt64)					
+[0,1]	(0,'0')	{'0':0}
+[1,2]	(1,'1')	{'1':1}
+nested1	Array(Tuple(`nested1.0` Array(UInt64), `nested1.1` Map(String, UInt64)))					
+nested2	Tuple(`nested2.0` Tuple(`nested2.0.0` Array(Array(UInt64)), `nested2.0.1` Map(UInt64, Array(Tuple(`nested2.0.1.0` UInt64, `nested2.0.1.1` String)))), `nested2.1` UInt8)					
+[([0,1],{'42':0}),([],{}),([42],{'42':42})]	(([[0],[1],[]],{0:[(0,'42'),(1,'42')]}),42)
+[([1,2],{'42':1}),([],{}),([42],{'42':42})]	(([[1],[2],[]],{1:[(1,'42'),(2,'42')]}),42)
+ArrowStream
+int8	Int8					
+uint8	UInt8					
+int16	Int16					
+uint16	UInt16					
+int32	Int32					
+uint32	UInt32					
+int64	Int64					
+uint64	UInt64					
+0	0	0	0	0	0	0	0
+-1	1	-1	1	-1	1	-1	1
+float32	Float32					
+float64	Float64					
+decimal32	Decimal(9, 5)					
+decimal64	Decimal(18, 5)					
+0	0	0	0
+1.2	0.7692307692307692	3.33333	333.33333
+date	UInt16					
+date32	Date32					
+0	1970-01-01
+1	1970-01-02
+str	String					
+fixed_string	String					
+Str: 0	100
+Str: 1	200
+array	Array(UInt64)					
+tuple	Tuple(`tuple.0` UInt64, `tuple.1` String)					
+map	Map(String, UInt64)					
+[0,1]	(0,'0')	{'0':0}
+[1,2]	(1,'1')	{'1':1}
+nested1	Array(Tuple(`nested1.0` Array(UInt64), `nested1.1` Map(String, UInt64)))					
+nested2	Tuple(`nested2.0` Tuple(`nested2.0.0` Array(Array(UInt64)), `nested2.0.1` Map(UInt64, Array(Tuple(`nested2.0.1.0` UInt64, `nested2.0.1.1` String)))), `nested2.1` UInt8)					
+[([0,1],{'42':0}),([],{}),([42],{'42':42})]	(([[0],[1],[]],{0:[(0,'42'),(1,'42')]}),42)
+[([1,2],{'42':1}),([],{}),([42],{'42':42})]	(([[1],[2],[]],{1:[(1,'42'),(2,'42')]}),42)
+Parquet
+int8	Int8					
+uint8	UInt8					
+int16	Int16					
+uint16	UInt16					
+int32	Int32					
+uint32	Int64					
+int64	Int64					
+uint64	UInt64					
+0	0	0	0	0	0	0	0
+-1	1	-1	1	-1	1	-1	1
+float32	Float32					
+float64	Float64					
+decimal32	Decimal(9, 5)					
+decimal64	Decimal(18, 5)					
+0	0	0	0
+1.2	0.7692307692307692	3.33333	333.33333
+date	UInt16					
+date32	Date32					
+0	1970-01-01
+1	1970-01-02
+str	String					
+fixed_string	String					
+Str: 0	100
+Str: 1	200
+array	Array(UInt64)					
+tuple	Tuple(`tuple.0` UInt64, `tuple.1` String)					
+map	Map(String, UInt64)					
+[0,1]	(0,'0')	{'0':0}
+[1,2]	(1,'1')	{'1':1}
+nested1	Array(Tuple(`nested1.0` Array(UInt64), `nested1.1` Map(String, UInt64)))					
+nested2	Tuple(`nested2.0` Tuple(`nested2.0.0` Array(Array(UInt64)), `nested2.0.1` Map(UInt64, Array(Tuple(`nested2.0.1.0` UInt64, `nested2.0.1.1` String)))), `nested2.1` UInt8)					
+[([0,1],{'42':0}),([],{}),([42],{'42':42})]	(([[0],[1],[]],{0:[(0,'42'),(1,'42')]}),42)
+[([1,2],{'42':1}),([],{}),([42],{'42':42})]	(([[1],[2],[]],{1:[(1,'42'),(2,'42')]}),42)
+ORC
+int8	Int8					
+uint8	Int8					
+int16	Int16					
+uint16	Int16					
+int32	Int32					
+uint32	Int32					
+int64	Int64					
+uint64	Int64					
+0	0	0	0	0	0	0	0
+-1	1	-1	1	-1	1	-1	1
+float32	Float32					
+float64	Float64					
+decimal32	Decimal(9, 5)					
+decimal64	Decimal(18, 5)					
+0	0	0	0
+1.2	0.7692307692307692	3.33333	333.33333
+date	Date32					
+date32	Date32					
+1970-01-01	1970-01-01
+1970-01-02	1970-01-02
+str	String					
+fixed_string	String					
+Str: 0	100
+Str: 1	200
+array	Array(Int64)					
+tuple	Tuple(`tuple.0` Int64, `tuple.1` String)					
+map	Map(String, Int64)					
+[0,1]	(0,'0')	{'0':0}
+[1,2]	(1,'1')	{'1':1}
+nested1	Array(Tuple(`nested1.0` Array(Int64), `nested1.1` Map(String, Int64)))					
+nested2	Tuple(`nested2.0` Tuple(`nested2.0.0` Array(Array(Int64)), `nested2.0.1` Map(Int64, Array(Tuple(`nested2.0.1.0` Int64, `nested2.0.1.1` String)))), `nested2.1` Int8)					
+[([0,1],{'42':0}),([],{}),([42],{'42':42})]	(([[0],[1],[]],{0:[(0,'42'),(1,'42')]}),42)
+[([1,2],{'42':1}),([],{}),([42],{'42':42})]	(([[1],[2],[]],{1:[(1,'42'),(2,'42')]}),42)
+Native
+int8	Int8					
+uint8	UInt8					
+int16	Int16					
+uint16	UInt16					
+int32	Int32					
+uint32	UInt32					
+int64	Int64					
+uint64	UInt64					
+0	0	0	0	0	0	0	0
+-1	1	-1	1	-1	1	-1	1
+float32	Float32					
+float64	Float64					
+decimal32	Decimal(9, 5)					
+decimal64	Decimal(18, 5)					
+0	0	0	0
+1.2	0.7692307692307692	3.33333	333.33333
+date	Date					
+date32	Date32					
+1970-01-01	1970-01-01
+1970-01-02	1970-01-02
+str	String					
+fixed_string	FixedString(3)					
+Str: 0	100
+Str: 1	200
+array	Array(UInt64)					
+tuple	Tuple(UInt64, String)					
+map	Map(String, UInt64)					
+[0,1]	(0,'0')	{'0':0}
+[1,2]	(1,'1')	{'1':1}
+nested1	Array(Tuple(Array(UInt64), Map(String, UInt64)))					
+nested2	Tuple(Tuple(Array(Array(UInt64)), Map(UInt64, Array(Tuple(UInt64, String)))), UInt8)					
+[([0,1],{'42':0}),([],{}),([42],{'42':42})]	(([[0],[1],[]],{0:[(0,'42'),(1,'42')]}),42)
+[([1,2],{'42':1}),([],{}),([42],{'42':42})]	(([[1],[2],[]],{1:[(1,'42'),(2,'42')]}),42)
+TSVWithNamesAndTypes
+int8	Int8					
+uint8	UInt8					
+int16	Int16					
+uint16	UInt16					
+int32	Int32					
+uint32	UInt32					
+int64	Int64					
+uint64	UInt64					
+0	0	0	0	0	0	0	0
+-1	1	-1	1	-1	1	-1	1
+float32	Float32					
+float64	Float64					
+decimal32	Decimal(9, 5)					
+decimal64	Decimal(18, 5)					
+0	0	0	0
+1.2	0.7692307692307692	3.33333	333.33333
+date	Date					
+date32	Date32					
+1970-01-01	1970-01-01
+1970-01-02	1970-01-02
+str	String					
+fixed_string	FixedString(3)					
+Str: 0	100
+Str: 1	200
+array	Array(UInt64)					
+tuple	Tuple(UInt64, String)					
+map	Map(String, UInt64)					
+[0,1]	(0,'0')	{'0':0}
+[1,2]	(1,'1')	{'1':1}
+nested1	Array(Tuple(Array(UInt64), Map(String, UInt64)))					
+nested2	Tuple(Tuple(Array(Array(UInt64)), Map(UInt64, Array(Tuple(UInt64, String)))), UInt8)					
+[([0,1],{'42':0}),([],{}),([42],{'42':42})]	(([[0],[1],[]],{0:[(0,'42'),(1,'42')]}),42)
+[([1,2],{'42':1}),([],{}),([42],{'42':42})]	(([[1],[2],[]],{1:[(1,'42'),(2,'42')]}),42)
+TSVRawWithNamesAndTypes
+int8	Int8					
+uint8	UInt8					
+int16	Int16					
+uint16	UInt16					
+int32	Int32					
+uint32	UInt32					
+int64	Int64					
+uint64	UInt64					
+0	0	0	0	0	0	0	0
+-1	1	-1	1	-1	1	-1	1
+float32	Float32					
+float64	Float64					
+decimal32	Decimal(9, 5)					
+decimal64	Decimal(18, 5)					
+0	0	0	0
+1.2	0.7692307692307692	3.33333	333.33333
+date	Date					
+date32	Date32					
+1970-01-01	1970-01-01
+1970-01-02	1970-01-02
+str	String					
+fixed_string	FixedString(3)					
+Str: 0	100
+Str: 1	200
+array	Array(UInt64)					
+tuple	Tuple(UInt64, String)					
+map	Map(String, UInt64)					
+[0,1]	(0,'0')	{'0':0}
+[1,2]	(1,'1')	{'1':1}
+nested1	Array(Tuple(Array(UInt64), Map(String, UInt64)))					
+nested2	Tuple(Tuple(Array(Array(UInt64)), Map(UInt64, Array(Tuple(UInt64, String)))), UInt8)					
+[([0,1],{'42':0}),([],{}),([42],{'42':42})]	(([[0],[1],[]],{0:[(0,'42'),(1,'42')]}),42)
+[([1,2],{'42':1}),([],{}),([42],{'42':42})]	(([[1],[2],[]],{1:[(1,'42'),(2,'42')]}),42)
+CSVWithNamesAndTypes
+int8	Int8					
+uint8	UInt8					
+int16	Int16					
+uint16	UInt16					
+int32	Int32					
+uint32	UInt32					
+int64	Int64					
+uint64	UInt64					
+0	0	0	0	0	0	0	0
+-1	1	-1	1	-1	1	-1	1
+float32	Float32					
+float64	Float64					
+decimal32	Decimal(9, 5)					
+decimal64	Decimal(18, 5)					
+0	0	0	0
+1.2	0.7692307692307692	3.33333	333.33333
+date	Date					
+date32	Date32					
+1970-01-01	1970-01-01
+1970-01-02	1970-01-02
+str	String					
+fixed_string	FixedString(3)					
+Str: 0	100
+Str: 1	200
+array	Array(UInt64)					
+tuple	Tuple(UInt64, String)					
+map	Map(String, UInt64)					
+[0,1]	(0,'0')	{'0':0}
+[1,2]	(1,'1')	{'1':1}
+nested1	Array(Tuple(Array(UInt64), Map(String, UInt64)))					
+nested2	Tuple(Tuple(Array(Array(UInt64)), Map(UInt64, Array(Tuple(UInt64, String)))), UInt8)					
+[([0,1],{'42':0}),([],{}),([42],{'42':42})]	(([[0],[1],[]],{0:[(0,'42'),(1,'42')]}),42)
+[([1,2],{'42':1}),([],{}),([42],{'42':42})]	(([[1],[2],[]],{1:[(1,'42'),(2,'42')]}),42)
+JSONCompactEachRowWithNamesAndTypes
+int8	Int8					
+uint8	UInt8					
+int16	Int16					
+uint16	UInt16					
+int32	Int32					
+uint32	UInt32					
+int64	Int64					
+uint64	UInt64					
+0	0	0	0	0	0	0	0
+-1	1	-1	1	-1	1	-1	1
+float32	Float32					
+float64	Float64					
+decimal32	Decimal(9, 5)					
+decimal64	Decimal(18, 5)					
+0	0	0	0
+1.2	0.7692307692307692	3.33333	333.33333
+date	Date					
+date32	Date32					
+1970-01-01	1970-01-01
+1970-01-02	1970-01-02
+str	String					
+fixed_string	FixedString(3)					
+Str: 0	100
+Str: 1	200
+array	Array(UInt64)					
+tuple	Tuple(UInt64, String)					
+map	Map(String, UInt64)					
+[0,1]	(0,'0')	{'0':0}
+[1,2]	(1,'1')	{'1':1}
+nested1	Array(Tuple(Array(UInt64), Map(String, UInt64)))					
+nested2	Tuple(Tuple(Array(Array(UInt64)), Map(UInt64, Array(Tuple(UInt64, String)))), UInt8)					
+[([0,1],{'42':0}),([],{}),([42],{'42':42})]	(([[0],[1],[]],{0:[(0,'42'),(1,'42')]}),42)
+[([1,2],{'42':1}),([],{}),([42],{'42':42})]	(([[1],[2],[]],{1:[(1,'42'),(2,'42')]}),42)
+JSONCompactStringsEachRowWithNamesAndTypes
+int8	Int8					
+uint8	UInt8					
+int16	Int16					
+uint16	UInt16					
+int32	Int32					
+uint32	UInt32					
+int64	Int64					
+uint64	UInt64					
+0	0	0	0	0	0	0	0
+-1	1	-1	1	-1	1	-1	1
+float32	Float32					
+float64	Float64					
+decimal32	Decimal(9, 5)					
+decimal64	Decimal(18, 5)					
+0	0	0	0
+1.2	0.7692307692307692	3.33333	333.33333
+date	Date					
+date32	Date32					
+1970-01-01	1970-01-01
+1970-01-02	1970-01-02
+str	String					
+fixed_string	FixedString(3)					
+Str: 0	100
+Str: 1	200
+array	Array(UInt64)					
+tuple	Tuple(UInt64, String)					
+map	Map(String, UInt64)					
+[0,1]	(0,'0')	{'0':0}
+[1,2]	(1,'1')	{'1':1}
+nested1	Array(Tuple(Array(UInt64), Map(String, UInt64)))					
+nested2	Tuple(Tuple(Array(Array(UInt64)), Map(UInt64, Array(Tuple(UInt64, String)))), UInt8)					
+[([0,1],{'42':0}),([],{}),([42],{'42':42})]	(([[0],[1],[]],{0:[(0,'42'),(1,'42')]}),42)
+[([1,2],{'42':1}),([],{}),([42],{'42':42})]	(([[1],[2],[]],{1:[(1,'42'),(2,'42')]}),42)
+RowBinaryWithNamesAndTypes
+int8	Int8					
+uint8	UInt8					
+int16	Int16					
+uint16	UInt16					
+int32	Int32					
+uint32	UInt32					
+int64	Int64					
+uint64	UInt64					
+0	0	0	0	0	0	0	0
+-1	1	-1	1	-1	1	-1	1
+float32	Float32					
+float64	Float64					
+decimal32	Decimal(9, 5)					
+decimal64	Decimal(18, 5)					
+0	0	0	0
+1.2	0.7692307692307692	3.33333	333.33333
+date	Date					
+date32	Date32					
+1970-01-01	1970-01-01
+1970-01-02	1970-01-02
+str	String					
+fixed_string	FixedString(3)					
+Str: 0	100
+Str: 1	200
+array	Array(UInt64)					
+tuple	Tuple(UInt64, String)					
+map	Map(String, UInt64)					
+[0,1]	(0,'0')	{'0':0}
+[1,2]	(1,'1')	{'1':1}
+nested1	Array(Tuple(Array(UInt64), Map(String, UInt64)))					
+nested2	Tuple(Tuple(Array(Array(UInt64)), Map(UInt64, Array(Tuple(UInt64, String)))), UInt8)					
+[([0,1],{'42':0}),([],{}),([42],{'42':42})]	(([[0],[1],[]],{0:[(0,'42'),(1,'42')]}),42)
+[([1,2],{'42':1}),([],{}),([42],{'42':42})]	(([[1],[2],[]],{1:[(1,'42'),(2,'42')]}),42)
+CustomSeparatedWithNamesAndTypes
+int8	Int8					
+uint8	UInt8					
+int16	Int16					
+uint16	UInt16					
+int32	Int32					
+uint32	UInt32					
+int64	Int64					
+uint64	UInt64					
+0	0	0	0	0	0	0	0
+-1	1	-1	1	-1	1	-1	1
+float32	Float32					
+float64	Float64					
+decimal32	Decimal(9, 5)					
+decimal64	Decimal(18, 5)					
+0	0	0	0
+1.2	0.7692307692307692	3.33333	333.33333
+date	Date					
+date32	Date32					
+1970-01-01	1970-01-01
+1970-01-02	1970-01-02
+str	String					
+fixed_string	FixedString(3)					
+Str: 0	100
+Str: 1	200
+array	Array(UInt64)					
+tuple	Tuple(UInt64, String)					
+map	Map(String, UInt64)					
+[0,1]	(0,'0')	{'0':0}
+[1,2]	(1,'1')	{'1':1}
+nested1	Array(Tuple(Array(UInt64), Map(String, UInt64)))					
+nested2	Tuple(Tuple(Array(Array(UInt64)), Map(UInt64, Array(Tuple(UInt64, String)))), UInt8)					
+[([0,1],{'42':0}),([],{}),([42],{'42':42})]	(([[0],[1],[]],{0:[(0,'42'),(1,'42')]}),42)
+[([1,2],{'42':1}),([],{}),([42],{'42':42})]	(([[1],[2],[]],{1:[(1,'42'),(2,'42')]}),42)
+Avro
+CustomSeparatedWithNamesAndTypes
+int8	Int32					
+uint8	Int32					
+int16	Int32					
+uint16	Int32					
+int32	Int32					
+uint32	Int32					
+int64	Int64					
+uint64	Int64					
+0	0	0	0	0	0	0	0
+-1	1	-1	1	-1	1	-1	1
+float32	Float32					
+float64	Float64					
+0	0
+1.2	0.7692307692307692
+date	Int32					
+0
+1
+str	String					
+fixed_string	FixedString(3)					
+Str: 0	100
+Str: 1	200
+array	Array(Int64)					
+nested	Array(Array(Array(Int64)))					
+[0,1]	[[[0],[1]]]
+[1,2]	[[[1],[2]]]
diff --git a/tests/queries/0_stateless/02149_schema_inference_formats_with_schema.sh b/tests/queries/0_stateless/02149_schema_inference_formats_with_schema.sh
new file mode 100755
index 00000000000..d263ef63681
--- /dev/null
+++ b/tests/queries/0_stateless/02149_schema_inference_formats_with_schema.sh
@@ -0,0 +1,65 @@
+#!/usr/bin/env bash
+# Tags: no-parallel, no-fasttest
+
+CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)
+# shellcheck source=../shell_config.sh
+. "$CURDIR"/../shell_config.sh
+
+
+USER_FILES_PATH=$(clickhouse-client --query "select _path,_file from file('nonexist.txt', 'CSV', 'val1 char')" 2>&1 | grep Exception | awk '{gsub("/nonexist.txt","",$9); print $9}')
+FILE_NAME=test_02149.data
+DATA_FILE=$USER_FILES_PATH/$FILE_NAME
+
+for format in Arrow ArrowStream Parquet ORC Native TSVWithNamesAndTypes TSVRawWithNamesAndTypes CSVWithNamesAndTypes JSONCompactEachRowWithNamesAndTypes JSONCompactStringsEachRowWithNamesAndTypes RowBinaryWithNamesAndTypes CustomSeparatedWithNamesAndTypes
+do
+    echo $format
+    $CLICKHOUSE_CLIENT -q "select toInt8(-number) as int8, toUInt8(number) as uint8, toInt16(-number) as int16, toUInt16(number) as uint16, toInt32(-number) as int32, toUInt32(number) as uint32, toInt64(-number) as int64, toUInt64(number) as uint64 from numbers(2) format $format" > $DATA_FILE
+    $CLICKHOUSE_CLIENT -q "desc file('$FILE_NAME', '$format')"
+    $CLICKHOUSE_CLIENT -q "select * from file('$FILE_NAME', '$format')"
+
+    $CLICKHOUSE_CLIENT -q "select toFloat32(number * 1.2) as float32, toFloat64(number / 1.3) as float64, toDecimal32(number / 0.3, 5) as decimal32, toDecimal64(number / 0.003, 5) as decimal64 from numbers(2) format $format" > $DATA_FILE
+    $CLICKHOUSE_CLIENT -q "desc file('$FILE_NAME', '$format')"
+    $CLICKHOUSE_CLIENT -q "select * from file('$FILE_NAME', '$format')"
+
+    $CLICKHOUSE_CLIENT -q "select toDate(number) as date, toDate32(number) as date32 from numbers(2) format $format" > $DATA_FILE
+    $CLICKHOUSE_CLIENT -q "desc file('$FILE_NAME', '$format')"
+    $CLICKHOUSE_CLIENT -q "select * from file('$FILE_NAME', '$format')"
+
+    $CLICKHOUSE_CLIENT -q "select concat('Str: ', toString(number)) as str, toFixedString(toString((number + 1) * 100 % 1000), 3) as fixed_string from numbers(2) format $format" > $DATA_FILE
+    $CLICKHOUSE_CLIENT -q "desc file('$FILE_NAME', '$format')"
+    $CLICKHOUSE_CLIENT -q "select * from file('$FILE_NAME', '$format')"
+ 
+    $CLICKHOUSE_CLIENT -q "select [number, number + 1] as array, (number, toString(number)) as tuple, map(toString(number), number) as map from numbers(2) format $format" > $DATA_FILE
+    $CLICKHOUSE_CLIENT -q "desc file('$FILE_NAME', '$format')"
+    $CLICKHOUSE_CLIENT -q "select * from file('$FILE_NAME', '$format')"
+
+    $CLICKHOUSE_CLIENT -q "select [([number, number + 1], map('42', number)), ([], map()), ([42], map('42', 42))] as nested1, (([[number], [number + 1], []], map(number, [(number, '42'), (number + 1, '42')])), 42) as nested2 from numbers(2) format $format" > $DATA_FILE
+    $CLICKHOUSE_CLIENT -q "desc file('$FILE_NAME', '$format')"
+    $CLICKHOUSE_CLIENT -q "select * from file('$FILE_NAME', '$format')"
+done
+
+echo "Avro"
+
+echo $format
+$CLICKHOUSE_CLIENT -q "select toInt8(-number) as int8, toUInt8(number) as uint8, toInt16(-number) as int16, toUInt16(number) as uint16, toInt32(-number) as int32, toUInt32(number) as uint32, toInt64(-number) as int64, toUInt64(number) as uint64 from numbers(2) format Avro" > $DATA_FILE
+$CLICKHOUSE_CLIENT -q "desc file('$FILE_NAME', 'Avro')"
+$CLICKHOUSE_CLIENT -q "select * from file('$FILE_NAME', 'Avro')"
+
+$CLICKHOUSE_CLIENT -q "select toFloat32(number * 1.2) as float32, toFloat64(number / 1.3) as float64 from numbers(2) format Avro" > $DATA_FILE
+$CLICKHOUSE_CLIENT -q "desc file('$FILE_NAME', 'Avro')"
+$CLICKHOUSE_CLIENT -q "select * from file('$FILE_NAME', 'Avro')"
+
+$CLICKHOUSE_CLIENT -q "select toDate(number) as date from numbers(2) format Avro" > $DATA_FILE
+$CLICKHOUSE_CLIENT -q "desc file('$FILE_NAME', 'Avro')"
+$CLICKHOUSE_CLIENT -q "select * from file('$FILE_NAME', 'Avro')"
+
+$CLICKHOUSE_CLIENT -q "select concat('Str: ', toString(number)) as str, toFixedString(toString((number + 1) * 100 % 1000), 3) as fixed_string from numbers(2) format Avro" > $DATA_FILE
+$CLICKHOUSE_CLIENT -q "desc file('$FILE_NAME', 'Avro')"
+$CLICKHOUSE_CLIENT -q "select * from file('$FILE_NAME', 'Avro')"
+ 
+$CLICKHOUSE_CLIENT -q "select [number, number + 1] as array, [[[number], [number + 1]]] as nested from numbers(2) format Avro" > $DATA_FILE
+$CLICKHOUSE_CLIENT -q "desc file('$FILE_NAME', 'Avro')"
+$CLICKHOUSE_CLIENT -q "select * from file('$FILE_NAME', 'Avro')"
+
+rm $DATA_FILE
+
diff --git a/tests/queries/0_stateless/02155_create_table_w_timezone.reference b/tests/queries/0_stateless/02155_create_table_w_timezone.reference
new file mode 100644
index 00000000000..e69de29bb2d
diff --git a/tests/queries/0_stateless/02155_create_table_w_timezone.sql b/tests/queries/0_stateless/02155_create_table_w_timezone.sql
new file mode 100644
index 00000000000..0b72122ce39
--- /dev/null
+++ b/tests/queries/0_stateless/02155_create_table_w_timezone.sql
@@ -0,0 +1,8 @@
+create table t02155_t64_tz ( a DateTime64(9, America/Chicago)) Engine = Memory; -- { clientError 62 }
+create table t02155_t_tz ( a DateTime(America/Chicago)) Engine = Memory; -- { clientError 62 }
+
+create table t02155_t64_tz ( a DateTime64(9, 'America/Chicago')) Engine = Memory; 
+create table t02155_t_tz ( a DateTime('America/Chicago')) Engine = Memory; 
+
+drop table t02155_t64_tz;
+drop table t02155_t_tz;
diff --git a/tests/queries/0_stateless/02155_parse_date_lowcard_default_throw.reference b/tests/queries/0_stateless/02155_parse_date_lowcard_default_throw.reference
new file mode 100644
index 00000000000..e599dcc71e5
--- /dev/null
+++ b/tests/queries/0_stateless/02155_parse_date_lowcard_default_throw.reference
@@ -0,0 +1 @@
+2016-07-15 00:00:00
diff --git a/tests/queries/0_stateless/02155_parse_date_lowcard_default_throw.sql b/tests/queries/0_stateless/02155_parse_date_lowcard_default_throw.sql
new file mode 100644
index 00000000000..703cf1fed7a
--- /dev/null
+++ b/tests/queries/0_stateless/02155_parse_date_lowcard_default_throw.sql
@@ -0,0 +1 @@
+SELECT parseDateTimeBestEffort(toLowCardinality(materialize('15-JUL-16')));
diff --git a/tests/queries/0_stateless/format_schemas/00825_protobuf_format_enum_mapping.proto b/tests/queries/0_stateless/format_schemas/00825_protobuf_format_enum_mapping.proto
index ba558dbbadb..048a689d021 100644
--- a/tests/queries/0_stateless/format_schemas/00825_protobuf_format_enum_mapping.proto
+++ b/tests/queries/0_stateless/format_schemas/00825_protobuf_format_enum_mapping.proto
@@ -1,6 +1,6 @@
 syntax = "proto3";
 
-message Message
+message EnumMessage
 {
   enum Enum
   {
@@ -10,4 +10,4 @@ message Message
     HUNDRED = 100;
   };
   Enum x = 1;
-};
\ No newline at end of file
+};