From f2ee3837cd382e847724e03b0af4075639c7588f Mon Sep 17 00:00:00 2001
From: Daniil Ivanik <61067749+divanik@users.noreply.github.com>
Date: Mon, 9 Mar 2026 11:29:31 +0000
Subject: [PATCH 01/14] Merge pull request #98140 from
 grantholly-clickhouse/parquet_metadata_cache_v2

Parquet metadata cache v2
---
 docs/en/sql-reference/statements/system.md    |   4 +
 programs/local/LocalServer.cpp                |  16 +++
 programs/server/Server.cpp                    |  16 +++
 src/Access/Common/AccessType.h                |   1 +
 src/Common/CurrentMetrics.cpp                 |   2 +
 src/Common/ProfileEvents.cpp                  |   3 +
 src/Core/Defines.h                            |   4 +
 src/Core/ServerSettings.cpp                   |   4 +
 src/Core/Settings.cpp                         |   8 ++
 src/Core/SettingsChangesHistory.cpp           |  45 ++++++
 src/Formats/FormatFactory.cpp                 |  92 +++++++++++--
 src/Formats/FormatFactory.h                   |  71 +++++++++-
 src/Interpreters/Context.cpp                  |  48 +++++++
 src/Interpreters/Context.h                    |   8 ++
 src/Interpreters/InterpreterSystemQuery.cpp   |   9 ++
 src/Parsers/ASTSystemQuery.cpp                |   1 +
 src/Parsers/ASTSystemQuery.h                  |   1 +
 src/Parsers/ParserSystemQuery.cpp             |   1 +
 src/Processors/Formats/IInputFormat.h         |   3 -
 .../Formats/Impl/Parquet/ReadManager.cpp      |   4 +-
 .../Formats/Impl/ParquetBlockInputFormat.cpp  | 122 +++++++++--------
 .../Formats/Impl/ParquetBlockInputFormat.h    |   7 -
 .../Formats/Impl/ParquetMetadataCache.cpp     | 128 ++++++++++++++++++
 .../Formats/Impl/ParquetMetadataCache.h       | 101 ++++++++++++++
 .../Impl/ParquetMetadataInputFormat.cpp       |  44 +-----
 .../Formats/Impl/ParquetMetadataInputFormat.h |  10 --
 .../Impl/ParquetV3BlockInputFormat.cpp        |  30 +++-
 .../Formats/Impl/ParquetV3BlockInputFormat.h  |  11 +-
 .../StorageObjectStorageSource.cpp            |  54 ++++++--
 src/Storages/System/StorageSystemFormats.cpp  |   4 +-
 tests/integration/test_storage_delta/test.py  |   2 +-
 .../01271_show_privileges.reference           |   1 +
 .../03707_parquet_metadata_cache.reference    |   9 ++
 .../03707_parquet_metadata_cache.sql          |  59 ++++++++
 .../03723_parquet_prefetcher_read_big_at.sql  |   2 +-
 ...7_parquet_v3_setting_override_s3.reference |   1 +
 .../04027_parquet_v3_setting_override_s3.sql  |  20 +++
 .../data_minio/03707_cache_test.parquet       | Bin 0 -> 1195 bytes
 38 files changed, 797 insertions(+), 149 deletions(-)
 create mode 100644 src/Processors/Formats/Impl/ParquetMetadataCache.cpp
 create mode 100644 src/Processors/Formats/Impl/ParquetMetadataCache.h
 create mode 100644 tests/queries/0_stateless/03707_parquet_metadata_cache.reference
 create mode 100644 tests/queries/0_stateless/03707_parquet_metadata_cache.sql
 create mode 100644 tests/queries/0_stateless/04027_parquet_v3_setting_override_s3.reference
 create mode 100644 tests/queries/0_stateless/04027_parquet_v3_setting_override_s3.sql
 create mode 100644 tests/queries/0_stateless/data_minio/03707_cache_test.parquet

diff --git a/docs/en/sql-reference/statements/system.md b/docs/en/sql-reference/statements/system.md
index de12fbc9ed2e..5fdafd0c0b34 100644
--- a/docs/en/sql-reference/statements/system.md
+++ b/docs/en/sql-reference/statements/system.md
@@ -101,6 +101,10 @@ Clears the mark cache.
 
 Clears the iceberg metadata cache.
 
+## SYSTEM DROP PARQUET METADATA CACHE {#drop-parquet-metadata-cache}
+
+Clears the parquet metadata cache.
+
 ## SYSTEM CLEAR|DROP TEXT INDEX CACHES {#drop-text-index-caches}
 
 Clears the text index's header, dictionary and postings caches.
diff --git a/programs/local/LocalServer.cpp b/programs/local/LocalServer.cpp
index aed93185172e..a1fd5c5df2c5 100644
--- a/programs/local/LocalServer.cpp
+++ b/programs/local/LocalServer.cpp
@@ -123,6 +123,10 @@ namespace ServerSetting
     extern const ServerSettingsUInt64 iceberg_metadata_files_cache_size;
     extern const ServerSettingsUInt64 iceberg_metadata_files_cache_max_entries;
     extern const ServerSettingsDouble iceberg_metadata_files_cache_size_ratio;
+    extern const ServerSettingsString parquet_metadata_cache_policy;
+    extern const ServerSettingsUInt64 parquet_metadata_cache_size;
+    extern const ServerSettingsUInt64 parquet_metadata_cache_max_entries;
+    extern const ServerSettingsDouble parquet_metadata_cache_size_ratio;
     extern const ServerSettingsUInt64 max_active_parts_loading_thread_pool_size;
     extern const ServerSettingsUInt64 max_io_thread_pool_free_size;
     extern const ServerSettingsUInt64 max_io_thread_pool_size;
@@ -972,6 +976,18 @@ void LocalServer::processConfig()
     }
     global_context->setIcebergMetadataFilesCache(iceberg_metadata_files_cache_policy, iceberg_metadata_files_cache_size, iceberg_metadata_files_cache_max_entries, iceberg_metadata_files_cache_size_ratio);
 #endif
+#if USE_PARQUET
+    String parquet_metadata_cache_policy = server_settings[ServerSetting::parquet_metadata_cache_policy];
+    size_t parquet_metadata_cache_size = server_settings[ServerSetting::parquet_metadata_cache_size];
+    size_t parquet_metadata_cache_max_entries = server_settings[ServerSetting::parquet_metadata_cache_max_entries];
+    double parquet_metadata_cache_size_ratio = server_settings[ServerSetting::parquet_metadata_cache_size_ratio];
+    if (parquet_metadata_cache_size > max_cache_size)
+    {
+        parquet_metadata_cache_size = max_cache_size;
+        LOG_INFO(log, "Lowered Parquet metadata cache size to {} because the system has limited RAM", formatReadableSizeWithBinarySuffix(parquet_metadata_cache_size));
+    }
+    global_context->setParquetMetadataCache(parquet_metadata_cache_policy, parquet_metadata_cache_size, parquet_metadata_cache_max_entries, parquet_metadata_cache_size_ratio);
+#endif
 
     Names allowed_disks_table_engines;
     splitInto<','>(allowed_disks_table_engines, server_settings[ServerSetting::allowed_disks_for_table_engines].value);
diff --git a/programs/server/Server.cpp b/programs/server/Server.cpp
index 891b305c910e..7e03b26648c0 100644
--- a/programs/server/Server.cpp
+++ b/programs/server/Server.cpp
@@ -267,6 +267,10 @@ namespace ServerSetting
     extern const ServerSettingsUInt64 iceberg_metadata_files_cache_size;
     extern const ServerSettingsUInt64 iceberg_metadata_files_cache_max_entries;
     extern const ServerSettingsDouble iceberg_metadata_files_cache_size_ratio;
+    extern const ServerSettingsString parquet_metadata_cache_policy;
+    extern const ServerSettingsUInt64 parquet_metadata_cache_size;
+    extern const ServerSettingsUInt64 parquet_metadata_cache_max_entries;
+    extern const ServerSettingsDouble parquet_metadata_cache_size_ratio;
     extern const ServerSettingsUInt64 io_thread_pool_queue_size;
     extern const ServerSettingsBool jemalloc_enable_global_profiler;
     extern const ServerSettingsBool jemalloc_collect_global_profile_samples_in_trace_log;
@@ -2038,6 +2042,18 @@ try
     }
     global_context->setIcebergMetadataFilesCache(iceberg_metadata_files_cache_policy, iceberg_metadata_files_cache_size, iceberg_metadata_files_cache_max_entries, iceberg_metadata_files_cache_size_ratio);
 #endif
+#if USE_PARQUET
+    String parquet_metadata_cache_policy = server_settings[ServerSetting::parquet_metadata_cache_policy];
+    size_t parquet_metadata_cache_size = server_settings[ServerSetting::parquet_metadata_cache_size];
+    size_t parquet_metadata_cache_max_entries = server_settings[ServerSetting::parquet_metadata_cache_max_entries];
+    double parquet_metadata_cache_size_ratio = server_settings[ServerSetting::parquet_metadata_cache_size_ratio];
+    if (parquet_metadata_cache_size > max_cache_size)
+    {
+        parquet_metadata_cache_size = max_cache_size;
+        LOG_INFO(log, "Lowered Parquet metadata cache size to {} because the system has limited RAM", formatReadableSizeWithBinarySuffix(parquet_metadata_cache_size));
+    }
+    global_context->setParquetMetadataCache(parquet_metadata_cache_policy, parquet_metadata_cache_size, parquet_metadata_cache_max_entries, parquet_metadata_cache_size_ratio);
+#endif
 
     Names allowed_disks_table_engines;
     splitInto<','>(allowed_disks_table_engines, server_settings[ServerSetting::allowed_disks_for_table_engines].value);
diff --git a/src/Access/Common/AccessType.h b/src/Access/Common/AccessType.h
index 3c4c177bcff5..35b79cb21b2d 100644
--- a/src/Access/Common/AccessType.h
+++ b/src/Access/Common/AccessType.h
@@ -317,6 +317,7 @@ enum class AccessType : uint8_t
     M(SYSTEM_PREWARM_MARK_CACHE, "SYSTEM PREWARM MARK, PREWARM MARK CACHE, PREWARM MARKS", GLOBAL, SYSTEM_DROP_CACHE) \
     M(SYSTEM_DROP_MARK_CACHE, "SYSTEM CLEAR MARK CACHE, SYSTEM DROP MARK, DROP MARK CACHE, DROP MARKS", GLOBAL, SYSTEM_DROP_CACHE) \
     M(SYSTEM_DROP_ICEBERG_METADATA_CACHE, "SYSTEM CLEAR ICEBERG_METADATA_CACHE, SYSTEM DROP ICEBERG_METADATA_CACHE", GLOBAL, SYSTEM_DROP_CACHE) \
+    M(SYSTEM_DROP_PARQUET_METADATA_CACHE, "SYSTEM DROP PARQUET_METADATA_CACHE", GLOBAL, SYSTEM_DROP_CACHE) \
     M(SYSTEM_PREWARM_PRIMARY_INDEX_CACHE, "SYSTEM PREWARM PRIMARY INDEX, PREWARM PRIMARY INDEX CACHE, PREWARM PRIMARY INDEX", GLOBAL, SYSTEM_DROP_CACHE) \
     M(SYSTEM_DROP_PRIMARY_INDEX_CACHE, "SYSTEM CLEAR PRIMARY INDEX CACHE, SYSTEM DROP PRIMARY INDEX, DROP PRIMARY INDEX CACHE, DROP PRIMARY INDEX", GLOBAL, SYSTEM_DROP_CACHE) \
     M(SYSTEM_DROP_UNCOMPRESSED_CACHE, "SYSTEM CLEAR UNCOMPRESSED CACHE, SYSTEM DROP UNCOMPRESSED, DROP UNCOMPRESSED CACHE, DROP UNCOMPRESSED", GLOBAL, SYSTEM_DROP_CACHE) \
diff --git a/src/Common/CurrentMetrics.cpp b/src/Common/CurrentMetrics.cpp
index d32293c0de6f..18c355befb93 100644
--- a/src/Common/CurrentMetrics.cpp
+++ b/src/Common/CurrentMetrics.cpp
@@ -318,6 +318,8 @@
     M(AsyncInsertCacheSize, "Number of async insert hash id in cache") \
     M(IcebergMetadataFilesCacheBytes, "Size of the Iceberg metadata cache in bytes") \
     M(IcebergMetadataFilesCacheFiles, "Number of cached files in the Iceberg metadata cache") \
+    M(ParquetMetadataCacheBytes, "Size of the Parquet metadata cache in bytes") \
+    M(ParquetMetadataCacheFiles, "Number of cached files in the Parquet metadata cache") \
     M(AvroSchemaCacheBytes, "Size of the Avro schema cache in bytes") \
     M(AvroSchemaCacheCells, "Number of cached Avro schemas") \
     M(AvroSchemaRegistryCacheBytes, "Size of the Avro schema registry cache in bytes") \
diff --git a/src/Common/ProfileEvents.cpp b/src/Common/ProfileEvents.cpp
index 8aba8a63b392..65880308cb62 100644
--- a/src/Common/ProfileEvents.cpp
+++ b/src/Common/ProfileEvents.cpp
@@ -102,6 +102,9 @@
     M(IcebergMetadataFilesCacheMisses, "Number of times iceberg metadata files have not been found in the iceberg metadata cache and had to be read from (remote) disk.", ValueType::Number) \
     M(IcebergMetadataFilesCacheWeightLost, "Approximate number of bytes evicted from the iceberg metadata cache.", ValueType::Number) \
     M(IcebergMetadataReadWaitTimeMicroseconds, "Total time data readers spend waiting for iceberg metadata files to be read and parsed, summed across all reader threads.", ValueType::Microseconds) \
+    M(ParquetMetadataCacheHits, "Number of times parquet metadata has been found in the cache.", ValueType::Number) \
+    M(ParquetMetadataCacheMisses, "Number of times parquet metadata has not been found in the cache and had to be read from disk.", ValueType::Number) \
+    M(ParquetMetadataCacheWeightLost, "Approximate number of bytes evicted from the parquet metadata cache.", ValueType::Number) \
     M(IcebergIteratorInitializationMicroseconds, "Total time spent on synchronous initialization of iceberg data iterators.", ValueType::Microseconds) \
     M(IcebergMetadataUpdateMicroseconds, "Total time spent on synchronous initialization of iceberg data iterators.", ValueType::Microseconds) \
     M(IcebergMetadataReturnedObjectInfos, "Total number of returned object infos from iceberg iterator.", ValueType::Number) \
diff --git a/src/Core/Defines.h b/src/Core/Defines.h
index 6d4d7af70503..4c369d5c2531 100644
--- a/src/Core/Defines.h
+++ b/src/Core/Defines.h
@@ -119,6 +119,10 @@ static constexpr auto DEFAULT_ICEBERG_METADATA_CACHE_POLICY = "SLRU";
 static constexpr auto DEFAULT_ICEBERG_METADATA_CACHE_MAX_SIZE = 1_GiB;
 static constexpr auto DEFAULT_ICEBERG_METADATA_CACHE_SIZE_RATIO = 0.5;
 static constexpr auto DEFAULT_ICEBERG_METADATA_CACHE_MAX_ENTRIES = 1000;
+static constexpr auto DEFAULT_PARQUET_METADATA_CACHE_POLICY = "SLRU";
+static constexpr auto DEFAULT_PARQUET_METADATA_CACHE_MAX_SIZE = 512_MiB;
+static constexpr auto DEFAULT_PARQUET_METADATA_CACHE_SIZE_RATIO = 0.5;
+static constexpr auto DEFAULT_PARQUET_METADATA_CACHE_MAX_ENTRIES = 5000;
 static constexpr auto DEFAULT_QUERY_CONDITION_CACHE_POLICY = "SLRU";
 static constexpr auto DEFAULT_QUERY_CONDITION_CACHE_MAX_SIZE = 100_MiB;
 static constexpr auto DEFAULT_QUERY_CONDITION_CACHE_SIZE_RATIO = 0.5l;
diff --git a/src/Core/ServerSettings.cpp b/src/Core/ServerSettings.cpp
index bfab31f62819..da7155a6b19d 100644
--- a/src/Core/ServerSettings.cpp
+++ b/src/Core/ServerSettings.cpp
@@ -498,6 +498,10 @@ namespace
     DECLARE(UInt64, iceberg_metadata_files_cache_size, DEFAULT_ICEBERG_METADATA_CACHE_MAX_SIZE, "Maximum size of iceberg metadata cache in bytes. Zero means disabled.", 0) \
     DECLARE(UInt64, iceberg_metadata_files_cache_max_entries, DEFAULT_ICEBERG_METADATA_CACHE_MAX_ENTRIES, "Maximum size of iceberg metadata files cache in entries. Zero means disabled.", 0) \
     DECLARE(Double, iceberg_metadata_files_cache_size_ratio, DEFAULT_ICEBERG_METADATA_CACHE_SIZE_RATIO, "The size of the protected queue (in case of SLRU policy) in the iceberg metadata cache relative to the cache's total size.", 0) \
+    DECLARE(String, parquet_metadata_cache_policy, DEFAULT_PARQUET_METADATA_CACHE_POLICY, "Parquet metadata cache policy name.", 0) \
+    DECLARE(UInt64, parquet_metadata_cache_size, DEFAULT_PARQUET_METADATA_CACHE_MAX_SIZE, "Maximum size of parquet metadata cache in bytes. Zero means disabled.", 0) \
+    DECLARE(UInt64, parquet_metadata_cache_max_entries, DEFAULT_PARQUET_METADATA_CACHE_MAX_ENTRIES, "Maximum size of parquet metadata files cache in entries. Zero means disabled.", 0) \
+    DECLARE(Double, parquet_metadata_cache_size_ratio, DEFAULT_PARQUET_METADATA_CACHE_SIZE_RATIO, "The size of the protected queue (in case of SLRU policy) in the parquet metadata cache relative to the cache's total size.", 0) \
     DECLARE(String, allowed_disks_for_table_engines, "", "List of disks allowed for use with Iceberg", 0) \
     DECLARE(String, vector_similarity_index_cache_policy, DEFAULT_VECTOR_SIMILARITY_INDEX_CACHE_POLICY, "Vector similarity index cache policy name.", 0) \
     DECLARE(UInt64, vector_similarity_index_cache_size, DEFAULT_VECTOR_SIMILARITY_INDEX_CACHE_MAX_SIZE, R"(Size of cache for vector similarity indexes. Zero means disabled.
diff --git a/src/Core/Settings.cpp b/src/Core/Settings.cpp
index 6e9d3ad9b276..0ae83fd9f05d 100644
--- a/src/Core/Settings.cpp
+++ b/src/Core/Settings.cpp
@@ -5161,6 +5161,14 @@ If turned on, iceberg table function and iceberg storage may utilize the iceberg
 
 Possible values:
 
+- 0 - Disabled
+- 1 - Enabled
+)", 0) \
+    DECLARE(Bool, use_parquet_metadata_cache, true, R"(
+If turned on, parquet format may utilize the parquet metadata cache.
+
+Possible values:
+
 - 0 - Disabled
 - 1 - Enabled
 )", 0) \
diff --git a/src/Core/SettingsChangesHistory.cpp b/src/Core/SettingsChangesHistory.cpp
index 1088a8163b0b..d4af04807e38 100644
--- a/src/Core/SettingsChangesHistory.cpp
+++ b/src/Core/SettingsChangesHistory.cpp
@@ -45,6 +45,51 @@ const VersionToSettingsChangesMap & getSettingsChangesHistory()
             // {"object_storage_max_nodes", 0, 0, "Antalya: New setting"},
             {"s3_propagate_credentials_to_other_storages", false, false, "New setting"},
             {"export_merge_tree_part_filename_pattern", "", "{part_name}_{checksum}", "New setting"},
+            {"optimize_syntax_fuse_functions", false, true, "The optimization is production-ready"},
+            {"allow_calculating_subcolumns_sizes_for_merge_tree_reading", false, true, "Allow calculating subcolumns sizes for merge tree reading to improve read tasks splitting"},
+            {"use_parquet_metadata_cache", false, true, "Enables cache of parquet file metadata."},
+            {"allow_nullable_tuple_in_extracted_subcolumns", false, false, "New setting controlling whether extracted Tuple subcolumns can be nullable."},
+            {"use_text_index_tokens_cache", false, false, "New setting"},
+            {"delta_lake_reload_schema_for_consistency", false, false, "New setting to control whether DeltaLake reloads schema before each query for consistency."},
+            {"use_partition_pruning", true, true, "New setting controlling whether MergeTree uses partition key for pruning. 'use_partition_key' is an alias for this setting."},
+            {"use_partition_key", true, true, "Alias for setting 'use_partition_pruning'."},
+            {"webassembly_udf_max_fuel", 100'000, 100'000, "New setting to limit CPU instructions (fuel) per WebAssembly UDF instance execution."},
+            {"webassembly_udf_max_memory", 128_MiB, 128_MiB, "New setting to limit memory per WebAssembly UDF instance."},
+            {"webassembly_udf_max_input_block_size", 0, 0, "New setting to limit input block size for WebAssembly UDFs."},
+            {"webassembly_udf_max_instances", 32, 32, "New setting to limit the number of parallel WebAssembly UDF instances per function."},
+            {"mysql_datatypes_support_level", "", "decimal,datetime64,date2Date32", "Enable modern MySQL type mappings by default."},
+            {"allow_experimental_json_lazy_type_hints", false, false, "New experimental setting for lazy JSON type hints"},
+        });
+        addSettingsChanges(settings_changes_history, "26.2",
+        {
+            {"allow_fuzz_query_functions", false, false, "New setting to enable the fuzzQuery function."},
+            {"ast_fuzzer_runs", 0, 0, "New setting to enable server-side AST fuzzer."},
+            {"ast_fuzzer_any_query", false, false, "New setting to allow fuzzing all query types, not just read-only."},
+            {"check_named_collection_dependencies", true, true, "New setting to check if dropping a named collection would break dependent tables."},
+            {"async_insert", false, true, "Enable async inserts by default."},
+            {"deduplicate_blocks_in_dependent_materialized_views", false, true, "Enable deduplication for dependent materialized views by default."},
+            {"deduplicate_insert", "backward_compatible_choice", "enable", "Enable deduplication for all sync and async inserts by default."},
+            {"enable_join_runtime_filters", false, true, "Enabled this optimization"},
+            {"parallel_replicas_filter_pushdown", false, false, "New setting"},
+            {"optimize_dry_run_check_part", true, true, "New setting"},
+            {"parallel_non_joined_rows_processing", true, true, "New setting to enable parallel processing of non-joined rows in RIGHT/FULL parallel_hash joins."},
+            {"enable_automatic_decision_for_merging_across_partitions_for_final", true, true, "New setting"},
+            {"enable_full_text_index", false, true, "The text index is now GA"},
+            {"allow_experimental_full_text_index", false, true, "The text index is now GA"},
+            {"use_page_cache_for_local_disks", false, false, "New setting to use userspace page cache for local disks"},
+            {"use_page_cache_for_object_storage", false, false, "New setting to use userspace page cache for object storage table functions"},
+            {"use_statistics_cache", false, true, "Enable statistics cache"},
+            {"apply_row_policy_after_final", false, true, "Enabling apply_row_policy_after_final by default, as if was in 25.8 before #87303"},
+            {"ignore_format_null_for_explain", false, true, "FORMAT Null is now ignored for EXPLAIN queries by default"},
+            {"input_format_connection_handling", false, false, "New setting to allow parsing and processing remaining data in the buffer if the connection closes unexpectedly"},
+            {"input_format_max_block_wait_ms", 0, 0, "New setting to limit maximum wait time in milliseconds before a block is emitted by input format"},
+            {"allow_insert_into_iceberg", false, false, "Insert into iceberg was moved to Beta"},
+            {"allow_experimental_insert_into_iceberg", false, false, "Insert into iceberg was moved to Beta"},
+            {"output_format_arrow_date_as_uint16", true, false, "Write Date as Arrow DATE32 instead of plain UInt16 by default."},
+            {"jemalloc_profile_text_output_format", "collapsed", "collapsed", "New setting to control output format for system.jemalloc_profile_text table. Possible values: 'raw', 'symbolized', 'collapsed'"},
+            {"jemalloc_profile_text_symbolize_with_inline", true, true, "New setting to control whether to include inline frames when symbolizing jemalloc heap profile. When enabled, inline frames are included at the cost of slower symbolization; when disabled, they are skipped for faster output"},
+            {"jemalloc_profile_text_collapsed_use_count", false, false, "New setting to aggregate by allocation count instead of bytes in the collapsed jemalloc heap profile format"},
+            {"opentelemetry_start_keeper_trace_probability", "auto", "auto", "New setting"},
         });
         addSettingsChanges(settings_changes_history, "26.1",
         {
diff --git a/src/Formats/FormatFactory.cpp b/src/Formats/FormatFactory.cpp
index 3220ad6f01a6..b90621811875 100644
--- a/src/Formats/FormatFactory.cpp
+++ b/src/Formats/FormatFactory.cpp
@@ -14,6 +14,7 @@
 #include <Processors/Formats/Impl/ParallelFormattingOutputFormat.h>
 #include <Processors/Formats/Impl/ParallelParsingInputFormat.h>
 #include <Processors/Formats/Impl/ValuesBlockInputFormat.h>
+#include <Disks/DiskObjectStorage/ObjectStorages/IObjectStorage.h>
 #include <Poco/URI.h>
 #include <Common/Exception.h>
 #include <Common/KnownObjectNames.h>
@@ -407,12 +408,13 @@ void FormatFactory::registerFileBucketInfo(const String & format, FileBucketInfo
     creators.file_bucket_info_creator = std::move(bucket_info);
 }
 
-InputFormatPtr FormatFactory::getInput(
+InputFormatPtr FormatFactory::getInputImpl(
     const String & name,
     ReadBuffer & _buf,
     const Block & sample,
     const ContextPtr & context,
     UInt64 max_block_size,
+    const std::optional<RelativePathWithMetadata> & object_with_metadata,
     const std::optional<FormatSettings> & _format_settings,
     FormatParserSharedResourcesPtr parser_shared_resources,
     FormatFilterInfoPtr format_filter_info,
@@ -424,7 +426,7 @@ InputFormatPtr FormatFactory::getInput(
     const std::optional<UInt64> & min_block_size_bytes) const
 {
     const auto& creators = getCreators(name);
-    if (!creators.input_creator && !creators.random_access_input_creator)
+    if (!creators.input_creator && !creators.random_access_input_creator && !creators.random_access_input_creator_with_metadata)
         throw Exception(ErrorCodes::FORMAT_IS_NOT_SUITABLE_FOR_INPUT, "Format {} is not suitable for input", name);
 
     /// Some formats use this thread pool. Lazily initialize it.
@@ -434,7 +436,10 @@ InputFormatPtr FormatFactory::getInput(
     const FormatSettings format_settings = _format_settings ? *_format_settings : getFormatSettings(context);
     const Settings & settings = context->getSettingsRef();
 
-    if (format_filter_info && (format_filter_info->prewhere_info || format_filter_info->row_level_filter) && (!creators.random_access_input_creator || !creators.prewhere_support_checker || !creators.prewhere_support_checker(format_settings)))
+    if (format_filter_info
+        && (format_filter_info->prewhere_info || format_filter_info->row_level_filter)
+        && ((!creators.random_access_input_creator && !creators.random_access_input_creator_with_metadata)
+            || !creators.prewhere_support_checker || !creators.prewhere_support_checker(format_settings)))
         throw Exception(ErrorCodes::LOGICAL_ERROR, "{} passed to format that doesn't support it",
             format_filter_info->prewhere_info ? "PREWHERE" : "ROW LEVEL FILTER");
 
@@ -465,7 +470,8 @@ InputFormatPtr FormatFactory::getInput(
 
     size_t max_parsing_threads = parser_shared_resources->getParsingThreadsPerReader();
     bool parallel_parsing = max_parsing_threads > 1 && settings[Setting::input_format_parallel_parsing]
-        && creators.file_segmentation_engine_creator && !creators.random_access_input_creator && !need_only_count;
+        && creators.file_segmentation_engine_creator && !(creators.random_access_input_creator || creators.random_access_input_creator_with_metadata)
+        && !need_only_count;
 
     if (settings[Setting::max_memory_usage]
         && settings[Setting::min_chunk_bytes_for_parallel_parsing] * max_parsing_threads * 2 > settings[Setting::max_memory_usage])
@@ -482,10 +488,10 @@ InputFormatPtr FormatFactory::getInput(
             parallel_parsing = false;
     }
 
-    // Create the InputFormat in one of 3 ways.
+    // Create the InputFormat in one of a few ways.
 
     InputFormatPtr format;
-
+    // 1. Try parallel processing first
     if (parallel_parsing)
     {
         const auto & input_getter = creators.input_creator;
@@ -511,11 +517,21 @@ InputFormatPtr FormatFactory::getInput(
 
         format = std::make_shared<ParallelParsingInputFormat>(params);
     }
+    // 2. Prefer to use metadata-aware creator if we have object metadata
+    else if (creators.random_access_input_creator_with_metadata && object_with_metadata.has_value())
+    {
+        format = creators.random_access_input_creator_with_metadata(
+            buf, sample, format_settings, context->getReadSettings(), is_remote_fs,
+            parser_shared_resources, format_filter_info, object_with_metadata);
+    }
+    // 3. Use the normal random access creator for formats that need to jump around in the file
     else if (creators.random_access_input_creator)
     {
         format = creators.random_access_input_creator(
-            buf, sample, format_settings, context->getReadSettings(), is_remote_fs, parser_shared_resources, format_filter_info);
+            buf, sample, format_settings, context->getReadSettings(), is_remote_fs,
+            parser_shared_resources, format_filter_info);
     }
+    // 4. Use the normal creator for sequential reading
     else
     {
         format = creators.input_creator(buf, sample, row_input_format_params, format_settings);
@@ -540,6 +556,53 @@ InputFormatPtr FormatFactory::getInput(
     return format;
 }
 
+InputFormatPtr FormatFactory::getInput(
+    const String & name,
+    ReadBuffer & buf,
+    const Block & sample,
+    const ContextPtr & context,
+    UInt64 max_block_size,
+    const std::optional<FormatSettings> & format_settings,
+    FormatParserSharedResourcesPtr parser_shared_resources,
+    FormatFilterInfoPtr format_filter_info,
+    bool is_remote_fs,
+    CompressionMethod compression,
+    bool need_only_count,
+    const std::optional<UInt64> & max_block_size_bytes,
+    const std::optional<UInt64> & min_block_size_rows,
+    const std::optional<UInt64> & min_block_size_bytes) const
+{
+    return getInputImpl(name, buf, sample, context, max_block_size, std::nullopt,
+                       format_settings, parser_shared_resources, format_filter_info,
+                       is_remote_fs, compression, need_only_count,
+                       max_block_size_bytes, min_block_size_rows, min_block_size_bytes);
+}
+
+// Overload with metadata
+InputFormatPtr FormatFactory::getInputWithMetadata(
+    const String & name,
+    ReadBuffer & buf,
+    const Block & sample,
+    const ContextPtr & context,
+    UInt64 max_block_size,
+    const std::optional<RelativePathWithMetadata> & object_with_metadata,
+    const std::optional<FormatSettings> & format_settings,
+    FormatParserSharedResourcesPtr parser_shared_resources,
+    FormatFilterInfoPtr format_filter_info,
+    bool is_remote_fs,
+    CompressionMethod compression,
+    bool need_only_count,
+    const std::optional<UInt64> & max_block_size_bytes,
+    const std::optional<UInt64> & min_block_size_rows,
+    const std::optional<UInt64> & min_block_size_bytes) const
+{
+    chassert(object_with_metadata.has_value());
+    return getInputImpl(name, buf, sample, context, max_block_size, object_with_metadata,
+                       format_settings, parser_shared_resources, format_filter_info,
+                       is_remote_fs, compression, need_only_count,
+                       max_block_size_bytes, min_block_size_rows, min_block_size_bytes);
+}
+
 std::unique_ptr<ReadBuffer> FormatFactory::wrapReadBufferIfNeeded(
     ReadBuffer & buf,
     CompressionMethod compression,
@@ -765,6 +828,17 @@ void FormatFactory::registerRandomAccessInputFormat(const String & name, RandomA
     KnownFormatNames::instance().add(name, /* case_insensitive = */ true);
 }
 
+void FormatFactory::registerRandomAccessInputFormatWithMetadata(const String & name, RandomAccessInputCreatorWithMetadata input_creator)
+{
+    chassert(input_creator);
+    auto & creators = getOrCreateCreators(name);
+    if (creators.input_creator || creators.random_access_input_creator_with_metadata)
+        throw Exception(ErrorCodes::LOGICAL_ERROR, "FormatFactory: Input format {} is already registered", name);
+    creators.random_access_input_creator_with_metadata = std::move(input_creator);
+    registerFileExtension(name, name);
+    KnownFormatNames::instance().add(name, /* case_insensitive = */ true);
+}
+
 void FormatFactory::registerNonTrivialPrefixAndSuffixChecker(const String & name, NonTrivialPrefixAndSuffixChecker non_trivial_prefix_and_suffix_checker)
 {
     auto & target = getOrCreateCreators(name).non_trivial_prefix_and_suffix_checker;
@@ -993,7 +1067,7 @@ String FormatFactory::getAdditionalInfoForSchemaCache(const String & name, const
 bool FormatFactory::isInputFormat(const String & name) const
 {
     auto it = dict.find(boost::to_lower_copy(name));
-    return it != dict.end() && (it->second.input_creator || it->second.random_access_input_creator);
+    return it != dict.end() && (it->second.input_creator || it->second.random_access_input_creator || it->second.random_access_input_creator_with_metadata);
 }
 
 bool FormatFactory::isOutputFormat(const String & name) const
@@ -1052,7 +1126,7 @@ std::vector<String> FormatFactory::getAllInputFormats() const
     std::vector<String> input_formats;
     for (const auto & [format_name, creators] : dict)
     {
-        if (creators.input_creator || creators.random_access_input_creator)
+        if (creators.input_creator || creators.random_access_input_creator || creators.random_access_input_creator_with_metadata)
             input_formats.push_back(format_name);
     }
 
diff --git a/src/Formats/FormatFactory.h b/src/Formats/FormatFactory.h
index 3186ca7ad95d..ae924e274167 100644
--- a/src/Formats/FormatFactory.h
+++ b/src/Formats/FormatFactory.h
@@ -7,6 +7,7 @@
 #include <IO/CompressionMethod.h>
 #include <IO/ParallelReadBuffer.h>
 #include <Interpreters/Context_fwd.h>
+#include <Disks/DiskObjectStorage/ObjectStorages/IObjectStorage.h>
 #include <base/types.h>
 #include <Common/Allocator.h>
 #include <Common/NamePrompter.h>
@@ -112,11 +113,21 @@ class FormatFactory final : private boost::noncopyable, public IHints<2>
         FormatParserSharedResourcesPtr parser_shared_resources,
         FormatFilterInfoPtr format_filter_info)>;
 
+    using RandomAccessInputCreatorWithMetadata = std::function<InputFormatPtr(
+        ReadBuffer & buf,
+        const Block & header,
+        const FormatSettings & settings,
+        const ReadSettings & read_settings,
+        bool is_remote_fs,
+        FormatParserSharedResourcesPtr parser_shared_resources,
+        FormatFilterInfoPtr format_filter_info,
+        const std::optional<RelativePathWithMetadata> & object_with_metadata)>;
+
     using OutputCreator = std::function<OutputFormatPtr(
-            WriteBuffer & buf,
-            const Block & sample,
-            const FormatSettings & settings,
-            FormatFilterInfoPtr format_filter_info)>;
+        WriteBuffer & buf,
+        const Block & sample,
+        const FormatSettings & settings,
+        FormatFilterInfoPtr format_filter_info)>;
 
     /// Some input formats can have non trivial readPrefix() and readSuffix(),
     /// so in some cases there is no possibility to use parallel parsing.
@@ -130,7 +141,10 @@ class FormatFactory final : private boost::noncopyable, public IHints<2>
     /// Obtain HTTP content-type for the output format.
     using ContentTypeGetter = std::function<String(const std::optional<FormatSettings> & settings)>;
 
-    using SchemaReaderCreator = std::function<SchemaReaderPtr(ReadBuffer & in, const FormatSettings & settings)>;
+    using SchemaReaderCreator = std::function<SchemaReaderPtr(
+        ReadBuffer & in,
+        const FormatSettings & settings)>;
+
     using ExternalSchemaReaderCreator = std::function<ExternalSchemaReaderPtr(const FormatSettings & settings)>;
 
     /// Some formats can extract different schemas from the same source depending on
@@ -153,6 +167,7 @@ class FormatFactory final : private boost::noncopyable, public IHints<2>
         FileBucketInfoCreator file_bucket_info_creator;
         BucketSplitterCreator bucket_splitter_creator;
         RandomAccessInputCreator random_access_input_creator;
+        RandomAccessInputCreatorWithMetadata random_access_input_creator_with_metadata;
         OutputCreator output_creator;
         FileSegmentationEngineCreator file_segmentation_engine_creator;
         SchemaReaderCreator schema_reader_creator;
@@ -171,6 +186,22 @@ class FormatFactory final : private boost::noncopyable, public IHints<2>
     using FormatsDictionary = std::unordered_map<String, Creators>;
     using FileExtensionFormats = std::unordered_map<String, String>;
 
+    InputFormatPtr getInputImpl(
+        const String & name,
+        ReadBuffer & buf,
+        const Block & sample,
+        const ContextPtr & context,
+        UInt64 max_block_size,
+        const std::optional<RelativePathWithMetadata> & object_with_metadata,
+        const std::optional<FormatSettings> & format_settings,
+        FormatParserSharedResourcesPtr parser_shared_resources,
+        FormatFilterInfoPtr format_filter_info,
+        bool is_remote_fs,
+        CompressionMethod compression,
+        bool need_only_count,
+        const std::optional<UInt64> & max_block_size_bytes,
+        const std::optional<UInt64> & min_block_size_rows,
+        const std::optional<UInt64> & min_block_size_bytes) const;
 public:
     static FormatFactory & instance();
 
@@ -199,6 +230,27 @@ class FormatFactory final : private boost::noncopyable, public IHints<2>
         const std::optional<UInt64> & min_block_size_rows = std::nullopt,
         const std::optional<UInt64> & min_block_size_bytes = std::nullopt) const;
 
+    /// much the same as getInput but allows for passing metadata from object storage
+    InputFormatPtr getInputWithMetadata(
+        const String & name,
+        ReadBuffer & buf,
+        const Block & sample,
+        const ContextPtr & context,
+        UInt64 max_block_size,
+        const std::optional<RelativePathWithMetadata> & object_with_metadata,
+        const std::optional<FormatSettings> & format_settings = std::nullopt,
+        FormatParserSharedResourcesPtr parser_shared_resources = nullptr,
+        FormatFilterInfoPtr format_filter_info = nullptr,
+        // affects things like buffer sizes and parallel reading
+        bool is_remote_fs = false,
+        // allows to do: buf -> parallel read -> decompression,
+        // because parallel read after decompression is not possible
+        CompressionMethod compression = CompressionMethod::None,
+        bool need_only_count = false,
+        const std::optional<UInt64> & max_block_size_bytes = std::nullopt,
+        const std::optional<UInt64> & min_block_size_rows = std::nullopt,
+        const std::optional<UInt64> & min_block_size_bytes = std::nullopt) const;
+
     /// Checks all preconditions. Returns ordinary format if parallel formatting cannot be done.
     OutputFormatPtr getOutputFormatParallelIfPossible(
         const String & name,
@@ -222,6 +274,14 @@ class FormatFactory final : private boost::noncopyable, public IHints<2>
     /// Content-Type to set when sending HTTP response with this output format.
     String getContentType(const String & name, const std::optional<FormatSettings> & settings) const;
 
+    /// overload for formats that support object storage metadata
+    SchemaReaderPtr getSchemaReader(
+        const String & name,
+        ReadBuffer & buf,
+        const ContextPtr & context,
+        const RelativePathWithMetadata & metadata,
+        const std::optional<FormatSettings> & format_settings = std::nullopt) const;
+
     SchemaReaderPtr getSchemaReader(
         const String & name,
         ReadBuffer & buf,
@@ -250,6 +310,7 @@ class FormatFactory final : private boost::noncopyable, public IHints<2>
     /// Register format by its name.
     void registerInputFormat(const String & name, InputCreator input_creator);
     void registerRandomAccessInputFormat(const String & name, RandomAccessInputCreator input_creator);
+    void registerRandomAccessInputFormatWithMetadata(const String & name, RandomAccessInputCreatorWithMetadata input_creator_with_metadata);
     void registerOutputFormat(const String & name, OutputCreator output_creator);
 
     /// Register file extension for format
diff --git a/src/Interpreters/Context.cpp b/src/Interpreters/Context.cpp
index bc75c583fc30..be97518806e3 100644
--- a/src/Interpreters/Context.cpp
+++ b/src/Interpreters/Context.cpp
@@ -43,6 +43,8 @@
 #include <Storages/MergeTree/TextIndexCache.h>
 #include <Storages/ObjectStorage/DataLakes/Iceberg/IcebergMetadataFilesCache.h>
 #include <Storages/ObjectStorageQueue/ObjectStorageQueueMetadataFactory.h>
+#include <Processors/Formats/Impl/ParquetMetadataCache.h>
+#include <Storages/StreamingStorageRegistry.h>
 #include <Storages/MergeTree/VectorSimilarityIndexCache.h>
 #include <Storages/Distributed/DistributedSettings.h>
 #include <Storages/CompressionCodecSelector.h>
@@ -535,6 +537,9 @@ struct ContextSharedPart : boost::noncopyable
     mutable MMappedFileCachePtr mmap_cache TSA_GUARDED_BY(mutex);                     /// Cache of mmapped files to avoid frequent open/map/unmap/close and to reuse from several threads.
 #if USE_AVRO
     mutable IcebergMetadataFilesCachePtr iceberg_metadata_files_cache TSA_GUARDED_BY(mutex);   /// Cache of deserialized iceberg metadata files.
+#endif
+#if USE_PARQUET
+    mutable ParquetMetadataCachePtr parquet_metadata_cache TSA_GUARDED_BY(mutex);   /// Cache of deserialized parquet metadata files.
 #endif
     AsynchronousMetrics * asynchronous_metrics TSA_GUARDED_BY(mutex) = nullptr;       /// Points to asynchronous metrics
     mutable PageCachePtr page_cache TSA_GUARDED_BY(mutex);                            /// Userspace page cache.
@@ -4115,6 +4120,49 @@ void Context::clearIcebergMetadataFilesCache() const
 }
 #endif
 
+#if USE_PARQUET
+void Context::setParquetMetadataCache(const String & cache_policy, size_t max_size_in_bytes, size_t max_entries, double size_ratio)
+{
+    std::lock_guard lock(shared->mutex);
+
+    if (shared->parquet_metadata_cache)
+        throw Exception(ErrorCodes::LOGICAL_ERROR, "Parquet metadata cache has been already created.");
+
+    shared->parquet_metadata_cache = std::make_shared<ParquetMetadataCache>(cache_policy, max_size_in_bytes, max_entries, size_ratio);
+}
+
+void Context::updateParquetMetadataCacheConfiguration(const Poco::Util::AbstractConfiguration & config)
+{
+    std::lock_guard lock(shared->mutex);
+
+    if (!shared->parquet_metadata_cache)
+        throw Exception(ErrorCodes::LOGICAL_ERROR, "Parquet metadata cache was not created yet.");
+
+    size_t max_size_in_bytes = config.getUInt64("parquet_metadata_cache_size", DEFAULT_PARQUET_METADATA_CACHE_MAX_SIZE);
+    size_t max_entries = config.getUInt64("parquet_metadata_cache_max_entries", DEFAULT_PARQUET_METADATA_CACHE_MAX_ENTRIES);
+    shared->parquet_metadata_cache->setMaxSizeInBytes(max_size_in_bytes);
+    shared->parquet_metadata_cache->setMaxCount(max_entries);
+}
+
+std::shared_ptr<ParquetMetadataCache> Context::getParquetMetadataCache() const
+{
+    SharedLockGuard lock(shared->mutex);
+
+    if (!shared->parquet_metadata_cache)
+        throw Exception(ErrorCodes::LOGICAL_ERROR, "Parquet metadata cache was not created yet.");
+    return shared->parquet_metadata_cache;
+}
+
+void Context::clearParquetMetadataCache() const
+{
+    auto cache = getParquetMetadataCache();
+
+    /// Clear the cache without holding context mutex to avoid blocking context for a long time
+    if (cache)
+        cache->clear();
+}
+#endif
+
 void Context::setQueryConditionCache(const String & cache_policy, size_t max_size_in_bytes, double size_ratio)
 {
     std::lock_guard lock(shared->mutex);
diff --git a/src/Interpreters/Context.h b/src/Interpreters/Context.h
index 00b4d8b9fb85..2dfdd1691b1d 100644
--- a/src/Interpreters/Context.h
+++ b/src/Interpreters/Context.h
@@ -102,6 +102,7 @@ class PageCache;
 class MMappedFileCache;
 class UncompressedCache;
 class IcebergMetadataFilesCache;
+class ParquetMetadataCache;
 class VectorSimilarityIndexCache;
 class TextIndexDictionaryBlockCache;
 class TextIndexHeaderCache;
@@ -1376,6 +1377,13 @@ class Context: public ContextData, public std::enable_shared_from_this<Context>
     void clearIcebergMetadataFilesCache() const;
 #endif
 
+#if USE_PARQUET
+    void setParquetMetadataCache(const String & cache_policy, size_t max_size_in_bytes, size_t max_entries, double size_ratio);
+    void updateParquetMetadataCacheConfiguration(const Poco::Util::AbstractConfiguration & config);
+    std::shared_ptr<ParquetMetadataCache> getParquetMetadataCache() const;
+    void clearParquetMetadataCache() const;
+#endif
+
     void setAllowedDisksForTableEngines(std::unordered_set<String> && allowed_disks_) { allowed_disks = std::move(allowed_disks_); }
     const std::unordered_set<String> & getAllowedDisksForTableEngines() const { return allowed_disks; }
 
diff --git a/src/Interpreters/InterpreterSystemQuery.cpp b/src/Interpreters/InterpreterSystemQuery.cpp
index 8993c8a025bf..86c721125aed 100644
--- a/src/Interpreters/InterpreterSystemQuery.cpp
+++ b/src/Interpreters/InterpreterSystemQuery.cpp
@@ -403,6 +403,14 @@ BlockIO InterpreterSystemQuery::execute()
             break;
 #else
             throw Exception(ErrorCodes::SUPPORT_IS_DISABLED, "The server was compiled without the support for AVRO");
+#endif
+        case Type::CLEAR_PARQUET_METADATA_CACHE:
+#if USE_PARQUET
+            getContext()->checkAccess(AccessType::SYSTEM_DROP_PARQUET_METADATA_CACHE);
+            system_context->clearParquetMetadataCache();
+            break;
+#else
+            throw Exception(ErrorCodes::SUPPORT_IS_DISABLED, "The server was compiled without the support for Parquet");
 #endif
         case Type::CLEAR_PRIMARY_INDEX_CACHE:
             getContext()->checkAccess(AccessType::SYSTEM_DROP_PRIMARY_INDEX_CACHE);
@@ -2012,6 +2020,7 @@ AccessRightsElements InterpreterSystemQuery::getRequiredAccessForDDLOnCluster()
         case Type::CLEAR_CONNECTIONS_CACHE:
         case Type::CLEAR_MARK_CACHE:
         case Type::CLEAR_ICEBERG_METADATA_CACHE:
+        case Type::CLEAR_PARQUET_METADATA_CACHE:
         case Type::CLEAR_PRIMARY_INDEX_CACHE:
         case Type::CLEAR_MMAP_CACHE:
         case Type::CLEAR_QUERY_CONDITION_CACHE:
diff --git a/src/Parsers/ASTSystemQuery.cpp b/src/Parsers/ASTSystemQuery.cpp
index 28b399e10ef3..15e723682f53 100644
--- a/src/Parsers/ASTSystemQuery.cpp
+++ b/src/Parsers/ASTSystemQuery.cpp
@@ -578,6 +578,7 @@ void ASTSystemQuery::formatImpl(WriteBuffer & ostr, const FormatSettings & setti
         case Type::DROP_PARQUET_METADATA_CACHE:
         case Type::CLEAR_ICEBERG_METADATA_CACHE:
         case Type::DROP_OBJECT_STORAGE_LIST_OBJECTS_CACHE:
+        case Type::CLEAR_PARQUET_METADATA_CACHE:
         case Type::RESET_COVERAGE:
         case Type::RESTART_REPLICAS:
         case Type::JEMALLOC_PURGE:
diff --git a/src/Parsers/ASTSystemQuery.h b/src/Parsers/ASTSystemQuery.h
index 358d4806257d..443bc537abc1 100644
--- a/src/Parsers/ASTSystemQuery.h
+++ b/src/Parsers/ASTSystemQuery.h
@@ -45,6 +45,7 @@ class ASTSystemQuery : public IAST, public ASTQueryWithOnCluster
         CLEAR_COMPILED_EXPRESSION_CACHE,
         DROP_PARQUET_METADATA_CACHE,
         CLEAR_ICEBERG_METADATA_CACHE,
+        CLEAR_PARQUET_METADATA_CACHE,
         CLEAR_FILESYSTEM_CACHE,
         CLEAR_DISTRIBUTED_CACHE,
         CLEAR_DISK_METADATA_CACHE,
diff --git a/src/Parsers/ParserSystemQuery.cpp b/src/Parsers/ParserSystemQuery.cpp
index a70c0feca955..38e1a7ff7e1f 100644
--- a/src/Parsers/ParserSystemQuery.cpp
+++ b/src/Parsers/ParserSystemQuery.cpp
@@ -258,6 +258,7 @@ bool ParserSystemQuery::parseImpl(IParser::Pos & pos, ASTPtr & node, Expected &
             {"DROP QUERY CACHE", Type::CLEAR_QUERY_CACHE},
             {"DROP COMPILED EXPRESSION CACHE", Type::CLEAR_COMPILED_EXPRESSION_CACHE},
             {"DROP ICEBERG METADATA CACHE", Type::CLEAR_ICEBERG_METADATA_CACHE},
+            {"DROP PARQUET METADATA CACHE", Type::CLEAR_PARQUET_METADATA_CACHE},
             {"DROP FILESYSTEM CACHE", Type::CLEAR_FILESYSTEM_CACHE},
             {"DROP DISTRIBUTED CACHE", Type::CLEAR_DISTRIBUTED_CACHE},
             {"DROP DISK METADATA CACHE", Type::CLEAR_DISK_METADATA_CACHE},
diff --git a/src/Processors/Formats/IInputFormat.h b/src/Processors/Formats/IInputFormat.h
index bf0eccf2fa88..1a9cb76450f7 100644
--- a/src/Processors/Formats/IInputFormat.h
+++ b/src/Processors/Formats/IInputFormat.h
@@ -129,9 +129,6 @@ class IInputFormat : public ISource
 
     void needOnlyCount() { need_only_count = true; }
 
-    /// Set additional info/key/id related to underlying storage of the ReadBuffer
-    virtual void setStorageRelatedUniqueKey(const Settings & /*settings*/, const String & /*key*/) {}
-
 protected:
     ReadBuffer & getReadBuffer() const { chassert(in); return *in; }
 
diff --git a/src/Processors/Formats/Impl/Parquet/ReadManager.cpp b/src/Processors/Formats/Impl/Parquet/ReadManager.cpp
index d88f9b59a6cb..e08a71019927 100644
--- a/src/Processors/Formats/Impl/Parquet/ReadManager.cpp
+++ b/src/Processors/Formats/Impl/Parquet/ReadManager.cpp
@@ -47,7 +47,9 @@ std::optional<size_t> AtomicBitSet::findFirst()
 void ReadManager::init(FormatParserSharedResourcesPtr parser_shared_resources_, const std::optional<std::vector<size_t>> & buckets_to_read_)
 {
     parser_shared_resources = parser_shared_resources_;
-    reader.file_metadata = Reader::readFileMetaData(reader.prefetcher);
+
+    if (reader.file_metadata.schema.empty())
+        reader.file_metadata = Reader::readFileMetaData(reader.prefetcher);
 
     if (buckets_to_read_)
     {
diff --git a/src/Processors/Formats/Impl/ParquetBlockInputFormat.cpp b/src/Processors/Formats/Impl/ParquetBlockInputFormat.cpp
index 3a60e4666ce0..1a4f3fd70539 100644
--- a/src/Processors/Formats/Impl/ParquetBlockInputFormat.cpp
+++ b/src/Processors/Formats/Impl/ParquetBlockInputFormat.cpp
@@ -1,5 +1,7 @@
 #include <atomic>
 #include <memory>
+#include <fmt/format.h>
+#include <Core/Settings.h>
 #include <mutex>
 #include <optional>
 #include <Processors/Formats/Impl/ParquetBlockInputFormat.h>
@@ -31,13 +33,13 @@
 #include <Processors/Formats/Impl/ArrowBufferedStreams.h>
 #include <Processors/Formats/Impl/ArrowColumnToCHColumn.h>
 #include <Processors/Formats/Impl/ArrowFieldIndexUtil.h>
+#include <Interpreters/Context.h>
+#include <Common/CurrentThread.h>
 #include <DataTypes/NestedUtils.h>
 #include <DataTypes/DataTypeLowCardinality.h>
 #include <DataTypes/DataTypeNullable.h>
 #include <Common/FieldAccurateComparison.h>
 #include <Processors/Formats/Impl/Parquet/parquetBloomFilterHash.h>
-#include <Interpreters/Context.h>
-#include <Processors/Formats/Impl/ParquetFileMetaDataCache.h>
 #include <Interpreters/convertFieldToType.h>
 #include <Storages/MergeTree/KeyCondition.h>
 #include <Processors/Formats/Impl/ParquetV3BlockInputFormat.h>
@@ -69,12 +71,7 @@ namespace DB
 
 namespace Setting
 {
-    extern const SettingsBool input_format_parquet_use_metadata_cache;
-}
-
-namespace ServerSetting
-{
-    extern const ServerSettingsUInt64 input_format_parquet_metadata_cache_max_size;
+    extern const SettingsBool use_parquet_metadata_cache;
 }
 
 namespace ErrorCodes
@@ -83,6 +80,7 @@ namespace ErrorCodes
     extern const int MEMORY_LIMIT_EXCEEDED;
     extern const int CANNOT_READ_ALL_DATA;
     extern const int CANNOT_PARSE_NUMBER;
+    extern const int LOGICAL_ERROR;
 }
 
 namespace
@@ -567,30 +565,6 @@ std::shared_ptr<parquet::FileMetaData> ParquetBlockInputFormat::readMetadataFrom
     return parquet::ReadMetaData(arrow_file);
 }
 
-std::shared_ptr<parquet::FileMetaData> ParquetBlockInputFormat::getFileMetaData()
-{
-    // in-memory cache is not implemented for local file operations, only for remote files
-    // there is a chance the user sets `input_format_parquet_use_metadata_cache=1` for a local file operation
-    // and the cache_key won't be set. Therefore, we also need to check for metadata_cache.key
-    if (!metadata_cache.use_cache || metadata_cache.key.empty())
-    {
-        return readMetadataFromFile();
-    }
-
-    auto [parquet_file_metadata, loaded] = ParquetFileMetaDataCache::instance()->getOrSet(
-        metadata_cache.key,
-        [&]()
-        {
-            return readMetadataFromFile();
-        }
-    );
-    if (loaded)
-        ProfileEvents::increment(ProfileEvents::ParquetMetaDataCacheMisses);
-    else
-        ProfileEvents::increment(ProfileEvents::ParquetMetaDataCacheHits);
-    return parquet_file_metadata;
-}
-
 void ParquetBlockInputFormat::createArrowFileIfNotCreated()
 {
     if (arrow_file)
@@ -750,7 +724,7 @@ void ParquetBlockInputFormat::initializeIfNeeded()
     if (is_stopped)
         return;
 
-    metadata = getFileMetaData();
+    metadata = parquet::ReadMetaData(arrow_file);
     if (buckets_to_read)
     {
         std::unordered_set<size_t> set_to_read(buckets_to_read->row_group_ids.begin(), buckets_to_read->row_group_ids.end());
@@ -936,12 +910,6 @@ void ParquetBlockInputFormat::initializeIfNeeded()
     }
 }
 
-void ParquetBlockInputFormat::setStorageRelatedUniqueKey(const Settings & settings, const String & key_)
-{
-    metadata_cache.key = key_;
-    metadata_cache.use_cache = settings[Setting::input_format_parquet_use_metadata_cache];
-}
-
 void ParquetBlockInputFormat::initializeRowGroupBatchReader(size_t row_group_batch_idx)
 {
     bool row_group_prefetch = io_pool != nullptr;
@@ -1476,8 +1444,7 @@ void registerInputFormatParquet(FormatFactory & factory)
             return std::make_shared<ParquetFileBucketInfo>();
         }
     );
-
-    factory.registerRandomAccessInputFormat(
+    factory.registerRandomAccessInputFormatWithMetadata(
         "Parquet",
         [](ReadBuffer & buf,
            const Block & sample,
@@ -1485,31 +1452,74 @@ void registerInputFormatParquet(FormatFactory & factory)
            const ReadSettings & read_settings,
            bool is_remote_fs,
            FormatParserSharedResourcesPtr parser_shared_resources,
-           FormatFilterInfoPtr format_filter_info) -> InputFormatPtr
+           FormatFilterInfoPtr format_filter_info,
+           const std::optional<RelativePathWithMetadata> & object_with_metadata) -> InputFormatPtr
         {
+            auto lambda_logger = getLogger("ParquetMetadataCache");
             size_t min_bytes_for_seek
                 = is_remote_fs ? read_settings.remote_read_min_bytes_for_seek : settings.parquet.local_read_min_bytes_for_seek;
             if (settings.parquet.use_native_reader_v3)
             {
+                LOG_TRACE(lambda_logger, "using native reader v3 in ParquetBlockInputFormat with metadata cache");
+                ParquetMetadataCachePtr metadata_cache = CurrentThread::getQueryContext()->getParquetMetadataCache();
                 return std::make_shared<ParquetV3BlockInputFormat>(
                     buf,
                     std::make_shared<const Block>(sample),
                     settings,
                     std::move(parser_shared_resources),
                     std::move(format_filter_info),
-                    min_bytes_for_seek);
+                    min_bytes_for_seek,
+                    metadata_cache,
+                    object_with_metadata
+                );
             }
             else
             {
-                return std::make_shared<ParquetBlockInputFormat>(
-                    buf,
-                    std::make_shared<const Block>(sample),
-                    settings,
-                    std::move(parser_shared_resources),
-                    std::move(format_filter_info),
-                    min_bytes_for_seek);
+                throw Exception(
+                    ErrorCodes::LOGICAL_ERROR,
+                    "Implementation of ParquetBlockInputFormat using arrow reader didn't require blob metadata for initialization");
             }
         });
+    factory.registerRandomAccessInputFormat(
+        "Parquet",
+        [](ReadBuffer & buf,
+        const Block & sample,
+        const FormatSettings & settings,
+        const ReadSettings & read_settings,
+        bool is_remote_fs,
+        FormatParserSharedResourcesPtr parser_shared_resources,
+        FormatFilterInfoPtr format_filter_info) -> InputFormatPtr
+    {
+        auto lambda_logger = getLogger("ParquetMetadataCache");
+        size_t min_bytes_for_seek
+            = is_remote_fs ? read_settings.remote_read_min_bytes_for_seek : settings.parquet.local_read_min_bytes_for_seek;
+        if (settings.parquet.use_native_reader_v3)
+        {
+            LOG_TRACE(lambda_logger, "using native reader v3 in ParquetBlockInputFormat with no metadata cache");
+            return std::make_shared<ParquetV3BlockInputFormat>(
+                buf,
+                std::make_shared<const Block>(sample),
+                settings,
+                std::move(parser_shared_resources),
+                std::move(format_filter_info),
+                min_bytes_for_seek,
+                nullptr,
+                std::nullopt
+            );
+        }
+        else
+        {
+            LOG_TRACE(lambda_logger, "using arrow reader in ParquetBlockInputFormat without metadata cache");
+            return std::make_shared<ParquetBlockInputFormat>(
+                buf,
+                    std::make_shared<const Block>(sample),
+                settings,
+                std::move(parser_shared_resources),
+                std::move(format_filter_info),
+                min_bytes_for_seek
+            );
+        }
+    });
     factory.markFormatSupportsSubsetOfColumns("Parquet");
     factory.registerPrewhereSupportChecker("Parquet", [](const FormatSettings & settings)
     {
@@ -1524,15 +1534,21 @@ void registerParquetSchemaReader(FormatFactory & factory)
             return std::make_shared<ParquetBucketSplitter>();
         });
     factory.registerSchemaReader(
-        "Parquet",
-        [](ReadBuffer & buf, const FormatSettings & settings) -> SchemaReaderPtr
+        "Parquet", [](ReadBuffer & buf, const FormatSettings & settings) -> SchemaReaderPtr
         {
+            auto lambda_logger = getLogger("ParquetMetadataCache");
             if (settings.parquet.use_native_reader_v3)
+            {
+                LOG_TRACE(lambda_logger, "using native reader v3 in ParquetSchemaReader");
                 return std::make_shared<NativeParquetSchemaReader>(buf, settings);
+            }
             else
+            {
+                LOG_TRACE(lambda_logger, "using arrow reader in ParquetSchemaReader");
                 return std::make_shared<ArrowParquetSchemaReader>(buf, settings);
+            }
         }
-        );
+    );
 
     factory.registerAdditionalInfoForSchemaCacheGetter(
         "Parquet",
diff --git a/src/Processors/Formats/Impl/ParquetBlockInputFormat.h b/src/Processors/Formats/Impl/ParquetBlockInputFormat.h
index 29e34171e432..a971a29e9884 100644
--- a/src/Processors/Formats/Impl/ParquetBlockInputFormat.h
+++ b/src/Processors/Formats/Impl/ParquetBlockInputFormat.h
@@ -369,13 +369,6 @@ class ParquetBlockInputFormat : public IInputFormat
     bool is_initialized = false;
     std::optional<std::unordered_map<String, String>> parquet_names_to_clickhouse;
     std::optional<std::unordered_map<String, String>> clickhouse_names_to_parquet;
-    struct Cache
-    {
-        String key;
-        bool use_cache = false;
-    };
-
-    Cache metadata_cache;
 };
 
 class ArrowParquetSchemaReader : public ISchemaReader
diff --git a/src/Processors/Formats/Impl/ParquetMetadataCache.cpp b/src/Processors/Formats/Impl/ParquetMetadataCache.cpp
new file mode 100644
index 000000000000..dce344a0554b
--- /dev/null
+++ b/src/Processors/Formats/Impl/ParquetMetadataCache.cpp
@@ -0,0 +1,128 @@
+#include <Processors/Formats/Impl/ParquetMetadataCache.h>
+
+#if USE_PARQUET
+
+namespace CurrentMetrics
+{
+extern const Metric ParquetMetadataCacheBytes;
+extern const Metric ParquetMetadataCacheFiles;
+}
+
+namespace ProfileEvents
+{
+extern const Event ParquetMetadataCacheWeightLost;
+}
+
+namespace DB
+{
+
+bool ParquetMetadataCacheKey::operator==(const ParquetMetadataCacheKey & other) const
+{
+    return file_path == other.file_path && etag == other.etag;
+}
+
+size_t ParquetMetadataCacheKeyHash::operator()(const ParquetMetadataCacheKey & key) const
+{
+    size_t hash = 0;
+    boost::hash_combine(hash, CityHash_v1_0_2::CityHash64(key.file_path.data(), key.file_path.size()));
+    boost::hash_combine(hash, CityHash_v1_0_2::CityHash64(key.etag.data(), key.etag.size()));
+    return hash;
+}
+
+ParquetMetadataCacheCell::ParquetMetadataCacheCell(parquet::format::FileMetaData metadata_)
+    : metadata(std::move(metadata_))
+    , memory_bytes(calculateMemorySize() + SIZE_IN_MEMORY_OVERHEAD)
+{
+}
+size_t ParquetMetadataCacheCell::calculateMemorySize() const
+{
+    size_t total_size = sizeof(metadata);
+
+    // SchemaElements parquet::format::SchemaElement
+    for (const parquet::format::SchemaElement & element : metadata.schema)
+    {
+        total_size += sizeof(element);
+        total_size += element.name.capacity();
+    }
+
+    // RowGroups parquet::format::RowGroup
+    for (const parquet::format::RowGroup & row_group : metadata.row_groups)
+    {
+        total_size += sizeof(row_group);
+        total_size += row_group.sorting_columns.capacity() * sizeof(parquet::format::SortingColumn);
+        for (const parquet::format::ColumnChunk & column_chunk : row_group.columns)
+        {
+            total_size += sizeof(column_chunk);
+
+            total_size += column_chunk.file_path.capacity();
+            total_size += column_chunk.encrypted_column_metadata.capacity();
+
+            // ColumnMetaData parquet::format::ColumnMetaData
+            for (const auto & path : column_chunk.meta_data.path_in_schema)
+                total_size += path.capacity();
+
+            for (const auto & kv : column_chunk.meta_data.key_value_metadata)
+                total_size += kv.key.capacity() + kv.value.capacity();
+
+            total_size += column_chunk.meta_data.encodings.capacity() * sizeof(parquet::format::Encoding::type);
+            total_size += column_chunk.meta_data.encoding_stats.capacity() * sizeof(parquet::format::PageEncodingStats);
+            total_size += column_chunk.meta_data.geospatial_statistics.geospatial_types.capacity() * sizeof(int32_t);
+
+            total_size += column_chunk.meta_data.statistics.min_value.capacity();
+            total_size += column_chunk.meta_data.statistics.max_value.capacity();
+            total_size += column_chunk.meta_data.statistics.min.capacity();
+            total_size += column_chunk.meta_data.statistics.max.capacity();
+
+            total_size += column_chunk.meta_data.size_statistics.repetition_level_histogram.capacity() * sizeof(int64_t);
+            total_size += column_chunk.meta_data.size_statistics.definition_level_histogram.capacity() * sizeof(int64_t);
+        }
+    }
+
+    // KeyValueMetadata parquet::format::KeyValueMetadata
+    for (const auto & kv : metadata.key_value_metadata)
+        total_size += kv.key.capacity() + kv.value.capacity();
+
+    // ColumnOrder parquet::format::ColumnOrders
+    for (const auto & order : metadata.column_orders)
+        total_size += sizeof(order);
+
+    // Top-level fields
+    total_size += metadata.created_by.capacity();
+    total_size += metadata.footer_signing_key_metadata.capacity();
+
+    /// String fields for metadata.encryption_algorithm.* generally are either very small
+    /// within SSO limits or are empty in the case where we don't use encryption at all
+    /// so I'm skipping accounting for them as they'll fall within our SIZE_IN_MEMORY_OVERHEAD
+    return total_size;
+}
+
+size_t ParquetMetadataCacheWeightFunction::operator()(const ParquetMetadataCacheCell & cell) const
+{
+    return cell.memory_bytes;
+}
+
+ParquetMetadataCache::ParquetMetadataCache(
+    const String & cache_policy,
+    size_t max_size_in_bytes,
+    size_t max_count,
+    double size_ratio)
+    : Base(cache_policy,
+        CurrentMetrics::ParquetMetadataCacheBytes,
+        CurrentMetrics::ParquetMetadataCacheFiles,
+        max_size_in_bytes,
+        max_count,
+        size_ratio)
+    , log(getLogger("ParquetMetadataCache"))
+{
+}
+ParquetMetadataCacheKey ParquetMetadataCache::createKey(const String & file_path, const String & file_attr)
+{
+    return ParquetMetadataCacheKey{file_path, file_attr};
+}
+void ParquetMetadataCache::onEntryRemoval(const size_t weight_loss, const MappedPtr &)
+{
+    LOG_TRACE(log, "cache eviction");
+    ProfileEvents::increment(ProfileEvents::ParquetMetadataCacheWeightLost, weight_loss);
+}
+}
+#endif
diff --git a/src/Processors/Formats/Impl/ParquetMetadataCache.h b/src/Processors/Formats/Impl/ParquetMetadataCache.h
new file mode 100644
index 000000000000..3306c555562d
--- /dev/null
+++ b/src/Processors/Formats/Impl/ParquetMetadataCache.h
@@ -0,0 +1,101 @@
+#pragma once
+#include "config.h"
+
+#if USE_PARQUET
+
+#include <boost/functional/hash.hpp>
+#include <Common/CacheBase.h>
+#include <Common/HashTable/Hash.h>
+#include <Common/ProfileEvents.h>
+#include <Common/CurrentMetrics.h>
+#include <Common/logger_useful.h>
+#include <parquet/file_reader.h>
+#include <parquet/metadata.h>
+#include <boost/noncopyable.hpp>
+#include <IO/WithFileName.h>
+#include <generated/parquet_types.h>
+
+#if USE_AWS_S3
+#include <IO/ReadBufferFromS3.h>
+#include <Disks/DiskObjectStorage/ObjectStorages/IObjectStorage.h>
+#endif
+
+namespace ProfileEvents
+{
+    extern const Event ParquetMetadataCacheMisses;
+    extern const Event ParquetMetadataCacheHits;
+}
+
+namespace DB
+{
+
+struct ParquetMetadataCacheKey
+{
+    String file_path;
+    String etag;
+    bool operator==(const ParquetMetadataCacheKey & other) const;
+};
+
+/// Hash function for ParquetMetadataCacheKey
+struct ParquetMetadataCacheKeyHash
+{
+    size_t operator()(const ParquetMetadataCacheKey & key) const;
+};
+
+/// Cache cell containing Parquet metadata
+struct ParquetMetadataCacheCell : private boost::noncopyable
+{
+    parquet::format::FileMetaData metadata;
+    UInt64 memory_bytes;
+    explicit ParquetMetadataCacheCell(parquet::format::FileMetaData metadata_);
+private:
+    static constexpr size_t SIZE_IN_MEMORY_OVERHEAD = 200;
+    size_t calculateMemorySize() const;
+};
+
+/// Weight function for metadata cache
+struct ParquetMetadataCacheWeightFunction
+{
+    size_t operator()(const ParquetMetadataCacheCell & cell) const;
+};
+
+/// Parquet metadata cache
+class ParquetMetadataCache : public CacheBase<ParquetMetadataCacheKey, ParquetMetadataCacheCell, ParquetMetadataCacheKeyHash, ParquetMetadataCacheWeightFunction>
+{
+public:
+    using Base = CacheBase<ParquetMetadataCacheKey, ParquetMetadataCacheCell, ParquetMetadataCacheKeyHash, ParquetMetadataCacheWeightFunction>;
+    ParquetMetadataCache(const String & cache_policy, size_t max_size_in_bytes, size_t max_count, double size_ratio);
+    static ParquetMetadataCacheKey createKey(const String & file_path, const String & file_attr);
+    /// Get or load Parquet metadata with caching
+    template <typename LoadFunc>
+    parquet::format::FileMetaData getOrSetMetadata(const ParquetMetadataCacheKey & key, LoadFunc && load_fn)
+    {
+        auto load_fn_wrapper = [&]()
+        {
+            auto metadata = load_fn();
+            LOG_TRACE(log, "got metadata from cache {} | {}", key.file_path, key.etag);
+            return std::make_shared<ParquetMetadataCacheCell>(std::move(metadata));
+        };
+        auto result = Base::getOrSet(key, load_fn_wrapper);
+        if (result.second)
+        {
+            LOG_TRACE(log, "cache miss {} | {}", key.file_path, key.etag);
+            ProfileEvents::increment(ProfileEvents::ParquetMetadataCacheMisses);
+        }
+        else
+        {
+            LOG_TRACE(log, "cache hit {} | {}", key.file_path, key.etag);
+            ProfileEvents::increment(ProfileEvents::ParquetMetadataCacheHits);
+        }
+        return result.first->metadata;
+    }
+
+private:
+    LoggerPtr log;
+    /// Called for each individual entry being evicted from cache
+    void onEntryRemoval(size_t weight_loss, const MappedPtr &) override;
+};
+
+using ParquetMetadataCachePtr = std::shared_ptr<ParquetMetadataCache>;
+}
+#endif
diff --git a/src/Processors/Formats/Impl/ParquetMetadataInputFormat.cpp b/src/Processors/Formats/Impl/ParquetMetadataInputFormat.cpp
index f7c2dab49db1..359cb3585d8c 100644
--- a/src/Processors/Formats/Impl/ParquetMetadataInputFormat.cpp
+++ b/src/Processors/Formats/Impl/ParquetMetadataInputFormat.cpp
@@ -41,11 +41,6 @@ namespace ErrorCodes
     extern const int BAD_ARGUMENTS;
 }
 
-namespace Setting
-{
-extern const SettingsBool input_format_parquet_use_metadata_cache;
-}
-
 static NamesAndTypesList getHeaderForParquetMetadata()
 {
     NamesAndTypesList names_and_types{
@@ -144,35 +139,10 @@ void checkHeader(const Block & header)
 static std::shared_ptr<parquet::FileMetaData> getFileMetadata(
     ReadBuffer & in,
     const FormatSettings & format_settings,
-    std::atomic<int> & is_stopped,
-    ParquetMetadataInputFormat::Cache metadata_cache)
+    std::atomic<int> & is_stopped)
 {
-    // in-memory cache is not implemented for local file operations, only for remote files
-    // there is a chance the user sets `input_format_parquet_use_metadata_cache=1` for a local file operation
-    // and the cache_key won't be set. Therefore, we also need to check for metadata_cache.key
-    if (!metadata_cache.use_cache || metadata_cache.key.empty())
-    {
-        auto arrow_file = asArrowFile(in, format_settings, is_stopped, "Parquet", PARQUET_MAGIC_BYTES, /* avoid_buffering */ true);
-        return parquet::ReadMetaData(arrow_file);
-    }
-
-    auto [parquet_file_metadata, loaded] = ParquetFileMetaDataCache::instance()->getOrSet(
-        metadata_cache.key,
-        [&]()
-        {
-            auto arrow_file = asArrowFile(in, format_settings, is_stopped, "Parquet", PARQUET_MAGIC_BYTES, /* avoid_buffering */ true);
-            return parquet::ReadMetaData(arrow_file);
-        }
-    );
-
-    if (loaded)
-        ProfileEvents::increment(ProfileEvents::ParquetMetaDataCacheMisses);
-    else
-        ProfileEvents::increment(ProfileEvents::ParquetMetaDataCacheHits);
-
-    return parquet_file_metadata;
-
-
+    auto arrow_file = asArrowFile(in, format_settings, is_stopped, "Parquet", PARQUET_MAGIC_BYTES, /* avoid_buffering */ true);
+    return parquet::ReadMetaData(arrow_file);
 }
 
 ParquetMetadataInputFormat::ParquetMetadataInputFormat(ReadBuffer & in_, SharedHeader header_, const FormatSettings & format_settings_)
@@ -187,7 +157,7 @@ Chunk ParquetMetadataInputFormat::read()
     if (done)
         return res;
 
-    auto metadata = getFileMetadata(*in, format_settings, is_stopped, metadata_cache);
+    auto metadata = getFileMetadata(*in, format_settings, is_stopped);
 
     const auto & header = getPort().getHeader();
     auto names_and_types = getHeaderForParquetMetadata();
@@ -528,12 +498,6 @@ void ParquetMetadataInputFormat::resetParser()
     done = false;
 }
 
-void ParquetMetadataInputFormat::setStorageRelatedUniqueKey(const Settings & settings, const String & key_)
-{
-    metadata_cache.key = key_;
-    metadata_cache.use_cache = settings[Setting::input_format_parquet_use_metadata_cache];
-}
-
 ParquetMetadataSchemaReader::ParquetMetadataSchemaReader(ReadBuffer & in_)
     : ISchemaReader(in_)
 {
diff --git a/src/Processors/Formats/Impl/ParquetMetadataInputFormat.h b/src/Processors/Formats/Impl/ParquetMetadataInputFormat.h
index 6b667dcc5b1e..81cf7890ee7e 100644
--- a/src/Processors/Formats/Impl/ParquetMetadataInputFormat.h
+++ b/src/Processors/Formats/Impl/ParquetMetadataInputFormat.h
@@ -62,14 +62,6 @@ class ParquetMetadataInputFormat : public IInputFormat
 
     void resetParser() override;
 
-    void setStorageRelatedUniqueKey(const Settings & settings, const String & key_) override;
-
-    struct Cache
-    {
-        String key;
-        bool use_cache = false;
-    };
-
 private:
     Chunk read() override;
 
@@ -86,8 +78,6 @@ class ParquetMetadataInputFormat : public IInputFormat
     const FormatSettings format_settings;
     bool done = false;
     std::atomic<int> is_stopped{0};
-
-    Cache metadata_cache;
 };
 
 class ParquetMetadataSchemaReader : public ISchemaReader
diff --git a/src/Processors/Formats/Impl/ParquetV3BlockInputFormat.cpp b/src/Processors/Formats/Impl/ParquetV3BlockInputFormat.cpp
index cea175837710..f0398658408b 100644
--- a/src/Processors/Formats/Impl/ParquetV3BlockInputFormat.cpp
+++ b/src/Processors/Formats/Impl/ParquetV3BlockInputFormat.cpp
@@ -37,12 +37,16 @@ ParquetV3BlockInputFormat::ParquetV3BlockInputFormat(
     const FormatSettings & format_settings_,
     FormatParserSharedResourcesPtr parser_shared_resources_,
     FormatFilterInfoPtr format_filter_info_,
-    size_t min_bytes_for_seek)
+    size_t min_bytes_for_seek,
+    ParquetMetadataCachePtr metadata_cache_,
+    const std::optional<RelativePathWithMetadata> & object_with_metadata_)
     : IInputFormat(header_, &buf)
     , format_settings(format_settings_)
     , read_options(convertReadOptions(format_settings))
     , parser_shared_resources(parser_shared_resources_)
     , format_filter_info(format_filter_info_)
+    , metadata_cache(metadata_cache_)
+    , object_with_metadata(object_with_metadata_)
 {
     read_options.min_bytes_for_seek = min_bytes_for_seek;
     read_options.bytes_per_read_task = min_bytes_for_seek * 4;
@@ -94,12 +98,29 @@ void ParquetV3BlockInputFormat::initializeIfNeeded()
             std::lock_guard lock(reader_mutex);
             reader.emplace();
             reader->reader.prefetcher.init(in, read_options, parser_shared_resources);
+            reader->reader.file_metadata = getFileMetadata(reader->reader.prefetcher);
             reader->reader.init(read_options, getPort().getHeader(), format_filter_info);
             reader->init(parser_shared_resources, buckets_to_read ? std::optional(buckets_to_read->row_group_ids) : std::nullopt);
         }
     }
 }
 
+parquet::format::FileMetaData ParquetV3BlockInputFormat::getFileMetadata(Parquet::Prefetcher & prefetcher) const
+{
+    if (metadata_cache && object_with_metadata.has_value() && object_with_metadata->metadata.has_value())
+    {
+        String file_name = object_with_metadata->getPath();
+        String etag = object_with_metadata->metadata->etag;
+        ParquetMetadataCacheKey cache_key = ParquetMetadataCache::createKey(file_name, etag);
+        return metadata_cache->getOrSetMetadata(
+            cache_key, [&]() { return Parquet::Reader::readFileMetaData(prefetcher); });
+    }
+    else
+    {
+        return Parquet::Reader::readFileMetaData(prefetcher);
+    }
+}
+
 Chunk ParquetV3BlockInputFormat::read()
 {
     if (need_only_count)
@@ -110,7 +131,8 @@ Chunk ParquetV3BlockInputFormat::read()
         /// Don't init Reader and ReadManager if we only need file metadata.
         Parquet::Prefetcher temp_prefetcher;
         temp_prefetcher.init(in, read_options, parser_shared_resources);
-        auto file_metadata = Parquet::Reader::readFileMetaData(temp_prefetcher);
+        parquet::format::FileMetaData file_metadata = getFileMetadata(temp_prefetcher);
+
 
         auto chunk = getChunkForCount(size_t(file_metadata.num_rows));
         chunk.getChunkInfos().add(std::make_shared<ChunkInfoRowNumbers>(0));
@@ -155,9 +177,9 @@ void ParquetV3BlockInputFormat::resetParser()
     IInputFormat::resetParser();
 }
 
-NativeParquetSchemaReader::NativeParquetSchemaReader(ReadBuffer & in_, const FormatSettings & format_settings)
+NativeParquetSchemaReader::NativeParquetSchemaReader(ReadBuffer & in_, const FormatSettings & format_settings_)
     : ISchemaReader(in_)
-    , read_options(convertReadOptions(format_settings))
+    , read_options(convertReadOptions(format_settings_))
 {
 }
 
diff --git a/src/Processors/Formats/Impl/ParquetV3BlockInputFormat.h b/src/Processors/Formats/Impl/ParquetV3BlockInputFormat.h
index e90559d93d90..dffd7c28605e 100644
--- a/src/Processors/Formats/Impl/ParquetV3BlockInputFormat.h
+++ b/src/Processors/Formats/Impl/ParquetV3BlockInputFormat.h
@@ -6,6 +6,7 @@
 #include <Processors/Formats/IInputFormat.h>
 #include <Processors/Formats/Impl/Parquet/ReadManager.h>
 #include <Processors/Formats/ISchemaReader.h>
+#include <Processors/Formats/Impl/ParquetMetadataCache.h>
 #include <Processors/Formats/Impl/ParquetBlockInputFormat.h>
 
 namespace DB
@@ -20,7 +21,9 @@ class ParquetV3BlockInputFormat : public IInputFormat
         const FormatSettings & format_settings,
         FormatParserSharedResourcesPtr parser_shared_resources_,
         FormatFilterInfoPtr format_filter_info_,
-        size_t min_bytes_for_seek);
+        size_t min_bytes_for_seek,
+        ParquetMetadataCachePtr metadata_cache_ = nullptr,
+        const std::optional<RelativePathWithMetadata> & object_with_metadata_ = std::nullopt);
 
     void resetParser() override;
 
@@ -44,6 +47,8 @@ class ParquetV3BlockInputFormat : public IInputFormat
     Parquet::ReadOptions read_options;
     FormatParserSharedResourcesPtr parser_shared_resources;
     FormatFilterInfoPtr format_filter_info;
+    ParquetMetadataCachePtr metadata_cache;
+    const std::optional<RelativePathWithMetadata> object_with_metadata;
 
     /// (This mutex is not important. It protects `reader.emplace` in a weird case where onCancel()
     ///  may be called in parallel with first read(). ReadManager itself is thread safe for that,
@@ -58,6 +63,8 @@ class ParquetV3BlockInputFormat : public IInputFormat
 
     void initializeIfNeeded();
     std::shared_ptr<ParquetFileBucketInfo> buckets_to_read;
+
+    parquet::format::FileMetaData getFileMetadata(Parquet::Prefetcher & prefetcher) const;
 };
 
 class NativeParquetSchemaReader : public ISchemaReader
@@ -72,7 +79,7 @@ class NativeParquetSchemaReader : public ISchemaReader
     void initializeIfNeeded();
 
     Parquet::ReadOptions read_options;
-    Parquet::parq::FileMetaData file_metadata;
+    parquet::format::FileMetaData file_metadata;
     bool initialized = false;
 };
 
diff --git a/src/Storages/ObjectStorage/StorageObjectStorageSource.cpp b/src/Storages/ObjectStorage/StorageObjectStorageSource.cpp
index d3ff0c5a6906..51bfbd10ef75 100644
--- a/src/Storages/ObjectStorage/StorageObjectStorageSource.cpp
+++ b/src/Storages/ObjectStorage/StorageObjectStorageSource.cpp
@@ -19,6 +19,7 @@
 #include <Interpreters/ExpressionActions.h>
 #include <Interpreters/convertFieldToType.h>
 #include <Processors/Executors/PullingPipelineExecutor.h>
+#include <Processors/Formats/Impl/ParquetMetadataCache.h>
 #include <Processors/Sources/ConstChunkGenerator.h>
 #include <Processors/Transforms/AddingDefaultsTransform.h>
 #include <Processors/Transforms/ExpressionTransform.h>
@@ -79,6 +80,8 @@ namespace Setting
     extern const SettingsUInt64 s3_path_filter_limit;
     extern const SettingsBool allow_experimental_iceberg_read_optimization;
     extern const SettingsBool use_object_storage_list_objects_cache;
+    extern const SettingsBool use_parquet_metadata_cache;
+    extern const SettingsBool input_format_parquet_use_native_reader_v3;
 }
 
 namespace ErrorCodes
@@ -856,6 +859,8 @@ StorageObjectStorageSource::ReaderHolder StorageObjectStorageSource::createReade
             return std::make_shared<FormatFilterInfo>(format_filter_info->filter_actions_dag, format_filter_info->context.lock(), mapper, nullptr, nullptr);
         }();
 
+        chassert(object_info->getObjectMetadata().has_value());
+
         LOG_DEBUG(
             log,
             "Reading object '{}', size: {} bytes, with format: {}",
@@ -863,8 +868,37 @@ StorageObjectStorageSource::ReaderHolder StorageObjectStorageSource::createReade
             object_info->getObjectMetadata()->size_bytes,
             object_info->getFileFormat().value_or(configuration->getFormat()));
 
-        auto input_format = FormatFactory::instance().getInput(
-            object_info->getFileFormat().value_or(configuration->getFormat()),
+        bool use_native_reader_v3 = format_settings.has_value()
+            ? format_settings->parquet.use_native_reader_v3
+            : context_->getSettingsRef()[Setting::input_format_parquet_use_native_reader_v3];
+
+        InputFormatPtr input_format;
+        if (context_->getSettingsRef()[Setting::use_parquet_metadata_cache] && use_native_reader_v3
+            && (object_info->getFileFormat().value_or(configuration->format) == "Parquet")
+            && !object_info->getObjectMetadata()->etag.empty())
+        {
+            const std::optional<RelativePathWithMetadata> object_with_metadata = object_info->relative_path_with_metadata;
+            input_format = FormatFactory::instance().getInputWithMetadata(
+                object_info->getFileFormat().value_or(configuration->format),
+                *read_buf,
+                initial_header,
+                context_,
+                max_block_size,
+                object_with_metadata,
+                format_settings,
+                parser_shared_resources,
+                filter_info,
+                true /* is_remote_fs */,
+                compression_method,
+                need_only_count,
+                std::nullopt /*min_block_size_bytes*/,
+                std::nullopt /*min_block_size_rows*/,
+                std::nullopt /*max_block_size_bytes*/);
+        }
+        else
+        {
+            input_format = FormatFactory::instance().getInput(
+            object_info->getFileFormat().value_or(configuration->format),
             *read_buf,
             initial_header,
             context_,
@@ -875,6 +909,7 @@ StorageObjectStorageSource::ReaderHolder StorageObjectStorageSource::createReade
             true /* is_remote_fs */,
             compression_method,
             need_only_count);
+        }
 
         input_format->setBucketsToRead(object_info->file_bucket_info);
         input_format->setSerializationHints(read_from_format_info.serialization_hints);
@@ -882,14 +917,6 @@ StorageObjectStorageSource::ReaderHolder StorageObjectStorageSource::createReade
         if (need_only_count)
             input_format->needOnlyCount();
 
-        if (!object_info->getPath().empty())
-        {
-            if (const auto & metadata = object_info->relative_path_with_metadata.metadata)
-            {
-                input_format->setStorageRelatedUniqueKey(context_->getSettingsRef(), object_info->getPath() + ":" + metadata->etag);
-            }
-        }
-
         builder.init(Pipe(input_format));
 
         configuration->addDeleteTransformers(object_info, builder, format_settings, parser_shared_resources, context_);
@@ -1001,9 +1028,14 @@ std::unique_ptr<ReadBufferFromFileBase> createReadBuffer(
                 || object_storage->getType() == ObjectStorageType::S3);
     }
 
-    /// We need object metadata for two cases:
+    bool use_page_cache = !use_distributed_cache && !use_filesystem_cache
+        && effective_read_settings.page_cache && effective_read_settings.use_page_cache_for_object_storage;
+
+
+    /// We need object metadata for a few use cases:
     /// 1. object size suggests whether we need to use prefetch
     /// 2. object etag suggests a cache key in case we use filesystem cache
+    /// 3. object etag as a cache key for parquet metadata caching
     if (!object_info.metadata)
         object_info.metadata = object_storage->getObjectMetadata(object_info.getPath(), /*with_tags=*/ false);
 
diff --git a/src/Storages/System/StorageSystemFormats.cpp b/src/Storages/System/StorageSystemFormats.cpp
index 7be5139b08c7..eb3be6e23427 100644
--- a/src/Storages/System/StorageSystemFormats.cpp
+++ b/src/Storages/System/StorageSystemFormats.cpp
@@ -36,11 +36,11 @@ void StorageSystemFormats::fillData(MutableColumns & res_columns, ContextPtr con
 
         const auto & [name, creators] = pair;
         String format_name = creators.name;
-        bool has_input_format(creators.input_creator != nullptr || creators.random_access_input_creator != nullptr);
+        bool has_input_format(creators.input_creator != nullptr || creators.random_access_input_creator != nullptr || creators.random_access_input_creator_with_metadata != nullptr);
         bool has_output_format(creators.output_creator != nullptr);
         bool supports_parallel_parsing(creators.file_segmentation_engine_creator != nullptr || creators.random_access_input_creator != nullptr);
         String content_type = has_output_format ? FormatFactory::instance().getContentType(format_name, settings) : "";
-        bool supports_random_access = creators.random_access_input_creator != nullptr;
+        bool supports_random_access = creators.random_access_input_creator != nullptr || creators.random_access_input_creator_with_metadata != nullptr;
         bool has_schema_inference = creators.schema_reader_creator != nullptr;
         bool has_external_schema = creators.external_schema_reader_creator != nullptr;
         bool prefers_large_blocks = creators.prefers_large_blocks;
diff --git a/tests/integration/test_storage_delta/test.py b/tests/integration/test_storage_delta/test.py
index 639e70102fcd..fd594b8b4d30 100644
--- a/tests/integration/test_storage_delta/test.py
+++ b/tests/integration/test_storage_delta/test.py
@@ -2336,7 +2336,7 @@ def test_column_pruning(started_cluster):
     query_id = f"query_{TABLE_NAME}_2"
     assert sum == int(
         instance.query(
-            f"SELECT sum(id) FROM {table_function} SETTINGS enable_filesystem_cache=0, max_read_buffer_size_remote_fs=100, remote_read_min_bytes_for_seek=1, input_format_parquet_use_native_reader_v3=1",
+            f"SELECT sum(id) FROM {table_function} SETTINGS enable_filesystem_cache=0, max_read_buffer_size_remote_fs=100, remote_read_min_bytes_for_seek=1, input_format_parquet_use_native_reader_v3=1, use_parquet_metadata_cache=0",
             query_id=query_id,
         )
     )
diff --git a/tests/queries/0_stateless/01271_show_privileges.reference b/tests/queries/0_stateless/01271_show_privileges.reference
index 7c157dc83556..b5dc8c719e8f 100644
--- a/tests/queries/0_stateless/01271_show_privileges.reference
+++ b/tests/queries/0_stateless/01271_show_privileges.reference
@@ -127,6 +127,7 @@ SYSTEM DROP CONNECTIONS CACHE	['SYSTEM CLEAR CONNECTIONS CACHE','SYSTEM DROP CON
 SYSTEM PREWARM MARK CACHE	['SYSTEM PREWARM MARK','PREWARM MARK CACHE','PREWARM MARKS']	GLOBAL	SYSTEM DROP CACHE
 SYSTEM DROP MARK CACHE	['SYSTEM CLEAR MARK CACHE','SYSTEM DROP MARK','DROP MARK CACHE','DROP MARKS']	GLOBAL	SYSTEM DROP CACHE
 SYSTEM DROP ICEBERG METADATA CACHE	['SYSTEM CLEAR ICEBERG_METADATA_CACHE','SYSTEM DROP ICEBERG_METADATA_CACHE']	GLOBAL	SYSTEM DROP CACHE
+SYSTEM DROP PARQUET METADATA CACHE	['SYSTEM DROP PARQUET_METADATA_CACHE']	GLOBAL	SYSTEM DROP CACHE
 SYSTEM PREWARM PRIMARY INDEX CACHE	['SYSTEM PREWARM PRIMARY INDEX','PREWARM PRIMARY INDEX CACHE','PREWARM PRIMARY INDEX']	GLOBAL	SYSTEM DROP CACHE
 SYSTEM DROP PRIMARY INDEX CACHE	['SYSTEM CLEAR PRIMARY INDEX CACHE','SYSTEM DROP PRIMARY INDEX','DROP PRIMARY INDEX CACHE','DROP PRIMARY INDEX']	GLOBAL	SYSTEM DROP CACHE
 SYSTEM DROP UNCOMPRESSED CACHE	['SYSTEM CLEAR UNCOMPRESSED CACHE','SYSTEM DROP UNCOMPRESSED','DROP UNCOMPRESSED CACHE','DROP UNCOMPRESSED']	GLOBAL	SYSTEM DROP CACHE
diff --git a/tests/queries/0_stateless/03707_parquet_metadata_cache.reference b/tests/queries/0_stateless/03707_parquet_metadata_cache.reference
new file mode 100644
index 000000000000..57876557fba1
--- /dev/null
+++ b/tests/queries/0_stateless/03707_parquet_metadata_cache.reference
@@ -0,0 +1,9 @@
+1	test1	1.23
+2	test2	4.5600000000000005
+0	1
+1	TEST1	2.23
+2	TEST2	5.5600000000000005
+1	0
+test1	2.46
+test2	9.120000000000001
+0	1
diff --git a/tests/queries/0_stateless/03707_parquet_metadata_cache.sql b/tests/queries/0_stateless/03707_parquet_metadata_cache.sql
new file mode 100644
index 000000000000..3f95b172a1b3
--- /dev/null
+++ b/tests/queries/0_stateless/03707_parquet_metadata_cache.sql
@@ -0,0 +1,59 @@
+-- Tags: no-fasttest, no-parallel, no-parallel-replicas, no-random-settings
+-- no-fasttest: depends on s3 storage
+-- no-parallel: cache is system-wide and tests can affect each other in unexpected way
+-- no-parallel-replicas: profile events are not available on the second replica
+
+/*
+Because the parquet metadata cache is system-wide, parallel runs of 
+SYSTEM DROP PARQUET METADATA CACHE will lead to non-deterministic results
+*/
+SET log_queries = 1;
+SYSTEM DROP PARQUET METADATA CACHE;
+
+-- Triggers caching of the file
+-- should be a cache miss as we load the cache the first time
+SELECT *
+FROM s3(s3_conn, filename = '03707_cache_test.parquet', format = 'Parquet')
+SETTINGS log_comment = '03707-first-test-query', use_parquet_metadata_cache = 1;
+
+SYSTEM FLUSH LOGS query_log;
+
+SELECT
+    ProfileEvents['ParquetMetadataCacheHits'] AS hits,
+    ProfileEvents['ParquetMetadataCacheMisses'] AS misses
+FROM system.query_log
+WHERE (log_comment = '03707-first-test-query') AND (type = 'QueryFinish') AND (current_database = currentDatabase());
+
+-- Should be a cache hit as we use the same file
+SELECT
+    id,
+    upper(name),
+    value + 1
+FROM s3(s3_conn, filename = '03707_cache_test.parquet', format = 'Parquet')
+SETTINGS log_comment = '03707-second-test-query', use_parquet_metadata_cache = 1;
+
+SYSTEM FLUSH LOGS query_log;
+
+SELECT
+    ProfileEvents['ParquetMetadataCacheHits'] AS hits,
+    ProfileEvents['ParquetMetadataCacheMisses'] AS misses
+FROM system.query_log
+WHERE (log_comment = '03707-second-test-query') AND (type = 'QueryFinish') AND (current_database = currentDatabase());
+
+SYSTEM DROP PARQUET METADATA CACHE;
+
+-- Should be back to a cache miss since we dropped the cache
+SELECT
+    name,
+    value + value
+FROM s3(s3_conn, filename = '03707_cache_test.parquet', format = 'Parquet')
+SETTINGS log_comment = '03707-third-test-query', use_parquet_metadata_cache = 1;
+
+SYSTEM FLUSH LOGS query_log;
+
+SELECT
+    ProfileEvents['ParquetMetadataCacheHits'] AS hits,
+    ProfileEvents['ParquetMetadataCacheMisses'] AS misses
+FROM system.query_log
+WHERE (log_comment = '03707-third-test-query') AND (type = 'QueryFinish') AND (current_database = currentDatabase());
+
diff --git a/tests/queries/0_stateless/03723_parquet_prefetcher_read_big_at.sql b/tests/queries/0_stateless/03723_parquet_prefetcher_read_big_at.sql
index 157faad932c9..c6bb1af5956d 100644
--- a/tests/queries/0_stateless/03723_parquet_prefetcher_read_big_at.sql
+++ b/tests/queries/0_stateless/03723_parquet_prefetcher_read_big_at.sql
@@ -17,7 +17,7 @@ SELECT *
 FROM t_parquet_prefetcher_read_big_at
 ORDER BY a,c
 FORMAT Null
-SETTINGS log_comment = 'test_03723_parquet_prefetcher_read_big_at';
+SETTINGS log_comment = 'test_03723_parquet_prefetcher_read_big_at', use_parquet_metadata_cache = 0;
 -- Ensure that profiling is available for analysis
 SYSTEM FLUSH LOGS query_log;
 -- Check profiling data to visualize what logic has been used
diff --git a/tests/queries/0_stateless/04027_parquet_v3_setting_override_s3.reference b/tests/queries/0_stateless/04027_parquet_v3_setting_override_s3.reference
new file mode 100644
index 000000000000..573541ac9702
--- /dev/null
+++ b/tests/queries/0_stateless/04027_parquet_v3_setting_override_s3.reference
@@ -0,0 +1 @@
+0
diff --git a/tests/queries/0_stateless/04027_parquet_v3_setting_override_s3.sql b/tests/queries/0_stateless/04027_parquet_v3_setting_override_s3.sql
new file mode 100644
index 000000000000..ea89ae7d75f9
--- /dev/null
+++ b/tests/queries/0_stateless/04027_parquet_v3_setting_override_s3.sql
@@ -0,0 +1,20 @@
+-- Tags: no-fasttest
+-- Tag no-fasttest: Depends on S3
+
+-- Test that `input_format_parquet_use_native_reader_v3` setting at SELECT time
+-- overrides the setting specified at CREATE TABLE time.
+
+SET s3_truncate_on_insert = 1;
+
+INSERT INTO FUNCTION s3(s3_conn, url = 'http://localhost:11111/test/04027_parquet_v3_setting_override.parquet', format = Parquet)
+SELECT number AS id FROM numbers(1);
+
+DROP TABLE IF EXISTS t_04027_parquet_v3_override;
+
+CREATE TABLE t_04027_parquet_v3_override (id UInt64)
+ENGINE = S3(s3_conn, url = 'http://localhost:11111/test/04027_parquet_v3_setting_override.parquet', format = Parquet)
+SETTINGS input_format_parquet_use_native_reader_v3 = false;
+
+SELECT * FROM t_04027_parquet_v3_override ORDER BY id LIMIT 5 SETTINGS input_format_parquet_use_native_reader_v3 = true;
+
+DROP TABLE t_04027_parquet_v3_override;
diff --git a/tests/queries/0_stateless/data_minio/03707_cache_test.parquet b/tests/queries/0_stateless/data_minio/03707_cache_test.parquet
new file mode 100644
index 0000000000000000000000000000000000000000..8616977c038db258b4dce5039de21f242d8b9738
GIT binary patch
literal 1195
zcmb_cO=uHA6n?wejWG?F)VyI=7Fom$5hP|eKO_=$i-H9s7Ed_^l5~r~{)kDcc*-G&
zhzAu9Dm@5#($b6dRy=qTg@QfkwTj0gf*>MVe6wqm)b>ztc4z0!oA>j*c^^AAa@Ilx
zqnO5*z2(Jli8cU`e0J!kQgHwz+Q>9%J*<7)i-1Ao_+t6{n_Y(I7sQ}ZQG|)62;*3c
zKEAs~ZULE;_f9`O!&f8&Wjz5=U6Y&R7-0&#F0oe<ukz5w2xW>v(f8vCeEsBkY5>$t
z0F{p0ZmVte1)Gu-t@TG+z*B{>t$B;JCBQq1fe{~|GF_W;J%|q^wkq+t0AG}i%EmZl
z7WaXg=-_Ja^QXb<>jEsCe19dr@a5Ri@Vkc>jto15Gf@m(+8n1A>YHhyzGGn%-If*H
z-C<zZuRxuxO?LbeRwQ;?=4&dysv6r!;q|~V41oRwg&rCaaJpU(NbZ3tR8bj{)UrYI
zE-}ZUb2_A9Iwr{@o)`sF`o|nV5gt*~OCmYL!c=tl8qIwGdQ8@Rj?p4Q5>Y*!-&fu5
zuie-FxkCMK(c;WtRcteICuuH#mw$_ED$YwnMnx}=23w7D?o<tffN}x^VH11G^Se!f
z2_vNO7?OZ8T@PTG6#j(+Q*=WwZHw%DDTGyu!#)gQoV^WW68QGH$nWo@D1wme>ku-w
z9>#;f|0^T-H<5x8+=!sY9uHvw*pC2Hc!({jkUVvKve`I4I@4*n=B#_M)ts3&?Oe*T
zQ<j;mcbb#qX0}#L*NZtTpJ}-Ec;3zz3h7KPZ)ei=iFCH^PFUG`-fmb2^pfD6_rfir
J${78oegWj~?$7`L

literal 0
HcmV?d00001


From ad52c2fa4014a5b8af98d07c00ff857d1cdb675b Mon Sep 17 00:00:00 2001
From: Arthur Passos <arthur.ti@outlook.com>
Date: Tue, 24 Mar 2026 18:55:15 -0300
Subject: [PATCH 02/14] sad to be removing all of this

---
 programs/server/Server.cpp                    |  9 ---
 src/Access/Common/AccessType.h                |  1 -
 src/Common/ProfileEvents.cpp                  |  2 -
 src/Core/FormatFactorySettings.h              |  1 -
 src/Core/ServerSettings.cpp                   |  1 -
 src/Interpreters/InterpreterSystemQuery.cpp   | 15 -----
 src/Parsers/ASTSystemQuery.cpp                |  1 -
 src/Parsers/ASTSystemQuery.h                  |  1 -
 .../Formats/Impl/ParquetBlockInputFormat.cpp  | 30 ---------
 .../Formats/Impl/ParquetBlockInputFormat.h    |  6 --
 .../Formats/Impl/ParquetFileMetaDataCache.cpp | 30 ---------
 .../Formats/Impl/ParquetFileMetaDataCache.h   | 35 -----------
 .../Impl/ParquetMetadataInputFormat.cpp       |  6 --
 .../disable_parquet_metadata_caching.xml      |  7 ---
 tests/integration/test_storage_delta/test.py  |  7 +--
 .../01271_show_privileges.reference           |  1 -
 .../0_stateless/02995_settings_25_11_1.tsv    |  1 -
 ...et_object_storage_metadata_cache.reference |  8 ---
 ..._parquet_object_storage_metadata_cache.sql | 63 -------------------
 19 files changed, 2 insertions(+), 223 deletions(-)
 delete mode 100644 src/Processors/Formats/Impl/ParquetFileMetaDataCache.cpp
 delete mode 100644 src/Processors/Formats/Impl/ParquetFileMetaDataCache.h
 delete mode 100644 tests/integration/test_storage_delta/configs/users.d/disable_parquet_metadata_caching.xml
 delete mode 100644 tests/queries/0_stateless/03299_parquet_object_storage_metadata_cache.reference
 delete mode 100644 tests/queries/0_stateless/03299_parquet_object_storage_metadata_cache.sql

diff --git a/programs/server/Server.cpp b/programs/server/Server.cpp
index 7e03b26648c0..d60c7d6d654e 100644
--- a/programs/server/Server.cpp
+++ b/programs/server/Server.cpp
@@ -164,10 +164,6 @@
 #   include <azure/core/diagnostics/logger.hpp>
 #endif
 
-#if USE_PARQUET
-#   include <Processors/Formats/Impl/ParquetFileMetaDataCache.h>
-#endif
-
 
 /// A minimal file used when the server is run without installation
 constexpr unsigned char resource_embedded_xml[] =
@@ -427,7 +423,6 @@ namespace ServerSetting
     extern const ServerSettingsUInt64 object_storage_list_objects_cache_ttl;
     extern const ServerSettingsUInt64 object_storage_list_objects_cache_size;
     extern const ServerSettingsUInt64 object_storage_list_objects_cache_max_entries;
-    extern const ServerSettingsUInt64 input_format_parquet_metadata_cache_max_size;
 }
 
 namespace ErrorCodes
@@ -2773,10 +2768,6 @@ try
 
     auto replicas_reconnector = ReplicasReconnector::init(global_context);
 
-#if USE_PARQUET
-    ParquetFileMetaDataCache::instance()->setMaxSizeInBytes(server_settings[ServerSetting::input_format_parquet_metadata_cache_max_size]);
-#endif
-
     /// Set current database name before loading tables and databases because
     /// system logs may copy global context.
     std::string default_database = server_settings[ServerSetting::default_database];
diff --git a/src/Access/Common/AccessType.h b/src/Access/Common/AccessType.h
index 35b79cb21b2d..3c4c177bcff5 100644
--- a/src/Access/Common/AccessType.h
+++ b/src/Access/Common/AccessType.h
@@ -317,7 +317,6 @@ enum class AccessType : uint8_t
     M(SYSTEM_PREWARM_MARK_CACHE, "SYSTEM PREWARM MARK, PREWARM MARK CACHE, PREWARM MARKS", GLOBAL, SYSTEM_DROP_CACHE) \
     M(SYSTEM_DROP_MARK_CACHE, "SYSTEM CLEAR MARK CACHE, SYSTEM DROP MARK, DROP MARK CACHE, DROP MARKS", GLOBAL, SYSTEM_DROP_CACHE) \
     M(SYSTEM_DROP_ICEBERG_METADATA_CACHE, "SYSTEM CLEAR ICEBERG_METADATA_CACHE, SYSTEM DROP ICEBERG_METADATA_CACHE", GLOBAL, SYSTEM_DROP_CACHE) \
-    M(SYSTEM_DROP_PARQUET_METADATA_CACHE, "SYSTEM DROP PARQUET_METADATA_CACHE", GLOBAL, SYSTEM_DROP_CACHE) \
     M(SYSTEM_PREWARM_PRIMARY_INDEX_CACHE, "SYSTEM PREWARM PRIMARY INDEX, PREWARM PRIMARY INDEX CACHE, PREWARM PRIMARY INDEX", GLOBAL, SYSTEM_DROP_CACHE) \
     M(SYSTEM_DROP_PRIMARY_INDEX_CACHE, "SYSTEM CLEAR PRIMARY INDEX CACHE, SYSTEM DROP PRIMARY INDEX, DROP PRIMARY INDEX CACHE, DROP PRIMARY INDEX", GLOBAL, SYSTEM_DROP_CACHE) \
     M(SYSTEM_DROP_UNCOMPRESSED_CACHE, "SYSTEM CLEAR UNCOMPRESSED CACHE, SYSTEM DROP UNCOMPRESSED, DROP UNCOMPRESSED CACHE, DROP UNCOMPRESSED", GLOBAL, SYSTEM_DROP_CACHE) \
diff --git a/src/Common/ProfileEvents.cpp b/src/Common/ProfileEvents.cpp
index 65880308cb62..fd667a3f34cf 100644
--- a/src/Common/ProfileEvents.cpp
+++ b/src/Common/ProfileEvents.cpp
@@ -1330,8 +1330,6 @@ The server successfully detected this situation and will download merged part fr
     M(ObjectStorageListObjectsCacheMisses, "Number of times object storage list objects operation miss the cache.", ValueType::Number) \
     M(ObjectStorageListObjectsCacheExactMatchHits, "Number of times object storage list objects operation hit the cache with an exact match.", ValueType::Number) \
     M(ObjectStorageListObjectsCachePrefixMatchHits, "Number of times object storage list objects operation miss the cache using prefix matching.", ValueType::Number) \
-    M(ParquetMetaDataCacheHits, "Number of times the read from filesystem cache hit the cache.", ValueType::Number) \
-    M(ParquetMetaDataCacheMisses, "Number of times the read from filesystem cache miss the cache.", ValueType::Number) \
 
 #ifdef APPLY_FOR_EXTERNAL_EVENTS
     #define APPLY_FOR_EVENTS(M) APPLY_FOR_BUILTIN_EVENTS(M) APPLY_FOR_EXTERNAL_EVENTS(M)
diff --git a/src/Core/FormatFactorySettings.h b/src/Core/FormatFactorySettings.h
index 3ee1da38c3d2..a55c47d6a9b7 100644
--- a/src/Core/FormatFactorySettings.h
+++ b/src/Core/FormatFactorySettings.h
@@ -1527,7 +1527,6 @@ Allow to write information about geo columns in parquet metadata and encode colu
     DECLARE(Bool, into_outfile_create_parent_directories, false, R"(
 Automatically create parent directories when using INTO OUTFILE if they do not already exists.
 )", 0) \
-    DECLARE(Bool, input_format_parquet_use_metadata_cache, true, R"(Enable parquet file metadata caching)", 0) \
 // End of FORMAT_FACTORY_SETTINGS
 
 #define OBSOLETE_FORMAT_SETTINGS(M, ALIAS) \
diff --git a/src/Core/ServerSettings.cpp b/src/Core/ServerSettings.cpp
index da7155a6b19d..baf2f8f36028 100644
--- a/src/Core/ServerSettings.cpp
+++ b/src/Core/ServerSettings.cpp
@@ -1488,7 +1488,6 @@ The policy on how to perform a scheduling of CPU slots specified by `concurrent_
     DECLARE(UInt64, object_storage_list_objects_cache_max_entries, 1000, "Maximum size of ObjectStorage list objects cache in entries. Zero means disabled.", 0)                      \
     DECLARE(UInt64, object_storage_list_objects_cache_ttl, 3600, "Time to live of records in ObjectStorage list objects cache in seconds. Zero means unlimited", 0) \
     DECLARE(Bool, enable_experimental_export_merge_tree_partition_feature, false, "Enable export replicated merge tree partition feature. It is experimental and not yet ready for production use.", 0) \
-    DECLARE(UInt64, input_format_parquet_metadata_cache_max_size, 500000000, "Maximum size of parquet file metadata cache", 0)
 
 /// Settings with a path are server settings with at least one layer of nesting that have a fixed structure (no lists, lists, enumerations, repetitions, ...).
 #define LIST_OF_SERVER_SETTINGS_WITH_PATH(DECLARE, ALIAS) \
diff --git a/src/Interpreters/InterpreterSystemQuery.cpp b/src/Interpreters/InterpreterSystemQuery.cpp
index 86c721125aed..1fc65daf50fb 100644
--- a/src/Interpreters/InterpreterSystemQuery.cpp
+++ b/src/Interpreters/InterpreterSystemQuery.cpp
@@ -78,10 +78,6 @@
 #include <Formats/ProtobufSchemas.h>
 #endif
 
-#if USE_PARQUET
-#include <Processors/Formats/Impl/ParquetFileMetaDataCache.h>
-#endif
-
 #if USE_AWS_S3
 #include <IO/S3/Client.h>
 #endif
@@ -466,16 +462,6 @@ BlockIO InterpreterSystemQuery::execute()
             getContext()->clearQueryResultCache(query.query_result_cache_tag);
             break;
         }
-        case Type::DROP_PARQUET_METADATA_CACHE:
-        {
-#if USE_PARQUET
-            getContext()->checkAccess(AccessType::SYSTEM_DROP_PARQUET_METADATA_CACHE);
-            ParquetFileMetaDataCache::instance()->clear();
-            break;
-#else
-            throw Exception(ErrorCodes::SUPPORT_IS_DISABLED, "The server was compiled without the support for Parquet");
-#endif
-        }
         case Type::CLEAR_COMPILED_EXPRESSION_CACHE:
 #if USE_EMBEDDED_COMPILER
             getContext()->checkAccess(AccessType::SYSTEM_DROP_COMPILED_EXPRESSION_CACHE);
@@ -2040,7 +2026,6 @@ AccessRightsElements InterpreterSystemQuery::getRequiredAccessForDDLOnCluster()
         case Type::CLEAR_PAGE_CACHE:
         case Type::CLEAR_SCHEMA_CACHE:
         case Type::CLEAR_FORMAT_SCHEMA_CACHE:
-        case Type::DROP_PARQUET_METADATA_CACHE:
         case Type::CLEAR_S3_CLIENT_CACHE:
         case Type::DROP_OBJECT_STORAGE_LIST_OBJECTS_CACHE:
         {
diff --git a/src/Parsers/ASTSystemQuery.cpp b/src/Parsers/ASTSystemQuery.cpp
index 15e723682f53..c0e40ff3766a 100644
--- a/src/Parsers/ASTSystemQuery.cpp
+++ b/src/Parsers/ASTSystemQuery.cpp
@@ -575,7 +575,6 @@ void ASTSystemQuery::formatImpl(WriteBuffer & ostr, const FormatSettings & setti
         case Type::CLEAR_TEXT_INDEX_CACHES:
         case Type::CLEAR_COMPILED_EXPRESSION_CACHE:
         case Type::CLEAR_S3_CLIENT_CACHE:
-        case Type::DROP_PARQUET_METADATA_CACHE:
         case Type::CLEAR_ICEBERG_METADATA_CACHE:
         case Type::DROP_OBJECT_STORAGE_LIST_OBJECTS_CACHE:
         case Type::CLEAR_PARQUET_METADATA_CACHE:
diff --git a/src/Parsers/ASTSystemQuery.h b/src/Parsers/ASTSystemQuery.h
index 443bc537abc1..640ec5667742 100644
--- a/src/Parsers/ASTSystemQuery.h
+++ b/src/Parsers/ASTSystemQuery.h
@@ -43,7 +43,6 @@ class ASTSystemQuery : public IAST, public ASTQueryWithOnCluster
         CLEAR_QUERY_CONDITION_CACHE,
         CLEAR_QUERY_CACHE,
         CLEAR_COMPILED_EXPRESSION_CACHE,
-        DROP_PARQUET_METADATA_CACHE,
         CLEAR_ICEBERG_METADATA_CACHE,
         CLEAR_PARQUET_METADATA_CACHE,
         CLEAR_FILESYSTEM_CACHE,
diff --git a/src/Processors/Formats/Impl/ParquetBlockInputFormat.cpp b/src/Processors/Formats/Impl/ParquetBlockInputFormat.cpp
index 1a4f3fd70539..d5cf2854483d 100644
--- a/src/Processors/Formats/Impl/ParquetBlockInputFormat.cpp
+++ b/src/Processors/Formats/Impl/ParquetBlockInputFormat.cpp
@@ -51,8 +51,6 @@ namespace ProfileEvents
     extern const Event ParquetFetchWaitTimeMicroseconds;
     extern const Event ParquetReadRowGroups;
     extern const Event ParquetPrunedRowGroups;
-    extern const Event ParquetMetaDataCacheHits;
-    extern const Event ParquetMetaDataCacheMisses;
 }
 
 namespace CurrentMetrics
@@ -559,25 +557,6 @@ static std::vector<Range> getHyperrectangleForRowGroup(const parquet::FileMetaDa
     return hyperrectangle;
 }
 
-std::shared_ptr<parquet::FileMetaData> ParquetBlockInputFormat::readMetadataFromFile()
-{
-    createArrowFileIfNotCreated();
-    return parquet::ReadMetaData(arrow_file);
-}
-
-void ParquetBlockInputFormat::createArrowFileIfNotCreated()
-{
-    if (arrow_file)
-    {
-        return;
-    }
-
-    // Create arrow file adapter.
-    // TODO: Make the adapter do prefetching on IO threads, based on the full set of ranges that
-    //       we'll need to read (which we know in advance). Use max_download_threads for that.
-    arrow_file = asArrowFile(*in, format_settings, is_stopped, "Parquet", PARQUET_MAGIC_BYTES, /* avoid_buffering */ true);
-}
-
 std::unordered_set<std::size_t> getBloomFilterFilteringColumnKeys(const KeyCondition::RPN & rpn)
 {
     std::unordered_set<std::size_t> column_keys;
@@ -840,8 +819,6 @@ void ParquetBlockInputFormat::initializeIfNeeded()
         }
     }
 
-    bool has_row_groups_to_read = false;
-
     auto skip_row_group_based_on_filters = [&](int row_group)
     {
         if (!format_settings.parquet.filter_push_down && !format_settings.parquet.bloom_filter_push_down)
@@ -900,13 +877,6 @@ void ParquetBlockInputFormat::initializeIfNeeded()
         row_group_batches.back().total_bytes_compressed += row_group_size;
         auto rows = adaptive_chunk_size(row_group);
         row_group_batches.back().adaptive_chunk_size = rows ? rows : format_settings.parquet.max_block_size;
-
-        has_row_groups_to_read = true;
-    }
-
-    if (has_row_groups_to_read)
-    {
-        createArrowFileIfNotCreated();
     }
 }
 
diff --git a/src/Processors/Formats/Impl/ParquetBlockInputFormat.h b/src/Processors/Formats/Impl/ParquetBlockInputFormat.h
index a971a29e9884..28cf9ce60c80 100644
--- a/src/Processors/Formats/Impl/ParquetBlockInputFormat.h
+++ b/src/Processors/Formats/Impl/ParquetBlockInputFormat.h
@@ -94,7 +94,6 @@ class ParquetBlockInputFormat : public IInputFormat
     size_t getApproxBytesReadForChunk() const override { return previous_approx_bytes_read_for_chunk; }
 
     void setBucketsToRead(const FileBucketInfoPtr & buckets_to_read_) override;
-    void setStorageRelatedUniqueKey(const Settings & settings, const String & key_) override;
 
 private:
     Chunk read() override;
@@ -114,11 +113,6 @@ class ParquetBlockInputFormat : public IInputFormat
 
     void threadFunction(size_t row_group_batch_idx);
 
-    void createArrowFileIfNotCreated();
-    std::shared_ptr<parquet::FileMetaData> readMetadataFromFile();
-
-    std::shared_ptr<parquet::FileMetaData> getFileMetaData();
-
     inline bool supportPrefetch() const;
 
     // Data layout in the file:
diff --git a/src/Processors/Formats/Impl/ParquetFileMetaDataCache.cpp b/src/Processors/Formats/Impl/ParquetFileMetaDataCache.cpp
deleted file mode 100644
index 4aeb1d48bada..000000000000
--- a/src/Processors/Formats/Impl/ParquetFileMetaDataCache.cpp
+++ /dev/null
@@ -1,30 +0,0 @@
-#include <Processors/Formats/Impl/ParquetFileMetaDataCache.h>
-
-#if USE_PARQUET
-
-#include <parquet/metadata.h>
-
-namespace DB
-{
-
-size_t ParquetFileMetaDataWeightFunction::operator()(const parquet::FileMetaData & metadata) const
-{
-    /// TODO fix-me: using the size on disk is not ideal, but it is the simplest and best we can do for now.
-    /// this implementation is only used by the v1 reader, which is going to be deprecated and a new implementation for the v3
-    /// reader will be added in the future.
-    return metadata.size();
-}
-
-ParquetFileMetaDataCache::ParquetFileMetaDataCache()
-    : CacheBase<String, parquet::FileMetaData, std::hash<String>, ParquetFileMetaDataWeightFunction>(CurrentMetrics::end(), CurrentMetrics::end(), 0)
-{}
-
-ParquetFileMetaDataCache * ParquetFileMetaDataCache::instance()
-{
-    static ParquetFileMetaDataCache instance;
-    return &instance;
-}
-
-}
-
-#endif
diff --git a/src/Processors/Formats/Impl/ParquetFileMetaDataCache.h b/src/Processors/Formats/Impl/ParquetFileMetaDataCache.h
deleted file mode 100644
index 4578f2582391..000000000000
--- a/src/Processors/Formats/Impl/ParquetFileMetaDataCache.h
+++ /dev/null
@@ -1,35 +0,0 @@
-#pragma once
-
-#include "config.h"
-
-#if USE_PARQUET
-
-namespace parquet
-{
-
-class FileMetaData;
-
-}
-
-#include <Common/CacheBase.h>
-
-namespace DB
-{
-
-struct ParquetFileMetaDataWeightFunction
-{
-    size_t operator()(const parquet::FileMetaData & metadata) const;
-};
-
-class ParquetFileMetaDataCache : public CacheBase<String, parquet::FileMetaData, std::hash<String>, ParquetFileMetaDataWeightFunction>
-{
-public:
-    static ParquetFileMetaDataCache * instance();
-
-private:
-    ParquetFileMetaDataCache();
-};
-
-}
-
-#endif
diff --git a/src/Processors/Formats/Impl/ParquetMetadataInputFormat.cpp b/src/Processors/Formats/Impl/ParquetMetadataInputFormat.cpp
index 359cb3585d8c..2403b82b3f94 100644
--- a/src/Processors/Formats/Impl/ParquetMetadataInputFormat.cpp
+++ b/src/Processors/Formats/Impl/ParquetMetadataInputFormat.cpp
@@ -27,12 +27,6 @@
 #include <Processors/Formats/Impl/ParquetFileMetaDataCache.h>
 
 
-namespace ProfileEvents
-{
-extern const Event ParquetMetaDataCacheHits;
-extern const Event ParquetMetaDataCacheMisses;
-}
-
 namespace DB
 {
 
diff --git a/tests/integration/test_storage_delta/configs/users.d/disable_parquet_metadata_caching.xml b/tests/integration/test_storage_delta/configs/users.d/disable_parquet_metadata_caching.xml
deleted file mode 100644
index bc34464e30da..000000000000
--- a/tests/integration/test_storage_delta/configs/users.d/disable_parquet_metadata_caching.xml
+++ /dev/null
@@ -1,7 +0,0 @@
-<clickhouse>
-    <profiles>
-        <default>
-            <input_format_parquet_use_metadata_cache>0</input_format_parquet_use_metadata_cache>
-        </default>
-    </profiles>
-</clickhouse>
diff --git a/tests/integration/test_storage_delta/test.py b/tests/integration/test_storage_delta/test.py
index fd594b8b4d30..fe59f3c5c96d 100644
--- a/tests/integration/test_storage_delta/test.py
+++ b/tests/integration/test_storage_delta/test.py
@@ -110,7 +110,6 @@ def started_cluster():
             user_configs=[
                 "configs/users.d/users.xml",
                 "configs/users.d/enable_writes.xml",
-                "configs/users.d/disable_parquet_metadata_caching.xml",
             ],
             env_variables={
                 "RUST_BACKTRACE": "1",
@@ -132,7 +131,6 @@ def started_cluster():
             user_configs=[
                 "configs/users.d/users.xml",
                 "configs/users.d/enable_writes.xml",
-                "configs/users.d/disable_parquet_metadata_caching.xml",
             ],
             with_minio=True,
             stay_alive=True,
@@ -183,7 +181,6 @@ def started_cluster():
             user_configs=[
                 "configs/users.d/users.xml",
                 "configs/users.d/disabled_delta_kernel.xml",
-                "configs/users.d/disable_parquet_metadata_caching.xml",
             ],
             with_minio=True,
             with_azurite=True,
@@ -1386,7 +1383,7 @@ def test_session_token(started_cluster):
     parquet_data_path = create_initial_data_file(
         started_cluster,
         instance,
-        "SELECT toUInt64(number), toString(number) FROM numbers(100) SETTINGS input_format_parquet_use_metadata_cache=0",
+        "SELECT toUInt64(number), toString(number) FROM numbers(100)",
         TABLE_NAME,
         node_name=node_name,
     )
@@ -1399,7 +1396,7 @@ def test_session_token(started_cluster):
             f"""
     SELECT count() FROM deltaLake(
         'http://{started_cluster.minio_host}:{started_cluster.minio_port}/{started_cluster.minio_bucket}/{TABLE_NAME}/',
-        SETTINGS allow_experimental_delta_kernel_rs=1, input_format_parquet_use_metadata_cache=0)
+        SETTINGS allow_experimental_delta_kernel_rs=1)
     """
         )
     )
diff --git a/tests/queries/0_stateless/01271_show_privileges.reference b/tests/queries/0_stateless/01271_show_privileges.reference
index b5dc8c719e8f..7c157dc83556 100644
--- a/tests/queries/0_stateless/01271_show_privileges.reference
+++ b/tests/queries/0_stateless/01271_show_privileges.reference
@@ -127,7 +127,6 @@ SYSTEM DROP CONNECTIONS CACHE	['SYSTEM CLEAR CONNECTIONS CACHE','SYSTEM DROP CON
 SYSTEM PREWARM MARK CACHE	['SYSTEM PREWARM MARK','PREWARM MARK CACHE','PREWARM MARKS']	GLOBAL	SYSTEM DROP CACHE
 SYSTEM DROP MARK CACHE	['SYSTEM CLEAR MARK CACHE','SYSTEM DROP MARK','DROP MARK CACHE','DROP MARKS']	GLOBAL	SYSTEM DROP CACHE
 SYSTEM DROP ICEBERG METADATA CACHE	['SYSTEM CLEAR ICEBERG_METADATA_CACHE','SYSTEM DROP ICEBERG_METADATA_CACHE']	GLOBAL	SYSTEM DROP CACHE
-SYSTEM DROP PARQUET METADATA CACHE	['SYSTEM DROP PARQUET_METADATA_CACHE']	GLOBAL	SYSTEM DROP CACHE
 SYSTEM PREWARM PRIMARY INDEX CACHE	['SYSTEM PREWARM PRIMARY INDEX','PREWARM PRIMARY INDEX CACHE','PREWARM PRIMARY INDEX']	GLOBAL	SYSTEM DROP CACHE
 SYSTEM DROP PRIMARY INDEX CACHE	['SYSTEM CLEAR PRIMARY INDEX CACHE','SYSTEM DROP PRIMARY INDEX','DROP PRIMARY INDEX CACHE','DROP PRIMARY INDEX']	GLOBAL	SYSTEM DROP CACHE
 SYSTEM DROP UNCOMPRESSED CACHE	['SYSTEM CLEAR UNCOMPRESSED CACHE','SYSTEM DROP UNCOMPRESSED','DROP UNCOMPRESSED CACHE','DROP UNCOMPRESSED']	GLOBAL	SYSTEM DROP CACHE
diff --git a/tests/queries/0_stateless/02995_settings_25_11_1.tsv b/tests/queries/0_stateless/02995_settings_25_11_1.tsv
index 0969de0f86de..1fc5f4b2f8b1 100644
--- a/tests/queries/0_stateless/02995_settings_25_11_1.tsv
+++ b/tests/queries/0_stateless/02995_settings_25_11_1.tsv
@@ -646,7 +646,6 @@ input_format_parquet_case_insensitive_column_matching	0
 input_format_parquet_enable_json_parsing	1
 input_format_parquet_enable_row_group_prefetch	1
 input_format_parquet_filter_push_down	1
-input_format_parquet_use_metadata_cache	1
 input_format_parquet_import_nested	0
 input_format_parquet_local_file_min_bytes_for_seek	8192
 input_format_parquet_local_time_as_utc	1
diff --git a/tests/queries/0_stateless/03299_parquet_object_storage_metadata_cache.reference b/tests/queries/0_stateless/03299_parquet_object_storage_metadata_cache.reference
deleted file mode 100644
index c87ad9008b60..000000000000
--- a/tests/queries/0_stateless/03299_parquet_object_storage_metadata_cache.reference
+++ /dev/null
@@ -1,8 +0,0 @@
-10
-10
-10
-10
-10
-10
-0
-10
diff --git a/tests/queries/0_stateless/03299_parquet_object_storage_metadata_cache.sql b/tests/queries/0_stateless/03299_parquet_object_storage_metadata_cache.sql
deleted file mode 100644
index c82b3f4dc0e3..000000000000
--- a/tests/queries/0_stateless/03299_parquet_object_storage_metadata_cache.sql
+++ /dev/null
@@ -1,63 +0,0 @@
--- Tags: no-parallel, no-fasttest, no-parallel-replicas
-
-SET input_format_parquet_use_native_reader_v3=0;
-
-DROP TABLE IF EXISTS t_parquet_03262;
-
-CREATE TABLE t_parquet_03262 (a UInt64)
-ENGINE = S3(s3_conn, filename = 'test_03262_{_partition_id}', format = Parquet)
-PARTITION BY a;
-
-INSERT INTO t_parquet_03262 SELECT number FROM numbers(10) SETTINGS s3_truncate_on_insert=1;
-
-SELECT COUNT(*)
-FROM s3(s3_conn, filename = 'test_03262_*', format = Parquet)
-SETTINGS input_format_parquet_use_metadata_cache=1, use_query_condition_cache=0,optimize_count_from_files=0;
-
-SELECT COUNT(*)
-FROM s3(s3_conn, filename = 'test_03262_*', format = Parquet)
-SETTINGS input_format_parquet_use_metadata_cache=1, use_query_condition_cache=0, optimize_count_from_files=0, log_comment='test_03262_parquet_metadata_cache';
-
-SELECT COUNT(*)
-FROM s3(s3_conn, filename = 'test_03262_*', format = ParquetMetadata)
-SETTINGS input_format_parquet_use_metadata_cache=1, use_query_condition_cache=0, optimize_count_from_files=0, log_comment='test_03262_parquet_metadata_format_metadata_cache';
-
-SYSTEM FLUSH LOGS;
-
-SELECT ProfileEvents['ParquetMetaDataCacheHits']
-FROM system.query_log
-where log_comment = 'test_03262_parquet_metadata_cache'
-AND type = 'QueryFinish'
-ORDER BY event_time desc
-LIMIT 1 SETTINGS use_query_condition_cache=0;
-
-SELECT ProfileEvents['ParquetMetaDataCacheHits']
-FROM system.query_log
-where log_comment = 'test_03262_parquet_metadata_format_metadata_cache'
-AND type = 'QueryFinish'
-ORDER BY event_time desc
-LIMIT 1 SETTINGS use_query_condition_cache=0;
-
-SYSTEM DROP PARQUET METADATA CACHE;
-
-SELECT COUNT(*)
-FROM s3(s3_conn, filename = 'test_03262_*', format = Parquet)
-SETTINGS input_format_parquet_use_metadata_cache=1, use_query_condition_cache=0, optimize_count_from_files=0, log_comment='test_03262_parquet_metadata_cache_cache_empty';
-
-SYSTEM FLUSH LOGS;
-
-SELECT ProfileEvents['ParquetMetaDataCacheHits']
-FROM system.query_log
-where log_comment = 'test_03262_parquet_metadata_cache_cache_empty'
-AND type = 'QueryFinish'
-ORDER BY event_time desc
-LIMIT 1 SETTINGS use_query_condition_cache=0;
-
-SELECT ProfileEvents['ParquetMetaDataCacheMisses']
-FROM system.query_log
-where log_comment = 'test_03262_parquet_metadata_cache_cache_empty'
-AND type = 'QueryFinish'
-ORDER BY event_time desc
-LIMIT 1 SETTINGS use_query_condition_cache=0;
-
-DROP TABLE t_parquet_03262;

From b96233c87afa3fd78f7816a3c118fa78a3fa27f8 Mon Sep 17 00:00:00 2001
From: Arthur Passos <arthur.ti@outlook.com>
Date: Wed, 25 Mar 2026 08:47:39 -0300
Subject: [PATCH 03/14] progress

---
 .../ObjectStorage/StorageObjectStorageSource.cpp       | 10 +++-------
 1 file changed, 3 insertions(+), 7 deletions(-)

diff --git a/src/Storages/ObjectStorage/StorageObjectStorageSource.cpp b/src/Storages/ObjectStorage/StorageObjectStorageSource.cpp
index 51bfbd10ef75..d90ea0030aac 100644
--- a/src/Storages/ObjectStorage/StorageObjectStorageSource.cpp
+++ b/src/Storages/ObjectStorage/StorageObjectStorageSource.cpp
@@ -874,12 +874,12 @@ StorageObjectStorageSource::ReaderHolder StorageObjectStorageSource::createReade
 
         InputFormatPtr input_format;
         if (context_->getSettingsRef()[Setting::use_parquet_metadata_cache] && use_native_reader_v3
-            && (object_info->getFileFormat().value_or(configuration->format) == "Parquet")
+            && (object_info->getFileFormat().value_or(configuration->getFormat()) == "Parquet")
             && !object_info->getObjectMetadata()->etag.empty())
         {
             const std::optional<RelativePathWithMetadata> object_with_metadata = object_info->relative_path_with_metadata;
             input_format = FormatFactory::instance().getInputWithMetadata(
-                object_info->getFileFormat().value_or(configuration->format),
+                object_info->getFileFormat().value_or(configuration->getFormat()),
                 *read_buf,
                 initial_header,
                 context_,
@@ -898,7 +898,7 @@ StorageObjectStorageSource::ReaderHolder StorageObjectStorageSource::createReade
         else
         {
             input_format = FormatFactory::instance().getInput(
-            object_info->getFileFormat().value_or(configuration->format),
+            object_info->getFileFormat().value_or(configuration->getFormat()),
             *read_buf,
             initial_header,
             context_,
@@ -1028,10 +1028,6 @@ std::unique_ptr<ReadBufferFromFileBase> createReadBuffer(
                 || object_storage->getType() == ObjectStorageType::S3);
     }
 
-    bool use_page_cache = !use_distributed_cache && !use_filesystem_cache
-        && effective_read_settings.page_cache && effective_read_settings.use_page_cache_for_object_storage;
-
-
     /// We need object metadata for a few use cases:
     /// 1. object size suggests whether we need to use prefetch
     /// 2. object etag suggests a cache key in case we use filesystem cache

From e49d465a71aab8548e4bbc8c2f1cdb9587332bff Mon Sep 17 00:00:00 2001
From: Arthur Passos <arthur.ti@outlook.com>
Date: Wed, 25 Mar 2026 09:11:06 -0300
Subject: [PATCH 04/14] not required

---
 src/Interpreters/Context.cpp | 1 -
 1 file changed, 1 deletion(-)

diff --git a/src/Interpreters/Context.cpp b/src/Interpreters/Context.cpp
index be97518806e3..f6d49e8c887a 100644
--- a/src/Interpreters/Context.cpp
+++ b/src/Interpreters/Context.cpp
@@ -44,7 +44,6 @@
 #include <Storages/ObjectStorage/DataLakes/Iceberg/IcebergMetadataFilesCache.h>
 #include <Storages/ObjectStorageQueue/ObjectStorageQueueMetadataFactory.h>
 #include <Processors/Formats/Impl/ParquetMetadataCache.h>
-#include <Storages/StreamingStorageRegistry.h>
 #include <Storages/MergeTree/VectorSimilarityIndexCache.h>
 #include <Storages/Distributed/DistributedSettings.h>
 #include <Storages/CompressionCodecSelector.h>

From c4c89e88f486fae133f76cf2e43d2b9a3b1433aa Mon Sep 17 00:00:00 2001
From: Arthur Passos <arthur.ti@outlook.com>
Date: Wed, 25 Mar 2026 09:26:48 -0300
Subject: [PATCH 05/14] a

---
 src/Processors/Formats/Impl/ParquetMetadataInputFormat.cpp | 1 -
 1 file changed, 1 deletion(-)

diff --git a/src/Processors/Formats/Impl/ParquetMetadataInputFormat.cpp b/src/Processors/Formats/Impl/ParquetMetadataInputFormat.cpp
index 2403b82b3f94..66c11d637e69 100644
--- a/src/Processors/Formats/Impl/ParquetMetadataInputFormat.cpp
+++ b/src/Processors/Formats/Impl/ParquetMetadataInputFormat.cpp
@@ -24,7 +24,6 @@
 #include <DataTypes/NestedUtils.h>
 #include <Core/Settings.h>
 #include <Common/ProfileEvents.h>
-#include <Processors/Formats/Impl/ParquetFileMetaDataCache.h>
 
 
 namespace DB

From a2d060d45e1d7f78a024db0f838aee8d56f544cc Mon Sep 17 00:00:00 2001
From: Vitaly Baranov <vitlibar@hotmail.com>
Date: Wed, 11 Feb 2026 20:10:51 +0000
Subject: [PATCH 06/14] Merge pull request #96545 from
 vitlibar/update-arrow-to-23.0.0

Update arrow to version 23.0.0
---
 contrib/arrow                                 |  2 +-
 contrib/arrow-cmake/CMakeLists.txt            | 69 ++++++++++++++-----
 .../arrow-cmake/cpp/src/arrow/util/config.h   | 29 +++++---
 .../cpp/src/arrow/util/config_internal.h      | 24 +++++++
 contrib/arrow-cmake/flight.cmake              | 11 ++-
 contrib/flatbuffers                           |  2 +-
 .../Formats/Impl/Parquet/Decoding.cpp         |  2 +-
 src/Processors/Formats/Impl/Parquet/Write.cpp | 10 +--
 .../Formats/Impl/ParquetBlockOutputFormat.cpp |  6 +-
 src/Server/ArrowFlightHandler.cpp             |  4 +-
 src/Storages/Hive/HiveFile.cpp                |  4 +-
 .../DataLakes/DeltaLakeMetadata.cpp           | 10 ++-
 .../02735_parquet_encoder.reference           |  2 +-
 13 files changed, 120 insertions(+), 55 deletions(-)
 create mode 100644 contrib/arrow-cmake/cpp/src/arrow/util/config_internal.h

diff --git a/contrib/arrow b/contrib/arrow
index 68dd24ec070e..82192eef72dc 160000
--- a/contrib/arrow
+++ b/contrib/arrow
@@ -1 +1 @@
-Subproject commit 68dd24ec070e18633a1b78112a463cfa333c6ada
+Subproject commit 82192eef72dc8f1288fc4a87ee1337d24d627622
diff --git a/contrib/arrow-cmake/CMakeLists.txt b/contrib/arrow-cmake/CMakeLists.txt
index d578b8aa29e2..ac38dd56402e 100644
--- a/contrib/arrow-cmake/CMakeLists.txt
+++ b/contrib/arrow-cmake/CMakeLists.txt
@@ -24,17 +24,25 @@ if (NOT ENABLE_PARQUET)
     return()
 endif()
 
-# Freebsd: ../contrib/arrow/cpp/src/arrow/util/bit-util.h:27:10: fatal error: endian.h: No such file or directory
-if (OS_FREEBSD)
-    message (FATAL_ERROR "Using internal parquet library on FreeBSD is not supported")
+# Support C11
+if(NOT DEFINED CMAKE_C_STANDARD)
+  set(CMAKE_C_STANDARD 11)
+endif()
+
+# This ensures that a standard higher than the minimum can be passed correctly
+if(NOT DEFINED CMAKE_CXX_STANDARD)
+  set(CMAKE_CXX_STANDARD 20)
+elseif(${CMAKE_CXX_STANDARD} VERSION_LESS 20)
+  message(FATAL_ERROR "Cannot set a CMAKE_CXX_STANDARD smaller than 20")
 endif()
 
-set (CMAKE_CXX_STANDARD 17)
+# We require a C++20 compliant compiler
+set(CMAKE_CXX_STANDARD_REQUIRED ON)
 
-set(ARROW_VERSION "11.0.0")
+set(ARROW_VERSION "23.0.0")
 string(REGEX MATCH "^[0-9]+\\.[0-9]+\\.[0-9]+" ARROW_BASE_VERSION "${ARROW_VERSION}")
 
-set(ARROW_VERSION_MAJOR "11")
+set(ARROW_VERSION_MAJOR "23")
 set(ARROW_VERSION_MINOR "0")
 set(ARROW_VERSION_PATCH "0")
 
@@ -221,8 +229,8 @@ target_include_directories(_orc SYSTEM PRIVATE
 set(LIBRARY_DIR "${ClickHouse_SOURCE_DIR}/contrib/arrow/cpp/src/arrow")
 
 set(ARROW_GENERATED_SRC_DIR ${CMAKE_CURRENT_BINARY_DIR}/cpp/src)
-configure_file(${LIBRARY_DIR}/util/config.h.cmake ${ARROW_GENERATED_SRC_DIR}/arrow/util/config.h)
-
+configure_file("${LIBRARY_DIR}/util/config.h.cmake" "${ARROW_GENERATED_SRC_DIR}/arrow/util/config.h" ESCAPE_QUOTES)
+configure_file("${LIBRARY_DIR}/util/config_internal.h.cmake" "${ARROW_GENERATED_SRC_DIR}/arrow/util/config_internal.h" ESCAPE_QUOTES)
 
 # arrow/cpp/src/arrow/CMakeLists.txt (ARROW_SRCS + ARROW_COMPUTE + ARROW_IPC)
 # find . \( -iname \*.cc -o -iname \*.cpp -o -iname \*.c \) | sort | awk '{print "\"${LIBRARY_DIR}" substr($1,2) "\"" }' | grep -v 'test.cc' | grep -v 'json' | grep -v 'flight' \|
@@ -250,6 +258,7 @@ set(ARROW_SRCS
         "${LIBRARY_DIR}/array/concatenate.cc"
         "${LIBRARY_DIR}/array/data.cc"
         "${LIBRARY_DIR}/array/diff.cc"
+        "${LIBRARY_DIR}/array/statistics.cc"
         "${LIBRARY_DIR}/array/util.cc"
         "${LIBRARY_DIR}/array/validate.cc"
         "${LIBRARY_DIR}/buffer.cc"
@@ -276,7 +285,6 @@ set(ARROW_SRCS
         "${LIBRARY_DIR}/compute/kernels/codegen_internal.cc"
         "${LIBRARY_DIR}/compute/kernels/hash_aggregate.cc"
         "${LIBRARY_DIR}/compute/kernels/ree_util_internal.cc"
-        "${LIBRARY_DIR}/compute/kernels/row_encoder.cc"
         "${LIBRARY_DIR}/compute/kernels/scalar_arithmetic.cc"
         "${LIBRARY_DIR}/compute/kernels/scalar_boolean.cc"
         "${LIBRARY_DIR}/compute/kernels/scalar_cast_boolean.cc"
@@ -298,6 +306,7 @@ set(ARROW_SRCS
         "${LIBRARY_DIR}/compute/kernels/scalar_temporal_binary.cc"
         "${LIBRARY_DIR}/compute/kernels/scalar_temporal_unary.cc"
         "${LIBRARY_DIR}/compute/kernels/scalar_validity.cc"
+        "${LIBRARY_DIR}/compute/kernels/temporal_internal.cc"
         "${LIBRARY_DIR}/compute/kernels/util_internal.cc"
         "${LIBRARY_DIR}/compute/kernels/vector_array_sort.cc"
         "${LIBRARY_DIR}/compute/kernels/vector_cumulative_ops.cc"
@@ -313,6 +322,7 @@ set(ARROW_SRCS
         "${LIBRARY_DIR}/compute/kernels/vector_selection_internal.cc"
         "${LIBRARY_DIR}/compute/kernels/vector_selection_take_internal.cc"
         "${LIBRARY_DIR}/compute/kernels/vector_sort.cc"
+        "${LIBRARY_DIR}/compute/kernels/vector_swizzle.cc"
         "${LIBRARY_DIR}/compute/key_hash_internal.cc"
         "${LIBRARY_DIR}/compute/key_map_internal.cc"
         "${LIBRARY_DIR}/compute/light_array_internal.cc"
@@ -326,7 +336,11 @@ set(ARROW_SRCS
         "${LIBRARY_DIR}/config.cc"
         "${LIBRARY_DIR}/datum.cc"
         "${LIBRARY_DIR}/device.cc"
+        "${LIBRARY_DIR}/device_allocation_type_set.cc"
         "${LIBRARY_DIR}/extension_type.cc"
+        "${LIBRARY_DIR}/extension/bool8.cc"
+        "${LIBRARY_DIR}/extension/json.cc"
+        "${LIBRARY_DIR}/extension/uuid.cc"
         "${LIBRARY_DIR}/integration/c_data_integration_internal.cc"
         "${LIBRARY_DIR}/io/buffered.cc"
         "${LIBRARY_DIR}/io/caching.cc"
@@ -374,7 +388,10 @@ set(ARROW_SRCS
         "${LIBRARY_DIR}/util/bitmap_builders.cc"
         "${LIBRARY_DIR}/util/bitmap_ops.cc"
         "${LIBRARY_DIR}/util/bpacking.cc"
+        "${LIBRARY_DIR}/util/bpacking_scalar.cc"
+        "${LIBRARY_DIR}/util/bpacking_simd_default.cc"
         "${LIBRARY_DIR}/util/byte_size.cc"
+        "${LIBRARY_DIR}/util/byte_stream_split_internal.cc"
         "${LIBRARY_DIR}/util/cancel.cc"
         "${LIBRARY_DIR}/util/compression.cc"
         "${LIBRARY_DIR}/util/counting_semaphore.cc"
@@ -384,20 +401,25 @@ set(ARROW_SRCS
         "${LIBRARY_DIR}/util/decimal.cc"
         "${LIBRARY_DIR}/util/delimiting.cc"
         "${LIBRARY_DIR}/util/dict_util.cc"
+        "${LIBRARY_DIR}/util/fixed_width_internal.cc"
         "${LIBRARY_DIR}/util/float16.cc"
         "${LIBRARY_DIR}/util/formatting.cc"
         "${LIBRARY_DIR}/util/future.cc"
+        "${LIBRARY_DIR}/util/fuzz_internal.cc"
         "${LIBRARY_DIR}/util/hashing.cc"
         "${LIBRARY_DIR}/util/int_util.cc"
         "${LIBRARY_DIR}/util/io_util.cc"
         "${LIBRARY_DIR}/util/key_value_metadata.cc"
         "${LIBRARY_DIR}/util/list_util.cc"
+        "${LIBRARY_DIR}/util/logger.cc"
         "${LIBRARY_DIR}/util/logging.cc"
+        "${LIBRARY_DIR}/util/math_internal.cc"
         "${LIBRARY_DIR}/util/memory.cc"
         "${LIBRARY_DIR}/util/mutex.cc"
         "${LIBRARY_DIR}/util/ree_util.cc"
+        "${LIBRARY_DIR}/util/secure_string.cc"
         "${LIBRARY_DIR}/util/string.cc"
-        "${LIBRARY_DIR}/util/string_builder.cc"
+        "${LIBRARY_DIR}/util/string_util.cc"
         "${LIBRARY_DIR}/util/task_group.cc"
         "${LIBRARY_DIR}/util/tdigest.cc"
         "${LIBRARY_DIR}/util/thread_pool.cc"
@@ -410,7 +432,7 @@ set(ARROW_SRCS
         "${LIBRARY_DIR}/util/utf8.cc"
         "${LIBRARY_DIR}/util/value_parsing.cc"
         "${LIBRARY_DIR}/vendored/base64.cpp"
-        "${LIBRARY_DIR}/vendored/datetime/tz.cpp"
+        "${LIBRARY_DIR}/vendored/datetime.cpp"
         "${LIBRARY_DIR}/vendored/double-conversion/bignum-dtoa.cc"
         "${LIBRARY_DIR}/vendored/double-conversion/bignum.cc"
         "${LIBRARY_DIR}/vendored/double-conversion/cached-powers.cc"
@@ -442,6 +464,12 @@ set(ARROW_SRCS
         "${ARROW_SRC_DIR}/arrow/adapters/orc/options.cc"
         )
 
+add_definitions(-DARROW_WITH_BROTLI)
+SET(ARROW_SRCS "${LIBRARY_DIR}/util/compression_brotli.cc" ${ARROW_SRCS})
+
+add_definitions(-DARROW_WITH_BZ2)
+SET(ARROW_SRCS "${LIBRARY_DIR}/util/compression_bz2.cc" ${ARROW_SRCS})
+
 add_definitions(-DARROW_WITH_LZ4)
 SET(ARROW_SRCS "${LIBRARY_DIR}/util/compression_lz4.cc" ${ARROW_SRCS})
 
@@ -454,9 +482,6 @@ SET(ARROW_SRCS "${LIBRARY_DIR}/util/compression_zlib.cc" ${ARROW_SRCS})
 add_definitions(-DARROW_WITH_ZSTD)
 SET(ARROW_SRCS "${LIBRARY_DIR}/util/compression_zstd.cc" ${ARROW_SRCS})
 
-add_definitions(-DARROW_WITH_BROTLI)
-SET(ARROW_SRCS "${LIBRARY_DIR}/util/compression_brotli.cc" ${ARROW_SRCS})
-
 
 add_library(_arrow ${ARROW_SRCS})
 add_library(ch_contrib::arrow ALIAS _arrow)
@@ -473,6 +498,8 @@ target_link_libraries(_arrow PRIVATE
     ch_contrib::zlib
     ch_contrib::zstd
     ch_contrib::brotli
+    ch_contrib::bzip2
+    ch_contrib::curl
 )
 target_link_libraries(_arrow PUBLIC _orc)
 
@@ -493,19 +520,23 @@ set(LIBRARY_DIR "${ClickHouse_SOURCE_DIR}/contrib/arrow/cpp/src/parquet")
 set(GEN_LIBRARY_DIR "${ClickHouse_SOURCE_DIR}/contrib/arrow/cpp/src/generated")
 # arrow/cpp/src/parquet/CMakeLists.txt
 set(PARQUET_SRCS
+        "${LIBRARY_DIR}/arrow/fuzz_internal.cc"
         "${LIBRARY_DIR}/arrow/path_internal.cc"
         "${LIBRARY_DIR}/arrow/reader.cc"
         "${LIBRARY_DIR}/arrow/reader_internal.cc"
         "${LIBRARY_DIR}/arrow/schema.cc"
         "${LIBRARY_DIR}/arrow/schema_internal.cc"
+        "${LIBRARY_DIR}/arrow/variant_internal.cc"
         "${LIBRARY_DIR}/arrow/writer.cc"
         "${LIBRARY_DIR}/benchmark_util.cc"
         "${LIBRARY_DIR}/bloom_filter.cc"
         "${LIBRARY_DIR}/bloom_filter_reader.cc"
+        "${LIBRARY_DIR}/chunker_internal.cc"
         "${LIBRARY_DIR}/column_reader.cc"
         "${LIBRARY_DIR}/column_scanner.cc"
         "${LIBRARY_DIR}/column_writer.cc"
-        "${LIBRARY_DIR}/encoding.cc"
+        "${LIBRARY_DIR}/decoder.cc"
+        "${LIBRARY_DIR}/encoder.cc"
         "${LIBRARY_DIR}/encryption/crypto_factory.cc"
         "${LIBRARY_DIR}/encryption/encryption.cc"
         "${LIBRARY_DIR}/encryption/encryption_internal.cc"
@@ -525,6 +556,9 @@ set(PARQUET_SRCS
         "${LIBRARY_DIR}/exception.cc"
         "${LIBRARY_DIR}/file_reader.cc"
         "${LIBRARY_DIR}/file_writer.cc"
+        "${LIBRARY_DIR}/geospatial/statistics.cc"
+        "${LIBRARY_DIR}/geospatial/util_internal.cc"
+        "${LIBRARY_DIR}/geospatial/util_json_internal.cc"
         "${LIBRARY_DIR}/level_comparison.cc"
         "${LIBRARY_DIR}/level_comparison_avx2.cc"
         "${LIBRARY_DIR}/level_conversion.cc"
@@ -535,13 +569,13 @@ set(PARQUET_SRCS
         "${LIBRARY_DIR}/printer.cc"
         "${LIBRARY_DIR}/properties.cc"
         "${LIBRARY_DIR}/schema.cc"
+        "${LIBRARY_DIR}/size_statistics.cc"
         "${LIBRARY_DIR}/statistics.cc"
         "${LIBRARY_DIR}/stream_reader.cc"
         "${LIBRARY_DIR}/stream_writer.cc"
         "${LIBRARY_DIR}/types.cc"
         "${LIBRARY_DIR}/xxhasher.cc"
 
-        "${GEN_LIBRARY_DIR}/parquet_constants.cpp"
         "${GEN_LIBRARY_DIR}/parquet_types.cpp"
         )
 #list(TRANSFORM PARQUET_SRCS PREPEND "${LIBRARY_DIR}/") # cmake 3.12
@@ -558,7 +592,8 @@ target_link_libraries(_parquet
     PRIVATE
         boost::headers_only
         boost::regex
-        OpenSSL::Crypto OpenSSL::SSL)
+        OpenSSL::Crypto OpenSSL::SSL
+        ch_contrib::rapidjson)
 
 if (SANITIZE STREQUAL "undefined")
     target_compile_options(_parquet PRIVATE -fno-sanitize=undefined)
diff --git a/contrib/arrow-cmake/cpp/src/arrow/util/config.h b/contrib/arrow-cmake/cpp/src/arrow/util/config.h
index cacff7b16cb5..5fa5782f364f 100644
--- a/contrib/arrow-cmake/cpp/src/arrow/util/config.h
+++ b/contrib/arrow-cmake/cpp/src/arrow/util/config.h
@@ -15,25 +15,21 @@
 // specific language governing permissions and limitations
 // under the License.
 
-#define ARROW_VERSION_MAJOR 11
+#define ARROW_VERSION_MAJOR 23
 #define ARROW_VERSION_MINOR 0
 #define ARROW_VERSION_PATCH 0
 #define ARROW_VERSION ((ARROW_VERSION_MAJOR * 1000) + ARROW_VERSION_MINOR) * 1000 + ARROW_VERSION_PATCH
 
-#define ARROW_VERSION_STRING "11.0.0"
+#define ARROW_VERSION_STRING "23.0.0"
 
-#define ARROW_SO_VERSION "1100"
-#define ARROW_FULL_SO_VERSION "1100.0.0"
+#define ARROW_SO_VERSION "2300"
+#define ARROW_FULL_SO_VERSION "2300.0.0"
 
 #define ARROW_CXX_COMPILER_ID "Clang"
 #define ARROW_CXX_COMPILER_VERSION "ClickHouse"
-#define ARROW_CXX_COMPILER_FLAGS ""
 
 #define ARROW_BUILD_TYPE ""
 
-#define ARROW_GIT_ID ""
-#define ARROW_GIT_DESCRIPTION ""
-
 #define ARROW_PACKAGE_KIND ""
 
 /* #undef ARROW_COMPUTE */
@@ -47,15 +43,26 @@
 /* #undef ARROW_JEMALLOC */
 /* #undef ARROW_JEMALLOC_VENDORED */
 /* #undef ARROW_JSON */
+/* #undef ARROW_MIMALLOC */
 /* #undef ARROW_ORC */
 /* #undef ARROW_PARQUET */
 /* #undef ARROW_SUBSTRAIT */
 
+/* #undef ARROW_AZURE */
+/* #undef ARROW_ENABLE_THREADING */
 /* #undef ARROW_GCS */
+/* #undef ARROW_HDFS */
 /* #undef ARROW_S3 */
+/* #undef ARROW_USE_GLOG */
 /* #undef ARROW_USE_NATIVE_INT128 */
+/* #undef ARROW_WITH_BROTLI */
+/* #undef ARROW_WITH_BZ2 */
+/* #undef ARROW_WITH_LZ4 */
 /* #undef ARROW_WITH_MUSL */
 /* #undef ARROW_WITH_OPENTELEMETRY */
-/* #undef ARROW_WITH_UCX */
-
-/* #undef GRPCPP_PP_INCLUDE */
+/* #undef ARROW_WITH_RE2 */
+/* #undef ARROW_WITH_SNAPPY */
+/* #undef ARROW_WITH_UTF8PROC */
+/* #undef ARROW_WITH_ZLIB */
+/* #undef ARROW_WITH_ZSTD */
+/* #undef PARQUET_REQUIRE_ENCRYPTION */
diff --git a/contrib/arrow-cmake/cpp/src/arrow/util/config_internal.h b/contrib/arrow-cmake/cpp/src/arrow/util/config_internal.h
new file mode 100644
index 000000000000..01112c4db37d
--- /dev/null
+++ b/contrib/arrow-cmake/cpp/src/arrow/util/config_internal.h
@@ -0,0 +1,24 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+// These variables are not exposed as they can make compilation caching
+// and increment builds less efficient.
+
+#define ARROW_CXX_COMPILER_FLAGS ""
+
+#define ARROW_GIT_ID ""
+#define ARROW_GIT_DESCRIPTION ""
diff --git a/contrib/arrow-cmake/flight.cmake b/contrib/arrow-cmake/flight.cmake
index f29fcdec1e0b..c028ab8de13c 100644
--- a/contrib/arrow-cmake/flight.cmake
+++ b/contrib/arrow-cmake/flight.cmake
@@ -37,11 +37,6 @@ add_custom_command(
 # protobuf-internal.cc
 set(ARROW_FLIGHT_SRCS
         ${ARROW_FLIGHT_GENERATED_SRC_DIR}/Flight.pb.cc
-        ${ARROW_FLIGHT_SRC_DIR}/transport/grpc/grpc_client.cc
-        ${ARROW_FLIGHT_SRC_DIR}/transport/grpc/grpc_server.cc
-        ${ARROW_FLIGHT_SRC_DIR}/transport/grpc/protocol_grpc_internal.cc
-        ${ARROW_FLIGHT_SRC_DIR}/transport/grpc/serialization_internal.cc
-        ${ARROW_FLIGHT_SRC_DIR}/transport/grpc/util_internal.cc
         ${ARROW_FLIGHT_SRC_DIR}/client.cc
         ${ARROW_FLIGHT_SRC_DIR}/client_cookie_middleware.cc
         ${ARROW_FLIGHT_SRC_DIR}/client_tracing_middleware.cc
@@ -50,10 +45,14 @@ set(ARROW_FLIGHT_SRCS
         ${ARROW_FLIGHT_SRC_DIR}/serialization_internal.cc
         ${ARROW_FLIGHT_SRC_DIR}/server.cc
         ${ARROW_FLIGHT_SRC_DIR}/server_auth.cc
-        ${ARROW_FLIGHT_SRC_DIR}/server_middleware.cc
         ${ARROW_FLIGHT_SRC_DIR}/server_tracing_middleware.cc
         ${ARROW_FLIGHT_SRC_DIR}/transport.cc
         ${ARROW_FLIGHT_SRC_DIR}/transport_server.cc
+        ${ARROW_FLIGHT_SRC_DIR}/transport/grpc/grpc_client.cc
+        ${ARROW_FLIGHT_SRC_DIR}/transport/grpc/grpc_server.cc
+        ${ARROW_FLIGHT_SRC_DIR}/transport/grpc/protocol_grpc_internal.cc
+        ${ARROW_FLIGHT_SRC_DIR}/transport/grpc/serialization_internal.cc
+        ${ARROW_FLIGHT_SRC_DIR}/transport/grpc/util_internal.cc
         ${ARROW_FLIGHT_SRC_DIR}/types.cc
 )
 
diff --git a/contrib/flatbuffers b/contrib/flatbuffers
index 0100f6a57798..0bed8cd4a001 160000
--- a/contrib/flatbuffers
+++ b/contrib/flatbuffers
@@ -1 +1 @@
-Subproject commit 0100f6a5779831fa7a651e4b67ef389a8752bd9b
+Subproject commit 0bed8cd4a001850de9591563df99b435349ba05e
diff --git a/src/Processors/Formats/Impl/Parquet/Decoding.cpp b/src/Processors/Formats/Impl/Parquet/Decoding.cpp
index 609ecfd32dc4..f137cdf3f01a 100644
--- a/src/Processors/Formats/Impl/Parquet/Decoding.cpp
+++ b/src/Processors/Formats/Impl/Parquet/Decoding.cpp
@@ -4,7 +4,7 @@
 #include <Columns/ColumnString.h>
 #include <Common/FloatUtils.h>
 
-#include <arrow/util/bit_stream_utils.h>
+#include <arrow/util/bit_stream_utils_internal.h>
 
 namespace DB::ErrorCodes
 {
diff --git a/src/Processors/Formats/Impl/Parquet/Write.cpp b/src/Processors/Formats/Impl/Parquet/Write.cpp
index 35b5e2c59891..42af8e997f0a 100644
--- a/src/Processors/Formats/Impl/Parquet/Write.cpp
+++ b/src/Processors/Formats/Impl/Parquet/Write.cpp
@@ -3,7 +3,7 @@
 #include <arrow/util/key_value_metadata.h>
 #include <parquet/encoding.h>
 #include <parquet/schema.h>
-#include <arrow/util/rle_encoding.h>
+#include <arrow/util/rle_encoding_internal.h>
 #include <arrow/util/crc32.h>
 #include <lz4.h>
 #include <Poco/JSON/JSON.h>
@@ -651,19 +651,19 @@ PODArray<char> & compress(PODArray<char> & source, PODArray<char> & scratch, Com
 
 void encodeRepDefLevelsRLE(const UInt8 * data, size_t size, UInt8 max_level, PODArray<char> & out)
 {
-    using arrow::util::RleEncoder;
+    using arrow::util::RleBitPackedEncoder;
 
     chassert(max_level > 0);
     size_t offset = out.size();
     size_t prefix_size = sizeof(Int32);
 
     int bit_width = bitScanReverse(max_level) + 1;
-    int max_rle_size = RleEncoder::MaxBufferSize(bit_width, static_cast<int>(size)) +
-                       RleEncoder::MinBufferSize(bit_width);
+    auto max_rle_size = RleBitPackedEncoder::MaxBufferSize(bit_width, static_cast<int>(size)) +
+                        RleBitPackedEncoder::MinBufferSize(bit_width);
 
     out.resize(offset + prefix_size + max_rle_size);
 
-    RleEncoder encoder(reinterpret_cast<uint8_t *>(out.data() + offset + prefix_size), max_rle_size, bit_width);
+    RleBitPackedEncoder encoder(reinterpret_cast<uint8_t *>(out.data() + offset + prefix_size), static_cast<int>(max_rle_size), bit_width);
     for (size_t i = 0; i < size; ++i)
         encoder.Put(data[i]);
     encoder.Flush();
diff --git a/src/Processors/Formats/Impl/ParquetBlockOutputFormat.cpp b/src/Processors/Formats/Impl/ParquetBlockOutputFormat.cpp
index b5729d12b039..fc55f5469beb 100644
--- a/src/Processors/Formats/Impl/ParquetBlockOutputFormat.cpp
+++ b/src/Processors/Formats/Impl/ParquetBlockOutputFormat.cpp
@@ -365,9 +365,9 @@ void ParquetBlockOutputFormat::writeUsingArrow(std::vector<Chunk> chunks)
             builder.compression_level(static_cast<int>(format_settings.parquet.output_compression_level));
         }
 
-        // Writing page index is disabled by default.
-        if (format_settings.parquet.write_page_index)
-            builder.enable_write_page_index();
+        // Writing page index is enabled by default.
+        if (!format_settings.parquet.write_page_index)
+            builder.disable_write_page_index();
 
         parquet::ArrowWriterProperties::Builder writer_props_builder;
         if (format_settings.parquet.output_compliant_nested_types)
diff --git a/src/Server/ArrowFlightHandler.cpp b/src/Server/ArrowFlightHandler.cpp
index 77877a329a31..e7a4e4888b1f 100644
--- a/src/Server/ArrowFlightHandler.cpp
+++ b/src/Server/ArrowFlightHandler.cpp
@@ -972,7 +972,7 @@ arrow::Status ArrowFlightHandler::GetFlightInfo(
                         total_bytes += block.bytes();
                         auto ticket_info = calls_data->createTicket(std::make_shared<Block>(std::move(block)), ch_to_arrow_converter);
                         arrow::flight::FlightEndpoint endpoint;
-                        endpoint.ticket = arrow::flight::Ticket{.ticket = ticket_info->ticket};
+                        endpoint.ticket = arrow::flight::Ticket{ticket_info->ticket};
                         endpoint.expiration_time = ticket_info->expiration_time;
                         endpoints.emplace_back(endpoint);
                     }
@@ -1132,7 +1132,7 @@ arrow::Status ArrowFlightHandler::PollFlightInfo(
             if (poll_info->ticket)
             {
                 arrow::flight::FlightEndpoint endpoint;
-                endpoint.ticket = arrow::flight::Ticket{.ticket = *poll_info->ticket};
+                endpoint.ticket = arrow::flight::Ticket{*poll_info->ticket};
                 endpoint.expiration_time = calls_data->getTicketExpirationTime(*poll_info->ticket);
                 endpoints.emplace_back(endpoint);
             }
diff --git a/src/Storages/Hive/HiveFile.cpp b/src/Storages/Hive/HiveFile.cpp
index c44349b4ac1e..9677cfc69bde 100644
--- a/src/Storages/Hive/HiveFile.cpp
+++ b/src/Storages/Hive/HiveFile.cpp
@@ -285,7 +285,9 @@ void HiveParquetFile::prepareReader()
     in = std::make_unique<ReadBufferFromHDFS>(namenode_url, path, getContext()->getGlobalContext()->getConfigRef(), getContext()->getReadSettings());
     auto format_settings = getFormatSettings(getContext());
     std::atomic<int> is_stopped{0};
-    THROW_ARROW_NOT_OK(parquet::arrow::OpenFile(asArrowFile(*in, format_settings, is_stopped, "Parquet", PARQUET_MAGIC_BYTES), arrow::default_memory_pool(), &reader));
+    auto open_file_res = parquet::arrow::OpenFile(asArrowFile(*in, format_settings, is_stopped, "Parquet", PARQUET_MAGIC_BYTES), arrow::default_memory_pool());
+    THROW_ARROW_NOT_OK(open_file_res.status());
+    reader = *std::move(open_file_res);
 }
 
 void HiveParquetFile::loadSplitMinMaxIndexesImpl()
diff --git a/src/Storages/ObjectStorage/DataLakes/DeltaLakeMetadata.cpp b/src/Storages/ObjectStorage/DataLakes/DeltaLakeMetadata.cpp
index 4eb9e1f095ff..66227ae68caa 100644
--- a/src/Storages/ObjectStorage/DataLakes/DeltaLakeMetadata.cpp
+++ b/src/Storages/ObjectStorage/DataLakes/DeltaLakeMetadata.cpp
@@ -498,12 +498,10 @@ struct DeltaLakeMetadataImpl
 
         std::atomic<int> is_stopped{0};
 
-        std::unique_ptr<parquet::arrow::FileReader> reader;
-        THROW_ARROW_NOT_OK(
-            parquet::arrow::OpenFile(
-                asArrowFile(*buf, format_settings, is_stopped, "Parquet", PARQUET_MAGIC_BYTES),
-                arrow::default_memory_pool(),
-                &reader));
+        auto open_file_res = parquet::arrow::OpenFile(
+            asArrowFile(*buf, format_settings, is_stopped, "Parquet", PARQUET_MAGIC_BYTES), arrow::default_memory_pool());
+        THROW_ARROW_NOT_OK(open_file_res.status());
+        auto reader = *std::move(open_file_res);
 
         ArrowColumnToCHColumn column_reader(
             header, "Parquet",
diff --git a/tests/queries/0_stateless/02735_parquet_encoder.reference b/tests/queries/0_stateless/02735_parquet_encoder.reference
index d26a331ec2fb..c89b3a6c5300 100644
--- a/tests/queries/0_stateless/02735_parquet_encoder.reference
+++ b/tests/queries/0_stateless/02735_parquet_encoder.reference
@@ -43,7 +43,7 @@ datetime	DateTime64(3, \'UTC\')
 (1000000,NULL,NULL,'0','-1294970296')
 (1000000,NULL,NULL,'-2147483296','2147481000')
 (100000,900000,NULL,'100009','999999')
-[(2,NULL,NULL,'','[]')]
+[(2,NULL,NULL,NULL,NULL)]
 1	1
 0	1
 5090915589685802007

From 0906024c2dca03b0f22a06c304054abd2596515a Mon Sep 17 00:00:00 2001
From: Azat Khuzhin <a3at.mail@gmail.com>
Date: Wed, 11 Mar 2026 19:53:33 +0000
Subject: [PATCH 07/14] Merge pull request #99231 from
 azat/fix-parquet-cache-crash

Avoid crash possible crash in Parquet with metadata cache enabled
---
 src/Formats/FormatFactory.cpp                 |  2 +-
 src/Formats/FormatFactory.h                   |  3 +-
 .../Formats/Impl/ParquetBlockInputFormat.cpp  |  5 +-
 ..._metadata_cache_no_query_context.reference |  1 +
 ...quet_v3_metadata_cache_no_query_context.sh | 67 +++++++++++++++++++
 5 files changed, 74 insertions(+), 4 deletions(-)
 create mode 100644 tests/queries/0_stateless/04034_parquet_v3_metadata_cache_no_query_context.reference
 create mode 100755 tests/queries/0_stateless/04034_parquet_v3_metadata_cache_no_query_context.sh

diff --git a/src/Formats/FormatFactory.cpp b/src/Formats/FormatFactory.cpp
index b90621811875..97bf2abe9628 100644
--- a/src/Formats/FormatFactory.cpp
+++ b/src/Formats/FormatFactory.cpp
@@ -522,7 +522,7 @@ InputFormatPtr FormatFactory::getInputImpl(
     {
         format = creators.random_access_input_creator_with_metadata(
             buf, sample, format_settings, context->getReadSettings(), is_remote_fs,
-            parser_shared_resources, format_filter_info, object_with_metadata);
+            parser_shared_resources, format_filter_info, object_with_metadata, context);
     }
     // 3. Use the normal random access creator for formats that need to jump around in the file
     else if (creators.random_access_input_creator)
diff --git a/src/Formats/FormatFactory.h b/src/Formats/FormatFactory.h
index ae924e274167..6df5484850c2 100644
--- a/src/Formats/FormatFactory.h
+++ b/src/Formats/FormatFactory.h
@@ -121,7 +121,8 @@ class FormatFactory final : private boost::noncopyable, public IHints<2>
         bool is_remote_fs,
         FormatParserSharedResourcesPtr parser_shared_resources,
         FormatFilterInfoPtr format_filter_info,
-        const std::optional<RelativePathWithMetadata> & object_with_metadata)>;
+        const std::optional<RelativePathWithMetadata> & object_with_metadata,
+        const ContextPtr & context)>;
 
     using OutputCreator = std::function<OutputFormatPtr(
         WriteBuffer & buf,
diff --git a/src/Processors/Formats/Impl/ParquetBlockInputFormat.cpp b/src/Processors/Formats/Impl/ParquetBlockInputFormat.cpp
index d5cf2854483d..c25418fb3268 100644
--- a/src/Processors/Formats/Impl/ParquetBlockInputFormat.cpp
+++ b/src/Processors/Formats/Impl/ParquetBlockInputFormat.cpp
@@ -1423,7 +1423,8 @@ void registerInputFormatParquet(FormatFactory & factory)
            bool is_remote_fs,
            FormatParserSharedResourcesPtr parser_shared_resources,
            FormatFilterInfoPtr format_filter_info,
-           const std::optional<RelativePathWithMetadata> & object_with_metadata) -> InputFormatPtr
+           const std::optional<RelativePathWithMetadata> & object_with_metadata,
+           const ContextPtr & context) -> InputFormatPtr
         {
             auto lambda_logger = getLogger("ParquetMetadataCache");
             size_t min_bytes_for_seek
@@ -1431,7 +1432,7 @@ void registerInputFormatParquet(FormatFactory & factory)
             if (settings.parquet.use_native_reader_v3)
             {
                 LOG_TRACE(lambda_logger, "using native reader v3 in ParquetBlockInputFormat with metadata cache");
-                ParquetMetadataCachePtr metadata_cache = CurrentThread::getQueryContext()->getParquetMetadataCache();
+                ParquetMetadataCachePtr metadata_cache = context->getParquetMetadataCache();
                 return std::make_shared<ParquetV3BlockInputFormat>(
                     buf,
                     std::make_shared<const Block>(sample),
diff --git a/tests/queries/0_stateless/04034_parquet_v3_metadata_cache_no_query_context.reference b/tests/queries/0_stateless/04034_parquet_v3_metadata_cache_no_query_context.reference
new file mode 100644
index 000000000000..ea4483ec305e
--- /dev/null
+++ b/tests/queries/0_stateless/04034_parquet_v3_metadata_cache_no_query_context.reference
@@ -0,0 +1 @@
+100	4950
diff --git a/tests/queries/0_stateless/04034_parquet_v3_metadata_cache_no_query_context.sh b/tests/queries/0_stateless/04034_parquet_v3_metadata_cache_no_query_context.sh
new file mode 100755
index 000000000000..90cd0dea3e48
--- /dev/null
+++ b/tests/queries/0_stateless/04034_parquet_v3_metadata_cache_no_query_context.sh
@@ -0,0 +1,67 @@
+#!/usr/bin/env bash
+# Tags: no-fasttest
+# Tag no-fasttest: Depends on S3
+
+# Regression test: S3Queue background threads read Parquet files without a
+# query context on CurrentThread. Before the fix, the format factory lambda
+# called CurrentThread::getQueryContext()->getParquetMetadataCache() which
+# crashed because getQueryContext() returned null in background threads.
+
+set -e
+
+CUR_DIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)
+# shellcheck source=../shell_config.sh
+. "$CUR_DIR"/../shell_config.sh
+
+S3_PATH="test/${CLICKHOUSE_DATABASE}_04034"
+
+# Write a Parquet file to S3
+$CLICKHOUSE_CLIENT -q "
+    INSERT INTO FUNCTION s3(s3_conn, url = 'http://localhost:11111/${S3_PATH}/data.parquet', format = Parquet)
+    SELECT number AS id, toString(number) AS name FROM numbers(100)
+    SETTINGS s3_truncate_on_insert = 1
+"
+
+# Create destination table
+$CLICKHOUSE_CLIENT -q "
+    CREATE TABLE ${CLICKHOUSE_DATABASE}.dest (id UInt64, name String)
+    ENGINE = MergeTree ORDER BY id
+"
+
+# Create S3Queue table reading Parquet with metadata cache + native reader v3.
+# The background processing thread has no query context on CurrentThread.
+$CLICKHOUSE_CLIENT --send_logs_level=error -q "
+    CREATE TABLE ${CLICKHOUSE_DATABASE}.queue (id UInt64, name String)
+    ENGINE = S3Queue('http://localhost:11111/${S3_PATH}/*.parquet', 'Parquet')
+    SETTINGS
+        keeper_path = '/clickhouse/${CLICKHOUSE_DATABASE}/04034_s3queue',
+        mode = 'unordered',
+        after_processing = 'keep',
+        s3queue_processing_threads_num = 1,
+        s3queue_polling_min_timeout_ms = 100,
+        s3queue_polling_max_timeout_ms = 500,
+        input_format_parquet_use_native_reader_v3 = 1,
+        use_parquet_metadata_cache = 1
+"
+
+# Create MV to trigger background reads from S3Queue into dest table
+$CLICKHOUSE_CLIENT -q "
+    CREATE MATERIALIZED VIEW ${CLICKHOUSE_DATABASE}.mv TO ${CLICKHOUSE_DATABASE}.dest
+    AS SELECT id, name FROM ${CLICKHOUSE_DATABASE}.queue
+"
+
+# Wait for S3Queue to process the file (up to 30 seconds)
+for _ in {1..60}; do
+    count=$($CLICKHOUSE_CLIENT -q "SELECT count() FROM ${CLICKHOUSE_DATABASE}.dest")
+    if [ "$count" -ge 100 ]; then
+        break
+    fi
+    sleep 0.5
+done
+
+$CLICKHOUSE_CLIENT -q "SELECT count(), sum(id) FROM ${CLICKHOUSE_DATABASE}.dest"
+
+# Cleanup
+$CLICKHOUSE_CLIENT -q "DROP VIEW IF EXISTS ${CLICKHOUSE_DATABASE}.mv SYNC"
+$CLICKHOUSE_CLIENT -q "DROP TABLE IF EXISTS ${CLICKHOUSE_DATABASE}.queue SYNC"
+$CLICKHOUSE_CLIENT -q "DROP TABLE IF EXISTS ${CLICKHOUSE_DATABASE}.dest SYNC"

From bd0e554de9f49b08888657b24ed034daa66b6ae6 Mon Sep 17 00:00:00 2001
From: Grant Holly <grant.holly@clickhouse.com>
Date: Wed, 11 Mar 2026 23:33:01 +0000
Subject: [PATCH 08/14] Merge pull request #99230 from
 grantholly-clickhouse/add_parquet_format_check_to_metadata_cache

Add parquet format check to metadata cache
---
 src/Formats/FormatFactory.cpp                     |  3 ++-
 .../03707_parquet_metadata_cache.reference        |  3 +++
 .../0_stateless/03707_parquet_metadata_cache.sql  | 15 +++++++++++++++
 3 files changed, 20 insertions(+), 1 deletion(-)

diff --git a/src/Formats/FormatFactory.cpp b/src/Formats/FormatFactory.cpp
index 97bf2abe9628..5ebb293588f6 100644
--- a/src/Formats/FormatFactory.cpp
+++ b/src/Formats/FormatFactory.cpp
@@ -518,7 +518,8 @@ InputFormatPtr FormatFactory::getInputImpl(
         format = std::make_shared<ParallelParsingInputFormat>(params);
     }
     // 2. Prefer to use metadata-aware creator if we have object metadata
-    else if (creators.random_access_input_creator_with_metadata && object_with_metadata.has_value())
+    else if (creators.random_access_input_creator_with_metadata
+        && object_with_metadata.has_value() && format_settings.parquet.use_native_reader_v3)
     {
         format = creators.random_access_input_creator_with_metadata(
             buf, sample, format_settings, context->getReadSettings(), is_remote_fs,
diff --git a/tests/queries/0_stateless/03707_parquet_metadata_cache.reference b/tests/queries/0_stateless/03707_parquet_metadata_cache.reference
index 57876557fba1..f0f6f1bf5b4c 100644
--- a/tests/queries/0_stateless/03707_parquet_metadata_cache.reference
+++ b/tests/queries/0_stateless/03707_parquet_metadata_cache.reference
@@ -3,7 +3,10 @@
 0	1
 1	TEST1	2.23
 2	TEST2	5.5600000000000005
+1	TEST1	2.23
+2	TEST2	5.5600000000000005
 1	0
+0	0
 test1	2.46
 test2	9.120000000000001
 0	1
diff --git a/tests/queries/0_stateless/03707_parquet_metadata_cache.sql b/tests/queries/0_stateless/03707_parquet_metadata_cache.sql
index 3f95b172a1b3..460550baf6e0 100644
--- a/tests/queries/0_stateless/03707_parquet_metadata_cache.sql
+++ b/tests/queries/0_stateless/03707_parquet_metadata_cache.sql
@@ -2,6 +2,7 @@
 -- no-fasttest: depends on s3 storage
 -- no-parallel: cache is system-wide and tests can affect each other in unexpected way
 -- no-parallel-replicas: profile events are not available on the second replica
+-- no-random-settings: we need to test the interaction of specific setting combinations
 
 /*
 Because the parquet metadata cache is system-wide, parallel runs of 
@@ -32,6 +33,14 @@ SELECT
 FROM s3(s3_conn, filename = '03707_cache_test.parquet', format = 'Parquet')
 SETTINGS log_comment = '03707-second-test-query', use_parquet_metadata_cache = 1;
 
+-- Should not be a cache hit because we are not using the native v3 reader
+SELECT
+    id,
+    upper(name),
+    value + 1
+FROM s3(s3_conn, filename = '03707_cache_test.parquet', format = 'Parquet')
+SETTINGS log_comment = '03707-second-test-query-no-v3', use_parquet_metadata_cache = 1, input_format_parquet_use_native_reader_v3 = 0;
+
 SYSTEM FLUSH LOGS query_log;
 
 SELECT
@@ -40,6 +49,12 @@ SELECT
 FROM system.query_log
 WHERE (log_comment = '03707-second-test-query') AND (type = 'QueryFinish') AND (current_database = currentDatabase());
 
+SELECT
+    ProfileEvents['ParquetMetadataCacheHits'] AS hits,
+    ProfileEvents['ParquetMetadataCacheMisses'] AS misses
+FROM system.query_log
+WHERE (log_comment = '03707-second-test-query-no-v3') AND (type = 'QueryFinish') AND (current_database = currentDatabase());
+
 SYSTEM DROP PARQUET METADATA CACHE;
 
 -- Should be back to a cache miss since we dropped the cache

From 86b261bcb6c2fb9cd53c49a54622aef3ce8b1c3e Mon Sep 17 00:00:00 2001
From: Arthur Passos <arthur.ti@outlook.com>
Date: Wed, 25 Mar 2026 09:52:07 -0300
Subject: [PATCH 09/14] rmv unnecessary settings history

---
 src/Core/SettingsChangesHistory.cpp | 13 -------------
 1 file changed, 13 deletions(-)

diff --git a/src/Core/SettingsChangesHistory.cpp b/src/Core/SettingsChangesHistory.cpp
index d4af04807e38..a20d039ddd30 100644
--- a/src/Core/SettingsChangesHistory.cpp
+++ b/src/Core/SettingsChangesHistory.cpp
@@ -45,20 +45,7 @@ const VersionToSettingsChangesMap & getSettingsChangesHistory()
             // {"object_storage_max_nodes", 0, 0, "Antalya: New setting"},
             {"s3_propagate_credentials_to_other_storages", false, false, "New setting"},
             {"export_merge_tree_part_filename_pattern", "", "{part_name}_{checksum}", "New setting"},
-            {"optimize_syntax_fuse_functions", false, true, "The optimization is production-ready"},
-            {"allow_calculating_subcolumns_sizes_for_merge_tree_reading", false, true, "Allow calculating subcolumns sizes for merge tree reading to improve read tasks splitting"},
             {"use_parquet_metadata_cache", false, true, "Enables cache of parquet file metadata."},
-            {"allow_nullable_tuple_in_extracted_subcolumns", false, false, "New setting controlling whether extracted Tuple subcolumns can be nullable."},
-            {"use_text_index_tokens_cache", false, false, "New setting"},
-            {"delta_lake_reload_schema_for_consistency", false, false, "New setting to control whether DeltaLake reloads schema before each query for consistency."},
-            {"use_partition_pruning", true, true, "New setting controlling whether MergeTree uses partition key for pruning. 'use_partition_key' is an alias for this setting."},
-            {"use_partition_key", true, true, "Alias for setting 'use_partition_pruning'."},
-            {"webassembly_udf_max_fuel", 100'000, 100'000, "New setting to limit CPU instructions (fuel) per WebAssembly UDF instance execution."},
-            {"webassembly_udf_max_memory", 128_MiB, 128_MiB, "New setting to limit memory per WebAssembly UDF instance."},
-            {"webassembly_udf_max_input_block_size", 0, 0, "New setting to limit input block size for WebAssembly UDFs."},
-            {"webassembly_udf_max_instances", 32, 32, "New setting to limit the number of parallel WebAssembly UDF instances per function."},
-            {"mysql_datatypes_support_level", "", "decimal,datetime64,date2Date32", "Enable modern MySQL type mappings by default."},
-            {"allow_experimental_json_lazy_type_hints", false, false, "New experimental setting for lazy JSON type hints"},
         });
         addSettingsChanges(settings_changes_history, "26.2",
         {

From 8dd0d8fa80c2f5b63b7462e584483493a644656e Mon Sep 17 00:00:00 2001
From: Arthur Passos <arthur.ti@outlook.com>
Date: Wed, 25 Mar 2026 11:21:52 -0300
Subject: [PATCH 10/14] rmv more history

---
 src/Core/SettingsChangesHistory.cpp | 31 -----------------------------
 1 file changed, 31 deletions(-)

diff --git a/src/Core/SettingsChangesHistory.cpp b/src/Core/SettingsChangesHistory.cpp
index a20d039ddd30..f502fcb141d8 100644
--- a/src/Core/SettingsChangesHistory.cpp
+++ b/src/Core/SettingsChangesHistory.cpp
@@ -47,37 +47,6 @@ const VersionToSettingsChangesMap & getSettingsChangesHistory()
             {"export_merge_tree_part_filename_pattern", "", "{part_name}_{checksum}", "New setting"},
             {"use_parquet_metadata_cache", false, true, "Enables cache of parquet file metadata."},
         });
-        addSettingsChanges(settings_changes_history, "26.2",
-        {
-            {"allow_fuzz_query_functions", false, false, "New setting to enable the fuzzQuery function."},
-            {"ast_fuzzer_runs", 0, 0, "New setting to enable server-side AST fuzzer."},
-            {"ast_fuzzer_any_query", false, false, "New setting to allow fuzzing all query types, not just read-only."},
-            {"check_named_collection_dependencies", true, true, "New setting to check if dropping a named collection would break dependent tables."},
-            {"async_insert", false, true, "Enable async inserts by default."},
-            {"deduplicate_blocks_in_dependent_materialized_views", false, true, "Enable deduplication for dependent materialized views by default."},
-            {"deduplicate_insert", "backward_compatible_choice", "enable", "Enable deduplication for all sync and async inserts by default."},
-            {"enable_join_runtime_filters", false, true, "Enabled this optimization"},
-            {"parallel_replicas_filter_pushdown", false, false, "New setting"},
-            {"optimize_dry_run_check_part", true, true, "New setting"},
-            {"parallel_non_joined_rows_processing", true, true, "New setting to enable parallel processing of non-joined rows in RIGHT/FULL parallel_hash joins."},
-            {"enable_automatic_decision_for_merging_across_partitions_for_final", true, true, "New setting"},
-            {"enable_full_text_index", false, true, "The text index is now GA"},
-            {"allow_experimental_full_text_index", false, true, "The text index is now GA"},
-            {"use_page_cache_for_local_disks", false, false, "New setting to use userspace page cache for local disks"},
-            {"use_page_cache_for_object_storage", false, false, "New setting to use userspace page cache for object storage table functions"},
-            {"use_statistics_cache", false, true, "Enable statistics cache"},
-            {"apply_row_policy_after_final", false, true, "Enabling apply_row_policy_after_final by default, as if was in 25.8 before #87303"},
-            {"ignore_format_null_for_explain", false, true, "FORMAT Null is now ignored for EXPLAIN queries by default"},
-            {"input_format_connection_handling", false, false, "New setting to allow parsing and processing remaining data in the buffer if the connection closes unexpectedly"},
-            {"input_format_max_block_wait_ms", 0, 0, "New setting to limit maximum wait time in milliseconds before a block is emitted by input format"},
-            {"allow_insert_into_iceberg", false, false, "Insert into iceberg was moved to Beta"},
-            {"allow_experimental_insert_into_iceberg", false, false, "Insert into iceberg was moved to Beta"},
-            {"output_format_arrow_date_as_uint16", true, false, "Write Date as Arrow DATE32 instead of plain UInt16 by default."},
-            {"jemalloc_profile_text_output_format", "collapsed", "collapsed", "New setting to control output format for system.jemalloc_profile_text table. Possible values: 'raw', 'symbolized', 'collapsed'"},
-            {"jemalloc_profile_text_symbolize_with_inline", true, true, "New setting to control whether to include inline frames when symbolizing jemalloc heap profile. When enabled, inline frames are included at the cost of slower symbolization; when disabled, they are skipped for faster output"},
-            {"jemalloc_profile_text_collapsed_use_count", false, false, "New setting to aggregate by allocation count instead of bytes in the collapsed jemalloc heap profile format"},
-            {"opentelemetry_start_keeper_trace_probability", "auto", "auto", "New setting"},
-        });
         addSettingsChanges(settings_changes_history, "26.1",
         {
             {"parallel_replicas_filter_pushdown", false, false, "New setting"},

From f0350c848bbf3230f572a7cee5cc40e0bf68476a Mon Sep 17 00:00:00 2001
From: Arthur Passos <arthur.ti@outlook.com>
Date: Wed, 25 Mar 2026 14:06:40 -0300
Subject: [PATCH 11/14] rmv history

---
 src/Core/SettingsChangesHistory.cpp | 1 -
 1 file changed, 1 deletion(-)

diff --git a/src/Core/SettingsChangesHistory.cpp b/src/Core/SettingsChangesHistory.cpp
index f502fcb141d8..1610dbce906c 100644
--- a/src/Core/SettingsChangesHistory.cpp
+++ b/src/Core/SettingsChangesHistory.cpp
@@ -239,7 +239,6 @@ const VersionToSettingsChangesMap & getSettingsChangesHistory()
             {"allow_database_unity_catalog", false, true, "Turned ON by default for Antalya (alias)."},
             {"allow_database_glue_catalog", false, true, "Turned ON by default for Antalya (alias)."},
 
-            {"input_format_parquet_use_metadata_cache", true, true, "New setting, turned ON by default"}, // https://github.com/Altinity/ClickHouse/pull/586
             {"iceberg_timezone_for_timestamptz", "UTC", "UTC", "New setting."},
             {"object_storage_remote_initiator", false, false, "New setting."},
             {"allow_experimental_iceberg_read_optimization", true, true, "New setting."},

From fa8ab8fe4a15660316a6dcc47721b8f86c788544 Mon Sep 17 00:00:00 2001
From: Arthur Passos <arthur.ti@outlook.com>
Date: Thu, 26 Mar 2026 09:11:46 -0300
Subject: [PATCH 12/14] fix test

---
 tests/integration/test_s3_cluster/test.py | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/tests/integration/test_s3_cluster/test.py b/tests/integration/test_s3_cluster/test.py
index a1397da6eea6..56f922212d43 100644
--- a/tests/integration/test_s3_cluster/test.py
+++ b/tests/integration/test_s3_cluster/test.py
@@ -935,7 +935,7 @@ def test_hive_partitioning(started_cluster, allow_experimental_analyzer):
             FROM s3('http://minio1:9001/root/data/hive/key=**.parquet', 'minio', '{minio_secret_key}', 'Parquet', 'key Int32, value Int32')
             WHERE key <= 2
             FORMAT TSV
-            SETTINGS enable_filesystem_cache = 0, use_query_cache = 0, use_cache_for_count_from_files = 0, use_hive_partitioning = 0
+            SETTINGS enable_filesystem_cache = 0, use_query_cache = 0, use_cache_for_count_from_files = 0, use_hive_partitioning = 0, use_parquet_metadata_cache = 0
         """,
         query_id=query_id_full,
     )
@@ -949,7 +949,7 @@ def test_hive_partitioning(started_cluster, allow_experimental_analyzer):
             FROM s3('http://minio1:9001/root/data/hive/key=**.parquet', 'minio', '{minio_secret_key}', 'Parquet', 'key Int32, value Int32')
             WHERE key <= 2
             FORMAT TSV
-            SETTINGS enable_filesystem_cache = 0, use_query_cache = 0, use_cache_for_count_from_files = 0, use_hive_partitioning = 1
+            SETTINGS enable_filesystem_cache = 0, use_query_cache = 0, use_cache_for_count_from_files = 0, use_hive_partitioning = 1, use_parquet_metadata_cache = 0
         """,
         query_id=query_id_optimized,
     )
@@ -963,7 +963,7 @@ def test_hive_partitioning(started_cluster, allow_experimental_analyzer):
             FROM s3Cluster(cluster_simple, 'http://minio1:9001/root/data/hive/key=**.parquet', 'minio', '{minio_secret_key}', 'Parquet', 'key Int32, value Int32')
             WHERE key <= 2
             FORMAT TSV
-            SETTINGS enable_filesystem_cache = 0, use_query_cache = 0, use_cache_for_count_from_files = 0, use_hive_partitioning = 0
+            SETTINGS enable_filesystem_cache = 0, use_query_cache = 0, use_cache_for_count_from_files = 0, use_hive_partitioning = 0, use_parquet_metadata_cache = 0
         """,
         query_id=query_id_cluster_full,
     )
@@ -977,7 +977,7 @@ def test_hive_partitioning(started_cluster, allow_experimental_analyzer):
             FROM s3Cluster(cluster_simple, 'http://minio1:9001/root/data/hive/key=**.parquet', 'minio', '{minio_secret_key}', 'Parquet', 'key Int32, value Int32')
             WHERE key <= 2
             FORMAT TSV
-            SETTINGS enable_filesystem_cache = 0, use_query_cache = 0, use_cache_for_count_from_files = 0, use_hive_partitioning = 1
+            SETTINGS enable_filesystem_cache = 0, use_query_cache = 0, use_cache_for_count_from_files = 0, use_hive_partitioning = 1, use_parquet_metadata_cache = 0
         """,
         query_id=query_id_cluster_optimized,
     )

From 691c4a52724fc43cc6160440d2a43eabc0b38a75 Mon Sep 17 00:00:00 2001
From: Arthur Passos <arthur.ti@outlook.com>
Date: Thu, 26 Mar 2026 13:17:34 -0300
Subject: [PATCH 13/14] mark settings a obsolete

---
 src/Core/FormatFactorySettings.h | 1 +
 1 file changed, 1 insertion(+)

diff --git a/src/Core/FormatFactorySettings.h b/src/Core/FormatFactorySettings.h
index a55c47d6a9b7..b8d7d4b8c4bd 100644
--- a/src/Core/FormatFactorySettings.h
+++ b/src/Core/FormatFactorySettings.h
@@ -1536,6 +1536,7 @@ Automatically create parent directories when using INTO OUTFILE if they do not a
     MAKE_OBSOLETE(M, Bool, input_format_orc_import_nested, false) \
     MAKE_OBSOLETE(M, Bool, output_format_enable_streaming, false) \
     MAKE_OBSOLETE(M, Bool, input_format_parquet_use_native_reader, false) \
+    MAKE_OBSOLETE(M, Bool, input_format_parquet_use_metadata_cache, false) \
 
 #endif // __CLION_IDE__
 

From 571b99e992c8a341d9d4336bdd5015c338632dd9 Mon Sep 17 00:00:00 2001
From: Arthur Passos <arthur.ti@outlook.com>
Date: Thu, 26 Mar 2026 19:15:15 -0300
Subject: [PATCH 14/14] he

---
 src/Core/SettingsChangesHistory.cpp | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/src/Core/SettingsChangesHistory.cpp b/src/Core/SettingsChangesHistory.cpp
index 1610dbce906c..9e8bc71e6b34 100644
--- a/src/Core/SettingsChangesHistory.cpp
+++ b/src/Core/SettingsChangesHistory.cpp
@@ -46,6 +46,7 @@ const VersionToSettingsChangesMap & getSettingsChangesHistory()
             {"s3_propagate_credentials_to_other_storages", false, false, "New setting"},
             {"export_merge_tree_part_filename_pattern", "", "{part_name}_{checksum}", "New setting"},
             {"use_parquet_metadata_cache", false, true, "Enables cache of parquet file metadata."},
+            {"input_format_parquet_use_metadata_cache", true, false, "Obsolete. No-op"}, // https://github.com/Altinity/ClickHouse/pull/586
         });
         addSettingsChanges(settings_changes_history, "26.1",
         {
@@ -238,7 +239,7 @@ const VersionToSettingsChangesMap & getSettingsChangesHistory()
             {"allow_database_iceberg", false, true, "Turned ON by default for Antalya (alias)."},
             {"allow_database_unity_catalog", false, true, "Turned ON by default for Antalya (alias)."},
             {"allow_database_glue_catalog", false, true, "Turned ON by default for Antalya (alias)."},
-
+            {"input_format_parquet_use_metadata_cache", true, true, "New setting, turned ON by default"}, // https://github.com/Altinity/ClickHouse/pull/586
             {"iceberg_timezone_for_timestamptz", "UTC", "UTC", "New setting."},
             {"object_storage_remote_initiator", false, false, "New setting."},
             {"allow_experimental_iceberg_read_optimization", true, true, "New setting."},