From a1edeca23c94c0c7fe068e2fb36c32d419609b77 Mon Sep 17 00:00:00 2001 From: Anton Ivashkin Date: Wed, 27 May 2026 16:21:17 +0200 Subject: [PATCH] Do not make 'describe table' query when schema is known --- src/Storages/IStorageCluster.cpp | 5 ++++ src/TableFunctions/CMakeLists.txt | 1 + src/TableFunctions/TableFunctionRemote.cpp | 3 +++ src/TableFunctions/TableFunctionRemote.h | 3 +++ tests/integration/test_s3_cluster/test.py | 24 +++++++++---------- .../test_remote_initiator.py | 4 +++- 6 files changed, 27 insertions(+), 13 deletions(-) diff --git a/src/Storages/IStorageCluster.cpp b/src/Storages/IStorageCluster.cpp index 962b123234bb..35ebcfee8dfe 100644 --- a/src/Storages/IStorageCluster.cpp +++ b/src/Storages/IStorageCluster.cpp @@ -25,6 +25,7 @@ #include #include #include +#include #include #include #include @@ -500,6 +501,10 @@ IStorageCluster::RemoteCallVariables IStorageCluster::convertToRemote( auto remote_function = TableFunctionFactory::instance().get(remote_query, new_context); + std::shared_ptr<TableFunctionRemote> remote_table_function = std::dynamic_pointer_cast<TableFunctionRemote>(remote_function); + if (remote_table_function) + remote_table_function->setActualTableStructure(getInMemoryMetadata().columns); + auto storage = remote_function->execute(query_to_send, new_context, remote_function_name); return RemoteCallVariables{storage, new_context}; diff --git a/src/TableFunctions/CMakeLists.txt b/src/TableFunctions/CMakeLists.txt index ccdab2fc41b2..eb1c67d2018d 100644 --- a/src/TableFunctions/CMakeLists.txt +++ b/src/TableFunctions/CMakeLists.txt @@ -12,6 +12,7 @@ extract_into_parent_list(clickhouse_table_functions_sources dbms_sources ITableFunction.cpp TableFunctionView.cpp TableFunctionFactory.cpp + TableFunctionRemote.cpp ) extract_into_parent_list(clickhouse_table_functions_headers dbms_headers ITableFunction.h diff --git a/src/TableFunctions/TableFunctionRemote.cpp b/src/TableFunctions/TableFunctionRemote.cpp index c7c758ea2531..78662254f9a3 100644 --- 
a/src/TableFunctions/TableFunctionRemote.cpp +++ b/src/TableFunctions/TableFunctionRemote.cpp @@ -356,6 +356,9 @@ StoragePtr TableFunctionRemote::executeImpl(const ASTPtr & /*ast_function*/, Con ColumnsDescription TableFunctionRemote::getActualTableStructure(ContextPtr context, bool /*is_insert_query*/) const { + if (!remote_table_columns.empty()) + return remote_table_columns; + assert(cluster); return getStructureOfRemoteTable(*cluster, remote_table_id, context, remote_table_function_ptr); } diff --git a/src/TableFunctions/TableFunctionRemote.h b/src/TableFunctions/TableFunctionRemote.h index 498339231153..47e8f1c27efa 100644 --- a/src/TableFunctions/TableFunctionRemote.h +++ b/src/TableFunctions/TableFunctionRemote.h @@ -28,6 +28,8 @@ class TableFunctionRemote : public ITableFunction void setRemoteTableFunction(ASTPtr remote_table_function_ptr_) { remote_table_function_ptr = remote_table_function_ptr_; } + void setActualTableStructure(ColumnsDescription remote_table_columns_) { remote_table_columns = remote_table_columns_; } + private: StoragePtr executeImpl(const ASTPtr & ast_function, ContextPtr context, const std::string & table_name, ColumnsDescription cached_columns, bool is_insert_query) const override; @@ -44,6 +46,7 @@ class TableFunctionRemote : public ITableFunction StorageID remote_table_id = StorageID::createEmpty(); ASTPtr remote_table_function_ptr; ASTPtr sharding_key = nullptr; + ColumnsDescription remote_table_columns; }; } diff --git a/tests/integration/test_s3_cluster/test.py b/tests/integration/test_s3_cluster/test.py index b990c0709f08..bf9eebe319ef 100644 --- a/tests/integration/test_s3_cluster/test.py +++ b/tests/integration/test_s3_cluster/test.py @@ -832,8 +832,8 @@ def test_object_storage_remote_initiator(started_cluster): """ ).splitlines() - # initial node + describe table + remote initiator + 2 subqueries on replicas - assert queries == ["5"] + # initial node + remote initiator + 2 subqueries on replicas + assert queries == ["4"] # 
Cluster with dots in the host names query_id = uuid.uuid4().hex @@ -860,8 +860,8 @@ def test_object_storage_remote_initiator(started_cluster): """ ).splitlines() - # initial node + describe table + remote initiator + 2 subqueries on replicas - assert queries == ["5"] + # initial node + remote initiator + 2 subqueries on replicas + assert queries == ["4"] users = node.query( f""" @@ -902,8 +902,8 @@ def test_object_storage_remote_initiator(started_cluster): """ ).splitlines() - # initial node + describe table + remote initiator + 2 subqueries on replicas - assert queries == ["5"] + # initial node + remote initiator + 2 subqueries on replicas + assert queries == ["4"] users = node.query( f""" @@ -963,8 +963,8 @@ def test_object_storage_remote_initiator(started_cluster): """ ).splitlines() - # initial node + describe table + remote initiator + 2 subqueries on replicas - assert queries == ["5"] + # initial node + remote initiator + 2 subqueries on replicas + assert queries == ["4"] users = node.query( f""" @@ -1427,8 +1427,8 @@ def test_object_storage_remote_initiator_without_cluster_function(started_cluste """ ).splitlines() - # initial node + describe table + remote initiator - assert queries == ["3"] + # initial node + remote initiator + assert queries == ["2"] users = node.query( f""" @@ -1472,8 +1472,8 @@ def test_object_storage_remote_initiator_without_cluster_function(started_cluste """ ).splitlines() - # initial node + describe table + remote initiator + 2 subqueries on replicas - assert queries == ["5"] + # initial node + remote initiator + 2 subqueries on replicas + assert queries == ["4"] users = node.query( f""" diff --git a/tests/integration/test_storage_iceberg_with_spark/test_remote_initiator.py b/tests/integration/test_storage_iceberg_with_spark/test_remote_initiator.py index ba0a61f9a998..a5b833c8a2ea 100644 --- a/tests/integration/test_storage_iceberg_with_spark/test_remote_initiator.py +++ 
b/tests/integration/test_storage_iceberg_with_spark/test_remote_initiator.py @@ -64,6 +64,7 @@ def flush_logs(): FROM clusterAllReplicas('cluster_simple', system.query_log) WHERE type='QueryFinish' AND initial_query_id='{query_id}' """) + # initial node + 3 subqueries on replicas assert queries == "4\n" query_id = uuid.uuid4().hex @@ -83,7 +84,8 @@ def flush_logs(): FROM clusterAllReplicas('cluster_simple', system.query_log) WHERE type='QueryFinish' AND initial_query_id='{query_id}' """) - assert queries == "6\n" + # initial node + remote initiator + 3 subqueries on replicas + assert queries == "5\n" query_id = uuid.uuid4().hex res = instance.query(f"""