Skip to content

Commit 9f25950

Browse files
committed
Add AI powered SQL generation using ClickHouse client's API.
1 parent fb88b77 commit 9f25950

File tree

10 files changed

+457
-2
lines changed

10 files changed

+457
-2
lines changed

chdb/__init__.py

Lines changed: 68 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -214,6 +214,73 @@ def query(sql, output_format="CSV", path="", udf_path="", params=None):
214214
# alias for query
215215
sql = query
216216

217+
218+
def generate_sql(
    prompt,
    *,
    path=":memory:",
    ai_api_key=None,
    ai_base_url=None,
    ai_model=None,
    ai_provider=None,
    ai_temperature=None,
    ai_max_tokens=None,
    ai_timeout_seconds=None,
    ai_system_prompt=None,
    ai_max_steps=None,
    ai_enable_schema_access=None,
):
    """Generate SQL text from a natural language prompt using the configured AI provider.

    Builds a connection string from ``path`` plus the ``ai_*`` options, opens a
    connection with ``_chdb.connect``, asks it for SQL, and always closes the
    connection again before returning.

    Args:
        prompt: Natural-language request handed to the connection's
            ``generate_sql``.
        path: Connection target. ``None``, ``""`` and ``":memory:"`` all map to
            ``":memory:"``; anything else is formatted into the string as-is.
        ai_api_key, ai_base_url, ai_model, ai_provider, ai_system_prompt:
            Appended as ``name=value`` only when truthy.
        ai_temperature, ai_max_tokens, ai_timeout_seconds, ai_max_steps:
            Appended as ``name=str(value)`` whenever they are not ``None``
            (so ``0`` is forwarded).
        ai_enable_schema_access: Appended as ``"1"``/``"0"`` when not ``None``.

    Returns:
        Whatever the connection's ``generate_sql(prompt)`` returns.

    Raises:
        RuntimeError: If the connection object has no ``generate_sql`` attribute.
    """
    # Resolve the base target; the three "empty" spellings all mean in-memory.
    if path in (None, "", ":memory:"):
        target = ":memory:"
    else:
        target = f"{path}"

    # The module-level UDF path, when set, is always carried along.
    if g_udf_path != "":
        joiner = "&" if "?" in target else "?"
        target = f"{target}{joiner}udf_path={g_udf_path}"

    # (name, rendered value, include?) in the exact order they are emitted.
    # NOTE(review): values are interpolated verbatim; confirm how the
    # connection-string parser treats '&' or '=' inside a value.
    options = (
        ("ai_api_key", ai_api_key, bool(ai_api_key)),
        ("ai_base_url", ai_base_url, bool(ai_base_url)),
        ("ai_model", ai_model, bool(ai_model)),
        ("ai_provider", ai_provider, bool(ai_provider)),
        ("ai_temperature", str(ai_temperature), ai_temperature is not None),
        ("ai_max_tokens", str(ai_max_tokens), ai_max_tokens is not None),
        ("ai_timeout_seconds", str(ai_timeout_seconds), ai_timeout_seconds is not None),
        ("ai_system_prompt", ai_system_prompt, bool(ai_system_prompt)),
        ("ai_max_steps", str(ai_max_steps), ai_max_steps is not None),
        (
            "ai_enable_schema_access",
            "1" if ai_enable_schema_access else "0",
            ai_enable_schema_access is not None,
        ),
    )
    pairs = [f"{name}={value}" for name, value, enabled in options if enabled]

    if pairs:
        query = "&".join(pairs)
        if "?" not in target:
            glue = "?"
        elif target.endswith(("?", "&")):
            # Already positioned for another parameter; no separator needed.
            glue = ""
        else:
            glue = "&"
        target = f"{target}{glue}{query}"

    conn = _chdb.connect(target)
    try:
        if hasattr(conn, "generate_sql"):
            return conn.generate_sql(prompt)
        raise RuntimeError("AI SQL generation is not available in this build.")
    finally:
        conn.close()
283+
217284
PyReader = _chdb.PyReader
218285

219286
from . import dbapi, session, udf, utils # noqa: E402
@@ -225,6 +292,7 @@ def query(sql, output_format="CSV", path="", udf_path="", params=None):
225292
"ChdbError",
226293
"query",
227294
"sql",
295+
"generate_sql",
228296
"chdb_version",
229297
"engine_version",
230298
"to_df",

chdb/build.sh

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -95,6 +95,7 @@ CMAKE_ARGS="-DCMAKE_BUILD_TYPE=${build_type} -DENABLE_THINLTO=0 -DENABLE_TESTS=0
9595
-DENABLE_KAFKA=1 -DENABLE_LIBPQXX=1 -DENABLE_NATS=0 -DENABLE_AMQPCPP=0 -DENABLE_NURAFT=0 \
9696
-DENABLE_CASSANDRA=0 -DENABLE_ODBC=0 -DENABLE_NLP=0 \
9797
-DENABLE_LDAP=0 \
98+
-DENABLE_CLIENT_AI=1 \
9899
${MYSQL} \
99100
${HDFS} \
100101
-DENABLE_LIBRARIES=0 ${RUST_FEATURES} \

chdb/session/state.py

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -207,6 +207,12 @@ def query(self, sql, fmt="CSV", udf_path="", params=None):
207207
# alias sql = query
208208
sql = query
209209

210+
def generate_sql(self, prompt: str) -> str:
    """Generate SQL text from a natural language prompt using the configured AI provider.

    Args:
        prompt: Natural-language request forwarded to the underlying
            connection's ``generate_sql``.

    Returns:
        The value returned by the underlying connection's ``generate_sql``.

    Raises:
        RuntimeError: If the session has no connection (``_conn`` is ``None``),
            or if the connection object does not expose ``generate_sql``.
    """
    if self._conn is None:
        raise RuntimeError("Session is closed.")
    # Same guard and message as the other generate_sql entry points, so a
    # build without AI support fails with a clear error, not AttributeError.
    if not hasattr(self._conn, "generate_sql"):
        raise RuntimeError("AI SQL generation is not available in this build.")
    return self._conn.generate_sql(prompt)
215+
210216
def send_query(self, sql, fmt="CSV", params=None) -> StreamingResult:
211217
"""Execute a SQL query and return a streaming result iterator.
212218

chdb/state/sqlitelike.py

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -466,6 +466,12 @@ def query(self, query: str, format: str = "CSV", params=None) -> Any:
466466
result = self._conn.query(query, format, params=params or {})
467467
return result_func(result)
468468

469+
def generate_sql(self, prompt: str) -> str:
    """Generate SQL text from a natural language prompt using the configured AI provider.

    Looks up ``generate_sql`` on the wrapped connection and calls it with
    ``prompt``.

    Raises:
        RuntimeError: If the wrapped connection has no ``generate_sql`` attribute.
    """
    absent = object()
    generator = getattr(self._conn, "generate_sql", absent)
    if generator is absent:
        raise RuntimeError("AI SQL generation is not available in this build.")
    return generator(prompt)
474+
469475
def send_query(self, query: str, format: str = "CSV", params=None) -> StreamingResult:
470476
"""Execute a SQL query and return a streaming result iterator.
471477
programs/local/AIQueryProcessor.cpp

Lines changed: 120 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,120 @@
1+
#include "AIQueryProcessor.h"
2+
3+
#include "chdb-internal.h"
4+
#include "PybindWrapper.h"
5+
6+
#include <pybind11/pybind11.h>
7+
#include <pybind11/detail/non_limited_api.h>
8+
9+
#if USE_CLIENT_AI
10+
#include <Client/AI/AIClientFactory.h>
11+
#include <Client/AI/AISQLGenerator.h>
12+
#endif
13+
14+
#include <cstdlib>
15+
#include <iostream>
16+
#include <stdexcept>
17+
18+
namespace py = pybind11;
19+
20+
#if USE_CLIENT_AI
21+
22+
/// Remember the connection pointer and take a private copy of the AI settings.
/// The generator itself starts out null; it is built by initializeGenerator().
AIQueryProcessor::AIQueryProcessor(chdb_connection * connection_, const DB::AIConfiguration & config_)
    : connection(connection_)
    , generator(nullptr)
    , ai_config(config_)
{
}

/// Defined out of line in this translation unit; members clean up after themselves.
AIQueryProcessor::~AIQueryProcessor() = default;
28+
29+
namespace
30+
{
31+
void applyEnvFallback(DB::AIConfiguration & config)
32+
{
33+
if (config.api_key.empty())
34+
{
35+
if (const char * api_key = std::getenv("AI_API_KEY"))
36+
config.api_key = api_key;
37+
else if (const char * openai_key = std::getenv("OPENAI_API_KEY"))
38+
config.api_key = openai_key;
39+
else if (const char * anthropic_key = std::getenv("ANTHROPIC_API_KEY"))
40+
config.api_key = anthropic_key;
41+
}
42+
}
43+
}
44+
45+
std::string AIQueryProcessor::executeQueryForAI(const std::string & query)
46+
{
47+
chdb_result * result = chdb_query_n(*connection, query.data(), query.size(), "TSV", 3);
48+
const auto & error_msg = CHDB::chdb_result_error_string(result);
49+
if (!error_msg.empty())
50+
{
51+
std::string msg_copy(error_msg);
52+
chdb_destroy_query_result(result);
53+
throw std::runtime_error(msg_copy);
54+
}
55+
56+
std::string data(chdb_result_buffer(result), chdb_result_length(result));
57+
chdb_destroy_query_result(result);
58+
return data;
59+
}
60+
61+
void AIQueryProcessor::initializeGenerator()
62+
{
63+
if (generator)
64+
return;
65+
66+
// If a custom base URL is provided but provider is empty, default to OpenAI-compatible.
67+
if (ai_config.provider.empty() && !ai_config.base_url.empty())
68+
ai_config.provider = "openai";
69+
70+
applyEnvFallback(ai_config);
71+
72+
if (ai_config.api_key.empty())
73+
throw std::runtime_error("AI SQL generator is not configured. Provide ai_api_key (or set OPENAI_API_KEY/ANTHROPIC_API_KEY) when creating the connection or session.");
74+
75+
auto ai_result = DB::AIClientFactory::createClient(ai_config);
76+
77+
if (ai_result.no_configuration_found || !ai_result.client.has_value())
78+
throw std::runtime_error("AI SQL generator is not configured. Provide ai_api_key (or set OPENAI_API_KEY/ANTHROPIC_API_KEY) when creating the connection or session.");
79+
80+
auto query_executor = [this](const std::string & query_text) { return executeQueryForAI(query_text); };
81+
std::cerr << "[chdb] AI SQL generator using provider=" << (ai_config.provider.empty() ? "<auto>" : ai_config.provider)
82+
<< ", model=" << (ai_config.model.empty() ? "<default>" : ai_config.model)
83+
<< ", base_url=" << (ai_config.base_url.empty() ? "<default>" : ai_config.base_url) << std::endl;
84+
generator = std::make_unique<DB::AISQLGenerator>(ai_config, std::move(ai_result.client.value()), query_executor, std::cerr);
85+
}
86+
87+
std::string AIQueryProcessor::generateSQLFromPrompt(const std::string & prompt)
88+
{
89+
initializeGenerator();
90+
91+
if (!generator)
92+
throw std::runtime_error("AI SQL generator is not configured. Provide ai_api_key (or set OPENAI_API_KEY/ANTHROPIC_API_KEY) when creating the connection or session.");
93+
94+
std::string sql;
95+
{
96+
py::gil_scoped_release release;
97+
sql = generator->generateSQL(prompt);
98+
}
99+
100+
if (sql.empty())
101+
throw std::runtime_error("AI did not return a SQL query.");
102+
103+
return sql;
104+
}
105+
106+
std::string AIQueryProcessor::generateSQL(const std::string & prompt)
107+
{
108+
return generateSQLFromPrompt(prompt);
109+
}
110+
111+
#else
112+
113+
AIQueryProcessor::AIQueryProcessor(chdb_connection *, const DB::AIConfiguration &) : connection(nullptr) { }
114+
AIQueryProcessor::~AIQueryProcessor() = default;
115+
std::string AIQueryProcessor::executeQueryForAI(const std::string &) { return {}; }
116+
void AIQueryProcessor::initializeGenerator() { }
117+
std::string AIQueryProcessor::generateSQLFromPrompt(const std::string &) { return {}; }
118+
std::string AIQueryProcessor::generateSQL(const std::string &) { return {}; }
119+
120+
#endif

programs/local/AIQueryProcessor.h

Lines changed: 28 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,28 @@
1+
#pragma once
2+
3+
#include "chdb.h"
4+
#include <Client/AI/AISQLGenerator.h>
5+
#include <Client/AI/AIConfiguration.h>
6+
7+
#include <memory>
8+
#include <string>
9+
10+
/// AI query processor that delegates to AISQLGenerator.
11+
class AIQueryProcessor
12+
{
13+
public:
14+
AIQueryProcessor(chdb_connection * connection_, const DB::AIConfiguration & config_);
15+
~AIQueryProcessor();
16+
17+
/// Generate SQL using the configured AI provider.
18+
std::string generateSQL(const std::string & prompt);
19+
20+
private:
21+
chdb_connection * connection;
22+
std::unique_ptr<DB::AISQLGenerator> generator;
23+
DB::AIConfiguration ai_config;
24+
25+
std::string executeQueryForAI(const std::string & query);
26+
std::string generateSQLFromPrompt(const std::string & prompt);
27+
void initializeGenerator();
28+
};

programs/local/CMakeLists.txt

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -44,6 +44,7 @@ if (USE_PYTHON)
4444
PandasDataFrame.cpp
4545
PandasDataFrameBuilder.cpp
4646
PandasScan.cpp
47+
AIQueryProcessor.cpp
4748
PyArrowStreamFactory.cpp
4849
PyArrowTable.cpp
4950
PybindWrapper.cpp
@@ -156,5 +157,9 @@ if (TARGET ch_contrib::pybind11_stubs)
156157
target_compile_definitions(clickhouse-local-lib PRIVATE Py_LIMITED_API=0x03080000)
157158
endif()
158159

160+
if (ENABLE_CLIENT_AI AND TARGET ch_contrib::ai-sdk-cpp)
161+
target_link_libraries(clickhouse-local-lib PRIVATE ch_contrib::ai-sdk-cpp)
162+
endif()
163+
159164
# Always use internal readpassphrase
160165
target_link_libraries(clickhouse-local-lib PRIVATE readpassphrase)

0 commit comments

Comments
 (0)