Skip to content

Commit 0401915

Browse files
committed
Generate xml and py files for UDF
1 parent 97d73b9 commit 0401915

File tree

3 files changed

+76
-0
lines changed

3 files changed

+76
-0
lines changed

chdb/udf/__init__.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1 @@
1+
from .udf import *

chdb/udf/udf.py

Lines changed: 62 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,62 @@
1+
import functools
2+
import inspect
3+
import os
4+
import tempfile
5+
from xml.etree import ElementTree as ET
6+
7+
# Scratch directory created at import time. Nothing uses it yet: the
# commented-out chdir below shows the alternative of generating into it.
tempdir = tempfile.TemporaryDirectory()
# os.chdir(tempdir.name)

# generate_udf() writes its script and udf_config.xml relative to the current
# working directory, so switch into ./user_scripts. Create it first so that
# importing this module does not fail with FileNotFoundError when the
# directory is missing.
os.makedirs("user_scripts", exist_ok=True)
os.chdir("user_scripts")
# print(f"Current working directory: {os.getcwd()}")
11+
12+
def generate_udf(func_name, args, return_type, udf_body):
    """Write an executable-UDF script and register it in ``udf_config.xml``.

    Two artifacts are produced in the current working directory:

    * ``<func_name>.py`` -- a script containing ``udf_body`` followed by a
      driver loop that reads tab-separated rows from stdin, calls the
      function once per row and prints the result.
    * ``udf_config.xml`` -- a ``<functions>`` document holding one
      ``<function>`` entry of type ``executable`` per registered UDF. An
      existing file is extended; a previous entry with the same name is
      replaced rather than duplicated.

    Args:
        func_name: Name of the function defined by ``udf_body``; also used as
            the script file stem and as the registered function name.
        args: Parameter names of the function, in call order.
        return_type: Text stored in the entry's ``return_type`` element.
        udf_body: Source code of the function definition.
    """
    args = list(args)
    script_path = f"{func_name}.py"

    script_lines = [
        "#!/usr/bin/python3",
        "import sys",
        "",
        udf_body,
        "",
        "if __name__ == '__main__':",
        "    for line in sys.stdin:",
        # Strip only the line terminator: a plain strip() would also eat
        # leading/trailing tabs and thereby drop empty first/last fields.
        "        fields = line.rstrip('\\n').split('\\t')",
        # Pass the fields positionally instead of assigning them to variables
        # named after the parameters, so a parameter called e.g. ``args`` or
        # ``line`` cannot clobber the driver's own variables.
        f"        print({func_name}(*fields[:{len(args)}]))",
        "        sys.stdout.flush()",
    ]
    with open(script_path, "w", encoding="utf-8") as f:
        f.write("\n".join(script_lines) + "\n")
    # The script is launched directly through its shebang, so it must be
    # executable.
    os.chmod(script_path, 0o755)

    xml_file = "udf_config.xml"
    if os.path.exists(xml_file):
        root = ET.parse(xml_file).getroot()
    else:
        root = ET.Element("functions")

    # Regenerating a UDF (for instance on every import of the module that
    # defines it) must not pile up duplicate registrations.
    for existing in root.findall("function"):
        if existing.findtext("name") == func_name:
            root.remove(existing)

    function = ET.SubElement(root, "function")
    ET.SubElement(function, "type").text = "executable"
    ET.SubElement(function, "name").text = func_name
    ET.SubElement(function, "return_type").text = return_type
    ET.SubElement(function, "format").text = "TabSeparated"
    ET.SubElement(function, "command").text = script_path
    for arg in args:
        argument = ET.SubElement(function, "argument")
        # We use TabSeparated format, so assume all arguments are strings
        ET.SubElement(argument, "type").text = "String"
        ET.SubElement(argument, "name").text = arg

    ET.ElementTree(root).write(xml_file)
48+
49+
def to_clickhouse_udf(return_type="String"):
    """Return a decorator that exports a Python function as a UDF.

    At decoration time the function's name, parameter names and source code
    are passed to ``generate_udf`` together with ``return_type``. The
    decorated function remains callable from Python: the returned wrapper
    simply forwards to it.

    Args:
        return_type: Return type forwarded to ``generate_udf``.

    Returns:
        A decorator taking the function to export and returning a wrapper
        with the same call behavior.
    """
    def decorator(func):
        import textwrap

        func_name = func.__name__
        params = list(inspect.signature(func).parameters)

        # getsource() returns the text starting at the first decorator line
        # and with the function's original indentation. Dedent it and start
        # at the ``def`` line, so stacked or multi-line decorators and
        # functions defined inside another scope still yield a body that is
        # valid at module level.
        src_lines = textwrap.dedent(inspect.getsource(func)).split("\n")
        def_index = next(
            (
                index
                for index, text in enumerate(src_lines)
                if text.startswith(("def ", "async def "))
            ),
            0,
        )
        udf_body = "\n".join(src_lines[def_index:])

        generate_udf(func_name, params, return_type, udf_body)

        @functools.wraps(func)
        def wrapper(*args, **kwargs):
            return func(*args, **kwargs)

        return wrapper

    return decorator
62+

tests/test_udf.py

Lines changed: 13 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,13 @@
1+
#!python3
2+
3+
from chdb.udf import *
4+
5+
@to_clickhouse_udf()
def sum_udf(lhs, rhs):
    """Sleep for one second, then return ``int(lhs) + int(rhs)``."""
    # Imported inside the body so the import is part of the function source
    # that the decorator copies into the generated script.
    from time import sleep

    sleep(1)
    total = int(lhs) + int(rhs)
    return total
10+
11+
@to_clickhouse_udf(return_type="Int32")
def mul_udf(lhs, rhs):
    """Return the product of ``lhs`` and ``rhs`` after converting both to int."""
    product = int(lhs) * int(rhs)
    return product

0 commit comments

Comments
 (0)