1515from __future__ import annotations
1616
1717import logging
18- from typing import TYPE_CHECKING , Callable , Optional
18+ from typing import Callable , Optional , Protocol , runtime_checkable , TYPE_CHECKING
1919
20- if TYPE_CHECKING :
21- import bigframes .series
22- from bigframes .session import Session
20+ import dataclasses
2321
2422import google .api_core .exceptions
2523from google .cloud import bigquery
2826from bigframes .functions import _function_session as bff_session
2927from bigframes .functions import function_typing , udf_def
3028
29+ if TYPE_CHECKING :
30+ import bigframes .core .col
31+ from bigframes .session import Session
32+ import bigframes .series
33+
3134logger = logging .getLogger (__name__ )
3235
3336
@@ -90,13 +93,13 @@ def _try_import_routine(
9093
def _try_import_row_routine(
    routine: bigquery.Routine, session: bigframes.Session
) -> BigqueryCallableRoutine:
    """
    Wrap a BigQuery routine as a row-processing callable bound to ``session``.

    The routine is converted to a UDF definition with ``is_row_processor=True``.
    The resulting callable is flagged as managed unless the routine carries a
    truthy ``remote_function_options`` attribute.
    """
    row_udf_def = _routine_as_udf_def(routine, is_row_processor=True)

    # A missing attribute and an empty/None value both mean "not remote".
    remote_options = getattr(routine, "remote_function_options", None)
    return BigqueryCallableRoutine(
        row_udf_def, session, is_managed=not remote_options
    )
100103
101104
102105def _routine_as_udf_def (
@@ -117,7 +120,6 @@ def _routine_as_udf_def(
117120 )
118121
119122
120- # TODO(b/399894805): Support managed function.
121123def read_gbq_function (
122124 function_name : str ,
123125 * ,
@@ -152,6 +154,18 @@ def read_gbq_function(
152154 return _try_import_routine (routine , session )
153155
154156
@runtime_checkable
class Udf(Protocol):
    """
    Structural interface shared by all BigFrames user-defined functions.

    Marked @runtime_checkable so that functions like df.apply() can dispatch
    UDFs with isinstance() checks.
    """

    @property
    def udf_def(self) -> udf_def.BigqueryUdf:
        """Return the ``BigqueryUdf`` definition associated with this function."""
        ...
167+
168+
155169class BigqueryCallableRoutine :
156170 """
157171 A reference to a routine in the context of a session.
@@ -178,8 +192,8 @@ def __call__(self, *args, **kwargs):
178192 if self ._local_fun :
179193 return self ._local_fun (* args , ** kwargs )
180194 # avoid circular imports
181- import bigframes .session ._io .bigquery as bf_io_bigquery
182195 from bigframes .core .compile .sqlglot import sql as sg_sql
196+ import bigframes .session ._io .bigquery as bf_io_bigquery
183197
184198 args_string = ", " .join ([sg_sql .to_sql (sg_sql .literal (v )) for v in args ])
185199 sql = f"SELECT `{ str (self ._udf_def .routine_ref )} `({ args_string } )"
@@ -202,7 +216,7 @@ def bigframes_remote_function(self):
202216
203217 @property
204218 def is_row_processor (self ) -> bool :
205- return False
219+ return self . udf_def . signature . is_row_processor
206220
207221 @property
208222 def udf_def (self ) -> udf_def .BigqueryUdf :
@@ -225,75 +239,16 @@ def bigframes_bigquery_function_output_dtype(self):
225239 return self .udf_def .signature .output .emulating_type .bf_type
226240
227241
@dataclasses.dataclass(frozen=True)
class UdfRoutine:
    """
    Immutable pairing of a Python callable with its UDF definition.

    Calling an instance forwards all arguments to ``func``.
    """

    func: Callable
    # Avoid depending on this field: creation of BigQuery managed functions is
    # expected to be deferred later, and this reference will then be replaced
    # with the requirements needed to support lazy creation.
    _udf_def: udf_def.BigqueryUdf

    def __call__(self, *args, **kwargs):
        """Invoke the wrapped callable with the given arguments and return its result."""
        target = self.func
        return target(*args, **kwargs)

    @property
    def udf_def(self) -> udf_def.BigqueryUdf:
        """Return the UDF definition stored on this routine."""
        return self._udf_def
0 commit comments