@@ -705,6 +705,113 @@ def generate_table(
705705 return session .read_gbq_query (query )
706706
707707
@log_adapter.method_logger(custom_base_name="bigquery_ai")
def embed(
    content: str | series.Series | pd.Series,
    *,
    endpoint: str | None = None,
    model: str | None = None,
    task_type: (
        Literal[
            "retrieval_query",
            "retrieval_document",
            "semantic_similarity",
            "classification",
            "clustering",
            "question_answering",
            "fact_verification",
            "code_retrieval_query",
        ]
        | None
    ) = None,
    title: str | None = None,
    model_params: Mapping[Any, Any] | None = None,
    connection_id: str | None = None,
) -> series.Series:
    """
    Creates embeddings from text or image data in BigQuery.

    **Examples:**

        >>> import bigframes.pandas as bpd
        >>> import bigframes.bigquery as bbq
        >>> bbq.ai.embed("dog", endpoint="text-embedding-005") # doctest: +SKIP
        0    {'result': array([ 1.78243860e-03, -1.10658340...

        >>> s = bpd.Series(['dog']) # doctest: +SKIP
        >>> bbq.ai.embed(s, endpoint='text-embedding-005') # doctest: +SKIP
        0    {'result': array([ 1.78243860e-03, -1.10658340...

    Args:
        content (str | Series):
            A string literal or a Series (either BigFrames series or pandas Series)
            that provides the text or image to embed.
        endpoint (str, optional):
            A supported Vertex AI embedding model endpoint, including the model
            version, for example `"text-embedding-005"`. Mutually exclusive with
            the `model` parameter.
        model (str, optional):
            A built-in embedding model; the only supported value is
            `"embeddinggemma-300m"`. Mutually exclusive with the `endpoint`,
            `title`, `model_params`, and `connection_id` parameters.
        task_type (str, optional):
            The intended downstream application, used to help the model produce
            better quality embeddings. One of `"retrieval_query"`,
            `"retrieval_document"`, `"semantic_similarity"`, `"classification"`,
            `"clustering"`, `"question_answering"`, `"fact_verification"`,
            `"code_retrieval_query"`.
        title (str, optional):
            A document title the model uses to improve embedding quality. Only
            valid when `task_type` is `"retrieval_document"`.
        model_params (Mapping[Any, Any], optional):
            Additional model parameters as a JSON-serializable mapping, for
            example `{"outputDimensionality": 768}` to choose the number of
            embedding dimensions.
        connection_id (str, optional):
            The connection used to communicate with the model, in the format
            `PROJECT_ID.LOCATION.CONNECTION_ID`, for example
            `myproject.us.myconnection`. If omitted, the query uses your
            end-user credential.

    Returns:
        bigframes.series.Series: A new struct Series with the result data. The struct contains these fields:
            * "result": an ARRAY<FLOAT64> value containing the generated embeddings.
            * "status": a STRING value that contains the API response status for the corresponding row. This value is empty if the operation was successful.
    """
    # A built-in model cannot be combined with endpoint-oriented options;
    # otherwise, an endpoint is mandatory.
    if model is not None:
        conflicting = (endpoint, title, model_params, connection_id)
        if any(arg is not None for arg in conflicting):
            raise ValueError(
                "You cannot specify endpoint, title, model_params, or connection_id when the model is set."
            )
    elif endpoint is None:
        raise ValueError(
            "You must specify exactly one of 'endpoint' or 'model' argument."
        )

    # `title` only makes sense for document-retrieval embeddings.
    if title is not None and task_type != "retrieval_document":
        raise ValueError(
            "You can only use 'title' parameter if you specify retrieval_document for the task_type value."
        )

    op = ai_ops.AIEmbed(
        endpoint=endpoint,
        model=model,
        task_type=task_type,
        title=title,
        # Falsy mappings (None or empty) are passed through as NULL params.
        model_params=json.dumps(model_params) if model_params else None,
        connection_id=connection_id,
    )

    # Normalize every accepted content form into a BigFrames Series, then
    # apply the embedding operator once.
    if isinstance(content, str):
        input_series = series.Series([content])
    elif isinstance(content, pd.Series):
        input_series = series.Series(content)
    elif isinstance(content, series.Series):
        input_series = content
    else:
        raise ValueError(f"Unsupported 'content' parameter type: {type(content)}")

    return input_series._apply_unary_op(op)
813+
814+
708815@log_adapter .method_logger (custom_base_name = "bigquery_ai" )
709816def if_ (
710817 prompt : PROMPT_TYPE ,
0 commit comments