`_
+
+ :param inference_id: The inference Id
+ :param input: Inference input. Either a string or an array of strings.
+ :param task_settings: Optional task settings
+ :param timeout: Specifies the amount of time to wait for the inference request
+ to complete.
+ """
+ if inference_id in SKIP_IN_PATH:
+ raise ValueError("Empty value passed for parameter 'inference_id'")
+ if input is None and body is None:
+ raise ValueError("Empty value passed for parameter 'input'")
+ __path_parts: t.Dict[str, str] = {"inference_id": _quote(inference_id)}
+ __path = f'/_inference/completion/{__path_parts["inference_id"]}'
+ __query: t.Dict[str, t.Any] = {}
+ __body: t.Dict[str, t.Any] = body if body is not None else {}
+ if error_trace is not None:
+ __query["error_trace"] = error_trace
+ if filter_path is not None:
+ __query["filter_path"] = filter_path
+ if human is not None:
+ __query["human"] = human
+ if pretty is not None:
+ __query["pretty"] = pretty
+ if timeout is not None:
+ __query["timeout"] = timeout
+ if not __body:
+ if input is not None:
+ __body["input"] = input
+ if task_settings is not None:
+ __body["task_settings"] = task_settings
+ if not __body:
+ __body = None # type: ignore[assignment]
+ __headers = {"accept": "application/json"}
+ if __body is not None:
+ __headers["content-type"] = "application/json"
+ return await self.perform_request( # type: ignore[return-value]
+ "POST",
+ __path,
+ params=__query,
+ headers=__headers,
+ body=__body,
+ endpoint_id="inference.completion",
+ path_parts=__path_parts,
+ )
+
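
A minimal usage sketch for the new completion helper, shown with the async client this file defines. The host, API key, endpoint id, and prompt are placeholders, not values from this diff:

    import asyncio
    from elasticsearch_serverless import AsyncElasticsearch

    async def main() -> None:
        # Placeholder connection details.
        client = AsyncElasticsearch("https://my-project.es.example.com", api_key="...")
        # Sends POST /_inference/completion/<inference_id> with {"input": ...} as the body.
        resp = await client.inference.completion(
            inference_id="my-completion-endpoint",  # assumed to already exist
            input="Summarize the Elasticsearch inference APIs in one sentence.",
            timeout="30s",
        )
        print(resp)

    asyncio.run(main())
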
@_rewrite_parameters()
async def delete(
self,
@@ -33,7 +101,13 @@ async def delete(
task_type: t.Optional[
t.Union[
str,
- t.Literal["completion", "rerank", "sparse_embedding", "text_embedding"],
+ t.Literal[
+ "chat_completion",
+ "completion",
+ "rerank",
+ "sparse_embedding",
+ "text_embedding",
+ ],
]
] = None,
dry_run: t.Optional[bool] = None,
@@ -102,7 +176,13 @@ async def get(
task_type: t.Optional[
t.Union[
str,
- t.Literal["completion", "rerank", "sparse_embedding", "text_embedding"],
+ t.Literal[
+ "chat_completion",
+ "completion",
+ "rerank",
+ "sparse_embedding",
+ "text_embedding",
+ ],
]
] = None,
inference_id: t.Optional[str] = None,
@@ -155,24 +235,188 @@ async def get(
)
@_rewrite_parameters(
- body_fields=("input", "query", "task_settings"),
+ body_name="inference_config",
)
- async def inference(
+ async def put(
self,
*,
inference_id: str,
- input: t.Optional[t.Union[str, t.Sequence[str]]] = None,
+ inference_config: t.Optional[t.Mapping[str, t.Any]] = None,
+ body: t.Optional[t.Mapping[str, t.Any]] = None,
task_type: t.Optional[
t.Union[
str,
- t.Literal["completion", "rerank", "sparse_embedding", "text_embedding"],
+ t.Literal[
+ "chat_completion",
+ "completion",
+ "rerank",
+ "sparse_embedding",
+ "text_embedding",
+ ],
]
] = None,
error_trace: t.Optional[bool] = None,
filter_path: t.Optional[t.Union[str, t.Sequence[str]]] = None,
human: t.Optional[bool] = None,
pretty: t.Optional[bool] = None,
+ ) -> ObjectApiResponse[t.Any]:
+ """
+ .. raw:: html
+
+ Create an inference endpoint.
+ When you create an inference endpoint, the associated machine learning model is automatically deployed if it is not already running.
+ After creating the endpoint, wait for the model deployment to complete before using it.
+ To verify the deployment status, use the get trained model statistics API.
+ Look for "state": "fully_allocated" in the response and ensure that the "allocation_count" matches the "target_allocation_count".
+ Avoid creating multiple endpoints for the same model unless required, as each endpoint consumes significant resources.
+ IMPORTANT: The inference APIs enable you to use certain services, such as built-in machine learning models (ELSER, E5), models uploaded through Eland, Cohere, OpenAI, Mistral, Azure OpenAI, Google AI Studio, Google Vertex AI, Anthropic, Watsonx.ai, or Hugging Face.
+ For built-in models and models uploaded through Eland, the inference APIs offer an alternative way to use and manage trained models.
+ However, if you do not plan to use the inference APIs to use these models or if you want to use non-NLP models, use the machine learning trained model APIs.
+
+
+ ``_
+
+ :param inference_id: The inference Id
+ :param inference_config:
+ :param task_type: The task type
+ """
+ if inference_id in SKIP_IN_PATH:
+ raise ValueError("Empty value passed for parameter 'inference_id'")
+ if inference_config is None and body is None:
+ raise ValueError(
+ "Empty value passed for parameters 'inference_config' and 'body', one of them should be set."
+ )
+ elif inference_config is not None and body is not None:
+ raise ValueError("Cannot set both 'inference_config' and 'body'")
+ __path_parts: t.Dict[str, str]
+ if task_type not in SKIP_IN_PATH and inference_id not in SKIP_IN_PATH:
+ __path_parts = {
+ "task_type": _quote(task_type),
+ "inference_id": _quote(inference_id),
+ }
+ __path = f'/_inference/{__path_parts["task_type"]}/{__path_parts["inference_id"]}'
+ elif inference_id not in SKIP_IN_PATH:
+ __path_parts = {"inference_id": _quote(inference_id)}
+ __path = f'/_inference/{__path_parts["inference_id"]}'
+ else:
+ raise ValueError("Couldn't find a path for the given parameters")
+ __query: t.Dict[str, t.Any] = {}
+ if error_trace is not None:
+ __query["error_trace"] = error_trace
+ if filter_path is not None:
+ __query["filter_path"] = filter_path
+ if human is not None:
+ __query["human"] = human
+ if pretty is not None:
+ __query["pretty"] = pretty
+ __body = inference_config if inference_config is not None else body
+ __headers = {"accept": "application/json", "content-type": "application/json"}
+ return await self.perform_request( # type: ignore[return-value]
+ "PUT",
+ __path,
+ params=__query,
+ headers=__headers,
+ body=__body,
+ endpoint_id="inference.put",
+ path_parts=__path_parts,
+ )
+
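
A hedged sketch of the reworked put, which now takes the whole request body as inference_config and accepts an optional task_type path segment. Shown with the synchronous client for brevity (the async client mirrors it with await); the service name and settings are illustrative, not mandated by this diff:

    from elasticsearch_serverless import Elasticsearch

    client = Elasticsearch("https://my-project.es.example.com", api_key="...")  # placeholders
    # PUT /_inference/sparse_embedding/my-elser-endpoint
    client.inference.put(
        task_type="sparse_embedding",  # optional; omitting it targets /_inference/<inference_id>
        inference_id="my-elser-endpoint",
        inference_config={
            "service": "elser",  # example service; each service defines its own settings
            "service_settings": {"num_allocations": 1, "num_threads": 1},
        },
    )
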
+ @_rewrite_parameters(
+ body_fields=("service", "service_settings"),
+ )
+ async def put_watsonx(
+ self,
+ *,
+ task_type: t.Union[str, t.Literal["text_embedding"]],
+ watsonx_inference_id: str,
+ service: t.Optional[t.Union[str, t.Literal["watsonxai"]]] = None,
+ service_settings: t.Optional[t.Mapping[str, t.Any]] = None,
+ error_trace: t.Optional[bool] = None,
+ filter_path: t.Optional[t.Union[str, t.Sequence[str]]] = None,
+ human: t.Optional[bool] = None,
+ pretty: t.Optional[bool] = None,
+ body: t.Optional[t.Dict[str, t.Any]] = None,
+ ) -> ObjectApiResponse[t.Any]:
+ """
+ .. raw:: html
+
+ Create a Watsonx inference endpoint.
+ Creates an inference endpoint to perform an inference task with the watsonxai service.
+ You need an IBM Cloud Databases for Elasticsearch deployment to use the watsonxai inference service.
+ You can provision one through the IBM catalog, the Cloud Databases CLI plug-in, the Cloud Databases API, or Terraform.
+ When you create an inference endpoint, the associated machine learning model is automatically deployed if it is not already running.
+ After creating the endpoint, wait for the model deployment to complete before using it.
+ To verify the deployment status, use the get trained model statistics API.
+ Look for "state": "fully_allocated" in the response and ensure that the "allocation_count" matches the "target_allocation_count".
+ Avoid creating multiple endpoints for the same model unless required, as each endpoint consumes significant resources.
+
+
+ ``_
+
+ :param task_type: The task type. The only valid task type for the model to perform
+ is `text_embedding`.
+ :param watsonx_inference_id: The unique identifier of the inference endpoint.
+ :param service: The type of service supported for the specified task type. In
+ this case, `watsonxai`.
+ :param service_settings: Settings used to install the inference model. These
+ settings are specific to the `watsonxai` service.
+ """
+ if task_type in SKIP_IN_PATH:
+ raise ValueError("Empty value passed for parameter 'task_type'")
+ if watsonx_inference_id in SKIP_IN_PATH:
+ raise ValueError("Empty value passed for parameter 'watsonx_inference_id'")
+ if service is None and body is None:
+ raise ValueError("Empty value passed for parameter 'service'")
+ if service_settings is None and body is None:
+ raise ValueError("Empty value passed for parameter 'service_settings'")
+ __path_parts: t.Dict[str, str] = {
+ "task_type": _quote(task_type),
+ "watsonx_inference_id": _quote(watsonx_inference_id),
+ }
+ __path = f'/_inference/{__path_parts["task_type"]}/{__path_parts["watsonx_inference_id"]}'
+ __query: t.Dict[str, t.Any] = {}
+ __body: t.Dict[str, t.Any] = body if body is not None else {}
+ if error_trace is not None:
+ __query["error_trace"] = error_trace
+ if filter_path is not None:
+ __query["filter_path"] = filter_path
+ if human is not None:
+ __query["human"] = human
+ if pretty is not None:
+ __query["pretty"] = pretty
+ if not __body:
+ if service is not None:
+ __body["service"] = service
+ if service_settings is not None:
+ __body["service_settings"] = service_settings
+ if not __body:
+ __body = None # type: ignore[assignment]
+ __headers = {"accept": "application/json"}
+ if __body is not None:
+ __headers["content-type"] = "application/json"
+ return await self.perform_request( # type: ignore[return-value]
+ "PUT",
+ __path,
+ params=__query,
+ headers=__headers,
+ body=__body,
+ endpoint_id="inference.put_watsonx",
+ path_parts=__path_parts,
+ )
+
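
A sketch of the new put_watsonx helper. The service_settings keys below are assumptions for illustration; consult the watsonxai service documentation for the authoritative set:

    from elasticsearch_serverless import Elasticsearch

    client = Elasticsearch("https://my-project.es.example.com", api_key="...")  # placeholders
    # PUT /_inference/text_embedding/my-watsonx-endpoint
    client.inference.put_watsonx(
        task_type="text_embedding",  # the only valid task type per the docstring
        watsonx_inference_id="my-watsonx-endpoint",
        service="watsonxai",
        service_settings={
            # Illustrative keys, not confirmed by this diff.
            "api_key": "...",
            "url": "...",
            "model_id": "...",
        },
    )
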
+ @_rewrite_parameters(
+ body_fields=("input", "query", "task_settings"),
+ )
+ async def rerank(
+ self,
+ *,
+ inference_id: str,
+ input: t.Optional[t.Union[str, t.Sequence[str]]] = None,
query: t.Optional[str] = None,
+ error_trace: t.Optional[bool] = None,
+ filter_path: t.Optional[t.Union[str, t.Sequence[str]]] = None,
+ human: t.Optional[bool] = None,
+ pretty: t.Optional[bool] = None,
task_settings: t.Optional[t.Any] = None,
timeout: t.Optional[t.Union[str, t.Literal[-1], t.Literal[0]]] = None,
body: t.Optional[t.Dict[str, t.Any]] = None,
@@ -180,14 +424,7 @@ async def inference(
"""
.. raw:: html
- Perform inference on the service.
- This API enables you to use machine learning models to perform specific tasks on data that you provide as an input.
- It returns a response with the results of the tasks.
- The inference endpoint you use can perform one specific task that has been defined when the endpoint was created with the create inference API.
-
- info
- The inference APIs enable you to use certain services, such as built-in machine learning models (ELSER, E5), models uploaded through Eland, Cohere, OpenAI, Azure, Google AI Studio, Google Vertex AI, Anthropic, Watsonx.ai, or Hugging Face. For built-in models and models uploaded through Eland, the inference APIs offer an alternative way to use and manage trained models. However, if you do not plan to use the inference APIs to use these models or if you want to use non-NLP models, use the machine learning trained model APIs.
-
+ Perform reranking inference on the service
``_
@@ -196,9 +433,7 @@ async def inference(
:param input: The text on which you want to perform the inference task. It can
be a single string or an array. > info > Inference endpoints for the `completion`
task type currently only support a single string as input.
- :param task_type: The type of inference task that the model performs.
- :param query: The query input, which is required only for the `rerank` task.
- It is not required for other tasks.
+ :param query: Query input.
:param task_settings: Task settings for the individual inference request. These
settings are specific to the task type you specified and override the task
settings specified when initializing the service.
@@ -208,18 +443,10 @@ async def inference(
raise ValueError("Empty value passed for parameter 'inference_id'")
if input is None and body is None:
raise ValueError("Empty value passed for parameter 'input'")
- __path_parts: t.Dict[str, str]
- if task_type not in SKIP_IN_PATH and inference_id not in SKIP_IN_PATH:
- __path_parts = {
- "task_type": _quote(task_type),
- "inference_id": _quote(inference_id),
- }
- __path = f'/_inference/{__path_parts["task_type"]}/{__path_parts["inference_id"]}'
- elif inference_id not in SKIP_IN_PATH:
- __path_parts = {"inference_id": _quote(inference_id)}
- __path = f'/_inference/{__path_parts["inference_id"]}'
- else:
- raise ValueError("Couldn't find a path for the given parameters")
+ if query is None and body is None:
+ raise ValueError("Empty value passed for parameter 'query'")
+ __path_parts: t.Dict[str, str] = {"inference_id": _quote(inference_id)}
+ __path = f'/_inference/rerank/{__path_parts["inference_id"]}'
__query: t.Dict[str, t.Any] = {}
__body: t.Dict[str, t.Any] = body if body is not None else {}
if error_trace is not None:
@@ -250,71 +477,48 @@ async def inference(
params=__query,
headers=__headers,
body=__body,
- endpoint_id="inference.inference",
+ endpoint_id="inference.rerank",
path_parts=__path_parts,
)
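
The renamed rerank helper now validates query client-side alongside input. A short sketch with placeholder ids and documents, using the synchronous client for brevity:

    from elasticsearch_serverless import Elasticsearch

    client = Elasticsearch("https://my-project.es.example.com", api_key="...")  # placeholders
    # POST /_inference/rerank/my-rerank-endpoint; both `query` and `input` are required.
    resp = client.inference.rerank(
        inference_id="my-rerank-endpoint",  # assumed to already exist
        query="what is vector search?",
        input=[
            "Vector search finds nearest neighbours in embedding space.",
            "Reindexing copies documents between indices.",
        ],
    )
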
@_rewrite_parameters(
- body_name="inference_config",
+ body_fields=("input", "task_settings"),
)
- async def put(
+ async def sparse_embedding(
self,
*,
inference_id: str,
- inference_config: t.Optional[t.Mapping[str, t.Any]] = None,
- body: t.Optional[t.Mapping[str, t.Any]] = None,
- task_type: t.Optional[
- t.Union[
- str,
- t.Literal["completion", "rerank", "sparse_embedding", "text_embedding"],
- ]
- ] = None,
+ input: t.Optional[t.Union[str, t.Sequence[str]]] = None,
error_trace: t.Optional[bool] = None,
filter_path: t.Optional[t.Union[str, t.Sequence[str]]] = None,
human: t.Optional[bool] = None,
pretty: t.Optional[bool] = None,
+ task_settings: t.Optional[t.Any] = None,
+ timeout: t.Optional[t.Union[str, t.Literal[-1], t.Literal[0]]] = None,
+ body: t.Optional[t.Dict[str, t.Any]] = None,
) -> ObjectApiResponse[t.Any]:
"""
.. raw:: html
- Create an inference endpoint.
- When you create an inference endpoint, the associated machine learning model is automatically deployed if it is not already running.
- After creating the endpoint, wait for the model deployment to complete before using it.
- To verify the deployment status, use the get trained model statistics API.
- Look for "state": "fully_allocated" in the response and ensure that the "allocation_count" matches the "target_allocation_count".
- Avoid creating multiple endpoints for the same model unless required, as each endpoint consumes significant resources.
- IMPORTANT: The inference APIs enable you to use certain services, such as built-in machine learning models (ELSER, E5), models uploaded through Eland, Cohere, OpenAI, Mistral, Azure OpenAI, Google AI Studio, Google Vertex AI, Anthropic, Watsonx.ai, or Hugging Face.
- For built-in models and models uploaded through Eland, the inference APIs offer an alternative way to use and manage trained models.
- However, if you do not plan to use the inference APIs to use these models or if you want to use non-NLP models, use the machine learning trained model APIs.
+ Perform sparse embedding inference on the service
- ``_
+ ``_
:param inference_id: The inference Id
- :param inference_config:
- :param task_type: The task type
+ :param input: Inference input. Either a string or an array of strings.
+ :param task_settings: Optional task settings
+ :param timeout: Specifies the amount of time to wait for the inference request
+ to complete.
"""
if inference_id in SKIP_IN_PATH:
raise ValueError("Empty value passed for parameter 'inference_id'")
- if inference_config is None and body is None:
- raise ValueError(
- "Empty value passed for parameters 'inference_config' and 'body', one of them should be set."
- )
- elif inference_config is not None and body is not None:
- raise ValueError("Cannot set both 'inference_config' and 'body'")
- __path_parts: t.Dict[str, str]
- if task_type not in SKIP_IN_PATH and inference_id not in SKIP_IN_PATH:
- __path_parts = {
- "task_type": _quote(task_type),
- "inference_id": _quote(inference_id),
- }
- __path = f'/_inference/{__path_parts["task_type"]}/{__path_parts["inference_id"]}'
- elif inference_id not in SKIP_IN_PATH:
- __path_parts = {"inference_id": _quote(inference_id)}
- __path = f'/_inference/{__path_parts["inference_id"]}'
- else:
- raise ValueError("Couldn't find a path for the given parameters")
+ if input is None and body is None:
+ raise ValueError("Empty value passed for parameter 'input'")
+ __path_parts: t.Dict[str, str] = {"inference_id": _quote(inference_id)}
+ __path = f'/_inference/sparse_embedding/{__path_parts["inference_id"]}'
__query: t.Dict[str, t.Any] = {}
+ __body: t.Dict[str, t.Any] = body if body is not None else {}
if error_trace is not None:
__query["error_trace"] = error_trace
if filter_path is not None:
@@ -323,14 +527,92 @@ async def put(
__query["human"] = human
if pretty is not None:
__query["pretty"] = pretty
- __body = inference_config if inference_config is not None else body
- __headers = {"accept": "application/json", "content-type": "application/json"}
+ if timeout is not None:
+ __query["timeout"] = timeout
+ if not __body:
+ if input is not None:
+ __body["input"] = input
+ if task_settings is not None:
+ __body["task_settings"] = task_settings
+ if not __body:
+ __body = None # type: ignore[assignment]
+ __headers = {"accept": "application/json"}
+ if __body is not None:
+ __headers["content-type"] = "application/json"
return await self.perform_request( # type: ignore[return-value]
- "PUT",
+ "POST",
__path,
params=__query,
headers=__headers,
body=__body,
- endpoint_id="inference.put",
+ endpoint_id="inference.sparse_embedding",
+ path_parts=__path_parts,
+ )
+
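
A matching sketch for sparse_embedding; the endpoint id and text are placeholders:

    from elasticsearch_serverless import Elasticsearch

    client = Elasticsearch("https://my-project.es.example.com", api_key="...")  # placeholders
    # POST /_inference/sparse_embedding/my-elser-endpoint
    resp = client.inference.sparse_embedding(
        inference_id="my-elser-endpoint",  # assumed to already exist
        input="The quick brown fox jumps over the lazy dog",
    )
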
+ @_rewrite_parameters(
+ body_fields=("input", "task_settings"),
+ )
+ async def text_embedding(
+ self,
+ *,
+ inference_id: str,
+ input: t.Optional[t.Union[str, t.Sequence[str]]] = None,
+ error_trace: t.Optional[bool] = None,
+ filter_path: t.Optional[t.Union[str, t.Sequence[str]]] = None,
+ human: t.Optional[bool] = None,
+ pretty: t.Optional[bool] = None,
+ task_settings: t.Optional[t.Any] = None,
+ timeout: t.Optional[t.Union[str, t.Literal[-1], t.Literal[0]]] = None,
+ body: t.Optional[t.Dict[str, t.Any]] = None,
+ ) -> ObjectApiResponse[t.Any]:
+ """
+ .. raw:: html
+
+ Perform text embedding inference on the service
+
+
+ ``_
+
+ :param inference_id: The inference Id
+ :param input: Inference input. Either a string or an array of strings.
+ :param task_settings: Optional task settings
+ :param timeout: Specifies the amount of time to wait for the inference request
+ to complete.
+ """
+ if inference_id in SKIP_IN_PATH:
+ raise ValueError("Empty value passed for parameter 'inference_id'")
+ if input is None and body is None:
+ raise ValueError("Empty value passed for parameter 'input'")
+ __path_parts: t.Dict[str, str] = {"inference_id": _quote(inference_id)}
+ __path = f'/_inference/text_embedding/{__path_parts["inference_id"]}'
+ __query: t.Dict[str, t.Any] = {}
+ __body: t.Dict[str, t.Any] = body if body is not None else {}
+ if error_trace is not None:
+ __query["error_trace"] = error_trace
+ if filter_path is not None:
+ __query["filter_path"] = filter_path
+ if human is not None:
+ __query["human"] = human
+ if pretty is not None:
+ __query["pretty"] = pretty
+ if timeout is not None:
+ __query["timeout"] = timeout
+ if not __body:
+ if input is not None:
+ __body["input"] = input
+ if task_settings is not None:
+ __body["task_settings"] = task_settings
+ if not __body:
+ __body = None # type: ignore[assignment]
+ __headers = {"accept": "application/json"}
+ if __body is not None:
+ __headers["content-type"] = "application/json"
+ return await self.perform_request( # type: ignore[return-value]
+ "POST",
+ __path,
+ params=__query,
+ headers=__headers,
+ body=__body,
+ endpoint_id="inference.text_embedding",
path_parts=__path_parts,
)
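
And for text_embedding, where a list input embeds each string separately; values are placeholders:

    from elasticsearch_serverless import Elasticsearch

    client = Elasticsearch("https://my-project.es.example.com", api_key="...")  # placeholders
    # POST /_inference/text_embedding/my-embedding-endpoint
    resp = client.inference.text_embedding(
        inference_id="my-embedding-endpoint",  # assumed to already exist
        input=["first passage", "second passage"],
    )
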
diff --git a/elasticsearch_serverless/_async/client/ml.py b/elasticsearch_serverless/_async/client/ml.py
index d83994d..0b3391e 100644
--- a/elasticsearch_serverless/_async/client/ml.py
+++ b/elasticsearch_serverless/_async/client/ml.py
@@ -1543,7 +1543,6 @@ async def get_trained_models(
],
]
] = None,
- include_model_definition: t.Optional[bool] = None,
pretty: t.Optional[bool] = None,
size: t.Optional[int] = None,
tags: t.Optional[t.Union[str, t.Sequence[str]]] = None,
@@ -1573,8 +1572,6 @@ async def get_trained_models(
:param from_: Skips the specified number of models.
:param include: A comma delimited string of optional fields to include in the
response body.
- :param include_model_definition: parameter is deprecated! Use [include=definition]
- instead
:param size: Specifies the maximum number of models to obtain.
:param tags: A comma delimited string of tags. A trained model can have many
tags, or none. When supplied, only trained models that contain all the supplied
@@ -1604,8 +1601,6 @@ async def get_trained_models(
__query["human"] = human
if include is not None:
__query["include"] = include
- if include_model_definition is not None:
- __query["include_model_definition"] = include_model_definition
if pretty is not None:
__query["pretty"] = pretty
if size is not None:
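
The removed include_model_definition flag's own docstring pointed callers at include=definition; a migration sketch with a placeholder model id:

    from elasticsearch_serverless import Elasticsearch

    client = Elasticsearch("https://my-project.es.example.com", api_key="...")  # placeholders
    # Before (removed): client.ml.get_trained_models(include_model_definition=True)
    resp = client.ml.get_trained_models(model_id="my-model", include="definition")
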
diff --git a/elasticsearch_serverless/_sync/client/indices.py b/elasticsearch_serverless/_sync/client/indices.py
index 20d274e..7315fe1 100644
--- a/elasticsearch_serverless/_sync/client/indices.py
+++ b/elasticsearch_serverless/_sync/client/indices.py
@@ -1567,7 +1567,7 @@ def migrate_to_data_stream(
The write index for the alias becomes the write index for the stream.
- ``_
+ ``_
:param name: Name of the index alias to convert to a data stream.
:param master_timeout: Period to wait for a connection to the master node. If
@@ -1623,7 +1623,7 @@ def modify_data_stream(
Performs one or more data stream modification actions in a single atomic operation.
- ``_
+ ``_
:param actions: Actions to perform.
"""
@@ -1689,7 +1689,7 @@ def put_alias(
Adds a data stream or index to an alias.
- ``_
+ ``_
:param index: Comma-separated list of data streams or indices to add. Supports
wildcards (`*`). Wildcard patterns that match both data streams and indices
@@ -1796,7 +1796,7 @@ def put_data_lifecycle(
Update the data stream lifecycle of the specified data streams.
- ``_
+ ``_
:param name: Comma-separated list of data streams used to limit the request.
Supports wildcards (`*`). To target all data streams use `*` or `_all`.
@@ -1924,7 +1924,7 @@ def put_index_template(
If an entry already exists with the same key, then it is overwritten by the new definition.
- ``_
+ ``_
:param name: Index or template name
:param allow_auto_create: This setting overrides the value of the `action.auto_create_index`
@@ -2845,7 +2845,7 @@ def update_aliases(
Adds a data stream or index to an alias.
- ``_
+ ``_
:param actions: Actions to perform.
:param master_timeout: Period to wait for a connection to the master node. If
@@ -2924,7 +2924,7 @@ def validate_query(
Validates a query without running it.
- ``_
+ ``_
:param index: Comma-separated list of data streams, indices, and aliases to search.
Supports wildcards (`*`). To search all data streams or indices, omit this
diff --git a/elasticsearch_serverless/_sync/client/inference.py b/elasticsearch_serverless/_sync/client/inference.py
index 5f455b9..73aff0b 100644
--- a/elasticsearch_serverless/_sync/client/inference.py
+++ b/elasticsearch_serverless/_sync/client/inference.py
@@ -25,6 +25,74 @@
class InferenceClient(NamespacedClient):
+ @_rewrite_parameters(
+ body_fields=("input", "task_settings"),
+ )
+ def completion(
+ self,
+ *,
+ inference_id: str,
+ input: t.Optional[t.Union[str, t.Sequence[str]]] = None,
+ error_trace: t.Optional[bool] = None,
+ filter_path: t.Optional[t.Union[str, t.Sequence[str]]] = None,
+ human: t.Optional[bool] = None,
+ pretty: t.Optional[bool] = None,
+ task_settings: t.Optional[t.Any] = None,
+ timeout: t.Optional[t.Union[str, t.Literal[-1], t.Literal[0]]] = None,
+ body: t.Optional[t.Dict[str, t.Any]] = None,
+ ) -> ObjectApiResponse[t.Any]:
+ """
+ .. raw:: html
+
+ Perform completion inference on the service
+
+
+ ``_
+
+ :param inference_id: The inference Id
+ :param input: Inference input. Either a string or an array of strings.
+ :param task_settings: Optional task settings
+ :param timeout: Specifies the amount of time to wait for the inference request
+ to complete.
+ """
+ if inference_id in SKIP_IN_PATH:
+ raise ValueError("Empty value passed for parameter 'inference_id'")
+ if input is None and body is None:
+ raise ValueError("Empty value passed for parameter 'input'")
+ __path_parts: t.Dict[str, str] = {"inference_id": _quote(inference_id)}
+ __path = f'/_inference/completion/{__path_parts["inference_id"]}'
+ __query: t.Dict[str, t.Any] = {}
+ __body: t.Dict[str, t.Any] = body if body is not None else {}
+ if error_trace is not None:
+ __query["error_trace"] = error_trace
+ if filter_path is not None:
+ __query["filter_path"] = filter_path
+ if human is not None:
+ __query["human"] = human
+ if pretty is not None:
+ __query["pretty"] = pretty
+ if timeout is not None:
+ __query["timeout"] = timeout
+ if not __body:
+ if input is not None:
+ __body["input"] = input
+ if task_settings is not None:
+ __body["task_settings"] = task_settings
+ if not __body:
+ __body = None # type: ignore[assignment]
+ __headers = {"accept": "application/json"}
+ if __body is not None:
+ __headers["content-type"] = "application/json"
+ return self.perform_request( # type: ignore[return-value]
+ "POST",
+ __path,
+ params=__query,
+ headers=__headers,
+ body=__body,
+ endpoint_id="inference.completion",
+ path_parts=__path_parts,
+ )
+
@_rewrite_parameters()
def delete(
self,
@@ -33,7 +101,13 @@ def delete(
task_type: t.Optional[
t.Union[
str,
- t.Literal["completion", "rerank", "sparse_embedding", "text_embedding"],
+ t.Literal[
+ "chat_completion",
+ "completion",
+ "rerank",
+ "sparse_embedding",
+ "text_embedding",
+ ],
]
] = None,
dry_run: t.Optional[bool] = None,
@@ -102,7 +176,13 @@ def get(
task_type: t.Optional[
t.Union[
str,
- t.Literal["completion", "rerank", "sparse_embedding", "text_embedding"],
+ t.Literal[
+ "chat_completion",
+ "completion",
+ "rerank",
+ "sparse_embedding",
+ "text_embedding",
+ ],
]
] = None,
inference_id: t.Optional[str] = None,
@@ -155,24 +235,188 @@ def get(
)
@_rewrite_parameters(
- body_fields=("input", "query", "task_settings"),
+ body_name="inference_config",
)
- def inference(
+ def put(
self,
*,
inference_id: str,
- input: t.Optional[t.Union[str, t.Sequence[str]]] = None,
+ inference_config: t.Optional[t.Mapping[str, t.Any]] = None,
+ body: t.Optional[t.Mapping[str, t.Any]] = None,
task_type: t.Optional[
t.Union[
str,
- t.Literal["completion", "rerank", "sparse_embedding", "text_embedding"],
+ t.Literal[
+ "chat_completion",
+ "completion",
+ "rerank",
+ "sparse_embedding",
+ "text_embedding",
+ ],
]
] = None,
error_trace: t.Optional[bool] = None,
filter_path: t.Optional[t.Union[str, t.Sequence[str]]] = None,
human: t.Optional[bool] = None,
pretty: t.Optional[bool] = None,
+ ) -> ObjectApiResponse[t.Any]:
+ """
+ .. raw:: html
+
+ Create an inference endpoint.
+ When you create an inference endpoint, the associated machine learning model is automatically deployed if it is not already running.
+ After creating the endpoint, wait for the model deployment to complete before using it.
+ To verify the deployment status, use the get trained model statistics API.
+ Look for "state": "fully_allocated" in the response and ensure that the "allocation_count" matches the "target_allocation_count".
+ Avoid creating multiple endpoints for the same model unless required, as each endpoint consumes significant resources.
+ IMPORTANT: The inference APIs enable you to use certain services, such as built-in machine learning models (ELSER, E5), models uploaded through Eland, Cohere, OpenAI, Mistral, Azure OpenAI, Google AI Studio, Google Vertex AI, Anthropic, Watsonx.ai, or Hugging Face.
+ For built-in models and models uploaded through Eland, the inference APIs offer an alternative way to use and manage trained models.
+ However, if you do not plan to use the inference APIs to use these models or if you want to use non-NLP models, use the machine learning trained model APIs.
+
+
+ ``_
+
+ :param inference_id: The inference Id
+ :param inference_config:
+ :param task_type: The task type
+ """
+ if inference_id in SKIP_IN_PATH:
+ raise ValueError("Empty value passed for parameter 'inference_id'")
+ if inference_config is None and body is None:
+ raise ValueError(
+ "Empty value passed for parameters 'inference_config' and 'body', one of them should be set."
+ )
+ elif inference_config is not None and body is not None:
+ raise ValueError("Cannot set both 'inference_config' and 'body'")
+ __path_parts: t.Dict[str, str]
+ if task_type not in SKIP_IN_PATH and inference_id not in SKIP_IN_PATH:
+ __path_parts = {
+ "task_type": _quote(task_type),
+ "inference_id": _quote(inference_id),
+ }
+ __path = f'/_inference/{__path_parts["task_type"]}/{__path_parts["inference_id"]}'
+ elif inference_id not in SKIP_IN_PATH:
+ __path_parts = {"inference_id": _quote(inference_id)}
+ __path = f'/_inference/{__path_parts["inference_id"]}'
+ else:
+ raise ValueError("Couldn't find a path for the given parameters")
+ __query: t.Dict[str, t.Any] = {}
+ if error_trace is not None:
+ __query["error_trace"] = error_trace
+ if filter_path is not None:
+ __query["filter_path"] = filter_path
+ if human is not None:
+ __query["human"] = human
+ if pretty is not None:
+ __query["pretty"] = pretty
+ __body = inference_config if inference_config is not None else body
+ __headers = {"accept": "application/json", "content-type": "application/json"}
+ return self.perform_request( # type: ignore[return-value]
+ "PUT",
+ __path,
+ params=__query,
+ headers=__headers,
+ body=__body,
+ endpoint_id="inference.put",
+ path_parts=__path_parts,
+ )
+
+ @_rewrite_parameters(
+ body_fields=("service", "service_settings"),
+ )
+ def put_watsonx(
+ self,
+ *,
+ task_type: t.Union[str, t.Literal["text_embedding"]],
+ watsonx_inference_id: str,
+ service: t.Optional[t.Union[str, t.Literal["watsonxai"]]] = None,
+ service_settings: t.Optional[t.Mapping[str, t.Any]] = None,
+ error_trace: t.Optional[bool] = None,
+ filter_path: t.Optional[t.Union[str, t.Sequence[str]]] = None,
+ human: t.Optional[bool] = None,
+ pretty: t.Optional[bool] = None,
+ body: t.Optional[t.Dict[str, t.Any]] = None,
+ ) -> ObjectApiResponse[t.Any]:
+ """
+ .. raw:: html
+
+ Create a Watsonx inference endpoint.
+ Creates an inference endpoint to perform an inference task with the watsonxai service.
+ You need an IBM Cloud Databases for Elasticsearch deployment to use the watsonxai inference service.
+ You can provision one through the IBM catalog, the Cloud Databases CLI plug-in, the Cloud Databases API, or Terraform.
+ When you create an inference endpoint, the associated machine learning model is automatically deployed if it is not already running.
+ After creating the endpoint, wait for the model deployment to complete before using it.
+ To verify the deployment status, use the get trained model statistics API.
+ Look for "state": "fully_allocated" in the response and ensure that the "allocation_count" matches the "target_allocation_count".
+ Avoid creating multiple endpoints for the same model unless required, as each endpoint consumes significant resources.
+
+
+ ``_
+
+ :param task_type: The task type. The only valid task type for the model to perform
+ is `text_embedding`.
+ :param watsonx_inference_id: The unique identifier of the inference endpoint.
+ :param service: The type of service supported for the specified task type. In
+ this case, `watsonxai`.
+ :param service_settings: Settings used to install the inference model. These
+ settings are specific to the `watsonxai` service.
+ """
+ if task_type in SKIP_IN_PATH:
+ raise ValueError("Empty value passed for parameter 'task_type'")
+ if watsonx_inference_id in SKIP_IN_PATH:
+ raise ValueError("Empty value passed for parameter 'watsonx_inference_id'")
+ if service is None and body is None:
+ raise ValueError("Empty value passed for parameter 'service'")
+ if service_settings is None and body is None:
+ raise ValueError("Empty value passed for parameter 'service_settings'")
+ __path_parts: t.Dict[str, str] = {
+ "task_type": _quote(task_type),
+ "watsonx_inference_id": _quote(watsonx_inference_id),
+ }
+ __path = f'/_inference/{__path_parts["task_type"]}/{__path_parts["watsonx_inference_id"]}'
+ __query: t.Dict[str, t.Any] = {}
+ __body: t.Dict[str, t.Any] = body if body is not None else {}
+ if error_trace is not None:
+ __query["error_trace"] = error_trace
+ if filter_path is not None:
+ __query["filter_path"] = filter_path
+ if human is not None:
+ __query["human"] = human
+ if pretty is not None:
+ __query["pretty"] = pretty
+ if not __body:
+ if service is not None:
+ __body["service"] = service
+ if service_settings is not None:
+ __body["service_settings"] = service_settings
+ if not __body:
+ __body = None # type: ignore[assignment]
+ __headers = {"accept": "application/json"}
+ if __body is not None:
+ __headers["content-type"] = "application/json"
+ return self.perform_request( # type: ignore[return-value]
+ "PUT",
+ __path,
+ params=__query,
+ headers=__headers,
+ body=__body,
+ endpoint_id="inference.put_watsonx",
+ path_parts=__path_parts,
+ )
+
+ @_rewrite_parameters(
+ body_fields=("input", "query", "task_settings"),
+ )
+ def rerank(
+ self,
+ *,
+ inference_id: str,
+ input: t.Optional[t.Union[str, t.Sequence[str]]] = None,
query: t.Optional[str] = None,
+ error_trace: t.Optional[bool] = None,
+ filter_path: t.Optional[t.Union[str, t.Sequence[str]]] = None,
+ human: t.Optional[bool] = None,
+ pretty: t.Optional[bool] = None,
task_settings: t.Optional[t.Any] = None,
timeout: t.Optional[t.Union[str, t.Literal[-1], t.Literal[0]]] = None,
body: t.Optional[t.Dict[str, t.Any]] = None,
@@ -180,14 +424,7 @@ def inference(
"""
.. raw:: html
- Perform inference on the service.
- This API enables you to use machine learning models to perform specific tasks on data that you provide as an input.
- It returns a response with the results of the tasks.
- The inference endpoint you use can perform one specific task that has been defined when the endpoint was created with the create inference API.
-
- info
- The inference APIs enable you to use certain services, such as built-in machine learning models (ELSER, E5), models uploaded through Eland, Cohere, OpenAI, Azure, Google AI Studio, Google Vertex AI, Anthropic, Watsonx.ai, or Hugging Face. For built-in models and models uploaded through Eland, the inference APIs offer an alternative way to use and manage trained models. However, if you do not plan to use the inference APIs to use these models or if you want to use non-NLP models, use the machine learning trained model APIs.
-
+ Perform reranking inference on the service
``_
@@ -196,9 +433,7 @@ def inference(
:param input: The text on which you want to perform the inference task. It can
be a single string or an array. > info > Inference endpoints for the `completion`
task type currently only support a single string as input.
- :param task_type: The type of inference task that the model performs.
- :param query: The query input, which is required only for the `rerank` task.
- It is not required for other tasks.
+ :param query: Query input.
:param task_settings: Task settings for the individual inference request. These
settings are specific to the task type you specified and override the task
settings specified when initializing the service.
@@ -208,18 +443,10 @@ def inference(
raise ValueError("Empty value passed for parameter 'inference_id'")
if input is None and body is None:
raise ValueError("Empty value passed for parameter 'input'")
- __path_parts: t.Dict[str, str]
- if task_type not in SKIP_IN_PATH and inference_id not in SKIP_IN_PATH:
- __path_parts = {
- "task_type": _quote(task_type),
- "inference_id": _quote(inference_id),
- }
- __path = f'/_inference/{__path_parts["task_type"]}/{__path_parts["inference_id"]}'
- elif inference_id not in SKIP_IN_PATH:
- __path_parts = {"inference_id": _quote(inference_id)}
- __path = f'/_inference/{__path_parts["inference_id"]}'
- else:
- raise ValueError("Couldn't find a path for the given parameters")
+ if query is None and body is None:
+ raise ValueError("Empty value passed for parameter 'query'")
+ __path_parts: t.Dict[str, str] = {"inference_id": _quote(inference_id)}
+ __path = f'/_inference/rerank/{__path_parts["inference_id"]}'
__query: t.Dict[str, t.Any] = {}
__body: t.Dict[str, t.Any] = body if body is not None else {}
if error_trace is not None:
@@ -250,71 +477,48 @@ def inference(
params=__query,
headers=__headers,
body=__body,
- endpoint_id="inference.inference",
+ endpoint_id="inference.rerank",
path_parts=__path_parts,
)
@_rewrite_parameters(
- body_name="inference_config",
+ body_fields=("input", "task_settings"),
)
- def put(
+ def sparse_embedding(
self,
*,
inference_id: str,
- inference_config: t.Optional[t.Mapping[str, t.Any]] = None,
- body: t.Optional[t.Mapping[str, t.Any]] = None,
- task_type: t.Optional[
- t.Union[
- str,
- t.Literal["completion", "rerank", "sparse_embedding", "text_embedding"],
- ]
- ] = None,
+ input: t.Optional[t.Union[str, t.Sequence[str]]] = None,
error_trace: t.Optional[bool] = None,
filter_path: t.Optional[t.Union[str, t.Sequence[str]]] = None,
human: t.Optional[bool] = None,
pretty: t.Optional[bool] = None,
+ task_settings: t.Optional[t.Any] = None,
+ timeout: t.Optional[t.Union[str, t.Literal[-1], t.Literal[0]]] = None,
+ body: t.Optional[t.Dict[str, t.Any]] = None,
) -> ObjectApiResponse[t.Any]:
"""
.. raw:: html
- Create an inference endpoint.
- When you create an inference endpoint, the associated machine learning model is automatically deployed if it is not already running.
- After creating the endpoint, wait for the model deployment to complete before using it.
- To verify the deployment status, use the get trained model statistics API.
- Look for "state": "fully_allocated" in the response and ensure that the "allocation_count" matches the "target_allocation_count".
- Avoid creating multiple endpoints for the same model unless required, as each endpoint consumes significant resources.
- IMPORTANT: The inference APIs enable you to use certain services, such as built-in machine learning models (ELSER, E5), models uploaded through Eland, Cohere, OpenAI, Mistral, Azure OpenAI, Google AI Studio, Google Vertex AI, Anthropic, Watsonx.ai, or Hugging Face.
- For built-in models and models uploaded through Eland, the inference APIs offer an alternative way to use and manage trained models.
- However, if you do not plan to use the inference APIs to use these models or if you want to use non-NLP models, use the machine learning trained model APIs.
+ Perform sparse embedding inference on the service
- ``_
+ ``_
:param inference_id: The inference Id
- :param inference_config:
- :param task_type: The task type
+ :param input: Inference input. Either a string or an array of strings.
+ :param task_settings: Optional task settings
+ :param timeout: Specifies the amount of time to wait for the inference request
+ to complete.
"""
if inference_id in SKIP_IN_PATH:
raise ValueError("Empty value passed for parameter 'inference_id'")
- if inference_config is None and body is None:
- raise ValueError(
- "Empty value passed for parameters 'inference_config' and 'body', one of them should be set."
- )
- elif inference_config is not None and body is not None:
- raise ValueError("Cannot set both 'inference_config' and 'body'")
- __path_parts: t.Dict[str, str]
- if task_type not in SKIP_IN_PATH and inference_id not in SKIP_IN_PATH:
- __path_parts = {
- "task_type": _quote(task_type),
- "inference_id": _quote(inference_id),
- }
- __path = f'/_inference/{__path_parts["task_type"]}/{__path_parts["inference_id"]}'
- elif inference_id not in SKIP_IN_PATH:
- __path_parts = {"inference_id": _quote(inference_id)}
- __path = f'/_inference/{__path_parts["inference_id"]}'
- else:
- raise ValueError("Couldn't find a path for the given parameters")
+ if input is None and body is None:
+ raise ValueError("Empty value passed for parameter 'input'")
+ __path_parts: t.Dict[str, str] = {"inference_id": _quote(inference_id)}
+ __path = f'/_inference/sparse_embedding/{__path_parts["inference_id"]}'
__query: t.Dict[str, t.Any] = {}
+ __body: t.Dict[str, t.Any] = body if body is not None else {}
if error_trace is not None:
__query["error_trace"] = error_trace
if filter_path is not None:
@@ -323,14 +527,92 @@ def put(
__query["human"] = human
if pretty is not None:
__query["pretty"] = pretty
- __body = inference_config if inference_config is not None else body
- __headers = {"accept": "application/json", "content-type": "application/json"}
+ if timeout is not None:
+ __query["timeout"] = timeout
+ if not __body:
+ if input is not None:
+ __body["input"] = input
+ if task_settings is not None:
+ __body["task_settings"] = task_settings
+ if not __body:
+ __body = None # type: ignore[assignment]
+ __headers = {"accept": "application/json"}
+ if __body is not None:
+ __headers["content-type"] = "application/json"
return self.perform_request( # type: ignore[return-value]
- "PUT",
+ "POST",
__path,
params=__query,
headers=__headers,
body=__body,
- endpoint_id="inference.put",
+ endpoint_id="inference.sparse_embedding",
+ path_parts=__path_parts,
+ )
+
+ @_rewrite_parameters(
+ body_fields=("input", "task_settings"),
+ )
+ def text_embedding(
+ self,
+ *,
+ inference_id: str,
+ input: t.Optional[t.Union[str, t.Sequence[str]]] = None,
+ error_trace: t.Optional[bool] = None,
+ filter_path: t.Optional[t.Union[str, t.Sequence[str]]] = None,
+ human: t.Optional[bool] = None,
+ pretty: t.Optional[bool] = None,
+ task_settings: t.Optional[t.Any] = None,
+ timeout: t.Optional[t.Union[str, t.Literal[-1], t.Literal[0]]] = None,
+ body: t.Optional[t.Dict[str, t.Any]] = None,
+ ) -> ObjectApiResponse[t.Any]:
+ """
+ .. raw:: html
+
+ Perform text embedding inference on the service
+
+
+ ``_
+
+ :param inference_id: The inference Id
+ :param input: Inference input. Either a string or an array of strings.
+ :param task_settings: Optional task settings
+ :param timeout: Specifies the amount of time to wait for the inference request
+ to complete.
+ """
+ if inference_id in SKIP_IN_PATH:
+ raise ValueError("Empty value passed for parameter 'inference_id'")
+ if input is None and body is None:
+ raise ValueError("Empty value passed for parameter 'input'")
+ __path_parts: t.Dict[str, str] = {"inference_id": _quote(inference_id)}
+ __path = f'/_inference/text_embedding/{__path_parts["inference_id"]}'
+ __query: t.Dict[str, t.Any] = {}
+ __body: t.Dict[str, t.Any] = body if body is not None else {}
+ if error_trace is not None:
+ __query["error_trace"] = error_trace
+ if filter_path is not None:
+ __query["filter_path"] = filter_path
+ if human is not None:
+ __query["human"] = human
+ if pretty is not None:
+ __query["pretty"] = pretty
+ if timeout is not None:
+ __query["timeout"] = timeout
+ if not __body:
+ if input is not None:
+ __body["input"] = input
+ if task_settings is not None:
+ __body["task_settings"] = task_settings
+ if not __body:
+ __body = None # type: ignore[assignment]
+ __headers = {"accept": "application/json"}
+ if __body is not None:
+ __headers["content-type"] = "application/json"
+ return self.perform_request( # type: ignore[return-value]
+ "POST",
+ __path,
+ params=__query,
+ headers=__headers,
+ body=__body,
+ endpoint_id="inference.text_embedding",
path_parts=__path_parts,
)
diff --git a/elasticsearch_serverless/_sync/client/ml.py b/elasticsearch_serverless/_sync/client/ml.py
index 6a15802..3bae42b 100644
--- a/elasticsearch_serverless/_sync/client/ml.py
+++ b/elasticsearch_serverless/_sync/client/ml.py
@@ -1543,7 +1543,6 @@ def get_trained_models(
],
]
] = None,
- include_model_definition: t.Optional[bool] = None,
pretty: t.Optional[bool] = None,
size: t.Optional[int] = None,
tags: t.Optional[t.Union[str, t.Sequence[str]]] = None,
@@ -1573,8 +1572,6 @@ def get_trained_models(
:param from_: Skips the specified number of models.
:param include: A comma delimited string of optional fields to include in the
response body.
- :param include_model_definition: parameter is deprecated! Use [include=definition]
- instead
:param size: Specifies the maximum number of models to obtain.
:param tags: A comma delimited string of tags. A trained model can have many
tags, or none. When supplied, only trained models that contain all the supplied
@@ -1604,8 +1601,6 @@ def get_trained_models(
__query["human"] = human
if include is not None:
__query["include"] = include
- if include_model_definition is not None:
- __query["include_model_definition"] = include_model_definition
if pretty is not None:
__query["pretty"] = pretty
if size is not None: