diff --git a/packages/tasks-gen/package.json b/packages/tasks-gen/package.json index 81f836b6f0..65784baef7 100644 --- a/packages/tasks-gen/package.json +++ b/packages/tasks-gen/package.json @@ -15,13 +15,16 @@ "inference-codegen": "tsx scripts/inference-codegen.ts && prettier --write ../tasks/src/tasks/*/inference.ts", "inference-tgi-import": "tsx scripts/inference-tgi-import.ts && prettier --write ../tasks/src/tasks/text-generation/spec/*.json && prettier --write ../tasks/src/tasks/chat-completion/spec/*.json", "inference-tei-import": "tsx scripts/inference-tei-import.ts && prettier --write ../tasks/src/tasks/feature-extraction/spec/*.json", + "inference-oai-import": "tsx scripts/inference-oai-import.ts && prettier --write ../tasks/src/tasks/chat-completion/spec-oai/*.json", "test": "vitest run" }, "type": "module", "author": "Hugging Face", "license": "MIT", "devDependencies": { + "@types/js-yaml": "^4.0.9", "@types/node": "^20.11.5", + "js-yaml": "^4.1.0", "quicktype-core": "https://github.com/huggingface/quicktype/raw/pack-18.0.17/packages/quicktype-core/quicktype-core-18.0.17.tgz", "type-fest": "^3.13.1" }, diff --git a/packages/tasks-gen/scripts/inference-codegen.ts b/packages/tasks-gen/scripts/inference-codegen.ts index 349b2870a2..4325c6a7dd 100644 --- a/packages/tasks-gen/scripts/inference-codegen.ts +++ b/packages/tasks-gen/scripts/inference-codegen.ts @@ -1,8 +1,8 @@ -import type { SerializedRenderResult } from "quicktype-core"; -import { quicktype, InputData, JSONSchemaInput, FetchingJSONSchemaStore } from "quicktype-core"; -import * as fs from "node:fs/promises"; import { existsSync as pathExists } from "node:fs"; +import * as fs from "node:fs/promises"; import * as path from "node:path/posix"; +import type { SerializedRenderResult } from "quicktype-core"; +import { FetchingJSONSchemaStore, InputData, JSONSchemaInput, quicktype } from "quicktype-core"; import ts from "typescript"; const TYPESCRIPT_HEADER_FILE = ` @@ -272,15 +272,21 @@ const allTasks = await Promise.all( .filter((entry) => entry.name !== "placeholder") .map(async (entry) => ({ task: entry.name, dirPath: path.join(entry.path, entry.name) })) ); +//For DEMO purposes only: for chat-completion, let's use the specs in \spec-oai const allSpecFiles = [ path.join(tasksDir, "common-definitions.json"), ...allTasks - .flatMap(({ dirPath }) => [path.join(dirPath, "spec", "input.json"), path.join(dirPath, "spec", "output.json")]) + .flatMap(({ task, dirPath }) => { + const specDirName = task === "chat-completion" ? "spec-oai" : "spec"; + const specPath = path.join(dirPath, specDirName); + return [path.join(specPath, "input.json"), path.join(specPath, "output.json")]; + }) .filter((filepath) => pathExists(filepath)), ]; for (const { task, dirPath } of allTasks) { - const taskSpecDir = path.join(dirPath, "spec"); + const specDirName = task === "chat-completion" ? "spec-oai" : "spec"; + const taskSpecDir = path.join(dirPath, specDirName); if (!(pathExists(path.join(taskSpecDir, "input.json")) && pathExists(path.join(taskSpecDir, "output.json")))) { console.debug(`No spec found for task ${task} - skipping`); continue; diff --git a/packages/tasks-gen/scripts/inference-oai-import.ts b/packages/tasks-gen/scripts/inference-oai-import.ts new file mode 100644 index 0000000000..2535930eb5 --- /dev/null +++ b/packages/tasks-gen/scripts/inference-oai-import.ts @@ -0,0 +1,208 @@ +/* + * Fetches OAI specs and generates JSON schema for input, output and stream_output of + * text-generation and chat-completion tasks. 
+ * See https://platform.openai.com/docs/api-reference/chat/create + */ +import fs from "fs/promises"; +import yaml from "js-yaml"; +import { existsSync as pathExists } from "node:fs"; +import * as path from "node:path/posix"; +import type { JsonObject, JsonValue } from "type-fest"; + +const URL = "https://raw.githubusercontent.com/openai/openai-openapi/refs/heads/master/openapi.yaml"; + +const rootDirFinder = function (): string { + let currentPath = path.normalize(import.meta.url); + + while (currentPath !== "/") { + if (pathExists(path.join(currentPath, "package.json"))) { + return currentPath; + } + + currentPath = path.normalize(path.join(currentPath, "..")); + } + + return "/"; +}; + +const rootDir = path.join(rootDirFinder(), "..", "tasks"); +const tasksDir = path.join(rootDir, "src", "tasks"); + +function toCamelCase(str: string, joiner = "") { + return str + .split(/[-_]/) + .map((part) => part.charAt(0).toUpperCase() + part.slice(1)) + .join(joiner); +} + +function nameNestedObjects(schema: JsonValue, parentName: string, parentKey?: string): void { + if (!schema || typeof schema !== "object") { + return; + } + + if (Array.isArray(schema)) { + for (const item of schema) { + nameNestedObjects(item, parentName); + } + return; + } + + // Process object properties + for (const [key, value] of Object.entries(schema)) { + if (!value || typeof value !== "object") { + continue; + } + + if (key === "properties" && typeof value === "object" && !Array.isArray(value)) { + // Process each property that has a nested object type + for (const [propKey, propValue] of Object.entries(value)) { + if (propValue && typeof propValue === "object" && !Array.isArray(propValue)) { + const propObj = propValue as JsonObject; + + // For object types without a title or a ref + if (propObj.type === "object" && !propObj.title && !propObj.$ref) { + const typeName = `${parentName}${toCamelCase(propKey)}`; + propObj.title = typeName; + } + + nameNestedObjects(propValue, parentName, propKey); + } + } + } else if (key === "items" && typeof value === "object") { + const itemObj = value as JsonObject; + if (itemObj.type === "object" && !itemObj.title && !itemObj.$ref) { + const typeName = `${parentName}${parentKey ? toCamelCase(parentKey) : ""}Item`; + itemObj.title = typeName; + } + nameNestedObjects(value, parentName, parentKey); + } else { + const nextParentName = + key === "definitions" || key === "$defs" + ? parentName + : key === "properties" + ? 
parentName + : ((value as JsonObject).title as string) || parentName; + + nameNestedObjects(value, nextParentName, key); + } + } +} + +async function _extractAndAdapt(task: string, mainComponentName: string, type: "input" | "output" | "stream_output") { + console.debug(`✨ Importing`, task, type); + + console.debug(" 📥 Fetching OpenAI specs"); + const response = await fetch(URL); + + const openapi = yaml.load(await response.text()) as any; + const components: Record = openapi["components"]["schemas"]; + + const camelName = toCamelCase(task); + const camelFullName = camelName + toCamelCase(type); + const mainComponent = components[mainComponentName]; + const filteredComponents: Record = {}; + + function _scan(data: JsonValue) { + if (Array.isArray(data) || data instanceof Array) { + for (const item of data) { + _scan(item); + } + } else if (data && typeof data === "object") { + for (const key of Object.keys(data)) { + // Only process external $refs pointing to components + if (key === "$ref" && typeof data[key] === "string" && !(data[key] as string).startsWith("#/$defs/")) { + const ref = (data[key] as string).split("/").pop() ?? ""; + if (!components[ref]) { + // If the ref doesn't exist in the original components, it might be a mistake or an internal ref we should ignore + console.warn(` ⚠️ Reference not found in original components, skipping: ${data[key]}`); + continue; + } + + // Add reference to components to export (and scan it too) + let newRef = camelFullName + ref.replace(camelName, ""); + newRef = newRef.replace("InputInput", "Input").replace("OutputOutput", "Output"); + if (!filteredComponents[newRef]) { + const componentCopy = JSON.parse(JSON.stringify(components[ref])); + componentCopy["title"] = newRef; + filteredComponents[newRef] = componentCopy; + _scan(componentCopy); + } + + // Updating the reference to new format + data[key] = `#/$defs/${newRef}`; + } else if (key !== "$ref") { + _scan(data[key]); + } + } + } + } + + console.debug(" 🏗️ Merging component definitions"); + const mergedProperties: Record = {}; + const mergedRequired = new Set(); + + if (mainComponent.allOf && Array.isArray(mainComponent.allOf)) { + for (const part of mainComponent.allOf) { + let componentPart: JsonObject | undefined; + if (part.$ref && typeof part.$ref === "string") { + const ref = part.$ref.split("/").pop() ?? 
""; + if (!components[ref]) { + throw new Error(`Reference not found in components during allOf merge: ${part.$ref}`); + } + componentPart = components[ref]; + } else { + componentPart = part; + } + + if (componentPart?.properties && typeof componentPart.properties === "object") { + Object.assign(mergedProperties, componentPart.properties); + } + // Merge required fields + if (componentPart?.required && Array.isArray(componentPart.required)) { + // Ensure req is treated as string, as required fields should be strings + componentPart.required.forEach((req: JsonValue) => { + if (typeof req === "string") { + mergedRequired.add(req); + } + }); + } + } + } else { + // Fallback if no allOf + if (mainComponent.properties) { + Object.assign(mergedProperties, mainComponent.properties); + } + if (mainComponent.required && Array.isArray(mainComponent.required)) { + mainComponent.required.forEach((req: string) => mergedRequired.add(req)); + } + } + + console.debug(" 📦 Packaging jsonschema and scanning final properties"); + const inputSchema: JsonObject = { + $id: `/inference/schemas/${task}/${type}.json`, + $schema: "http://json-schema.org/draft-06/schema#", + description: `${toCamelCase(task, " ")} ${toCamelCase( + type, + " " + )}.\n\nAuto-generated from OAI specs.\nFor more details, check out https://github.com/huggingface/huggingface.js/blob/main/packages/tasks/scripts/inference-oai-import.ts.`, + title: camelFullName, + type: "object", + required: Array.from(mergedRequired), + properties: mergedProperties, + $defs: filteredComponents, + }; + + _scan(inputSchema.properties); + + console.debug(" 🧩 Naming nested objects to avoid random prefixes"); + nameNestedObjects(inputSchema, camelFullName); + + const specPath = path.join(tasksDir, task, "spec-oai", `${type}.json`); + console.debug(" 📂 Exporting", specPath); + await fs.writeFile(specPath, JSON.stringify(inputSchema, null, 4)); +} + +await _extractAndAdapt("chat-completion", "CreateChatCompletionRequest", "input"); +await _extractAndAdapt("chat-completion", "CreateChatCompletionResponse", "output"); +await _extractAndAdapt("chat-completion", "CreateChatCompletionStreamResponse", "stream_output"); +console.debug("✅ All done!"); diff --git a/packages/tasks/.python_generated/chat_completion.py b/packages/tasks/.python_generated/chat_completion.py new file mode 100644 index 0000000000..326c1f341e --- /dev/null +++ b/packages/tasks/.python_generated/chat_completion.py @@ -0,0 +1,1118 @@ + +# Inference code generated from the JSON schema spec in @huggingface/tasks. +# +# See: +# - script: https://github.com/huggingface/huggingface.js/blob/main/packages/tasks/scripts/inference-codegen.ts +# - specs: https://github.com/huggingface/huggingface.js/tree/main/packages/tasks/src/tasks. + +from typing import Literal, Optional, Dict, Any, List, Union +from dataclasses import dataclass + + +AudioFormat = Literal["wav", "mp3", "flac", "opus", "pcm16"] + + +@dataclass +class ChatCompletionInputAudio: + """Parameters for audio output. Required when audio output is requested with + `modalities: ["audio"]`. [Learn more](/docs/guides/audio). + """ + format: 'AudioFormat' + """Specifies the output audio format. Must be one of `wav`, `mp3`, `flac`, + `opus`, or `pcm16`. + """ + voice: str + """The voice the model uses to respond. Supported voices are + `alloy`, `ash`, `ballad`, `coral`, `echo`, `sage`, and `shimmer`. 
+ """ + + +@dataclass +class ChatCompletionInputFunctionCallOption: + """Specifying a particular function via `{"name": "my_function"}` forces the model to call + that function. + """ + name: str + """The name of the function to call.""" + + +FunctionCallEnum = Literal["none", "auto"] + + +@dataclass +class ChatCompletionInputFunctions: + name: str + """The name of the function to be called. Must be a-z, A-Z, 0-9, or contain underscores and + dashes, with a maximum length of 64. + """ + description: Optional[str] + """A description of what the function does, used by the model to choose when and how to call + the function. + """ + parameters: Optional[Dict[str, Any]] + + +@dataclass +class ChatCompletionInputRequestAssistantMessageAudio: + """Data about a previous audio response from the model. + [Learn more](/docs/guides/audio). + """ + id: str + """Unique identifier for a previous audio response from the model.""" + + +@dataclass +class ChatCompletionInputRequestMessageContentPartFileFile: + file_data: Optional[str] + """The base64 encoded file data, used when passing the file to the model + as a string. + """ + file_id: Optional[str] + """The ID of an uploaded file to use as input.""" + filename: Optional[str] + """The name of the file, used when passing the file to the model as a + string. + """ + + +Detail = Literal["auto", "low", "high"] + + +@dataclass +class ChatCompletionInputRequestMessageContentPartImageImageURL: + url: str + """Either a URL of the image or the base64 encoded image data.""" + detail: Optional['Detail'] + """Specifies the detail level of the image. Learn more in the [Vision + guide](/docs/guides/vision#low-or-high-fidelity-image-understanding). + """ + + +InputAudioFormat = Literal["wav", "mp3"] + + +@dataclass +class ChatCompletionInputRequestMessageContentPartAudioInputAudio: + data: str + """Base64 encoded audio data.""" + format: 'InputAudioFormat' + """The format of the encoded audio data. Currently supports "wav" and "mp3".""" + + +PurpleType = Literal["text", "image_url", "input_audio", "file", "refusal"] + + +@dataclass +class ChatCompletionInputRequestMessageContentPart: + """An array of content parts with a defined type. For developer messages, only type `text` + is supported. + + Learn about [text inputs](/docs/guides/text-generation). + + + An array of content parts with a defined type. Supported options differ based on the + [model](/docs/models) being used to generate the response. Can contain text inputs. + + An array of content parts with a defined type. For system messages, only type `text` is + supported. + + An array of content parts with a defined type. For tool messages, only type `text` is + supported. + + An array of content parts with a defined type. Supported options differ based on the + [model](/docs/models) being used to generate the response. Can contain text, image, or + audio inputs. + + Learn about [image inputs](/docs/guides/vision). + + + Learn about [audio inputs](/docs/guides/audio). + + + Learn about [file inputs](/docs/guides/text) for text generation. + + + An array of content parts with a defined type. Can be one or more of type `text`, or + exactly one of type `refusal`. + """ + type: 'PurpleType' + """The type of the content part. + + The type of the content part. Always `input_audio`. + + The type of the content part. Always `file`. 
+ """ + file: Optional[ChatCompletionInputRequestMessageContentPartFileFile] + image_url: Optional[ChatCompletionInputRequestMessageContentPartImageImageURL] + input_audio: Optional[ChatCompletionInputRequestMessageContentPartAudioInputAudio] + refusal: Optional[str] + """The refusal message generated by the model.""" + text: Optional[str] + """The text content.""" + + +@dataclass +class ChatCompletionInputRequestAssistantMessageFunctionCall: + """Deprecated and replaced by `tool_calls`. The name and arguments of a function that should + be called, as generated by the model. + """ + arguments: str + """The arguments to call the function with, as generated by the model in JSON format. Note + that the model does not always generate valid JSON, and may hallucinate parameters not + defined by your function schema. Validate the arguments in your code before calling your + function. + """ + name: str + """The name of the function to call.""" + + +ChatCompletionInputRequestMessageRole = Literal["developer", "system", "user", "assistant", "tool", "function"] + + +@dataclass +class ChatCompletionInputMessageToolCallFunction: + """The function that the model called.""" + arguments: str + """The arguments to call the function with, as generated by the model in JSON format. Note + that the model does not always generate valid JSON, and may hallucinate parameters not + defined by your function schema. Validate the arguments in your code before calling your + function. + """ + name: str + """The name of the function to call.""" + + +ToolCallType = Literal["function"] + + +@dataclass +class ChatCompletionInputMessageToolCall: + """The tool calls generated by the model, such as function calls.""" + function: ChatCompletionInputMessageToolCallFunction + """The function that the model called.""" + id: str + """The ID of the tool call.""" + type: 'ToolCallType' + """The type of the tool. Currently, only `function` is supported.""" + + +@dataclass +class ChatCompletionInputRequestMessage: + """Developer-provided instructions that the model should follow, regardless of + messages sent by the user. With o1 models and newer, `developer` messages + replace the previous `system` messages. + + + Developer-provided instructions that the model should follow, regardless of + messages sent by the user. With o1 models and newer, use `developer` messages + for this purpose instead. + + + Messages sent by an end user, containing prompts or additional context + information. + + + Messages sent by the model in response to user messages. + """ + role: 'ChatCompletionInputRequestMessageRole' + """The role of the messages author, in this case `developer`. + + The role of the messages author, in this case `system`. + + The role of the messages author, in this case `user`. + + The role of the messages author, in this case `assistant`. + + The role of the messages author, in this case `tool`. + + The role of the messages author, in this case `function`. + """ + audio: Optional[ChatCompletionInputRequestAssistantMessageAudio] + """Data about a previous audio response from the model. + [Learn more](/docs/guides/audio). + """ + content: Optional[Union[List[ChatCompletionInputRequestMessageContentPart], str]] + """The contents of the developer message. + + The contents of the system message. + + The contents of the user message. + + + The contents of the assistant message. Required unless `tool_calls` or `function_call` is + specified. + + + The contents of the tool message. + + The contents of the function message. 
+ """ + function_call: Optional[ChatCompletionInputRequestAssistantMessageFunctionCall] + """Deprecated and replaced by `tool_calls`. The name and arguments of a function that should + be called, as generated by the model. + """ + name: Optional[str] + """An optional name for the participant. Provides the model information to differentiate + between participants of the same role. + + The name of the function to call. + """ + refusal: Optional[str] + """The refusal message by the assistant.""" + tool_call_id: Optional[str] + """Tool call that this message is responding to.""" + tool_calls: Optional[List[ChatCompletionInputMessageToolCall]] + + +ChatCompletionInputResponseModality = Literal["text", "audio"] + + +FluffyType = Literal["text"] + + +@dataclass +class ChatCompletionInputRequest: + """An array of content parts with a defined type. For developer messages, only type `text` + is supported. + + Learn about [text inputs](/docs/guides/text-generation). + + + An array of content parts with a defined type. Supported options differ based on the + [model](/docs/models) being used to generate the response. Can contain text inputs. + + An array of content parts with a defined type. For system messages, only type `text` is + supported. + + An array of content parts with a defined type. For tool messages, only type `text` is + supported. + """ + text: str + """The text content.""" + type: 'FluffyType' + """The type of the content part.""" + + +PredictionType = Literal["content"] + + +@dataclass +class ChatCompletionInputPredictionContent: + """Configuration for a [Predicted Output](/docs/guides/predicted-outputs), + which can greatly improve response times when large parts of the model + response are known ahead of time. This is most common when you are + regenerating a file with only minor changes to most of the content. + + + Static predicted output content, such as the content of a text file that is + being regenerated. + """ + content: Union[List[ChatCompletionInputRequest], str] + """The content that should be matched when generating a model response. + If generated tokens would match this content, the entire model response + can be returned much more quickly. + """ + type: 'PredictionType' + """The type of the predicted content you want to provide. This type is + currently always `content`. + """ + + +ReasoningEffortEnum = Literal["low", "medium", "high"] + + +@dataclass +class JSONSchema: + """Structured Outputs configuration options, including a JSON Schema.""" + name: str + """The name of the response format. Must be a-z, A-Z, 0-9, or contain + underscores and dashes, with a maximum length of 64. + """ + description: Optional[str] + """A description of what the response format is for, used by the model to + determine how to respond in the format. + """ + schema: Optional[Dict[str, Any]] + strict: Optional[bool] + """Whether to enable strict schema adherence when generating the output. + If set to true, the model will always follow the exact schema defined + in the `schema` field. Only a subset of JSON Schema is supported when + `strict` is `true`. To learn more, read the [Structured Outputs + guide](/docs/guides/structured-outputs). + """ + + +ResponseFormatType = Literal["text", "json_schema", "json_object"] + + +@dataclass +class ChatCompletionInputResponseFormat: + """An object specifying the format that the model must output. + + Setting to `{ "type": "json_schema", "json_schema": {...} }` enables + Structured Outputs which ensures the model will match your supplied JSON + schema. 
Learn more in the [Structured Outputs + guide](/docs/guides/structured-outputs). + + Setting to `{ "type": "json_object" }` enables the older JSON mode, which + ensures the message the model generates is valid JSON. Using `json_schema` + is preferred for models that support it. + + + Default response format. Used to generate text responses. + + + JSON Schema response format. Used to generate structured JSON responses. + Learn more about [Structured Outputs](/docs/guides/structured-outputs). + + + JSON object response format. An older method of generating JSON responses. + Using `json_schema` is recommended for models that support it. Note that the + model will not generate JSON without a system or user message instructing it + to do so. + """ + type: 'ResponseFormatType' + """The type of response format being defined. Always `text`. + + The type of response format being defined. Always `json_schema`. + + The type of response format being defined. Always `json_object`. + """ + json_schema: Optional[JSONSchema] + """Structured Outputs configuration options, including a JSON Schema.""" + + +ChatCompletionInputServiceTier = Literal["auto", "default"] + + +@dataclass +class ChatCompletionInputStreamOptions: + """Options for streaming response. Only set this when you set `stream: true`.""" + include_usage: Optional[bool] + """If set, an additional chunk will be streamed before the `data: [DONE]` + message. The `usage` field on this chunk shows the token usage statistics + for the entire request, and the `choices` field will always be an empty + array. + + All other chunks will also include a `usage` field, but with a null + value. **NOTE:** If the stream is interrupted, you may not receive the + final usage chunk which contains the total token usage for the request. + """ + + +@dataclass +class ChatCompletionInputNamedToolChoiceFunction: + name: str + """The name of the function to call.""" + + +@dataclass +class ChatCompletionInputNamedToolChoice: + """Specifies a tool the model should use. Use to force the model to call a specific function.""" + function: ChatCompletionInputNamedToolChoiceFunction + type: 'ToolCallType' + """The type of the tool. Currently, only `function` is supported.""" + + +ChatCompletionInputToolChoiceOptionEnum = Literal["none", "auto", "required"] + + +@dataclass +class ChatCompletionInputFunctionObject: + name: str + """The name of the function to be called. Must be a-z, A-Z, 0-9, or contain underscores and + dashes, with a maximum length of 64. + """ + description: Optional[str] + """A description of what the function does, used by the model to choose when and how to call + the function. + """ + parameters: Optional[Dict[str, Any]] + strict: Optional[bool] + """Whether to enable strict schema adherence when generating the function call. If set to + true, the model will follow the exact schema defined in the `parameters` field. Only a + subset of JSON Schema is supported when `strict` is `true`. Learn more about Structured + Outputs in the [function calling guide](docs/guides/function-calling). + """ + + +@dataclass +class ChatCompletionInputTool: + function: ChatCompletionInputFunctionObject + type: 'ToolCallType' + """The type of the tool. Currently, only `function` is supported.""" + + +@dataclass +class ChatCompletionInputWebSearchLocation: + """Approximate location parameters for the search.""" + city: Optional[str] + """Free text input for the city of the user, e.g. 
`San Francisco`.""" + country: Optional[str] + """The two-letter + [ISO country code](https://en.wikipedia.org/wiki/ISO_3166-1) of the user, + e.g. `US`. + """ + region: Optional[str] + """Free text input for the region of the user, e.g. `California`.""" + timezone: Optional[str] + """The [IANA timezone](https://timeapi.io/documentation/iana-timezones) + of the user, e.g. `America/Los_Angeles`. + """ + + +UserLocationType = Literal["approximate"] + + +@dataclass +class ChatCompletionInputUserLocation: + """Approximate location parameters for the search.""" + approximate: ChatCompletionInputWebSearchLocation + type: 'UserLocationType' + """The type of location approximation. Always `approximate`.""" + + +@dataclass +class WebSearch: + """This tool searches the web for relevant results to use in a response. + Learn more about the [web search tool](/docs/guides/tools-web-search?api-mode=chat). + """ + search_context_size: Optional['ReasoningEffortEnum'] + user_location: Optional[ChatCompletionInputUserLocation] + """Approximate location parameters for the search.""" + + +@dataclass +class ChatCompletionInput: + """Chat Completion Input. + + Auto-generated from OAI specs. + For more details, check out + https://github.com/huggingface/huggingface.js/blob/main/packages/tasks/scripts/inference-oai-import.ts. + """ + messages: List[ChatCompletionInputRequestMessage] + """A list of messages comprising the conversation so far. Depending on the + [model](/docs/models) you use, different message types (modalities) are + supported, like [text](/docs/guides/text-generation), + [images](/docs/guides/vision), and [audio](/docs/guides/audio). + """ + model: str + """Model ID used to generate the response, like `gpt-4o` or `o1`. OpenAI + offers a wide range of models with different capabilities, performance + characteristics, and price points. Refer to the [model guide](/docs/models) + to browse and compare available models. + """ + audio: Optional[ChatCompletionInputAudio] + """Parameters for audio output. Required when audio output is requested with + `modalities: ["audio"]`. [Learn more](/docs/guides/audio). + """ + frequency_penalty: Optional[float] + """Number between -2.0 and 2.0. Positive values penalize new tokens based on + their existing frequency in the text so far, decreasing the model's + likelihood to repeat the same line verbatim. + """ + function_call: Optional[Union[ChatCompletionInputFunctionCallOption, 'FunctionCallEnum']] + """Deprecated in favor of `tool_choice`. + + Controls which (if any) function is called by the model. + + `none` means the model will not call a function and instead generates a + message. + + `auto` means the model can pick between generating a message or calling a + function. + + Specifying a particular function via `{"name": "my_function"}` forces the + model to call that function. + + `none` is the default when no functions are present. `auto` is the default + if functions are present. + """ + functions: Optional[List[ChatCompletionInputFunctions]] + """Deprecated in favor of `tools`. + + A list of functions the model may generate JSON inputs for. + """ + logit_bias: Optional[Dict[str, int]] + """Modify the likelihood of specified tokens appearing in the completion. + + Accepts a JSON object that maps tokens (specified by their token ID in the + tokenizer) to an associated bias value from -100 to 100. Mathematically, + the bias is added to the logits generated by the model prior to sampling. 
+ The exact effect will vary per model, but values between -1 and 1 should + decrease or increase likelihood of selection; values like -100 or 100 + should result in a ban or exclusive selection of the relevant token. + """ + logprobs: Optional[bool] + """Whether to return log probabilities of the output tokens or not. If true, + returns the log probabilities of each output token returned in the + `content` of `message`. + """ + max_completion_tokens: Optional[int] + """An upper bound for the number of tokens that can be generated for a completion, including + visible output tokens and [reasoning tokens](/docs/guides/reasoning). + """ + max_tokens: Optional[int] + """The maximum number of [tokens](/tokenizer) that can be generated in the + chat completion. This value can be used to control + [costs](https://openai.com/api/pricing/) for text generated via API. + + This value is now deprecated in favor of `max_completion_tokens`, and is + not compatible with [o1 series models](/docs/guides/reasoning). + """ + modalities: Optional[List['ChatCompletionInputResponseModality']] + n: Optional[int] + """How many chat completion choices to generate for each input message. Note that you will + be charged based on the number of generated tokens across all of the choices. Keep `n` as + `1` to minimize costs. + """ + parallel_tool_calls: Optional[bool] + prediction: Optional[ChatCompletionInputPredictionContent] + """Configuration for a [Predicted Output](/docs/guides/predicted-outputs), + which can greatly improve response times when large parts of the model + response are known ahead of time. This is most common when you are + regenerating a file with only minor changes to most of the content. + """ + presence_penalty: Optional[float] + """Number between -2.0 and 2.0. Positive values penalize new tokens based on + whether they appear in the text so far, increasing the model's likelihood + to talk about new topics. + """ + reasoning_effort: Optional['ReasoningEffortEnum'] + response_format: Optional[ChatCompletionInputResponseFormat] + """An object specifying the format that the model must output. + + Setting to `{ "type": "json_schema", "json_schema": {...} }` enables + Structured Outputs which ensures the model will match your supplied JSON + schema. Learn more in the [Structured Outputs + guide](/docs/guides/structured-outputs). + + Setting to `{ "type": "json_object" }` enables the older JSON mode, which + ensures the message the model generates is valid JSON. Using `json_schema` + is preferred for models that support it. + """ + seed: Optional[int] + """This feature is in Beta. + If specified, our system will make a best effort to sample deterministically, such that + repeated requests with the same `seed` and parameters should return the same result. + Determinism is not guaranteed, and you should refer to the `system_fingerprint` response + parameter to monitor changes in the backend. + """ + service_tier: Optional['ChatCompletionInputServiceTier'] + """Specifies the latency tier to use for processing the request. This parameter is relevant + for customers subscribed to the scale tier service: + - If set to 'auto', and the Project is Scale tier enabled, the system + will utilize scale tier credits until they are exhausted. + - If set to 'auto', and the Project is not Scale tier enabled, the request will be + processed using the default service tier with a lower uptime SLA and no latency + guarentee. 
+ - If set to 'default', the request will be processed using the default service tier with + a lower uptime SLA and no latency guarentee. + - When not set, the default behavior is 'auto'. + + When this parameter is set, the response body will include the `service_tier` utilized. + """ + stop: Optional[Union[List[str], str]] + store: Optional[bool] + """Whether or not to store the output of this chat completion request for + use in our [model distillation](/docs/guides/distillation) or + [evals](/docs/guides/evals) products. + """ + stream: Optional[bool] + """If set to true, the model response data will be streamed to the client + as it is generated using [server-sent + events](https://developer.mozilla.org/en-US/docs/Web/API/Server-sent_events/Using_server-sent_events#Event_stream_format). + See the [Streaming section below](/docs/api-reference/chat/streaming) + for more information, along with the [streaming + responses](/docs/guides/streaming-responses) + guide for more information on how to handle the streaming events. + """ + stream_options: Optional[ChatCompletionInputStreamOptions] + tool_choice: Optional[Union[ChatCompletionInputNamedToolChoice, 'ChatCompletionInputToolChoiceOptionEnum']] + tools: Optional[List[ChatCompletionInputTool]] + """A list of tools the model may call. Currently, only functions are supported as a tool. + Use this to provide a list of functions the model may generate JSON inputs for. A max of + 128 functions are supported. + """ + top_logprobs: Optional[int] + """An integer between 0 and 20 specifying the number of most likely tokens to + return at each token position, each with an associated log probability. + `logprobs` must be set to `true` if this parameter is used. + """ + web_search_options: Optional[WebSearch] + """This tool searches the web for relevant results to use in a response. + Learn more about the [web search tool](/docs/guides/tools-web-search?api-mode=chat). + """ + + +FinishReason = Literal["stop", "length", "tool_calls", "content_filter", "function_call"] + + +@dataclass +class ChatCompletionOutputTokenLogprobTopLogprobsItem: + bytes: List[int] + """A list of integers representing the UTF-8 bytes representation of the token. Useful in + instances where characters are represented by multiple tokens and their byte + representations must be combined to generate the correct text representation. Can be + `null` if there is no bytes representation for the token. + """ + logprob: float + """The log probability of this token, if it is within the top 20 most likely tokens. + Otherwise, the value `-9999.0` is used to signify that the token is very unlikely. + """ + token: str + """The token.""" + + +@dataclass +class ChatCompletionOutputTokenLogprob: + bytes: List[int] + """A list of integers representing the UTF-8 bytes representation of the token. Useful in + instances where characters are represented by multiple tokens and their byte + representations must be combined to generate the correct text representation. Can be + `null` if there is no bytes representation for the token. + """ + logprob: float + """The log probability of this token, if it is within the top 20 most likely tokens. + Otherwise, the value `-9999.0` is used to signify that the token is very unlikely. + """ + token: str + """The token.""" + top_logprobs: List[ChatCompletionOutputTokenLogprobTopLogprobsItem] + """List of the most likely tokens and their log probability, at this token position. 
In rare + cases, there may be fewer than the number of requested `top_logprobs` returned. + """ + + +@dataclass +class ChatCompletionOutputLogprobs: + """Log probability information for the choice.""" + content: List[ChatCompletionOutputTokenLogprob] + """A list of message content tokens with log probability information.""" + refusal: List[ChatCompletionOutputTokenLogprob] + """A list of message refusal tokens with log probability information.""" + + +AnnotationType = Literal["url_citation"] + + +@dataclass +class ChatCompletionOutputResponseMessageURLCitation: + """A URL citation when using web search.""" + end_index: int + """The index of the last character of the URL citation in the message.""" + start_index: int + """The index of the first character of the URL citation in the message.""" + title: str + """The title of the web resource.""" + url: str + """The URL of the web resource.""" + + +@dataclass +class ChatCompletionOutputResponseMessageAnnotationsItem: + """A URL citation when using web search.""" + type: 'AnnotationType' + """The type of the URL citation. Always `url_citation`.""" + url_citation: ChatCompletionOutputResponseMessageURLCitation + """A URL citation when using web search.""" + + +@dataclass +class ChatCompletionOutputResponseMessageAudio: + """If the audio output modality is requested, this object contains data + about the audio response from the model. [Learn more](/docs/guides/audio). + """ + data: str + """Base64 encoded audio bytes generated by the model, in the format + specified in the request. + """ + expires_at: int + """The Unix timestamp (in seconds) for when this audio response will + no longer be accessible on the server for use in multi-turn + conversations. + """ + id: str + """Unique identifier for this audio response.""" + transcript: str + """Transcript of the audio generated by the model.""" + + +@dataclass +class ChatCompletionOutputResponseMessageFunctionCall: + """Deprecated and replaced by `tool_calls`. The name and arguments of a function that should + be called, as generated by the model. + """ + arguments: str + """The arguments to call the function with, as generated by the model in JSON format. Note + that the model does not always generate valid JSON, and may hallucinate parameters not + defined by your function schema. Validate the arguments in your code before calling your + function. + """ + name: str + """The name of the function to call.""" + + +MessageRole = Literal["assistant"] + + +@dataclass +class ChatCompletionOutputMessageToolCallFunction: + """The function that the model called.""" + arguments: str + """The arguments to call the function with, as generated by the model in JSON format. Note + that the model does not always generate valid JSON, and may hallucinate parameters not + defined by your function schema. Validate the arguments in your code before calling your + function. + """ + name: str + """The name of the function to call.""" + + +@dataclass +class ChatCompletionOutputMessageToolCall: + """The tool calls generated by the model, such as function calls.""" + function: ChatCompletionOutputMessageToolCallFunction + """The function that the model called.""" + id: str + """The ID of the tool call.""" + type: 'ToolCallType' + """The type of the tool. 
Currently, only `function` is supported.""" + + +@dataclass +class ChatCompletionOutputResponseMessage: + """A chat completion message generated by the model.""" + content: str + """The contents of the message.""" + refusal: str + """The refusal message generated by the model.""" + role: 'MessageRole' + """The role of the author of this message.""" + annotations: Optional[List[ChatCompletionOutputResponseMessageAnnotationsItem]] + """Annotations for the message, when applicable, as when using the + [web search tool](/docs/guides/tools-web-search?api-mode=chat). + """ + audio: Optional[ChatCompletionOutputResponseMessageAudio] + """If the audio output modality is requested, this object contains data + about the audio response from the model. [Learn more](/docs/guides/audio). + """ + function_call: Optional[ChatCompletionOutputResponseMessageFunctionCall] + """Deprecated and replaced by `tool_calls`. The name and arguments of a function that should + be called, as generated by the model. + """ + tool_calls: Optional[List[ChatCompletionOutputMessageToolCall]] + + +@dataclass +class ChatCompletionOutputChoicesItem: + finish_reason: 'FinishReason' + """The reason the model stopped generating tokens. This will be `stop` if the model hit a + natural stop point or a provided stop sequence, + `length` if the maximum number of tokens specified in the request was reached, + `content_filter` if content was omitted due to a flag from our content filters, + `tool_calls` if the model called a tool, or `function_call` (deprecated) if the model + called a function. + """ + index: int + """The index of the choice in the list of choices.""" + logprobs: ChatCompletionOutputLogprobs + """Log probability information for the choice.""" + message: ChatCompletionOutputResponseMessage + + +ChatCompletionOutputObject = Literal["chat.completion"] + + +ChatCompletionOutputServiceTier = Literal["scale", "default"] + + +@dataclass +class ChatCompletionOutputCompletionUsageCompletionTokensDetails: + """Breakdown of tokens used in a completion.""" + accepted_prediction_tokens: Optional[int] + """When using Predicted Outputs, the number of tokens in the + prediction that appeared in the completion. + """ + audio_tokens: Optional[int] + """Audio input tokens generated by the model.""" + reasoning_tokens: Optional[int] + """Tokens generated by the model for reasoning.""" + rejected_prediction_tokens: Optional[int] + """When using Predicted Outputs, the number of tokens in the + prediction that did not appear in the completion. However, like + reasoning tokens, these tokens are still counted in the total + completion tokens for purposes of billing, output, and context window + limits. 
+ """ + + +@dataclass +class ChatCompletionOutputCompletionUsagePromptTokensDetails: + """Breakdown of tokens used in the prompt.""" + audio_tokens: Optional[int] + """Audio input tokens present in the prompt.""" + cached_tokens: Optional[int] + """Cached tokens present in the prompt.""" + + +@dataclass +class ChatCompletionOutputCompletionUsage: + """Usage statistics for the completion request.""" + completion_tokens: int + """Number of tokens in the generated completion.""" + prompt_tokens: int + """Number of tokens in the prompt.""" + total_tokens: int + """Total number of tokens used in the request (prompt + completion).""" + completion_tokens_details: Optional[ChatCompletionOutputCompletionUsageCompletionTokensDetails] + """Breakdown of tokens used in a completion.""" + prompt_tokens_details: Optional[ChatCompletionOutputCompletionUsagePromptTokensDetails] + """Breakdown of tokens used in the prompt.""" + + +@dataclass +class ChatCompletionOutput: + """Chat Completion Output. + + Auto-generated from OAI specs. + For more details, check out + https://github.com/huggingface/huggingface.js/blob/main/packages/tasks/scripts/inference-oai-import.ts. + """ + choices: List[ChatCompletionOutputChoicesItem] + """A list of chat completion choices. Can be more than one if `n` is greater than 1.""" + created: int + """The Unix timestamp (in seconds) of when the chat completion was created.""" + id: str + """A unique identifier for the chat completion.""" + model: str + """The model used for the chat completion.""" + object: 'ChatCompletionOutputObject' + """The object type, which is always `chat.completion`.""" + service_tier: Optional['ChatCompletionOutputServiceTier'] + """The service tier used for processing the request.""" + system_fingerprint: Optional[str] + """This fingerprint represents the backend configuration that the model runs with. + + Can be used in conjunction with the `seed` request parameter to understand when backend + changes have been made that might impact determinism. + """ + usage: Optional[ChatCompletionOutputCompletionUsage] + + +@dataclass +class ChatCompletionStreamOutputStreamResponseDeltaFunctionCall: + """Deprecated and replaced by `tool_calls`. The name and arguments of a function that should + be called, as generated by the model. + """ + arguments: Optional[str] + """The arguments to call the function with, as generated by the model in JSON format. Note + that the model does not always generate valid JSON, and may hallucinate parameters not + defined by your function schema. Validate the arguments in your code before calling your + function. + """ + name: Optional[str] + """The name of the function to call.""" + + +DeltaRole = Literal["developer", "system", "user", "assistant", "tool"] + + +@dataclass +class ChatCompletionStreamOutputMessageToolCallChunkFunction: + arguments: Optional[str] + """The arguments to call the function with, as generated by the model in JSON format. Note + that the model does not always generate valid JSON, and may hallucinate parameters not + defined by your function schema. Validate the arguments in your code before calling your + function. + """ + name: Optional[str] + """The name of the function to call.""" + + +@dataclass +class ChatCompletionStreamOutputMessageToolCallChunk: + index: int + function: Optional[ChatCompletionStreamOutputMessageToolCallChunkFunction] + id: Optional[str] + """The ID of the tool call.""" + type: Optional['ToolCallType'] + """The type of the tool. 
Currently, only `function` is supported.""" + + +@dataclass +class ChatCompletionStreamOutputStreamResponseDelta: + """A chat completion delta generated by streamed model responses.""" + content: Optional[str] + """The contents of the chunk message.""" + function_call: Optional[ChatCompletionStreamOutputStreamResponseDeltaFunctionCall] + """Deprecated and replaced by `tool_calls`. The name and arguments of a function that should + be called, as generated by the model. + """ + refusal: Optional[str] + """The refusal message generated by the model.""" + role: Optional['DeltaRole'] + """The role of the author of this message.""" + tool_calls: Optional[List[ChatCompletionStreamOutputMessageToolCallChunk]] + + +@dataclass +class ChatCompletionStreamOutputTokenLogprobTopLogprobsItem: + bytes: List[int] + """A list of integers representing the UTF-8 bytes representation of the token. Useful in + instances where characters are represented by multiple tokens and their byte + representations must be combined to generate the correct text representation. Can be + `null` if there is no bytes representation for the token. + """ + logprob: float + """The log probability of this token, if it is within the top 20 most likely tokens. + Otherwise, the value `-9999.0` is used to signify that the token is very unlikely. + """ + token: str + """The token.""" + + +@dataclass +class ChatCompletionStreamOutputTokenLogprob: + bytes: List[int] + """A list of integers representing the UTF-8 bytes representation of the token. Useful in + instances where characters are represented by multiple tokens and their byte + representations must be combined to generate the correct text representation. Can be + `null` if there is no bytes representation for the token. + """ + logprob: float + """The log probability of this token, if it is within the top 20 most likely tokens. + Otherwise, the value `-9999.0` is used to signify that the token is very unlikely. + """ + token: str + """The token.""" + top_logprobs: List[ChatCompletionStreamOutputTokenLogprobTopLogprobsItem] + """List of the most likely tokens and their log probability, at this token position. In rare + cases, there may be fewer than the number of requested `top_logprobs` returned. + """ + + +@dataclass +class ChatCompletionStreamOutputLogprobs: + """Log probability information for the choice.""" + content: List[ChatCompletionStreamOutputTokenLogprob] + """A list of message content tokens with log probability information.""" + refusal: List[ChatCompletionStreamOutputTokenLogprob] + """A list of message refusal tokens with log probability information.""" + + +@dataclass +class ChatCompletionStreamOutputChoicesItem: + delta: ChatCompletionStreamOutputStreamResponseDelta + finish_reason: 'FinishReason' + """The reason the model stopped generating tokens. This will be `stop` if the model hit a + natural stop point or a provided stop sequence, + `length` if the maximum number of tokens specified in the request was reached, + `content_filter` if content was omitted due to a flag from our content filters, + `tool_calls` if the model called a tool, or `function_call` (deprecated) if the model + called a function. 
+ """ + index: int + """The index of the choice in the list of choices.""" + logprobs: Optional[ChatCompletionStreamOutputLogprobs] + """Log probability information for the choice.""" + + +ChatCompletionStreamOutputObject = Literal["chat.completion.chunk"] + + +@dataclass +class ChatCompletionStreamOutputCompletionUsageCompletionTokensDetails: + """Breakdown of tokens used in a completion.""" + accepted_prediction_tokens: Optional[int] + """When using Predicted Outputs, the number of tokens in the + prediction that appeared in the completion. + """ + audio_tokens: Optional[int] + """Audio input tokens generated by the model.""" + reasoning_tokens: Optional[int] + """Tokens generated by the model for reasoning.""" + rejected_prediction_tokens: Optional[int] + """When using Predicted Outputs, the number of tokens in the + prediction that did not appear in the completion. However, like + reasoning tokens, these tokens are still counted in the total + completion tokens for purposes of billing, output, and context window + limits. + """ + + +@dataclass +class ChatCompletionStreamOutputCompletionUsagePromptTokensDetails: + """Breakdown of tokens used in the prompt.""" + audio_tokens: Optional[int] + """Audio input tokens present in the prompt.""" + cached_tokens: Optional[int] + """Cached tokens present in the prompt.""" + + +@dataclass +class ChatCompletionStreamOutputCompletionUsage: + """An optional field that will only be present when you set + `stream_options: {"include_usage": true}` in your request. When present, it + contains a null value **except for the last chunk** which contains the + token usage statistics for the entire request. + + **NOTE:** If the stream is interrupted or cancelled, you may not + receive the final usage chunk which contains the total token usage for + the request. + + + Usage statistics for the completion request. + """ + completion_tokens: int + """Number of tokens in the generated completion.""" + prompt_tokens: int + """Number of tokens in the prompt.""" + total_tokens: int + """Total number of tokens used in the request (prompt + completion).""" + completion_tokens_details: Optional[ChatCompletionStreamOutputCompletionUsageCompletionTokensDetails] + """Breakdown of tokens used in a completion.""" + prompt_tokens_details: Optional[ChatCompletionStreamOutputCompletionUsagePromptTokensDetails] + """Breakdown of tokens used in the prompt.""" + + +@dataclass +class ChatCompletionStreamOutput: + """Chat Completion Stream Output. + + Auto-generated from OAI specs. + For more details, check out + https://github.com/huggingface/huggingface.js/blob/main/packages/tasks/scripts/inference-oai-import.ts. + """ + choices: List[ChatCompletionStreamOutputChoicesItem] + """A list of chat completion choices. Can contain more than one elements if `n` is greater + than 1. Can also be empty for the + last chunk if you set `stream_options: {"include_usage": true}`. + """ + created: int + """The Unix timestamp (in seconds) of when the chat completion was created. Each chunk has + the same timestamp. + """ + id: str + """A unique identifier for the chat completion. 
Each chunk has the same ID.""" + model: str + """The model to generate the completion.""" + object: 'ChatCompletionStreamOutputObject' + """The object type, which is always `chat.completion.chunk`.""" + service_tier: Optional['ChatCompletionOutputServiceTier'] + """The service tier used for processing the request.""" + system_fingerprint: Optional[str] + """This fingerprint represents the backend configuration that the model runs with. + Can be used in conjunction with the `seed` request parameter to understand when backend + changes have been made that might impact determinism. + """ + usage: Optional[ChatCompletionStreamOutputCompletionUsage] + """An optional field that will only be present when you set + `stream_options: {"include_usage": true}` in your request. When present, it + contains a null value **except for the last chunk** which contains the + token usage statistics for the entire request. + + **NOTE:** If the stream is interrupted or cancelled, you may not + receive the final usage chunk which contains the total token usage for + the request. + """ diff --git a/packages/tasks/src/tasks/chat-completion/inference.ts b/packages/tasks/src/tasks/chat-completion/inference.ts index 1dc674bf64..3014a031df 100644 --- a/packages/tasks/src/tasks/chat-completion/inference.ts +++ b/packages/tasks/src/tasks/chat-completion/inference.ts @@ -6,306 +6,1495 @@ /** * Chat Completion Input. * - * Auto-generated from TGI specs. + * Auto-generated from OAI specs. * For more details, check out - * https://github.com/huggingface/huggingface.js/blob/main/packages/tasks/scripts/inference-tgi-import.ts. + * https://github.com/huggingface/huggingface.js/blob/main/packages/tasks/scripts/inference-oai-import.ts. */ export interface ChatCompletionInput { /** - * Number between -2.0 and 2.0. Positive values penalize new tokens based on their existing - * frequency in the text so far, - * decreasing the model's likelihood to repeat the same line verbatim. + * Parameters for audio output. Required when audio output is requested with + * `modalities: ["audio"]`. [Learn more](/docs/guides/audio). + */ + audio?: ChatCompletionInputAudio; + /** + * Number between -2.0 and 2.0. Positive values penalize new tokens based on + * their existing frequency in the text so far, decreasing the model's + * likelihood to repeat the same line verbatim. */ frequency_penalty?: number; /** - * UNUSED - * Modify the likelihood of specified tokens appearing in the completion. Accepts a JSON - * object that maps tokens - * (specified by their token ID in the tokenizer) to an associated bias value from -100 to - * 100. Mathematically, - * the bias is added to the logits generated by the model prior to sampling. The exact - * effect will vary per model, - * but values between -1 and 1 should decrease or increase likelihood of selection; values - * like -100 or 100 should - * result in a ban or exclusive selection of the relevant token. + * Deprecated in favor of `tool_choice`. + * + * Controls which (if any) function is called by the model. + * + * `none` means the model will not call a function and instead generates a + * message. + * + * `auto` means the model can pick between generating a message or calling a + * function. + * + * Specifying a particular function via `{"name": "my_function"}` forces the + * model to call that function. + * + * `none` is the default when no functions are present. `auto` is the default + * if functions are present. + */ + function_call?: FunctionCallUnion; + /** + * Deprecated in favor of `tools`. 
+ * + * A list of functions the model may generate JSON inputs for. + */ + functions?: ChatCompletionInputFunctions[]; + /** + * Modify the likelihood of specified tokens appearing in the completion. + * + * Accepts a JSON object that maps tokens (specified by their token ID in the + * tokenizer) to an associated bias value from -100 to 100. Mathematically, + * the bias is added to the logits generated by the model prior to sampling. + * The exact effect will vary per model, but values between -1 and 1 should + * decrease or increase likelihood of selection; values like -100 or 100 + * should result in a ban or exclusive selection of the relevant token. + */ + logit_bias?: { + [key: string]: number; + }; + /** + * Whether to return log probabilities of the output tokens or not. If true, + * returns the log probabilities of each output token returned in the + * `content` of `message`. */ - logit_bias?: number[]; + logprobs?: boolean; + /** + * An upper bound for the number of tokens that can be generated for a completion, including + * visible output tokens and [reasoning tokens](/docs/guides/reasoning). + */ + max_completion_tokens?: number; + /** + * The maximum number of [tokens](/tokenizer) that can be generated in the + * chat completion. This value can be used to control + * [costs](https://openai.com/api/pricing/) for text generated via API. + * + * This value is now deprecated in favor of `max_completion_tokens`, and is + * not compatible with [o1 series models](/docs/guides/reasoning). + */ + max_tokens?: number; + /** + * A list of messages comprising the conversation so far. Depending on the + * [model](/docs/models) you use, different message types (modalities) are + * supported, like [text](/docs/guides/text-generation), + * [images](/docs/guides/vision), and [audio](/docs/guides/audio). + */ + messages: ChatCompletionInputRequestMessage[]; + modalities?: ChatCompletionInputResponseModality[]; + /** + * Model ID used to generate the response, like `gpt-4o` or `o1`. OpenAI + * offers a wide range of models with different capabilities, performance + * characteristics, and price points. Refer to the [model guide](/docs/models) + * to browse and compare available models. + */ + model: string; + /** + * How many chat completion choices to generate for each input message. Note that you will + * be charged based on the number of generated tokens across all of the choices. Keep `n` as + * `1` to minimize costs. + */ + n?: number; + parallel_tool_calls?: boolean; + /** + * Configuration for a [Predicted Output](/docs/guides/predicted-outputs), + * which can greatly improve response times when large parts of the model + * response are known ahead of time. This is most common when you are + * regenerating a file with only minor changes to most of the content. + */ + prediction?: ChatCompletionInputPredictionContent; + /** + * Number between -2.0 and 2.0. Positive values penalize new tokens based on + * whether they appear in the text so far, increasing the model's likelihood + * to talk about new topics. + */ + presence_penalty?: number; + reasoning_effort?: ReasoningEffortEnum; + /** + * An object specifying the format that the model must output. + * + * Setting to `{ "type": "json_schema", "json_schema": {...} }` enables + * Structured Outputs which ensures the model will match your supplied JSON + * schema. Learn more in the [Structured Outputs + * guide](/docs/guides/structured-outputs). 
+ * + * Setting to `{ "type": "json_object" }` enables the older JSON mode, which + * ensures the message the model generates is valid JSON. Using `json_schema` + * is preferred for models that support it. + */ + response_format?: ChatCompletionInputResponseFormat; + /** + * This feature is in Beta. + * If specified, our system will make a best effort to sample deterministically, such that + * repeated requests with the same `seed` and parameters should return the same result. + * Determinism is not guaranteed, and you should refer to the `system_fingerprint` response + * parameter to monitor changes in the backend. + */ + seed?: number; + /** + * Specifies the latency tier to use for processing the request. This parameter is relevant + * for customers subscribed to the scale tier service: + * - If set to 'auto', and the Project is Scale tier enabled, the system + * will utilize scale tier credits until they are exhausted. + * - If set to 'auto', and the Project is not Scale tier enabled, the request will be + * processed using the default service tier with a lower uptime SLA and no latency + * guarentee. + * - If set to 'default', the request will be processed using the default service tier with + * a lower uptime SLA and no latency guarentee. + * - When not set, the default behavior is 'auto'. + * + * When this parameter is set, the response body will include the `service_tier` utilized. + */ + service_tier?: ChatCompletionInputServiceTier; + stop?: ChatCompletionInputStopConfiguration; + /** + * Whether or not to store the output of this chat completion request for + * use in our [model distillation](/docs/guides/distillation) or + * [evals](/docs/guides/evals) products. + */ + store?: boolean; + /** + * If set to true, the model response data will be streamed to the client + * as it is generated using [server-sent + * events](https://developer.mozilla.org/en-US/docs/Web/API/Server-sent_events/Using_server-sent_events#Event_stream_format). + * See the [Streaming section below](/docs/api-reference/chat/streaming) + * for more information, along with the [streaming + * responses](/docs/guides/streaming-responses) + * guide for more information on how to handle the streaming events. + */ + stream?: boolean; + stream_options?: ChatCompletionInputStreamOptions; + tool_choice?: ChatCompletionInputToolChoiceOption; + /** + * A list of tools the model may call. Currently, only functions are supported as a tool. + * Use this to provide a list of functions the model may generate JSON inputs for. A max of + * 128 functions are supported. + */ + tools?: ChatCompletionInputTool[]; + /** + * An integer between 0 and 20 specifying the number of most likely tokens to + * return at each token position, each with an associated log probability. + * `logprobs` must be set to `true` if this parameter is used. + */ + top_logprobs?: number; + /** + * This tool searches the web for relevant results to use in a response. + * Learn more about the [web search tool](/docs/guides/tools-web-search?api-mode=chat). + */ + web_search_options?: WebSearch; + [property: string]: unknown; +} +/** + * Parameters for audio output. Required when audio output is requested with + * `modalities: ["audio"]`. [Learn more](/docs/guides/audio). + */ +export interface ChatCompletionInputAudio { + /** + * Specifies the output audio format. Must be one of `wav`, `mp3`, `flac`, + * `opus`, or `pcm16`. + */ + format: AudioFormat; + /** + * The voice the model uses to respond. 
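The `response_format` union replaces TGI's `{ type: "json" | "regex", value }` grammar object. A hedged sketch of the Structured Outputs variant (schema contents are illustrative):

```ts
import type { ChatCompletionInput } from "@huggingface/tasks"; // assumed re-export

const structured: ChatCompletionInput = {
	model: "gpt-4o",
	messages: [{ role: "user", content: "Extract the city from: 'I live in Paris.'" }],
	response_format: {
		type: "json_schema",
		json_schema: {
			name: "city_extraction", // a-z, A-Z, 0-9, underscores and dashes, max length 64
			schema: {
				type: "object",
				properties: { city: { type: "string" } },
				required: ["city"],
				additionalProperties: false,
			},
			strict: true, // only a subset of JSON Schema is supported when strict
		},
	},
	stop: ["\n\n"], // string or string[], up to 4 sequences
};
```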
Supported voices are + * `alloy`, `ash`, `ballad`, `coral`, `echo`, `sage`, and `shimmer`. + */ + voice: string; + [property: string]: unknown; +} +/** + * Specifies the output audio format. Must be one of `wav`, `mp3`, `flac`, + * `opus`, or `pcm16`. + */ +export type AudioFormat = "wav" | "mp3" | "flac" | "opus" | "pcm16"; +/** + * Deprecated in favor of `tool_choice`. + * + * Controls which (if any) function is called by the model. + * + * `none` means the model will not call a function and instead generates a + * message. + * + * `auto` means the model can pick between generating a message or calling a + * function. + * + * Specifying a particular function via `{"name": "my_function"}` forces the + * model to call that function. + * + * `none` is the default when no functions are present. `auto` is the default + * if functions are present. + */ +export type FunctionCallUnion = FunctionCallEnum | ChatCompletionInputFunctionCallOption; +/** + * `none` means the model will not call a function and instead generates a message. `auto` + * means the model can pick between generating a message or calling a function. + */ +export type FunctionCallEnum = "none" | "auto"; +/** + * Specifying a particular function via `{"name": "my_function"}` forces the model to call + * that function. + */ +export interface ChatCompletionInputFunctionCallOption { + /** + * The name of the function to call. + */ + name: string; + [property: string]: unknown; +} +export interface ChatCompletionInputFunctions { + /** + * A description of what the function does, used by the model to choose when and how to call + * the function. + */ + description?: string; + /** + * The name of the function to be called. Must be a-z, A-Z, 0-9, or contain underscores and + * dashes, with a maximum length of 64. + */ + name: string; + parameters?: { + [key: string]: unknown; + }; + [property: string]: unknown; +} +/** + * Developer-provided instructions that the model should follow, regardless of + * messages sent by the user. With o1 models and newer, `developer` messages + * replace the previous `system` messages. + * + * + * Developer-provided instructions that the model should follow, regardless of + * messages sent by the user. With o1 models and newer, use `developer` messages + * for this purpose instead. + * + * + * Messages sent by an end user, containing prompts or additional context + * information. + * + * + * Messages sent by the model in response to user messages. + */ +export interface ChatCompletionInputRequestMessage { + /** + * Data about a previous audio response from the model. + * [Learn more](/docs/guides/audio). + */ + audio?: ChatCompletionInputRequestAssistantMessageAudio; + /** + * The contents of the developer message. + * + * The contents of the system message. + * + * The contents of the user message. + * + * + * The contents of the assistant message. Required unless `tool_calls` or `function_call` is + * specified. + * + * + * The contents of the tool message. + * + * The contents of the function message. + */ + content?: ChatCompletionInputRequestMessageContent; + /** + * Deprecated and replaced by `tool_calls`. The name and arguments of a function that should + * be called, as generated by the model. + */ + function_call?: ChatCompletionInputRequestAssistantMessageFunctionCall; + /** + * An optional name for the participant. Provides the model information to differentiate + * between participants of the same role. + * + * The name of the function to call. 
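The deprecated `functions`/`function_call` pair is kept in the schema for backward compatibility. A sketch of the legacy shape (the weather function is hypothetical):

```ts
import type { ChatCompletionInput } from "@huggingface/tasks"; // assumed re-export

// Deprecated path: prefer `tools` + `tool_choice` in new code.
const legacy: ChatCompletionInput = {
	model: "gpt-4o",
	messages: [{ role: "user", content: "What's the weather in Berlin?" }],
	functions: [
		{
			name: "get_weather", // hypothetical function
			description: "Look up current weather for a city.",
			parameters: { type: "object", properties: { city: { type: "string" } } },
		},
	],
	function_call: { name: "get_weather" }, // forces the call; "none" | "auto" also accepted
};
```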
+ */ + name?: string; + /** + * The refusal message by the assistant. + */ + refusal?: string; + /** + * The role of the messages author, in this case `developer`. + * + * The role of the messages author, in this case `system`. + * + * The role of the messages author, in this case `user`. + * + * The role of the messages author, in this case `assistant`. + * + * The role of the messages author, in this case `tool`. + * + * The role of the messages author, in this case `function`. + */ + role: ChatCompletionInputRequestMessageRole; + /** + * Tool call that this message is responding to. + */ + tool_call_id?: string; + tool_calls?: ChatCompletionInputMessageToolCall[]; + [property: string]: unknown; +} +/** + * Data about a previous audio response from the model. + * [Learn more](/docs/guides/audio). + */ +export interface ChatCompletionInputRequestAssistantMessageAudio { + /** + * Unique identifier for a previous audio response from the model. + */ + id: string; + [property: string]: unknown; +} +export type ChatCompletionInputRequestMessageContent = ChatCompletionInputRequestMessageContentPart[] | string; +/** + * An array of content parts with a defined type. For developer messages, only type `text` + * is supported. + * + * Learn about [text inputs](/docs/guides/text-generation). + * + * + * An array of content parts with a defined type. Supported options differ based on the + * [model](/docs/models) being used to generate the response. Can contain text inputs. + * + * An array of content parts with a defined type. For system messages, only type `text` is + * supported. + * + * An array of content parts with a defined type. For tool messages, only type `text` is + * supported. + * + * An array of content parts with a defined type. Supported options differ based on the + * [model](/docs/models) being used to generate the response. Can contain text, image, or + * audio inputs. + * + * Learn about [image inputs](/docs/guides/vision). + * + * + * Learn about [audio inputs](/docs/guides/audio). + * + * + * Learn about [file inputs](/docs/guides/text) for text generation. + * + * + * An array of content parts with a defined type. Can be one or more of type `text`, or + * exactly one of type `refusal`. + */ +export interface ChatCompletionInputRequestMessageContentPart { + file?: ChatCompletionInputRequestMessageContentPartFileFile; + image_url?: ChatCompletionInputRequestMessageContentPartImageImageURL; + input_audio?: ChatCompletionInputRequestMessageContentPartAudioInputAudio; + /** + * The refusal message generated by the model. + */ + refusal?: string; + /** + * The text content. + */ + text?: string; + /** + * The type of the content part. + * + * The type of the content part. Always `input_audio`. + * + * The type of the content part. Always `file`. + */ + type: PurpleType; + [property: string]: unknown; +} +export interface ChatCompletionInputRequestMessageContentPartFileFile { + /** + * The base64 encoded file data, used when passing the file to the model + * as a string. + */ + file_data?: string; + /** + * The ID of an uploaded file to use as input. + */ + file_id?: string; + /** + * The name of the file, used when passing the file to the model as a + * string. + */ + filename?: string; + [property: string]: unknown; +} +export interface ChatCompletionInputRequestMessageContentPartImageImageURL { + /** + * Specifies the detail level of the image. Learn more in the [Vision + * guide](/docs/guides/vision#low-or-high-fidelity-image-understanding). 
+ */ + detail?: Detail; + /** + * Either a URL of the image or the base64 encoded image data. + */ + url: string; + [property: string]: unknown; +} +/** + * Specifies the detail level of the image. Learn more in the [Vision + * guide](/docs/guides/vision#low-or-high-fidelity-image-understanding). + */ +export type Detail = "auto" | "low" | "high"; +export interface ChatCompletionInputRequestMessageContentPartAudioInputAudio { + /** + * Base64 encoded audio data. + */ + data: string; + /** + * The format of the encoded audio data. Currently supports "wav" and "mp3". + */ + format: InputAudioFormat; + [property: string]: unknown; +} +/** + * The format of the encoded audio data. Currently supports "wav" and "mp3". + */ +export type InputAudioFormat = "wav" | "mp3"; +/** + * The type of the content part. + * + * The type of the content part. Always `input_audio`. + * + * The type of the content part. Always `file`. + */ +export type PurpleType = "text" | "image_url" | "input_audio" | "file" | "refusal"; +/** + * Deprecated and replaced by `tool_calls`. The name and arguments of a function that should + * be called, as generated by the model. + */ +export interface ChatCompletionInputRequestAssistantMessageFunctionCall { + /** + * The arguments to call the function with, as generated by the model in JSON format. Note + * that the model does not always generate valid JSON, and may hallucinate parameters not + * defined by your function schema. Validate the arguments in your code before calling your + * function. + */ + arguments: string; + /** + * The name of the function to call. + */ + name: string; + [property: string]: unknown; +} +/** + * The role of the messages author, in this case `developer`. + * + * The role of the messages author, in this case `system`. + * + * The role of the messages author, in this case `user`. + * + * The role of the messages author, in this case `assistant`. + * + * The role of the messages author, in this case `tool`. + * + * The role of the messages author, in this case `function`. + */ +export type ChatCompletionInputRequestMessageRole = "developer" | "system" | "user" | "assistant" | "tool" | "function"; +/** + * The tool calls generated by the model, such as function calls. + */ +export interface ChatCompletionInputMessageToolCall { + /** + * The function that the model called. + */ + function: ChatCompletionInputMessageToolCallFunction; + /** + * The ID of the tool call. + */ + id: string; + /** + * The type of the tool. Currently, only `function` is supported. + */ + type: "function"; + [property: string]: unknown; +} +/** + * The function that the model called. + */ +export interface ChatCompletionInputMessageToolCallFunction { + /** + * The arguments to call the function with, as generated by the model in JSON format. Note + * that the model does not always generate valid JSON, and may hallucinate parameters not + * defined by your function schema. Validate the arguments in your code before calling your + * function. + */ + arguments: string; + /** + * The name of the function to call. + */ + name: string; + [property: string]: unknown; +} +/** + * The type of the tool. Currently, only `function` is supported. + */ +/** + * Output types that you would like the model to generate. + * Most models are capable of generating text, which is the default: + * + * `["text"]` + * + * The `gpt-4o-audio-preview` model can also be used to + * [generate audio](/docs/guides/audio). 
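Message content is no longer limited to TGI's `text`/`image_url` chunks. A sketch of a mixed-content user message under the new part types:

```ts
import type { ChatCompletionInputRequestMessage } from "@huggingface/tasks"; // assumed re-export

const multimodal: ChatCompletionInputRequestMessage = {
	role: "user",
	content: [
		{ type: "text", text: "What is in this image?" },
		// `detail` trades cost for fidelity: "auto" | "low" | "high".
		{ type: "image_url", image_url: { url: "https://example.com/cat.png", detail: "low" } },
		// Audio parts are base64 payloads, currently "wav" or "mp3" only.
		{ type: "input_audio", input_audio: { data: "<base64>", format: "wav" } },
	],
};
```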
To request that this model generate + * both text and audio responses, you can use: + * + * `["text", "audio"]` + */ +export type ChatCompletionInputResponseModality = "text" | "audio"; +/** + * Configuration for a [Predicted Output](/docs/guides/predicted-outputs), + * which can greatly improve response times when large parts of the model + * response are known ahead of time. This is most common when you are + * regenerating a file with only minor changes to most of the content. + * + * + * Static predicted output content, such as the content of a text file that is + * being regenerated. + */ +export interface ChatCompletionInputPredictionContent { + /** + * The content that should be matched when generating a model response. + * If generated tokens would match this content, the entire model response + * can be returned much more quickly. + */ + content: PredictionContent; + /** + * The type of the predicted content you want to provide. This type is + * currently always `content`. + */ + type: "content"; + [property: string]: unknown; +} +/** + * The contents of the system message. + * + * The contents of the tool message. + */ +export type PredictionContent = ChatCompletionInputRequest[] | string; +/** + * An array of content parts with a defined type. For developer messages, only type `text` + * is supported. + * + * Learn about [text inputs](/docs/guides/text-generation). + * + * + * An array of content parts with a defined type. Supported options differ based on the + * [model](/docs/models) being used to generate the response. Can contain text inputs. + * + * An array of content parts with a defined type. For system messages, only type `text` is + * supported. + * + * An array of content parts with a defined type. For tool messages, only type `text` is + * supported. + */ +export interface ChatCompletionInputRequest { + /** + * The text content. + */ + text: string; + /** + * The type of the content part. + */ + type: "text"; + [property: string]: unknown; +} +/** + * The type of the content part. + */ +/** + * The type of the predicted content you want to provide. This type is + * currently always `content`. + */ +/** + * **o-series models only** + * + * Constrains effort on reasoning for + * [reasoning models](https://platform.openai.com/docs/guides/reasoning). + * Currently supported values are `low`, `medium`, and `high`. Reducing + * reasoning effort can result in faster responses and fewer tokens used + * on reasoning in a response. + * + * + * High level guidance for the amount of context window space to use for the + * search. One of `low`, `medium`, or `high`. `medium` is the default. + */ +export type ReasoningEffortEnum = "low" | "medium" | "high"; +/** + * An object specifying the format that the model must output. + * + * Setting to `{ "type": "json_schema", "json_schema": {...} }` enables + * Structured Outputs which ensures the model will match your supplied JSON + * schema. Learn more in the [Structured Outputs + * guide](/docs/guides/structured-outputs). + * + * Setting to `{ "type": "json_object" }` enables the older JSON mode, which + * ensures the message the model generates is valid JSON. Using `json_schema` + * is preferred for models that support it. + * + * + * Default response format. Used to generate text responses. + * + * + * JSON Schema response format. Used to generate structured JSON responses. + * Learn more about [Structured Outputs](/docs/guides/structured-outputs). + * + * + * JSON object response format. 
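Predicted Outputs are new relative to the TGI spec. A sketch of regenerating a file with a prediction attached (file contents are illustrative):

```ts
import type { ChatCompletionInput } from "@huggingface/tasks"; // assumed re-export

const regenerate: ChatCompletionInput = {
	model: "gpt-4o",
	messages: [{ role: "user", content: "Rename the class User to Account in this file." }],
	prediction: {
		type: "content", // currently the only predicted-content type
		// Tokens matching this prior version can be echoed back much faster;
		// rejected prediction tokens are still billed as completion tokens.
		content: "class User {\n\tid: string;\n}\n",
	},
};
```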
An older method of generating JSON responses. + * Using `json_schema` is recommended for models that support it. Note that the + * model will not generate JSON without a system or user message instructing it + * to do so. + */ +export interface ChatCompletionInputResponseFormat { + /** + * Structured Outputs configuration options, including a JSON Schema. + */ + json_schema?: JSONSchema; + /** + * The type of response format being defined. Always `text`. + * + * The type of response format being defined. Always `json_schema`. + * + * The type of response format being defined. Always `json_object`. + */ + type: ResponseFormatType; + [property: string]: unknown; +} +/** + * Structured Outputs configuration options, including a JSON Schema. + */ +export interface JSONSchema { + /** + * A description of what the response format is for, used by the model to + * determine how to respond in the format. + */ + description?: string; + /** + * The name of the response format. Must be a-z, A-Z, 0-9, or contain + * underscores and dashes, with a maximum length of 64. + */ + name: string; + schema?: { + [key: string]: unknown; + }; + /** + * Whether to enable strict schema adherence when generating the output. + * If set to true, the model will always follow the exact schema defined + * in the `schema` field. Only a subset of JSON Schema is supported when + * `strict` is `true`. To learn more, read the [Structured Outputs + * guide](/docs/guides/structured-outputs). + */ + strict?: boolean; + [property: string]: unknown; +} +/** + * The type of response format being defined. Always `text`. + * + * The type of response format being defined. Always `json_schema`. + * + * The type of response format being defined. Always `json_object`. + */ +export type ResponseFormatType = "text" | "json_schema" | "json_object"; +/** + * Specifies the latency tier to use for processing the request. This parameter is relevant + * for customers subscribed to the scale tier service: + * - If set to 'auto', and the Project is Scale tier enabled, the system + * will utilize scale tier credits until they are exhausted. + * - If set to 'auto', and the Project is not Scale tier enabled, the request will be + * processed using the default service tier with a lower uptime SLA and no latency + * guarentee. + * - If set to 'default', the request will be processed using the default service tier with + * a lower uptime SLA and no latency guarentee. + * - When not set, the default behavior is 'auto'. + * + * When this parameter is set, the response body will include the `service_tier` utilized. + */ +export type ChatCompletionInputServiceTier = "auto" | "default"; +/** + * Up to 4 sequences where the API will stop generating further tokens. The + * returned text will not contain the stop sequence. + */ +export type ChatCompletionInputStopConfiguration = string[] | string; +/** + * Options for streaming response. Only set this when you set `stream: true`. + */ +export interface ChatCompletionInputStreamOptions { + /** + * If set, an additional chunk will be streamed before the `data: [DONE]` + * message. The `usage` field on this chunk shows the token usage statistics + * for the entire request, and the `choices` field will always be an empty + * array. + * + * All other chunks will also include a `usage` field, but with a null + * value. **NOTE:** If the stream is interrupted, you may not receive the + * final usage chunk which contains the total token usage for the request. 
+ */ + include_usage?: boolean; + [property: string]: unknown; +} +/** + * Controls which (if any) tool is called by the model. + * `none` means the model will not call any tool and instead generates a message. + * `auto` means the model can pick between generating a message or calling one or more + * tools. + * `required` means the model must call one or more tools. + * Specifying a particular tool via `{"type": "function", "function": {"name": + * "my_function"}}` forces the model to call that tool. + * + * `none` is the default when no tools are present. `auto` is the default if tools are + * present. + */ +export type ChatCompletionInputToolChoiceOption = + | ChatCompletionInputToolChoiceOptionEnum + | ChatCompletionInputNamedToolChoice; +/** + * `none` means the model will not call any tool and instead generates a message. `auto` + * means the model can pick between generating a message or calling one or more tools. + * `required` means the model must call one or more tools. + */ +export type ChatCompletionInputToolChoiceOptionEnum = "none" | "auto" | "required"; +/** + * Specifies a tool the model should use. Use to force the model to call a specific function. + */ +export interface ChatCompletionInputNamedToolChoice { + function: ChatCompletionInputNamedToolChoiceFunction; + /** + * The type of the tool. Currently, only `function` is supported. + */ + type: "function"; + [property: string]: unknown; +} +export interface ChatCompletionInputNamedToolChoiceFunction { + /** + * The name of the function to call. + */ + name: string; + [property: string]: unknown; +} +export interface ChatCompletionInputTool { + function: ChatCompletionInputFunctionObject; + /** + * The type of the tool. Currently, only `function` is supported. + */ + type: "function"; + [property: string]: unknown; +} +export interface ChatCompletionInputFunctionObject { + /** + * A description of what the function does, used by the model to choose when and how to call + * the function. + */ + description?: string; + /** + * The name of the function to be called. Must be a-z, A-Z, 0-9, or contain underscores and + * dashes, with a maximum length of 64. + */ + name: string; + parameters?: { + [key: string]: unknown; + }; + /** + * Whether to enable strict schema adherence when generating the function call. If set to + * true, the model will follow the exact schema defined in the `parameters` field. Only a + * subset of JSON Schema is supported when `strict` is `true`. Learn more about Structured + * Outputs in the [function calling guide](docs/guides/function-calling). + */ + strict?: boolean; + [property: string]: unknown; +} +/** + * This tool searches the web for relevant results to use in a response. + * Learn more about the [web search tool](/docs/guides/tools-web-search?api-mode=chat). + */ +export interface WebSearch { + search_context_size?: ReasoningEffortEnum; + /** + * Approximate location parameters for the search. + */ + user_location?: ChatCompletionInputUserLocation; + [property: string]: unknown; +} +/** + * Approximate location parameters for the search. + */ +export interface ChatCompletionInputUserLocation { + approximate: ChatCompletionInputWebSearchLocation; + /** + * The type of location approximation. Always `approximate`. + */ + type: "approximate"; + [property: string]: unknown; +} +/** + * Approximate location parameters for the search. + */ +export interface ChatCompletionInputWebSearchLocation { + /** + * Free text input for the city of the user, e.g. `San Francisco`. 
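Tool calling and the new web-search options compose as follows; a sketch with hypothetical function and location values:

```ts
import type { ChatCompletionInput } from "@huggingface/tasks"; // assumed re-export

const withTools: ChatCompletionInput = {
	model: "gpt-4o",
	messages: [{ role: "user", content: "Book a table for two in Lyon tonight." }],
	tools: [
		{
			type: "function",
			function: {
				name: "book_table", // hypothetical tool
				description: "Reserve a restaurant table.",
				parameters: { type: "object", properties: { city: { type: "string" }, seats: { type: "integer" } } },
				strict: true, // exact schema adherence for generated arguments
			},
		},
	],
	// A named choice forces this tool; "none" | "auto" | "required" are the enum forms.
	tool_choice: { type: "function", function: { name: "book_table" } },
	web_search_options: {
		search_context_size: "medium",
		user_location: { type: "approximate", approximate: { city: "Lyon", country: "FR" } },
	},
};
```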
+ */ + city?: string; + /** + * The two-letter + * [ISO country code](https://en.wikipedia.org/wiki/ISO_3166-1) of the user, + * e.g. `US`. + */ + country?: string; + /** + * Free text input for the region of the user, e.g. `California`. + */ + region?: string; + /** + * The [IANA timezone](https://timeapi.io/documentation/iana-timezones) + * of the user, e.g. `America/Los_Angeles`. + */ + timezone?: string; + [property: string]: unknown; +} +/** + * The type of location approximation. Always `approximate`. + */ +/** + * Chat Completion Output. + * + * Auto-generated from OAI specs. + * For more details, check out + * https://github.com/huggingface/huggingface.js/blob/main/packages/tasks/scripts/inference-oai-import.ts. + */ +export interface ChatCompletionOutput { + /** + * A list of chat completion choices. Can be more than one if `n` is greater than 1. + */ + choices: ChatCompletionOutputChoicesItem[]; + /** + * The Unix timestamp (in seconds) of when the chat completion was created. + */ + created: number; + /** + * A unique identifier for the chat completion. + */ + id: string; + /** + * The model used for the chat completion. + */ + model: string; + /** + * The object type, which is always `chat.completion`. + */ + object: "chat.completion"; + /** + * The service tier used for processing the request. + */ + service_tier?: ChatCompletionOutputServiceTier; + /** + * This fingerprint represents the backend configuration that the model runs with. + * + * Can be used in conjunction with the `seed` request parameter to understand when backend + * changes have been made that might impact determinism. + */ + system_fingerprint?: string; + usage?: ChatCompletionOutputCompletionUsage; + [property: string]: unknown; +} +export interface ChatCompletionOutputChoicesItem { + /** + * The reason the model stopped generating tokens. This will be `stop` if the model hit a + * natural stop point or a provided stop sequence, + * `length` if the maximum number of tokens specified in the request was reached, + * `content_filter` if content was omitted due to a flag from our content filters, + * `tool_calls` if the model called a tool, or `function_call` (deprecated) if the model + * called a function. + */ + finish_reason: FinishReason; + /** + * The index of the choice in the list of choices. + */ + index: number; + /** + * Log probability information for the choice. + */ + logprobs: ChatCompletionOutputLogprobs; + message: ChatCompletionOutputResponseMessage; + [property: string]: unknown; +} +/** + * The reason the model stopped generating tokens. This will be `stop` if the model hit a + * natural stop point or a provided stop sequence, + * `length` if the maximum number of tokens specified in the request was reached, + * `content_filter` if content was omitted due to a flag from our content filters, + * `tool_calls` if the model called a tool, or `function_call` (deprecated) if the model + * called a function. + */ +export type FinishReason = "stop" | "length" | "tool_calls" | "content_filter" | "function_call"; +/** + * Log probability information for the choice. + */ +export interface ChatCompletionOutputLogprobs { + /** + * A list of message content tokens with log probability information. + */ + content: ChatCompletionOutputTokenLogprob[]; + /** + * A list of message refusal tokens with log probability information. 
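On the output side, `finish_reason` is now a closed union, which makes exhaustive handling straightforward. A small consumer sketch:

```ts
import type { ChatCompletionOutput } from "@huggingface/tasks"; // assumed re-export

function readCompletion(output: ChatCompletionOutput): string {
	const choice = output.choices[0];
	switch (choice.finish_reason) {
		case "tool_calls":
			// The model asked for tools instead of answering directly.
			return (choice.message.tool_calls ?? []).map((call) => call.function.name).join(", ");
		case "length":
			return `${choice.message.content} [truncated at the token limit]`;
		default:
			return choice.message.content;
	}
}
```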
+ */ + refusal: ChatCompletionOutputTokenLogprob[]; + [property: string]: unknown; +} +export interface ChatCompletionOutputTokenLogprob { + /** + * A list of integers representing the UTF-8 bytes representation of the token. Useful in + * instances where characters are represented by multiple tokens and their byte + * representations must be combined to generate the correct text representation. Can be + * `null` if there is no bytes representation for the token. + */ + bytes: number[]; + /** + * The log probability of this token, if it is within the top 20 most likely tokens. + * Otherwise, the value `-9999.0` is used to signify that the token is very unlikely. + */ + logprob: number; + /** + * The token. + */ + token: string; + /** + * List of the most likely tokens and their log probability, at this token position. In rare + * cases, there may be fewer than the number of requested `top_logprobs` returned. + */ + top_logprobs: ChatCompletionOutputTokenLogprobTopLogprobsItem[]; + [property: string]: unknown; +} +export interface ChatCompletionOutputTokenLogprobTopLogprobsItem { + /** + * A list of integers representing the UTF-8 bytes representation of the token. Useful in + * instances where characters are represented by multiple tokens and their byte + * representations must be combined to generate the correct text representation. Can be + * `null` if there is no bytes representation for the token. + */ + bytes: number[]; + /** + * The log probability of this token, if it is within the top 20 most likely tokens. + * Otherwise, the value `-9999.0` is used to signify that the token is very unlikely. + */ + logprob: number; + /** + * The token. + */ + token: string; + [property: string]: unknown; +} +/** + * A chat completion message generated by the model. + */ +export interface ChatCompletionOutputResponseMessage { /** - * Whether to return log probabilities of the output tokens or not. If true, returns the log - * probabilities of each - * output token returned in the content of message. + * Annotations for the message, when applicable, as when using the + * [web search tool](/docs/guides/tools-web-search?api-mode=chat). */ - logprobs?: boolean; + annotations?: ChatCompletionOutputResponseMessageAnnotationsItem[]; /** - * The maximum number of tokens that can be generated in the chat completion. + * If the audio output modality is requested, this object contains data + * about the audio response from the model. [Learn more](/docs/guides/audio). */ - max_tokens?: number; + audio?: ChatCompletionOutputResponseMessageAudio; /** - * A list of messages comprising the conversation so far. + * The contents of the message. */ - messages: ChatCompletionInputMessage[]; + content: string; /** - * [UNUSED] ID of the model to use. See the model endpoint compatibility table for details - * on which models work with the Chat API. + * Deprecated and replaced by `tool_calls`. The name and arguments of a function that should + * be called, as generated by the model. */ - model?: string; + function_call?: ChatCompletionOutputResponseMessageFunctionCall; /** - * UNUSED - * How many chat completion choices to generate for each input message. Note that you will - * be charged based on the - * number of generated tokens across all of the choices. Keep n as 1 to minimize costs. + * The refusal message generated by the model. */ - n?: number; + refusal: string; /** - * Number between -2.0 and 2.0. 
Positive values penalize new tokens based on whether they - * appear in the text so far, - * increasing the model's likelihood to talk about new topics + * The role of the author of this message. */ - presence_penalty?: number; - response_format?: ChatCompletionInputGrammarType; - seed?: number; + role: "assistant"; + tool_calls?: ChatCompletionOutputMessageToolCall[]; + [property: string]: unknown; +} +/** + * A URL citation when using web search. + */ +export interface ChatCompletionOutputResponseMessageAnnotationsItem { /** - * Up to 4 sequences where the API will stop generating further tokens. + * The type of the URL citation. Always `url_citation`. */ - stop?: string[]; - stream?: boolean; - stream_options?: ChatCompletionInputStreamOptions; + type: "url_citation"; /** - * What sampling temperature to use, between 0 and 2. Higher values like 0.8 will make the - * output more random, while - * lower values like 0.2 will make it more focused and deterministic. - * - * We generally recommend altering this or `top_p` but not both. + * A URL citation when using web search. */ - temperature?: number; - tool_choice?: ChatCompletionInputToolChoice; + url_citation: ChatCompletionOutputResponseMessageURLCitation; + [property: string]: unknown; +} +/** + * The type of the URL citation. Always `url_citation`. + */ +/** + * A URL citation when using web search. + */ +export interface ChatCompletionOutputResponseMessageURLCitation { /** - * A prompt to be appended before the tools + * The index of the last character of the URL citation in the message. */ - tool_prompt?: string; + end_index: number; /** - * A list of tools the model may call. Currently, only functions are supported as a tool. - * Use this to provide a list of - * functions the model may generate JSON inputs for. + * The index of the first character of the URL citation in the message. */ - tools?: ChatCompletionInputTool[]; + start_index: number; /** - * An integer between 0 and 5 specifying the number of most likely tokens to return at each - * token position, each with - * an associated log probability. logprobs must be set to true if this parameter is used. + * The title of the web resource. */ - top_logprobs?: number; + title: string; /** - * An alternative to sampling with temperature, called nucleus sampling, where the model - * considers the results of the - * tokens with top_p probability mass. So 0.1 means only the tokens comprising the top 10% - * probability mass are considered. + * The URL of the web resource. */ - top_p?: number; - [property: string]: unknown; -} -export interface ChatCompletionInputMessage { - content?: ChatCompletionInputMessageContent; - name?: string; - role: string; - tool_calls?: ChatCompletionInputToolCall[]; - [property: string]: unknown; -} -export type ChatCompletionInputMessageContent = ChatCompletionInputMessageChunk[] | string; -export interface ChatCompletionInputMessageChunk { - image_url?: ChatCompletionInputURL; - text?: string; - type: ChatCompletionInputMessageChunkType; - [property: string]: unknown; -} -export interface ChatCompletionInputURL { url: string; [property: string]: unknown; } -export type ChatCompletionInputMessageChunkType = "text" | "image_url"; -export interface ChatCompletionInputToolCall { - function: ChatCompletionInputFunctionDefinition; +/** + * If the audio output modality is requested, this object contains data + * about the audio response from the model. [Learn more](/docs/guides/audio). 
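Annotations are another OAI-only addition. A sketch that flattens web-search URL citations (the offsets index into `message.content`):

```ts
import type { ChatCompletionOutput } from "@huggingface/tasks"; // assumed re-export

function listCitations(output: ChatCompletionOutput): string[] {
	const annotations = output.choices[0]?.message.annotations ?? [];
	// Each annotation is always of type "url_citation" in the current schema.
	return annotations.map(
		(a) => `[${a.url_citation.title}](${a.url_citation.url}) chars ${a.url_citation.start_index}-${a.url_citation.end_index}`
	);
}
```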
+ */ +export interface ChatCompletionOutputResponseMessageAudio { + /** + * Base64 encoded audio bytes generated by the model, in the format + * specified in the request. + */ + data: string; + /** + * The Unix timestamp (in seconds) for when this audio response will + * no longer be accessible on the server for use in multi-turn + * conversations. + */ + expires_at: number; + /** + * Unique identifier for this audio response. + */ id: string; - type: string; - [property: string]: unknown; -} -export interface ChatCompletionInputFunctionDefinition { - description?: string; - name: string; - parameters: unknown; - [property: string]: unknown; -} -export interface ChatCompletionInputGrammarType { - type: ChatCompletionInputGrammarTypeType; /** - * A string that represents a [JSON Schema](https://json-schema.org/). - * - * JSON Schema is a declarative language that allows to annotate JSON documents - * with types and descriptions. + * Transcript of the audio generated by the model. */ - value: unknown; + transcript: string; [property: string]: unknown; } -export type ChatCompletionInputGrammarTypeType = "json" | "regex"; -export interface ChatCompletionInputStreamOptions { +/** + * Deprecated and replaced by `tool_calls`. The name and arguments of a function that should + * be called, as generated by the model. + */ +export interface ChatCompletionOutputResponseMessageFunctionCall { /** - * If set, an additional chunk will be streamed before the data: [DONE] message. The usage - * field on this chunk shows the token usage statistics for the entire request, and the - * choices field will always be an empty array. All other chunks will also include a usage - * field, but with a null value. + * The arguments to call the function with, as generated by the model in JSON format. Note + * that the model does not always generate valid JSON, and may hallucinate parameters not + * defined by your function schema. Validate the arguments in your code before calling your + * function. */ - include_usage?: boolean; + arguments: string; + /** + * The name of the function to call. + */ + name: string; [property: string]: unknown; } /** - * - * + * The role of the author of this message. */ -export type ChatCompletionInputToolChoice = ChatCompletionInputToolChoiceEnum | ChatCompletionInputToolChoiceObject; /** - * Means the model can pick between generating a message or calling one or more tools. - * - * Means the model will not call any tool and instead generates a message. - * - * Means the model must call one or more tools. + * The tool calls generated by the model, such as function calls. */ -export type ChatCompletionInputToolChoiceEnum = "auto" | "none" | "required"; -export interface ChatCompletionInputToolChoiceObject { - function: ChatCompletionInputFunctionName; - [property: string]: unknown; -} -export interface ChatCompletionInputFunctionName { - name: string; - [property: string]: unknown; -} -export interface ChatCompletionInputTool { - function: ChatCompletionInputFunctionDefinition; - type: string; +export interface ChatCompletionOutputMessageToolCall { + /** + * The function that the model called. + */ + function: ChatCompletionOutputMessageToolCallFunction; + /** + * The ID of the tool call. + */ + id: string; + /** + * The type of the tool. Currently, only `function` is supported. + */ + type: "function"; [property: string]: unknown; } /** - * Chat Completion Output. - * - * Auto-generated from TGI specs. 
- * For more details, check out - * https://github.com/huggingface/huggingface.js/blob/main/packages/tasks/scripts/inference-tgi-import.ts. + * The function that the model called. */ -export interface ChatCompletionOutput { - choices: ChatCompletionOutputComplete[]; - created: number; - id: string; - model: string; - system_fingerprint: string; - usage: ChatCompletionOutputUsage; - [property: string]: unknown; -} -export interface ChatCompletionOutputComplete { - finish_reason: string; - index: number; - logprobs?: ChatCompletionOutputLogprobs; - message: ChatCompletionOutputMessage; - [property: string]: unknown; -} -export interface ChatCompletionOutputLogprobs { - content: ChatCompletionOutputLogprob[]; - [property: string]: unknown; -} -export interface ChatCompletionOutputLogprob { - logprob: number; - token: string; - top_logprobs: ChatCompletionOutputTopLogprob[]; - [property: string]: unknown; -} -export interface ChatCompletionOutputTopLogprob { - logprob: number; - token: string; - [property: string]: unknown; -} -export interface ChatCompletionOutputMessage { - content?: string; - role: string; - tool_call_id?: string; - tool_calls?: ChatCompletionOutputToolCall[]; - [property: string]: unknown; -} -export interface ChatCompletionOutputToolCall { - function: ChatCompletionOutputFunctionDefinition; - id: string; - type: string; - [property: string]: unknown; -} -export interface ChatCompletionOutputFunctionDefinition { +export interface ChatCompletionOutputMessageToolCallFunction { + /** + * The arguments to call the function with, as generated by the model in JSON format. Note + * that the model does not always generate valid JSON, and may hallucinate parameters not + * defined by your function schema. Validate the arguments in your code before calling your + * function. + */ arguments: string; - description?: string; + /** + * The name of the function to call. + */ name: string; [property: string]: unknown; } -export interface ChatCompletionOutputUsage { +/** + * The object type, which is always `chat.completion`. + */ +/** + * The service tier used for processing the request. + */ +export type ChatCompletionOutputServiceTier = "scale" | "default"; +/** + * Usage statistics for the completion request. + */ +export interface ChatCompletionOutputCompletionUsage { + /** + * Number of tokens in the generated completion. + */ completion_tokens: number; + /** + * Breakdown of tokens used in a completion. + */ + completion_tokens_details?: ChatCompletionOutputCompletionUsageCompletionTokensDetails; + /** + * Number of tokens in the prompt. + */ prompt_tokens: number; + /** + * Breakdown of tokens used in the prompt. + */ + prompt_tokens_details?: ChatCompletionOutputCompletionUsagePromptTokensDetails; + /** + * Total number of tokens used in the request (prompt + completion). + */ total_tokens: number; [property: string]: unknown; } +/** + * Breakdown of tokens used in a completion. + */ +export interface ChatCompletionOutputCompletionUsageCompletionTokensDetails { + /** + * When using Predicted Outputs, the number of tokens in the + * prediction that appeared in the completion. + */ + accepted_prediction_tokens?: number; + /** + * Audio input tokens generated by the model. + */ + audio_tokens?: number; + /** + * Tokens generated by the model for reasoning. + */ + reasoning_tokens?: number; + /** + * When using Predicted Outputs, the number of tokens in the + * prediction that did not appear in the completion. 
However, like + * reasoning tokens, these tokens are still counted in the total + * completion tokens for purposes of billing, output, and context window + * limits. + */ + rejected_prediction_tokens?: number; + [property: string]: unknown; +} +/** + * Breakdown of tokens used in the prompt. + */ +export interface ChatCompletionOutputCompletionUsagePromptTokensDetails { + /** + * Audio input tokens present in the prompt. + */ + audio_tokens?: number; + /** + * Cached tokens present in the prompt. + */ + cached_tokens?: number; + [property: string]: unknown; +} /** * Chat Completion Stream Output. * - * Auto-generated from TGI specs. + * Auto-generated from OAI specs. * For more details, check out - * https://github.com/huggingface/huggingface.js/blob/main/packages/tasks/scripts/inference-tgi-import.ts. + * https://github.com/huggingface/huggingface.js/blob/main/packages/tasks/scripts/inference-oai-import.ts. */ export interface ChatCompletionStreamOutput { - choices: ChatCompletionStreamOutputChoice[]; + /** + * A list of chat completion choices. Can contain more than one elements if `n` is greater + * than 1. Can also be empty for the + * last chunk if you set `stream_options: {"include_usage": true}`. + */ + choices: ChatCompletionStreamOutputChoicesItem[]; + /** + * The Unix timestamp (in seconds) of when the chat completion was created. Each chunk has + * the same timestamp. + */ created: number; + /** + * A unique identifier for the chat completion. Each chunk has the same ID. + */ id: string; + /** + * The model to generate the completion. + */ model: string; - system_fingerprint: string; - usage?: ChatCompletionStreamOutputUsage; + /** + * The object type, which is always `chat.completion.chunk`. + */ + object: "chat.completion.chunk"; + /** + * The service tier used for processing the request. + */ + service_tier?: ChatCompletionOutputServiceTier; + /** + * This fingerprint represents the backend configuration that the model runs with. + * Can be used in conjunction with the `seed` request parameter to understand when backend + * changes have been made that might impact determinism. + */ + system_fingerprint?: string; + /** + * An optional field that will only be present when you set + * `stream_options: {"include_usage": true}` in your request. When present, it + * contains a null value **except for the last chunk** which contains the + * token usage statistics for the entire request. + * + * **NOTE:** If the stream is interrupted or cancelled, you may not + * receive the final usage chunk which contains the total token usage for + * the request. + */ + usage?: ChatCompletionStreamOutputCompletionUsage; [property: string]: unknown; } -export interface ChatCompletionStreamOutputChoice { - delta: ChatCompletionStreamOutputDelta; - finish_reason?: string; +export interface ChatCompletionStreamOutputChoicesItem { + delta: ChatCompletionStreamOutputStreamResponseDelta; + /** + * The reason the model stopped generating tokens. This will be `stop` if the model hit a + * natural stop point or a provided stop sequence, + * `length` if the maximum number of tokens specified in the request was reached, + * `content_filter` if content was omitted due to a flag from our content filters, + * `tool_calls` if the model called a tool, or `function_call` (deprecated) if the model + * called a function. + */ + finish_reason: FinishReason; + /** + * The index of the choice in the list of choices. + */ index: number; + /** + * Log probability information for the choice. 
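Because the final usage chunk is best-effort (it can be lost if the stream is interrupted), a stream consumer should treat `usage` as optional. A minimal accumulator sketch:

```ts
import type { ChatCompletionStreamOutput } from "@huggingface/tasks"; // assumed re-export

async function collect(stream: AsyncIterable<ChatCompletionStreamOutput>) {
	let text = "";
	let usage: ChatCompletionStreamOutput["usage"];
	for await (const chunk of stream) {
		text += chunk.choices[0]?.delta.content ?? "";
		// With stream_options.include_usage, every chunk carries `usage: null`
		// except the last one; keep whichever non-null value arrives.
		if (chunk.usage) usage = chunk.usage;
	}
	return { text, totalTokens: usage?.total_tokens };
}
```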
+ */ logprobs?: ChatCompletionStreamOutputLogprobs; [property: string]: unknown; } -export interface ChatCompletionStreamOutputDelta { +/** + * A chat completion delta generated by streamed model responses. + */ +export interface ChatCompletionStreamOutputStreamResponseDelta { + /** + * The contents of the chunk message. + */ content?: string; - role: string; - tool_call_id?: string; - tool_calls?: ChatCompletionStreamOutputDeltaToolCall[]; + /** + * Deprecated and replaced by `tool_calls`. The name and arguments of a function that should + * be called, as generated by the model. + */ + function_call?: ChatCompletionStreamOutputStreamResponseDeltaFunctionCall; + /** + * The refusal message generated by the model. + */ + refusal?: string; + /** + * The role of the author of this message. + */ + role?: DeltaRole; + tool_calls?: ChatCompletionStreamOutputMessageToolCallChunk[]; [property: string]: unknown; } -export interface ChatCompletionStreamOutputDeltaToolCall { - function: ChatCompletionStreamOutputFunction; - id: string; +/** + * Deprecated and replaced by `tool_calls`. The name and arguments of a function that should + * be called, as generated by the model. + */ +export interface ChatCompletionStreamOutputStreamResponseDeltaFunctionCall { + /** + * The arguments to call the function with, as generated by the model in JSON format. Note + * that the model does not always generate valid JSON, and may hallucinate parameters not + * defined by your function schema. Validate the arguments in your code before calling your + * function. + */ + arguments?: string; + /** + * The name of the function to call. + */ + name?: string; + [property: string]: unknown; +} +/** + * The role of the author of this message. + */ +export type DeltaRole = "developer" | "system" | "user" | "assistant" | "tool"; +export interface ChatCompletionStreamOutputMessageToolCallChunk { + function?: ChatCompletionStreamOutputMessageToolCallChunkFunction; + /** + * The ID of the tool call. + */ + id?: string; index: number; - type: string; + /** + * The type of the tool. Currently, only `function` is supported. + */ + type?: "function"; [property: string]: unknown; } -export interface ChatCompletionStreamOutputFunction { - arguments: string; +export interface ChatCompletionStreamOutputMessageToolCallChunkFunction { + /** + * The arguments to call the function with, as generated by the model in JSON format. Note + * that the model does not always generate valid JSON, and may hallucinate parameters not + * defined by your function schema. Validate the arguments in your code before calling your + * function. + */ + arguments?: string; + /** + * The name of the function to call. + */ name?: string; [property: string]: unknown; } +/** + * Log probability information for the choice. + */ export interface ChatCompletionStreamOutputLogprobs { - content: ChatCompletionStreamOutputLogprob[]; + /** + * A list of message content tokens with log probability information. + */ + content: ChatCompletionStreamOutputTokenLogprob[]; + /** + * A list of message refusal tokens with log probability information. + */ + refusal: ChatCompletionStreamOutputTokenLogprob[]; [property: string]: unknown; } -export interface ChatCompletionStreamOutputLogprob { +export interface ChatCompletionStreamOutputTokenLogprob { + /** + * A list of integers representing the UTF-8 bytes representation of the token. 
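Streamed tool calls arrive as fragments keyed by `index`, with `function.arguments` split across chunks. A merge sketch:

```ts
import type { ChatCompletionStreamOutput } from "@huggingface/tasks"; // assumed re-export

function mergeToolCalls(chunks: ChatCompletionStreamOutput[]) {
	const calls = new Map<number, { id?: string; name?: string; args: string }>();
	for (const chunk of chunks) {
		for (const tc of chunk.choices[0]?.delta.tool_calls ?? []) {
			const call = calls.get(tc.index) ?? { args: "" };
			call.id ??= tc.id; // id and name usually arrive on the first fragment only
			call.name ??= tc.function?.name;
			call.args += tc.function?.arguments ?? ""; // JSON text accumulates piecewise
			calls.set(tc.index, call);
		}
	}
	return [...calls.values()]; // JSON.parse(call.args) once the stream completes
}
```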
Useful in + * instances where characters are represented by multiple tokens and their byte + * representations must be combined to generate the correct text representation. Can be + * `null` if there is no bytes representation for the token. + */ + bytes: number[]; + /** + * The log probability of this token, if it is within the top 20 most likely tokens. + * Otherwise, the value `-9999.0` is used to signify that the token is very unlikely. + */ logprob: number; + /** + * The token. + */ token: string; - top_logprobs: ChatCompletionStreamOutputTopLogprob[]; + /** + * List of the most likely tokens and their log probability, at this token position. In rare + * cases, there may be fewer than the number of requested `top_logprobs` returned. + */ + top_logprobs: ChatCompletionStreamOutputTokenLogprobTopLogprobsItem[]; [property: string]: unknown; } -export interface ChatCompletionStreamOutputTopLogprob { +export interface ChatCompletionStreamOutputTokenLogprobTopLogprobsItem { + /** + * A list of integers representing the UTF-8 bytes representation of the token. Useful in + * instances where characters are represented by multiple tokens and their byte + * representations must be combined to generate the correct text representation. Can be + * `null` if there is no bytes representation for the token. + */ + bytes: number[]; + /** + * The log probability of this token, if it is within the top 20 most likely tokens. + * Otherwise, the value `-9999.0` is used to signify that the token is very unlikely. + */ logprob: number; + /** + * The token. + */ token: string; [property: string]: unknown; } -export interface ChatCompletionStreamOutputUsage { +/** + * The object type, which is always `chat.completion.chunk`. + */ +/** + * An optional field that will only be present when you set + * `stream_options: {"include_usage": true}` in your request. When present, it + * contains a null value **except for the last chunk** which contains the + * token usage statistics for the entire request. + * + * **NOTE:** If the stream is interrupted or cancelled, you may not + * receive the final usage chunk which contains the total token usage for + * the request. + * + * + * Usage statistics for the completion request. + */ +export interface ChatCompletionStreamOutputCompletionUsage { + /** + * Number of tokens in the generated completion. + */ completion_tokens: number; + /** + * Breakdown of tokens used in a completion. + */ + completion_tokens_details?: ChatCompletionStreamOutputCompletionUsageCompletionTokensDetails; + /** + * Number of tokens in the prompt. + */ prompt_tokens: number; + /** + * Breakdown of tokens used in the prompt. + */ + prompt_tokens_details?: ChatCompletionStreamOutputCompletionUsagePromptTokensDetails; + /** + * Total number of tokens used in the request (prompt + completion). + */ total_tokens: number; [property: string]: unknown; } +/** + * Breakdown of tokens used in a completion. + */ +export interface ChatCompletionStreamOutputCompletionUsageCompletionTokensDetails { + /** + * When using Predicted Outputs, the number of tokens in the + * prediction that appeared in the completion. + */ + accepted_prediction_tokens?: number; + /** + * Audio input tokens generated by the model. + */ + audio_tokens?: number; + /** + * Tokens generated by the model for reasoning. + */ + reasoning_tokens?: number; + /** + * When using Predicted Outputs, the number of tokens in the + * prediction that did not appear in the completion. 
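Since reasoning and rejected-prediction tokens are billed but never appear in the output text, it can be useful to separate them out. A sketch over the stream usage type:

```ts
import type { ChatCompletionStreamOutputCompletionUsage } from "@huggingface/tasks"; // assumed re-export

function visibleCompletionTokens(usage: ChatCompletionStreamOutputCompletionUsage): number {
	const details = usage.completion_tokens_details;
	const hidden = (details?.reasoning_tokens ?? 0) + (details?.rejected_prediction_tokens ?? 0);
	// What remains is the portion of `completion_tokens` that showed up in the output.
	return usage.completion_tokens - hidden;
}
```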
However, like + * reasoning tokens, these tokens are still counted in the total + * completion tokens for purposes of billing, output, and context window + * limits. + */ + rejected_prediction_tokens?: number; + [property: string]: unknown; +} +/** + * Breakdown of tokens used in the prompt. + */ +export interface ChatCompletionStreamOutputCompletionUsagePromptTokensDetails { + /** + * Audio input tokens present in the prompt. + */ + audio_tokens?: number; + /** + * Cached tokens present in the prompt. + */ + cached_tokens?: number; + [property: string]: unknown; +} diff --git a/packages/tasks/src/tasks/chat-completion/spec-oai/input.json b/packages/tasks/src/tasks/chat-completion/spec-oai/input.json new file mode 100644 index 0000000000..c0eec142e7 --- /dev/null +++ b/packages/tasks/src/tasks/chat-completion/spec-oai/input.json @@ -0,0 +1,1110 @@ +{ + "$id": "/inference/schemas/chat-completion/input.json", + "$schema": "http://json-schema.org/draft-06/schema#", + "description": "Chat Completion Input.\n\nAuto-generated from OAI specs.\nFor more details, check out https://github.com/huggingface/huggingface.js/blob/main/packages/tasks/scripts/inference-oai-import.ts.", + "title": "ChatCompletionInput", + "type": "object", + "required": ["model", "messages"], + "properties": { + "messages": { + "description": "A list of messages comprising the conversation so far. Depending on the\n[model](/docs/models) you use, different message types (modalities) are\nsupported, like [text](/docs/guides/text-generation),\n[images](/docs/guides/vision), and [audio](/docs/guides/audio).\n", + "type": "array", + "minItems": 1, + "items": { + "$ref": "#/$defs/ChatCompletionInputRequestMessage" + } + }, + "model": { + "description": "Model ID used to generate the response, like `gpt-4o` or `o1`. OpenAI\noffers a wide range of models with different capabilities, performance\ncharacteristics, and price points. Refer to the [model guide](/docs/models)\nto browse and compare available models.\n", + "$ref": "#/$defs/ChatCompletionInputModelIdsShared" + }, + "modalities": { + "$ref": "#/$defs/ChatCompletionInputResponseModalities" + }, + "reasoning_effort": { + "$ref": "#/$defs/ChatCompletionInputReasoningEffort" + }, + "max_completion_tokens": { + "description": "An upper bound for the number of tokens that can be generated for a completion, including visible output tokens and [reasoning tokens](/docs/guides/reasoning).\n", + "type": "integer", + "nullable": true + }, + "frequency_penalty": { + "type": "number", + "default": 0, + "minimum": -2, + "maximum": 2, + "nullable": true, + "description": "Number between -2.0 and 2.0. Positive values penalize new tokens based on\ntheir existing frequency in the text so far, decreasing the model's\nlikelihood to repeat the same line verbatim.\n" + }, + "presence_penalty": { + "type": "number", + "default": 0, + "minimum": -2, + "maximum": 2, + "nullable": true, + "description": "Number between -2.0 and 2.0. 
Positive values penalize new tokens based on\nwhether they appear in the text so far, increasing the model's likelihood\nto talk about new topics.\n" + }, + "web_search_options": { + "type": "object", + "title": "Web search", + "description": "This tool searches the web for relevant results to use in a response.\nLearn more about the [web search tool](/docs/guides/tools-web-search?api-mode=chat).\n", + "properties": { + "user_location": { + "type": "object", + "nullable": true, + "required": ["type", "approximate"], + "description": "Approximate location parameters for the search.\n", + "properties": { + "type": { + "type": "string", + "description": "The type of location approximation. Always `approximate`.\n", + "enum": ["approximate"], + "x-stainless-const": true + }, + "approximate": { + "$ref": "#/$defs/ChatCompletionInputWebSearchLocation" + } + }, + "title": "ChatCompletionInputUserLocation" + }, + "search_context_size": { + "$ref": "#/$defs/ChatCompletionInputWebSearchContextSize" + } + } + }, + "top_logprobs": { + "description": "An integer between 0 and 20 specifying the number of most likely tokens to\nreturn at each token position, each with an associated log probability.\n`logprobs` must be set to `true` if this parameter is used.\n", + "type": "integer", + "minimum": 0, + "maximum": 20, + "nullable": true + }, + "response_format": { + "description": "An object specifying the format that the model must output.\n\nSetting to `{ \"type\": \"json_schema\", \"json_schema\": {...} }` enables\nStructured Outputs which ensures the model will match your supplied JSON\nschema. Learn more in the [Structured Outputs\nguide](/docs/guides/structured-outputs).\n\nSetting to `{ \"type\": \"json_object\" }` enables the older JSON mode, which\nensures the message the model generates is valid JSON. Using `json_schema`\nis preferred for models that support it.\n", + "oneOf": [ + { + "$ref": "#/$defs/ChatCompletionInputResponseFormatText" + }, + { + "$ref": "#/$defs/ChatCompletionInputResponseFormatJsonSchema" + }, + { + "$ref": "#/$defs/ChatCompletionInputResponseFormatJsonObject" + } + ], + "x-oaiExpandable": true + }, + "service_tier": { + "description": "Specifies the latency tier to use for processing the request. This parameter is relevant for customers subscribed to the scale tier service:\n - If set to 'auto', and the Project is Scale tier enabled, the system\n will utilize scale tier credits until they are exhausted.\n - If set to 'auto', and the Project is not Scale tier enabled, the request will be processed using the default service tier with a lower uptime SLA and no latency guarentee.\n - If set to 'default', the request will be processed using the default service tier with a lower uptime SLA and no latency guarentee.\n - When not set, the default behavior is 'auto'.\n\n When this parameter is set, the response body will include the `service_tier` utilized.\n", + "type": "string", + "enum": ["auto", "default"], + "nullable": true, + "default": "auto" + }, + "audio": { + "type": "object", + "nullable": true, + "description": "Parameters for audio output. Required when audio output is requested with\n`modalities: [\"audio\"]`. [Learn more](/docs/guides/audio).\n", + "required": ["voice", "format"], + "x-oaiExpandable": true, + "properties": { + "voice": { + "$ref": "#/$defs/ChatCompletionInputVoiceIdsShared", + "description": "The voice the model uses to respond. 
Supported voices are \n`alloy`, `ash`, `ballad`, `coral`, `echo`, `sage`, and `shimmer`.\n" + }, + "format": { + "type": "string", + "enum": ["wav", "mp3", "flac", "opus", "pcm16"], + "description": "Specifies the output audio format. Must be one of `wav`, `mp3`, `flac`,\n`opus`, or `pcm16`.\n" + } + }, + "title": "ChatCompletionInputAudio" + }, + "store": { + "type": "boolean", + "default": false, + "nullable": true, + "description": "Whether or not to store the output of this chat completion request for \nuse in our [model distillation](/docs/guides/distillation) or\n[evals](/docs/guides/evals) products.\n" + }, + "stream": { + "description": "If set to true, the model response data will be streamed to the client\nas it is generated using [server-sent events](https://developer.mozilla.org/en-US/docs/Web/API/Server-sent_events/Using_server-sent_events#Event_stream_format).\nSee the [Streaming section below](/docs/api-reference/chat/streaming)\nfor more information, along with the [streaming responses](/docs/guides/streaming-responses)\nguide for more information on how to handle the streaming events.\n", + "type": "boolean", + "nullable": true, + "default": false + }, + "stop": { + "$ref": "#/$defs/ChatCompletionInputStopConfiguration" + }, + "logit_bias": { + "type": "object", + "x-oaiTypeLabel": "map", + "default": null, + "nullable": true, + "additionalProperties": { + "type": "integer" + }, + "description": "Modify the likelihood of specified tokens appearing in the completion.\n\nAccepts a JSON object that maps tokens (specified by their token ID in the\ntokenizer) to an associated bias value from -100 to 100. Mathematically,\nthe bias is added to the logits generated by the model prior to sampling.\nThe exact effect will vary per model, but values between -1 and 1 should\ndecrease or increase likelihood of selection; values like -100 or 100\nshould result in a ban or exclusive selection of the relevant token.\n", + "title": "ChatCompletionInputLogitBias" + }, + "logprobs": { + "description": "Whether to return log probabilities of the output tokens or not. If true,\nreturns the log probabilities of each output token returned in the\n`content` of `message`.\n", + "type": "boolean", + "default": false, + "nullable": true + }, + "max_tokens": { + "description": "The maximum number of [tokens](/tokenizer) that can be generated in the\nchat completion. This value can be used to control\n[costs](https://openai.com/api/pricing/) for text generated via API.\n\nThis value is now deprecated in favor of `max_completion_tokens`, and is\nnot compatible with [o1 series models](/docs/guides/reasoning).\n", + "type": "integer", + "nullable": true, + "deprecated": true + }, + "n": { + "type": "integer", + "minimum": 1, + "maximum": 128, + "default": 1, + "example": 1, + "nullable": true, + "description": "How many chat completion choices to generate for each input message. Note that you will be charged based on the number of generated tokens across all of the choices. Keep `n` as `1` to minimize costs." + }, + "prediction": { + "nullable": true, + "x-oaiExpandable": true, + "description": "Configuration for a [Predicted Output](/docs/guides/predicted-outputs),\nwhich can greatly improve response times when large parts of the model\nresponse are known ahead of time. 
This is most common when you are\nregenerating a file with only minor changes to most of the content.\n", + "oneOf": [ + { + "$ref": "#/$defs/ChatCompletionInputPredictionContent" + } + ] + }, + "seed": { + "type": "integer", + "format": "int64", + "nullable": true, + "description": "This feature is in Beta.\nIf specified, our system will make a best effort to sample deterministically, such that repeated requests with the same `seed` and parameters should return the same result.\nDeterminism is not guaranteed, and you should refer to the `system_fingerprint` response parameter to monitor changes in the backend.\n", + "x-oaiMeta": { + "beta": true + } + }, + "stream_options": { + "$ref": "#/$defs/ChatCompletionInputStreamOptions" + }, + "tools": { + "type": "array", + "description": "A list of tools the model may call. Currently, only functions are supported as a tool. Use this to provide a list of functions the model may generate JSON inputs for. A max of 128 functions are supported.\n", + "items": { + "$ref": "#/$defs/ChatCompletionInputTool" + } + }, + "tool_choice": { + "$ref": "#/$defs/ChatCompletionInputToolChoiceOption" + }, + "parallel_tool_calls": { + "$ref": "#/$defs/ChatCompletionInputParallelToolCalls" + }, + "function_call": { + "deprecated": true, + "description": "Deprecated in favor of `tool_choice`.\n\nControls which (if any) function is called by the model.\n\n`none` means the model will not call a function and instead generates a\nmessage.\n\n`auto` means the model can pick between generating a message or calling a\nfunction.\n\nSpecifying a particular function via `{\"name\": \"my_function\"}` forces the\nmodel to call that function.\n\n`none` is the default when no functions are present. `auto` is the default\nif functions are present.\n", + "oneOf": [ + { + "type": "string", + "description": "`none` means the model will not call a function and instead generates a message. `auto` means the model can pick between generating a message or calling a function.\n", + "enum": ["none", "auto"] + }, + { + "$ref": "#/$defs/ChatCompletionInputFunctionCallOption" + } + ], + "x-oaiExpandable": true + }, + "functions": { + "deprecated": true, + "description": "Deprecated in favor of `tools`.\n\nA list of functions the model may generate JSON inputs for.\n", + "type": "array", + "minItems": 1, + "maxItems": 128, + "items": { + "$ref": "#/$defs/ChatCompletionInputFunctions" + } + } + }, + "$defs": { + "ChatCompletionInputRequestMessage": { + "oneOf": [ + { + "$ref": "#/$defs/ChatCompletionInputRequestDeveloperMessage" + }, + { + "$ref": "#/$defs/ChatCompletionInputRequestSystemMessage" + }, + { + "$ref": "#/$defs/ChatCompletionInputRequestUserMessage" + }, + { + "$ref": "#/$defs/ChatCompletionInputRequestAssistantMessage" + }, + { + "$ref": "#/$defs/ChatCompletionInputRequestToolMessage" + }, + { + "$ref": "#/$defs/ChatCompletionInputRequestFunctionMessage" + } + ], + "x-oaiExpandable": true, + "title": "ChatCompletionInputRequestMessage" + }, + "ChatCompletionInputRequestDeveloperMessage": { + "type": "object", + "title": "ChatCompletionInputRequestDeveloperMessage", + "description": "Developer-provided instructions that the model should follow, regardless of\nmessages sent by the user. 
With o1 models and newer, `developer` messages\nreplace the previous `system` messages.\n", + "properties": { + "content": { + "description": "The contents of the developer message.", + "oneOf": [ + { + "type": "string", + "description": "The contents of the developer message.", + "title": "Text content" + }, + { + "type": "array", + "description": "An array of content parts with a defined type. For developer messages, only type `text` is supported.", + "title": "Array of content parts", + "items": { + "$ref": "#/$defs/ChatCompletionInputRequestMessageContentPartText" + }, + "minItems": 1 + } + ] + }, + "role": { + "type": "string", + "enum": ["developer"], + "description": "The role of the messages author, in this case `developer`.", + "x-stainless-const": true + }, + "name": { + "type": "string", + "description": "An optional name for the participant. Provides the model information to differentiate between participants of the same role." + } + }, + "required": ["content", "role"] + }, + "ChatCompletionInputRequestMessageContentPartText": { + "type": "object", + "title": "ChatCompletionInputRequestMessageContentPartText", + "description": "Learn about [text inputs](/docs/guides/text-generation).\n", + "properties": { + "type": { + "type": "string", + "enum": ["text"], + "description": "The type of the content part.", + "x-stainless-const": true + }, + "text": { + "type": "string", + "description": "The text content." + } + }, + "required": ["type", "text"] + }, + "ChatCompletionInputRequestSystemMessage": { + "type": "object", + "title": "ChatCompletionInputRequestSystemMessage", + "description": "Developer-provided instructions that the model should follow, regardless of\nmessages sent by the user. With o1 models and newer, use `developer` messages\nfor this purpose instead.\n", + "properties": { + "content": { + "description": "The contents of the system message.", + "oneOf": [ + { + "type": "string", + "description": "The contents of the system message.", + "title": "Text content" + }, + { + "type": "array", + "description": "An array of content parts with a defined type. For system messages, only type `text` is supported.", + "title": "Array of content parts", + "items": { + "$ref": "#/$defs/ChatCompletionInputRequestSystemMessageContentPart" + }, + "minItems": 1 + } + ] + }, + "role": { + "type": "string", + "enum": ["system"], + "description": "The role of the messages author, in this case `system`.", + "x-stainless-const": true + }, + "name": { + "type": "string", + "description": "An optional name for the participant. Provides the model information to differentiate between participants of the same role." + } + }, + "required": ["content", "role"] + }, + "ChatCompletionInputRequestSystemMessageContentPart": { + "oneOf": [ + { + "$ref": "#/$defs/ChatCompletionInputRequestMessageContentPartText" + } + ], + "x-oaiExpandable": true, + "title": "ChatCompletionInputRequestSystemMessageContentPart" + }, + "ChatCompletionInputRequestUserMessage": { + "type": "object", + "title": "ChatCompletionInputRequestUserMessage", + "description": "Messages sent by an end user, containing prompts or additional context\ninformation.\n", + "properties": { + "content": { + "description": "The contents of the user message.\n", + "oneOf": [ + { + "type": "string", + "description": "The text contents of the message.", + "title": "Text content" + }, + { + "type": "array", + "description": "An array of content parts with a defined type. 
Supported options differ based on the [model](/docs/models) being used to generate the response. Can contain text, image, or audio inputs.", + "title": "Array of content parts", + "items": { + "$ref": "#/$defs/ChatCompletionInputRequestUserMessageContentPart" + }, + "minItems": 1 + } + ], + "x-oaiExpandable": true + }, + "role": { + "type": "string", + "enum": ["user"], + "description": "The role of the messages author, in this case `user`.", + "x-stainless-const": true + }, + "name": { + "type": "string", + "description": "An optional name for the participant. Provides the model information to differentiate between participants of the same role." + } + }, + "required": ["content", "role"] + }, + "ChatCompletionInputRequestUserMessageContentPart": { + "oneOf": [ + { + "$ref": "#/$defs/ChatCompletionInputRequestMessageContentPartText" + }, + { + "$ref": "#/$defs/ChatCompletionInputRequestMessageContentPartImage" + }, + { + "$ref": "#/$defs/ChatCompletionInputRequestMessageContentPartAudio" + }, + { + "$ref": "#/$defs/ChatCompletionInputRequestMessageContentPartFile" + } + ], + "x-oaiExpandable": true, + "title": "ChatCompletionInputRequestUserMessageContentPart" + }, + "ChatCompletionInputRequestMessageContentPartImage": { + "type": "object", + "title": "ChatCompletionInputRequestMessageContentPartImage", + "description": "Learn about [image inputs](/docs/guides/vision).\n", + "properties": { + "type": { + "type": "string", + "enum": ["image_url"], + "description": "The type of the content part.", + "x-stainless-const": true + }, + "image_url": { + "type": "object", + "properties": { + "url": { + "type": "string", + "description": "Either a URL of the image or the base64 encoded image data.", + "format": "uri" + }, + "detail": { + "type": "string", + "description": "Specifies the detail level of the image. Learn more in the [Vision guide](/docs/guides/vision#low-or-high-fidelity-image-understanding).", + "enum": ["auto", "low", "high"], + "default": "auto" + } + }, + "required": ["url"], + "title": "ChatCompletionInputRequestMessageContentPartImageImageUrl" + } + }, + "required": ["type", "image_url"] + }, + "ChatCompletionInputRequestMessageContentPartAudio": { + "type": "object", + "title": "ChatCompletionInputRequestMessageContentPartAudio", + "description": "Learn about [audio inputs](/docs/guides/audio).\n", + "properties": { + "type": { + "type": "string", + "enum": ["input_audio"], + "description": "The type of the content part. Always `input_audio`.", + "x-stainless-const": true + }, + "input_audio": { + "type": "object", + "properties": { + "data": { + "type": "string", + "description": "Base64 encoded audio data." + }, + "format": { + "type": "string", + "enum": ["wav", "mp3"], + "description": "The format of the encoded audio data. Currently supports \"wav\" and \"mp3\".\n" + } + }, + "required": ["data", "format"], + "title": "ChatCompletionInputRequestMessageContentPartAudioInputAudio" + } + }, + "required": ["type", "input_audio"] + }, + "ChatCompletionInputRequestMessageContentPartFile": { + "type": "object", + "title": "ChatCompletionInputRequestMessageContentPartFile", + "description": "Learn about [file inputs](/docs/guides/text) for text generation.\n", + "properties": { + "type": { + "type": "string", + "enum": ["file"], + "description": "The type of the content part. 
Always `file`.", + "x-stainless-const": true + }, + "file": { + "type": "object", + "properties": { + "filename": { + "type": "string", + "description": "The name of the file, used when passing the file to the model as a \nstring.\n" + }, + "file_data": { + "type": "string", + "description": "The base64 encoded file data, used when passing the file to the model \nas a string.\n" + }, + "file_id": { + "type": "string", + "description": "The ID of an uploaded file to use as input.\n" + } + }, + "title": "ChatCompletionInputRequestMessageContentPartFileFile" + } + }, + "required": ["type", "file"] + }, + "ChatCompletionInputRequestAssistantMessage": { + "type": "object", + "title": "ChatCompletionInputRequestAssistantMessage", + "description": "Messages sent by the model in response to user messages.\n", + "properties": { + "content": { + "x-oaiExpandable": true, + "nullable": true, + "oneOf": [ + { + "type": "string", + "description": "The contents of the assistant message.", + "title": "Text content" + }, + { + "type": "array", + "description": "An array of content parts with a defined type. Can be one or more of type `text`, or exactly one of type `refusal`.", + "title": "Array of content parts", + "items": { + "$ref": "#/$defs/ChatCompletionInputRequestAssistantMessageContentPart" + }, + "minItems": 1 + } + ], + "description": "The contents of the assistant message. Required unless `tool_calls` or `function_call` is specified.\n" + }, + "refusal": { + "nullable": true, + "type": "string", + "description": "The refusal message by the assistant." + }, + "role": { + "type": "string", + "enum": ["assistant"], + "description": "The role of the messages author, in this case `assistant`.", + "x-stainless-const": true + }, + "name": { + "type": "string", + "description": "An optional name for the participant. Provides the model information to differentiate between participants of the same role." + }, + "audio": { + "type": "object", + "nullable": true, + "x-oaiExpandable": true, + "description": "Data about a previous audio response from the model. \n[Learn more](/docs/guides/audio).\n", + "required": ["id"], + "properties": { + "id": { + "type": "string", + "description": "Unique identifier for a previous audio response from the model.\n" + } + }, + "title": "ChatCompletionInputRequestAssistantMessageAudio" + }, + "tool_calls": { + "$ref": "#/$defs/ChatCompletionInputMessageToolCalls" + }, + "function_call": { + "type": "object", + "deprecated": true, + "description": "Deprecated and replaced by `tool_calls`. The name and arguments of a function that should be called, as generated by the model.", + "nullable": true, + "properties": { + "arguments": { + "type": "string", + "description": "The arguments to call the function with, as generated by the model in JSON format. Note that the model does not always generate valid JSON, and may hallucinate parameters not defined by your function schema. Validate the arguments in your code before calling your function." + }, + "name": { + "type": "string", + "description": "The name of the function to call." 
+ } + }, + "required": ["arguments", "name"], + "title": "ChatCompletionInputRequestAssistantMessageFunctionCall" + } + }, + "required": ["role"] + }, + "ChatCompletionInputRequestAssistantMessageContentPart": { + "oneOf": [ + { + "$ref": "#/$defs/ChatCompletionInputRequestMessageContentPartText" + }, + { + "$ref": "#/$defs/ChatCompletionInputRequestMessageContentPartRefusal" + } + ], + "x-oaiExpandable": true, + "title": "ChatCompletionInputRequestAssistantMessageContentPart" + }, + "ChatCompletionInputRequestMessageContentPartRefusal": { + "type": "object", + "title": "ChatCompletionInputRequestMessageContentPartRefusal", + "properties": { + "type": { + "type": "string", + "enum": ["refusal"], + "description": "The type of the content part.", + "x-stainless-const": true + }, + "refusal": { + "type": "string", + "description": "The refusal message generated by the model." + } + }, + "required": ["type", "refusal"] + }, + "ChatCompletionInputMessageToolCalls": { + "type": "array", + "description": "The tool calls generated by the model, such as function calls.", + "items": { + "$ref": "#/$defs/ChatCompletionInputMessageToolCall" + }, + "title": "ChatCompletionInputMessageToolCalls" + }, + "ChatCompletionInputMessageToolCall": { + "type": "object", + "properties": { + "id": { + "type": "string", + "description": "The ID of the tool call." + }, + "type": { + "type": "string", + "enum": ["function"], + "description": "The type of the tool. Currently, only `function` is supported.", + "x-stainless-const": true + }, + "function": { + "type": "object", + "description": "The function that the model called.", + "properties": { + "name": { + "type": "string", + "description": "The name of the function to call." + }, + "arguments": { + "type": "string", + "description": "The arguments to call the function with, as generated by the model in JSON format. Note that the model does not always generate valid JSON, and may hallucinate parameters not defined by your function schema. Validate the arguments in your code before calling your function." + } + }, + "required": ["name", "arguments"], + "title": "ChatCompletionInputMessageToolCallFunction" + } + }, + "required": ["id", "type", "function"], + "title": "ChatCompletionInputMessageToolCall" + }, + "ChatCompletionInputRequestToolMessage": { + "type": "object", + "title": "ChatCompletionInputRequestToolMessage", + "properties": { + "role": { + "type": "string", + "enum": ["tool"], + "description": "The role of the messages author, in this case `tool`.", + "x-stainless-const": true + }, + "content": { + "oneOf": [ + { + "type": "string", + "description": "The contents of the tool message.", + "title": "Text content" + }, + { + "type": "array", + "description": "An array of content parts with a defined type. For tool messages, only type `text` is supported.", + "title": "Array of content parts", + "items": { + "$ref": "#/$defs/ChatCompletionInputRequestToolMessageContentPart" + }, + "minItems": 1 + } + ], + "description": "The contents of the tool message." + }, + "tool_call_id": { + "type": "string", + "description": "Tool call that this message is responding to." 
+ } + }, + "required": ["role", "content", "tool_call_id"] + }, + "ChatCompletionInputRequestToolMessageContentPart": { + "oneOf": [ + { + "$ref": "#/$defs/ChatCompletionInputRequestMessageContentPartText" + } + ], + "x-oaiExpandable": true, + "title": "ChatCompletionInputRequestToolMessageContentPart" + }, + "ChatCompletionInputRequestFunctionMessage": { + "type": "object", + "title": "ChatCompletionInputRequestFunctionMessage", + "deprecated": true, + "properties": { + "role": { + "type": "string", + "enum": ["function"], + "description": "The role of the messages author, in this case `function`.", + "x-stainless-const": true + }, + "content": { + "nullable": true, + "type": "string", + "description": "The contents of the function message." + }, + "name": { + "type": "string", + "description": "The name of the function to call." + } + }, + "required": ["role", "content", "name"] + }, + "ChatCompletionInputModelIdsShared": { + "example": "gpt-4o", + "anyOf": [ + { + "type": "string" + }, + { + "type": "string", + "enum": [ + "o3-mini", + "o3-mini-2025-01-31", + "o1", + "o1-2024-12-17", + "o1-preview", + "o1-preview-2024-09-12", + "o1-mini", + "o1-mini-2024-09-12", + "gpt-4o", + "gpt-4o-2024-11-20", + "gpt-4o-2024-08-06", + "gpt-4o-2024-05-13", + "gpt-4o-audio-preview", + "gpt-4o-audio-preview-2024-10-01", + "gpt-4o-audio-preview-2024-12-17", + "gpt-4o-mini-audio-preview", + "gpt-4o-mini-audio-preview-2024-12-17", + "gpt-4o-search-preview", + "gpt-4o-mini-search-preview", + "gpt-4o-search-preview-2025-03-11", + "gpt-4o-mini-search-preview-2025-03-11", + "chatgpt-4o-latest", + "gpt-4o-mini", + "gpt-4o-mini-2024-07-18", + "gpt-4-turbo", + "gpt-4-turbo-2024-04-09", + "gpt-4-0125-preview", + "gpt-4-turbo-preview", + "gpt-4-1106-preview", + "gpt-4-vision-preview", + "gpt-4", + "gpt-4-0314", + "gpt-4-0613", + "gpt-4-32k", + "gpt-4-32k-0314", + "gpt-4-32k-0613", + "gpt-3.5-turbo", + "gpt-3.5-turbo-16k", + "gpt-3.5-turbo-0301", + "gpt-3.5-turbo-0613", + "gpt-3.5-turbo-1106", + "gpt-3.5-turbo-0125", + "gpt-3.5-turbo-16k-0613" + ] + } + ], + "title": "ChatCompletionInputModelIdsShared" + }, + "ChatCompletionInputResponseModalities": { + "type": "array", + "nullable": true, + "description": "Output types that you would like the model to generate.\nMost models are capable of generating text, which is the default:\n\n`[\"text\"]`\n\nThe `gpt-4o-audio-preview` model can also be used to \n[generate audio](/docs/guides/audio). To request that this model generate \nboth text and audio responses, you can use:\n\n`[\"text\", \"audio\"]`\n", + "items": { + "type": "string", + "enum": ["text", "audio"] + }, + "title": "ChatCompletionInputResponseModalities" + }, + "ChatCompletionInputReasoningEffort": { + "type": "string", + "enum": ["low", "medium", "high"], + "default": "medium", + "nullable": true, + "description": "**o-series models only** \n\nConstrains effort on reasoning for \n[reasoning models](https://platform.openai.com/docs/guides/reasoning).\nCurrently supported values are `low`, `medium`, and `high`. 
Reducing\nreasoning effort can result in faster responses and fewer tokens used\non reasoning in a response.\n", + "title": "ChatCompletionInputReasoningEffort" + }, + "ChatCompletionInputWebSearchLocation": { + "type": "object", + "title": "ChatCompletionInputWebSearchLocation", + "description": "Approximate location parameters for the search.", + "properties": { + "country": { + "type": "string", + "description": "The two-letter \n[ISO country code](https://en.wikipedia.org/wiki/ISO_3166-1) of the user,\ne.g. `US`.\n" + }, + "region": { + "type": "string", + "description": "Free text input for the region of the user, e.g. `California`.\n" + }, + "city": { + "type": "string", + "description": "Free text input for the city of the user, e.g. `San Francisco`.\n" + }, + "timezone": { + "type": "string", + "description": "The [IANA timezone](https://timeapi.io/documentation/iana-timezones) \nof the user, e.g. `America/Los_Angeles`.\n" + } + } + }, + "ChatCompletionInputWebSearchContextSize": { + "type": "string", + "description": "High level guidance for the amount of context window space to use for the \nsearch. One of `low`, `medium`, or `high`. `medium` is the default.\n", + "enum": ["low", "medium", "high"], + "default": "medium", + "title": "ChatCompletionInputWebSearchContextSize" + }, + "ChatCompletionInputResponseFormatText": { + "type": "object", + "title": "ChatCompletionInputResponseFormatText", + "description": "Default response format. Used to generate text responses.\n", + "properties": { + "type": { + "type": "string", + "description": "The type of response format being defined. Always `text`.", + "enum": ["text"], + "x-stainless-const": true + } + }, + "required": ["type"] + }, + "ChatCompletionInputResponseFormatJsonSchema": { + "type": "object", + "title": "ChatCompletionInputResponseFormatJsonSchema", + "description": "JSON Schema response format. Used to generate structured JSON responses.\nLearn more about [Structured Outputs](/docs/guides/structured-outputs).\n", + "properties": { + "type": { + "type": "string", + "description": "The type of response format being defined. Always `json_schema`.", + "enum": ["json_schema"], + "x-stainless-const": true + }, + "json_schema": { + "type": "object", + "title": "JSON schema", + "description": "Structured Outputs configuration options, including a JSON Schema.\n", + "properties": { + "description": { + "type": "string", + "description": "A description of what the response format is for, used by the model to\ndetermine how to respond in the format.\n" + }, + "name": { + "type": "string", + "description": "The name of the response format. Must be a-z, A-Z, 0-9, or contain\nunderscores and dashes, with a maximum length of 64.\n" + }, + "schema": { + "$ref": "#/$defs/ChatCompletionInputResponseFormatJsonSchemaSchema" + }, + "strict": { + "type": "boolean", + "nullable": true, + "default": false, + "description": "Whether to enable strict schema adherence when generating the output.\nIf set to true, the model will always follow the exact schema defined\nin the `schema` field. Only a subset of JSON Schema is supported when\n`strict` is `true`. 
To learn more, read the [Structured Outputs\nguide](/docs/guides/structured-outputs).\n" + } + }, + "required": ["name"] + } + }, + "required": ["type", "json_schema"] + }, + "ChatCompletionInputResponseFormatJsonSchemaSchema": { + "type": "object", + "title": "ChatCompletionInputResponseFormatJsonSchemaSchema", + "description": "The schema for the response format, described as a JSON Schema object.\nLearn how to build JSON schemas [here](https://json-schema.org/).\n", + "additionalProperties": true + }, + "ChatCompletionInputResponseFormatJsonObject": { + "type": "object", + "title": "ChatCompletionInputResponseFormatJsonObject", + "description": "JSON object response format. An older method of generating JSON responses.\nUsing `json_schema` is recommended for models that support it. Note that the\nmodel will not generate JSON without a system or user message instructing it\nto do so.\n", + "properties": { + "type": { + "type": "string", + "description": "The type of response format being defined. Always `json_object`.", + "enum": ["json_object"], + "x-stainless-const": true + } + }, + "required": ["type"] + }, + "ChatCompletionInputVoiceIdsShared": { + "example": "ash", + "anyOf": [ + { + "type": "string" + }, + { + "type": "string", + "enum": ["alloy", "ash", "ballad", "coral", "echo", "fable", "onyx", "nova", "sage", "shimmer", "verse"] + } + ], + "title": "ChatCompletionInputVoiceIdsShared" + }, + "ChatCompletionInputStopConfiguration": { + "description": "Up to 4 sequences where the API will stop generating further tokens. The\nreturned text will not contain the stop sequence.\n", + "default": null, + "nullable": true, + "oneOf": [ + { + "type": "string", + "default": "<|endoftext|>", + "example": "\n", + "nullable": true + }, + { + "type": "array", + "minItems": 1, + "maxItems": 4, + "items": { + "type": "string", + "example": "[\"\\n\"]" + } + } + ], + "title": "ChatCompletionInputStopConfiguration" + }, + "ChatCompletionInputPredictionContent": { + "type": "object", + "title": "ChatCompletionInputPredictionContent", + "description": "Static predicted output content, such as the content of a text file that is\nbeing regenerated.\n", + "required": ["type", "content"], + "properties": { + "type": { + "type": "string", + "enum": ["content"], + "description": "The type of the predicted content you want to provide. This type is\ncurrently always `content`.\n", + "x-stainless-const": true + }, + "content": { + "x-oaiExpandable": true, + "description": "The content that should be matched when generating a model response.\nIf generated tokens would match this content, the entire model response\ncan be returned much more quickly.\n", + "oneOf": [ + { + "type": "string", + "title": "Text content", + "description": "The content used for a Predicted Output. This is often the\ntext of a file you are regenerating with minor changes.\n" + }, + { + "type": "array", + "description": "An array of content parts with a defined type. Supported options differ based on the [model](/docs/models) being used to generate the response. Can contain text inputs.", + "title": "Array of content parts", + "items": { + "$ref": "#/$defs/ChatCompletionInputRequestMessageContentPartText" + }, + "minItems": 1 + } + ] + } + } + }, + "ChatCompletionInputStreamOptions": { + "description": "Options for streaming response. 
Only set this when you set `stream: true`.\n",
+      "type": "object",
+      "nullable": true,
+      "default": null,
+      "properties": {
+        "include_usage": {
+          "type": "boolean",
+          "description": "If set, an additional chunk will be streamed before the `data: [DONE]`\nmessage. The `usage` field on this chunk shows the token usage statistics\nfor the entire request, and the `choices` field will always be an empty\narray. \n\nAll other chunks will also include a `usage` field, but with a null\nvalue. **NOTE:** If the stream is interrupted, you may not receive the\nfinal usage chunk which contains the total token usage for the request.\n"
+        }
+      },
+      "title": "ChatCompletionInputStreamOptions"
+    },
+    "ChatCompletionInputTool": {
+      "type": "object",
+      "properties": {
+        "type": {
+          "type": "string",
+          "enum": ["function"],
+          "description": "The type of the tool. Currently, only `function` is supported.",
+          "x-stainless-const": true
+        },
+        "function": {
+          "$ref": "#/$defs/ChatCompletionInputFunctionObject"
+        }
+      },
+      "required": ["type", "function"],
+      "title": "ChatCompletionInputTool"
+    },
+    "ChatCompletionInputFunctionObject": {
+      "type": "object",
+      "properties": {
+        "description": {
+          "type": "string",
+          "description": "A description of what the function does, used by the model to choose when and how to call the function."
+        },
+        "name": {
+          "type": "string",
+          "description": "The name of the function to be called. Must be a-z, A-Z, 0-9, or contain underscores and dashes, with a maximum length of 64."
+        },
+        "parameters": {
+          "$ref": "#/$defs/ChatCompletionInputFunctionParameters"
+        },
+        "strict": {
+          "type": "boolean",
+          "nullable": true,
+          "default": false,
+          "description": "Whether to enable strict schema adherence when generating the function call. If set to true, the model will follow the exact schema defined in the `parameters` field. Only a subset of JSON Schema is supported when `strict` is `true`. Learn more about Structured Outputs in the [function calling guide](/docs/guides/function-calling)."
+        }
+      },
+      "required": ["name"],
+      "title": "ChatCompletionInputFunctionObject"
+    },
+    "ChatCompletionInputFunctionParameters": {
+      "type": "object",
+      "description": "The parameters the function accepts, described as a JSON Schema object. See the [guide](/docs/guides/function-calling) for examples, and the [JSON Schema reference](https://json-schema.org/understanding-json-schema/) for documentation about the format. \n\nOmitting `parameters` defines a function with an empty parameter list.",
+      "additionalProperties": true,
+      "title": "ChatCompletionInputFunctionParameters"
+    },
+    "ChatCompletionInputToolChoiceOption": {
+      "description": "Controls which (if any) tool is called by the model.\n`none` means the model will not call any tool and instead generates a message.\n`auto` means the model can pick between generating a message or calling one or more tools.\n`required` means the model must call one or more tools.\nSpecifying a particular tool via `{\"type\": \"function\", \"function\": {\"name\": \"my_function\"}}` forces the model to call that tool.\n\n`none` is the default when no tools are present. `auto` is the default if tools are present.\n",
+      "oneOf": [
+        {
+          "type": "string",
+          "description": "`none` means the model will not call any tool and instead generates a message. `auto` means the model can pick between generating a message or calling one or more tools. 
`required` means the model must call one or more tools.\n", + "enum": ["none", "auto", "required"] + }, + { + "$ref": "#/$defs/ChatCompletionInputNamedToolChoice" + } + ], + "x-oaiExpandable": true, + "title": "ChatCompletionInputToolChoiceOption" + }, + "ChatCompletionInputNamedToolChoice": { + "type": "object", + "description": "Specifies a tool the model should use. Use to force the model to call a specific function.", + "properties": { + "type": { + "type": "string", + "enum": ["function"], + "description": "The type of the tool. Currently, only `function` is supported.", + "x-stainless-const": true + }, + "function": { + "type": "object", + "properties": { + "name": { + "type": "string", + "description": "The name of the function to call." + } + }, + "required": ["name"], + "title": "ChatCompletionInputNamedToolChoiceFunction" + } + }, + "required": ["type", "function"], + "title": "ChatCompletionInputNamedToolChoice" + }, + "ChatCompletionInputParallelToolCalls": { + "description": "Whether to enable [parallel function calling](/docs/guides/function-calling#configuring-parallel-function-calling) during tool use.", + "type": "boolean", + "default": true, + "title": "ChatCompletionInputParallelToolCalls" + }, + "ChatCompletionInputFunctionCallOption": { + "type": "object", + "description": "Specifying a particular function via `{\"name\": \"my_function\"}` forces the model to call that function.\n", + "properties": { + "name": { + "type": "string", + "description": "The name of the function to call." + } + }, + "required": ["name"], + "title": "ChatCompletionInputFunctionCallOption" + }, + "ChatCompletionInputFunctions": { + "type": "object", + "deprecated": true, + "properties": { + "description": { + "type": "string", + "description": "A description of what the function does, used by the model to choose when and how to call the function." + }, + "name": { + "type": "string", + "description": "The name of the function to be called. Must be a-z, A-Z, 0-9, or contain underscores and dashes, with a maximum length of 64." + }, + "parameters": { + "$ref": "#/$defs/ChatCompletionInputFunctionParameters" + } + }, + "required": ["name"], + "title": "ChatCompletionInputFunctions" + } + } +} diff --git a/packages/tasks/src/tasks/chat-completion/spec-oai/output.json b/packages/tasks/src/tasks/chat-completion/spec-oai/output.json new file mode 100644 index 0000000000..eefa7c2434 --- /dev/null +++ b/packages/tasks/src/tasks/chat-completion/spec-oai/output.json @@ -0,0 +1,362 @@ +{ + "$id": "/inference/schemas/chat-completion/output.json", + "$schema": "http://json-schema.org/draft-06/schema#", + "description": "Chat Completion Output.\n\nAuto-generated from OAI specs.\nFor more details, check out https://github.com/huggingface/huggingface.js/blob/main/packages/tasks/scripts/inference-oai-import.ts.", + "title": "ChatCompletionOutput", + "type": "object", + "required": ["choices", "created", "id", "model", "object"], + "properties": { + "id": { + "type": "string", + "description": "A unique identifier for the chat completion." + }, + "choices": { + "type": "array", + "description": "A list of chat completion choices. Can be more than one if `n` is greater than 1.", + "items": { + "type": "object", + "required": ["finish_reason", "index", "message", "logprobs"], + "properties": { + "finish_reason": { + "type": "string", + "description": "The reason the model stopped generating tokens. 
This will be `stop` if the model hit a natural stop point or a provided stop sequence,\n`length` if the maximum number of tokens specified in the request was reached,\n`content_filter` if content was omitted due to a flag from our content filters,\n`tool_calls` if the model called a tool, or `function_call` (deprecated) if the model called a function.\n", + "enum": ["stop", "length", "tool_calls", "content_filter", "function_call"] + }, + "index": { + "type": "integer", + "description": "The index of the choice in the list of choices." + }, + "message": { + "$ref": "#/$defs/ChatCompletionOutputResponseMessage" + }, + "logprobs": { + "description": "Log probability information for the choice.", + "type": "object", + "nullable": true, + "properties": { + "content": { + "description": "A list of message content tokens with log probability information.", + "type": "array", + "items": { + "$ref": "#/$defs/ChatCompletionOutputTokenLogprob" + }, + "nullable": true + }, + "refusal": { + "description": "A list of message refusal tokens with log probability information.", + "type": "array", + "items": { + "$ref": "#/$defs/ChatCompletionOutputTokenLogprob" + }, + "nullable": true + } + }, + "required": ["content", "refusal"], + "title": "ChatCompletionOutputLogprobs" + } + }, + "title": "ChatCompletionOutputChoicesItem" + } + }, + "created": { + "type": "integer", + "description": "The Unix timestamp (in seconds) of when the chat completion was created." + }, + "model": { + "type": "string", + "description": "The model used for the chat completion." + }, + "service_tier": { + "description": "The service tier used for processing the request.", + "type": "string", + "enum": ["scale", "default"], + "example": "scale", + "nullable": true + }, + "system_fingerprint": { + "type": "string", + "description": "This fingerprint represents the backend configuration that the model runs with.\n\nCan be used in conjunction with the `seed` request parameter to understand when backend changes have been made that might impact determinism.\n" + }, + "object": { + "type": "string", + "description": "The object type, which is always `chat.completion`.", + "enum": ["chat.completion"], + "x-stainless-const": true + }, + "usage": { + "$ref": "#/$defs/ChatCompletionOutputCompletionUsage" + } + }, + "$defs": { + "ChatCompletionOutputResponseMessage": { + "type": "object", + "description": "A chat completion message generated by the model.", + "properties": { + "content": { + "type": "string", + "description": "The contents of the message.", + "nullable": true + }, + "refusal": { + "type": "string", + "description": "The refusal message generated by the model.", + "nullable": true + }, + "tool_calls": { + "$ref": "#/$defs/ChatCompletionOutputMessageToolCalls" + }, + "annotations": { + "type": "array", + "description": "Annotations for the message, when applicable, as when using the\n[web search tool](/docs/guides/tools-web-search?api-mode=chat).\n", + "items": { + "type": "object", + "description": "A URL citation when using web search.\n", + "required": ["type", "url_citation"], + "properties": { + "type": { + "type": "string", + "description": "The type of the URL citation. 
Always `url_citation`.", + "enum": ["url_citation"], + "x-stainless-const": true + }, + "url_citation": { + "type": "object", + "description": "A URL citation when using web search.", + "required": ["end_index", "start_index", "url", "title"], + "properties": { + "end_index": { + "type": "integer", + "description": "The index of the last character of the URL citation in the message." + }, + "start_index": { + "type": "integer", + "description": "The index of the first character of the URL citation in the message." + }, + "url": { + "type": "string", + "description": "The URL of the web resource." + }, + "title": { + "type": "string", + "description": "The title of the web resource." + } + }, + "title": "ChatCompletionOutputResponseMessageUrlCitation" + } + }, + "title": "ChatCompletionOutputResponseMessageAnnotationsItem" + } + }, + "role": { + "type": "string", + "enum": ["assistant"], + "description": "The role of the author of this message.", + "x-stainless-const": true + }, + "function_call": { + "type": "object", + "deprecated": true, + "description": "Deprecated and replaced by `tool_calls`. The name and arguments of a function that should be called, as generated by the model.", + "properties": { + "arguments": { + "type": "string", + "description": "The arguments to call the function with, as generated by the model in JSON format. Note that the model does not always generate valid JSON, and may hallucinate parameters not defined by your function schema. Validate the arguments in your code before calling your function." + }, + "name": { + "type": "string", + "description": "The name of the function to call." + } + }, + "required": ["name", "arguments"], + "title": "ChatCompletionOutputResponseMessageFunctionCall" + }, + "audio": { + "type": "object", + "nullable": true, + "description": "If the audio output modality is requested, this object contains data\nabout the audio response from the model. [Learn more](/docs/guides/audio).\n", + "x-oaiExpandable": true, + "required": ["id", "expires_at", "data", "transcript"], + "properties": { + "id": { + "type": "string", + "description": "Unique identifier for this audio response." + }, + "expires_at": { + "type": "integer", + "description": "The Unix timestamp (in seconds) for when this audio response will\nno longer be accessible on the server for use in multi-turn\nconversations.\n" + }, + "data": { + "type": "string", + "description": "Base64 encoded audio bytes generated by the model, in the format\nspecified in the request.\n" + }, + "transcript": { + "type": "string", + "description": "Transcript of the audio generated by the model." + } + }, + "title": "ChatCompletionOutputResponseMessageAudio" + } + }, + "required": ["role", "content", "refusal"], + "title": "ChatCompletionOutputResponseMessage" + }, + "ChatCompletionOutputMessageToolCalls": { + "type": "array", + "description": "The tool calls generated by the model, such as function calls.", + "items": { + "$ref": "#/$defs/ChatCompletionOutputMessageToolCall" + }, + "title": "ChatCompletionOutputMessageToolCalls" + }, + "ChatCompletionOutputMessageToolCall": { + "type": "object", + "properties": { + "id": { + "type": "string", + "description": "The ID of the tool call." + }, + "type": { + "type": "string", + "enum": ["function"], + "description": "The type of the tool. 
Currently, only `function` is supported.", + "x-stainless-const": true + }, + "function": { + "type": "object", + "description": "The function that the model called.", + "properties": { + "name": { + "type": "string", + "description": "The name of the function to call." + }, + "arguments": { + "type": "string", + "description": "The arguments to call the function with, as generated by the model in JSON format. Note that the model does not always generate valid JSON, and may hallucinate parameters not defined by your function schema. Validate the arguments in your code before calling your function." + } + }, + "required": ["name", "arguments"], + "title": "ChatCompletionOutputMessageToolCallFunction" + } + }, + "required": ["id", "type", "function"], + "title": "ChatCompletionOutputMessageToolCall" + }, + "ChatCompletionOutputTokenLogprob": { + "type": "object", + "properties": { + "token": { + "description": "The token.", + "type": "string" + }, + "logprob": { + "description": "The log probability of this token, if it is within the top 20 most likely tokens. Otherwise, the value `-9999.0` is used to signify that the token is very unlikely.", + "type": "number" + }, + "bytes": { + "description": "A list of integers representing the UTF-8 bytes representation of the token. Useful in instances where characters are represented by multiple tokens and their byte representations must be combined to generate the correct text representation. Can be `null` if there is no bytes representation for the token.", + "type": "array", + "items": { + "type": "integer" + }, + "nullable": true + }, + "top_logprobs": { + "description": "List of the most likely tokens and their log probability, at this token position. In rare cases, there may be fewer than the number of requested `top_logprobs` returned.", + "type": "array", + "items": { + "type": "object", + "properties": { + "token": { + "description": "The token.", + "type": "string" + }, + "logprob": { + "description": "The log probability of this token, if it is within the top 20 most likely tokens. Otherwise, the value `-9999.0` is used to signify that the token is very unlikely.", + "type": "number" + }, + "bytes": { + "description": "A list of integers representing the UTF-8 bytes representation of the token. Useful in instances where characters are represented by multiple tokens and their byte representations must be combined to generate the correct text representation. Can be `null` if there is no bytes representation for the token.", + "type": "array", + "items": { + "type": "integer" + }, + "nullable": true + } + }, + "required": ["token", "logprob", "bytes"], + "title": "ChatCompletionOutputTokenLogprobTopLogprobsItem" + } + } + }, + "required": ["token", "logprob", "bytes", "top_logprobs"], + "title": "ChatCompletionOutputTokenLogprob" + }, + "ChatCompletionOutputCompletionUsage": { + "type": "object", + "description": "Usage statistics for the completion request.", + "properties": { + "completion_tokens": { + "type": "integer", + "default": 0, + "description": "Number of tokens in the generated completion." + }, + "prompt_tokens": { + "type": "integer", + "default": 0, + "description": "Number of tokens in the prompt." + }, + "total_tokens": { + "type": "integer", + "default": 0, + "description": "Total number of tokens used in the request (prompt + completion)." 
+        },
+        "completion_tokens_details": {
+          "type": "object",
+          "description": "Breakdown of tokens used in a completion.",
+          "properties": {
+            "accepted_prediction_tokens": {
+              "type": "integer",
+              "default": 0,
+              "description": "When using Predicted Outputs, the number of tokens in the\nprediction that appeared in the completion.\n"
+            },
+            "audio_tokens": {
+              "type": "integer",
+              "default": 0,
+              "description": "Audio input tokens generated by the model."
+            },
+            "reasoning_tokens": {
+              "type": "integer",
+              "default": 0,
+              "description": "Tokens generated by the model for reasoning."
+            },
+            "rejected_prediction_tokens": {
+              "type": "integer",
+              "default": 0,
+              "description": "When using Predicted Outputs, the number of tokens in the\nprediction that did not appear in the completion. However, like\nreasoning tokens, these tokens are still counted in the total\ncompletion tokens for purposes of billing, output, and context window\nlimits.\n"
+            }
+          },
+          "title": "ChatCompletionOutputCompletionUsageCompletionTokensDetails"
+        },
+        "prompt_tokens_details": {
+          "type": "object",
+          "description": "Breakdown of tokens used in the prompt.",
+          "properties": {
+            "audio_tokens": {
+              "type": "integer",
+              "default": 0,
+              "description": "Audio input tokens present in the prompt."
+            },
+            "cached_tokens": {
+              "type": "integer",
+              "default": 0,
+              "description": "Cached tokens present in the prompt."
+            }
+          },
+          "title": "ChatCompletionOutputCompletionUsagePromptTokensDetails"
+        }
+      },
+      "required": ["prompt_tokens", "completion_tokens", "total_tokens"],
+      "title": "ChatCompletionOutputCompletionUsage"
+    }
+  }
+}
diff --git a/packages/tasks/src/tasks/chat-completion/spec-oai/stream_output.json b/packages/tasks/src/tasks/chat-completion/spec-oai/stream_output.json
new file mode 100644
index 0000000000..6e423d534b
--- /dev/null
+++ b/packages/tasks/src/tasks/chat-completion/spec-oai/stream_output.json
@@ -0,0 +1,290 @@
+{
+  "$id": "/inference/schemas/chat-completion/stream_output.json",
+  "$schema": "http://json-schema.org/draft-06/schema#",
+  "description": "Chat Completion Stream Output.\n\nAuto-generated from OAI specs.\nFor more details, check out https://github.com/huggingface/huggingface.js/blob/main/packages/tasks/scripts/inference-oai-import.ts.",
+  "title": "ChatCompletionStreamOutput",
+  "type": "object",
+  "required": ["choices", "created", "id", "model", "object"],
+  "properties": {
+    "id": {
+      "type": "string",
+      "description": "A unique identifier for the chat completion. Each chunk has the same ID."
+    },
+    "choices": {
+      "type": "array",
+      "description": "A list of chat completion choices. Can contain more than one element if `n` is greater than 1. 
Can also be empty for the\nlast chunk if you set `stream_options: {\"include_usage\": true}`.\n", + "items": { + "type": "object", + "required": ["delta", "finish_reason", "index"], + "properties": { + "delta": { + "$ref": "#/$defs/ChatCompletionStreamOutputStreamResponseDelta" + }, + "logprobs": { + "description": "Log probability information for the choice.", + "type": "object", + "nullable": true, + "properties": { + "content": { + "description": "A list of message content tokens with log probability information.", + "type": "array", + "items": { + "$ref": "#/$defs/ChatCompletionStreamOutputTokenLogprob" + }, + "nullable": true + }, + "refusal": { + "description": "A list of message refusal tokens with log probability information.", + "type": "array", + "items": { + "$ref": "#/$defs/ChatCompletionStreamOutputTokenLogprob" + }, + "nullable": true + } + }, + "required": ["content", "refusal"], + "title": "ChatCompletionStreamOutputLogprobs" + }, + "finish_reason": { + "type": "string", + "description": "The reason the model stopped generating tokens. This will be `stop` if the model hit a natural stop point or a provided stop sequence,\n`length` if the maximum number of tokens specified in the request was reached,\n`content_filter` if content was omitted due to a flag from our content filters,\n`tool_calls` if the model called a tool, or `function_call` (deprecated) if the model called a function.\n", + "enum": ["stop", "length", "tool_calls", "content_filter", "function_call"], + "nullable": true + }, + "index": { + "type": "integer", + "description": "The index of the choice in the list of choices." + } + }, + "title": "ChatCompletionStreamOutputChoicesItem" + } + }, + "created": { + "type": "integer", + "description": "The Unix timestamp (in seconds) of when the chat completion was created. Each chunk has the same timestamp." + }, + "model": { + "type": "string", + "description": "The model to generate the completion." + }, + "service_tier": { + "description": "The service tier used for processing the request.", + "type": "string", + "enum": ["scale", "default"], + "example": "scale", + "nullable": true + }, + "system_fingerprint": { + "type": "string", + "description": "This fingerprint represents the backend configuration that the model runs with.\nCan be used in conjunction with the `seed` request parameter to understand when backend changes have been made that might impact determinism.\n" + }, + "object": { + "type": "string", + "description": "The object type, which is always `chat.completion.chunk`.", + "enum": ["chat.completion.chunk"], + "x-stainless-const": true + }, + "usage": { + "$ref": "#/$defs/ChatCompletionStreamOutputCompletionUsage", + "nullable": true, + "description": "An optional field that will only be present when you set\n`stream_options: {\"include_usage\": true}` in your request. 
When present, it\ncontains a null value **except for the last chunk** which contains the\ntoken usage statistics for the entire request.\n\n**NOTE:** If the stream is interrupted or cancelled, you may not\nreceive the final usage chunk which contains the total token usage for\nthe request.\n" + } + }, + "$defs": { + "ChatCompletionStreamOutputStreamResponseDelta": { + "type": "object", + "description": "A chat completion delta generated by streamed model responses.", + "properties": { + "content": { + "type": "string", + "description": "The contents of the chunk message.", + "nullable": true + }, + "function_call": { + "deprecated": true, + "type": "object", + "description": "Deprecated and replaced by `tool_calls`. The name and arguments of a function that should be called, as generated by the model.", + "properties": { + "arguments": { + "type": "string", + "description": "The arguments to call the function with, as generated by the model in JSON format. Note that the model does not always generate valid JSON, and may hallucinate parameters not defined by your function schema. Validate the arguments in your code before calling your function." + }, + "name": { + "type": "string", + "description": "The name of the function to call." + } + }, + "title": "ChatCompletionStreamOutputStreamResponseDeltaFunctionCall" + }, + "tool_calls": { + "type": "array", + "items": { + "$ref": "#/$defs/ChatCompletionStreamOutputMessageToolCallChunk" + } + }, + "role": { + "type": "string", + "enum": ["developer", "system", "user", "assistant", "tool"], + "description": "The role of the author of this message." + }, + "refusal": { + "type": "string", + "description": "The refusal message generated by the model.", + "nullable": true + } + }, + "title": "ChatCompletionStreamOutputStreamResponseDelta" + }, + "ChatCompletionStreamOutputMessageToolCallChunk": { + "type": "object", + "properties": { + "index": { + "type": "integer" + }, + "id": { + "type": "string", + "description": "The ID of the tool call." + }, + "type": { + "type": "string", + "enum": ["function"], + "description": "The type of the tool. Currently, only `function` is supported.", + "x-stainless-const": true + }, + "function": { + "type": "object", + "properties": { + "name": { + "type": "string", + "description": "The name of the function to call." + }, + "arguments": { + "type": "string", + "description": "The arguments to call the function with, as generated by the model in JSON format. Note that the model does not always generate valid JSON, and may hallucinate parameters not defined by your function schema. Validate the arguments in your code before calling your function." + } + }, + "title": "ChatCompletionStreamOutputMessageToolCallChunkFunction" + } + }, + "required": ["index"], + "title": "ChatCompletionStreamOutputMessageToolCallChunk" + }, + "ChatCompletionStreamOutputTokenLogprob": { + "type": "object", + "properties": { + "token": { + "description": "The token.", + "type": "string" + }, + "logprob": { + "description": "The log probability of this token, if it is within the top 20 most likely tokens. Otherwise, the value `-9999.0` is used to signify that the token is very unlikely.", + "type": "number" + }, + "bytes": { + "description": "A list of integers representing the UTF-8 bytes representation of the token. Useful in instances where characters are represented by multiple tokens and their byte representations must be combined to generate the correct text representation. 
Can be `null` if there is no bytes representation for the token.", + "type": "array", + "items": { + "type": "integer" + }, + "nullable": true + }, + "top_logprobs": { + "description": "List of the most likely tokens and their log probability, at this token position. In rare cases, there may be fewer than the number of requested `top_logprobs` returned.", + "type": "array", + "items": { + "type": "object", + "properties": { + "token": { + "description": "The token.", + "type": "string" + }, + "logprob": { + "description": "The log probability of this token, if it is within the top 20 most likely tokens. Otherwise, the value `-9999.0` is used to signify that the token is very unlikely.", + "type": "number" + }, + "bytes": { + "description": "A list of integers representing the UTF-8 bytes representation of the token. Useful in instances where characters are represented by multiple tokens and their byte representations must be combined to generate the correct text representation. Can be `null` if there is no bytes representation for the token.", + "type": "array", + "items": { + "type": "integer" + }, + "nullable": true + } + }, + "required": ["token", "logprob", "bytes"], + "title": "ChatCompletionStreamOutputTokenLogprobTopLogprobsItem" + } + } + }, + "required": ["token", "logprob", "bytes", "top_logprobs"], + "title": "ChatCompletionStreamOutputTokenLogprob" + }, + "ChatCompletionStreamOutputCompletionUsage": { + "type": "object", + "description": "Usage statistics for the completion request.", + "properties": { + "completion_tokens": { + "type": "integer", + "default": 0, + "description": "Number of tokens in the generated completion." + }, + "prompt_tokens": { + "type": "integer", + "default": 0, + "description": "Number of tokens in the prompt." + }, + "total_tokens": { + "type": "integer", + "default": 0, + "description": "Total number of tokens used in the request (prompt + completion)." + }, + "completion_tokens_details": { + "type": "object", + "description": "Breakdown of tokens used in a completion.", + "properties": { + "accepted_prediction_tokens": { + "type": "integer", + "default": 0, + "description": "When using Predicted Outputs, the number of tokens in the\nprediction that appeared in the completion.\n" + }, + "audio_tokens": { + "type": "integer", + "default": 0, + "description": "Audio input tokens generated by the model." + }, + "reasoning_tokens": { + "type": "integer", + "default": 0, + "description": "Tokens generated by the model for reasoning." + }, + "rejected_prediction_tokens": { + "type": "integer", + "default": 0, + "description": "When using Predicted Outputs, the number of tokens in the\nprediction that did not appear in the completion. However, like\nreasoning tokens, these tokens are still counted in the total\ncompletion tokens for purposes of billing, output, and context window\nlimits.\n" + } + }, + "title": "ChatCompletionStreamOutputCompletionUsageCompletionTokensDetails" + }, + "prompt_tokens_details": { + "type": "object", + "description": "Breakdown of tokens used in the prompt.", + "properties": { + "audio_tokens": { + "type": "integer", + "default": 0, + "description": "Audio input tokens present in the prompt." + }, + "cached_tokens": { + "type": "integer", + "default": 0, + "description": "Cached tokens present in the prompt." 
+ } + }, + "title": "ChatCompletionStreamOutputCompletionUsagePromptTokensDetails" + } + }, + "required": ["prompt_tokens", "completion_tokens", "total_tokens"], + "title": "ChatCompletionStreamOutputCompletionUsage" + } + } +}
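For reviewers, a rough sketch of how the types regenerated from these `spec-oai` schemas could be exercised. It assumes `inference-codegen` has been re-run against `spec-oai` so that `ChatCompletionInput` and `ChatCompletionStreamOutput` are exported from `@huggingface/tasks`, as the current `spec`-derived types are; the model name and the `collectStream` helper below are illustrative only and not part of this PR.

```ts
import type { ChatCompletionInput, ChatCompletionStreamOutput } from "@huggingface/tasks";

// Unlike the previous HF spec, the OAI-derived input schema marks both
// `model` and `messages` as required, so this object should not type-check
// without them.
const request: ChatCompletionInput = {
	model: "gpt-4o", // illustrative model id
	messages: [
		{ role: "developer", content: "Answer concisely." },
		{ role: "user", content: "What is the capital of France?" },
	],
	stream: true,
	// Per ChatCompletionInputStreamOptions, the final chunk then carries the
	// usage stats for the whole request, with an empty `choices` array.
	stream_options: { include_usage: true },
};

// Hypothetical helper: fold streamed chunks back into the full response text.
function collectStream(chunks: ChatCompletionStreamOutput[]): string {
	let text = "";
	for (const chunk of chunks) {
		for (const choice of chunk.choices) {
			// `delta.content` is nullable in the schema (e.g. tool-call deltas).
			text += choice.delta.content ?? "";
		}
		// Only the last chunk carries non-null usage when include_usage is set.
		if (chunk.usage) {
			console.log(`total tokens used: ${chunk.usage.total_tokens}`);
		}
	}
	return text;
}
```

One design consequence worth noting in review: because `spec-oai` makes `model` required on the input, any caller that relied on the old HF behavior of inferring the model from the endpoint would become a type error once the generated `inference.ts` is refreshed from these specs.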