{"openapi":"3.1.0","info":{"title":"API Reference","version":"1.0.0"},"paths":{"/v1/audio/speech":{"post":{"operationId":"speech","summary":"Create Speech","description":"Synthesize speech audio from text or SSML. Returns the complete audio\nfile plus billing and speech-mark metadata in a single JSON response.\nFor low-latency playback or long-form text, use POST /v1/audio/stream.","tags":["subpackage_audio"],"parameters":[{"name":"Authorization","in":"header","description":"Enter your API key with the `Bearer` prefix, e.g. 'Bearer sk_...'.","required":true,"schema":{"type":"string"}},{"name":"Speechify-Version","in":"header","required":false,"schema":{"type":"string"}}],"responses":{"200":{"description":"Synthesized speech audio for the requested input.","content":{"application/json":{"schema":{"$ref":"#/components/schemas/GetSpeechResponse"}}}},"400":{"description":"The request was malformed or failed validation. The response\nbody is the standard `Error` envelope; for validation\nfailures `error.fields` enumerates the offending fields as\na `path -> message` map (code = `validation_failed`).\n","content":{"application/json":{"schema":{"$ref":"#/components/schemas/Error"}}}},"401":{"description":"Authentication is missing or invalid. The request did not\ncarry a recognised credential (Firebase ID token, API key, or\nworker JWT).\n","content":{"application/json":{"schema":{"$ref":"#/components/schemas/Error"}}}},"402":{"description":"The workspace has insufficient credits, or the request needs a\nplan tier the workspace is not on (e.g. voice cloning). 
Distinct\nfrom `Forbidden` so SDK consumers can drive upgrade UX.\n","content":{"application/json":{"schema":{"$ref":"#/components/schemas/Error"}}}},"403":{"description":"The credential authenticated, but is not authorised for this\nresource - typically a workspace-role gate (owner / admin\nrequired) or a cross-tenant access attempt.\n","content":{"application/json":{"schema":{"$ref":"#/components/schemas/Error"}}}},"404":{"description":"The referenced resource does not exist or is not visible to\nthe caller's workspace.\n","content":{"application/json":{"schema":{"$ref":"#/components/schemas/Error"}}}},"429":{"description":"Rate limit or concurrency limit exceeded. `error.code` distinguishes\nrequest-rate limiting (`rate_limited`) from concurrency exhaustion\n(`concurrency_limit_reached`). Carries `Retry-After` and the\nrequest-rate budget headers; a concurrency-exhaustion 429 also carries\n`RateLimit-Remaining-Calls: 0`.\n","content":{"application/json":{"schema":{"$ref":"#/components/schemas/Error"}}}},"500":{"description":"An unexpected server-side error occurred. Safe to retry with\nexponential backoff for idempotent requests.\n","content":{"application/json":{"schema":{"$ref":"#/components/schemas/Error"}}}},"502":{"description":"An upstream dependency (the TTS composer or voice-metadata\nservice) returned a 5xx. The raw upstream detail is not\nforwarded - the cause is in the server log; the response is a\nfixed `upstream_failure` envelope. Safe to retry.\n","content":{"application/json":{"schema":{"$ref":"#/components/schemas/Error"}}}},"503":{"description":"A downstream dependency is degraded or the endpoint is\nintentionally disabled (e.g. 
phone-number purchase before\nops setup).\n","content":{"application/json":{"schema":{"$ref":"#/components/schemas/Error"}}}}},"requestBody":{"content":{"application/json":{"schema":{"$ref":"#/components/schemas/GetSpeechRequest"}}}}}},"/v1/audio/stream":{"post":{"operationId":"stream","summary":"Stream Speech","description":"Synthesize speech and stream the audio back as it is generated, for\nlow-latency playback. The Accept header selects the audio container;\nthe response is raw audio bytes (HTTP chunked). For Base64-encoded\naudio with speech-mark metadata in a single JSON response, use\nPOST /v1/audio/speech.","tags":["subpackage_audio"],"parameters":[{"name":"Authorization","in":"header","description":"Enter your API key with the `Bearer` prefix, e.g. 'Bearer sk_...'.","required":true,"schema":{"type":"string"}},{"name":"Speechify-Version","in":"header","required":false,"schema":{"type":"string"}},{"name":"Accept","in":"header","description":"Selects the audio container/codec for the streamed response. The\nresponse Content-Type echoes this value, except `audio/pcm` returns\n`audio/L16` with rate and channels parameters (raw 16-bit linear\nPCM, 24 kHz mono, little-endian).","required":true,"schema":{"$ref":"#/components/schemas/V1AudioStreamPostParametersAccept"}}],"responses":{"200":{"description":"Streamed audio. The Content-Type matches the Accept header except\nfor `audio/pcm`, which returns `audio/L16` with rate and channels\nparameters (see the Accept header description).","content":{"application/octet-stream":{"schema":{"type":"string","format":"binary"}}}},"400":{"description":"The request was malformed or failed validation. The response\nbody is the standard `Error` envelope; for validation\nfailures `error.fields` enumerates the offending fields as\na `path -> message` map (code = `validation_failed`).\n","content":{"application/json":{"schema":{"$ref":"#/components/schemas/Error"}}}},"401":{"description":"Authentication is missing or invalid. 
The request did not\ncarry a recognised credential (Firebase ID token, API key, or\nworker JWT).\n","content":{"application/json":{"schema":{"$ref":"#/components/schemas/Error"}}}},"402":{"description":"The workspace has insufficient credits, or the request needs a\nplan tier the workspace is not on (e.g. voice cloning). Distinct\nfrom `Forbidden` so SDK consumers can drive upgrade UX.\n","content":{"application/json":{"schema":{"$ref":"#/components/schemas/Error"}}}},"403":{"description":"The credential authenticated, but is not authorised for this\nresource - typically a workspace-role gate (owner / admin\nrequired) or a cross-tenant access attempt.\n","content":{"application/json":{"schema":{"$ref":"#/components/schemas/Error"}}}},"404":{"description":"The referenced resource does not exist or is not visible to\nthe caller's workspace.\n","content":{"application/json":{"schema":{"$ref":"#/components/schemas/Error"}}}},"429":{"description":"Rate limit or concurrency limit exceeded. `error.code` distinguishes\nrequest-rate limiting (`rate_limited`) from concurrency exhaustion\n(`concurrency_limit_reached`). Carries `Retry-After` and the\nrequest-rate budget headers; a concurrency-exhaustion 429 also carries\n`RateLimit-Remaining-Calls: 0`.\n","content":{"application/json":{"schema":{"$ref":"#/components/schemas/Error"}}}},"500":{"description":"An unexpected server-side error occurred. Safe to retry with\nexponential backoff for idempotent requests.\n","content":{"application/json":{"schema":{"$ref":"#/components/schemas/Error"}}}},"502":{"description":"An upstream dependency (the TTS composer or voice-metadata\nservice) returned a 5xx. The raw upstream detail is not\nforwarded - the cause is in the server log; the response is a\nfixed `upstream_failure` envelope. 
Safe to retry.\n","content":{"application/json":{"schema":{"$ref":"#/components/schemas/Error"}}}},"503":{"description":"A downstream dependency is degraded or the endpoint is\nintentionally disabled (e.g. phone-number purchase before\nops setup).\n","content":{"application/json":{"schema":{"$ref":"#/components/schemas/Error"}}}}},"requestBody":{"content":{"application/json":{"schema":{"$ref":"#/components/schemas/GetStreamRequest"}}}}}},"/v1/voices":{"get":{"operationId":"list","summary":"List Voices","description":"Lists the voices available to the caller - the shared voice\ncatalog plus the workspace's personal cloned voices. By default\nthe full catalogue is returned in one response. Pagination is\nopt-in: pass `limit` (and then `cursor` from the previous\nresponse) to page through the list while `has_more` is true. Max\npage size is 200.","tags":["subpackage_voices"],"parameters":[{"name":"cursor","in":"query","description":"Opaque pagination cursor from a previous response.","required":false,"schema":{"type":"string"}},{"name":"limit","in":"query","description":"Max items per page (default 50, max 200).","required":false,"schema":{"type":"integer","default":50}},{"name":"Authorization","in":"header","description":"Enter your API key with the `Bearer` prefix, e.g. 'Bearer sk_...'.","required":true,"schema":{"type":"string"}},{"name":"Speechify-Version","in":"header","required":false,"schema":{"type":"string"}}],"responses":{"200":{"description":"The voice catalogue (or a page of it when `limit` is set).","content":{"application/json":{"schema":{"$ref":"#/components/schemas/ListVoicesResponse"}}}},"400":{"description":"The request was malformed or failed validation. 
The response\nbody is the standard `Error` envelope; for validation\nfailures `error.fields` enumerates the offending fields as\na `path -> message` map (code = `validation_failed`).\n","content":{"application/json":{"schema":{"$ref":"#/components/schemas/Error"}}}},"401":{"description":"Authentication is missing or invalid. The request did not\ncarry a recognised credential (Firebase ID token, API key, or\nworker JWT).\n","content":{"application/json":{"schema":{"$ref":"#/components/schemas/Error"}}}},"403":{"description":"The credential authenticated, but is not authorised for this\nresource - typically a workspace-role gate (owner / admin\nrequired) or a cross-tenant access attempt.\n","content":{"application/json":{"schema":{"$ref":"#/components/schemas/Error"}}}},"429":{"description":"Rate limit or concurrency limit exceeded. `error.code` distinguishes\nrequest-rate limiting (`rate_limited`) from concurrency exhaustion\n(`concurrency_limit_reached`). Carries `Retry-After` and the\nrequest-rate budget headers; a concurrency-exhaustion 429 also carries\n`RateLimit-Remaining-Calls: 0`.\n","content":{"application/json":{"schema":{"$ref":"#/components/schemas/Error"}}}},"500":{"description":"An unexpected server-side error occurred. Safe to retry with\nexponential backoff for idempotent requests.\n","content":{"application/json":{"schema":{"$ref":"#/components/schemas/Error"}}}}}},"post":{"operationId":"create","summary":"Create Voice","description":"Create a personal (cloned) voice for the user","tags":["subpackage_voices"],"parameters":[{"name":"Authorization","in":"header","description":"Enter your API key with the `Bearer` prefix, e.g. 
'Bearer sk_...'.","required":true,"schema":{"type":"string"}},{"name":"Speechify-Version","in":"header","required":false,"schema":{"type":"string"}}],"responses":{"201":{"description":"A created voice","content":{"application/json":{"schema":{"$ref":"#/components/schemas/GetVoice"}}}},"400":{"description":"The request was malformed or failed validation. The response\nbody is the standard `Error` envelope; for validation\nfailures `error.fields` enumerates the offending fields as\na `path -> message` map (code = `validation_failed`).\n","content":{"application/json":{"schema":{"$ref":"#/components/schemas/Error"}}}},"401":{"description":"Authentication is missing or invalid. The request did not\ncarry a recognised credential (Firebase ID token, API key, or\nworker JWT).\n","content":{"application/json":{"schema":{"$ref":"#/components/schemas/Error"}}}},"402":{"description":"The workspace has insufficient credits, or the request needs a\nplan tier the workspace is not on (e.g. voice cloning). Distinct\nfrom `Forbidden` so SDK consumers can drive upgrade UX.\n","content":{"application/json":{"schema":{"$ref":"#/components/schemas/Error"}}}},"403":{"description":"The credential authenticated, but is not authorised for this\nresource - typically a workspace-role gate (owner / admin\nrequired) or a cross-tenant access attempt.\n","content":{"application/json":{"schema":{"$ref":"#/components/schemas/Error"}}}},"422":{"description":"The request was well-formed but semantically rejected -\ntypically a referential integrity violation (e.g. flow node\nreferences an audio asset in another workspace) or a state\nmachine refusal.\n","content":{"application/json":{"schema":{"$ref":"#/components/schemas/Error"}}}},"429":{"description":"Rate limit or concurrency limit exceeded. `error.code` distinguishes\nrequest-rate limiting (`rate_limited`) from concurrency exhaustion\n(`concurrency_limit_reached`). 
Carries `Retry-After` and the\nrequest-rate budget headers; a concurrency-exhaustion 429 also carries\n`RateLimit-Remaining-Calls: 0`.\n","content":{"application/json":{"schema":{"$ref":"#/components/schemas/Error"}}}},"500":{"description":"An unexpected server-side error occurred. Safe to retry with\nexponential backoff for idempotent requests.\n","content":{"application/json":{"schema":{"$ref":"#/components/schemas/Error"}}}},"502":{"description":"An upstream dependency (the TTS composer or voice-metadata\nservice) returned a 5xx. The raw upstream detail is not\nforwarded - the cause is in the server log; the response is a\nfixed `upstream_failure` envelope. Safe to retry.\n","content":{"application/json":{"schema":{"$ref":"#/components/schemas/Error"}}}},"503":{"description":"A downstream dependency is degraded or the endpoint is\nintentionally disabled (e.g. phone-number purchase before\nops setup).\n","content":{"application/json":{"schema":{"$ref":"#/components/schemas/Error"}}}}},"requestBody":{"content":{"multipart/form-data":{"schema":{"type":"object","properties":{"name":{"type":"string","description":"Name of the personal voice"},"locale":{"type":"string","default":"en-US","description":"Native language (locale) of the personal voice (e.g. 
en-US, es-ES, etc.)"},"gender":{"$ref":"#/components/schemas/V1VoicesPostRequestBodyContentMultipartFormDataSchemaGender","description":"Gender marker for the personal voice. One of `male`, `female`, or `not_specified`."},"sample":{"type":"string","format":"binary","description":"Audio sample file"},"avatar":{"type":"string","format":"binary","description":"Avatar image file"},"consent":{"type":"string","description":"A **string** representing the user consent information in JSON format.\nThis should include the fullName and email of the consenting individual.\nFor example, `{\"fullName\": \"John Doe\", \"email\": \"john@example.com\"}`"}},"required":["name","gender","sample","consent"]}}}}}},"/v1/voices/{voice_id}":{"get":{"operationId":"get","summary":"Get Voice","description":"Fetch a single voice by id - a shared catalogue voice or one of\nthe caller's own personal (cloned) voices. A personal voice that\nbelongs to another workspace returns 404, identical to an\nunknown id, so voice inventory is never enumerable across tenants.","tags":["subpackage_voices"],"parameters":[{"name":"voice_id","in":"path","description":"The ID of the voice to fetch","required":true,"schema":{"type":"string"}},{"name":"Authorization","in":"header","description":"Enter your API key with the `Bearer` prefix, e.g. 'Bearer sk_...'.","required":true,"schema":{"type":"string"}},{"name":"Speechify-Version","in":"header","required":false,"schema":{"type":"string"}}],"responses":{"200":{"description":"The voice.","content":{"application/json":{"schema":{"$ref":"#/components/schemas/GetVoice"}}}},"400":{"description":"The request was malformed or failed validation. 
The response\nbody is the standard `Error` envelope; for validation\nfailures `error.fields` enumerates the offending fields as\na `path -> message` map (code = `validation_failed`).\n","content":{"application/json":{"schema":{"$ref":"#/components/schemas/Error"}}}},"401":{"description":"Authentication is missing or invalid. The request did not\ncarry a recognised credential (Firebase ID token, API key, or\nworker JWT).\n","content":{"application/json":{"schema":{"$ref":"#/components/schemas/Error"}}}},"404":{"description":"The referenced resource does not exist or is not visible to\nthe caller's workspace.\n","content":{"application/json":{"schema":{"$ref":"#/components/schemas/Error"}}}},"429":{"description":"Rate limit or concurrency limit exceeded. `error.code` distinguishes\nrequest-rate limiting (`rate_limited`) from concurrency exhaustion\n(`concurrency_limit_reached`). Carries `Retry-After` and the\nrequest-rate budget headers; a concurrency-exhaustion 429 also carries\n`RateLimit-Remaining-Calls: 0`.\n","content":{"application/json":{"schema":{"$ref":"#/components/schemas/Error"}}}},"500":{"description":"An unexpected server-side error occurred. Safe to retry with\nexponential backoff for idempotent requests.\n","content":{"application/json":{"schema":{"$ref":"#/components/schemas/Error"}}}},"502":{"description":"An upstream dependency (the TTS composer or voice-metadata\nservice) returned a 5xx. The raw upstream detail is not\nforwarded - the cause is in the server log; the response is a\nfixed `upstream_failure` envelope. Safe to retry.\n","content":{"application/json":{"schema":{"$ref":"#/components/schemas/Error"}}}},"503":{"description":"A downstream dependency is degraded or the endpoint is\nintentionally disabled (e.g. 
phone-number purchase before\nops setup).\n","content":{"application/json":{"schema":{"$ref":"#/components/schemas/Error"}}}}}},"delete":{"operationId":"delete","summary":"Delete Voice","description":"Delete a personal (cloned) voice","tags":["subpackage_voices"],"parameters":[{"name":"voice_id","in":"path","description":"The ID of the voice to delete","required":true,"schema":{"type":"string"}},{"name":"Authorization","in":"header","description":"Enter your API key with the `Bearer` prefix, e.g. 'Bearer sk_...'.","required":true,"schema":{"type":"string"}},{"name":"Speechify-Version","in":"header","required":false,"schema":{"type":"string"}}],"responses":{"204":{"description":"Voice deleted successfully","content":{"application/json":{"schema":{"type":"object","properties":{}}}}},"400":{"description":"The request was malformed or failed validation. The response\nbody is the standard `Error` envelope; for validation\nfailures `error.fields` enumerates the offending fields as\na `path -> message` map (code = `validation_failed`).\n","content":{"application/json":{"schema":{"$ref":"#/components/schemas/Error"}}}},"401":{"description":"Authentication is missing or invalid. The request did not\ncarry a recognised credential (Firebase ID token, API key, or\nworker JWT).\n","content":{"application/json":{"schema":{"$ref":"#/components/schemas/Error"}}}},"403":{"description":"The credential authenticated, but is not authorised for this\nresource - typically a workspace-role gate (owner / admin\nrequired) or a cross-tenant access attempt.\n","content":{"application/json":{"schema":{"$ref":"#/components/schemas/Error"}}}},"404":{"description":"The referenced resource does not exist or is not visible to\nthe caller's workspace.\n","content":{"application/json":{"schema":{"$ref":"#/components/schemas/Error"}}}},"429":{"description":"Rate limit or concurrency limit exceeded. 
`error.code` distinguishes\nrequest-rate limiting (`rate_limited`) from concurrency exhaustion\n(`concurrency_limit_reached`). Carries `Retry-After` and the\nrequest-rate budget headers; a concurrency-exhaustion 429 also carries\n`RateLimit-Remaining-Calls: 0`.\n","content":{"application/json":{"schema":{"$ref":"#/components/schemas/Error"}}}},"500":{"description":"An unexpected server-side error occurred. Safe to retry with\nexponential backoff for idempotent requests.\n","content":{"application/json":{"schema":{"$ref":"#/components/schemas/Error"}}}},"502":{"description":"An upstream dependency (the TTS composer or voice-metadata\nservice) returned a 5xx. The raw upstream detail is not\nforwarded - the cause is in the server log; the response is a\nfixed `upstream_failure` envelope. Safe to retry.\n","content":{"application/json":{"schema":{"$ref":"#/components/schemas/Error"}}}},"503":{"description":"A downstream dependency is degraded or the endpoint is\nintentionally disabled (e.g. phone-number purchase before\nops setup).\n","content":{"application/json":{"schema":{"$ref":"#/components/schemas/Error"}}}}}}},"/v1/voices/{voice_id}/sample":{"get":{"operationId":"download-sample","summary":"Download Voice Sample","description":"Download a personal (cloned) voice sample","tags":["subpackage_voices"],"parameters":[{"name":"voice_id","in":"path","description":"The ID of the voice to download sample for","required":true,"schema":{"type":"string"}},{"name":"Authorization","in":"header","description":"Enter your API key with the `Bearer` prefix, e.g. 'Bearer sk_...'.","required":true,"schema":{"type":"string"}},{"name":"Speechify-Version","in":"header","required":false,"schema":{"type":"string"}}],"responses":{"200":{"description":"Voice sample audio file","content":{"application/octet-stream":{"schema":{"type":"string","format":"binary"}}}},"400":{"description":"The request was malformed or failed validation. 
The response\nbody is the standard `Error` envelope; for validation\nfailures `error.fields` enumerates the offending fields as\na `path -> message` map (code = `validation_failed`).\n","content":{"application/json":{"schema":{"$ref":"#/components/schemas/Error"}}}},"401":{"description":"Authentication is missing or invalid. The request did not\ncarry a recognised credential (Firebase ID token, API key, or\nworker JWT).\n","content":{"application/json":{"schema":{"$ref":"#/components/schemas/Error"}}}},"403":{"description":"The credential authenticated, but is not authorised for this\nresource - typically a workspace-role gate (owner / admin\nrequired) or a cross-tenant access attempt.\n","content":{"application/json":{"schema":{"$ref":"#/components/schemas/Error"}}}},"404":{"description":"The referenced resource does not exist or is not visible to\nthe caller's workspace.\n","content":{"application/json":{"schema":{"$ref":"#/components/schemas/Error"}}}},"429":{"description":"Rate limit or concurrency limit exceeded. `error.code` distinguishes\nrequest-rate limiting (`rate_limited`) from concurrency exhaustion\n(`concurrency_limit_reached`). Carries `Retry-After` and the\nrequest-rate budget headers; a concurrency-exhaustion 429 also carries\n`RateLimit-Remaining-Calls: 0`.\n","content":{"application/json":{"schema":{"$ref":"#/components/schemas/Error"}}}},"500":{"description":"An unexpected server-side error occurred. Safe to retry with\nexponential backoff for idempotent requests.\n","content":{"application/json":{"schema":{"$ref":"#/components/schemas/Error"}}}},"502":{"description":"An upstream dependency (the TTS composer or voice-metadata\nservice) returned a 5xx. The raw upstream detail is not\nforwarded - the cause is in the server log; the response is a\nfixed `upstream_failure` envelope. 
Safe to retry.\n","content":{"application/json":{"schema":{"$ref":"#/components/schemas/Error"}}}},"503":{"description":"A downstream dependency is degraded or the endpoint is\nintentionally disabled (e.g. phone-number purchase before\nops setup).\n","content":{"application/json":{"schema":{"$ref":"#/components/schemas/Error"}}}}}}}},"servers":[{"url":"https://api.speechify.ai","description":"https://api.speechify.ai"}],"components":{"schemas":{"GetSpeechRequestAudioFormat":{"type":"string","enum":["wav","mp3","ogg","aac","pcm"],"default":"wav","description":"The format for the output audio. Note that the current default is \"wav\", but there is no guarantee that it will not change in the future. We recommend always passing the specific format you expect.","title":"GetSpeechRequestAudioFormat"},"GetSpeechRequestModel":{"type":"string","enum":["simba-english","simba-multilingual","simba-3.0"],"default":"simba-english","description":"Model used for audio synthesis. `simba-english` is optimized for English, `simba-multilingual` for non-English or mixed input. `simba-3.0` is the streaming-native model with lower TTFB and richer expressivity. Currently English only; multilingual coming soon. 
Non-English voices return 400 until multilingual support ships.","title":"GetSpeechRequestModel"},"GetSpeechOptionsRequest":{"type":"object","properties":{"loudness_normalization":{"type":"boolean","default":false,"description":"Determines whether to normalize the audio loudness to a standard level.\nWhen enabled, loudness normalization aligns the audio output to the following standards:\nIntegrated loudness: -14 LUFS\nTrue peak: -2 dBTP\nLoudness range: 7 LU\nIf disabled, the audio loudness will match the original loudness of the selected voice, which may vary significantly and be either too quiet or too loud.\nEnabling loudness normalization can increase latency due to additional processing required for audio level adjustments."},"text_normalization":{"type":"boolean","default":true,"description":"Determines whether to normalize the text. If enabled, it will transform numbers, dates, etc. into words. For example, \"55\" is normalized into \"fifty five\".\nThis can increase latency due to additional processing required for text normalization."}},"description":"GetSpeechOptionsRequest is the wrapper for request parameters to the client","title":"GetSpeechOptionsRequest"},"GetSpeechRequest":{"type":"object","properties":{"audio_format":{"$ref":"#/components/schemas/GetSpeechRequestAudioFormat","default":"wav","description":"The format for the output audio. Note that the current default is \"wav\", but there is no guarantee that it will not change in the future. We recommend always passing the specific format you expect."},"input":{"type":"string","description":"Plain text or SSML to be synthesized to speech.\nRefer to https://docs.speechify.ai/docs/api-limits for the input size limits.\nEmotion, pitch, and speed rate are configured in the SSML input; refer to the SSML documentation for more information: https://docs.speechify.ai/docs/ssml#prosody"},"language":{"type":"string","description":"Language of the input. 
Follow the format of an ISO 639-1 language code and an ISO 3166-1 region code, separated by a hyphen, e.g. en-US.\nPlease refer to the list of the supported languages and recommendations regarding this parameter: https://docs.speechify.ai/docs/language-support."},"model":{"$ref":"#/components/schemas/GetSpeechRequestModel","default":"simba-english","description":"Model used for audio synthesis. `simba-english` is optimized for English, `simba-multilingual` for non-English or mixed input. `simba-3.0` is the streaming-native model with lower TTFB and richer expressivity. Currently English only; multilingual coming soon. Non-English voices return 400 until multilingual support ships."},"options":{"$ref":"#/components/schemas/GetSpeechOptionsRequest"},"voice_id":{"type":"string","description":"Id of the voice to be used for synthesizing speech. Refer to /v1/voices endpoint for available voices"}},"required":["input","voice_id"],"description":"Request body for POST /v1/audio/speech.","title":"GetSpeechRequest"},"GetSpeechResponseAudioFormat":{"type":"string","enum":["wav","mp3","ogg","aac","pcm"],"description":"The format of the audio data","title":"GetSpeechResponseAudioFormat"},"NestedChunk":{"type":"object","properties":{"end":{"type":"integer","format":"int64"},"end_time":{"type":"number","format":"double"},"start":{"type":"integer","format":"int64"},"start_time":{"type":"number","format":"double"},"type":{"type":"string"},"value":{"type":"string"}},"description":"It details the type of segment, its start and end points in the text, and its start and end times in the synthesized speech audio.","title":"NestedChunk"},"SpeechMarks":{"type":"object","properties":{"chunks":{"type":"array","items":{"$ref":"#/components/schemas/NestedChunk"},"description":"Array of NestedChunk, each providing detailed segment information within the synthesized 
speech."},"end":{"type":"integer","format":"int64"},"end_time":{"type":"number","format":"double"},"start":{"type":"integer","format":"int64"},"start_time":{"type":"number","format":"double"},"type":{"type":"string"},"value":{"type":"string"}},"required":["chunks","end","end_time","start","start_time","type"],"description":"It is used to annotate the audio data with metadata about the synthesis process, like word timing or phoneme details.","title":"SpeechMarks"},"GetSpeechResponse":{"type":"object","properties":{"audio_data":{"type":"string","format":"byte","description":"Synthesized speech audio, Base64-encoded"},"audio_format":{"$ref":"#/components/schemas/GetSpeechResponseAudioFormat","description":"The format of the audio data"},"billable_characters_count":{"type":"integer","format":"int64","description":"The number of billable characters processed in the request."},"speech_marks":{"$ref":"#/components/schemas/SpeechMarks"}},"required":["audio_data","audio_format","billable_characters_count","speech_marks"],"title":"GetSpeechResponse"},"ErrorCode":{"type":"string","enum":["bad_request","validation_failed","unauthorized","payment_required","forbidden","not_found","method_not_allowed","conflict","idempotency_conflict","payload_too_large","unsupported_media_type","rate_limited","concurrency_limit_reached","invalid_api_version","internal_error","upstream_failure","service_unavailable","caller_not_found","credential_not_found","credential_in_use","agent_not_found","kb_not_found","kb_document_not_found","kb_folder_not_found","tool_not_found","conversation_not_found","phone_number_not_found","sip_trunk_not_found","voice_not_found","audio_asset_not_found","builtin_not_found","batch_not_found","agent_test_not_found","workspace_not_found","invite_not_found","insufficient_scope","purchased_numbers_not_included","phone_number_quota_reached","batch_calls_not_included","voice_cloning_not_included","workspace_last_owner","workspace_last_workspace","invite_email_mismatch","inv
ite_already_pending","tool_config_shared"],"description":"Stable machine-readable error code. Additive only: codes are\nnever renamed, only deprecated. SDKs may map each code to a\ntyped exception class. Status-code semantics:\n4xx codes describe caller-fixable issues; 5xx codes describe\nserver-side failures and are safe to retry with backoff for\nidempotent requests.\n","title":"ErrorCode"},"ErrorDetail":{"type":"object","properties":{"code":{"$ref":"#/components/schemas/ErrorCode"},"message":{"type":"string","description":"Human-readable explanation of this specific occurrence.\nSafe to surface in UI banners or pass to support. The\nwording can change between releases; clients should\nmatch on `code`, not on the message string.\n"},"fields":{"type":"object","additionalProperties":{"type":"string"},"description":"Per-field validation errors as `path -> message`. Only\npresent on 400 responses caused by request validation\n(typically code=`validation_failed`). Keys are field\npaths in dotted/bracket notation; values are short\nhuman explanations safe to inline-surface next to the\noffending form field.\n"}},"required":["code","message"],"title":"ErrorDetail"},"Error":{"type":"object","properties":{"error":{"$ref":"#/components/schemas/ErrorDetail"},"request_id":{"type":"string","description":"Server-side request identifier. Echoes the\n`X-Request-ID` response header. Stable across the\nrequest's lifetime, written to structured logs, and\nuseful when reporting issues.\n"}},"required":["error"],"description":"Standard error envelope returned on every non-2xx response.\nContent-Type is `application/json`. The shape mirrors OpenAI /\nAnthropic / Stripe style: a machine-readable `error.code` for\nSDK consumers to switch on, a human `error.message` for UI,\nand an optional `error.fields` map for per-field validation\nerrors. 
`request_id` matches the `X-Request-ID` response\nheader and is what customers quote when filing support\ntickets.\n","title":"Error"},"V1AudioStreamPostParametersAccept":{"type":"string","enum":["audio/mpeg","audio/ogg","audio/aac","audio/pcm"],"title":"V1AudioStreamPostParametersAccept"},"GetStreamRequestModel":{"type":"string","enum":["simba-english","simba-multilingual","simba-3.0"],"default":"simba-english","description":"Model used for audio synthesis. `simba-english` is optimized for English, `simba-multilingual` for non-English or mixed input. `simba-3.0` is the streaming-native model with lower TTFB and richer expressivity. Currently English only; multilingual coming soon. Non-English voices return 400 until multilingual support ships.","title":"GetStreamRequestModel"},"GetStreamOptionsRequest":{"type":"object","properties":{"loudness_normalization":{"type":"boolean","default":false,"description":"Determines whether to normalize the audio loudness to a standard level.\nWhen enabled, loudness normalization aligns the audio output to the following standards:\nIntegrated loudness: -14 LUFS\nTrue peak: -2 dBTP\nLoudness range: 7 LU\nIf disabled, the audio loudness will match the original loudness of the selected voice, which may vary significantly and be either too quiet or too loud.\nEnabling loudness normalization can increase latency due to additional processing required for audio level adjustments."},"text_normalization":{"type":"boolean","default":false,"description":"Determines whether to normalize the text. If enabled, it will transform numbers, dates, etc. into words. 
For example, \"55\" is normalized into \"fifty five\".\nThis can increase latency due to additional processing required for text normalization."}},"description":"GetStreamOptionsRequest is the wrapper for request parameters to the client","title":"GetStreamOptionsRequest"},"GetStreamRequest":{"type":"object","properties":{"input":{"type":"string","description":"Plain text or SSML to be synthesized to speech.\nRefer to https://docs.speechify.ai/docs/api-limits for the input size limits.\nEmotion, pitch, and speed rate are configured in the SSML input; refer to the SSML documentation for more information: https://docs.speechify.ai/docs/ssml#prosody"},"language":{"type":"string","description":"Language of the input. Follow the format of an ISO 639-1 language code and an ISO 3166-1 region code, separated by a hyphen, e.g. en-US.\nPlease refer to the list of the supported languages and recommendations regarding this parameter: https://docs.speechify.ai/docs/language-support."},"model":{"$ref":"#/components/schemas/GetStreamRequestModel","default":"simba-english","description":"Model used for audio synthesis. `simba-english` is optimized for English, `simba-multilingual` for non-English or mixed input. `simba-3.0` is the streaming-native model with lower TTFB and richer expressivity. Currently English only; multilingual coming soon. Non-English voices return 400 until multilingual support ships."},"options":{"$ref":"#/components/schemas/GetStreamOptionsRequest"},"voice_id":{"type":"string","description":"ID of the voice to be used for synthesizing speech. 
Refer to the `GET /v1/voices` endpoint for available voices."}},"required":["input","voice_id"],"description":"GetStreamRequest is the wrapper for request parameters to the client","title":"GetStreamRequest"},"GetVoiceGender":{"type":"string","enum":["male","female","not_specified"],"title":"GetVoiceGender"},"GetVoiceLanguage":{"type":"object","properties":{"locale":{"type":"string"},"preview_audio":{"type":["string","null"]}},"required":["locale"],"title":"GetVoiceLanguage"},"GetVoicesModelName":{"type":"string","enum":["simba-english","simba-multilingual","simba-3.0"],"title":"GetVoicesModelName"},"GetVoicesModel":{"type":"object","properties":{"languages":{"type":"array","items":{"$ref":"#/components/schemas/GetVoiceLanguage"}},"name":{"$ref":"#/components/schemas/GetVoicesModelName"}},"required":["languages","name"],"title":"GetVoicesModel"},"GetVoiceType":{"type":"string","enum":["shared","personal"],"title":"GetVoiceType"},"GetVoice":{"type":"object","properties":{"avatar_image":{"type":["string","null"]},"display_name":{"type":"string"},"gender":{"$ref":"#/components/schemas/GetVoiceGender"},"locale":{"type":"string"},"id":{"type":"string"},"models":{"type":"array","items":{"$ref":"#/components/schemas/GetVoicesModel"}},"preview_audio":{"type":["string","null"]},"tags":{"type":["array","null"],"items":{"type":"string"}},"type":{"$ref":"#/components/schemas/GetVoiceType"}},"required":["display_name","gender","locale","id","models","type"],"title":"GetVoice"},"ListVoicesResponse":{"type":"object","properties":{"next_cursor":{"type":["string","null"],"description":"Opaque keyset cursor for the next page. Pass back as the\n`cursor` request parameter. 
`null` when the caller has\nreached the end of the list (`has_more` is also `false`\nin that case).\n"},"has_more":{"type":"boolean","description":"True when more rows exist beyond this page."},"voices":{"type":"array","items":{"$ref":"#/components/schemas/GetVoice"}}},"required":["next_cursor","has_more","voices"],"description":"Payload for `GET /v1/voices` - the shared voice catalog plus the\nworkspace's personal cloned voices. Carries the shared pagination\nmetadata so the list can be paged when `limit` is supplied;\n`next_cursor` is null and `has_more` is false when the full\ncatalog is returned in one response.\n","title":"ListVoicesResponse"},"V1VoicesPostRequestBodyContentMultipartFormDataSchemaGender":{"type":"string","enum":["male","female","not_specified"],"description":"Gender marker for the personal voice\nmale GenderMale\nfemale GenderFemale\nnot_specified GenderNotSpecified","title":"V1VoicesPostRequestBodyContentMultipartFormDataSchemaGender"}},"securitySchemes":{"bearerAuth":{"type":"http","scheme":"bearer","description":"Enter your API key with the `Bearer` prefix, e.g. 'Bearer sk_...'."}}}}