OpenAI API Compatibility
The CompactifAI API provides OpenAI-compatible endpoints that allow you to use existing applications and libraries that work with OpenAI’s API with minimal changes. This compatibility layer makes it easy to quickly evaluate CompactifAI models and to switch from other providers with just a few code changes.
Getting Started
Section titled “Getting Started”
- Replace your OpenAI API key with your CompactifAI key
- Update base URL to your CompactifAI endpoint
- Use CompactifAI model names (e.g., “cai-llama-3-1-8b-slim”)
```python
from openai import OpenAI

# Initialize the client with your CompactifAI API endpoint
client = OpenAI(
    api_key="your-compactifai-api-key",  # CompactifAI API key
    base_url="https://your-compactifai-api-endpoint/v1"  # Replace with your endpoint
)

# Chat completions
chat_completion = client.chat.completions.create(
    model="cai-llama-3-1-8b-slim",  # Use any one of the available CompactifAI models
    messages=[
        {"role": "system", "content": "You are a helpful assistant."},
        {"role": "user", "content": "Hello, what can you tell me about CompactifAI?"}
    ],
    temperature=0.7,
    max_tokens=256
)
print(chat_completion.choices[0].message.content)

# Streaming chat completions
stream = client.chat.completions.create(
    model="cai-llama-3-1-8b-slim",
    messages=[
        {"role": "system", "content": "You are a helpful assistant."},
        {"role": "user", "content": "Tell me about artificial intelligence"}
    ],
    temperature=0.7,
    max_tokens=256,
    stream=True
)

for chunk in stream:
    if chunk.choices[0].delta.content is not None:
        print(chunk.choices[0].delta.content, end="")

# Audio transcriptions (JSON only)
with open("meeting_minutes.mp3", "rb") as audio_file:
    transcription = client.audio.transcriptions.create(
        model="whisper-large-v3",
        file=audio_file,
        language="en",
        temperature=0
    )
print(transcription.text)
```

Compatible Endpoints
Section titled “Compatible Endpoints”| Endpoint | Description |
|---|---|
| /v1/chat/completions | Chat conversations |
| /v1/completions | Text completions |
| /v1/responses | Responses API (models with supports_responses: true only) |
| /v1/audio/transcriptions | Speech-to-text transcription (non-streaming JSON) |
| /v1/models | List available models |
Request Examples
Section titled “Request Examples”Chat Completions
Section titled “Chat Completions”{ "model": "cai-llama-3-1-8b-slim", "messages": [ {"role": "system", "content": "You are a helpful assistant."}, {"role": "user", "content": "Hello, I'm Camillo"}, {"role": "user", "content": "What is my name? What is the capital of Colombia?"} ], "temperature": 0.7, "max_tokens": 128, "stop": ["###"], "n": 1, "user": "user-123"}Chat Completions with Streaming
Section titled “Chat Completions with Streaming”{ "model": "cai-llama-3-1-8b-slim", "messages": [ {"role": "system", "content": "You are a helpful assistant."}, {"role": "user", "content": "Tell me about artificial intelligence"} ], "temperature": 0.7, "max_tokens": 128, "stream": true, "user": "user-123"}Chat Completions with Image Understanding
Section titled “Chat Completions with Image Understanding”{ "model": "mistral-small-3-1", "messages": [ { "role": "user", "content": [ { "type": "text", "text": "What is in this image?" }, { "type": "image_url", "image_url": { "url": "https://cdn.britannica.com/86/170586-120-7E23E561/Taj-Mahal-Agra-India.jpg" } } ] } ], "temperature": 0.7, "max_tokens": 128, "stream": true, "user": "user-123"}Chat Completions with Audio Input
Section titled “Chat Completions with Audio Input”
For models that advertise supports_audio: true in GET /v1/models, you can send audio in a chat request using an input_audio content part.
{ "model": "nemotron-3-nano-omni", "messages": [ { "role": "user", "content": [ { "type": "text", "text": "What is this audio file about?" }, { "type": "input_audio", "input_audio": { "data": "BASE64_AUDIO_HERE", "format": "wav" } } ] } ], "max_tokens": 256}Text Completions
Section titled “Text Completions”{ "model": "cai-llama-3-1-8b-slim", "prompt": "What is the capital of France?", "temperature": 0.7, "max_tokens": 128, "stop": ["###"], "user": "user-123"}Responses
Section titled “Responses”{ "model": "hypernova-60b", "input": "Say hello in one short sentence.", "store": false, "temperature": 0.7, "max_output_tokens": 128, "text": { "format": { "type": "text" } }, "reasoning": { "effort": null, "summary": null }}Responses with Streaming
Section titled “Responses with Streaming”{ "model": "hypernova-60b", "input": "Tell me about artificial intelligence.", "temperature": 0.7, "max_output_tokens": 256, "stream": true}Audio Transcriptions (Python SDK)
Section titled “Audio Transcriptions (Python SDK)”
```python
from openai import OpenAI

client = OpenAI(
    api_key="your-compactifai-api-key",
    base_url="https://your-compactifai-api-endpoint/v1"
)

with open("meeting_minutes.mp3", "rb") as audio_file:
    transcription = client.audio.transcriptions.create(
        model="whisper-large-v3",
        file=audio_file,
        language="en",
        temperature=0
    )

print(transcription.text)
```

Response Format
Section titled “Response Format”Responses are structured to be compatible with OpenAI’s format:
Chat Completion Response
Section titled “Chat Completion Response”{ "id": "6a172d30ce8e4f34b4b830f8347c3911", "created":1749600000, "model": "hypernova-60b", "object": "chat.completion", "choices": [ { "index": 0, "message": { "role": "assistant", "content": "Hello Camillo. It's nice to meet you.\n\nYour name is Camillo.\n\nThe capital of Colombia is Bogotá." }, "finish_reason": "stop" } ], "usage": { "prompt_tokens": 25, "completion_tokens": 35, "total_tokens": 60 }}Completion Response
Section titled “Completion Response”{ "id": "1861edc39ce648e3862a0b6ae9b7687b", "object": "text_completion", "created":1749600000, "model": "cai-llama-3-1-8b-slim", "choices": [ { "text": "The capital of France is Paris.", "index": 0, "finish_reason": "stop" } ], "usage": { "prompt_tokens": 5, "completion_tokens": 7, "total_tokens": 12 }}Responses
Section titled “Responses”{ "id": "resp_123", "object": "response", "created_at": 1749600000, "model": "hypernova-60b", "status": "completed", "output": [ { "type": "message", "role": "assistant", "content": [ { "type": "output_text", "text": "Hello! How can I help you today?" } ] } ], "usage": { "input_tokens": 12, "output_tokens": 10, "total_tokens": 22 }, "completed_at": 1749600005}Supported Endpoints
Section titled “Supported Endpoints”The CompactifAI OpenAI compatibility layer supports the following endpoints:
Chat Completions
Section titled “Chat Completions”Request Fields
Section titled “Request Fields”| Field | Support Status |
|---|---|
| model | Full (use CompactifAI model names) |
| messages | Full (system, user, assistant roles) |
| max_tokens | Full |
| min_tokens | Full (CompactifAI extension) |
| temperature | Full |
| top_p | Full |
| n | Partial (must be 1) |
| stream | Full |
| stop | Full |
| user | Full |
| presence_penalty | Full |
| frequency_penalty | Full |
| logit_bias | Ignored |
| logprobs | Ignored |
| top_logprobs | Ignored |
| seed | Ignored |
| tools | Full |
| tool_choice | Full (required, auto, none, or a specific tool function) |
| function_call | Ignored (deprecated) |
| functions | Ignored (deprecated) |
| parallel_tool_calls | Ignored |
| response_format | Ignored |
| max_completion_tokens | Ignored |
| data_sources | Ignored (Azure-specific) |
Response Fields
Section titled “Response Fields”| Field | Support Status |
|---|---|
| id | Full |
| created | Full |
| model | Full |
| object | Full |
| choices | Full |
| usage | Full |
For more details, see our API reference or have a look at the OpenAI chat completions documentation.
Text Completions
Section titled “Text Completions”Request Fields
Section titled “Request Fields”| Field | Support Status |
|---|---|
| model | Full (use CompactifAI model names) |
| prompt | Full |
| max_tokens | Full |
| min_tokens | Full (CompactifAI extension) |
| temperature | Full |
| top_p | Full |
| stop | Full |
| user | Full |
| best_of | Ignored |
| echo | Ignored |
| logit_bias | Ignored |
| logprobs | Ignored |
| seed | Ignored |
| suffix | Ignored |
Response Fields
Section titled “Response Fields”| Field | Support Status |
|---|---|
| id | Full |
| created | Full |
| model | Full |
| object | Full |
| choices | Full |
| usage | Full |
For more details, see our API reference or have a look at the OpenAI text completions documentation.
Responses
Section titled “Responses”Request Fields
Section titled “Request Fields”| Field | Support Status |
|---|---|
| model | Partial (must be a configured model with supports_responses: true in GET /v1/models) |
| input | Full (required) |
| store | Full (default false) |
| file_id | Full (optional; the gateway resolves the file via the configured file store and sends it to the backend) |
| temperature | Full |
| max_output_tokens | Full |
| stream | Full |
Response Fields
Section titled “Response Fields”| Field | Support Status |
|---|---|
| id | Full |
| object | Full |
| created_at | Full |
| model | Full |
| status | Full |
| output | Full |
| usage | Full |
| completed_at | Full |
Key Differences from OpenAI API
Section titled “Key Differences from OpenAI API”- Authentication: Uses CompactifAI’s authentication
- Models: Different model names (e.g., “cai-llama-3-1-8b-slim” instead of “gpt-4”)
- Endpoint fields: Some OpenAI-specific fields not supported
- Responses: POST /v1/responses is supported for eligible models.
SDK Compatibility
Section titled “SDK Compatibility”Tested with:
For other SDKs and libraries that are built to work with OpenAI’s API, you should be able to use them by changing the base URL to point to your CompactifAI API endpoint.
SDK Examples
Section titled “SDK Examples”
```python
from openai import OpenAI

client = OpenAI(
    api_key='your-compactifai-api-key',
    base_url='https://your-compactifai-api-endpoint/v1',
)

def main():
    # Regular completion
    completion = client.chat.completions.create(
        model='cai-llama-3-1-8b-slim',
        messages=[
            {'role': 'system', 'content': 'You are a helpful assistant.'},
            {'role': 'user', 'content': 'Tell me about CompactifAI.'}
        ],
        temperature=0.7,
        max_tokens=256
    )

    print(completion.choices[0].message.content)

    # Streaming completion
    stream = client.chat.completions.create(
        model='cai-llama-3-1-8b-slim',
        messages=[
            {'role': 'system', 'content': 'You are a helpful assistant.'},
            {'role': 'user', 'content': 'Tell me about artificial intelligence.'}
        ],
        temperature=0.7,
        max_tokens=256,
        stream=True
    )

    for chunk in stream:
        if chunk.choices[0].delta.content is not None:
            print(chunk.choices[0].delta.content, end="")

if __name__ == "__main__":
    main()
```

```javascript
import OpenAI from 'openai';

const client = new OpenAI({
  apiKey: 'your-compactifai-api-key',
  baseURL: 'https://your-compactifai-api-endpoint/v1',
});

async function main() {
  // Regular completion
  const completion = await client.chat.completions.create({
    model: 'cai-llama-3-1-8b-slim',
    messages: [
      { role: 'system', content: 'You are a helpful assistant.' },
      { role: 'user', content: 'Tell me about CompactifAI.' }
    ],
    temperature: 0.7,
    max_tokens: 256
  });

  console.log(completion.choices[0].message.content);

  // Streaming completion
  const stream = await client.chat.completions.create({
    model: 'cai-llama-3-1-8b-slim',
    messages: [
      { role: 'system', content: 'You are a helpful assistant.' },
      { role: 'user', content: 'Tell me about artificial intelligence.' }
    ],
    temperature: 0.7,
    max_tokens: 256,
    stream: true
  });

  for await (const chunk of stream) {
    if (chunk.choices[0]?.delta?.content) {
      process.stdout.write(chunk.choices[0].delta.content);
    }
  }
}

main();
```

Error Handling
Section titled “Error Handling”
The CompactifAI API returns errors in the same format as the OpenAI API. However, the detailed error messages may differ, so we recommend using them primarily for logging and debugging rather than for program logic.