{"version":1,"pages":[{"id":"kzTlst3tKo255yz4YpDi","title":"Quickstart","pathname":"/docs","siteSpaceId":"sitesp_mNBL6","emoji":"1f44b","description":"Get started with Infron","breadcrumbs":[{"label":"Document"},{"label":"Overview"}]},{"id":"HYwJ9ECRWrwWZf9Iv4cb","title":"Text","pathname":"/docs/overview/quickstart/text","siteSpaceId":"sitesp_mNBL6","description":"Text Generation Quickstart","breadcrumbs":[{"label":"Document"},{"label":"Overview"},{"label":"Quickstart","emoji":"1f44b"}]},{"id":"1bAdbFWqJy8GTNfKwF8p","title":"Image","pathname":"/docs/overview/quickstart/image","siteSpaceId":"sitesp_mNBL6","description":"Image Generation Quickstart","breadcrumbs":[{"label":"Document"},{"label":"Overview"},{"label":"Quickstart","emoji":"1f44b"}]},{"id":"CqjyssSCfkdus5B6pIcG","title":"Video","pathname":"/docs/overview/quickstart/video","siteSpaceId":"sitesp_mNBL6","description":"Video Generation Quickstart","breadcrumbs":[{"label":"Document"},{"label":"Overview"},{"label":"Quickstart","emoji":"1f44b"}]},{"id":"cvdJ3cyZw0OlUuJzh4CZ","title":"Audio","pathname":"/docs/overview/quickstart/audio","siteSpaceId":"sitesp_mNBL6","description":"Audio Generation Quickstart","breadcrumbs":[{"label":"Document"},{"label":"Overview"},{"label":"Quickstart","emoji":"1f44b"}]},{"id":"1fuCFLPtcMUjVcHlNnFR","title":"Search","pathname":"/docs/overview/quickstart/search","siteSpaceId":"sitesp_mNBL6","description":"Search, Deepsearch & Extract Quickstart","breadcrumbs":[{"label":"Document"},{"label":"Overview"},{"label":"Quickstart","emoji":"1f44b"}]},{"id":"dPJ3lH8ZZVPUTMqlzPTQ","title":"Embedding","pathname":"/docs/overview/quickstart/embedding","siteSpaceId":"sitesp_mNBL6","description":"Embedding & Reranker Quickstart","breadcrumbs":[{"label":"Document"},{"label":"Overview"},{"label":"Quickstart","emoji":"1f44b"}]},{"id":"sQF9BTt8eG2vZIQoEGf5","title":"Batch","pathname":"/docs/overview/quickstart/batch","siteSpaceId":"sitesp_mNBL6","description":"Batch API 
Quickstart","breadcrumbs":[{"label":"Document"},{"label":"Overview"},{"label":"Quickstart","emoji":"1f44b"}]},{"id":"JjjojIyKxaiBPzzLwvtg","title":"Platform Overview","pathname":"/docs/overview/introduction","siteSpaceId":"sitesp_mNBL6","icon":"globe-pointer","description":"Infron - The world’s first AI Model Marketplace and Inference Provider Routing Platform","breadcrumbs":[{"label":"Document"},{"label":"Overview"}]},{"id":"CyH2xJQs9yWJ1S8BYNav","title":"FAQ","pathname":"/docs/overview/faq","siteSpaceId":"sitesp_mNBL6","icon":"question","description":"Common questions about Infron AI.","breadcrumbs":[{"label":"Document"},{"label":"Overview"}]},{"id":"QdB1wA7WedVMpWG6omBA","title":"Pricing and Fee Structure","pathname":"/docs/overview/pricing-and-fee-structure","siteSpaceId":"sitesp_mNBL6","icon":"money-bills","description":"Understanding Infron’s usage-based pricing model.","breadcrumbs":[{"label":"Document"},{"label":"Overview"}]},{"id":"u0Iopmf5CGGFEpLtjuv7","title":"Inference Provider Routing","pathname":"/docs/routing-and-gateway/inference-provider-routing","siteSpaceId":"sitesp_mNBL6","description":"Route requests to the best inference provider","breadcrumbs":[{"label":"Document"},{"label":"Routing & Gateway"}]},{"id":"YrlOtdZA8cyZueuTNgvi","title":"BYOK","pathname":"/docs/routing-and-gateway/byok","siteSpaceId":"sitesp_mNBL6","description":"Bring your own provider API keys","breadcrumbs":[{"label":"Document"},{"label":"Routing & Gateway"}]},{"id":"ovd49vuay9tPTdFM99P4","title":"Available Providers","pathname":"/docs/routing-and-gateway/available-providers","siteSpaceId":"sitesp_mNBL6","breadcrumbs":[{"label":"Document"},{"label":"Routing & Gateway"}]},{"id":"4eNL0x13BrMsW0Vboy82","title":"Zero Completion Insurance","pathname":"/docs/features/zero-completion-insurance","siteSpaceId":"sitesp_mNBL6","description":"Infron will not charge you for zero token 
responses","breadcrumbs":[{"label":"Document"},{"label":"Features"}]},{"id":"RUTZ7Tpz2l0JZbm7saJ6","title":"Zero Data Retention","pathname":"/docs/features/zero-data-retention","siteSpaceId":"sitesp_mNBL6","description":"How Infron gives you control over your data","breadcrumbs":[{"label":"Document"},{"label":"Features"}]},{"id":"oa0XskrCEgbVlYaOnehE","title":"Structured Outputs","pathname":"/docs/features/structured-outputs","siteSpaceId":"sitesp_mNBL6","description":"Return structured data from your models.","breadcrumbs":[{"label":"Document"},{"label":"Features"}]},{"id":"DSQqnsAqJPnIRyPgQJ1S","title":"Tool Calling","pathname":"/docs/features/tool-calling","siteSpaceId":"sitesp_mNBL6","description":"Tool & Function Calling - Use tools in your prompts","breadcrumbs":[{"label":"Document"},{"label":"Features"}]},{"id":"vgnIccAmDw8Hw0R3rrvO","title":"Prompt Caching","pathname":"/docs/features/prompt-caching","siteSpaceId":"sitesp_mNBL6","description":"Prompt Cache in Infron","breadcrumbs":[{"label":"Document"},{"label":"Features"}]},{"id":"y0YvqPBbQfJAtZH7dOwx","title":"Multimodal Input","pathname":"/docs/features/multimodal-input","siteSpaceId":"sitesp_mNBL6","description":"Send images, PDFs, and audio to Infron AI models","breadcrumbs":[{"label":"Document"},{"label":"Features"}]},{"id":"7HnlXUCn7VMKMgLe46k4","title":"Images Inputs","pathname":"/docs/features/multimodal-input/images-inputs","siteSpaceId":"sitesp_mNBL6","icon":"file-png","description":"How to send images and PDFs to Infron AI","breadcrumbs":[{"label":"Document"},{"label":"Features"},{"label":"Multimodal Input"}]},{"id":"RdO3RjBkwUeCxg8pHHlU","title":"PDF Inputs","pathname":"/docs/features/multimodal-input/pdf-inputs","siteSpaceId":"sitesp_mNBL6","icon":"file-pdf","description":"How to send PDFs to Infron AI models","breadcrumbs":[{"label":"Document"},{"label":"Features"},{"label":"Multimodal Input"}]},{"id":"xZQrElU3hviWGtfvprg1","title":"Audio 
Inputs","pathname":"/docs/features/multimodal-input/audio-inputs","siteSpaceId":"sitesp_mNBL6","icon":"file-mp4","description":"How to send audio files to Infron AI models","breadcrumbs":[{"label":"Document"},{"label":"Features"},{"label":"Multimodal Input"}]},{"id":"H8VykHjvfr12XJlhK1K3","title":"Video Inputs","pathname":"/docs/features/multimodal-input/video-inputs","siteSpaceId":"sitesp_mNBL6","icon":"file-mov","breadcrumbs":[{"label":"Document"},{"label":"Features"},{"label":"Multimodal Input"}]},{"id":"sUQPi1dTBpGU8D2HXpel","title":"Reasoning & Thinking","pathname":"/docs/features/reasoning-and-thinking","siteSpaceId":"sitesp_mNBL6","breadcrumbs":[{"label":"Document"},{"label":"Features"}]},{"id":"bmej4MFjy0sYtLIbgPwO","title":"1M Token Long Context Window","pathname":"/docs/features/1m-token-long-context-window","siteSpaceId":"sitesp_mNBL6","breadcrumbs":[{"label":"Document"},{"label":"Features"}]},{"id":"2mqKCdMAi9BxJ4DUHfaN","title":"Web Search","pathname":"/docs/features/web-search","siteSpaceId":"sitesp_mNBL6","description":"This document explains how to use the Web Search feature on the Infron platform.","breadcrumbs":[{"label":"Document"},{"label":"Features"}]},{"id":"mAM4r3u0zqPkXpWicLOM","title":"Overview","pathname":"/docs/frameworks-and-integrations/overview","siteSpaceId":"sitesp_mNBL6","icon":"cubes","description":"Using Infron AI with Popular Frameworks and Integrations","breadcrumbs":[{"label":"Document"},{"label":"Frameworks and Integrations"}]},{"id":"zsHFyc2tVslxqegr3rEr","title":"OpenAI SDK","pathname":"/docs/frameworks-and-integrations/openai-sdk","siteSpaceId":"sitesp_mNBL6","icon":"cubes","description":"Using Infron AI with OpenAI SDK","breadcrumbs":[{"label":"Document"},{"label":"Frameworks and Integrations"}]},{"id":"X1TD9y6h3JRJrBgZuwts","title":"LangChain","pathname":"/docs/frameworks-and-integrations/langchain","siteSpaceId":"sitesp_mNBL6","icon":"cubes","description":"Using Infron AI with 
LangChain","breadcrumbs":[{"label":"Document"},{"label":"Frameworks and Integrations"}]},{"id":"KEx8k6qVzfBWw63XxRJl","title":"PydanticAI","pathname":"/docs/frameworks-and-integrations/pydanticai","siteSpaceId":"sitesp_mNBL6","icon":"cubes","description":"Using Infron AI with PydanticAI","breadcrumbs":[{"label":"Document"},{"label":"Frameworks and Integrations"}]},{"id":"Q7ePhANlFH33fyTeO1Lb","title":"Langfuse","pathname":"/docs/frameworks-and-integrations/langfuse","siteSpaceId":"sitesp_mNBL6","icon":"cubes","description":"Using Infron AI with Langfuse","breadcrumbs":[{"label":"Document"},{"label":"Frameworks and Integrations"}]},{"id":"D4UFkja3ip2PuWASwwB1","title":"n8n","pathname":"/docs/frameworks-and-integrations/n8n","siteSpaceId":"sitesp_mNBL6","icon":"cubes","description":"Build AI automations with Infron AI & n8n","breadcrumbs":[{"label":"Document"},{"label":"Frameworks and Integrations"}]},{"id":"sS8UsILwwl0D4nc7Dly8","title":"Claude Code Integration Guide","pathname":"/docs/frameworks-and-integrations/claude-code-integration-guide","siteSpaceId":"sitesp_mNBL6","icon":"cubes","description":"Use Claude Agent SDK and Claude Code with Infron AI models","breadcrumbs":[{"label":"Document"},{"label":"Frameworks and Integrations"}]},{"id":"N3uSC9lHaCdJI0fhO5Fj","title":"Anthropic SDK Compatibility","pathname":"/docs/frameworks-and-integrations/anthropic-sdk-compatibility","siteSpaceId":"sitesp_mNBL6","icon":"cubes","description":"Use Anthropic SDK with Infron AI models","breadcrumbs":[{"label":"Document"},{"label":"Frameworks and Integrations"}]},{"id":"H8OjBu3hTQRHTbjjnu4m","title":"OpenAI Codex CLI","pathname":"/docs/frameworks-and-integrations/openai-codex-cli","siteSpaceId":"sitesp_mNBL6","icon":"cubes","description":"Use Codex CLI with Infron AI models","breadcrumbs":[{"label":"Document"},{"label":"Frameworks and Integrations"}]},{"id":"rGnhtuIrRpRAJoDAv7tG","title":"OpenAI Agents 
SDK","pathname":"/docs/frameworks-and-integrations/openai-agents-sdk","siteSpaceId":"sitesp_mNBL6","icon":"cubes","description":"Use OpenAI Agents SDK with Infron AI models","breadcrumbs":[{"label":"Document"},{"label":"Frameworks and Integrations"}]},{"id":"IBYdvWzDNtKGSwPutHUh","title":"LiteLLM","pathname":"/docs/frameworks-and-integrations/litellm","siteSpaceId":"sitesp_mNBL6","icon":"cubes","description":"Integration with LiteLLM's OpenAI-Compatible Endpoints with Infron AI","breadcrumbs":[{"label":"Document"},{"label":"Frameworks and Integrations"}]},{"id":"27nSvAkrcuR18FQNRsh7","title":"OpenCode","pathname":"/docs/frameworks-and-integrations/opencode","siteSpaceId":"sitesp_mNBL6","icon":"cubes","description":"Kickstart OpenCode with Infron","breadcrumbs":[{"label":"Document"},{"label":"Frameworks and Integrations"}]},{"id":"B57edXnSL6Odkq9aK79k","title":"OpenClaw","pathname":"/docs/frameworks-and-integrations/openclaw","siteSpaceId":"sitesp_mNBL6","icon":"cubes","description":"Guide to Using OpenClaw with Infron","breadcrumbs":[{"label":"Document"},{"label":"Frameworks and Integrations"}]},{"id":"Ak4XCBigCcoxSD8ngyJx","title":"OpenWork","pathname":"/docs/frameworks-and-integrations/openwork","siteSpaceId":"sitesp_mNBL6","icon":"cubes","breadcrumbs":[{"label":"Document"},{"label":"Frameworks and Integrations"}]},{"id":"lQsze0lHMucEH736BBBj","title":"Hermes Agent","pathname":"/docs/frameworks-and-integrations/hermes-agent","siteSpaceId":"sitesp_mNBL6","icon":"cubes","breadcrumbs":[{"label":"Document"},{"label":"Frameworks and Integrations"}]},{"id":"7xfYJnf8Verps374Om8E","title":"Billing Tracking","pathname":"/docs/observability/billing-tracking","siteSpaceId":"sitesp_mNBL6","description":"Get cost details and usage details in every call","breadcrumbs":[{"label":"Document"},{"label":"Observability"}]},{"id":"SCdqZWvfsOAhFvF1LvoJ","title":"Billing 
Logs","pathname":"/docs/observability/billing-logs","siteSpaceId":"sitesp_mNBL6","breadcrumbs":[{"label":"Document"},{"label":"Observability"}]},{"id":"RaoDJYyxvLZ5mANTYJRU","title":"Latency","pathname":"/docs/observability/latency","siteSpaceId":"sitesp_mNBL6","description":"Understanding Infron's performance characteristics.","breadcrumbs":[{"label":"Document"},{"label":"Observability"}]},{"id":"YjRoRGUejYjkAdLm2Gbu","title":"Performance Analysis","pathname":"/docs/observability/performance-analysis","siteSpaceId":"sitesp_mNBL6","description":"Performance Analysis","breadcrumbs":[{"label":"Document"},{"label":"Observability"}]},{"id":"riqsMherkNg2Vlyt0xJk","title":"Test Token Cache Rate","pathname":"/docs/test-cases/test-token-cache-rate","siteSpaceId":"sitesp_mNBL6","description":"Infron AI Token cache Rate A/B Testing Guide.","breadcrumbs":[{"label":"Document"},{"label":"Test Cases"}]},{"id":"ZJZvbTCKWPBegzaHo6FY","title":"Performance Stress Testing","pathname":"/docs/test-cases/performance-stress-testing","siteSpaceId":"sitesp_mNBL6","description":"Infron AI Performance Stress Testing Guide.","breadcrumbs":[{"label":"Document"},{"label":"Test Cases"}]},{"id":"QwHFLVleY7QejI9V4gc3","title":"Privacy and Logging","pathname":"/docs/support/privacy-logging","siteSpaceId":"sitesp_mNBL6","description":"Making sure your data is safe","breadcrumbs":[{"label":"Document"},{"label":"Support"}]},{"id":"OjU2jy2A9TNGktHfwsBL","title":"Contact Us","pathname":"/docs/support/contact-us","siteSpaceId":"sitesp_mNBL6","breadcrumbs":[{"label":"Document"},{"label":"Support"}]},{"id":"vLq2J9ZA2tCNYMMawT6n","title":"Join Community","pathname":"/docs/support/join-community","siteSpaceId":"sitesp_mNBL6","breadcrumbs":[{"label":"Document"},{"label":"Support"}]},{"id":"CRzLvmTC3tIDo8l4g4os","title":"Streaming","pathname":"/docs/llm-apis","siteSpaceId":"sitesp_RfF1T","description":"The Infron API allows streaming responses from any model. 
This is useful for building chat interfaces or other applications where the UI should update as the model generates the response.","breadcrumbs":[{"label":"LLM APIs"},{"label":"API Guides"}]},{"id":"8QyGsTvRgf1PVdALzk5u","title":"Authentication","pathname":"/docs/llm-apis/api-guides/authentication","siteSpaceId":"sitesp_RfF1T","description":"API Authentication","breadcrumbs":[{"label":"LLM APIs"},{"label":"API Guides"}]},{"id":"1QXbSpA0CPUi22uZpmxN","title":"Error codes","pathname":"/docs/llm-apis/api-guides/errors-code","siteSpaceId":"sitesp_RfF1T","description":"API Errors","breadcrumbs":[{"label":"LLM APIs"},{"label":"API Guides"}]},{"id":"7FvWQMF0kTK7HGhlQfmo","title":"Overview","pathname":"/docs/llm-apis/openai-compatible-api/overview","siteSpaceId":"sitesp_RfF1T","icon":"bolt","breadcrumbs":[{"label":"LLM APIs"},{"label":"OpenAI-compatible API"}]},{"id":"0a43ea1e692970b6bf2ae03375fe97fe3b57ee67","title":"Create a chat completion","pathname":"/docs/llm-apis/openai-compatible-api/create-a-chat-completion","siteSpaceId":"sitesp_RfF1T","description":"","breadcrumbs":[{"label":"LLM APIs"},{"label":"OpenAI-compatible API"}]},{"id":"09d34fc3767afd88b61e835be6fe12baf0f19f75","title":"Chat with Images Inputs","pathname":"/docs/llm-apis/openai-compatible-api/chat-with-images-inputs","siteSpaceId":"sitesp_RfF1T","description":"","breadcrumbs":[{"label":"LLM APIs"},{"label":"OpenAI-compatible API"}]},{"id":"2bc7c1470b6ecdc2ac920dae884c6df401f6a3a7","title":"Chat with PDF Inputs","pathname":"/docs/llm-apis/openai-compatible-api/chat-with-pdf-inputs","siteSpaceId":"sitesp_RfF1T","description":"","breadcrumbs":[{"label":"LLM APIs"},{"label":"OpenAI-compatible API"}]},{"id":"d07681bc06f2c56a36808e330c36f5392d4f21a1","title":"Chat with Tool Calling","pathname":"/docs/llm-apis/openai-compatible-api/chat-with-tool-calling","siteSpaceId":"sitesp_RfF1T","description":"","breadcrumbs":[{"label":"LLM APIs"},{"label":"OpenAI-compatible 
API"}]},{"id":"d137fb89bccbcb7583a31403eef45f0f9a88b522","title":"Chat with Structured Outputs","pathname":"/docs/llm-apis/openai-compatible-api/chat-with-structured-outputs","siteSpaceId":"sitesp_RfF1T","description":"","breadcrumbs":[{"label":"LLM APIs"},{"label":"OpenAI-compatible API"}]},{"id":"1084ac685cac76bb5c3c1820a8ec0566aaf463d0","title":"Reasoning configuration","pathname":"/docs/llm-apis/openai-compatible-api/reasoning-configuration","siteSpaceId":"sitesp_RfF1T","description":"","breadcrumbs":[{"label":"LLM APIs"},{"label":"OpenAI-compatible API"}]},{"id":"ab20f06a0a1a4ce491a90fec581973769f3822a0","title":"Explicit Caching","pathname":"/docs/llm-apis/openai-compatible-api/explicit-caching","siteSpaceId":"sitesp_RfF1T","description":"","breadcrumbs":[{"label":"LLM APIs"},{"label":"OpenAI-compatible API"}]},{"id":"f9e540f007dc9f040efbb656af496f2ff30a7bc7","title":"Chat with OpenAI Compatible Web Search","pathname":"/docs/llm-apis/openai-compatible-api/chat-with-openai-compatible-web-search","siteSpaceId":"sitesp_RfF1T","description":"","breadcrumbs":[{"label":"LLM APIs"},{"label":"OpenAI-compatible API"}]},{"id":"a9edac5ab7b0e712d8728460d9b612b8a142a445","title":"Chat with OpenAI Compatible Web Fetch","pathname":"/docs/llm-apis/openai-compatible-api/chat-with-openai-compatible-web-fetch","siteSpaceId":"sitesp_RfF1T","description":"","breadcrumbs":[{"label":"LLM APIs"},{"label":"OpenAI-compatible API"}]},{"id":"i73g4LZQanoLj7XtSO18","title":"Overview","pathname":"/docs/llm-apis/anthropic-compatible-api/overview","siteSpaceId":"sitesp_RfF1T","icon":"bolt","breadcrumbs":[{"label":"LLM APIs"},{"label":"Anthropic-compatible API"}]},{"id":"e6eceb65f704010f8ffffb6df5928fab174fb012","title":"Create a message","pathname":"/docs/llm-apis/anthropic-compatible-api/create-a-message","siteSpaceId":"sitesp_RfF1T","description":"","breadcrumbs":[{"label":"LLM APIs"},{"label":"Anthropic-compatible API"}]},{"id":"ec8e014496edcaed86967401c5a05f6b7b6e2849","title":"Chat 
with Tool Calling","pathname":"/docs/llm-apis/anthropic-compatible-api/chat-with-tool-calling","siteSpaceId":"sitesp_RfF1T","description":"","breadcrumbs":[{"label":"LLM APIs"},{"label":"Anthropic-compatible API"}]},{"id":"e16e09e2926debe5847164bd703f9321873da5ba","title":"Reasoning configuration","pathname":"/docs/llm-apis/anthropic-compatible-api/reasoning-configuration","siteSpaceId":"sitesp_RfF1T","description":"","breadcrumbs":[{"label":"LLM APIs"},{"label":"Anthropic-compatible API"}]},{"id":"d03e549f3a67cff56b6171c875157dd190ba9ed2","title":"Chat with Web Search","pathname":"/docs/llm-apis/anthropic-compatible-api/chat-with-web-search","siteSpaceId":"sitesp_RfF1T","description":"","breadcrumbs":[{"label":"LLM APIs"},{"label":"Anthropic-compatible API"}]},{"id":"97d2803f828172151a0fafdd48b961d1e180be18","title":"Chat with File Attachments","pathname":"/docs/llm-apis/anthropic-compatible-api/chat-with-file-attachments","siteSpaceId":"sitesp_RfF1T","description":"","breadcrumbs":[{"label":"LLM APIs"},{"label":"Anthropic-compatible API"}]},{"id":"H0kdeblgutMLwjDlVvVA","title":"Overview","pathname":"/docs/llm-apis/openresponses-api/overview","siteSpaceId":"sitesp_RfF1T","icon":"bolt","breadcrumbs":[{"label":"LLM APIs"},{"label":"OpenResponses API"}]},{"id":"4fbfa179ed99171de8c862184d1e970275c6398d","title":"Create a response","pathname":"/docs/llm-apis/openresponses-api/create-a-response","siteSpaceId":"sitesp_RfF1T","description":"","breadcrumbs":[{"label":"LLM APIs"},{"label":"OpenResponses API"}]},{"id":"1fd6e2925bb19573ee9b71e3a8cf0cc860f66b79","title":"Chat with Tool Calling","pathname":"/docs/llm-apis/openresponses-api/chat-with-tool-calling","siteSpaceId":"sitesp_RfF1T","description":"","breadcrumbs":[{"label":"LLM APIs"},{"label":"OpenResponses API"}]},{"id":"WcZZWbCMK2jH8M5EeuKL","title":"Overview","pathname":"/docs/llm-apis/embeddings-api/overview","siteSpaceId":"sitesp_RfF1T","icon":"bolt","description":"Generate vector embeddings from 
text","breadcrumbs":[{"label":"LLM APIs"},{"label":"Embeddings API"}]},{"id":"fb20749227baedeb5f7a97cda8a01f4a1f0e657d","title":"Submit an embedding request","pathname":"/docs/llm-apis/embeddings-api/submit-an-embedding-request","siteSpaceId":"sitesp_RfF1T","description":"","breadcrumbs":[{"label":"LLM APIs"},{"label":"Embeddings API"}]},{"id":"e4o0J4bf8BvhY41O1muW","title":"Overview","pathname":"/docs/llm-apis/rerank-api/overview","siteSpaceId":"sitesp_RfF1T","icon":"bolt","description":"Generate ranks of relevancy between the query and documents","breadcrumbs":[{"label":"LLM APIs"},{"label":"Rerank API"}]},{"id":"a7316fba9e431bf1ae6b56261f18112c98cdc855","title":"Submit a ranking request","pathname":"/docs/llm-apis/rerank-api/submit-an-ranking-request","siteSpaceId":"sitesp_RfF1T","description":"","breadcrumbs":[{"label":"LLM APIs"},{"label":"Rerank API"}]},{"id":"hEv7BxWPNxJTlt7hk7iB","title":"Overview","pathname":"/docs/media-apis","siteSpaceId":"sitesp_RfKkp","description":"Infron is a unified AI platform that provides access to 500+ state-of-the-art models for image generation, video creation, audio synthesis, and more.","breadcrumbs":[{"label":"Media APIs"},{"label":"API GUIDES"}]},{"id":"UwADHoicsh5VqKzixbsD","title":"Overview","pathname":"/docs/search-apis","siteSpaceId":"sitesp_RfK8A","icon":"bolt","description":"Infron Web Search Model & Agent Integration Overview","breadcrumbs":[{"label":"Search APIs"}]},{"id":"a73470229748261847ea26eca7672716548c9ee2","title":"Tavily","pathname":"/docs/search-apis/search-api/tavily","siteSpaceId":"sitesp_RfK8A","description":"","breadcrumbs":[{"label":"Search APIs"},{"label":"Search API"}]},{"id":"16f318209638d570ca56efcd0a1f21863f5c7f07","title":"Jina","pathname":"/docs/search-apis/search-api/jina","siteSpaceId":"sitesp_RfK8A","description":"","breadcrumbs":[{"label":"Search APIs"},{"label":"Search 
API"}]},{"id":"8a18c4f4e2d03270b846fa4d0029685fcf76473f","title":"Firecrawl","pathname":"/docs/search-apis/search-api/firecrawl","siteSpaceId":"sitesp_RfK8A","description":"","breadcrumbs":[{"label":"Search APIs"},{"label":"Search API"}]},{"id":"df1a86bfa0a384dfce6320b14e0b370a08dc5854","title":"Perplexity","pathname":"/docs/search-apis/search-api/perplexity","siteSpaceId":"sitesp_RfK8A","description":"","breadcrumbs":[{"label":"Search APIs"},{"label":"Search API"}]},{"id":"c9fbd25b693b0f136abe9f9215c9bc00f3fba77f","title":"Exa","pathname":"/docs/search-apis/search-api/exa","siteSpaceId":"sitesp_RfK8A","description":"","breadcrumbs":[{"label":"Search APIs"},{"label":"Search API"}]},{"id":"c6e10c2b7d0fe4be1485d63aed0bb7331b555459","title":"Cloudsway","pathname":"/docs/search-apis/search-api/cloudsway","siteSpaceId":"sitesp_RfK8A","description":"","breadcrumbs":[{"label":"Search APIs"},{"label":"Search API"}]},{"id":"4775350ef5dbf04777d9316c3bfac410d7992f33","title":"Tavily","pathname":"/docs/search-apis/extract-api/tavily","siteSpaceId":"sitesp_RfK8A","description":"","breadcrumbs":[{"label":"Search APIs"},{"label":"Extract API"}]},{"id":"XELCqOCQZk5pKoNFKgim","title":"Overview","pathname":"/docs/batch-apis","siteSpaceId":"sitesp_5uVL9","icon":"bolt","breadcrumbs":[{"label":"Batch APIs"}]},{"id":"e9f20f4e444a6e008ceda45e2907c5875478fc42","title":"Create New Batch","pathname":"/docs/batch-apis/llm-batch-api/create-new-batch","siteSpaceId":"sitesp_5uVL9","description":"","breadcrumbs":[{"label":"Batch APIs"},{"label":"LLM Batch API"}]},{"id":"145aaf645bcffc94188c7f360b296e22c5f1168a","title":"Get Status of a Batch","pathname":"/docs/batch-apis/llm-batch-api/get-status-of-a-batch","siteSpaceId":"sitesp_5uVL9","description":"","breadcrumbs":[{"label":"Batch APIs"},{"label":"LLM Batch API"}]},{"id":"e529d600df5f45131cc214f07fe564c73f9ec33f","title":"Cancel a 
Batch","pathname":"/docs/batch-apis/llm-batch-api/cancel-a-batch","siteSpaceId":"sitesp_5uVL9","description":"","breadcrumbs":[{"label":"Batch APIs"},{"label":"LLM Batch API"}]},{"id":"72f9138a003ad2c87b188b356c4100f4d824c010","title":"Get remaining credits","pathname":"/docs/billing-apis","siteSpaceId":"sitesp_684Pa","description":"","breadcrumbs":[{"label":"Billing APIs"},{"label":"Credits"}]},{"id":"jOIkMekHbQ6KJnKeM38P","title":"Overview","pathname":"/docs/billing-apis/usage-and-cost/overview","siteSpaceId":"sitesp_684Pa","icon":"bolt","description":"Track AI Model Token Usage and Cost Breakdowns","breadcrumbs":[{"label":"Billing APIs"},{"label":"Usage & Cost"}]},{"id":"3d7372a4ec747e659ba24d399327f57fd11a1086","title":"Get cost & usage details (non streaming)","pathname":"/docs/billing-apis/usage-and-cost/get-cost-and-usage-details-non-streaming","siteSpaceId":"sitesp_684Pa","description":"","breadcrumbs":[{"label":"Billing APIs"},{"label":"Usage & Cost"}]},{"id":"e8be5cdb7942a1f44186c720e675dd87c3b7cbf9","title":"Get cost & usage details (streaming)","pathname":"/docs/billing-apis/usage-and-cost/get-cost-and-usage-details-streaming","siteSpaceId":"sitesp_684Pa","description":"","breadcrumbs":[{"label":"Billing APIs"},{"label":"Usage & Cost"}]},{"id":"743f72202f419f3d3744160abe1b42956fe7b468","title":"Request preview","pathname":"/docs/models-and-providers-apis","siteSpaceId":"sitesp_mYPgy","description":"","breadcrumbs":[{"label":"Models & Providers APIs"},{"label":"SANDBOX API"}]},{"id":"e070139725c84c65766a5411838be4d397460380","title":"Response preview","pathname":"/docs/models-and-providers-apis/sandbox-api/response-preview","siteSpaceId":"sitesp_mYPgy","description":"","breadcrumbs":[{"label":"Models & Providers APIs"},{"label":"SANDBOX API"}]},{"id":"2be150ad93b78a230fc570cfd720789a860da7bd","title":"List all models and their 
properties","pathname":"/docs/models-and-providers-apis/models-api/list-all-models-and-their-properties","siteSpaceId":"sitesp_mYPgy","description":"","breadcrumbs":[{"label":"Models & Providers APIs"},{"label":"Models API"}]},{"id":"7FvWQMF0kTK7HGhlQfmo","title":"What is LLM inference?","pathname":"/docs/llm-inference-handbook","siteSpaceId":"sitesp_IUb8c","breadcrumbs":[{"label":"LLM Inference Handbook"},{"label":"LLM inference basics","icon":"book-open"}]},{"id":"aC1Nmf6ucZ8i4kkuokcf","title":"How does LLM inference work?","pathname":"/docs/llm-inference-handbook/llm-inference-basics/how-does-llm-inference-work","siteSpaceId":"sitesp_IUb8c","description":"During inference, an LLM generates text one token at a time, using its internal attention mechanisms and knowledge of previous context.","breadcrumbs":[{"label":"LLM Inference Handbook"},{"label":"LLM inference basics","icon":"book-open"}]},{"id":"sStxypUOlrmlUWIYX0fp","title":"Where is LLM inference run?","pathname":"/docs/llm-inference-handbook/llm-inference-basics/where-is-llm-inference-run","siteSpaceId":"sitesp_IUb8c","description":"","breadcrumbs":[{"label":"LLM Inference Handbook"},{"label":"LLM inference basics","icon":"book-open"}]},{"id":"YgqDiL39J7DO6GOytX73","title":"Training vs. 
Inference","pathname":"/docs/llm-inference-handbook/llm-inference-basics/training-vs.-inference","siteSpaceId":"sitesp_IUb8c","description":"LLM training and inference are two different phases in the lifecycle of a model.","breadcrumbs":[{"label":"LLM Inference Handbook"},{"label":"LLM inference basics","icon":"book-open"}]},{"id":"i3NQUNZGBrmEmCa5in9f","title":"What is Serverless inference?","pathname":"/docs/llm-inference-handbook/llm-inference-basics/what-is-serverless-inference","siteSpaceId":"sitesp_IUb8c","breadcrumbs":[{"label":"LLM Inference Handbook"},{"label":"LLM inference basics","icon":"book-open"}]},{"id":"ioC5KkFaEs1MMT9Q0E3t","title":"What is Server-based inference?","pathname":"/docs/llm-inference-handbook/llm-inference-basics/what-is-server-based-inference","siteSpaceId":"sitesp_IUb8c","breadcrumbs":[{"label":"LLM Inference Handbook"},{"label":"LLM inference basics","icon":"book-open"}]},{"id":"JFlaIDoznsayvBfq97NG","title":"Serverless vs. Self-hosted LLM inference","pathname":"/docs/llm-inference-handbook/llm-inference-basics/serverless-vs.-self-hosted-llm-inference","siteSpaceId":"sitesp_IUb8c","breadcrumbs":[{"label":"LLM Inference Handbook"},{"label":"LLM inference basics","icon":"book-open"}]},{"id":"GiWuLPTpBfbjsIDVE3iE","title":"Serverless vs. 
Server-based LLM inference","pathname":"/docs/llm-inference-handbook/llm-inference-basics/serverless-vs.-server-based-llm-inference","siteSpaceId":"sitesp_IUb8c","breadcrumbs":[{"label":"LLM Inference Handbook"},{"label":"LLM inference basics","icon":"book-open"}]},{"id":"mYZQmOL51bIleqVgeBeZ","title":"What is distributed inference?","pathname":"/docs/llm-inference-handbook/llm-inference-basics/what-is-distributed-inference","siteSpaceId":"sitesp_IUb8c","breadcrumbs":[{"label":"LLM Inference Handbook"},{"label":"LLM inference basics","icon":"book-open"}]},{"id":"sIKX9PMRqHEXYUG3KEzI","title":"Unified API compatibility","pathname":"/docs/llm-inference-handbook/llm-inference-advanced/unified-api-compatibility","siteSpaceId":"sitesp_IUb8c","breadcrumbs":[{"label":"LLM Inference Handbook"},{"label":"LLM Inference Advanced","icon":"microchip"}]},{"id":"2fn3muOH8HXBe3eLK6a6","title":"OpenAI-compatible API","pathname":"/docs/llm-inference-handbook/llm-inference-advanced/unified-api-compatibility/openai-compatible-api","siteSpaceId":"sitesp_IUb8c","description":"Once an LLM is running, you’ll need a standard way to interact with it. 
That’s where the OpenAI-compatible API comes in.","breadcrumbs":[{"label":"LLM Inference Handbook"},{"label":"LLM Inference Advanced","icon":"microchip"},{"label":"Unified API compatibility"}]},{"id":"4R76mz26381849NaceZK","title":"Structured outputs","pathname":"/docs/llm-inference-handbook/llm-inference-advanced/unified-api-compatibility/structured-outputs","siteSpaceId":"sitesp_IUb8c","breadcrumbs":[{"label":"LLM Inference Handbook"},{"label":"LLM Inference Advanced","icon":"microchip"},{"label":"Unified API compatibility"}]},{"id":"LmcFRCBKK5g7ZgEtrbQY","title":"Function calling","pathname":"/docs/llm-inference-handbook/llm-inference-advanced/unified-api-compatibility/function-calling","siteSpaceId":"sitesp_IUb8c","breadcrumbs":[{"label":"LLM Inference Handbook"},{"label":"LLM Inference Advanced","icon":"microchip"},{"label":"Unified API compatibility"}]},{"id":"i73g4LZQanoLj7XtSO18","title":"Choosing the right model","pathname":"/docs/llm-inference-handbook/llm-inference-advanced/editor","siteSpaceId":"sitesp_IUb8c","description":"The first step is deciding what type of model fits your use case. 
Here’s a breakdown of common model types when it comes to LLMs.","breadcrumbs":[{"label":"LLM Inference Handbook"},{"label":"LLM Inference Advanced","icon":"microchip"}]},{"id":"150pqUvCrXgDpElm3Frd","title":"Choosing the right provider","pathname":"/docs/llm-inference-handbook/llm-inference-advanced/choosing-the-right-provider","siteSpaceId":"sitesp_IUb8c","breadcrumbs":[{"label":"LLM Inference Handbook"},{"label":"LLM Inference Advanced","icon":"microchip"}]},{"id":"bs6hJtInW1ZSUWAUUNBC","title":"Choosing the right GPU","pathname":"/docs/llm-inference-handbook/llm-inference-advanced/choosing-the-right-gpu","siteSpaceId":"sitesp_IUb8c","breadcrumbs":[{"label":"LLM Inference Handbook"},{"label":"LLM Inference Advanced","icon":"microchip"}]},{"id":"sHQR4WOC7VmRsfYEWclB","title":"LLM fine-tuning","pathname":"/docs/llm-inference-handbook/llm-inference-advanced/llm-fine-tuning","siteSpaceId":"sitesp_IUb8c","breadcrumbs":[{"label":"LLM Inference Handbook"},{"label":"LLM Inference Advanced","icon":"microchip"}]},{"id":"Z9fAGRJlttbg46WhXEzq","title":"LLM quantization","pathname":"/docs/llm-inference-handbook/llm-inference-advanced/llm-quantization","siteSpaceId":"sitesp_IUb8c","breadcrumbs":[{"label":"LLM Inference Handbook"},{"label":"LLM Inference Advanced","icon":"microchip"}]},{"id":"mawevnIUMPX4xTQWCZN4","title":"Choosing the right inference framework","pathname":"/docs/llm-inference-handbook/llm-inference-advanced/choosing-the-right-inference-framework","siteSpaceId":"sitesp_IUb8c","breadcrumbs":[{"label":"LLM Inference Handbook"},{"label":"LLM Inference Advanced","icon":"microchip"}]},{"id":"8StGW9tkI1qEVjdCJZ38","title":"What is LLM inference infrastructure?","pathname":"/docs/llm-inference-handbook/infrastructure-and-operations/what-is-llm-inference-infrastructure","siteSpaceId":"sitesp_IUb8c","breadcrumbs":[{"label":"LLM Inference Handbook"},{"label":"Infrastructure and operations","icon":"gear-complex"}]},{"id":"iJwCOBLsiwGoNmwGSZdG","title":"Challenges in 
building infrastructure for LLM inference","pathname":"/docs/llm-inference-handbook/infrastructure-and-operations/challenges-in-building-infrastructure-for-llm-inference","siteSpaceId":"sitesp_IUb8c","breadcrumbs":[{"label":"LLM Inference Handbook"},{"label":"Infrastructure and operations","icon":"gear-complex"}]},{"id":"DxIl8vP4j5UTJnQNazqe","title":"How to build faster inference for open-source models","pathname":"/docs/llm-inference-handbook/infrastructure-and-operations/how-to-build-faster-inference-for-open-source-models","siteSpaceId":"sitesp_IUb8c","breadcrumbs":[{"label":"LLM Inference Handbook"},{"label":"Infrastructure and operations","icon":"gear-complex"}]},{"id":"zFzdcEaneqcSckbYJI2X","title":"Multi-cloud and cross-region inference","pathname":"/docs/llm-inference-handbook/infrastructure-and-operations/multi-cloud-and-cross-region-inference","siteSpaceId":"sitesp_IUb8c","breadcrumbs":[{"label":"LLM Inference Handbook"},{"label":"Infrastructure and operations","icon":"gear-complex"}]},{"id":"6lN3NI7BCuOUjNnHSVDw","title":"On-prem LLM deployments","pathname":"/docs/llm-inference-handbook/infrastructure-and-operations/on-prem-llm-deployments","siteSpaceId":"sitesp_IUb8c","breadcrumbs":[{"label":"LLM Inference Handbook"},{"label":"Infrastructure and operations","icon":"gear-complex"}]},{"id":"7wcBYU58Zfx4JHewxyMB","title":"InferenceOps and management","pathname":"/docs/llm-inference-handbook/infrastructure-and-operations/inferenceops-and-management","siteSpaceId":"sitesp_IUb8c","breadcrumbs":[{"label":"LLM Inference Handbook"},{"label":"Infrastructure and operations","icon":"gear-complex"}]},{"id":"Z1r60oiZj0w89veHUXTM","title":"Key metrics for LLM inference","pathname":"/docs/llm-inference-handbook/inference-optimization/key-metrics-for-llm-inference","siteSpaceId":"sitesp_IUb8c","breadcrumbs":[{"label":"LLM Inference Handbook"},{"label":"Inference optimization","icon":"bolt"}]},{"id":"mnG0N9iJ7QwlM1BQ4frA","title":"Static, dynamic and continuous 
batching","pathname":"/docs/llm-inference-handbook/inference-optimization/static-dynamic-and-continuous-batching","siteSpaceId":"sitesp_IUb8c","breadcrumbs":[{"label":"LLM Inference Handbook"},{"label":"Inference optimization","icon":"bolt"}]},{"id":"J8y74o6yOVcy9GDecw1h","title":"Speculative decoding","pathname":"/docs/llm-inference-handbook/inference-optimization/speculative-decoding","siteSpaceId":"sitesp_IUb8c","breadcrumbs":[{"label":"LLM Inference Handbook"},{"label":"Inference optimization","icon":"bolt"}]},{"id":"Bb1TfL12uGIet8pu6i8w","title":"Prefill-decode disaggregation","pathname":"/docs/llm-inference-handbook/inference-optimization/prefill-decode-disaggregation","siteSpaceId":"sitesp_IUb8c","breadcrumbs":[{"label":"LLM Inference Handbook"},{"label":"Inference optimization","icon":"bolt"}]},{"id":"2q6i3zbg5HlVDZN0M4AW","title":"Prefix caching","pathname":"/docs/llm-inference-handbook/inference-optimization/prefix-caching","siteSpaceId":"sitesp_IUb8c","breadcrumbs":[{"label":"LLM Inference Handbook"},{"label":"Inference optimization","icon":"bolt"}]},{"id":"EOdYVKUz3mJSo7hT4un3","title":"KV cache offloading","pathname":"/docs/llm-inference-handbook/inference-optimization/kv-cache-offloading","siteSpaceId":"sitesp_IUb8c","breadcrumbs":[{"label":"LLM Inference Handbook"},{"label":"Inference optimization","icon":"bolt"}]},{"id":"MQMAtssZUP2aRXaWAFdE","title":"Offline batch inference","pathname":"/docs/llm-inference-handbook/inference-optimization/offline-batch-inference","siteSpaceId":"sitesp_IUb8c","breadcrumbs":[{"label":"LLM Inference Handbook"},{"label":"Inference optimization","icon":"bolt"}]}]}