LiteLLM - Getting Started
Call 100+ LLMs using the same Input/Output Format
Basic usage
pip install litellm
- OpenAI
- Anthropic
- VertexAI
- HuggingFace
- Azure OpenAI
- Ollama
from litellm import completion
import os
## set ENV variables
os.environ["OPENAI_API_KEY"] = "your-api-key"
response = completion(
model="gpt-3.5-turbo",
messages=[{ "content": "Hello, how are you?","role": "user"}]
)
from litellm import completion
import os
## set ENV variables
os.environ["ANTHROPIC_API_KEY"] = "your-api-key"
response = completion(
model="claude-2",
messages=[{ "content": "Hello, how are you?","role": "user"}]
)
from litellm import completion
import os
# auth: run 'gcloud auth application-default'
os.environ["VERTEX_PROJECT"] = "hardy-device-386718"
os.environ["VERTEX_LOCATION"] = "us-central1"
response = completion(
model="chat-bison",
messages=[{ "content": "Hello, how are you?","role": "user"}]
)
from litellm import completion
import os
os.environ["HUGGINGFACE_API_KEY"] = "huggingface_api_key"
# e.g. Call 'WizardLM/WizardCoder-Python-34B-V1.0' hosted on HF Inference endpoints
response = completion(
model="huggingface/WizardLM/WizardCoder-Python-34B-V1.0",
messages=[{ "content": "Hello, how are you?","role": "user"}],
api_base="https://my-endpoint.huggingface.cloud"
)
print(response)
from litellm import completion
import os
## set ENV variables
os.environ["AZURE_API_KEY"] = ""
os.environ["AZURE_API_BASE"] = ""
os.environ["AZURE_API_VERSION"] = ""
# azure call
response = completion(
"azure/<your_deployment_id>",
messages = [{ "content": "Hello, how are you?","role": "user"}]
)
from litellm import completion
response = completion(
model="ollama/llama2",
messages = [{ "content": "Hello, how are you?","role": "user"}],
api_base="http://localhost:11434"
)
Streaming
Set stream=True
in the completion
args.
- OpenAI
- Anthropic
- VertexAI
- HuggingFace
- Azure OpenAI
- Ollama
from litellm import completion
import os
## set ENV variables
os.environ["OPENAI_API_KEY"] = "your-api-key"
response = completion(
model="gpt-3.5-turbo",
messages=[{ "content": "Hello, how are you?","role": "user"}],
stream=True,
)
from litellm import completion
import os
## set ENV variables
os.environ["ANTHROPIC_API_KEY"] = "your-api-key"
response = completion(
model="claude-2",
messages=[{ "content": "Hello, how are you?","role": "user"}],
stream=True,
)
from litellm import completion
import os
# auth: run 'gcloud auth application-default'
os.environ["VERTEX_PROJECT"] = "hardy-device-386718"
os.environ["VERTEX_LOCATION"] = "us-central1"
response = completion(
model="chat-bison",
messages=[{ "content": "Hello, how are you?","role": "user"}],
stream=True,
)
from litellm import completion
import os
os.environ["HUGGINGFACE_API_KEY"] = "huggingface_api_key"
# e.g. Call 'WizardLM/WizardCoder-Python-34B-V1.0' hosted on HF Inference endpoints
response = completion(
model="huggingface/WizardLM/WizardCoder-Python-34B-V1.0",
messages=[{ "content": "Hello, how are you?","role": "user"}],
api_base="https://my-endpoint.huggingface.cloud",
stream=True,
)
print(response)
from litellm import completion
import os
## set ENV variables
os.environ["AZURE_API_KEY"] = ""
os.environ["AZURE_API_BASE"] = ""
os.environ["AZURE_API_VERSION"] = ""
# azure call
response = completion(
"azure/<your_deployment_id>",
messages = [{ "content": "Hello, how are you?","role": "user"}],
stream=True,
)
from litellm import completion
response = completion(
model="ollama/llama2",
messages = [{ "content": "Hello, how are you?","role": "user"}],
api_base="http://localhost:11434",
stream=True,
)
Exception handling
LiteLLM maps exceptions across all supported providers to the OpenAI exceptions. All our exceptions inherit from OpenAI's exception types, so any error-handling you have for that, should work out of the box with LiteLLM.
from openai.errors import OpenAIError
from litellm import completion
os.environ["ANTHROPIC_API_KEY"] = "bad-key"
try:
# some code
completion(model="claude-instant-1", messages=[{"role": "user", "content": "Hey, how's it going?"}])
except OpenAIError as e:
print(e)
Calculate Costs, Usage, Latency
Pass the completion response to litellm.completion_cost(completion_response=response)
and get the cost
from litellm import completion, completion_cost
import os
os.environ["OPENAI_API_KEY"] = "your-api-key"
response = completion(
model="gpt-3.5-turbo",
messages=[{ "content": "Hello, how are you?","role": "user"}]
)
cost = completion_cost(completion_response=response)
print("Cost for completion call with gpt-3.5-turbo: ", f"${float(cost):.10f}")
Output
Cost for completion call with gpt-3.5-turbo: $0.0000775000
LiteLLM API
The LiteLLM API allows you to access LLMs you might not have access to (example Claude-2)
- Claude-2
- GPT-4
- Llama2-70B
- Falcon-40B
import os
from litellm import completion
# use the LiteLLM API Key
os.environ["ANTHROPIC_API_KEY"] = "your-api-key"
messages = [{ "content": "Hello, how are you?","role": "user"}]
response = completion(model="togethercomputer/falcon-40b-instruct", messages=messages)
print(response)
import os
from litellm import completion
# use the LiteLLM API Key
os.environ["OPENAI_API_KEY"] = "your-api-key"
messages = [{ "content": "Hello, how are you?","role": "user"}]
response = completion(model="gpt-4", messages=messages)
print(response)
import os
from litellm import completion
# use the LiteLLM API Key
os.environ["TOGETHERAI_API_KEY"] = "your-api-key"
messages = [{ "content": "Hello, how are you?","role": "user"}]
response = completion(model="togethercomputer/llama-2-70b-chat", messages=messages)
print(response)
import os
from litellm import completion
# use the LiteLLM API Key
os.environ["TOGETHERAI_API_KEY"] = "your-api-key"
messages = [{ "content": "Hello, how are you?","role": "user"}]
response = completion(model="togethercomputer/falcon-40b-instruct", messages=messages)
print(response)
👉 See all supported providers on LiteLLM API
Need a dedicated key? Email us @ krrish@berri.ai