# DeepInfra × LangChain

LangChain is a framework for building applications powered by language models. DeepInfra integrates with LangChain via official adapters for LLMs, chat models, and embeddings.

Available adapters: `DeepInfra` (LLM), `ChatDeepInfra` (chat model), and `DeepInfraEmbeddings` (embeddings).

Installation

pip install langchain langchain-community
Set your API token:
import os

# Every DeepInfra adapter in this process authenticates via this variable.
os.environ.update({"DEEPINFRA_API_TOKEN": "<your DeepInfra API token>"})

LLM examples

import os
from langchain_community.llms import DeepInfra
from langchain_core.prompts import PromptTemplate

# The token must be set before the first request is made.
os.environ["DEEPINFRA_API_TOKEN"] = "<your DeepInfra API token>"

llm = DeepInfra(model_id="deepseek-ai/DeepSeek-V3")
# Sampling parameters are forwarded verbatim to the DeepInfra API.
llm.model_kwargs = {
    "temperature": 0.7,
    "repetition_penalty": 1.2,
    "max_new_tokens": 250,
    "top_p": 0.9,
}

# Basic inference
print(llm.invoke("Who let the dogs out?"))

# Streaming inference
for chunk in llm.stream("Who let the dogs out?"):
    print(chunk)

# Chain with prompt template (LCEL). The chain's input is a dict keyed by
# the template's input variables.
template = """Question: {question}

Answer: Let's think step by step."""
prompt = PromptTemplate(template=template, input_variables=["question"])
llm_chain = prompt | llm
print(llm_chain.invoke({"question": "Can penguins reach the North pole?"}))

Chat examples

import os
from langchain_community.chat_models import ChatDeepInfra
from langchain_core.messages import HumanMessage
from langchain_core.callbacks import StreamingStdOutCallbackHandler

# The token must be set before the first request is made.
os.environ["DEEPINFRA_API_TOKEN"] = "<your DeepInfra API token>"

messages = [
    HumanMessage(content="Translate this sentence from English to French. I love programming.")
]

# Synchronous
chat = ChatDeepInfra(model="deepseek-ai/DeepSeek-V3")
print(chat.invoke(messages))

# Async — run with asyncio.run(async_example()).
async def async_example():
    chat = ChatDeepInfra(model="deepseek-ai/DeepSeek-V3")
    result = await chat.agenerate([messages])
    print(result)

# Streaming: the stdout callback prints tokens as they arrive.
chat_stream = ChatDeepInfra(
    model="deepseek-ai/DeepSeek-V3",
    streaming=True,
    verbose=True,
    callbacks=[StreamingStdOutCallbackHandler()],
)
print(chat_stream.invoke(messages))

Embeddings

import os
from langchain_community.embeddings import DeepInfraEmbeddings

os.environ["DEEPINFRA_API_TOKEN"] = "<your DeepInfra API token>"

# Instruction prefixes are left blank, so the raw text is embedded as-is.
embedder = DeepInfraEmbeddings(
    model_id="Qwen/Qwen3-Embedding-8B",
    query_instruction="",
    embed_instruction="",
)

texts = ["Dog is not a cat", "Beta is the second letter of Greek alphabet"]
vectors = embedder.embed_documents(texts)
print(vectors)