Python SDK Reference
Shared Types
from llama_stack_client.types import (
AgentConfig,
BatchCompletion,
CompletionMessage,
ContentDelta,
Document,
InterleavedContent,
InterleavedContentItem,
Message,
ParamType,
QueryConfig,
QueryResult,
ReturnType,
SafetyViolation,
SamplingParams,
ScoringResult,
SystemMessage,
ToolCall,
ToolParamDefinition,
ToolResponseMessage,
URL,
UserMessage,
)
Toolgroups
Types:
from llama_stack_client.types import (
ListToolGroupsResponse,
ToolGroup,
ToolgroupListResponse,
)
Methods:
client.toolgroups.list() -> ToolgroupListResponse
client.toolgroups.get(toolgroup_id) -> ToolGroup
client.toolgroups.register(**params) -> None
client.toolgroups.unregister(toolgroup_id) -> None
Tools
Types:
from llama_stack_client.types import ListToolsResponse, Tool, ToolListResponse
Methods:
client.tools.list(**params) -> ToolListResponse
client.tools.get(tool_name) -> Tool
ToolRuntime
Types:
from llama_stack_client.types import ToolDef, ToolInvocationResult
Methods:
client.tool_runtime.invoke_tool(**params) -> ToolInvocationResult
client.tool_runtime.list_tools(**params) -> JSONLDecoder[ToolDef]
RagTool
Methods:
client.tool_runtime.rag_tool.insert(**params) -> None
client.tool_runtime.rag_tool.query(**params) -> QueryResult
Agents
The Agents API is deprecated. Use the Responses API instead.
The Responses API provides equivalent functionality with an OpenAI-compatible interface. New applications should use client.responses.create() rather than the agents workflow below.
Types:
from llama_stack_client.types import (
InferenceStep,
MemoryRetrievalStep,
ShieldCallStep,
ToolExecutionStep,
ToolResponse,
AgentCreateResponse,
)
Methods:
client.agents.create(**params) -> AgentCreateResponse
client.agents.delete(agent_id) -> None
Session
Types:
from llama_stack_client.types.agents import Session, SessionCreateResponse
Methods:
client.agents.session.create(agent_id, **params) -> SessionCreateResponse
client.agents.session.retrieve(session_id, *, agent_id, **params) -> Session
client.agents.session.delete(session_id, *, agent_id) -> None
Steps
Types:
from llama_stack_client.types.agents import StepRetrieveResponse
Methods:
client.agents.steps.retrieve(step_id, *, agent_id, session_id, turn_id) -> StepRetrieveResponse
Turn
Types:
from llama_stack_client.types.agents import Turn, TurnCreateResponse
Methods:
client.agents.turn.create(session_id, *, agent_id, **params) -> TurnCreateResponse
client.agents.turn.retrieve(turn_id, *, agent_id, session_id) -> Turn
Datasets
Types:
from llama_stack_client.types import (
ListDatasetsResponse,
DatasetRetrieveResponse,
DatasetListResponse,
)
Methods:
client.datasets.retrieve(dataset_id) -> Optional[DatasetRetrieveResponse]
client.datasets.list() -> DatasetListResponse
client.datasets.register(**params) -> None
client.datasets.unregister(dataset_id) -> None
Eval
Types:
from llama_stack_client.types import EvaluateResponse, Job
Methods:
client.eval.evaluate_rows(benchmark_id, **params) -> EvaluateResponse
client.eval.run_eval(benchmark_id, **params) -> Job
Jobs
Types:
from llama_stack_client.types.eval import JobStatusResponse
Methods:
client.eval.jobs.retrieve(job_id, *, benchmark_id) -> EvaluateResponse
client.eval.jobs.cancel(job_id, *, benchmark_id) -> None
client.eval.jobs.status(job_id, *, benchmark_id) -> Optional[JobStatusResponse]
Inspect
Types:
from llama_stack_client.types import HealthInfo, ProviderInfo, RouteInfo, VersionInfo
Methods:
client.inspect.health() -> HealthInfo
client.inspect.version() -> VersionInfo
Inference
Types:
from llama_stack_client.types import (
CompletionResponse,
EmbeddingsResponse,
TokenLogProbs,
InferenceChatCompletionResponse,
InferenceCompletionResponse,
)
Methods:
client.inference.embeddings(**params) -> EmbeddingsResponse
VectorIo
This API is deprecated and will be removed in a future version.
Use the OpenAI-compatible Vector Stores API instead:
- Instead of client.vector_io.insert(), use client.vector_stores.files.create() and client.vector_stores.files.chunks.create()
- Instead of client.vector_io.query(), use client.vector_stores.search()
See the RAG documentation for migration examples.
Related: Issue #2981
Types:
from llama_stack_client.types import QueryChunksResponse
Methods:
client.vector_io.insert(**params) -> None
client.vector_io.query(**params) -> QueryChunksResponse
VectorDBs
This API is deprecated and will be removed in a future version.
Use the OpenAI-compatible Vector Stores API instead:
- Instead of client.vector_dbs.register(), use client.vector_stores.create()
- Instead of client.vector_dbs.list(), use client.vector_stores.list()
- Instead of client.vector_dbs.retrieve(), use client.vector_stores.retrieve()
- Instead of client.vector_dbs.unregister(), use client.vector_stores.delete()
See the RAG documentation for migration examples.
Related: Issue #2981
Types:
from llama_stack_client.types import (
ListVectorDBsResponse,
VectorDBRetrieveResponse,
VectorDBListResponse,
VectorDBRegisterResponse,
)
Methods:
client.vector_dbs.retrieve(vector_db_id) -> Optional[VectorDBRetrieveResponse]
client.vector_dbs.list() -> VectorDBListResponse
client.vector_dbs.register(**params) -> VectorDBRegisterResponse
client.vector_dbs.unregister(vector_db_id) -> None
Models
Types:
from llama_stack_client.types import ListModelsResponse, Model, ModelListResponse
Methods:
client.models.retrieve(model_id) -> Optional[Model]
client.models.list() -> ModelListResponse
client.models.register(**params) -> Model
client.models.unregister(model_id) -> None
PostTraining
The Post Training API is not currently available in Llama Stack. There are no active providers implementing this API. The SDK types remain for forward compatibility but these endpoints are non-functional.
Types:
from llama_stack_client.types import ListPostTrainingJobsResponse, PostTrainingJob
Methods:
client.post_training.preference_optimize(**params) -> PostTrainingJob
client.post_training.supervised_fine_tune(**params) -> PostTrainingJob
Job
Types:
from llama_stack_client.types.post_training import (
JobListResponse,
JobArtifactsResponse,
JobStatusResponse,
)
Methods:
client.post_training.job.list() -> JobListResponse
client.post_training.job.artifacts(**params) -> Optional[JobArtifactsResponse]
client.post_training.job.cancel(**params) -> None
client.post_training.job.status(**params) -> Optional[JobStatusResponse]
Providers
Types:
from llama_stack_client.types import ListProvidersResponse, ProviderListResponse
Methods:
client.providers.list() -> ProviderListResponse
Routes
Types:
from llama_stack_client.types import ListRoutesResponse, RouteListResponse
Methods:
client.routes.list() -> RouteListResponse
Safety
Types:
from llama_stack_client.types import RunShieldResponse
Methods:
client.safety.run_shield(**params) -> RunShieldResponse
Shields
Types:
from llama_stack_client.types import ListShieldsResponse, Shield, ShieldListResponse
Methods:
client.shields.retrieve(identifier) -> Optional[Shield]
client.shields.list() -> ShieldListResponse
client.shields.register(**params) -> Shield
SyntheticDataGeneration
The Synthetic Data Generation API is not currently available in Llama Stack. There are no active providers implementing this API. The SDK types remain for forward compatibility but these endpoints are non-functional.
Types:
from llama_stack_client.types import SyntheticDataGenerationResponse
Methods:
client.synthetic_data_generation.generate(**params) -> SyntheticDataGenerationResponse
Datasetio
Types:
from llama_stack_client.types import PaginatedRowsResult
Methods:
client.datasetio.append_rows(**params) -> None
client.datasetio.get_rows_paginated(**params) -> PaginatedRowsResult
Scoring
Types:
from llama_stack_client.types import ScoringScoreResponse, ScoringScoreBatchResponse
Methods:
client.scoring.score(**params) -> ScoringScoreResponse
client.scoring.score_batch(**params) -> ScoringScoreBatchResponse
ScoringFunctions
Types:
from llama_stack_client.types import (
ListScoringFunctionsResponse,
ScoringFn,
ScoringFunctionListResponse,
)
Methods:
client.scoring_functions.retrieve(scoring_fn_id) -> Optional[ScoringFn]
client.scoring_functions.list() -> ScoringFunctionListResponse
client.scoring_functions.register(**params) -> None
Benchmarks
Types:
from llama_stack_client.types import (
Benchmark,
ListBenchmarksResponse,
BenchmarkListResponse,
)
Methods:
client.benchmarks.retrieve(benchmark_id) -> Optional[Benchmark]
client.benchmarks.list() -> BenchmarkListResponse
client.benchmarks.register(**params) -> None