# Structured output
Currently, only the PyTorch backend supports structured output. Whether you use the pipeline or the api_server, make sure to select the PyTorch backend explicitly.
## pipeline
```python
from lmdeploy import pipeline
from lmdeploy.messages import GenerationConfig, PytorchEngineConfig

model = 'internlm/internlm2-chat-1_8b'

# JSON schema that the generated text must conform to
guide = {
    'type': 'object',
    'properties': {
        'name': {
            'type': 'string'
        },
        'skills': {
            'type': 'array',
            'items': {
                'type': 'string',
                'maxLength': 10
            },
            'minItems': 3
        },
        'work history': {
            'type': 'array',
            'items': {
                'type': 'object',
                'properties': {
                    'company': {
                        'type': 'string'
                    },
                    'duration': {
                        'type': 'string'
                    }
                },
                'required': ['company']
            }
        }
    },
    'required': ['name', 'skills', 'work history']
}

pipe = pipeline(model, backend_config=PytorchEngineConfig(), log_level='INFO')
gen_config = GenerationConfig(
    response_format=dict(type='json_schema', json_schema=dict(name='test', schema=guide)))
response = pipe(['Make a self introduction please.'], gen_config=gen_config)
print(response)
```
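Because decoding is constrained by `guide`, the reply should be valid JSON and can be parsed directly. A minimal sketch (it assumes the pipeline call above succeeded; `response[0].text` holds the generated text for the first prompt):

```python
import json

# Parse the schema-constrained reply into a Python dict.
profile = json.loads(response[0].text)
print(profile['name'])
print(profile['skills'])        # at least 3 items, each at most 10 characters
print(profile['work history'])  # list of objects, each with a 'company' field
```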
## api_server
First, start the api_server service for the InternLM2 model:
```shell
lmdeploy serve api_server internlm/internlm2-chat-1_8b --backend pytorch
```
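Once the server is up (it listens on port 23333 by default), you can optionally confirm it is reachable before wiring up a client. A quick check with the `requests` package (an extra dependency assumed here; the `/v1/models` endpoint is part of the server's OpenAI-compatible API):

```python
import requests

# List the models served by the api_server; a successful response
# means the service is ready to accept chat completion requests.
resp = requests.get('http://0.0.0.0:23333/v1/models')
print(resp.json())
```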
The client can then test the service with OpenAI's Python package. The reply is returned as a JSON document that follows the schema.
```python
from openai import OpenAI

# JSON schema that the generated text must conform to
guide = {
    'type': 'object',
    'properties': {
        'name': {
            'type': 'string'
        },
        'skills': {
            'type': 'array',
            'items': {
                'type': 'string',
                'maxLength': 10
            },
            'minItems': 3
        },
        'work history': {
            'type': 'array',
            'items': {
                'type': 'object',
                'properties': {
                    'company': {
                        'type': 'string'
                    },
                    'duration': {
                        'type': 'string'
                    }
                },
                'required': ['company']
            }
        }
    },
    'required': ['name', 'skills', 'work history']
}

response_format = dict(type='json_schema', json_schema=dict(name='test', schema=guide))
messages = [{'role': 'user', 'content': 'Make a self-introduction please.'}]

client = OpenAI(api_key='YOUR_API_KEY', base_url='http://0.0.0.0:23333/v1')
model_name = client.models.list().data[0].id
response = client.chat.completions.create(
    model=model_name,
    messages=messages,
    temperature=0.8,
    response_format=response_format,
    top_p=0.8)
print(response)
```
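Since generation is constrained by the schema, the assistant message content should parse as JSON. A short sketch of consuming it (assuming the request above succeeded):

```python
import json

# The structured reply arrives as the assistant message content.
profile = json.loads(response.choices[0].message.content)
print(profile['name'], profile['work history'])
```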