Skip to content

Commit 012c917

Browse files
authored
Wrapping completions and chat/completions endpoint (#2)
* rebase and squash commits on latest main
* cargo fmt
* fix: 2038y problem

---------

Co-authored-by: michaelfeil <[email protected]>
1 parent f93012d commit 012c917

File tree

5 files changed

+1107
-6
lines changed

5 files changed

+1107
-6
lines changed

Dockerfile

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -39,7 +39,7 @@ RUN cargo build --release
3939
# Adapted from: https://github.com/pytorch/pytorch/blob/master/Dockerfile
4040
FROM debian:bullseye-slim as pytorch-install
4141

42-
ARG PYTORCH_VERSION=2.0.0
42+
ARG PYTORCH_VERSION=2.0.1
4343
ARG PYTHON_VERSION=3.9
4444
ARG CUDA_VERSION=11.8
4545
ARG MAMBA_VERSION=23.1.0-1

docs/openapi.json

Lines changed: 178 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -102,6 +102,184 @@
102102
}
103103
}
104104
},
105+
"/completions": {
106+
"post": {
107+
"tags": [
108+
"Text Generation Inference"
109+
],
110+
"summary": "Completion request. Enable token streaming by setting `stream` to `true`",
111+
"description": "Completion request. Enable token streaming by setting `stream` to `true`",
112+
"operationId": "completions_generate",
113+
"requestBody": {
114+
"content": {
115+
"application/json": {
116+
"schema": {
117+
"$ref": "#/components/schemas/CompatCompletionRequest"
118+
}
119+
}
120+
},
121+
"required": true
122+
},
123+
"responses": {
124+
"200": {
125+
"description": "Generated Completion",
126+
"content": {
127+
"application/json": {
128+
"schema": {
129+
"$ref": "#/components/schemas/CompletionsResponse"
130+
}
131+
},
132+
"text/event-stream": {
133+
"schema": {
134+
"$ref": "#/components/schemas/CompletionsResponse"
135+
}
136+
}
137+
}
138+
},
139+
"422": {
140+
"description": "Input validation error",
141+
"content": {
142+
"application/json": {
143+
"schema": {
144+
"$ref": "#/components/schemas/ErrorResponse"
145+
},
146+
"example": {
147+
"error": "Input validation error"
148+
}
149+
}
150+
}
151+
},
152+
"424": {
153+
"description": "Generation Error",
154+
"content": {
155+
"application/json": {
156+
"schema": {
157+
"$ref": "#/components/schemas/ErrorResponse"
158+
},
159+
"example": {
160+
"error": "Request failed during generation"
161+
}
162+
}
163+
}
164+
},
165+
"429": {
166+
"description": "Model is overloaded",
167+
"content": {
168+
"application/json": {
169+
"schema": {
170+
"$ref": "#/components/schemas/ErrorResponse"
171+
},
172+
"example": {
173+
"error": "Model is overloaded"
174+
}
175+
}
176+
}
177+
},
178+
"500": {
179+
"description": "Incomplete generation",
180+
"content": {
181+
"application/json": {
182+
"schema": {
183+
"$ref": "#/components/schemas/ErrorResponse"
184+
},
185+
"example": {
186+
"error": "Incomplete generation"
187+
}
188+
}
189+
}
190+
}
191+
}
192+
}
193+
},
194+
"/chat/completions": {
195+
"post": {
196+
"tags": [
197+
"Text Generation Inference"
198+
],
199+
"summary": "Generate tokens via Chat",
200+
"description": "Generate tokens via Chat",
201+
"operationId": "chatcompletions_generate",
202+
"requestBody": {
203+
"content": {
204+
"application/json": {
205+
"schema": {
206+
"$ref": "#/components/schemas/CompatChatCompletionRequest"
207+
}
208+
}
209+
},
210+
"required": true
211+
},
212+
"responses": {
213+
"200": {
214+
"description": "Generated Completion",
215+
"content": {
216+
"application/json": {
217+
"schema": {
218+
"$ref": "#/components/schemas/ChatCompletionsResponse"
219+
}
220+
},
221+
"text/event-stream": {
222+
"schema": {
223+
"$ref": "#/components/schemas/ChatCompletionsStreamResponse"
224+
}
225+
}
226+
}
227+
},
228+
"422": {
229+
"description": "Input validation error",
230+
"content": {
231+
"application/json": {
232+
"schema": {
233+
"$ref": "#/components/schemas/ErrorResponse"
234+
},
235+
"example": {
236+
"error": "Input validation error"
237+
}
238+
}
239+
}
240+
},
241+
"424": {
242+
"description": "Generation Error",
243+
"content": {
244+
"application/json": {
245+
"schema": {
246+
"$ref": "#/components/schemas/ErrorResponse"
247+
},
248+
"example": {
249+
"error": "Request failed during generation"
250+
}
251+
}
252+
}
253+
},
254+
"429": {
255+
"description": "Model is overloaded",
256+
"content": {
257+
"application/json": {
258+
"schema": {
259+
"$ref": "#/components/schemas/ErrorResponse"
260+
},
261+
"example": {
262+
"error": "Model is overloaded"
263+
}
264+
}
265+
}
266+
},
267+
"500": {
268+
"description": "Incomplete generation",
269+
"content": {
270+
"application/json": {
271+
"schema": {
272+
"$ref": "#/components/schemas/ErrorResponse"
273+
},
274+
"example": {
275+
"error": "Incomplete generation"
276+
}
277+
}
278+
}
279+
}
280+
}
281+
}
282+
},
105283
"/generate": {
106284
"post": {
107285
"tags": [

0 commit comments

Comments
 (0)