Commit 78f5c35

chore(api): docs and response_format response property (#778)
1 parent c2c998d commit 78f5c35

3 files changed: +140 -28 lines changed


src/resources/beta/assistants.ts (+36 -2)
```diff
@@ -142,13 +142,47 @@ export interface Assistant {
    */
   tools: Array<AssistantTool>;
 
+  /**
+   * Specifies the format that the model must output. Compatible with
+   * [GPT-4 Turbo](https://platform.openai.com/docs/models/gpt-4-and-gpt-4-turbo) and
+   * all GPT-3.5 Turbo models since `gpt-3.5-turbo-1106`.
+   *
+   * Setting to `{ "type": "json_object" }` enables JSON mode, which guarantees the
+   * message the model generates is valid JSON.
+   *
+   * **Important:** when using JSON mode, you **must** also instruct the model to
+   * produce JSON yourself via a system or user message. Without this, the model may
+   * generate an unending stream of whitespace until the generation reaches the token
+   * limit, resulting in a long-running and seemingly "stuck" request. Also note that
+   * the message content may be partially cut off if `finish_reason="length"`, which
+   * indicates the generation exceeded `max_tokens` or the conversation exceeded the
+   * max context length.
+   */
+  response_format?: ThreadsAPI.AssistantResponseFormatOption | null;
+
+  /**
+   * What sampling temperature to use, between 0 and 2. Higher values like 0.8 will
+   * make the output more random, while lower values like 0.2 will make it more
+   * focused and deterministic.
+   */
+  temperature?: number | null;
+
   /**
    * A set of resources that are used by the assistant's tools. The resources are
    * specific to the type of tool. For example, the `code_interpreter` tool requires
    * a list of file IDs, while the `file_search` tool requires a list of vector store
    * IDs.
    */
   tool_resources?: Assistant.ToolResources | null;
+
+  /**
+   * An alternative to sampling with temperature, called nucleus sampling, where the
+   * model considers the results of the tokens with top_p probability mass. So 0.1
+   * means only the tokens comprising the top 10% probability mass are considered.
+   *
+   * We generally recommend altering this or temperature but not both.
+   */
+  top_p?: number | null;
 }
 
 export namespace Assistant {
```
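The `response_format`, `temperature`, and `top_p` properties added to `Assistant` above mirror the existing create/update params. A minimal usage sketch (not part of this commit; the model, instructions, and values are illustrative):

```ts
import OpenAI from 'openai';

const client = new OpenAI();

// Enable JSON mode and tune sampling. As the doc comment above warns, JSON
// mode also requires instructing the model to produce JSON via a system or
// user message.
const assistant = await client.beta.assistants.create({
  model: 'gpt-4-turbo', // illustrative; any compatible model works
  instructions: 'You are a helpful assistant. Always reply with a single JSON object.',
  response_format: { type: 'json_object' },
  temperature: 0.2, // lower values make output more focused and deterministic
});

console.log(assistant.response_format, assistant.temperature, assistant.top_p);
```

Per the `top_p` doc comment, alter either `temperature` or `top_p` but not both, which is why the sketch sets only one.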
```diff
@@ -1012,7 +1046,7 @@ export interface AssistantCreateParams {
   /**
    * Specifies the format that the model must output. Compatible with
    * [GPT-4 Turbo](https://platform.openai.com/docs/models/gpt-4-and-gpt-4-turbo) and
-   * all GPT-3.5 Turbo models newer than `gpt-3.5-turbo-1106`.
+   * all GPT-3.5 Turbo models since `gpt-3.5-turbo-1106`.
    *
    * Setting to `{ "type": "json_object" }` enables JSON mode, which guarantees the
    * message the model generates is valid JSON.
@@ -1158,7 +1192,7 @@ export interface AssistantUpdateParams {
   /**
    * Specifies the format that the model must output. Compatible with
    * [GPT-4 Turbo](https://platform.openai.com/docs/models/gpt-4-and-gpt-4-turbo) and
-   * all GPT-3.5 Turbo models newer than `gpt-3.5-turbo-1106`.
+   * all GPT-3.5 Turbo models since `gpt-3.5-turbo-1106`.
    *
    * Setting to `{ "type": "json_object" }` enables JSON mode, which guarantees the
    * message the model generates is valid JSON.
```
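`AssistantUpdateParams` gets the same wording fix, so switching an existing assistant into JSON mode is a one-field update. A hedged sketch (the assistant ID is a placeholder):

```ts
import OpenAI from 'openai';

const client = new OpenAI();

// 'asst_abc123' is a placeholder ID, not from this commit.
await client.beta.assistants.update('asst_abc123', {
  response_format: { type: 'json_object' },
});
```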

src/resources/beta/threads/runs/runs.ts (+66 -18)
```diff
@@ -409,7 +409,7 @@ export interface Run {
   /**
    * Specifies the format that the model must output. Compatible with
    * [GPT-4 Turbo](https://platform.openai.com/docs/models/gpt-4-and-gpt-4-turbo) and
-   * all GPT-3.5 Turbo models newer than `gpt-3.5-turbo-1106`.
+   * all GPT-3.5 Turbo models since `gpt-3.5-turbo-1106`.
    *
    * Setting to `{ "type": "json_object" }` enables JSON mode, which guarantees the
    * message the model generates is valid JSON.
@@ -446,7 +446,7 @@ export interface Run {
    * Controls which (if any) tool is called by the model. `none` means the model will
    * not call any tools and instead generates a message. `auto` is the default value
    * and means the model can pick between generating a message or calling a tool.
-   * Specifying a particular tool like `{"type": "TOOL_TYPE"}` or
+   * Specifying a particular tool like `{"type": "file_search"}` or
    * `{"type": "function", "function": {"name": "my_function"}}` forces the model to
    * call that tool.
    */
@@ -459,6 +459,10 @@ export interface Run {
    */
   tools: Array<AssistantsAPI.AssistantTool>;
 
+  /**
+   * Controls for how a thread will be truncated prior to the run. Use this to
+   * control the initial context window of the run.
+   */
   truncation_strategy: Run.TruncationStrategy | null;
 
   /**
@@ -534,6 +538,10 @@ export namespace Run {
     }
   }
 
+  /**
+   * Controls for how a thread will be truncated prior to the run. Use this to
+   * control the initial context window of the run.
+   */
   export interface TruncationStrategy {
     /**
      * The truncation strategy to use for the thread. The default is `auto`. If set to
```
```diff
@@ -620,7 +628,7 @@ export interface RunCreateParamsBase {
    * The maximum number of completion tokens that may be used over the course of the
    * run. The run will make a best effort to use only the number of completion tokens
    * specified, across multiple turns of the run. If the run exceeds the number of
-   * completion tokens specified, the run will end with status `complete`. See
+   * completion tokens specified, the run will end with status `incomplete`. See
    * `incomplete_details` for more info.
    */
   max_completion_tokens?: number | null;
@@ -629,7 +637,7 @@ export interface RunCreateParamsBase {
    * The maximum number of prompt tokens that may be used over the course of the run.
    * The run will make a best effort to use only the number of prompt tokens
    * specified, across multiple turns of the run. If the run exceeds the number of
-   * prompt tokens specified, the run will end with status `complete`. See
+   * prompt tokens specified, the run will end with status `incomplete`. See
    * `incomplete_details` for more info.
    */
   max_prompt_tokens?: number | null;
```
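Both token budgets now correctly document the `incomplete` terminal status. A sketch (not part of this commit; IDs are placeholders) of capping usage and checking why a run stopped:

```ts
import OpenAI from 'openai';

const client = new OpenAI();

// Cap prompt and completion tokens, poll to a terminal state, then check
// whether a budget was exhausted. 'thread_abc123'/'asst_abc123' are placeholders.
const run = await client.beta.threads.runs.createAndPoll('thread_abc123', {
  assistant_id: 'asst_abc123',
  max_prompt_tokens: 2000,
  max_completion_tokens: 500,
});

if (run.status === 'incomplete') {
  // reason is e.g. 'max_completion_tokens' or 'max_prompt_tokens'
  console.log('Run ended early:', run.incomplete_details?.reason);
}
```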
```diff
@@ -673,7 +681,7 @@ export interface RunCreateParamsBase {
   /**
    * Specifies the format that the model must output. Compatible with
    * [GPT-4 Turbo](https://platform.openai.com/docs/models/gpt-4-and-gpt-4-turbo) and
-   * all GPT-3.5 Turbo models newer than `gpt-3.5-turbo-1106`.
+   * all GPT-3.5 Turbo models since `gpt-3.5-turbo-1106`.
    *
    * Setting to `{ "type": "json_object" }` enables JSON mode, which guarantees the
    * message the model generates is valid JSON.
@@ -706,7 +714,7 @@ export interface RunCreateParamsBase {
    * Controls which (if any) tool is called by the model. `none` means the model will
    * not call any tools and instead generates a message. `auto` is the default value
    * and means the model can pick between generating a message or calling a tool.
-   * Specifying a particular tool like `{"type": "TOOL_TYPE"}` or
+   * Specifying a particular tool like `{"type": "file_search"}` or
    * `{"type": "function", "function": {"name": "my_function"}}` forces the model to
    * call that tool.
    */
```
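With `TOOL_TYPE` replaced by a concrete tool name, forcing a specific tool looks like this sketch (IDs are placeholders; assumes the assistant has the `file_search` tool attached):

```ts
import OpenAI from 'openai';

const client = new OpenAI();

// Force a file_search call on this run rather than letting the model choose.
const run = await client.beta.threads.runs.create('thread_abc123', {
  assistant_id: 'asst_abc123',
  tool_choice: { type: 'file_search' },
});
```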
```diff
@@ -722,9 +730,15 @@ export interface RunCreateParamsBase {
    * An alternative to sampling with temperature, called nucleus sampling, where the
    * model considers the results of the tokens with top_p probability mass. So 0.1
    * means only the tokens comprising the top 10% probability mass are considered.
+   *
+   * We generally recommend altering this or temperature but not both.
    */
   top_p?: number | null;
 
+  /**
+   * Controls for how a thread will be truncated prior to the run. Use this to
+   * control the initial context window of the run.
+   */
   truncation_strategy?: RunCreateParams.TruncationStrategy | null;
 }
```
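A sketch of the newly documented `truncation_strategy` param (IDs are placeholders), limiting the run's initial context window to the most recent messages:

```ts
import OpenAI from 'openai';

const client = new OpenAI();

// Seed the run's context with only the 10 most recent thread messages.
const run = await client.beta.threads.runs.create('thread_abc123', {
  assistant_id: 'asst_abc123',
  truncation_strategy: { type: 'last_messages', last_messages: 10 },
});
```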

```diff
@@ -770,6 +784,10 @@ export namespace RunCreateParams {
     }
   }
 
+  /**
+   * Controls for how a thread will be truncated prior to the run. Use this to
+   * control the initial context window of the run.
+   */
   export interface TruncationStrategy {
     /**
      * The truncation strategy to use for the thread. The default is `auto`. If set to
@@ -865,7 +883,7 @@ export interface RunCreateAndPollParams {
    * The maximum number of completion tokens that may be used over the course of the
    * run. The run will make a best effort to use only the number of completion tokens
    * specified, across multiple turns of the run. If the run exceeds the number of
-   * completion tokens specified, the run will end with status `complete`. See
+   * completion tokens specified, the run will end with status `incomplete`. See
    * `incomplete_details` for more info.
    */
   max_completion_tokens?: number | null;
@@ -874,7 +892,7 @@ export interface RunCreateAndPollParams {
    * The maximum number of prompt tokens that may be used over the course of the run.
    * The run will make a best effort to use only the number of prompt tokens
    * specified, across multiple turns of the run. If the run exceeds the number of
-   * prompt tokens specified, the run will end with status `complete`. See
+   * prompt tokens specified, the run will end with status `incomplete`. See
    * `incomplete_details` for more info.
    */
   max_prompt_tokens?: number | null;
@@ -918,7 +936,7 @@ export interface RunCreateAndPollParams {
   /**
    * Specifies the format that the model must output. Compatible with
    * [GPT-4 Turbo](https://platform.openai.com/docs/models/gpt-4-and-gpt-4-turbo) and
-   * all GPT-3.5 Turbo models newer than `gpt-3.5-turbo-1106`.
+   * all GPT-3.5 Turbo models since `gpt-3.5-turbo-1106`.
    *
    * Setting to `{ "type": "json_object" }` enables JSON mode, which guarantees the
    * message the model generates is valid JSON.
@@ -944,7 +962,7 @@ export interface RunCreateAndPollParams {
    * Controls which (if any) tool is called by the model. `none` means the model will
    * not call any tools and instead generates a message. `auto` is the default value
    * and means the model can pick between generating a message or calling a tool.
-   * Specifying a particular tool like `{"type": "TOOL_TYPE"}` or
+   * Specifying a particular tool like `{"type": "file_search"}` or
    * `{"type": "function", "function": {"name": "my_function"}}` forces the model to
    * call that tool.
    */
@@ -960,9 +978,15 @@ export interface RunCreateAndPollParams {
    * An alternative to sampling with temperature, called nucleus sampling, where the
    * model considers the results of the tokens with top_p probability mass. So 0.1
    * means only the tokens comprising the top 10% probability mass are considered.
+   *
+   * We generally recommend altering this or temperature but not both.
    */
   top_p?: number | null;
 
+  /**
+   * Controls for how a thread will be truncated prior to the run. Use this to
+   * control the initial context window of the run.
+   */
   truncation_strategy?: RunCreateAndPollParams.TruncationStrategy | null;
 }
 
@@ -1008,6 +1032,10 @@ export namespace RunCreateAndPollParams {
     }
   }
 
+  /**
+   * Controls for how a thread will be truncated prior to the run. Use this to
+   * control the initial context window of the run.
+   */
   export interface TruncationStrategy {
     /**
      * The truncation strategy to use for the thread. The default is `auto`. If set to
```
```diff
@@ -1056,7 +1084,7 @@ export interface RunCreateAndStreamParams {
    * The maximum number of completion tokens that may be used over the course of the
    * run. The run will make a best effort to use only the number of completion tokens
    * specified, across multiple turns of the run. If the run exceeds the number of
-   * completion tokens specified, the run will end with status `complete`. See
+   * completion tokens specified, the run will end with status `incomplete`. See
    * `incomplete_details` for more info.
    */
   max_completion_tokens?: number | null;
@@ -1065,7 +1093,7 @@ export interface RunCreateAndStreamParams {
    * The maximum number of prompt tokens that may be used over the course of the run.
    * The run will make a best effort to use only the number of prompt tokens
    * specified, across multiple turns of the run. If the run exceeds the number of
-   * prompt tokens specified, the run will end with status `complete`. See
+   * prompt tokens specified, the run will end with status `incomplete`. See
    * `incomplete_details` for more info.
    */
   max_prompt_tokens?: number | null;
@@ -1109,7 +1137,7 @@ export interface RunCreateAndStreamParams {
   /**
    * Specifies the format that the model must output. Compatible with
    * [GPT-4 Turbo](https://platform.openai.com/docs/models/gpt-4-and-gpt-4-turbo) and
-   * all GPT-3.5 Turbo models newer than `gpt-3.5-turbo-1106`.
+   * all GPT-3.5 Turbo models since `gpt-3.5-turbo-1106`.
    *
    * Setting to `{ "type": "json_object" }` enables JSON mode, which guarantees the
    * message the model generates is valid JSON.
@@ -1135,7 +1163,7 @@ export interface RunCreateAndStreamParams {
    * Controls which (if any) tool is called by the model. `none` means the model will
    * not call any tools and instead generates a message. `auto` is the default value
    * and means the model can pick between generating a message or calling a tool.
-   * Specifying a particular tool like `{"type": "TOOL_TYPE"}` or
+   * Specifying a particular tool like `{"type": "file_search"}` or
    * `{"type": "function", "function": {"name": "my_function"}}` forces the model to
    * call that tool.
    */
@@ -1151,9 +1179,15 @@ export interface RunCreateAndStreamParams {
    * An alternative to sampling with temperature, called nucleus sampling, where the
    * model considers the results of the tokens with top_p probability mass. So 0.1
    * means only the tokens comprising the top 10% probability mass are considered.
+   *
+   * We generally recommend altering this or temperature but not both.
    */
   top_p?: number | null;
 
+  /**
+   * Controls for how a thread will be truncated prior to the run. Use this to
+   * control the initial context window of the run.
+   */
   truncation_strategy?: RunCreateAndStreamParams.TruncationStrategy | null;
 }
 
@@ -1199,6 +1233,10 @@ export namespace RunCreateAndStreamParams {
     }
   }
 
+  /**
+   * Controls for how a thread will be truncated prior to the run. Use this to
+   * control the initial context window of the run.
+   */
   export interface TruncationStrategy {
     /**
      * The truncation strategy to use for the thread. The default is `auto`. If set to
```
```diff
@@ -1247,7 +1285,7 @@ export interface RunStreamParams {
    * The maximum number of completion tokens that may be used over the course of the
    * run. The run will make a best effort to use only the number of completion tokens
    * specified, across multiple turns of the run. If the run exceeds the number of
-   * completion tokens specified, the run will end with status `complete`. See
+   * completion tokens specified, the run will end with status `incomplete`. See
    * `incomplete_details` for more info.
    */
   max_completion_tokens?: number | null;
@@ -1256,7 +1294,7 @@ export interface RunStreamParams {
    * The maximum number of prompt tokens that may be used over the course of the run.
    * The run will make a best effort to use only the number of prompt tokens
    * specified, across multiple turns of the run. If the run exceeds the number of
-   * prompt tokens specified, the run will end with status `complete`. See
+   * prompt tokens specified, the run will end with status `incomplete`. See
    * `incomplete_details` for more info.
    */
   max_prompt_tokens?: number | null;
@@ -1300,7 +1338,7 @@ export interface RunStreamParams {
   /**
    * Specifies the format that the model must output. Compatible with
    * [GPT-4 Turbo](https://platform.openai.com/docs/models/gpt-4-and-gpt-4-turbo) and
-   * all GPT-3.5 Turbo models newer than `gpt-3.5-turbo-1106`.
+   * all GPT-3.5 Turbo models since `gpt-3.5-turbo-1106`.
    *
    * Setting to `{ "type": "json_object" }` enables JSON mode, which guarantees the
    * message the model generates is valid JSON.
@@ -1326,7 +1364,7 @@ export interface RunStreamParams {
    * Controls which (if any) tool is called by the model. `none` means the model will
    * not call any tools and instead generates a message. `auto` is the default value
    * and means the model can pick between generating a message or calling a tool.
-   * Specifying a particular tool like `{"type": "TOOL_TYPE"}` or
+   * Specifying a particular tool like `{"type": "file_search"}` or
    * `{"type": "function", "function": {"name": "my_function"}}` forces the model to
    * call that tool.
    */
@@ -1342,9 +1380,15 @@ export interface RunStreamParams {
    * An alternative to sampling with temperature, called nucleus sampling, where the
    * model considers the results of the tokens with top_p probability mass. So 0.1
    * means only the tokens comprising the top 10% probability mass are considered.
+   *
+   * We generally recommend altering this or temperature but not both.
    */
   top_p?: number | null;
 
+  /**
+   * Controls for how a thread will be truncated prior to the run. Use this to
+   * control the initial context window of the run.
+   */
   truncation_strategy?: RunStreamParams.TruncationStrategy | null;
 }
 
@@ -1390,6 +1434,10 @@ export namespace RunStreamParams {
     }
   }
 
+  /**
+   * Controls for how a thread will be truncated prior to the run. Use this to
+   * control the initial context window of the run.
+   */
   export interface TruncationStrategy {
     /**
      * The truncation strategy to use for the thread. The default is `auto`. If set to
```
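`RunCreateAndStreamParams` and `RunStreamParams` pick up the same doc fixes. A streaming sketch under the same placeholder IDs; the event helpers come from the SDK's streaming support, not from this commit:

```ts
import OpenAI from 'openai';

const client = new OpenAI();

// Stream a run with an automatic truncation strategy, printing text deltas.
const stream = client.beta.threads.runs.stream('thread_abc123', {
  assistant_id: 'asst_abc123',
  truncation_strategy: { type: 'auto' },
});

stream.on('textDelta', (delta) => process.stdout.write(delta.value ?? ''));

const run = await stream.finalRun();
console.log('\nfinal status:', run.status);
```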
