|
| 1 | +//go:build smoke |
| 2 | + |
| 3 | +package smoke |
| 4 | + |
| 5 | +import ( |
| 6 | + "bufio" |
| 7 | + "context" |
| 8 | + "encoding/json" |
| 9 | + "fmt" |
| 10 | + "os" |
| 11 | + "path/filepath" |
| 12 | + "strings" |
| 13 | + "testing" |
| 14 | + |
| 15 | + openai "github.com/gptscript-ai/chat-completion-client" |
| 16 | + "github.com/gptscript-ai/gptscript/pkg/runner" |
| 17 | + "github.com/gptscript-ai/gptscript/pkg/tests/judge" |
| 18 | + "github.com/gptscript-ai/gptscript/pkg/types" |
| 19 | + "github.com/samber/lo" |
| 20 | + "github.com/stretchr/testify/assert" |
| 21 | + "github.com/stretchr/testify/require" |
| 22 | + "gotest.tools/v3/icmd" |
| 23 | +) |
| 24 | + |
const (
	// defaultModelEnvVar names the environment variable that getTestcases reads
	// to decide which default model is handed to gptscript and which per-model
	// event files are used.
	defaultModelEnvVar = "GPTSCRIPT_DEFAULT_MODEL"
)
| 26 | + |
| 27 | +func TestSmoke(t *testing.T) { |
| 28 | + client := openai.NewClient(os.Getenv("OPENAI_API_KEY")) |
| 29 | + smokeJudge, err := judge.New[[]event](client) |
| 30 | + require.NoError(t, err, "error initializing smoke test judge") |
| 31 | + |
| 32 | + for _, tc := range getTestcases(t) { |
| 33 | + t.Run(tc.name, func(t *testing.T) { |
| 34 | + cmd := icmd.Command( |
| 35 | + "gptscript", |
| 36 | + "--color=false", |
| 37 | + "--disable-cache", |
| 38 | + "--events-stream-to", |
| 39 | + tc.actualEventsFile, |
| 40 | + "--default-model", |
| 41 | + tc.defaultModel, |
| 42 | + tc.gptFile, |
| 43 | + ) |
| 44 | + |
| 45 | + result := icmd.RunCmd(cmd) |
| 46 | + defer func() { |
| 47 | + t.Helper() |
| 48 | + assert.NoError(t, os.Remove(tc.actualEventsFile)) |
| 49 | + }() |
| 50 | + |
| 51 | + require.NoError(t, result.Error, "stderr: %q", result.Stderr()) |
| 52 | + require.Zero(t, result.ExitCode) |
| 53 | + |
| 54 | + var ( |
| 55 | + actualEvents = getActualEvents(t, tc.actualEventsFile) |
| 56 | + expectedEvents = make([]event, 0) |
| 57 | + ) |
| 58 | + f, err := os.Open(tc.expectedEventsFile) |
| 59 | + if os.IsNotExist(err) { |
| 60 | + // No expected events found, store the results of the latest call as the golden file for future tests runs |
| 61 | + f, err := os.Create(tc.expectedEventsFile) |
| 62 | + require.NoError(t, err) |
| 63 | + defer f.Close() |
| 64 | + |
| 65 | + encoder := json.NewEncoder(f) |
| 66 | + encoder.SetIndent("", " ") |
| 67 | + require.NoError(t, encoder.Encode(actualEvents)) |
| 68 | + t.Skipf("Generated initial golden file %q, skipping test", tc.expectedEventsFile) |
| 69 | + } else { |
| 70 | + require.NoError(t, err) |
| 71 | + defer f.Close() |
| 72 | + |
| 73 | + decoder := json.NewDecoder(f) |
| 74 | + require.NoError(t, decoder.Decode(&expectedEvents)) |
| 75 | + } |
| 76 | + |
| 77 | + ctx, cancel := context.WithCancel(context.Background()) |
| 78 | + defer cancel() |
| 79 | + |
| 80 | + equal, reasoning, err := smokeJudge.Equal( |
| 81 | + ctx, |
| 82 | + expectedEvents, |
| 83 | + actualEvents, |
| 84 | + `The field values of the elements of expected and actual must be roughly equivalent. |
| 85 | +Ignore variations in timestamps, IDs, and verbiage when determining equivalence.`, |
| 86 | + ) |
| 87 | + require.NoError(t, err, "error getting judge ruling on output") |
| 88 | + require.True(t, equal, reasoning) |
| 89 | + t.Logf("reasoning: %q", reasoning) |
| 90 | + }) |
| 91 | + } |
| 92 | +} |
| 93 | + |
// testcase describes one smoke test: the script handed to gptscript, the model
// settings, and the files holding the actual and expected event streams.
type testcase struct {
	// name is the subtest name; getTestcases sets it to the testcase directory name.
	name string
	// dir is the testcase directory under testdata.
	dir string
	// gptFile is the path of the .gpt script passed to gptscript.
	gptFile string
	// defaultModel is the value passed to gptscript via --default-model.
	defaultModel string
	// modelName is the part of defaultModel before its first space.
	modelName string
	// env is not populated by getTestcases in this file.
	env []string
	// actualEventsFile is where gptscript streams the events of the current run.
	actualEventsFile string
	// expectedEventsFile is the golden file the actual events are compared against.
	expectedEventsFile string
}
| 104 | + |
| 105 | +func getTestcases(t *testing.T) []testcase { |
| 106 | + t.Helper() |
| 107 | + |
| 108 | + defaultModel := os.Getenv(defaultModelEnvVar) |
| 109 | + modelName := strings.Split(defaultModel, " ")[0] |
| 110 | + |
| 111 | + var testcases []testcase |
| 112 | + for _, d := range lo.Must(os.ReadDir("testdata")) { |
| 113 | + if !d.IsDir() { |
| 114 | + continue |
| 115 | + } |
| 116 | + var ( |
| 117 | + dirName = d.Name() |
| 118 | + dir = filepath.Join("testdata", dirName) |
| 119 | + ) |
| 120 | + |
| 121 | + files, err := os.ReadDir(dir) |
| 122 | + require.NoError(t, err, "failed to get testdata dir %q", dir) |
| 123 | + |
| 124 | + for _, f := range files { |
| 125 | + if f.IsDir() || filepath.Ext(f.Name()) != ".gpt" { |
| 126 | + continue |
| 127 | + } |
| 128 | + |
| 129 | + testcases = append(testcases, testcase{ |
| 130 | + name: dirName, |
| 131 | + dir: dir, |
| 132 | + gptFile: filepath.Join(dir, f.Name()), |
| 133 | + defaultModel: defaultModel, |
| 134 | + modelName: modelName, |
| 135 | + expectedEventsFile: filepath.Join(dir, fmt.Sprintf("%s-expected.json", modelName)), |
| 136 | + actualEventsFile: filepath.Join(dir, fmt.Sprintf("%s.json", modelName)), |
| 137 | + }) |
| 138 | + |
| 139 | + // Only take the first .gpt file in each testcase directory |
| 140 | + break |
| 141 | + } |
| 142 | + } |
| 143 | + |
| 144 | + return testcases |
| 145 | +} |
| 146 | + |
| 147 | +type event struct { |
| 148 | + runner.Event |
| 149 | + ChatRequest *openai.ChatCompletionRequest `json:"chatRequest,omitempty"` |
| 150 | + ChatResponse *types.CompletionMessage `json:"chatResponse,omitempty"` |
| 151 | +} |
| 152 | + |
| 153 | +func getActualEvents(t *testing.T, eventsFile string) []event { |
| 154 | + t.Helper() |
| 155 | + |
| 156 | + f, err := os.Open(eventsFile) |
| 157 | + require.NoError(t, err) |
| 158 | + defer f.Close() |
| 159 | + |
| 160 | + var ( |
| 161 | + events []event |
| 162 | + scanner = bufio.NewScanner(f) |
| 163 | + ) |
| 164 | + for scanner.Scan() { |
| 165 | + line := scanner.Text() |
| 166 | + // Skip blank lines |
| 167 | + if strings.TrimSpace(line) == "" { |
| 168 | + continue |
| 169 | + } |
| 170 | + |
| 171 | + var e event |
| 172 | + require.NoError(t, json.Unmarshal([]byte(line), &e)) |
| 173 | + events = append(events, e) |
| 174 | + } |
| 175 | + |
| 176 | + require.NoError(t, scanner.Err()) |
| 177 | + |
| 178 | + return events |
| 179 | +} |
0 commit comments