Reject image input for Count Token on device

gsiddh · gsiddh · commit 6f86c585aee3 · 2025-04-22T10:02:07.000-07:00
diff --git a/packages/vertexai/src/methods/chrome-adapter.test.ts b/packages/vertexai/src/methods/chrome-adapter.test.ts
@@ -15,6 +15,7 @@
  * limitations under the License.
  */
 
+import { VertexAIError } from '../errors';
 import { expect, use } from 'chai';
 import sinonChai from 'sinon-chai';
 import chaiAsPromised from 'chai-as-promised';
@@ -26,7 +27,7 @@ import {
   LanguageModelMessageContent
 } from '../types/language-model';
 import { match, stub } from 'sinon';
-import { GenerateContentRequest } from '../types';
+import { GenerateContentRequest, VertexAIErrorCode } from '../types';
 
 use(sinonChai);
 use(chaiAsPromised);
@@ -414,6 +415,54 @@ describe('ChromeAdapter', () => {
         totalTokens: expectedCount
       });
     });
+    it('count tokens for image based input should throw', async () => {
+      // Stub the on-device provider so calls to `create` can be observed.
+      const languageModelProvider = {
+        create: () => Promise.resolve({})
+      } as LanguageModel;
+      const languageModel = {
+        measureInputUsage: _i => Promise.resolve(123)
+      } as LanguageModel;
+      const createStub = stub(languageModelProvider, 'create').resolves(
+        languageModel
+      );
+
+      const countTokenRequestWithImagePart = {
+        contents: [
+          {
+            role: 'user',
+            parts: [
+              { text: 'test' },
+              {
+                inlineData: {
+                  data: sampleBase64EncodedImage,
+                  mimeType: 'image/jpeg'
+                }
+              }
+            ]
+          }
+        ]
+      } as GenerateContentRequest;
+
+      const adapter = new ChromeAdapter(
+        languageModelProvider,
+        'only_on_device'
+      );
+
+      // Capture the outcome and assert on it unconditionally: a call that
+      // resolves yields a non-error value and fails the checks below.
+      const caught: unknown = await adapter
+        .countTokens(countTokenRequestWithImagePart)
+        .catch((e: unknown) => e);
+      expect(caught).to.be.instanceOf(VertexAIError);
+      const error = caught as VertexAIError;
+      expect(error.code).to.equal(VertexAIErrorCode.INVALID_CONTENT);
+      expect(error.message).includes('image input');
+
+      // The error is expected before any session is created, so the
+      // provider's `create` stub must not have been called.
+      expect(createStub).not.called;
+    });
   });
   describe('generateContentStream', () => {
     it('generates content stream', async () => {
diff --git a/packages/vertexai/src/methods/chrome-adapter.ts b/packages/vertexai/src/methods/chrome-adapter.ts
@@ -15,11 +15,13 @@
  * limitations under the License.
  */
 
+import { VertexAIError } from '../errors';
 import {
   CountTokensRequest,
   GenerateContentRequest,
   InferenceMode,
-  Part
+  Part,
+  VertexAIErrorCode
 } from '../types';
 import {
   Availability,
@@ -130,7 +132,18 @@ export class ChromeAdapter {
   }
 
   async countTokens(request: CountTokensRequest): Promise<Response> {
-    // TODO: Check if the request contains an image, and if so, throw.
+    // Validate that the request contains no image input. The current on-device
+    // implementation doesn't support image input for the `measureInputUsage` API.
+    for (const part of request.contents[0].parts) {
+      // console.log(part);
+      if (part.inlineData) {
+        throw new VertexAIError(
+          VertexAIErrorCode.INVALID_CONTENT,
+          'Support for image input is not yet available for the Count Tokens API when running on-device.'
+        );
+      }
+    }
+
     const session = await this.createSession(
       // TODO: normalize on-device params during construction.
       this.onDeviceParams || {}