// Test code: CLI script that extracts the first table from an image via Gemini
#!/usr/bin/env bun
import { GeminiClient } from "./ai/vertex-gemini";
import fs from "fs";
import path from "path";
/**
 * CLI entry point: sends an image to Gemini with a JSON response schema,
 * prints the response, and persists it under ./data.
 *
 * Usage: bun run <script> <path-to-image>
 */
async function main() {
  const imagePath = Bun.argv[2];
  if (!imagePath) {
    console.error("Please provide an image path");
    process.exit(1);
  }

  /**
   * Supported models:
   * "gemini-1.5-flash-001"
   * "gemini-1.5-flash-002"
   */
  const modelName = "gemini-1.5-flash-002";

  // Response schema: a 2-d array of strings (rows of table cells).
  const tableSchema = {
    type: "array",
    items: {
      type: "array",
      items: {
        type: "string",
      },
    },
  };

  const client = new GeminiClient({
    model: modelName,
    generationConfig: {
      temperature: 0,
      responseLogprobs: true,
      logprobs: 5,
      responseMimeType: "application/json",
      responseSchema: tableSchema,
    },
  });

  try {
    // Non-streaming request; logprobs are enabled via generationConfig above.
    const response = await client.generateContent(
      `Convert the first table in this image ${imagePath} into a 2d array. Feel free to ignore formatting fluff or random things. Keep the headers. The number of columns and rows should match - there are no merged columns.`,
      [{ path: imagePath }]
    );
    console.log("Response:", JSON.stringify(response, null, 2));

    // Colons and dots are not filesystem-safe everywhere; flatten them.
    const timestamp = new Date().toISOString().replace(/[:.]/g, "-");

    // Fix: ensure the output directory exists — writeFileSync does not
    // create parent directories and would throw ENOENT on a fresh checkout.
    const outputDir = path.join(process.cwd(), "data");
    fs.mkdirSync(outputDir, { recursive: true });

    const outputPath = path.join(
      outputDir,
      // path.basename handles platform-specific separators, unlike the
      // previous imagePath.split("/").pop().
      `${timestamp}-${modelName}-${path.basename(imagePath)}.json`
    );
    fs.writeFileSync(outputPath, JSON.stringify(response, null, 2));

    // // For streaming response:
    // console.log("\nStreaming response:");
    // for await (const chunk of client.streamContent(
    //   "Describe this image in detail",
    //   [{ path: imagePath }]
    // )) {
    //   console.log("Chunk:", JSON.stringify(chunk));
    // }
  } catch (error) {
    console.error("Error:", error);
  }
}

main();
// Adapter: GeminiClient wrapper around the @google-cloud/vertexai SDK
import { VertexAI } from "@google-cloud/vertexai";
import { readFileSync } from "fs";
/** Vertex AI harm categories that safety filtering can be configured for. */
type HarmCategory =
| "HARM_CATEGORY_HATE_SPEECH"
| "HARM_CATEGORY_DANGEROUS_CONTENT"
| "HARM_CATEGORY_SEXUALLY_EXPLICIT"
| "HARM_CATEGORY_HARASSMENT";
/** Blocking threshold applied to a harm category ("OFF" disables the filter). */
type HarmThreshold =
| "BLOCK_LOW_AND_ABOVE"
| "BLOCK_MEDIUM_AND_ABOVE"
| "BLOCK_ONLY_HIGH"
| "BLOCK_NONE"
| "OFF";
/** One (category, threshold) pair passed through to the Vertex AI model. */
interface SafetySetting {
category: HarmCategory;
threshold: HarmThreshold;
}
/**
 * Generation parameters forwarded to the model. All fields are optional;
 * unset fields fall back to DEFAULT_CONFIG and then to service defaults.
 */
interface GenerationConfig {
maxOutputTokens?: number;
temperature?: number;
topP?: number;
topK?: number;
candidateCount?: number;
// responseLogprobs/logprobs request token log-probabilities in the response.
seed?: number;
responseLogprobs?: boolean;
logprobs?: number;
// When "application/json", responseSchema constrains the output structure.
responseMimeType?: "application/json" | "text/plain";
responseSchema?: any;
}
/** Constructor options for GeminiClient; merged over DEFAULT_CONFIG. */
interface GeminiOptions {
project?: string;
location?: string;
model?: string;
safetySettings?: SafetySetting[];
generationConfig?: GenerationConfig;
}
/** A local file to attach to a request; mimeType is inferred when omitted. */
interface FileInput {
path: string;
mimeType?: string;
}
const DEFAULT_CONFIG: GeminiOptions = {
project: process.env.GOOGLE_CLOUD_PROJECT,
location: "us-central1",
model: "gemini-1.5-flash-002",
safetySettings: [
{
category: "HARM_CATEGORY_HATE_SPEECH",
threshold: "OFF",
},
{
category: "HARM_CATEGORY_DANGEROUS_CONTENT",
threshold: "OFF",
},
{
category: "HARM_CATEGORY_SEXUALLY_EXPLICIT",
threshold: "OFF",
},
{
category: "HARM_CATEGORY_HARASSMENT",
threshold: "OFF",
},
],
generationConfig: {
maxOutputTokens: 8192,
temperature: 0.2,
topP: 0.95,
responseLogprobs: true,
logprobs: 5,
},
};
/**
 * Thin wrapper around the Vertex AI Node SDK for multimodal Gemini requests.
 * Merges caller options over DEFAULT_CONFIG and exposes non-streaming and
 * streaming content generation with inline (base64) file attachments.
 */
export class GeminiClient {
  // NOTE(review): typed `any` because the SDK's preview surface is used;
  // tighten once @google-cloud/vertexai exports stable types for it.
  private vertexAI: any;
  private model: any;
  private config: GeminiOptions;

  /**
   * @param options Partial overrides for DEFAULT_CONFIG. generationConfig is
   *   merged field-by-field so overriding one field keeps the other defaults.
   * @throws Error when no project ID is available (options or env).
   */
  constructor(options: Partial<GeminiOptions> = {}) {
    this.config = {
      ...DEFAULT_CONFIG,
      ...options,
      generationConfig: {
        ...DEFAULT_CONFIG.generationConfig,
        ...options.generationConfig,
      },
      safetySettings: options.safetySettings || DEFAULT_CONFIG.safetySettings,
    };
    if (!this.config.project) {
      throw new Error(
        "Project ID is required. Set GOOGLE_CLOUD_PROJECT env variable or pass in options."
      );
    }
    this.vertexAI = new VertexAI({
      project: this.config.project,
      location: this.config.location,
    });
    // The preview surface is required for responseSchema / logprobs options.
    this.model = this.vertexAI.preview.getGenerativeModel({
      model: this.config.model,
      generationConfig: this.config.generationConfig,
      safetySettings: this.config.safetySettings,
    });
  }

  /**
   * Infer a mime type from a file's extension. Falls back to "image/jpeg"
   * (the previous hard-coded default) for unknown extensions, so existing
   * callers observe identical behavior for .jpg and unrecognized inputs.
   */
  private inferMimeType(filePath: string): string {
    const ext = filePath.slice(filePath.lastIndexOf(".") + 1).toLowerCase();
    const byExtension: Record<string, string> = {
      jpg: "image/jpeg",
      jpeg: "image/jpeg",
      png: "image/png",
      webp: "image/webp",
      gif: "image/gif",
      pdf: "application/pdf",
    };
    return byExtension[ext] ?? "image/jpeg";
  }

  /**
   * Read a local file and encode it as an inline base64 request part.
   */
  private async processFile(file: FileInput): Promise<{
    inlineData: { mimeType: string; data: string };
  }> {
    const fileData = readFileSync(file.path);
    const base64Data = fileData.toString("base64");
    return {
      inlineData: {
        // Fix: an explicit mimeType still wins, but files without one are
        // now labeled by extension instead of always being sent as
        // image/jpeg (which mislabeled .png/.webp/.pdf inputs).
        mimeType: file.mimeType || this.inferMimeType(file.path),
        data: base64Data,
      },
    };
  }

  /**
   * Generate content from a prompt plus optional file attachments.
   *
   * @param prompt    User prompt text (appended after the file parts).
   * @param files     Local files to inline into the request.
   * @param streaming When true, returns the SDK's streaming handle
   *                  (see streamContent for an async-iterator wrapper).
   * @throws Re-throws any SDK error after logging it with context.
   */
  async generateContent(
    prompt: string,
    files: FileInput[] = [],
    streaming: boolean = false
  ) {
    try {
      // Encode all attachments concurrently.
      const processedFiles = await Promise.all(
        files.map((file) => this.processFile(file))
      );
      const request = {
        contents: [
          {
            role: "user",
            parts: [...processedFiles, { text: prompt }],
          },
        ],
      };
      if (streaming) {
        return await this.model.generateContentStream(request);
      } else {
        return await this.model.generateContent(request);
      }
    } catch (error) {
      console.error("Error generating content:", error);
      throw error;
    }
  }

  /**
   * Generate content and yield response chunks as they arrive.
   * Returns the final aggregated response when the stream completes.
   */
  async *streamContent(prompt: string, files: FileInput[] = []) {
    const streamingResp = await this.generateContent(prompt, files, true);
    for await (const chunk of streamingResp.stream) {
      yield chunk;
    }
    return await streamingResp.response;
  }
}
// Example usage:
/*
const client = new GeminiClient({
project: 'your-project',
generationConfig: {
temperature: 0.4,
responseLogprobs: true,
logprobs: 5
}
});
// Non-streaming usage
const response = await client.generateContent(
"Describe this image in detail",
[{ path: "path/to/image.jpg" }]
);
// Streaming usage
for await (const chunk of client.streamContent(
"Describe this image in detail",
[{ path: "path/to/image.jpg" }]
)) {
console.log(chunk);
}
*/