const { readSecret, SECRET_KEYS } = require('./secrets');
const fetch = require('node-fetch').default;
const express = require('express');
const { jsonParser } = require('../express-common');
const { GEMINI_SAFETY } = require('../constants');

const API_MAKERSUITE = 'https://generativelanguage.googleapis.com';
// MIME type assumed when the client sends bare base64 instead of a data URL.
const DEFAULT_IMAGE_MIME_TYPE = 'image/png';

const router = express.Router();

/**
 * Extracts the MIME type and base64 payload from the image sent by the client.
 *
 * Accepts either a data URL (`data:<mime>[;params],<payload>`) or a bare
 * payload string. A bare payload is passed through unchanged and labelled
 * with DEFAULT_IMAGE_MIME_TYPE.
 *
 * @param {unknown} image Value of `request.body.image`.
 * @returns {{ mimeType: string, data: string } | null} The parsed parts, or
 * null when the value is not a non-empty string or is a malformed data URL.
 */
function parseImagePayload(image) {
    if (typeof image !== 'string' || image.length === 0) {
        return null;
    }

    const dataUrlPrefix = 'data:';
    if (!image.startsWith(dataUrlPrefix)) {
        return { mimeType: DEFAULT_IMAGE_MIME_TYPE, data: image };
    }

    const commaIndex = image.indexOf(',');
    if (commaIndex === -1) {
        return null;
    }

    // Header looks like "<mime>;base64"; only the part before the first ';' is the MIME type.
    const mimeType = image.slice(dataUrlPrefix.length, commaIndex).split(';')[0];
    const data = image.slice(commaIndex + 1);

    return mimeType && data ? { mimeType, data } : null;
}

/**
 * POST /caption-image
 *
 * Sends the prompt and image from the request body to the `generateContent`
 * endpoint and replies with `{ caption }`.
 *
 * Request body fields read here: `image`, `prompt`, `model` (defaults to
 * 'gemini-pro-vision'), `reverse_proxy` and `proxy_password`. When
 * `reverse_proxy` is set, its origin replaces the default API host and
 * `proxy_password` is used as the API key; otherwise the key is read via
 * `readSecret`.
 *
 * Responses:
 * - 200 `{ caption }` on success.
 * - 400 when `image` is missing or malformed.
 * - The upstream status with `{ error: true }` when the API call is not OK.
 * - 500 when the response has no candidates, no caption text, or on any
 *   unexpected error.
 */
router.post('/caption-image', jsonParser, async (request, response) => {
    try {
        const image = parseImagePayload(request.body.image);
        if (!image) {
            return response.status(400).send('Invalid or missing image data');
        }

        const apiKey = request.body.reverse_proxy ? request.body.proxy_password : readSecret(request.user.directories, SECRET_KEYS.MAKERSUITE);
        const apiUrl = new URL(request.body.reverse_proxy || API_MAKERSUITE);
        const model = request.body.model || 'gemini-pro-vision';
        const url = `${apiUrl.origin}/v1beta/models/${model}:generateContent?key=${apiKey}`;
        const body = {
            contents: [{
                parts: [
                    { text: request.body.prompt },
                    {
                        // The API takes the bare base64 payload in `data` and the real
                        // MIME type alongside it, not a full data URL labelled as PNG.
                        inlineData: {
                            mimeType: image.mimeType,
                            data: image.data,
                        },
                    }],
            }],
            safetySettings: GEMINI_SAFETY,
            generationConfig: { maxOutputTokens: 1000 },
        };

        console.log('Multimodal captioning request', model, body);

        const result = await fetch(url, {
            body: JSON.stringify(body),
            method: 'POST',
            headers: {
                'Content-Type': 'application/json',
            },
            timeout: 0,
        });

        if (!result.ok) {
            const error = await result.json();
            console.log(`Google AI Studio API returned error: ${result.status} ${result.statusText}`, error);
            return response.status(result.status).send({ error: true });
        }

        const data = await result.json();
        console.log('Multimodal captioning response', data);

        const candidates = data?.candidates;
        if (!Array.isArray(candidates) || candidates.length === 0) {
            return response.status(500).send('No candidates found, image was most likely filtered.');
        }

        // A candidate may come back without `content`/`parts`; treat that as
        // "no caption" rather than letting the property access throw.
        const caption = candidates[0]?.content?.parts?.[0]?.text;
        if (!caption) {
            return response.status(500).send('No caption found');
        }

        return response.json({ caption });
    } catch (error) {
        console.error(error);
        response.status(500).send('Internal server error');
    }
});

module.exports = { router };