From 7df051795aa8779c6633ce0c9a7ac45fdcb49de6 Mon Sep 17 00:00:00 2001
From: jrmyr
Date: Mon, 5 May 2025 12:08:15 +0000
Subject: [PATCH] Responses: send image attachments as multimodal input

---
 _opt/responses.js | 19 +++++++++++++++++++
 1 file changed, 19 insertions(+)

diff --git a/_opt/responses.js b/_opt/responses.js
index 6b60bec..a8ca383 100644
--- a/_opt/responses.js
+++ b/_opt/responses.js
@@ -274,9 +274,11 @@ async function onMessage(client, cfg, message) {
   }
   // Build request body, including replied-to message context and mention of who spoke
   let referencePrefix = '';
+  let referenceMessage = null;
   if (message.reference?.messageId) {
     try {
       const ref = await message.channel.messages.fetch(message.reference.messageId);
+      referenceMessage = ref;
       const refContent = ref.content || '';
       if (ref.author.id === botId) {
         referencePrefix = `You said: ${refContent}`;
@@ -365,6 +367,23 @@ async function onMessage(client, cfg, message) {
     body.tools = tools;
   }
 
+  // If there are image attachments in the referenced or current message, wrap text and images into a multimodal message
+  const refImages = referenceMessage
+    ? referenceMessage.attachments.filter(att => /\.(png|jpe?g|gif|webp)$/i.test(att.name || att.url))
+    : new Map();
+  const currImages = message.attachments.filter(att => /\.(png|jpe?g|gif|webp)$/i.test(att.name || att.url));
+  if (refImages.size > 0 || currImages.size > 0) {
+    // build ordered content items: text first, then referenced images, then current images
+    const contentItems = [{ type: 'input_text', text: userInput }];
+    for (const att of refImages.values()) {
+      contentItems.push({ type: 'input_image', detail: 'auto', image_url: att.url });
+    }
+    for (const att of currImages.values()) {
+      contentItems.push({ type: 'input_image', detail: 'auto', image_url: att.url });
+    }
+    body.input = [{ type: 'message', role: 'user', content: contentItems }];
+  }
+
   // Call OpenAI Responses
   logger.debug(`Calling AI with body: ${JSON.stringify(body)}`);
   const resp = await client.openai.responses.create(body);
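
For reviewers, a minimal standalone sketch of the new hunk and the request body it
produces (Node.js, no dependencies). Plain Maps stand in for discord.js Collections
here: Collection extends Map, but plain Map has no .filter, so the sketch filters by
hand. The attachment names, ids, urls, model, and userInput value are all hypothetical.

    'use strict';

    const IMAGE_RE = /\.(png|jpe?g|gif|webp)$/i;

    // Stand-in for message.attachments: Map of attachment id -> attachment
    const currentAttachments = new Map([
      ['111', { name: 'diagram.png', url: 'https://cdn.example/diagram.png' }],
      ['222', { name: 'notes.txt', url: 'https://cdn.example/notes.txt' }],
    ]);
    const referencedAttachments = new Map(); // replied-to message: no attachments

    // Equivalent of Collection#filter for plain Maps
    function imageAttachments(attachments) {
      const out = new Map();
      for (const [id, att] of attachments) {
        if (IMAGE_RE.test(att.name || att.url)) out.set(id, att);
      }
      return out;
    }

    const refImages = imageAttachments(referencedAttachments);
    const currImages = imageAttachments(currentAttachments);

    const userInput = 'What does this diagram show?'; // cleaned message text
    const body = { model: 'gpt-4o', input: userInput }; // placeholder model

    if (refImages.size > 0 || currImages.size > 0) {
      // Same ordering as the patch: text first, then referenced images, then current images
      const contentItems = [{ type: 'input_text', text: userInput }];
      for (const att of refImages.values()) {
        contentItems.push({ type: 'input_image', detail: 'auto', image_url: att.url });
      }
      for (const att of currImages.values()) {
        contentItems.push({ type: 'input_image', detail: 'auto', image_url: att.url });
      }
      body.input = [{ type: 'message', role: 'user', content: contentItems }];
    }

    console.log(JSON.stringify(body, null, 2));
    // input becomes a single user message: one input_text item plus one
    // input_image item for diagram.png; notes.txt fails the extension test.

One caveat worth flagging in review: Discord CDN urls now carry signed query strings,
so the $-anchored extension test can miss an image whenever att.name is unset; when
Discord supplies att.contentType, checking for an image/* prefix is the more reliable
signal.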