diff --git a/_opt/responses.js b/_opt/responses.js
index 6b60bec..a8ca383 100644
--- a/_opt/responses.js
+++ b/_opt/responses.js
@@ -274,9 +274,11 @@ async function onMessage(client, cfg, message) {
   }
   // Build request body, including replied-to message context and mention of who spoke
   let referencePrefix = '';
+  let referenceMessage = null;
   if (message.reference?.messageId) {
     try {
       const ref = await message.channel.messages.fetch(message.reference.messageId);
+      referenceMessage = ref;
       const refContent = ref.content || '';
       if (ref.author.id === botId) {
         referencePrefix = `You said: ${refContent}`;
@@ -365,6 +367,23 @@ async function onMessage(client, cfg, message) {
     body.tools = tools;
   }

+  // If there are image attachments in the referenced or current message, wrap text and images into a multimodal message
+  const refImages = referenceMessage
+    ? referenceMessage.attachments.filter(att => /\.(png|jpe?g|gif|webp)$/i.test(att.name || att.url))
+    : new Map();
+  const currImages = message.attachments.filter(att => /\.(png|jpe?g|gif|webp)$/i.test(att.name || att.url));
+  if (refImages.size > 0 || currImages.size > 0) {
+    // build ordered content items: text first, then referenced images, then current images
+    const contentItems = [{ type: 'input_text', text: userInput }];
+    for (const att of refImages.values()) {
+      contentItems.push({ type: 'input_image', detail: 'auto', image_url: att.url });
+    }
+    for (const att of currImages.values()) {
+      contentItems.push({ type: 'input_image', detail: 'auto', image_url: att.url });
+    }
+    body.input = [{ type: 'message', role: 'user', content: contentItems }];
+  }
+
   // Call OpenAI Responses
   logger.debug(`Calling AI with body: ${JSON.stringify(body)}`);
   const resp = await client.openai.responses.create(body);
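
For reference, a minimal sketch of the multimodal request body the new branch produces when an image attachment is found. The model name, prompt text, and attachment URL below are hypothetical placeholders; at runtime the real values come from the bot's config, the user's message, and the Discord CDN URLs on the attachments.

  // Illustrative sketch only; model and URL are placeholder values, not taken from this repo's config.
  const body = {
    model: 'gpt-4o-mini', // hypothetical; the actual model comes from cfg
    input: [{
      type: 'message',
      role: 'user',
      content: [
        { type: 'input_text', text: 'What is in this picture?' },
        { type: 'input_image', detail: 'auto', image_url: 'https://cdn.discordapp.com/attachments/123/456/photo.png' },
      ],
    }],
  };
  // const resp = await client.openai.responses.create(body);

Placing the text item first keeps the prompt adjacent to the images it refers to, and detail: 'auto' leaves the choice of image-processing detail to the API.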