Responses updates.

This commit is contained in:
jrmyr 2025-04-27 19:28:50 +00:00
parent c533f74b32
commit 2eeb70b114
2 changed files with 328 additions and 154 deletions

View File

@ -1,5 +1,8 @@
// _opt/responses.js
/**
* Simplified OpenAI Responses module for Discord.
* Listens to message events, sends chat queries to the OpenAI Responses API,
* and handles text or image (function_call) outputs.
*/
import fs from 'fs/promises';
import path from 'path';
import { OpenAI } from 'openai';
@ -10,10 +13,10 @@ import { AttachmentBuilder } from 'discord.js';
const MAX_DISCORD_MSG_LENGTH = 2000;
/**
 * Split a long message into chunks of at most maxLength, preserving code fences.
 * @param {string} text - Full message text to split.
 * @param {number} maxLength - Maximum length per chunk.
 * @returns {string[]} Array of message chunks.
*/
function splitMessage(text, maxLength = MAX_DISCORD_MSG_LENGTH) {
const lines = text.split(/\n/);
@ -68,7 +71,10 @@ function splitMessage(text, maxLength = MAX_DISCORD_MSG_LENGTH) {
}
/**
 * Load AI system prompt text from a file.
 * @param {string} filePath - Path to the prompt file.
 * @param {object} logger - Logger instance for reporting.
 * @returns {Promise<string>} Promise resolving to the prompt text or empty string.
*/
async function loadSystemPrompt(filePath, logger) {
try {
@ -82,9 +88,12 @@ async function loadSystemPrompt(filePath, logger) {
}
/**
 * Determine whether the bot should respond to a message.
 * Triggers when the bot is mentioned or when the message is a direct reply.
 * @param {Message} message - The incoming Discord message.
 * @param {string} botId - The bot user ID.
 * @param {object} logger - Logger for debugging.
 * @returns {Promise<boolean>} True if the bot should respond.
*/
async function shouldRespond(message, botId, logger) {
if (message.author.bot || !botId) return false;
@ -101,14 +110,22 @@ async function shouldRespond(message, botId, logger) {
}
/**
 * Cache the last AI response ID for a channel or thread, for context continuity.
* @param {object} client - Discord client with pocketbase cache.
* @param {string} key - Channel or thread identifier.
* @param {string} id - The AI response ID to cache.
* @param {number} ttlSeconds - Time-to-live for the cache entry in seconds.
*/
function cacheResponse(client, key, id, ttlSeconds) {
  // Persist the response ID only when a pocketbase cache is available;
  // silently no-op otherwise so callers need no guard.
  const cache = client.pb?.cache;
  if (cache != null) {
    cache.set(key, id, ttlSeconds);
  }
}
/**
 * Award token usage points to a user for AI output via the scorekeeper.
* @param {object} client - Discord client with scorekeeper.
* @param {string} guildId - ID of the guild (server).
* @param {string} userId - ID of the user.
* @param {number} amount - Number of tokens to award.
*/
function awardOutput(client, guildId, userId, amount) {
if (client.scorekeeper && amount > 0) {
@ -118,8 +135,13 @@ function awardOutput(client, guildId, userId, amount) {
}
/**
 * Handle an OpenAI 'generate_image' function call response.
 * Downloads generated images, saves to disk, and replies with attachments.
* @param {object} client - Discord client instance.
* @param {Message} message - The original Discord message to reply to.
* @param {object} resp - OpenAI Responses API response containing function_call.
* @param {object} cfg - Module configuration for image generation.
* @returns {Promise<boolean>} True if the function call was handled.
*/
async function handleImage(client, message, resp, cfg) {
const calls = Array.isArray(resp.output) ? resp.output : [];
@ -132,69 +154,91 @@ async function handleImage(client, message, resp, cfg) {
await message.reply('Cannot generate image: empty prompt.');
return true;
}
// Determine image size based on aspect: square, landscape, or portrait
// Square will always use 1024x1024
let size;
switch (args.aspect) {
case 'landscape':
size = '1792x1024';
break;
case 'portrait':
size = '1024x1792';
break;
case 'square':
size = '1024x1024';
break;
default:
size = '1024x1024';
// Use image model defined in config
const model = cfg.imageGeneration.defaultModel;
const promptText = args.prompt;
// Determine number of images (1-10); DALL·E-3 only supports 1
let count = 1;
if (args.n != null) {
const nVal = typeof args.n === 'number' ? args.n : parseInt(args.n, 10);
if (!Number.isNaN(nVal)) count = nVal;
}
// Determine image quality, defaulting to cfg.imageGeneration.defaultQuality
const quality = ['standard', 'hd'].includes(args.quality)
? args.quality
: cfg.imageGeneration.defaultQuality;
// clamp between 1 and 10
count = Math.max(1, Math.min(10, count));
if (model === 'dall-e-3') count = 1;
const size = args.size || 'auto';
// Determine quality based on config and model constraints
let quality = args.quality || cfg.imageGeneration.defaultQuality;
if (model === 'gpt-image-1') {
if (!['low', 'medium', 'high', 'auto'].includes(quality)) quality = 'auto';
} else if (model === 'dall-e-2') {
quality = 'standard';
} else if (model === 'dall-e-3') {
if (!['standard', 'hd', 'auto'].includes(quality)) quality = 'standard';
}
const background = args.background;
const moderation = args.moderation;
const outputFormat = args.output_format;
const compression = args.output_compression;
const style = args.style;
const user = args.user || message.author.id;
try {
// Generate image via OpenAI
const imgRes = await client.openai.images.generate({ model: 'dall-e-3', prompt: args.prompt, quality: quality, size: size, n: 1 });
const url = imgRes.data?.[0]?.url;
if (!url) throw new Error('No image URL');
// Download and save locally
const dl = await axios.get(url, { responseType: 'arraybuffer' });
const buf = Buffer.from(dl.data);
const filename = `${message.author.id}-${Date.now()}.png`;
const dir = cfg.imageGeneration.imageSavePath || './images';
await fs.mkdir(dir, { recursive: true });
const filePath = path.join(dir, filename);
await fs.writeFile(filePath, buf);
client.logger.info(`Saved image: ${filePath}`);
// Reply with attachment
const attachment = new AttachmentBuilder(buf, { name: filename });
await message.reply({ content: args.prompt, files: [attachment] });
// Follow-up recap to preserve conversation context (submit function tool output)
try {
const convKey = message.thread?.id || message.channel.id;
// Build a function_call_output input item for the Responses API
const toolOutputItem = {
type: 'function_call_output',
call_id: fn.call_id,
output: JSON.stringify({ url }),
};
const recapBody = {
model: cfg.defaultModel,
// re-use original system/developer instructions
instructions: client.responsesSystemPrompt,
previous_response_id: resp.id,
input: [toolOutputItem],
max_output_tokens: Math.min(100, cfg.defaultMaxTokens),
temperature: cfg.defaultTemperature,
};
const recapResp = await client.openai.responses.create(recapBody);
cacheResponse(client, convKey, recapResp.id, Math.floor(cfg.conversationExpiry / 1000));
// Award tokens for the recap chat response
const recapTokens = recapResp.usage?.total_tokens ?? recapResp.usage?.completion_tokens ?? 0;
awardOutput(client, message.guild.id, message.author.id, recapTokens);
} catch (err) {
client.logger.error(`Recap failed: ${err.message}`);
// Build generate parameters
const genParams = { model, prompt: promptText, n: count, size, quality, user };
// response_format supported for DALL·E models (not gpt-image-1)
if (model !== 'gpt-image-1' && args.response_format) {
genParams['response_format'] = args.response_format;
}
// gpt-image-1 supports background, moderation, output_format, and output_compression
if (model === 'gpt-image-1') {
if (background) genParams['background'] = background;
if (moderation) genParams['moderation'] = moderation;
if (outputFormat) {
genParams['output_format'] = outputFormat;
// only support compression for JPEG or WEBP formats
if (['jpeg','webp'].includes(outputFormat) && typeof compression === 'number') {
genParams['output_compression'] = compression;
}
}
}
// dall-e-3 supports style
if (model === 'dall-e-3' && style) {
genParams['style'] = style;
}
// Generate images via OpenAI Images API
const imgRes = await client.openai.images.generate(genParams);
const images = imgRes.data || [];
if (!images.length) throw new Error('No images generated');
// Ensure save directory exists
const dir = cfg.imageGeneration?.imageSavePath || './images';
await fs.mkdir(dir, { recursive: true });
const attachments = [];
const outputs = [];
// Process each generated image
for (let i = 0; i < images.length; i++) {
const img = images[i];
let buffer, ext = outputFormat || 'png';
if (img.b64_json) {
buffer = Buffer.from(img.b64_json, 'base64');
outputs.push({ b64_json: img.b64_json });
} else if (img.url) {
const dl = await axios.get(img.url, { responseType: 'arraybuffer' });
buffer = Buffer.from(dl.data);
// derive extension from URL if possible
const parsed = path.extname(img.url.split('?')[0]).replace(/^[.]/, '');
if (parsed) ext = parsed;
outputs.push({ url: img.url });
} else {
throw new Error('No image data');
}
const filename = `${message.author.id}-${Date.now()}-${i}.${ext}`;
const filePath = path.join(dir, filename);
await fs.writeFile(filePath, buffer);
client.logger.info(`Saved image: ${filePath}`);
attachments.push(new AttachmentBuilder(buffer, { name: filename }));
}
// Reply with attachments
await message.reply({ content: promptText, files: attachments });
} catch (err) {
client.logger.error(`Image error: ${err.message}`);
await message.reply(`Image generation error: ${err.message}`);
@ -203,12 +247,11 @@ async function handleImage(client, message, resp, cfg) {
}
/**
 * Main message handler for Discord 'messageCreate' events.
 * Sends user messages to the AI, handles text or image responses, and awards tokens.
* @param {object} client - Discord client instance.
* @param {object} cfg - Module configuration.
* @param {Message} message - Incoming Discord message.
*/
async function onMessage(client, cfg, message) {
const logger = client.logger;
@ -252,19 +295,58 @@ async function onMessage(client, cfg, message) {
// Assemble any enabled tools
const tools = [];
if (cfg.tools?.imageGeneration) {
const model = cfg.imageGeneration.defaultModel;
// Configure allowed sizes per model
let sizeEnum;
switch (model) {
case 'gpt-image-1': sizeEnum = ['auto','1024x1024','1536x1024','1024x1536']; break;
case 'dall-e-2': sizeEnum = ['256x256','512x512','1024x1024']; break;
case 'dall-e-3': sizeEnum = ['auto','1024x1024','1792x1024','1024x1792']; break;
default: sizeEnum = ['auto','1024x1024'];
}
// Configure quality options per model
let qualityEnum;
switch (model) {
case 'gpt-image-1': qualityEnum = ['auto','low','medium','high']; break;
case 'dall-e-2': qualityEnum = ['standard']; break;
case 'dall-e-3': qualityEnum = ['auto','standard','hd']; break;
default: qualityEnum = ['auto','standard'];
}
// Build schema properties dynamically
const properties = {
prompt: { type: 'string', description: 'Text description of desired image(s).' },
n: { type: 'number', description: 'Number of images to generate.' },
size: { type: 'string', enum: sizeEnum, description: 'Image size.' },
quality: { type: 'string', enum: qualityEnum, description: 'Image quality.' },
user: { type: 'string', description: 'Unique end-user identifier.' }
};
if (model !== 'gpt-image-1') {
properties.response_format = { type: 'string', enum: ['url','b64_json'], description: 'Format of returned images.' };
}
if (model === 'gpt-image-1') {
properties.background = { type: 'string', enum: ['transparent','opaque','auto'], description: 'Background transparency.' };
properties.moderation = { type: 'string', enum: ['low','auto'], description: 'Content moderation level.' };
properties.output_format = { type: 'string', enum: ['png','jpeg','webp'], description: 'Output image format.' };
properties.output_compression = { type: 'number', description: 'Compression level (0-100).' };
}
if (model === 'dall-e-3') {
properties.style = { type: 'string', enum: ['vivid','natural'], description: 'Style option for dall-e-3.' };
}
// Determine required fields
const required = ['prompt','n','size','quality','user'];
if (model !== 'gpt-image-1') required.push('response_format');
if (model === 'gpt-image-1') required.push('background','moderation','output_format','output_compression');
if (model === 'dall-e-3') required.push('style');
// Register the function tool
tools.push({
type: 'function',
name: 'generate_image',
description: 'Generate an image with a given prompt, aspect, and quality.',
description: `Generate images using model ${model} with requested parameters.`,
parameters: {
type: 'object',
properties: {
prompt: { type: 'string' },
aspect: { type: 'string', enum: ['square','portrait','landscape'] },
quality: { type: 'string', enum: ['standard', 'hd'] },
},
required: ['prompt','aspect','quality'],
additionalProperties: false,
properties,
required,
additionalProperties: false
},
strict: true
});
@ -346,11 +428,11 @@ async function onMessage(client, cfg, message) {
}
/**
 * Send a standalone narrative via the Responses API to a Discord channel or thread.
 * @param {object} client - Discord client instance.
 * @param {object} cfg - Module configuration.
 * @param {string} channelId - Target channel or thread ID.
 * @param {string} text - Narrative prompt text.
*/
export async function sendNarrative(client, cfg, channelId, text) {
const logger = client.logger;
@ -387,7 +469,12 @@ export async function sendNarrative(client, cfg, channelId, text) {
}
/**
 * Initialize the Responses module:
* - Load the system prompt.
* - Configure the OpenAI client.
* - Register the message event handler.
* @param {object} client - Discord client instance.
* @param {object} clientConfig - Full client configuration object.
*/
export async function init(client, clientConfig) {
const cfg = clientConfig.responses;

View File

@ -1,13 +1,18 @@
/**
* Slash command module for '/query'.
* Defines and handles the /query command via the OpenAI Responses API,
* including optional image generation function calls.
*/
import { SlashCommandBuilder, AttachmentBuilder } from 'discord.js';
import fs from 'fs/promises';
import path from 'path';
import axios from 'axios';
/**
 * Split long text into chunks safe for Discord messaging.
 * @param {string} text - Text to split.
 * @param {number} max - Max characters per chunk.
 * @returns {string[]} Array of message chunks.
*/
function splitLongMessage(text, max = 2000) {
const lines = text.split('\n');
@ -26,8 +31,14 @@ function splitLongMessage(text, max = 2000) {
}
/**
 * Handle an OpenAI 'generate_image' function call in a slash-command interaction.
 * Downloads generated images, saves locally, and replies with attachments.
* @param {object} client - Discord client instance.
* @param {CommandInteraction} interaction - The slash command interaction.
* @param {object} resp - OpenAI Responses API response.
* @param {object} cfg - Module configuration.
* @param {boolean} ephemeral - Whether to reply ephemerally.
* @returns {Promise<boolean>} True if a function call was handled.
*/
async function handleImageInteraction(client, interaction, resp, cfg, ephemeral) {
const calls = Array.isArray(resp.output) ? resp.output : [];
@ -40,55 +51,90 @@ async function handleImageInteraction(client, interaction, resp, cfg, ephemeral)
await interaction.editReply({ content: 'Cannot generate image: empty prompt.', ephemeral });
return true;
}
let size;
switch (args.aspect) {
case 'landscape': size = '1792x1024'; break;
case 'portrait': size = '1024x1792'; break;
case 'square': default: size = '1024x1024'; break;
// Always use image model defined in config
const model = cfg.imageGeneration.defaultModel;
const promptText = args.prompt;
// Determine number of images (1-10); DALL·E-3 only supports 1
let count = 1;
if (args.n != null) {
const nVal = typeof args.n === 'number' ? args.n : parseInt(args.n, 10);
if (!Number.isNaN(nVal)) count = nVal;
}
const quality = ['standard', 'hd'].includes(args.quality)
? args.quality
: cfg.imageGeneration.defaultQuality;
// clamp
count = Math.max(1, Math.min(10, count));
if (model === 'dall-e-3') count = 1;
const size = args.size || 'auto';
// Determine quality based on config and model constraints
let quality = args.quality || cfg.imageGeneration.defaultQuality;
if (model === 'gpt-image-1') {
if (!['low', 'medium', 'high', 'auto'].includes(quality)) quality = 'auto';
} else if (model === 'dall-e-2') {
quality = 'standard';
} else if (model === 'dall-e-3') {
if (!['standard', 'hd', 'auto'].includes(quality)) quality = 'standard';
}
const background = args.background;
const moderation = args.moderation;
const outputFormat = args.output_format;
const compression = args.output_compression;
const style = args.style;
const user = args.user || interaction.user.id;
try {
const imgRes = await client.openai.images.generate({ model: 'dall-e-3', prompt: args.prompt, quality, size, n: 1 });
const url = imgRes.data?.[0]?.url;
if (!url) throw new Error('No image URL');
const dl = await axios.get(url, { responseType: 'arraybuffer' });
const buf = Buffer.from(dl.data);
const filename = `${interaction.user.id}-${Date.now()}.png`;
const dir = cfg.imageGeneration.imageSavePath || './images';
await fs.mkdir(dir, { recursive: true });
const filePath = path.join(dir, filename);
await fs.writeFile(filePath, buf);
client.logger.info(`Saved image: ${filePath}`);
const attachment = new AttachmentBuilder(buf, { name: filename });
await interaction.editReply({ content: args.prompt, files: [attachment] });
// Recap output for context
try {
const convKey = interaction.channelId;
const toolOutputItem = {
type: 'function_call_output',
call_id: fn.call_id,
output: JSON.stringify({ url }),
};
const recapBody = {
model: cfg.defaultModel,
instructions: client.responsesSystemPrompt,
previous_response_id: resp.id,
input: [toolOutputItem],
max_output_tokens: Math.min(100, cfg.defaultMaxTokens),
temperature: cfg.defaultTemperature,
};
const recapResp = await client.openai.responses.create(recapBody);
client.pb?.cache?.set(convKey, recapResp.id, Math.floor(cfg.conversationExpiry / 1000));
const recapTokens = recapResp.usage?.total_tokens ?? recapResp.usage?.completion_tokens ?? 0;
if (client.scorekeeper && recapTokens > 0) {
client.scorekeeper.addOutput(interaction.guildId, interaction.user.id, recapTokens)
.catch(e => client.logger.error(`Scorekeeper error: ${e.message}`));
}
} catch (err) {
client.logger.error(`Recap failed: ${err.message}`);
// Build generate parameters
const genParams = { model, prompt: promptText, n: count, size, quality, user };
// response_format supported for DALL·E models (not gpt-image-1)
if (model !== 'gpt-image-1' && args.response_format) {
genParams['response_format'] = args.response_format;
}
// gpt-image-1 supports background, moderation, output_format, and output_compression
if (model === 'gpt-image-1') {
if (background) genParams['background'] = background;
if (moderation) genParams['moderation'] = moderation;
if (outputFormat) {
genParams['output_format'] = outputFormat;
// only support compression for JPEG or WEBP formats
if (['jpeg','webp'].includes(outputFormat) && typeof compression === 'number') {
genParams['output_compression'] = compression;
}
}
}
// dall-e-3 supports style
if (model === 'dall-e-3' && style) {
genParams['style'] = style;
}
// Generate images via OpenAI Images API
const imgRes = await client.openai.images.generate(genParams);
const images = imgRes.data || [];
if (!images.length) throw new Error('No images generated');
// Ensure save directory exists
const dir = cfg.imageGeneration?.imageSavePath || './images';
await fs.mkdir(dir, { recursive: true });
const attachments = [];
const outputs = [];
// Process each generated image
for (let i = 0; i < images.length; i++) {
const img = images[i];
let buffer, ext = outputFormat || 'png';
if (img.b64_json) {
buffer = Buffer.from(img.b64_json, 'base64');
outputs.push({ b64_json: img.b64_json });
} else if (img.url) {
const dl = await axios.get(img.url, { responseType: 'arraybuffer' });
buffer = Buffer.from(dl.data);
const parsed = path.extname(img.url.split('?')[0]).replace(/^[.]/, '');
if (parsed) ext = parsed;
outputs.push({ url: img.url });
} else {
throw new Error('No image data');
}
const filename = `${interaction.user.id}-${Date.now()}-${i}.${ext}`;
const filePath = path.join(dir, filename);
await fs.writeFile(filePath, buffer);
client.logger.info(`Saved image: ${filePath}`);
attachments.push(new AttachmentBuilder(buffer, { name: filename }));
}
// Reply with attachments
await interaction.editReply({ content: promptText, files: attachments });
return true;
} catch (err) {
client.logger.error(`Image generation error: ${err.message}`);
@ -103,6 +149,9 @@ async function handleImageInteraction(client, interaction, resp, cfg, ephemeral)
* prompt - Required string: the text to send to AI.
* ephemeral - Optional boolean: send response ephemerally (default: true).
*/
/**
* Slash command definitions and handlers for the '/query' command.
*/
export const commands = [
{
data: new SlashCommandBuilder()
@ -160,19 +209,57 @@ export const commands = [
// Assemble enabled tools
const tools = [];
if (cfg.tools?.imageGeneration) {
const model = cfg.imageGeneration.defaultModel;
// Configure allowed sizes per model
let sizeEnum;
switch (model) {
case 'gpt-image-1': sizeEnum = ['auto','1024x1024','1536x1024','1024x1536']; break;
case 'dall-e-2': sizeEnum = ['256x256','512x512','1024x1024']; break;
case 'dall-e-3': sizeEnum = ['auto','1024x1024','1792x1024','1024x1792']; break;
default: sizeEnum = ['auto','1024x1024'];
}
// Configure quality options per model
let qualityEnum;
switch (model) {
case 'gpt-image-1': qualityEnum = ['auto','low','medium','high']; break;
case 'dall-e-2': qualityEnum = ['standard']; break;
case 'dall-e-3': qualityEnum = ['auto','standard','hd']; break;
default: qualityEnum = ['auto','standard'];
}
// Build schema properties dynamically
const properties = {
prompt: { type: 'string', description: 'Text description of desired image(s).' },
n: { type: 'number', description: 'Number of images to generate.' },
size: { type: 'string', enum: sizeEnum, description: 'Image size.' },
quality: { type: 'string', enum: qualityEnum, description: 'Image quality.' },
user: { type: 'string', description: 'Unique end-user identifier.' }
};
if (model !== 'gpt-image-1') {
properties.response_format = { type: 'string', enum: ['url','b64_json'], description: 'Format of returned images.' };
}
if (model === 'gpt-image-1') {
properties.background = { type: 'string', enum: ['transparent','opaque','auto'], description: 'Background transparency.' };
properties.moderation = { type: 'string', enum: ['low','auto'], description: 'Content moderation level.' };
properties.output_format = { type: 'string', enum: ['png','jpeg','webp'], description: 'Output image format.' };
properties.output_compression = { type: 'number', description: 'Compression level (0-100).' };
}
if (model === 'dall-e-3') {
properties.style = { type: 'string', enum: ['vivid','natural'], description: 'Style option for dall-e-3.' };
}
// Determine required fields
const required = ['prompt','n','size','quality','user'];
if (model !== 'gpt-image-1') required.push('response_format');
if (model === 'gpt-image-1') required.push('background','moderation','output_format','output_compression');
if (model === 'dall-e-3') required.push('style');
tools.push({
type: 'function',
name: 'generate_image',
description: 'Generate an image with a given prompt, aspect, and quality.',
description: `Generate images using model ${model} with requested parameters.`,
parameters: {
type: 'object',
properties: {
prompt: { type: 'string' },
aspect: { type: 'string', enum: ['square','portrait','landscape'] },
quality: { type: 'string', enum: ['standard','hd'] },
},
required: ['prompt','aspect','quality'],
additionalProperties: false,
properties,
required,
additionalProperties: false
},
strict: true,
});