// ClientX/_opt/responses.js

/**
* Simplified OpenAI Responses module for Discord.
* Listens to message events, sends chat queries to the OpenAI Responses API,
* and handles text or image (function_call) outputs.
*/
import fs from 'fs/promises';
import path from 'path';
import { OpenAI } from 'openai';
import axios from 'axios';
import { AttachmentBuilder, PermissionFlagsBits } from 'discord.js';
// Discord message max length
const MAX_DISCORD_MSG_LENGTH = 2000;
/**
* Split a long message into chunks of at most maxLength, preserving code fences.
* @param {string} text - Full message text to split.
* @param {number} maxLength - Maximum length per chunk.
* @returns {string[]} Array of message chunks.
*/
function splitMessage(text, maxLength = MAX_DISCORD_MSG_LENGTH) {
const lines = text.split(/\n/);
const chunks = [];
let chunk = '';
let codeBlockOpen = false;
let codeBlockFence = '```';
for (let line of lines) {
const trimmed = line.trim();
const isFenceLine = trimmed.startsWith('```');
if (isFenceLine) {
if (!codeBlockOpen) {
codeBlockOpen = true;
codeBlockFence = trimmed;
} else if (trimmed === '```') {
// closing fence
codeBlockOpen = false;
}
}
// include the newline that was removed by split
const segment = line + '\n';
// Reserve room for a closing fence ('\n```' = 4 chars) while inside a code
// block, so appending it never pushes a chunk past the Discord limit.
const limit = codeBlockOpen ? maxLength - 4 : maxLength;
// if adding segment exceeds the limit
if (chunk.length + segment.length > limit) {
if (chunk.length > 0) {
// close open code block if needed
if (codeBlockOpen) chunk += '\n```';
chunks.push(chunk);
// start new chunk, reopen code block if needed
chunk = codeBlockOpen ? (codeBlockFence + '\n' + segment) : segment;
continue;
}
// single segment too long, split it directly
let rest = segment;
while (rest.length > limit) {
let part = rest.slice(0, limit);
if (codeBlockOpen) part += '\n```';
chunks.push(part);
rest = codeBlockOpen ? (codeBlockFence + '\n' + rest.slice(limit)) : rest.slice(limit);
}
chunk = rest;
continue;
}
chunk += segment;
}
if (chunk) {
// close any unclosed code block
if (codeBlockOpen) chunk += '\n```';
chunks.push(chunk);
}
// remove trailing newline from each chunk
return chunks.map(c => c.endsWith('\n') ? c.slice(0, -1) : c);
}
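// Illustrative only (not executed here): a 2,500-character reply containing an
// open ```js fence is split so the first chunk is closed with ``` and the next
// chunk reopens with the original fence line, keeping each chunk independently
// renderable:
//   const chunks = splitMessage(longReply);
//   // every chunk satisfies chunk.length <= MAX_DISCORD_MSG_LENGTH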
/**
* Load AI system prompt text from a file.
* @param {string} filePath - Path to the prompt file.
* @param {object} logger - Logger instance for reporting.
* @returns {Promise<string>} Promise resolving to the prompt text or empty string.
*/
async function loadSystemPrompt(filePath, logger) {
try {
const prompt = await fs.readFile(path.resolve(filePath), 'utf8');
logger.info(`Loaded system prompt: ${filePath}`);
return prompt;
} catch (err) {
logger.error(`Failed to load system prompt: ${err.message}`);
return '';
}
}
/**
* Determine whether the bot should respond to a message.
* Triggers when the bot is mentioned or when the message is a direct reply to one of the bot's messages.
* @param {Message} message - The incoming Discord message.
* @param {string} botId - The bot user ID.
* @param {object} logger - Logger for debugging.
* @returns {Promise<boolean>} True if the bot should respond.
*/
async function shouldRespond(message, botId, logger) {
if (message.author.bot || !botId) return false;
const isMention = message.mentions.users.has(botId);
let isReply = false;
if (message.reference?.messageId) {
try {
const ref = await message.channel.messages.fetch(message.reference.messageId);
isReply = ref.author.id === botId;
} catch { /* referenced message may have been deleted or be inaccessible */ }
}
logger.debug(`Trigger? mention=${isMention} reply=${isReply}`);
return isMention || isReply;
}
/**
* Cache the last AI response ID for a channel or thread, for context continuity.
* @param {object} client - Discord client with pocketbase cache.
* @param {string} key - Channel or thread identifier.
* @param {string} id - The AI response ID to cache.
* @param {number} ttlSeconds - Time-to-live for the cache entry in seconds.
*/
function cacheResponse(client, key, id, ttlSeconds) {
client.pb?.cache?.set(key, id, ttlSeconds);
}
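// Note: ttlSeconds is in seconds; cfg.conversationExpiry is kept in
// milliseconds and converted via Math.floor(ms / 1000) at the call site below.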
/**
* Award token usage points to a user for AI output via the scorekeeper.
* @param {object} client - Discord client with scorekeeper.
* @param {string} guildId - ID of the guild (server).
* @param {string} userId - ID of the user.
* @param {number} amount - Number of tokens to award.
*/
function awardOutput(client, guildId, userId, amount) {
if (client.scorekeeper && amount > 0) {
client.scorekeeper.addOutput(guildId, userId, amount)
.catch(err => client.logger.error(`Scorekeeper error: ${err.message}`));
}
}
/**
* Handle an OpenAI 'generate_image' function call response.
* Downloads generated images, saves to disk, and replies with attachments.
* @param {object} client - Discord client instance.
* @param {Message} message - The original Discord message to reply to.
* @param {object} resp - OpenAI Responses API response containing function_call.
* @param {object} cfg - Module configuration for image generation.
* @returns {Promise<boolean>} True if the function call was handled.
*/
async function handleImage(client, message, resp, cfg) {
const calls = Array.isArray(resp.output) ? resp.output : [];
const fn = calls.find(o => o.type === 'function_call' && o.name === 'generate_image');
if (!fn?.arguments) return false;
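// Per the Responses API, fn.arguments arrives as a JSON-encoded string, e.g.
// '{"prompt":"a red fox in watercolor","n":1,"size":"auto",...}' (illustrative).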
client.logger.debug(`Image function args: ${fn.arguments}`);
let args;
try { args = JSON.parse(fn.arguments); } catch { return false; }
if (!args.prompt?.trim()) {
await message.reply('Cannot generate image: empty prompt.');
return true;
}
// Use image model defined in config
const model = cfg.imageGeneration.defaultModel;
const promptText = args.prompt;
// Determine number of images (1-10); DALL·E-3 only supports 1
let count = 1;
if (args.n != null) {
const nVal = typeof args.n === 'number' ? args.n : parseInt(args.n, 10);
if (!Number.isNaN(nVal)) count = nVal;
}
// clamp between 1 and 10
count = Math.max(1, Math.min(10, count));
if (model === 'dall-e-3') count = 1;
// 'auto' size is only valid for gpt-image-1; fall back to a size every model accepts
let size = args.size || 'auto';
if (size === 'auto' && model !== 'gpt-image-1') size = '1024x1024';
// Determine quality based on config and model constraints
let quality = args.quality || cfg.imageGeneration.defaultQuality;
if (model === 'gpt-image-1') {
if (!['low', 'medium', 'high', 'auto'].includes(quality)) quality = 'auto';
} else if (model === 'dall-e-2') {
quality = 'standard';
} else if (model === 'dall-e-3') {
if (!['standard', 'hd', 'auto'].includes(quality)) quality = 'standard';
}
const background = args.background;
const moderation = args.moderation;
const outputFormat = args.output_format;
const compression = args.output_compression;
const style = args.style;
const user = args.user || message.author.id;
try {
// Build generate parameters
const genParams = { model, prompt: promptText, n: count, size, quality, user };
// response_format supported for DALL·E models (not gpt-image-1)
if (model !== 'gpt-image-1' && args.response_format) {
genParams['response_format'] = args.response_format;
}
// gpt-image-1 supports background, moderation, output_format, and output_compression
if (model === 'gpt-image-1') {
if (background) genParams['background'] = background;
if (moderation) genParams['moderation'] = moderation;
if (outputFormat) {
genParams['output_format'] = outputFormat;
// only support compression for JPEG or WEBP formats
if (['jpeg','webp'].includes(outputFormat) && typeof compression === 'number') {
genParams['output_compression'] = compression;
}
}
}
// dall-e-3 supports style
if (model === 'dall-e-3' && style) {
genParams['style'] = style;
}
// Generate images via OpenAI Images API
const imgRes = await client.openai.images.generate(genParams);
const images = imgRes.data || [];
if (!images.length) throw new Error('No images generated');
// Ensure save directory exists
const dir = cfg.imageGeneration?.imageSavePath || './images';
await fs.mkdir(dir, { recursive: true });
const attachments = [];
// Process each generated image
for (let i = 0; i < images.length; i++) {
const img = images[i];
let buffer;
let ext = outputFormat || 'png';
if (img.b64_json) {
buffer = Buffer.from(img.b64_json, 'base64');
} else if (img.url) {
const dl = await axios.get(img.url, { responseType: 'arraybuffer' });
buffer = Buffer.from(dl.data);
// derive extension from URL if possible
const parsed = path.extname(img.url.split('?')[0]).replace(/^[.]/, '');
if (parsed) ext = parsed;
} else {
throw new Error('No image data');
}
const filename = `${message.author.id}-${Date.now()}-${i}.${ext}`;
const filePath = path.join(dir, filename);
await fs.writeFile(filePath, buffer);
client.logger.info(`Saved image: ${filePath}`);
attachments.push(new AttachmentBuilder(buffer, { name: filename }));
}
// Reply with attachments, truncating the echoed prompt to Discord's limit
await message.reply({ content: promptText.slice(0, MAX_DISCORD_MSG_LENGTH), files: attachments });
} catch (err) {
client.logger.error(`Image error: ${err.message}`);
await message.reply(`Image generation error: ${err.message}`);
}
return true;
}
/**
* Main message handler for Discord 'messageCreate' events.
* Sends user messages to the AI, handles text or image responses, and awards tokens.
* @param {object} client - Discord client instance.
* @param {object} cfg - Module configuration.
* @param {Message} message - Incoming Discord message.
*/
async function onMessage(client, cfg, message) {
const logger = client.logger;
const botId = client.user?.id;
if (!(await shouldRespond(message, botId, logger))) return;
await message.channel.sendTyping();
// Context key: a thread started from this message if any, else the channel ID
// (for messages posted inside a thread, channel.id is already the thread ID)
const key = message.thread?.id || message.channel.id;
// Initialize per-channel lock map
const lockMap = client._responseLockMap || (client._responseLockMap = new Map());
// Get last pending promise for this key
const last = lockMap.get(key) || Promise.resolve();
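// Handlers for the same key are chained onto the previous handler's promise,
// serializing replies per channel/thread so previous_response_id stays
// consistent when several users trigger the bot at nearly the same time.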
// Handler to run in sequence
const handler = async () => {
try {
// Previous response ID for context continuity
const prev = client.pb?.cache?.get(key);
// Enforce minimum score to use AI responses, but allow guild admins to bypass
try {
const isAdmin = message.member?.permissions?.has(PermissionFlagsBits.Administrator);
const scoreData = await client.scorekeeper.getScore(message.guild.id, message.author.id);
if (!isAdmin && scoreData.totalScore < cfg.minScore) {
await message.reply(
`You need an I/O score of at least ${cfg.minScore} to use AI responses. Your current I/O score is ${scoreData.totalScore.toFixed(2)}.`
);
return;
}
} catch (err) {
client.logger.error(`Error checking score: ${err.message}`);
}
// Build request body, prefixing with a mention of who spoke
const speakerMention = `<@${message.author.id}>`;
const body = {
model: cfg.defaultModel,
instructions: client.responsesSystemPrompt,
input: `${speakerMention} said to you: ${message.content}`,
previous_response_id: prev,
max_output_tokens: cfg.defaultMaxTokens,
temperature: cfg.defaultTemperature,
};
// Assemble any enabled tools
const tools = [];
if (cfg.tools?.imageGeneration) {
const model = cfg.imageGeneration.defaultModel;
// Configure allowed sizes per model
let sizeEnum;
switch (model) {
case 'gpt-image-1': sizeEnum = ['auto','1024x1024','1536x1024','1024x1536']; break;
case 'dall-e-2': sizeEnum = ['256x256','512x512','1024x1024']; break;
case 'dall-e-3': sizeEnum = ['auto','1024x1024','1792x1024','1024x1792']; break;
default: sizeEnum = ['auto','1024x1024'];
}
// Configure quality options per model
let qualityEnum;
switch (model) {
case 'gpt-image-1': qualityEnum = ['auto','low','medium','high']; break;
case 'dall-e-2': qualityEnum = ['standard']; break;
case 'dall-e-3': qualityEnum = ['auto','standard','hd']; break;
default: qualityEnum = ['auto','standard'];
}
// Build schema properties dynamically
const properties = {
prompt: { type: 'string', description: 'Text description of desired image(s).' },
n: { type: 'number', description: 'Number of images to generate.' },
size: { type: 'string', enum: sizeEnum, description: 'Image size.' },
quality: { type: 'string', enum: qualityEnum, description: 'Image quality.' },
user: { type: 'string', description: 'Unique end-user identifier.' }
};
if (model !== 'gpt-image-1') {
properties.response_format = { type: 'string', enum: ['url','b64_json'], description: 'Format of returned images.' };
}
if (model === 'gpt-image-1') {
properties.background = { type: 'string', enum: ['transparent','opaque','auto'], description: 'Background transparency.' };
properties.moderation = { type: 'string', enum: ['low','auto'], description: 'Content moderation level.' };
properties.output_format = { type: 'string', enum: ['png','jpeg','webp'], description: 'Output image format.' };
properties.output_compression = { type: 'number', description: 'Compression level (0-100).' };
}
if (model === 'dall-e-3') {
properties.style = { type: 'string', enum: ['vivid','natural'], description: 'Style option for dall-e-3.' };
}
// Determine required fields
const required = ['prompt','n','size','quality','user'];
if (model !== 'gpt-image-1') required.push('response_format');
if (model === 'gpt-image-1') required.push('background','moderation','output_format','output_compression');
if (model === 'dall-e-3') required.push('style');
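// With strict mode enabled below, the API expects every declared property to
// appear in `required`, so the model must emit each field on every call.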
// Register the function tool
tools.push({
type: 'function',
name: 'generate_image',
description: `Generate images using model ${model} with requested parameters.`,
parameters: {
type: 'object',
properties,
required,
additionalProperties: false
},
strict: true
});
}
if (cfg.tools?.webSearch) {
tools.push({ type: 'web_search_preview' });
}
if (tools.length) {
body.tools = tools;
}
// Call OpenAI Responses
logger.debug(`Calling AI with body: ${JSON.stringify(body)}`);
const resp = await client.openai.responses.create(body);
logger.info(`AI response id=${resp.id}`);
// Award tokens for the AI chat response
const chatTokens = resp.usage?.total_tokens ?? resp.usage?.output_tokens ?? 0;
awardOutput(client, message.guild.id, message.author.id, chatTokens);
// Cache response ID if not a function call
const isFuncCall = Array.isArray(resp.output) && resp.output.some(o => o.type === 'function_call');
if (!isFuncCall && resp.id && cfg.conversationExpiry) {
cacheResponse(client, key, resp.id, Math.floor(cfg.conversationExpiry / 1000));
}
// Handle image function call if present
if (await handleImage(client, message, resp, cfg)) return;
// Otherwise reply with text
const text = resp.output_text?.trim();
if (text) {
const parts = splitMessage(text, MAX_DISCORD_MSG_LENGTH);
for (const part of parts) {
await message.reply(part);
}
}
} catch (err) {
logger.error(`Queued onMessage error for ${key}: ${err.message}`);
await message.reply('Error generating response.').catch(() => {});
}
};
// Chain the handler to the last promise
const next = last.then(handler).catch(err => logger.error(err));
lockMap.set(key, next);
// Handler enqueued; it will send its reply when its turn in the queue arrives.
}
/**
* Send a standalone narrative via the Responses API to a Discord channel or thread.
* @param {object} client - Discord client instance.
* @param {object} cfg - Module configuration.
* @param {string} channelId - Target channel or thread ID.
* @param {string} text - Narrative prompt text.
*/
export async function sendNarrative(client, cfg, channelId, text) {
const logger = client.logger;
try {
// Build the narrative instructions
const instructions = `${client.responsesSystemPrompt}\n\nGenerate the following as an engaging narrative:`;
const body = {
model: cfg.defaultModel,
instructions,
input: text,
max_output_tokens: cfg.defaultMaxTokens,
temperature: cfg.defaultTemperature,
};
logger.debug(`sendNarrative: calling AI with body: ${JSON.stringify(body)}`);
const resp = await client.openai.responses.create(body);
logger.info(`sendNarrative AI response id=${resp.id}`);
// Fetch the target channel or thread
const channel = await client.channels.fetch(channelId);
if (!channel || typeof channel.send !== 'function') {
logger.error(`sendNarrative: cannot send to channel ID ${channelId}`);
return;
}
// Split the output and send
const content = resp.output_text?.trim();
if (content) {
const parts = splitMessage(content, MAX_DISCORD_MSG_LENGTH);
for (const part of parts) {
await channel.send(part);
}
}
} catch (err) {
client.logger.error(`sendNarrative error: ${err.message}`);
}
}
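// Example call (hypothetical channel ID; assumes init() has already run):
//   await sendNarrative(client, clientConfig.responses, '123456789012345678',
//     'Recap this evening\'s events for the channel.');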
/**
* Initialize the Responses module:
* - Load the system prompt.
* - Configure the OpenAI client.
* - Register the message event handler.
* @param {object} client - Discord client instance.
* @param {object} clientConfig - Full client configuration object.
*/
export async function init(client, clientConfig) {
const cfg = clientConfig.responses;
client.logger.info('Initializing Responses module');
client.responsesSystemPrompt = await loadSystemPrompt(cfg.systemPromptPath, client.logger);
client.openai = new OpenAI({ apiKey: cfg.apiKey });
client.on('messageCreate', m => onMessage(client, cfg, m));
client.logger.info('Responses module ready');
}
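// Sketch of the expected clientConfig.responses shape, inferred from the usage
// above. Field names come from this file; the values shown are illustrative
// assumptions, not documented defaults:
//   {
//     apiKey: process.env.OPENAI_API_KEY,
//     systemPromptPath: './prompts/responses.txt',
//     defaultModel: 'gpt-4.1-mini',           // any Responses-capable model
//     defaultMaxTokens: 1024,
//     defaultTemperature: 0.8,
//     minScore: 0,
//     conversationExpiry: 30 * 60 * 1000,     // ms; converted to seconds when cached
//     tools: { imageGeneration: true, webSearch: false },
//     imageGeneration: {
//       defaultModel: 'gpt-image-1',
//       defaultQuality: 'auto',
//       imageSavePath: './images',
//     },
//   }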