import OpenAI from 'openai';
import fs from 'fs';
import path from 'path';
// Lazily-created OpenAI client; populated on first call to getOpenAI().
let openai = null;

// Maximum characters per chunk when splitting long texts for summarization.
const MAX_CHUNK_CHARS = 30000;
/**
 * Return the shared OpenAI client, creating it on first use.
 *
 * @returns {OpenAI} the memoized client instance
 * @throws {Error} if the OPENAI_API_KEY environment variable is not set
 */
function getOpenAI() {
  if (openai) {
    return openai;
  }
  const apiKey = process.env.OPENAI_API_KEY;
  if (!apiKey) {
    throw new Error('OPENAI_API_KEY environment variable is not set');
  }
  openai = new OpenAI({ apiKey });
  return openai;
}
/**
 * Summarize text via the OpenAI chat completions API.
 *
 * Texts longer than MAX_CHUNK_CHARS are delegated to summarizeLongText,
 * which chunks and recombines them.
 *
 * @param {string} text - the text to summarize
 * @param {object} [options]
 * @param {string} [options.model='gpt-5.1'] - model name passed to the API
 * @param {string} [options.language='same'] - 'same' keeps the input language,
 *   any other value is used verbatim in the prompt
 * @param {string} [options.style='concise'] - 'concise', 'detailed', or 'bullet'
 *   (unknown values fall back to 'concise')
 * @param {?number} [options.maxLength=null] - optional word cap for the summary
 * @returns {Promise<{summary: string, model: string, style: string, inputLength: number, chunks: number}>}
 * @throws {Error} if OPENAI_API_KEY is not set
 */
export async function summarizeText(text, options = {}) {
  const {
    model = 'gpt-5.1',
    language = 'same',
    style = 'concise',
    maxLength = null,
  } = options;

  const client = getOpenAI();

  // Prompt fragment per style; unrecognized styles fall back to 'concise',
  // matching the switch-default behavior this replaces.
  const styleInstructions = {
    detailed: 'Provide a detailed summary that captures all important points and nuances.',
    bullet: 'Provide the summary as bullet points, highlighting the key points.',
    concise: 'Provide a concise summary that captures the main points.',
  };
  const styleInstruction = styleInstructions[style] ?? styleInstructions.concise;

  const languageInstruction =
    language === 'same'
      ? 'Write the summary in the same language as the input text.'
      : `Write the summary in ${language}.`;

  const lengthInstruction = maxLength ? `Keep the summary under ${maxLength} words.` : '';

  const systemPrompt = `You are an expert summarizer. ${styleInstruction} ${languageInstruction} ${lengthInstruction}
Focus on the most important information and main ideas. Be accurate and objective.`;

  // Long inputs are chunked, summarized piecewise, and recombined.
  if (text.length > MAX_CHUNK_CHARS) {
    return await summarizeLongText(text, { model, systemPrompt, style });
  }

  const response = await client.chat.completions.create({
    model,
    messages: [
      { role: 'system', content: systemPrompt },
      { role: 'user', content: `Please summarize the following text:\n\n${text}` },
    ],
    temperature: 0.3,
  });

  return {
    summary: response.choices[0].message.content,
    model,
    style,
    inputLength: text.length,
    chunks: 1,
  };
}
/**
 * Split text into chunks of at most MAX_CHUNK_CHARS, preferring sentence
 * boundaries (Latin and CJK terminators, or newlines).
 *
 * Fix over the previous version: a single "sentence" longer than
 * MAX_CHUNK_CHARS (e.g. text with no sentence-ending punctuation) used to
 * produce one oversized chunk, defeating the size limit. Such runs are now
 * hard-split at MAX_CHUNK_CHARS.
 *
 * @param {string} text
 * @returns {string[]} non-empty, trimmed chunks
 */
function splitIntoChunks(text) {
  const chunks = [];
  let currentChunk = '';
  // Split after sentence-ending punctuation or newlines, consuming trailing whitespace.
  const sentences = text.split(/(?<=[.!?。!?\n])\s*/);

  for (const sentence of sentences) {
    if (sentence.length > MAX_CHUNK_CHARS) {
      // Flush whatever was accumulated, then hard-split the oversized run.
      if (currentChunk.trim()) {
        chunks.push(currentChunk.trim());
      }
      currentChunk = '';
      for (let i = 0; i < sentence.length; i += MAX_CHUNK_CHARS) {
        const piece = sentence.slice(i, i + MAX_CHUNK_CHARS);
        if (piece.length === MAX_CHUNK_CHARS) {
          chunks.push(piece);
        } else {
          // Partial tail: keep accumulating subsequent sentences onto it.
          currentChunk = piece;
        }
      }
      continue;
    }
    if ((currentChunk + sentence).length > MAX_CHUNK_CHARS && currentChunk) {
      chunks.push(currentChunk.trim());
      currentChunk = sentence;
    } else {
      currentChunk += ' ' + sentence;
    }
  }
  if (currentChunk.trim()) {
    chunks.push(currentChunk.trim());
  }
  return chunks;
}

/**
 * Summarize long text by chunking it, summarizing each chunk sequentially
 * (sequential to respect API rate limits and allow progress logging), then
 * combining the partial summaries into one coherent summary.
 *
 * @param {string} text - full input text (expected > MAX_CHUNK_CHARS)
 * @param {{model: string, systemPrompt: string, style: string}} options
 * @returns {Promise<{summary: string, model: string, style: string, inputLength: number, chunks: number}>}
 * @throws {Error} if OPENAI_API_KEY is not set
 */
async function summarizeLongText(text, options) {
  const { model, systemPrompt, style } = options;
  const client = getOpenAI();

  const chunks = splitIntoChunks(text);

  console.log(`Summarizing ${chunks.length} chunks...`);

  // Summarize each chunk with the caller's system prompt.
  const chunkSummaries = [];
  for (let i = 0; i < chunks.length; i++) {
    console.log(`[${i + 1}/${chunks.length}] Summarizing chunk...`);
    const response = await client.chat.completions.create({
      model,
      messages: [
        { role: 'system', content: systemPrompt },
        { role: 'user', content: `Please summarize the following text (part ${i + 1} of ${chunks.length}):\n\n${chunks[i]}` },
      ],
      temperature: 0.3,
    });
    chunkSummaries.push(response.choices[0].message.content);
  }

  // Single chunk: no combine pass needed.
  if (chunkSummaries.length === 1) {
    return {
      summary: chunkSummaries[0],
      model,
      style,
      inputLength: text.length,
      chunks: 1,
    };
  }

  // Combine the partial summaries into one final summary.
  const combinedText = chunkSummaries.join('\n\n---\n\n');
  const finalResponse = await client.chat.completions.create({
    model,
    messages: [
      { role: 'system', content: `You are an expert summarizer. Combine and synthesize the following partial summaries into a single coherent ${style} summary. Remove redundancy and ensure a smooth flow.` },
      { role: 'user', content: `Please combine these summaries into one:\n\n${combinedText}` },
    ],
    temperature: 0.3,
  });

  return {
    summary: finalResponse.choices[0].message.content,
    model,
    style,
    inputLength: text.length,
    chunks: chunks.length,
  };
}
/**
 * Summarize the contents of a UTF-8 text file and write the summary to
 * `<basename>_summary.txt` in outputDir (or next to the input file).
 *
 * @param {string} filePath - path to the text file to summarize
 * @param {object} [options] - summarizeText options, plus:
 * @param {string} [options.outputDir] - directory for the summary file;
 *   defaults to the input file's directory
 * @returns {Promise<object>} the summarizeText result plus filePath and summaryPath
 * @throws {Error} if the file does not exist or OPENAI_API_KEY is not set
 */
export async function summarizeFile(filePath, options = {}) {
  if (!fs.existsSync(filePath)) {
    throw new Error(`File not found: ${filePath}`);
  }

  // outputDir is consumed here; everything else flows through to summarizeText.
  const { outputDir, ...summarizeOptions } = options;

  const text = fs.readFileSync(filePath, 'utf-8');
  const result = await summarizeText(text, summarizeOptions);

  const targetDir = outputDir || path.dirname(filePath);
  const stem = path.basename(filePath, path.extname(filePath));
  const summaryPath = path.join(targetDir, `${stem}_summary.txt`);
  fs.writeFileSync(summaryPath, result.summary, 'utf-8');

  return { ...result, filePath, summaryPath };
}
/**
 * Describe the summary styles accepted by summarizeText.
 *
 * @returns {{concise: string, detailed: string, bullet: string}}
 *   style name → human-readable description
 */
export function getSummaryStyles() {
  const styles = {
    concise: 'A brief summary capturing main points',
    detailed: 'A comprehensive summary with nuances',
    bullet: 'Key points as bullet points',
  };
  return styles;
}