videotomp3transcriptor/src/services/summarize.js
2025-12-04 20:57:51 +08:00

196 lines
5.5 KiB
JavaScript

import OpenAI from 'openai';
import fs from 'fs';
import path from 'path';
// Cached OpenAI client. Stays null until getOpenAI() constructs it on first use.
let openai = null;
// Max characters per chunk for summarization. summarizeText() routes any input
// longer than this through summarizeLongText(), which splits it into chunks.
const MAX_CHUNK_CHARS = 30000;
/**
 * Return the shared OpenAI client, constructing it the first time it is needed.
 *
 * @returns {OpenAI} The cached client instance.
 * @throws {Error} If no client has been built yet and OPENAI_API_KEY is unset or empty.
 */
function getOpenAI() {
  // Fast path: an earlier call already built the client.
  if (openai) {
    return openai;
  }

  const apiKey = process.env.OPENAI_API_KEY;
  if (!apiKey) {
    throw new Error('OPENAI_API_KEY environment variable is not set');
  }

  openai = new OpenAI({ apiKey });
  return openai;
}
/**
 * Summarize text with an OpenAI chat model.
 *
 * Text longer than MAX_CHUNK_CHARS is handed to summarizeLongText(); anything
 * shorter is summarized with a single chat-completion request.
 *
 * @param {string} text - Text to summarize.
 * @param {object} [options]
 * @param {string} [options.model='gpt-5.1'] - Chat model to use.
 * @param {string} [options.language='same'] - 'same' keeps the input language;
 *   any other value is inserted into the prompt as the target language.
 * @param {string} [options.style='concise'] - 'concise', 'detailed' or 'bullet'.
 *   Unrecognized values get the concise instruction.
 * @param {?number} [options.maxLength=null] - Optional word limit; falsy means no limit.
 * @returns {Promise<{summary: string, model: string, style: string, inputLength: number, chunks: number}>}
 * @throws {TypeError} If `text` is not a string.
 * @throws {Error} If OPENAI_API_KEY is not set (raised by getOpenAI()).
 */
export async function summarizeText(text, options = {}) {
  // Fail fast with a clear message before any client is created or request is
  // sent; otherwise a number would be silently stringified into the prompt and
  // null/undefined would surface as an obscure property-access error.
  if (typeof text !== 'string') {
    const received = text === null ? 'null' : typeof text;
    throw new TypeError(`summarizeText expects text to be a string, received ${received}`);
  }

  const {
    model = 'gpt-5.1',
    language = 'same', // 'same' = same as input, or specify a language
    style = 'concise', // 'concise', 'detailed', 'bullet'
    maxLength = null, // optional max length in words
  } = options;

  let styleInstruction;
  switch (style) {
    case 'detailed':
      styleInstruction = 'Provide a detailed summary that captures all important points and nuances.';
      break;
    case 'bullet':
      styleInstruction = 'Provide the summary as bullet points, highlighting the key points.';
      break;
    case 'concise':
    default:
      styleInstruction = 'Provide a concise summary that captures the main points.';
  }

  const languageInstruction =
    language === 'same'
      ? 'Write the summary in the same language as the input text.'
      : `Write the summary in ${language}.`;

  const lengthInstruction = maxLength ? `Keep the summary under ${maxLength} words.` : '';

  // Join only the instructions that are present so the prompt carries no
  // dangling whitespace when no length limit was requested.
  const instructions = [
    'You are an expert summarizer.',
    styleInstruction,
    languageInstruction,
    lengthInstruction,
  ]
    .filter(Boolean)
    .join(' ');
  const systemPrompt = `${instructions}
Focus on the most important information and main ideas. Be accurate and objective.`;

  // Handle long texts by chunking
  if (text.length > MAX_CHUNK_CHARS) {
    return summarizeLongText(text, { model, systemPrompt, style });
  }

  const client = getOpenAI();
  const response = await client.chat.completions.create({
    model,
    messages: [
      { role: 'system', content: systemPrompt },
      { role: 'user', content: `Please summarize the following text:\n\n${text}` },
    ],
    temperature: 0.3,
  });

  return {
    summary: response.choices[0].message.content,
    model,
    style,
    inputLength: text.length,
    chunks: 1,
  };
}
/**
 * Split text into trimmed, non-empty chunks of at most `maxChars` characters.
 *
 * Cuts are made at sentence boundaries where possible. Pieces are re-joined
 * without inserting any separator, so the text inside a chunk is exactly the
 * original text. A single sentence longer than `maxChars` (for example an
 * unpunctuated transcript) is cut at a space in the back half of the window
 * when there is one, otherwise at the hard limit.
 *
 * @param {string} text - Text to split.
 * @param {number} maxChars - Upper bound on chunk length; must be positive.
 * @returns {string[]} Chunks in original order.
 */
function splitTextIntoChunks(text, maxChars) {
  // Zero-width split after ASCII . ! ?, the ideographic full stop (U+3002),
  // full-width ! and ? (U+FF01, U+FF1F) and newlines; whitespace stays
  // attached to the following piece.
  const sentenceEnd = /(?<=[.!?\u3002\uFF01\uFF1F\n])/;
  const chunks = [];
  let current = '';

  const flush = () => {
    const trimmed = current.trim();
    if (trimmed) {
      chunks.push(trimmed);
    }
    current = '';
  };

  for (const sentence of text.split(sentenceEnd)) {
    if (current.length + sentence.length > maxChars) {
      flush();
    }

    let rest = sentence;
    while (rest.length > maxChars) {
      let cut = rest.lastIndexOf(' ', maxChars);
      if (cut < maxChars / 2) {
        cut = maxChars;
      }
      current = rest.slice(0, cut);
      flush();
      rest = rest.slice(cut);
    }
    current += rest;
  }
  flush();

  return chunks;
}

/**
 * Summarize long text by chunking and combining summaries.
 *
 * Each chunk is summarized in order with `systemPrompt`. When there is more
 * than one chunk, the partial summaries are merged by a final request whose
 * system prompt also starts with `systemPrompt`, so the instructions it
 * carries apply to the combined summary as well.
 *
 * @param {string} text - Text to summarize.
 * @param {object} options
 * @param {string} options.model - Chat model to use.
 * @param {string} options.systemPrompt - System prompt for every request.
 * @param {string} options.style - Style name, echoed in the combine prompt and the result.
 * @returns {Promise<{summary: string, model: string, style: string, inputLength: number, chunks: number}>}
 *   `chunks` is the number of chunks summarized (0 for blank input).
 * @throws {Error} If OPENAI_API_KEY is not set (raised by getOpenAI()).
 */
async function summarizeLongText(text, options) {
  const { model, systemPrompt, style } = options;

  const chunks = splitTextIntoChunks(text, MAX_CHUNK_CHARS);

  // Blank input: there is nothing to send, so skip the API entirely instead of
  // asking the model to combine zero summaries.
  if (chunks.length === 0) {
    return {
      summary: '',
      model,
      style,
      inputLength: text.length,
      chunks: 0,
    };
  }

  const client = getOpenAI();
  console.log(`Summarizing ${chunks.length} chunks...`);

  // Summarize chunks one at a time, in order.
  const chunkSummaries = [];
  for (let i = 0; i < chunks.length; i++) {
    console.log(`[${i + 1}/${chunks.length}] Summarizing chunk...`);
    const response = await client.chat.completions.create({
      model,
      messages: [
        { role: 'system', content: systemPrompt },
        { role: 'user', content: `Please summarize the following text (part ${i + 1} of ${chunks.length}):\n\n${chunks[i]}` },
      ],
      temperature: 0.3,
    });
    chunkSummaries.push(response.choices[0].message.content);
  }

  // A single chunk needs no combine step.
  if (chunkSummaries.length === 1) {
    return {
      summary: chunkSummaries[0],
      model,
      style,
      inputLength: text.length,
      chunks: 1,
    };
  }

  // Create final combined summary. The caller's system prompt is kept so the
  // merged result still follows the caller's instructions.
  const combinedText = chunkSummaries.join('\n\n---\n\n');
  const finalResponse = await client.chat.completions.create({
    model,
    messages: [
      {
        role: 'system',
        content: `${systemPrompt}\n\nThe user will provide partial summaries of consecutive parts of one longer text. Combine and synthesize them into a single coherent ${style} summary. Remove redundancy and ensure a smooth flow.`,
      },
      { role: 'user', content: `Please combine these summaries into one:\n\n${combinedText}` },
    ],
    temperature: 0.3,
  });

  return {
    summary: finalResponse.choices[0].message.content,
    model,
    style,
    inputLength: text.length,
    chunks: chunks.length,
  };
}
/**
 * Summarize a text file and write the summary next to it (or into `outputDir`).
 *
 * The file is read as UTF-8 and passed to summarizeText(). The summary is
 * saved as `<input base name>_summary.txt`.
 *
 * @param {string} filePath - Path of the text file to summarize.
 * @param {object} [options] - Options forwarded to summarizeText(), plus:
 * @param {string} [options.outputDir] - Directory for the summary file; created
 *   if missing. Defaults to the directory of `filePath`.
 * @returns {Promise<object>} The summarizeText() result extended with
 *   `filePath` and `summaryPath`.
 * @throws {Error} If `filePath` does not exist.
 */
export async function summarizeFile(filePath, options = {}) {
  if (!fs.existsSync(filePath)) {
    throw new Error(`File not found: ${filePath}`);
  }

  // outputDir is consumed here and must not leak into the summarizer options.
  const { outputDir, ...otherOptions } = options;

  // Promise-based I/O keeps the event loop free while files are read/written.
  const text = await fs.promises.readFile(filePath, 'utf-8');
  const result = await summarizeText(text, otherOptions);

  // Save summary to file
  const dir = outputDir || path.dirname(filePath);
  // Ensure the target directory exists; a not-yet-created outputDir would
  // otherwise make the write fail with ENOENT after the summary was computed.
  await fs.promises.mkdir(dir, { recursive: true });

  const baseName = path.basename(filePath, path.extname(filePath));
  const summaryPath = path.join(dir, `${baseName}_summary.txt`);
  await fs.promises.writeFile(summaryPath, result.summary, 'utf-8');

  return {
    ...result,
    filePath,
    summaryPath,
  };
}
/**
 * List the supported summary styles.
 *
 * @returns {{concise: string, detailed: string, bullet: string}} A new object
 *   mapping each style name to a one-line description.
 */
export function getSummaryStyles() {
  const descriptions = [
    ['concise', 'A brief summary capturing main points'],
    ['detailed', 'A comprehensive summary with nuances'],
    ['bullet', 'Key points as bullet points'],
  ];
  return Object.fromEntries(descriptions);
}