From d4ac6f585993e9a9a73e27d5e1d6abc298622029 Mon Sep 17 00:00:00 2001 From: StillHammer Date: Tue, 2 Dec 2025 13:23:48 +0800 Subject: [PATCH] Add documentation, Claude config, and update services MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit - Add comprehensive API documentation in docs/API.md - Add Claude project instructions (CLAUDE.md) - Add server startup scripts for Windows and Unix - Update transcription, translation, and summarize services - Update server.js with latest changes 🤖 Generated with [Claude Code](https://claude.com/claude-code) Co-Authored-By: Claude --- .claude/settings.local.json | 9 + CLAUDE.md | 128 ++++++++ docs/API.md | 561 ++++++++++++++++++++++++++++++++++ src/server.js | 54 ++-- src/services/summarize.js | 6 +- src/services/transcription.js | 2 +- src/services/translation.js | 9 +- start-server.bat | 61 ++++ start-server.sh | 58 ++++ 9 files changed, 859 insertions(+), 29 deletions(-) create mode 100644 .claude/settings.local.json create mode 100644 CLAUDE.md create mode 100644 docs/API.md create mode 100644 start-server.bat create mode 100644 start-server.sh diff --git a/.claude/settings.local.json b/.claude/settings.local.json new file mode 100644 index 0000000..919dca9 --- /dev/null +++ b/.claude/settings.local.json @@ -0,0 +1,9 @@ +{ + "permissions": { + "allow": [ + "Bash(npm run server:*)" + ], + "deny": [], + "ask": [] + } +} diff --git a/CLAUDE.md b/CLAUDE.md new file mode 100644 index 0000000..d6c04b3 --- /dev/null +++ b/CLAUDE.md @@ -0,0 +1,128 @@ +# Video to MP3 Transcriptor - Instructions pour Claude + +## À propos du projet +Ce projet est une API Node.js/Express pour télécharger des vidéos YouTube en MP3, les transcrire, les traduire et les résumer. + +## Documentation + +### Documentation API +La documentation complète de l'API se trouve dans **`docs/API.md`**. + +**IMPORTANT** : Cette documentation doit TOUJOURS être maintenue à jour. 
Chaque fois qu'un endpoint est modifié, ajouté ou supprimé, la documentation doit être mise à jour en conséquence. + +### Responsabilités de maintenance de la documentation + +Quand tu modifies le code, tu DOIS mettre à jour `docs/API.md` si : +- Un nouvel endpoint est ajouté +- Un endpoint existant est modifié (paramètres, réponses, etc.) +- Un endpoint est supprimé +- Les modèles par défaut changent +- De nouveaux paramètres sont ajoutés +- Le format des réponses change + +## Structure du projet + +``` +videotoMP3Transcriptor/ +├── docs/ +│ └── API.md # Documentation complète de l'API +├── src/ +│ ├── server.js # Serveur Express et routes API +│ ├── services/ +│ │ ├── youtube.js # Téléchargement YouTube +│ │ ├── transcription.js # Transcription OpenAI +│ │ ├── translation.js # Traduction GPT +│ │ └── summarize.js # Résumé GPT-5.1 +│ └── cli.js # Interface en ligne de commande +├── public/ # Interface web (si présente) +├── output/ # Répertoire de sortie par défaut +├── .env # Variables d'environnement +└── package.json + +``` + +## Configuration + +### Port du serveur +- Port par défaut : **8888** +- Configurable via `process.env.PORT` dans `.env` + +### Modèles par défaut +- **Transcription** : `gpt-4o-mini-transcribe` +- **Résumé** : `gpt-5.1` +- **Traduction** : `gpt-4o-mini` (hardcodé) + +### Variables d'environnement requises +```env +OPENAI_API_KEY=sk-... +PORT=8888 # optionnel +OUTPUT_DIR=./output # optionnel +``` + +## Commandes importantes + +```bash +# Lancer le serveur +npm run server + +# Lancer le CLI +npm run cli + +# Installer les dépendances +npm install +``` + +## Points d'attention + +### Paramètres outputPath +Les endpoints qui écrivent des fichiers (téléchargement, transcription, `/translate-file`, `/summarize-file` et les endpoints de streaming) acceptent un paramètre `outputPath` optionnel pour spécifier un répertoire de sortie personnalisé ; `/translate` et `/summarize`, qui ne traitent que du texte, ne l'acceptent pas. Si non spécifié, le répertoire par défaut `OUTPUT_DIR` est utilisé. 
+ +### Modèles de transcription disponibles +- `gpt-4o-mini-transcribe` (par défaut) - Rapide et économique +- `gpt-4o-transcribe` - Qualité supérieure +- `whisper-1` - Modèle original Whisper (supporte plus de formats) + +### Formats de sortie +- **Transcription** : txt, json, srt, vtt (selon le modèle) +- **Traduction** : txt +- **Résumé** : txt + +## Règles de développement + +1. **Documentation d'abord** : Avant de modifier un endpoint, vérifie `docs/API.md` +2. **Après modification** : Mets à jour immédiatement `docs/API.md` +3. **Tests** : Redémarre le serveur après chaque modification +4. **Cohérence** : Garde la même structure de réponse pour tous les endpoints similaires + +## Architecture des endpoints + +### Endpoints streaming (SSE) +- `/download-stream` +- `/process-stream` +- `/summarize-stream` + +Ces endpoints utilisent Server-Sent Events pour envoyer des mises à jour de progression en temps réel. + +### Endpoints non-streaming +- `/download` +- `/process` +- Tous les endpoints POST avec upload de fichiers + +Ces endpoints retournent une réponse unique une fois le traitement terminé. + +## Maintenance + +Lors de l'ajout de nouvelles fonctionnalités : +1. Implémente la fonctionnalité dans le service approprié (`src/services/`) +2. Ajoute les routes dans `src/server.js` +3. **Mets à jour `docs/API.md` IMMÉDIATEMENT** +4. Teste l'endpoint avec curl ou Postman +5. 
Vérifie que la documentation est claire et complète + +## Notes importantes + +- Le serveur doit toujours être sur le port **8888** +- Les clés API OpenAI sont requises pour transcription/traduction/résumé +- Le répertoire `output/` est créé automatiquement si inexistant +- Les fichiers uploadés sont stockés dans `OUTPUT_DIR` +- Les vidéos YouTube sont téléchargées en MP3 automatiquement diff --git a/docs/API.md b/docs/API.md new file mode 100644 index 0000000..7b02be2 --- /dev/null +++ b/docs/API.md @@ -0,0 +1,561 @@ +# API Documentation - Video to MP3 Transcriptor + +## Base URL +``` +http://localhost:8888 +``` + +## Table of Contents +- [Health & Info](#health--info) +- [Download Endpoints](#download-endpoints) +- [Transcription Endpoints](#transcription-endpoints) +- [Translation Endpoints](#translation-endpoints) +- [Summarization Endpoints](#summarization-endpoints) +- [File Management](#file-management) + +--- + +## Health & Info + +### GET /health +Health check endpoint. + +**Response:** +```json +{ + "status": "ok", + "timestamp": "2025-11-28T12:00:00.000Z" +} +``` + +### GET /api +Get API information and available endpoints. + +**Response:** +```json +{ + "name": "Video to MP3 Transcriptor API", + "version": "1.0.0", + "endpoints": { ... } +} +``` + +### GET /info +Get information about a YouTube video or playlist. + +**Query Parameters:** +- `url` (required): YouTube URL + +**Example:** +```bash +curl "http://localhost:8888/info?url=https://www.youtube.com/watch?v=VIDEO_ID" +``` + +**Response:** +```json +{ + "success": true, + "title": "Video Title", + "type": "video", + "duration": 300, + "channel": "Channel Name", + "videoCount": 1 +} +``` + +--- + +## Download Endpoints + +### GET /download-stream +Download YouTube video(s) to MP3 with Server-Sent Events (SSE) progress updates. 
+ +**Query Parameters:** +- `url` (required): YouTube URL +- `outputPath` (optional): Custom output directory path + +**Example:** +```bash +curl "http://localhost:8888/download-stream?url=https://www.youtube.com/watch?v=VIDEO_ID" +``` + +**SSE Events:** +- `info`: Video/playlist information +- `progress`: Download progress updates +- `video-complete`: Individual video completion +- `complete`: All downloads complete +- `error`: Error occurred + +### POST /download +Download YouTube video(s) to MP3 (non-streaming). + +**Body Parameters:** +```json +{ + "url": "https://www.youtube.com/watch?v=VIDEO_ID", + "outputPath": "./custom/path" // optional +} +``` + +**Example:** +```bash +curl -X POST http://localhost:8888/download \ + -H "Content-Type: application/json" \ + -d '{"url":"https://www.youtube.com/watch?v=VIDEO_ID"}' +``` + +**Response:** +```json +{ + "success": true, + "playlistTitle": null, + "totalVideos": 1, + "successCount": 1, + "failCount": 0, + "videos": [ + { + "success": true, + "title": "Video Title", + "filePath": "./output/video.mp3", + "fileUrl": "/files/video.mp3" + } + ] +} +``` + +--- + +## Transcription Endpoints + +### POST /transcribe +Transcribe an existing audio file. 
+ +**Body Parameters:** +```json +{ + "filePath": "./output/audio.mp3", + "language": "en", // optional (auto-detect if not specified) + "format": "txt", // optional: txt, json, srt, vtt + "model": "gpt-4o-mini-transcribe", // optional: gpt-4o-mini-transcribe (default), gpt-4o-transcribe, whisper-1 + "outputPath": "./custom/path" // optional +} +``` + +**Available Models:** +- `gpt-4o-mini-transcribe` (default) - Fast and cost-effective +- `gpt-4o-transcribe` - Higher quality +- `whisper-1` - Original Whisper model (supports more formats) + +**Example:** +```bash +curl -X POST http://localhost:8888/transcribe \ + -H "Content-Type: application/json" \ + -d '{ + "filePath": "./output/audio.mp3", + "language": "en", + "model": "gpt-4o-mini-transcribe" + }' +``` + +**Response:** +```json +{ + "success": true, + "filePath": "./output/audio.mp3", + "transcriptionPath": "./output/audio.txt", + "transcriptionUrl": "/files/audio.txt", + "text": "Transcribed text content..." +} +``` + +### POST /upload-transcribe +Upload and transcribe audio files. + +**Form Data:** +- `files`: Audio file(s) (multiple files supported, max 50) +- `language`: Language code (optional) +- `model`: Transcription model (optional, default: gpt-4o-mini-transcribe) +- `outputPath`: Custom output directory (optional) + +**Example:** +```bash +curl -X POST http://localhost:8888/upload-transcribe \ + -F "files=@audio1.mp3" \ + -F "files=@audio2.mp3" \ + -F "language=en" \ + -F "model=gpt-4o-mini-transcribe" +``` + +**Response:** +```json +{ + "success": true, + "totalFiles": 2, + "successCount": 2, + "failCount": 0, + "results": [ + { + "success": true, + "fileName": "audio1.mp3", + "transcriptionPath": "./output/audio1.txt", + "transcriptionUrl": "/files/audio1.txt", + "text": "Transcription..." + } + ] +} +``` + +### GET /process-stream +Download + Transcribe with SSE progress updates. 
+ +**Query Parameters:** +- `url` (required): YouTube URL +- `language` (optional): Language code +- `model` (optional): Transcription model (default: gpt-4o-mini-transcribe) +- `outputPath` (optional): Custom output directory + +**Example:** +```bash +curl "http://localhost:8888/process-stream?url=https://www.youtube.com/watch?v=VIDEO_ID&language=en&model=gpt-4o-mini-transcribe" +``` + +**SSE Events:** +- `info`: Video information +- `progress`: Progress updates (downloading or transcribing) +- `video-complete`: Download complete +- `transcribe-complete`: Transcription complete +- `complete`: All operations complete +- `error`: Error occurred + +### POST /process +Download + Transcribe (non-streaming). + +**Body Parameters:** +```json +{ + "url": "https://www.youtube.com/watch?v=VIDEO_ID", + "language": "en", // optional + "format": "txt", // optional + "model": "gpt-4o-mini-transcribe", // optional + "outputPath": "./custom/path" // optional +} +``` + +**Example:** +```bash +curl -X POST http://localhost:8888/process \ + -H "Content-Type: application/json" \ + -d '{ + "url": "https://www.youtube.com/watch?v=VIDEO_ID", + "language": "en", + "model": "gpt-4o-mini-transcribe" + }' +``` + +**Response:** +```json +{ + "success": true, + "playlistTitle": null, + "totalVideos": 1, + "downloadedCount": 1, + "transcribedCount": 1, + "results": [ + { + "title": "Video Title", + "downloadSuccess": true, + "audioPath": "./output/video.mp3", + "audioUrl": "/files/video.mp3", + "transcriptionSuccess": true, + "transcriptionPath": "./output/video.txt", + "transcriptionUrl": "/files/video.txt", + "text": "Transcription..." + } + ] +} +``` + +--- + +## Translation Endpoints + +### GET /languages +Get available translation languages. + +**Response:** +```json +{ + "languages": { + "en": "English", + "fr": "French", + "es": "Spanish", + "de": "German", + "zh": "Chinese", + "ja": "Japanese", + ... + } +} +``` + +### POST /translate +Translate text. 
+ +**Body Parameters:** +```json +{ + "text": "Text to translate", + "targetLang": "fr", // required: target language code + "sourceLang": "en" // optional: source language (auto-detect if not specified) +} +``` + +**Example:** +```bash +curl -X POST http://localhost:8888/translate \ + -H "Content-Type: application/json" \ + -d '{ + "text": "Hello, how are you?", + "targetLang": "fr" + }' +``` + +**Response:** +```json +{ + "success": true, + "originalText": "Hello, how are you?", + "translatedText": "Bonjour, comment allez-vous ?", + "targetLanguage": "French", + "sourceLanguage": "auto-detected", + "chunks": 1 +} +``` + +### POST /translate-file +Translate uploaded text files. + +**Form Data:** +- `files`: Text file(s) (.txt, multiple files supported, max 50) +- `targetLang`: Target language code (required) +- `sourceLang`: Source language code (optional) +- `outputPath`: Custom output directory (optional) + +**Example:** +```bash +curl -X POST http://localhost:8888/translate-file \ + -F "files=@document.txt" \ + -F "targetLang=fr" \ + -F "sourceLang=en" +``` + +**Response:** +```json +{ + "success": true, + "totalFiles": 1, + "successCount": 1, + "failCount": 0, + "results": [ + { + "success": true, + "fileName": "document.txt", + "translationPath": "./output/document_fr.txt", + "translationUrl": "/files/document_fr.txt", + "translatedText": "Translated content..." + } + ] +} +``` + +--- + +## Summarization Endpoints + +### GET /summary-styles +Get available summary styles. + +**Response:** +```json +{ + "styles": { + "concise": "A brief summary capturing main points", + "detailed": "A comprehensive summary with nuances", + "bullet": "Key points as bullet points" + } +} +``` + +### POST /summarize +Summarize text using GPT-5.1. 
+ +**Body Parameters:** +```json +{ + "text": "Long text to summarize...", + "style": "concise", // optional: concise (default), detailed, bullet + "language": "same", // optional: 'same' (default) or language code + "model": "gpt-5.1" // optional: default is gpt-5.1 +} +``` + +**Example:** +```bash +curl -X POST http://localhost:8888/summarize \ + -H "Content-Type: application/json" \ + -d '{ + "text": "Long article content...", + "style": "bullet", + "language": "same" + }' +``` + +**Response:** +```json +{ + "success": true, + "summary": "Summary content...", + "model": "gpt-5.1", + "style": "bullet", + "inputLength": 5000, + "chunks": 1 +} +``` + +### POST /summarize-file +Summarize uploaded text files using GPT-5.1. + +**Form Data:** +- `files`: Text file(s) (.txt, multiple files supported, max 50) +- `style`: Summary style (optional, default: concise) +- `language`: Output language (optional, default: same) +- `model`: AI model (optional, default: gpt-5.1) +- `outputPath`: Custom output directory (optional) + +**Example:** +```bash +curl -X POST http://localhost:8888/summarize-file \ + -F "files=@article.txt" \ + -F "style=detailed" \ + -F "language=same" +``` + +**Response:** +```json +{ + "success": true, + "totalFiles": 1, + "successCount": 1, + "failCount": 0, + "results": [ + { + "success": true, + "fileName": "article.txt", + "summaryPath": "./output/article_summary.txt", + "summaryUrl": "/files/article_summary.txt", + "summary": "Summary content...", + "model": "gpt-5.1", + "chunks": 1 + } + ] +} +``` + +### GET /summarize-stream +Full pipeline: Download -> Transcribe -> Summarize with SSE progress. 
+ +**Query Parameters:** +- `url` (required): YouTube URL +- `style` (optional): Summary style (default: concise) +- `language` (optional): Output language (default: same) +- `model` (optional): Transcription model (default: gpt-4o-mini-transcribe) +- `outputPath` (optional): Custom output directory + +**Example:** +```bash +curl "http://localhost:8888/summarize-stream?url=https://www.youtube.com/watch?v=VIDEO_ID&style=bullet&model=gpt-4o-mini-transcribe" +``` + +**SSE Events:** +- `info`: Video information +- `progress`: Progress updates (downloading, transcribing, or summarizing) +- `video-complete`: Download complete +- `transcribe-complete`: Transcription complete +- `summarize-complete`: Summary complete +- `complete`: All operations complete +- `error`: Error occurred + +--- + +## File Management + +### GET /files-list +List all downloaded/generated files. + +**Example:** +```bash +curl http://localhost:8888/files-list +``` + +**Response:** +```json +{ + "files": [ + { + "name": "video.mp3", + "url": "/files/video.mp3", + "path": "./output/video.mp3" + }, + { + "name": "video.txt", + "url": "/files/video.txt", + "path": "./output/video.txt" + } + ] +} +``` + +### GET /files/:filename +Serve a specific file. 
+ +**Example:** +```bash +curl http://localhost:8888/files/video.mp3 --output video.mp3 +``` + +--- + +## Error Responses + +All endpoints return error responses in the following format (streaming endpoints report failures that occur after the stream has started through their `error` SSE event instead): + +```json +{ + "error": "Error message describing what went wrong" +} +``` + +Common HTTP status codes: +- `400` - Bad Request (missing required parameters) +- `500` - Internal Server Error (processing failed) + +--- + +## Notes + +### Output Paths +For endpoints that support the `outputPath` parameter: +- If not specified, files are saved to the default `OUTPUT_DIR` (./output) +- If specified, files are saved to the custom path provided + +### Models +- **Transcription**: Default is `gpt-4o-mini-transcribe` (cost-effective) +- **Summarization**: Default is `gpt-5.1` (latest GPT model) +- **Translation**: Uses `gpt-4o-mini` (hardcoded) + +### File Formats +- **Audio**: MP3, WAV, M4A, OGG, FLAC +- **Text**: TXT files +- **Transcription outputs**: TXT, JSON, SRT, VTT (depending on model) + +### API Key +Ensure `OPENAI_API_KEY` is set in your `.env` file for transcription, translation, and summarization features to work. 
diff --git a/src/server.js b/src/server.js index c830899..f16ed6d 100644 --- a/src/server.js +++ b/src/server.js @@ -14,7 +14,7 @@ import { convertToMP3, convertMultipleToMP3, getSupportedFormats } from './servi dotenv.config(); const app = express(); -const PORT = process.env.PORT || 3000; +const PORT = process.env.PORT || 8888; const OUTPUT_DIR = process.env.OUTPUT_DIR || './output'; // Ensure output directory exists @@ -151,10 +151,10 @@ app.get('/info', async (req, res) => { /** * GET /download-stream * Download with SSE progress updates - * Query: url (required) + * Query: url (required), outputPath (optional) */ app.get('/download-stream', async (req, res) => { - const { url } = req.query; + const { url, outputPath } = req.query; if (!url) { return res.status(400).json({ error: 'URL parameter required' }); @@ -195,7 +195,7 @@ app.get('/download-stream', async (req, res) => { let videoStartTime = Date.now(); const result = await download(url, { - outputDir: OUTPUT_DIR, + outputDir: outputPath || OUTPUT_DIR, onDownloadProgress: (progress) => { // Calculate overall progress const videoProgress = progress.percent || 0; @@ -274,11 +274,12 @@ app.get('/download-stream', async (req, res) => { /** * POST /download * Download a video or playlist as MP3 (non-streaming version) - * Body: { url: string, outputDir?: string } + * Body: { url: string, outputPath?: string } */ app.post('/download', async (req, res) => { try { - const { url, outputDir = OUTPUT_DIR } = req.body; + const { url, outputPath } = req.body; + const outputDir = outputPath || OUTPUT_DIR; if (!url) { return res.status(400).json({ error: 'URL required in request body' }); @@ -309,11 +310,11 @@ app.post('/download', async (req, res) => { /** * POST /transcribe * Transcribe an existing audio file - * Body: { filePath: string, language?: string, format?: string } + * Body: { filePath: string, language?: string, format?: string, outputPath?: string } */ app.post('/transcribe', async (req, res) => { try { - 
const { filePath, language, format = 'txt', model = 'gpt-4o-transcribe' } = req.body; + const { filePath, language, format = 'txt', model = 'gpt-4o-mini-transcribe', outputPath } = req.body; if (!filePath) { return res.status(400).json({ error: 'filePath required in request body' }); @@ -329,6 +330,7 @@ app.post('/transcribe', async (req, res) => { responseFormat: format === 'txt' ? 'text' : format, outputFormat: format, model, + outputDir: outputPath, }); res.json({ @@ -346,6 +348,7 @@ app.post('/transcribe', async (req, res) => { /** * POST /upload-transcribe * Upload audio files and transcribe them + * Body: { language?: string, model?: string, outputPath?: string } */ app.post('/upload-transcribe', upload.array('files', 50), async (req, res) => { try { @@ -357,7 +360,7 @@ app.post('/upload-transcribe', upload.array('files', 50), async (req, res) => { return res.status(400).json({ error: 'No files uploaded' }); } - const { language, model = 'gpt-4o-transcribe' } = req.body; + const { language, model = 'gpt-4o-mini-transcribe', outputPath } = req.body; const results = []; console.log(`Transcribing ${req.files.length} uploaded files with model ${model}`); @@ -372,6 +375,7 @@ app.post('/upload-transcribe', upload.array('files', 50), async (req, res) => { responseFormat: 'text', outputFormat: 'txt', model, + outputDir: outputPath, }); results.push({ @@ -471,10 +475,10 @@ app.get('/supported-formats', (req, res) => { /** * GET /process-stream * Download and transcribe with SSE progress updates - * Query: url, language?, model? + * Query: url, language?, model?, outputPath? 
*/ app.get('/process-stream', async (req, res) => { - const { url, language, model = 'gpt-4o-transcribe' } = req.query; + const { url, language, model = 'gpt-4o-mini-transcribe', outputPath } = req.query; if (!url) { return res.status(400).json({ error: 'URL parameter required' }); @@ -519,7 +523,7 @@ app.get('/process-stream', async (req, res) => { let videoStartTime = Date.now(); const downloadResult = await download(url, { - outputDir: OUTPUT_DIR, + outputDir: outputPath || OUTPUT_DIR, onDownloadProgress: (progress) => { const videoProgress = progress.percent || 0; // Download is 50% of total, transcribe is other 50% @@ -579,6 +583,7 @@ app.get('/process-stream', async (req, res) => { responseFormat: 'text', outputFormat: 'txt', model, + outputDir: outputPath, }); transcribeResults.push(result); videosTranscribed++; @@ -642,11 +647,12 @@ app.get('/process-stream', async (req, res) => { /** * POST /process * Download and transcribe a video or playlist (non-streaming) - * Body: { url: string, language?: string, format?: string } + * Body: { url: string, language?: string, format?: string, outputPath?: string } */ app.post('/process', async (req, res) => { try { - const { url, language, format = 'txt', outputDir = OUTPUT_DIR, model = 'gpt-4o-transcribe' } = req.body; + const { url, language, format = 'txt', outputPath, model = 'gpt-4o-mini-transcribe' } = req.body; + const outputDir = outputPath || OUTPUT_DIR; if (!url) { return res.status(400).json({ error: 'URL required in request body' }); @@ -670,6 +676,7 @@ app.post('/process', async (req, res) => { responseFormat: format === 'txt' ? 
'text' : format, outputFormat: format, model, + outputDir, }); // Combine results @@ -771,6 +778,7 @@ app.post('/translate', async (req, res) => { /** * POST /translate-file * Translate uploaded text files + * Body: { targetLang: string, sourceLang?: string, outputPath?: string } */ app.post('/translate-file', uploadText.array('files', 50), async (req, res) => { try { @@ -782,7 +790,7 @@ app.post('/translate-file', uploadText.array('files', 50), async (req, res) => { return res.status(400).json({ error: 'No files uploaded' }); } - const { targetLang, sourceLang } = req.body; + const { targetLang, sourceLang, outputPath } = req.body; if (!targetLang) { return res.status(400).json({ error: 'targetLang required' }); @@ -796,7 +804,7 @@ app.post('/translate-file', uploadText.array('files', 50), async (req, res) => { console.log(`[${i + 1}/${req.files.length}] Translating: ${file.originalname}`); try { - const result = await translateFile(file.path, targetLang, sourceLang || null); + const result = await translateFile(file.path, targetLang, sourceLang || null, outputPath); results.push({ success: true, fileName: file.originalname, @@ -866,6 +874,7 @@ app.post('/summarize', async (req, res) => { /** * POST /summarize-file * Summarize uploaded text files using GPT-5.1 + * Body: { style?: string, language?: string, model?: string, outputPath?: string } */ app.post('/summarize-file', uploadText.array('files', 50), async (req, res) => { try { @@ -877,7 +886,7 @@ app.post('/summarize-file', uploadText.array('files', 50), async (req, res) => { return res.status(400).json({ error: 'No files uploaded' }); } - const { style = 'concise', language = 'same', model = 'gpt-5.1' } = req.body; + const { style = 'concise', language = 'same', model = 'gpt-5.1', outputPath } = req.body; const results = []; console.log(`Summarizing ${req.files.length} files with ${model}`); @@ -887,7 +896,7 @@ app.post('/summarize-file', uploadText.array('files', 50), async (req, res) => { console.log(`[${i 
+ 1}/${req.files.length}] Summarizing: ${file.originalname}`); try { - const result = await summarizeFile(file.path, { style, language, model }); + const result = await summarizeFile(file.path, { style, language, model, outputDir: outputPath }); results.push({ success: true, fileName: file.originalname, @@ -922,10 +931,10 @@ app.post('/summarize-file', uploadText.array('files', 50), async (req, res) => { /** * GET /summarize-stream * Full pipeline: Download -> Transcribe -> Summarize with SSE progress - * Query: url, style?, language?, model? + * Query: url, style?, language?, model?, outputPath? */ app.get('/summarize-stream', async (req, res) => { - const { url, style = 'concise', language = 'same', model = 'gpt-4o-transcribe' } = req.query; + const { url, style = 'concise', language = 'same', model = 'gpt-4o-mini-transcribe', outputPath } = req.query; if (!url) { return res.status(400).json({ error: 'URL parameter required' }); @@ -968,7 +977,7 @@ app.get('/summarize-stream', async (req, res) => { let videosDownloaded = 0; const downloadResult = await download(url, { - outputDir: OUTPUT_DIR, + outputDir: outputPath || OUTPUT_DIR, onDownloadProgress: (progress) => { const videoProgress = progress.percent || 0; const overallPercent = ((videosDownloaded + (videoProgress / 100)) / totalVideos) * 33; @@ -1019,6 +1028,7 @@ app.get('/summarize-stream', async (req, res) => { responseFormat: 'text', outputFormat: 'txt', model, + outputDir: outputPath, }); transcribeResults.push({ ...result, title: video.title, success: true }); @@ -1059,7 +1069,7 @@ app.get('/summarize-stream', async (req, res) => { }); try { - const result = await summarizeFile(transcription.transcriptionPath, { style, language, model: 'gpt-5.1' }); + const result = await summarizeFile(transcription.transcriptionPath, { style, language, model: 'gpt-5.1', outputDir: outputPath }); summaryResults.push({ title: transcription.title, success: true, diff --git a/src/services/summarize.js 
b/src/services/summarize.js index fedc9b1..4acb8bd 100644 --- a/src/services/summarize.js +++ b/src/services/summarize.js @@ -164,11 +164,13 @@ export async function summarizeFile(filePath, options = {}) { throw new Error(`File not found: ${filePath}`); } + const { outputDir, ...otherOptions } = options; + const text = fs.readFileSync(filePath, 'utf-8'); - const result = await summarizeText(text, options); + const result = await summarizeText(text, otherOptions); // Save summary to file - const dir = path.dirname(filePath); + const dir = outputDir || path.dirname(filePath); const baseName = path.basename(filePath, path.extname(filePath)); const summaryPath = path.join(dir, `${baseName}_summary.txt`); diff --git a/src/services/transcription.js b/src/services/transcription.js index f0df30d..b3b5f28 100644 --- a/src/services/transcription.js +++ b/src/services/transcription.js @@ -23,7 +23,7 @@ const MODELS = { }, }; -const DEFAULT_MODEL = 'gpt-4o-transcribe'; +const DEFAULT_MODEL = 'gpt-4o-mini-transcribe'; /** * Get OpenAI client (lazy initialization) diff --git a/src/services/translation.js b/src/services/translation.js index d4a1636..efbe1c0 100644 --- a/src/services/translation.js +++ b/src/services/translation.js @@ -207,8 +207,9 @@ export async function translateText(text, targetLang, sourceLang = null) { * @param {string} filePath - Path to text file * @param {string} targetLang - Target language code * @param {string} sourceLang - Source language code (optional) + * @param {string} outputDir - Output directory (optional) */ -export async function translateFile(filePath, targetLang, sourceLang = null) { +export async function translateFile(filePath, targetLang, sourceLang = null, outputDir = null) { if (!fs.existsSync(filePath)) { throw new Error(`File not found: ${filePath}`); } @@ -219,7 +220,7 @@ export async function translateFile(filePath, targetLang, sourceLang = null) { // Save translation const baseName = path.basename(filePath, 
path.extname(filePath)); const outputPath = path.join( - path.dirname(filePath), + outputDir || path.dirname(filePath), `${baseName}_${targetLang}.txt` ); @@ -235,7 +236,7 @@ export async function translateFile(filePath, targetLang, sourceLang = null) { /** * Translate multiple files */ -export async function translateMultiple(filePaths, targetLang, sourceLang = null, onProgress = null) { +export async function translateMultiple(filePaths, targetLang, sourceLang = null, onProgress = null, outputDir = null) { const results = []; for (let i = 0; i < filePaths.length; i++) { @@ -248,7 +249,7 @@ export async function translateMultiple(filePaths, targetLang, sourceLang = null console.log(`[${i + 1}/${filePaths.length}] Translating: ${path.basename(filePath)}`); try { - const result = await translateFile(filePath, targetLang, sourceLang); + const result = await translateFile(filePath, targetLang, sourceLang, outputDir); results.push(result); } catch (error) { console.error(`Failed to translate ${filePath}: ${error.message}`); diff --git a/start-server.bat b/start-server.bat new file mode 100644 index 0000000..4a94129 --- /dev/null +++ b/start-server.bat @@ -0,0 +1,61 @@ +@echo off +REM Video to MP3 Transcriptor Server Starter +REM This script starts the API server on port 8888 + +echo ========================================== +echo Video to MP3 Transcriptor API +echo ========================================== +echo. + +REM Check if node is installed +where node >nul 2>nul +if %ERRORLEVEL% NEQ 0 ( + echo Error: Node.js is not installed + echo Please install Node.js from https://nodejs.org/ + pause + exit /b 1 +) + +REM Check if npm is installed +where npm >nul 2>nul +if %ERRORLEVEL% NEQ 0 ( + echo Error: npm is not installed + echo Please install npm + pause + exit /b 1 +) + +REM Check if .env file exists +if not exist .env ( + echo Warning: .env file not found + echo Creating .env file... 
+ ( + echo OPENAI_API_KEY= + echo PORT=8888 + echo OUTPUT_DIR=./output + ) > .env + echo. + echo Please edit .env and add your OPENAI_API_KEY + echo. +) + +REM Install dependencies on first run; stop if the install fails +if not exist node_modules ( + echo Installing dependencies... + call npm install || (echo Error: npm install failed & pause & exit /b 1) + echo. +) + +REM Free port 8888 (CALL is required: npx is a batch script and would otherwise end this script) +echo Checking port 8888... +call npx kill-port 8888 >nul 2>nul + +echo. +echo Starting server on http://localhost:8888 +echo Press Ctrl+C to stop the server +echo. +echo ========================================== +echo. + +REM Start the server +call npm run server diff --git a/start-server.sh b/start-server.sh new file mode 100644 index 0000000..523385a --- /dev/null +++ b/start-server.sh @@ -0,0 +1,58 @@ +#!/bin/bash + +# Video to MP3 Transcriptor Server Starter +# This script starts the API server on port 8888 + +echo "==========================================" +echo "Video to MP3 Transcriptor API" +echo "==========================================" +echo "" + +# Check if node is installed +if ! command -v node &> /dev/null +then + echo "Error: Node.js is not installed" + echo "Please install Node.js from https://nodejs.org/" + exit 1 +fi + +# Check if npm is installed +if ! command -v npm &> /dev/null +then + echo "Error: npm is not installed" + echo "Please install npm" + exit 1 +fi + +# Check if .env file exists +if [ ! -f .env ]; then + echo "Warning: .env file not found" + echo "Creating .env file..." + echo "OPENAI_API_KEY=" > .env + echo "PORT=8888" >> .env + echo "OUTPUT_DIR=./output" >> .env + echo "" + echo "Please edit .env and add your OPENAI_API_KEY" + echo "" +fi + +# Install dependencies on first run; abort if the install fails +if [ ! -d "node_modules" ]; then + echo "Installing dependencies..." + npm install || { echo "Error: npm install failed" >&2; exit 1; } + echo "" +fi + +# Kill any process using port 8888 +echo "Checking port 8888..." 
+npx kill-port 8888 2>/dev/null + +echo "" +echo "Starting server on http://localhost:8888" +echo "Press Ctrl+C to stop the server" +echo "" +echo "==========================================" +echo "" + +# Start the server +npm run server