Fix Story Reader word parsing for letters and punctuation

- Separate punctuation from words during sentence parsing
- Add special handling for letter pairs (Aa, Bb, Cc, etc.)
- Add special handling for punctuation marks (., !, ?, :, etc.)
- Preserve punctuation display while enabling proper word-by-word navigation
- Fix alphabet learning display in SBS-1 content

🤖 Generated with [Claude Code](https://claude.ai/code)

Co-Authored-By: Claude <noreply@anthropic.com>
This commit is contained in:
StillHammer 2025-09-20 11:29:36 +08:00
parent 30a2028da6
commit e67e40f09b

View File

@@ -1115,11 +1115,16 @@ class StoryReader {
breakSentenceIntoWords(original, translation) { breakSentenceIntoWords(original, translation) {
if (!original) return []; if (!original) return [];
const words = original.split(/\s+/).filter(word => word.trim().length > 0); // First, separate punctuation from words while preserving spaces
const translationWords = translation ? translation.split(/\s+/).filter(word => word.trim().length > 0) : []; const preprocessed = original.replace(/([.,!?;:"'()[\]{}\-–—])/g, ' $1 ');
const words = preprocessed.split(/\s+/).filter(word => word.trim().length > 0);
// Do the same for translation
const translationPreprocessed = translation ? translation.replace(/([.,!?;:"'()[\]{}\-–—])/g, ' $1 ') : '';
const translationWords = translationPreprocessed ? translationPreprocessed.split(/\s+/).filter(word => word.trim().length > 0) : [];
return words.map((word, index) => { return words.map((word, index) => {
// Clean punctuation for word lookup // Clean punctuation for word lookup, but preserve punctuation in display
const cleanWord = word.replace(/[.,!?;:"'()[\]{}\-–—]/g, '').toLowerCase(); const cleanWord = word.replace(/[.,!?;:"'()[\]{}\-–—]/g, '').toLowerCase();
// Try to find in vocabulary // Try to find in vocabulary
@@ -1127,11 +1132,23 @@ class StoryReader {
let wordType = 'word'; let wordType = 'word';
let pronunciation = ''; let pronunciation = '';
// Special handling for letter pairs (like "Aa", "Bb", etc.)
if (/^[A-Za-z]{1,2}$/.test(cleanWord)) {
wordType = 'letter';
wordTranslation = word; // Keep the letter as is
}
// Special handling for punctuation marks
if (/^[.,!?;:"'()[\]{}]$/.test(word)) {
wordType = 'punctuation';
wordTranslation = word; // Keep punctuation as is
}
// Look up in content vocabulary if available // Look up in content vocabulary if available
if (this.vocabulary && this.vocabulary[cleanWord]) { if (this.vocabulary && this.vocabulary[cleanWord]) {
const vocabEntry = this.vocabulary[cleanWord]; const vocabEntry = this.vocabulary[cleanWord];
wordTranslation = vocabEntry.user_language || vocabEntry.translation || wordTranslation; wordTranslation = vocabEntry.user_language || vocabEntry.translation || wordTranslation;
wordType = vocabEntry.type || 'word'; wordType = vocabEntry.type || wordType;
pronunciation = vocabEntry.pronunciation || ''; pronunciation = vocabEntry.pronunciation || '';
} }