""" Translation and sanity checking logic. """ from typing import Tuple, Optional from vtt_utils import VTTFile, has_japanese_characters from ollama_client import OllamaClient class TranslationProcessor: """Handles translation and sanity checking of VTT chunks.""" def __init__(self, ollama_client: OllamaClient): """ Initialize translation processor. Args: ollama_client: OllamaClient instance for making translation requests """ self.ollama_client = ollama_client def translate_chunk(self, vtt_chunk: VTTFile) -> Optional[VTTFile]: """ Translate a VTT chunk by translating each subtitle individually. Args: vtt_chunk: VTTFile chunk to translate Returns: Translated VTTFile or None if translation fails """ if not vtt_chunk.subtitles: return None from vtt_utils import Subtitle translated_subtitles = [] failed_count = 0 for original_sub in vtt_chunk.subtitles: # Translate each subtitle individually for maximum accuracy translated_text = self.ollama_client.translate(original_sub.text) if translated_text is None or not translated_text.strip(): failed_count += 1 translated_text = "" translated_subtitles.append(Subtitle( start_time=original_sub.start_time, end_time=original_sub.end_time, text=translated_text )) # If more than 50% failed, something is very wrong if failed_count > len(vtt_chunk.subtitles) * 0.5: return None # Create new VTT file with translated subtitles translated_chunk = VTTFile.__new__(VTTFile) translated_chunk.filepath = vtt_chunk.filepath translated_chunk.subtitles = translated_subtitles return translated_chunk def sanity_check(self, vtt_chunk: VTTFile) -> Tuple[bool, str]: """ Perform sanity checks on translated chunk. Args: vtt_chunk: VTTFile to check Returns: Tuple of (is_valid, reason_for_failure) is_valid is True if all checks pass """ if not vtt_chunk.subtitles: return False, "Chunk is empty" empty_count = 0 japanese_count = 0 for i, subtitle in enumerate(vtt_chunk.subtitles): # Check for empty text if not subtitle.text.strip(): empty_count += 1 continue # Check for Japanese characters if has_japanese_characters(subtitle.text): japanese_count += 1 # Allow some empties (up to 10% of subtitles) if empty_count > len(vtt_chunk.subtitles) * 0.1: return False, f"Too many empty subtitles ({empty_count})" # Allow some Japanese (up to 10% - probably proper nouns/names) # This is more lenient since some names and special terms might not translate japanese_percent = (japanese_count / len(vtt_chunk.subtitles)) * 100 if japanese_percent > 10: return False, f"{japanese_count} subtitles ({japanese_percent:.1f}%) contain Japanese characters" return True, "" def process_chunk_with_retry(self, vtt_chunk: VTTFile, chunk_id: int) -> Optional[VTTFile]: """ Process a chunk with one retry on sanity check failure. Args: vtt_chunk: VTT chunk to process chunk_id: Identifier for logging purposes Returns: Processed and verified VTT chunk or None if processing fails """ # First attempt translated = self.translate_chunk(vtt_chunk) if translated is None: print(f" ✗ Chunk {chunk_id} translation returned None") return None # Check sanity is_valid, reason = self.sanity_check(translated) if is_valid: return translated # One retry print(f" ⚠ Chunk {chunk_id} sanity check failed: {reason}. Retrying...") translated = self.translate_chunk(vtt_chunk) if translated is None: print(f" ✗ Chunk {chunk_id} retry translation returned None") return None # Check again is_valid, reason = self.sanity_check(translated) if is_valid: return translated # Failed after retry print(f" ✗ Chunk {chunk_id} failed after retry: {reason}") return None