#!/usr/bin/env python3 """ Main script for translating Japanese VTT files to English using Ollama. This script orchestrates the entire translation pipeline: 1. Prompts user for input VTT file 2. Analyzes and chunks the file 3. Translates each chunk via Ollama 4. Validates translations 5. Reassembles into final output """ import os import sys import tempfile import shutil from pathlib import Path from vtt_utils import VTTFile, Subtitle from chunker import VTTChunker from ollama_client import OllamaClient from translator import TranslationProcessor from reassembler import VTTReassembler from tui import ProgressDisplay # Configuration from environment / hardcoded defaults OLLAMA_BASE_URL = os.getenv('OLLAMA_BASE_URL', 'http://localhost:11434/') OLLAMA_MODEL = os.getenv('OLLAMA_MODEL', 'translategemma:12b') TEMP_DIR = '/tmp/' def get_input_file() -> str: """ Prompt user for input VTT file path. Returns: Absolute path to the VTT file """ display = ProgressDisplay() display.print_banner("Japanese VTT Translator") while True: display.print_info("Enter the path to your Japanese VTT file:") file_path = input(" > ").strip() if not file_path: display.print_warning("Please enter a valid path.") continue # Expand user home directory expanded_path = os.path.expanduser(file_path) # Convert to absolute path if not os.path.isabs(expanded_path): expanded_path = os.path.abspath(expanded_path) if not os.path.exists(expanded_path): display.print_error(f"File not found: {expanded_path}") continue if not expanded_path.lower().endswith('.vtt'): display.print_warning("File must be a .vtt file.") continue return expanded_path return "" def validate_ollama_connection() -> bool: """ Validate that Ollama server is available and has the required model. Returns: True if connection is valid, False otherwise """ display = ProgressDisplay() display.print_section("Validating Ollama Connection") display.print_info(f"Server URL: {OLLAMA_BASE_URL}") display.print_info(f"Model: {OLLAMA_MODEL}") client = OllamaClient(OLLAMA_BASE_URL, OLLAMA_MODEL) if not client.is_available(): display.print_error("Cannot connect to Ollama server.") display.print_info(f"Make sure Ollama is running at {OLLAMA_BASE_URL}") return False display.print_success("✓ Connected to Ollama") # Try to get model info model_info = client.get_model_info() if model_info: display.print_success(f"✓ Model '{OLLAMA_MODEL}' is available") else: display.print_warning(f"Could not verify model '{OLLAMA_MODEL}' availability") display.print_info("Proceeding anyway - may fail during translation") return True def main(): """Main execution flow.""" display = ProgressDisplay() try: # Step 1: Get input file display.print_step(1, 6, "Select Input File") input_file = get_input_file() if not input_file: display.print_error("No valid file selected. Exiting.") return display.print_success(f"✓ Selected: {input_file}") # Step 2: Validate Ollama connection display.print_step(2, 6, "Validate Ollama Connection") if not validate_ollama_connection(): display.print_error("Cannot proceed without Ollama connection. Exiting.") return # Step 3: Load and analyze input file display.print_step(3, 6, "Load and Analyze VTT File") display.print_info("Loading VTT file...") try: vtt_file = VTTFile(input_file) except Exception as e: display.print_error(f"Failed to parse VTT file: {e}") return display.print_success(f"✓ Loaded {len(vtt_file.subtitles)} subtitles") total_minutes, total_hours = vtt_file.get_duration() display.print_file_info( os.path.basename(input_file), total_minutes, total_hours, 0 # Will update after chunking ) # Step 4: Chunk the file display.print_step(4, 6, "Chunk VTT File") display.print_info("Chunking file respecting token limits...") chunker = VTTChunker(vtt_file) chunks = chunker.chunk() display.print_success(f"✓ Created {len(chunks)} chunks") token_estimates = chunker.get_chunk_token_estimates() display.print_info(f"Average tokens per chunk: {sum(token_estimates) // len(token_estimates)}") # Step 5: Translate chunks display.print_step(5, 6, "Translate Chunks") display.print_info( f"Translating {len(chunks)} chunks via Ollama (this may take several minutes)..." ) client = OllamaClient(OLLAMA_BASE_URL, OLLAMA_MODEL) processor = TranslationProcessor(client) translated_chunks = [] failed_chunks = [] for i, chunk in enumerate(chunks, 1): display.print_chunk_status( i, len(chunks), "⏳ Processing...", f"{len(chunk.subtitles)} subtitles" ) # Create a custom processor that shows progress client = OllamaClient(OLLAMA_BASE_URL, OLLAMA_MODEL) # Translate subtitles with progress translated_subs = [] for j, sub in enumerate(chunk.subtitles, 1): # Show progress bar for every subtitle (or every 10 if there are many) progress_interval = max(1, len(chunk.subtitles) // 20) if len(chunk.subtitles) > 50 else 1 if j % progress_interval == 0 or j == 1 or j == len(chunk.subtitles): display.print_progress_bar( j, len(chunk.subtitles), label=f"Chunk {i}" ) # Translate with feedback print(f" Translating subtitle {j}/{len(chunk.subtitles)}...", end="\r", flush=True) translated_text = client.translate(sub.text) if translated_text is None: translated_text = "" translated_subs.append(Subtitle( start_time=sub.start_time, end_time=sub.end_time, text=translated_text )) print() # Clear the progress line # Create translated chunk processed_chunk = VTTFile.__new__(VTTFile) processed_chunk.filepath = chunk.filepath processed_chunk.subtitles = translated_subs # Sanity check is_valid, reason = processor.sanity_check(processed_chunk) if is_valid: translated_chunks.append(processed_chunk) display.print_chunk_status(i, len(chunks), "✓ Translated") else: # Try once more display.print_warning(f" Sanity check failed: {reason}. Retrying chunk {i}...") translated_subs = [] for j, sub in enumerate(chunk.subtitles, 1): print(f" Retrying subtitle {j}/{len(chunk.subtitles)}...", end="\r", flush=True) translated_text = client.translate(sub.text) if translated_text is None: translated_text = "" translated_subs.append(Subtitle( start_time=sub.start_time, end_time=sub.end_time, text=translated_text )) print() # Clear the progress line processed_chunk = VTTFile.__new__(VTTFile) processed_chunk.filepath = chunk.filepath processed_chunk.subtitles = translated_subs is_valid, reason = processor.sanity_check(processed_chunk) if is_valid: translated_chunks.append(processed_chunk) display.print_chunk_status(i, len(chunks), "✓ Translated (retry)") else: failed_chunks.append(i) display.print_chunk_status(i, len(chunks), f"✗ Failed: {reason}") if failed_chunks: display.print_warning( f"Failed to translate {len(failed_chunks)} chunk(s): {failed_chunks}" ) if len(failed_chunks) == len(chunks): display.print_error("All chunks failed. Cannot proceed. Exiting.") return else: display.print_success(f"✓ All {len(chunks)} chunks translated successfully") # Step 6: Reassemble and finalize display.print_step(6, 6, "Reassemble and Finalize") display.print_info("Reassembling translated chunks...") if not translated_chunks: display.print_error("No translated chunks available. Exiting.") return output_dir = os.path.dirname(input_file) output_path = VTTReassembler.reassemble( translated_chunks, os.path.basename(input_file), output_dir ) display.print_success(f"✓ Reassembled into single file") # Final summary display.print_banner("Translation Complete!") display.print_info(f"Output file: {output_path}") if failed_chunks: display.print_warning( f"Note: {len(failed_chunks)} chunk(s) could not be translated. " f"Output is incomplete." ) display.print_success("Translation pipeline completed successfully!") except KeyboardInterrupt: display.print_warning("\nInterrupted by user.") sys.exit(1) except Exception as e: display.print_error(f"Unexpected error: {e}") import traceback traceback.print_exc() sys.exit(1) if __name__ == '__main__': main()