#!/usr/bin/env python3
"""
Main script for translating Japanese VTT files to English using Ollama.

This script orchestrates the entire translation pipeline:
1. Prompts user for input VTT file
2. Analyzes and chunks the file
3. Translates each chunk via Ollama
4. Validates translations
5. Reassembles into final output
"""

import os
import sys

from vtt_utils import VTTFile, Subtitle
from chunker import VTTChunker
from ollama_client import OllamaClient
from translator import TranslationProcessor
from reassembler import VTTReassembler
from tui import ProgressDisplay

# Configuration from environment / hardcoded defaults
OLLAMA_BASE_URL = os.getenv('OLLAMA_BASE_URL', 'http://ai-house:11434/')
OLLAMA_MODEL = os.getenv('OLLAMA_MODEL', 'translategemma:12b')
TEMP_DIR = '/tmp/'
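
# Example invocation (a sketch; assumes this script is saved as main.py, that an
# Ollama server is reachable, and that the model has been pulled, e.g. with
# `ollama pull translategemma:12b`):
#
#   OLLAMA_BASE_URL=http://localhost:11434/ OLLAMA_MODEL=translategemma:12b python3 main.py
#
# Without overrides, the defaults above (http://ai-house:11434/, translategemma:12b) are used.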

def get_input_file() -> str:
    """
    Prompt the user for the input VTT file path.

    Returns:
        Absolute path to the VTT file
    """
    display = ProgressDisplay()
    display.print_banner("Japanese VTT Translator")

    while True:
        display.print_info("Enter the path to your Japanese VTT file:")
        file_path = input(" > ").strip()

        if not file_path:
            display.print_warning("Please enter a valid path.")
            continue

        # Expand the user home directory and normalize to an absolute path
        expanded_path = os.path.expanduser(file_path)
        if not os.path.isabs(expanded_path):
            expanded_path = os.path.abspath(expanded_path)

        if not os.path.exists(expanded_path):
            display.print_error(f"File not found: {expanded_path}")
            continue

        if not expanded_path.lower().endswith('.vtt'):
            display.print_warning("File must be a .vtt file.")
            continue

        return expanded_path

def validate_ollama_connection() -> bool:
    """
    Validate that the Ollama server is available and has the required model.

    Returns:
        True if connection is valid, False otherwise
    """
    display = ProgressDisplay()
    display.print_section("Validating Ollama Connection")
    display.print_info(f"Server URL: {OLLAMA_BASE_URL}")
    display.print_info(f"Model: {OLLAMA_MODEL}")

    client = OllamaClient(OLLAMA_BASE_URL, OLLAMA_MODEL)

    if not client.is_available():
        display.print_error("Cannot connect to Ollama server.")
        display.print_info(f"Make sure Ollama is running at {OLLAMA_BASE_URL}")
        return False

    display.print_success("✓ Connected to Ollama")

    # Try to get model info
    model_info = client.get_model_info()
    if model_info:
        display.print_success(f"✓ Model '{OLLAMA_MODEL}' is available")
    else:
        display.print_warning(f"Could not verify model '{OLLAMA_MODEL}' availability")
        display.print_info("Proceeding anyway - may fail during translation")

    return True

def main():
    """Main execution flow."""
    display = ProgressDisplay()

    try:
        # Step 1: Get input file
        display.print_step(1, 6, "Select Input File")
        input_file = get_input_file()
        if not input_file:
            display.print_error("No valid file selected. Exiting.")
            return
        display.print_success(f"✓ Selected: {input_file}")

        # Step 2: Validate Ollama connection
        display.print_step(2, 6, "Validate Ollama Connection")
        if not validate_ollama_connection():
            display.print_error("Cannot proceed without Ollama connection. Exiting.")
            return

        # Step 3: Load and analyze input file
        display.print_step(3, 6, "Load and Analyze VTT File")
        display.print_info("Loading VTT file...")
        try:
            vtt_file = VTTFile(input_file)
        except Exception as e:
            display.print_error(f"Failed to parse VTT file: {e}")
            return

        display.print_success(f"✓ Loaded {len(vtt_file.subtitles)} subtitles")

        total_minutes, total_hours = vtt_file.get_duration()
        display.print_file_info(
            os.path.basename(input_file),
            total_minutes,
            total_hours,
            0  # Chunk count is not known yet; it is reported after chunking
        )

        # Step 4: Chunk the file
        display.print_step(4, 6, "Chunk VTT File")
        display.print_info("Chunking file respecting token limits...")
        chunker = VTTChunker(vtt_file)
        chunks = chunker.chunk()
        display.print_success(f"✓ Created {len(chunks)} chunks")

        token_estimates = chunker.get_chunk_token_estimates()
        display.print_info(f"Average tokens per chunk: {sum(token_estimates) // len(token_estimates)}")

        # Step 5: Translate chunks
        display.print_step(5, 6, "Translate Chunks")
        display.print_info(
            f"Translating {len(chunks)} chunks via Ollama (this may take several minutes)..."
        )

        client = OllamaClient(OLLAMA_BASE_URL, OLLAMA_MODEL)
        processor = TranslationProcessor(client)

        translated_chunks = []
        failed_chunks = []

        for i, chunk in enumerate(chunks, 1):
            display.print_chunk_status(
                i, len(chunks), "⏳ Processing...",
                f"{len(chunk.subtitles)} subtitles"
            )
            # Translate the chunk's subtitles one by one, with progress feedback
            translated_subs = []
            for j, sub in enumerate(chunk.subtitles, 1):
                # Update the progress bar on every subtitle; for large chunks
                # (more than 50 subtitles) update roughly every 5% to limit redraws
                progress_interval = max(1, len(chunk.subtitles) // 20) if len(chunk.subtitles) > 50 else 1
                if j % progress_interval == 0 or j == 1 or j == len(chunk.subtitles):
                    display.print_progress_bar(
                        j, len(chunk.subtitles),
                        label=f"Chunk {i}"
                    )
                # Translate the cue text; fall back to an empty string on failure
                print(f" Translating subtitle {j}/{len(chunk.subtitles)}...", end="\r", flush=True)
                translated_text = client.translate(sub.text)
                if translated_text is None:
                    translated_text = ""

                translated_subs.append(Subtitle(
                    start_time=sub.start_time,
                    end_time=sub.end_time,
                    text=translated_text
                ))

            print()  # Clear the progress line

            # Build the translated chunk in memory; VTTFile.__new__ skips
            # __init__, which would otherwise try to parse a file from disk
            processed_chunk = VTTFile.__new__(VTTFile)
            processed_chunk.filepath = chunk.filepath
            processed_chunk.subtitles = translated_subs
            # Sanity check
            is_valid, reason = processor.sanity_check(processed_chunk)

            if is_valid:
                translated_chunks.append(processed_chunk)
                display.print_chunk_status(i, len(chunks), "✓ Translated")
            else:
                # Try once more
                display.print_warning(f" Sanity check failed: {reason}. Retrying chunk {i}...")
                translated_subs = []
                for j, sub in enumerate(chunk.subtitles, 1):
                    print(f" Retrying subtitle {j}/{len(chunk.subtitles)}...", end="\r", flush=True)
                    translated_text = client.translate(sub.text)
                    if translated_text is None:
                        translated_text = ""
                    translated_subs.append(Subtitle(
                        start_time=sub.start_time,
                        end_time=sub.end_time,
                        text=translated_text
                    ))
                print()  # Clear the progress line

                processed_chunk = VTTFile.__new__(VTTFile)
                processed_chunk.filepath = chunk.filepath
                processed_chunk.subtitles = translated_subs

                is_valid, reason = processor.sanity_check(processed_chunk)
                if is_valid:
                    translated_chunks.append(processed_chunk)
                    display.print_chunk_status(i, len(chunks), "✓ Translated (retry)")
                else:
                    failed_chunks.append(i)
                    display.print_chunk_status(i, len(chunks), f"✗ Failed: {reason}")

        if failed_chunks:
            display.print_warning(
                f"Failed to translate {len(failed_chunks)} chunk(s): {failed_chunks}"
            )
            if len(failed_chunks) == len(chunks):
                display.print_error("All chunks failed. Cannot proceed. Exiting.")
                return
        else:
            display.print_success(f"✓ All {len(chunks)} chunks translated successfully")

        # Step 6: Reassemble and finalize
        display.print_step(6, 6, "Reassemble and Finalize")
        display.print_info("Reassembling translated chunks...")

        if not translated_chunks:
            display.print_error("No translated chunks available. Exiting.")
            return

        output_dir = os.path.dirname(input_file)
        output_path = VTTReassembler.reassemble(
            translated_chunks,
            os.path.basename(input_file),
            output_dir
        )
- display.print_success(f"✓ Reassembled into single file")

        # Final summary
        display.print_banner("Translation Complete!")
        display.print_info(f"Output file: {output_path}")

        if failed_chunks:
            display.print_warning(
                f"Note: {len(failed_chunks)} chunk(s) could not be translated. "
                f"Output is incomplete."
            )

        display.print_success("Translation pipeline completed successfully!")

    except KeyboardInterrupt:
        display.print_warning("\nInterrupted by user.")
        sys.exit(1)
    except Exception as e:
        display.print_error(f"Unexpected error: {e}")
        import traceback
        traceback.print_exc()
        sys.exit(1)


if __name__ == '__main__':
    main()