"""Batch-convert the SRT subtitles found in ``SOURCE_DIR`` into several languages.

Each source file is converted with OpenCC (``s2t``, optionally ``s2tw`` and
``s2hk``) and additionally translated line by line through the translator
returned by ``build_translator`` for every language in ``AI_LANG_DIRS``.
"""

import argparse
import shutil
from pathlib import Path

from opencc import OpenCC

from translator import (
    DEFAULT_MODEL_CACHE,
    SUPPORTED_AI_LANGS,
    TranslatorConfig,
    build_translator,
)

# Directory that is scanned for the input ``*.srt`` files.
SOURCE_DIR = Path("./Subtitle_Trans_CN")

# (label, output directory, converter) triples that are always generated.
BASE_OPENCC_TARGETS = [
    ("通用繁體", Path("./Subtitle_ZH_Hant"), OpenCC("s2t")),
]

# Extra OpenCC targets, generated only when ``--include-tw-hk`` is passed.
TW_HK_TARGETS = [
    ("台灣繁體", Path("./Subtitle_ZH_Hant_TW"), OpenCC("s2tw")),
    ("香港繁體", Path("./Subtitle_ZH_Hant_HK"), OpenCC("s2hk")),
]

# Output directory for each AI-translated language code.
AI_LANG_DIRS = {
    "en": Path("./Subtitle_EN"),
    "jp": Path("./Subtitle_JP"),
    "korean": Path("./Subtitle_KOREAN"),
}

AI_LANGS = ["en", "jp", "korean"]


def parse_args() -> argparse.Namespace:
    """Parse the command-line options of the script.

    Returns:
        The namespace produced by ``argparse`` with the attributes
        ``backend``, ``openai_model``, ``openai_api_key``,
        ``model_cache_dir``, ``batch_size``, ``max_length``,
        ``include_tw_hk`` and ``show_lang``.
    """
    parser = argparse.ArgumentParser(description="批量翻译 SRT 字幕为多种语言。")
    parser.add_argument("--backend", choices=["local", "openai"], default="local")
    parser.add_argument("--openai-model", default="gpt-4o-mini")
    parser.add_argument("--openai-api-key", default=None)
    parser.add_argument(
        "--model-cache-dir",
        default=str(DEFAULT_MODEL_CACHE),
        help="NLLB 本地模型存储目录。",
    )
    parser.add_argument("--batch-size", type=int, default=4)
    parser.add_argument("--max-length", type=int, default=512)
    parser.add_argument(
        "--include-tw-hk",
        action="store_true",
        help="额外生成台繁与港繁字幕。",
    )
    parser.add_argument(
        "--show-lang",
        action="store_true",
        help="打印正在翻译的语言。",
    )
    return parser.parse_args()


def ensure_source_dir(source_dir: Path) -> list[Path]:
    """Return the sorted ``*.srt`` files directly inside *source_dir*.

    Args:
        source_dir: Directory expected to contain the source subtitles.

    Returns:
        The matching paths in sorted order (never empty).

    Raises:
        SystemExit: With exit code 1 (after printing a message) when the
            directory does not exist or contains no ``.srt`` file.
    """
    if not source_dir.is_dir():
        print(f"❌ 目录 {source_dir} 不存在,请检查路径。")
        raise SystemExit(1)

    files = sorted(source_dir.glob("*.srt"))
    if not files:
        print(f"❌ 目录 {source_dir} 中没有找到任何 .srt 文件。")
        raise SystemExit(1)
    return files


def prepare_target_dir(path: Path) -> None:
    """Make *path* an empty directory.

    A missing path is created (including parents). An existing directory is
    kept but everything inside it is deleted: files and symlinks are
    unlinked, sub-directories are removed recursively.

    Args:
        path: Output directory to create or empty.
    """
    if not path.exists():
        path.mkdir(parents=True, exist_ok=True)
        return

    for entry in path.iterdir():
        if entry.is_file() or entry.is_symlink():
            entry.unlink()
        else:
            shutil.rmtree(entry)


def is_dialog_line(line: str) -> bool:
    """Tell whether *line* should be sent to the translator.

    Blank lines, lines made only of digits (cue numbers) and lines
    containing ``-->`` (timing lines) are not dialog.

    Args:
        line: One line of subtitle text, with or without its line ending.

    Returns:
        ``True`` when the line is treated as dialog text.
    """
    # A UTF-8 byte-order mark is kept by the "utf-8" codec and is not
    # whitespace for str.strip(), so the first cue number of a BOM-prefixed
    # file would look like "\ufeff1" and be mistaken for dialog. Drop it
    # before classifying the line.
    stripped = line.strip().lstrip("\ufeff").strip()
    if not stripped or stripped.isdigit():
        return False
    return "-->" not in stripped


def split_line(line: str) -> tuple[str, str]:
    """Split *line* into its text and its trailing line terminator.

    Args:
        line: A line that may end with ``"\\r\\n"``, ``"\\n"`` or ``"\\r"``.

    Returns:
        A ``(text, ending)`` pair; ``ending`` is ``""`` when the line has
        none of the recognised terminators.
    """
    # "\r\n" must be tested first so it is not split as a lone "\n".
    for ending in ("\r\n", "\n", "\r"):
        if line.endswith(ending):
            return line[: -len(ending)], ending
    return line, ""


def translate_dialogs(content: str, translator, target_lang: str) -> str:
    """Translate the dialog lines of *content*, leaving other lines intact.

    Args:
        content: Full text of a subtitle file.
        translator: Object exposing ``translate(texts, target_lang=...)``
            that returns the translated strings in the same order.
        target_lang: Language code forwarded to the translator.

    Returns:
        The subtitle text with each dialog line replaced by its
        translation; original line endings are preserved. *content* is
        returned unchanged when it contains no dialog line.
    """
    lines = content.splitlines(keepends=True)

    indices: list[int] = []
    texts: list[str] = []
    endings: list[str] = []
    for idx, line in enumerate(lines):
        if not is_dialog_line(line):
            continue
        text, ending = split_line(line)
        indices.append(idx)
        texts.append(text)
        endings.append(ending)

    if not texts:
        return content

    # All dialog lines go to the translator in a single call.
    translated = translator.translate(texts, target_lang=target_lang)
    for pos, new_text in enumerate(translated):
        lines[indices[pos]] = new_text + endings[pos]
    return "".join(lines)


def main() -> None:
    """Run the batch conversion for every ``.srt`` file in ``SOURCE_DIR``.

    Files that cannot be decoded as UTF-8 are reported and skipped.

    Raises:
        SystemExit: When the source directory is unusable or a language in
            ``AI_LANG_DIRS`` is missing from ``SUPPORTED_AI_LANGS``.
    """
    args = parse_args()
    files = ensure_source_dir(SOURCE_DIR)

    opencc_targets = list(BASE_OPENCC_TARGETS)
    if args.include_tw_hk:
        opencc_targets.extend(TW_HK_TARGETS)

    ai_targets = AI_LANG_DIRS

    # Validate every language and build the translator BEFORE emptying any
    # output directory, so that a configuration error does not destroy the
    # results of a previous run.
    for lang in ai_targets:
        if lang not in SUPPORTED_AI_LANGS:
            raise SystemExit(f"❌ 不支持的 AI 语言: {lang}")

    translator = build_translator(
        TranslatorConfig(
            backend=args.backend,
            openai_model=args.openai_model,
            openai_api_key=args.openai_api_key,
            model_cache_dir=Path(args.model_cache_dir),
            batch_size=args.batch_size,
            max_length=args.max_length,
        )
    )

    for _, target_dir, _ in opencc_targets:
        prepare_target_dir(target_dir)
    for target_dir in ai_targets.values():
        prepare_target_dir(target_dir)

    total = len(files)
    print(f"🔄 开始转换 {total} 个 .srt 文件...")

    for idx, file_path in enumerate(files, 1):
        percent = idx * 100 // total
        print(f"🔄 转换中: {idx}/{total} ({percent}%) - {file_path}")

        try:
            content = file_path.read_text(encoding="utf-8")
        except UnicodeDecodeError as exc:
            print(f"❌ 转换失败: {file_path} (无法以 UTF-8 读取: {exc})")
            continue

        # Traditional-Chinese variants produced by OpenCC.
        for label, target_dir, converter in opencc_targets:
            if args.show_lang:
                print(f" ↳ 正在生成 {label}")
            out_path = target_dir / file_path.name
            converted = converter.convert(content)
            out_path.write_text(converted, encoding="utf-8")

        # English / Japanese / Korean produced by the AI translator.
        for lang, target_dir in ai_targets.items():
            if args.show_lang:
                lang_label = SUPPORTED_AI_LANGS[lang]["label"]
                print(f" ↳ 正在翻译 {lang_label}")
            translated_content = translate_dialogs(content, translator, lang)
            out_path = target_dir / file_path.name
            out_path.write_text(translated_content, encoding="utf-8")

        print(f"✅ 转换成功: {file_path}")


if __name__ == "__main__":
    main()