| 123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154 |
- import argparse
- import os
- from copy import copy
- from pathlib import Path
- from opencc import OpenCC
- from openpyxl import load_workbook
- from openpyxl.utils import get_column_letter
- from translator import (
- DEFAULT_MODEL_CACHE,
- SUPPORTED_AI_LANGS,
- TranslatorConfig,
- build_translator,
- )
# Directory scanned for input workbooks, and directory receiving the results.
INPUT_DIR = Path("./excel")
OUTPUT_DIR = Path(".")

# 1-based worksheet column positions.
SOURCE_COLUMN_INDEX = 2  # column whose text is translated
TARGET_COLUMN_INDEX = 3  # position at which the translation columns are inserted
REMARK_COLUMN_INDEX = 6  # remark column position *before* the insertion

# (header text, converter) pairs, one per OpenCC-generated column, in the
# order the columns are written.
OPENCC_HEADERS = [
    (header, OpenCC(config))
    for header, config in (
        ("译文(ZH-Hant)", "s2t"),
        ("译文(ZH-TW)", "s2tw"),
        ("译文(ZH-HK)", "s2hk"),
    )
]

# Language keys handed to the translator; each must be a key of
# SUPPORTED_AI_LANGS.
AI_LANGS = ["en", "jp", "korean"]
def parse_args() -> argparse.Namespace:
    """Parse the command-line options of the translation script.

    Returns:
        The namespace produced by ``argparse`` with the attributes
        ``backend``, ``openai_model``, ``openai_api_key``,
        ``model_cache_dir``, ``batch_size`` and ``max_length``.
    """
    cli = argparse.ArgumentParser(description="翻译 Excel UI 文本到多语言。")

    # One (flag, keyword arguments) entry per option, registered in order.
    option_specs = (
        ("--backend", {"choices": ["local", "openai"], "default": "local"}),
        ("--openai-model", {"default": "gpt-4o-mini"}),
        ("--openai-api-key", {"default": None}),
        (
            "--model-cache-dir",
            {
                "default": str(DEFAULT_MODEL_CACHE),
                "help": "NLLB 本地模型存储目录。",
            },
        ),
        ("--batch-size", {"type": int, "default": 4}),
        ("--max-length", {"type": int, "default": 512}),
    )
    for flag, spec in option_specs:
        cli.add_argument(flag, **spec)

    return cli.parse_args()
def insert_translation_columns(ws, start_col: int, total_cols: int) -> None:
    """Insert ``total_cols`` empty columns at ``start_col``, one at a time.

    For every inserted column the explicit column widths are re-applied so
    that columns at or after ``start_col`` keep their width at the shifted
    position, and each cell of the new column receives a copy of the style
    of the cell immediately to its left.

    Args:
        ws: Worksheet to modify in place.
        start_col: 1-based column index at which columns are inserted.
        total_cols: Number of columns to insert.
    """
    copied_attributes = (
        "font",
        "border",
        "fill",
        "number_format",
        "protection",
        "alignment",
    )

    for _ in range(total_cols):
        # Snapshot the widths first: they are re-applied by hand below, at
        # the positions the columns occupy after the insertion.
        saved_widths = {}
        for index in range(1, ws.max_column + 1):
            current = ws.column_dimensions[get_column_letter(index)].width
            if current is not None:
                saved_widths[index] = current

        ws.insert_cols(start_col)

        for index, saved in saved_widths.items():
            shifted = index if index < start_col else index + 1
            ws.column_dimensions[get_column_letter(shifted)].width = saved

        # Row tuples are 0-based: start_col - 2 is the column left of the
        # new one, start_col - 1 is the new column itself.
        for cells in ws.iter_rows():
            left_cell = cells[start_col - 2]
            new_cell = cells[start_col - 1]
            if not left_cell.has_style:
                continue
            for attribute in copied_attributes:
                setattr(new_cell, attribute, copy(getattr(left_cell, attribute)))
def should_skip(remark_value) -> bool:
    """Return True when the remark marks the row as not to be translated.

    Args:
        remark_value: Raw value of the remark cell; falsy values are
            treated as an empty remark, anything else is converted with
            ``str``.

    Returns:
        True if the remark text contains "无需翻译" or "不翻译".
    """
    text = str(remark_value if remark_value else "").strip()
    for marker in ("无需翻译", "不翻译"):
        if marker in text:
            return True
    return False
def translate_row(
    ws,
    row_idx: int,
    simplified_text: str,
    translator,
) -> None:
    """Fill the translation columns of one worksheet row.

    The OpenCC conversions are written first, starting at
    ``TARGET_COLUMN_INDEX``, followed by one column per entry of
    ``AI_LANGS`` produced by ``translator.translate_text``.

    Args:
        ws: Worksheet whose cells are written.
        row_idx: 1-based index of the row to fill.
        simplified_text: Source text of the row.
        translator: Object providing ``translate_text(text, lang)``.
    """
    # Running cursor over the target columns, shared by both passes.
    column = TARGET_COLUMN_INDEX

    for _, converter in OPENCC_HEADERS:
        converted = converter.convert(simplified_text)
        ws.cell(row=row_idx, column=column, value=converted)
        column += 1

    for lang in AI_LANGS:
        translated = translator.translate_text(simplified_text, lang)
        ws.cell(row=row_idx, column=column, value=translated)
        column += 1
def main() -> None:
    """Translate every workbook in ``INPUT_DIR`` and save the results.

    For each ``.xlsx`` file (skipping previous outputs and Excel lock
    files) the translation columns are inserted into the active sheet, the
    header row is filled, and every data row with non-blank text in the
    source column is translated unless its remark says to skip it. The
    result is saved in ``OUTPUT_DIR`` as ``<stem>_zh-Hant.xlsx``.

    Raises:
        SystemExit: If an entry of ``AI_LANGS`` is not a key of
            ``SUPPORTED_AI_LANGS``.
    """
    args = parse_args()

    # Fail fast: check the configured languages before the translator is
    # built, so a configuration error never costs a backend start-up.
    for lang in AI_LANGS:
        if lang not in SUPPORTED_AI_LANGS:
            raise SystemExit(f"❌ 不支持的 AI 语言: {lang}")

    translator = build_translator(
        TranslatorConfig(
            backend=args.backend,
            openai_model=args.openai_model,
            openai_api_key=args.openai_api_key,
            model_cache_dir=Path(args.model_cache_dir),
            batch_size=args.batch_size,
            max_length=args.max_length,
        )
    )

    total_new_cols = len(OPENCC_HEADERS) + len(AI_LANGS)
    # The header row is identical for every workbook; build it once.
    headers = [header for header, _ in OPENCC_HEADERS] + [
        SUPPORTED_AI_LANGS[lang]["label"] for lang in AI_LANGS
    ]
    # The remark column moves right by the number of inserted columns.
    remark_col = REMARK_COLUMN_INDEX + total_new_cols

    # Sorted so that the processing order is reproducible across runs.
    for filename in sorted(os.listdir(INPUT_DIR)):
        if not filename.endswith(".xlsx"):
            continue
        # Skip previously generated outputs and Excel lock files.
        if "_zh-Hant" in filename or filename.startswith("~$"):
            continue

        input_path = INPUT_DIR / filename
        # Only the trailing extension is replaced; str.replace would also
        # rewrite any ".xlsx" occurring inside the file name.
        output_path = OUTPUT_DIR / f"{Path(filename).stem}_zh-Hant.xlsx"
        print(f"📝 处理文件: {filename}")

        wb = load_workbook(input_path)
        ws = wb.active
        insert_translation_columns(ws, TARGET_COLUMN_INDEX, total_new_cols)

        for offset, header in enumerate(headers):
            ws.cell(row=1, column=TARGET_COLUMN_INDEX + offset, value=header)

        for row in ws.iter_rows(
            min_row=2,
            min_col=SOURCE_COLUMN_INDEX,
            max_col=remark_col,
        ):
            # Row tuples start at SOURCE_COLUMN_INDEX, so the remark cell
            # sits at this relative offset (the last cell of the tuple).
            remark_cell = row[remark_col - SOURCE_COLUMN_INDEX]
            if should_skip(remark_cell.value):
                continue

            source_cell = row[0]
            simplified = source_cell.value
            # Only non-blank strings are translated.
            if isinstance(simplified, str) and simplified.strip():
                translate_row(ws, source_cell.row, simplified, translator)

        wb.save(output_path)
        print(f"✅ 已输出: {output_path}\n")


if __name__ == "__main__":
    main()
|