# excel_to_zh_hant.py (5.0 KB)
  1. import argparse
  2. import os
  3. from copy import copy
  4. from pathlib import Path
  5. from opencc import OpenCC
  6. from openpyxl import load_workbook
  7. from openpyxl.utils import get_column_letter
  8. from translator import (
  9. DEFAULT_MODEL_CACHE,
  10. SUPPORTED_AI_LANGS,
  11. TranslatorConfig,
  12. build_translator,
  13. )
  14. INPUT_DIR = Path("./excel")
  15. OUTPUT_DIR = Path(".")
  16. SOURCE_COLUMN_INDEX = 2
  17. TARGET_COLUMN_INDEX = 3
  18. REMARK_COLUMN_INDEX = 6
  19. OPENCC_HEADERS = [
  20. ("译文(ZH-Hant)", OpenCC("s2t")),
  21. ("译文(ZH-TW)", OpenCC("s2tw")),
  22. ("译文(ZH-HK)", OpenCC("s2hk")),
  23. ]
  24. AI_LANGS = ["en", "jp", "korean"]
  25. def parse_args() -> argparse.Namespace:
  26. parser = argparse.ArgumentParser(description="翻译 Excel UI 文本到多语言。")
  27. parser.add_argument("--backend", choices=["local", "openai"], default="local")
  28. parser.add_argument("--openai-model", default="gpt-4o-mini")
  29. parser.add_argument("--openai-api-key", default=None)
  30. parser.add_argument(
  31. "--model-cache-dir",
  32. default=str(DEFAULT_MODEL_CACHE),
  33. help="NLLB 本地模型存储目录。",
  34. )
  35. parser.add_argument("--batch-size", type=int, default=4)
  36. parser.add_argument("--max-length", type=int, default=512)
  37. return parser.parse_args()
  38. def insert_translation_columns(ws, start_col: int, total_cols: int) -> None:
  39. for _ in range(total_cols):
  40. column_widths = {}
  41. for col in range(1, ws.max_column + 1):
  42. width = ws.column_dimensions[get_column_letter(col)].width
  43. if width is not None:
  44. column_widths[col] = width
  45. ws.insert_cols(start_col)
  46. for col, width in column_widths.items():
  47. new_col = col + 1 if col >= start_col else col
  48. ws.column_dimensions[get_column_letter(new_col)].width = width
  49. for row in ws.iter_rows():
  50. source_cell = row[start_col - 2]
  51. target_cell = row[start_col - 1]
  52. if source_cell.has_style:
  53. target_cell.font = copy(source_cell.font)
  54. target_cell.border = copy(source_cell.border)
  55. target_cell.fill = copy(source_cell.fill)
  56. target_cell.number_format = copy(source_cell.number_format)
  57. target_cell.protection = copy(source_cell.protection)
  58. target_cell.alignment = copy(source_cell.alignment)
  59. def should_skip(remark_value) -> bool:
  60. remark_text = str(remark_value or "").strip()
  61. return any(keyword in remark_text for keyword in ("无需翻译", "不翻译"))
  62. def translate_row(
  63. ws,
  64. row_idx: int,
  65. simplified_text: str,
  66. translator,
  67. ) -> None:
  68. for offset, (_, converter) in enumerate(OPENCC_HEADERS):
  69. value = converter.convert(simplified_text)
  70. ws.cell(row=row_idx, column=TARGET_COLUMN_INDEX + offset, value=value)
  71. for lang_offset, lang in enumerate(AI_LANGS, start=len(OPENCC_HEADERS)):
  72. translated = translator.translate_text(simplified_text, lang)
  73. ws.cell(
  74. row=row_idx,
  75. column=TARGET_COLUMN_INDEX + lang_offset,
  76. value=translated,
  77. )
  78. def main() -> None:
  79. args = parse_args()
  80. translator = build_translator(
  81. TranslatorConfig(
  82. backend=args.backend,
  83. openai_model=args.openai_model,
  84. openai_api_key=args.openai_api_key,
  85. model_cache_dir=Path(args.model_cache_dir),
  86. batch_size=args.batch_size,
  87. max_length=args.max_length,
  88. )
  89. )
  90. total_new_cols = len(OPENCC_HEADERS) + len(AI_LANGS)
  91. for lang in AI_LANGS:
  92. if lang not in SUPPORTED_AI_LANGS:
  93. raise SystemExit(f"❌ 不支持的 AI 语言: {lang}")
  94. for filename in os.listdir(INPUT_DIR):
  95. if not filename.endswith(".xlsx"):
  96. continue
  97. if "_zh-Hant" in filename or filename.startswith("~$"):
  98. continue
  99. input_path = INPUT_DIR / filename
  100. output_path = OUTPUT_DIR / filename.replace(".xlsx", "_zh-Hant.xlsx")
  101. print(f"📝 处理文件: {filename}")
  102. wb = load_workbook(input_path)
  103. ws = wb.active
  104. insert_translation_columns(ws, TARGET_COLUMN_INDEX, total_new_cols)
  105. headers = [header for header, _ in OPENCC_HEADERS] + [
  106. SUPPORTED_AI_LANGS[lang]["label"] for lang in AI_LANGS
  107. ]
  108. for offset, header in enumerate(headers):
  109. ws.cell(row=1, column=TARGET_COLUMN_INDEX + offset, value=header)
  110. remark_col = REMARK_COLUMN_INDEX + total_new_cols
  111. for row in ws.iter_rows(
  112. min_row=2,
  113. min_col=SOURCE_COLUMN_INDEX,
  114. max_col=remark_col,
  115. ):
  116. remark_cell = row[remark_col - SOURCE_COLUMN_INDEX]
  117. if should_skip(remark_cell.value):
  118. continue
  119. cell = row[0]
  120. simplified = cell.value
  121. if isinstance(simplified, str) and simplified.strip():
  122. translate_row(ws, cell.row, simplified, translator)
  123. wb.save(output_path)
  124. print(f"✅ 已输出: {output_path}\n")
# Run the translation only when the file is executed as a script.
if __name__ == "__main__":
    main()