diff --git a/README.md b/README.md
index b1e22a9..ed60ecd 100644
--- a/README.md
+++ b/README.md
@@ -10,6 +10,7 @@
 
 ## 项目功能
 
+- 批量翻译。提供批量翻译(`batch_convert`)和逐行翻译(`convert`)2种接口。建议用 `batch_convert`。使用`batch_convert`时,需要在配置文件中使用`google`。
 - 对 mod 的 .tra 文件进行翻译。支持单文件翻译、批量文件翻译。
 - 支持有道、google 的翻译API。
 - 支持对文件自动转码。gb18030, utf-8 等。
@@ -22,20 +23,42 @@
 ## 使用方法
 
 1. 初次执行前,需要执行项目初始化脚本 init_shell.sh,输入`namespace`,自动创建程序执行需要的目录和文件。
+`namespace` 的作用是为多个翻译任务做区分,进行环境隔离。一般而言,一个mod的翻译任务只需要一个`namespace`即可。
 2. 修改配置文件 appconf.ini
 3. 将待汉化 .tra 文件放入 tra/ 目录下。
 4. 执行需要的程序(如 main.py, simple_main.py, debug.py 等)
 
 main.py 对 tra/ 目录下的文件进行翻译。
 ```
-python3 main.py
+python3 main.py -p batch # 批量翻译模式。效率很高。翻译源必须配置为 google
+python3 main.py -p line # 逐行翻译模式。效率较低。
 ```
 
 simple_main.py 可翻译单个文件、或者一个 file_list。并且,还支持断点重续功能,从失败位置继续翻译,而不是从头开始。
-建议使用此方法,比 main.py 更灵活,功能更丰富。
+
+```
+# 例子
+python3 simple_main.py -st 1 -ed 10 # 翻译 tra/ 目录下 dia_1.tra 到 dia_9.tra(序号区间为 [st, ed),不包含 -ed 本身)
 ```
-python3 simple_main.py
+
+### 文件的分片和整合
+对于一个庞大的 tra 文件(例如500行以上),建议对文件进行分片。程序会将指定文件分割成多个文件,然后进行管理、翻译,以及最后进行手动合并。
 ```
+# 分片
+python3 sep_and_combine.py -s -name 文件名 -f 起始行数 -size 100 # 从起始行开始切分,每个分片 100 行以上,并且只在 `~` 成对闭合的行处断开。
+# 分片的文件会放在 tra/ 目录当中,文件名格式为 dia_1.tra, dia_2.tra, ...
+
+# 也可以简单写为
+python3 sep_and_combine.py -s -name 文件名 # 默认从0行开始,以100行为单位切分文件
+
+# 合并(序号区间为 [down, up),不包含上界 -up 本身)
+python3 sep_and_combine.py -c -down 分片文件的序号下界 -up 分片文件的序号上界 -out dialog.tra
+# 可以简单写为
+python3 sep_and_combine.py -c # 默认合并 output/ 目录下的所有文件,输出为 dialog.tra
+```
+
+### 如果中途失败了,怎么处理
+重新执行命令即可。代码做了断点记录,可从失败位置继续翻译,不会重复翻译。
 
 ## 一些辅助程序
 
@@ -84,4 +107,5 @@ python3 simple_main.py
 6. 巅峰之战 Ascension v2.023
 7. 艾德温罗曼史 Edwin Romance v3.1
 8. 伊文德拉 Evandra NPC mod
-9. 爱蒙罗曼史 Imoen Romance v4.1
\ No newline at end of file
+9. 爱蒙罗曼史 Imoen Romance v4.1
\ No newline at end of file
diff --git a/dict/name_dict.txt b/dict/name_dict.txt
index 72f94d1..3c348a2 100644
--- a/dict/name_dict.txt
+++ b/dict/name_dict.txt
@@ -326,6 +326,7 @@ Herdrin#何德林
 Herod#赫罗德
 Hester#赫斯特
 Hexxat#赫克塞特
+Haiass#海斯
 Hoach Randymonk#霍奇·兰迪莫克
 Hokkney#郝克尼
 Horvat#霍瓦特
@@ -632,6 +633,7 @@ Safana#莎法娜
 Sallo#萨罗
 Sahana#萨哈娜
 Samuel#山谬
+Sandrah#珊德拉
 Sarevok Anchev#沙洛佛克·安基夫
 Sarevok#沙洛佛克
 Sashenstar#沙散塔
diff --git "a/docs/350円276円205円345円212円251円347円250円213円345円272円217円344円273円213円347円273円215円.md" "b/docs/350円276円205円345円212円251円347円250円213円345円272円217円344円273円213円347円273円215円.md"
index 49043e7..819c873 100644
--- "a/docs/350円276円205円345円212円251円347円250円213円345円272円217円344円273円213円347円273円215円.md"
+++ "b/docs/350円276円205円345円212円251円347円250円213円345円272円217円344円273円213円347円273円215円.md"
@@ -3,7 +3,6 @@
 - debug.py 用来做一些简单的功能和测试。
 - sep_and_combine.py 对大文件进行切分和聚合,减少每次执行的时间成本。
 - correctness_check.py 文件正确性校验。由于某些不规范文本内会有`@xxx = ~ 111 222~ 333~`这种形式的句子,所以存在误报情况。
-- merge_file.py 对将多个文件合并入 master 文件,并输出为新文件。
 - ctnt_search.py 提供对目标内容的搜索功能,并将搜索结果导出文件。格式为:
 ```commandline
 filename &#124; pattern &#124; line_num &#124; content
diff --git a/google_trans.py b/google_trans.py
index 8d712a7..7e6e360 100644
--- a/google_trans.py
+++ b/google_trans.py
@@ -52,6 +52,7 @@
 
 class GoogleTrans:
 def __init__(self, from_lang, to_lang):
+ self.name = 'google'
 self.PROXY_URL = utils.read_config('appconf.ini')['google']['proxy']
 self.from_lang = from_lang
 self.to_lang = to_lang
diff --git a/main.py b/main.py
index b9560f0..b79552e 100644
--- a/main.py
+++ b/main.py
@@ -125,20 +125,18 @@ def batch_solve(self, line, batch_trans_lines, index):
 # 预处理 得到一个包含若干占位符+原文的res
 res = self.text_pre_solve(line, pattern='batch', index=index)
 
+ # debug模式下不调用API
 if self.mode == 'debug':
- # debug模式下不调用API
- zh = res
 return False
 else:
 # 加入批量翻译
 batch_trans_lines.append(res)
 return True
 def single_solve(self, line):
- if not self.no_need_trans(line):
+ if self.no_need_trans(line):
 return line
 # 预处理
 res = self.text_pre_solve(line)
-
 if self.mode == 'debug':
 # debug模式下不调用API
 zh = res
@@ -149,13 +147,10 @@ def single_solve(self, line):
 print('[API结果] ' + zh)
 # 计数器
 self.counter.incr()
-
 # 特殊字符还原
 rev_back = self.text_after_solve(zh)
-
 # 等待,防止频繁调用报错
 self.counter.wait()
-
 return rev_back
 
 # 还原声音占位符
@@ -170,7 +165,6 @@ def on_voice(self, line, pattern='single', index=0):
 line = cache + line
 self.voice_multi_cache[index] = ''
 return line
-
 # 去除声音占位符
 def off_voice(self, line, pattern='single', index=0):
 if pattern == 'single':
@@ -189,7 +183,6 @@ def off_voice(self, line, pattern='single', index=0):
 self.voice_multi_cache[index] = v
 line = line[rp + 1:]
 return line
- #todo self.voice_multi_cache 初始化
 def get_translator(self):
 use = utils.read_config('appconf.ini')['config']['use']
 if use == 'youdao':
@@ -230,6 +223,9 @@ def _init_token(self, lines, w_dict, idx):
 i = i+1
 return i
 
+ def _init_voice_cache(self, size):
+ for i in range(size):
+ self.voice_multi_cache.append('')
 
 def direct_translate(self, line):
 line = line.lower()
@@ -391,30 +387,39 @@ def has_zh(self, string):
 def zh_signal(self, ch):
 return '\u4e00' <= ch <= '\u9fff' - def fill(self, line, format_text, flag, fill_lines, trans_flag_lines): + def fill(self, origin_text, format_text, flag, fill_lines, trans_flag_lines): + # trans_flag_lines 用来记录每行是否需要翻译 trans_flag_lines.append(flag) + # fill_lines 用来记录每行的预处理结果 + # 如果flag为true,说明需要翻译,则加入 format_text if flag: fill_lines.append(format_text) + # 如果flag为false,说明不需要翻译,则加入原文 else: - fill_lines.append(line) + fill_lines.append(origin_text) # 批量翻译并写文件 def batch_convert(self, lines, filename, output_encoding): - # 待填补lines xxx{}yyy + if self.translator.name != 'google': + print('必须使用google才能使用批量翻译') + return [] + # 初始化 self.voice_multi_cache + self._init_voice_cache(len(lines)) + # 填充模板数组 fill_lines = [] # 是否要翻译的标志位数组 true/false trans_flag_lines = [] # 需要翻译的lines batch_trans_lines = [] - # 初始化 self.voice_multi_cache - for i in range(len(lines)): - self.voice_multi_cache.append('') - + print('开始批量翻译 len(lines):' + str(len(lines))) j = -1 for i in range(len(lines)): if i <= j: continue line = lines[i] + if not len(lines): + continue + # {} 作为占位符,用于填充待翻译文本,翻译好之后,替换到{} l = line.find('~') if l != -1: r = line.find('~', l + 1) @@ -431,9 +436,13 @@ def batch_convert(self, lines, filename, output_encoding): line[:l+1] + '{}', flag, fill_lines, trans_flag_lines) j = i+1 + # 找到下一个结束符~,期间将这些行的数据,进行处理,放入 fill_lines 中 while (lines[j].find('~') == -1): + if len(lines[j]) == 0: + j = j+1 + continue flag = self.batch_solve(lines[j], batch_trans_lines, j) - self.fill(line, + self.fill(lines[j], '{}', flag, fill_lines, trans_flag_lines) j = j+1 @@ -442,9 +451,11 @@ def batch_convert(self, lines, filename, output_encoding): self.fill(line, '{}' + lines[j][r:], flag, fill_lines, trans_flag_lines) - # 批量翻译结果 - batch_result = self.translator.batch_translate(batch_trans_lines) + if self.mode == 'debug': + batch_result = batch_trans_lines + else: + batch_result = self.translator.batch_translate(batch_trans_lines) next = 0 res = [] @@ -456,9 +467,11 @@ def batch_convert(self, 
lines, filename, output_encoding): # 特殊字符还原 text_rev = self.text_after_solve(batch_result[next], pattern='batch', index=i) res.append(l.format(text_rev)) + # print(f'字符串还原:{text_rev}') next = next+1 else: res.append(fill_lines[i]) + print('=' * 20) # 写文件 utils.write_file('', filename, res, output_encoding) return res @@ -512,7 +525,7 @@ def do_write_append(self, log, prefix, filename, lines, encoding, next_line_num) utils.write_line_in_append(prefix, filename, lines, encoding) log.writelogs(filename, next_line_num) -def main(): +def main(args): # solver solver = Solver() @@ -538,13 +551,21 @@ def main(): # 先写log记录 log.writelogs(file) lines = utils.read_file('tra/'+file, 'utf-8') - # 批量翻译 - solver.batch_convert(lines, file, 'utf-8') - # 逐行翻译 - # solver.convert('tra/'+file, 'utf-8') + if args.p == 'batch': + solver.batch_convert(lines, file, 'utf-8') + elif args.p == 'line': + # 逐行翻译 + solver.convert('tra/'+file, 'utf-8') + else: + print('参数错误') + break print('-'*30) log.done() if __name__ == '__main__': - main() + parser = argparse.ArgumentParser() + parser.add_argument('-p', type=str, default='', help='翻译模式 batch/line') + args = parser.parse_args() + + main(args) diff --git a/sep_and_combine.py b/sep_and_combine.py index 1e0f35d..c1d239f 100644 --- a/sep_and_combine.py +++ b/sep_and_combine.py @@ -34,7 +34,7 @@ def takeout_text(filename, start_line=0): return res -def seperate_to_files(lines): +def seperate_to_files(lines, page_size): line_cnt = 0 res = [] idx = 1 @@ -51,7 +51,7 @@ def seperate_to_files(lines): line_cnt += 1 r += l.count('~') - if line_cnt> 50 and r % 2 == 0 and l.count('~')> 0:
+ if line_cnt> page_size and r % 2 == 0 and l.count('~')> 0:
 analyse.append('[文件名:]' + str(idx)+'.tra' + ' [~个数:]' + str(r) + ' [文件行数:]' + str(len(res)))
 analyse.append('[from:]' + res[0])
 analyse.append('[to:]' + res[-1])
@@ -101,23 +101,31 @@ def manage(args):
 if args.s:
 # 文件切分
 start_line = args.f
- seperate_to_files(takeout_text('tra/dialog.tra', start_line))
+ seperate_to_files(takeout_text('tra/'+args.name, start_line), args.size)
 elif args.c:
 # 文件整合
-
 file_list = []
- # 遍历分割后的文件
- for i in range(1, args.up):
- # 拼装文件名
- file_list.append('output/sod_' + str(i) + '.tra')
- combine_to_file(file_list, 'dialog.tra')
+ # 说明默认整合全部文件
+ if args.down == 1 and args.up == -1:
+ file_list = utils.read_tras()
+ print(file_list)
+ else:
+ # 遍历分割后的文件
+ for i in range(args.down, args.up):
+ # 拼装文件名
+ file_list.append('output/dia_' + str(i) + '.tra')
+ combine_to_file(file_list, args.out)
 
 if __name__ == '__main__':
 parser = argparse.ArgumentParser()
+ parser.add_argument('-name', type=str, default='dialog.tra', help='文件名')
 parser.add_argument('-s', action='store_true', help='文件切分')
 parser.add_argument('-c', action='store_true', help='文件整合')
 parser.add_argument('-f', type=int, default=0, help='起始行数(从0开始)')
- parser.add_argument('-up', type=int, default=1, help='整合文件的文件id上界(必填)')
+ parser.add_argument('-down', type=int, default=1, help='整合文件的文件id下界(可选)')
+ parser.add_argument('-up', type=int, default=-1, help='整合文件的文件id上界(可选)')
+ parser.add_argument('-out', type=str, default='dialog.tra', help='文件整合后的文件名')
+ parser.add_argument('-size', type=int, default=100, help='每个文件的行数')
 args = parser.parse_args()
 
 manage(args)
diff --git a/simple_main.py b/simple_main.py
index cc17bb5..5b6943e 100644
--- a/simple_main.py
+++ b/simple_main.py
@@ -2,12 +2,11 @@
 import utils
 import time
 import readlogs
-
-# 为合并SoD 定制
+import argparse
 
 # 翻译单文件,但是是立即写入模式,翻译一行写入一行
 # 避免因为API调用失败导致整个文件翻译无效的问题
-def trans_and_write_append(filename, output_encoding, line_num=0):
+def trans_and_write_append(filename, output_encoding='utf-8', line_num=0):
 print('*' * 20)
 start_time = time.time()
 lines = utils.read_file('tra/' + filename)
@@ -17,23 +16,23 @@ def trans_and_write_append(filename, output_encoding, line_num=0):
 print("[执行时间]", end_time - start_time, "seconds")
 
 #翻译单文件
-def single_trans(filename, log, output_encoding):
+def single_trans(filename, log, output_encoding='utf-8', mode=''):
 print('*' * 20)
 start_time = time.time()
 # 先写log记录
 log.writelogs(filename)
- solver = main.Solver('')
+ solver = main.Solver(mode)
 lines = utils.read_file('tra/'+filename)
 res = solver.batch_convert(lines, filename, output_encoding)
 
 for r in res:
 print(r)
-
+ print('finish')
 end_time = time.time()
 print("[执行时间]", end_time - start_time, "seconds")
 
 # 根据区间翻译
-def range_trans(file_list, output_encoding):
+def range_trans(file_list, output_encoding='utf-8'):
 
 log = readlogs.ReadLogs()
 # 读取上次结束文件名
@@ -43,7 +42,9 @@ def range_trans(file_list, output_encoding):
 flg = False
 
 start_time = time.time()
+ file_list.sort()
 for file in file_list:
+ print(file)
 if not flg and lastfile != '' and file != lastfile:
 print('pass ' + file)
 continue
@@ -59,23 +60,59 @@ def range_trans(file_list, output_encoding):
 end_time = time.time()
 print("[总执行时间]", end_time - start_time, "seconds")
 
+def process(args):
+ if args.ed == -1 or args.st == -1:
+ exit()
+ file_list = []
+ for i in range(args.st, args.ed):
+ file_list.append('dia_' + str(i) + '.tra')
+ log = readlogs.ReadLogs()
 
+ for file in file_list:
+ if args.test:
+ mode = 'debug'
+ else:
+ mode = ''
+ single_trans(file, log, args.outfilecode, mode)
+ if mode == '':
+ waittime = 3 + main.Counter('')._1D10()
+ print('等待' + str(waittime) + '秒')
+ time.sleep(waittime)
 
-# if __name__ == '__main__':
-# # file = 'dia_12.tra'
-# # single_trans(file, log, 'gb18030')
-# #
-# file_list = []
-# for i in range(1, 9):
-# file_list.append('s'+str(i)+'.tra')
-# range_trans(file_list, 'utf-8')
+def process_list(file_list):
+ log = readlogs.ReadLogs()
+
+ for file in file_list:
+ single_trans(file, log)
+ waittime = 3 + main.Counter('')._1D10()
+ print('等待' + str(waittime) + '秒')
+ time.sleep(waittime)
+
+def generate_file_list():
+ # file_list = ['dia_7.tra','dia_22.tra','dia_23.tra','dia_25.tra','dia_26.tra','dia_40.tra','dia_41.tra','dia_45.tra','dia_49.tra','dia_323.tra','dia_324.tra','dia_329.tra','dia_331.tra','dia_333.tra','dia_335.tra','dia_336.tra','dia_339.tra']
+ # return file_list
+ return []
 
-# 临时1
 if __name__ == '__main__':
- file = 'le#inter.tra'
- log = readlogs.ReadLogs()
- single_trans(file, log, 'utf-8')
- log.done()
+ # file = 'dia_12.tra'
+ # single_trans(file, log, 'gb18030')
+ #
+ parser = argparse.ArgumentParser()
+ parser.add_argument('-st', type=int, default=-1, help='起始文件号')
+ parser.add_argument('-ed', type=int, default=-1, help='终止文件号')
+ parser.add_argument('-outfilecode', type=str, default='utf-8', help='输出文件编码')
+ parser.add_argument('-test', action='store_true', default=False, help='测试模式')
+
+ args = parser.parse_args()
+ process(args)
+ # process_list(generate_file_list())
+
+# 临时1
+# if __name__ == '__main__':
+# file = 'le#inter.tra'
+# log = readlogs.ReadLogs()
+# single_trans(file, log, 'utf-8')
+# log.done()
 
 # # 临时2
 # if __name__ == '__main__':
diff --git a/tools/correctness_check.py b/tools/correctness_check.py
index b4d900f..7b04b96 100644
--- a/tools/correctness_check.py
+++ b/tools/correctness_check.py
@@ -10,6 +10,7 @@ def correctness_check(file_list):
 result_file = []
 result = []
 for file in file_list:
+ print(file)
 lines = utils.read_file(file)
 if len(lines) == 0:
 continue
@@ -43,7 +44,7 @@ def dispatcher(args):
 path_prefix = 'tra/'
 if args.p !='':
 path_prefix = args.p+'/'
- up_limit = 500
+ up_limit = 1000
 if args.up != 0:
 up_limit = args.up
 # 文件正确性校验
diff --git a/utils.py b/utils.py
index 606a002..02e5a8e 100644
--- a/utils.py
+++ b/utils.py
@@ -92,6 +92,21 @@ def write_file(prefix, filename, lines, encoding='utf-8'):
 for m in lines:
 f.write(m+'\n')
 
+# 以覆盖的方式写入
+def split_write_file(prefix, filename, lines, encoding='utf-8'):
+ print('写入文件:', filename)
+ root_dir = os.path.dirname(os.path.abspath(__file__))
+ path = root_dir + '/resource/' + NAMESPACE +'/tra/'
+ # 防止路径不存在
+ if not os.path.exists(path):
+ os.makedirs(path)
+ dir_file_path = path + '/' + prefix+filename
+ if not os.path.exists(dir_file_path):
+ open(dir_file_path, 'w').close()
+ with open(dir_file_path, 'w', encoding=encoding) as f:
+ for m in lines:
+ f.write(m+'\n')
+
 # 以追加的方式写入
 def write_line_in_append(prefix, filename, lines, encoding='utf-8'):
 print('写入文件:', filename)
diff --git a/youdao.py b/youdao.py
index 2b93489..d18db04 100644
--- a/youdao.py
+++ b/youdao.py
@@ -14,6 +14,7 @@ def __init__(self, from_lang, to_lang):
 appKey = appconf['youdao']['appKey'] # 应用id
 appSecret = appconf['youdao']['appSecret'] # 应用密钥
 
+ self.name = 'youdao'
 self.YOUDAO_URL = 'https://openapi.youdao.com/api/'
 self.APP_KEY = appKey # 应用id
 self.APP_SECRET = appSecret # 应用密钥
</div><div class="naked_ctrl">
<form action="/index.cgi/contrast" method="get" name="gate">
<p><a href="http://altstyle.alfasado.net">AltStyle</a> によって変換されたページ <a href="https://github.com/evalcony/TransMod2CN/compare/batch_trans...main.diff">(-&gt;オリジナル)</a>
/ <label>アドレス: <input type="text" name="naked_post_url" value="https://github.com/evalcony/TransMod2CN/compare/batch_trans...main.diff" size="22" /></label> <label>モード: <select name="naked_post_mode">
<option value="default">デフォルト</option>
<option value="speech">音声ブラウザ</option>
<option value="ruby">ルビ付き</option>
<option value="contrast" selected="selected">配色反転</option>
<option value="larger-text">文字拡大</option>
<option value="mobile">モバイル</option>
</select>
<input type="submit" value="表示" />
</p>
</form>
</div>