import chardet,re, json  # chardet is a third-party package, used to detect the file's encoding
def get_jp_text(path):
    """Dump a Japanese novel to "TransFile.json" as a {line: line} template.

    The source file's encoding is auto-detected with chardet (third-party);
    a GB2312/GBK guess is widened to GB18030 — their superset — so rarer
    characters still decode.

    NOTE: json.dump on a dict would collapse duplicate keys (novels repeat
    lines, e.g. interjections), so the JSON object is written out manually,
    one entry per line.

    Args:
        path: path of the source .txt novel.
    """
    with open(path, "rb") as f:
        raw = f.read()

    detected = chardet.detect(raw)
    encoding = detected["encoding"]
    # Guard against chardet returning None, and widen GB2312/GBK to GB18030
    # so characters outside the guessed subset still decode.
    if encoding and encoding.upper() in ("GB2312", "GBK"):
        encoding = "GB18030"

    # splitlines() also strips the line terminators.
    jp_texts = raw.decode(encoding).splitlines()

    with open("TransFile.json", "w", encoding="UTF-8") as f:
        f.write("{\n")
        last = len(jp_texts) - 1
        for i, text in enumerate(jp_texts):
            # json.dumps escapes quotes/backslashes, keeping the output valid
            # JSON (raw f-string interpolation broke on lines containing '"').
            quoted = json.dumps(text, ensure_ascii=False)
            # Decide the trailing comma by POSITION, not value: comparing
            # against jp_texts[-1] dropped the comma on any mid-file line
            # equal to the last line, producing invalid JSON.
            f.write(f" {quoted}:{quoted}" + (",\n" if i != last else "\n"))
        f.write("}")
def zh_novel(path):
    """Extract the value side of a TransFile-style JSON dict into "中文翻译.txt".

    The file is parsed line-by-line with a regex instead of json.load because
    the dict may contain duplicate keys, which a real JSON parser collapses.

    Args:
        path: path of the translated TransFile.json.
    """
    # Compiled once; allows an EMPTY value ("(.*)", not "(.+)") and an
    # optional trailing comma. ':' needs no backslash escape.
    entry_re = re.compile(r'".*?"\s*:\s*"(.*)",?$')

    with open(path, "r", encoding="utf-8") as f:
        # Drop the opening "{" and closing "}" lines.
        lines = f.readlines()[1:-1]

    translations = []
    for line in lines:
        match = entry_re.search(line)
        # Skip lines that don't look like "key":"value" — the original
        # indexed the result unconditionally and raised TypeError on None.
        if match:
            translations.append(match.group(1))

    with open("中文翻译.txt", "w", encoding="utf-8") as f:
        f.writelines(text + "\n" for text in translations)
if __name__ == "__main__":
# 将路径换成你的文件路径
get_jp_text(r"C:\人間失格.txt") # 将txt转为字典
# 将翻译后的字典路径替换下方TransFile.json,取消注释状态
# zh_novel(r"TransFile.json") # 将翻译后的字典转为txt