python爬虫调用谷歌翻译接口

2019年7月4日15:53:17

(¦3[▓▓] 晚安 谷歌翻译环境 Python 3.6 第三方库 Execjs (pip install PyExecJS ) 文件列表 同目录下的四个文件: - tool.py - google_translate.py - input.txt - __init__.py 主要有四个文件 1、tool.py (用来生成谷歌翻译url中的tk字段) 2、google_translate.py (用来读取中文txt文件,并翻译生成新的德语txt文件) 3、input.txt(需要翻译的txt中文文件) 4、__init__.py(该文件为空文件,用来import tool.py) 详细可以百度"python3 引用同目录py文件" 使用步骤 注:这里是中文文本转换为德语文本 1、同目录下创建四个文件,分别将下边代码粘贴上去 2、运行 python3 google_translate.py或 python google_translate.py生成output.txt 修改说明 本文件是中文翻译为德语的,如果要转换为其他语言 请修改 google_translate.py 文件里的 translate()函数的以下字段。 关于语言代码和国家的对应,请查看"国家语言代码对照表"文章。 如:中文 zh-CN 德语 de 英语 en old_language_code = "zh-CN" # 中文 new_language_code = "de" # 德语 # new_language_code = "en" # 英语

tool.py

"""Generate the ``tk`` field used in Google Translate request URLs."""
import execjs

# JavaScript source handed to execjs.  It defines ``TL`` (the entry point
# called from Python) and its helper ``RL``.  The text is split over adjacent
# literals purely for readability; the concatenated string is unchanged.
_TK_SCRIPT = (
    ' function TL(a) { '
    'var k = ""; '
    'var b = 406644; '
    'var b1 = 3293161072; '
    'var jd = "."; '
    'var $b = "+-a^+6"; '
    'var Zb = "+-3^+b+-f"; '
    'for (var e = [], f = 0, g = 0; g < a.length; g++) { '
    'var m = a.charCodeAt(g); '
    '128 > m ? e[f++] = m : '
    '(2048 > m ? e[f++] = m >> 6 | 192 : '
    '(55296 == (m & 64512) && g + 1 < a.length && '
    '56320 == (a.charCodeAt(g + 1) & 64512) ? '
    '(m = 65536 + ((m & 1023) << 10) + (a.charCodeAt(++g) & 1023), '
    'e[f++] = m >> 18 | 240, '
    'e[f++] = m >> 12 & 63 | 128) : '
    'e[f++] = m >> 12 | 224, '
    'e[f++] = m >> 6 & 63 | 128), '
    'e[f++] = m & 63 | 128) '
    '} '
    'a = b; '
    'for (f = 0; f < e.length; f++) a += e[f], a = RL(a, $b); '
    'a = RL(a, Zb); '
    'a ^= b1 || 0; '
    '0 > a && (a = (a & 2147483647) + 2147483648); '
    'a %= 1E6; '
    'return a.toString() + jd + (a ^ b) '
    '}; '
    'function RL(a, b) { '
    'var t = "a"; '
    'var Yb = "+"; '
    'for (var c = 0; c < b.length - 2; c += 3) { '
    'var d = b.charAt(c + 2), '
    'd = d >= t ? d.charCodeAt(0) - 87 : Number(d), '
    'd = b.charAt(c + 1) == Yb ? a >>> d: a << d; '
    'a = b.charAt(c) == Yb ? a + d & 4294967295 : a ^ d '
    '} '
    'return a '
    '} '
)


class Py4Js:
    """Thin wrapper around the compiled JavaScript token routine."""

    def __init__(self):
        # Compile once; the resulting context is reused by every getTk() call.
        self.ctx = execjs.compile(_TK_SCRIPT)

    def getTk(self, text):
        """Return whatever the JavaScript ``TL`` function yields for ``text``."""
        return self.ctx.call("TL", text)

google_translate.py

# -*- coding: utf-8 -*-
"""Read a Chinese txt file and write a translated (German by default) copy.

Quoted values are collected from ``input.txt``, translated one by one through
the Google Translate web endpoint, and the translated copy of the file is
written to ``output.txt``.
"""
import json
import urllib.parse
import urllib.request

# translate() refuses any text longer than this many characters.
MAX_CONTENT_LENGTH = 4891


def open_url(url):
    """Fetch ``url`` with a browser-like User-Agent and return the body as text."""
    headers = {'User-Agent': 'Mozilla/5.0 (Windows NT 6.1; WOW64; rv:23.0) Gecko/20100101 Firefox/23.0'}
    req = urllib.request.Request(url=url, headers=headers)
    # The context manager closes the connection even if read()/decode() fails.
    with urllib.request.urlopen(req) as response:
        return response.read().decode('utf-8')


def extract_translation(result):
    """Pull the translated text out of a raw endpoint response.

    The response is first parsed as JSON; the first item of every list found
    in ``data[0]`` is joined, so multi-sentence input is returned in full and
    JSON escapes are decoded.  If that yields nothing (for example because the
    body is not strict JSON) the text between offset 4 and the first ``",`` is
    used instead.

    Returns:
        The translated string, or ``None`` when nothing could be extracted.
    """
    try:
        segments = json.loads(result)[0]
        pieces = [
            seg[0]
            for seg in segments
            if isinstance(seg, list) and seg and isinstance(seg[0], str)
        ]
    except (ValueError, TypeError, IndexError, KeyError):
        pieces = []
    if pieces:
        return "".join(pieces)

    # Fallback: plain slicing of the raw body.
    end = result.find("\",")
    if end > 4:
        return result[4:end]
    return None


def translate(content, tk, old_language_code="zh-CN", new_language_code="de"):
    """Translate ``content`` and return the translated text.

    Args:
        content: Text to translate.
        tk: Token for ``content`` (see ``Py4Js.getTk`` in tool.py).
        old_language_code: Source language code, e.g. ``"zh-CN"``.
        new_language_code: Target language code, e.g. ``"de"`` or ``"en"``.

    Returns:
        The translated text, or ``None`` when ``content`` is too long or the
        response could not be parsed.
    """
    if len(content) > MAX_CONTENT_LENGTH:
        print("翻译的长度超过限制!!!")
        return None

    content = urllib.parse.quote(content)
    url = "https://translate.google.cn/translate_a/single?client=webapp&" \
          "sl=%s&tl=%s&hl=en&dt=at&dt=bd&dt=ex&dt=ld&dt=md&dt=qca&dt=rw&dt=rm&dt=ss&dt=t&clearbtn=1&otf=1&" \
          "pc=1&ssel=3&tsel=3&kc=2&tk=%s&q=%s" % (old_language_code, new_language_code, tk, content)

    translated = extract_translation(open_url(url))
    if translated is not None:
        print(translated)
    return translated


def get_old_text_list(input_path="input.txt"):
    """Collect the quoted values that need translating from ``input_path``.

    A line is considered when it contains ``'``, ``:`` and ``,``; the text
    before the last ``'`` separator is taken as the value.  The literal
    ``React Admin`` is skipped.
    """
    key_str_list = []
    with open(input_path, "r", encoding="utf-8") as fo:
        for line in fo:
            if "'" in line and ":" in line and "," in line:
                line_str = line.split("'")[-2]
                if line_str != "React Admin":
                    key_str_list.append(line_str)
                    print(line_str)
    return key_str_list


def get_new_text_list(key_str_list):
    """Translate every entry of ``key_str_list``.

    Processing stops early at the sentinel value ``'q!'``.

    Returns:
        ``(value_str_list, key_value_dict)``: the translations in input order
        and a mapping from original text to translation.  A translation is
        ``None`` when ``translate`` could not produce one.
    """
    # Imported here so the parsing/output helpers of this module remain usable
    # without the PyExecJS dependency that tool.py pulls in.
    from tool import Py4Js

    js = Py4Js()
    value_str_list = []
    key_value_dict = {}
    for key in key_str_list:
        if key == 'q!':
            break
        if key in key_value_dict:
            # Same text seen before: reuse the answer instead of a new request.
            value = key_value_dict[key]
        else:
            value = translate(key, js.getTk(key))
        value_str_list.append(value)
        key_value_dict[key] = value
    print(key_str_list)
    print(value_str_list)
    print(key_value_dict)
    return value_str_list, key_value_dict


def output_new_text(key_str_list, value_str_list, key_value_dict,
                    input_path="input.txt", output_path="output.txt"):
    """Write a copy of ``input_path`` with every key replaced by its translation.

    Args:
        key_str_list: Original texts to look for.
        value_str_list: Unused; kept for interface compatibility.
        key_value_dict: Mapping from original text to translation.
        input_path: File to read.
        output_path: File to write.
    """
    # Longest keys first: otherwise a short key ('登录') would be replaced
    # inside a longer one ('退出登录') and the longer key would never match.
    ordered_keys = sorted(dict.fromkeys(key_str_list), key=len, reverse=True)

    with open(input_path, "r", encoding="utf-8") as fo:
        lines = fo.readlines()
    with open(output_path, "w", encoding="utf-8") as fw:
        for line in lines:
            for key in ordered_keys:
                translated = key_value_dict.get(key)
                # Leave the original text in place when no translation exists.
                if translated is not None and key in line:
                    line = line.replace(key, translated)
            fw.write(line)


def main():
    """Translate ``input.txt`` into ``output.txt``."""
    key_str_list = get_old_text_list()
    value_str_list, key_value_dict = get_new_text_list(key_str_list)
    output_new_text(key_str_list, value_str_list, key_value_dict)


if __name__ == "__main__":
    main()

input.txt

注:需要翻译的txt中文文件 export default { application: { name: 'React Admin', }, ajaxTip: { success: '成功', error: '失败', noAccess: '无权访问此资源', notFound: '访问资源不存在', serverBusy: '服务器繁忙', timeOut: '超时', }, menu: { menus: '菜单&权限', codeGenerator: '代码生成', home: '首页', document: '文档', page404: '404页面不存在', login: '登录', example: '示例', userCenter: '用户中心', users: '用户列表', userEdit: '用户编辑', roles: '角色列表', modifyPassword: '修改密码', setting: '设置', logout: '退出登录', ajax: 'ajax请求', antDesign: 'Ant Design 官网', google: '谷歌官网', component: '组件', tableRowDraggable: '表格行可拖拽', asyncSelect: '异步下拉', formElement: '表单元素', formItemLayout: '表单布局', formUtil: '表单相关工具', inputClear: '可清空Input', listPage: '列表页', modal: '弹框', noData: '暂无数据', operator: '操作', pagination: '分页组件', permission: '权限', popPrompt: '弹框输入', queryBar: '查询条', queryItem: '查询条件', tableAnimate: '表格动画高阶组件', tableDragColumn: '表头拖拽高阶组件', tableDragRow: '表格行可拖拽高阶组件', tableEditable: '可编辑表格', tableRightClick: '表格行右键高阶组件', toolBar: '工具条', userAvatar: '用户头像', }, login: { title: '欢迎登录', submit: '登录', userName: '用户名', password: '密码', userNameEmptyTip: '请输入用户名!', passwordEmptyTip: '请输入密码!', }, setting: { navigationLayout: '导航布局', topSideMenu: '顶部+左侧导航', topMenu: '顶部导航', sideMenu: '左侧导航', tabsSetting: 'Tab页导航设置', tabsShow: '显示Tab页导航', pageSetting: '页面设置', showHead: '显示头部', fixedHead: '头部固定', menuSetting: '菜单设置', keepMenuOpen: '保持菜单展开', keepPage: '保持页面状态', fullScreen: '全屏显示', exitFullScreen: '退出全屏', selectPrimaryColor: '选择主题颜色', }, tabs: { refresh: '刷新', refreshAll: '刷新全部', close: '关闭', closeOthers: '关闭其他', closeAll: '关闭所有', closeLeft: '关闭左侧', closeRight: '关闭右侧', }, errorPage: { needLogin: '您还未登录!', pageNotFound: '您访问的页面不存在!', redirectTo: '跳转到', orReturn: '或者返回', previousStep: '上一步', }, };

--

内容版权声明:除非注明,否则皆为本站原创文章。

转载注明出处:https://www.heiqu.com/wpfjfz.html