grpc 之 word2pdf使用

做一个 Word 转 PDF 的服务:采用 gRPC 通信,通过 LibreOffice 命令行完成转换。

1.构建libreoffice镜像

# Base image: Python 3.6 plus LibreOffice and the Chinese fonts needed for conversion.
FROM python:3.6

# Use Shanghai local time inside the container.
ENV TZ=Asia/Shanghai
RUN ln -snf /usr/share/zoneinfo/$TZ /etc/localtime && echo $TZ > /etc/timezone

# Point apt at the Aliyun mirror. printf writes one entry per line; a single
# echo with "\" continuations would put every entry on the same line, which
# apt cannot parse.
# NOTE(review): the mirror URLs were missing from the original listing and
# have been restored to the usual Aliyun Debian stretch layout - confirm.
RUN cd /etc/apt \
    && mv sources.list sources.list.bak \
    && printf '%s\n' \
        "deb http://mirrors.aliyun.com/debian/ stretch main non-free contrib" \
        "deb-src http://mirrors.aliyun.com/debian/ stretch main non-free contrib" \
        "deb http://mirrors.aliyun.com/debian-security stretch/updates main" \
        "deb-src http://mirrors.aliyun.com/debian-security stretch/updates main" \
        "deb http://mirrors.aliyun.com/debian/ stretch-updates main non-free contrib" \
        "deb-src http://mirrors.aliyun.com/debian/ stretch-updates main non-free contrib" \
        "deb http://mirrors.aliyun.com/debian/ stretch-backports main non-free contrib" \
        "deb-src http://mirrors.aliyun.com/debian/ stretch-backports main non-free contrib" \
        > sources.list

# Update and install in the same layer so a cached package index is never
# reused with a newer install step.
RUN apt-get update && apt-get install -y libreoffice

# Install the SimSun / SimHei fonts so Chinese text is not rendered as garbage.
COPY ./ /root/
RUN mv /root/simsun.ttc /usr/share/fonts \
    && mv /root/simhei.ttf /usr/share/fonts \
    && cd /usr/share/fonts \
    && fc-cache -fv

# docker build -t libreoffice .

采用python3.6镜像

使用阿里源

安装libreoffice

解决中文乱码 加入中文字体

2.grpc服务端、客户端

创建proto配置文件 然后编译

服务端与客户端 采用二进制 数据进行传输

服务端

#!/usr/bin/env python
# -*- coding: utf-8 -*-
# @Time : 2019/7/9 0009 16:41
# @File : word2pdf_server_main.py
# @author : dfkai
# @Software: PyCharm
# python -m grpc_tools.protoc -I. --python_out=. --grpc_python_out=. ./data.proto
"""gRPC server that converts Word documents to PDF with LibreOffice."""
import os
import pathlib
import subprocess
import time
import traceback
import uuid
from concurrent import futures

import grpc
from proto_py import word2pdf_pb2, word2pdf_pb2_grpc

_ONE_DAY_IN_SECONDS = 60 * 60 * 24
_HOST = os.environ.get("HOSTNAME", "localhost")
_PORT = '8080'
# Payload returned to the client when no PDF could be produced.
_FAIL = b"fail"


class FormatData(word2pdf_pb2_grpc.FormatDataServicer):
    """Servicer implementing the ``FormatData`` service from the proto file."""

    def DoFormat(self, request, context):
        """
        Convert the Word document carried in ``request.text`` to PDF.

        :param request: ``Data`` message whose ``text`` holds the document bytes.
        :param context: gRPC call context (unused).
        :return: ``Data`` message holding the PDF bytes, or ``b"fail"`` when
            the conversion failed or the PDF could not be read.
        """
        doc_path, pdf_path, pdf_file_path = self.get_doc_pdf_path()
        with open(doc_path, "wb") as f:
            f.write(request.text)

        if not self.word2pdf_linux(doc_path, pdf_path):
            return word2pdf_pb2.Data(text=_FAIL)

        try:
            with open(pdf_file_path, "rb") as f:
                pdf_data = f.read()
        except OSError:
            # The converter reported success but left no readable PDF; report
            # the failure to the client instead of hitting an unbound variable.
            traceback.print_exc()
            pdf_data = _FAIL
        return word2pdf_pb2.Data(text=pdf_data)

    def get_doc_pdf_path(self):
        """
        Build unique input/output paths under ``<cwd>/filepath``.

        The ``doc`` and ``pdf`` directories are created when missing.

        :return: tuple of strings ``(doc_path, pdf_dir, pdf_file_path)``.
        """
        base = pathlib.Path(os.getcwd()) / "filepath"
        doc_dir = base / "doc"
        pdf_dir = base / "pdf"
        # Without these directories the open() in DoFormat fails on first use.
        doc_dir.mkdir(parents=True, exist_ok=True)
        pdf_dir.mkdir(parents=True, exist_ok=True)

        u_name = uuid.uuid4().hex
        doc_path = doc_dir / (u_name + ".docx")
        pdf_file_path = pdf_dir / (u_name + ".pdf")
        print(doc_path, pdf_dir, pdf_file_path)
        return str(doc_path), str(pdf_dir), str(pdf_file_path)

    def word2pdf_win(self, doc_path, pdf_path):
        """
        Convert on Windows through the Word COM automation interface.

        :param doc_path: path of the source document.
        :param pdf_path: path of the PDF file to write.
        :return: True when the document was saved, False when saving failed.
        """
        from win32com import client
        import pythoncom

        pythoncom.CoInitialize()
        word = client.DispatchEx("Word.Application")
        try:
            worddoc = word.Documents.Open(doc_path)
            try:
                # 17 is Word's wdFormatPDF save format.
                worddoc.SaveAs(pdf_path, FileFormat=17)
            except Exception as e:
                print(e)
                return False
            finally:
                worddoc.Close()
        finally:
            # DispatchEx starts a new Word process per call; quit it so the
            # processes do not accumulate.
            word.Quit()
        return True

    def word2pdf_linux(self, doc_path, pdf_path):
        """
        Convert on Linux with the LibreOffice ``soffice`` command.

        :param doc_path: path of the source document.
        :param pdf_path: directory that receives the generated PDF.
        :return: True when ``soffice`` exited with status 0, otherwise False.
        """
        command = [
            "soffice", "--headless", "--invisible",
            "--convert-to", "pdf", doc_path,
            "--outdir", pdf_path,
        ]
        try:
            # An argument list avoids shell parsing of the paths, and the exit
            # status is checked; os.system never raised on a failed conversion.
            completed = subprocess.run(command)
        except OSError:
            # Raised, for example, when the soffice executable is not installed.
            traceback.print_exc()
            return False
        return completed.returncode == 0


def serve():
    """
    Start the gRPC server and block until interrupted with Ctrl-C.

    :return: None
    """
    grpcServer = grpc.server(futures.ThreadPoolExecutor(max_workers=4))
    word2pdf_pb2_grpc.add_FormatDataServicer_to_server(FormatData(), grpcServer)
    grpcServer.add_insecure_port(_HOST + ':' + _PORT)
    grpcServer.start()
    try:
        # start() does not block, so keep the main thread alive.
        while True:
            time.sleep(_ONE_DAY_IN_SECONDS)
    except KeyboardInterrupt:
        grpcServer.stop(0)


if __name__ == '__main__':
    serve()

客户端

#!/usr/bin/env python
# -*- coding: utf-8 -*-
# @Time : 2019/7/9 0009 16:40
# @File : word2pdf_client_main.py
# @author : dfkai
# @Software: PyCharm
"""gRPC client that sends a Word document and stores the returned PDF."""
import time
import traceback

import grpc
from proto_py import word2pdf_pb2, word2pdf_pb2_grpc

_HOST = 'localhost'
_PORT = '8080'


def run():
    """
    Send ``test.doc`` to the conversion service and write ``test.pdf``.

    Failures (RPC error, ``b"fail"`` reply, or an unwritable output file) are
    reported on stderr / left to the notification hooks; nothing is raised
    for them.

    :return: None
    """
    file_name = "test"
    doc_name = file_name + '.doc'
    with open(doc_name, "rb") as f:
        data = f.read()

    # The context manager closes the channel once the call has finished.
    with grpc.insecure_channel(_HOST + ':' + _PORT) as conn:
        client = word2pdf_pb2_grpc.FormatDataStub(channel=conn)
        try:
            response = client.DoFormat(word2pdf_pb2.Data(text=data))
        except grpc.RpcError:
            traceback.print_exc()
            # Send notification: generation failed.
            return

    if response.text == b"fail":
        # Send notification: generation failed.
        return

    pdf_name = file_name + '.pdf'
    try:
        with open(pdf_name, "wb") as f:
            f.write(response.text)
    except OSError:
        # Print the traceback; the original built it and threw it away.
        traceback.print_exc()
        # Send notification: generation failed.
    else:
        # Send notification: generation succeeded.
        pass


if __name__ == '__main__':
    beg = time.time()
    run()
    end = time.time()
    print(end - beg)

proto配置文件

syntax = "proto3";

package example;

// Raw byte payload used for both the request and the response.
message Data {
  bytes text = 1;
}

// Conversion service: one unary call taking a Data and returning a Data.
service FormatData {
  rpc DoFormat(Data) returns (Data);
}

进入文件目录,构建命令:python -m grpc_tools.protoc -I. --python_out=./proto_py/ --grpc_python_out=./proto_py/ ./proto/word2pdf.proto

3.构建rpc服务端镜像

# Service image built on top of the local "libreoffice" image.
FROM libreoffice

# Use Shanghai local time inside the container.
ENV TZ=Asia/Shanghai
RUN ln -snf /usr/share/zoneinfo/$TZ /etc/localtime && echo $TZ > /etc/timezone

COPY ./ /root/
WORKDIR /root/word2pdfRPC

# Install the Python dependencies from the Aliyun PyPI mirror.
RUN pip3 install -i https://mirrors.aliyun.com/pypi/simple/ -r requirements.txt

EXPOSE 8080

# NOTE(review): the server script's header names it word2pdf_server_main.py;
# confirm that server_main.py is the file actually present in word2pdfRPC.
CMD python server_main.py

# docker build -t word2pdf .
# docker run -d -p 8080:8080 -v /root/data/word2pdf/:/root/word2pdfRPC/filepath/ --name word2pdf word2pdf

requirements.txt

futures==3.1.1
grpcio==1.22.0
grpcio-tools==1.22.0
protobuf==3.8.0

参考、推荐:

protobuf和thrift对比

Language Guide (proto3)

内容版权声明:除非注明,否则皆为本站原创文章。

转载注明出处:https://www.heiqu.com/wssswf.html