1
xiaokongwu 2022-09-02 18:35:49 +08:00 via iPhone
对 PDF 进行关键字搜索,然后在上面加一层,用文字覆盖
|
2
xiaokongwu 2022-09-02 18:39:11 +08:00 via iPhone 1
@xiaokongwu Java 的话,iText、Spire、Aspose 都可以,不过是商业的 SDK
|
3
ll5270 2022-09-02 18:57:45 +08:00 1
这个我有经验
"""Overlay watermark pages on a downloaded PDF, then rebuild it from images.

For each record the script:

1. downloads the PDF and merges overlay ("watermark") pages onto selected
   pages, writing ``pdf/<id>.pdf``;
2. rasterizes every page to ``temp/<id>/<n>.jpg``;
3. places each image on an A4 portrait page and writes ``result/<id>.pdf``.

NOTE(review): the camelCase PyPDF2 names used here (``PdfFileReader``,
``getPage``, ``mergePage``, ...) belong to the legacy PyPDF2 1.x/2.x API and
are believed to be removed in PyPDF2 3.0 -- confirm the installed version.
"""
import io
import os
from os import listdir, mkdir, remove
from os.path import basename, isdir, join, split, splitext

import httpx
import pikepdf
import PyPDF2
from pdf2image import convert_from_path
from PyPDF2 import PdfFileMerger, PdfFileReader, PdfFileWriter
from reportlab.lib.pagesizes import A4, landscape, portrait
from reportlab.pdfgen import canvas

from app.general.exception import ValidationException

# Overlay PDFs keyed by zero-based page index (i.e. the 1st and 6th pages).
DEFAULT_WATERMARKS = {
    0: "/Users/xx/Downloads/aa.pdf",
    5: "/Users/xx/Downloads/aa6.pdf",
}


def get_pdf_by_url(url):
    """Download the PDF at *url* and return it as a ``PyPDF2.PdfFileReader``.

    The bytes are re-saved through pikepdf before PyPDF2 reads them
    (presumably to normalize files PyPDF2 cannot parse directly -- confirm).

    Raises:
        ValidationException: if the request fails or the server answers with
            an HTTP error status.
    """
    try:
        rsp = httpx.get(url)
        # httpx does not raise on 4xx/5xx by itself; without this an error
        # page would be handed to pikepdf as if it were a PDF.
        rsp.raise_for_status()
    except Exception as exc:
        raise ValidationException(msg="下载地址访问失败") from exc

    pdf = pikepdf.Pdf.open(io.BytesIO(rsp.content))
    pike_pdf_io = io.BytesIO()
    pdf.save(pike_pdf_io)
    pike_pdf_io.seek(0)
    return PyPDF2.PdfFileReader(pike_pdf_io)


def add_watermark(water_file, page_pdf):
    """Merge the first page of the PDF *water_file* onto *page_pdf*.

    *page_pdf* is modified in place by ``mergePage`` and also returned.
    """
    watermark_reader = PyPDF2.PdfFileReader(water_file)
    page_pdf.mergePage(watermark_reader.getPage(0))
    return page_pdf


def execute_watermark_pdf(path, url, watermarks=None):
    """Download *url*, overlay watermarks and write ``pdf/<path>.pdf``.

    Args:
        path: Base name (without extension) of the output file under ``pdf/``.
        url: Address the source PDF is downloaded from.
        watermarks: Optional mapping of zero-based page index to the overlay
            PDF merged onto that page. Defaults to ``DEFAULT_WATERMARKS``.

    Raises:
        ValidationException: propagated from ``get_pdf_by_url``.
    """
    overlays = DEFAULT_WATERMARKS if watermarks is None else watermarks
    pdf_reader = get_pdf_by_url(url)
    pdf_writer = PyPDF2.PdfFileWriter()

    for page_no in range(pdf_reader.numPages):
        page_pdf = pdf_reader.getPage(page_no)
        overlay = overlays.get(page_no)
        if overlay is not None:
            page_pdf = add_watermark(overlay, page_pdf)
        pdf_writer.addPage(page_pdf)

    # Serialize in memory first so a failed write never leaves a truncated
    # file behind for the later pipeline steps to pick up.
    new_pdf = io.BytesIO()
    pdf_writer.write(new_pdf)

    os.makedirs("pdf", exist_ok=True)
    with open(f"pdf/{path}.pdf", "wb") as f:
        f.write(new_pdf.getvalue())


def pdf2jpgs(path, file, dpi=100):
    """Split ``pdf/<file>`` into JPEG images, one per page.

    Args:
        path: Name of the folder created under ``temp/`` for the images.
        file: File name of the source PDF inside ``pdf/``.
        dpi: Rendering resolution passed to ``convert_from_path``.

    Images are saved as ``temp/<path>/<index>.jpg`` with a zero-based index.
    """
    # exist_ok lets an interrupted run be repeated; it also creates "temp".
    os.makedirs(join("temp", path), exist_ok=True)

    print("pdf/" + file)
    images = convert_from_path("pdf/" + file, dpi=dpi, fmt='JPEG', thread_count=4)

    for index, image in enumerate(images):
        image.save('temp/{}/{}.jpg'.format(path, index))


def merge_jpg2pdf(jpgpath):
    """Combine the JPEGs in ``temp/<jpgpath>`` into ``result/<jpgpath>.pdf``.

    Files are ordered by the integer value of their base name, so every
    ``.jpg`` in the folder must be named ``<number>.jpg``; any other name
    raises ``ValueError`` from ``int``.
    """
    src_dir = join("temp", jpgpath)
    jpg_files = sorted(
        (fn for fn in listdir(src_dir) if fn.endswith('.jpg')),
        key=lambda fn: int(splitext(fn)[0]),
    )

    result_pdf = PdfFileMerger()
    try:
        for fn in jpg_files:
            # Render each image to a single-page PDF in memory instead of a
            # shared "temp.pdf" in the working directory.
            page_buf = io.BytesIO()
            # portrait() gives an upright page; the image is stretched to
            # the full A4 size regardless of its own aspect ratio.
            c = canvas.Canvas(page_buf, pagesize=portrait(A4))
            c.drawImage(join(src_dir, fn), 0, 0, *portrait(A4))
            c.save()
            page_buf.seek(0)
            result_pdf.append(PdfFileReader(page_buf))

        os.makedirs("result", exist_ok=True)
        result_pdf.write("result/" + jpgpath + '.pdf')
    finally:
        result_pdf.close()


if __name__ == '__main__':
    # Records to process; each needs the keys "user_loan_id" and "url".
    data = []
    for i in data:
        loan_id = i["user_loan_id"]
        # Skip records whose final PDF has already been produced.
        if os.path.exists("result" + "/" + loan_id + ".pdf"):
            continue
        execute_watermark_pdf(loan_id, i["url"])
        # Rasterize to images, then rebuild a PDF from those images.
        pdf2jpgs(loan_id, loan_id + ".pdf")
        merge_jpg2pdf(loan_id)
5
Have1R 2022-09-03 10:17:00 +08:00
直接在 Adobe Acrobat DC 里用密文功能,划哪黑哪
|