 |
|
9
zdl0929 Aug 14, 2024
找了半天没合适的,然后 gpt 半小时搞定😂 ------ # 读取文件夹中的所有 word 文件,把每一个转换为图像文件,再将图像文件合并到一个同名的 pdf 文件中 import os from docx2pdf import convert from pdf2image import convert_from_path import img2pdf import shutil
def word_to_pdf(word_file, pdf_file):
    """Convert one Word document to a PDF file.

    Thin wrapper that hands both paths straight to ``docx2pdf.convert``.

    Args:
        word_file: Path of the source Word document.
        pdf_file: Path where the resulting PDF should be written.
    """
    convert(word_file, pdf_file)
def pdf_to_images(pdf_file, image_prefix):
    """Render the pages of a PDF to PNG files under ``imagetmp/<image_prefix>/``.

    Args:
        pdf_file: Path of the PDF to rasterize; passed to
            ``pdf2image.convert_from_path``.
        image_prefix: Name of the sub-folder of ``imagetmp`` that receives
            the page images. The folder is created if it does not exist.

    Returns:
        List of the PNG paths that were written (``page_1.png``,
        ``page_2.png``, ...), in the order ``convert_from_path`` returned
        the images.
    """
    images = convert_from_path(pdf_file)
    # Build the folder path once so the directory that gets created and the
    # directory the pages are saved into cannot drift apart.
    image_dir = os.path.join("imagetmp", image_prefix)
    os.makedirs(image_dir, exist_ok=True)
    image_paths = []
    for page_number, image in enumerate(images, start=1):
        image_path = os.path.join(image_dir, f"page_{page_number}.png")
        image.save(image_path, "PNG")
        image_paths.append(image_path)
    return image_paths


def images_to_pdf(images, pdf_file):
    """Merge PNG images into a single PDF file.

    Args:
        images: Iterable of image path strings; entries that do not end
            with ``.png`` are ignored.
        pdf_file: Path of the PDF to write. An existing file is replaced.
    """
    png_paths = [path for path in images if path.endswith(".png")]
    # Convert before opening the output: opening with "wb" truncates the
    # file, so a conversion error would otherwise leave an empty PDF behind.
    pdf_bytes = img2pdf.convert(png_paths)
    with open(pdf_file, "wb") as f:
        f.write(pdf_bytes)
def convert_word_files_to_pdf(source_directory, target_directory):
    """Convert every ``.docx`` under a directory tree into an image-based PDF.

    For each ``.docx`` file found while walking ``source_directory``:
    the document is converted to a PDF next to the source file, that PDF is
    rendered to PNG page images, the images are merged back into a PDF at
    the same path (replacing the intermediate one), and the result is moved
    into the matching sub-folder of ``target_directory``.

    Args:
        source_directory: Root of the tree that is searched for ``.docx``
            files.
        target_directory: Root under which the PDFs are placed, mirroring
            the folder layout of ``source_directory``. Missing folders are
            created. An existing PDF with the same name is replaced.
    """
    suffix = ".docx"
    for root, _dirs, files in os.walk(source_directory):
        for file in files:
            if not file.endswith(suffix):
                continue
            # Strip only the trailing extension; str.replace would also
            # rewrite a ".docx" that appears in the middle of the name.
            stem = file[:-len(suffix)]
            pdf_name = stem + ".pdf"

            source_file = os.path.join(root, file)
            pdf_file = os.path.join(root, pdf_name)

            word_to_pdf(source_file, pdf_file)
            images = pdf_to_images(pdf_file, stem)
            # The image-based PDF is written to the same path as the
            # intermediate PDF, so no separate clean-up step is needed.
            images_to_pdf(images, pdf_file)

            # Map the walked folder onto the target tree via its path
            # relative to the source root. Substring replacement on the path
            # breaks when the source string recurs inside the path or when
            # it carries a trailing separator.
            relative_root = os.path.relpath(root, source_directory)
            target_dir = os.path.normpath(
                os.path.join(target_directory, relative_root)
            )
            os.makedirs(target_dir, exist_ok=True)

            # Move to an explicit file path: moving into a directory raises
            # when a file of that name is already there (e.g. on a re-run).
            destination = os.path.join(target_dir, pdf_name)
            if os.path.abspath(destination) != os.path.abspath(pdf_file):
                shutil.move(pdf_file, destination)
# Script entry point: run the conversion over the whole source tree.
# NOTE(review): source_dir and dist_dir are not defined anywhere in the
# visible snippet; they must be assigned before this call or it will raise
# NameError.
convert_word_files_to_pdf(source_dir, dist_dir)
|