下版PDFを1ファイルに結合し、栞を追加する作業はよくあります。この際、ノンブルとタイトル、およびその階層情報があれば、PyMuPDFライブラリで簡単に栞を設定できます!
import os

import fitz
import openpyxl
def export_pdf_bookmark_to_excel(pdf_file, excel_format):
    """Dump the bookmarks (TOC) of a PDF into a copy of an Excel template.

    The existing bookmarks are read from ``pdf_file`` and written into the
    active sheet of the ``excel_format`` workbook, one bookmark per row
    starting at row 3: column A receives the page number and column
    ``level + 1`` (B for level 1, C for level 2, ...) receives the title.
    The exported data can later be edited and fed back into a PDF.

    The result is saved beside the template with ``_new`` inserted before
    the file extension (``bookmark.xlsx`` -> ``bookmark_new.xlsx``), so the
    template itself is never overwritten, whatever its extension is.

    Args:
        pdf_file: Path of the PDF whose bookmarks are exported.
        excel_format: Path of the Excel template workbook.
    """
    # The context manager closes the PDF even if reading the TOC fails.
    with fitz.open(pdf_file) as doc:
        bookmark = doc.get_toc()  # [[level, title, page_num], ...]

    wb = openpyxl.load_workbook(excel_format)
    ws = wb.active

    first_row = 3  # data starts at the 3rd row; rows 1-2 are not touched
    for row, entry in enumerate(bookmark, start=first_row):
        level, title, page_num = entry[:3]
        ws.cell(row, 1).value = page_num
        # The column encodes the hierarchy level: level 1 -> B, level 2 -> C.
        ws.cell(row, level + 1).value = title

    # Split off only the real extension; str.replace() would touch every
    # ".xlsx" in the path and silently leave other extensions unchanged,
    # which made the save overwrite the template.
    root, ext = os.path.splitext(excel_format)
    wb.save(root + "_new" + ext)
def import_bookmark_from_excel_to_pdf(excel_file, pdf_file):
    """Read bookmarks from an Excel sheet and write them into a PDF copy.

    The active sheet of ``excel_file`` is read from row 3 onwards. Column A
    holds the page number; the first non-empty cell in columns B-K holds
    the title, and its column gives the hierarchy level (B = 1, C = 2, ...).
    Rows without any title (including completely blank rows) are skipped.

    The bookmarks are applied to ``pdf_file`` and the result is saved beside
    it with ``_new`` inserted before the file extension
    (``blank.pdf`` -> ``blank_new.pdf``); the input PDF is not modified.

    Args:
        excel_file: Path of the workbook that holds the bookmark table.
        pdf_file: Path of the PDF that receives the bookmarks.

    Raises:
        TypeError: If a row has a title but an empty page-number cell.
        ValueError: If a page-number cell cannot be converted to ``int``.
    """
    first_row = 3
    last_title_col = 11  # column K

    bookmark = []
    wb = openpyxl.load_workbook(excel_file, read_only=True)
    try:
        ws = wb.active
        # Stream the rows instead of using ws.max_row + ws.cell(): in
        # read-only mode max_row may be None and per-cell access is slow.
        rows = ws.iter_rows(
            min_row=first_row, max_col=last_title_col, values_only=True
        )
        for row in rows:
            if not row:
                continue
            page_num, titles = row[0], row[1:]
            # First filled title column decides the level (B -> 1, C -> 2).
            found = next(
                (
                    (level, title)
                    for level, title in enumerate(titles, start=1)
                    if title is not None
                ),
                None,
            )
            if found is None:
                continue  # blank row or row without a title
            level, title = found
            # Title cells may come back as numbers; store them as text.
            bookmark.append([level, str(title), int(page_num)])
    finally:
        wb.close()

    # Split off only the real extension so the output name always differs
    # from the input, whatever the extension's spelling is.
    root, ext = os.path.splitext(pdf_file)
    with fitz.open(pdf_file) as doc:
        doc.set_toc(bookmark)
        doc.save(root + "_new" + ext)
# Sample run.
# Source PDF: https://media.readthedocs.org/pdf/xlwings/stable/xlwings.pdf
pdf_file_with_bookmark = "docs-xlwings-org-en-stable.pdf"
excel_format = "bookmark.xlsx"
pdf_file_blank = "blank.pdf"  # PDF without bookmarks
excel_with_bookmark = "bookmark_new.xlsx"

# Step 1: export the bookmarks of the PDF into a copy of the template.
export_pdf_bookmark_to_excel(
    pdf_file=pdf_file_with_bookmark,
    excel_format=excel_format,
)

# Step 2: read bookmarks from the workbook and apply them to the blank PDF.
import_bookmark_from_excel_to_pdf(
    excel_file=excel_with_bookmark,
    pdf_file=pdf_file_blank,
)

