文章目录
- 1,代码结构
- 2,代码详解
- 2.1,将范围字符串转成list
- 2.2,获取pdf文件特定页
- 2.3,将pdf页list合并为pdf文件并保存
- 2.4,遍历所有要合并的文件,进行合并
- 2.5,给出要合并的pdf文件及范围,合并到指定文件
- 3,完整代码
- 4,执行结果
1,代码结构
import os
from PyPDF2 import PdfReader, PdfWriter
class PdfMerge:
    """Outline of the PDF merger; method bodies are elided in this overview."""

    def __init__(self, fileList, dstFile):
        # fileList: list of {"file": ..., "range": ...} entries to merge.
        # dstFile: path of the PDF that will be written.
        self.fileList = fileList
        self.dstFile = dstFile

    def readPdf(self, pdfFile, pageRange):
        # Return the pages of pdfFile selected by pageRange.
        ...

    def writePdf(self, pageList):
        # Save pageList as a single PDF at self.dstFile.
        ...

    def getRangeList(self, rangeStr):
        # Turn a range string such as "0:5" into a list of page indices.
        ...

    def mergePdf(self):
        # Drive the whole merge over self.fileList.
        ...


def main():
    # Build the file list and run the merge.
    ...


if __name__ == "__main__":
    main()
2,代码详解
2.1,将范围字符串转成list
如:
"0:5" 转成 [0,1,2,3,4](左闭右开,不包含结束值 5)
"0,1,2,3,6,7" 转成 [0,1,2,3,6,7]
"2" 转成 [2]
def getRangeList(self, rangeStr):
    """Convert a page-range string into a list of integer page indices.

    Supported forms:
        "a:b"    -> [a, a+1, ..., b-1]  (end-exclusive, like ``range``)
        "a,b,c"  -> [a, b, c]
        "n"      -> [n]

    Args:
        rangeStr: Range specification in one of the forms above.

    Returns:
        The list of indices. An empty list is returned, after printing a
        diagnostic, when the string contains more than one ':' or matches
        none of the supported forms.

    Raises:
        ValueError: If a ':'- or ','-separated piece is not a valid integer.
    """
    rangeList = []
    if ':' in rangeStr:
        bounds = rangeStr.split(':')
        if len(bounds) != 2:
            print("rangeStr[%s] split [:] failed!"%(rangeStr))
            return rangeList
        rangeList.extend(range(int(bounds[0]), int(bounds[1])))
    elif ',' in rangeStr:
        rangeList.extend(int(item) for item in rangeStr.split(','))
    elif rangeStr.isdecimal():
        # isdecimal() only accepts characters int() can parse; isnumeric()
        # also accepts e.g. "½" or "²", which would make int() raise.
        rangeList.append(int(rangeStr))
    else:
        print("rangeStr split failed! not find [:] and [,]")
    return rangeList
2.2,获取pdf文件特定页
def readPdf(self, pdfFile, pageRange):
    """Collect the pages of ``pdfFile`` whose index appears in ``pageRange``.

    Args:
        pdfFile: Source PDF, passed straight to ``PdfReader``.
        pageRange: Collection of zero-based page indices to keep.

    Returns:
        A list of the selected page objects in document order. Indices in
        ``pageRange`` that the document does not have are ignored.
    """
    reader = PdfReader(pdfFile)
    total = len(reader.pages)
    # Walk the document front to back so the result follows page order,
    # not the order in which the indices were requested.
    return [reader.pages[idx] for idx in range(total) if idx in pageRange]
2.3,将pdf页list合并为pdf文件并保存
def writePdf(self, pageList):
    """Write the given pages, in order, to ``self.dstFile`` as one PDF.

    Args:
        pageList: Page objects to add to the output document.
    """
    writer = PdfWriter()
    for pdf_page in pageList:
        writer.add_page(pdf_page)
    # The destination is opened in binary mode ('wb') and overwritten.
    with open(self.dstFile, 'wb') as dst:
        writer.write(dst)
2.4,遍历所有要合并的文件,进行合并
def mergePdf(self):
    """Gather the requested pages from every entry in ``self.fileList`` and save them.

    Each entry is a dict with a "file" key (the source PDF) and a "range"
    key (a page-range string parsed by ``getRangeList``). The parsed range
    is printed for each file, then all collected pages are handed to
    ``writePdf`` in the order the files were listed.
    """
    merged = []
    for entry in self.fileList:
        src = entry["file"]
        rangeStr = entry["range"]
        indices = self.getRangeList(rangeStr)
        print("file[{}] rangeStr[{}] rangeList{}".format(src, rangeStr, indices))
        merged += self.readPdf(src, indices)
    self.writePdf(merged)
2.5,给出要合并的pdf文件及范围,合并到指定文件
def main():
    """Merge up to the first ten pages of one PDF with page 0 of another."""
    # Each entry names a source PDF and the pages to take from it.
    # "a:b" is end-exclusive; a comma list such as "0,1,5,6,7" can be used
    # instead to pick individual pages.
    sources = [
        {"file": "source/1_任务书.pdf", "range": "0:10"},
        {"file": "source/20230409074902162.pdf", "range": "0"},
    ]
    merger = PdfMerge(sources, "source/任务书.pdf")
    merger.mergePdf()
3,完整代码
import os
from PyPDF2 import PdfReader, PdfWriter
class PdfMerge:
    """Merge selected pages from several PDF files into a single PDF.

    Attributes are set directly from the constructor arguments:
    ``fileList`` is a list of dicts with a "file" key (source PDF) and a
    "range" key (page-range string); ``dstFile`` is the output path.
    """

    def __init__(self, fileList, dstFile):
        self.fileList = fileList
        self.dstFile = dstFile

    def readPdf(self, pdfFile, pageRange):
        """Return the pages of ``pdfFile`` whose zero-based index is in ``pageRange``.

        Pages come back in document order; indices the document does not
        have are ignored.
        """
        pageList = []
        pdf_reader = PdfReader(pdfFile)
        for i in range(len(pdf_reader.pages)):
            if i in pageRange:
                pageList.append(pdf_reader.pages[i])
        return pageList

    def writePdf(self, pageList):
        """Write ``pageList``, in order, to ``self.dstFile`` as one PDF."""
        pdf_writer = PdfWriter()
        for page in pageList:
            pdf_writer.add_page(page)
        with open(self.dstFile, 'wb') as out:
            pdf_writer.write(out)

    def getRangeList(self, rangeStr):
        """Convert a page-range string into a list of integer page indices.

        Supported forms:
            "a:b"    -> [a, a+1, ..., b-1]  (end-exclusive, like ``range``)
            "a,b,c"  -> [a, b, c]
            "n"      -> [n]

        Returns:
            The list of indices. An empty list is returned, after printing
            a diagnostic, when the string contains more than one ':' or
            matches none of the supported forms.

        Raises:
            ValueError: If a ':'- or ','-separated piece is not a valid
                integer.
        """
        rangeList = []
        if ':' in rangeStr:
            bounds = rangeStr.split(':')
            if len(bounds) != 2:
                print("rangeStr[%s] split [:] failed!"%(rangeStr))
                return rangeList
            rangeList.extend(range(int(bounds[0]), int(bounds[1])))
        elif ',' in rangeStr:
            rangeList.extend(int(item) for item in rangeStr.split(','))
        elif rangeStr.isdecimal():
            # isdecimal() only accepts characters int() can parse;
            # isnumeric() also accepts e.g. "½" or "²", which would make
            # int() raise.
            rangeList.append(int(rangeStr))
        else:
            print("rangeStr split failed! not find [:] and [,]")
        return rangeList

    def mergePdf(self):
        """Collect the requested pages from every listed file and save them.

        The parsed range of each file is printed before its pages are read.
        """
        pageList = []
        for pdf in self.fileList:
            file = pdf["file"]
            fileRange = self.getRangeList(pdf["range"])
            print("file[{}] rangeStr[{}] rangeList{}".format(file, pdf["range"], fileRange))
            pageList.extend(self.readPdf(file, fileRange))
        self.writePdf(pageList)
def main():
    """Merge up to the first ten pages of one PDF with page 0 of another."""
    # Each entry names a source PDF and the pages to take from it.
    # "a:b" is end-exclusive; a comma list such as "0,1,5,6,7" can be used
    # instead to pick individual pages.
    sources = [
        {"file": "source/1_任务书.pdf", "range": "0:10"},
        {"file": "source/20230409074902162.pdf", "range": "0"},
    ]
    merger = PdfMerge(sources, "source/任务书.pdf")
    merger.mergePdf()


# Run the demo merge only when executed as a script, not on import.
if __name__ == "__main__":
    main()
4,执行结果