POM.xml
<?xml version="1.0" encoding="UTF-8"?>
<!--
Maven build descriptor for the jackDemo project (com.jack:jackDemo:0.0.1-SNAPSHOT).
It inherits dependency and plugin management from spring-boot-starter-parent, so the
Spring Boot starters below carry no explicit version; every other dependency pins its own.
-->
<project xmlns="http://maven.apache.org/POM/4.0.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 https://maven.apache.org/xsd/maven-4.0.0.xsd">
<modelVersion>4.0.0</modelVersion>
<parent>
<groupId>org.springframework.boot</groupId>
<artifactId>spring-boot-starter-parent</artifactId>
<!-- Disabled alternative parent version: <version>3.2.1</version> -->
<version>2.3.9.RELEASE</version>
<relativePath/> <!-- lookup parent from repository -->
</parent>
<groupId>com.jack</groupId>
<artifactId>jackDemo</artifactId>
<version>0.0.1-SNAPSHOT</version>
<name>jackDemo</name>
<description>jackDemo</description>
<properties>
<!-- Java language level used by the Spring Boot parent's compiler configuration -->
<java.version>1.8</java.version>
</properties>
<dependencies>
<!-- Spring Boot starters; versions are managed by the parent POM -->
<dependency>
<groupId>org.springframework.boot</groupId>
<artifactId>spring-boot-starter-data-mongodb</artifactId>
</dependency>
<dependency>
<groupId>org.springframework.boot</groupId>
<artifactId>spring-boot-starter-web</artifactId>
</dependency>
<!-- Devtools: runtime-only and optional, so it is not passed on to projects depending on this one -->
<dependency>
<groupId>org.springframework.boot</groupId>
<artifactId>spring-boot-devtools</artifactId>
<scope>runtime</scope>
<optional>true</optional>
</dependency>
<dependency>
<groupId>org.springframework.boot</groupId>
<artifactId>spring-boot-starter-test</artifactId>
<scope>test</scope>
</dependency>
<!-- NOTE(review): verify fastjson 1.2.75 against published fastjson security advisories before shipping -->
<dependency>
<groupId>com.alibaba</groupId>
<artifactId>fastjson</artifactId>
<version>1.2.75</version>
</dependency>
<!-- OpenCV artifact published under the org.openpnp group -->
<dependency>
<groupId>org.openpnp</groupId>
<artifactId>opencv</artifactId>
<version>4.5.3-4</version>
</dependency>
<!-- Apache POI for Excel files -->
<dependency>
<groupId>org.apache.poi</groupId>
<artifactId>poi-ooxml</artifactId>
<version>5.2.3</version> <!-- Check for and use the latest version -->
</dependency>
<!-- Apache POI dependencies (these may be included automatically by Maven, but it's good to be explicit) -->
<dependency>
<groupId>org.apache.poi</groupId>
<artifactId>poi</artifactId>
<version>5.2.3</version> <!-- Keep in sync with the poi-ooxml version -->
</dependency>
<!-- Apache Commons Collections (required by POI) -->
<dependency>
<groupId>org.apache.commons</groupId>
<artifactId>commons-collections4</artifactId>
<version>4.4</version> <!-- Make sure this version is compatible with your project -->
</dependency>
<!-- Apache Commons IO (optional, but useful for file handling) -->
<dependency>
<groupId>commons-io</groupId>
<artifactId>commons-io</artifactId>
<version>2.11.0</version> <!-- Make sure this version is compatible with your project -->
</dependency>
<!-- PDFBox for reading PDF files -->
<dependency>
<groupId>org.apache.pdfbox</groupId>
<artifactId>pdfbox</artifactId>
<version>2.0.24</version>
</dependency>
<!-- docx4j for creating Word documents -->
<dependency>
<groupId>org.docx4j</groupId>
<artifactId>docx4j</artifactId>
<version>3.2.1</version>
</dependency>
<!-- JAXB API and runtime, declared explicitly. -->
<!-- NOTE(review): presumably needed by docx4j on JDKs that do not bundle JAXB; confirm against the target JDK -->
<dependency>
<groupId>javax.xml.bind</groupId>
<artifactId>jaxb-api</artifactId>
<version>2.3.1</version>
</dependency>
<dependency>
<groupId>org.glassfish.jaxb</groupId>
<artifactId>jaxb-runtime</artifactId>
<version>2.3.1</version>
</dependency>
</dependencies>
<build>
<plugins>
<!-- Spring Boot Maven plugin; version is managed by the parent POM -->
<plugin>
<groupId>org.springframework.boot</groupId>
<artifactId>spring-boot-maven-plugin</artifactId>
</plugin>
</plugins>
</build>
</project>
Java file (PdfToWordConverter.java):
import org.apache.pdfbox.pdmodel.PDDocument;
import org.apache.pdfbox.text.PDFTextStripper;
import org.docx4j.openpackaging.packages.WordprocessingMLPackage;
import org.docx4j.openpackaging.parts.WordprocessingML.MainDocumentPart;
import org.docx4j.wml.Body;
import org.docx4j.wml.P;
import org.docx4j.wml.R;
import org.docx4j.wml.Text;
import java.io.File;
import java.io.FileInputStream;
import java.io.FileOutputStream;
import java.io.IOException;
/**
 * Command-line utility that extracts the plain text of a PDF file with PDFBox and writes it
 * into a new Word (.docx) document with docx4j, one Word paragraph per extracted line.
 *
 * <p>Only the text returned by {@link PDFTextStripper#getText} is carried over; nothing else
 * from the PDF is read by this class.
 */
public class PdfToWordConverter {

    /** PDF that is read when no input path is supplied on the command line. */
    private static final String DEFAULT_PDF_PATH = "D:\\word\\何以为父影响彼此一生的父子关系.pdf";

    /** Word file that is written when no output path is supplied on the command line. */
    private static final String DEFAULT_WORD_PATH = "D:\\word\\何以为父影响彼此一生的父子关系.docx";

    /**
     * Converts one PDF file to a Word document.
     *
     * @param args optional; {@code args[0]} is the PDF to read and {@code args[1]} the .docx to
     *             write. Either one falls back to the built-in default path when absent.
     * @throws Exception if docx4j fails to build or save the Word document
     */
    public static void main(String[] args) throws Exception {
        boolean hasArgs = args != null;
        String pdfFilePath = hasArgs && args.length > 0 ? args[0] : DEFAULT_PDF_PATH;
        String wordFilePath = hasArgs && args.length > 1 ? args[1] : DEFAULT_WORD_PATH;
        try {
            // Read the text content of the PDF.
            String pdfText = extractTextFromPdf(pdfFilePath);
            // Write that content into the Word document.
            createWordDocument(wordFilePath, pdfText);
            System.out.println("PDF to Word conversion completed successfully!");
        } catch (IOException e) {
            // Report which file failed and exit non-zero, so an I/O failure is signalled to the
            // caller the same way as any other failure that propagates out of main.
            System.err.println("PDF to Word conversion failed for " + pdfFilePath + ": " + e.getMessage());
            e.printStackTrace();
            System.exit(1);
        }
    }

    /**
     * Reads a PDF file and returns its text as produced by {@link PDFTextStripper}.
     *
     * @param filePath path of the PDF file to read
     * @return the extracted text of the whole document
     * @throws IOException if the file cannot be opened or PDFBox cannot parse or read it
     */
    public static String extractTextFromPdf(String filePath) throws IOException {
        // Both the stream and the document hold resources; close them even when
        // text extraction throws. They are closed in reverse order of declaration.
        try (FileInputStream in = new FileInputStream(filePath);
             PDDocument document = PDDocument.load(in)) {
            PDFTextStripper pdfStripper = new PDFTextStripper();
            return pdfStripper.getText(document);
        }
    }

    /**
     * Creates a new Word document containing {@code content} and saves it to {@code filePath}.
     *
     * <p>The content is split on line breaks ({@code \n} or {@code \r\n}); each resulting line
     * becomes its own paragraph holding a single run of text.
     *
     * @param filePath path of the .docx file to create (an existing file is overwritten)
     * @param content  text to place in the document
     * @throws Exception if docx4j cannot create or save the package, or the file cannot be written
     */
    public static void createWordDocument(String filePath, String content) throws Exception {
        WordprocessingMLPackage wordMLPackage = WordprocessingMLPackage.createPackage();
        MainDocumentPart mainDocumentPart = wordMLPackage.getMainDocumentPart();
        Body body = mainDocumentPart.getContents().getBody();

        // Split the content into lines and add each one as a paragraph.
        String[] paragraphs = content.split("\\r?\\n");
        for (String paragraph : paragraphs) {
            Text text = new Text();
            // setValue stores the visible characters of the run. (setParent only records the
            // owning JAXB object and would leave the run empty.)
            text.setValue(paragraph);
            // Keep leading/trailing spaces of the extracted line instead of letting Word trim them.
            text.setSpace("preserve");

            R run = new R();
            run.getContent().add(text);

            P p = new P();
            p.getContent().add(run);

            body.getContent().add(p);
        }

        // Save the Word document.
        try (FileOutputStream out = new FileOutputStream(new File(filePath))) {
            wordMLPackage.save(out);
        }
    }
}



















