背景:看书的时候经常遇到英文pdf,没有合适的翻译软件可以快速翻译全书。这里提供一个解决方案。
Step 1
- 打开英文pdf
- CTRL+A全选文字
- CTRL+C复制
- 打开记事本
- CTRL+V粘贴
- 保存为data.txt
Step 2
写一个C++脚本
// ToolPdf2Html.cpp : 此文件包含 "main" 函数。程序执行将在此处开始并结束。
//
#include <iostream>
#include <cstring>
#include <vector>
#include <unordered_map>
#include <fstream>
#include <iostream>
#include <sstream>
#include <windows.h>
//pdf->txt->html (edge translate)
/// Reads the whole file `filename` into `data` as raw bytes.
///
/// @param filename Path of the file to read; it is opened in binary mode.
/// @param data     Receives the file contents. It is resized to the number of
///                 bytes actually read. It is left untouched when the file
///                 cannot be opened.
static void ReadFileContentsByte(std::string filename, std::vector<char>& data)
{
    std::ifstream fin(filename, std::ios::in | std::ios::binary);
    if (!fin.is_open())
    {
        std::cerr << "cannot open file " << filename << std::endl;
        return;
    }

    // Determine the file size by seeking to the end.
    fin.seekg(0, std::ios::end);
    const std::streamoff size = fin.tellg();
    fin.seekg(0, std::ios::beg);

    // tellg() reports -1 on failure; never feed that into resize().
    if (size <= 0)
    {
        data.clear();
        return;
    }

    data.resize(static_cast<std::size_t>(size), 0);

    // Read everything in one call. A byte-by-byte loop that stores get() in a
    // `char` and compares it with EOF cannot tell a 0xFF data byte from
    // end-of-file, so it truncates (or never terminates on) binary input.
    fin.read(data.data(), static_cast<std::streamsize>(size));

    // Keep only what was really read in case the stream came up short.
    data.resize(static_cast<std::size_t>(fin.gcount()));
}
/// Reads `filename` line by line and appends every line to `data`.
///
/// The file is opened in binary mode, so no newline translation happens on
/// Windows. A trailing carriage return left over from a CRLF line ending is
/// therefore stripped explicitly, which makes CRLF and LF input produce the
/// same lines.
///
/// @param filename Path of the text file to read.
/// @param data     Lines are appended here without their line terminator.
///                 Existing elements are kept. Nothing is appended when the
///                 file cannot be opened.
static void ReadFileContentsLines(std::string filename, std::vector<std::string>& data)
{
    std::ifstream fin(filename, std::ios::in | std::ios::binary);
    if (!fin.is_open())
    {
        std::cerr << "cannot open file " << filename << std::endl;
        return;
    }

    std::string line;
    while (std::getline(fin, line))
    {
        // getline() only removes '\n'; drop the '\r' of a "\r\n" ending.
        if (!line.empty() && line.back() == '\r')
        {
            line.pop_back();
        }
        data.push_back(line);
    }
}
/// Writes `data` to the file `filename`, replacing any previous contents.
///
/// @param filename Path of the file to create or overwrite.
/// @param data     Text to write; it is streamed out unchanged.
///
/// When the file cannot be opened a message is printed to standard output
/// and nothing is written.
static void WriteFileContentsByte(std::string filename, std::string& data)
{
    std::ofstream target(filename, std::ios::out);
    if (target.is_open())
    {
        target << data;
        target.close();
        return;
    }

    std::cout << "no open file " << filename << std::endl;
}
/// Converts an integer to its decimal text representation.
///
/// @param value Number to format.
/// @return The digits of `value`, with a leading '-' for negative numbers.
static std::string Number2Stri(int value)
{
    std::ostringstream formatter;
    formatter << value;
    return formatter.str();
}
int main(int argc, char *argv[])
{
std::cout << "Hello World!\n";
std::string filepath = "data.txt";
if (argc > 1) {
filepath = argv[1];
}
std::cout << "filepath=" << filepath.c_str() << std::endl;
std::vector<std::string> data;
ReadFileContentsLines(filepath, data);
std::cout << "data.size=" << data.size() << std::endl;
//每100行分割成一个html
std::string htmlHead =
"<!DOCTYPE html>\n"
"<html>\n"
"<head>\n"
"<meta charset = \"utf-8\">\n"
"<title>The C++ Programming Language</title>\n"
"</head>\n"
"<body>\n";
std::string htmlEnd =
"</body> </html>";
std::string htmlPrevious =
"<br/><br/><a href=\"a.hmtl\">上一个</a>";
std::string htmlNext =
" <a href=\"a.hmtl\">下一个</a>";
int len = data.size();
std::string output = htmlHead;
WIN32_FIND_DATA findData;
HANDLE hFind = FindFirstFile(L"output\\*", &findData);
if (hFind == INVALID_HANDLE_VALUE) {
std::cout << "文件夹不存在" << std::endl;
if (CreateDirectory(L"output", NULL)) {
std::cout << "文件夹创建成功" << std::endl;
}
else {
std::cout << "文件夹创建失败,错误代码:" << GetLastError() << std::endl;
}
}
else {
FindClose(hFind);
std::cout << "文件夹存在" << std::endl;
}
const int singleLen = 50;
for (int i = 0; i < len; ++i) {
//std::cout << i << ":" << data[i] << std::endl;
if (i != 0 && i % singleLen == 0) {
{
//save
int index = i / singleLen;
//test
//if (index > 10) {
// break;
//}
if (index == 1) {
}
else {
output += "<br/><br/><a href=\"index" + Number2Stri((i - 1) / singleLen) + ".html\">Previous</a> ";
}
output += Number2Stri(i / singleLen);
if ((index + 1) * singleLen >= len) {
}
else {
output += " <a href=\"index" + Number2Stri(index + 1) + ".html\">Next</a>";
}
output += htmlEnd;
std::string filep = "output/index" + Number2Stri(index) + ".html";
std::cout << "write to " << filep.c_str() << std::endl;
// std::cout << "output to " << output.c_str() << std::endl;
WriteFileContentsByte(filep, output);
output = htmlHead;
}
}
output += data[i] + "<br/>";
}
std::cout << "finish." << std::endl;
}
- 脚本会读入data.txt
- 按行处理,每50行生成一个html
- 命令行运行脚本
ToolPdf2Html.exe data.txt
- 在output文件夹下生成一堆html
Step 3
用微软的Edge浏览器打开html,浏览器自动翻译英文
通过页面底部的 Previous(上一页)和 Next(下一页)链接翻页