1.解决pdf含JavaScript脚本动作的问题,这里是验证pdf内部事件。相关pdf文件下载:
测试pdf文件
相关包 iTextSharp 5.5.13.4
iTextSharp
using iTextSharp.text.pdf;
using iTextSharp.text.pdf.parser;
/// <summary>
/// Scans a PDF for embedded actions and reports whether it is considered safe.
/// </summary>
/// <remarks>
/// The document is rejected when any of the following is found:
/// a <c>/JavaScript</c> entry in the catalog's <c>/Names</c> dictionary,
/// a catalog <c>/OpenAction</c> whose action type is <c>/JavaScript</c>,
/// or a Link annotation on any page that carries an action (<c>/A</c>) with a type (<c>/S</c>).
/// This is not an exhaustive PDF sanitizer; other action carriers (for example
/// additional-actions dictionaries) are not inspected.
/// </remarks>
/// <param name="stream">Stream positioned at the start of the PDF data.</param>
/// <returns><c>true</c> when no such action was found; otherwise <c>false</c>.</returns>
private Boolean IsPdfSafe(Stream stream)
{
    using (PdfReader reader = new PdfReader(stream))
    {
        // Document-level scripts run without any link being clicked, so check them first.
        if (HasDocumentLevelJavaScript(reader.Catalog))
        {
            Console.WriteLine("Action found: " + PdfName.JAVASCRIPT.ToString());
            return false;
        }

        for (int i = 1; i <= reader.NumberOfPages; i++)
        {
            PdfDictionary pageDic = reader.GetPageN(i);
            if (pageDic == null)
            {
                continue;
            }

            // GetAsArray resolves indirect references and yields null when /Annots is absent or not an array.
            PdfArray annots = pageDic.GetAsArray(PdfName.ANNOTS);
            if (annots == null)
            {
                continue;
            }

            for (int j = 0; j < annots.Size; j++)
            {
                // An entry that is not a dictionary comes back as null; skip it instead of dereferencing it.
                PdfDictionary annot = annots.GetAsDict(j);
                if (annot == null)
                {
                    continue;
                }

                if (!PdfName.LINK.Equals(annot.GetAsName(PdfName.SUBTYPE)))
                {
                    continue;
                }

                PdfDictionary actionDict = annot.GetAsDict(PdfName.A);
                if (actionDict == null)
                {
                    continue;
                }

                PdfObject action = actionDict.Get(PdfName.S);
                if (action != null)
                {
                    // This is the place to decide which specific action types to block;
                    // types seen so far include GOTO and URI (link navigation).
                    Console.WriteLine("Action found: " + action.ToString());
                    return false;
                }
            }
        }
    }
    return true;
}

/// <summary>
/// Tells whether the document catalog declares JavaScript, either through the
/// <c>/Names</c> dictionary or through an <c>/OpenAction</c> of type <c>/JavaScript</c>.
/// </summary>
/// <param name="catalog">The document catalog dictionary; may be null.</param>
/// <returns><c>true</c> when a document-level JavaScript entry is present.</returns>
private static Boolean HasDocumentLevelJavaScript(PdfDictionary catalog)
{
    if (catalog == null)
    {
        return false;
    }

    PdfDictionary names = catalog.GetAsDict(PdfName.NAMES);
    if (names != null && names.Get(PdfName.JAVASCRIPT) != null)
    {
        return true;
    }

    // /OpenAction may also be a destination array; GetAsDict returns null in that case.
    PdfDictionary openAction = catalog.GetAsDict(PdfName.OPENACTION);
    return openAction != null
        && PdfName.JAVASCRIPT.Equals(openAction.GetAsName(PdfName.S));
}
// Call site: validating from an in-memory buffer works well here, because the
// data can be saved once the check has passed without an error.
using (MemoryStream pdfBytes = new MemoryStream(buffer))
{
    Boolean isSafe = IsPdfSafe(pdfBytes);
    if (!isSafe)
    {
        //TODO return an error message
    }
}
以上就是判断脚本的相关代码。
2.下面介绍一下读取pdf内容的脚本内容,仅做参考。
// Reference only: for every page, follow Resources -> JS -> JavaScript and
// return false as soon as that nested dictionary is present.
for (int pageNo = 1; pageNo <= reader.NumberOfPages; pageNo++)
{
    PdfDictionary page = reader.GetPageN(pageNo);

    PdfDictionary resources = page.GetAsDict(PdfName.RESOURCES);
    if (resources == null)
    {
        continue;
    }

    PdfDictionary jsEntry = resources.GetAsDict(PdfName.JS);
    if (jsEntry == null)
    {
        continue;
    }

    PdfDictionary scriptDict = jsEntry.GetAsDict(PdfName.JAVASCRIPT);
    if (scriptDict != null)
    {
        return false;
    }
}
除了上面的iTextSharp,还有itext7也是做pdf相关处理的。iTextSharp已经停止更新两年了,如果要开发建议用itext7。上述代码仅供参考,因为版本不一样,调用接口也会不一样,开发的时候请查阅相关api的具体内容。
代码地址:https://github.com/itext/itextsharp
api地址:https://api.itextpdf.com/iText/dotnet/
3.如何制作带脚本pdf ,请参考下面博文。
https://blog.csdn.net/weixin_50464560/article/details/123841755?ops_request_misc=%257B%2522request%255Fid%2522%253A%2522172259109316800188565451%2522%252C%2522scm%2522%253A%252220140713.130102334.pc%255Fblog.%2522%257D&request_id=172259109316800188565451&biz_id=0&utm_medium=distribute.pc_search_result.none-task-blog-2blogfirst_rank_ecpm_v1~rank_v31_ecpm-5-123841755-null-null.nonecase&utm_term=pdf%20xss&spm=1018.2226.3001.4450