带标签的 PDF(也称为 PDF/UA)是一种包含底层标签树(类似于 HTML)的 PDF,用于定义文档的结构。这些标签可以帮助屏幕阅读器浏览整个文档而不会丢失任何信息。本文介绍如何使用 Spire.PDF for .NET 在 C# 和 VB.NET 中从头开始创建带标签的 PDF。
Spire.PDF for .NET 是一款独立 PDF 控件,用于 .NET 程序中创建、编辑和操作 PDF 文档。使用 Spire.PDF 类库,开发人员可以新建一个 PDF 文档或者对现有的 PDF 文档进行处理,且无需安装 Adobe Acrobat。
E-iceblue 功能类库 Spire 系列文档处理组件均由中国本土团队研发,不依赖第三方软件,不受其他国家的技术或法律法规限制,同时适配国产操作系统如中科方德、中标麒麟等,兼容国产文档处理软件 WPS(如 .wps/.et/.dps 等格式)。
Spire.PDF for .NET 下载
安装 Spire.PDF for .NET
首先,您需要将 Spire.PDF for .NET 包中包含的 DLL 文件作为引用添加到您的 .NET 项目中。您也可以通过 NuGet 程序包管理器控制台安装:
PM> Install-Package Spire.PDF
创建具有丰富元素的标签 PDF
要在带标签的 PDF 文档中添加结构元素,我们必须首先创建PdfTaggedContent类的对象。然后,使用PdfTaggedContent.StructureTreeRoot.AppendChildElement()方法将元素添加到根。以下是使用 Spire.PDF for .NET 向带标签的 PDF 添加“标题”元素的详细步骤。
- 创建一个PdfDocument对象并使用PdfDocument.Pages.Add()方法向其中添加一个页面。
- 创建PdfTaggedContent类的对象。
- 使用PdfTaggedContent.SetPdfUA1Identification()方法使文档符合 PDF/UA 识别。
- 使用PdfTaggedContent.StructureTreeRoot.AppendChildElement()方法将“文档”元素添加到文档的根目录。
- 使用PdfStructureElement.AppendChildElement()方法在“文档”元素下添加“标题”元素。
- 使用PdfStructureElement.BeginMarkedContent()方法添加开始标签,指示标题元素的开始。
- 使用PdfPageBase.Canvas.DrawString()方法在页面上绘制标题文本。
- 使用PdfStructureElement.EndMarkedContent()方法添加结束标签,表示标题元素到此结束。
- 使用PdfDocument.SaveToFile()方法将文档保存为 PDF 文件。
以下代码片段提供了一个示例,说明如何在 C# 和 VB.NET 中为带标签的 PDF 文档创建各种元素,包括文档、标题、段落、图形和表格。
【C#】
using Spire.Pdf;
using Spire.Pdf.Graphics;
using Spire.Pdf.Interchange.TaggedPdf;
using Spire.Pdf.Tables;
using System.Data;
using System.Drawing;

namespace CreatePDFUA
{
    /// <summary>
    /// Sample program that builds a tagged PDF from scratch with Spire.PDF:
    /// one A4 page holding a heading, a paragraph, a figure and a table, each
    /// drawn between BeginMarkedContent/EndMarkedContent calls on its own
    /// structure element. The result is written to "CreatePDFUA.pdf".
    /// </summary>
    class Program
    {
        /// <summary>
        /// Entry point. Creates the document, adds the structure elements and
        /// their content, and saves the file.
        /// </summary>
        /// <param name="args">Command-line arguments; not used.</param>
        static void Main(string[] args)
        {
            //Create a PdfDocument object
            PdfDocument doc = new PdfDocument();

            //Add a page
            PdfPageBase page = doc.Pages.Add(PdfPageSize.A4, new PdfMargins(20));

            //Set tab order
            page.SetTabOrder(TabOrder.Structure);

            //Create an object of PdfTaggedContent class
            PdfTaggedContent taggedContent = new PdfTaggedContent(doc);

            //Set language and title for the document
            taggedContent.SetLanguage("en-US");
            taggedContent.SetTitle("test");

            //Set PDF/UA1 identification
            taggedContent.SetPdfUA1Identification();

            //Create font and brush
            PdfTrueTypeFont font = new PdfTrueTypeFont(new Font("Times New Roman", 14), true);
            PdfSolidBrush brush = new PdfSolidBrush(Color.Black);

            //Add a "document" element
            PdfStructureElement document = taggedContent.StructureTreeRoot.AppendChildElement(PdfStandardStructTypes.Document);

            //Add a "heading" element
            PdfStructureElement heading1 = document.AppendChildElement(PdfStandardStructTypes.HeadingLevel1);
            heading1.BeginMarkedContent(page);
            string headingText = "What Is a Tagged PDF?";
            page.Canvas.DrawString(headingText, font, brush, new PointF(0, 0));
            heading1.EndMarkedContent(page);

            //Add a "paragraph" element
            PdfStructureElement paragraph = document.AppendChildElement(PdfStandardStructTypes.Paragraph);
            paragraph.BeginMarkedContent(page);
            // The text is split into concatenated pieces; each piece must stay on
            // one source line because a regular string literal cannot span lines.
            string paragraphText = "“Tagged PDF” doesn’t seem like a life-changing term. But for some, it is. For people who are " +
                "blind or have low vision and use assistive technology (such as screen readers and connected Braille displays) to " +
                "access information, an untagged PDF means they are missing out on information contained in the document because assistive " +
                "technology cannot “read” untagged PDFs. " +
                "Digital accessibility has opened up so many avenues to information that were once " +
                "closed to people with visual disabilities, but PDFs often get left out of the equation.";
            RectangleF rect = new RectangleF(0, 30, page.Canvas.ClientSize.Width, page.Canvas.ClientSize.Height);
            page.Canvas.DrawString(paragraphText, font, brush, rect);
            paragraph.EndMarkedContent(page);

            //Add a "figure" element
            // NOTE(review): the image path is hard-coded; change it to an image that exists on your machine.
            // NOTE(review): no alternate text is set on the figure here - confirm whether your PDF/UA checker requires it.
            PdfStructureElement figure = document.AppendChildElement(PdfStandardStructTypes.Figure);
            figure.BeginMarkedContent(page);
            PdfImage image = PdfImage.FromFile(@"C:\Users\Administrator\Desktop\pdfua.png");
            page.Canvas.DrawImage(image, new PointF(0, 150));
            figure.EndMarkedContent(page);

            //Add a "table" element
            PdfStructureElement table = document.AppendChildElement(PdfStandardStructTypes.Table);
            table.BeginMarkedContent(page);
            PdfTable pdfTable = new PdfTable();
            pdfTable.Style.DefaultStyle.Font = font;
            DataTable dataTable = new DataTable();
            dataTable.Columns.Add("Name");
            dataTable.Columns.Add("Age");
            dataTable.Columns.Add("Sex");
            dataTable.Rows.Add(new string[] { "John", "22", "Male" });
            dataTable.Rows.Add(new string[] { "Katty", "25", "Female" });
            pdfTable.DataSource = dataTable;
            pdfTable.Style.ShowHeader = true;
            pdfTable.Draw(page.Canvas, new PointF(0, 280), 300f);
            table.EndMarkedContent(page);

            //Save the document to file
            doc.SaveToFile("CreatePDFUA.pdf");
        }
    }
}
【VB.NET】
Imports Spire.Pdf
Imports Spire.Pdf.Graphics
Imports Spire.Pdf.Interchange.TaggedPdf
Imports Spire.Pdf.Tables
Imports System.Data
Imports System.Drawing

Namespace CreatePDFUA
    ''' <summary>
    ''' Sample program that builds a tagged PDF from scratch with Spire.PDF:
    ''' one A4 page holding a heading, a paragraph, a figure and a table, each
    ''' drawn between BeginMarkedContent/EndMarkedContent calls on its own
    ''' structure element. The result is written to "CreatePDFUA.pdf".
    ''' </summary>
    Class Program
        ''' <summary>
        ''' Entry point. Creates the document, adds the structure elements and
        ''' their content, and saves the file.
        ''' </summary>
        ''' <param name="args">Command-line arguments; not used.</param>
        Shared Sub Main(ByVal args() As String)
            'Create a PdfDocument object
            Dim doc As PdfDocument = New PdfDocument()

            'Add a page
            Dim page As PdfPageBase = doc.Pages.Add(PdfPageSize.A4, New PdfMargins(20))

            'Set tab order
            page.SetTabOrder(TabOrder.Structure)

            'Create an object of PdfTaggedContent class
            Dim taggedContent As PdfTaggedContent = New PdfTaggedContent(doc)

            'Set language and title for the document
            taggedContent.SetLanguage("en-US")
            taggedContent.SetTitle("test")

            'Set PDF/UA1 identification
            taggedContent.SetPdfUA1Identification()

            'Create font and brush
            Dim font As PdfTrueTypeFont = New PdfTrueTypeFont(New Font("Times New Roman", 14), True)
            Dim brush As PdfSolidBrush = New PdfSolidBrush(Color.Black)

            'Add a "document" element
            Dim document As PdfStructureElement = taggedContent.StructureTreeRoot.AppendChildElement(PdfStandardStructTypes.Document)

            'Add a "heading" element
            Dim heading1 As PdfStructureElement = document.AppendChildElement(PdfStandardStructTypes.HeadingLevel1)
            heading1.BeginMarkedContent(page)
            Dim headingText As String = "What Is a Tagged PDF?"
            page.Canvas.DrawString(headingText, font, brush, New PointF(0, 0))
            heading1.EndMarkedContent(page)

            'Add a "paragraph" element
            Dim paragraph As PdfStructureElement = document.AppendChildElement(PdfStandardStructTypes.Paragraph)
            paragraph.BeginMarkedContent(page)
            'Visual Basic treats the curly double quotes U+201C / U+201D as string
            'delimiters, so they cannot be typed inside a literal; build them by
            'code point so the drawn text keeps the typographic quotes.
            Dim leftQuote As Char = Microsoft.VisualBasic.ChrW(&H201C)
            Dim rightQuote As Char = Microsoft.VisualBasic.ChrW(&H201D)
            Dim paragraphText As String = leftQuote & "Tagged PDF" & rightQuote & " doesn’t seem like a life-changing term. But for some, it is. For people who are " & _
                "blind or have low vision and use assistive technology (such as screen readers and connected Braille displays) to " & _
                "access information, an untagged PDF means they are missing out on information contained in the document because assistive " & _
                "technology cannot " & leftQuote & "read" & rightQuote & " untagged PDFs. " & _
                "Digital accessibility has opened up so many avenues to information that were once " & _
                "closed to people with visual disabilities, but PDFs often get left out of the equation."
            Dim rect As RectangleF = New RectangleF(0, 30, page.Canvas.ClientSize.Width, page.Canvas.ClientSize.Height)
            page.Canvas.DrawString(paragraphText, font, brush, rect)
            paragraph.EndMarkedContent(page)

            'Add a "figure" element
            'NOTE(review): the image path is hard-coded; change it to an image that exists on your machine.
            Dim figure As PdfStructureElement = document.AppendChildElement(PdfStandardStructTypes.Figure)
            figure.BeginMarkedContent(page)
            Dim image As PdfImage = PdfImage.FromFile("C:\Users\Administrator\Desktop\pdfua.png")
            page.Canvas.DrawImage(image, New PointF(0, 150))
            figure.EndMarkedContent(page)

            'Add a "table" element
            Dim table As PdfStructureElement = document.AppendChildElement(PdfStandardStructTypes.Table)
            table.BeginMarkedContent(page)
            Dim pdfTable As PdfTable = New PdfTable()
            pdfTable.Style.DefaultStyle.Font = font
            Dim dataTable As DataTable = New DataTable()
            dataTable.Columns.Add("Name")
            dataTable.Columns.Add("Age")
            dataTable.Columns.Add("Sex")
            dataTable.Rows.Add(New String() {"John", "22", "Male"})
            dataTable.Rows.Add(New String() {"Katty", "25", "Female"})
            pdfTable.DataSource = dataTable
            pdfTable.Style.ShowHeader = True
            pdfTable.Draw(page.Canvas, New PointF(0, 280), 300.0F)
            table.EndMarkedContent(page)

            'Save the document to file
            doc.SaveToFile("CreatePDFUA.pdf")
        End Sub
    End Class
End Namespace