C# WinForm程序 PDF文档分割代码实现_c# pdf 成尺寸切成pdf

作者：人工智能uu | 2024-08-23 02:08:19

踩

c# pdf 成尺寸切成pdf

C#使用itextsharp对PDF分割处理
程序运行界面：

在这里插入图片描述

PDF分割操作共有以下几个步骤:
1.运行文件夹里面的PDFTools.exe文件
2.点击浏览按钮选择需要分割的PDF文件
3.选择分割方式，可以分割成单页的文档，或者固定多少页一个文档，或者自定义每个文档的页数
4.点击分割后，新的文件会生成在原始PDF的路径

PDF分割功能说明
能快速方便的把一个PDF文件任意分割成你所设想的多个PDF文件，简单、高效；一键操作，快速、方便
可以把文件转换成单页的PDF，比如一个文件有20页，就可以分割成20个文件
也可以分割成固定页数一个文档，比如20页的文档分割成每5页一个文档，就是拆分成4个文档
当然也可以自定义页面分割：
比如 1-5,6-8 这样就是分割成2个文档，分别是第1页到第5页的内容合并到一个文档、第6页到第8页的内容合并到一个文档。
比如 1&3,3&10 这样就是分割成2个文档，分别是第1页和第3页的内容合并到一个文档,第3页和第10页内容合并到一个文档。
比如 1&2,1&2,3-5,14 这样就是依次生成4个文档，分别是第1页和第2页的内容合并到一个文档、第1页和第2页的内容合并到一个文档、第3-5页的内容合并到一个文档、第14页单独一个文档（注意：输出文件名由页码决定，两组相同的页码会得到同名文件，后生成的会覆盖先生成的）。

PDF文档分割关键性代码

using System;
using System.Collections.Generic;
using iTextSharp.text;
using iTextSharp.text.pdf;
using System.Text;

namespace PDFTools
{
    /// <summary>
    /// File name: PdfExtractorUtility.
    /// Description: splits an existing PDF file into one or more new PDF files
    /// by copying pages with iTextSharp's <c>PdfCopy</c>.
    /// Copyright (C) EXT.AZHANG.
    /// Created: 2021.6.2.
    /// </summary>
    class PdfExtractorUtility
    {
        /// <summary>
        /// Copies the inclusive page range <paramref name="startPage"/>..<paramref name="endPage"/>
        /// of an existing PDF file into a new PDF file.
        /// </summary>
        /// <param name="sourcePdfPath">Path (directory + file name) of the PDF to read.</param>
        /// <param name="outputPdfPath">Path (directory + file name) of the PDF to create; an existing file is overwritten.</param>
        /// <param name="startPage">First page to copy (1-based).</param>
        /// <param name="endPage">Last page to copy (1-based, inclusive).</param>
        /// <exception cref="ArgumentOutOfRangeException">
        /// The range is empty, reversed, or not inside the source document.
        /// </exception>
        public void SplitPDF(string sourcePdfPath, string outputPdfPath, int startPage, int endPage)
        {
            PdfReader reader = new PdfReader(sourcePdfPath);
            try
            {
                CopyPageRange(reader, outputPdfPath, startPage, endPage);
            }
            finally
            {
                // Always release the source file, even when copying fails.
                reader.Close();
            }
        }


        /// <summary>
        /// Splits a PDF file into single-page documents named
        /// <c>&lt;source name&gt;_&lt;page number&gt;.pdf</c>, written next to the source file.
        /// </summary>
        /// <param name="sourcePdfPath">Path (directory + file name) of the PDF to split.</param>
        public void Split2SinglePage(string sourcePdfPath)
        {
            PdfReader reader = new PdfReader(sourcePdfPath);
            try
            {
                for (int pageNumber = 1; pageNumber <= reader.NumberOfPages; pageNumber++)
                {
                    string outputPdfPath = BuildOutputPath(sourcePdfPath, "_" + pageNumber);
                    CopyPageRange(reader, outputPdfPath, pageNumber, pageNumber);
                }
            }
            finally
            {
                reader.Close();
            }
        }


        /// <summary>
        /// Splits a PDF file evenly into <paramref name="count"/> documents named
        /// <c>&lt;source name&gt;_&lt;index&gt;.pdf</c>. When the pages cannot be divided evenly,
        /// the remaining pages are appended to the last document.
        /// </summary>
        /// <param name="sourcePdfPath">Path (directory + file name) of the PDF to split.</param>
        /// <param name="count">
        /// Number of documents to generate. If it exceeds the number of pages, one
        /// single-page document is produced per page instead.
        /// </param>
        /// <exception cref="ArgumentOutOfRangeException"><paramref name="count"/> is less than 1.</exception>
        public void Split2AveragePage(string sourcePdfPath, int count)
        {
            if (count < 1)
            {
                throw new ArgumentOutOfRangeException("count", "The number of documents must be at least 1.");
            }

            PdfReader reader = new PdfReader(sourcePdfPath);
            try
            {
                int totalPages = reader.NumberOfPages;

                // More documents than pages would give zero pages per document and an
                // empty (invalid) output file, so cap the document count at the page count.
                int documentCount = Math.Min(count, totalPages);
                if (documentCount < 1)
                {
                    return;
                }

                // Pages per document, always rounded down; the remainder goes to the last document.
                int pagesPerDocument = totalPages / documentCount;

                LogUtil.WriteLog("每个文档的页数：" + pagesPerDocument.ToString());

                for (int i = 1; i <= documentCount; i++)
                {
                    string outputPdfPath = BuildOutputPath(sourcePdfPath, "_" + i);
                    int startPage = (i - 1) * pagesPerDocument + 1;
                    int endPage = (i == documentCount) ? totalPages : i * pagesPerDocument;

                    LogUtil.WriteLog(outputPdfPath + " > " + startPage.ToString() + "-" + endPage.ToString());
                    CopyPageRange(reader, outputPdfPath, startPage, endPage);
                }
            }
            finally
            {
                reader.Close();
            }
        }



        /// <summary>
        /// Splits a PDF file into documents of a fixed number of pages, named
        /// <c>&lt;source name&gt;_&lt;index&gt;.pdf</c>. The last document holds whatever pages remain.
        /// </summary>
        /// <param name="sourcePdfPath">Path (directory + file name) of the PDF to split.</param>
        /// <param name="page">Number of pages per document.</param>
        /// <exception cref="ArgumentOutOfRangeException"><paramref name="page"/> is less than 1.</exception>
        public void Split2Page(string sourcePdfPath, int page)
        {
            if (page < 1)
            {
                throw new ArgumentOutOfRangeException("page", "The number of pages per document must be at least 1.");
            }

            PdfReader reader = new PdfReader(sourcePdfPath);
            try
            {
                int totalPages = reader.NumberOfPages;

                // Number of documents to generate: any fractional part rounds up.
                int documentCount = (int)Math.Ceiling((double)totalPages / (double)page);

                LogUtil.WriteLog("文档数量：" + documentCount.ToString());

                int startPage = 1;
                for (int i = 1; i <= documentCount; i++)
                {
                    string outputPdfPath = BuildOutputPath(sourcePdfPath, "_" + i);

                    // Clamp the chunk to the pages that are left; written this way so that
                    // a very large "page" value cannot overflow the addition.
                    int remainingPages = totalPages - startPage + 1;
                    int endPage = startPage + Math.Min(page, remainingPages) - 1;

                    LogUtil.WriteLog(outputPdfPath + " > " + startPage.ToString() + "-" + endPage.ToString());
                    CopyPageRange(reader, outputPdfPath, startPage, endPage);

                    startPage = endPage + 1;
                }
            }
            finally
            {
                reader.Close();
            }
        }


        /// <summary>
        /// Splits a PDF file according to a user-defined, comma-separated list of page
        /// specifications. Each entry produces one document next to the source file:
        /// <c>a-b</c> copies the continuous range a..b, <c>a&amp;b&amp;c</c> copies exactly the
        /// listed pages in that order, and a single number copies that one page.
        /// The output name is <c>&lt;source name&gt; &lt;entry&gt;.pdf</c>, so two identical
        /// entries write to the same file.
        /// </summary>
        /// <param name="sourcePdfPath">Path (directory + file name) of the PDF to split.</param>
        /// <param name="custpages">Custom page specification, e.g. <c>1-5,6&amp;8,14</c>. Empty entries are ignored.</param>
        /// <exception cref="ArgumentNullException"><paramref name="custpages"/> is null.</exception>
        /// <exception cref="FormatException">An entry is not a valid page specification.</exception>
        /// <exception cref="ArgumentOutOfRangeException">A page number is outside the source document.</exception>
        public void SplitPDFCustPage(string sourcePdfPath, string custpages)
        {
            if (custpages == null)
            {
                throw new ArgumentNullException("custpages");
            }

            string[] entries = custpages.Split(new Char[] { ',' });

            PdfReader reader = new PdfReader(sourcePdfPath);
            try
            {
                foreach (string rawEntry in entries)
                {
                    string entry = rawEntry.Trim();
                    if (entry.Length == 0)
                    {
                        // Tolerate stray separators such as a trailing comma.
                        continue;
                    }

                    LogUtil.WriteLog("自定义页面范围：" + entry);

                    if (entry.Contains("-"))
                    {
                        // Pages joined by a hyphen: copy the continuous range into one document.
                        string[] bounds = entry.Split(new Char[] { '-' });
                        if (bounds.Length != 2)
                        {
                            throw new FormatException("Invalid page range '" + entry + "'; expected the form 'start-end'.");
                        }

                        int startPage = ParsePageNumber(bounds[0]);
                        int endPage = ParsePageNumber(bounds[1]);
                        string outputPdfPath = BuildOutputPath(sourcePdfPath, " " + startPage + "-" + endPage);
                        LogUtil.WriteLog(outputPdfPath);
                        CopyPageRange(reader, outputPdfPath, startPage, endPage);
                    }
                    else if (entry.Contains("&"))
                    {
                        // Pages joined by an ampersand: copy exactly the listed pages into one document.
                        int[] pageNumbers = Array.ConvertAll<string, int>(entry.Split(new Char[] { '&' }), ParsePageNumber);
                        string outputPdfPath = BuildOutputPath(sourcePdfPath, " " + string.Join("&", pageNumbers));
                        LogUtil.WriteLog(outputPdfPath);
                        CopyPages(reader, outputPdfPath, pageNumbers);
                    }
                    else
                    {
                        // A single page number: that page becomes its own document.
                        int pageNumber = ParsePageNumber(entry);
                        string outputPdfPath = BuildOutputPath(sourcePdfPath, " " + pageNumber);
                        LogUtil.WriteLog(outputPdfPath);
                        CopyPageRange(reader, outputPdfPath, pageNumber, pageNumber);
                    }
                }
            }
            finally
            {
                reader.Close();
            }
        }

        /// <summary>
        /// Copies a set of not necessarily continuous pages of an existing PDF file into a
        /// new PDF file, in the order given by <paramref name="extractThesePages"/>.
        /// </summary>
        /// <param name="sourcePdfPath">Path (directory + file name) of the PDF to read.</param>
        /// <param name="outputPdfPath">Path (directory + file name) of the PDF to create; an existing file is overwritten.</param>
        /// <param name="extractThesePages">1-based page numbers to copy.</param>
        /// <exception cref="ArgumentNullException"><paramref name="extractThesePages"/> is null.</exception>
        /// <exception cref="ArgumentException"><paramref name="extractThesePages"/> is empty.</exception>
        /// <exception cref="ArgumentOutOfRangeException">A page number is outside the source document.</exception>
        public void SplitPDF2ExtractPages(string sourcePdfPath, string outputPdfPath, int[] extractThesePages)
        {
            if (extractThesePages == null)
            {
                throw new ArgumentNullException("extractThesePages");
            }

            PdfReader reader = new PdfReader(sourcePdfPath);
            try
            {
                CopyPages(reader, outputPdfPath, extractThesePages);
            }
            finally
            {
                reader.Close();
            }
        }

        /// <summary>
        /// Builds the path of an output file that sits in the same directory as the source
        /// file and is named <c>&lt;source name without extension&gt;&lt;suffix&gt;.pdf</c>.
        /// </summary>
        private static string BuildOutputPath(string sourcePdfPath, string suffix)
        {
            // GetDirectoryName yields "" for a bare file name and null for a root path;
            // Path.Combine then keeps the output relative instead of pointing at the drive root.
            string folder = System.IO.Path.GetDirectoryName(sourcePdfPath) ?? string.Empty;
            string fileName = System.IO.Path.GetFileNameWithoutExtension(sourcePdfPath) + suffix + ".pdf";
            return System.IO.Path.Combine(folder, fileName);
        }

        /// <summary>
        /// Parses one page number of a custom page specification, ignoring surrounding white space.
        /// </summary>
        private static int ParsePageNumber(string text)
        {
            return int.Parse(text.Trim(), System.Globalization.CultureInfo.InvariantCulture);
        }

        /// <summary>
        /// Validates the inclusive range against the open reader and copies it to a new file.
        /// </summary>
        private static void CopyPageRange(PdfReader reader, string outputPdfPath, int startPage, int endPage)
        {
            // Validate before building the list so that an absurd range cannot allocate memory.
            if (startPage < 1 || endPage < startPage || endPage > reader.NumberOfPages)
            {
                throw new ArgumentOutOfRangeException(
                    "startPage",
                    "Page range " + startPage + "-" + endPage + " is not valid for a document with "
                        + reader.NumberOfPages + " page(s).");
            }

            List<int> pageNumbers = new List<int>(endPage - startPage + 1);
            for (int pageNumber = startPage; pageNumber <= endPage; pageNumber++)
            {
                pageNumbers.Add(pageNumber);
            }

            CopyPages(reader, outputPdfPath, pageNumbers);
        }

        /// <summary>
        /// Copies the given pages, in order, from an open reader into a new PDF file.
        /// All page numbers are checked before the output file is created, so an invalid
        /// request does not leave an empty or truncated file behind.
        /// </summary>
        private static void CopyPages(PdfReader reader, string outputPdfPath, IList<int> pageNumbers)
        {
            if (pageNumbers == null || pageNumbers.Count == 0)
            {
                throw new ArgumentException("At least one page number is required.", "pageNumbers");
            }

            foreach (int pageNumber in pageNumbers)
            {
                if (pageNumber < 1 || pageNumber > reader.NumberOfPages)
                {
                    throw new ArgumentOutOfRangeException(
                        "pageNumbers",
                        "Page " + pageNumber + " does not exist; the document has "
                            + reader.NumberOfPages + " page(s).");
                }
            }

            // The using block releases the output file handle even if copying throws.
            using (System.IO.FileStream output = new System.IO.FileStream(outputPdfPath, System.IO.FileMode.Create))
            {
                // The new document takes its page size from the first copied page.
                Document document = new Document(reader.GetPageSizeWithRotation(pageNumbers[0]));
                PdfCopy copier = new PdfCopy(document, output);
                document.Open();

                foreach (int pageNumber in pageNumbers)
                {
                    copier.AddPage(copier.GetImportedPage(reader, pageNumber));
                }

                // Closing the document finishes writing the PDF to the stream.
                document.Close();
            }
        }

    }
}

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280

代码开源 github https://github.com/gmaaa123/PDFTools

本文内容由网友自发贡献，转载请注明出处：https://www.wpsshop.cn/w/人工智能uu/article/detail/1018966