Example: fast split into single pages

This is faster than using qpdf --pages to do it.
This commit is contained in:
Jay Berkenbilt 2014-06-07 13:04:30 -04:00
parent 9f8aba1db7
commit 0b2e9cb168
7 changed files with 195 additions and 1 deletions

View File

@ -1,5 +1,8 @@
2014-06-07 Jay Berkenbilt <ejb@ql.org>
* New example program: pdf-split-pages: efficiently split PDF
files into individual pages.
* Bug fix: don't fail on files that contain streams where /Filter
or /DecodeParms references a stream. Before, qpdf would try to
convert these to direct objects, which would fail because of the

View File

@ -5,7 +5,8 @@ BINS_examples = \
pdf-double-page-size \
pdf-invert-images \
pdf-create \
pdf-parse-content
pdf-parse-content \
pdf-split-pages
CBINS_examples = pdf-linearize
TARGETS_examples = $(foreach B,$(BINS_examples) $(CBINS_examples),examples/$(OUTPUT_DIR)/$(call binname,$(B)))

View File

@ -0,0 +1,77 @@
//
// This is a stand-alone example of splitting a PDF into individual
// pages. It is much faster than using the qpdf command-line tool to
// split into separate files per page.
//
#include <qpdf/QPDF.hh>
#include <qpdf/QPDFWriter.hh>
#include <qpdf/QUtil.hh>
#include <cstdlib>
#include <cstring>
#include <exception>
#include <iostream>
#include <string>
#include <vector>
static bool static_id = false;
static void process(char const* whoami,
char const* infile,
std::string outprefix)
{
QPDF inpdf;
inpdf.processFile(infile);
std::vector<QPDFObjectHandle> const& pages = inpdf.getAllPages();
int pageno_len = QUtil::int_to_string(pages.size()).length();
int pageno = 0;
for (std::vector<QPDFObjectHandle>::const_iterator iter = pages.begin();
iter != pages.end(); ++iter)
{
QPDFObjectHandle page = *iter;
std::string outfile =
outprefix + QUtil::int_to_string(++pageno, pageno_len) + ".pdf";
QPDF outpdf;
outpdf.emptyPDF();
outpdf.addPage(page, false);
QPDFWriter outpdfw(outpdf, outfile.c_str());
if (static_id)
{
// For the test suite, uncompress streams and use static
// IDs.
outpdfw.setStaticID(true);
outpdfw.setStreamDataMode(qpdf_s_uncompress);
}
outpdfw.write();
}
}
// Program entry point.
//
// Usage: <program> infile outprefix
//
// Returns 0 on success and 2 on a usage error or when process() throws
// an exception derived from std::exception.
int main(int argc, char* argv[])
{
    char* whoami = QUtil::getWhoami(argv[0]);

    // For libtool's sake....
    if (std::strncmp(whoami, "lt-", 3) == 0)
    {
        whoami += 3;
    }

    // For test suite.  The option string is compared literally,
    // including its leading space.
    if ((argc > 1) && (std::strcmp(argv[1], " --static-id") == 0))
    {
        static_id = true;
        --argc;
        ++argv;
    }

    if (argc != 3)
    {
        std::cerr << "Usage: " << whoami << " infile outprefix" << std::endl;
        // Stop here: without both arguments, argv[1] and argv[2] must
        // not be handed to process().
        return 2;
    }

    try
    {
        process(whoami, argv[1], argv[2]);
    }
    catch (std::exception const& e)
    {
        // Catch by const reference so that what() reports the message
        // of the derived exception instead of a sliced base object.
        std::cerr << whoami << ": exception: " << e.what() << std::endl;
        return 2;
    }
    return 0;
}

View File

@ -0,0 +1,33 @@
#!/usr/bin/env perl
#
# Test driver script for the pdf-split-pages example: splits in.pdf
# into single-page files and compares each against its expected output.
#
require 5.008;
use warnings;
use strict;

# Abort if the test directory cannot be entered; otherwise the tests
# (and the unlink in cleanup) would run in the wrong directory.
chdir("pdf-split-pages") or die "chdir testdir failed: $!\n";

require TestDriver;

my $td = TestDriver->new('pdf-split-pages');

# Remove output left over from any earlier run.
cleanup();

# The first argument is passed with a leading space inside the quotes.
$td->runtest("split",
	     {$td->COMMAND => "pdf-split-pages ' --static-id' in.pdf out"},
	     {$td->STRING => "", $td->EXIT_STATUS => 0});
$td->runtest("check page 1",
	     {$td->FILE => "out1.pdf"},
	     {$td->FILE => "exp1.pdf"});
$td->runtest("check page 2",
	     {$td->FILE => "out2.pdf"},
	     {$td->FILE => "exp2.pdf"});

cleanup();

# The argument is the number of runtest calls made above.
$td->report(3);

# Delete the generated out?.pdf files from the current directory.
sub cleanup
{
    unlink (<out?.pdf>);
}

View File

@ -0,0 +1,40 @@
%PDF-1.3
%¿÷¢þ
1 0 obj
<< /Pages 2 0 R /Type /Catalog >>
endobj
2 0 obj
<< /Count 1 /Kids [ 3 0 R ] /Type /Pages >>
endobj
3 0 obj
<< /Contents 4 0 R /MediaBox [ 0 0 612 792 ] /Parent 2 0 R /Resources << /Font << /F1 5 0 R >> /ProcSet 6 0 R >> /Type /Page >>
endobj
4 0 obj
<< /Length 44 >>
stream
BT
/F1 24 Tf
72 720 Td
(Page 1) Tj
ET
endstream
endobj
5 0 obj
<< /BaseFont /Helvetica /Encoding /WinAnsiEncoding /Name /F1 /Subtype /Type1 /Type /Font >>
endobj
6 0 obj
[ /PDF /Text ]
endobj
xref
0 7
0000000000 65535 f
0000000015 00000 n
0000000064 00000 n
0000000123 00000 n
0000000266 00000 n
0000000359 00000 n
0000000466 00000 n
trailer << /Root 1 0 R /Size 7 /ID [<31415926535897932384626433832795><31415926535897932384626433832795>] >>
startxref
496
%%EOF

View File

@ -0,0 +1,40 @@
%PDF-1.3
%¿÷¢þ
1 0 obj
<< /Pages 2 0 R /Type /Catalog >>
endobj
2 0 obj
<< /Count 1 /Kids [ 3 0 R ] /Type /Pages >>
endobj
3 0 obj
<< /Contents 4 0 R /MediaBox [ 0 0 612 792 ] /Parent 2 0 R /Resources << /Font << /F1 5 0 R >> /ProcSet 6 0 R >> /Type /Page >>
endobj
4 0 obj
<< /Length 44 >>
stream
BT
/F1 24 Tf
72 720 Td
(Page 2) Tj
ET
endstream
endobj
5 0 obj
<< /BaseFont /Helvetica /Encoding /WinAnsiEncoding /Name /F1 /Subtype /Type1 /Type /Font >>
endobj
6 0 obj
[ /PDF /Text ]
endobj
xref
0 7
0000000000 65535 f
0000000015 00000 n
0000000064 00000 n
0000000123 00000 n
0000000266 00000 n
0000000359 00000 n
0000000466 00000 n
trailer << /Root 1 0 R /Size 7 /ID [<31415926535897932384626433832795><31415926535897932384626433832795>] >>
startxref
496
%%EOF

Binary file not shown.