Support page collation with --collate (fixes #259)

This commit is contained in:
Jay Berkenbilt 2019-01-04 15:12:31 -05:00
parent 16fd6e64f9
commit 7b6ab900dc
6 changed files with 1049 additions and 0 deletions

View File

@ -1,5 +1,9 @@
2019-01-04 Jay Berkenbilt <ejb@ql.org>
* Add new option --collate. When specified, the semantics of
--pages change from concatenation to collation. See the manual for
a more detailed discussion. Fixes #259.
* Add new method QPDFWriter::getFinalVersion, which returns the
PDF version that will ultimately be written to the final file. See
comments in QPDFWriter.hh for some restrictions on its use. Fixes

View File

@ -124,6 +124,7 @@ struct Options
show_filtered_stream_data(false),
show_pages(false),
show_page_images(false),
collate(false),
json(false),
check(false),
require_outfile(true),
@ -202,6 +203,7 @@ struct Options
bool show_filtered_stream_data;
bool show_pages;
bool show_page_images;
bool collate;
bool json;
std::set<std::string> json_keys;
std::set<std::string> json_objects;
@ -216,6 +218,7 @@ struct Options
struct QPDFPageData
{
QPDFPageData(std::string const& filename, QPDF* qpdf, char const* range);
QPDFPageData(QPDFPageData const& other, int page);
std::string filename;
QPDF* qpdf;
@ -557,6 +560,7 @@ class ArgParser
void argEncryptionFilePassword(char* parameter);
void argPages();
void argRotate(char* parameter);
void argCollate();
void argStreamData(char* parameter);
void argCompressStreams(char* parameter);
void argDecodeLevel(char* parameter);
@ -743,6 +747,7 @@ ArgParser::initOptionTable()
&ArgParser::argRotate, "[+|-]angle:page-range");
char const* stream_data_choices[] =
{"compress", "preserve", "uncompress", 0};
(*t)["collate"] = oe_bare(&ArgParser::argCollate);
(*t)["stream-data"] = oe_requiredChoices(
&ArgParser::argStreamData, stream_data_choices);
(*t)["compress-streams"] = oe_requiredChoices(
@ -1062,6 +1067,12 @@ ArgParser::argEncryptionFilePassword(char* parameter)
o.encryption_file_password = parameter;
}
// Handler for the bare --collate command-line option (registered in
// the option table under "collate"). It only records the request by
// setting o.collate; the collation itself happens later when the
// --pages specifications are processed.
void
ArgParser::argCollate()
{
o.collate = true;
}
void
ArgParser::argPages()
{
@ -1711,6 +1722,8 @@ Basic Options\n\
--decrypt remove any encryption on the file\n\
--password-is-hex-key treat primary password option as a hex-encoded key\n\
--pages options -- select specific pages from one or more files\n\
--collate causes files specified in --pages to be collated\n\
rather than concatenated\n\
--rotate=[+|-]angle[:page-range]\n\
rotate each specified page 90, 180, or 270 degrees;\n\
rotate all pages if no page range is given\n\
@ -1854,6 +1867,14 @@ If the page range is omitted, the range of 1-z is assumed. qpdf decides\n\
that the page range is omitted if the range argument is either -- or a\n\
valid file name and not a valid range.\n\
\n\
The usual behavior of --pages is to add all pages from the first file,\n\
then all pages from the second file, and so on. If the --collate option\n\
is specified, then pages are collated instead. In other words, qpdf takes\n\
the first page from the first file, the first page from the second file,\n\
and so on until it runs out of files; then it takes the second page from\n\
each file, etc. When a file runs out of pages, it is skipped until all\n\
specified pages are taken from all files.\n\
\n\
See the manual for examples and a discussion of additional subtleties.\n\
\n\
\n\
@ -2225,6 +2246,14 @@ QPDFPageData::QPDFPageData(std::string const& filename,
}
}
// Construct a page data entry that carries over the file name, QPDF
// pointer, and original page list of "other" but whose selection
// consists of exactly one entry: the given page.
QPDFPageData::QPDFPageData(QPDFPageData const& other, int page) :
    filename(other.filename),
    qpdf(other.qpdf),
    orig_pages(other.orig_pages)
{
    // selected_pages starts out empty, so this leaves it holding
    // only "page".
    selected_pages.push_back(page);
}
static void parse_version(std::string const& full_version_string,
std::string& version, int& extension_level)
{
@ -3553,6 +3582,35 @@ static void handle_page_specs(QPDF& pdf, Options& o,
dh.removePage(*iter);
}
if (o.collate && (parsed_specs.size() > 1))
{
    // Interleave the specs: emit the first selected page of every
    // spec in spec order, then the second selected page of every
    // spec, and so on. A spec that has no page at the current
    // position is passed over, so shorter specs simply drop out.
    size_t const spec_count = parsed_specs.size();

    // The longest selection determines how many rounds are needed.
    size_t rounds = 0;
    for (size_t s = 0; s < spec_count; ++s)
    {
        size_t const n_selected =
            parsed_specs.at(s).selected_pages.size();
        if (n_selected > rounds)
        {
            rounds = n_selected;
        }
    }

    std::vector<QPDFPageData> collated;
    for (size_t round = 0; round < rounds; ++round)
    {
        for (size_t s = 0; s < spec_count; ++s)
        {
            QPDFPageData& spec = parsed_specs.at(s);
            if (round >= spec.selected_pages.size())
            {
                // This spec has already contributed all its pages.
                continue;
            }
            // Each collated entry is a one-page spec derived from
            // the spec it was taken from.
            collated.push_back(
                QPDFPageData(spec, spec.selected_pages.at(round)));
        }
    }

    // From here on, the collated single-page specs stand in for the
    // specs as originally parsed.
    parsed_specs = collated;
}
// Add all the pages from all the files in the order specified.
// Keep track of any pages from the original file that we are
// selecting.

View File

@ -1737,6 +1737,28 @@ $td->runtest("check output",
{$td->FILE => "a.pdf"},
{$td->FILE => "shared-images-errors-1-3-out.pdf"});
show_ntests();
# ----------
$td->notify("--- Collating ---");
# Each case is [description, primary input file name without .pdf,
# arguments placed between --pages and --]. The description also
# prefixes the name of the expected output file.
my @collate = (
    ["three-files", "collate-odd",
     "collate-odd.pdf 1-5 minimal.pdf collate-even.pdf 7-1"],
    );
# Two runtest calls per case: run qpdf, then compare its output.
$n_tests += 2 * scalar(@collate);
foreach my $case (@collate)
{
    my ($description, $first, $args) = @{$case};
    my $command = "qpdf --qdf --static-id --collate $first.pdf" .
        " --pages $args -- a.pdf";
    my $expected = "$description-collate-out.pdf";
    # qpdf must succeed without printing anything.
    $td->runtest("collate pages: $description",
                 {$td->COMMAND => $command},
                 {$td->STRING => "", $td->EXIT_STATUS => 0});
    # The generated a.pdf must match the stored expected file.
    $td->runtest("check output",
                 {$td->FILE => "a.pdf"},
                 {$td->FILE => $expected});
}
show_ntests();
# ----------
$td->notify("--- PDF From Scratch ---");

Binary file not shown.

Binary file not shown.

View File

@ -0,0 +1,965 @@
%PDF-1.3
%¿÷¢þ
%QDF-1.0
%% Original object ID: 1 0
1 0 obj
<<
/Outlines 2 0 R
/PageLabels <<
/Nums [
0
<<
/P ()
/St 1
>>
1
<<
/St 2
>>
2
<<
/S /D
/St 3
>>
3
<<
/S /r
/St 1
>>
4
<<
/P ()
/St 1
>>
5
<<
/S /r
/St 3
>>
6
<<
/S /r
/St 6
>>
7
<<
/S /r
/St 5
>>
8
<<
/P ()
/St 1
>>
10
<<
/S /r
/St 4
>>
11
<<
/S /r
/St 2
>>
12
<<
/P ()
/St 2
>>
]
>>
/PageMode /UseOutlines
/Pages 3 0 R
/Type /Catalog
>>
endobj
%% Original object ID: 2 0
2 0 obj
<<
/Count 6
/First 4 0 R
/Last 5 0 R
/Type /Outlines
>>
endobj
%% Original object ID: 3 0
3 0 obj
<<
/Count 13
/Kids [
6 0 R
7 0 R
8 0 R
9 0 R
10 0 R
11 0 R
12 0 R
13 0 R
14 0 R
15 0 R
16 0 R
17 0 R
18 0 R
]
/Type /Pages
>>
endobj
%% Original object ID: 4 0
4 0 obj
<<
/Count 4
/Dest [
null
/XYZ
null
null
null
]
/First 19 0 R
/Last 20 0 R
/Next 5 0 R
/Parent 2 0 R
/Title (Isís 1 -> 5: /XYZ null null null)
/Type /Outline
>>
endobj
%% Original object ID: 5 0
5 0 obj
<<
/Dest [
null
/XYZ
66
756
3
]
/Parent 2 0 R
/Prev 4 0 R
/Title (Trepak 2 -> 15: /XYZ 66 756 3)
/Type /Outline
>>
endobj
%% Page 1
%% Original object ID: 6 0
6 0 obj
<<
/Contents 21 0 R
/MediaBox [
0
0
612
792
]
/Parent 3 0 R
/Resources <<
/Font <<
/F1 23 0 R
>>
/ProcSet 24 0 R
>>
/Type /Page
>>
endobj
%% Page 2
%% Original object ID: 47 0
7 0 obj
<<
/Contents 25 0 R
/MediaBox [
0
0
612
792
]
/Parent 3 0 R
/Resources <<
/Font <<
/F1 27 0 R
>>
/ProcSet 28 0 R
>>
/Type /Page
>>
endobj
%% Page 3
%% Original object ID: 51 0
8 0 obj
<<
/Contents 29 0 R
/MediaBox [
0
0
612
792
]
/Parent 3 0 R
/Resources <<
/Font <<
/F1 31 0 R
>>
/ProcSet 32 0 R
>>
/Type /Page
>>
endobj
%% Page 4
%% Original object ID: 7 0
9 0 obj
<<
/Contents 33 0 R
/MediaBox [
0
0
612
792
]
/Parent 3 0 R
/Resources <<
/Font <<
/F1 23 0 R
>>
/ProcSet 24 0 R
>>
/Type /Page
>>
endobj
%% Page 5
%% Original object ID: 55 0
10 0 obj
<<
/Contents 35 0 R
/MediaBox [
0
0
612
792
]
/Parent 3 0 R
/Resources <<
/Font <<
/F1 31 0 R
>>
/ProcSet 32 0 R
>>
/Type /Page
>>
endobj
%% Page 6
%% Original object ID: 8 0
11 0 obj
<<
/Contents 37 0 R
/MediaBox [
0
0
612
792
]
/Parent 3 0 R
/Resources <<
/Font <<
/F1 23 0 R
>>
/ProcSet 24 0 R
>>
/Type /Page
>>
endobj
%% Page 7
%% Original object ID: 57 0
12 0 obj
<<
/Contents 39 0 R
/MediaBox [
0
0
612
792
]
/Parent 3 0 R
/Resources <<
/Font <<
/F1 31 0 R
>>
/ProcSet 32 0 R
>>
/Type /Page
>>
endobj
%% Page 8
%% Original object ID: 9 0
13 0 obj
<<
/Contents 41 0 R
/MediaBox [
0
0
612
792
]
/Parent 3 0 R
/Resources <<
/Font <<
/F1 23 0 R
>>
/ProcSet 24 0 R
>>
/Type /Page
>>
endobj
%% Page 9
%% Original object ID: 59 0
14 0 obj
<<
/Contents 43 0 R
/MediaBox [
0
0
612
792
]
/Parent 3 0 R
/Resources <<
/Font <<
/F1 31 0 R
>>
/ProcSet 32 0 R
>>
/Type /Page
>>
endobj
%% Page 10
%% Original object ID: 10 0
15 0 obj
<<
/Contents 45 0 R
/MediaBox [
0
0
612
792
]
/Parent 3 0 R
/Resources <<
/Font <<
/F1 23 0 R
>>
/ProcSet 24 0 R
>>
/Type /Page
>>
endobj
%% Page 11
%% Original object ID: 61 0
16 0 obj
<<
/Contents 47 0 R
/MediaBox [
0
0
612
792
]
/Parent 3 0 R
/Resources <<
/Font <<
/F1 31 0 R
>>
/ProcSet 32 0 R
>>
/Type /Page
>>
endobj
%% Page 12
%% Original object ID: 63 0
17 0 obj
<<
/Contents 49 0 R
/MediaBox [
0
0
612
792
]
/Parent 3 0 R
/Resources <<
/Font <<
/F1 31 0 R
>>
/ProcSet 32 0 R
>>
/Type /Page
>>
endobj
%% Page 13
%% Original object ID: 65 0
18 0 obj
<<
/Contents 51 0 R
/MediaBox [
0
0
612
792
]
/Parent 3 0 R
/Resources <<
/Font <<
/F1 31 0 R
>>
/ProcSet 32 0 R
>>
/Type /Page
>>
endobj
%% Original object ID: 21 0
19 0 obj
<<
/Count -3
/Dest [
null
/Fit
]
/First 53 0 R
/Last 54 0 R
/Next 20 0 R
/Parent 4 0 R
/Title (Amanda 1.1 -> 11: /Fit)
/Type /Outline
>>
endobj
%% Original object ID: 22 0
20 0 obj
<<
/Count 2
/Dest [
null
/FitH
792
]
/First 55 0 R
/Last 56 0 R
/Parent 4 0 R
/Prev 19 0 R
/Title <feff00530061006e00640079002000f703a303b103bd03b403b900f700200031002e00320020002d003e002000310033003a0020002f00460069007400480020003700390032>
/Type /Outline
>>
endobj
%% Contents for page 1
%% Original object ID: 23 0
21 0 obj
<<
/Length 22 0 R
>>
stream
BT
/F1 24 Tf
72 720 Td
(Potato 0) Tj
ET
endstream
endobj
22 0 obj
46
endobj
%% Original object ID: 24 0
23 0 obj
<<
/BaseFont /Helvetica
/Encoding /WinAnsiEncoding
/Name /F1
/Subtype /Type1
/Type /Font
>>
endobj
%% Original object ID: 25 0
24 0 obj
[
/PDF
/Text
]
endobj
%% Contents for page 2
%% Original object ID: 48 0
25 0 obj
<<
/Length 26 0 R
>>
stream
BT
/F1 24 Tf
72 720 Td
(Potato) Tj
ET
endstream
endobj
26 0 obj
44
endobj
%% Original object ID: 49 0
27 0 obj
<<
/BaseFont /Helvetica
/Encoding /WinAnsiEncoding
/Name /F1
/Subtype /Type1
/Type /Font
>>
endobj
%% Original object ID: 50 0
28 0 obj
[
/PDF
/Text
]
endobj
%% Contents for page 3
%% Original object ID: 52 0
29 0 obj
<<
/Length 30 0 R
>>
stream
BT
/F1 24 Tf
72 720 Td
(Potato 13) Tj
ET
endstream
endobj
30 0 obj
47
endobj
%% Original object ID: 53 0
31 0 obj
<<
/BaseFont /Helvetica
/Encoding /WinAnsiEncoding
/Name /F1
/Subtype /Type1
/Type /Font
>>
endobj
%% Original object ID: 54 0
32 0 obj
[
/PDF
/Text
]
endobj
%% Contents for page 4
%% Original object ID: 26 0
33 0 obj
<<
/Length 34 0 R
>>
stream
BT
/F1 24 Tf
72 720 Td
(Potato 2) Tj
ET
endstream
endobj
34 0 obj
46
endobj
%% Contents for page 5
%% Original object ID: 56 0
35 0 obj
<<
/Length 36 0 R
>>
stream
BT
/F1 24 Tf
72 720 Td
(Potato 11) Tj
ET
endstream
endobj
36 0 obj
47
endobj
%% Contents for page 6
%% Original object ID: 27 0
37 0 obj
<<
/Length 38 0 R
>>
stream
BT
/F1 24 Tf
72 720 Td
(Potato 4) Tj
ET
endstream
endobj
38 0 obj
46
endobj
%% Contents for page 7
%% Original object ID: 58 0
39 0 obj
<<
/Length 40 0 R
>>
stream
BT
/F1 24 Tf
72 720 Td
(Potato 9) Tj
ET
endstream
endobj
40 0 obj
46
endobj
%% Contents for page 8
%% Original object ID: 28 0
41 0 obj
<<
/Length 42 0 R
>>
stream
BT
/F1 24 Tf
72 720 Td
(Potato 6) Tj
ET
endstream
endobj
42 0 obj
46
endobj
%% Contents for page 9
%% Original object ID: 60 0
43 0 obj
<<
/Length 44 0 R
>>
stream
BT
/F1 24 Tf
72 720 Td
(Potato 7) Tj
ET
endstream
endobj
44 0 obj
46
endobj
%% Contents for page 10
%% Original object ID: 29 0
45 0 obj
<<
/Length 46 0 R
>>
stream
BT
/F1 24 Tf
72 720 Td
(Potato 8) Tj
ET
endstream
endobj
46 0 obj
46
endobj
%% Contents for page 11
%% Original object ID: 62 0
47 0 obj
<<
/Length 48 0 R
>>
stream
BT
/F1 24 Tf
72 720 Td
(Potato 5) Tj
ET
endstream
endobj
48 0 obj
46
endobj
%% Contents for page 12
%% Original object ID: 64 0
49 0 obj
<<
/Length 50 0 R
>>
stream
BT
/F1 24 Tf
72 720 Td
(Potato 3) Tj
ET
endstream
endobj
50 0 obj
46
endobj
%% Contents for page 13
%% Original object ID: 66 0
51 0 obj
<<
/Length 52 0 R
>>
stream
BT
/F1 24 Tf
72 720 Td
(Potato 1) Tj
ET
endstream
endobj
52 0 obj
46
endobj
%% Original object ID: 40 0
53 0 obj
<<
/Count -2
/Dest [
null
/FitV
100
]
/First 57 0 R
/Last 58 0 R
/Next 54 0 R
/Parent 19 0 R
/Title (Isosicle 1.1.1 -> 12: /FitV 100)
/Type /Outline
>>
endobj
%% Original object ID: 41 0
54 0 obj
<<
/Count 1
/Dest [
null
/XYZ
null
null
null
]
/First 59 0 R
/Last 59 0 R
/Parent 19 0 R
/Prev 53 0 R
/Title (Isosicle 1.1.2 -> 12: /XYZ null null null)
/Type /Outline
>>
endobj
%% Original object ID: 42 0
55 0 obj
<<
/Dest [
null
/FitR
66
714
180
770
]
/Next 56 0 R
/Parent 20 0 R
/Title (Trepsichord 1.2.1 -> 1: /FitR 66 714 180 770)
/Type /Outline
>>
endobj
%% Original object ID: 43 0
56 0 obj
<<
/Dest [
6 0 R
/XYZ
null
null
null
]
/Parent 20 0 R
/Prev 55 0 R
/Title (Trepsicle 1.2.2 -> 0: /XYZ null null null)
/Type /Outline
>>
endobj
%% Original object ID: 44 0
57 0 obj
<<
/Dest [
null
/XYZ
null
null
null
]
/Next 58 0 R
/Parent 53 0 R
/Title (Isosicle 1.1.1.1 -> 18: /XYZ null null null)
/Type /Outline
>>
endobj
%% Original object ID: 45 0
58 0 obj
<<
/Dest [
null
/XYZ
null
null
null
]
/Parent 53 0 R
/Prev 57 0 R
/Title (Isosicle 1.1.1.2 -> 19: /XYZ null null null)
/Type /Outline
>>
endobj
%% Original object ID: 46 0
59 0 obj
<<
/Dest [
null
/XYZ
null
null
null
]
/Parent 54 0 R
/Title (Isosicle 1.1.2.1 -> 22: /XYZ null null null)
/Type /Outline
>>
endobj
xref
0 60
0000000000 65535 f
0000000052 00000 n
0000000853 00000 n
0000000960 00000 n
0000001189 00000 n
0000001430 00000 n
0000001630 00000 n
0000001863 00000 n
0000002096 00000 n
0000002328 00000 n
0000002561 00000 n
0000002794 00000 n
0000003028 00000 n
0000003261 00000 n
0000003495 00000 n
0000003730 00000 n
0000003965 00000 n
0000004200 00000 n
0000004435 00000 n
0000004659 00000 n
0000004867 00000 n
0000005224 00000 n
0000005327 00000 n
0000005375 00000 n
0000005522 00000 n
0000005609 00000 n
0000005710 00000 n
0000005758 00000 n
0000005905 00000 n
0000005992 00000 n
0000006096 00000 n
0000006144 00000 n
0000006291 00000 n
0000006378 00000 n
0000006481 00000 n
0000006552 00000 n
0000006656 00000 n
0000006727 00000 n
0000006830 00000 n
0000006901 00000 n
0000007004 00000 n
0000007075 00000 n
0000007178 00000 n
0000007249 00000 n
0000007352 00000 n
0000007424 00000 n
0000007527 00000 n
0000007599 00000 n
0000007702 00000 n
0000007774 00000 n
0000007877 00000 n
0000007949 00000 n
0000008052 00000 n
0000008100 00000 n
0000008327 00000 n
0000008581 00000 n
0000008801 00000 n
0000009014 00000 n
0000009228 00000 n
0000009442 00000 n
trailer <<
/Root 1 0 R
/Size 60
/ID [<d3fab8d0603e683dc94e42ac31141868><31415926535897932384626433832795>]
>>
startxref
9613
%%EOF