Support excluding values from numeric ranges (fixes #564, #790)

This commit is contained in:
Jay Berkenbilt 2024-01-01 10:14:01 -05:00
parent 708ea4ef43
commit 070ee710eb
9 changed files with 107 additions and 21 deletions

View File

@ -1,3 +1,8 @@
2024-01-01 Jay Berkenbilt <ejb@ql.org>
* Support "x" before a group in a numeric range to exclude a group
from the previous group. Details are in the manual.
2023-12-29 Jay Berkenbilt <ejb@ql.org> 2023-12-29 Jay Berkenbilt <ejb@ql.org>
* When flattening annotations, preserve annotations without any * When flattening annotations, preserve annotations without any

View File

@ -442,7 +442,24 @@ namespace QUtil
inline bool is_number(char const*); inline bool is_number(char const*);
// This method parses the numeric range syntax used by the qpdf command-line tool. May throw // This method parses the numeric range syntax used by the qpdf command-line tool. May throw
// std::runtime_error. // std::runtime_error. A numeric range is a comma-separated list of groups. A group may be a
// number specification or a range of number specifications separated by a dash. A number
// specification may be one of the following (where <n> is a number):
// * <n> -- the numeric value of n
// * z -- the value of the `max` parameter
// * r<n> -- represents max + 1 - <n> (<n> from the end)
//
// If the group is two number specifications separated by a dash, it represents the range of
// numbers from the first to the second, inclusive. If the first is greater than the second, the
// numbers are descending.
//
// From qpdf 11.7.1: if a group starts with `x`, its members are excluded from the previous
// group that didn't start with `x`.
//
// Example: with max of 15, the range "4-10,x7-9,12-8,xr5" is 4, 5, 6, 10, 12, 10, 9, 8. This is
// 4 through 10 inclusive without 7 through 9 inclusive followed by 12 to 8 inclusive
// (descending) without 11 (the fifth value counting backwards from 15). For more information
// and additional examples, see the "Page Ranges" section in the manual.
QPDF_DLL QPDF_DLL
std::vector<int> parse_numrange(char const* range, int max); std::vector<int> parse_numrange(char const* range, int max);

View File

@ -9,12 +9,12 @@ include/qpdf/auto_job_c_pages.hh b3cc0f21029f6d89efa043dcdbfa183cb59325b6506001c
include/qpdf/auto_job_c_uo.hh ae21b69a1efa9333050f4833d465f6daff87e5b38e5106e49bbef5d4132e4ed1 include/qpdf/auto_job_c_uo.hh ae21b69a1efa9333050f4833d465f6daff87e5b38e5106e49bbef5d4132e4ed1
job.yml 4f89fc7b622df897d30d403d8035aa36fc7de8d8c43042c736e0300d904cb05c job.yml 4f89fc7b622df897d30d403d8035aa36fc7de8d8c43042c736e0300d904cb05c
libqpdf/qpdf/auto_job_decl.hh 9c6f701c29f3f764d620186bed92685a2edf2e4d11e4f4532862c05470cfc4d2 libqpdf/qpdf/auto_job_decl.hh 9c6f701c29f3f764d620186bed92685a2edf2e4d11e4f4532862c05470cfc4d2
libqpdf/qpdf/auto_job_help.hh 62c40dcd827fcea261a9f432f457aac1331731199ee3530e40de763811ba158e libqpdf/qpdf/auto_job_help.hh 838f4065f64dc3fbd493510fd21d8ab4e16ee2434592776f44f80cbe3045cb50
libqpdf/qpdf/auto_job_init.hh b4c2b3724fba61f1206fd3bae81951636852592f67a63ef9539839c2c5995065 libqpdf/qpdf/auto_job_init.hh b4c2b3724fba61f1206fd3bae81951636852592f67a63ef9539839c2c5995065
libqpdf/qpdf/auto_job_json_decl.hh 06caa46eaf71db8a50c046f91866baa8087745a9474319fb7c86d92634cc8297 libqpdf/qpdf/auto_job_json_decl.hh 06caa46eaf71db8a50c046f91866baa8087745a9474319fb7c86d92634cc8297
libqpdf/qpdf/auto_job_json_init.hh f5acb9aa103131cb68dec0e12c4d237a6459bdb49b24773c24f0c2724a462b8f libqpdf/qpdf/auto_job_json_init.hh f5acb9aa103131cb68dec0e12c4d237a6459bdb49b24773c24f0c2724a462b8f
libqpdf/qpdf/auto_job_schema.hh b53c006fec2e75b1b73588d242d49a32f7d3db820b1541de106c5d4c27fbb4d9 libqpdf/qpdf/auto_job_schema.hh b53c006fec2e75b1b73588d242d49a32f7d3db820b1541de106c5d4c27fbb4d9
manual/_ext/qpdf.py 6add6321666031d55ed4aedf7c00e5662bba856dfcd66ccb526563bffefbb580 manual/_ext/qpdf.py 6add6321666031d55ed4aedf7c00e5662bba856dfcd66ccb526563bffefbb580
manual/cli.rst f361df89dd212daf65e82df8b7b1f8a5e3554043c545f8e7cb14ba5ded21e04e manual/cli.rst d6d1ca82c936ffeaf137c586f988f80043db4c3b226d26fdf94f19a6005d012e
manual/qpdf.1 def5ee093f342b222da7e1890cf44145fb7ee7f8024e75d1668f560b7f7f20d6 manual/qpdf.1 10dc52d32a6d8885ce4e4292875ee7fe8e7a826ef3fc28db5671be413bcaacc7
manual/qpdf.1.in 436ecc85d45c4c9e2dbd1725fb7f0177fb627179469f114561adf3cb6cbb677b manual/qpdf.1.in 436ecc85d45c4c9e2dbd1725fb7f0177fb627179469f114561adf3cb6cbb677b

View File

@ -1303,6 +1303,10 @@ QUtil::str_compare_nocase(char const* s1, char const* s2)
std::vector<int> std::vector<int>
QUtil::parse_numrange(char const* range, int max) QUtil::parse_numrange(char const* range, int max)
{ {
// Performance note: this implementation aims to be straightforward, not efficient. Numeric
// range parsing is used only during argument processing. It is not used during processing of
// PDF files.
static std::regex group_re(R"((x)?(z|r?\d+)(?:-(z|r?\d+))?)"); static std::regex group_re(R"((x)?(z|r?\d+)(?:-(z|r?\d+))?)");
auto parse_num = [&max](std::string const& s) -> int { auto parse_num = [&max](std::string const& s) -> int {
if (s == "z") { if (s == "z") {
@ -1375,12 +1379,22 @@ QUtil::parse_numrange(char const* range, int max)
first = false; first = false;
auto first_num = parse_num(m[2].str()); auto first_num = parse_num(m[2].str());
auto is_span = m[3].matched; auto is_span = m[3].matched;
int last_num; int last_num{0};
if (is_span) { if (is_span) {
last_num = parse_num(m[3].str()); last_num = parse_num(m[3].str());
} }
if (is_exclude) { if (is_exclude) {
// XXX std::vector<int> work;
populate(work, first_num, is_span, last_num);
std::set<int> exclusions;
exclusions.insert(work.begin(), work.end());
work = last_group;
last_group.clear();
for (auto n: work) {
if (exclusions.count(n) == 0) {
last_group.emplace_back(n);
}
}
} else { } else {
result.insert(result.end(), last_group.begin(), last_group.end()); result.insert(result.end(), last_group.begin(), last_group.end());
populate(last_group, first_num, is_span, last_num); populate(last_group, first_num, is_span, last_num);

View File

@ -286,12 +286,19 @@ value, even if the file uses features that may not be available
in that version. in that version.
)"); )");
ap.addHelpTopic("page-ranges", "page range syntax", R"(A full description of the page range syntax, with examples, can be ap.addHelpTopic("page-ranges", "page range syntax", R"(A full description of the page range syntax, with examples, can be
found in the manual. Summary: found in the manual. In summary, a range is a comma-separated list
of groups. A group is a number or a range of numbers separated by a
dash. A group may be prepended by x to exclude its members from the
previous group. A number may be one of
- a,b,c pages a, b, and c - <n> where <n> represents a number is the <n>th page
- a-b pages a through b inclusive; if a > b, this counts down - r<n> is the <n>th page from the end
- r<n> where <n> represents a number is the <n>th page from the end - z the last page, same as r1
- z the last page, same as r1
- a,b,c pages a, b, and c
- a-b pages a through b inclusive; if a > b, this counts down
- a-b,xc pages a through b except page c
- a-b,xc-d pages a through b except pages c through d
You can append :even or :odd to select every other page from the You can append :even or :odd to select every other page from the
resulting set of pages, where :odd starts with the first page and resulting set of pages, where :odd starts with the first page and

View File

@ -67,6 +67,12 @@ my @nrange_tests = (
["1-6,8-12:even", ["1-6,8-12:even",
"numeric range 1-6,8-12:even -> 2 4 6 9 11", "numeric range 1-6,8-12:even -> 2 4 6 9 11",
0], 0],
["x1",
"error at * in numeric range *x1: first range group may not be an exclusion",
2],
["4-10,x7-9,12-8,xr5",
"numeric range 4-10,x7-9,12-8,xr5 -> 4 5 6 10 12 10 9 8",
0],
); );
foreach my $d (@nrange_tests) foreach my $d (@nrange_tests)
{ {

View File

@ -1274,12 +1274,19 @@ Page Ranges
.. help-topic page-ranges: page range syntax .. help-topic page-ranges: page range syntax
A full description of the page range syntax, with examples, can be A full description of the page range syntax, with examples, can be
found in the manual. Summary: found in the manual. In summary, a range is a comma-separated list
of groups. A group is a number or a range of numbers separated by a
dash. A group may be prepended by x to exclude its members from the
previous group. A number may be one of
- a,b,c pages a, b, and c - <n> where <n> represents a number is the <n>th page
- a-b pages a through b inclusive; if a > b, this counts down - r<n> is the <n>th page from the end
- r<n> where <n> represents a number is the <n>th page from the end - z the last page, same as r1
- z the last page, same as r1
- a,b,c pages a, b, and c
- a-b pages a through b inclusive; if a > b, this counts down
- a-b,xc pages a through b except page c
- a-b,xc-d pages a through b except pages c through d
You can append :even or :odd to select every other page from the You can append :even or :odd to select every other page from the
resulting set of pages, where :odd starts with the first page and resulting set of pages, where :odd starts with the first page and
@ -1303,6 +1310,10 @@ section describes the syntax of a page range.
of pages from the first to the second. If the first number is higher of pages from the first to the second. If the first number is higher
than the second number, it is the range of pages in reverse. than the second number, it is the range of pages in reverse.
- A number or dash-separated range of numbers may be prepended with
``x`` (from qpdf 11.7.1). This means to exclude the pages in that
range from the previous range that didn't start with ``x``.
- The range may be appended with ``:odd`` or ``:even`` to select only - The range may be appended with ``:odd`` or ``:even`` to select only
pages from the resulting range in odd or even positions. In this pages from the resulting range in odd or even positions. In this
case, odd and even refer to positions in the final range, not case, odd and even refer to positions in the final range, not
@ -1350,6 +1361,16 @@ section describes the syntax of a page range.
- pages 7 and 9, which are the pages in even positions from the - pages 7 and 9, which are the pages in even positions from the
original set of 5, 7, 8, 9, 12 original set of 5, 7, 8, 9, 12
- - ``1-10,x3-4``
- pages 1 through 10 except pages 3 and 4 (1, 2, and 5
through 10)
- - ``4-10,x7-9,12-8,xr5``
- In a 15-page file, this is 4, 5, 6, 10, 12, 10, 9, and 8 in
that order. That is pages 4 through 10 except 7 through 9
followed by 12 through 8 descending except 11 (the fifth page
from the end)
.. _modification-options: .. _modification-options:
PDF Modification PDF Modification

View File

@ -377,16 +377,26 @@ value, even if the file uses features that may not be available
in that version. in that version.
.SH PAGE-RANGES (page range syntax) .SH PAGE-RANGES (page range syntax)
A full description of the page range syntax, with examples, can be A full description of the page range syntax, with examples, can be
found in the manual. Summary: found in the manual. In summary, a range is a comma-separated list
of groups. A group is a number or a range of numbers separated by a
dash. A group may be prepended by x to exclude its members from the
previous group. A number may be one of
.IP \[bu] .IP \[bu]
a,b,c pages a, b, and c <n> where <n> represents a number is the <n>th page
.IP \[bu] .IP \[bu]
a-b pages a through b inclusive; if a > b, this counts down r<n> is the <n>th page from the end
.IP \[bu] .IP \[bu]
r<n> where <n> represents a number is the <n>th page from the end z the last page, same as r1
.IP \[bu] .IP \[bu]
z the last page, same as r1 a,b,c pages a, b, and c
.IP \[bu]
a-b pages a through b inclusive; if a > b, this counts down
.IP \[bu]
a-b,xc pages a through b except page c
.IP \[bu]
a-b,xc-d pages a through b except pages c through d
You can append :even or :odd to select every other page from the You can append :even or :odd to select every other page from the
resulting set of pages, where :odd starts with the first page and resulting set of pages, where :odd starts with the first page and

View File

@ -44,6 +44,12 @@ Planned changes for future 12.x (subject to change):
- When flattening annotations, preserve hyperlinks and other - When flattening annotations, preserve hyperlinks and other
annotations that inherently have no appearance information. annotations that inherently have no appearance information.
- CLI Enhancements
- Introduce ``x`` in the numeric range syntax to allow exclusion
of pages within a page range. See :ref:`page-ranges` for
details.
11.7.0: December 24, 2023 11.7.0: December 24, 2023
- Bug fixes: - Bug fixes: