From 070ee710eb0aaf6ddc845735c6ea0c28d3b7e5a1 Mon Sep 17 00:00:00 2001 From: Jay Berkenbilt Date: Mon, 1 Jan 2024 10:14:01 -0500 Subject: [PATCH] Support excluding values from numeric ranges (fixes #564, #790) --- ChangeLog | 5 +++++ include/qpdf/QUtil.hh | 19 ++++++++++++++++++- job.sums | 6 +++--- libqpdf/QUtil.cc | 18 ++++++++++++++++-- libqpdf/qpdf/auto_job_help.hh | 17 ++++++++++++----- libtests/qtest/numrange.test | 6 ++++++ manual/cli.rst | 31 ++++++++++++++++++++++++++----- manual/qpdf.1 | 20 +++++++++++++++----- manual/release-notes.rst | 6 ++++++ 9 files changed, 107 insertions(+), 21 deletions(-) diff --git a/ChangeLog b/ChangeLog index 95586348..a5bc330a 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,3 +1,8 @@ +2024-01-01 Jay Berkenbilt + + * Support "x" before a group in a numeric range to exclude a group + from the previous group. Details are in the manual. + 2023-12-29 Jay Berkenbilt * When flattening annotations, preserve annotations without any diff --git a/include/qpdf/QUtil.hh b/include/qpdf/QUtil.hh index ab2727a6..4259922f 100644 --- a/include/qpdf/QUtil.hh +++ b/include/qpdf/QUtil.hh @@ -442,7 +442,24 @@ namespace QUtil inline bool is_number(char const*); // This method parses the numeric range syntax used by the qpdf command-line tool. May throw - // std::runtime_error. + // std::runtime_error. A numeric range is a comma-separated list of groups. A group may be a + // number specification or a range of number specifications separated by a dash. A number + // specification may be one of the following (where <n> is a number): + // * <n> -- the numeric value of n + // * z -- the value of the `max` parameter + // * r<n> -- represents max + 1 - <n> (<n> from the end) + // + // If the group is two number specifications separated by a dash, it represents the range of + // numbers from the first to the second, inclusive. If the first is greater than the second, the + // numbers are descending. 
+ // + // From qpdf 11.7.1: if a group starts with `x`, its members are excluded from the previous + // group that didn't start with `x`. + // + // Example: with max of 15, the range "4-10,x7-9,12-8,xr5" is 4, 5, 6, 10, 12, 10, 9, 8. This is + // 4 through 10 inclusive without 7 through 9 inclusive followed by 12 to 8 inclusive + // (descending) without 11 (the fifth value counting backwards from 15). For more information + // and additional examples, see the "Page Ranges" section in the manual. QPDF_DLL std::vector parse_numrange(char const* range, int max); diff --git a/job.sums b/job.sums index 807e36fd..adde95f6 100644 --- a/job.sums +++ b/job.sums @@ -9,12 +9,12 @@ include/qpdf/auto_job_c_pages.hh b3cc0f21029f6d89efa043dcdbfa183cb59325b6506001c include/qpdf/auto_job_c_uo.hh ae21b69a1efa9333050f4833d465f6daff87e5b38e5106e49bbef5d4132e4ed1 job.yml 4f89fc7b622df897d30d403d8035aa36fc7de8d8c43042c736e0300d904cb05c libqpdf/qpdf/auto_job_decl.hh 9c6f701c29f3f764d620186bed92685a2edf2e4d11e4f4532862c05470cfc4d2 -libqpdf/qpdf/auto_job_help.hh 62c40dcd827fcea261a9f432f457aac1331731199ee3530e40de763811ba158e +libqpdf/qpdf/auto_job_help.hh 838f4065f64dc3fbd493510fd21d8ab4e16ee2434592776f44f80cbe3045cb50 libqpdf/qpdf/auto_job_init.hh b4c2b3724fba61f1206fd3bae81951636852592f67a63ef9539839c2c5995065 libqpdf/qpdf/auto_job_json_decl.hh 06caa46eaf71db8a50c046f91866baa8087745a9474319fb7c86d92634cc8297 libqpdf/qpdf/auto_job_json_init.hh f5acb9aa103131cb68dec0e12c4d237a6459bdb49b24773c24f0c2724a462b8f libqpdf/qpdf/auto_job_schema.hh b53c006fec2e75b1b73588d242d49a32f7d3db820b1541de106c5d4c27fbb4d9 manual/_ext/qpdf.py 6add6321666031d55ed4aedf7c00e5662bba856dfcd66ccb526563bffefbb580 -manual/cli.rst f361df89dd212daf65e82df8b7b1f8a5e3554043c545f8e7cb14ba5ded21e04e -manual/qpdf.1 def5ee093f342b222da7e1890cf44145fb7ee7f8024e75d1668f560b7f7f20d6 +manual/cli.rst d6d1ca82c936ffeaf137c586f988f80043db4c3b226d26fdf94f19a6005d012e +manual/qpdf.1 
10dc52d32a6d8885ce4e4292875ee7fe8e7a826ef3fc28db5671be413bcaacc7 manual/qpdf.1.in 436ecc85d45c4c9e2dbd1725fb7f0177fb627179469f114561adf3cb6cbb677b diff --git a/libqpdf/QUtil.cc b/libqpdf/QUtil.cc index 5e88ff88..c0aca105 100644 --- a/libqpdf/QUtil.cc +++ b/libqpdf/QUtil.cc @@ -1303,6 +1303,10 @@ QUtil::str_compare_nocase(char const* s1, char const* s2) std::vector QUtil::parse_numrange(char const* range, int max) { + // Performance note: this implementation aims to be straightforward, not efficient. Numeric + // range parsing is used only during argument processing. It is not used during processing of + // PDF files. + static std::regex group_re(R"((x)?(z|r?\d+)(?:-(z|r?\d+))?)"); auto parse_num = [&max](std::string const& s) -> int { if (s == "z") { @@ -1375,12 +1379,22 @@ QUtil::parse_numrange(char const* range, int max) first = false; auto first_num = parse_num(m[2].str()); auto is_span = m[3].matched; - int last_num; + int last_num{0}; if (is_span) { last_num = parse_num(m[3].str()); } if (is_exclude) { - // XXX + std::vector work; + populate(work, first_num, is_span, last_num); + std::set exclusions; + exclusions.insert(work.begin(), work.end()); + work = last_group; + last_group.clear(); + for (auto n: work) { + if (exclusions.count(n) == 0) { + last_group.emplace_back(n); + } + } } else { result.insert(result.end(), last_group.begin(), last_group.end()); populate(last_group, first_num, is_span, last_num); diff --git a/libqpdf/qpdf/auto_job_help.hh b/libqpdf/qpdf/auto_job_help.hh index da6520c5..4ee0f56e 100644 --- a/libqpdf/qpdf/auto_job_help.hh +++ b/libqpdf/qpdf/auto_job_help.hh @@ -286,12 +286,19 @@ value, even if the file uses features that may not be available in that version. )"); ap.addHelpTopic("page-ranges", "page range syntax", R"(A full description of the page range syntax, with examples, can be -found in the manual. Summary: +found in the manual. In summary, a range is a comma-separated list +of groups. 
A group is a number or a range of numbers separated by a +dash. A group may be prepended by x to exclude its members from the +previous group. A number may be one of -- a,b,c pages a, b, and c -- a-b pages a through b inclusive; if a > b, this counts down -- r<n> where <n> represents a number is the <n>th page from the end -- z the last page, same as r1 +- <n> where <n> represents a number is the <n>th page +- r<n> is the <n>th page from the end +- z the last page, same as r1 + +- a,b,c pages a, b, and c +- a-b pages a through b inclusive; if a > b, this counts down +- a-b,xc pages a through b except page c +- a-b,xc-d pages a through b except pages c through d You can append :even or :odd to select every other page from the resulting set of pages, where :odd starts with the first page and diff --git a/libtests/qtest/numrange.test b/libtests/qtest/numrange.test index b1a04c98..72d24db8 100644 --- a/libtests/qtest/numrange.test +++ b/libtests/qtest/numrange.test @@ -67,6 +67,12 @@ my @nrange_tests = ( ["1-6,8-12:even", "numeric range 1-6,8-12:even -> 2 4 6 9 11", 0], + ["x1", + "error at * in numeric range *x1: first range group may not be an exclusion", + 2], + ["4-10,x7-9,12-8,xr5", + "numeric range 4-10,x7-9,12-8,xr5 -> 4 5 6 10 12 10 9 8", + 0], ); foreach my $d (@nrange_tests) { diff --git a/manual/cli.rst b/manual/cli.rst index 457796e3..592ba6ef 100644 --- a/manual/cli.rst +++ b/manual/cli.rst @@ -1274,12 +1274,19 @@ Page Ranges .. help-topic page-ranges: page range syntax A full description of the page range syntax, with examples, can be - found in the manual. Summary: + found in the manual. In summary, a range is a comma-separated list + of groups. A group is a number or a range of numbers separated by a + dash. A group may be prepended by x to exclude its members from the + previous group. 
A number may be one of - - a,b,c pages a, b, and c - - a-b pages a through b inclusive; if a > b, this counts down - - r<n> where <n> represents a number is the <n>th page from the end - - z the last page, same as r1 + - <n> where <n> represents a number is the <n>th page + - r<n> is the <n>th page from the end + - z the last page, same as r1 + + - a,b,c pages a, b, and c + - a-b pages a through b inclusive; if a > b, this counts down + - a-b,xc pages a through b except page c + - a-b,xc-d pages a through b except pages c through d You can append :even or :odd to select every other page from the resulting set of pages, where :odd starts with the first page and @@ -1303,6 +1310,10 @@ section describes the syntax of a page range. of pages from the first to the second. If the first number is higher than the second number, it is the range of pages in reverse. +- A number or dash-separated range of numbers may be prepended with + ``x`` (from qpdf 11.7.1). This means to exclude the pages in that + range from the previous range that didn't start with ``x``. + - The range may be appended with ``:odd`` or ``:even`` to select only pages from the resulting range in odd or even positions. In this case, odd and even refer to positions in the final range, not @@ -1350,6 +1361,16 @@ section describes the syntax of a page range. - pages 7 and 9, which are the pages in even positions from the original set of 5, 7, 8, 9, 12 + - - ``1-10,x3-4`` + - pages 1 through 10 except pages 3 and 4 (1, 2, and 5 + through 10) + + - - ``4-10,x7-9,12-8,xr5`` + - In a 15-page file, this is 4, 5, 6, 10, 12, 10, 9, and 8 in + that order. That is pages 4 through 10 except 7 through 9 + followed by 12 through 8 descending except 11 (the fifth page + from the end) + .. _modification-options: PDF Modification diff --git a/manual/qpdf.1 b/manual/qpdf.1 index 6a859c8a..d758dca3 100644 --- a/manual/qpdf.1 +++ b/manual/qpdf.1 @@ -377,16 +377,26 @@ value, even if the file uses features that may not be available in that version. 
.SH PAGE-RANGES (page range syntax) A full description of the page range syntax, with examples, can be -found in the manual. Summary: +found in the manual. In summary, a range is a comma-separated list +of groups. A group is a number or a range of numbers separated by a +dash. A group may be prepended by x to exclude its members from the +previous group. A number may be one of .IP \[bu] -a,b,c pages a, b, and c +<n> where <n> represents a number is the <n>th page .IP \[bu] -a-b pages a through b inclusive; if a > b, this counts down +r<n> is the <n>th page from the end .IP \[bu] -r<n> where <n> represents a number is the <n>th page from the end +z the last page, same as r1 + .IP \[bu] -z the last page, same as r1 +a,b,c pages a, b, and c +.IP \[bu] +a-b pages a through b inclusive; if a > b, this counts down +.IP \[bu] +a-b,xc pages a through b except page c +.IP \[bu] +a-b,xc-d pages a through b except pages c through d You can append :even or :odd to select every other page from the resulting set of pages, where :odd starts with the first page and diff --git a/manual/release-notes.rst b/manual/release-notes.rst index 9a8ea027..17dc116d 100644 --- a/manual/release-notes.rst +++ b/manual/release-notes.rst @@ -44,6 +44,12 @@ Planned changes for future 12.x (subject to change): - When flattening annotations, preserve hyperlinks and other annotations that inherently have no appearance information. + - CLI Enhancements + + - Introduce ``x`` in the numeric range syntax to allow exclusion + of pages within a page range. See :ref:`page-ranges` for + details. + 11.7.0: December 24, 2023 - Bug fixes: