diff --git a/ChangeLog b/ChangeLog index c5336c11..62f1a541 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,3 +1,11 @@ +2024-01-05 Jay Berkenbilt + + * Add --set-page-labels command-line argument and supporting API. + Fixes #939. + - QPDFJob::Config::setPageLabels + - qpdf_page_label_e enumerated type + - QPDFPageLabelDocumentHelper::pageLabelDict + 2024-01-01 Jay Berkenbilt * Support comma-separated numeric values with --collate to select diff --git a/include/qpdf/Constants.h b/include/qpdf/Constants.h index 24f7dc3a..89152a91 100644 --- a/include/qpdf/Constants.h +++ b/include/qpdf/Constants.h @@ -232,4 +232,14 @@ enum pdf_annotation_flag_e { /* Encryption/password status for QPDFJob */ enum qpdf_encryption_status_e { qpdf_es_encrypted = 1 << 0, qpdf_es_password_incorrect = 1 << 1 }; +/* Page label types */ +enum qpdf_page_label_e { + pl_none, + pl_digits, + pl_alpha_lower, + pl_alpha_upper, + pl_roman_lower, + pl_roman_upper, +}; + #endif /* QPDFCONSTANTS_H */ diff --git a/include/qpdf/QPDFJob.hh b/include/qpdf/QPDFJob.hh index 9a7afb0f..4626c264 100644 --- a/include/qpdf/QPDFJob.hh +++ b/include/qpdf/QPDFJob.hh @@ -296,7 +296,8 @@ class QPDFJob Config* config; }; - class PageLabelsConfig { + class PageLabelsConfig + { friend class QPDFJob; friend class Config; @@ -458,6 +459,22 @@ class QPDFJob std::vector repeat_pagenos; }; + struct PageLabelSpec + { + PageLabelSpec( + int first_page, qpdf_page_label_e label_type, int start_num, std::string_view prefix) : + first_page(first_page), + label_type(label_type), + start_num(start_num), + prefix(prefix) + { + } + int first_page; + qpdf_page_label_e label_type; + int start_num{1}; + std::string prefix; + }; + // Helper functions static void usage(std::string const& msg); static JSON json_schema(int json_version, std::set* keys = nullptr); @@ -694,7 +711,7 @@ class QPDFJob bool json_output{false}; std::string update_from_json; bool report_mem_usage{false}; - std::vector page_label_specs; + std::vector page_label_specs; }; 
std::shared_ptr m; }; diff --git a/include/qpdf/QPDFPageLabelDocumentHelper.hh b/include/qpdf/QPDFPageLabelDocumentHelper.hh index eb610544..57b6919c 100644 --- a/include/qpdf/QPDFPageLabelDocumentHelper.hh +++ b/include/qpdf/QPDFPageLabelDocumentHelper.hh @@ -49,6 +49,11 @@ class QPDFPageLabelDocumentHelper: public QPDFDocumentHelper QPDF_DLL bool hasPageLabels(); + // Helper function to create a dictionary suitable for adding to the /PageLabels numbers tree. + QPDF_DLL + static QPDFObjectHandle + pageLabelDict(qpdf_page_label_e label_type, int start_num, std::string_view prefix); + // Return a page label dictionary representing the page label for the given page. The page does // not need to appear explicitly in the page label dictionary. This method will adjust /St as // needed to produce a label that is suitable for the page. diff --git a/libqpdf/QPDFJob.cc b/libqpdf/QPDFJob.cc index b0910cbd..2e0abb43 100644 --- a/libqpdf/QPDFJob.cc +++ b/libqpdf/QPDFJob.cc @@ -2172,6 +2172,37 @@ QPDFJob::handleTransformations(QPDF& pdf) if (m->remove_page_labels) { pdf.getRoot().removeKey("/PageLabels"); } + if (!m->page_label_specs.empty()) { + auto nums = QPDFObjectHandle::newArray(); + auto n_pages = QIntC::to_int(dh.getAllPages().size()); + int last_page_seen{0}; + for (auto& spec: m->page_label_specs) { + if (spec.first_page < 0) { + spec.first_page = n_pages + 1 + spec.first_page; + } + if (last_page_seen == 0) { + if (spec.first_page != 1) { + throw std::runtime_error( + "the first page label specification must start with page 1"); + } + } else if (spec.first_page <= last_page_seen) { + throw std::runtime_error( + "page label specifications must be in order by first page"); + } + if (spec.first_page > n_pages) { + throw std::runtime_error( + "page label spec: page " + std::to_string(spec.first_page) + + " is more than the total number of pages (" + std::to_string(n_pages) + ")"); + } + last_page_seen = spec.first_page; + 
nums.appendItem(QPDFObjectHandle::newInteger(spec.first_page - 1)); + nums.appendItem(QPDFPageLabelDocumentHelper::pageLabelDict( + spec.label_type, spec.start_num, spec.prefix)); + } + auto page_labels = QPDFObjectHandle::newDictionary(); + page_labels.replaceKey("/Nums", nums); + pdf.getRoot().replaceKey("/PageLabels", page_labels); + } if (!m->attachments_to_remove.empty()) { QPDFEmbeddedFileDocumentHelper efdh(pdf); for (auto const& key: m->attachments_to_remove) { @@ -3019,10 +3050,9 @@ QPDFJob::writeOutfile(QPDF& pdf) try { QUtil::remove_file(backup.c_str()); } catch (QPDFSystemError& e) { - *m->log->getError() - << m->message_prefix << ": unable to delete original file (" << e.what() << ");" - << " original file left in " << backup - << ", but the input was successfully replaced\n"; + *m->log->getError() << m->message_prefix << ": unable to delete original file (" + << e.what() << ");" << " original file left in " << backup + << ", but the input was successfully replaced\n"; } } } diff --git a/libqpdf/QPDFJob_config.cc b/libqpdf/QPDFJob_config.cc index 9651c3b9..e48d7e31 100644 --- a/libqpdf/QPDFJob_config.cc +++ b/libqpdf/QPDFJob_config.cc @@ -1,5 +1,7 @@ #include +#include + #include #include #include @@ -1062,11 +1064,50 @@ QPDFJob::Config::encrypt( QPDFJob::Config* QPDFJob::Config::setPageLabels(const std::vector& specs) { - // XXX validate - for (auto const& xxx: specs) { - std::cout << "XXX config: spec: " << xxx << std::endl; + static std::regex page_label_re(R"(^(z|r?\d+):([DaArR])?(?:/(\d+)?(?:/(.+)?)?)?$)"); + o.m->page_label_specs.clear(); + for (auto const& spec: specs) { + std::smatch match; + if (!std::regex_match(spec, match, page_label_re)) { + usage("page label spec must be n:[D|a|A|r|R][/start[/prefix]]"); + } + auto first_page_str = match[1].str(); + int first_page; + if (first_page_str == "z") { + first_page = -1; + } else if (first_page_str.at(0) == 'r') { + first_page = -QUtil::string_to_int(first_page_str.substr(1).c_str()); + } else { 
+ first_page = QUtil::string_to_int(first_page_str.c_str()); + } + auto label_type_ch = match[2].matched ? match[2].str().at(0) : '\0'; + qpdf_page_label_e label_type; + switch (label_type_ch) { + case 'D': + label_type = pl_digits; + break; + case 'a': + label_type = pl_alpha_lower; + break; + case 'A': + label_type = pl_alpha_upper; + break; + case 'r': + label_type = pl_roman_lower; + break; + case 'R': + label_type = pl_roman_upper; + break; + default: + label_type = pl_none; + } + + auto start_num = match[3].matched ? QUtil::string_to_int(match[3].str().c_str()) : 1; + auto prefix = match[4].matched ? match[4].str() : ""; + // We can't check ordering until we know how many pages there are, so that is delayed until + // near the end. + o.m->page_label_specs.emplace_back(first_page, label_type, start_num, prefix); } - o.m->page_label_specs = specs; return this; } diff --git a/libqpdf/QPDFPageLabelDocumentHelper.cc b/libqpdf/QPDFPageLabelDocumentHelper.cc index d94c41fd..e291e9aa 100644 --- a/libqpdf/QPDFPageLabelDocumentHelper.cc +++ b/libqpdf/QPDFPageLabelDocumentHelper.cc @@ -99,3 +99,36 @@ QPDFPageLabelDocumentHelper::getLabelsForPageRange( } } } + +QPDFObjectHandle +QPDFPageLabelDocumentHelper::pageLabelDict( + qpdf_page_label_e label_type, int start_num, std::string_view prefix) +{ + auto num = "<< /Type /PageLabel >>"_qpdf; + switch (label_type) { + case pl_none: + break; + case pl_digits: + num.replaceKey("/S", "/D"_qpdf); + break; + case pl_alpha_lower: + num.replaceKey("/S", "/a"_qpdf); + break; + case pl_alpha_upper: + num.replaceKey("/S", "/A"_qpdf); + break; + case pl_roman_lower: + num.replaceKey("/S", "/r"_qpdf); + break; + case pl_roman_upper: + num.replaceKey("/S", "/R"_qpdf); + break; + } + if (!prefix.empty()) { + num.replaceKey("/P", QPDFObjectHandle::newUnicodeString(std::string(prefix))); + } + if (start_num != 1) { + num.replaceKey("/St", QPDFObjectHandle::newInteger(start_num)); + } + return num; +} diff --git a/manual/release-notes.rst 
b/manual/release-notes.rst index 72b1aad6..6fb4758a 100644 --- a/manual/release-notes.rst +++ b/manual/release-notes.rst @@ -38,7 +38,7 @@ Planned changes for future 12.x (subject to change): .. x.y.z: not yet released -11.7.1: not yet released +11.8.0: not yet released - Bug fixes: - When flattening annotations, preserve hyperlinks and other @@ -54,6 +54,19 @@ Planned changes for future 12.x (subject to change): :qpdf:ref:`--collate` to select different numbers of pages from different groups. + - Add :qpdf:ref:`--set-page-labels` option to completely override + page labels in the output. + + - Library Enhancements + + - Add API to support :qpdf:ref:`--set-page-labels`: + + - ``QPDFJob::Config::setPageLabels`` + + - ``qpdf_page_label_e`` enumerated type + + - ``QPDFPageLabelDocumentHelper::pageLabelDict`` + 11.7.0: December 24, 2023 - Bug fixes: diff --git a/qpdf/qtest/page-labels.test b/qpdf/qtest/page-labels.test index 616dcb6a..9927437b 100644 --- a/qpdf/qtest/page-labels.test +++ b/qpdf/qtest/page-labels.test @@ -29,5 +29,44 @@ $td->runtest("no page labels", {$td->FILE => "no-page-labels.out", $td->EXIT_STATUS => 0}, $td->NORMALIZE_NEWLINES); +# --set-page-labels +my @errors = ( + ["quack", ".*page label spec must be.*"], + ["5:r 10:D", ".*the first page .*must start with page 1.*"], + ["1:r 10:D 31:A", + ".*page 31 is more than the total number of pages \\(30\\).*"], +); +$n_tests += scalar(@errors); + +foreach my $d (@errors) +{ + my ($specs, $err) = @$d; + $td->runtest("error ($specs)", + {$td->COMMAND => "qpdf --set-page-labels $specs --" . + " page-labels-num-tree.pdf a.pdf"}, + {$td->REGEXP => $err, $td->EXIT_STATUS => 2}, + $td->NORMALIZE_NEWLINES); +} + +$n_tests += 4; +$td->runtest("set page labels", + {$td->COMMAND => "qpdf page-labels-num-tree.pdf a.pdf" . 
+ " --set-page-labels 1:a 3:R/2 6:r//Z- 8:A/17 r10:D/3 28: z://end --"}, + {$td->STRING => "", $td->EXIT_STATUS => 0}, + $td->NORMALIZE_NEWLINES); +$td->runtest("after set page labels", + {$td->COMMAND => "test_driver 47 a.pdf"}, + {$td->FILE => "set-page-labels.out", $td->EXIT_STATUS => 0}, + $td->NORMALIZE_NEWLINES); +$td->runtest("set page labels (json)", + {$td->COMMAND => "qpdf page-labels-num-tree.pdf b.pdf" . + " --job-json-file=set-page-labels.json"}, + {$td->STRING => "", $td->EXIT_STATUS => 0}, + $td->NORMALIZE_NEWLINES); +$td->runtest("after set page labels", + {$td->COMMAND => "test_driver 47 b.pdf"}, + {$td->FILE => "set-page-labels.out", $td->EXIT_STATUS => 0}, + $td->NORMALIZE_NEWLINES); + cleanup(); $td->report($n_tests); diff --git a/qpdf/qtest/qpdf/set-page-labels.json b/qpdf/qtest/qpdf/set-page-labels.json new file mode 100644 index 00000000..e3a3daf8 --- /dev/null +++ b/qpdf/qtest/qpdf/set-page-labels.json @@ -0,0 +1,11 @@ +{ + "setPageLabels": [ + "1:a", + "3:R/2", + "6:r//Z-", + "8:A/17", + "r10:D/3", + "28:", + "z://end" + ] +} diff --git a/qpdf/qtest/qpdf/set-page-labels.out b/qpdf/qtest/qpdf/set-page-labels.out new file mode 100644 index 00000000..b0bfe14a --- /dev/null +++ b/qpdf/qtest/qpdf/set-page-labels.out @@ -0,0 +1,8 @@ +1 << /S /a /St 1 >> +3 << /S /R /St 2 >> +6 << /P (Z-) /S /r /St 1 >> +8 << /S /A /St 17 >> +21 << /S /D /St 3 >> +28 << /St 1 >> +30 << /P (end) /St 1 >> +test 47 done