diff --git a/ChangeLog b/ChangeLog index 9f33d4cf..5f464987 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,3 +1,11 @@ +2019-04-20 Jay Berkenbilt + + * Revert change that included preservation of outlines (bookmarks) + in --split-pages. The way it was implemented caused a very + significant performance penalty when splitting pages with + outlines. We need a better solution that only copies the relevant + items, not the whole tree. + 2019-03-11 Jay Berkenbilt * JSON serialization: add missing leading 0 to decimal values diff --git a/TODO b/TODO index a86e472f..a6ff5baf 100644 --- a/TODO +++ b/TODO @@ -77,6 +77,17 @@ Page splitting/merging * make sure conflicting named destinations work possibly test by including the same file by two paths in a merge + Note: original implementation of bookmark preservation for split + pages caused a very high performance hit. The problem was + introduced in 313ba081265f69ac9a0324f9fe87087c72918191 and reverted + in the commit that adds this paragraph. The revert includes marking + a few tests cases as $td->EXPECT_FAILURE. When properly coded, the + test cases will need to be adjusted to only include the parts of + the outlines that are actually copied. The tests in question are + "split page with outlines". When implementing properly, ensure that + the performance is not adversely affected by timing split-pages on + a large file with complex outlines such as the PDF specification. + When pruning outlines, keep all outlines in the hierarchy that are above an outline for a page we care about. If one of the ancestor outlines points to a non-existent page, clear its dest. If an diff --git a/qpdf/qpdf.cc b/qpdf/qpdf.cc index 4ec0a48c..dec88241 100644 --- a/qpdf/qpdf.cc +++ b/qpdf/qpdf.cc @@ -4966,30 +4966,6 @@ static void write_outfile(QPDF& pdf, Options& o) "/Nums", QPDFObjectHandle::newArray(labels)); outpdf.getRoot().replaceKey("/PageLabels", page_labels); } - // Copying the outlines tree, names table, and any - // outdated Dests key from the original file will make - // some things work in the split files. It is not a - // complete solution, but at least outlines whose - // destinations are on pages that have been preserved will - // work normally. There are other top-level structures - // that should be copied as well. This will be improved in - // the future. - std::list to_copy; - to_copy.push_back("/Names"); - to_copy.push_back("/Dests"); - to_copy.push_back("/Outlines"); - for (std::list::iterator iter = to_copy.begin(); - iter != to_copy.end(); ++iter) - { - QPDFObjectHandle orig = pdf.getRoot().getKey(*iter); - if (! orig.isIndirect()) - { - orig = pdf.makeIndirectObject(orig); - } - outpdf.getRoot().replaceKey( - *iter, - outpdf.copyForeignObject(orig)); - } std::string page_range = QUtil::int_to_string(first, pageno_len); if (o.split_pages > 1) { diff --git a/qpdf/qtest/qpdf.test b/qpdf/qtest/qpdf.test index a968404c..43219109 100644 --- a/qpdf/qtest/qpdf.test +++ b/qpdf/qtest/qpdf.test @@ -1594,6 +1594,8 @@ foreach my $i (qw(01-06 07-11)) {$td->FILE => "labels-split-$i.pdf"}); } +# See comments in TODO about these expected failures. Search for +# "split page with outlines". $td->runtest("split page with outlines", {$td->COMMAND => "qpdf --qdf --static-id --split-pages=10". " outlines-with-actions.pdf split-out-outlines.pdf"}, @@ -1602,7 +1604,8 @@ foreach my $i (qw(01-10 11-20 21-30)) { $td->runtest("check output ($i)", {$td->FILE => "split-out-outlines-$i.pdf"}, - {$td->FILE => "outlines-split-$i.pdf"}); + {$td->FILE => "outlines-split-$i.pdf"}, + $td->EXPECT_FAILURE) } foreach my $d (@sp_cases)