Keep only referenced form fields in --pages

This commit is contained in:
Jay Berkenbilt 2021-02-23 07:57:14 -05:00
parent 50037fb33d
commit be3a8c0e7a
7 changed files with 99 additions and 11 deletions

View File

@ -140,7 +140,7 @@ class QPDFAcroFormDocumentHelper: public QPDFDocumentHelper
std::vector<QPDFAnnotationObjectHelper>
getWidgetAnnotationsForPage(QPDFPageObjectHelper);
// Return form fields for a page.
// Return top-level form fields for a page.
QPDF_DLL
std::vector<QPDFFormFieldObjectHelper>
getFormFieldsForPage(QPDFPageObjectHelper);
@ -210,11 +210,15 @@ class QPDFAcroFormDocumentHelper: public QPDFDocumentHelper
QPDFAcroFormDocumentHelper* from_afdh = nullptr);
// Copy form fields from a page in a different QPDF object to this
// QPDF.
// QPDF. If copied_fields is not null, each field added by this call
// is appended to it; the vector is not cleared first, so pass an
// empty vector to receive only the newly copied fields. Items in
// the vector are objects in the receiving QPDF (the one associated
// with this QPDFAcroFormDocumentHelper).
QPDF_DLL
void copyFieldsFromForeignPage(
QPDFPageObjectHelper foreign_page,
QPDFAcroFormDocumentHelper& foreign_afdh);
QPDFAcroFormDocumentHelper& foreign_afdh,
std::vector<QPDFObjectHandle>* copied_fields = nullptr);
private:
void analyze();

View File

@ -135,15 +135,21 @@ QPDFAcroFormDocumentHelper::getWidgetAnnotationsForPage(QPDFPageObjectHelper h)
std::vector<QPDFFormFieldObjectHelper>
QPDFAcroFormDocumentHelper::getFormFieldsForPage(QPDFPageObjectHelper ph)
{
std::set<QPDFObjGen> added;
std::vector<QPDFFormFieldObjectHelper> result;
auto widget_annotations = getWidgetAnnotationsForPage(ph);
for (auto annot: widget_annotations)
{
auto field = getFieldForAnnotation(annot);
field = field.getTopLevelField();
if (field.getObjectHandle().isDictionary())
auto og = field.getObjectHandle().getObjGen();
if (! added.count(og))
{
result.push_back(field);
added.insert(og);
if (field.getObjectHandle().isDictionary())
{
result.push_back(field);
}
}
}
return result;
@ -674,18 +680,27 @@ QPDFAcroFormDocumentHelper::transformAnnotations(
// Copy the form fields that foreign_afdh reports for foreign_page
// into this helper's QPDF and register each one with addFormField.
//
// foreign_page:  page whose fields are looked up through
//                foreign_afdh.getFormFieldsForPage().
// foreign_afdh:  AcroForm helper used to enumerate the fields of
//                foreign_page.
// copied_fields: optional output. When non-null, the handle of every
//                field registered by this call is pushed onto it; the
//                vector is not cleared beforehand.
void
QPDFAcroFormDocumentHelper::copyFieldsFromForeignPage(
QPDFPageObjectHelper foreign_page,
QPDFAcroFormDocumentHelper& foreign_afdh)
QPDFAcroFormDocumentHelper& foreign_afdh,
std::vector<QPDFObjectHandle>* copied_fields)
{
// Object/generation IDs of copies already registered by this call.
std::set<QPDFObjGen> added;
for (auto field: foreign_afdh.getFormFieldsForPage(foreign_page))
{
// Bring the foreign field into this->qpdf.
auto new_field = this->qpdf.copyForeignObject(
field.getObjectHandle());
// A direct object is turned into an indirect one before its
// object/generation ID is used as the de-duplication key below.
if (! new_field.isIndirect())
{
new_field = this->qpdf.makeIndirectObject(new_field);
}
auto og = new_field.getObjGen();
// Register each resulting object at most once, even if more than
// one foreign field maps to the same copy.
if (! added.count(og))
{
addFormField(new_field);
added.insert(og);
// Report the new field to the caller only when asked to.
if (copied_fields)
{
copied_fields->push_back(new_field);
}
}
}
}

View File

@ -5846,6 +5846,7 @@ static void handle_page_specs(QPDF& pdf, Options& o, bool& warnings)
std::map<unsigned long long,
PointerHolder<QPDFAcroFormDocumentHelper>> afdh_map;
auto this_afdh = get_afdh_for_qpdf(afdh_map, &pdf);
std::set<QPDFObjGen> referenced_fields;
for (std::vector<QPDFPageData>::iterator iter =
parsed_specs.begin();
iter != parsed_specs.end(); ++iter)
@ -5906,7 +5907,13 @@ static void handle_page_specs(QPDF& pdf, Options& o, bool& warnings)
else if (other_afdh->hasAcroForm())
{
QTC::TC("qpdf", "qpdf copy form fields in pages");
this_afdh->copyFieldsFromForeignPage(to_copy, *other_afdh);
std::vector<QPDFObjectHandle> copied_fields;
this_afdh->copyFieldsFromForeignPage(
to_copy, *other_afdh, &copied_fields);
for (auto const& cf: copied_fields)
{
referenced_fields.insert(cf.getObjGen());
}
}
}
if (page_data.qpdf->anyWarnings())
@ -5929,16 +5936,57 @@ static void handle_page_specs(QPDF& pdf, Options& o, bool& warnings)
// Delete page objects for unused page in primary. This prevents
// those objects from being preserved by being referred to from
// other places, such as the outlines dictionary.
// other places, such as the outlines dictionary. Also make sure
// we keep form fields from pages we preserved.
for (size_t pageno = 0; pageno < orig_pages.size(); ++pageno)
{
if (selected_from_orig.count(QIntC::to_int(pageno)) == 0)
auto page = orig_pages.at(pageno);
if (selected_from_orig.count(QIntC::to_int(pageno)))
{
for (auto field: this_afdh->getFormFieldsForPage(page))
{
QTC::TC("qpdf", "qpdf pages keeping field from original");
referenced_fields.insert(field.getObjectHandle().getObjGen());
}
}
else
{
pdf.replaceObject(
orig_pages.at(pageno).getObjectHandle().getObjGen(),
page.getObjectHandle().getObjGen(),
QPDFObjectHandle::newNull());
}
}
// Remove unreferenced form fields
if (this_afdh->hasAcroForm())
{
auto acroform = pdf.getRoot().getKey("/AcroForm");
auto fields = acroform.getKey("/Fields");
if (fields.isArray())
{
auto new_fields = QPDFObjectHandle::newArray();
if (fields.isIndirect())
{
new_fields = pdf.makeIndirectObject(new_fields);
}
for (auto const& field: fields.aitems())
{
if (referenced_fields.count(field.getObjGen()))
{
new_fields.appendItem(field);
}
}
if (new_fields.getArrayNItems() > 0)
{
QTC::TC("qpdf", "qpdf keep some fields in pages");
acroform.replaceKey("/Fields", new_fields);
}
else
{
QTC::TC("qpdf", "qpdf no more fields in pages");
pdf.getRoot().removeKey("/AcroForm");
}
}
}
}
static void handle_rotations(QPDF& pdf, Options& o)

View File

@ -577,3 +577,6 @@ QPDFAcroFormDocumentHelper field with parent 3
QPDFAcroFormDocumentHelper modify ap matrix 0
qpdf copy form fields in split_pages 0
qpdf copy form fields in pages 0
qpdf keep some fields in pages 0
qpdf pages keeping field from original 0
qpdf no more fields in pages 0

View File

@ -2414,7 +2414,7 @@ foreach my $f (qw(screen print))
show_ntests();
# ----------
$td->notify("--- Copy Annotations ---");
$n_tests += 21;
$n_tests += 25;
$td->runtest("complex copy annotations",
{$td->COMMAND =>
@ -2479,6 +2479,24 @@ for (my $i = 1; $i <= 2; ++$i)
{$td->FILE => "split-out-$i.pdf"},
{$td->FILE => "fields-split-$i.pdf"});
}
# Run qpdf on fields-two-pages.pdf with a --pages selection of ". 1"
# followed by minimal.pdf, writing a.pdf. The command must print
# nothing and exit with status 0.
# NOTE(review): "." presumably names the primary input file here;
# confirm against the qpdf --pages documentation.
$td->runtest("keeping some fields",
{$td->COMMAND =>
"qpdf --static-id fields-two-pages.pdf" .
" --pages . 1 minimal.pdf -- a.pdf"},
{$td->STRING => "", $td->EXIT_STATUS => 0},
$td->NORMALIZE_NEWLINES);
# The result must match the stored reference kept-some-fields.pdf.
$td->runtest("check output",
{$td->FILE => "a.pdf"},
{$td->FILE => "kept-some-fields.pdf"});
# Feed the previous reference output back in and select only ". 2".
# Going by the test name, no form fields are expected to survive;
# the command must again be silent and exit with status 0.
$td->runtest("not keeping any fields",
{$td->COMMAND =>
"qpdf --static-id kept-some-fields.pdf" .
" --pages . 2 -- a.pdf"},
{$td->STRING => "", $td->EXIT_STATUS => 0},
$td->NORMALIZE_NEWLINES);
# The result must match the stored reference kept-no-fields.pdf.
$td->runtest("check output",
{$td->FILE => "a.pdf"},
{$td->FILE => "kept-no-fields.pdf"});
show_ntests();
# ----------

Binary file not shown.

Binary file not shown.