// qpdf/include/qpdf/QPDFAcroFormDocumentHelper.hh
//
// 313 lines
// 13 KiB
// C++

// Copyright (c) 2005-2022 Jay Berkenbilt
//
// This file is part of qpdf.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//
// Versions of qpdf prior to version 7 were released under the terms
// of version 2.0 of the Artistic License. At your option, you may
// continue to consider qpdf to be licensed under those terms. Please
// see the manual for additional information.
#ifndef QPDFACROFORMDOCUMENTHELPER_HH
#define QPDFACROFORMDOCUMENTHELPER_HH
// This document helper is intended to help with operations on
// interactive forms. Here are the key things to know:
// * The PDF specification talks about interactive forms and also
// about form XObjects. While form XObjects appear in parts of
// interactive forms, this class is concerned about interactive
// forms, not form XObjects.
//
// * Interactive forms are discussed in the PDF Specification (ISO PDF
// 32000-1:2008) section 12.7. Also relevant is the section about
// Widget annotations. Annotations are discussed in section 12.5
// with annotation dictionaries discussed in 12.5.1. Widget
// annotations are discussed specifically in section 12.5.6.19.
//
// * What you need to know about the structure of interactive forms in
// PDF files:
//
// - The document catalog contains the key "/AcroForm" which
// contains a list of fields. Fields are represented as a tree
// structure much like pages. Nodes in the fields tree may contain
// other fields. Fields may inherit values of many of their
// attributes from ancestors in the tree.
//
// - Fields may also have children that are widget annotations. As a
// special case, and a cause of considerable confusion, if a field
// has a single annotation as a child, the annotation dictionary
// may be merged with the field dictionary. In that case, the
// field and the annotation are in the same object. Note that,
// while field dictionary attributes are inherited, annotation
// dictionary attributes are not.
//
// - A page dictionary contains a key called "/Annots" which
// contains a simple list of annotations. For any given annotation
// of subtype "/Widget", you should encounter that annotation in
// the "/Annots" array of a page, and you should also be able
// to reach it by traversing through the "/AcroForm" dictionary
// from the document catalog. In the simplest case (and also a
// very common case), a form field's widget annotation will be
// merged with the field object, and the object will appear
// directly both under "/Annots" in the page dictionary and under
// "/Fields" in the "/AcroForm" dictionary. In a more complex
// case, you may have to trace through various "/Kids" elements in
// the "/AcroForm" field entry until you find the annotation
// dictionary.
#include <qpdf/QPDFDocumentHelper.hh>
#include <qpdf/DLL.h>
#include <qpdf/QPDFAnnotationObjectHelper.hh>
#include <qpdf/QPDFFormFieldObjectHelper.hh>
#include <qpdf/QPDFPageObjectHelper.hh>
#include <map>
#include <set>
#include <vector>
// Document helper for interactive forms (AcroForm). The public
// methods declared here query and modify the association among form
// fields, widget annotations, and pages of a QPDF document.
class QPDFAcroFormDocumentHelper: public QPDFDocumentHelper
{
public:
// Construct a helper bound to the given QPDF object.
QPDF_DLL
QPDFAcroFormDocumentHelper(QPDF&);
QPDF_DLL
virtual ~QPDFAcroFormDocumentHelper()
{
}
// This class lazily creates an internal cache of the mapping
// among form fields, annotations, and pages. Methods within this
// class preserve the validity of this cache. However, if you
// modify pages' annotation dictionaries, the document's /AcroForm
// dictionary, or any form fields manually in a way that alters
// the association between forms, fields, annotations, and pages,
// it may cause this cache to become invalid. This method marks
// the cache invalid and forces it to be regenerated the next time
// it is needed.
QPDF_DLL
void invalidateCache();
// Presumably reports whether the document has an /AcroForm
// dictionary; the implementation is not visible in this header, so
// confirm the exact criteria there.
QPDF_DLL
bool hasAcroForm();
// Add a form field, initializing the document's AcroForm
// dictionary if needed, updating the cache if necessary. Note
// that if you are adding fields that are copies of other fields,
// this method may result in multiple fields existing with the
// same qualified name, which can have unexpected side effects. In
// that case, you should use addAndRenameFormFields() instead.
QPDF_DLL
void addFormField(QPDFFormFieldObjectHelper);
// Add a collection of form fields making sure that their fully
// qualified names don't conflict with already present form
// fields. Fields within the collection of new fields that have
// the same name as each other will continue to do so.
QPDF_DLL
void addAndRenameFormFields(std::vector<QPDFObjectHandle> fields);
// Remove fields from the fields array. The fields to remove are
// identified by the QPDFObjGen values in the given set.
QPDF_DLL
void removeFormFields(std::set<QPDFObjGen> const&);
// Set the name of a field, updating internal records of field
// names. Name should be UTF-8 encoded.
QPDF_DLL
void setFormFieldName(QPDFFormFieldObjectHelper, std::string const& name);
// Return a vector of all terminal fields in a document. Terminal
// fields are fields that have no children that are also fields.
// Terminal fields may still have children that are annotations.
// Intermediate nodes in the fields tree are not included in this
// list, but you can still reach them through the getParent method
// of the field object helper.
QPDF_DLL
std::vector<QPDFFormFieldObjectHelper> getFormFields();
// Return all the form fields that have the given fully-qualified
// name and also have an explicit "/T" attribute. For this
// information to be accurate, any changes to field names must be
// done through setFormFieldName() above.
QPDF_DLL
std::set<QPDFObjGen>
getFieldsWithQualifiedName(std::string const& name);
// Return the annotations associated with a terminal field. Note
// that in the case of a field having a single annotation, the
// underlying object will typically be the same as the underlying
// object for the field.
QPDF_DLL
std::vector<QPDFAnnotationObjectHelper>
getAnnotationsForField(QPDFFormFieldObjectHelper);
// Return annotations of subtype /Widget for a page.
QPDF_DLL
std::vector<QPDFAnnotationObjectHelper>
getWidgetAnnotationsForPage(QPDFPageObjectHelper);
// Return top-level form fields for a page.
QPDF_DLL
std::vector<QPDFFormFieldObjectHelper>
getFormFieldsForPage(QPDFPageObjectHelper);
// Return the terminal field that is associated with this
// annotation. If the annotation dictionary is merged with the
// field dictionary, the underlying object will be the same, but
// this is not always the case. Note that if you call this method
// with an annotation that is not a widget annotation, there will
// not be an associated field, and this method will return a
// helper associated with a null object (isNull() == true).
QPDF_DLL
QPDFFormFieldObjectHelper
getFieldForAnnotation(QPDFAnnotationObjectHelper);
// Return the current value of /NeedAppearances. If
// /NeedAppearances is missing, return false as that is how PDF
// viewers are supposed to interpret it.
QPDF_DLL
bool getNeedAppearances();
// Indicate whether appearance streams must be regenerated. If you
// modify a field value, you should call setNeedAppearances(true)
// unless you also generate an appearance stream for the
// corresponding annotation at the same time. If you generate
// appearance streams for all fields, you can call
// setNeedAppearances(false). If you use
// QPDFFormFieldObjectHelper::setV, it will automatically call
// this method unless you tell it not to.
QPDF_DLL
void setNeedAppearances(bool);
// If /NeedAppearances is false, do nothing. Otherwise generate
// appearance streams for all widget annotations that need them.
// See comments in QPDFFormFieldObjectHelper.hh for
// generateAppearance for limitations. For checkbox and radio
// button fields, this code ensures that appearance state is
// consistent with the field's value and uses any pre-existing
// appearance streams.
QPDF_DLL
void generateAppearancesIfNeeded();
// Note: this method works on all annotations, not just ones with
// associated fields. For each annotation in old_annots, apply the
// given transformation matrix to create a new annotation. New
// annotations are appended to new_annots. If the annotation is
// associated with a form field, a new form field is created that
// points to the new annotation and is appended to new_fields, and
// the old field is added to old_fields.
//
// old_annots may belong to a different QPDF object. In that case,
// you should pass in from_qpdf, and copyForeignObject will be
// called automatically. If this is the case, for efficiency, you
// may pass in a QPDFAcroFormDocumentHelper for the other file to
// avoid the expensive process of creating one for each call to
// transformAnnotations. New fields and annotations are not added
// to the document or pages. You have to do that yourself after
// calling transformAnnotations. If this operation will leave
// orphaned fields behind, such as if you are replacing the old
// annotations with the new ones on the same page and the fields
// and annotations are not shared, you will also need to remove
// the old fields to prevent them from hanging round unreferenced.
QPDF_DLL
void transformAnnotations(
QPDFObjectHandle old_annots,
std::vector<QPDFObjectHandle>& new_annots,
std::vector<QPDFObjectHandle>& new_fields,
std::set<QPDFObjGen>& old_fields,
QPDFMatrix const& cm,
QPDF* from_qpdf = nullptr,
QPDFAcroFormDocumentHelper* from_afdh = nullptr);
// Copy form fields and annotations from one page to another,
// allowing the from page to be in a different QPDF or in the same
// QPDF. This would typically be called after calling addPage to
// add field/annotation awareness. When just copying the page by
// itself, annotations end up being shared, and fields end up
// being omitted because there is no reference to the field from
// the page. This method ensures that each separate copy of a page
// has private annotations and that fields and annotations are
// properly updated to resolve conflicts that may occur from
// common resource and field names across documents. It is
// basically a wrapper around transformAnnotations that handles
// updating the receiving page. If new_fields is non-null, any
// newly created fields are added to it.
QPDF_DLL
void fixCopiedAnnotations(
QPDFObjectHandle to_page,
QPDFObjectHandle from_page,
QPDFAcroFormDocumentHelper& from_afdh,
std::set<QPDFObjGen>* new_fields = nullptr);
// copyFieldsFromForeignPage was added in qpdf 10.2 and made to do
// nothing in 10.3. It wasn't actually doing the right thing and
// would result in broken files in all but the simplest case of a
// single page from one file being added to another file, as
// happens with qpdf --split-pages.
[[deprecated("Use fixCopiedAnnotations instead")]]
// ABI: delete this method
QPDF_DLL
void copyFieldsFromForeignPage(
QPDFPageObjectHelper foreign_page,
QPDFAcroFormDocumentHelper& foreign_afdh,
std::vector<QPDFObjectHandle>* copied_fields = nullptr);
private:
// Internal helpers. Their implementations are not visible in this
// header; the notes below are inferred from names and signatures
// only and should be confirmed against the implementation.
//
// analyze: presumably (re)builds the lazily created cache that
// invalidateCache() marks invalid -- TODO confirm.
void analyze();
// traverseField: presumably walks the field tree starting at
// `field`; `visited` looks like a guard against revisiting the same
// object -- TODO confirm.
void traverseField(QPDFObjectHandle field,
QPDFObjectHandle parent,
int depth, std::set<QPDFObjGen>& visited);
// getOrCreateAcroForm: presumably returns the document's /AcroForm
// dictionary, creating it when absent -- TODO confirm.
QPDFObjectHandle getOrCreateAcroForm();
// NOTE(review): the three adjust* helpers below are not described
// anywhere in this header. The parameter names suggest they deal
// with default appearance (/DA), quadding (/Q), and resource
// renaming via dr_map -- confirm against the implementation.
void adjustInheritedFields(
QPDFObjectHandle obj,
bool override_da, std::string const& from_default_da,
bool override_q, int from_default_q);
void adjustDefaultAppearances(
QPDFObjectHandle obj,
std::map<std::string,
std::map<std::string, std::string>> const& dr_map);
// NOTE(review): dr_map is taken by value here but by const reference
// in adjustDefaultAppearances above; confirm that the copy is
// intentional (e.g. the implementation modifies it locally).
void adjustAppearanceStream(
QPDFObjectHandle stream,
std::map<std::string,
std::map<std::string, std::string>> dr_map);
// Holder for the helper's private data members. Only
// QPDFAcroFormDocumentHelper (a friend) can reach the private
// constructors and fields.
class Members
{
friend class QPDFAcroFormDocumentHelper;
public:
QPDF_DLL
~Members();
private:
Members();
// Copy constructor is private; NOTE(review): presumably left
// undefined on purpose to prevent copying -- confirm.
Members(Members const&);
// Presumably true while the maps below reflect the current state of
// the document; see invalidateCache().
bool cache_valid;
// Annotation helpers recorded for each field, keyed by the field's
// QPDFObjGen.
std::map<QPDFObjGen,
std::vector<QPDFAnnotationObjectHelper>
> field_to_annotations;
// Field helper recorded for each annotation, keyed by the
// annotation's QPDFObjGen.
std::map<QPDFObjGen, QPDFFormFieldObjectHelper> annotation_to_field;
// Two-way bookkeeping between fields and names; presumably the
// fully-qualified names used by getFieldsWithQualifiedName() --
// TODO confirm.
std::map<QPDFObjGen, std::string> field_to_name;
std::map<std::string, std::set<QPDFObjGen>> name_to_fields;
};
// Handle to the private data above.
PointerHolder<Members> m;
};
#endif // QPDFACROFORMDOCUMENTHELPER_HH