// Copyright (c) 2005-2022 Jay Berkenbilt // // This file is part of qpdf. // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. // You may obtain a copy of the License at // // http://www.apache.org/licenses/LICENSE-2.0 // // Unless required by applicable law or agreed to in writing, software // distributed under the License is distributed on an "AS IS" BASIS, // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. // See the License for the specific language governing permissions and // limitations under the License. // // Versions of qpdf prior to version 7 were released under the terms // of version 2.0 of the Artistic License. At your option, you may // continue to consider qpdf to be licensed under those terms. Please // see the manual for additional information. #ifndef QPDFACROFORMDOCUMENTHELPER_HH #define QPDFACROFORMDOCUMENTHELPER_HH // This document helper is intended to help with operations on // interactive forms. Here are the key things to know: // * The PDF specification talks about interactive forms and also // about form XObjects. While form XObjects appear in parts of // interactive forms, this class is concerned about interactive // forms, not form XObjects. // // * Interactive forms are discussed in the PDF Specification (ISO PDF // 32000-1:2008) section 12.7. Also relevant is the section about // Widget annotations. Annotations are discussed in section 12.5 // with annotation dictionaries discussed in 12.5.1. Widget // annotations are discussed specifically in section 12.5.6.19. // // * What you need to know about the structure of interactive forms in // PDF files: // // - The document catalog contains the key "/AcroForm" which // contains a list of fields. Fields are represented as a tree // structure much like pages. Nodes in the fields tree may contain // other fields. Fields may inherit values of many of their // attributes from ancestors in the tree. // // - Fields may also have children that are widget annotations. As a // special case, and a cause of considerable confusion, if a field // has a single annotation as a child, the annotation dictionary // may be merged with the field dictionary. In that case, the // field and the annotation are in the same object. Note that, // while field dictionary attributes are inherited, annotation // dictionary attributes are not. // // - A page dictionary contains a key called "/Annots" which // contains a simple list of annotations. For any given annotation // of subtype "/Widget", you should encounter that annotation in // the "/Annots" dictionary of a page, and you should also be able // to reach it by traversing through the "/AcroForm" dictionary // from the document catalog. In the simplest case (and also a // very common case), a form field's widget annotation will be // merged with the field object, and the object will appear // directly both under "/Annots" in the page dictionary and under // "/Fields" in the "/AcroForm" dictionary. In a more complex // case, you may have to trace through various "/Kids" elements in // the "/AcroForm" field entry until you find the annotation // dictionary. #include #include #include #include #include #include #include #include class QPDFAcroFormDocumentHelper: public QPDFDocumentHelper { public: QPDF_DLL QPDFAcroFormDocumentHelper(QPDF&); QPDF_DLL virtual ~QPDFAcroFormDocumentHelper() { } // This class lazily creates an internal cache of the mapping // among form fields, annotations, and pages. Methods within this // class preserve the validity of this cache. However, if you // modify pages' annotation dictionaries, the document's /AcroForm // dictionary, or any form fields manually in a way that alters // the association between forms, fields, annotations, and pages, // it may cause this cache to become invalid. This method marks // the cache invalid and forces it to be regenerated the next time // it is needed. QPDF_DLL void invalidateCache(); QPDF_DLL bool hasAcroForm(); // Add a form field, initializing the document's AcroForm // dictionary if needed, updating the cache if necessary. Note // that you are adding fields that are copies of other fields, // this method may result in multiple fields existing with the // same qualified name, which can have unexpected side effects. In // that case, you should use addAndRenameFormFields() instead. QPDF_DLL void addFormField(QPDFFormFieldObjectHelper); // Add a collection of form fields making sure that their fully // qualified names don't conflict with already present form // fields. Fields within the collection of new fields that have // the same name as each other will continue to do so. QPDF_DLL void addAndRenameFormFields(std::vector fields); // Remove fields from the fields array QPDF_DLL void removeFormFields(std::set const&); // Set the name of a field, updating internal records of field // names. Name should be UTF-8 encoded. QPDF_DLL void setFormFieldName(QPDFFormFieldObjectHelper, std::string const& name); // Return a vector of all terminal fields in a document. Terminal // fields are fields that have no children that are also fields. // Terminal fields may still have children that are annotations. // Intermediate nodes in the fields tree are not included in this // list, but you can still reach them through the getParent method // of the field object helper. QPDF_DLL std::vector getFormFields(); // Return all the form fields that have the given fully-qualified // name and also have an explicit "/T" attribute. For this // information to be accurate, any changes to field names must be // done through setFormFieldName() above. QPDF_DLL std::set getFieldsWithQualifiedName(std::string const& name); // Return the annotations associated with a terminal field. Note // that in the case of a field having a single annotation, the // underlying object will typically be the same as the underlying // object for the field. QPDF_DLL std::vector getAnnotationsForField(QPDFFormFieldObjectHelper); // Return annotations of subtype /Widget for a page. QPDF_DLL std::vector getWidgetAnnotationsForPage(QPDFPageObjectHelper); // Return top-level form fields for a page. QPDF_DLL std::vector getFormFieldsForPage(QPDFPageObjectHelper); // Return the terminal field that is associated with this // annotation. If the annotation dictionary is merged with the // field dictionary, the underlying object will be the same, but // this is not always the case. Note that if you call this method // with an annotation that is not a widget annotation, there will // not be an associated field, and this method will return a // helper associated with a null object (isNull() == true). QPDF_DLL QPDFFormFieldObjectHelper getFieldForAnnotation(QPDFAnnotationObjectHelper); // Return the current value of /NeedAppearances. If // /NeedAppearances is missing, return false as that is how PDF // viewers are supposed to interpret it. QPDF_DLL bool getNeedAppearances(); // Indicate whether appearance streams must be regenerated. If you // modify a field value, you should call setNeedAppearances(true) // unless you also generate an appearance stream for the // corresponding annotation at the same time. If you generate // appearance streams for all fields, you can call // setNeedAppearances(false). If you use // QPDFFormFieldObjectHelper::setV, it will automatically call // this method unless you tell it not to. QPDF_DLL void setNeedAppearances(bool); // If /NeedAppearances is false, do nothing. Otherwise generate // appearance streams for all widget annotations that need them. // See comments in QPDFFormFieldObjectHelper.hh for // generateAppearance for limitations. For checkbox and radio // button fields, this code ensures that appearance state is // consistent with the field's value and uses any pre-existing // appearance streams. QPDF_DLL void generateAppearancesIfNeeded(); // Note: this method works on all annotations, not just ones with // associated fields. For each annotation in old_annots, apply the // given transformation matrix to create a new annotation. New // annotations are appended to new_annots. If the annotation is // associated with a form field, a new form field is created that // points to the new annotation and is appended to new_fields, and // the old field is added to old_fields. // // old_annots may belong to a different QPDF object. In that case, // you should pass in from_qpdf, and copyForeignObject will be // called automatically. If this is the case, for efficiency, you // may pass in a QPDFAcroFormDocumentHelper for the other file to // avoid the expensive process of creating one for each call to // transformAnnotations. New fields and annotations are not added // to the document or pages. You have to do that yourself after // calling transformAnnotations. If this operation will leave // orphaned fields behind, such as if you are replacing the old // annotations with the new ones on the same page and the fields // and annotations are not shared, you will also need to remove // the old fields to prevent them from hanging round unreferenced. QPDF_DLL void transformAnnotations( QPDFObjectHandle old_annots, std::vector& new_annots, std::vector& new_fields, std::set& old_fields, QPDFMatrix const& cm, QPDF* from_qpdf = nullptr, QPDFAcroFormDocumentHelper* from_afdh = nullptr); // Copy form fields and annotations from one page to another, // allowing the from page to be in a different QPDF or in the same // QPDF. This would typically be called after calling addPage to // add field/annotation awareness. When just copying the page by // itself, annotations end up being shared, and fields end up // being omitted because there is no reference to the field from // the page. This method ensures that each separate copy of a page // has private annotations and that fields and annotations are // properly updated to resolve conflicts that may occur from // common resource and field names across documents. It is // basically a wrapper around transformAnnotations that handles // updating the receiving page. If new_fields is non-null, any // newly created fields are added to it. QPDF_DLL void fixCopiedAnnotations( QPDFObjectHandle to_page, QPDFObjectHandle from_page, QPDFAcroFormDocumentHelper& from_afdh, std::set* new_fields = nullptr); // copyFieldsFromForeignPage was added in qpdf 10.2 and made to do // nothing in 10.3. It wasn't actually doing the right thing and // would result in broken files in all but the simplest case of a // single page from one file being added to another file, as // happens with qpdf --split-pages. [[deprecated("Use fixCopiedAnnotations instead")]] // ABI: delete this method QPDF_DLL void copyFieldsFromForeignPage( QPDFPageObjectHelper foreign_page, QPDFAcroFormDocumentHelper& foreign_afdh, std::vector* copied_fields = nullptr); private: void analyze(); void traverseField(QPDFObjectHandle field, QPDFObjectHandle parent, int depth, std::set& visited); QPDFObjectHandle getOrCreateAcroForm(); void adjustInheritedFields( QPDFObjectHandle obj, bool override_da, std::string const& from_default_da, bool override_q, int from_default_q); void adjustDefaultAppearances( QPDFObjectHandle obj, std::map> const& dr_map); void adjustAppearanceStream( QPDFObjectHandle stream, std::map> dr_map); class Members { friend class QPDFAcroFormDocumentHelper; public: QPDF_DLL ~Members(); private: Members(); Members(Members const&); bool cache_valid; std::map > field_to_annotations; std::map annotation_to_field; std::map field_to_name; std::map> name_to_fields; }; PointerHolder m; }; #endif // QPDFACROFORMDOCUMENTHELPER_HH