From ad34b9c278608dfdcfdbe7402acb3a6dd04c3d0e Mon Sep 17 00:00:00 2001 From: Jay Berkenbilt Date: Mon, 8 Feb 2021 18:07:21 -0500 Subject: [PATCH] Implement helpers for file attachments --- ChangeLog | 6 + include/qpdf/QPDFEFStreamObjectHelper.hh | 122 +++++++++ .../qpdf/QPDFEmbeddedFileDocumentHelper.hh | 97 ++++++++ include/qpdf/QPDFFileSpecObjectHelper.hh | 126 ++++++++++ libqpdf/QPDFEFStreamObjectHelper.cc | 193 +++++++++++++++ libqpdf/QPDFEmbeddedFileDocumentHelper.cc | 146 +++++++++++ libqpdf/QPDFFileSpecObjectHelper.cc | 157 ++++++++++++ libqpdf/build.mk | 3 + manual/qpdf-manual.xml | 10 + qpdf/qpdf.testcov | 2 + qpdf/qtest/qpdf.test | 23 ++ qpdf/qtest/qpdf/test76.out | 4 + qpdf/qtest/qpdf/test76.pdf | 233 ++++++++++++++++++ qpdf/qtest/qpdf/test77.pdf | 194 +++++++++++++++ qpdf/test_driver.cc | 62 +++++ 15 files changed, 1378 insertions(+) create mode 100644 include/qpdf/QPDFEFStreamObjectHelper.hh create mode 100644 include/qpdf/QPDFEmbeddedFileDocumentHelper.hh create mode 100644 include/qpdf/QPDFFileSpecObjectHelper.hh create mode 100644 libqpdf/QPDFEFStreamObjectHelper.cc create mode 100644 libqpdf/QPDFEmbeddedFileDocumentHelper.cc create mode 100644 libqpdf/QPDFFileSpecObjectHelper.cc create mode 100644 qpdf/qtest/qpdf/test76.out create mode 100644 qpdf/qtest/qpdf/test76.pdf create mode 100644 qpdf/qtest/qpdf/test77.pdf diff --git a/ChangeLog b/ChangeLog index a6f1c4b4..0511ca53 100644 --- a/ChangeLog +++ b/ChangeLog @@ -4,6 +4,12 @@ pdf_time_to_qpdf_time, qpdf_time_to_pdf_time, get_current_qpdf_time. +2021-02-08 Jay Berkenbilt + + * Add helper classes for file attachments: + QPDFEmbeddedFileDocumentHelper, QPDFFileSpecObjectHelper, + QPDFEFStreamObjectHelper. See their header files for details. 
+ 2021-02-07 Jay Berkenbilt * Add new functions QUtil::pipe_file and QUtil::file_provider for diff --git a/include/qpdf/QPDFEFStreamObjectHelper.hh b/include/qpdf/QPDFEFStreamObjectHelper.hh new file mode 100644 index 00000000..fe960785 --- /dev/null +++ b/include/qpdf/QPDFEFStreamObjectHelper.hh @@ -0,0 +1,122 @@ +// Copyright (c) 2005-2021 Jay Berkenbilt +// +// This file is part of qpdf. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +// +// Versions of qpdf prior to version 7 were released under the terms +// of version 2.0 of the Artistic License. At your option, you may +// continue to consider qpdf to be licensed under those terms. Please +// see the manual for additional information. + +#ifndef QPDFEFSTREAMOBJECTHELPER_HH +#define QPDFEFSTREAMOBJECTHELPER_HH + +#include + +#include + +#include +#include + +// This class provides a higher level interface around Embedded File +// Streams, which are discussed in section 7.11.4 of the ISO-32000 PDF +// specification. + +class QPDFEFStreamObjectHelper: public QPDFObjectHelper +{ + public: + QPDF_DLL + QPDFEFStreamObjectHelper(QPDFObjectHandle); + QPDF_DLL + virtual ~QPDFEFStreamObjectHelper() = default; + + // Date parameters are strings that conform to the PDF spec for + // date/time strings, which is "D:yyyymmddhhmmss<z>" where <z> is + // either "Z" for UTC or "-hh'mm'" or "+hh'mm'" for timezone + // offset. Examples: "D:20210207161528-05'00'", + // "D:20210207211528Z". See QUtil::qpdf_time_to_pdf_time. 
+ + QPDF_DLL + std::string getCreationDate(); + QPDF_DLL + std::string getModDate(); + // Get size as reported in the object; return 0 if not present. + QPDF_DLL + size_t getSize(); + // Subtype is a mime type such as "text/plain" + QPDF_DLL + std::string getSubtype(); + // Return the MD5 checksum as stored in the object as a binary + // string. This does not check consistency with the data. If not + // present, return an empty string. + QPDF_DLL + std::string getChecksum(); + + // Setters return a reference to this object so that they can be + // used as fluent interfaces, e.g. + // efsoh.setCreationDate(x).setModDate(y); + + // Create a new embedded file stream with the given stream data, + // which can be provided in any of several ways. To get the new + // object back, call getObjectHandle() on the returned object. The + // checksum and size are computed automatically and stored. Other + // parameters may be supplied using setters defined below. + QPDF_DLL + static QPDFEFStreamObjectHelper + createEFStream(QPDF& qpdf, PointerHolder data); + QPDF_DLL + static QPDFEFStreamObjectHelper + createEFStream(QPDF& qpdf, std::string const& data); + // The provider function must write the data to the given + // pipeline. The function may be called multiple times by the qpdf + // library. You can pass QUtil::file_provider(filename) as the + // provider to have the qpdf library provide the contents of + // filename as a binary. + QPDF_DLL + static QPDFEFStreamObjectHelper + createEFStream(QPDF& qpdf, std::function provider); + + // Setters for other parameters + QPDF_DLL + QPDFEFStreamObjectHelper& setCreationDate(std::string const&); + QPDF_DLL + QPDFEFStreamObjectHelper& setModDate(std::string const&); + + // Set subtype as a mime-type, e.g. "text/plain" or + // "application/pdf". 
+ QPDF_DLL + QPDFEFStreamObjectHelper& setSubtype(std::string const&); + + private: + QPDFObjectHandle getParam(std::string const& pkey); + void setParam(std::string const& pkey, QPDFObjectHandle const&); + static QPDFEFStreamObjectHelper newFromStream(QPDFObjectHandle stream); + + class Members + { + friend class QPDFEFStreamObjectHelper; + + public: + QPDF_DLL + ~Members() = default; + + private: + Members(); + Members(Members const&) = delete; + }; + + PointerHolder m; +}; + +#endif // QPDFEFSTREAMOBJECTHELPER_HH diff --git a/include/qpdf/QPDFEmbeddedFileDocumentHelper.hh b/include/qpdf/QPDFEmbeddedFileDocumentHelper.hh new file mode 100644 index 00000000..e850eb9c --- /dev/null +++ b/include/qpdf/QPDFEmbeddedFileDocumentHelper.hh @@ -0,0 +1,97 @@ +// Copyright (c) 2005-2021 Jay Berkenbilt +// +// This file is part of qpdf. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +// +// Versions of qpdf prior to version 7 were released under the terms +// of version 2.0 of the Artistic License. At your option, you may +// continue to consider qpdf to be licensed under those terms. Please +// see the manual for additional information. + +#ifndef QPDFEMBEDDEDFILEDOCUMENTHELPER_HH +#define QPDFEMBEDDEDFILEDOCUMENTHELPER_HH + +#include + +#include +#include +#include +#include + +#include +#include + +// This class provides a higher level interface around document-level +// file attachments, also known as embedded files. 
These are discussed +// in sections 7.7.4 and 7.11 of the ISO-32000 PDF specification. + +class QPDFEmbeddedFileDocumentHelper: public QPDFDocumentHelper +{ + public: + QPDF_DLL + QPDFEmbeddedFileDocumentHelper(QPDF&); + QPDF_DLL + virtual ~QPDFEmbeddedFileDocumentHelper() = default; + + QPDF_DLL + bool hasEmbeddedFiles() const; + + QPDF_DLL + std::map> getEmbeddedFiles(); + + // If an embedded file with the given name exists, return a + // (shared) pointer to it. Otherwise, return nullptr. + QPDF_DLL + std::shared_ptr + getEmbeddedFile(std::string const& name); + + // Add or replace an attachment + QPDF_DLL + void replaceEmbeddedFile( + std::string const& name, QPDFFileSpecObjectHelper const&); + + // Remove an embedded file if present. Return value is true if the + // file was present and was removed. This method not only removes + // the embedded file from the embedded files name tree but also + // nulls out the file specification dictionary. This means that + // any references to this file from file attachment annotations + // will also stop working. This is the best way to make the + // attachment actually disappear from the file and not just from + // the list of attachments. + QPDF_DLL + bool removeEmbeddedFile(std::string const& name); + + private: + void initEmbeddedFiles(); + + class Members + { + friend class QPDFEmbeddedFileDocumentHelper; + + public: + QPDF_DLL + ~Members() = default; + + private: + Members(); + Members(Members const&) = delete; + + std::shared_ptr embedded_files; + }; + + PointerHolder m; +}; + +#endif // QPDFEMBEDDEDFILEDOCUMENTHELPER_HH diff --git a/include/qpdf/QPDFFileSpecObjectHelper.hh b/include/qpdf/QPDFFileSpecObjectHelper.hh new file mode 100644 index 00000000..28012e3d --- /dev/null +++ b/include/qpdf/QPDFFileSpecObjectHelper.hh @@ -0,0 +1,126 @@ +// Copyright (c) 2005-2021 Jay Berkenbilt +// +// This file is part of qpdf. 
+// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +// +// Versions of qpdf prior to version 7 were released under the terms +// of version 2.0 of the Artistic License. At your option, you may +// continue to consider qpdf to be licensed under those terms. Please +// see the manual for additional information. + +#ifndef QPDFFILESPECOBJECTHELPER_HH +#define QPDFFILESPECOBJECTHELPER_HH + +#include + +#include + +#include +#include + +// This class provides a higher level interface around File +// Specification dictionaries, which are discussed in section 7.11 of +// the ISO-32000 PDF specification. + +class QPDFFileSpecObjectHelper: public QPDFObjectHelper +{ + public: + QPDF_DLL + QPDFFileSpecObjectHelper(QPDFObjectHandle); + QPDF_DLL + virtual ~QPDFFileSpecObjectHelper() = default; + + QPDF_DLL + std::string getDescription(); + + // Get the main filename for this file specification. In priority + // order, check /UF, /F, /Unix, /DOS, /Mac. + QPDF_DLL + std::string getFilename(); + + // Return any of /UF, /F, /Unix, /DOS, /Mac filename keys that may + // be present in the object. + QPDF_DLL + std::map getFilenames(); + + // Get the requested embedded file stream for this file + // specification. If key is empty, in priority order, check /UF, + // /F, /Unix, /DOS, /Mac. Returns a null object if not found. If + // this is an actual embedded file stream, its data is the content + // of the attachment. 
You can also use + // QPDFEFStreamObjectHelper for higher level access to + // the parameters. + QPDF_DLL + QPDFObjectHandle getEmbeddedFileStream(std::string const& key = ""); + + // Return the /EF key of the file spec, which is a map from file + // name key to embedded file stream. + QPDF_DLL + QPDFObjectHandle getEmbeddedFileStreams(); + + // Setters return a reference to this object so that they can be + // used as fluent interfaces, e.g. + // fsoh.setDescription(x).setFilename(y); + + // Create a new filespec as an indirect object with the given + // filename, and attach the contents of the specified file as data + // in an embedded file stream. + QPDF_DLL + static + QPDFFileSpecObjectHelper createFileSpec( + QPDF& qpdf, + std::string const& filename, + std::string const& fullpath); + + // Create a new filespec as an indirect object with the given + // unicode filename and embedded file stream. The file name will + // be used as both /UF and /F. If you need to override, call + // setFilename. + QPDF_DLL + static + QPDFFileSpecObjectHelper createFileSpec( + QPDF& qpdf, + std::string const& filename, + QPDFEFStreamObjectHelper); + + QPDF_DLL + QPDFFileSpecObjectHelper& setDescription(std::string const&); + // setFilename sets /UF to unicode_name. If compat_name is empty, + // it is also set to unicode_name. unicode_name should be a UTF-8 + // encoded string. compat_name is converted to a string + // QPDFObjectHandle literally, preserving whatever encoding it + // might happen to have. 
+ QPDF_DLL + QPDFFileSpecObjectHelper& setFilename( + std::string const& unicode_name, + std::string const& compat_name = ""); + + private: + class Members + { + friend class QPDFFileSpecObjectHelper; + + public: + QPDF_DLL + ~Members() = default; + + private: + Members(); + Members(Members const&) = delete; + }; + + PointerHolder m; +}; + +#endif // QPDFFILESPECOBJECTHELPER_HH diff --git a/libqpdf/QPDFEFStreamObjectHelper.cc b/libqpdf/QPDFEFStreamObjectHelper.cc new file mode 100644 index 00000000..c4e64a71 --- /dev/null +++ b/libqpdf/QPDFEFStreamObjectHelper.cc @@ -0,0 +1,193 @@ +#include +#include +#include +#include +#include +#include + +QPDFEFStreamObjectHelper::QPDFEFStreamObjectHelper( + QPDFObjectHandle oh) : + QPDFObjectHelper(oh), + m(new Members()) +{ +} + +QPDFEFStreamObjectHelper::Members::Members() +{ +} + +QPDFObjectHandle +QPDFEFStreamObjectHelper::getParam(std::string const& pkey) +{ + auto params = this->oh.getDict().getKey("/Params"); + if (params.isDictionary()) + { + return params.getKey(pkey); + } + return QPDFObjectHandle::newNull(); +} + +void +QPDFEFStreamObjectHelper::setParam( + std::string const& pkey, QPDFObjectHandle const& pval) +{ + auto params = this->oh.getDict().getKey("/Params"); + if (! 
params.isDictionary()) + { + params = QPDFObjectHandle::newDictionary(); + this->oh.getDict().replaceKey("/Params", params); + } + params.replaceKey(pkey, pval); +} + +std::string +QPDFEFStreamObjectHelper::getCreationDate() +{ + auto val = getParam("/CreationDate"); + if (val.isString()) + { + return val.getUTF8Value(); + } + return ""; +} + +std::string +QPDFEFStreamObjectHelper::getModDate() +{ + auto val = getParam("/ModDate"); + if (val.isString()) + { + return val.getUTF8Value(); + } + return ""; +} + +size_t +QPDFEFStreamObjectHelper::getSize() +{ + auto val = getParam("/Size"); + if (val.isInteger()) + { + return QIntC::to_size(val.getUIntValueAsUInt()); + } + return 0; +} + +std::string +QPDFEFStreamObjectHelper::getSubtype() +{ + auto val = getParam("/Subtype"); + if (val.isName()) + { + auto n = val.getName(); + if (n.length() > 1) + { + return n.substr(1); + } + } + return ""; +} + +std::string +QPDFEFStreamObjectHelper::getChecksum() +{ + auto val = getParam("/CheckSum"); + if (val.isString()) + { + return val.getStringValue(); + } + return ""; +} + +QPDFEFStreamObjectHelper +QPDFEFStreamObjectHelper::createEFStream( + QPDF& qpdf, PointerHolder data) +{ + return newFromStream(QPDFObjectHandle::newStream(&qpdf, data)); +} + +QPDFEFStreamObjectHelper +QPDFEFStreamObjectHelper::createEFStream( + QPDF& qpdf, std::string const& data) +{ + return newFromStream(QPDFObjectHandle::newStream(&qpdf, data)); +} + +namespace QEF +{ + class Provider: public QPDFObjectHandle::StreamDataProvider + { + public: + Provider(std::function provider) : + StreamDataProvider(false), + provider(provider) + { + } + virtual ~Provider() = default; + virtual void provideStreamData(int objid, int generation, + Pipeline* pipeline) override + { + this->provider(pipeline); + } + + private: + std::function provider; + }; +}; + +QPDFEFStreamObjectHelper +QPDFEFStreamObjectHelper::createEFStream( + QPDF& qpdf, std::function provider) +{ + auto stream = QPDFObjectHandle::newStream(&qpdf); 
+ stream.replaceStreamData(new QEF::Provider(provider), + QPDFObjectHandle::newNull(), + QPDFObjectHandle::newNull()); + return newFromStream(stream); +} + +QPDFEFStreamObjectHelper& +QPDFEFStreamObjectHelper::setCreationDate(std::string const& date) +{ + setParam("/CreationDate", QPDFObjectHandle::newString(date)); + return *this; +} + +QPDFEFStreamObjectHelper& +QPDFEFStreamObjectHelper::setModDate(std::string const& date) +{ + setParam("/ModDate", QPDFObjectHandle::newString(date)); + return *this; +} + +QPDFEFStreamObjectHelper& +QPDFEFStreamObjectHelper::setSubtype(std::string const& subtype) +{ + setParam("/Subtype", QPDFObjectHandle::newName("/" + subtype)); + return *this; +} + +QPDFEFStreamObjectHelper +QPDFEFStreamObjectHelper::newFromStream(QPDFObjectHandle stream) +{ + QPDFEFStreamObjectHelper result(stream); + stream.getDict().replaceKey( + "/Type", QPDFObjectHandle::newName("/EmbeddedFile")); + Pl_Discard discard; + Pl_MD5 md5("EF md5", &discard); + Pl_Count count("EF size", &md5); + if (! stream.pipeStreamData(&count, nullptr, 0, qpdf_dl_all)) + { + stream.warnIfPossible( + "unable to get stream data for new embedded file stream"); + } + else + { + result.setParam( + "/Size", QPDFObjectHandle::newInteger(count.getCount())); + result.setParam( + "/CheckSum", QPDFObjectHandle::newString( + QUtil::hex_decode(md5.getHexDigest()))); + } + return result; +} diff --git a/libqpdf/QPDFEmbeddedFileDocumentHelper.cc b/libqpdf/QPDFEmbeddedFileDocumentHelper.cc new file mode 100644 index 00000000..6348529d --- /dev/null +++ b/libqpdf/QPDFEmbeddedFileDocumentHelper.cc @@ -0,0 +1,146 @@ +#include + +// File attachments are stored in the /EmbeddedFiles (name tree) key +// of the /Names dictionary from the document catalog. Each entry +// points to a /FileSpec, which in turn points to one or more Embedded +// File Streams. Note that file specs can appear in other places as +// well, such as file attachment annotations, among others. 
+// +// root -> /Names -> /EmbeddedFiles = name tree +// filename -> filespec +// << +// /Desc () +// /EF << +// /F x 0 R +// /UF x 0 R +// >> +// /F (name) +// /UF (name) +// /Type /Filespec +// >> +// x 0 obj +// << +// /Type /EmbeddedFile +// /DL filesize % not in spec? +// /Params << +// /CheckSum +// /CreationDate (D:yyyymmddhhmmss{-hh'mm'|+hh'mm'|Z}) +// /ModDate (D:yyyymmddhhmmss-hh'mm') +// /Size filesize +// /Subtype /mime#2ftype +// >> +// >> + +QPDFEmbeddedFileDocumentHelper::QPDFEmbeddedFileDocumentHelper(QPDF& qpdf) : + QPDFDocumentHelper(qpdf), + m(new Members()) +{ + auto root = qpdf.getRoot(); + auto names = root.getKey("/Names"); + if (names.isDictionary()) + { + auto embedded_files = names.getKey("/EmbeddedFiles"); + if (embedded_files.isDictionary()) + { + this->m->embedded_files = + std::make_shared( + embedded_files, qpdf); + } + } +} + +QPDFEmbeddedFileDocumentHelper::Members::Members() +{ +} + +bool +QPDFEmbeddedFileDocumentHelper::hasEmbeddedFiles() const +{ + return (this->m->embedded_files.get() != nullptr); +} + +void +QPDFEmbeddedFileDocumentHelper::initEmbeddedFiles() +{ + if (hasEmbeddedFiles()) + { + return; + } + auto root = qpdf.getRoot(); + auto names = root.getKey("/Names"); + if (! names.isDictionary()) + { + names = QPDFObjectHandle::newDictionary(); + root.replaceKey("/Names", names); + } + auto embedded_files = names.getKey("/EmbeddedFiles"); + if (! 
embedded_files.isDictionary()) + { + auto nth = QPDFNameTreeObjectHelper::newEmpty(this->qpdf); + names.replaceKey("/EmbeddedFiles", nth.getObjectHandle()); + this->m->embedded_files = + std::make_shared(nth); + } +} + +std::shared_ptr +QPDFEmbeddedFileDocumentHelper::getEmbeddedFile(std::string const& name) +{ + std::shared_ptr result; + if (this->m->embedded_files) + { + auto i = this->m->embedded_files->find(name); + if (i != this->m->embedded_files->end()) + { + result = std::make_shared(i->second); + } + } + return result; +} + +std::map> +QPDFEmbeddedFileDocumentHelper::getEmbeddedFiles() +{ + std::map> result; + if (this->m->embedded_files) + { + for (auto const& i: *(this->m->embedded_files)) + { + result[i.first] = std::make_shared( + i.second); + } + } + return result; +} + +void +QPDFEmbeddedFileDocumentHelper::replaceEmbeddedFile( + std::string const& name, QPDFFileSpecObjectHelper const& fs) +{ + initEmbeddedFiles(); + this->m->embedded_files->insert( + name, fs.getObjectHandle()); +} + +bool +QPDFEmbeddedFileDocumentHelper::removeEmbeddedFile(std::string const& name) +{ + if (! hasEmbeddedFiles()) + { + return false; + } + auto iter = this->m->embedded_files->find(name); + if (iter == this->m->embedded_files->end()) + { + return false; + } + auto oh = iter->second; + iter.remove(); + if (oh.isIndirect()) + { + this->qpdf.replaceObject(oh.getObjGen(), QPDFObjectHandle::newNull()); + } + + return true; +} diff --git a/libqpdf/QPDFFileSpecObjectHelper.cc b/libqpdf/QPDFFileSpecObjectHelper.cc new file mode 100644 index 00000000..ad422d2b --- /dev/null +++ b/libqpdf/QPDFFileSpecObjectHelper.cc @@ -0,0 +1,157 @@ +#include +#include +#include +#include + +#include +#include + +QPDFFileSpecObjectHelper::QPDFFileSpecObjectHelper( + QPDFObjectHandle oh) : + QPDFObjectHelper(oh) +{ + if (! oh.isDictionary()) + { + oh.warnIfPossible("Embedded file object is not a dictionary"); + return; + } + auto type = oh.getKey("/Type"); + if (! 
(type.isName() && (type.getName() == "/Filespec"))) + { + oh.warnIfPossible("Embedded file object's type is not /Filespec"); + } +} + +QPDFFileSpecObjectHelper::Members::Members() +{ +} + +static std::vector name_keys = { + "/UF", "/F", "/Unix", "/DOS", "/Mac"}; + +std::string +QPDFFileSpecObjectHelper::getDescription() +{ + std::string result; + auto desc = this->oh.getKey("/Desc"); + if (desc.isString()) + { + result = desc.getUTF8Value(); + } + return result; +} + +std::string +QPDFFileSpecObjectHelper::getFilename() +{ + for (auto const& i: name_keys) + { + auto k = this->oh.getKey(i); + if (k.isString()) + { + return k.getUTF8Value(); + } + } + return ""; +} + +std::map +QPDFFileSpecObjectHelper::getFilenames() +{ + std::map result; + for (auto const& i: name_keys) + { + auto k = this->oh.getKey(i); + if (k.isString()) + { + result[i] = k.getUTF8Value(); + } + } + return result; +} + +QPDFObjectHandle +QPDFFileSpecObjectHelper::getEmbeddedFileStream(std::string const& key) +{ + auto ef = this->oh.getKey("/EF"); + if (! ef.isDictionary()) + { + return QPDFObjectHandle::newNull(); + } + if (! 
key.empty()) + { + return ef.getKey(key); + } + for (auto const& i: name_keys) + { + auto k = ef.getKey(i); + if (k.isStream()) + { + return k; + } + } + return QPDFObjectHandle::newNull(); +} + +QPDFObjectHandle +QPDFFileSpecObjectHelper::getEmbeddedFileStreams() +{ + return this->oh.getKey("/EF"); +} + +QPDFFileSpecObjectHelper +QPDFFileSpecObjectHelper::createFileSpec( + QPDF& qpdf, + std::string const& filename, + std::string const& fullpath) +{ + return createFileSpec( + qpdf, filename, + QPDFEFStreamObjectHelper::createEFStream( + qpdf, + QUtil::file_provider(fullpath))); +} + +QPDFFileSpecObjectHelper +QPDFFileSpecObjectHelper::createFileSpec( + QPDF& qpdf, + std::string const& filename, + QPDFEFStreamObjectHelper efsoh) +{ + auto oh = qpdf.makeIndirectObject(QPDFObjectHandle::newDictionary()); + oh.replaceKey("/Type", QPDFObjectHandle::newName("/Filespec")); + QPDFFileSpecObjectHelper result(oh); + result.setFilename(filename); + auto ef = QPDFObjectHandle::newDictionary(); + ef.replaceKey("/F", efsoh.getObjectHandle()); + ef.replaceKey("/UF", efsoh.getObjectHandle()); + oh.replaceKey("/EF", ef); + return result; +} + +QPDFFileSpecObjectHelper& +QPDFFileSpecObjectHelper::setDescription(std::string const& desc) +{ + this->oh.replaceKey("/Desc", QPDFObjectHandle::newUnicodeString(desc)); + return *this; +} + +QPDFFileSpecObjectHelper& +QPDFFileSpecObjectHelper::setFilename( + std::string const& unicode_name, + std::string const& compat_name) +{ + auto uf = QPDFObjectHandle::newUnicodeString(unicode_name); + this->oh.replaceKey("/UF", uf); + if (compat_name.empty()) + { + QTC::TC("qpdf", "QPDFFileSpecObjectHelper empty compat_name"); + this->oh.replaceKey("/F", uf); + } + else + { + QTC::TC("qpdf", "QPDFFileSpecObjectHelper non-empty compat_name"); + this->oh.replaceKey("/F", QPDFObjectHandle::newString(compat_name)); + } + return *this; +} diff --git a/libqpdf/build.mk b/libqpdf/build.mk index ca15611a..f453e58e 100644 --- a/libqpdf/build.mk +++ 
b/libqpdf/build.mk @@ -58,7 +58,10 @@ SRCS_libqpdf = \ libqpdf/QPDFAcroFormDocumentHelper.cc \ libqpdf/QPDFAnnotationObjectHelper.cc \ libqpdf/QPDFCryptoProvider.cc \ + libqpdf/QPDFEFStreamObjectHelper.cc \ + libqpdf/QPDFEmbeddedFileDocumentHelper.cc \ libqpdf/QPDFExc.cc \ + libqpdf/QPDFFileSpecObjectHelper.cc \ libqpdf/QPDFFormFieldObjectHelper.cc \ libqpdf/QPDFMatrix.cc \ libqpdf/QPDFNameTreeObjectHelper.cc \ diff --git a/manual/qpdf-manual.xml b/manual/qpdf-manual.xml index 05526958..174883a7 100644 --- a/manual/qpdf-manual.xml +++ b/manual/qpdf-manual.xml @@ -4949,6 +4949,16 @@ print "\n"; working with PDF timestamp strings. + + + Add new helper classes for supporting file attachments, also + known as embedded files. New classes are + QPDFEmbeddedFileDocumentHelper, + QPDFFileSpecObjectHelper, and + QPDFEFStreamObjectHelper. See their + respective headers for details. + + Add warn to diff --git a/qpdf/qpdf.testcov b/qpdf/qpdf.testcov index 20015780..a2d2b71f 100644 --- a/qpdf/qpdf.testcov +++ b/qpdf/qpdf.testcov @@ -569,3 +569,5 @@ QPDFPageObjectHelper unresolved names 0 QPDFPageObjectHelper resolving unresolved 0 qpdf password stdin 0 qpdf password file 0 +QPDFFileSpecObjectHelper empty compat_name 0 +QPDFFileSpecObjectHelper non-empty compat_name 0 diff --git a/qpdf/qtest/qpdf.test b/qpdf/qtest/qpdf.test index 45600db9..2412f6d4 100644 --- a/qpdf/qtest/qpdf.test +++ b/qpdf/qtest/qpdf.test @@ -520,6 +520,29 @@ $td->runtest("page operations on form xobject", $td->EXIT_STATUS => 0}, $td->NORMALIZE_NEWLINES); +show_ntests(); +# ---------- +$td->notify("--- File Attachments ---"); +$n_tests += 4; + +open(F, ">auto-txt") or die; +print F "from file"; +close(F); +$td->runtest("attachments", + {$td->COMMAND => "test_driver 76 minimal.pdf auto-txt"}, + {$td->FILE => "test76.out", $td->EXIT_STATUS => 0}, + $td->NORMALIZE_NEWLINES); +$td->runtest("check output", + {$td->FILE => "a.pdf"}, + {$td->FILE => "test76.pdf"}); +$td->runtest("attachments", + {$td->COMMAND => 
"test_driver 77 test76.pdf"}, + {$td->STRING => "test 77 done\n", $td->EXIT_STATUS => 0}, + $td->NORMALIZE_NEWLINES); +$td->runtest("check output", + {$td->FILE => "a.pdf"}, + {$td->FILE => "test77.pdf"}); + show_ntests(); # ---------- $td->notify("--- Stream Replacement Tests ---"); diff --git a/qpdf/qtest/qpdf/test76.out b/qpdf/qtest/qpdf/test76.out new file mode 100644 index 00000000..4264a737 --- /dev/null +++ b/qpdf/qtest/qpdf/test76.out @@ -0,0 +1,4 @@ +att1 -> att1.txt +att2 -> att2.txt +att3 -> Ï€.txt +test 76 done diff --git a/qpdf/qtest/qpdf/test76.pdf b/qpdf/qtest/qpdf/test76.pdf new file mode 100644 index 00000000..d81d03f7 --- /dev/null +++ b/qpdf/qtest/qpdf/test76.pdf @@ -0,0 +1,233 @@ +%PDF-1.3 +%¿÷¢þ +%QDF-1.0 + +%% Original object ID: 1 0 +1 0 obj +<< + /Names << + /EmbeddedFiles 2 0 R + >> + /Pages 3 0 R + /Type /Catalog +>> +endobj + +%% Original object ID: 9 0 +2 0 obj +<< + /Names [ + (att1) + 4 0 R + (att2) + 5 0 R + (att3) + 6 0 R + ] +>> +endobj + +%% Original object ID: 2 0 +3 0 obj +<< + /Count 1 + /Kids [ + 7 0 R + ] + /Type /Pages +>> +endobj + +%% Original object ID: 8 0 +4 0 obj +<< + /Desc (some text) + /EF << + /F 8 0 R + /UF 8 0 R + >> + /F (att1.txt) + /Type /Filespec + /UF (att1.txt) +>> +endobj + +%% Original object ID: 12 0 +5 0 obj +<< + /EF << + /F 10 0 R + /UF 10 0 R + >> + /F (att2.txt) + /Type /Filespec + /UF (att2.txt) +>> +endobj + +%% Original object ID: 13 0 +6 0 obj +<< + /EF << + /F 12 0 R + /UF 12 0 R + >> + /F (att3.txt) + /Type /Filespec + /UF +>> +endobj + +%% Page 1 +%% Original object ID: 3 0 +7 0 obj +<< + /Contents 14 0 R + /MediaBox [ + 0 + 0 + 612 + 792 + ] + /Parent 3 0 R + /Resources << + /Font << + /F1 16 0 R + >> + /ProcSet 17 0 R + >> + /Type /Page +>> +endobj + +%% Original object ID: 7 0 +8 0 obj +<< + /Params << + /CheckSum <2e10f186a4cdf5be438747f4bdc2d4d4> + /CreationDate (D:20210207191121-05'00') + /ModDate (D:20210208001122Z) + /Size 9 + /Subtype /text#2fplain + >> + /Type /EmbeddedFile + /Length 
9 0 R +>> +stream +from file +endstream +endobj + +%QDF: ignore_newline +9 0 obj +9 +endobj + +%% Original object ID: 10 0 +10 0 obj +<< + /Params << + /CheckSum <2fce9c8228e360ba9b04a1bd1bf63d6b> + /Size 11 + /Subtype /text#2fplain + >> + /Type /EmbeddedFile + /Length 11 0 R +>> +stream +from string +endstream +endobj + +%QDF: ignore_newline +11 0 obj +11 +endobj + +%% Original object ID: 11 0 +12 0 obj +<< + /Params << + /CheckSum <2236c155b1d62b7f00285bba081d4336> + /Size 11 + /Subtype /text#2fplain + >> + /Type /EmbeddedFile + /Length 13 0 R +>> +stream +from buffer +endstream +endobj + +%QDF: ignore_newline +13 0 obj +11 +endobj + +%% Contents for page 1 +%% Original object ID: 4 0 +14 0 obj +<< + /Length 15 0 R +>> +stream +BT + /F1 24 Tf + 72 720 Td + (Potato) Tj +ET +endstream +endobj + +15 0 obj +44 +endobj + +%% Original object ID: 6 0 +16 0 obj +<< + /BaseFont /Helvetica + /Encoding /WinAnsiEncoding + /Name /F1 + /Subtype /Type1 + /Type /Font +>> +endobj + +%% Original object ID: 5 0 +17 0 obj +[ + /PDF + /Text +] +endobj + +xref +0 18 +0000000000 65535 f +0000000052 00000 n +0000000175 00000 n +0000000302 00000 n +0000000401 00000 n +0000000563 00000 n +0000000707 00000 n +0000000876 00000 n +0000001098 00000 n +0000001389 00000 n +0000001435 00000 n +0000001654 00000 n +0000001702 00000 n +0000001921 00000 n +0000001991 00000 n +0000002092 00000 n +0000002139 00000 n +0000002285 00000 n +trailer << + /Root 1 0 R + /Size 18 + /ID [<31415926535897932384626433832795><31415926535897932384626433832795>] +>> +startxref +2321 +%%EOF diff --git a/qpdf/qtest/qpdf/test77.pdf b/qpdf/qtest/qpdf/test77.pdf new file mode 100644 index 00000000..4aa01abf --- /dev/null +++ b/qpdf/qtest/qpdf/test77.pdf @@ -0,0 +1,194 @@ +%PDF-1.3 +%¿÷¢þ +%QDF-1.0 + +%% Original object ID: 1 0 +1 0 obj +<< + /Names << + /EmbeddedFiles 2 0 R + >> + /Pages 3 0 R + /Type /Catalog +>> +endobj + +%% Original object ID: 2 0 +2 0 obj +<< + /Names [ + (att1) + 4 0 R + (att3) + 5 0 R + ] +>> 
+endobj + +%% Original object ID: 3 0 +3 0 obj +<< + /Count 1 + /Kids [ + 6 0 R + ] + /Type /Pages +>> +endobj + +%% Original object ID: 4 0 +4 0 obj +<< + /Desc (some text) + /EF << + /F 7 0 R + /UF 7 0 R + >> + /F (att1.txt) + /Type /Filespec + /UF (att1.txt) +>> +endobj + +%% Original object ID: 6 0 +5 0 obj +<< + /EF << + /F 9 0 R + /UF 9 0 R + >> + /F (att3.txt) + /Type /Filespec + /UF +>> +endobj + +%% Page 1 +%% Original object ID: 7 0 +6 0 obj +<< + /Contents 11 0 R + /MediaBox [ + 0 + 0 + 612 + 792 + ] + /Parent 3 0 R + /Resources << + /Font << + /F1 13 0 R + >> + /ProcSet 14 0 R + >> + /Type /Page +>> +endobj + +%% Original object ID: 8 0 +7 0 obj +<< + /Params << + /CheckSum <2e10f186a4cdf5be438747f4bdc2d4d4> + /CreationDate (D:20210207191121-05'00') + /ModDate (D:20210208001122Z) + /Size 9 + /Subtype /text#2fplain + >> + /Type /EmbeddedFile + /Length 8 0 R +>> +stream +from file +endstream +endobj + +%QDF: ignore_newline +8 0 obj +9 +endobj + +%% Original object ID: 12 0 +9 0 obj +<< + /Params << + /CheckSum <2236c155b1d62b7f00285bba081d4336> + /Size 11 + /Subtype /text#2fplain + >> + /Type /EmbeddedFile + /Length 10 0 R +>> +stream +from buffer +endstream +endobj + +%QDF: ignore_newline +10 0 obj +11 +endobj + +%% Contents for page 1 +%% Original object ID: 14 0 +11 0 obj +<< + /Length 12 0 R +>> +stream +BT + /F1 24 Tf + 72 720 Td + (Potato) Tj +ET +endstream +endobj + +12 0 obj +44 +endobj + +%% Original object ID: 16 0 +13 0 obj +<< + /BaseFont /Helvetica + /Encoding /WinAnsiEncoding + /Name /F1 + /Subtype /Type1 + /Type /Font +>> +endobj + +%% Original object ID: 17 0 +14 0 obj +[ + /PDF + /Text +] +endobj + +xref +0 15 +0000000000 65535 f +0000000052 00000 n +0000000175 00000 n +0000000281 00000 n +0000000380 00000 n +0000000541 00000 n +0000000708 00000 n +0000000930 00000 n +0000001221 00000 n +0000001267 00000 n +0000001485 00000 n +0000001556 00000 n +0000001657 00000 n +0000001705 00000 n +0000001852 00000 n +trailer << + /Root 1 0 R + /Size 
15 + /ID [<31415926535897932384626433832795><31415926535897932384626433832795>] +>> +startxref +1888 +%%EOF diff --git a/qpdf/test_driver.cc b/qpdf/test_driver.cc index 6cb5fc80..e7e12834 100644 --- a/qpdf/test_driver.cc +++ b/qpdf/test_driver.cc @@ -10,6 +10,7 @@ #include #include #include +#include #include #include #include @@ -2716,6 +2717,67 @@ void runtest(int n, char const* filename1, char const* arg2) w.setQDFMode(true); w.write(); } + else if (n == 76) + { + // Embedded files. arg2 is a file to attach. Hard-code the + // mime type and file name for test purposes. + QPDFEmbeddedFileDocumentHelper efdh(pdf); + auto fs1 = QPDFFileSpecObjectHelper::createFileSpec( + pdf, "att1.txt", arg2); + fs1.setDescription("some text"); + auto efs1 = QPDFEFStreamObjectHelper(fs1.getEmbeddedFileStream()); + efs1.setSubtype("text/plain") + .setCreationDate("D:20210207191121-05'00'") + .setModDate("D:20210208001122Z"); + efdh.replaceEmbeddedFile("att1", fs1); + auto efs2 = QPDFEFStreamObjectHelper::createEFStream( + pdf, "from string"); + efs2.setSubtype("text/plain"); + Pl_Buffer p("buffer"); + p.write(QUtil::unsigned_char_pointer("from buffer"), 11); + p.finish(); + auto efs3 = QPDFEFStreamObjectHelper::createEFStream( + pdf, p.getBuffer()); + efs3.setSubtype("text/plain"); + efdh.replaceEmbeddedFile( + "att2", QPDFFileSpecObjectHelper::createFileSpec( + pdf, "att2.txt", efs2)); + auto fs3 = QPDFFileSpecObjectHelper::createFileSpec( + pdf, "att3.txt", efs3); + efdh.replaceEmbeddedFile("att3", fs3); + fs3.setFilename("\xcf\x80.txt", "att3.txt"); + + assert(efs1.getCreationDate() == "D:20210207191121-05'00'"); + assert(efs1.getModDate() == "D:20210208001122Z"); + assert(efs2.getSize() == 11); + assert(efs2.getSubtype() == "text/plain"); + assert(QUtil::hex_encode(efs2.getChecksum()) == + "2fce9c8228e360ba9b04a1bd1bf63d6b"); + + for (auto iter: efdh.getEmbeddedFiles()) + { + std::cout << iter.first << " -> " << iter.second->getFilename() + << std::endl; + } + 
assert(efdh.getEmbeddedFile("att1")->getFilename() == "att1.txt"); + assert(! efdh.getEmbeddedFile("potato")); + + QPDFWriter w(pdf, "a.pdf"); + w.setStaticID(true); + w.setQDFMode(true); + w.write(); + } + else if (n == 77) + { + QPDFEmbeddedFileDocumentHelper efdh(pdf); + assert(efdh.removeEmbeddedFile("att2")); + assert(! efdh.removeEmbeddedFile("att2")); + + QPDFWriter w(pdf, "a.pdf"); + w.setStaticID(true); + w.setQDFMode(true); + w.write(); + } else { throw std::runtime_error(std::string("invalid test ") +