diff --git a/ChangeLog b/ChangeLog index aa8842ce..0c9cb388 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,5 +1,10 @@ 2022-04-23 Jay Berkenbilt + * Add a new QPDF::warn method that takes the parameters of + QPDFExc's constructor except for the filename, which is taken from + the QPDF object. This is a shorter way to issue warnings on behalf + of a QPDF object. + * Add new method QUtil::is_explicit_utf8 that tests whether a string is explicitly marked as being UTF-8 encoded, as allowed by the PDF 2.0 spec. Such a string starts with the bytes 0xEF 0xBB diff --git a/TODO b/TODO index 51de2d74..e7dbc01a 100644 --- a/TODO +++ b/TODO @@ -21,8 +21,6 @@ Misc --show-encryption could potentially retry with this option if the first time doesn't work. Then, with the file open, we can read the encryption dictionary normally. -* Have a warn in QPDF that passes its variable arguments onto QPDFExc - so you don't have to do warn(QPDFExc(...)) * Find all places in the code that write to std::cout, std::err, stdout, or stderr to make sure they obey default output stream settings for QPDF and QPDFJob. This probably includes adding a diff --git a/include/qpdf/QPDF.hh b/include/qpdf/QPDF.hh index e704c2f8..bbbb896e 100644 --- a/include/qpdf/QPDF.hh +++ b/include/qpdf/QPDF.hh @@ -262,6 +262,16 @@ class QPDF // rules, and it will be available with getWarnings(). QPDF_DLL void warn(QPDFExc const& e); + // Same as above but creates the QPDFExc object using the + // arguments passed to warn. The filename argument to QPDFExc is + // omitted. This method uses the filename associated with the QPDF + // object. 
+ QPDF_DLL + void warn( + qpdf_error_code_e error_code, + std::string const& object, + qpdf_offset_t offset, + std::string const& message); QPDF_DLL std::string getFilename() const; diff --git a/libqpdf/NNTree.cc b/libqpdf/NNTree.cc index 985a3cb2..9d4e8ec7 100644 --- a/libqpdf/NNTree.cc +++ b/libqpdf/NNTree.cc @@ -18,14 +18,7 @@ get_description(QPDFObjectHandle& node) static void warn(QPDF& qpdf, QPDFObjectHandle& node, std::string const& msg) { - qpdf.warn( - // line-break - QPDFExc( - qpdf_e_damaged_pdf, - qpdf.getFilename(), - get_description(node), - 0, - msg)); + qpdf.warn(qpdf_e_damaged_pdf, get_description(node), 0, msg); } static void diff --git a/libqpdf/QPDF.cc b/libqpdf/QPDF.cc index abf51333..309e3461 100644 --- a/libqpdf/QPDF.cc +++ b/libqpdf/QPDF.cc @@ -459,12 +459,7 @@ QPDF::parse(char const* password) PatternFinder hf(*this, &QPDF::findHeader); if (!this->m->file->findFirst("%PDF-", 0, 1024, hf)) { QTC::TC("qpdf", "QPDF not a pdf file"); - warn(QPDFExc( - qpdf_e_damaged_pdf, - this->m->file->getName(), - "", - 0, - "can't find PDF header")); + warn(qpdf_e_damaged_pdf, "", 0, "can't find PDF header"); // QPDFWriter writes files that usually require at least // version 1.2 for /FlateDecode this->m->pdf_version = "1.2"; @@ -541,6 +536,16 @@ QPDF::warn(QPDFExc const& e) } } +void +QPDF::warn( + qpdf_error_code_e error_code, + std::string const& object, + qpdf_offset_t offset, + std::string const& message) +{ + warn(QPDFExc(error_code, this->getFilename(), object, offset, message)); +} + void QPDF::setTrailer(QPDFObjectHandle obj) { @@ -562,19 +567,13 @@ QPDF::reconstruct_xref(QPDFExc& e) this->m->reconstructed_xref = true; - warn(QPDFExc( - qpdf_e_damaged_pdf, - this->m->file->getName(), - "", - 0, - "file is damaged")); + warn(qpdf_e_damaged_pdf, "", 0, "file is damaged"); warn(e); - warn(QPDFExc( + warn( qpdf_e_damaged_pdf, - this->m->file->getName(), "", 0, - "Attempting to reconstruct cross-reference table")); + "Attempting to reconstruct 
cross-reference table"); // Delete all references to type 1 (uncompressed) objects std::set to_delete; @@ -700,12 +699,11 @@ QPDF::read_xref(qpdf_offset_t xref_offset) if ((strncmp(buf, "xref", 4) == 0) && QUtil::is_space(buf[4])) { if (skipped_space) { QTC::TC("qpdf", "QPDF xref skipped space"); - warn(QPDFExc( + warn( qpdf_e_damaged_pdf, - this->m->file->getName(), "", 0, - "extraneous whitespace seen before xref")); + "extraneous whitespace seen before xref"); } QTC::TC( "qpdf", @@ -753,15 +751,14 @@ QPDF::read_xref(qpdf_offset_t xref_offset) } if ((size < 1) || (size - 1 != max_obj)) { QTC::TC("qpdf", "QPDF xref size mismatch"); - warn(QPDFExc( + warn( qpdf_e_damaged_pdf, - this->m->file->getName(), "", 0, (std::string("reported number of objects (") + QUtil::int_to_string(size) + ") is not one plus the highest object number (" + - QUtil::int_to_string(max_obj) + ")"))); + QUtil::int_to_string(max_obj) + ")")); } // We no longer need the deleted_objects table, so go ahead and @@ -885,12 +882,11 @@ QPDF::parse_xrefEntry( } if (invalid) { - warn(QPDFExc( + warn( qpdf_e_damaged_pdf, - this->m->file->getName(), "xref table", this->m->file->getLastOffset(), - "accepting invalid xref table entry")); + "accepting invalid xref table entry"); } f1 = QUtil::string_to_ll(f1_str.c_str()); @@ -1813,12 +1809,11 @@ QPDF::readObjectAtOffset( // ignore these. 
if (offset == 0) { QTC::TC("qpdf", "QPDF bogus 0 offset", 0); - warn(QPDFExc( + warn( qpdf_e_damaged_pdf, - this->m->file->getName(), this->m->last_object_description, 0, - "object has offset 0")); + "object has offset 0"); return QPDFObjectHandle::newNull(); } @@ -1898,16 +1893,15 @@ QPDF::readObjectAtOffset( return result; } else { QTC::TC("qpdf", "QPDF object gone after xref reconstruction"); - warn(QPDFExc( + warn( qpdf_e_damaged_pdf, - this->m->file->getName(), "", 0, std::string( "object " + QUtil::int_to_string(exp_objid) + " " + QUtil::int_to_string(exp_generation) + " not found in file after regenerating" - " cross reference table"))); + " cross reference table")); return QPDFObjectHandle::newNull(); } } else { @@ -1921,12 +1915,11 @@ QPDF::readObjectAtOffset( if (!(readToken(this->m->file) == QPDFTokenizer::Token(QPDFTokenizer::tt_word, "endobj"))) { QTC::TC("qpdf", "QPDF err expected endobj"); - warn(QPDFExc( + warn( qpdf_e_damaged_pdf, - this->m->file->getName(), this->m->last_object_description, this->m->file->getLastOffset(), - "expected endobj")); + "expected endobj"); } QPDFObjGen og(objid, generation); @@ -2005,13 +1998,12 @@ QPDF::resolve(int objid, int generation) // indirectly in some key that has to be resolved during // object parsing, such as stream length. 
QTC::TC("qpdf", "QPDF recursion loop in resolve"); - warn(QPDFExc( + warn( qpdf_e_damaged_pdf, - this->m->file->getName(), "", this->m->file->getLastOffset(), ("loop detected resolving object " + QUtil::int_to_string(objid) + - " " + QUtil::int_to_string(generation)))); + " " + QUtil::int_to_string(generation))); return std::shared_ptr(new QPDF_Null); } ResolveRecorder rr(this, og); @@ -2054,14 +2046,13 @@ QPDF::resolve(int objid, int generation) } catch (QPDFExc& e) { warn(e); } catch (std::exception& e) { - warn(QPDFExc( + warn( qpdf_e_damaged_pdf, - this->m->file->getName(), "", 0, ("object " + QUtil::int_to_string(objid) + "/" + QUtil::int_to_string(generation) + - ": error reading object: " + e.what()))); + ": error reading object: " + e.what())); } } if (this->m->obj_cache.count(og) == 0) { @@ -2112,13 +2103,12 @@ QPDF::resolveObjectsInStream(int obj_stream_number) QPDFObjectHandle dict = obj_stream.getDict(); if (!dict.isDictionaryOfType("/ObjStm")) { QTC::TC("qpdf", "QPDF ERR object stream with wrong type"); - warn(QPDFExc( + warn( qpdf_e_damaged_pdf, - this->m->file->getName(), this->m->last_object_description, this->m->file->getLastOffset(), ("supposed object stream " + - QUtil::int_to_string(obj_stream_number) + " has wrong type"))); + QUtil::int_to_string(obj_stream_number) + " has wrong type")); } if (!(dict.getKey("/N").isInteger() && dict.getKey("/First").isInteger())) { diff --git a/libqpdf/QPDFJob.cc b/libqpdf/QPDFJob.cc index af7a7c38..692e6420 100644 --- a/libqpdf/QPDFJob.cc +++ b/libqpdf/QPDFJob.cc @@ -2677,14 +2677,13 @@ QPDFJob::handlePageSpecs( *other_afdh, &referenced_fields); } catch (std::exception& e) { - pdf.warn(QPDFExc( + pdf.warn( qpdf_e_damaged_pdf, - pdf.getFilename(), "", 0, ("Exception caught while fixing copied" " annotations. This may be a qpdf bug. 
" + - std::string("Exception: ") + e.what()))); + std::string("Exception: ") + e.what())); } } } @@ -3127,14 +3126,13 @@ QPDFJob::doSplitPages(QPDF& pdf, bool& warnings) try { out_afdh->fixCopiedAnnotations(new_page, page, afdh); } catch (std::exception& e) { - pdf.warn(QPDFExc( + pdf.warn( qpdf_e_damaged_pdf, - pdf.getFilename(), "", 0, - "Exception caught while fixing copied" - " annotations. This may be a qpdf bug." + - std::string("Exception: ") + e.what())); + ("Exception caught while fixing copied" + " annotations. This may be a qpdf bug. " + + std::string("Exception: ") + e.what())); } } } diff --git a/libqpdf/QPDFObjectHandle.cc b/libqpdf/QPDFObjectHandle.cc index 8b31c3b6..9fadbef3 100644 --- a/libqpdf/QPDFObjectHandle.cc +++ b/libqpdf/QPDFObjectHandle.cc @@ -1556,8 +1556,9 @@ QPDFObjectHandle::arrayOrStreamToStreamArray( item.getOwningQPDF(), QPDFExc( qpdf_e_damaged_pdf, - description, - "item index " + QUtil::int_to_string(i) + " (from 0)", + "", + description + ": item index " + + QUtil::int_to_string(i) + " (from 0)", 0, "ignoring non-stream in an array of streams")); } diff --git a/libqpdf/QPDF_Stream.cc b/libqpdf/QPDF_Stream.cc index cf52532b..c36b7725 100644 --- a/libqpdf/QPDF_Stream.cc +++ b/libqpdf/QPDF_Stream.cc @@ -302,12 +302,10 @@ QPDF_Stream::filterable( if (!filters_okay) { QTC::TC("qpdf", "QPDF_Stream invalid filter"); - warn(QPDFExc( + warn( qpdf_e_damaged_pdf, - qpdf->getFilename(), - "", this->offset, - "stream filter type is not name or array")); + "stream filter type is not name or array"); return false; } @@ -355,13 +353,11 @@ QPDF_Stream::filterable( // one case of a file whose /DecodeParms was [ << >> ] when // /Filters was empty has been seen in the wild. 
if ((filters.size() != 0) && (decode_parms.size() != filters.size())) { - warn(QPDFExc( + warn( qpdf_e_damaged_pdf, - qpdf->getFilename(), - "", this->offset, "stream /DecodeParms length is" - " inconsistent with filters")); + " inconsistent with filters"); filterable = false; } @@ -474,12 +470,7 @@ QPDF_Stream::pipeStreamData( Pl_Flate* flate = dynamic_cast<Pl_Flate*>(pipeline); if (flate != nullptr) { flate->setWarnCallback([this](char const* msg, int code) { - warn(QPDFExc( - qpdf_e_damaged_pdf, - qpdf->getFilename(), - "", - this->offset, - msg)); + warn(qpdf_e_damaged_pdf, this->offset, msg); }); } } @@ -551,34 +542,28 @@ QPDF_Stream::pipeStreamData( if (filter && (!suppress_warnings) && normalizer.get() && normalizer->anyBadTokens()) { - warn(QPDFExc( + warn( qpdf_e_damaged_pdf, - qpdf->getFilename(), - "", this->offset, - "content normalization encountered bad tokens")); + "content normalization encountered bad tokens"); if (normalizer->lastTokenWasBad()) { QTC::TC("qpdf", "QPDF_Stream bad token at end during normalize"); - warn(QPDFExc( + warn( qpdf_e_damaged_pdf, - qpdf->getFilename(), - "", this->offset, "normalized content ended with a bad token;" " you may be able to resolve this by" " coalescing content streams in combination" " with normalizing content. From the command" - " line, specify --coalesce-contents")); + " line, specify --coalesce-contents"); } - warn(QPDFExc( + warn( qpdf_e_damaged_pdf, - qpdf->getFilename(), - "", this->offset, "Resulting stream data may be corrupted but is" " may still useful for manual inspection." 
" For more information on this warning, search" - " for content normalization in the manual.")); + " for content normalization in the manual."); } return success; @@ -645,7 +630,10 @@ QPDF_Stream::replaceDict(QPDFObjectHandle new_dict) } void -QPDF_Stream::warn(QPDFExc const& e) +QPDF_Stream::warn( + qpdf_error_code_e error_code, + qpdf_offset_t offset, + std::string const& message) { - this->qpdf->warn(e); + this->qpdf->warn(error_code, "", offset, message); } diff --git a/libqpdf/QPDF_encryption.cc b/libqpdf/QPDF_encryption.cc index 8166fc0d..00920082 100644 --- a/libqpdf/QPDF_encryption.cc +++ b/libqpdf/QPDF_encryption.cc @@ -803,12 +803,11 @@ QPDF::initializeEncryption() // Treating a missing ID as the empty string enables qpdf to // decrypt some invalid encrypted files with no /ID that // poppler can read but Adobe Reader can't. - warn(QPDFExc( + warn( qpdf_e_damaged_pdf, - this->m->file->getName(), "trailer", this->m->file->getLastOffset(), - "invalid /ID in trailer dictionary")); + "invalid /ID in trailer dictionary"); } QPDFObjectHandle encryption_dict = this->m->trailer.getKey("/Encrypt"); @@ -831,13 +830,12 @@ QPDF::initializeEncryption() "unsupported encryption filter"); } if (!encryption_dict.getKey("/SubFilter").isNull()) { - warn(QPDFExc( + warn( qpdf_e_unsupported, - this->m->file->getName(), "encryption dictionary", this->m->file->getLastOffset(), "file uses encryption SubFilters," - " which qpdf does not support")); + " which qpdf does not support"); } if (!(encryption_dict.getKey("/V").isInteger() && @@ -1067,13 +1065,12 @@ QPDF::initializeEncryption() this->m->encp->encryption_key = recover_encryption_key_with_password( this->m->encp->provided_password, data, perms_valid); if (!perms_valid) { - warn(QPDFExc( + warn( qpdf_e_damaged_pdf, - this->m->file->getName(), "encryption dictionary", this->m->file->getLastOffset(), "/Perms field in encryption dictionary" - " doesn't match expected value")); + " doesn't match expected value"); } } } @@ 
-1130,14 +1127,13 @@ QPDF::decryptString(std::string& str, int objid, int generation) break; default: - warn(QPDFExc( + warn( qpdf_e_damaged_pdf, - this->m->file->getName(), this->m->last_object_description, this->m->file->getLastOffset(), "unknown encryption filter for strings" " (check /StrF in /Encrypt dictionary);" - " strings may be decrypted improperly")); + " strings may be decrypted improperly"); // To avoid repeated warnings, reset cf_string. Assume // we'd want to use AES if V == 4. this->m->encp->cf_string = e_aes; diff --git a/libqpdf/QPDF_optimization.cc b/libqpdf/QPDF_optimization.cc index c76e255d..0c453126 100644 --- a/libqpdf/QPDF_optimization.cc +++ b/libqpdf/QPDF_optimization.cc @@ -266,15 +266,14 @@ QPDF::pushInheritedAttributesToPageInternal( "Pages object", cur_pages.getObjectID(), cur_pages.getGeneration()); - warn(QPDFExc( + warn( qpdf_e_pages, - this->m->file->getName(), this->m->last_object_description, 0, - "Unknown key " + key + - " in /Pages object" - " is being discarded as a result of" - " flattening the /Pages tree")); + ("Unknown key " + key + + " in /Pages object" + " is being discarded as a result of" + " flattening the /Pages tree")); } } } diff --git a/libqpdf/QPDF_pages.cc b/libqpdf/QPDF_pages.cc index 065be7e7..c2b2dd1a 100644 --- a/libqpdf/QPDF_pages.cc +++ b/libqpdf/QPDF_pages.cc @@ -130,12 +130,11 @@ QPDF::getAllPagesInternal( } if (!cur_node.isDictionaryOfType(wanted_type)) { - warn(QPDFExc( + warn( qpdf_e_damaged_pdf, - this->m->file->getName(), "page tree node", this->m->file->getLastOffset(), - "/Type key should be " + wanted_type + " but is not; overriding")); + "/Type key should be " + wanted_type + " but is not; overriding"); cur_node.replaceKey("/Type", QPDFObjectHandle::newName(wanted_type)); } visited.erase(this_og); diff --git a/libqpdf/qpdf/QPDF_Stream.hh b/libqpdf/qpdf/QPDF_Stream.hh index 4df83d29..a6ee9551 100644 --- a/libqpdf/qpdf/QPDF_Stream.hh +++ b/libqpdf/qpdf/QPDF_Stream.hh @@ -90,7 +90,10 @@ class 
QPDF_Stream: public QPDFObject std::vector>& filters, bool& specialized_compression, bool& lossy_compression); - void warn(QPDFExc const& e); + void warn( + qpdf_error_code_e error_code, + qpdf_offset_t offset, + std::string const& message); void setDictDescription(); void setStreamDescription(); diff --git a/qpdf/qtest/qpdf/split-content-stream-errors.out b/qpdf/qtest/qpdf/split-content-stream-errors.out index 1eebc495..1321927e 100644 --- a/qpdf/qtest/qpdf/split-content-stream-errors.out +++ b/qpdf/qtest/qpdf/split-content-stream-errors.out @@ -4,7 +4,7 @@ File is not encrypted File is not linearized WARNING: split-content-stream-errors.pdf (offset 557): error decoding stream data for object 6 0: LZWDecoder: bad code received WARNING: split-content-stream-errors.pdf (offset 557): stream will be re-processed without filtering to avoid data loss -WARNING: page object 3 0 (item index 0 (from 0)): ignoring non-stream in an array of streams +WARNING: page object 3 0: item index 0 (from 0): ignoring non-stream in an array of streams WARNING: split-content-stream-errors.pdf (offset 557): error decoding stream data for object 6 0: LZWDecoder: bad code received ERROR: page 1: content stream (content stream object 6 0): errors while decoding content stream qpdf: errors detected