Add new QPDF::warn that takes most of QPDFExc's arguments

This commit is contained in:
Jay Berkenbilt 2022-04-23 18:03:44 -04:00
parent 696ca53205
commit 68e721981a
13 changed files with 93 additions and 113 deletions

View File

@ -1,5 +1,10 @@
2022-04-23 Jay Berkenbilt <ejb@ql.org>
* Add a new QPDF::warn method that takes the parameters of
QPDFExc's constructor except for the filename, which is taken from
the QPDF object. This is a shorter way to issue warnings on behalf
of a QPDF object.
* Add new method QUtil::is_explicit_utf8 that tests whether a
string is explicitly marked as being UTF-8 encoded, as allowed by
the PDF 2.0 spec. Such a string starts with the bytes 0xEF 0xBB

2
TODO
View File

@ -21,8 +21,6 @@ Misc
--show-encryption could potentially retry with this option if the
first time doesn't work. Then, with the file open, we can read the
encryption dictionary normally.
* Have a warn in QPDF that passes its variable arguments onto QPDFExc
so you don't have to do warn(QPDFExc(...))
* Find all places in the code that write to std::cout, std::err,
stdout, or stderr to make sure they obey default output stream
settings for QPDF and QPDFJob. This probably includes adding a

View File

@ -262,6 +262,16 @@ class QPDF
// rules, and it will be available with getWarnings().
QPDF_DLL
void warn(QPDFExc const& e);
// Same as above but creates the QPDFExc object using the
// arguments passed to warn. The filename argument to QPDFExc is
// omitted. This method uses the filename associated with the QPDF
// object.
QPDF_DLL
void warn(
qpdf_error_code_e error_code,
std::string const& object,
qpdf_offset_t offset,
std::string const& message);
QPDF_DLL
std::string getFilename() const;

View File

@ -18,14 +18,7 @@ get_description(QPDFObjectHandle& node)
static void
warn(QPDF& qpdf, QPDFObjectHandle& node, std::string const& msg)
{
qpdf.warn(
// line-break
QPDFExc(
qpdf_e_damaged_pdf,
qpdf.getFilename(),
get_description(node),
0,
msg));
qpdf.warn(qpdf_e_damaged_pdf, get_description(node), 0, msg);
}
static void

View File

@ -459,12 +459,7 @@ QPDF::parse(char const* password)
PatternFinder hf(*this, &QPDF::findHeader);
if (!this->m->file->findFirst("%PDF-", 0, 1024, hf)) {
QTC::TC("qpdf", "QPDF not a pdf file");
warn(QPDFExc(
qpdf_e_damaged_pdf,
this->m->file->getName(),
"",
0,
"can't find PDF header"));
warn(qpdf_e_damaged_pdf, "", 0, "can't find PDF header");
// QPDFWriter writes files that usually require at least
// version 1.2 for /FlateDecode
this->m->pdf_version = "1.2";
@ -541,6 +536,16 @@ QPDF::warn(QPDFExc const& e)
}
}
// Convenience overload of warn(): assembles the QPDFExc from the
// supplied pieces, filling in the filename from this QPDF object
// (via getFilename()), and then hands the exception to the
// warn(QPDFExc const&) overload.
void
QPDF::warn(
    qpdf_error_code_e error_code,
    std::string const& object,
    qpdf_offset_t offset,
    std::string const& message)
{
    QPDFExc const warning(
        error_code, this->getFilename(), object, offset, message);
    warn(warning);
}
void
QPDF::setTrailer(QPDFObjectHandle obj)
{
@ -562,19 +567,13 @@ QPDF::reconstruct_xref(QPDFExc& e)
this->m->reconstructed_xref = true;
warn(QPDFExc(
qpdf_e_damaged_pdf,
this->m->file->getName(),
"",
0,
"file is damaged"));
warn(qpdf_e_damaged_pdf, "", 0, "file is damaged");
warn(e);
warn(QPDFExc(
warn(
qpdf_e_damaged_pdf,
this->m->file->getName(),
"",
0,
"Attempting to reconstruct cross-reference table"));
"Attempting to reconstruct cross-reference table");
// Delete all references to type 1 (uncompressed) objects
std::set<QPDFObjGen> to_delete;
@ -700,12 +699,11 @@ QPDF::read_xref(qpdf_offset_t xref_offset)
if ((strncmp(buf, "xref", 4) == 0) && QUtil::is_space(buf[4])) {
if (skipped_space) {
QTC::TC("qpdf", "QPDF xref skipped space");
warn(QPDFExc(
warn(
qpdf_e_damaged_pdf,
this->m->file->getName(),
"",
0,
"extraneous whitespace seen before xref"));
"extraneous whitespace seen before xref");
}
QTC::TC(
"qpdf",
@ -753,15 +751,14 @@ QPDF::read_xref(qpdf_offset_t xref_offset)
}
if ((size < 1) || (size - 1 != max_obj)) {
QTC::TC("qpdf", "QPDF xref size mismatch");
warn(QPDFExc(
warn(
qpdf_e_damaged_pdf,
this->m->file->getName(),
"",
0,
(std::string("reported number of objects (") +
QUtil::int_to_string(size) +
") is not one plus the highest object number (" +
QUtil::int_to_string(max_obj) + ")")));
QUtil::int_to_string(max_obj) + ")"));
}
// We no longer need the deleted_objects table, so go ahead and
@ -885,12 +882,11 @@ QPDF::parse_xrefEntry(
}
if (invalid) {
warn(QPDFExc(
warn(
qpdf_e_damaged_pdf,
this->m->file->getName(),
"xref table",
this->m->file->getLastOffset(),
"accepting invalid xref table entry"));
"accepting invalid xref table entry");
}
f1 = QUtil::string_to_ll(f1_str.c_str());
@ -1813,12 +1809,11 @@ QPDF::readObjectAtOffset(
// ignore these.
if (offset == 0) {
QTC::TC("qpdf", "QPDF bogus 0 offset", 0);
warn(QPDFExc(
warn(
qpdf_e_damaged_pdf,
this->m->file->getName(),
this->m->last_object_description,
0,
"object has offset 0"));
"object has offset 0");
return QPDFObjectHandle::newNull();
}
@ -1898,16 +1893,15 @@ QPDF::readObjectAtOffset(
return result;
} else {
QTC::TC("qpdf", "QPDF object gone after xref reconstruction");
warn(QPDFExc(
warn(
qpdf_e_damaged_pdf,
this->m->file->getName(),
"",
0,
std::string(
"object " + QUtil::int_to_string(exp_objid) + " " +
QUtil::int_to_string(exp_generation) +
" not found in file after regenerating"
" cross reference table")));
" cross reference table"));
return QPDFObjectHandle::newNull();
}
} else {
@ -1921,12 +1915,11 @@ QPDF::readObjectAtOffset(
if (!(readToken(this->m->file) ==
QPDFTokenizer::Token(QPDFTokenizer::tt_word, "endobj"))) {
QTC::TC("qpdf", "QPDF err expected endobj");
warn(QPDFExc(
warn(
qpdf_e_damaged_pdf,
this->m->file->getName(),
this->m->last_object_description,
this->m->file->getLastOffset(),
"expected endobj"));
"expected endobj");
}
QPDFObjGen og(objid, generation);
@ -2005,13 +1998,12 @@ QPDF::resolve(int objid, int generation)
// indirectly in some key that has to be resolved during
// object parsing, such as stream length.
QTC::TC("qpdf", "QPDF recursion loop in resolve");
warn(QPDFExc(
warn(
qpdf_e_damaged_pdf,
this->m->file->getName(),
"",
this->m->file->getLastOffset(),
("loop detected resolving object " + QUtil::int_to_string(objid) +
" " + QUtil::int_to_string(generation))));
" " + QUtil::int_to_string(generation)));
return std::shared_ptr<QPDFObject>(new QPDF_Null);
}
ResolveRecorder rr(this, og);
@ -2054,14 +2046,13 @@ QPDF::resolve(int objid, int generation)
} catch (QPDFExc& e) {
warn(e);
} catch (std::exception& e) {
warn(QPDFExc(
warn(
qpdf_e_damaged_pdf,
this->m->file->getName(),
"",
0,
("object " + QUtil::int_to_string(objid) + "/" +
QUtil::int_to_string(generation) +
": error reading object: " + e.what())));
": error reading object: " + e.what()));
}
}
if (this->m->obj_cache.count(og) == 0) {
@ -2112,13 +2103,12 @@ QPDF::resolveObjectsInStream(int obj_stream_number)
QPDFObjectHandle dict = obj_stream.getDict();
if (!dict.isDictionaryOfType("/ObjStm")) {
QTC::TC("qpdf", "QPDF ERR object stream with wrong type");
warn(QPDFExc(
warn(
qpdf_e_damaged_pdf,
this->m->file->getName(),
this->m->last_object_description,
this->m->file->getLastOffset(),
("supposed object stream " +
QUtil::int_to_string(obj_stream_number) + " has wrong type")));
QUtil::int_to_string(obj_stream_number) + " has wrong type"));
}
if (!(dict.getKey("/N").isInteger() && dict.getKey("/First").isInteger())) {

View File

@ -2677,14 +2677,13 @@ QPDFJob::handlePageSpecs(
*other_afdh,
&referenced_fields);
} catch (std::exception& e) {
pdf.warn(QPDFExc(
pdf.warn(
qpdf_e_damaged_pdf,
pdf.getFilename(),
"",
0,
("Exception caught while fixing copied"
" annotations. This may be a qpdf bug. " +
std::string("Exception: ") + e.what())));
std::string("Exception: ") + e.what()));
}
}
}
@ -3127,14 +3126,13 @@ QPDFJob::doSplitPages(QPDF& pdf, bool& warnings)
try {
out_afdh->fixCopiedAnnotations(new_page, page, afdh);
} catch (std::exception& e) {
pdf.warn(QPDFExc(
pdf.warn(
qpdf_e_damaged_pdf,
pdf.getFilename(),
"",
0,
"Exception caught while fixing copied"
" annotations. This may be a qpdf bug." +
std::string("Exception: ") + e.what()));
("Exception caught while fixing copied"
" annotations. This may be a qpdf bug." +
std::string("Exception: ") + e.what()));
}
}
}

View File

@ -1556,8 +1556,9 @@ QPDFObjectHandle::arrayOrStreamToStreamArray(
item.getOwningQPDF(),
QPDFExc(
qpdf_e_damaged_pdf,
description,
"item index " + QUtil::int_to_string(i) + " (from 0)",
"",
description + ": item index " +
QUtil::int_to_string(i) + " (from 0)",
0,
"ignoring non-stream in an array of streams"));
}

View File

@ -302,12 +302,10 @@ QPDF_Stream::filterable(
if (!filters_okay) {
QTC::TC("qpdf", "QPDF_Stream invalid filter");
warn(QPDFExc(
warn(
qpdf_e_damaged_pdf,
qpdf->getFilename(),
"",
this->offset,
"stream filter type is not name or array"));
"stream filter type is not name or array");
return false;
}
@ -355,13 +353,11 @@ QPDF_Stream::filterable(
// one case of a file whose /DecodeParms was [ << >> ] when
// /Filters was empty has been seen in the wild.
if ((filters.size() != 0) && (decode_parms.size() != filters.size())) {
warn(QPDFExc(
warn(
qpdf_e_damaged_pdf,
qpdf->getFilename(),
"",
this->offset,
"stream /DecodeParms length is"
" inconsistent with filters"));
" inconsistent with filters");
filterable = false;
}
@ -474,12 +470,7 @@ QPDF_Stream::pipeStreamData(
Pl_Flate* flate = dynamic_cast<Pl_Flate*>(pipeline);
if (flate != nullptr) {
flate->setWarnCallback([this](char const* msg, int code) {
warn(QPDFExc(
qpdf_e_damaged_pdf,
qpdf->getFilename(),
"",
this->offset,
msg));
warn(qpdf_e_damaged_pdf, this->offset, msg);
});
}
}
@ -551,34 +542,28 @@ QPDF_Stream::pipeStreamData(
if (filter && (!suppress_warnings) && normalizer.get() &&
normalizer->anyBadTokens()) {
warn(QPDFExc(
warn(
qpdf_e_damaged_pdf,
qpdf->getFilename(),
"",
this->offset,
"content normalization encountered bad tokens"));
"content normalization encountered bad tokens");
if (normalizer->lastTokenWasBad()) {
QTC::TC("qpdf", "QPDF_Stream bad token at end during normalize");
warn(QPDFExc(
warn(
qpdf_e_damaged_pdf,
qpdf->getFilename(),
"",
this->offset,
"normalized content ended with a bad token;"
" you may be able to resolve this by"
" coalescing content streams in combination"
" with normalizing content. From the command"
" line, specify --coalesce-contents"));
" line, specify --coalesce-contents");
}
warn(QPDFExc(
warn(
qpdf_e_damaged_pdf,
qpdf->getFilename(),
"",
this->offset,
"Resulting stream data may be corrupted but is"
" may still useful for manual inspection."
" For more information on this warning, search"
" for content normalization in the manual."));
" for content normalization in the manual.");
}
return success;
@ -645,7 +630,10 @@ QPDF_Stream::replaceDict(QPDFObjectHandle new_dict)
}
void
QPDF_Stream::warn(QPDFExc const& e)
QPDF_Stream::warn(
qpdf_error_code_e error_code,
qpdf_offset_t offset,
std::string const& message)
{
this->qpdf->warn(e);
this->qpdf->warn(error_code, "", offset, message);
}

View File

@ -803,12 +803,11 @@ QPDF::initializeEncryption()
// Treating a missing ID as the empty string enables qpdf to
// decrypt some invalid encrypted files with no /ID that
// poppler can read but Adobe Reader can't.
warn(QPDFExc(
warn(
qpdf_e_damaged_pdf,
this->m->file->getName(),
"trailer",
this->m->file->getLastOffset(),
"invalid /ID in trailer dictionary"));
"invalid /ID in trailer dictionary");
}
QPDFObjectHandle encryption_dict = this->m->trailer.getKey("/Encrypt");
@ -831,13 +830,12 @@ QPDF::initializeEncryption()
"unsupported encryption filter");
}
if (!encryption_dict.getKey("/SubFilter").isNull()) {
warn(QPDFExc(
warn(
qpdf_e_unsupported,
this->m->file->getName(),
"encryption dictionary",
this->m->file->getLastOffset(),
"file uses encryption SubFilters,"
" which qpdf does not support"));
" which qpdf does not support");
}
if (!(encryption_dict.getKey("/V").isInteger() &&
@ -1067,13 +1065,12 @@ QPDF::initializeEncryption()
this->m->encp->encryption_key = recover_encryption_key_with_password(
this->m->encp->provided_password, data, perms_valid);
if (!perms_valid) {
warn(QPDFExc(
warn(
qpdf_e_damaged_pdf,
this->m->file->getName(),
"encryption dictionary",
this->m->file->getLastOffset(),
"/Perms field in encryption dictionary"
" doesn't match expected value"));
" doesn't match expected value");
}
}
}
@ -1130,14 +1127,13 @@ QPDF::decryptString(std::string& str, int objid, int generation)
break;
default:
warn(QPDFExc(
warn(
qpdf_e_damaged_pdf,
this->m->file->getName(),
this->m->last_object_description,
this->m->file->getLastOffset(),
"unknown encryption filter for strings"
" (check /StrF in /Encrypt dictionary);"
" strings may be decrypted improperly"));
" strings may be decrypted improperly");
// To avoid repeated warnings, reset cf_string. Assume
// we'd want to use AES if V == 4.
this->m->encp->cf_string = e_aes;

View File

@ -266,15 +266,14 @@ QPDF::pushInheritedAttributesToPageInternal(
"Pages object",
cur_pages.getObjectID(),
cur_pages.getGeneration());
warn(QPDFExc(
warn(
qpdf_e_pages,
this->m->file->getName(),
this->m->last_object_description,
0,
"Unknown key " + key +
" in /Pages object"
" is being discarded as a result of"
" flattening the /Pages tree"));
("Unknown key " + key +
" in /Pages object"
" is being discarded as a result of"
" flattening the /Pages tree"));
}
}
}

View File

@ -130,12 +130,11 @@ QPDF::getAllPagesInternal(
}
if (!cur_node.isDictionaryOfType(wanted_type)) {
warn(QPDFExc(
warn(
qpdf_e_damaged_pdf,
this->m->file->getName(),
"page tree node",
this->m->file->getLastOffset(),
"/Type key should be " + wanted_type + " but is not; overriding"));
"/Type key should be " + wanted_type + " but is not; overriding");
cur_node.replaceKey("/Type", QPDFObjectHandle::newName(wanted_type));
}
visited.erase(this_og);

View File

@ -90,7 +90,10 @@ class QPDF_Stream: public QPDFObject
std::vector<std::shared_ptr<QPDFStreamFilter>>& filters,
bool& specialized_compression,
bool& lossy_compression);
void warn(QPDFExc const& e);
void warn(
qpdf_error_code_e error_code,
qpdf_offset_t offset,
std::string const& message);
void setDictDescription();
void setStreamDescription();

View File

@ -4,7 +4,7 @@ File is not encrypted
File is not linearized
WARNING: split-content-stream-errors.pdf (offset 557): error decoding stream data for object 6 0: LZWDecoder: bad code received
WARNING: split-content-stream-errors.pdf (offset 557): stream will be re-processed without filtering to avoid data loss
WARNING: page object 3 0 (item index 0 (from 0)): ignoring non-stream in an array of streams
WARNING: page object 3 0: item index 0 (from 0): ignoring non-stream in an array of streams
WARNING: split-content-stream-errors.pdf (offset 557): error decoding stream data for object 6 0: LZWDecoder: bad code received
ERROR: page 1: content stream (content stream object 6 0): errors while decoding content stream
qpdf: errors detected