Use a specific error code for type warnings and clarify docs

This commit is contained in:
Jay Berkenbilt 2021-12-10 09:34:42 -05:00
parent b2b2a175c4
commit 3340dbe976
8 changed files with 196 additions and 19 deletions

View File

@ -132,7 +132,9 @@ RELEASE PREPARATION
* Check all open issues and pull requests in github and the
sourceforge trackers. See ~/scripts/github-issues. Don't forget pull
requests.
requests. Note: If the location for reporting issues changes, do a
careful check of documentation and code to make sure any comments
that include the issue creation URL are updated.
* Check `TODO` file to make sure all planned items for the release are
done or retargeted.

View File

@ -143,6 +143,7 @@
"hcryptprov",
"hdict",
"hoffmann",
"holger",
"hosoda",
"htcondor",
"htdocs",
@ -205,6 +206,7 @@
"linp",
"listitem",
"ljpeg",
"longjmp",
"lpstr",
"lqpdf",
"lssl",
@ -367,6 +369,7 @@
"scarff",
"seekable",
"segfaulting",
"setjmp",
"sharedresources",
"smatch",
"softlink",

View File

@ -38,6 +38,7 @@ enum qpdf_error_code_e
qpdf_e_password, /* incorrect password for encrypted file */
qpdf_e_damaged_pdf, /* syntax errors or other damage in PDF */
qpdf_e_pages, /* erroneous or unsupported pages structure */
qpdf_e_object, /* type/bounds errors accessing objects */
};
/* Write Parameters. See QPDFWriter.hh for details. */

View File

@ -609,15 +609,65 @@ class QPDFObjectHandle
QPDF_DLL
bool hasObjectDescription();
// Accessor methods. If an accessor method that is valid for only
// a particular object type is called on an object of the wrong
// type, an exception is thrown.
// Accessor methods
//
// (Note: this comment is referenced in qpdf-c.h and the manual.)
//
// In PDF files, objects have specific types, but there is nothing
// that prevents PDF files from containing objects of types that
// aren't expected by the specification. Many of the accessors
// here expect objects of a particular type. Prior to qpdf 8,
// calling an accessor on a method of the wrong type, such as
// trying to get a dictionary key from an array, trying to get the
// string value of a number, etc., would throw an exception, but
// since qpdf 8, qpdf issues a warning and recovers using the
// following behavior:
//
// * Requesting a value of the wrong type (int value from string,
// array item from a scalar or dictionary, etc.) will return a
// zero-like value for that type: false for boolean, 0 for
// number, the empty string for string, or the null object for
// an object handle.
//
// * Accessing an array item that is out of bounds will return a
// null object.
//
// * Attempts to mutate an object of the wrong type (e.g.,
// attempting to add a dictionary key to a scalar or array) will
// be ignored.
//
// When any of these fallback behaviors are used, qpdf issues a
// warning. Starting in qpdf 10.5, these warnings have the error
// code qpdf_e_object. Prior to 10.5, they had the error code
// qpdf_e_damaged_pdf. If the QPDFObjectHandle is associated with
// a QPDF object (as is the case for all objects whose origin was
// a PDF file), the warning is issued using the normal warning
// mechanism (as described in QPDF.hh), making it possible to
// suppress or otherwise detect them. If the QPDFObjectHandle is
// not associated with a QPDF object (meaning it was created
// programmatically), an exception will be thrown.
//
// The way to avoid getting any type warnings or exceptions, even
// when working with malformed PDF files, is to always check the
// type of a QPDFObjectHandle before accessing it (for example,
// make sure that isString() returns true before calling
// getStringValue()) and to always be sure that any array indices
// are in bounds.
//
// For additional discussion and rationale for this behavior, see
// the section in the QPDF manual entitled "Object Accessor
// Methods".
// Methods for bool objects
QPDF_DLL
bool getBoolValue();
// Methods for integer objects
// Methods for integer objects. Note: if an integer value is too
// big (too far away from zero in either direction) to fit in the
// requested return type, the maximum or minimum value for that
// return type may be returned. For example, on a system with
// 32-bit int, a numeric object with a value of 2^40 (or anything
// too big for 32 bits) will be returned as INT_MAX.
QPDF_DLL
long long getIntValue();
QPDF_DLL

View File

@ -3048,23 +3048,17 @@ void
QPDFObjectHandle::typeWarning(char const* expected_type,
std::string const& warning)
{
QPDF* context = 0;
QPDF* context = nullptr;
std::string description;
dereference();
if (this->obj->getDescription(context, description))
{
warn(context,
QPDFExc(
qpdf_e_damaged_pdf,
this->obj->getDescription(context, description);
// Null context handled by warn
warn(context,
QPDFExc(qpdf_e_object,
"", description, 0,
std::string("operation for ") + expected_type +
" attempted on object of type " +
getTypeName() + ": " + warning));
}
else
{
assertType(expected_type, false);
}
}
void
@ -3091,7 +3085,12 @@ QPDFObjectHandle::warnIfPossible(std::string const& warning,
void
QPDFObjectHandle::objectWarning(std::string const& warning)
{
warnIfPossible(warning, true);
QPDF* context = nullptr;
std::string description;
dereference();
this->obj->getDescription(context, description);
// Null context handled by warn
warn(context, QPDFExc(qpdf_e_object, "", description, 0, warning));
}
void

View File

@ -4560,6 +4560,96 @@ outfile.pdf</option>
filtered stream contents to a given pipeline.
</para>
</sect1>
<sect1 id="ref.object-accessors">
<!-- This section is referenced in QPDFObjectHandle.hh -->
<title>Object Accessor Methods</title>
<para>
For general information about how to access instances of
<classname>QPDFObjectHandle</classname>, please see the comments
in <filename>QPDFObjectHandle.hh</filename>. Search for
&ldquo;Accessor methods&rdquo;. This section provides a more
in-depth discussion of the behavior and the rationale for the
behavior.
</para>
<para>
<emphasis>Why were type errors made into warnings?</emphasis> When
type checks were introduced into qpdf in the early days, it was
expected that type errors would only occur as a result of
programmer error. However, in practice, type errors would occur
with malformed PDF files because of assumptions made in code,
including code within the qpdf library and code written by library
users. The most common case would be chaining calls to
<function>getKey()</function> to access keys deep within a
dictionary. In many cases, qpdf would be able to recover from
these situations, but the old behavior often resulted in crashes
rather than graceful recovery. For this reason, the errors were
changed to warnings.
</para>
<para>
<emphasis>Why even warn about type errors when the user can't
usually do anything about them?</emphasis> Type warnings are
extremely valuable during development. Since it's impossible to
catch at compile time things like typos in dictionary key names or
logic errors around what the structure of a PDF file might be, the
presence of type warnings can save lots of developer time. They
have also proven useful in exposing issues in qpdf itself that
would have otherwise gone undetected.
</para>
<para>
<emphasis>Can there be a type-safe
<classname>QPDFObjectHandle</classname>?</emphasis> It would be
great if <classname>QPDFObjectHandle</classname> could be more
strongly typed so that you'd have to have check that something was
of a particular type before calling type-specific accessor
methods. However, implementing this at this stage of the library's
history would be quite difficult, and it would make a the common
pattern of drilling into an object no longer work. While it would
be possible to have a parallel interface, it would create a lot of
extra code. If qpdf were written in a language like rust, an
interface like this would make a lot of sense, but, for a variety
of reasons, the qpdf API is consistent with other APIs of its
time, relying on exception handling to catch errors. The
underlying PDF objects are inherently not type-safe. Forcing
stronger type safety in <classname>QPDFObjectHandle</classname>
would ultimately cause a lot more code to have to be written and
would like make software that uses qpdf more brittle, and even so,
checks would have to occur at runtime.
</para>
<para>
<emphasis>Why do type errors sometimes raise
exceptions?</emphasis> The way warnings work in qpdf requires a
<classname>QPDF</classname> object to be associated with an object
handle for a warning to be issued. It would be nice if this could
be fixed, but it would require major changes to the API. Rather
than throwing away these conditions, we convert them to
exceptions. It's not that bad though. Since any object handle that
was read from a file has an associated <classname>QPDF</classname>
object, it would only be type errors on objects that were created
explicitly that would cause exceptions, and in that case, type
errors are much more likely to be the result of a coding error
than invalid input.
</para>
<para>
<emphasis>Why does the behavior of a type exception differ between
the C and C++ API?</emphasis> There is no way to throw and catch
exceptions in C short of something like
<function>setjmp</function> and <function>longjmp</function>, and
that approach is not portable across language barriers. Since the
C API is often used from other languages, it's important to keep
things as simple as possible. Starting in qpdf 10.5, exceptions
that used to crash code using the C API will be written to stderr
by default, and it is possible to register an error handler.
There's no reason that the error handler can't simulate exception
handling in some way, such as by using <function>setjmp</function>
and <function>longjmp</function> or by setting some variable that
can be checked after library calls are made. In retrospect, it
might have been better if the C API object handle methods returned
error codes like the other methods and set return values in
passed-in pointers, but this would complicate both the
implementation and the use of the library for a case that is
actually quite rare and largely avoidable.
</para>
</sect1>
</chapter>
<chapter id="ref.linearization">
<title>Linearization</title>
@ -5125,6 +5215,20 @@ print "\n";
Library Enhancements
</para>
<itemizedlist>
<listitem>
<para>
Since qpdf version 8, using object accessor methods on an
instance of <classname>QPDFObjectHandle</classname> may
create warnings if the object is not of the expected type.
These warnings now have an error code of
<literal>qpdf_e_object</literal> instead of
<literal>qpdf_e_damaged_pdf</literal>. Also, comments have
been added to <filename>QPDFObjectHandle.hh</filename> to
explain in more detail what the behavior is. See <xref
linkend="ref.object-accessors"/> for a more in-depth
discussion.
</para>
</listitem>
<listitem>
<para>
Add <function>qpdf_get_last_string_length</function> to the

View File

@ -273,12 +273,16 @@ $td->runtest("check final version",
show_ntests();
# ----------
$td->notify("--- Exceptions ---");
$n_tests += 1;
$n_tests += 2;
$td->runtest("check exception handling",
{$td->COMMAND => "test_driver 61 -"},
{$td->FILE => "exceptions.out", $td->EXIT_STATUS => 0},
$td->NORMALIZE_NEWLINES);
$td->runtest("check certain exception types",
{$td->COMMAND => "test_driver 81 -"},
{$td->STRING => "test 81 done\n", $td->EXIT_STATUS => 0},
$td->NORMALIZE_NEWLINES);
show_ntests();
# ----------
@ -5303,6 +5307,7 @@ for (my $large = 0; $large < $nlarge; ++$large)
$td->NORMALIZE_NEWLINES);
unlink $file;
}
show_ntests();
# ----------
cleanup();

View File

@ -259,7 +259,7 @@ void runtest(int n, char const* filename1, char const* arg2)
pdf.processMemoryFile((std::string(filename1) + ".pdf").c_str(),
p, size);
}
else if (n == 61)
else if ((n == 61) || (n == 81))
{
// Ignore filename argument entirely
}
@ -3049,6 +3049,19 @@ void runtest(int n, char const* filename1, char const* arg2)
w2.setQDFMode(true);
w2.write();
}
else if (n == 81)
{
// Exercise that type errors get their own special type
try
{
QPDFObjectHandle::newNull().getIntValue();
assert(false);
}
catch (QPDFExc& e)
{
assert(e.getErrorCode() == qpdf_e_object);
}
}
else
{
throw std::runtime_error(std::string("invalid test ") +