mirror of
https://github.com/qpdf/qpdf.git
synced 2024-12-22 10:58:58 +00:00
Use a specific error code for type warnings and clarify docs
This commit is contained in:
parent
b2b2a175c4
commit
3340dbe976
@ -132,7 +132,9 @@ RELEASE PREPARATION
|
||||
|
||||
* Check all open issues and pull requests in github and the
|
||||
sourceforge trackers. See ~/scripts/github-issues. Don't forget pull
|
||||
requests.
|
||||
requests. Note: If the location for reporting issues changes, do a
|
||||
careful check of documentation and code to make sure any comments
|
||||
that include the issue creation URL are updated.
|
||||
|
||||
* Check `TODO` file to make sure all planned items for the release are
|
||||
done or retargeted.
|
||||
|
@ -143,6 +143,7 @@
|
||||
"hcryptprov",
|
||||
"hdict",
|
||||
"hoffmann",
|
||||
"holger",
|
||||
"hosoda",
|
||||
"htcondor",
|
||||
"htdocs",
|
||||
@ -205,6 +206,7 @@
|
||||
"linp",
|
||||
"listitem",
|
||||
"ljpeg",
|
||||
"longjmp",
|
||||
"lpstr",
|
||||
"lqpdf",
|
||||
"lssl",
|
||||
@ -367,6 +369,7 @@
|
||||
"scarff",
|
||||
"seekable",
|
||||
"segfaulting",
|
||||
"setjmp",
|
||||
"sharedresources",
|
||||
"smatch",
|
||||
"softlink",
|
||||
|
@ -38,6 +38,7 @@ enum qpdf_error_code_e
|
||||
qpdf_e_password, /* incorrect password for encrypted file */
|
||||
qpdf_e_damaged_pdf, /* syntax errors or other damage in PDF */
|
||||
qpdf_e_pages, /* erroneous or unsupported pages structure */
|
||||
qpdf_e_object, /* type/bounds errors accessing objects */
|
||||
};
|
||||
|
||||
/* Write Parameters. See QPDFWriter.hh for details. */
|
||||
|
@ -609,15 +609,65 @@ class QPDFObjectHandle
|
||||
QPDF_DLL
|
||||
bool hasObjectDescription();
|
||||
|
||||
// Accessor methods. If an accessor method that is valid for only
|
||||
// a particular object type is called on an object of the wrong
|
||||
// type, an exception is thrown.
|
||||
// Accessor methods
|
||||
//
|
||||
// (Note: this comment is referenced in qpdf-c.h and the manual.)
|
||||
//
|
||||
// In PDF files, objects have specific types, but there is nothing
|
||||
// that prevents PDF files from containing objects of types that
|
||||
// aren't expected by the specification. Many of the accessors
|
||||
// here expect objects of a particular type. Prior to qpdf 8,
|
||||
// calling an accessor on a method of the wrong type, such as
|
||||
// trying to get a dictionary key from an array, trying to get the
|
||||
// string value of a number, etc., would throw an exception, but
|
||||
// since qpdf 8, qpdf issues a warning and recovers using the
|
||||
// following behavior:
|
||||
//
|
||||
// * Requesting a value of the wrong type (int value from string,
|
||||
// array item from a scalar or dictionary, etc.) will return a
|
||||
// zero-like value for that type: false for boolean, 0 for
|
||||
// number, the empty string for string, or the null object for
|
||||
// an object handle.
|
||||
//
|
||||
// * Accessing an array item that is out of bounds will return a
|
||||
// null object.
|
||||
//
|
||||
// * Attempts to mutate an object of the wrong type (e.g.,
|
||||
// attempting to add a dictionary key to a scalar or array) will
|
||||
// be ignored.
|
||||
//
|
||||
// When any of these fallback behaviors are used, qpdf issues a
|
||||
// warning. Starting in qpdf 10.5, these warnings have the error
|
||||
// code qpdf_e_object. Prior to 10.5, they had the error code
|
||||
// qpdf_e_damaged_pdf. If the QPDFObjectHandle is associated with
|
||||
// a QPDF object (as is the case for all objects whose origin was
|
||||
// a PDF file), the warning is issued using the normal warning
|
||||
// mechanism (as described in QPDF.hh), making it possible to
|
||||
// suppress or otherwise detect them. If the QPDFObjectHandle is
|
||||
// not associated with a QPDF object (meaning it was created
|
||||
// programmatically), an exception will be thrown.
|
||||
//
|
||||
// The way to avoid getting any type warnings or exceptions, even
|
||||
// when working with malformed PDF files, is to always check the
|
||||
// type of a QPDFObjectHandle before accessing it (for example,
|
||||
// make sure that isString() returns true before calling
|
||||
// getStringValue()) and to always be sure that any array indices
|
||||
// are in bounds.
|
||||
//
|
||||
// For additional discussion and rationale for this behavior, see
|
||||
// the section in the QPDF manual entitled "Object Accessor
|
||||
// Methods".
|
||||
|
||||
// Methods for bool objects
|
||||
QPDF_DLL
|
||||
bool getBoolValue();
|
||||
|
||||
// Methods for integer objects
|
||||
// Methods for integer objects. Note: if an integer value is too
|
||||
// big (too far away from zero in either direction) to fit in the
|
||||
// requested return type, the maximum or minimum value for that
|
||||
// return type may be returned. For example, on a system with
|
||||
// 32-bit int, a numeric object with a value of 2^40 (or anything
|
||||
// too big for 32 bits) will be returned as INT_MAX.
|
||||
QPDF_DLL
|
||||
long long getIntValue();
|
||||
QPDF_DLL
|
||||
|
@ -3048,23 +3048,17 @@ void
|
||||
QPDFObjectHandle::typeWarning(char const* expected_type,
|
||||
std::string const& warning)
|
||||
{
|
||||
QPDF* context = 0;
|
||||
QPDF* context = nullptr;
|
||||
std::string description;
|
||||
dereference();
|
||||
if (this->obj->getDescription(context, description))
|
||||
{
|
||||
warn(context,
|
||||
QPDFExc(
|
||||
qpdf_e_damaged_pdf,
|
||||
this->obj->getDescription(context, description);
|
||||
// Null context handled by warn
|
||||
warn(context,
|
||||
QPDFExc(qpdf_e_object,
|
||||
"", description, 0,
|
||||
std::string("operation for ") + expected_type +
|
||||
" attempted on object of type " +
|
||||
getTypeName() + ": " + warning));
|
||||
}
|
||||
else
|
||||
{
|
||||
assertType(expected_type, false);
|
||||
}
|
||||
}
|
||||
|
||||
void
|
||||
@ -3091,7 +3085,12 @@ QPDFObjectHandle::warnIfPossible(std::string const& warning,
|
||||
void
|
||||
QPDFObjectHandle::objectWarning(std::string const& warning)
|
||||
{
|
||||
warnIfPossible(warning, true);
|
||||
QPDF* context = nullptr;
|
||||
std::string description;
|
||||
dereference();
|
||||
this->obj->getDescription(context, description);
|
||||
// Null context handled by warn
|
||||
warn(context, QPDFExc(qpdf_e_object, "", description, 0, warning));
|
||||
}
|
||||
|
||||
void
|
||||
|
@ -4560,6 +4560,96 @@ outfile.pdf</option>
|
||||
filtered stream contents to a given pipeline.
|
||||
</para>
|
||||
</sect1>
|
||||
<sect1 id="ref.object-accessors">
|
||||
<!-- This section is referenced in QPDFObjectHandle.hh -->
|
||||
<title>Object Accessor Methods</title>
|
||||
<para>
|
||||
For general information about how to access instances of
|
||||
<classname>QPDFObjectHandle</classname>, please see the comments
|
||||
in <filename>QPDFObjectHandle.hh</filename>. Search for
|
||||
“Accessor methods”. This section provides a more
|
||||
in-depth discussion of the behavior and the rationale for the
|
||||
behavior.
|
||||
</para>
|
||||
<para>
|
||||
<emphasis>Why were type errors made into warnings?</emphasis> When
|
||||
type checks were introduced into qpdf in the early days, it was
|
||||
expected that type errors would only occur as a result of
|
||||
programmer error. However, in practice, type errors would occur
|
||||
with malformed PDF files because of assumptions made in code,
|
||||
including code within the qpdf library and code written by library
|
||||
users. The most common case would be chaining calls to
|
||||
<function>getKey()</function> to access keys deep within a
|
||||
dictionary. In many cases, qpdf would be able to recover from
|
||||
these situations, but the old behavior often resulted in crashes
|
||||
rather than graceful recovery. For this reason, the errors were
|
||||
changed to warnings.
|
||||
</para>
|
||||
<para>
|
||||
<emphasis>Why even warn about type errors when the user can't
|
||||
usually do anything about them?</emphasis> Type warnings are
|
||||
extremely valuable during development. Since it's impossible to
|
||||
catch at compile time things like typos in dictionary key names or
|
||||
logic errors around what the structure of a PDF file might be, the
|
||||
presence of type warnings can save lots of developer time. They
|
||||
have also proven useful in exposing issues in qpdf itself that
|
||||
would have otherwise gone undetected.
|
||||
</para>
|
||||
<para>
|
||||
<emphasis>Can there be a type-safe
|
||||
<classname>QPDFObjectHandle</classname>?</emphasis> It would be
|
||||
great if <classname>QPDFObjectHandle</classname> could be more
|
||||
strongly typed so that you'd have to have check that something was
|
||||
of a particular type before calling type-specific accessor
|
||||
methods. However, implementing this at this stage of the library's
|
||||
history would be quite difficult, and it would make a the common
|
||||
pattern of drilling into an object no longer work. While it would
|
||||
be possible to have a parallel interface, it would create a lot of
|
||||
extra code. If qpdf were written in a language like rust, an
|
||||
interface like this would make a lot of sense, but, for a variety
|
||||
of reasons, the qpdf API is consistent with other APIs of its
|
||||
time, relying on exception handling to catch errors. The
|
||||
underlying PDF objects are inherently not type-safe. Forcing
|
||||
stronger type safety in <classname>QPDFObjectHandle</classname>
|
||||
would ultimately cause a lot more code to have to be written and
|
||||
would like make software that uses qpdf more brittle, and even so,
|
||||
checks would have to occur at runtime.
|
||||
</para>
|
||||
<para>
|
||||
<emphasis>Why do type errors sometimes raise
|
||||
exceptions?</emphasis> The way warnings work in qpdf requires a
|
||||
<classname>QPDF</classname> object to be associated with an object
|
||||
handle for a warning to be issued. It would be nice if this could
|
||||
be fixed, but it would require major changes to the API. Rather
|
||||
than throwing away these conditions, we convert them to
|
||||
exceptions. It's not that bad though. Since any object handle that
|
||||
was read from a file has an associated <classname>QPDF</classname>
|
||||
object, it would only be type errors on objects that were created
|
||||
explicitly that would cause exceptions, and in that case, type
|
||||
errors are much more likely to be the result of a coding error
|
||||
than invalid input.
|
||||
</para>
|
||||
<para>
|
||||
<emphasis>Why does the behavior of a type exception differ between
|
||||
the C and C++ API?</emphasis> There is no way to throw and catch
|
||||
exceptions in C short of something like
|
||||
<function>setjmp</function> and <function>longjmp</function>, and
|
||||
that approach is not portable across language barriers. Since the
|
||||
C API is often used from other languages, it's important to keep
|
||||
things as simple as possible. Starting in qpdf 10.5, exceptions
|
||||
that used to crash code using the C API will be written to stderr
|
||||
by default, and it is possible to register an error handler.
|
||||
There's no reason that the error handler can't simulate exception
|
||||
handling in some way, such as by using <function>setjmp</function>
|
||||
and <function>longjmp</function> or by setting some variable that
|
||||
can be checked after library calls are made. In retrospect, it
|
||||
might have been better if the C API object handle methods returned
|
||||
error codes like the other methods and set return values in
|
||||
passed-in pointers, but this would complicate both the
|
||||
implementation and the use of the library for a case that is
|
||||
actually quite rare and largely avoidable.
|
||||
</para>
|
||||
</sect1>
|
||||
</chapter>
|
||||
<chapter id="ref.linearization">
|
||||
<title>Linearization</title>
|
||||
@ -5125,6 +5215,20 @@ print "\n";
|
||||
Library Enhancements
|
||||
</para>
|
||||
<itemizedlist>
|
||||
<listitem>
|
||||
<para>
|
||||
Since qpdf version 8, using object accessor methods on an
|
||||
instance of <classname>QPDFObjectHandle</classname> may
|
||||
create warnings if the object is not of the expected type.
|
||||
These warnings now have an error code of
|
||||
<literal>qpdf_e_object</literal> instead of
|
||||
<literal>qpdf_e_damaged_pdf</literal>. Also, comments have
|
||||
been added to <filename>QPDFObjectHandle.hh</filename> to
|
||||
explain in more detail what the behavior is. See <xref
|
||||
linkend="ref.object-accessors"/> for a more in-depth
|
||||
discussion.
|
||||
</para>
|
||||
</listitem>
|
||||
<listitem>
|
||||
<para>
|
||||
Add <function>qpdf_get_last_string_length</function> to the
|
||||
|
@ -273,12 +273,16 @@ $td->runtest("check final version",
|
||||
show_ntests();
|
||||
# ----------
|
||||
$td->notify("--- Exceptions ---");
|
||||
$n_tests += 1;
|
||||
$n_tests += 2;
|
||||
|
||||
$td->runtest("check exception handling",
|
||||
{$td->COMMAND => "test_driver 61 -"},
|
||||
{$td->FILE => "exceptions.out", $td->EXIT_STATUS => 0},
|
||||
$td->NORMALIZE_NEWLINES);
|
||||
$td->runtest("check certain exception types",
|
||||
{$td->COMMAND => "test_driver 81 -"},
|
||||
{$td->STRING => "test 81 done\n", $td->EXIT_STATUS => 0},
|
||||
$td->NORMALIZE_NEWLINES);
|
||||
|
||||
show_ntests();
|
||||
# ----------
|
||||
@ -5303,6 +5307,7 @@ for (my $large = 0; $large < $nlarge; ++$large)
|
||||
$td->NORMALIZE_NEWLINES);
|
||||
unlink $file;
|
||||
}
|
||||
show_ntests();
|
||||
# ----------
|
||||
|
||||
cleanup();
|
||||
|
@ -259,7 +259,7 @@ void runtest(int n, char const* filename1, char const* arg2)
|
||||
pdf.processMemoryFile((std::string(filename1) + ".pdf").c_str(),
|
||||
p, size);
|
||||
}
|
||||
else if (n == 61)
|
||||
else if ((n == 61) || (n == 81))
|
||||
{
|
||||
// Ignore filename argument entirely
|
||||
}
|
||||
@ -3049,6 +3049,19 @@ void runtest(int n, char const* filename1, char const* arg2)
|
||||
w2.setQDFMode(true);
|
||||
w2.write();
|
||||
}
|
||||
else if (n == 81)
|
||||
{
|
||||
// Exercise that type errors get their own special type
|
||||
try
|
||||
{
|
||||
QPDFObjectHandle::newNull().getIntValue();
|
||||
assert(false);
|
||||
}
|
||||
catch (QPDFExc& e)
|
||||
{
|
||||
assert(e.getErrorCode() == qpdf_e_object);
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
throw std::runtime_error(std::string("invalid test ") +
|
||||
|
Loading…
Reference in New Issue
Block a user