Fix dangling references (fixes #240)

On certain operations, such as iterating through all objects and
adding new indirect objects, walk through the entire object structure
and explicitly resolve any indirect references to non-existent
objects. That prevents new objects from springing into existence and
causing the previously dangling references to point to them.
This commit is contained in:
Jay Berkenbilt 2019-01-04 10:17:33 -05:00
parent 158156d506
commit a01359189b
17 changed files with 344 additions and 17 deletions

View File

@ -1,3 +1,12 @@
2019-01-04 Jay Berkenbilt <ejb@ql.org>
* Detect and recover from dangling references. If a PDF file
contained an indirect reference to a non-existent object (which is
valid), when adding a new object to the file, it was possible for
the new object to take the object ID of the dangling reference,
thereby causing the dangling reference to point to the new object.
This case is now prevented. Fixes #240.
2019-01-03 Jay Berkenbilt <ejb@ql.org>
* Fix behavior of form field value setting to handle the following

View File

@ -431,9 +431,21 @@ class QPDF
QPDF_DLL
void showXRefTable();
// Detect all indirect references to objects that don't exist and
// resolve them by replacing them with null, which is how the PDF
// spec says to interpret such dangling references. This method is
// called automatically if you try to add any new objects, if you
// call getAllObjects, and before a file is written. The qpdf
// object caches whether it has run this to avoid running it
// multiple times. You can pass true to force it to run again if
// you have explicitly added new objects that may have additional
// dangling references.
QPDF_DLL
void fixDanglingReferences(bool force = false);
// Return the approximate number of indirect objects. It is
// approximate because not all objects in the file are preserved
// in all cases.
// in all cases, and gaps in object numbering are not preserved.
QPDF_DLL
size_t getObjectCount();
@ -1199,6 +1211,7 @@ class QPDF
CopiedStreamDataProvider* copied_stream_data_provider;
std::set<QPDFObjGen> attachment_streams;
bool reconstructed_xref;
bool fixed_dangling_refs;
// Linearization data
qpdf_offset_t first_xref_item_offset; // actual value from file

View File

@ -94,6 +94,7 @@ QPDF::Members::Members() :
pushed_inherited_attributes_to_pages(false),
copied_stream_data_provider(0),
reconstructed_xref(false),
fixed_dangling_refs(false),
first_xref_item_offset(0),
uncompressed_after_compressed(false)
{
@ -1218,33 +1219,129 @@ QPDF::showXRefTable()
}
}
void
QPDF::fixDanglingReferences(bool force)
{
if (this->m->fixed_dangling_refs && (! force))
{
return;
}
this->m->fixed_dangling_refs = true;
// Create a set of all known indirect objects including those
// we've previously resolved and those that we have created.
std::set<QPDFObjGen> to_process;
for (std::map<QPDFObjGen, ObjCache>::iterator iter =
this->m->obj_cache.begin();
iter != this->m->obj_cache.end(); ++iter)
{
to_process.insert((*iter).first);
}
for (std::map<QPDFObjGen, QPDFXRefEntry>::iterator iter =
this->m->xref_table.begin();
iter != this->m->xref_table.end(); ++iter)
{
to_process.insert((*iter).first);
}
// For each non-scalar item to process, put it in the queue.
std::list<QPDFObjectHandle> queue;
queue.push_back(this->m->trailer);
for (std::set<QPDFObjGen>::iterator iter = to_process.begin();
iter != to_process.end(); ++iter)
{
QPDFObjectHandle obj = QPDFObjectHandle::Factory::newIndirect(
this, (*iter).getObj(), (*iter).getGen());
if (obj.isDictionary() || obj.isArray())
{
queue.push_back(obj);
}
else if (obj.isStream())
{
queue.push_back(obj.getDict());
}
}
// Process the queue by recursively resolving all object
// references. We don't need to do loop detection because we don't
// traverse known indirect objects when processing the queue.
while (! queue.empty())
{
QPDFObjectHandle obj = queue.front();
queue.pop_front();
std::list<QPDFObjectHandle> to_check;
if (obj.isDictionary())
{
std::map<std::string, QPDFObjectHandle> members =
obj.getDictAsMap();
for (std::map<std::string, QPDFObjectHandle>::iterator iter =
members.begin();
iter != members.end(); ++iter)
{
to_check.push_back((*iter).second);
}
}
else if (obj.isArray())
{
std::vector<QPDFObjectHandle> elements = obj.getArrayAsVector();
for (std::vector<QPDFObjectHandle>::iterator iter =
elements.begin();
iter != elements.end(); ++iter)
{
to_check.push_back(*iter);
}
}
for (std::list<QPDFObjectHandle>::iterator iter = to_check.begin();
iter != to_check.end(); ++iter)
{
QPDFObjectHandle sub = *iter;
if (sub.isIndirect())
{
if (sub.getOwningQPDF() == this)
{
QPDFObjGen og(sub.getObjGen());
if (this->m->obj_cache.count(og) == 0)
{
QTC::TC("qpdf", "QPDF detected dangling ref");
queue.push_back(sub);
}
}
}
else
{
queue.push_back(sub);
}
}
}
}
size_t
QPDF::getObjectCount()
{
// This method returns the next available indirect object number.
// makeIndirectObject uses it for this purpose.
QPDFObjGen o1(0, 0);
// makeIndirectObject uses it for this purpose. After
// fixDanglingReferences is called, all objects in the xref table
// will also be in obj_cache.
fixDanglingReferences();
QPDFObjGen og(0, 0);
if (! this->m->obj_cache.empty())
{
o1 = (*(this->m->obj_cache.rbegin())).first;
og = (*(this->m->obj_cache.rbegin())).first;
}
QPDFObjGen o2(0, 0);
if (! this->m->xref_table.empty())
{
o2 = (*(this->m->xref_table.rbegin())).first;
}
QTC::TC("qpdf", "QPDF indirect last obj from xref",
(o2.getObj() > o1.getObj()) ? 1 : 0);
return std::max(o1.getObj(), o2.getObj());
return og.getObj();
}
std::vector<QPDFObjectHandle>
QPDF::getAllObjects()
{
// After fixDanglingReferences is called, all objects are in the
// object cache.
fixDanglingReferences(true);
std::vector<QPDFObjectHandle> result;
for (std::map<QPDFObjGen, QPDFXRefEntry>::iterator iter =
this->m->xref_table.begin();
iter != this->m->xref_table.end(); ++iter)
for (std::map<QPDFObjGen, ObjCache>::iterator iter =
this->m->obj_cache.begin();
iter != this->m->obj_cache.end(); ++iter)
{
QPDFObjGen const& og = (*iter).first;
@ -1752,7 +1849,6 @@ QPDF::resolve(int objid, int generation)
}
ResolveRecorder rr(this, og);
// PDF spec says unknown objects resolve to the null object.
if ((! this->m->obj_cache.count(og)) && this->m->xref_table.count(og))
{
QPDFXRefEntry const& entry = this->m->xref_table[og];
@ -1800,6 +1896,7 @@ QPDF::resolve(int objid, int generation)
}
if (this->m->obj_cache.count(og) == 0)
{
// PDF spec says unknown objects resolve to the null object.
QTC::TC("qpdf", "QPDF resolve failure to null");
QPDFObjectHandle oh = QPDFObjectHandle::newNull();
this->m->obj_cache[og] =

View File

@ -2244,6 +2244,7 @@ QPDFWriter::prepareFileForWrite()
// includes stream lengths, stream filtering parameters, and
// document extension level information.
this->m->pdf.fixDanglingReferences(true);
std::list<QPDFObjectHandle> queue;
queue.push_back(getTrimmedTrailer());
std::set<int> visited;

View File

@ -82,7 +82,6 @@ QPDFObjectHandle clone dictionary 0
QPDFObjectHandle makeDirect loop 0
QPDFObjectHandle ERR clone stream 0
QPDFTokenizer allow pound anywhere in name 0
QPDF indirect last obj from xref 1
QPDF default for xref stream field 0 0
QPDF prev key in xref stream dictionary 0
QPDF prev key in trailer dictionary 0
@ -402,3 +401,4 @@ QPDFFormFieldObjectHelper list not found 0
QPDFFormFieldObjectHelper list found 0
QPDFFormFieldObjectHelper list first too low 0
QPDFFormFieldObjectHelper list last too high 0
QPDF detected dangling ref 0

View File

@ -173,6 +173,22 @@ $td->runtest("\@file exists and file doesn't",
{$td->FILE => "check-at-file.out", $td->EXIT_STATUS => 0},
$td->NORMALIZE_NEWLINES);
show_ntests();
# ----------
$td->notify("--- Dangling Refs ---");
my @dangling = (qw(minimal dangling-refs));
$n_tests += 2 * scalar(@dangling);
foreach my $f (@dangling)
{
$td->runtest("dangling refs: $f",
{$td->COMMAND => "test_driver 53 $f.pdf"},
{$td->FILE => "$f-dangling.out", $td->EXIT_STATUS => 0},
$td->NORMALIZE_NEWLINES);
$td->runtest("check output",
{$td->FILE => "a.pdf"},
{$td->FILE => "$f-dangling-out.pdf"});
}
show_ntests();
# ----------
$td->notify("--- Form Tests ---");

Binary file not shown.

View File

@ -0,0 +1,13 @@
all objects
1 0 R
2 0 R
3 0 R
4 0 R
5 0 R
6 0 R
7 0 R
8 0 R
9 0 R
10 0 R
11 0 R
test 53 done

View File

@ -0,0 +1,104 @@
%PDF-1.3
%¿÷¢þ
%QDF-1.0
1 0 obj
<<
/Pages 2 0 R
/Type /Catalog
/Dangling 8 0 R
/AlsoDangling [
9 0 R
<<
/yes 2 0 R
/no 10 0 R
/nope 8 0 R
>>
]
>>
endobj
2 0 obj
<<
/Count 1
/Kids [
3 0 R
]
/Type /Pages
>>
endobj
%% Page 1
3 0 obj
<<
/Contents 4 0 R
/MediaBox [
0
0
612
792
]
/Parent 2 0 R
/Resources <<
/Font <<
/F1 6 0 R
>>
/ProcSet 7 0 R
>>
/Type /Page
>>
endobj
%% Contents for page 1
4 0 obj
<<
/Length 5 0 R
>>
stream
BT
/F1 24 Tf
72 720 Td
(Potato) Tj
ET
endstream
endobj
5 0 obj
44
endobj
6 0 obj
<<
/BaseFont /Helvetica
/Encoding /WinAnsiEncoding
/Name /F1
/Subtype /Type1
/Type /Font
>>
endobj
7 0 obj
[
/PDF
/Text
]
endobj
xref
0 8
0000000000 65535 f
0000000025 00000 n
0000000195 00000 n
0000000277 00000 n
0000000492 00000 n
0000000591 00000 n
0000000610 00000 n
0000000728 00000 n
trailer <<
/Root 1 0 R
/Size 8
/ID [<7141a6cf32de469328cf0f51982b5f89><7141a6cf32de469328cf0f51982b5f89>]
>>
startxref
763
%%EOF

View File

@ -122,6 +122,32 @@ WARNING: issue-101.pdf (object 11 0, offset 1357): unknown token while reading o
WARNING: issue-101.pdf (object 11 0, offset 1359): unknown token while reading object; treating as string
WARNING: issue-101.pdf (object 11 0, offset 1368): unexpected )
WARNING: issue-101.pdf (object 11 0, offset 1373): expected endobj
WARNING: issue-101.pdf (object 2 0, offset 244): unknown token while reading object; treating as string
WARNING: issue-101.pdf (object 7 0, offset 3855): unknown token while reading object; treating as string
WARNING: issue-101.pdf (object 7 0, offset 3863): treating unexpected brace token as null
WARNING: issue-101.pdf (object 7 0, offset 3864): unknown token while reading object; treating as string
WARNING: issue-101.pdf (object 7 0, offset 3866): unknown token while reading object; treating as string
WARNING: issue-101.pdf (object 7 0, offset 3873): unknown token while reading object; treating as string
WARNING: issue-101.pdf (object 7 0, offset 3879): unknown token while reading object; treating as string
WARNING: issue-101.pdf (object 7 0, offset 3888): unknown token while reading object; treating as string
WARNING: issue-101.pdf (object 7 0, offset 3901): unknown token while reading object; treating as string
WARNING: issue-101.pdf (object 7 0, offset 3905): unknown token while reading object; treating as string
WARNING: issue-101.pdf (object 7 0, offset 3913): unknown token while reading object; treating as string
WARNING: issue-101.pdf (object 7 0, offset 3847): expected dictionary key but found non-name object; inserting key /QPDFFake1
WARNING: issue-101.pdf (object 7 0, offset 3847): expected dictionary key but found non-name object; inserting key /QPDFFake2
WARNING: issue-101.pdf (object 7 0, offset 3847): expected dictionary key but found non-name object; inserting key /QPDFFake3
WARNING: issue-101.pdf (object 7 0, offset 3847): expected dictionary key but found non-name object; inserting key /QPDFFake4
WARNING: issue-101.pdf (object 7 0, offset 3847): expected dictionary key but found non-name object; inserting key /QPDFFake5
WARNING: issue-101.pdf (object 7 0, offset 3847): expected dictionary key but found non-name object; inserting key /QPDFFake6
WARNING: issue-101.pdf (object 7 0, offset 3847): expected dictionary key but found non-name object; inserting key /QPDFFake7
WARNING: issue-101.pdf (object 7 0, offset 3847): expected dictionary key but found non-name object; inserting key /QPDFFake8
WARNING: issue-101.pdf (object 7 0, offset 3847): expected dictionary key but found non-name object; inserting key /QPDFFake9
WARNING: issue-101.pdf (object 7 0, offset 3847): expected dictionary key but found non-name object; inserting key /QPDFFake10
WARNING: issue-101.pdf (object 7 0, offset 3844): stream dictionary lacks /Length key
WARNING: issue-101.pdf (object 7 0, offset 3962): attempting to recover stream length
WARNING: issue-101.pdf (object 7 0, offset 3962): recovered stream length: 12
WARNING: issue-101.pdf (object 8 0, offset 4067): invalid character ()) in hexstring
WARNING: issue-101.pdf (object 8 0, offset 4069): expected endobj
WARNING: issue-101.pdf (object 9 0, offset 2832): unknown token while reading object; treating as string
WARNING: issue-101.pdf (object 9 0, offset 2834): expected endobj
qpdf: operation succeeded with warnings; resulting file may have some problems

View File

@ -5,4 +5,12 @@ WARNING: issue-117.pdf (offset 66): loop detected resolving object 2 0
WARNING: issue-117.pdf (object 2 0, offset 22): /Length key in stream dictionary is not an integer
WARNING: issue-117.pdf (object 2 0, offset 67): attempting to recover stream length
WARNING: issue-117.pdf (object 2 0, offset 67): recovered stream length: 91
WARNING: issue-117.pdf (object 5 0, offset 1559): expected endstream
WARNING: issue-117.pdf (object 5 0, offset 349): attempting to recover stream length
WARNING: issue-117.pdf (object 5 0, offset 349): recovered stream length: 762
WARNING: issue-117.pdf (object 5 0, offset 1121): expected endobj
WARNING: issue-117.pdf (object 7 0, offset 1791): unknown token while reading object; treating as string
WARNING: issue-117.pdf (object 7 0, offset 1267): /Length key in stream dictionary is not an integer
WARNING: issue-117.pdf (object 7 0, offset 1418): attempting to recover stream length
WARNING: issue-117.pdf (object 7 0, offset 1418): recovered stream length: 347
attempt to make a stream into a direct object

View File

@ -1,3 +1,7 @@
WARNING: issue-120.pdf (offset 85): loop detected resolving object 3 0
WARNING: issue-120.pdf (object 6 0, offset 85): supposed object stream 3 is not a stream
WARNING: issue-120.pdf: file is damaged
WARNING: issue-120.pdf (object 8 10, offset 26880): expected n n obj
WARNING: issue-120.pdf: Attempting to reconstruct cross-reference table
WARNING: issue-120.pdf: object 8 10 not found in file after regenerating cross reference table
qpdf: operation succeeded with warnings; resulting file may have some problems

View File

@ -14,4 +14,7 @@ WARNING: issue-143.pdf (object 1 0, offset 21): stream dictionary lacks /Length
WARNING: issue-143.pdf (object 1 0, offset 84): attempting to recover stream length
WARNING: issue-143.pdf (object 1 0, offset 84): recovered stream length: 606
WARNING: issue-143.pdf object stream 1 (object 2 0, offset 33): expected dictionary key but found non-name object; inserting key /QPDFFake1
WARNING: issue-143.pdf (object 2 0, offset 84): supposed object stream 12336 is not a stream
WARNING: issue-143.pdf (object 2 0, offset 84): supposed object stream 12336 is not a stream
WARNING: issue-143.pdf (object 2 0, offset 84): supposed object stream 12336 is not a stream
qpdf: operation succeeded with warnings; resulting file may have some problems

View File

@ -8,3 +8,8 @@ WARNING: issue-51.pdf (object 2 0, offset 71): attempting to recover stream leng
WARNING: issue-51.pdf (object 2 0, offset 71): unable to recover stream data; treating stream as empty
WARNING: issue-51.pdf (object 2 0, offset 977): expected endobj
WARNING: issue-51.pdf (object 2 0, offset 977): EOF after endobj
WARNING: issue-51.pdf (object 3 0): object has offset 0
WARNING: issue-51.pdf (object 4 0): object has offset 0
WARNING: issue-51.pdf (object 5 0): object has offset 0
WARNING: issue-51.pdf (object 6 0): object has offset 0
WARNING: issue-51.pdf (object 8 0): object has offset 0

Binary file not shown.

View File

@ -0,0 +1,9 @@
all objects
1 0 R
2 0 R
3 0 R
4 0 R
5 0 R
6 0 R
7 0 R
test 53 done

View File

@ -1846,6 +1846,25 @@ void runtest(int n, char const* filename1, char const* arg2)
QPDFWriter w(pdf, "a.pdf");
w.write();
}
else if (n == 53)
{
// Test get all objects and dangling ref handling
QPDFObjectHandle root = pdf.getRoot();
root.replaceKey(
"/Q1",
pdf.makeIndirectObject(QPDFObjectHandle::newString("potato")));
std::cout << "all objects" << std::endl;
std::vector<QPDFObjectHandle> all = pdf.getAllObjects();
for (std::vector<QPDFObjectHandle>::iterator iter = all.begin();
iter != all.end(); ++iter)
{
std::cout << (*iter).unparse() << std::endl;
}
QPDFWriter w(pdf, "a.pdf");
w.setStaticID(true);
w.write();
}
else
{
throw std::runtime_error(std::string("invalid test ") +