2
1
mirror of https://github.com/qpdf/qpdf.git synced 2025-01-02 22:50:20 +00:00

In Xref_table track (maximum possible) length of uncompressed objects

Use start of next object or xref table to put an upper bound on object
length.

Tests reuse existing incremental-1.pdf. incremental-1-bad.pdf is
incremental-1.pdf with whitespace inserted after object 1 0.
This commit is contained in:
m-holger 2024-10-24 11:34:00 +01:00
parent e8849ebe35
commit 413e8acd4f
11 changed files with 448 additions and 6 deletions

View File

@ -737,6 +737,7 @@ class QPDF
// For testing only -- do not add to DLL // For testing only -- do not add to DLL
static bool test_json_validators(); static bool test_json_validators();
void test_xref();
private: private:
// It has never been safe to copy QPDF objects as there is code in the library that assumes // It has never been safe to copy QPDF objects as there is code in the library that assumes

View File

@ -1133,3 +1133,9 @@ QPDF::removeSecurityRestrictions()
acroform.replaceKey("/SigFlags", QPDFObjectHandle::newInteger(0)); acroform.replaceKey("/SigFlags", QPDFObjectHandle::newInteger(0));
} }
} }
// Test-only hook (declared under "For testing only -- do not add to DLL"): forwards to the
// cross-reference table's test() method.
void
QPDF::test_xref()
{
objects().xref_table().test();
}

View File

@ -2,6 +2,7 @@
#include <qpdf/QPDF_private.hh> #include <qpdf/QPDF_private.hh>
#include <algorithm>
#include <array> #include <array>
#include <cstring> #include <cstring>
#include <limits> #include <limits>
@ -89,6 +90,21 @@ namespace
}; };
} // namespace } // namespace
// Test helper: write one comma-separated row to stdout for every type 1 (uncompressed) entry of
// the xref table. Each row holds the table index, generation, entry type, offset, recorded
// length and offset + length.
//
// NOTE(review): the header row names five columns while six values are printed per row (the
// entry type has no header). The expected-output files of the xref test contain this exact
// header, so it is reproduced unchanged here.
void
Xref_table::test()
{
    std::cout << "id, gen, offset, length, next\n";
    int id = -1;
    for (auto const& e: table) {
        ++id;
        if (e.type() != 1) {
            // Only uncompressed objects carry an offset / length worth reporting.
            continue;
        }
        auto const next = e.offset() + toO(e.length());
        std::cout << id << ", " << e.gen() << ", " << e.type() << ", " << e.offset() << ", "
                  << e.length() << ", " << next << '\n';
    }
}
bool bool
QPDF::findStartxref() QPDF::findStartxref()
{ {
@ -143,6 +159,7 @@ Xref_table::initialize()
PatternFinder sf(qpdf, &QPDF::findStartxref); PatternFinder sf(qpdf, &QPDF::findStartxref);
qpdf_offset_t xref_offset = 0; qpdf_offset_t xref_offset = 0;
if (file->findLast("startxref", start_offset, 0, sf)) { if (file->findLast("startxref", start_offset, 0, sf)) {
offsets.emplace_back(file->tell(), 0);
xref_offset = QUtil::string_to_ll(read_token().getValue().c_str()); xref_offset = QUtil::string_to_ll(read_token().getValue().c_str());
} }
@ -167,10 +184,30 @@ Xref_table::initialize()
} }
} }
calc_lengths();
prepare_obj_table(); prepare_obj_table();
initialized_ = true; initialized_ = true;
} }
void
Xref_table::calc_lengths()
{
if (offsets.size() > 1) {
std::sort(offsets.begin(), offsets.end());
size_t id = 0;
auto end = table.size();
qpdf_offset_t offset = 0;
for (auto const& item: offsets) {
if (id && id < end) {
table[id].length_ = toS(item.first - offset);
}
offset = item.first;
id = item.second;
}
}
offsets.clear();
}
// Remove any dangling reference picked up while parsing or reconstructing the xref table from the // Remove any dangling reference picked up while parsing or reconstructing the xref table from the
// object table. // object table.
void void
@ -234,6 +271,7 @@ Xref_table::reconstruct(QPDFExc& e)
file->seek(0, SEEK_END); file->seek(0, SEEK_END);
qpdf_offset_t eof = file->tell(); qpdf_offset_t eof = file->tell();
offsets.emplace_back(eof, 0);
file->seek(0, SEEK_SET); file->seek(0, SEEK_SET);
// Don't allow very long tokens here during recovery. All the interesting tokens are covered. // Don't allow very long tokens here during recovery. All the interesting tokens are covered.
static size_t const MAX_LEN = 10; static size_t const MAX_LEN = 10;
@ -256,8 +294,13 @@ Xref_table::reconstruct(QPDFExc& e)
} }
} }
file->seek(pos, SEEK_SET); file->seek(pos, SEEK_SET);
} else if (!trailer_ && t1.isWord("trailer")) { } else if (t1.isWord("trailer")) {
trailers.emplace_back(file->tell()); offsets.emplace_back(token_start, 0);
if (!trailer_) {
trailers.emplace_back(file->tell());
}
} else if (t1.isWord("xref")) {
offsets.emplace_back(token_start, 0);
} }
file->findAndSkipNextEOL(); file->findAndSkipNextEOL();
} }
@ -280,8 +323,9 @@ Xref_table::reconstruct(QPDFExc& e)
for (auto it = found_objects.rbegin(); it != rend; it++) { for (auto it = found_objects.rbegin(); it != rend; it++) {
auto [obj, gen, token_start] = *it; auto [obj, gen, token_start] = *it;
insert(obj, 1, token_start, gen); insert(obj, 1, token_start, gen);
check_warnings();
} }
calc_lengths();
check_warnings();
if (!trailer_) { if (!trailer_) {
qpdf_offset_t max_offset{0}; qpdf_offset_t max_offset{0};
@ -401,6 +445,7 @@ Xref_table::read(qpdf_offset_t xref_offset)
while (QUtil::is_space(buf[skip])) { while (QUtil::is_space(buf[skip])) {
++skip; ++skip;
} }
offsets.emplace_back(xref_offset, 0);
xref_offset = process_section(xref_offset + skip); xref_offset = process_section(xref_offset + skip);
} else { } else {
xref_offset = read_stream(xref_offset); xref_offset = read_stream(xref_offset);
@ -1037,6 +1082,11 @@ Xref_table::insert(int obj, int f0, qpdf_offset_t f1, int f2)
// entry. This will need to be revisited when we want to support incremental updates or more // entry. This will need to be revisited when we want to support incremental updates or more
// comprehensive checking. // comprehensive checking.
QTC::TC("qpdf", "QPDF xref replaced / deleted object", old_type == 0 ? 0 : 1); QTC::TC("qpdf", "QPDF xref replaced / deleted object", old_type == 0 ? 0 : 1);
if (f0 == 1) {
// Save offset of deleted/replaced object to allow us to calculate object length once we
// are finished loading the xref table.
offsets.emplace_back(f1, 0);
}
return; return;
} }
@ -1051,12 +1101,13 @@ Xref_table::insert(int obj, int f0, qpdf_offset_t f1, int f2)
// f2 is generation // f2 is generation
QTC::TC("qpdf", "QPDF xref gen > 0", (f2 > 0) ? 1 : 0); QTC::TC("qpdf", "QPDF xref gen > 0", (f2 > 0) ? 1 : 0);
entry = {f2, Uncompressed(f1)}; entry = {f2, Uncompressed(f1)};
break; offsets.emplace_back(f1, static_cast<size_t>(obj));
return;
case 2: case 2:
entry = {0, Compressed(toI(f1), f2)}; entry = {0, Compressed(toI(f1), f2)};
object_streams_ = true; object_streams_ = true;
break; return;
default: default:
throw qpdf.damagedPDF( throw qpdf.damagedPDF(

View File

@ -228,6 +228,8 @@ class QPDF::Objects
return first_item_offset_; return first_item_offset_;
} }
void test();
private: private:
// Object, count, offset of first entry // Object, count, offset of first entry
typedef std::tuple<int, int, qpdf_offset_t> Subsection; typedef std::tuple<int, int, qpdf_offset_t> Subsection;
@ -294,7 +296,14 @@ class QPDF::Objects
return type() == 2 ? std::get<2>(entry).stream_index : 0; return type() == 2 ? std::get<2>(entry).stream_index : 0;
} }
// Returns the stored length_ of this entry. length_ defaults to 0 and is meaningful for
// uncompressed objects only, where Xref_table::calc_lengths() sets it to the distance to the
// next recorded offset (an upper bound on the object's length).
size_t
length() const noexcept
{
return length_;
}
int gen_{0}; int gen_{0};
size_t length_{0}; // For uncompressed objects.
Xref entry; Xref entry;
qpdf_offset_t end_before_space_{0}; qpdf_offset_t end_before_space_{0};
qpdf_offset_t end_after_space_{0}; qpdf_offset_t end_after_space_{0};
@ -314,6 +323,7 @@ class QPDF::Objects
} }
void read(qpdf_offset_t offset); void read(qpdf_offset_t offset);
void calc_lengths();
void prepare_obj_table(); void prepare_obj_table();
// Methods to parse tables // Methods to parse tables
@ -376,6 +386,9 @@ class QPDF::Objects
std::vector<Entry> table; std::vector<Entry> table;
QPDFObjectHandle trailer_; QPDFObjectHandle trailer_;
// Temporary offset table used to calculate uncompressed object length.
std::vector<std::pair<qpdf_offset_t, size_t>> offsets;
bool attempt_recovery_{true}; bool attempt_recovery_{true};
bool initialized_{false}; bool initialized_{false};
bool ignore_streams_{false}; bool ignore_streams_{false};

View File

@ -34,7 +34,8 @@ set(TEST_PROGRAMS
rc4 rc4
runlength runlength
sha2 sha2
sparse_array) sparse_array
xref)
set(TEST_C_PROGRAMS set(TEST_C_PROGRAMS
logger_c) logger_c)

34
libtests/qtest/xref.test Normal file
View File

@ -0,0 +1,34 @@
#!/usr/bin/env perl
# qtest driver for the `xref` test program: runs it on each input PDF and compares stdout and
# exit status with the matching .out file.
require 5.008;
use warnings;
use strict;

# Run from inside the test data directory; TestDriver is loaded via the original directory.
unshift(@INC, '.');
chdir("xref") or die "chdir testdir failed: $!\n";

require TestDriver;

my $td = TestDriver->new('xref');

# Base names of the input PDFs; each has a NAME.pdf input and a NAME.out expected output.
my @inputs = qw(incremental-1 incremental-1-bad);

for my $base (@inputs)
{
    $td->runtest("xref $base",
                 {$td->COMMAND => "xref $base.pdf"},
                 {$td->FILE => "$base.out",
                  $td->EXIT_STATUS => 0},
                 $td->NORMALIZE_NEWLINES);
}

cleanup();

# One test is run per input file.
$td->report(scalar(@inputs));

# Remove the scratch file "tmp" if it exists.
sub cleanup
{
    unlink "tmp";
}

View File

@ -0,0 +1,12 @@
WARNING: incremental-1-bad.pdf: file is damaged
WARNING: incremental-1-bad.pdf (offset 1241): xref not found
WARNING: incremental-1-bad.pdf: Attempting to reconstruct cross-reference table
id, gen, offset, length, next
1, 0, 1, 9, 93, 102
2, 0, 1, 102, 72, 174
3, 0, 1, 1108, 172, 1280
4, 1, 1, 987, 26, 1013
5, 0, 1, 442, 35, 477
6, 0, 1, 477, 118, 595
7, 0, 1, 1013, 95, 1108
xref done

View File

@ -0,0 +1,145 @@
%PDF-1.3
1 0 obj
<<
/Type /Catalog
/Pages 2 0 R
>>
endobj
2 0 obj
<<
/Type /Pages
/Kids [
3 0 R
]
/Count 1
>>
endobj
3 0 obj
<<
/Type /Page
/Parent 2 0 R
/MediaBox [0 0 612 792]
/Contents 4 0 R
/Resources <<
/ProcSet 5 0 R
/Font <<
/F1 6 0 R
>>
>>
>>
endobj
4 0 obj
<<
/Length 44
>>
stream
BT
/F1 24 Tf
72 720 Td
(Potato) Tj
ET
endstream
endobj
5 0 obj
[
/PDF
/Text
]
endobj
6 0 obj
<<
/Type /Font
/Subtype /Type1
/Name /F1
/BaseFont /Helvetica
/Encoding /WinAnsiEncoding
>>
endobj
xref
0 7
0000000000 65535 f
0000000009 00000 n
0000000063 00000 n
0000000135 00000 n
0000000307 00000 n
0000000403 00000 n
0000000438 00000 n
trailer <<
/Size 7
/Root 1 0 R
>>
startxref
556
%%EOF
% Delete object 4 and increment generation
xref
0 1
0000000004 65535 f
4 1
0000000000 00001 f
trailer <<
/Size 7
/Root 1 0 R
/Prev 556
>>
startxref
807
%%EOF
% Reuse object 4
4 1 obj
[ 7 0 R ]
endobj
7 0 obj
<<
/Length 43
>>
stream
BT
/F1 24 Tf
72 720 Td
(Salad) Tj
ET
endstream
endobj
3 0 obj
<<
/Type /Page
/Parent 2 0 R
/MediaBox [0 0 612 792]
/Contents 4 1 R
/Resources <<
/ProcSet 5 0 R
/Font <<
/F1 6 0 R
>>
>>
>>
endobj
xref
0 1
0000000000 65535 f
3 2
0000001069 00000 n
0000000948 00001 n
7 1
0000000974 00000 n
trailer <<
/Size 8
/Root 1 0 R
/Prev 807
/Gone 4 0 R
>>
startxref
1241
%%EOF

View File

@ -0,0 +1,9 @@
id, gen, offset, length, next
1, 0, 1, 9, 54, 63
2, 0, 1, 63, 72, 135
3, 0, 1, 1069, 172, 1241
4, 1, 1, 948, 26, 974
5, 0, 1, 403, 35, 438
6, 0, 1, 438, 118, 556
7, 0, 1, 974, 95, 1069
xref done

View File

@ -0,0 +1,145 @@
%PDF-1.3
1 0 obj
<<
/Type /Catalog
/Pages 2 0 R
>>
endobj
2 0 obj
<<
/Type /Pages
/Kids [
3 0 R
]
/Count 1
>>
endobj
3 0 obj
<<
/Type /Page
/Parent 2 0 R
/MediaBox [0 0 612 792]
/Contents 4 0 R
/Resources <<
/ProcSet 5 0 R
/Font <<
/F1 6 0 R
>>
>>
>>
endobj
4 0 obj
<<
/Length 44
>>
stream
BT
/F1 24 Tf
72 720 Td
(Potato) Tj
ET
endstream
endobj
5 0 obj
[
/PDF
/Text
]
endobj
6 0 obj
<<
/Type /Font
/Subtype /Type1
/Name /F1
/BaseFont /Helvetica
/Encoding /WinAnsiEncoding
>>
endobj
xref
0 7
0000000000 65535 f
0000000009 00000 n
0000000063 00000 n
0000000135 00000 n
0000000307 00000 n
0000000403 00000 n
0000000438 00000 n
trailer <<
/Size 7
/Root 1 0 R
>>
startxref
556
%%EOF
% Delete object 4 and increment generation
xref
0 1
0000000004 65535 f
4 1
0000000000 00001 f
trailer <<
/Size 7
/Root 1 0 R
/Prev 556
>>
startxref
807
%%EOF
% Reuse object 4
4 1 obj
[ 7 0 R ]
endobj
7 0 obj
<<
/Length 43
>>
stream
BT
/F1 24 Tf
72 720 Td
(Salad) Tj
ET
endstream
endobj
3 0 obj
<<
/Type /Page
/Parent 2 0 R
/MediaBox [0 0 612 792]
/Contents 4 1 R
/Resources <<
/ProcSet 5 0 R
/Font <<
/F1 6 0 R
>>
>>
>>
endobj
xref
0 1
0000000000 65535 f
3 2
0000001069 00000 n
0000000948 00001 n
7 1
0000000974 00000 n
trailer <<
/Size 8
/Root 1 0 R
/Prev 807
/Gone 4 0 R
>>
startxref
1241
%%EOF

25
libtests/xref.cc Normal file
View File

@ -0,0 +1,25 @@
#include <qpdf/QPDF.hh>
#include <cstdlib>
#include <iostream>
#include <map>
// Test driver for the xref table: loads the PDF named on the command line, dumps its
// uncompressed xref entries via QPDF::test_xref() and prints "xref done" on success.
//
// Exit status: 0 on success, 2 on a usage error or when an exception is reported.
int
main(int argc, char* argv[])
{
    if (argc != 2) {
        std::cerr << "usage: xref INFILE\n";
        return 2;
    }
    try {
        QPDF qpdf;
        qpdf.processFile(argv[1]);
        qpdf.test_xref();
    } catch (std::exception const& e) {
        // Catch by const reference: the handler only reads the message.
        std::cerr << e.what() << '\n';
        return 2;
    }
    std::cout << "xref done\n";
    return 0;
}