2
1
mirror of https://github.com/qpdf/qpdf.git synced 2024-06-04 19:30:53 +00:00

Properly handle pages with no contents (fixes #194)

Remove calls to assertPageObject(). All cases in the library that
called assertPageObject() work fine if you don't call
assertPageObject() because nothing assumes anything that was being
checked by that call. Removing the calls enables more files to be
successfully processed.
This commit is contained in:
Jay Berkenbilt 2018-03-06 11:25:09 -05:00
parent 1a4dcb4aaf
commit e4e2e26d99
10 changed files with 189 additions and 11 deletions

View File

@ -1,3 +1,7 @@
2018-03-06 Jay Berkenbilt <ejb@ql.org>
* Properly handle pages with no contents. Fixes #194.
2018-03-05 Jay Berkenbilt <ejb@ql.org>
* Improve handling of loops while following cross reference

View File

@ -830,6 +830,11 @@ class QPDFObjectHandle
QPDF_DLL QPDF_DLL
void assertNumber(); void assertNumber();
// The isPageObject method checks the /Type key of the object.
// This is not completely reliable as there are some otherwise
// valid files whose /Type is wrong for page objects. qpdf is
// slightly more accepting but may still return false here when
// treating the object as a page would work. Use this sparingly.
QPDF_DLL QPDF_DLL
bool isPageObject(); bool isPageObject();
QPDF_DLL QPDF_DLL

View File

@ -932,8 +932,6 @@ QPDFObjectHandle::getGeneration() const
std::map<std::string, QPDFObjectHandle> std::map<std::string, QPDFObjectHandle>
QPDFObjectHandle::getPageImages() QPDFObjectHandle::getPageImages()
{ {
assertPageObject();
// Note: this code doesn't handle inherited resources. If this // Note: this code doesn't handle inherited resources. If this
// page dictionary doesn't have a /Resources key or has one whose // page dictionary doesn't have a /Resources key or has one whose
// value is null or an empty dictionary, you are supposed to walk // value is null or an empty dictionary, you are supposed to walk
@ -1081,7 +1079,6 @@ QPDFObjectHandle::addPageContents(QPDFObjectHandle new_contents, bool first)
void void
QPDFObjectHandle::rotatePage(int angle, bool relative) QPDFObjectHandle::rotatePage(int angle, bool relative)
{ {
assertPageObject();
if ((angle % 90) != 0) if ((angle % 90) != 0)
{ {
throw std::runtime_error( throw std::runtime_error(
@ -1137,7 +1134,6 @@ QPDFObjectHandle::rotatePage(int angle, bool relative)
void void
QPDFObjectHandle::coalesceContentStreams() QPDFObjectHandle::coalesceContentStreams()
{ {
assertPageObject();
QPDFObjectHandle contents = this->getKey("/Contents"); QPDFObjectHandle contents = this->getKey("/Contents");
if (contents.isStream()) if (contents.isStream())
{ {
@ -1218,7 +1214,6 @@ QPDFObjectHandle::parse(std::string const& object_str,
void void
QPDFObjectHandle::pipePageContents(Pipeline* p) QPDFObjectHandle::pipePageContents(Pipeline* p)
{ {
assertPageObject();
std::string description = "page object " + std::string description = "page object " +
QUtil::int_to_string(this->m->objid) + " " + QUtil::int_to_string(this->m->objid) + " " +
QUtil::int_to_string(this->m->generation); QUtil::int_to_string(this->m->generation);
@ -1256,7 +1251,6 @@ QPDFObjectHandle::pipeContentStreams(
void void
QPDFObjectHandle::parsePageContents(ParserCallbacks* callbacks) QPDFObjectHandle::parsePageContents(ParserCallbacks* callbacks)
{ {
assertPageObject();
std::string description = "page object " + std::string description = "page object " +
QUtil::int_to_string(this->m->objid) + " " + QUtil::int_to_string(this->m->objid) + " " +
QUtil::int_to_string(this->m->generation); QUtil::int_to_string(this->m->generation);
@ -1267,7 +1261,6 @@ QPDFObjectHandle::parsePageContents(ParserCallbacks* callbacks)
void void
QPDFObjectHandle::filterPageContents(TokenFilter* filter, Pipeline* next) QPDFObjectHandle::filterPageContents(TokenFilter* filter, Pipeline* next)
{ {
assertPageObject();
std::string description = "token filter for page object " + std::string description = "token filter for page object " +
QUtil::int_to_string(this->m->objid) + " " + QUtil::int_to_string(this->m->objid) + " " +
QUtil::int_to_string(this->m->generation); QUtil::int_to_string(this->m->generation);
@ -2222,8 +2215,29 @@ QPDFObjectHandle::assertNumber()
bool bool
QPDFObjectHandle::isPageObject() QPDFObjectHandle::isPageObject()
{ {
// Some PDF files have /Type broken on pages. // See comments in QPDFObjectHandle.hh.
return (this->isDictionary() && this->hasKey("/Contents")); if (! this->isDictionary())
{
return false;
}
if (this->hasKey("/Type"))
{
QPDFObjectHandle type = this->getKey("/Type");
if (type.isName() && (type.getName() == "/Page"))
{
return true;
}
// Files have been seen in the wild that have /Type (Page)
if (type.isString() && (type.getStringValue() == "Page"))
{
return true;
}
}
if (this->hasKey("/Contents"))
{
return true;
}
return false;
} }
bool bool

View File

@ -191,7 +191,6 @@ QPDF::insertPage(QPDFObjectHandle newpage, int pos)
// pos = npages adds to the end. // pos = npages adds to the end.
flattenPagesTree(); flattenPagesTree();
newpage.assertPageObject();
if (! newpage.isIndirect()) if (! newpage.isIndirect())
{ {
@ -288,7 +287,6 @@ QPDF::addPage(QPDFObjectHandle newpage, bool first)
// Convenience overload: looks the page up by its object/generation pair
// and returns whatever the findPage overload taking that pair returns.
int
QPDF::findPage(QPDFObjectHandle& page)
{
    // No assertPageObject() check here: the lookup only uses the handle's
    // object/generation, so pages that fail the /Type heuristic (for
    // example, pages with no /Contents) can still be located.
    return findPage(page.getObjGen());
}

View File

@ -919,6 +919,34 @@ $td->runtest("check output",
{$td->FILE => "a.pdf"}, {$td->FILE => "a.pdf"},
{$td->FILE => "coalesce-out.pdf"}); {$td->FILE => "coalesce-out.pdf"});
show_ntests();
# ----------
# Test section: a page dictionary that has no /Contents key must be
# accepted by --check and by each conversion mode exercised below.
$td->notify("--- Page with no contents ---");
# 7 tests = 1 (--check) + 3 argument variants * 2 (convert + compare).
$n_tests += 7;
# Run --check on the input; the expected-result hash names
# no-contents-check.out and exit status 0.
$td->runtest("check no contents",
{$td->COMMAND => "qpdf --check no-contents.pdf"},
{$td->FILE => "no-contents-check.out", $td->EXIT_STATUS => 0},
$td->NORMALIZE_NEWLINES);
# Convert the same input with --qdf, with --coalesce-contents, and with
# no extra option (the empty string).
foreach my $arg ('--qdf', '--coalesce-contents', '')
{
# The conversion is expected to produce no output and exit with status 0.
$td->runtest("convert no contents ($arg)",
{$td->COMMAND =>
"qpdf $arg --static-id no-contents.pdf a.pdf"},
{$td->STRING => "", $td->EXIT_STATUS => 0});
# Derive the expected-file suffix from the option: drop the first "--"
# ("qdf", "coalesce-contents"), and use "none" for the empty option.
my $suf = $arg;
$suf =~ s/--//;
if ($suf eq '')
{
$suf = "none";
}
# Compare the generated a.pdf against no-contents-<suffix>.pdf.
$td->runtest("check output",
{$td->FILE => "a.pdf"},
{$td->FILE => "no-contents-$suf.pdf"});
}
show_ntests(); show_ntests();
# ---------- # ----------
$td->notify("--- Token filters ---"); $td->notify("--- Token filters ---");

View File

@ -0,0 +1,6 @@
checking no-contents.pdf
PDF Version: 1.3
File is not encrypted
File is not linearized
No syntax or stream encoding errors found; the file may still contain
errors that qpdf cannot detect

View File

@ -0,0 +1,27 @@
%PDF-1.3
%¿÷¢þ
1 0 obj
<< /Pages 2 0 R /Type /Catalog >>
endobj
2 0 obj
<< /Count 1 /Kids [ 3 0 R ] /Type /Pages >>
endobj
3 0 obj
<< /Contents 4 0 R /MediaBox [ 0 0 720 720 ] /Parent 2 0 R /Resources << >> /Type /Page >>
endobj
4 0 obj
<< /Length 0 /Filter /FlateDecode >>
stream
endstream
endobj
xref
0 5
0000000000 65535 f
0000000015 00000 n
0000000064 00000 n
0000000123 00000 n
0000000229 00000 n
trailer << /Root 1 0 R /Size 5 /ID [<52bba3c78160d0c6e851b59110e5d076><31415926535897932384626433832795>] >>
startxref
298
%%EOF

View File

@ -0,0 +1,21 @@
%PDF-1.3
%¿÷¢þ
1 0 obj
<< /Pages 2 0 R /Type /Catalog >>
endobj
2 0 obj
<< /Count 1 /Kids [ 3 0 R ] /Type /Pages >>
endobj
3 0 obj
<< /MediaBox [ 0 0 720 720 ] /Parent 2 0 R /Resources << >> /Type /Page >>
endobj
xref
0 4
0000000000 65535 f
0000000015 00000 n
0000000064 00000 n
0000000123 00000 n
trailer << /Root 1 0 R /Size 4 /ID [<52bba3c78160d0c6e851b59110e5d076><31415926535897932384626433832795>] >>
startxref
213
%%EOF

View File

@ -0,0 +1,54 @@
%PDF-1.3
%¿÷¢þ
%QDF-1.0
%% Original object ID: 1 0
1 0 obj
<<
/Pages 2 0 R
/Type /Catalog
>>
endobj
%% Original object ID: 2 0
2 0 obj
<<
/Count 1
/Kids [
3 0 R
]
/Type /Pages
>>
endobj
%% Page 1
%% Original object ID: 3 0
3 0 obj
<<
/MediaBox [
0
0
720
720
]
/Parent 2 0 R
/Resources <<
>>
/Type /Page
>>
endobj
xref
0 4
0000000000 65535 f
0000000052 00000 n
0000000133 00000 n
0000000242 00000 n
trailer <<
/Root 1 0 R
/Size 4
/ID [<52bba3c78160d0c6e851b59110e5d076><31415926535897932384626433832795>]
>>
startxref
361
%%EOF

View File

@ -0,0 +1,21 @@
%PDF-1.3
%¿÷¢þ
1 0 obj
<< /Pages 2 0 R /Type /Catalog >>
endobj
2 0 obj
<< /Count 1 /Kids [ 3 0 R ] /Type /Pages >>
endobj
3 0 obj
<< /MediaBox [ 0 0 720 720 ] /Parent 2 0 R /Resources << >> /Type /Page >>
endobj
xref
0 4
0000000000 65535 f
0000000015 00000 n
0000000064 00000 n
0000000123 00000 n
trailer << /Root 1 0 R /Size 4 /ID [<52bba3c78160d0c6e851b59110e5d076><52bba3c78160d0c6e851b59110e5d076>] >>
startxref
213
%%EOF