2
1
mirror of https://github.com/qpdf/qpdf.git synced 2024-06-04 03:10:52 +00:00

Improve qpdf --check

Fix exit status for case of errors without warnings, continue after
errors when possible, add test case for parsing a file with content
stream errors on some but not all pages.
This commit is contained in:
Jay Berkenbilt 2013-01-25 11:08:50 -05:00
parent a7e8b8c789
commit a5d8783f67
6 changed files with 55 additions and 19 deletions

View File

@ -1,3 +1,9 @@
2013-01-25 Jay Berkenbilt <ejb@ql.org>
* qpdf --check was exiting with status 0 in some rare cases even
when errors were found. It now always exits with one of the
document error codes (0 for success, 2 for errors, 3 for warnings).
2013-01-24 Jay Berkenbilt <ejb@ql.org> 2013-01-24 Jay Berkenbilt <ejb@ql.org>
* qpdf --check now does syntactic checks on all pages' content * qpdf --check now does syntactic checks on all pages' content
@ -5,10 +11,6 @@
errors are still not checked, and there are no plans to add errors are still not checked, and there are no plans to add
semantic checks. semantic checks.
* Bug fix: in versions 4.0.0 and 4.0.1, qpdf --check wasn't doing
as full of a check for linearized files as for non-linearized
files.
2013-01-22 Jay Berkenbilt <ejb@ql.org> 2013-01-22 Jay Berkenbilt <ejb@ql.org>
* Add QPDFObjectHandle::getTypeCode(). This method returns a * Add QPDFObjectHandle::getTypeCode(). This method returns a

View File

@ -1012,10 +1012,10 @@ QPDFObjectHandle::parseInternal(PointerHolder<InputSource> input,
} }
else if (! object.isInitialized()) else if (! object.isInitialized())
{ {
throw std::logic_error( throw QPDFExc(qpdf_e_damaged_pdf, input->getName(),
"INTERNAL ERROR: uninitialized object (token = " + object_description,
QUtil::int_to_string(token.getType()) + input->getLastOffset(),
", " + token.getValue() + ")"); "parse error while reading object");
} }
else else
{ {

View File

@ -1428,7 +1428,11 @@ int main(int argc, char* argv[])
} }
if (check) if (check)
{ {
bool okay = false; // Code below may set okay to false but not to true.
// We assume okay until we prove otherwise but may
// continue to perform additional checks after finding
// errors.
bool okay = true;
std::cout << "checking " << infilename << std::endl; std::cout << "checking " << infilename << std::endl;
try try
{ {
@ -1444,8 +1448,11 @@ int main(int argc, char* argv[])
if (pdf.isLinearized()) if (pdf.isLinearized())
{ {
std::cout << "File is linearized\n"; std::cout << "File is linearized\n";
okay = pdf.checkLinearization(); if (! pdf.checkLinearization())
// any errors are reported by checkLinearization(). {
// any errors are reported by checkLinearization()
okay = false;
}
} }
else else
{ {
@ -1453,8 +1460,8 @@ int main(int argc, char* argv[])
} }
// Write the file to nowhere, uncompressing // Write the file to nowhere, uncompressing
// streams. This causes full file traversal // streams. This causes full file traversal and
// and decoding of all streams we can decode. // decoding of all streams we can decode.
QPDFWriter w(pdf); QPDFWriter w(pdf);
Pl_Discard discard; Pl_Discard discard;
w.setOutputPipeline(&discard); w.setOutputPipeline(&discard);
@ -1464,19 +1471,30 @@ int main(int argc, char* argv[])
// Parse all content streams // Parse all content streams
std::vector<QPDFObjectHandle> pages = pdf.getAllPages(); std::vector<QPDFObjectHandle> pages = pdf.getAllPages();
DiscardContents discard_contents; DiscardContents discard_contents;
int pageno = 0;
for (std::vector<QPDFObjectHandle>::iterator iter = for (std::vector<QPDFObjectHandle>::iterator iter =
pages.begin(); pages.begin();
iter != pages.end(); ++iter) iter != pages.end(); ++iter)
{ {
QPDFObjectHandle::parseContentStream( ++pageno;
(*iter).getKey("/Contents"), &discard_contents); try
{
QPDFObjectHandle::parseContentStream(
(*iter).getKey("/Contents"),
&discard_contents);
}
catch (QPDFExc& e)
{
okay = false;
std::cout << "page " << pageno << ": "
<< e.what() << std::endl;
}
} }
okay = true;
} }
catch (std::exception& e) catch (std::exception& e)
{ {
std::cout << e.what() << std::endl; std::cout << e.what() << std::endl;
okay = false;
} }
if (okay) if (okay)
{ {
@ -1493,6 +1511,10 @@ int main(int argc, char* argv[])
<< std::endl; << std::endl;
} }
} }
else
{
exit(EXIT_ERROR);
}
} }
} }
else else

View File

@ -199,7 +199,7 @@ $td->runtest("remove page we don't have",
show_ntests(); show_ntests();
# ---------- # ----------
$td->notify("--- Miscellaneous Tests ---"); $td->notify("--- Miscellaneous Tests ---");
$n_tests += 59; $n_tests += 60;
$td->runtest("qpdf version", $td->runtest("qpdf version",
{$td->COMMAND => "qpdf --version"}, {$td->COMMAND => "qpdf --version"},
@ -435,7 +435,7 @@ $td->runtest("EOF terminating literal tokens",
$td->NORMALIZE_NEWLINES); $td->NORMALIZE_NEWLINES);
$td->runtest("EOF reading token", $td->runtest("EOF reading token",
{$td->COMMAND => "qpdf --check eof-reading-token.pdf"}, {$td->COMMAND => "qpdf --check eof-reading-token.pdf"},
{$td->FILE => "eof-reading-token.out", $td->EXIT_STATUS => 0}, {$td->FILE => "eof-reading-token.out", $td->EXIT_STATUS => 2},
$td->NORMALIZE_NEWLINES); $td->NORMALIZE_NEWLINES);
$td->runtest("extra header text", $td->runtest("extra header text",
{$td->COMMAND => "test_driver 32 minimal.pdf"}, {$td->COMMAND => "test_driver 32 minimal.pdf"},
@ -478,6 +478,11 @@ $td->runtest("tokenize content streams",
{$td->FILE => "tokenize-content-streams.out", {$td->FILE => "tokenize-content-streams.out",
$td->EXIT_STATUS => 0}, $td->EXIT_STATUS => 0},
$td->NORMALIZE_NEWLINES); $td->NORMALIZE_NEWLINES);
$td->runtest("content stream errors",
{$td->COMMAND => "qpdf --check content-stream-errors.pdf"},
{$td->FILE => "content-stream-errors.out",
$td->EXIT_STATUS => 2},
$td->NORMALIZE_NEWLINES);
show_ntests(); show_ntests();
# ---------- # ----------

View File

@ -0,0 +1,7 @@
checking content-stream-errors.pdf
PDF Version: 1.3
File is not encrypted
File is not linearized
page 1: content stream object 7 0 (content, file position 52): parse error while reading object
page 3: content stream object 15 0 (stream data, file position 117): EOF found while reading inline image
page 4: content stream object 19 0 (content, file position 53): parse error while reading object

Binary file not shown.