Improve qpdf --check

Fix exit status for case of errors without warnings, continue after
errors when possible, add test case for parsing a file with content
stream errors on some but not all pages.
This commit is contained in:
Jay Berkenbilt 2013-01-25 11:08:50 -05:00
parent a7e8b8c789
commit a5d8783f67
6 changed files with 55 additions and 19 deletions

View File

@ -1,3 +1,9 @@
2013-01-25 Jay Berkenbilt <ejb@ql.org>
* qpdf --check was exiting with status 0 in some rare cases even
when errors were found. It now always exits with one of the
documented error codes (0 for success, 2 for errors, 3 for warnings).
2013-01-24 Jay Berkenbilt <ejb@ql.org>
* qpdf --check now does syntactic checks on all pages' content
@ -5,10 +11,6 @@
errors are still not checked, and there are no plans to add
semantic checks.
* Bug fix: in versions 4.0.0 and 4.0.1, qpdf --check wasn't doing
as thorough a check for linearized files as for non-linearized
files.
2013-01-22 Jay Berkenbilt <ejb@ql.org>
* Add QPDFObjectHandle::getTypeCode(). This method returns a

View File

@ -1012,10 +1012,10 @@ QPDFObjectHandle::parseInternal(PointerHolder<InputSource> input,
}
else if (! object.isInitialized())
{
throw std::logic_error(
"INTERNAL ERROR: uninitialized object (token = " +
QUtil::int_to_string(token.getType()) +
", " + token.getValue() + ")");
throw QPDFExc(qpdf_e_damaged_pdf, input->getName(),
object_description,
input->getLastOffset(),
"parse error while reading object");
}
else
{

View File

@ -1428,7 +1428,11 @@ int main(int argc, char* argv[])
}
if (check)
{
bool okay = false;
// Code below may set okay to false but not to true.
// We assume okay until we prove otherwise but may
// continue to perform additional checks after finding
// errors.
bool okay = true;
std::cout << "checking " << infilename << std::endl;
try
{
@ -1444,8 +1448,11 @@ int main(int argc, char* argv[])
if (pdf.isLinearized())
{
std::cout << "File is linearized\n";
okay = pdf.checkLinearization();
// any errors are reported by checkLinearization().
if (! pdf.checkLinearization())
{
// any errors are reported by checkLinearization()
okay = false;
}
}
else
{
@ -1453,8 +1460,8 @@ int main(int argc, char* argv[])
}
// Write the file to nowhere, uncompressing
// streams. This causes full file traversal
// and decoding of all streams we can decode.
// streams. This causes full file traversal and
// decoding of all streams we can decode.
QPDFWriter w(pdf);
Pl_Discard discard;
w.setOutputPipeline(&discard);
@ -1464,19 +1471,30 @@ int main(int argc, char* argv[])
// Parse all content streams
std::vector<QPDFObjectHandle> pages = pdf.getAllPages();
DiscardContents discard_contents;
int pageno = 0;
for (std::vector<QPDFObjectHandle>::iterator iter =
pages.begin();
iter != pages.end(); ++iter)
{
QPDFObjectHandle::parseContentStream(
(*iter).getKey("/Contents"), &discard_contents);
++pageno;
try
{
QPDFObjectHandle::parseContentStream(
(*iter).getKey("/Contents"),
&discard_contents);
}
catch (QPDFExc& e)
{
okay = false;
std::cout << "page " << pageno << ": "
<< e.what() << std::endl;
}
}
okay = true;
}
catch (std::exception& e)
{
std::cout << e.what() << std::endl;
okay = false;
}
if (okay)
{
@ -1493,6 +1511,10 @@ int main(int argc, char* argv[])
<< std::endl;
}
}
else
{
exit(EXIT_ERROR);
}
}
}
else

View File

@ -199,7 +199,7 @@ $td->runtest("remove page we don't have",
show_ntests();
# ----------
$td->notify("--- Miscellaneous Tests ---");
$n_tests += 59;
$n_tests += 60;
$td->runtest("qpdf version",
{$td->COMMAND => "qpdf --version"},
@ -435,7 +435,7 @@ $td->runtest("EOF terminating literal tokens",
$td->NORMALIZE_NEWLINES);
$td->runtest("EOF reading token",
{$td->COMMAND => "qpdf --check eof-reading-token.pdf"},
{$td->FILE => "eof-reading-token.out", $td->EXIT_STATUS => 0},
{$td->FILE => "eof-reading-token.out", $td->EXIT_STATUS => 2},
$td->NORMALIZE_NEWLINES);
$td->runtest("extra header text",
{$td->COMMAND => "test_driver 32 minimal.pdf"},
@ -478,6 +478,11 @@ $td->runtest("tokenize content streams",
{$td->FILE => "tokenize-content-streams.out",
$td->EXIT_STATUS => 0},
$td->NORMALIZE_NEWLINES);
$td->runtest("content stream errors",
{$td->COMMAND => "qpdf --check content-stream-errors.pdf"},
{$td->FILE => "content-stream-errors.out",
$td->EXIT_STATUS => 2},
$td->NORMALIZE_NEWLINES);
show_ntests();
# ----------

View File

@ -0,0 +1,7 @@
checking content-stream-errors.pdf
PDF Version: 1.3
File is not encrypted
File is not linearized
page 1: content stream object 7 0 (content, file position 52): parse error while reading object
page 3: content stream object 15 0 (stream data, file position 117): EOF found while reading inline image
page 4: content stream object 19 0 (content, file position 53): parse error while reading object

Binary file not shown.