Allow --check to coexist with and precede other operations (fixes #42)

This commit is contained in:
Jay Berkenbilt 2017-07-29 19:56:21 -04:00
parent 570db9b60b
commit 885b8781cc
6 changed files with 243 additions and 93 deletions

View File

@ -1,5 +1,15 @@
2017-07-29 Jay Berkenbilt <ejb@ql.org>
* When passing multiple inspection arguments, run --check first,
and defer exit until after all the checks have been run. This
makes it possible to force operations such as --show-xref to be
delayed until after recovery attempts have been made. For example,
if you have a file with a syntactically valid xref table that has
some offsets that are incorrect, running qpdf --check --show-xref
on that file will first recover the xref and then dump the
recovered xref, while just running qpdf --show-xref will show the
xref table as present in the file. Fixes #42.
* When recovering stream length, indicate the recovered length.
Fixes #44.

View File

@ -1383,6 +1383,97 @@ int main(int argc, char* argv[])
}
if (outfilename == 0)
{
int exit_code = 0;
if (check)
{
// Code below may set okay to false but not to true.
// We assume okay until we prove otherwise but may
// continue to perform additional checks after finding
// errors.
bool okay = true;
std::cout << "checking " << infilename << std::endl;
try
{
int extension_level = pdf.getExtensionLevel();
std::cout << "PDF Version: " << pdf.getPDFVersion();
if (extension_level > 0)
{
std::cout << " extension level "
<< pdf.getExtensionLevel();
}
std::cout << std::endl;
::show_encryption(pdf);
if (pdf.isLinearized())
{
std::cout << "File is linearized\n";
if (! pdf.checkLinearization())
{
// any errors are reported by checkLinearization()
okay = false;
}
}
else
{
std::cout << "File is not linearized\n";
}
// Write the file to nowhere, uncompressing
// streams. This causes full file traversal and
// decoding of all streams we can decode.
QPDFWriter w(pdf);
Pl_Discard discard;
w.setOutputPipeline(&discard);
w.setStreamDataMode(qpdf_s_uncompress);
w.write();
// Parse all content streams
std::vector<QPDFObjectHandle> pages = pdf.getAllPages();
DiscardContents discard_contents;
int pageno = 0;
for (std::vector<QPDFObjectHandle>::iterator iter =
pages.begin();
iter != pages.end(); ++iter)
{
++pageno;
try
{
QPDFObjectHandle::parseContentStream(
(*iter).getKey("/Contents"),
&discard_contents);
}
catch (QPDFExc& e)
{
okay = false;
std::cout << "page " << pageno << ": "
<< e.what() << std::endl;
}
}
}
catch (std::exception& e)
{
std::cout << e.what() << std::endl;
okay = false;
}
if (okay)
{
if (! pdf.getWarnings().empty())
{
exit_code = EXIT_WARNING;
}
else
{
std::cout << "No syntax or stream encoding errors"
<< " found; the file may still contain"
<< std::endl
<< "errors that qpdf cannot detect"
<< std::endl;
}
}
else
{
exit_code = EXIT_ERROR;
}
}
if (show_npages)
{
QTC::TC("qpdf", "qpdf npages");
@ -1402,7 +1493,7 @@ int main(int argc, char* argv[])
}
else
{
exit(EXIT_ERROR);
exit_code = EXIT_ERROR;
}
}
if (show_linearization)
@ -1435,7 +1526,7 @@ int main(int argc, char* argv[])
QTC::TC("qpdf", "qpdf unable to filter");
std::cerr << "Unable to filter stream data."
<< std::endl;
exit(EXIT_ERROR);
exit_code = EXIT_ERROR;
}
else
{
@ -1512,96 +1603,10 @@ int main(int argc, char* argv[])
}
}
}
if (check)
{
// Code below may set okay to false but not to true.
// We assume okay until we prove otherwise but may
// continue to perform additional checks after finding
// errors.
bool okay = true;
std::cout << "checking " << infilename << std::endl;
try
{
int extension_level = pdf.getExtensionLevel();
std::cout << "PDF Version: " << pdf.getPDFVersion();
if (extension_level > 0)
{
std::cout << " extension level "
<< pdf.getExtensionLevel();
}
std::cout << std::endl;
::show_encryption(pdf);
if (pdf.isLinearized())
{
std::cout << "File is linearized\n";
if (! pdf.checkLinearization())
{
// any errors are reported by checkLinearization()
okay = false;
}
}
else
{
std::cout << "File is not linearized\n";
}
// Write the file to nowhere, uncompressing
// streams. This causes full file traversal and
// decoding of all streams we can decode.
QPDFWriter w(pdf);
Pl_Discard discard;
w.setOutputPipeline(&discard);
w.setStreamDataMode(qpdf_s_uncompress);
w.write();
// Parse all content streams
std::vector<QPDFObjectHandle> pages = pdf.getAllPages();
DiscardContents discard_contents;
int pageno = 0;
for (std::vector<QPDFObjectHandle>::iterator iter =
pages.begin();
iter != pages.end(); ++iter)
{
++pageno;
try
{
QPDFObjectHandle::parseContentStream(
(*iter).getKey("/Contents"),
&discard_contents);
}
catch (QPDFExc& e)
{
okay = false;
std::cout << "page " << pageno << ": "
<< e.what() << std::endl;
}
}
}
catch (std::exception& e)
{
std::cout << e.what() << std::endl;
okay = false;
}
if (okay)
{
if (! pdf.getWarnings().empty())
{
exit(EXIT_WARNING);
}
else
{
std::cout << "No syntax or stream encoding errors"
<< " found; the file may still contain"
<< std::endl
<< "errors that qpdf cannot detect"
<< std::endl;
}
}
else
{
exit(EXIT_ERROR);
}
}
if (exit_code)
{
exit(exit_code);
}
}
else
{

View File

@ -206,7 +206,7 @@ $td->runtest("remove page we don't have",
show_ntests();
# ----------
$td->notify("--- Miscellaneous Tests ---");
$n_tests += 91;
$n_tests += 93;
$td->runtest("qpdf version",
{$td->COMMAND => "qpdf --version"},
@ -628,6 +628,19 @@ $td->runtest("check output",
{$td->FILE => "a.pdf"},
{$td->FILE => "newline-before-endstream.pdf"});
# Demonstrate show-xref after check and not after check to illustrate
# that it can dump the real xref or the recovered xref.
$td->runtest("dump bad xref",
{$td->COMMAND => "qpdf --show-xref bad-xref-entry.pdf"},
{$td->FILE => "bad-xref-entry.out",
$td->EXIT_STATUS => 0},
$td->NORMALIZE_NEWLINES);
$td->runtest("dump corrected bad xref",
{$td->COMMAND => "qpdf --check --show-xref bad-xref-entry.pdf"},
{$td->FILE => "bad-xref-entry-corrected.out",
$td->EXIT_STATUS => 3},
$td->NORMALIZE_NEWLINES);
show_ntests();
# ----------

View File

@ -0,0 +1,14 @@
checking bad-xref-entry.pdf
PDF Version: 1.3
File is not encrypted
File is not linearized
WARNING: bad-xref-entry.pdf: file is damaged
WARNING: bad-xref-entry.pdf (object 5 0, file position 580): expected n n obj
WARNING: bad-xref-entry.pdf: Attempting to reconstruct cross-reference table
1/0: uncompressed; offset = 52
2/0: uncompressed; offset = 133
3/0: uncompressed; offset = 242
4/0: uncompressed; offset = 484
5/0: uncompressed; offset = 583
6/0: uncompressed; offset = 629
7/0: uncompressed; offset = 774

View File

@ -0,0 +1,7 @@
1/0: uncompressed; offset = 52
2/0: uncompressed; offset = 133
3/0: uncompressed; offset = 242
4/0: uncompressed; offset = 484
5/0: uncompressed; offset = 580
6/0: uncompressed; offset = 629
7/0: uncompressed; offset = 774

View File

@ -0,0 +1,101 @@
%PDF-1.3
%¿÷¢þ
%QDF-1.0
%% Original object ID: 1 0
1 0 obj
<<
/Pages 2 0 R
/Type /Catalog
>>
endobj
%% Original object ID: 2 0
2 0 obj
<<
/Count 1
/Kids [
3 0 R
]
/Type /Pages
>>
endobj
%% Page 1
%% Original object ID: 3 0
3 0 obj
<<
/Contents 4 0 R
/MediaBox [
0
0
612
792
]
/Parent 2 0 R
/Resources <<
/Font <<
/F1 6 0 R
>>
/ProcSet 7 0 R
>>
/Type /Page
>>
endobj
%% Contents for page 1
%% Original object ID: 4 0
4 0 obj
<<
/Length 5 0 R
>>
stream
BT
/F1 24 Tf
72 720 Td
(Potato) Tj
ET
endstream
endobj
5 0 obj
44
endobj
%% Original object ID: 6 0
6 0 obj
<<
/BaseFont /Helvetica
/Encoding /WinAnsiEncoding
/Name /F1
/Subtype /Type1
/Type /Font
>>
endobj
%% Original object ID: 5 0
7 0 obj
[
/PDF
/Text
]
endobj
xref
0 8
0000000000 65535 f
0000000052 00000 n
0000000133 00000 n
0000000242 00000 n
0000000484 00000 n
0000000580 00000 n
0000000629 00000 n
0000000774 00000 n
trailer <<
/Root 1 0 R
/Size 8
/ID [<2e68fbddcf3742fa64db89e66acd25d9><2e68fbddcf3742fa64db89e66acd25d9>]
>>
startxref
809
%%EOF