mirror of
https://github.com/qpdf/qpdf.git
synced 2024-06-01 01:40:51 +00:00
Additional checks for unreferenced resources
Explicitly abandon removal of unreferenced resources if there are any lexical errors in the page's contents. This case always generated a warning, but it now also prevents removal of unreferenced resources, thus strongly decreasing the likelihood of data loss.
This commit is contained in:
parent
e09ae710dc
commit
5cfcd4f361
|
@ -99,11 +99,16 @@ QPDFPageObjectHelper::addContentTokenFilter(
|
|||
// Token filter used while scanning a page's content stream. Its
// handleToken() records names in `names`, sets `saw_bad` when it sees a
// token of type QPDFTokenizer::tt_bad, and forwards every token with
// writeToken(). removeUnreferencedResources() checks `saw_bad` and gives
// up on removing resources for the page when it is set.
class NameWatcher: public QPDFObjectHandle::TokenFilter
|
||||
{
|
||||
public:
|
||||
// Start with no bad tokens seen; `names` starts out empty.
NameWatcher() :
|
||||
saw_bad(false)
|
||||
{
|
||||
}
|
||||
virtual ~NameWatcher()
|
||||
{
|
||||
}
|
||||
// Called once per token; defined out of line.
virtual void handleToken(QPDFTokenizer::Token const&);
|
||||
// Names inserted by handleToken(), stored as returned by
// QPDFObjectHandle::newName(...).getName().
// NOTE(review): the condition guarding the insert is outside the
// visible hunk -- presumably name tokens only; confirm.
std::set<std::string> names;
|
||||
// True once any tt_bad token has been seen in the stream.
bool saw_bad;
|
||||
};
|
||||
|
||||
void
|
||||
|
@ -116,6 +121,10 @@ NameWatcher::handleToken(QPDFTokenizer::Token const& token)
|
|||
this->names.insert(
|
||||
QPDFObjectHandle::newName(token.getValue()).getName());
|
||||
}
|
||||
else if (token.getType() == QPDFTokenizer::tt_bad)
|
||||
{
|
||||
saw_bad = true;
|
||||
}
|
||||
writeToken(token);
|
||||
}
|
||||
|
||||
|
@ -134,6 +143,14 @@ QPDFPageObjectHelper::removeUnreferencedResources()
|
|||
"; not attempting to remove unreferenced objects from this page");
|
||||
return;
|
||||
}
|
||||
if (nw.saw_bad)
|
||||
{
|
||||
QTC::TC("qpdf", "QPDFPageObjectHelper bad token finding names");
|
||||
this->oh.warnIfPossible(
|
||||
"Bad token found while scanning content stream; "
|
||||
"not attempting to remove unreferenced objects from this page");
|
||||
return;
|
||||
}
|
||||
// Walk through /Font and /XObject dictionaries, removing any
|
||||
// resources that are not referenced. We must make copies of
|
||||
// resource dictionaries down into the dictionaries we are mutating
|
||||
|
|
|
@ -412,3 +412,4 @@ QPDF copy foreign stream with provider 0
|
|||
QPDF copy foreign stream with buffer 0
|
||||
QPDF immediate copy stream data 0
|
||||
qpdf copy same page more than once 1
|
||||
QPDFPageObjectHelper bad token finding names 0
|
||||
|
|
|
@ -1384,7 +1384,7 @@ my @sp_cases = (
|
|||
[11, 'pdf extension', '', 'split-out.Pdf'],
|
||||
[4, 'fallback', '--pages 11-pages.pdf 1-3 minimal.pdf --', 'split-out'],
|
||||
);
|
||||
$n_tests += 21;
|
||||
$n_tests += 23;
|
||||
for (@sp_cases)
|
||||
{
|
||||
$n_tests += 1 + $_->[0];
|
||||
|
@ -1482,10 +1482,20 @@ $td->runtest("split shared font, xobject",
|
|||
foreach my $i (qw(1 2 3 4))
|
||||
{
|
||||
$td->runtest("check output ($i)",
|
||||
{$td->FILE => "shared-font-xobject-split-$i.pdf"},
|
||||
{$td->FILE => "split-out-shared-font-xobject-$i.pdf"});
|
||||
{$td->FILE => "split-out-shared-font-xobject-$i.pdf"},
|
||||
{$td->FILE => "shared-font-xobject-split-$i.pdf"});
|
||||
}
|
||||
|
||||
$td->runtest("unreferenced resources with bad token",
|
||||
{$td->COMMAND =>
|
||||
"qpdf --qdf --static-id --split-pages=2" .
|
||||
" coalesce.pdf split-out-bad-token.pdf"},
|
||||
{$td->FILE => "coalesce-split.out", $td->EXIT_STATUS => 3},
|
||||
$td->NORMALIZE_NEWLINES);
|
||||
$td->runtest("check output",
|
||||
{$td->FILE => "split-out-bad-token-1-2.pdf"},
|
||||
{$td->FILE => "coalesce-split-1-2.pdf"});
|
||||
|
||||
show_ntests();
|
||||
# ----------
|
||||
$td->notify("--- Keep Files Open ---");
|
||||
|
|
231
qpdf/qtest/qpdf/coalesce-split-1-2.pdf
Normal file
231
qpdf/qtest/qpdf/coalesce-split-1-2.pdf
Normal file
|
@ -0,0 +1,231 @@
|
|||
%PDF-1.3
|
||||
%¿÷¢þ
|
||||
%QDF-1.0
|
||||
|
||||
%% Original object ID: 1 0
|
||||
1 0 obj
|
||||
<<
|
||||
/Pages 2 0 R
|
||||
/Type /Catalog
|
||||
>>
|
||||
endobj
|
||||
|
||||
%% Original object ID: 2 0
|
||||
2 0 obj
|
||||
<<
|
||||
/Count 2
|
||||
/Kids [
|
||||
3 0 R
|
||||
4 0 R
|
||||
]
|
||||
/Type /Pages
|
||||
>>
|
||||
endobj
|
||||
|
||||
%% Page 1
|
||||
%% Original object ID: 3 0
|
||||
3 0 obj
|
||||
<<
|
||||
/Contents [
|
||||
5 0 R
|
||||
7 0 R
|
||||
9 0 R
|
||||
11 0 R
|
||||
]
|
||||
/MediaBox [
|
||||
0
|
||||
0
|
||||
612
|
||||
792
|
||||
]
|
||||
/Parent 2 0 R
|
||||
/Resources <<
|
||||
/Font <<
|
||||
/F1 13 0 R
|
||||
>>
|
||||
/ProcSet 14 0 R
|
||||
>>
|
||||
/Type /Page
|
||||
>>
|
||||
endobj
|
||||
|
||||
%% Page 2
|
||||
%% Original object ID: 14 0
|
||||
4 0 obj
|
||||
<<
|
||||
/Contents 15 0 R
|
||||
/MediaBox [
|
||||
0
|
||||
0
|
||||
612
|
||||
792
|
||||
]
|
||||
/Parent 2 0 R
|
||||
/Resources <<
|
||||
/Font <<
|
||||
/F1 17 0 R
|
||||
>>
|
||||
/ProcSet 18 0 R
|
||||
>>
|
||||
/Type /Page
|
||||
>>
|
||||
endobj
|
||||
|
||||
%% Contents for page 1
|
||||
%% Original object ID: 4 0
|
||||
5 0 obj
|
||||
<<
|
||||
/Length 6 0 R
|
||||
>>
|
||||
stream
|
||||
BT
|
||||
/F1 24 Tf
|
||||
72 720 Td
|
||||
(Pot
|
||||
endstream
|
||||
endobj
|
||||
|
||||
%QDF: ignore_newline
|
||||
6 0 obj
|
||||
33
|
||||
endobj
|
||||
|
||||
%% Contents for page 1
|
||||
%% Original object ID: 6 0
|
||||
7 0 obj
|
||||
<<
|
||||
/Length 8 0 R
|
||||
>>
|
||||
stream
|
||||
ato) Tj
|
||||
ET [ /array
|
||||
endstream
|
||||
endobj
|
||||
|
||||
%QDF: ignore_newline
|
||||
8 0 obj
|
||||
19
|
||||
endobj
|
||||
|
||||
%% Contents for page 1
|
||||
%% Original object ID: 8 0
|
||||
9 0 obj
|
||||
<<
|
||||
/Length 10 0 R
|
||||
>>
|
||||
stream
|
||||
/split ] BI
|
||||
/CS /G/W 66/H 47/BPC 8/F/Fl/DP<</Predictor 15/Columns 66>>
|
||||
ID xœÅÖIà P|ÿC;UÈ`ÀÓ7‘Z©¦Ä˜Úæ<C39A>}Dðï_´øÉW©„œÄ-”ˆ>ÿ‡À<E280A1>>”^&®¡uâ]€"!‡•–*¬&<26>E|Sy® ðd-€<<3C>B0Bú@Nê+<hlèKÐî/56L ‰<C2A0>ã £–¹¦>0>Y<>ù!cì\YØ%Yð¥Ö8?& Öëˆ}j’ûè;«<>3<EFBFBD>ÂÖlpÛsHöûtú
|
||||
endstream
|
||||
endobj
|
||||
|
||||
%QDF: ignore_newline
|
||||
10 0 obj
|
||||
253
|
||||
endobj
|
||||
|
||||
%% Contents for page 1
|
||||
%% Original object ID: 10 0
|
||||
11 0 obj
|
||||
<<
|
||||
/Length 12 0 R
|
||||
>>
|
||||
stream
|
||||
QØTt*hÌUúãwÍÕÐ%¨)p–³"•DiRj¹–DYNUÓÙAv’Fà&
|
||||
<EFBFBD>ÍÔu#c•ÆW ô߉W“O
|
||||
EI
|
||||
endstream
|
||||
endobj
|
||||
|
||||
%QDF: ignore_newline
|
||||
12 0 obj
|
||||
65
|
||||
endobj
|
||||
|
||||
%% Original object ID: 12 0
|
||||
13 0 obj
|
||||
<<
|
||||
/BaseFont /Helvetica
|
||||
/Encoding /WinAnsiEncoding
|
||||
/Name /F1
|
||||
/Subtype /Type1
|
||||
/Type /Font
|
||||
>>
|
||||
endobj
|
||||
|
||||
%% Original object ID: 13 0
|
||||
14 0 obj
|
||||
[
|
||||
/PDF
|
||||
/Text
|
||||
]
|
||||
endobj
|
||||
|
||||
%% Contents for page 2
|
||||
%% Original object ID: 15 0
|
||||
15 0 obj
|
||||
<<
|
||||
/Length 16 0 R
|
||||
>>
|
||||
stream
|
||||
BT
|
||||
/F1 24 Tf
|
||||
72 720 Td
|
||||
(Potato) Tj
|
||||
ET
|
||||
endstream
|
||||
endobj
|
||||
|
||||
16 0 obj
|
||||
44
|
||||
endobj
|
||||
|
||||
%% Original object ID: 17 0
|
||||
17 0 obj
|
||||
<<
|
||||
/BaseFont /Helvetica
|
||||
/Encoding /WinAnsiEncoding
|
||||
/Name /F1
|
||||
/Subtype /Type1
|
||||
/Type /Font
|
||||
>>
|
||||
endobj
|
||||
|
||||
%% Original object ID: 18 0
|
||||
18 0 obj
|
||||
[
|
||||
/PDF
|
||||
/Text
|
||||
]
|
||||
endobj
|
||||
|
||||
xref
|
||||
0 19
|
||||
0000000000 65535 f
|
||||
0000000052 00000 n
|
||||
0000000133 00000 n
|
||||
0000000252 00000 n
|
||||
0000000525 00000 n
|
||||
0000000770 00000 n
|
||||
0000000880 00000 n
|
||||
0000000949 00000 n
|
||||
0000001045 00000 n
|
||||
0000001114 00000 n
|
||||
0000001445 00000 n
|
||||
0000001517 00000 n
|
||||
0000001661 00000 n
|
||||
0000001709 00000 n
|
||||
0000001856 00000 n
|
||||
0000001943 00000 n
|
||||
0000002044 00000 n
|
||||
0000002092 00000 n
|
||||
0000002239 00000 n
|
||||
trailer <<
|
||||
/Root 1 0 R
|
||||
/Size 19
|
||||
/ID [<31415926535897932384626433832795><31415926535897932384626433832795>]
|
||||
>>
|
||||
startxref
|
||||
2275
|
||||
%%EOF
|
10
qpdf/qtest/qpdf/coalesce-split.out
Normal file
10
qpdf/qtest/qpdf/coalesce-split.out
Normal file
|
@ -0,0 +1,10 @@
|
|||
WARNING: coalesce.pdf, object 3 0 at offset 181: Bad token found while scanning content stream; not attempting to remove unreferenced objects from this page
|
||||
WARNING: empty PDF: content normalization encountered bad tokens
|
||||
WARNING: empty PDF: normalized content ended with a bad token; you may be able to resolve this by coalescing content streams in combination with normalizing content. From the command line, specify --coalesce-contents
|
||||
WARNING: empty PDF: Resulting stream data may be corrupted but is may still useful for manual inspection. For more information on this warning, search for content normalization in the manual.
|
||||
WARNING: empty PDF: content normalization encountered bad tokens
|
||||
WARNING: empty PDF: Resulting stream data may be corrupted but is may still useful for manual inspection. For more information on this warning, search for content normalization in the manual.
|
||||
WARNING: empty PDF: content normalization encountered bad tokens
|
||||
WARNING: empty PDF: normalized content ended with a bad token; you may be able to resolve this by coalescing content streams in combination with normalizing content. From the command line, specify --coalesce-contents
|
||||
WARNING: empty PDF: Resulting stream data may be corrupted but is may still useful for manual inspection. For more information on this warning, search for content normalization in the manual.
|
||||
qpdf: operation succeeded with warnings; resulting file may have some problems
|
Loading…
Reference in New Issue
Block a user