2
1
mirror of https://github.com/qpdf/qpdf.git synced 2024-06-13 15:42:21 +00:00

Pass offset and length to ParserCallbacks::handleObject

This commit is contained in:
Jay Berkenbilt 2019-08-22 19:16:25 -04:00
parent 4b2e72c4cd
commit 3f1ab64066
11 changed files with 326 additions and 219 deletions

View File

@ -1,5 +1,19 @@
2019-08-22 Jay Berkenbilt <ejb@ql.org> 2019-08-22 Jay Berkenbilt <ejb@ql.org>
* In QPDFObjectHandle::ParserCallbacks, in addition to
handleObject(QPDFObjectHandle), allow developers to override
handleObject(QPDFObjectHandle, size_t offset, size_t length). If
this method is overridden instead, it is called with the offset of the
object in the content stream (which may be concatenated from an
array of streams) and the length of the object. Intervening
whitespace and comments are not included in offset and length.
* Add method
QPDFObjectHandle::ParserCallbacks::contentSize(size_t). If
defined, it is called by the content stream parser before the
first call to handleObject, and the argument is the total size in
bytes of the content streams.
* Add QPDFObjectHandle::isDirectNull() -- a const method that * Add QPDFObjectHandle::isDirectNull() -- a const method that
allows determining whether an object is a literal null without allows determining whether an object is a literal null without
attempting to resolve it. attempting to resolve it.

View File

@ -26,14 +26,23 @@ class ParserCallbacks: public QPDFObjectHandle::ParserCallbacks
{ {
} }
virtual void handleObject(QPDFObjectHandle); virtual void contentSize(size_t);
virtual void handleObject(QPDFObjectHandle, size_t offset, size_t length);
virtual void handleEOF(); virtual void handleEOF();
}; };
void void
ParserCallbacks::handleObject(QPDFObjectHandle obj) ParserCallbacks::contentSize(size_t size)
{ {
std::cout << obj.getTypeName() << ": "; std::cout << "content size: " << size << std::endl;
}
void
ParserCallbacks::handleObject(QPDFObjectHandle obj,
size_t offset, size_t length)
{
std::cout << obj.getTypeName() << ", offset=" << offset
<< ", length=" << length << ": ";
if (obj.isInlineImage()) if (obj.isInlineImage())
{ {
std::cout << QUtil::hex_encode(obj.getInlineImageValue()) << std::endl; std::cout << QUtil::hex_encode(obj.getInlineImageValue()) << std::endl;

View File

@ -1,11 +1,12 @@
operator: BT content size: 44
name: /F1 operator, offset=0, length=2: BT
integer: 24 name, offset=5, length=3: /F1
operator: Tf integer, offset=9, length=2: 24
integer: 72 operator, offset=12, length=2: Tf
integer: 720 integer, offset=17, length=2: 72
operator: Td integer, offset=20, length=3: 720
string: (Potato) operator, offset=24, length=2: Td
operator: Tj string, offset=29, length=8: (Potato)
operator: ET operator, offset=38, length=2: Tj
operator, offset=41, length=2: ET
-EOF- -EOF-

View File

@ -159,16 +159,28 @@ class QPDFObjectHandle
// This class is used by parsePageContents. Callers must // This class is used by parsePageContents. Callers must
// instantiate a subclass of this with handlers defined to accept // instantiate a subclass of this with handlers defined to accept
// QPDFObjectHandles that are parsed from the stream. // QPDFObjectHandles that are parsed from the stream.
class ParserCallbacks class QPDF_DLL_CLASS ParserCallbacks
{ {
public: public:
QPDF_DLL QPDF_DLL
virtual ~ParserCallbacks() virtual ~ParserCallbacks()
{ {
} }
virtual void handleObject(QPDFObjectHandle) = 0; // One of the handleObject methods must be overridden.
QPDF_DLL
virtual void handleObject(QPDFObjectHandle);
QPDF_DLL
virtual void handleObject(
QPDFObjectHandle, size_t offset, size_t length);
virtual void handleEOF() = 0; virtual void handleEOF() = 0;
// Override this if you want to know the full size of the
// contents, possibly after concatenation of multiple streams.
// This is called before the first call to handleObject.
QPDF_DLL
virtual void contentSize(size_t);
protected: protected:
// Implementors may call this method during parsing to // Implementors may call this method during parsing to
// terminate parsing early. This method throws an exception // terminate parsing early. This method throws an exception

View File

@ -105,6 +105,29 @@ QPDFObjectHandle::TokenFilter::writeToken(QPDFTokenizer::Token const& token)
write(value.c_str(), value.length()); write(value.c_str(), value.length());
} }
void
QPDFObjectHandle::ParserCallbacks::handleObject(QPDFObjectHandle)
{
    // Default for the one-argument overload. This implementation
    // unconditionally reports a programming error: the message asks
    // the developer to override one of the handleObject methods.
    throw std::logic_error(
        "You must override one of the handleObject methods"
        " in ParserCallbacks");
}
void
QPDFObjectHandle::ParserCallbacks::handleObject(
    QPDFObjectHandle oh, size_t /*offset*/, size_t /*length*/)
{
    // The offset/length overload was introduced in qpdf 9. When a
    // subclass has not overridden it, discard the two extra
    // arguments and delegate to the original one-argument
    // interface.
    this->handleObject(oh);
}
void
QPDFObjectHandle::ParserCallbacks::contentSize(size_t /*size*/)
{
    // Optional hook: the default implementation deliberately does
    // nothing with the reported size.
}
void void
QPDFObjectHandle::ParserCallbacks::terminateParsing() QPDFObjectHandle::ParserCallbacks::terminateParsing()
{ {
@ -1615,6 +1638,7 @@ QPDFObjectHandle::parseContentStream_internal(
std::string all_description; std::string all_description;
pipeContentStreams(&buf, description, all_description); pipeContentStreams(&buf, description, all_description);
PointerHolder<Buffer> stream_data = buf.getBuffer(); PointerHolder<Buffer> stream_data = buf.getBuffer();
callbacks->contentSize(stream_data->getSize());
try try
{ {
parseContentStream_data(stream_data, all_description, parseContentStream_data(stream_data, all_description,
@ -1642,6 +1666,13 @@ QPDFObjectHandle::parseContentStream_data(
bool empty = false; bool empty = false;
while (QIntC::to_size(input->tell()) < length) while (QIntC::to_size(input->tell()) < length)
{ {
// Read a token and seek to the beginning. The offset we get
// from this process is the beginning of the next
// non-ignorable token (i.e., not a space or comment). This way,
// the offset and length don't include ignorable content.
tokenizer.readToken(input, "content", true);
qpdf_offset_t offset = input->getLastOffset();
input->seek(offset, SEEK_SET);
QPDFObjectHandle obj = QPDFObjectHandle obj =
parseInternal(input, "content", tokenizer, parseInternal(input, "content", tokenizer,
empty, 0, context, true); empty, 0, context, true);
@ -1650,8 +1681,9 @@ QPDFObjectHandle::parseContentStream_data(
// EOF // EOF
break; break;
} }
size_t length = QIntC::to_size(input->tell() - offset);
callbacks->handleObject(obj); callbacks->handleObject(obj, QIntC::to_size(offset), length);
if (obj.isOperator() && (obj.getOperatorValue() == "ID")) if (obj.isOperator() && (obj.getOperatorValue() == "ID"))
{ {
// Discard next character; it is the space after ID that // Discard next character; it is the space after ID that
@ -1661,6 +1693,8 @@ QPDFObjectHandle::parseContentStream_data(
tokenizer.expectInlineImage(input); tokenizer.expectInlineImage(input);
QPDFTokenizer::Token t = QPDFTokenizer::Token t =
tokenizer.readToken(input, description, true); tokenizer.readToken(input, description, true);
offset = input->getLastOffset();
length = QIntC::to_size(input->tell() - offset);
if (t.getType() == QPDFTokenizer::tt_bad) if (t.getType() == QPDFTokenizer::tt_bad)
{ {
QTC::TC("qpdf", "QPDFObjectHandle EOF in inline image"); QTC::TC("qpdf", "QPDFObjectHandle EOF in inline image");
@ -1674,7 +1708,8 @@ QPDFObjectHandle::parseContentStream_data(
std::string inline_image = t.getValue(); std::string inline_image = t.getValue();
QTC::TC("qpdf", "QPDFObjectHandle inline image token"); QTC::TC("qpdf", "QPDFObjectHandle inline image token");
callbacks->handleObject( callbacks->handleObject(
QPDFObjectHandle::newInlineImage(inline_image)); QPDFObjectHandle::newInlineImage(inline_image),
QIntC::to_size(offset), length);
} }
} }
} }

View File

@ -4489,6 +4489,25 @@ print "\n";
<function>getUIntValueAsUInt</function>. <function>getUIntValueAsUInt</function>.
</para> </para>
</listitem> </listitem>
<listitem>
<para>
When parsing content streams with
<classname>QPDFObjectHandle::ParserCallbacks</classname>, in
place of the method
<function>handleObject(QPDFObjectHandle)</function>, the
developer may override
<function>handleObject(QPDFObjectHandle, size_t offset,
size_t length)</function>. If this method is defined, it
will be invoked with the object along with its offset and
length within the overall contents being parsed. Intervening
spaces and comments are not included in offset and length.
Additionally, a new method
<function>contentSize(size_t)</function> may be implemented.
If present, it will be called prior to the first call to
<function>handleObject</function> with the total size in
bytes of the combined contents.
</para>
</listitem>
<listitem> <listitem>
<para> <para>
The underlying implementation of QPDF arrays has been The underlying implementation of QPDF arrays has been

View File

@ -1,27 +1,28 @@
operator: BT content size: 139
name: /F1 operator, offset=0, length=2: BT
integer: 24 name, offset=5, length=3: /F1
operator: Tf integer, offset=9, length=2: 24
integer: 72 operator, offset=12, length=2: Tf
integer: 720 integer, offset=17, length=2: 72
operator: Td integer, offset=20, length=3: 720
string: (Potato) operator, offset=24, length=2: Td
operator: Tj string, offset=29, length=8: (Potato)
operator: ET operator, offset=38, length=2: Tj
operator: BI operator, offset=41, length=2: ET
name: /CS operator, offset=66, length=2: BI
name: /G name, offset=69, length=3: /CS
name: /W name, offset=73, length=2: /G
integer: 1 name, offset=75, length=2: /W
name: /H integer, offset=78, length=1: 1
integer: 1 name, offset=79, length=2: /H
name: /BPC integer, offset=82, length=1: 1
integer: 8 name, offset=83, length=4: /BPC
name: /F integer, offset=88, length=1: 8
name: /Fl name, offset=89, length=2: /F
name: /DP name, offset=91, length=3: /Fl
dictionary: << /Columns 1 /Predictor 15 >> name, offset=94, length=3: /DP
operator: ID dictionary, offset=97, length=27: << /Columns 1 /Predictor 15 >>
operator, offset=125, length=2: ID
WARNING: page object 3 0 stream 4 0 (stream data, offset 139): EOF found while reading inline image WARNING: page object 3 0 stream 4 0 (stream data, offset 139): EOF found while reading inline image
-EOF- -EOF-
test 37 done test 37 done

View File

@ -1,86 +1,88 @@
name: /potato content size: 44
name, offset=0, length=7: /potato
test suite: terminating parsing test suite: terminating parsing
real: 0.1 content size: 454
integer: 0 real, offset=0, length=3: 0.1
integer: 0 integer, offset=4, length=1: 0
real: 0.1 integer, offset=6, length=1: 0
integer: 0 real, offset=8, length=3: 0.1
integer: 0 integer, offset=12, length=1: 0
operator: cm integer, offset=14, length=1: 0
operator: q operator, offset=16, length=2: cm
integer: 0 operator, offset=19, length=1: q
real: 1.1999 integer, offset=21, length=1: 0
real: -1.1999 real, offset=23, length=6: 1.1999
integer: 0 real, offset=30, length=7: -1.1999
real: 121.19 integer, offset=38, length=2: 0
real: 150.009 real, offset=41, length=6: 121.19
operator: cm real, offset=48, length=7: 150.009
operator: BI operator, offset=56, length=2: cm
name: /CS operator, offset=59, length=2: BI
name: /G name, offset=62, length=3: /CS
name: /W name, offset=66, length=2: /G
integer: 1 name, offset=68, length=2: /W
name: /H integer, offset=71, length=1: 1
integer: 1 name, offset=72, length=2: /H
name: /BPC integer, offset=75, length=1: 1
integer: 8 name, offset=76, length=4: /BPC
name: /F integer, offset=81, length=1: 8
name: /Fl name, offset=82, length=2: /F
name: /DP name, offset=84, length=3: /Fl
dictionary: << /Columns 1 /Predictor 15 >> name, offset=87, length=3: /DP
operator: ID dictionary, offset=90, length=27: << /Columns 1 /Predictor 15 >>
inline-image: 789c63fc0f00010301010a operator, offset=118, length=2: ID
operator: EI inline-image, offset=121, length=11: 789c63fc0f00010301010a
operator: Q operator, offset=132, length=2: EI
operator: q operator, offset=135, length=1: Q
integer: 0 operator, offset=137, length=1: q
real: 35.997 integer, offset=139, length=1: 0
real: -128.389 real, offset=141, length=6: 35.997
integer: 0 real, offset=148, length=8: -128.389
real: 431.964 integer, offset=157, length=2: 0
real: 7269.02 real, offset=160, length=7: 431.964
operator: cm real, offset=168, length=7: 7269.02
operator: BI operator, offset=176, length=2: cm
name: /CS operator, offset=179, length=2: BI
name: /G name, offset=182, length=3: /CS
name: /W name, offset=186, length=2: /G
integer: 30 name, offset=188, length=2: /W
name: /H integer, offset=191, length=2: 30
integer: 107 name, offset=193, length=2: /H
name: /BPC integer, offset=196, length=3: 107
integer: 8 name, offset=199, length=4: /BPC
name: /F integer, offset=204, length=1: 8
name: /Fl name, offset=205, length=2: /F
name: /DP name, offset=207, length=3: /Fl
dictionary: << /Columns 30 /Predictor 15 >> name, offset=210, length=3: /DP
operator: ID dictionary, offset=213, length=28: << /Columns 30 /Predictor 15 >>
inline-image: 789cedd1a11100300800b1b2ffd06503148283bc8dfcf8af2a306ee352eff2e06318638c31c63b3801627b620a0a operator, offset=242, length=2: ID
operator: EI inline-image, offset=245, length=46: 789cedd1a11100300800b1b2ffd06503148283bc8dfcf8af2a306ee352eff2e06318638c31c63b3801627b620a0a
operator: Q operator, offset=291, length=2: EI
operator: q operator, offset=294, length=1: Q
integer: 0 operator, offset=296, length=1: q
real: 38.3968 integer, offset=298, length=1: 0
real: -93.5922 real, offset=300, length=7: 38.3968
integer: 0 real, offset=308, length=8: -93.5922
real: 431.964 integer, offset=317, length=2: 0
real: 7567.79 real, offset=320, length=7: 431.964
operator: cm real, offset=328, length=7: 7567.79
operator: BI operator, offset=336, length=2: cm
name: /CS operator, offset=339, length=2: BI
name: /G name, offset=342, length=3: /CS
name: /W name, offset=346, length=2: /G
integer: 32 name, offset=348, length=2: /W
name: /H integer, offset=351, length=2: 32
integer: 78 name, offset=353, length=2: /H
name: /BPC integer, offset=356, length=2: 78
integer: 8 name, offset=358, length=4: /BPC
name: /F integer, offset=363, length=1: 8
name: /Fl name, offset=364, length=2: /F
name: /DP name, offset=366, length=3: /Fl
dictionary: << /Columns 32 /Predictor 15 >> name, offset=369, length=3: /DP
operator: ID dictionary, offset=372, length=28: << /Columns 32 /Predictor 15 >>
inline-image: 789c63fccf801f308e2a185530aa60882a20203faa605401890a0643aa1e5530aa6054010d140000bdd03c130a operator, offset=401, length=2: ID
operator: EI inline-image, offset=404, length=45: 789c63fccf801f308e2a185530aa60882a20203faa605401890a0643aa1e5530aa6054010d140000bdd03c130a
operator: Q operator, offset=449, length=2: EI
operator, offset=452, length=1: Q
-EOF- -EOF-
test 37 done test 37 done

View File

@ -1,95 +1,100 @@
operator: BT content size: 44
name: /F1 operator, offset=0, length=2: BT
integer: 24 name, offset=5, length=3: /F1
operator: Tf integer, offset=9, length=2: 24
integer: 72 operator, offset=12, length=2: Tf
integer: 720 integer, offset=17, length=2: 72
operator: Td integer, offset=20, length=3: 720
string: (Potato) operator, offset=24, length=2: Td
operator: Tj string, offset=29, length=8: (Potato)
operator: ET operator, offset=38, length=2: Tj
operator, offset=41, length=2: ET
-EOF- -EOF-
real: 0.1 content size: 490
integer: 0 real, offset=0, length=3: 0.1
integer: 0 integer, offset=4, length=1: 0
real: 0.1 integer, offset=6, length=1: 0
integer: 0 real, offset=8, length=3: 0.1
integer: 0 integer, offset=12, length=1: 0
operator: cm integer, offset=14, length=1: 0
operator: q operator, offset=16, length=2: cm
integer: 0 operator, offset=19, length=1: q
real: 1.1999 integer, offset=21, length=1: 0
real: -1.1999 real, offset=23, length=6: 1.1999
integer: 0 real, offset=30, length=7: -1.1999
real: 121.19 integer, offset=38, length=2: 0
real: 150.009 real, offset=41, length=6: 121.19
operator: cm real, offset=48, length=7: 150.009
operator: BI operator, offset=56, length=2: cm
name: /CS operator, offset=59, length=2: BI
name: /G name, offset=62, length=3: /CS
name: /W name, offset=66, length=2: /G
integer: 1 name, offset=68, length=2: /W
name: /H integer, offset=71, length=1: 1
integer: 1 name, offset=72, length=2: /H
name: /BPC integer, offset=75, length=1: 1
integer: 8 name, offset=76, length=4: /BPC
name: /F integer, offset=81, length=1: 8
name: /Fl name, offset=82, length=2: /F
name: /DP name, offset=84, length=3: /Fl
dictionary: << /Columns 1 /Predictor 15 >> name, offset=87, length=3: /DP
operator: ID dictionary, offset=90, length=27: << /Columns 1 /Predictor 15 >>
inline-image: 789c63fc0f00010301010a operator, offset=118, length=2: ID
operator: EI inline-image, offset=121, length=11: 789c63fc0f00010301010a
operator: Q operator, offset=132, length=2: EI
operator: q operator, offset=135, length=1: Q
integer: 0 operator, offset=137, length=1: q
real: 35.997 integer, offset=139, length=1: 0
real: -128.389 real, offset=141, length=6: 35.997
integer: 0 real, offset=148, length=8: -128.389
real: 431.964 integer, offset=157, length=2: 0
real: 7269.02 real, offset=160, length=7: 431.964
operator: cm real, offset=168, length=7: 7269.02
operator: BI operator, offset=176, length=2: cm
name: /CS operator, offset=179, length=2: BI
name: /G name, offset=182, length=3: /CS
name: /W name, offset=186, length=2: /G
integer: 30 name, offset=188, length=2: /W
name: /H integer, offset=191, length=2: 30
integer: 107 name, offset=193, length=2: /H
name: /BPC integer, offset=196, length=3: 107
integer: 8 name, offset=199, length=4: /BPC
name: /F integer, offset=204, length=1: 8
name: /Fl name, offset=205, length=2: /F
name: /DP name, offset=207, length=3: /Fl
dictionary: << /Columns 30 /Predictor 15 >> name, offset=210, length=3: /DP
operator: ID dictionary, offset=214, length=28: << /Columns 30 /Predictor 15 >>
inline-image: 789cedd1a11100300800b1b2ffd06503148283bc8dfcf8af2a306ee352eff2e06318638c31c63b3801627b620a0a operator, offset=243, length=2: ID
operator: EI inline-image, offset=246, length=46: 789cedd1a11100300800b1b2ffd06503148283bc8dfcf8af2a306ee352eff2e06318638c31c63b3801627b620a0a
operator: Q operator, offset=292, length=2: EI
operator: q operator, offset=295, length=1: Q
integer: 0 operator, offset=297, length=1: q
real: 38.3968 array, offset=299, length=30: [ 1 /two (three) << /four 5 >> ]
real: -93.5922 operator, offset=330, length=1: Q
integer: 0 operator, offset=332, length=1: q
real: 431.964 integer, offset=334, length=1: 0
real: 7567.79 real, offset=336, length=7: 38.3968
operator: cm real, offset=344, length=8: -93.5922
operator: BI integer, offset=353, length=2: 0
name: /CS real, offset=356, length=7: 431.964
name: /G real, offset=364, length=7: 7567.79
name: /W operator, offset=372, length=2: cm
integer: 32 operator, offset=375, length=2: BI
name: /H name, offset=378, length=3: /CS
integer: 78 name, offset=382, length=2: /G
name: /BPC name, offset=384, length=2: /W
integer: 8 integer, offset=387, length=2: 32
name: /F name, offset=389, length=2: /H
name: /Fl integer, offset=392, length=2: 78
name: /DP name, offset=394, length=4: /BPC
dictionary: << /Columns 32 /Predictor 15 >> integer, offset=399, length=1: 8
operator: ID name, offset=400, length=2: /F
inline-image: 789c63fccf801f308e2a185530aa60882a20203faa605401890a0643aa1e5530aa6054010d140000bdd03c130a name, offset=402, length=3: /Fl
operator: EI name, offset=405, length=3: /DP
operator: Q dictionary, offset=408, length=28: << /Columns 32 /Predictor 15 >>
operator, offset=437, length=2: ID
inline-image, offset=440, length=45: 789c63fccf801f308e2a185530aa60882a20203faa605401890a0643aa1e5530aa6054010d140000bdd03c130a
operator, offset=485, length=2: EI
operator, offset=488, length=1: Q
-EOF- -EOF-
test 37 done test 37 done

View File

@ -76,19 +76,28 @@ class ParserCallbacks: public QPDFObjectHandle::ParserCallbacks
{ {
} }
virtual void handleObject(QPDFObjectHandle); virtual void contentSize(size_t size);
virtual void handleObject(QPDFObjectHandle, size_t, size_t);
virtual void handleEOF(); virtual void handleEOF();
}; };
void void
ParserCallbacks::handleObject(QPDFObjectHandle obj) ParserCallbacks::contentSize(size_t size)
{
std::cout << "content size: " << size << std::endl;
}
void
ParserCallbacks::handleObject(QPDFObjectHandle obj,
size_t offset, size_t length)
{ {
if (obj.isName() && (obj.getName() == "/Abort")) if (obj.isName() && (obj.getName() == "/Abort"))
{ {
std::cout << "test suite: terminating parsing" << std::endl; std::cout << "test suite: terminating parsing" << std::endl;
terminateParsing(); terminateParsing();
} }
std::cout << obj.getTypeName() << ": "; std::cout << obj.getTypeName() << ", offset=" << offset
<< ", length=" << length << ": ";
if (obj.isInlineImage()) if (obj.isInlineImage())
{ {
// Exercise getTypeCode // Exercise getTypeCode