C API: add several stream functions (fixes #596)

This commit is contained in:
Jay Berkenbilt 2021-12-17 13:28:11 -05:00
parent a0dbb71a64
commit feafcc4e88
10 changed files with 904 additions and 1 deletions

View File

@ -1,5 +1,8 @@
2021-12-17 Jay Berkenbilt <ejb@ql.org>
* C API: add functions for working with stream data. Search for
"STREAM FUNCTIONS" in qpdf-c.h. Fixes #596.
* QPDFObjectHandle object types have been moved from
QPDFObject::object_type_e to qpdf_object_type_e (defined in
Constants.h). Old values are available for backward compatibility.

View File

@ -129,7 +129,9 @@ extern "C" {
qpdf_data qpdf_init();
/* Pass a pointer to the qpdf_data pointer created by qpdf_init to
* clean up resources.
* clean up resources. This does not include buffers initialized
* by functions that return stream data but it otherwise includes
* all data associated with the QPDF object or any object handles.
*/
QPDF_DLL
void qpdf_cleanup(qpdf_data* qpdf);
@ -752,6 +754,15 @@ extern "C" {
QPDF_DLL
qpdf_oh qpdf_oh_new_dictionary(qpdf_data qpdf);
/* Create a new stream. Use qpdf_oh_get_dict to get (and
* subsequently modify) the stream dictionary if needed. See
* comments in QPDFObjectHandle.hh for newStream() for additional
* notes. You must call qpdf_oh_replace_stream_data to provide
* data for the stream. See STREAM FUNCTIONS below.
*/
QPDF_DLL
qpdf_oh qpdf_oh_new_stream(qpdf_data qpdf);
QPDF_DLL
void qpdf_oh_make_direct(qpdf_data qpdf, qpdf_oh oh);
@ -789,6 +800,67 @@ extern "C" {
QPDF_DLL
char const* qpdf_oh_unparse_binary(qpdf_data qpdf, qpdf_oh oh);
/* Note about foreign objects: the C API does not have enough
* information in the value of a qpdf_oh to know what QPDF object
* it belongs to. To uniquely specify a qpdf object handle from a
* specific qpdf_data instance, you always pair the qpdf_oh with
* the correct qpdf_data. If you pair it with the wrong qpdf_data,
* you may be lucky enough to get an error about the object being
* invalid, but you are likely to get completely the wrong object.
*/
/* Copy foreign object: the qpdf_oh returned belongs to `qpdf`,
* while `foreign_oh` belongs to `other_qpdf`.
*/
QPDF_DLL
qpdf_oh qpdf_oh_copy_foreign_object(
qpdf_data qpdf, qpdf_data other_qpdf, qpdf_oh foreign_oh);
/* STREAM FUNCTIONS */
/* These functions provide basic access to streams and stream
* data. They are not as comprehensive as what is in
* QPDFObjectHandle, but they do allow for working with streams
* and stream data as caller-managed memory.
*/
/* Get stream data as a buffer. The buffer is allocated with
* malloc and must be freed by the caller. The size of the buffer
* is stored in *len. The arguments are similar to those in
* QPDFObjectHandle::pipeStreamData. To get raw stream data, pass
* qpdf_dl_none as decode_level. Otherwise, filtering is attempted
* and *filtered is set to indicate whether it was successful. If
* *filtered is QPDF_FALSE, then raw, unfiltered stream data was
* returned. You may pass a null pointer as filtered if you don't
* care about the result. If you pass a null pointer as bufp (and
* len), no data is returned, and the value of filtered is set to
* indicate whether the stream can be filtered.
*/
QPDF_DLL
QPDF_ERROR_CODE qpdf_oh_get_stream_data(
qpdf_data qpdf, qpdf_oh stream_oh,
enum qpdf_stream_decode_level_e decode_level, QPDF_BOOL* filtered,
unsigned char** bufp, size_t* len);
/* This function returns the concatenation of all of a page's
* content streams as a single, dynamically allocated buffer. As
* with qpdf_oh_get_stream_data, the buffer is allocated with
* malloc and must be freed by the caller.
*/
QPDF_DLL
QPDF_ERROR_CODE qpdf_oh_get_page_content_data(
qpdf_data qpdf, qpdf_oh page_oh,
unsigned char** bufp, size_t* len);
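/* Replace the data of the stream stream_oh. The len bytes pointed
* to by buf will be copied by the library, so the data may contain
* embedded null bytes, and buf does not need to remain valid after
* the call returns.
*/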
QPDF_DLL
void qpdf_oh_replace_stream_data(
qpdf_data qpdf, qpdf_oh stream_oh,
unsigned char const* buf, size_t len,
qpdf_oh filter, qpdf_oh decode_parms);
/* PAGE FUNCTIONS */
/* The first time a page function is called, qpdf will traverse

View File

@ -5,6 +5,7 @@
#include <qpdf/QTC.hh>
#include <qpdf/QPDFExc.hh>
#include <qpdf/Pl_Discard.hh>
#include <qpdf/Pl_Buffer.hh>
#include <qpdf/QIntC.hh>
#include <qpdf/QUtil.hh>
@ -1427,6 +1428,13 @@ qpdf_oh qpdf_oh_new_dictionary(qpdf_data qpdf)
return new_object(qpdf, QPDFObjectHandle::newDictionary());
}
// Create a new stream object associated with the QPDF instance held
// by `qpdf` and return a handle to it. The stream is created without
// data; per the header, callers provide it afterwards with
// qpdf_oh_replace_stream_data.
qpdf_oh qpdf_oh_new_stream(qpdf_data qpdf)
{
    QTC::TC("qpdf", "qpdf-c called qpdf_oh_new_stream");
    // newStream() takes the raw QPDF pointer, not the holder object.
    auto owner = qpdf->qpdf.getPointer();
    auto stream = QPDFObjectHandle::newStream(owner);
    return new_object(qpdf, stream);
}
void qpdf_oh_make_direct(qpdf_data qpdf, qpdf_oh oh)
{
do_with_oh_void(
@ -1580,6 +1588,88 @@ char const* qpdf_oh_unparse_binary(qpdf_data qpdf, qpdf_oh oh)
});
}
// Copy the object identified by `foreign_oh` in `other_qpdf` into
// `qpdf` and return a handle that is valid for `qpdf`.
//
// Note that the handle is looked up in `other_qpdf` while the copy
// and the returned handle belong to `qpdf`. return_uninitialized(qpdf)
// supplies the fallback result (presumably used when `foreign_oh`
// cannot be resolved -- see do_with_oh).
qpdf_oh qpdf_oh_copy_foreign_object(
    qpdf_data qpdf, qpdf_data other_qpdf, qpdf_oh foreign_oh)
{
    auto copy_into_target = [qpdf](QPDFObjectHandle& foreign) {
        QTC::TC("qpdf", "qpdf-c called qpdf_oh_copy_foreign_object");
        auto copied = qpdf->qpdf->copyForeignObject(foreign);
        return new_object(qpdf, copied);
    };
    return do_with_oh<qpdf_oh>(
        other_qpdf, foreign_oh,
        return_uninitialized(qpdf),
        copy_into_target);
}
// Retrieve (or just probe) the data of the stream `stream_oh`.
//
// - decode_level is passed through to pipeStreamData.
// - filtered, if non-null, receives whether pipeStreamData reported
//   that the data was filtered.
// - bufp/len, if both non-null, receive a malloc-allocated copy of
//   the data and its size (via Pl_Buffer::getMallocBuffer).
// - If bufp is null, a null pipeline is passed to pipeStreamData, so
//   no data is collected.
//
// The work runs inside trap_errors; a false return from
// pipeStreamData is turned into a std::runtime_error, which
// trap_errors is expected to convert into the returned error code.
//
// NOTE(review): if bufp is non-null but len is null, the stream is
// still piped into the buffer but the data is never handed to the
// caller.
QPDF_ERROR_CODE qpdf_oh_get_stream_data(
    qpdf_data qpdf, qpdf_oh stream_oh,
    qpdf_stream_decode_level_e decode_level, QPDF_BOOL* filtered,
    unsigned char** bufp, size_t* len)
{
    return trap_errors(qpdf, [stream_oh, decode_level,
                              filtered, bufp, len] (qpdf_data q) {
        auto stream = qpdf_oh_item_internal(q, stream_oh);
        Pl_Buffer collector("stream data");
        // Only collect data when the caller asked for a buffer.
        Pipeline* sink = bufp ? &collector : nullptr;
        bool did_filter = false;
        // The remaining arguments (0, false, false) are fixed; see
        // QPDFObjectHandle::pipeStreamData for their meaning.
        bool const piped = stream.pipeStreamData(
            sink, &did_filter, 0, decode_level, false, false);
        if (! piped)
        {
            throw std::runtime_error(
                "unable to access stream data for stream " + stream.unparse());
        }

        QTC::TC("qpdf", "qpdf-c stream data buf set",
                bufp ? 0 : 1);
        // sink is non-null exactly when bufp is non-null.
        if (sink && len)
        {
            collector.getMallocBuffer(bufp, len);
        }
        QTC::TC("qpdf", "qpdf-c stream data filtered set",
                filtered ? 0 : 1);
        if (filtered)
        {
            *filtered = did_filter ? QPDF_TRUE : QPDF_FALSE;
        }
    });
}
// Concatenate the content streams of the page `page_oh` and return
// them as a single malloc-allocated buffer.
//
// - bufp/len receive the buffer and its size (via
//   Pl_Buffer::getMallocBuffer); the caller frees the buffer.
// - If either bufp or len is null, the page contents are still piped
//   (so errors are still reported through the return code) but no
//   buffer is handed back. This mirrors the null-pointer handling in
//   qpdf_oh_get_stream_data instead of writing through a null
//   pointer.
//
// The work runs inside trap_errors, which supplies the return code.
QPDF_ERROR_CODE qpdf_oh_get_page_content_data(
    qpdf_data qpdf, qpdf_oh page_oh,
    unsigned char** bufp, size_t* len)
{
    return trap_errors(qpdf, [page_oh, bufp, len] (qpdf_data q) {
        QTC::TC("qpdf", "qpdf-c called qpdf_oh_get_page_content_data");
        auto o = qpdf_oh_item_internal(q, page_oh);
        Pl_Buffer buf("page contents");
        o.pipePageContents(&buf);
        // Only hand the data over when there is somewhere to put it.
        if (bufp && len)
        {
            buf.getMallocBuffer(bufp, len);
        }
    });
}
// Replace the data of the stream `stream_oh` with a copy of the
// `len` bytes at `buf`.
//
// filter_oh and decode_parms_oh are resolved to object handles in
// `qpdf` and passed through to QPDFObjectHandle::replaceStreamData.
// The data is copied into a std::string using an explicit length, so
// embedded null bytes are preserved and `buf` need not outlive the
// call.
void qpdf_oh_replace_stream_data(
    qpdf_data qpdf, qpdf_oh stream_oh,
    unsigned char const* buf, size_t len,
    qpdf_oh filter_oh, qpdf_oh decode_parms_oh)
{
    auto replace_data = [qpdf, buf, len, filter_oh,
                         decode_parms_oh](QPDFObjectHandle& stream) {
        QTC::TC("qpdf", "qpdf-c called qpdf_oh_replace_stream_data");
        auto filter = qpdf_oh_item_internal(qpdf, filter_oh);
        auto decode_parms = qpdf_oh_item_internal(qpdf, decode_parms_oh);
        // Length-based construction: do not stop at the first '\0'.
        std::string data(reinterpret_cast<char const*>(buf), len);
        stream.replaceStreamData(data, filter, decode_parms);
    };
    do_with_oh_void(qpdf, stream_oh, replace_data);
}
int qpdf_get_num_pages(qpdf_data qpdf)
{
QTC::TC("qpdf", "qpdf-c called qpdf_num_pages");

View File

@ -3659,6 +3659,9 @@ For a detailed list of changes, please see the file
- Add several functions for working with pages. See ``PAGE
FUNCTIONS`` in ``include/qpdf/qpdf-c.h`` for details.
- Add several functions for working with streams. See ``STREAM
FUNCTIONS`` in ``include/qpdf/qpdf-c.h`` for details.
- Documentation change
- The documentation sources have been switched from docbook to

View File

@ -1063,6 +1063,118 @@ static void test37(char const* infile,
assert(qpdf_get_num_pages(qpdf) == 10);
}
/* Exercise the C API functions that read stream data: raw and
 * filtered data from qpdf_oh_get_stream_data, page content from
 * qpdf_oh_get_page_content_data, and the error path of each.
 *
 * NOTE(review): most API calls sit inside assert(), so they would
 * not run at all if this file were compiled with NDEBUG defined.
 */
static void test38(char const* infile,
char const* password,
char const* outfile,
char const* xarg)
{
/* This test expects 11-pages.pdf. outfile and xarg are not used here. */
/* Read stream data: object 17 0 is expected to be a stream whose
 * dictionary has /Length 53.
 */
assert(qpdf_read(qpdf, infile, password) == 0);
qpdf_oh stream = qpdf_get_object_by_id(qpdf, 17, 0);
qpdf_oh dict = qpdf_oh_get_dict(qpdf, stream);
assert(qpdf_oh_get_int_value_as_int(
qpdf, qpdf_oh_get_key(qpdf, dict, "/Length")) == 53);
/* Get raw data: with qpdf_dl_none the length must match /Length.
 * The first two bytes ('x', 0234 == 0x78 0x9c) look like a zlib
 * header, i.e. the data has not been decoded.
 */
unsigned char *buf = 0;
size_t len = 0;
assert(qpdf_oh_get_stream_data(
qpdf, stream, qpdf_dl_none, 0, &buf, &len) == 0);
assert(len == 53);
assert(((int)buf[0] == 'x') && ((int)buf[1] == 0234));
/* The buffer is owned by the caller. */
free(buf);
/* Test whether filterable: null bufp and len, so only *filtered is
 * set.
 */
QPDF_BOOL filtered = QPDF_FALSE;
assert(qpdf_oh_get_stream_data(
qpdf, stream, qpdf_dl_all, &filtered, 0, 0) == 0);
assert(filtered == QPDF_TRUE);
/* Get filtered data: null filtered pointer this time; the decoded
 * data must be exactly the 47-byte content shown below.
 */
assert(qpdf_oh_get_stream_data(
qpdf, stream, qpdf_dl_all, 0, &buf, &len) == 0);
assert(len == 47);
assert(memcmp((char const*)buf,
"BT /F1 15 Tf 72 720 Td (Original page 2) Tj ET\n",
len) == 0);
/* Get page data: the content of the second page must be identical
 * to the decoded stream data still held in buf.
 */
qpdf_oh page2 = qpdf_get_page_n(qpdf, 1); /* 0-based index */
unsigned char* buf2 = 0;
assert(qpdf_oh_get_page_content_data(qpdf, page2, &buf2, &len) == 0);
assert(len == 47);
assert(memcmp(buf, buf2, len) == 0);
free(buf);
free(buf2);
/* errors: each case prints a label and then calls report_errors()
 * (defined elsewhere; presumably prints the pending error).
 */
printf("page content on broken page\n");
/* Break the page by replacing /Contents with an integer. */
qpdf_oh_replace_key(qpdf, page2, "/Contents", qpdf_oh_new_integer(qpdf, 3));
/* buf was freed above; reset it so the check below can tell that
 * the failing call did not store a buffer.
 */
buf = 0;
qpdf_oh_get_page_content_data(qpdf, page2, &buf, &len);
assert(buf == 0);
report_errors();
printf("stream data for non stream\n");
/* The document root is not a stream, so the call must fail. */
qpdf_oh root = qpdf_get_root(qpdf);
assert(qpdf_oh_get_stream_data(qpdf, root, qpdf_dl_all, 0, 0, 0) != 0);
report_errors();
}
static void test39(char const* infile,
char const* password,
char const* outfile,
char const* xarg)
{
/* This test expects 11-pages.pdf as file1 and minimal.pdf as xarg. */
/* Foreign object */
qpdf_data qpdf2 = qpdf_init();
assert(qpdf_read(qpdf, infile, password) == 0);
assert(qpdf_read(qpdf2, xarg, "") == 0);
qpdf_oh resources = qpdf_get_object_by_id(qpdf2, 3, 0);
qpdf_oh copy = qpdf_oh_copy_foreign_object(qpdf, qpdf2, resources);
qpdf_oh root = qpdf_get_root(qpdf);
qpdf_oh_replace_key(qpdf, root, "/Copy", copy);
qpdf_init_write(qpdf, outfile);
qpdf_set_static_ID(qpdf, QPDF_TRUE);
qpdf_set_qdf_mode(qpdf, QPDF_TRUE);
qpdf_set_suppress_original_object_IDs(qpdf, QPDF_TRUE);
qpdf_write(qpdf);
report_errors();
qpdf_cleanup(&qpdf2);
}
/* Create a new stream, give it data that contains an embedded null
 * byte, attach it to the document root under /Potato, and write the
 * result to outfile.
 */
static void test40(char const* infile,
                   char const* password,
                   char const* outfile,
                   char const* xarg)
{
    /* Expects minimal.pdf as infile. xarg is not used here. */
    assert(qpdf_read(qpdf, infile, password) == 0);
    qpdf_oh new_stream = qpdf_oh_new_stream(qpdf);

    /* 5 digits + 1 null byte + 5 letters = 11 bytes; the explicit
     * length is what carries the data past the embedded null.
     */
    unsigned char const* payload =
        (unsigned char const*)"12345\000abcde";
    size_t payload_len = 11;
    /* Null objects are passed for both filter and decode parms. */
    qpdf_oh no_filter = qpdf_oh_new_null(qpdf);
    qpdf_oh no_decode_parms = qpdf_oh_new_null(qpdf);
    qpdf_oh_replace_stream_data(
        qpdf, new_stream, payload, payload_len,
        no_filter, no_decode_parms);

    qpdf_oh catalog = qpdf_get_root(qpdf);
    qpdf_oh_replace_key(qpdf, catalog, "/Potato", new_stream);

    qpdf_init_write(qpdf, outfile);
    qpdf_set_static_ID(qpdf, QPDF_TRUE);
    qpdf_set_qdf_mode(qpdf, QPDF_TRUE);
    qpdf_set_suppress_original_object_IDs(qpdf, QPDF_TRUE);
    qpdf_write(qpdf);
    report_errors();
}
int main(int argc, char* argv[])
{
char* p = 0;
@ -1140,6 +1252,9 @@ int main(int argc, char* argv[])
(n == 35) ? test35 :
(n == 36) ? test36 :
(n == 37) ? test37 :
(n == 38) ? test38 :
(n == 39) ? test39 :
(n == 40) ? test40 :
0);
if (fn == 0)

View File

@ -617,3 +617,9 @@ qpdf-c called qpdf_push_inherited_attributes_to_page 0
qpdf-c called qpdf_add_page 0
qpdf-c called qpdf_add_page_at 0
qpdf-c called qpdf_remove_page 0
qpdf-c called qpdf_oh_new_stream 0
qpdf-c called qpdf_oh_copy_foreign_object 0
qpdf-c stream data filtered set 1
qpdf-c stream data buf set 1
qpdf-c called qpdf_oh_get_page_content_data 0
qpdf-c called qpdf_oh_replace_stream_data 0

View File

@ -4904,6 +4904,34 @@ $td->runtest("C pages cache",
show_ntests();
# ----------
# Tests for the C API stream functions, driven through qpdf-ctest
# tests 38, 39, and 40.
$td->notify("--- C API Stream Functions ---");
# One per runtest call in this section (three commands plus two
# output comparisons).
$n_tests += 5;
# Test 38 prints error reports, so its whole output is compared
# against a reference file.
$td->runtest("C read streams",
{$td->COMMAND =>
"qpdf-ctest 38 11-pages.pdf '' ''"},
{$td->FILE => "c-get-stream.out", $td->EXIT_STATUS => 0},
$td->NORMALIZE_NEWLINES);
# Test 39 reads 11-pages.pdf and minimal.pdf and writes a.pdf, which
# is compared against the reference PDF in the next step.
$td->runtest("C foreign object",
{$td->COMMAND =>
"qpdf-ctest 39 11-pages.pdf '' a.pdf minimal.pdf"},
{$td->STRING => "C test 39 done\n", $td->EXIT_STATUS => 0},
$td->NORMALIZE_NEWLINES);
$td->runtest("check output",
{$td->FILE => 'a.pdf'},
{$td->FILE => 'c-foreign.pdf'});
# Test 40 reads minimal.pdf and writes a.pdf, which is compared
# against the reference PDF in the next step.
$td->runtest("C new stream",
{$td->COMMAND =>
"qpdf-ctest 40 minimal.pdf '' a.pdf"},
{$td->STRING => "C test 40 done\n", $td->EXIT_STATUS => 0},
$td->NORMALIZE_NEWLINES);
$td->runtest("check output",
{$td->FILE => 'a.pdf'},
{$td->FILE => 'c-new-stream.pdf'});
show_ntests();
# ----------
$td->notify("--- Content Preservation Tests ---");
# $n_tests incremented below

View File

@ -0,0 +1,573 @@
%PDF-1.4
%¿÷¢þ
%QDF-1.0
1 0 obj
<<
/Copy 3 0 R
/Pages 4 0 R
/Type /Catalog
>>
endobj
2 0 obj
<<
/CreationDate (D:20120721200217)
/Producer (Apex PDFWriter)
>>
endobj
3 0 obj
<<
/Contents 5 0 R
/MediaBox [
0
0
612
792
]
/Resources <<
/Font <<
/F1 7 0 R
>>
/ProcSet 8 0 R
>>
/Type /Page
>>
endobj
4 0 obj
<<
/Count 11
/Kids [
9 0 R
10 0 R
11 0 R
12 0 R
13 0 R
14 0 R
15 0 R
16 0 R
17 0 R
18 0 R
19 0 R
]
/Type /Pages
>>
endobj
5 0 obj
<<
/Length 6 0 R
>>
stream
BT
/F1 24 Tf
72 720 Td
(Potato) Tj
ET
endstream
endobj
6 0 obj
44
endobj
7 0 obj
<<
/BaseFont /Helvetica
/Encoding /WinAnsiEncoding
/Name /F1
/Subtype /Type1
/Type /Font
>>
endobj
8 0 obj
[
/PDF
/Text
]
endobj
%% Page 1
9 0 obj
<<
/Contents 20 0 R
/MediaBox [
0
0
612
792
]
/Parent 4 0 R
/Resources <<
/Font <<
/F1 22 0 R
>>
/ProcSet [
/PDF
/Text
]
>>
/Type /Page
>>
endobj
%% Page 2
10 0 obj
<<
/Contents 23 0 R
/MediaBox [
0
0
612
792
]
/Parent 4 0 R
/Resources <<
/Font <<
/F1 22 0 R
>>
/ProcSet [
/PDF
/Text
]
>>
/Type /Page
>>
endobj
%% Page 3
11 0 obj
<<
/Contents 25 0 R
/MediaBox [
0
0
612
792
]
/Parent 4 0 R
/Resources <<
/Font <<
/F1 22 0 R
>>
/ProcSet [
/PDF
/Text
]
>>
/Type /Page
>>
endobj
%% Page 4
12 0 obj
<<
/Contents 27 0 R
/MediaBox [
0
0
612
792
]
/Parent 4 0 R
/Resources <<
/Font <<
/F1 22 0 R
>>
/ProcSet [
/PDF
/Text
]
>>
/Type /Page
>>
endobj
%% Page 5
13 0 obj
<<
/Contents 29 0 R
/MediaBox [
0
0
612
792
]
/Parent 4 0 R
/Resources <<
/Font <<
/F1 22 0 R
>>
/ProcSet [
/PDF
/Text
]
>>
/Type /Page
>>
endobj
%% Page 6
14 0 obj
<<
/Contents 31 0 R
/MediaBox [
0
0
612
792
]
/Parent 4 0 R
/Resources <<
/Font <<
/F1 22 0 R
>>
/ProcSet [
/PDF
/Text
]
>>
/Type /Page
>>
endobj
%% Page 7
15 0 obj
<<
/Contents 33 0 R
/MediaBox [
0
0
612
792
]
/Parent 4 0 R
/Resources <<
/Font <<
/F1 22 0 R
>>
/ProcSet [
/PDF
/Text
]
>>
/Type /Page
>>
endobj
%% Page 8
16 0 obj
<<
/Contents 35 0 R
/MediaBox [
0
0
612
792
]
/Parent 4 0 R
/Resources <<
/Font <<
/F1 22 0 R
>>
/ProcSet [
/PDF
/Text
]
>>
/Type /Page
>>
endobj
%% Page 9
17 0 obj
<<
/Contents 37 0 R
/MediaBox [
0
0
612
792
]
/Parent 4 0 R
/Resources <<
/Font <<
/F1 22 0 R
>>
/ProcSet [
/PDF
/Text
]
>>
/Type /Page
>>
endobj
%% Page 10
18 0 obj
<<
/Contents 39 0 R
/MediaBox [
0
0
612
792
]
/Parent 4 0 R
/Resources <<
/Font <<
/F1 22 0 R
>>
/ProcSet [
/PDF
/Text
]
>>
/Type /Page
>>
endobj
%% Page 11
19 0 obj
<<
/Contents 41 0 R
/MediaBox [
0
0
612
792
]
/Parent 4 0 R
/Resources <<
/Font <<
/F1 22 0 R
>>
/ProcSet [
/PDF
/Text
]
>>
/Type /Page
>>
endobj
%% Contents for page 1
20 0 obj
<<
/Length 21 0 R
>>
stream
BT /F1 15 Tf 72 720 Td (Original page 1) Tj ET
endstream
endobj
21 0 obj
47
endobj
22 0 obj
<<
/BaseFont /Times-Roman
/Encoding /WinAnsiEncoding
/Subtype /Type1
/Type /Font
>>
endobj
%% Contents for page 2
23 0 obj
<<
/Length 24 0 R
>>
stream
BT /F1 15 Tf 72 720 Td (Original page 2) Tj ET
endstream
endobj
24 0 obj
47
endobj
%% Contents for page 3
25 0 obj
<<
/Length 26 0 R
>>
stream
BT /F1 15 Tf 72 720 Td (Original page 3) Tj ET
endstream
endobj
26 0 obj
47
endobj
%% Contents for page 4
27 0 obj
<<
/Length 28 0 R
>>
stream
BT /F1 15 Tf 72 720 Td (Original page 4) Tj ET
endstream
endobj
28 0 obj
47
endobj
%% Contents for page 5
29 0 obj
<<
/Length 30 0 R
>>
stream
BT /F1 15 Tf 72 720 Td (Original page 5) Tj ET
endstream
endobj
30 0 obj
47
endobj
%% Contents for page 6
31 0 obj
<<
/Length 32 0 R
>>
stream
BT /F1 15 Tf 72 720 Td (Original page 6) Tj ET
endstream
endobj
32 0 obj
47
endobj
%% Contents for page 7
33 0 obj
<<
/Length 34 0 R
>>
stream
BT /F1 15 Tf 72 720 Td (Original page 7) Tj ET
endstream
endobj
34 0 obj
47
endobj
%% Contents for page 8
35 0 obj
<<
/Length 36 0 R
>>
stream
BT /F1 15 Tf 72 720 Td (Original page 8) Tj ET
endstream
endobj
36 0 obj
47
endobj
%% Contents for page 9
37 0 obj
<<
/Length 38 0 R
>>
stream
BT /F1 15 Tf 72 720 Td (Original page 9) Tj ET
endstream
endobj
38 0 obj
47
endobj
%% Contents for page 10
39 0 obj
<<
/Length 40 0 R
>>
stream
BT /F1 15 Tf 72 720 Td (Original page 10) Tj ET
endstream
endobj
40 0 obj
48
endobj
%% Contents for page 11
41 0 obj
<<
/Length 42 0 R
>>
stream
BT /F1 15 Tf 72 720 Td (Original page 11) Tj ET
endstream
endobj
42 0 obj
48
endobj
xref
0 43
0000000000 65535 f
0000000025 00000 n
0000000093 00000 n
0000000179 00000 n
0000000355 00000 n
0000000538 00000 n
0000000637 00000 n
0000000656 00000 n
0000000774 00000 n
0000000819 00000 n
0000001048 00000 n
0000001278 00000 n
0000001508 00000 n
0000001738 00000 n
0000001968 00000 n
0000002198 00000 n
0000002428 00000 n
0000002658 00000 n
0000002889 00000 n
0000003120 00000 n
0000003363 00000 n
0000003467 00000 n
0000003487 00000 n
0000003619 00000 n
0000003723 00000 n
0000003766 00000 n
0000003870 00000 n
0000003913 00000 n
0000004017 00000 n
0000004060 00000 n
0000004164 00000 n
0000004207 00000 n
0000004311 00000 n
0000004354 00000 n
0000004458 00000 n
0000004501 00000 n
0000004605 00000 n
0000004648 00000 n
0000004752 00000 n
0000004796 00000 n
0000004901 00000 n
0000004945 00000 n
0000005050 00000 n
trailer <<
/Info 2 0 R
/Root 1 0 R
/Size 43
/ID [<e032a88c7a987db6ca3abee555506ccc><31415926535897932384626433832795>]
>>
startxref
5070
%%EOF

View File

@ -0,0 +1,13 @@
page content on broken page
error: page object 5 0: object is supposed to be a stream or an array of streams but is neither
code: 5
file:
pos : 0
text: object is supposed to be a stream or an array of streams but is neither
stream data for non stream
error: operation for stream attempted on object of type dictionary
code: 2
file:
pos : 0
text: operation for stream attempted on object of type dictionary
C test 38 done

Binary file not shown.