diff --git a/ChangeLog b/ChangeLog index 530ad51c..3b663a53 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,5 +1,10 @@ 2017-09-12 Jay Berkenbilt + * Improve the error message that is issued when QPDFWriter + encounters a stream that can't be decoded. In particular, mention + that the stream will be copied without filtering to avoid data + loss. + * Add new methods to the C API to correspond to new additions to QPDFWriter: - qpdf_set_compress_streams diff --git a/TODO b/TODO index f928b6de..aaeef9b1 100644 --- a/TODO +++ b/TODO @@ -3,10 +3,6 @@ Before final 7.0.0 * Create release notes - * See if the error message that gets generated when retrying a stream - without filtering after error detection can be less scary. - Communicate that the original stream data is being preserved. - Soon ==== diff --git a/include/qpdf/QPDF.hh b/include/qpdf/QPDF.hh index 8b03eed7..ddd82b4d 100644 --- a/include/qpdf/QPDF.hh +++ b/include/qpdf/QPDF.hh @@ -554,11 +554,13 @@ class QPDF static bool pipeStreamData(QPDF* qpdf, int objid, int generation, qpdf_offset_t offset, size_t length, QPDFObjectHandle dict, - Pipeline* pipeline, bool suppress_warnings) + Pipeline* pipeline, + bool suppress_warnings, + bool will_retry) { return qpdf->pipeStreamData( objid, generation, offset, length, dict, pipeline, - suppress_warnings); + suppress_warnings, will_retry); } }; friend class Pipe; @@ -688,7 +690,8 @@ class QPDF qpdf_offset_t offset, size_t length, QPDFObjectHandle dict, Pipeline* pipeline, - bool suppress_warnings); + bool suppress_warnings, + bool will_retry); // For QPDFWriter: diff --git a/include/qpdf/QPDFObjectHandle.hh b/include/qpdf/QPDFObjectHandle.hh index 936ccc5d..79d83851 100644 --- a/include/qpdf/QPDFObjectHandle.hh +++ b/include/qpdf/QPDFObjectHandle.hh @@ -420,12 +420,21 @@ class QPDFObjectHandle // configured filters. QPDFWriter handles this by attempting to // get the stream data without filtering, but callers should // consider a false return value when decode_level is not - // qpdf_dl_none to be a potential loss of data. + // qpdf_dl_none to be a potential loss of data. If you intend to + // retry in that case, pass true as the value of will_retry. This + // changes the warning issued by the library to indicate that the + // operation will be retried without filtering to avoid data loss. QPDF_DLL bool pipeStreamData(Pipeline*, unsigned long encode_flags, qpdf_stream_decode_level_e decode_level, bool suppress_warnings = false); + QPDF_DLL + bool pipeStreamData(Pipeline*, + unsigned long encode_flags, + qpdf_stream_decode_level_e decode_level, + bool suppress_warnings, + bool will_retry); // Legacy pipeStreamData. This maps to the the flags-based // pipeStreamData as follows: diff --git a/libqpdf/QPDF.cc b/libqpdf/QPDF.cc index bea83c98..1ec0d6b6 100644 --- a/libqpdf/QPDF.cc +++ b/libqpdf/QPDF.cc @@ -2382,7 +2382,8 @@ QPDF::pipeStreamData(int objid, int generation, qpdf_offset_t offset, size_t length, QPDFObjectHandle stream_dict, Pipeline* pipeline, - bool suppress_warnings) + bool suppress_warnings, + bool will_retry) { bool success = false; std::vector > to_delete; @@ -2430,6 +2431,13 @@ QPDF::pipeStreamData(int objid, int generation, "error decoding stream data for object " + QUtil::int_to_string(objid) + " " + QUtil::int_to_string(generation) + ": " + e.what())); + if (will_retry) + { + warn(QPDFExc(qpdf_e_damaged_pdf, this->m->file->getName(), + "", this->m->file->getLastOffset(), + "stream will be re-processed without" + " filtering to avoid data loss")); + } } } if (! success) diff --git a/libqpdf/QPDFObjectHandle.cc b/libqpdf/QPDFObjectHandle.cc index 888d9a3a..247b3b38 100644 --- a/libqpdf/QPDFObjectHandle.cc +++ b/libqpdf/QPDFObjectHandle.cc @@ -500,10 +500,20 @@ QPDFObjectHandle::pipeStreamData(Pipeline* p, unsigned long encode_flags, qpdf_stream_decode_level_e decode_level, bool suppress_warnings) +{ + return pipeStreamData( + p, encode_flags, decode_level, suppress_warnings, false); +} + +bool +QPDFObjectHandle::pipeStreamData(Pipeline* p, + unsigned long encode_flags, + qpdf_stream_decode_level_e decode_level, + bool suppress_warnings, bool will_retry) { assertStream(); return dynamic_cast(obj.getPointer())->pipeStreamData( - p, encode_flags, decode_level, suppress_warnings); + p, encode_flags, decode_level, suppress_warnings, will_retry); } bool diff --git a/libqpdf/QPDFWriter.cc b/libqpdf/QPDFWriter.cc index 1ce4bfb6..d4e13310 100644 --- a/libqpdf/QPDFWriter.cc +++ b/libqpdf/QPDFWriter.cc @@ -1623,7 +1623,7 @@ QPDFWriter::unparseObject(QPDFObjectHandle object, int level, ((filter && compress) ? qpdf_ef_compress : 0)), (filter ? (uncompress ? qpdf_dl_all : this->m->stream_decode_level) - : qpdf_dl_none)); + : qpdf_dl_none), false, (attempt == 1)); popPipelineStack(&stream_data); if (filter && (! filtered)) { diff --git a/libqpdf/QPDF_Stream.cc b/libqpdf/QPDF_Stream.cc index 27c6b477..cf26f001 100644 --- a/libqpdf/QPDF_Stream.cc +++ b/libqpdf/QPDF_Stream.cc @@ -94,7 +94,7 @@ PointerHolder QPDF_Stream::getStreamData(qpdf_stream_decode_level_e decode_level) { Pl_Buffer buf("stream data buffer"); - if (! pipeStreamData(&buf, 0, decode_level, false)) + if (! pipeStreamData(&buf, 0, decode_level, false, false)) { throw std::logic_error("getStreamData called on unfilterable stream"); } @@ -106,7 +106,7 @@ PointerHolder QPDF_Stream::getRawStreamData() { Pl_Buffer buf("stream data buffer"); - pipeStreamData(&buf, 0, qpdf_dl_none, false); + pipeStreamData(&buf, 0, qpdf_dl_none, false, false); QTC::TC("qpdf", "QPDF_Stream getRawStreamData"); return buf.getBuffer(); } @@ -373,7 +373,7 @@ bool QPDF_Stream::pipeStreamData(Pipeline* pipeline, unsigned long encode_flags, qpdf_stream_decode_level_e decode_level, - bool suppress_warnings) + bool suppress_warnings, bool will_retry) { std::vector filters; int predictor = 1; @@ -540,7 +540,8 @@ QPDF_Stream::pipeStreamData(Pipeline* pipeline, if (! QPDF::Pipe::pipeStreamData(this->qpdf, this->objid, this->generation, this->offset, this->length, this->stream_dict, pipeline, - suppress_warnings)) + suppress_warnings, + will_retry)) { filter = false; } diff --git a/libqpdf/qpdf/QPDF_Stream.hh b/libqpdf/qpdf/QPDF_Stream.hh index a2bd5d61..50771627 100644 --- a/libqpdf/qpdf/QPDF_Stream.hh +++ b/libqpdf/qpdf/QPDF_Stream.hh @@ -25,7 +25,7 @@ class QPDF_Stream: public QPDFObject bool pipeStreamData(Pipeline*, unsigned long encode_flags, qpdf_stream_decode_level_e decode_level, - bool suppress_warnings); + bool suppress_warnings, bool will_retry); PointerHolder getStreamData(qpdf_stream_decode_level_e); PointerHolder getRawStreamData(); void replaceStreamData(PointerHolder data, diff --git a/qpdf/qtest/qpdf/bad-data.out b/qpdf/qtest/qpdf/bad-data.out index 3ea1d07f..2d463759 100644 --- a/qpdf/qtest/qpdf/bad-data.out +++ b/qpdf/qtest/qpdf/bad-data.out @@ -1,2 +1,3 @@ WARNING: bad-data.pdf (file position 319): error decoding stream data for object 4 0: LZWDecoder: bad code received +WARNING: bad-data.pdf (file position 319): stream will be re-processed without filtering to avoid data loss qpdf: operation succeeded with warnings; resulting file may have some problems diff --git a/qpdf/qtest/qpdf/bad-jpeg-check.out b/qpdf/qtest/qpdf/bad-jpeg-check.out index ad7f8ecc..3607d1c1 100644 --- a/qpdf/qtest/qpdf/bad-jpeg-check.out +++ b/qpdf/qtest/qpdf/bad-jpeg-check.out @@ -3,3 +3,4 @@ PDF Version: 1.3 File is not encrypted File is not linearized WARNING: bad-jpeg.pdf (file position 735): error decoding stream data for object 6 0: Not a JPEG file: starts with 0x77 0x77 +WARNING: bad-jpeg.pdf (file position 735): stream will be re-processed without filtering to avoid data loss diff --git a/qpdf/qtest/qpdf/bad-jpeg.out b/qpdf/qtest/qpdf/bad-jpeg.out index 915060a4..4a7b6bd1 100644 --- a/qpdf/qtest/qpdf/bad-jpeg.out +++ b/qpdf/qtest/qpdf/bad-jpeg.out @@ -1,2 +1,3 @@ WARNING: bad-jpeg.pdf (file position 735): error decoding stream data for object 6 0: Not a JPEG file: starts with 0x77 0x77 +WARNING: bad-jpeg.pdf (file position 735): stream will be re-processed without filtering to avoid data loss qpdf: operation succeeded with warnings; resulting file may have some problems diff --git a/qpdf/qtest/qpdf/damaged-stream-c-check.out b/qpdf/qtest/qpdf/damaged-stream-c-check.out index 501806d2..d3d01493 100644 --- a/qpdf/qtest/qpdf/damaged-stream-c-check.out +++ b/qpdf/qtest/qpdf/damaged-stream-c-check.out @@ -3,3 +3,8 @@ warning: damaged-stream.pdf (file position 426): error decoding stream data for file: damaged-stream.pdf pos : 426 text: error decoding stream data for object 5 0: LZWDecoder: bad code received +warning: damaged-stream.pdf (file position 426): stream will be re-processed without filtering to avoid data loss + code: 5 + file: damaged-stream.pdf + pos : 426 + text: stream will be re-processed without filtering to avoid data loss diff --git a/qpdf/qtest/qpdf/damaged-stream.out b/qpdf/qtest/qpdf/damaged-stream.out index 39e7260b..9ab30730 100644 --- a/qpdf/qtest/qpdf/damaged-stream.out +++ b/qpdf/qtest/qpdf/damaged-stream.out @@ -3,3 +3,4 @@ PDF Version: 1.3 File is not encrypted File is not linearized WARNING: damaged-stream.pdf (file position 426): error decoding stream data for object 5 0: LZWDecoder: bad code received +WARNING: damaged-stream.pdf (file position 426): stream will be re-processed without filtering to avoid data loss diff --git a/qpdf/qtest/qpdf/issue-106.out b/qpdf/qtest/qpdf/issue-106.out index d93b7274..8766a31f 100644 --- a/qpdf/qtest/qpdf/issue-106.out +++ b/qpdf/qtest/qpdf/issue-106.out @@ -2,4 +2,5 @@ WARNING: issue-106.pdf: file is damaged WARNING: issue-106.pdf (file position 809): xref not found WARNING: issue-106.pdf: Attempting to reconstruct cross-reference table WARNING: issue-106.pdf (file position 965): error decoding stream data for object 8 0: stream inflate: inflate: data: incorrect data check +WARNING: issue-106.pdf (file position 965): stream will be re-processed without filtering to avoid data loss qpdf: operation succeeded with warnings; resulting file may have some problems diff --git a/qpdf/qtest/qpdf/split-content-stream-errors.out b/qpdf/qtest/qpdf/split-content-stream-errors.out index a00a78c4..81e6b8cb 100644 --- a/qpdf/qtest/qpdf/split-content-stream-errors.out +++ b/qpdf/qtest/qpdf/split-content-stream-errors.out @@ -3,6 +3,7 @@ PDF Version: 1.3 File is not encrypted File is not linearized WARNING: split-content-stream-errors.pdf (file position 557): error decoding stream data for object 6 0: LZWDecoder: bad code received +WARNING: split-content-stream-errors.pdf (file position 557): stream will be re-processed without filtering to avoid data loss WARNING: content stream: ignoring non-stream while parsing content streams WARNING: split-content-stream-errors.pdf (file position 557): error decoding stream data for object 6 0: LZWDecoder: bad code received WARNING: content stream (content stream object 6 0): errors while decoding content stream