diff --git a/TODO b/TODO index 6061c44e..d77f159f 100644 --- a/TODO +++ b/TODO @@ -33,9 +33,9 @@ Soon prioritized so that we can poll all registered filters to see whether they are capable of filtering a particular stream. - * If possible, consider adding RLE, CCITT3, CCITT4, or any other easy - filters. For some reference code that we probably can't use but - may be handy anyway, see + * If possible, consider adding CCITT3, CCITT4, or any other easy + filters. For some reference code that we probably can't use but may + be handy anyway, see http://partners.adobe.com/public/developer/ps/sdk/index_archive.html * If possible, support the following types of broken files: diff --git a/include/qpdf/Pl_RunLength.hh b/include/qpdf/Pl_RunLength.hh new file mode 100644 index 00000000..86855382 --- /dev/null +++ b/include/qpdf/Pl_RunLength.hh @@ -0,0 +1,42 @@ +// Copyright (c) 2005-2015 Jay Berkenbilt +// +// This file is part of qpdf. This software may be distributed under +// the terms of version 2 of the Artistic License which may be found +// in the source distribution. It is provided "as is" without express +// or implied warranty. 
+
+#ifndef __PL_RUNLENGTH_HH__
+#define __PL_RUNLENGTH_HH__
+
+#include <qpdf/Pipeline.hh>
+
+class Pl_RunLength: public Pipeline
+{
+  public:
+    enum action_e { a_encode, a_decode };
+
+    QPDF_DLL
+    Pl_RunLength(char const* identifier, Pipeline* next,
+                 action_e action);
+    QPDF_DLL
+    virtual ~Pl_RunLength();
+
+    QPDF_DLL
+    virtual void write(unsigned char* data, size_t len);
+    QPDF_DLL
+    virtual void finish();
+
+  private:
+    void encode(unsigned char* data, size_t len);
+    void decode(unsigned char* data, size_t len);
+    void flush_encode();
+
+    enum state_e { st_top, st_copying, st_run };
+
+    action_e action;        // encode or decode; fixed at construction
+    state_e state;          // st_top until current packet's kind is known
+    unsigned char buf[128]; // encoder: bytes of the pending packet
+    unsigned int length;    // encoder: bytes in buf; decoder: count left
+};
+
+#endif // __PL_RUNLENGTH_HH__
diff --git a/libqpdf/Pl_RunLength.cc b/libqpdf/Pl_RunLength.cc
new file mode 100644
index 00000000..1e8c56ca
--- /dev/null
+++ b/libqpdf/Pl_RunLength.cc
@@ -0,0 +1,201 @@
+#include <qpdf/Pl_RunLength.hh>
+
+#include <qpdf/QTC.hh>
+#include <stdexcept>
+
+// Pipeline that run-length encodes or decodes the bytes written to it
+// and passes the result to the next pipeline.  Encoded data is a
+// sequence of packets, each introduced by a length byte n:
+//   n < 128:  the next n + 1 bytes are copied literally
+//   n > 128:  the next byte is repeated 257 - n times
+//   n == 128: end of data (EOD)
+Pl_RunLength::Pl_RunLength(char const* identifier, Pipeline* next,
+                           action_e action) :
+    Pipeline(identifier, next),
+    action(action),
+    state(st_top),
+    length(0)
+{
+}
+
+Pl_RunLength::~Pl_RunLength()
+{
+}
+
+// Hand the data to the encoder or decoder, as selected by the action
+// passed to the constructor.
+void
+Pl_RunLength::write(unsigned char* data, size_t len)
+{
+    if (this->action == a_encode)
+    {
+        encode(data, len);
+    }
+    else
+    {
+        decode(data, len);
+    }
+}
+
+// Accumulate input in buf as either literal data (st_copying) or a
+// run (st_run), calling flush_encode() whenever the pending packet is
+// full or its kind changes.  While state is st_top at most one byte
+// is buffered; the consistency check below enforces this.
+void
+Pl_RunLength::encode(unsigned char* data, size_t len)
+{
+    for (size_t i = 0; i < len; ++i)
+    {
+        if ((this->state == st_top) != (this->length <= 1))
+        {
+            throw std::logic_error(
+                "Pl_RunLength::encode: state/length inconsistency");
+        }
+        unsigned char ch = data[i];
+        // True when ch repeats the previous byte and the buffer can
+        // take it: a run that already has 128 bytes cannot grow, but
+        // a full literal buffer can give up its last byte to a run.
+        if ((this->length > 0) &&
+            ((this->state == st_copying) || (this->length < 128)) &&
+            (ch == this->buf[this->length-1]))
+        {
+            QTC::TC("libtests", "Pl_RunLength: switch to run",
+                    (this->length == 128) ? 0 : 1);
+            if (this->state == st_copying)
+            {
+                // Emit the literal without its last byte, which
+                // becomes the first byte of the run.
+                --this->length;
+                flush_encode();
+                this->buf[0] = ch;
+                this->length = 1;
+            }
+            this->state = st_run;
+            this->buf[this->length] = ch;
+            ++this->length;
+        }
+        else
+        {
+            if ((this->length == 128) || (this->state == st_run))
+            {
+                // Full buffer or end of a run: emit it; ch starts
+                // the next packet.
+                flush_encode();
+            }
+            else if (this->length > 0)
+            {
+                // ch differs from the byte before it: literal data
+                this->state = st_copying;
+            }
+            this->buf[this->length] = ch;
+            ++this->length;
+        }
+    }
+}
+
+// Decode one input byte at a time, keeping the packet state in
+// state/length so a packet may be split across calls to write().
+void
+Pl_RunLength::decode(unsigned char* data, size_t len)
+{
+    for (size_t i = 0; i < len; ++i)
+    {
+        unsigned char ch = data[i];
+        switch (this->state)
+        {
+          case st_top:
+            if (ch < 128)
+            {
+                // length represents remaining number of bytes to copy
+                this->length = 1 + ch;
+                this->state = st_copying;
+            }
+            else if (ch > 128)
+            {
+                // length represents number of copies of next byte
+                this->length = 257 - ch;
+                this->state = st_run;
+            }
+            else // ch == 128
+            {
+                // EOD; stay in this state.  Anything that follows is
+                // decoded as more packets.
+            }
+            break;
+
+          case st_copying:
+            this->getNext()->write(&ch, 1);
+            if (--this->length == 0)
+            {
+                this->state = st_top;
+            }
+            break;
+
+          case st_run:
+            for (unsigned int j = 0; j < this->length; ++j)
+            {
+                this->getNext()->write(&ch, 1);
+            }
+            this->state = st_top;
+            break;
+        }
+    }
+}
+
+// Write the buffered bytes to the next pipeline as a single packet: a
+// run packet (length byte, then the repeated byte) in st_run,
+// otherwise a literal packet (length byte, then the bytes).  Outside
+// of a run an empty buffer writes nothing.  On return, state is
+// st_top with nothing buffered.
+void
+Pl_RunLength::flush_encode()
+{
+    if (this->length == 128)
+    {
+        QTC::TC("libtests", "Pl_RunLength flush full buffer",
+                (this->state == st_copying ? 0 :
+                 this->state == st_run ? 1 :
+                 -1));
+    }
+    if (this->length == 0)
+    {
+        QTC::TC("libtests", "Pl_RunLength flush empty buffer");
+    }
+    if (this->state == st_run)
+    {
+        if ((this->length < 2) || (this->length > 128))
+        {
+            throw std::logic_error(
+                "Pl_RunLength: invalid length in flush_encode for run");
+        }
+        unsigned char ch = static_cast<unsigned char>(257 - this->length);
+        this->getNext()->write(&ch, 1);
+        this->getNext()->write(&this->buf[0], 1);
+    }
+    else if (this->length > 0)
+    {
+        unsigned char ch = static_cast<unsigned char>(this->length - 1);
+        this->getNext()->write(&ch, 1);
+        this->getNext()->write(this->buf, this->length);
+    }
+    this->state = st_top;
+    this->length = 0;
+}
+
+// When encoding, flush the pending packet and append the EOD byte.
+// In both directions, finish the next pipeline.
+void
+Pl_RunLength::finish()
+{
+    // When decoding, we might have read a length byte not followed by
+    // data, which means the stream was terminated early, but we will
+    // just ignore this case since this is the only sensible thing to
+    // do.
+    if (this->action == a_encode)
+    {
+        flush_encode();
+        unsigned char ch = 128;
+        this->getNext()->write(&ch, 1);
+    }
+    this->getNext()->finish();
+}
diff --git a/libqpdf/build.mk b/libqpdf/build.mk
index b8cf1dbc..9a8652a6 100644
--- a/libqpdf/build.mk
+++ b/libqpdf/build.mk
@@ -28,6 +28,7 @@ SRCS_libqpdf = \
 	libqpdf/Pl_PNGFilter.cc \
 	libqpdf/Pl_QPDFTokenizer.cc \
 	libqpdf/Pl_RC4.cc \
+	libqpdf/Pl_RunLength.cc \
 	libqpdf/Pl_SHA2.cc \
 	libqpdf/Pl_StdioFile.cc \
 	libqpdf/QPDF.cc \
diff --git a/libtests/build.mk b/libtests/build.mk
index f779d211..7977c8c5 100644
--- a/libtests/build.mk
+++ b/libtests/build.mk
@@ -14,6 +14,7 @@ BINS_libtests = \
 	qutil \
 	random \
 	rc4 \
+	runlength \
 	sha2
 
 TARGETS_libtests = $(foreach B,$(BINS_libtests),libtests/$(OUTPUT_DIR)/$(call binname,$(B)))
diff --git a/libtests/libtests.testcov b/libtests/libtests.testcov
index a5fe625f..01ca9efe 100644
--- a/libtests/libtests.testcov
+++ b/libtests/libtests.testcov
@@ -24,3 +24,6 @@ InputSource start_chars matched but not check 0
 InputSource not enough bytes 0
 InputSource findLast found more than one 0
 InputSource found match
at buf[0] 0
+Pl_RunLength: switch to run 1
+Pl_RunLength flush full buffer 1
+Pl_RunLength flush empty buffer 0
diff --git a/libtests/qtest/runlength.test b/libtests/qtest/runlength.test
new file mode 100644
index 00000000..26b6155d
--- /dev/null
+++ b/libtests/qtest/runlength.test
@@ -0,0 +1,81 @@
+#!/usr/bin/env perl
+require 5.008;
+use warnings;
+use strict;
+
+chdir("runlength") or die "chdir testdir failed: $!\n";
+
+require TestDriver;
+
+my $td = new TestDriver('runlength');
+
+cleanup();
+
+my @files = (
+    "01",              # basic case, ends with copy
+    "02",              # basic case, ends with run
+    "03",              # long run
+    "04",              # ends with copy, length % 128 == 0
+    "05",              # run starts at byte 128
+    "empty",           # empty file
+    );
+
+# Create this rather than committing an empty file, which always looks
+# like an error.
+open(F, ">empty") or die "open empty failed: $!\n";
+close(F);
+
+# Each file is encoded and compared with $f.encoded, then $f.encoded is
+# decoded and compared with $f: four tests per file.
+foreach my $f (@files)
+{
+    $td->runtest("encode $f",
+                 {$td->COMMAND => "runlength -encode $f a"},
+                 {$td->STRING => "", $td->EXIT_STATUS => 0});
+    $td->runtest("check encoded output",
+                 {$td->FILE => "a"},
+                 {$td->FILE => "$f.encoded"});
+    $td->runtest("decode $f.encoded",
+                 {$td->COMMAND => "runlength -decode $f.encoded a"},
+                 {$td->STRING => "", $td->EXIT_STATUS => 0});
+    $td->runtest("check decoded output",
+                 {$td->FILE => "a"},
+                 {$td->FILE => "$f"});
+}
+
+# Two complete encoded streams back to back put an EOD marker in the
+# middle of the input; decoding must yield both files' contents.
+concatenate("01.encoded", "02.encoded", "concat.encoded");
+concatenate("01", "02", "concat");
+
+$td->runtest("decode with embedded EOD",
+             {$td->COMMAND => "runlength -decode concat.encoded a"},
+             {$td->STRING => "", $td->EXIT_STATUS => 0});
+$td->runtest("check decoded output",
+             {$td->FILE => "a"},
+             {$td->FILE => "concat"});
+
+cleanup();
+
+$td->report(2 + (4 * scalar(@files)));
+
+# Remove the files this script generates.
+sub cleanup
+{
+    system("rm -f a concat concat.encoded empty");
+}
+
+# Write the contents of the two input files, in order, to $out.
+sub concatenate
+{
+    my ($in1, $in2, $out) = @_;
+    open(F, ">$out") or die "open $out failed: $!\n";
+    foreach my $f ($in1, $in2)
+    {
+        local $/ = undef;
+        open(G, "<$f") or die "open $f failed: $!\n";
+        print F <G>;
+        close(G);
+    }
+    close(F);
+}
diff --git a/libtests/qtest/runlength/01
b/libtests/qtest/runlength/01 new file mode 100644 index 00000000..c39144df --- /dev/null +++ b/libtests/qtest/runlength/01 @@ -0,0 +1 @@ +wwwwwwwwwwwwwwwwwwwwwwwwwqqqqqrstv \ No newline at end of file diff --git a/libtests/qtest/runlength/01.encoded b/libtests/qtest/runlength/01.encoded new file mode 100644 index 00000000..fc26c6f6 --- /dev/null +++ b/libtests/qtest/runlength/01.encoded @@ -0,0 +1 @@ +čwüqrstv€ \ No newline at end of file diff --git a/libtests/qtest/runlength/02 b/libtests/qtest/runlength/02 new file mode 100644 index 00000000..b5d6cfc8 --- /dev/null +++ b/libtests/qtest/runlength/02 @@ -0,0 +1 @@ +wwwwwwwwwwwwwwwwwwwwwwwwwqqqqqrstvxxxxxxxxxxxxxxxxxxxxxxx \ No newline at end of file diff --git a/libtests/qtest/runlength/02.encoded b/libtests/qtest/runlength/02.encoded new file mode 100644 index 00000000..85bb323e --- /dev/null +++ b/libtests/qtest/runlength/02.encoded @@ -0,0 +1 @@ +čwüqrstvęx€ \ No newline at end of file diff --git a/libtests/qtest/runlength/03 b/libtests/qtest/runlength/03 new file mode 100644 index 00000000..b8dbdb28 Binary files /dev/null and b/libtests/qtest/runlength/03 differ diff --git a/libtests/qtest/runlength/03.encoded b/libtests/qtest/runlength/03.encoded new file mode 100644 index 00000000..0fac6b58 Binary files /dev/null and b/libtests/qtest/runlength/03.encoded differ diff --git a/libtests/qtest/runlength/04 b/libtests/qtest/runlength/04 new file mode 100644 index 00000000..9ad537ac --- /dev/null +++ b/libtests/qtest/runlength/04 @@ -0,0 +1 @@ +wwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwabababababababababababababababababababababababababababababababababababababababababababababababababababababababababababababababab \ No newline at end of file diff --git a/libtests/qtest/runlength/04.encoded b/libtests/qtest/runlength/04.encoded new file mode 100644 index 00000000..0c97dde2 --- /dev/null +++ b/libtests/qtest/runlength/04.encoded @@ 
-0,0 +1 @@ +wabababababababababababababababababababababababababababababababababababababababababababababababababababababababababababababababab€ \ No newline at end of file diff --git a/libtests/qtest/runlength/05 b/libtests/qtest/runlength/05 new file mode 100644 index 00000000..979bf52b --- /dev/null +++ b/libtests/qtest/runlength/05 @@ -0,0 +1 @@ +ababababababababababababababababababababababababababababababababababababababababababababababababababababababababababababababababbbbbb \ No newline at end of file diff --git a/libtests/qtest/runlength/05.encoded b/libtests/qtest/runlength/05.encoded new file mode 100644 index 00000000..2a645eef --- /dev/null +++ b/libtests/qtest/runlength/05.encoded @@ -0,0 +1 @@ +~abababababababababababababababababababababababababababababababababababababababababababababababababababababababababababababababaűb€ \ No newline at end of file diff --git a/libtests/qtest/runlength/empty.encoded b/libtests/qtest/runlength/empty.encoded new file mode 100644 index 00000000..5416677b --- /dev/null +++ b/libtests/qtest/runlength/empty.encoded @@ -0,0 +1 @@ +€ \ No newline at end of file diff --git a/libtests/runlength.cc b/libtests/runlength.cc new file mode 100644 index 00000000..3dfe0563 --- /dev/null +++ b/libtests/runlength.cc @@ -0,0 +1,47 @@ +#include +#include +#include + +#include +#include +#include +#include + +int main(int argc, char* argv[]) +{ + if (argc != 4) + { + std::cerr << "Usage: runlength {-encode|-decode} infile outfile" + << std::endl; + exit(2); + } + + bool encode = (strcmp("-encode", argv[1]) == 0); + char* infilename = argv[2]; + char* outfilename = argv[3]; + + FILE* infile = QUtil::safe_fopen(infilename, "rb"); + FILE* outfile = QUtil::safe_fopen(outfilename, "wb"); + Pl_StdioFile out("stdout", outfile); + unsigned char buf[100]; + bool done = false; + Pl_RunLength rl( + "runlength", &out, + (encode ? Pl_RunLength::a_encode : Pl_RunLength::a_decode)); + while (! 
done) + { + size_t len = fread(buf, 1, sizeof(buf), infile); + if (len <= 0) + { + done = true; + } + else + { + rl.write(buf, len); + } + } + rl.finish(); + fclose(infile); + fclose(outfile); + return 0; +}