Implement Pl_RunLength pipeline

This commit is contained in:
Jay Berkenbilt 2017-08-16 06:26:31 -04:00
parent e0d1cd1f4b
commit 2d2f619665
19 changed files with 352 additions and 3 deletions

6
TODO
View File

@ -33,9 +33,9 @@ Soon
prioritized so that we can poll all registered filters to see prioritized so that we can poll all registered filters to see
whether they are capable of filtering a particular stream. whether they are capable of filtering a particular stream.
* If possible, consider adding RLE, CCITT3, CCITT4, or any other easy * If possible, consider adding CCITT3, CCITT4, or any other easy
filters. For some reference code that we probably can't use but filters. For some reference code that we probably can't use but may
may be handy anyway, see be handy anyway, see
http://partners.adobe.com/public/developer/ps/sdk/index_archive.html http://partners.adobe.com/public/developer/ps/sdk/index_archive.html
* If possible, support the following types of broken files: * If possible, support the following types of broken files:

View File

@ -0,0 +1,42 @@
// Copyright (c) 2005-2015 Jay Berkenbilt
//
// This file is part of qpdf. This software may be distributed under
// the terms of version 2 of the Artistic License which may be found
// in the source distribution. It is provided "as is" without express
// or implied warranty.
#ifndef __PL_RUNLENGTH_HH__
#define __PL_RUNLENGTH_HH__
#include <qpdf/Pipeline.hh>
// Pipeline that run-length encodes or decodes the bytes written to it
// and passes the result to the next pipeline.
//
// Encoded data is a sequence of length bytes, each followed by data:
// a length byte n < 128 is followed by n + 1 literal bytes; a length
// byte n > 128 is followed by a single byte that stands for 257 - n
// copies of that byte; the value 128 marks end of data.
class Pl_RunLength: public Pipeline
{
public:
// Direction of the transformation; chosen at construction time.
enum action_e { a_encode, a_decode };
// identifier and next are handed to the Pipeline base class; action
// selects whether write() encodes or decodes.
QPDF_DLL
Pl_RunLength(char const* identifier, Pipeline* next,
action_e action);
QPDF_DLL
virtual ~Pl_RunLength();
// Encode or decode len bytes of data, depending on the action.
QPDF_DLL
virtual void write(unsigned char* data, size_t len);
// When encoding, emit any buffered data followed by the end-of-data
// byte (128); in either mode, then call finish() on the next pipeline.
QPDF_DLL
virtual void finish();
private:
void encode(unsigned char* data, size_t len);
void decode(unsigned char* data, size_t len);
// Write the encoder's buffered bytes to the next pipeline as one run
// or one literal sequence, then reset to st_top with an empty buffer.
void flush_encode();
// Encoder: st_top while at most one byte is buffered, st_copying
// while buf holds a literal sequence, st_run while buf holds a run
// of identical bytes.
// Decoder: st_top when the next byte is a length byte, st_copying
// while literal bytes remain to be copied, st_run when the next byte
// is the byte to repeat.
enum state_e { st_top, st_copying, st_run };
action_e action;
state_e state;
// Encoder's pending bytes; one encoded unit covers at most 128 bytes.
unsigned char buf[128];
// Encoder: number of bytes currently held in buf.
// Decoder: literal bytes left to copy (st_copying) or number of
// copies of the next byte to emit (st_run).
unsigned int length;
};
#endif // __PL_RUNLENGTH_HH__

171
libqpdf/Pl_RunLength.cc Normal file
View File

@ -0,0 +1,171 @@
#include <qpdf/Pl_RunLength.hh>
#include <qpdf/QUtil.hh>
#include <qpdf/QTC.hh>
// Construct a run-length pipeline. The identifier and the next
// pipeline are passed through to the Pipeline base class; action
// selects encoding or decoding. The object starts in st_top with
// nothing buffered.
Pl_RunLength::Pl_RunLength(
    char const* identifier,
    Pipeline* next,
    action_e action) :
    Pipeline(identifier, next),
    action(action),
    state(st_top),
    length(0)
{
    // All members that need an initial value are set above.
}
// Nothing to release: every member is a plain value or a fixed-size
// array.
Pl_RunLength::~Pl_RunLength()
{
    // Intentionally empty.
}
// Process len bytes from data: run-length encode them when this
// pipeline was created with a_encode, otherwise decode them.
void
Pl_RunLength::write(unsigned char* data, size_t len)
{
    switch (this->action)
    {
      case a_encode:
        encode(data, len);
        break;

      default:
        decode(data, len);
        break;
    }
}
void
Pl_RunLength::encode(unsigned char* data, size_t len)
{
for (size_t i = 0; i < len; ++i)
{
if ((this->state == st_top) != (this->length <= 1))
{
throw std::logic_error(
"Pl_RunLength::encode: state/length inconsistency");
}
unsigned char ch = data[i];
if ((this->length > 0) &&
((this->state == st_copying) || (this->length < 128)) &&
(ch == this->buf[this->length-1]))
{
QTC::TC("libtests", "Pl_RunLength: switch to run",
(this->length == 128) ? 0 : 1);
if (this->state == st_copying)
{
--this->length;
flush_encode();
this->buf[0] = ch;
this->length = 1;
}
this->state = st_run;
this->buf[this->length] = ch;
++this->length;
}
else
{
if ((this->length == 128) || (this->state == st_run))
{
flush_encode();
}
else if (this->length > 0)
{
this->state = st_copying;
}
this->buf[this->length] = ch;
++this->length;
}
}
}
// Decode len bytes of run-length encoded data and write the decoded
// bytes to the next pipeline. Decoder state is kept across calls, so
// the encoded stream may be split at arbitrary points between calls.
//
// A length byte n < 128 announces n + 1 literal bytes; a length byte
// n > 128 announces 257 - n copies of the following byte; 128 is the
// end-of-data marker, after which decoding simply continues with the
// next byte treated as a length byte.
//
// Output is handed downstream in chunks rather than one byte per
// call: literal bytes are forwarded straight from the input, and a
// run is expanded into buf (unused while decoding; a run is at most
// 128 bytes) and written once.
void
Pl_RunLength::decode(unsigned char* data, size_t len)
{
    size_t i = 0;
    while (i < len)
    {
        switch (this->state)
        {
          case st_top:
            {
                unsigned char ch = data[i++];
                if (ch < 128)
                {
                    // length represents remaining number of bytes
                    // to copy
                    this->length = 1 + ch;
                    this->state = st_copying;
                }
                else if (ch > 128)
                {
                    // length represents number of copies of next
                    // byte
                    this->length = 257 - ch;
                    this->state = st_run;
                }
                else // ch == 128
                {
                    // EOD; stay in this state
                }
            }
            break;

          case st_copying:
            {
                // Forward as many of the remaining literal bytes as
                // this input chunk contains. length >= 1 and i < len
                // here, so at least one byte is always consumed.
                size_t available = len - i;
                size_t chunk =
                    (this->length < available) ? this->length : available;
                this->getNext()->write(data + i, chunk);
                i += chunk;
                this->length -= static_cast<unsigned int>(chunk);
                if (this->length == 0)
                {
                    this->state = st_top;
                }
            }
            break;

          case st_run:
            {
                // length is between 2 and 128 here, so the expanded
                // run always fits in buf.
                unsigned char ch = data[i++];
                for (unsigned int j = 0; j < this->length; ++j)
                {
                    this->buf[j] = ch;
                }
                this->getNext()->write(this->buf, this->length);
                this->state = st_top;
            }
            break;
        }
    }
}
// Write the encoder's buffered bytes to the next pipeline and reset
// to st_top with an empty buffer. A run of N identical bytes is
// written as the length byte 257 - N followed by the byte itself; a
// literal sequence of N bytes is written as the length byte N - 1
// followed by the N bytes. An empty buffer writes nothing.
// Throws std::logic_error if a run has fewer than 2 or more than 128
// bytes.
void
Pl_RunLength::flush_encode()
{
    // Test coverage bookkeeping only; no effect on the output.
    if (this->length == 128)
    {
        QTC::TC("libtests", "Pl_RunLength flush full buffer",
                (this->state == st_copying) ? 0 :
                (this->state == st_run) ? 1 :
                -1);
    }
    else if (this->length == 0)
    {
        QTC::TC("libtests", "Pl_RunLength flush empty buffer");
    }

    if (this->state == st_run)
    {
        bool const valid_run =
            (this->length >= 2) && (this->length <= 128);
        if (! valid_run)
        {
            throw std::logic_error(
                "Pl_RunLength: invalid length in flush_encode for run");
        }
        unsigned char header =
            static_cast<unsigned char>(257 - this->length);
        this->getNext()->write(&header, 1);
        this->getNext()->write(this->buf, 1);
    }
    else if (this->length > 0)
    {
        unsigned char header =
            static_cast<unsigned char>(this->length - 1);
        this->getNext()->write(&header, 1);
        this->getNext()->write(this->buf, this->length);
    }

    this->length = 0;
    this->state = st_top;
}
// Complete the stream. An encoder writes out whatever is still
// buffered and then the end-of-data byte (128). In both modes the
// next pipeline's finish() is then called.
void
Pl_RunLength::finish()
{
    // A decoder may have seen a length byte with no data after it,
    // meaning the input ended early. There is nothing sensible to do
    // about that, so the decoder side does no extra work here.
    bool const encoding = (this->action == a_encode);
    if (encoding)
    {
        flush_encode();
        unsigned char eod = 128;
        this->getNext()->write(&eod, 1);
    }
    this->getNext()->finish();
}

View File

@ -28,6 +28,7 @@ SRCS_libqpdf = \
libqpdf/Pl_PNGFilter.cc \ libqpdf/Pl_PNGFilter.cc \
libqpdf/Pl_QPDFTokenizer.cc \ libqpdf/Pl_QPDFTokenizer.cc \
libqpdf/Pl_RC4.cc \ libqpdf/Pl_RC4.cc \
libqpdf/Pl_RunLength.cc \
libqpdf/Pl_SHA2.cc \ libqpdf/Pl_SHA2.cc \
libqpdf/Pl_StdioFile.cc \ libqpdf/Pl_StdioFile.cc \
libqpdf/QPDF.cc \ libqpdf/QPDF.cc \

View File

@ -14,6 +14,7 @@ BINS_libtests = \
qutil \ qutil \
random \ random \
rc4 \ rc4 \
runlength \
sha2 sha2
TARGETS_libtests = $(foreach B,$(BINS_libtests),libtests/$(OUTPUT_DIR)/$(call binname,$(B))) TARGETS_libtests = $(foreach B,$(BINS_libtests),libtests/$(OUTPUT_DIR)/$(call binname,$(B)))

View File

@ -24,3 +24,6 @@ InputSource start_chars matched but not check 0
InputSource not enough bytes 0 InputSource not enough bytes 0
InputSource findLast found more than one 0 InputSource findLast found more than one 0
InputSource found match at buf[0] 0 InputSource found match at buf[0] 0
Pl_RunLength: switch to run 1
Pl_RunLength flush full buffer 1
Pl_RunLength flush empty buffer 0

View File

@ -0,0 +1,75 @@
#!/usr/bin/env perl
require 5.008;
use warnings;
use strict;

chdir("runlength") or die "chdir testdir failed: $!\n";

require TestDriver;

my $td = TestDriver->new('runlength');

cleanup();

# Each entry names an input file in the test directory; the file
# "<name>.encoded" holds the expected encoder output for it.
my @files = (
    "01",      # basic case, ends with copy
    "02",      # basic case, ends with run
    "03",      # long run run
    "04",      # ends with copy, length % 128 == 0
    "05",      # run starts at byte 128
    "empty",   # empty file
    );

# Create this rather than committing an empty file, which always looks
# like an error. Fail right away if it can't be created so the problem
# is not reported later as a confusing test failure.
open(my $empty_fh, ">", "empty") or die "can't create empty: $!\n";
close($empty_fh) or die "can't close empty: $!\n";

# Four tests per file: encode, compare with the expected encoding,
# decode the expected encoding, compare with the original input.
foreach my $f (@files)
{
    $td->runtest("encode $f",
                 {$td->COMMAND => "runlength -encode $f a"},
                 {$td->STRING => "", $td->EXIT_STATUS => 0});
    $td->runtest("check encoded output",
                 {$td->FILE => "a"},
                 {$td->FILE => "$f.encoded"});
    $td->runtest("decode $f.encoded",
                 {$td->COMMAND => "runlength -decode $f.encoded a"},
                 {$td->STRING => "", $td->EXIT_STATUS => 0});
    $td->runtest("check decoded output",
                 {$td->FILE => "a"},
                 {$td->FILE => "$f"});
}

# Two encoded streams back to back: the first one's end-of-data marker
# ends up in the middle of the input, and decoding is expected to
# produce both original files back to back.
concatenate("01.encoded", "02.encoded", "concat.encoded");
concatenate("01", "02", "concat");

$td->runtest("decode with embedded EOD",
             {$td->COMMAND => "runlength -decode concat.encoded a"},
             {$td->STRING => "", $td->EXIT_STATUS => 0});
$td->runtest("check decoded output",
             {$td->FILE => "a"},
             {$td->FILE => "concat"});

cleanup();

# 2 tests for the concatenated case plus 4 per entry in @files.
$td->report(2 + (4 * scalar(@files)));
# Remove the files this test script generates: the command output "a",
# the concatenated fixtures, and the generated empty input file.
sub cleanup
{
    my @generated = qw(a concat concat.encoded empty);
    system("rm", "-f", @generated);
}
# Write the contents of the first file followed by the contents of the
# second file to $out. All files are handled in binary mode because
# the encoded test files contain arbitrary bytes. Dies if any file
# can't be opened or the output can't be closed.
sub concatenate
{
    my ($first, $second, $out) = @_;
    open(my $out_fh, ">", $out)
        or die "can't open $out for writing: $!\n";
    binmode($out_fh);
    foreach my $f ($first, $second)
    {
        # Slurp mode: read the whole input file in one go.
        local $/ = undef;
        open(my $in_fh, "<", $f) or die "can't open $f: $!\n";
        binmode($in_fh);
        print {$out_fh} <$in_fh>;
        close($in_fh);
    }
    close($out_fh) or die "can't close $out: $!\n";
}

View File

@ -0,0 +1 @@
wwwwwwwwwwwwwwwwwwwwwwwwwqqqqqrstv

View File

@ -0,0 +1 @@
鑧黴rstv€

View File

@ -0,0 +1 @@
wwwwwwwwwwwwwwwwwwwwwwwwwqqqqqrstvxxxxxxxxxxxxxxxxxxxxxxx

View File

@ -0,0 +1 @@
鑧黴rstv陎€

BIN
libtests/qtest/runlength/03 Normal file

Binary file not shown.

Binary file not shown.

View File

@ -0,0 +1 @@
wwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwabababababababababababababababababababababababababababababababababababababababababababababababababababababababababababababababab

View File

@ -0,0 +1 @@
abababababababababababababababababababababababababababababababababababababababababababababababababababababababababababababababab€

View File

@ -0,0 +1 @@
ababababababababababababababababababababababababababababababababababababababababababababababababababababababababababababababababbbbbb

View File

@ -0,0 +1 @@
~abababababababababababababababababababababababababababababababababababababababababababababababababababababababababababababababa鸼€

View File

@ -0,0 +1 @@
<EFBFBD>

47
libtests/runlength.cc Normal file
View File

@ -0,0 +1,47 @@
#include <qpdf/Pl_RunLength.hh>
#include <qpdf/Pl_StdioFile.hh>
#include <qpdf/QUtil.hh>
#include <stdio.h>
#include <string.h>
#include <stdlib.h>
#include <exception>
#include <iostream>
// Print the command-line synopsis to stderr and exit with status 2.
static void usage()
{
    std::cerr << "Usage: runlength {-encode|-decode} infile outfile"
              << std::endl;
    exit(2);
}

// Test driver for Pl_RunLength: reads infile, passes its contents
// through a Pl_RunLength pipeline in the requested direction, and
// writes the result to outfile.
//
// Exit status is 0 on success and 2 on a usage error or when an
// exception is thrown while opening or processing the files; in the
// latter case the exception's message is written to stderr.
int main(int argc, char* argv[])
{
    if (argc != 4)
    {
        usage();
    }
    bool encode = (strcmp("-encode", argv[1]) == 0);
    if (! (encode || (strcmp("-decode", argv[1]) == 0)))
    {
        // Don't silently treat an unrecognized mode as -decode.
        usage();
    }
    char* infilename = argv[2];
    char* outfilename = argv[3];

    try
    {
        FILE* infile = QUtil::safe_fopen(infilename, "rb");
        FILE* outfile = QUtil::safe_fopen(outfilename, "wb");
        Pl_StdioFile out("stdout", outfile);
        Pl_RunLength rl(
            "runlength", &out,
            (encode ? Pl_RunLength::a_encode : Pl_RunLength::a_decode));

        // The read size is deliberately unrelated to the 128-byte
        // unit of the encoding, so encoder and decoder state has to
        // carry over between write() calls.
        unsigned char buf[100];
        size_t len = 0;
        while ((len = fread(buf, 1, sizeof(buf), infile)) > 0)
        {
            rl.write(buf, len);
        }
        rl.finish();
        fclose(infile);
        fclose(outfile);
    }
    catch (std::exception& e)
    {
        std::cerr << e.what() << std::endl;
        exit(2);
    }
    return 0;
}