2
1
mirror of https://github.com/qpdf/qpdf.git synced 2024-12-22 10:58:58 +00:00

Implement Pl_RunLength pipeline

This commit is contained in:
Jay Berkenbilt 2017-08-16 06:26:31 -04:00
parent e0d1cd1f4b
commit 2d2f619665
19 changed files with 352 additions and 3 deletions

6
TODO
View File

@ -33,9 +33,9 @@ Soon
prioritized so that we can poll all registered filters to see
whether they are capable of filtering a particular stream.
* If possible, consider adding RLE, CCITT3, CCITT4, or any other easy
filters. For some reference code that we probably can't use but
may be handy anyway, see
* If possible, consider adding CCITT3, CCITT4, or any other easy
filters. For some reference code that we probably can't use but may
be handy anyway, see
http://partners.adobe.com/public/developer/ps/sdk/index_archive.html
* If possible, support the following types of broken files:

View File

@ -0,0 +1,42 @@
// Copyright (c) 2005-2015 Jay Berkenbilt
//
// This file is part of qpdf. This software may be distributed under
// the terms of version 2 of the Artistic License which may be found
// in the source distribution. It is provided "as is" without express
// or implied warranty.
#ifndef __PL_RUNLENGTH_HH__
#define __PL_RUNLENGTH_HH__
#include <qpdf/Pipeline.hh>
// Pipeline stage that either run-length encodes or run-length decodes
// the bytes written to it, depending on the action given at
// construction, and passes the result to the next pipeline.
//
// Encoded form, as produced by the encoder and accepted by the decoder:
// a length byte n in 0..127 is followed by n + 1 literal bytes; a
// length byte n in 129..255 is followed by one byte that stands for
// 257 - n copies of itself; the byte 128 marks end of data.
class Pl_RunLength: public Pipeline
{
public:
// Direction of the transformation performed by write().
enum action_e { a_encode, a_decode };
// identifier and next are passed through to the Pipeline base class;
// action selects encoding or decoding for the life of the object.
QPDF_DLL
Pl_RunLength(char const* identifier, Pipeline* next,
action_e action);
QPDF_DLL
virtual ~Pl_RunLength();
// Feed len bytes at data through the encoder or decoder. May be
// called repeatedly; state is carried across calls.
QPDF_DLL
virtual void write(unsigned char* data, size_t len);
// When encoding, flush any buffered bytes and write the end-of-data
// byte (128). In both modes, then call finish() on the next pipeline.
QPDF_DLL
virtual void finish();
private:
void encode(unsigned char* data, size_t len);
void decode(unsigned char* data, size_t len);
// Emit whatever the encoder currently has buffered (a run or a
// literal sequence) and reset state to st_top with length 0.
void flush_encode();
// Encoder: st_top = zero or one byte buffered, st_copying = buffering
// a literal sequence, st_run = buffering a run of identical bytes.
// Decoder: st_top = expecting a length byte, st_copying = passing
// literal bytes through, st_run = expecting the byte to repeat.
enum state_e { st_top, st_copying, st_run };
action_e action;
state_e state;
// Encoder's pending bytes; 128 is the longest literal sequence or run
// that one length byte can describe.
unsigned char buf[128];
// Encoder: number of bytes held in buf. Decoder: literal bytes still
// to copy (st_copying) or repeat count for the next byte (st_run).
unsigned int length;
};
#endif // __PL_RUNLENGTH_HH__

171
libqpdf/Pl_RunLength.cc Normal file
View File

@ -0,0 +1,171 @@
#include <qpdf/Pl_RunLength.hh>
#include <qpdf/QUtil.hh>
#include <qpdf/QTC.hh>
// Construct a run-length pipeline in its initial state: st_top with
// nothing pending. identifier and next are handed to the Pipeline base
// class; action fixes whether write() encodes or decodes.
// buf is deliberately left uninitialized: encode() only reads
// buf[length-1] after checking length > 0.
Pl_RunLength::Pl_RunLength(char const* identifier, Pipeline* next,
action_e action) :
Pipeline(identifier, next),
action(action),
state(st_top),
length(0)
{
}
// Intentionally empty. Note that bytes still buffered by the encoder
// are only emitted by finish(), not here.
Pl_RunLength::~Pl_RunLength()
{
}
// Pipeline entry point: route the incoming bytes to the encoder or the
// decoder according to the action chosen at construction time.
void
Pl_RunLength::write(unsigned char* data, size_t len)
{
    if (this->action != a_encode)
    {
        decode(data, len);
        return;
    }
    encode(data, len);
}
// Run-length encode len bytes from data. Bytes are accumulated in buf
// (at most 128) as either a literal sequence (st_copying) or a run of
// identical bytes (st_run) and handed to flush_encode() whenever the
// pending group has to end. Pending bytes survive across calls; the
// final group is emitted by finish().
// Throws std::logic_error if the state/length invariant is violated.
void
Pl_RunLength::encode(unsigned char* data, size_t len)
{
for (size_t i = 0; i < len; ++i)
{
// Invariant: st_top means zero or one byte is buffered; st_copying
// and st_run always have at least two.
if ((this->state == st_top) != (this->length <= 1))
{
throw std::logic_error(
"Pl_RunLength::encode: state/length inconsistency");
}
unsigned char ch = data[i];
// Same byte as the previous one: start or extend a run. A run that
// already holds 128 bytes cannot grow, so that case falls through
// to the else branch; a literal sequence of any length can still
// switch to a run.
if ((this->length > 0) &&
((this->state == st_copying) || (this->length < 128)) &&
(ch == this->buf[this->length-1]))
{
QTC::TC("libtests", "Pl_RunLength: switch to run",
(this->length == 128) ? 0 : 1);
if (this->state == st_copying)
{
// The last literal byte is really the first byte of the new
// run: drop it from the literal sequence, emit the remaining
// literals, then re-seed the buffer with it (flush_encode()
// resets length to 0).
--this->length;
flush_encode();
this->buf[0] = ch;
this->length = 1;
}
this->state = st_run;
this->buf[this->length] = ch;
++this->length;
}
else
{
// ch does not continue a run (or the run is full).
if ((this->length == 128) || (this->state == st_run))
{
// Buffer is full or a run just ended: emit it and start over
// from st_top with an empty buffer.
flush_encode();
}
else if (this->length > 0)
{
// A second distinct byte after st_top, or another literal byte
// while already copying.
this->state = st_copying;
}
this->buf[this->length] = ch;
++this->length;
}
}
}
// Run-length decode len bytes from data and write the expanded bytes
// to the next pipeline. Decoder state (state/length) persists across
// calls, so a length byte and its payload may arrive in separate
// write() calls.
//
// Encoded format handled here:
//   length byte 0..127   -> copy the next (length byte + 1) bytes
//   length byte 129..255 -> repeat the next byte (257 - length byte) times
//   length byte 128      -> end-of-data marker; ignored, so decoding
//                           resumes with whatever follows it
//
// Output is handed to the next pipeline in spans (one write per
// literal span available in this buffer, one write per run) instead
// of one virtual call per byte; the resulting byte stream is unchanged.
void
Pl_RunLength::decode(unsigned char* data, size_t len)
{
    size_t i = 0;
    while (i < len)
    {
        switch (this->state)
        {
          case st_top:
            {
                unsigned char ch = data[i++];
                if (ch < 128)
                {
                    // length represents remaining number of bytes to copy
                    this->length = 1 + ch;
                    this->state = st_copying;
                }
                else if (ch > 128)
                {
                    // length represents number of copies of next byte
                    this->length = 257 - ch;
                    this->state = st_run;
                }
                else // ch == 128
                {
                    // EOD; stay in this state
                }
            }
            break;

          case st_copying:
            {
                // Forward as many literal bytes as this buffer holds,
                // up to the number still owed. length >= 1 and
                // i < len here, so span >= 1 and the loop always
                // makes progress.
                size_t available = len - i;
                size_t span = (available < this->length)
                    ? available : this->length;
                this->getNext()->write(data + i, span);
                i += span;
                this->length -= static_cast<unsigned int>(span);
                if (this->length == 0)
                {
                    this->state = st_top;
                }
            }
            break;

          case st_run:
            {
                // length was set to 257 - ch with ch in 129..255, so
                // it is in 2..128 and always fits in run[].
                unsigned char run[128];
                unsigned char ch = data[i++];
                for (unsigned int j = 0; j < this->length; ++j)
                {
                    run[j] = ch;
                }
                this->getNext()->write(run, this->length);
                this->state = st_top;
            }
            break;
        }
    }
}
void
Pl_RunLength::flush_encode()
{
if (this->length == 128)
{
QTC::TC("libtests", "Pl_RunLength flush full buffer",
(this->state == st_copying ? 0 :
this->state == st_run ? 1 :
-1));
}
if (this->length == 0)
{
QTC::TC("libtests", "Pl_RunLength flush empty buffer");
}
if (this->state == st_run)
{
if ((this->length < 2) || (this->length > 128))
{
throw std::logic_error(
"Pl_RunLength: invalid length in flush_encode for run");
}
unsigned char ch = static_cast<unsigned char>(257 - this->length);
this->getNext()->write(&ch, 1);
this->getNext()->write(&this->buf[0], 1);
}
else if (this->length > 0)
{
unsigned char ch = static_cast<unsigned char>(this->length - 1);
this->getNext()->write(&ch, 1);
this->getNext()->write(this->buf, this->length);
}
this->state = st_top;
this->length = 0;
}
// End of input. An encoder emits its last pending group followed by
// the end-of-data byte (128). A decoder has nothing to flush: a
// trailing length byte with no data after it means the stream was cut
// short, and that case is deliberately ignored. Either way the next
// pipeline is then finished.
void
Pl_RunLength::finish()
{
    bool const encoding = (this->action == a_encode);
    if (encoding)
    {
        flush_encode();
        unsigned char eod = 128;
        this->getNext()->write(&eod, 1);
    }
    this->getNext()->finish();
}

View File

@ -28,6 +28,7 @@ SRCS_libqpdf = \
libqpdf/Pl_PNGFilter.cc \
libqpdf/Pl_QPDFTokenizer.cc \
libqpdf/Pl_RC4.cc \
libqpdf/Pl_RunLength.cc \
libqpdf/Pl_SHA2.cc \
libqpdf/Pl_StdioFile.cc \
libqpdf/QPDF.cc \

View File

@ -14,6 +14,7 @@ BINS_libtests = \
qutil \
random \
rc4 \
runlength \
sha2
TARGETS_libtests = $(foreach B,$(BINS_libtests),libtests/$(OUTPUT_DIR)/$(call binname,$(B)))

View File

@ -24,3 +24,6 @@ InputSource start_chars matched but not check 0
InputSource not enough bytes 0
InputSource findLast found more than one 0
InputSource found match at buf[0] 0
Pl_RunLength: switch to run 1
Pl_RunLength flush full buffer 1
Pl_RunLength flush empty buffer 0

View File

@ -0,0 +1,75 @@
#!/usr/bin/env perl
# Test driver for the runlength test program: for every sample file,
# encode it and compare against the expected encoding, then decode the
# expected encoding and compare against the original. A final case
# decodes two encoded files joined together, i.e. input with an
# end-of-data marker in the middle.
require 5.008;
use warnings;
use strict;
chdir("runlength") or die "chdir testdir failed: $!\n";
require TestDriver;
my $td = new TestDriver('runlength');
cleanup();
my @files = (
    "01", # basic case, ends with copy
    "02", # basic case, ends with run
    "03", # long run
    "04", # ends with copy, length % 128 == 0
    "05", # run starts at byte 128
    "empty", # empty file
    );
# Create this rather than committing an empty file, which always looks
# like an error. Fail loudly if it can't be created; otherwise the
# "empty" cases would fail later with a misleading message.
open(my $empty, '>', "empty") or die "can't create empty: $!\n";
close($empty) or die "can't close empty: $!\n";
foreach my $f (@files)
{
    $td->runtest("encode $f",
                 {$td->COMMAND => "runlength -encode $f a"},
                 {$td->STRING => "", $td->EXIT_STATUS => 0});
    $td->runtest("check encoded output",
                 {$td->FILE => "a"},
                 {$td->FILE => "$f.encoded"});
    $td->runtest("decode $f.encoded",
                 {$td->COMMAND => "runlength -decode $f.encoded a"},
                 {$td->STRING => "", $td->EXIT_STATUS => 0});
    $td->runtest("check decoded output",
                 {$td->FILE => "a"},
                 {$td->FILE => "$f"});
}
concatenate("01.encoded", "02.encoded", "concat.encoded");
concatenate("01", "02", "concat");
$td->runtest("decode with embedded EOD",
             {$td->COMMAND => "runlength -decode concat.encoded a"},
             {$td->STRING => "", $td->EXIT_STATUS => 0});
$td->runtest("check decoded output",
             {$td->FILE => "a"},
             {$td->FILE => "concat"});
cleanup();
# Four tests per sample file plus the two embedded-EOD tests.
$td->report(2 + (4 * scalar(@files)));
# Remove every scratch file this test creates; missing files are not
# an error because of rm's -f.
sub cleanup
{
    my @scratch = qw(a concat concat.encoded empty);
    system("rm -f @scratch");
}
# concatenate(first, second, out): write the contents of first
# followed by the contents of second to out. The inputs include
# encoded (binary) data, so every handle is put in binmode, and any
# failure to open or close a file aborts the test run instead of
# silently producing a short file.
sub concatenate
{
    my ($first, $second, $out) = @_;
    open(my $dest, '>', $out) or die "can't open $out for writing: $!\n";
    binmode($dest);
    foreach my $f ($first, $second)
    {
        # Slurp mode: read the whole file in one go.
        local $/ = undef;
        open(my $src, '<', $f) or die "can't open $f for reading: $!\n";
        binmode($src);
        print {$dest} <$src>;
        close($src);
    }
    close($dest) or die "can't close $out: $!\n";
}

View File

@ -0,0 +1 @@
wwwwwwwwwwwwwwwwwwwwwwwwwqqqqqrstv

View File

@ -0,0 +1 @@
鑧黴rstv€

View File

@ -0,0 +1 @@
wwwwwwwwwwwwwwwwwwwwwwwwwqqqqqrstvxxxxxxxxxxxxxxxxxxxxxxx

View File

@ -0,0 +1 @@
鑧黴rstv陎€

BIN
libtests/qtest/runlength/03 Normal file

Binary file not shown.

Binary file not shown.

View File

@ -0,0 +1 @@
wwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwabababababababababababababababababababababababababababababababababababababababababababababababababababababababababababababababab

View File

@ -0,0 +1 @@
abababababababababababababababababababababababababababababababababababababababababababababababababababababababababababababababab€

View File

@ -0,0 +1 @@
ababababababababababababababababababababababababababababababababababababababababababababababababababababababababababababababababbbbbb

View File

@ -0,0 +1 @@
~abababababababababababababababababababababababababababababababababababababababababababababababababababababababababababababababa鸼€

View File

@ -0,0 +1 @@
<EFBFBD>

47
libtests/runlength.cc Normal file
View File

@ -0,0 +1,47 @@
#include <qpdf/Pl_RunLength.hh>
#include <qpdf/Pl_StdioFile.hh>
#include <qpdf/QUtil.hh>
#include <stdio.h>
#include <string.h>
#include <iostream>
#include <stdlib.h>
// Test driver: run-length encode or decode infile into outfile.
//
// Usage: runlength {-encode|-decode} infile outfile
//
// Exits with status 2 on a usage error (wrong argument count or an
// unrecognized mode flag) or if reading infile fails; returns 0 on
// success.
int main(int argc, char* argv[])
{
    // Accept only the two documented mode flags; anything else used
    // to be treated silently as -decode.
    bool const encode = (argc == 4) && (strcmp("-encode", argv[1]) == 0);
    bool const decode = (argc == 4) && (strcmp("-decode", argv[1]) == 0);
    if (! (encode || decode))
    {
        std::cerr << "Usage: runlength {-encode|-decode} infile outfile"
                  << std::endl;
        exit(2);
    }

    char* infilename = argv[2];
    char* outfilename = argv[3];
    FILE* infile = QUtil::safe_fopen(infilename, "rb");
    FILE* outfile = QUtil::safe_fopen(outfilename, "wb");
    Pl_StdioFile out("stdout", outfile);
    Pl_RunLength rl(
        "runlength", &out,
        (encode ? Pl_RunLength::a_encode : Pl_RunLength::a_decode));

    // Feed the input through the pipeline in small chunks so that
    // groups which straddle a write() boundary get exercised.
    unsigned char buf[100];
    while (true)
    {
        size_t len = fread(buf, 1, sizeof(buf), infile);
        if (len == 0)
        {
            break;
        }
        rl.write(buf, len);
    }
    // fread returns 0 for both end of file and failure; tell them apart.
    if (ferror(infile))
    {
        std::cerr << "runlength: error reading " << infilename << std::endl;
        exit(2);
    }

    rl.finish();
    fclose(infile);
    fclose(outfile);
    return 0;
}