2
1
mirror of https://github.com/qpdf/qpdf.git synced 2024-12-22 10:58:58 +00:00

Add internal Pl_Base64

Bidirectional base64; will be used by JSON v2.
This commit is contained in:
Jay Berkenbilt 2022-05-02 15:46:07 -04:00
parent f07284da18
commit 3d9bac43da
17 changed files with 371 additions and 1 deletions

1
TODO
View File

@ -45,7 +45,6 @@ notes from 5/2:
Need new pipelines: Need new pipelines:
* Pl_OStream(std::ostream) with semantics like Pl_StdioFile * Pl_OStream(std::ostream) with semantics like Pl_StdioFile
* Pl_String to std::string with semantics like Pl_Buffer * Pl_String to std::string with semantics like Pl_Buffer
* Pl_Base64
New Pipeline methods: New Pipeline methods:
* writeString(std::string const&) * writeString(std::string const&)

View File

@ -35,6 +35,7 @@ set(libqpdf_SOURCES
Pl_AES_PDF.cc Pl_AES_PDF.cc
Pl_ASCII85Decoder.cc Pl_ASCII85Decoder.cc
Pl_ASCIIHexDecoder.cc Pl_ASCIIHexDecoder.cc
Pl_Base64.cc
Pl_Buffer.cc Pl_Buffer.cc
Pl_Concatenate.cc Pl_Concatenate.cc
Pl_Count.cc Pl_Count.cc

191
libqpdf/Pl_Base64.cc Normal file
View File

@ -0,0 +1,191 @@
#include <qpdf/Pl_Base64.hh>
#include <qpdf/QIntC.hh>
#include <qpdf/QUtil.hh>
#include <algorithm>
#include <cstring>
#include <stdexcept>
// Narrow an unsigned integer value to a plain char.
static char
to_c(unsigned int ch)
{
    char const narrowed = static_cast<char>(ch);
    return narrowed;
}
// Narrow an int to an unsigned char (value is reduced modulo 256).
static unsigned char
to_uc(int ch)
{
    unsigned char const byte = static_cast<unsigned char>(ch);
    return byte;
}
// Identity helper: hands back its argument as an int. Callers pass
// narrower integer types and rely on the implicit promotion at the
// call site.
static int
to_i(int i)
{
    return i;
}
Pl_Base64::Pl_Base64(char const* identifier, Pipeline* next, action_e action) :
Pipeline(identifier, next),
action(action),
pos(0),
end_of_data(false),
finished(false)
{
reset();
}
// Feed len bytes from data through the encoder or decoder, depending
// on the action chosen at construction.
//
// Throws std::logic_error if called after finish().
void
Pl_Base64::write(unsigned char* data, size_t len)
{
    if (finished) {
        throw std::logic_error("Pl_Base64 used after finished");
    }
    if (this->action != a_decode) {
        encode(data, len);
        return;
    }
    decode(data, len);
}
// Accumulate base64 input characters into buf, skipping whitespace,
// and flush each time four characters have been collected.
void
Pl_Base64::decode(unsigned char* data, size_t len)
{
    for (size_t i = 0; i < len; ++i) {
        unsigned char const c = data[i];
        if (QUtil::is_space(to_c(c))) {
            // Whitespace is not part of the encoded data.
            continue;
        }
        this->buf[this->pos] = c;
        ++this->pos;
        if (this->pos == 4) {
            flush();
        }
    }
}
// Accumulate raw input bytes into buf and flush each time three bytes
// (one full base64 group) have been collected.
void
Pl_Base64::encode(unsigned char* data, size_t len)
{
    for (size_t i = 0; i < len; ++i) {
        this->buf[this->pos] = data[i];
        ++this->pos;
        if (this->pos == 3) {
            flush();
        }
    }
}
// Emit whatever is staged in buf using the direction-specific helper,
// then clear the staging buffer. If the helper throws, the buffer is
// left untouched.
void
Pl_Base64::flush()
{
    if (this->action != a_decode) {
        flush_encode();
    } else {
        flush_decode();
    }
    reset();
}
// Decode the four characters staged in buf and write the resulting
// bytes (three, less one per pad character) to the next pipeline.
//
// Both the standard alphabet ('+', '/') and the URL-safe alphabet
// ('-', '_') are accepted. '=' is accepted only as the last character
// or as the last two characters of the group.
//
// Throws std::runtime_error if a previous group already contained
// padding, or if the group contains a character that is not valid in
// its position.
void
Pl_Base64::flush_decode()
{
    if (this->end_of_data) {
        throw std::runtime_error(
            getIdentifier() + ": base64 decode: data follows pad characters");
    }

    int n_pad = 0;
    int bits = 0;
    for (size_t idx = 0; idx < 4; ++idx) {
        char const c = to_c(this->buf[idx]);
        int sextet = 0;
        if (c >= 'A' && c <= 'Z') {
            sextet = c - 'A';
        } else if (c >= 'a' && c <= 'z') {
            sextet = 26 + (c - 'a');
        } else if (c >= '0' && c <= '9') {
            sextet = 52 + (c - '0');
        } else if (c == '+' || c == '-') {
            sextet = 62;
        } else if (c == '/' || c == '_') {
            sextet = 63;
        } else {
            // The only remaining legal character is a pad, and only in
            // slot 3, or in slot 2 when slot 3 is also a pad.
            bool const pad_allowed =
                (idx == 3) || ((idx == 2) && (this->buf[3] == '='));
            if (!((c == '=') && pad_allowed)) {
                throw std::runtime_error(
                    getIdentifier() + ": base64 decode: invalid input");
            }
            ++n_pad;
            this->end_of_data = true;
        }
        // Append six more bits; after four rounds the first sextet
        // occupies bits 18-23.
        bits = (bits << 6) | sextet;
    }

    unsigned char decoded[3] = {
        to_uc(bits >> 16),
        to_uc((bits >> 8) & 0xff),
        to_uc(bits & 0xff),
    };
    getNext()->write(decoded, QIntC::to_size(3 - n_pad));
}
// Encode the bytes staged in buf as one four-character base64 group
// and write it to the next pipeline. Unfilled slots of buf are zero
// (see reset()), and the output characters that correspond to missing
// input bytes are replaced with '=' padding.
void
Pl_Base64::flush_encode()
{
    static char const alphabet[] =
        "ABCDEFGHIJKLMNOPQRSTUVWXYZ"
        "abcdefghijklmnopqrstuvwxyz"
        "0123456789+/";

    // Pack the three staged bytes into a 24-bit group.
    int const group =
        (this->buf[0] << 16) | (this->buf[1] << 8) | this->buf[2];

    unsigned char out[4];
    int shift = 18;
    for (size_t i = 0; i < 4; ++i, shift -= 6) {
        int const sextet = to_i((group >> shift) & 0x3f);
        out[i] = to_uc(alphabet[sextet]);
    }

    // pos input bytes yield pos + 1 significant characters; the rest
    // of the group is padding.
    for (size_t j = this->pos + 1; j < 4; ++j) {
        out[j] = '=';
    }
    getNext()->write(out, 4);
}
// Flush any partially filled group, mark this pipeline as finished,
// and propagate finish() to the next pipeline.
//
// When decoding, a short final group is completed with '=' before it
// is flushed, so input whose trailing padding was omitted is handled
// like padded input (and a lone trailing character is rejected by the
// decoder as invalid).
void
Pl_Base64::finish()
{
    bool const have_partial_group = (this->pos > 0);
    if (have_partial_group) {
        if (finished) {
            throw std::logic_error("Pl_Base64 used after finished");
        }
        if (this->action == a_decode) {
            std::fill(
                this->buf + this->pos,
                this->buf + 4,
                static_cast<unsigned char>('='));
        }
        flush();
    }
    this->finished = true;
    getNext()->finish();
}
// Clear the staging buffer and rewind the write position. Zeroing buf
// matters for encoding: a short final group is packed as if the
// missing bytes were zero.
void
Pl_Base64::reset()
{
    std::memset(this->buf, 0, sizeof(this->buf));
    this->pos = 0;
}

30
libqpdf/qpdf/Pl_Base64.hh Normal file
View File

@ -0,0 +1,30 @@
#ifndef PL_BASE64_HH
#define PL_BASE64_HH
#include <qpdf/Pipeline.hh>
class Pl_Base64: public Pipeline
{
public:
enum action_e { a_encode, a_decode };
Pl_Base64(char const* identifier, Pipeline* next, action_e);
virtual ~Pl_Base64() = default;
virtual void write(unsigned char* buf, size_t len) override;
virtual void finish() override;
private:
void decode(unsigned char* buf, size_t len);
void encode(unsigned char* buf, size_t len);
void flush();
void flush_decode();
void flush_encode();
void reset();
action_e action;
unsigned char buf[4];
size_t pos;
bool end_of_data;
bool finished;
};
#endif // PL_BASE64_HH

View File

@ -3,6 +3,7 @@ set(TEST_PROGRAMS
aes aes
arg_parser arg_parser
ascii85 ascii85
base64
bits bits
buffer buffer
closed_file_input_source closed_file_input_source

81
libtests/base64.cc Normal file
View File

@ -0,0 +1,81 @@
#include <qpdf/Pl_Base64.hh>
#include <qpdf/Pl_StdioFile.hh>
#include <qpdf/QUtil.hh>
#include <cassert>
#include <cstdlib>
#include <cstring>
#include <iostream>
#include <stdexcept>
// Read up to `bytes` bytes from f and write whatever was read to p.
//
// Returns true when exactly `bytes` bytes were read, meaning the
// caller should keep going. On a short read the pipeline is finished
// and false is returned; if the short read was caused by a read error,
// a message is printed and the program exits with status 2.
static bool
write_some(FILE* f, size_t bytes, Pipeline* p)
{
    unsigned char chunk[1000];
    assert(bytes <= sizeof(chunk));

    size_t const n_read = fread(chunk, 1, bytes, f);
    if (n_read > 0) {
        p->write(chunk, n_read);
    }
    if (n_read == bytes) {
        return true;
    }
    if (n_read < bytes) {
        if (ferror(f)) {
            std::cerr << "error reading file" << std::endl;
            exit(2);
        }
        p->finish();
    }
    return false;
}
// Print the command-line synopsis to standard error and exit with
// status 2. Does not return.
static void
usage()
{
    char const* const synopsis = "Usage: base64 encode|decode";
    std::cerr << synopsis << std::endl;
    std::exit(2);
}
int
main(int argc, char* argv[])
{
if (argc != 2) {
usage();
}
QUtil::binary_stdout();
QUtil::binary_stdin();
Pl_Base64::action_e action = Pl_Base64::a_decode;
if (strcmp(argv[1], "encode") == 0) {
action = Pl_Base64::a_encode;
} else if (strcmp(argv[1], "decode") != 0) {
usage();
}
try {
Pl_StdioFile out("stdout", stdout);
Pl_Base64 decode("decode", &out, action);
// The comments are "n: n%4 n%3", where n is the number of
// bytes read at the end of the call, and are there to
// indicate that we are reading in chunks that exercise
// various boundary conditions around subsequent writes and
// the state of buf and pos. There are some writes that don't
// do flush at all, some that call flush multiple times, and
// some that start in the middle and do flush, and this is
// true for both encode and decode.
if (write_some(stdin, 1, &decode) && // 1: 1 1
write_some(stdin, 4, &decode) && // 5: 1 2
write_some(stdin, 2, &decode) && // 7: 3 1
write_some(stdin, 2, &decode) && // 9: 1 0
write_some(stdin, 7, &decode) && // 16: 0 1
write_some(stdin, 1, &decode) && // 17: 1 2
write_some(stdin, 9, &decode) && // 26: 2 2
write_some(stdin, 2, &decode)) { // 28: 0 1
while (write_some(stdin, 1000, &decode)) {
}
}
} catch (std::exception& e) {
std::cout << "exception: " << e.what() << std::endl;
exit(2);
}
return 0;
}

View File

@ -0,0 +1,58 @@
#!/usr/bin/env perl
require 5.008;
use warnings;
use strict;
# Run from the directory that holds the numbered fixture files.
chdir("base64") or die "chdir testdir failed: $!\n";

require TestDriver;

my $td = new TestDriver('base64');

cleanup();

# ** Do not use normalize newlines on these tests. **

# Round-trip each numbered fixture: N.dec must encode to N.enc, and
# N.enc must decode back to N.dec.
my $n = 5;
for (my $i = 1; $i <= $n; ++$i)
{
    $td->runtest("encode $i",
                 {$td->COMMAND => "base64 encode < $i.dec"},
                 {$td->FILE => "$i.enc", $td->EXIT_STATUS => 0});
    $td->runtest("decode $i",
                 {$td->COMMAND => "base64 decode < $i.enc"},
                 {$td->FILE => "$i.dec", $td->EXIT_STATUS => 0});
}

# The last data character carries bits that are thrown away when
# decoding; they do not have to be zero.
$td->runtest("non-zero discard bits",
             {$td->COMMAND => "echo c2FsYWR= | base64 decode"},
             {$td->STRING => "salad", $td->EXIT_STATUS => 0});

# Decode the same value spelled with the standard and the URL-safe
# alphabet into files a and b, then check that the results match.
$td->runtest("write with +/",
             {$td->COMMAND => "echo +/== | base64 decode > a"},
             {$td->STRING => "", $td->EXIT_STATUS => 0});
$td->runtest("write with -_",
             {$td->COMMAND => "echo -_== | base64 decode > b"},
             {$td->STRING => "", $td->EXIT_STATUS => 0});
$td->runtest("interchangeability of +/ and -_",
             {$td->FILE => "a"},
             {$td->FILE => "b"});

# Error cases: the test program reports the exception text and exits
# with status 2.
$td->runtest("invalid characters",
             {$td->COMMAND => "echo aaaaa! | base64 decode"},
             {$td->REGEXP => ".*invalid input.*", $td->EXIT_STATUS => 2});
$td->runtest("invalid pad",
             {$td->COMMAND => "echo a= | base64 decode"},
             {$td->REGEXP => ".*invalid input.*", $td->EXIT_STATUS => 2});
$td->runtest("data after pad",
             {$td->COMMAND => "echo aa==potato | base64 decode"},
             {$td->REGEXP => ".*data follows pad characters.*",
              $td->EXIT_STATUS => 2});

cleanup();

# Seven standalone tests plus an encode/decode pair per fixture.
$td->report(7 + (2 * $n));
# Remove the scratch files written by the "+/" and "-_" decode tests.
sub cleanup
{
    my @scratch = ('a', 'b');
    unlink(@scratch);
}

View File

@ -0,0 +1 @@
123

View File

@ -0,0 +1 @@
MTIz

View File

@ -0,0 +1 @@
1234

View File

@ -0,0 +1 @@
MTIzNA==

View File

@ -0,0 +1 @@
This file has a multiple of four bytes and is longer than four bytes...

View File

@ -0,0 +1 @@
VGhpcyBmaWxlIGhhcyBhIG11bHRpcGxlIG9mIGZvdXIgYnl0ZXMgYW5kIGlzIGxvbmdlciB0aGFuIGZvdXIgYnl0ZXMuLi4K

View File

@ -0,0 +1 @@
This file has a non-multiple of four bytes and is longer than four bytes.

View File

@ -0,0 +1 @@
VGhpcyBmaWxlIGhhcyBhIG5vbi1tdWx0aXBsZSBvZiBmb3VyIGJ5dGVzIGFuZCBpcyBsb25nZXIgdGhhbiBmb3VyIGJ5dGVzLgo=

BIN
libtests/qtest/base64/5.dec Normal file

Binary file not shown.

View File

@ -0,0 +1 @@
VGhpcyBmaWxlIGFzIG9uZSBvZiBldmVyeSBieXRlIGluIGl0LiBOb3QgdGhhdCBpdCByZWFsbHkgbWFrZXMgYW55CmRpZmZlcmVuY2UsIGJ1dCB3aHkgbm90LgoKAAECAwQFBgcICQoLDA0ODxAREhMUFRYXGBkaGxwdHh8gISIjJCUmJygpKissLS4vMDEyMzQ1Njc4OTo7PD0+P0BBQkNERUZHSElKS0xNTk9QUVJTVFVWV1hZWltcXV5fYGFiY2RlZmdoaWprbG1ub3BxcnN0dXZ3eHl6e3x9fn+AgYKDhIWGh4iJiouMjY6PkJGSk5SVlpeYmZqbnJ2en6ChoqOkpaanqKmqq6ytrq+wsbKztLW2t7i5uru8vb6/wMHCw8TFxsfIycrLzM3Oz9DR0tPU1dbX2Nna29zd3t/g4eLj5OXm5+jp6uvs7e7v8PHy8/T19vf4+fr7/P3+/wo=