mirror of
https://github.com/qpdf/qpdf.git
synced 2024-12-22 10:58:58 +00:00
Add internal Pl_Base64
Bidirectional base64; will be used by JSON v2.
This commit is contained in:
parent
f07284da18
commit
3d9bac43da
1
TODO
1
TODO
@ -45,7 +45,6 @@ notes from 5/2:
|
|||||||
Need new pipelines:
|
Need new pipelines:
|
||||||
* Pl_OStream(std::ostream) with semantics like Pl_StdioFile
|
* Pl_OStream(std::ostream) with semantics like Pl_StdioFile
|
||||||
* Pl_String to std::string with semantics like Pl_Buffer
|
* Pl_String to std::string with semantics like Pl_Buffer
|
||||||
* Pl_Base64
|
|
||||||
|
|
||||||
New Pipeline methods:
|
New Pipeline methods:
|
||||||
* writeString(std::string const&)
|
* writeString(std::string const&)
|
||||||
|
@ -35,6 +35,7 @@ set(libqpdf_SOURCES
|
|||||||
Pl_AES_PDF.cc
|
Pl_AES_PDF.cc
|
||||||
Pl_ASCII85Decoder.cc
|
Pl_ASCII85Decoder.cc
|
||||||
Pl_ASCIIHexDecoder.cc
|
Pl_ASCIIHexDecoder.cc
|
||||||
|
Pl_Base64.cc
|
||||||
Pl_Buffer.cc
|
Pl_Buffer.cc
|
||||||
Pl_Concatenate.cc
|
Pl_Concatenate.cc
|
||||||
Pl_Count.cc
|
Pl_Count.cc
|
||||||
|
191
libqpdf/Pl_Base64.cc
Normal file
191
libqpdf/Pl_Base64.cc
Normal file
@ -0,0 +1,191 @@
|
|||||||
|
#include <qpdf/Pl_Base64.hh>
|
||||||
|
|
||||||
|
#include <qpdf/QIntC.hh>
|
||||||
|
#include <qpdf/QUtil.hh>
|
||||||
|
#include <algorithm>
|
||||||
|
#include <cstring>
|
||||||
|
#include <stdexcept>
|
||||||
|
|
||||||
|
// Convert an unsigned integer value to a plain char so that it can be
// compared against character literals.
static char
to_c(unsigned int value)
{
    char const as_char = static_cast<char>(value);
    return as_char;
}
|
||||||
|
|
||||||
|
// Convert an int to unsigned char. Values outside the range of unsigned
// char are reduced modulo that range by the conversion.
static unsigned char
to_uc(int value)
{
    auto const as_byte = static_cast<unsigned char>(value);
    return as_byte;
}
|
||||||
|
|
||||||
|
// Return the argument as an int. The value is already an int by the time
// it arrives here (narrower integer arguments are promoted at the call),
// so it is handed back unchanged.
static int
to_i(int value)
{
    return value;
}
|
||||||
|
|
||||||
|
Pl_Base64::Pl_Base64(char const* identifier, Pipeline* next, action_e action) :
|
||||||
|
Pipeline(identifier, next),
|
||||||
|
action(action),
|
||||||
|
pos(0),
|
||||||
|
end_of_data(false),
|
||||||
|
finished(false)
|
||||||
|
{
|
||||||
|
reset();
|
||||||
|
}
|
||||||
|
|
||||||
|
void
|
||||||
|
Pl_Base64::write(unsigned char* data, size_t len)
|
||||||
|
{
|
||||||
|
if (finished) {
|
||||||
|
throw std::logic_error("Pl_Base64 used after finished");
|
||||||
|
}
|
||||||
|
if (this->action == a_decode) {
|
||||||
|
decode(data, len);
|
||||||
|
} else {
|
||||||
|
encode(data, len);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
void
|
||||||
|
Pl_Base64::decode(unsigned char* data, size_t len)
|
||||||
|
{
|
||||||
|
unsigned char* p = data;
|
||||||
|
while (len > 0) {
|
||||||
|
if (!QUtil::is_space(to_c(*p))) {
|
||||||
|
this->buf[this->pos++] = *p;
|
||||||
|
if (this->pos == 4) {
|
||||||
|
flush();
|
||||||
|
}
|
||||||
|
}
|
||||||
|
++p;
|
||||||
|
--len;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
void
|
||||||
|
Pl_Base64::encode(unsigned char* data, size_t len)
|
||||||
|
{
|
||||||
|
unsigned char* p = data;
|
||||||
|
while (len > 0) {
|
||||||
|
this->buf[this->pos++] = *p;
|
||||||
|
if (this->pos == 3) {
|
||||||
|
flush();
|
||||||
|
}
|
||||||
|
++p;
|
||||||
|
--len;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
void
|
||||||
|
Pl_Base64::flush()
|
||||||
|
{
|
||||||
|
if (this->action == a_decode) {
|
||||||
|
flush_decode();
|
||||||
|
} else {
|
||||||
|
flush_encode();
|
||||||
|
}
|
||||||
|
reset();
|
||||||
|
}
|
||||||
|
|
||||||
|
void
|
||||||
|
Pl_Base64::flush_decode()
|
||||||
|
{
|
||||||
|
if (this->end_of_data) {
|
||||||
|
throw std::runtime_error(
|
||||||
|
getIdentifier() + ": base64 decode: data follows pad characters");
|
||||||
|
}
|
||||||
|
int pad = 0;
|
||||||
|
int shift = 18;
|
||||||
|
int outval = 0;
|
||||||
|
for (size_t i = 0; i < 4; ++i) {
|
||||||
|
int v = 0;
|
||||||
|
char ch = to_c(this->buf[i]);
|
||||||
|
if ((ch >= 'A') && (ch <= 'Z')) {
|
||||||
|
v = ch - 'A';
|
||||||
|
} else if ((ch >= 'a') && (ch <= 'z')) {
|
||||||
|
v = ch - 'a' + 26;
|
||||||
|
} else if ((ch >= '0') && (ch <= '9')) {
|
||||||
|
v = ch - '0' + 52;
|
||||||
|
} else if ((ch == '+') || (ch == '-')) {
|
||||||
|
v = 62;
|
||||||
|
} else if ((ch == '/') || (ch == '_')) {
|
||||||
|
v = 63;
|
||||||
|
} else if (
|
||||||
|
(ch == '=') && ((i == 3) || ((i == 2) && (this->buf[3] == '=')))) {
|
||||||
|
++pad;
|
||||||
|
this->end_of_data = true;
|
||||||
|
v = 0;
|
||||||
|
} else {
|
||||||
|
throw std::runtime_error(
|
||||||
|
getIdentifier() + ": base64 decode: invalid input");
|
||||||
|
}
|
||||||
|
outval |= v << shift;
|
||||||
|
shift -= 6;
|
||||||
|
}
|
||||||
|
unsigned char out[3] = {
|
||||||
|
to_uc(outval >> 16),
|
||||||
|
to_uc(0xff & (outval >> 8)),
|
||||||
|
to_uc(0xff & outval),
|
||||||
|
};
|
||||||
|
|
||||||
|
getNext()->write(out, QIntC::to_size(3 - pad));
|
||||||
|
}
|
||||||
|
|
||||||
|
void
|
||||||
|
Pl_Base64::flush_encode()
|
||||||
|
{
|
||||||
|
int outval = ((this->buf[0] << 16) | (this->buf[1] << 8) | (this->buf[2]));
|
||||||
|
unsigned char out[4] = {
|
||||||
|
to_uc(outval >> 18),
|
||||||
|
to_uc(0x3f & (outval >> 12)),
|
||||||
|
to_uc(0x3f & (outval >> 6)),
|
||||||
|
to_uc(0x3f & outval),
|
||||||
|
};
|
||||||
|
for (size_t i = 0; i < 4; ++i) {
|
||||||
|
int ch = to_i(out[i]);
|
||||||
|
if (ch < 26) {
|
||||||
|
ch += 'A';
|
||||||
|
} else if (ch < 52) {
|
||||||
|
ch -= 26;
|
||||||
|
ch += 'a';
|
||||||
|
} else if (ch < 62) {
|
||||||
|
ch -= 52;
|
||||||
|
ch += '0';
|
||||||
|
} else if (ch == 62) {
|
||||||
|
ch = '+';
|
||||||
|
} else if (ch == 63) {
|
||||||
|
ch = '/';
|
||||||
|
}
|
||||||
|
out[i] = to_uc(ch);
|
||||||
|
}
|
||||||
|
for (size_t i = 0; i < 3 - this->pos; ++i) {
|
||||||
|
out[3 - i] = '=';
|
||||||
|
}
|
||||||
|
getNext()->write(out, 4);
|
||||||
|
}
|
||||||
|
|
||||||
|
void
|
||||||
|
Pl_Base64::finish()
|
||||||
|
{
|
||||||
|
if (this->pos > 0) {
|
||||||
|
if (finished) {
|
||||||
|
throw std::logic_error("Pl_Base64 used after finished");
|
||||||
|
}
|
||||||
|
if (this->action == a_decode) {
|
||||||
|
for (size_t i = this->pos; i < 4; ++i) {
|
||||||
|
this->buf[i] = '=';
|
||||||
|
}
|
||||||
|
}
|
||||||
|
flush();
|
||||||
|
}
|
||||||
|
this->finished = true;
|
||||||
|
getNext()->finish();
|
||||||
|
}
|
||||||
|
|
||||||
|
void
|
||||||
|
Pl_Base64::reset()
|
||||||
|
{
|
||||||
|
this->pos = 0;
|
||||||
|
memset(buf, 0, 4);
|
||||||
|
}
|
30
libqpdf/qpdf/Pl_Base64.hh
Normal file
30
libqpdf/qpdf/Pl_Base64.hh
Normal file
@ -0,0 +1,30 @@
|
|||||||
|
#ifndef PL_BASE64_HH
|
||||||
|
#define PL_BASE64_HH
|
||||||
|
|
||||||
|
#include <qpdf/Pipeline.hh>
|
||||||
|
|
||||||
|
class Pl_Base64: public Pipeline
|
||||||
|
{
|
||||||
|
public:
|
||||||
|
enum action_e { a_encode, a_decode };
|
||||||
|
Pl_Base64(char const* identifier, Pipeline* next, action_e);
|
||||||
|
virtual ~Pl_Base64() = default;
|
||||||
|
virtual void write(unsigned char* buf, size_t len) override;
|
||||||
|
virtual void finish() override;
|
||||||
|
|
||||||
|
private:
|
||||||
|
void decode(unsigned char* buf, size_t len);
|
||||||
|
void encode(unsigned char* buf, size_t len);
|
||||||
|
void flush();
|
||||||
|
void flush_decode();
|
||||||
|
void flush_encode();
|
||||||
|
void reset();
|
||||||
|
|
||||||
|
action_e action;
|
||||||
|
unsigned char buf[4];
|
||||||
|
size_t pos;
|
||||||
|
bool end_of_data;
|
||||||
|
bool finished;
|
||||||
|
};
|
||||||
|
|
||||||
|
#endif // PL_BASE64_HH
|
@ -3,6 +3,7 @@ set(TEST_PROGRAMS
|
|||||||
aes
|
aes
|
||||||
arg_parser
|
arg_parser
|
||||||
ascii85
|
ascii85
|
||||||
|
base64
|
||||||
bits
|
bits
|
||||||
buffer
|
buffer
|
||||||
closed_file_input_source
|
closed_file_input_source
|
||||||
|
81
libtests/base64.cc
Normal file
81
libtests/base64.cc
Normal file
@ -0,0 +1,81 @@
|
|||||||
|
#include <qpdf/Pl_Base64.hh>
|
||||||
|
|
||||||
|
#include <qpdf/Pl_StdioFile.hh>
|
||||||
|
#include <qpdf/QUtil.hh>
|
||||||
|
#include <cassert>
|
||||||
|
#include <cstdlib>
|
||||||
|
#include <cstring>
|
||||||
|
#include <iostream>
|
||||||
|
#include <stdexcept>
|
||||||
|
|
||||||
|
static bool
|
||||||
|
write_some(FILE* f, size_t bytes, Pipeline* p)
|
||||||
|
{
|
||||||
|
unsigned char buf[1000];
|
||||||
|
assert(bytes <= sizeof(buf));
|
||||||
|
size_t len = fread(buf, 1, bytes, f);
|
||||||
|
if (len > 0) {
|
||||||
|
p->write(buf, len);
|
||||||
|
}
|
||||||
|
if (len < bytes) {
|
||||||
|
if (ferror(f)) {
|
||||||
|
std::cerr << "error reading file" << std::endl;
|
||||||
|
exit(2);
|
||||||
|
}
|
||||||
|
p->finish();
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
return (len == bytes);
|
||||||
|
}
|
||||||
|
|
||||||
|
// Print the command-line synopsis to standard error and exit with
// status 2. Does not return.
static void
usage()
{
    std::cerr << "Usage: base64 encode|decode" << '\n';
    std::cerr.flush();
    exit(2);
}
|
||||||
|
|
||||||
|
int
|
||||||
|
main(int argc, char* argv[])
|
||||||
|
{
|
||||||
|
if (argc != 2) {
|
||||||
|
usage();
|
||||||
|
}
|
||||||
|
QUtil::binary_stdout();
|
||||||
|
QUtil::binary_stdin();
|
||||||
|
Pl_Base64::action_e action = Pl_Base64::a_decode;
|
||||||
|
if (strcmp(argv[1], "encode") == 0) {
|
||||||
|
action = Pl_Base64::a_encode;
|
||||||
|
} else if (strcmp(argv[1], "decode") != 0) {
|
||||||
|
usage();
|
||||||
|
}
|
||||||
|
|
||||||
|
try {
|
||||||
|
Pl_StdioFile out("stdout", stdout);
|
||||||
|
Pl_Base64 decode("decode", &out, action);
|
||||||
|
// The comments are "n: n%4 n%3", where n is the number of
|
||||||
|
// bytes read at the end of the call, and are there to
|
||||||
|
// indicate that we are reading in chunks that exercise
|
||||||
|
// various boundary conditions around subsequent writes and
|
||||||
|
// the state of buf and pos. There are some writes that don't
|
||||||
|
// do flush at all, some that call flush multiple times, and
|
||||||
|
// some that start in the middle and do flush, and this is
|
||||||
|
// true for both encode and decode.
|
||||||
|
if (write_some(stdin, 1, &decode) && // 1: 1 1
|
||||||
|
write_some(stdin, 4, &decode) && // 5: 1 2
|
||||||
|
write_some(stdin, 2, &decode) && // 7: 3 1
|
||||||
|
write_some(stdin, 2, &decode) && // 9: 1 0
|
||||||
|
write_some(stdin, 7, &decode) && // 16: 0 1
|
||||||
|
write_some(stdin, 1, &decode) && // 17: 1 2
|
||||||
|
write_some(stdin, 9, &decode) && // 26: 2 2
|
||||||
|
write_some(stdin, 2, &decode)) { // 28: 0 1
|
||||||
|
while (write_some(stdin, 1000, &decode)) {
|
||||||
|
}
|
||||||
|
}
|
||||||
|
} catch (std::exception& e) {
|
||||||
|
std::cout << "exception: " << e.what() << std::endl;
|
||||||
|
exit(2);
|
||||||
|
}
|
||||||
|
|
||||||
|
return 0;
|
||||||
|
}
|
58
libtests/qtest/base64.test
Normal file
58
libtests/qtest/base64.test
Normal file
@ -0,0 +1,58 @@
|
|||||||
|
#!/usr/bin/env perl
# Test suite for the base64 test program: round-trips the numbered
# fixture files in both directions and checks handling of alternate
# alphabets, padding, and malformed input.
require 5.008;
use warnings;
use strict;

chdir("base64") or die "chdir testdir failed: $!\n";

require TestDriver;

my $td = TestDriver->new('base64');

cleanup();

# ** Do not use normalize newlines on these tests. **

# Each fixture pair N.dec / N.enc is checked in both directions.
my $n = 5;
for (my $i = 1; $i <= $n; ++$i)
{
    $td->runtest("encode $i",
                 {$td->COMMAND => "base64 encode < $i.dec"},
                 {$td->FILE => "$i.enc", $td->EXIT_STATUS => 0});
    $td->runtest("decode $i",
                 {$td->COMMAND => "base64 decode < $i.enc"},
                 {$td->FILE => "$i.dec", $td->EXIT_STATUS => 0});
}

# Bits that fall under the padding are not required to be zero.
$td->runtest("non-zero discard bits",
             {$td->COMMAND => "echo c2FsYWR= | base64 decode"},
             {$td->STRING => "salad", $td->EXIT_STATUS => 0});

# The standard and URL-safe alphabets must decode to the same bytes; the
# outputs are written to scratch files 'a' and 'b' and then compared.
$td->runtest("write with +/",
             {$td->COMMAND => "echo +/== | base64 decode > a"},
             {$td->STRING => "", $td->EXIT_STATUS => 0});
$td->runtest("write with -_",
             {$td->COMMAND => "echo -_== | base64 decode > b"},
             {$td->STRING => "", $td->EXIT_STATUS => 0});
$td->runtest("interchangeability of +/ and -_",
             {$td->FILE => "a"},
             {$td->FILE => "b"});

# Malformed input is reported and the program exits with status 2.
$td->runtest("invalid characters",
             {$td->COMMAND => "echo aaaaa! | base64 decode"},
             {$td->REGEXP => ".*invalid input.*", $td->EXIT_STATUS => 2});
$td->runtest("invalid pad",
             {$td->COMMAND => "echo a= | base64 decode"},
             {$td->REGEXP => ".*invalid input.*", $td->EXIT_STATUS => 2});
$td->runtest("data after pad",
             {$td->COMMAND => "echo aa==potato | base64 decode"},
             {$td->REGEXP => ".*data follows pad characters.*",
              $td->EXIT_STATUS => 2});

cleanup();

# Seven fixed tests plus two per fixture pair.
$td->report(7 + (2 * $n));

# Remove the scratch files written by the alphabet tests.
sub cleanup
{
    unlink('a', 'b');
}
|
1
libtests/qtest/base64/1.dec
Normal file
1
libtests/qtest/base64/1.dec
Normal file
@ -0,0 +1 @@
|
|||||||
|
123
|
1
libtests/qtest/base64/1.enc
Normal file
1
libtests/qtest/base64/1.enc
Normal file
@ -0,0 +1 @@
|
|||||||
|
MTIz
|
1
libtests/qtest/base64/2.dec
Normal file
1
libtests/qtest/base64/2.dec
Normal file
@ -0,0 +1 @@
|
|||||||
|
1234
|
1
libtests/qtest/base64/2.enc
Normal file
1
libtests/qtest/base64/2.enc
Normal file
@ -0,0 +1 @@
|
|||||||
|
MTIzNA==
|
1
libtests/qtest/base64/3.dec
Normal file
1
libtests/qtest/base64/3.dec
Normal file
@ -0,0 +1 @@
|
|||||||
|
This file has a multiple of four bytes and is longer than four bytes...
|
1
libtests/qtest/base64/3.enc
Normal file
1
libtests/qtest/base64/3.enc
Normal file
@ -0,0 +1 @@
|
|||||||
|
VGhpcyBmaWxlIGhhcyBhIG11bHRpcGxlIG9mIGZvdXIgYnl0ZXMgYW5kIGlzIGxvbmdlciB0aGFuIGZvdXIgYnl0ZXMuLi4K
|
1
libtests/qtest/base64/4.dec
Normal file
1
libtests/qtest/base64/4.dec
Normal file
@ -0,0 +1 @@
|
|||||||
|
This file has a non-multiple of four bytes and is longer than four bytes.
|
1
libtests/qtest/base64/4.enc
Normal file
1
libtests/qtest/base64/4.enc
Normal file
@ -0,0 +1 @@
|
|||||||
|
VGhpcyBmaWxlIGhhcyBhIG5vbi1tdWx0aXBsZSBvZiBmb3VyIGJ5dGVzIGFuZCBpcyBsb25nZXIgdGhhbiBmb3VyIGJ5dGVzLgo=
|
BIN
libtests/qtest/base64/5.dec
Normal file
BIN
libtests/qtest/base64/5.dec
Normal file
Binary file not shown.
1
libtests/qtest/base64/5.enc
Normal file
1
libtests/qtest/base64/5.enc
Normal file
@ -0,0 +1 @@
|
|||||||
|
VGhpcyBmaWxlIGFzIG9uZSBvZiBldmVyeSBieXRlIGluIGl0LiBOb3QgdGhhdCBpdCByZWFsbHkgbWFrZXMgYW55CmRpZmZlcmVuY2UsIGJ1dCB3aHkgbm90LgoKAAECAwQFBgcICQoLDA0ODxAREhMUFRYXGBkaGxwdHh8gISIjJCUmJygpKissLS4vMDEyMzQ1Njc4OTo7PD0+P0BBQkNERUZHSElKS0xNTk9QUVJTVFVWV1hZWltcXV5fYGFiY2RlZmdoaWprbG1ub3BxcnN0dXZ3eHl6e3x9fn+AgYKDhIWGh4iJiouMjY6PkJGSk5SVlpeYmZqbnJ2en6ChoqOkpaanqKmqq6ytrq+wsbKztLW2t7i5uru8vb6/wMHCw8TFxsfIycrLzM3Oz9DR0tPU1dbX2Nna29zd3t/g4eLj5OXm5+jp6uvs7e7v8PHy8/T19vf4+fr7/P3+/wo=
|
Loading…
Reference in New Issue
Block a user