2020-01-03 03:01:10 +00:00
|
|
|
#include <qpdf/QIntC.hh>
#include <qpdf/QPDF.hh>
#include <qpdf/QPDFXRefEntry.hh>
#include <qpdf/QUtil.hh>
#include <cstdio>
#include <cstring>
#include <iostream>
#include <regex>
#include <stdexcept>
#include <string_view>
|
2020-01-03 03:01:10 +00:00
|
|
|
|
2023-05-20 11:46:50 +00:00
|
|
|
// Program name used in diagnostics; assigned at the start of realmain().
static char const* whoami{nullptr};

// Print a one-line usage summary to standard error and terminate the
// process with exit status 2. This function does not return.
static void
usage()
{
    std::cerr << "Usage: " << whoami;
    std::cerr << " [filename]" << std::endl;
    exit(2);
}
|
|
|
|
|
|
|
|
class QdfFixer
|
|
|
|
{
|
|
|
|
public:
|
|
|
|
QdfFixer(std::string const& filename);
|
2023-03-06 17:31:19 +00:00
|
|
|
void processLines(std::string const& input);
|
2022-04-02 21:14:10 +00:00
|
|
|
|
2020-01-03 03:01:10 +00:00
|
|
|
private:
|
|
|
|
void fatal(std::string const&);
|
|
|
|
void checkObjId(std::string const& obj_id);
|
|
|
|
void adjustOstreamXref();
|
|
|
|
void writeOstream();
|
|
|
|
void writeBinary(unsigned long long val, size_t bytes);
|
|
|
|
|
|
|
|
std::string filename;
|
|
|
|
enum {
|
|
|
|
st_top,
|
|
|
|
st_in_obj,
|
|
|
|
st_in_stream,
|
|
|
|
st_after_stream,
|
|
|
|
st_in_ostream_dict,
|
|
|
|
st_in_ostream_offsets,
|
|
|
|
st_in_ostream_outer,
|
|
|
|
st_in_ostream_obj,
|
|
|
|
st_in_xref_stream_dict,
|
|
|
|
st_in_length,
|
|
|
|
st_at_xref,
|
|
|
|
st_before_trailer,
|
|
|
|
st_in_trailer,
|
|
|
|
st_done,
|
2023-06-01 13:12:39 +00:00
|
|
|
} state{st_top};
|
2020-01-03 03:01:10 +00:00
|
|
|
|
2023-06-01 13:12:39 +00:00
|
|
|
size_t lineno{0};
|
|
|
|
qpdf_offset_t offset{0};
|
|
|
|
qpdf_offset_t last_offset{0};
|
|
|
|
int last_obj{0};
|
2020-01-03 03:01:10 +00:00
|
|
|
std::vector<QPDFXRefEntry> xref;
|
2023-06-01 13:12:39 +00:00
|
|
|
qpdf_offset_t stream_start{0};
|
|
|
|
size_t stream_length{0};
|
|
|
|
qpdf_offset_t xref_offset{0};
|
|
|
|
size_t xref_f1_nbytes{0};
|
|
|
|
size_t xref_f2_nbytes{0};
|
|
|
|
size_t xref_size{0};
|
2023-03-06 17:31:19 +00:00
|
|
|
std::vector<std::string_view> ostream;
|
2020-01-03 03:01:10 +00:00
|
|
|
std::vector<qpdf_offset_t> ostream_offsets;
|
2023-03-06 17:31:19 +00:00
|
|
|
std::vector<std::string_view> ostream_discarded;
|
2023-06-01 13:12:39 +00:00
|
|
|
size_t ostream_idx{0};
|
|
|
|
int ostream_id{0};
|
2020-01-03 03:01:10 +00:00
|
|
|
std::string ostream_extends;
|
|
|
|
};
|
|
|
|
|
|
|
|
QdfFixer::QdfFixer(std::string const& filename) :
|
2023-06-01 13:12:39 +00:00
|
|
|
filename(filename)
|
2020-01-03 03:01:10 +00:00
|
|
|
{
|
|
|
|
}
|
|
|
|
|
|
|
|
void
|
|
|
|
QdfFixer::fatal(std::string const& msg)
|
|
|
|
{
|
|
|
|
std::cerr << msg << std::endl;
|
|
|
|
exit(2);
|
|
|
|
}
|
|
|
|
|
|
|
|
void
|
2023-03-06 17:31:19 +00:00
|
|
|
QdfFixer::processLines(std::string const& input)
|
2020-01-03 03:01:10 +00:00
|
|
|
{
|
2023-03-07 20:38:09 +00:00
|
|
|
using namespace std::literals;
|
|
|
|
|
2023-03-06 17:31:19 +00:00
|
|
|
static const std::regex re_n_0_obj("^(\\d+) 0 obj\n$");
|
|
|
|
static const std::regex re_extends("/Extends (\\d+ 0 R)");
|
|
|
|
static const std::regex re_ostream_obj("^%% Object stream: object (\\d+)");
|
|
|
|
static const std::regex re_num("^\\d+\n$");
|
|
|
|
static const std::regex re_size_n("^ /Size \\d+\n$");
|
|
|
|
|
|
|
|
auto sv_diff = [](size_t i) { return static_cast<std::string_view::difference_type>(i); };
|
2020-01-03 03:01:10 +00:00
|
|
|
|
|
|
|
lineno = 0;
|
2023-03-06 17:31:19 +00:00
|
|
|
bool more = true;
|
|
|
|
auto len_line = sv_diff(0);
|
|
|
|
|
|
|
|
std::string_view line;
|
|
|
|
std::string_view input_view{input.data(), input.size()};
|
|
|
|
size_t offs = 0;
|
|
|
|
|
|
|
|
auto b_line = input.cbegin();
|
|
|
|
std::smatch m;
|
|
|
|
auto const matches = [&m, &b_line, &len_line](std::regex const& r) {
|
|
|
|
return std::regex_search(b_line, b_line + len_line, m, r);
|
|
|
|
};
|
|
|
|
|
|
|
|
while (more) {
|
2020-01-03 03:01:10 +00:00
|
|
|
++lineno;
|
|
|
|
last_offset = offset;
|
2023-03-06 17:31:19 +00:00
|
|
|
b_line += len_line;
|
|
|
|
|
|
|
|
offs = input_view.find('\n');
|
|
|
|
if (offs == std::string::npos) {
|
|
|
|
more = false;
|
|
|
|
line = input_view;
|
|
|
|
} else {
|
|
|
|
offs++;
|
|
|
|
line = input_view.substr(0, offs);
|
|
|
|
input_view.remove_prefix(offs);
|
|
|
|
}
|
|
|
|
len_line = sv_diff(line.size());
|
|
|
|
offset += len_line;
|
|
|
|
|
2020-01-03 03:01:10 +00:00
|
|
|
if (state == st_top) {
|
|
|
|
if (matches(re_n_0_obj)) {
|
|
|
|
checkObjId(m[1].str());
|
|
|
|
state = st_in_obj;
|
2023-03-07 20:38:09 +00:00
|
|
|
} else if (line.compare("xref\n"sv) == 0) {
|
2020-01-03 03:01:10 +00:00
|
|
|
xref_offset = last_offset;
|
|
|
|
state = st_at_xref;
|
|
|
|
}
|
|
|
|
std::cout << line;
|
|
|
|
} else if (state == st_in_obj) {
|
|
|
|
std::cout << line;
|
2023-03-07 20:38:09 +00:00
|
|
|
if (line.compare("stream\n"sv) == 0) {
|
2020-01-03 03:01:10 +00:00
|
|
|
state = st_in_stream;
|
|
|
|
stream_start = offset;
|
2023-03-07 20:38:09 +00:00
|
|
|
} else if (line.compare("endobj\n"sv) == 0) {
|
2020-01-03 03:01:10 +00:00
|
|
|
state = st_top;
|
2023-03-07 20:38:09 +00:00
|
|
|
} else if (line.find("/Type /ObjStm"sv) != line.npos) {
|
2020-01-03 03:01:10 +00:00
|
|
|
state = st_in_ostream_dict;
|
|
|
|
ostream_id = last_obj;
|
2023-03-07 20:38:09 +00:00
|
|
|
} else if (line.find("/Type /XRef"sv) != line.npos) {
|
2020-01-03 03:01:10 +00:00
|
|
|
xref_offset = xref.back().getOffset();
|
|
|
|
xref_f1_nbytes = 0;
|
|
|
|
auto t = xref_offset;
|
|
|
|
while (t) {
|
|
|
|
t >>= 8;
|
|
|
|
++xref_f1_nbytes;
|
|
|
|
}
|
|
|
|
// Figure out how many bytes we need for ostream
|
|
|
|
// index. Make sure we get at least 1 byte even if
|
|
|
|
// there are no object streams.
|
|
|
|
int max_objects = 1;
|
2020-04-10 14:07:23 +00:00
|
|
|
for (auto const& e: xref) {
|
2020-01-03 03:01:10 +00:00
|
|
|
if ((e.getType() == 2) && (e.getObjStreamIndex() > max_objects)) {
|
|
|
|
max_objects = e.getObjStreamIndex();
|
|
|
|
}
|
|
|
|
}
|
|
|
|
while (max_objects) {
|
|
|
|
max_objects >>= 8;
|
|
|
|
++xref_f2_nbytes;
|
|
|
|
}
|
|
|
|
auto esize = 1 + xref_f1_nbytes + xref_f2_nbytes;
|
|
|
|
xref_size = 1 + xref.size();
|
|
|
|
auto length = xref_size * esize;
|
|
|
|
std::cout << " /Length " << length << "\n"
|
2023-12-23 02:45:10 +00:00
|
|
|
<< " /W [ 1 " << xref_f1_nbytes << " " << xref_f2_nbytes << " ]\n";
|
2020-01-03 03:01:10 +00:00
|
|
|
state = st_in_xref_stream_dict;
|
|
|
|
}
|
|
|
|
} else if (state == st_in_ostream_dict) {
|
2023-03-07 20:38:09 +00:00
|
|
|
if (line.compare("stream\n"sv) == 0) {
|
2020-01-03 03:01:10 +00:00
|
|
|
state = st_in_ostream_offsets;
|
|
|
|
} else {
|
|
|
|
ostream_discarded.push_back(line);
|
|
|
|
if (matches(re_extends)) {
|
|
|
|
ostream_extends = m[1].str();
|
|
|
|
}
|
|
|
|
}
|
|
|
|
// discard line
|
|
|
|
} else if (state == st_in_ostream_offsets) {
|
|
|
|
if (matches(re_ostream_obj)) {
|
|
|
|
checkObjId(m[1].str());
|
|
|
|
stream_start = last_offset;
|
|
|
|
state = st_in_ostream_outer;
|
|
|
|
ostream.push_back(line);
|
|
|
|
} else {
|
|
|
|
ostream_discarded.push_back(line);
|
|
|
|
}
|
|
|
|
// discard line
|
|
|
|
} else if (state == st_in_ostream_outer) {
|
|
|
|
adjustOstreamXref();
|
|
|
|
ostream_offsets.push_back(last_offset - stream_start);
|
|
|
|
state = st_in_ostream_obj;
|
|
|
|
ostream.push_back(line);
|
|
|
|
} else if (state == st_in_ostream_obj) {
|
|
|
|
ostream.push_back(line);
|
|
|
|
if (matches(re_ostream_obj)) {
|
|
|
|
checkObjId(m[1].str());
|
|
|
|
state = st_in_ostream_outer;
|
2023-03-07 20:38:09 +00:00
|
|
|
} else if (line.compare("endstream\n"sv) == 0) {
|
2020-01-03 03:01:10 +00:00
|
|
|
stream_length = QIntC::to_size(last_offset - stream_start);
|
|
|
|
writeOstream();
|
|
|
|
state = st_in_obj;
|
|
|
|
}
|
|
|
|
} else if (state == st_in_xref_stream_dict) {
|
2023-03-07 20:38:09 +00:00
|
|
|
if ((line.find("/Length"sv) != line.npos) || (line.find("/W"sv) != line.npos)) {
|
2020-01-03 03:01:10 +00:00
|
|
|
// already printed
|
2023-03-07 20:38:09 +00:00
|
|
|
} else if (line.find("/Size"sv) != line.npos) {
|
2020-01-03 03:01:10 +00:00
|
|
|
auto xref_size = 1 + xref.size();
|
|
|
|
std::cout << " /Size " << xref_size << "\n";
|
|
|
|
} else {
|
|
|
|
std::cout << line;
|
|
|
|
}
|
2023-03-07 20:38:09 +00:00
|
|
|
if (line.compare("stream\n"sv) == 0) {
|
2020-01-03 03:01:10 +00:00
|
|
|
writeBinary(0, 1);
|
|
|
|
writeBinary(0, xref_f1_nbytes);
|
|
|
|
writeBinary(0, xref_f2_nbytes);
|
2020-04-10 14:07:23 +00:00
|
|
|
for (auto const& x: xref) {
|
2020-01-03 03:01:10 +00:00
|
|
|
unsigned long long f1 = 0;
|
|
|
|
unsigned long long f2 = 0;
|
|
|
|
unsigned int type = QIntC::to_uint(x.getType());
|
|
|
|
if (1 == type) {
|
|
|
|
f1 = QIntC::to_ulonglong(x.getOffset());
|
|
|
|
} else {
|
|
|
|
f1 = QIntC::to_ulonglong(x.getObjStreamNumber());
|
|
|
|
f2 = QIntC::to_ulonglong(x.getObjStreamIndex());
|
|
|
|
}
|
|
|
|
writeBinary(type, 1);
|
|
|
|
writeBinary(f1, xref_f1_nbytes);
|
|
|
|
writeBinary(f2, xref_f2_nbytes);
|
|
|
|
}
|
|
|
|
std::cout << "\nendstream\nendobj\n\n"
|
|
|
|
<< "startxref\n"
|
|
|
|
<< xref_offset << "\n%%EOF\n";
|
|
|
|
state = st_done;
|
|
|
|
}
|
|
|
|
} else if (state == st_in_stream) {
|
2023-03-07 20:38:09 +00:00
|
|
|
if (line.compare("endstream\n"sv) == 0) {
|
2020-01-03 03:01:10 +00:00
|
|
|
stream_length = QIntC::to_size(last_offset - stream_start);
|
|
|
|
state = st_after_stream;
|
|
|
|
}
|
|
|
|
std::cout << line;
|
|
|
|
} else if (state == st_after_stream) {
|
2023-03-07 20:38:09 +00:00
|
|
|
if (line.compare("%QDF: ignore_newline\n"sv) == 0) {
|
2020-10-23 08:02:53 +00:00
|
|
|
if (stream_length > 0) {
|
|
|
|
--stream_length;
|
|
|
|
}
|
2020-01-03 03:01:10 +00:00
|
|
|
} else if (matches(re_n_0_obj)) {
|
|
|
|
checkObjId(m[1].str());
|
|
|
|
state = st_in_length;
|
|
|
|
}
|
|
|
|
std::cout << line;
|
|
|
|
} else if (state == st_in_length) {
|
|
|
|
if (!matches(re_num)) {
|
2022-09-21 16:49:21 +00:00
|
|
|
fatal(filename + ":" + std::to_string(lineno) + ": expected integer");
|
2020-01-03 03:01:10 +00:00
|
|
|
}
|
2022-09-21 16:49:21 +00:00
|
|
|
std::string new_length = std::to_string(stream_length) + "\n";
|
2020-01-03 03:01:10 +00:00
|
|
|
offset -= QIntC::to_offset(line.length());
|
|
|
|
offset += QIntC::to_offset(new_length.length());
|
|
|
|
std::cout << new_length;
|
|
|
|
state = st_top;
|
|
|
|
} else if (state == st_at_xref) {
|
|
|
|
auto n = xref.size();
|
|
|
|
std::cout << "0 " << 1 + n << "\n0000000000 65535 f \n";
|
2020-04-10 14:07:23 +00:00
|
|
|
for (auto const& e: xref) {
|
2020-01-03 03:01:10 +00:00
|
|
|
std::cout << QUtil::int_to_string(e.getOffset(), 10) << " 00000 n \n";
|
|
|
|
}
|
|
|
|
state = st_before_trailer;
|
|
|
|
} else if (state == st_before_trailer) {
|
2023-03-07 20:38:09 +00:00
|
|
|
if (line.compare("trailer <<\n"sv) == 0) {
|
2020-01-03 03:01:10 +00:00
|
|
|
std::cout << line;
|
|
|
|
state = st_in_trailer;
|
|
|
|
}
|
|
|
|
// no output
|
|
|
|
} else if (state == st_in_trailer) {
|
|
|
|
if (matches(re_size_n)) {
|
|
|
|
std::cout << " /Size " << 1 + xref.size() << "\n";
|
|
|
|
} else {
|
|
|
|
std::cout << line;
|
|
|
|
}
|
2023-03-07 20:38:09 +00:00
|
|
|
if (line.compare(">>\n"sv) == 0) {
|
2020-01-03 03:01:10 +00:00
|
|
|
std::cout << "startxref\n" << xref_offset << "\n%%EOF\n";
|
|
|
|
state = st_done;
|
|
|
|
}
|
|
|
|
} else if (state == st_done) {
|
|
|
|
// ignore
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
void
|
|
|
|
QdfFixer::checkObjId(std::string const& cur_obj_str)
|
|
|
|
{
|
2023-03-07 19:02:49 +00:00
|
|
|
if (std::stoi(cur_obj_str) != ++last_obj) {
|
2022-02-08 14:18:08 +00:00
|
|
|
fatal(
|
2022-09-21 16:49:21 +00:00
|
|
|
filename + ":" + std::to_string(lineno) + ": expected object " +
|
2023-03-07 19:02:49 +00:00
|
|
|
std::to_string(last_obj));
|
2020-01-03 03:01:10 +00:00
|
|
|
}
|
2023-03-07 19:02:49 +00:00
|
|
|
xref.push_back(QPDFXRefEntry(1, last_offset, 0));
|
2020-01-03 03:01:10 +00:00
|
|
|
}
|
|
|
|
|
|
|
|
void
|
|
|
|
QdfFixer::adjustOstreamXref()
|
|
|
|
{
|
2023-03-07 19:05:26 +00:00
|
|
|
xref.back() = QPDFXRefEntry(2, ostream_id, QIntC::to_int(ostream_idx++));
|
2020-01-03 03:01:10 +00:00
|
|
|
}
|
|
|
|
|
|
|
|
void
|
|
|
|
QdfFixer::writeOstream()
|
|
|
|
{
|
|
|
|
auto first = ostream_offsets.at(0);
|
|
|
|
auto onum = ostream_id;
|
|
|
|
std::string offsets;
|
|
|
|
auto n = ostream_offsets.size();
|
2020-04-09 00:14:04 +00:00
|
|
|
for (auto iter: ostream_offsets) {
|
2022-02-08 14:18:08 +00:00
|
|
|
iter -= QIntC::to_offset(first);
|
|
|
|
++onum;
|
2022-09-21 16:49:21 +00:00
|
|
|
offsets += std::to_string(onum) + " " + std::to_string(iter) + "\n";
|
2020-01-03 03:01:10 +00:00
|
|
|
}
|
|
|
|
auto offset_adjust = QIntC::to_offset(offsets.size());
|
|
|
|
first += offset_adjust;
|
|
|
|
stream_length += QIntC::to_size(offset_adjust);
|
|
|
|
std::string dict_data = "";
|
2022-09-21 16:49:21 +00:00
|
|
|
dict_data += " /Length " + std::to_string(stream_length) + "\n";
|
|
|
|
dict_data += " /N " + std::to_string(n) + "\n";
|
|
|
|
dict_data += " /First " + std::to_string(first) + "\n";
|
2020-01-03 03:01:10 +00:00
|
|
|
if (!ostream_extends.empty()) {
|
2022-02-08 14:18:08 +00:00
|
|
|
dict_data += " /Extends " + ostream_extends + "\n";
|
2020-01-03 03:01:10 +00:00
|
|
|
}
|
|
|
|
dict_data += ">>\n";
|
|
|
|
offset_adjust += QIntC::to_offset(dict_data.length());
|
|
|
|
std::cout << dict_data << "stream\n" << offsets;
|
2020-04-10 14:07:23 +00:00
|
|
|
for (auto const& o: ostream) {
|
2022-02-08 14:18:08 +00:00
|
|
|
std::cout << o;
|
2020-01-03 03:01:10 +00:00
|
|
|
}
|
|
|
|
|
2020-04-10 14:07:23 +00:00
|
|
|
for (auto const& o: ostream_discarded) {
|
2022-02-08 14:18:08 +00:00
|
|
|
offset -= QIntC::to_offset(o.length());
|
2020-01-03 03:01:10 +00:00
|
|
|
}
|
|
|
|
offset += offset_adjust;
|
|
|
|
|
|
|
|
ostream_idx = 0;
|
|
|
|
ostream_id = 0;
|
|
|
|
ostream.clear();
|
|
|
|
ostream_offsets.clear();
|
|
|
|
ostream_discarded.clear();
|
|
|
|
ostream_extends.clear();
|
|
|
|
}
|
|
|
|
|
|
|
|
void
|
|
|
|
QdfFixer::writeBinary(unsigned long long val, size_t bytes)
|
|
|
|
{
|
|
|
|
if (bytes > sizeof(unsigned long long)) {
|
|
|
|
throw std::logic_error("fix-qdf::writeBinary called with too many bytes");
|
|
|
|
}
|
2023-03-06 17:43:27 +00:00
|
|
|
std::string data(bytes, '\0');
|
|
|
|
for (auto i = bytes; i > 0; --i) {
|
|
|
|
data[i - 1] = static_cast<char>(val & 0xff); // i.e. val % 256
|
|
|
|
val >>= 8; // i.e. val = val / 256
|
2020-01-03 03:01:10 +00:00
|
|
|
}
|
|
|
|
std::cout << data;
|
|
|
|
}
|
|
|
|
|
|
|
|
static int
|
|
|
|
realmain(int argc, char* argv[])
|
|
|
|
{
|
|
|
|
whoami = QUtil::getWhoami(argv[0]);
|
|
|
|
QUtil::setLineBuf(stdout);
|
2023-05-20 11:46:50 +00:00
|
|
|
char const* filename = nullptr;
|
2020-01-03 03:01:10 +00:00
|
|
|
if (argc > 2) {
|
|
|
|
usage();
|
|
|
|
} else if ((argc > 1) && (strcmp(argv[1], "--version") == 0)) {
|
|
|
|
std::cout << whoami << " from qpdf version " << QPDF::QPDFVersion() << std::endl;
|
|
|
|
return 0;
|
|
|
|
} else if ((argc > 1) && (strcmp(argv[1], "--help") == 0)) {
|
|
|
|
usage();
|
|
|
|
} else if (argc == 2) {
|
|
|
|
filename = argv[1];
|
|
|
|
}
|
2023-03-06 17:31:19 +00:00
|
|
|
std::string input;
|
2023-05-20 11:46:50 +00:00
|
|
|
if (filename == nullptr) {
|
2020-01-03 03:01:10 +00:00
|
|
|
filename = "standard input";
|
|
|
|
QUtil::binary_stdin();
|
2023-03-06 17:31:19 +00:00
|
|
|
input = QUtil::read_file_into_string(stdin);
|
2020-01-03 03:01:10 +00:00
|
|
|
} else {
|
2023-03-06 17:31:19 +00:00
|
|
|
input = QUtil::read_file_into_string(filename);
|
2020-01-03 03:01:10 +00:00
|
|
|
}
|
|
|
|
QUtil::binary_stdout();
|
|
|
|
QdfFixer qf(filename);
|
2023-03-06 17:31:19 +00:00
|
|
|
qf.processLines(input);
|
2020-01-03 03:01:10 +00:00
|
|
|
return 0;
|
|
|
|
}
|
|
|
|
|
|
|
|
#ifdef WINDOWS_WMAIN
|
|
|
|
|
|
|
|
extern "C" int
|
|
|
|
wmain(int argc, wchar_t* argv[])
|
|
|
|
{
|
|
|
|
return QUtil::call_main_from_wmain(argc, argv, realmain);
|
|
|
|
}
|
|
|
|
|
|
|
|
#else
|
|
|
|
|
|
|
|
int
|
|
|
|
main(int argc, char* argv[])
|
|
|
|
{
|
|
|
|
return realmain(argc, argv);
|
|
|
|
}
|
|
|
|
|
|
|
|
#endif
|