optimize fix-qdf

This commit is contained in:
oltolm 2024-01-27 03:05:13 +01:00
parent e9166457fa
commit 577a7eb8bb
4 changed files with 58 additions and 53 deletions

2
.gitignore vendored
View File

@ -7,3 +7,5 @@ appimage/build
.cache
/html
Doxyfile
compile_commands.json
/.vscode/

View File

@ -94,6 +94,11 @@ namespace QUtil
QPDF_DLL
FILE* safe_fopen(char const* filename, char const* mode);
#ifdef _WIN32
QPDF_DLL
std::shared_ptr<wchar_t> win_convert_filename(char const* filename);
#endif
// The FILE* argument is assumed to be the return of fopen. If null, throw std::runtime_error.
// Otherwise, return the FILE* argument.
QPDF_DLL

View File

@ -13,7 +13,6 @@
#include <cstdlib>
#include <cstring>
#include <fcntl.h>
#include <fstream>
#include <iomanip>
#include <map>
#include <memory>
@ -465,8 +464,8 @@ QUtil::os_wrapper(std::string const& description, int status)
}
#ifdef _WIN32
static std::shared_ptr<wchar_t>
win_convert_filename(char const* filename)
std::shared_ptr<wchar_t>
QUtil::win_convert_filename(char const* filename)
{
// Convert the utf-8 encoded filename argument to wchar_t*. First,
// convert to utf16, then to wchar_t*. Note that u16 will start

View File

@ -4,8 +4,12 @@
#include <qpdf/QUtil.hh>
#include <cstdio>
#include <cstring>
#include <fstream>
#include <iostream>
#include <istream>
#include <regex>
#include <stdexcept>
#include <string>
#include <string_view>
static char const* whoami = nullptr;
@ -21,7 +25,7 @@ class QdfFixer
{
public:
QdfFixer(std::string const& filename);
void processLines(std::string const& input);
void processLines(std::istream& input);
private:
void fatal(std::string const&);
@ -59,9 +63,9 @@ class QdfFixer
size_t xref_f1_nbytes{0};
size_t xref_f2_nbytes{0};
size_t xref_size{0};
std::vector<std::string_view> ostream;
std::vector<std::string> ostream;
std::vector<qpdf_offset_t> ostream_offsets;
std::vector<std::string_view> ostream_discarded;
std::vector<std::string> ostream_discarded;
size_t ostream_idx{0};
int ostream_id{0};
std::string ostream_extends;
@ -80,7 +84,7 @@ QdfFixer::fatal(std::string const& msg)
}
void
QdfFixer::processLines(std::string const& input)
QdfFixer::processLines(std::istream& input)
{
using namespace std::literals;
@ -93,51 +97,33 @@ QdfFixer::processLines(std::string const& input)
auto sv_diff = [](size_t i) { return static_cast<std::string_view::difference_type>(i); };
lineno = 0;
bool more = true;
auto len_line = sv_diff(0);
std::string_view line;
std::string_view input_view{input.data(), input.size()};
size_t offs = 0;
auto b_line = input.cbegin();
std::string line;
std::smatch m;
auto const matches = [&m, &b_line, &len_line](std::regex const& r) {
return std::regex_search(b_line, b_line + len_line, m, r);
};
while (more) {
while (std::getline(input, line, '\n')) {
line += '\n'; // re-add the newline that std::getline strips; note this also appends one to a final line that had no trailing newline
++lineno;
last_offset = offset;
b_line += len_line;
offs = input_view.find('\n');
if (offs == std::string::npos) {
more = false;
line = input_view;
} else {
offs++;
line = input_view.substr(0, offs);
input_view.remove_prefix(offs);
}
len_line = sv_diff(line.size());
offset += len_line;
offset += sv_diff(line.size());
if (state == st_top) {
if (matches(re_n_0_obj)) {
if (std::regex_search(line, m, re_n_0_obj)) {
checkObjId(m[1].str());
state = st_in_obj;
} else if (line.compare("xref\n"sv) == 0) {
} else if (line == "xref\n"sv) {
xref_offset = last_offset;
state = st_at_xref;
}
std::cout << line;
} else if (state == st_in_obj) {
std::cout << line;
if (line.compare("stream\n"sv) == 0) {
if (line == "stream\n"sv) {
state = st_in_stream;
stream_start = offset;
} else if (line.compare("endobj\n"sv) == 0) {
} else if (line == "endobj\n"sv) {
state = st_top;
} else if (line.find("/Type /ObjStm"sv) != line.npos) {
state = st_in_ostream_dict;
@ -171,17 +157,17 @@ QdfFixer::processLines(std::string const& input)
state = st_in_xref_stream_dict;
}
} else if (state == st_in_ostream_dict) {
if (line.compare("stream\n"sv) == 0) {
if (line == "stream\n"sv) {
state = st_in_ostream_offsets;
} else {
ostream_discarded.push_back(line);
if (matches(re_extends)) {
if (std::regex_search(line, m, re_extends)) {
ostream_extends = m[1].str();
}
}
// discard line
} else if (state == st_in_ostream_offsets) {
if (matches(re_ostream_obj)) {
if (std::regex_search(line, m, re_ostream_obj)) {
checkObjId(m[1].str());
stream_start = last_offset;
state = st_in_ostream_outer;
@ -197,10 +183,10 @@ QdfFixer::processLines(std::string const& input)
ostream.push_back(line);
} else if (state == st_in_ostream_obj) {
ostream.push_back(line);
if (matches(re_ostream_obj)) {
if (std::regex_search(line, m, re_ostream_obj)) {
checkObjId(m[1].str());
state = st_in_ostream_outer;
} else if (line.compare("endstream\n"sv) == 0) {
} else if (line == "endstream\n"sv) {
stream_length = QIntC::to_size(last_offset - stream_start);
writeOstream();
state = st_in_obj;
@ -214,7 +200,7 @@ QdfFixer::processLines(std::string const& input)
} else {
std::cout << line;
}
if (line.compare("stream\n"sv) == 0) {
if (line == "stream\n"sv) {
writeBinary(0, 1);
writeBinary(0, xref_f1_nbytes);
writeBinary(0, xref_f2_nbytes);
@ -238,23 +224,23 @@ QdfFixer::processLines(std::string const& input)
state = st_done;
}
} else if (state == st_in_stream) {
if (line.compare("endstream\n"sv) == 0) {
if (line == "endstream\n"sv) {
stream_length = QIntC::to_size(last_offset - stream_start);
state = st_after_stream;
}
std::cout << line;
} else if (state == st_after_stream) {
if (line.compare("%QDF: ignore_newline\n"sv) == 0) {
if (line == "%QDF: ignore_newline\n"sv) {
if (stream_length > 0) {
--stream_length;
}
} else if (matches(re_n_0_obj)) {
} else if (std::regex_search(line, m, re_n_0_obj)) {
checkObjId(m[1].str());
state = st_in_length;
}
std::cout << line;
} else if (state == st_in_length) {
if (!matches(re_num)) {
if (!std::regex_search(line, m, re_num)) {
fatal(filename + ":" + std::to_string(lineno) + ": expected integer");
}
std::string new_length = std::to_string(stream_length) + "\n";
@ -270,18 +256,18 @@ QdfFixer::processLines(std::string const& input)
}
state = st_before_trailer;
} else if (state == st_before_trailer) {
if (line.compare("trailer <<\n"sv) == 0) {
if (line == "trailer <<\n"sv) {
std::cout << line;
state = st_in_trailer;
}
// no output
} else if (state == st_in_trailer) {
if (matches(re_size_n)) {
if (std::regex_search(line, m, re_size_n)) {
std::cout << " /Size " << 1 + xref.size() << "\n";
} else {
std::cout << line;
}
if (line.compare(">>\n"sv) == 0) {
if (line == ">>\n"sv) {
std::cout << "startxref\n" << xref_offset << "\n%%EOF\n";
state = st_done;
}
@ -367,9 +353,15 @@ QdfFixer::writeBinary(unsigned long long val, size_t bytes)
static int
realmain(int argc, char* argv[])
{
std::cin.sync_with_stdio(false);
std::cout.sync_with_stdio(false);
std::cerr.sync_with_stdio(false);
whoami = QUtil::getWhoami(argv[0]);
QUtil::setLineBuf(stdout);
char const* filename = nullptr;
QUtil::binary_stdin();
std::string filename;
std::istream input(std::cin.rdbuf());
std::ifstream fin;
if (argc > 2) {
usage();
} else if ((argc > 1) && (strcmp(argv[1], "--version") == 0)) {
@ -380,13 +372,20 @@ realmain(int argc, char* argv[])
} else if (argc == 2) {
filename = argv[1];
}
std::string input;
if (filename == nullptr) {
if (filename.empty()) {
filename = "standard input";
QUtil::binary_stdin();
input = QUtil::read_file_into_string(stdin);
} else {
input = QUtil::read_file_into_string(filename);
#ifdef _WIN32
auto s = QUtil::win_convert_filename(filename.c_str());
fin.open(s.get(), std::ios::binary);
#else
fin.open(filename, std::ios::binary);
#endif
if (fin) {
input.rdbuf(fin.rdbuf());
} else {
QUtil::throw_system_error("open " + filename);
}
}
QUtil::binary_stdout();
QdfFixer qf(filename);