2
1
mirror of https://github.com/qpdf/qpdf.git synced 2024-12-22 10:58:58 +00:00

Merge pull request #920 from m-holger/fixqdf_rl

Refactor QdfFixer::processLines
This commit is contained in:
Jay Berkenbilt 2023-03-18 13:59:47 -04:00 committed by GitHub
commit a2c7471e66
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
6 changed files with 108 additions and 48 deletions

View File

@ -485,6 +485,11 @@ namespace QUtil
void read_file_into_memory(
char const* filename, std::shared_ptr<char>& file_buf, size_t& size);
QPDF_DLL
std::string read_file_into_string(char const* filename);
QPDF_DLL
std::string read_file_into_string(FILE* f, std::string_view filename = "");
// This used to be called strcasecmp, but that is a macro on some
// platforms, so we have to give it a name that is not likely to
// be a macro anywhere.

View File

@ -760,11 +760,9 @@ QPDFJob::Config::showObject(std::string const& parameter)
QPDFJob::Config*
QPDFJob::Config::jobJsonFile(std::string const& parameter)
{
std::shared_ptr<char> file_buf;
size_t size;
QUtil::read_file_into_memory(parameter.c_str(), file_buf, size);
try {
o.initializeFromJson(std::string(file_buf.get(), size), true);
o.initializeFromJson(
QUtil::read_file_into_string(parameter.c_str()), true);
} catch (std::exception& e) {
throw std::runtime_error(
"error with job-json file " + std::string(parameter) + ": " +

View File

@ -1243,6 +1243,37 @@ QUtil::read_file_into_memory(
}
}
std::string
QUtil::read_file_into_string(char const* filename)
{
FILE* f = safe_fopen(filename, "rb");
FileCloser fc(f);
return read_file_into_string(f, filename);
}
std::string
QUtil::read_file_into_string(FILE* f, std::string_view filename)
{
fseek(f, 0, SEEK_END);
auto size = QIntC::to_size(QUtil::tell(f));
fseek(f, 0, SEEK_SET);
std::string result(size, '\0');
if (auto read = fread(result.data(), 1, size, f); read != size) {
if (ferror(f)) {
throw std::runtime_error(
std::string("failure reading file ") + std::string(filename) +
" into memory: read " + uint_to_string(read) + "; wanted " +
uint_to_string(size));
} else {
throw std::runtime_error(
std::string("premature eof reading file ") +
std::string(filename) + " into memory: read " +
uint_to_string(read) + "; wanted " + uint_to_string(size));
}
}
return result;
}
static bool
read_char_from_FILE(char& ch, FILE* f)
{

File diff suppressed because one or more lines are too long

View File

@ -576,6 +576,12 @@ read_from_file_test()
auto buf2 = b2.getBufferSharedPointer();
assert(buf2->getSize() == size);
assert(memcmp(buf2->getBuffer(), p, size) == 0);
auto s = QUtil::read_file_into_string("other-file");
std::cout << "read " << s.size() << " bytes" << std::endl;
assert(s.size() == 24652);
assert(s.substr(0, 36) == "This file is used for qutil testing.");
assert(s.substr(24641, 10) == "very long.");
}
void

View File

@ -6,6 +6,7 @@
#include <cstring>
#include <iostream>
#include <regex>
#include <string_view>
static char const* whoami = 0;
@ -20,7 +21,7 @@ class QdfFixer
{
public:
QdfFixer(std::string const& filename);
void processLines(std::list<std::string>& lines);
void processLines(std::string const& input);
private:
void fatal(std::string const&);
@ -58,9 +59,9 @@ class QdfFixer
size_t xref_f1_nbytes;
size_t xref_f2_nbytes;
size_t xref_size;
std::vector<std::string> ostream;
std::vector<std::string_view> ostream;
std::vector<qpdf_offset_t> ostream_offsets;
std::vector<std::string> ostream_discarded;
std::vector<std::string_view> ostream_discarded;
size_t ostream_idx;
int ostream_id;
std::string ostream_extends;
@ -92,54 +93,71 @@ QdfFixer::fatal(std::string const& msg)
}
void
QdfFixer::processLines(std::list<std::string>& lines)
QdfFixer::processLines(std::string const& input)
{
static std::regex re_n_0_obj("^(\\d+) 0 obj\n$");
static std::regex re_xref("^xref\n$");
static std::regex re_stream("^stream\n$");
static std::regex re_endobj("^endobj\n$");
static std::regex re_type_objstm("/Type /ObjStm");
static std::regex re_type_xref("/Type /XRef");
static std::regex re_extends("/Extends (\\d+ 0 R)");
static std::regex re_ostream_obj("^%% Object stream: object (\\d+)");
static std::regex re_endstream("^endstream\n$");
static std::regex re_length_or_w("/(Length|W) ");
static std::regex re_size("/Size ");
static std::regex re_ignore_newline("^%QDF: ignore_newline\n$");
static std::regex re_num("^\\d+\n$");
static std::regex re_trailer("^trailer <<");
static std::regex re_size_n("^ /Size \\d+\n$");
static std::regex re_dict_end("^>>\n$");
using namespace std::literals;
static const std::regex re_n_0_obj("^(\\d+) 0 obj\n$");
static const std::regex re_extends("/Extends (\\d+ 0 R)");
static const std::regex re_ostream_obj("^%% Object stream: object (\\d+)");
static const std::regex re_num("^\\d+\n$");
static const std::regex re_size_n("^ /Size \\d+\n$");
auto sv_diff = [](size_t i) {
return static_cast<std::string_view::difference_type>(i);
};
lineno = 0;
for (auto const& line: lines) {
bool more = true;
auto len_line = sv_diff(0);
std::string_view line;
std::string_view input_view{input.data(), input.size()};
size_t offs = 0;
auto b_line = input.cbegin();
std::smatch m;
auto const matches = [&m, &b_line, &len_line](std::regex const& r) {
return std::regex_search(b_line, b_line + len_line, m, r);
};
while (more) {
++lineno;
last_offset = offset;
offset += QIntC::to_offset(line.length());
std::smatch m;
auto matches = [&m, &line](std::regex& r) {
return std::regex_search(line, m, r);
};
b_line += len_line;
offs = input_view.find('\n');
if (offs == std::string::npos) {
more = false;
line = input_view;
} else {
offs++;
line = input_view.substr(0, offs);
input_view.remove_prefix(offs);
}
len_line = sv_diff(line.size());
offset += len_line;
if (state == st_top) {
if (matches(re_n_0_obj)) {
checkObjId(m[1].str());
state = st_in_obj;
} else if (matches(re_xref)) {
} else if (line.compare("xref\n"sv) == 0) {
xref_offset = last_offset;
state = st_at_xref;
}
std::cout << line;
} else if (state == st_in_obj) {
std::cout << line;
if (matches(re_stream)) {
if (line.compare("stream\n"sv) == 0) {
state = st_in_stream;
stream_start = offset;
} else if (matches(re_endobj)) {
} else if (line.compare("endobj\n"sv) == 0) {
state = st_top;
} else if (matches(re_type_objstm)) {
} else if (line.find("/Type /ObjStm"sv) != line.npos) {
state = st_in_ostream_dict;
ostream_id = last_obj;
} else if (matches(re_type_xref)) {
} else if (line.find("/Type /XRef"sv) != line.npos) {
xref_offset = xref.back().getOffset();
xref_f1_nbytes = 0;
auto t = xref_offset;
@ -171,7 +189,7 @@ QdfFixer::processLines(std::list<std::string>& lines)
state = st_in_xref_stream_dict;
}
} else if (state == st_in_ostream_dict) {
if (matches(re_stream)) {
if (line.compare("stream\n"sv) == 0) {
state = st_in_ostream_offsets;
} else {
ostream_discarded.push_back(line);
@ -200,21 +218,22 @@ QdfFixer::processLines(std::list<std::string>& lines)
if (matches(re_ostream_obj)) {
checkObjId(m[1].str());
state = st_in_ostream_outer;
} else if (matches(re_endstream)) {
} else if (line.compare("endstream\n"sv) == 0) {
stream_length = QIntC::to_size(last_offset - stream_start);
writeOstream();
state = st_in_obj;
}
} else if (state == st_in_xref_stream_dict) {
if (matches(re_length_or_w)) {
if ((line.find("/Length"sv) != line.npos) ||
(line.find("/W"sv) != line.npos)) {
// already printed
} else if (matches(re_size)) {
} else if (line.find("/Size"sv) != line.npos) {
auto xref_size = 1 + xref.size();
std::cout << " /Size " << xref_size << "\n";
} else {
std::cout << line;
}
if (matches(re_stream)) {
if (line.compare("stream\n"sv) == 0) {
writeBinary(0, 1);
writeBinary(0, xref_f1_nbytes);
writeBinary(0, xref_f2_nbytes);
@ -238,13 +257,13 @@ QdfFixer::processLines(std::list<std::string>& lines)
state = st_done;
}
} else if (state == st_in_stream) {
if (matches(re_endstream)) {
if (line.compare("endstream\n"sv) == 0) {
stream_length = QIntC::to_size(last_offset - stream_start);
state = st_after_stream;
}
std::cout << line;
} else if (state == st_after_stream) {
if (matches(re_ignore_newline)) {
if (line.compare("%QDF: ignore_newline\n"sv) == 0) {
if (stream_length > 0) {
--stream_length;
}
@ -273,7 +292,7 @@ QdfFixer::processLines(std::list<std::string>& lines)
}
state = st_before_trailer;
} else if (state == st_before_trailer) {
if (matches(re_trailer)) {
if (line.compare("trailer <<\n"sv) == 0) {
std::cout << line;
state = st_in_trailer;
}
@ -284,7 +303,7 @@ QdfFixer::processLines(std::list<std::string>& lines)
} else {
std::cout << line;
}
if (matches(re_dict_end)) {
if (line.compare(">>\n"sv) == 0) {
std::cout << "startxref\n" << xref_offset << "\n%%EOF\n";
state = st_done;
}
@ -392,17 +411,17 @@ realmain(int argc, char* argv[])
} else if (argc == 2) {
filename = argv[1];
}
std::list<std::string> lines;
std::string input;
if (filename == 0) {
filename = "standard input";
QUtil::binary_stdin();
lines = QUtil::read_lines_from_file(stdin, true);
input = QUtil::read_file_into_string(stdin);
} else {
lines = QUtil::read_lines_from_file(filename, true);
input = QUtil::read_file_into_string(filename);
}
QUtil::binary_stdout();
QdfFixer qf(filename);
qf.processLines(lines);
qf.processLines(input);
return 0;
}