mirror of
https://github.com/qpdf/qpdf.git
synced 2024-12-22 19:08:59 +00:00
Merge pull request #920 from m-holger/fixqdf_rl
Refactor QdfFixer::processLines
This commit is contained in:
commit
a2c7471e66
@ -485,6 +485,11 @@ namespace QUtil
|
||||
void read_file_into_memory(
|
||||
char const* filename, std::shared_ptr<char>& file_buf, size_t& size);
|
||||
|
||||
QPDF_DLL
|
||||
std::string read_file_into_string(char const* filename);
|
||||
QPDF_DLL
|
||||
std::string read_file_into_string(FILE* f, std::string_view filename = "");
|
||||
|
||||
// This used to be called strcasecmp, but that is a macro on some
|
||||
// platforms, so we have to give it a name that is not likely to
|
||||
// be a macro anywhere.
|
||||
|
@ -760,11 +760,9 @@ QPDFJob::Config::showObject(std::string const& parameter)
|
||||
QPDFJob::Config*
|
||||
QPDFJob::Config::jobJsonFile(std::string const& parameter)
|
||||
{
|
||||
std::shared_ptr<char> file_buf;
|
||||
size_t size;
|
||||
QUtil::read_file_into_memory(parameter.c_str(), file_buf, size);
|
||||
try {
|
||||
o.initializeFromJson(std::string(file_buf.get(), size), true);
|
||||
o.initializeFromJson(
|
||||
QUtil::read_file_into_string(parameter.c_str()), true);
|
||||
} catch (std::exception& e) {
|
||||
throw std::runtime_error(
|
||||
"error with job-json file " + std::string(parameter) + ": " +
|
||||
|
@ -1243,6 +1243,37 @@ QUtil::read_file_into_memory(
|
||||
}
|
||||
}
|
||||
|
||||
std::string
|
||||
QUtil::read_file_into_string(char const* filename)
|
||||
{
|
||||
FILE* f = safe_fopen(filename, "rb");
|
||||
FileCloser fc(f);
|
||||
return read_file_into_string(f, filename);
|
||||
}
|
||||
|
||||
std::string
|
||||
QUtil::read_file_into_string(FILE* f, std::string_view filename)
|
||||
{
|
||||
fseek(f, 0, SEEK_END);
|
||||
auto size = QIntC::to_size(QUtil::tell(f));
|
||||
fseek(f, 0, SEEK_SET);
|
||||
std::string result(size, '\0');
|
||||
if (auto read = fread(result.data(), 1, size, f); read != size) {
|
||||
if (ferror(f)) {
|
||||
throw std::runtime_error(
|
||||
std::string("failure reading file ") + std::string(filename) +
|
||||
" into memory: read " + uint_to_string(read) + "; wanted " +
|
||||
uint_to_string(size));
|
||||
} else {
|
||||
throw std::runtime_error(
|
||||
std::string("premature eof reading file ") +
|
||||
std::string(filename) + " into memory: read " +
|
||||
uint_to_string(read) + "; wanted " + uint_to_string(size));
|
||||
}
|
||||
}
|
||||
return result;
|
||||
}
|
||||
|
||||
static bool
|
||||
read_char_from_FILE(char& ch, FILE* f)
|
||||
{
|
||||
|
File diff suppressed because one or more lines are too long
@ -576,6 +576,12 @@ read_from_file_test()
|
||||
auto buf2 = b2.getBufferSharedPointer();
|
||||
assert(buf2->getSize() == size);
|
||||
assert(memcmp(buf2->getBuffer(), p, size) == 0);
|
||||
|
||||
auto s = QUtil::read_file_into_string("other-file");
|
||||
std::cout << "read " << s.size() << " bytes" << std::endl;
|
||||
assert(s.size() == 24652);
|
||||
assert(s.substr(0, 36) == "This file is used for qutil testing.");
|
||||
assert(s.substr(24641, 10) == "very long.");
|
||||
}
|
||||
|
||||
void
|
||||
|
107
qpdf/fix-qdf.cc
107
qpdf/fix-qdf.cc
@ -6,6 +6,7 @@
|
||||
#include <cstring>
|
||||
#include <iostream>
|
||||
#include <regex>
|
||||
#include <string_view>
|
||||
|
||||
static char const* whoami = 0;
|
||||
|
||||
@ -20,7 +21,7 @@ class QdfFixer
|
||||
{
|
||||
public:
|
||||
QdfFixer(std::string const& filename);
|
||||
void processLines(std::list<std::string>& lines);
|
||||
void processLines(std::string const& input);
|
||||
|
||||
private:
|
||||
void fatal(std::string const&);
|
||||
@ -58,9 +59,9 @@ class QdfFixer
|
||||
size_t xref_f1_nbytes;
|
||||
size_t xref_f2_nbytes;
|
||||
size_t xref_size;
|
||||
std::vector<std::string> ostream;
|
||||
std::vector<std::string_view> ostream;
|
||||
std::vector<qpdf_offset_t> ostream_offsets;
|
||||
std::vector<std::string> ostream_discarded;
|
||||
std::vector<std::string_view> ostream_discarded;
|
||||
size_t ostream_idx;
|
||||
int ostream_id;
|
||||
std::string ostream_extends;
|
||||
@ -92,54 +93,71 @@ QdfFixer::fatal(std::string const& msg)
|
||||
}
|
||||
|
||||
void
|
||||
QdfFixer::processLines(std::list<std::string>& lines)
|
||||
QdfFixer::processLines(std::string const& input)
|
||||
{
|
||||
static std::regex re_n_0_obj("^(\\d+) 0 obj\n$");
|
||||
static std::regex re_xref("^xref\n$");
|
||||
static std::regex re_stream("^stream\n$");
|
||||
static std::regex re_endobj("^endobj\n$");
|
||||
static std::regex re_type_objstm("/Type /ObjStm");
|
||||
static std::regex re_type_xref("/Type /XRef");
|
||||
static std::regex re_extends("/Extends (\\d+ 0 R)");
|
||||
static std::regex re_ostream_obj("^%% Object stream: object (\\d+)");
|
||||
static std::regex re_endstream("^endstream\n$");
|
||||
static std::regex re_length_or_w("/(Length|W) ");
|
||||
static std::regex re_size("/Size ");
|
||||
static std::regex re_ignore_newline("^%QDF: ignore_newline\n$");
|
||||
static std::regex re_num("^\\d+\n$");
|
||||
static std::regex re_trailer("^trailer <<");
|
||||
static std::regex re_size_n("^ /Size \\d+\n$");
|
||||
static std::regex re_dict_end("^>>\n$");
|
||||
using namespace std::literals;
|
||||
|
||||
static const std::regex re_n_0_obj("^(\\d+) 0 obj\n$");
|
||||
static const std::regex re_extends("/Extends (\\d+ 0 R)");
|
||||
static const std::regex re_ostream_obj("^%% Object stream: object (\\d+)");
|
||||
static const std::regex re_num("^\\d+\n$");
|
||||
static const std::regex re_size_n("^ /Size \\d+\n$");
|
||||
|
||||
auto sv_diff = [](size_t i) {
|
||||
return static_cast<std::string_view::difference_type>(i);
|
||||
};
|
||||
|
||||
lineno = 0;
|
||||
for (auto const& line: lines) {
|
||||
bool more = true;
|
||||
auto len_line = sv_diff(0);
|
||||
|
||||
std::string_view line;
|
||||
std::string_view input_view{input.data(), input.size()};
|
||||
size_t offs = 0;
|
||||
|
||||
auto b_line = input.cbegin();
|
||||
std::smatch m;
|
||||
auto const matches = [&m, &b_line, &len_line](std::regex const& r) {
|
||||
return std::regex_search(b_line, b_line + len_line, m, r);
|
||||
};
|
||||
|
||||
while (more) {
|
||||
++lineno;
|
||||
last_offset = offset;
|
||||
offset += QIntC::to_offset(line.length());
|
||||
std::smatch m;
|
||||
auto matches = [&m, &line](std::regex& r) {
|
||||
return std::regex_search(line, m, r);
|
||||
};
|
||||
b_line += len_line;
|
||||
|
||||
offs = input_view.find('\n');
|
||||
if (offs == std::string::npos) {
|
||||
more = false;
|
||||
line = input_view;
|
||||
} else {
|
||||
offs++;
|
||||
line = input_view.substr(0, offs);
|
||||
input_view.remove_prefix(offs);
|
||||
}
|
||||
len_line = sv_diff(line.size());
|
||||
offset += len_line;
|
||||
|
||||
if (state == st_top) {
|
||||
if (matches(re_n_0_obj)) {
|
||||
checkObjId(m[1].str());
|
||||
state = st_in_obj;
|
||||
} else if (matches(re_xref)) {
|
||||
} else if (line.compare("xref\n"sv) == 0) {
|
||||
xref_offset = last_offset;
|
||||
state = st_at_xref;
|
||||
}
|
||||
std::cout << line;
|
||||
} else if (state == st_in_obj) {
|
||||
std::cout << line;
|
||||
if (matches(re_stream)) {
|
||||
if (line.compare("stream\n"sv) == 0) {
|
||||
state = st_in_stream;
|
||||
stream_start = offset;
|
||||
} else if (matches(re_endobj)) {
|
||||
} else if (line.compare("endobj\n"sv) == 0) {
|
||||
state = st_top;
|
||||
} else if (matches(re_type_objstm)) {
|
||||
} else if (line.find("/Type /ObjStm"sv) != line.npos) {
|
||||
state = st_in_ostream_dict;
|
||||
ostream_id = last_obj;
|
||||
} else if (matches(re_type_xref)) {
|
||||
} else if (line.find("/Type /XRef"sv) != line.npos) {
|
||||
xref_offset = xref.back().getOffset();
|
||||
xref_f1_nbytes = 0;
|
||||
auto t = xref_offset;
|
||||
@ -171,7 +189,7 @@ QdfFixer::processLines(std::list<std::string>& lines)
|
||||
state = st_in_xref_stream_dict;
|
||||
}
|
||||
} else if (state == st_in_ostream_dict) {
|
||||
if (matches(re_stream)) {
|
||||
if (line.compare("stream\n"sv) == 0) {
|
||||
state = st_in_ostream_offsets;
|
||||
} else {
|
||||
ostream_discarded.push_back(line);
|
||||
@ -200,21 +218,22 @@ QdfFixer::processLines(std::list<std::string>& lines)
|
||||
if (matches(re_ostream_obj)) {
|
||||
checkObjId(m[1].str());
|
||||
state = st_in_ostream_outer;
|
||||
} else if (matches(re_endstream)) {
|
||||
} else if (line.compare("endstream\n"sv) == 0) {
|
||||
stream_length = QIntC::to_size(last_offset - stream_start);
|
||||
writeOstream();
|
||||
state = st_in_obj;
|
||||
}
|
||||
} else if (state == st_in_xref_stream_dict) {
|
||||
if (matches(re_length_or_w)) {
|
||||
if ((line.find("/Length"sv) != line.npos) ||
|
||||
(line.find("/W"sv) != line.npos)) {
|
||||
// already printed
|
||||
} else if (matches(re_size)) {
|
||||
} else if (line.find("/Size"sv) != line.npos) {
|
||||
auto xref_size = 1 + xref.size();
|
||||
std::cout << " /Size " << xref_size << "\n";
|
||||
} else {
|
||||
std::cout << line;
|
||||
}
|
||||
if (matches(re_stream)) {
|
||||
if (line.compare("stream\n"sv) == 0) {
|
||||
writeBinary(0, 1);
|
||||
writeBinary(0, xref_f1_nbytes);
|
||||
writeBinary(0, xref_f2_nbytes);
|
||||
@ -238,13 +257,13 @@ QdfFixer::processLines(std::list<std::string>& lines)
|
||||
state = st_done;
|
||||
}
|
||||
} else if (state == st_in_stream) {
|
||||
if (matches(re_endstream)) {
|
||||
if (line.compare("endstream\n"sv) == 0) {
|
||||
stream_length = QIntC::to_size(last_offset - stream_start);
|
||||
state = st_after_stream;
|
||||
}
|
||||
std::cout << line;
|
||||
} else if (state == st_after_stream) {
|
||||
if (matches(re_ignore_newline)) {
|
||||
if (line.compare("%QDF: ignore_newline\n"sv) == 0) {
|
||||
if (stream_length > 0) {
|
||||
--stream_length;
|
||||
}
|
||||
@ -273,7 +292,7 @@ QdfFixer::processLines(std::list<std::string>& lines)
|
||||
}
|
||||
state = st_before_trailer;
|
||||
} else if (state == st_before_trailer) {
|
||||
if (matches(re_trailer)) {
|
||||
if (line.compare("trailer <<\n"sv) == 0) {
|
||||
std::cout << line;
|
||||
state = st_in_trailer;
|
||||
}
|
||||
@ -284,7 +303,7 @@ QdfFixer::processLines(std::list<std::string>& lines)
|
||||
} else {
|
||||
std::cout << line;
|
||||
}
|
||||
if (matches(re_dict_end)) {
|
||||
if (line.compare(">>\n"sv) == 0) {
|
||||
std::cout << "startxref\n" << xref_offset << "\n%%EOF\n";
|
||||
state = st_done;
|
||||
}
|
||||
@ -392,17 +411,17 @@ realmain(int argc, char* argv[])
|
||||
} else if (argc == 2) {
|
||||
filename = argv[1];
|
||||
}
|
||||
std::list<std::string> lines;
|
||||
std::string input;
|
||||
if (filename == 0) {
|
||||
filename = "standard input";
|
||||
QUtil::binary_stdin();
|
||||
lines = QUtil::read_lines_from_file(stdin, true);
|
||||
input = QUtil::read_file_into_string(stdin);
|
||||
} else {
|
||||
lines = QUtil::read_lines_from_file(filename, true);
|
||||
input = QUtil::read_file_into_string(filename);
|
||||
}
|
||||
QUtil::binary_stdout();
|
||||
QdfFixer qf(filename);
|
||||
qf.processLines(lines);
|
||||
qf.processLines(input);
|
||||
return 0;
|
||||
}
|
||||
|
||||
|
Loading…
Reference in New Issue
Block a user