2012-07-21 05:37:14 -04:00
|
|
|
#include <qpdf/InputSource.hh>
|
2022-02-04 16:31:31 -05:00
|
|
|
|
2019-06-20 23:35:23 -04:00
|
|
|
#include <qpdf/QIntC.hh>
|
2022-04-02 17:14:10 -04:00
|
|
|
#include <qpdf/QTC.hh>
|
|
|
|
#include <stdexcept>
|
|
|
|
#include <string.h>
|
2012-07-21 05:37:14 -04:00
|
|
|
|
|
|
|
void
|
|
|
|
InputSource::setLastOffset(qpdf_offset_t offset)
|
|
|
|
{
|
|
|
|
this->last_offset = offset;
|
|
|
|
}
|
|
|
|
|
|
|
|
// Report the "last offset" value most recently stored in this input source
// (by setLastOffset() or by readLine()).
qpdf_offset_t
InputSource::getLastOffset() const
{
    return last_offset;
}
|
|
|
|
|
|
|
|
std::string
|
|
|
|
InputSource::readLine(size_t max_line_length)
|
|
|
|
{
|
|
|
|
// Return at most max_line_length characters from the next line.
|
|
|
|
// Lines are terminated by one or more \r or \n characters.
|
|
|
|
// Consume the trailing newline characters but don't return them.
|
|
|
|
// After this is called, the file will be positioned after a line
|
|
|
|
// terminator or at the end of the file, and last_offset will
|
|
|
|
// point to position the file had when this method was called.
|
|
|
|
|
|
|
|
qpdf_offset_t offset = this->tell();
|
2022-02-06 13:53:16 -05:00
|
|
|
auto bp = std::make_unique<char[]>(max_line_length + 1);
|
|
|
|
char* buf = bp.get();
|
2012-07-21 05:37:14 -04:00
|
|
|
memset(buf, '\0', max_line_length + 1);
|
|
|
|
this->read(buf, max_line_length);
|
|
|
|
this->seek(offset, SEEK_SET);
|
|
|
|
qpdf_offset_t eol = this->findAndSkipNextEOL();
|
|
|
|
this->last_offset = offset;
|
2019-06-20 23:35:23 -04:00
|
|
|
size_t line_length = QIntC::to_size(eol - offset);
|
2022-04-02 17:14:10 -04:00
|
|
|
if (line_length < max_line_length) {
|
2012-07-21 05:37:14 -04:00
|
|
|
buf[line_length] = '\0';
|
|
|
|
}
|
|
|
|
return std::string(buf);
|
|
|
|
}
|
2017-08-06 08:42:01 -04:00
|
|
|
|
|
|
|
bool
|
2022-04-02 17:14:10 -04:00
|
|
|
InputSource::findFirst(
|
|
|
|
char const* start_chars, qpdf_offset_t offset, size_t len, Finder& finder)
|
2017-08-06 08:42:01 -04:00
|
|
|
{
|
|
|
|
// Basic approach: search for the first character of start_chars
|
|
|
|
// starting from offset but not going past len (if len != 0). Once
|
|
|
|
// the first character is found, see if it is the beginning of a
|
|
|
|
// sequence of characters matching start_chars. If so, call
|
|
|
|
// finder.check() to do caller-specific additional checks. If not,
|
|
|
|
// keep searching.
|
|
|
|
|
|
|
|
// This code is tricky and highly subject to off-by-one or other
|
|
|
|
// edge case logic errors. See comments throughout that explain
|
|
|
|
// how we're not missing any edge cases. There are also tests
|
|
|
|
// specifically constructed to make sure we caught the edge cases
|
|
|
|
// in testing.
|
|
|
|
|
|
|
|
char buf[1025]; // size known to input_source.cc in libtests
|
|
|
|
// To enable us to guarantee null-termination, save an extra byte
|
|
|
|
// so that buf[size] is valid memory.
|
|
|
|
size_t size = sizeof(buf) - 1;
|
2022-04-02 17:14:10 -04:00
|
|
|
if ((strlen(start_chars) < 1) || (strlen(start_chars) > size)) {
|
2017-08-06 08:42:01 -04:00
|
|
|
throw std::logic_error(
|
|
|
|
"InputSource::findSource called with"
|
|
|
|
" too small or too large of a character sequence");
|
|
|
|
}
|
|
|
|
|
|
|
|
char* p = 0;
|
|
|
|
qpdf_offset_t buf_offset = offset;
|
|
|
|
size_t bytes_read = 0;
|
|
|
|
|
|
|
|
// Guarantee that we return from this loop. Each time through, we
|
|
|
|
// either return, advance p, or restart the loop with a condition
|
|
|
|
// that will cause return on the next pass. Eventually we will
|
|
|
|
// either be out of range or hit EOF, either of which forces us to
|
|
|
|
// return.
|
2022-04-02 17:14:10 -04:00
|
|
|
while (true) {
|
2017-08-06 08:42:01 -04:00
|
|
|
// Do we need to read more data? Pretend size = 5, buf starts
|
|
|
|
// at 0, and start_chars has 3 characters. buf[5] is valid and
|
|
|
|
// null. If p == 2, start_chars could be buf[2] through
|
|
|
|
// buf[4], so p + strlen(start_chars) == buf + size is okay.
|
|
|
|
// If p points to buf[size], since strlen(start_chars) is
|
|
|
|
// always >= 1, this overflow test will be correct for that
|
|
|
|
// case regardless of start_chars.
|
2022-04-02 17:14:10 -04:00
|
|
|
if ((p == 0) || ((p + strlen(start_chars)) > (buf + bytes_read))) {
|
|
|
|
if (p) {
|
|
|
|
QTC::TC(
|
|
|
|
"libtests",
|
|
|
|
"InputSource read next block",
|
|
|
|
((p == buf + bytes_read) ? 0 : 1));
|
2017-08-06 08:42:01 -04:00
|
|
|
buf_offset += (p - buf);
|
|
|
|
}
|
|
|
|
this->seek(buf_offset, SEEK_SET);
|
|
|
|
// Read into buffer and zero out the rest of the buffer
|
|
|
|
// including buf[size]. We allocated an extra byte so that
|
|
|
|
// we could guarantee null termination as an extra
|
|
|
|
// protection against overrun when using string functions.
|
|
|
|
bytes_read = this->read(buf, size);
|
2022-04-02 17:14:10 -04:00
|
|
|
if (bytes_read < strlen(start_chars)) {
|
|
|
|
QTC::TC(
|
|
|
|
"libtests",
|
|
|
|
"InputSource find EOF",
|
|
|
|
bytes_read == 0 ? 0 : 1);
|
2017-08-06 08:42:01 -04:00
|
|
|
return false;
|
|
|
|
}
|
|
|
|
memset(buf + bytes_read, '\0', 1 + (size - bytes_read));
|
|
|
|
p = buf;
|
|
|
|
}
|
|
|
|
|
|
|
|
// Search for the first character.
|
2022-04-03 16:10:27 -04:00
|
|
|
if ((p = static_cast<char*>(
|
|
|
|
// line-break
|
|
|
|
memchr(
|
|
|
|
p,
|
|
|
|
start_chars[0],
|
|
|
|
bytes_read - QIntC::to_size(p - buf)))) != 0) {
|
2022-04-02 17:14:10 -04:00
|
|
|
if (p == buf) {
|
2017-08-06 08:42:01 -04:00
|
|
|
QTC::TC("libtests", "InputSource found match at buf[0]");
|
|
|
|
}
|
|
|
|
// Found first letter.
|
2022-04-02 17:14:10 -04:00
|
|
|
if (len != 0) {
|
2017-08-06 08:42:01 -04:00
|
|
|
// Make sure it's in range.
|
2019-06-20 23:35:23 -04:00
|
|
|
size_t p_relative_offset =
|
|
|
|
QIntC::to_size((p - buf) + (buf_offset - offset));
|
2022-04-02 17:14:10 -04:00
|
|
|
if (p_relative_offset >= len) {
|
2017-08-06 08:42:01 -04:00
|
|
|
// out of range
|
|
|
|
QTC::TC("libtests", "InputSource out of range");
|
|
|
|
return false;
|
|
|
|
}
|
|
|
|
}
|
2022-04-02 17:14:10 -04:00
|
|
|
if ((p + strlen(start_chars)) > (buf + bytes_read)) {
|
2017-08-06 08:42:01 -04:00
|
|
|
// If there are not enough bytes left in the file for
|
|
|
|
// start_chars, we will detect this on the next pass
|
|
|
|
// as EOF and return.
|
|
|
|
QTC::TC("libtests", "InputSource not enough bytes");
|
|
|
|
continue;
|
|
|
|
}
|
|
|
|
|
|
|
|
// See if p points to a sequence matching start_chars. We
|
|
|
|
// already checked above to make sure we are not going to
|
|
|
|
// overrun memory.
|
2022-04-02 17:14:10 -04:00
|
|
|
if (strncmp(p, start_chars, strlen(start_chars)) == 0) {
|
2017-08-06 08:42:01 -04:00
|
|
|
// Call finder.check() with the input source
|
|
|
|
// positioned to the point of the match.
|
|
|
|
this->seek(buf_offset + (p - buf), SEEK_SET);
|
2022-04-02 17:14:10 -04:00
|
|
|
if (finder.check()) {
|
2017-08-06 08:42:01 -04:00
|
|
|
return true;
|
2022-04-02 17:14:10 -04:00
|
|
|
} else {
|
|
|
|
QTC::TC(
|
|
|
|
"libtests",
|
|
|
|
"InputSource start_chars matched but not check");
|
2017-08-06 08:42:01 -04:00
|
|
|
}
|
2022-04-02 17:14:10 -04:00
|
|
|
} else {
|
|
|
|
QTC::TC(
|
|
|
|
"libtests",
|
|
|
|
"InputSource first char matched but not string");
|
2017-08-06 08:42:01 -04:00
|
|
|
}
|
|
|
|
// This occurrence of the first character wasn't a match.
|
|
|
|
// Skip over it and keep searching.
|
|
|
|
++p;
|
2022-04-02 17:14:10 -04:00
|
|
|
} else {
|
2017-08-06 08:42:01 -04:00
|
|
|
// Trigger reading the next block
|
|
|
|
p = buf + bytes_read;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
throw std::logic_error("InputSource after while (true)");
|
|
|
|
}
|
|
|
|
|
|
|
|
bool
|
2022-04-02 17:14:10 -04:00
|
|
|
InputSource::findLast(
|
|
|
|
char const* start_chars, qpdf_offset_t offset, size_t len, Finder& finder)
|
2017-08-06 08:42:01 -04:00
|
|
|
{
|
|
|
|
bool found = false;
|
|
|
|
qpdf_offset_t after_found_offset = 0;
|
|
|
|
qpdf_offset_t cur_offset = offset;
|
|
|
|
size_t cur_len = len;
|
2022-04-02 17:14:10 -04:00
|
|
|
while (this->findFirst(start_chars, cur_offset, cur_len, finder)) {
|
|
|
|
if (found) {
|
2017-08-06 08:42:01 -04:00
|
|
|
QTC::TC("libtests", "InputSource findLast found more than one");
|
2022-04-02 17:14:10 -04:00
|
|
|
} else {
|
2017-08-06 08:42:01 -04:00
|
|
|
found = true;
|
|
|
|
}
|
|
|
|
after_found_offset = this->tell();
|
|
|
|
cur_offset = after_found_offset;
|
2019-06-20 23:35:23 -04:00
|
|
|
cur_len = len - QIntC::to_size((cur_offset - offset));
|
2017-08-06 08:42:01 -04:00
|
|
|
}
|
2022-04-02 17:14:10 -04:00
|
|
|
if (found) {
|
2017-08-06 08:42:01 -04:00
|
|
|
this->seek(after_found_offset, SEEK_SET);
|
|
|
|
}
|
|
|
|
return found;
|
|
|
|
}
|