2
1
mirror of https://github.com/qpdf/qpdf.git synced 2024-12-22 02:49:00 +00:00

Add QPDF::Xref_table methods type, offset, stream_number and stream_index

This commit is contained in:
m-holger 2024-08-10 14:04:32 +01:00
parent d64b14912d
commit 5fc257f0f7
3 changed files with 75 additions and 55 deletions

View File

@ -1769,20 +1769,17 @@ QPDF::readObjectAtOffset(
if (try_recovery) {
// Try again after reconstructing xref table
m->xref_table.reconstruct(e);
if (m->xref_table.count(exp_og) && (m->xref_table[exp_og].getType() == 1)) {
qpdf_offset_t new_offset = m->xref_table[exp_og].getOffset();
QPDFObjectHandle result =
readObjectAtOffset(false, new_offset, description, exp_og, og, false);
if (m->xref_table.type(exp_og) == 1) {
QTC::TC("qpdf", "QPDF recovered in readObjectAtOffset");
return result;
return readObjectAtOffset(
false, m->xref_table.offset(exp_og), description, exp_og, og, false);
} else {
QTC::TC("qpdf", "QPDF object gone after xref reconstruction");
warn(damagedPDF(
"",
0,
("object " + exp_og.unparse(' ') +
" not found in file after regenerating cross reference "
"table")));
" not found in file after regenerating cross reference table")));
return QPDFObjectHandle::newNull();
}
} else {
@ -1815,7 +1812,7 @@ QPDF::readObjectAtOffset(
}
}
qpdf_offset_t end_after_space = m->file->tell();
if (skip_cache_if_in_xref && m->xref_table.count(og)) {
if (skip_cache_if_in_xref && m->xref_table.type(og)) {
// Ordinarily, an object gets read here when resolved through xref table or stream. In
// the special case of the xref stream and linearization hint tables, the offset comes
// from another source. For the specific case of xref streams, the xref stream is read
@ -1867,33 +1864,32 @@ QPDF::resolve(QPDFObjGen og)
}
ResolveRecorder rr(this, og);
if (m->xref_table.count(og) != 0) {
QPDFXRefEntry const& entry = m->xref_table[og];
try {
switch (entry.getType()) {
case 1:
{
qpdf_offset_t offset = entry.getOffset();
// Object stored in cache by readObjectAtOffset
QPDFObjGen a_og;
QPDFObjectHandle oh = readObjectAtOffset(true, offset, "", og, a_og, false);
}
break;
case 2:
resolveObjectsInStream(entry.getObjStreamNumber());
break;
default:
throw damagedPDF(
"", 0, ("object " + og.unparse('/') + " has unexpected xref entry type"));
try {
switch (m->xref_table.type(og)) {
case 0:
break;
case 1:
{
// Object stored in cache by readObjectAtOffset
QPDFObjGen a_og;
QPDFObjectHandle oh =
readObjectAtOffset(true, m->xref_table.offset(og), "", og, a_og, false);
}
} catch (QPDFExc& e) {
warn(e);
} catch (std::exception& e) {
warn(damagedPDF(
"", 0, ("object " + og.unparse('/') + ": error reading object: " + e.what())));
break;
case 2:
resolveObjectsInStream(m->xref_table.stream_number(og.getObj()));
break;
default:
throw damagedPDF(
"", 0, ("object " + og.unparse('/') + " has unexpected xref entry type"));
}
} catch (QPDFExc& e) {
warn(e);
} catch (std::exception& e) {
warn(damagedPDF(
"", 0, ("object " + og.unparse('/') + ": error reading object: " + e.what())));
}
if (isUnresolved(og)) {
@ -2107,7 +2103,7 @@ QPDF::getObjectForParser(int id, int gen, bool parse_pdf)
if (auto iter = m->obj_cache.find(og); iter != m->obj_cache.end()) {
return iter->second.object;
}
if (m->xref_table.count(og) || !m->xref_table.parsed) {
if (m->xref_table.type(og) || !m->xref_table.parsed) {
return m->obj_cache.insert({og, QPDF_Unresolved::create(this, og)}).first->second.object;
}
if (parse_pdf) {
@ -2123,9 +2119,8 @@ QPDF::getObjectForJSON(int id, int gen)
auto [it, inserted] = m->obj_cache.try_emplace(og);
auto& obj = it->second.object;
if (inserted) {
obj = (m->xref_table.parsed && !m->xref_table.count(og))
? QPDF_Null::create(this, og)
: QPDF_Unresolved::create(this, og);
obj = (m->xref_table.parsed && !m->xref_table.type(og)) ? QPDF_Null::create(this, og)
: QPDF_Unresolved::create(this, og);
}
return obj;
}
@ -2135,7 +2130,7 @@ QPDF::getObject(QPDFObjGen const& og)
{
if (auto it = m->obj_cache.find(og); it != m->obj_cache.end()) {
return {it->second.object};
} else if (m->xref_table.parsed && !m->xref_table.count(og)) {
} else if (m->xref_table.parsed && !m->xref_table.type(og)) {
return QPDF_Null::create();
} else {
auto result = m->obj_cache.try_emplace(og, QPDF_Unresolved::create(this, og), -1, -1);

View File

@ -445,7 +445,7 @@ QPDF::checkLinearizationInternal()
for (size_t i = 0; i < toS(npages); ++i) {
QPDFObjectHandle const& page = pages.at(i);
QPDFObjGen og(page.getObjGen());
if (m->xref_table[og].getType() == 2) {
if (m->xref_table.type(og) == 2) {
linearizationWarning(
"page dictionary for page " + std::to_string(i) + " is compressed");
}
@ -556,23 +556,18 @@ QPDF::maxEnd(ObjUser const& ou)
qpdf_offset_t
QPDF::getLinearizationOffset(QPDFObjGen const& og)
{
QPDFXRefEntry entry = m->xref_table[og];
qpdf_offset_t result = 0;
switch (entry.getType()) {
switch (m->xref_table.type(og)) {
case 1:
result = entry.getOffset();
break;
return m->xref_table.offset(og);
case 2:
// For compressed objects, return the offset of the object stream that contains them.
result = getLinearizationOffset(QPDFObjGen(entry.getObjStreamNumber(), 0));
break;
return getLinearizationOffset(QPDFObjGen(m->xref_table.stream_number(og.getObj()), 0));
default:
stopOnError("getLinearizationOffset called for xref entry not of type 1 or 2");
break;
return 0; // unreachable
}
return result;
}
QPDFObjectHandle
@ -603,7 +598,7 @@ QPDF::lengthNextN(int first_object, int n)
int length = 0;
for (int i = 0; i < n; ++i) {
QPDFObjGen og(first_object + i, 0);
if (m->xref_table.count(og) == 0) {
if (m->xref_table.type(og) == 0) {
linearizationWarning(
"no xref table entry for " + std::to_string(first_object + i) + " 0");
} else {
@ -635,7 +630,7 @@ QPDF::checkHPageOffset(
int npages = toI(pages.size());
qpdf_offset_t table_offset = adjusted_offset(m->page_offset_hints.first_page_offset);
QPDFObjGen first_page_og(pages.at(0).getObjGen());
if (m->xref_table.count(first_page_og) == 0) {
if (m->xref_table.type(first_page_og) == 0) {
stopOnError("supposed first page object is not known");
}
qpdf_offset_t offset = getLinearizationOffset(first_page_og);
@ -646,7 +641,7 @@ QPDF::checkHPageOffset(
for (int pageno = 0; pageno < npages; ++pageno) {
QPDFObjGen page_og(pages.at(toS(pageno)).getObjGen());
int first_object = page_og.getObj();
if (m->xref_table.count(page_og) == 0) {
if (m->xref_table.type(page_og) == 0) {
stopOnError("unknown object in page offset hint table");
}
offset = getLinearizationOffset(page_og);
@ -768,7 +763,7 @@ QPDF::checkHSharedObject(std::vector<QPDFObjectHandle> const& pages, std::map<in
cur_object = so.first_shared_obj;
QPDFObjGen og(cur_object, 0);
if (m->xref_table.count(og) == 0) {
if (m->xref_table.type(og) == 0) {
stopOnError("unknown object in shared object hint table");
}
qpdf_offset_t offset = getLinearizationOffset(og);
@ -819,7 +814,7 @@ QPDF::checkHOutlines()
return;
}
QPDFObjGen og(outlines.getObjGen());
if (m->xref_table.count(og) == 0) {
if (m->xref_table.type(og) == 0) {
stopOnError("unknown object in outlines hint table");
}
qpdf_offset_t offset = getLinearizationOffset(og);
@ -838,8 +833,7 @@ QPDF::checkHOutlines()
std::to_string(table_length) + "; computed = " + std::to_string(length));
}
} else {
linearizationWarning("incorrect first object number in outline "
"hints table.");
linearizationWarning("incorrect first object number in outline hints table.");
}
} else {
linearizationWarning("incorrect object count in outline hint table");

View File

@ -19,6 +19,37 @@ class QPDF::Xref_table: public std::map<QPDFObjGen, QPDFXRefEntry>
void show();
bool resolve();
// Look up the xref entry type recorded for og.
// A result of 0 means the table has no entry for og.
int
type(QPDFObjGen og) const
{
    if (auto const entry = find(og); entry != end()) {
        return entry->second.getType();
    }
    return 0;
}
// Look up the offset stored in the xref entry for og.
// A result of 0 is returned when the table has no entry for og.
qpdf_offset_t
offset(QPDFObjGen og) const
{
    if (auto const entry = find(og); entry != end()) {
        return entry->second.getOffset();
    }
    return 0;
}
// Returns 0 if og is not in table.
int
stream_number(int id) const
{
auto it = find(QPDFObjGen(id, 0));
return it == end() ? 0 : it->second.getObjStreamNumber();
}
int
stream_index(int id) const
{
auto it = find(QPDFObjGen(id, 0));
return it == end() ? 0 : it->second.getObjStreamIndex();
}
QPDFObjectHandle trailer;
bool reconstructed{false};
// Various tables are indexed by object id, with potential size id + 1