From b670565abc579de5bda946b7538545aa967e6cd2 Mon Sep 17 00:00:00 2001 From: Jay Berkenbilt Date: Thu, 21 Dec 2023 17:38:49 -0500 Subject: [PATCH] Convert scientific notation in JSON to fixed point (fixes #1079) JSON accepts scientific notation, but PDF doesn't. --- ChangeLog | 4 ++ libqpdf/QPDF_json.cc | 9 +++ manual/release-notes.rst | 4 ++ qpdf/qtest/qpdf-json.test | 15 ++++- qpdf/qtest/qpdf/weird-tokens-alt.json | 83 +++++++++++++++++++++++++++ 5 files changed, 113 insertions(+), 2 deletions(-) create mode 100644 qpdf/qtest/qpdf/weird-tokens-alt.json diff --git a/ChangeLog b/ChangeLog index 7450ddbc..b267f4f6 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,5 +1,9 @@ 2023-12-21 Jay Berkenbilt + * Fix to QPDF JSON: a floating point number that appears in + scientific notation will be converted to fixed-point notation, + rounded to six digits after the decimal point. Fixes #1079. + * Fix to QPDF JSON: the syntax "n:/pdf-syntax" is now accepted as an alternative way to represent names. This can be used for any name (e.g. "n:/text#2fplain"), but it is necessary when the name diff --git a/libqpdf/QPDF_json.cc b/libqpdf/QPDF_json.cc index 864e1a56..7951b1e4 100644 --- a/libqpdf/QPDF_json.cc +++ b/libqpdf/QPDF_json.cc @@ -732,6 +732,15 @@ QPDF::JSONReactor::makeObject(JSON const& value) if (QUtil::is_long_long(str_v.c_str())) { result = QPDFObjectHandle::newInteger(QUtil::string_to_ll(str_v.c_str())); } else { + // JSON allows scientific notation, but PDF does not. + if (str_v.find('e') != std::string::npos || str_v.find('E') != std::string::npos) { + try { + auto v = std::stod(str_v); + str_v = QUtil::double_to_string(v); + } catch (std::exception&) { + // Keep it as it was + } + } result = QPDFObjectHandle::newReal(str_v); } } else if (value.getString(str_v)) { diff --git a/manual/release-notes.rst b/manual/release-notes.rst index f720f99e..2fdd197f 100644 --- a/manual/release-notes.rst +++ b/manual/release-notes.rst @@ -52,6 +52,10 @@ Planned changes for future 12.x (subject to change): must be represented as ``"n:/one#a0two"`` since the single byte ``a0`` is not valid in JSON. + - QPDF JSON will convert floating numbers that appear in the JSON + in scientific notation to fixed-point notation since PDF doesn't + accept scientific notation. + - Build Enhancements: - The qpdf test suite now passes when qpdf is linked with an diff --git a/qpdf/qtest/qpdf-json.test b/qpdf/qtest/qpdf-json.test index 9691d995..0ea126ec 100644 --- a/qpdf/qtest/qpdf-json.test +++ b/qpdf/qtest/qpdf-json.test @@ -347,16 +347,27 @@ $td->runtest("check C API write to JSON stream", # (using #xx) would generate invalid JSON, even though qpdf's own JSON # parser would accept it. Also, the JSON spec allows real numbers in # scientific notation, but the PDF spec does not. -$n_tests += 2; +$n_tests += 4; $td->runtest("handle binary names", {$td->COMMAND => "qpdf --json-output weird-tokens.pdf a.json"}, {$td->STRING => "", $td->EXIT_STATUS => 0}); -# Round-trip is tested above. +# Round-trip back to PDF is tested above. $td->runtest("check json", {$td->FILE => "a.json"}, {$td->FILE => "weird-tokens.json"}, $td->NORMALIZE_NEWLINES); +# Make sure we can properly handle JSON with scientific notation. +$td->runtest("weird tokens round trip json", + {$td->COMMAND => + "qpdf --json-input --json-output weird-tokens.json -"}, + {$td->FILE => "weird-tokens.json", $td->EXIT_STATUS => 0}, + $td->NORMALIZE_NEWLINES); +$td->runtest("weird tokens with scientific notation", + {$td->COMMAND => + "qpdf --json-input --json-output weird-tokens-alt.json -"}, + {$td->FILE => "weird-tokens.json", $td->EXIT_STATUS => 0}, + $td->NORMALIZE_NEWLINES); cleanup(); $td->report($n_tests); diff --git a/qpdf/qtest/qpdf/weird-tokens-alt.json b/qpdf/qtest/qpdf/weird-tokens-alt.json new file mode 100644 index 00000000..5a9f8ff8 --- /dev/null +++ b/qpdf/qtest/qpdf/weird-tokens-alt.json @@ -0,0 +1,83 @@ +{ + "qpdf": [ + { + "jsonversion": 2, + "pdfversion": "2.0", + "pushedinheritedpageresources": false, + "calledgetallpages": false, + "maxobjectid": 6 + }, + { + "obj:1 0 R": { + "value": { + "/Extra": [ + "u:Names with binary data", + "n:/ABCDEF+#ba#da#cc#e5", + "/ABCEDEF+π", + "n:/one+#a0two", + "n:/text#2fplain", + "u:Very small/large reals", + 1e-05, + 1e12 + ], + "/Pages": "2 0 R", + "/Type": "/Catalog" + } + }, + "obj:2 0 R": { + "value": { + "/Count": 1, + "/Kids": [ + "3 0 R" + ], + "/Type": "/Pages" + } + }, + "obj:3 0 R": { + "value": { + "/Contents": "4 0 R", + "/MediaBox": [ + 0, + 0, + 612, + 792 + ], + "/Parent": "2 0 R", + "/Resources": { + "/Font": { + "/F1": "6 0 R" + } + }, + "/Type": "/Page" + } + }, + "obj:4 0 R": { + "stream": { + "data": "QlQKICAvRjEgMjQgVGYKICA3MiA3MjAgVGQKICAoUG90YXRvKSBUagpFVAo=", + "dict": {} + } + }, + "obj:5 0 R": { + "value": 44 + }, + "obj:6 0 R": { + "value": { + "/BaseFont": "/Helvetica", + "/Encoding": "/WinAnsiEncoding", + "/Subtype": "/Type1", + "/Type": "/Font" + } + }, + "trailer": { + "value": { + "/ID": [ + "b:42841c13bbf709d79a200fa1691836f8", + "b:728c020f464c3cf7e02c12605fa7d88b" + ], + "/Root": "1 0 R", + "/Size": 7 + } + } + } + ] +}