Convert scientific notation in JSON to fixed point (fixes #1079)

JSON accepts scientific notation, but PDF doesn't.
This commit is contained in:
Jay Berkenbilt 2023-12-21 17:38:49 -05:00
parent 4400ce84ee
commit b670565abc
5 changed files with 113 additions and 2 deletions

View File

@ -1,5 +1,9 @@
2023-12-21 Jay Berkenbilt <ejb@ql.org>
* Fix to QPDF JSON: a floating point number that appears in
scientific notation will be converted to fixed-point notation,
rounded to six digits after the decimal point. Fixes #1079.
* Fix to QPDF JSON: the syntax "n:/pdf-syntax" is now accepted as
an alternative way to represent names. This can be used for any
name (e.g. "n:/text#2fplain"), but it is necessary when the name

View File

@ -732,6 +732,15 @@ QPDF::JSONReactor::makeObject(JSON const& value)
if (QUtil::is_long_long(str_v.c_str())) {
result = QPDFObjectHandle::newInteger(QUtil::string_to_ll(str_v.c_str()));
} else {
// JSON allows scientific notation, but PDF does not. This branch is only reached when
// the number text is not a plain integer, so any 'e' or 'E' here marks an exponent and
// the text is rewritten in fixed-point form before it is stored as a PDF real.
if (str_v.find('e') != std::string::npos || str_v.find('E') != std::string::npos) {
try {
// std::stod throws std::invalid_argument or std::out_of_range when the text cannot be
// converted to a double.
auto v = std::stod(str_v);
// The ChangeLog entry for this change describes the result as fixed-point notation
// rounded to six digits after the decimal point.
str_v = QUtil::double_to_string(v);
} catch (std::exception&) {
// Keep it as it was: on a conversion failure the original text is passed through
// unchanged to newReal below (deliberate best effort).
}
}
result = QPDFObjectHandle::newReal(str_v);
}
} else if (value.getString(str_v)) {

View File

@ -52,6 +52,10 @@ Planned changes for future 12.x (subject to change):
must be represented as ``"n:/one#a0two"`` since the single byte
``a0`` is not valid in JSON.
- QPDF JSON will convert floating-point numbers that appear in the
JSON in scientific notation to fixed-point notation since PDF
doesn't accept scientific notation.
- Build Enhancements:
- The qpdf test suite now passes when qpdf is linked with an

View File

@ -347,16 +347,27 @@ $td->runtest("check C API write to JSON stream",
# (using #xx) would generate invalid JSON, even though qpdf's own JSON
# parser would accept it. Also, the JSON spec allows real numbers in
# scientific notation, but the PDF spec does not.
$n_tests += 2;
$n_tests += 4;
$td->runtest("handle binary names",
{$td->COMMAND =>
"qpdf --json-output weird-tokens.pdf a.json"},
{$td->STRING => "", $td->EXIT_STATUS => 0});
# Round-trip is tested above.
# Round-trip back to PDF is tested above.
$td->runtest("check json",
{$td->FILE => "a.json"},
{$td->FILE => "weird-tokens.json"},
$td->NORMALIZE_NEWLINES);
# Make sure we can properly handle JSON with scientific notation.
$td->runtest("weird tokens round trip json",
{$td->COMMAND =>
"qpdf --json-input --json-output weird-tokens.json -"},
{$td->FILE => "weird-tokens.json", $td->EXIT_STATUS => 0},
$td->NORMALIZE_NEWLINES);
$td->runtest("weird tokens with scientific notation",
{$td->COMMAND =>
"qpdf --json-input --json-output weird-tokens-alt.json -"},
{$td->FILE => "weird-tokens.json", $td->EXIT_STATUS => 0},
$td->NORMALIZE_NEWLINES);
cleanup();
$td->report($n_tests);

View File

@ -0,0 +1,83 @@
{
"qpdf": [
{
"jsonversion": 2,
"pdfversion": "2.0",
"pushedinheritedpageresources": false,
"calledgetallpages": false,
"maxobjectid": 6
},
{
"obj:1 0 R": {
"value": {
"/Extra": [
"u:Names with binary data",
"n:/ABCDEF+#ba#da#cc#e5",
"/ABCEDEF+π",
"n:/one+#a0two",
"n:/text#2fplain",
"u:Very small/large reals",
1e-05,
1e12
],
"/Pages": "2 0 R",
"/Type": "/Catalog"
}
},
"obj:2 0 R": {
"value": {
"/Count": 1,
"/Kids": [
"3 0 R"
],
"/Type": "/Pages"
}
},
"obj:3 0 R": {
"value": {
"/Contents": "4 0 R",
"/MediaBox": [
0,
0,
612,
792
],
"/Parent": "2 0 R",
"/Resources": {
"/Font": {
"/F1": "6 0 R"
}
},
"/Type": "/Page"
}
},
"obj:4 0 R": {
"stream": {
"data": "QlQKICAvRjEgMjQgVGYKICA3MiA3MjAgVGQKICAoUG90YXRvKSBUagpFVAo=",
"dict": {}
}
},
"obj:5 0 R": {
"value": 44
},
"obj:6 0 R": {
"value": {
"/BaseFont": "/Helvetica",
"/Encoding": "/WinAnsiEncoding",
"/Subtype": "/Type1",
"/Type": "/Font"
}
},
"trailer": {
"value": {
"/ID": [
"b:42841c13bbf709d79a200fa1691836f8",
"b:728c020f464c3cf7e02c12605fa7d88b"
],
"/Root": "1 0 R",
"/Size": 7
}
}
}
]
}