2
1
mirror of https://github.com/qpdf/qpdf.git synced 2025-01-08 17:24:06 +00:00

Merge pull request #752 from jberkenbilt/report-mem-usage

Report mem usage
This commit is contained in:
Jay Berkenbilt 2022-08-31 15:50:17 -04:00 committed by GitHub
commit a078202c1b
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
19 changed files with 176 additions and 10 deletions

4
TODO
View File

@ -4,10 +4,12 @@ Next
Before Release: Before Release:
* Review in order #729, #726, #747 * Review in order #726
* Make ./performance_check usable by other people by having published * Make ./performance_check usable by other people by having published
files to use for testing. files to use for testing.
* https://opensource.adobe.com/dc-acrobat-sdk-docs/standards/pdfstandards/pdf/PDF32000_2008.pdf * https://opensource.adobe.com/dc-acrobat-sdk-docs/standards/pdfstandards/pdf/PDF32000_2008.pdf
* Incorporate --report-mem-usage into performance testing. Make sure
there is some test somewhere that exercises the millions of nulls case.
* Evaluate issues tagged with `next` * Evaluate issues tagged with `next`
* Stay on top of https://github.com/pikepdf/pikepdf/pull/315 * Stay on top of https://github.com/pikepdf/pikepdf/pull/315

View File

@ -131,6 +131,7 @@
"esize", "esize",
"eval", "eval",
"extlibdir", "extlibdir",
"fclose",
"fdict", "fdict",
"ffield", "ffield",
"fghij", "fghij",
@ -268,6 +269,7 @@
"maxdepth", "maxdepth",
"maxobjectid", "maxobjectid",
"mdash", "mdash",
"memstream",
"mindepth", "mindepth",
"mkdir", "mkdir",
"mkinstalldirs", "mkinstalldirs",

View File

@ -711,6 +711,7 @@ class QPDFJob
bool json_input; bool json_input;
bool json_output; bool json_output;
std::string update_from_json; std::string update_from_json;
bool report_mem_usage;
}; };
std::shared_ptr<Members> m; std::shared_ptr<Members> m;
}; };

View File

@ -525,7 +525,17 @@ namespace QUtil
wchar_t const* const argv[], wchar_t const* const argv[],
std::function<int(int, char const* const[])> realmain); std::function<int(int, char const* const[])> realmain);
#endif // QPDF_NO_WCHAR_T #endif // QPDF_NO_WCHAR_T
}; // namespace QUtil
// Try to return the maximum amount of memory allocated by the
// current process and its threads. Return 0 if unable to
// determine. This is Linux-specific and not implemented to be
// completely reliable. It is used during development for
// performance testing to detect changes that may significantly
// change memory usage. It is not recommended for use for other
// purposes.
QPDF_DLL
size_t get_max_memory_usage();
}; // namespace QUtil
inline bool inline bool
QUtil::is_hex_digit(char ch) QUtil::is_hex_digit(char ch)

View File

@ -33,6 +33,7 @@ QPDF_DLL Config* qdf();
QPDF_DLL Config* rawStreamData(); QPDF_DLL Config* rawStreamData();
QPDF_DLL Config* recompressFlate(); QPDF_DLL Config* recompressFlate();
QPDF_DLL Config* removePageLabels(); QPDF_DLL Config* removePageLabels();
QPDF_DLL Config* reportMemUsage();
QPDF_DLL Config* requiresPassword(); QPDF_DLL Config* requiresPassword();
QPDF_DLL Config* showEncryption(); QPDF_DLL Config* showEncryption();
QPDF_DLL Config* showEncryptionKey(); QPDF_DLL Config* showEncryptionKey();

View File

@ -3,15 +3,15 @@ generate_auto_job 9abe2ec994fb98526f5e3c0c199ce2e61a868463cb522a5bc6e9730b655341
include/qpdf/auto_job_c_att.hh 4c2b171ea00531db54720bf49a43f8b34481586ae7fb6cbf225099ee42bc5bb4 include/qpdf/auto_job_c_att.hh 4c2b171ea00531db54720bf49a43f8b34481586ae7fb6cbf225099ee42bc5bb4
include/qpdf/auto_job_c_copy_att.hh 50609012bff14fd82f0649185940d617d05d530cdc522185c7f3920a561ccb42 include/qpdf/auto_job_c_copy_att.hh 50609012bff14fd82f0649185940d617d05d530cdc522185c7f3920a561ccb42
include/qpdf/auto_job_c_enc.hh 28446f3c32153a52afa239ea40503e6cc8ac2c026813526a349e0cd4ae17ddd5 include/qpdf/auto_job_c_enc.hh 28446f3c32153a52afa239ea40503e6cc8ac2c026813526a349e0cd4ae17ddd5
include/qpdf/auto_job_c_main.hh cdba1ae6ea5525a585d10a3dd95b7996d62b17de4211fe658b78d9d463b0f313 include/qpdf/auto_job_c_main.hh 493b9798f5ff8bbcb07c0238693554d77eefa4ae71ce1a0d466de94e3a7a3966
include/qpdf/auto_job_c_pages.hh b3cc0f21029f6d89efa043dcdbfa183cb59325b6506001c18911614fe8e568ec include/qpdf/auto_job_c_pages.hh b3cc0f21029f6d89efa043dcdbfa183cb59325b6506001c18911614fe8e568ec
include/qpdf/auto_job_c_uo.hh ae21b69a1efa9333050f4833d465f6daff87e5b38e5106e49bbef5d4132e4ed1 include/qpdf/auto_job_c_uo.hh ae21b69a1efa9333050f4833d465f6daff87e5b38e5106e49bbef5d4132e4ed1
job.yml f9564f18b08a45d17328af43652645771d3498471820c858b8c9013a193e1412 job.yml a6f22d425980ed960c77c0a4197f46924c14e7943358cd9f0b75811bb1c480ad
libqpdf/qpdf/auto_job_decl.hh 7844eba58edffb9494b19e8eca6fd59a24d6e152ca606c3b07da569f753df2da libqpdf/qpdf/auto_job_decl.hh 7844eba58edffb9494b19e8eca6fd59a24d6e152ca606c3b07da569f753df2da
libqpdf/qpdf/auto_job_help.hh 53306e4aef8aaca641c0087bc9e064ada1c44a94b826c0bcac7b4eb0c8c41fd5 libqpdf/qpdf/auto_job_help.hh 3e9385a7e0dae993467647466fa30f30baa5968f9270c73ff4e664f5aa415dbe
libqpdf/qpdf/auto_job_init.hh fd1635a5ad6ba16b7ae008467145560a59a5ecfd10d29c5ef7cd0d8347747cd2 libqpdf/qpdf/auto_job_init.hh ccb881733849dff5c05721f1aa5c35447cedd415e881c3fef6573901e45be056
libqpdf/qpdf/auto_job_json_decl.hh 06caa46eaf71db8a50c046f91866baa8087745a9474319fb7c86d92634cc8297 libqpdf/qpdf/auto_job_json_decl.hh 06caa46eaf71db8a50c046f91866baa8087745a9474319fb7c86d92634cc8297
libqpdf/qpdf/auto_job_json_init.hh 59545578a2e47c660ff98516ed53f06638be75eb4658e2a09d32cc08e0cb7268 libqpdf/qpdf/auto_job_json_init.hh 7ac8f42fb39eda56144ab62b30152a56e9bb2224d0596eb826b7bc421a78d26b
libqpdf/qpdf/auto_job_schema.hh 5352ef1be1ad7cc6f4f36dab88f2937d278e6bd3a0e2d46259794dc226c8ba6b libqpdf/qpdf/auto_job_schema.hh 17352791b09c3b8a8db766375cce31d70c98b67b44ecc398e2ac78984e34fe90
manual/_ext/qpdf.py 6add6321666031d55ed4aedf7c00e5662bba856dfcd66ccb526563bffefbb580 manual/_ext/qpdf.py 6add6321666031d55ed4aedf7c00e5662bba856dfcd66ccb526563bffefbb580
manual/cli.rst 41ee93f23f46160fe9eaf7c99fd2ab3bd2e0f6792a341a35bdac1a41cb853ed5 manual/cli.rst e3fa48bb30c981df1f74d474887155cd6a46f9010b91cd1b7b57e582bf3bf877

View File

@ -127,6 +127,7 @@ options:
- recompress-flate - recompress-flate
- remove-page-labels - remove-page-labels
- replace-input - replace-input
- report-mem-usage
- requires-password - requires-password
- show-encryption - show-encryption
- show-encryption-key - show-encryption-key
@ -413,6 +414,7 @@ json:
Pages.password: Pages.password:
_range: "page range" _range: "page range"
remove-page-labels: remove-page-labels:
report-mem-usage:
rotate: rotate:
overlay: overlay:
_file: "source file for overlay" _file: "source file for overlay"

View File

@ -376,6 +376,29 @@ int main(int argc, char* argv[]) {
endif() endif()
endfunction() endfunction()
# Probe for malloc_info() from <malloc.h>. The result is stored in
# HAVE_MALLOC_INFO, which gates the memory usage reporting code in
# QUtil::get_max_memory_usage().
check_c_source_compiles(
"#include <malloc.h>
#include <stdio.h>
int main(int argc, char* argv[]) {
malloc_info(0, stdout);
return 0;
}"
HAVE_MALLOC_INFO)
# Probe for open_memstream(). The result is stored in
# HAVE_OPEN_MEMSTREAM; QUtil::get_max_memory_usage() needs it in
# addition to HAVE_MALLOC_INFO so that the malloc_info() report can
# be captured in memory.
check_c_source_compiles(
"#include <stdio.h>
#include <stdlib.h>
int main(int argc, char* argv[]) {
char* buf;
size_t size;
FILE* f;
f = open_memstream(&buf, &size);
fclose(f);
free(buf);
return 0;
}"
HAVE_OPEN_MEMSTREAM)
qpdf_check_ll_fmt("%lld" fmt_lld) qpdf_check_ll_fmt("%lld" fmt_lld)
qpdf_check_ll_fmt("%I64d" fmt_i64d) qpdf_check_ll_fmt("%I64d" fmt_i64d)
qpdf_check_ll_fmt("%I64lld" fmt_i64lld) qpdf_check_ll_fmt("%I64lld" fmt_i64lld)

View File

@ -417,7 +417,8 @@ QPDFJob::Members::Members() :
check_is_encrypted(false), check_is_encrypted(false),
check_requires_password(false), check_requires_password(false),
json_input(false), json_input(false),
json_output(false) json_output(false),
report_mem_usage(false)
{ {
} }
@ -625,6 +626,14 @@ QPDFJob::run()
<< ": operation succeeded with warnings\n"; << ": operation succeeded with warnings\n";
} }
} }
if (m->report_mem_usage) {
// Call get_max_memory_usage before generating output. When
// debugging, it's easier if print statements from
// get_max_memory_usage are not interleaved with the output.
auto mem_usage = QUtil::get_max_memory_usage();
*this->m->log->getWarn()
<< "qpdf-max-memory-usage " << mem_usage << "\n";
}
} }
bool bool

View File

@ -502,6 +502,13 @@ QPDFJob::Config::removePageLabels()
return this; return this;
} }
// Handler for the report-mem-usage option: sets the
// report_mem_usage flag (declared in QPDFJob::Members), which
// QPDFJob::run() checks to decide whether to emit the
// "qpdf-max-memory-usage" line. Returns this Config object.
QPDFJob::Config*
QPDFJob::Config::reportMemUsage()
{
o.m->report_mem_usage = true;
return this;
}
QPDFJob::Config* QPDFJob::Config*
QPDFJob::Config::requiresPassword() QPDFJob::Config::requiresPassword()
{ {

View File

@ -37,6 +37,9 @@
# include <sys/stat.h> # include <sys/stat.h>
# include <unistd.h> # include <unistd.h>
#endif #endif
#ifdef HAVE_MALLOC_INFO
# include <malloc.h>
#endif
// First element is 24 // First element is 24
static unsigned short pdf_doc_low_to_unicode[] = { static unsigned short pdf_doc_low_to_unicode[] = {
@ -1968,3 +1971,73 @@ QUtil::call_main_from_wmain(
} }
#endif // QPDF_NO_WCHAR_T #endif // QPDF_NO_WCHAR_T
// Best-effort estimate of the maximum amount of memory allocated by
// the current process, computed by parsing the XML report written by
// malloc_info(). Returns 0 when malloc_info/open_memstream were not
// detected at build time, or when the report cannot be produced or
// parsed. Intended for development and performance testing only.
size_t
QUtil::get_max_memory_usage()
{
#if defined(HAVE_MALLOC_INFO) && defined(HAVE_OPEN_MEMSTREAM)
    static std::regex tag_re("<(/?\\w+)([^>]*?)>");
    static std::regex attr_re("(\\w+)=\"(.*?)\"");

    // Initialize so that a stream that never publishes its buffer
    // leaves us with something safe to test and to free.
    char* buf = nullptr;
    size_t size = 0;
    FILE* f = open_memstream(&buf, &size);
    if (f == nullptr) {
        return 0;
    }
    // Close the stream even when malloc_info fails: buf and size are
    // only finalized once the stream is flushed or closed, and the
    // buffer has to be released by us either way.
    int const info_status = malloc_info(0, f);
    fclose(f);
    if ((info_status != 0) || (buf == nullptr)) {
        free(buf);
        return 0;
    }
    if (QUtil::get_env("QPDF_DEBUG_MEM_USAGE")) {
        fprintf(stderr, "%s", buf);
    }

    // Warning: this code uses regular expression to extract data from
    // an XML string. This is generally a bad idea, but we're going to
    // do it anyway because QUtil.hh warns against using this function
    // for other than development/testing, and if this function fails
    // to generate reasonable output during performance testing, it
    // will be noticed.

    // This is my best guess at how to interpret malloc_info. Anyway
    // it seems to provide useful information for detecting code
    // changes that drastically change memory usage.
    size_t result = 0;
    try {
        std::cregex_iterator m_begin(buf, buf + size, tag_re);
        std::cregex_iterator cr_end;
        std::sregex_iterator sr_end;

        // Depth of <heap> nesting; only tags seen outside of any
        // <heap> element contribute to the result.
        int in_heap = 0;
        for (auto m = m_begin; m != cr_end; ++m) {
            std::string tag(m->str(1));
            if (tag == "heap") {
                ++in_heap;
            } else if (tag == "/heap") {
                --in_heap;
            } else if (in_heap == 0) {
                std::string rest = m->str(2);
                std::map<std::string, std::string> attrs;
                std::sregex_iterator a_begin(rest.begin(), rest.end(), attr_re);
                for (auto m2 = a_begin; m2 != sr_end; ++m2) {
                    attrs[m2->str(1)] = m2->str(2);
                }
                // Sum every top-level <total> plus the
                // <system type="max"> entry. Both kinds are skipped
                // when they carry no size attribute.
                bool const counted = (tag == "total") ||
                    ((tag == "system") && (attrs["type"] == "max"));
                if (counted && (attrs.count("size") > 0)) {
                    result += QIntC::to_size(
                        QUtil::string_to_ull(attrs["size"].c_str()));
                }
            }
        }
    } catch (...) {
        // The report could not be interpreted. Discard any partial
        // sum so that callers get 0 ("unable to determine") rather
        // than a misleading, truncated figure.
        result = 0;
    }
    free(buf);
    return result;
#else
    return 0;
#endif
}

View File

@ -883,6 +883,9 @@ for debugging qpdf.
ap.addOptionHelp("--test-json-schema", "testing", "test generated json against schema", R"(This is used by qpdf's test suite to check consistency between ap.addOptionHelp("--test-json-schema", "testing", "test generated json against schema", R"(This is used by qpdf's test suite to check consistency between
the output of qpdf --json and the output of qpdf --json-help. the output of qpdf --json and the output of qpdf --json-help.
)"); )");
ap.addOptionHelp("--report-mem-usage", "testing", "best effort report of memory usage", R"(This is used by qpdf's performance test suite to report the
maximum amount of memory used in supported environments.
)");
} }
static void add_help(QPDFArgParser& ap) static void add_help(QPDFArgParser& ap)
{ {

View File

@ -69,6 +69,7 @@ this->ap.addBare("raw-stream-data", [this](){c_main->rawStreamData();});
this->ap.addBare("recompress-flate", [this](){c_main->recompressFlate();}); this->ap.addBare("recompress-flate", [this](){c_main->recompressFlate();});
this->ap.addBare("remove-page-labels", [this](){c_main->removePageLabels();}); this->ap.addBare("remove-page-labels", [this](){c_main->removePageLabels();});
this->ap.addBare("replace-input", b(&ArgParser::argReplaceInput)); this->ap.addBare("replace-input", b(&ArgParser::argReplaceInput));
this->ap.addBare("report-mem-usage", [this](){c_main->reportMemUsage();});
this->ap.addBare("requires-password", [this](){c_main->requiresPassword();}); this->ap.addBare("requires-password", [this](){c_main->requiresPassword();});
this->ap.addBare("show-encryption", [this](){c_main->showEncryption();}); this->ap.addBare("show-encryption", [this](){c_main->showEncryption();});
this->ap.addBare("show-encryption-key", [this](){c_main->showEncryptionKey();}); this->ap.addBare("show-encryption-key", [this](){c_main->showEncryptionKey();});

View File

@ -409,6 +409,9 @@ popHandler(); // key: pages
pushKey("removePageLabels"); pushKey("removePageLabels");
addBare([this]() { c_main->removePageLabels(); }); addBare([this]() { c_main->removePageLabels(); });
popHandler(); // key: removePageLabels popHandler(); // key: removePageLabels
pushKey("reportMemUsage");
addBare([this]() { c_main->reportMemUsage(); });
popHandler(); // key: reportMemUsage
pushKey("rotate"); pushKey("rotate");
addParameter([this](std::string const& p) { c_main->rotate(p); }); addParameter([this](std::string const& p) { c_main->rotate(p); });
popHandler(); // key: rotate popHandler(); // key: rotate

View File

@ -144,6 +144,7 @@ static constexpr char const* JOB_SCHEMA_DATA = R"({
} }
], ],
"removePageLabels": "remove explicit page numbers", "removePageLabels": "remove explicit page numbers",
"reportMemUsage": "best effort report of memory usage",
"rotate": "rotate pages", "rotate": "rotate pages",
"overlay": { "overlay": {
"file": "source file for overlay", "file": "source file for overlay",

View File

@ -21,6 +21,8 @@
#cmakedefine HAVE_LOCALTIME_R 1 #cmakedefine HAVE_LOCALTIME_R 1
#cmakedefine HAVE_RANDOM 1 #cmakedefine HAVE_RANDOM 1
#cmakedefine HAVE_TM_GMTOFF 1 #cmakedefine HAVE_TM_GMTOFF 1
#cmakedefine HAVE_MALLOC_INFO 1
#cmakedefine HAVE_OPEN_MEMSTREAM 1
/* printf format for long long */ /* printf format for long long */
#cmakedefine LL_FMT "${LL_FMT}" #cmakedefine LL_FMT "${LL_FMT}"

View File

@ -134,3 +134,5 @@ D:20210209191925Z
2021-02-09T19:19:25Z 2021-02-09T19:19:25Z
---- is_long_long ---- is_long_long
done done
---- memory usage
memory usage okay

View File

@ -703,6 +703,18 @@ is_long_long_test()
std::cout << "done" << std::endl; std::cout << "done" << std::endl;
} }
// Sanity check for QUtil::get_max_memory_usage(): where the function
// reports a non-zero value, the reported maximum must increase after
// a large allocation. Where it reports 0, only the final message is
// printed.
void
memory_usage_test()
{
    size_t const baseline = QUtil::get_max_memory_usage();
    bool const have_measurement = (baseline > 0);
    if (have_measurement) {
        // Keep the array alive while the second reading is taken.
        auto big_block = QUtil::make_shared_array<int>(10 << 20);
        size_t const after_alloc = QUtil::get_max_memory_usage();
        assert(after_alloc > baseline);
    }
    std::cout << "memory usage okay" << std::endl;
}
int int
main(int argc, char* argv[]) main(int argc, char* argv[])
{ {
@ -739,6 +751,8 @@ main(int argc, char* argv[])
timestamp_test(); timestamp_test();
std::cout << "---- is_long_long" << std::endl; std::cout << "---- is_long_long" << std::endl;
is_long_long_test(); is_long_long_test();
std::cout << "---- memory usage" << std::endl;
memory_usage_test();
} catch (std::exception& e) { } catch (std::exception& e) {
std::cout << "unexpected exception: " << e.what() << std::endl; std::cout << "unexpected exception: " << e.what() << std::endl;
} }

View File

@ -3463,6 +3463,16 @@ Related Options
memory and is therefore unsuitable for use with large files. This memory and is therefore unsuitable for use with large files. This
is why it's also not on by default. is why it's also not on by default.
.. qpdf:option:: --report-mem-usage
.. help: best effort report of memory usage
This is used by qpdf's performance test suite to report the
maximum amount of memory used in supported environments.
This is used by qpdf's performance test suite to report the maximum
amount of memory used in supported environments.
.. _unicode-passwords: .. _unicode-passwords:
Unicode Passwords Unicode Passwords