diff --git a/include/qpdf/QUtil.hh b/include/qpdf/QUtil.hh
index bcedd4d3..96c0530b 100644
--- a/include/qpdf/QUtil.hh
+++ b/include/qpdf/QUtil.hh
@@ -26,6 +26,7 @@
 #include <qpdf/Types.h>
 #include <string>
 #include <list>
+#include <vector>
 #include <stdexcept>
 #include <stdio.h>
 #include <time.h>
@@ -220,6 +221,26 @@ namespace QUtil
 
     QPDF_DLL
     bool is_number(char const*);
+
+    // Parse the numeric range syntax used by the qpdf command-line
+    // tool and return the selected numbers in the order given.
+    //
+    // A range is a comma-separated list of groups. Each group is a
+    // single number or two numbers separated by a dash; a dash group
+    // expands to every number from the first to the second, counting
+    // down if the second is smaller. A number is written as decimal
+    // digits, as "z" (meaning max), or as "r" followed by digits,
+    // where rN is the Nth number counting back from max (r1 is max).
+    //
+    // If max is greater than 0, every number must be between 1 and
+    // max inclusive. If max is 0, numbers are not checked against any
+    // bound, which is useful for testing whether a range is valid
+    // syntax before max is known.
+    //
+    // Throws std::runtime_error with a message that quotes the range
+    // if the range is malformed or a number is out of bounds.
+    QPDF_DLL
+    std::vector<int> parse_numrange(char const* range, int max);
 };
 
 #endif // QUTIL_HH
diff --git a/libqpdf/QUtil.cc b/libqpdf/QUtil.cc
index 56bac386..44ffec7f 100644
--- a/libqpdf/QUtil.cc
+++ b/libqpdf/QUtil.cc
@@ -718,3 +718,208 @@ QUtil::strcasecmp(char const *s1, char const *s2)
     return ::strcasecmp(s1, s2);
 #endif
 }
+
+// Resolve a number that may have been written with the "r" prefix.
+// When from_end is false, num is returned unchanged. Otherwise num
+// counts back from max, so 1 maps to max; a num larger than max maps
+// to 0, which parse_numrange rejects whenever max is known.
+static int maybe_from_end(int num, bool from_end, int max)
+{
+    if (from_end)
+    {
+        if (num > max)
+        {
+            num = 0;
+        }
+        else
+        {
+            num = max + 1 - num;
+        }
+    }
+    return num;
+}
+
+// Parse the numeric range syntax used by the qpdf command-line tool,
+// e.g. "1,3,5-10,z-13,r2-r4", and return the selected numbers in
+// order. "z" stands for max and "rN" for the Nth number counting
+// back from max. If max > 0, every number must be between 1 and max;
+// if max == 0, only the syntax is checked. Throws std::runtime_error
+// with a message that quotes the range if it cannot be parsed.
+std::vector<int>
+QUtil::parse_numrange(char const* range, int max)
+{
+    std::vector<int> result;
+    // While scanning, p points at the character being examined so
+    // that an error message can mark the position. It is reset to 0
+    // after scanning; later errors are reported without a position.
+    char const* p = range;
+    try
+    {
+        // work alternates number, separator, number, ... where each
+        // separator is one of the two codes below.
+        std::vector<int> work;
+        static int const comma = -1;
+        static int const dash = -2;
+
+        enum { st_top,
+               st_in_number,
+               st_after_number } state = st_top;
+        bool last_separator_was_dash = false;
+        int cur_number = 0;
+        bool from_end = false;
+        while (*p)
+        {
+            char ch = *p;
+            // isdigit has undefined behavior for negative values, so
+            // a plain char must be converted to unsigned char first.
+            if (isdigit(static_cast<unsigned char>(ch)))
+            {
+                if (! ((state == st_top) || (state == st_in_number)))
+                {
+                    throw std::runtime_error("digit not expected");
+                }
+                state = st_in_number;
+                // Accumulate in a wider type and make sure the value
+                // survives conversion back to int; a long run of
+                // digits would otherwise overflow cur_number.
+                long long next =
+                    (static_cast<long long>(cur_number) * 10) + (ch - '0');
+                if (static_cast<long long>(static_cast<int>(next)) != next)
+                {
+                    throw std::runtime_error("number too large");
+                }
+                cur_number = static_cast<int>(next);
+            }
+            else if (ch == 'z')
+            {
+                // z represents max
+                if (! (state == st_top))
+                {
+                    throw std::runtime_error("z not expected");
+                }
+                state = st_after_number;
+                cur_number = max;
+            }
+            else if (ch == 'r')
+            {
+                // r marks the digits that follow as counting back
+                // from max; see maybe_from_end.
+                if (! (state == st_top))
+                {
+                    throw std::runtime_error("r not expected");
+                }
+                state = st_in_number;
+                from_end = true;
+            }
+            else if ((ch == ',') || (ch == '-'))
+            {
+                if (! ((state == st_in_number) || (state == st_after_number)))
+                {
+                    throw std::runtime_error("unexpected separator");
+                }
+                cur_number = maybe_from_end(cur_number, from_end, max);
+                work.push_back(cur_number);
+                cur_number = 0;
+                from_end = false;
+                if (ch == ',')
+                {
+                    state = st_top;
+                    last_separator_was_dash = false;
+                    work.push_back(comma);
+                }
+                else if (ch == '-')
+                {
+                    // a-b-c is not allowed
+                    if (last_separator_was_dash)
+                    {
+                        throw std::runtime_error("unexpected dash");
+                    }
+                    state = st_top;
+                    last_separator_was_dash = true;
+                    work.push_back(dash);
+                }
+            }
+            else
+            {
+                throw std::runtime_error("unexpected character");
+            }
+            ++p;
+        }
+        if ((state == st_in_number) || (state == st_after_number))
+        {
+            cur_number = maybe_from_end(cur_number, from_end, max);
+            work.push_back(cur_number);
+        }
+        else
+        {
+            throw std::runtime_error("number expected");
+        }
+
+        p = 0;
+        for (size_t i = 0; i < work.size(); i += 2)
+        {
+            int num = work.at(i);
+            // max == 0 means we don't know the max and are just
+            // testing for valid syntax.
+            if ((max > 0) && ((num < 1) || (num > max)))
+            {
+                throw std::runtime_error(
+                    "number " + QUtil::int_to_string(num) + " out of range");
+            }
+            if (i == 0)
+            {
+                result.push_back(work.at(i));
+            }
+            else
+            {
+                int separator = work.at(i-1);
+                if (separator == comma)
+                {
+                    result.push_back(num);
+                }
+                else if (separator == dash)
+                {
+                    // The first end point is already in result, so
+                    // add only the numbers after it, counting up or
+                    // down toward num.
+                    int lastnum = result.back();
+                    if (num > lastnum)
+                    {
+                        for (int j = lastnum + 1; j <= num; ++j)
+                        {
+                            result.push_back(j);
+                        }
+                    }
+                    else
+                    {
+                        for (int j = lastnum - 1; j >= num; --j)
+                        {
+                            result.push_back(j);
+                        }
+                    }
+                }
+                else
+                {
+                    throw std::logic_error(
+                        "INTERNAL ERROR parsing numeric range");
+                }
+            }
+        }
+    }
+    catch (std::runtime_error const& e)
+    {
+        std::string message;
+        if (p)
+        {
+            message = "error at * in numeric range " +
+                std::string(range, p - range) + "*" + p + ": " + e.what();
+        }
+        else
+        {
+            message = "error in numeric range " +
+                std::string(range) + ": " + e.what();
+        }
+        throw std::runtime_error(message);
+    }
+    return result;
+}
diff --git a/libtests/build.mk b/libtests/build.mk
index 7143eb56..214bd17c 100644
--- a/libtests/build.mk
+++ b/libtests/build.mk
@@ -13,6 +13,7 @@ BINS_libtests = \
 	json \
 	lzw \
 	md5 \
+	numrange \
 	pointer_holder \
 	predictors \
 	qutil \
diff --git a/libtests/numrange.cc b/libtests/numrange.cc
new file mode 100644
index 00000000..6cbb90ac
--- /dev/null
+++ b/libtests/numrange.cc
@@ -0,0 +1,40 @@
+#include <qpdf/QUtil.hh>
+#include <iostream>
+
+// Parse range with a fixed maximum of 15 and print the resulting
+// numbers on one line, or print "null" if no range was given.
+static void test_numrange(char const* range)
+{
+    if (range == 0)
+    {
+        std::cout << "null" << std::endl;
+    }
+    else
+    {
+        std::vector<int> result = QUtil::parse_numrange(range, 15);
+        std::cout << "numeric range " << range << " ->";
+        for (std::vector<int>::iterator iter = result.begin();
+             iter != result.end(); ++iter)
+        {
+            std::cout << " " << *iter;
+        }
+        std::cout << std::endl;
+    }
+}
+
+// The range to parse is the first command-line argument. Errors are
+// printed to standard output and reported with exit status 2.
+int main(int argc, char* argv[])
+{
+    try
+    {
+        test_numrange((argc > 1) ? argv[1] : 0);
+    }
+    catch (std::exception const& e)
+    {
+        std::cout << e.what() << std::endl;
+        return 2;
+    }
+
+    return 0;
+}
diff --git a/libtests/qtest/numrange.test b/libtests/qtest/numrange.test
new file mode 100644
index 00000000..9acf6ea4
--- /dev/null
+++ b/libtests/qtest/numrange.test
@@ -0,0 +1,67 @@
+#!/usr/bin/env perl
+require 5.008;
+use warnings;
+use strict;
+
+require TestDriver;
+
+my $td = new TestDriver('numrange');
+
+# Each entry is [range, expected output line, expected exit status].
+my @nrange_tests = (
+    [",5",
+     "error at * in numeric range *,5: unexpected separator",
+     2],
+    ["4,,5",
+     "error at * in numeric range 4,*,5: unexpected separator",
+     2],
+    ["4,5,",
+     "error at * in numeric range 4,5,*: number expected",
+     2],
+    ["z1,",
+     "error at * in numeric range z*1,: digit not expected",
+     2],
+    ["1z,",
+     "error at * in numeric range 1*z,: z not expected",
+     2],
+    ["1-5?",
+     "error at * in numeric range 1-5*?: unexpected character",
+     2],
+    ["1-30",
+     "error in numeric range 1-30: number 30 out of range",
+     2],
+    ["1-10,0,5",
+     "error in numeric range 1-10,0,5: number 0 out of range",
+     2],
+    ["1-10,1234,5",
+     "error in numeric range 1-10,1234,5: number 1234 out of range",
+     2],
+    ["1,99999999999",
+     "error at * in numeric range 1,999999999*99: number too large",
+     2],
+    ["1,r,3",
+     "error in numeric range 1,r,3: number 16 out of range",
+     2],
+    ["1,r16,3",
+     "error in numeric range 1,r16,3: number 0 out of range",
+     2],
+    ["1,3,5-10,z-13,13,9,z,2,r2-r4",
+     "numeric range 1,3,5-10,z-13,13,9,z,2,r2-r4" .
+     " -> 1 3 5 6 7 8 9 10 15 14 13 13 9 15 2 14 13 12",
+     0],
+    ["r1-r15", # r\d+ at end
+     "numeric range r1-r15" .
+     " -> 15 14 13 12 11 10 9 8 7 6 5 4 3 2 1",
+     0],
+    );
+foreach my $d (@nrange_tests)
+{
+    my ($range, $output, $status) = @$d;
+    $td->runtest("numeric range $range",
+                 {$td->COMMAND => ['numrange', $range],
+                  $td->FILTER => "grep 'numeric range'"},
+                 {$td->STRING => $output . "\n", $td->EXIT_STATUS => $status},
+                 $td->NORMALIZE_NEWLINES);
+}
+
+$td->report(scalar(@nrange_tests));
diff --git a/qpdf/qpdf.cc b/qpdf/qpdf.cc
index 46b984bc..b2945419 100644
--- a/qpdf/qpdf.cc
+++ b/qpdf/qpdf.cc
@@ -635,180 +635,29 @@ static void show_encryption(QPDF& pdf, Options& o)
     }
 }
 
-static int maybe_from_end(int num, bool from_end, int max)
-{
-    if (from_end)
-    {
-        if (num > max)
-        {
-            num = 0;
-        }
-        else
-        {
-            num = max + 1 - num;
-        }
-    }
-    return num;
-}
-
+// Wrapper around QUtil::parse_numrange. If the range is invalid, the
+// error is rethrown when throw_error is true and otherwise reported
+// through usage().
 static std::vector<int> parse_numrange(char const* range, int max,
                                        bool throw_error = false)
 {
-    std::vector<int> result;
-    char const* p = range;
     try
     {
-        std::vector<int> work;
-        static int const comma = -1;
-        static int const dash = -2;
-
-        enum { st_top,
-               st_in_number,
-               st_after_number } state = st_top;
-        bool last_separator_was_dash = false;
-        int cur_number = 0;
-        bool from_end = false;
-        while (*p)
-        {
-            char ch = *p;
-            if (isdigit(ch))
-            {
-                if (! ((state == st_top) || (state == st_in_number)))
-                {
-                    throw std::runtime_error("digit not expected");
-                }
-                state = st_in_number;
-                cur_number *= 10;
-                cur_number += (ch - '0');
-            }
-            else if (ch == 'z')
-            {
-                // z represents max
-                if (! (state == st_top))
-                {
-                    throw std::runtime_error("z not expected");
-                }
-                state = st_after_number;
-                cur_number = max;
-            }
-            else if (ch == 'r')
-            {
-                if (! (state == st_top))
-                {
-                    throw std::runtime_error("r not expected");
-                }
-                state = st_in_number;
-                from_end = true;
-            }
-            else if ((ch == ',') || (ch == '-'))
-            {
-                if (! ((state == st_in_number) || (state == st_after_number)))
-                {
-                    throw std::runtime_error("unexpected separator");
-                }
-                cur_number = maybe_from_end(cur_number, from_end, max);
-                work.push_back(cur_number);
-                cur_number = 0;
-                from_end = false;
-                if (ch == ',')
-                {
-                    state = st_top;
-                    last_separator_was_dash = false;
-                    work.push_back(comma);
-                }
-                else if (ch == '-')
-                {
-                    if (last_separator_was_dash)
-                    {
-                        throw std::runtime_error("unexpected dash");
-                    }
-                    state = st_top;
-                    last_separator_was_dash = true;
-                    work.push_back(dash);
-                }
-            }
-            else
-            {
-                throw std::runtime_error("unexpected character");
-            }
-            ++p;
-        }
-        if ((state == st_in_number) || (state == st_after_number))
-        {
-            cur_number = maybe_from_end(cur_number, from_end, max);
-            work.push_back(cur_number);
-        }
-        else
-        {
-            throw std::runtime_error("number expected");
-        }
-
-        p = 0;
-        for (size_t i = 0; i < work.size(); i += 2)
-        {
-            int num = work.at(i);
-            // max == 0 means we don't know the max and are just
-            // testing for valid syntax.
-            if ((max > 0) && ((num < 1) || (num > max)))
-            {
-                throw std::runtime_error(
-                    "number " + QUtil::int_to_string(num) + " out of range");
-            }
-            if (i == 0)
-            {
-                result.push_back(work.at(i));
-            }
-            else
-            {
-                int separator = work.at(i-1);
-                if (separator == comma)
-                {
-                    result.push_back(num);
-                }
-                else if (separator == dash)
-                {
-                    int lastnum = result.back();
-                    if (num > lastnum)
-                    {
-                        for (int j = lastnum + 1; j <= num; ++j)
-                        {
-                            result.push_back(j);
-                        }
-                    }
-                    else
-                    {
-                        for (int j = lastnum - 1; j >= num; --j)
-                        {
-                            result.push_back(j);
-                        }
-                    }
-                }
-                else
-                {
-                    throw std::logic_error(
-                        "INTERNAL ERROR parsing numeric range");
-                }
-            }
-        }
+        return QUtil::parse_numrange(range, max);
     }
     catch (std::runtime_error const& e)
     {
         if (throw_error)
         {
-            throw e;
-        }
-        if (p)
-        {
-            usage("error at * in numeric range " +
-                  std::string(range, p - range) + "*" + p + ": " + e.what());
+            // Rethrow the original exception rather than a copy.
+            throw;
         }
         else
         {
-            usage("error in numeric range " +
-                  std::string(range) + ": " + e.what());
+            usage(e.what());
         }
     }
-    return result;
+    return std::vector<int>();
 }
 
 static void
@@ -1213,25 +1062,6 @@ parse_pages_options(
     return result;
 }
 
-static void test_numrange(char const* range)
-{
-    if (range == 0)
-    {
-        std::cout << "null" << std::endl;
-    }
-    else
-    {
-        std::vector<int> result = parse_numrange(range, 15);
-        std::cout << "numeric range " << range << " ->";
-        for (std::vector<int>::iterator iter = result.begin();
-             iter != result.end(); ++iter)
-        {
-            std::cout << " " << *iter;
-        }
-        std::cout << std::endl;
-    }
-}
-
 QPDFPageData::QPDFPageData(std::string const& filename,
                            QPDF* qpdf,
                            char const* range) :
@@ -1429,14 +1259,7 @@ static void parse_options(int argc, char* argv[], Options& o)
                 *parameter++ = 0;
             }
 
-            // Arguments that start with space are undocumented and
-            // are for use by the test suite.
-            if (strcmp(arg, " test-numrange") == 0)
-            {
-                test_numrange(parameter);
-                exit(0);
-            }
-            else if (strcmp(arg, "password") == 0)
+            if (strcmp(arg, "password") == 0)
             {
                 if (parameter == 0)
                 {
diff --git a/qpdf/qtest/qpdf.test b/qpdf/qtest/qpdf.test
index 9b063cb7..a1077238 100644
--- a/qpdf/qtest/qpdf.test
+++ b/qpdf/qtest/qpdf.test
@@ -1319,63 +1319,6 @@ $td->runtest("check output",
              {$td->FILE => "a.pdf"},
              {$td->FILE => "minimal-rotated.pdf"});
 
-show_ntests();
-# ----------
-$td->notify("--- Numeric range parsing tests ---");
-my @nrange_tests = (
-    [",5",
-     "qpdf: error at * in numeric range *,5: unexpected separator",
-     2],
-    ["4,,5",
-     "qpdf: error at * in numeric range 4,*,5: unexpected separator",
-     2],
-    ["4,5,",
-     "qpdf: error at * in numeric range 4,5,*: number expected",
-     2],
-    ["z1,",
-     "qpdf: error at * in numeric range z*1,: digit not expected",
-     2],
-    ["1z,",
-     "qpdf: error at * in numeric range 1*z,: z not expected",
-     2],
-    ["1-5?",
-     "qpdf: error at * in numeric range 1-5*?: unexpected character",
-     2],
-    ["1-30",
-     "qpdf: error in numeric range 1-30: number 30 out of range",
-     2],
-    ["1-10,0,5",
-     "qpdf: error in numeric range 1-10,0,5: number 0 out of range",
-     2],
-    ["1-10,1234,5",
-     "qpdf: error in numeric range 1-10,1234,5: number 1234 out of range",
-     2],
-    ["1,r,3",
-     "qpdf: error in numeric range 1,r,3: number 16 out of range",
-     2],
-    ["1,r16,3",
-     "qpdf: error in numeric range 1,r16,3: number 0 out of range",
-     2],
-    ["1,3,5-10,z-13,13,9,z,2,r2-r4",
-     "numeric range 1,3,5-10,z-13,13,9,z,2,r2-r4" .
-     " -> 1 3 5 6 7 8 9 10 15 14 13 13 9 15 2 14 13 12",
-     0],
-    ["r1-r15", # r\d+ at end
-     "numeric range r1-r15" .
-     " -> 15 14 13 12 11 10 9 8 7 6 5 4 3 2 1",
-     0],
-    );
-$n_tests += scalar(@nrange_tests);
-foreach my $d (@nrange_tests)
-{
-    my ($range, $output, $status) = @$d;
-    $td->runtest("numeric range $range",
-                 {$td->COMMAND => ['qpdf', '-- test-numrange=' . $range],
-                  $td->FILTER => "grep 'numeric range'"},
-                 {$td->STRING => $output . "\n", $td->EXIT_STATUS => $status},
-                 $td->NORMALIZE_NEWLINES);
-}
-
 show_ntests();
 # ----------
 $td->notify("--- Merging and Splitting ---");