mirror of
https://github.com/qpdf/qpdf.git
synced 2024-10-31 19:02:30 +00:00
Completely rewrite QUtil::parse_numrange
This commit is contained in:
parent
d9b8b0aae1
commit
708ea4ef43
192
libqpdf/QUtil.cc
192
libqpdf/QUtil.cc
@ -9,15 +9,12 @@
|
|||||||
#include <qpdf/QPDFSystemError.hh>
|
#include <qpdf/QPDFSystemError.hh>
|
||||||
#include <qpdf/QTC.hh>
|
#include <qpdf/QTC.hh>
|
||||||
|
|
||||||
#include <cctype>
|
|
||||||
#include <cerrno>
|
#include <cerrno>
|
||||||
#include <cstdio>
|
|
||||||
#include <cstdlib>
|
#include <cstdlib>
|
||||||
#include <cstring>
|
#include <cstring>
|
||||||
#include <fcntl.h>
|
#include <fcntl.h>
|
||||||
#include <fstream>
|
#include <fstream>
|
||||||
#include <iomanip>
|
#include <iomanip>
|
||||||
#include <locale>
|
|
||||||
#include <map>
|
#include <map>
|
||||||
#include <memory>
|
#include <memory>
|
||||||
#include <regex>
|
#include <regex>
|
||||||
@ -1303,93 +1300,52 @@ QUtil::str_compare_nocase(char const* s1, char const* s2)
|
|||||||
#endif
|
#endif
|
||||||
}
|
}
|
||||||
|
|
||||||
static int
|
|
||||||
maybe_from_end(int num, bool from_end, int max)
|
|
||||||
{
|
|
||||||
if (from_end) {
|
|
||||||
if (num > max) {
|
|
||||||
num = 0;
|
|
||||||
} else {
|
|
||||||
num = max + 1 - num;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
return num;
|
|
||||||
}
|
|
||||||
|
|
||||||
std::vector<int>
|
std::vector<int>
|
||||||
QUtil::parse_numrange(char const* range, int max)
|
QUtil::parse_numrange(char const* range, int max)
|
||||||
{
|
{
|
||||||
std::vector<int> result;
|
static std::regex group_re(R"((x)?(z|r?\d+)(?:-(z|r?\d+))?)");
|
||||||
char const* p = range;
|
auto parse_num = [&max](std::string const& s) -> int {
|
||||||
|
if (s == "z") {
|
||||||
|
return max;
|
||||||
|
}
|
||||||
|
int num;
|
||||||
|
if (s.at(0) == 'r') {
|
||||||
|
num = max + 1 - string_to_int(s.substr(1).c_str());
|
||||||
|
} else {
|
||||||
|
num = string_to_int(s.c_str());
|
||||||
|
}
|
||||||
|
// max == 0 means we don't know the max and are just testing for valid syntax.
|
||||||
|
if ((max > 0) && ((num < 1) || (num > max))) {
|
||||||
|
throw std::runtime_error("number " + std::to_string(num) + " out of range");
|
||||||
|
}
|
||||||
|
return num;
|
||||||
|
};
|
||||||
|
|
||||||
|
auto populate = [](std::vector<int>& group, int first_num, bool is_span, int last_num) {
|
||||||
|
group.clear();
|
||||||
|
group.emplace_back(first_num);
|
||||||
|
if (is_span) {
|
||||||
|
if (first_num > last_num) {
|
||||||
|
for (auto i = first_num - 1; i >= last_num; --i) {
|
||||||
|
group.push_back(i);
|
||||||
|
}
|
||||||
|
} else {
|
||||||
|
for (auto i = first_num + 1; i <= last_num; ++i) {
|
||||||
|
group.push_back(i);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
};
|
||||||
|
|
||||||
|
char const* p;
|
||||||
try {
|
try {
|
||||||
std::vector<int> work;
|
char const* range_end = range + strlen(range);
|
||||||
static int const comma = -1;
|
std::vector<int> result;
|
||||||
static int const dash = -2;
|
std::vector<int> last_group;
|
||||||
|
// See if range ends with :even or :odd.
|
||||||
size_t start_idx = 0;
|
size_t start_idx = 0;
|
||||||
size_t skip = 1;
|
size_t skip = 1;
|
||||||
|
p = std::find(range, range_end, ':');
|
||||||
enum { st_top, st_in_number, st_after_number } state = st_top;
|
|
||||||
bool last_separator_was_dash = false;
|
|
||||||
int cur_number = 0;
|
|
||||||
bool from_end = false;
|
|
||||||
while (*p) {
|
|
||||||
char ch = *p;
|
|
||||||
if (isdigit(ch)) {
|
|
||||||
if (!((state == st_top) || (state == st_in_number))) {
|
|
||||||
throw std::runtime_error("digit not expected");
|
|
||||||
}
|
|
||||||
state = st_in_number;
|
|
||||||
cur_number *= 10;
|
|
||||||
cur_number += (ch - '0');
|
|
||||||
} else if (ch == 'z') {
|
|
||||||
// z represents max
|
|
||||||
if (!(state == st_top)) {
|
|
||||||
throw std::runtime_error("z not expected");
|
|
||||||
}
|
|
||||||
state = st_after_number;
|
|
||||||
cur_number = max;
|
|
||||||
} else if (ch == 'r') {
|
|
||||||
if (!(state == st_top)) {
|
|
||||||
throw std::runtime_error("r not expected");
|
|
||||||
}
|
|
||||||
state = st_in_number;
|
|
||||||
from_end = true;
|
|
||||||
} else if ((ch == ',') || (ch == '-')) {
|
|
||||||
if (!((state == st_in_number) || (state == st_after_number))) {
|
|
||||||
throw std::runtime_error("unexpected separator");
|
|
||||||
}
|
|
||||||
cur_number = maybe_from_end(cur_number, from_end, max);
|
|
||||||
work.push_back(cur_number);
|
|
||||||
cur_number = 0;
|
|
||||||
from_end = false;
|
|
||||||
if (ch == ',') {
|
|
||||||
state = st_top;
|
|
||||||
last_separator_was_dash = false;
|
|
||||||
work.push_back(comma);
|
|
||||||
} else if (ch == '-') {
|
|
||||||
if (last_separator_was_dash) {
|
|
||||||
throw std::runtime_error("unexpected dash");
|
|
||||||
}
|
|
||||||
state = st_top;
|
|
||||||
last_separator_was_dash = true;
|
|
||||||
work.push_back(dash);
|
|
||||||
}
|
|
||||||
} else if (ch == ':') {
|
|
||||||
if (!((state == st_in_number) || (state == st_after_number))) {
|
|
||||||
throw std::runtime_error("unexpected colon");
|
|
||||||
}
|
|
||||||
break;
|
|
||||||
} else {
|
|
||||||
throw std::runtime_error("unexpected character");
|
|
||||||
}
|
|
||||||
++p;
|
|
||||||
}
|
|
||||||
if ((state == st_in_number) || (state == st_after_number)) {
|
|
||||||
cur_number = maybe_from_end(cur_number, from_end, max);
|
|
||||||
work.push_back(cur_number);
|
|
||||||
} else {
|
|
||||||
throw std::runtime_error("number expected");
|
|
||||||
}
|
|
||||||
if (*p == ':') {
|
if (*p == ':') {
|
||||||
if (strcmp(p, ":odd") == 0) {
|
if (strcmp(p, ":odd") == 0) {
|
||||||
skip = 2;
|
skip = 2;
|
||||||
@ -1397,46 +1353,55 @@ QUtil::parse_numrange(char const* range, int max)
|
|||||||
skip = 2;
|
skip = 2;
|
||||||
start_idx = 1;
|
start_idx = 1;
|
||||||
} else {
|
} else {
|
||||||
throw std::runtime_error("unexpected even/odd modifier");
|
throw std::runtime_error("expected :even or :odd");
|
||||||
}
|
}
|
||||||
|
range_end = p;
|
||||||
}
|
}
|
||||||
|
|
||||||
p = nullptr;
|
// Divide the range into groups
|
||||||
for (size_t i = 0; i < work.size(); i += 2) {
|
p = range;
|
||||||
int num = work.at(i);
|
char const* group_end;
|
||||||
// max == 0 means we don't know the max and are just testing for valid syntax.
|
bool first = true;
|
||||||
if ((max > 0) && ((num < 1) || (num > max))) {
|
while (p != range_end) {
|
||||||
throw std::runtime_error("number " + QUtil::int_to_string(num) + " out of range");
|
group_end = std::find(p, range_end, ',');
|
||||||
|
std::cmatch m;
|
||||||
|
if (!std::regex_match(p, group_end, m, group_re)) {
|
||||||
|
throw std::runtime_error("invalid range syntax");
|
||||||
}
|
}
|
||||||
if (i == 0) {
|
auto is_exclude = m[1].matched;
|
||||||
result.push_back(work.at(i));
|
if (first && is_exclude) {
|
||||||
|
throw std::runtime_error("first range group may not be an exclusion");
|
||||||
|
}
|
||||||
|
first = false;
|
||||||
|
auto first_num = parse_num(m[2].str());
|
||||||
|
auto is_span = m[3].matched;
|
||||||
|
int last_num;
|
||||||
|
if (is_span) {
|
||||||
|
last_num = parse_num(m[3].str());
|
||||||
|
}
|
||||||
|
if (is_exclude) {
|
||||||
|
// XXX
|
||||||
} else {
|
} else {
|
||||||
int separator = work.at(i - 1);
|
result.insert(result.end(), last_group.begin(), last_group.end());
|
||||||
if (separator == comma) {
|
populate(last_group, first_num, is_span, last_num);
|
||||||
result.push_back(num);
|
|
||||||
} else if (separator == dash) {
|
|
||||||
int lastnum = result.back();
|
|
||||||
if (num > lastnum) {
|
|
||||||
for (int j = lastnum + 1; j <= num; ++j) {
|
|
||||||
result.push_back(j);
|
|
||||||
}
|
}
|
||||||
} else {
|
p = group_end;
|
||||||
for (int j = lastnum - 1; j >= num; --j) {
|
if (*p == ',') {
|
||||||
result.push_back(j);
|
++p;
|
||||||
}
|
if (p == range_end) {
|
||||||
}
|
throw std::runtime_error("trailing comma");
|
||||||
} else {
|
|
||||||
throw std::logic_error("INTERNAL ERROR parsing numeric range");
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
if ((start_idx > 0) || (skip != 1)) {
|
result.insert(result.end(), last_group.begin(), last_group.end());
|
||||||
auto t = result;
|
if (skip == 1) {
|
||||||
result.clear();
|
return result;
|
||||||
for (size_t i = start_idx; i < t.size(); i += skip) {
|
|
||||||
result.push_back(t.at(i));
|
|
||||||
}
|
}
|
||||||
|
std::vector<int> filtered;
|
||||||
|
for (auto i = start_idx; i < result.size(); i += skip) {
|
||||||
|
filtered.emplace_back(result.at(i));
|
||||||
}
|
}
|
||||||
|
return filtered;
|
||||||
} catch (std::runtime_error const& e) {
|
} catch (std::runtime_error const& e) {
|
||||||
std::string message;
|
std::string message;
|
||||||
if (p) {
|
if (p) {
|
||||||
@ -1447,7 +1412,6 @@ QUtil::parse_numrange(char const* range, int max)
|
|||||||
}
|
}
|
||||||
throw std::runtime_error(message);
|
throw std::runtime_error(message);
|
||||||
}
|
}
|
||||||
return result;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
enum encoding_e { e_utf16, e_ascii, e_winansi, e_macroman, e_pdfdoc };
|
enum encoding_e { e_utf16, e_ascii, e_winansi, e_macroman, e_pdfdoc };
|
||||||
|
@ -9,37 +9,37 @@ my $td = new TestDriver('numrange');
|
|||||||
|
|
||||||
my @nrange_tests = (
|
my @nrange_tests = (
|
||||||
[",5",
|
[",5",
|
||||||
"error at * in numeric range *,5: unexpected separator",
|
"error at * in numeric range *,5: invalid range syntax",
|
||||||
2],
|
2],
|
||||||
["4,,5",
|
["4,,5",
|
||||||
"error at * in numeric range 4,*,5: unexpected separator",
|
"error at * in numeric range 4,*,5: invalid range syntax",
|
||||||
2],
|
2],
|
||||||
["4,5,",
|
["4,5,",
|
||||||
"error at * in numeric range 4,5,*: number expected",
|
"error at * in numeric range 4,5,*: trailing comma",
|
||||||
2],
|
2],
|
||||||
["z1,",
|
["z1,",
|
||||||
"error at * in numeric range z*1,: digit not expected",
|
"error at * in numeric range *z1,: invalid range syntax",
|
||||||
2],
|
2],
|
||||||
["1z,",
|
["1z,",
|
||||||
"error at * in numeric range 1*z,: z not expected",
|
"error at * in numeric range *1z,: invalid range syntax",
|
||||||
2],
|
2],
|
||||||
["1-5?",
|
["1-5?",
|
||||||
"error at * in numeric range 1-5*?: unexpected character",
|
"error at * in numeric range *1-5?: invalid range syntax",
|
||||||
2],
|
2],
|
||||||
["1-30",
|
["1-30",
|
||||||
"error in numeric range 1-30: number 30 out of range",
|
"error at * in numeric range *1-30: number 30 out of range",
|
||||||
2],
|
2],
|
||||||
["1-10,0,5",
|
["1-10,0,5",
|
||||||
"error in numeric range 1-10,0,5: number 0 out of range",
|
"error at * in numeric range 1-10,*0,5: number 0 out of range",
|
||||||
2],
|
2],
|
||||||
["1-10,1234,5",
|
["1-10,1234,5",
|
||||||
"error in numeric range 1-10,1234,5: number 1234 out of range",
|
"error at * in numeric range 1-10,*1234,5: number 1234 out of range",
|
||||||
2],
|
2],
|
||||||
["1,r,3",
|
["1,r,3",
|
||||||
"error in numeric range 1,r,3: number 16 out of range",
|
"error at * in numeric range 1,*r,3: invalid range syntax",
|
||||||
2],
|
2],
|
||||||
["1,r16,3",
|
["1,r16,3",
|
||||||
"error in numeric range 1,r16,3: number 0 out of range",
|
"error at * in numeric range 1,*r16,3: number 0 out of range",
|
||||||
2],
|
2],
|
||||||
["1,3,5-10,z-13,13,9,z,2,r2-r4",
|
["1,3,5-10,z-13,13,9,z,2,r2-r4",
|
||||||
"numeric range 1,3,5-10,z-13,13,9,z,2,r2-r4" .
|
"numeric range 1,3,5-10,z-13,13,9,z,2,r2-r4" .
|
||||||
@ -50,16 +50,16 @@ my @nrange_tests = (
|
|||||||
" -> 15 14 13 12 11 10 9 8 7 6 5 4 3 2 1",
|
" -> 15 14 13 12 11 10 9 8 7 6 5 4 3 2 1",
|
||||||
0],
|
0],
|
||||||
["1-10:quack",
|
["1-10:quack",
|
||||||
"error at * in numeric range 1-10*:quack: unexpected even/odd modifier",
|
"error at * in numeric range 1-10*:quack: expected :even or :odd",
|
||||||
2],
|
2],
|
||||||
["1-10:",
|
["1-10:",
|
||||||
"error at * in numeric range 1-10*:: unexpected even/odd modifier",
|
"error at * in numeric range 1-10*:: expected :even or :odd",
|
||||||
2],
|
2],
|
||||||
["1-10,r:",
|
["1-10,r:",
|
||||||
"error at * in numeric range 1-10,r*:: unexpected even/odd modifier",
|
"error at * in numeric range 1-10,r*:: expected :even or :odd",
|
||||||
2],
|
2],
|
||||||
["1-10,:",
|
["1-10,:",
|
||||||
"error at * in numeric range 1-10,*:: unexpected colon",
|
"error at * in numeric range 1-10,*:: expected :even or :odd",
|
||||||
2],
|
2],
|
||||||
["1-6,8-12:odd",
|
["1-6,8-12:odd",
|
||||||
"numeric range 1-6,8-12:odd -> 1 3 5 8 10 12",
|
"numeric range 1-6,8-12:odd -> 1 3 5 8 10 12",
|
||||||
|
Loading…
Reference in New Issue
Block a user