From 137dc7acb9f46dfe40b73dd0079bf130eb6981e0 Mon Sep 17 00:00:00 2001
From: Jay Berkenbilt <ejb@ql.org>
Date: Sat, 11 Aug 2012 09:22:59 -0400
Subject: [PATCH] Refactor: move resolution of literal to its own method

---
 include/qpdf/QPDFTokenizer.hh |   1 +
 libqpdf/QPDFTokenizer.cc      | 158 ++++++++++++++++++----------------
 2 files changed, 83 insertions(+), 76 deletions(-)

diff --git a/include/qpdf/QPDFTokenizer.hh b/include/qpdf/QPDFTokenizer.hh
index e888c113..6b385b4d 100644
--- a/include/qpdf/QPDFTokenizer.hh
+++ b/include/qpdf/QPDFTokenizer.hh
@@ -133,6 +133,7 @@ class QPDFTokenizer
 
   private:
     void reset();
+    void resolveLiteral();
 
     // Lexer state
     enum { st_top, st_in_comment, st_in_string, st_lt, st_gt,
diff --git a/libqpdf/QPDFTokenizer.cc b/libqpdf/QPDFTokenizer.cc
index 979a79bf..78ab1551 100644
--- a/libqpdf/QPDFTokenizer.cc
+++ b/libqpdf/QPDFTokenizer.cc
@@ -50,10 +50,90 @@ QPDFTokenizer::reset()
 }
 
 void
-QPDFTokenizer::presentCharacter(char ch)
+QPDFTokenizer::resolveLiteral() // set type from the contents of val
 {
     PCRE num_re("^[\\+\\-]?(?:\\.\\d+|\\d+(?:\\.\\d+)?)$");
 
+    if ((val.length() > 0) && (val[0] == '/'))
+    {
+        type = tt_name;
+        // Expand #xx hex escapes in the name when pound_special_in_name
+        // is set.  '/' by itself is a valid name, so keep the leading /;
+        // that way we never have to deal with an empty name.
+        std::string nval = "/";
+        char const* valstr = val.c_str() + 1;
+        for (char const* p = valstr; *p; ++p)
+        {
+            if ((*p == '#') && this->pound_special_in_name)
+            {
+                if (p[1] && p[2] &&
+                    is_hex_digit(p[1]) && is_hex_digit(p[2]))
+                {
+                    char num[3];
+                    num[0] = p[1];
+                    num[1] = p[2];
+                    num[2] = '\0';
+                    char ch = (char)(strtol(num, 0, 16));
+                    if (ch == '\0')
+                    {
+                        type = tt_bad;
+                        QTC::TC("qpdf", "QPDF_Tokenizer null in name");
+                        error_message =
+                            "null character not allowed in name token";
+                        nval += "#00";
+                    }
+                    else
+                    {
+                        nval += ch;
+                    }
+                    p += 2; // skip the two hex digits just decoded
+                }
+                else
+                {
+                    QTC::TC("qpdf", "QPDF_Tokenizer bad name");
+                    type = tt_bad;
+                    error_message = "invalid name token";
+                    nval += *p;
+                }
+            }
+            else
+            {
+                nval += *p;
+            }
+        }
+        val = nval;
+    }
+    else if (num_re.match(val.c_str()))
+    {
+        if (val.find('.') != std::string::npos)
+        {
+            type = tt_real;
+        }
+        else
+        {
+            type = tt_integer;
+        }
+    }
+    else if ((val == "true") || (val == "false"))
+    {
+        type = tt_bool;
+    }
+    else if (val == "null")
+    {
+        type = tt_null;
+    }
+    else
+    {
+        // Nothing above matched, so leave it as tt_word.  Lots of
+        // cases ($, #, etc.) other than actual words fall into this
+        // category, but that's okay at least for now.
+        type = tt_word;
+    }
+}
+
+void
+QPDFTokenizer::presentCharacter(char ch)
+{
     if (state == st_token_ready)
     {
 	throw std::logic_error(
@@ -342,81 +422,7 @@ QPDFTokenizer::presentCharacter(char ch)
 
     if ((state == st_token_ready) && (type == tt_word))
     {
-	if ((val.length() > 0) && (val[0] == '/'))
-	{
-	    type = tt_name;
-	    // Deal with # in name token.  Note: '/' by itself is a
-	    // valid name, so don't strip leading /.  That way we
-	    // don't have to deal with the empty string as a name.
-	    std::string nval = "/";
-	    char const* valstr = val.c_str() + 1;
-	    for (char const* p = valstr; *p; ++p)
-	    {
-		if ((*p == '#') && this->pound_special_in_name)
-		{
-		    if (p[1] && p[2] &&
-			is_hex_digit(p[1]) && is_hex_digit(p[2]))
-		    {
-			char num[3];
-			num[0] = p[1];
-			num[1] = p[2];
-			num[2] = '\0';
-			char ch = (char)(strtol(num, 0, 16));
-			if (ch == '\0')
-			{
-			    type = tt_bad;
-			    QTC::TC("qpdf", "QPDF_Tokenizer null in name");
-			    error_message =
-				"null character not allowed in name token";
-			    nval += "#00";
-			}
-			else
-			{
-			    nval += ch;
-			}
-			p += 2;
-		    }
-		    else
-		    {
-			QTC::TC("qpdf", "QPDF_Tokenizer bad name");
-			type = tt_bad;
-			error_message = "invalid name token";
-			nval += *p;
-		    }
-		}
-		else
-		{
-		    nval += *p;
-		}
-	    }
-	    val = nval;
-	}
-	else if (num_re.match(val.c_str()))
-	{
-	    if (val.find('.') != std::string::npos)
-	    {
-		type = tt_real;
-	    }
-	    else
-	    {
-		type = tt_integer;
-	    }
-	}
-	else if ((val == "true") || (val == "false"))
-	{
-	    type = tt_bool;
-	}
-	else if (val == "null")
-	{
-	    type = tt_null;
-	}
-	else
-	{
-	    // I don't really know what it is, so leave it as tt_word.
-	    // Lots of cases ($, #, etc.) other than actual words fall
-	    // into this category, but that's okay at least for now.
-	    type = tt_word;
-	}
+        resolveLiteral();
     }
 
     if (! (betweenTokens() || ((state == st_token_ready) && unread_char)))