qpdf/libqpdf/Pl_QPDFTokenizer.cc

#include <qpdf/Pl_QPDFTokenizer.hh>
#include <qpdf/QPDF_String.hh>
#include <qpdf/QPDF_Name.hh>
#include <stdexcept>
#include <vector>
#include <string.h>

// Construct the pipeline stage.  `identifier` and `next` are handed
// straight to the Pipeline base class; every piece of normalization
// state starts out cleared.
Pl_QPDFTokenizer::Pl_QPDFTokenizer(char const* identifier, Pipeline* next) :
    Pipeline(identifier, next),
    // Set when a newline is seen in the middle of a token; the newline
    // is then written right after that token.
    newline_after_next_token(false),
    // True when the last byte written downstream was '\n'.
    just_wrote_nl(false),
    // True when the previously examined character was '\r'.
    last_char_was_cr(false),
    // unread_char / char_to_unread are filled in by the tokenizer's
    // getToken() call and consumed by checkUnread().
    unread_char(false),
    char_to_unread('\0'),
    // Once set (after a "BI" word), input is copied through untouched.
    pass_through(false)
{
}

// Destructor.  The body is intentionally empty: this file performs no
// explicit cleanup here.
Pl_QPDFTokenizer::~Pl_QPDFTokenizer()
{
}

// Write `len` bytes starting at `buf` to the next pipeline and record
// whether the last byte written was a newline (just_wrote_nl).
//
// The next pipeline's write() is called with a mutable unsigned char
// buffer, so the data is copied into a temporary first.  The temporary
// is owned by a std::vector so that it is released even if the
// downstream write() throws.  A non-positive `len` writes nothing and
// leaves just_wrote_nl untouched.
void
Pl_QPDFTokenizer::writeNext(char const* buf, int len)
{
    if (len <= 0)
    {
        return;
    }

    unsigned char const* const first =
        reinterpret_cast<unsigned char const*>(buf);
    std::vector<unsigned char> copy(first, first + len);
    getNext()->write(&copy[0], len);

    // Only updated after a successful write, as before.
    this->just_wrote_nl = (buf[len-1] == '\n');
}

// Emit one token downstream in normalized form.
//
// String and name tokens are re-serialized from their parsed value via
// QPDF_String / QPDF_Name unparse(); every other token type is written
// exactly as it appeared in the input (its raw value).
void
Pl_QPDFTokenizer::writeToken(QPDFTokenizer::Token& token)
{
    std::string text;

    switch (token.getType())
    {
      case QPDFTokenizer::tt_name:
        text = QPDF_Name(token.getValue()).unparse();
        break;

      case QPDFTokenizer::tt_string:
        text = QPDF_String(token.getValue()).unparse();
        break;

      default:
        // Anything else passes through verbatim.
        text = token.getRawValue();
        break;
    }

    writeNext(text.c_str(), text.length());
}

// Feed a single input character through the tokenizer and write any
// resulting normalized output to the next pipeline.
//
// In pass-through mode the character is forwarded unchanged.
// Otherwise the character is presented to the tokenizer:
//  - if that completes a token, the token is written (followed by a
//    newline if one was seen while the token was being read), and a
//    "BI" word switches this pipeline into pass-through mode;
//  - if no token is ready, line endings are normalized: '\r' becomes
//    '\n' and a '\n' directly after a '\r' is dropped.  Characters
//    between tokens are written immediately; a newline seen inside a
//    token is deferred until the token has been written.
//
// The tokenizer may hand back a character through unread_char /
// char_to_unread; except in the "BI" case that character is left
// pending for checkUnread().
void
Pl_QPDFTokenizer::processChar(char ch)
{
    if (this->pass_through)
    {
	// We're not normalizing anymore -- just write this without
	// looking at it.
	writeNext(&ch, 1);
	return;
    }

    tokenizer.presentCharacter(ch);
    QPDFTokenizer::Token token;
    if (tokenizer.getToken(token, this->unread_char, this->char_to_unread))
    {
	writeToken(token);
	// Emit the newline that was held back while the token was
	// still being read.
	if (this->newline_after_next_token)
	{
	    writeNext("\n", 1);
	    this->newline_after_next_token = false;
	}
	if ((token.getType() == QPDFTokenizer::tt_word) &&
	    (token.getValue() == "BI"))
	{
	    // Uh oh.... we're not sophisticated enough to handle
	    // inline images safely.  We'd have to set up all the
	    // filters and pipe the image data through it until the
	    // filtered output was the right size for an image of the
	    // specified dimensions.  Then we'd either have to write
	    // out raw image data or continue to write filtered data,
	    // resuming normalization when we get to the end.
	    // Instead, for now, we'll just turn off normalization for
	    // the remainder of this stream.
	    this->pass_through = true;
	    if (this->unread_char)
	    {
		// The character handed back by the tokenizer must not be
		// re-tokenized now; write it out raw and clear the flag
		// so checkUnread() does not process it again.
		writeNext(&this->char_to_unread, 1);
		this->unread_char = false;
	    }
	}
    }
    else
    {
	bool suppress = false;
	if ((ch == '\n') && (this->last_char_was_cr))
	{
	    // Always ignore \n following \r
	    suppress = true;
	}

	// Record whether this character is a carriage return and, if
	// so, treat it as a newline from here on.
	// NOTE(review): last_char_was_cr is only refreshed in this
	// no-token branch.  If a token completes on the character
	// right after a '\r', the flag stays set -- confirm that a
	// later '\n' cannot be suppressed by mistake.
	if ((this->last_char_was_cr = (ch == '\r')))
	{
	    ch = '\n';
	}

	if (this->tokenizer.betweenTokens())
	{
	    // Characters between tokens are copied out as they arrive.
	    if (! suppress)
	    {
		writeNext(&ch, 1);
	    }
	}
	else
	{
	    // Inside a token: remember the newline and write it once the
	    // token itself has been written.
	    if (ch == '\n')
	    {
		this->newline_after_next_token = true;
	    }
	}
    }
}


// If the tokenizer handed a character back (unread_char is set), run
// that character through processChar() again.
//
// Throws std::logic_error if unread_char is still set after the
// character has been reprocessed.
void
Pl_QPDFTokenizer::checkUnread()
{
    if (! this->unread_char)
    {
        // Nothing pending.
        return;
    }

    processChar(this->char_to_unread);

    if (! this->unread_char)
    {
        return;
    }

    throw std::logic_error(
        "INTERNAL ERROR: unread_char still true after processing "
        "unread character");
}

// Pipeline entry point: process `len` bytes from `buf` one character at
// a time.
//
// Any character left pending by the tokenizer is reprocessed before the
// first byte and again after every byte, so nothing is pending when
// this function returns.  A non-positive `len` processes no bytes.
void
Pl_QPDFTokenizer::write(unsigned char* buf, int len)
{
    checkUnread();

    unsigned char const* cursor = buf;
    for (int remaining = len; remaining > 0; --remaining)
    {
        processChar(*cursor);
        ++cursor;
        checkUnread();
    }
}

void
Pl_QPDFTokenizer::finish()
{
    this->tokenizer.presentEOF();
    if (! this->pass_through)
    {
	QPDFTokenizer::Token token;
	if (tokenizer.getToken(token, this->unread_char, this->char_to_unread))
	{
	    writeToken(token);
	    if (unread_char)
	    {
		if (this->char_to_unread == '\r')
		{
		    this->char_to_unread = '\n';
		}
		writeNext(&this->char_to_unread, 1);
	    }
	}
    }
    if (! this->just_wrote_nl)
    {
	writeNext("\n", 1);
    }

    getNext()->finish();
}
update release date to actual date git-svn-id: svn+q:///qpdf/trunk@599 71b93d88-0707-0410-a8cf-f5a4172ac649 2008-04-29 12:55:25 +00:00			`#include <qpdf/Pl_QPDFTokenizer.hh>`
			`#include <qpdf/QPDF_String.hh>`
			`#include <qpdf/QPDF_Name.hh>`
removed qexc; non-compatible ABI change git-svn-id: svn+q:///qpdf/trunk@709 71b93d88-0707-0410-a8cf-f5a4172ac649 2009-09-26 18:36:04 +00:00			`#include <stdexcept>`
missing header files for gcc 4.3 git-svn-id: svn+q:///qpdf/trunk@607 71b93d88-0707-0410-a8cf-f5a4172ac649 2008-05-04 16:02:53 +00:00			`#include <string.h>`
update release date to actual date git-svn-id: svn+q:///qpdf/trunk@599 71b93d88-0707-0410-a8cf-f5a4172ac649 2008-04-29 12:55:25 +00:00
			`Pl_QPDFTokenizer::Pl_QPDFTokenizer(char const* identifier, Pipeline* next) :`
			`Pipeline(identifier, next),`
			`newline_after_next_token(false),`
			`just_wrote_nl(false),`
			`last_char_was_cr(false),`
			`unread_char(false),`
			`char_to_unread('\0'),`
			`pass_through(false)`
			`{`
			`}`

			`Pl_QPDFTokenizer::~Pl_QPDFTokenizer()`
			`{`
			`}`

			`void`
			`Pl_QPDFTokenizer::writeNext(char const* buf, int len)`
			`{`
			`if (len)`
			`{`
			`unsigned char* t = new unsigned char[len];`
			`memcpy(t, buf, len);`
			`getNext()->write(t, len);`
			`delete [] t;`
			`this->just_wrote_nl = (buf[len-1] == '\n');`
			`}`
			`}`

			`void`
			`Pl_QPDFTokenizer::writeToken(QPDFTokenizer::Token& token)`
			`{`
			`std::string value = token.getRawValue();`

			`switch (token.getType())`
			`{`
			`case QPDFTokenizer::tt_string:`
			`value = QPDF_String(token.getValue()).unparse();`
			`break;`

			`case QPDFTokenizer::tt_name:`
			`value = QPDF_Name(token.getValue()).unparse();`
			`break;`

			`default:`
			`break;`
			`}`
			`writeNext(value.c_str(), value.length());`
			`}`

			`void`
			`Pl_QPDFTokenizer::processChar(char ch)`
			`{`
			`if (this->pass_through)`
			`{`
fix many typos in comments and strings git-svn-id: svn+q:///qpdf/trunk@651 71b93d88-0707-0410-a8cf-f5a4172ac649 2009-02-21 02:54:31 +00:00			`// We're not normalizing anymore -- just write this without`
update release date to actual date git-svn-id: svn+q:///qpdf/trunk@599 71b93d88-0707-0410-a8cf-f5a4172ac649 2008-04-29 12:55:25 +00:00			`// looking at it.`
			`writeNext(&ch, 1);`
			`return;`
			`}`

			`tokenizer.presentCharacter(ch);`
			`QPDFTokenizer::Token token;`
			`if (tokenizer.getToken(token, this->unread_char, this->char_to_unread))`
			`{`
			`writeToken(token);`
			`if (this->newline_after_next_token)`
			`{`
			`writeNext("\n", 1);`
			`this->newline_after_next_token = false;`
			`}`
			`if ((token.getType() == QPDFTokenizer::tt_word) &&`
			`(token.getValue() == "BI"))`
			`{`
			`// Uh oh.... we're not sophisticated enough to handle`
			`// inline images safely. We'd have to set up all the`
fix many typos in comments and strings git-svn-id: svn+q:///qpdf/trunk@651 71b93d88-0707-0410-a8cf-f5a4172ac649 2009-02-21 02:54:31 +00:00			`// filters and pipe the image data through it until the`
update release date to actual date git-svn-id: svn+q:///qpdf/trunk@599 71b93d88-0707-0410-a8cf-f5a4172ac649 2008-04-29 12:55:25 +00:00			`// filtered output was the right size for an image of the`
			`// specified dimensions. Then we'd either have to write`
			`// out raw image data or continue to write filtered data,`
			`// resuming normalization when we get to the end.`
fix many typos in comments and strings git-svn-id: svn+q:///qpdf/trunk@651 71b93d88-0707-0410-a8cf-f5a4172ac649 2009-02-21 02:54:31 +00:00			`// Instead, for now, we'll just turn off normalization for`
update release date to actual date git-svn-id: svn+q:///qpdf/trunk@599 71b93d88-0707-0410-a8cf-f5a4172ac649 2008-04-29 12:55:25 +00:00			`// the remainder of this stream.`
			`this->pass_through = true;`
			`if (this->unread_char)`
			`{`
			`writeNext(&this->char_to_unread, 1);`
			`this->unread_char = false;`
			`}`
			`}`
			`}`
			`else`
			`{`
			`bool suppress = false;`
			`if ((ch == '\n') && (this->last_char_was_cr))`
			`{`
			`// Always ignore \n following \r`
			`suppress = true;`
			`}`

			`if ((this->last_char_was_cr = (ch == '\r')))`
			`{`
			`ch = '\n';`
			`}`

			`if (this->tokenizer.betweenTokens())`
			`{`
			`if (! suppress)`
			`{`
			`writeNext(&ch, 1);`
			`}`
			`}`
			`else`
			`{`
			`if (ch == '\n')`
			`{`
			`this->newline_after_next_token = true;`
			`}`
			`}`
			`}`
			`}`


			`void`
			`Pl_QPDFTokenizer::checkUnread()`
			`{`
			`if (this->unread_char)`
			`{`
			`processChar(this->char_to_unread);`
			`if (this->unread_char)`
			`{`
removed qexc; non-compatible ABI change git-svn-id: svn+q:///qpdf/trunk@709 71b93d88-0707-0410-a8cf-f5a4172ac649 2009-09-26 18:36:04 +00:00			`throw std::logic_error(`
			`"INTERNAL ERROR: unread_char still true after processing "`
			`"unread character");`
update release date to actual date git-svn-id: svn+q:///qpdf/trunk@599 71b93d88-0707-0410-a8cf-f5a4172ac649 2008-04-29 12:55:25 +00:00			`}`
			`}`
			`}`

			`void`
			`Pl_QPDFTokenizer::write(unsigned char* buf, int len)`
			`{`
			`checkUnread();`
			`for (int i = 0; i < len; ++i)`
			`{`
			`processChar(buf[i]);`
			`checkUnread();`
			`}`
			`}`

			`void`
			`Pl_QPDFTokenizer::finish()`
			`{`
			`this->tokenizer.presentEOF();`
			`if (! this->pass_through)`
			`{`
			`QPDFTokenizer::Token token;`
			`if (tokenizer.getToken(token, this->unread_char, this->char_to_unread))`
			`{`
			`writeToken(token);`
			`if (unread_char)`
			`{`
			`if (this->char_to_unread == '\r')`
			`{`
			`this->char_to_unread = '\n';`
			`}`
			`writeNext(&this->char_to_unread, 1);`
			`}`
			`}`
			`}`
			`if (! this->just_wrote_nl)`
			`{`
			`writeNext("\n", 1);`
			`}`

			`getNext()->finish();`
			`}`