Release of v5.1.1-alpha1

Move all banners to GitHub. Adds library phpspreadsheet to JCB. Adds import item example to demo component. Updates the Superpower class with the GetRemote class in the plugin. Ensures the super power autoloader triggers the correct repositories.
This commit is contained in:
2025-03-04 21:50:18 +00:00
parent 442263e387
commit 06185f8c3a
1141 changed files with 193033 additions and 158 deletions

View File

@ -0,0 +1,30 @@
<?php
/**
* Composite strategy that runs multiple strategies on tokens.
*/
abstract class HTMLPurifier_Strategy_Composite extends HTMLPurifier_Strategy
{
    /**
     * Ordered pipeline of strategies. Each one receives the token list
     * returned by the strategy before it.
     * @type HTMLPurifier_Strategy[]
     */
    protected $strategies = array();

    /**
     * Feeds the tokens through every registered strategy in turn.
     *
     * @param HTMLPurifier_Token[] $tokens Tokens handed to the first strategy
     * @param HTMLPurifier_Config $config
     * @param HTMLPurifier_Context $context
     * @return HTMLPurifier_Token[] Output of the last strategy, or $tokens
     *         itself when no strategy is registered
     */
    public function execute($tokens, $config, $context)
    {
        // One pipeline stage: hand the running token list to a strategy
        // and carry its result forward.
        $run_stage = function ($carry, $strategy) use ($config, $context) {
            return $strategy->execute($carry, $config, $context);
        };

        return array_reduce($this->strategies, $run_stage, $tokens);
    }
}
// vim: et sw=4 sts=4

View File

@ -0,0 +1,17 @@
<?php
/**
* Core strategy composed of the big four strategies.
*/
class HTMLPurifier_Strategy_Core extends HTMLPurifier_Strategy_Composite
{
    /**
     * Registers the four core strategies in the order they are run.
     */
    public function __construct()
    {
        // Order matters: this is the sequence the composite executes.
        $stage_classes = array(
            'HTMLPurifier_Strategy_RemoveForeignElements',
            'HTMLPurifier_Strategy_MakeWellFormed',
            'HTMLPurifier_Strategy_FixNesting',
            'HTMLPurifier_Strategy_ValidateAttributes',
        );

        foreach ($stage_classes as $stage_class) {
            $this->strategies[] = new $stage_class();
        }
    }
}
// vim: et sw=4 sts=4

View File

@ -0,0 +1,181 @@
<?php
/**
* Takes a well formed list of tokens and fixes their nesting.
*
* HTML elements dictate which elements are allowed to be their children,
* for example, you can't have a p tag in a span tag. Other elements have
* much more rigorous definitions: tables, for instance, require a specific
* order for their elements. There are also constraints not expressible by
* document type definitions, such as the chameleon nature of ins/del
* tags and global child exclusions.
*
* The first major objective of this strategy is to iterate through all
* the nodes and determine whether or not their children conform to the
* element's definition. If they do not, the child definition may
* optionally supply an amended list of elements that is valid or
* require that the entire node be deleted (and the previous node
* rescanned).
*
* The second objective is to ensure that explicitly excluded elements of
* an element do not appear in its children. Code that accomplishes this
* task is pervasive through the strategy, though the two are distinct tasks
* and could, theoretically, be separated (although it's not recommended).
*
* @note Whether or not unrecognized children are silently dropped or
* translated into text depends on the child definitions.
*
* @todo Enable nodes to be bubbled out of the structure. This is
* easier with our new algorithm.
*/
class HTMLPurifier_Strategy_FixNesting extends HTMLPurifier_Strategy
{
/**
* Converts the token list into a node tree, validates every node's
* children bottom-up against its child definition (marking excluded or
* invalid nodes as dead, or replacing their children), and flattens the
* resulting tree back into a token list.
*
* @param HTMLPurifier_Token[] $tokens
* @param HTMLPurifier_Config $config
* @param HTMLPurifier_Context $context
* @return array|HTMLPurifier_Token[]
*/
public function execute($tokens, $config, $context)
{
//####################################################################//
// Pre-processing
// O(n) pass to convert to a tree, so that we can efficiently
// refer to substrings
$top_node = HTMLPurifier_Arborize::arborize($tokens, $config, $context);
// get a copy of the HTML definition
$definition = $config->getHTMLDefinition();
// exclusion checks are skipped entirely when Core.DisableExcludes is set
$excludes_enabled = !$config->get('Core.DisableExcludes');
// setup the context variable 'IsInline', for chameleon processing
// is 'false' when we are not inline, 'true' when it must always
// be inline, and an integer when it is inline for a certain
// branch of the document tree
// NOTE(review): $is_inline is reassigned on every loop iteration below;
// presumably register() binds by reference so the context follows those
// reassignments -- confirm against HTMLPurifier_Context::register.
$is_inline = $definition->info_parent_def->descendants_are_inline;
$context->register('IsInline', $is_inline);
// setup error collector
// $e is tested for truthiness before every send() below, so it may be
// falsy here. NOTE(review): the second argument presumably tells get()
// not to complain when no collector is registered -- confirm.
$e =& $context->get('ErrorCollector', true);
//####################################################################//
// Loop initialization
// stack that contains all elements that are excluded
// it is organized by parent elements, similar to $stack,
// but it is only populated when an element with exclusions is
// processed, i.e. there won't be empty exclusions.
// NOTE(review): $exclude_stack is never read again in this method; the
// active exclusions are carried in each stack frame instead.
$exclude_stack = array($definition->info_parent_def->excludes);
// variable that contains the start token while we are processing
// nodes. This enables error reporting to do its job
$node = $top_node;
// dummy token ($d, the second element of the pair, is not used)
list($token, $d) = $node->toTokenPair();
$context->register('CurrentNode', $node);
$context->register('CurrentToken', $token);
//####################################################################//
// Loop
// We need to implement a post-order traversal iteratively, to
// avoid running into stack space limits. This is pretty tricky
// to reason about, so we just manually stack-ify the recursive
// variant:
//
// function f($node) {
// foreach ($node->children as $child) {
// f($child);
// }
// validate($node);
// }
//
// A stack frame is array($node, $is_inline, $excludes, $ix), where
// $excludes is the set of element names excluded at this node and
// $ix is the index of the next child of $node still to be visited
// (children before $ix have already been processed).
$parent_def = $definition->info_parent_def;
$stack = array(
array($top_node,
$parent_def->descendants_are_inline,
$parent_def->excludes, // exclusions
0)
);
while (!empty($stack)) {
// resume the frame on top of the stack
list($node, $is_inline, $excludes, $ix) = array_pop($stack);
// recursive call
$go = false;
// the bottom frame (stack empty after the pop) is the root node and
// uses the parent definition; every other node is looked up by name
$def = empty($stack) ? $definition->info_parent_def : $definition->info[$node->name];
// advance to the next element child; non-element children are skipped
while (isset($node->children[$ix])) {
$child = $node->children[$ix++];
if ($child instanceof HTMLPurifier_Node_Element) {
$go = true;
// re-push this frame (with the advanced index) so we return to it
// after the child's subtree has been handled, then push the child
$stack[] = array($node, $is_inline, $excludes, $ix);
$stack[] = array($child,
// ToDo: I don't think it matters if it's def or
// child_def, but double check this...
$is_inline || $def->descendants_are_inline,
empty($def->excludes) ? $excludes
: array_merge($excludes, $def->excludes),
0);
break;
}
};
// a child frame was pushed: descend into it before validating $node
if ($go) continue;
// all children handled; refresh $token (registered as 'CurrentToken'
// above) so it corresponds to the node being validated
list($token, $d) = $node->toTokenPair();
// base case
if ($excludes_enabled && isset($excludes[$node->name])) {
// this element is excluded by one of its ancestors
$node->dead = true;
if ($e) $e->send(E_ERROR, 'Strategy_FixNesting: Node excluded');
} else {
// XXX I suppose it would be slightly more efficient to
// avoid the allocation here and have children
// strategies handle it
// keep only the children that were not marked dead earlier
$children = array();
foreach ($node->children as $child) {
if (!$child->dead) $children[] = $child;
}
// result is handled three ways below: true (children are valid),
// false (remove this node), or anything else, which is stored as
// the replacement list of children
$result = $def->child->validateChildren($children, $config, $context);
if ($result === true) {
// children are valid as they stand; store the list with the
// dead nodes filtered out
$node->children = $children;
} elseif ($result === false) {
$node->dead = true;
if ($e) $e->send(E_ERROR, 'Strategy_FixNesting: Node removed');
} else {
$node->children = $result;
if ($e) {
// XXX This will miss mutations of internal nodes. Perhaps defer to the child validators
if (empty($result) && !empty($children)) {
$e->send(E_ERROR, 'Strategy_FixNesting: Node contents removed');
} else if ($result != $children) {
$e->send(E_WARNING, 'Strategy_FixNesting: Node reorganized');
}
}
}
}
}
//####################################################################//
// Post-processing
// remove context variables
$context->destroy('IsInline');
$context->destroy('CurrentNode');
$context->destroy('CurrentToken');
//####################################################################//
// Return
// $node is the last frame popped, i.e. the root frame that was pushed
// first, so this flattens the whole tree
return HTMLPurifier_Arborize::flatten($node, $config, $context);
}
}
// vim: et sw=4 sts=4

View File

@ -0,0 +1,659 @@
<?php
/**
* Takes tokens and makes them well-formed (balances end tags, etc.)
*
* Specification of the armor attributes this strategy uses:
*
* - MakeWellFormed_TagClosedError: This armor field is used to
* suppress tag closed errors for certain tokens [TagClosedSuppress],
* in particular, if a tag was generated automatically by HTML
* Purifier, we may rely on our infrastructure to close it for us
* and shouldn't report an error to the user [TagClosedAuto].
*/
class HTMLPurifier_Strategy_MakeWellFormed extends HTMLPurifier_Strategy
{
    /**
     * Array stream of tokens being processed.
     * @type HTMLPurifier_Token[]
     */
    protected $tokens;

    /**
     * Current token.
     * @type HTMLPurifier_Token
     */
    protected $token;

    /**
     * Zipper managing the true state.
     * @type HTMLPurifier_Zipper
     */
    protected $zipper;

    /**
     * Current nesting of elements.
     * @type array
     */
    protected $stack;

    /**
     * Injectors active in this stream processing.
     * @type HTMLPurifier_Injector[]
     */
    protected $injectors;

    /**
     * Current instance of HTMLPurifier_Config.
     * @type HTMLPurifier_Config
     */
    protected $config;

    /**
     * Current instance of HTMLPurifier_Context.
     * @type HTMLPurifier_Context
     */
    protected $context;

    /**
     * Walks the token stream, balancing start and end tags and giving
     * every active injector a chance to rewrite text, element and end
     * tokens. Whenever the stream is modified, the affected token is
     * reprocessed rather than patched up in place.
     *
     * @param HTMLPurifier_Token[] $tokens
     * @param HTMLPurifier_Config $config
     * @param HTMLPurifier_Context $context
     * @return HTMLPurifier_Token[] The well-formed token list; an empty
     *         array when the input contains no tokens
     * @throws HTMLPurifier_Exception When an unexpected token type is met
     *         or an injector returns an invalid substitution value
     */
    public function execute($tokens, $config, $context)
    {
        $definition = $config->getHTMLDefinition();

        // local variables
        $generator = new HTMLPurifier_Generator($config, $context);
        $escape_invalid_tags = $config->get('Core.EscapeInvalidTags');
        // used for autoclose early abortion
        $global_parent_allowed_elements = $definition->info_parent_def->child->getAllowedElements($config);
        $e = $context->get('ErrorCollector', true);
        $i = false; // injector index
        list($zipper, $token) = HTMLPurifier_Zipper::fromArray($tokens);
        if ($token === NULL) {
            return array();
        }
        $reprocess = false; // whether or not to reprocess the same token
        $stack = array();

        // member variables
        // These are bound by reference so that the helper methods
        // (processToken, insertBefore, remove) always see the current
        // values of the locals used by the main loop.
        $this->stack =& $stack;
        $this->tokens =& $tokens;
        $this->token =& $token;
        $this->zipper =& $zipper;
        $this->config = $config;
        $this->context = $context;

        // context variables
        $context->register('CurrentNesting', $stack);
        $context->register('InputZipper', $zipper);
        $context->register('CurrentToken', $token);

        // -- begin INJECTOR --

        $this->injectors = array();

        $injectors = $config->getBatch('AutoFormat');
        $def_injectors = $definition->info_injector;
        $custom_injectors = $injectors['Custom'];
        unset($injectors['Custom']); // special case
        foreach ($injectors as $injector => $b) {
            // XXX: Fix with a legitimate lookup table of enabled filters
            // keys containing a dot are skipped (they are not injector names)
            if (strpos($injector, '.') !== false) {
                continue;
            }
            $injector = "HTMLPurifier_Injector_$injector";
            if (!$b) {
                continue;
            }
            $this->injectors[] = new $injector;
        }
        foreach ($def_injectors as $injector) {
            // assumed to be objects
            $this->injectors[] = $injector;
        }
        foreach ($custom_injectors as $injector) {
            if (!$injector) {
                continue;
            }
            if (is_string($injector)) {
                $injector = "HTMLPurifier_Injector_$injector";
                $injector = new $injector;
            }
            $this->injectors[] = $injector;
        }

        // give the injectors references to the definition and context
        // variables for performance reasons
        //
        // Injectors whose prepare() reports an error are left out of the
        // active list. The survivors are collected into a fresh list rather
        // than spliced out of $this->injectors while iterating: splicing
        // re-indexes the array, so the keys of the iteration snapshot no
        // longer line up after the first removal and a second failing
        // injector would cause the wrong entry to be removed.
        $prepared_injectors = array();
        foreach ($this->injectors as $injector) {
            $error = $injector->prepare($config, $context);
            if (!$error) {
                $prepared_injectors[] = $injector;
                continue;
            }
            trigger_error("Cannot enable {$injector->name} injector because $error is not allowed", E_USER_WARNING);
        }
        $this->injectors = $prepared_injectors;

        // -- end INJECTOR --

        // a note on reprocessing:
        //      In order to reduce code duplication, whenever some code needs
        //      to make HTML changes in order to make things "correct", the
        //      new HTML gets sent through the purifier, regardless of its
        //      status. This means that if we add a start token, because it
        //      was totally necessary, we don't have to update nesting; we just
        //      punt ($reprocess = true; continue;) and it does that for us.

        // isset is in loop because $tokens size changes during loop exec
        for (;;
             // only increment if we don't need to reprocess
             $reprocess ? $reprocess = false : $token = $zipper->next($token)) {

            // check for a rewind
            // $i is the key left behind by the most recent injector loop
            // below (or false when none has run since the last check)
            if (is_int($i)) {
                // possibility: disable rewinding if the current token has a
                // rewind set on it already. This would offer protection from
                // infinite loop, but might hinder some advanced rewinding.
                $rewind_offset = $this->injectors[$i]->getRewindOffset();
                if (is_int($rewind_offset)) {
                    for ($j = 0; $j < $rewind_offset; $j++) {
                        if (empty($zipper->front)) break;
                        $token = $zipper->prev($token);
                        // indicate that other injectors should not process this token,
                        // but we need to reprocess it.  See Note [Injector skips]
                        unset($token->skip[$i]);
                        $token->rewind = $i;
                        // undo the effect the token had on the nesting stack
                        if ($token instanceof HTMLPurifier_Token_Start) {
                            array_pop($this->stack);
                        } elseif ($token instanceof HTMLPurifier_Token_End) {
                            $this->stack[] = $token->start;
                        }
                    }
                }
                $i = false;
            }

            // handle case of document end
            if ($token === NULL) {
                // kill processing if stack is empty
                if (empty($this->stack)) {
                    break;
                }

                // peek
                $top_nesting = array_pop($this->stack);
                $this->stack[] = $top_nesting;

                // send error [TagClosedSuppress]
                if ($e && !isset($top_nesting->armor['MakeWellFormed_TagClosedError'])) {
                    $e->send(E_NOTICE, 'Strategy_MakeWellFormed: Tag closed by document end', $top_nesting);
                }

                // append, don't splice, since this is the end
                $token = new HTMLPurifier_Token_End($top_nesting->name);

                // punt!
                $reprocess = true;
                continue;
            }

            //echo '<br>'; printZipper($zipper, $token);//printTokens($this->stack);
            //flush();

            // quick-check: if it's not a tag, no need to process
            if (empty($token->is_tag)) {
                if ($token instanceof HTMLPurifier_Token_Text) {
                    foreach ($this->injectors as $i => $injector) {
                        if (isset($token->skip[$i])) {
                            // See Note [Injector skips]
                            continue;
                        }
                        if ($token->rewind !== null && $token->rewind !== $i) {
                            continue;
                        }
                        // Hand the injector a scratch variable; whatever it
                        // leaves in $r is treated as the substitution value.
                        // NOTE(review): relies on handleText() taking its
                        // argument by reference -- see HTMLPurifier_Injector.
                        $r = $token;
                        $injector->handleText($r);
                        $token = $this->processToken($r, $i);
                        $reprocess = true;
                        break;
                    }
                }
                // another possibility is a comment
                continue;
            }

            if (isset($definition->info[$token->name])) {
                $type = $definition->info[$token->name]->child->type;
            } else {
                $type = false; // Type is unknown, treat accordingly
            }

            // quick tag checks: anything that's *not* an end tag
            $ok = false;
            if ($type === 'empty' && $token instanceof HTMLPurifier_Token_Start) {
                // claims to be a start tag but is empty
                $token = new HTMLPurifier_Token_Empty(
                    $token->name,
                    $token->attr,
                    $token->line,
                    $token->col,
                    $token->armor
                );
                $ok = true;
            } elseif ($type && $type !== 'empty' && $token instanceof HTMLPurifier_Token_Empty) {
                // claims to be empty but really is a start tag
                // NB: this assignment is required
                $old_token = $token;
                $token = new HTMLPurifier_Token_End($token->name);
                $token = $this->insertBefore(
                    new HTMLPurifier_Token_Start($old_token->name, $old_token->attr, $old_token->line, $old_token->col, $old_token->armor)
                );
                // punt (since we had to modify the input stream in a non-trivial way)
                $reprocess = true;
                continue;
            } elseif ($token instanceof HTMLPurifier_Token_Empty) {
                // real empty token
                $ok = true;
            } elseif ($token instanceof HTMLPurifier_Token_Start) {
                // start tag

                // ...unless they also have to close their parent
                if (!empty($this->stack)) {

                    // Performance note: you might think that it's rather
                    // inefficient, recalculating the autoclose information
                    // for every tag that a token closes (since when we
                    // do an autoclose, we push a new token into the
                    // stream and then /process/ that, before
                    // re-processing this token.)  But this is
                    // necessary, because an injector can make an
                    // arbitrary transformations to the autoclosing
                    // tokens we introduce, so things may have changed
                    // in the meantime.  Also, doing the inefficient thing is
                    // "easy" to reason about (for certain perverse definitions
                    // of "easy")

                    // peek at the innermost open element
                    $parent = array_pop($this->stack);
                    $this->stack[] = $parent;

                    $parent_def = null;
                    $parent_elements = null;
                    $autoclose = false;
                    if (isset($definition->info[$parent->name])) {
                        $parent_def = $definition->info[$parent->name];
                        $parent_elements = $parent_def->child->getAllowedElements($config);
                        // the parent must be closed when it does not allow this element
                        $autoclose = !isset($parent_elements[$token->name]);
                    }

                    if ($autoclose && $definition->info[$token->name]->wrap) {
                        // Check if an element can be wrapped by another
                        // element to make it valid in a context (for
                        // example, <ul><ul> needs a <li> in between)
                        $wrapname = $definition->info[$token->name]->wrap;
                        $wrapdef = $definition->info[$wrapname];
                        $elements = $wrapdef->child->getAllowedElements($config);
                        if (isset($elements[$token->name]) && isset($parent_elements[$wrapname])) {
                            $newtoken = new HTMLPurifier_Token_Start($wrapname);
                            $token = $this->insertBefore($newtoken);
                            $reprocess = true;
                            continue;
                        }
                    }

                    // formatting parents are re-opened after the new element
                    $carryover = false;
                    if ($autoclose && $parent_def->formatting) {
                        $carryover = true;
                    }

                    if ($autoclose) {
                        // check if this autoclose is doomed to fail
                        // (this rechecks $parent, which is harmless)
                        $autoclose_ok = isset($global_parent_allowed_elements[$token->name]);
                        if (!$autoclose_ok) {
                            foreach ($this->stack as $ancestor) {
                                $elements = $definition->info[$ancestor->name]->child->getAllowedElements($config);
                                if (isset($elements[$token->name])) {
                                    $autoclose_ok = true;
                                    break;
                                }
                                if ($definition->info[$token->name]->wrap) {
                                    $wrapname = $definition->info[$token->name]->wrap;
                                    $wrapdef = $definition->info[$wrapname];
                                    $wrap_elements = $wrapdef->child->getAllowedElements($config);
                                    if (isset($wrap_elements[$token->name]) && isset($elements[$wrapname])) {
                                        $autoclose_ok = true;
                                        break;
                                    }
                                }
                            }
                        }
                        if ($autoclose_ok) {
                            // errors need to be updated
                            $new_token = new HTMLPurifier_Token_End($parent->name);
                            $new_token->start = $parent;
                            // [TagClosedSuppress]
                            if ($e && !isset($parent->armor['MakeWellFormed_TagClosedError'])) {
                                if (!$carryover) {
                                    $e->send(E_NOTICE, 'Strategy_MakeWellFormed: Tag auto closed', $parent);
                                } else {
                                    $e->send(E_NOTICE, 'Strategy_MakeWellFormed: Tag carryover', $parent);
                                }
                            }
                            if ($carryover) {
                                $element = clone $parent;
                                // [TagClosedAuto]
                                $element->armor['MakeWellFormed_TagClosedError'] = true;
                                $element->carryover = true;
                                $token = $this->processToken(array($new_token, $token, $element));
                            } else {
                                $token = $this->insertBefore($new_token);
                            }
                        } else {
                            // no ancestor can hold this element: drop it
                            $token = $this->remove();
                        }
                        $reprocess = true;
                        continue;
                    }

                }
                $ok = true;
            }

            if ($ok) {
                foreach ($this->injectors as $i => $injector) {
                    if (isset($token->skip[$i])) {
                        // See Note [Injector skips]
                        continue;
                    }
                    if ($token->rewind !== null && $token->rewind !== $i) {
                        continue;
                    }
                    $r = $token;
                    $injector->handleElement($r);
                    $token = $this->processToken($r, $i);
                    $reprocess = true;
                    break;
                }
                if (!$reprocess) {
                    // ah, nothing interesting happened; do normal processing
                    if ($token instanceof HTMLPurifier_Token_Start) {
                        $this->stack[] = $token;
                    } elseif ($token instanceof HTMLPurifier_Token_End) {
                        throw new HTMLPurifier_Exception(
                            'Improper handling of end tag in start code; possible error in MakeWellFormed'
                        );
                    }
                }
                continue;
            }

            // sanity check: we should be dealing with a closing tag
            if (!$token instanceof HTMLPurifier_Token_End) {
                throw new HTMLPurifier_Exception('Unaccounted for tag token in input stream, bug in HTML Purifier');
            }

            // make sure that we have something open
            if (empty($this->stack)) {
                if ($escape_invalid_tags) {
                    if ($e) {
                        $e->send(E_WARNING, 'Strategy_MakeWellFormed: Unnecessary end tag to text');
                    }
                    $token = new HTMLPurifier_Token_Text($generator->generateFromToken($token));
                } else {
                    if ($e) {
                        $e->send(E_WARNING, 'Strategy_MakeWellFormed: Unnecessary end tag removed');
                    }
                    $token = $this->remove();
                }
                $reprocess = true;
                continue;
            }

            // first, check for the simplest case: everything closes neatly.
            // Eventually, everything passes through here; if there are problems
            // we modify the input stream accordingly and then punt, so that
            // the tokens get processed again.
            $current_parent = array_pop($this->stack);
            if ($current_parent->name == $token->name) {
                $token->start = $current_parent;
                foreach ($this->injectors as $i => $injector) {
                    if (isset($token->skip[$i])) {
                        // See Note [Injector skips]
                        continue;
                    }
                    if ($token->rewind !== null && $token->rewind !== $i) {
                        continue;
                    }
                    $r = $token;
                    $injector->handleEnd($r);
                    $token = $this->processToken($r, $i);
                    // the end tag will be reprocessed, so its start tag
                    // goes back on the stack
                    $this->stack[] = $current_parent;
                    $reprocess = true;
                    break;
                }
                continue;
            }

            // okay, so we're trying to close the wrong tag

            // undo the previous pop
            $this->stack[] = $current_parent;

            // scroll back the entire nest, trying to find our tag.
            // (feature could be to specify how far you'd like to go)
            $size = count($this->stack);
            // -2 because -1 is the last element, but we already checked that
            $skipped_tags = false;
            for ($j = $size - 2; $j >= 0; $j--) {
                if ($this->stack[$j]->name == $token->name) {
                    $skipped_tags = array_slice($this->stack, $j);
                    break;
                }
            }

            // we didn't find the tag, so remove
            if ($skipped_tags === false) {
                if ($escape_invalid_tags) {
                    if ($e) {
                        $e->send(E_WARNING, 'Strategy_MakeWellFormed: Stray end tag to text');
                    }
                    $token = new HTMLPurifier_Token_Text($generator->generateFromToken($token));
                } else {
                    if ($e) {
                        $e->send(E_WARNING, 'Strategy_MakeWellFormed: Stray end tag removed');
                    }
                    $token = $this->remove();
                }
                $reprocess = true;
                continue;
            }

            // do errors, in REVERSE $j order: a,b,c with </a></b></c>
            $c = count($skipped_tags);
            if ($e) {
                for ($j = $c - 1; $j > 0; $j--) {
                    // notice we exclude $j == 0, i.e. the current ending tag, from
                    // the errors... [TagClosedSuppress]
                    if (!isset($skipped_tags[$j]->armor['MakeWellFormed_TagClosedError'])) {
                        $e->send(E_NOTICE, 'Strategy_MakeWellFormed: Tag closed by element end', $skipped_tags[$j]);
                    }
                }
            }

            // insert tags, in FORWARD $j order: c,b,a with </a></b></c>
            $replace = array($token);
            for ($j = 1; $j < $c; $j++) {
                // ...as well as from the insertions
                $new_token = new HTMLPurifier_Token_End($skipped_tags[$j]->name);
                $new_token->start = $skipped_tags[$j];
                array_unshift($replace, $new_token);
                if (isset($definition->info[$new_token->name]) && $definition->info[$new_token->name]->formatting) {
                    // [TagClosedAuto]
                    // formatting elements are re-opened after the end tag
                    $element = clone $skipped_tags[$j];
                    $element->carryover = true;
                    $element->armor['MakeWellFormed_TagClosedError'] = true;
                    $replace[] = $element;
                }
            }
            $token = $this->processToken($replace);
            $reprocess = true;
            continue;
        }

        $context->destroy('CurrentToken');
        $context->destroy('CurrentNesting');
        $context->destroy('InputZipper');

        unset($this->injectors, $this->stack, $this->tokens);
        return $zipper->toArray($token);
    }

    /**
     * Processes arbitrary token values for complicated substitution patterns.
     * In general:
     *
     * If $token is an array, it is a list of tokens to substitute for the
     * current token. These tokens then get individually processed. If there
     * is a leading integer in the list, that integer determines how many
     * tokens from the stream should be removed.
     *
     * If $token is a regular token, it is swapped with the current token.
     *
     * If $token is false, the current token is deleted.
     *
     * If $token is an integer, that number of tokens (with the first token
     * being the current one) will be deleted.
     *
     * @param HTMLPurifier_Token|array|int|bool $token Token substitution value
     * @param int $injector Index of the injector that performed the
     *        substitution; the default of -1 means this is not an injector
     *        related operation and no skip marks are applied.
     * @return mixed The second element of the pair returned by the zipper's
     *         splice(); callers assign it as the new current token.
     * @throws HTMLPurifier_Exception When $token has an unsupported type or
     *         asks for zero tokens to be deleted
     */
    protected function processToken($token, $injector = -1)
    {
        // Zend OpCache miscompiles $token = array($token), so
        // avoid this pattern.  See: https://github.com/ezyang/htmlpurifier/issues/108

        // normalize forms of token
        if (is_object($token)) {
            $tmp = $token;
            $token = array(1, $tmp);
        }
        if (is_int($token)) {
            $tmp = $token;
            $token = array($tmp);
        }
        if ($token === false) {
            $token = array(1);
        }
        if (!is_array($token)) {
            throw new HTMLPurifier_Exception('Invalid token type from injector');
        }
        // a list without a leading count replaces exactly one token
        if (!is_int($token[0])) {
            array_unshift($token, 1);
        }
        if ($token[0] === 0) {
            throw new HTMLPurifier_Exception('Deleting zero tokens is not valid');
        }

        // $token is now an array with the following form:
        //      array(number nodes to delete, new node 1, new node 2, ...)

        $delete = array_shift($token);
        list($old, $r) = $this->zipper->splice($this->token, $delete, $token);

        if ($injector > -1) {
            // See Note [Injector skips]
            // Determine appropriate skips.  Here's what the code does:
            //  *If* we deleted one or more tokens, copy the skips
            //  of those tokens into the skips of the new tokens (in $token).
            //  Also, mark the newly inserted tokens as having come from
            //  $injector.
            $oldskip = isset($old[0]) ? $old[0]->skip : array();
            foreach ($token as $object) {
                $object->skip = $oldskip;
                $object->skip[$injector] = true;
            }
        }

        return $r;
    }

    /**
     * Inserts a token before the current token. Cursor now points to
     * this token.  You must reprocess after this.
     * @param HTMLPurifier_Token $token
     * @return mixed The second element of the pair returned by the zipper's
     *         splice(); callers assign it as the new current token.
     */
    private function insertBefore($token)
    {
        // NB not $this->zipper->insertBefore(), due to positioning
        // differences
        $splice = $this->zipper->splice($this->token, 0, array($token));

        return $splice[1];
    }

    /**
     * Removes current token. Cursor now points to new token occupying previously
     * occupied space.  You must reprocess after this.
     * @return mixed Whatever the zipper's delete() returns; callers assign it
     *         as the new current token.
     */
    private function remove()
    {
        return $this->zipper->delete();
    }
}
// Note [Injector skips]
// ~~~~~~~~~~~~~~~~~~~~~
// When I originally designed this class, the idea behind the 'skip'
// property of HTMLPurifier_Token was to help avoid infinite loops
// in injector processing. For example, suppose you wrote an injector
// that bolded swear words. Naively, you might write it so that
// whenever you saw ****, you replaced it with <strong>****</strong>.
//
// When this happens, we will reprocess all of the tokens with the
// other injectors. Now there is an opportunity for infinite loop:
// if we rerun the swear-word injector on these tokens, we might
// see **** and then reprocess again to get
// <strong><strong>****</strong></strong> ad infinitum.
//
// Thus, the idea of a skip is that once we process a token with
// an injector, we mark all of those tokens as having "come from"
// the injector, and we never run the injector again on these
// tokens.
//
// There were two more complications, however:
//
// - With HTMLPurifier_Injector_RemoveEmpty, we noticed that if
// you had <b><i></i></b>, after you removed the <i></i>, you
// really would like this injector to go back and reprocess
// the <b> tag, discovering that it is now empty and can be
// removed. So we reintroduced the possibility of infinite looping
// by adding a "rewind" function, which let you go back to an
// earlier point in the token stream and reprocess it with injectors.
// Needless to say, we need to UN-skip the token so it gets
// reprocessed.
//
// - Suppose that you successfully process a token, replace it with
// one with your skip mark, but now another injector wants to
// process the skipped token with another token. Should you continue
// to skip that new token, or reprocess it? If you reprocess,
// you can end up with an infinite loop where one injector converts
// <a> to <b>, and then another injector converts it back. So
// we inherit the skips, but for some reason, I thought that we
// should inherit the skip from the first token of the token
// that we deleted. Why? Well, it seems to work OK.
//
// If I were to redesign this functionality, I would absolutely not
// go about doing it this way: the semantics are just not very well
// defined, and in any case you probably wanted to operate on trees,
// not token streams.
// vim: et sw=4 sts=4

View File

@ -0,0 +1,207 @@
<?php
/**
* Removes all unrecognized tags from the list of tokens.
*
* This strategy iterates through all the tokens and removes unrecognized
* tokens. If a token is not recognized but a TagTransform is defined for
* that element, the element will be transformed accordingly.
*/
class HTMLPurifier_Strategy_RemoveForeignElements extends HTMLPurifier_Strategy
{
/**
* Filters the token list in a single pass: applies tag transforms, drops
* (or, with Core.EscapeInvalidTags, converts to text) tags that are not
* in the HTML definition, drops tags missing a required attribute,
* removes the contents of hidden elements that are not allowed, and
* strips or cleans up comments.
*
* @param HTMLPurifier_Token[] $tokens
* @param HTMLPurifier_Config $config
* @param HTMLPurifier_Context $context
* @return array|HTMLPurifier_Token[]
*/
public function execute($tokens, $config, $context)
{
$definition = $config->getHTMLDefinition();
$generator = new HTMLPurifier_Generator($config, $context);
// tokens that survive filtering, in input order
$result = array();
$escape_invalid_tags = $config->get('Core.EscapeInvalidTags');
$remove_invalid_img = $config->get('Core.RemoveInvalidImg');
// currently only used to determine if comments should be kept
$trusted = $config->get('HTML.Trusted');
$comment_lookup = $config->get('HTML.AllowedComments');
$comment_regexp = $config->get('HTML.AllowedCommentsRegexp');
// true when either comment allow-list (exact lookup or regexp) is configured
$check_comments = $comment_lookup !== array() || $comment_regexp !== null;
$remove_script_contents = $config->get('Core.RemoveScriptContents');
$hidden_elements = $config->get('Core.HiddenElements');
// remove script contents compatibility
// an explicit true/false forces 'script' into or out of the hidden set;
// any other value leaves Core.HiddenElements untouched
if ($remove_script_contents === true) {
$hidden_elements['script'] = true;
} elseif ($remove_script_contents === false && isset($hidden_elements['script'])) {
unset($hidden_elements['script']);
}
$attr_validator = new HTMLPurifier_AttrValidator();
// removes tokens until it reaches a closing tag with its value
$remove_until = false;
// converts comments into text tokens when this is equal to a tag name
$textify_comments = false;
// NOTE(review): $token is the loop variable below; presumably register()
// binds by reference so 'CurrentToken' follows it -- confirm against
// HTMLPurifier_Context::register.
$token = false;
$context->register('CurrentToken', $token);
// error collector, only fetched when Core.CollectErrors is enabled
$e = false;
if ($config->get('Core.CollectErrors')) {
$e =& $context->get('ErrorCollector');
}
foreach ($tokens as $token) {
// while removing a hidden element's contents, skip everything except
// tags carrying that element's name
if ($remove_until) {
if (empty($token->is_tag) || $token->name !== $remove_until) {
continue;
}
}
if (!empty($token->is_tag)) {
// DEFINITION CALL
// before any processing, try to transform the element
if (isset($definition->info_tag_transform[$token->name])) {
$original_name = $token->name;
// there is a transformation for this tag
// DEFINITION CALL
$token = $definition->
info_tag_transform[$token->name]->transform($token, $config, $context);
if ($e) {
$e->send(E_NOTICE, 'Strategy_RemoveForeignElements: Tag transform', $original_name);
}
}
if (isset($definition->info[$token->name])) {
// mostly everything's good, but
// we need to make sure required attributes are in order
if (($token instanceof HTMLPurifier_Token_Start || $token instanceof HTMLPurifier_Token_Empty) &&
$definition->info[$token->name]->required_attr &&
($token->name != 'img' || $remove_invalid_img) // ensure config option still works
) {
// validate first, then check that every required attribute is
// still present on the token
$attr_validator->validateToken($token, $config, $context);
$ok = true;
foreach ($definition->info[$token->name]->required_attr as $name) {
if (!isset($token->attr[$name])) {
$ok = false;
break;
}
}
if (!$ok) {
if ($e) {
// $name is the first required attribute found missing
$e->send(
E_ERROR,
'Strategy_RemoveForeignElements: Missing required attribute',
$name
);
}
// drop the tag
continue;
}
// attributes were validated above; this flag is checked by
// HTMLPurifier_Strategy_ValidateAttributes to skip the token
$token->armor['ValidateAttributes'] = true;
}
// comments inside an allowed hidden element are turned into text
// (see the comment branch below) until its end tag is seen
if (isset($hidden_elements[$token->name]) && $token instanceof HTMLPurifier_Token_Start) {
$textify_comments = $token->name;
} elseif ($token->name === $textify_comments && $token instanceof HTMLPurifier_Token_End) {
$textify_comments = false;
}
} elseif ($escape_invalid_tags) {
// invalid tag, generate HTML representation and insert in
if ($e) {
$e->send(E_WARNING, 'Strategy_RemoveForeignElements: Foreign element to text');
}
$token = new HTMLPurifier_Token_Text(
$generator->generateFromToken($token)
);
} else {
// check if we need to destroy all of the tag's children
// CAN BE GENERICIZED
if (isset($hidden_elements[$token->name])) {
if ($token instanceof HTMLPurifier_Token_Start) {
// start removing everything up to the matching tag name
$remove_until = $token->name;
} elseif ($token instanceof HTMLPurifier_Token_Empty) {
// do nothing: we're still looking
} else {
// any other tag token with this name ends the removal
$remove_until = false;
}
if ($e) {
$e->send(E_ERROR, 'Strategy_RemoveForeignElements: Foreign meta element removed');
}
} else {
if ($e) {
$e->send(E_ERROR, 'Strategy_RemoveForeignElements: Foreign element removed');
}
}
// the foreign tag itself is always dropped
continue;
}
} elseif ($token instanceof HTMLPurifier_Token_Comment) {
// textify comments in script tags when they are allowed
if ($textify_comments !== false) {
$data = $token->data;
$token = new HTMLPurifier_Token_Text($data);
} elseif ($trusted || $check_comments) {
// always cleanup comments
$trailing_hyphen = false;
if ($e) {
// perform check whether or not there's a trailing hyphen
if (substr($token->data, -1) == '-') {
$trailing_hyphen = true;
}
}
// strip trailing hyphens, then collapse every run of hyphens to one
$token->data = rtrim($token->data, '-');
$found_double_hyphen = false;
while (strpos($token->data, '--') !== false) {
$found_double_hyphen = true;
$token->data = str_replace('--', '-', $token->data);
}
// keep the comment when trusted, or when its trimmed text is in the
// lookup table or matches the configured regexp
if ($trusted || !empty($comment_lookup[trim($token->data)]) ||
($comment_regexp !== null && preg_match($comment_regexp, trim($token->data)))) {
// OK good
if ($e) {
if ($trailing_hyphen) {
$e->send(
E_NOTICE,
'Strategy_RemoveForeignElements: Trailing hyphen in comment removed'
);
}
if ($found_double_hyphen) {
$e->send(E_NOTICE, 'Strategy_RemoveForeignElements: Hyphens in comment collapsed');
}
}
} else {
if ($e) {
$e->send(E_NOTICE, 'Strategy_RemoveForeignElements: Comment removed');
}
continue;
}
} else {
// strip comments
if ($e) {
$e->send(E_NOTICE, 'Strategy_RemoveForeignElements: Comment removed');
}
continue;
}
} elseif ($token instanceof HTMLPurifier_Token_Text) {
// text tokens pass through unchanged
} else {
// any other kind of token is dropped
continue;
}
$result[] = $token;
}
if ($remove_until && $e) {
// we removed tokens until the end, throw error
$e->send(E_ERROR, 'Strategy_RemoveForeignElements: Token removed to end', $remove_until);
}
$context->destroy('CurrentToken');
return $result;
}
}
// vim: et sw=4 sts=4

View File

@ -0,0 +1,45 @@
<?php
/**
* Validate all attributes in the tokens.
*/
class HTMLPurifier_Strategy_ValidateAttributes extends HTMLPurifier_Strategy
{
    /**
     * Runs the attribute validator over every start and empty tag that is
     * not armored against validation. Tokens are never added or removed.
     *
     * @param HTMLPurifier_Token[] $tokens
     * @param HTMLPurifier_Config $config
     * @param HTMLPurifier_Context $context
     * @return HTMLPurifier_Token[] The same token list that was passed in
     */
    public function execute($tokens, $config, $context)
    {
        $attr_validator = new HTMLPurifier_AttrValidator();

        // publish the token under inspection as the 'CurrentToken'
        // context variable for the duration of the pass
        $current = false;
        $context->register('CurrentToken', $current);

        foreach ($tokens as $current) {
            // only start and empty tags carry attributes
            $carries_attributes = $current instanceof HTMLPurifier_Token_Start
                || $current instanceof HTMLPurifier_Token_Empty;

            // armored tokens are left alone
            if ($carries_attributes && empty($current->armor['ValidateAttributes'])) {
                $attr_validator->validateToken($current, $config, $context);
            }
        }

        $context->destroy('CurrentToken');

        return $tokens;
    }
}
// vim: et sw=4 sts=4