Verzeichnisstruktur phpBB-3.3.15
- Veröffentlicht
- 28.08.2024
So funktioniert es
|
Auf das letzte Element klicken. Dies geht jeweils einen Schritt zurück |
Auf das Icon klicken, dies öffnet das Verzeichnis. Nochmal klicken schließt das Verzeichnis. |
|
(Beispiel Datei-Icons)
|
Auf das Icon klicken, um den Quellcode anzuzeigen |
Parser.php
0001 <?php
0002
0003 /**
0004 * @package s9e\TextFormatter
0005 * @copyright Copyright (c) 2010-2022 The s9e authors
0006 * @license http://www.opensource.org/licenses/mit-license.php The MIT License
0007 */
0008 namespace s9e\TextFormatter;
0009
0010 use InvalidArgumentException;
0011 use RuntimeException;
0012 use s9e\TextFormatter\Parser\FilterProcessing;
0013 use s9e\TextFormatter\Parser\Logger;
0014 use s9e\TextFormatter\Parser\Tag;
0015
0016 class Parser
0017 {
0018 /**#@+
0019 * Boolean rules bitfield
0020 */
0021 const RULE_AUTO_CLOSE = 1 << 0;
0022 const RULE_AUTO_REOPEN = 1 << 1;
0023 const RULE_BREAK_PARAGRAPH = 1 << 2;
0024 const RULE_CREATE_PARAGRAPHS = 1 << 3;
0025 const RULE_DISABLE_AUTO_BR = 1 << 4;
0026 const RULE_ENABLE_AUTO_BR = 1 << 5;
0027 const RULE_IGNORE_TAGS = 1 << 6;
0028 const RULE_IGNORE_TEXT = 1 << 7;
0029 const RULE_IGNORE_WHITESPACE = 1 << 8;
0030 const RULE_IS_TRANSPARENT = 1 << 9;
0031 const RULE_PREVENT_BR = 1 << 10;
0032 const RULE_SUSPEND_AUTO_BR = 1 << 11;
0033 const RULE_TRIM_FIRST_LINE = 1 << 12;
0034 /**#@-*/
0035
0036 /**
0037 * Bitwise disjunction of rules related to automatic line breaks
0038 */
0039 const RULES_AUTO_LINEBREAKS = self::RULE_DISABLE_AUTO_BR | self::RULE_ENABLE_AUTO_BR | self::RULE_SUSPEND_AUTO_BR;
0040
0041 /**
0042 * Bitwise disjunction of rules that are inherited by subcontexts
0043 */
0044 const RULES_INHERITANCE = self::RULE_ENABLE_AUTO_BR;
0045
0046 /**
0047 * All the characters that are considered whitespace
0048 */
0049 const WHITESPACE = " \n\t";
0050
0051 /**
0052 * @var array Number of open tags for each tag name
0053 */
0054 protected $cntOpen;
0055
0056 /**
0057 * @var array Number of times each tag has been used
0058 */
0059 protected $cntTotal;
0060
0061 /**
0062 * @var array Current context
0063 */
0064 protected $context;
0065
0066 /**
0067 * @var integer How hard the parser has worked on fixing bad markup so far
0068 */
0069 protected $currentFixingCost;
0070
0071 /**
0072 * @var Tag Current tag being processed
0073 */
0074 protected $currentTag;
0075
0076 /**
0077 * @var bool Whether the output contains "rich" tags, IOW any tag that is not <p> or <br/>
0078 */
0079 protected $isRich;
0080
0081 /**
0082 * @var Logger This parser's logger
0083 */
0084 protected $logger;
0085
0086 /**
0087 * @var integer How hard the parser should work on fixing bad markup
0088 */
0089 public $maxFixingCost = 10000;
0090
0091 /**
0092 * @var array Associative array of namespace prefixes in use in document (prefixes used as key)
0093 */
0094 protected $namespaces;
0095
0096 /**
0097 * @var array Stack of open tags (instances of Tag)
0098 */
0099 protected $openTags;
0100
0101 /**
0102 * @var string This parser's output
0103 */
0104 protected $output;
0105
0106 /**
0107 * @var integer Position of the cursor in the original text
0108 */
0109 protected $pos;
0110
0111 /**
0112 * @var array Array of callbacks, using plugin names as keys
0113 */
0114 protected $pluginParsers = [];
0115
0116 /**
0117 * @var array Associative array of [pluginName => pluginConfig]
0118 */
0119 protected $pluginsConfig;
0120
0121 /**
0122 * @var array Variables registered for use in filters
0123 */
0124 public $registeredVars = [];
0125
0126 /**
0127 * @var array Root context, used at the root of the document
0128 */
0129 protected $rootContext;
0130
0131 /**
0132 * @var array Tags' config
0133 */
0134 protected $tagsConfig;
0135
0136 /**
0137 * @var array Tag storage
0138 */
0139 protected $tagStack;
0140
0141 /**
0142 * @var bool Whether the tags in the stack are sorted
0143 */
0144 protected $tagStackIsSorted;
0145
0146 /**
0147 * @var string Text being parsed
0148 */
0149 protected $text;
0150
0151 /**
0152 * @var integer Length of the text being parsed
0153 */
0154 protected $textLen;
0155
0156 /**
0157 * @var integer Counter incremented every time the parser is reset. Used as a canary to detect
0158 * whether the parser was reset during execution
0159 */
0160 protected $uid = 0;
0161
0162 /**
0163 * @var integer Position before which we output text verbatim, without paragraphs or linebreaks
0164 */
0165 protected $wsPos;
0166
/**
* Build a parser from a configuration array
*
* @param array $config Configuration. The keys "plugins", "registeredVars", "rootContext" and
*                      "tags" are read from it
*/
public function __construct(array $config)
{
	// Map each config key to the property that stores it
	$map = [
		'plugins'        => 'pluginsConfig',
		'registeredVars' => 'registeredVars',
		'rootContext'    => 'rootContext',
		'tags'           => 'tagsConfig'
	];
	foreach ($map as $configKey => $propName)
	{
		$this->$propName = $config[$configKey];
	}

	// Reuse the unserialization hook to create the logger
	$this->__wakeup();
}
0179
/**
* List the properties that are kept when this parser is serialized
*
* Relying on __sleep() rather than the Serializable interface lets the user pick the serializer
* (e.g. igbinary)
*
* @return array Names of the properties to serialize
*/
public function __sleep()
{
	$persistentProps = [
		'pluginsConfig',
		'registeredVars',
		'rootContext',
		'tagsConfig'
	];

	return $persistentProps;
}
0194
/**
* Restore the parts of the parser that are not serialized
*
* The logger is not in the list returned by __sleep(), so a fresh one is created here
*
* @return void
*/
public function __wakeup()
{
	$this->logger = new Logger();
}
0204
/**
* Prepare the parser for a new text
*
* Validates and normalizes the text, clears the logger, reinitializes the per-parse state and
* increments the uid
*
* @param  string $text Text to be parsed
* @return void
*
* @throws InvalidArgumentException if the text is not valid UTF-8
*/
protected function reset($text)
{
	// The empty pattern in Unicode mode only matches if the subject is valid UTF-8
	if (!preg_match('//u', $text))
	{
		throw new InvalidArgumentException('Invalid UTF-8 input');
	}

	// First turn CR/CRLF into LF, then drop the control characters that aren't allowed in XML.
	// The patterns are applied in order
	$text = preg_replace(
		['/\\r\\n?/', '/[\\x00-\\x08\\x0B\\x0C\\x0E-\\x1F]+/S'],
		["\n",        ''],
		$text
	);

	// Start with an empty log
	$this->logger->clear();

	// Text and cursor
	$this->text    = $text;
	$this->textLen = strlen($text);
	$this->pos     = 0;
	$this->wsPos   = 0;
	$this->output  = '';
	$this->isRich  = false;

	// Tag bookkeeping
	$this->cntOpen           = [];
	$this->cntTotal          = [];
	$this->currentFixingCost = 0;
	$this->currentTag        = null;
	$this->namespaces        = [];
	$this->openTags          = [];
	$this->tagStack          = [];
	$this->tagStackIsSorted  = false;

	// The current context starts as a copy of the root context, outside of any paragraph
	$this->context                = $this->rootContext;
	$this->context['inParagraph'] = false;

	// A new uid lets parse() detect a reset that happens mid-execution
	$this->uid += 1;
}
0249
/**
* Set one option in a tag's config
*
* The tag's config is copied, removed and stored again so that it ends up being a plain value
* rather than a reference, which prevents side-effects on whatever shared the reference.
* References stored *inside* the tag's config are not affected. Unknown tags are ignored
*
* @param  string $tagName     Tag's name
* @param  string $optionName  Option's name
* @param  mixed  $optionValue Option's value
* @return void
*/
protected function setTagOption($tagName, $optionName, $optionValue)
{
	if (!isset($this->tagsConfig[$tagName]))
	{
		return;
	}

	// Take a copy of the config then unset the original entry to break the reference
	$config = $this->tagsConfig[$tagName];
	unset($this->tagsConfig[$tagName]);

	// Store the updated copy back under the same name
	$config[$optionName]        = $optionValue;
	$this->tagsConfig[$tagName] = $config;
}
0274
0275 //==========================================================================
0276 // Public API
0277 //==========================================================================
0278
/**
* Mark a tag as disabled
*
* Sets the tag's "isDisabled" option to true via setTagOption()
*
* @param  string $tagName Name of the tag
* @return void
*/
public function disableTag($tagName)
{
	$optionName = 'isDisabled';
	$this->setTagOption($tagName, $optionName, true);
}
0289
/**
* Enable a tag
*
* Removes the "isDisabled" option from the tag's config. As in setTagOption(), the config is
* copied and the original entry unset before the copy is stored back, so that the change is
* never written through a reference shared with other data. Unknown tags are ignored
*
* @param  string $tagName Name of the tag
* @return void
*/
public function enableTag($tagName)
{
	if (!isset($this->tagsConfig[$tagName]))
	{
		return;
	}

	// Copy the tag's config and remove it. That will destroy the reference, if any
	$tagConfig = $this->tagsConfig[$tagName];
	unset($this->tagsConfig[$tagName]);

	// Remove the option from the copy and replace the tag's config
	unset($tagConfig['isDisabled']);
	$this->tagsConfig[$tagName] = $tagConfig;
}
0303
/**
* Return the Logger instance used by this parser
*
* @return Logger
*/
public function getLogger()
{
	$logger = $this->logger;

	return $logger;
}
0313
/**
* Return the text that was last parsed, in its normalized form
*
* The returned text may differ slightly from the original: reset() normalizes EOLs to LF and
* strips other control codes. Log entries contain offsets based on the normalized text, which
* is what this method is meant to support
*
* @see Parser::reset()
*
* @return string
*/
public function getText()
{
	$normalizedText = $this->text;

	return $normalizedText;
}
0330
/**
* Parse a text and return its XML representation
*
* @param  string $text Text to parse
* @return string       XML representation
*
* @throws RuntimeException if the parser was reset while it was running
*/
public function parse($text)
{
	// Start from a clean state and remember which run this is
	$this->reset($text);
	$expectedUid = $this->uid;

	// Collect the tags from plugins, process them, then wrap up the output
	$this->executePluginParsers();
	$this->processTags();
	$this->finalizeOutput();

	// A different uid means a plugin or a filter has reset the parser mid-execution
	if ($expectedUid !== $this->uid)
	{
		throw new RuntimeException('The parser has been reset during execution');
	}

	// Going over the fixing cost limit is only reported as a warning
	if ($this->maxFixingCost < $this->currentFixingCost)
	{
		$this->logger->warn('Fixing cost limit exceeded');
	}

	return $this->output;
}
0364
/**
* Override the tagLimit of a tag
*
* NOTE: the default tagLimit should generally be set during configuration instead
*
* @param  string  $tagName  The tag's name, in UPPERCASE
* @param  integer $tagLimit New value for the tag's "tagLimit" option
* @return void
*/
public function setTagLimit($tagName, $tagLimit)
{
	$optionName = 'tagLimit';
	$this->setTagOption($tagName, $optionName, $tagLimit);
}
0378
/**
* Override the nestingLimit of a tag
*
* NOTE: the default nestingLimit should generally be set during configuration instead
*
* @param  string  $tagName      The tag's name, in UPPERCASE
* @param  integer $nestingLimit New value for the tag's "nestingLimit" option
* @return void
*/
public function setNestingLimit($tagName, $nestingLimit)
{
	$optionName = 'nestingLimit';
	$this->setTagOption($tagName, $optionName, $nestingLimit);
}
0392
0393 //==========================================================================
0394 // Output handling
0395 //==========================================================================
0396
/**
* Finish the output: flush the remaining text, clean up the markup and add the root node
*
* @return void
*/
protected function finalizeOutput()
{
	// Flush whatever text is left and close the last paragraph
	$this->outputText($this->textLen, 0, true);

	// Strip empty tag pairs such as <I><U></U></I> and empty paragraphs. Removing a pair can
	// leave its parent empty, so we repeat until nothing is removed
	do
	{
		$this->output = preg_replace(
			'(<([^ />]++)[^>]*></\\1>)',
			'',
			$this->output,
			-1,
			$removed
		);
	}
	while ($removed > 0);

	// Merge consecutive <i> tags. A replacement with no occurrence leaves the string as-is
	$this->output = str_replace('</i><i>', '', $this->output);

	// Strip control characters so that the output is valid XML
	$this->output = preg_replace('([\\x00-\\x08\\x0B-\\x1F])', '', $this->output);

	// Encode the Unicode characters that are outside of the BMP
	$this->output = Utils::encodeUnicodeSupplementaryCharacters($this->output);

	// Rich text gets a <r> root, plain text (including <p></p> and <br/>) gets a <t> root
	if ($this->isRich)
	{
		$rootName = 'r';
	}
	else
	{
		$rootName = 't';
	}

	// Build the opening root tag with one namespace declaration per recorded prefix
	$rootStart = '<' . $rootName;
	foreach (array_keys($this->namespaces) as $prefix)
	{
		$rootStart .= ' xmlns:' . $prefix . '="urn:s9e:TextFormatter:' . $prefix . '"';
	}
	$rootStart .= '>';

	$this->output = $rootStart . $this->output . '</' . $rootName . '>';
}
0438
/**
* Append a tag to the output
*
* Outputs the text that precedes the tag, then the tag itself, and moves the cursor past the
* text consumed by the tag. The text consumed by a start tag is wrapped in <s>, the text
* consumed by an end tag is wrapped in <e>. Marks the output as rich
*
* @param  Tag  $tag Tag to append
* @return void
*/
protected function outputTag(Tag $tag)
{
	$this->isRich = true;

	$tagName  = $tag->getName();
	$tagPos   = $tag->getPos();
	$tagLen   = $tag->getLen();
	$tagFlags = $tag->getFlags();

	// Number of lines to skip before and after the tag. Only used if the tag ignores whitespace
	if ($tagFlags & self::RULE_IGNORE_WHITESPACE)
	{
		$skipBefore = 1;
		$skipAfter  = ($tag->isEndTag()) ? 2 : 1;
	}
	else
	{
		$skipBefore = $skipAfter = 0;
	}

	// Current paragraph must end before the tag if:
	//  - the tag is a start (or self-closing) tag and it breaks paragraphs, or
	//  - the tag is an end tag (but not self-closing)
	$closeParagraph = (!$tag->isStartTag() || ($tagFlags & self::RULE_BREAK_PARAGRAPH));

	// Let the cursor catch up with this tag's position
	$this->outputText($tagPos, $skipBefore, $closeParagraph);

	// Capture the text consumed by the tag
	$tagText = ($tagLen)
	         ? htmlspecialchars(substr($this->text, $tagPos, $tagLen), ENT_NOQUOTES, 'UTF-8')
	         : '';

	// Output current tag
	if ($tag->isStartTag())
	{
		// Handle paragraphs before opening the tag
		if (!($tagFlags & self::RULE_BREAK_PARAGRAPH))
		{
			$this->outputParagraphStart($tagPos);
		}

		// Record this tag's namespace, if applicable. A colon at position 0 does not count
		$colonPos = strpos($tagName, ':');
		if ($colonPos)
		{
			$this->namespaces[substr($tagName, 0, $colonPos)] = 0;
		}

		// Open the start tag and add its attributes, but don't close the tag
		$this->output .= '<' . $tagName;

		// We output the attributes in lexical order. Helps canonicalizing the output and could
		// prove useful someday
		$attributes = $tag->getAttributes();
		ksort($attributes);

		foreach ($attributes as $attrName => $attrValue)
		{
			// Newlines are written as a character reference. A literal newline (or a space
			// in its place) would not survive XML attribute-value normalization and the
			// original value could not be recovered
			$this->output .= ' ' . $attrName . '="' . str_replace("\n", '&#10;', htmlspecialchars($attrValue, ENT_COMPAT, 'UTF-8')) . '"';
		}

		if ($tag->isSelfClosingTag())
		{
			if ($tagLen)
			{
				// Self-closing tag that consumes text: the text becomes its content
				$this->output .= '>' . $tagText . '</' . $tagName . '>';
			}
			else
			{
				$this->output .= '/>';
			}
		}
		elseif ($tagLen)
		{
			$this->output .= '><s>' . $tagText . '</s>';
		}
		else
		{
			$this->output .= '>';
		}
	}
	else
	{
		if ($tagLen)
		{
			$this->output .= '<e>' . $tagText . '</e>';
		}

		$this->output .= '</' . $tagName . '>';
	}

	// Move the cursor past the tag
	$this->pos = $tagPos + $tagLen;

	// Skip newlines (no other whitespace) after this tag
	$this->wsPos = $this->pos;
	while ($skipAfter && $this->wsPos < $this->textLen && $this->text[$this->wsPos] === "\n")
	{
		// Decrement the number of lines to skip
		--$skipAfter;

		// Move the cursor past the newline
		++$this->wsPos;
	}
}
0550
/**
* Output the text between the cursor's position (included) and given position (not included)
*
* Depending on the flags of the current context, the text is escaped and output as-is, wrapped
* in <i> tags (RULE_IGNORE_TEXT), split into paragraphs (RULE_CREATE_PARAGRAPHS) and/or has its
* newlines turned into <br/> (automatic line breaks). Whitespace at the end of the text can be
* output verbatim, outside of any paragraph. The cursor ends up at $catchupPos, unless it was
* already at or past it
*
* @param  integer $catchupPos     Position we're catching up to
* @param  integer $maxLines       Maximum number of lines to ignore at the end of the text
* @param  bool    $closeParagraph Whether to close the paragraph at the end, if applicable
* @return void
*/
protected function outputText($catchupPos, $maxLines, $closeParagraph)
{
	if ($closeParagraph)
	{
		if (!($this->context['flags'] & self::RULE_CREATE_PARAGRAPHS))
		{
			// There are no paragraphs to close in this context
			$closeParagraph = false;
		}
		else
		{
			// Ignore any number of lines at the end if we're closing a paragraph
			$maxLines = -1;
		}
	}

	if ($this->pos >= $catchupPos)
	{
		// We're already there, close the paragraph if applicable and return
		if ($closeParagraph)
		{
			$this->outputParagraphEnd();
		}

		return;
	}

	// Skip over previously identified whitespace if applicable. It is output verbatim
	if ($this->wsPos > $this->pos)
	{
		$skipPos       = min($catchupPos, $this->wsPos);
		$this->output .= substr($this->text, $this->pos, $skipPos - $this->pos);
		$this->pos     = $skipPos;

		if ($this->pos >= $catchupPos)
		{
			// Skipped everything. Close the paragraph if applicable and return
			if ($closeParagraph)
			{
				$this->outputParagraphEnd();
			}

			return;
		}
	}

	// Test whether we're even supposed to output anything
	if ($this->context['flags'] & self::RULE_IGNORE_TEXT)
	{
		$catchupLen  = $catchupPos - $this->pos;
		$catchupText = substr($this->text, $this->pos, $catchupLen);

		// If the catchup text is not entirely composed of whitespace, we put it inside ignore
		// tags
		if (strspn($catchupText, self::WHITESPACE) < $catchupLen)
		{
			$catchupText = '<i>' . htmlspecialchars($catchupText, ENT_NOQUOTES, 'UTF-8') . '</i>';
		}

		$this->output .= $catchupText;
		$this->pos     = $catchupPos;

		if ($closeParagraph)
		{
			$this->outputParagraphEnd();
		}

		return;
	}

	// Compute the amount of text to ignore at the end of the output
	$ignorePos = $catchupPos;
	$ignoreLen = 0;

	// Ignore as many lines (including whitespace) as specified. A negative $maxLines never
	// reaches 0, so the loop only stops at the cursor or at a non-whitespace character
	while ($maxLines && --$ignorePos >= $this->pos)
	{
		$c = $this->text[$ignorePos];
		if (strpos(self::WHITESPACE, $c) === false)
		{
			break;
		}

		if ($c === "\n")
		{
			--$maxLines;
		}

		++$ignoreLen;
	}

	// Adjust $catchupPos to ignore the text at the end
	$catchupPos -= $ignoreLen;

	// Break down the text in paragraphs if applicable
	if ($this->context['flags'] & self::RULE_CREATE_PARAGRAPHS)
	{
		if (!$this->context['inParagraph'])
		{
			$this->outputWhitespace($catchupPos);

			if ($catchupPos > $this->pos)
			{
				$this->outputParagraphStart($catchupPos);
			}
		}

		// Look for a paragraph break in this text
		$pbPos = strpos($this->text, "\n\n", $this->pos);

		while ($pbPos !== false && $pbPos < $catchupPos)
		{
			// Output the paragraph up to the break then start a new one. The recursive call
			// moves the cursor, which is where the next search starts
			$this->outputText($pbPos, 0, true);
			$this->outputParagraphStart($catchupPos);

			$pbPos = strpos($this->text, "\n\n", $this->pos);
		}
	}

	// Capture, escape and output the text
	if ($catchupPos > $this->pos)
	{
		$catchupText = htmlspecialchars(
			substr($this->text, $this->pos, $catchupPos - $this->pos),
			ENT_NOQUOTES,
			'UTF-8'
		);

		// Format line breaks if applicable, i.e. if automatic line breaks are enabled and
		// neither disabled nor suspended
		if (($this->context['flags'] & self::RULES_AUTO_LINEBREAKS) === self::RULE_ENABLE_AUTO_BR)
		{
			$catchupText = str_replace("\n", "<br/>\n", $catchupText);
		}

		$this->output .= $catchupText;
	}

	// Close the paragraph if applicable
	if ($closeParagraph)
	{
		$this->outputParagraphEnd();
	}

	// Add the ignored text if applicable. It is output verbatim, after the paragraph's end
	if ($ignoreLen)
	{
		$this->output .= substr($this->text, $catchupPos, $ignoreLen);
	}

	// Move the cursor past the text
	$this->pos = $catchupPos + $ignoreLen;
}
0710
/**
* Output a linebreak at the position of given tag
*
* @param  Tag  $tag
* @return void
*/
protected function outputBrTag(Tag $tag)
{
	// Catch up with the tag's position without closing the current paragraph
	$brPos = $tag->getPos();
	$this->outputText($brPos, 0, false);

	$this->output .= '<br/>';
}
0722
/**
* Output the text covered by an ignore tag, wrapped in <i> tags
*
* Marks the output as rich and moves the cursor past the tag
*
* @param  Tag  $tag
* @return void
*/
protected function outputIgnoreTag(Tag $tag)
{
	$start  = $tag->getPos();
	$length = $tag->getLen();

	// Catch up with the tag's position without closing the current paragraph
	$this->outputText($start, 0, false);

	// Escape the ignored text and output it inside of an <i> element
	$escaped       = htmlspecialchars(substr($this->text, $start, $length), ENT_NOQUOTES, 'UTF-8');
	$this->output .= '<i>' . $escaped . '</i>';
	$this->isRich  = true;

	// The cursor now sits right after the ignored text
	$this->pos = $start + $length;
}
0745
/**
* Open a paragraph between current position and given position, if applicable
*
* Whitespace found between the cursor and $maxPos is output before the paragraph is opened
*
* @param  integer $maxPos Rightmost position at which the paragraph can be opened
* @return void
*/
protected function outputParagraphStart($maxPos)
{
	// Nothing to do if we're already in a paragraph
	if ($this->context['inParagraph'])
	{
		return;
	}

	// Nothing to do if this context does not use paragraphs
	if (!($this->context['flags'] & self::RULE_CREATE_PARAGRAPHS))
	{
		return;
	}

	// Leading whitespace stays outside of the paragraph
	$this->outputWhitespace($maxPos);

	// Do not open a paragraph at the very end of the text
	if ($this->pos >= $this->textLen)
	{
		return;
	}

	$this->output                .= '<p>';
	$this->context['inParagraph'] = true;
}
0771
/**
* Close the current paragraph at the current position, if there is one
*
* @return void
*/
protected function outputParagraphEnd()
{
	if ($this->context['inParagraph'])
	{
		$this->output                .= '</p>';
		$this->context['inParagraph'] = false;
	}
}
0788
/**
* Output the content of a verbatim tag
*
* The text is output with the given tag's flags temporarily replacing the flags of the current
* context
*
* NOTE(review): the end position is computed from $this->currentTag rather than from $tag.
*               Presumably callers always pass the current tag -- confirm
*
* @param  Tag  $tag
* @return void
*/
protected function outputVerbatim(Tag $tag)
{
	// Swap the context's flags for the tag's
	$savedFlags             = $this->context['flags'];
	$this->context['flags'] = $tag->getFlags();

	// Output everything up to the end of the current tag
	$endPos = $this->currentTag->getPos() + $this->currentTag->getLen();
	$this->outputText($endPos, 0, false);

	// Put the original flags back
	$this->context['flags'] = $savedFlags;
}
0802
/**
* Output as much whitespace as possible, verbatim, starting at the current position
*
* The cursor is moved past the whitespace that was output
*
* @param  integer $maxPos Position at which to stop, whether or not more whitespace follows
* @return void
*/
protected function outputWhitespace($maxPos)
{
	$maxLen = $maxPos - $this->pos;
	if ($maxLen <= 0)
	{
		return;
	}

	// Length of the run of whitespace that starts at the cursor, capped at $maxLen
	$wsLen = strspn($this->text, self::WHITESPACE, $this->pos, $maxLen);
	if ($wsLen > 0)
	{
		$this->output .= substr($this->text, $this->pos, $wsLen);
		$this->pos    += $wsLen;
	}
}
0822
0823 //==========================================================================
0824 // Plugins handling
0825 //==========================================================================
0826
/**
* Mark a plugin as disabled
*
* The plugin's config is copied, removed and stored again so that the change is not written
* through a reference. Unknown plugins are ignored
*
* @param  string $pluginName Name of the plugin
* @return void
*/
public function disablePlugin($pluginName)
{
	if (!isset($this->pluginsConfig[$pluginName]))
	{
		return;
	}

	// Take a copy of the config then unset the original entry to break the reference
	$config = $this->pluginsConfig[$pluginName];
	unset($this->pluginsConfig[$pluginName]);

	// Store the updated copy back under the same name
	$config['isDisabled']             = true;
	$this->pluginsConfig[$pluginName] = $config;
}
0846
/**
* Enable a plugin
*
* Sets the plugin's "isDisabled" option to false. As in disablePlugin(), the config is copied
* and the original entry unset before the copy is stored back, so that the change is never
* written through a reference shared with other data. Unknown plugins are ignored
*
* @param  string $pluginName Name of the plugin
* @return void
*/
public function enablePlugin($pluginName)
{
	if (!isset($this->pluginsConfig[$pluginName]))
	{
		return;
	}

	// Copy the plugin's config to remove the reference, if any
	$pluginConfig = $this->pluginsConfig[$pluginName];
	unset($this->pluginsConfig[$pluginName]);

	// Update the value and replace the plugin's config
	$pluginConfig['isDisabled']       = false;
	$this->pluginsConfig[$pluginName] = $pluginConfig;
}
0860
/**
* Run the parser of given plugin against the current text
*
* The parser is not executed if the plugin's "quickMatch" string is not found in the text, or
* if the plugin has both a "regexp" and a "regexpLimit" and the regexp does not match
*
* @param  string $pluginName Plugin's name
* @return void
*/
protected function executePluginParser($pluginName)
{
	$config = $this->pluginsConfig[$pluginName];

	// Cheap substring test that lets us bail out early
	if (isset($config['quickMatch']))
	{
		if (strpos($this->text, $config['quickMatch']) === false)
		{
			return;
		}
	}

	// Plugins without a regexp receive an empty list of matches
	$matches = [];
	if (isset($config['regexp'], $config['regexpLimit']))
	{
		$matches = $this->getMatches($config['regexp'], $config['regexpLimit']);
		if (!$matches)
		{
			return;
		}
	}

	// Execute the plugin's parser, which will add tags via $this->addStartTag() and others
	$parser = $this->getPluginParser($pluginName);
	call_user_func($parser, $this->text, $matches);
}
0888
/**
* Run the parser of every plugin that is not disabled
*
* @return void
*/
protected function executePluginParsers()
{
	foreach ($this->pluginsConfig as $name => $config)
	{
		// Skip the plugins whose "isDisabled" option is set to a non-empty value
		if (!empty($config['isDisabled']))
		{
			continue;
		}

		$this->executePluginParser($name);
	}
}
0904
/**
* Match given regexp against the current text and return at most $limit matches
*
* Matches are returned in set order, with the offset of each captured string
*
* @param  string  $regexp
* @param  integer $limit
* @return array
*/
protected function getMatches($regexp, $limit)
{
	$flags = PREG_SET_ORDER | PREG_OFFSET_CAPTURE;
	$total = preg_match_all($regexp, $this->text, $found, $flags);

	// Only keep the first $limit matches if there are too many
	if ($total > $limit)
	{
		return array_slice($found, 0, $limit);
	}

	return $found;
}
0922
/**
* Return the callback for given plugin's parser, creating and caching it if necessary
*
* The class is the one named in the plugin's "className" option if it is set, otherwise it is
* s9e\TextFormatter\Plugins\<pluginName>\Parser. It is instantiated with this parser and the
* plugin's config, and its parse() method is used as the callback
*
* @param  string   $pluginName Plugin's name
* @return callable
*/
protected function getPluginParser($pluginName)
{
	// Use the cached callback if there is one
	if (isset($this->pluginParsers[$pluginName]))
	{
		return $this->pluginParsers[$pluginName];
	}

	$config = $this->pluginsConfig[$pluginName];
	if (isset($config['className']))
	{
		$class = $config['className'];
	}
	else
	{
		$class = 's9e\\TextFormatter\\Plugins\\' . $pluginName . '\\Parser';
	}

	// Cache the new instance's parse() method as this plugin's callback
	$this->pluginParsers[$pluginName] = [new $class($this, $config), 'parse'];

	return $this->pluginParsers[$pluginName];
}
0945
/**
* Register a parser
*
* Can be used to add a new parser with no plugin config, or pre-generate a parser for an
* existing plugin. If a regexp is given, it is stored in the plugin's config along with the
* limit, replacing any previous values
*
* @param  string   $pluginName
* @param  callable $parser
* @param  string   $regexp
* @param  integer  $limit
* @return void
*
* @throws InvalidArgumentException if $parser is not a valid callback
*/
public function registerParser($pluginName, $parser, $regexp = null, $limit = PHP_INT_MAX)
{
	if (!is_callable($parser))
	{
		// $parser is the second argument of this method
		throw new InvalidArgumentException('Argument 2 passed to ' . __METHOD__ . ' must be a valid callback');
	}

	// Create an empty config for this plugin to ensure it is executed
	if (!isset($this->pluginsConfig[$pluginName]))
	{
		$this->pluginsConfig[$pluginName] = [];
	}

	// Both options are needed for the regexp to be used when the plugin is executed
	if (isset($regexp))
	{
		$this->pluginsConfig[$pluginName]['regexp']      = $regexp;
		$this->pluginsConfig[$pluginName]['regexpLimit'] = $limit;
	}

	$this->pluginParsers[$pluginName] = $parser;
}
0976
0977 //==========================================================================
0978 // Rules handling
0979 //==========================================================================
0980
/**
* Apply the closeAncestor rules of given tag
*
* Looks for the innermost open tag whose name is targeted by the rules. If there is one, the
* given tag is put back on the stack and a magic end tag is added for that ancestor, with a
* sort priority one lower than the given tag's
*
* @param  Tag  $tag Tag
* @return bool      Whether a new tag has been added
*/
protected function closeAncestor(Tag $tag)
{
	if (empty($this->openTags))
	{
		return false;
	}

	$tagConfig = $this->tagsConfig[$tag->getName()];
	if (empty($tagConfig['rules']['closeAncestor']))
	{
		return false;
	}
	$targets = $tagConfig['rules']['closeAncestor'];

	// Walk the open tags from the innermost to the outermost
	for ($i = count($this->openTags) - 1; $i >= 0; --$i)
	{
		$ancestor = $this->openTags[$i];
		if (!isset($targets[$ancestor->getName()]))
		{
			continue;
		}

		++$this->currentFixingCost;

		// Reinsert this tag so that it gets processed again...
		$this->tagStack[] = $tag;

		// ...after the new end tag that closes the ancestor
		$this->addMagicEndTag($ancestor, $tag->getPos(), $tag->getSortPriority() - 1);

		return true;
	}

	return false;
}
1021
/**
* Apply the closeParent rules of given tag
*
* If the last open tag is targeted by the rules, the given tag is put back on the stack and a
* magic end tag is added for that parent, with a sort priority one lower than the given tag's
*
* @param  Tag  $tag Tag
* @return bool      Whether a new tag has been added
*/
protected function closeParent(Tag $tag)
{
	if (empty($this->openTags))
	{
		return false;
	}

	$tagConfig = $this->tagsConfig[$tag->getName()];
	if (empty($tagConfig['rules']['closeParent']))
	{
		return false;
	}

	// The parent is the last tag that was opened
	$parent = end($this->openTags);
	if (!isset($tagConfig['rules']['closeParent'][$parent->getName()]))
	{
		return false;
	}

	++$this->currentFixingCost;

	// Reinsert this tag so that it gets processed again...
	$this->tagStack[] = $tag;

	// ...after the new end tag that closes its parent
	$this->addMagicEndTag($parent, $tag->getPos(), $tag->getSortPriority() - 1);

	return true;
}
1057
/**
* Apply the createChild rules of given tag
*
* Adds one 0-width start tag per name listed in the rules. The tags are positioned after the
* whitespace that follows the cursor and receive increasing priorities, starting at -999
*
* @param  Tag  $tag Tag
* @return void
*/
protected function createChild(Tag $tag)
{
	$tagConfig = $this->tagsConfig[$tag->getName()];
	if (!isset($tagConfig['rules']['createChild']))
	{
		return;
	}

	// Children start at the first character after the cursor that is not whitespace
	$childPos  = $this->pos + strspn($this->text, " \n\r\t", $this->pos);
	$childPrio = -999;
	foreach ($tagConfig['rules']['createChild'] as $childName)
	{
		$this->addStartTag($childName, $childPos, 0, $childPrio);
		++$childPrio;
	}
}
1077
/**
* Apply the fosterParent rules of given tag
*
* NOTE: this rule could create an unbounded loop, either if a tag tries to foster itself or if
*       two or more tags try to foster each other. To mitigate the risk, a tag is never
*       fostered into a copy of itself (its parent still gets closed) and no foster tag is
*       created once currentFixingCost has reached maxFixingCost. currentFixingCost is increased
*       every time the rule applies. The default tagLimit and nestingLimit also serve to prevent
*       the loop from running indefinitely
*
* @param  Tag  $tag Tag
* @return bool      Whether a new tag has been added
*/
protected function fosterParent(Tag $tag)
{
	if (empty($this->openTags))
	{
		return false;
	}

	$tagName   = $tag->getName();
	$tagConfig = $this->tagsConfig[$tagName];
	if (empty($tagConfig['rules']['fosterParent']))
	{
		return false;
	}

	// The parent is the last tag that was opened
	$parent     = end($this->openTags);
	$parentName = $parent->getName();
	if (!isset($tagConfig['rules']['fosterParent'][$parentName]))
	{
		return false;
	}

	// Create the foster tag, unless the tag would foster itself or we're out of budget
	$canFoster = ($parentName !== $tagName && $this->currentFixingCost < $this->maxFixingCost);
	if ($canFoster)
	{
		$this->addFosterTag($tag, $parent);
	}

	// Reinsert current tag
	$this->tagStack[] = $tag;

	// Close the parent with a priority that ensures it is processed before this tag
	$this->addMagicEndTag($parent, $tag->getPos(), $tag->getSortPriority() - 1);

	// Account for the additional tags/processing
	$this->currentFixingCost += 4;

	return true;
}
1127
/**
* Apply the requireAncestor rules of given tag
*
* The requirement is fulfilled if at least one of the listed tags is currently open. If none
* is, an error is logged
*
* @param  Tag  $tag Tag
* @return bool      Whether this tag has an unfulfilled requireAncestor requirement
*/
protected function requireAncestor(Tag $tag)
{
	$tagConfig = $this->tagsConfig[$tag->getName()];
	if (!isset($tagConfig['rules']['requireAncestor']))
	{
		return false;
	}
	$required = $tagConfig['rules']['requireAncestor'];

	// Any one open ancestor from the list is enough
	foreach ($required as $name)
	{
		if (!empty($this->cntOpen[$name]))
		{
			return false;
		}
	}

	$logContext = [
		'requireAncestor' => implode(',', $required),
		'tag'             => $tag
	];
	$this->logger->err('Tag requires an ancestor', $logContext);

	return true;
}
1159
1160 //==========================================================================
1161 // Tag processing
1162 //==========================================================================
1163
/**
 * Queue a 0-width copy of a tag right after another tag
 *
 * The copy is placed at the coordinates computed by getMagicStartCoords() from the end of
 * $tag, and $tag is set to cascade its invalidation to the copy.
 *
 * @param  Tag $tag       Current tag
 * @param  Tag $fosterTag Tag to foster (the tag being copied)
 * @return void
 */
protected function addFosterTag(Tag $tag, Tag $fosterTag)
{
	// [position, priority] of the copy, computed from the end of the current tag
	$coords = $this->getMagicStartCoords($tag->getPos() + $tag->getLen());

	$copy = $this->addCopyTag($fosterTag, $coords[0], 0, $coords[1]);

	// Make the copy depend on the current tag
	$tag->cascadeInvalidationTo($copy);
}
1179
/**
 * Queue a 0-width end tag paired with given start tag
 *
 * If either the current tag or the start tag carries RULE_IGNORE_WHITESPACE, the position is
 * first moved back over whitespace via getMagicEndPos().
 *
 * @param  Tag     $startTag Start tag
 * @param  integer $tagPos   End tag's position (will be adjusted for whitespace if applicable)
 * @param  integer $prio     End tag's priority
 * @return Tag               The new end tag
 */
protected function addMagicEndTag(Tag $startTag, $tagPos, $prio = 0)
{
	// Whitespace is minimized if either tag asks for it
	$flags = $this->currentTag->getFlags() | $startTag->getFlags();
	if ($flags & self::RULE_IGNORE_WHITESPACE)
	{
		$tagPos = $this->getMagicEndPos($tagPos);
	}

	$endTag = $this->addEndTag($startTag->getName(), $tagPos, 0, $prio);
	$endTag->pairWith($startTag);

	return $endTag;
}
1204
/**
 * Compute the position of a magic end tag, adjusted for whitespace
 *
 * Walks backwards from given position, skipping characters listed in self::WHITESPACE, and
 * never goes further back than the cursor's position.
 *
 * @param  integer $tagPos Rightmost possible position for the tag
 * @return integer         Adjusted position
 */
protected function getMagicEndPos($tagPos)
{
	for ($pos = $tagPos; $pos > $this->pos; --$pos)
	{
		// Stop at the first character (reading right to left) that is not whitespace
		if (strpos(self::WHITESPACE, $this->text[$pos - 1]) === false)
		{
			break;
		}
	}

	return $pos;
}
1222
/**
 * Compute the position and priority of a magic start tag, adjusted for whitespace
 *
 * Starting from given position, whitespace (as listed in self::WHITESPACE) is skipped until
 * the next tag on the stack or the end of the text is reached. If the resulting position is
 * the same as the next tag's, the returned priority is the next tag's priority minus one,
 * otherwise it is 0.
 *
 * @param  integer   $tagPos Leftmost possible position for the tag
 * @return integer[]         [Tag pos, priority]
 */
protected function getMagicStartCoords($tagPos)
{
	if (empty($this->tagStack))
	{
		// Set the next position outside the text boundaries so that it can never be equal
		// to the computed position
		$nextPos  = $this->textLen + 1;
		$nextPrio = 0;
	}
	else
	{
		$nextTag  = end($this->tagStack);
		$nextPos  = $nextTag->getPos();
		$nextPrio = $nextTag->getSortPriority();
	}

	// Never scan past the last character of the text. Without this bound, an empty stack
	// lets the loop read $this->text[$this->textLen], an offset that does not exist.
	// NOTE(review): this assumes textLen is the byte length of $this->text
	$maxPos = min($nextPos, $this->textLen);

	// Find the first non-whitespace position before next tag or the end of text
	while ($tagPos < $maxPos && strpos(self::WHITESPACE, $this->text[$tagPos]) !== false)
	{
		++$tagPos;
	}

	// Set a priority that ensures this tag appears before the next tag
	$prio = ($tagPos === $nextPos) ? $nextPrio - 1 : 0;

	return [$tagPos, $prio];
}
1255
/**
 * Test whether the tag on top of the stack can close given start tag
 *
 * @param  Tag  $tag Start tag
 * @return bool      False if the stack is empty, otherwise the result of canClose()
 */
protected function isFollowedByClosingTag(Tag $tag)
{
	if (empty($this->tagStack))
	{
		return false;
	}

	// The last element of the stack is the next tag to be processed
	$nextTag = end($this->tagStack);

	return $nextTag->canClose($tag);
}
1266
/**
 * Process all tags in the stack
 *
 * Resets the per-tag counters, then alternates between draining the stack and queuing end
 * tags for whatever is still open, until no tag is left on the stack.
 *
 * @return void
 */
protected function processTags()
{
	if (empty($this->tagStack))
	{
		return;
	}

	// Reset the open/total counters of every configured tag
	foreach ($this->tagsConfig as $tagName => $unused)
	{
		$this->cntOpen[$tagName]  = 0;
		$this->cntTotal[$tagName] = 0;
	}

	do
	{
		// Drain the stack, re-sorting it whenever it's been flagged as unsorted
		while (!empty($this->tagStack))
		{
			if (!$this->tagStackIsSorted)
			{
				$this->sortTags();
			}

			$this->currentTag = array_pop($this->tagStack);
			$this->processCurrentTag();
		}

		// Queue an end tag at the end of the text for every tag that is still open. They are
		// added from ancestors to descendants; the original note states that the stack's
		// LIFO order then closes them from descendants to ancestors
		foreach ($this->openTags as $openTag)
		{
			$this->addMagicEndTag($openTag, $this->textLen);
		}
	}
	while (!empty($this->tagStack));
}
1311
/**
 * Process current tag
 *
 * The tag is first checked against the RULE_IGNORE_TAGS flag of the current context and
 * against the cursor's position, which may invalidate or replace it. A tag that is still
 * valid is then dispatched according to its kind: ignore tag, br tag, paragraph break,
 * verbatim tag, start tag or end tag.
 *
 * @return void
 */
protected function processCurrentTag()
{
	// Invalidate current tag if tags are disabled and current tag would not close the last open
	// tag and is not a system tag
	if ($this->context['flags'] & self::RULE_IGNORE_TAGS)
	{
		// end() returns false when no tag is open. In that case there is nothing that could
		// be closed, and false must not be handed to canClose()
		// NOTE(review): canClose() is declared outside this part of the file; presumably it
		// only accepts a Tag
		$lastOpenTag   = end($this->openTags);
		$closesLastTag = ($lastOpenTag !== false && $this->currentTag->canClose($lastOpenTag));

		if (!$closesLastTag && !$this->currentTag->isSystemTag())
		{
			$this->currentTag->invalidate();
		}
	}

	$tagPos = $this->currentTag->getPos();
	$tagLen = $this->currentTag->getLen();

	// Test whether the cursor passed this tag's position already
	if ($this->pos > $tagPos && !$this->currentTag->isInvalid())
	{
		// Test whether this tag is paired with a start tag and this tag is still open
		$startTag = $this->currentTag->getStartTag();

		if ($startTag && in_array($startTag, $this->openTags, true))
		{
			// Create an end tag that matches current tag's start tag, which consumes as much of
			// the same text as current tag and is paired with the same start tag
			$this->addEndTag(
				$startTag->getName(),
				$this->pos,
				max(0, $tagPos + $tagLen - $this->pos)
			)->pairWith($startTag);

			// Note that current tag is not invalidated, it's merely replaced
			return;
		}

		// If this is an ignore tag, try to ignore as much as the remaining text as possible
		if ($this->currentTag->isIgnoreTag())
		{
			$ignoreLen = $tagPos + $tagLen - $this->pos;

			if ($ignoreLen > 0)
			{
				// Create a new ignore tag and move on
				$this->addIgnoreTag($this->pos, $ignoreLen);

				return;
			}
		}

		// Skipped tags are invalidated
		$this->currentTag->invalidate();
	}

	if ($this->currentTag->isInvalid())
	{
		return;
	}

	if ($this->currentTag->isIgnoreTag())
	{
		$this->outputIgnoreTag($this->currentTag);
	}
	elseif ($this->currentTag->isBrTag())
	{
		// Output the tag if it's allowed, ignore it otherwise
		if (!($this->context['flags'] & self::RULE_PREVENT_BR))
		{
			$this->outputBrTag($this->currentTag);
		}
	}
	elseif ($this->currentTag->isParagraphBreak())
	{
		$this->outputText($this->currentTag->getPos(), 0, true);
	}
	elseif ($this->currentTag->isVerbatim())
	{
		$this->outputVerbatim($this->currentTag);
	}
	elseif ($this->currentTag->isStartTag())
	{
		$this->processStartTag($this->currentTag);
	}
	else
	{
		$this->processEndTag($this->currentTag);
	}
}
1403
/**
 * Process given start tag (including self-closing tags) at current position
 *
 * The checks are performed in this order:
 *
 *  1. global limit (tagLimit)
 *  2. filterChain, via FilterProcessing::filterTag()
 *  3. fosterParent, closeParent and closeAncestor rules
 *  4. nestingLimit
 *  5. whether the tag is allowed in current context
 *  6. requireAncestor rules
 *
 * The original comment explains the order: the tag is validated before any attempt to close
 * parents or ancestors, and ancestors are closed before the remaining checks because closing
 * them may change the nesting count, the context and the set of open ancestors.
 *
 * @param  Tag  $tag Start tag (including self-closing)
 * @return void
 */
protected function processStartTag(Tag $tag)
{
	$name   = $tag->getName();
	$config = $this->tagsConfig[$name];

	// 1. Global limit
	if ($this->cntTotal[$name] >= $config['tagLimit'])
	{
		$this->logger->err('Tag limit exceeded', [
			'tag'      => $tag,
			'tagName'  => $name,
			'tagLimit' => $config['tagLimit']
		]);
		$tag->invalidate();

		return;
	}

	// 2. Filter chain
	FilterProcessing::filterTag($tag, $this, $this->tagsConfig, $this->openTags);
	if ($tag->isInvalid())
	{
		return;
	}

	// 3. Rules that close a parent or an ancestor, only applied while the fixing budget lasts.
	//    If one of them applies we return here; the tag itself remains valid
	if ($this->currentFixingCost < $this->maxFixingCost
	 && ($this->fosterParent($tag) || $this->closeParent($tag) || $this->closeAncestor($tag)))
	{
		return;
	}

	// 4. Nesting limit
	if ($this->cntOpen[$name] >= $config['nestingLimit'])
	{
		$this->logger->err('Nesting limit exceeded', [
			'tag'          => $tag,
			'tagName'      => $name,
			'nestingLimit' => $config['nestingLimit']
		]);
		$tag->invalidate();

		return;
	}

	// 5. Current context. Tags that consume text are logged as warnings, 0-width tags are
	//    logged at debug level
	if (!$this->tagIsAllowed($name))
	{
		$message = 'Tag is not allowed in this context';
		$details = ['tag' => $tag, 'tagName' => $name];
		if ($tag->getLen() > 0)
		{
			$this->logger->warn($message, $details);
		}
		else
		{
			$this->logger->debug($message, $details);
		}
		$tag->invalidate();

		return;
	}

	// 6. Required ancestors
	if ($this->requireAncestor($tag))
	{
		$tag->invalidate();

		return;
	}

	// A tag with an autoClose rule that is neither self-closing, nor paired with an end tag,
	// nor immediately followed by an end tag is replaced with a self-closing tag that has the
	// same name, position, length, attributes and flags
	if (($tag->getFlags() & self::RULE_AUTO_CLOSE)
	 && !$tag->isSelfClosingTag()
	 && !$tag->getEndTag()
	 && !$this->isFollowedByClosingTag($tag))
	{
		$replacement = new Tag(Tag::SELF_CLOSING_TAG, $name, $tag->getPos(), $tag->getLen());
		$replacement->setAttributes($tag->getAttributes());
		$replacement->setFlags($tag->getFlags());

		$tag = $replacement;
	}

	// With a trimFirstLine rule, a newline that directly follows the tag is ignored
	if ($tag->getFlags() & self::RULE_TRIM_FIRST_LINE)
	{
		$contentPos = $tag->getPos() + $tag->getLen();
		if (substr($this->text, $contentPos, 1) === "\n")
		{
			$this->addIgnoreTag($contentPos, 1);
		}
	}

	// This tag is valid, output it and update the context
	$this->outputTag($tag);
	$this->pushContext($tag);

	// Apply the createChild rules if applicable
	$this->createChild($tag);
}
1523
/**
 * Process given end tag at current position
 *
 * Open tags that sit between the end tag and its matching start tag are closed first, and
 * those flagged with RULE_AUTO_REOPEN may be queued again after the end tag, as long as the
 * fixing budget allows it.
 *
 * @param  Tag  $tag End tag
 * @return void
 */
protected function processEndTag(Tag $tag)
{
	$name = $tag->getName();

	if (empty($this->cntOpen[$name]))
	{
		// This is an end tag with no start tag
		return;
	}

	/**
	* @var array List of tags that need to be closed before given tag
	*/
	$closeTags = [];

	// Look for the open tag that this end tag can close, from the innermost to the outermost.
	// Each tag that has to be skipped over adds to the fixing cost
	for ($i = count($this->openTags) - 1; $i >= 0; --$i)
	{
		$candidate = $this->openTags[$i];
		if ($tag->canClose($candidate))
		{
			break;
		}

		$closeTags[] = $candidate;
		++$this->currentFixingCost;
	}

	if ($i < 0)
	{
		// Did not find a matching tag
		$this->logger->debug('Skipping end tag with no start tag', ['tag' => $tag]);

		return;
	}

	// Whitespace is trimmed if the end tag or any of the tags being closed asks for it
	$flags = $tag->getFlags();
	foreach ($closeTags as $openTag)
	{
		$flags |= $openTag->getFlags();
	}
	$trimWhitespace = (bool) ($flags & self::RULE_IGNORE_WHITESPACE);

	// Only reopen tags if we haven't exceeded our "fixing" budget
	$mayReopen = ($this->currentFixingCost < $this->maxFixingCost);

	// Close the intermediate tags and collect those that should be reopened. Collection stops
	// for good at the first tag that does not have the autoReopen flag
	$reopenTags = [];
	foreach ($closeTags as $openTag)
	{
		$mayReopen = $mayReopen && ($openTag->getFlags() & self::RULE_AUTO_REOPEN);
		if ($mayReopen)
		{
			$reopenTags[] = $openTag;
		}

		// Find the earliest position we can close this open tag
		$endPos = $tag->getPos();
		if ($trimWhitespace)
		{
			$endPos = $this->getMagicEndPos($endPos);
		}

		// Output an end tag to close this start tag, then update the context
		$endTag = new Tag(Tag::END_TAG, $openTag->getName(), $endPos, 0);
		$endTag->setFlags($openTag->getFlags());
		$this->outputTag($endTag);
		$this->popContext();
	}

	// Output our tag, moving the cursor past it, then update the context
	$this->outputTag($tag);
	$this->popContext();

	// If our fixing budget allows it, peek at upcoming tags and remove end tags that would
	// close tags that are already being closed now. Also, filter our list of tags being
	// reopened by removing those that would immediately be closed
	if (!empty($closeTags) && $this->currentFixingCost < $this->maxFixingCost)
	{
		/**
		* @var integer Rightmost position of the portion of text to ignore
		*/
		$ignorePos = $this->pos;

		// NOTE: both loop conditions below increment the fixing cost as a side effect
		$i = count($this->tagStack);
		while (--$i >= 0 && ++$this->currentFixingCost < $this->maxFixingCost)
		{
			$upcomingTag = $this->tagStack[$i];

			// Stop at the first upcoming tag that is positioned after the "ignore" position
			// or that is a start tag or a self-closing tag
			if ($upcomingTag->getPos() > $ignorePos || $upcomingTag->isStartTag())
			{
				break;
			}

			// Test whether this tag would close any of the tags we're about to reopen
			$j = count($closeTags);
			while (--$j >= 0 && ++$this->currentFixingCost < $this->maxFixingCost)
			{
				if (!$upcomingTag->canClose($closeTags[$j]))
				{
					continue;
				}

				// Remove the tag from the lists and reset the keys
				array_splice($closeTags, $j, 1);
				if (isset($reopenTags[$j]))
				{
					array_splice($reopenTags, $j, 1);
				}

				// Extend the ignored text to cover this tag
				$ignorePos = max($ignorePos, $upcomingTag->getPos() + $upcomingTag->getLen());

				break;
			}
		}

		if ($ignorePos > $this->pos)
		{
			/**
			* @todo have a method that takes (pos,len) rather than a Tag
			*/
			$this->outputIgnoreTag(new Tag(Tag::SELF_CLOSING_TAG, 'i', $this->pos, $ignorePos - $this->pos));
		}
	}

	// Re-add tags that need to be reopened, at current cursor position
	foreach ($reopenTags as $startTag)
	{
		$copy = $this->addCopyTag($startTag, $this->pos, 0);

		// Pair the copy with the original's end tag, if it has one
		$pairedEndTag = $startTag->getEndTag();
		if ($pairedEndTag)
		{
			$copy->pairWith($pairedEndTag);
		}
	}
}
1688
/**
 * Leave the context of the innermost open tag
 *
 * Removes the last tag from the list of open tags, decrements its open counter and restores
 * the parent context.
 *
 * @return void
 */
protected function popContext()
{
	$closedTag = array_pop($this->openTags);
	$tagName   = $closedTag->getName();

	--$this->cntOpen[$tagName];
	$this->context = $this->context['parentContext'];
}
1700
/**
 * Update counters and enter a new context based on given tag
 *
 * The total counter is always incremented. For a self-closing tag nothing else happens and
 * the context remains the same. Otherwise the tag is added to the open tags and a new
 * context, linked to the current one through 'parentContext', replaces the current context.
 *
 * @param  Tag  $tag Start tag (including self-closing)
 * @return void
 */
protected function pushContext(Tag $tag)
{
	$name     = $tag->getName();
	$tagFlags = $tag->getFlags();
	$config   = $this->tagsConfig[$name];

	++$this->cntTotal[$name];

	// Self-closing tags do not open a new context
	if ($tag->isSelfClosingTag())
	{
		return;
	}

	// Recompute the allowed tags. Unless the tag is transparent, the low bits (allowed
	// children) of each value of the current context are overridden with its high bits
	// (allowed descendants) before being masked with the tag's own values
	$isTransparent = (bool) ($tagFlags & self::RULE_IS_TRANSPARENT);
	$allowed       = [];
	foreach ($this->context['allowed'] as $index => $bits)
	{
		if (!$isTransparent)
		{
			$bits = ($bits & 0xFF00) | ($bits >> 8);
		}
		$allowed[] = $config['allowed'][$index] & $bits;
	}

	// The new context's flags are the tag's flags plus the rules inherited from the current
	// context
	$contextFlags = $tagFlags | ($this->context['flags'] & self::RULES_INHERITANCE);

	// RULE_DISABLE_AUTO_BR turns off RULE_ENABLE_AUTO_BR
	if ($contextFlags & self::RULE_DISABLE_AUTO_BR)
	{
		$contextFlags &= ~self::RULE_ENABLE_AUTO_BR;
	}

	++$this->cntOpen[$name];
	$this->openTags[] = $tag;
	$this->context    = [
		'allowed'       => $allowed,
		'flags'         => $contextFlags,
		'inParagraph'   => false,
		'parentContext' => $this->context
	];
}
1754
/**
 * Return whether given tag is allowed in current context
 *
 * The tag's bit number selects one bit of the context's 'allowed' list: the upper part of
 * the number (bitNumber >> 3) is the index in the list and the lowest three bits are the
 * position of the bit within that value.
 *
 * @param  string $tagName
 * @return bool
 */
protected function tagIsAllowed($tagName)
{
	$bitNumber = $this->tagsConfig[$tagName]['bitNumber'];
	$mask      = 1 << ($bitNumber & 7);

	return ($this->context['allowed'][$bitNumber >> 3] & $mask) !== 0;
}
1767
1768 //==========================================================================
1769 // Tag stack
1770 //==========================================================================
1771
/**
 * Add a start tag
 *
 * Thin wrapper around addTag() that sets the type to Tag::START_TAG.
 *
 * @param  string  $name Name of the tag
 * @param  integer $pos  Position of the tag in the text
 * @param  integer $len  Length of text consumed by the tag
 * @param  integer $prio Tag's priority
 * @return Tag           The tag created by addTag()
 */
public function addStartTag($name, $pos, $len, $prio = 0)
{
	$tag = $this->addTag(Tag::START_TAG, $name, $pos, $len, $prio);

	return $tag;
}
1785
/**
 * Add an end tag
 *
 * Thin wrapper around addTag() that sets the type to Tag::END_TAG.
 *
 * @param  string  $name Name of the tag
 * @param  integer $pos  Position of the tag in the text
 * @param  integer $len  Length of text consumed by the tag
 * @param  integer $prio Tag's priority
 * @return Tag           The tag created by addTag()
 */
public function addEndTag($name, $pos, $len, $prio = 0)
{
	$tag = $this->addTag(Tag::END_TAG, $name, $pos, $len, $prio);

	return $tag;
}
1799
/**
 * Add a self-closing tag
 *
 * Thin wrapper around addTag() that sets the type to Tag::SELF_CLOSING_TAG.
 *
 * @param  string  $name Name of the tag
 * @param  integer $pos  Position of the tag in the text
 * @param  integer $len  Length of text consumed by the tag
 * @param  integer $prio Tag's priority
 * @return Tag           The tag created by addTag()
 */
public function addSelfClosingTag($name, $pos, $len, $prio = 0)
{
	$tag = $this->addTag(Tag::SELF_CLOSING_TAG, $name, $pos, $len, $prio);

	return $tag;
}
1813
/**
 * Add a 0-width "br" tag to force a line break at given position
 *
 * The tag is a self-closing tag named "br".
 *
 * @param  integer $pos  Position of the tag in the text
 * @param  integer $prio Tag's priority
 * @return Tag           The tag created by addTag()
 */
public function addBrTag($pos, $prio = 0)
{
	$tag = $this->addTag(Tag::SELF_CLOSING_TAG, 'br', $pos, 0, $prio);

	return $tag;
}
1825
/**
 * Add an "ignore" tag
 *
 * The tag is a self-closing tag named "i". Its length is capped so that it does not extend
 * past the end of the text.
 *
 * @param  integer $pos  Position of the tag in the text
 * @param  integer $len  Length of text consumed by the tag
 * @param  integer $prio Tag's priority
 * @return Tag           The tag created by addTag()
 */
public function addIgnoreTag($pos, $len, $prio = 0)
{
	// Number of bytes left between given position and the end of the text
	$remaining = $this->textLen - $pos;
	$ignoreLen = min($len, $remaining);

	return $this->addTag(Tag::SELF_CLOSING_TAG, 'i', $pos, $ignoreLen, $prio);
}
1838
/**
 * Add a paragraph break at given position
 *
 * The break is a zero-width, self-closing tag named "pb". According to the original
 * documentation, this tag is never output in the result.
 *
 * @param  integer $pos  Position of the tag in the text
 * @param  integer $prio Tag's priority
 * @return Tag           The tag created by addTag()
 */
public function addParagraphBreak($pos, $prio = 0)
{
	$tag = $this->addTag(Tag::SELF_CLOSING_TAG, 'pb', $pos, 0, $prio);

	return $tag;
}
1852
/**
 * Add a copy of given tag at given position and length
 *
 * The copy has the same type, name and attributes as the original. It goes through addTag()
 * like any other tag.
 *
 * @param  Tag     $tag  Original tag
 * @param  integer $pos  Copy's position
 * @param  integer $len  Copy's length
 * @param  integer $prio Copy's priority (same as original by default)
 * @return Tag           Copy tag
 */
public function addCopyTag(Tag $tag, $pos, $len, $prio = null)
{
	// Default to the original's priority
	$copyPrio = $prio ?? $tag->getSortPriority();

	$copy = $this->addTag($tag->getType(), $tag->getName(), $pos, $len, $copyPrio);
	$copy->setAttributes($tag->getAttributes());

	return $copy;
}
1873
/**
 * Create a tag and add it to the stack if it is usable
 *
 * The tag is always created and returned. It is invalidated instead of being inserted in the
 * stack if its name is not configured and it is not a system tag, if its text span is
 * rejected by isInvalidTextSpan(), or if its configuration has a non-empty 'isDisabled'
 * value. Only the latter case is logged.
 *
 * @param  integer $type Tag's type
 * @param  string  $name Name of the tag
 * @param  integer $pos  Position of the tag in the text
 * @param  integer $len  Length of text consumed by the tag
 * @param  integer $prio Tag's priority
 * @return Tag
 */
protected function addTag($type, $name, $pos, $len, $prio)
{
	$tag     = new Tag($type, $name, $pos, $len, $prio);
	$isKnown = isset($this->tagsConfig[$name]);

	// Configured tags get their rules bitfield from the config
	if ($isKnown)
	{
		$tag->setFlags($this->tagsConfig[$name]['rules']['flags']);
	}

	$isUnknown = (!$isKnown && !$tag->isSystemTag());
	if ($isUnknown || $this->isInvalidTextSpan($pos, $len))
	{
		// Unknown name, or negative/out of bounds/invalid text span
		$tag->invalidate();
	}
	elseif (!empty($this->tagsConfig[$name]['isDisabled']))
	{
		$this->logger->warn('Tag is disabled', [
			'tag'     => $tag,
			'tagName' => $name
		]);
		$tag->invalidate();
	}
	else
	{
		$this->insertTag($tag);
	}

	return $tag;
}
1920
/**
 * Test whether given text span is outside text boundaries or an invalid UTF sequence
 *
 * A span is rejected if its position or length is negative, if it ends after the end of the
 * text, or if the byte at its start or the byte right after its end is in the range
 * 0x80..0xBF.
 *
 * @param  integer $pos Start of text
 * @param  integer $len Length of text
 * @return bool
 */
protected function isInvalidTextSpan($pos, $len)
{
	if ($len < 0 || $pos < 0 || $pos + $len > $this->textLen)
	{
		return true;
	}

	// First byte of the span, followed by the first byte after the span
	$boundaryBytes = substr($this->text, $pos, 1) . substr($this->text, $pos + $len, 1);

	return (bool) preg_match('([\\x80-\\xBF])', $boundaryBytes);
}
1932
/**
 * Insert given tag in the tag stack
 *
 * While the stack is flagged as unsorted the tag is simply appended. Otherwise it is
 * inserted below every tag whose sort key is lower than its own, which preserves the
 * existing order.
 *
 * @param  Tag  $tag
 * @return void
 */
protected function insertTag(Tag $tag)
{
	if (!$this->tagStackIsSorted)
	{
		$this->tagStack[] = $tag;

		return;
	}

	// Walk down from the top of the stack until we find a tag whose key is not lower than
	// the new tag's key
	$key   = $this->getSortKey($tag);
	$index = count($this->tagStack);
	while ($index > 0 && $key > $this->getSortKey($this->tagStack[$index - 1]))
	{
		--$index;
	}

	// Insert the tag at that index, shifting the tags above it by one slot. The tag has to
	// be wrapped in an array because array_splice() would otherwise cast the object
	array_splice($this->tagStack, $index, 0, [$tag]);
}
1958
/**
 * Add a pair of tags
 *
 * Creates an end tag and a start tag of the same name and pairs them together.
 *
 * @param  string  $name     Name of the tags
 * @param  integer $startPos Position of the start tag
 * @param  integer $startLen Length of the start tag
 * @param  integer $endPos   Position of the end tag
 * @param  integer $endLen   Length of the end tag
 * @param  integer $prio     Start tag's priority (the end tag will be set to minus that value)
 * @return Tag               Start tag
 */
public function addTagPair($name, $startPos, $startLen, $endPos, $endLen, $prio = 0)
{
	// NOTE: the end tag is added first to try to keep the stack in the correct order
	$endTag   = $this->addEndTag($name, $endPos, $endLen, -$prio);
	$startTag = $this->addStartTag($name, $startPos, $startLen, $prio);
	$startTag->pairWith($endTag);

	return $startTag;
}
1979
/**
 * Add a tag that represents a verbatim copy of the original text
 *
 * The tag is a self-closing tag named "v".
 *
 * @param  integer $pos  Position of the tag in the text
 * @param  integer $len  Length of text consumed by the tag
 * @param  integer $prio Tag's priority
 * @return Tag           The tag created by addTag()
 */
public function addVerbatim($pos, $len, $prio = 0)
{
	$tag = $this->addTag(Tag::SELF_CLOSING_TAG, 'v', $pos, $len, $prio);

	return $tag;
}
1992
/**
 * Sort tags by position and precedence
 *
 * Every tag is indexed by its sort key, which includes its current index in the stack, then
 * the list is sorted by key in reverse order and the stack is flagged as sorted.
 *
 * @return void
 */
protected function sortTags()
{
	$tagsByKey = [];
	foreach ($this->tagStack as $index => $tag)
	{
		$tagsByKey[$this->getSortKey($tag, $index)] = $tag;
	}

	// Highest key first
	krsort($tagsByKey);

	$this->tagStack         = array_values($tagsByKey);
	$this->tagStackIsSorted = true;
}
2011
/**
 * Generate a key for given tag that can be used to compare its position using lexical comparisons
 *
 * The key is made of the following fields, in order: position, priority sign, priority,
 * whether the tag consumes text, a length/type ordering value, and given index. Tags are
 * therefore ordered by position first, then by priority, then by whether they consume any
 * text, then by length (or by type for 0-width tags), and finally by index.
 *
 * The stack's array is in reverse order. Therefore, tags that appear at the start of the text
 * are at the end of the array.
 *
 * @param  Tag     $tag
 * @param  integer $tagIndex
 * @return string
 */
protected function getSortKey(Tag $tag, int $tagIndex = 0): string
{
	// Negative priorities are flagged and shifted into positive territory so that they are
	// sorted correctly
	$prio           = $tag->getSortPriority();
	$isNonNegative  = ($prio >= 0);
	if (!$isNonNegative)
	{
		$prio += (1 << 30);
	}

	// 0-width tags are sorted separately from the rest
	$len          = $tag->getLen();
	$consumesText = ($len > 0);
	if ($consumesText)
	{
		// Inverse the length so that longest matches are processed first
		$lenOrder = $this->textLen - $len;
	}
	else
	{
		// Self-closing tags are sorted in-between start tags and end tags to keep them
		// outside of tag pairs
		$typeOrder = [
			Tag::END_TAG          => 0,
			Tag::SELF_CLOSING_TAG => 1,
			Tag::START_TAG        => 2
		];
		$lenOrder = $typeOrder[$tag->getType()];
	}

	return sprintf('%8x%d%8x%d%8x%8x', $tag->getPos(), $isNonNegative, $prio, $consumesText, $lenOrder, $tagIndex);
}
2056 }