Verzeichnisstruktur phpBB-3.3.15
- Veröffentlicht
- 28.08.2024
So funktioniert es
|
Auf das letzte Element klicken. Dies geht jeweils einen Schritt zurück |
Auf das Icon klicken, dies öffnet das Verzeichnis. Nochmal klicken schließt das Verzeichnis. |
|
(Beispiel Datei-Icons)
|
Auf das Icon klicken, um den Quellcode anzuzeigen |
Parser.js
/**#@+
* Boolean rules bitfield. Every rule owns exactly one bit so rules can be combined with bitwise OR
*/
/** @const */ var RULE_AUTO_CLOSE        = 0x0001;
/** @const */ var RULE_AUTO_REOPEN       = 0x0002;
/** @const */ var RULE_BREAK_PARAGRAPH   = 0x0004;
/** @const */ var RULE_CREATE_PARAGRAPHS = 0x0008;
/** @const */ var RULE_DISABLE_AUTO_BR   = 0x0010;
/** @const */ var RULE_ENABLE_AUTO_BR    = 0x0020;
/** @const */ var RULE_IGNORE_TAGS       = 0x0040;
/** @const */ var RULE_IGNORE_TEXT       = 0x0080;
/** @const */ var RULE_IGNORE_WHITESPACE = 0x0100;
/** @const */ var RULE_IS_TRANSPARENT    = 0x0200;
/** @const */ var RULE_PREVENT_BR        = 0x0400;
/** @const */ var RULE_SUSPEND_AUTO_BR   = 0x0800;
/** @const */ var RULE_TRIM_FIRST_LINE   = 0x1000;
/**#@-*/

/**
* @const Mask of every rule that deals with automatic line breaks
*/
var RULES_AUTO_LINEBREAKS = RULE_ENABLE_AUTO_BR | RULE_DISABLE_AUTO_BR | RULE_SUSPEND_AUTO_BR;

/**
* @const Mask of the rules that subcontexts inherit
*/
var RULES_INHERITANCE = RULE_ENABLE_AUTO_BR;

/**
* @const Characters treated as whitespace: space, line feed and tab
*/
var WHITESPACE = " \n\t";
0033
/**
* @type {!Object.<string,number>} Number of open tags for each tag name. Emptied by reset() and
*                                 zeroed for every configured tag by processTags()
*/
var cntOpen;

/**
* @type {!Object.<string,number>} Number of times each tag has been used. Emptied by reset() and
*                                 zeroed for every configured tag by processTags()
*/
var cntTotal;

/**
* @type {!Object} Current context. reset() points it at rootContext
*/
var context;

/**
* @type {number} How hard the parser has worked on fixing bad markup so far. Compared against
*                maxFixingCost at the end of parse()
*/
var currentFixingCost;

/**
* @type {?Tag} Current tag being processed
*/
var currentTag;

/**
* @type {boolean} Whether the output contains "rich" tags, IOW any tag that is not <p> or <br/>.
*                 Decides whether the root node is <r> or <t>
*/
var isRich;

/**
* @type {!Logger} This parser's logger
*/
var logger = new Logger;

/**
* @type {number} How hard the parser should work on fixing bad markup
*/
var maxFixingCost = 10000;

/**
* @type {!Object} Associative array of namespace prefixes in use in document (prefixes used as key)
*/
var namespaces;

/**
* @type {!Array.<!Tag>} Stack of open tags (instances of Tag)
*/
var openTags;

/**
* @type {string} This parser's output
*/
var output;

/**
* @type {!Object.<!Object>} Plugins' config, using the plugin's name as key
*/
var plugins;

/**
* @type {number} Position of the cursor in the original text
*/
var pos;

/**
* @type {!Object} Variables registered for use in filters
*/
var registeredVars;

/**
* @type {!Object} Root context, used at the root of the document
*/
var rootContext;

/**
* @type {!Object} Tags' config, using the tag's name as key. Individual entries are replaced by
*                 copyTagConfig() before they are modified
* @const
*/
var tagsConfig;

/**
* @type {!Array.<!Tag>} Tag storage. Tags are taken from the end of this array during processing
*/
var tagStack;

/**
* @type {boolean} Whether the tags in the stack are sorted
*/
var tagStackIsSorted;

/**
* @type {string} Text being parsed
*/
var text;

/**
* @type {number} Length of the text being parsed
*/
var textLen;

/**
* @type {number} Counter incremented every time the parser is reset. Used as a canary to detect
*                whether the parser was reset during execution
*/
var uid = 0;

/**
* @type {number} Position before which we output text verbatim, without paragraphs or linebreaks
*/
var wsPos;
0145
0146 //==========================================================================
0147 // Public API
0148 //==========================================================================
0149
/**
* Mark a tag as disabled
*
* Unknown tag names are ignored. The tag receives its own copy of its config first, so that
* no other tag sharing the same config object is affected.
*
* @param {string} tagName Name of the tag
*/
function disableTag(tagName)
{
	if (!tagsConfig[tagName])
	{
		return;
	}

	var ownConfig = copyTagConfig(tagName);
	ownConfig.isDisabled = true;
}
0162
/**
* Mark a tag as enabled
*
* Unknown tag names are ignored. The tag receives its own copy of its config first, so that
* no other tag sharing the same config object is affected.
*
* @param {string} tagName Name of the tag
*/
function enableTag(tagName)
{
	if (!tagsConfig[tagName])
	{
		return;
	}

	var ownConfig = copyTagConfig(tagName);
	ownConfig.isDisabled = false;
}
0175
/**
* Give access to the Logger instance used by this parser
*
* @return {!Logger} The parser's logger
*/
function getLogger()
{
	return logger;
}
0185
/**
* Parse a text and return its XML representation
*
* Throws the string 'The parser has been reset during execution' if the parser's uid changed
* while parsing, which means a plugin or a filter reset the parser mid-execution.
*
* @param  {string} _text Text to parse
* @return {string}       XML representation
*/
function parse(_text)
{
	// Start from a clean state and remember which run this is
	reset(_text);
	var startUid = uid;

	// Collect the tags, resolve them, then wrap the result in its root node
	executePluginParsers();
	processTags();
	finalizeOutput();

	// A different uid means something called reset() while we were working
	if (startUid !== uid)
	{
		throw 'The parser has been reset during execution';
	}

	// Going over the fixing budget is only worth a warning
	if (maxFixingCost < currentFixingCost)
	{
		logger.warn('Fixing cost limit exceeded');
	}

	return output;
}
0219
/**
* Put the parser back in its initial state, ready to parse given text
*
* @param {string} _text Text to be parsed
*/
function reset(_text)
{
	// CR and CRLF become LF, then the control characters that XML does not allow are dropped
	var cleanText = _text.replace(/\r\n?/g, "\n").replace(/[\x00-\x08\x0B\x0C\x0E-\x1F]+/g, '');

	// Forget the logs of the previous run
	logger.clear();

	// Text and cursors
	text     = cleanText;
	textLen  = cleanText.length;
	pos      = 0;
	wsPos    = 0;
	output   = '';
	isRich   = false;

	// Tag bookkeeping
	cntOpen           = {};
	cntTotal          = {};
	namespaces        = {};
	openTags          = [];
	tagStack          = [];
	tagStackIsSorted  = false;
	currentTag        = null;
	currentFixingCost = 0;

	// Start over from the root context, outside of any paragraph
	context = rootContext;
	context.inParagraph = false;

	// A new uid lets parse() detect that a reset happened
	++uid;
}
0257
/**
* Override the tagLimit of a tag
*
* Unknown tag names are ignored.
*
* NOTE: the default tagLimit should generally be set during configuration instead
*
* @param {string} tagName  The tag's name, in UPPERCASE
* @param {number} tagLimit New tagLimit for this tag
*/
function setTagLimit(tagName, tagLimit)
{
	if (!tagsConfig[tagName])
	{
		return;
	}

	var ownConfig = copyTagConfig(tagName);
	ownConfig.tagLimit = tagLimit;
}
0273
/**
* Override the nestingLimit of a tag
*
* Unknown tag names are ignored.
*
* NOTE: the default nestingLimit should generally be set during configuration instead
*
* @param {string} tagName      The tag's name, in UPPERCASE
* @param {number} nestingLimit New nestingLimit for this tag
*/
function setNestingLimit(tagName, nestingLimit)
{
	if (!tagsConfig[tagName])
	{
		return;
	}

	var ownConfig = copyTagConfig(tagName);
	ownConfig.nestingLimit = nestingLimit;
}
0289
/**
* Replace a tag's config with a shallow copy of itself
*
* Once this has run, the object stored in tagsConfig for this tag is no longer shared with any
* other tag, so it can be modified safely.
*
* @param  {string}  tagName Tag's name
* @return {!Object}         The copy, which is now the tag's config
*/
function copyTagConfig(tagName)
{
	var original = tagsConfig[tagName],
		clone    = {};

	for (var key in original)
	{
		clone[key] = original[key];
	}
	tagsConfig[tagName] = clone;

	return clone;
}
0309
0310 //==========================================================================
0311 // Output handling
0312 //==========================================================================
0313
/**
* Turn every surrogate pair found in the output into a numeric XML entity
*
* Characters outside the BMP are stored as a high surrogate followed by a low surrogate. Each
* such pair is handed over to encodeUnicodeSupplementaryCharactersCallback().
*/
function encodeUnicodeSupplementaryCharacters()
{
	var surrogatePair = /[\uD800-\uDBFF][\uDC00-\uDFFF]/g;

	output = output.replace(surrogatePair, encodeUnicodeSupplementaryCharactersCallback);
}
0324
/**
* Convert a surrogate pair to the decimal XML entity of the code point it encodes
*
* @param  {string} pair Surrogate pair (high surrogate followed by low surrogate)
* @return {string}      XML entity, e.g. "&#128512;"
*/
function encodeUnicodeSupplementaryCharactersCallback(pair)
{
	var highOffset = pair.charCodeAt(0) - 0xD800,
		lowOffset  = pair.charCodeAt(1) - 0xDC00,
		codePoint  = 0x10000 + (highOffset << 10) + lowOffset;

	return '&#' + codePoint + ';';
}
0337
/**
* Finish the output: flush the remaining text, clean up the markup and wrap it in a root node
*/
function finalizeOutput()
{
	// Flush whatever text is left and close the last paragraph
	outputText(textLen, 0, true);

	// Strip empty tag pairs such as <I><U></U></I> and empty paragraphs. Removing a pair can
	// leave its parent empty, so keep going until a pass changes nothing
	var emptyPair = /<([^ />]+)[^>]*><\/\1>/g,
		stripped;
	for (;;)
	{
		stripped = output.replace(emptyPair, '');
		if (stripped === output)
		{
			break;
		}
		output = stripped;
	}

	// Merge consecutive <i> tags, then drop control characters to keep the XML valid
	output = output.replace(/<\/i><i>/g, '').replace(/[\x00-\x08\x0B-\x1F]/g, '');

	// Encode Unicode characters that are outside of the BMP
	encodeUnicodeSupplementaryCharacters();

	// Rich text gets a <r> root, plain text (including <p></p> and <br/>) gets <t>
	var rootName = (isRich) ? 'r' : 't',
		xmlns    = '';

	// Declare every namespace prefix in use on the root node
	if (HINT.namespaces)
	{
		for (var prefix in namespaces)
		{
			xmlns += ' xmlns:' + prefix + '="urn:s9e:TextFormatter:' + prefix + '"';
		}
	}

	output = '<' + rootName + xmlns + '>' + output + '</' + rootName + '>';
}
0380
/**
* Append a tag to the output
*
* Catches up with the text that precedes the tag, writes the tag's markup, then moves the
* cursor past the tag and records how many newlines may be skipped after it.
*
* @param {!Tag} tag Tag to append
*/
function outputTag(tag)
{
	// Any tag that goes through here makes the output rich
	isRich = true;

	var name         = tag.getName(),
		startPos     = tag.getPos(),
		consumed     = tag.getLen(),
		flags        = tag.getFlags(),
		linesBefore  = 0,
		linesAfter   = 0;

	// Tags that ignore whitespace swallow one line before them, and one line after a start tag
	// or two lines after an end tag
	if (HINT.RULE_IGNORE_WHITESPACE && (flags & RULE_IGNORE_WHITESPACE))
	{
		linesBefore = 1;
		linesAfter  = (tag.isEndTag()) ? 2 : 1;
	}

	// Whether this tag breaks paragraphs
	var breaksParagraph = !!(HINT.RULE_BREAK_PARAGRAPH && (flags & RULE_BREAK_PARAGRAPH));

	// The current paragraph ends before the tag if the tag is an end tag that is not
	// self-closing, or if it is a start (or self-closing) tag that breaks paragraphs
	var closeParagraph = !tag.isStartTag() || breaksParagraph;

	// Let the cursor catch up with this tag's position
	outputText(startPos, linesBefore, closeParagraph);

	// Escaped copy of the text consumed by the tag
	var consumedText = '';
	if (consumed)
	{
		consumedText = htmlspecialchars_noquotes(text.substring(startPos, startPos + consumed));
	}

	if (!tag.isStartTag())
	{
		// End tag: the consumed text goes in an <e> element, right before the closing markup
		if (consumed)
		{
			output += '<e>' + consumedText + '</e>';
		}

		output += '</' + name + '>';
	}
	else
	{
		// Tags that don't break paragraphs live inside of one
		if (!breaksParagraph)
		{
			outputParagraphStart(startPos);
		}

		// Record this tag's namespace, if applicable
		if (HINT.namespaces)
		{
			var separatorPos = name.indexOf(':');
			if (separatorPos > 0)
			{
				namespaces[name.substring(0, separatorPos)] = 0;
			}
		}

		// Open the start tag, its attributes are added next
		output += '<' + name;

		// Attributes are written in lexical order to keep the output canonical
		var attributes = tag.getAttributes(),
			sortedNames = [];
		for (var attrName in attributes)
		{
			sortedNames.push(attrName);
		}
		sortedNames.sort(function(left, right)
		{
			return (left > right) ? 1 : -1;
		});
		sortedNames.forEach(function(sortedName)
		{
			var attrValue = htmlspecialchars_compat(attributes[sortedName].toString());

			output += ' ' + sortedName + '="' + attrValue.replace(/\n/g, ' ') + '"';
		});

		// Finish the start tag. The consumed text becomes the content of a self-closing tag, or
		// goes in an <s> element for a regular start tag
		if (tag.isSelfClosingTag())
		{
			output += (consumed) ? '>' + consumedText + '</' + name + '>' : '/>';
		}
		else
		{
			output += (consumed) ? '><s>' + consumedText + '</s>' : '>';
		}
	}

	// Move the cursor past the tag
	pos = startPos + consumed;

	// Skip newlines (no other whitespace) after this tag, up to the allowed number of lines
	for (wsPos = pos; linesAfter && wsPos < textLen && text[wsPos] === "\n"; --linesAfter)
	{
		++wsPos;
	}
}
0503
/**
* Output the text between the cursor's position (included) and given position (not included)
*
* Nothing is output and the cursor is left untouched when it is already at or past the given
* position; the current paragraph is still closed if that was requested and applies.
*
* @param {number}  catchupPos     Position we're catching up to
* @param {number}  maxLines       Maximum number of lines to ignore at the end of the text
* @param {boolean} closeParagraph Whether to close the paragraph at the end, if applicable
*/
function outputText(catchupPos, maxLines, closeParagraph)
{
	if (closeParagraph)
	{
		if (!(context.flags & RULE_CREATE_PARAGRAPHS))
		{
			// This context doesn't use paragraphs, there is nothing to close
			closeParagraph = false;
		}
		else
		{
			// Ignore any number of lines at the end if we're closing a paragraph
			maxLines = -1;
		}
	}

	if (pos >= catchupPos)
	{
		// We're already there, close the paragraph if applicable and return. Going any further
		// with the cursor past catchupPos would output text a second time, because substring()
		// swaps its arguments when the start is greater than the end, and would move the
		// cursor backwards
		if (closeParagraph)
		{
			outputParagraphEnd();
		}

		return;
	}

	// Skip over previously identified whitespace if applicable
	if (wsPos > pos)
	{
		var skipPos = Math.min(catchupPos, wsPos);
		output += text.substring(pos, skipPos);
		pos = skipPos;

		if (pos >= catchupPos)
		{
			// Skipped everything. Close the paragraph if applicable and return
			if (closeParagraph)
			{
				outputParagraphEnd();
			}

			return;
		}
	}

	var catchupText;

	// Test whether we're even supposed to output anything
	if (HINT.RULE_IGNORE_TEXT && context.flags & RULE_IGNORE_TEXT)
	{
		catchupText = text.substring(pos, catchupPos);

		// If the catchup text is not entirely composed of whitespace, we put it inside ignore tags
		if (!/^[ \n\t]*$/.test(catchupText))
		{
			catchupText = '<i>' + htmlspecialchars_noquotes(catchupText) + '</i>';
		}

		output += catchupText;
		pos = catchupPos;

		if (closeParagraph)
		{
			outputParagraphEnd();
		}

		return;
	}

	// Compute the amount of text to ignore at the end of the output
	var ignorePos = catchupPos,
		ignoreLen = 0;

	// Ignore as many lines (including whitespace) as specified. A negative maxLines never
	// reaches 0, which means any number of lines
	while (maxLines && --ignorePos >= pos)
	{
		var c = text[ignorePos];
		if (c !== ' ' && c !== "\n" && c !== "\t")
		{
			break;
		}

		if (c === "\n")
		{
			--maxLines;
		}

		++ignoreLen;
	}

	// Adjust catchupPos to ignore the text at the end
	catchupPos -= ignoreLen;

	// Break down the text in paragraphs if applicable
	if (HINT.RULE_CREATE_PARAGRAPHS && context.flags & RULE_CREATE_PARAGRAPHS)
	{
		if (!context.inParagraph)
		{
			outputWhitespace(catchupPos);

			if (catchupPos > pos)
			{
				outputParagraphStart(catchupPos);
			}
		}

		// Look for a paragraph break in this text
		var pbPos = text.indexOf("\n\n", pos);

		while (pbPos > -1 && pbPos < catchupPos)
		{
			// Output the text up to the break and close the paragraph, then open the next one
			outputText(pbPos, 0, true);
			outputParagraphStart(catchupPos);

			pbPos = text.indexOf("\n\n", pos);
		}
	}

	// Capture, escape and output the text
	if (catchupPos > pos)
	{
		catchupText = htmlspecialchars_noquotes(
			text.substring(pos, catchupPos)
		);

		// Format line breaks if applicable, i.e. if automatic line breaks are enabled and
		// neither disabled nor suspended
		if (HINT.RULE_ENABLE_AUTO_BR && (context.flags & RULES_AUTO_LINEBREAKS) === RULE_ENABLE_AUTO_BR)
		{
			catchupText = catchupText.replace(/\n/g, "<br/>\n");
		}

		output += catchupText;
	}

	// Close the paragraph if applicable
	if (closeParagraph)
	{
		outputParagraphEnd();
	}

	// Add the ignored text if applicable. It is output as is, after the paragraph's end
	if (ignoreLen)
	{
		output += text.substring(catchupPos, catchupPos + ignoreLen);
	}

	// Move the cursor past the text
	pos = catchupPos + ignoreLen;
}
0656
/**
* Output a linebreak tag
*
* The text that precedes the tag is output first, without closing the paragraph, then a
* <br/> element is appended.
*
* @param {!Tag} tag Linebreak tag
*/
function outputBrTag(tag)
{
	var brPos = tag.getPos();

	outputText(brPos, 0, false);
	output += '<br/>';
}
0667
/**
* Output an ignore tag
*
* The text covered by the tag is escaped and wrapped in an <i> element, and the cursor is moved
* past it.
*
* @param {!Tag} tag Ignore tag
*/
function outputIgnoreTag(tag)
{
	var startPos = tag.getPos(),
		endPos   = startPos + tag.getLen();

	// Grab the ignored text before anything is output
	var ignored = text.substring(startPos, endPos);

	// Output what precedes the tag, then the tag itself
	outputText(startPos, 0, false);
	output += '<i>' + htmlspecialchars_noquotes(ignored) + '</i>';

	// An <i> element makes the output rich
	isRich = true;

	// The cursor now sits right after the ignored text
	pos = endPos;
}
0689
/**
* Start a paragraph between current position and given position, if applicable
*
* Does nothing if paragraphs are not in use or if a paragraph is already open. The whitespace
* found between the cursor and given position is output before the paragraph is opened.
*
* @param {number} maxPos Rightmost position at which the paragraph can be opened
*/
function outputParagraphStart(maxPos)
{
	// Paragraphs must be compiled in, enabled in this context, and not already open
	if (!HINT.RULE_CREATE_PARAGRAPHS
	 || context.inParagraph
	 || !(context.flags & RULE_CREATE_PARAGRAPHS))
	{
		return;
	}

	// Leading whitespace stays outside of the paragraph
	outputWhitespace(maxPos);

	// Never open a paragraph at the very end of the text
	if (pos >= textLen)
	{
		return;
	}

	output += '<p>';
	context.inParagraph = true;
}
0719
/**
* Close the current paragraph at the current position, if there is one
*/
function outputParagraphEnd()
{
	if (context.inParagraph)
	{
		output += '</p>';
		context.inParagraph = false;
	}
}
0734
/**
* Output the content of a verbatim tag
*
* The text up to the end of the current tag is output using given tag's flags in place of the
* context's flags, which are restored afterwards.
*
* @param {!Tag} tag Verbatim tag
*/
function outputVerbatim(tag)
{
	var savedFlags = context.flags;

	// Temporarily apply the tag's own flags to the context
	context.flags = tag.getFlags();

	var endPos = currentTag.getPos() + currentTag.getLen();
	outputText(endPos, 0, false);

	context.flags = savedFlags;
}
0747
/**
* Output as much whitespace as possible, starting at the current position
*
* Spaces, line feeds and tabs are copied to the output and the cursor is moved past them. It
* stops at the first other character, or when the cursor reaches given position.
*
* @param {number} maxPos Position at which the cursor must stop
*/
function outputWhitespace(maxPos)
{
	for (; pos < maxPos; ++pos)
	{
		var chr = text[pos];
		if (" \n\t".indexOf(chr) < 0)
		{
			break;
		}

		output += chr;
	}
}
0761
0762 //==========================================================================
0763 // Plugins handling
0764 //==========================================================================
0765
/**
* Mark a plugin as disabled
*
* Unknown plugin names are ignored.
*
* @param {string} pluginName Name of the plugin
*/
function disablePlugin(pluginName)
{
	var pluginConfig = plugins[pluginName];
	if (!pluginConfig)
	{
		return;
	}

	pluginConfig.isDisabled = true;
}
0778
/**
* Mark a plugin as enabled
*
* Unknown plugin names are ignored.
*
* @param {string} pluginName Name of the plugin
*/
function enablePlugin(pluginName)
{
	var pluginConfig = plugins[pluginName];
	if (!pluginConfig)
	{
		return;
	}

	pluginConfig.isDisabled = false;
}
0791
/**
* Execute given plugin
*
* The plugin is skipped if its quickMatch string does not appear in the text, or if it has
* both a regexp and a regexpLimit and the regexp matches nothing. A plugin that lacks either
* of them receives an empty list of matches.
*
* @param {string} pluginName Plugin's name
*/
function executePluginParser(pluginName)
{
	var pluginConfig = plugins[pluginName];

	// Cheap test first: no quickMatch string in the text means nothing to parse
	if (pluginConfig.quickMatch && text.indexOf(pluginConfig.quickMatch) < 0)
	{
		return;
	}

	var usesRegexp = HINT.regexp
	              && HINT.regexpLimit
	              && pluginConfig.regexp !== undefined
	              && pluginConfig.regexpLimit !== undefined,
		matches = [];

	if (usesRegexp)
	{
		matches = getMatches(pluginConfig.regexp, pluginConfig.regexpLimit);
		if (matches.length === 0)
		{
			return;
		}
	}

	// Execute the plugin's parser, which will add tags via addStartTag() and others
	var parser = getPluginParser(pluginName);
	parser(text, matches);
}
0818
/**
* Execute every plugin that is not disabled
*/
function executePluginParsers()
{
	for (var pluginName in plugins)
	{
		if (plugins[pluginName].isDisabled)
		{
			continue;
		}

		executePluginParser(pluginName);
	}
}
0832
/**
* Get regexp matches in a manner similar to preg_match_all() with PREG_SET_ORDER | PREG_OFFSET_CAPTURE
*
* Each match is an array of [string, position] pairs: the whole match first, then one pair per
* subpattern. Subpatterns that did not participate in the match are returned as ['', -1].
*
* The position of a subpattern is not given by the regexp engine. It is found by searching for
* the captured string from the start of the match, moving forward past each subpattern found.
*
* @param  {!RegExp} regexp Regexp to execute on the text, normally with the global flag
* @param  {number}  limit  Maximum number of matches to return
* @return {!Array.<!Array>}
*/
function getMatches(regexp, limit)
{
	// Reset the regexp
	regexp.lastIndex = 0;

	var matches = [], cnt = 0, m;
	while (++cnt <= limit && (m = regexp.exec(text)))
	{
		var searchPos = m.index,
			match     = [[m[0], searchPos]],
			i         = 0;
		while (++i < m.length)
		{
			var str = m[i];

			// Sub-expressions that were not evaluated return undefined
			if (str === undefined)
			{
				match.push(['', -1]);
			}
			else
			{
				match.push([str, text.indexOf(str, searchPos)]);
				searchPos += str.length;
			}
		}

		matches.push(match);

		// Without the global or sticky flag, exec() always restarts from the beginning and
		// would return this same match over and over until the limit is reached
		if (!regexp.global && !regexp.sticky)
		{
			break;
		}

		// exec() does not move lastIndex after a zero-length match. Step over it manually, as
		// preg_match_all() does, otherwise the same empty match would be returned forever
		if (m[0] === '')
		{
			++regexp.lastIndex;
		}
	}

	return matches;
}
0872
/**
* Get the parser callback stored in given plugin's config
*
* @param  {string} pluginName Plugin's name
* @return {function(string, !Array)}
*/
function getPluginParser(pluginName)
{
	var pluginConfig = plugins[pluginName];

	return pluginConfig.parser;
}
0883
/**
* Register a parser
*
* Can be used to add a new parser with no plugin config, or pre-generate a parser for an
* existing plugin
*
* When a regexp is given, its limit is stored as "regexpLimit", which is the name that
* executePluginParser() reads. An explicit limit always wins. Without one, a regexpLimit that
* is already present in the plugin's config is kept, otherwise the number of matches is
* unlimited. The "limit" property is still set for the benefit of code that reads it.
*
* @param {string}    pluginName Plugin's name
* @param {!Function} parser     Parser callback, called with the text and the regexp's matches
* @param {?RegExp=}  regexp     Regexp whose matches are passed to the parser
* @param {number=}   limit      Maximum number of matches passed to the parser
*/
function registerParser(pluginName, parser, regexp, limit)
{
	// Create an empty config for this plugin to ensure it is executed
	if (!plugins[pluginName])
	{
		plugins[pluginName] = {};
	}

	var pluginConfig = plugins[pluginName];
	if (regexp)
	{
		pluginConfig.regexp      = regexp;
		pluginConfig.regexpLimit = limit || pluginConfig.regexpLimit || Infinity;
		pluginConfig.limit       = limit || Infinity;
	}
	pluginConfig.parser = parser;
}
0909
0910 //==========================================================================
0911 // Rules handling
0912 //==========================================================================
0913
/**
* Apply closeAncestor rules associated with given tag
*
* The open tags are searched from the innermost to the outermost. At the first one that given
* tag is meant to close, the tag is put back in the stack and an end tag for that ancestor is
* added at the tag's position, with a priority that places it before the tag.
*
* @param  {!Tag}    tag Tag
* @return {boolean}     Whether a new tag has been added
*/
function closeAncestor(tag)
{
	if (!HINT.closeAncestor || !openTags.length)
	{
		return false;
	}

	var closable = tagsConfig[tag.getName()].rules.closeAncestor;
	if (!closable)
	{
		return false;
	}

	for (var idx = openTags.length - 1; idx >= 0; --idx)
	{
		var ancestor = openTags[idx];
		if (!closable[ancestor.getName()])
		{
			continue;
		}

		++currentFixingCost;

		// Reinsert this tag, then add an end tag for the ancestor with a better priority
		tagStack.push(tag);
		addMagicEndTag(ancestor, tag.getPos(), tag.getSortPriority() - 1);

		return true;
	}

	return false;
}
0959
/**
* Apply closeParent rules associated with given tag
*
* If the innermost open tag is one that given tag is meant to close, the tag is put back in
* the stack and an end tag for the parent is added at the tag's position, with a priority that
* places it before the tag.
*
* @param  {!Tag}    tag Tag
* @return {boolean}     Whether a new tag has been added
*/
function closeParent(tag)
{
	if (!HINT.closeParent || !openTags.length)
	{
		return false;
	}

	var closable = tagsConfig[tag.getName()].rules.closeParent;
	if (!closable)
	{
		return false;
	}

	var parent = openTags[openTags.length - 1];
	if (!closable[parent.getName()])
	{
		return false;
	}

	++currentFixingCost;

	// Reinsert this tag, then add an end tag for the parent with a better priority
	tagStack.push(tag);
	addMagicEndTag(parent, tag.getPos(), tag.getSortPriority() - 1);

	return true;
}
1000
/**
* Apply the createChild rules associated with given tag
*
* A 0-width start tag is added for every tag name listed in the rule. They are positioned
* after the whitespace that follows the cursor and get increasing priorities, starting at -999.
*
* @param {!Tag} tag Tag
*/
function createChild(tag)
{
	if (!HINT.createChild)
	{
		return;
	}

	var childNames = tagsConfig[tag.getName()].rules.createChild;
	if (!childNames)
	{
		return;
	}

	// Measure the whitespace that follows the cursor, children are created right after it
	var leadingWhitespace = /^[ \n\r\t]*/.exec(text.substring(pos))[0],
		childPos          = pos + leadingWhitespace.length,
		childPrio         = -1000;

	childNames.forEach(function(childName)
	{
		++childPrio;
		addStartTag(childName, childPos, 0, childPrio);
	});
}
1025
/**
* Apply fosterParent rules associated with given tag
*
* When the innermost open tag is one that given tag fosters, that parent gets closed before the
* tag and, unless prevented, a copy of the parent is added after the tag.
*
* NOTE: this rule has the potential for creating an unbounded loop, either if a tag tries to
*       foster itself or two or more tags try to foster each other in a loop. We mitigate the
*       risk by preventing a tag from creating a child of itself (the parent still gets closed)
*       and by checking and increasing the currentFixingCost so that a loop of multiple tags
*       do not run indefinitely. The default tagLimit and nestingLimit also serve to prevent the
*       loop from running indefinitely
*
* @param  {!Tag}    tag Tag
* @return {boolean}     Whether a new tag has been added
*/
function fosterParent(tag)
{
	if (!HINT.fosterParent || !openTags.length)
	{
		return false;
	}

	var tagName  = tag.getName(),
		fostered = tagsConfig[tagName].rules.fosterParent;
	if (!fostered)
	{
		return false;
	}

	var parent     = openTags[openTags.length - 1],
		parentName = parent.getName();
	if (!fostered[parentName])
	{
		return false;
	}

	// The copy is only created while there is fixing budget left, and never for a tag that
	// would foster itself
	if (currentFixingCost < maxFixingCost && tagName !== parentName)
	{
		addFosterTag(tag, parent);
	}

	// Reinsert current tag
	tagStack.push(tag);

	// Close the parent with a priority that ensures it is processed before this tag
	addMagicEndTag(parent, tag.getPos(), tag.getSortPriority() - 1);

	// Account for the additional tags/processing
	currentFixingCost += 4;

	return true;
}
1080
/**
* Apply requireAncestor rules associated with given tag
*
* The requirement is fulfilled as soon as one of the listed tags is currently open. If none
* is, an error is logged.
*
* @param  {!Tag}    tag Tag
* @return {boolean}     Whether this tag has an unfulfilled requireAncestor requirement
*/
function requireAncestor(tag)
{
	if (!HINT.requireAncestor)
	{
		return false;
	}

	var required = tagsConfig[tag.getName()].rules.requireAncestor;
	if (!required)
	{
		return false;
	}

	for (var idx = required.length - 1; idx >= 0; --idx)
	{
		if (cntOpen[required[idx]])
		{
			// At least one of the required ancestors is open
			return false;
		}
	}

	logger.err('Tag requires an ancestor', {
		'requireAncestor' : required.join(', '),
		'tag'             : tag
	});

	return true;
}
1119
1120 //==========================================================================
1121 // Tag processing
1122 //==========================================================================
1123
/**
* Create and add a copy of a tag as a child of a given tag
*
* The copy is a 0-width tag placed after the current tag, at the coordinates computed by
* getMagicStartCoords(). It gets invalidated if the current tag is invalidated.
*
* @param {!Tag} tag       Current tag
* @param {!Tag} fosterTag Tag to foster
*/
function addFosterTag(tag, fosterTag)
{
	var afterTag = tag.getPos() + tag.getLen(),
		coords   = getMagicStartCoords(afterTag);

	// coords holds the position followed by the priority
	var copy = addCopyTag(fosterTag, coords[0], 0, coords[1]);

	// The copy only makes sense as long as the current tag is valid
	tag.cascadeInvalidationTo(copy);
}
1140
/**
* Create and add an end tag for given start tag at given position
*
* The end tag is 0-width and paired with the start tag. If either the current tag or the start
* tag ignores whitespace, the end tag is moved before the whitespace that precedes its position.
*
* @param  {!Tag}    startTag Start tag
* @param  {number}  tagPos   End tag's position (will be adjusted for whitespace if applicable)
* @param  {number=} prio     End tag's priority
* @return {!Tag}
*/
function addMagicEndTag(startTag, tagPos, prio)
{
	var tagName = startTag.getName(),
		endPos  = tagPos;

	// Adjust the end tag's position if whitespace is to be minimized
	if (HINT.RULE_IGNORE_WHITESPACE)
	{
		var combinedFlags = currentTag.getFlags() | startTag.getFlags();
		if (combinedFlags & RULE_IGNORE_WHITESPACE)
		{
			endPos = getMagicEndPos(endPos);
		}
	}

	// Add a 0-width end tag that is paired with the given start tag
	var endTag = addEndTag(tagName, endPos, 0, prio || 0);
	endTag.pairWith(startTag);

	return endTag;
}
1165
/**
* Compute the position of a magic end tag, adjusted for whitespace
*
* @param  {number} tagPos Rightmost possible position for the tag
* @return {number}        Position that follows the last non-whitespace character found between
*                         the cursor and given position, or the cursor's position if there is none
*/
function getMagicEndPos(tagPos)
{
	// Walk back towards the cursor for as long as the preceding character is whitespace
	for (; tagPos > pos; --tagPos)
	{
		if (WHITESPACE.indexOf(text[tagPos - 1]) < 0)
		{
			break;
		}
	}

	return tagPos;
}
1183
/**
* Compute the position and priority of a magic start tag, adjusted for whitespace
*
* The position is moved forward past whitespace but never beyond the next tag in the stack. If
* it ends up at the same position as the next tag, the priority is set to one less than that
* tag's sort priority so that the magic tag comes first. Otherwise the priority is 0.
*
* @param  {number} tagPos Leftmost possible position for the tag
* @return {!Array}        [Tag pos, priority]
*/
function getMagicStartCoords(tagPos)
{
	// With an empty stack, the boundary is set outside of the text
	var boundaryPos  = textLen + 1,
		boundaryPrio = 0;

	if (tagStack.length)
	{
		var upcomingTag = tagStack[tagStack.length - 1];
		boundaryPos  = upcomingTag.getPos();
		boundaryPrio = upcomingTag.getSortPriority();
	}

	// Find the first non-whitespace position before next tag or the end of text
	for (; tagPos < boundaryPos; ++tagPos)
	{
		if (WHITESPACE.indexOf(text[tagPos]) < 0)
		{
			break;
		}
	}

	// Pick a priority that ensures this tag appears before the next tag
	var prio = 0;
	if (tagPos === boundaryPos)
	{
		prio = boundaryPrio - 1;
	}

	return [tagPos, prio];
}
1217
/**
* Test whether given start tag is immediately followed by a closing tag
*
* Only the next tag in the stack is considered.
*
* @param  {!Tag}    tag Start tag (including self-closing)
* @return {boolean}
*/
function isFollowedByClosingTag(tag)
{
	if (!tagStack.length)
	{
		return false;
	}

	var upcomingTag = tagStack[tagStack.length - 1];

	return upcomingTag.canClose(tag);
}
1228
/**
* Process all tags in the stack
*
* Tags are taken from the end of the stack one at a time until it is empty. End tags are then
* added for the tags that are still open, and processing resumes if that put new tags in the
* stack.
*/
function processTags()
{
	if (!tagStack.length)
	{
		return;
	}

	// Every configured tag starts with both of its counters at 0
	for (var tagName in tagsConfig)
	{
		cntOpen[tagName]  = 0;
		cntTotal[tagName] = 0;
	}

	for (;;)
	{
		// Empty the stack, sorting it again whenever it has been flagged as unsorted
		while (tagStack.length)
		{
			if (!tagStackIsSorted)
			{
				sortTags();
			}

			currentTag = tagStack.pop();
			processCurrentTag();
		}

		// Close tags that were left open
		//
		// NOTE: we add tags in hierarchical order (ancestors to descendants) but since
		//       the stack is processed in LIFO order, it means that tags get closed in
		//       the correct order, from descendants to ancestors
		openTags.forEach(function(openTag)
		{
			addMagicEndTag(openTag, textLen);
		});

		// We're done unless closing open tags has refilled the stack
		if (!tagStack.length)
		{
			break;
		}
	}
}
1271
/**
* Process the tag stored in currentTag
*
* The tag is first checked against the current context and the cursor position, which may
* invalidate it or replace it with a new tag. A tag that is still valid is then dispatched to
* the routine that matches its kind.
*/
function processCurrentTag()
{
    // While tags are being ignored, only system tags and tags that can close the last open tag
    // remain valid
    if ((context.flags & RULE_IGNORE_TAGS)
     && !currentTag.canClose(openTags[openTags.length - 1])
     && !currentTag.isSystemTag())
    {
        currentTag.invalidate();
    }

    var tagPos = currentTag.getPos();
    var tagLen = currentTag.getLen();

    // The cursor has already moved past the start of this tag
    if (pos > tagPos && !currentTag.isInvalid())
    {
        // Amount of text covered by this tag that lies at or after the cursor
        var remainingLen = tagPos + tagLen - pos;

        // If this tag is paired with a start tag that is still open, replace it with an end tag
        // positioned at the cursor, which consumes whatever is left of this tag's text and is
        // paired with the same start tag. Current tag is not invalidated, it's merely replaced
        var pairedStartTag = currentTag.getStartTag();
        if (pairedStartTag && openTags.indexOf(pairedStartTag) >= 0)
        {
            addEndTag(
                pairedStartTag.getName(),
                pos,
                Math.max(0, remainingLen)
            ).pairWith(pairedStartTag);

            return;
        }

        // An ignore tag is replaced by a new one that covers the text that remains to be ignored
        if (currentTag.isIgnoreTag() && remainingLen > 0)
        {
            addIgnoreTag(pos, remainingLen);

            return;
        }

        // Any other skipped tag is invalidated
        currentTag.invalidate();
    }

    if (currentTag.isInvalid())
    {
        return;
    }

    if (currentTag.isIgnoreTag())
    {
        outputIgnoreTag(currentTag);

        return;
    }

    if (currentTag.isBrTag())
    {
        // Line breaks are dropped where the context prevents them
        if (!(HINT.RULE_PREVENT_BR && (context.flags & RULE_PREVENT_BR)))
        {
            outputBrTag(currentTag);
        }

        return;
    }

    if (currentTag.isParagraphBreak())
    {
        outputText(currentTag.getPos(), 0, true);

        return;
    }

    if (currentTag.isVerbatim())
    {
        outputVerbatim(currentTag);

        return;
    }

    if (currentTag.isStartTag())
    {
        processStartTag(currentTag);

        return;
    }

    processEndTag(currentTag);
}
1361
/**
* Process given start tag (including self-closing tags) at current position
*
* The checks are performed in this order:
*
*  1. tagLimit, the global limit for this tag
*  2. the tag's filterChain, via filterTag()
*  3. fosterParent, closeParent and closeAncestor rules
*  4. nestingLimit
*  5. whether the tag is allowed in current context
*  6. requireAncestor rules
*
* The tag has to be known to be valid and within its limits before any parent or ancestor gets
* closed, and ancestors have to be closed before checking anything that depends on the context,
* because closing them changes the context.
*
* @param {!Tag} tag Start tag (including self-closing)
*/
function processStartTag(tag)
{
    var tagName = tag.getName();
    var config  = tagsConfig[tagName];

    if (cntTotal[tagName] >= config.tagLimit)
    {
        logger.err(
            'Tag limit exceeded',
            {
                'tag'      : tag,
                'tagName'  : tagName,
                'tagLimit' : config.tagLimit
            }
        );
        tag.invalidate();

        return;
    }

    filterTag(tag);
    if (tag.isInvalid())
    {
        return;
    }

    // Rules that close a parent/ancestor are only applied while the fixing budget lasts. If one
    // of them applies we simply return, the tag itself is still valid
    if (currentFixingCost < maxFixingCost
     && (fosterParent(tag) || closeParent(tag) || closeAncestor(tag)))
    {
        return;
    }

    if (cntOpen[tagName] >= config.nestingLimit)
    {
        logger.err(
            'Nesting limit exceeded',
            {
                'tag'          : tag,
                'tagName'      : tagName,
                'nestingLimit' : config.nestingLimit
            }
        );
        tag.invalidate();

        return;
    }

    if (!tagIsAllowed(tagName))
    {
        var msg        = 'Tag is not allowed in this context';
        var logContext = {'tag': tag, 'tagName': tagName};

        // Tags that consume text are reported as warnings, 0-width tags at debug level
        if (tag.getLen() > 0)
        {
            logger.warn(msg, logContext);
        }
        else
        {
            logger.debug(msg, logContext);
        }
        tag.invalidate();

        return;
    }

    if (requireAncestor(tag))
    {
        tag.invalidate();

        return;
    }

    // A tag with an autoClose rule that is neither self-closed, paired with an end tag nor
    // immediately followed by an end tag is replaced with a self-closing tag that has the same
    // name, position, length, attributes and flags
    if (HINT.RULE_AUTO_CLOSE
     && (tag.getFlags() & RULE_AUTO_CLOSE)
     && !tag.isSelfClosingTag()
     && !tag.getEndTag()
     && !isFollowedByClosingTag(tag))
    {
        var selfClosingTag = new Tag(Tag.SELF_CLOSING_TAG, tagName, tag.getPos(), tag.getLen());
        selfClosingTag.setAttributes(tag.getAttributes());
        selfClosingTag.setFlags(tag.getFlags());

        tag = selfClosingTag;
    }

    // Ignore the newline that immediately follows the tag if the tag asks for it
    if (HINT.RULE_TRIM_FIRST_LINE && (tag.getFlags() & RULE_TRIM_FIRST_LINE))
    {
        var contentPos = tag.getPos() + tag.getLen();
        if (text[contentPos] === "\n")
        {
            addIgnoreTag(contentPos, 1);
        }
    }

    // The tag is valid: output it, update the context, then apply the createChild rules
    outputTag(tag);
    pushContext(tag);
    createChild(tag);
}
1482
/**
* Process given end tag at current position
*
* Every open tag found between the top of the open tags list and the tag being closed gets
* closed first. Depending on their flags and on the remaining fixing budget, some of them are
* then re-added at the cursor so that they reopen.
*
* @param {!Tag} tag End tag
*/
function processEndTag(tag)
{
    // An end tag whose name has no open tag cannot close anything
    if (!cntOpen[tag.getName()])
    {
        return;
    }

    /**
    * @type {!Array.<!Tag>} Open tags that need to be closed before given tag, innermost first
    */
    var closeTags = [];

    // Walk the open tags from last to first until one can be closed by our tag. Each tag we
    // have to step over counts towards the fixing cost
    var i;
    for (i = openTags.length - 1; i >= 0; --i)
    {
        if (tag.canClose(openTags[i]))
        {
            break;
        }

        closeTags.push(openTags[i]);
        ++currentFixingCost;
    }

    if (i < 0)
    {
        // None of the open tags can be closed by this tag
        logger.debug('Skipping end tag with no start tag', {'tag': tag});

        return;
    }

    // Whitespace is trimmed if this tag or any of the tags being closed asks for it
    var combinedFlags = tag.getFlags();
    for (i = 0; i < closeTags.length; ++i)
    {
        combinedFlags |= closeTags[i].getFlags();
    }
    var ignoreWhitespace = (HINT.RULE_IGNORE_WHITESPACE && (combinedFlags & RULE_IGNORE_WHITESPACE));

    // Tags are only reopened while the "fixing" budget lasts, and only up to the first tag
    // that does not have the autoReopen rule
    var keepReopening = HINT.RULE_AUTO_REOPEN && (currentFixingCost < maxFixingCost);
    var reopenTags    = [];

    for (i = 0; i < closeTags.length; ++i)
    {
        var openTag     = closeTags[i];
        var openTagName = openTag.getName();

        if (keepReopening)
        {
            if (openTag.getFlags() & RULE_AUTO_REOPEN)
            {
                reopenTags.push(openTag);
            }
            else
            {
                keepReopening = false;
            }
        }

        // Find the earliest position at which this open tag can be closed
        var closePos = tag.getPos();
        if (ignoreWhitespace)
        {
            closePos = getMagicEndPos(closePos);
        }

        // Output a 0-width end tag for this open tag, then update the context
        var magicEndTag = new Tag(Tag.END_TAG, openTagName, closePos, 0);
        magicEndTag.setFlags(openTag.getFlags());
        outputTag(magicEndTag);
        popContext();
    }

    // Output our own tag, then update the context
    outputTag(tag);
    popContext();

    // If the fixing budget allows it, peek at the upcoming tags and swallow the end tags that
    // would close tags we have just closed. Tags that would be closed right away are also
    // removed from the list of tags to reopen
    if (closeTags.length && currentFixingCost < maxFixingCost)
    {
        /**
        * @type {number} Rightmost position of the portion of text to ignore
        */
        var ignorePos = pos;

        for (i = tagStack.length - 1; i >= 0; --i)
        {
            if (!(++currentFixingCost < maxFixingCost))
            {
                break;
            }

            var upcomingTag = tagStack[i];

            // Stop at the first tag located after the ignored portion, or that isn't strictly
            // an end tag
            if (upcomingTag.getPos() > ignorePos || upcomingTag.isStartTag())
            {
                break;
            }

            // Look for a closed tag that this upcoming tag would close
            for (var j = closeTags.length - 1; j >= 0; --j)
            {
                if (!(++currentFixingCost < maxFixingCost))
                {
                    break;
                }

                if (upcomingTag.canClose(closeTags[j]))
                {
                    // Remove the tag from both lists, which share the same indexes
                    closeTags.splice(j, 1);
                    if (reopenTags[j])
                    {
                        reopenTags.splice(j, 1);
                    }

                    // Extend the ignored text to cover the upcoming tag
                    ignorePos = Math.max(
                        ignorePos,
                        upcomingTag.getPos() + upcomingTag.getLen()
                    );

                    break;
                }
            }
        }

        if (ignorePos > pos)
        {
            /**
            * @todo have a method that takes (pos,len) rather than a Tag
            */
            outputIgnoreTag(new Tag(Tag.SELF_CLOSING_TAG, 'i', pos, ignorePos - pos));
        }
    }

    // Re-add the tags that need to be reopened as 0-width copies at current cursor position,
    // paired with the end tag of the tag they replace if there is one
    for (i = 0; i < reopenTags.length; ++i)
    {
        var reopenedTag = addCopyTag(reopenTags[i], pos, 0);
        var pairedEndTag = reopenTags[i].getEndTag();
        if (pairedEndTag)
        {
            reopenedTag.pairWith(pairedEndTag);
        }
    }
}
1643
/**
* Remove the last open tag, decrement its open counter and restore the parent context
*/
function popContext()
{
    var closedTag  = openTags.pop();
    var closedName = closedTag.getName();

    --cntOpen[closedName];
    context = context.parentContext;
}
1653
/**
* Update counters and replace current context with a new context based on given tag
*
* The tag's total counter is always incremented. A self-closing tag changes nothing else: it is
* not added to the open tags and the context stays the same.
*
* @param {!Tag} tag Start tag (including self-closing)
*/
function pushContext(tag)
{
    var tagName   = tag.getName();
    var tagFlags  = tag.getFlags();
    var tagConfig = tagsConfig[tagName];

    ++cntTotal[tagName];

    if (tag.isSelfClosingTag())
    {
        return;
    }

    // Unless the tag is transparent, the low bits (allowed children) of current context are
    // overridden with its high bits (allowed descendants) before being combined with the
    // tag's own bitfields
    var isTransparent  = HINT.RULE_IS_TRANSPARENT && (tagFlags & RULE_IS_TRANSPARENT);
    var childAllowed   = [];
    context.allowed.forEach(function (parentBits, idx)
    {
        var bits = (isTransparent) ? parentBits : ((parentBits & 0xFF00) | (parentBits >> 8));

        childAllowed.push(tagConfig.allowed[idx] & bits);
    });

    // Start from the tag's flags, add the inherited rules from current context, then let
    // RULE_DISABLE_AUTO_BR turn off RULE_ENABLE_AUTO_BR
    var childFlags = tagFlags | (context.flags & RULES_INHERITANCE);
    if (childFlags & RULE_DISABLE_AUTO_BR)
    {
        childFlags &= ~RULE_ENABLE_AUTO_BR;
    }

    ++cntOpen[tagName];
    openTags.push(tag);

    context = {
        parentContext : context,
        allowed       : childAllowed,
        flags         : childFlags
    };
}
1703
/**
* Return whether given tag is allowed in current context
*
* The tag's bitNumber selects one bit in the context's "allowed" list: the upper bits of the
* number pick the list entry, its 3 lowest bits pick the bit within that entry.
*
* @param  {string} tagName
* @return {boolean}
*/
function tagIsAllowed(tagName)
{
    var bitNumber  = tagsConfig[tagName].bitNumber;
    var entryIndex = bitNumber >> 3;
    var bitMask    = 1 << (bitNumber & 7);

    return (context.allowed[entryIndex] & bitMask) !== 0;
}
1716
1717 //==========================================================================
1718 // Tag stack
1719 //==========================================================================
1720
/**
* Create a start tag and add it via addTag()
*
* @param  {string}  name Name of the tag
* @param  {number}  pos  Position of the tag in the text
* @param  {number}  len  Length of text consumed by the tag
* @param  {number=} prio Tag's priority, 0 if omitted
* @return {!Tag}
*/
function addStartTag(name, pos, len, prio)
{
    var priority = prio || 0;

    return addTag(Tag.START_TAG, name, pos, len, priority);
}
1734
/**
* Create an end tag and add it via addTag()
*
* @param  {string}  name Name of the tag
* @param  {number}  pos  Position of the tag in the text
* @param  {number}  len  Length of text consumed by the tag
* @param  {number=} prio Tag's priority, 0 if omitted
* @return {!Tag}
*/
function addEndTag(name, pos, len, prio)
{
    var priority = prio || 0;

    return addTag(Tag.END_TAG, name, pos, len, priority);
}
1748
/**
* Create a self-closing tag and add it via addTag()
*
* @param  {string}  name Name of the tag
* @param  {number}  pos  Position of the tag in the text
* @param  {number}  len  Length of text consumed by the tag
* @param  {number=} prio Tag's priority, 0 if omitted
* @return {!Tag}
*/
function addSelfClosingTag(name, pos, len, prio)
{
    var priority = prio || 0;

    return addTag(Tag.SELF_CLOSING_TAG, name, pos, len, priority);
}
1762
/**
* Add a 0-width, self-closing "br" tag to force a line break at given position
*
* @param  {number}  pos  Position of the tag in the text
* @param  {number=} prio Tag's priority, 0 if omitted
* @return {!Tag}
*/
function addBrTag(pos, prio)
{
    var priority = prio || 0;

    return addTag(Tag.SELF_CLOSING_TAG, 'br', pos, 0, priority);
}
1774
/**
* Add an "ignore" tag
*
* The length is capped so that the tag does not extend past the end of the text.
*
* @param  {number}  pos  Position of the tag in the text
* @param  {number}  len  Length of text consumed by the tag
* @param  {number=} prio Tag's priority, 0 if omitted
* @return {!Tag}
*/
function addIgnoreTag(pos, len, prio)
{
    var availableLen = textLen - pos;
    var ignoreLen    = Math.min(len, availableLen);

    return addTag(Tag.SELF_CLOSING_TAG, 'i', pos, ignoreLen, prio || 0);
}
1787
/**
* Add a paragraph break at given position
*
* The break is represented by a 0-width, self-closing "pb" tag that is actually never output in
* the result.
*
* @param  {number}  pos  Position of the tag in the text
* @param  {number=} prio Tag's priority, 0 if omitted
* @return {!Tag}
*/
function addParagraphBreak(pos, prio)
{
    var priority = prio || 0;

    return addTag(Tag.SELF_CLOSING_TAG, 'pb', pos, 0, priority);
}
1801
/**
* Add a copy of given tag at given position and length
*
* The copy has the same type, name and attributes as the original tag.
*
* @param  {!Tag}    tag  Original tag
* @param  {number}  pos  Copy's position
* @param  {number}  len  Copy's length
* @param  {number=} prio Copy's priority, defaults to the original tag's sort priority
* @return {!Tag}         Copy tag
*/
function addCopyTag(tag, pos, len, prio)
{
    // Honour an explicit priority (including 0), otherwise inherit the original tag's
    var copyPrio = (typeof prio === 'number') ? prio : tag.getSortPriority();

    var copy = addTag(tag.getType(), tag.getName(), pos, len, copyPrio);
    copy.setAttributes(tag.getAttributes());

    return copy;
}
1818
/**
* Create a tag and, if it is usable, insert it in the tag stack
*
* The tag is always returned. It is invalidated instead of being inserted if it is neither a
* configured tag nor a system tag, if its text span is invalid, or if its tag is disabled.
*
* @param  {number}  type Tag's type
* @param  {string}  name Name of the tag
* @param  {number}  pos  Position of the tag in the text
* @param  {number}  len  Length of text consumed by the tag
* @param  {number=} prio Tag's priority, 0 if omitted
* @return {!Tag}
*/
function addTag(type, name, pos, len, prio)
{
    var tag       = new Tag(type, name, pos, len, prio || 0);
    var tagConfig = tagsConfig[name];

    // Copy the rules bitfield from the tag's config
    if (tagConfig)
    {
        tag.setFlags(tagConfig.rules.flags);
    }

    // Unknown tags and tags whose span is negative, out of bounds or otherwise invalid
    var isUnknown = (!tagConfig && !tag.isSystemTag());
    if (isUnknown || isInvalidTextSpan(pos, len))
    {
        tag.invalidate();

        return tag;
    }

    // Disabled tags are reported before being invalidated
    if (tagConfig && tagConfig.isDisabled)
    {
        logger.warn(
            'Tag is disabled',
            {
                'tag'     : tag,
                'tagName' : name
            }
        );
        tag.invalidate();

        return tag;
    }

    insertTag(tag);

    return tag;
}
1864
/**
* Test whether given text span is outside text boundaries or an invalid UTF sequence
*
* A span is rejected if its position or length is negative, if it ends past the end of the
* text, or if the character at its start or the character right after its end is a code unit in
* the U+DC00..U+DFFF range.
*
* @param  {number} pos Start of text
* @param  {number} len Length of text
* @return {boolean}
*/
function isInvalidTextSpan(pos, len)
{
    if (len < 0 || pos < 0 || pos + len > textLen)
    {
        return true;
    }

    // Collect the code unit found at each boundary of the span
    var endPos        = pos + len;
    var boundaryChars = text.substring(pos, pos + 1) + text.substring(endPos, endPos + 1);

    return /[\uDC00-\uDFFF]/.test(boundaryChars);
}
1876
/**
* Insert given tag in the tag stack
*
* If the stack is not currently sorted, the tag is simply appended. Otherwise it is inserted at
* the index that keeps the stack ordered by sort key.
*
* @param {!Tag} tag
*/
function insertTag(tag)
{
    if (!tagStackIsSorted)
    {
        tagStack.push(tag);

        return;
    }

    // Starting from the end of the stack, move past every tag whose key is lower than ours
    var tagKey = getSortKey(tag);
    var index  = tagStack.length;
    while (index > 0 && tagKey > getSortKey(tagStack[index - 1]))
    {
        --index;
    }

    tagStack.splice(index, 0, tag);
}
1901
/**
* Add a start tag and an end tag that are paired together
*
* @param  {string}  name     Name of the tags
* @param  {number}  startPos Position of the start tag
* @param  {number}  startLen Length of the start tag
* @param  {number}  endPos   Position of the end tag
* @param  {number}  endLen   Length of the end tag
* @param  {number=} prio     Start tag's priority (the end tag will be set to minus that value)
* @return {!Tag}             Start tag
*/
function addTagPair(name, startPos, startLen, endPos, endLen, prio)
{
    var startPrio = prio || 0;
    var endPrio   = -prio || 0;

    // NOTE: the end tag is added first to try to keep the stack in the correct order
    var endTag   = addEndTag(name, endPos, endLen, endPrio);
    var startTag = addStartTag(name, startPos, startLen, startPrio);

    startTag.pairWith(endTag);

    return startTag;
}
1922
/**
* Add a self-closing "v" tag that represents a verbatim copy of the original text
*
* @param  {number}  pos  Position of the tag in the text
* @param  {number}  len  Length of text consumed by the tag
* @param  {number=} prio Tag's priority, 0 if omitted
* @return {!Tag}
*/
function addVerbatim(pos, len, prio)
{
    var priority = prio || 0;

    return addTag(Tag.SELF_CLOSING_TAG, 'v', pos, len, priority);
}
1935
/**
* Sort the tag stack by position and precedence
*
* Each tag gets a sort key that includes its index in the stack. The keys are sorted lexically
* and the stack is rebuilt in descending key order, which leaves the tag with the lowest key at
* the end of the array.
*/
function sortTags()
{
    var tagsByKey = {};
    var sortKeys  = [];

    for (var index = tagStack.length - 1; index >= 0; --index)
    {
        var sortKey = getSortKey(tagStack[index], index);

        sortKeys.push(sortKey);
        tagsByKey[sortKey] = tagStack[index];
    }

    // Default sort() compares the keys as strings
    sortKeys.sort();
    sortKeys.reverse();

    tagStack = sortKeys.map(function (key)
    {
        return tagsByKey[key];
    });
    tagStackIsSorted = true;
}
1962
/**
* Generate a key for given tag that can be used to compare its position using lexical comparisons
*
* The key is made of the following parts, in order: position, priority (a sign flag followed by
* the value), whether the tag consumes any text, a value based on either the tag's length or its
* type, and finally the index passed by the caller.
*
* The stack's array is in reverse order. Therefore, tags that appear at the start of the text
* are at the end of the array.
*
* @param  {!Tag}    tag
* @param  {number=} tagIndex Tag's index in the stack, 0 if omitted
* @return {string}
*/
function getSortKey(tag, tagIndex)
{
    // Negative priorities are flagged and shifted into positive territory so that they are
    // sorted correctly
    var prio          = tag.getSortPriority();
    var isNonNegative = (prio >= 0);
    if (!isNonNegative)
    {
        prio += (1 << 30);
    }

    // 0-width tags are sorted separately from the rest
    var len          = tag.getLen();
    var consumesText = (len > 0);
    var lenOrder;
    if (consumesText)
    {
        // Inverse the length so that longest matches are processed first
        lenOrder = textLen - len;
    }
    else
    {
        // Self-closing tags go in-between start tags and end tags to keep them outside of tag
        // pairs
        var typeOrder = {};
        typeOrder[Tag.END_TAG]          = 0;
        typeOrder[Tag.SELF_CLOSING_TAG] = 1;
        typeOrder[Tag.START_TAG]        = 2;

        lenOrder = typeOrder[tag.getType()];
    }

    var parts = [
        hex32(tag.getPos()),
        +isNonNegative,
        hex32(prio),
        +consumesText,
        hex32(lenOrder),
        hex32(tagIndex || 0)
    ];

    return parts.join('');
}
2007
/**
* Format given number to a 32 bit hex value
*
* The hexadecimal representation is left-padded with spaces to a width of 8 characters. A fixed
* width is what makes the result usable in lexical comparisons: the space character sorts before
* every hex digit, so shorter (smaller) values sort first. A representation that is already 8
* characters or longer is returned as-is.
*
* @param  {number} number
* @return {string}
*/
function hex32(number)
{
    var hex = number.toString(16);

    // Pad one character at a time rather than slicing a literal run of spaces, so that the
    // width does not depend on whitespace surviving in the source
    while (hex.length < 8)
    {
        hex = ' ' + hex;
    }

    return hex;
}