Verzeichnisstruktur phpBB-3.2.0
- Veröffentlicht
- 06.01.2017
So funktioniert es
|
Auf das letzte Element klicken. Dies geht jeweils einen Schritt zurück |
Auf das Icon klicken, dies öffnet das Verzeichnis. Nochmal klicken schließt das Verzeichnis. |
|
(Beispiel Datei-Icons)
|
Auf das Icon klicken, um den Quellcode anzuzeigen |
Parser.js
/**#@+
* Boolean rules bitfield. Every rule is a distinct power of two so that several rules can be
* combined with | and tested with &
*/
/** @const */ var RULE_AUTO_CLOSE        = 0x0001;
/** @const */ var RULE_AUTO_REOPEN       = 0x0002;
/** @const */ var RULE_BREAK_PARAGRAPH   = 0x0004;
/** @const */ var RULE_CREATE_PARAGRAPHS = 0x0008;
/** @const */ var RULE_DISABLE_AUTO_BR   = 0x0010;
/** @const */ var RULE_ENABLE_AUTO_BR    = 0x0020;
/** @const */ var RULE_IGNORE_TAGS       = 0x0040;
/** @const */ var RULE_IGNORE_TEXT       = 0x0080;
/** @const */ var RULE_IGNORE_WHITESPACE = 0x0100;
/** @const */ var RULE_IS_TRANSPARENT    = 0x0200;
/** @const */ var RULE_PREVENT_BR        = 0x0400;
/** @const */ var RULE_SUSPEND_AUTO_BR   = 0x0800;
/** @const */ var RULE_TRIM_FIRST_LINE   = 0x1000;
/**#@-*/

/**
* @const Mask covering every rule related to automatic line breaks
*/
var RULES_AUTO_LINEBREAKS = RULE_SUSPEND_AUTO_BR | RULE_ENABLE_AUTO_BR | RULE_DISABLE_AUTO_BR;

/**
* @const Mask covering the rules that are inherited by subcontexts
*/
var RULES_INHERITANCE = RULE_ENABLE_AUTO_BR;

/**
* @const Every character that counts as whitespace: space, newline and tab
*/
var WHITESPACE = " \n\t";
0033
/**
* @type {!Object.<string,!number>} For each tag name, how many tags are currently open
*/
var cntOpen;

/**
* @type {!Object.<string,!number>} For each tag name, how many times the tag has been used
*/
var cntTotal;

/**
* @type {!Object} Context the parser is currently in
*/
var context;

/**
* @type {!number} Effort spent so far on fixing bad markup
*/
var currentFixingCost;

/**
* @type {Tag} Tag that is currently being processed
*/
var currentTag;

/**
* @type {!boolean} Whether the output contains "rich" tags, IOW any tag that is not <p> or <br/>
*/
var isRich;

/**
* @type {!Logger} Logger used by this parser
*/
var logger = new Logger();

/**
* @type {!number} Upper bound on the effort the parser should spend on fixing bad markup
*/
var maxFixingCost = 1000;

/**
* @type {!Object} Namespace prefixes in use in the document, stored as keys
*/
var namespaces;

/**
* @type {!Array.<!Tag>} Stack of the tags (instances of Tag) that are currently open
*/
var openTags;

/**
* @type {!string} Output of this parser
*/
var output;

/**
* @type {!Object.<!Object>}
*/
var plugins;

/**
* @type {!number} Cursor position in the original text
*/
var pos;

/**
* @type {!Object} Variables registered for use in filters
*/
var registeredVars;

/**
* @type {!Object} Context used at the root of the document
*/
var rootContext;

/**
* @type {!Object} Config of every tag
* @const
*/
var tagsConfig;

/**
* @type {!Array.<!Tag>} Storage for the tags waiting to be processed
*/
var tagStack;

/**
* @type {!boolean} Whether the tags in tagStack are currently sorted
*/
var tagStackIsSorted;

/**
* @type {!string} Text that is being parsed
*/
var text;

/**
* @type {!number} Length of the text that is being parsed
*/
var textLen;

/**
* @type {!number} Counter incremented every time the parser is reset. Used as a canary to detect
*                 whether the parser was reset during execution
*/
var uid = 0;

/**
* @type {!number} Position before which text is output verbatim, without paragraphs or linebreaks
*/
var wsPos;
0145
0146 //==========================================================================
0147 // Public API
0148 //==========================================================================
0149
/**
* Disable a tag
*
* Nothing happens if the tag has no config. The flag is set on a copy of the tag's config
* obtained from copyTagConfig()
*
* @param {!string} tagName Name of the tag
*/
function disableTag(tagName)
{
	if (!tagsConfig[tagName])
	{
		return;
	}

	var ownConfig = copyTagConfig(tagName);
	ownConfig.isDisabled = true;
}
0162
/**
* Enable a tag
*
* Nothing happens if the tag has no config. The flag is cleared on a copy of the tag's config
* obtained from copyTagConfig()
*
* @param {!string} tagName Name of the tag
*/
function enableTag(tagName)
{
	if (!tagsConfig[tagName])
	{
		return;
	}

	var ownConfig = copyTagConfig(tagName);
	ownConfig.isDisabled = false;
}
0175
/**
* Return the Logger instance used by this parser
*
* @return {!Logger} The parser's logger
*/
function getLogger()
{
	var parserLogger = logger;

	return parserLogger;
}
0185
/**
* Parse a text
*
* Resets the parser, runs the plugin parsers, processes the resulting tags and finalizes the
* output. Throws if the parser was reset while it was running, and logs a warning if the
* fixing cost limit was exceeded.
*
* @param  {!string} _text Text to parse
* @return {!string}       XML representation
*/
function parse(_text)
{
	reset(_text);

	// Remember which run this is so that a reset triggered mid-execution can be detected
	var runId = uid;

	executePluginParsers();
	processTags();
	finalizeOutput();

	// A different uid means a plugin or a filter has reset the parser in the meantime
	if (runId !== uid)
	{
		throw 'The parser has been reset during execution';
	}

	if (currentFixingCost > maxFixingCost)
	{
		logger.warn('Fixing cost limit exceeded');
	}

	return output;
}
0219
/**
* Reset the parser for a new parsing
*
* Normalizes the text, clears the logs, reinitializes the parser's state, makes the root
* context the current context and increments the uid.
*
* @param {!string} _text Text to be parsed
*/
function reset(_text)
{
	// CR and CRLF become LF, then the control characters that aren't allowed in XML are removed
	var normalizedText = _text.replace(/\r\n?/g, "\n").replace(/[\x00-\x08\x0B\x0C\x0E-\x1F]+/g, '');

	logger.clear();

	// Input and cursors
	text     = normalizedText;
	textLen  = text.length;
	pos      = 0;
	wsPos    = 0;

	// Output
	output   = '';
	isRich   = false;

	// Tags bookkeeping
	cntOpen           = {};
	cntTotal          = {};
	currentFixingCost = 0;
	currentTag        = null;
	namespaces        = {};
	openTags          = [];
	tagStack          = [];
	tagStackIsSorted  = false;

	// Start from the root context, outside of any paragraph
	context = rootContext;
	context.inParagraph = false;

	// A new uid marks the start of a new run
	++uid;
}
0257
/**
* Change a tag's tagLimit
*
* Nothing happens if the tag has no config. The limit is set on a copy of the tag's config
* obtained from copyTagConfig()
*
* NOTE: the default tagLimit should generally be set during configuration instead
*
* @param {!string} tagName  The tag's name, in UPPERCASE
* @param {!number} tagLimit
*/
function setTagLimit(tagName, tagLimit)
{
	if (!tagsConfig[tagName])
	{
		return;
	}

	var ownConfig = copyTagConfig(tagName);
	ownConfig.tagLimit = tagLimit;
}
0273
/**
* Change a tag's nestingLimit
*
* Nothing happens if the tag has no config. The limit is set on a copy of the tag's config
* obtained from copyTagConfig()
*
* NOTE: the default nestingLimit should generally be set during configuration instead
*
* @param {!string} tagName      The tag's name, in UPPERCASE
* @param {!number} nestingLimit
*/
function setNestingLimit(tagName, nestingLimit)
{
	if (!tagsConfig[tagName])
	{
		return;
	}

	var ownConfig = copyTagConfig(tagName);
	ownConfig.nestingLimit = nestingLimit;
}
0289
/**
* Copy a tag's config
*
* Replaces the tag's entry in tagsConfig with a shallow copy of itself, which guarantees that
* the config is the tag's own object and is not shared with another identical tag
*
* @param  {!string} tagName Tag's name
* @return {!Object}         Tag's config (the new copy)
*/
function copyTagConfig(tagName)
{
	var sharedConfig = tagsConfig[tagName],
		ownConfig    = {};

	for (var propName in sharedConfig)
	{
		ownConfig[propName] = sharedConfig[propName];
	}
	tagsConfig[tagName] = ownConfig;

	return ownConfig;
}
0309
0310 //==========================================================================
0311 // Filter processing
0312 //==========================================================================
0313
/**
* Execute all the attribute preprocessors of given tag
*
* Each preprocessor is a triplet made of an attribute name, a regexp and a map. It is only
* executed if the tag has the attribute it applies to.
*
* @private
*
* @param  {!Tag}     tag       Source tag
* @param  {!Object}  tagConfig Tag's config
* @return {!boolean}           Unconditionally TRUE
*/
function executeAttributePreprocessors(tag, tagConfig)
{
	var preprocessors = tagConfig.attributePreprocessors;
	if (preprocessors)
	{
		preprocessors.forEach(function(preprocessor)
		{
			var sourceName = preprocessor[0];
			if (tag.hasAttribute(sourceName))
			{
				executeAttributePreprocessor(tag, sourceName, preprocessor[1], preprocessor[2]);
			}
		});
	}

	return true;
}
0344
/**
* Execute an attribute preprocessor
*
* Runs the regexp against the attribute's value and stores each named capture as an attribute
* of the tag. A capture never replaces another existing attribute, but it can replace the
* attribute being preprocessed.
*
* @param {!Tag}            tag
* @param {!string}         attrName Name of the attribute being preprocessed
* @param {!RegExp}         regexp   Passed as-is to getNamedCaptures(), which calls exec() on it
* @param {!Array<!string>} map      Passed as-is to getNamedCaptures()
*/
function executeAttributePreprocessor(tag, attrName, regexp, map)
{
	var captures = getNamedCaptures(tag.getAttribute(attrName), regexp, map);

	for (var captureName in captures)
	{
		var wouldOverwriteOther = (captureName !== attrName && tag.hasAttribute(captureName));
		if (!wouldOverwriteOther)
		{
			tag.setAttribute(captureName, captures[captureName]);
		}
	}
}
0369
/**
* Execute a regexp and return the values of the mapped captures
*
* The map gives, for each index of the match array, the name under which that capture is
* returned. Captures that did not participate in the match or that are empty are omitted.
*
* @param  {!string}                  attrValue Value the regexp is executed against
* @param  {!RegExp}                  regexp
* @param  {!Array<!string>}          map       Capture names, indexed like the match array
* @return {!Object<!string,!string>}           Captured values using names as keys, empty if
*                                              the regexp does not match
*/
function getNamedCaptures(attrValue, regexp, map)
{
	// Always match from the start, even if the regexp is global or sticky and has been used
	// before
	regexp.lastIndex = 0;

	var m = regexp.exec(attrValue);
	if (!m)
	{
		// Return an empty object rather than an array, as per the documented return type
		return {};
	}

	var values = {};
	map.forEach(function(k, i)
	{
		if (typeof m[i] === 'string' && m[i] !== '')
		{
			values[k] = m[i];
		}
	});

	return values;
}
0397
/**
* Filter the attributes of given tag
*
* Works in three passes: values are generated for the attributes that have a generator, then
* the tag's attributes are run through their filterChain (unknown attributes and attributes
* rejected by a filter are removed), and finally missing attributes either get their default
* value or, if they are required, make the whole set invalid.
*
* @private
*
* @param  {!Tag}     tag            Tag being checked
* @param  {!Object}  tagConfig      Tag's config
* @param  {!Object}  registeredVars Vars registered for use in attribute filters
* @param  {!Logger}  logger         This parser's Logger instance
* @return {!boolean}                Whether the whole attribute set is valid
*/
function filterAttributes(tag, tagConfig, registeredVars, logger)
{
	var definitions = tagConfig.attributes;
	if (!definitions)
	{
		// The tag does not accept any attribute
		tag.setAttributes({});

		return true;
	}

	var name, definition;

	// First pass: generate values
	if (HINT.attributeGenerator)
	{
		for (name in definitions)
		{
			definition = tagConfig.attributes[name];
			if (definition.generator)
			{
				tag.setAttribute(name, definition.generator(name));
			}
		}
	}

	// Second pass: filter the attributes the tag currently has
	var current = tag.getAttributes();
	for (name in current)
	{
		var value = current[name];

		definition = tagConfig.attributes[name];
		if (!definition)
		{
			// Not an attribute of this tag
			tag.removeAttribute(name);
			continue;
		}
		if (!definition.filterChain)
		{
			// Nothing to run, the value is kept as-is
			continue;
		}

		// The logger is told which attribute is being filtered for the duration of the chain
		logger.setAttribute(name);

		for (var step = 0; step < definition.filterChain.length; ++step)
		{
			// NOTE: the value is intentionally the first argument to facilitate inlining
			value = definition.filterChain[step](value, name);
			if (value === false)
			{
				tag.removeAttribute(name);
				break;
			}
		}
		if (value !== false)
		{
			// The value made it through the chain, store the filtered value
			tag.setAttribute(name, value);
		}

		logger.unsetAttribute();
	}

	// Third pass: handle the attributes that are defined but missing from the tag
	for (name in tagConfig.attributes)
	{
		if (tag.hasAttribute(name))
		{
			continue;
		}

		definition = tagConfig.attributes[name];
		if (HINT.attributeDefaultValue && definition.defaultValue !== undefined)
		{
			tag.setAttribute(name, definition.defaultValue);
		}
		else if (definition.required)
		{
			// Missing, no default value and required: the attribute set is invalid
			return false;
		}
	}

	return true;
}
0504
/**
* Execute given tag's filterChain
*
* The filters are called in order with the tag and its config. The chain stops at the first
* filter that returns a falsy value, in which case the tag is invalid.
*
* @param  {!Tag}     tag Tag to filter
* @return {!boolean}     Whether the tag is valid
*/
function filterTag(tag)
{
	var tagConfig = tagsConfig[tag.getName()],
		chain     = tagConfig.filterChain;

	if (!chain)
	{
		// No filter to run, the tag is valid
		return true;
	}

	// The logger is told which tag is being processed so that it can be added to the context
	// of the messages logged while the chain runs
	logger.setTag(tag);

	var isValid = true;
	for (var step = 0; isValid && step < chain.length; ++step)
	{
		isValid = !!chain[step](tag, tagConfig);
	}

	logger.unsetTag();

	return isValid;
}
0538
0539 //==========================================================================
0540 // Output handling
0541 //==========================================================================
0542
/**
* Replace every surrogate pair found in the output, IOW every Unicode character outside the
* BMP, with the XML entity returned by encodeUnicodeSupplementaryCharactersCallback()
*/
function encodeUnicodeSupplementaryCharacters()
{
	// A high surrogate immediately followed by a low surrogate
	var surrogatePair = /[\uD800-\uDBFF][\uDC00-\uDFFF]/g;

	output = output.replace(surrogatePair, encodeUnicodeSupplementaryCharactersCallback);
}
0553
/**
* Encode given surrogate pair into a decimal XML entity
*
* @param  {!string} pair Surrogate pair
* @return {!string}      XML entity
*/
function encodeUnicodeSupplementaryCharactersCallback(pair)
{
	var highSurrogate = pair.charCodeAt(0),
		lowSurrogate  = pair.charCodeAt(1);

	// Each surrogate carries 10 bits of the codepoint's offset above U+10000
	var codepoint = 0x10000 + ((highSurrogate - 0xD800) << 10) + (lowSurrogate - 0xDC00);

	return '&#' + codepoint + ';';
}
0566
/**
* Finalize the output by appending the rest of the unprocessed text and create the root node
*
* After the remaining text has been output, empty tag pairs are removed, consecutive <i> tags
* are merged and characters outside the BMP are encoded. The result is wrapped in a <r> root
* if the text is rich or a <t> root otherwise, carrying one xmlns declaration per recorded
* namespace prefix.
*/
function finalizeOutput()
{
	var tmp;

	// Output the rest of the text and close the last paragraph
	outputText(textLen, 0, true);

	// Remove empty tag pairs, e.g. <I><U></U></I> as well as empty paragraphs. Removing a pair
	// can create a new empty pair around it, so we repeat until the output is stable
	do
	{
		tmp = output;
		output = output.replace(/<([^ />]+)><\/\1>/g, '');
	}
	while (output !== tmp);

	// Merge consecutive <i> tags
	output = output.replace(/<\/i><i>/g, '');

	// Encode Unicode characters that are outside of the BMP
	encodeUnicodeSupplementaryCharacters();

	// Use a <r> root if the text is rich, or <t> for plain text (including <p></p> and <br/>)
	var tagName = (isRich) ? 'r' : 't';

	// Prepare the root node with all the namespace declarations
	tmp = '<' + tagName;
	if (HINT.namespaces)
	{
		for (var prefix in namespaces)
		{
			tmp += ' xmlns:' + prefix + '="urn:s9e:TextFormatter:' + prefix + '"';
		}
	}

	output = tmp + '>' + output + '</' + tagName + '>';
}
0606
/**
* Append a tag to the output
*
* Lets the cursor catch up with the tag's position, outputs the tag's markup along with the
* original text it consumes (inside of <s>/<e> for paired tags), then moves the cursor past
* the tag and marks the newlines that should be skipped after it.
*
* @param {!Tag} tag Tag to append
*/
function outputTag(tag)
{
	isRich = true;

	var name   = tag.getName(),
		start  = tag.getPos(),
		length = tag.getLen(),
		flags  = tag.getFlags();

	// Number of lines of whitespace to skip before and after the tag
	var ignoresWhitespace = HINT.RULE_IGNORE_WHITESPACE && (flags & RULE_IGNORE_WHITESPACE),
		linesBefore       = (ignoresWhitespace) ? 1 : 0,
		linesAfter        = 0;
	if (ignoresWhitespace)
	{
		linesAfter = (tag.isEndTag()) ? 2 : 1;
	}

	// Current paragraph must end before the tag if:
	//  - the tag is a start (or self-closing) tag and it breaks paragraphs, or
	//  - the tag is an end tag (but not self-closing)
	var isStart         = tag.isStartTag(),
		breaksParagraph = HINT.RULE_BREAK_PARAGRAPH && (flags & RULE_BREAK_PARAGRAPH),
		closeParagraph  = !isStart || !!breaksParagraph;

	// Let the cursor catch up with this tag's position
	outputText(start, linesBefore, closeParagraph);

	// Escaped copy of the text consumed by the tag
	var consumed = '';
	if (length)
	{
		consumed = htmlspecialchars_noquotes(text.substr(start, length));
	}

	if (isStart)
	{
		// Tags that don't break paragraphs may need one to be opened before them
		if (!breaksParagraph)
		{
			outputParagraphStart(start);
		}

		// Record this tag's namespace prefix, if it has one
		if (HINT.namespaces)
		{
			var colonPos = name.indexOf(':');
			if (colonPos > 0)
			{
				namespaces[name.substr(0, colonPos)] = 0;
			}
		}

		// Open the start tag and add its attributes, but don't close the tag
		output += '<' + name;

		// Attributes are output in lexical order, which helps canonicalizing the output. The
		// names are unique strings, so the default sort gives the same order as comparing
		// them with >
		var attributes = tag.getAttributes(),
			sortedNames = [];
		for (var attrName in attributes)
		{
			sortedNames.push(attrName);
		}
		sortedNames.sort();
		for (var i = 0; i < sortedNames.length; ++i)
		{
			var escapedValue = htmlspecialchars_compat(attributes[sortedNames[i]].toString());
			output += ' ' + sortedNames[i] + '="' + escapedValue.replace(/\n/g, ' ') + '"';
		}

		if (tag.isSelfClosingTag())
		{
			output += (length) ? '>' + consumed + '</' + name + '>' : '/>';
		}
		else
		{
			output += (length) ? '><s>' + consumed + '</s>' : '>';
		}
	}
	else
	{
		if (length)
		{
			output += '<e>' + consumed + '</e>';
		}
		output += '</' + name + '>';
	}

	// Move the cursor past the tag
	pos = start + length;

	// Skip newlines (no other whitespace) after this tag, up to the number of lines allowed
	for (wsPos = pos; linesAfter && wsPos < textLen && text.charAt(wsPos) === "\n"; ++wsPos)
	{
		--linesAfter;
	}
}
0740
/**
* Output the text between the cursor's position (included) and given position (not included)
*
* Returns early, after closing the paragraph if applicable, if the cursor is already at or
* past given position. The cursor is never moved backwards.
*
* @param {!number}  catchupPos     Position we're catching up to
* @param {!number}  maxLines       Maximum number of lines to ignore at the end of the text
* @param {!boolean} closeParagraph Whether to close the paragraph at the end, if applicable
*/
function outputText(catchupPos, maxLines, closeParagraph)
{
	if (closeParagraph)
	{
		if (!(context.flags & RULE_CREATE_PARAGRAPHS))
		{
			// Current context does not create paragraphs, there is nothing to close
			closeParagraph = false;
		}
		else
		{
			// Ignore any number of lines at the end if we're closing a paragraph. Counting
			// down from a negative value never reaches 0, which lifts the limit
			maxLines = -1;
		}
	}

	if (pos >= catchupPos)
	{
		// We're already there, close the paragraph if applicable and return
		if (closeParagraph)
		{
			outputParagraphEnd();
		}

		return;
	}

	// Skip over previously identified whitespace if applicable
	if (wsPos > pos)
	{
		var skipPos = Math.min(catchupPos, wsPos);
		output += text.substr(pos, skipPos - pos);
		pos = skipPos;

		if (pos >= catchupPos)
		{
			// Skipped everything. Close the paragraph if applicable and return
			if (closeParagraph)
			{
				outputParagraphEnd();
			}

			return;
		}
	}

	var catchupLen, catchupText;

	// Test whether we're even supposed to output anything
	if (HINT.RULE_IGNORE_TEXT && context.flags & RULE_IGNORE_TEXT)
	{
		catchupLen  = catchupPos - pos;
		catchupText = text.substr(pos, catchupLen);

		// If the catchup text is not entirely composed of whitespace, we put it inside ignore tags
		if (!/^[ \n\t]*$/.test(catchupText))
		{
			catchupText = '<i>' + catchupText + '</i>';
		}

		output += catchupText;
		pos = catchupPos;

		if (closeParagraph)
		{
			outputParagraphEnd();
		}

		return;
	}

	// Compute the amount of text to ignore at the end of the output
	var ignorePos = catchupPos,
		ignoreLen = 0;

	// Ignore as many lines (including whitespace) as specified
	while (maxLines && --ignorePos >= pos)
	{
		var c = text.charAt(ignorePos);
		if (c !== ' ' && c !== "\n" && c !== "\t")
		{
			break;
		}

		if (c === "\n")
		{
			--maxLines;
		}

		++ignoreLen;
	}

	// Adjust catchupPos to ignore the text at the end
	catchupPos -= ignoreLen;

	// Break down the text in paragraphs if applicable
	if (HINT.RULE_CREATE_PARAGRAPHS && context.flags & RULE_CREATE_PARAGRAPHS)
	{
		if (!context.inParagraph)
		{
			outputWhitespace(catchupPos);

			if (catchupPos > pos)
			{
				outputParagraphStart(catchupPos);
			}
		}

		// Look for a paragraph break in this text
		var pbPos = text.indexOf("\n\n", pos);

		while (pbPos > -1 && pbPos < catchupPos)
		{
			// Output and close the paragraph that ends at the break, then open the next one
			outputText(pbPos, 0, true);
			outputParagraphStart(catchupPos);

			pbPos = text.indexOf("\n\n", pos);
		}
	}

	// Capture, escape and output the text
	if (catchupPos > pos)
	{
		catchupText = htmlspecialchars_noquotes(
			text.substr(pos, catchupPos - pos)
		);

		// Format line breaks if applicable, IOW if automatic line breaks are enabled and
		// neither disabled nor suspended
		if (HINT.RULE_ENABLE_AUTO_BR && (context.flags & RULES_AUTO_LINEBREAKS) === RULE_ENABLE_AUTO_BR)
		{
			catchupText = catchupText.replace(/\n/g, "<br/>\n");
		}

		output += catchupText;
	}

	// Close the paragraph if applicable
	if (closeParagraph)
	{
		outputParagraphEnd();
	}

	// Add the ignored text if applicable
	if (ignoreLen)
	{
		output += text.substr(catchupPos, ignoreLen);
	}

	// Move the cursor past the text
	pos = catchupPos + ignoreLen;
}
0894
/**
* Output a linebreak tag
*
* Lets the cursor catch up with the tag's position, then appends a <br/> to the output
*
* @param  {!Tag} tag
* @return void
*/
function outputBrTag(tag)
{
	var brPos = tag.getPos();

	outputText(brPos, 0, false);
	output += '<br/>';
}
0906
/**
* Output an ignore tag
*
* Lets the cursor catch up with the tag's position, then outputs the text covered by the tag
* inside of an <i> element and moves the cursor past it. The output is marked as rich.
*
* @param  {!Tag} tag
* @return void
*/
function outputIgnoreTag(tag)
{
	var start   = tag.getPos(),
		length  = tag.getLen(),
		ignored = text.substr(start, length);

	outputText(start, 0, false);

	output += '<i>' + htmlspecialchars_noquotes(ignored) + '</i>';
	isRich  = true;

	// The cursor now sits right after the ignored text
	pos = start + length;
}
0929
/**
* Start a paragraph between current position and given position, if applicable
*
* Does nothing if paragraphs are not in use or if a paragraph is already open. Otherwise the
* whitespace found before maxPos is output first, and the paragraph is opened unless the
* cursor has reached the end of the text.
*
* @param {!number} maxPos Rightmost position at which the paragraph can be opened
*/
function outputParagraphStart(maxPos)
{
	var usesParagraphs = HINT.RULE_CREATE_PARAGRAPHS && (context.flags & RULE_CREATE_PARAGRAPHS);
	if (!usesParagraphs || context.inParagraph)
	{
		return;
	}

	// Whitespace between pos and maxPos stays outside of the paragraph
	outputWhitespace(maxPos);

	// No paragraph is opened at the very end of the text
	if (pos >= textLen)
	{
		return;
	}

	output += '<p>';
	context.inParagraph = true;
}
0959
/**
* Close current paragraph at current position if applicable, IOW if a paragraph is open
*/
function outputParagraphEnd()
{
	if (context.inParagraph)
	{
		output += '</p>';
		context.inParagraph = false;
	}
}
0974
/**
* Output the content of a verbatim tag
*
* The text up to the end of the current tag (currentTag) is output while the context
* temporarily uses given tag's flags. The context's own flags are restored afterwards.
*
* @param {!Tag} tag Tag whose flags are used while the text is output
*/
function outputVerbatim(tag)
{
	var savedFlags = context.flags;

	context.flags = tag.getFlags();

	var currentTagEnd = currentTag.getPos() + currentTag.getLen();
	outputText(currentTagEnd, 0, false);

	context.flags = savedFlags;
}
0987
/**
* Skip as much whitespace after current position as possible
*
* Spaces, newlines and tabs are copied to the output as-is and the cursor is moved past them.
* Stops at the first other character or at maxPos, whichever comes first.
*
* @param {!number} maxPos Rightmost character to be skipped
*/
function outputWhitespace(maxPos)
{
	for (; pos < maxPos; ++pos)
	{
		var chr = text.charAt(pos);
		if (" \n\t".indexOf(chr) < 0)
		{
			break;
		}

		output += chr;
	}
}
1001
1002 //==========================================================================
1003 // Plugins handling
1004 //==========================================================================
1005
/**
* Disable a plugin
*
* Does nothing if no plugin is known under that name
*
* @param {!string} pluginName Name of the plugin
*/
function disablePlugin(pluginName)
{
	var pluginConfig = plugins[pluginName];
	if (!pluginConfig)
	{
		return;
	}

	pluginConfig.isDisabled = true;
}
1018
/**
* Enable a plugin
*
* Does nothing if no plugin is known under that name
*
* @param {!string} pluginName Name of the plugin
*/
function enablePlugin(pluginName)
{
	var pluginConfig = plugins[pluginName];
	if (!pluginConfig)
	{
		return;
	}

	pluginConfig.isDisabled = false;
}
1031
/**
* Execute given plugin
*
* The plugin's parser is skipped if the plugin has a quickMatch string that is not found in
* the text, or if it has a regexp that does not match anything. Otherwise the parser is
* called with the text and the matches, which are empty if the plugin has no regexp.
*
* @param {!string} pluginName Plugin's name
*/
function executePluginParser(pluginName)
{
	var config     = plugins[pluginName],
		quickMatch = config.quickMatch;

	if (quickMatch && text.indexOf(quickMatch) < 0)
	{
		// The text cannot possibly contain anything of interest to this plugin
		return;
	}

	var matches = [];
	if (config.regexp)
	{
		matches = getMatches(config.regexp, config.regexpLimit);
		if (!matches.length)
		{
			return;
		}
	}

	// The parser will add tags via addStartTag() and others
	var parser = getPluginParser(pluginName);
	parser(text, matches);
}
1058
/**
* Execute all the plugins, except for those that are disabled
*/
function executePluginParsers()
{
	for (var pluginName in plugins)
	{
		if (plugins[pluginName].isDisabled)
		{
			continue;
		}

		executePluginParser(pluginName);
	}
}
1072
/**
* Get regexp matches in a manner similar to preg_match_all() with PREG_SET_ORDER | PREG_OFFSET_CAPTURE
*
* Each match is an array of [string, offset] pairs: the whole match first, then one pair per
* subpattern. Subpatterns that were not evaluated are returned as ['', -1]. The offset of a
* subpattern is found by searching for its content in the text, starting after the previous
* subpattern.
*
* @param  {!RegExp}          regexp
* @param  {!number}          limit  Maximum number of matches to collect
* @return {!Array.<!Array>}
*/
function getMatches(regexp, limit)
{
	// Always start matching from the beginning of the text
	regexp.lastIndex = 0;

	var matches = [];
	for (var count = 1; count <= limit; ++count)
	{
		var m = regexp.exec(text);
		if (!m)
		{
			break;
		}

		// NOTE: coercing m.index to a number because Closure Compiler thinks it is a string otherwise
		var offset = +m['index'],
			match  = [[m[0], offset]];

		for (var i = 1; i < m.length; ++i)
		{
			var capture = m[i];

			// Sub-expressions that were not evaluated return undefined
			if (capture === undefined)
			{
				match.push(['', -1]);
				continue;
			}

			match.push([capture, text.indexOf(capture, offset)]);
			offset += capture.length;
		}

		matches.push(match);
	}

	return matches;
}
1112
/**
* Get the callback for given plugin's parser, as stored in the plugin's config
*
* @param  {!string} pluginName
* @return {!function(string, Array)}
*/
function getPluginParser(pluginName)
{
	var pluginConfig = plugins[pluginName];

	return pluginConfig.parser;
}
1123
/**
* Register a parser
*
* Can be used to add a new parser with no plugin config, or pre-generate a parser for an
* existing plugin
*
* If a regexp is given, it is stored in the plugin's config along with the maximum number of
* matches, under the regexpLimit key that executePluginParser() reads. Without a limit, the
* number of matches is unlimited.
*
* @param {!string}   pluginName
* @param {!Function} parser
* @param {RegExp}    regexp
* @param {number}    limit
*/
function registerParser(pluginName, parser, regexp, limit)
{
	// Create an empty config for this plugin to ensure it is executed
	if (!plugins[pluginName])
	{
		plugins[pluginName] = {};
	}
	if (regexp)
	{
		plugins[pluginName].regexp      = regexp;
		plugins[pluginName].regexpLimit = limit || Infinity;

		// Kept for backward compatibility with code that reads the value under its old name
		plugins[pluginName].limit = plugins[pluginName].regexpLimit;
	}
	plugins[pluginName].parser = parser;
}
1149
1150 //==========================================================================
1151 // Rules handling
1152 //==========================================================================
1153
/**
* Apply closeAncestor rules associated with given tag
*
* Open tags are checked from the most recently opened to the oldest. At the first one that
* the tag's closeAncestor rules target, the tag is put back in the tag stack and an end tag
* is added for that ancestor at the tag's position.
*
* @param  {!Tag}     tag Tag
* @return {!boolean}     Whether a new tag has been added
*/
function closeAncestor(tag)
{
	if (!HINT.closeAncestor || !openTags.length)
	{
		return false;
	}

	var targets = tagsConfig[tag.getName()].rules.closeAncestor;
	if (!targets)
	{
		return false;
	}

	for (var depth = openTags.length - 1; depth >= 0; --depth)
	{
		var ancestor = openTags[depth];
		if (!targets[ancestor.getName()])
		{
			continue;
		}

		// We have to close this ancestor. First we reinsert this tag...
		tagStack.push(tag);

		// ...then we add a new end tag for the ancestor
		addMagicEndTag(ancestor, tag.getPos());

		return true;
	}

	return false;
}
1197
/**
* Apply closeParent rules associated with given tag
*
* If the most recently opened tag is targeted by the tag's closeParent rules, the tag is put
* back in the tag stack and an end tag is added for that parent at the tag's position.
*
* @param  {!Tag}     tag Tag
* @return {!boolean}     Whether a new tag has been added
*/
function closeParent(tag)
{
	if (!HINT.closeParent || !openTags.length)
	{
		return false;
	}

	var targets = tagsConfig[tag.getName()].rules.closeParent;
	if (!targets)
	{
		return false;
	}

	var parent = openTags[openTags.length - 1];
	if (!targets[parent.getName()])
	{
		return false;
	}

	// We have to close that parent. First we reinsert the tag...
	tagStack.push(tag);

	// ...then we add a new end tag for the parent
	addMagicEndTag(parent, tag.getPos());

	return true;
}
1236
/**
* Apply the createChild rules associated with given tag
*
* One 0-width start tag is added for each tag name listed in the rule. They are positioned
* after the whitespace that follows the cursor, with priorities that start at -999 and
* increase by one for each tag.
*
* @param {!Tag} tag Tag
*/
function createChild(tag)
{
	if (!HINT.createChild)
	{
		return;
	}

	var childNames = tagsConfig[tag.getName()].rules.createChild;
	if (!childNames)
	{
		return;
	}

	// The children start at the first non-whitespace character found after the cursor
	var remaining = text.substr(pos),
		trimmed   = remaining.replace(/^[ \n\r\t]+/, ''),
		childPos  = pos + (remaining.length - trimmed.length);

	childNames.forEach(function(childName, idx)
	{
		addStartTag(childName, childPos, 0, idx - 999);
	});
}
1261
/**
* Apply fosterParent rules associated with given tag
*
* If the most recently opened tag is targeted by the tag's fosterParent rules, that parent is
* closed right before the tag and a copy of it is added right after the tag.
*
* NOTE: this rule has the potential for creating an unbounded loop, either if a tag tries to
*       foster itself or two or more tags try to foster each other in a loop. We mitigate the
*       risk by preventing a tag from creating a child of itself (the parent still gets closed)
*       and by checking and increasing the currentFixingCost so that a loop of multiple tags
*       do not run indefinitely. The default tagLimit and nestingLimit also serve to prevent the
*       loop from running indefinitely
*
* @param  {!Tag}     tag Tag
* @return {!boolean}     Whether a new tag has been added
*/
function fosterParent(tag)
{
	if (!HINT.fosterParent || !openTags.length)
	{
		return false;
	}

	var tagName = tag.getName(),
		targets = tagsConfig[tagName].rules.fosterParent;
	if (!targets)
	{
		return false;
	}

	var parent     = openTags[openTags.length - 1],
		parentName = parent.getName();
	if (!targets[parentName])
	{
		return false;
	}

	var canCopyParent = (parentName !== tagName && currentFixingCost < maxFixingCost);
	if (canCopyParent)
	{
		// Add a 0-width copy of the parent tag right after this tag, with a worse priority
		// and make it depend on this tag
		var tagEnd = tag.getPos() + tag.getLen(),
			copy   = addCopyTag(parent, tagEnd, 0, tag.getSortPriority() + 1);
		tag.cascadeInvalidationTo(copy);
	}

	// Reinsert current tag
	tagStack.push(tag);

	// Close the parent with a priority that ensures it is processed before this tag
	addMagicEndTag(parent, tag.getPos(), tag.getSortPriority() - 1);

	// Account for the additional tags/processing
	currentFixingCost += 4;

	return true;
}
1319
/**
* Test given tag against its requireAncestor rule
*
* The requirement is met as soon as any one of the listed tag names is currently open. An
* error is logged when none of them is.
*
* @param {!Tag} tag Tag
* @return {!boolean} Whether this tag has an unfulfilled requireAncestor requirement
*/
function requireAncestor(tag)
{
	if (!HINT.requireAncestor)
	{
		return false;
	}

	var ancestorNames = tagsConfig[tag.getName()].rules.requireAncestor;
	if (!ancestorNames)
	{
		return false;
	}

	var hasOpenAncestor = ancestorNames.some(function(ancestorName)
	{
		return !!cntOpen[ancestorName];
	});
	if (hasOpenAncestor)
	{
		return false;
	}

	logger.err('Tag requires an ancestor', {
		'requireAncestor' : ancestorNames.join(', '),
		'tag'             : tag
	});

	return true;
}
1358
1359 //==========================================================================
1360 // Tag processing
1361 //==========================================================================
1362
/**
* Create and add an end tag for given start tag at given position
*
* The end tag is 0-width and paired with the start tag. If the start tag carries the
* RULE_IGNORE_WHITESPACE flag (and the corresponding hint is enabled) the position is moved
* back over the whitespace that precedes it.
*
* @param {!Tag}    startTag Start tag
* @param {!number} tagPos   End tag's position (will be adjusted for whitespace if applicable)
* @param {number=} prio     End tag's priority, 0 if omitted
* @return {!Tag}
*/
function addMagicEndTag(startTag, tagPos, prio)
{
	var tagName = startTag.getName();

	// Adjust the end tag's position if whitespace is to be minimized
	if (HINT.RULE_IGNORE_WHITESPACE && (startTag.getFlags() & RULE_IGNORE_WHITESPACE))
	{
		tagPos = getMagicPos(tagPos);
	}

	// Add a 0-width end tag that is paired with the given start tag. The priority must be
	// forwarded: callers rely on it to have the end tag sorted relative to another tag at
	// the same position
	var endTag = addEndTag(tagName, tagPos, 0, prio || 0);
	endTag.pairWith(startTag);

	return endTag;
}
1386
/**
* Compute the position of a magic end tag, adjusted for whitespace
*
* Starting from given position, step back over the characters listed in WHITESPACE. The
* result is never moved before the cursor's position.
*
* @param {!number} tagPos Rightmost possible position for the tag
* @return {!number}
*/
function getMagicPos(tagPos)
{
	for (; tagPos > pos; --tagPos)
	{
		var precedingChar = text.charAt(tagPos - 1);
		if (WHITESPACE.indexOf(precedingChar) < 0)
		{
			// Found a character that is not whitespace, the tag goes right after it
			break;
		}
	}

	return tagPos;
}
1404
/**
* Test whether the next tag on the stack is able to close given start tag
*
* @param {!Tag} tag Start tag (including self-closing)
* @return {!boolean}
*/
function isFollowedByClosingTag(tag)
{
	if (!tagStack.length)
	{
		return false;
	}

	// The tag at the top of the stack is the next one to be processed
	var upcomingTag = tagStack[tagStack.length - 1];

	return upcomingTag.canClose(tag);
}
1415
/**
* Process all tags in the stack
*
* Resets the per-tag counters, then alternates between draining the stack and queueing
* magic end tags for whatever is still open, until the stack stays empty.
*/
function processTags()
{
	if (!tagStack.length)
	{
		return;
	}

	// Reset the counters of every configured tag
	for (var name in tagsConfig)
	{
		cntOpen[name]  = 0;
		cntTotal[name] = 0;
	}

	// The stack is known not to be empty here, so the first pass always runs
	while (tagStack.length)
	{
		// Drain the stack, sorting it again whenever it has been flagged as unsorted
		while (tagStack.length)
		{
			if (!tagStackIsSorted)
			{
				sortTags();
			}

			currentTag = tagStack.pop();
			processCurrentTag();
		}

		// Queue an end tag at the end of the text for each tag left open.
		//
		// NOTE: tags are added from ancestors to descendants but the stack is processed in
		//       LIFO order, so they end up closed from descendants to ancestors
		for (var i = 0, cnt = openTags.length; i < cnt; ++i)
		{
			addMagicEndTag(openTags[i], textLen);
		}
	}
}
1458
/**
* Process current tag
*
* Decides whether currentTag is skipped, replaced by a shorter tag or handed to the routine
* that matches its kind.
*/
function processCurrentTag()
{
	// While tags are disabled, only system tags and the tag that closes the last open tag
	// are allowed through
	if ((context.flags & RULE_IGNORE_TAGS)
	 && !currentTag.canClose(openTags[openTags.length - 1])
	 && !currentTag.isSystemTag())
	{
		currentTag.invalidate();
	}

	var tagPos = currentTag.getPos(),
		tagEnd = tagPos + currentTag.getLen();

	// The cursor has already moved past the start of this tag
	if (pos > tagPos && !currentTag.isInvalid())
	{
		// If this tag is paired with a start tag that is still open, replace it with an end
		// tag for that start tag that begins at the cursor and covers what is left of the
		// original span
		var pairedStartTag = currentTag.getStartTag();
		if (pairedStartTag && openTags.indexOf(pairedStartTag) >= 0)
		{
			var replacement = addEndTag(pairedStartTag.getName(), pos, Math.max(0, tagEnd - pos));
			replacement.pairWith(pairedStartTag);

			// Current tag is not invalidated, it's merely replaced
			return;
		}

		// An ignore tag is replaced by one that covers the part that lies past the cursor
		if (currentTag.isIgnoreTag())
		{
			var remainingLen = tagEnd - pos;
			if (remainingLen > 0)
			{
				addIgnoreTag(pos, remainingLen);

				return;
			}
		}

		// Anything else that was skipped is invalidated
		currentTag.invalidate();
	}

	if (currentTag.isInvalid())
	{
		return;
	}

	if (currentTag.isIgnoreTag())
	{
		outputIgnoreTag(currentTag);

		return;
	}

	if (currentTag.isBrTag())
	{
		// Line breaks are silently dropped where the context prevents them
		var isPrevented = HINT.RULE_PREVENT_BR && (context.flags & RULE_PREVENT_BR);
		if (!isPrevented)
		{
			outputBrTag(currentTag);
		}

		return;
	}

	if (currentTag.isParagraphBreak())
	{
		outputText(currentTag.getPos(), 0, true);

		return;
	}

	if (currentTag.isVerbatim())
	{
		outputVerbatim(currentTag);

		return;
	}

	if (currentTag.isStartTag())
	{
		processStartTag(currentTag);

		return;
	}

	processEndTag(currentTag);
}
1548
/**
* Process given start tag (including self-closing tags) at current position
*
* The checks run in this order:
*
*  1. the tag's global limit (tagLimit)
*  2. the tag's filter chain
*  3. the fosterParent, closeParent and closeAncestor rules
*  4. the tag's nestingLimit
*  5. whether the tag is allowed in current context
*  6. the requireAncestor rule
*
* Parents and ancestors are closed before the nesting limit, the context and the required
* ancestors are checked because closing them may change the outcome of those checks.
*
* @param {!Tag} tag Start tag (including self-closing)
*/
function processStartTag(tag)
{
	var tagName   = tag.getName(),
		tagConfig = tagsConfig[tagName];

	if (cntTotal[tagName] >= tagConfig.tagLimit)
	{
		logger.err('Tag limit exceeded', {
			'tag'      : tag,
			'tagName'  : tagName,
			'tagLimit' : tagConfig.tagLimit
		});
		tag.invalidate();

		return;
	}

	if (!filterTag(tag))
	{
		tag.invalidate();

		return;
	}

	// If a parent or an ancestor has to be closed first there is nothing else to do for now,
	// this tag remains valid
	if (fosterParent(tag) || closeParent(tag) || closeAncestor(tag))
	{
		return;
	}

	if (cntOpen[tagName] >= tagConfig.nestingLimit)
	{
		logger.err('Nesting limit exceeded', {
			'tag'          : tag,
			'tagName'      : tagName,
			'nestingLimit' : tagConfig.nestingLimit
		});
		tag.invalidate();

		return;
	}

	if (!tagIsAllowed(tagName))
	{
		var logMsg     = 'Tag is not allowed in this context',
			logContext = {'tag': tag, 'tagName': tagName};

		// Tags that consume text are reported as warnings, 0-width tags as debug messages
		if (tag.getLen() > 0)
		{
			logger.warn(logMsg, logContext);
		}
		else
		{
			logger.debug(logMsg, logContext);
		}
		tag.invalidate();

		return;
	}

	if (requireAncestor(tag))
	{
		tag.invalidate();

		return;
	}

	// A tag with an autoClose rule that is neither paired with an end tag nor followed by one
	// is replaced with a self-closing tag that has the same attributes and flags
	if (HINT.RULE_AUTO_CLOSE
	 && (tag.getFlags() & RULE_AUTO_CLOSE)
	 && !tag.getEndTag()
	 && !isFollowedByClosingTag(tag))
	{
		var selfClosingTag = new Tag(Tag.SELF_CLOSING_TAG, tagName, tag.getPos(), tag.getLen());
		selfClosingTag.setAttributes(tag.getAttributes());
		selfClosingTag.setFlags(tag.getFlags());

		tag = selfClosingTag;
	}

	// Ignore the newline that immediately follows the tag if applicable
	if (HINT.RULE_TRIM_FIRST_LINE
	 && (tag.getFlags() & RULE_TRIM_FIRST_LINE)
	 && !tag.getEndTag())
	{
		var afterTagPos = tag.getPos() + tag.getLen();
		if (text.charAt(afterTagPos) === "\n")
		{
			addIgnoreTag(afterTagPos, 1);
		}
	}

	// The tag is valid: output it, update the context then apply its createChild rule
	outputTag(tag);
	pushContext(tag);
	createChild(tag);
}
1666
/**
* Process given end tag at current position
*
* Open tags that stand between this end tag and the tag it closes are closed first, then
* queued to be reopened after it if they carry RULE_AUTO_REOPEN and the fixing budget
* allows it.
*
* @param {!Tag} tag End tag
*/
function processEndTag(tag)
{
	var tagName = tag.getName();
	if (!cntOpen[tagName])
	{
		// No tag of that name is open, there is nothing to close
		return;
	}

	/**
	* @type {!Array.<!Tag>} Open tags that need to be closed before given tag
	*/
	var closeTags = [];

	// Walk the open tags from the innermost outwards until one can be closed by this tag
	var matchIdx;
	for (matchIdx = openTags.length - 1; matchIdx >= 0; --matchIdx)
	{
		var candidate = openTags[matchIdx];
		if (tag.canClose(candidate))
		{
			break;
		}

		closeTags.push(candidate);
		++currentFixingCost;
	}

	if (matchIdx < 0)
	{
		logger.debug('Skipping end tag with no start tag', {'tag': tag});

		return;
	}

	// Tags are only reopened while the "fixing" budget has not been exceeded, and only up to
	// the first tag that cannot be reopened
	var keepReopening = HINT.RULE_AUTO_REOPEN && (currentFixingCost < maxFixingCost),
		reopenTags    = [];
	for (var c = 0, closeCnt = closeTags.length; c < closeCnt; ++c)
	{
		var unclosedTag   = closeTags[c],
			unclosedFlags = unclosedTag.getFlags();

		if (keepReopening)
		{
			if (unclosedFlags & RULE_AUTO_REOPEN)
			{
				reopenTags.push(unclosedTag);
			}
			else
			{
				keepReopening = false;
			}
		}

		// Close the tag at this end tag's position, or before the whitespace that precedes
		// it if applicable
		var closePos = tag.getPos();
		if (HINT.RULE_IGNORE_WHITESPACE && (unclosedFlags & RULE_IGNORE_WHITESPACE))
		{
			closePos = getMagicPos(closePos);
		}

		// Output the end tag then update the context
		var magicEndTag = new Tag(Tag.END_TAG, unclosedTag.getName(), closePos, 0);
		magicEndTag.setFlags(unclosedFlags);
		outputTag(magicEndTag);
		popContext();
	}

	// Output given tag, which moves the cursor past it, then update the context
	outputTag(tag);
	popContext();

	// If the budget allows it, look at upcoming tags and swallow the end tags that would
	// close tags that have just been closed. Tags closed that way are not reopened
	if (closeTags.length && currentFixingCost < maxFixingCost)
	{
		/**
		* @type {number} Rightmost position of the portion of text to ignore
		*/
		var ignorePos = pos;

		for (var s = tagStack.length - 1; s >= 0 && ++currentFixingCost < maxFixingCost; --s)
		{
			var upcomingTag = tagStack[s];

			// Stop at the first tag that lies past the ignored text or is not strictly an
			// end tag
			if (upcomingTag.getPos() > ignorePos || upcomingTag.isStartTag())
			{
				break;
			}

			for (var j = closeTags.length - 1; j >= 0 && ++currentFixingCost < maxFixingCost; --j)
			{
				if (!upcomingTag.canClose(closeTags[j]))
				{
					continue;
				}

				// Drop the tag from both lists
				closeTags.splice(j, 1);
				if (reopenTags[j])
				{
					reopenTags.splice(j, 1);
				}

				// Extend the ignored text to cover the upcoming tag
				ignorePos = Math.max(ignorePos, upcomingTag.getPos() + upcomingTag.getLen());

				break;
			}
		}

		if (ignorePos > pos)
		{
			/**
			* @todo have a method that takes (pos,len) rather than a Tag
			*/
			outputIgnoreTag(new Tag(Tag.SELF_CLOSING_TAG, 'i', pos, ignorePos - pos));
		}
	}

	// Queue a copy of each tag to be reopened at the cursor's position, paired with the end
	// tag of the original if there is one
	for (var r = 0, reopenCnt = reopenTags.length; r < reopenCnt; ++r)
	{
		var reopenedTag = addCopyTag(reopenTags[r], pos, 0),
			pairedEnd   = reopenTags[r].getEndTag();
		if (pairedEnd)
		{
			reopenedTag.pairWith(pairedEnd);
		}
	}
}
1819
/**
* Remove the last open tag, decrement its counter and restore the parent context
*/
function popContext()
{
	var closedTag = openTags.pop();

	cntOpen[closedTag.getName()] -= 1;
	context = context.parentContext;
}
1829
/**
* Update counters and replace current context with a new context based on given tag
*
* The tag is always counted in cntTotal. A self-closing tag changes nothing else. Any other
* tag is added to the list of open tags and becomes the base of a new context whose parent
* is the current context.
*
* @param {!Tag} tag Start tag (including self-closing)
*/
function pushContext(tag)
{
	var tagName = tag.getName();

	++cntTotal[tagName];

	if (tag.isSelfClosingTag())
	{
		return;
	}

	var tagFlags      = tag.getFlags(),
		tagConfig     = tagsConfig[tagName],
		isTransparent = HINT.RULE_IS_TRANSPARENT && (tagFlags & RULE_IS_TRANSPARENT);

	// Recompute the allowed tags. A transparent tag intersects its own bitfield with the
	// current one as is. Otherwise, the low byte of each value in the current bitfield is
	// replaced with a copy of its high byte beforehand
	var allowed = context.allowed.map(function(bits, idx)
	{
		var inherited = (isTransparent) ? bits : ((bits & 0xFF00) | (bits >> 8));

		return tagConfig.allowed[idx] & inherited;
	});

	// Start from this tag's flags and add the rules inherited from current context
	var flags = tagFlags | (context.flags & RULES_INHERITANCE);
	if (flags & RULE_DISABLE_AUTO_BR)
	{
		// RULE_DISABLE_AUTO_BR takes precedence over RULE_ENABLE_AUTO_BR
		flags &= ~RULE_ENABLE_AUTO_BR;
	}

	++cntOpen[tagName];
	openTags.push(tag);
	context = {
		allowed       : allowed,
		flags         : flags,
		parentContext : context
	};
}
1885
/**
* Return whether given tag is allowed in current context
*
* Looks up the bit assigned to the tag in the context's bitfield, which stores 8 bits per
* array element.
*
* @param {!string} tagName
* @return {!boolean}
*/
function tagIsAllowed(tagName)
{
	var bitNumber = tagsConfig[tagName].bitNumber,
		byteIdx   = bitNumber >> 3,
		bitMask   = 1 << (bitNumber & 7);

	return (context.allowed[byteIdx] & bitMask) !== 0;
}
1898
1899 //==========================================================================
1900 // Tag stack
1901 //==========================================================================
1902
/**
* Add a start tag
*
* @param {!string} name Name of the tag
* @param {!number} pos  Position of the tag in the text
* @param {!number} len  Length of text consumed by the tag
* @param {number}  prio Tag's priority, 0 if omitted
* @return {!Tag}
*/
function addStartTag(name, pos, len, prio)
{
	var sortPriority = prio || 0;

	return addTag(Tag.START_TAG, name, pos, len, sortPriority);
}
1916
/**
* Add an end tag
*
* @param {!string} name Name of the tag
* @param {!number} pos  Position of the tag in the text
* @param {!number} len  Length of text consumed by the tag
* @param {number}  prio Tag's priority, 0 if omitted
* @return {!Tag}
*/
function addEndTag(name, pos, len, prio)
{
	var sortPriority = prio || 0;

	return addTag(Tag.END_TAG, name, pos, len, sortPriority);
}
1930
/**
* Add a self-closing tag
*
* @param {!string} name Name of the tag
* @param {!number} pos  Position of the tag in the text
* @param {!number} len  Length of text consumed by the tag
* @param {number}  prio Tag's priority, 0 if omitted
* @return {!Tag}
*/
function addSelfClosingTag(name, pos, len, prio)
{
	var sortPriority = prio || 0;

	return addTag(Tag.SELF_CLOSING_TAG, name, pos, len, sortPriority);
}
1944
/**
* Add a 0-width "br" tag to force a line break at given position
*
* @param {!number} pos  Position of the tag in the text
* @param {number}  prio Tag's priority, 0 if omitted
* @return {!Tag}
*/
function addBrTag(pos, prio)
{
	var sortPriority = prio || 0;

	return addTag(Tag.SELF_CLOSING_TAG, 'br', pos, 0, sortPriority);
}
1956
/**
* Add an "ignore" tag
*
* The length is capped so that the tag does not extend past the end of the text.
*
* @param {!number} pos  Position of the tag in the text
* @param {!number} len  Length of text consumed by the tag
* @param {number}  prio Tag's priority, 0 if omitted
* @return {!Tag}
*/
function addIgnoreTag(pos, len, prio)
{
	var cappedLen    = Math.min(len, textLen - pos),
		sortPriority = prio || 0;

	return addTag(Tag.SELF_CLOSING_TAG, 'i', pos, cappedLen, sortPriority);
}
1969
/**
* Add a paragraph break at given position
*
* The break is represented by a 0-width self-closing tag named "pb".
*
* @param {!number} pos  Position of the tag in the text
* @param {number}  prio Tag's priority, 0 if omitted
* @return {!Tag}
*/
function addParagraphBreak(pos, prio)
{
	var sortPriority = prio || 0;

	return addTag(Tag.SELF_CLOSING_TAG, 'pb', pos, 0, sortPriority);
}
1983
/**
* Add a copy of given tag at given position and length
*
* The copy has the same type, name and attributes as the original.
*
* @param {!Tag}    tag  Original tag
* @param {!number} pos  Copy's position
* @param {!number} len  Copy's length
* @param {number=} prio Copy's priority, same as the original's if omitted
* @return {!Tag}        Copy tag
*/
function addCopyTag(tag, pos, len, prio)
{
	// Honor the priority requested by the caller, including 0, and only fall back to the
	// original's priority if none was given
	var sortPriority = (typeof prio === 'number') ? prio : tag.getSortPriority(),
		copy         = addTag(tag.getType(), tag.getName(), pos, len, sortPriority);
	copy.setAttributes(tag.getAttributes());

	return copy;
}
2000
/**
* Add a tag
*
* The tag is always created and returned. It is only inserted in the stack if it is a known
* tag or a system tag, it is not disabled, and its span is not negative and fits within the
* text. In any other case it is invalidated.
*
* @param {!number} type Tag's type
* @param {!string} name Name of the tag
* @param {!number} pos  Position of the tag in the text
* @param {!number} len  Length of text consumed by the tag
* @param {number}  prio Tag's priority, 0 if omitted
* @return {!Tag}
*/
function addTag(type, name, pos, len, prio)
{
	var tag       = new Tag(type, name, pos, len, prio || 0),
		tagConfig = tagsConfig[name];

	// Known tags get their rules bitfield from the config
	if (tagConfig)
	{
		tag.setFlags(tagConfig.rules.flags);
	}

	if (!tagConfig && !tag.isSystemTag())
	{
		// Unknown tag
		tag.invalidate();
	}
	else if (tagConfig && tagConfig.isDisabled)
	{
		logger.warn('Tag is disabled', {
			'tag'     : tag,
			'tagName' : name
		});
		tag.invalidate();
	}
	else if (len < 0 || pos < 0 || pos + len > textLen)
	{
		// Negative or out of bounds
		tag.invalidate();
	}
	else
	{
		insertTag(tag);
	}

	return tag;
}
2050
/**
* Insert given tag in the tag stack
*
* If the stack is flagged as sorted the tag is inserted at the position that keeps it
* sorted. Otherwise it is simply appended.
*
* @param {!Tag} tag
*/
function insertTag(tag)
{
	if (!tagStackIsSorted)
	{
		tagStack.push(tag);

		return;
	}

	// Walk back from the top of the stack, past every tag that sorts after the new tag
	var idx = tagStack.length;
	while (idx > 0 && compareTags(tagStack[idx - 1], tag) > 0)
	{
		--idx;
	}

	tagStack.splice(idx, 0, tag);
}
2074
/**
* Add a pair of tags
*
* @param {!string} name     Name of the tags
* @param {!number} startPos Position of the start tag
* @param {!number} startLen Length of the start tag
* @param {!number} endPos   Position of the end tag
* @param {!number} endLen   Length of the end tag
* @param {number}  prio     Start tag's priority (the end tag will be set to minus that value)
* @return {!Tag}            Start tag
*/
function addTagPair(name, startPos, startLen, endPos, endLen, prio)
{
	var startPrio = prio || 0,
		endPrio   = -prio || 0;

	// NOTE: the end tag is added first to try to keep the stack in the correct order
	var endTag   = addEndTag(name, endPos, endLen, endPrio),
		startTag = addStartTag(name, startPos, startLen, startPrio);
	startTag.pairWith(endTag);

	return startTag;
}
2095
/**
* Add a tag that represents a verbatim copy of the original text
*
* @param {!number} pos  Position of the tag in the text
* @param {!number} len  Length of text consumed by the tag
* @param {number}  prio Tag's priority, 0 if omitted
* @return {!Tag}
*/
function addVerbatim(pos, len, prio)
{
	var sortPriority = prio || 0;

	return addTag(Tag.SELF_CLOSING_TAG, 'v', pos, len, sortPriority);
}
2107
/**
* Sort the tag stack using compareTags() and flag it as sorted
*/
function sortTags()
{
	tagStack.sort(function(a, b)
	{
		return compareTags(a, b);
	});
	tagStackIsSorted = true;
}
2116
/**
* sortTags() callback
*
* Tags are stored as a stack, in LIFO order. A tag that sorts last is processed first, so
* tags are sorted by position _descending_ in order to be processed in the order they
* appear in the text.
*
* @param {!Tag} a First tag to compare
* @param {!Tag} b Second tag to compare
* @return {!number}
*/
function compareTags(a, b)
{
	// Highest position first
	var aPos = a.getPos(),
		bPos = b.getPos();
	if (aPos !== bPos)
	{
		return bPos - aPos;
	}

	// At the same position, the tag with the lower sortPriority is sorted last, which means
	// it is processed first. IOW, -10 is processed before 10
	var aPrio = a.getSortPriority(),
		bPrio = b.getSortPriority();
	if (aPrio !== bPrio)
	{
		return bPrio - aPrio;
	}

	var aLen = a.getLen(),
		bLen = b.getLen();

	// Both tags consume text: sort by length ascending so that the longest match is
	// processed first. The order of tags of identical length is left unspecified
	if (aLen && bLen)
	{
		return aLen - bLen;
	}

	// Only one of the tags is zero-width: it is sorted after the wider tag so that it has a
	// chance to be processed before the next character is consumed, which would force it to
	// be skipped
	if (aLen)
	{
		return -1;
	}
	if (bLen)
	{
		return 1;
	}

	// Both tags are zero-width. End tags are sorted after start tags so that a pair that
	// ends with a zero-width tag has the opportunity to be closed before another pair starts
	// with a zero-width tag, e.g. the pairs that would enclose each of the letters in the
	// string "XY". Self-closing tags are sorted between end tags and start tags in an
	// attempt to keep them out of tag pairs
	var rank = {};
	rank[Tag.END_TAG]          = 0;
	rank[Tag.SELF_CLOSING_TAG] = 1;
	rank[Tag.START_TAG]        = 2;

	return rank[b.getType()] - rank[a.getType()];
}