wimCMS • Blick zurück von phpBB 1.0.0 bis heute

Verzeichnisstruktur phpBB-3.3.15

Veröffentlicht: 28.08.2024

So funktioniert es

Verzeichnis-Info phpBB-1.0.0 / auth.php	Auf das letzte Element klicken. Dies geht jeweils einen Schritt zurück
admin	Auf das Icon klicken, dies öffnet das Verzeichnis. Nochmal klicken schließt das Verzeichnis. Auf den Verzeichnisnamen klicken, dies zeigt nur das Verzeichnis mit Inhalt an
(Beispiel Datei-Icons)	Auf das Icon klicken, um den Quellcode anzuzeigen

RegexpParser.php

Zuletzt modifiziert: 02.04.2025, 15:04 - Dateigröße: 9.77 KiB


     001  <?php

     002   

     003  /**

     004  * @package   s9e\TextFormatter

     005  * @copyright Copyright (c) 2010-2022 The s9e authors

     006  * @license   http://www.opensource.org/licenses/mit-license.php The MIT License

     007  */

     008  namespace s9e\TextFormatter\Configurator\Helpers;

     009   

     010  use RuntimeException;

     011   

     012  abstract class RegexpParser

     013  {

     014      /**

     015      * Generate a regexp that matches any single character allowed in a regexp

     016      *

     017      * This method will generate a regexp that can be used to determine whether a given character

     018      * could in theory be allowed in a string that matches the source regexp. For example, the source

     019      * regexp /^a+$/D would generate /a/ while /^foo\d+$/D would generate /[fo\d]/ whereas the regexp

     020      * /foo/ would generate // because it's not anchored so any characters could be found before or

     021      * after the literal "foo".

     022      *

     023      * @param  string $regexp Source regexp

     024      * @return string         Regexp that matches any single character allowed in the source regexp

     025      */

     026      public static function getAllowedCharacterRegexp($regexp)

     027      {

     028          $def = self::parse($regexp);

     029   

     030          // If the regexp is uses the multiline modifier, this regexp can't match the whole string if

     031          // it contains newlines, so in effect it could allow any content

     032          if (strpos($def['modifiers'], 'm') !== false)

     033          {

     034              return '//';

     035          }

     036   

     037          if (substr($def['regexp'], 0, 1) !== '^'

     038           || substr($def['regexp'], -1)   !== '$')

     039          {

     040              return '//';

     041          }

     042   

     043          // Append a token to mark the end of the regexp

     044          $def['tokens'][] = [

     045              'pos'  => strlen($def['regexp']),

     046              'len'  => 0,

     047              'type' => 'end'

     048          ];

     049   

     050          $patterns = [];

     051   

     052          // Collect the literal portions of the source regexp while testing for alternations

     053          $literal = '';

     054          $pos     = 0;

     055          $skipPos = 0;

     056          $depth   = 0;

     057          foreach ($def['tokens'] as $token)

     058          {

     059              // Skip options

     060              if ($token['type'] === 'option')

     061              {

     062                  $skipPos = max($skipPos, $token['pos'] + $token['len']);

     063              }

     064   

     065              // Skip assertions

     066              if (strpos($token['type'], 'AssertionStart') !== false)

     067              {

     068                  $endToken = $def['tokens'][$token['endToken']];

     069                  $skipPos  = max($skipPos, $endToken['pos'] + $endToken['len']);

     070              }

     071   

     072              if ($token['pos'] >= $skipPos)

     073              {

     074                  if ($token['type'] === 'characterClass')

     075                  {

     076                      $patterns[] = '[' . $token['content'] . ']';

     077                  }

     078   

     079                  if ($token['pos'] > $pos)

     080                  {

     081                      // Capture the content between last position and current position

     082                      $tmp = substr($def['regexp'], $pos, $token['pos'] - $pos);

     083   

     084                      // Append the content to the literal portion

     085                      $literal .= $tmp;

     086   

     087                      // Test for alternations if it's the root of the regexp

     088                      if (!$depth)

     089                      {

     090                          // Remove literal backslashes for convenience

     091                          $tmp = str_replace('\\\\', '', $tmp);

     092   

     093                          // Look for an unescaped | that is not followed by ^

     094                          if (preg_match('/(?<!\\\\)\\|(?!\\^)/', $tmp))

     095                          {

     096                              return '//';

     097                          }

     098   

     099                          // Look for an unescaped | that is not preceded by $

     100                          if (preg_match('/(?<![$\\\\])\\|/', $tmp))

     101                          {

     102                              return '//';

     103                          }

     104                      }

     105                  }

     106              }

     107   

     108              if (substr($token['type'], -5) === 'Start')

     109              {

     110                  ++$depth;

     111              }

     112              elseif (substr($token['type'], -3) === 'End')

     113              {

     114                  --$depth;

     115              }

     116   

     117              $pos = max($skipPos, $token['pos'] + $token['len']);

     118          }

     119   

     120          // Test for the presence of an unescaped dot

     121          if (preg_match('#(?<!\\\\)(?:\\\\\\\\)*\\.#', $literal))

     122          {

     123              if (strpos($def['modifiers'], 's') !== false

     124               || strpos($literal, "\n") !== false)

     125              {

     126                  return '//';

     127              }

     128   

     129              $patterns[] = '.';

     130   

     131              // Remove unescaped dots

     132              $literal = preg_replace('#(?<!\\\\)((?:\\\\\\\\)*)\\.#', '$1', $literal);

     133          }

     134   

     135          // Remove unescaped quantifiers *, + and ?

     136          $literal = preg_replace('#(?<!\\\\)((?:\\\\\\\\)*)[*+?]#', '$1', $literal);

     137   

     138          // Remove unescaped quantifiers {}

     139          $literal = preg_replace('#(?<!\\\\)((?:\\\\\\\\)*)\\{[^}]+\\}#', '$1', $literal);

     140   

     141          // Remove backslash assertions \b, \B, \A, \Z, \z and \G, as well as back references

     142          $literal = preg_replace('#(?<!\\\\)((?:\\\\\\\\)*)\\\\[bBAZzG1-9]#', '$1', $literal);

     143   

     144          // Remove unescaped ^, | and $

     145          $literal = preg_replace('#(?<!\\\\)((?:\\\\\\\\)*)[$^|]#', '$1', $literal);

     146   

     147          // Escape unescaped - and ] so they are safe to use in a character class

     148          $literal = preg_replace('#(?<!\\\\)((?:\\\\\\\\)*)([-^\\]])#', '$1\\\\$2', $literal);

     149   

     150          // If the regexp doesn't use PCRE_DOLLAR_ENDONLY, it could end with a \n

     151          if (strpos($def['modifiers'], 'D') === false)

     152          {

     153              $literal .= "\n";

     154          }

     155   

     156          // Add the literal portion of the regexp to the patterns, as a character class

     157          if ($literal !== '')

     158          {

     159              $patterns[] = '[' . $literal . ']';

     160          }

     161   

     162          // Test whether this regexp actually matches anything

     163          if (empty($patterns))

     164          {

     165              return '/^$/D';

     166          }

     167   

     168          // Build the allowed characters regexp

     169          $regexp = $def['delimiter'] . implode('|', $patterns) . $def['delimiter'];

     170   

     171          // Add the modifiers

     172          if (strpos($def['modifiers'], 'i') !== false)

     173          {

     174              $regexp .= 'i';

     175          }

     176          if (strpos($def['modifiers'], 'u') !== false)

     177          {

     178              $regexp .= 'u';

     179          }

     180   

     181          return $regexp;

     182      }

     183   

     184      /**

     185      * Return the name of each capture in given regexp

     186      *

     187      * Will return an empty string for unnamed captures

     188      *

     189      * @param  string   $regexp

     190      * @return string[]

     191      */

     192      public static function getCaptureNames($regexp)

     193      {

     194          $map        = [''];

     195          $regexpInfo = self::parse($regexp);

     196          foreach ($regexpInfo['tokens'] as $tok)

     197          {

     198              if ($tok['type'] === 'capturingSubpatternStart')

     199              {

     200                  $map[] = $tok['name'] ?? '';

     201              }

     202          }

     203   

     204          return $map;

     205      }

     206   

     207      /**

     208      * @param  string $regexp

     209      * @return array

     210      */

     211      public static function parse($regexp)

     212      {

     213          if (!preg_match('#^(.)(.*?)\\1([a-zA-Z]*)$#Ds', $regexp, $m))

     214          {

     215              throw new RuntimeException('Could not parse regexp delimiters');

     216          }

     217   

     218          $ret = [

     219              'delimiter' => $m[1],

     220              'modifiers' => $m[3],

     221              'regexp'    => $m[2],

     222              'tokens'    => []

     223          ];

     224   

     225          $regexp = $m[2];

     226   

     227          $openSubpatterns = [];

     228   

     229          $pos = 0;

     230          $regexpLen = strlen($regexp);

     231   

     232          while ($pos < $regexpLen)

     233          {

     234              switch ($regexp[$pos])

     235              {

     236                  case '\\':

     237                      // skip next character

     238                      $pos += 2;

     239                      break;

     240   

     241                  case '[':

     242                      if (!preg_match('#\\[(.*?(?<!\\\\)(?:\\\\\\\\)*+)\\]((?:[+*][+?]?|\\?)?)#A', $regexp, $m, 0, $pos))

     243                      {

     244                          throw new RuntimeException('Could not find matching bracket from pos ' . $pos);

     245                      }

     246   

     247                      $ret['tokens'][] = [

     248                          'pos'         => $pos,

     249                          'len'         => strlen($m[0]),

     250                          'type'        => 'characterClass',

     251                          'content'     => $m[1],

     252                          'quantifiers' => $m[2]

     253                      ];

     254   

     255                      $pos += strlen($m[0]);

     256                      break;

     257   

     258                  case '(':

     259                      if (preg_match('#\\(\\?([a-z]*)\\)#iA', $regexp, $m, 0, $pos))

     260                      {

     261                          // This is an option (?i) so we skip past the right parenthesis

     262                          $ret['tokens'][] = [

     263                              'pos'     => $pos,

     264                              'len'     => strlen($m[0]),

     265                              'type'    => 'option',

     266                              'options' => $m[1]

     267                          ];

     268   

     269                          $pos += strlen($m[0]);

     270                          break;

     271                      }

     272   

     273                      // This should be a subpattern, we just have to sniff which kind

     274                      if (preg_match("#(?J)\\(\\?(?:P?<(?<name>[a-z_0-9]+)>|'(?<name>[a-z_0-9]+)')#A", $regexp, $m, \PREG_OFFSET_CAPTURE, $pos))

     275                      {

     276                          // This is a named capture

     277                          $tok = [

     278                              'pos'  => $pos,

     279                              'len'  => strlen($m[0][0]),

     280                              'type' => 'capturingSubpatternStart',

     281                              'name' => $m['name'][0]

     282                          ];

     283   

     284                          $pos += strlen($m[0][0]);

     285                      }

     286                      elseif (preg_match('#\\(\\?([a-z]*):#iA', $regexp, $m, 0, $pos))

     287                      {

     288                          // This is a non-capturing subpattern (?:xxx)

     289                          $tok = [

     290                              'pos'     => $pos,

     291                              'len'     => strlen($m[0]),

     292                              'type'    => 'nonCapturingSubpatternStart',

     293                              'options' => $m[1]

     294                          ];

     295   

     296                          $pos += strlen($m[0]);

     297                      }

     298                      elseif (preg_match('#\\(\\?>#iA', $regexp, $m, 0, $pos))

     299                      {

     300                          /* This is a non-capturing subpattern with atomic grouping "(?>x+)" */

     301                          $tok = [

     302                              'pos'     => $pos,

     303                              'len'     => strlen($m[0]),

     304                              'type'    => 'nonCapturingSubpatternStart',

     305                              'subtype' => 'atomic'

     306                          ];

     307   

     308                          $pos += strlen($m[0]);

     309                      }

     310                      elseif (preg_match('#\\(\\?(<?[!=])#A', $regexp, $m, 0, $pos))

     311                      {

     312                          // This is an assertion

     313                          $assertions = [

     314                              '='  => 'lookahead',

     315                              '<=' => 'lookbehind',

     316                              '!'  => 'negativeLookahead',

     317                              '<!' => 'negativeLookbehind'

     318                          ];

     319   

     320                          $tok = [

     321                              'pos'     => $pos,

     322                              'len'     => strlen($m[0]),

     323                              'type'    => $assertions[$m[1]] . 'AssertionStart'

     324                          ];

     325   

     326                          $pos += strlen($m[0]);

     327                      }

     328                      elseif (preg_match('#\\(\\?#A', $regexp, $m, 0, $pos))

     329                      {

     330                          throw new RuntimeException('Unsupported subpattern type at pos ' . $pos);

     331                      }

     332                      else

     333                      {

     334                          // This should be a normal capture

     335                          $tok = [

     336                              'pos'  => $pos,

     337                              'len'  => 1,

     338                              'type' => 'capturingSubpatternStart'

     339                          ];

     340   

     341                          ++$pos;

     342                      }

     343   

     344                      $openSubpatterns[] = count($ret['tokens']);

     345                      $ret['tokens'][] = $tok;

     346                      break;

     347   

     348                  case ')':

     349                      if (empty($openSubpatterns))

     350                      {

     351                          throw new RuntimeException('Could not find matching pattern start for right parenthesis at pos ' . $pos);

     352                      }

     353   

     354                      // Add the key to this token to its matching token and capture this subpattern's

     355                      // content

     356                      $k = array_pop($openSubpatterns);

     357                      $startToken =& $ret['tokens'][$k];

     358                      $startToken['endToken'] = count($ret['tokens']);

     359                      $startToken['content']  = substr(

     360                          $regexp,

     361                          $startToken['pos'] + $startToken['len'],

     362                          $pos - ($startToken['pos'] + $startToken['len'])

     363                      );

     364   

     365                      // Look for quantifiers after the subpattern, e.g. (?:ab)++

     366                      $spn = strspn($regexp, '+*?', 1 + $pos);

     367                      $quantifiers = substr($regexp, 1 + $pos, $spn);

     368   

     369                      $ret['tokens'][] = [

     370                          'pos'  => $pos,

     371                          'len'  => 1 + $spn,

     372                          'type' => substr($startToken['type'], 0, -5) . 'End',

     373                          'quantifiers' => $quantifiers

     374                      ];

     375   

     376                      unset($startToken);

     377   

     378                      $pos += 1 + $spn;

     379                      break;

     380   

     381                  default:

     382                      ++$pos;

     383              }

     384          }

     385   

     386          if (!empty($openSubpatterns))

     387          {

     388              throw new RuntimeException('Could not find matching pattern end for left parenthesis at pos ' . $ret['tokens'][$openSubpatterns[0]]['pos']);

     389          }

     390   

     391          return $ret;

     392      }

     393  }

Verzeichnisstruktur phpBB-3.3.15

Zuletzt modifiziert: 02.04.2025, 15:04 - Dateigröße: 9.77 KiB

RegexpParser.php


     001  <?php

     002   

     003  /**

     004  * @package   s9e\TextFormatter

     005  * @copyright Copyright (c) 2010-2022 The s9e authors

     006  * @license   http://www.opensource.org/licenses/mit-license.php The MIT License

     007  */

     008  namespace s9e\TextFormatter\Configurator\Helpers;

     009   

     010  use RuntimeException;

     011   

     012  abstract class RegexpParser

     013  {

     014      /**

     015      * Generate a regexp that matches any single character allowed in a regexp

     016      *

     017      * This method will generate a regexp that can be used to determine whether a given character

     018      * could in theory be allowed in a string that matches the source regexp. For example, the source

     019      * regexp /^a+$/D would generate /a/ while /^foo\d+$/D would generate /[fo\d]/ whereas the regexp

     020      * /foo/ would generate // because it's not anchored so any characters could be found before or

     021      * after the literal "foo".

     022      *

     023      * @param  string $regexp Source regexp

     024      * @return string         Regexp that matches any single character allowed in the source regexp

     025      */

     026      public static function getAllowedCharacterRegexp($regexp)

     027      {

     028          $def = self::parse($regexp);

     029   

     030          // If the regexp is uses the multiline modifier, this regexp can't match the whole string if

     031          // it contains newlines, so in effect it could allow any content

     032          if (strpos($def['modifiers'], 'm') !== false)

     033          {

     034              return '//';

     035          }

     036   

     037          if (substr($def['regexp'], 0, 1) !== '^'

     038           || substr($def['regexp'], -1)   !== '$')

     039          {

     040              return '//';

     041          }

     042   

     043          // Append a token to mark the end of the regexp

     044          $def['tokens'][] = [

     045              'pos'  => strlen($def['regexp']),

     046              'len'  => 0,

     047              'type' => 'end'

     048          ];

     049   

     050          $patterns = [];

     051   

     052          // Collect the literal portions of the source regexp while testing for alternations

     053          $literal = '';

     054          $pos     = 0;

     055          $skipPos = 0;

     056          $depth   = 0;

     057          foreach ($def['tokens'] as $token)

     058          {

     059              // Skip options

     060              if ($token['type'] === 'option')

     061              {

     062                  $skipPos = max($skipPos, $token['pos'] + $token['len']);

     063              }

     064   

     065              // Skip assertions

     066              if (strpos($token['type'], 'AssertionStart') !== false)

     067              {

     068                  $endToken = $def['tokens'][$token['endToken']];

     069                  $skipPos  = max($skipPos, $endToken['pos'] + $endToken['len']);

     070              }

     071   

     072              if ($token['pos'] >= $skipPos)

     073              {

     074                  if ($token['type'] === 'characterClass')

     075                  {

     076                      $patterns[] = '[' . $token['content'] . ']';

     077                  }

     078   

     079                  if ($token['pos'] > $pos)

     080                  {

     081                      // Capture the content between last position and current position

     082                      $tmp = substr($def['regexp'], $pos, $token['pos'] - $pos);

     083   

     084                      // Append the content to the literal portion

     085                      $literal .= $tmp;

     086   

     087                      // Test for alternations if it's the root of the regexp

     088                      if (!$depth)

     089                      {

     090                          // Remove literal backslashes for convenience

     091                          $tmp = str_replace('\\\\', '', $tmp);

     092   

     093                          // Look for an unescaped | that is not followed by ^

     094                          if (preg_match('/(?<!\\\\)\\|(?!\\^)/', $tmp))

     095                          {

     096                              return '//';

     097                          }

     098   

     099                          // Look for an unescaped | that is not preceded by $

     100                          if (preg_match('/(?<![$\\\\])\\|/', $tmp))

     101                          {

     102                              return '//';

     103                          }

     104                      }

     105                  }

     106              }

     107   

     108              if (substr($token['type'], -5) === 'Start')

     109              {

     110                  ++$depth;

     111              }

     112              elseif (substr($token['type'], -3) === 'End')

     113              {

     114                  --$depth;

     115              }

     116   

     117              $pos = max($skipPos, $token['pos'] + $token['len']);

     118          }

     119   

     120          // Test for the presence of an unescaped dot

     121          if (preg_match('#(?<!\\\\)(?:\\\\\\\\)*\\.#', $literal))

     122          {

     123              if (strpos($def['modifiers'], 's') !== false

     124               || strpos($literal, "\n") !== false)

     125              {

     126                  return '//';

     127              }

     128   

     129              $patterns[] = '.';

     130   

     131              // Remove unescaped dots

     132              $literal = preg_replace('#(?<!\\\\)((?:\\\\\\\\)*)\\.#', '$1', $literal);

     133          }

     134   

     135          // Remove unescaped quantifiers *, + and ?

     136          $literal = preg_replace('#(?<!\\\\)((?:\\\\\\\\)*)[*+?]#', '$1', $literal);

     137   

     138          // Remove unescaped quantifiers {}

     139          $literal = preg_replace('#(?<!\\\\)((?:\\\\\\\\)*)\\{[^}]+\\}#', '$1', $literal);

     140   

     141          // Remove backslash assertions \b, \B, \A, \Z, \z and \G, as well as back references

     142          $literal = preg_replace('#(?<!\\\\)((?:\\\\\\\\)*)\\\\[bBAZzG1-9]#', '$1', $literal);

     143   

     144          // Remove unescaped ^, | and $

     145          $literal = preg_replace('#(?<!\\\\)((?:\\\\\\\\)*)[$^|]#', '$1', $literal);

     146   

     147          // Escape unescaped - and ] so they are safe to use in a character class

     148          $literal = preg_replace('#(?<!\\\\)((?:\\\\\\\\)*)([-^\\]])#', '$1\\\\$2', $literal);

     149   

     150          // If the regexp doesn't use PCRE_DOLLAR_ENDONLY, it could end with a \n

     151          if (strpos($def['modifiers'], 'D') === false)

     152          {

     153              $literal .= "\n";

     154          }

     155   

     156          // Add the literal portion of the regexp to the patterns, as a character class

     157          if ($literal !== '')

     158          {

     159              $patterns[] = '[' . $literal . ']';

     160          }

     161   

     162          // Test whether this regexp actually matches anything

     163          if (empty($patterns))

     164          {

     165              return '/^$/D';

     166          }

     167   

     168          // Build the allowed characters regexp

     169          $regexp = $def['delimiter'] . implode('|', $patterns) . $def['delimiter'];

     170   

     171          // Add the modifiers

     172          if (strpos($def['modifiers'], 'i') !== false)

     173          {

     174              $regexp .= 'i';

     175          }

     176          if (strpos($def['modifiers'], 'u') !== false)

     177          {

     178              $regexp .= 'u';

     179          }

     180   

     181          return $regexp;

     182      }

     183   

     184      /**

     185      * Return the name of each capture in given regexp

     186      *

     187      * Will return an empty string for unnamed captures

     188      *

     189      * @param  string   $regexp

     190      * @return string[]

     191      */

     192      public static function getCaptureNames($regexp)

     193      {

     194          $map        = [''];

     195          $regexpInfo = self::parse($regexp);

     196          foreach ($regexpInfo['tokens'] as $tok)

     197          {

     198              if ($tok['type'] === 'capturingSubpatternStart')

     199              {

     200                  $map[] = $tok['name'] ?? '';

     201              }

     202          }

     203   

     204          return $map;

     205      }

     206   

     207      /**

     208      * @param  string $regexp

     209      * @return array

     210      */

     211      public static function parse($regexp)

     212      {

     213          if (!preg_match('#^(.)(.*?)\\1([a-zA-Z]*)$#Ds', $regexp, $m))

     214          {

     215              throw new RuntimeException('Could not parse regexp delimiters');

     216          }

     217   

     218          $ret = [

     219              'delimiter' => $m[1],

     220              'modifiers' => $m[3],

     221              'regexp'    => $m[2],

     222              'tokens'    => []

     223          ];

     224   

     225          $regexp = $m[2];

     226   

     227          $openSubpatterns = [];

     228   

     229          $pos = 0;

     230          $regexpLen = strlen($regexp);

     231   

     232          while ($pos < $regexpLen)

     233          {

     234              switch ($regexp[$pos])

     235              {

     236                  case '\\':

     237                      // skip next character

     238                      $pos += 2;

     239                      break;

     240   

     241                  case '[':

     242                      if (!preg_match('#\\[(.*?(?<!\\\\)(?:\\\\\\\\)*+)\\]((?:[+*][+?]?|\\?)?)#A', $regexp, $m, 0, $pos))

     243                      {

     244                          throw new RuntimeException('Could not find matching bracket from pos ' . $pos);

     245                      }

     246   

     247                      $ret['tokens'][] = [

     248                          'pos'         => $pos,

     249                          'len'         => strlen($m[0]),

     250                          'type'        => 'characterClass',

     251                          'content'     => $m[1],

     252                          'quantifiers' => $m[2]

     253                      ];

     254   

     255                      $pos += strlen($m[0]);

     256                      break;

     257   

     258                  case '(':

     259                      if (preg_match('#\\(\\?([a-z]*)\\)#iA', $regexp, $m, 0, $pos))

     260                      {

     261                          // This is an option (?i) so we skip past the right parenthesis

     262                          $ret['tokens'][] = [

     263                              'pos'     => $pos,

     264                              'len'     => strlen($m[0]),

     265                              'type'    => 'option',

     266                              'options' => $m[1]

     267                          ];

     268   

     269                          $pos += strlen($m[0]);

     270                          break;

     271                      }

     272   

     273                      // This should be a subpattern, we just have to sniff which kind

     274                      if (preg_match("#(?J)\\(\\?(?:P?<(?<name>[a-z_0-9]+)>|'(?<name>[a-z_0-9]+)')#A", $regexp, $m, \PREG_OFFSET_CAPTURE, $pos))

     275                      {

     276                          // This is a named capture

     277                          $tok = [

     278                              'pos'  => $pos,

     279                              'len'  => strlen($m[0][0]),

     280                              'type' => 'capturingSubpatternStart',

     281                              'name' => $m['name'][0]

     282                          ];

     283   

     284                          $pos += strlen($m[0][0]);

     285                      }

     286                      elseif (preg_match('#\\(\\?([a-z]*):#iA', $regexp, $m, 0, $pos))

     287                      {

     288                          // This is a non-capturing subpattern (?:xxx)

     289                          $tok = [

     290                              'pos'     => $pos,

     291                              'len'     => strlen($m[0]),

     292                              'type'    => 'nonCapturingSubpatternStart',

     293                              'options' => $m[1]

     294                          ];

     295   

     296                          $pos += strlen($m[0]);

     297                      }

     298                      elseif (preg_match('#\\(\\?>#iA', $regexp, $m, 0, $pos))

     299                      {

     300                          /* This is a non-capturing subpattern with atomic grouping "(?>x+)" */

     301                          $tok = [

     302                              'pos'     => $pos,

     303                              'len'     => strlen($m[0]),

     304                              'type'    => 'nonCapturingSubpatternStart',

     305                              'subtype' => 'atomic'

     306                          ];

     307   

     308                          $pos += strlen($m[0]);

     309                      }

     310                      elseif (preg_match('#\\(\\?(<?[!=])#A', $regexp, $m, 0, $pos))

     311                      {

     312                          // This is an assertion

     313                          $assertions = [

     314                              '='  => 'lookahead',

     315                              '<=' => 'lookbehind',

     316                              '!'  => 'negativeLookahead',

     317                              '<!' => 'negativeLookbehind'

     318                          ];

     319   

     320                          $tok = [

     321                              'pos'     => $pos,

     322                              'len'     => strlen($m[0]),

     323                              'type'    => $assertions[$m[1]] . 'AssertionStart'

     324                          ];

     325   

     326                          $pos += strlen($m[0]);

     327                      }

     328                      elseif (preg_match('#\\(\\?#A', $regexp, $m, 0, $pos))

     329                      {

     330                          throw new RuntimeException('Unsupported subpattern type at pos ' . $pos);

     331                      }

     332                      else

     333                      {

     334                          // This should be a normal capture

     335                          $tok = [

     336                              'pos'  => $pos,

     337                              'len'  => 1,

     338                              'type' => 'capturingSubpatternStart'

     339                          ];

     340   

     341                          ++$pos;

     342                      }

     343   

     344                      $openSubpatterns[] = count($ret['tokens']);

     345                      $ret['tokens'][] = $tok;

     346                      break;

     347   

     348                  case ')':

     349                      if (empty($openSubpatterns))

     350                      {

     351                          throw new RuntimeException('Could not find matching pattern start for right parenthesis at pos ' . $pos);

     352                      }

     353   

     354                      // Add the key to this token to its matching token and capture this subpattern's

     355                      // content

     356                      $k = array_pop($openSubpatterns);

     357                      $startToken =& $ret['tokens'][$k];

     358                      $startToken['endToken'] = count($ret['tokens']);

     359                      $startToken['content']  = substr(

     360                          $regexp,

     361                          $startToken['pos'] + $startToken['len'],

     362                          $pos - ($startToken['pos'] + $startToken['len'])

     363                      );

     364   

     365                      // Look for quantifiers after the subpattern, e.g. (?:ab)++

     366                      $spn = strspn($regexp, '+*?', 1 + $pos);

     367                      $quantifiers = substr($regexp, 1 + $pos, $spn);

     368   

     369                      $ret['tokens'][] = [

     370                          'pos'  => $pos,

     371                          'len'  => 1 + $spn,

     372                          'type' => substr($startToken['type'], 0, -5) . 'End',

     373                          'quantifiers' => $quantifiers

     374                      ];

     375   

     376                      unset($startToken);

     377   

     378                      $pos += 1 + $spn;

     379                      break;

     380   

     381                  default:

     382                      ++$pos;

     383              }

     384          }

     385   

     386          if (!empty($openSubpatterns))

     387          {

     388              throw new RuntimeException('Could not find matching pattern end for left parenthesis at pos ' . $ret['tokens'][$openSubpatterns[0]]['pos']);

     389          }

     390   

     391          return $ret;

     392      }

     393  }