wimCMS • Blick zurück von phpBB 1.0.0 bis heute

Verzeichnisstruktur phpBB-3.3.15

Veröffentlicht: 28.08.2024

So funktioniert es

Verzeichnis-Info phpBB-1.0.0 / auth.php	Auf das letzte Element klicken. Dies geht jeweils einen Schritt zurück
admin	Auf das Icon klicken, dies öffnet das Verzeichnis. Nochmal klicken schließt das Verzeichnis. Auf den Verzeichnisnamen klicken, dies zeigt nur das Verzeichnis mit Inhalt an
(Beispiel Datei-Icons)	Auf das Icon klicken, um den Quellcode anzuzeigen

UrlFilter.php

Zuletzt modifiziert: 02.04.2025, 15:04 - Dateigröße: 7.00 KiB


     001  <?php

     002   

     003  /**

     004  * @package   s9e\TextFormatter

     005  * @copyright Copyright (c) 2010-2022 The s9e authors

     006  * @license   http://www.opensource.org/licenses/mit-license.php The MIT License

     007  */

     008  namespace s9e\TextFormatter\Parser\AttributeFilters;

     009   

     010  use s9e\TextFormatter\Parser\Logger;

     011   

     012  class UrlFilter

     013  {

     014      /**

     015      * Filter a URL

     016      *

     017      * @param  mixed  $attrValue Original URL

     018      * @param  array  $urlConfig URL config

     019      * @param  Logger $logger    Parser's logger

     020      * @return mixed             Cleaned up URL if valid, FALSE otherwise

     021      */

     022      public static function filter($attrValue, array $urlConfig, Logger $logger = null)

     023      {

     024          /**

     025          * Trim the URL to conform with HTML5 then parse it

     026          * @link http://dev.w3.org/html5/spec/links.html#attr-hyperlink-href

     027          */

     028          $p = self::parseUrl(trim($attrValue));

     029   

     030          $error = self::validateUrl($urlConfig, $p);

     031          if (!empty($error))

     032          {

     033              if (isset($logger))

     034              {

     035                  $p['attrValue'] = $attrValue;

     036                  $logger->err($error, $p);

     037              }

     038   

     039              return false;

     040          }

     041   

     042          return self::rebuildUrl($p);

     043      }

     044   

     045      /**

     046      * Parse a URL and return its components

     047      *

     048      * Similar to PHP's own parse_url() except that all parts are always returned

     049      *

     050      * @param  string $url Original URL

     051      * @return array

     052      */

     053      protected static function parseUrl($url)

     054      {

     055          $regexp = '(^(?:([a-z][-+.\\w]*):)?(?://(?:([^:/?#]*)(?::([^/?#]*)?)?@)?(?:(\\[[a-f\\d:]+\\]|[^:/?#]+)(?::(\\d*))?)?(?![^/?#]))?([^?#]*)(\\?[^#]*)?(#.*)?$)Di';

     056   

     057          // NOTE: this regexp always matches because of the last three captures

     058          preg_match($regexp, $url, $m);

     059   

     060          $parts  = [];

     061          $tokens = ['scheme', 'user', 'pass', 'host', 'port', 'path', 'query', 'fragment'];

     062          foreach ($tokens as $i => $name)

     063          {

     064              $parts[$name] = $m[$i + 1] ?? '';

     065          }

     066   

     067          /**

     068          * @link http://tools.ietf.org/html/rfc3986#section-3.1

     069          *

     070          * 'An implementation should accept uppercase letters as equivalent to lowercase in

     071          * scheme names (e.g., allow "HTTP" as well as "http") for the sake of robustness but

     072          * should only produce lowercase scheme names for consistency.'

     073          */

     074          $parts['scheme'] = strtolower($parts['scheme']);

     075   

     076          /**

     077          * Normalize the domain label separators and remove trailing dots

     078          * @link http://url.spec.whatwg.org/#domain-label-separators

     079          */

     080          $parts['host'] = rtrim(preg_replace("/\xE3\x80\x82|\xEF(?:\xBC\x8E|\xBD\xA1)/s", '.', $parts['host']), '.');

     081   

     082          // Test whether host has non-ASCII characters and punycode it if possible

     083          if (preg_match('#[^[:ascii:]]#', $parts['host']) && function_exists('idn_to_ascii'))

     084          {

     085              $variant = (defined('INTL_IDNA_VARIANT_UTS46')) ? INTL_IDNA_VARIANT_UTS46 : 0;

     086              $parts['host'] = idn_to_ascii($parts['host'], 0, $variant);

     087          }

     088   

     089          return $parts;

     090      }

     091   

     092      /**

     093      * Rebuild a parsed URL

     094      *

     095      * @param  array  $p Parsed URL

     096      * @return string

     097      */

     098      protected static function rebuildUrl(array $p)

     099      {

     100          $url = '';

     101          if ($p['scheme'] !== '')

     102          {

     103              $url .= $p['scheme'] . ':';

     104          }

     105          if ($p['host'] !== '')

     106          {

     107              $url .= '//';

     108   

     109              // Add the credentials if applicable

     110              if ($p['user'] !== '')

     111              {

     112                  // Reencode the credentials in case there are invalid chars in them, or suspicious

     113                  // characters such as : or @ that could confuse a browser into connecting to the

     114                  // wrong host (or at least, to a host that is different than the one we thought)

     115                  $url .= rawurlencode(urldecode($p['user']));

     116   

     117                  if ($p['pass'] !== '')

     118                  {

     119                      $url .= ':' . rawurlencode(urldecode($p['pass']));

     120                  }

     121   

     122                  $url .= '@';

     123              }

     124   

     125              $url .= $p['host'];

     126   

     127              // Append the port number (note that as per the regexp it can only contain digits)

     128              if ($p['port'] !== '')

     129              {

     130                  $url .= ':' . $p['port'];

     131              }

     132          }

     133          elseif ($p['scheme'] === 'file')

     134          {

     135              // Allow the file: scheme to not have a host and ensure it starts with slashes

     136              $url .= '//';

     137          }

     138   

     139          // Build the path, including the query and fragment parts

     140          $path = $p['path'] . $p['query'] . $p['fragment'];

     141   

     142          /**

     143          * "For consistency, URI producers and normalizers should use uppercase hexadecimal digits

     144          * for all percent- encodings."

     145          *

     146          * @link http://tools.ietf.org/html/rfc3986#section-2.1

     147          */

     148          $path = preg_replace_callback(

     149              '/%.?[a-f]/',

     150              function ($m)

     151              {

     152                  return strtoupper($m[0]);

     153              },

     154              $path

     155          );

     156   

     157          // Append the sanitized path to the URL

     158          $url .= self::sanitizeUrl($path);

     159   

     160          // Replace the first colon if there's no scheme and it could potentially be interpreted as

     161          // the scheme separator

     162          if (!$p['scheme'])

     163          {

     164              $url = preg_replace('#^([^/]*):#', '$1%3A', $url);

     165          }

     166   

     167          return $url;

     168      }

     169   

     170      /**

     171      * Sanitize a URL for safe use regardless of context

     172      *

     173      * This method URL-encodes some sensitive characters in case someone would want to use the URL in

     174      * some JavaScript thingy, or in CSS. We also encode characters that are not allowed in the path

     175      * of a URL as defined in RFC 3986 appendix A, including percent signs that are not immediately

     176      * followed by two hex digits.

     177      *

     178      * " and ' to prevent breaking out of quotes (JavaScript or otherwise)

     179      * ( and ) to prevent the use of functions in JavaScript (eval()) or CSS (expression())

     180      * < and > to prevent breaking out of <script>

     181      * \r and \n because they're illegal in JavaScript

     182      * [ and ] because the W3 validator rejects them and they "should" be escaped as per RFC 3986

     183      * Non-ASCII characters as per RFC 3986

     184      * Control codes and spaces, as per RFC 3986

     185      *

     186      * @link http://sla.ckers.org/forum/read.php?2,51478

     187      * @link http://timelessrepo.com/json-isnt-a-javascript-subset

     188      * @link http://www.ietf.org/rfc/rfc3986.txt

     189      * @link http://stackoverflow.com/a/1547922

     190      * @link http://tools.ietf.org/html/rfc3986#appendix-A

     191      *

     192      * @param  string $url Original URL

     193      * @return string      Sanitized URL

     194      */

     195      public static function sanitizeUrl($url)

     196      {

     197          return preg_replace_callback(

     198              '/%(?![0-9A-Fa-f]{2})|[^!#-&*-;=?-Z_a-z~]/',

     199              function ($m)

     200              {

     201                  return rawurlencode($m[0]);

     202              },

     203              $url

     204          );

     205      }

     206   

     207      /**

     208      * Validate a parsed URL

     209      *

     210      * @param  array      $urlConfig URL config

     211      * @param  array      $p         Parsed URL

     212      * @return string|null           Error message if invalid, or NULL

     213      */

     214      protected static function validateUrl(array $urlConfig, array $p)

     215      {

     216          if ($p['scheme'] !== '' && !preg_match($urlConfig['allowedSchemes'], $p['scheme']))

     217          {

     218              return 'URL scheme is not allowed';

     219          }

     220   

     221          if ($p['host'] !== '')

     222          {

     223              /**

     224              * Test whether the host is valid

     225              * @link http://tools.ietf.org/html/rfc1035#section-2.3.1

     226              * @link http://tools.ietf.org/html/rfc1123#section-2

     227              */

     228              $regexp = '/^(?!-)[-a-z0-9]{0,62}[a-z0-9](?:\\.(?!-)[-a-z0-9]{0,62}[a-z0-9])*$/i';

     229              if (!preg_match($regexp, $p['host']))

     230              {

     231                  // If the host invalid, retest as an IPv4 and IPv6 address (IPv6 in brackets)

     232                  if (!NetworkFilter::filterIpv4($p['host'])

     233                   && !NetworkFilter::filterIpv6(preg_replace('/^\\[(.*)\\]$/', '$1', $p['host'])))

     234                  {

     235                      return 'URL host is invalid';

     236                  }

     237              }

     238   

     239              if ((isset($urlConfig['disallowedHosts']) && preg_match($urlConfig['disallowedHosts'], $p['host']))

     240               || (isset($urlConfig['restrictedHosts']) && !preg_match($urlConfig['restrictedHosts'], $p['host'])))

     241              {

     242                  return 'URL host is not allowed';

     243              }

     244          }

     245          elseif (preg_match('(^(?:(?:f|ht)tps?)$)', $p['scheme']))

     246          {

     247              return 'Missing host';

     248          }

     249      }

     250  }

Verzeichnisstruktur phpBB-3.3.15

Zuletzt modifiziert: 02.04.2025, 15:04 - Dateigröße: 7.00 KiB

UrlFilter.php


     001  <?php

     002   

     003  /**

     004  * @package   s9e\TextFormatter

     005  * @copyright Copyright (c) 2010-2022 The s9e authors

     006  * @license   http://www.opensource.org/licenses/mit-license.php The MIT License

     007  */

     008  namespace s9e\TextFormatter\Parser\AttributeFilters;

     009   

     010  use s9e\TextFormatter\Parser\Logger;

     011   

     012  class UrlFilter

     013  {

     014      /**

     015      * Filter a URL

     016      *

     017      * @param  mixed  $attrValue Original URL

     018      * @param  array  $urlConfig URL config

     019      * @param  Logger $logger    Parser's logger

     020      * @return mixed             Cleaned up URL if valid, FALSE otherwise

     021      */

     022      public static function filter($attrValue, array $urlConfig, Logger $logger = null)

     023      {

     024          /**

     025          * Trim the URL to conform with HTML5 then parse it

     026          * @link http://dev.w3.org/html5/spec/links.html#attr-hyperlink-href

     027          */

     028          $p = self::parseUrl(trim($attrValue));

     029   

     030          $error = self::validateUrl($urlConfig, $p);

     031          if (!empty($error))

     032          {

     033              if (isset($logger))

     034              {

     035                  $p['attrValue'] = $attrValue;

     036                  $logger->err($error, $p);

     037              }

     038   

     039              return false;

     040          }

     041   

     042          return self::rebuildUrl($p);

     043      }

     044   

     045      /**

     046      * Parse a URL and return its components

     047      *

     048      * Similar to PHP's own parse_url() except that all parts are always returned

     049      *

     050      * @param  string $url Original URL

     051      * @return array

     052      */

     053      protected static function parseUrl($url)

     054      {

     055          $regexp = '(^(?:([a-z][-+.\\w]*):)?(?://(?:([^:/?#]*)(?::([^/?#]*)?)?@)?(?:(\\[[a-f\\d:]+\\]|[^:/?#]+)(?::(\\d*))?)?(?![^/?#]))?([^?#]*)(\\?[^#]*)?(#.*)?$)Di';

     056   

     057          // NOTE: this regexp always matches because of the last three captures

     058          preg_match($regexp, $url, $m);

     059   

     060          $parts  = [];

     061          $tokens = ['scheme', 'user', 'pass', 'host', 'port', 'path', 'query', 'fragment'];

     062          foreach ($tokens as $i => $name)

     063          {

     064              $parts[$name] = $m[$i + 1] ?? '';

     065          }

     066   

     067          /**

     068          * @link http://tools.ietf.org/html/rfc3986#section-3.1

     069          *

     070          * 'An implementation should accept uppercase letters as equivalent to lowercase in

     071          * scheme names (e.g., allow "HTTP" as well as "http") for the sake of robustness but

     072          * should only produce lowercase scheme names for consistency.'

     073          */

     074          $parts['scheme'] = strtolower($parts['scheme']);

     075   

     076          /**

     077          * Normalize the domain label separators and remove trailing dots

     078          * @link http://url.spec.whatwg.org/#domain-label-separators

     079          */

     080          $parts['host'] = rtrim(preg_replace("/\xE3\x80\x82|\xEF(?:\xBC\x8E|\xBD\xA1)/s", '.', $parts['host']), '.');

     081   

     082          // Test whether host has non-ASCII characters and punycode it if possible

     083          if (preg_match('#[^[:ascii:]]#', $parts['host']) && function_exists('idn_to_ascii'))

     084          {

     085              $variant = (defined('INTL_IDNA_VARIANT_UTS46')) ? INTL_IDNA_VARIANT_UTS46 : 0;

     086              $parts['host'] = idn_to_ascii($parts['host'], 0, $variant);

     087          }

     088   

     089          return $parts;

     090      }

     091   

     092      /**

     093      * Rebuild a parsed URL

     094      *

     095      * @param  array  $p Parsed URL

     096      * @return string

     097      */

     098      protected static function rebuildUrl(array $p)

     099      {

     100          $url = '';

     101          if ($p['scheme'] !== '')

     102          {

     103              $url .= $p['scheme'] . ':';

     104          }

     105          if ($p['host'] !== '')

     106          {

     107              $url .= '//';

     108   

     109              // Add the credentials if applicable

     110              if ($p['user'] !== '')

     111              {

     112                  // Reencode the credentials in case there are invalid chars in them, or suspicious

     113                  // characters such as : or @ that could confuse a browser into connecting to the

     114                  // wrong host (or at least, to a host that is different than the one we thought)

     115                  $url .= rawurlencode(urldecode($p['user']));

     116   

     117                  if ($p['pass'] !== '')

     118                  {

     119                      $url .= ':' . rawurlencode(urldecode($p['pass']));

     120                  }

     121   

     122                  $url .= '@';

     123              }

     124   

     125              $url .= $p['host'];

     126   

     127              // Append the port number (note that as per the regexp it can only contain digits)

     128              if ($p['port'] !== '')

     129              {

     130                  $url .= ':' . $p['port'];

     131              }

     132          }

     133          elseif ($p['scheme'] === 'file')

     134          {

     135              // Allow the file: scheme to not have a host and ensure it starts with slashes

     136              $url .= '//';

     137          }

     138   

     139          // Build the path, including the query and fragment parts

     140          $path = $p['path'] . $p['query'] . $p['fragment'];

     141   

     142          /**

     143          * "For consistency, URI producers and normalizers should use uppercase hexadecimal digits

     144          * for all percent- encodings."

     145          *

     146          * @link http://tools.ietf.org/html/rfc3986#section-2.1

     147          */

     148          $path = preg_replace_callback(

     149              '/%.?[a-f]/',

     150              function ($m)

     151              {

     152                  return strtoupper($m[0]);

     153              },

     154              $path

     155          );

     156   

     157          // Append the sanitized path to the URL

     158          $url .= self::sanitizeUrl($path);

     159   

     160          // Replace the first colon if there's no scheme and it could potentially be interpreted as

     161          // the scheme separator

     162          if (!$p['scheme'])

     163          {

     164              $url = preg_replace('#^([^/]*):#', '$1%3A', $url);

     165          }

     166   

     167          return $url;

     168      }

     169   

     170      /**

     171      * Sanitize a URL for safe use regardless of context

     172      *

     173      * This method URL-encodes some sensitive characters in case someone would want to use the URL in

     174      * some JavaScript thingy, or in CSS. We also encode characters that are not allowed in the path

     175      * of a URL as defined in RFC 3986 appendix A, including percent signs that are not immediately

     176      * followed by two hex digits.

     177      *

     178      * " and ' to prevent breaking out of quotes (JavaScript or otherwise)

     179      * ( and ) to prevent the use of functions in JavaScript (eval()) or CSS (expression())

     180      * < and > to prevent breaking out of <script>

     181      * \r and \n because they're illegal in JavaScript

     182      * [ and ] because the W3 validator rejects them and they "should" be escaped as per RFC 3986

     183      * Non-ASCII characters as per RFC 3986

     184      * Control codes and spaces, as per RFC 3986

     185      *

     186      * @link http://sla.ckers.org/forum/read.php?2,51478

     187      * @link http://timelessrepo.com/json-isnt-a-javascript-subset

     188      * @link http://www.ietf.org/rfc/rfc3986.txt

     189      * @link http://stackoverflow.com/a/1547922

     190      * @link http://tools.ietf.org/html/rfc3986#appendix-A

     191      *

     192      * @param  string $url Original URL

     193      * @return string      Sanitized URL

     194      */

     195      public static function sanitizeUrl($url)

     196      {

     197          return preg_replace_callback(

     198              '/%(?![0-9A-Fa-f]{2})|[^!#-&*-;=?-Z_a-z~]/',

     199              function ($m)

     200              {

     201                  return rawurlencode($m[0]);

     202              },

     203              $url

     204          );

     205      }

     206   

     207      /**

     208      * Validate a parsed URL

     209      *

     210      * @param  array      $urlConfig URL config

     211      * @param  array      $p         Parsed URL

     212      * @return string|null           Error message if invalid, or NULL

     213      */

     214      protected static function validateUrl(array $urlConfig, array $p)

     215      {

     216          if ($p['scheme'] !== '' && !preg_match($urlConfig['allowedSchemes'], $p['scheme']))

     217          {

     218              return 'URL scheme is not allowed';

     219          }

     220   

     221          if ($p['host'] !== '')

     222          {

     223              /**

     224              * Test whether the host is valid

     225              * @link http://tools.ietf.org/html/rfc1035#section-2.3.1

     226              * @link http://tools.ietf.org/html/rfc1123#section-2

     227              */

     228              $regexp = '/^(?!-)[-a-z0-9]{0,62}[a-z0-9](?:\\.(?!-)[-a-z0-9]{0,62}[a-z0-9])*$/i';

     229              if (!preg_match($regexp, $p['host']))

     230              {

     231                  // If the host invalid, retest as an IPv4 and IPv6 address (IPv6 in brackets)

     232                  if (!NetworkFilter::filterIpv4($p['host'])

     233                   && !NetworkFilter::filterIpv6(preg_replace('/^\\[(.*)\\]$/', '$1', $p['host'])))

     234                  {

     235                      return 'URL host is invalid';

     236                  }

     237              }

     238   

     239              if ((isset($urlConfig['disallowedHosts']) && preg_match($urlConfig['disallowedHosts'], $p['host']))

     240               || (isset($urlConfig['restrictedHosts']) && !preg_match($urlConfig['restrictedHosts'], $p['host'])))

     241              {

     242                  return 'URL host is not allowed';

     243              }

     244          }

     245          elseif (preg_match('(^(?:(?:f|ht)tps?)$)', $p['scheme']))

     246          {

     247              return 'Missing host';

     248          }

     249      }

     250  }