Verzeichnisstruktur phpBB-3.3.15
- Veröffentlicht
- 28.08.2024
So funktioniert es
|
Auf das letzte Element klicken. Dies geht jeweils einen Schritt zurück. |
Auf das Icon klicken, dies öffnet das Verzeichnis. Nochmal klicken schließt das Verzeichnis. |
|
(Beispiel Datei-Icons)
|
Auf das Icon klicken, um den Quellcode anzuzeigen |
UrlFilter.php
001 <?php
002
003 /**
004 * @package s9e\TextFormatter
005 * @copyright Copyright (c) 2010-2022 The s9e authors
006 * @license http://www.opensource.org/licenses/mit-license.php The MIT License
007 */
008 namespace s9e\TextFormatter\Parser\AttributeFilters;
009
010 use s9e\TextFormatter\Parser\Logger;
011
/**
 * Attribute filter that validates a URL against a URL config and returns a normalized,
 * context-safe version of it
 */
class UrlFilter
{
	/**
	 * Filter a URL
	 *
	 * The value is trimmed, split into its components, validated against the URL config and,
	 * if valid, rebuilt into a normalized URL.
	 *
	 * @param  mixed       $attrValue Original URL
	 * @param  array       $urlConfig URL config; 'allowedSchemes' is a regexp, 'disallowedHosts'
	 *                                and 'restrictedHosts' are optional regexps
	 * @param  Logger|null $logger    Parser's logger, if any
	 * @return mixed                  Cleaned up URL if valid, FALSE otherwise
	 */
	public static function filter($attrValue, array $urlConfig, ?Logger $logger = null)
	{
		/**
		 * Trim the URL to conform with HTML5 then parse it
		 * @link http://dev.w3.org/html5/spec/links.html#attr-hyperlink-href
		 */
		$p = self::parseUrl(trim($attrValue));

		$error = self::validateUrl($urlConfig, $p);
		if (!empty($error))
		{
			if (isset($logger))
			{
				// Log the parsed components along with the untouched original value
				$p['attrValue'] = $attrValue;
				$logger->err($error, $p);
			}

			return false;
		}

		return self::rebuildUrl($p);
	}

	/**
	 * Parse a URL and return its components
	 *
	 * Similar to PHP's own parse_url() except that all parts are always returned. Missing parts
	 * are returned as empty strings. The query and the fragment keep their leading "?" and "#".
	 *
	 * @param  string $url Original URL
	 * @return array       Map with the keys scheme, user, pass, host, port, path, query, fragment
	 */
	protected static function parseUrl($url)
	{
		$regexp = '(^(?:([a-z][-+.\\w]*):)?(?://(?:([^:/?#]*)(?::([^/?#]*)?)?@)?(?:(\\[[a-f\\d:]+\\]|[^:/?#]+)(?::(\\d*))?)?(?![^/?#]))?([^?#]*)(\\?[^#]*)?(#.*)?$)Di';

		// NOTE: this regexp always matches because of the last three captures
		preg_match($regexp, $url, $m);

		// Capture N+1 holds the Nth token. Trailing captures that did not participate in the
		// match are absent from $m, hence the fallback to an empty string
		$parts  = [];
		$tokens = ['scheme', 'user', 'pass', 'host', 'port', 'path', 'query', 'fragment'];
		foreach ($tokens as $i => $name)
		{
			$parts[$name] = $m[$i + 1] ?? '';
		}

		/**
		 * @link http://tools.ietf.org/html/rfc3986#section-3.1
		 *
		 * 'An implementation should accept uppercase letters as equivalent to lowercase in
		 * scheme names (e.g., allow "HTTP" as well as "http") for the sake of robustness but
		 * should only produce lowercase scheme names for consistency.'
		 */
		$parts['scheme'] = strtolower($parts['scheme']);

		/**
		 * Normalize the domain label separators and remove trailing dots
		 * @link http://url.spec.whatwg.org/#domain-label-separators
		 */
		$parts['host'] = rtrim(preg_replace("/\xE3\x80\x82|\xEF(?:\xBC\x8E|\xBD\xA1)/s", '.', $parts['host']), '.');

		// Test whether host has non-ASCII characters and punycode it if possible
		if (preg_match('#[^[:ascii:]]#', $parts['host']) && function_exists('idn_to_ascii'))
		{
			// NOTE(review): idn_to_ascii() can return FALSE on failure; the value is stored
			// as-is and left for validateUrl() to deal with
			$variant = (defined('INTL_IDNA_VARIANT_UTS46')) ? INTL_IDNA_VARIANT_UTS46 : 0;
			$parts['host'] = idn_to_ascii($parts['host'], 0, $variant);
		}

		return $parts;
	}

	/**
	 * Rebuild a parsed URL
	 *
	 * @param  array  $p Parsed URL, as returned by parseUrl()
	 * @return string    Normalized and sanitized URL
	 */
	protected static function rebuildUrl(array $p)
	{
		$url = '';
		if ($p['scheme'] !== '')
		{
			$url .= $p['scheme'] . ':';
		}
		if ($p['host'] !== '')
		{
			$url .= '//';

			// Add the credentials if applicable
			if ($p['user'] !== '')
			{
				// Reencode the credentials in case there are invalid chars in them, or suspicious
				// characters such as : or @ that could confuse a browser into connecting to the
				// wrong host (or at least, to a host that is different than the one we thought)
				//
				// rawurldecode() is used rather than urldecode() because a "+" in the userinfo
				// is a literal plus sign, not an encoded space
				$url .= rawurlencode(rawurldecode($p['user']));

				if ($p['pass'] !== '')
				{
					$url .= ':' . rawurlencode(rawurldecode($p['pass']));
				}

				$url .= '@';
			}

			$url .= $p['host'];

			// Append the port number (note that as per the regexp it can only contain digits)
			if ($p['port'] !== '')
			{
				$url .= ':' . $p['port'];
			}
		}
		elseif ($p['scheme'] === 'file')
		{
			// Allow the file: scheme to not have a host and ensure it starts with slashes
			$url .= '//';
		}

		// Build the path, including the query and fragment parts
		$path = $p['path'] . $p['query'] . $p['fragment'];

		/**
		 * "For consistency, URI producers and normalizers should use uppercase hexadecimal digits
		 * for all percent- encodings."
		 *
		 * Only a percent sign followed by exactly two hex digits is a percent-encoding. Anything
		 * else is left untouched so that the case of literal characters is preserved
		 *
		 * @link http://tools.ietf.org/html/rfc3986#section-2.1
		 */
		$path = preg_replace_callback(
			'/%[0-9a-f]{2}/i',
			function ($m)
			{
				return strtoupper($m[0]);
			},
			$path
		);

		// Append the sanitized path to the URL
		$url .= self::sanitizeUrl($path);

		// Replace the first colon if there's no scheme and it could potentially be interpreted as
		// the scheme separator
		if (!$p['scheme'])
		{
			$url = preg_replace('#^([^/]*):#', '$1%3A', $url);
		}

		return $url;
	}

	/**
	 * Sanitize a URL for safe use regardless of context
	 *
	 * This method URL-encodes some sensitive characters in case someone would want to use the URL in
	 * some JavaScript thingy, or in CSS. We also encode characters that are not allowed in the path
	 * of a URL as defined in RFC 3986 appendix A, including percent signs that are not immediately
	 * followed by two hex digits.
	 *
	 * " and ' to prevent breaking out of quotes (JavaScript or otherwise)
	 * ( and ) to prevent the use of functions in JavaScript (eval()) or CSS (expression())
	 * < and > to prevent breaking out of <script>
	 * \r and \n because they're illegal in JavaScript
	 * [ and ] because the W3 validator rejects them and they "should" be escaped as per RFC 3986
	 * Non-ASCII characters as per RFC 3986
	 * Control codes and spaces, as per RFC 3986
	 *
	 * @link http://sla.ckers.org/forum/read.php?2,51478
	 * @link http://timelessrepo.com/json-isnt-a-javascript-subset
	 * @link http://www.ietf.org/rfc/rfc3986.txt
	 * @link http://stackoverflow.com/a/1547922
	 * @link http://tools.ietf.org/html/rfc3986#appendix-A
	 *
	 * @param  string $url Original URL
	 * @return string      Sanitized URL
	 */
	public static function sanitizeUrl($url)
	{
		return preg_replace_callback(
			'/%(?![0-9A-Fa-f]{2})|[^!#-&*-;=?-Z_a-z~]/',
			function ($m)
			{
				return rawurlencode($m[0]);
			},
			$url
		);
	}

	/**
	 * Validate a parsed URL
	 *
	 * @param  array       $urlConfig URL config
	 * @param  array       $p         Parsed URL
	 * @return string|null            Error message if invalid, or NULL
	 */
	protected static function validateUrl(array $urlConfig, array $p)
	{
		// A URL with no scheme is not tested against the list of allowed schemes
		if ($p['scheme'] !== '' && !preg_match($urlConfig['allowedSchemes'], $p['scheme']))
		{
			return 'URL scheme is not allowed';
		}

		if ($p['host'] !== '')
		{
			/**
			 * Test whether the host is valid
			 * @link http://tools.ietf.org/html/rfc1035#section-2.3.1
			 * @link http://tools.ietf.org/html/rfc1123#section-2
			 */
			$regexp = '/^(?!-)[-a-z0-9]{0,62}[a-z0-9](?:\\.(?!-)[-a-z0-9]{0,62}[a-z0-9])*$/i';
			if (!preg_match($regexp, $p['host']))
			{
				// If the host invalid, retest as an IPv4 and IPv6 address (IPv6 in brackets)
				if (!NetworkFilter::filterIpv4($p['host'])
				 && !NetworkFilter::filterIpv6(preg_replace('/^\\[(.*)\\]$/', '$1', $p['host'])))
				{
					return 'URL host is invalid';
				}
			}

			// The host must not match disallowedHosts and, if restrictedHosts is set, it must
			// match it
			if ((isset($urlConfig['disallowedHosts']) && preg_match($urlConfig['disallowedHosts'], $p['host']))
			 || (isset($urlConfig['restrictedHosts']) && !preg_match($urlConfig['restrictedHosts'], $p['host'])))
			{
				return 'URL host is not allowed';
			}
		}
		elseif (preg_match('(^(?:(?:f|ht)tps?)$)', $p['scheme']))
		{
			// ftp, ftps, http and https URLs require a host
			return 'Missing host';
		}

		return null;
	}
}