Verzeichnisstruktur phpBB-3.3.16
- Veröffentlicht
- 27.04.2026
So funktioniert es
|
|
Auf das letzte Element klicken. Dies geht jeweils einen Schritt zurück. |
Auf das Icon klicken, dies öffnet das Verzeichnis. Nochmal klicken schließt das Verzeichnis. |
|
|
(Beispiel Datei-Icons)
|
Auf das Icon klicken, um den Quellcode anzuzeigen. |
Idn.php
001 <?php
002
003 /*
004 * This file is part of the Symfony package.
005 *
006 * (c) Fabien Potencier <fabien@symfony.com> and Trevor Rowbotham <trevor.rowbotham@pm.me>
007 *
008 * For the full copyright and license information, please view the LICENSE
009 * file that was distributed with this source code.
010 */
011
012 namespace Symfony\Polyfill\Intl\Idn;
013
014 use Symfony\Polyfill\Intl\Idn\Resources\unidata\DisallowedRanges;
015 use Symfony\Polyfill\Intl\Idn\Resources\unidata\Regex;
016
017 /**
018 * @see https://www.unicode.org/reports/tr46/
019 *
020 * @internal
021 */
022 final class Idn
023 {
// Error bit flags. Each flag occupies its own bit so that several of them can be OR-ed into the
// single integer reported under the "errors" key of $idna_info.
public const ERROR_EMPTY_LABEL = 1 << 0;
public const ERROR_LABEL_TOO_LONG = 1 << 1;
public const ERROR_DOMAIN_NAME_TOO_LONG = 1 << 2;
public const ERROR_LEADING_HYPHEN = 1 << 3;
public const ERROR_TRAILING_HYPHEN = 1 << 4;
public const ERROR_HYPHEN_3_4 = 1 << 5;
public const ERROR_LEADING_COMBINING_MARK = 1 << 6;
public const ERROR_DISALLOWED = 1 << 7;
public const ERROR_PUNYCODE = 1 << 8;
public const ERROR_LABEL_HAS_DOT = 1 << 9;
public const ERROR_INVALID_ACE_LABEL = 1 << 10;
public const ERROR_BIDI = 1 << 11;
public const ERROR_CONTEXTJ = 1 << 12;
public const ERROR_CONTEXTO_PUNCTUATION = 1 << 13;
public const ERROR_CONTEXTO_DIGITS = 1 << 14;

// Values accepted for the $variant argument of idn_to_ascii() and idn_to_utf8().
public const INTL_IDNA_VARIANT_2003 = 0;
public const INTL_IDNA_VARIANT_UTS46 = 1;

// Bit flags accepted for the $options argument of idn_to_ascii() and idn_to_utf8().
public const IDNA_DEFAULT = 0;
public const IDNA_ALLOW_UNASSIGNED = 1 << 0;
public const IDNA_USE_STD3_RULES = 1 << 1;
public const IDNA_CHECK_BIDI = 1 << 2;
public const IDNA_CHECK_CONTEXTJ = 1 << 3;
public const IDNA_NONTRANSITIONAL_TO_ASCII = 1 << 4;
public const IDNA_NONTRANSITIONAL_TO_UNICODE = 1 << 5;

// Length limits, in bytes, enforced by validateDomainAndLabelLength().
public const MAX_DOMAIN_SIZE = 253;
public const MAX_LABEL_SIZE = 63;

// Parameters used by the Punycode encoder and decoder below.
public const BASE = 36;
public const TMIN = 1;
public const TMAX = 26;
public const SKEW = 38;
public const DAMP = 700;
public const INITIAL_BIAS = 72;
public const INITIAL_N = 128;
public const DELIMITER = '-';
public const MAX_INT = 0x7FFFFFFF;

/**
 * Lookup table indexed by byte value (0-255) that yields the Punycode digit value of that byte.
 *
 * ASCII letters of either case map to 0-25, the digits "0"-"9" map to 26-35, and every other
 * byte maps to -1 to signal that it does not represent a digit.
 *
 * @var array<int, int>
 */
private static $basicToDigit = [
    // 0x00 - 0x1F
    -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
    -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,

    // 0x20 - 0x3F ("0"-"9" start at 0x30)
    -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
    26, 27, 28, 29, 30, 31, 32, 33, 34, 35, -1, -1, -1, -1, -1, -1,

    // 0x40 - 0x5F ("A"-"Z" start at 0x41)
    -1, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14,
    15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, -1, -1, -1, -1, -1,

    // 0x60 - 0x7F ("a"-"z" start at 0x61)
    -1, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14,
    15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, -1, -1, -1, -1, -1,

    // 0x80 - 0x9F
    -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
    -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,

    // 0xA0 - 0xBF
    -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
    -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,

    // 0xC0 - 0xDF
    -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
    -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,

    // 0xE0 - 0xFF
    -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
    -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
];

/**
 * Table loaded on first use by isValidContextJ() from Resources/unidata/virama.php.
 *
 * @var array<int, int>
 */
private static $virama;

/**
 * Table loaded on first use by lookupCodePointStatus() from Resources/unidata/mapped.php.
 *
 * @var array<int, string>
 */
private static $mapped;

/**
 * Table loaded on first use by lookupCodePointStatus() from Resources/unidata/ignored.php.
 *
 * @var array<int, bool>
 */
private static $ignored;

/**
 * Table loaded on first use by lookupCodePointStatus() from Resources/unidata/deviation.php.
 *
 * @var array<int, string>
 */
private static $deviation;

/**
 * Table loaded on first use by lookupCodePointStatus() from Resources/unidata/disallowed.php.
 *
 * @var array<int, bool>
 */
private static $disallowed;

/**
 * Table loaded on first use by lookupCodePointStatus() from
 * Resources/unidata/disallowed_STD3_mapped.php.
 *
 * @var array<int, string>
 */
private static $disallowed_STD3_mapped;

/**
 * Table loaded on first use by lookupCodePointStatus() from
 * Resources/unidata/disallowed_STD3_valid.php.
 *
 * @var array<int, bool>
 */
private static $disallowed_STD3_valid;

/**
 * Set to true once lookupCodePointStatus() has started loading the mapping tables.
 *
 * @var bool
 */
private static $mappingTableLoaded = false;
135
/**
 * Converts a domain name to its ASCII form: the domain is processed label by label and every
 * label that still contains non-ASCII bytes is Punycode-encoded and prefixed with "xn--".
 *
 * @see https://www.unicode.org/reports/tr46/#ToASCII
 *
 * @param string $domainName Domain name to convert
 * @param int    $options    Bitmask of the self::IDNA_* option flags
 * @param int    $variant    self::INTL_IDNA_VARIANT_UTS46, or self::INTL_IDNA_VARIANT_2003 which
 *                           triggers a silenced E_USER_DEPRECATED notice
 * @param array  $idna_info  Overwritten with the keys "result", "isTransitionalDifferent" and
 *                           "errors" (a bitmask of the self::ERROR_* flags)
 *
 * @return string|false The converted domain name, or false when at least one error was recorded
 *
 * @throws \ValueError On PHP 8.4.0 and later when $domainName is an empty string
 */
public static function idn_to_ascii($domainName, $options = self::IDNA_DEFAULT, $variant = self::INTL_IDNA_VARIANT_UTS46, &$idna_info = [])
{
    // PHP_VERSION_ID is exactly 80400 on PHP 8.4.0, so the comparison has to be inclusive for the
    // empty-domain check to cover the first 8.4 release as well.
    if (\PHP_VERSION_ID >= 80400 && '' === $domainName) {
        throw new \ValueError('idn_to_ascii(): Argument #1 ($domain) cannot be empty');
    }

    if (self::INTL_IDNA_VARIANT_2003 === $variant) {
        @trigger_error('idn_to_ascii(): INTL_IDNA_VARIANT_2003 is deprecated', \E_USER_DEPRECATED);
    }

    // Translate the integer bitmask into the named processing options used by process().
    $options = [
        'CheckHyphens' => true,
        'CheckBidi' => self::INTL_IDNA_VARIANT_2003 === $variant || 0 !== ($options & self::IDNA_CHECK_BIDI),
        'CheckJoiners' => self::INTL_IDNA_VARIANT_UTS46 === $variant && 0 !== ($options & self::IDNA_CHECK_CONTEXTJ),
        'UseSTD3ASCIIRules' => 0 !== ($options & self::IDNA_USE_STD3_RULES),
        'Transitional_Processing' => self::INTL_IDNA_VARIANT_2003 === $variant || 0 === ($options & self::IDNA_NONTRANSITIONAL_TO_ASCII),
        'VerifyDnsLength' => true,
    ];
    $info = new Info();
    $labels = self::process((string) $domainName, $options, $info);

    foreach ($labels as $i => $label) {
        // Only convert labels to punycode that contain non-ASCII code points
        if (1 === preg_match('/[^\x00-\x7F]/', $label)) {
            try {
                $label = 'xn--'.self::punycodeEncode($label);
            } catch (\Exception $e) {
                // Keep the unencoded label in the result and report the failure through the flag.
                $info->errors |= self::ERROR_PUNYCODE;
            }

            $labels[$i] = $label;
        }
    }

    if ($options['VerifyDnsLength']) {
        self::validateDomainAndLabelLength($labels, $info);
    }

    $idna_info = [
        'result' => implode('.', $labels),
        'isTransitionalDifferent' => $info->transitionalDifferent,
        'errors' => $info->errors,
    ];

    return 0 === $info->errors ? $idna_info['result'] : false;
}
192
/**
 * Converts a domain name to its Unicode (UTF-8) form by running it through process(), which
 * decodes "xn--" labels and validates every label.
 *
 * @see https://www.unicode.org/reports/tr46/#ToUnicode
 *
 * @param string $domainName Domain name to convert
 * @param int    $options    Bitmask of the self::IDNA_* option flags
 * @param int    $variant    self::INTL_IDNA_VARIANT_UTS46, or self::INTL_IDNA_VARIANT_2003 which
 *                           triggers a silenced E_USER_DEPRECATED notice
 * @param array  $idna_info  Overwritten with the keys "result", "isTransitionalDifferent" and
 *                           "errors" (a bitmask of the self::ERROR_* flags)
 *
 * @return string|false The converted domain name, or false when at least one error was recorded
 *
 * @throws \ValueError On PHP 8.4.0 and later when $domainName is an empty string
 */
public static function idn_to_utf8($domainName, $options = self::IDNA_DEFAULT, $variant = self::INTL_IDNA_VARIANT_UTS46, &$idna_info = [])
{
    // PHP_VERSION_ID is exactly 80400 on PHP 8.4.0, so the comparison has to be inclusive for the
    // empty-domain check to cover the first 8.4 release as well.
    if (\PHP_VERSION_ID >= 80400 && '' === $domainName) {
        throw new \ValueError('idn_to_utf8(): Argument #1 ($domain) cannot be empty');
    }

    if (self::INTL_IDNA_VARIANT_2003 === $variant) {
        @trigger_error('idn_to_utf8(): INTL_IDNA_VARIANT_2003 is deprecated', \E_USER_DEPRECATED);
    }

    $info = new Info();
    // "VerifyDnsLength" is deliberately absent: process() treats a missing key as ToUnicode mode.
    $labels = self::process((string) $domainName, [
        'CheckHyphens' => true,
        'CheckBidi' => self::INTL_IDNA_VARIANT_2003 === $variant || 0 !== ($options & self::IDNA_CHECK_BIDI),
        'CheckJoiners' => self::INTL_IDNA_VARIANT_UTS46 === $variant && 0 !== ($options & self::IDNA_CHECK_CONTEXTJ),
        'UseSTD3ASCIIRules' => 0 !== ($options & self::IDNA_USE_STD3_RULES),
        'Transitional_Processing' => self::INTL_IDNA_VARIANT_2003 === $variant || 0 === ($options & self::IDNA_NONTRANSITIONAL_TO_UNICODE),
    ], $info);
    $idna_info = [
        'result' => implode('.', $labels),
        'isTransitionalDifferent' => $info->transitionalDifferent,
        'errors' => $info->errors,
    ];

    return 0 === $info->errors ? $idna_info['result'] : false;
}
229
/**
 * Checks every U+200C (ZWNJ) and U+200D (ZWJ) in a label against the ContextJ rules.
 *
 * A joiner is accepted when the preceding code point is listed in the virama table. A ZWNJ is
 * additionally accepted when Regex::ZWNJ matches the label from the current search offset.
 *
 * @param array<int, int> $codePoints Code points of the label, in order
 * @param string          $label      The same label as a UTF-8 string
 *
 * @return bool False as soon as one joiner satisfies neither rule, true otherwise
 */
private static function isValidContextJ(array $codePoints, $label)
{
    // The virama table is read from disk once and then kept for later calls.
    if (null === self::$virama) {
        self::$virama = require __DIR__.\DIRECTORY_SEPARATOR.'Resources'.\DIRECTORY_SEPARATOR.'unidata'.\DIRECTORY_SEPARATOR.'virama.php';
    }

    $searchFrom = 0;

    foreach ($codePoints as $index => $current) {
        $isZwnj = 0x200C === $current;

        if (!$isZwnj && 0x200D !== $current) {
            continue;
        }

        // A joiner at the very start of the label has nothing to join with.
        $previousIndex = $index - 1;

        if (!isset($codePoints[$previousIndex])) {
            return false;
        }

        // If Canonical_Combining_Class(Before(cp)) .eq. Virama Then True;
        if (isset(self::$virama[$codePoints[$previousIndex]])) {
            continue;
        }

        // If RegExpMatch((Joining_Type:{L,D})(Joining_Type:T)*\u200C(Joining_Type:T)*(Joining_Type:{R,D})) Then
        // True;
        // Generated RegExp = ([Joining_Type:{L,D}][Joining_Type:T]*\u200C[Joining_Type:T]*)[Joining_Type:{R,D}]
        if (!$isZwnj || 1 !== preg_match(Regex::ZWNJ, $label, $found, \PREG_OFFSET_CAPTURE, $searchFrom)) {
            return false;
        }

        // Advance the search offset by the byte length of the first capture group.
        $searchFrom += \strlen($found[1][0]);
    }

    return true;
}
271
/**
 * Applies the mapping step to a domain string, one code point at a time.
 *
 * "valid" and "disallowed" code points are copied unchanged, "ignored" ones are dropped,
 * "mapped" ones are replaced by their mapping, and "deviation" ones are replaced only when
 * transitional processing is on.
 *
 * @see https://www.unicode.org/reports/tr46/#ProcessingStepMap
 *
 * @param string              $input
 * @param array<string, bool> $options Reads "UseSTD3ASCIIRules" and "Transitional_Processing"
 * @param Info                $info    Its transitionalDifferent flag is set when a deviation
 *                                     code point is encountered
 *
 * @return string
 */
private static function mapCodePoints($input, array $options, Info $info)
{
    $result = '';
    $std3Rules = $options['UseSTD3ASCIIRules'];
    $isTransitional = $options['Transitional_Processing'];

    foreach (self::utf8Decode($input) as $codePoint) {
        $entry = self::lookupCodePointStatus($codePoint, $std3Rules);
        $status = $entry['status'];

        if ('disallowed' === $status || 'valid' === $status) {
            // Disallowed code points are kept here; they are reported during label validation.
            $result .= mb_chr($codePoint, 'utf-8');
        } elseif ('mapped' === $status) {
            // U+1E9E maps to "ss" when transitional processing is on.
            if ($isTransitional && 0x1E9E === $codePoint) {
                $result .= 'ss';
            } else {
                $result .= $entry['mapping'];
            }
        } elseif ('deviation' === $status) {
            $info->transitionalDifferent = true;

            if ($isTransitional) {
                $result .= $entry['mapping'];
            } else {
                $result .= mb_chr($codePoint, 'utf-8');
            }
        }
        // "ignored" code points contribute nothing to the result.
    }

    return $result;
}
315
/**
 * Runs the shared processing pipeline used by idn_to_ascii() and idn_to_utf8(): map, normalize
 * to NFC, split into labels, decode "xn--" labels and validate every label.
 *
 * Errors are never thrown from here; they are accumulated as bit flags on $info.
 *
 * @see https://www.unicode.org/reports/tr46/#Processing
 *
 * @param string              $domain
 * @param array<string, bool> $options
 * @param Info                $info    Receives the error flags and the bidi state
 *
 * @return array<int, string> The processed labels, in order
 */
private static function process($domain, array $options, Info $info)
{
    // If VerifyDnsLength is not set, we are doing ToUnicode otherwise we are doing ToASCII and
    // we need to respect the VerifyDnsLength option.
    $rejectEmptyDomain = !(isset($options['VerifyDnsLength']) && !$options['VerifyDnsLength']);

    if ($rejectEmptyDomain && '' === $domain) {
        $info->errors |= self::ERROR_EMPTY_LABEL;

        return [$domain];
    }

    // Step 1. Map each code point in the domain name string
    $domain = self::mapCodePoints($domain, $options, $info);

    // Step 2. Normalize the domain name string to Unicode Normalization Form C.
    if (!\Normalizer::isNormalized($domain, \Normalizer::FORM_C)) {
        $domain = \Normalizer::normalize($domain, \Normalizer::FORM_C);
    }

    // Step 3. Break the string into labels at U+002E (.) FULL STOP.
    $labels = explode('.', $domain);
    $finalIndex = \count($labels) - 1;

    // Step 4. Convert and validate each label in the domain name string.
    foreach ($labels as $index => $label) {
        $labelOptions = $options;

        if ('xn--' === substr($label, 0, 4)) {
            // Step 4.1. If the label contains any non-ASCII code point (i.e., a code point greater than U+007F),
            // record that there was an error, and continue with the next label.
            if (preg_match('/[^\x00-\x7F]/', $label)) {
                $info->errors |= self::ERROR_PUNYCODE;

                continue;
            }

            // Step 4.2. Attempt to convert the rest of the label to Unicode according to Punycode [RFC3492]. If
            // that conversion fails, record that there was an error, and continue
            // with the next label. Otherwise replace the original label in the string by the results of the
            // conversion.
            try {
                $label = self::punycodeDecode(substr($label, 4));
            } catch (\Exception $e) {
                $info->errors |= self::ERROR_PUNYCODE;

                continue;
            }

            // A decoded label is always validated with transitional processing switched off.
            $labelOptions['Transitional_Processing'] = false;
            $labels[$index] = $label;
        }

        // Only a last label that is not also the first one is allowed to be empty.
        $mayBeEmpty = $index > 0 && $index === $finalIndex;
        self::validateLabel($label, $info, $labelOptions, $mayBeEmpty);
    }

    if ($info->bidiDomain && !$info->validBidiDomain) {
        $info->errors |= self::ERROR_BIDI;
    }

    // Any input domain name string that does not record an error has been successfully
    // processed according to this specification. Conversely, if an input domain_name string
    // causes an error, then the processing of the input domain_name string fails. Determining
    // what to do with error input is up to the caller, and not in the scope of this document.
    return $labels;
}
390
/**
 * Checks a single label against the numbered bidi conditions and records the outcome on $info.
 *
 * A label matching Regex::RTL_LABEL marks the whole domain as a bidi domain and is checked
 * against conditions 1-4; any other label is checked against conditions 1, 5 and 6. The first
 * failing condition clears $info->validBidiDomain; later conditions are then not evaluated.
 *
 * @see https://tools.ietf.org/html/rfc5893#section-2
 *
 * @param string $label
 * @param Info   $info
 */
private static function validateBidiLabel($label, Info $info)
{
    if (1 === preg_match(Regex::RTL_LABEL, $label)) {
        $info->bidiDomain = true;

        // Step 1. The first character must be a character with Bidi property L, R, or AL.
        // If it has the R or AL property, it is an RTL label
        $satisfied = 1 === preg_match(Regex::BIDI_STEP_1_RTL, $label)
            // Step 2. In an RTL label, only characters with the Bidi properties R, AL, AN, EN, ES,
            // CS, ET, ON, BN, or NSM are allowed.
            && 1 !== preg_match(Regex::BIDI_STEP_2, $label)
            // Step 3. In an RTL label, the end of the label must be a character with Bidi property
            // R, AL, EN, or AN, followed by zero or more characters with Bidi property NSM.
            && 1 === preg_match(Regex::BIDI_STEP_3, $label)
            // Step 4. In an RTL label, if an EN is present, no AN may be present, and vice versa.
            && !(1 === preg_match(Regex::BIDI_STEP_4_AN, $label) && 1 === preg_match(Regex::BIDI_STEP_4_EN, $label));
    } else {
        // We are a LTR label
        // Step 1. The first character must be a character with Bidi property L, R, or AL.
        // If it has the L property, it is an LTR label.
        $satisfied = 1 === preg_match(Regex::BIDI_STEP_1_LTR, $label)
            // Step 5. In an LTR label, only characters with the Bidi properties L, EN,
            // ES, CS, ET, ON, BN, or NSM are allowed.
            && 1 !== preg_match(Regex::BIDI_STEP_5, $label)
            // Step 6. In an LTR label, the end of the label must be a character with Bidi property L or
            // EN, followed by zero or more characters with Bidi property NSM.
            && 1 === preg_match(Regex::BIDI_STEP_6, $label);
    }

    if (!$satisfied) {
        $info->validBidiDomain = false;
    }
}
460
/**
 * Flags labels longer than MAX_LABEL_SIZE bytes and domains longer than MAX_DOMAIN_SIZE bytes.
 *
 * The domain length is the sum of all label byte lengths plus one byte per "." separator.
 *
 * @param array<int, string> $labels
 * @param Info               $info   Receives ERROR_LABEL_TOO_LONG / ERROR_DOMAIN_NAME_TOO_LONG
 */
private static function validateDomainAndLabelLength(array $labels, Info $info)
{
    $labelCount = \count($labels);
    $allowedTotal = self::MAX_DOMAIN_SIZE;

    // Start from the number of "." delimiters; label lengths are added in the loop below.
    $totalBytes = $labelCount - 1;

    // If the last label is empty and it is not the first label, then it is the root label.
    // Increase the max size by 1, making it 254, to account for the root label's "."
    // delimiter. This also means we don't need to check the last label's length for being too
    // long.
    $endsWithRootLabel = $labelCount > 1 && '' === $labels[$labelCount - 1];

    if ($endsWithRootLabel) {
        $allowedTotal += 1;
        $labelCount -= 1;
    }

    $position = 0;

    while ($position < $labelCount) {
        $labelBytes = \strlen($labels[$position]);
        $totalBytes += $labelBytes;

        if ($labelBytes > self::MAX_LABEL_SIZE) {
            $info->errors |= self::ERROR_LABEL_TOO_LONG;
        }

        ++$position;
    }

    if ($totalBytes > $allowedTotal) {
        $info->errors |= self::ERROR_DOMAIN_NAME_TOO_LONG;
    }
}
494
/**
 * Validates one label and ORs an error flag into $info->errors for every criterion it violates.
 *
 * @see https://www.unicode.org/reports/tr46/#Validity_Criteria
 *
 * @param string              $label
 * @param Info                $info
 * @param array<string, bool> $options    Reads "CheckHyphens", "Transitional_Processing",
 *                                        "UseSTD3ASCIIRules", "CheckJoiners", "CheckBidi" and the
 *                                        optional "VerifyDnsLength"
 * @param bool                $canBeEmpty Whether an empty label is acceptable at this position
 */
private static function validateLabel($label, Info $info, array $options, $canBeEmpty)
{
    if ('' === $label) {
        // A missing "VerifyDnsLength" key counts as enabled.
        $verifyLength = !isset($options['VerifyDnsLength']) || $options['VerifyDnsLength'];

        if (!$canBeEmpty && $verifyLength) {
            $info->errors |= self::ERROR_EMPTY_LABEL;
        }

        return;
    }

    // Step 1. The label must be in Unicode Normalization Form C.
    if (!\Normalizer::isNormalized($label, \Normalizer::FORM_C)) {
        $info->errors |= self::ERROR_INVALID_ACE_LABEL;
    }

    $codePoints = self::utf8Decode($label);

    if ($options['CheckHyphens']) {
        // Step 2. If CheckHyphens, the label must not contain a U+002D HYPHEN-MINUS character
        // in both the third and fourth positions.
        if (0x002D === ($codePoints[2] ?? null) && 0x002D === ($codePoints[3] ?? null)) {
            $info->errors |= self::ERROR_HYPHEN_3_4;
        }

        // Step 3. If CheckHyphens, the label must neither begin nor end with a U+002D
        // HYPHEN-MINUS character. The label is known to be non-empty at this point.
        if ('-' === $label[0]) {
            $info->errors |= self::ERROR_LEADING_HYPHEN;
        }

        if ('-' === $label[\strlen($label) - 1]) {
            $info->errors |= self::ERROR_TRAILING_HYPHEN;
        }
    } elseif ('xn--' === substr($label, 0, 4)) {
        $info->errors |= self::ERROR_PUNYCODE;
    }

    // Step 4. The label must not contain a U+002E (.) FULL STOP.
    if (false !== strpos($label, '.')) {
        $info->errors |= self::ERROR_LABEL_HAS_DOT;
    }

    // Step 5. The label must not begin with a combining mark, that is: General_Category=Mark.
    if (1 === preg_match(Regex::COMBINING_MARK, $label)) {
        $info->errors |= self::ERROR_LEADING_COMBINING_MARK;
    }

    // Step 6. Each code point in the label must only have certain status values according to
    // Section 5, IDNA Mapping Table:
    $isTransitional = $options['Transitional_Processing'];
    $std3Rules = $options['UseSTD3ASCIIRules'];

    foreach ($codePoints as $codePoint) {
        $status = self::lookupCodePointStatus($codePoint, $std3Rules)['status'];
        $isAcceptable = 'valid' === $status || (!$isTransitional && 'deviation' === $status);

        if (!$isAcceptable) {
            // One offending code point is enough; the flag carries no position information.
            $info->errors |= self::ERROR_DISALLOWED;

            break;
        }
    }

    // Step 7. If CheckJoiners, the label must satisfy the ContextJ rules from Appendix A, in
    // The Unicode Code Points and Internationalized Domain Names for Applications (IDNA)
    // [IDNA2008].
    if ($options['CheckJoiners'] && !self::isValidContextJ($codePoints, $label)) {
        $info->errors |= self::ERROR_CONTEXTJ;
    }

    // Step 8. If CheckBidi, and if the domain name is a Bidi domain name, then the label must
    // satisfy all six of the numbered conditions in [IDNA2008] RFC 5893, Section 2.
    // The check is skipped once the domain is known to be a bidi domain that already failed.
    if ($options['CheckBidi'] && (!$info->bidiDomain || $info->validBidiDomain)) {
        self::validateBidiLabel($label, $info);
    }
}
580
/**
 * Decodes a Punycode string (without the "xn--" prefix) into a UTF-8 string.
 *
 * Everything before the last delimiter is copied verbatim as basic code points; the remainder
 * is read as a sequence of variable-length integers, each of which yields one code point and
 * the position at which to insert it.
 *
 * @see https://tools.ietf.org/html/rfc3492#section-6.2
 *
 * @param string $input
 *
 * @return string
 *
 * @throws \Exception With message "Invalid input" or "Integer overflow" when decoding fails
 */
private static function punycodeDecode($input)
{
    $codePoint = self::INITIAL_N;
    $bias = self::INITIAL_BIAS;
    $index = 0;
    $length = \strlen($input);
    $decoded = [];

    // Number of basic code points: everything before the last delimiter, or none without one.
    $delimiterAt = strrpos($input, self::DELIMITER);
    $basicCount = false === $delimiterAt ? 0 : $delimiterAt;

    for ($pos = 0; $pos < $basicCount; ++$pos) {
        if (\ord($input[$pos]) > 0x7F) {
            throw new \Exception('Invalid input');
        }

        $decoded[] = $input[$pos];
    }

    $written = $basicCount;

    // Skip the delimiter itself when a basic prefix was copied.
    $cursor = $basicCount > 0 ? $basicCount + 1 : 0;

    while ($cursor < $length) {
        $previousIndex = $index;
        $weight = 1;
        $k = self::BASE;

        // Decode one generalized variable-length integer into $index.
        while (true) {
            if ($cursor >= $length) {
                throw new \Exception('Invalid input');
            }

            $digit = self::$basicToDigit[\ord($input[$cursor++])];

            if ($digit < 0) {
                throw new \Exception('Invalid input');
            }

            if ($digit > intdiv(self::MAX_INT - $index, $weight)) {
                throw new \Exception('Integer overflow');
            }

            $index += $digit * $weight;

            // Threshold for this position, clamped to the range TMIN..TMAX.
            $threshold = $k <= $bias
                ? self::TMIN
                : ($k >= $bias + self::TMAX ? self::TMAX : $k - $bias);

            // A digit below the threshold terminates the current integer.
            if ($digit < $threshold) {
                break;
            }

            $radix = self::BASE - $threshold;

            if ($weight > intdiv(self::MAX_INT, $radix)) {
                throw new \Exception('Integer overflow');
            }

            $weight *= $radix;
            $k += self::BASE;
        }

        $slots = $written + 1;
        $bias = self::adaptBias($index - $previousIndex, $slots, 0 === $previousIndex);
        $codePointStep = intdiv($index, $slots);

        if ($codePointStep > self::MAX_INT - $codePoint) {
            throw new \Exception('Integer overflow');
        }

        // $index wraps around the output length: the quotient advances the code point and the
        // remainder is the insertion position.
        $codePoint += $codePointStep;
        $index %= $slots;
        array_splice($decoded, $index++, 0, [mb_chr($codePoint, 'utf-8')]);
        ++$written;
    }

    return implode('', $decoded);
}
668
/**
 * Encodes a UTF-8 string as Punycode (without the "xn--" prefix).
 *
 * Code points below 0x80 are copied first, followed by the delimiter when at least one was
 * copied. The remaining code points are then handled in increasing numeric order, each emitted
 * as a variable-length integer built from encodeDigit().
 *
 * @see https://tools.ietf.org/html/rfc3492#section-6.3
 *
 * @param string $input
 *
 * @return string
 *
 * @throws \Exception With message "Integer overflow" when a delta would exceed MAX_INT
 */
private static function punycodeEncode($input)
{
    $codePoints = self::utf8Decode($input);
    $total = \count($codePoints);
    $encoded = '';
    $basicCount = 0;

    foreach ($codePoints as $codePoint) {
        if ($codePoint < 0x80) {
            $encoded .= \chr($codePoint);
            ++$basicCount;
        }
    }

    if ($basicCount > 0) {
        $encoded .= self::DELIMITER;
    }

    $current = self::INITIAL_N;
    $bias = self::INITIAL_BIAS;
    $delta = 0;
    $handled = $basicCount;

    while ($handled < $total) {
        // Find the smallest code point that has not been handled yet.
        $next = self::MAX_INT;

        foreach ($codePoints as $codePoint) {
            if ($codePoint >= $current && $codePoint < $next) {
                $next = $codePoint;
            }
        }

        $slots = $handled + 1;

        if ($next - $current > intdiv(self::MAX_INT - $delta, $slots)) {
            throw new \Exception('Integer overflow');
        }

        $delta += ($next - $current) * $slots;
        $current = $next;

        foreach ($codePoints as $codePoint) {
            if ($codePoint < $current && 0 === ++$delta) {
                throw new \Exception('Integer overflow');
            }

            if ($codePoint !== $current) {
                continue;
            }

            // Emit $delta as a generalized variable-length integer.
            $remaining = $delta;
            $k = self::BASE;

            while (true) {
                // Threshold for this position, clamped to the range TMIN..TMAX.
                $threshold = $k <= $bias
                    ? self::TMIN
                    : ($k >= $bias + self::TMAX ? self::TMAX : $k - $bias);

                if ($remaining < $threshold) {
                    break;
                }

                $excess = $remaining - $threshold;
                $radix = self::BASE - $threshold;
                $encoded .= self::encodeDigit($threshold + $excess % $radix, false);
                $remaining = intdiv($excess, $radix);
                $k += self::BASE;
            }

            $encoded .= self::encodeDigit($remaining, false);
            $bias = self::adaptBias($delta, $handled + 1, $handled === $basicCount);
            $delta = 0;
            ++$handled;
        }

        ++$delta;
        ++$current;
    }

    return $encoded;
}
761
/**
 * Computes the next bias value for the Punycode encoder and decoder.
 *
 * @see https://tools.ietf.org/html/rfc3492#section-6.1
 *
 * @param int  $delta     Delta that was just encoded or decoded
 * @param int  $numPoints Number of code points handled so far, including the current one
 * @param bool $firstTime Whether this is the first adaptation; the delta is then divided by DAMP
 *                        instead of being halved
 *
 * @return int
 */
private static function adaptBias($delta, $numPoints, $firstTime)
{
    $range = self::BASE - self::TMIN;
    $limit = ($range * self::TMAX) >> 1;

    if ($firstTime) {
        $delta = intdiv($delta, self::DAMP);
    } else {
        // xxx >> 1 is a faster way of doing intdiv(xxx, 2)
        $delta = $delta >> 1;
    }

    $delta += intdiv($delta, $numPoints);

    // Scale the delta down until it fits under the limit, counting BASE per division.
    for ($k = 0; $delta > $limit; $k += self::BASE) {
        $delta = intdiv($delta, $range);
    }

    return $k + intdiv(($range + 1) * $delta, $delta + self::SKEW);
}
785
/**
 * Returns the character that represents a Punycode digit value.
 *
 * Values 0-25 produce "a"-"z" (or "A"-"Z" when $flag is set) and values 26-35 produce "0"-"9".
 *
 * @param int  $d    Digit value
 * @param bool $flag Whether to subtract 32 from the character code, which selects the
 *                   uppercase form for letters
 *
 * @return string
 */
private static function encodeDigit($d, $flag)
{
    // 22 + 26 = 48 = "0", so digit values from 26 upwards land on the ASCII digits.
    $code = $d + 22;

    // 22 + 75 = 97 = "a", so digit values below 26 land on the lowercase letters.
    if ($d < 26) {
        $code += 75;
    }

    if ($flag) {
        $code -= 1 << 5;
    }

    return \chr($code);
}
796
/**
 * Decodes a UTF-8 byte string into a list of integer code points.
 *
 * Every invalid byte sequence, including a sequence cut short by the end of the string, is
 * replaced by a single U+FFFD code point. A byte that breaks a multi-byte sequence is then
 * examined again as the possible start of a new sequence.
 *
 * @see https://encoding.spec.whatwg.org/#utf-8-decoder
 *
 * @param string $input
 *
 * @return array<int, int>
 */
private static function utf8Decode($input)
{
    $decoded = [];
    $value = 0;
    $expected = 0;
    $received = 0;
    $minNext = 0x80;
    $maxNext = 0xBF;
    $size = \strlen($input);
    $pos = 0;

    while ($pos < $size) {
        $byte = \ord($input[$pos++]);

        if (0 === $expected) {
            // Not inside a sequence: this byte is either ASCII, a lead byte, or invalid.
            if ($byte <= 0x7F) {
                $decoded[] = $byte;
            } elseif ($byte >= 0xC2 && $byte <= 0xDF) {
                $expected = 1;
                $value = $byte & 0x1F;
            } elseif ($byte >= 0xE0 && $byte <= 0xEF) {
                // These two lead bytes narrow the range allowed for the byte that follows.
                if (0xE0 === $byte) {
                    $minNext = 0xA0;
                } elseif (0xED === $byte) {
                    $maxNext = 0x9F;
                }

                $expected = 2;
                $value = $byte & 0xF;
            } elseif ($byte >= 0xF0 && $byte <= 0xF4) {
                // These two lead bytes narrow the range allowed for the byte that follows.
                if (0xF0 === $byte) {
                    $minNext = 0x90;
                } elseif (0xF4 === $byte) {
                    $maxNext = 0x8F;
                }

                $expected = 3;
                $value = $byte & 0x7;
            } else {
                $decoded[] = 0xFFFD;
            }

            continue;
        }

        if ($byte < $minNext || $byte > $maxNext) {
            // Abandon the sequence, emit U+FFFD and step back so this byte is read again.
            $value = 0;
            $expected = 0;
            $received = 0;
            $minNext = 0x80;
            $maxNext = 0xBF;
            --$pos;
            $decoded[] = 0xFFFD;

            continue;
        }

        // Only the first continuation byte has a narrowed range; restore the default one.
        $minNext = 0x80;
        $maxNext = 0xBF;
        $value = ($value << 6) | ($byte & 0x3F);

        if (++$received === $expected) {
            $decoded[] = $value;
            $value = 0;
            $expected = 0;
            $received = 0;
        }
    }

    // String unexpectedly ended, so append a U+FFFD code point.
    if (0 !== $expected) {
        $decoded[] = 0xFFFD;
    }

    return $decoded;
}
888
/**
 * Looks up the status of a code point in the mapping tables.
 *
 * The tables are loaded from Resources/unidata on the first call and consulted in a fixed
 * order: mapped, ignored, deviation, disallowed, then the two STD3 tables. A code point found
 * in none of them is "valid".
 *
 * @param int  $codePoint
 * @param bool $useSTD3ASCIIRules When true, code points from the two STD3 tables are reported
 *                                as "disallowed" instead of "mapped" / "valid"
 *
 * @return array{status: string, mapping?: string}
 */
private static function lookupCodePointStatus($codePoint, $useSTD3ASCIIRules)
{
    if (!self::$mappingTableLoaded) {
        self::$mappingTableLoaded = true;
        self::$mapped = require __DIR__.'/Resources/unidata/mapped.php';
        self::$ignored = require __DIR__.'/Resources/unidata/ignored.php';
        self::$deviation = require __DIR__.'/Resources/unidata/deviation.php';
        self::$disallowed = require __DIR__.'/Resources/unidata/disallowed.php';
        self::$disallowed_STD3_mapped = require __DIR__.'/Resources/unidata/disallowed_STD3_mapped.php';
        self::$disallowed_STD3_valid = require __DIR__.'/Resources/unidata/disallowed_STD3_valid.php';
    }

    if (isset(self::$mapped[$codePoint])) {
        return ['status' => 'mapped', 'mapping' => self::$mapped[$codePoint]];
    }

    if (isset(self::$ignored[$codePoint])) {
        return ['status' => 'ignored'];
    }

    if (isset(self::$deviation[$codePoint])) {
        return ['status' => 'deviation', 'mapping' => self::$deviation[$codePoint]];
    }

    if (isset(self::$disallowed[$codePoint]) || DisallowedRanges::inRange($codePoint)) {
        return ['status' => 'disallowed'];
    }

    // The mapping is returned even when STD3 rules turn the status into "disallowed".
    if (isset(self::$disallowed_STD3_mapped[$codePoint])) {
        return [
            'status' => $useSTD3ASCIIRules ? 'disallowed' : 'mapped',
            'mapping' => self::$disallowed_STD3_mapped[$codePoint],
        ];
    }

    if (isset(self::$disallowed_STD3_valid[$codePoint])) {
        return ['status' => $useSTD3ASCIIRules ? 'disallowed' : 'valid'];
    }

    return ['status' => 'valid'];
}
941 }
942