Verzeichnisstruktur phpBB-3.3.15
- Veröffentlicht
- 28.08.2024
So funktioniert es
|
Auf das letzte Element klicken. Dies geht jeweils einen Schritt zurück |
Auf das Icon klicken, dies öffnet das Verzeichnis. Nochmal klicken schließt das Verzeichnis. |
|
(Beispiel Datei-Icons)
|
Auf das Icon klicken, um den Quellcode anzuzeigen |
Idn.php
001 <?php
002
003 /*
004 * This file is part of the Symfony package.
005 *
006 * (c) Fabien Potencier <fabien@symfony.com> and Trevor Rowbotham <trevor.rowbotham@pm.me>
007 *
008 * For the full copyright and license information, please view the LICENSE
009 * file that was distributed with this source code.
010 */
011
012 namespace Symfony\Polyfill\Intl\Idn;
013
014 use Symfony\Polyfill\Intl\Idn\Resources\unidata\DisallowedRanges;
015 use Symfony\Polyfill\Intl\Idn\Resources\unidata\Regex;
016
/**
 * Userland implementation of the UTS #46 ToASCII and ToUnicode operations, exposed through
 * idn_to_ascii() and idn_to_utf8(), together with the Punycode (RFC 3492) codec they rely on.
 *
 * @see https://www.unicode.org/reports/tr46/
 *
 * @internal
 */
final class Idn
{
    // Error bit flags accumulated in Info::$errors and reported through $idna_info['errors'].
    public const ERROR_EMPTY_LABEL = 1;
    public const ERROR_LABEL_TOO_LONG = 2;
    public const ERROR_DOMAIN_NAME_TOO_LONG = 4;
    public const ERROR_LEADING_HYPHEN = 8;
    public const ERROR_TRAILING_HYPHEN = 0x10;
    public const ERROR_HYPHEN_3_4 = 0x20;
    public const ERROR_LEADING_COMBINING_MARK = 0x40;
    public const ERROR_DISALLOWED = 0x80;
    public const ERROR_PUNYCODE = 0x100;
    public const ERROR_LABEL_HAS_DOT = 0x200;
    public const ERROR_INVALID_ACE_LABEL = 0x400;
    public const ERROR_BIDI = 0x800;
    public const ERROR_CONTEXTJ = 0x1000;
    public const ERROR_CONTEXTO_PUNCTUATION = 0x2000;
    public const ERROR_CONTEXTO_DIGITS = 0x4000;

    // Accepted values for the $variant argument.
    public const INTL_IDNA_VARIANT_2003 = 0;
    public const INTL_IDNA_VARIANT_UTS46 = 1;

    // Bit flags accepted in the $options argument.
    public const IDNA_DEFAULT = 0;
    public const IDNA_ALLOW_UNASSIGNED = 1;
    public const IDNA_USE_STD3_RULES = 2;
    public const IDNA_CHECK_BIDI = 4;
    public const IDNA_CHECK_CONTEXTJ = 8;
    public const IDNA_NONTRANSITIONAL_TO_ASCII = 16;
    public const IDNA_NONTRANSITIONAL_TO_UNICODE = 32;

    // Length limits, in bytes, enforced by validateDomainAndLabelLength().
    public const MAX_DOMAIN_SIZE = 253;
    public const MAX_LABEL_SIZE = 63;

    // Punycode parameters (see RFC 3492, section 5).
    public const BASE = 36;
    public const TMIN = 1;
    public const TMAX = 26;
    public const SKEW = 38;
    public const DAMP = 700;
    public const INITIAL_BIAS = 72;
    public const INITIAL_N = 128;
    public const DELIMITER = '-';
    public const MAX_INT = 2147483647;

    /**
     * Contains the numeric value of a basic code point (for use in representing integers) in the
     * range 0 to BASE-1, or -1 if the byte does not represent a value. The table is indexed by
     * byte value (0-255).
     *
     * @var array<int, int>
     */
    private static $basicToDigit = [
        -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
        -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,

        -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
        26, 27, 28, 29, 30, 31, 32, 33, 34, 35, -1, -1, -1, -1, -1, -1,

        -1,  0,  1,  2,  3,  4,  5,  6,  7,  8,  9, 10, 11, 12, 13, 14,
        15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, -1, -1, -1, -1, -1,

        -1,  0,  1,  2,  3,  4,  5,  6,  7,  8,  9, 10, 11, 12, 13, 14,
        15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, -1, -1, -1, -1, -1,

        -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
        -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,

        -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
        -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,

        -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
        -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,

        -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
        -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
    ];

    /**
     * Lazily loaded from Resources/unidata/virama.php by isValidContextJ().
     *
     * @var array<int, int>
     */
    private static $virama;

    /**
     * @var array<int, string>
     */
    private static $mapped;

    /**
     * @var array<int, bool>
     */
    private static $ignored;

    /**
     * @var array<int, string>
     */
    private static $deviation;

    /**
     * @var array<int, bool>
     */
    private static $disallowed;

    /**
     * @var array<int, string>
     */
    private static $disallowed_STD3_mapped;

    /**
     * @var array<int, bool>
     */
    private static $disallowed_STD3_valid;

    /**
     * Set to true once lookupCodePointStatus() has loaded the mapping tables above.
     *
     * @var bool
     */
    private static $mappingTableLoaded = false;

    /**
     * Converts a domain name to its ASCII (Punycode) form.
     *
     * @see https://www.unicode.org/reports/tr46/#ToASCII
     *
     * @param string $domainName
     * @param int    $options    Bitmask of the IDNA_* constants
     * @param int    $variant    One of the INTL_IDNA_VARIANT_* constants
     * @param array  $idna_info  Receives the keys 'result', 'isTransitionalDifferent' and 'errors'
     *
     * @return string|false The converted name, or false when any error bit was recorded
     */
    public static function idn_to_ascii($domainName, $options = self::IDNA_DEFAULT, $variant = self::INTL_IDNA_VARIANT_UTS46, &$idna_info = [])
    {
        if (\PHP_VERSION_ID >= 70200 && self::INTL_IDNA_VARIANT_2003 === $variant) {
            @trigger_error('idn_to_ascii(): INTL_IDNA_VARIANT_2003 is deprecated', \E_USER_DEPRECATED);
        }

        // Translate the integer bitmask into the named processing options used internally.
        $options = [
            'CheckHyphens' => true,
            'CheckBidi' => self::INTL_IDNA_VARIANT_2003 === $variant || 0 !== ($options & self::IDNA_CHECK_BIDI),
            'CheckJoiners' => self::INTL_IDNA_VARIANT_UTS46 === $variant && 0 !== ($options & self::IDNA_CHECK_CONTEXTJ),
            'UseSTD3ASCIIRules' => 0 !== ($options & self::IDNA_USE_STD3_RULES),
            'Transitional_Processing' => self::INTL_IDNA_VARIANT_2003 === $variant || 0 === ($options & self::IDNA_NONTRANSITIONAL_TO_ASCII),
            'VerifyDnsLength' => true,
        ];
        $info = new Info();
        $labels = self::process((string) $domainName, $options, $info);

        foreach ($labels as $i => $label) {
            // Only convert labels to punycode that contain non-ASCII code points
            if (1 === preg_match('/[^\x00-\x7F]/', $label)) {
                try {
                    $label = 'xn--'.self::punycodeEncode($label);
                } catch (\Exception $e) {
                    // The label is left unencoded; the failure is reported via the error bit.
                    $info->errors |= self::ERROR_PUNYCODE;
                }

                $labels[$i] = $label;
            }
        }

        if ($options['VerifyDnsLength']) {
            self::validateDomainAndLabelLength($labels, $info);
        }

        $idna_info = [
            'result' => implode('.', $labels),
            'isTransitionalDifferent' => $info->transitionalDifferent,
            'errors' => $info->errors,
        ];

        return 0 === $info->errors ? $idna_info['result'] : false;
    }

    /**
     * Converts a domain name to its Unicode (UTF-8) form.
     *
     * @see https://www.unicode.org/reports/tr46/#ToUnicode
     *
     * @param string $domainName
     * @param int    $options    Bitmask of the IDNA_* constants
     * @param int    $variant    One of the INTL_IDNA_VARIANT_* constants
     * @param array  $idna_info  Receives the keys 'result', 'isTransitionalDifferent' and 'errors'
     *
     * @return string|false The converted name, or false when any error bit was recorded
     */
    public static function idn_to_utf8($domainName, $options = self::IDNA_DEFAULT, $variant = self::INTL_IDNA_VARIANT_UTS46, &$idna_info = [])
    {
        if (\PHP_VERSION_ID >= 70200 && self::INTL_IDNA_VARIANT_2003 === $variant) {
            @trigger_error('idn_to_utf8(): INTL_IDNA_VARIANT_2003 is deprecated', \E_USER_DEPRECATED);
        }

        $info = new Info();
        // 'VerifyDnsLength' is deliberately absent here; process() treats its absence as ToUnicode.
        $labels = self::process((string) $domainName, [
            'CheckHyphens' => true,
            'CheckBidi' => self::INTL_IDNA_VARIANT_2003 === $variant || 0 !== ($options & self::IDNA_CHECK_BIDI),
            'CheckJoiners' => self::INTL_IDNA_VARIANT_UTS46 === $variant && 0 !== ($options & self::IDNA_CHECK_CONTEXTJ),
            'UseSTD3ASCIIRules' => 0 !== ($options & self::IDNA_USE_STD3_RULES),
            'Transitional_Processing' => self::INTL_IDNA_VARIANT_2003 === $variant || 0 === ($options & self::IDNA_NONTRANSITIONAL_TO_UNICODE),
        ], $info);
        $idna_info = [
            'result' => implode('.', $labels),
            'isTransitionalDifferent' => $info->transitionalDifferent,
            'errors' => $info->errors,
        ];

        return 0 === $info->errors ? $idna_info['result'] : false;
    }

    /**
     * Checks the ContextJ rules for every U+200C (ZWNJ) and U+200D (ZWJ) in a label.
     *
     * @param array<int, int> $codePoints Code points of $label, as returned by utf8Decode()
     * @param string          $label
     *
     * @return bool
     */
    private static function isValidContextJ(array $codePoints, $label)
    {
        if (!isset(self::$virama)) {
            self::$virama = require __DIR__.\DIRECTORY_SEPARATOR.'Resources'.\DIRECTORY_SEPARATOR.'unidata'.\DIRECTORY_SEPARATOR.'virama.php';
        }

        // Byte offset into $label from which the next ZWNJ context search starts.
        $offset = 0;

        foreach ($codePoints as $i => $codePoint) {
            if (0x200C !== $codePoint && 0x200D !== $codePoint) {
                continue;
            }

            // A joiner can never be valid as the first code point of a label.
            if (!isset($codePoints[$i - 1])) {
                return false;
            }

            // If Canonical_Combining_Class(Before(cp)) .eq. Virama Then True;
            if (isset(self::$virama[$codePoints[$i - 1]])) {
                continue;
            }

            // If RegExpMatch((Joining_Type:{L,D})(Joining_Type:T)*\u200C(Joining_Type:T)*(Joining_Type:{R,D})) Then
            // True;
            // Generated RegExp = ([Joining_Type:{L,D}][Joining_Type:T]*\u200C[Joining_Type:T]*)[Joining_Type:{R,D}]
            if (0x200C === $codePoint && 1 === preg_match(Regex::ZWNJ, $label, $matches, \PREG_OFFSET_CAPTURE, $offset)) {
                // Continue the next search directly after the captured group. With
                // PREG_OFFSET_CAPTURE, $matches[1][1] is the byte position at which the group
                // starts; it may lie beyond the previous $offset, so adding only the group's
                // length to the old offset could re-match the same context or start the next
                // search in the middle of a multi-byte character.
                $offset = $matches[1][1] + \strlen($matches[1][0]);

                continue;
            }

            return false;
        }

        return true;
    }

    /**
     * Applies the mapping step to every code point of the input and records the
     * ERROR_DISALLOWED bit and the transitionalDifferent flag on $info as it goes.
     *
     * @see https://www.unicode.org/reports/tr46/#ProcessingStepMap
     *
     * @param string              $input
     * @param array<string, bool> $options
     *
     * @return string
     */
    private static function mapCodePoints($input, array $options, Info $info)
    {
        $str = '';
        $useSTD3ASCIIRules = $options['UseSTD3ASCIIRules'];
        $transitional = $options['Transitional_Processing'];

        foreach (self::utf8Decode($input) as $codePoint) {
            $data = self::lookupCodePointStatus($codePoint, $useSTD3ASCIIRules);

            switch ($data['status']) {
                case 'disallowed':
                    // Record the error, then keep the code point in the output like a valid one.
                    $info->errors |= self::ERROR_DISALLOWED;

                    // no break.

                case 'valid':
                    $str .= mb_chr($codePoint, 'utf-8');

                    break;

                case 'ignored':
                    // Do nothing.
                    break;

                case 'mapped':
                    $str .= $data['mapping'];

                    break;

                case 'deviation':
                    $info->transitionalDifferent = true;
                    $str .= ($transitional ? $data['mapping'] : mb_chr($codePoint, 'utf-8'));

                    break;
            }
        }

        return $str;
    }

    /**
     * Runs the UTS #46 processing steps (map, normalize, split, convert/validate) on a domain.
     *
     * @see https://www.unicode.org/reports/tr46/#Processing
     *
     * @param string              $domain
     * @param array<string, bool> $options
     *
     * @return array<int, string> The processed labels; errors are recorded on $info
     */
    private static function process($domain, array $options, Info $info)
    {
        // If VerifyDnsLength is not set, we are doing ToUnicode otherwise we are doing ToASCII and
        // we need to respect the VerifyDnsLength option.
        $checkForEmptyLabels = !isset($options['VerifyDnsLength']) || $options['VerifyDnsLength'];

        if ($checkForEmptyLabels && '' === $domain) {
            $info->errors |= self::ERROR_EMPTY_LABEL;

            return [$domain];
        }

        // Step 1. Map each code point in the domain name string
        $domain = self::mapCodePoints($domain, $options, $info);

        // Step 2. Normalize the domain name string to Unicode Normalization Form C.
        if (!\Normalizer::isNormalized($domain, \Normalizer::FORM_C)) {
            $domain = \Normalizer::normalize($domain, \Normalizer::FORM_C);
        }

        // Step 3. Break the string into labels at U+002E (.) FULL STOP.
        $labels = explode('.', $domain);
        $lastLabelIndex = \count($labels) - 1;

        // Step 4. Convert and validate each label in the domain name string.
        foreach ($labels as $i => $label) {
            $validationOptions = $options;

            if ('xn--' === substr($label, 0, 4)) {
                try {
                    $label = self::punycodeDecode(substr($label, 4));
                } catch (\Exception $e) {
                    $info->errors |= self::ERROR_PUNYCODE;

                    // The undecodable label stays in $labels as-is and is not validated further.
                    continue;
                }

                $validationOptions['Transitional_Processing'] = false;
                $labels[$i] = $label;
            }

            // Only a trailing label that is not also the first one may be empty (root label).
            self::validateLabel($label, $info, $validationOptions, $i > 0 && $i === $lastLabelIndex);
        }

        if ($info->bidiDomain && !$info->validBidiDomain) {
            $info->errors |= self::ERROR_BIDI;
        }

        // Any input domain name string that does not record an error has been successfully
        // processed according to this specification. Conversely, if an input domain_name string
        // causes an error, then the processing of the input domain_name string fails. Determining
        // what to do with error input is up to the caller, and not in the scope of this document.
        return $labels;
    }

    /**
     * Applies the Bidi rule to a label. Sets $info->bidiDomain when the label matches
     * Regex::RTL_LABEL and clears $info->validBidiDomain when a condition is violated.
     *
     * @see https://tools.ietf.org/html/rfc5893#section-2
     *
     * @param string $label
     */
    private static function validateBidiLabel($label, Info $info)
    {
        if (1 === preg_match(Regex::RTL_LABEL, $label)) {
            $info->bidiDomain = true;

            // Step 1. The first character must be a character with Bidi property L, R, or AL.
            // If it has the R or AL property, it is an RTL label
            if (1 !== preg_match(Regex::BIDI_STEP_1_RTL, $label)) {
                $info->validBidiDomain = false;

                return;
            }

            // Step 2. In an RTL label, only characters with the Bidi properties R, AL, AN, EN, ES,
            // CS, ET, ON, BN, or NSM are allowed.
            if (1 === preg_match(Regex::BIDI_STEP_2, $label)) {
                $info->validBidiDomain = false;

                return;
            }

            // Step 3. In an RTL label, the end of the label must be a character with Bidi property
            // R, AL, EN, or AN, followed by zero or more characters with Bidi property NSM.
            if (1 !== preg_match(Regex::BIDI_STEP_3, $label)) {
                $info->validBidiDomain = false;

                return;
            }

            // Step 4. In an RTL label, if an EN is present, no AN may be present, and vice versa.
            if (1 === preg_match(Regex::BIDI_STEP_4_AN, $label) && 1 === preg_match(Regex::BIDI_STEP_4_EN, $label)) {
                $info->validBidiDomain = false;

                return;
            }

            return;
        }

        // We are a LTR label
        // Step 1. The first character must be a character with Bidi property L, R, or AL.
        // If it has the L property, it is an LTR label.
        if (1 !== preg_match(Regex::BIDI_STEP_1_LTR, $label)) {
            $info->validBidiDomain = false;

            return;
        }

        // Step 5. In an LTR label, only characters with the Bidi properties L, EN,
        // ES, CS, ET, ON, BN, or NSM are allowed.
        if (1 === preg_match(Regex::BIDI_STEP_5, $label)) {
            $info->validBidiDomain = false;

            return;
        }

        // Step 6. In an LTR label, the end of the label must be a character with Bidi property L or
        // EN, followed by zero or more characters with Bidi property NSM.
        if (1 !== preg_match(Regex::BIDI_STEP_6, $label)) {
            $info->validBidiDomain = false;

            return;
        }
    }

    /**
     * Records ERROR_LABEL_TOO_LONG / ERROR_DOMAIN_NAME_TOO_LONG on $info when a label or the
     * whole domain exceeds the byte limits.
     *
     * @param array<int, string> $labels
     */
    private static function validateDomainAndLabelLength(array $labels, Info $info)
    {
        $maxDomainSize = self::MAX_DOMAIN_SIZE;
        $length = \count($labels);

        // Number of "." delimiters.
        $domainLength = $length - 1;

        // If the last label is empty and it is not the first label, then it is the root label.
        // Increase the max size by 1, making it 254, to account for the root label's "."
        // delimiter. This also means we don't need to check the last label's length for being too
        // long.
        if ($length > 1 && '' === $labels[$length - 1]) {
            ++$maxDomainSize;
            --$length;
        }

        for ($i = 0; $i < $length; ++$i) {
            $bytes = \strlen($labels[$i]);
            $domainLength += $bytes;

            if ($bytes > self::MAX_LABEL_SIZE) {
                $info->errors |= self::ERROR_LABEL_TOO_LONG;
            }
        }

        if ($domainLength > $maxDomainSize) {
            $info->errors |= self::ERROR_DOMAIN_NAME_TOO_LONG;
        }
    }

    /**
     * Checks a single label against the validity criteria and records any violation as an
     * error bit on $info.
     *
     * @see https://www.unicode.org/reports/tr46/#Validity_Criteria
     *
     * @param string              $label
     * @param array<string, bool> $options
     * @param bool                $canBeEmpty
     */
    private static function validateLabel($label, Info $info, array $options, $canBeEmpty)
    {
        if ('' === $label) {
            if (!$canBeEmpty && (!isset($options['VerifyDnsLength']) || $options['VerifyDnsLength'])) {
                $info->errors |= self::ERROR_EMPTY_LABEL;
            }

            return;
        }

        // Step 1. The label must be in Unicode Normalization Form C.
        if (!\Normalizer::isNormalized($label, \Normalizer::FORM_C)) {
            $info->errors |= self::ERROR_INVALID_ACE_LABEL;
        }

        $codePoints = self::utf8Decode($label);

        if ($options['CheckHyphens']) {
            // Step 2. If CheckHyphens, the label must not contain a U+002D HYPHEN-MINUS character
            // in both the third and fourth positions.
            if (isset($codePoints[2], $codePoints[3]) && 0x002D === $codePoints[2] && 0x002D === $codePoints[3]) {
                $info->errors |= self::ERROR_HYPHEN_3_4;
            }

            // Step 3. If CheckHyphens, the label must neither begin nor end with a U+002D
            // HYPHEN-MINUS character.
            if ('-' === substr($label, 0, 1)) {
                $info->errors |= self::ERROR_LEADING_HYPHEN;
            }

            if ('-' === substr($label, -1, 1)) {
                $info->errors |= self::ERROR_TRAILING_HYPHEN;
            }
        }

        // Step 4. The label must not contain a U+002E (.) FULL STOP.
        if (false !== strpos($label, '.')) {
            $info->errors |= self::ERROR_LABEL_HAS_DOT;
        }

        // Step 5. The label must not begin with a combining mark, that is: General_Category=Mark.
        if (1 === preg_match(Regex::COMBINING_MARK, $label)) {
            $info->errors |= self::ERROR_LEADING_COMBINING_MARK;
        }

        // Step 6. Each code point in the label must only have certain status values according to
        // Section 5, IDNA Mapping Table:
        $transitional = $options['Transitional_Processing'];
        $useSTD3ASCIIRules = $options['UseSTD3ASCIIRules'];

        foreach ($codePoints as $codePoint) {
            $data = self::lookupCodePointStatus($codePoint, $useSTD3ASCIIRules);
            $status = $data['status'];

            if ('valid' === $status || (!$transitional && 'deviation' === $status)) {
                continue;
            }

            // One offending code point is enough; the error is a single bit.
            $info->errors |= self::ERROR_DISALLOWED;

            break;
        }

        // Step 7. If CheckJoiners, the label must satisfy the ContextJ rules from Appendix A, in
        // The Unicode Code Points and Internationalized Domain Names for Applications (IDNA)
        // [IDNA2008].
        if ($options['CheckJoiners'] && !self::isValidContextJ($codePoints, $label)) {
            $info->errors |= self::ERROR_CONTEXTJ;
        }

        // Step 8. If CheckBidi, and if the domain name is a Bidi domain name, then the label must
        // satisfy all six of the numbered conditions in [IDNA2008] RFC 5893, Section 2.
        if ($options['CheckBidi'] && (!$info->bidiDomain || $info->validBidiDomain)) {
            self::validateBidiLabel($label, $info);
        }
    }

    /**
     * Decodes a Punycode string (without the "xn--" prefix) into UTF-8.
     *
     * @see https://tools.ietf.org/html/rfc3492#section-6.2
     *
     * @param string $input
     *
     * @return string
     *
     * @throws \Exception When the input is malformed, an integer overflow is detected, or a
     *                    decoded value is not a Unicode scalar value
     */
    private static function punycodeDecode($input)
    {
        $n = self::INITIAL_N;
        $out = 0;
        $i = 0;
        $bias = self::INITIAL_BIAS;
        $lastDelimIndex = strrpos($input, self::DELIMITER);
        $b = false === $lastDelimIndex ? 0 : $lastDelimIndex;
        $inputLength = \strlen($input);
        $output = [];
        $bytes = array_map('ord', str_split($input));

        // Everything before the last delimiter is copied verbatim and must be ASCII.
        for ($j = 0; $j < $b; ++$j) {
            if ($bytes[$j] > 0x7F) {
                throw new \Exception('Invalid input');
            }

            $output[$out++] = $input[$j];
        }

        // Skip over the delimiter itself when basic code points were copied.
        if ($b > 0) {
            ++$b;
        }

        for ($in = $b; $in < $inputLength; ++$out) {
            $oldi = $i;
            $w = 1;

            // Decode one generalized variable-length integer into $i.
            for ($k = self::BASE; /* no condition */; $k += self::BASE) {
                if ($in >= $inputLength) {
                    throw new \Exception('Invalid input');
                }

                $digit = self::$basicToDigit[$bytes[$in++] & 0xFF];

                if ($digit < 0) {
                    throw new \Exception('Invalid input');
                }

                if ($digit > intdiv(self::MAX_INT - $i, $w)) {
                    throw new \Exception('Integer overflow');
                }

                $i += $digit * $w;

                if ($k <= $bias) {
                    $t = self::TMIN;
                } elseif ($k >= $bias + self::TMAX) {
                    $t = self::TMAX;
                } else {
                    $t = $k - $bias;
                }

                if ($digit < $t) {
                    break;
                }

                $baseMinusT = self::BASE - $t;

                if ($w > intdiv(self::MAX_INT, $baseMinusT)) {
                    throw new \Exception('Integer overflow');
                }

                $w *= $baseMinusT;
            }

            $outPlusOne = $out + 1;
            $bias = self::adaptBias($i - $oldi, $outPlusOne, 0 === $oldi);

            if (intdiv($i, $outPlusOne) > self::MAX_INT - $n) {
                throw new \Exception('Integer overflow');
            }

            $n += intdiv($i, $outPlusOne);
            $i %= $outPlusOne;

            // Only Unicode scalar values can be encoded as UTF-8. Without this guard, mb_chr()
            // returns false for surrogates and for values above U+10FFFF, and implode() below
            // would silently turn that into an empty string instead of reporting bad input.
            if ($n > 0x10FFFF || ($n >= 0xD800 && $n <= 0xDFFF)) {
                throw new \Exception('Invalid input');
            }

            // Insert the decoded character at position $i, then advance the insertion point.
            array_splice($output, $i++, 0, [mb_chr($n, 'utf-8')]);
        }

        return implode('', $output);
    }

    /**
     * Encodes a UTF-8 string as Punycode (without the "xn--" prefix).
     *
     * @see https://tools.ietf.org/html/rfc3492#section-6.3
     *
     * @param string $input
     *
     * @return string
     *
     * @throws \Exception When an integer overflow is detected
     */
    private static function punycodeEncode($input)
    {
        $n = self::INITIAL_N;
        $delta = 0;
        $out = 0;
        $bias = self::INITIAL_BIAS;
        $inputLength = 0;
        $output = '';
        $iter = self::utf8Decode($input);

        // Copy the basic (ASCII) code points straight to the output and count all code points.
        foreach ($iter as $codePoint) {
            ++$inputLength;

            if ($codePoint < 0x80) {
                $output .= \chr($codePoint);
                ++$out;
            }
        }

        // $h is the number of code points handled so far, $b the number of basic code points.
        $h = $out;
        $b = $out;

        if ($b > 0) {
            $output .= self::DELIMITER;
            ++$out;
        }

        while ($h < $inputLength) {
            // Find the smallest code point that is >= $n.
            $m = self::MAX_INT;

            foreach ($iter as $codePoint) {
                if ($codePoint >= $n && $codePoint < $m) {
                    $m = $codePoint;
                }
            }

            if ($m - $n > intdiv(self::MAX_INT - $delta, $h + 1)) {
                throw new \Exception('Integer overflow');
            }

            $delta += ($m - $n) * ($h + 1);
            $n = $m;

            foreach ($iter as $codePoint) {
                if ($codePoint < $n && 0 === ++$delta) {
                    throw new \Exception('Integer overflow');
                }

                if ($codePoint === $n) {
                    // Emit $delta as a generalized variable-length integer.
                    $q = $delta;

                    for ($k = self::BASE; /* no condition */; $k += self::BASE) {
                        if ($k <= $bias) {
                            $t = self::TMIN;
                        } elseif ($k >= $bias + self::TMAX) {
                            $t = self::TMAX;
                        } else {
                            $t = $k - $bias;
                        }

                        if ($q < $t) {
                            break;
                        }

                        $qMinusT = $q - $t;
                        $baseMinusT = self::BASE - $t;
                        $output .= self::encodeDigit($t + $qMinusT % $baseMinusT, false);
                        ++$out;
                        $q = intdiv($qMinusT, $baseMinusT);
                    }

                    $output .= self::encodeDigit($q, false);
                    ++$out;
                    $bias = self::adaptBias($delta, $h + 1, $h === $b);
                    $delta = 0;
                    ++$h;
                }
            }

            ++$delta;
            ++$n;
        }

        return $output;
    }

    /**
     * Punycode bias adaptation function.
     *
     * @see https://tools.ietf.org/html/rfc3492#section-6.1
     *
     * @param int  $delta
     * @param int  $numPoints
     * @param bool $firstTime
     *
     * @return int
     */
    private static function adaptBias($delta, $numPoints, $firstTime)
    {
        // xxx >> 1 is a faster way of doing intdiv(xxx, 2)
        $delta = $firstTime ? intdiv($delta, self::DAMP) : $delta >> 1;
        $delta += intdiv($delta, $numPoints);
        $k = 0;

        while ($delta > ((self::BASE - self::TMIN) * self::TMAX) >> 1) {
            $delta = intdiv($delta, self::BASE - self::TMIN);
            $k += self::BASE;
        }

        return $k + intdiv((self::BASE - self::TMIN + 1) * $delta, $delta + self::SKEW);
    }

    /**
     * Returns the basic code point for a Punycode digit: 0-25 map to "a"-"z" (or "A"-"Z"
     * when $flag is true) and 26-35 map to "0"-"9".
     *
     * @param int  $d
     * @param bool $flag
     *
     * @return string
     */
    private static function encodeDigit($d, $flag)
    {
        return \chr($d + 22 + 75 * ($d < 26 ? 1 : 0) - (($flag ? 1 : 0) << 5));
    }

    /**
     * Takes a UTF-8 encoded string and converts it into a series of integer code points. Any
     * invalid byte sequences will be replaced by a U+FFFD replacement code point.
     *
     * @see https://encoding.spec.whatwg.org/#utf-8-decoder
     *
     * @param string $input
     *
     * @return array<int, int>
     */
    private static function utf8Decode($input)
    {
        $bytesSeen = 0;
        $bytesNeeded = 0;
        $lowerBoundary = 0x80;
        $upperBoundary = 0xBF;
        $codePoint = 0;
        $codePoints = [];
        $length = \strlen($input);

        for ($i = 0; $i < $length; ++$i) {
            $byte = \ord($input[$i]);

            if (0 === $bytesNeeded) {
                // Not inside a multi-byte sequence: this byte must be ASCII or a lead byte.
                if ($byte >= 0x00 && $byte <= 0x7F) {
                    $codePoints[] = $byte;

                    continue;
                }

                if ($byte >= 0xC2 && $byte <= 0xDF) {
                    $bytesNeeded = 1;
                    $codePoint = $byte & 0x1F;
                } elseif ($byte >= 0xE0 && $byte <= 0xEF) {
                    // Narrow the allowed range of the next byte for these two lead bytes.
                    if (0xE0 === $byte) {
                        $lowerBoundary = 0xA0;
                    } elseif (0xED === $byte) {
                        $upperBoundary = 0x9F;
                    }

                    $bytesNeeded = 2;
                    $codePoint = $byte & 0xF;
                } elseif ($byte >= 0xF0 && $byte <= 0xF4) {
                    // Narrow the allowed range of the next byte for these two lead bytes.
                    if (0xF0 === $byte) {
                        $lowerBoundary = 0x90;
                    } elseif (0xF4 === $byte) {
                        $upperBoundary = 0x8F;
                    }

                    $bytesNeeded = 3;
                    $codePoint = $byte & 0x7;
                } else {
                    $codePoints[] = 0xFFFD;
                }

                continue;
            }

            if ($byte < $lowerBoundary || $byte > $upperBoundary) {
                // Invalid continuation byte: emit U+FFFD for the broken sequence, reset the
                // decoder state and step back so this byte is examined again as a fresh start.
                $codePoint = 0;
                $bytesNeeded = 0;
                $bytesSeen = 0;
                $lowerBoundary = 0x80;
                $upperBoundary = 0xBF;
                --$i;
                $codePoints[] = 0xFFFD;

                continue;
            }

            $lowerBoundary = 0x80;
            $upperBoundary = 0xBF;
            $codePoint = ($codePoint << 6) | ($byte & 0x3F);

            if (++$bytesSeen !== $bytesNeeded) {
                continue;
            }

            $codePoints[] = $codePoint;
            $codePoint = 0;
            $bytesNeeded = 0;
            $bytesSeen = 0;
        }

        // String unexpectedly ended, so append a U+FFFD code point.
        if (0 !== $bytesNeeded) {
            $codePoints[] = 0xFFFD;
        }

        return $codePoints;
    }

    /**
     * Looks up the IDNA mapping table status of a code point. The tables are loaded from
     * Resources/unidata on first use.
     *
     * @param int  $codePoint
     * @param bool $useSTD3ASCIIRules
     *
     * @return array{status: string, mapping?: string}
     */
    private static function lookupCodePointStatus($codePoint, $useSTD3ASCIIRules)
    {
        if (!self::$mappingTableLoaded) {
            self::$mappingTableLoaded = true;
            self::$mapped = require __DIR__.'/Resources/unidata/mapped.php';
            self::$ignored = require __DIR__.'/Resources/unidata/ignored.php';
            self::$deviation = require __DIR__.'/Resources/unidata/deviation.php';
            self::$disallowed = require __DIR__.'/Resources/unidata/disallowed.php';
            self::$disallowed_STD3_mapped = require __DIR__.'/Resources/unidata/disallowed_STD3_mapped.php';
            self::$disallowed_STD3_valid = require __DIR__.'/Resources/unidata/disallowed_STD3_valid.php';
        }

        if (isset(self::$mapped[$codePoint])) {
            return ['status' => 'mapped', 'mapping' => self::$mapped[$codePoint]];
        }

        if (isset(self::$ignored[$codePoint])) {
            return ['status' => 'ignored'];
        }

        if (isset(self::$deviation[$codePoint])) {
            return ['status' => 'deviation', 'mapping' => self::$deviation[$codePoint]];
        }

        if (isset(self::$disallowed[$codePoint]) || DisallowedRanges::inRange($codePoint)) {
            return ['status' => 'disallowed'];
        }

        $isDisallowedMapped = isset(self::$disallowed_STD3_mapped[$codePoint]);

        if ($isDisallowedMapped || isset(self::$disallowed_STD3_valid[$codePoint])) {
            // These code points are disallowed only under STD3 rules; otherwise they fall back
            // to their underlying "mapped" or "valid" status.
            $status = 'disallowed';

            if (!$useSTD3ASCIIRules) {
                $status = $isDisallowedMapped ? 'mapped' : 'valid';
            }

            if ($isDisallowedMapped) {
                return ['status' => $status, 'mapping' => self::$disallowed_STD3_mapped[$codePoint]];
            }

            return ['status' => $status];
        }

        // Anything not listed in any table is valid.
        return ['status' => 'valid'];
    }
}
924