Verzeichnisstruktur phpBB-3.3.15
- Veröffentlicht
- 28.08.2024
So funktioniert es
|
Auf das letzte Element klicken. Dies geht jeweils einen Schritt zurück. |
Auf das Icon klicken, dies öffnet das Verzeichnis. Nochmal klicken schließt das Verzeichnis. |
|
(Beispiel Datei-Icons)
|
Auf das Icon klicken, um den Quellcode anzuzeigen |
Utf8.php
001 <?php declare(strict_types=1);
002
003 /**
004 * @package s9e\RegexpBuilder
005 * @copyright Copyright (c) 2016-2022 The s9e authors
006 * @license http://www.opensource.org/licenses/mit-license.php The MIT License
007 */
008 namespace s9e\RegexpBuilder\Input;
009
010 use InvalidArgumentException;
011 use function array_map, ord, preg_match_all;
012
class Utf8 extends BaseImplementation
{
	/**
	* @var bool Whether codepoints of 0x10000 and above are emitted as surrogate pairs
	*/
	protected $useSurrogates;

	/**
	* Store the "useSurrogates" option
	*
	* Any non-empty value for $options['useSurrogates'] turns surrogates on; a missing
	* or empty value turns them off.
	*
	* @param array $options
	*/
	public function __construct(array $options = [])
	{
		$this->useSurrogates = !empty($options['useSurrogates']);
	}

	/**
	* Split a UTF-8 string into a list of integer values, one or two per character
	*
	* @param  string $string Original string
	* @return integer[]
	*
	* @throws InvalidArgumentException if the string cannot be matched as UTF-8
	*/
	public function split(string $string): array
	{
		// The "u" flag makes PCRE validate the subject as UTF-8, "s" lets the dot match newlines
		$result = preg_match_all('(.)us', $string, $matches);
		if ($result === false)
		{
			throw new InvalidArgumentException('Invalid UTF-8 string');
		}

		$chars = $matches[0];
		if ($this->useSurrogates)
		{
			return $this->charsToCodepointsWithSurrogates($chars);
		}

		return $this->charsToCodepoints($chars);
	}

	/**
	* Map each UTF-8 character of a list to its Unicode codepoint
	*
	* @param  string[] $chars
	* @return integer[]
	*/
	protected function charsToCodepoints(array $chars): array
	{
		return array_map(
			function (string $char): int
			{
				return $this->cp($char);
			},
			$chars
		);
	}

	/**
	* Map each UTF-8 character of a list to its Unicode codepoint, using surrogate pairs
	*
	* Codepoints below 0x10000 are emitted as-is. Anything above is emitted as two values:
	* a high surrogate followed by a low surrogate.
	*
	* @param  string[] $chars
	* @return integer[]
	*/
	protected function charsToCodepointsWithSurrogates(array $chars): array
	{
		$values = [];
		foreach ($chars as $char)
		{
			$codepoint = $this->cp($char);
			if ($codepoint >= 0x10000)
			{
				// 0xD7C0 is 0xD800 - (0x10000 >> 10), which folds the 0x10000 offset
				// into the high surrogate's base
				$values[] = 0xD7C0 + ($codepoint >> 10);
				$values[] = 0xDC00 + ($codepoint & 0x3FF);

				continue;
			}

			$values[] = $codepoint;
		}

		return $values;
	}

	/**
	* Decode a single UTF-8 character into its Unicode codepoint
	*
	* The length of the sequence is inferred from the value of the first byte. The constant
	* subtracted in each branch cancels out the marker bits of the lead byte and of every
	* continuation byte (0x80) after they have been shifted into place.
	*
	* @param  string  $char UTF-8 char
	* @return integer
	*/
	protected function cp(string $char): int
	{
		$lead = ord($char[0]);
		if ($lead < 0xC0)
		{
			// Single byte: the value is returned unchanged
			return $lead;
		}
		if ($lead < 0xE0)
		{
			// Two bytes: 0x3080 is (0xC0 << 6) + 0x80
			return ($lead << 6) + ord($char[1]) - 0x3080;
		}
		if ($lead < 0xF0)
		{
			// Three bytes: 0xE2080 is (0xE0 << 12) + (0x80 << 6) + 0x80
			return ($lead << 12) + (ord($char[1]) << 6) + ord($char[2]) - 0xE2080;
		}

		// Four bytes: 0x3C82080 is (0xF0 << 18) + (0x80 << 12) + (0x80 << 6) + 0x80
		return ($lead << 18) + (ord($char[1]) << 12) + (ord($char[2]) << 6) + ord($char[3]) - 0x3C82080;
	}
}