Verzeichnisstruktur phpBB-3.2.0
- Veröffentlicht
- 06.01.2017
So funktioniert es
|
Auf das letzte Element klicken. Dies geht jeweils einen Schritt zurück. |
Auf das Icon klicken, dies öffnet das Verzeichnis. Nochmal klicken schließt das Verzeichnis. |
|
(Beispiel Datei-Icons)
|
Auf das Icon klicken, um den Quellcode anzuzeigen. |
Normalizer.php
001 <?php
002
003 /*
004 * Copyright (C) 2016 Nicolas Grekas - p@tchwork.com
005 *
006 * This library is free software; you can redistribute it and/or modify it
007 * under the terms of the (at your option):
008 * Apache License v2.0 (http://apache.org/licenses/LICENSE-2.0.txt), or
009 * GNU General Public License v2.0 (http://gnu.org/licenses/gpl-2.0.txt).
010 */
011
012 namespace Patchwork\PHP\Shim;
013
014 /**
015 * Normalizer is a PHP fallback implementation of the Normalizer class provided by the intl extension.
016 *
017 * It has been validated with Unicode 6.3 Normalization Conformance Test.
018 * See http://www.unicode.org/reports/tr15/ for detailed info about Unicode normalizations.
019 *
020 * @internal
021 */
class Normalizer
{
    // Normalization form identifiers accepted by isNormalized() and normalize().
    // The NF* names are aliases sharing the values of the FORM_* names.
    const NONE = 1;
    const FORM_D = 2;
    const FORM_KD = 3;
    const FORM_C = 4;
    const FORM_KC = 5;
    const NFD = 2;
    const NFKD = 3;
    const NFC = 4;
    const NFKC = 5;

    // Lookup tables, lazily loaded by normalize() through getData():
    // $C  <- 'canonicalComposition', $D <- 'canonicalDecomposition',
    // $KD <- 'compatibilityDecomposition', $cC <- 'combiningClass'.
    private static $C;
    private static $D;
    private static $KD;
    private static $cC;

    // Maps the high nibble of a UTF-8 lead byte (lead byte & "\xF0") to the byte length of the sequence.
    private static $ulenMask = array("\xC0" => 2, "\xD0" => 2, "\xE0" => 3, "\xF0" => 4);

    // Mask of single-byte (< 0x80) characters handed to strspn() to skip over ASCII runs.
    private static $ASCII = "\x20\x65\x69\x61\x73\x6E\x74\x72\x6F\x6C\x75\x64\x5D\x5B\x63\x6D\x70\x27\x0A\x67\x7C\x68\x76\x2E\x66\x62\x2C\x3A\x3D\x2D\x71\x31\x30\x43\x32\x2A\x79\x78\x29\x28\x4C\x39\x41\x53\x2F\x50\x22\x45\x6A\x4D\x49\x6B\x33\x3E\x35\x54\x3C\x44\x34\x7D\x42\x7B\x38\x46\x77\x52\x36\x37\x55\x47\x4E\x3B\x4A\x7A\x56\x23\x48\x4F\x57\x5F\x26\x21\x4B\x3F\x58\x51\x25\x59\x5C\x09\x5A\x2B\x7E\x5E\x24\x40\x60\x7F\x00\x01\x02\x03\x04\x05\x06\x07\x08\x0B\x0C\x0D\x0E\x0F\x10\x11\x12\x13\x14\x15\x16\x17\x18\x19\x1A\x1B\x1C\x1D\x1E\x1F";

    /**
     * Cheap check whether a string is already normalized.
     *
     * Only two inexpensive cases answer true: the string consists solely of
     * characters from the ASCII mask, or the form is NFC and the string is valid
     * UTF-8 with no code point above U+02FF. Every other input yields false, even
     * if the string happens to be normalized.
     *
     * @param string $s    The string to check (cast to string)
     * @param int    $form One of the NFD, NFKD, NFC, NFKC constants
     *
     * @return bool True when the string is known to be normalized, false otherwise
     *              (including when $form is NONE or out of range)
     */
    public static function isNormalized($s, $form = self::NFC)
    {
        if ($form <= self::NONE || self::NFKC < $form) {
            return false;
        }
        // strspn() returns the length of the leading ASCII run; no byte at that offset means the whole string is ASCII
        if (!isset($s[strspn($s .= '', self::$ASCII)])) {
            return true;
        }
        if (self::NFC === $form && preg_match('//u', $s) && !preg_match('/[^\x00-\x{2FF}]/u', $s)) {
            return true;
        }

        return false; // Pretend false as quick checks implemented in PHP won't be so quick
    }

    /**
     * Normalizes a UTF-8 string to the requested form.
     *
     * @param string $s    The string to normalize (cast to string)
     * @param int    $form One of the NONE, NFD, NFKD, NFC, NFKC constants
     *
     * @return string|false The normalized string ($s itself for NONE),
     *                      or false when $s is not valid UTF-8 or $form is unknown
     */
    public static function normalize($s, $form = self::NFC)
    {
        // An empty pattern with the /u modifier fails to match only on invalid UTF-8
        if (!preg_match('//u', $s .= '')) {
            return false;
        }

        // $C: recompose after decomposing; $K: also apply the compatibility decomposition table
        switch ($form) {
            case self::NONE: return $s;
            case self::NFC: $C = true; $K = false; break;
            case self::NFD: $C = false; $K = false; break;
            case self::NFKC: $C = true; $K = true; break;
            case self::NFKD: $C = false; $K = true; break;
            default: return false;
        }

        if ('' === $s) {
            return '';
        }

        if ($K && null === self::$KD) {
            self::$KD = self::getData('compatibilityDecomposition');
        }

        if (null === self::$D) {
            self::$D = self::getData('canonicalDecomposition');
            self::$cC = self::getData('combiningClass');
        }

        // When mbstring overloads the string functions, switch to 8bit for the
        // byte-wise processing below and remember the encoding to restore it afterwards
        if (null !== $mbEncoding = (2 /* MB_OVERLOAD_STRING */ & (int) ini_get('mbstring.func_overload')) ? mb_internal_encoding() : null) {
            mb_internal_encoding('8bit');
        }

        $r = self::decompose($s, $K);

        if ($C) {
            if (null === self::$C) {
                self::$C = self::getData('canonicalComposition');
            }

            $r = self::recompose($r);
        }
        if (null !== $mbEncoding) {
            mb_internal_encoding($mbEncoding);
        }

        return $r;
    }

    /**
     * Composes a decomposed, non-empty UTF-8 string.
     *
     * Pairs found in the composition table are merged, and Hangul jamo sequences
     * (leading consonant + vowel, optionally followed by a trailing consonant) are
     * composed arithmetically into a single syllable.
     *
     * @param string $s The output of decompose(); must not be empty as $s[0] is read
     *
     * @return string The composed string
     */
    private static function recompose($s)
    {
        $ASCII = self::$ASCII;
        $compMap = self::$C;
        $combClass = self::$cC;
        $ulenMask = self::$ulenMask;

        // $result: finished output; $tail: combining chars that did not compose with $lastUchr
        $result = $tail = '';

        $i = $s[0] < "\x80" ? 1 : $ulenMask[$s[0] & "\xF0"];
        $len = strlen($s);

        // $lastUchr: the pending character that following characters may compose with
        $lastUchr = substr($s, 0, $i);
        $lastUcls = isset($combClass[$lastUchr]) ? 256 : 0;

        while ($i < $len) {
            if ($s[$i] < "\x80") {
                // ASCII chars

                if ($tail) {
                    $lastUchr .= $tail;
                    $tail = '';
                }

                // Append all but the last char of the ASCII run; the last one becomes the new pending char
                if ($j = strspn($s, $ASCII, $i + 1)) {
                    $lastUchr .= substr($s, $i, $j);
                    $i += $j;
                }

                $result .= $lastUchr;
                $lastUchr = $s[$i];
                $lastUcls = 0;
                ++$i;
                continue;
            }

            $ulen = $ulenMask[$s[$i] & "\xF0"];
            $uchr = substr($s, $i, $ulen);

            // Hangul arithmetic applies only when the pending char is in U+1100..U+1112,
            // the current char is in U+1161..U+1175 and no combining class is pending
            if ($lastUchr < "\xE1\x84\x80" || "\xE1\x84\x92" < $lastUchr
                || $uchr < "\xE1\x85\xA1" || "\xE1\x85\xB5" < $uchr
                || $lastUcls) {
                // Table lookup and combining chars composition

                $ucls = isset($combClass[$uchr]) ? $combClass[$uchr] : 0;

                if (isset($compMap[$lastUchr.$uchr]) && (!$lastUcls || $lastUcls < $ucls)) {
                    $lastUchr = $compMap[$lastUchr.$uchr];
                } elseif ($lastUcls = $ucls) {
                    $tail .= $uchr;
                } else {
                    if ($tail) {
                        $lastUchr .= $tail;
                        $tail = '';
                    }

                    $result .= $lastUchr;
                    $lastUchr = $uchr;
                }
            } else {
                // Hangul chars

                $L = ord($lastUchr[2]) - 0x80;
                $V = ord($uchr[2]) - 0xA1;
                $T = 0;

                $uchr = substr($s, $i + $ulen, 3);

                // Trailing consonants are U+11A8..U+11C2. U+11A7 is the base value
                // (index 0 = no trailing consonant) and must not be consumed here,
                // otherwise it would be silently dropped from the output.
                if ("\xE1\x86\xA7" < $uchr && $uchr <= "\xE1\x87\x82") {
                    $T = ord($uchr[2]) - 0xA7;
                    // Third byte wrapped from 0xBF to 0x80 (second byte 0x87): re-add the 0x40 span
                    0 > $T && $T += 0x40;
                    $ulen += 3;
                }

                // Syllable code point, then encoded as a 3-byte UTF-8 sequence
                $L = 0xAC00 + ($L * 21 + $V) * 28 + $T;
                $lastUchr = chr(0xE0 | $L >> 12).chr(0x80 | $L >> 6 & 0x3F).chr(0x80 | $L & 0x3F);
            }

            $i += $ulen;
        }

        return $result.$lastUchr.$tail;
    }

    /**
     * Decomposes a UTF-8 string and orders runs of combining characters by combining class.
     *
     * @param string $s The string to decompose
     * @param bool   $c Whether to also look characters up in the compatibility
     *                  decomposition table; the variable is reused below as the
     *                  buffer of pending combining characters
     *
     * @return string The decomposed string
     */
    private static function decompose($s, $c)
    {
        $result = '';

        $ASCII = self::$ASCII;
        $decompMap = self::$D;
        $combClass = self::$cC;
        $ulenMask = self::$ulenMask;
        if ($c) {
            $compatMap = self::$KD;
        }

        // From here on $c buffers pending combining chars, keyed by combining class
        $c = array();
        $i = 0;
        $len = strlen($s);

        while ($i < $len) {
            if ($s[$i] < "\x80") {
                // ASCII chars

                if ($c) {
                    ksort($c);
                    $result .= implode('', $c);
                    $c = array();
                }

                $j = 1 + strspn($s, $ASCII, $i + 1);
                $result .= substr($s, $i, $j);
                $i += $j;
                continue;
            }

            $ulen = $ulenMask[$s[$i] & "\xF0"];
            $uchr = substr($s, $i, $ulen);
            $i += $ulen;

            // Anything outside U+AC00..U+D7A3 goes through the tables; that range is decomposed arithmetically
            if ($uchr < "\xEA\xB0\x80" || "\xED\x9E\xA3" < $uchr) {
                // Table lookup

                // isset() is false when $compatMap was never assigned, so the canonical table is used then
                if ($uchr !== $j = isset($compatMap[$uchr]) ? $compatMap[$uchr] : (isset($decompMap[$uchr]) ? $decompMap[$uchr] : $uchr)) {
                    $uchr = $j;

                    $j = strlen($uchr);
                    $ulen = $uchr[0] < "\x80" ? 1 : $ulenMask[$uchr[0] & "\xF0"];

                    if ($ulen != $j) {
                        // Put trailing chars in $s

                        // Everything after the first char of the decomposition is written back
                        // into $s just before offset $i, so the next iterations process it
                        $j -= $ulen;
                        $i -= $j;

                        // Not enough consumed bytes to overwrite: pad $s on the left
                        if (0 > $i) {
                            $s = str_repeat(' ', -$i).$s;
                            $len -= $i;
                            $i = 0;
                        }

                        while ($j--) {
                            $s[$i + $j] = $uchr[$ulen + $j];
                        }

                        $uchr = substr($uchr, 0, $ulen);
                    }
                }
                if (isset($combClass[$uchr])) {
                    // Combining chars, for sorting

                    if (!isset($c[$combClass[$uchr]])) {
                        $c[$combClass[$uchr]] = '';
                    }
                    $c[$combClass[$uchr]] .= $uchr;
                    continue;
                }
            } else {
                // Hangul chars

                // Decode the 3-byte sequence; $j becomes the syllable index (code point - 0xAC00)
                $uchr = unpack('C*', $uchr);
                $j = (($uchr[1] - 224) << 12) + (($uchr[2] - 128) << 6) + $uchr[3] - 0xAC80;

                // 588 = 21 * 28 syllables per leading consonant, 28 per vowel
                $uchr = "\xE1\x84".chr(0x80 + (int) ($j / 588))
                       ."\xE1\x85".chr(0xA1 + (int) (($j % 588) / 28));

                // A non-zero remainder is the trailing consonant index (U+11A7 + index)
                if ($j %= 28) {
                    $uchr .= $j < 25
                        ? ("\xE1\x86".chr(0xA7 + $j))
                        : ("\xE1\x87".chr(0x67 + $j));
                }
            }
            // A non-combining char ends the current run: flush buffered combining chars in class order
            if ($c) {
                ksort($c);
                $result .= implode('', $c);
                $c = array();
            }

            $result .= $uchr;
        }

        if ($c) {
            ksort($c);
            $result .= implode('', $c);
        }

        return $result;
    }

    /**
     * Loads one serialized lookup table from the unidata directory next to this file.
     *
     * @param string $file Base name of the table, without directory or ".ser" extension
     *
     * @return mixed The unserialized content, or false when the file does not exist
     */
    private static function getData($file)
    {
        if (file_exists($file = __DIR__.'/unidata/'.$file.'.ser')) {
            return unserialize(file_get_contents($file));
        }

        return false;
    }
}
302