Verzeichnisstruktur phpBB-3.0.0
- Veröffentlicht
- 12.12.2007
So funktioniert es
|
Auf das letzte Element klicken. Damit geht es jeweils einen Schritt zurück. |
Auf das Icon klicken, dies öffnet das Verzeichnis. Nochmal klicken schließt das Verzeichnis. |
|
(Beispiel Datei-Icons)
|
Auf das Icon klicken, um den Quellcode anzuzeigen. |
utf_normalizer_test.php
001 <?php
002 /**
003 *
004 * @package phpBB3
005 * @version $Id$
006 * @copyright (c) 2005 phpBB Group
007 * @license http://opensource.org/licenses/gpl-license.php GNU Public License
008 *
009 */
010
// This is a developer tool: refuse to run under anything but the CLI SAPI.
if (PHP_SAPI !== 'cli')
{
	die("This program must be run from the command line.\n");
}

//
// SECURITY NOTICE
//
// Leaving this script runnable is potentially dangerous, so it ships disabled.
// To enable it, remove or comment out the die("Please read ...") statement below.
// Do NOT FORGET to delete the script, or to disable it again, once you are done.
//
die("Please read the first lines of this script for instructions on how to enable it");

// No time limit for the run, and report every notice and warning.
error_reporting(E_ALL);
set_time_limit(0);

define('IN_PHPBB', true);
$phpbb_root_path = '../';
$phpEx = substr(strrchr(__FILE__, '.'), 1);


/**
* Fetch the data files the tests read (skipped by download() when already present)
*/
$unicode_data_urls = array(
	'http://www.unicode.org/Public/UNIDATA/NormalizationTest.txt',
	'http://www.unicode.org/Public/UNIDATA/UnicodeData.txt'
);

foreach ($unicode_data_urls as $unicode_data_url)
{
	download($unicode_data_url);
}

/**
* Test plan, keyed by normalization form.
*
* Each form maps an "expected" column of the test file to the list of columns
* that must yield that expected value once normalized with the form:
*
*	NFC:	c2 == NFC(c1) == NFC(c2) == NFC(c3)
*			c4 == NFC(c4) == NFC(c5)
*	NFD:	c3 == NFD(c1) == NFD(c2) == NFD(c3)
*			c5 == NFD(c4) == NFD(c5)
*	NFKC:	c4 == NFKC(c1) == NFKC(c2) == NFKC(c3) == NFKC(c4) == NFKC(c5)
*	NFKD:	c5 == NFKD(c1) == NFKD(c2) == NFKD(c3) == NFKD(c4) == NFKD(c5)
*/
$first_three_columns = array('c1', 'c2', 'c3');
$last_two_columns = array('c4', 'c5');
$all_five_columns = array('c1', 'c2', 'c3', 'c4', 'c5');

$test_suite = array(
	'NFC'	=> array(
		'c2'	=> $first_three_columns,
		'c4'	=> $last_two_columns
	),
	'NFD'	=> array(
		'c3'	=> $first_three_columns,
		'c5'	=> $last_two_columns
	),
	'NFKC'	=> array(
		'c4'	=> $all_five_columns
	),
	'NFKD'	=> array(
		'c5'	=> $all_five_columns
	)
);
079
require_once($phpbb_root_path . 'includes/utf/utf_normalizer.' . $phpEx);

/**
* Run every data line of NormalizationTest.txt through the test suite.
*
* $i counts data lines within the current "@" section (for progress output),
* $n counts physical lines read (for the failure report), and $tested_chars
* records every single-codepoint c1 so the invariants test can skip it.
*/
$i = $n = 0;
$failed = false;
$tested_chars = array();

$fp = fopen($phpbb_root_path . 'develop/NormalizationTest.txt', 'rb');

if (!$fp)
{
	// Without this check feof() on a failed handle never reports EOF
	die("Can't open develop/NormalizationTest.txt for reading\n");
}

// fgets() returns false once the file is exhausted; stopping on that avoids
// indexing into a non-string on the final iteration
while (($line = fgets($fp)) !== false)
{
	++$n;

	if ($line[0] == '@')
	{
		// Section header: close the previous section's progress line
		if ($i)
		{
			echo "done\n";
		}

		$i = 0;
		echo "\n", substr($line, 1), "\n\n";
		continue;
	}

	// Data lines start with a hex digit; the leading space in the haystack
	// keeps a match at the first digit from being reported as offset 0
	if (!strpos(' 0123456789ABCDEF', $line[0]))
	{
		continue;
	}

	if (++$i % 100 == 0)
	{
		echo $i, ' ';
	}

	list($c1, $c2, $c3, $c4, $c5) = explode(';', $line);

	if (!strpos($c1, ' '))
	{
		/**
		* We are currently testing a single character, we add it to the list of
		* characters we have processed so that we can exclude it when testing
		* for invariants
		*/
		$tested_chars[$c1] = 1;
	}

	foreach ($test_suite as $form => $serie)
	{
		foreach ($serie as $expected => $tests)
		{
			$hex_expected = ${$expected};
			$utf_expected = hexseq_to_utf($hex_expected);

			foreach ($tests as $test)
			{
				// Normalize the column under test (c1..c5), NOT the expected
				// value: feeding the expected value back in would make the
				// comparison below pass regardless of what the normalizer does
				$hex_tested = ${$test};
				$utf_result = hexseq_to_utf($hex_tested);

				// Invoked like the invariants test further down, which relies
				// on the method rewriting its argument in place.
				// NOTE(review): confirm the by-reference signature in utf_normalizer
				utf_normalizer::$form($utf_result);

				if (strcmp($utf_expected, $utf_result))
				{
					$failed = true;
					$hex_result = utf_to_hexseq($utf_result);

					echo "\nFAILED $expected == $form($test) ($hex_expected != $hex_result)";
				}
			}
		}

		// Report every mismatch of the current form before aborting
		if ($failed)
		{
			die("\n\nFailed at line $n\n");
		}
	}
}
fclose($fp);
155
/**
* Test for invariants
*
* Every codepoint listed in UnicodeData.txt that was not already covered as a
* single-character c1 in the normalization test is expected to come out of all
* four normalization forms unchanged.
*/
echo "\n\nTesting for invariants...\n\n";

$fp = fopen($phpbb_root_path . 'develop/UnicodeData.txt', 'rt');

if (!$fp)
{
	// Without this check feof() on a failed handle never reports EOF
	die("Can't open develop/UnicodeData.txt for reading\n");
}

$n = 0;
while (!feof($fp))
{
	if (++$n % 100 == 0)
	{
		echo $n, ' ';
	}

	$line = fgets($fp, 1024);

	// Skips lines without a field separator, as well as the false returned
	// by fgets() at end of file
	if ($line === false || !$pos = strpos($line, ';'))
	{
		continue;
	}

	// First field of the line: the codepoint in hexadecimal
	$hex_tested = $hex_expected = substr($line, 0, $pos);

	if (isset($tested_chars[$hex_tested]))
	{
		continue;
	}

	$utf_expected = hex_to_utf($hex_expected);

	if ($utf_expected >= UTF8_SURROGATE_FIRST
		&& $utf_expected <= UTF8_SURROGATE_LAST)
	{
		/**
		* Surrogates are illegal on their own, we expect the normalizer
		* to return a replacement char
		*
		* NOTE(review): the string handed to the normalizer below is the
		* replacement char itself, not the surrogate - confirm whether the
		* surrogate was meant to be the input
		*/
		$utf_expected = UTF8_REPLACEMENT;
		$hex_expected = utf_to_hexseq($utf_expected);
	}

	foreach (array('nfc', 'nfkc', 'nfd', 'nfkd') as $form)
	{
		// The result is read back from the argument after the call
		$utf_result = $utf_expected;
		utf_normalizer::$form($utf_result);
		$hex_result = utf_to_hexseq($utf_result);

		if (strcmp($utf_expected, $utf_result))
		{
			$failed = true;

			echo "\nFAILED $hex_expected == $form($hex_tested) ($hex_expected != $hex_result)";
		}
	}

	// Report the mismatches of all four forms before aborting
	if ($failed)
	{
		die("\n\nFailed at line $n\n");
	}
}
fclose($fp);

die("\n\nALL TESTS PASSED SUCCESSFULLY\n");
221
/**
* Download a file to the develop/ dir
*
* Nothing is done when a file with the same base name already exists there.
* Progress is printed as a running KiB counter. If reading from the URL or
* writing to disk fails part-way, the partial file is removed so that the next
* run does not mistake it for a complete download, and the script terminates.
*
* @param	string	$url	URL of the file to download
* @return	void
*/
function download($url)
{
	global $phpbb_root_path;

	$target = $phpbb_root_path . 'develop/' . basename($url);

	if (file_exists($target))
	{
		return;
	}

	echo 'Downloading from ', $url, ' ';

	if (!$fpr = fopen($url, 'rb'))
	{
		die("Can't download from $url\nPlease download it yourself and put it in the develop/ dir, kthxbai");
	}

	if (!$fpw = fopen($target, 'wb'))
	{
		fclose($fpr);
		die("Can't open develop/" . basename($url) . " for output... please check your permissions or something");
	}

	$i = 0;
	$chunk = 32768;
	$done = '';

	while (!feof($fpr))
	{
		$data = fread($fpr, $chunk);
		$written = ($data === false) ? false : fwrite($fpw, $data);

		if ($written === false || $written < strlen($data))
		{
			// Do not leave a truncated file behind: file_exists() above would
			// treat it as a finished download on the next run
			fclose($fpr);
			fclose($fpw);
			unlink($target);

			die("\nDownload of $url failed\nPlease download it yourself and put it in the develop/ dir");
		}

		$i += $written;

		// Backspace over the previous counter, then print the updated one
		echo str_repeat("\x08", strlen($done));

		$done = ($i >> 10) . ' KiB';
		echo $done;
	}
	fclose($fpr);
	fclose($fpw);

	echo "\n";
}
266
/**
* Render a UTF-8 string as a sequence of codepoints in hexadecimal
*
* Every codepoint is written in uppercase hex, zero-padded to at least four
* digits; codepoints are separated by a single space.
*
* @param	string	$str	UTF-8 string
* @return	string	Codepoints in hex, separated with a space
*/
function utf_to_hexseq($str)
{
	$codepoints = array();
	$total = strlen($str);

	for ($offset = 0; $offset < $total; $offset += $width)
	{
		// The high nibble of the lead byte selects how many bytes are consumed
		$high_nibble = ord($str[$offset]) & 0xF0;

		if ($high_nibble == 0xC0 || $high_nibble == 0xD0)
		{
			$width = 2;
		}
		else if ($high_nibble == 0xE0)
		{
			$width = 3;
		}
		else if ($high_nibble == 0xF0)
		{
			$width = 4;
		}
		else
		{
			$width = 1;
		}

		$utf_char = substr($str, $offset, $width);

		// %04X: uppercase hex, left-padded with zeros up to four digits
		$codepoints[] = sprintf('%04X', utf_to_cp($utf_char));
	}

	return implode(' ', $codepoints);
}
317
/**
* Decode a single UTF-8 char into its codepoint
*
* A one-byte input is returned as its byte value. For two to four bytes the
* payload bits of the lead byte are combined with the low six bits of every
* following byte. Any other length terminates the script.
*
* @param	string	$utf_char	UTF-8 char
* @return	integer				Unicode codepoint
*/
function utf_to_cp($utf_char)
{
	$length = strlen($utf_char);

	if ($length < 1 || $length > 4)
	{
		die('UTF-8 chars can only be 1-4 bytes long');
	}

	if ($length == 1)
	{
		return ord($utf_char);
	}

	// Payload mask of the lead byte, indexed by sequence length
	$lead_masks = array(2 => 0x1F, 3 => 0x0F, 4 => 0x07);

	$cp = ord($utf_char[0]) & $lead_masks[$length];

	for ($k = 1; $k < $length; ++$k)
	{
		// Each following byte contributes its low six bits
		$cp = ($cp << 6) | (ord($utf_char[$k]) & 0x3F);
	}

	return $cp;
}
344
/**
* Build a UTF-8 string from a sequence of codepoints in hexadecimal
*
* The sequence is split on single spaces and every piece is converted with
* hex_to_utf(); the resulting chars are concatenated in order.
*
* @param	string	$seq	Sequence of codepoints, separated with a space
* @return	string			UTF-8 string
*/
function hexseq_to_utf($seq)
{
	$utf = '';

	foreach (explode(' ', $seq) as $hex)
	{
		$utf .= hex_to_utf($hex);
	}

	return $utf;
}
355
/**
* Turn one codepoint, given in hexadecimal, into a UTF-8 char
*
* @param	string	$hex	Codepoint, in hexadecimal
* @return	string			UTF-8 char
*/
function hex_to_utf($hex)
{
	$codepoint = hexdec($hex);

	return cp_to_utf($codepoint);
}
366
/**
* Encode a codepoint as a UTF-8 char
*
* Codepoints up to 0x7F yield one byte, up to 0x7FF two bytes, up to 0xFFFF
* three bytes, and anything above four bytes.
*
* @param	integer	$cp	Unicode codepoint
* @return	string		UTF-8 string
*/
function cp_to_utf($cp)
{
	if ($cp <= 0x7F)
	{
		return chr($cp);
	}

	// Last byte of every multi-byte form: low six bits of the codepoint
	$last = chr(0x80 | ($cp & 0x3F));

	if ($cp <= 0x7FF)
	{
		return chr(0xC0 | ($cp >> 6)) . $last;
	}

	$second_last = chr(0x80 | (($cp >> 6) & 0x3F));

	if ($cp <= 0xFFFF)
	{
		return chr(0xE0 | ($cp >> 12)) . $second_last . $last;
	}

	return chr(0xF0 | ($cp >> 18)) . chr(0x80 | (($cp >> 12) & 0x3F)) . $second_last . $last;
}