Verzeichnisstruktur phpBB-3.1.0


Veröffentlicht
27.10.2014

So funktioniert es


Auf das letzte Element klicken. Dies geht jeweils einen Schritt zurück.

Auf das Icon klicken, dies öffnet das Verzeichnis. Nochmal klicken schließt das Verzeichnis.
Auf den Verzeichnisnamen klicken, dies zeigt nur das Verzeichnis mit Inhalt an

(Beispiel Datei-Icons)

Auf das Icon klicken, um den Quellcode anzuzeigen

utf_tools.php

Zuletzt modifiziert: 09.10.2024, 12:52 - Dateigröße: 59.65 KiB


0001  <?php
0002  /**
0003  *
0004  * This file is part of the phpBB Forum Software package.
0005  *
0006  * @copyright (c) phpBB Limited <https://www.phpbb.com>
0007  * @license GNU General Public License, version 2 (GPL-2.0)
0008  *
0009  * For full copyright and license information, please see
0010  * the docs/CREDITS.txt file.
0011  *
0012  */
0013   
0014  /**
0015  */
0016  if (!defined('IN_PHPBB'))
0017  {
0018      exit;
0019  }
0020   
0021  // Enforce ASCII only string handling
0022  setlocale(LC_CTYPE, 'C');
0023   
0024  /**
0025  * UTF-8 tools
0026  *
0027  * Whenever possible, these functions will try to use PHP's built-in functions or
0028  * extensions, otherwise they will default to custom routines.
0029  *
0030  */
0031   
0032  if (!extension_loaded('xml'))
0033  {
/**
* Fallback for PHP's native utf8_encode, used when the XML extension is missing
*
* Bytes below 0x80 are copied unchanged. Every other ISO-8859-1 byte becomes a
* two-byte UTF-8 sequence: 0x80-0xBF keep their value behind a 0xC2 lead byte,
* 0xC0-0xFF are lowered by 64 and placed behind a 0xC3 lead byte.
*
* @param string $str ISO-8859-1 encoded data
* @return string UTF-8 encoded data
*/
function utf8_encode($str)
{
    $encoded = '';
    $byte_count = strlen($str);

    for ($pos = 0; $pos < $byte_count; $pos++)
    {
        $byte = $str[$pos];
        $code = ord($byte);

        if ($code >= 0xC0)
        {
            // Upper quarter of the Latin-1 range
            $encoded .= "\xC3" . chr($code - 64);
            continue;
        }

        // ASCII stays as it is, 0x80-0xBF only needs the lead byte
        $encoded .= ($code >= 0x80) ? "\xC2" . $byte : $byte;
    }

    return $encoded;
}
0063   
/**
* Fallback for PHP's native utf8_decode, used when the XML extension is missing
*
* A two-byte sequence whose code point fits into a single byte is converted
* to that byte. Every other multibyte sequence is replaced by one '?', and so
* is a two-byte lead byte that is cut off by the end of the input. All
* remaining bytes are copied unchanged.
*
* @param string $str UTF-8 encoded data
* @return string ISO-8859-1 encoded data
*/
function utf8_decode($str)
{
    $pos = 0;
    $len = strlen($str);
    $ret = '';

    while ($pos < $len)
    {
        // The high nibble of the lead byte selects the sequence length
        $ord = ord($str[$pos]) & 0xF0;
        if ($ord === 0xC0 || $ord === 0xD0)
        {
            if ($pos + 1 >= $len)
            {
                // Truncated sequence: the continuation byte is missing, so
                // do not read past the end of the string
                $ret .= '?';
                break;
            }

            // Two bytes: 5 payload bits from the lead, 6 from the continuation
            $charval = ((ord($str[$pos]) & 0x1F) << 6) | (ord($str[$pos + 1]) & 0x3F);
            $pos += 2;
            $ret .= (($charval < 256) ? chr($charval) : '?');
        }
        else if ($ord === 0xE0)
        {
            // Three-byte sequences never fit into a single byte
            $ret .= '?';
            $pos += 3;
        }
        else if ($ord === 0xF0)
        {
            // Four-byte sequences never fit into a single byte
            $ret .= '?';
            $pos += 4;
        }
        else
        {
            $ret .= $str[$pos];
            ++$pos;
        }
    }
    return $ret;
}
0103  }
0104   
0105  // mbstring is old and has its functions around for older versions of PHP.
0106  // if mbstring is not loaded, we go into native mode.
0107  if (extension_loaded('mbstring'))
0108  {
0109      mb_internal_encoding('UTF-8');
0110   
/**
* UTF-8 aware alternative to strrpos
* Find position of last occurrence of a char in a string
*
* @param string $str haystack
* @param string $needle needle
* @param integer $offset (optional) offset, handed to mb_strrpos when given
* @return mixed integer position, or FALSE if the haystack is empty or
*               mb_strrpos reports no match
* @ignore
*/
function utf8_strrpos($str, $needle, $offset = null)
{
    // Emulate behaviour of strrpos rather than raising warning.
    // empty() is deliberately not used: it also treats the string "0" as
    // empty, which made a search inside "0" fail.
    if ($str === '' || $str === null || $str === false || $str === array())
    {
        return false;
    }

    if (is_null($offset))
    {
        return mb_strrpos($str, $needle);
    }

    return mb_strrpos($str, $needle, $offset);
}
0136   
/**
* UTF-8 aware alternative to strpos, backed by mb_strpos
*
* @param string $str haystack
* @param string $needle needle
* @param integer $offset (optional) offset, handed to mb_strpos when given
* @return mixed the result of mb_strpos: integer position or FALSE
* @ignore
*/
function utf8_strpos($str, $needle, $offset = null)
{
    if ($offset !== null)
    {
        return mb_strpos($str, $needle, $offset);
    }

    return mb_strpos($str, $needle);
}
0152   
/**
* UTF-8 aware alternative to strtolower, backed by mb_strtolower
*
* @param string $str string to convert
* @return string the result of mb_strtolower
* @ignore
*/
function utf8_strtolower($str)
{
    $lowered = mb_strtolower($str);

    return $lowered;
}
0161   
/**
* UTF-8 aware alternative to strtoupper, backed by mb_strtoupper
*
* @param string $str string to convert
* @return string the result of mb_strtoupper
* @ignore
*/
function utf8_strtoupper($str)
{
    $raised = mb_strtoupper($str);

    return $raised;
}
0170   
/**
* UTF-8 aware alternative to substr, backed by mb_substr
*
* @param string $str source string
* @param integer $offset offset handed to mb_substr
* @param integer $length (optional) length, handed to mb_substr when given
* @return string the result of mb_substr
* @ignore
*/
function utf8_substr($str, $offset, $length = null)
{
    if ($length !== null)
    {
        return mb_substr($str, $offset, $length);
    }

    return mb_substr($str, $offset);
}
0186   
/**
* Return the length (in characters) of a UTF-8 string, using mb_strlen
*
* @param string $text UTF-8 string
* @return integer the result of mb_strlen for the 'utf-8' encoding
* @ignore
*/
function utf8_strlen($text)
{
    $char_count = mb_strlen($text, 'utf-8');

    return $char_count;
}
0195  }
0196  else
0197  {
/**
* UTF-8 aware alternative to strrpos
* Find position of last occurrence of a char in a string
*
* Without an offset the haystack is split at the needle; the character length
* of everything before the last piece is the position. With an offset the
* search is repeated on the part of the haystack starting at that offset and
* the offset is added to the result.
*
* @author Harry Fuecks
* @param string $str haystack
* @param string $needle needle
* @param integer $offset (optional) offset (from left)
* @return mixed integer position or FALSE on failure
*/
function utf8_strrpos($str, $needle, $offset = null)
{
    if ($offset !== null)
    {
        if (!is_int($offset))
        {
            trigger_error('utf8_strrpos    expects    parameter 3    to be long', E_USER_ERROR);
            return false;
        }

        // Search the remainder and translate the hit back to the full string
        $remainder = utf8_substr($str, $offset);
        $found = utf8_strrpos($remainder, $needle);

        return ($found === false) ? false : $found + $offset;
    }

    $pieces = explode($needle, $str);
    if (count($pieces) <= 1)
    {
        // The needle does not occur at all
        return false;
    }

    // Drop whatever follows the last match; what is left ends at the match
    array_pop($pieces);

    return utf8_strlen(implode($needle, $pieces));
}
0242   
/**
* UTF-8 aware alternative to strpos
* Find position of first occurrence of a string
*
* Without an offset the haystack is split at the needle and the character
* length of the first piece is the position. With an offset the search is
* repeated on the part of the haystack starting at that offset and the offset
* is added to the result.
*
* @author Harry Fuecks
* @param string $str haystack
* @param string $needle needle
* @param integer $offset offset in characters (from left)
* @return mixed integer position or FALSE on failure
*/
function utf8_strpos($str, $needle, $offset = null)
{
    if ($offset !== null)
    {
        if (!is_int($offset))
        {
            trigger_error('utf8_strpos:  Offset must  be an integer', E_USER_ERROR);
            return false;
        }

        // Search the remainder and translate the hit back to the full string
        $remainder = utf8_substr($str, $offset);
        $found = utf8_strpos($remainder, $needle);

        return ($found === false) ? false : $found + $offset;
    }

    $pieces = explode($needle, $str);

    // More than one piece means the needle occurred; the first piece precedes it
    return (count($pieces) > 1) ? utf8_strlen($pieces[0]) : false;
}
0282   
/**
* UTF-8 aware alternative to strtolower
* Make a string lowercase
*
* The string is first passed through strtolower and the result is then
* translated with the mapping table below; sequences that are not listed in
* the table are left as strtolower returned them.
*
* Note: The concept of a character's "case" only exists in some alphabets
* such as Latin, Greek, Cyrillic, Armenian and archaic Georgian - it does
* not exist in the Chinese alphabet, for example. See Unicode Standard
* Annex #21: Case Mappings
*
* @param string $string string to convert
* @return string string in lowercase
*/
function utf8_strtolower($string)
{
    // Uppercase UTF-8 sequence => lowercase UTF-8 sequence
    static $lowercase_map = array(
        "\xC3\x80" => "\xC3\xA0", "\xC3\x81" => "\xC3\xA1",
        "\xC3\x82" => "\xC3\xA2", "\xC3\x83" => "\xC3\xA3", "\xC3\x84" => "\xC3\xA4", "\xC3\x85" => "\xC3\xA5",
        "\xC3\x86" => "\xC3\xA6", "\xC3\x87" => "\xC3\xA7", "\xC3\x88" => "\xC3\xA8", "\xC3\x89" => "\xC3\xA9",
        "\xC3\x8A" => "\xC3\xAA", "\xC3\x8B" => "\xC3\xAB", "\xC3\x8C" => "\xC3\xAC", "\xC3\x8D" => "\xC3\xAD",
        "\xC3\x8E" => "\xC3\xAE", "\xC3\x8F" => "\xC3\xAF", "\xC3\x90" => "\xC3\xB0", "\xC3\x91" => "\xC3\xB1",
        "\xC3\x92" => "\xC3\xB2", "\xC3\x93" => "\xC3\xB3", "\xC3\x94" => "\xC3\xB4", "\xC3\x95" => "\xC3\xB5",
        "\xC3\x96" => "\xC3\xB6", "\xC3\x98" => "\xC3\xB8", "\xC3\x99" => "\xC3\xB9", "\xC3\x9A" => "\xC3\xBA",
        "\xC3\x9B" => "\xC3\xBB", "\xC3\x9C" => "\xC3\xBC", "\xC3\x9D" => "\xC3\xBD", "\xC3\x9E" => "\xC3\xBE",
        "\xC4\x80" => "\xC4\x81", "\xC4\x82" => "\xC4\x83", "\xC4\x84" => "\xC4\x85", "\xC4\x86" => "\xC4\x87",
        "\xC4\x88" => "\xC4\x89", "\xC4\x8A" => "\xC4\x8B", "\xC4\x8C" => "\xC4\x8D", "\xC4\x8E" => "\xC4\x8F",
        "\xC4\x90" => "\xC4\x91", "\xC4\x92" => "\xC4\x93", "\xC4\x96" => "\xC4\x97", "\xC4\x98" => "\xC4\x99",
        "\xC4\x9A" => "\xC4\x9B", "\xC4\x9C" => "\xC4\x9D", "\xC4\x9E" => "\xC4\x9F", "\xC4\xA0" => "\xC4\xA1",
        "\xC4\xA2" => "\xC4\xA3", "\xC4\xA4" => "\xC4\xA5", "\xC4\xA6" => "\xC4\xA7", "\xC4\xA8" => "\xC4\xA9",
        "\xC4\xAA" => "\xC4\xAB", "\xC4\xAE" => "\xC4\xAF", "\xC4\xB4" => "\xC4\xB5", "\xC4\xB6" => "\xC4\xB7",
        "\xC4\xB9" => "\xC4\xBA", "\xC4\xBB" => "\xC4\xBC", "\xC4\xBD" => "\xC4\xBE", "\xC5\x81" => "\xC5\x82",
        "\xC5\x83" => "\xC5\x84", "\xC5\x85" => "\xC5\x86", "\xC5\x87" => "\xC5\x88", "\xC5\x8A" => "\xC5\x8B",
        "\xC5\x8C" => "\xC5\x8D", "\xC5\x90" => "\xC5\x91", "\xC5\x94" => "\xC5\x95", "\xC5\x96" => "\xC5\x97",
        "\xC5\x98" => "\xC5\x99", "\xC5\x9A" => "\xC5\x9B", "\xC5\x9C" => "\xC5\x9D", "\xC5\x9E" => "\xC5\x9F",
        "\xC5\xA0" => "\xC5\xA1", "\xC5\xA2" => "\xC5\xA3", "\xC5\xA4" => "\xC5\xA5", "\xC5\xA6" => "\xC5\xA7",
        "\xC5\xA8" => "\xC5\xA9", "\xC5\xAA" => "\xC5\xAB", "\xC5\xAC" => "\xC5\xAD", "\xC5\xAE" => "\xC5\xAF",
        "\xC5\xB0" => "\xC5\xB1", "\xC5\xB2" => "\xC5\xB3", "\xC5\xB4" => "\xC5\xB5", "\xC5\xB6" => "\xC5\xB7",
        "\xC5\xB8" => "\xC3\xBF", "\xC5\xB9" => "\xC5\xBA", "\xC5\xBB" => "\xC5\xBC", "\xC5\xBD" => "\xC5\xBE",
        "\xC6\xA0" => "\xC6\xA1", "\xC6\xAF" => "\xC6\xB0", "\xC8\x98" => "\xC8\x99", "\xC8\x9A" => "\xC8\x9B",
        "\xCE\x86" => "\xCE\xAC", "\xCE\x88" => "\xCE\xAD", "\xCE\x89" => "\xCE\xAE", "\xCE\x8A" => "\xCE\xAF",
        "\xCE\x8C" => "\xCF\x8C", "\xCE\x8E" => "\xCF\x8D", "\xCE\x8F" => "\xCF\x8E", "\xCE\x91" => "\xCE\xB1",
        "\xCE\x92" => "\xCE\xB2", "\xCE\x93" => "\xCE\xB3", "\xCE\x94" => "\xCE\xB4", "\xCE\x95" => "\xCE\xB5",
        "\xCE\x96" => "\xCE\xB6", "\xCE\x97" => "\xCE\xB7", "\xCE\x98" => "\xCE\xB8", "\xCE\x99" => "\xCE\xB9",
        "\xCE\x9A" => "\xCE\xBA", "\xCE\x9B" => "\xCE\xBB", "\xCE\x9C" => "\xCE\xBC", "\xCE\x9D" => "\xCE\xBD",
        "\xCE\x9E" => "\xCE\xBE", "\xCE\x9F" => "\xCE\xBF", "\xCE\xA0" => "\xCF\x80", "\xCE\xA1" => "\xCF\x81",
        "\xCE\xA3" => "\xCF\x83", "\xCE\xA4" => "\xCF\x84", "\xCE\xA5" => "\xCF\x85", "\xCE\xA6" => "\xCF\x86",
        "\xCE\xA7" => "\xCF\x87", "\xCE\xA8" => "\xCF\x88", "\xCE\xA9" => "\xCF\x89", "\xCE\xAA" => "\xCF\x8A",
        "\xCE\xAB" => "\xCF\x8B", "\xD0\x81" => "\xD1\x91", "\xD0\x82" => "\xD1\x92", "\xD0\x83" => "\xD1\x93",
        "\xD0\x84" => "\xD1\x94", "\xD0\x85" => "\xD1\x95", "\xD0\x86" => "\xD1\x96", "\xD0\x87" => "\xD1\x97",
        "\xD0\x88" => "\xD1\x98", "\xD0\x89" => "\xD1\x99", "\xD0\x8A" => "\xD1\x9A", "\xD0\x8B" => "\xD1\x9B",
        "\xD0\x8C" => "\xD1\x9C", "\xD0\x8E" => "\xD1\x9E", "\xD0\x8F" => "\xD1\x9F", "\xD0\x90" => "\xD0\xB0",
        "\xD0\x91" => "\xD0\xB1", "\xD0\x92" => "\xD0\xB2", "\xD0\x93" => "\xD0\xB3", "\xD0\x94" => "\xD0\xB4",
        "\xD0\x95" => "\xD0\xB5", "\xD0\x96" => "\xD0\xB6", "\xD0\x97" => "\xD0\xB7", "\xD0\x98" => "\xD0\xB8",
        "\xD0\x99" => "\xD0\xB9", "\xD0\x9A" => "\xD0\xBA", "\xD0\x9B" => "\xD0\xBB", "\xD0\x9C" => "\xD0\xBC",
        "\xD0\x9D" => "\xD0\xBD", "\xD0\x9E" => "\xD0\xBE", "\xD0\x9F" => "\xD0\xBF", "\xD0\xA0" => "\xD1\x80",
        "\xD0\xA1" => "\xD1\x81", "\xD0\xA2" => "\xD1\x82", "\xD0\xA3" => "\xD1\x83", "\xD0\xA4" => "\xD1\x84",
        "\xD0\xA5" => "\xD1\x85", "\xD0\xA6" => "\xD1\x86", "\xD0\xA7" => "\xD1\x87", "\xD0\xA8" => "\xD1\x88",
        "\xD0\xA9" => "\xD1\x89", "\xD0\xAA" => "\xD1\x8A", "\xD0\xAB" => "\xD1\x8B", "\xD0\xAC" => "\xD1\x8C",
        "\xD0\xAD" => "\xD1\x8D", "\xD0\xAE" => "\xD1\x8E", "\xD0\xAF" => "\xD1\x8F", "\xD2\x90" => "\xD2\x91",
        "\xE1\xB8\x82" => "\xE1\xB8\x83", "\xE1\xB8\x8A" => "\xE1\xB8\x8B", "\xE1\xB8\x9E" => "\xE1\xB8\x9F", "\xE1\xB9\x80" => "\xE1\xB9\x81",
        "\xE1\xB9\x96" => "\xE1\xB9\x97", "\xE1\xB9\xA0" => "\xE1\xB9\xA1", "\xE1\xB9\xAA" => "\xE1\xB9\xAB", "\xE1\xBA\x80" => "\xE1\xBA\x81",
        "\xE1\xBA\x82" => "\xE1\xBA\x83", "\xE1\xBA\x84" => "\xE1\xBA\x85", "\xE1\xBB\xB2" => "\xE1\xBB\xB3"
    );

    // ASCII letters first, then every multibyte letter listed in the table
    $ascii_lowered = strtolower($string);

    return strtr($ascii_lowered, $lowercase_map);
}
0347   
/**
* UTF-8 aware alternative to strtoupper
* Make a string uppercase
*
* The string is first passed through strtoupper and the result is then
* translated with the mapping table below; sequences that are not listed in
* the table are left as strtoupper returned them.
*
* Note: The concept of a character's "case" only exists in some alphabets
* such as Latin, Greek, Cyrillic, Armenian and archaic Georgian - it does
* not exist in the Chinese alphabet, for example. See Unicode Standard
* Annex #21: Case Mappings
*
* @param string $string string to convert
* @return string string in uppercase
*/
function utf8_strtoupper($string)
{
    // Lowercase UTF-8 sequence => uppercase UTF-8 sequence
    static $uppercase_map = array(
        "\xC3\xA0" => "\xC3\x80", "\xC3\xA1" => "\xC3\x81",
        "\xC3\xA2" => "\xC3\x82", "\xC3\xA3" => "\xC3\x83", "\xC3\xA4" => "\xC3\x84", "\xC3\xA5" => "\xC3\x85",
        "\xC3\xA6" => "\xC3\x86", "\xC3\xA7" => "\xC3\x87", "\xC3\xA8" => "\xC3\x88", "\xC3\xA9" => "\xC3\x89",
        "\xC3\xAA" => "\xC3\x8A", "\xC3\xAB" => "\xC3\x8B", "\xC3\xAC" => "\xC3\x8C", "\xC3\xAD" => "\xC3\x8D",
        "\xC3\xAE" => "\xC3\x8E", "\xC3\xAF" => "\xC3\x8F", "\xC3\xB0" => "\xC3\x90", "\xC3\xB1" => "\xC3\x91",
        "\xC3\xB2" => "\xC3\x92", "\xC3\xB3" => "\xC3\x93", "\xC3\xB4" => "\xC3\x94", "\xC3\xB5" => "\xC3\x95",
        "\xC3\xB6" => "\xC3\x96", "\xC3\xB8" => "\xC3\x98", "\xC3\xB9" => "\xC3\x99", "\xC3\xBA" => "\xC3\x9A",
        "\xC3\xBB" => "\xC3\x9B", "\xC3\xBC" => "\xC3\x9C", "\xC3\xBD" => "\xC3\x9D", "\xC3\xBE" => "\xC3\x9E",
        "\xC3\xBF" => "\xC5\xB8", "\xC4\x81" => "\xC4\x80", "\xC4\x83" => "\xC4\x82", "\xC4\x85" => "\xC4\x84",
        "\xC4\x87" => "\xC4\x86", "\xC4\x89" => "\xC4\x88", "\xC4\x8B" => "\xC4\x8A", "\xC4\x8D" => "\xC4\x8C",
        "\xC4\x8F" => "\xC4\x8E", "\xC4\x91" => "\xC4\x90", "\xC4\x93" => "\xC4\x92", "\xC4\x97" => "\xC4\x96",
        "\xC4\x99" => "\xC4\x98", "\xC4\x9B" => "\xC4\x9A", "\xC4\x9D" => "\xC4\x9C", "\xC4\x9F" => "\xC4\x9E",
        "\xC4\xA1" => "\xC4\xA0", "\xC4\xA3" => "\xC4\xA2", "\xC4\xA5" => "\xC4\xA4", "\xC4\xA7" => "\xC4\xA6",
        "\xC4\xA9" => "\xC4\xA8", "\xC4\xAB" => "\xC4\xAA", "\xC4\xAF" => "\xC4\xAE", "\xC4\xB5" => "\xC4\xB4",
        "\xC4\xB7" => "\xC4\xB6", "\xC4\xBA" => "\xC4\xB9", "\xC4\xBC" => "\xC4\xBB", "\xC4\xBE" => "\xC4\xBD",
        "\xC5\x82" => "\xC5\x81", "\xC5\x84" => "\xC5\x83", "\xC5\x86" => "\xC5\x85", "\xC5\x88" => "\xC5\x87",
        "\xC5\x8B" => "\xC5\x8A", "\xC5\x8D" => "\xC5\x8C", "\xC5\x91" => "\xC5\x90", "\xC5\x95" => "\xC5\x94",
        "\xC5\x97" => "\xC5\x96", "\xC5\x99" => "\xC5\x98", "\xC5\x9B" => "\xC5\x9A", "\xC5\x9D" => "\xC5\x9C",
        "\xC5\x9F" => "\xC5\x9E", "\xC5\xA1" => "\xC5\xA0", "\xC5\xA3" => "\xC5\xA2", "\xC5\xA5" => "\xC5\xA4",
        "\xC5\xA7" => "\xC5\xA6", "\xC5\xA9" => "\xC5\xA8", "\xC5\xAB" => "\xC5\xAA", "\xC5\xAD" => "\xC5\xAC",
        "\xC5\xAF" => "\xC5\xAE", "\xC5\xB1" => "\xC5\xB0", "\xC5\xB3" => "\xC5\xB2", "\xC5\xB5" => "\xC5\xB4",
        "\xC5\xB7" => "\xC5\xB6", "\xC5\xBA" => "\xC5\xB9", "\xC5\xBC" => "\xC5\xBB", "\xC5\xBE" => "\xC5\xBD",
        "\xC6\xA1" => "\xC6\xA0", "\xC6\xB0" => "\xC6\xAF", "\xC8\x99" => "\xC8\x98", "\xC8\x9B" => "\xC8\x9A",
        "\xCE\xAC" => "\xCE\x86", "\xCE\xAD" => "\xCE\x88", "\xCE\xAE" => "\xCE\x89", "\xCE\xAF" => "\xCE\x8A",
        "\xCE\xB1" => "\xCE\x91", "\xCE\xB2" => "\xCE\x92", "\xCE\xB3" => "\xCE\x93", "\xCE\xB4" => "\xCE\x94",
        "\xCE\xB5" => "\xCE\x95", "\xCE\xB6" => "\xCE\x96", "\xCE\xB7" => "\xCE\x97", "\xCE\xB8" => "\xCE\x98",
        "\xCE\xB9" => "\xCE\x99", "\xCE\xBA" => "\xCE\x9A", "\xCE\xBB" => "\xCE\x9B", "\xCE\xBC" => "\xCE\x9C",
        "\xCE\xBD" => "\xCE\x9D", "\xCE\xBE" => "\xCE\x9E", "\xCE\xBF" => "\xCE\x9F", "\xCF\x80" => "\xCE\xA0",
        "\xCF\x81" => "\xCE\xA1", "\xCF\x83" => "\xCE\xA3", "\xCF\x84" => "\xCE\xA4", "\xCF\x85" => "\xCE\xA5",
        "\xCF\x86" => "\xCE\xA6", "\xCF\x87" => "\xCE\xA7", "\xCF\x88" => "\xCE\xA8", "\xCF\x89" => "\xCE\xA9",
        "\xCF\x8A" => "\xCE\xAA", "\xCF\x8B" => "\xCE\xAB", "\xCF\x8C" => "\xCE\x8C", "\xCF\x8D" => "\xCE\x8E",
        "\xCF\x8E" => "\xCE\x8F", "\xD0\xB0" => "\xD0\x90", "\xD0\xB1" => "\xD0\x91", "\xD0\xB2" => "\xD0\x92",
        "\xD0\xB3" => "\xD0\x93", "\xD0\xB4" => "\xD0\x94", "\xD0\xB5" => "\xD0\x95", "\xD0\xB6" => "\xD0\x96",
        "\xD0\xB7" => "\xD0\x97", "\xD0\xB8" => "\xD0\x98", "\xD0\xB9" => "\xD0\x99", "\xD0\xBA" => "\xD0\x9A",
        "\xD0\xBB" => "\xD0\x9B", "\xD0\xBC" => "\xD0\x9C", "\xD0\xBD" => "\xD0\x9D", "\xD0\xBE" => "\xD0\x9E",
        "\xD0\xBF" => "\xD0\x9F", "\xD1\x80" => "\xD0\xA0", "\xD1\x81" => "\xD0\xA1", "\xD1\x82" => "\xD0\xA2",
        "\xD1\x83" => "\xD0\xA3", "\xD1\x84" => "\xD0\xA4", "\xD1\x85" => "\xD0\xA5", "\xD1\x86" => "\xD0\xA6",
        "\xD1\x87" => "\xD0\xA7", "\xD1\x88" => "\xD0\xA8", "\xD1\x89" => "\xD0\xA9", "\xD1\x8A" => "\xD0\xAA",
        "\xD1\x8B" => "\xD0\xAB", "\xD1\x8C" => "\xD0\xAC", "\xD1\x8D" => "\xD0\xAD", "\xD1\x8E" => "\xD0\xAE",
        "\xD1\x8F" => "\xD0\xAF", "\xD1\x91" => "\xD0\x81", "\xD1\x92" => "\xD0\x82", "\xD1\x93" => "\xD0\x83",
        "\xD1\x94" => "\xD0\x84", "\xD1\x95" => "\xD0\x85", "\xD1\x96" => "\xD0\x86", "\xD1\x97" => "\xD0\x87",
        "\xD1\x98" => "\xD0\x88", "\xD1\x99" => "\xD0\x89", "\xD1\x9A" => "\xD0\x8A", "\xD1\x9B" => "\xD0\x8B",
        "\xD1\x9C" => "\xD0\x8C", "\xD1\x9E" => "\xD0\x8E", "\xD1\x9F" => "\xD0\x8F", "\xD2\x91" => "\xD2\x90",
        "\xE1\xB8\x83" => "\xE1\xB8\x82", "\xE1\xB8\x8B" => "\xE1\xB8\x8A", "\xE1\xB8\x9F" => "\xE1\xB8\x9E", "\xE1\xB9\x81" => "\xE1\xB9\x80",
        "\xE1\xB9\x97" => "\xE1\xB9\x96", "\xE1\xB9\xA1" => "\xE1\xB9\xA0", "\xE1\xB9\xAB" => "\xE1\xB9\xAA", "\xE1\xBA\x81" => "\xE1\xBA\x80",
        "\xE1\xBA\x83" => "\xE1\xBA\x82", "\xE1\xBA\x85" => "\xE1\xBA\x84", "\xE1\xBB\xB3" => "\xE1\xBB\xB2"
    );

    // ASCII letters first, then every multibyte letter listed in the table
    $ascii_raised = strtoupper($string);

    return strtr($ascii_raised, $uppercase_map);
}
0412   
/**
* UTF-8 aware alternative to substr
* Return part of a string given character offset (and optionally length)
*
* Note on arguments: compared to substr - if offset or length are not
* integers, this version will not complain but rather casts them to
* integers.
*
* Note on returned values: the substr documentation states that false can be
* returned in some cases (e.g. offset > string length), whereas mb_substr
* never returns false and returns an empty string instead. This function
* follows the mb_substr approach.
*
* Note on implementation: PCRE only supports repetitions of less than
* 65536. In order to accept up to MAXINT values for offset and length, a
* group of 65535 characters is repeated when needed.
*
* Note on implementation: counting the characters of the string is a
* relatively expensive operation, so it is only carried out when necessary.
* It is not needed for non-negative offsets without a length.
*
* @author Chris Smith<chris@jalakai.co.uk>
* @param string $str
* @param integer $offset number of UTF-8 characters offset (from left)
* @param integer $length (optional) length in UTF-8 characters from offset
* @return string the selected part, or an empty string if nothing is selected
*                or the pattern does not match
*/
function utf8_substr($str, $offset, $length = null)
{
    // The string cast generates E_NOTICE
    // for PHP4 objects, but not PHP5 objects
    $str = (string) $str;
    $offset = (int) $offset;
    $has_length = !is_null($length);
    if ($has_length)
    {
        $length = (int) $length;
    }

    // Requests that can never select anything
    if ($length === 0 || ($offset < 0 && $length < 0 && $length < $offset))
    {
        return '';
    }

    // Turn a negative offset into one counted from the left (a pattern
    // anchored at the tail would work too, but is horribly slow)
    if ($offset < 0)
    {
        $char_count = utf8_strlen($str);
        $offset = max(0, $char_count + $offset);
    }

    // Pattern that skips $offset characters without capturing them;
    // an offset of 0 only needs the anchor
    $offset_pattern = '^';
    if ($offset > 0)
    {
        $full_groups = (int) ($offset / 65535);
        $rest = $offset % 65535;

        $repeated = ($full_groups) ? '(?:.{65535}){' . $full_groups . '}' : '';
        $offset_pattern = '^(?:' . $repeated . '.{' . $rest . '})';
    }

    // Pattern that captures the requested characters
    $length_pattern = '';
    if (!$has_length)
    {
        // No length: capture the rest of the string
        $length_pattern = '(.*)$';
    }
    else
    {
        if (!isset($char_count))
        {
            // Not counted yet because the offset was not negative
            $char_count = utf8_strlen($str);
        }

        // Offset lies behind the end of the string
        if ($offset > $char_count)
        {
            return '';
        }

        if ($length > 0)
        {
            // Do not ask for more characters than are left after the offset
            $length = min($char_count - $offset, $length);

            $full_groups = (int) ($length / 65535);
            $rest = $length % 65535;

            // Positive length: capture a group of exactly $length characters
            $repeated = ($full_groups) ? '(?:.{65535}){' . $full_groups . '}' : '';
            $length_pattern = '(' . $repeated . '.{'. $rest . '})';
        }
        else if ($length < 0)
        {
            // More characters to cut off than remain after the offset
            if ($length < ($offset - $char_count))
            {
                return '';
            }

            $full_groups = (int) ((-$length) / 65535);
            $rest = (-$length) % 65535;

            // Negative length: capture everything except a group of
            // -$length characters anchored at the end of the string
            $repeated = ($full_groups) ? '(?:.{65535}){' . $full_groups . '}' : '';
            $length_pattern = '(.*)(?:' . $repeated . '.{' . $rest . '})$';
        }
    }

    $matched = preg_match('#' . $offset_pattern . $length_pattern . '#us', $str, $match);

    return ($matched) ? $match[1] : '';
}
0558   
/**
* Return the length (in characters) of a UTF-8 string
*
* The string is run through utf8_decode first and the byte length of that
* result is returned.
*
* @param    string    $text        UTF-8 string
* @return    integer                Length (in chars) of given string
*/
function utf8_strlen($text)
{
    // utf8_decode replaces multibyte characters by a single byte ('?' where
    // needed), so counting bytes afterwards counts characters
    $single_byte_text = utf8_decode($text);

    return strlen($single_byte_text);
}
0570  }
0571   
/**
* UTF-8 aware alternative to str_split
* Convert a string to an array
*
* @author Harry Fuecks
* @param string $str UTF-8 encoded
* @param int $split_len number of characters per chunk, must be an integer >= 1
* @return mixed array of chunks of $split_len characters each (the last one
*               may be shorter), or FALSE if $split_len is not a positive integer
*/
function utf8_str_split($str, $split_len = 1)
{
    if (!is_int($split_len) || $split_len < 1)
    {
        return false;
    }

    // A string that fits into one chunk is returned as the only element
    $len = utf8_strlen($str);
    if ($len <= $split_len)
    {
        return array($str);
    }

    // The greedy quantifier takes $split_len characters as long as that many
    // are left and the shorter remainder at the end. Using "." with the "s"
    // modifier for the remainder as well keeps NUL characters in the last
    // chunk; the former "[^\x00]" alternative silently lost them.
    preg_match_all('/.{1,' . $split_len . '}/us', $str, $ar);
    return $ar[0];
}
0597   
/**
* UTF-8 aware alternative to strspn
* Find length of initial segment matching the mask
*
* The mask is a plain list of characters, as it is for strspn: it is quoted
* before being placed into the pattern, so characters with a special meaning
* in a regular expression are matched literally.
*
* @author Harry Fuecks
* @param string $str UTF-8 encoded string to examine
* @param string $mask characters allowed in the initial segment
* @param integer $start (optional) character offset at which to start
* @param integer $length (optional) number of characters to examine
* @return integer length (in characters) of the initial segment that consists
*                 only of characters from the mask; 0 for an empty mask
*/
function utf8_strspn($str, $mask, $start = null, $length = null)
{
    if ($start !== null || $length !== null)
    {
        // A length without a start means "from the beginning of the string"
        $str = utf8_substr($str, ($start === null) ? 0 : $start, $length);
    }

    // Nothing can match an empty mask, and "[]" would not be a valid pattern
    if ((string) $mask === '')
    {
        return 0;
    }

    // Quote the mask: unquoted, characters such as "]", "^", "-", "\" or the
    // delimiter "/" would change or break the character class
    $class = preg_quote($mask, '/');

    if (preg_match('/^[' . $class . ']+/u', $str, $matches))
    {
        return utf8_strlen($matches[0]);
    }

    return 0;
}
0620   
/**
* UTF-8 aware alternative to ucfirst
* Make a string's first character uppercase
*
* Only the first character is handed to utf8_strtoupper; the remainder of the
* string is appended unchanged.
*
* @author Harry Fuecks
* @param string $str string to convert
* @return string with first character as upper case (if applicable)
*/
function utf8_ucfirst($str)
{
    $char_count = utf8_strlen($str);

    // Loose comparisons on purpose, mirroring a switch statement
    if ($char_count == 0)
    {
        return '';
    }

    if ($char_count == 1)
    {
        return utf8_strtoupper($str);
    }

    // Split into the first character and everything after it
    preg_match('/^(.{1})(.*)$/us', $str, $matches);
    $first_char = $matches[1];
    $remainder = $matches[2];

    return utf8_strtoupper($first_char) . $remainder;
}
0647   
0648  /**
0649  * Recode a string to UTF-8
0650  *
0651  * If the encoding is not supported, the string is returned as-is
0652  *
0653  * @param    string    $string        Original string
0654  * @param    string    $encoding    Original encoding (lowered)
0655  * @return    string                The string, encoded in UTF-8
0656  */
0657  function utf8_recode($string, $encoding)
0658  {
0659      $encoding = strtolower($encoding);
0660   
0661      if ($encoding == 'utf-8' || !is_string($string) || empty($string))
0662      {
0663          return $string;
0664      }
0665   
0666      // we force iso-8859-1 to be cp1252
0667      if ($encoding == 'iso-8859-1')
0668      {
0669          $encoding = 'cp1252';
0670      }
0671      // convert iso-8859-8-i to iso-8859-8
0672      else if ($encoding == 'iso-8859-8-i')
0673      {
0674          $encoding = 'iso-8859-8';
0675          $string = hebrev($string);
0676      }
0677   
0678      // First, try iconv()
0679      if (function_exists('iconv'))
0680      {
0681          $ret = @iconv($encoding, 'utf-8', $string);
0682   
0683          if (!empty($ret))
0684          {
0685              return $ret;
0686          }
0687      }
0688   
0689      // Try the mb_string extension
0690      if (function_exists('mb_convert_encoding'))
0691      {
0692          // mbstring is nasty on PHP4, we must make *sure* that we send a good encoding
0693          switch ($encoding)
0694          {
0695              case 'iso-8859-1':
0696              case 'iso-8859-2':
0697              case 'iso-8859-4':
0698              case 'iso-8859-7':
0699              case 'iso-8859-9':
0700              case 'iso-8859-15':
0701              case 'windows-1251':
0702              case 'windows-1252':
0703              case 'cp1252':
0704              case 'shift_jis':
0705              case 'euc-kr':
0706              case 'big5':
0707              case 'gb2312':
0708                  $ret = @mb_convert_encoding($string, 'utf-8', $encoding);
0709   
0710                  if (!empty($ret))
0711                  {
0712                      return $ret;
0713                  }
0714          }
0715      }
0716   
0717      // Try the recode extension
0718      if (function_exists('recode_string'))
0719      {
0720          $ret = @recode_string($encoding . '..utf-8', $string);
0721   
0722          if (!empty($ret))
0723          {
0724              return $ret;
0725          }
0726      }
0727   
0728      // If nothing works, check if we have a custom transcoder available
0729      if (!preg_match('#^[a-z0-9_ \\-]+$#', $encoding))
0730      {
0731          // Make sure the encoding name is alphanumeric, we don't want it to be abused into loading arbitrary files
0732          trigger_error('Unknown encoding: ' . $encoding, E_USER_ERROR);
0733      }
0734   
0735      global $phpbb_root_path, $phpEx;
0736   
0737      // iso-8859-* character encoding
0738      if (preg_match('/iso[_ -]?8859[_ -]?(\\d+)/', $encoding, $array))
0739      {
0740          switch ($array[1])
0741          {
0742              case '1':
0743              case '2':
0744              case '4':
0745              case '7':
0746              case '8':
0747              case '9':
0748              case '15':
0749                  if (!function_exists('iso_8859_' . $array[1]))
0750                  {
0751                      if (!file_exists($phpbb_root_path . 'includes/utf/data/recode_basic.' . $phpEx))
0752                      {
0753                          trigger_error('Basic reencoder file is missing', E_USER_ERROR);
0754                      }
0755                      include($phpbb_root_path . 'includes/utf/data/recode_basic.' . $phpEx);
0756                  }
0757                  return call_user_func('iso_8859_' . $array[1], $string);
0758              break;
0759   
0760              default:
0761                  trigger_error('Unknown encoding: ' . $encoding, E_USER_ERROR);
0762              break;
0763          }
0764      }
0765   
0766      // CP/WIN character encoding
0767      if (preg_match('/(?:cp|windows)[_\- ]?(\\d+)/', $encoding, $array))
0768      {
0769          switch ($array[1])
0770          {
0771              case '932':
0772              break;
0773              case '1250':
0774              case '1251':
0775              case '1252':
0776              case '1254':
0777              case '1255':
0778              case '1256':
0779              case '1257':
0780              case '874':
0781                  if (!function_exists('cp' . $array[1]))
0782                  {
0783                      if (!file_exists($phpbb_root_path . 'includes/utf/data/recode_basic.' . $phpEx))
0784                      {
0785                          trigger_error('Basic reencoder file is missing', E_USER_ERROR);
0786                      }
0787                      include($phpbb_root_path . 'includes/utf/data/recode_basic.' . $phpEx);
0788                  }
0789                  return call_user_func('cp' . $array[1], $string);
0790              break;
0791   
0792              default:
0793                  trigger_error('Unknown encoding: ' . $encoding, E_USER_ERROR);
0794              break;
0795          }
0796      }
0797   
0798      // TIS-620
0799      if (preg_match('/tis[_ -]?620/', $encoding))
0800      {
0801          if (!function_exists('tis_620'))
0802          {
0803              if (!file_exists($phpbb_root_path . 'includes/utf/data/recode_basic.' . $phpEx))
0804              {
0805                  trigger_error('Basic reencoder file is missing', E_USER_ERROR);
0806              }
0807              include($phpbb_root_path . 'includes/utf/data/recode_basic.' . $phpEx);
0808          }
0809          return tis_620($string);
0810      }
0811   
0812      // SJIS
0813      if (preg_match('/sjis(?:[_ -]?win)?|(?:cp|ibm)[_ -]?932|shift[_ -]?jis/', $encoding))
0814      {
0815          if (!function_exists('sjis'))
0816          {
0817              if (!file_exists($phpbb_root_path . 'includes/utf/data/recode_cjk.' . $phpEx))
0818              {
0819                  trigger_error('CJK reencoder file is missing', E_USER_ERROR);
0820              }
0821              include($phpbb_root_path . 'includes/utf/data/recode_cjk.' . $phpEx);
0822          }
0823          return sjis($string);
0824      }
0825   
0826      // EUC_KR
0827      if (preg_match('/euc[_ -]?kr/', $encoding))
0828      {
0829          if (!function_exists('euc_kr'))
0830          {
0831              if (!file_exists($phpbb_root_path . 'includes/utf/data/recode_cjk.' . $phpEx))
0832              {
0833                  trigger_error('CJK reencoder file is missing', E_USER_ERROR);
0834              }
0835              include($phpbb_root_path . 'includes/utf/data/recode_cjk.' . $phpEx);
0836          }
0837          return euc_kr($string);
0838      }
0839   
0840      // BIG-5
0841      if (preg_match('/big[_ -]?5/', $encoding))
0842      {
0843          if (!function_exists('big5'))
0844          {
0845              if (!file_exists($phpbb_root_path . 'includes/utf/data/recode_cjk.' . $phpEx))
0846              {
0847                  trigger_error('CJK reencoder file is missing', E_USER_ERROR);
0848              }
0849              include($phpbb_root_path . 'includes/utf/data/recode_cjk.' . $phpEx);
0850          }
0851          return big5($string);
0852      }
0853   
0854      // GB2312
0855      if (preg_match('/gb[_ -]?2312/', $encoding))
0856      {
0857          if (!function_exists('gb2312'))
0858          {
0859              if (!file_exists($phpbb_root_path . 'includes/utf/data/recode_cjk.' . $phpEx))
0860              {
0861                  trigger_error('CJK reencoder file is missing', E_USER_ERROR);
0862              }
0863              include($phpbb_root_path . 'includes/utf/data/recode_cjk.' . $phpEx);
0864          }
0865          return gb2312($string);
0866      }
0867   
0868      // Trigger an error?! Fow now just give bad data :-(
0869      trigger_error('Unknown encoding: ' . $encoding, E_USER_ERROR);
0870      //return $string; // use utf_normalizer::cleanup() ?
0871  }
0872   
/**
* Turn every multi-byte UTF-8 sequence in a string into a decimal NCR
*
* Bytes below 0x80 are left untouched. Every match of a lead byte in the range
* 0xC2-0xF4 followed by one to three continuation bytes is handed to
* utf8_encode_ncr_callback() and replaced by whatever it returns.
*
* @param    string    $text        UTF-8 string in NFC
* @return    string                ASCII string using NCRs for non-ASCII chars
*/
function utf8_encode_ncr($text)
{
    // Lead byte of a multi-byte sequence plus its continuation byte(s)
    $multibyte_sequence = '#[\\xC2-\\xF4][\\x80-\\xBF]{1,3}#';

    return preg_replace_callback($multibyte_sequence, 'utf8_encode_ncr_callback', $text);
}
0883   
/**
* Callback used in utf8_encode_ncr()
*
* Receives the match array for one multi-byte sequence and wraps the value
* returned by utf8_ord() for the full match in "&#" and ";".
*
* @param    array    $m            0-based numerically indexed array passed by preg_replace_callback()
* @return    string                The NCR built from the matched sequence
*/
function utf8_encode_ncr_callback($m)
{
    $code_point = utf8_ord($m[0]);

    return '&#' . $code_point . ';';
}
0896   
/**
* Decodes a single UTF-8 encoded char into its Unicode code point
*
* A one-byte input yields its byte value. For two to four bytes the payload
* bits of the lead byte and of each following byte are combined. The bytes
* themselves are not validated.
*
* @param string $chr UTF-8 char
* @return integer|string UNICODE code point, or $chr unchanged if it is not 1 to 4 bytes long
*/
function utf8_ord($chr)
{
    $num_bytes = strlen($chr);

    if ($num_bytes < 1 || $num_bytes > 4)
    {
        // Not a single UTF-8 char: hand the input back untouched
        return $chr;
    }

    if ($num_bytes == 1)
    {
        return ord($chr);
    }

    // Payload mask of the lead byte: 0x1F, 0x0F or 0x07 for 2, 3 or 4 bytes
    $code_point = ord($chr[0]) & (0xFF >> ($num_bytes + 1));

    // Each following byte contributes its low six bits
    for ($pos = 1; $pos < $num_bytes; $pos++)
    {
        $code_point = ($code_point << 6) | (ord($chr[$pos]) & 0x3F);
    }

    return $code_point;
}
0927   
/**
* Encodes a Unicode code point as a UTF-8 char
*
* Code points up to 0x7F become one byte, up to 0x7FF two bytes, up to 0xFFFF
* three bytes and anything larger four bytes. The code point is not checked
* for validity.
*
* @param    int        $cp    UNICODE code point
* @return    string        UTF-8 char
*/
function utf8_chr($cp)
{
    // Pick the lead byte and the bit groups that follow it, most significant first
    if ($cp > 0xFFFF)
    {
        $lead = 0xF0 | ($cp >> 18);
        $groups = array($cp >> 12, $cp >> 6, $cp);
    }
    else if ($cp > 0x7FF)
    {
        $lead = 0xE0 | ($cp >> 12);
        $groups = array($cp >> 6, $cp);
    }
    else if ($cp > 0x7F)
    {
        $lead = 0xC0 | ($cp >> 6);
        $groups = array($cp);
    }
    else
    {
        // Single byte, no continuation bytes needed
        return chr($cp);
    }

    $char = chr($lead);

    foreach ($groups as $bits)
    {
        // Continuation byte: marker 0x80 plus the low six bits of the group
        $char .= chr(0x80 | ($bits & 0x3F));
    }

    return $char;
}
0953   
/**
* Replace Numeric Character References with the UTF-8 chars they denote
*
* Notes:
*    - only "&#" followed by 1 to 6 decimal digits, or by "x" and 1 to 5 hexadecimal digits, and a closing ";" is matched (case-insensitively)
*    - NCRs are not converted recursively, if you pass &#38;#38; it will return &#38;
*    - the existence of the Unicode character is NOT checked, therefore an entity may be converted to an inexistent codepoint
*
* @param    string    $text        String to convert, encoded in UTF-8 (no normal form required)
* @return    string                UTF-8 string where NCRs have been replaced with the actual chars
*/
function utf8_decode_ncr($text)
{
    // Group 1 holds either the decimal digits or "x" plus the hexadecimal digits
    $ncr_pattern = '/&#([0-9]{1,6}|x[0-9A-F]{1,5});/i';

    return preg_replace_callback($ncr_pattern, 'utf8_decode_ncr_callback', $text);
}
0968   
/**
* Callback used in utf8_decode_ncr()
*
* Takes the captured body of a NCR and returns the char produced by utf8_chr().
* A body starting with "x" or "X" is read as hexadecimal, anything else is
* passed on as the decimal digits that were matched. Attention, $m is an array.
*
* @param    array    $m            0-based numerically indexed array passed by preg_replace_callback()
* @return    string                UTF-8 char
*/
function utf8_decode_ncr_callback($m)
{
    $reference = $m[1];

    if (strncasecmp($reference, 'x', 1) === 0)
    {
        // Hexadecimal form: drop the leading "x" before converting
        return utf8_chr(hexdec(substr($reference, 1)));
    }

    return utf8_chr($reference);
}
0984   
/**
* Case folds a unicode string as per Unicode 5.0, section 3.13
*
* The "common" table is always applied first. It is followed by the "full"
* table when $option is exactly 'full', and by the "simple" table for any
* other value. Each table is included from includes/utf/data/ the first time
* it is needed and kept in a static cache afterwards.
*
* @param    string    $text    text to be case folded
* @param    string    $option    determines how we will fold the cases
* @return    string            case folded text
*/
function utf8_case_fold($text, $option = 'full')
{
    static $uniarray = array();
    global $phpbb_root_path, $phpEx;

    // Common first, then either full (one char may become several) or simple (one char to one char)
    $tables = array('c' => 'includes/utf/data/case_fold_c.');

    if ($option === 'full')
    {
        $tables['f'] = 'includes/utf/data/case_fold_f.';
    }
    else
    {
        $tables['s'] = 'includes/utf/data/case_fold_s.';
    }

    // Load whatever is not cached yet before touching the text
    foreach ($tables as $table => $path)
    {
        if (!isset($uniarray[$table]))
        {
            $uniarray[$table] = include($phpbb_root_path . $path . $phpEx);
        }
    }

    foreach ($tables as $table => $path)
    {
        $text = strtr($text, $uniarray[$table]);
    }

    return $text;
}
1031   
1032  /**
1033  * Takes the input and does a "special" case fold. It does minor normalization
1034  * and returns NFKC compatible text
1035  *
1036  * @param    string    $text    text to be case folded
1037  * @param    string    $option    determines how we will fold the cases
1038  * @return    string            case folded text
1039  */
1040  function utf8_case_fold_nfkc($text, $option = 'full')
1041  {
1042      static $fc_nfkc_closure = array(
1043          "\xCD\xBA"    => "\x20\xCE\xB9",
1044          "\xCF\x92"    => "\xCF\x85",
1045          "\xCF\x93"    => "\xCF\x8D",
1046          "\xCF\x94"    => "\xCF\x8B",
1047          "\xCF\xB2"    => "\xCF\x83",
1048          "\xCF\xB9"    => "\xCF\x83",
1049          "\xE1\xB4\xAC"    => "\x61",
1050          "\xE1\xB4\xAD"    => "\xC3\xA6",
1051          "\xE1\xB4\xAE"    => "\x62",
1052          "\xE1\xB4\xB0"    => "\x64",
1053          "\xE1\xB4\xB1"    => "\x65",
1054          "\xE1\xB4\xB2"    => "\xC7\x9D",
1055          "\xE1\xB4\xB3"    => "\x67",
1056          "\xE1\xB4\xB4"    => "\x68",
1057          "\xE1\xB4\xB5"    => "\x69",
1058          "\xE1\xB4\xB6"    => "\x6A",
1059          "\xE1\xB4\xB7"    => "\x6B",
1060          "\xE1\xB4\xB8"    => "\x6C",
1061          "\xE1\xB4\xB9"    => "\x6D",
1062          "\xE1\xB4\xBA"    => "\x6E",
1063          "\xE1\xB4\xBC"    => "\x6F",
1064          "\xE1\xB4\xBD"    => "\xC8\xA3",
1065          "\xE1\xB4\xBE"    => "\x70",
1066          "\xE1\xB4\xBF"    => "\x72",
1067          "\xE1\xB5\x80"    => "\x74",
1068          "\xE1\xB5\x81"    => "\x75",
1069          "\xE1\xB5\x82"    => "\x77",
1070          "\xE2\x82\xA8"    => "\x72\x73",
1071          "\xE2\x84\x82"    => "\x63",
1072          "\xE2\x84\x83"    => "\xC2\xB0\x63",
1073          "\xE2\x84\x87"    => "\xC9\x9B",
1074          "\xE2\x84\x89"    => "\xC2\xB0\x66",
1075          "\xE2\x84\x8B"    => "\x68",
1076          "\xE2\x84\x8C"    => "\x68",
1077          "\xE2\x84\x8D"    => "\x68",
1078          "\xE2\x84\x90"    => "\x69",
1079          "\xE2\x84\x91"    => "\x69",
1080          "\xE2\x84\x92"    => "\x6C",
1081          "\xE2\x84\x95"    => "\x6E",
1082          "\xE2\x84\x96"    => "\x6E\x6F",
1083          "\xE2\x84\x99"    => "\x70",
1084          "\xE2\x84\x9A"    => "\x71",
1085          "\xE2\x84\x9B"    => "\x72",
1086          "\xE2\x84\x9C"    => "\x72",
1087          "\xE2\x84\x9D"    => "\x72",
1088          "\xE2\x84\xA0"    => "\x73\x6D",
1089          "\xE2\x84\xA1"    => "\x74\x65\x6C",
1090          "\xE2\x84\xA2"    => "\x74\x6D",
1091          "\xE2\x84\xA4"    => "\x7A",
1092          "\xE2\x84\xA8"    => "\x7A",
1093          "\xE2\x84\xAC"    => "\x62",
1094          "\xE2\x84\xAD"    => "\x63",
1095          "\xE2\x84\xB0"    => "\x65",
1096          "\xE2\x84\xB1"    => "\x66",
1097          "\xE2\x84\xB3"    => "\x6D",
1098          "\xE2\x84\xBB"    => "\x66\x61\x78",
1099          "\xE2\x84\xBE"    => "\xCE\xB3",
1100          "\xE2\x84\xBF"    => "\xCF\x80",
1101          "\xE2\x85\x85"    => "\x64",
1102          "\xE3\x89\x90"    => "\x70\x74\x65",
1103          "\xE3\x8B\x8C"    => "\x68\x67",
1104          "\xE3\x8B\x8E"    => "\x65\x76",
1105          "\xE3\x8B\x8F"    => "\x6C\x74\x64",
1106          "\xE3\x8D\xB1"    => "\x68\x70\x61",
1107          "\xE3\x8D\xB3"    => "\x61\x75",
1108          "\xE3\x8D\xB5"    => "\x6F\x76",
1109          "\xE3\x8D\xBA"    => "\x69\x75",
1110          "\xE3\x8E\x80"    => "\x70\x61",
1111          "\xE3\x8E\x81"    => "\x6E\x61",
1112          "\xE3\x8E\x82"    => "\xCE\xBC\x61",
1113          "\xE3\x8E\x83"    => "\x6D\x61",
1114          "\xE3\x8E\x84"    => "\x6B\x61",
1115          "\xE3\x8E\x85"    => "\x6B\x62",
1116          "\xE3\x8E\x86"    => "\x6D\x62",
1117          "\xE3\x8E\x87"    => "\x67\x62",
1118          "\xE3\x8E\x8A"    => "\x70\x66",
1119          "\xE3\x8E\x8B"    => "\x6E\x66",
1120          "\xE3\x8E\x8C"    => "\xCE\xBC\x66",
1121          "\xE3\x8E\x90"    => "\x68\x7A",
1122          "\xE3\x8E\x91"    => "\x6B\x68\x7A",
1123          "\xE3\x8E\x92"    => "\x6D\x68\x7A",
1124          "\xE3\x8E\x93"    => "\x67\x68\x7A",
1125          "\xE3\x8E\x94"    => "\x74\x68\x7A",
1126          "\xE3\x8E\xA9"    => "\x70\x61",
1127          "\xE3\x8E\xAA"    => "\x6B\x70\x61",
1128          "\xE3\x8E\xAB"    => "\x6D\x70\x61",
1129          "\xE3\x8E\xAC"    => "\x67\x70\x61",
1130          "\xE3\x8E\xB4"    => "\x70\x76",
1131          "\xE3\x8E\xB5"    => "\x6E\x76",
1132          "\xE3\x8E\xB6"    => "\xCE\xBC\x76",
1133          "\xE3\x8E\xB7"    => "\x6D\x76",
1134          "\xE3\x8E\xB8"    => "\x6B\x76",
1135          "\xE3\x8E\xB9"    => "\x6D\x76",
1136          "\xE3\x8E\xBA"    => "\x70\x77",
1137          "\xE3\x8E\xBB"    => "\x6E\x77",
1138          "\xE3\x8E\xBC"    => "\xCE\xBC\x77",
1139          "\xE3\x8E\xBD"    => "\x6D\x77",
1140          "\xE3\x8E\xBE"    => "\x6B\x77",
1141          "\xE3\x8E\xBF"    => "\x6D\x77",
1142          "\xE3\x8F\x80"    => "\x6B\xCF\x89",
1143          "\xE3\x8F\x81"    => "\x6D\xCF\x89",
1144          "\xE3\x8F\x83"    => "\x62\x71",
1145          "\xE3\x8F\x86"    => "\x63\xE2\x88\x95\x6B\x67",
1146          "\xE3\x8F\x87"    => "\x63\x6F\x2E",
1147          "\xE3\x8F\x88"    => "\x64\x62",
1148          "\xE3\x8F\x89"    => "\x67\x79",
1149          "\xE3\x8F\x8B"    => "\x68\x70",
1150          "\xE3\x8F\x8D"    => "\x6B\x6B",
1151          "\xE3\x8F\x8E"    => "\x6B\x6D",
1152          "\xE3\x8F\x97"    => "\x70\x68",
1153          "\xE3\x8F\x99"    => "\x70\x70\x6D",
1154          "\xE3\x8F\x9A"    => "\x70\x72",
1155          "\xE3\x8F\x9C"    => "\x73\x76",
1156          "\xE3\x8F\x9D"    => "\x77\x62",
1157          "\xE3\x8F\x9E"    => "\x76\xE2\x88\x95\x6D",
1158          "\xE3\x8F\x9F"    => "\x61\xE2\x88\x95\x6D",
1159          "\xF0\x9D\x90\x80"    => "\x61",
1160          "\xF0\x9D\x90\x81"    => "\x62",
1161          "\xF0\x9D\x90\x82"    => "\x63",
1162          "\xF0\x9D\x90\x83"    => "\x64",
1163          "\xF0\x9D\x90\x84"    => "\x65",
1164          "\xF0\x9D\x90\x85"    => "\x66",
1165          "\xF0\x9D\x90\x86"    => "\x67",
1166          "\xF0\x9D\x90\x87"    => "\x68",
1167          "\xF0\x9D\x90\x88"    => "\x69",
1168          "\xF0\x9D\x90\x89"    => "\x6A",
1169          "\xF0\x9D\x90\x8A"    => "\x6B",
1170          "\xF0\x9D\x90\x8B"    => "\x6C",
1171          "\xF0\x9D\x90\x8C"    => "\x6D",
1172          "\xF0\x9D\x90\x8D"    => "\x6E",
1173          "\xF0\x9D\x90\x8E"    => "\x6F",
1174          "\xF0\x9D\x90\x8F"    => "\x70",
1175          "\xF0\x9D\x90\x90"    => "\x71",
1176          "\xF0\x9D\x90\x91"    => "\x72",
1177          "\xF0\x9D\x90\x92"    => "\x73",
1178          "\xF0\x9D\x90\x93"    => "\x74",
1179          "\xF0\x9D\x90\x94"    => "\x75",
1180          "\xF0\x9D\x90\x95"    => "\x76",
1181          "\xF0\x9D\x90\x96"    => "\x77",
1182          "\xF0\x9D\x90\x97"    => "\x78",
1183          "\xF0\x9D\x90\x98"    => "\x79",
1184          "\xF0\x9D\x90\x99"    => "\x7A",
1185          "\xF0\x9D\x90\xB4"    => "\x61",
1186          "\xF0\x9D\x90\xB5"    => "\x62",
1187          "\xF0\x9D\x90\xB6"    => "\x63",
1188          "\xF0\x9D\x90\xB7"    => "\x64",
1189          "\xF0\x9D\x90\xB8"    => "\x65",
1190          "\xF0\x9D\x90\xB9"    => "\x66",
1191          "\xF0\x9D\x90\xBA"    => "\x67",
1192          "\xF0\x9D\x90\xBB"    => "\x68",
1193          "\xF0\x9D\x90\xBC"    => "\x69",
1194          "\xF0\x9D\x90\xBD"    => "\x6A",
1195          "\xF0\x9D\x90\xBE"    => "\x6B",
1196          "\xF0\x9D\x90\xBF"    => "\x6C",
1197          "\xF0\x9D\x91\x80"    => "\x6D",
1198          "\xF0\x9D\x91\x81"    => "\x6E",
1199          "\xF0\x9D\x91\x82"    => "\x6F",
1200          "\xF0\x9D\x91\x83"    => "\x70",
1201          "\xF0\x9D\x91\x84"    => "\x71",
1202          "\xF0\x9D\x91\x85"    => "\x72",
1203          "\xF0\x9D\x91\x86"    => "\x73",
1204          "\xF0\x9D\x91\x87"    => "\x74",
1205          "\xF0\x9D\x91\x88"    => "\x75",
1206          "\xF0\x9D\x91\x89"    => "\x76",
1207          "\xF0\x9D\x91\x8A"    => "\x77",
1208          "\xF0\x9D\x91\x8B"    => "\x78",
1209          "\xF0\x9D\x91\x8C"    => "\x79",
1210          "\xF0\x9D\x91\x8D"    => "\x7A",
1211          "\xF0\x9D\x91\xA8"    => "\x61",
1212          "\xF0\x9D\x91\xA9"    => "\x62",
1213          "\xF0\x9D\x91\xAA"    => "\x63",
1214          "\xF0\x9D\x91\xAB"    => "\x64",
1215          "\xF0\x9D\x91\xAC"    => "\x65",
1216          "\xF0\x9D\x91\xAD"    => "\x66",
1217          "\xF0\x9D\x91\xAE"    => "\x67",
1218          "\xF0\x9D\x91\xAF"    => "\x68",
1219          "\xF0\x9D\x91\xB0"    => "\x69",
1220          "\xF0\x9D\x91\xB1"    => "\x6A",
1221          "\xF0\x9D\x91\xB2"    => "\x6B",
1222          "\xF0\x9D\x91\xB3"    => "\x6C",
1223          "\xF0\x9D\x91\xB4"    => "\x6D",
1224          "\xF0\x9D\x91\xB5"    => "\x6E",
1225          "\xF0\x9D\x91\xB6"    => "\x6F",
1226          "\xF0\x9D\x91\xB7"    => "\x70",
1227          "\xF0\x9D\x91\xB8"    => "\x71",
1228          "\xF0\x9D\x91\xB9"    => "\x72",
1229          "\xF0\x9D\x91\xBA"    => "\x73",
1230          "\xF0\x9D\x91\xBB"    => "\x74",
1231          "\xF0\x9D\x91\xBC"    => "\x75",
1232          "\xF0\x9D\x91\xBD"    => "\x76",
1233          "\xF0\x9D\x91\xBE"    => "\x77",
1234          "\xF0\x9D\x91\xBF"    => "\x78",
1235          "\xF0\x9D\x92\x80"    => "\x79",
1236          "\xF0\x9D\x92\x81"    => "\x7A",
1237          "\xF0\x9D\x92\x9C"    => "\x61",
1238          "\xF0\x9D\x92\x9E"    => "\x63",
1239          "\xF0\x9D\x92\x9F"    => "\x64",
1240          "\xF0\x9D\x92\xA2"    => "\x67",
1241          "\xF0\x9D\x92\xA5"    => "\x6A",
1242          "\xF0\x9D\x92\xA6"    => "\x6B",
1243          "\xF0\x9D\x92\xA9"    => "\x6E",
1244          "\xF0\x9D\x92\xAA"    => "\x6F",
1245          "\xF0\x9D\x92\xAB"    => "\x70",
1246          "\xF0\x9D\x92\xAC"    => "\x71",
1247          "\xF0\x9D\x92\xAE"    => "\x73",
1248          "\xF0\x9D\x92\xAF"    => "\x74",
1249          "\xF0\x9D\x92\xB0"    => "\x75",
1250          "\xF0\x9D\x92\xB1"    => "\x76",
1251          "\xF0\x9D\x92\xB2"    => "\x77",
1252          "\xF0\x9D\x92\xB3"    => "\x78",
1253          "\xF0\x9D\x92\xB4"    => "\x79",
1254          "\xF0\x9D\x92\xB5"    => "\x7A",
1255          "\xF0\x9D\x93\x90"    => "\x61",
1256          "\xF0\x9D\x93\x91"    => "\x62",
1257          "\xF0\x9D\x93\x92"    => "\x63",
1258          "\xF0\x9D\x93\x93"    => "\x64",
1259          "\xF0\x9D\x93\x94"    => "\x65",
1260          "\xF0\x9D\x93\x95"    => "\x66",
1261          "\xF0\x9D\x93\x96"    => "\x67",
1262          "\xF0\x9D\x93\x97"    => "\x68",
1263          "\xF0\x9D\x93\x98"    => "\x69",
1264          "\xF0\x9D\x93\x99"    => "\x6A",
1265          "\xF0\x9D\x93\x9A"    => "\x6B",
1266          "\xF0\x9D\x93\x9B"    => "\x6C",
1267          "\xF0\x9D\x93\x9C"    => "\x6D",
1268          "\xF0\x9D\x93\x9D"    => "\x6E",
1269          "\xF0\x9D\x93\x9E"    => "\x6F",
1270          "\xF0\x9D\x93\x9F"    => "\x70",
1271          "\xF0\x9D\x93\xA0"    => "\x71",
1272          "\xF0\x9D\x93\xA1"    => "\x72",
1273          "\xF0\x9D\x93\xA2"    => "\x73",
1274          "\xF0\x9D\x93\xA3"    => "\x74",
1275          "\xF0\x9D\x93\xA4"    => "\x75",
1276          "\xF0\x9D\x93\xA5"    => "\x76",
1277          "\xF0\x9D\x93\xA6"    => "\x77",
1278          "\xF0\x9D\x93\xA7"    => "\x78",
1279          "\xF0\x9D\x93\xA8"    => "\x79",
1280          "\xF0\x9D\x93\xA9"    => "\x7A",
1281          "\xF0\x9D\x94\x84"    => "\x61",
1282          "\xF0\x9D\x94\x85"    => "\x62",
1283          "\xF0\x9D\x94\x87"    => "\x64",
1284          "\xF0\x9D\x94\x88"    => "\x65",
1285          "\xF0\x9D\x94\x89"    => "\x66",
1286          "\xF0\x9D\x94\x8A"    => "\x67",
1287          "\xF0\x9D\x94\x8D"    => "\x6A",
1288          "\xF0\x9D\x94\x8E"    => "\x6B",
1289          "\xF0\x9D\x94\x8F"    => "\x6C",
1290          "\xF0\x9D\x94\x90"    => "\x6D",
1291          "\xF0\x9D\x94\x91"    => "\x6E",
1292          "\xF0\x9D\x94\x92"    => "\x6F",
1293          "\xF0\x9D\x94\x93"    => "\x70",
1294          "\xF0\x9D\x94\x94"    => "\x71",
1295          "\xF0\x9D\x94\x96"    => "\x73",
1296          "\xF0\x9D\x94\x97"    => "\x74",
1297          "\xF0\x9D\x94\x98"    => "\x75",
1298          "\xF0\x9D\x94\x99"    => "\x76",
1299          "\xF0\x9D\x94\x9A"    => "\x77",
1300          "\xF0\x9D\x94\x9B"    => "\x78",
1301          "\xF0\x9D\x94\x9C"    => "\x79",
1302          "\xF0\x9D\x94\xB8"    => "\x61",
1303          "\xF0\x9D\x94\xB9"    => "\x62",
1304          "\xF0\x9D\x94\xBB"    => "\x64",
1305          "\xF0\x9D\x94\xBC"    => "\x65",
1306          "\xF0\x9D\x94\xBD"    => "\x66",
1307          "\xF0\x9D\x94\xBE"    => "\x67",
1308          "\xF0\x9D\x95\x80"    => "\x69",
1309          "\xF0\x9D\x95\x81"    => "\x6A",
1310          "\xF0\x9D\x95\x82"    => "\x6B",
1311          "\xF0\x9D\x95\x83"    => "\x6C",
1312          "\xF0\x9D\x95\x84"    => "\x6D",
1313          "\xF0\x9D\x95\x86"    => "\x6F",
1314          "\xF0\x9D\x95\x8A"    => "\x73",
1315          "\xF0\x9D\x95\x8B"    => "\x74",
1316          "\xF0\x9D\x95\x8C"    => "\x75",
1317          "\xF0\x9D\x95\x8D"    => "\x76",
1318          "\xF0\x9D\x95\x8E"    => "\x77",
1319          "\xF0\x9D\x95\x8F"    => "\x78",
1320          "\xF0\x9D\x95\x90"    => "\x79",
1321          "\xF0\x9D\x95\xAC"    => "\x61",
1322          "\xF0\x9D\x95\xAD"    => "\x62",
1323          "\xF0\x9D\x95\xAE"    => "\x63",
1324          "\xF0\x9D\x95\xAF"    => "\x64",
1325          "\xF0\x9D\x95\xB0"    => "\x65",
1326          "\xF0\x9D\x95\xB1"    => "\x66",
1327          "\xF0\x9D\x95\xB2"    => "\x67",
1328          "\xF0\x9D\x95\xB3"    => "\x68",
1329          "\xF0\x9D\x95\xB4"    => "\x69",
1330          "\xF0\x9D\x95\xB5"    => "\x6A",
1331          "\xF0\x9D\x95\xB6"    => "\x6B",
1332          "\xF0\x9D\x95\xB7"    => "\x6C",
1333          "\xF0\x9D\x95\xB8"    => "\x6D",
1334          "\xF0\x9D\x95\xB9"    => "\x6E",
1335          "\xF0\x9D\x95\xBA"    => "\x6F",
1336          "\xF0\x9D\x95\xBB"    => "\x70",
1337          "\xF0\x9D\x95\xBC"    => "\x71",
1338          "\xF0\x9D\x95\xBD"    => "\x72",
1339          "\xF0\x9D\x95\xBE"    => "\x73",
1340          "\xF0\x9D\x95\xBF"    => "\x74",
1341          "\xF0\x9D\x96\x80"    => "\x75",
1342          "\xF0\x9D\x96\x81"    => "\x76",
1343          "\xF0\x9D\x96\x82"    => "\x77",
1344          "\xF0\x9D\x96\x83"    => "\x78",
1345          "\xF0\x9D\x96\x84"    => "\x79",
1346          "\xF0\x9D\x96\x85"    => "\x7A",
1347          "\xF0\x9D\x96\xA0"    => "\x61",
1348          "\xF0\x9D\x96\xA1"    => "\x62",
1349          "\xF0\x9D\x96\xA2"    => "\x63",
1350          "\xF0\x9D\x96\xA3"    => "\x64",
1351          "\xF0\x9D\x96\xA4"    => "\x65",
1352          "\xF0\x9D\x96\xA5"    => "\x66",
1353          "\xF0\x9D\x96\xA6"    => "\x67",
1354          "\xF0\x9D\x96\xA7"    => "\x68",
1355          "\xF0\x9D\x96\xA8"    => "\x69",
1356          "\xF0\x9D\x96\xA9"    => "\x6A",
1357          "\xF0\x9D\x96\xAA"    => "\x6B",
1358          "\xF0\x9D\x96\xAB"    => "\x6C",
1359          "\xF0\x9D\x96\xAC"    => "\x6D",
1360          "\xF0\x9D\x96\xAD"    => "\x6E",
1361          "\xF0\x9D\x96\xAE"    => "\x6F",
1362          "\xF0\x9D\x96\xAF"    => "\x70",
1363          "\xF0\x9D\x96\xB0"    => "\x71",
1364          "\xF0\x9D\x96\xB1"    => "\x72",
1365          "\xF0\x9D\x96\xB2"    => "\x73",
1366          "\xF0\x9D\x96\xB3"    => "\x74",
1367          "\xF0\x9D\x96\xB4"    => "\x75",
1368          "\xF0\x9D\x96\xB5"    => "\x76",
1369          "\xF0\x9D\x96\xB6"    => "\x77",
1370          "\xF0\x9D\x96\xB7"    => "\x78",
1371          "\xF0\x9D\x96\xB8"    => "\x79",
1372          "\xF0\x9D\x96\xB9"    => "\x7A",
1373          "\xF0\x9D\x97\x94"    => "\x61",
1374          "\xF0\x9D\x97\x95"    => "\x62",
1375          "\xF0\x9D\x97\x96"    => "\x63",
1376          "\xF0\x9D\x97\x97"    => "\x64",
1377          "\xF0\x9D\x97\x98"    => "\x65",
1378          "\xF0\x9D\x97\x99"    => "\x66",
1379          "\xF0\x9D\x97\x9A"    => "\x67",
1380          "\xF0\x9D\x97\x9B"    => "\x68",
1381          "\xF0\x9D\x97\x9C"    => "\x69",
1382          "\xF0\x9D\x97\x9D"    => "\x6A",
1383          "\xF0\x9D\x97\x9E"    => "\x6B",
1384          "\xF0\x9D\x97\x9F"    => "\x6C",
1385          "\xF0\x9D\x97\xA0"    => "\x6D",
1386          "\xF0\x9D\x97\xA1"    => "\x6E",
1387          "\xF0\x9D\x97\xA2"    => "\x6F",
1388          "\xF0\x9D\x97\xA3"    => "\x70",
1389          "\xF0\x9D\x97\xA4"    => "\x71",
1390          "\xF0\x9D\x97\xA5"    => "\x72",
1391          "\xF0\x9D\x97\xA6"    => "\x73",
1392          "\xF0\x9D\x97\xA7"    => "\x74",
1393          "\xF0\x9D\x97\xA8"    => "\x75",
1394          "\xF0\x9D\x97\xA9"    => "\x76",
1395          "\xF0\x9D\x97\xAA"    => "\x77",
1396          "\xF0\x9D\x97\xAB"    => "\x78",
1397          "\xF0\x9D\x97\xAC"    => "\x79",
1398          "\xF0\x9D\x97\xAD"    => "\x7A",
1399          "\xF0\x9D\x98\x88"    => "\x61",
1400          "\xF0\x9D\x98\x89"    => "\x62",
1401          "\xF0\x9D\x98\x8A"    => "\x63",
1402          "\xF0\x9D\x98\x8B"    => "\x64",
1403          "\xF0\x9D\x98\x8C"    => "\x65",
1404          "\xF0\x9D\x98\x8D"    => "\x66",
1405          "\xF0\x9D\x98\x8E"    => "\x67",
1406          "\xF0\x9D\x98\x8F"    => "\x68",
1407          "\xF0\x9D\x98\x90"    => "\x69",
1408          "\xF0\x9D\x98\x91"    => "\x6A",
1409          "\xF0\x9D\x98\x92"    => "\x6B",
1410          "\xF0\x9D\x98\x93"    => "\x6C",
1411          "\xF0\x9D\x98\x94"    => "\x6D",
1412          "\xF0\x9D\x98\x95"    => "\x6E",
1413          "\xF0\x9D\x98\x96"    => "\x6F",
1414          "\xF0\x9D\x98\x97"    => "\x70",
1415          "\xF0\x9D\x98\x98"    => "\x71",
1416          "\xF0\x9D\x98\x99"    => "\x72",
1417          "\xF0\x9D\x98\x9A"    => "\x73",
1418          "\xF0\x9D\x98\x9B"    => "\x74",
1419          "\xF0\x9D\x98\x9C"    => "\x75",
1420          "\xF0\x9D\x98\x9D"    => "\x76",
1421          "\xF0\x9D\x98\x9E"    => "\x77",
1422          "\xF0\x9D\x98\x9F"    => "\x78",
1423          "\xF0\x9D\x98\xA0"    => "\x79",
1424          "\xF0\x9D\x98\xA1"    => "\x7A",
1425          "\xF0\x9D\x98\xBC"    => "\x61",
1426          "\xF0\x9D\x98\xBD"    => "\x62",
1427          "\xF0\x9D\x98\xBE"    => "\x63",
1428          "\xF0\x9D\x98\xBF"    => "\x64",
1429          "\xF0\x9D\x99\x80"    => "\x65",
1430          "\xF0\x9D\x99\x81"    => "\x66",
1431          "\xF0\x9D\x99\x82"    => "\x67",
1432          "\xF0\x9D\x99\x83"    => "\x68",
1433          "\xF0\x9D\x99\x84"    => "\x69",
1434          "\xF0\x9D\x99\x85"    => "\x6A",
1435          "\xF0\x9D\x99\x86"    => "\x6B",
1436          "\xF0\x9D\x99\x87"    => "\x6C",
1437          "\xF0\x9D\x99\x88"    => "\x6D",
1438          "\xF0\x9D\x99\x89"    => "\x6E",
1439          "\xF0\x9D\x99\x8A"    => "\x6F",
1440          "\xF0\x9D\x99\x8B"    => "\x70",
1441          "\xF0\x9D\x99\x8C"    => "\x71",
1442          "\xF0\x9D\x99\x8D"    => "\x72",
1443          "\xF0\x9D\x99\x8E"    => "\x73",
1444          "\xF0\x9D\x99\x8F"    => "\x74",
1445          "\xF0\x9D\x99\x90"    => "\x75",
1446          "\xF0\x9D\x99\x91"    => "\x76",
1447          "\xF0\x9D\x99\x92"    => "\x77",
1448          "\xF0\x9D\x99\x93"    => "\x78",
1449          "\xF0\x9D\x99\x94"    => "\x79",
1450          "\xF0\x9D\x99\x95"    => "\x7A",
1451          "\xF0\x9D\x99\xB0"    => "\x61",
1452          "\xF0\x9D\x99\xB1"    => "\x62",
1453          "\xF0\x9D\x99\xB2"    => "\x63",
1454          "\xF0\x9D\x99\xB3"    => "\x64",
1455          "\xF0\x9D\x99\xB4"    => "\x65",
1456          "\xF0\x9D\x99\xB5"    => "\x66",
1457          "\xF0\x9D\x99\xB6"    => "\x67",
1458          "\xF0\x9D\x99\xB7"    => "\x68",
1459          "\xF0\x9D\x99\xB8"    => "\x69",
1460          "\xF0\x9D\x99\xB9"    => "\x6A",
1461          "\xF0\x9D\x99\xBA"    => "\x6B",
1462          "\xF0\x9D\x99\xBB"    => "\x6C",
1463          "\xF0\x9D\x99\xBC"    => "\x6D",
1464          "\xF0\x9D\x99\xBD"    => "\x6E",
1465          "\xF0\x9D\x99\xBE"    => "\x6F",
1466          "\xF0\x9D\x99\xBF"    => "\x70",
1467          "\xF0\x9D\x9A\x80"    => "\x71",
1468          "\xF0\x9D\x9A\x81"    => "\x72",
1469          "\xF0\x9D\x9A\x82"    => "\x73",
1470          "\xF0\x9D\x9A\x83"    => "\x74",
1471          "\xF0\x9D\x9A\x84"    => "\x75",
1472          "\xF0\x9D\x9A\x85"    => "\x76",
1473          "\xF0\x9D\x9A\x86"    => "\x77",
1474          "\xF0\x9D\x9A\x87"    => "\x78",
1475          "\xF0\x9D\x9A\x88"    => "\x79",
1476          "\xF0\x9D\x9A\x89"    => "\x7A",
1477          "\xF0\x9D\x9A\xA8"    => "\xCE\xB1",
1478          "\xF0\x9D\x9A\xA9"    => "\xCE\xB2",
1479          "\xF0\x9D\x9A\xAA"    => "\xCE\xB3",
1480          "\xF0\x9D\x9A\xAB"    => "\xCE\xB4",
1481          "\xF0\x9D\x9A\xAC"    => "\xCE\xB5",
1482          "\xF0\x9D\x9A\xAD"    => "\xCE\xB6",
1483          "\xF0\x9D\x9A\xAE"    => "\xCE\xB7",
1484          "\xF0\x9D\x9A\xAF"    => "\xCE\xB8",
1485          "\xF0\x9D\x9A\xB0"    => "\xCE\xB9",
1486          "\xF0\x9D\x9A\xB1"    => "\xCE\xBA",
1487          "\xF0\x9D\x9A\xB2"    => "\xCE\xBB",
1488          "\xF0\x9D\x9A\xB3"    => "\xCE\xBC",
1489          "\xF0\x9D\x9A\xB4"    => "\xCE\xBD",
1490          "\xF0\x9D\x9A\xB5"    => "\xCE\xBE",
1491          "\xF0\x9D\x9A\xB6"    => "\xCE\xBF",
1492          "\xF0\x9D\x9A\xB7"    => "\xCF\x80",
1493          "\xF0\x9D\x9A\xB8"    => "\xCF\x81",
1494          "\xF0\x9D\x9A\xB9"    => "\xCE\xB8",
1495          "\xF0\x9D\x9A\xBA"    => "\xCF\x83",
1496          "\xF0\x9D\x9A\xBB"    => "\xCF\x84",
1497          "\xF0\x9D\x9A\xBC"    => "\xCF\x85",
1498          "\xF0\x9D\x9A\xBD"    => "\xCF\x86",
1499          "\xF0\x9D\x9A\xBE"    => "\xCF\x87",
1500          "\xF0\x9D\x9A\xBF"    => "\xCF\x88",
1501          "\xF0\x9D\x9B\x80"    => "\xCF\x89",
1502          "\xF0\x9D\x9B\x93"    => "\xCF\x83",
1503          "\xF0\x9D\x9B\xA2"    => "\xCE\xB1",
1504          "\xF0\x9D\x9B\xA3"    => "\xCE\xB2",
1505          "\xF0\x9D\x9B\xA4"    => "\xCE\xB3",
1506          "\xF0\x9D\x9B\xA5"    => "\xCE\xB4",
1507          "\xF0\x9D\x9B\xA6"    => "\xCE\xB5",
1508          "\xF0\x9D\x9B\xA7"    => "\xCE\xB6",
1509          "\xF0\x9D\x9B\xA8"    => "\xCE\xB7",
1510          "\xF0\x9D\x9B\xA9"    => "\xCE\xB8",
1511          "\xF0\x9D\x9B\xAA"    => "\xCE\xB9",
1512          "\xF0\x9D\x9B\xAB"    => "\xCE\xBA",
1513          "\xF0\x9D\x9B\xAC"    => "\xCE\xBB",
1514          "\xF0\x9D\x9B\xAD"    => "\xCE\xBC",
1515          "\xF0\x9D\x9B\xAE"    => "\xCE\xBD",
1516          "\xF0\x9D\x9B\xAF"    => "\xCE\xBE",
1517          "\xF0\x9D\x9B\xB0"    => "\xCE\xBF",
1518          "\xF0\x9D\x9B\xB1"    => "\xCF\x80",
1519          "\xF0\x9D\x9B\xB2"    => "\xCF\x81",
1520          "\xF0\x9D\x9B\xB3"    => "\xCE\xB8",
1521          "\xF0\x9D\x9B\xB4"    => "\xCF\x83",
1522          "\xF0\x9D\x9B\xB5"    => "\xCF\x84",
1523          "\xF0\x9D\x9B\xB6"    => "\xCF\x85",
1524          "\xF0\x9D\x9B\xB7"    => "\xCF\x86",
1525          "\xF0\x9D\x9B\xB8"    => "\xCF\x87",
1526          "\xF0\x9D\x9B\xB9"    => "\xCF\x88",
1527          "\xF0\x9D\x9B\xBA"    => "\xCF\x89",
1528          "\xF0\x9D\x9C\x8D"    => "\xCF\x83",
1529          "\xF0\x9D\x9C\x9C"    => "\xCE\xB1",
1530          "\xF0\x9D\x9C\x9D"    => "\xCE\xB2",
1531          "\xF0\x9D\x9C\x9E"    => "\xCE\xB3",
1532          "\xF0\x9D\x9C\x9F"    => "\xCE\xB4",
1533          "\xF0\x9D\x9C\xA0"    => "\xCE\xB5",
1534          "\xF0\x9D\x9C\xA1"    => "\xCE\xB6",
1535          "\xF0\x9D\x9C\xA2"    => "\xCE\xB7",
1536          "\xF0\x9D\x9C\xA3"    => "\xCE\xB8",
1537          "\xF0\x9D\x9C\xA4"    => "\xCE\xB9",
1538          "\xF0\x9D\x9C\xA5"    => "\xCE\xBA",
1539          "\xF0\x9D\x9C\xA6"    => "\xCE\xBB",
1540          "\xF0\x9D\x9C\xA7"    => "\xCE\xBC",
1541          "\xF0\x9D\x9C\xA8"    => "\xCE\xBD",
1542          "\xF0\x9D\x9C\xA9"    => "\xCE\xBE",
1543          "\xF0\x9D\x9C\xAA"    => "\xCE\xBF",
1544          "\xF0\x9D\x9C\xAB"    => "\xCF\x80",
1545          "\xF0\x9D\x9C\xAC"    => "\xCF\x81",
1546          "\xF0\x9D\x9C\xAD"    => "\xCE\xB8",
1547          "\xF0\x9D\x9C\xAE"    => "\xCF\x83",
1548          "\xF0\x9D\x9C\xAF"    => "\xCF\x84",
1549          "\xF0\x9D\x9C\xB0"    => "\xCF\x85",
1550          "\xF0\x9D\x9C\xB1"    => "\xCF\x86",
1551          "\xF0\x9D\x9C\xB2"    => "\xCF\x87",
1552          "\xF0\x9D\x9C\xB3"    => "\xCF\x88",
1553          "\xF0\x9D\x9C\xB4"    => "\xCF\x89",
1554          "\xF0\x9D\x9D\x87"    => "\xCF\x83",
1555          "\xF0\x9D\x9D\x96"    => "\xCE\xB1",
1556          "\xF0\x9D\x9D\x97"    => "\xCE\xB2",
1557          "\xF0\x9D\x9D\x98"    => "\xCE\xB3",
1558          "\xF0\x9D\x9D\x99"    => "\xCE\xB4",
1559          "\xF0\x9D\x9D\x9A"    => "\xCE\xB5",
1560          "\xF0\x9D\x9D\x9B"    => "\xCE\xB6",
1561          "\xF0\x9D\x9D\x9C"    => "\xCE\xB7",
1562          "\xF0\x9D\x9D\x9D"    => "\xCE\xB8",
1563          "\xF0\x9D\x9D\x9E"    => "\xCE\xB9",
1564          "\xF0\x9D\x9D\x9F"    => "\xCE\xBA",
1565          "\xF0\x9D\x9D\xA0"    => "\xCE\xBB",
1566          "\xF0\x9D\x9D\xA1"    => "\xCE\xBC",
1567          "\xF0\x9D\x9D\xA2"    => "\xCE\xBD",
1568          "\xF0\x9D\x9D\xA3"    => "\xCE\xBE",
1569          "\xF0\x9D\x9D\xA4"    => "\xCE\xBF",
1570          "\xF0\x9D\x9D\xA5"    => "\xCF\x80",
1571          "\xF0\x9D\x9D\xA6"    => "\xCF\x81",
1572          "\xF0\x9D\x9D\xA7"    => "\xCE\xB8",
1573          "\xF0\x9D\x9D\xA8"    => "\xCF\x83",
1574          "\xF0\x9D\x9D\xA9"    => "\xCF\x84",
1575          "\xF0\x9D\x9D\xAA"    => "\xCF\x85",
1576          "\xF0\x9D\x9D\xAB"    => "\xCF\x86",
1577          "\xF0\x9D\x9D\xAC"    => "\xCF\x87",
1578          "\xF0\x9D\x9D\xAD"    => "\xCF\x88",
1579          "\xF0\x9D\x9D\xAE"    => "\xCF\x89",
1580          "\xF0\x9D\x9E\x81"    => "\xCF\x83",
1581          "\xF0\x9D\x9E\x90"    => "\xCE\xB1",
1582          "\xF0\x9D\x9E\x91"    => "\xCE\xB2",
1583          "\xF0\x9D\x9E\x92"    => "\xCE\xB3",
1584          "\xF0\x9D\x9E\x93"    => "\xCE\xB4",
1585          "\xF0\x9D\x9E\x94"    => "\xCE\xB5",
1586          "\xF0\x9D\x9E\x95"    => "\xCE\xB6",
1587          "\xF0\x9D\x9E\x96"    => "\xCE\xB7",
1588          "\xF0\x9D\x9E\x97"    => "\xCE\xB8",
1589          "\xF0\x9D\x9E\x98"    => "\xCE\xB9",
1590          "\xF0\x9D\x9E\x99"    => "\xCE\xBA",
1591          "\xF0\x9D\x9E\x9A"    => "\xCE\xBB",
1592          "\xF0\x9D\x9E\x9B"    => "\xCE\xBC",
1593          "\xF0\x9D\x9E\x9C"    => "\xCE\xBD",
1594          "\xF0\x9D\x9E\x9D"    => "\xCE\xBE",
1595          "\xF0\x9D\x9E\x9E"    => "\xCE\xBF",
1596          "\xF0\x9D\x9E\x9F"    => "\xCF\x80",
1597          "\xF0\x9D\x9E\xA0"    => "\xCF\x81",
1598          "\xF0\x9D\x9E\xA1"    => "\xCE\xB8",
1599          "\xF0\x9D\x9E\xA2"    => "\xCF\x83",
1600          "\xF0\x9D\x9E\xA3"    => "\xCF\x84",
1601          "\xF0\x9D\x9E\xA4"    => "\xCF\x85",
1602          "\xF0\x9D\x9E\xA5"    => "\xCF\x86",
1603          "\xF0\x9D\x9E\xA6"    => "\xCF\x87",
1604          "\xF0\x9D\x9E\xA7"    => "\xCF\x88",
1605          "\xF0\x9D\x9E\xA8"    => "\xCF\x89",
1606          "\xF0\x9D\x9E\xBB"    => "\xCF\x83",
1607          "\xF0\x9D\x9F\x8A"    => "\xCF\x9D",
1608      );
1609      global $phpbb_root_path, $phpEx;
1610   
1611      // do the case fold
1612      $text = utf8_case_fold($text, $option);
1613   
1614      if (!class_exists('utf_normalizer'))
1615      {
1616          global $phpbb_root_path, $phpEx;
1617          include($phpbb_root_path . 'includes/utf/utf_normalizer.' . $phpEx);
1618      }
1619   
1620      // convert to NFKC
1621      utf_normalizer::nfkc($text);
1622   
1623      // FC_NFKC_Closure, http://www.unicode.org/Public/5.0.0/ucd/DerivedNormalizationProps.txt
1624      $text = strtr($text, $fc_nfkc_closure);
1625   
1626      return $text;
1627  }
1628   
/**
* Case folds text that is assumed to already be in NFC.
*
* Before folding, every precomposed character listed in the table below is
* replaced by "base character + U+0345" (plus U+037A, which becomes
* "space + U+0345"). This avoids further normalization on composed code points
* that contain U+0345 in their decomposition. The folding itself is delegated
* to utf8_case_fold().
*
* @param    string    $text    text to be case folded (assumed to be NFC)
* @param    string    $option    folding mode, handed through unchanged to utf8_case_fold()
* @return    string            case folded text
*/
function utf8_case_fold_nfc($text, $option = 'full')
{
    // Precomposed character => base character followed by U+0345 ("\xCD\x85")
    static $ypogegrammeni = array(
        "\xCD\xBA"        => "\x20\xCD\x85",
        "\xE1\xBE\x80"    => "\xE1\xBC\x80\xCD\x85",
        "\xE1\xBE\x81"    => "\xE1\xBC\x81\xCD\x85",
        "\xE1\xBE\x82"    => "\xE1\xBC\x82\xCD\x85",
        "\xE1\xBE\x83"    => "\xE1\xBC\x83\xCD\x85",
        "\xE1\xBE\x84"    => "\xE1\xBC\x84\xCD\x85",
        "\xE1\xBE\x85"    => "\xE1\xBC\x85\xCD\x85",
        "\xE1\xBE\x86"    => "\xE1\xBC\x86\xCD\x85",
        "\xE1\xBE\x87"    => "\xE1\xBC\x87\xCD\x85",
        "\xE1\xBE\x88"    => "\xE1\xBC\x88\xCD\x85",
        "\xE1\xBE\x89"    => "\xE1\xBC\x89\xCD\x85",
        "\xE1\xBE\x8A"    => "\xE1\xBC\x8A\xCD\x85",
        "\xE1\xBE\x8B"    => "\xE1\xBC\x8B\xCD\x85",
        "\xE1\xBE\x8C"    => "\xE1\xBC\x8C\xCD\x85",
        "\xE1\xBE\x8D"    => "\xE1\xBC\x8D\xCD\x85",
        "\xE1\xBE\x8E"    => "\xE1\xBC\x8E\xCD\x85",
        "\xE1\xBE\x8F"    => "\xE1\xBC\x8F\xCD\x85",
        "\xE1\xBE\x90"    => "\xE1\xBC\xA0\xCD\x85",
        "\xE1\xBE\x91"    => "\xE1\xBC\xA1\xCD\x85",
        "\xE1\xBE\x92"    => "\xE1\xBC\xA2\xCD\x85",
        "\xE1\xBE\x93"    => "\xE1\xBC\xA3\xCD\x85",
        "\xE1\xBE\x94"    => "\xE1\xBC\xA4\xCD\x85",
        "\xE1\xBE\x95"    => "\xE1\xBC\xA5\xCD\x85",
        "\xE1\xBE\x96"    => "\xE1\xBC\xA6\xCD\x85",
        "\xE1\xBE\x97"    => "\xE1\xBC\xA7\xCD\x85",
        "\xE1\xBE\x98"    => "\xE1\xBC\xA8\xCD\x85",
        "\xE1\xBE\x99"    => "\xE1\xBC\xA9\xCD\x85",
        "\xE1\xBE\x9A"    => "\xE1\xBC\xAA\xCD\x85",
        "\xE1\xBE\x9B"    => "\xE1\xBC\xAB\xCD\x85",
        "\xE1\xBE\x9C"    => "\xE1\xBC\xAC\xCD\x85",
        "\xE1\xBE\x9D"    => "\xE1\xBC\xAD\xCD\x85",
        "\xE1\xBE\x9E"    => "\xE1\xBC\xAE\xCD\x85",
        "\xE1\xBE\x9F"    => "\xE1\xBC\xAF\xCD\x85",
        "\xE1\xBE\xA0"    => "\xE1\xBD\xA0\xCD\x85",
        "\xE1\xBE\xA1"    => "\xE1\xBD\xA1\xCD\x85",
        "\xE1\xBE\xA2"    => "\xE1\xBD\xA2\xCD\x85",
        "\xE1\xBE\xA3"    => "\xE1\xBD\xA3\xCD\x85",
        "\xE1\xBE\xA4"    => "\xE1\xBD\xA4\xCD\x85",
        "\xE1\xBE\xA5"    => "\xE1\xBD\xA5\xCD\x85",
        "\xE1\xBE\xA6"    => "\xE1\xBD\xA6\xCD\x85",
        "\xE1\xBE\xA7"    => "\xE1\xBD\xA7\xCD\x85",
        "\xE1\xBE\xA8"    => "\xE1\xBD\xA8\xCD\x85",
        "\xE1\xBE\xA9"    => "\xE1\xBD\xA9\xCD\x85",
        "\xE1\xBE\xAA"    => "\xE1\xBD\xAA\xCD\x85",
        "\xE1\xBE\xAB"    => "\xE1\xBD\xAB\xCD\x85",
        "\xE1\xBE\xAC"    => "\xE1\xBD\xAC\xCD\x85",
        "\xE1\xBE\xAD"    => "\xE1\xBD\xAD\xCD\x85",
        "\xE1\xBE\xAE"    => "\xE1\xBD\xAE\xCD\x85",
        "\xE1\xBE\xAF"    => "\xE1\xBD\xAF\xCD\x85",
        "\xE1\xBE\xB2"    => "\xE1\xBD\xB0\xCD\x85",
        "\xE1\xBE\xB3"    => "\xCE\xB1\xCD\x85",
        "\xE1\xBE\xB4"    => "\xCE\xAC\xCD\x85",
        "\xE1\xBE\xB7"    => "\xE1\xBE\xB6\xCD\x85",
        "\xE1\xBE\xBC"    => "\xCE\x91\xCD\x85",
        "\xE1\xBF\x82"    => "\xE1\xBD\xB4\xCD\x85",
        "\xE1\xBF\x83"    => "\xCE\xB7\xCD\x85",
        "\xE1\xBF\x84"    => "\xCE\xAE\xCD\x85",
        "\xE1\xBF\x87"    => "\xE1\xBF\x86\xCD\x85",
        "\xE1\xBF\x8C"    => "\xCE\x97\xCD\x85",
        "\xE1\xBF\xB2"    => "\xE1\xBD\xBC\xCD\x85",
        "\xE1\xBF\xB3"    => "\xCF\x89\xCD\x85",
        "\xE1\xBF\xB4"    => "\xCF\x8E\xCD\x85",
        "\xE1\xBF\xB7"    => "\xE1\xBF\xB6\xCD\x85",
        "\xE1\xBF\xBC"    => "\xCE\xA9\xCD\x85",
    );

    // Small trick: rewrite the composed points up front so they need no
    // further normalization once folded
    $text = strtr($text, $ypogegrammeni);

    // do the case fold
    return utf8_case_fold($text, $option);
}
1716   
if (extension_loaded('intl'))
{
    /**
    * NFC normalization backed by the Normalizer class of the intl extension
    * http://php.net/manual/en/normalizer.normalize.php
    *
    * Accepts a single string or an array of strings; arrays may be nested one
    * level deep. Values that Normalizer::isNormalized() already accepts are
    * left untouched, everything else is replaced by the (string) cast of
    * Normalizer::normalize().
    *
    * @param    mixed    $strings    a string or an array of strings to normalize
    * @return    mixed                the normalized content, preserving array keys if array given.
    */
    function utf8_normalize_nfc($strings)
    {
        // Nothing to do for empty strings, empty arrays and other empty values
        if (empty($strings))
        {
            return $strings;
        }

        if (is_array($strings))
        {
            foreach ($strings as $key => $entry)
            {
                if (!is_array($entry))
                {
                    if (!Normalizer::isNormalized($entry))
                    {
                        $strings[$key] = (string) Normalizer::normalize($entry);
                    }
                    continue;
                }

                // Second (and last) supported nesting level
                foreach ($entry as $sub_key => $sub_entry)
                {
                    if (!Normalizer::isNormalized($sub_entry))
                    {
                        $strings[$key][$sub_key] = (string) Normalizer::normalize($sub_entry);
                    }
                }
            }

            return $strings;
        }

        return Normalizer::isNormalized($strings) ? $strings : (string) Normalizer::normalize($strings);
    }
}
else
{
    /**
    * NFC normalization backed by the bundled utf_normalizer class, which is
    * included on first use if it has not been loaded yet.
    *
    * Accepts a single string or an array of strings; arrays may be nested one
    * level deep. utf_normalizer::nfc() is handed each value to modify in place.
    *
    * @param    mixed    $strings    a string or an array of strings to normalize
    * @return    mixed                the normalized content, preserving array keys if array given.
    */
    function utf8_normalize_nfc($strings)
    {
        // Nothing to do for empty strings, empty arrays and other empty values
        if (empty($strings))
        {
            return $strings;
        }

        if (!class_exists('utf_normalizer'))
        {
            global $phpbb_root_path, $phpEx;
            include($phpbb_root_path . 'includes/utf/utf_normalizer.' . $phpEx);
        }

        if (!is_array($strings))
        {
            utf_normalizer::nfc($strings);
            return $strings;
        }

        foreach (array_keys($strings) as $key)
        {
            if (!is_array($strings[$key]))
            {
                utf_normalizer::nfc($strings[$key]);
                continue;
            }

            // Second (and last) supported nesting level
            foreach (array_keys($strings[$key]) as $sub_key)
            {
                utf_normalizer::nfc($strings[$key][$sub_key]);
            }
        }

        return $strings;
    }
}
1818   
/**
* Generates the "clean" form of a string.
*
* The text is case folded and normalized through utf8_case_fold_nfkc(), then
* homographs are mapped to one specific character using the table loaded from
* includes/utf/data/confusables. Finally control characters are removed, runs
* of spaces are collapsed into a single space and the result is trimmed.
*
* Please be aware that if you change something within this function or within
* functions used here you need to rebuild/update the username_clean column in
* the users table, and all other columns that store a clean string; otherwise
* you will break this functionality.
*
* @param    string    $text    An unclean string, maybe user input (has to be valid UTF-8!)
* @return    string            Cleaned up version of the input string
*/
function utf8_clean_string($text)
{
    global $phpbb_root_path, $phpEx;

    // Homograph table, loaded once per request
    static $homographs = array();

    if (!$homographs)
    {
        $homographs = include($phpbb_root_path . 'includes/utf/data/confusables.' . $phpEx);
    }

    $text = strtr(utf8_case_fold_nfkc($text), $homographs);

    $text = preg_replace(
        array(
            // Other control characters
            '#(?:[\x00-\x1F\x7F]+|(?:\xC2[\x80-\x9F])+)#',
            // Multiple spaces are reduced to a single one
            '# {2,}#',
        ),
        array('', ' '),
        $text
    );

    // trim() is sufficient here as all the other space characters should have
    // been turned into normal ASCII spaces by now
    return trim($text);
}
1854   
/**
* Escapes HTML special characters in a UTF-8 string.
*
* Shortcut for htmlspecialchars($value, ENT_COMPAT, 'UTF-8')
*
* @param    string    $value    The text to escape
* @return    string            The escaped text
*/
function utf8_htmlspecialchars($value)
{
    $escaped = htmlspecialchars($value, ENT_COMPAT, 'UTF-8');

    return $escaped;
}
1862   
/**
* Tries to convert a message returned by the system to UTF-8 and escapes it
* with utf8_htmlspecialchars().
*
* PHP assumes such messages are ISO-8859-1 so we'll do that too
* and if it breaks messages we'll blame it on them ;-)
*
* @param    string    $message    The system message
* @return    string                The escaped message
*/
function utf8_convert_message($message)
{
    // Only messages containing bytes outside the ASCII range need recoding,
    // as there is no point in converting pure ASCII from ISO-8859-1 to UTF-8
    if (preg_match('/[\x80-\xFF]/', $message))
    {
        $message = utf8_recode($message, 'ISO-8859-1');
    }

    return utf8_htmlspecialchars($message);
}
1881   
/**
* UTF8-compatible wordwrap replacement
*
* The string is split on $break first so that existing breaks are kept, then
* each line is split on single spaces and re-assembled so that no output line
* is longer than $width characters (as counted by utf8_strlen()), unless a
* single word is longer than $width and $cut is false.
*
* A word that is too long for the width and starts a line is placed on that
* line; no empty line is emitted in front of it. Cutting is ignored when
* $width is smaller than 1, because a word cannot be cut to fit such a width.
*
* @param    string    $string    The input string
* @param    int        $width    The column width. Defaults to 75.
* @param    string    $break    The line is broken using the optional break parameter. Defaults to "\n".
* @param    bool    $cut    If the cut is set to TRUE, the string is always wrapped at the specified width. So if you have a word that is larger than the given width, it is broken apart.
*
* @return    string            the given string wrapped at the specified column.
*
*/
function utf8_wordwrap($string, $width = 75, $break = "\n", $cut = false)
{
    // With a width below one character the cut branch below would re-queue the
    // same remainder on every pass and never terminate, so cutting is disabled
    if ($width < 1)
    {
        $cut = false;
    }

    $wrapped = array();

    // We first need to explode on $break, not destroying existing (intended) breaks
    foreach (explode($break, $string) as $line)
    {
        $words = explode(' ', $line);
        $current = '';

        for ($i = 0, $size = sizeof($words); $i < $size; $i++)
        {
            $word = $words[$i];

            // If cut is true we need to cut the word if it is > width chars.
            // The remainder stays in the same slot and is visited again.
            if ($cut && utf8_strlen($word) > $width)
            {
                $words[$i] = utf8_substr($word, $width);
                $word = utf8_substr($word, 0, $width);
                $i--;
            }

            // Only start a new line if the current one already holds a word;
            // otherwise an over-long word would leave an empty line behind
            if ($current !== '' && utf8_strlen($current . $word) > $width)
            {
                // Strip the single separator space appended after the last word
                $wrapped[] = substr($current, 0, -1);
                $current = '';
            }

            $current .= $word . ' ';
        }

        // Strip the single separator space appended after the last word
        $wrapped[] = substr($current, 0, -1);
    }

    return implode($break, $wrapped);
}
1934   
/**
* UTF8-safe basename() function
*
* basename() has some limitations and is dependent on the locale setting
* according to the PHP manual. This locale independent replacement keeps
* only what follows the last forward slash and, of that, what follows the
* last backslash.
*
* @param string $filename The filename basename() should be applied to
* @return string The basenamed filename
*/
function utf8_basename($filename)
{
    // Forward slash AND backward slash are both handled, in this order,
    // because they could be mixed or "sneaked" in. Never trust user input...
    foreach (array('/', '\\') as $separator)
    {
        if (strpos($filename, $separator) === false)
        {
            continue;
        }

        $first_kept = utf8_strrpos($filename, $separator) + 1;
        $filename = utf8_substr($filename, $first_kept);
    }

    return $filename;
}
1962   
/**
* UTF8-safe str_replace() function
*
* Search and replacement values may each be a single string or an array.
* Arrays are paired up by position: a single replacement string is used for
* every search value, and a replacement array that is shorter than the search
* array is padded with empty strings. Empty search strings are skipped.
*
* @param string|array $search The value(s) to search for
* @param string|array $replace The replacement string(s)
* @param string $subject The target string
* @return string The resultant string
*/
function utf8_str_replace($search, $replace, $subject)
{
    if (!is_array($search))
    {
        $search = array($search);
        if (is_array($replace))
        {
            // A single search value cannot be paired with an array of
            // replacements; fall back to the string cast and report it
            $replace = (string) $replace;
            trigger_error('Array to string conversion', E_USER_NOTICE);
        }
    }
    else
    {
        // Pair values by position, whatever keys the caller used
        $search = array_values($search);
    }

    $length = sizeof($search);

    if (!is_array($replace))
    {
        $replace = array_fill(0, $length, $replace);
    }
    else
    {
        $replace = array_pad(array_values($replace), $length, '');
    }

    for ($i = 0; $i < $length; $i++)
    {
        // An empty needle can never be consumed: depending on the underlying
        // strpos implementation it either raises a warning or matches at every
        // offset, which would keep this loop running while $subject grows
        if ((string) $search[$i] === '')
        {
            continue;
        }

        $search_length = utf8_strlen($search[$i]);
        $replace_length = utf8_strlen($replace[$i]);

        $offset = 0;
        while (($start = utf8_strpos($subject, $search[$i], $offset)) !== false)
        {
            $subject = utf8_substr($subject, 0, $start) . $replace[$i] . utf8_substr($subject, $start + $search_length);

            // Continue behind the inserted text so replacements are not rescanned
            $offset = $start + $replace_length;
        }
    }

    return $subject;
}
2009