Verzeichnisstruktur phpBB-3.0.0


Veröffentlicht
12.12.2007

So funktioniert es


Auf das letzte Element klicken. Dies geht jeweils einen Schritt zurück.

Auf das Icon klicken, dies öffnet das Verzeichnis. Nochmal klicken schließt das Verzeichnis.
Auf den Verzeichnisnamen klicken, dies zeigt nur das Verzeichnis mit Inhalt an

(Beispiel Datei-Icons)

Auf das Icon klicken um den Quellcode anzuzeigen

utf_tools.php

Zuletzt modifiziert: 09.10.2024, 12:51 - Dateigröße: 57.43 KiB


0001  <?php
0002  /**
0003  *
0004  * @package utf
0005  * @version $Id$
0006  * @copyright (c) 2006 phpBB Group
0007  * @license http://opensource.org/licenses/gpl-license.php GNU Public License
0008  *
0009  */
0010   
0011  /**
0012  */
// Refuse to run unless the including script has defined IN_PHPBB
defined('IN_PHPBB') or exit;

// Enforce ASCII only string handling
setlocale(LC_CTYPE, 'C');
0020   
0021  /**
0022  * UTF-8 tools
0023  *
0024  * Whenever possible, these functions will try to use PHP's built-in functions or
0025  * extensions, otherwise they will default to custom routines.
0026  *
0027  * @package utf
0028  */
0029   
// Fallbacks for installations that do not provide utf8_encode()/utf8_decode().
// The guard checks for the functions themselves rather than for the xml
// extension: since PHP 7.2 both functions are part of the core, so a build
// without ext/xml would otherwise die with "Cannot redeclare".
if (!function_exists('utf8_encode'))
{
    /**
    * Implementation of PHP's native utf8_encode for people without XML support
    * This function exploits some nice things that ISO-8859-1 and UTF-8 have in common
    *
    * @param string $str ISO-8859-1 encoded data
    * @return string UTF-8 encoded data
    */
    function utf8_encode($str)
    {
        $out = '';
        for ($i = 0, $len = strlen($str); $i < $len; $i++)
        {
            $letter = $str[$i];
            $num = ord($letter);
            if ($num < 0x80)
            {
                // ASCII is identical in both encodings
                $out .= $letter;
            }
            else if ($num < 0xC0)
            {
                // 0x80-0xBF: lead byte C2, trail byte is the original byte
                $out .= "\xC2" . $letter;
            }
            else
            {
                // 0xC0-0xFF: lead byte C3, trail byte is the original minus 0x40
                $out .= "\xC3" . chr($num - 64);
            }
        }
        return $out;
    }
}

if (!function_exists('utf8_decode'))
{
    /**
    * Implementation of PHP's native utf8_decode for people without XML support
    *
    * Characters outside ISO-8859-1, as well as a two-byte sequence that is cut
    * off at the end of the input, are replaced by a single "?".
    *
    * @param string $str UTF-8 encoded data
    * @return string ISO-8859-1 encoded data
    */
    function utf8_decode($str)
    {
        $pos = 0;
        $len = strlen($str);
        $ret = '';

        while ($pos < $len)
        {
            $byte = ord($str[$pos]);
            $ord = $byte & 0xF0;

            if ($ord === 0xC0 || $ord === 0xD0)
            {
                // Two-byte sequence; make sure the trail byte is really there
                // instead of reading past the end of the string
                if ($pos + 1 >= $len)
                {
                    $ret .= '?';
                    break;
                }

                $charval = (($byte & 0x1F) << 6) | (ord($str[$pos + 1]) & 0x3F);
                $pos += 2;
                $ret .= (($charval < 256) ? chr($charval) : '?');
            }
            else if ($ord === 0xE0)
            {
                // Three-byte sequence: never representable in ISO-8859-1
                $ret .= '?';
                $pos += 3;
            }
            else if ($ord === 0xF0)
            {
                // Four-byte sequence: never representable in ISO-8859-1
                $ret .= '?';
                $pos += 4;
            }
            else
            {
                // Single byte, copied unchanged
                $ret .= $str[$pos];
                ++$pos;
            }
        }
        return $ret;
    }
}
0102   
0103  // mbstring is old and has its functions around for older versions of PHP.
0104  // if mbstring is not loaded, we go into native mode.
0105  if (extension_loaded('mbstring'))
0106  {
0107      mb_internal_encoding('UTF-8');
0108   
/**
* UTF-8 aware alternative to strrpos
* Find position of last occurrence of a char in a string
*
* Notes:
* - offset for mb_strrpos was added in 5.2.0, we emulate if it is lower
* - an empty haystack yields false instead of being handed to mb_strrpos;
*   the check is strict so that the haystack "0" is still searched
*
* @param string $str haystack
* @param string $needle needle
* @param integer $offset (optional) offset in characters
* @return mixed integer position or FALSE if the needle was not found
*/
if (version_compare(PHP_VERSION, '5.2.0', '>='))
{
    /**
    * UTF-8 aware alternative to strrpos
    * @ignore
    */
    function utf8_strrpos($str,    $needle, $offset = null)
    {
        // Emulate behaviour of strrpos rather than raising warning.
        // empty() must not be used here: it also rejects the string "0".
        if ($str === '' || $str === null || $str === false)
        {
            return false;
        }

        if (is_null($offset))
        {
            return mb_strrpos($str, $needle);
        }
        else
        {
            return mb_strrpos($str, $needle, $offset);
        }
    }
}
else
{
    /**
    * UTF-8 aware alternative to strrpos
    * @ignore
    */
    function utf8_strrpos($str,    $needle, $offset = null)
    {
        // offset for mb_strrpos was added in 5.2.0
        if (is_null($offset))
        {
            // Emulate behaviour of strrpos rather than raising warning.
            // empty() must not be used here: it also rejects the string "0".
            if ($str === '' || $str === null || $str === false)
            {
                return false;
            }

            return mb_strrpos($str, $needle);
        }
        else
        {
            if (!is_int($offset))
            {
                trigger_error('utf8_strrpos expects parameter 3 to be long', E_USER_ERROR);
                return false;
            }

            // Search only the part behind the offset ...
            $str = mb_substr($str, $offset);

            // ... which may be empty; do not pass an empty haystack on
            if ($str === '' || $str === false)
            {
                return false;
            }

            if (false !== ($pos = mb_strrpos($str, $needle)))
            {
                // Translate the hit back into a position within the full string
                return $pos + $offset;
            }

            return false;
        }
    }
}
0178   
/**
* UTF-8 aware alternative to strpos, backed by mb_strpos
* @ignore
*/
function utf8_strpos($str, $needle, $offset = null)
{
    // The offset is only forwarded when the caller actually supplied one
    return ($offset === null) ? mb_strpos($str, $needle) : mb_strpos($str, $needle, $offset);
}
0194   
/**
* UTF-8 aware alternative to strtolower, backed by mb_strtolower
* @ignore
*/
function utf8_strtolower($str)
{
    $lowered = mb_strtolower($str);

    return $lowered;
}
0203   
/**
* UTF-8 aware alternative to strtoupper, backed by mb_strtoupper
* @ignore
*/
function utf8_strtoupper($str)
{
    $uppered = mb_strtoupper($str);

    return $uppered;
}
0212   
/**
* UTF-8 aware alternative to substr, backed by mb_substr
* @ignore
*/
function utf8_substr($str, $offset, $length = null)
{
    // With an explicit length, hand all three arguments to mb_substr
    if ($length !== null)
    {
        return mb_substr($str, $offset, $length);
    }

    // Otherwise return everything from the offset onwards
    return mb_substr($str, $offset);
}
0228   
/**
* Return the length (in characters) of a UTF-8 string, backed by mb_strlen
* @ignore
*/
function utf8_strlen($text)
{
    // The encoding is passed explicitly on every call
    $encoding = 'utf-8';

    return mb_strlen($text, $encoding);
}
0237  }
0238  else
0239  {
/**
* UTF-8 aware alternative to strrpos
* Find position of last occurrence of a char in a string
*
* Without an offset the haystack is split at the needle; the character length
* of everything in front of the last piece is the position looked for.
* With an offset the search is repeated on the part behind the offset.
*
* @author Harry Fuecks
* @param string $str haystack
* @param string $needle needle
* @param integer $offset (optional) offset (from left)
* @return mixed integer position or FALSE on failure
*/
function utf8_strrpos($str,    $needle, $offset = null)
{
    if (!is_null($offset))
    {
        if (!is_int($offset))
        {
            trigger_error('utf8_strrpos    expects    parameter 3    to be long', E_USER_ERROR);
            return false;
        }

        // Look at the tail only, then shift a hit back by the offset
        $tail = utf8_substr($str, $offset);
        $found = utf8_strrpos($tail, $needle);

        return ($found !== false) ? $found + $offset : false;
    }

    $pieces = explode($needle, $str);

    // A single piece means the needle does not occur at all
    if (sizeof($pieces) <= 1)
    {
        return false;
    }

    // Drop what follows the last match and measure the rest in characters
    array_pop($pieces);

    return utf8_strlen(implode($needle, $pieces));
}
0284   
/**
* UTF-8 aware alternative to strpos
* Find position of first occurrence of a string
*
* Without an offset the haystack is split at the needle and the character
* length of the first piece is the position. With an offset the search is
* repeated on the part behind the offset.
*
* @author Harry Fuecks
* @param string $str haystack
* @param string $needle needle
* @param integer $offset offset in characters (from left)
* @return mixed integer position or FALSE on failure
*/
function utf8_strpos($str, $needle, $offset = null)
{
    if (!is_null($offset))
    {
        if (!is_int($offset))
        {
            trigger_error('utf8_strpos:  Offset must  be an integer', E_USER_ERROR);
            return false;
        }

        // Look at the tail only, then shift a hit back by the offset
        $tail = utf8_substr($str, $offset);
        $found = utf8_strpos($tail, $needle);

        return ($found !== false) ? $found + $offset : false;
    }

    $pieces = explode($needle, $str);

    // More than one piece means the needle was found behind the first piece
    return (sizeof($pieces) > 1) ? utf8_strlen($pieces[0]) : false;
}
0324   
/**
* UTF-8 aware alternative to strtolower
* Make a string lowercase
*
* strtolower() is applied to the string first; the multi-byte upper case
* letters listed in the table below are then replaced by their lower case
* counterparts.
*
* Note: The concept of a character's "case" only exists in some alphabets
* such as Latin, Greek, Cyrillic, Armenian and archaic Georgian - it does
* not exist in the Chinese alphabet, for example. See Unicode Standard
* Annex #21: Case Mappings
*
* @param string $string UTF-8 encoded string
* @return string string in lowercase
*/
function utf8_strtolower($string)
{
    // UTF-8 byte sequence of an upper case letter => its lower case form
    static $lower_map = array(
        "\xC3\x80" => "\xC3\xA0", "\xC3\x81" => "\xC3\xA1",
        "\xC3\x82" => "\xC3\xA2", "\xC3\x83" => "\xC3\xA3", "\xC3\x84" => "\xC3\xA4", "\xC3\x85" => "\xC3\xA5",
        "\xC3\x86" => "\xC3\xA6", "\xC3\x87" => "\xC3\xA7", "\xC3\x88" => "\xC3\xA8", "\xC3\x89" => "\xC3\xA9",
        "\xC3\x8A" => "\xC3\xAA", "\xC3\x8B" => "\xC3\xAB", "\xC3\x8C" => "\xC3\xAC", "\xC3\x8D" => "\xC3\xAD",
        "\xC3\x8E" => "\xC3\xAE", "\xC3\x8F" => "\xC3\xAF", "\xC3\x90" => "\xC3\xB0", "\xC3\x91" => "\xC3\xB1",
        "\xC3\x92" => "\xC3\xB2", "\xC3\x93" => "\xC3\xB3", "\xC3\x94" => "\xC3\xB4", "\xC3\x95" => "\xC3\xB5",
        "\xC3\x96" => "\xC3\xB6", "\xC3\x98" => "\xC3\xB8", "\xC3\x99" => "\xC3\xB9", "\xC3\x9A" => "\xC3\xBA",
        "\xC3\x9B" => "\xC3\xBB", "\xC3\x9C" => "\xC3\xBC", "\xC3\x9D" => "\xC3\xBD", "\xC3\x9E" => "\xC3\xBE",
        "\xC4\x80" => "\xC4\x81", "\xC4\x82" => "\xC4\x83", "\xC4\x84" => "\xC4\x85", "\xC4\x86" => "\xC4\x87",
        "\xC4\x88" => "\xC4\x89", "\xC4\x8A" => "\xC4\x8B", "\xC4\x8C" => "\xC4\x8D", "\xC4\x8E" => "\xC4\x8F",
        "\xC4\x90" => "\xC4\x91", "\xC4\x92" => "\xC4\x93", "\xC4\x96" => "\xC4\x97", "\xC4\x98" => "\xC4\x99",
        "\xC4\x9A" => "\xC4\x9B", "\xC4\x9C" => "\xC4\x9D", "\xC4\x9E" => "\xC4\x9F", "\xC4\xA0" => "\xC4\xA1",
        "\xC4\xA2" => "\xC4\xA3", "\xC4\xA4" => "\xC4\xA5", "\xC4\xA6" => "\xC4\xA7", "\xC4\xA8" => "\xC4\xA9",
        "\xC4\xAA" => "\xC4\xAB", "\xC4\xAE" => "\xC4\xAF", "\xC4\xB4" => "\xC4\xB5", "\xC4\xB6" => "\xC4\xB7",
        "\xC4\xB9" => "\xC4\xBA", "\xC4\xBB" => "\xC4\xBC", "\xC4\xBD" => "\xC4\xBE", "\xC5\x81" => "\xC5\x82",
        "\xC5\x83" => "\xC5\x84", "\xC5\x85" => "\xC5\x86", "\xC5\x87" => "\xC5\x88", "\xC5\x8A" => "\xC5\x8B",
        "\xC5\x8C" => "\xC5\x8D", "\xC5\x90" => "\xC5\x91", "\xC5\x94" => "\xC5\x95", "\xC5\x96" => "\xC5\x97",
        "\xC5\x98" => "\xC5\x99", "\xC5\x9A" => "\xC5\x9B", "\xC5\x9C" => "\xC5\x9D", "\xC5\x9E" => "\xC5\x9F",
        "\xC5\xA0" => "\xC5\xA1", "\xC5\xA2" => "\xC5\xA3", "\xC5\xA4" => "\xC5\xA5", "\xC5\xA6" => "\xC5\xA7",
        "\xC5\xA8" => "\xC5\xA9", "\xC5\xAA" => "\xC5\xAB", "\xC5\xAC" => "\xC5\xAD", "\xC5\xAE" => "\xC5\xAF",
        "\xC5\xB0" => "\xC5\xB1", "\xC5\xB2" => "\xC5\xB3", "\xC5\xB4" => "\xC5\xB5", "\xC5\xB6" => "\xC5\xB7",
        "\xC5\xB8" => "\xC3\xBF", "\xC5\xB9" => "\xC5\xBA", "\xC5\xBB" => "\xC5\xBC", "\xC5\xBD" => "\xC5\xBE",
        "\xC6\xA0" => "\xC6\xA1", "\xC6\xAF" => "\xC6\xB0", "\xC8\x98" => "\xC8\x99", "\xC8\x9A" => "\xC8\x9B",
        "\xCE\x86" => "\xCE\xAC", "\xCE\x88" => "\xCE\xAD", "\xCE\x89" => "\xCE\xAE", "\xCE\x8A" => "\xCE\xAF",
        "\xCE\x8C" => "\xCF\x8C", "\xCE\x8E" => "\xCF\x8D", "\xCE\x8F" => "\xCF\x8E", "\xCE\x91" => "\xCE\xB1",
        "\xCE\x92" => "\xCE\xB2", "\xCE\x93" => "\xCE\xB3", "\xCE\x94" => "\xCE\xB4", "\xCE\x95" => "\xCE\xB5",
        "\xCE\x96" => "\xCE\xB6", "\xCE\x97" => "\xCE\xB7", "\xCE\x98" => "\xCE\xB8", "\xCE\x99" => "\xCE\xB9",
        "\xCE\x9A" => "\xCE\xBA", "\xCE\x9B" => "\xCE\xBB", "\xCE\x9C" => "\xCE\xBC", "\xCE\x9D" => "\xCE\xBD",
        "\xCE\x9E" => "\xCE\xBE", "\xCE\x9F" => "\xCE\xBF", "\xCE\xA0" => "\xCF\x80", "\xCE\xA1" => "\xCF\x81",
        "\xCE\xA3" => "\xCF\x83", "\xCE\xA4" => "\xCF\x84", "\xCE\xA5" => "\xCF\x85", "\xCE\xA6" => "\xCF\x86",
        "\xCE\xA7" => "\xCF\x87", "\xCE\xA8" => "\xCF\x88", "\xCE\xA9" => "\xCF\x89", "\xCE\xAA" => "\xCF\x8A",
        "\xCE\xAB" => "\xCF\x8B", "\xD0\x81" => "\xD1\x91", "\xD0\x82" => "\xD1\x92", "\xD0\x83" => "\xD1\x93",
        "\xD0\x84" => "\xD1\x94", "\xD0\x85" => "\xD1\x95", "\xD0\x86" => "\xD1\x96", "\xD0\x87" => "\xD1\x97",
        "\xD0\x88" => "\xD1\x98", "\xD0\x89" => "\xD1\x99", "\xD0\x8A" => "\xD1\x9A", "\xD0\x8B" => "\xD1\x9B",
        "\xD0\x8C" => "\xD1\x9C", "\xD0\x8E" => "\xD1\x9E", "\xD0\x8F" => "\xD1\x9F", "\xD0\x90" => "\xD0\xB0",
        "\xD0\x91" => "\xD0\xB1", "\xD0\x92" => "\xD0\xB2", "\xD0\x93" => "\xD0\xB3", "\xD0\x94" => "\xD0\xB4",
        "\xD0\x95" => "\xD0\xB5", "\xD0\x96" => "\xD0\xB6", "\xD0\x97" => "\xD0\xB7", "\xD0\x98" => "\xD0\xB8",
        "\xD0\x99" => "\xD0\xB9", "\xD0\x9A" => "\xD0\xBA", "\xD0\x9B" => "\xD0\xBB", "\xD0\x9C" => "\xD0\xBC",
        "\xD0\x9D" => "\xD0\xBD", "\xD0\x9E" => "\xD0\xBE", "\xD0\x9F" => "\xD0\xBF", "\xD0\xA0" => "\xD1\x80",
        "\xD0\xA1" => "\xD1\x81", "\xD0\xA2" => "\xD1\x82", "\xD0\xA3" => "\xD1\x83", "\xD0\xA4" => "\xD1\x84",
        "\xD0\xA5" => "\xD1\x85", "\xD0\xA6" => "\xD1\x86", "\xD0\xA7" => "\xD1\x87", "\xD0\xA8" => "\xD1\x88",
        "\xD0\xA9" => "\xD1\x89", "\xD0\xAA" => "\xD1\x8A", "\xD0\xAB" => "\xD1\x8B", "\xD0\xAC" => "\xD1\x8C",
        "\xD0\xAD" => "\xD1\x8D", "\xD0\xAE" => "\xD1\x8E", "\xD0\xAF" => "\xD1\x8F", "\xD2\x90" => "\xD2\x91",
        "\xE1\xB8\x82" => "\xE1\xB8\x83", "\xE1\xB8\x8A" => "\xE1\xB8\x8B", "\xE1\xB8\x9E" => "\xE1\xB8\x9F", "\xE1\xB9\x80" => "\xE1\xB9\x81",
        "\xE1\xB9\x96" => "\xE1\xB9\x97", "\xE1\xB9\xA0" => "\xE1\xB9\xA1", "\xE1\xB9\xAA" => "\xE1\xB9\xAB", "\xE1\xBA\x80" => "\xE1\xBA\x81",
        "\xE1\xBA\x82" => "\xE1\xBA\x83", "\xE1\xBA\x84" => "\xE1\xBA\x85", "\xE1\xBB\xB2" => "\xE1\xBB\xB3"
    );

    // Single-byte letters first, then the multi-byte ones via the table
    $ascii_lowered = strtolower($string);

    return strtr($ascii_lowered, $lower_map);
}
0389   
/**
* UTF-8 aware alternative to strtoupper
* Make a string uppercase
*
* strtoupper() is applied to the string first; the multi-byte lower case
* letters listed in the table below are then replaced by their upper case
* counterparts.
*
* Note: The concept of a character's "case" only exists in some alphabets
* such as Latin, Greek, Cyrillic, Armenian and archaic Georgian - it does
* not exist in the Chinese alphabet, for example. See Unicode Standard
* Annex #21: Case Mappings
*
* @param string $string UTF-8 encoded string
* @return string string in uppercase
*/
function utf8_strtoupper($string)
{
    // UTF-8 byte sequence of a lower case letter => its upper case form
    static $upper_map = array(
        "\xC3\xA0" => "\xC3\x80", "\xC3\xA1" => "\xC3\x81",
        "\xC3\xA2" => "\xC3\x82", "\xC3\xA3" => "\xC3\x83", "\xC3\xA4" => "\xC3\x84", "\xC3\xA5" => "\xC3\x85",
        "\xC3\xA6" => "\xC3\x86", "\xC3\xA7" => "\xC3\x87", "\xC3\xA8" => "\xC3\x88", "\xC3\xA9" => "\xC3\x89",
        "\xC3\xAA" => "\xC3\x8A", "\xC3\xAB" => "\xC3\x8B", "\xC3\xAC" => "\xC3\x8C", "\xC3\xAD" => "\xC3\x8D",
        "\xC3\xAE" => "\xC3\x8E", "\xC3\xAF" => "\xC3\x8F", "\xC3\xB0" => "\xC3\x90", "\xC3\xB1" => "\xC3\x91",
        "\xC3\xB2" => "\xC3\x92", "\xC3\xB3" => "\xC3\x93", "\xC3\xB4" => "\xC3\x94", "\xC3\xB5" => "\xC3\x95",
        "\xC3\xB6" => "\xC3\x96", "\xC3\xB8" => "\xC3\x98", "\xC3\xB9" => "\xC3\x99", "\xC3\xBA" => "\xC3\x9A",
        "\xC3\xBB" => "\xC3\x9B", "\xC3\xBC" => "\xC3\x9C", "\xC3\xBD" => "\xC3\x9D", "\xC3\xBE" => "\xC3\x9E",
        "\xC3\xBF" => "\xC5\xB8", "\xC4\x81" => "\xC4\x80", "\xC4\x83" => "\xC4\x82", "\xC4\x85" => "\xC4\x84",
        "\xC4\x87" => "\xC4\x86", "\xC4\x89" => "\xC4\x88", "\xC4\x8B" => "\xC4\x8A", "\xC4\x8D" => "\xC4\x8C",
        "\xC4\x8F" => "\xC4\x8E", "\xC4\x91" => "\xC4\x90", "\xC4\x93" => "\xC4\x92", "\xC4\x97" => "\xC4\x96",
        "\xC4\x99" => "\xC4\x98", "\xC4\x9B" => "\xC4\x9A", "\xC4\x9D" => "\xC4\x9C", "\xC4\x9F" => "\xC4\x9E",
        "\xC4\xA1" => "\xC4\xA0", "\xC4\xA3" => "\xC4\xA2", "\xC4\xA5" => "\xC4\xA4", "\xC4\xA7" => "\xC4\xA6",
        "\xC4\xA9" => "\xC4\xA8", "\xC4\xAB" => "\xC4\xAA", "\xC4\xAF" => "\xC4\xAE", "\xC4\xB5" => "\xC4\xB4",
        "\xC4\xB7" => "\xC4\xB6", "\xC4\xBA" => "\xC4\xB9", "\xC4\xBC" => "\xC4\xBB", "\xC4\xBE" => "\xC4\xBD",
        "\xC5\x82" => "\xC5\x81", "\xC5\x84" => "\xC5\x83", "\xC5\x86" => "\xC5\x85", "\xC5\x88" => "\xC5\x87",
        "\xC5\x8B" => "\xC5\x8A", "\xC5\x8D" => "\xC5\x8C", "\xC5\x91" => "\xC5\x90", "\xC5\x95" => "\xC5\x94",
        "\xC5\x97" => "\xC5\x96", "\xC5\x99" => "\xC5\x98", "\xC5\x9B" => "\xC5\x9A", "\xC5\x9D" => "\xC5\x9C",
        "\xC5\x9F" => "\xC5\x9E", "\xC5\xA1" => "\xC5\xA0", "\xC5\xA3" => "\xC5\xA2", "\xC5\xA5" => "\xC5\xA4",
        "\xC5\xA7" => "\xC5\xA6", "\xC5\xA9" => "\xC5\xA8", "\xC5\xAB" => "\xC5\xAA", "\xC5\xAD" => "\xC5\xAC",
        "\xC5\xAF" => "\xC5\xAE", "\xC5\xB1" => "\xC5\xB0", "\xC5\xB3" => "\xC5\xB2", "\xC5\xB5" => "\xC5\xB4",
        "\xC5\xB7" => "\xC5\xB6", "\xC5\xBA" => "\xC5\xB9", "\xC5\xBC" => "\xC5\xBB", "\xC5\xBE" => "\xC5\xBD",
        "\xC6\xA1" => "\xC6\xA0", "\xC6\xB0" => "\xC6\xAF", "\xC8\x99" => "\xC8\x98", "\xC8\x9B" => "\xC8\x9A",
        "\xCE\xAC" => "\xCE\x86", "\xCE\xAD" => "\xCE\x88", "\xCE\xAE" => "\xCE\x89", "\xCE\xAF" => "\xCE\x8A",
        "\xCE\xB1" => "\xCE\x91", "\xCE\xB2" => "\xCE\x92", "\xCE\xB3" => "\xCE\x93", "\xCE\xB4" => "\xCE\x94",
        "\xCE\xB5" => "\xCE\x95", "\xCE\xB6" => "\xCE\x96", "\xCE\xB7" => "\xCE\x97", "\xCE\xB8" => "\xCE\x98",
        "\xCE\xB9" => "\xCE\x99", "\xCE\xBA" => "\xCE\x9A", "\xCE\xBB" => "\xCE\x9B", "\xCE\xBC" => "\xCE\x9C",
        "\xCE\xBD" => "\xCE\x9D", "\xCE\xBE" => "\xCE\x9E", "\xCE\xBF" => "\xCE\x9F", "\xCF\x80" => "\xCE\xA0",
        "\xCF\x81" => "\xCE\xA1", "\xCF\x83" => "\xCE\xA3", "\xCF\x84" => "\xCE\xA4", "\xCF\x85" => "\xCE\xA5",
        "\xCF\x86" => "\xCE\xA6", "\xCF\x87" => "\xCE\xA7", "\xCF\x88" => "\xCE\xA8", "\xCF\x89" => "\xCE\xA9",
        "\xCF\x8A" => "\xCE\xAA", "\xCF\x8B" => "\xCE\xAB", "\xCF\x8C" => "\xCE\x8C", "\xCF\x8D" => "\xCE\x8E",
        "\xCF\x8E" => "\xCE\x8F", "\xD0\xB0" => "\xD0\x90", "\xD0\xB1" => "\xD0\x91", "\xD0\xB2" => "\xD0\x92",
        "\xD0\xB3" => "\xD0\x93", "\xD0\xB4" => "\xD0\x94", "\xD0\xB5" => "\xD0\x95", "\xD0\xB6" => "\xD0\x96",
        "\xD0\xB7" => "\xD0\x97", "\xD0\xB8" => "\xD0\x98", "\xD0\xB9" => "\xD0\x99", "\xD0\xBA" => "\xD0\x9A",
        "\xD0\xBB" => "\xD0\x9B", "\xD0\xBC" => "\xD0\x9C", "\xD0\xBD" => "\xD0\x9D", "\xD0\xBE" => "\xD0\x9E",
        "\xD0\xBF" => "\xD0\x9F", "\xD1\x80" => "\xD0\xA0", "\xD1\x81" => "\xD0\xA1", "\xD1\x82" => "\xD0\xA2",
        "\xD1\x83" => "\xD0\xA3", "\xD1\x84" => "\xD0\xA4", "\xD1\x85" => "\xD0\xA5", "\xD1\x86" => "\xD0\xA6",
        "\xD1\x87" => "\xD0\xA7", "\xD1\x88" => "\xD0\xA8", "\xD1\x89" => "\xD0\xA9", "\xD1\x8A" => "\xD0\xAA",
        "\xD1\x8B" => "\xD0\xAB", "\xD1\x8C" => "\xD0\xAC", "\xD1\x8D" => "\xD0\xAD", "\xD1\x8E" => "\xD0\xAE",
        "\xD1\x8F" => "\xD0\xAF", "\xD1\x91" => "\xD0\x81", "\xD1\x92" => "\xD0\x82", "\xD1\x93" => "\xD0\x83",
        "\xD1\x94" => "\xD0\x84", "\xD1\x95" => "\xD0\x85", "\xD1\x96" => "\xD0\x86", "\xD1\x97" => "\xD0\x87",
        "\xD1\x98" => "\xD0\x88", "\xD1\x99" => "\xD0\x89", "\xD1\x9A" => "\xD0\x8A", "\xD1\x9B" => "\xD0\x8B",
        "\xD1\x9C" => "\xD0\x8C", "\xD1\x9E" => "\xD0\x8E", "\xD1\x9F" => "\xD0\x8F", "\xD2\x91" => "\xD2\x90",
        "\xE1\xB8\x83" => "\xE1\xB8\x82", "\xE1\xB8\x8B" => "\xE1\xB8\x8A", "\xE1\xB8\x9F" => "\xE1\xB8\x9E", "\xE1\xB9\x81" => "\xE1\xB9\x80",
        "\xE1\xB9\x97" => "\xE1\xB9\x96", "\xE1\xB9\xA1" => "\xE1\xB9\xA0", "\xE1\xB9\xAB" => "\xE1\xB9\xAA", "\xE1\xBA\x81" => "\xE1\xBA\x80",
        "\xE1\xBA\x83" => "\xE1\xBA\x82", "\xE1\xBA\x85" => "\xE1\xBA\x84", "\xE1\xBB\xB3" => "\xE1\xBB\xB2"
    );

    // Single-byte letters first, then the multi-byte ones via the table
    $ascii_uppered = strtoupper($string);

    return strtr($ascii_uppered, $upper_map);
}
0454   
/**
* UTF-8 aware alternative to substr
* Return part of a string given character offset (and optionally length)
*
* Note on arguments: compared to substr - if offset or length are
* not integers, this version will not complain but rather massages them
* into an integer.
*
* Note on returned values: substr documentation states false can be
* returned in some cases (e.g. offset > string length)
* mb_substr never returns false, it will return an empty string instead.
* This adopts the mb_substr approach
*
* Note on implementation: PCRE only supports repetitions of less than
* 65536, in order to accept up to MAXINT values for offset and length,
* we'll repeat a group of 65535 characters when needed.
*
* Note on implementation: calculating the number of characters in the
* string is a relatively expensive operation, so we only carry it out when
* necessary. It isn't necessary for +ve offsets and no specified length
*
* @author Chris Smith<chris@jalakai.co.uk>
* @param string $str
* @param integer $offset number of UTF-8 characters offset (from left)
* @param integer $length (optional) length in UTF-8 characters from offset
* @return string the requested part, or an empty string if nothing matches
*/
function utf8_substr($str, $offset, $length = NULL)
{
    // Massage the arguments into the expected types (the string cast
    // generates E_NOTICE for PHP4 objects, but not PHP5 objects)
    $str = (string) $str;
    $offset = (int) $offset;
    $length = is_null($length) ? null : (int) $length;

    // Trivial cases: zero length, or a negative length that reaches
    // further back than the negative offset
    if ($length === 0 || ($offset < 0 && $length < 0 && $length < $offset))
    {
        return '';
    }

    // Number of characters in $str, only determined when needed (see notes)
    $char_count = null;

    // Normalise negative offsets (we could use a tail
    // anchored pattern, but they are horribly slow!)
    if ($offset < 0)
    {
        $char_count = utf8_strlen($str);
        $offset = max(0, $char_count + $offset);
    }

    // Pattern part that skips $offset characters: a non-captured group
    // as long as the offset, or just the anchor for offset 0
    $skip_pattern = '^';
    if ($offset > 0)
    {
        $full_groups = (int) ($offset / 65535);
        $remainder = $offset % 65535;

        $repeat = ($full_groups) ? '(?:.{65535}){' . $full_groups . '}' : '';
        $skip_pattern = '^(?:' . $repeat . '.{' . $remainder . '})';
    }

    // Pattern part that captures the requested characters
    $take_pattern = '';
    if (is_null($length))
    {
        // the rest of the string
        $take_pattern = '(.*)$';
    }
    else
    {
        if ($char_count === null)
        {
            // see notes
            $char_count = utf8_strlen($str);
        }

        // another trivial case
        if ($offset > $char_count)
        {
            return '';
        }

        if ($length > 0)
        {
            // reduce any length that would go past the end of the string
            $length = min($char_count - $offset, $length);

            $full_groups = (int) ($length / 65535);
            $remainder = $length % 65535;

            // positive length: a captured group of $length characters
            $repeat = ($full_groups) ? '(?:.{65535}){' . $full_groups . '}' : '';
            $take_pattern = '(' . $repeat . '.{'. $remainder . '})';
        }
        else if ($length < 0)
        {
            // more characters to cut off than are left behind the offset
            if ($length < ($offset - $char_count))
            {
                return '';
            }

            $full_groups = (int)((-$length) / 65535);
            $remainder = (-$length) % 65535;

            // negative length: capture everything except a group of
            // -length characters anchored at the tail-end of the string
            $repeat = ($full_groups) ? '(?:.{65535}){' . $full_groups . '}' : '';
            $take_pattern = '(.*)(?:' . $repeat . '.{' . $remainder . '})$';
        }
    }

    if (preg_match('#' . $skip_pattern . $take_pattern . '#us', $str, $match))
    {
        return $match[1];
    }

    return '';
}
0600   
/**
* Return the length (in characters) of a UTF-8 string
*
* @param    string    $text        UTF-8 string
* @return    integer                Length (in chars) of given string
*/
function utf8_strlen($text)
{
    // utf8_decode turns every multibyte character into a single byte
    // (a "?" where needed), so the byte length equals the character count
    $single_bytes = utf8_decode($text);

    return strlen($single_bytes);
}
0612  }
0613   
/**
* UTF-8 aware alternative to str_split
* Convert a string to an array
*
* @author Harry Fuecks
* @param string $str UTF-8 encoded
* @param int $split_len number of characters per chunk (must be an integer >= 1)
* @return mixed array of chunks of $split_len characters each (the last one
*               may be shorter), or FALSE if $split_len is invalid
*/
function utf8_str_split($str, $split_len = 1)
{
    if (!is_int($split_len) || $split_len < 1)
    {
        return false;
    }

    // Strings that fit into a single chunk are returned as they are
    $len = utf8_strlen($str);
    if ($len <= $split_len)
    {
        return array($str);
    }

    // The greedy quantifier yields full chunks for as long as enough
    // characters are left and the shorter remainder at the very end.
    // With the s modifier "." matches every character, so a remainder
    // containing a NUL byte is not lost.
    preg_match_all('/.{1,' . $split_len . '}/us', $str, $ar);
    return $ar[0];
}
0639   
/**
* UTF-8 aware alternative to strspn
* Find length of initial segment matching the mask
*
* Every character of the mask is taken literally, as strspn does; characters
* with a special meaning inside a regular expression are escaped.
*
* @author Harry Fuecks
* @param string $str UTF-8 encoded string to examine
* @param string $mask UTF-8 encoded list of allowed characters
* @param integer $start (optional) character offset to start at
* @param integer $length (optional) number of characters to examine
* @return integer number of leading characters of $str contained in $mask
*/
function utf8_strspn($str, $mask, $start = null, $length = null)
{
    if ($start !== null || $length !== null)
    {
        $str = utf8_substr($str, $start, $length);
    }

    // Nothing can match an empty mask (and "[]" is not a valid character class)
    if ((string) $mask === '')
    {
        return 0;
    }

    // Escape the mask so that "]", "^", "-", "\" or the delimiter cannot
    // break or alter the character class
    preg_match('/^[' . preg_quote($mask, '/') . ']+/u', $str, $matches);

    if (isset($matches[0]))
    {
        return utf8_strlen($matches[0]);
    }

    return 0;
}
0662   
/**
* UTF-8 aware alternative to ucfirst
* Make a string's first character uppercase
*
* @author Harry Fuecks
* @param string $str UTF-8 encoded string
* @return string with first character as upper case (if applicable)
*/
function utf8_ucfirst($str)
{
    $char_count = utf8_strlen($str);

    // Nothing to do for an empty string
    if ($char_count == 0)
    {
        return '';
    }

    // A single character is upper-cased as a whole
    if ($char_count == 1)
    {
        return utf8_strtoupper($str);
    }

    // Split off the first character and upper-case only that one
    preg_match('/^(.{1})(.*)$/us', $str, $matches);

    return utf8_strtoupper($matches[1]) . $matches[2];
}
0689   
/**
* Recode a string to UTF-8
*
* The conversion is attempted, in this order, with iconv(), with mbstring
* (for a fixed list of encodings only) and with the recode extension. The
* first one returning a non-empty string wins. If none of them does, the
* custom transcoders from includes/utf/data/ are used.
*
* The input is returned untouched only if it is empty, is not a string, or
* the encoding is already utf-8. An encoding that none of the above can
* handle is NOT returned as-is: it raises an E_USER_ERROR through
* trigger_error().
*
* @param    string    $string        Original string
* @param    string    $encoding    Original encoding (lowercased internally)
* @return    string                The string, encoded in UTF-8
*/
function utf8_recode($string, $encoding)
{
    $encoding = strtolower($encoding);

    if ($encoding == 'utf-8' || !is_string($string) || empty($string))
    {
        return $string;
    }

    // we force iso-8859-1 to be cp1252
    if ($encoding == 'iso-8859-1')
    {
        $encoding = 'cp1252';
    }
    // convert iso-8859-8-i to iso-8859-8
    else if ($encoding == 'iso-8859-8-i')
    {
        $encoding = 'iso-8859-8';
        $string = hebrev($string);
    }

    // First, try iconv()
    if (function_exists('iconv'))
    {
        $ret = @iconv($encoding, 'utf-8', $string);

        // Compare against false/'' explicitly: a valid result of "0" must not
        // be mistaken for a failed conversion
        if (is_string($ret) && $ret !== '')
        {
            return $ret;
        }
    }

    // Try the mb_string extension
    if (function_exists('mb_convert_encoding'))
    {
        // mbstring is nasty on PHP4, we must make *sure* that we send a good encoding
        $mb_encodings = array(
            'iso-8859-1',
            'iso-8859-2',
            'iso-8859-4',
            'iso-8859-7',
            'iso-8859-9',
            'iso-8859-15',
            'windows-1251',
            'windows-1252',
            'cp1252',
            'shift_jis',
            'euc-kr',
            'big5',
            'gb2312',
        );

        if (in_array($encoding, $mb_encodings, true))
        {
            $ret = @mb_convert_encoding($string, 'utf-8', $encoding);

            if (is_string($ret) && $ret !== '')
            {
                return $ret;
            }
        }
    }

    // Try the recode extension
    if (function_exists('recode_string'))
    {
        $ret = @recode_string($encoding . '..utf-8', $string);

        if (is_string($ret) && $ret !== '')
        {
            return $ret;
        }
    }

    // If nothing works, check if we have a custom transcoder available
    if (!preg_match('#^[a-z0-9_ \\-]+$#', $encoding))
    {
        // Make sure the encoding name is alphanumeric, we don't want it to be abused into loading arbitrary files
        trigger_error('Unknown encoding: ' . $encoding, E_USER_ERROR);
    }

    // iso-8859-* character encoding
    if (preg_match('/iso[_ -]?8859[_ -]?(\\d+)/', $encoding, $array))
    {
        switch ($array[1])
        {
            case '1':
            case '2':
            case '4':
            case '7':
            case '8':
            case '9':
            case '15':
                return utf8_recode_custom('iso_8859_' . $array[1], 'recode_basic', 'Basic', $string);

            default:
                trigger_error('Unknown encoding: ' . $encoding, E_USER_ERROR);
            break;
        }
    }

    // CP/WIN character encoding
    if (preg_match('/(?:cp|windows)[_\- ]?(\\d+)/', $encoding, $array))
    {
        switch ($array[1])
        {
            // 932 is not handled here; it is left to the SJIS check further down
            case '932':
            break;

            case '1250':
            case '1251':
            case '1252':
            case '1254':
            case '1255':
            case '1256':
            case '1257':
            case '874':
                return utf8_recode_custom('cp' . $array[1], 'recode_basic', 'Basic', $string);

            default:
                trigger_error('Unknown encoding: ' . $encoding, E_USER_ERROR);
            break;
        }
    }

    // Encodings served by exactly one transcoder function, tested in this order:
    // pattern => array(function name, data file, label used in the error message)
    $single_transcoders = array(
        // TIS-620
        '/tis[_ -]?620/'    => array('tis_620', 'recode_basic', 'Basic'),
        // SJIS
        '/sjis(?:[_ -]?win)?|(?:cp|ibm)[_ -]?932|shift[_ -]?jis/'    => array('sjis', 'recode_cjk', 'CJK'),
        // EUC_KR
        '/euc[_ -]?kr/'        => array('euc_kr', 'recode_cjk', 'CJK'),
        // BIG-5
        '/big[_ -]?5/'        => array('big5', 'recode_cjk', 'CJK'),
        // GB2312
        '/gb[_ -]?2312/'    => array('gb2312', 'recode_cjk', 'CJK'),
    );

    foreach ($single_transcoders as $pattern => $transcoder)
    {
        if (preg_match($pattern, $encoding))
        {
            return utf8_recode_custom($transcoder[0], $transcoder[1], $transcoder[2], $string);
        }
    }

    // Nothing was able to handle this encoding
    trigger_error('Unknown encoding: ' . $encoding, E_USER_ERROR);
}

/**
* Run one of the custom transcoders used by utf8_recode()
*
* If the transcoder function is not defined yet, the data file expected to
* declare it is included first. A missing data file raises an E_USER_ERROR.
*
* @param    string    $function    Name of the transcoder function to call
* @param    string    $data_file    Base name (no extension) of the file in includes/utf/data/
* @param    string    $label        Prefix of the "... reencoder file is missing" error message
* @param    string    $string        String to convert
* @return    string                Whatever the transcoder function returns
*/
function utf8_recode_custom($function, $data_file, $label, $string)
{
    global $phpbb_root_path, $phpEx;

    if (!function_exists($function))
    {
        $path = $phpbb_root_path . 'includes/utf/data/' . $data_file . '.' . $phpEx;

        if (!file_exists($path))
        {
            trigger_error($label . ' reencoder file is missing', E_USER_ERROR);
        }
        include($path);
    }

    return call_user_func($function, $string);
}
0914   
/**
* Turn every multi-byte UTF-8 sequence of a string into its NCR
*
* ASCII bytes are left alone; each matched sequence is handed to
* utf8_encode_ncr_callback() and replaced by what it returns.
*
* @param    string    $text        UTF-8 string in NFC
* @return    string                ASCII string using NCRs for non-ASCII chars
*/
function utf8_encode_ncr($text)
{
    // A lead byte (0xC2-0xF4) followed by one to three continuation bytes (0x80-0xBF)
    $multibyte_sequence = '#[\\xC2-\\xF4][\\x80-\\xBF]{1,3}#';

    return preg_replace_callback($multibyte_sequence, 'utf8_encode_ncr_callback', $text);
}
0925   
/**
* Replacement callback for utf8_encode_ncr()
*
* Wraps the value utf8_ord() returns for the matched UTF-8 char in "&#" and ";".
* Note that $m is the match array, not a string.
*
* @param    array    $m            0-based numerically indexed array passed by preg_replace_callback()
* @return    string                "&#" . utf8_ord() result . ";" for the matched char
*/
function utf8_encode_ncr_callback($m)
{
    $code_point = utf8_ord($m[0]);

    return '&#' . $code_point . ';';
}
0938   
/**
* Returns the Unicode code point of a single UTF-8 encoded char
*
* The number of bytes decides how the char is decoded; the bytes themselves
* are not validated. Input that is not 1 to 4 bytes long is returned unchanged.
*
* @param string $chr UTF-8 char
* @return integer UNICODE code point (or $chr itself if it is not 1-4 bytes long)
*/
function utf8_ord($chr)
{
    $num_bytes = strlen($chr);

    if ($num_bytes < 1 || $num_bytes > 4)
    {
        return $chr;
    }

    if ($num_bytes == 1)
    {
        return ord($chr);
    }

    // Payload bits carried by the lead byte of a 2, 3 and 4 byte sequence
    $lead_masks = array(2 => 0x1F, 3 => 0x0F, 4 => 0x07);

    $code_point = ord($chr[0]) & $lead_masks[$num_bytes];

    // Every following byte contributes its lower six bits
    for ($pos = 1; $pos < $num_bytes; $pos++)
    {
        $code_point = ($code_point << 6) | (ord($chr[$pos]) & 0x3F);
    }

    return $code_point;
}
0969   
/**
* Builds the UTF-8 byte sequence for a code point
*
* The code point is not validated; the sequence length (1 to 4 bytes) is
* chosen purely from its magnitude.
*
* @param    int        $cp    UNICODE code point
* @return    string        UTF-8 char
*/
function utf8_chr($cp)
{
    // Four bytes: anything above the 16 bit range
    if ($cp > 0xFFFF)
    {
        $lead = chr(0xF0 | ($cp >> 18));
        $second = chr(0x80 | (($cp >> 12) & 0x3F));
        $third = chr(0x80 | (($cp >> 6) & 0x3F));
        $fourth = chr(0x80 | ($cp & 0x3F));

        return $lead . $second . $third . $fourth;
    }

    // Three bytes
    if ($cp > 0x7FF)
    {
        $lead = chr(0xE0 | ($cp >> 12));
        $second = chr(0x80 | (($cp >> 6) & 0x3F));
        $third = chr(0x80 | ($cp & 0x3F));

        return $lead . $second . $third;
    }

    // Two bytes
    if ($cp > 0x7F)
    {
        $lead = chr(0xC0 | ($cp >> 6));
        $second = chr(0x80 | ($cp & 0x3F));

        return $lead . $second;
    }

    // Plain ASCII
    return chr($cp);
}
0995   
/**
* Replace Numeric Character References by the UTF-8 chars they stand for
*
* Notes:
*    - the replacement is done in a single pass, NCRs are not converted recursively: &#38;#38; gives &#38;
*    - the code points are NOT checked for existence, an entity may therefore be turned into an inexistent codepoint
*    - only decimal NCRs of 1 to 6 digits and hexadecimal NCRs of 1 to 5 digits are matched
*
* @param    string    $text        String to convert, encoded in UTF-8 (no normal form required)
* @return    string                UTF-8 string where NCRs have been replaced with the actual chars
*/
function utf8_decode_ncr($text)
{
    // &#NNNNNN; (decimal) or &#xHHHHH; (hexadecimal), case-insensitive
    $ncr_pattern = '/&#([0-9]{1,6}|x[0-9A-F]{1,5});/i';

    return preg_replace_callback($ncr_pattern, 'utf8_decode_ncr_callback', $text);
}
1010   
/**
* Replacement callback for utf8_decode_ncr()
*
* Receives the captured part of a NCR, either decimal digits or "x" followed
* by hexadecimal digits, and passes the code point on to utf8_chr().
* Note that $m is the match array, not a string.
* Most invalid NCRs are ignored, but not all of them!
*
* @param    array    $m            0-based numerically indexed array passed by preg_replace_callback()
* @return    string                UTF-8 char
*/
function utf8_decode_ncr_callback($m)
{
    $reference = $m[1];

    // A leading "x" or "X" marks a hexadecimal reference
    if (strncasecmp($reference, 'x', 1) == 0)
    {
        $cp = hexdec(substr($reference, 1));
    }
    else
    {
        $cp = $reference;
    }

    return utf8_chr($cp);
}
1026   
/**
* Case folds a unicode string as per Unicode 5.0, section 3.13
*
* The "common" table is always applied first. It is followed by the "full"
* table when $option is exactly 'full', and by the "simple" table for any
* other value. Each table is included from includes/utf/data/ the first time
* it is needed and kept in a static array afterwards.
*
* @param    string    $text    text to be case folded
* @param    string    $option    determines how we will fold the cases
* @return    string            case folded text
*/
function utf8_case_fold($text, $option = 'full')
{
    static $uniarray = array();
    global $phpbb_root_path, $phpEx;

    // full replaces a character with multiple characters,
    // simple replaces a character with another character
    $second_table = ($option === 'full') ? 'f' : 's';

    $table_files = array(
        'c'    => 'includes/utf/data/case_fold_c.',
        'f'    => 'includes/utf/data/case_fold_f.',
        's'    => 'includes/utf/data/case_fold_s.',
    );

    // common is always needed, the second table only on demand
    foreach (array('c', $second_table) as $table)
    {
        if (!isset($uniarray[$table]))
        {
            $uniarray[$table] = include($phpbb_root_path . $table_files[$table] . $phpEx);
        }
    }

    // common is always replaced first
    $folded = strtr($text, $uniarray['c']);

    return strtr($folded, $uniarray[$second_table]);
}
1073   
1074  /**
1075  * Takes the input and does a "special" case fold. It does minor normalization
1076  * and returns NFKC compatable text
1077  *
1078  * @param    string    $text    text to be case folded
1079  * @param    string    $option    determines how we will fold the cases
1080  * @return    string            case folded text
1081  */
1082  function utf8_case_fold_nfkc($text, $option = 'full')
1083  {
1084      static $fc_nfkc_closure = array(
1085          "\xCD\xBA"    => "\x20\xCE\xB9",
1086          "\xCF\x92"    => "\xCF\x85",
1087          "\xCF\x93"    => "\xCF\x8D",
1088          "\xCF\x94"    => "\xCF\x8B",
1089          "\xCF\xB2"    => "\xCF\x83",
1090          "\xCF\xB9"    => "\xCF\x83",
1091          "\xE1\xB4\xAC"    => "\x61",
1092          "\xE1\xB4\xAD"    => "\xC3\xA6",
1093          "\xE1\xB4\xAE"    => "\x62",
1094          "\xE1\xB4\xB0"    => "\x64",
1095          "\xE1\xB4\xB1"    => "\x65",
1096          "\xE1\xB4\xB2"    => "\xC7\x9D",
1097          "\xE1\xB4\xB3"    => "\x67",
1098          "\xE1\xB4\xB4"    => "\x68",
1099          "\xE1\xB4\xB5"    => "\x69",
1100          "\xE1\xB4\xB6"    => "\x6A",
1101          "\xE1\xB4\xB7"    => "\x6B",
1102          "\xE1\xB4\xB8"    => "\x6C",
1103          "\xE1\xB4\xB9"    => "\x6D",
1104          "\xE1\xB4\xBA"    => "\x6E",
1105          "\xE1\xB4\xBC"    => "\x6F",
1106          "\xE1\xB4\xBD"    => "\xC8\xA3",
1107          "\xE1\xB4\xBE"    => "\x70",
1108          "\xE1\xB4\xBF"    => "\x72",
1109          "\xE1\xB5\x80"    => "\x74",
1110          "\xE1\xB5\x81"    => "\x75",
1111          "\xE1\xB5\x82"    => "\x77",
1112          "\xE2\x82\xA8"    => "\x72\x73",
1113          "\xE2\x84\x82"    => "\x63",
1114          "\xE2\x84\x83"    => "\xC2\xB0\x63",
1115          "\xE2\x84\x87"    => "\xC9\x9B",
1116          "\xE2\x84\x89"    => "\xC2\xB0\x66",
1117          "\xE2\x84\x8B"    => "\x68",
1118          "\xE2\x84\x8C"    => "\x68",
1119          "\xE2\x84\x8D"    => "\x68",
1120          "\xE2\x84\x90"    => "\x69",
1121          "\xE2\x84\x91"    => "\x69",
1122          "\xE2\x84\x92"    => "\x6C",
1123          "\xE2\x84\x95"    => "\x6E",
1124          "\xE2\x84\x96"    => "\x6E\x6F",
1125          "\xE2\x84\x99"    => "\x70",
1126          "\xE2\x84\x9A"    => "\x71",
1127          "\xE2\x84\x9B"    => "\x72",
1128          "\xE2\x84\x9C"    => "\x72",
1129          "\xE2\x84\x9D"    => "\x72",
1130          "\xE2\x84\xA0"    => "\x73\x6D",
1131          "\xE2\x84\xA1"    => "\x74\x65\x6C",
1132          "\xE2\x84\xA2"    => "\x74\x6D",
1133          "\xE2\x84\xA4"    => "\x7A",
1134          "\xE2\x84\xA8"    => "\x7A",
1135          "\xE2\x84\xAC"    => "\x62",
1136          "\xE2\x84\xAD"    => "\x63",
1137          "\xE2\x84\xB0"    => "\x65",
1138          "\xE2\x84\xB1"    => "\x66",
1139          "\xE2\x84\xB3"    => "\x6D",
1140          "\xE2\x84\xBB"    => "\x66\x61\x78",
1141          "\xE2\x84\xBE"    => "\xCE\xB3",
1142          "\xE2\x84\xBF"    => "\xCF\x80",
1143          "\xE2\x85\x85"    => "\x64",
1144          "\xE3\x89\x90"    => "\x70\x74\x65",
1145          "\xE3\x8B\x8C"    => "\x68\x67",
1146          "\xE3\x8B\x8E"    => "\x65\x76",
1147          "\xE3\x8B\x8F"    => "\x6C\x74\x64",
1148          "\xE3\x8D\xB1"    => "\x68\x70\x61",
1149          "\xE3\x8D\xB3"    => "\x61\x75",
1150          "\xE3\x8D\xB5"    => "\x6F\x76",
1151          "\xE3\x8D\xBA"    => "\x69\x75",
1152          "\xE3\x8E\x80"    => "\x70\x61",
1153          "\xE3\x8E\x81"    => "\x6E\x61",
1154          "\xE3\x8E\x82"    => "\xCE\xBC\x61",
1155          "\xE3\x8E\x83"    => "\x6D\x61",
1156          "\xE3\x8E\x84"    => "\x6B\x61",
1157          "\xE3\x8E\x85"    => "\x6B\x62",
1158          "\xE3\x8E\x86"    => "\x6D\x62",
1159          "\xE3\x8E\x87"    => "\x67\x62",
1160          "\xE3\x8E\x8A"    => "\x70\x66",
1161          "\xE3\x8E\x8B"    => "\x6E\x66",
1162          "\xE3\x8E\x8C"    => "\xCE\xBC\x66",
1163          "\xE3\x8E\x90"    => "\x68\x7A",
1164          "\xE3\x8E\x91"    => "\x6B\x68\x7A",
1165          "\xE3\x8E\x92"    => "\x6D\x68\x7A",
1166          "\xE3\x8E\x93"    => "\x67\x68\x7A",
1167          "\xE3\x8E\x94"    => "\x74\x68\x7A",
1168          "\xE3\x8E\xA9"    => "\x70\x61",
1169          "\xE3\x8E\xAA"    => "\x6B\x70\x61",
1170          "\xE3\x8E\xAB"    => "\x6D\x70\x61",
1171          "\xE3\x8E\xAC"    => "\x67\x70\x61",
1172          "\xE3\x8E\xB4"    => "\x70\x76",
1173          "\xE3\x8E\xB5"    => "\x6E\x76",
1174          "\xE3\x8E\xB6"    => "\xCE\xBC\x76",
1175          "\xE3\x8E\xB7"    => "\x6D\x76",
1176          "\xE3\x8E\xB8"    => "\x6B\x76",
1177          "\xE3\x8E\xB9"    => "\x6D\x76",
1178          "\xE3\x8E\xBA"    => "\x70\x77",
1179          "\xE3\x8E\xBB"    => "\x6E\x77",
1180          "\xE3\x8E\xBC"    => "\xCE\xBC\x77",
1181          "\xE3\x8E\xBD"    => "\x6D\x77",
1182          "\xE3\x8E\xBE"    => "\x6B\x77",
1183          "\xE3\x8E\xBF"    => "\x6D\x77",
1184          "\xE3\x8F\x80"    => "\x6B\xCF\x89",
1185          "\xE3\x8F\x81"    => "\x6D\xCF\x89",
1186          "\xE3\x8F\x83"    => "\x62\x71",
1187          "\xE3\x8F\x86"    => "\x63\xE2\x88\x95\x6B\x67",
1188          "\xE3\x8F\x87"    => "\x63\x6F\x2E",
1189          "\xE3\x8F\x88"    => "\x64\x62",
1190          "\xE3\x8F\x89"    => "\x67\x79",
1191          "\xE3\x8F\x8B"    => "\x68\x70",
1192          "\xE3\x8F\x8D"    => "\x6B\x6B",
1193          "\xE3\x8F\x8E"    => "\x6B\x6D",
1194          "\xE3\x8F\x97"    => "\x70\x68",
1195          "\xE3\x8F\x99"    => "\x70\x70\x6D",
1196          "\xE3\x8F\x9A"    => "\x70\x72",
1197          "\xE3\x8F\x9C"    => "\x73\x76",
1198          "\xE3\x8F\x9D"    => "\x77\x62",
1199          "\xE3\x8F\x9E"    => "\x76\xE2\x88\x95\x6D",
1200          "\xE3\x8F\x9F"    => "\x61\xE2\x88\x95\x6D",
1201          "\xF0\x9D\x90\x80"    => "\x61",
1202          "\xF0\x9D\x90\x81"    => "\x62",
1203          "\xF0\x9D\x90\x82"    => "\x63",
1204          "\xF0\x9D\x90\x83"    => "\x64",
1205          "\xF0\x9D\x90\x84"    => "\x65",
1206          "\xF0\x9D\x90\x85"    => "\x66",
1207          "\xF0\x9D\x90\x86"    => "\x67",
1208          "\xF0\x9D\x90\x87"    => "\x68",
1209          "\xF0\x9D\x90\x88"    => "\x69",
1210          "\xF0\x9D\x90\x89"    => "\x6A",
1211          "\xF0\x9D\x90\x8A"    => "\x6B",
1212          "\xF0\x9D\x90\x8B"    => "\x6C",
1213          "\xF0\x9D\x90\x8C"    => "\x6D",
1214          "\xF0\x9D\x90\x8D"    => "\x6E",
1215          "\xF0\x9D\x90\x8E"    => "\x6F",
1216          "\xF0\x9D\x90\x8F"    => "\x70",
1217          "\xF0\x9D\x90\x90"    => "\x71",
1218          "\xF0\x9D\x90\x91"    => "\x72",
1219          "\xF0\x9D\x90\x92"    => "\x73",
1220          "\xF0\x9D\x90\x93"    => "\x74",
1221          "\xF0\x9D\x90\x94"    => "\x75",
1222          "\xF0\x9D\x90\x95"    => "\x76",
1223          "\xF0\x9D\x90\x96"    => "\x77",
1224          "\xF0\x9D\x90\x97"    => "\x78",
1225          "\xF0\x9D\x90\x98"    => "\x79",
1226          "\xF0\x9D\x90\x99"    => "\x7A",
1227          "\xF0\x9D\x90\xB4"    => "\x61",
1228          "\xF0\x9D\x90\xB5"    => "\x62",
1229          "\xF0\x9D\x90\xB6"    => "\x63",
1230          "\xF0\x9D\x90\xB7"    => "\x64",
1231          "\xF0\x9D\x90\xB8"    => "\x65",
1232          "\xF0\x9D\x90\xB9"    => "\x66",
1233          "\xF0\x9D\x90\xBA"    => "\x67",
1234          "\xF0\x9D\x90\xBB"    => "\x68",
1235          "\xF0\x9D\x90\xBC"    => "\x69",
1236          "\xF0\x9D\x90\xBD"    => "\x6A",
1237          "\xF0\x9D\x90\xBE"    => "\x6B",
1238          "\xF0\x9D\x90\xBF"    => "\x6C",
1239          "\xF0\x9D\x91\x80"    => "\x6D",
1240          "\xF0\x9D\x91\x81"    => "\x6E",
1241          "\xF0\x9D\x91\x82"    => "\x6F",
1242          "\xF0\x9D\x91\x83"    => "\x70",
1243          "\xF0\x9D\x91\x84"    => "\x71",
1244          "\xF0\x9D\x91\x85"    => "\x72",
1245          "\xF0\x9D\x91\x86"    => "\x73",
1246          "\xF0\x9D\x91\x87"    => "\x74",
1247          "\xF0\x9D\x91\x88"    => "\x75",
1248          "\xF0\x9D\x91\x89"    => "\x76",
1249          "\xF0\x9D\x91\x8A"    => "\x77",
1250          "\xF0\x9D\x91\x8B"    => "\x78",
1251          "\xF0\x9D\x91\x8C"    => "\x79",
1252          "\xF0\x9D\x91\x8D"    => "\x7A",
1253          "\xF0\x9D\x91\xA8"    => "\x61",
1254          "\xF0\x9D\x91\xA9"    => "\x62",
1255          "\xF0\x9D\x91\xAA"    => "\x63",
1256          "\xF0\x9D\x91\xAB"    => "\x64",
1257          "\xF0\x9D\x91\xAC"    => "\x65",
1258          "\xF0\x9D\x91\xAD"    => "\x66",
1259          "\xF0\x9D\x91\xAE"    => "\x67",
1260          "\xF0\x9D\x91\xAF"    => "\x68",
1261          "\xF0\x9D\x91\xB0"    => "\x69",
1262          "\xF0\x9D\x91\xB1"    => "\x6A",
1263          "\xF0\x9D\x91\xB2"    => "\x6B",
1264          "\xF0\x9D\x91\xB3"    => "\x6C",
1265          "\xF0\x9D\x91\xB4"    => "\x6D",
1266          "\xF0\x9D\x91\xB5"    => "\x6E",
1267          "\xF0\x9D\x91\xB6"    => "\x6F",
1268          "\xF0\x9D\x91\xB7"    => "\x70",
1269          "\xF0\x9D\x91\xB8"    => "\x71",
1270          "\xF0\x9D\x91\xB9"    => "\x72",
1271          "\xF0\x9D\x91\xBA"    => "\x73",
1272          "\xF0\x9D\x91\xBB"    => "\x74",
1273          "\xF0\x9D\x91\xBC"    => "\x75",
1274          "\xF0\x9D\x91\xBD"    => "\x76",
1275          "\xF0\x9D\x91\xBE"    => "\x77",
1276          "\xF0\x9D\x91\xBF"    => "\x78",
1277          "\xF0\x9D\x92\x80"    => "\x79",
1278          "\xF0\x9D\x92\x81"    => "\x7A",
1279          "\xF0\x9D\x92\x9C"    => "\x61",
1280          "\xF0\x9D\x92\x9E"    => "\x63",
1281          "\xF0\x9D\x92\x9F"    => "\x64",
1282          "\xF0\x9D\x92\xA2"    => "\x67",
1283          "\xF0\x9D\x92\xA5"    => "\x6A",
1284          "\xF0\x9D\x92\xA6"    => "\x6B",
1285          "\xF0\x9D\x92\xA9"    => "\x6E",
1286          "\xF0\x9D\x92\xAA"    => "\x6F",
1287          "\xF0\x9D\x92\xAB"    => "\x70",
1288          "\xF0\x9D\x92\xAC"    => "\x71",
1289          "\xF0\x9D\x92\xAE"    => "\x73",
1290          "\xF0\x9D\x92\xAF"    => "\x74",
1291          "\xF0\x9D\x92\xB0"    => "\x75",
1292          "\xF0\x9D\x92\xB1"    => "\x76",
1293          "\xF0\x9D\x92\xB2"    => "\x77",
1294          "\xF0\x9D\x92\xB3"    => "\x78",
1295          "\xF0\x9D\x92\xB4"    => "\x79",
1296          "\xF0\x9D\x92\xB5"    => "\x7A",
1297          "\xF0\x9D\x93\x90"    => "\x61",
1298          "\xF0\x9D\x93\x91"    => "\x62",
1299          "\xF0\x9D\x93\x92"    => "\x63",
1300          "\xF0\x9D\x93\x93"    => "\x64",
1301          "\xF0\x9D\x93\x94"    => "\x65",
1302          "\xF0\x9D\x93\x95"    => "\x66",
1303          "\xF0\x9D\x93\x96"    => "\x67",
1304          "\xF0\x9D\x93\x97"    => "\x68",
1305          "\xF0\x9D\x93\x98"    => "\x69",
1306          "\xF0\x9D\x93\x99"    => "\x6A",
1307          "\xF0\x9D\x93\x9A"    => "\x6B",
1308          "\xF0\x9D\x93\x9B"    => "\x6C",
1309          "\xF0\x9D\x93\x9C"    => "\x6D",
1310          "\xF0\x9D\x93\x9D"    => "\x6E",
1311          "\xF0\x9D\x93\x9E"    => "\x6F",
1312          "\xF0\x9D\x93\x9F"    => "\x70",
1313          "\xF0\x9D\x93\xA0"    => "\x71",
1314          "\xF0\x9D\x93\xA1"    => "\x72",
1315          "\xF0\x9D\x93\xA2"    => "\x73",
1316          "\xF0\x9D\x93\xA3"    => "\x74",
1317          "\xF0\x9D\x93\xA4"    => "\x75",
1318          "\xF0\x9D\x93\xA5"    => "\x76",
1319          "\xF0\x9D\x93\xA6"    => "\x77",
1320          "\xF0\x9D\x93\xA7"    => "\x78",
1321          "\xF0\x9D\x93\xA8"    => "\x79",
1322          "\xF0\x9D\x93\xA9"    => "\x7A",
1323          "\xF0\x9D\x94\x84"    => "\x61",
1324          "\xF0\x9D\x94\x85"    => "\x62",
1325          "\xF0\x9D\x94\x87"    => "\x64",
1326          "\xF0\x9D\x94\x88"    => "\x65",
1327          "\xF0\x9D\x94\x89"    => "\x66",
1328          "\xF0\x9D\x94\x8A"    => "\x67",
1329          "\xF0\x9D\x94\x8D"    => "\x6A",
1330          "\xF0\x9D\x94\x8E"    => "\x6B",
1331          "\xF0\x9D\x94\x8F"    => "\x6C",
1332          "\xF0\x9D\x94\x90"    => "\x6D",
1333          "\xF0\x9D\x94\x91"    => "\x6E",
1334          "\xF0\x9D\x94\x92"    => "\x6F",
1335          "\xF0\x9D\x94\x93"    => "\x70",
1336          "\xF0\x9D\x94\x94"    => "\x71",
1337          "\xF0\x9D\x94\x96"    => "\x73",
1338          "\xF0\x9D\x94\x97"    => "\x74",
1339          "\xF0\x9D\x94\x98"    => "\x75",
1340          "\xF0\x9D\x94\x99"    => "\x76",
1341          "\xF0\x9D\x94\x9A"    => "\x77",
1342          "\xF0\x9D\x94\x9B"    => "\x78",
1343          "\xF0\x9D\x94\x9C"    => "\x79",
1344          "\xF0\x9D\x94\xB8"    => "\x61",
1345          "\xF0\x9D\x94\xB9"    => "\x62",
1346          "\xF0\x9D\x94\xBB"    => "\x64",
1347          "\xF0\x9D\x94\xBC"    => "\x65",
1348          "\xF0\x9D\x94\xBD"    => "\x66",
1349          "\xF0\x9D\x94\xBE"    => "\x67",
1350          "\xF0\x9D\x95\x80"    => "\x69",
1351          "\xF0\x9D\x95\x81"    => "\x6A",
1352          "\xF0\x9D\x95\x82"    => "\x6B",
1353          "\xF0\x9D\x95\x83"    => "\x6C",
1354          "\xF0\x9D\x95\x84"    => "\x6D",
1355          "\xF0\x9D\x95\x86"    => "\x6F",
1356          "\xF0\x9D\x95\x8A"    => "\x73",
1357          "\xF0\x9D\x95\x8B"    => "\x74",
1358          "\xF0\x9D\x95\x8C"    => "\x75",
1359          "\xF0\x9D\x95\x8D"    => "\x76",
1360          "\xF0\x9D\x95\x8E"    => "\x77",
1361          "\xF0\x9D\x95\x8F"    => "\x78",
1362          "\xF0\x9D\x95\x90"    => "\x79",
1363          "\xF0\x9D\x95\xAC"    => "\x61",
1364          "\xF0\x9D\x95\xAD"    => "\x62",
1365          "\xF0\x9D\x95\xAE"    => "\x63",
1366          "\xF0\x9D\x95\xAF"    => "\x64",
1367          "\xF0\x9D\x95\xB0"    => "\x65",
1368          "\xF0\x9D\x95\xB1"    => "\x66",
1369          "\xF0\x9D\x95\xB2"    => "\x67",
1370          "\xF0\x9D\x95\xB3"    => "\x68",
1371          "\xF0\x9D\x95\xB4"    => "\x69",
1372          "\xF0\x9D\x95\xB5"    => "\x6A",
1373          "\xF0\x9D\x95\xB6"    => "\x6B",
1374          "\xF0\x9D\x95\xB7"    => "\x6C",
1375          "\xF0\x9D\x95\xB8"    => "\x6D",
1376          "\xF0\x9D\x95\xB9"    => "\x6E",
1377          "\xF0\x9D\x95\xBA"    => "\x6F",
1378          "\xF0\x9D\x95\xBB"    => "\x70",
1379          "\xF0\x9D\x95\xBC"    => "\x71",
1380          "\xF0\x9D\x95\xBD"    => "\x72",
1381          "\xF0\x9D\x95\xBE"    => "\x73",
1382          "\xF0\x9D\x95\xBF"    => "\x74",
1383          "\xF0\x9D\x96\x80"    => "\x75",
1384          "\xF0\x9D\x96\x81"    => "\x76",
1385          "\xF0\x9D\x96\x82"    => "\x77",
1386          "\xF0\x9D\x96\x83"    => "\x78",
1387          "\xF0\x9D\x96\x84"    => "\x79",
1388          "\xF0\x9D\x96\x85"    => "\x7A",
1389          "\xF0\x9D\x96\xA0"    => "\x61",
1390          "\xF0\x9D\x96\xA1"    => "\x62",
1391          "\xF0\x9D\x96\xA2"    => "\x63",
1392          "\xF0\x9D\x96\xA3"    => "\x64",
1393          "\xF0\x9D\x96\xA4"    => "\x65",
1394          "\xF0\x9D\x96\xA5"    => "\x66",
1395          "\xF0\x9D\x96\xA6"    => "\x67",
1396          "\xF0\x9D\x96\xA7"    => "\x68",
1397          "\xF0\x9D\x96\xA8"    => "\x69",
1398          "\xF0\x9D\x96\xA9"    => "\x6A",
1399          "\xF0\x9D\x96\xAA"    => "\x6B",
1400          "\xF0\x9D\x96\xAB"    => "\x6C",
1401          "\xF0\x9D\x96\xAC"    => "\x6D",
1402          "\xF0\x9D\x96\xAD"    => "\x6E",
1403          "\xF0\x9D\x96\xAE"    => "\x6F",
1404          "\xF0\x9D\x96\xAF"    => "\x70",
1405          "\xF0\x9D\x96\xB0"    => "\x71",
1406          "\xF0\x9D\x96\xB1"    => "\x72",
1407          "\xF0\x9D\x96\xB2"    => "\x73",
1408          "\xF0\x9D\x96\xB3"    => "\x74",
1409          "\xF0\x9D\x96\xB4"    => "\x75",
1410          "\xF0\x9D\x96\xB5"    => "\x76",
1411          "\xF0\x9D\x96\xB6"    => "\x77",
1412          "\xF0\x9D\x96\xB7"    => "\x78",
1413          "\xF0\x9D\x96\xB8"    => "\x79",
1414          "\xF0\x9D\x96\xB9"    => "\x7A",
1415          "\xF0\x9D\x97\x94"    => "\x61",
1416          "\xF0\x9D\x97\x95"    => "\x62",
1417          "\xF0\x9D\x97\x96"    => "\x63",
1418          "\xF0\x9D\x97\x97"    => "\x64",
1419          "\xF0\x9D\x97\x98"    => "\x65",
1420          "\xF0\x9D\x97\x99"    => "\x66",
1421          "\xF0\x9D\x97\x9A"    => "\x67",
1422          "\xF0\x9D\x97\x9B"    => "\x68",
1423          "\xF0\x9D\x97\x9C"    => "\x69",
1424          "\xF0\x9D\x97\x9D"    => "\x6A",
1425          "\xF0\x9D\x97\x9E"    => "\x6B",
1426          "\xF0\x9D\x97\x9F"    => "\x6C",
1427          "\xF0\x9D\x97\xA0"    => "\x6D",
1428          "\xF0\x9D\x97\xA1"    => "\x6E",
1429          "\xF0\x9D\x97\xA2"    => "\x6F",
1430          "\xF0\x9D\x97\xA3"    => "\x70",
1431          "\xF0\x9D\x97\xA4"    => "\x71",
1432          "\xF0\x9D\x97\xA5"    => "\x72",
1433          "\xF0\x9D\x97\xA6"    => "\x73",
1434          "\xF0\x9D\x97\xA7"    => "\x74",
1435          "\xF0\x9D\x97\xA8"    => "\x75",
1436          "\xF0\x9D\x97\xA9"    => "\x76",
1437          "\xF0\x9D\x97\xAA"    => "\x77",
1438          "\xF0\x9D\x97\xAB"    => "\x78",
1439          "\xF0\x9D\x97\xAC"    => "\x79",
1440          "\xF0\x9D\x97\xAD"    => "\x7A",
1441          "\xF0\x9D\x98\x88"    => "\x61",
1442          "\xF0\x9D\x98\x89"    => "\x62",
1443          "\xF0\x9D\x98\x8A"    => "\x63",
1444          "\xF0\x9D\x98\x8B"    => "\x64",
1445          "\xF0\x9D\x98\x8C"    => "\x65",
1446          "\xF0\x9D\x98\x8D"    => "\x66",
1447          "\xF0\x9D\x98\x8E"    => "\x67",
1448          "\xF0\x9D\x98\x8F"    => "\x68",
1449          "\xF0\x9D\x98\x90"    => "\x69",
1450          "\xF0\x9D\x98\x91"    => "\x6A",
1451          "\xF0\x9D\x98\x92"    => "\x6B",
1452          "\xF0\x9D\x98\x93"    => "\x6C",
1453          "\xF0\x9D\x98\x94"    => "\x6D",
1454          "\xF0\x9D\x98\x95"    => "\x6E",
1455          "\xF0\x9D\x98\x96"    => "\x6F",
1456          "\xF0\x9D\x98\x97"    => "\x70",
1457          "\xF0\x9D\x98\x98"    => "\x71",
1458          "\xF0\x9D\x98\x99"    => "\x72",
1459          "\xF0\x9D\x98\x9A"    => "\x73",
1460          "\xF0\x9D\x98\x9B"    => "\x74",
1461          "\xF0\x9D\x98\x9C"    => "\x75",
1462          "\xF0\x9D\x98\x9D"    => "\x76",
1463          "\xF0\x9D\x98\x9E"    => "\x77",
1464          "\xF0\x9D\x98\x9F"    => "\x78",
1465          "\xF0\x9D\x98\xA0"    => "\x79",
1466          "\xF0\x9D\x98\xA1"    => "\x7A",
1467          "\xF0\x9D\x98\xBC"    => "\x61",
1468          "\xF0\x9D\x98\xBD"    => "\x62",
1469          "\xF0\x9D\x98\xBE"    => "\x63",
1470          "\xF0\x9D\x98\xBF"    => "\x64",
1471          "\xF0\x9D\x99\x80"    => "\x65",
1472          "\xF0\x9D\x99\x81"    => "\x66",
1473          "\xF0\x9D\x99\x82"    => "\x67",
1474          "\xF0\x9D\x99\x83"    => "\x68",
1475          "\xF0\x9D\x99\x84"    => "\x69",
1476          "\xF0\x9D\x99\x85"    => "\x6A",
1477          "\xF0\x9D\x99\x86"    => "\x6B",
1478          "\xF0\x9D\x99\x87"    => "\x6C",
1479          "\xF0\x9D\x99\x88"    => "\x6D",
1480          "\xF0\x9D\x99\x89"    => "\x6E",
1481          "\xF0\x9D\x99\x8A"    => "\x6F",
1482          "\xF0\x9D\x99\x8B"    => "\x70",
1483          "\xF0\x9D\x99\x8C"    => "\x71",
1484          "\xF0\x9D\x99\x8D"    => "\x72",
1485          "\xF0\x9D\x99\x8E"    => "\x73",
1486          "\xF0\x9D\x99\x8F"    => "\x74",
1487          "\xF0\x9D\x99\x90"    => "\x75",
1488          "\xF0\x9D\x99\x91"    => "\x76",
1489          "\xF0\x9D\x99\x92"    => "\x77",
1490          "\xF0\x9D\x99\x93"    => "\x78",
1491          "\xF0\x9D\x99\x94"    => "\x79",
1492          "\xF0\x9D\x99\x95"    => "\x7A",
1493          "\xF0\x9D\x99\xB0"    => "\x61",
1494          "\xF0\x9D\x99\xB1"    => "\x62",
1495          "\xF0\x9D\x99\xB2"    => "\x63",
1496          "\xF0\x9D\x99\xB3"    => "\x64",
1497          "\xF0\x9D\x99\xB4"    => "\x65",
1498          "\xF0\x9D\x99\xB5"    => "\x66",
1499          "\xF0\x9D\x99\xB6"    => "\x67",
1500          "\xF0\x9D\x99\xB7"    => "\x68",
1501          "\xF0\x9D\x99\xB8"    => "\x69",
1502          "\xF0\x9D\x99\xB9"    => "\x6A",
1503          "\xF0\x9D\x99\xBA"    => "\x6B",
1504          "\xF0\x9D\x99\xBB"    => "\x6C",
1505          "\xF0\x9D\x99\xBC"    => "\x6D",
1506          "\xF0\x9D\x99\xBD"    => "\x6E",
1507          "\xF0\x9D\x99\xBE"    => "\x6F",
1508          "\xF0\x9D\x99\xBF"    => "\x70",
1509          "\xF0\x9D\x9A\x80"    => "\x71",
1510          "\xF0\x9D\x9A\x81"    => "\x72",
1511          "\xF0\x9D\x9A\x82"    => "\x73",
1512          "\xF0\x9D\x9A\x83"    => "\x74",
1513          "\xF0\x9D\x9A\x84"    => "\x75",
1514          "\xF0\x9D\x9A\x85"    => "\x76",
1515          "\xF0\x9D\x9A\x86"    => "\x77",
1516          "\xF0\x9D\x9A\x87"    => "\x78",
1517          "\xF0\x9D\x9A\x88"    => "\x79",
1518          "\xF0\x9D\x9A\x89"    => "\x7A",
1519          "\xF0\x9D\x9A\xA8"    => "\xCE\xB1",
1520          "\xF0\x9D\x9A\xA9"    => "\xCE\xB2",
1521          "\xF0\x9D\x9A\xAA"    => "\xCE\xB3",
1522          "\xF0\x9D\x9A\xAB"    => "\xCE\xB4",
1523          "\xF0\x9D\x9A\xAC"    => "\xCE\xB5",
1524          "\xF0\x9D\x9A\xAD"    => "\xCE\xB6",
1525          "\xF0\x9D\x9A\xAE"    => "\xCE\xB7",
1526          "\xF0\x9D\x9A\xAF"    => "\xCE\xB8",
1527          "\xF0\x9D\x9A\xB0"    => "\xCE\xB9",
1528          "\xF0\x9D\x9A\xB1"    => "\xCE\xBA",
1529          "\xF0\x9D\x9A\xB2"    => "\xCE\xBB",
1530          "\xF0\x9D\x9A\xB3"    => "\xCE\xBC",
1531          "\xF0\x9D\x9A\xB4"    => "\xCE\xBD",
1532          "\xF0\x9D\x9A\xB5"    => "\xCE\xBE",
1533          "\xF0\x9D\x9A\xB6"    => "\xCE\xBF",
1534          "\xF0\x9D\x9A\xB7"    => "\xCF\x80",
1535          "\xF0\x9D\x9A\xB8"    => "\xCF\x81",
1536          "\xF0\x9D\x9A\xB9"    => "\xCE\xB8",
1537          "\xF0\x9D\x9A\xBA"    => "\xCF\x83",
1538          "\xF0\x9D\x9A\xBB"    => "\xCF\x84",
1539          "\xF0\x9D\x9A\xBC"    => "\xCF\x85",
1540          "\xF0\x9D\x9A\xBD"    => "\xCF\x86",
1541          "\xF0\x9D\x9A\xBE"    => "\xCF\x87",
1542          "\xF0\x9D\x9A\xBF"    => "\xCF\x88",
1543          "\xF0\x9D\x9B\x80"    => "\xCF\x89",
1544          "\xF0\x9D\x9B\x93"    => "\xCF\x83",
1545          "\xF0\x9D\x9B\xA2"    => "\xCE\xB1",
1546          "\xF0\x9D\x9B\xA3"    => "\xCE\xB2",
1547          "\xF0\x9D\x9B\xA4"    => "\xCE\xB3",
1548          "\xF0\x9D\x9B\xA5"    => "\xCE\xB4",
1549          "\xF0\x9D\x9B\xA6"    => "\xCE\xB5",
1550          "\xF0\x9D\x9B\xA7"    => "\xCE\xB6",
1551          "\xF0\x9D\x9B\xA8"    => "\xCE\xB7",
1552          "\xF0\x9D\x9B\xA9"    => "\xCE\xB8",
1553          "\xF0\x9D\x9B\xAA"    => "\xCE\xB9",
1554          "\xF0\x9D\x9B\xAB"    => "\xCE\xBA",
1555          "\xF0\x9D\x9B\xAC"    => "\xCE\xBB",
1556          "\xF0\x9D\x9B\xAD"    => "\xCE\xBC",
1557          "\xF0\x9D\x9B\xAE"    => "\xCE\xBD",
1558          "\xF0\x9D\x9B\xAF"    => "\xCE\xBE",
1559          "\xF0\x9D\x9B\xB0"    => "\xCE\xBF",
1560          "\xF0\x9D\x9B\xB1"    => "\xCF\x80",
1561          "\xF0\x9D\x9B\xB2"    => "\xCF\x81",
1562          "\xF0\x9D\x9B\xB3"    => "\xCE\xB8",
1563          "\xF0\x9D\x9B\xB4"    => "\xCF\x83",
1564          "\xF0\x9D\x9B\xB5"    => "\xCF\x84",
1565          "\xF0\x9D\x9B\xB6"    => "\xCF\x85",
1566          "\xF0\x9D\x9B\xB7"    => "\xCF\x86",
1567          "\xF0\x9D\x9B\xB8"    => "\xCF\x87",
1568          "\xF0\x9D\x9B\xB9"    => "\xCF\x88",
1569          "\xF0\x9D\x9B\xBA"    => "\xCF\x89",
1570          "\xF0\x9D\x9C\x8D"    => "\xCF\x83",
1571          "\xF0\x9D\x9C\x9C"    => "\xCE\xB1",
1572          "\xF0\x9D\x9C\x9D"    => "\xCE\xB2",
1573          "\xF0\x9D\x9C\x9E"    => "\xCE\xB3",
1574          "\xF0\x9D\x9C\x9F"    => "\xCE\xB4",
1575          "\xF0\x9D\x9C\xA0"    => "\xCE\xB5",
1576          "\xF0\x9D\x9C\xA1"    => "\xCE\xB6",
1577          "\xF0\x9D\x9C\xA2"    => "\xCE\xB7",
1578          "\xF0\x9D\x9C\xA3"    => "\xCE\xB8",
1579          "\xF0\x9D\x9C\xA4"    => "\xCE\xB9",
1580          "\xF0\x9D\x9C\xA5"    => "\xCE\xBA",
1581          "\xF0\x9D\x9C\xA6"    => "\xCE\xBB",
1582          "\xF0\x9D\x9C\xA7"    => "\xCE\xBC",
1583          "\xF0\x9D\x9C\xA8"    => "\xCE\xBD",
1584          "\xF0\x9D\x9C\xA9"    => "\xCE\xBE",
1585          "\xF0\x9D\x9C\xAA"    => "\xCE\xBF",
1586          "\xF0\x9D\x9C\xAB"    => "\xCF\x80",
1587          "\xF0\x9D\x9C\xAC"    => "\xCF\x81",
1588          "\xF0\x9D\x9C\xAD"    => "\xCE\xB8",
1589          "\xF0\x9D\x9C\xAE"    => "\xCF\x83",
1590          "\xF0\x9D\x9C\xAF"    => "\xCF\x84",
1591          "\xF0\x9D\x9C\xB0"    => "\xCF\x85",
1592          "\xF0\x9D\x9C\xB1"    => "\xCF\x86",
1593          "\xF0\x9D\x9C\xB2"    => "\xCF\x87",
1594          "\xF0\x9D\x9C\xB3"    => "\xCF\x88",
1595          "\xF0\x9D\x9C\xB4"    => "\xCF\x89",
1596          "\xF0\x9D\x9D\x87"    => "\xCF\x83",
1597          "\xF0\x9D\x9D\x96"    => "\xCE\xB1",
1598          "\xF0\x9D\x9D\x97"    => "\xCE\xB2",
1599          "\xF0\x9D\x9D\x98"    => "\xCE\xB3",
1600          "\xF0\x9D\x9D\x99"    => "\xCE\xB4",
1601          "\xF0\x9D\x9D\x9A"    => "\xCE\xB5",
1602          "\xF0\x9D\x9D\x9B"    => "\xCE\xB6",
1603          "\xF0\x9D\x9D\x9C"    => "\xCE\xB7",
1604          "\xF0\x9D\x9D\x9D"    => "\xCE\xB8",
1605          "\xF0\x9D\x9D\x9E"    => "\xCE\xB9",
1606          "\xF0\x9D\x9D\x9F"    => "\xCE\xBA",
1607          "\xF0\x9D\x9D\xA0"    => "\xCE\xBB",
1608          "\xF0\x9D\x9D\xA1"    => "\xCE\xBC",
1609          "\xF0\x9D\x9D\xA2"    => "\xCE\xBD",
1610          "\xF0\x9D\x9D\xA3"    => "\xCE\xBE",
1611          "\xF0\x9D\x9D\xA4"    => "\xCE\xBF",
1612          "\xF0\x9D\x9D\xA5"    => "\xCF\x80",
1613          "\xF0\x9D\x9D\xA6"    => "\xCF\x81",
1614          "\xF0\x9D\x9D\xA7"    => "\xCE\xB8",
1615          "\xF0\x9D\x9D\xA8"    => "\xCF\x83",
1616          "\xF0\x9D\x9D\xA9"    => "\xCF\x84",
1617          "\xF0\x9D\x9D\xAA"    => "\xCF\x85",
1618          "\xF0\x9D\x9D\xAB"    => "\xCF\x86",
1619          "\xF0\x9D\x9D\xAC"    => "\xCF\x87",
1620          "\xF0\x9D\x9D\xAD"    => "\xCF\x88",
1621          "\xF0\x9D\x9D\xAE"    => "\xCF\x89",
1622          "\xF0\x9D\x9E\x81"    => "\xCF\x83",
1623          "\xF0\x9D\x9E\x90"    => "\xCE\xB1",
1624          "\xF0\x9D\x9E\x91"    => "\xCE\xB2",
1625          "\xF0\x9D\x9E\x92"    => "\xCE\xB3",
1626          "\xF0\x9D\x9E\x93"    => "\xCE\xB4",
1627          "\xF0\x9D\x9E\x94"    => "\xCE\xB5",
1628          "\xF0\x9D\x9E\x95"    => "\xCE\xB6",
1629          "\xF0\x9D\x9E\x96"    => "\xCE\xB7",
1630          "\xF0\x9D\x9E\x97"    => "\xCE\xB8",
1631          "\xF0\x9D\x9E\x98"    => "\xCE\xB9",
1632          "\xF0\x9D\x9E\x99"    => "\xCE\xBA",
1633          "\xF0\x9D\x9E\x9A"    => "\xCE\xBB",
1634          "\xF0\x9D\x9E\x9B"    => "\xCE\xBC",
1635          "\xF0\x9D\x9E\x9C"    => "\xCE\xBD",
1636          "\xF0\x9D\x9E\x9D"    => "\xCE\xBE",
1637          "\xF0\x9D\x9E\x9E"    => "\xCE\xBF",
1638          "\xF0\x9D\x9E\x9F"    => "\xCF\x80",
1639          "\xF0\x9D\x9E\xA0"    => "\xCF\x81",
1640          "\xF0\x9D\x9E\xA1"    => "\xCE\xB8",
1641          "\xF0\x9D\x9E\xA2"    => "\xCF\x83",
1642          "\xF0\x9D\x9E\xA3"    => "\xCF\x84",
1643          "\xF0\x9D\x9E\xA4"    => "\xCF\x85",
1644          "\xF0\x9D\x9E\xA5"    => "\xCF\x86",
1645          "\xF0\x9D\x9E\xA6"    => "\xCF\x87",
1646          "\xF0\x9D\x9E\xA7"    => "\xCF\x88",
1647          "\xF0\x9D\x9E\xA8"    => "\xCF\x89",
1648          "\xF0\x9D\x9E\xBB"    => "\xCF\x83",
1649          "\xF0\x9D\x9F\x8A"    => "\xCF\x9D",
1650      );
1651      global $phpbb_root_path, $phpEx;
1652   
1653      // do the case fold
1654      $text = utf8_case_fold($text, $option);
1655   
1656      if (!class_exists('utf_normalizer'))
1657      {
1658          global $phpbb_root_path, $phpEx;
1659          include($phpbb_root_path . 'includes/utf/utf_normalizer.' . $phpEx);
1660      }
1661   
1662      // convert to NFKC
1663      utf_normalizer::nfkc($text);
1664   
1665      // FC_NFKC_Closure, http://www.unicode.org/Public/5.0.0/ucd/DerivedNormalizationProps.txt
1666      $text = strtr($text, $fc_nfkc_closure);
1667   
1668      return $text;
1669  }
1670   
/**
* Case fold a string that is assumed to already be in NFC.
*
* Before folding, every precomposed Greek code point whose decomposition
* contains U+0345 (COMBINING GREEK YPOGEGRAMMENI) is replaced by its base
* character followed by an explicit U+0345. This small amount of manual
* normalization means the folded result needs no further normalization pass.
*
* @param    string    $text    text to be case folded
* @param    string    $option    determines how we will fold the cases, handed to utf8_case_fold() unchanged
* @return    string            case folded text
*/
function utf8_case_fold_nfc($text, $option = 'full')
{
    // composed code point => base character + U+0345 ("\xCD\x85")
    static $ypogegrammeni = array(
        "\xCD\xBA"        => "\x20\xCD\x85",
        "\xE1\xBE\x80"    => "\xE1\xBC\x80\xCD\x85",
        "\xE1\xBE\x81"    => "\xE1\xBC\x81\xCD\x85",
        "\xE1\xBE\x82"    => "\xE1\xBC\x82\xCD\x85",
        "\xE1\xBE\x83"    => "\xE1\xBC\x83\xCD\x85",
        "\xE1\xBE\x84"    => "\xE1\xBC\x84\xCD\x85",
        "\xE1\xBE\x85"    => "\xE1\xBC\x85\xCD\x85",
        "\xE1\xBE\x86"    => "\xE1\xBC\x86\xCD\x85",
        "\xE1\xBE\x87"    => "\xE1\xBC\x87\xCD\x85",
        "\xE1\xBE\x88"    => "\xE1\xBC\x88\xCD\x85",
        "\xE1\xBE\x89"    => "\xE1\xBC\x89\xCD\x85",
        "\xE1\xBE\x8A"    => "\xE1\xBC\x8A\xCD\x85",
        "\xE1\xBE\x8B"    => "\xE1\xBC\x8B\xCD\x85",
        "\xE1\xBE\x8C"    => "\xE1\xBC\x8C\xCD\x85",
        "\xE1\xBE\x8D"    => "\xE1\xBC\x8D\xCD\x85",
        "\xE1\xBE\x8E"    => "\xE1\xBC\x8E\xCD\x85",
        "\xE1\xBE\x8F"    => "\xE1\xBC\x8F\xCD\x85",
        "\xE1\xBE\x90"    => "\xE1\xBC\xA0\xCD\x85",
        "\xE1\xBE\x91"    => "\xE1\xBC\xA1\xCD\x85",
        "\xE1\xBE\x92"    => "\xE1\xBC\xA2\xCD\x85",
        "\xE1\xBE\x93"    => "\xE1\xBC\xA3\xCD\x85",
        "\xE1\xBE\x94"    => "\xE1\xBC\xA4\xCD\x85",
        "\xE1\xBE\x95"    => "\xE1\xBC\xA5\xCD\x85",
        "\xE1\xBE\x96"    => "\xE1\xBC\xA6\xCD\x85",
        "\xE1\xBE\x97"    => "\xE1\xBC\xA7\xCD\x85",
        "\xE1\xBE\x98"    => "\xE1\xBC\xA8\xCD\x85",
        "\xE1\xBE\x99"    => "\xE1\xBC\xA9\xCD\x85",
        "\xE1\xBE\x9A"    => "\xE1\xBC\xAA\xCD\x85",
        "\xE1\xBE\x9B"    => "\xE1\xBC\xAB\xCD\x85",
        "\xE1\xBE\x9C"    => "\xE1\xBC\xAC\xCD\x85",
        "\xE1\xBE\x9D"    => "\xE1\xBC\xAD\xCD\x85",
        "\xE1\xBE\x9E"    => "\xE1\xBC\xAE\xCD\x85",
        "\xE1\xBE\x9F"    => "\xE1\xBC\xAF\xCD\x85",
        "\xE1\xBE\xA0"    => "\xE1\xBD\xA0\xCD\x85",
        "\xE1\xBE\xA1"    => "\xE1\xBD\xA1\xCD\x85",
        "\xE1\xBE\xA2"    => "\xE1\xBD\xA2\xCD\x85",
        "\xE1\xBE\xA3"    => "\xE1\xBD\xA3\xCD\x85",
        "\xE1\xBE\xA4"    => "\xE1\xBD\xA4\xCD\x85",
        "\xE1\xBE\xA5"    => "\xE1\xBD\xA5\xCD\x85",
        "\xE1\xBE\xA6"    => "\xE1\xBD\xA6\xCD\x85",
        "\xE1\xBE\xA7"    => "\xE1\xBD\xA7\xCD\x85",
        "\xE1\xBE\xA8"    => "\xE1\xBD\xA8\xCD\x85",
        "\xE1\xBE\xA9"    => "\xE1\xBD\xA9\xCD\x85",
        "\xE1\xBE\xAA"    => "\xE1\xBD\xAA\xCD\x85",
        "\xE1\xBE\xAB"    => "\xE1\xBD\xAB\xCD\x85",
        "\xE1\xBE\xAC"    => "\xE1\xBD\xAC\xCD\x85",
        "\xE1\xBE\xAD"    => "\xE1\xBD\xAD\xCD\x85",
        "\xE1\xBE\xAE"    => "\xE1\xBD\xAE\xCD\x85",
        "\xE1\xBE\xAF"    => "\xE1\xBD\xAF\xCD\x85",
        "\xE1\xBE\xB2"    => "\xE1\xBD\xB0\xCD\x85",
        "\xE1\xBE\xB3"    => "\xCE\xB1\xCD\x85",
        "\xE1\xBE\xB4"    => "\xCE\xAC\xCD\x85",
        "\xE1\xBE\xB7"    => "\xE1\xBE\xB6\xCD\x85",
        "\xE1\xBE\xBC"    => "\xCE\x91\xCD\x85",
        "\xE1\xBF\x82"    => "\xE1\xBD\xB4\xCD\x85",
        "\xE1\xBF\x83"    => "\xCE\xB7\xCD\x85",
        "\xE1\xBF\x84"    => "\xCE\xAE\xCD\x85",
        "\xE1\xBF\x87"    => "\xE1\xBF\x86\xCD\x85",
        "\xE1\xBF\x8C"    => "\xCE\x97\xCD\x85",
        "\xE1\xBF\xB2"    => "\xE1\xBD\xBC\xCD\x85",
        "\xE1\xBF\xB3"    => "\xCF\x89\xCD\x85",
        "\xE1\xBF\xB4"    => "\xCF\x8E\xCD\x85",
        "\xE1\xBF\xB7"    => "\xE1\xBF\xB6\xCD\x85",
        "\xE1\xBF\xBC"    => "\xCE\xA9\xCD\x85",
    );

    // perform a small trick, avoid further normalization on composed points that contain U+0345 in their decomposition
    $text = strtr($text, $ypogegrammeni);

    // do the case fold
    return utf8_case_fold($text, $option);
}
1758   
/**
* Normalizer wrapper: loads the utf_normalizer class on demand and converts the
* given value(s) to NFC (Normalization Form Composition).
*
* Accepts a single string, a flat array of strings, or an array nested one
* level deep. Empty input (as judged by empty()) is handed back untouched
* without loading the normalizer.
*
* @param    mixed    $strings    a string or an array of strings to normalize
* @return    mixed                the normalized content, preserving array keys if array given.
*/
function utf8_normalize_nfc($strings)
{
    // Nothing to normalize
    if (empty($strings))
    {
        return $strings;
    }

    // Pull in the normalizer lazily
    if (!class_exists('utf_normalizer'))
    {
        global $phpbb_root_path, $phpEx;
        include($phpbb_root_path . 'includes/utf/utf_normalizer.' . $phpEx);
    }

    // Scalar case: utf_normalizer::nfc() works in place on its argument
    if (!is_array($strings))
    {
        utf_normalizer::nfc($strings);
        return $strings;
    }

    // Array case: address every element through its key so that the
    // in-place normalization lands in $strings itself
    foreach (array_keys($strings) as $outer_key)
    {
        if (!is_array($strings[$outer_key]))
        {
            utf_normalizer::nfc($strings[$outer_key]);
            continue;
        }

        // one nested level is supported
        foreach (array_keys($strings[$outer_key]) as $inner_key)
        {
            utf_normalizer::nfc($strings[$outer_key][$inner_key]);
        }
    }

    return $strings;
}
1803   
/**
* Generates a "clean" version of a string.
* Clean means that it is in a case insensitive form (case folding) and that it is normalized
* (via utf8_case_fold_nfkc()). Additionally, homographs of a character are transformed into one
* specific character (preferably ASCII if it is an ASCII character), control characters are
* removed, runs of spaces are collapsed and the result is trimmed.
*
* Please be aware that if you change something within this function or within
* functions used here you need to rebuild/update the username_clean column in the users table,
* and all other columns that store a clean string; otherwise you will break this functionality.
*
* @param    string    $text    An unclean string, maybe user input (has to be valid UTF-8!)
* @return    string            Cleaned up version of the input string
*/
function utf8_clean_string($text)
{
    // Homograph replacement table, loaded from the data file on first use
    static $confusables = array();

    if (!$confusables)
    {
        global $phpbb_root_path, $phpEx;
        $confusables = include($phpbb_root_path . 'includes/utf/data/confusables.' . $phpEx);
    }

    // Case fold + normalize, then map look-alike characters onto one representative
    $folded = utf8_case_fold_nfkc($text);
    $unified = strtr($folded, $confusables);

    // Strip control characters: the ASCII range plus the two-byte sequences \xC2\x80-\xC2\x9F
    $without_controls = preg_replace('#(?:[\x00-\x1F\x7F]+|(?:\xC2[\x80-\x9F])+)#', '', $unified);

    // Collapse runs of spaces into a single space
    $single_spaced = preg_replace('# {2,}#', ' ', $without_controls);

    // A plain trim is enough here as all the other space characters should have
    // been turned into normal ASCII spaces by now
    return trim($single_spaced);
}
1839   
/**
* A wrapper for htmlspecialchars($value, ENT_COMPAT, 'UTF-8')
*
* The argument is taken by value: it is never modified, and a by-reference
* parameter would prevent callers from passing expressions such as the
* return value of another function.
*
* @param    string    $value    the string to escape
* @return    string            $value with HTML special characters converted (double quotes included, single quotes left alone)
*/
function utf8_htmlspecialchars($value)
{
    return htmlspecialchars($value, ENT_COMPAT, 'UTF-8');
}
1847   
/**
* Trying to convert returned system message to utf8
*
* PHP assumes such messages are ISO-8859-1 so we'll do that too
* and if it breaks messages we'll blame it on them ;-)
*
* @param    string    $message    the system message, treated as ISO-8859-1
* @return    string                the message passed through utf8_recode() if it contains non-ASCII bytes, then through utf8_htmlspecialchars()
*/
function utf8_convert_message($message)
{
    // Conversion is only needed if the message contains bytes outside of ASCII,
    // as there is no point in converting ASCII messages from ISO-8859-1 to UTF-8
    if (preg_match('/[\x80-\xFF]/', $message))
    {
        // Keep the recoded text in a variable: handing a function result
        // straight to a by-reference parameter raises
        // "Only variables should be passed by reference"
        $message = utf8_recode($message, 'ISO-8859-1');
    }

    return utf8_htmlspecialchars($message);
}
1866   
/**
* UTF8-compatible wordwrap replacement
*
* Existing occurrences of $break in the input are kept as line boundaries;
* each resulting line is then wrapped at spaces so that it does not exceed
* $width characters (counted with utf8_strlen()).
*
* A word longer than $width that starts a line stays on that line and does not
* produce an empty line in front of it. Forced cutting is ignored for a
* $width smaller than 1, because nothing could be cut off such a word.
*
* @param    string    $string    The input string
* @param    int        $width    The column width. Defaults to 75.
* @param    string    $break    The line is broken using the optional break parameter. Defaults to '\n'.
* @param    bool    $cut    If the cut is set to TRUE, the string is always wrapped at the specified width. So if you have a word that is larger than the given width, it is broken apart.
*
* @return    string            the given string wrapped at the specified column.
*
*/
function utf8_wordwrap($string, $width = 75, $break = "\n", $cut = false)
{
    // Cutting at a width below 1 would remove nothing from the word and the
    // loop below would re-process the same word forever
    $cut = ($cut && $width > 0);

    // We first need to explode on $break, not destroying existing (intended) breaks
    $lines = explode($break, $string);
    $new_lines = array(0 => '');
    $index = 0;

    foreach ($lines as $line)
    {
        $words = explode(' ', $line);

        for ($i = 0, $size = sizeof($words); $i < $size; $i++)
        {
            $word = $words[$i];

            // If cut is true we need to cut the word if it is > width chars
            if ($cut && utf8_strlen($word) > $width)
            {
                // Keep the remainder in place and revisit it on the next iteration
                $words[$i] = utf8_substr($word, $width);
                $word = utf8_substr($word, 0, $width);
                $i--;
            }

            // Only close the current line if it already holds something; closing an
            // empty line would emit a spurious break in front of an overlong word
            if ($new_lines[$index] !== '' && utf8_strlen($new_lines[$index] . $word) > $width)
            {
                // strip the trailing space (a single ASCII byte) appended after the last word
                $new_lines[$index] = substr($new_lines[$index], 0, -1);
                $index++;
                $new_lines[$index] = '';
            }

            $new_lines[$index] .= $word . ' ';
        }

        // end of an original line: strip the trailing space and open the next line
        $new_lines[$index] = substr($new_lines[$index], 0, -1);
        $index++;
        $new_lines[$index] = '';
    }

    // the line opened after the last original line is always empty, drop it
    unset($new_lines[$index]);
    return implode($break, $new_lines);
}
1919   
1920  ?>