Verzeichnisstruktur phpBB-3.2.0


Veröffentlicht
06.01.2017

So funktioniert es


Auf das letzte Element klicken. Dies geht jeweils einen Schritt zurück.

Auf das Icon klicken, dies öffnet das Verzeichnis. Nochmal klicken schließt das Verzeichnis.
Auf den Verzeichnisnamen klicken, dies zeigt nur das Verzeichnis mit Inhalt an

(Beispiel Datei-Icons)

Auf das Icon klicken, um den Quellcode anzuzeigen

utf_tools.php

Zuletzt modifiziert: 09.10.2024, 12:52 - Dateigröße: 39.78 KiB


0001  <?php
0002  /**
0003  *
0004  * This file is part of the phpBB Forum Software package.
0005  *
0006  * @copyright (c) phpBB Limited <https://www.phpbb.com>
0007  * @license GNU General Public License, version 2 (GPL-2.0)
0008  *
0009  * For full copyright and license information, please see
0010  * the docs/CREDITS.txt file.
0011  *
0012  */
0013   
0014  /**
0015  */
// Stop immediately unless the including script has defined IN_PHPBB
if (defined('IN_PHPBB') === false)
{
    exit;
}

// Enforce ASCII only string handling by pinning the character-type locale to "C"
setlocale(LC_CTYPE, 'C');

/**
* Setup the UTF-8 portability layer
*
* Runs the three Patchwork UTF-8 bootup initialisers (utf8_encode, mbstring, intl).
*/
Patchwork\Utf8\Bootup::initUtf8Encode();
Patchwork\Utf8\Bootup::initMbstring();
Patchwork\Utf8\Bootup::initIntl();
0030   
0031  /**
0032  * UTF-8 tools
0033  *
0034  * Whenever possible, these functions will try to use PHP's built-in functions or
0035  * extensions, otherwise they will default to custom routines.
0036  *
0037  */
0038   
/**
* UTF-8 aware alternative to strrpos
*
* Thin wrapper around mb_strrpos() that returns false for an empty haystack
* instead of letting mb_strrpos() deal with it.
*
* @param string   $str     Haystack
* @param string   $needle  String to look for
* @param int|null $offset  Forwarded to mb_strrpos() when given; null means "not supplied"
* @return int|bool Position of the last occurrence as reported by mb_strrpos(),
*                  false if the haystack is empty or nothing was found
* @ignore
*/
function utf8_strrpos($str,    $needle, $offset = null)
{
    // Emulate behaviour of strrpos rather than raising warning.
    // empty() also reports "0" (and 0) as empty, which would make it impossible
    // to search the one-character haystack "0"; numeric values are therefore
    // let through to mb_strrpos().
    if (empty($str) && !is_numeric($str))
    {
        return false;
    }

    if (is_null($offset))
    {
        return mb_strrpos($str, $needle);
    }

    return mb_strrpos($str, $needle, $offset);
}
0060   
/**
* UTF-8 aware alternative to strpos
*
* Delegates to mb_strpos(); the offset argument is only forwarded when the
* caller actually supplied one.
* @ignore
*/
function utf8_strpos($str, $needle, $offset = null)
{
    return ($offset === null)
        ? mb_strpos($str, $needle)
        : mb_strpos($str, $needle, $offset);
}
0076   
/**
* UTF-8 aware alternative to strtolower
*
* Delegates to mb_strtolower() using the current mbstring internal encoding.
* @ignore
*/
function utf8_strtolower($str)
{
    $lowercased = mb_strtolower($str);

    return $lowercased;
}
0085   
/**
* UTF-8 aware alternative to strtoupper
*
* Delegates to mb_strtoupper() using the current mbstring internal encoding.
* @ignore
*/
function utf8_strtoupper($str)
{
    $uppercased = mb_strtoupper($str);

    return $uppercased;
}
0094   
/**
* UTF-8 aware alternative to substr
*
* Delegates to mb_substr(); the length argument is only forwarded when the
* caller actually supplied one.
* @ignore
*/
function utf8_substr($str, $offset, $length = null)
{
    return ($length === null)
        ? mb_substr($str, $offset)
        : mb_substr($str, $offset, $length);
}
0110   
/**
* Return the length (in characters) of a UTF-8 string
*
* Delegates to mb_strlen() with the encoding fixed to UTF-8.
* @ignore
*/
function utf8_strlen($text)
{
    $char_count = mb_strlen($text, 'utf-8');

    return $char_count;
}
0119   
/**
* UTF-8 aware alternative to str_split
* Convert a string to an array
*
* @author Harry Fuecks
* @param string $str UTF-8 encoded
* @param int $split_len number of characters per chunk (must be an integer >= 1)
* @return array|bool list of chunks of at most $split_len characters, in
*                    original order; false if $split_len is invalid or the
*                    string could not be matched as UTF-8
*/
function utf8_str_split($str, $split_len = 1)
{
    if (!is_int($split_len) || $split_len < 1)
    {
        return false;
    }

    // Nothing to split: the whole string fits into a single chunk
    $len = utf8_strlen($str);
    if ($len <= $split_len)
    {
        return array($str);
    }

    // Greedily take up to $split_len characters per match. With the "s"
    // modifier the dot matches every character, so a trailing chunk that
    // contains a NUL byte is kept as well.
    if (!preg_match_all('/.{1,' . $split_len . '}/us', $str, $ar))
    {
        // PCRE could not process the subject (e.g. malformed UTF-8)
        return false;
    }

    return $ar[0];
}
0145   
/**
* UTF-8 aware alternative to strspn
* Find length of initial segment matching the mask
*
* @author Harry Fuecks
* @param string   $str    UTF-8 encoded subject
* @param string   $mask   List of allowed characters, taken literally
* @param int|null $start  Optional start offset, handed to utf8_substr()
* @param int|null $length Optional length, handed to utf8_substr()
* @return int Number of leading characters of the (sub)string that occur in $mask
*/
function utf8_strspn($str, $mask, $start = null, $length = null)
{
    if ($start !== null || $length !== null)
    {
        $str = utf8_substr($str, $start, $length);
    }

    // An empty mask cannot match anything and would yield the invalid pattern "[]+"
    if ((string) $mask === '')
    {
        return 0;
    }

    // The mask is a plain character list: quote it so that "^", "-", "\", "]"
    // or the delimiter "/" are matched literally instead of altering the class.
    preg_match('/^[' . preg_quote($mask, '/') . ']+/u', $str, $matches);

    if (isset($matches[0]))
    {
        return utf8_strlen($matches[0]);
    }

    return 0;
}
0168   
/**
* UTF-8 aware alternative to ucfirst
* Make a string's first character uppercase
*
* The character count decides the path: an empty string yields '', a single
* character is uppercased as a whole, anything longer is split into its first
* character and the remainder, and only the first character is uppercased.
*
* @author Harry Fuecks
* @param string
* @return string with first character as upper case (if applicable)
*/
function utf8_ucfirst($str)
{
    $char_count = utf8_strlen($str);

    if ($char_count == 0)
    {
        return '';
    }

    if ($char_count == 1)
    {
        return utf8_strtoupper($str);
    }

    // $matches[1] is the first character, $matches[2] everything after it
    preg_match('/^(.{1})(.*)$/us', $str, $matches);

    $head = utf8_strtoupper($matches[1]);
    $tail = $matches[2];

    return $head . $tail;
}
0195   
/**
* Recode a string to UTF-8
*
* Conversion backends are tried in this order: iconv(), mb_convert_encoding()
* (only for a fixed list of encodings), recode_string(), and finally the
* transcoder functions loaded from includes/utf/data/recode_basic and
* includes/utf/data/recode_cjk.
*
* Non-string input, empty input and input whose encoding is already "utf-8"
* are returned as-is. If no backend can handle the encoding, an E_USER_ERROR
* is triggered.
*
* @param    string    $string        Original string
* @param    string    $encoding    Original encoding (compared case-insensitively)
* @return    string                The string, encoded in UTF-8
*/
function utf8_recode($string, $encoding)
{
    $encoding = strtolower($encoding);

    if ($encoding == 'utf-8' || !is_string($string) || empty($string))
    {
        return $string;
    }

    // we force iso-8859-1 to be cp1252
    if ($encoding == 'iso-8859-1')
    {
        $encoding = 'cp1252';
    }
    // convert iso-8859-8-i to iso-8859-8
    else if ($encoding == 'iso-8859-8-i')
    {
        $encoding = 'iso-8859-8';
        $string = hebrev($string);
    }

    // First, try iconv()
    if (function_exists('iconv'))
    {
        $ret = @iconv($encoding, 'utf-8', $string);

        // Only a non-string or '' counts as failure; a converted "0" is a valid result
        if (is_string($ret) && $ret !== '')
        {
            return $ret;
        }
    }

    // Try the mb_string extension
    if (function_exists('mb_convert_encoding'))
    {
        // Only hand mbstring an encoding name from this fixed list
        switch ($encoding)
        {
            case 'iso-8859-1':
            case 'iso-8859-2':
            case 'iso-8859-4':
            case 'iso-8859-7':
            case 'iso-8859-9':
            case 'iso-8859-15':
            case 'windows-1251':
            case 'windows-1252':
            case 'cp1252':
            case 'shift_jis':
            case 'euc-kr':
            case 'big5':
            case 'gb2312':
                $ret = @mb_convert_encoding($string, 'utf-8', $encoding);

                if (is_string($ret) && $ret !== '')
                {
                    return $ret;
                }
        }
    }

    // Try the recode extension
    if (function_exists('recode_string'))
    {
        $ret = @recode_string($encoding . '..utf-8', $string);

        if (is_string($ret) && $ret !== '')
        {
            return $ret;
        }
    }

    // If nothing works, check if we have a custom transcoder available
    if (!preg_match('#^[a-z0-9_ \\-]+$#', $encoding))
    {
        // Make sure the encoding name is alphanumeric, we don't want it to be abused into loading arbitrary files
        trigger_error('Unknown encoding: ' . $encoding, E_USER_ERROR);
    }

    global $phpbb_root_path, $phpEx;

    // Runs the named transcoder function on $string, including the library
    // file that defines it first if the function does not exist yet.
    // $library is "basic" or "cjk"; $label is the matching prefix for the error message.
    $transcode = function ($function, $library, $label) use ($string, $phpbb_root_path, $phpEx)
    {
        if (!function_exists($function))
        {
            $file = $phpbb_root_path . 'includes/utf/data/recode_' . $library . '.' . $phpEx;

            if (!file_exists($file))
            {
                trigger_error($label . ' reencoder file is missing', E_USER_ERROR);
            }
            include($file);
        }

        return call_user_func($function, $string);
    };

    // iso-8859-* character encoding
    if (preg_match('/iso[_ -]?8859[_ -]?(\\d+)/', $encoding, $array))
    {
        switch ($array[1])
        {
            case '1':
            case '2':
            case '4':
            case '7':
            case '8':
            case '9':
            case '15':
                return $transcode('iso_8859_' . $array[1], 'basic', 'Basic');
            break;

            default:
                trigger_error('Unknown encoding: ' . $encoding, E_USER_ERROR);
            break;
        }
    }

    // CP/WIN character encoding
    if (preg_match('/(?:cp|windows)[_\- ]?(\\d+)/', $encoding, $array))
    {
        switch ($array[1])
        {
            // cp932 is not handled here; it falls through to the SJIS check below
            case '932':
            break;

            case '1250':
            case '1251':
            case '1252':
            case '1254':
            case '1255':
            case '1256':
            case '1257':
            case '874':
                return $transcode('cp' . $array[1], 'basic', 'Basic');
            break;

            default:
                trigger_error('Unknown encoding: ' . $encoding, E_USER_ERROR);
            break;
        }
    }

    // TIS-620
    if (preg_match('/tis[_ -]?620/', $encoding))
    {
        return $transcode('tis_620', 'basic', 'Basic');
    }

    // SJIS
    if (preg_match('/sjis(?:[_ -]?win)?|(?:cp|ibm)[_ -]?932|shift[_ -]?jis/', $encoding))
    {
        return $transcode('sjis', 'cjk', 'CJK');
    }

    // EUC_KR
    if (preg_match('/euc[_ -]?kr/', $encoding))
    {
        return $transcode('euc_kr', 'cjk', 'CJK');
    }

    // BIG-5
    if (preg_match('/big[_ -]?5/', $encoding))
    {
        return $transcode('big5', 'cjk', 'CJK');
    }

    // GB2312
    if (preg_match('/gb[_ -]?2312/', $encoding))
    {
        return $transcode('gb2312', 'cjk', 'CJK');
    }

    // No backend and no bundled transcoder knows this encoding
    trigger_error('Unknown encoding: ' . $encoding, E_USER_ERROR);
}
0419   
/**
* Replace all UTF-8 chars that are not in ASCII with their NCR
*
* Every byte sequence made of a lead byte (0xC2-0xF4) followed by one to three
* continuation bytes (0x80-0xBF) is handed to utf8_encode_ncr_callback().
*
* @param    string    $text        UTF-8 string in NFC
* @return    string                ASCII string using NCRs for non-ASCII chars
*/
function utf8_encode_ncr($text)
{
    $multibyte_sequence = '#[\\xC2-\\xF4][\\x80-\\xBF]{1,3}#';

    return preg_replace_callback($multibyte_sequence, 'utf8_encode_ncr_callback', $text);
}
0430   
/**
* Callback used in encode_ncr()
*
* Wraps the value utf8_ord() returns for the matched sequence in "&#" and ";".
* Attention, $m is an array
*
* @param    array    $m            0-based numerically indexed array passed by preg_replace_callback()
* @return    string                The NCR built from the matched sequence
*/
function utf8_encode_ncr_callback($m)
{
    $code_point = utf8_ord($m[0]);

    return '&#' . $code_point . ';';
}
0443   
/**
* Converts a UTF-8 char to its code point
*
* A single byte is returned via ord(). For two to four bytes the payload bits
* of the lead byte are combined with the low six bits of each following byte.
* Input of any other byte length is returned unchanged.
*
* @param string $chr UTF-8 char
* @return integer UNICODE code point
*/
function utf8_ord($chr)
{
    // Payload mask of the lead byte, indexed by the total sequence length
    $lead_masks = array(2 => 0x1F, 3 => 0x0F, 4 => 0x07);

    $byte_count = strlen($chr);

    if ($byte_count === 1)
    {
        return ord($chr);
    }

    if (!isset($lead_masks[$byte_count]))
    {
        return $chr;
    }

    $code_point = ord($chr[0]) & $lead_masks[$byte_count];

    // Append the six payload bits of every continuation byte
    for ($i = 1; $i < $byte_count; $i++)
    {
        $code_point = ($code_point << 6) | (ord($chr[$i]) & 0x3F);
    }

    return $code_point;
}
0474   
/**
* Converts a code point to a UTF-8 char
*
* The number of bytes produced depends on the size of the code point:
* above 0xFFFF four bytes, above 0x7FF three, above 0x7F two, otherwise one.
* No range or validity check is performed on $cp.
*
* @param    int        $cp    UNICODE code point
* @return    string        UTF-8 char
*/
function utf8_chr($cp)
{
    // Continuation byte carrying the lowest six bits; unused for single-byte output
    $last = chr(0x80 | ($cp & 0x3F));

    if ($cp > 0xFFFF)
    {
        $lead = chr(0xF0 | ($cp >> 18));
        $second = chr(0x80 | (($cp >> 12) & 0x3F));
        $third = chr(0x80 | (($cp >> 6) & 0x3F));

        return $lead . $second . $third . $last;
    }

    if ($cp > 0x7FF)
    {
        $lead = chr(0xE0 | ($cp >> 12));
        $second = chr(0x80 | (($cp >> 6) & 0x3F));

        return $lead . $second . $last;
    }

    if ($cp > 0x7F)
    {
        return chr(0xC0 | ($cp >> 6)) . $last;
    }

    return chr($cp);
}
0500   
/**
* Convert Numeric Character References to UTF-8 chars
*
* Matches decimal references of one to six digits and hexadecimal references
* of one to five digits (case-insensitively) and passes each one to
* utf8_decode_ncr_callback().
*
* Notes:
*    - we do not convert NCRs recursively, if you pass &#38;#38; it will return &#38;
*    - we DO NOT check for the existence of the Unicode characters, therefore an entity may be converted to an inexistent codepoint
*
* @param    string    $text        String to convert, encoded in UTF-8 (no normal form required)
* @return    string                UTF-8 string where NCRs have been replaced with the actual chars
*/
function utf8_decode_ncr($text)
{
    $ncr_pattern = '/&#([0-9]{1,6}|x[0-9A-F]{1,5});/i';

    return preg_replace_callback($ncr_pattern, 'utf8_decode_ncr_callback', $text);
}
0515   
/**
* Callback used in decode_ncr()
*
* Takes a NCR (in decimal or hexadecimal) and returns a UTF-8 char. Attention, $m is an array.
* It will ignore most of invalid NCRs, but not all!
*
* @param    array    $m            0-based numerically indexed array passed by preg_replace_callback()
* @return    string                UTF-8 char
*/
function utf8_decode_ncr_callback($m)
{
    $reference = $m[1];

    // strncasecmp() yields 0 when the reference starts with "x" or "X", i.e. it is hexadecimal
    if (strncasecmp($reference, 'x', 1) === 0)
    {
        $cp = hexdec(substr($reference, 1));
    }
    else
    {
        $cp = $reference;
    }

    return utf8_chr($cp);
}
0531   
/**
* Case folds a unicode string as per Unicode 5.0, section 3.13
*
* The replacement tables are included from includes/utf/data/ on first use
* and kept in a static array. The common table is always applied first,
* followed by the full table when $option is 'full', or by the simple table
* for any other value.
*
* @param    string    $text    text to be case folded
* @param    string    $option    determines how we will fold the cases
* @return    string            case folded text
*/
function utf8_case_fold($text, $option = 'full')
{
    static $uniarray = array();
    global $phpbb_root_path, $phpEx;

    // common is always needed
    $sources = array('c' => 'includes/utf/data/case_fold_c.');

    // plus either full (one character may become several) or simple (one to one)
    if ($option === 'full')
    {
        $variant = 'f';
        $sources['f'] = 'includes/utf/data/case_fold_f.';
    }
    else
    {
        $variant = 's';
        $sources['s'] = 'includes/utf/data/case_fold_s.';
    }

    // only load tables that have not been loaded by an earlier call
    foreach ($sources as $key => $relative_path)
    {
        if (!isset($uniarray[$key]))
        {
            $uniarray[$key] = include($phpbb_root_path . $relative_path . $phpEx);
        }
    }

    $text = strtr($text, $uniarray['c']);

    return strtr($text, $uniarray[$variant]);
}
0578   
0579  /**
0580  * Takes the input and does a "special" case fold. It does minor normalization
0581  * and returns NFKC compatable text
0582  *
0583  * @param    string    $text    text to be case folded
0584  * @param    string    $option    determines how we will fold the cases
0585  * @return    string            case folded text
0586  */
0587  function utf8_case_fold_nfkc($text, $option = 'full')
0588  {
0589      static $fc_nfkc_closure = array(
0590          "\xCD\xBA"    => "\x20\xCE\xB9",
0591          "\xCF\x92"    => "\xCF\x85",
0592          "\xCF\x93"    => "\xCF\x8D",
0593          "\xCF\x94"    => "\xCF\x8B",
0594          "\xCF\xB2"    => "\xCF\x83",
0595          "\xCF\xB9"    => "\xCF\x83",
0596          "\xE1\xB4\xAC"    => "\x61",
0597          "\xE1\xB4\xAD"    => "\xC3\xA6",
0598          "\xE1\xB4\xAE"    => "\x62",
0599          "\xE1\xB4\xB0"    => "\x64",
0600          "\xE1\xB4\xB1"    => "\x65",
0601          "\xE1\xB4\xB2"    => "\xC7\x9D",
0602          "\xE1\xB4\xB3"    => "\x67",
0603          "\xE1\xB4\xB4"    => "\x68",
0604          "\xE1\xB4\xB5"    => "\x69",
0605          "\xE1\xB4\xB6"    => "\x6A",
0606          "\xE1\xB4\xB7"    => "\x6B",
0607          "\xE1\xB4\xB8"    => "\x6C",
0608          "\xE1\xB4\xB9"    => "\x6D",
0609          "\xE1\xB4\xBA"    => "\x6E",
0610          "\xE1\xB4\xBC"    => "\x6F",
0611          "\xE1\xB4\xBD"    => "\xC8\xA3",
0612          "\xE1\xB4\xBE"    => "\x70",
0613          "\xE1\xB4\xBF"    => "\x72",
0614          "\xE1\xB5\x80"    => "\x74",
0615          "\xE1\xB5\x81"    => "\x75",
0616          "\xE1\xB5\x82"    => "\x77",
0617          "\xE2\x82\xA8"    => "\x72\x73",
0618          "\xE2\x84\x82"    => "\x63",
0619          "\xE2\x84\x83"    => "\xC2\xB0\x63",
0620          "\xE2\x84\x87"    => "\xC9\x9B",
0621          "\xE2\x84\x89"    => "\xC2\xB0\x66",
0622          "\xE2\x84\x8B"    => "\x68",
0623          "\xE2\x84\x8C"    => "\x68",
0624          "\xE2\x84\x8D"    => "\x68",
0625          "\xE2\x84\x90"    => "\x69",
0626          "\xE2\x84\x91"    => "\x69",
0627          "\xE2\x84\x92"    => "\x6C",
0628          "\xE2\x84\x95"    => "\x6E",
0629          "\xE2\x84\x96"    => "\x6E\x6F",
0630          "\xE2\x84\x99"    => "\x70",
0631          "\xE2\x84\x9A"    => "\x71",
0632          "\xE2\x84\x9B"    => "\x72",
0633          "\xE2\x84\x9C"    => "\x72",
0634          "\xE2\x84\x9D"    => "\x72",
0635          "\xE2\x84\xA0"    => "\x73\x6D",
0636          "\xE2\x84\xA1"    => "\x74\x65\x6C",
0637          "\xE2\x84\xA2"    => "\x74\x6D",
0638          "\xE2\x84\xA4"    => "\x7A",
0639          "\xE2\x84\xA8"    => "\x7A",
0640          "\xE2\x84\xAC"    => "\x62",
0641          "\xE2\x84\xAD"    => "\x63",
0642          "\xE2\x84\xB0"    => "\x65",
0643          "\xE2\x84\xB1"    => "\x66",
0644          "\xE2\x84\xB3"    => "\x6D",
0645          "\xE2\x84\xBB"    => "\x66\x61\x78",
0646          "\xE2\x84\xBE"    => "\xCE\xB3",
0647          "\xE2\x84\xBF"    => "\xCF\x80",
0648          "\xE2\x85\x85"    => "\x64",
0649          "\xE3\x89\x90"    => "\x70\x74\x65",
0650          "\xE3\x8B\x8C"    => "\x68\x67",
0651          "\xE3\x8B\x8E"    => "\x65\x76",
0652          "\xE3\x8B\x8F"    => "\x6C\x74\x64",
0653          "\xE3\x8D\xB1"    => "\x68\x70\x61",
0654          "\xE3\x8D\xB3"    => "\x61\x75",
0655          "\xE3\x8D\xB5"    => "\x6F\x76",
0656          "\xE3\x8D\xBA"    => "\x69\x75",
0657          "\xE3\x8E\x80"    => "\x70\x61",
0658          "\xE3\x8E\x81"    => "\x6E\x61",
0659          "\xE3\x8E\x82"    => "\xCE\xBC\x61",
0660          "\xE3\x8E\x83"    => "\x6D\x61",
0661          "\xE3\x8E\x84"    => "\x6B\x61",
0662          "\xE3\x8E\x85"    => "\x6B\x62",
0663          "\xE3\x8E\x86"    => "\x6D\x62",
0664          "\xE3\x8E\x87"    => "\x67\x62",
0665          "\xE3\x8E\x8A"    => "\x70\x66",
0666          "\xE3\x8E\x8B"    => "\x6E\x66",
0667          "\xE3\x8E\x8C"    => "\xCE\xBC\x66",
0668          "\xE3\x8E\x90"    => "\x68\x7A",
0669          "\xE3\x8E\x91"    => "\x6B\x68\x7A",
0670          "\xE3\x8E\x92"    => "\x6D\x68\x7A",
0671          "\xE3\x8E\x93"    => "\x67\x68\x7A",
0672          "\xE3\x8E\x94"    => "\x74\x68\x7A",
0673          "\xE3\x8E\xA9"    => "\x70\x61",
0674          "\xE3\x8E\xAA"    => "\x6B\x70\x61",
0675          "\xE3\x8E\xAB"    => "\x6D\x70\x61",
0676          "\xE3\x8E\xAC"    => "\x67\x70\x61",
0677          "\xE3\x8E\xB4"    => "\x70\x76",
0678          "\xE3\x8E\xB5"    => "\x6E\x76",
0679          "\xE3\x8E\xB6"    => "\xCE\xBC\x76",
0680          "\xE3\x8E\xB7"    => "\x6D\x76",
0681          "\xE3\x8E\xB8"    => "\x6B\x76",
0682          "\xE3\x8E\xB9"    => "\x6D\x76",
0683          "\xE3\x8E\xBA"    => "\x70\x77",
0684          "\xE3\x8E\xBB"    => "\x6E\x77",
0685          "\xE3\x8E\xBC"    => "\xCE\xBC\x77",
0686          "\xE3\x8E\xBD"    => "\x6D\x77",
0687          "\xE3\x8E\xBE"    => "\x6B\x77",
0688          "\xE3\x8E\xBF"    => "\x6D\x77",
0689          "\xE3\x8F\x80"    => "\x6B\xCF\x89",
0690          "\xE3\x8F\x81"    => "\x6D\xCF\x89",
0691          "\xE3\x8F\x83"    => "\x62\x71",
0692          "\xE3\x8F\x86"    => "\x63\xE2\x88\x95\x6B\x67",
0693          "\xE3\x8F\x87"    => "\x63\x6F\x2E",
0694          "\xE3\x8F\x88"    => "\x64\x62",
0695          "\xE3\x8F\x89"    => "\x67\x79",
0696          "\xE3\x8F\x8B"    => "\x68\x70",
0697          "\xE3\x8F\x8D"    => "\x6B\x6B",
0698          "\xE3\x8F\x8E"    => "\x6B\x6D",
0699          "\xE3\x8F\x97"    => "\x70\x68",
0700          "\xE3\x8F\x99"    => "\x70\x70\x6D",
0701          "\xE3\x8F\x9A"    => "\x70\x72",
0702          "\xE3\x8F\x9C"    => "\x73\x76",
0703          "\xE3\x8F\x9D"    => "\x77\x62",
0704          "\xE3\x8F\x9E"    => "\x76\xE2\x88\x95\x6D",
0705          "\xE3\x8F\x9F"    => "\x61\xE2\x88\x95\x6D",
0706          "\xF0\x9D\x90\x80"    => "\x61",
0707          "\xF0\x9D\x90\x81"    => "\x62",
0708          "\xF0\x9D\x90\x82"    => "\x63",
0709          "\xF0\x9D\x90\x83"    => "\x64",
0710          "\xF0\x9D\x90\x84"    => "\x65",
0711          "\xF0\x9D\x90\x85"    => "\x66",
0712          "\xF0\x9D\x90\x86"    => "\x67",
0713          "\xF0\x9D\x90\x87"    => "\x68",
0714          "\xF0\x9D\x90\x88"    => "\x69",
0715          "\xF0\x9D\x90\x89"    => "\x6A",
0716          "\xF0\x9D\x90\x8A"    => "\x6B",
0717          "\xF0\x9D\x90\x8B"    => "\x6C",
0718          "\xF0\x9D\x90\x8C"    => "\x6D",
0719          "\xF0\x9D\x90\x8D"    => "\x6E",
0720          "\xF0\x9D\x90\x8E"    => "\x6F",
0721          "\xF0\x9D\x90\x8F"    => "\x70",
0722          "\xF0\x9D\x90\x90"    => "\x71",
0723          "\xF0\x9D\x90\x91"    => "\x72",
0724          "\xF0\x9D\x90\x92"    => "\x73",
0725          "\xF0\x9D\x90\x93"    => "\x74",
0726          "\xF0\x9D\x90\x94"    => "\x75",
0727          "\xF0\x9D\x90\x95"    => "\x76",
0728          "\xF0\x9D\x90\x96"    => "\x77",
0729          "\xF0\x9D\x90\x97"    => "\x78",
0730          "\xF0\x9D\x90\x98"    => "\x79",
0731          "\xF0\x9D\x90\x99"    => "\x7A",
0732          "\xF0\x9D\x90\xB4"    => "\x61",
0733          "\xF0\x9D\x90\xB5"    => "\x62",
0734          "\xF0\x9D\x90\xB6"    => "\x63",
0735          "\xF0\x9D\x90\xB7"    => "\x64",
0736          "\xF0\x9D\x90\xB8"    => "\x65",
0737          "\xF0\x9D\x90\xB9"    => "\x66",
0738          "\xF0\x9D\x90\xBA"    => "\x67",
0739          "\xF0\x9D\x90\xBB"    => "\x68",
0740          "\xF0\x9D\x90\xBC"    => "\x69",
0741          "\xF0\x9D\x90\xBD"    => "\x6A",
0742          "\xF0\x9D\x90\xBE"    => "\x6B",
0743          "\xF0\x9D\x90\xBF"    => "\x6C",
0744          "\xF0\x9D\x91\x80"    => "\x6D",
0745          "\xF0\x9D\x91\x81"    => "\x6E",
0746          "\xF0\x9D\x91\x82"    => "\x6F",
0747          "\xF0\x9D\x91\x83"    => "\x70",
0748          "\xF0\x9D\x91\x84"    => "\x71",
0749          "\xF0\x9D\x91\x85"    => "\x72",
0750          "\xF0\x9D\x91\x86"    => "\x73",
0751          "\xF0\x9D\x91\x87"    => "\x74",
0752          "\xF0\x9D\x91\x88"    => "\x75",
0753          "\xF0\x9D\x91\x89"    => "\x76",
0754          "\xF0\x9D\x91\x8A"    => "\x77",
0755          "\xF0\x9D\x91\x8B"    => "\x78",
0756          "\xF0\x9D\x91\x8C"    => "\x79",
0757          "\xF0\x9D\x91\x8D"    => "\x7A",
0758          "\xF0\x9D\x91\xA8"    => "\x61",
0759          "\xF0\x9D\x91\xA9"    => "\x62",
0760          "\xF0\x9D\x91\xAA"    => "\x63",
0761          "\xF0\x9D\x91\xAB"    => "\x64",
0762          "\xF0\x9D\x91\xAC"    => "\x65",
0763          "\xF0\x9D\x91\xAD"    => "\x66",
0764          "\xF0\x9D\x91\xAE"    => "\x67",
0765          "\xF0\x9D\x91\xAF"    => "\x68",
0766          "\xF0\x9D\x91\xB0"    => "\x69",
0767          "\xF0\x9D\x91\xB1"    => "\x6A",
0768          "\xF0\x9D\x91\xB2"    => "\x6B",
0769          "\xF0\x9D\x91\xB3"    => "\x6C",
0770          "\xF0\x9D\x91\xB4"    => "\x6D",
0771          "\xF0\x9D\x91\xB5"    => "\x6E",
0772          "\xF0\x9D\x91\xB6"    => "\x6F",
0773          "\xF0\x9D\x91\xB7"    => "\x70",
0774          "\xF0\x9D\x91\xB8"    => "\x71",
0775          "\xF0\x9D\x91\xB9"    => "\x72",
0776          "\xF0\x9D\x91\xBA"    => "\x73",
0777          "\xF0\x9D\x91\xBB"    => "\x74",
0778          "\xF0\x9D\x91\xBC"    => "\x75",
0779          "\xF0\x9D\x91\xBD"    => "\x76",
0780          "\xF0\x9D\x91\xBE"    => "\x77",
0781          "\xF0\x9D\x91\xBF"    => "\x78",
0782          "\xF0\x9D\x92\x80"    => "\x79",
0783          "\xF0\x9D\x92\x81"    => "\x7A",
0784          "\xF0\x9D\x92\x9C"    => "\x61",
0785          "\xF0\x9D\x92\x9E"    => "\x63",
0786          "\xF0\x9D\x92\x9F"    => "\x64",
0787          "\xF0\x9D\x92\xA2"    => "\x67",
0788          "\xF0\x9D\x92\xA5"    => "\x6A",
0789          "\xF0\x9D\x92\xA6"    => "\x6B",
0790          "\xF0\x9D\x92\xA9"    => "\x6E",
0791          "\xF0\x9D\x92\xAA"    => "\x6F",
0792          "\xF0\x9D\x92\xAB"    => "\x70",
0793          "\xF0\x9D\x92\xAC"    => "\x71",
0794          "\xF0\x9D\x92\xAE"    => "\x73",
0795          "\xF0\x9D\x92\xAF"    => "\x74",
0796          "\xF0\x9D\x92\xB0"    => "\x75",
0797          "\xF0\x9D\x92\xB1"    => "\x76",
0798          "\xF0\x9D\x92\xB2"    => "\x77",
0799          "\xF0\x9D\x92\xB3"    => "\x78",
0800          "\xF0\x9D\x92\xB4"    => "\x79",
0801          "\xF0\x9D\x92\xB5"    => "\x7A",
0802          "\xF0\x9D\x93\x90"    => "\x61",
0803          "\xF0\x9D\x93\x91"    => "\x62",
0804          "\xF0\x9D\x93\x92"    => "\x63",
0805          "\xF0\x9D\x93\x93"    => "\x64",
0806          "\xF0\x9D\x93\x94"    => "\x65",
0807          "\xF0\x9D\x93\x95"    => "\x66",
0808          "\xF0\x9D\x93\x96"    => "\x67",
0809          "\xF0\x9D\x93\x97"    => "\x68",
0810          "\xF0\x9D\x93\x98"    => "\x69",
0811          "\xF0\x9D\x93\x99"    => "\x6A",
0812          "\xF0\x9D\x93\x9A"    => "\x6B",
0813          "\xF0\x9D\x93\x9B"    => "\x6C",
0814          "\xF0\x9D\x93\x9C"    => "\x6D",
0815          "\xF0\x9D\x93\x9D"    => "\x6E",
0816          "\xF0\x9D\x93\x9E"    => "\x6F",
0817          "\xF0\x9D\x93\x9F"    => "\x70",
0818          "\xF0\x9D\x93\xA0"    => "\x71",
0819          "\xF0\x9D\x93\xA1"    => "\x72",
0820          "\xF0\x9D\x93\xA2"    => "\x73",
0821          "\xF0\x9D\x93\xA3"    => "\x74",
0822          "\xF0\x9D\x93\xA4"    => "\x75",
0823          "\xF0\x9D\x93\xA5"    => "\x76",
0824          "\xF0\x9D\x93\xA6"    => "\x77",
0825          "\xF0\x9D\x93\xA7"    => "\x78",
0826          "\xF0\x9D\x93\xA8"    => "\x79",
0827          "\xF0\x9D\x93\xA9"    => "\x7A",
0828          "\xF0\x9D\x94\x84"    => "\x61",
0829          "\xF0\x9D\x94\x85"    => "\x62",
0830          "\xF0\x9D\x94\x87"    => "\x64",
0831          "\xF0\x9D\x94\x88"    => "\x65",
0832          "\xF0\x9D\x94\x89"    => "\x66",
0833          "\xF0\x9D\x94\x8A"    => "\x67",
0834          "\xF0\x9D\x94\x8D"    => "\x6A",
0835          "\xF0\x9D\x94\x8E"    => "\x6B",
0836          "\xF0\x9D\x94\x8F"    => "\x6C",
0837          "\xF0\x9D\x94\x90"    => "\x6D",
0838          "\xF0\x9D\x94\x91"    => "\x6E",
0839          "\xF0\x9D\x94\x92"    => "\x6F",
0840          "\xF0\x9D\x94\x93"    => "\x70",
0841          "\xF0\x9D\x94\x94"    => "\x71",
0842          "\xF0\x9D\x94\x96"    => "\x73",
0843          "\xF0\x9D\x94\x97"    => "\x74",
0844          "\xF0\x9D\x94\x98"    => "\x75",
0845          "\xF0\x9D\x94\x99"    => "\x76",
0846          "\xF0\x9D\x94\x9A"    => "\x77",
0847          "\xF0\x9D\x94\x9B"    => "\x78",
0848          "\xF0\x9D\x94\x9C"    => "\x79",
0849          "\xF0\x9D\x94\xB8"    => "\x61",
0850          "\xF0\x9D\x94\xB9"    => "\x62",
0851          "\xF0\x9D\x94\xBB"    => "\x64",
0852          "\xF0\x9D\x94\xBC"    => "\x65",
0853          "\xF0\x9D\x94\xBD"    => "\x66",
0854          "\xF0\x9D\x94\xBE"    => "\x67",
0855          "\xF0\x9D\x95\x80"    => "\x69",
0856          "\xF0\x9D\x95\x81"    => "\x6A",
0857          "\xF0\x9D\x95\x82"    => "\x6B",
0858          "\xF0\x9D\x95\x83"    => "\x6C",
0859          "\xF0\x9D\x95\x84"    => "\x6D",
0860          "\xF0\x9D\x95\x86"    => "\x6F",
0861          "\xF0\x9D\x95\x8A"    => "\x73",
0862          "\xF0\x9D\x95\x8B"    => "\x74",
0863          "\xF0\x9D\x95\x8C"    => "\x75",
0864          "\xF0\x9D\x95\x8D"    => "\x76",
0865          "\xF0\x9D\x95\x8E"    => "\x77",
0866          "\xF0\x9D\x95\x8F"    => "\x78",
0867          "\xF0\x9D\x95\x90"    => "\x79",
0868          "\xF0\x9D\x95\xAC"    => "\x61",
0869          "\xF0\x9D\x95\xAD"    => "\x62",
0870          "\xF0\x9D\x95\xAE"    => "\x63",
0871          "\xF0\x9D\x95\xAF"    => "\x64",
0872          "\xF0\x9D\x95\xB0"    => "\x65",
0873          "\xF0\x9D\x95\xB1"    => "\x66",
0874          "\xF0\x9D\x95\xB2"    => "\x67",
0875          "\xF0\x9D\x95\xB3"    => "\x68",
0876          "\xF0\x9D\x95\xB4"    => "\x69",
0877          "\xF0\x9D\x95\xB5"    => "\x6A",
0878          "\xF0\x9D\x95\xB6"    => "\x6B",
0879          "\xF0\x9D\x95\xB7"    => "\x6C",
0880          "\xF0\x9D\x95\xB8"    => "\x6D",
0881          "\xF0\x9D\x95\xB9"    => "\x6E",
0882          "\xF0\x9D\x95\xBA"    => "\x6F",
0883          "\xF0\x9D\x95\xBB"    => "\x70",
0884          "\xF0\x9D\x95\xBC"    => "\x71",
0885          "\xF0\x9D\x95\xBD"    => "\x72",
0886          "\xF0\x9D\x95\xBE"    => "\x73",
0887          "\xF0\x9D\x95\xBF"    => "\x74",
0888          "\xF0\x9D\x96\x80"    => "\x75",
0889          "\xF0\x9D\x96\x81"    => "\x76",
0890          "\xF0\x9D\x96\x82"    => "\x77",
0891          "\xF0\x9D\x96\x83"    => "\x78",
0892          "\xF0\x9D\x96\x84"    => "\x79",
0893          "\xF0\x9D\x96\x85"    => "\x7A",
0894          "\xF0\x9D\x96\xA0"    => "\x61",
0895          "\xF0\x9D\x96\xA1"    => "\x62",
0896          "\xF0\x9D\x96\xA2"    => "\x63",
0897          "\xF0\x9D\x96\xA3"    => "\x64",
0898          "\xF0\x9D\x96\xA4"    => "\x65",
0899          "\xF0\x9D\x96\xA5"    => "\x66",
0900          "\xF0\x9D\x96\xA6"    => "\x67",
0901          "\xF0\x9D\x96\xA7"    => "\x68",
0902          "\xF0\x9D\x96\xA8"    => "\x69",
0903          "\xF0\x9D\x96\xA9"    => "\x6A",
0904          "\xF0\x9D\x96\xAA"    => "\x6B",
0905          "\xF0\x9D\x96\xAB"    => "\x6C",
0906          "\xF0\x9D\x96\xAC"    => "\x6D",
0907          "\xF0\x9D\x96\xAD"    => "\x6E",
0908          "\xF0\x9D\x96\xAE"    => "\x6F",
0909          "\xF0\x9D\x96\xAF"    => "\x70",
0910          "\xF0\x9D\x96\xB0"    => "\x71",
0911          "\xF0\x9D\x96\xB1"    => "\x72",
0912          "\xF0\x9D\x96\xB2"    => "\x73",
0913          "\xF0\x9D\x96\xB3"    => "\x74",
0914          "\xF0\x9D\x96\xB4"    => "\x75",
0915          "\xF0\x9D\x96\xB5"    => "\x76",
0916          "\xF0\x9D\x96\xB6"    => "\x77",
0917          "\xF0\x9D\x96\xB7"    => "\x78",
0918          "\xF0\x9D\x96\xB8"    => "\x79",
0919          "\xF0\x9D\x96\xB9"    => "\x7A",
0920          "\xF0\x9D\x97\x94"    => "\x61",
0921          "\xF0\x9D\x97\x95"    => "\x62",
0922          "\xF0\x9D\x97\x96"    => "\x63",
0923          "\xF0\x9D\x97\x97"    => "\x64",
0924          "\xF0\x9D\x97\x98"    => "\x65",
0925          "\xF0\x9D\x97\x99"    => "\x66",
0926          "\xF0\x9D\x97\x9A"    => "\x67",
0927          "\xF0\x9D\x97\x9B"    => "\x68",
0928          "\xF0\x9D\x97\x9C"    => "\x69",
0929          "\xF0\x9D\x97\x9D"    => "\x6A",
0930          "\xF0\x9D\x97\x9E"    => "\x6B",
0931          "\xF0\x9D\x97\x9F"    => "\x6C",
0932          "\xF0\x9D\x97\xA0"    => "\x6D",
0933          "\xF0\x9D\x97\xA1"    => "\x6E",
0934          "\xF0\x9D\x97\xA2"    => "\x6F",
0935          "\xF0\x9D\x97\xA3"    => "\x70",
0936          "\xF0\x9D\x97\xA4"    => "\x71",
0937          "\xF0\x9D\x97\xA5"    => "\x72",
0938          "\xF0\x9D\x97\xA6"    => "\x73",
0939          "\xF0\x9D\x97\xA7"    => "\x74",
0940          "\xF0\x9D\x97\xA8"    => "\x75",
0941          "\xF0\x9D\x97\xA9"    => "\x76",
0942          "\xF0\x9D\x97\xAA"    => "\x77",
0943          "\xF0\x9D\x97\xAB"    => "\x78",
0944          "\xF0\x9D\x97\xAC"    => "\x79",
0945          "\xF0\x9D\x97\xAD"    => "\x7A",
0946          "\xF0\x9D\x98\x88"    => "\x61",
0947          "\xF0\x9D\x98\x89"    => "\x62",
0948          "\xF0\x9D\x98\x8A"    => "\x63",
0949          "\xF0\x9D\x98\x8B"    => "\x64",
0950          "\xF0\x9D\x98\x8C"    => "\x65",
0951          "\xF0\x9D\x98\x8D"    => "\x66",
0952          "\xF0\x9D\x98\x8E"    => "\x67",
0953          "\xF0\x9D\x98\x8F"    => "\x68",
0954          "\xF0\x9D\x98\x90"    => "\x69",
0955          "\xF0\x9D\x98\x91"    => "\x6A",
0956          "\xF0\x9D\x98\x92"    => "\x6B",
0957          "\xF0\x9D\x98\x93"    => "\x6C",
0958          "\xF0\x9D\x98\x94"    => "\x6D",
0959          "\xF0\x9D\x98\x95"    => "\x6E",
0960          "\xF0\x9D\x98\x96"    => "\x6F",
0961          "\xF0\x9D\x98\x97"    => "\x70",
0962          "\xF0\x9D\x98\x98"    => "\x71",
0963          "\xF0\x9D\x98\x99"    => "\x72",
0964          "\xF0\x9D\x98\x9A"    => "\x73",
0965          "\xF0\x9D\x98\x9B"    => "\x74",
0966          "\xF0\x9D\x98\x9C"    => "\x75",
0967          "\xF0\x9D\x98\x9D"    => "\x76",
0968          "\xF0\x9D\x98\x9E"    => "\x77",
0969          "\xF0\x9D\x98\x9F"    => "\x78",
0970          "\xF0\x9D\x98\xA0"    => "\x79",
0971          "\xF0\x9D\x98\xA1"    => "\x7A",
0972          "\xF0\x9D\x98\xBC"    => "\x61",
0973          "\xF0\x9D\x98\xBD"    => "\x62",
0974          "\xF0\x9D\x98\xBE"    => "\x63",
0975          "\xF0\x9D\x98\xBF"    => "\x64",
0976          "\xF0\x9D\x99\x80"    => "\x65",
0977          "\xF0\x9D\x99\x81"    => "\x66",
0978          "\xF0\x9D\x99\x82"    => "\x67",
0979          "\xF0\x9D\x99\x83"    => "\x68",
0980          "\xF0\x9D\x99\x84"    => "\x69",
0981          "\xF0\x9D\x99\x85"    => "\x6A",
0982          "\xF0\x9D\x99\x86"    => "\x6B",
0983          "\xF0\x9D\x99\x87"    => "\x6C",
0984          "\xF0\x9D\x99\x88"    => "\x6D",
0985          "\xF0\x9D\x99\x89"    => "\x6E",
0986          "\xF0\x9D\x99\x8A"    => "\x6F",
0987          "\xF0\x9D\x99\x8B"    => "\x70",
0988          "\xF0\x9D\x99\x8C"    => "\x71",
0989          "\xF0\x9D\x99\x8D"    => "\x72",
0990          "\xF0\x9D\x99\x8E"    => "\x73",
0991          "\xF0\x9D\x99\x8F"    => "\x74",
0992          "\xF0\x9D\x99\x90"    => "\x75",
0993          "\xF0\x9D\x99\x91"    => "\x76",
0994          "\xF0\x9D\x99\x92"    => "\x77",
0995          "\xF0\x9D\x99\x93"    => "\x78",
0996          "\xF0\x9D\x99\x94"    => "\x79",
0997          "\xF0\x9D\x99\x95"    => "\x7A",
0998          "\xF0\x9D\x99\xB0"    => "\x61",
0999          "\xF0\x9D\x99\xB1"    => "\x62",
1000          "\xF0\x9D\x99\xB2"    => "\x63",
1001          "\xF0\x9D\x99\xB3"    => "\x64",
1002          "\xF0\x9D\x99\xB4"    => "\x65",
1003          "\xF0\x9D\x99\xB5"    => "\x66",
1004          "\xF0\x9D\x99\xB6"    => "\x67",
1005          "\xF0\x9D\x99\xB7"    => "\x68",
1006          "\xF0\x9D\x99\xB8"    => "\x69",
1007          "\xF0\x9D\x99\xB9"    => "\x6A",
1008          "\xF0\x9D\x99\xBA"    => "\x6B",
1009          "\xF0\x9D\x99\xBB"    => "\x6C",
1010          "\xF0\x9D\x99\xBC"    => "\x6D",
1011          "\xF0\x9D\x99\xBD"    => "\x6E",
1012          "\xF0\x9D\x99\xBE"    => "\x6F",
1013          "\xF0\x9D\x99\xBF"    => "\x70",
1014          "\xF0\x9D\x9A\x80"    => "\x71",
1015          "\xF0\x9D\x9A\x81"    => "\x72",
1016          "\xF0\x9D\x9A\x82"    => "\x73",
1017          "\xF0\x9D\x9A\x83"    => "\x74",
1018          "\xF0\x9D\x9A\x84"    => "\x75",
1019          "\xF0\x9D\x9A\x85"    => "\x76",
1020          "\xF0\x9D\x9A\x86"    => "\x77",
1021          "\xF0\x9D\x9A\x87"    => "\x78",
1022          "\xF0\x9D\x9A\x88"    => "\x79",
1023          "\xF0\x9D\x9A\x89"    => "\x7A",
1024          "\xF0\x9D\x9A\xA8"    => "\xCE\xB1",
1025          "\xF0\x9D\x9A\xA9"    => "\xCE\xB2",
1026          "\xF0\x9D\x9A\xAA"    => "\xCE\xB3",
1027          "\xF0\x9D\x9A\xAB"    => "\xCE\xB4",
1028          "\xF0\x9D\x9A\xAC"    => "\xCE\xB5",
1029          "\xF0\x9D\x9A\xAD"    => "\xCE\xB6",
1030          "\xF0\x9D\x9A\xAE"    => "\xCE\xB7",
1031          "\xF0\x9D\x9A\xAF"    => "\xCE\xB8",
1032          "\xF0\x9D\x9A\xB0"    => "\xCE\xB9",
1033          "\xF0\x9D\x9A\xB1"    => "\xCE\xBA",
1034          "\xF0\x9D\x9A\xB2"    => "\xCE\xBB",
1035          "\xF0\x9D\x9A\xB3"    => "\xCE\xBC",
1036          "\xF0\x9D\x9A\xB4"    => "\xCE\xBD",
1037          "\xF0\x9D\x9A\xB5"    => "\xCE\xBE",
1038          "\xF0\x9D\x9A\xB6"    => "\xCE\xBF",
1039          "\xF0\x9D\x9A\xB7"    => "\xCF\x80",
1040          "\xF0\x9D\x9A\xB8"    => "\xCF\x81",
1041          "\xF0\x9D\x9A\xB9"    => "\xCE\xB8",
1042          "\xF0\x9D\x9A\xBA"    => "\xCF\x83",
1043          "\xF0\x9D\x9A\xBB"    => "\xCF\x84",
1044          "\xF0\x9D\x9A\xBC"    => "\xCF\x85",
1045          "\xF0\x9D\x9A\xBD"    => "\xCF\x86",
1046          "\xF0\x9D\x9A\xBE"    => "\xCF\x87",
1047          "\xF0\x9D\x9A\xBF"    => "\xCF\x88",
1048          "\xF0\x9D\x9B\x80"    => "\xCF\x89",
1049          "\xF0\x9D\x9B\x93"    => "\xCF\x83",
1050          "\xF0\x9D\x9B\xA2"    => "\xCE\xB1",
1051          "\xF0\x9D\x9B\xA3"    => "\xCE\xB2",
1052          "\xF0\x9D\x9B\xA4"    => "\xCE\xB3",
1053          "\xF0\x9D\x9B\xA5"    => "\xCE\xB4",
1054          "\xF0\x9D\x9B\xA6"    => "\xCE\xB5",
1055          "\xF0\x9D\x9B\xA7"    => "\xCE\xB6",
1056          "\xF0\x9D\x9B\xA8"    => "\xCE\xB7",
1057          "\xF0\x9D\x9B\xA9"    => "\xCE\xB8",
1058          "\xF0\x9D\x9B\xAA"    => "\xCE\xB9",
1059          "\xF0\x9D\x9B\xAB"    => "\xCE\xBA",
1060          "\xF0\x9D\x9B\xAC"    => "\xCE\xBB",
1061          "\xF0\x9D\x9B\xAD"    => "\xCE\xBC",
1062          "\xF0\x9D\x9B\xAE"    => "\xCE\xBD",
1063          "\xF0\x9D\x9B\xAF"    => "\xCE\xBE",
1064          "\xF0\x9D\x9B\xB0"    => "\xCE\xBF",
1065          "\xF0\x9D\x9B\xB1"    => "\xCF\x80",
1066          "\xF0\x9D\x9B\xB2"    => "\xCF\x81",
1067          "\xF0\x9D\x9B\xB3"    => "\xCE\xB8",
1068          "\xF0\x9D\x9B\xB4"    => "\xCF\x83",
1069          "\xF0\x9D\x9B\xB5"    => "\xCF\x84",
1070          "\xF0\x9D\x9B\xB6"    => "\xCF\x85",
1071          "\xF0\x9D\x9B\xB7"    => "\xCF\x86",
1072          "\xF0\x9D\x9B\xB8"    => "\xCF\x87",
1073          "\xF0\x9D\x9B\xB9"    => "\xCF\x88",
1074          "\xF0\x9D\x9B\xBA"    => "\xCF\x89",
1075          "\xF0\x9D\x9C\x8D"    => "\xCF\x83",
1076          "\xF0\x9D\x9C\x9C"    => "\xCE\xB1",
1077          "\xF0\x9D\x9C\x9D"    => "\xCE\xB2",
1078          "\xF0\x9D\x9C\x9E"    => "\xCE\xB3",
1079          "\xF0\x9D\x9C\x9F"    => "\xCE\xB4",
1080          "\xF0\x9D\x9C\xA0"    => "\xCE\xB5",
1081          "\xF0\x9D\x9C\xA1"    => "\xCE\xB6",
1082          "\xF0\x9D\x9C\xA2"    => "\xCE\xB7",
1083          "\xF0\x9D\x9C\xA3"    => "\xCE\xB8",
1084          "\xF0\x9D\x9C\xA4"    => "\xCE\xB9",
1085          "\xF0\x9D\x9C\xA5"    => "\xCE\xBA",
1086          "\xF0\x9D\x9C\xA6"    => "\xCE\xBB",
1087          "\xF0\x9D\x9C\xA7"    => "\xCE\xBC",
1088          "\xF0\x9D\x9C\xA8"    => "\xCE\xBD",
1089          "\xF0\x9D\x9C\xA9"    => "\xCE\xBE",
1090          "\xF0\x9D\x9C\xAA"    => "\xCE\xBF",
1091          "\xF0\x9D\x9C\xAB"    => "\xCF\x80",
1092          "\xF0\x9D\x9C\xAC"    => "\xCF\x81",
1093          "\xF0\x9D\x9C\xAD"    => "\xCE\xB8",
1094          "\xF0\x9D\x9C\xAE"    => "\xCF\x83",
1095          "\xF0\x9D\x9C\xAF"    => "\xCF\x84",
1096          "\xF0\x9D\x9C\xB0"    => "\xCF\x85",
1097          "\xF0\x9D\x9C\xB1"    => "\xCF\x86",
1098          "\xF0\x9D\x9C\xB2"    => "\xCF\x87",
1099          "\xF0\x9D\x9C\xB3"    => "\xCF\x88",
1100          "\xF0\x9D\x9C\xB4"    => "\xCF\x89",
1101          "\xF0\x9D\x9D\x87"    => "\xCF\x83",
1102          "\xF0\x9D\x9D\x96"    => "\xCE\xB1",
1103          "\xF0\x9D\x9D\x97"    => "\xCE\xB2",
1104          "\xF0\x9D\x9D\x98"    => "\xCE\xB3",
1105          "\xF0\x9D\x9D\x99"    => "\xCE\xB4",
1106          "\xF0\x9D\x9D\x9A"    => "\xCE\xB5",
1107          "\xF0\x9D\x9D\x9B"    => "\xCE\xB6",
1108          "\xF0\x9D\x9D\x9C"    => "\xCE\xB7",
1109          "\xF0\x9D\x9D\x9D"    => "\xCE\xB8",
1110          "\xF0\x9D\x9D\x9E"    => "\xCE\xB9",
1111          "\xF0\x9D\x9D\x9F"    => "\xCE\xBA",
1112          "\xF0\x9D\x9D\xA0"    => "\xCE\xBB",
1113          "\xF0\x9D\x9D\xA1"    => "\xCE\xBC",
1114          "\xF0\x9D\x9D\xA2"    => "\xCE\xBD",
1115          "\xF0\x9D\x9D\xA3"    => "\xCE\xBE",
1116          "\xF0\x9D\x9D\xA4"    => "\xCE\xBF",
1117          "\xF0\x9D\x9D\xA5"    => "\xCF\x80",
1118          "\xF0\x9D\x9D\xA6"    => "\xCF\x81",
1119          "\xF0\x9D\x9D\xA7"    => "\xCE\xB8",
1120          "\xF0\x9D\x9D\xA8"    => "\xCF\x83",
1121          "\xF0\x9D\x9D\xA9"    => "\xCF\x84",
1122          "\xF0\x9D\x9D\xAA"    => "\xCF\x85",
1123          "\xF0\x9D\x9D\xAB"    => "\xCF\x86",
1124          "\xF0\x9D\x9D\xAC"    => "\xCF\x87",
1125          "\xF0\x9D\x9D\xAD"    => "\xCF\x88",
1126          "\xF0\x9D\x9D\xAE"    => "\xCF\x89",
1127          "\xF0\x9D\x9E\x81"    => "\xCF\x83",
1128          "\xF0\x9D\x9E\x90"    => "\xCE\xB1",
1129          "\xF0\x9D\x9E\x91"    => "\xCE\xB2",
1130          "\xF0\x9D\x9E\x92"    => "\xCE\xB3",
1131          "\xF0\x9D\x9E\x93"    => "\xCE\xB4",
1132          "\xF0\x9D\x9E\x94"    => "\xCE\xB5",
1133          "\xF0\x9D\x9E\x95"    => "\xCE\xB6",
1134          "\xF0\x9D\x9E\x96"    => "\xCE\xB7",
1135          "\xF0\x9D\x9E\x97"    => "\xCE\xB8",
1136          "\xF0\x9D\x9E\x98"    => "\xCE\xB9",
1137          "\xF0\x9D\x9E\x99"    => "\xCE\xBA",
1138          "\xF0\x9D\x9E\x9A"    => "\xCE\xBB",
1139          "\xF0\x9D\x9E\x9B"    => "\xCE\xBC",
1140          "\xF0\x9D\x9E\x9C"    => "\xCE\xBD",
1141          "\xF0\x9D\x9E\x9D"    => "\xCE\xBE",
1142          "\xF0\x9D\x9E\x9E"    => "\xCE\xBF",
1143          "\xF0\x9D\x9E\x9F"    => "\xCF\x80",
1144          "\xF0\x9D\x9E\xA0"    => "\xCF\x81",
1145          "\xF0\x9D\x9E\xA1"    => "\xCE\xB8",
1146          "\xF0\x9D\x9E\xA2"    => "\xCF\x83",
1147          "\xF0\x9D\x9E\xA3"    => "\xCF\x84",
1148          "\xF0\x9D\x9E\xA4"    => "\xCF\x85",
1149          "\xF0\x9D\x9E\xA5"    => "\xCF\x86",
1150          "\xF0\x9D\x9E\xA6"    => "\xCF\x87",
1151          "\xF0\x9D\x9E\xA7"    => "\xCF\x88",
1152          "\xF0\x9D\x9E\xA8"    => "\xCF\x89",
1153          "\xF0\x9D\x9E\xBB"    => "\xCF\x83",
1154          "\xF0\x9D\x9F\x8A"    => "\xCF\x9D",
1155      );
1156   
1157      // do the case fold
1158      $text = utf8_case_fold($text, $option);
1159   
1160      // convert to NFKC
1161      Normalizer::normalize($text, Normalizer::NFKC);
1162   
1163      // FC_NFKC_Closure, http://www.unicode.org/Public/5.0.0/ucd/DerivedNormalizationProps.txt
1164      $text = strtr($text, $fc_nfkc_closure);
1165   
1166      return $text;
1167  }
1168   
/**
* Case fold a string that is assumed to already be in NFC.
*
* Before folding, every precomposed character listed in the table below is
* replaced by its base character followed by U+0345 (bytes "\xCD\x85"). These
* are the composed code points that contain U+0345 in their decomposition;
* expanding them up front avoids further normalization after the fold.
*
* @param    string    $text    text to be case folded
* @param    string    $option    determines how we will fold the cases, handed on to utf8_case_fold()
* @return    string            case folded text
*/
function utf8_case_fold_nfc($text, $option = 'full')
{
    // composed code point => base character + U+0345
    static $ypogegrammeni = array(
        "\xCD\xBA"        => "\x20\xCD\x85",
        "\xE1\xBE\x80"    => "\xE1\xBC\x80\xCD\x85",
        "\xE1\xBE\x81"    => "\xE1\xBC\x81\xCD\x85",
        "\xE1\xBE\x82"    => "\xE1\xBC\x82\xCD\x85",
        "\xE1\xBE\x83"    => "\xE1\xBC\x83\xCD\x85",
        "\xE1\xBE\x84"    => "\xE1\xBC\x84\xCD\x85",
        "\xE1\xBE\x85"    => "\xE1\xBC\x85\xCD\x85",
        "\xE1\xBE\x86"    => "\xE1\xBC\x86\xCD\x85",
        "\xE1\xBE\x87"    => "\xE1\xBC\x87\xCD\x85",
        "\xE1\xBE\x88"    => "\xE1\xBC\x88\xCD\x85",
        "\xE1\xBE\x89"    => "\xE1\xBC\x89\xCD\x85",
        "\xE1\xBE\x8A"    => "\xE1\xBC\x8A\xCD\x85",
        "\xE1\xBE\x8B"    => "\xE1\xBC\x8B\xCD\x85",
        "\xE1\xBE\x8C"    => "\xE1\xBC\x8C\xCD\x85",
        "\xE1\xBE\x8D"    => "\xE1\xBC\x8D\xCD\x85",
        "\xE1\xBE\x8E"    => "\xE1\xBC\x8E\xCD\x85",
        "\xE1\xBE\x8F"    => "\xE1\xBC\x8F\xCD\x85",
        "\xE1\xBE\x90"    => "\xE1\xBC\xA0\xCD\x85",
        "\xE1\xBE\x91"    => "\xE1\xBC\xA1\xCD\x85",
        "\xE1\xBE\x92"    => "\xE1\xBC\xA2\xCD\x85",
        "\xE1\xBE\x93"    => "\xE1\xBC\xA3\xCD\x85",
        "\xE1\xBE\x94"    => "\xE1\xBC\xA4\xCD\x85",
        "\xE1\xBE\x95"    => "\xE1\xBC\xA5\xCD\x85",
        "\xE1\xBE\x96"    => "\xE1\xBC\xA6\xCD\x85",
        "\xE1\xBE\x97"    => "\xE1\xBC\xA7\xCD\x85",
        "\xE1\xBE\x98"    => "\xE1\xBC\xA8\xCD\x85",
        "\xE1\xBE\x99"    => "\xE1\xBC\xA9\xCD\x85",
        "\xE1\xBE\x9A"    => "\xE1\xBC\xAA\xCD\x85",
        "\xE1\xBE\x9B"    => "\xE1\xBC\xAB\xCD\x85",
        "\xE1\xBE\x9C"    => "\xE1\xBC\xAC\xCD\x85",
        "\xE1\xBE\x9D"    => "\xE1\xBC\xAD\xCD\x85",
        "\xE1\xBE\x9E"    => "\xE1\xBC\xAE\xCD\x85",
        "\xE1\xBE\x9F"    => "\xE1\xBC\xAF\xCD\x85",
        "\xE1\xBE\xA0"    => "\xE1\xBD\xA0\xCD\x85",
        "\xE1\xBE\xA1"    => "\xE1\xBD\xA1\xCD\x85",
        "\xE1\xBE\xA2"    => "\xE1\xBD\xA2\xCD\x85",
        "\xE1\xBE\xA3"    => "\xE1\xBD\xA3\xCD\x85",
        "\xE1\xBE\xA4"    => "\xE1\xBD\xA4\xCD\x85",
        "\xE1\xBE\xA5"    => "\xE1\xBD\xA5\xCD\x85",
        "\xE1\xBE\xA6"    => "\xE1\xBD\xA6\xCD\x85",
        "\xE1\xBE\xA7"    => "\xE1\xBD\xA7\xCD\x85",
        "\xE1\xBE\xA8"    => "\xE1\xBD\xA8\xCD\x85",
        "\xE1\xBE\xA9"    => "\xE1\xBD\xA9\xCD\x85",
        "\xE1\xBE\xAA"    => "\xE1\xBD\xAA\xCD\x85",
        "\xE1\xBE\xAB"    => "\xE1\xBD\xAB\xCD\x85",
        "\xE1\xBE\xAC"    => "\xE1\xBD\xAC\xCD\x85",
        "\xE1\xBE\xAD"    => "\xE1\xBD\xAD\xCD\x85",
        "\xE1\xBE\xAE"    => "\xE1\xBD\xAE\xCD\x85",
        "\xE1\xBE\xAF"    => "\xE1\xBD\xAF\xCD\x85",
        "\xE1\xBE\xB2"    => "\xE1\xBD\xB0\xCD\x85",
        "\xE1\xBE\xB3"    => "\xCE\xB1\xCD\x85",
        "\xE1\xBE\xB4"    => "\xCE\xAC\xCD\x85",
        "\xE1\xBE\xB7"    => "\xE1\xBE\xB6\xCD\x85",
        "\xE1\xBE\xBC"    => "\xCE\x91\xCD\x85",
        "\xE1\xBF\x82"    => "\xE1\xBD\xB4\xCD\x85",
        "\xE1\xBF\x83"    => "\xCE\xB7\xCD\x85",
        "\xE1\xBF\x84"    => "\xCE\xAE\xCD\x85",
        "\xE1\xBF\x87"    => "\xE1\xBF\x86\xCD\x85",
        "\xE1\xBF\x8C"    => "\xCE\x97\xCD\x85",
        "\xE1\xBF\xB2"    => "\xE1\xBD\xBC\xCD\x85",
        "\xE1\xBF\xB3"    => "\xCF\x89\xCD\x85",
        "\xE1\xBF\xB4"    => "\xCF\x8E\xCD\x85",
        "\xE1\xBF\xB7"    => "\xE1\xBF\xB6\xCD\x85",
        "\xE1\xBF\xBC"    => "\xCE\xA9\xCD\x85",
    );

    // perform a small trick, avoid further normalization on composed points that contain U+0345 in their decomposition
    $text = strtr($text, $ypogegrammeni);

    // do the case fold
    return utf8_case_fold($text, $option);
}
1255   
/**
* wrapper around PHP's native normalizer from intl
* previously a PECL extension, included in the core since PHP 5.3.0
* http://php.net/manual/en/normalizer.normalize.php
*
* Arrays are walked recursively, so nesting of any depth is supported and
* all array keys are preserved. Values that are empty() or that
* Normalizer::isNormalized() already accepts are returned untouched; any
* other value is replaced by the string result of Normalizer::normalize().
*
* @param    mixed    $strings    a string or an (arbitrarily nested) array of strings to normalize
* @return    mixed                the normalized content, preserving array keys if array given.
*/
function utf8_normalize_nfc($strings)
{
    // '', '0', 0, null, false and array() need no work
    if (empty($strings))
    {
        return $strings;
    }

    if (is_array($strings))
    {
        // recurse instead of unrolling a fixed number of levels, so deeper
        // arrays are never handed to Normalizer as if they were strings
        foreach ($strings as $key => $value)
        {
            $strings[$key] = utf8_normalize_nfc($value);
        }

        return $strings;
    }

    // skip the more expensive normalize() call when nothing would change
    if (Normalizer::isNormalized($strings))
    {
        return $strings;
    }

    // normalize() returns false on failure; the cast turns that into ''
    return (string) Normalizer::normalize($strings);
}
1307   
/**
* Build the "clean" form of a string.
*
* The clean form is case folded via utf8_case_fold_nfkc(), has every entry of
* the confusables table replaced by its target character, and is stripped of
* control characters and surplus spaces.
*
* Be aware that any change to this function, or to the functions it relies
* on, requires rebuilding the username_clean column of the users table and
* every other column that stores a clean string; otherwise lookups against
* those columns will break.
*
* @param    string    $text    An unclean string, maybe user input (has to be valid UTF-8!)
* @return    string            Cleaned up version of the input string
*/
function utf8_clean_string($text)
{
    global $phpbb_root_path, $phpEx;

    // the confusables table is loaded on first use and kept for later calls
    static $homographs = array();

    if (empty($homographs))
    {
        $homographs = include($phpbb_root_path . 'includes/utf/data/confusables.' . $phpEx);
    }

    // case fold first, then map the homographs
    $clean = strtr(utf8_case_fold_nfkc($text), $homographs);

    // drop the remaining control characters (C0 range, DEL and the two-byte C1 range)
    $clean = preg_replace('#(?:[\x00-\x1F\x7F]+|(?:\xC2[\x80-\x9F])+)#', '', $clean);

    // collapse runs of spaces into a single space
    $clean = preg_replace('# {2,}#', ' ', $clean);

    // a plain trim is enough here: all other space characters should already
    // have been turned into normal ASCII spaces by the steps above
    return trim($clean);
}
1343   
/**
* Shorthand for htmlspecialchars($value, ENT_COMPAT, 'UTF-8')
*
* @param    string    $value    the text to escape
* @return    string            the escaped text
*/
function utf8_htmlspecialchars($value)
{
    $escaped = htmlspecialchars($value, ENT_COMPAT, 'UTF-8');

    return $escaped;
}
1351   
/**
* Try to convert a system message to UTF-8 and escape it
*
* PHP assumes such messages are ISO-8859-1, so the same assumption is made
* here. Pure ASCII messages are only escaped, as recoding them would not
* change anything.
*
* @param    string    $message    the message as returned by the system
* @return    string                the message passed through utf8_htmlspecialchars()
*/
function utf8_convert_message($message)
{
    // any byte in the range 0x80-0xFF means the message is not plain ASCII
    $has_high_bytes = (bool) preg_match('/[\x80-\xFF]/', $message);

    if ($has_high_bytes)
    {
        $message = utf8_recode($message, 'ISO-8859-1');
    }

    return utf8_htmlspecialchars($message);
}
1370   
/**
* UTF8-compatible wordwrap replacement
*
* The string is split on $break first so that existing breaks survive, then
* each resulting line is split on single spaces and refilled word by word.
* Lengths are measured with utf8_strlen().
*
* A word that is longer than $width and stands at the start of a line is kept
* on that line when $cut is false; no empty line is put in front of it.
* Cutting is only done for a $width of at least 1, because a smaller width
* could never consume the word.
*
* @param    string    $string    The input string
* @param    int        $width    The column width. Defaults to 75.
* @param    string    $break    The line is broken using the optional break parameter. Defaults to '\n'.
* @param    bool    $cut    If the cut is set to TRUE, the string is always wrapped at the specified width. So if you have a word that is larger than the given width, it is broken apart.
*
* @return    string            the given string wrapped at the specified column.
*
*/
function utf8_wordwrap($string, $width = 75, $break = "\n", $cut = false)
{
    // Cutting with a width below 1 would loop forever, so it is disabled
    $cut = $cut && $width > 0;

    // We first need to explode on $break, not destroying existing (intended) breaks
    $lines = explode($break, $string);
    $new_lines = array(0 => '');
    $index = 0;

    foreach ($lines as $line)
    {
        $words = explode(' ', $line);

        for ($i = 0, $size = sizeof($words); $i < $size; $i++)
        {
            $word = $words[$i];

            // If cut is true we need to cut the word if it is > width chars
            if ($cut && utf8_strlen($word) > $width)
            {
                // keep the remainder in place and step back so it is handled next
                $words[$i] = utf8_substr($word, $width);
                $word = utf8_substr($word, 0, $width);
                $i--;
            }

            // Only wrap when the current line already holds something; wrapping
            // an empty line would just emit a blank line before the word
            if ($new_lines[$index] !== '' && utf8_strlen($new_lines[$index] . $word) > $width)
            {
                // strip the trailing space that was appended after the last word
                $new_lines[$index] = substr($new_lines[$index], 0, -1);
                $index++;
                $new_lines[$index] = '';
            }

            $new_lines[$index] .= $word . ' ';
        }

        // strip the trailing space and open the line for the next input line
        $new_lines[$index] = substr($new_lines[$index], 0, -1);
        $index++;
        $new_lines[$index] = '';
    }

    // the last opened line was never filled
    unset($new_lines[$index]);
    return implode($break, $new_lines);
}
1423   
/**
* UTF8-safe basename() function
*
* According to the PHP manual basename() has some limitations and depends on
* the locale setting, so a locale independent replacement is provided here.
* Everything up to and including the last forward slash is removed, then the
* same is done for the last backslash in what remains.
*
* @param string $filename The filename basename() should be applied to
* @return string The basenamed filename
*/
function utf8_basename($filename)
{
    // Both separators are always handled, forward slash first, because they
    // could be mixed or "sneaked" in. Never trust user input...
    foreach (array('/', '\\') as $separator)
    {
        if (strpos($filename, $separator) === false)
        {
            continue;
        }

        $start = utf8_strrpos($filename, $separator) + 1;
        $filename = utf8_substr($filename, $start);
    }

    return $filename;
}
1451