Page Menu
Home
Phorge
Search
Configure Global Search
Log In
Files
F224219
No One
Temporary
Actions
View File
Edit File
Delete File
View Transforms
Subscribe
Mute Notifications
Award Token
Flag For Later
Size
54 KB
Referenced Files
None
Subscribers
None
View Options
diff --git a/program/include/rcube_charset.php b/program/include/rcube_charset.php
index ff4c2bbce..e6da882ac 100644
--- a/program/include/rcube_charset.php
+++ b/program/include/rcube_charset.php
@@ -1,763 +1,790 @@
<?php
/*
+-----------------------------------------------------------------------+
| program/include/rcube_charset.php |
| |
| This file is part of the Roundcube Webmail client |
| Copyright (C) 2005-2012, The Roundcube Dev Team |
| Copyright (C) 2011-2012, Kolab Systems AG |
| Copyright (C) 2000 Edmund Grimley Evans <edmundo@rano.org> |
| |
| Licensed under the GNU General Public License version 3 or |
| any later version with exceptions for skins & plugins. |
| See the README file for a full license statement. |
| |
| PURPOSE: |
| Provide charset conversion functionality |
| |
+-----------------------------------------------------------------------+
| Author: Thomas Bruederli <roundcube@gmail.com> |
| Author: Aleksander Machniak <alec@alec.pl> |
+-----------------------------------------------------------------------+
*/
/**
* Character sets conversion functionality
*
* @package Core
* @author Thomas Bruederli <roundcube@gmail.com>
* @author Aleksander Machniak <alec@alec.pl>
* @author Edmund Grimley Evans <edmundo@rano.org>
*/
class rcube_charset
{
/**
 * Charset aliases, some of them taken from the HTML5 spec.
 *
 * Keys are alias names reduced to upper-case letters and digits only
 * (parse_charset() strips every other character before the lookup);
 * values are the charset names to use instead.
 *
 * NOTE(review): the purely numeric keys look like Windows charset
 * identifiers (e.g. 128 => SHIFT-JIS) - confirm where such values come from.
 *
 * @var array
 */
static public $aliases = array(
    'USASCII'       => 'WINDOWS-1252',
    'ANSIX31101983' => 'WINDOWS-1252',
    'ANSIX341968'   => 'WINDOWS-1252',
    'UNKNOWN8BIT'   => 'ISO-8859-15',
    'UNKNOWN'       => 'ISO-8859-15',
    'USERDEFINED'   => 'ISO-8859-15',
    'KSC56011987'   => 'EUC-KR',
    'GB2312'        => 'GBK',
    'GB231280'      => 'GBK',
    'UNICODE'       => 'UTF-8',
    'UTF7IMAP'      => 'UTF7-IMAP',
    'TIS620'        => 'WINDOWS-874',
    'ISO88599'      => 'WINDOWS-1254',
    'ISO885911'     => 'WINDOWS-874',
    'MACROMAN'      => 'MACINTOSH',
    '77'            => 'MAC',
    '128'           => 'SHIFT-JIS',
    '129'           => 'CP949',
    '130'           => 'CP1361',
    '134'           => 'GBK',
    '136'           => 'BIG5',
    '161'           => 'WINDOWS-1253',
    '162'           => 'WINDOWS-1254',
    '163'           => 'WINDOWS-1258',
    '177'           => 'WINDOWS-1255',
    '178'           => 'WINDOWS-1256',
    '186'           => 'WINDOWS-1257',
    '204'           => 'WINDOWS-1251',
    '222'           => 'WINDOWS-874',
    '238'           => 'WINDOWS-1250',
    'MS950'         => 'CP950',
    'WINDOWS949'    => 'UHC',
);
/**
 * Error handler callback that turns a reported PHP error into an exception.
 *
 * @param int    $errno  Severity level of the reported error
 * @param string $errstr Message of the reported error
 *
 * @throws ErrorException Always; carries the message, code 0 and the severity
 */
public static function error_handler($errno, $errstr)
{
    $code      = 0;
    $exception = new ErrorException($errstr, $code, $errno);

    throw $exception;
}
/**
 * Parse and validate charset name string (see #1485758).
 * Sometimes the charset string is malformed and there are also charset
 * aliases, but strict names are needed for charset conversion
 * (especially by the utf8 class).
 *
 * Results are cached per input string for the lifetime of the request.
 *
 * @param string $input Input charset name
 *
 * @return string|null The validated charset name, null for "binary"
 */
public static function parse_charset($input)
{
    static $charsets = array();

    if (isset($charsets[$input])) {
        return $charsets[$input];
    }

    $charset = strtoupper($input);

    $charset = preg_replace(array(
        '/^[^0-9A-Z]+/',    // e.g. _ISO-8859-JP$SIO
        '/\$.*$/',          // e.g. _ISO-8859-JP$SIO
        '/UNICODE-1-1-*/',  // RFC1641/1642
        '/^X-/',            // X- prefix (e.g. X-ROMAN8 => ROMAN8)
    ), '', $charset);

    if ($charset == 'BINARY') {
        return $charsets[$input] = null;
    }

    // allow A-Z and 0-9 only
    $str = preg_replace('/[^A-Z0-9]/', '', $charset);

    if (isset(self::$aliases[$str])) {
        $result = self::$aliases[$str];
    }
    // UTF
    else if (preg_match('/U[A-Z][A-Z](7|8|16|32)(BE|LE)*/', $str, $m)) {
        // the byte-order group is absent from $m when it did not match
        $result = 'UTF-' . $m[1] . (isset($m[2]) ? $m[2] : '');
    }
    // ISO-8859
    else if (preg_match('/ISO8859([0-9]{0,2})/', $str, $m)) {
        $iso = 'ISO-8859-' . ($m[1] ? $m[1] : 1);
        // some clients send windows-1252 text as latin1,
        // it is safe to use windows-1252 for all latin1
        $result = $iso == 'ISO-8859-1' ? 'WINDOWS-1252' : $iso;
    }
    // handle broken charset names e.g. WINDOWS-1250HTTP-EQUIVCONTENT-TYPE
    else if (preg_match('/(WIN|WINDOWS)([0-9]+)/', $str, $m)) {
        $result = 'WINDOWS-' . $m[2];
    }
    // LATIN
    else if (preg_match('/LATIN(.*)/', $str, $m)) {
        $aliases = array('2' => 2, '3' => 3, '4' => 4, '5' => 9, '6' => 10,
            '7' => 13, '8' => 14, '9' => 15, '10' => 16,
            'ARABIC' => 6, 'CYRILLIC' => 5, 'GREEK' => 7, 'GREEK1' => 7, 'HEBREW' => 8
        );

        // some clients send windows-1252 text as latin1,
        // it is safe to use windows-1252 for all latin1
        if ($m[1] == 1) {
            $result = 'WINDOWS-1252';
        }
        // if iconv is not supported we need ISO labels, it's also safe for iconv
        else if (!empty($aliases[$m[1]])) {
            $result = 'ISO-8859-' . $aliases[$m[1]];
        }
        // iconv requires conversion of e.g. LATIN-1 to LATIN1
        else {
            $result = $str;
        }
    }
    else {
        $result = $charset;
    }

    $charsets[$input] = $result;

    return $result;
}
/**
 * Convert a string from one charset to another.
 * Tries iconv first, then mbstring, then the bundled converters.
 *
 * @param string $str  Input string
 * @param string $from Suspected charset of the input string
 * @param string $to   Target charset to convert to; defaults to RCMAIL_CHARSET
 *
 * @return string Converted string; the input string is returned unchanged
 *                when no conversion is needed or none of the converters succeeds
 */
public static function convert($str, $from, $to = null)
{
    // detected capabilities and the fallback converter are kept between calls
    static $iconv_options = null;
    static $mbstring_list = null;
    static $mbstring_sch  = null;
    static $conv          = null;

    $to   = empty($to) ? RCMAIL_CHARSET : $to;
    $from = self::parse_charset($from);

    // It is a common case when UTF-16 charset is used with US-ASCII content (#1488654)
    // In that case we can just skip the conversion (use UTF-8)
    if ($from == 'UTF-16' && !preg_match('/[^\x00-\x7F]/', $str)) {
        $from = 'UTF-8';
    }

    if ($from == $to || empty($str) || empty($from)) {
        return $str;
    }

    if ($iconv_options === null) {
        if (function_exists('iconv')) {
            // ignore characters not available in output charset
            $iconv_options = '//IGNORE';
            if (iconv('', $iconv_options, '') === false) {
                // iconv implementation does not support options
                $iconv_options = '';
            }
        }
    }

    // convert charset using iconv module
    if ($iconv_options !== null && $from != 'UTF7-IMAP' && $to != 'UTF7-IMAP') {
        // throw an exception if iconv reports an illegal character in input
        // it means that input string has been truncated
        set_error_handler(array('rcube_charset', 'error_handler'), E_NOTICE);
        try {
            $_iconv = iconv($from, $to . $iconv_options, $str);
        } catch (ErrorException $e) {
            $_iconv = false;
        }
        restore_error_handler();

        if ($_iconv !== false) {
            return $_iconv;
        }
    }

    if ($mbstring_list === null) {
        if (extension_loaded('mbstring')) {
            $mbstring_sch  = mb_substitute_character();
            $mbstring_list = mb_list_encodings();
            $mbstring_list = array_map('strtoupper', $mbstring_list);
        }
    }

    // convert charset using mbstring module
    if ($mbstring_list !== null) {
        // charset names to swap for a name mbstring is asked for instead
        $aliases = array('WINDOWS-1257' => 'ISO-8859-13');

        // it happens that mbstring supports ASCII but not US-ASCII
        if (($from == 'US-ASCII' || $to == 'US-ASCII') && !in_array('US-ASCII', $mbstring_list)) {
            $aliases['US-ASCII'] = 'ASCII';
        }

        $mb_from = isset($aliases[$from]) ? $aliases[$from] : $from;
        $mb_to   = isset($aliases[$to]) ? $aliases[$to] : $to;

        // return if encoding found, string matches encoding and convert succeeded
        if (in_array($mb_from, $mbstring_list) && in_array($mb_to, $mbstring_list)) {
            if (mb_check_encoding($str, $mb_from)) {
                // Do the same as //IGNORE with iconv
                mb_substitute_character('none');
                $out = mb_convert_encoding($str, $mb_to, $mb_from);
                mb_substitute_character($mbstring_sch);

                if ($out !== false) {
                    return $out;
                }
            }
        }
    }

    // convert charset using bundled classes/functions
    if ($to == 'UTF-8') {
        if ($from == 'UTF7-IMAP') {
            if ($_str = self::utf7imap_to_utf8($str)) {
                return $_str;
            }
        }
        else if ($from == 'UTF-7') {
            if ($_str = self::utf7_to_utf8($str)) {
                return $_str;
            }
        }
        else if ($from == 'ISO-8859-1' && function_exists('utf8_encode')) {
            return utf8_encode($str);
        }
        else if (class_exists('utf8')) {
            if (!$conv) {
                $conv = new utf8($from);
            }
            else {
                $conv->loadCharset($from);
            }

            if ($_str = $conv->strToUtf8($str)) {
                return $_str;
            }
        }
    }

    // encode string for output
    if ($from == 'UTF-8') {
        // @TODO: we need a function for UTF-7 (RFC2152) conversion
        if ($to == 'UTF7-IMAP' || $to == 'UTF-7') {
            if ($_str = self::utf8_to_utf7imap($str)) {
                return $_str;
            }
        }
        else if ($to == 'ISO-8859-1' && function_exists('utf8_decode')) {
            return utf8_decode($str);
        }
        else if (class_exists('utf8')) {
            // the converter has to work with the target charset here,
            // $from is always UTF-8 in this branch
            if (!$conv) {
                $conv = new utf8($to);
            }
            else {
                $conv->loadCharset($to);
            }

            // NOTE(review): utf8ToStr() is the UTF-8 => charset counterpart of
            // strToUtf8() in the bundled utf8 class - confirm against that class
            if ($_str = $conv->utf8ToStr($str)) {
                return $_str;
            }
        }
    }

    // return original string
    return $str;
}
/**
 * Converts string from standard UTF-7 (RFC 2152) to UTF-8.
 *
 * A '+' starts a base64 run holding UTF-16 data. The run ends at the
 * first character outside the base64 alphabet; a terminating '-' is
 * absorbed, any other terminating character is kept in the output.
 * "+-" stands for a literal '+'.
 *
 * @param string $str Input string (UTF-7)
 *
 * @return string Converted string (UTF-8)
 */
public static function utf7_to_utf8($str)
{
    // 1 marks the ASCII characters of the base64 alphabet (A-Z a-z 0-9 + /)
    $Index_64 = array(
        0,0,0,0, 0,0,0,0, 0,0,0,0, 0,0,0,0,
        0,0,0,0, 0,0,0,0, 0,0,0,0, 0,0,0,0,
        0,0,0,0, 0,0,0,0, 0,0,0,1, 0,0,0,1,
        1,1,1,1, 1,1,1,1, 1,1,0,0, 0,0,0,0,
        0,1,1,1, 1,1,1,1, 1,1,1,1, 1,1,1,1,
        1,1,1,1, 1,1,1,1, 1,1,1,0, 0,0,0,0,
        0,1,1,1, 1,1,1,1, 1,1,1,1, 1,1,1,1,
        1,1,1,1, 1,1,1,1, 1,1,1,0, 0,0,0,0,
    );

    $str   = strval($str);
    $u7len = strlen($str);
    $res   = '';
    $i     = 0;

    while ($i < $u7len) {
        $u7 = $str[$i++];

        // directly encoded character
        if ($u7 != '+') {
            $res .= $u7;
            continue;
        }

        // collect the base64 run; bytes >= 0x80 are never part of it
        $ch = '';
        while ($i < $u7len && ord($str[$i]) < 0x80 && $Index_64[ord($str[$i])]) {
            $ch .= $str[$i++];
        }

        // only a '-' terminator is consumed together with the run
        $terminated = $i < $u7len && $str[$i] == '-';
        if ($terminated) {
            $i++;
        }

        if ($ch == '') {
            // "+-" encodes a literal plus sign, a lone '+' is dropped
            if ($terminated) {
                $res .= '+';
            }
            continue;
        }

        $res .= self::utf16_to_utf8(base64_decode($ch));
    }

    return $res;
}
/**
 * Converts string from UTF-16 (big endian, no BOM) to UTF-8
 * (helper for utf-7 to utf-8 conversion).
 *
 * Surrogate pairs are combined into a single four-byte sequence.
 * A trailing odd byte cannot form a code unit and is ignored.
 * U+0000 is emitted as the two-byte sequence C0 80, not as a NUL byte.
 *
 * @param string $str Input string
 *
 * @return string The converted string
 */
public static function utf16_to_utf8($str)
{
    $len = strlen($str);
    $dec = '';

    for ($i = 0; $i + 1 < $len; $i += 2) {
        $c = ord($str[$i]) << 8 | ord($str[$i + 1]);

        // high surrogate followed by a low surrogate => one code point
        if ($c >= 0xD800 && $c <= 0xDBFF && $i + 3 < $len) {
            $low = ord($str[$i + 2]) << 8 | ord($str[$i + 3]);

            if ($low >= 0xDC00 && $low <= 0xDFFF) {
                $c  = 0x10000 + (($c - 0xD800) << 10) + ($low - 0xDC00);
                $i += 2;
            }
        }

        if ($c >= 0x0001 && $c <= 0x007F) {
            $dec .= chr($c);
        }
        else if ($c > 0xFFFF) {
            $dec .= chr(0xF0 | (($c >> 18) & 0x07));
            $dec .= chr(0x80 | (($c >> 12) & 0x3F));
            $dec .= chr(0x80 | (($c >> 6) & 0x3F));
            $dec .= chr(0x80 | (($c >> 0) & 0x3F));
        }
        else if ($c > 0x07FF) {
            $dec .= chr(0xE0 | (($c >> 12) & 0x0F));
            $dec .= chr(0x80 | (($c >> 6) & 0x3F));
            $dec .= chr(0x80 | (($c >> 0) & 0x3F));
        }
        else {
            $dec .= chr(0xC0 | (($c >> 6) & 0x1F));
            $dec .= chr(0x80 | (($c >> 0) & 0x3F));
        }
    }

    return $dec;
}
/**
 * Convert the data ($str) from RFC 2060's UTF-7 to UTF-8.
 * If input data is invalid, return an empty string.
 * RFC 2060 obviously intends the encoding to be unique (see
 * point 5 in section 5.1.3), so we reject any non-canonical
 * form, such as &ACY- (instead of &-) or &AMA-&AMA- (instead
 * of &AMAAwA-).
 *
 * Translated from C to PHP by Thomas Bruederli <roundcube@gmail.com>
 *
 * @param string $str Input string (UTF7-IMAP)
 *
 * @return string Output string (UTF-8), empty string on invalid input
 */
public static function utf7imap_to_utf8($str)
{
    // 6-bit value of each ASCII character in the modified base64 alphabet
    // ('+' => 62, ',' => 63); -1 marks characters outside the alphabet
    $Index_64 = array(
        -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1,
        -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1,
        -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,62, 63,-1,-1,-1,
        52,53,54,55, 56,57,58,59, 60,61,-1,-1, -1,-1,-1,-1,
        -1, 0, 1, 2,  3, 4, 5, 6,  7, 8, 9,10, 11,12,13,14,
        15,16,17,18, 19,20,21,22, 23,24,25,-1, -1,-1,-1,-1,
        -1,26,27,28, 29,30,31,32, 33,34,35,36, 37,38,39,40,
        41,42,43,44, 45,46,47,48, 49,50,51,-1, -1,-1,-1,-1
    );

    $u7len = strlen($str);
    $str   = strval($str);
    $p     = '';   // output buffer
    $err   = '';   // value returned for any invalid input

    // $i is the read position, $u7len the number of bytes still to process
    for ($i=0; $u7len > 0; $i++, $u7len--) {
        $u7 = $str[$i];
        if ($u7 == '&') {
            $i++;
            $u7len--;
            $u7 = $str[$i];

            // "&-" is the escape for a literal ampersand
            if ($u7len && $u7 == '-') {
                $p .= '&';
                continue;
            }

            // $ch collects the bits of one 16-bit unit,
            // $k is the shift applied to the next 6-bit group
            $ch = 0;
            $k = 10;
            for (; $u7len > 0; $i++, $u7len--) {
                $u7 = $str[$i];

                // stop at the first byte that is not in the base64 alphabet
                if ((ord($u7) & 0x80) || ($b = $Index_64[ord($u7)]) == -1) {
                    break;
                }

                if ($k > 0) {
                    $ch |= $b << $k;
                    $k -= 6;
                }
                else {
                    // this group completes the unit: emit it as UTF-8
                    $ch |= $b >> (-$k);
                    if ($ch < 0x80) {
                        // Printable US-ASCII
                        // (must not be base64 encoded, so the input is rejected)
                        if (0x20 <= $ch && $ch < 0x7f) {
                            return $err;
                        }
                        $p .= chr($ch);
                    }
                    else if ($ch < 0x800) {
                        $p .= chr(0xc0 | ($ch >> 6));
                        $p .= chr(0x80 | ($ch & 0x3f));
                    }
                    else {
                        $p .= chr(0xe0 | ($ch >> 12));
                        $p .= chr(0x80 | (($ch >> 6) & 0x3f));
                        $p .= chr(0x80 | ($ch & 0x3f));
                    }

                    // carry the remaining bits of this group into the next unit
                    $ch = ($b << (16 + $k)) & 0xffff;
                    $k += 10;
                }
            }

            // Non-zero or too many extra bits
            if ($ch || $k < 6) {
                return $err;
            }

            // BASE64 not properly terminated
            if (!$u7len || $u7 != '-') {
                return $err;
            }

            // Adjacent BASE64 sections
            if ($u7len > 2 && $str[$i+1] == '&' && $str[$i+2] != '-') {
                return $err;
            }
        }
        // Not printable US-ASCII
        else if (ord($u7) < 0x20 || ord($u7) >= 0x7f) {
            return $err;
        }
        else {
            $p .= $u7;
        }
    }

    return $p;
}
/**
 * Convert the data ($str) from UTF-8 to RFC 2060's UTF-7.
 * Unicode characters above U+FFFF are replaced by U+FFFE.
 * If input data is invalid, return an empty string.
 *
 * Translated from C to PHP by Thomas Bruederli <roundcube@gmail.com>
 *
 * @param string $str Input string (UTF-8)
 *
 * @return string Output string (UTF7-IMAP), empty string on invalid input
 */
public static function utf8_to_utf7imap($str)
{
    // modified base64 alphabet: ',' takes the place of '/'
    $B64Chars = array(
        'A', 'B', 'C', 'D', 'E', 'F', 'G', 'H', 'I', 'J', 'K', 'L', 'M', 'N', 'O',
        'P', 'Q', 'R', 'S', 'T', 'U', 'V', 'W', 'X', 'Y', 'Z', 'a', 'b', 'c', 'd',
        'e', 'f', 'g', 'h', 'i', 'j', 'k', 'l', 'm', 'n', 'o', 'p', 'q', 'r', 's',
        't', 'u', 'v', 'w', 'x', 'y', 'z', '0', '1', '2', '3', '4', '5', '6', '7',
        '8', '9', '+', ','
    );

    $u8len  = strlen($str);  // number of input bytes still to process
    $base64 = 0;             // 1 while inside a "&...-" section
    $i      = 0;             // read position
    $p      = '';            // output buffer
    $err    = '';            // value returned for any invalid input

    while ($u8len) {
        $u8 = $str[$i];
        $c  = ord($u8);

        // the lead byte gives the payload bits ($ch) and
        // the number of continuation bytes to follow ($n)
        if ($c < 0x80) {
            $ch = $c;
            $n  = 0;
        }
        else if ($c < 0xc2) {
            return $err;
        }
        else if ($c < 0xe0) {
            $ch = $c & 0x1f;
            $n  = 1;
        }
        else if ($c < 0xf0) {
            $ch = $c & 0x0f;
            $n  = 2;
        }
        else if ($c < 0xf8) {
            $ch = $c & 0x07;
            $n  = 3;
        }
        else if ($c < 0xfc) {
            $ch = $c & 0x03;
            $n  = 4;
        }
        else if ($c < 0xfe) {
            $ch = $c & 0x01;
            $n  = 5;
        }
        else {
            return $err;
        }

        $i++;
        $u8len--;

        // sequence is cut off by the end of the input
        if ($n > $u8len) {
            return $err;
        }

        for ($j=0; $j < $n; $j++) {
            $o = ord($str[$i+$j]);
            // every continuation byte must look like 10xxxxxx
            if (($o & 0xc0) != 0x80) {
                return $err;
            }
            $ch = ($ch << 6) | ($o & 0x3f);
        }

        // reject values that did not need that many bytes
        if ($n > 1 && !($ch >> ($n * 5 + 1))) {
            return $err;
        }

        $i += $n;
        $u8len -= $n;

        // everything but printable US-ASCII goes into a base64 section
        if ($ch < 0x20 || $ch >= 0x7f) {
            if (!$base64) {
                $p .= '&';
                $base64 = 1;
                $b = 0;   // bits left over from the previous 16-bit unit
                $k = 10;  // shift for the first 6-bit group of the next unit
            }
            if ($ch & ~0xffff) {
                $ch = 0xfffe;
            }

            $p .= $B64Chars[($b | $ch >> $k)];
            $k -= 6;
            for (; $k >= 0; $k -= 6) {
                $p .= $B64Chars[(($ch >> $k) & 0x3f)];
            }

            $b = ($ch << (-$k)) & 0x3f;
            $k += 16;
        }
        else {
            // leaving a base64 section: flush leftover bits and close it
            if ($base64) {
                if ($k > 10) {
                    $p .= $B64Chars[$b];
                }
                $p .= '-';
                $base64 = 0;
            }

            $p .= chr($ch);
            // a literal ampersand is written as "&-"
            if (chr($ch) == '&') {
                $p .= '-';
            }
        }
    }

    // close a base64 section that is still open at the end of the input
    if ($base64) {
        if ($k > 10) {
            $p .= $B64Chars[$b];
        }
        $p .= '-';
    }

    return $p;
}
/**
 * A method to guess character set of a string.
 *
 * Checks for a byte order mark first, then for the NUL byte layout
 * typical of UTF-16/UTF-32 text, and finally asks mbstring (when
 * available) or tests whether the string is valid UTF-8.
 *
 * @param string $string   String.
 * @param string $failover Default result for failover.
 *
 * @return string Charset name
 */
public static function detect($string, $failover = '')
{
    $result = null;

    // byte order marks (the 4-byte marks have to be tested before the 2-byte ones)
    if (substr($string, 0, 4) == "\0\0\xFE\xFF") {
        return 'UTF-32BE';  // Big Endian
    }
    if (substr($string, 0, 4) == "\xFF\xFE\0\0") {
        return 'UTF-32LE';  // Little Endian
    }
    if (substr($string, 0, 2) == "\xFE\xFF") {
        return 'UTF-16BE';  // Big Endian
    }
    if (substr($string, 0, 2) == "\xFF\xFE") {
        return 'UTF-16LE';  // Little Endian
    }
    if (substr($string, 0, 3) == "\xEF\xBB\xBF") {
        return 'UTF-8';
    }

    // heuristics: they look at the first four bytes,
    // so they only apply when the string has that many
    if (isset($string[3])) {
        if ($string[0] == "\0" && $string[1] == "\0" && $string[2] == "\0" && $string[3] != "\0") {
            return 'UTF-32BE';
        }
        if ($string[0] != "\0" && $string[1] == "\0" && $string[2] == "\0" && $string[3] == "\0") {
            return 'UTF-32LE';
        }
        if ($string[0] == "\0" && $string[1] != "\0" && $string[2] == "\0" && $string[3] != "\0") {
            return 'UTF-16BE';
        }
        if ($string[0] != "\0" && $string[1] == "\0" && $string[2] != "\0" && $string[3] == "\0") {
            return 'UTF-16LE';
        }
    }

    if (function_exists('mb_detect_encoding')) {
        // FIXME: the order is important, because sometimes
        // iso string is detected as euc-jp and etc.
        $enc = array(
            'UTF-8', 'SJIS', 'BIG5', 'GB2312',
            'ISO-8859-1', 'ISO-8859-2', 'ISO-8859-3', 'ISO-8859-4',
            'ISO-8859-5', 'ISO-8859-6', 'ISO-8859-7', 'ISO-8859-8', 'ISO-8859-9',
            'ISO-8859-10', 'ISO-8859-13', 'ISO-8859-14', 'ISO-8859-15', 'ISO-8859-16',
            'WINDOWS-1252', 'WINDOWS-1251', 'EUC-JP', 'EUC-TW', 'KOI8-R',
            'ISO-2022-KR', 'ISO-2022-JP'
        );

        $result = mb_detect_encoding($string, join(',', $enc));
    }
    else {
        // No match, check for UTF-8
        // from http://w3.org/International/questions/qa-forms-utf-8.html
        if (preg_match('/\A(
            [\x09\x0A\x0D\x20-\x7E]
            | [\xC2-\xDF][\x80-\xBF]
            | \xE0[\xA0-\xBF][\x80-\xBF]
            | [\xE1-\xEC\xEE\xEF][\x80-\xBF]{2}
            | \xED[\x80-\x9F][\x80-\xBF]
            | \xF0[\x90-\xBF][\x80-\xBF]{2}
            | [\xF1-\xF3][\x80-\xBF]{3}
            | \xF4[\x80-\x8F][\x80-\xBF]{2}
            )*\z/xs', substr($string, 0, 2048))
        ) {
            return 'UTF-8';
        }
    }

    return $result ? $result : $failover;
}
/**
 * Removes non-unicode characters from input.
 *
 * Arrays are processed recursively; values that are not strings are
 * returned unchanged.
 *
 * @param mixed $input String or array.
 *
 * @return mixed String or array
 */
public static function clean($input)
{
    // handle input of type array
    if (is_array($input)) {
        foreach ($input as $idx => $val) {
            $input[$idx] = self::clean($val);
        }
        return $input;
    }

    if (!is_string($input) || $input == '') {
        return $input;
    }

    // iconv/mbstring are much faster (especially with long strings)
    if (function_exists('mb_convert_encoding')) {
        // drop invalid bytes instead of replacing them with the substitute
        // character (same as //IGNORE with iconv), then restore the setting
        $msch = mb_substitute_character();
        mb_substitute_character('none');
        $res = mb_convert_encoding($input, 'UTF-8', 'UTF-8');
        mb_substitute_character($msch);

        if ($res !== false) {
            return $res;
        }
    }

    if (function_exists('iconv')) {
        if (($res = @iconv('UTF-8', 'UTF-8//IGNORE', $input)) !== false) {
            return $res;
        }
    }

    // fallback: collect each multibyte sequence in $seq and
    // copy it to the output only if it is well-formed
    $seq    = '';
    $out    = '';
    $regexp = '/^('.
//      '[\x00-\x7F]'.                                  // UTF8-1
        '|[\xC2-\xDF][\x80-\xBF]'.                      // UTF8-2
        '|\xE0[\xA0-\xBF][\x80-\xBF]'.                  // UTF8-3
        '|[\xE1-\xEC][\x80-\xBF][\x80-\xBF]'.           // UTF8-3
        '|\xED[\x80-\x9F][\x80-\xBF]'.                  // UTF8-3
        '|[\xEE-\xEF][\x80-\xBF][\x80-\xBF]'.           // UTF8-3
        '|\xF0[\x90-\xBF][\x80-\xBF][\x80-\xBF]'.       // UTF8-4
        '|[\xF1-\xF3][\x80-\xBF][\x80-\xBF][\x80-\xBF]'.// UTF8-4
        '|\xF4[\x80-\x8F][\x80-\xBF][\x80-\xBF]'.       // UTF8-4
        ')$/';

    for ($i = 0, $len = strlen($input); $i < $len; $i++) {
        $chr = $input[$i];
        $ord = ord($chr);

        // 1-byte character
        if ($ord <= 0x7F) {
            if ($seq) {
                $out .= preg_match($regexp, $seq) ? $seq : '';
            }
            $seq = '';
            $out .= $chr;
        }
        // first (or second) byte of multibyte sequence
        else if ($ord >= 0xC0) {
            if (strlen($seq) > 1) {
                $out .= preg_match($regexp, $seq) ? $seq : '';
                $seq = '';
            }
            else if ($seq && ord($seq) < 0xC0) {
                $seq = '';
            }
            $seq .= $chr;
        }
        // next byte of multibyte sequence
        else if ($seq) {
            $seq .= $chr;
        }
    }

    if ($seq) {
        $out .= preg_match($regexp, $seq) ? $seq : '';
    }

    return $out;
}
}
diff --git a/program/include/rcube_vcard.php b/program/include/rcube_vcard.php
index 00903c257..65598e735 100644
--- a/program/include/rcube_vcard.php
+++ b/program/include/rcube_vcard.php
@@ -1,825 +1,792 @@
<?php
/*
+-----------------------------------------------------------------------+
| program/include/rcube_vcard.php |
| |
| This file is part of the Roundcube Webmail client |
| Copyright (C) 2008-2011, The Roundcube Dev Team |
| |
| Licensed under the GNU General Public License version 3 or |
| any later version with exceptions for skins & plugins. |
| See the README file for a full license statement. |
| |
| PURPOSE: |
| Logical representation of a vcard address record |
+-----------------------------------------------------------------------+
| Author: Thomas Bruederli <roundcube@gmail.com> |
+-----------------------------------------------------------------------+
*/
/**
* Logical representation of a vcard-based address record
* Provides functions to parse and export vCard data format
*
* @package Addressbook
* @author Thomas Bruederli <roundcube@gmail.com>
*/
class rcube_vcard
{
// reset to false at the start of load()
// NOTE(review): presumably switched on by the value decoding code - confirm
private static $values_decoded = false;

// parsed vCard data: tag name => list of value-sets
private $raw = array(
    'FN' => array(),
    'N'  => array(array('','','','','')),
);

// Roundcube column name => vCard tag name
private static $fieldmap = array(
    'phone'       => 'TEL',
    'birthday'    => 'BDAY',
    'website'     => 'URL',
    'notes'       => 'NOTE',
    'email'       => 'EMAIL',
    'address'     => 'ADR',
    'jobtitle'    => 'TITLE',
    'department'  => 'X-DEPARTMENT',
    'gender'      => 'X-GENDER',
    'maidenname'  => 'X-MAIDENNAME',
    'anniversary' => 'X-ANNIVERSARY',
    'assistant'   => 'X-ASSISTANT',
    'manager'     => 'X-MANAGER',
    'spouse'      => 'X-SPOUSE',
    'edit'        => 'X-AB-EDIT',
);

// vCard TYPE value (upper-case) => subtype name used in get_assoc() output
private $typemap = array('IPHONE' => 'mobile', 'CELL' => 'mobile', 'WORK,FAX' => 'workfax');
// phone type passed to set() (upper-case) => vCard TYPE value
private $phonetypemap = array('HOME1' => 'HOME', 'BUSINESS1' => 'WORK', 'BUSINESS2' => 'WORK2', 'BUSINESSFAX' => 'WORK,FAX');
// address type passed to set() (upper-case) => vCard TYPE value
private $addresstypemap = array('BUSINESS' => 'WORK');
// vCard extension tag => instant messaging subtype ("im:<subtype>" in get_assoc())
private $immap = array('X-JABBER' => 'jabber', 'X-ICQ' => 'icq', 'X-MSN' => 'msn', 'X-AIM' => 'aim', 'X-YAHOO' => 'yahoo', 'X-SKYPE' => 'skype', 'X-SKYPE-USERNAME' => 'skype');

// well-known fields, filled in by load()
public $business = false;
public $displayname;
public $surname;
public $firstname;
public $middlename;
public $nickname;
public $organization;
public $notes;
public $email = array();

public static $eol = "\r\n";
/**
 * Constructor
 *
 * @param string  $vcard    vCard string to load; nothing is loaded when empty
 * @param string  $charset  Charset of string values
 * @param boolean $detect   True if loading a 'foreign' vcard and extra heuristics
 *                          for charset detection is required
 * @param array   $fieldmap Additional field mapping, passed to extend_fieldmap()
 */
public function __construct($vcard = null, $charset = RCMAIL_CHARSET, $detect = false, $fieldmap = array())
{
    // the map has to be extended before load()
    if (!empty($fieldmap)) {
        $this->extend_fieldmap($fieldmap);
    }

    if (!empty($vcard)) {
        $this->load($vcard, $charset, $detect);
    }
}
/**
 * Load record from (internal, unfolded) vcard 3.0 format
 *
 * Decodes the vCard, converts its values to the application charset
 * and fills in the public well-known properties of this object.
 *
 * @param string  $vcard   vCard string to parse
 * @param string  $charset Charset of string values
 * @param boolean $detect  True if loading a 'foreign' vcard and extra heuristics
 *                         for charset detection is required
 */
public function load($vcard, $charset = RCMAIL_CHARSET, $detect = false)
{
    self::$values_decoded = false;
    $this->raw = self::vcard_decode($vcard);

    if ($charset == null) {
        // resolve charset parameters
        $this->raw = self::charset_convert($this->raw);
    }
    else if ($detect && self::$values_decoded) {
        // vcard has encoded values and charset should be detected
        $detected_charset = self::detect_encoding(self::vcard_encode($this->raw));

        if ($detected_charset && $detected_charset != RCMAIL_CHARSET) {
            $this->raw = self::charset_convert($this->raw, $detected_charset);
        }
    }

    // consider FN empty if the same as the primary e-mail address
    if ($this->raw['FN'][0][0] == $this->raw['EMAIL'][0][0]) {
        $this->raw['FN'][0][0] = '';
    }

    // find well-known address fields
    $this->displayname  = $this->raw['FN'][0][0];
    $this->surname      = $this->raw['N'][0][0];
    $this->firstname    = $this->raw['N'][0][1];
    $this->middlename   = $this->raw['N'][0][2];
    $this->nickname     = $this->raw['NICKNAME'][0][0];
    $this->organization = $this->raw['ORG'][0][0];

    // a card is a business card if flagged so, or if it has an organization but no name
    $shown_as_company = $this->raw['X-ABSHOWAS'][0][0] == 'COMPANY';
    $this->business   = $shown_as_company
        || (join('', (array)$this->raw['N'][0]) == '' && !empty($this->organization));

    foreach ((array)$this->raw['EMAIL'] as $i => $raw_email) {
        if (is_array($raw_email)) {
            $this->email[$i] = $raw_email[0];
        }
        else {
            $this->email[$i] = $raw_email;
        }
    }

    // make the pref e-mail address the first entry in $this->email
    $pref_index = $this->get_type_index('EMAIL', 'pref');
    if ($pref_index > 0) {
        $first                    = $this->email[0];
        $preferred                = $this->email[$pref_index];
        $this->email[0]           = $preferred;
        $this->email[$pref_index] = $first;
    }
}
/**
 * Return vCard data as associative array to be used in Roundcube address books
 *
 * Typed fields are returned under "<column>:<subtype>" keys, each holding
 * a list of values.
 *
 * @return array Hash array with key-value pairs
 */
public function get_assoc()
{
    $out = array('name' => $this->displayname);
    $typemap = $this->typemap;

    // copy name fields to output array
    foreach (array('firstname','surname','middlename','nickname','organization') as $col) {
        if (strlen($this->$col))
            $out[$col] = $this->$col;
    }

    // 4th and 5th component of N
    if ($this->raw['N'][0][3])
        $out['prefix'] = $this->raw['N'][0][3];
    if ($this->raw['N'][0][4])
        $out['suffix'] = $this->raw['N'][0][4];

    // convert from raw vcard data into associative data for Roundcube
    foreach (array_flip(self::$fieldmap) as $tag => $col) {
        foreach ((array)$this->raw[$tag] as $i => $raw) {
            if (is_array($raw)) {
                $k = -1;
                $key = $col;
                $subtype = '';

                if (!empty($raw['type'])) {
                    // try the combination of all types first, e.g. "WORK,FAX"
                    // NOTE(review): self::array_filter() is defined outside this view;
                    // presumably it drops the 'internet' and 'pref' entries - confirm
                    $combined = join(',', self::array_filter((array)$raw['type'], 'internet,pref', true));
                    $combined = strtoupper($combined);

                    if ($typemap[$combined]) {
                        $subtype = $typemap[$combined];
                    }
                    else if ($typemap[$raw['type'][++$k]]) {
                        $subtype = $typemap[$raw['type'][$k]];
                    }
                    else {
                        $subtype = strtolower($raw['type'][$k]);
                    }

                    // 'internet' and 'pref' are no subtypes, move on to the next type
                    while ($k < count($raw['type']) && ($subtype == 'internet' || $subtype == 'pref'))
                        $subtype = $typemap[$raw['type'][++$k]] ? $typemap[$raw['type'][$k]] : strtolower($raw['type'][$k]);
                }

                // read vcard 2.1 subtype
                if (!$subtype) {
                    // take the first non-numeric key that is set to true
                    // and is not one of the excluded names
                    foreach ($raw as $k => $v) {
                        if (!is_numeric($k) && $v === true && ($k = strtolower($k))
                            && !in_array($k, array('pref','internet','voice','base64'))
                        ) {
                            $k_uc    = strtoupper($k);
                            $subtype = $typemap[$k_uc] ? $typemap[$k_uc] : $k;
                            break;
                        }
                    }
                }

                // force subtype if none set
                if (!$subtype && preg_match('/^(email|phone|address|website)/', $key))
                    $subtype = 'other';

                if ($subtype)
                    $key .= ':' . $subtype;

                // split ADR values into assoc array
                // (the first two components are skipped)
                if ($tag == 'ADR') {
                    list(,, $value['street'], $value['locality'], $value['region'], $value['zipcode'], $value['country']) = $raw;
                    $out[$key][] = $value;
                }
                else
                    $out[$key][] = $raw[0];
            }
            else {
                $out[$col][] = $raw;
            }
        }
    }

    // handle special IM fields as used by Apple
    foreach ($this->immap as $tag => $type) {
        foreach ((array)$this->raw[$tag] as $i => $raw) {
            $out['im:'.$type][] = $raw[0];
        }
    }

    // copy photo data
    if ($this->raw['PHOTO'])
        $out['photo'] = $this->raw['PHOTO'][0][0];

    return $out;
}
/**
 * Convert the data structure into a vcard 3.0 string
 *
 * @param boolean $folded True to fold long lines of the result
 *
 * @return string vCard string
 */
public function export($folded = true)
{
    $encoded = self::vcard_encode($this->raw);

    if (!$folded) {
        return $encoded;
    }

    return self::rfc2425_fold($encoded);
}
/**
 * Clear the given fields in the loaded vcard data
 *
 * The N and FN entries are re-created empty when they end up missing,
 * and the list of e-mail addresses is always emptied.
 *
 * @param array $fields List of field names to be reset; all known
 *                      fields are reset when omitted
 */
public function reset($fields = null)
{
    if (!$fields) {
        $mapped_tags = array_values(self::$fieldmap);
        $im_tags     = array_keys($this->immap);
        $base_tags   = array('FN','N','ORG','NICKNAME','EMAIL','ADR','BDAY');

        $fields = array_merge($mapped_tags, $im_tags, $base_tags);
    }

    foreach ($fields as $tag) {
        unset($this->raw[$tag]);
    }

    if (!$this->raw['N']) {
        $this->raw['N'] = array(array('','','','',''));
    }

    if (!$this->raw['FN']) {
        $this->raw['FN'] = array();
    }

    $this->email = array();
}
/**
 * Setter for address record fields
 *
 * Name fields replace the current value, all other fields are
 * appended as a new value-set of the matching vCard tag.
 *
 * @param string $field Field name
 * @param string $value Field value (an array for the 'address' field)
 * @param string $type  Type/section name
 */
public function set($field, $value, $type = 'HOME')
{
    $field   = strtolower($field);
    $type_uc = strtoupper($type);
    // subtype name => vCard TYPE value (reverse of $this->typemap)
    $typemap = array_flip($this->typemap);

    switch ($field) {
    case 'name':
    case 'displayname':
        $this->raw['FN'][0][0] = $value;
        break;

    case 'surname':
        $this->raw['N'][0][0] = $value;
        break;

    case 'firstname':
        $this->raw['N'][0][1] = $value;
        break;

    case 'middlename':
        $this->raw['N'][0][2] = $value;
        break;

    case 'prefix':
        $this->raw['N'][0][3] = $value;
        break;

    case 'suffix':
        $this->raw['N'][0][4] = $value;
        break;

    case 'nickname':
        $this->raw['NICKNAME'][0][0] = $value;
        break;

    case 'organization':
        $this->raw['ORG'][0][0] = $value;
        break;

    case 'photo':
        if (strpos($value, 'http:') === 0) {
            // TODO: fetch file from URL and save it locally?
            $this->raw['PHOTO'][0] = array(0 => $value, 'url' => true);
        }
        else {
            // flag as base64 if the value has a character outside of a-z 0-9 / = + -
            $this->raw['PHOTO'][0] = array(0 => $value, 'base64' => (bool) preg_match('![^a-z0-9/=+-]!i', $value));
        }
        break;

    case 'email':
        $this->raw['EMAIL'][] = array(0 => $value, 'type' => array_filter(array('INTERNET', $type_uc)));
        $this->email[] = $value;
        break;

    case 'im':
        // save IM subtypes into extension fields
        // (types not listed in $this->immap are ignored)
        $typemap = array_flip($this->immap);
        if ($field = $typemap[strtolower($type)])
            $this->raw[$field][] = array(0 => $value);
        break;

    case 'birthday':
    case 'anniversary':
        // stored as Y-m-d; values rcube_utils::strtotime() cannot parse are ignored
        if (($val = rcube_utils::strtotime($value)) && ($fn = self::$fieldmap[$field]))
            $this->raw[$fn][] = array(0 => date('Y-m-d', $val), 'value' => array('date'));
        break;

    case 'address':
        if ($this->addresstypemap[$type_uc])
            $type = $this->addresstypemap[$type_uc];

        // build the ADR component list from an associative address
        $value = $value[0] ? $value : array('', '', $value['street'], $value['locality'], $value['region'], $value['zipcode'], $value['country']);

        // fall through if not empty
        if (!strlen(join('', $value)))
            break;

    default:
        if ($field == 'phone' && $this->phonetypemap[$type_uc])
            $type = $this->phonetypemap[$type_uc];

        // only fields known to the fieldmap are stored, empty string values are skipped
        if (($tag = self::$fieldmap[$field]) && (is_array($value) || strlen($value))) {
            $index = count($this->raw[$tag]);
            $this->raw[$tag][$index] = (array)$value;
            if ($type)
                $this->raw[$tag][$index]['type'] = explode(',', ($typemap[$type_uc] ? $typemap[$type_uc] : $type));
        }
        break;
    }
}
/**
 * Setter for individual vcard properties
 *
 * @param string  $tag    VCard tag name
 * @param array   $value  Value-set of this vcard property
 * @param boolean $append Set to true if the value-set should be appended
 *                        instead of replacing the first value-set
 */
public function set_raw($tag, $value, $append = false)
{
    $index = 0;

    // a tag that is not set yet has nothing to count
    if ($append && isset($this->raw[$tag])) {
        $index = count($this->raw[$tag]);
    }

    $this->raw[$tag][$index] = (array)$value;
}
/**
 * Find index with the '$type' attribute
 *
 * If several value-sets carry the type, the last one wins.
 *
 * @param string $field Field name
 * @param string $type  Type name to look for (case-insensitive)
 *
 * @return int Field index having $type set, 0 if there is none
 */
private function get_type_index($field, $type = 'pref')
{
    $result = 0;

    if (!empty($this->raw[$field])) {
        foreach ($this->raw[$field] as $i => $data) {
            if (isset($data['type']) && is_array($data['type']) && in_array_nocase($type, $data['type'])) {
                $result = $i;
            }
        }
    }

    return $result;
}
/**
 * Convert a whole vcard (array) to UTF-8.
 * If $force_charset is null, each member value that has a charset parameter will be converted
 *
 * @param array  $card          Raw vcard data structure
 * @param string $force_charset Charset to convert all value-sets from
 *
 * @return array The vcard data with converted values and the charset
 *               parameter removed from every converted value-set
 */
private static function charset_convert($card, $force_charset = null)
{
    foreach ($card as $key => $node) {
        foreach ($node as $i => $subnode) {
            if (!is_array($subnode)) {
                continue;
            }

            // the forced charset wins over the charset parameter of the value-set
            $charset = $force_charset;
            if (!$charset) {
                if (!$subnode['charset']) {
                    continue;
                }

                $charset = $subnode['charset'][0];
                if (!$charset) {
                    continue;
                }
            }

            // only the values (numeric keys) are converted, not the parameters
            foreach ($subnode as $j => $value) {
                if (is_numeric($j) && is_string($value)) {
                    $card[$key][$i][$j] = rcube_charset::convert($value, $charset);
                }
            }

            unset($card[$key][$i]['charset']);
        }
    }

    return $card;
}
/**
 * Extends fieldmap definition
 *
 * Entries already present in the fieldmap are kept as they are.
 *
 * @param array $map Additional field mapping; anything else is ignored
 */
public function extend_fieldmap($map)
{
    if (!is_array($map)) {
        return;
    }

    self::$fieldmap = array_merge($map, self::$fieldmap);
}
/**
 * Factory method to import a vcard file
 *
 * Cards having neither a display name nor an e-mail address are skipped.
 *
 * @param string $data vCard file content
 *
 * @return array List of rcube_vcard objects
 */
public static function import($data)
{
    $out = array();

    if (preg_match('/charset=/i', substr($data, 0, 2048))) {
        // check if charsets are specified (usually vcard version < 3.0 but this is not reliable)
        $charset = null;
    }
    else if (($charset = self::detect_encoding($data)) && $charset != RCMAIL_CHARSET) {
        // detect charset and convert to utf-8
        $data = rcube_charset::convert($data, $charset);
        // also remove BOM
        $data = preg_replace(array('/^[\xFE\xFF]{2}/', '/^\xEF\xBB\xBF/', '/^\x00+/'), '', $data);
        $charset = RCMAIL_CHARSET;
    }

    $vcard_block    = '';
    $in_vcard_block = false;
    $lines          = preg_split("/[\r\n]+/", $data);

    foreach ($lines as $i => $line) {
        // collect the lines of the current block
        if ($in_vcard_block && !empty($line)) {
            $vcard_block .= $line . "\n";
        }

        $line = trim($line);

        if (preg_match('/^END:VCARD$/i', $line)) {
            // parse vcard
            $obj = new rcube_vcard(self::cleanup($vcard_block), $charset, true, self::$fieldmap);

            if (!empty($obj->displayname) || !empty($obj->email)) {
                $out[] = $obj;
            }

            $in_vcard_block = false;
        }
        else if (preg_match('/^BEGIN:VCARD$/i', $line)) {
            // start of a new block
            $vcard_block    = $line . "\n";
            $in_vcard_block = true;
        }
    }

    return $out;
}
/**
 * Normalize vcard data for better parsing
 *
 * @param string $vcard vCard block
 *
 * @return string Cleaned vcard block
 */
private static function cleanup($vcard)
{
    // Convert special types (like Skype) to normal type='skype' classes with this simple regex ;)
    // (the hyphen is last in the label class so that it is a literal, not a range)
    $vcard = preg_replace(
        '/item(\d+)\.(TEL|EMAIL|URL)([^:]*?):(.*?)item\1.X-ABLabel:(?:_\$!<)?([\w() -]*)(?:>!\$_)?./s',
        '\2;type=\5\3:\4',
        $vcard);

    // convert Apple X-ABRELATEDNAMES into X-* fields for better compatibility
    $vcard = preg_replace_callback(
        '/item(\d+)\.(X-ABRELATEDNAMES)([^:]*?):(.*?)item\1.X-ABLabel:(?:_\$!<)?([\w() -]*)(?:>!\$_)?./s',
        array('self', 'x_abrelatednames_callback'),
        $vcard);

    // Remove cruft like item1.X-AB*, item1.ADR instead of ADR, and empty lines
    $vcard = preg_replace(array('/^item\d*\.X-AB.*$/m', '/^item\d*\./m', "/\n+/"), array('', '', "\n"), $vcard);

    // convert X-WAB-GENDER to X-GENDER
    if (preg_match('/X-WAB-GENDER:(\d)/', $vcard, $matches)) {
        $value = $matches[1] == '2' ? 'male' : 'female';
        $vcard = preg_replace('/X-WAB-GENDER:\d/', 'X-GENDER:' . $value, $vcard);
    }

    // if N doesn't have any semicolons, add some
    // (line breaks are listed explicitly, \R is not usable in a character class)
    $vcard = preg_replace('/^(N:[^;\r\n]*)$/m', '\1;;;;', $vcard);

    return $vcard;
}
/**
 * preg_replace_callback() handler used by cleanup(): builds an
 * "X-<LABEL><params>:<value>" line from a matched X-ABRELATEDNAMES item
 *
 * @param array $matches Regexp matches: [3] property parameters,
 *                       [4] value, [5] label
 *
 * @return string Replacement line
 */
private static function x_abrelatednames_callback($matches)
{
    $tag    = 'X-' . strtoupper($matches[5]);
    $params = $matches[3];
    $value  = $matches[4];

    return $tag . $params . ':' . $value;
}
/**
 * preg_replace_callback() handler that folds one long line by inserting
 * "\r\n " continuation breaks.
 *
 * @param array Regex matches; [1] is the line to fold
 * @return string Folded line
 */
private static function rfc2425_fold_callback($matches)
{
    // chunk_split string and avoid lines breaking multibyte characters
    $line   = $matches[1];
    $length = strlen($line);
    $pos    = 71;

    // the first 71 bytes are copied unconditionally
    // ($out is initialised here instead of being appended to while undefined)
    $out = substr($line, 0, $pos);

    for ($n = $pos; $pos < $length; $pos++) {
        // break if length > 75 or multibyte character starts after position 71
        // (a byte whose two top bits are set is the lead byte of a UTF-8 sequence)
        if ($n > 75 || ($n > 71 && ord($line[$pos]) >> 6 == 3)) {
            $out .= "\r\n ";
            $n = 0;
        }

        $out .= $line[$pos];
        $n++;
    }

    return $out;
}
/**
 * Fold every line of 72 or more characters in the given text
 * using "\r\n " continuation breaks.
 *
 * @param string Text to fold
 * @return string Folded text
 */
public static function rfc2425_fold($val)
{
    // __CLASS__ instead of 'self': array('self', ...) callables are deprecated in PHP 8.2
    $callback = array(__CLASS__, 'rfc2425_fold_callback');

    return preg_replace_callback('/([^\n]{72,})/', $callback, $val);
}
/**
* Decodes a vcard block (vcard 3.0 format, unfolded)
* into an array structure
*
* The result is keyed by the upper-cased property name. Each value is a list
* of entries; an entry is an array holding the property parameters under
* lower-cased string keys and the value part(s) under integer keys.
* The VERSION property is removed from the result before returning.
*
* @param string vCard block to parse
* @return array Raw data structure
*/
private static function vcard_decode($vcard)
{
// Perform RFC2425 line unfolding and split lines
// (all "\r" are dropped, then every newline followed by whitespace is removed)
$vcard = preg_replace(array("/\r/", "/\n\s+/"), '', $vcard);
$lines = explode("\n", $vcard);
$data = array();
for ($i=0; $i < count($lines); $i++) {
// $line[1] = everything before the first colon, $line[2] = the non-empty rest
if (!preg_match('/^([^:]+):(.+)$/', $lines[$i], $line))
continue;
// BEGIN/END markers carry no data
if (preg_match('/^(BEGIN|END)$/i', $line[1]))
continue;
// convert 2.1-style "EMAIL;internet;home:" to 3.0-style "EMAIL;TYPE=internet;TYPE=home:"
// NOTE(review): entries are appended to $data as arrays further down, so this
// comparison of an entry with the string "2.1" looks like it can never be true;
// confirm whether this conversion is actually reachable.
if (($data['VERSION'][0] == "2.1") && preg_match('/^([^;]+);([^:]+)/', $line[1], $regs2) && !preg_match('/^TYPE=/i', $regs2[2])) {
$line[1] = $regs2[1];
foreach (explode(';', $regs2[2]) as $prop)
$line[1] .= ';' . (strpos($prop, '=') ? $prop : 'TYPE='.$prop);
}
// split the part before the colon on ';': first item is the property name,
// the remaining items are its parameters
if (preg_match_all('/([^\\;]+);?/', $line[1], $regs2)) {
$entry = array();
$field = strtoupper($regs2[1][0]);
$enc = null;
foreach($regs2[1] as $attrid => $attr) {
// parameter written as key=value with a truthy value
if ((list($key, $value) = explode('=', $attr)) && $value) {
$value = trim($value);
if ($key == 'ENCODING') {
$value = strtoupper($value);
// add next line(s) to value string if QP line end detected
// (a trailing "=" means the value continues on the following line;
// this advances the outer loop index $i)
if ($value == 'QUOTED-PRINTABLE') {
while (preg_match('/=$/', $lines[$i]))
$line[2] .= "\n" . $lines[++$i];
}
$enc = $value;
}
else {
// comma-separated parameter values are split and merged with
// values already collected for the same parameter name
$lc_key = strtolower($key);
$entry[$lc_key] = array_merge((array)$entry[$lc_key], (array)self::vcard_unquote($value, ','));
}
}
// $attrid 0 is the property name itself, so only later items count as parameters
else if ($attrid > 0) {
$entry[strtolower($key)] = true; // true means attr without =value
}
}
// decode value
// (when an ENCODING parameter was given or a bare "base64" parameter is present)
if ($enc || !empty($entry['base64'])) {
// save encoding type (#1488432)
if ($enc == 'B') {
$entry['encoding'] = 'B';
// should we use vCard 3.0 instead?
// $entry['base64'] = true;
}
$line[2] = self::decode_value($line[2], $enc ? $enc : 'base64');
}
// base64/B payloads are kept as-is; everything else is unescaped and split on ';'
if ($enc != 'B' && empty($entry['base64'])) {
$line[2] = self::vcard_unquote($line[2]);
}
// append the value part(s) to the parameters collected above
$entry = array_merge($entry, (array) $line[2]);
$data[$field][] = $entry;
}
}
// VERSION is dropped from the returned structure
unset($data['VERSION']);
return $data;
}
/**
 * Decode a value according to the name taken from its ENCODING attribute
 *
 * Sets self::$values_decoded whenever a decoding was actually applied.
 * Unknown encoding names leave the value untouched.
 *
 * @param string String to decode
 * @param string Encoding type (quoted-printable, base64 and b supported; case-insensitive)
 * @return string Decoded 8bit value
 */
private static function decode_value($value, $encoding)
{
    $type = strtolower($encoding);

    if ($type === 'quoted-printable') {
        self::$values_decoded = true;
        return quoted_printable_decode($value);
    }

    if ($type === 'base64' || $type === 'b') {
        self::$values_decoded = true;
        return base64_decode($value);
    }

    // not an encoding we know how to decode
    return $value;
}
/**
 * Encodes an entry for storage in our database (vcard 3.0 format, unfolded)
 *
 * $data maps a property name to a list of entries. An entry is either a plain
 * string value or an array: integer keys hold the value parts, string keys
 * hold parameters (boolean true is written as a bare tag, anything else as
 * tag=value). Entries whose value is empty are skipped.
 *
 * @param array Raw data structure to encode
 * @return string vCard encoded string
 */
public static function vcard_encode($data)
{
    // must exist even when no property produces output
    $vcard = '';

    foreach ((array)$data as $type => $entries) {
        /* valid N has 5 properties */
        // strict comparison: an integer key 0 must not be mistaken for "N"
        if ($type === 'N' && isset($entries[0]) && is_array($entries[0])) {
            while (count($entries[0]) < 5) {
                $entries[0][] = "";
            }
        }

        // make sure FN is not empty (required by RFC2426)
        if ($type === 'FN' && empty($entries)) {
            $entries = array(isset($data['EMAIL'][0][0]) ? $data['EMAIL'][0][0] : '');
        }

        foreach ((array)$entries as $entry) {
            $attr = '';

            if (is_array($entry)) {
                $value = array();

                // value parts are base64-encoded when the entry is flagged as binary
                $is_base64 = !empty($entry['base64'])
                    || (isset($entry['encoding']) && $entry['encoding'] == 'B');

                foreach ($entry as $attrname => $attrvalues) {
                    if (is_int($attrname)) {
                        // integer keys carry the value parts
                        $value[] = $is_base64 ? base64_encode($attrvalues) : $attrvalues;
                    }
                    else if (is_bool($attrvalues)) {
                        if ($attrvalues) {
                            $attr .= strtoupper(";$attrname"); // true means just tag, not tag=value, as in PHOTO;BASE64:...
                        }
                    }
                    else {
                        foreach ((array)$attrvalues as $attrvalue) {
                            $attr .= strtoupper(";$attrname=") . self::vcard_quote($attrvalue, ',');
                        }
                    }
                }
            }
            else {
                $value = $entry;
            }

            // skip empty entries
            if (self::is_empty($value)) {
                continue;
            }

            $vcard .= self::vcard_quote($type) . $attr . ':' . self::vcard_quote($value) . self::$eol;
        }
    }

    return 'BEGIN:VCARD' . self::$eol . 'VERSION:3.0' . self::$eol . $vcard . 'END:VCARD';
}
/**
 * Join indexed data array to a vcard quoted string
 *
 * A scalar is escaped (backslash, comma, semicolon; "\n" becomes a literal
 * backslash-n, "\r" is dropped). An array is escaped element by element,
 * recursively, and joined with the separator.
 *
 * @param array Field data
 * @param string Separator
 * @return string Joined and quoted string
 */
private static function vcard_quote($s, $sep = ';')
{
    if (is_array($s)) {
        // initialised up front so an empty array yields '' without touching an undefined variable
        $r = array();

        foreach ($s as $part) {
            $r[] = self::vcard_quote($part, $sep);
        }

        return implode($sep, $r);
    }

    // cast so that null and other non-string scalars are handled like strings
    return strtr((string)$s, array('\\' => '\\\\', "\r" => '', "\n" => '\n', ',' => '\,', ';' => '\;'));
}
/**
 * Split quoted string
 *
 * When the string contains unescaped separators, a list of the unquoted
 * parts is returned; otherwise the unquoted string itself.
 *
 * @param string vCard string to split
 * @param string Separator char/string
 * @return array|string List with splited values, or a single unquoted string
 */
private static function vcard_unquote($s, $sep = ';')
{
    $escaped_sep = '\\' . $sep;

    // break string into parts separated by $sep, but leave escaped $sep alone
    // (escaped separators are masked with a BEL character while splitting)
    $masked = strtr($s, array($escaped_sep => "\007"));
    $parts  = explode($sep, $masked);

    if (count($parts) <= 1) {
        // nothing to split: just resolve the escape sequences
        return strtr($s, array("\r" => '', '\\\\' => '\\', '\n' => "\n", '\N' => "\n", '\,' => ',', '\;' => ';', '\:' => ':'));
    }

    $result = array();
    foreach ($parts as $part) {
        // restore the masked separators before unquoting each part
        $restored = strtr($part, array("\007" => $escaped_sep));
        $result[] = self::vcard_unquote($restored, $sep);
    }

    return $result;
}
/**
 * Tell whether a vCard entry carries no content, i.e. it is an empty
 * string or an array whose elements are all empty strings once cast.
 *
 * @param mixed $value Attribute value (string or array)
 *
 * @return bool True if the value is empty, False otherwise
 */
private static function is_empty($value)
{
    $empty = true;

    foreach ((array)$value as $item) {
        // any element with a non-empty string form makes the entry non-empty
        if (strlen((string)$item) > 0) {
            $empty = false;
            break;
        }
    }

    return $empty;
}
/**
 * Pick array elements by comparing their lower-cased value with a list
 *
 * Keys of the input array are preserved in the result.
 *
 * @param array Array to filter
 * @param keys Array or comma separated list of values to keep
 * @param boolean Invert key selection: remove the listed values
 * @return array The filtered array
 */
private static function array_filter($arr, $values, $inverse = false)
{
    $list = is_array($values) ? $values : explode(',', $values);

    // flipped so membership can be tested with isset()
    $lookup   = array_flip((array)$list);
    $filtered = array();

    foreach ($arr as $key => $val) {
        $listed = isset($lookup[strtolower($val)]);

        // keep listed values normally, unlisted ones when inverted
        if ($inverse ? !$listed : $listed) {
            $filtered[$key] = $val;
        }
    }

    return $filtered;
}
/**
 * Detect the character encoding of a string
 *
 * Delegates to rcube_charset::detect(), passing the configured
 * 'default_charset' (ISO-8859-1 when not configured) as the fallback.
 *
 * @param string Input string to test
 * @return string Result of rcube_charset::detect()
 */
private static function detect_encoding($string)
{
    $fallback = rcube::get_instance()->config->get('default_charset', 'ISO-8859-1'); // fallback to Latin-1

    return rcube_charset::detect($string, $fallback);
}
}
File Metadata
Details
Attached
Mime Type
text/x-diff
Expires
Sat, Mar 1, 8:43 AM (1 d, 16 h)
Storage Engine
blob
Storage Format
Raw Data
Storage Handle
166411
Default Alt Text
(54 KB)
Attached To
Mode
R3 roundcubemail
Attached
Detach File
Event Timeline
Log In to Comment