Page Menu
Home
Phorge
Search
Configure Global Search
Log In
Files
F174687
No One
Temporary
Actions
View File
Edit File
Delete File
View Transforms
Subscribe
Mute Notifications
Award Token
Flag For Later
Size
78 KB
Referenced Files
None
Subscribers
None
View Options
diff --git a/program/lib/Roundcube/rcube_charset.php b/program/lib/Roundcube/rcube_charset.php
index 5ef78b886..e5c8e2ce8 100644
--- a/program/lib/Roundcube/rcube_charset.php
+++ b/program/lib/Roundcube/rcube_charset.php
@@ -1,910 +1,912 @@
<?php
/**
+-----------------------------------------------------------------------+
| This file is part of the Roundcube Webmail client |
| |
| Copyright (C) The Roundcube Dev Team |
| Copyright (C) Kolab Systems AG |
| Copyright (C) 2000 Edmund Grimley Evans <edmundo@rano.org> |
| |
| Licensed under the GNU General Public License version 3 or |
| any later version with exceptions for skins & plugins. |
| See the README file for a full license statement. |
| |
| PURPOSE: |
| Provide charset conversion functionality |
+-----------------------------------------------------------------------+
| Author: Thomas Bruederli <roundcube@gmail.com> |
| Author: Aleksander Machniak <alec@alec.pl> |
| Author: Edmund Grimley Evans <edmundo@rano.org> |
+-----------------------------------------------------------------------+
*/
/**
* Character sets conversion functionality
*
* @package Framework
* @subpackage Core
*/
class rcube_charset
{
/**
 * Charset name aliases; some of them come from the HTML5 spec.
 *
 * Keys are compared against the normalized name built in parse_charset()
 * (upper-cased, with everything except A-Z and 0-9 removed), values are the
 * charset names returned for them.
 * NOTE(review): the purely numeric keys look like numeric charset identifiers
 * sent by some clients - confirm their origin before extending the list.
 *
 * @var array
 */
static public $aliases = array(
'USASCII' => 'WINDOWS-1252',
'ANSIX31101983' => 'WINDOWS-1252',
'ANSIX341968' => 'WINDOWS-1252',
'UNKNOWN8BIT' => 'ISO-8859-15',
'UNKNOWN' => 'ISO-8859-15',
'USERDEFINED' => 'ISO-8859-15',
'KSC56011987' => 'EUC-KR',
'GB2312' => 'GBK',
'GB231280' => 'GBK',
'UNICODE' => 'UTF-8',
'UTF7IMAP' => 'UTF7-IMAP',
'TIS620' => 'WINDOWS-874',
'ISO88599' => 'WINDOWS-1254',
'ISO885911' => 'WINDOWS-874',
'MACROMAN' => 'MACINTOSH',
'77' => 'MAC',
'128' => 'SHIFT-JIS',
'129' => 'CP949',
'130' => 'CP1361',
'134' => 'GBK',
'136' => 'BIG5',
'161' => 'WINDOWS-1253',
'162' => 'WINDOWS-1254',
'163' => 'WINDOWS-1258',
'177' => 'WINDOWS-1255',
'178' => 'WINDOWS-1256',
'186' => 'WINDOWS-1257',
'204' => 'WINDOWS-1251',
'222' => 'WINDOWS-874',
'238' => 'WINDOWS-1250',
'MS950' => 'CP950',
'WINDOWS949' => 'UHC',
);
/**
* Windows codepages
*
* @var array
*/
static public $windows_codepages = array(
37 => 'IBM037', // IBM EBCDIC US-Canada
437 => 'IBM437', // OEM United States
500 => 'IBM500', // IBM EBCDIC International
708 => 'ASMO-708', // Arabic (ASMO 708)
720 => 'DOS-720', // Arabic (Transparent ASMO); Arabic (DOS)
737 => 'IBM737', // OEM Greek (formerly 437G); Greek (DOS)
775 => 'IBM775', // OEM Baltic; Baltic (DOS)
850 => 'IBM850', // OEM Multilingual Latin 1; Western European (DOS)
852 => 'IBM852', // OEM Latin 2; Central European (DOS)
855 => 'IBM855', // OEM Cyrillic (primarily Russian)
857 => 'IBM857', // OEM Turkish; Turkish (DOS)
858 => 'IBM00858', // OEM Multilingual Latin 1 + Euro symbol
860 => 'IBM860', // OEM Portuguese; Portuguese (DOS)
861 => 'IBM861', // OEM Icelandic; Icelandic (DOS)
862 => 'DOS-862', // OEM Hebrew; Hebrew (DOS)
863 => 'IBM863', // OEM French Canadian; French Canadian (DOS)
864 => 'IBM864', // OEM Arabic; Arabic (864)
865 => 'IBM865', // OEM Nordic; Nordic (DOS)
866 => 'cp866', // OEM Russian; Cyrillic (DOS)
869 => 'IBM869', // OEM Modern Greek; Greek, Modern (DOS)
870 => 'IBM870', // IBM EBCDIC Multilingual/ROECE (Latin 2); IBM EBCDIC Multilingual Latin 2
874 => 'windows-874', // ANSI/OEM Thai (ISO 8859-11); Thai (Windows)
875 => 'cp875', // IBM EBCDIC Greek Modern
932 => 'shift_jis', // ANSI/OEM Japanese; Japanese (Shift-JIS)
936 => 'gb2312', // ANSI/OEM Simplified Chinese (PRC, Singapore); Chinese Simplified (GB2312)
950 => 'big5', // ANSI/OEM Traditional Chinese (Taiwan; Hong Kong SAR, PRC); Chinese Traditional (Big5)
1026 => 'IBM1026', // IBM EBCDIC Turkish (Latin 5)
1047 => 'IBM01047', // IBM EBCDIC Latin 1/Open System
1140 => 'IBM01140', // IBM EBCDIC US-Canada (037 + Euro symbol); IBM EBCDIC (US-Canada-Euro)
1141 => 'IBM01141', // IBM EBCDIC Germany (20273 + Euro symbol); IBM EBCDIC (Germany-Euro)
1142 => 'IBM01142', // IBM EBCDIC Denmark-Norway (20277 + Euro symbol); IBM EBCDIC (Denmark-Norway-Euro)
1143 => 'IBM01143', // IBM EBCDIC Finland-Sweden (20278 + Euro symbol); IBM EBCDIC (Finland-Sweden-Euro)
1144 => 'IBM01144', // IBM EBCDIC Italy (20280 + Euro symbol); IBM EBCDIC (Italy-Euro)
1145 => 'IBM01145', // IBM EBCDIC Latin America-Spain (20284 + Euro symbol); IBM EBCDIC (Spain-Euro)
1146 => 'IBM01146', // IBM EBCDIC United Kingdom (20285 + Euro symbol); IBM EBCDIC (UK-Euro)
1147 => 'IBM01147', // IBM EBCDIC France (20297 + Euro symbol); IBM EBCDIC (France-Euro)
1148 => 'IBM01148', // IBM EBCDIC International (500 + Euro symbol); IBM EBCDIC (International-Euro)
1149 => 'IBM01149', // IBM EBCDIC Icelandic (20871 + Euro symbol); IBM EBCDIC (Icelandic-Euro)
1200 => 'UTF-16', // Unicode UTF-16, little endian byte order (BMP of ISO 10646); available only to managed applications
1201 => 'UTF-16BE', // Unicode UTF-16, big endian byte order; available only to managed applications
1250 => 'windows-1250', // ANSI Central European; Central European (Windows)
1251 => 'windows-1251', // ANSI Cyrillic; Cyrillic (Windows)
1252 => 'windows-1252', // ANSI Latin 1; Western European (Windows)
1253 => 'windows-1253', // ANSI Greek; Greek (Windows)
1254 => 'windows-1254', // ANSI Turkish; Turkish (Windows)
1255 => 'windows-1255', // ANSI Hebrew; Hebrew (Windows)
1256 => 'windows-1256', // ANSI Arabic; Arabic (Windows)
1257 => 'windows-1257', // ANSI Baltic; Baltic (Windows)
1258 => 'windows-1258', // ANSI/OEM Vietnamese; Vietnamese (Windows)
10000 => 'macintosh', // MAC Roman; Western European (Mac)
12000 => 'UTF-32', // Unicode UTF-32, little endian byte order; available only to managed applications
12001 => 'UTF-32BE', // Unicode UTF-32, big endian byte order; available only to managed applications
20127 => 'US-ASCII', // US-ASCII (7-bit)
20273 => 'IBM273', // IBM EBCDIC Germany
20277 => 'IBM277', // IBM EBCDIC Denmark-Norway
20278 => 'IBM278', // IBM EBCDIC Finland-Sweden
20280 => 'IBM280', // IBM EBCDIC Italy
20284 => 'IBM284', // IBM EBCDIC Latin America-Spain
20285 => 'IBM285', // IBM EBCDIC United Kingdom
20290 => 'IBM290', // IBM EBCDIC Japanese Katakana Extended
20297 => 'IBM297', // IBM EBCDIC France
20420 => 'IBM420', // IBM EBCDIC Arabic
20423 => 'IBM423', // IBM EBCDIC Greek
20424 => 'IBM424', // IBM EBCDIC Hebrew
20838 => 'IBM-Thai', // IBM EBCDIC Thai
20866 => 'koi8-r', // Russian (KOI8-R); Cyrillic (KOI8-R)
20871 => 'IBM871', // IBM EBCDIC Icelandic
20880 => 'IBM880', // IBM EBCDIC Cyrillic Russian
20905 => 'IBM905', // IBM EBCDIC Turkish
20924 => 'IBM00924', // IBM EBCDIC Latin 1/Open System (1047 + Euro symbol)
20932 => 'EUC-JP', // Japanese (JIS 0208-1990 and 0212-1990)
20936 => 'cp20936', // Simplified Chinese (GB2312); Chinese Simplified (GB2312-80)
20949 => 'cp20949', // Korean Wansung
21025 => 'cp1025', // IBM EBCDIC Cyrillic Serbian-Bulgarian
21866 => 'koi8-u', // Ukrainian (KOI8-U); Cyrillic (KOI8-U)
28591 => 'iso-8859-1', // ISO 8859-1 Latin 1; Western European (ISO)
28592 => 'iso-8859-2', // ISO 8859-2 Central European; Central European (ISO)
28593 => 'iso-8859-3', // ISO 8859-3 Latin 3
28594 => 'iso-8859-4', // ISO 8859-4 Baltic
28595 => 'iso-8859-5', // ISO 8859-5 Cyrillic
28596 => 'iso-8859-6', // ISO 8859-6 Arabic
28597 => 'iso-8859-7', // ISO 8859-7 Greek
28598 => 'iso-8859-8', // ISO 8859-8 Hebrew; Hebrew (ISO-Visual)
28599 => 'iso-8859-9', // ISO 8859-9 Turkish
28603 => 'iso-8859-13', // ISO 8859-13 Estonian
28605 => 'iso-8859-15', // ISO 8859-15 Latin 9
38598 => 'iso-8859-8-i', // ISO 8859-8 Hebrew; Hebrew (ISO-Logical)
50220 => 'iso-2022-jp', // ISO 2022 Japanese with no halfwidth Katakana; Japanese (JIS)
50221 => 'csISO2022JP', // ISO 2022 Japanese with halfwidth Katakana; Japanese (JIS-Allow 1 byte Kana)
50222 => 'iso-2022-jp', // ISO 2022 Japanese JIS X 0201-1989; Japanese (JIS-Allow 1 byte Kana - SO/SI)
50225 => 'iso-2022-kr', // ISO 2022 Korean
51932 => 'EUC-JP', // EUC Japanese
51936 => 'EUC-CN', // EUC Simplified Chinese; Chinese Simplified (EUC)
51949 => 'EUC-KR', // EUC Korean
52936 => 'hz-gb-2312', // HZ-GB2312 Simplified Chinese; Chinese Simplified (HZ)
54936 => 'GB18030', // Windows XP and later: GB18030 Simplified Chinese (4 byte); Chinese Simplified (GB18030)
65000 => 'UTF-7',
65001 => 'UTF-8',
);
+ static public $mb_aliases = array(
+ 'WINDOWS-1257' => 'ISO-8859-13',
+ 'US-ASCII' => 'ASCII',
+ 'ISO-2022-JP' => 'ISO-2022-JP-MS',
+ );
+
/**
 * Error handler callback that converts a PHP error into an exception.
 *
 * Meant to be registered temporarily with set_error_handler() around
 * a single call, so the caller can catch the failure.
 *
 * @param int    $errno  Level of the error
 * @param string $errstr Error message
 *
 * @throws ErrorException Always; message is $errstr, severity is $errno
 */
public static function error_handler($errno, $errstr)
{
    $exception = new ErrorException($errstr, 0, $errno);

    throw $exception;
}
/**
 * Parse and validate charset name string.
 * Sometimes charset string is malformed, there are also charset aliases,
 * but we need strict names for charset conversion (specially utf8 class)
 *
 * Results are memoized per input string for the lifetime of the request.
 *
 * @param string $input Input charset name
 *
 * @return string|null The validated charset name, null for the "BINARY" pseudo-charset
 */
public static function parse_charset($input)
{
    static $charsets = array();

    // Normalize null/non-string input once; it is also the cache key
    $input = (string) $input;

    // array_key_exists() rather than isset(): a cached null (BINARY) is a valid hit
    if (array_key_exists($input, $charsets)) {
        return $charsets[$input];
    }

    $charset = strtoupper($input);

    $charset = preg_replace(array(
            '/^[^0-9A-Z]+/',    // e.g. _ISO-8859-JP$SIO
            '/\$.*$/',          // e.g. _ISO-8859-JP$SIO
            '/UNICODE-1-1-*/',  // RFC1641/1642
            '/^X-/',            // X- prefix (e.g. X-ROMAN8 => ROMAN8)
            '/\*.*$/'           // lang code according to RFC 2231.5
        ), '', $charset);

    if ($charset == 'BINARY') {
        return $charsets[$input] = null;
    }

    // allow A-Z and 0-9 only
    $str = preg_replace('/[^A-Z0-9]/', '', $charset);

    if (isset(self::$aliases[$str])) {
        $result = self::$aliases[$str];
    }
    // UTF
    else if (preg_match('/U[A-Z][A-Z](7|8|16|32)(BE|LE)*/', $str, $m)) {
        // the endianness group is absent from $m when the name has no BE/LE suffix
        $result = 'UTF-' . $m[1] . (isset($m[2]) ? $m[2] : '');
    }
    // ISO-8859
    else if (preg_match('/ISO8859([0-9]{0,2})/', $str, $m)) {
        $iso = 'ISO-8859-' . ($m[1] ?: 1);
        // some clients sends windows-1252 text as latin1,
        // it is safe to use windows-1252 for all latin1
        $result = $iso == 'ISO-8859-1' ? 'WINDOWS-1252' : $iso;
    }
    // handle broken charset names e.g. WINDOWS-1250HTTP-EQUIVCONTENT-TYPE
    else if (preg_match('/(WIN|WINDOWS)([0-9]+)/', $str, $m)) {
        $result = 'WINDOWS-' . $m[2];
    }
    // LATIN
    else if (preg_match('/LATIN(.*)/', $str, $m)) {
        $aliases = array('2' => 2, '3' => 3, '4' => 4, '5' => 9, '6' => 10,
            '7' => 13, '8' => 14, '9' => 15, '10' => 16,
            'ARABIC' => 6, 'CYRILLIC' => 5, 'GREEK' => 7, 'GREEK1' => 7, 'HEBREW' => 8
        );

        // some clients sends windows-1252 text as latin1,
        // it is safe to use windows-1252 for all latin1
        if ($m[1] == 1) {
            $result = 'WINDOWS-1252';
        }
        // if iconv is not supported we need ISO labels, it's also safe for iconv
        else if (!empty($aliases[$m[1]])) {
            $result = 'ISO-8859-'.$aliases[$m[1]];
        }
        // iconv requires conversion of e.g. LATIN-1 to LATIN1
        else {
            $result = $str;
        }
    }
    else {
        $result = $charset;
    }

    $charsets[$input] = $result;

    return $result;
}
/**
 * Convert a string from one charset to another.
 * Uses mbstring and iconv functions if possible
 *
 * Conversion is attempted with iconv first, then mbstring, then the bundled
 * UTF-7 helpers. When nothing succeeds the input is returned unchanged.
 *
 * @param string $str  Input string
 * @param string $from Suspected charset of the input string
 * @param string $to   Target charset to convert to; defaults to RCUBE_CHARSET
 *
 * @return string Converted string
 */
public static function convert($str, $from, $to = null)
{
    static $iconv_options = null;
    static $mbstring_sc   = null;

    $to   = empty($to) ? RCUBE_CHARSET : strtoupper($to);
    $from = self::parse_charset($from);

    // It is a common case when UTF-16 charset is used with US-ASCII content (#1488654)
    // In that case we can just skip the conversion (use UTF-8)
    if ($from == 'UTF-16' && !preg_match('/[^\x00-\x7F]/', $str)) {
        $from = 'UTF-8';
    }

    if ($from == $to || empty($str) || empty($from)) {
        return $str;
    }

    // Probe iconv once per request: '' = no options supported, false = no iconv
    if ($iconv_options === null) {
        if (function_exists('iconv')) {
            // ignore characters not available in output charset
            $iconv_options = '//IGNORE';
            if (iconv('', $iconv_options, '') === false) {
                // iconv implementation does not support options
                $iconv_options = '';
            }
        }
        else {
            $iconv_options = false;
        }
    }

    // convert charset using iconv module
    if ($iconv_options !== false && $from != 'UTF7-IMAP' && $to != 'UTF7-IMAP'
        && $from !== 'ISO-2022-JP' && $to !== 'ISO-2022-JP'
    ) {
        // throw an exception if iconv reports an illegal character in input
        // it means that input string has been truncated
        set_error_handler(array('rcube_charset', 'error_handler'), E_NOTICE);
        try {
            $out = iconv($from, $to . $iconv_options, $str);
        }
        catch (ErrorException $e) {
            $out = false;
        }
        restore_error_handler();

        if ($out !== false) {
            return $out;
        }
    }

    // Remember the configured substitute character (false = no mbstring)
    if ($mbstring_sc === null) {
        $mbstring_sc = extension_loaded('mbstring') ? mb_substitute_character() : false;
    }

    // convert charset using mbstring module
    if ($mbstring_sc !== false) {
        // Map names mbstring does not know to its own labels;
        // !empty() avoids reading keys that are not in the map
        $mb_from = !empty(self::$mb_aliases[$from]) ? self::$mb_aliases[$from] : $from;
        $mb_to   = !empty(self::$mb_aliases[$to]) ? self::$mb_aliases[$to] : $to;

        // Do the same as //IGNORE with iconv
        mb_substitute_character('none');

        // throw an exception if mbstring reports an illegal character in input
        // using mb_check_encoding() is much slower
        set_error_handler(array('rcube_charset', 'error_handler'), E_WARNING);
        try {
            $out = mb_convert_encoding($str, $mb_to, $mb_from);
        }
        catch (ErrorException $e) {
            $out = false;
        }
        catch (Error $e) {
            // PHP 8 throws ValueError for encoding names mbstring does not know;
            // treat it as a failed conversion so the handler and the substitute
            // character below are always restored
            $out = false;
        }
        restore_error_handler();

        mb_substitute_character($mbstring_sc);

        if ($out !== false) {
            return $out;
        }
    }

    // convert charset using bundled classes/functions
    if ($to == 'UTF-8') {
        if ($from == 'UTF7-IMAP') {
            if ($out = self::utf7imap_to_utf8($str)) {
                return $out;
            }
        }
        else if ($from == 'UTF-7') {
            if ($out = self::utf7_to_utf8($str)) {
                return $out;
            }
        }
    }

    // encode string for output
    if ($from == 'UTF-8') {
        // @TODO: we need a function for UTF-7 (RFC2152) conversion
        if ($to == 'UTF7-IMAP' || $to == 'UTF-7') {
            if ($out = self::utf8_to_utf7imap($str)) {
                return $out;
            }
        }
    }

    // $out is unset only when no converter was even attempted
    if (!isset($out)) {
        trigger_error("No suitable function found for '$from' to '$to' conversion");
    }

    // return original string
    return $str;
}
/**
 * Converts string from standard UTF-7 (RFC 2152) to UTF-8.
 *
 * A "+" starts a run of modified-base64 characters holding UTF-16BE data.
 * The run ends at the first character outside the base64 alphabet; a "-"
 * terminator is absorbed, any other terminator is kept as literal text.
 * "+-" is the escape for a literal "+". A "+" followed by neither base64
 * data nor "-" is ill-formed and is dropped.
 *
 * @param string $str Input string (UTF-7)
 *
 * @return string Converted string (UTF-8)
 */
public static function utf7_to_utf8($str)
{
    // Standard base64 alphabet, including "/" (RFC 2152 does not use the IMAP "," variant)
    $alphabet = 'ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/';

    $str = strval($str);
    $len = strlen($str);
    $res = '';
    $i   = 0;

    while ($i < $len) {
        $u7 = $str[$i++];

        if ($u7 != '+') {
            $res .= $u7;
            continue;
        }

        // Length of the base64 run following the shift character;
        // strspn() is byte-safe, so bytes >= 0x80 simply end the run
        $run = strspn($str, $alphabet, $i);

        if ($run > 0) {
            $res .= self::utf16_to_utf8(base64_decode(substr($str, $i, $run)));
            $i   += $run;

            // only "-" is absorbed, other terminators are processed as plain text
            if ($i < $len && $str[$i] == '-') {
                $i++;
            }
        }
        else if ($i < $len && $str[$i] == '-') {
            // "+-" encodes a literal plus sign
            $res .= '+';
            $i++;
        }
    }

    return $res;
}
/**
 * Converts string from UTF-16 to UTF-8 (helper for utf-7 to utf-8 conversion)
 *
 * Input is read as big-endian 16-bit units. A valid surrogate pair is
 * combined into one code point and written as a 4-byte sequence; an
 * unpaired surrogate is written as a 3-byte sequence, as any other unit.
 * A trailing odd byte (incomplete unit) is ignored.
 *
 * @param string $str Input string
 *
 * @return string The converted string
 */
public static function utf16_to_utf8($str)
{
    $len = strlen($str);
    $dec = '';

    // "$i + 1 < $len" so an incomplete trailing unit is never read
    for ($i = 0; $i + 1 < $len; $i += 2) {
        $c = ord($str[$i]) << 8 | ord($str[$i + 1]);

        // high surrogate followed by a low surrogate: one supplementary code point
        if ($c >= 0xD800 && $c <= 0xDBFF && $i + 3 < $len) {
            $low = ord($str[$i + 2]) << 8 | ord($str[$i + 3]);
            if ($low >= 0xDC00 && $low <= 0xDFFF) {
                $c  = 0x10000 + (($c - 0xD800) << 10) + ($low - 0xDC00);
                $i += 2;
            }
        }

        if ($c >= 0x0001 && $c <= 0x007F) {
            $dec .= chr($c);
        }
        else if ($c > 0xFFFF) {
            $dec .= chr(0xF0 | (($c >> 18) & 0x07));
            $dec .= chr(0x80 | (($c >> 12) & 0x3F));
            $dec .= chr(0x80 | (($c >>  6) & 0x3F));
            $dec .= chr(0x80 | (($c >>  0) & 0x3F));
        }
        else if ($c > 0x07FF) {
            $dec .= chr(0xE0 | (($c >> 12) & 0x0F));
            $dec .= chr(0x80 | (($c >>  6) & 0x3F));
            $dec .= chr(0x80 | (($c >>  0) & 0x3F));
        }
        else {
            // note: U+0000 also lands here and becomes the two-byte form C0 80
            $dec .= chr(0xC0 | (($c >>  6) & 0x1F));
            $dec .= chr(0x80 | (($c >>  0) & 0x3F));
        }
    }

    return $dec;
}
/**
 * Convert the data ($str) from RFC 2060's UTF-7 to UTF-8.
 * If input data is invalid, return an empty string.
 * RFC 2060 obviously intends the encoding to be unique (see
 * point 5 in section 5.1.3), so we reject any non-canonical
 * form, such as &ACY- (instead of &-) or &AMA-&AMA- (instead
 * of &AMAAwA-).
 *
 * Translated from C to PHP by Thomas Bruederli <roundcube@gmail.com>
 *
 * @param string $str Input string (UTF7-IMAP)
 *
 * @return string Output string (UTF-8), empty string on invalid input
 */
public static function utf7imap_to_utf8($str)
{
    // Maps an ASCII code to its 6-bit value in the IMAP base64 alphabet
    // ("," takes the place of "/"); -1 marks characters outside the alphabet
    $Index_64 = array(
        -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1,
        -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1,
        -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,62, 63,-1,-1,-1,
        52,53,54,55, 56,57,58,59, 60,61,-1,-1, -1,-1,-1,-1,
        -1, 0, 1, 2,  3, 4, 5, 6,  7, 8, 9,10, 11,12,13,14,
        15,16,17,18, 19,20,21,22, 23,24,25,-1, -1,-1,-1,-1,
        -1,26,27,28, 29,30,31,32, 33,34,35,36, 37,38,39,40,
        41,42,43,44, 45,46,47,48, 49,50,51,-1, -1,-1,-1,-1
    );

    $str   = strval($str);
    $u7len = strlen($str);
    $p     = '';
    $err   = '';

    // $i is the read position, $u7len the number of bytes left from $i on
    for ($i=0; $u7len > 0; $i++, $u7len--) {
        $u7 = $str[$i];
        if ($u7 == '&') {
            $i++;
            $u7len--;
            // "&" may be the very last byte: do not read past the end
            $u7 = $u7len > 0 ? $str[$i] : '';

            // "&-" is the escape for a literal ampersand
            if ($u7len && $u7 == '-') {
                $p .= '&';
                continue;
            }

            // $ch accumulates one 16-bit unit, $k is the shift for the next 6 bits
            $ch = 0;
            $k  = 10;
            for (; $u7len > 0; $i++, $u7len--) {
                $u7 = $str[$i];

                if ((ord($u7) & 0x80) || ($b = $Index_64[ord($u7)]) == -1) {
                    break;
                }

                if ($k > 0) {
                    $ch |= $b << $k;
                    $k -= 6;
                }
                else {
                    // unit complete: emit it as UTF-8
                    $ch |= $b >> (-$k);
                    if ($ch < 0x80) {
                        // Printable US-ASCII
                        if (0x20 <= $ch && $ch < 0x7f) {
                            return $err;
                        }
                        $p .= chr($ch);
                    }
                    else if ($ch < 0x800) {
                        $p .= chr(0xc0 | ($ch >> 6));
                        $p .= chr(0x80 | ($ch & 0x3f));
                    }
                    else {
                        $p .= chr(0xe0 | ($ch >> 12));
                        $p .= chr(0x80 | (($ch >> 6) & 0x3f));
                        $p .= chr(0x80 | ($ch & 0x3f));
                    }

                    // carry the unused low bits of $b into the next unit
                    $ch = ($b << (16 + $k)) & 0xffff;
                    $k += 10;
                }
            }

            // Non-zero or too many extra bits
            if ($ch || $k < 6) {
                return $err;
            }

            // BASE64 not properly terminated
            if (!$u7len || $u7 != '-') {
                return $err;
            }

            // Adjacent BASE64 sections
            if ($u7len > 2 && $str[$i+1] == '&' && $str[$i+2] != '-') {
                return $err;
            }
        }
        // Not printable US-ASCII
        else if (ord($u7) < 0x20 || ord($u7) >= 0x7f) {
            return $err;
        }
        else {
            $p .= $u7;
        }
    }

    return $p;
}
/**
 * Convert the data ($str) from UTF-8 to RFC 2060's UTF-7.
 * Unicode characters above U+FFFF are replaced by U+FFFE.
 * If input data is invalid, return an empty string.
 *
 * Characters in the range 0x20-0x7E are copied literally ("&" is written
 * as "&-"); everything else is written as 16-bit units inside a
 * "&...-" base64 section.
 *
 * Translated from C to PHP by Thomas Bruederli <roundcube@gmail.com>
 *
 * @param string $str Input string (UTF-8)
 *
 * @return string Output string (UTF7-IMAP), empty string on invalid input
 */
public static function utf8_to_utf7imap($str)
{
// Output alphabet indexed by 6-bit value; note the "," as the last character
$B64Chars = array(
'A', 'B', 'C', 'D', 'E', 'F', 'G', 'H', 'I', 'J', 'K', 'L', 'M', 'N', 'O',
'P', 'Q', 'R', 'S', 'T', 'U', 'V', 'W', 'X', 'Y', 'Z', 'a', 'b', 'c', 'd',
'e', 'f', 'g', 'h', 'i', 'j', 'k', 'l', 'm', 'n', 'o', 'p', 'q', 'r', 's',
't', 'u', 'v', 'w', 'x', 'y', 'z', '0', '1', '2', '3', '4', '5', '6', '7',
'8', '9', '+', ','
);
// $u8len: bytes left to read, $i: read position, $base64: 1 while inside a "&...-" section
$u8len = strlen($str);
$base64 = 0;
$i = 0;
$p = '';
$err = '';
while ($u8len) {
$u8 = $str[$i];
$c = ord($u8);
// Classify the lead byte: $ch gets its payload bits, $n the number of continuation bytes
if ($c < 0x80) {
$ch = $c;
$n = 0;
}
else if ($c < 0xc2) {
// a continuation byte or 0xC0/0xC1 cannot start a sequence
return $err;
}
else if ($c < 0xe0) {
$ch = $c & 0x1f;
$n = 1;
}
else if ($c < 0xf0) {
$ch = $c & 0x0f;
$n = 2;
}
else if ($c < 0xf8) {
$ch = $c & 0x07;
$n = 3;
}
else if ($c < 0xfc) {
$ch = $c & 0x03;
$n = 4;
}
else if ($c < 0xfe) {
$ch = $c & 0x01;
$n = 5;
}
else {
return $err;
}
$i++;
$u8len--;
// sequence is truncated
if ($n > $u8len) {
return $err;
}
// append the payload of every continuation byte (must look like 10xxxxxx)
for ($j=0; $j < $n; $j++) {
$o = ord($str[$i+$j]);
if (($o & 0xc0) != 0x80) {
return $err;
}
$ch = ($ch << 6) | ($o & 0x3f);
}
// NOTE(review): presumably rejects overlong encodings (value too small
// for a sequence of this length) - confirm against the original C code
if ($n > 1 && !($ch >> ($n * 5 + 1))) {
return $err;
}
$i += $n;
$u8len -= $n;
// not printable US-ASCII: goes into a base64 section
if ($ch < 0x20 || $ch >= 0x7f) {
if (!$base64) {
// open a new section; $b holds pending bits, $k the shift for the next unit
$p .= '&';
$base64 = 1;
$b = 0;
$k = 10;
}
// anything that does not fit in 16 bits is replaced by U+FFFE
if ($ch & ~0xffff) {
$ch = 0xfffe;
}
$p .= $B64Chars[($b | $ch >> $k)];
$k -= 6;
for (; $k >= 0; $k -= 6) {
$p .= $B64Chars[(($ch >> $k) & 0x3f)];
}
// keep the bits that did not fill a whole base64 character
$b = ($ch << (-$k)) & 0x3f;
$k += 16;
}
else {
if ($base64) {
// close the open section, flushing pending bits first
if ($k > 10) {
$p .= $B64Chars[$b];
}
$p .= '-';
$base64 = 0;
}
$p .= chr($ch);
// a literal ampersand is written as "&-"
if (chr($ch) == '&') {
$p .= '-';
}
}
}
// input ended inside a base64 section: flush and close it
if ($base64) {
if ($k > 10) {
$p .= $B64Chars[$b];
}
$p .= '-';
}
return $p;
}
/**
 * A method to guess character set of a string.
 *
 * Checks, in order: a byte order mark, the position of NUL bytes in the
 * first four bytes, a language-specific priority list (mb_check_encoding),
 * mb_detect_encoding over a general list, and finally a UTF-8 pattern
 * match on the first 2048 bytes.
 *
 * @param string $string   String
 * @param string $failover Default result for failover
 * @param string $language User language; defaults to the current user's language
 *
 * @return string Charset name, $failover when nothing matched
 */
public static function detect($string, $failover = null, $language = null)
{
    if (substr($string, 0, 4) == "\0\0\xFE\xFF") return 'UTF-32BE';  // Big Endian
    if (substr($string, 0, 4) == "\xFF\xFE\0\0") return 'UTF-32LE';  // Little Endian
    if (substr($string, 0, 2) == "\xFE\xFF")     return 'UTF-16BE';  // Big Endian
    if (substr($string, 0, 2) == "\xFF\xFE")     return 'UTF-16LE';  // Little Endian
    if (substr($string, 0, 3) == "\xEF\xBB\xBF") return 'UTF-8';

    // heuristics, they look at four bytes so shorter input is skipped
    if (strlen($string) >= 4) {
        if ($string[0] == "\0" && $string[1] == "\0" && $string[2] == "\0" && $string[3] != "\0") return 'UTF-32BE';
        if ($string[0] != "\0" && $string[1] == "\0" && $string[2] == "\0" && $string[3] == "\0") return 'UTF-32LE';
        if ($string[0] == "\0" && $string[1] != "\0" && $string[2] == "\0" && $string[3] != "\0") return 'UTF-16BE';
        if ($string[0] != "\0" && $string[1] == "\0" && $string[2] != "\0" && $string[3] == "\0") return 'UTF-16LE';
    }

    if (empty($language)) {
        $rcube    = rcube::get_instance();
        $language = $rcube->get_user_language();
    }

    // stays null for languages without a dedicated list
    $prio = null;

    // Prioritize charsets according to current language (#1485669)
    switch ($language) {
    case 'ja_JP':
        $prio = array('ISO-2022-JP', 'JIS', 'UTF-8', 'EUC-JP', 'eucJP-win', 'SJIS', 'SJIS-win');
        break;

    case 'zh_CN':
    case 'zh_TW':
        $prio = array('UTF-8', 'BIG-5', 'GB2312', 'EUC-TW');
        break;

    case 'ko_KR':
        $prio = array('UTF-8', 'EUC-KR', 'ISO-2022-KR');
        break;

    case 'ru_RU':
        $prio = array('UTF-8', 'WINDOWS-1251', 'KOI8-R');
        break;

    case 'tr_TR':
        $prio = array('UTF-8', 'ISO-8859-9', 'WINDOWS-1254');
        break;
    }

    // mb_detect_encoding() is not reliable for some charsets (#1490135)
    // use mb_check_encoding() to make charset priority lists really working
    if ($prio && function_exists('mb_check_encoding')) {
        foreach ($prio as $encoding) {
            if (mb_check_encoding($string, $encoding)) {
                return $encoding;
            }
        }
    }

    if (function_exists('mb_detect_encoding')) {
        if (!$prio) {
            $prio = array('UTF-8', 'SJIS', 'GB2312',
                'ISO-8859-1', 'ISO-8859-2', 'ISO-8859-3', 'ISO-8859-4',
                'ISO-8859-5', 'ISO-8859-6', 'ISO-8859-7', 'ISO-8859-8', 'ISO-8859-9',
                'ISO-8859-10', 'ISO-8859-13', 'ISO-8859-14', 'ISO-8859-15', 'ISO-8859-16',
                'WINDOWS-1252', 'WINDOWS-1251', 'EUC-JP', 'EUC-TW', 'KOI8-R', 'BIG-5',
                'ISO-2022-KR', 'ISO-2022-JP',
            );
        }

        // preferred charsets first, then everything else mbstring knows
        $encodings = array_unique(array_merge($prio, mb_list_encodings()));

        if ($encoding = mb_detect_encoding($string, $encodings)) {
            return $encoding;
        }
    }

    // No match, check for UTF-8
    // from http://w3.org/International/questions/qa-forms-utf-8.html
    if (preg_match('/\A(
        [\x09\x0A\x0D\x20-\x7E]
        | [\xC2-\xDF][\x80-\xBF]
        | \xE0[\xA0-\xBF][\x80-\xBF]
        | [\xE1-\xEC\xEE\xEF][\x80-\xBF]{2}
        | \xED[\x80-\x9F][\x80-\xBF]
        | \xF0[\x90-\xBF][\x80-\xBF]{2}
        | [\xF1-\xF3][\x80-\xBF]{3}
        | \xF4[\x80-\x8F][\x80-\xBF]{2}
        )*\z/xs', substr($string, 0, 2048))
    ) {
        return 'UTF-8';
    }

    return $failover;
}
/**
 * Removes non-unicode characters from input.
 *
 * Arrays are processed recursively, element by element; values that are
 * neither arrays nor non-empty strings are returned untouched.
 *
 * @param mixed $input String or array.
 *
 * @return mixed String or array
 */
public static function clean($input)
{
    // arrays: clean every element in place, keys are preserved
    if (is_array($input)) {
        foreach (array_keys($input) as $key) {
            $input[$key] = self::clean($input[$key]);
        }

        return $input;
    }

    if (!is_string($input)) {
        return $input;
    }

    if ($input == '') {
        return $input;
    }

    // iconv/mbstring are much faster (especially with long strings)
    if (function_exists('mb_convert_encoding')) {
        $saved = mb_substitute_character();
        mb_substitute_character('none');
        $converted = mb_convert_encoding($input, 'UTF-8', 'UTF-8');
        mb_substitute_character($saved);

        if ($converted !== false) {
            return $converted;
        }
    }

    if (function_exists('iconv')) {
        $converted = @iconv('UTF-8', 'UTF-8//IGNORE', $input);

        if ($converted !== false) {
            return $converted;
        }
    }

    // Pure-PHP fallback: collect each multibyte sequence and keep it only
    // when it is well-formed. The leading empty alternative stands in for
    // the disabled single-byte (UTF8-1) rule.
    $alternatives = array(
        '',
        '[\xC2-\xDF][\x80-\xBF]',                        // UTF8-2
        '\xE0[\xA0-\xBF][\x80-\xBF]',                    // UTF8-3
        '[\xE1-\xEC][\x80-\xBF][\x80-\xBF]',             // UTF8-3
        '\xED[\x80-\x9F][\x80-\xBF]',                    // UTF8-3
        '[\xEE-\xEF][\x80-\xBF][\x80-\xBF]',             // UTF8-3
        '\xF0[\x90-\xBF][\x80-\xBF][\x80-\xBF]',         // UTF8-4
        '[\xF1-\xF3][\x80-\xBF][\x80-\xBF][\x80-\xBF]',  // UTF8-4
        '\xF4[\x80-\x8F][\x80-\xBF][\x80-\xBF]',         // UTF8-4
    );
    $pattern = '/^(' . implode('|', $alternatives) . ')$/';

    $pending = '';
    $result  = '';
    $length  = strlen($input);

    for ($pos = 0; $pos < $length; $pos++) {
        $byte = $input[$pos];
        $code = ord($byte);

        // continuation byte: extends the open sequence, dropped when there is none
        if ($code > 0x7F && $code < 0xC0) {
            if ($pending !== '') {
                $pending .= $byte;
            }
            continue;
        }

        // an ASCII or lead byte ends the open sequence
        if ($pending !== '') {
            if (preg_match($pattern, $pending)) {
                $result .= $pending;
            }
            $pending = '';
        }

        if ($code <= 0x7F) {
            $result .= $byte;
        }
        else {
            $pending = $byte;
        }
    }

    if ($pending !== '' && preg_match($pattern, $pending)) {
        $result .= $pending;
    }

    return $result;
}
}
diff --git a/program/lib/Roundcube/rcube_mime.php b/program/lib/Roundcube/rcube_mime.php
index d3848d507..b2aaba7bb 100644
--- a/program/lib/Roundcube/rcube_mime.php
+++ b/program/lib/Roundcube/rcube_mime.php
@@ -1,896 +1,899 @@
<?php
/**
+-----------------------------------------------------------------------+
| This file is part of the Roundcube Webmail client |
| |
| Copyright (C) The Roundcube Dev Team |
| Copyright (C) Kolab Systems AG |
| |
| Licensed under the GNU General Public License version 3 or |
| any later version with exceptions for skins & plugins. |
| See the README file for a full license statement. |
| |
| PURPOSE: |
| MIME message parsing utilities |
+-----------------------------------------------------------------------+
| Author: Thomas Bruederli <roundcube@gmail.com> |
| Author: Aleksander Machniak <alec@alec.pl> |
+-----------------------------------------------------------------------+
*/
/**
* Class for parsing MIME messages
*
* @package Framework
* @subpackage Storage
*/
class rcube_mime
{
private static $default_charset;
/**
 * Object constructor.
 *
 * The charset is stored in a static property, so it applies to every later
 * get_charset() call on the class, not only to this instance.
 *
 * @param string $default_charset Charset returned by get_charset(); when empty,
 *                                get_charset() uses its other sources
 */
function __construct($default_charset = null)
{
self::$default_charset = $default_charset;
}
/**
 * Returns message/object character set name
 *
 * Sources, in order of precedence: the charset given to the constructor,
 * the 'default_charset' config option, the RCUBE_CHARSET constant.
 *
 * @return string Character set name
 */
public static function get_charset()
{
    $charset = self::$default_charset;

    // consult the configuration only when no charset was set explicitly
    if (!$charset) {
        $charset = rcube::get_instance()->config->get('default_charset');
    }

    return $charset ?: RCUBE_CHARSET;
}
/**
 * Parse the given raw message source and return a structure
 * of rcube_message_part objects.
 *
 * Delegates to rcube_mime_decode, asking it to include and decode bodies
 * while leaving headers undecoded.
 *
 * @param string $raw_body The message source
 *
 * @return object rcube_message_part The message structure
 */
public static function parse_message($raw_body)
{
    $decoder = new rcube_mime_decode(array(
        'include_bodies'  => true,
        'decode_bodies'   => true,
        'decode_headers'  => false,
        'default_charset' => self::get_charset(),
    ));

    return $decoder->decode($raw_body);
}
/**
 * Split an address list into a structured array list
 *
 * The result is indexed from 1. Each entry is either the bare address
 * ($addronly) or an array with 'name', 'mailto' and 'string' keys, where
 * 'string' is the display form with the name quoted when it needs it.
 *
 * @param string|array $input    Input string (or list of strings)
 * @param int          $max      List only this number of addresses
 * @param boolean      $decode   Decode address strings
 * @param string       $fallback Fallback charset if none specified
 * @param boolean      $addronly Return flat array with e-mail addresses only
 *
 * @return array Indexed list of addresses
 */
static function decode_address_list($input, $max = null, $decode = true, $fallback = null, $addronly = false)
{
    // A common case when the same header is used many times in a mail message
    if (is_array($input)) {
        $input = implode(', ', $input);
    }

    $parsed = self::parse_address_list($input, $decode, $fallback);
    $list   = array();

    if (!is_array($parsed)) {
        return $list;
    }

    // Special chars as defined by RFC 822 need to in quoted string (or escaped).
    $needs_quoting = '/[\(\)\<\>\\\.\[\]@,;:"]/';
    $index         = 0;

    foreach ($parsed as $item) {
        $index++;
        $address = trim($item['address']);

        if ($addronly) {
            $list[$index] = $address;
        }
        else {
            $name = trim($item['name']);

            if ($name && $address && $name != $address) {
                $display = preg_match($needs_quoting, $name) ? '"'.addcslashes($name, '"').'"' : $name;
                $string  = sprintf('%s <%s>', $display, $address);
            }
            else if ($address) {
                $string = $address;
            }
            else if ($name) {
                $string = $name;
            }

            $list[$index] = array('name' => $name, 'mailto' => $address, 'string' => $string);
        }

        if ($max && $index == $max) {
            break;
        }
    }

    return $list;
}
/**
 * Decode a message header value
 *
 * Thin wrapper over decode_mime_string() that casts the input to string first.
 *
 * @param string $input    Header value
 * @param string $fallback Fallback charset if none specified
 *
 * @return string Decoded string
 */
public static function decode_header($input, $fallback = null)
{
    return self::decode_mime_string((string) $input, $fallback);
}
/**
 * Decode a mime-encoded string to internal charset
 *
 * RFC 2047 encoded-words are decoded and converted from their own charset;
 * text between them is converted from the fallback charset.
 *
 * @param string $input    Header value
 * @param string $fallback Fallback charset if none specified
 *
 * @return string Decoded string
 */
public static function decode_mime_string($input, $fallback = null)
{
    $default_charset = $fallback ?: self::get_charset();

    // rfc: all line breaks or other characters not found
    // in the Base64 Alphabet must be ignored by decoding software
    // delete all blanks between MIME-lines, differently we can
    // receive unnecessary blanks and broken utf-8 symbols
    $input = preg_replace("/\?=\s+=\?/", '?==?', $input);

    // encoded-word regexp
    $re = '/=\?([^?]+)\?([BbQq])\?([^\n]*?)\?=/';

    // no encoding information, use fallback
    if (!preg_match_all($re, $input, $matches, PREG_OFFSET_CAPTURE | PREG_SET_ORDER)) {
        return rcube_charset::convert($input, $default_charset);
    }

    $tmp   = array(); // payloads of adjacent encoded-words waiting to be decoded together
    $out   = '';
    $start = 0;       // offset in $input of the first byte not yet consumed

    foreach ($matches as $idx => $m) {
        $pos      = $m[0][1];
        $charset  = $m[1][0];
        $encoding = $m[2][0];
        $text     = $m[3][0];
        $length   = strlen($m[0][0]);

        // Append everything that is before the text to be decoded
        if ($start != $pos) {
            $substr = substr($input, $start, $pos-$start);
            $out   .= rcube_charset::convert($substr, $default_charset);
            $start  = $pos;
        }
        $start += $length;

        // Per RFC2047, each string part "MUST represent an integral number
        // of characters . A multi-octet character may not be split across
        // adjacent encoded-words." However, some mailers break this, so we
        // try to handle characters spanned across parts anyway by iterating
        // through and aggregating sequential encoded parts with the same
        // character set and encoding, then perform the decoding on the
        // aggregation as a whole.
        $tmp[] = $text;

        // the last match has no successor
        if (isset($matches[$idx+1])) {
            $next_match = $matches[$idx+1];
            if ($next_match[0][1] == $start
                && $next_match[1][0] == $charset
                && $next_match[2][0] == $encoding
            ) {
                continue;
            }
        }

        $count = count($tmp);
        $text  = '';

        // Decode and join encoded-word's chunks
        if ($encoding == 'B' || $encoding == 'b') {
            $rest = '';
            // base64 must be decoded a segment at a time.
            // However, there are broken implementations that continue
            // in the following word, we'll handle that (#6048)
            for ($i=0; $i<$count; $i++) {
                $chunk = $rest . $tmp[$i];
                // the carried bytes are consumed now, do not prepend them again later
                $rest  = '';
                $extra = strlen($chunk) % 4;

                if ($extra) {
                    $length = strlen($chunk) - $extra;
                    $rest   = substr($chunk, $length);
                    $chunk  = substr($chunk, 0, $length);
                }

                $text .= base64_decode($chunk);
            }

            // trailing bytes of an unpadded last word still carry data
            if ($rest !== '') {
                $text .= base64_decode($rest);
            }
        }
        else { //if ($encoding == 'Q' || $encoding == 'q') {
            // quoted printable can be combined and processed at once
            $text = implode('', $tmp);
            $text = str_replace('_', ' ', $text);
            $text = quoted_printable_decode($text);
        }

        $out .= rcube_charset::convert($text, $charset);
        $tmp  = array();
    }

    // add the last part of the input string
    if ($start != strlen($input)) {
        $out .= rcube_charset::convert(substr($input, $start), $default_charset);
    }

    // return the results
    return $out;
}
/**
 * Decode a mime part
 *
 * The encoding name is matched case-insensitively. Unknown encodings
 * (and '7bit') return the input unchanged.
 *
 * @param string $input    Input string
 * @param string $encoding Part encoding
 *
 * @return string Decoded string
 */
public static function decode($input, $encoding = '7bit')
{
    $type = strtolower($encoding);

    if ($type === 'quoted-printable') {
        return quoted_printable_decode($input);
    }

    if ($type === 'base64') {
        return base64_decode($input);
    }

    // all spellings of uuencode seen in the wild
    if (in_array($type, array('x-uuencode', 'x-uue', 'uue', 'uuencode'), true)) {
        return convert_uudecode($input);
    }

    // '7bit' and anything else: nothing to decode
    return $input;
}
/**
 * Split RFC822 header string into an associative array
 *
 * Folded lines are joined first. Field names are lower-cased; when a field
 * occurs more than once the last value wins. Fields with an empty value
 * and lines without a ": " separator are skipped.
 *
 * @param string $headers Raw headers block
 *
 * @return array Header values indexed by lower-cased field name
 */
public static function parse_headers($headers)
{
    $a_headers = array();

    // unfold continuation lines
    $headers = preg_replace('/\r?\n(\t| )+/', ' ', $headers);

    foreach (explode("\n", $headers) as $line) {
        // a separator at offset 0 means an empty field name, skipped like "not found"
        if ($p = strpos($line, ': ')) {
            $field = strtolower(substr($line, 0, $p));
            $value = trim(substr($line, $p+1));

            // strict comparison: "0" is a legitimate header value
            if ($value !== '') {
                $a_headers[$field] = $value;
            }
        }
    }

    return $a_headers;
}
/**
 * E-mail address list parser
 *
 * @param string $str      Address list header value
 * @param bool   $decode   When enabled, names are passed through self::decode_header()
 * @param string $fallback Forwarded to self::decode_header() as its second argument
 *
 * @return array Entries with 'name' and 'address' keys, indexed by the position
 *               of the item in the exploded list. Items for which no address
 *               could be determined are skipped, so keys may be sparse.
 */
private static function parse_address_list($str, $decode = true, $fallback = null)
{
    // remove any newlines and carriage returns before
    $str = preg_replace('/\r?\n(\s|\t)?/', ' ', $str);

    // extract list items, remove comments
    $str = self::explode_header_string(',;', $str, true);

    $result = array();

    // simplified regexp, supporting quoted local part
    $email_rx = '(\S+|("\s*(?:[^"\f\n\r\t\v\b\s]+\s*)+"))@\S+';

    foreach ($str as $key => $val) {
        $name    = '';
        $address = '';
        $val     = trim($val);

        if (preg_match('/(.*)<('.$email_rx.')>$/', $val, $m)) {
            // "Name <address>"
            $address = $m[2];
            $name    = trim($m[1]);
        }
        else if (preg_match('/^('.$email_rx.')$/', $val, $m)) {
            // bare address
            $address = $m[1];
            $name    = '';
        }
        // special case (#1489092)
        else if (preg_match('/(\s*<MAILER-DAEMON>)$/', $val, $m)) {
            $address = 'MAILER-DAEMON';
            $name    = substr($val, 0, -strlen($m[1]));
        }
        else if (preg_match('/('.$email_rx.')/', $val, $m)) {
            // address embedded somewhere in the item; promoted to $address below
            $name = $m[1];
        }
        else {
            $name = $val;
        }

        // dequote and/or decode name
        if ($name) {
            if ($name[0] == '"' && $name[strlen($name)-1] == '"') {
                $name = substr($name, 1, -1);
                $name = stripslashes($name);
            }

            if ($decode) {
                $name = self::decode_header($name, $fallback);

                // some clients encode addressee name with quotes around it;
                // the decoded name may be empty, so make sure there is a
                // first character before looking at it
                if (isset($name[0]) && $name[0] == '"' && $name[strlen($name)-1] == '"') {
                    $name = (string) substr($name, 1, -1);
                }
            }
        }

        // no address found: whatever we have as the name is the best candidate
        if (!$address && $name) {
            $address = $name;
            $name    = '';
        }

        if ($address) {
            $address      = self::fix_email($address);
            $result[$key] = array('name' => $name, 'address' => $address);
        }
    }

    return $result;
}
/**
 * Explodes header (e.g. address-list) string into array of strings
 * using specified separator characters with proper handling
 * of quoted-strings and comments (RFC2822)
 *
 * Separators inside a quoted-string or inside a comment do not split.
 * Quote characters and backslash escapes are kept in the returned items.
 * Empty items are dropped; an item consisting of just "0" is kept.
 *
 * @param string $separator       String containing separator characters
 * @param string $str             Header string
 * @param bool   $remove_comments Enable to remove comments
 *
 * @return array Header items
 */
public static function explode_header_string($separator, $str, $remove_comments = false)
{
    $length  = strlen($str);
    $result  = array();
    $quoted  = false;
    $comment = 0;
    $out     = '';

    for ($i = 0; $i < $length; $i++) {
        // we're inside a quoted string
        if ($quoted) {
            if ($str[$i] == '"') {
                $quoted = false;
            }
            else if ($str[$i] == "\\") {
                if ($comment <= 0) {
                    $out .= "\\";
                }
                $i++;

                // a backslash at the very end of the input escapes nothing;
                // stop here instead of reading past the end of the string
                if ($i >= $length) {
                    break;
                }
            }
        }
        // we are inside a comment string
        else if ($comment > 0) {
            if ($str[$i] == ')') {
                $comment--;
            }
            else if ($str[$i] == '(') {
                $comment++;
            }
            else if ($str[$i] == "\\") {
                // skip the escaped character
                $i++;
            }
            continue;
        }
        // separator, add to result array
        else if (strpos($separator, $str[$i]) !== false) {
            // explicit comparison: a truthiness check would lose the item "0"
            if ($out !== '') {
                $result[] = $out;
            }
            $out = '';
            continue;
        }
        // start of quoted string
        else if ($str[$i] == '"') {
            $quoted = true;
        }
        // start of comment
        else if ($remove_comments && $str[$i] == '(') {
            $comment++;
        }

        if ($comment <= 0) {
            $out .= $str[$i];
        }
    }

    // trailing item; it is discarded when the input ends inside a comment
    if ($out !== '' && $comment <= 0) {
        $result[] = $out;
    }

    return $result;
}
/**
 * Interpret a format=flowed message body according to RFC 2646
 *
 * A line ending with a space is "flowed": the following line is appended to
 * it when both have the same quoting level. The signature separator "-- "
 * is never joined with its neighbours.
 *
 * @param string  $text  Raw body formatted as flowed text
 * @param string  $mark  When set, prepended to every output line that
 *                       had at least one following line joined to it
 * @param boolean $delsp Remove the trailing space of each flowed line
 *
 * @return string Interpreted text with unwrapped lines and stuffed space removed
 */
public static function unfold_flowed($text, $mark = null, $delsp = false)
{
    $text    = preg_split('/\r?\n/', $text);
    $last    = -1;
    $q_level = 0;
    $marks   = array();

    foreach ($text as $idx => $line) {
        if ($q = strspn($line, '>')) {
            // remove quote chars
            $line = (string) substr($line, $q);

            // remove (optional) space-stuffing; the remainder may be empty
            // (a line made of quote chars only), so check the offset exists
            if (isset($line[0]) && $line[0] === ' ') {
                $line = (string) substr($line, 1);
            }

            // The same paragraph (We join current line with the previous one) when:
            // - the same level of quoting
            // - previous line was flowed
            // - previous line contains more than only one single space (and quote char(s))
            if ($q == $q_level
                && isset($text[$last]) && substr($text[$last], -1) === ' '
                && !preg_match('/^>+ {0,1}$/', $text[$last])
            ) {
                if ($delsp) {
                    $text[$last] = substr($text[$last], 0, -1);
                }
                $text[$last] .= $line;
                unset($text[$idx]);

                if ($mark) {
                    $marks[$last] = true;
                }
            }
            else {
                // start of a new quoted paragraph, the line keeps its quote chars
                $last = $idx;
            }
        }
        else {
            if ($line == '-- ') {
                $last = $idx;
            }
            else {
                // remove space-stuffing (empty lines have no first character)
                if (isset($line[0]) && $line[0] === ' ') {
                    $line = (string) substr($line, 1);
                }

                // Join with the previous line when it was an unquoted flowed
                // line other than the signature separator. substr() is used
                // for the last-character test because the previous line may
                // be empty, and the current line is compared with '' so that
                // a continuation line "0" is joined like any other text.
                if (isset($text[$last]) && $line !== '' && !$q_level
                    && $text[$last] != '-- '
                    && substr($text[$last], -1) === ' '
                ) {
                    if ($delsp) {
                        $text[$last] = substr($text[$last], 0, -1);
                    }
                    $text[$last] .= $line;
                    unset($text[$idx]);

                    if ($mark) {
                        $marks[$last] = true;
                    }
                }
                else {
                    $text[$idx] = $line;
                    $last = $idx;
                }
            }
        }

        $q_level = $q;
    }

    if (!empty($marks)) {
        foreach (array_keys($marks) as $mk) {
            $text[$mk] = $mark . $text[$mk];
        }
    }

    return implode("\r\n", $text);
}
/**
 * Wrap the given text to comply with RFC 2646
 *
 * Every input line except the signature separator "-- " is wrapped with
 * self::wordwrap() using a soft line break (space + CRLF). Quoted lines get
 * their quote prefix repeated on every wrapped line; unquoted lines that
 * start with "From", a space or ">" are space-stuffed.
 *
 * @param string $text    Text to wrap
 * @param int    $length  Length
 * @param string $charset Character encoding of $text
 *
 * @return string Wrapped text
 */
public static function format_flowed($text, $length = 72, $charset=null)
{
    $text = preg_split('/\r?\n/', $text);

    foreach ($text as $idx => $line) {
        if ($line != '-- ') {
            if ($level = strspn($line, '>')) {
                // remove quote chars
                $line = (string) substr($line, $level);
                // remove (optional) space-stuffing and spaces before the line end
                $line = rtrim($line, ' ');

                // a quote-only line (e.g. ">") leaves nothing behind,
                // so make sure there is a first character to inspect
                if (isset($line[0]) && $line[0] === ' ') {
                    $line = substr($line, 1);
                }

                $prefix = str_repeat('>', $level) . ' ';
                $line   = $prefix . self::wordwrap($line, $length - $level - 2, " \r\n$prefix", false, $charset);
            }
            else if ($line) {
                $line = self::wordwrap(rtrim($line), $length - 2, " \r\n", false, $charset);
                // space-stuffing
                $line = preg_replace('/(^|\r\n)(From| |>)/', '\\1 \\2', $line);
            }

            $text[$idx] = $line;
        }
    }

    return implode("\r\n", $text);
}
/**
* Improved wordwrap function with multibyte support.
* The code is based on Zend_Text_MultiByte::wordWrap().
*
* The input is consumed from the front one output line at a time; positions
* and lengths are computed with the mb_* functions. When $charset is set and
* differs from RCUBE_CHARSET, the mbstring internal encoding is switched to it
* for the duration of the call and set back to RCUBE_CHARSET before returning.
* If rcube_charset::$mb_aliases has an entry for $charset, that entry is used
* as the charset name instead.
*
* @param string $string Text to wrap
* @param int $width Line width
* @param string $break Line separator used to join the resulting lines
* @param bool $cut Enable to cut words longer than $width. Only a strict
* boolean false disables cutting.
* @param string $charset Charset of $string
* @param bool $wrap_quoted When enabled quoted lines (starting with ">")
* will not be wrapped
*
* @return string Text
*/
public static function wordwrap($string, $width=75, $break="\n", $cut=false, $charset=null, $wrap_quoted=true)
{
// Note: Never try to use iconv instead of mbstring functions here
// Iconv's substr/strlen are 100x slower (#1489113)
+ if (isset(rcube_charset::$mb_aliases[$charset])) {
+ $charset = rcube_charset::$mb_aliases[$charset];
+ }
// switch mbstring to the text's charset; undone right before returning
if ($charset && $charset != RCUBE_CHARSET) {
mb_internal_encoding($charset);
}
// Convert \r\n to \n, this is our line-separator
$string = str_replace("\r\n", "\n", $string);
$separator = "\n"; // must be 1 character length
$result = array();
// each iteration extracts one output line ($subString) and computes how
// many characters to drop from the front of $string ($cutLength);
// a null $cutLength means the rest of the input has been consumed
while (($stringLength = mb_strlen($string)) > 0) {
$breakPos = mb_strpos($string, $separator, 0);
// quoted line (do not wrap)
if ($wrap_quoted && $string[0] == '>') {
// no further line break (or only a trailing one): take everything
if ($breakPos === $stringLength - 1 || $breakPos === false) {
$subString = $string;
$cutLength = null;
}
else {
$subString = mb_substr($string, 0, $breakPos);
$cutLength = $breakPos + 1;
}
}
// next line found and current line is shorter than the limit
else if ($breakPos !== false && $breakPos < $width) {
if ($breakPos === $stringLength - 1) {
$subString = $string;
$cutLength = null;
}
else {
$subString = mb_substr($string, 0, $breakPos);
$cutLength = $breakPos + 1;
}
}
// current line is longer than the limit (or it is the last line)
else {
$subString = mb_substr($string, 0, $width);
// last line
if ($breakPos === false && $subString === $string) {
$cutLength = null;
}
else {
$nextChar = mb_substr($string, $width, 1);
// the limit falls exactly on a word/line boundary
if ($nextChar === ' ' || $nextChar === $separator) {
$afterNextChar = mb_substr($string, $width + 1, 1);
// Note: mb_substr() does never return False
if ($afterNextChar === false || $afterNextChar === '') {
$subString .= $nextChar;
}
$cutLength = mb_strlen($subString) + 1;
}
else {
// break at the last space within the first $width characters
$spacePos = mb_strrpos($subString, ' ', 0);
if ($spacePos !== false) {
$subString = mb_substr($subString, 0, $spacePos);
$cutLength = $spacePos + 1;
}
// no space to break at and cutting is disabled: let the line
// run on to the next space or line break, whichever is first
else if ($cut === false) {
$spacePos = mb_strpos($string, ' ', 0);
if ($spacePos !== false && ($breakPos === false || $spacePos < $breakPos)) {
$subString = mb_substr($string, 0, $spacePos);
$cutLength = $spacePos + 1;
}
else if ($breakPos === false) {
$subString = $string;
$cutLength = null;
}
else {
$subString = mb_substr($string, 0, $breakPos);
$cutLength = $breakPos + 1;
}
}
// cutting enabled: hard break after exactly $width characters
else {
$cutLength = $width;
}
}
}
}
$result[] = $subString;
if ($cutLength !== null) {
$string = mb_substr($string, $cutLength, ($stringLength - $cutLength));
}
else {
break;
}
}
// restore the mbstring internal encoding changed above
if ($charset && $charset != RCUBE_CHARSET) {
mb_internal_encoding(RCUBE_CHARSET);
}
return implode($break, $result);
}
/**
 * A method to guess the mime_type of an attachment.
 *
 * Detection order: file name suffix looked up in the mimetypes.php map(s),
 * then the fileinfo extension, then mime_content_type(), then $failover.
 *
 * @param string  $path        Path to the file or file contents
 * @param string  $name        File name (with suffix)
 * @param string  $failover    Mime type supplied for failover
 * @param boolean $is_stream   Set to True if $path contains file contents
 * @param boolean $skip_suffix Set to True if the config/mimetypes.php mapping should be ignored
 *
 * @return string
 * @author Till Klampaeckel <till@php.net>
 * @see    http://de2.php.net/manual/en/ref.fileinfo.php
 * @see    http://de2.php.net/mime_content_type
 */
public static function file_content_type($path, $name, $failover = 'application/octet-stream', $is_stream = false, $skip_suffix = false)
{
    // suffix => mime type map, loaded once per request
    static $mime_ext = array();

    $mime_type = null;
    $config    = rcube::get_instance()->config;

    if (!$skip_suffix && empty($mime_ext)) {
        foreach ($config->resolve_paths('mimetypes.php') as $fpath) {
            $map = @include($fpath);
            // a missing file or one without a return statement gives
            // false/1, which must not end up as an entry in the map
            if (is_array($map)) {
                $mime_ext = array_merge($mime_ext, $map);
            }
        }
    }

    // use file name suffix with hard-coded mime-type map
    if (!$skip_suffix && is_array($mime_ext) && $name) {
        // only names that really contain a dot have a suffix; without this
        // check "abc" would be treated as a file with suffix "bc"
        $dot = strrpos($name, '.');

        if ($dot !== false && ($suffix = substr($name, $dot + 1))) {
            $suffix = strtolower($suffix);
            if (isset($mime_ext[$suffix])) {
                $mime_type = $mime_ext[$suffix];
            }
        }
    }

    // try fileinfo extension if available
    if (!$mime_type && function_exists('finfo_open')) {
        $mime_magic = $config->get('mime_magic');

        // null as a 2nd argument should be the same as no argument
        // this however is not true on all systems/versions
        if ($mime_magic) {
            $finfo = finfo_open(FILEINFO_MIME, $mime_magic);
        }
        else {
            $finfo = finfo_open(FILEINFO_MIME);
        }

        if ($finfo) {
            $func      = $is_stream ? 'finfo_buffer' : 'finfo_file';
            $mime_type = $func($finfo, $path, FILEINFO_MIME_TYPE);
            finfo_close($finfo);
        }
    }

    // try PHP's mime_content_type
    if (!$mime_type && !$is_stream && function_exists('mime_content_type')) {
        $mime_type = @mime_content_type($path);
    }

    // fall back to user-submitted string
    if (!$mime_type) {
        $mime_type = $failover;
    }

    return $mime_type;
}
/**
 * Get mimetype => file extension mapping
 *
 * The mapping is read from the first readable mime.types file (the
 * 'mime_types' config option first, then a list of common locations). When
 * that gives nothing, the mimetypes.php map(s) are used instead. The result
 * is cached in static variables for subsequent calls.
 *
 * @param string Mime-Type to get extensions for
 *
 * @return array List of extensions matching the given mimetype (null if the
 *               type is unknown) or a hash array with ext -> mimetype
 *               mappings if $mimetype is not given
 */
public static function get_mime_extensions($mimetype = null)
{
    static $mime_types, $mime_extensions;

    // return cached data
    if (is_array($mime_types)) {
        if (!$mimetype) {
            return $mime_extensions;
        }

        return isset($mime_types[$mimetype]) ? $mime_types[$mimetype] : null;
    }

    // load mapping file
    $file_paths = array();

    if ($custom_path = rcube::get_instance()->config->get('mime_types')) {
        $file_paths[] = $custom_path;
    }

    // try common locations
    if (strtoupper(substr(PHP_OS, 0, 3)) == 'WIN') {
        $file_paths[] = 'C:/xampp/apache/conf/mime.types.';
    }
    else {
        $file_paths[] = '/etc/mime.types';
        $file_paths[] = '/etc/httpd/mime.types';
        $file_paths[] = '/etc/httpd2/mime.types';
        $file_paths[] = '/etc/apache/mime.types';
        $file_paths[] = '/etc/apache2/mime.types';
        $file_paths[] = '/etc/nginx/mime.types';
        $file_paths[] = '/usr/local/etc/httpd/conf/mime.types';
        $file_paths[] = '/usr/local/etc/apache/conf/mime.types';
        $file_paths[] = '/usr/local/etc/apache24/mime.types';
    }

    // stays empty when none of the candidate files is readable
    $lines = array();

    foreach ($file_paths as $fp) {
        if (@is_readable($fp)) {
            $lines = file($fp, FILE_IGNORE_NEW_LINES | FILE_SKIP_EMPTY_LINES);
            break;
        }
    }

    // file() reports a read failure with false
    if (!is_array($lines)) {
        $lines = array();
    }

    $mime_types = $mime_extensions = array();
    $regex = "/([\w\+\-\.\/]+)\s+([\w\s]+)/i";

    foreach ($lines as $line) {
        // skip comments or mime types w/o any extensions
        if ($line[0] == '#' || !preg_match($regex, $line, $matches)) {
            continue;
        }

        $mime = $matches[1];

        // extensions may be separated by tabs or runs of spaces; splitting on
        // any whitespace and dropping empty pieces keeps '' from being
        // registered as an extension
        foreach (preg_split('/\s+/', $matches[2], -1, PREG_SPLIT_NO_EMPTY) as $ext) {
            $mime_types[$mime][]   = $ext;
            $mime_extensions[$ext] = $mime;
        }
    }

    // fallback to some well-known types most important for daily emails
    if (empty($mime_types)) {
        foreach (rcube::get_instance()->config->resolve_paths('mimetypes.php') as $fpath) {
            $map = @include($fpath);
            if (is_array($map)) {
                $mime_extensions = array_merge($mime_extensions, $map);
            }
        }

        foreach ($mime_extensions as $ext => $mime) {
            $mime_types[$mime][] = $ext;
        }
    }

    // Add some known aliases that aren't included by some mime.types (#1488891)
    // the order is important here so standard extensions have higher prio
    $aliases = array(
        'image/gif'      => array('gif'),
        'image/png'      => array('png'),
        'image/x-png'    => array('png'),
        'image/jpeg'     => array('jpg', 'jpeg', 'jpe'),
        'image/jpg'      => array('jpg', 'jpeg', 'jpe'),
        'image/pjpeg'    => array('jpg', 'jpeg', 'jpe'),
        'image/tiff'     => array('tif'),
        'image/bmp'      => array('bmp'),
        'image/x-ms-bmp' => array('bmp'),
        'message/rfc822' => array('eml'),
        'text/x-mail'    => array('eml'),
    );

    foreach ($aliases as $mime => $exts) {
        $known = isset($mime_types[$mime]) ? (array) $mime_types[$mime] : array();
        $mime_types[$mime] = array_unique(array_merge($known, $exts));

        foreach ($exts as $ext) {
            if (!isset($mime_extensions[$ext])) {
                $mime_extensions[$ext] = $mime;
            }
        }
    }

    if (!$mimetype) {
        return $mime_extensions;
    }

    return isset($mime_types[$mimetype]) ? $mime_types[$mimetype] : null;
}
/**
 * Detect image type of the given binary data by checking magic numbers.
 *
 * Recognizes the leading bytes of PNG, GIF and ICO data.
 *
 * @param string $data Binary file content
 *
 * @return string Detected mime-type or jpeg as fallback
 */
public static function image_content_type($data)
{
    // magic-number pattern => image subtype, probed in this order
    $signatures = array(
        '/^\x89\x50\x4E\x47/' => 'png',
        '/^\x47\x49\x46\x38/' => 'gif',
        '/^\x00\x00\x01\x00/' => 'ico',
    );

    // anything that matches no signature is reported as jpeg
    $type = 'jpeg';

    foreach ($signatures as $pattern => $subtype) {
        if (preg_match($pattern, $data)) {
            $type = $subtype;
            break;
        }
    }

    return 'image/' . $type;
}
/**
 * Try to fix invalid email addresses
 *
 * Currently this only strips redundant double quotes from the parts of the
 * address (as split on '@' by rcube_utils::explode_quoted_string()) when the
 * quoted content consists solely of letters, digits and the chars . _ + = -
 *
 * @param string $email E-mail address
 *
 * @return string E-mail address with redundant quoting removed
 */
public static function fix_email($email)
{
    $parts = rcube_utils::explode_quoted_string('@', $email);

    foreach ($parts as $idx => $part) {
        // remove redundant quoting (#1490040)
        // a part may be empty (e.g. for "user@"), so make sure it has
        // a first character before looking at it
        if (isset($part[0]) && $part[0] == '"' && preg_match('/^"([a-zA-Z0-9._+=-]+)"$/', $part, $m)) {
            $parts[$idx] = $m[1];
        }
    }

    return implode('@', $parts);
}
}
diff --git a/tests/Framework/Mime.php b/tests/Framework/Mime.php
index bbab0e64b..9d93b53b4 100644
--- a/tests/Framework/Mime.php
+++ b/tests/Framework/Mime.php
@@ -1,280 +1,284 @@
<?php
/**
* Test class to test rcube_mime class
*
* Covers address list decoding, encoded-word header decoding, format=flowed
* folding/unfolding, wordwrap() and parse_message().
*
* @package Tests
*/
class Framework_Mime extends PHPUnit_Framework_TestCase
{
/**
* Test decoding of single e-mail address strings
* Uses rcube_mime::decode_address_list()
*
* $headers and $results are parallel arrays sharing the same index; each
* $results entry is array(expected item count, expected name, expected address).
*/
function test_decode_single_address()
{
$headers = array(
0 => 'test@domain.tld',
1 => '<test@domain.tld>',
2 => 'Test <test@domain.tld>',
3 => 'Test Test <test@domain.tld>',
4 => 'Test Test<test@domain.tld>',
5 => '"Test Test" <test@domain.tld>',
6 => '"Test Test"<test@domain.tld>',
7 => '"Test \\" Test" <test@domain.tld>',
8 => '"Test<Test" <test@domain.tld>',
9 => '=?ISO-8859-1?B?VGVzdAo=?= <test@domain.tld>',
10 => '=?ISO-8859-1?B?VGVzdAo=?=<test@domain.tld>', // #1487068
// comments in address (#1487673)
11 => 'Test (comment) <test@domain.tld>',
12 => '"Test" (comment) <test@domain.tld>',
13 => '"Test (comment)" (comment) <test@domain.tld>',
14 => '(comment) <test@domain.tld>',
15 => 'Test <test@(comment)domain.tld>',
16 => 'Test Test ((comment)) <test@domain.tld>',
17 => 'test@domain.tld (comment)',
18 => '"Test,Test" <test@domain.tld>',
// 1487939
19 => 'Test <"test test"@domain.tld>',
20 => '<"test test"@domain.tld>',
21 => '"test test"@domain.tld',
// invalid (#1489092)
22 => '"John Doe @ SomeBusinessName" <MAILER-DAEMON>',
23 => '=?UTF-8?B?IlRlc3QsVGVzdCI=?= <test@domain.tld>',
// invalid, but we do our best to parse correctly
24 => '"email@test.com" <>',
// valid with redundant quoting (#1490040)
25 => '"user"@"domain.tld"',
);
$results = array(
0 => array(1, '', 'test@domain.tld'),
1 => array(1, '', 'test@domain.tld'),
2 => array(1, 'Test', 'test@domain.tld'),
3 => array(1, 'Test Test', 'test@domain.tld'),
4 => array(1, 'Test Test', 'test@domain.tld'),
5 => array(1, 'Test Test', 'test@domain.tld'),
6 => array(1, 'Test Test', 'test@domain.tld'),
7 => array(1, 'Test " Test', 'test@domain.tld'),
8 => array(1, 'Test<Test', 'test@domain.tld'),
9 => array(1, 'Test', 'test@domain.tld'),
10 => array(1, 'Test', 'test@domain.tld'),
11 => array(1, 'Test', 'test@domain.tld'),
12 => array(1, 'Test', 'test@domain.tld'),
13 => array(1, 'Test (comment)', 'test@domain.tld'),
14 => array(1, '', 'test@domain.tld'),
15 => array(1, 'Test', 'test@domain.tld'),
16 => array(1, 'Test Test', 'test@domain.tld'),
17 => array(1, '', 'test@domain.tld'),
18 => array(1, 'Test,Test', 'test@domain.tld'),
19 => array(1, 'Test', '"test test"@domain.tld'),
20 => array(1, '', '"test test"@domain.tld'),
21 => array(1, '', '"test test"@domain.tld'),
// invalid (#1489092)
22 => array(1, 'John Doe @ SomeBusinessName', 'MAILER-DAEMON'),
23 => array(1, 'Test,Test', 'test@domain.tld'),
24 => array(1, '', 'email@test.com'),
25 => array(1, '', 'user@domain.tld'),
);
foreach ($headers as $idx => $header) {
$res = rcube_mime::decode_address_list($header);
// the assertions below read the single decoded entry at index 1
$this->assertEquals($results[$idx][0], count($res), "Rows number in result for header: " . $header);
$this->assertEquals($results[$idx][1], $res[1]['name'], "Name part decoding for header: " . $header);
$this->assertEquals($results[$idx][2], $res[1]['mailto'], "Email part decoding for header: " . $header);
}
}
/**
* Test decoding of header values
* Uses rcube_mime::decode_mime_string()
*
* The decoded result is re-encoded with quoted_printable_encode() before
* comparison, which is why the expected 'out' values contain =XX sequences.
*/
function test_header_decode_qp()
{
$test = array(
// #1488232: invalid character "?"
'quoted-printable (1)' => array(
'in' => '=?utf-8?Q?Certifica=C3=A7=C3=A3??=',
'out' => 'Certifica=C3=A7=C3=A3?',
),
'quoted-printable (2)' => array(
'in' => '=?utf-8?Q?Certifica=?= =?utf-8?Q?C3=A7=C3=A3?=',
'out' => 'Certifica=C3=A7=C3=A3',
),
'quoted-printable (3)' => array(
'in' => '=?utf-8?Q??= =?utf-8?Q??=',
'out' => '',
),
'quoted-printable (4)' => array(
'in' => '=?utf-8?Q??= a =?utf-8?Q??=',
'out' => ' a ',
),
'quoted-printable (5)' => array(
'in' => '=?utf-8?Q?a?= =?utf-8?Q?b?=',
'out' => 'ab',
),
'quoted-printable (6)' => array(
'in' => '=?utf-8?Q? ?= =?utf-8?Q?a?=',
'out' => ' a',
),
'quoted-printable (7)' => array(
'in' => '=?utf-8?Q?___?= =?utf-8?Q?a?=',
'out' => ' a',
),
);
foreach ($test as $idx => $item) {
$res = rcube_mime::decode_mime_string($item['in'], 'UTF-8');
$res = quoted_printable_encode($res);
$this->assertEquals($item['out'], $res, "Header decoding for: " . $idx);
}
}
/**
* Test format=flowed folding
* Uses rcube_mime::format_flowed() with a line length of 80
*/
function test_format_flowed()
{
$raw = file_get_contents(TESTS_DIR . 'src/format-flowed-unfolded.txt');
$flowed = file_get_contents(TESTS_DIR . 'src/format-flowed.txt');
$this->assertEquals($flowed, rcube_mime::format_flowed($raw, 80), "Test correct folding and space-stuffing");
}
/**
* Test format=flowed unfolding
* Uses rcube_mime::unfold_flowed() on the same fixture pair as test_format_flowed()
*/
function test_unfold_flowed()
{
$flowed = file_get_contents(TESTS_DIR . 'src/format-flowed.txt');
$unfolded = file_get_contents(TESTS_DIR . 'src/format-flowed-unfolded.txt');
$this->assertEquals($unfolded, rcube_mime::unfold_flowed($flowed), "Test correct unfolding of quoted lines");
}
/**
* Test format=flowed unfolding (#1490284)
*
* A flowed unquoted line that follows a quoted block must still be joined
* with its continuation line.
*/
function test_unfold_flowed2()
{
$flowed = "> culpa qui officia deserunt mollit anim id est laborum.\r\n"
."> \r\n"
."Sed ut perspiciatis unde omnis iste natus error \r\nsit voluptatem";
$unfolded = "> culpa qui officia deserunt mollit anim id est laborum.\r\n"
."> \r\n"
."Sed ut perspiciatis unde omnis iste natus error sit voluptatem";
$this->assertEquals($unfolded, rcube_mime::unfold_flowed($flowed), "Test correct unfolding of quoted lines [2]");
}
/**
* Test format=flowed delsp=yes unfolding (RFC3676)
*
* With the third argument enabled the trailing space of every flowed line
* is expected to be removed when lines are joined.
*/
function test_unfold_flowed_delsp()
{
$flowed = "そしてジョバンニはすぐうしろの天気輪の柱が \r\n"
."いつかぼんやりした三角標の形になって、しば \r\n"
."らく蛍のように、ぺかぺか消えたりともったり \r\n"
."しているのを見ました。";
$unfolded = "そしてジョバンニはすぐうしろの天気輪の柱がいつかぼんやりした三角標の形になって、しばらく蛍のように、ぺかぺか消えたりともったりしているのを見ました。";
$this->assertEquals($unfolded, rcube_mime::unfold_flowed($flowed, null, true), "Test correct unfolding of flowed DelSp=Yes lines");
}
/**
* Test wordwrap()
*
* Each sample is array(arguments passed to rcube_mime::wordwrap(), expected result).
*/
function test_wordwrap()
{
$samples = array(
array(
array("aaaa aaaa\n aaaa"),
"aaaa aaaa\n aaaa",
),
array(
array("123456789 123456789 123456789 123", 29),
"123456789 123456789 123456789\n123",
),
array(
array("123456789 3456789 123456789", 29),
"123456789 3456789 123456789",
),
array(
array("123456789 123456789 123456789 123", 29),
"123456789 123456789 123456789\n 123",
),
array(
array("abc", 1, "\n", true),
"a\nb\nc",
),
array(
array("ąść", 1, "\n", true, 'UTF-8'),
"ą\nś\nć",
),
array(
array(">abc\n>def", 2, "\n", true),
">abc\n>def",
),
array(
array("abc def", 3, "-"),
"abc-def",
),
array(
array("----------------------------------------------------------------------------------------\nabc def123456789012345", 76),
"----------------------------------------------------------------------------------------\nabc def123456789012345",
),
array(
array("-------\nabc def", 5),
"-------\nabc\ndef",
),
array(
array("http://xx.xxx.xx.xxx:8080/addressbooks/roundcubexxxxx%40xxxxxxxxxxxxxxxxxxxxxxx.xx.xx/testing/", 70),
"http://xx.xxx.xx.xxx:8080/addressbooks/roundcubexxxxx%40xxxxxxxxxxxxxxxxxxxxxxx.xx.xx/testing/",
),
array(
array("this-is-just-some-blabla-to-make-this-more-than-seventy-five-characters-in-a-row -- this line should be wrapped", 20, "\n"),
"this-is-just-some-blabla-to-make-this-more-than-seventy-five-characters-in-a-row\n-- this line should\nbe wrapped",
),
+ array(
+ array(rcube_charset::convert("㈱山﨑工業", 'UTF-8', 'ISO-2022-JP'), 1, "\n", true, 'ISO-2022-JP'),
+ rcube_charset::convert("㈱\n山\n﨑\n工\n業", 'UTF-8', 'ISO-2022-JP'),
+ ),
);
foreach ($samples as $sample) {
$this->assertEquals($sample[1], call_user_func_array(array('rcube_mime', 'wordwrap'), $sample[0]), "Test text wrapping");
}
}
/**
* Test parse_message()
*
* Parses the src/html.msg fixture and checks the resulting part tree:
* a multipart/alternative root with a text/plain part and a
* multipart/related part holding an HTML body and an inline JPEG.
*/
function test_parse_message()
{
$file = file_get_contents(__DIR__ . '/../src/html.msg');
$result = rcube_mime::parse_message($file);
$this->assertInstanceOf('rcube_message_part', $result);
$this->assertSame('multipart/alternative', $result->mimetype);
$this->assertSame('1.0', $result->headers['mime-version']);
$this->assertSame('=_68eeaf4ab95b5312965e45c33362338e', $result->ctype_parameters['boundary']);
$this->assertSame('1', $result->parts[0]->mime_id);
$this->assertSame(12, $result->parts[0]->size);
$this->assertSame('text/plain', $result->parts[0]->mimetype);
$this->assertSame("this is test", $result->parts[0]->body);
$this->assertSame('2', $result->parts[1]->mime_id);
$this->assertSame(0, $result->parts[1]->size);
$this->assertSame('multipart/related', $result->parts[1]->mimetype);
$this->assertCount(2, $result->parts[1]->parts);
$this->assertSame('2.1', $result->parts[1]->parts[0]->mime_id);
$this->assertSame(257, $result->parts[1]->parts[0]->size);
$this->assertSame('text/html', $result->parts[1]->parts[0]->mimetype);
$this->assertSame('UTF-8', $result->parts[1]->parts[0]->charset);
$this->assertRegExp('/<html>/', $result->parts[1]->parts[0]->body);
$this->assertSame('2.2', $result->parts[1]->parts[1]->mime_id);
$this->assertSame(793, $result->parts[1]->parts[1]->size);
$this->assertSame('image/jpeg', $result->parts[1]->parts[1]->mimetype);
$this->assertSame('base64', $result->parts[1]->parts[1]->encoding);
$this->assertSame('inline', $result->parts[1]->parts[1]->disposition);
$this->assertSame('photo-mini.jpg', $result->parts[1]->parts[1]->filename);
}
}
File Metadata
Details
Attached
Mime Type
text/x-diff
Expires
Sun, Jan 19, 12:05 AM (16 h, 3 m)
Storage Engine
blob
Storage Format
Raw Data
Storage Handle
120041
Default Alt Text
(78 KB)
Attached To
Mode
R3 roundcubemail
Attached
Detach File
Event Timeline
Log In to Comment