Page MenuHomePhorge

No OneTemporary

This document is not UTF8. It was detected as ISO-8859-1 (Latin 1) and converted to UTF8 for display.
diff --git a/program/lib/Mail/mimePart.php b/program/lib/Mail/mimePart.php
index 7427a1002..b404fc522 100644
--- a/program/lib/Mail/mimePart.php
+++ b/program/lib/Mail/mimePart.php
@@ -1,523 +1,522 @@
<?php
/**
* The Mail_mimePart class is used to create MIME E-mail messages
*
* This class enables you to manipulate and build a mime email
* from the ground up. The Mail_Mime class is a userfriendly api
* to this class for people who aren't interested in the internals
* of mime mail.
* This class however allows full control over the email.
*
* Compatible with PHP versions 4 and 5
*
* LICENSE: This LICENSE is in the BSD license style.
* Copyright (c) 2002-2003, Richard Heyes <richard@phpguru.org>
* Copyright (c) 2003-2006, PEAR <pear-group@php.net>
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or
* without modification, are permitted provided that the following
* conditions are met:
*
* - Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* - Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
* - Neither the name of the authors, nor the names of its contributors
* may be used to endorse or promote products derived from this
* software without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
* LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF
* THE POSSIBILITY OF SUCH DAMAGE.
*
* @category Mail
* @package Mail_Mime
* @author Richard Heyes <richard@phpguru.org>
* @author Cipriano Groenendal <cipri@php.net>
* @author Sean Coates <sean@php.net>
* @copyright 2003-2006 PEAR <pear-group@php.net>
* @license http://www.opensource.org/licenses/bsd-license.php BSD License
* @version CVS: $Id$
* @link http://pear.php.net/package/Mail_mime
*/
/**
* The Mail_mimePart class is used to create MIME E-mail messages
*
* This class enables you to manipulate and build a mime email
* from the ground up. The Mail_Mime class is a userfriendly api
* to this class for people who aren't interested in the internals
* of mime mail.
* This class however allows full control over the email.
*
* @category Mail
* @package Mail_Mime
* @author Richard Heyes <richard@phpguru.org>
* @author Cipriano Groenendal <cipri@php.net>
* @author Sean Coates <sean@php.net>
* @copyright 2003-2006 PEAR <pear-group@php.net>
* @license http://www.opensource.org/licenses/bsd-license.php BSD License
* @version Release: @package_version@
* @link http://pear.php.net/package/Mail_mime
*/
class Mail_mimePart {
/**
* The encoding type of this part
*
* @var string
* @access private
*/
var $_encoding;
/**
* An array of subparts
*
* @var array
* @access private
*/
var $_subparts;
/**
* The output of this part after being built
*
* @var string
* @access private
*/
var $_encoded;
/**
* Headers for this part
*
* @var array
* @access private
*/
var $_headers;
/**
* The body of this part (not encoded)
*
* @var string
* @access private
*/
var $_body;
/**
 * Constructor.
 *
 * Builds the header set for this part from $params and stores the
 * (not yet encoded) body. Content-Type defaults to text/plain and the
 * transfer encoding to 7bit when they are not supplied.
 *
 * @param string $body   The body of the mime part if any.
 * @param array  $params An associative array of parameters:
 *     content_type      - The content type for this part eg multipart/mixed
 *     encoding          - The encoding to use, 7bit, 8bit, base64, or quoted-printable
 *     cid               - Content ID to apply
 *     disposition       - Content disposition, inline or attachment
 *     dfilename         - Optional filename parameter for content disposition
 *     description       - Content description
 *     charset           - Character set to use
 *     language          - Language passed on when encoding the name/filename parameters
 *     location          - Content-Location header value
 *     name-encoding     - Encoding type passed on for the Content-Type "name" parameter
 *     filename-encoding - Encoding type passed on for the Content-Disposition "filename" parameter
 * @access public
 */
function Mail_mimePart($body = '', $params = array())
{
    if (!defined('MAIL_MIMEPART_CRLF')) {
        define('MAIL_MIMEPART_CRLF', defined('MAIL_MIME_CRLF') ? MAIL_MIME_CRLF : "\r\n", TRUE);
    }

    // Start from empty collections so nothing below relies on implicit creation.
    $headers     = array();
    $contentType = array();
    $contentDisp = array();

    foreach ($params as $key => $value) {
        switch ($key) {
        case 'content_type':
            $contentType['type'] = $value;
            break;
        case 'encoding':
            $this->_encoding = $value;
            $headers['Content-Transfer-Encoding'] = $value;
            break;
        case 'cid':
            $headers['Content-ID'] = '<' . $value . '>';
            break;
        case 'disposition':
            $contentDisp['disp'] = $value;
            break;
        case 'dfilename':
            // The file name is emitted both as Content-Disposition "filename"
            // and as Content-Type "name".
            $contentDisp['filename'] = $value;
            $contentType['name'] = $value;
            break;
        case 'description':
            $headers['Content-Description'] = $value;
            break;
        case 'charset':
            $contentType['charset'] = $value;
            $contentDisp['charset'] = $value;
            break;
        case 'language':
            $contentType['language'] = $value;
            $contentDisp['language'] = $value;
            break;
        case 'location':
            $headers['Content-Location'] = $value;
            break;
        }
    }

    if (isset($contentType['type'])) {
        $headers['Content-Type'] = $contentType['type'];
        if (isset($contentType['charset'])) {
            $headers['Content-Type'] .= "; charset=\"{$contentType['charset']}\"";
        }
        if (isset($contentType['name'])) {
            $headers['Content-Type'] .= ';' . MAIL_MIMEPART_CRLF;
            $headers['Content-Type'] .=
                $this->_buildHeaderParam('name', $contentType['name'],
                    isset($contentType['charset']) ? $contentType['charset'] : 'US-ASCII',
                    isset($contentType['language']) ? $contentType['language'] : NULL,
                    isset($params['name-encoding']) ? $params['name-encoding'] : NULL);
        }
    }

    if (isset($contentDisp['disp'])) {
        $headers['Content-Disposition'] = $contentDisp['disp'];
        if (isset($contentDisp['filename'])) {
            $headers['Content-Disposition'] .= ';' . MAIL_MIMEPART_CRLF;
            $headers['Content-Disposition'] .=
                $this->_buildHeaderParam('filename', $contentDisp['filename'],
                    isset($contentDisp['charset']) ? $contentDisp['charset'] : 'US-ASCII',
                    isset($contentDisp['language']) ? $contentDisp['language'] : NULL,
                    isset($params['filename-encoding']) ? $params['filename-encoding'] : NULL);
        }
    }

    // Default content-type
    if (!isset($headers['Content-Type'])) {
        $headers['Content-Type'] = 'text/plain';
    }

    // Default encoding
    if (!isset($this->_encoding)) {
        $this->_encoding = '7bit';
    }

    // Assign stuff to member variables
    $this->_encoded  = array();
    $this->_headers  = $headers;
    $this->_body     = $body;
    // encode() calls count() on this member; leaving it unset makes that a
    // count(null), which warns on PHP 7.2+ and throws on PHP 8.
    $this->_subparts = array();
}
/**
* encode()
*
* Encodes and returns the email. Also stores
* it in the encoded member variable
*
* A part with subparts is rendered as a multipart body: every subpart is
* encoded recursively and the results are joined with a freshly generated
* boundary. A part without subparts has its own body encoded with the
* transfer encoding chosen in the constructor.
*
* @return An associative array containing two elements,
* body and headers. The headers element is itself
* an array keyed by header name (e.g. 'Content-Type').
* @access public
*/
function encode()
{
// $encoded is a reference to $this->_encoded, so everything assigned to it
// below is stored on the object as well as returned.
$encoded =& $this->_encoded;
// NOTE(review): the boundary is appended to the stored Content-Type header
// with .=, so encoding the same multipart part twice would append a second
// boundary parameter -- confirm callers encode a part only once.
if (count($this->_subparts)) {
- srand((double)microtime()*1000000);
$boundary = '=_' . md5(rand() . microtime());
$this->_headers['Content-Type'] .= ';' . MAIL_MIMEPART_CRLF . "\t" . 'boundary="' . $boundary . '"';
// Add body parts to $subparts
for ($i = 0; $i < count($this->_subparts); $i++) {
$headers = array();
$tmp = $this->_subparts[$i]->encode();
// Flatten the subpart's header array into "Name: value" lines.
foreach ($tmp['headers'] as $key => $value) {
$headers[] = $key . ': ' . $value;
}
// Header lines, an empty line, then the encoded subpart body.
$subparts[] = implode(MAIL_MIMEPART_CRLF, $headers) . MAIL_MIMEPART_CRLF . MAIL_MIMEPART_CRLF . $tmp['body'] . MAIL_MIMEPART_CRLF;
}
// Opening delimiter, the subparts separated by delimiters, then the closing "--boundary--".
$encoded['body'] = '--' . $boundary . MAIL_MIMEPART_CRLF .
implode('--' . $boundary . MAIL_MIMEPART_CRLF , $subparts) .
'--' . $boundary.'--' . MAIL_MIMEPART_CRLF;
} else {
$encoded['body'] = $this->_getEncodedData($this->_body, $this->_encoding);
}
// Add headers to $encoded
$encoded['headers'] =& $this->_headers;
return $encoded;
}
/**
 * &addSubPart()
 *
 * Creates a new Mail_mimePart from the given body and parameters, appends
 * it to this part's list of subparts and hands back a reference to the
 * stored element.
 *
 * @param $body   The body of the subpart, if any.
 * @param $params The parameters for the subpart, same
 *                as the $params argument for constructor.
 * @return A reference to the part you just added. It is
 *         crucial if using multipart/* in your subparts that
 *         you use =& in your script when calling this function,
 *         otherwise you will not be able to add further subparts.
 * @access public
 */
function &addSubPart($body, $params)
{
    $this->_subparts[] = new Mail_mimePart($body, $params);

    // Index of the element that was just appended.
    $last = count($this->_subparts) - 1;

    return $this->_subparts[$last];
}
/**
 * _getEncodedData()
 *
 * Applies the requested content transfer encoding to a piece of data.
 * 7bit, 8bit and any unrecognised encoding return the data untouched.
 *
 * @param $data     The data to encode.
 * @param $encoding The encoding type to use, 7bit, 8bit, base64,
 *                  or quoted-printable.
 * @return The encoded data.
 * @access private
 */
function _getEncodedData($data, $encoding)
{
    // Pass-through encodings are tested first, in the same order as before.
    if ($encoding == '8bit' || $encoding == '7bit') {
        return $data;
    }

    if ($encoding == 'quoted-printable') {
        return $this->_quotedPrintableEncode($data);
    }

    if ($encoding == 'base64') {
        // Wrap at 76 characters per line and drop the trailing line break.
        $wrapped = chunk_split(base64_encode($data), 76, MAIL_MIMEPART_CRLF);
        return rtrim($wrapped);
    }

    return $data;
}
/**
 * _quotedPrintableEncode()
 *
 * Encodes data to quoted-printable standard.
 *
 * The input is split on LF / CRLF and every line is encoded byte by byte.
 * Lines are re-joined with MAIL_MIMEPART_CRLF, soft line breaks ("=" + EOL)
 * are inserted so that no output line reaches $line_max characters, and the
 * final line break is removed.
 *
 * @param $input    The data to encode
 * @param $line_max Optional max line length. Should
 *                  not be more than 76 chars
 *
 * @return The quoted-printable encoded data.
 * @access private
 */
function _quotedPrintableEncode($input , $line_max = 76)
{
    $eol    = MAIL_MIMEPART_CRLF;
    $escape = '=';
    $output = '';

    // foreach instead of each(): each() is deprecated since PHP 7.2 and
    // removed in PHP 8.
    foreach (preg_split("/\r?\n/", $input) as $line) {
        $linlen  = strlen($line);
        $newline = '';

        for ($i = 0; $i < $linlen; $i++) {
            $char = $line[$i];
            $dec  = ord($char);

            if (($dec == 32) AND ($i == ($linlen - 1))) { // convert space at eol only
                $char = '=20';
            } elseif (($dec == 9) AND ($i == ($linlen - 1))) { // convert tab at eol only
                $char = '=09';
            } elseif ($dec == 9) {
                ; // Do nothing if a tab.
            } elseif (($dec == 61) OR ($dec < 32 ) OR ($dec > 126)) {
                $char = $escape . sprintf('%02X', $dec);
            } elseif (($dec == 46) AND (($newline === '') || ((strlen($newline) + strlen("=2E")) >= $line_max))) {
                //Bug #9722: convert full-stop at bol,
                //some Windows servers need this, won't break anything (cipri)
                //Bug #11731: full-stop at bol also needs to be encoded
                //if this line would push us over the line_max limit.
                $char = '=2E';
            }

            //Note, when changing this line, also change the ($dec == 46)
            //check line, as it mimics this line due to Bug #11731
            if ((strlen($newline) + strlen($char)) >= $line_max) { // MAIL_MIMEPART_CRLF is not counted
                $output .= $newline . $escape . $eol; // soft line break; " =\r\n" is okay
                $newline = '';
            }
            $newline .= $char;
        } // end of for

        $output .= $newline . $eol;
    }

    // Don't want last crlf
    return substr($output, 0, -1 * strlen($eol));
}
/**
 * _buildHeaderParam()
 *
 * Encodes the parameter of a header.
 *
 * Three output forms are produced:
 *  - a plain quoted parameter (name="value"; ) when the value is printable
 *    ASCII and short enough;
 *  - an RFC 2047 encoded parameter when $paramEnc is 'quoted-printable'
 *    or 'base64';
 *  - otherwise a percent-encoded extended parameter (name*="charset'lang'value"; ),
 *    split into numbered continuations (name*0*=, name*1*=, ...) when it
 *    does not fit on one line.
 *
 * @param $name      The name of the header-parameter
 * @param $value     The value of the parameter
 * @param $charset   The characterset of $value
 * @param $language  The language used in $value
 * @param $paramEnc  Parameter encoding type
 * @param $maxLength The maximum length of a line. Defaults to 78
 *
 * @return The encoded parameter, including its leading space and trailing ';'
 * @access private
 */
function _buildHeaderParam($name, $value, $charset=NULL, $language=NULL, $paramEnc=NULL, $maxLength=78)
{
    // RFC 2183/2184/2822:
    // value needs encoding if contains non-ASCII chars or is longer than 78 chars
    if (!preg_match('#[^\x20-\x7E]#', $value)) { // ASCII
        $quoted = addcslashes($value, '\\"');
        if (strlen($name) + strlen($quoted) + 6 <= $maxLength) {
            return " {$name}=\"{$quoted}\"; ";
        }
    }

    // use quoted-printable/base64 encoding (RFC2047)
    if ($paramEnc == 'quoted-printable' || $paramEnc == 'base64') {
        return $this->_buildRFC2047Param($name, $value, $charset, $paramEnc);
    }

    // Percent-encode byte by byte. Besides everything outside printable
    // ASCII this also encodes '%' (otherwise a literal percent sign is
    // indistinguishable from an escape) and '"' / '\' (the value is emitted
    // between double quotes). Every escape is exactly two hex digits.
    $encValue = '';
    $valueLen = strlen($value);
    for ($i = 0; $i < $valueLen; $i++) {
        $byte = ord($value[$i]);
        if ($byte < 0x20 || $byte > 0x7E || $byte == 0x25 || $byte == 0x22 || $byte == 0x5C) {
            $encValue .= sprintf('%%%02X', $byte);
        } else {
            $encValue .= $value[$i];
        }
    }

    $value  = "$charset'$language'$encValue";
    $header = " {$name}*=\"{$value}\"; ";
    if (strlen($header) <= $maxLength) {
        return $header;
    }

    // Too long for a single line: split into numbered continuations.
    $preLength = strlen(" {$name}*0*=\"");
    $sufLength = strlen("\";");
    $maxLength = max(16, $maxLength - $preLength - $sufLength - 2);
    // A chunk must not end inside a %XX escape, hence the two trailing
    // non-'%' characters. Anchored so the consumed length always matches
    // the start of the remaining value.
    $maxLengthReg = "|^(.{0,$maxLength}[^\%][^\%])|";

    $headers   = array();
    $headCount = 0;
    // Compare against '' explicitly: a remaining value of "0" is falsy and
    // would otherwise be dropped.
    while ($value !== '') {
        $matches = array();
        if (preg_match($maxLengthReg, $value, $matches)) {
            $headers[] = " {$name}*{$headCount}*=\"{$matches[0]}\"";
            $value = (string) substr($value, strlen($matches[0]));
        } else {
            $headers[] = " {$name}*{$headCount}*=\"{$value}\"";
            $value = '';
        }
        $headCount++;
    }

    // Each continuation is a parameter of its own and has to be terminated
    // with ';' before the folded line break.
    return implode(';' . MAIL_MIMEPART_CRLF, $headers) . ';';
}
/**
 * Encodes header parameter as per RFC2047 if needed (values too long will be truncated)
 *
 * Printable-ASCII values are only quoted (and truncated if necessary);
 * other values are emitted as a single encoded-word using base64 or
 * quoted-printable ("Q") encoding. When truncating, the file extension
 * (everything from the last '.') is kept.
 *
 * @param string $name      The parameter name
 * @param string $value     The parameter value
 * @param string $charset   The parameter charset
 * @param string $encoding  Encoding type (quoted-printable or base64)
 * @param int    $maxLength Encoded parameter max length (75 is the value specified in the RFC)
 *
 * @return string Parameter line
 * @access private
 */
function _buildRFC2047Param($name, $value, $charset, $encoding='quoted-printable', $maxLength=75)
{
    if (!preg_match('#([^\x20-\x7E]){1}#', $value)) {
        $quoted    = addcslashes($value, '\\"');
        $maxLength = $maxLength - 6;
        if (strlen($quoted) > $maxLength) {
            // truncate filename leaving extension
            $ext    = (string) strrchr($quoted, '.');
            $quoted = substr($quoted, 0, $maxLength - strlen($ext));
            // remove backslashes from the end of filename; the result must
            // be assigned, otherwise a dangling escape character survives
            $quoted = preg_replace('/[\\\\]+$/', '', $quoted);
            $quoted .= $ext;
        }
    } else if ($encoding == 'base64') {
        $ext   = (string) strrchr($value, '.');
        $value = substr($value, 0, strlen($value) - strlen($ext));

        $ext   = base64_encode($ext);
        $value = base64_encode($value);

        $prefix = '=?' . $charset . '?B?';
        $suffix = '?=';
        $maxLength = $maxLength - strlen($prefix . $suffix) - strlen($ext) - 2;

        //We can cut base64 every 4 characters, so the real max
        //we can get must be rounded down.
        $maxLength = $maxLength - ($maxLength % 4);
        $quoted = $prefix . substr($value, 0, $maxLength) . $ext . $suffix;
    } else { // quoted-printable
        $ext   = (string) strrchr($value, '.');
        $value = substr($value, 0, strlen($value) - strlen($ext));

        // Single-pass translation table: the characters that are special to
        // the encoder, plus every byte outside printable ASCII as "=XX"
        // with two upper-case hex digits.
        $map = array('=' => '=3D', '_' => '=5F', '?' => '=3F', ' ' => '_');
        for ($byte = 0; $byte < 256; $byte++) {
            if ($byte < 0x20 || $byte > 0x7E) {
                $map[chr($byte)] = sprintf('=%02X', $byte);
            }
        }
        $ext   = strtr($ext, $map);
        $value = strtr($value, $map);

        $prefix = '=?' . $charset . '?Q?';
        $suffix = '?=';
        $maxLength = $maxLength - strlen($prefix . $suffix) - strlen($ext) - 2;

        // Truncate QP-encoded text at $maxLength
        // but not break any encoded letters.
        if (preg_match("/^(.{0,$maxLength}[^\=][^\=])/", $value, $matches)) {
            $value = $matches[1];
        }

        $quoted = $prefix . $value . $ext . $suffix;
    }

    return " {$name}=\"{$quoted}\"; ";
}
} // End of class
diff --git a/program/lib/html2text.php b/program/lib/html2text.php
index d298ee2e4..e2a5b241e 100644
--- a/program/lib/html2text.php
+++ b/program/lib/html2text.php
@@ -1,595 +1,595 @@
<?php
/*************************************************************************
* *
* class.html2text.inc *
* *
*************************************************************************
* *
* Converts HTML to formatted plain text *
* *
* Copyright (c) 2005-2007 Jon Abernathy <jon@chuggnutt.com> *
* All rights reserved. *
* *
* This script is free software; you can redistribute it and/or modify *
* it under the terms of the GNU General Public License as published by *
* the Free Software Foundation; either version 2 of the License, or *
* (at your option) any later version. *
* *
* The GNU General Public License can be found at *
* http://www.gnu.org/copyleft/gpl.html. *
* *
* This script is distributed in the hope that it will be useful, *
* but WITHOUT ANY WARRANTY; without even the implied warranty of *
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the *
* GNU General Public License for more details. *
* *
* Author(s): Jon Abernathy <jon@chuggnutt.com> *
* *
* Last modified: 08/08/07 *
* *
*************************************************************************/
/**
* Takes HTML and converts it to formatted, plain text.
*
* Thanks to Alexander Krug (http://www.krugar.de/) to pointing out and
* correcting an error in the regexp search array. Fixed 7/30/03.
*
* Updated set_html() function's file reading mechanism, 9/25/03.
*
* Thanks to Joss Sanglier (http://www.dancingbear.co.uk/) for adding
* several more HTML entity codes to the $search and $replace arrays.
* Updated 11/7/03.
*
* Thanks to Darius Kasperavicius (http://www.dar.dar.lt/) for
* suggesting the addition of $allowed_tags and its supporting function
* (which I slightly modified). Updated 3/12/04.
*
* Thanks to Justin Dearing for pointing out that a replacement for the
* <TH> tag was missing, and suggesting an appropriate fix.
* Updated 8/25/04.
*
* Thanks to Mathieu Collas (http://www.myefarm.com/) for finding a
* display/formatting bug in the _build_link_list() function: email
* readers would show the left bracket and number ("[1") as part of the
* rendered email address.
* Updated 12/16/04.
*
* Thanks to Wojciech Bajon (http://histeria.pl/) for submitting code
* to handle relative links, which I hadn't considered. I modified his
* code a bit to handle normal HTTP links and MAILTO links. Also for
* suggesting three additional HTML entity codes to search for.
* Updated 03/02/05.
*
* Thanks to Jacob Chandler for pointing out another link condition
* for the _build_link_list() function: "https".
* Updated 04/06/05.
*
* Thanks to Marc Bertrand (http://www.dresdensky.com/) for
* suggesting a revision to the word wrapping functionality; if you
* specify a $width of 0 or less, word wrapping will be ignored.
* Updated 11/02/06.
*
* *** Big housecleaning updates below:
*
* Thanks to Colin Brown (http://www.sparkdriver.co.uk/) for
* suggesting the fix to handle </li> and blank lines (whitespace).
* Christian Basedau (http://www.movetheweb.de/) also suggested the
* blank lines fix.
*
* Special thanks to Marcus Bointon (http://www.synchromedia.co.uk/),
* Christian Basedau, Norbert Laposa (http://ln5.co.uk/),
* Bas van de Weijer, and Marijn van Butselaar
* for pointing out my glaring error in the <th> handling. Marcus also
* supplied a host of fixes.
*
* Thanks to Jeffrey Silverman (http://www.newtnotes.com/) for pointing
* out that extra spaces should be compressed--a problem addressed with
* Marcus Bointon's fixes but that I had not yet incorporated.
*
* Thanks to Daniel Schledermann (http://www.typoconsult.dk/) for
* suggesting a valuable fix with <a> tag handling.
*
* Thanks to Wojciech Bajon (again!) for suggesting fixes and additions,
* including the <a> tag handling that Daniel Schledermann pointed
* out but that I had not yet incorporated. I haven't (yet)
* incorporated all of Wojciech's changes, though I may at some
* future time.
*
* *** End of the housecleaning updates. Updated 08/08/07.
*
* @author Jon Abernathy <jon@chuggnutt.com>
* @version 1.0.0
* @since PHP 4.0.2
*/
class html2text
{
/**
* Contains the HTML content to convert.
*
* @var string $html
* @access public
*/
var $html;
/**
* Contains the converted, formatted text.
*
* @var string $text
* @access public
*/
var $text;
/**
* Maximum width of the formatted text, in columns.
*
* Set this value to 0 (or less) to ignore word wrapping
* and not constrain text to a fixed-width column.
*
* @var integer $width
* @access public
*/
var $width = 70;
/**
* List of preg* regular expression patterns to search for,
* used in conjunction with $replace.
*
* @var array $search
* @access public
* @see $replace
*/
var $search = array(
"/\r/", // Non-legal carriage return
"/[\n\t]+/", // Newlines and tabs
'/[ ]{2,}/', // Runs of spaces, pre-handling
'/<script[^>]*>.*?<\/script>/i', // <script>s -- which strip_tags supposedly has problems with
'/<style[^>]*>.*?<\/style>/i', // <style>s -- which strip_tags supposedly has problems with
//'/<!-- .* -->/', // Comments -- which strip_tags might have problem a with
'/<p[^>]*>/i', // <P>
'/<br[^>]*>/i', // <br>
'/<i[^>]*>(.*?)<\/i>/i', // <i>
'/<em[^>]*>(.*?)<\/em>/i', // <em>
'/(<ul[^>]*>|<\/ul>)/i', // <ul> and </ul>
'/(<ol[^>]*>|<\/ol>)/i', // <ol> and </ol>
'/<li[^>]*>(.*?)<\/li>/i', // <li> and </li>
'/<li[^>]*>/i', // <li>
'/<hr[^>]*>/i', // <hr>
'/(<table[^>]*>|<\/table>)/i', // <table> and </table>
'/(<tr[^>]*>|<\/tr>)/i', // <tr> and </tr>
'/<td[^>]*>(.*?)<\/td>/i', // <td> and </td>
'/&(nbsp|#160);/i', // Non-breaking space
'/&(quot|rdquo|ldquo|#8220|#8221|#147|#148);/i',
// Double quotes
'/&(apos|rsquo|lsquo|#8216|#8217);/i', // Single quotes
'/&gt;/i', // Greater-than
'/&lt;/i', // Less-than
'/&(amp|#38);/i', // Ampersand
'/&(copy|#169);/i', // Copyright
'/&(trade|#8482|#153);/i', // Trademark
'/&(reg|#174);/i', // Registered
'/&(mdash|#151|#8212);/i', // mdash
'/&(ndash|minus|#8211|#8722);/i', // ndash
'/&(bull|#149|#8226);/i', // Bullet
'/&(pound|#163);/i', // Pound sign
'/&(euro|#8364);/i', // Euro sign
'/&[^&;]+;/i', // Unknown/unhandled entities
'/[ ]{2,}/' // Runs of spaces, post-handling
);
/**
* List of pattern replacements corresponding to patterns searched.
*
* @var array $replace
* @access public
* @see $search
*/
var $replace = array(
'', // Non-legal carriage return
' ', // Newlines and tabs
' ', // Runs of spaces, pre-handling
'', // <script>s -- which strip_tags supposedly has problems with
'', // <style>s -- which strip_tags supposedly has problems with
//'', // Comments -- which strip_tags might have problem a with
"\n\n", // <P>
"\n", // <br>
'_\\1_', // <i>
'_\\1_', // <em>
"\n\n", // <ul> and </ul>
"\n\n", // <ol> and </ol>
"\t* \\1\n", // <li> and </li>
"\n\t* ", // <li>
"\n-------------------------\n", // <hr>
"\n\n", // <table> and </table>
"\n", // <tr> and </tr>
"\t\t\\1\n", // <td> and </td>
' ', // Non-breaking space
'"', // Double quotes
"'", // Single quotes
'>',
'<',
'&',
'(c)',
'(tm)',
'(R)',
'--',
'-',
'*',
'£',
'EUR', // Euro sign. € ?
'', // Unknown/unhandled entities
' ' // Runs of spaces, post-handling
);
/**
* List of preg* regular expression patterns to search for
* and replace using callback function.
*
* The first capture group of every pattern is the tag name; the callback
* (_preg_callback) switches on it to decide how to render the match.
*
* @var array $callback_search
* @access public
*/
var $callback_search = array(
'/<(h)[123456][^>]*>(.*?)<\/h[123456]>/i', // H1 - H6
'/<(b)[^>]*>(.*?)<\/b>/i', // <b>
'/<(strong)[^>]*>(.*?)<\/strong>/i', // <strong>
'/<(a) [^>]*href=("|\')([^"\']+)\2[^>]*>(.*?)<\/a>/i',
// <a href="">
'/<(th)[^>]*>(.*?)<\/th>/i', // <th> and </th>
);
/**
* List of preg* regular expression patterns to search for in PRE body,
* used in conjunction with $pre_replace.
*
* @var array $pre_search
* @access public
* @see $pre_replace
*/
var $pre_search = array(
"/\n/",
"/\t/",
'/ /',
'/<pre[^>]*>/',
'/<\/pre>/'
);
/**
* List of pattern replacements corresponding to patterns searched for PRE body.
*
* @var array $pre_replace
* @access public
* @see $pre_search
*/
var $pre_replace = array(
'<br>',
'&nbsp;&nbsp;&nbsp;&nbsp;',
'&nbsp;',
'',
''
);
/**
* Contains a list of HTML tags to allow in the resulting text.
*
* @var string $allowed_tags
* @access public
* @see set_allowed_tags()
*/
var $allowed_tags = '';
/**
* Contains the base URL that relative links should resolve to.
*
* @var string $url
* @access public
*/
var $url;
/**
* Indicates whether content in the $html variable has been converted yet.
*
* @var boolean $_converted
* @access private
* @see $html, $text
*/
var $_converted = false;
/**
* Contains URL addresses from links to be rendered in plain text.
*
* @var string $_link_list
* @access private
* @see _build_link_list()
*/
var $_link_list = '';
/**
* Number of valid links detected in the text, used for plain text
* display (rendered similar to footnotes).
*
* @var integer $_link_count
* @access private
* @see _build_link_list()
*/
var $_link_count = 0;
/**
* Boolean flag, true if a table of link URLs should be listed after the text.
*
* @var boolean $_do_links
* @access private
* @see html2text()
*/
var $_do_links = true;
/**
 * Constructor.
 *
 * If the HTML source string (or file) is supplied, the class
 * will instantiate with that source propagated; all that has
 * to be done is to call get_text().
 *
 * @param string  $source    HTML content
 * @param boolean $from_file Indicates $source is a file to pull content from
 * @param boolean $do_links  Indicate whether a table of link URLs is desired
 * @param integer $width     Maximum width of the formatted text, 0 for no limit
 * @access public
 * @return void
 */
function html2text( $source = '', $from_file = false, $do_links = true, $width = 75 )
{
    // Plain option flags first; they do not depend on the source.
    $this->_do_links = $do_links;
    $this->width     = $width;

    if ( !empty($source) ) {
        $this->set_html($source, $from_file);
    }

    // No argument: let set_base_url() work out the base URL itself.
    $this->set_base_url();
}
/**
 * Loads source HTML into memory, either from $source string or a file.
 *
 * $source is read as a file only when $from_file is set and the file
 * exists; otherwise $source itself is stored as the HTML. The converted
 * flag is reset so the next get_text() converts again.
 *
 * @param string  $source    HTML content
 * @param boolean $from_file Indicates $source is a file to pull content from
 * @access public
 * @return void
 */
function set_html( $source, $from_file = false )
{
    $this->html = ( $from_file && file_exists($source) )
        ? file_get_contents($source)
        : $source;

    $this->_converted = false;
}
/**
 * Returns the text, converted from HTML.
 *
 * The conversion runs only if the current HTML has not been converted yet.
 *
 * @access public
 * @return string
 */
function get_text()
{
    if ( $this->_converted ) {
        return $this->text;
    }

    $this->_convert();

    return $this->text;
}
/**
 * Prints the text, converted from HTML.
 *
 * @access public
 * @return void
 */
function print_text()
{
    echo $this->get_text();
}
/**
 * Alias to print_text(), operates identically.
 *
 * @access public
 * @return void
 * @see print_text()
 */
function p()
{
    $converted = $this->get_text();
    echo $converted;
}
/**
 * Sets the allowed HTML tags to pass through to the resulting text.
 *
 * Tags should be in the form "<p>", with no corresponding closing tag.
 * An empty argument leaves the current setting untouched.
 *
 * @param string $allowed_tags Tags to keep
 * @access public
 * @return void
 */
function set_allowed_tags( $allowed_tags = '' )
{
    if ( empty($allowed_tags) ) {
        return;
    }

    $this->allowed_tags = $allowed_tags;
}
/**
 * Sets a base URL to handle relative links.
 *
 * Without an argument the base URL is derived from the HTTP_HOST server
 * variable (or left empty when that is not available).
 *
 * @param string $url Base URL
 * @access public
 * @return void
 */
function set_base_url( $url = '' )
{
    if ( !empty($url) ) {
        // Strip a trailing slash for consistency (relative
        // URLs may already start with a slash like "/file.html")
        if ( substr($url, -1) == '/' ) {
            $url = substr($url, 0, -1);
        }
        $this->url = $url;
        return;
    }

    $this->url = !empty($_SERVER['HTTP_HOST'])
        ? 'http://' . $_SERVER['HTTP_HOST']
        : '';
}
/**
* Workhorse function that does actual conversion.
*
* First converts <pre> sections, then performs custom tag replacement
* specified by the $search/$replace arrays and the $callback_search
* patterns. Then strips any remaining HTML tags, reduces whitespace
* and newlines to a readable format, appends the collected link list and
* word wraps the text to $width characters. The result is stored in
* $text and the converted flag is set.
*
* @access private
* @return void
*/
function _convert()
{
// Variables used for building the link list
$this->_link_count = 0;
$this->_link_list = '';
$text = trim(stripslashes($this->html));
// Convert <PRE>
$this->_convert_pre($text);
- // Replace known html entities
- $text = html_entity_decode($text, ENT_COMPAT, 'UTF-8');
-
// Run our defined search-and-replace
$text = preg_replace($this->search, $this->replace, $text);
// NOTE(review): the callback is registered with the class name, i.e. as a
// static call, while _preg_callback() uses $this -- confirm this is
// accepted by the PHP versions this code has to run on.
$text = preg_replace_callback($this->callback_search, array('html2text', '_preg_callback'), $text);
+ // Replace known html entities
+ $text = html_entity_decode($text, ENT_COMPAT, 'UTF-8');
+
// Strip any other HTML tags
$text = strip_tags($text, $this->allowed_tags);
// Bring down number of empty lines to 2 max
$text = preg_replace("/\n\s+\n/", "\n\n", $text);
$text = preg_replace("/[\n]{3,}/", "\n\n", $text);
// Add link list
if ( !empty($this->_link_list) ) {
$text .= "\n\nLinks:\n------\n" . $this->_link_list;
}
// Wrap the text to a readable format
// for PHP versions >= 4.0.2. The width comes from $this->width.
// If width is 0 or less, don't wrap the text.
if ( $this->width > 0 ) {
$text = wordwrap($text, $this->width);
}
$this->text = $text;
$this->_converted = true;
}
/**
 * Helper function called by preg_replace() on link replacement.
 *
 * Maintains an internal list of links to be displayed at the end of the
 * text, with numeric indices to the original point in the text they
 * appeared. http://, https:// and mailto: links are listed as they are,
 * javascript: links are ignored, and every other link is treated as
 * relative and prefixed with the base URL.
 *
 * @param string $link    URL of the link
 * @param string $display Part of the text to associate number with
 * @access private
 * @return string
 */
function _build_link_list( $link, $display )
{
    if ( !$this->_do_links ) {
        return $display;
    }

    $is_absolute = substr($link, 0, 7) == 'http://'
        || substr($link, 0, 8) == 'https://'
        || substr($link, 0, 7) == 'mailto:';

    if ( !$is_absolute && substr($link, 0, 11) == 'javascript:' ) {
        // Don't count the link; ignore it
        // what about href="#anchor" ?
        return $display . '';
    }

    if ( $is_absolute ) {
        $target = $link;
    } else {
        // Relative link: resolve against the base URL, adding the
        // separating slash when the link does not bring its own.
        $target = $this->url;
        if ( substr($link, 0, 1) != '/' ) {
            $target .= '/';
        }
        $target .= $link;
    }

    $this->_link_count++;
    $this->_link_list .= "[" . $this->_link_count . "] $target\n";

    return $display . ' [' . $this->_link_count . ']';
}
/**
 * Helper function for PRE body conversion.
 *
 * Replaces every <pre>...</pre> section of $text (in place) with a
 * <div><br>...<br></div> wrapper whose content has been run through the
 * $pre_search / $pre_replace lists.
 *
 * @param string HTML content, modified by reference
 * @access private
 */
function _convert_pre(&$text)
{
    while (preg_match('/<pre[^>]*>(.*)<\/pre>/ismU', $text, $matches, PREG_OFFSET_CAPTURE)) {
        $result = preg_replace($this->pre_search, $this->pre_replace, $matches[1][0]);

        // Splice the converted section in by offset instead of passing it to
        // preg_replace() as a replacement string: there, "$0", "$1" or "\1"
        // occurring in the <pre> content would be expanded as backreferences
        // (and "$0" would re-insert the matched <pre> block, so the loop
        // would never finish).
        $text = substr_replace(
            $text,
            '<div><br>' . $result . '<br></div>',
            $matches[0][1],
            strlen($matches[0][0])
        );
    }
}
/**
 * Callback function for preg_replace_callback use.
 *
 * $matches[1] is the tag name captured by the $callback_search pattern
 * that matched:
 *  - b / strong: content in upper case
 *  - th:         content in upper case, formatted like a table cell
 *  - h:          content in upper case, surrounded by blank lines
 *  - a:          link text plus a reference into the link list
 *
 * @param array PREG matches
 * @return string
 * @access private
 */
function _preg_callback($matches)
{
    // The patterns are case-insensitive, so the captured tag name may be
    // upper or mixed case.
    switch (strtolower($matches[1])) {
    case 'b':
    case 'strong':
        return $this->_strtoupper($matches[2]);
    case 'th':
        // The search list captures "th"; without this case header cells
        // were replaced with nothing.
        return $this->_strtoupper("\t\t". $matches[2] ."\n");
    case 'h':
        return $this->_strtoupper("\n\n". $matches[2] ."\n\n");
    case 'a':
        return $this->_build_link_list($matches[3], $matches[4]);
    }

    // Unknown tag: leave the matched markup untouched instead of dropping it.
    return $matches[0];
}
/**
 * Strtoupper multibyte wrapper function
 *
 * Uses mb_strtoupper() when that function exists and strtoupper() otherwise.
 *
 * @param string
 * @return string
 * @access private
 */
function _strtoupper($str)
{
    return function_exists('mb_strtoupper')
        ? mb_strtoupper($str)
        : strtoupper($str);
}
}
?>

File Metadata

Mime Type
text/x-diff
Expires
Sat, Mar 1, 12:36 PM (4 h, 43 m)
Storage Engine
blob
Storage Format
Raw Data
Storage Handle
166971
Default Alt Text
(39 KB)

Event Timeline