No OneTemporary
Actions

Size

39 KB

Referenced Files

None

Subscribers

None

View Options

This document is not UTF8. It was detected as ISO-8859-1 (Latin 1) and converted to UTF8 for display.

	diff --git a/program/lib/Mail/mimePart.php b/program/lib/Mail/mimePart.php
	index 7427a1002..b404fc522 100644
	--- a/program/lib/Mail/mimePart.php
	+++ b/program/lib/Mail/mimePart.php
	@@ -1,523 +1,522 @@
	<?php
	/**
	* The Mail_mimePart class is used to create MIME E-mail messages
	*
	* This class enables you to manipulate and build a mime email
	* from the ground up. The Mail_Mime class is a userfriendly api
	* to this class for people who aren't interested in the internals
	* of mime mail.
	* This class however allows full control over the email.
	*
	* Compatible with PHP versions 4 and 5
	*
	* LICENSE: This LICENSE is in the BSD license style.
	* Copyright (c) 2002-2003, Richard Heyes <richard@phpguru.org>
	* Copyright (c) 2003-2006, PEAR <pear-group@php.net>
	* All rights reserved.
	*
	* Redistribution and use in source and binary forms, with or
	* without modification, are permitted provided that the following
	* conditions are met:
	*
	* - Redistributions of source code must retain the above copyright
	* notice, this list of conditions and the following disclaimer.
	* - Redistributions in binary form must reproduce the above copyright
	* notice, this list of conditions and the following disclaimer in the
	* documentation and/or other materials provided with the distribution.
	* - Neither the name of the authors, nor the names of its contributors
	* may be used to endorse or promote products derived from this
	* software without specific prior written permission.
	*
	* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
	* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
	* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
	* ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
	* LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
	* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
	* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
	* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
	* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
	* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF
	* THE POSSIBILITY OF SUCH DAMAGE.
	*
	* @category Mail
	* @package Mail_Mime
	* @author Richard Heyes <richard@phpguru.org>
	* @author Cipriano Groenendal <cipri@php.net>
	* @author Sean Coates <sean@php.net>
	* @copyright 2003-2006 PEAR <pear-group@php.net>
	* @license http://www.opensource.org/licenses/bsd-license.php BSD License
	* @version CVS: $Id$
	* @link http://pear.php.net/package/Mail_mime
	*/


	/**
	* The Mail_mimePart class is used to create MIME E-mail messages
	*
	* This class enables you to manipulate and build a mime email
	* from the ground up. The Mail_Mime class is a userfriendly api
	* to this class for people who aren't interested in the internals
	* of mime mail.
	* This class however allows full control over the email.
	*
	* @category Mail
	* @package Mail_Mime
	* @author Richard Heyes <richard@phpguru.org>
	* @author Cipriano Groenendal <cipri@php.net>
	* @author Sean Coates <sean@php.net>
	* @copyright 2003-2006 PEAR <pear-group@php.net>
	* @license http://www.opensource.org/licenses/bsd-license.php BSD License
	* @version Release: @package_version@
	* @link http://pear.php.net/package/Mail_mime
	*/
	class Mail_mimePart {

	/**
	* The encoding type of this part
	*
	* @var string
	* @access private
	*/
	var $_encoding;

	/**
	* An array of subparts
	*
	* @var array
	* @access private
	*/
	var $_subparts;

	/**
	* The output of this part after being built
	*
	* @var string
	* @access private
	*/
	var $_encoded;

	/**
	* Headers for this part
	*
	* @var array
	* @access private
	*/
	var $_headers;

	/**
	* The body of this part (not encoded)
	*
	* @var string
	* @access private
	*/
	var $_body;

	/**
	* Constructor.
	*
	* Sets up the object.
	*
	* @param $body - The body of the mime part if any.
	* @param $params - An associative array of parameters:
	* content_type - The content type for this part eg multipart/mixed
	* encoding - The encoding to use, 7bit, 8bit, base64, or quoted-printable
	* cid - Content ID to apply
	* disposition - Content disposition, inline or attachment
	* dfilename - Optional filename parameter for content disposition
	* description - Content description
	* charset - Character set to use
	* @access public
	*/
	function Mail_mimePart($body = '', $params = array())
	{
	    // Builds this part's header set from $params and stores the (still
	    // unencoded) body. Unknown keys in $params are ignored.
	    //
	    // NOTE(review): this is a PHP 4-style constructor (a method named after
	    // the class). PHP 8 no longer treats such methods as constructors, so
	    // `new Mail_mimePart(...)` would skip it there - confirm target version.
	    if (!defined('MAIL_MIMEPART_CRLF')) {
	        // The constant is declared case-sensitively: the third define()
	        // argument (case-insensitive constants) is deprecated since PHP 7.3
	        // and only triggers a warning on PHP 8.
	        define('MAIL_MIMEPART_CRLF', defined('MAIL_MIME_CRLF') ? MAIL_MIME_CRLF : "\r\n");
	    }

	    // Start from explicit empty containers so $headers is always an array.
	    $headers     = array();
	    $contentType = array();
	    $contentDisp = array();

	    foreach ($params as $key => $value) {
	        switch ($key) {
	            case 'content_type':
	                $contentType['type'] = $value;
	                break;

	            case 'encoding':
	                $this->_encoding = $value;
	                $headers['Content-Transfer-Encoding'] = $value;
	                break;

	            case 'cid':
	                $headers['Content-ID'] = '<' . $value . '>';
	                break;

	            case 'disposition':
	                $contentDisp['disp'] = $value;
	                break;

	            case 'dfilename':
	                // The file name is advertised both in Content-Disposition
	                // (filename=) and in Content-Type (name=).
	                $contentDisp['filename'] = $value;
	                $contentType['name'] = $value;
	                break;

	            case 'description':
	                $headers['Content-Description'] = $value;
	                break;

	            case 'charset':
	                $contentType['charset'] = $value;
	                $contentDisp['charset'] = $value;
	                break;

	            case 'language':
	                $contentType['language'] = $value;
	                $contentDisp['language'] = $value;
	                break;

	            case 'location':
	                $headers['Content-Location'] = $value;
	                break;
	        }
	    }

	    // Content-Type: type, optional charset, optional (encoded) name parameter.
	    if (isset($contentType['type'])) {
	        $headers['Content-Type'] = $contentType['type'];
	        if (isset($contentType['charset'])) {
	            $headers['Content-Type'] .= "; charset=\"{$contentType['charset']}\"";
	        }
	        if (isset($contentType['name'])) {
	            $headers['Content-Type'] .= ';' . MAIL_MIMEPART_CRLF;
	            $headers['Content-Type'] .=
	                $this->_buildHeaderParam('name', $contentType['name'],
	                    isset($contentType['charset']) ? $contentType['charset'] : 'US-ASCII',
	                    isset($contentType['language']) ? $contentType['language'] : NULL,
	                    isset($params['name-encoding']) ? $params['name-encoding'] : NULL);
	        }
	    }

	    // Content-Disposition: disposition plus optional (encoded) filename.
	    if (isset($contentDisp['disp'])) {
	        $headers['Content-Disposition'] = $contentDisp['disp'];
	        if (isset($contentDisp['filename'])) {
	            $headers['Content-Disposition'] .= ';' . MAIL_MIMEPART_CRLF;
	            $headers['Content-Disposition'] .=
	                $this->_buildHeaderParam('filename', $contentDisp['filename'],
	                    isset($contentDisp['charset']) ? $contentDisp['charset'] : 'US-ASCII',
	                    isset($contentDisp['language']) ? $contentDisp['language'] : NULL,
	                    isset($params['filename-encoding']) ? $params['filename-encoding'] : NULL);
	        }
	    }

	    // Default content-type
	    if (!isset($headers['Content-Type'])) {
	        $headers['Content-Type'] = 'text/plain';
	    }

	    // Default encoding
	    if (!isset($this->_encoding)) {
	        $this->_encoding = '7bit';
	    }

	    // Assign stuff to member variables
	    $this->_encoded = array();
	    $this->_headers = $headers;
	    $this->_body    = $body;
	}

	/**
	* encode()
	*
	* Encodes and returns the email. Also stores
	* it in the encoded member variable
	*
	* @return An associative array containing two elements,
	* body and headers. The headers element is itself
	* an indexed array.
	* @access public
	*/
	function encode()
	{
	// Alias the cached result so the assignments below also update $this->_encoded.
	$encoded =& $this->_encoded;

	// A part with subparts is serialised as boundary-delimited child parts;
	// otherwise its own body is encoded with the configured transfer encoding.
	// NOTE(review): $_subparts is only assigned in addSubPart(), so it is unset here
	// for leaf parts; count() on a non-countable throws a TypeError on PHP 8 - confirm.
	if (count($this->_subparts)) {
	// Diff marker: the leading "-" flags this srand() call as removed by the patch.
	- srand((double)microtime()*1000000);
	$boundary = '=_' . md5(rand() . microtime());
	// The boundary parameter is appended to the stored header, so every call to
	// encode() on the same object appends another boundary parameter.
	$this->_headers['Content-Type'] .= ';' . MAIL_MIMEPART_CRLF . "\t" . 'boundary="' . $boundary . '"';

	// Add body parts to $subparts
	for ($i = 0; $i < count($this->_subparts); $i++) {
	$headers = array();
	// Recursively encode the child, then flatten its headers to "Name: value" lines.
	$tmp = $this->_subparts[$i]->encode();
	foreach ($tmp['headers'] as $key => $value) {
	$headers[] = $key . ': ' . $value;
	}
	// Child headers, a blank line, the child body and a trailing line break.
	$subparts[] = implode(MAIL_MIMEPART_CRLF, $headers) . MAIL_MIMEPART_CRLF . MAIL_MIMEPART_CRLF . $tmp['body'] . MAIL_MIMEPART_CRLF;
	}

	// Opening boundary, children joined by boundaries, then the closing "--boundary--".
	$encoded['body'] = '--' . $boundary . MAIL_MIMEPART_CRLF .
	implode('--' . $boundary . MAIL_MIMEPART_CRLF , $subparts) .
	'--' . $boundary.'--' . MAIL_MIMEPART_CRLF;

	} else {
	$encoded['body'] = $this->_getEncodedData($this->_body, $this->_encoding);
	}

	// Add headers to $encoded
	// (by reference: the returned 'headers' entry aliases $this->_headers)
	$encoded['headers'] =& $this->_headers;

	return $encoded;
	}

	/**
	* &addSubPart()
	*
	* Adds a subpart to current mime part and returns
	* a reference to it
	*
	* @param $body The body of the subpart, if any.
	* @param $params The parameters for the subpart, same
	* as the $params argument for constructor.
	* @return A reference to the part you just added. It is
	* crucial if using multipart/* in your subparts that
	* you use =& in your script when calling this function,
	* otherwise you will not be able to add further subparts.
	* @access public
	*/
	function &addSubPart($body, $params)
	{
	    // Append a freshly built child part, then return a reference to the
	    // array slot that now holds it so callers can keep nesting subparts.
	    $this->_subparts[] = new Mail_mimePart($body, $params);
	    $lastIndex = count($this->_subparts) - 1;

	    return $this->_subparts[$lastIndex];
	}

	/**
	* _getEncodedData()
	*
	* Returns encoded data based upon encoding passed to it
	*
	* @param $data The data to encode.
	* @param $encoding The encoding type to use, 7bit, base64,
	* or quoted-printable.
	* @access private
	*/
	function _getEncodedData($data, $encoding)
	{
	    // Pass-through encodings are checked first, in the same order as before.
	    if ($encoding == '8bit' || $encoding == '7bit') {
	        return $data;
	    }

	    if ($encoding == 'quoted-printable') {
	        return $this->_quotedPrintableEncode($data);
	    }

	    if ($encoding == 'base64') {
	        // Wrap the base64 text at 76 columns and drop the trailing line break.
	        $wrapped = chunk_split(base64_encode($data), 76, MAIL_MIMEPART_CRLF);
	        return rtrim($wrapped);
	    }

	    // Any unrecognised encoding leaves the data untouched.
	    return $data;
	}

	/**
	* _quotedPrintableEncode()
	*
	* Encodes data to quoted-printable standard.
	*
	* @param $input The data to encode
	* @param $line_max Optional max line length. Should
	* not be more than 76 chars
	*
	* @access private
	*/
	function _quotedPrintableEncode($input , $line_max = 76)
	{
	    // Encodes $input as quoted-printable text.
	    //
	    // Input lines (split on LF or CRLF) are processed byte by byte; encoded
	    // lines longer than $line_max get a soft line break ("=" + CRLF). The
	    // result is joined with MAIL_MIMEPART_CRLF and has no trailing line break.
	    $eol    = MAIL_MIMEPART_CRLF;
	    $escape = '=';
	    $output = '';

	    // foreach replaces the former each() loop; each() no longer exists in PHP 8.
	    foreach (preg_split("/\r?\n/", $input) as $line) {
	        // Split the line into single bytes; an empty line yields an empty array.
	        $chars   = preg_split('//', $line, -1, PREG_SPLIT_NO_EMPTY);
	        $linlen  = count($chars);
	        $newline = '';

	        for ($i = 0; $i < $linlen; $i++) {
	            $char   = $chars[$i];
	            $dec    = ord($char);
	            $atEnd  = ($i == ($linlen - 1));

	            if ($dec == 32 && $atEnd) {
	                // convert space at eol only
	                $char = '=20';
	            } elseif ($dec == 9 && $atEnd) {
	                // convert tab at eol only
	                $char = '=09';
	            } elseif ($dec == 9) {
	                // A tab inside the line is kept literally.
	            } elseif ($dec == 61 || $dec < 32 || $dec > 126) {
	                // "=" itself, control bytes and 8-bit bytes become =XX.
	                $char = $escape . sprintf('%02X', $dec);
	            } elseif ($dec == 46 && ($newline == '' || (strlen($newline) + strlen('=2E')) >= $line_max)) {
	                // Bug #9722: convert full-stop at bol,
	                // some Windows servers need this, won't break anything (cipri)
	                // Bug #11731: full-stop at bol also needs to be encoded
	                // if this line would push us over the line_max limit.
	                $char = '=2E';
	            }

	            // Note, when changing this line, also change the ($dec == 46)
	            // check above, as it mimics this line due to Bug #11731
	            if ((strlen($newline) + strlen($char)) >= $line_max) { // MAIL_MIMEPART_CRLF is not counted
	                $output .= $newline . $escape . $eol; // soft line break; " =\r\n" is okay
	                $newline = '';
	            }
	            $newline .= $char;
	        }

	        $output .= $newline . $eol;
	    }

	    // Don't want last crlf
	    return substr($output, 0, -1 * strlen($eol));
	}

	/**
	* _buildHeaderParam()
	*
	* Encodes the parameter of a header.
	*
	* @param $name The name of the header-parameter
	* @param $value The value of the parameter
	* @param $charset The characterset of $value
	* @param $language The language used in $value
	* @param $paramEnc Parameter encoding type
	* @param $maxLength The maximum length of a line. Defaults to 78
	*
	* @access private
	*/
	function _buildHeaderParam($name, $value, $charset=NULL, $language=NULL, $paramEnc=NULL, $maxLength=78)
	{
	    // Returns the text of one header parameter (e.g. name= / filename=).
	    //
	    // - Short printable-ASCII values are emitted as a quoted string.
	    // - With $paramEnc 'quoted-printable' or 'base64' the work is delegated
	    //   to _buildRFC2047Param().
	    // - Otherwise the value is percent-encoded as charset'language'value and,
	    //   if still too long, split over several numbered parameters.

	    // RFC 2183/2184/2822:
	    // value needs encoding if contains non-ASCII chars or is longer than 78 chars
	    if (!preg_match('#[^\x20-\x7E]#', $value)) { // ASCII
	        $quoted = addcslashes($value, '\\"');
	        if (strlen($name) + strlen($quoted) + 6 <= $maxLength) {
	            return " {$name}=\"{$quoted}\"; ";
	        }
	    }

	    // use quoted-printable/base64 encoding (RFC2047)
	    if ($paramEnc == 'quoted-printable' || $paramEnc == 'base64') {
	        return $this->_buildRFC2047Param($name, $value, $charset, $paramEnc);
	    }

	    // Percent-encode every byte outside printable ASCII. A plain loop is
	    // used because the /e regex modifier no longer exists in PHP 7+, and
	    // %02X guarantees two hex digits even for control bytes (e.g. %09).
	    $encValue = '';
	    $valueLen = strlen($value);
	    for ($i = 0; $i < $valueLen; $i++) {
	        $byte = ord($value[$i]);
	        if ($byte < 0x20 || $byte > 0x7E) {
	            $encValue .= sprintf('%%%02X', $byte);
	        } else {
	            $encValue .= $value[$i];
	        }
	    }
	    $value = "$charset'$language'$encValue";

	    $header = " {$name}*=\"{$value}\"; ";
	    if (strlen($header) <= $maxLength) {
	        return $header;
	    }

	    // Too long for one line: cut the value into numbered chunks. The two
	    // trailing non-% characters keep a cut from landing inside a %XX triplet.
	    $preLength    = strlen(" {$name}0=\"");
	    $sufLength    = strlen("\";");
	    $maxLength    = max(16, $maxLength - $preLength - $sufLength - 2);
	    $maxLengthReg = '/(.{0,' . $maxLength . '}[^%][^%])/';

	    $headers   = array();
	    $headCount = 0;
	    // Compare as string so a remaining chunk of "0" is not treated as empty
	    // (and a false returned by substr() on older PHP still ends the loop).
	    while ((string) $value !== '') {
	        $matches = array();
	        if (preg_match($maxLengthReg, $value, $matches)) {
	            $headers[] = " {$name}{$headCount}=\"{$matches[0]}\"";
	            $value = substr($value, strlen($matches[0]));
	        } else {
	            $headers[] = " {$name}{$headCount}=\"{$value}\"";
	            $value = '';
	        }
	        $headCount++;
	    }

	    return implode(MAIL_MIMEPART_CRLF, $headers) . ';';
	}

	/**
	* Encodes header parameter as per RFC2047 if needed (values too long will be truncated)
	*
	* @param string $name The parameter name
	* @param string $value The parameter value
	* @param string $charset The parameter charset
	* @param string $encoding Encoding type (quoted-printable or base64)
	* @param int $maxLength Encoded parameter max length (75 is the value specified in the RFC)
	*
	* @return string Parameter line
	* @access private
	*/
	function _buildRFC2047Param($name, $value, $charset, $encoding='quoted-printable', $maxLength=75)
	{
	    // Builds ` name="value"; ` where value is either the plain quoted string
	    // (printable ASCII) or an RFC 2047 encoded-word (B or Q encoding).
	    // Values that do not fit into $maxLength are truncated, keeping the
	    // file extension (everything from the last dot).
	    if (!preg_match('#([^\x20-\x7E]){1}#', $value)) {
	        $quoted    = addcslashes($value, '\\"');
	        $maxLength = $maxLength - 6;
	        if (strlen($quoted) > $maxLength) {
	            // truncate filename leaving extension
	            // (strrchr() returns false when there is no dot; cast to '')
	            $ext    = (string) strrchr($quoted, '.');
	            $quoted = substr($quoted, 0, $maxLength - strlen($ext));
	            // remove backslashes from the end of filename; the result must
	            // be assigned, otherwise a dangling escape character survives
	            $quoted  = preg_replace('/[\\\\]+$/', '', $quoted);
	            $quoted .= $ext;
	        }
	    } else if ($encoding == 'base64') {
	        $ext   = (string) strrchr($value, '.');
	        $value = substr($value, 0, strlen($value) - strlen($ext));

	        $ext   = base64_encode($ext);
	        $value = base64_encode($value);

	        $prefix    = '=?' . $charset . '?B?';
	        $suffix    = '?=';
	        $maxLength = $maxLength - strlen($prefix . $suffix) - strlen($ext) - 2;

	        // We can cut base64 every 4 characters, so the real max
	        // we can get must be rounded down.
	        $maxLength = $maxLength - ($maxLength % 4);
	        $quoted    = $prefix . substr($value, 0, $maxLength) . $ext . $suffix;
	    } else { // quoted-printable
	        $ext   = (string) strrchr($value, '.');
	        $value = substr($value, 0, strlen($value) - strlen($ext));

	        // Replace all special characters used by the encoder.
	        $search  = array('=', '_', '?', ' ');
	        $replace = array('=3D', '=5F', '=3F', '_');
	        $ext     = str_replace($search, $replace, $ext);
	        $value   = str_replace($search, $replace, $value);

	        // Replace all extended characters (\x80-\xFF) with =XX. A lookup
	        // table with strtr() replaces the former /e regex, which PHP 7+
	        // no longer supports.
	        $highBytes = array();
	        for ($code = 0x80; $code <= 0xFF; $code++) {
	            $highBytes[chr($code)] = '=' . strtoupper(dechex($code));
	        }
	        $ext   = strtr($ext, $highBytes);
	        $value = strtr($value, $highBytes);

	        $prefix = '=?' . $charset . '?Q?';
	        $suffix = '?=';

	        $maxLength = $maxLength - strlen($prefix . $suffix) - strlen($ext) - 2;

	        // Truncate QP-encoded text at $maxLength
	        // but not break any encoded letters.
	        if (preg_match('/^(.{0,' . $maxLength . '}[^=][^=])/', $value, $matches)) {
	            $value = $matches[1];
	        }

	        $quoted = $prefix . $value . $ext . $suffix;
	    }

	    return " {$name}=\"{$quoted}\"; ";
	}

	} // End of class
	diff --git a/program/lib/html2text.php b/program/lib/html2text.php
	index d298ee2e4..e2a5b241e 100644
	--- a/program/lib/html2text.php
	+++ b/program/lib/html2text.php
	@@ -1,595 +1,595 @@
	<?php

	/*************************************************************************
	* *
	* class.html2text.inc *
	* *
	*************************************************************************
	* *
	* Converts HTML to formatted plain text *
	* *
	* Copyright (c) 2005-2007 Jon Abernathy <jon@chuggnutt.com> *
	* All rights reserved. *
	* *
	* This script is free software; you can redistribute it and/or modify *
	* it under the terms of the GNU General Public License as published by *
	* the Free Software Foundation; either version 2 of the License, or *
	* (at your option) any later version. *
	* *
	* The GNU General Public License can be found at *
	* http://www.gnu.org/copyleft/gpl.html. *
	* *
	* This script is distributed in the hope that it will be useful, *
	* but WITHOUT ANY WARRANTY; without even the implied warranty of *
	* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the *
	* GNU General Public License for more details. *
	* *
	* Author(s): Jon Abernathy <jon@chuggnutt.com> *
	* *
	* Last modified: 08/08/07 *
	* *
	*************************************************************************/


	/**
	* Takes HTML and converts it to formatted, plain text.
	*
	* Thanks to Alexander Krug (http://www.krugar.de/) for pointing out and
	* correcting an error in the regexp search array. Fixed 7/30/03.
	*
	* Updated set_html() function's file reading mechanism, 9/25/03.
	*
	* Thanks to Joss Sanglier (http://www.dancingbear.co.uk/) for adding
	* several more HTML entity codes to the $search and $replace arrays.
	* Updated 11/7/03.
	*
	* Thanks to Darius Kasperavicius (http://www.dar.dar.lt/) for
	* suggesting the addition of $allowed_tags and its supporting function
	* (which I slightly modified). Updated 3/12/04.
	*
	* Thanks to Justin Dearing for pointing out that a replacement for the
	* <TH> tag was missing, and suggesting an appropriate fix.
	* Updated 8/25/04.
	*
	* Thanks to Mathieu Collas (http://www.myefarm.com/) for finding a
	* display/formatting bug in the _build_link_list() function: email
	* readers would show the left bracket and number ("[1") as part of the
	* rendered email address.
	* Updated 12/16/04.
	*
	* Thanks to Wojciech Bajon (http://histeria.pl/) for submitting code
	* to handle relative links, which I hadn't considered. I modified his
	* code a bit to handle normal HTTP links and MAILTO links. Also for
	* suggesting three additional HTML entity codes to search for.
	* Updated 03/02/05.
	*
	* Thanks to Jacob Chandler for pointing out another link condition
	* for the _build_link_list() function: "https".
	* Updated 04/06/05.
	*
	* Thanks to Marc Bertrand (http://www.dresdensky.com/) for
	* suggesting a revision to the word wrapping functionality; if you
	* specify a $width of 0 or less, word wrapping will be ignored.
	* Updated 11/02/06.
	*
	* *** Big housecleaning updates below:
	*
	* Thanks to Colin Brown (http://www.sparkdriver.co.uk/) for
	* suggesting the fix to handle </li> and blank lines (whitespace).
	* Christian Basedau (http://www.movetheweb.de/) also suggested the
	* blank lines fix.
	*
	* Special thanks to Marcus Bointon (http://www.synchromedia.co.uk/),
	* Christian Basedau, Norbert Laposa (http://ln5.co.uk/),
	* Bas van de Weijer, and Marijn van Butselaar
	* for pointing out my glaring error in the <th> handling. Marcus also
	* supplied a host of fixes.
	*
	* Thanks to Jeffrey Silverman (http://www.newtnotes.com/) for pointing
	* out that extra spaces should be compressed--a problem addressed with
	* Marcus Bointon's fixes but that I had not yet incorporated.
	*
	* Thanks to Daniel Schledermann (http://www.typoconsult.dk/) for
	* suggesting a valuable fix with <a> tag handling.
	*
	* Thanks to Wojciech Bajon (again!) for suggesting fixes and additions,
	* including the <a> tag handling that Daniel Schledermann pointed
	* out but that I had not yet incorporated. I haven't (yet)
	* incorporated all of Wojciech's changes, though I may at some
	* future time.
	*
	* *** End of the housecleaning updates. Updated 08/08/07.
	*
	* @author Jon Abernathy <jon@chuggnutt.com>
	* @version 1.0.0
	* @since PHP 4.0.2
	*/
	class html2text
	{

	/**
	* Contains the HTML content to convert.
	*
	* @var string $html
	* @access public
	*/
	var $html;

	/**
	* Contains the converted, formatted text.
	*
	* @var string $text
	* @access public
	*/
	var $text;

	/**
	* Maximum width of the formatted text, in columns.
	*
	* Set this value to 0 (or less) to ignore word wrapping
	* and not constrain text to a fixed-width column.
	*
	* @var integer $width
	* @access public
	*/
	var $width = 70;

	/**
	* List of preg* regular expression patterns to search for,
	* used in conjunction with $replace.
	*
	* @var array $search
	* @access public
	* @see $replace
	*/
	// Patterns are applied in order by preg_replace(); each entry pairs with
	// the entry at the same position in $replace, so keep both lists aligned.
	// The "*" quantifiers, "|" alternations and the &gt;/&lt; entity patterns
	// are written out in full here (they were lost in the rendered listing).
	var $search = array(
	    "/\r/",                                  // Non-legal carriage return
	    "/[\n\t]+/",                             // Newlines and tabs
	    '/[ ]{2,}/',                             // Runs of spaces, pre-handling
	    '/<script[^>]*>.*?<\/script>/i',         // <script>s -- which strip_tags supposedly has problems with
	    '/<style[^>]*>.*?<\/style>/i',           // <style>s -- which strip_tags supposedly has problems with
	    //'/<!-- .* -->/',                       // Comments -- which strip_tags might have a problem with
	    '/<p[^>]*>/i',                           // <P>
	    '/<br[^>]*>/i',                          // <br>
	    '/<i[^>]*>(.*?)<\/i>/i',                 // <i>
	    '/<em[^>]*>(.*?)<\/em>/i',               // <em>
	    '/(<ul[^>]*>|<\/ul>)/i',                 // <ul> and </ul>
	    '/(<ol[^>]*>|<\/ol>)/i',                 // <ol> and </ol>
	    '/<li[^>]*>(.*?)<\/li>/i',               // <li> and </li>
	    '/<li[^>]*>/i',                          // <li>
	    '/<hr[^>]*>/i',                          // <hr>
	    '/(<table[^>]*>|<\/table>)/i',           // <table> and </table>
	    '/(<tr[^>]*>|<\/tr>)/i',                 // <tr> and </tr>
	    '/<td[^>]*>(.*?)<\/td>/i',               // <td> and </td>
	    '/&(nbsp|#160);/i',                      // Non-breaking space
	    '/&(quot|rdquo|ldquo|#8220|#8221|#147|#148);/i', // Double quotes
	    '/&(apos|rsquo|lsquo|#8216|#8217);/i',   // Single quotes
	    '/&gt;/i',                               // Greater-than
	    '/&lt;/i',                               // Less-than
	    '/&(amp|#38);/i',                        // Ampersand
	    '/&(copy|#169);/i',                      // Copyright
	    '/&(trade|#8482|#153);/i',               // Trademark
	    '/&(reg|#174);/i',                       // Registered
	    '/&(mdash|#151|#8212);/i',               // mdash
	    '/&(ndash|minus|#8211|#8722);/i',        // ndash
	    '/&(bull|#149|#8226);/i',                // Bullet
	    '/&(pound|#163);/i',                     // Pound sign
	    '/&(euro|#8364);/i',                     // Euro sign
	    '/&[^&;]+;/i',                           // Unknown/unhandled entities
	    '/[ ]{2,}/'                              // Runs of spaces, post-handling
	);

	/**
	* List of pattern replacements corresponding to patterns searched.
	*
	* @var array $replace
	* @access public
	* @see $search
	*/
	// Replacement strings; the entry at each position belongs to the pattern
	// at the same position in $search, so keep both lists aligned.
	var $replace = array(
	    '',                                      // Non-legal carriage return
	    ' ',                                     // Newlines and tabs
	    ' ',                                     // Runs of spaces, pre-handling
	    '',                                      // <script>s -- which strip_tags supposedly has problems with
	    '',                                      // <style>s -- which strip_tags supposedly has problems with
	    //'',                                    // Comments -- which strip_tags might have a problem with
	    "\n\n",                                  // <P>
	    "\n",                                    // <br>
	    '_\\1_',                                 // <i>
	    '_\\1_',                                 // <em>
	    "\n\n",                                  // <ul> and </ul>
	    "\n\n",                                  // <ol> and </ol>
	    "\t* \\1\n",                             // <li> and </li>
	    "\n\t* ",                                // <li>
	    "\n-------------------------\n",         // <hr>
	    "\n\n",                                  // <table> and </table>
	    "\n",                                    // <tr> and </tr>
	    "\t\t\\1\n",                             // <td> and </td>
	    ' ',                                     // Non-breaking space
	    '"',                                     // Double quotes
	    "'",                                     // Single quotes
	    '>',                                     // Greater-than
	    '<',                                     // Less-than
	    '&',                                     // Ampersand
	    '(c)',                                   // Copyright
	    '(tm)',                                  // Trademark
	    '(R)',                                   // Registered
	    '--',                                    // mdash
	    '-',                                     // ndash
	    '*',                                     // Bullet
	    '£',                                     // Pound sign (was shown as mis-decoded "Â£")
	    'EUR',                                   // Euro sign
	    '',                                      // Unknown/unhandled entities
	    ' '                                      // Runs of spaces, post-handling
	);

	/**
	* List of preg* regular expression patterns to search for
	* and replace using callback function.
	*
	* @var array $callback_search
	* @access public
	*/
	// Patterns handled by _preg_callback(); capture group 1 is the tag name
	// the callback dispatches on. Quantifiers and the quote alternation are
	// written out in full here (they were lost in the rendered listing).
	var $callback_search = array(
	    '/<(h)[123456][^>]*>(.*?)<\/h[123456]>/i',             // H1 - H6
	    '/<(b)[^>]*>(.*?)<\/b>/i',                             // <b>
	    '/<(strong)[^>]*>(.*?)<\/strong>/i',                   // <strong>
	    '/<(a) [^>]*href=("|\')([^"\']+)\2[^>]*>(.*?)<\/a>/i', // <a href="">
	    '/<(th)[^>]*>(.*?)<\/th>/i',                           // <th> and </th>
	);

	/**
	* List of preg* regular expression patterns to search for in PRE body,
	* used in conjunction with $pre_replace.
	*
	* @var array $pre_search
	* @access public
	* @see $pre_replace
	*/
	var $pre_search = array(
	"/\n/",
	"/\t/",
	'/ /',
	'/<pre[^>]*>/',
	'/<\/pre>/'
	);

	/**
	* List of pattern replacements corresponding to patterns searched for PRE body.
	*
	* @var array $pre_replace
	* @access public
	* @see $pre_search
	*/
	// Replacements for $pre_search, by position: newline, tab, space,
	// opening <pre> tag, closing </pre> tag.
	// NOTE(review): the tab and space replacements appear here as plain spaces,
	// which would make the '/ /' => ' ' pair a no-op; presumably they are
	// non-breaking-space entities decoded by the page renderer - confirm
	// against the repository before relying on this listing.
	var $pre_replace = array(
	'<br>',
	'    ',
	' ',
	'',
	''
	);

	/**
	* Contains a list of HTML tags to allow in the resulting text.
	*
	* @var string $allowed_tags
	* @access public
	* @see set_allowed_tags()
	*/
	var $allowed_tags = '';

	/**
	* Contains the base URL that relative links should resolve to.
	*
	* @var string $url
	* @access public
	*/
	var $url;

	/**
	* Indicates whether content in the $html variable has been converted yet.
	*
	* @var boolean $_converted
	* @access private
	* @see $html, $text
	*/
	var $_converted = false;

	/**
	* Contains URL addresses from links to be rendered in plain text.
	*
	* @var string $_link_list
	* @access private
	* @see _build_link_list()
	*/
	var $_link_list = '';

	/**
	* Number of valid links detected in the text, used for plain text
	* display (rendered similar to footnotes).
	*
	* @var integer $_link_count
	* @access private
	* @see _build_link_list()
	*/
	var $_link_count = 0;

	/**
	* Boolean flag, true if a table of link URLs should be listed after the text.
	*
	* @var boolean $_do_links
	* @access private
	* @see html2text()
	*/
	var $_do_links = true;

	/**
	* Constructor.
	*
	* If the HTML source string (or file) is supplied, the class
	* will instantiate with that source propagated, all that has
	* to be done is to call get_text().
	*
	* @param string $source HTML content
	* @param boolean $from_file Indicates $source is a file to pull content from
	* @param boolean $do_links Indicate whether a table of link URLs is desired
	* @param integer $width Maximum width of the formatted text, 0 for no limit
	* @access public
	* @return void
	*/
	function html2text( $source = '', $from_file = false, $do_links = true, $width = 75 )
	{
	    // Store the rendering options and the default base URL first ...
	    $this->_do_links = $do_links;
	    $this->width     = $width;
	    $this->set_base_url();

	    // ... then load the HTML, but only when a source was actually given.
	    if ( empty($source) ) {
	        return;
	    }
	    $this->set_html($source, $from_file);
	}

	/**
	* Loads source HTML into memory, either from $source string or a file.
	*
	* @param string $source HTML content
	* @param boolean $from_file Indicates $source is a file to pull content from
	* @access public
	* @return void
	*/
	function set_html( $source, $from_file = false )
	{
	    // Read from disk only when asked to and the path exists; in every other
	    // case $source itself is taken as the HTML.
	    $read_file  = $from_file && file_exists($source);
	    $this->html = $read_file ? file_get_contents($source) : $source;

	    // New input invalidates any previously converted text.
	    $this->_converted = false;
	}

	/**
	* Returns the text, converted from HTML.
	*
	* @access public
	* @return string
	*/
	function get_text()
	{
	    // Convert lazily: reuse the stored text when it is already up to date.
	    if ( $this->_converted ) {
	        return $this->text;
	    }

	    $this->_convert();
	    return $this->text;
	}

	/**
	* Prints the text, converted from HTML.
	*
	* @access public
	* @return void
	*/
	function print_text()
	{
	    // Write the converted text straight to the output.
	    $converted = $this->get_text();
	    echo $converted;
	}

	/**
	* Alias to print_text(), operates identically.
	*
	* @access public
	* @return void
	* @see print_text()
	*/
	function p()
	{
	    // Short alias: outputs the converted text exactly like print_text().
	    $converted = $this->get_text();
	    echo $converted;
	}

	/**
	* Sets the allowed HTML tags to pass through to the resulting text.
	*
	* Tags should be in the form "<p>", with no corresponding closing tag.
	*
	* @access public
	* @return void
	*/
	function set_allowed_tags( $allowed_tags = '' )
	{
	    // An empty argument leaves the current setting untouched.
	    if ( empty($allowed_tags) ) {
	        return;
	    }

	    $this->allowed_tags = $allowed_tags;
	}

	/**
	* Sets a base URL to handle relative links.
	*
	* @access public
	* @return void
	*/
	function set_base_url( $url = '' )
	{
	    if ( !empty($url) ) {
	        // Drop one trailing slash for consistency (relative URLs may
	        // already start with a slash like "/file.html").
	        $has_slash = ( substr($url, -1) == '/' );
	        $this->url = $has_slash ? substr($url, 0, -1) : $url;
	        return;
	    }

	    // No URL given: fall back to the current HTTP host, if there is one.
	    if ( empty($_SERVER['HTTP_HOST']) ) {
	        $this->url = '';
	    } else {
	        $this->url = 'http://' . $_SERVER['HTTP_HOST'];
	    }
	}

	/**
	* Workhorse function that does actual conversion.
	*
	* First performs custom tag replacement specified by $search and
	* $replace arrays. Then strips any remaining HTML tags, reduces whitespace
	* and newlines to a readable format, and word wraps the text to
	* $width characters.
	*
	* @access private
	* @return void
	*/
	function _convert()
	{
	// Converts $this->html to plain text, stores it in $this->text and marks
	// the object as converted.

	// Variables used for building the link list
	$this->_link_count = 0;
	$this->_link_list = '';

	$text = trim(stripslashes($this->html));

	// Convert <PRE>
	// ($text is passed by reference and rewritten in place)
	$this->_convert_pre($text);

	// Diff markers: the three "-" lines below are removed by this patch; the
	// entity decoding moves to the "+" lines after the search-and-replace step.
	- // Replace known html entities
	- $text = html_entity_decode($text, ENT_COMPAT, 'UTF-8');
	-
	// Run our defined search-and-replace
	$text = preg_replace($this->search, $this->replace, $text);
	// NOTE(review): the callback is given as array('html2text', '_preg_callback'),
	// i.e. by class name, while _preg_callback() uses $this - verify this still
	// works on the targeted PHP version (array($this, ...) would bind the instance).
	$text = preg_replace_callback($this->callback_search, array('html2text', '_preg_callback'), $text);

	+ // Replace known html entities
	+ $text = html_entity_decode($text, ENT_COMPAT, 'UTF-8');
	+
	// Strip any other HTML tags
	$text = strip_tags($text, $this->allowed_tags);

	// Bring down number of empty lines to 2 max
	$text = preg_replace("/\n\s+\n/", "\n\n", $text);
	$text = preg_replace("/[\n]{3,}/", "\n\n", $text);

	// Add link list
	// (collected by _build_link_list() during the callback replacement above)
	if ( !empty($this->_link_list) ) {
	$text .= "\n\nLinks:\n------\n" . $this->_link_list;
	}

	// Wrap the text to a readable format
	// for PHP versions >= 4.0.2. Default width is 75
	// If width is 0 or less, don't wrap the text.
	if ( $this->width > 0 ) {
	$text = wordwrap($text, $this->width);
	}

	$this->text = $text;

	$this->_converted = true;
	}

	/**
	* Helper function called by preg_replace() on link replacement.
	*
	* Maintains an internal list of links to be displayed at the end of the
	* text, with numeric indices to the original point in the text they
	* appeared. Also makes an effort at identifying and handling absolute
	* and relative links.
	*
	* @param string $link URL of the link
	* @param string $display Part of the text to associate number with
	* @access private
	* @return string
	*/
	function _build_link_list( $link, $display )
	{
	    // Link listing switched off: the visible text is returned as is.
	    if ( !$this->_do_links ) {
	        return $display;
	    }

	    // javascript: pseudo-links are neither counted nor listed.
	    // what about href="#anchor" ?
	    if ( substr($link, 0, 11) == 'javascript:' ) {
	        return $display . '';
	    }

	    $is_absolute = substr($link, 0, 7) == 'http://'
	        || substr($link, 0, 8) == 'https://'
	        || substr($link, 0, 7) == 'mailto:';

	    // Every remaining link gets the next footnote number.
	    $this->_link_count++;
	    $entry = '[' . $this->_link_count . '] ';

	    if ( !$is_absolute ) {
	        // Relative link: prefix the base URL, adding the separating slash
	        // when the link does not bring its own.
	        $entry .= $this->url;
	        if ( substr($link, 0, 1) != '/' ) {
	            $entry .= '/';
	        }
	    }

	    $this->_link_list .= $entry . "$link\n";

	    return $display . ' [' . $this->_link_count . ']';
	}

	/**
	* Helper function for PRE body conversion.
	*
	* @param string HTML content
	* @access private
	*/
	function _convert_pre(&$text)
	{
	    // Rewrites every <pre>...</pre> section of $text (modified in place):
	    // the section body is run through $pre_search/$pre_replace and wrapped
	    // in <div><br>...<br></div>.
	    //
	    // The pattern is ungreedy (U) so each pass handles the first, shortest
	    // <pre> section. The match offset is captured and the section is
	    // spliced with substr_replace(), so "$1" or "\0" sequences inside the
	    // <pre> content are not expanded as regex back-references.
	    while (preg_match('/<pre[^>]*>(.*)<\/pre>/ismU', $text, $matches, PREG_OFFSET_CAPTURE)) {
	        $section = $matches[0][0];
	        $offset  = $matches[0][1];
	        $result  = preg_replace($this->pre_search, $this->pre_replace, $matches[1][0]);

	        $text = substr_replace($text, '<div><br>' . $result . '<br></div>', $offset, strlen($section));
	    }
	}

	/**
	* Callback function for preg_replace_callback use.
	*
	* @param array PREG matches
	* @return string
	* @access private
	*/
	function _preg_callback($matches)
	{
	    // Dispatches on the tag name captured as group 1 by the patterns in
	    // $callback_search and returns the plain-text replacement.
	    //
	    // Those patterns are case-insensitive, so the captured tag may be
	    // upper-case (e.g. "B", "TH"); normalise it before comparing.
	    switch (strtolower($matches[1])) {
	        case 'b':
	        case 'strong':
	            return $this->_strtoupper($matches[2]);

	        // Table header cell: $callback_search captures "th" (there is no
	        // pattern that captures "hr"), so this is the branch that must exist.
	        case 'th':
	            return $this->_strtoupper("\t\t". $matches[2] ."\n");

	        case 'h':
	            return $this->_strtoupper("\n\n". $matches[2] ."\n\n");

	        case 'a':
	            return $this->_build_link_list($matches[3], $matches[4]);
	    }

	    // Unknown tag: keep the matched markup instead of returning null,
	    // which would silently delete it from the text.
	    return $matches[0];
	}

	/**
	* Strtoupper multibyte wrapper function
	*
	* @param string
	* @return string
	* @access private
	*/
	function _strtoupper($str)
	{
	    // Use the multibyte implementation when the mbstring function is
	    // available, otherwise fall back to the byte-wise one.
	    $upper = function_exists('mb_strtoupper') ? 'mb_strtoupper' : 'strtoupper';

	    return $upper($str);
	}
	}

	?>

File Metadata

Mime Type: text/x-diff
Expires: Sat, Mar 1, 12:36 PM (4 h, 43 m)
Storage Engine: blob
Storage Format: Raw Data
Storage Handle: 166971
Default Alt Text: (39 KB)

No OneTemporaryActions

View Options

File Metadata

Event Timeline

No OneTemporary
Actions