Page Menu
Home
Phorge
Search
Configure Global Search
Log In
Files
F4690762
rcube_spellchecker.php
No One
Temporary
Actions
Download File
Edit File
Delete File
View Transforms
Subscribe
Flag For Later
Award Token
Size
17 KB
Referenced Files
None
Subscribers
None
rcube_spellchecker.php
View Options
<?php
/*
+-----------------------------------------------------------------------+
| This file is part of the Roundcube Webmail client |
| Copyright (C) 2011, Kolab Systems AG |
| Copyright (C) 2008-2011, The Roundcube Dev Team |
| |
| Licensed under the GNU General Public License version 3 or |
| any later version with exceptions for skins & plugins. |
| See the README file for a full license statement. |
| |
| PURPOSE: |
| Spellchecking using different backends |
+-----------------------------------------------------------------------+
| Author: Aleksander Machniak <machniak@kolabsys.com> |
| Author: Thomas Bruederli <roundcube@gmail.com> |
+-----------------------------------------------------------------------+
*/
/**
* Helper class for spellchecking with Googiespell and PSpell support.
*
* @package Framework
* @subpackage Utils
*/
class rcube_spellchecker
{
    /**
     * Result of the last check() call. Every entry is a 5-element list:
     *  - googie engine: regexp match set (full tag, offset, length, "s"
     *    attribute, tab-separated suggestions string)
     *  - pspell engine: (word, offset, length, null, suggestions array)
     */
    private $matches = array();
    private $engine;
    private $lang;
    private $rc;
    private $error;
    // plain-text content passed to the last check() call
    private $content;
    // pspell dictionary link, created lazily by _pspell_init()
    private $plink;
    private $separator = '/[\s\r\n\t\(\)\/\[\]{}<>\\"]+|[:;?!,\.](?=\W|$)/';
    private $options = array();
    // list of words from the exceptions dictionary (null until loaded)
    private $dict;
    // true when a dictionary row was found in the database by load_dict()
    private $have_dict;

    // default settings
    const GOOGLE_HOST = 'ssl://www.google.com';
    const GOOGLE_PORT = 443;
    const MAX_SUGGESTIONS = 10;


    /**
     * Constructor
     *
     * Reads the engine name and the spellcheck_* options from configuration.
     *
     * @param string $lang Language code
     */
    public function __construct($lang = 'en')
    {
        $this->rc     = rcube::get_instance();
        $this->engine = $this->rc->config->get('spellcheck_engine', 'googie');
        $this->lang   = $lang ? $lang : 'en';

        $this->options = array(
            'ignore_syms' => $this->rc->config->get('spellcheck_ignore_syms'),
            'ignore_nums' => $this->rc->config->get('spellcheck_ignore_nums'),
            'ignore_caps' => $this->rc->config->get('spellcheck_ignore_caps'),
            'dictionary'  => $this->rc->config->get('spellcheck_dictionary'),
        );
    }


    /**
     * Set content and check spelling
     *
     * @param string $text    Text content for spellchecking
     * @param bool   $is_html Enables HTML-to-Text conversion
     *
     * @return bool True when no misspelling found, otherwise false
     */
    public function check($text, $is_html = false)
    {
        // convert to plain text
        $this->content = $is_html ? $this->html2text($text) : $text;

        if ($this->engine == 'pspell') {
            $this->matches = $this->_pspell_check($this->content);
        }
        else {
            $this->matches = $this->_googie_check($this->content);
        }

        return $this->found() == 0;
    }


    /**
     * Number of misspellings found (after check)
     *
     * @return int Number of misspellings
     */
    public function found()
    {
        return count($this->matches);
    }


    /**
     * Returns suggestions for the specified word
     *
     * @param string $word The word
     *
     * @return array Suggestions list
     */
    public function get_suggestions($word)
    {
        if ($this->engine == 'pspell') {
            return $this->_pspell_suggestions($word);
        }

        return $this->_googie_suggestions($word);
    }


    /**
     * Returns misspelled words
     *
     * @param string $text    The content for spellchecking. If empty content
     *                        used for check() method will be used.
     * @param bool   $is_html Enables HTML-to-Text conversion of $text
     *
     * @return array List of misspelled words
     */
    public function get_words($text = null, $is_html = false)
    {
        if ($this->engine == 'pspell') {
            return $this->_pspell_words($text, $is_html);
        }

        return $this->_googie_words($text, $is_html);
    }


    /**
     * Returns checking result in XML (Googiespell) format
     *
     * @return string XML content
     */
    public function get_xml()
    {
        // send output
        $out = '<?xml version="1.0" encoding="' . RCUBE_CHARSET . '"?>'
            . '<spellresult charschecked="' . mb_strlen((string) $this->content) . '">';

        foreach ($this->matches as $item) {
            $out .= '<c o="' . $item[1] . '" l="' . $item[2] . '">';
            $out .= $this->format_suggestions($item[4]);
            $out .= '</c>';
        }

        $out .= '</spellresult>';

        return $out;
    }


    /**
     * Returns checking result (misspelled words with suggestions)
     *
     * @return array Spellchecking result. An array indexed by word.
     */
    public function get()
    {
        $result = array();

        foreach ($this->matches as $item) {
            if ($this->engine == 'pspell') {
                $word = $item[0];
            }
            else {
                // googie matches carry only offset/length, not the word itself
                $word = mb_substr($this->content, $item[1], $item[2], RCUBE_CHARSET);
            }

            $result[$word] = $this->format_suggestions($item[4]);
        }

        return $result;
    }


    /**
     * Returns error message
     *
     * @return string Error message
     */
    public function error()
    {
        return $this->error;
    }


    /**
     * Checks the text using pspell
     *
     * @param string $text Text content for spellchecking
     *
     * @return array List of matches (word, offset, length, null, suggestions)
     */
    private function _pspell_check($text)
    {
        // init spellchecker
        $this->_pspell_init();

        if (!$this->plink) {
            return array();
        }

        $diff    = 0;
        $matches = array();

        foreach ($this->tokenize($text) as $w) {
            $word = trim($w[0]);
            // preg_split() reports byte offsets; $diff holds the bytes-vs-chars
            // surplus of the words seen so far
            $pos  = $w[1] - $diff;
            $len  = mb_strlen($word);

            // skip exceptions
            if (!$this->is_exception($word) && !pspell_check($this->plink, $word)) {
                $suggestions = $this->limit_suggestions(pspell_suggest($this->plink, $word));
                $matches[]   = array($word, $pos, $len, null, $suggestions);
            }

            $diff += (strlen($word) - $len);
        }

        return $matches;
    }


    /**
     * Returns the misspelled words (pspell engine)
     *
     * @param string $text    Text to check; when empty the result of the
     *                        last check() call is used
     * @param bool   $is_html Enables HTML-to-Text conversion of $text
     *
     * @return array List of misspelled words
     */
    private function _pspell_words($text = null, $is_html = false)
    {
        $result = array();

        if (!$text) {
            foreach ($this->matches as $m) {
                $result[] = $m[0];
            }

            return $result;
        }

        // init spellchecker
        $this->_pspell_init();

        if (!$this->plink) {
            return array();
        }

        // With PSpell we don't need to get suggestions to return misspelled words
        if ($is_html) {
            $text = $this->html2text($text);
        }

        foreach ($this->tokenize($text) as $w) {
            $word = trim($w[0]);

            // skip exceptions
            if (!$this->is_exception($word) && !pspell_check($this->plink, $word)) {
                $result[] = $word;
            }
        }

        return $result;
    }


    /**
     * Returns suggestions for misspelled word (pspell engine)
     *
     * @param string $word The word
     *
     * @return array Suggestions list, at most MAX_SUGGESTIONS entries
     */
    private function _pspell_suggestions($word)
    {
        // init spellchecker
        $this->_pspell_init();

        if (!$this->plink) {
            return array();
        }

        return $this->limit_suggestions(pspell_suggest($this->plink, $word));
    }


    /**
     * Initializes PSpell dictionary
     *
     * Sets $this->plink on success and $this->error on failure.
     */
    private function _pspell_init()
    {
        if (!$this->plink) {
            if (!extension_loaded('pspell')) {
                $this->error = "Pspell extension not available";
                rcube::raise_error(array(
                    'code' => 500, 'type' => 'php',
                    'file' => __FILE__, 'line' => __LINE__,
                    'message' => $this->error), true, false);

                return;
            }

            // empty strings (not null) for the unused spelling/jargon arguments
            $this->plink = pspell_new($this->lang, '', '', RCUBE_CHARSET, PSPELL_FAST);
        }

        if (!$this->plink) {
            $this->error = "Unable to load Pspell engine for selected language";
        }
    }


    /**
     * Checks the text using a Googiespell-compatible HTTP service
     *
     * The service is the 'spellcheck_uri' config option (language code is
     * appended to it) or the default Google host when the option is not set.
     *
     * @param string $text Text content for spellchecking
     *
     * @return array List of regexp match sets, one per <c> element of the
     *               response: (tag, offset, length, s, suggestions)
     */
    private function _googie_check($text)
    {
        // spell check uri is configured
        $url = $this->rc->config->get('spellcheck_uri');

        if ($url) {
            $a_uri = parse_url($url);
            if (!is_array($a_uri)) {
                $a_uri = array();
            }

            $scheme = isset($a_uri['scheme']) ? $a_uri['scheme'] : '';
            $ssl    = ($scheme == 'https' || $scheme == 'ssl');
            $port   = !empty($a_uri['port']) ? $a_uri['port'] : ($ssl ? 443 : 80);
            $host   = ($ssl ? 'ssl://' : '') . (isset($a_uri['host']) ? $a_uri['host'] : '');
            $path   = (isset($a_uri['path']) ? $a_uri['path'] : '')
                . (!empty($a_uri['query']) ? '?' . $a_uri['query'] : '')
                . $this->lang;
        }
        else {
            $host = self::GOOGLE_HOST;
            $port = self::GOOGLE_PORT;
            $path = '/tbproxy/spell?lang=' . $this->lang;
        }

        // Google has some problem with spaces, use \n instead
        $gtext = str_replace(' ', "\n", $text);

        // the text is embedded in an XML document, so markup characters
        // in it must be escaped to keep the request well-formed
        $gtext = '<?xml version="1.0" encoding="utf-8" ?>'
            . '<spellrequest textalreadyclipped="0" ignoredups="0" ignoredigits="1" ignoreallcaps="1">'
            . '<text>' . htmlspecialchars($gtext, ENT_QUOTES, RCUBE_CHARSET) . '</text>'
            . '</spellrequest>';

        $store = '';

        if ($fp = fsockopen($host, $port, $errno, $errstr, 30)) {
            $out  = "POST $path HTTP/1.0\r\n";
            $out .= "Host: " . str_replace('ssl://', '', $host) . "\r\n";
            $out .= "Content-Length: " . strlen($gtext) . "\r\n";
            $out .= "Content-Type: application/x-www-form-urlencoded\r\n";
            $out .= "Connection: Close\r\n\r\n";
            $out .= $gtext;

            fwrite($fp, $out);

            while (!feof($fp)) {
                $chunk = fgets($fp, 128);

                // read error: stop instead of looping until EOF forever
                if ($chunk === false) {
                    break;
                }

                $store .= $chunk;
            }

            fclose($fp);
        }

        if (!$store) {
            $this->error = "Empty result from spelling engine";
        }

        preg_match_all('/<c o="([^"]*)" l="([^"]*)" s="([^"]*)">([^<]*)<\/c>/', $store, $matches, PREG_SET_ORDER);

        // skip exceptions (if appropriate options are enabled)
        if (!empty($this->options['ignore_syms']) || !empty($this->options['ignore_nums'])
            || !empty($this->options['ignore_caps']) || !empty($this->options['dictionary'])
        ) {
            foreach ($matches as $idx => $m) {
                $word = mb_substr($text, $m[1], $m[2], RCUBE_CHARSET);

                // skip exceptions
                if ($this->is_exception($word)) {
                    unset($matches[$idx]);
                }
            }
        }

        // reindex, so the first remaining match is always at index 0
        return array_values($matches);
    }


    /**
     * Returns the misspelled words (googie engine)
     *
     * @param string $text    Text to check; when empty the result and content
     *                        of the last check() call are used
     * @param bool   $is_html Enables HTML-to-Text conversion of $text
     *
     * @return array List of misspelled words
     */
    private function _googie_words($text = null, $is_html = false)
    {
        if ($text) {
            if ($is_html) {
                $text = $this->html2text($text);
            }

            $matches = $this->_googie_check($text);
        }
        else {
            $matches = $this->matches;
            $text    = $this->content;
        }

        $result = array();

        foreach ($matches as $m) {
            $result[] = mb_substr($text, $m[1], $m[2], RCUBE_CHARSET);
        }

        return $result;
    }


    /**
     * Returns suggestions for misspelled word (googie engine)
     *
     * @param string $word The word; when empty the first match of the last
     *                     check() call is used
     *
     * @return array Suggestions list, at most MAX_SUGGESTIONS entries
     */
    private function _googie_suggestions($word)
    {
        if ($word) {
            $matches = $this->_googie_check($word);
        }
        else {
            $matches = $this->matches;
        }

        if (!empty($matches[0][4])) {
            return $this->limit_suggestions(explode("\t", $matches[0][4]));
        }

        return array();
    }


    /**
     * Converts HTML content to plain text using rcube_html2text
     *
     * @param string $text HTML content
     *
     * @return string Plain text
     */
    private function html2text($text)
    {
        $h2t = new rcube_html2text($text, false, true, 0);
        return $h2t->get_text();
    }


    /**
     * Splits text into words using the separator pattern
     *
     * @param string $text Plain text
     *
     * @return array List of (word, byte offset) pairs
     */
    private function tokenize($text)
    {
        $tokens = preg_split($this->separator, (string) $text, -1,
            PREG_SPLIT_NO_EMPTY | PREG_SPLIT_OFFSET_CAPTURE);

        return is_array($tokens) ? $tokens : array();
    }


    /**
     * Normalizes a suggestions list and cuts it down to MAX_SUGGESTIONS
     *
     * @param mixed $suggestions Suggestions array (anything else is
     *                           treated as "no suggestions")
     *
     * @return array Suggestions list
     */
    private function limit_suggestions($suggestions)
    {
        if (!is_array($suggestions)) {
            return array();
        }

        if (count($suggestions) > self::MAX_SUGGESTIONS) {
            $suggestions = array_slice($suggestions, 0, self::MAX_SUGGESTIONS);
        }

        return $suggestions;
    }


    /**
     * Returns suggestions of a match as a tab-separated string
     *
     * @param mixed $suggestions Suggestions array (pspell) or an already
     *                           tab-separated string (googie)
     *
     * @return string Tab-separated suggestions
     */
    private function format_suggestions($suggestions)
    {
        return is_array($suggestions) ? implode("\t", $suggestions) : $suggestions;
    }


    /**
     * Check if the specified word is an exception according to
     * spellcheck options.
     *
     * @param string $word The word
     *
     * @return bool True if the word is an exception, False otherwise
     */
    public function is_exception($word)
    {
        // Contain only symbols (e.g. "+9,0", "2:2")
        if (!$word || preg_match('/^[0-9@#$%^&_+~*=:;?!,.-]+$/', $word)) {
            return true;
        }

        // Contain symbols (e.g. "g@@gle"), all symbols excluding separators
        if (!empty($this->options['ignore_syms']) && preg_match('/[@#$%^&_+~*=-]/', $word)) {
            return true;
        }

        // Contain numbers (e.g. "g00g13")
        if (!empty($this->options['ignore_nums']) && preg_match('/[0-9]/', $word)) {
            return true;
        }

        // Blocked caps (e.g. "GOOGLE")
        if (!empty($this->options['ignore_caps']) && $word == mb_strtoupper($word)) {
            return true;
        }

        // Use exceptions from dictionary
        if (!empty($this->options['dictionary'])) {
            $this->load_dict();

            // @TODO: should dictionary be case-insensitive?
            if (!empty($this->dict) && in_array($word, $this->dict, true)) {
                return true;
            }
        }

        return false;
    }


    /**
     * Add a word to dictionary
     *
     * @param string $word The word to add (or a space-separated list of words)
     */
    public function add_word($word)
    {
        $this->load_dict();

        $valid = false;

        foreach (explode(' ', (string) $word) as $item) {
            // sanity check: skip empty tokens and oversized input
            if ($item !== '' && strlen($item) < 512) {
                $this->dict[] = $item;
                $valid = true;
            }
        }

        if ($valid) {
            $this->dict = array_unique($this->dict);
            $this->update_dict();
        }
    }


    /**
     * Remove a word from dictionary
     *
     * @param string $word The word to remove
     */
    public function remove_word($word)
    {
        $this->load_dict();

        if (($key = array_search($word, $this->dict, true)) !== false) {
            unset($this->dict[$key]);
            $this->update_dict();
        }
    }


    /**
     * Update dictionary row in DB
     *
     * Runs the 'spell_dictionary_save' hook first; the hook may abort
     * the database update.
     */
    private function update_dict()
    {
        // no user ID is passed for the 'shared' dictionary
        $userid = null;

        if (strcasecmp((string) $this->options['dictionary'], 'shared') != 0) {
            $userid = $this->rc->get_user_id();
        }

        $plugin = $this->rc->plugins->exec_hook('spell_dictionary_save', array(
            'userid' => $userid, 'language' => $this->lang, 'dictionary' => $this->dict));

        if (!empty($plugin['abort'])) {
            return;
        }

        $db    = $this->rc->db;
        $table = $db->table_name('dictionary');

        if ($this->have_dict) {
            if (!empty($this->dict)) {
                $db->query(
                    "UPDATE " . $table
                    . " SET data = ?"
                    . $this->dict_where($plugin['userid']),
                    implode(' ', $plugin['dictionary']), $plugin['language']);
            }
            // don't store empty dict
            else {
                $db->query(
                    "DELETE FROM " . $table
                    . $this->dict_where($plugin['userid']),
                    $plugin['language']);
            }
        }
        else if (!empty($this->dict)) {
            $db->query(
                "INSERT INTO " . $table
                . " (user_id, " . $db->quoteIdentifier('language') . ", data) VALUES (?, ?, ?)",
                $plugin['userid'], $plugin['language'], implode(' ', $plugin['dictionary']));
        }
    }


    /**
     * Get dictionary from DB
     *
     * Runs the 'spell_dictionary_get' hook first; unless the hook aborts,
     * words stored in the database are appended to the hook's list.
     * The result is kept in $this->dict, so the work is done only once.
     *
     * @return array List of dictionary words
     */
    private function load_dict()
    {
        if (is_array($this->dict)) {
            return $this->dict;
        }

        // no user ID is passed for the 'shared' dictionary
        $userid = null;

        if (strcasecmp((string) $this->options['dictionary'], 'shared') != 0) {
            $userid = $this->rc->get_user_id();
        }

        $plugin = $this->rc->plugins->exec_hook('spell_dictionary_get', array(
            'userid' => $userid, 'language' => $this->lang, 'dictionary' => array()));

        if (empty($plugin['abort'])) {
            $dict = array();
            $db   = $this->rc->db;

            $sql_result = $db->query(
                "SELECT data FROM " . $db->table_name('dictionary')
                . $this->dict_where($plugin['userid']),
                $plugin['language']);

            if ($sql_arr = $db->fetch_assoc($sql_result)) {
                $this->have_dict = true;

                if (!empty($sql_arr['data'])) {
                    $dict = explode(' ', $sql_arr['data']);
                }
            }

            $plugin['dictionary'] = array_merge((array) $plugin['dictionary'], $dict);
        }

        if (!empty($plugin['dictionary']) && is_array($plugin['dictionary'])) {
            $this->dict = $plugin['dictionary'];
        }
        else {
            $this->dict = array();
        }

        return $this->dict;
    }


    /**
     * Builds the WHERE clause selecting the dictionary row of a user
     *
     * The language value is left as a "?" placeholder to be bound by
     * the caller.
     *
     * @param mixed $userid User ID, empty value selects the row with no user
     *
     * @return string SQL WHERE clause (with leading space)
     */
    private function dict_where($userid)
    {
        $db = $this->rc->db;

        return " WHERE user_id " . ($userid ? "= " . $db->quote($userid) : "IS NULL")
            . " AND " . $db->quoteIdentifier('language') . " = ?";
    }
}
File Metadata
Details
Attached
Mime Type
text/x-php
Expires
Fri, May 1, 5:08 PM (1 d, 22 h)
Storage Engine
blob
Storage Format
Raw Data
Storage Handle
607835
Default Alt Text
rcube_spellchecker.php (17 KB)
Attached To
Mode
R3 roundcubemail
Attached
Detach File
Event Timeline
Log In to Comment