source: sandbox/2.3-MailArchiver/expressoMail1_2/spell_checker/spell_checker.php @ 6779

Revision 6779, 17.5 KB checked in by rafaelraymundo, 12 years ago (diff)

Ticket #2946 - Liberado Expresso(branch 2.3) integrado ao MailArchiver?.

Line 
1<?php
2/**********************************************************************************************
3 * AJAX Spell Checker - Version 2.8
4 * (C) 2005 - Garrison Locke
5 *
6 * This spell checker is built in the style of the Gmail spell
7 * checker.  It uses AJAX to communicate with the backend without
8 * requiring the page be reloaded.  If you use this code, please
9 * give me credit and a link to my site would be nice.
10 * http://www.broken-notebook.com.
11 *
12 * Copyright (c) 2005, Garrison Locke
13 * All rights reserved.
14 *
15 * Redistribution and use in source and binary forms, with or without
16 * modification, are permitted provided that the following conditions are met:
17 *
18 *   * Redistributions of source code must retain the above copyright notice,
19 *     this list of conditions and the following disclaimer.
20 *   * Redistributions in binary form must reproduce the above copyright notice,
21 *     this list of conditions and the following disclaimer in the documentation
22 *     and/or other materials provided with the distribution.
23 *   * Neither the name of the http://www.broken-notebook.com nor the names of its
24 *     contributors may be used to endorse or promote products derived from this
25 *     software without specific prior written permission.
26 *
27 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
28 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
29 * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
30 * IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT,
31 * INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
32 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
33 * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
34 * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
35 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY
36 * OF SUCH DAMAGE.
37 *
38 ***********************************************************************************************/
39
// User-configurable list of allowed HTML tags and attributes.
// Thanks to Jake Olefsky for this little addition.
// Disabled in this build: all HTML tags are accepted.
//$allowed_html = '<strong><small><p><br><a><b><u><i><img><code><ul><ol><li>';

// Set the max number of suggestions to return at a time.
define('MAX_SUGGESTIONS', 50);

// Set whether to use a personal dictionary.
$usePersonalDict = false;

// Set whether users are allowed to update the personal dictionary.
$editablePersonalDict = false;

// If using a personal dictionary, set the path to it.  Default is in the
// personal_dictionary subdirectory of the location of spell_checker.php.
$path_to_personal_dictionary = dirname(__FILE__) . "/personal_dictionary/personal_dictionary.txt";

// If pspell doesn't exist, then include the pspell wrapper for aspell.
if(!function_exists('pspell_suggest'))
{
        // Set the path to aspell if you need to use it.
        define('ASPELL_BIN','/usr/bin/aspell');
        require_once ("pspell_comp.php");
}

// Create and configure a link to the pspell module.
// The shared configuration is disabled: the request handlers below open their
// own link with pspell_new().  The configuration calls are therefore guarded
// so they only run if the pspell_config_create() line is re-enabled; calling
// them on an undefined $pspell_config raises a warning on old PHP versions
// and a fatal TypeError on PHP 8.
//$pspell_config = pspell_config_create("en");
if(isset($pspell_config))
{
        pspell_config_mode($pspell_config, PSPELL_FAST);

        if($usePersonalDict)
        {
                // Allows the use of a custom dictionary (Thanks to Dylan Thurston for this addition).
                pspell_config_personal($pspell_config, $path_to_personal_dictionary);
        }
}

//$pspell_link = pspell_new_config($pspell_config);


require_once("cpaint/cpaint2.inc.php"); //AJAX library file

// Expose the request handlers to CPAINT, dispatch the incoming call and
// send the response back to the client.
$cp = new cpaint();
$cp->register('showSuggestions');
$cp->register('spellCheck');
$cp->register('switchText');
$cp->register('addWord');
$cp->start();
$cp->return_data();
88
89
/*************************************************************
 * showSuggestions($word, $id, $language)
 *
 * Builds the HTML menu of suggestions (at most MAX_SUGGESTIONS
 * entries) for the given misspelled word and hands it to the
 * CPAINT object as the response data.  Suggestions that sound
 * like the original word (same metaphone key) are listed first;
 * within each group the order returned by pspell is kept.
 *
 * $word     - The misspelled word that was clicked on.
 * $id       - The id of the span containing the misspelled word.
 * $language - Dictionary language passed to pspell_new().
 *
 * Returns nothing; the markup is delivered through $cp->set_data().
 *
 *************************************************************/
function showSuggestions($word, $id, $language)
{
        global $editablePersonalDict; //bool to set editability of personal dictionary
        global $cp; //the CPAINT object

        // pspell_new() returns false when the dictionary cannot be opened;
        // in that case behave as if there were no suggestions.
        $pspell_link = pspell_new($language);
        $suggestions = ($pspell_link !== false) ? pspell_suggest($pspell_link, $word) : array();
        if(!is_array($suggestions))
        {
                $suggestions = array();
        }

        // Stable partition: sound-alike suggestions first, the rest afterwards.
        // (Prepending each match with array_unshift() would reverse pspell's ranking.)
        $orig = metaphone($word);
        $soundAlike = array();
        $others = array();
        foreach($suggestions as $suggestion)
        {
                if(metaphone($suggestion) == $orig)
                {
                        $soundAlike[] = $suggestion;
                }
                else
                {
                        $others[] = $suggestion;
                }
        }

        // Enforce the configured maximum number of suggestions.
        $suggestions = array_slice(array_merge($soundAlike, $others), 0, MAX_SUGGESTIONS);

        // $id and $word come from the client and are emitted inside a single-quoted
        // JavaScript string that itself sits inside a double-quoted HTML attribute:
        // escape for JavaScript first, then for HTML.  The ISO-8859-1 charset makes
        // htmlspecialchars() operate byte-wise, so it only rewrites & < > " ' and is
        // safe for both Latin-1 and UTF-8 input.
        $jsId = htmlspecialchars(addslashes_custom($id), ENT_QUOTES, 'ISO-8859-1');
        $jsWord = htmlspecialchars(addslashes($word), ENT_QUOTES, 'ISO-8859-1');

        $retVal = "";

        if(count($suggestions) > 0)
        {
                // One clickable entry per suggestion; the onclick handler calls the
                // replaceWord javascript function which does the actual replacing on the page.
                foreach($suggestions as $suggestion)
                {
                        $encoded = utf8_encode($suggestion);
                        $retVal .= "<div class=\"suggestion\" onclick=\"replaceWord('" . $jsId . "', '" . htmlspecialchars(addslashes($encoded), ENT_QUOTES, 'ISO-8859-1') . "'); return false;\">" . htmlspecialchars($encoded, ENT_QUOTES, 'ISO-8859-1') . " </div>";
                }

                if($editablePersonalDict)
                {
                        $retVal .= "<div class=\"addtoDictionary\" onclick=\"addWord('" . $jsId . "'); return false;\">Add To Dictionary</div>";
                }
        }
        else
        {
                $retVal .= "Sem sugestão";
        }

        // The "ignore" entry is offered whether or not suggestions were found.
        $retVal .= "<div class=\"ignore\" onclick=\"ignore('" . $jsId . "', '" . $jsWord . "'); return false;\"> Ignorar </div>";

        $cp->set_data($retVal);  //the return value - a string containing the suggestion menu.

} // end showSuggestions
158
159
/*************************************************************
 * spellCheck($string, $varName, $language, $browser)
 *
 * Spell checks the text sent by the client and returns it with
 * every misspelled word wrapped in a clickable, highlighted span.
 *
 * The text is split on anything inside of < > so that html tags
 * are never spell checked.  The remaining text is split on
 * whitespace / &nbsp; so that one word is checked at a time.
 * The resulting nested array is flattened and joined back into
 * a single string.  The response starts with "1" when at least
 * one misspelling was found and with "0" otherwise.
 *
 * $string   - The text (UTF-8, possibly containing html) to check.
 * $varName  - Name of the client-side spell checker object; used
 *             in the generated onclick handler and span ids.
 * $language - Dictionary language passed to pspell_new().
 * $browser  - Client browser hint.  Currently unused: the former
 *             "ie" branch produced exactly the same markup as the
 *             default branch.
 *
 * Returns nothing; the markup is delivered through $cp->set_data()
 * in ISO-8859-1.
 *
 *************************************************************/
function spellCheck($string, $varName, $language, $browser)
{
        global $cp; //the CPAINT object

        // Strip magic-quotes slashes first so the href patterns below see the real quotes.
        $string = stripslashes_custom($string);

        //replace href for href# to block the redirection
        $string = str_replace("href=\"", "href=\"#", $string);
        $string = str_replace("href='", "href='#", $string);

        // Translate UTF-8 "smart" punctuation while the text is still UTF-8.
        // utf8_decode() turns every character that has no ISO-8859-1 equivalent
        // into "?", so after decoding these sequences can no longer be recognised.
        $string = strtr($string, array(
                "\xe2\x80\x9c" => '"',
                "\xe2\x80\x9d" => '"',
                "\xe2\x80\x98" => "'",
                "\xe2\x80\x99" => "'",
                "\xe2\x80\xa6" => "...",
                "\xe2\x80\x94" => "---",
                "\xe2\x80\x93" => "--"
        ));

        $string = utf8_decode($string);

        // Handles the single-byte forms of the same punctuation and trims the text.
        $string = remove_word_junk($string);

        //make all the returns in the text look the same
        $string = preg_replace("/\r?\n/", "\n", $string);

        // $varName is client supplied and is emitted as JavaScript code and as part of an
        // element id, so keep only characters that can appear in an object reference.
        // NOTE(review): assumes the client passes a plain identifier or an indexed
        // reference such as name[0] - confirm against the client-side script.
        $varName = preg_replace('/[^A-Za-z0-9_$.\[\]]/', '', (string) $varName);

        // pspell_new() returns false when the dictionary cannot be opened; in that
        // case nothing is flagged and the text is returned unmarked.
        $pspell_link = pspell_new($language);
        $canCheck = ($pspell_link !== false);

        //splits the string on any html tags, preserving the tags and putting them in the $words array
        $words = preg_split("/(<[^<>]*>)/", $string, -1, PREG_SPLIT_DELIM_CAPTURE);

        $numResults = count($words); //the number of elements in the array.

        $misspelledCount = 0;

        // Letters that make up a word, converted once to the ISO-8859-1 form of the text.
        $reg_expr = utf8_decode('A-ZáàâãÀéÚêëíìïîóòÎõöúùûÌÜÿçñÁÀÂÃÄÉÈÊËÍÌÏÎÓÒÔÕÖÚÙÛÜÝÇÑ');
        $wordPattern = "/[$reg_expr]*/i";

        //this loop looks through the words array and splits any lines of text that aren't html tags on space, preserving the spaces.
        for($i=0; $i<$numResults; $i++)
        {
                // Words alternate between real words and html tags, starting with words.
                if(($i & 1) == 0) // Even-numbered entries are word sets.
                {
                        $words[$i] = preg_split("/(\s+|\&nbsp;)/", $words[$i], -1, PREG_SPLIT_DELIM_CAPTURE); //then split it on the spaces

                        // Now go through each word and link up the misspelled ones.
                        $numWords = count($words[$i]);
                        for($j=0; $j<$numWords; $j++)
                        {
                                // The pattern can match the empty string, so it always matches;
                                // $tmp[0] is the run of letters at the start of the token.
                                preg_match($wordPattern, $words[$i][$j], $tmp);
                                $tmpWord = $tmp[0];

                                // Separators and tokens that do not start with a letter yield an
                                // empty word; there is nothing to check or highlight for those.
                                if($canCheck && $tmpWord !== '' && !pspell_check($pspell_link, $tmpWord))
                                {
                                        // $tmpWord only contains characters from the letter class above,
                                        // so it needs no escaping inside the attribute values.
                                        $spanId = $varName . "_" . $misspelledCount . "_" . $tmpWord;
                                        $onClick = "onclick=\"setCurrentObject(" . $varName . "); showSuggestions('" . $tmpWord . "', '" . $spanId . "'); return false;\"";

                                        //replace the word with the span that highlights it and shows the suggestions on click.
                                        $words[$i][$j] = str_replace($tmpWord, "<span " . $onClick . " id=\"" . $spanId . "\" class=\"highlight\">" . $tmpWord . "</span>", $words[$i][$j]);

                                        $misspelledCount++;
                                }

                                $words[$i][$j] = str_replace("\n", "<br />", $words[$i][$j]); //replace any breaks with <br />'s, for html display

                        }//end for $j
                }//end if

                else //otherwise, we wrap all the html tags in comments to make them not displayed
                {
                        $words[$i] = str_replace("<", "<!--<", $words[$i]);
                        $words[$i] = str_replace(">", ">-->", $words[$i]);
                }
        }//end for $i

        $words = flattenArray($words); //flatten the array to be one dimensional.

        // The response starts with 0 when there were no misspellings and with 1 otherwise,
        // followed by all the words/tags/etc. joined back into one string.
        $string = ($misspelledCount == 0) ? "0" : "1";
        $string .= implode('', $words);

        // Remove the comment wrappers from the html tags again.
        $string = preg_replace("/<!--</i", "<", $string);
        $string = preg_replace("/>-->/i", ">", $string);

        $cp->set_encoding('ISO-8859-1');
        $cp->set_data($string); //return value - string containing all the markup for the misspelled words.

} // end spellCheck
290
291
/*************************************************************
 * addWord($str)
 *
 * Adds a word to the personal dictionary of the "pt_BR"
 * pspell link and saves the word list.  Nothing is added unless
 * $editablePersonalDict allows users to update the dictionary.
 *
 * @param $str The word to be added
 *
 * Returns nothing; "Save successful!" or "Save Failed!" is
 * delivered through $cp->set_data().
 *************************************************************/
function addWord($str)
{
        global $editablePersonalDict;
        global $cp; //the CPAINT object

        $retVal = "Save Failed!";

        // Check the permission before touching the dictionary at all.
        if($editablePersonalDict)
        {
                // pspell_new() returns false when the dictionary cannot be opened.
                $pspell_link = pspell_new("pt_BR");
                if($pspell_link !== false)
                {
                        pspell_add_to_personal($pspell_link, $str);
                        if(pspell_save_wordlist($pspell_link))
                        {
                                $retVal = "Save successful!";
                        }
                }
        }

        $cp->set_data($retVal);
} // end addWord
319
320
321
/*************************************************************
 * flattenArray($array)
 *
 * Recursively collapses a multidimensional array into a
 * one-dimensional, sequentially indexed array.  Elements keep
 * their depth-first order; original keys are discarded.
 *
 * $array - The array to be flattened.
 *
 * Returns the flattened array.
 *
 *************************************************************/
function flattenArray($array)
{
        $result = array();

        foreach($array as $item)
        {
                // Scalars (and any other non-array values) are copied as they are.
                if(!is_array($item))
                {
                        $result[] = $item;
                        continue;
                }

                // Nested arrays are flattened first and their leaves appended in order.
                foreach(flattenArray($item) as $leaf)
                {
                        $result[] = $leaf;
                }
        }

        return $result;
} // end flattenArray
349
350
/*************************************************************
 * stripslashes_custom($string)
 *
 * Strips slashes from the string only when magic quotes added
 * them to the request data.  Magic quotes were removed in
 * PHP 5.4 and get_magic_quotes_gpc() itself was removed in
 * PHP 8.0, so the function is only consulted on PHP versions
 * that can still have the feature enabled.
 *
 * $string - The string that might need the slashes stripped.
 *
 * Returns the string without the magic-quotes slashes.
 *
 *************************************************************/
function stripslashes_custom($string)
{
        // The version test short-circuits, so the removed function is never called on newer PHP.
        if(version_compare(PHP_VERSION, '5.4.0', '<') && get_magic_quotes_gpc())
        {
                return stripslashes($string);
        }

        return $string;
} // end stripslashes_custom
373
/*************************************************************
 * addslashes_custom($string)
 *
 * Adds slashes to the string unless magic quotes already added
 * them to the request data.  Magic quotes were removed in
 * PHP 5.4 and get_magic_quotes_gpc() itself was removed in
 * PHP 8.0, so the function is only consulted on PHP versions
 * that can still have the feature enabled.
 *
 * $string - The string that might need the slashes added.
 *
 * Returns the string with quotes and backslashes escaped.
 *
 *************************************************************/
function addslashes_custom($string)
{
        // The version test short-circuits, so the removed function is never called on newer PHP.
        if(version_compare(PHP_VERSION, '5.4.0', '<') && get_magic_quotes_gpc())
        {
                // Already escaped by magic quotes.
                return $string;
        }

        return addslashes($string);
} // end addslashes_custom
396
397
/*************************************************************
 * remove_word_junk($t)
 *
 * Replaces the typographic punctuation that Word inserts
 * (curly quotes, ellipsis, long dashes - in both their
 * three-byte and single-byte forms) with plain ASCII
 * equivalents, then trims the result.
 *
 * $t - The text to be cleaned
 *
 * Returns the cleaned, trimmed text.
 *
 *************************************************************/
function remove_word_junk($t)
{
        // byte sequence => plain replacement
        $replacements = array(
                "\xe2\x80\x9c"=>'"',
                "\xe2\x80\x9d"=>'"',
                "\xe2\x80\x99"=>"'",
                "\xe2\x80\xa6"=>"...",
                "\xe2\x80\x98"=>"'",
                "\xe2\x80\x94"=>"---",
                "\xe2\x80\x93"=>"--",
                "\x85"=>"...",
                "\221"=>"'",
                "\222"=>"'",
                "\223"=>'"',
                "\224"=>'"',
                "\x97"=>"---",
                "\x96"=>"--"
        );

        // Searches are applied in the order listed above.
        $cleaned = str_replace(array_keys($replacements), array_values($replacements), $t);

        return trim($cleaned);

} // end remove_word_junk
436
437
/*************************************************************
 * switchText($string)
 *
 * Prepares the html taken from the spell check div to be sent
 * back to the text box.  Comment markers are removed and line
 * breaks are replaced with " ".  Escaped span tags
 * (&lt;span ...&gt; / &lt;/span&gt;) are removed.  Filtering
 * against a list of allowed tags is disabled, so every other
 * html tag is kept.
 *
 * $string - The string of html (UTF-8) from the div that will
 *           be sent back to the text box.
 *
 * Returns nothing; the text is delivered through $cp->set_data()
 * in ISO-8859-1.
 *
 *************************************************************/
function switchText($string)
{
        global $cp; //the CPAINT object

        // Translate UTF-8 "smart" punctuation while the text is still UTF-8.
        // utf8_decode() turns every character that has no ISO-8859-1 equivalent
        // into "?", so after decoding these sequences can no longer be recognised.
        $string = strtr($string, array(
                "\xe2\x80\x9c" => '"',
                "\xe2\x80\x9d" => '"',
                "\xe2\x80\x98" => "'",
                "\xe2\x80\x99" => "'",
                "\xe2\x80\xa6" => "...",
                "\xe2\x80\x94" => "---",
                "\xe2\x80\x93" => "--"
        ));

        $string = utf8_decode($string);

        // Handles the single-byte forms of the same punctuation and trims the text.
        $string = remove_word_junk($string);

        // Drop the html comment markers.
        $string = preg_replace("/<!--/", "", $string);
        $string = preg_replace("/-->/", "", $string);

        // Line breaks become a single space.
        $string = preg_replace("/\r?\n/", " ", $string);

        $string = stripslashes_custom($string); //we only need to strip slashes if magic quotes are on

        //$string = strip_tags($string, $allowed_html); //Removed. Accept all HTML tags.
        $string = preg_replace('{&lt;/?span.*?&gt;}i', '', $string);

        //$string = html_entity_decode($string);

        $cp->set_encoding('ISO-8859-1');
        $cp->set_data($string); //the return value

} // end switchText
482
483?>
Note: See TracBrowser for help on using the repository browser.