source: sandbox/expressoMail1_2/corretor_ortografico/spell_checker/spell_checker.php @ 2375

Revision 2375, 15.8 KB checked in by paula.franceschini, 14 years ago (diff)

Ticket #891 - adicionando modulo do corretor ortografico

Line 
1<?php
2/**********************************************************************************************
3 * AJAX Spell Checker - Version 2.8
4 * (C) 2005 - Garrison Locke
5 *
6 * This spell checker is built in the style of the Gmail spell
7 * checker.  It uses AJAX to communicate with the backend without
8 * requiring the page be reloaded.  If you use this code, please
9 * give me credit and a link to my site would be nice.
10 * http://www.broken-notebook.com.
11 *
12 * Copyright (c) 2005, Garrison Locke
13 * All rights reserved.
14 *
15 * Redistribution and use in source and binary forms, with or without
16 * modification, are permitted provided that the following conditions are met:
17 *
18 *   * Redistributions of source code must retain the above copyright notice,
19 *     this list of conditions and the following disclaimer.
20 *   * Redistributions in binary form must reproduce the above copyright notice,
21 *     this list of conditions and the following disclaimer in the documentation
22 *     and/or other materials provided with the distribution.
23 *   * Neither the name of the http://www.broken-notebook.com nor the names of its
24 *     contributors may be used to endorse or promote products derived from this
25 *     software without specific prior written permission.
26 *
27 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
28 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
29 * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
30 * IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT,
31 * INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
32 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
33 * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
34 * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
35 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY
36 * OF SUCH DAMAGE.
37 *
38 ***********************************************************************************************/
39
// User-configurable list of allowed HTML tags, in the format expected by
// strip_tags().  switchText() uses it when sending text back to the text box.
// Thanks to Jake Olefsky for this little addition.
$allowed_html = '<strong><small><p><br><a><b><u><i><img><code><ul><ol><li>';

// Set the max number of suggestions to return at a time.
define('MAX_SUGGESTIONS', 10);

// Set whether to use a personal dictionary.
$usePersonalDict = true;

// Set whether users are allowed to update the personal dictionary.
$editablePersonalDict = true;

// If using a personal dictionary, set the path to it.  Default is in the
// personal_dictionary subdirectory of the location of spell_checker.php.
$path_to_personal_dictionary = dirname(__FILE__) . "/personal_dictionary/personal_dictionary.txt";

// If pspell doesn't exist, then include the pspell wrapper for aspell.
if(!function_exists('pspell_suggest'))
{
    // Set the path to aspell if you need to use it.
    define('ASPELL_BIN','/usr/bin/aspell');
    require_once ("pspell_comp.php");
}

// Create and configure a link to the pspell module.
//
// The configuration has to exist before it can be tuned.  Previously the
// pspell_config_create() call was commented out, so the two pspell_config_*()
// calls below received an undefined variable, and the link was opened with
// pspell_new(), which does not take the configuration into account -- the
// personal dictionary was therefore never attached to the link.
$pspell_config = pspell_config_create("pt_BR");
pspell_config_mode($pspell_config, PSPELL_FAST);

if($usePersonalDict)
{
    // Allows the use of a custom dictionary (Thanks to Dylan Thurston for this addition).
    pspell_config_personal($pspell_config, $path_to_personal_dictionary);
}

$pspell_link = pspell_new_config($pspell_config);

if($pspell_link === false)
{
    // The configured link could not be opened (for instance because of the
    // personal dictionary file); fall back to the plain pt_BR dictionary so
    // that spell checking itself keeps working.
    $pspell_link = pspell_new("pt_BR");
}

require_once("cpaint/cpaint2.inc.php"); //AJAX library file

// Expose the backend functions to the AJAX front end and dispatch the request.
$cp = new cpaint();
$cp->register('showSuggestions');
$cp->register('spellCheck');
$cp->register('switchText');
$cp->register('addWord');
$cp->start();
$cp->return_data();
88
89
/*************************************************************
 * showSuggestions($word, $id)
 *
 * The showSuggestions function creates the list of up to
 * MAX_SUGGESTIONS suggestions to return for the given misspelled
 * word.  The resulting HTML (one <div> per suggestion, plus an
 * "Add To Dictionary" entry when the personal dictionary is
 * editable) is handed to the CPAINT object; nothing is returned.
 *
 * $word - The misspelled word that was clicked on
 * $id - The id of the span containing the misspelled word.
 *
 *************************************************************/
function showSuggestions($word, $id)
{
    global $editablePersonalDict; //bool to set editability of personal dictionary
    global $pspell_link; //the global link to the pspell module
    global $cp; //the CPAINT object

    $retVal = "";

    //an array of all the suggestions that pspell returns for $word.
    $suggestions = pspell_suggest($pspell_link, $word);

    // pspell_suggest() may return false instead of an array; treat that as
    // "no suggestions" rather than calling count() on a non-array.
    if(!is_array($suggestions))
    {
        $suggestions = array();
    }

    // If the number of suggestions returned by pspell is less than the maximum
    // number, just use the number of suggestions returned.
    $tmpNum = min(count($suggestions), MAX_SUGGESTIONS);

    // $id arrives from the client and ends up inside a single-quoted
    // JavaScript string that itself lives inside a double-quoted HTML
    // attribute.  It is therefore escaped twice: backslashes for JavaScript
    // first, then HTML entities for the attribute.  The browser undoes the
    // entity layer before the script is parsed.  ISO-8859-1 is passed as the
    // charset so htmlspecialchars() only touches the ASCII specials and never
    // rejects the string, whatever multi-byte encoding it is in.
    $jsId = htmlspecialchars(addslashes_custom($id), ENT_QUOTES, 'ISO-8859-1');

    if($tmpNum > 0)
    {
        //this creates the table of suggestions.
        //in the onclick event it has a call to the replaceWord javascript function which does the actual replacing on the page
        for($i=0; $i<$tmpNum; $i++)
        {
            $suggestion = utf8_encode($suggestions[$i]);
            $jsSuggestion = htmlspecialchars(addslashes($suggestion), ENT_QUOTES, 'ISO-8859-1');

            $retVal .= "<div class=\"suggestion\" onclick=\"replaceWord('" . $jsId . "', '" . $jsSuggestion . "'); return false;\">" . $suggestion . "</div>";
        }

        if($editablePersonalDict)
        {
            $retVal .= "<div class=\"addtoDictionary\" onclick=\"addWord('" . $jsId . "'); return false;\">Add To Dictionary</div>";
        }
    }
    else
    {
        $retVal .= "No Suggestions";
    }

    $cp->set_data($retVal);  //the return value - a string containing the table of suggestions.

} // end showSuggestions
137
138
/*************************************************************
 * spellCheck($string, $varName)
 *
 * The spellCheck function takes the string of text entered
 * in the text box and spell checks it.  It splits the text
 * on anything inside of < > in order to prevent html from being
 * spell checked.  Then any text is split on whitespace so that only
 * one word is spell checked at a time.  This creates a multidimensional
 * array.  The array is flattened.  The array is looped through
 * ignoring the html lines and spell checking the others.  If a word
 * is misspelled, code is wrapped around it to highlight it and to
 * make it clickable to show the user the suggestions for that
 * misspelled word.
 *
 * The result handed to the CPAINT object starts with "0" when no
 * misspelling was found and with "1" otherwise, followed by the
 * marked-up text.  Nothing is returned.
 *
 * $string  - The string of text from the text box that is to be
 *            spell checked.
 * $varName - Name of the client-side spell checker object.  It is
 *            emitted as a JavaScript expression in the onclick
 *            handler and used as the prefix of every span id.
 *
 *************************************************************/
function spellCheck($string, $varName)
{
    global $pspell_link; //the global link to the pspell module
    global $cp; //the CPAINT object

    $string = remove_word_junk($string);

    //make all the returns in the text look the same
    $string = preg_replace("/\r?\n/", "\n", $string);

    //splits the string on any html tags, preserving the tags and putting them in the $words array
    $words = preg_split("/(<[^<>]*>)/", $string, -1, PREG_SPLIT_DELIM_CAPTURE);

    $numResults = count($words); //the number of elements in the array.

    $misspelledCount = 0;

    //this loop looks through the words array and splits any lines of text that aren't html tags on whitespace, preserving the whitespace.
    for($i=0; $i<$numResults; $i++)
    {
        // Words alternate between real words and html tags, starting with words.
        if(($i & 1) == 0) // Even-numbered entries are word sets.
        {
            $words[$i] = preg_split("/(\s+)/", $words[$i], -1, PREG_SPLIT_DELIM_CAPTURE); //then split it on the whitespace

            // Now go through each word and link up the misspelled ones.
            $numWords = count($words[$i]);
            for($j=0; $j<$numWords; $j++)
            {
                // explode() replaces the former split() call, which no longer
                // exists in PHP 7+; for a single-space delimiter both give
                // the same pieces.
                $tmp = explode(" ", $words[$i][$j]);
                $tmpWord = utf8_encode($tmp[0]); //should only have one element in the array anyway, so it's just assign it to $tmpWord

                // Empty entries and the captured whitespace delimiters are not
                // words: never send them to pspell or count them as misspelled.
                if(trim($tmpWord) !== "" && !pspell_check($pspell_link, $tmpWord))
                {
                    $jsWord = addslashes($tmpWord);
                    $spanId = $varName . "_" . $misspelledCount . "_" . $tmpWord;

                    // JavaScript for the click handler.  The strings inside it
                    // are backslash-escaped for JavaScript; the whole handler
                    // and the id are then entity-escaped for the double-quoted
                    // HTML attributes, so that a quote in the word (or in
                    // $varName) cannot break out of the attribute.
                    // ISO-8859-1 makes htmlspecialchars() byte-transparent for
                    // everything except the ASCII specials.
                    $handler = "setCurrentObject(" . $varName . "); showSuggestions('" . $jsWord . "', '" . $varName . "_" . $misspelledCount . "_" . $jsWord . "'); return false;";
                    $onClick = "onclick=\"" . htmlspecialchars($handler, ENT_QUOTES, 'ISO-8859-1') . "\"";
                    $idAttr = "id=\"" . htmlspecialchars($spanId, ENT_QUOTES, 'ISO-8859-1') . "\"";

                    //And we replace the word in the array with the span that highlights it and gives it an onClick parameter to show the suggestions.
                    // NOTE(review): $tmpWord went through utf8_encode() but the
                    // token searched here did not, so a token containing bytes
                    // >= 0x80 is not matched and stays unhighlighted -- confirm
                    // the intended input encoding.
                    $words[$i][$j] = str_replace($tmpWord, "<span " . $onClick . " " . $idAttr . " class=\"highlight\">" . stripslashes($tmpWord) . "</span>", $words[$i][$j]);
                    $misspelledCount++;
                }

                $words[$i][$j] = str_replace("\n", "<br />", $words[$i][$j]); //replace any breaks with <br />'s, for html display
            }//end for $j
        }//end if

        else //otherwise, we wrap all the html tags in comments to make them not displayed
        {
            $words[$i] = str_replace("<", "<!--<", $words[$i]);
            $words[$i] = str_replace(">", ">-->", $words[$i]);
        }
    }//end for $i

    $words = flattenArray($words); //flatten the array to be one dimensional.

    //if there were no misspellings, start the string with a 0, otherwise with a 1.
    if($misspelledCount == 0)
    {
        $string = "0";
    }
    else
    {
        $string = "1";
    }

    // Concatenate all the words/tags/etc. back into a string and append it to the result.
    $string .= implode('', $words);

    //remove comments from around all html tags except for <a> because we don't want the links to be clickable
    //but we want the html to be rendered in the div for preview purposes.
    $string = preg_replace("/<!--<br( [^>]*)?>-->/i", "<br />", $string);
    $string = preg_replace("/<!--<p( [^>]*)?>-->/i", "<p>", $string);
    $string = preg_replace("/<!--<\/p>-->/i", "</p>", $string);
    $string = preg_replace("/<!--<b( [^>]*)?>-->/i", "<b>", $string);
    $string = preg_replace("/<!--<\/b>-->/i", "</b>", $string);
    $string = preg_replace("/<!--<strong( [^>]*)?>-->/i", "<strong>", $string);
    $string = preg_replace("/<!--<\/strong>-->/i", "</strong>", $string);
    $string = preg_replace("/<!--<i( [^>]*)?>-->/i", "<i>", $string);
    $string = preg_replace("/<!--<\/i>-->/i", "</i>", $string);
    $string = preg_replace("/<!--<small( [^>]*)?>-->/i", "<small>", $string);
    $string = preg_replace("/<!--<\/small>-->/i", "</small>", $string);
    $string = preg_replace("/<!--<ul( [^>]*)?>-->/i", "<ul>", $string);
    $string = preg_replace("/<!--<\/ul>-->/i", "</ul>", $string);
    $string = preg_replace("/<!--<li( [^>]*)?>-->/i", "<li>", $string);
    $string = preg_replace("/<!--<\/li>-->/i", "</li>", $string);
    $string = preg_replace("/<!--<img (?:[^>]+ )?src=\"?([^\"]*)\"?[^>]*>-->/i", "<img src=\"\\1\" />", $string);

    $cp->set_data($string);  //return value - string containing all the markup for the misspelled words.

} // end spellCheck
252
253
/*************************************************************
 * addWord($str)
 *
 * This function adds a word to the custom dictionary and saves
 * the word list.  A status message ("Save successful!" or
 * "Save Failed!") is handed to the CPAINT object; nothing is
 * returned.
 *
 * @param $str The word to be added
 *************************************************************/
function addWord($str)
{
    global $editablePersonalDict; //bool to set editability of personal dictionary
    global $pspell_link; //the global link to the pspell module
    global $cp; //the CPAINT object

    $retVal = "Save Failed!";

    // Only touch the dictionary when editing is allowed at all.
    if($editablePersonalDict)
    {
        // Only an explicit false counts as a failed add, so that a
        // replacement implementation of pspell_add_to_personal() that returns
        // nothing is not mistaken for a failure.
        $added = pspell_add_to_personal($pspell_link, $str);

        if($added !== false && pspell_save_wordlist($pspell_link))
        {
            $retVal = "Save successful!";
        }
    }

    $cp->set_data($retVal);
} // end addWord
280
281
282
/*************************************************************
 * flattenArray($array)
 *
 * Recursively collapses a (possibly nested) array into a single
 * one-dimensional, zero-indexed array.  Elements keep the order
 * in which a depth-first walk encounters them; the original keys
 * are discarded.
 *
 * $array - The array to be flattened.
 *
 * Returns the flattened array.
 *
 *************************************************************/
function flattenArray($array)
{
    $result = array();

    foreach($array as $item)
    {
        // Scalars (and anything else that is not an array) are leaves.
        if(!is_array($item))
        {
            $result[] = $item;
            continue;
        }

        // Nested array: flatten it first, then append its leaves in order.
        foreach(flattenArray($item) as $leaf)
        {
            $result[] = $leaf;
        }
    }

    return $result;
} // end flattenArray
310
311
/*************************************************************
 * stripslashes_custom($string)
 *
 * This is a custom stripslashes function that only strips
 * the slashes if magic quotes are on.  This is written for
 * compatibility with other servers in the event someone doesn't
 * have magic quotes on.
 *
 * Magic quotes no longer exist from PHP 5.4 on and
 * get_magic_quotes_gpc() itself is gone in PHP 8, so the
 * function is only consulted on older versions; everywhere else
 * the string is returned untouched.
 *
 * $string - The string that might need the slashes stripped.
 *
 * Returns the string, with slashes stripped when magic quotes
 * are active.
 *
 *************************************************************/
function stripslashes_custom($string)
{
    // The version test short-circuits, so get_magic_quotes_gpc() is never
    // called where it is deprecated or missing.
    $magicQuotesOn = version_compare(PHP_VERSION, '5.4.0', '<') && get_magic_quotes_gpc();

    if($magicQuotesOn)
    {
        return stripslashes($string);
    }

    return $string;
} // end stripslashes_custom
334
/*************************************************************
 * addslashes_custom($string)
 *
 * This is a custom addslashes function that only adds
 * the slashes if magic quotes are off.  This is written for
 * compatibility with other servers in the event someone doesn't
 * have magic quotes on.
 *
 * Magic quotes no longer exist from PHP 5.4 on and
 * get_magic_quotes_gpc() itself is gone in PHP 8, so the
 * function is only consulted on older versions; everywhere else
 * the slashes are always added.
 *
 * $string - The string that might need the slashes added.
 *
 * Returns the string, with slashes added unless magic quotes
 * are active.
 *
 *************************************************************/
function addslashes_custom($string)
{
    // The version test short-circuits, so get_magic_quotes_gpc() is never
    // called where it is deprecated or missing.
    $magicQuotesOn = version_compare(PHP_VERSION, '5.4.0', '<') && get_magic_quotes_gpc();

    if($magicQuotesOn)
    {
        return $string;
    }

    return addslashes($string);
} // end addslashes_custom
357
358
/*************************************************************
 * remove_word_junk($t)
 *
 * This function replaces the typographic characters that Word
 * adds to its text (curly quotes, ellipsis, long dashes) with
 * plain ASCII equivalents, in the event someone pastes in text
 * from Word, and trims the result.
 *
 * The UTF-8 encoded forms are always replaced.  The single-byte
 * Windows-1252 forms are only replaced when the text is not
 * valid UTF-8: the same byte values occur as continuation bytes
 * inside valid UTF-8 characters (for example 0x93 is the second
 * byte of an upper-case O with acute accent), and replacing them
 * there would corrupt the text.
 *
 * $t - The text to be cleaned
 *
 * Returns the cleaned, trimmed text.
 *
 *************************************************************/
function remove_word_junk($t)
{
    // Typographic characters in their UTF-8 encoding.
    $utf8Junk = array(
        "\xe2\x80\x9c"=>'"',
        "\xe2\x80\x9d"=>'"',
        "\xe2\x80\x99"=>"'",
        "\xe2\x80\xa6"=>"...",
        "\xe2\x80\x98"=>"'",
        "\xe2\x80\x94"=>"---",
        "\xe2\x80\x93"=>"--"
    );

    // The same characters as single Windows-1252 bytes.
    $singleByteJunk = array(
        "\x85"=>"...",
        "\221"=>"'",
        "\222"=>"'",
        "\223"=>'"',
        "\224"=>'"',
        "\x97"=>"---",
        "\x96"=>"--"
    );

    $t = str_replace(array_keys($utf8Junk), array_values($utf8Junk), $t);

    // An empty pattern with the u modifier matches only when the whole
    // subject is valid UTF-8, which makes it a dependency-free validity test.
    if(preg_match('//u', $t) !== 1)
    {
        $t = str_replace(array_keys($singleByteJunk), array_values($singleByteJunk), $t);
    }

    return trim($t);

} // end remove_word_junk
397
398
/*************************************************************
 * switchText($string)
 *
 * This function prepares the text to be sent back to the text
 * box from the div.  In order, it:
 *   - replaces Word's typographic characters (remove_word_junk),
 *   - removes the comment markers that were wrapped around the
 *     html tags,
 *   - turns every line break into a single space,
 *   - strips slashes when magic quotes are on,
 *   - strips every html tag that is not in $allowed_html,
 *   - removes entity-encoded <span> tags,
 *   - decodes the html entities.
 * The result is handed to the CPAINT object; nothing is returned.
 *
 * $string - The string of html from the div that will be sent
 *           back to the text box.
 *
 *************************************************************/
function switchText($string)
{
    global $allowed_html; //tags that survive strip_tags()
    global $cp; //the CPAINT object

    $text = remove_word_junk($string);

    // Drop the opening markers first, then the closing ones.
    $text = preg_replace(array("/<!--/", "/-->/"), "", $text);

    $text = preg_replace("/\r?\n/", " ", $text);
    $text = stripslashes_custom($text); //we only need to strip slashes if magic quotes are on
    $text = strip_tags($text, $allowed_html);
    $text = preg_replace('{&lt;/?span.*?&gt;}i', '', $text);

    $cp->set_data(html_entity_decode($text)); //the return value

} // end switchText
433
434?>
Note: See TracBrowser for help on using the repository browser.