[6779] | 1 | #! /usr/bin/perl -w |
---|
| 2 | |
---|
| 3 | # Spell Checker Plugin for HTMLArea-3.0 |
---|
| 4 | # Sponsored by www.americanbible.org |
---|
| 5 | # Implementation by Mihai Bazon, http://dynarch.com/mishoo/ |
---|
| 6 | # |
---|
| 7 | # (c) dynarch.com 2003. |
---|
| 8 | # Distributed under the same terms as HTMLArea itself. |
---|
| 9 | # This notice MUST stay intact for use (see license.txt). |
---|
| 10 | # |
---|
| 11 | # $Id: spell-check-logic.cgi,v 1.3 2004/04/20 22:17:10 mipmip Exp $ |
---|
| 12 | |
---|
| 13 | use strict; |
---|
| 14 | use utf8; |
---|
| 15 | use Encode; |
---|
| 16 | use Text::Aspell; |
---|
| 17 | use XML::DOM; |
---|
| 18 | use CGI; |
---|
| 19 | |
---|
| 20 | my $TIMER_start = undef; |
---|
| 21 | eval { |
---|
| 22 | use Time::HiRes qw( gettimeofday tv_interval ); |
---|
| 23 | $TIMER_start = [gettimeofday()]; |
---|
| 24 | }; |
---|
| 25 | # use POSIX qw( locale_h ); |
---|
| 26 | |
---|
| 27 | binmode STDIN, ':utf8'; |
---|
| 28 | binmode STDOUT, ':utf8'; |
---|
| 29 | |
---|
| 30 | my $debug = 0; |
---|
| 31 | |
---|
| 32 | my $speller = new Text::Aspell; |
---|
| 33 | my $cgi = new CGI; |
---|
| 34 | |
---|
| 35 | my $total_words = 0; |
---|
| 36 | my $total_mispelled = 0; |
---|
| 37 | my $total_suggestions = 0; |
---|
| 38 | my $total_words_suggested = 0; |
---|
| 39 | |
---|
| 40 | # FIXME: report a nice error... |
---|
| 41 | die "Can't create speller!" unless $speller; |
---|
| 42 | |
---|
| 43 | my $dict = $cgi->param('dictionary') || $cgi->cookie('dictionary') || 'en'; |
---|
| 44 | |
---|
| 45 | # add configurable option for this |
---|
| 46 | $speller->set_option('lang', $dict); |
---|
| 47 | $speller->set_option('encoding', 'UTF-8'); |
---|
| 48 | #setlocale(LC_CTYPE, $dict); |
---|
| 49 | |
---|
| 50 | # ultra, fast, normal, bad-spellers |
---|
| 51 | # bad-spellers seems to cause segmentation fault |
---|
| 52 | $speller->set_option('sug-mode', 'normal'); |
---|
| 53 | |
---|
| 54 | my %suggested_words = (); |
---|
| 55 | keys %suggested_words = 128; |
---|
| 56 | |
---|
| 57 | my $file_content = decode('UTF-8', $cgi->param('content')); |
---|
| 58 | $file_content = parse_with_dom($file_content); |
---|
| 59 | |
---|
| 60 | my $ck_dictionary = $cgi->cookie(-name => 'dictionary', |
---|
| 61 | -value => $dict, |
---|
| 62 | -expires => '+30d'); |
---|
| 63 | |
---|
| 64 | print $cgi->header(-type => 'text/html; charset: utf-8', |
---|
| 65 | -cookie => $ck_dictionary); |
---|
| 66 | |
---|
| 67 | my $js_suggested_words = make_js_hash(\%suggested_words); |
---|
| 68 | my $js_spellcheck_info = make_js_hash_from_array |
---|
| 69 | ([ |
---|
| 70 | [ 'Total words' , $total_words ], |
---|
| 71 | [ 'Mispelled words' , $total_mispelled . ' in dictionary \"'.$dict.'\"' ], |
---|
| 72 | [ 'Total suggestions' , $total_suggestions ], |
---|
| 73 | [ 'Total words suggested' , $total_words_suggested ], |
---|
| 74 | [ 'Spell-checked in' , defined $TIMER_start ? (tv_interval($TIMER_start) . ' seconds') : 'n/a' ] |
---|
| 75 | ]); |
---|
| 76 | |
---|
| 77 | print qq^<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01 Transitional//EN"> |
---|
| 78 | <html> |
---|
| 79 | <head> |
---|
| 80 | <meta http-equiv="Content-Type" content="text/html; charset=utf-8" /> |
---|
| 81 | <link rel="stylesheet" type="text/css" media="all" href="spell-check-style.css" /> |
---|
| 82 | <script type="text/javascript"> |
---|
| 83 | var suggested_words = { $js_suggested_words }; |
---|
| 84 | var spellcheck_info = { $js_spellcheck_info }; </script> |
---|
| 85 | </head> |
---|
| 86 | <body onload="window.parent.finishedSpellChecking();">^; |
---|
| 87 | |
---|
| 88 | print $file_content; |
---|
| 89 | if ($cgi->param('init') eq '1') { |
---|
| 90 | my @dicts = $speller->dictionary_info(); |
---|
| 91 | my $dictionaries = ''; |
---|
| 92 | foreach my $i (@dicts) { |
---|
| 93 | next if $i->{jargon}; |
---|
| 94 | my $name = $i->{name}; |
---|
| 95 | if ($name eq $dict) { |
---|
| 96 | $name = '@'.$name; |
---|
| 97 | } |
---|
| 98 | $dictionaries .= ',' . $name; |
---|
| 99 | } |
---|
| 100 | $dictionaries =~ s/^,//; |
---|
| 101 | print qq^<div id="HA-spellcheck-dictionaries">$dictionaries</div>^; |
---|
| 102 | } |
---|
| 103 | |
---|
| 104 | print '</body></html>'; |
---|
| 105 | |
---|
| 106 | # Perl is beautiful. |
---|
| 107 | sub spellcheck { |
---|
| 108 | my $node = shift; |
---|
| 109 | my $doc = $node->getOwnerDocument; |
---|
| 110 | my $check = sub { # called for each word in the text |
---|
| 111 | # input is in UTF-8 |
---|
| 112 | my $word = shift; |
---|
| 113 | my $already_suggested = defined $suggested_words{$word}; |
---|
| 114 | ++$total_words; |
---|
| 115 | if (!$already_suggested && $speller->check($word)) { |
---|
| 116 | return undef; |
---|
| 117 | } else { |
---|
| 118 | # we should have suggestions; give them back to browser in UTF-8 |
---|
| 119 | ++$total_mispelled; |
---|
| 120 | if (!$already_suggested) { |
---|
| 121 | # compute suggestions for this word |
---|
| 122 | my @suggestions = $speller->suggest($word); |
---|
| 123 | my $suggestions = decode($speller->get_option('encoding'), join(',', @suggestions)); |
---|
| 124 | $suggested_words{$word} = $suggestions; |
---|
| 125 | ++$total_suggestions; |
---|
| 126 | $total_words_suggested += scalar @suggestions; |
---|
| 127 | } |
---|
| 128 | # HA-spellcheck-error |
---|
| 129 | my $err = $doc->createElement('span'); |
---|
| 130 | $err->setAttribute('class', 'HA-spellcheck-error'); |
---|
| 131 | my $tmp = $doc->createTextNode; |
---|
| 132 | $tmp->setNodeValue($word); |
---|
| 133 | $err->appendChild($tmp); |
---|
| 134 | return $err; |
---|
| 135 | } |
---|
| 136 | }; |
---|
| 137 | while ($node->getNodeValue =~ /([\p{IsWord}']+)/) { |
---|
| 138 | my $word = $1; |
---|
| 139 | my $before = $`; |
---|
| 140 | my $after = $'; |
---|
| 141 | my $df = &$check($word); |
---|
| 142 | if (!$df) { |
---|
| 143 | $before .= $word; |
---|
| 144 | } |
---|
| 145 | { |
---|
| 146 | my $parent = $node->getParentNode; |
---|
| 147 | my $n1 = $doc->createTextNode; |
---|
| 148 | $n1->setNodeValue($before); |
---|
| 149 | $parent->insertBefore($n1, $node); |
---|
| 150 | $parent->insertBefore($df, $node) if $df; |
---|
| 151 | $node->setNodeValue($after); |
---|
| 152 | } |
---|
| 153 | } |
---|
| 154 | }; |
---|
| 155 | |
---|
| 156 | sub check_inner_text { |
---|
| 157 | my $node = shift; |
---|
| 158 | my $text = ''; |
---|
| 159 | for (my $i = $node->getFirstChild; defined $i; $i = $i->getNextSibling) { |
---|
| 160 | if ($i->getNodeType == TEXT_NODE) { |
---|
| 161 | spellcheck($i); |
---|
| 162 | } |
---|
| 163 | } |
---|
| 164 | }; |
---|
| 165 | |
---|
| 166 | sub parse_with_dom { |
---|
| 167 | my ($text) = @_; |
---|
| 168 | $text = '<spellchecker>'.$text.'</spellchecker>'; |
---|
| 169 | |
---|
| 170 | my $parser = new XML::DOM::Parser; |
---|
| 171 | if ($debug) { |
---|
| 172 | open(FOO, '>:utf8', '/tmp/foo'); |
---|
| 173 | print FOO $text; |
---|
| 174 | close FOO; |
---|
| 175 | } |
---|
| 176 | my $doc = $parser->parse($text); |
---|
| 177 | my $nodes = $doc->getElementsByTagName('*'); |
---|
| 178 | my $n = $nodes->getLength; |
---|
| 179 | |
---|
| 180 | for (my $i = 0; $i < $n; ++$i) { |
---|
| 181 | my $node = $nodes->item($i); |
---|
| 182 | if ($node->getNodeType == ELEMENT_NODE) { |
---|
| 183 | check_inner_text($node); |
---|
| 184 | } |
---|
| 185 | } |
---|
| 186 | |
---|
| 187 | my $ret = $doc->toString; |
---|
| 188 | $ret =~ s{<spellchecker>(.*)</spellchecker>}{$1}sg; |
---|
| 189 | return $ret; |
---|
| 190 | }; |
---|
| 191 | |
---|
| 192 | sub make_js_hash { |
---|
| 193 | my ($hash) = @_; |
---|
| 194 | my $js_hash = ''; |
---|
| 195 | while (my ($key, $val) = each %$hash) { |
---|
| 196 | $js_hash .= ',' if $js_hash; |
---|
| 197 | $js_hash .= '"'.$key.'":"'.$val.'"'; |
---|
| 198 | } |
---|
| 199 | return $js_hash; |
---|
| 200 | }; |
---|
| 201 | |
---|
| 202 | sub make_js_hash_from_array { |
---|
| 203 | my ($array) = @_; |
---|
| 204 | my $js_hash = ''; |
---|
| 205 | foreach my $i (@$array) { |
---|
| 206 | $js_hash .= ',' if $js_hash; |
---|
| 207 | $js_hash .= '"'.$i->[0].'":"'.$i->[1].'"'; |
---|
| 208 | } |
---|
| 209 | return $js_hash; |
---|
| 210 | }; |
---|