1 | <?php
|
---|
2 | /*
|
---|
3 | This class contains code from rtfclass.php that was written by Markus Fischer and placed by him under
|
---|
4 | GPLv2 License.
|
---|
5 |
|
---|
6 | =======================================NOTES FROM ORIGINAL AUTHOR====================================
|
---|
7 | Rich Text Format - Parsing Class
|
---|
8 | ================================
|
---|
9 |
|
---|
10 | (c) 2000 Markus Fischer
|
---|
11 | <mfischer@josefine.ben.tuwien.ac.at>
|
---|
12 | http://josefine.ben.tuwien.ac.at/~mfischer/
|
---|
13 |
|
---|
14 | Latest versions of this class can always be found at
|
---|
15 | http://josefine.ben.tuwien.ac.at/~mfischer/developing/php/rtf/rtfclass.phps
|
---|
16 | Testing suite is available at
|
---|
17 | http://josefine.ben.tuwien.ac.at/~mfischer/developing/php/rtf/
|
---|
18 |
|
---|
19 | License: GPLv2
|
---|
20 |
|
---|
21 | Specification:
|
---|
22 | http://msdn.microsoft.com/library/default.asp?URL=/library/specs/rtfspec.htm
|
---|
23 |
|
---|
24 | General Notes:
|
---|
25 | ==============
|
---|
26 | Unknown or unsupported control symbols are silently ignored
|
---|
27 |
|
---|
28 | Group stacking is still not supported :(
|
---|
29 | group stack logic implemented; however not really used yet
|
---|
30 | =====================================================================================================
|
---|
31 |
|
---|
32 | It was modified by me (Andreas Brodowski) to allow compressed RTF being uncompressed by code I ported from
|
---|
33 | Java to PHP and adapted according to the needs of Z-Push.
|
---|
34 |
|
---|
35 | Currently it is being used to detect empty RTF Streams from Nokia Phones in MfE Clients
|
---|
36 |
|
---|
37 | It needs to be used by other backend writers that need to have notes in calendar, appointment or tasks
|
---|
38 | objects to be written to their databases since devices send them usually in RTF Format... With Zarafa
|
---|
39 | you can write them directly to DB and Zarafa is doing the conversion job. Other Groupware systems usually
|
---|
40 | don't have this possibility...
|
---|
41 |
|
---|
42 | */
|
---|
43 |
|
---|
44 |
|
---|
/**
 * RTF reader: unpacks "compressed RTF" (LZRTF) streams and walks the
 * resulting RTF text, emitting an XML, HTML or (very reduced) ASCII rendering.
 *
 * Typical use:
 *   $r = new rtf();
 *   if ($r->loadrtf($rawStream)) { $r->output("xml"); $r->parse(); echo $r->out; }
 *
 * The class relies on a global debugLog($message) function that is provided
 * by the surrounding application, not by this file.
 */
class rtf {
    // Dictionary every LZRTF stream is pre-seeded with. The line break after
    // "\blue0" is CR LF (0x0D 0x0A); references into the dictionary depend on
    // the exact byte order.
    public $LZRTF_HDR_DATA = "{\\rtf1\\ansi\\mac\\deff0\\deftab720{\\fonttbl;}{\\f0\\fnil \\froman \\fswiss \\fmodern \\fscript \\fdecor MS Sans SerifSymbolArialTimes New RomanCourier{\\colortbl\\red0\\green0\\blue0\r\n\\par \\pard\\plain\\f0\\fs20\\b\\i\\u\\tab\\tx";
    public $LZRTF_HDR_LEN = 207;
    public $CRC32_TABLE = array(
        0x00000000,0x77073096,0xEE0E612C,0x990951BA,0x076DC419,0x706AF48F,0xE963A535,0x9E6495A3,
        0x0EDB8832,0x79DCB8A4,0xE0D5E91E,0x97D2D988,0x09B64C2B,0x7EB17CBD,0xE7B82D07,0x90BF1D91,
        0x1DB71064,0x6AB020F2,0xF3B97148,0x84BE41DE,0x1ADAD47D,0x6DDDE4EB,0xF4D4B551,0x83D385C7,
        0x136C9856,0x646BA8C0,0xFD62F97A,0x8A65C9EC,0x14015C4F,0x63066CD9,0xFA0F3D63,0x8D080DF5,
        0x3B6E20C8,0x4C69105E,0xD56041E4,0xA2677172,0x3C03E4D1,0x4B04D447,0xD20D85FD,0xA50AB56B,
        0x35B5A8FA,0x42B2986C,0xDBBBC9D6,0xACBCF940,0x32D86CE3,0x45DF5C75,0xDCD60DCF,0xABD13D59,
        0x26D930AC,0x51DE003A,0xC8D75180,0xBFD06116,0x21B4F4B5,0x56B3C423,0xCFBA9599,0xB8BDA50F,
        0x2802B89E,0x5F058808,0xC60CD9B2,0xB10BE924,0x2F6F7C87,0x58684C11,0xC1611DAB,0xB6662D3D,
        0x76DC4190,0x01DB7106,0x98D220BC,0xEFD5102A,0x71B18589,0x06B6B51F,0x9FBFE4A5,0xE8B8D433,
        0x7807C9A2,0x0F00F934,0x9609A88E,0xE10E9818,0x7F6A0DBB,0x086D3D2D,0x91646C97,0xE6635C01,
        0x6B6B51F4,0x1C6C6162,0x856530D8,0xF262004E,0x6C0695ED,0x1B01A57B,0x8208F4C1,0xF50FC457,
        0x65B0D9C6,0x12B7E950,0x8BBEB8EA,0xFCB9887C,0x62DD1DDF,0x15DA2D49,0x8CD37CF3,0xFBD44C65,
        0x4DB26158,0x3AB551CE,0xA3BC0074,0xD4BB30E2,0x4ADFA541,0x3DD895D7,0xA4D1C46D,0xD3D6F4FB,
        0x4369E96A,0x346ED9FC,0xAD678846,0xDA60B8D0,0x44042D73,0x33031DE5,0xAA0A4C5F,0xDD0D7CC9,
        0x5005713C,0x270241AA,0xBE0B1010,0xC90C2086,0x5768B525,0x206F85B3,0xB966D409,0xCE61E49F,
        0x5EDEF90E,0x29D9C998,0xB0D09822,0xC7D7A8B4,0x59B33D17,0x2EB40D81,0xB7BD5C3B,0xC0BA6CAD,
        0xEDB88320,0x9ABFB3B6,0x03B6E20C,0x74B1D29A,0xEAD54739,0x9DD277AF,0x04DB2615,0x73DC1683,
        0xE3630B12,0x94643B84,0x0D6D6A3E,0x7A6A5AA8,0xE40ECF0B,0x9309FF9D,0x0A00AE27,0x7D079EB1,
        0xF00F9344,0x8708A3D2,0x1E01F268,0x6906C2FE,0xF762575D,0x806567CB,0x196C3671,0x6E6B06E7,
        0xFED41B76,0x89D32BE0,0x10DA7A5A,0x67DD4ACC,0xF9B9DF6F,0x8EBEEFF9,0x17B7BE43,0x60B08ED5,
        0xD6D6A3E8,0xA1D1937E,0x38D8C2C4,0x4FDFF252,0xD1BB67F1,0xA6BC5767,0x3FB506DD,0x48B2364B,
        0xD80D2BDA,0xAF0A1B4C,0x36034AF6,0x41047A60,0xDF60EFC3,0xA867DF55,0x316E8EEF,0x4669BE79,
        0xCB61B38C,0xBC66831A,0x256FD2A0,0x5268E236,0xCC0C7795,0xBB0B4703,0x220216B9,0x5505262F,
        0xC5BA3BBE,0xB2BD0B28,0x2BB45A92,0x5CB36A04,0xC2D7FFA7,0xB5D0CF31,0x2CD99E8B,0x5BDEAE1D,
        0x9B64C2B0,0xEC63F226,0x756AA39C,0x026D930A,0x9C0906A9,0xEB0E363F,0x72076785,0x05005713,
        0x95BF4A82,0xE2B87A14,0x7BB12BAE,0x0CB61B38,0x92D28E9B,0xE5D5BE0D,0x7CDCEFB7,0x0BDBDF21,
        0x86D3D2D4,0xF1D4E242,0x68DDB3F8,0x1FDA836E,0x81BE16CD,0xF6B9265B,0x6FB077E1,0x18B74777,
        0x88085AE6,0xFF0F6A70,0x66063BCA,0x11010B5C,0x8F659EFF,0xF862AE69,0x616BFFD3,0x166CCF45,
        0xA00AE278,0xD70DD2EE,0x4E048354,0x3903B3C2,0xA7672661,0xD06016F7,0x4969474D,0x3E6E77DB,
        0xAED16A4A,0xD9D65ADC,0x40DF0B66,0x37D83BF0,0xA9BCAE53,0xDEBB9EC5,0x47B2CF7F,0x30B5FFE9,
        0xBDBDF21C,0xCABAC28A,0x53B39330,0x24B4A3A6,0xBAD03605,0xCDD70693,0x54DE5729,0x23D967BF,
        0xB3667A2E,0xC4614AB8,0x5D681B02,0x2A6F2B94,0xB40BBE37,0xC30C8EA1,0x5A05DF1B,0x2D02EF8D,
    );

    public $rtf;                 // rtf core stream (uncompressed RTF text)
    public $rtf_len;             // length in bytes of the stream (cached to avoid repeated strlen calls)
    public $err = array();       // array of error messages, empty when no error occurred

    public $wantXML = false;     // convert to XML
    public $wantHTML = false;    // convert to HTML
    public $wantASCII = false;   // convert to plain text

    // the only variables which should be accessed from the outside
    public $out;                 // output data stream (depends on which $wantXXXXX is set to true)
    public $outstyles;           // htmlified styles (generated after parsing if wantHTML)
    public $styles = array();    // if wantHTML, stylesheet definitions are put in here

    // internal parser variables --------------------------------
    public $cword = "";          // holds the current (or last) control word, depending on $cw
    public $cw = false;          // are we currently parsing a control word?
    public $cfirst = false;      // true right after a backslash: the next character may be a control symbol

    public $flags = array();     // parser flags
    public $last_flags = array(); // flags of the most recently closed group
    public $fonttable = array(); // font table entries collected by flushQueue()

    public $queue = "";          // plain characters collected since the last flush

    public $stack = array();     // group stack (one saved copy of $flags per open group)

    /* keywords which don't follow the specification (used by Word '97 - 2000) */
    // not yet used
    public $control_exception = array(
        "clFitText",
        "clftsWidth(-?[0-9]+)?",
        "clNoWrap(-?[0-9]+)?",
        "clwWidth(-?[0-9]+)?",
        "tdfrmtxtBottom(-?[0-9]+)?",
        "tdfrmtxtLeft(-?[0-9]+)?",
        "tdfrmtxtRight(-?[0-9]+)?",
        "tdfrmtxtTop(-?[0-9]+)?",
        "trftsWidthA(-?[0-9]+)?",
        "trftsWidthB(-?[0-9]+)?",
        "trftsWidth(-?[0-9]+)?",
        "trwWithA(-?[0-9]+)?",
        "trwWithB(-?[0-9]+)?",
        "trwWith(-?[0-9]+)?",
        "spectspecifygen(-?[0-9]+)?",
    );

    // \fcharset parameter => charset name. 254 is PC 437 and 255 is OEM; the
    // two used to share the key "255", which silently dropped "PC 437".
    public $charset_table = array(
        "0" => "ANSI",
        "1" => "Default",
        "2" => "Symbol",
        "77" => "Mac",
        "128" => "Shift Jis",
        "129" => "Hangul",
        "130" => "Johab",
        "134" => "GB2312",
        "136" => "Big5",
        "161" => "Greek",
        "162" => "Turkish",
        "163" => "Vietnamese",
        "177" => "Hebrew",
        "178" => "Arabic",
        "179" => "Arabic Traditional",
        "180" => "Arabic user",
        "181" => "Hebrew user",
        "186" => "Baltic",
        "204" => "Russian",
        "222" => "Thai",
        "238" => "Eastern European",
        "254" => "PC 437",
        "255" => "OEM",
    );

    /* note: the only conversion table used (parser flag => HTML tag) */
    public $fontmodifier_table = array(
        "bold" => "b",
        "italic" => "i",
        "underlined" => "u",
        "strikethru" => "strike",
    );

    /**
     * Creates an empty reader: no stream loaded, empty output.
     */
    public function __construct() {
        $this->rtf_len = 0;
        $this->rtf = '';

        $this->out = '';
    }

    /**
     * Legacy PHP 4 style constructor name, kept so that existing explicit
     * calls (e.g. parent::rtf()) keep working.
     */
    public function rtf() {
        $this->__construct();
    }

    /**
     * Loads the raw (possibly compressed) rtf data to be converted by this class.
     *
     * @param string $data the raw stream, including its 16 byte LZRTF header
     * @return bool true when a non-empty RTF text is available afterwards
     */
    public function loadrtf($data) {
        // Always start from a clean state; otherwise a failed load would
        // keep reporting the length of a previously loaded stream.
        $this->rtf = '';
        $this->rtf_len = 0;

        $plain = $this->uncompress($data);
        if ($plain !== false && $plain !== null) {
            $this->rtf = (string)$plain;
            $this->rtf_len = strlen($this->rtf);
        }
        if ($this->rtf_len == 0) {
            debugLog("No data in stream found");
            return false;
        }
        return true;
    }

    /**
     * Selects an output format. Unknown values are ignored. Formats are only
     * ever switched on; when several are enabled XML takes precedence over
     * HTML when text is flushed.
     *
     * @param string $typ "ascii", "xml" or "html"
     */
    public function output($typ) {
        switch ($typ) {
            case "ascii": $this->wantASCII = true; break;
            case "xml":   $this->wantXML = true; break;
            case "html":  $this->wantHTML = true; break;
            default: break;
        }
    }

    /**
     * Unpacks an LZRTF stream.
     *
     * Layout: four little-endian 32 bit fields (compressed size, raw size,
     * magic, CRC) followed by the payload. Magic "MELA" marks an uncompressed
     * payload, "LZFu" a compressed one.
     *
     * @param string $src the raw stream in LZRTF format
     * @return string|false the RTF text, or false when the stream is
     *                      truncated, corrupt or of an unknown type
     */
    public function uncompress($src) {
        $src = (string)$src;
        $srcLen = strlen($src);
        if ($srcLen < 16) {
            debugLog("Stream too short");
            return false;
        }

        // "V" = unsigned 32 bit little endian, independent of the host CPU.
        $header = unpack("VcSize/VuSize/Vmagic/Vcrc32", substr($src, 0, 16));
        $in = 16;
        if ($header['cSize'] != $srcLen - 4) {
            debugLog("Stream too short");
            return false;
        }

        if ($header['magic'] == 0x414c454d) { // uncompressed RTF - return as is.
            // The CRC field of uncompressed streams is defined as 0, so
            // there is nothing to verify here.
            return substr($src, $in, $header['uSize']);
        }
        if ($header['magic'] != 0x75465a4c) { // unknown magic (please report if this ever happens)
            debugLog("Unknown Magic");
            return false;
        }
        if ($header['crc32'] != $this->LZRTFCalcCRC32($src, 16, $srcLen - 16)) {
            debugLog("CRC MISMATCH");
            return false;
        }

        // compressed RTF - uncompress. The output buffer starts with the
        // predefined dictionary; the real text follows it.
        $dst = $this->LZRTF_HDR_DATA;
        $out = $this->LZRTF_HDR_LEN;
        $oblen = $this->LZRTF_HDR_LEN + $header['uSize'];
        $flagCount = 0;
        $flags = 0;
        while ($out < $oblen) {
            // every flag byte controls the next 8 tokens, lowest bit first
            if ($flagCount++ % 8 == 0) {
                if ($in >= $srcLen) {
                    debugLog("Stream too short");
                    return false;
                }
                $flags = ord($src[$in++]);
            } else {
                $flags = $flags >> 1;
            }

            if (($flags & 1) == 1) { // dictionary reference: 12 bit offset, 4 bit length
                if ($in + 1 >= $srcLen) {
                    debugLog("Stream too short");
                    return false;
                }
                $hi = ord($src[$in++]);
                $lo = ord($src[$in++]);
                $offset = ($hi << 4) | ($lo >> 4);
                $length = ($lo & 0xF) + 2;

                // a reference to the current write position marks the end of the data
                if ($offset == $out % 4096) {
                    break;
                }

                // The dictionary is a 4096 byte ring; it is simulated by
                // pointing straight into the output buffer.
                $offset = ($out - ($out % 4096)) + $offset;
                if ($offset >= $out) {
                    $offset -= 4096;
                }
                if ($offset < 0) {
                    debugLog("Corrupt stream");
                    return false;
                }
                // byte by byte on purpose: source and target may overlap
                $end = $offset + $length;
                while ($offset < $end) {
                    $dst[$out] = $dst[$offset];
                    $out++;
                    $offset++;
                }
            } else { // literal byte
                if ($in >= $srcLen) {
                    debugLog("Stream too short");
                    return false;
                }
                $dst[$out] = $src[$in];
                $out++;
                $in++;
            }
        }

        return substr($dst, $this->LZRTF_HDR_LEN, $header['uSize']);
    }

    /**
     * Calculates the CRC32 variant used by LZRTF: same table as the common
     * CRC32, but with a start value of 0 and no final XOR with 0xffffffff.
     *
     * @param string $buf the whole rtf data
     * @param int $off start point of crc calculation
     * @param int $len length of data to calculate CRC for
     * @return int the checksum
     */
    public function LZRTFCalcCRC32($buf, $off, $len) {
        $c = 0;
        $end = $off + $len;
        for ($i = $off; $i < $end; $i++) {
            // positions past the end of $buf count as zero bytes
            $byte = isset($buf[$i]) ? ord($buf[$i]) : 0;
            $c = $this->CRC32_TABLE[($c ^ $byte) & 0xFF] ^ (($c >> 8) & 0x00ffffff);
        }
        return $c;
    }

    /**
     * Resets the parser flags to their defaults according to the specs.
     */
    public function parserInit() {
        $this->flags = array(
            "fontsize" => 24,
            "beginparagraph" => true,
        );
    }

    /**
     * Applies one control word to the parser flags (and, for \par, to the output).
     *
     * @param string $control the control word without backslash and parameter
     * @param string $parameter the numeric parameter, "" when there is none
     */
    public function parseControl($control, $parameter) {
        // "xnone" variants always switch a modifier off ...
        $switchOff = array(
            "bnone" => "bold",
            "ulnone" => "underlined",
            "inone" => "italic",
            "strikenone" => "strikethru",
            "subnone" => "subscription",
            "supernone" => "superscription",
        );
        // ... the plain variants switch it off only with parameter 0
        $toggle = array(
            "b" => "bold",
            "ul" => "underlined",
            "i" => "italic",
            "strike" => "strikethru",
            "sub" => "subscription",
            "super" => "superscription",
        );
        $control = (string)$control;
        if (isset($switchOff[$control])) {
            $this->flags[$switchOff[$control]] = false;
            return;
        }
        if (isset($toggle[$control])) {
            $this->flags[$toggle[$control]] = !((string)$parameter == "0");
            return;
        }

        switch ($control) {
            case "fonttbl": // font table definition start
                $this->flags["fonttbl"] = true; // signal fonttable control words they are allowed to behave as expected
                break;
            case "f": // define or set font
                if (!empty($this->flags["fonttbl"])) { // inside the font table the font is defined, elsewhere it is selected
                    $this->flags["fonttbl_current_write"] = $parameter;
                } else {
                    $this->flags["fonttbl_current_read"] = $parameter;
                }
                break;
            case "fcharset": // prepares flushQueue: the next queued text goes to $this->fonttable instead of the output
                $this->flags["fonttbl_want_fcharset"] = $parameter;
                break;
            case "fs": // sets the current fontsize; is used by stylesheets (which are generated on the fly)
                $this->flags["fontsize"] = $parameter;
                break;

            case "qc": // handle center alignment
                $this->flags["alignment"] = "center";
                break;
            case "qr": // handle right alignment
                $this->flags["alignment"] = "right";
                break;

            case "pard": // reset paragraph settings (only alignment)
                $this->flags["alignment"] = "";
                break;
            case "par": // new paragraph (for now a closing div in html, a line break in ascii)
                $this->flags["beginparagraph"] = true;
                if ($this->wantHTML) {
                    $this->out .= "</div>";
                }
                if ($this->wantASCII) {
                    $this->out .= "\n";
                }
                break;
            case "plain": // reset all font modifiers and the fontsize
                $this->flags["bold"] = false;
                $this->flags["italic"] = false;
                $this->flags["underlined"] = false;
                $this->flags["strikethru"] = false;
                // NOTE(review): parserInit() uses 24 as default while this
                // resets to 12 - confirm which unit is intended.
                $this->flags["fontsize"] = 12;

                $this->flags["subscription"] = false;
                $this->flags["superscription"] = false;
                break;
        }
    }

    /**
     * Dispatches the collected control word: updates the flags and, for XML
     * output, writes a <control> element. Words that do not look like
     * "letters + optional number" are silently ignored.
     */
    public function flushControl() {
        if (preg_match('/^([A-Za-z]+)(-?[0-9]*) ?$/D', (string)$this->cword, $match)) {
            $this->parseControl($match[1], $match[2]);
            if ($this->wantXML) {
                $this->out .= "<control word=\"" . $match[1] . "\"";
                if (strlen($match[2]) > 0) {
                    $this->out .= " param=\"" . $match[2] . "\"";
                }
                $this->out .= "/>";
            }
        }
    }

    /**
     * Writes a comment if the output stream supports comments (XML and HTML).
     *
     * @param string $comment the comment text
     */
    public function flushComment($comment) {
        if ($this->wantXML || $this->wantHTML) {
            $this->out .= "<!-- " . $comment . " -->";
        }
    }

    /**
     * Handles start/end of logical rtf groups: the flags are saved when a
     * group opens and restored when it closes.
     *
     * @param string $state "open" or "close"
     */
    public function flushGroup($state) {
        if ($state == "open") { /* push onto the stack */
            array_push($this->stack, $this->flags);

            if ($this->wantXML) {
                $this->out .= "<group>";
            }
        }
        if ($state == "close") { /* pop from the stack */
            $this->last_flags = $this->flags;
            // an unbalanced "}" must not wipe the flags, so only pop what was pushed
            if (count($this->stack) > 0) {
                $this->flags = array_pop($this->stack);
            }

            $this->flags["fonttbl_current_write"] = ""; // on group close, no more fontdefinition will be written to this id
            // this is not really the right way to do it !
            // of course a '}' does not necessarily denote a fonttable end; a fonttable
            // group at least *can* contain sub-groups
            // therefore a stacked approach is heavily needed
            $this->flags["fonttbl"] = false; // no matter what you do, if a group closes, its fonttbl definition is closed too

            if ($this->wantXML) {
                $this->out .= "</group>";
            }
        }
    }

    /**
     * Writes the document start (XML output only).
     */
    public function flushHead() {
        if ($this->wantXML) {
            $this->out .= "<rtf>";
        }
    }

    /**
     * Writes the document end (XML output only).
     */
    public function flushBottom() {
        if ($this->wantXML) {
            $this->out .= "</rtf>";
        }
    }

    /**
     * Writes the HTML tags of all active font modifiers.
     *
     * @param string $command "start" writes opening tags, anything else the
     *                        closing tags (in reverse order, so they nest)
     */
    public function checkHtmlSpanContent($command) {
        $opening = ($command == "start");
        $table = $opening ? $this->fontmodifier_table : array_reverse($this->fontmodifier_table, true);
        foreach ($table as $flag => $tag) {
            if (!empty($this->flags[$flag])) {
                $this->out .= ($opening ? "<" : "</") . $tag . ">";
            }
        }
    }

    /**
     * Flushes the text collected in the queue: either into the font table
     * (after a \fcharset control word) or into the output.
     *
     * NOTE(review): font table entries are keyed by the \fcharset parameter
     * but looked up by font number - verify against real documents.
     * NOTE(review): there is no output branch for ASCII mode; queued text is
     * dropped there - confirm this is intended.
     */
    public function flushQueue() {
        if (!strlen((string)$this->queue)) {
            return;
        }

        // processing logic
        if (isset($this->flags["fonttbl_want_fcharset"]) &&
            preg_match('/^[0-9]+$/D', (string)$this->flags["fonttbl_want_fcharset"])) {
            $this->fonttable[$this->flags["fonttbl_want_fcharset"]]["charset"] = $this->queue;
            $this->flags["fonttbl_want_fcharset"] = "";
            $this->queue = "";
            return;
        }

        // output logic
        /*
            Everything which passes this is (or, at least, *should* be) plain text only.
            That's why we can safely add the css-stylesheet when using wantHTML
        */
        if ($this->wantXML) {
            $this->out .= "<plain>" . $this->queue . "</plain>";
        } else if ($this->wantHTML) {
            $font = isset($this->flags["fonttbl_current_read"]) ? (string)$this->flags["fonttbl_current_read"] : "";
            // only output html if a valid (for now, just numeric) font is selected
            if (preg_match('/^[0-9]+$/D', $font)) {
                if (!empty($this->flags["beginparagraph"])) {
                    $this->flags["beginparagraph"] = false;
                    $alignment = isset($this->flags["alignment"]) ? $this->flags["alignment"] : "";
                    if ($alignment != "right" && $alignment != "center") {
                        $alignment = "left";
                    }
                    $this->out .= "<div align=\"" . $alignment . "\">";
                }

                $size = isset($this->flags["fontsize"]) ? $this->flags["fontsize"] : "";
                $family = isset($this->fonttable[$font]["charset"]) ? $this->fonttable[$font]["charset"] : "";
                $class = "f" . $font . "s" . $size;

                /* define new style for that span */
                $this->styles[$class] = "font-family:" . $family . " font-size:" . $size . ";";
                /* write span start */
                $this->out .= "<span class=\"" . $class . "\">";

                /* check if the span content has a modifier */
                $this->checkHtmlSpanContent("start");
                /* write span content */
                $this->out .= $this->queue;
                /* close modifiers */
                $this->checkHtmlSpanContent("stop");
                /* close span */
                $this->out .= "</span>";
            }
        }
        $this->queue = "";
    }

    /**
     * Handles special characters like \'ef.
     *
     * @param string $special the two hex digits following \'; anything else is ignored
     */
    public function flushSpecial($special) {
        $special = (string)$special;
        if (!preg_match('/^[0-9A-Fa-f]{2}$/D', $special)) {
            return;
        }

        if ($this->wantASCII) {
            $this->out .= chr(hexdec($special));
        } else if ($this->wantXML) {
            $this->out .= "<special value=\"" . $special . "\"/>";
        } else if ($this->wantHTML) {
            // Entities keep the output independent of the page's charset.
            $entities = array(
                "c1" => "&Aacute;", "e1" => "&aacute;", "c0" => "&Agrave;", "e0" => "&agrave;",
                "c9" => "&Eacute;", "e9" => "&eacute;", "c8" => "&Egrave;", "e8" => "&egrave;",
                "cd" => "&Iacute;", "ed" => "&iacute;", "cc" => "&Igrave;", "ec" => "&igrave;",
                "d3" => "&Oacute;", "f3" => "&oacute;", "d2" => "&Ograve;", "f2" => "&ograve;",
                "da" => "&Uacute;", "fa" => "&uacute;", "d9" => "&Ugrave;", "f9" => "&ugrave;",
                "80" => "&euro;",
                "d1" => "&Ntilde;", "f1" => "&ntilde;", "c7" => "&Ccedil;", "e7" => "&ccedil;",
                "dc" => "&Uuml;",   "fc" => "&uuml;",
                "bf" => "&iquest;", "a1" => "&iexcl;",  "b7" => "&middot;",
                "a9" => "&copy;",   "ae" => "&reg;",
                "ba" => "&ordm;",   "aa" => "&ordf;",
                "b2" => "&sup2;",   "b3" => "&sup3;",
            );
            $this->out .= "<special value=\"" . $special . "\"/>";
            $key = strtolower($special);
            if (isset($entities[$key])) {
                $this->out .= $entities[$key];
            }
        }
    }

    /**
     * Outputs the collected errors at the end (XML output only).
     */
    public function flushErrors() {
        if (count($this->err) > 0) {
            if ($this->wantXML) {
                $this->out .= "<errors>";
                foreach ($this->err as $value) {
                    $this->out .= "<message>" . $value . "</message>";
                }
                $this->out .= "</errors>";
            }
        }
    }

    /**
     * Builds $this->outstyles, a <style> element with one rule per entry of $this->styles.
     */
    public function makeStyles() {
        $css = "<style type=\"text/css\"><!--\n";
        if (is_array($this->styles)) {
            foreach ($this->styles as $stylename => $styleattrib) {
                $css .= "." . $stylename . " { " . $styleattrib . " }\n";
            }
        }
        $css .= "--></style>\n";
        $this->outstyles = $css;
    }

    /**
     * Walks the loaded RTF text byte by byte and appends the rendering
     * selected with output() to $this->out (and $this->outstyles for HTML).
     */
    public function parse() {
        $this->parserInit();

        $i = 0;
        $this->cw = false;      // flag if control word is currently parsed
        $this->cfirst = false;  // first control character ?
        $this->cword = "";      // last or current control word (depends on $this->cw)

        $this->queue = "";      // plain text data found during parsing

        $this->flushHead();

        while ($i < $this->rtf_len) {
            $ch = $this->rtf[$i];
            switch ($ch) {
                case "{":
                case "}":
                    if ($this->cfirst) { // "\{" and "\}" are literal braces, not group delimiters
                        $this->queue .= $ch;
                        $this->cfirst = false;
                        $this->cw = false;
                        break;
                    }
                    if ($this->cw) {
                        $this->flushControl();
                        $this->cw = false;
                        $this->cfirst = false;
                    } else {
                        $this->flushQueue();
                    }
                    $this->flushGroup($ch == "{" ? "open" : "close");
                    break;

                case "\\":
                    if ($this->cfirst) { // catches '\\'
                        $this->queue .= "\\";
                        $this->cfirst = false;
                        $this->cw = false;
                        break;
                    }
                    if ($this->cw) {
                        $this->flushControl();
                    } else {
                        $this->flushQueue();
                    }
                    $this->cw = true;
                    $this->cfirst = true;
                    $this->cword = "";
                    break;

                default:
                    if ($ch === "\n" || $ch === "\r") {
                        break; // eat line breaks
                    }

                    if (!$this->cw) {
                        // plain text; markup characters need translation
                        // before putting into queue when XML or HTML is wanted
                        if ($this->wantHTML || $this->wantXML) {
                            if ($ch === "<") {
                                $this->queue .= "&lt;";
                            } else if ($ch === ">") {
                                $this->queue .= "&gt;";
                            } else if ($ch === "&") {
                                $this->queue .= "&amp;";
                            } else {
                                $this->queue .= $ch;
                            }
                        } else {
                            $this->queue .= $ch;
                        }
                        break;
                    }

                    // active control word
                    if (preg_match('/^[a-zA-Z0-9-]$/D', $ch)) { // continue parsing
                        $this->cword .= $ch;
                        $this->cfirst = false;
                        break;
                    }

                    /*
                        Control word could be a 'control symbol', like \~ or \* etc.
                    */
                    $specialmatch = false;
                    if ($this->cfirst) {
                        if ($ch == '\'') { // \'hh: two hex digits follow
                            $this->flushQueue();
                            $this->flushSpecial((string)substr($this->rtf, $i + 1, 2));
                            $i += 2;
                            $specialmatch = true;
                            $this->cw = false;
                            $this->cfirst = false;
                            $this->cword = "";
                        } else if ($ch == '*') {
                            $this->flushComment("control symbols not yet handled");
                            $specialmatch = true;
                        }
                        $this->cfirst = false;
                    } else if ($ch == ' ') {
                        // space delimits control words, so just discard it and flush the controlword
                        $this->cw = false;
                        $this->flushControl();
                        break;
                    }
                    if (!$specialmatch) {
                        $this->flushControl();
                        $this->cw = false;
                        $this->cfirst = false;
                        /*
                            The current character is a delimiter, but is NOT
                            part of the control word so we hop one step back
                            in the stream and process it again
                        */
                        $i--;
                    }
                    break;
            }
            $i++;
        }
        $this->flushQueue();
        $this->flushErrors();
        $this->flushBottom();

        if ($this->wantHTML) {
            $this->makeStyles();
        }
    }
}
|
---|
705 |
|
---|
706 |
|
---|
707 | ?>
|
---|