[3637] | 1 | <?php
|
---|
| 2 | /*
|
---|
| 3 | This class contains code from rtfclass.php that was written by Markus Fischer and placed by him under
|
---|
| 4 | GPLv2 License.
|
---|
| 5 |
|
---|
| 6 | =======================================NOTES FROM ORIGINAL AUTHOR====================================
|
---|
| 7 | Rich Text Format - Parsing Class
|
---|
| 8 | ================================
|
---|
| 9 |
|
---|
| 10 | (c) 2000 Markus Fischer
|
---|
| 11 | <mfischer@josefine.ben.tuwien.ac.at>
|
---|
| 12 | http://josefine.ben.tuwien.ac.at/~mfischer/
|
---|
| 13 |
|
---|
| 14 | Latest versions of this class can always be found at
|
---|
| 15 | http://josefine.ben.tuwien.ac.at/~mfischer/developing/php/rtf/rtfclass.phps
|
---|
| 16 | Testing suite is available at
|
---|
| 17 | http://josefine.ben.tuwien.ac.at/~mfischer/developing/php/rtf/
|
---|
| 18 |
|
---|
| 19 | License: GPLv2
|
---|
| 20 |
|
---|
| 21 | Specification:
|
---|
| 22 | http://msdn.microsoft.com/library/default.asp?URL=/library/specs/rtfspec.htm
|
---|
| 23 |
|
---|
| 24 | General Notes:
|
---|
| 25 | ==============
|
---|
| 26 | Unknown or unsupported control symbols are silently ignored
|
---|
| 27 |
|
---|
| 28 | Group stacking is still not supported :(
|
---|
| 29 | group stack logic implemented; however not really used yet
|
---|
| 30 | =====================================================================================================
|
---|
| 31 |
|
---|
| 32 | It was modified by me (Andreas Brodowski) to allow compressed RTF being uncompressed by code I ported from
|
---|
| 33 | Java to PHP and adapted according the needs of Z-Push.
|
---|
| 34 |
|
---|
| 35 | Currently it is being used to detect empty RTF Streams from Nokia Phones in MfE Clients
|
---|
| 36 |
|
---|
| 37 | It needs to be used by other backend writers that need to have notes in calendar, appointment or task
|
---|
| 38 | objects to be written to their databases since devices send them usually in RTF Format... With Zarafa
|
---|
| 39 | you can write them directly to DB and Zarafa is doing the conversion job. Other Groupware systems usually
|
---|
| 40 | don't have this possibility...
|
---|
| 41 |
|
---|
| 42 | */
|
---|
| 43 |
|
---|
| 44 |
|
---|
/**
 * RTF reader: unwraps an LZRTF ("compressed RTF") container and runs a small
 * tokenizer over the contained RTF, producing XML, HTML or ASCII output in $out.
 *
 * Typical use: loadrtf($stream); output("ascii"|"xml"|"html"); parse(); read $out.
 *
 * Failures are reported through the global debugLog() function, which is defined
 * outside this class.
 */
class rtf {
    // Dictionary the LZRTF decompressor is pre-seeded with, and its length in bytes.
    // NOTE(review): other LZFu implementations appear to use "\r\n" (CR LF) before
    // "\par" where this string has "\n\r" - confirm against MS-OXRTFCP before changing.
    public $LZRTF_HDR_DATA = "{\\rtf1\\ansi\\mac\\deff0\\deftab720{\\fonttbl;}{\\f0\\fnil \\froman \\fswiss \\fmodern \\fscript \\fdecor MS Sans SerifSymbolArialTimes New RomanCourier{\\colortbl\\red0\\green0\\blue0\n\r\\par \\pard\\plain\\f0\\fs20\\b\\i\\u\\tab\\tx";
    public $LZRTF_HDR_LEN = 207;

    // Lookup table used by LZRTFCalcCRC32().
    public $CRC32_TABLE = array(
        0x00000000,0x77073096,0xEE0E612C,0x990951BA,0x076DC419,0x706AF48F,0xE963A535,0x9E6495A3,
        0x0EDB8832,0x79DCB8A4,0xE0D5E91E,0x97D2D988,0x09B64C2B,0x7EB17CBD,0xE7B82D07,0x90BF1D91,
        0x1DB71064,0x6AB020F2,0xF3B97148,0x84BE41DE,0x1ADAD47D,0x6DDDE4EB,0xF4D4B551,0x83D385C7,
        0x136C9856,0x646BA8C0,0xFD62F97A,0x8A65C9EC,0x14015C4F,0x63066CD9,0xFA0F3D63,0x8D080DF5,
        0x3B6E20C8,0x4C69105E,0xD56041E4,0xA2677172,0x3C03E4D1,0x4B04D447,0xD20D85FD,0xA50AB56B,
        0x35B5A8FA,0x42B2986C,0xDBBBC9D6,0xACBCF940,0x32D86CE3,0x45DF5C75,0xDCD60DCF,0xABD13D59,
        0x26D930AC,0x51DE003A,0xC8D75180,0xBFD06116,0x21B4F4B5,0x56B3C423,0xCFBA9599,0xB8BDA50F,
        0x2802B89E,0x5F058808,0xC60CD9B2,0xB10BE924,0x2F6F7C87,0x58684C11,0xC1611DAB,0xB6662D3D,
        0x76DC4190,0x01DB7106,0x98D220BC,0xEFD5102A,0x71B18589,0x06B6B51F,0x9FBFE4A5,0xE8B8D433,
        0x7807C9A2,0x0F00F934,0x9609A88E,0xE10E9818,0x7F6A0DBB,0x086D3D2D,0x91646C97,0xE6635C01,
        0x6B6B51F4,0x1C6C6162,0x856530D8,0xF262004E,0x6C0695ED,0x1B01A57B,0x8208F4C1,0xF50FC457,
        0x65B0D9C6,0x12B7E950,0x8BBEB8EA,0xFCB9887C,0x62DD1DDF,0x15DA2D49,0x8CD37CF3,0xFBD44C65,
        0x4DB26158,0x3AB551CE,0xA3BC0074,0xD4BB30E2,0x4ADFA541,0x3DD895D7,0xA4D1C46D,0xD3D6F4FB,
        0x4369E96A,0x346ED9FC,0xAD678846,0xDA60B8D0,0x44042D73,0x33031DE5,0xAA0A4C5F,0xDD0D7CC9,
        0x5005713C,0x270241AA,0xBE0B1010,0xC90C2086,0x5768B525,0x206F85B3,0xB966D409,0xCE61E49F,
        0x5EDEF90E,0x29D9C998,0xB0D09822,0xC7D7A8B4,0x59B33D17,0x2EB40D81,0xB7BD5C3B,0xC0BA6CAD,
        0xEDB88320,0x9ABFB3B6,0x03B6E20C,0x74B1D29A,0xEAD54739,0x9DD277AF,0x04DB2615,0x73DC1683,
        0xE3630B12,0x94643B84,0x0D6D6A3E,0x7A6A5AA8,0xE40ECF0B,0x9309FF9D,0x0A00AE27,0x7D079EB1,
        0xF00F9344,0x8708A3D2,0x1E01F268,0x6906C2FE,0xF762575D,0x806567CB,0x196C3671,0x6E6B06E7,
        0xFED41B76,0x89D32BE0,0x10DA7A5A,0x67DD4ACC,0xF9B9DF6F,0x8EBEEFF9,0x17B7BE43,0x60B08ED5,
        0xD6D6A3E8,0xA1D1937E,0x38D8C2C4,0x4FDFF252,0xD1BB67F1,0xA6BC5767,0x3FB506DD,0x48B2364B,
        0xD80D2BDA,0xAF0A1B4C,0x36034AF6,0x41047A60,0xDF60EFC3,0xA867DF55,0x316E8EEF,0x4669BE79,
        0xCB61B38C,0xBC66831A,0x256FD2A0,0x5268E236,0xCC0C7795,0xBB0B4703,0x220216B9,0x5505262F,
        0xC5BA3BBE,0xB2BD0B28,0x2BB45A92,0x5CB36A04,0xC2D7FFA7,0xB5D0CF31,0x2CD99E8B,0x5BDEAE1D,
        0x9B64C2B0,0xEC63F226,0x756AA39C,0x026D930A,0x9C0906A9,0xEB0E363F,0x72076785,0x05005713,
        0x95BF4A82,0xE2B87A14,0x7BB12BAE,0x0CB61B38,0x92D28E9B,0xE5D5BE0D,0x7CDCEFB7,0x0BDBDF21,
        0x86D3D2D4,0xF1D4E242,0x68DDB3F8,0x1FDA836E,0x81BE16CD,0xF6B9265B,0x6FB077E1,0x18B74777,
        0x88085AE6,0xFF0F6A70,0x66063BCA,0x11010B5C,0x8F659EFF,0xF862AE69,0x616BFFD3,0x166CCF45,
        0xA00AE278,0xD70DD2EE,0x4E048354,0x3903B3C2,0xA7672661,0xD06016F7,0x4969474D,0x3E6E77DB,
        0xAED16A4A,0xD9D65ADC,0x40DF0B66,0x37D83BF0,0xA9BCAE53,0xDEBB9EC5,0x47B2CF7F,0x30B5FFE9,
        0xBDBDF21C,0xCABAC28A,0x53B39330,0x24B4A3A6,0xBAD03605,0xCDD70693,0x54DE5729,0x23D967BF,
        0xB3667A2E,0xC4614AB8,0x5D681B02,0x2A6F2B94,0xB40BBE37,0xC30C8EA1,0x5A05DF1B,0x2D02EF8D,
    );

    public $rtf;                 // rtf core stream
    public $rtf_len;             // length in bytes of the stream (cached so strlen is not called repeatedly)
    public $err = array();       // array of error messages; empty when there was no error

    public $wantXML = false;     // convert to XML
    public $wantHTML = false;    // convert to HTML
    public $wantASCII = false;   // convert to ASCII

    // the only variable which should be accessed from the outside
    public $out;                 // output data stream (depends on which $wantXXXXX is set to true)
    public $outstyles;           // htmlified styles (generated after parsing if wantHTML)
    public $styles = array();    // if wantHTML, stylesheet definitions are put in here

    // internal parser variables --------------------------------
    // control word variables
    public $cword;               // holds the current (or last) control word, depending on $cw
    public $cw;                  // are we currently parsing a control word ?
    public $cfirst;              // could this be the first character ? so watch out for control symbols

    public $flags = array();     // parser flags
    public $last_flags = array();// flags of the most recently closed group (written by flushGroup)

    public $queue;               // every character which is neither a special char nor part of a control word/symbol; considered 'plain'

    public $stack = array();     // group stack (one saved copy of $flags per open group)

    public $fonttable = array(); // text captured after \fcharsetN, stored as [N]["charset"] (see flushQueue)

    /* keywords which don't follow the specification (used by Word '97 - 2000) */
    // not yet used
    public $control_exception = array(
        "clFitText",
        "clftsWidth(-?[0-9]+)?",
        "clNoWrap(-?[0-9]+)?",
        "clwWidth(-?[0-9]+)?",
        "tdfrmtxtBottom(-?[0-9]+)?",
        "tdfrmtxtLeft(-?[0-9]+)?",
        "tdfrmtxtRight(-?[0-9]+)?",
        "tdfrmtxtTop(-?[0-9]+)?",
        "trftsWidthA(-?[0-9]+)?",
        "trftsWidthB(-?[0-9]+)?",
        "trftsWidth(-?[0-9]+)?",
        "trwWithA(-?[0-9]+)?",
        "trwWithB(-?[0-9]+)?",
        "trwWith(-?[0-9]+)?",
        "spectspecifygen(-?[0-9]+)?",
    );

    // not used by the parser; "PC 437" previously shared key "255" with "OEM"
    // and was silently overwritten - per the RTF \fcharset values it is 254.
    public $charset_table = array(
        "0" => "ANSI",
        "1" => "Default",
        "2" => "Symbol",
        "77" => "Mac",
        "128" => "Shift Jis",
        "129" => "Hangul",
        "130" => "Johab",
        "134" => "GB2312",
        "136" => "Big5",
        "161" => "Greek",
        "162" => "Turkish",
        "163" => "Vietnamese",
        "177" => "Hebrew",
        "178" => "Arabic",
        "179" => "Arabic Traditional",
        "180" => "Arabic user",
        "181" => "Hebrew user",
        "186" => "Baltic",
        "204" => "Russian",
        "222" => "Thai",
        "238" => "Eastern European",
        "254" => "PC 437",
        "255" => "OEM",
    );

    /* note: the only conversion table used (flag name => HTML tag) */
    public $fontmodifier_table = array(
        "bold" => "b",
        "italic" => "i",
        "underlined" => "u",
        "strikethru" => "strike",
    );

    /**
     * Start with an empty stream and empty output.
     */
    public function __construct() {
        $this->rtf_len = 0;
        $this->rtf = '';

        $this->out = '';
    }

    /**
     * Legacy PHP 4 style constructor name, kept so existing explicit calls
     * ($obj->rtf(), parent::rtf()) keep working. Declared after __construct()
     * so PHP 5-7 treat __construct() as the real constructor.
     */
    public function rtf() {
        $this->__construct();
    }

    /**
     * Load the raw (LZRTF wrapped) rtf data to be converted by this class.
     *
     * @param string $data the raw stream, including the 16 byte LZRTF header
     * @return bool true when a non-empty RTF stream was extracted, false otherwise
     */
    public function loadrtf($data) {
        $rtf = $this->uncompress($data);
        if ($rtf === false || strlen($rtf) == 0) {
            // Reset both fields so a failed load can never leave the length
            // of a previously loaded stream behind.
            $this->rtf = '';
            $this->rtf_len = 0;
            debugLog("No data in stream found");
            return false;
        }
        $this->rtf = $rtf;
        $this->rtf_len = strlen($rtf);
        return true;
    }

    /**
     * Select an output format. Formats are flags that are only ever switched on;
     * unknown values are ignored.
     *
     * @param string $typ "ascii", "xml" or "html"
     */
    public function output($typ) {
        if ($typ == "ascii") {
            $this->wantASCII = true;
        } elseif ($typ == "xml") {
            $this->wantXML = true;
        } elseif ($typ == "html") {
            $this->wantHTML = true;
        }
    }

    /**
     * Unwrap an LZRTF stream.
     *
     * Header layout (4 little-endian 32 bit fields): compressed size, uncompressed
     * size, magic, crc32. Magic 0x414c454d marks stored (uncompressed) data,
     * 0x75465a4c marks compressed data.
     *
     * @param string $src the raw stream in LZRTF format
     * @return string|false the contained RTF, or false when the stream is rejected
     */
    public function uncompress($src) {
        $src = (string)$src;
        $srcLen = strlen($src);
        if ($srcLen < 16) { // not even a complete header
            debugLog("Stream too short");
            return false;
        }

        // "V" = unsigned 32 bit little-endian, independent of the host byte order
        $header = unpack("VcSize/VuSize/Vmagic/Vcrc32", substr($src, 0, 16));
        $in = 16;
        if ($header['cSize'] != $srcLen - 4) {
            debugLog("Stream too short");
            return false;
        }

        if ($header['magic'] == 0x414c454d) { // uncompressed RTF - return as is.
            // The CRC is only meaningful for compressed streams, so it is not
            // verified here (stored streams normally carry a zero CRC).
            return (string)substr($src, $in, $header['uSize']);
        }

        if ($header['magic'] != 0x75465a4c) { // unknown magic - return false (please report if this ever happens)
            debugLog("Unknown Magic");
            return false;
        }

        // compressed RTF - verify, then uncompress.
        if ($header['crc32'] != $this->LZRTFCalcCRC32($src, 16, ($header['cSize'] + 4) - 16)) {
            debugLog("CRC MISMATCH");
            return false;
        }

        $dst = $this->LZRTF_HDR_DATA;   // output starts out as the preset dictionary
        $out = $this->LZRTF_HDR_LEN;    // write position in $dst
        $oblen = $this->LZRTF_HDR_LEN + $header['uSize'];
        $flagCount = 0;
        $flags = 0;
        while ($out < $oblen) {
            // every 8 tokens a new control byte is read; one bit per token, LSB first
            if ($flagCount++ % 8 == 0) {
                if ($in >= $srcLen) break; // input exhausted: keep what was decoded
                $flags = ord($src[$in++]);
            } else {
                $flags = $flags >> 1;
            }

            if (($flags & 1) == 1) {
                // back reference: 12 bit dictionary offset, 4 bit (length - 2)
                if ($in + 1 >= $srcLen) break;
                $offset = ord($src[$in++]);
                $length = ord($src[$in++]);
                $offset = ($offset << 4) | ($length >> 4);
                $length = ($length & 0xF) + 2;
                // map the offset into the 4096 byte window that ends at $out
                $offset = (int)($out / 4096) * 4096 + $offset;
                if ($offset >= $out) $offset -= 4096;
                if ($offset < 0) break; // points before the start of the data: nothing valid to copy
                $end = $offset + $length;
                while ($offset < $end) {
                    // byte-wise on purpose: source and target ranges may overlap
                    $byte = $dst[$offset++];
                    $dst[$out++] = $byte;
                }
            } else {
                // literal byte
                if ($in >= $srcLen) break;
                $dst[$out++] = $src[$in++];
            }
        }

        return (string)substr($dst, $this->LZRTF_HDR_LEN, $header['uSize']);
    }

    /**
     * Calculate the CRC32 of the LZRTF data part. A dedicated function is needed
     * because, unlike the usual CRC32, no XOR with 0xffffffff is applied: the
     * register starts at 0 and is returned as is.
     *
     * @param string $buf the whole rtf stream
     * @param int $off start point of crc calculation
     * @param int $len length of data to calculate CRC for
     * @return int the checksum
     */
    public function LZRTFCalcCRC32($buf, $off, $len) {
        $c = 0;
        $end = min($off + $len, strlen($buf)); // never read past the buffer
        for ($i = $off; $i < $end; $i++) {
            $c = $this->CRC32_TABLE[($c ^ ord($buf[$i])) & 0xFF] ^ (($c >> 8) & 0x00ffffff);
        }
        return $c;
    }

    /**
     * Reset the parser flags. Every flag the parser reads is given an explicit
     * "off" value so that no undefined array key is ever accessed.
     */
    public function parserInit() { /* Default values according to the specs */
        $this->flags = array(
            "fontsize" => 24,
            "beginparagraph" => true,
            "alignment" => "",
            "fonttbl" => false,
            "fonttbl_current_write" => "",
            "fonttbl_current_read" => "",
            "fonttbl_want_fcharset" => "",
            "bold" => false,
            "italic" => false,
            "underlined" => false,
            "strikethru" => false,
            "subscription" => false,
            "superscription" => false,
        );
    }

    /**
     * Apply a control word to the parser flags (and, for \par, to the output).
     * Unknown control words are ignored.
     *
     * @param string $control control word without the leading backslash
     * @param string $parameter numeric parameter as text, "" when absent
     */
    public function parseControl($control, $parameter) {
        // on/off character formatting: "<word>" switches on unless its parameter
        // is 0, "<word>none" always switches off
        $toggles = array(
            "b" => "bold",               "bnone" => "bold",
            "ul" => "underlined",        "ulnone" => "underlined",
            "i" => "italic",             "inone" => "italic",
            "strike" => "strikethru",    "strikenone" => "strikethru",
            "sub" => "subscription",     "subnone" => "subscription",
            "super" => "superscription", "supernone" => "superscription",
        );
        if (isset($toggles[$control])) {
            if (substr($control, -4) == "none") {
                $parameter = "0";
            }
            // the (string) cast keeps an empty parameter from comparing equal to zero
            $this->flags[$toggles[$control]] = !((string)$parameter == "0");
            return;
        }

        switch ($control) {
            case "fonttbl": // font table definition start
                $this->flags["fonttbl"] = true; // signal fonttable control words they are allowed to behave as expected
                break;
            case "f": // define or set font
                if ($this->flags["fonttbl"]) { // if set, the fonttable definition is written to; else it is read from
                    $this->flags["fonttbl_current_write"] = $parameter;
                } else {
                    $this->flags["fonttbl_current_read"] = $parameter;
                }
                break;
            case "fcharset": // prepares flushQueue: the next queued text goes to $this->fonttable instead of the output
                $this->flags["fonttbl_want_fcharset"] = $parameter;
                break;
            case "fs": // sets the current fontsize; used by the stylesheets (generated on the fly)
                $this->flags["fontsize"] = $parameter;
                break;

            case "qc": // handle center alignment
                $this->flags["alignment"] = "center";
                break;
            case "qr": // handle right alignment
                $this->flags["alignment"] = "right";
                break;

            case "pard": // reset paragraph settings (only alignment)
                $this->flags["alignment"] = "";
                break;
            case "par": // new paragraph: closes the div in HTML, emits a line feed in ASCII
                $this->flags["beginparagraph"] = true;
                if ($this->wantHTML) {
                    $this->out .= "</div>";
                }
                if ($this->wantASCII) {
                    $this->out .= "\n";
                }
                break;

            case "plain": // reset all font modifiers and fontsize to 12
                // NOTE(review): parserInit() uses 24 as the default font size while
                // this resets to 12 - confirm which unit/value is intended.
                $this->flags["bold"] = false;
                $this->flags["italic"] = false;
                $this->flags["underlined"] = false;
                $this->flags["strikethru"] = false;
                $this->flags["fontsize"] = 12;

                $this->flags["subscription"] = false;
                $this->flags["superscription"] = false;
                break;
        }
    }

    /**
     * Dispatch the collected control word: split it into name and numeric
     * parameter, update the flags and (XML only) write it to the output stream.
     * Control words that do not match the name/parameter pattern are dropped.
     */
    public function flushControl() {
        if (!preg_match('/^([A-Za-z]+)(-?[0-9]*) ?\z/', (string)$this->cword, $match)) {
            return;
        }
        $this->parseControl($match[1], $match[2]);
        if ($this->wantXML) {
            $this->out .= "<control word=\"" . $match[1] . "\"";
            if (strlen($match[2]) > 0) {
                $this->out .= " param=\"" . $match[2] . "\"";
            }
            $this->out .= "/>";
        }
    }

    /**
     * If the output stream supports comments (XML, HTML), dispatch one.
     *
     * @param string $comment comment text
     */
    public function flushComment($comment) {
        if ($this->wantXML || $this->wantHTML) {
            $this->out .= "<!-- " . $comment . " -->";
        }
    }

    /**
     * Dispatch start/end of logical rtf groups: saves the flags on "open" and
     * restores them on "close" (not every output type needs the markup; merely
     * debugging purpose).
     *
     * @param string $state "open" or "close"
     */
    public function flushGroup($state) {
        if ($state == "open") { /* push onto the stack */
            array_push($this->stack, $this->flags);

            if ($this->wantXML) {
                $this->out .= "<group>";
            }
        }
        if ($state == "close") { /* pop from the stack */
            $this->last_flags = $this->flags;
            // an unbalanced '}' must not wipe the flags: only restore when a
            // saved state exists
            if (count($this->stack) > 0) {
                $this->flags = array_pop($this->stack);
            }

            $this->flags["fonttbl_current_write"] = ""; // on group close, no more fontdefinition will be written to this id
            // this is not really the right way to do it !
            // of course a '}' does not necessarily denote a fonttable end; a fonttable
            // group at least *can* contain sub-groups
            // therefore a stacked approach is heavily needed
            $this->flags["fonttbl"] = false; // no matter what you do, if a group closes, its fonttbl definition is closed too

            if ($this->wantXML) {
                $this->out .= "</group>";
            }
        }
    }

    /**
     * Write the document start (XML only).
     */
    public function flushHead() {
        if ($this->wantXML) {
            $this->out .= "<rtf>";
        }
    }

    /**
     * Write the document end (XML only).
     */
    public function flushBottom() {
        if ($this->wantXML) {
            $this->out .= "</rtf>";
        }
    }

    /**
     * Write the HTML tags for all currently active font modifiers.
     *
     * @param string $command "start" writes opening tags, anything else closing tags
     */
    public function checkHtmlSpanContent($command) {
        foreach ($this->fontmodifier_table as $flag => $tag) {
            if (empty($this->flags[$flag])) {
                continue;
            }
            $this->out .= ($command == "start") ? "<" . $tag . ">" : "</" . $tag . ">";
        }
    }

    /**
     * Flush the text in the queue.
     *
     * When an \fcharsetN control word is pending, the text is stored in
     * $this->fonttable[N]["charset"] instead of being written out.
     * Otherwise it is written as <plain> (XML) or as a styled <span> (HTML).
     * NOTE(review): in ASCII mode the queued text is discarded, only \par and
     * \'xx produce output - confirm this is intended.
     */
    public function flushQueue() {
        if (!strlen((string)$this->queue)) {
            return;
        }

        // processing logic
        if (isset($this->flags["fonttbl_want_fcharset"]) &&
            preg_match('/^[0-9]+\z/', (string)$this->flags["fonttbl_want_fcharset"])) {
            $this->fonttable[$this->flags["fonttbl_want_fcharset"]]["charset"] = $this->queue;
            $this->flags["fonttbl_want_fcharset"] = "";
            $this->queue = "";
            return;
        }

        // output logic
        /*
            Everything which passes this is (or, at least, *should* be) only outputted plaintext.
            That's why we can safely add the css-stylesheet when using wantHTML
        */
        if ($this->wantXML) {
            $this->out .= "<plain>" . $this->queue . "</plain>";
        } elseif ($this->wantHTML) {
            $fontId = isset($this->flags["fonttbl_current_read"]) ? (string)$this->flags["fonttbl_current_read"] : "";
            // only output html if a valid (for now, just numeric) fonttable is given
            if (preg_match('/^[0-9]+\z/', $fontId)) {
                if (!empty($this->flags["beginparagraph"])) {
                    $this->flags["beginparagraph"] = false;
                    $align = isset($this->flags["alignment"]) ? $this->flags["alignment"] : "";
                    if ($align != "right" && $align != "center") {
                        $align = "left";
                    }
                    $this->out .= "<div align=\"" . $align . "\">";
                }

                $class = "f" . $fontId . "s" . $this->flags["fontsize"];
                $fontName = isset($this->fonttable[$fontId]["charset"]) ? $this->fonttable[$fontId]["charset"] : "";

                /* define new style for that span */
                $this->styles[$class] = "font-family:" . $fontName . " font-size:" . $this->flags["fontsize"] . ";";
                /* write span start */
                $this->out .= "<span class=\"" . $class . "\">";

                /* check if the span content has a modifier */
                $this->checkHtmlSpanContent("start");
                /* write span content */
                $this->out .= $this->queue;
                /* close modifiers */
                $this->checkHtmlSpanContent("stop");
                /* close span */
                $this->out .= "</span>";
            }
        }
        $this->queue = "";
    }

    /**
     * Handle special characters like \'ef.
     *
     * ASCII: the byte with that hex value is written. XML: a <special/> element.
     * HTML: a <special/> element followed by an HTML entity for the characters
     * listed below (entities keep the output independent of this file's encoding).
     *
     * @param string $special the two characters following \'
     */
    public function flushSpecial($special) {
        static $htmlEntities = array(
            "c1" => "&Aacute;", "e1" => "&aacute;", "c0" => "&Agrave;", "e0" => "&agrave;",
            "c9" => "&Eacute;", "e9" => "&eacute;", "c8" => "&Egrave;", "e8" => "&egrave;",
            "cd" => "&Iacute;", "ed" => "&iacute;", "cc" => "&Igrave;", "ec" => "&igrave;",
            "d3" => "&Oacute;", "f3" => "&oacute;", "d2" => "&Ograve;", "f2" => "&ograve;",
            "da" => "&Uacute;", "fa" => "&uacute;", "d9" => "&Ugrave;", "f9" => "&ugrave;",
            "80" => "&euro;",
            "d1" => "&Ntilde;", "f1" => "&ntilde;", "c7" => "&Ccedil;", "e7" => "&ccedil;",
            "dc" => "&Uuml;",   "fc" => "&uuml;",
            "bf" => "&iquest;", "a1" => "&iexcl;",  "b7" => "&middot;",
            "a9" => "&copy;",   "ae" => "&reg;",    "ba" => "&ordm;",   "aa" => "&ordf;",
            "b2" => "&sup2;",   "b3" => "&sup3;",
        );

        $special = (string)$special;
        if (strlen($special) != 2) {
            return;
        }

        if ($this->wantASCII) {
            // only real hex pairs are converted; hexdec() must not be fed other characters
            if (preg_match('/^[0-9A-Fa-f]{2}\z/', $special)) {
                $this->out .= chr(hexdec($special));
            }
        } elseif ($this->wantXML) {
            $this->out .= "<special value=\"" . $special . "\"/>";
        } elseif ($this->wantHTML) {
            $this->out .= "<special value=\"" . $special . "\"/>";
            $key = strtolower($special); // hex digits may be written in either case
            if (isset($htmlEntities[$key])) {
                $this->out .= $htmlEntities[$key];
            }
        }
    }

    /**
     * Output errors at end (XML only).
     */
    public function flushErrors() {
        if (count($this->err) > 0 && $this->wantXML) {
            $this->out .= "<errors>";
            foreach ($this->err as $value) {
                $this->out .= "<message>" . $value . "</message>";
            }
            $this->out .= "</errors>";
        }
    }

    /**
     * Build $outstyles: a <style> element with one CSS rule per entry in $styles.
     */
    public function makeStyles() {
        $this->outstyles = "<style type=\"text/css\"><!--\n";
        if (is_array($this->styles)) {
            foreach ($this->styles as $stylename => $styleattrib) {
                $this->outstyles .= "." . $stylename . " { " . $styleattrib . " }\n";
            }
        }
        $this->outstyles .= "--></style>\n";
    }

    /**
     * Tokenize the loaded stream and append the converted result to $out
     * (and, for HTML, build $outstyles).
     */
    public function parse() {
        $this->parserInit();

        $this->cw = false;      // flag if control word is currently parsed
        $this->cfirst = false;  // first control character ?
        $this->cword = "";      // last or current control word (depends on $this->cw)

        $this->queue = "";      // plain text data found during parsing

        $this->flushHead();

        for ($i = 0; $i < $this->rtf_len; $i++) {
            $ch = $this->rtf[$i];
            switch ($ch) {
                case "{":
                case "}":
                    // a group delimiter ends whatever was being collected
                    if ($this->cw) {
                        $this->flushControl();
                        $this->cw = false;
                        $this->cfirst = false;
                    } else {
                        $this->flushQueue();
                    }
                    $this->flushGroup($ch == "{" ? "open" : "close");
                    break;

                case "\\":
                    if ($this->cfirst) { // catches '\\'
                        $this->queue .= "\\";
                        $this->cfirst = false;
                        $this->cw = false;
                        break;
                    }
                    if ($this->cw) {
                        $this->flushControl();
                    } else {
                        $this->flushQueue();
                    }
                    $this->cw = true;
                    $this->cfirst = true;
                    $this->cword = "";
                    break;

                default:
                    if ($ch === "\n" || $ch === "\r") {
                        break; // eat line breaks
                    }

                    if (!$this->cw) {
                        // plain text; < and > need translation before putting
                        // into the queue when XML or HTML is wanted
                        if ($this->wantHTML || $this->wantXML) {
                            if ($ch === "<") {
                                $this->queue .= "&lt;";
                            } elseif ($ch === ">") {
                                $this->queue .= "&gt;";
                            } else {
                                $this->queue .= $ch;
                            }
                        } else {
                            $this->queue .= $ch;
                        }
                        break;
                    }

                    // active control word
                    if (preg_match('/^[a-zA-Z0-9-]\z/', $ch)) { // continue parsing
                        $this->cword .= $ch;
                        $this->cfirst = false;
                        break;
                    }

                    /*
                        Control word could be a 'control symbol', like \~ or \* etc.
                    */
                    $specialmatch = false;
                    if ($this->cfirst) {
                        if ($ch === "'") { // expect to get some special chars
                            $this->flushQueue();
                            // substr() instead of direct indexing: the stream may
                            // end right after the quote
                            $this->flushSpecial((string)substr($this->rtf, $i + 1, 2));
                            $i += 2;
                            $specialmatch = true;
                            $this->cw = false;
                            $this->cword = "";
                        } elseif ($ch === "{" || $ch === "}" || $ch === "*") {
                            $this->flushComment("control symbols not yet handled");
                            $specialmatch = true;
                        }
                        $this->cfirst = false;
                    } elseif ($ch === " ") {
                        // space delimits control words, so just discard it and flush the control word
                        $this->cw = false;
                        $this->flushControl();
                        break;
                    }

                    if (!$specialmatch) {
                        $this->flushControl();
                        $this->cw = false;
                        $this->cfirst = false;
                        /*
                            The current character is a delimiter, but is NOT
                            part of the control word so we hop one step back
                            in the stream and process it again
                        */
                        $i--;
                    }
                    break;
            }
        }

        $this->flushQueue();
        $this->flushErrors();
        $this->flushBottom();

        if ($this->wantHTML) {
            $this->makeStyles();
        }
    }
}
|
---|
| 705 |
|
---|
| 706 |
|
---|
| 707 | ?>
|
---|