* * Copyright (C) 2016, Kolab Systems AG * * This program is free software: you can redistribute it and/or modify * it under the terms of the GNU Affero General Public License as * published by the Free Software Foundation, either version 3 of the * License, or (at your option) any later version. * * This program is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU Affero General Public License for more details. * * You should have received a copy of the GNU Affero General Public License * along with this program. If not, see . */ class html_converter extends rcube_plugin { private static $replaces = array( "
" => "
&»&
", "
" => "
&«&
", "\x02\x03" => "***^^^SIG^^^***", ); /** * Plugin initialization. */ function init() { // register hook to convert HTML to Text $this->add_hook('html2text', array($this, 'html2text')); } /** * Hook to convert HTML to Text. * Arguments: body, width, links, charset */ public function html2text($p) { // prepare HTML content for conversion $html = $this->prefilter($p['body']); // convert HTML to text $result = $this->convert($html, $p); // on success skip default rcube_html2text conversion if ($result !== false) { $result = $this->postfilter($result); $p['body'] = $result; $p['abort'] = true; } return $p; } /** * Html to text converter */ private function convert($html, $p) { if (empty($html)) { return false; } $rcmail = rcmail::get_instance(); $temp_dir = $rcmail->config->get('temp_dir'); $tmpfname = tempnam($temp_dir, 'rcmHtml'); // write HTML to temp file if (!file_put_contents($tmpfname, $html)) { return false; } $args = array( '{path}' => $tmpfname, '{width}' => (int) $p['width'], '{charset}' => $p['charset'], '{links}' => $p['links'] ? 1 : 0, ); /* $command = 'links -force-html -no-connect -no-g -codepage {charset}' . ' -aggressive-cache 0 -html-margin 0 -html-numbered-links {links}' . ' -width {width} -dump {path}'; */ $command = 'lynx -force_html -noreferer -nomargins -dont_wrap_pre' . ' -nolist -display_charset={charset} -width={width} -dump {path}'; $command = str_replace(array_keys($args), array_values($args), $command); if ($p['links']) { $command = str_replace(' -nolist', '', $command); } // convert HTML to text ob_start(); passthru($command, $status); $result = ob_get_contents(); ob_end_clean(); // remove temp file unlink($tmpfname); if ($status) { rcube::raise_error(array( 'line' => __LINE__, 'file' => __FILE__, 'message' => "Failed executing: $command (code: $status)" ), true, false); return false; } return $result; } /** * HTML content preparation for conversion. */ private function prefilter($html) { // blockquotes are ignored by links, so we replace them // with special code that will be handled later in postfilter // the same for special signature-replacement sequence // which is used in compose editor $html = str_ireplace(array_keys(self::$replaces), array_values(self::$replaces), $html); return $html; } /** * Post-filtering on plain text content. */ function postfilter($text) { $replaces = self::$replaces; unset($replaces['
']); unset($replaces['
']); // replace special sequences $text = str_replace(array_values($replaces), array_keys($replaces), $text); // blockquotes handling after conversion $start = str_replace('
', '', self::$replaces['
']); $end = str_replace('
', '', self::$replaces['
']); $start = html_entity_decode($start, ENT_COMPAT, 'UTF-8'); $end = html_entity_decode($end, ENT_COMPAT, 'UTF-8'); if (strpos($text, $start) !== false) { $last = false; $level = 0; $result = explode("\n", $text); foreach ($result as $idx => $line) { if ($line === $start) { $level++; $last = true; unset($result[$idx]); } else if ($line === $end) { $level--; $last = true; unset($result[$idx]); } else if ($last && !strlen($line)) { unset($result[$idx]); $last = false; } else if ($level) { $len = strlen($line); if (!$len && isset($result[$idx+1]) && ($result[$idx+1] === $end || $result[$idx+1] === $start) ) { unset($result[$idx]); } else { $result[$idx] = str_repeat('>', $level) . ($len ? ' ' . $line : ''); } } } $text = implode("\n", $result); } return $text; } }