<?php
/*
	Question2Answer by Gideon Greenspan and contributors
	http://www.question2answer.org/

	File: qa-include/qa-viewer-basic.php
	Description: Basic viewer module for displaying HTML or plain text


	This program is free software; you can redistribute it and/or
	modify it under the terms of the GNU General Public License
	as published by the Free Software Foundation; either version 2
	of the License, or (at your option) any later version.

	This program is distributed in the hope that it will be useful,
	but WITHOUT ANY WARRANTY; without even the implied warranty of
	MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
	GNU General Public License for more details.

	More about this license: http://www.question2answer.org/license.php
*/

/**
 * Viewer module that renders stored content in the 'html' format or the
 * plain text ('') format, either as HTML for display or as plain text.
 */
class qa_viewer_basic
{
	/** @var string Regex alternation of tag names that are treated as a single line break. */
	private $htmllineseparators = 'br|option';

	/** @var string Regex alternation of tag names that are treated as a paragraph break. */
	private $htmlparagraphseparators = 'address|applet|blockquote|center|cite|col|div|dd|dl|dt|embed|form|frame|frameset|h1|h2|h3|h4|h5|h6|hr|iframe|input|li|marquee|ol|p|pre|samp|select|spacer|table|tbody|td|textarea|tfoot|th|thead|tr|ul';

	/**
	 * Module initialisation hook.
	 *
	 * The separator lists are property defaults, so the viewer also works when
	 * this method was never called; the method is kept (with its original
	 * signature) so existing callers continue to work.
	 *
	 * @param string $localdir Unused by this module.
	 * @param string $htmldir  Unused by this module.
	 */
	public function load_module($localdir, $htmldir)
	{
		// nothing to do - see the property defaults above
	}

	/**
	 * Rate how well this viewer can handle content of the given format.
	 *
	 * @param string $content The content to be viewed (not inspected).
	 * @param string $format  The content's format identifier.
	 * @return float 1.0 for the '' and 'html' formats, otherwise 0.0001.
	 */
	public function calc_quality($content, $format)
	{
		$supported = ($format == '') || ($format == 'html');

		// if there's nothing better, the tiny score still selects this viewer,
		// which then outputs an error message for the unknown format
		return $supported ? 1.0 : 0.0001;
	}

	/**
	 * Render content as HTML for display.
	 *
	 * Options read from $options:
	 *  - 'linksnewwindow': passed through to qa_sanitize_html() for 'html' content
	 *  - 'blockwordspreg': if set, blocked words are masked
	 *  - 'showurllinks':   if truthy, bare URLs are converted into links
	 *
	 * @param string $content The stored content.
	 * @param string $format  'html', '' (plain text) or anything else (unsupported).
	 * @param array  $options Display options, see above.
	 * @return string HTML, or a bracketed error message for an unknown format.
	 */
	public function get_html($content, $format, $options)
	{
		if ($format == 'html') {
			$linksnewwindow = isset($options['linksnewwindow']) ? $options['linksnewwindow'] : null;

			// sanitize again for display, for extra safety, and due to new window setting
			$html = qa_sanitize_html($content, $linksnewwindow, false);

			if (isset($options['blockwordspreg'])) {
				$html = $this->mask_blocked_words_in_html($html, $options['blockwordspreg']);
			}

			if (!empty($options['showurllinks'])) {
				$html = $this->link_urls_in_html($html);
			}

			return $html;
		}

		if ($format == '') {
			if (isset($options['blockwordspreg'])) {
				require_once QA_INCLUDE_DIR.'util/string.php';
				$content = qa_block_words_replace($content, $options['blockwordspreg']);
			}

			$html = qa_html($content, true);

			if (!empty($options['showurllinks'])) {
				require_once QA_INCLUDE_DIR.'app/format.php';
				$html = qa_html_convert_urls($html, qa_opt('links_in_new_window'));
			}

			return $html;
		}

		return '[no viewer found for format: '.qa_html($format).']'; // for unknown formats
	}

	/**
	 * Render content as plain text.
	 *
	 * For 'html' content, tags are stripped (line and paragraph level tags
	 * become one or two newlines), white space is normalised and the basic
	 * HTML entities are decoded. If $options['blockwordspreg'] is set, blocked
	 * words are replaced in the resulting text.
	 *
	 * @param string $content The stored content.
	 * @param string $format  'html', '' (plain text) or anything else (unsupported).
	 * @param array  $options Display options; only 'blockwordspreg' is read.
	 * @return string Plain text, or a bracketed error message for an unknown format.
	 */
	public function get_text($content, $format, $options)
	{
		if ($format == 'html') {
			// convert all white space in HTML to spaces (each of \n, \r and \t individually)
			$text = str_replace(array("\n", "\r", "\t"), ' ', $content);

			$text = $this->newlines_before_separator_tags($text);
			$text = strip_tags($text); // all tags removed
			$text = preg_replace('/ +/', ' ', $text); // combine multiple spaces into one
			$text = preg_replace('/ *\n */', "\n", $text); // remove spaces either side new lines
			$text = preg_replace('/\n\n\n+/', "\n\n", $text); // more than two new lines combine into two

			// base HTML entities (others should not be stored in database);
			// strtr() with an array never rescans its own output, so each entity is decoded only once
			$text = strtr($text, array(
				'&#34;' => "\x22",
				'&#38;' => "\x26",
				'&#39;' => "\x27",
				'&#60;' => "\x3C",
				'&#62;' => "\x3E",
				'&nbsp;' => " ",
				'&quot;' => "\x22",
				'&amp;' => "\x26",
				'&lt;' => "\x3C",
				'&gt;' => "\x3E",
			));

			$text = trim($text);
		}
		elseif ($format == '') {
			$text = $content;
		}
		else {
			$text = '[no viewer found for format: '.$format.']'; // for unknown formats
		}

		if (isset($options['blockwordspreg'])) {
			require_once QA_INCLUDE_DIR.'util/string.php';
			$text = qa_block_words_replace($text, $options['blockwordspreg']);
		}

		return $text;
	}

	/**
	 * Insert one newline before every line-level tag and two newlines before
	 * every paragraph-level tag, leaving the tags themselves in place.
	 *
	 * @param string $html HTML markup.
	 * @return string The markup with the newlines inserted.
	 */
	private function newlines_before_separator_tags($html)
	{
		// the character after the tag name must not be alphanumeric, so e.g. "<p>" matches but "<param>" does not
		$html = preg_replace('/<\s*('.$this->htmllineseparators.')[^A-Za-z0-9]/i', "\n\\0", $html); // tags to single new line
		$html = preg_replace('/<\s*('.$this->htmlparagraphseparators.')[^A-Za-z0-9]/i', "\n\n\\0", $html); // tags to double new line

		return $html;
	}

	/**
	 * Replace blocked words inside HTML with asterisks, without touching the tags.
	 *
	 * Filtering out blocked words inline within HTML is pretty complex, e.g.
	 * p<b>oo</b>p must be caught. The words are therefore searched for in a
	 * tag-free copy of the markup and every hit is mapped back onto the HTML,
	 * skipping over any tags that interrupt it.
	 *
	 * @param string $html           Sanitized HTML.
	 * @param mixed  $blockwordspreg Blocked words expression, passed to qa_block_words_match_all().
	 * @return string The HTML with blocked words masked (and newlines inserted before separator tags).
	 */
	private function mask_blocked_words_in_html($html, $blockwordspreg)
	{
		require_once QA_INCLUDE_DIR.'util/string.php';

		$html = $this->newlines_before_separator_tags($html);

		preg_match_all('/<[^>]*>/', $html, $pregmatches, PREG_OFFSET_CAPTURE); // find tag positions and lengths
		$tagmatches = $pregmatches[0];
		$text = preg_replace('/<[^>]*>/', '', $html); // effectively strip_tags() but use same regexp as above to ensure consistency

		// search for blocked words within text; the result is iterated as (byte offset in $text) => (byte length)
		$blockmatches = qa_block_words_match_all($text, $blockwordspreg);

		$nexttagmatch = array_shift($tagmatches); // null once all tags have been consumed
		$texttohtml = 0; // bytes of tags passed so far, i.e. offset in the unmodified HTML minus offset in $text
		$htmlshift = 0;  // how far later HTML has moved because multi-byte characters became single asterisks

		foreach ($blockmatches as $textoffset => $textlength) {
			// keep text and html in sync: consume all tags that start at or before this match
			while (isset($nexttagmatch) && ($nexttagmatch[1] <= ($textoffset + $texttohtml))) {
				$texttohtml += strlen($nexttagmatch[0]);
				$nexttagmatch = array_shift($tagmatches);
			}

			while (true) {
				$replacepart = $textlength;

				if (isset($nexttagmatch)) {
					// stop replacing early if we hit an HTML tag
					$replacepart = min($replacepart, $nexttagmatch[1] - ($textoffset + $texttohtml));
				}

				$replacelength = qa_strlen(substr($text, $textoffset, $replacepart)); // to work with multi-byte characters
				$html = substr_replace($html, str_repeat('*', $replacelength), $textoffset + $texttohtml + $htmlshift, $replacepart);
				$htmlshift += $replacelength - $replacepart; // HTML might have moved around if we replaced multi-byte characters

				if ($replacepart >= $textlength) {
					break; // we have replaced everything expected
				}

				// otherwise more is left (due to hitting an HTML tag): skip that tag and carry on after it
				$textlength -= $replacepart;
				$textoffset += $replacepart;
				$texttohtml += strlen($nexttagmatch[0]);
				$nexttagmatch = array_shift($tagmatches);
			}
		}

		return $html;
	}

	/**
	 * Turn bare URLs in HTML into links, leaving existing links and tag
	 * attributes (e.g. the src of an img tag) untouched.
	 *
	 * @param string $html Sanitized HTML.
	 * @return string The HTML with bare URLs converted by qa_html_convert_urls().
	 */
	private function link_urls_in_html($html)
	{
		require_once QA_INCLUDE_DIR.'app/format.php'; // same include as used for plain text content before qa_html_convert_urls()

		$newwindow = qa_opt('links_in_new_window');

		// Split around existing links so we never put new links inside them. The lazy match
		// with the "s" modifier makes every <a ...>...</a> element its own separator, even
		// if it spans several lines, so text between two links is still examined.
		// Work from the end so earlier offsets stay valid while we substitute.
		$htmlunlinkeds = array_reverse(preg_split('|<[Aa]\s+[^>]+>.*?</[Aa]\s*>|s', $html, -1, PREG_SPLIT_OFFSET_CAPTURE));

		foreach ($htmlunlinkeds as $htmlunlinked) {
			// split around tags too so we don't detect URLs inside them; again, start from end
			$thishtmluntaggeds = array_reverse(preg_split('/<[^>]*>/', $htmlunlinked[0], -1, PREG_SPLIT_OFFSET_CAPTURE));

			foreach ($thishtmluntaggeds as $thishtmluntagged) {
				$innerhtml = $thishtmluntagged[0];

				if (strpos($innerhtml, '://') === false) {
					continue; // quick test first
				}

				$newhtml = qa_html_convert_urls($innerhtml, $newwindow);
				$html = substr_replace($html, $newhtml, $htmlunlinked[1] + $thishtmluntagged[1], strlen($innerhtml));
			}
		}

		return $html;
	}
}