<?php
/*
	Question2Answer by Gideon Greenspan and contributors
	http://www.question2answer.org/

	File: qa-include/qa-viewer-basic.php
	Description: Basic viewer module for displaying HTML or plain text


	This program is free software; you can redistribute it and/or
	modify it under the terms of the GNU General Public License
	as published by the Free Software Foundation; either version 2
	of the License, or (at your option) any later version.

	This program is distributed in the hope that it will be useful,
	but WITHOUT ANY WARRANTY; without even the implied warranty of
	MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
	GNU General Public License for more details.

	More about this license: http://www.question2answer.org/license.php
*/

/**
 * Basic viewer module: renders stored post content as HTML or as plain text.
 *
 * Two content formats are handled: '' (plain text) and 'html'. Any other
 * format produces a "[no viewer found ...]" placeholder.
 */
class qa_viewer_basic
{
	/** @var string Regex alternation of tag names that become a single new line. */
	private $htmllineseparators;

	/** @var string Regex alternation of tag names that become a double new line. */
	private $htmlparagraphseparators;


	/**
	 * Initialise the tag lists used when converting HTML structure to new lines.
	 *
	 * @param string $localdir Not used by this module.
	 * @param string $htmldir Not used by this module.
	 */
	public function load_module($localdir, $htmldir)
	{
		$this->htmllineseparators='br|option';
		$this->htmlparagraphseparators='address|applet|blockquote|center|cite|col|div|dd|dl|dt|embed|form|frame|frameset|h1|h2|h3|h4|h5|h6'.
			'|hr|iframe|input|li|marquee|ol|p|pre|samp|select|spacer|table|tbody|td|textarea|tfoot|th|thead|tr|ul';
	}


	/**
	 * Report how well this viewer can display content in the given format.
	 *
	 * @param string $content The content to be displayed (not inspected here).
	 * @param string $format The content format.
	 * @return float 1.0 for '' and 'html', otherwise 0.0001.
	 */
	public function calc_quality($content, $format)
	{
		if ( ($format=='') || ($format=='html') )
			return 1.0;

		return 0.0001; // if there's nothing better this will give an error message for unknown formats
	}


	/**
	 * Return the HTML to display for $content stored in $format.
	 *
	 * Options read from $options (all optional):
	 *  - 'linksnewwindow': passed through to qa_sanitize_html() for 'html' content
	 *  - 'blockwordspreg': if set, blocked words are masked
	 *  - 'showurllinks': if truthy, bare URLs in the text are turned into links
	 *
	 * @param string $content The stored content.
	 * @param string $format The content format ('' or 'html').
	 * @param array $options Display options, see above.
	 * @return string HTML ready for output.
	 */
	public function get_html($content, $format, $options)
	{
		if ($format=='html') {
			$linksnewwindow=isset($options['linksnewwindow']) ? $options['linksnewwindow'] : null;

			$html=qa_sanitize_html($content, $linksnewwindow, false); // sanitize again for display, for extra safety, and due to new window setting

			if (isset($options['blockwordspreg']))
				$html=$this->html_block_words($html, $options['blockwordspreg']);

			if (!empty($options['showurllinks']))
				$html=$this->html_convert_urls($html);
		}
		elseif ($format=='') {
			if (isset($options['blockwordspreg'])) {
				require_once QA_INCLUDE_DIR.'util/string.php';
				$content=qa_block_words_replace($content, $options['blockwordspreg']);
			}

			$html=qa_html($content, true);

			if (!empty($options['showurllinks'])) {
				require_once QA_INCLUDE_DIR.'app/format.php';
				$html=qa_html_convert_urls($html, qa_opt('links_in_new_window'));
			}
		}
		else
			$html='[no viewer found for format: '.qa_html($format).']'; // for unknown formats

		return $html;
	}


	/**
	 * Return the plain text equivalent of $content stored in $format.
	 *
	 * For 'html' content, tags are stripped (with line/paragraph level tags
	 * turned into new lines), white space is normalised and the base HTML
	 * entities are decoded. If $options['blockwordspreg'] is set, blocked
	 * words are then replaced in the resulting text.
	 *
	 * @param string $content The stored content.
	 * @param string $format The content format ('' or 'html').
	 * @param array $options Display options; only 'blockwordspreg' is read.
	 * @return string Plain text.
	 */
	public function get_text($content, $format, $options)
	{
		if ($format=='html') {
			$text=strtr($content, "\n\r\t", '   '); // convert all white space in HTML to spaces
			$text=$this->separator_tags_to_newlines($text);
			$text=strip_tags($text); // all tags removed
			$text=preg_replace('/  +/', ' ', $text); // combine multiple spaces into one
			$text=preg_replace('/ *\n */', "\n", $text); // remove spaces either side new lines
			$text=preg_replace('/\n\n\n+/', "\n\n", $text); // more than two new lines combine into two
			$text=strtr($text, array(
				'&#34;' => "\x22",
				'&#38;' => "\x26",
				'&#39;' => "\x27",
				'&#60;' => "\x3C",
				'&#62;' => "\x3E",
				'&nbsp;' => " ",
				'&quot;' => "\x22",
				'&amp;' => "\x26",
				'&lt;' => "\x3C",
				'&gt;' => "\x3E",
			)); // base HTML entities (others should not be stored in database)

			$text=trim($text);
		}
		elseif ($format=='')
			$text=$content;
		else
			$text='[no viewer found for format: '.$format.']'; // for unknown formats

		if (isset($options['blockwordspreg'])) {
			require_once QA_INCLUDE_DIR.'util/string.php';
			$text=qa_block_words_replace($text, $options['blockwordspreg']);
		}

		return $text;
	}


	/**
	 * Insert new lines in front of line and paragraph level tags in $html.
	 *
	 * A single new line is placed before each tag listed in
	 * $htmllineseparators and a double new line before each tag listed in
	 * $htmlparagraphseparators. The tags themselves are left in place.
	 *
	 * @param string $html HTML to process.
	 * @return string HTML with the new lines inserted.
	 */
	private function separator_tags_to_newlines($html)
	{
		$html=preg_replace('/<\s*('.$this->htmllineseparators.')[^A-Za-z0-9]/i', "\n\\0", $html); // tags to single new line

		return preg_replace('/<\s*('.$this->htmlparagraphseparators.')[^A-Za-z0-9]/i', "\n\n\\0", $html); // tags to double new line
	}


	/**
	 * Mask blocked words in $html with asterisks, leaving all tags intact.
	 *
	 * Filtering out blocked words inline within HTML is pretty complex, e.g.
	 * p<b>oo</b>p must be caught. The words are searched for in the tag-free
	 * text, then each match is mapped back to its position in the HTML and
	 * replaced piece by piece around any tags that interrupt it.
	 *
	 * @param string $html Sanitized HTML.
	 * @param string $blockwordspreg Value of $options['blockwordspreg'], passed to qa_block_words_match_all().
	 * @return string HTML with blocked words replaced by asterisks.
	 */
	private function html_block_words($html, $blockwordspreg)
	{
		require_once QA_INCLUDE_DIR.'util/string.php';

		$html=$this->separator_tags_to_newlines($html);

		preg_match_all('/<[^>]*>/', $html, $pregmatches, PREG_OFFSET_CAPTURE); // find tag positions and lengths
		$tagmatches=$pregmatches[0];
		$text=preg_replace('/<[^>]*>/', '', $html); // effectively strip_tags() but use same regexp as above to ensure consistency

		$blockmatches=qa_block_words_match_all($text, $blockwordspreg); // search for blocked words within text

		$nexttagmatch=array_shift($tagmatches);
		$texttohtml=0; // bytes of tags passed so far, i.e. offset from a position in $text to the same position in the original $html
		$htmlshift=0; // change in length of $html caused by replacements made so far

		foreach ($blockmatches as $textoffset => $textlength) {
			while ( isset($nexttagmatch) && ($nexttagmatch[1]<=($textoffset+$texttohtml)) ) { // keep text and html in sync
				$texttohtml+=strlen($nexttagmatch[0]);
				$nexttagmatch=array_shift($tagmatches);
			}

			while (1) {
				$replacepart=$textlength;
				if (isset($nexttagmatch))
					$replacepart=min($replacepart, $nexttagmatch[1]-($textoffset+$texttohtml)); // stop replacing early if we hit an HTML tag

				$replacelength=qa_strlen(substr($text, $textoffset, $replacepart)); // to work with multi-byte characters

				$html=substr_replace($html, str_repeat('*', $replacelength), $textoffset+$texttohtml+$htmlshift, $replacepart);
				$htmlshift+=$replacelength-$replacepart; // HTML might have moved around if we replaced multi-byte characters

				if ($replacepart>=$textlength)
					break; // we have replaced everything expected, otherwise more left (due to hitting an HTML tag)

				// skip over the tag that interrupted the word and carry on with the remainder
				$textlength-=$replacepart;
				$textoffset+=$replacepart;
				$texttohtml+=strlen($nexttagmatch[0]);
				$nexttagmatch=array_shift($tagmatches);
			}
		}

		return $html;
	}


	/**
	 * Turn bare URLs in $html into links, without touching existing links or tag attributes.
	 *
	 * The HTML is first split around complete <a ...>...</a> elements so new
	 * links are never placed inside existing ones, then each remaining piece
	 * is split around tags so that URLs inside HTML, e.g.
	 * <img src="http://...">, are not detected. Pieces are processed from the
	 * end of the string backwards so earlier offsets stay valid.
	 *
	 * @param string $html Sanitized HTML.
	 * @return string HTML with URLs converted by qa_html_convert_urls().
	 */
	private function html_convert_urls($html)
	{
		require_once QA_INCLUDE_DIR.'util/string.php';
		require_once QA_INCLUDE_DIR.'app/format.php'; // loaded before qa_html_convert_urls() is called, as for plain text content

		// Lazy .*? stops at the first closing </a>, so text between two links on
		// the same line is still scanned; the s modifier lets a link's content
		// span several lines and still be treated as one existing link.
		$htmlunlinkeds=array_reverse(preg_split('|<[Aa]\s+[^>]+>.*?</[Aa]\s*>|s', $html, -1, PREG_SPLIT_OFFSET_CAPTURE)); // start from end so we substitute correctly

		foreach ($htmlunlinkeds as $htmlunlinked) {
			$thishtmluntaggeds=array_reverse(preg_split('/<[^>]*>/', $htmlunlinked[0], -1, PREG_SPLIT_OFFSET_CAPTURE)); // again, start from end

			foreach ($thishtmluntaggeds as $thishtmluntagged) {
				$innerhtml=$thishtmluntagged[0];

				if (strpos($innerhtml, '://')!==false) { // quick test first
					$newhtml=qa_html_convert_urls($innerhtml, qa_opt('links_in_new_window'));

					$html=substr_replace($html, $newhtml, $htmlunlinked[1]+$thishtmluntagged[1], strlen($innerhtml));
				}
			}
		}

		return $html;
	}
}