<?php
/*
	Question2Answer by Gideon Greenspan and contributors
	http://www.question2answer.org/

	Description: Basic viewer module for displaying HTML or plain text


	This program is free software; you can redistribute it and/or
	modify it under the terms of the GNU General Public License
	as published by the Free Software Foundation; either version 2
	of the License, or (at your option) any later version.

	This program is distributed in the hope that it will be useful,
	but WITHOUT ANY WARRANTY; without even the implied warranty of
	MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
	GNU General Public License for more details.

	More about this license: http://www.question2answer.org/license.php
*/

/**
 * Basic viewer module for displaying HTML or plain text.
 *
 * Two content formats are handled: '' (plain text) and 'html'. Any other
 * format yields a "[no viewer found for format: ...]" placeholder.
 */
class qa_viewer_basic
{
	/**
	 * Regex alternation of tag names whose opening tag is turned into a single new line.
	 * Set as a default so the viewer also works if load_module() was never called.
	 *
	 * @var string
	 */
	private $htmllineseparators = 'br|option';

	/**
	 * Regex alternation of tag names whose opening tag is turned into a double new line.
	 *
	 * @var string
	 */
	private $htmlparagraphseparators = 'address|applet|blockquote|center|cite|col|div|dd|dl|dt|embed|form|frame|frameset|h1|h2|h3|h4|h5|h6|hr|iframe|input|li|marquee|ol|p|pre|samp|select|spacer|table|tbody|td|textarea|tfoot|th|thead|tr|ul';


	/**
	 * Module loading hook, kept so existing callers can still invoke it.
	 *
	 * The separator lists this method used to assign are now property defaults,
	 * so there is nothing left to initialise here.
	 *
	 * @param string $localdir Not used by this module.
	 * @param string $htmldir Not used by this module.
	 */
	public function load_module($localdir, $htmldir)
	{
	}


	/**
	 * Rate how well this viewer can display content in the given format.
	 *
	 * @param string $content Not inspected; only $format decides the result.
	 * @param string $format Content format identifier.
	 * @return float 1.0 for '' and 'html', otherwise 0.0001.
	 */
	public function calc_quality($content, $format)
	{
		// Loose comparison is kept on purpose: a null $format also compares equal to ''.
		if ($format == '' || $format == 'html')
			return 1.0;

		return 0.0001; // if there's nothing better this will give an error message for unknown formats
	}


	/**
	 * Render content as HTML for display.
	 *
	 * Options read from $options:
	 *  - 'linksnewwindow': passed through to qa_sanitize_html() (HTML format only)
	 *  - 'blockwordspreg': if set, blocked words are masked
	 *  - 'showurllinks': if truthy, bare URLs are converted to links
	 *
	 * @param string $content Stored content.
	 * @param string $format '' for plain text or 'html'.
	 * @param array $options Display options, see above.
	 * @return string HTML, or a placeholder message for an unknown format.
	 */
	public function get_html($content, $format, $options)
	{
		if ($format == 'html') {
			// isset() yields the same value the former @-suppressed read did, without hiding other errors
			$linksnewwindow = isset($options['linksnewwindow']) ? $options['linksnewwindow'] : null;

			// sanitize again for display, for extra safety, and due to new window setting
			$html = qa_sanitize_html($content, $linksnewwindow, false);

			if (isset($options['blockwordspreg']))
				$html = $this->block_words_in_html($html, $options['blockwordspreg']);

			if (!empty($options['showurllinks']))
				$html = $this->link_urls_in_html($html);

			return $html;
		}

		if ($format == '') {
			if (isset($options['blockwordspreg'])) {
				require_once QA_INCLUDE_DIR . 'util/string.php';
				$content = qa_block_words_replace($content, $options['blockwordspreg']);
			}

			$html = qa_html($content, true);

			if (!empty($options['showurllinks'])) {
				require_once QA_INCLUDE_DIR . 'app/format.php';
				$html = qa_html_convert_urls($html, qa_opt('links_in_new_window'));
			}

			return $html;
		}

		return '[no viewer found for format: ' . qa_html($format) . ']'; // for unknown formats
	}


	/**
	 * Render content as plain text.
	 *
	 * For 'html' content, tags are stripped, white space is normalised and a fixed
	 * set of basic entities is decoded. Plain text content is returned unchanged.
	 * If $options['blockwordspreg'] is set, blocked words are masked in the result.
	 *
	 * @param string $content Stored content.
	 * @param string $format '' for plain text or 'html'.
	 * @param array $options Display options; only 'blockwordspreg' is read.
	 * @return string Plain text, or a placeholder message for an unknown format.
	 */
	public function get_text($content, $format, $options)
	{
		if ($format == 'html') {
			$text = strtr($content, "\n\r\t", '   '); // convert all white space in HTML to spaces
			$text = preg_replace('/<\s*(' . $this->htmllineseparators . ')[^A-Za-z0-9]/i', "\n\\0", $text); // tags to single new line
			$text = preg_replace('/<\s*(' . $this->htmlparagraphseparators . ')[^A-Za-z0-9]/i', "\n\n\\0", $text); // tags to double new line
			$text = strip_tags($text); // all tags removed
			$text = preg_replace('/  +/', ' ', $text); // combine multiple spaces into one
			$text = preg_replace('/ *\n */', "\n", $text); // remove spaces either side new lines
			$text = preg_replace('/\n\n\n+/', "\n\n", $text); // more than two new lines combine into two
			$text = strtr($text, array(
				'&#34;' => "\x22",
				'&#38;' => "\x26",
				'&#39;' => "\x27",
				'&#60;' => "\x3C",
				'&#62;' => "\x3E",
				'&nbsp;' => " ",
				'&quot;' => "\x22",
				'&amp;' => "\x26",
				'&lt;' => "\x3C",
				'&gt;' => "\x3E",
			)); // base HTML entities (others should not be stored in database)

			$text = trim($text);

		} elseif ($format == '')
			$text = $content;
		else
			$text = '[no viewer found for format: ' . $format . ']'; // for unknown formats

		if (isset($options['blockwordspreg'])) {
			require_once QA_INCLUDE_DIR . 'util/string.php';
			$text = qa_block_words_replace($text, $options['blockwordspreg']);
		}

		return $text;
	}


	/**
	 * Mask blocked words inside HTML with asterisks, leaving the tags intact.
	 *
	 * Filtering inline within HTML is complex because a word can be split by tags,
	 * e.g. p<b>oo</b>p must be caught. The search therefore runs on the tag-free
	 * text, and each match is mapped back to its byte position in the HTML.
	 *
	 * @param string $html Sanitized HTML.
	 * @param mixed $blockwordspreg Passed unchanged to qa_block_words_match_all().
	 * @return string HTML with blocked words replaced by '*' characters.
	 */
	private function block_words_in_html($html, $blockwordspreg)
	{
		require_once QA_INCLUDE_DIR . 'util/string.php';

		$html = preg_replace('/<\s*(' . $this->htmllineseparators . ')[^A-Za-z0-9]/i', "\n\\0", $html); // tags to single new line
		$html = preg_replace('/<\s*(' . $this->htmlparagraphseparators . ')[^A-Za-z0-9]/i', "\n\n\\0", $html); // tags to double new line

		preg_match_all('/<[^>]*>/', $html, $pregmatches, PREG_OFFSET_CAPTURE); // find tag positions and lengths
		$tagmatches = $pregmatches[0];
		$text = preg_replace('/<[^>]*>/', '', $html); // effectively strip_tags() but use same regexp as above to ensure consistency

		// iterated below as (offset within $text) => (length of match)
		$blockmatches = qa_block_words_match_all($text, $blockwordspreg);

		$nexttagmatch = array_shift($tagmatches); // null once all tags are consumed
		$texttohtml = 0; // total length of the tags that precede the current text position
		$htmlshift = 0; // how far earlier replacements have changed the length of $html

		foreach ($blockmatches as $textoffset => $textlength) {
			while (isset($nexttagmatch) && ($nexttagmatch[1] <= ($textoffset + $texttohtml))) { // keep text and html in sync
				$texttohtml += strlen($nexttagmatch[0]);
				$nexttagmatch = array_shift($tagmatches);
			}

			while (1) {
				$replacepart = $textlength;
				if (isset($nexttagmatch))
					$replacepart = min($replacepart, $nexttagmatch[1] - ($textoffset + $texttohtml)); // stop replacing early if we hit an HTML tag

				$replacelength = qa_strlen(substr($text, $textoffset, $replacepart)); // to work with multi-byte characters

				$html = substr_replace($html, str_repeat('*', $replacelength), $textoffset + $texttohtml + $htmlshift, $replacepart);
				$htmlshift += $replacelength - $replacepart; // HTML might have moved around if we replaced multi-byte characters

				if ($replacepart >= $textlength)
					break; // we have replaced everything expected, otherwise more left (due to hitting an HTML tag)

				// skip over the tag that interrupted the word and continue with the remainder
				$textlength -= $replacepart;
				$textoffset += $replacepart;
				$texttohtml += strlen($nexttagmatch[0]);
				$nexttagmatch = array_shift($tagmatches);
			}
		}

		return $html;
	}


	/**
	 * Convert bare URLs in HTML to links, skipping <a>, <code> and <pre> elements
	 * and the insides of tags (e.g. <img src="http://...">).
	 *
	 * @param string $html Sanitized HTML.
	 * @return string HTML with URLs outside the skipped regions converted by qa_html_convert_urls().
	 */
	private function link_urls_in_html($html)
	{
		require_once QA_INCLUDE_DIR . 'util/string.php';
		require_once QA_INCLUDE_DIR . 'app/format.php'; // required before qa_html_convert_urls(), as the plain text path does

		// Split on whole <a>/<code>/<pre> elements. The lazy quantifier and the backreference stop at
		// the first matching closing tag, so text lying between two such elements is still processed;
		// \b keeps tags such as <address> from being mistaken for <a>.
		$htmlunlinkeds = array_reverse(preg_split('#<(a|code|pre)\b[^>]*>.*?</\1\s*>#is', $html, -1, PREG_SPLIT_OFFSET_CAPTURE)); // start from end so we substitute correctly

		foreach ($htmlunlinkeds as $htmlunlinked) {
			$thishtmluntaggeds = array_reverse(preg_split('/<[^>]*>/', $htmlunlinked[0], -1, PREG_SPLIT_OFFSET_CAPTURE)); // again, start from end

			foreach ($thishtmluntaggeds as $thishtmluntagged) {
				$innerhtml = $thishtmluntagged[0];

				if (strpos($innerhtml, '://') !== false) { // quick test first
					$newhtml = qa_html_convert_urls($innerhtml, qa_opt('links_in_new_window'));

					// offsets are relative: outer piece within $html, inner piece within the outer piece
					$html = substr_replace($html, $newhtml, $htmlunlinked[1] + $thishtmluntagged[1], strlen($innerhtml));
				}
			}
		}

		return $html;
	}
}