3 /** @class: InputFilter (PHP4 & PHP5, with comments)
4 * @project: PHP Input Filter
6 * @version: 1.2.2_php4/php5
7 * @author: Daniel Morris
8 * @contributors: Gianpaolo Racca, Ghislain Picard, Marco Wandschneider, Chris Tobin and Andrew Eddie.
9 * @copyright: Daniel Morris
10 * @email: dan@rootcube.com
11 * @license: GNU General Public License (GPL)
14 var $tagsArray; // user-defined tag names, lower-cased by the constructor; default = empty array
15 var $attrArray; // user-defined attribute names, lower-cased by the constructor; default = empty array
17 var $tagsMethod; // 0 = keep only tags listed in $tagsArray, 1 = keep every tag except those listed; default = 0
18 var $attrMethod; // 0 = keep only attributes listed in $attrArray, 1 = keep every attribute except those listed; default = 0
20 var $xssAuto; // when true, tags/attributes on the blacklists below are stripped as well; default = 1
// tag names that filterTags() drops whenever $xssAuto is on
21 var $tagBlacklist = array('applet', 'body', 'bgsound', 'base', 'basefont', 'embed', 'frame', 'frameset', 'head', 'html', 'id', 'iframe', 'ilayer', 'layer', 'link', 'meta', 'name', 'object', 'script', 'style', 'title', 'xml');
// attribute names that filterAttr() drops whenever $xssAuto is on
22 var $attrBlacklist = array('action', 'background', 'codebase', 'dynsrc', 'lowsrc'); // attributes whose name starts with "on" (event handlers) are stripped too
25 * Constructor for inputFilter class. All parameters are optional.
27 * @param Array $tagsArray - list of user-defined tags
28 * @param Array $attrArray - list of user-defined attributes
29 * @param int $tagsMethod - 0= allow just user-defined, 1= allow all but user-defined
30 * @param int $attrMethod - 0= allow just user-defined, 1= allow all but user-defined
31 * @param int $xssAuto - 0= only auto clean essentials, 1= also clean blacklisted tags/attr
/**
 * Constructor for inputFilter class. Every parameter is optional.
 *
 * NOTE(review): this is a PHP 4 style constructor (method named after the class).
 * PHP 8 no longer calls such a method on `new`; callers on PHP 8 must invoke it explicitly.
 *
 * @param Array $tagsArray - list of user-defined tags
 * @param Array $attrArray - list of user-defined attributes
 * @param int $tagsMethod - 0= allow just user-defined, 1= allow all but user-defined
 * @param int $attrMethod - 0= allow just user-defined, 1= allow all but user-defined
 * @param int $xssAuto - 0= only auto clean essentials, 1= also clean blacklisted tags/attr
 */
function inputFilter($tagsArray = array(), $attrArray = array(), $tagsMethod = 0, $attrMethod = 0, $xssAuto = 1) {
    // Cast first so a single name passed as a string is accepted, then lowercase
    // every entry whatever its key is (an index loop would miss associative or
    // sparse lists and leave mixed-case names that never match in_array()).
    $this->tagsArray = array_map('strtolower', (array) $tagsArray);
    $this->attrArray = array_map('strtolower', (array) $attrArray);
    // assign the remaining options to member vars
    $this->tagsMethod = $tagsMethod;
    $this->attrMethod = $attrMethod;
    $this->xssAuto = $xssAuto;
}
46 * Method to be called by another php script. Processes for XSS and specified bad code.
48 * @param Mixed $source - input string/array-of-string to be 'cleaned'
49 * @return Mixed $source - 'cleaned' version of input parameter (string or array, as given)
/**
 * Public entry point: cleans a string, or every string element of an array,
 * of XSS and other unwanted markup. Any other input is handed back untouched.
 *
 * @param Mixed $source - input string/array-of-string to be 'cleaned'
 * @return Mixed $source - 'cleaned' version of input parameter
 */
function process($source) {
    // a single string is decoded and then filtered
    if (is_string($source)) {
        return $this->remove($this->decode($source));
    }
    // neither string nor array: return parameter as given
    if (!is_array($source)) {
        return $source;
    }
    // array: clean each string element in place, leave other elements alone
    foreach (array_keys($source) as $index) {
        if (is_string($source[$index])) {
            $source[$index] = $this->remove($this->decode($source[$index]));
        }
    }
    return $source;
}
67 * Internal method to iteratively remove all unwanted tags and attributes
69 * @param String $source - input string to be 'cleaned'
70 * @return String $source - 'cleaned' version of input parameter
/**
 * Internal method to iteratively remove all unwanted tags and attributes.
 * filterTags() is re-applied until a pass leaves the string unchanged, which
 * provides nested-tag protection.
 *
 * @param String $source - input string to be 'cleaned'
 * @return String $source - 'cleaned' version of input parameter
 */
function remove($source) {
    // Run the filter once per pass and reuse its result for both the
    // comparison and the assignment (previously every pass filtered twice).
    $filtered = $this->filterTags($source);
    while ($source != $filtered) {
        $source = $filtered;
        $filtered = $this->filterTags($source);
    }
    return $source;
}
83 * Internal method to strip a string of certain tags
85 * @param String $source - input string to be 'cleaned'
86 * @return String $source - 'cleaned' version of input parameter
88 function filterTags($source) {
// Scans the string left to right: text in front of each '<' is appended to $preTag,
// the tag that follows is inspected, and the not-yet-processed remainder is kept in $postTag.
// NOTE(review): the initialisation of $preTag, $postTag and $attrSet is not visible in this listing - confirm against the full file.
92 // find initial tag's position
93 $tagOpen_start = strpos($source, '<');
94 // iterate through string until no tags left
95 while($tagOpen_start !== FALSE) {
96 // process tag iteratively
97 $preTag .= substr($postTag, 0, $tagOpen_start);
98 $postTag = substr($postTag, $tagOpen_start);
99 $fromTagOpen = substr($postTag, 1);
// position of the closing '>' relative to the text after '<'; no '>' at all ends the scan
101 $tagOpen_end = strpos($fromTagOpen, '>');
102 if ($tagOpen_end === false) break;
103 // next start of tag (for nested tag assessment)
104 $tagOpen_nested = strpos($fromTagOpen, '<');
// another '<' appears before the '>': copy everything up to that inner '<' as text and restart from it
105 if (($tagOpen_nested !== false) && ($tagOpen_nested < $tagOpen_end)) {
106 $preTag .= substr($postTag, 0, ($tagOpen_nested+1));
107 $postTag = substr($postTag, ($tagOpen_nested+1));
108 $tagOpen_start = strpos($postTag, '<');
// NOTE(review): $tagOpen_nested is recomputed on the next line but is not read again in the visible code
111 $tagOpen_nested = (strpos($fromTagOpen, '<') + $tagOpen_start + 1);
// $currentTag is the text between '<' and '>'
112 $currentTag = substr($fromTagOpen, 0, $tagOpen_end);
113 $tagLength = strlen($currentTag);
116 $tagOpen_start = strpos($postTag, '<');
118 // iterate through tag finding attribute pairs - setup
119 $tagLeft = $currentTag;
121 $currentSpace = strpos($tagLeft, ' ');
// a leading '/' marks an end tag; the tag name is the text up to the first space
123 if (substr($currentTag, 0, 1) == "/") {
125 list($tagName) = explode(' ', $currentTag);
126 $tagName = substr($tagName, 1);
130 list($tagName) = explode(' ', $currentTag);
132 // excludes all "non-regular" tagnames OR no tagname OR remove if xssauto is on and tag is blacklisted
133 if ((!preg_match("/^[a-z][a-z0-9]*$/i",$tagName)) || (!$tagName) || ((in_array(strtolower($tagName), $this->tagBlacklist)) && ($this->xssAuto))) {
// skip past the whole tag: its text plus the surrounding '<' and '>' (hence + 2)
134 $postTag = substr($postTag, ($tagLength + 2));
135 $tagOpen_start = strpos($postTag, '<');
136 // don't append this tag
139 // this while is needed to support attribute values with spaces in!
140 while ($currentSpace !== FALSE) {
141 $fromSpace = substr($tagLeft, ($currentSpace+1));
142 $nextSpace = strpos($fromSpace, ' ');
143 $openQuotes = strpos($fromSpace, '"');
144 $closeQuotes = strpos(substr($fromSpace, ($openQuotes+1)), '"') + $openQuotes + 1;
145 // another equals exists
146 if (strpos($fromSpace, '=') !== FALSE) {
147 // opening and closing quotes exists
148 if (($openQuotes !== FALSE) && (strpos(substr($fromSpace, ($openQuotes+1)), '"') !== FALSE))
149 $attr = substr($fromSpace, 0, ($closeQuotes+1));
150 // one or neither exist
151 else $attr = substr($fromSpace, 0, $nextSpace);
152 // no more equals exist
153 } else $attr = substr($fromSpace, 0, $nextSpace);
// nothing extracted: treat the rest of the tag as the last attribute
155 if (!$attr) $attr = $fromSpace;
156 // add to attribute pairs array
159 $tagLeft = substr($fromSpace, strlen($attr));
160 $currentSpace = strpos($tagLeft, ' ');
162 // appears in array specified by user
163 $tagFound = in_array(strtolower($tagName), $this->tagsArray);
164 // keep (rebuild) this tag only when the user-defined list and tagsMethod allow it
165 if ((!$tagFound && $this->tagsMethod) || ($tagFound && !$this->tagsMethod)) {
166 // reconstruct tag with allowed attributes
168 $attrSet = $this->filterAttr($attrSet);
169 $preTag .= '<' . $tagName;
170 for ($i = 0; $i < count($attrSet); $i++)
171 $preTag .= ' ' . $attrSet[$i];
172 // reformat single tags to XHTML: close with '>' when a matching end tag follows later in the string, otherwise self-close
173 if (strpos($fromTagOpen, "</" . $tagName)) $preTag .= '>';
174 else $preTag .= ' />';
// end tags are re-emitted with the tag name only
176 } else $preTag .= '</' . $tagName . '>';
178 // find next tag's start
179 $postTag = substr($postTag, ($tagLength + 2));
180 $tagOpen_start = strpos($postTag, '<');
182 // append any code after end of tags
188 * Internal method to strip a tag of certain attributes
190 * @param Array $attrSet
191 * @return Array $newSet
/**
 * Internal method to strip a tag of certain attributes.
 *
 * Each surviving attribute is re-serialised as name="value"; an attribute
 * without a value becomes name="name" (XHTML style).
 *
 * @param Array $attrSet - raw attribute strings ("name" or "name=value") of one tag
 * @return Array $newSet - the attributes that are allowed to stay
 */
function filterAttr($attrSet) {
    $newSet = array();
    // process attributes
    foreach ((array) $attrSet as $rawAttr) {
        // skip blank spaces in tag
        if (!$rawAttr) continue;
        // split into attr name and value; the limit keeps any '=' inside the
        // value (e.g. a query string) instead of cutting the value short
        $attrSubSet = explode('=', trim($rawAttr), 2);
        $nameParts = explode(' ', $attrSubSet[0]);
        $attrName = $nameParts[0];
        $attrNameLower = strtolower($attrName);
        // an attribute may come without a value
        $attrValue = isset($attrSubSet[1]) ? $attrSubSet[1] : '';
        // removes all "non-regular" (or empty) attr names; preg_match replaces
        // eregi(), which no longer exists in PHP 7+
        if (!preg_match('/^[a-z]+$/iD', $attrName)) continue;
        // removes blacklisted attrs and ALL event handlers; the "on" prefix is
        // tested on the lower-cased name so ONCLICK etc. cannot slip through
        if (($this->xssAuto) && ((in_array($attrNameLower, $this->attrBlacklist)) || (substr($attrNameLower, 0, 2) == 'on')))
            continue;
        // xss attr value filtering
        if ($attrValue) {
            // strips unicode, hex, etc
            $attrValue = str_replace('&#', '', $attrValue);
            // strip normal newline within attr value
            $attrValue = preg_replace('/\s+/', '', $attrValue);
            // strip double quotes
            $attrValue = str_replace('"', '', $attrValue);
            // [requested feature] drop single quotes wrapping the value (it is re-quoted with doubles below)
            if ((substr($attrValue, 0, 1) == "'") && (substr($attrValue, (strlen($attrValue) - 1), 1) == "'"))
                $attrValue = substr($attrValue, 1, (strlen($attrValue) - 2));
            // strip slashes
            $attrValue = stripslashes($attrValue);
        }
        // auto strip attr's whose value carries script protocols or CSS expressions
        $valueLower = strtolower($attrValue);
        if (((strpos($valueLower, 'expression') !== false) && ($attrNameLower == 'style')) ||
            (strpos($valueLower, 'javascript:') !== false) ||
            (strpos($valueLower, 'behaviour:') !== false) ||
            // the CSS property is actually spelled "behavior"
            (strpos($valueLower, 'behavior:') !== false) ||
            (strpos($valueLower, 'vbscript:') !== false) ||
            (strpos($valueLower, 'mocha:') !== false) ||
            (strpos($valueLower, 'livescript:') !== false)
        ) continue;
        // if matches user defined array
        $attrFound = in_array($attrNameLower, $this->attrArray);
        // keep this attr on condition
        if ((!$attrFound && $this->attrMethod) || ($attrFound && !$this->attrMethod)) {
            // attr has value
            if ($attrValue) $newSet[] = $attrName . '="' . $attrValue . '"';
            // attr has decimal zero as value
            else if ($attrValue === '0') $newSet[] = $attrName . '="0"';
            // reformat single attributes to XHTML
            else $newSet[] = $attrName . '="' . $attrName . '"';
        }
    }
    return $newSet;
}
244 * Try to convert to plaintext
246 * @param String $source
247 * @return String $source
/**
 * Try to convert to plaintext: HTML entities are turned back into characters.
 *
 * Numeric references still present after html_entity_decode() (decimal
 * "&#NN;" and hex "&#xHH;") are converted in a single pass each to the byte
 * with that value, modulo 256.
 *
 * @param String $source
 * @return String $source
 */
function decode($source) {
    // named and numeric entities understood by PHP itself
    // $source = html_entity_decode($source, ENT_QUOTES, "ISO-8859-1");
    $source = html_entity_decode($source, ENT_QUOTES, "UTF-8");
    // Leftover numeric references. preg_split() with captured delimiters stands in
    // for preg_replace()'s /e modifier, which was removed in PHP 7, and the
    // digits are converted explicitly instead of being evaluated as PHP code
    // (where "065" was read as an octal literal).
    $passes = array(
        '/&#(\d+);/m' => 10,            // decimal notation
        '/&#x([a-f0-9]+);/mi' => 16,    // hex notation
    );
    foreach ($passes as $pattern => $base) {
        $pieces = preg_split($pattern, $source, -1, PREG_SPLIT_DELIM_CAPTURE);
        if ($pieces === false) continue; // regex engine failure: leave the text as it is
        $source = '';
        foreach ($pieces as $index => $piece) {
            // even positions are plain text, odd positions are the captured digits
            if ($index % 2 == 0) {
                $source .= $piece;
            } else if ($base == 16) {
                // the last two hex digits are the value modulo 256
                $source .= chr(hexdec(substr($piece, -2)));
            } else {
                $source .= chr(((int) $piece) % 256);
            }
        }
    }
    return $source;
}
261 * Method to be called by another php script. Processes for SQL injection
263 * @param Mixed $source - input string/array-of-string to be 'cleaned'
264 * @param Resource $connection - An open MySQL connection
265 * @return Mixed $source - 'cleaned' version of input parameter (string or array, as given)
/**
 * Public entry point: prepares a string, or every string element of an array,
 * for use in an SQL statement. Any other input is handed back untouched.
 *
 * @param Mixed $source - input string/array-of-string to be 'cleaned'
 * @param Resource $connection - An open MySQL connection
 * @return Mixed $source - 'cleaned' version of input parameter
 */
function safeSQL($source, &$connection) {
    // a single string is decoded and then escaped
    if (is_string($source)) {
        return $this->quoteSmart($this->decode($source), $connection);
    }
    // array: escape each string element in place, leave other elements alone
    if (is_array($source)) {
        foreach (array_keys($source) as $index) {
            if (is_string($source[$index])) {
                $source[$index] = $this->quoteSmart($this->decode($source[$index]), $connection);
            }
        }
    }
    // cleaned array, or the parameter as given when it is neither string nor array
    return $source;
}
283 * @author Chris Tobin
284 * @author Daniel Morris
286 * @param String $source
287 * @param Resource $connection - An open MySQL connection
288 * @return String $source
/**
 * Undo magic-quotes slashes where they can exist, then escape the string for SQL.
 *
 * @param String $source
 * @param Resource $connection - An open MySQL connection
 * @return String $source - escaped string
 */
function quoteSmart($source, &$connection) {
    // Magic quotes were dropped in PHP 5.4 and get_magic_quotes_gpc() itself is
    // gone in PHP 8, so only ask on versions where the answer can be true.
    if (version_compare(phpversion(), "5.4.0", "<") && function_exists('get_magic_quotes_gpc') && get_magic_quotes_gpc()) {
        $source = stripslashes($source);
    }
    // escape both numeric and text
    return $this->escapeString($source, $connection);
}
299 * @author Chris Tobin
300 * @author Daniel Morris
302 * @param String $string
303 * @param Resource $connection - An open MySQL connection
304 * @return String $string
/**
 * Escape a string for use inside a MySQL statement.
 *
 * The escaped value is returned (previously the result of the escape call was
 * thrown away and the input came back unchanged), and the given connection is
 * used so the link's character set is honoured.
 *
 * @param String $string
 * @param Resource $connection - An open MySQL connection (mysql link resource or mysqli object)
 * @return String $string - escaped string; whatever the driver returns on failure is passed through
 */
function escapeString($string, &$connection) {
    // mysqli link object; is_a() instead of instanceof keeps the file parseable on PHP 4
    if (is_object($connection) && is_a($connection, 'mysqli')) {
        return mysqli_real_escape_string($connection, $string);
    }
    // deprecated function, the only one available before PHP 4.3.0 (takes no link)
    if (version_compare(phpversion(), "4.3.0", "<")) {
        return mysql_escape_string($string);
    }
    // current function, bound to the supplied link when there is one
    if (is_resource($connection)) {
        return mysql_real_escape_string($string, $connection);
    }
    return mysql_real_escape_string($string);
}