Code coverage for /20081101/modules/filter/filter.module

Line #Times calledCode
1
<?php
2
// $Id: filter.module,v 1.231 2008/11/01 19:51:06 dries Exp $
3
4
/**
5
 * @file
6
 * Framework for handling filtering of content.
7
 */
8
9
/**
10
 * Special format ID which means "use the default format".
11
 *
12
 * This value can be passed to the filter APIs as a format ID: this is
13
 * equivalent to not passing an explicit format at all.
14
 */
152366
define('FILTER_FORMAT_DEFAULT', 0);
16
17
/**
18
 * Implementation of hook_help().
19
 */
202366
function filter_help($path, $arg) {
21
  switch ($path) {
221676
    case 'admin/help#filter':
232
      $output = '<p>' . t("The filter module allows administrators to
configure text input formats for use on your site. An input format defines
the HTML tags, codes, and other input allowed in both content and comments,
and is a key feature in guarding against potentially damaging input from
malicious users. Two input formats included by default are <em>Filtered
HTML</em> (which allows only an administrator-approved subset of HTML tags)
and <em>Full HTML</em> (which allows the full set of HTML tags). Additional
input formats may be created by an administrator.") . '</p>';
242
      $output .= '<p>' . t('Each input format uses filters to manipulate
text, and most input formats apply several different filters to text in a
specific order. Each filter is designed for a specific purpose, and
generally either adds, removes or transforms elements within user-entered
text before it is displayed. A filter does not change the actual content of
a post, but instead, modifies it temporarily before it is displayed. A
filter may remove unapproved HTML tags, for instance, while another
automatically adds HTML to make links referenced in text clickable.') .
'</p>';
252
      $output .= '<p>' . t('Users with access to more than one input format
can use the <em>Input format</em> fieldset to choose between available
input formats when creating or editing multi-line content. Administrators
determine the input formats available to each user role, select a default
input format, and control the order of formats listed in the <em>Input
format</em> fieldset.') . '</p>';
262
      $output .= '<p>' . t('For more information, see the online handbook
entry for <a href="@filter">Filter module</a>.', array('@filter' =>
'http://drupal.org/handbook/modules/filter/')) . '</p>';
272
      return $output;
281676
    case 'admin/settings/filters':
298
      $output = '<p>' . t('Use the list below to review the input formats
available to each user role, to select a default input format, and to
control the order of formats listed in the <em>Input format</em> fieldset.
(The <em>Input format</em> fieldset is displayed below textareas when users
with access to more than one input format create multi-line content.) The
input format selected as <em>Default</em> is available to all users and,
unless another format is selected, is applied to all content. All input
formats are available to users in roles with the "administer filters"
permission.') . '</p>';
308
      $output .= '<p>' . t('Since input formats, if available, are
presented in the same order as the list below, it may be helpful to arrange
the formats in descending order of your preference for their use. To change
the order of an input format, grab a drag-and-drop handle under the
<em>Name</em> column and drag to a new location in the list. (Grab a handle
by clicking and holding the mouse while hovering over a handle icon.)
Remember that your changes will not be saved until you click the <em>Save
changes</em> button at the bottom of the page.') . '</p>';
318
      return $output;
321676
    case 'admin/settings/filters/%':
335
      return '<p>' . t('Every <em>filter</em> performs one particular
change on the user input, for example stripping out malicious HTML or
making URLs clickable. Choose which filters you want to apply to text in
this input format. If you notice some filters are causing conflicts in the
output, you can <a href="@rearrange">rearrange them</a>.',
array('@rearrange' => url('admin/settings/filters/' . $arg[3] . '/order')))
. '</p>';
341676
    case 'admin/settings/filters/%/configure':
354
      return '<p>' . t('If you cannot find the settings for a certain
filter, make sure you have enabled it on the <a href="@url">edit tab</a>
first.', array('@url' => url('admin/settings/filters/' . $arg[3]))) .
'</p>';
361676
    case 'admin/settings/filters/%/order':
374
      $output = '<p>' . t('Because of the flexible filtering system, you
might encounter a situation where one filter prevents another from doing
its job. For example: a word in an URL gets converted into a glossary term,
before the URL can be converted to a clickable link. When this happens,
rearrange the order of the filters.') . '</p>';
384
      $output .= '<p>' . t("Filters are executed from top-to-bottom. To
change the order of the filters, modify the values in the <em>Weight</em>
column or grab a drag-and-drop handle under the <em>Name</em> column and
drag filters to new locations in the list. (Grab a handle by clicking and
holding the mouse while hovering over a handle icon.) Remember that your
changes will not be saved until you click the <em>Save configuration</em>
button at the bottom of the page.") . '</p>';
394
      return $output;
400
  }
411676
}
42
43
/**
44
 * Implementation of hook_theme().
45
 */
462366
function filter_theme() {
47
  return array(
48
    'filter_admin_overview' => array(
49178
      'arguments' => array('form' => NULL),
50178
      'file' => 'filter.admin.inc',
51178
    ),
52
    'filter_admin_order' => array(
53178
      'arguments' => array('form' => NULL),
54178
      'file' => 'filter.admin.inc',
55178
    ),
56
    'filter_tips' => array(
57178
      'arguments' => array('tips' => NULL, 'long' => FALSE, 'extra' => ''),
58178
      'file' => 'filter.pages.inc',
59178
    ),
60
    'filter_tips_more_info' => array(
61178
      'arguments' => array(),
62178
    ),
63178
  );
640
}
65
66
/**
67
 * Implementation of hook_menu().
68
 */
692366
function filter_menu() {
70161
  $items['admin/settings/filters'] = array(
71161
    'title' => 'Input formats',
72161
    'description' => 'Configure how content input by users is filtered,
including allowed HTML tags. Also allows enabling of module-provided
filters.',
73161
    'page callback' => 'drupal_get_form',
74161
    'page arguments' => array('filter_admin_overview'),
75161
    'access arguments' => array('administer filters'),
76
  );
77161
  $items['admin/settings/filters/list'] = array(
78161
    'title' => 'List',
79161
    'type' => MENU_DEFAULT_LOCAL_TASK,
80
  );
81161
  $items['admin/settings/filters/add'] = array(
82161
    'title' => 'Add input format',
83161
    'page callback' => 'filter_admin_format_page',
84161
    'access arguments' => array('administer filters'),
85161
    'type' => MENU_LOCAL_TASK,
86161
    'weight' => 1,
87
  );
88161
  $items['admin/settings/filters/delete'] = array(
89161
    'title' => 'Delete input format',
90161
    'page callback' => 'drupal_get_form',
91161
    'page arguments' => array('filter_admin_delete'),
92161
    'access arguments' => array('administer filters'),
93161
    'type' => MENU_CALLBACK,
94
  );
95161
  $items['filter/tips'] = array(
96161
    'title' => 'Compose tips',
97161
    'page callback' => 'filter_tips_long',
98161
    'access callback' => TRUE,
99161
    'type' => MENU_SUGGESTED_ITEM,
100
  );
101161
  $items['admin/settings/filters/%filter_format'] = array(
102161
    'type' => MENU_CALLBACK,
103161
    'title callback' => 'filter_admin_format_title',
104161
    'title arguments' => array(3),
105161
    'page callback' => 'filter_admin_format_page',
106161
    'page arguments' => array(3),
107161
    'access arguments' => array('administer filters'),
108
  );
109161
  $items['admin/settings/filters/%filter_format/edit'] = array(
110161
    'title' => 'Edit',
111161
    'type' => MENU_DEFAULT_LOCAL_TASK,
112161
    'weight' => 0,
113
  );
114161
  $items['admin/settings/filters/%filter_format/configure'] = array(
115161
    'title' => 'Configure',
116161
    'page callback' => 'filter_admin_configure_page',
117161
    'page arguments' => array(3),
118161
    'access arguments' => array('administer filters'),
119161
    'type' => MENU_LOCAL_TASK,
120161
    'weight' => 1,
121
  );
122161
  $items['admin/settings/filters/%filter_format/order'] = array(
123161
    'title' => 'Rearrange',
124161
    'page callback' => 'filter_admin_order_page',
125161
    'page arguments' => array(3),
126161
    'access arguments' => array('administer filters'),
127161
    'type' => MENU_LOCAL_TASK,
128161
    'weight' => 2,
129
  );
130161
  return $items;
1310
}
132
1332366
function filter_format_load($arg) {
13420
  return filter_formats($arg);
1350
}
136
137
/**
138
 * Display a filter format form title.
139
 */
1402366
function filter_admin_format_title($format) {
14116
  return $format->name;
1420
}
143
144
/**
145
 * Implementation of hook_perm().
146
 */
1472366
function filter_perm() {
148
  return array(
149
    'administer filters' => array(
150172
      'title' => t('Administer filters'),
151172
      'description' => t('Manage input formats and filters, and select
which roles may use them. %warning', array('%warning' => t('Warning: Give
to trusted roles only; this permission has security implications.'))),
152172
    ),
153172
  );
1540
}
155
156
/**
157
 * Implementation of hook_cron().
158
 *
159
 * Expire outdated filter cache entries
160
 */
1612366
function filter_cron() {
1622
  cache_clear_all(NULL, 'cache_filter');
1632
}
164
165
/**
166
 * Implementation of hook_filter_tips().
167
 */
1682366
function filter_filter_tips($delta, $format, $long = FALSE) {
169203
  global $base_url;
170
  switch ($delta) {
171203
    case 0:
172198
      if ($allowed_html = variable_get("allowed_html_$format", '<a> <em>
<strong> <cite> <blockquote> <code> <ul> <ol> <li> <dl> <dt> <dd>')) {
173
        switch ($long) {
174198
          case 0:
175198
            return t('Allowed HTML tags: @tags', array('@tags' =>
$allowed_html));
1760
          case 1:
1770
            $output = '<p>' . t('Allowed HTML tags: @tags', array('@tags'
=> $allowed_html)) . '</p>';
1780
            if (!variable_get("filter_html_help_$format", 1)) {
1790
              return $output;
1800
            }
181
1820
            $output .= '<p>' . t('This site allows HTML content. While
learning all of HTML may feel intimidating, learning how to use a very
small number of the most basic HTML "tags" is very easy. This table
provides examples for each tag that is enabled on this site.') . '</p>';
1830
            $output .= '<p>' . t('For more information see W3C\'s <a
href="@html-specifications">HTML Specifications</a> or use your favorite
search engine to find other sites that explain HTML.',
array('@html-specifications' => 'http://www.w3.org/TR/html/')) . '</p>';
184
            $tips = array(
1850
              'a' => array( t('Anchors are used to make links to other
pages.'), '<a href="' . $base_url . '">' . variable_get('site_name',
'Drupal') . '</a>'),
1860
              'br' => array( t('By default line break tags are
automatically added, so use this tag to add additional ones. Use of this
tag is different because it is not used with an open/close pair like all
the others. Use the extra " /" inside the tag to maintain XHTML 1.0
compatibility'), t('Text with <br />line break')),
1870
              'p' => array( t('By default paragraph tags are automatically
added, so use this tag to add additional ones.'), '<p>' . t('Paragraph
one.') . '</p> <p>' . t('Paragraph two.') . '</p>'),
1880
              'strong' => array( t('Strong'), '<strong>' . t('Strong') .
'</strong>'),
1890
              'em' => array( t('Emphasized'), '<em>' . t('Emphasized') .
'</em>'),
1900
              'cite' => array( t('Cited'), '<cite>' . t('Cited') .
'</cite>'),
1910
              'code' => array( t('Coded text used to show programming
source code'), '<code>' . t('Coded') . '</code>'),
1920
              'b' => array( t('Bolded'), '<b>' . t('Bolded') . '</b>'),
1930
              'u' => array( t('Underlined'), '<u>' . t('Underlined') .
'</u>'),
1940
              'i' => array( t('Italicized'), '<i>' . t('Italicized') .
'</i>'),
1950
              'sup' => array( t('Superscripted'),
t('<sup>Super</sup>scripted')),
1960
              'sub' => array( t('Subscripted'),
t('<sub>Sub</sub>scripted')),
1970
              'pre' => array( t('Preformatted'), '<pre>' .
t('Preformatted') . '</pre>'),
1980
              'abbr' => array( t('Abbreviation'), t('<abbr
title="Abbreviation">Abbrev.</abbr>')),
1990
              'acronym' => array( t('Acronym'), t('<acronym
title="Three-Letter Acronym">TLA</acronym>')),
2000
              'blockquote' => array( t('Block quoted'), '<blockquote>' .
t('Block quoted') . '</blockquote>'),
2010
              'q' => array( t('Quoted inline'), '<q>' . t('Quoted inline')
. '</q>'),
202
              // Assumes and describes tr, td, th.
2030
              'table' => array( t('Table'), '<table> <tr><th>' . t('Table
header') . '</th></tr> <tr><td>' . t('Table cell') . '</td></tr>
</table>'),
2040
              'tr' => NULL, 'td' => NULL, 'th' => NULL,
2050
              'del' => array( t('Deleted'), '<del>' . t('Deleted') .
'</del>'),
2060
              'ins' => array( t('Inserted'), '<ins>' . t('Inserted') .
'</ins>'),
207
               // Assumes and describes li.
2080
              'ol' => array( t('Ordered list - use the &lt;li&gt; to begin
each list item'), '<ol> <li>' . t('First item') . '</li> <li>' . t('Second
item') . '</li> </ol>'),
2090
              'ul' => array( t('Unordered list - use the &lt;li&gt; to
begin each list item'), '<ul> <li>' . t('First item') . '</li> <li>' .
t('Second item') . '</li> </ul>'),
2100
              'li' => NULL,
211
              // Assumes and describes dt and dd.
2120
              'dl' => array( t('Definition lists are similar to other HTML
lists. &lt;dl&gt; begins the definition list, &lt;dt&gt; begins the
definition term and &lt;dd&gt; begins the definition description.'), '<dl>
<dt>' . t('First term') . '</dt> <dd>' . t('First definition') . '</dd>
<dt>' . t('Second term') . '</dt> <dd>' . t('Second definition') . '</dd>
</dl>'),
2130
              'dt' => NULL, 'dd' => NULL,
2140
              'h1' => array( t('Heading'), '<h1>' . t('Title') . '</h1>'),
2150
              'h2' => array( t('Heading'), '<h2>' . t('Subtitle') .
'</h2>'),
2160
              'h3' => array( t('Heading'), '<h3>' . t('Subtitle three') .
'</h3>'),
2170
              'h4' => array( t('Heading'), '<h4>' . t('Subtitle four') .
'</h4>'),
2180
              'h5' => array( t('Heading'), '<h5>' . t('Subtitle five') .
'</h5>'),
2190
              'h6' => array( t('Heading'), '<h6>' . t('Subtitle six') .
'</h6>')
2200
            );
2210
            $header = array(t('Tag Description'), t('You Type'), t('You
Get'));
2220
            preg_match_all('/<([a-z0-9]+)[^a-z0-9]/i', $allowed_html,
$out);
2230
            foreach ($out[1] as $tag) {
2240
              if (array_key_exists($tag, $tips)) {
2250
                if ($tips[$tag]) {
2260
                  $rows[] = array(
2270
                    array('data' => $tips[$tag][0], 'class' =>
'description'),
2280
                    array('data' => '<code>' . check_plain($tips[$tag][1])
. '</code>', 'class' => 'type'),
2290
                    array('data' => $tips[$tag][1], 'class' => 'get')
2300
                  );
2310
                }
2320
              }
233
              else {
2340
                $rows[] = array(
2350
                  array('data' => t('No help provided for tag %tag.',
array('%tag' => $tag)), 'class' => 'description', 'colspan' => 3),
236
                );
237
              }
2380
            }
2390
            $output .= theme('table', $header, $rows);
240
2410
            $output .= '<p>' . t('Most unusual characters can be directly
entered without any problems.') . '</p>';
2420
            $output .= '<p>' . t('If you do encounter problems, try using
HTML character entities. A common example looks like &amp;amp; for an
ampersand &amp; character. For a full list of entities see HTML\'s <a
href="@html-entities">entities</a> page. Some of the available characters
include:', array('@html-entities' =>
'http://www.w3.org/TR/html4/sgml/entities.html')) . '</p>';
243
244
            $entities = array(
2450
              array( t('Ampersand'), '&amp;'),
2460
              array( t('Greater than'), '&gt;'),
2470
              array( t('Less than'), '&lt;'),
2480
              array( t('Quotation mark'), '&quot;'),
2490
            );
2500
            $header = array(t('Character Description'), t('You Type'),
t('You Get'));
2510
            unset($rows);
2520
            foreach ($entities as $entity) {
2530
              $rows[] = array(
2540
                array('data' => $entity[0], 'class' => 'description'),
2550
                array('data' => '<code>' . check_plain($entity[1]) .
'</code>', 'class' => 'type'),
2560
                array('data' => $entity[1], 'class' => 'get')
2570
              );
2580
            }
2590
            $output .= theme('table', $header, $rows);
2600
            return $output;
2610
        }
2620
      }
2630
      break;
264
265203
    case 1:
266
      switch ($long) {
267203
        case 0:
268203
          return t('Lines and paragraphs break automatically.');
2690
        case 1:
2700
          return t('Lines and paragraphs are automatically recognized. The
&lt;br /&gt; line break, &lt;p&gt; paragraph and &lt;/p&gt; close paragraph
tags are inserted automatically. If paragraphs are not recognized simply
add a couple blank lines.');
2710
      }
2720
      break;
273
274203
    case 2:
275203
      return t('Web page addresses and e-mail addresses turn into links
automatically.');
2760
      break;
277
278202
    case 4:
2790
      return t('No HTML tags allowed');
2800
      break;
281
2820
  }
283202
}
284
285
/**
286
 * Retrieve a list of input formats.
287
 */
2882366
function filter_formats($index = NULL) {
289229
  global $user;
290229
  static $formats;
291
292
  // Administrators can always use all input formats.
293229
  $all = user_access('administer filters');
294
295229
  if (!isset($formats)) {
296229
    $formats = array();
297
298229
    $query = db_select('filter_formats', 'f');
299229
    $query->addField('f', 'format', 'format');
300229
    $query->addField('f', 'name', 'name');
301229
    $query->addField('f', 'roles', 'roles');
302229
    $query->addField('f', 'cache', 'cache');
303229
    $query->addField('f', 'weight', 'weight');
304229
    $query->orderBy('weight');
305
306
    // Build query for selecting the format(s) based on the user's roles.
307229
    if (!$all) {
308199
      $or = db_or()->condition('format',
variable_get('filter_default_format', 1));
309199
      foreach ($user->roles as $rid => $role) {
310199
        $or->condition('roles', '%'. (int)$rid .'%', 'LIKE');
311199
      }
312199
      $query->condition($or);
313199
    }
314
315229
    $formats = $query->execute()->fetchAllAssoc('format');
316229
  }
317229
  if (isset($index)) {
31820
    return isset($formats[$index]) ? $formats[$index] : FALSE;
3190
  }
320209
  return $formats;
3210
}
322
323
/**
324
 * Build a list of all filters.
325
 */
3262366
function filter_list_all() {
32710
  $filters = array();
328
32910
  foreach (module_list() as $module) {
33010
    $list = module_invoke($module, 'filter', 'list');
33110
    if (isset($list) && is_array($list)) {
33210
      foreach ($list as $delta => $name) {
33310
        $filters[$module . '/' . $delta] = (object)array('module' =>
$module, 'delta' => $delta, 'name' => $name);
33410
      }
33510
    }
33610
  }
337
33810
  uasort($filters, '_filter_list_cmp');
339
34010
  return $filters;
3410
}
342
343
/**
344
 * Helper function for sorting the filter list by filter name.
345
 */
3462366
function _filter_list_cmp($a, $b) {
34710
  return strcmp($a->name, $b->name);
3480
}
349
350
/**
351
 * Resolve a format id, including the default format.
352
 */
3532366
function filter_resolve_format($format) {
354439
  return $format == FILTER_FORMAT_DEFAULT ?
variable_get('filter_default_format', 1) : $format;
3550
}
356
/**
357
 * Check if text in a certain input format is allowed to be cached.
358
 */
3592366
function filter_format_allowcache($format) {
36098
  static $cache = array();
36198
  $format = filter_resolve_format($format);
36298
  if (!isset($cache[$format])) {
36398
    $cache[$format] = db_result(db_query('SELECT cache FROM
{filter_formats} WHERE format = %d', $format));
36498
  }
36598
  return $cache[$format];
3660
}
367
368
/**
369
 * Retrieve a list of filters for a certain format.
370
 */
3712366
function filter_list_format($format) {
372303
  static $filters = array();
373
374303
  if (!isset($filters[$format])) {
375303
    $filters[$format] = array();
376303
    $result = db_query("SELECT * FROM {filters} WHERE format = %d ORDER BY
weight, module, delta", $format);
377303
    while ($filter = db_fetch_object($result)) {
378301
      $list = module_invoke($filter->module, 'filter', 'list');
379301
      if (isset($list) && is_array($list) && isset($list[$filter->delta]))
{
380301
        $filter->name = $list[$filter->delta];
381301
        $filters[$format][$filter->module . '/' . $filter->delta] =
$filter;
382301
      }
383301
    }
384303
  }
385
386303
  return $filters[$format];
3870
}
388
389
/**
390
 * @name Filtering functions
391
 * @{
392
 * Modules which need to have content filtered can use these functions to
393
 * interact with the filter system.
394
 *
395
 * For more info, see the hook_filter() documentation.
396
 *
397
 * Note: because filters can inject JavaScript or execute PHP code,
security is
398
 * vital here. When a user supplies a $format, you should validate it with
399
 * filter_access($format) before accepting/using it. This is normally done
in
400
 * the validation stage of the node system. You should for example never
make a
401
 * preview of content in a disallowed format.
402
 */
403
404
/**
405
 * Run all the enabled filters on a piece of text.
406
 *
407
 * @param $text
408
 *    The text to be filtered.
409
 * @param $format
410
 *    The format of the text to be filtered. Specify FILTER_FORMAT_DEFAULT
for
411
 *    the default format.
412
 * @param $check
413
 *    Whether to check the $format with filter_access() first. Defaults to
TRUE.
414
 *    Note that this will check the permissions of the current user, so you
415
 *    should specify $check = FALSE when viewing other people's content.
When
416
 *    showing content that is not (yet) stored in the database (eg. upon
preview),
417
 *    set to TRUE so the user's permissions are checked.
418
 */
4192366
function check_markup($text, $format = FILTER_FORMAT_DEFAULT, $check =
TRUE) {
420
  // When $check = TRUE, do an access check on $format.
421268
  if (isset($text) && (!$check || filter_access($format))) {
422268
    $format = filter_resolve_format($format);
423
424
    // Check for a cached version of this piece of text.
425268
    $cache_id = $format . ':' . md5($text);
426268
    if ($cached = cache_get($cache_id, 'cache_filter')) {
427185
      return $cached->data;
4280
    }
429
430
    // See if caching is allowed for this format.
43198
    $cache = filter_format_allowcache($format);
432
433
    // Convert all Windows and Mac newlines to a single newline,
434
    // so filters only need to deal with one possibility.
43598
    $text = str_replace(array("\r\n", "\r"), "\n", $text);
436
437
    // Get a complete list of filters, ordered properly.
43898
    $filters = filter_list_format($format);
439
440
    // Give filters the chance to escape HTML-like data such as code or
formulas.
44198
    foreach ($filters as $filter) {
44298
      $text = module_invoke($filter->module, 'filter', 'prepare',
$filter->delta, $format, $text, $cache_id);
44398
    }
444
445
    // Perform filtering.
44698
    foreach ($filters as $filter) {
44798
      $text = module_invoke($filter->module, 'filter', 'process',
$filter->delta, $format, $text, $cache_id);
44898
    }
449
450
    // Store in cache with a minimum expiration time of 1 day.
45198
    if ($cache) {
45297
      cache_set($cache_id, $text, 'cache_filter', REQUEST_TIME + (60 * 60 *
24));
45397
    }
45498
  }
455
  else {
4560
    $text = t('n/a');
457
  }
458
45998
  return $text;
4600
}
461
462
/**
463
 * Generate a selector for choosing a format in a form.
464
 *
465
 * @ingroup forms
466
 * @see filter_form_validate()
467
 * @param $value
468
 *   The ID of the format that is currently selected.
469
 * @param $weight
470
 *   The weight of the input format.
471
 * @param $parents
472
 *   Required when defining multiple input formats on a single node or
having a different parent than 'format'.
473
 * @return
474
 *   HTML for the form element.
475
 */
4762366
function filter_form($value = FILTER_FORMAT_DEFAULT, $weight = NULL,
$parents = array('format')) {
477198
  $value = filter_resolve_format($value);
478198
  $formats = filter_formats();
479
480198
  $extra = theme('filter_tips_more_info');
481
482198
  if (count($formats) > 1) {
483
    $form = array(
4847
      '#type' => 'fieldset',
4857
      '#title' => t('Input format'),
4867
      '#collapsible' => TRUE,
4877
      '#collapsed' => TRUE,
4887
      '#weight' => $weight,
4897
      '#element_validate' => array('filter_form_validate'),
4907
    );
491
    // Multiple formats available: display radio buttons with tips.
4927
    foreach ($formats as $format) {
493
      // Generate the parents as the autogenerator does, so we will have a
494
      // unique id for each radio button.
4957
      $parents_for_id = array_merge($parents, array($format->format));
4967
      $form[$format->format] = array(
4977
        '#type' => 'radio',
4987
        '#title' => $format->name,
4997
        '#default_value' => $value,
5007
        '#return_value' => $format->format,
5017
        '#parents' => $parents,
5027
        '#description' => theme('filter_tips',
_filter_tips($format->format, FALSE)),
5037
        '#id' => form_clean_id('edit-' . implode('-', $parents_for_id)),
504
      );
5057
    }
5067
  }
507
  else {
508
    // Only one format available: use a hidden form item and only show
tips.
509191
    $format = array_shift($formats);
510191
    $form[$format->format] = array('#type' => 'value', '#value' =>
$format->format, '#parents' => $parents);
511191
    $tips = _filter_tips(variable_get('filter_default_format', 1), FALSE);
512191
    $form['format']['guidelines'] = array(
513191
      '#title' => t('Formatting guidelines'),
514191
      '#markup' => theme('filter_tips', $tips, FALSE, $extra),
515
    );
516
  }
517198
  $form[] = array('#markup' => $extra);
518198
  return $form;
5190
}
520
5212366
function filter_form_validate($form) {
5223
  foreach (element_children($form) as $key) {
5233
    if ($form[$key]['#value'] == $form[$key]['#return_value']) {
5243
      return;
5250
    }
5261
  }
5270
  form_error($form, t('An illegal choice has been detected. Please contact
the site administrator.'));
5280
  watchdog('form', 'Illegal choice %choice in %name element.',
array('%choice' => $form[$key]['#value'], '%name' => empty($form['#title'])
? $form['#parents'][0] : $form['#title']), WATCHDOG_ERROR);
5290
}
530
531
/**
532
 * Returns TRUE if the user is allowed to access this format.
533
 */
5342366
function filter_access($format) {
535265
  $format = filter_resolve_format($format);
536265
  if (user_access('administer filters') || ($format ==
variable_get('filter_default_format', 1))) {
537264
    return TRUE;
5380
  }
539
  else {
5401
    $formats = filter_formats();
5411
    return isset($formats[$format]);
542
  }
5430
}
544
545
/**
546
 * @} End of "Filtering functions".
547
 */
548
549
550
/**
551
 * Helper function for fetching filter tips.
552
 */
5532366
function _filter_tips($format, $long = FALSE) {
554206
  if ($format == -1) {
5550
    $formats = filter_formats();
5560
  }
557
  else {
558206
    $formats = array(db_fetch_object(db_query("SELECT * FROM
{filter_formats} WHERE format = %d", $format)));
559
  }
560
561206
  $tips = array();
562
563206
  foreach ($formats as $format) {
564206
    $filters = filter_list_format($format->format);
565
566206
    $tips[$format->name] = array();
567206
    foreach ($filters as $id => $filter) {
568206
      if ($tip = module_invoke($filter->module, 'filter_tips',
$filter->delta, $format->format, $long)) {
569206
        $tips[$format->name][] = array('tip' => $tip, 'id' => $id);
570206
      }
571206
    }
572206
  }
573
574206
  return $tips;
5750
}
576
577
578
/**
579
 * Format a link to the more extensive filter tips.
580
 *
581
 * @ingroup themeable
582
 */
5832366
function theme_filter_tips_more_info() {
584198
  return '<p>' . l(t('More information about formatting options'),
'filter/tips') . '</p>';
5850
}
586
587
/**
588
 * @name Standard filters
589
 * @{
590
 * Filters implemented by the filter.module.
591
 */
592
593
/**
594
 * Implementation of hook_filter(). Contains a basic set of essential
filters.
595
 * - HTML filter:
596
 *     Validates user-supplied HTML, transforming it as necessary.
597
 * - Line break converter:
598
 *     Converts newlines into paragraph and break tags.
599
 * - URL and e-mail address filter:
600
 *     Converts newlines into paragraph and break tags.
601
 */
6022366
function filter_filter($op, $delta = 0, $format = -1, $text = '') {
603
  switch ($op) {
604302
    case 'list':
605302
      return array(0 => t('Limit allowed HTML tags'), 1 => t('Convert line
breaks'), 2 => t('Convert URLs into links'), 3 => t('Correct broken HTML'),
4 => t('Escape all HTML'));
606
607113
    case 'description':
608
      switch ($delta) {
60910
        case 0:
61010
          return t('Allows you to restrict the HTML tags the user can use.
It will also remove harmful content such as JavaScript events, JavaScript
URLs and CSS styles from those tags that are not removed.');
61110
        case 1:
61210
          return t('Converts line breaks into HTML (i.e. &lt;br&gt; and
&lt;p&gt;) tags.');
61310
        case 2:
61410
          return t('Turns web and e-mail addresses into clickable links.');
61510
        case 3:
61610
          return t('Corrects faulty and chopped off HTML in postings.');
61710
        case 4:
61810
          return t('Escapes all HTML tags, so they will be visible instead
of being effective.');
6190
        default:
6200
          return;
6210
      }
622
623106
    case 'process':
624
      switch ($delta) {
62597
        case 0:
62697
          return _filter_html($text, $format);
62797
        case 1:
62897
          return _filter_autop($text);
62997
        case 2:
63097
          return _filter_url($text, $format);
63197
        case 3:
63297
          return _filter_htmlcorrector($text);
6330
        case 4:
6340
          return trim(check_plain($text));
6350
        default:
6360
          return $text;
6370
      }
638
639106
    case 'settings':
640
      switch ($delta) {
6416
        case 0:
6426
          return _filter_html_settings($format);
6436
        case 2:
6446
          return _filter_url_settings($format);
6456
        default:
6466
          return;
6476
      }
648
649100
    default:
650100
      return $text;
651100
  }
6520
}
653
654
/**
655
 * Settings for the HTML filter.
656
 */
6572366
function _filter_html_settings($format) {
6586
  $form['filter_html'] = array(
6596
    '#type' => 'fieldset',
6606
    '#title' => t('HTML filter'),
6616
    '#collapsible' => TRUE,
662
  );
6636
  $form['filter_html']["allowed_html_$format"] = array(
6646
    '#type' => 'textfield',
6656
    '#title' => t('Allowed HTML tags'),
6666
    '#default_value' => variable_get("allowed_html_$format", '<a> <em>
<strong> <cite> <blockquote> <code> <ul> <ol> <li> <dl> <dt> <dd>'),
6676
    '#size' => 64,
6686
    '#maxlength' => 1024,
6696
    '#description' => t('Specify a list of tags which should not be
stripped. (Note that JavaScript event attributes are always stripped.)'),
670
  );
6716
  $form['filter_html']["filter_html_help_$format"] = array(
6726
    '#type' => 'checkbox',
6736
    '#title' => t('Display HTML help'),
6746
    '#default_value' => variable_get("filter_html_help_$format", 1),
6756
    '#description' => t('If enabled, Drupal will display some basic HTML
help in the long filter tips.'),
676
  );
6776
  $form['filter_html']["filter_html_nofollow_$format"] = array(
6786
    '#type' => 'checkbox',
6796
    '#title' => t('Spam link deterrent'),
6806
    '#default_value' => variable_get("filter_html_nofollow_$format",
FALSE),
6816
    '#description' => t('If enabled, Drupal will add rel="nofollow" to all
links, as a measure to reduce the effectiveness of spam links. Note: this
will also prevent valid links from being followed by search engines,
therefore it is likely most effective when enabled for anonymous users.'),
682
  );
6836
  return $form;
6840
}
685
686
/**
687
 * HTML filter. Provides filtering of input into accepted HTML.
688
 */
6892366
function _filter_html($text, $format) {
69097
  $allowed_tags = preg_split('/\s+|<|>/',
variable_get("allowed_html_$format", '<a> <em> <strong> <cite> <blockquote>
<code> <ul> <ol> <li> <dl> <dt> <dd>'), -1, PREG_SPLIT_NO_EMPTY);
69197
  $text = filter_xss($text, $allowed_tags);
692
69397
  if (variable_get("filter_html_nofollow_$format", FALSE)) {
6940
    $text = preg_replace('/<a([^>]+)>/i', '<a\\1 rel="nofollow">', $text);
6950
  }
696
69797
  return trim($text);
6980
}
699
700
/**
701
 * Settings for URL filter.
702
 */
7032366
function _filter_url_settings($format) {
7046
  $form['filter_urlfilter'] = array(
7056
    '#type' => 'fieldset',
7066
    '#title' => t('URL filter'),
7076
    '#collapsible' => TRUE,
708
  );
7096
  $form['filter_urlfilter']['filter_url_length_' . $format] = array(
7106
    '#type' => 'textfield',
7116
    '#title' => t('Maximum link text length'),
7126
    '#default_value' => variable_get('filter_url_length_' . $format, 72),
7136
    '#maxlength' => 4,
7146
    '#description' => t('URLs longer than this number of characters will be
truncated to prevent long strings that break formatting. The link itself
will be retained; just the text portion of the link will be truncated.'),
715
  );
7166
  return $form;
7170
}
718
719
/**
720
 * URL filter. Automatically converts text web addresses (URLs, e-mail
addresses,
721
 * ftp links, etc.) into hyperlinks.
722
 */
7232366
function _filter_url($text, $format) {
724
  // Pass length to regexp callback
72597
  _filter_url_trim(NULL, variable_get('filter_url_length_' . $format, 72));
726
72797
  $text = ' ' . $text . ' ';
728
729
  // Match absolute URLs.
73097
  $text = preg_replace_callback("`(<p>|<li>|<br\s*/?>|[
\n\r\t\(])((http://|https://|ftp://|mailto:|smb://|afp://|file://|gopher://|news://|ssl://|sslv2://|sslv3://|tls://|tcp://|udp://)([a-zA-Z0-9@:%_+*~#?&=.,/;-]*[a-zA-Z0-9@:%_+*~#&=/;-]))([.,?!]*?)(?=(</p>|</li>|<br\s*/?>|[
\n\r\t\)]))`i", '_filter_url_parse_full_links', $text);
731
732
  // Match e-mail addresses.
73397
  $text = preg_replace("`(<p>|<li>|<br\s*/?>|[
\n\r\t\(])([A-Za-z0-9._-]+@[A-Za-z0-9._+-]+\.[A-Za-z]{2,4})([.,?!]*?)(?=(</p>|</li>|<br\s*/?>|[
\n\r\t\)]))`i", '\1<a href="mailto:\2">\2</a>\3', $text);
734
735
  // Match www domains/addresses.
73697
  $text = preg_replace_callback("`(<p>|<li>|[
\n\r\t\(])(www\.[a-zA-Z0-9@:%_+*~#?&=.,/;-]*[a-zA-Z0-9@:%_+~#\&=/;-])([.,?!]*?)(?=(</p>|</li>|<br\s*/?>|[
\n\r\t\)]))`i", '_filter_url_parse_partial_links', $text);
73797
  $text = substr($text, 1, -1);
738
73997
  return $text;
7400
}
741
742
/**
743
 * Scan input and make sure that all HTML tags are properly closed and
nested.
744
 */
7452366
function _filter_htmlcorrector($text) {
746
  // Prepare tag lists.
74797
  static $no_nesting, $single_use;
74897
  if (!isset($no_nesting)) {
749
    // Tags which cannot be nested but are typically left unclosed.
75097
    $no_nesting = drupal_map_assoc(array('li', 'p'));
751
752
    // Single use tags in HTML4
75397
    $single_use = drupal_map_assoc(array('base', 'meta', 'link', 'hr',
'br', 'param', 'img', 'area', 'input', 'col', 'frame'));
75497
  }
755
756
  // Properly entify angles.
75797
  $text = preg_replace('!<([^a-zA-Z/])!', '&lt;\1', $text);
758
759
  // Split tags from text.
76097
  $split = preg_split('/<([^>]+?)>/', $text, -1, PREG_SPLIT_DELIM_CAPTURE);
761
  // Note: PHP ensures the array consists of alternating delimiters and
literals
762
  // and begins and ends with a literal (inserting $null as required).
763
76497
  $tag = FALSE; // Odd/even counter. Tag or no tag.
76597
  $stack = array();
76697
  $output = '';
76797
  foreach ($split as $value) {
768
    // Process HTML tags.
76997
    if ($tag) {
77095
      list($tagname) = explode(' ', strtolower($value), 2);
771
      // Closing tag
77295
      if ($tagname{0} == '/') {
77395
        $tagname = substr($tagname, 1);
774
        // Discard XHTML closing tags for single use tags.
77595
        if (!isset($single_use[$tagname])) {
776
          // See if we possibly have a matching opening tag on the stack.
77795
          if (in_array($tagname, $stack)) {
778
            // Close other tags lingering first.
779
            do {
78095
              $output .= '</' . $stack[0] . '>';
78195
            } while (array_shift($stack) != $tagname);
78295
          }
783
          // Otherwise, discard it.
78495
        }
78595
      }
786
      // Opening tag
787
      else {
788
        // See if we have an identical 'no nesting' tag already open and
close it if found.
78995
        if (count($stack) && ($stack[0] == $tagname) &&
isset($no_nesting[$stack[0]])) {
7900
          $output .= '</' . array_shift($stack) . '>';
7910
        }
792
        // Push non-single-use tags onto the stack
79395
        if (!isset($single_use[$tagname])) {
79495
          array_unshift($stack, $tagname);
79595
        }
796
        // Add trailing slash to single-use tags as per X(HT)ML.
797
        else {
7980
          $value = rtrim($value, ' /') . ' /';
799
        }
80095
        $output .= '<' . $value . '>';
801
      }
80295
    }
803
    else {
804
      // Passthrough all text.
80597
      $output .= $value;
806
    }
80797
    $tag = !$tag;
80897
  }
809
  // Close remaining tags.
81097
  while (count($stack) > 0) {
8110
    $output .= '</' . array_shift($stack) . '>';
8120
  }
81397
  return $output;
8140
}
815
816
/**
817
 * Make links out of absolute URLs.
818
 */
8192366
function _filter_url_parse_full_links($match) {
8200
  $match[2] = decode_entities($match[2]);
8210
  $caption = check_plain(_filter_url_trim($match[2]));
8220
  $match[2] = check_url($match[2]);
8230
  return $match[1] . '<a href="' . $match[2] . '" title="' . $match[2] .
'">' . $caption . '</a>' . $match[5];
8240
}
825
826
/**
827
 * Make links out of domain names starting with "www."
828
 */
8292366
function _filter_url_parse_partial_links($match) {
8300
  $match[2] = decode_entities($match[2]);
8310
  $caption = check_plain(_filter_url_trim($match[2]));
8320
  $match[2] = check_plain($match[2]);
8330
  return $match[1] . '<a href="http://' . $match[2] . '" title="' .
$match[2] . '">' . $caption . '</a>' . $match[3];
8340
}
835
836
/**
837
 * Shortens long URLs to http://www.example.com/long/url...
838
 */
8392366
function _filter_url_trim($text, $length = NULL) {
84097
  static $_length;
84197
  if ($length !== NULL) {
84297
    $_length = $length;
84397
  }
844
845
  // Use +3 for '...' string length.
84697
  if (strlen($text) > $_length + 3) {
8470
    $text = substr($text, 0, $_length) . '...';
8480
  }
849
85097
  return $text;
8510
}
852
853
/**
854
 * Convert line breaks into <p> and <br> in an intelligent fashion.
855
 * Based on: http://photomatt.net/scripts/autop
856
 */
8572366
function _filter_autop($text) {
858
  // All block level tags
85997
  $block =
'(?:table|thead|tfoot|caption|colgroup|tbody|tr|td|th|div|dl|dd|dt|ul|ol|li|pre|select|form|blockquote|address|p|h[1-6]|hr)';
860
861
  // Split at <pre>, <script>, <style> and </pre>, </script>, </style>
tags.
862
  // We don't apply any processing to the contents of these tags to avoid
messing
863
  // up code. We look for matched pairs and allow basic nesting. For
example:
864
  // "processed <pre> ignored <script> ignored </script> ignored </pre>
processed"
86597
  $chunks = preg_split('@(</?(?:pre|script|style|object)[^>]*>)@i', $text,
-1, PREG_SPLIT_DELIM_CAPTURE);
866
  // Note: PHP ensures the array consists of alternating delimiters and
literals
867
  // and begins and ends with a literal (inserting NULL as required).
86897
  $ignore = FALSE;
86997
  $ignoretag = '';
87097
  $output = '';
87197
  foreach ($chunks as $i => $chunk) {
87297
    if ($i % 2) {
873
      // Opening or closing tag?
8740
      $open = ($chunk[1] != '/');
8750
      list($tag) = preg_split('/[ >]/', substr($chunk, 2 - $open), 2);
8760
      if (!$ignore) {
8770
        if ($open) {
8780
          $ignore = TRUE;
8790
          $ignoretag = $tag;
8800
        }
8810
      }
882
      // Only allow a matching tag to close it.
8830
      elseif (!$open && $ignoretag == $tag) {
8840
        $ignore = FALSE;
8850
        $ignoretag = '';
8860
      }
8870
    }
88897
    elseif (!$ignore) {
88997
      $chunk = preg_replace('|\n*$|', '', $chunk) . "\n\n"; // just to make
things a little easier, pad the end
89097
      $chunk = preg_replace('|<br />\s*<br />|', "\n\n", $chunk);
89197
      $chunk = preg_replace('!(<' . $block . '[^>]*>)!', "\n$1", $chunk);
// Space things out a little
89297
      $chunk = preg_replace('!(</' . $block . '>)!', "$1\n\n", $chunk); //
Space things out a little
89397
      $chunk = preg_replace("/\n\n+/", "\n\n", $chunk); // take care of
duplicates
89497
      $chunk = preg_replace('/\n?(.+?)(?:\n\s*\n|\z)/s', "<p>$1</p>\n",
$chunk); // make paragraphs, including one at the end
89597
      $chunk = preg_replace('|<p>\s*</p>\n|', '', $chunk); // under certain
strange conditions it could create a P of entirely whitespace
89697
      $chunk = preg_replace("|<p>(<li.+?)</p>|", "$1", $chunk); // problem
with nested lists
89797
      $chunk = preg_replace('|<p><blockquote([^>]*)>|i',
"<blockquote$1><p>", $chunk);
89897
      $chunk = str_replace('</blockquote></p>', '</p></blockquote>',
$chunk);
89997
      $chunk = preg_replace('!<p>\s*(</?' . $block . '[^>]*>)!', "$1",
$chunk);
90097
      $chunk = preg_replace('!(</?' . $block . '[^>]*>)\s*</p>!', "$1",
$chunk);
90197
      $chunk = preg_replace('|(?<!<br />)\s*\n|', "<br />\n", $chunk); //
make line breaks
90297
      $chunk = preg_replace('!(</?' . $block . '[^>]*>)\s*<br />!', "$1",
$chunk);
90397
      $chunk = preg_replace('!<br
/>(\s*</?(?:p|li|div|th|pre|td|ul|ol)>)!', '$1', $chunk);
90497
      $chunk = preg_replace('/&([^#])(?![A-Za-z0-9]{1,8};)/', '&amp;$1',
$chunk);
90597
    }
90697
    $output .= $chunk;
90797
  }
90897
  return $output;
9090
}
910
911
/**
912
 * Very permissive XSS/HTML filter for admin-only use.
913
 *
914
 * Use only for fields where it is impractical to use the
915
 * whole filter system, but where some (mainly inline) mark-up
916
 * is desired (so check_plain() is not acceptable).
917
 *
918
 * Allows all tags that can be used inside an HTML body, save
919
 * for scripts and styles.
920
 */
9212366
function filter_xss_admin($string) {
9221878
  return filter_xss($string, array('a', 'abbr', 'acronym', 'address', 'b',
'bdo', 'big', 'blockquote', 'br', 'caption', 'cite', 'code', 'col',
'colgroup', 'dd', 'del', 'dfn', 'div', 'dl', 'dt', 'em', 'h1', 'h2', 'h3',
'h4', 'h5', 'h6', 'hr', 'i', 'img', 'ins', 'kbd', 'li', 'ol', 'p', 'pre',
'q', 'samp', 'small', 'span', 'strong', 'sub', 'sup', 'table', 'tbody',
'td', 'tfoot', 'th', 'thead', 'tr', 'tt', 'ul', 'var'));
9230
}
924
925
/**
926
 * Filters XSS. Based on kses by Ulf Harnhammar, see
927
 * http://sourceforge.net/projects/kses
928
 *
929
 * For examples of various XSS attacks, see:
930
 * http://ha.ckers.org/xss.html
931
 *
932
 * This code does four things:
933
 * - Removes characters and constructs that can trick browsers
934
 * - Makes sure all HTML entities are well-formed
935
 * - Makes sure all HTML tags and attributes are well-formed
936
 * - Makes sure no HTML tags contain URLs with a disallowed protocol (e.g.
javascript:)
937
 *
938
 * @param $string
939
 *   The string with raw HTML in it. It will be stripped of everything that
can cause
940
 *   an XSS attack.
941
 * @param $allowed_tags
942
 *   An array of allowed tags.
943
 */
9442366
function filter_xss($string, $allowed_tags = array('a', 'em', 'strong',
'cite', 'blockquote', 'code', 'ul', 'ol', 'li', 'dl', 'dt', 'dd')) {
945
  // Only operate on valid UTF-8 strings. This is necessary to prevent
cross
946
  // site scripting issues on Internet Explorer 6.
9471880
  if (!drupal_validate_utf8($string)) {
9480
    return '';
9490
  }
950
  // Store the input format
9511880
  _filter_xss_split($allowed_tags, TRUE);
952
  // Remove NUL characters (ignored by some browsers)
9531880
  $string = str_replace(chr(0), '', $string);
954
  // Remove Netscape 4 JS entities
9551880
  $string = preg_replace('%&\s*\{[^}]*(\}\s*;?|$)%', '', $string);
956
957
  // Defuse all HTML entities
9581880
  $string = str_replace('&', '&amp;', $string);
959
  // Change back only well-formed entities in our whitelist
960
  // Named entities
9611880
  $string = preg_replace('/&amp;([A-Za-z][A-Za-z0-9]*;)/', '&\1', $string);
962
  // Decimal numeric entities
9631880
  $string = preg_replace('/&amp;#([0-9]+;)/', '&#\1', $string);
964
  // Hexadecimal numeric entities
9651880
  $string = preg_replace('/&amp;#[Xx]0*((?:[0-9A-Fa-f]{2})+;)/', '&#x\1',
$string);
966
9671880
  return preg_replace_callback('%
968
    (
969
    <(?=[^a-zA-Z!/])  # a lone <
970
    |                 # or
971
    <[^>]*(>|$)       # a string that starts with a <, up until the > or
the end of the string
972
    |                 # or
973
    >                 # just a >
9741880
    )%x', '_filter_xss_split', $string);
9750
}
976
977
/**
978
 * Processes an HTML tag.
979
 *
980
 * @param $m
981
 *   An array with various meaning depending on the value of $store.
982
 *   If $store is TRUE then the array contains the allowed tags.
983
 *   If $store is FALSE then the array has one element, the HTML tag to
process.
984
 * @param $store
985
 *   Whether to store $m.
986
 * @return
987
 *   If the element isn't allowed, an empty string. Otherwise, the cleaned
up
988
 *   version of the HTML element.
989
 */
9902366
function _filter_xss_split($m, $store = FALSE) {
9911880
  static $allowed_html;
992
9931880
  if ($store) {
9941880
    $allowed_html = array_flip($m);
9951880
    return;
9960
  }
997
99881
  $string = $m[1];
999
100081
  if (substr($string, 0, 1) != '<') {
1001
    // We matched a lone ">" character
10023
    return '&gt;';
10030
  }
100481
  elseif (strlen($string) == 1) {
1005
    // We matched a lone "<" character
10062
    return '&lt;';
10070
  }
1008
100979
  if (!preg_match('%^<\s*(/\s*)?([a-zA-Z0-9]+)([^>]*)>?$%', $string,
$matches)) {
1010
    // Seriously malformed
101175
    return '';
10120
  }
1013
101479
  $slash = trim($matches[1]);
101579
  $elem = &$matches[2];
101679
  $attrlist = &$matches[3];
1017
101879
  if (!isset($allowed_html[strtolower($elem)])) {
1019
    // Disallowed HTML element
102076
    return '';
10210
  }
1022
10233
  if ($slash != '') {
10243
    return "</$elem>";
10250
  }
1026
1027
  // Is there a closing XHTML slash at the end of the attributes?
1028
  $attrlist = preg_replace('%(\s?)/\s*$%', '\1', $attrlist, -1, $count);
10293
  $xhtml_slash = $count ? ' /' : '';
10303
  
1031
  // Clean up attributes
1032
  $attr2 = implode(' ', _filter_xss_attributes($attrlist));
10333
  $attr2 = preg_replace('/[<>]/', '', $attr2);
10343
  $attr2 = strlen($attr2) ? ' ' . $attr2 : '';
10353
1036
  return "<$elem$attr2$xhtml_slash>";
10373
}
10380
1039
/**
1040
 * Processes a string of HTML attributes.
1041
 *
1042
 * @return
1043
 *   Cleaned up version of the HTML attributes.
1044
 */
1045
function _filter_xss_attributes($attr) {
10462366
  $attrarr = array();
10473
  $mode = 0;
10483
  $attrname = '';
10493
1050
  while (strlen($attr) != 0) {
10513
    // Was the last operation successful?
1052
    $working = 0;
10530
1054
    switch ($mode) {
1055
      case 0:
10560
        // Attribute name, href for instance
1057
        if (preg_match('/^([-a-zA-Z]+)/', $attr, $match)) {
10580
          $attrname = strtolower($match[1]);
10590
          $skip = ($attrname == 'style' || substr($attrname, 0, 2) ==
'on');
10600
          $working = $mode = 1;
10610
          $attr = preg_replace('/^[-a-zA-Z]+/', '', $attr);
10620
        }
10630
1064
        break;
10650
1066
      case 1:
10670
        // Equals sign or valueless ("selected")
1068
        if (preg_match('/^\s*=\s*/', $attr)) {
10690
          $working = 1; $mode = 2;
10700
          $attr = preg_replace('/^\s*=\s*/', '', $attr);
10710
          break;
10720
        }
10730
1074
        if (preg_match('/^\s+/', $attr)) {
10750
          $working = 1; $mode = 0;
10760
          if (!$skip) {
10770
            $attrarr[] = $attrname;
10780
          }
10790
          $attr = preg_replace('/^\s+/', '', $attr);
10800
        }
10810
1082
        break;
10830
1084
      case 2:
10850
        // Attribute value, a URL after href= for instance
1086
        if (preg_match('/^"([^"]*)"(\s+|$)/', $attr, $match)) {
10870
          $thisval = filter_xss_bad_protocol($match[1]);
10880
1089
          if (!$skip) {
10900
            $attrarr[] = "$attrname=\"$thisval\"";
10910
          }
10920
          $working = 1;
10930
          $mode = 0;
10940
          $attr = preg_replace('/^"[^"]*"(\s+|$)/', '', $attr);
10950
          break;
10960
        }
10970
1098
        if (preg_match("/^'([^']*)'(\s+|$)/", $attr, $match)) {
10990
          $thisval = filter_xss_bad_protocol($match[1]);
11000
1101
          if (!$skip) {
11020
            $attrarr[] = "$attrname='$thisval'";
11030
          }
11040
          $working = 1; $mode = 0;
11050
          $attr = preg_replace("/^'[^']*'(\s+|$)/", '', $attr);
11060
          break;
11070
        }
11080
1109
        if (preg_match("%^([^\s\"']+)(\s+|$)%", $attr, $match)) {
11100
          $thisval = filter_xss_bad_protocol($match[1]);
11110
1112
          if (!$skip) {
11130
            $attrarr[] = "$attrname=\"$thisval\"";
11140
          }
11150
          $working = 1; $mode = 0;
11160
          $attr = preg_replace("%^[^\s\"']+(\s+|$)%", '', $attr);
11170
        }
11180
1119
        break;
11200
    }
11210
1122
    if ($working == 0) {
11230
      // not well formed, remove and try again
1124
      $attr = preg_replace('/
11250
        ^
1126
        (
1127
        "[^"]*("|$)     # - a string that starts with a double quote, up
until the next double quote or the end of the string
1128
        |               # or
1129
        \'[^\']*(\'|$)| # - a string that starts with a quote, up until the
next quote or the end of the string
1130
        |               # or
1131
        \S              # - a non-whitespace character
1132
        )*              # any number of the above three
1133
        \s*             # any number of whitespaces
1134
        /x', '', $attr);
11350
      $mode = 0;
11360
    }
11370
  }
11380
1139
  // the attribute list ends with a valueless attribute like "selected"
1140
  if ($mode == 1) {
11413
    $attrarr[] = $attrname;
11420
  }
11430
  return $attrarr;
11443
}
11450
1146
/**
1147
 * Processes an HTML attribute value and ensures it does not contain an URL
1148
 * with a disallowed protocol (e.g. javascript:)
1149
 *
1150
 * @param $string
1151
 *   The string with the attribute value.
1152
 * @param $decode
1153
 *   Whether to decode entities in the $string. Set to FALSE if the $string
1154
 *   is in plain text, TRUE otherwise. Defaults to TRUE.
1155
 * @return
1156
 *   Cleaned up and HTML-escaped version of $string.
1157
 */
1158
function filter_xss_bad_protocol($string, $decode = TRUE) {
11592366
  static $allowed_protocols;
11601965
  if (!isset($allowed_protocols)) {
11611965
    $allowed_protocols =
array_flip(variable_get('filter_allowed_protocols', array('ftp', 'http',
'https', 'irc', 'mailto', 'news', 'nntp', 'rtsp', 'sftp', 'ssh', 'telnet',
'webcal')));
11621965
  }
11631965
1164
  // Get the plain text representation of the attribute value (i.e. its
meaning).
1165
  if ($decode) {
11661965
    $string = decode_entities($string);
11670
  }
11680
1169
  // Iteratively remove any invalid protocol found.
1170
1171
  do {
1172
    $before = $string;
11731965
    $colonpos = strpos($string, ':');
11741965
    if ($colonpos > 0) {
11751965
      // We found a colon, possibly a protocol. Verify.
1176
      $protocol = substr($string, 0, $colonpos);
11771770
      // If a colon is preceded by a slash, question mark or hash, it
cannot
1178
      // possibly be part of the URL scheme. This must be a relative URL,
1179
      // which inherits the (safe) protocol of the base document.
1180
      if (preg_match('![/?#]!', $protocol)) {
11811770
        break;
11820
      }
11830
      // Per RFC2616, section 3.2.3 (URI Comparison) scheme comparison must
be case-insensitive
1184
      // Check if this is a disallowed protocol.
1185
      if (!isset($allowed_protocols[strtolower($protocol)])) {
11861770
        $string = substr($string, $colonpos + 1);
11870
      }
11880
    }
11891770
  } while ($before != $string);
11901965
  return check_plain($string);
11911965
}
11920
1193
/**
1194
 * @} End of "Standard filters".
1195
 */
1196