Blame view

vendor/ezyang/htmlpurifier/maintenance/generate-entity-file.php 2.16 KB
abf1649b   andryeyev   Чистая установка ...
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
  #!/usr/bin/php
  <?php
  
  // Switch the working directory to the directory containing this script so
  // that the relative paths used below (common.php, the entity directory and
  // the output file) resolve regardless of where the script was invoked from.
  chdir(dirname(__FILE__));
  require_once 'common.php';
  // NOTE(review): assertCli() is not defined in this file; presumably it comes
  // from common.php and refuses to continue outside the CLI - confirm there.
  assertCli();
  
  /**
   * @file
   * Parses *.ent files into an entity lookup table, and then serializes and
   * writes the whole table to a file. The resulting file is cached so
   * that this script does not need to be run. This script should rarely,
   * if ever, be run, since HTML's entities are fairly immutable.
   */
  
  // Directory holding the *.ent entity definition files, relative to the
  // directory of this script (the working directory is changed to it at
  // startup). The trailing slash is required: file names are appended to
  // this string directly when the files are read.
  $entity_dir = '../docs/entities/';
  
  // Path of the file that receives the serialized entity lookup table.
  // The script refuses to run if this file already exists.
  $output_file = '../library/HTMLPurifier/EntityLookup/entities.ser';
  
  /**
   * Encodes a Unicode code point as a UTF-8 byte sequence.
   *
   * Originally courtesy of a PHP manual comment; extended with the four-byte
   * form so that code points outside the Basic Multilingual Plane
   * (U+10000 and above) are encoded correctly instead of being squeezed
   * into an invalid three-byte sequence.
   *
   * @param int|string $dec Code point, as an integer or a decimal numeric
   *                        string (e.g. a regex capture).
   * @return string The UTF-8 encoding of the code point (one to four bytes).
   */
  function unichr($dec)
  {
      // Callers may pass a numeric string; normalise once so the bitwise
      // operators below always work on an integer.
      $dec = (int) $dec;

      if ($dec < 128) {
          // U+0000..U+007F: single byte, identical to ASCII.
          return chr($dec);
      }
      if ($dec < 2048) {
          // U+0080..U+07FF: 110xxxxx 10xxxxxx
          return chr(192 | ($dec >> 6))
              . chr(128 | ($dec & 63));
      }
      if ($dec < 65536) {
          // U+0800..U+FFFF: 1110xxxx 10xxxxxx 10xxxxxx
          return chr(224 | ($dec >> 12))
              . chr(128 | (($dec >> 6) & 63))
              . chr(128 | ($dec & 63));
      }
      // U+10000 and above: 11110xxx 10xxxxxx 10xxxxxx 10xxxxxx
      // The lead byte is masked to three payload bits so it stays a valid byte.
      return chr(240 | (($dec >> 18) & 7))
          . chr(128 | (($dec >> 12) & 63))
          . chr(128 | (($dec >> 6) & 63))
          . chr(128 | ($dec & 63));
  }
  
  // Every failure path below reports on STDERR and exits with status 1.
  // exit() with a string argument would print the message but still exit
  // with status 0, making failures invisible to calling shells and scripts.
  if (!is_dir($entity_dir)) {
      fwrite(STDERR, "Fatal Error: Can't find entity directory.\n");
      exit(1);
  }
  // Never overwrite an existing cache file; it must be removed by hand first.
  if (file_exists($output_file)) {
      fwrite(STDERR, "Fatal Error: output file already exists.\n");
      exit(1);
  }

  // opendir()'s own warning is silenced because the failure is reported
  // explicitly right below.
  $dh = @opendir($entity_dir);
  if (!$dh) {
      fwrite(STDERR, "Fatal Error: Cannot read entity directory.\n");
      exit(1);
  }

  // Collect the names of all non-hidden files with the "ent" extension.
  $entity_files = array();
  while (($file = readdir($dh)) !== false) {
      // Skips ".", ".." and hidden files.
      if (strncmp($file, '.', 1) === 0) {
          continue;
      }
      if (pathinfo($file, PATHINFO_EXTENSION) !== 'ent') {
          continue;
      }
      $entity_files[] = $file;
  }
  closedir($dh);

  if (!$entity_files) {
      fwrite(STDERR, "Fatal Error: No entity files to parse.\n");
      exit(1);
  }

  // readdir() returns entries in filesystem order. Sort so that, should two
  // files define the same entity, which definition wins is deterministic.
  sort($entity_files);

  // Maps entity name => UTF-8 encoded character.
  $entity_table = array();
  // Matches declarations of the form <!ENTITY name "&#NNN;"> as well as the
  // double-escaped form <!ENTITY name "&#38;#NNN;">, capturing the entity
  // name and the decimal code point.
  $regexp = '/<!ENTITY\s+([A-Za-z0-9]+)\s+"&#(?:38;#)?([0-9]+);">/';

  foreach ($entity_files as $file) {
      $contents = file_get_contents($entity_dir . $file);
      if ($contents === false) {
          // Abort rather than silently writing an incomplete table.
          fwrite(STDERR, "Fatal Error: Cannot read entity file $file.\n");
          exit(1);
      }
      $matches = array();
      if (preg_match_all($regexp, $contents, $matches, PREG_SET_ORDER) === false) {
          fwrite(STDERR, "Fatal Error: Cannot parse entity file $file.\n");
          exit(1);
      }
      foreach ($matches as $match) {
          $entity_table[$match[1]] = unichr($match[2]);
      }
  }

  $output = serialize($entity_table);

  // file_put_contents() opens, writes and closes in one call; a short or
  // failed write is treated as fatal.
  $written = file_put_contents($output_file, $output);
  if ($written === false || $written !== strlen($output)) {
      fwrite(STDERR, "Fatal Error: Cannot write output file.\n");
      exit(1);
  }

  echo "Completed successfully.";
  
  // vim: et sw=4 sts=4