Blame view

sm/net.func.php 4.55 KB
42868d70   andryeyev   Создал GIT
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
  <?php
  // PHP xml sitemap generator
  
  
  // this function uses the cURL multi interface to fetch a list of pages concurrently
  function curlMultiGetPage($urls) {
          // Downloads every URL in $urls in parallel through one cURL multi handle.
          //   $urls   - array of URL strings
          //   returns - array with the same keys as $urls; each value is the response
          //             body, or "" when the transfer failed or returned nothing
          // The user agent is read from the global $CONFIG["agent"] when it is set.
          global $CONFIG;
          $htmls = array();
          // Nothing to fetch: do not touch cURL at all.
          if (!is_array($urls) || count($urls) == 0)
                  return $htmls;

          $mc = curl_multi_init();
          $ch = array();
          foreach ($urls as $i => $url) {
                  // Pre-fill so every requested key is present in the result.
                  $htmls[$i] = "";
                  $handle = curl_init($url);
                  if ($handle === false)
                          continue;
                  curl_setopt($handle, CURLOPT_RETURNTRANSFER, 1);
                  if (isset($CONFIG["agent"]))
                          curl_setopt($handle, CURLOPT_USERAGENT, $CONFIG["agent"]);
                  curl_setopt($handle, CURLOPT_HEADER, 0);
                  curl_setopt($handle, CURLOPT_CONNECTTIMEOUT, 10);
                  curl_setopt($handle, CURLOPT_FOLLOWLOCATION, 1);
                  curl_multi_add_handle($mc, $handle);
                  $ch[$i] = $handle;
          }

          // Drive all transfers. curl_multi_select() blocks until there is socket
          // activity, so the loop does not spin at 100% CPU while waiting.
          $active = 0;
          do {
                  do {
                          $status = curl_multi_exec($mc, $active);
                  } while ($status == CURLM_CALL_MULTI_PERFORM);
                  if ($status != CURLM_OK)
                          break;
                  // -1 means there was nothing to wait on; back off briefly instead.
                  if ($active && curl_multi_select($mc, 1.0) == -1)
                          usleep(1000);
          } while ($active);

          // Collect per-transfer results from the multi handle's message queue;
          // curl_errno() alone is not a reliable signal for multi-driven transfers.
          $failed = array();
          while (($info = curl_multi_info_read($mc)) !== false) {
                  if ($info["result"] != CURLE_OK) {
                          $key = array_search($info["handle"], $ch, true);
                          if ($key !== false)
                                  $failed[$key] = true;
                  }
          }

          foreach ($ch as $i => $handle) {
                  $data = curl_multi_getcontent($handle);
                  if (!isset($failed[$i]) && !curl_errno($handle) && (string)$data !== "")
                          $htmls[$i] = $data;
                  // Detach and free each easy handle, then the multi handle below.
                  curl_multi_remove_handle($mc, $handle);
                  curl_close($handle);
          }
          curl_multi_close($mc);
          return $htmls;
  }
  
  //returns domain without http:// and without ending slash
  function formatDomain($domain) {
          // Normalises a user-supplied domain string:
          //   - removes every space
          //   - removes "http://" (and the mistyped backslash forms "http:\\" / "http:\")
          //   - removes one trailing "/"
          //   $domain - raw domain or URL string
          //   returns - the cleaned string ("" stays "")
          $domain = str_replace(" ", "", $domain);
          // The two-backslash form must be tried before the one-backslash form,
          // otherwise "http:\\host" would be left as "\host".
          $domain = str_replace(array("http://", "http:\\\\", "http:\\"), "", $domain);
          // Look at the LAST character only. Comparing strpos() (first slash, or
          // false) with strlen()-1 wiped one-character inputs and missed a
          // trailing slash that followed a path.
          if ($domain !== "" && substr($domain, -1) === "/")
                  $domain = (string)substr($domain, 0, -1);
          return $domain;
  }
  
  function rebuildQuery($query) {
          // Removes duplicate terms from a query string.
          //   $query  - query string without the leading "?", terms separated by "&"
          //   returns - the unique, non-empty terms joined by "&".
          // Terms are emitted last-to-first (the order this function has always
          // produced), so "a=1&b=2" becomes "b=2&a=1".
          $newterms = array();
          foreach (array_reverse(explode("&", $query)) as $term) {
                  // Skip empty terms produced by "&&" or a leading/trailing "&".
                  // They must not end processing: stopping at the first falsy
                  // term used to drop every parameter before it.
                  if ($term === "")
                          continue;
                  // Strict comparison so numeric-looking terms such as "10" and
                  // "1e1" are not mistaken for duplicates.
                  if (!in_array($term, $newterms, true))
                          $newterms[] = $term;
          }
          return implode("&", $newterms);
  }
  
  // this function corrects an url rebuilding it on the base domain
  function correctURL($url, $domain) {
          // Turns a link found on a page into an absolute "http://" URL.
          //   $url    - link as found in the page (absolute, protocol-relative,
          //             root-relative or relative to the current page)
          //   $domain - absolute URL of the page the link was found on
          //   returns - http:, mailto: and javascript: links unchanged (apart from
          //             removed apostrophes); everything else rebuilt as
          //             "http://host[:port]/path[?query]" with duplicate query
          //             terms removed. Fragments are dropped, and other schemes
          //             that carry a host (e.g. https) are rewritten to http.

          // Protocol-relative links ("//host/path") are treated as plain http.
          if (strncmp($url, "//", 2) == 0) {
                  $url = "http://".substr($url, 2);
          }
          $url = str_replace("'", "", $url);
          $url_info = parse_url($url);
          // parse_url() returns false for seriously malformed input and omits
          // absent components, so normalise everything to strings up front.
          if (!is_array($url_info))
                  $url_info = array();
          $scheme = isset($url_info["scheme"]) ? $url_info["scheme"] : "";
          $host   = isset($url_info["host"])   ? $url_info["host"]   : "";
          $path   = isset($url_info["path"])   ? $url_info["path"]   : "";
          $query  = isset($url_info["query"])  ? $url_info["query"]  : "";

          if ($scheme == "http" || $scheme == "mailto" || $scheme == "javascript")
                  return $url;

          if ($host === "") {
                  // Relative link: resolve against the page it was found on.
                  $cur_link = parse_url($domain);
                  if (!is_array($cur_link))
                          $cur_link = array();
                  $newurl = isset($cur_link["host"]) ? $cur_link["host"] : "";
                  // Keep a non-default port, otherwise the result points at a
                  // different server than the page the link came from.
                  if (isset($cur_link["port"]))
                          $newurl .= ":".$cur_link["port"];
                  $base_path = isset($cur_link["path"]) ? $cur_link["path"] : "";
                  if (strncmp($path, "./", 2) == 0)
                          $path = (string)substr($path, 2);
                  if ($path !== "") {
                          if ($path[0] == "/")
                                  $newurl .= $path;
                          else {
                                  // Directory of the current page; stays empty when
                                  // the page sits directly under the root.
                                  $curpath = "";
                                  if (($ps = strrpos($base_path, "/")) > 0)
                                          $curpath = substr($base_path, 0, $ps);
                                  $newurl .= "/".$curpath."/".$path;
                          }
                  }
          } else {
                  $newurl = $host;
                  if ($path !== "" && $path[0] == "/")
                          $newurl .= $path;
                  else
                          $newurl .= "/".$path;
          }
          // Collapse duplicate slashes in host+path only; doing this after the
          // query is appended would corrupt values such as "?u=http://x".
          while (strpos($newurl, "//") !== false)
                  $newurl = str_replace("//", "/", $newurl);
          // Decode "&amp;" BEFORE the query is split on "&"; decoding afterwards
          // leaves broken "amp;name=value" terms behind.
          if ($query !== "")
                  $newurl .= "?".rebuildQuery(str_replace("&amp;", "&", $query));
          $newurl = "http://".$newurl;
          $newurl = str_replace("&amp;", "&", $newurl);
          return $newurl;
  }
  
  // $url is an external link ? [both $url and $versus need http://]
  function isLinkExternal($url, $versus){
          // Decides whether $url should be treated as external to the site $versus.
          //   $url    - link to classify (normally absolute, with http://)
          //   $versus - absolute URL of the site being compared against
          //   returns - true when the link uses a scheme other than http, points
          //             at another host (a "www." prefix on either side is
          //             ignored), uses another port, lives under a "/~user" path,
          //             or ends in one of the skipped file extensions;
          //             false otherwise.
          $url_info = parse_url($url);
          $dom_info = parse_url($versus);
          // parse_url() returns false on malformed input and omits absent parts.
          if (!is_array($url_info))
                  $url_info = array();
          if (!is_array($dom_info))
                  $dom_info = array();

          // Schemes and host names are case-insensitive.
          $scheme = isset($url_info["scheme"]) ? strtolower($url_info["scheme"]) : "";
          if ($scheme !== "http" && $scheme !== "")
                  return true;

          $url_host = isset($url_info["host"]) ? strtolower($url_info["host"]) : "";
          $dom_host = isset($dom_info["host"]) ? strtolower($dom_info["host"]) : "";
          if ($url_host !== "" && $url_host !== $dom_host
                  && $url_host !== "www.".$dom_host
                  && "www.".$url_host !== $dom_host)
                          return true;

          // A link without a host is relative and inherits the site's port, so
          // ports are compared only for absolute links. A missing port means the
          // http default, so ":80" and no port are the same.
          if ($url_host !== "") {
                  $url_port = isset($url_info["port"]) ? (int)$url_info["port"] : 80;
                  $dom_port = isset($dom_info["port"]) ? (int)$dom_info["port"] : 80;
                  if ($url_port !== $dom_port)
                          return true;
          }

          $path = isset($url_info["path"]) ? $url_info["path"] : "";
          // "/~user/..." paths are treated as external.
          if (isset($path[1]) && $path[1] == "~")
                  return true;

          // Skip binary/media files by extension, regardless of letter case.
          $dot = strrchr($path, ".");
          $type = ($dot === false) ? "" : strtolower((string)substr($dot, 1));
          if (in_array($type, array("jpg", "jpeg", "png", "gif", "rar", "db"), true))
                  return true;
          return false;
  }
  
  ?>