Blame view

src/lib/AparserService.php 8.13 KB
ef60cd4d   Administrator   first commit
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
  <?php
  
  namespace {
  
      /**
       * Bootstrap: load the bundled A-Parser PHP client that lives in the
       * A_Parser/ directory next to this file.
       *
       * NOTE(review): the guard constant is called PHPEXCEL_ROOT although it
       * points at this file's own directory and has nothing to do with
       * spreadsheets here; presumably copied from another library's bootstrap.
       * It is still defined (when absent) so any code relying on it keeps working.
       * TODO: confirm nothing else depends on it and switch to a dedicated name.
       */
      if (!defined('PHPEXCEL_ROOT')) {
          define('PHPEXCEL_ROOT', dirname(__FILE__) . '/');
      }

      // Loaded outside the guard: previously the client was silently skipped
      // whenever PHPEXCEL_ROOT had already been defined by other code.
      // require_once keeps repeated inclusion harmless.
      require_once dirname(__FILE__) . '/A_Parser/aparser-api-php-client.php';
  
      /**
       * Talks to an A-Parser instance over its HTTP JSON API and scrapes
       * product pages (title, price, availability text) using regular
       * expressions supplied by the caller.
       */
      class AparserService
      {
          /**
           * Endpoint used when no URL is passed to the constructor.
           *
           * NOTE(review): endpoint and password are hard-coded credentials;
           * they should come from configuration. They remain the defaults so
           * existing `new AparserService()` callers behave as before.
           */
          const DEFAULT_API_URL = 'http://195.248.225.110:9091/API';

          /** Password sent with every API request when none is passed to the constructor. */
          const DEFAULT_API_PASSWORD = 'qwerty1';

          /** @var string A-Parser API endpoint. */
          private $apiUrl = self::DEFAULT_API_URL;

          /** @var string A-Parser API password. */
          private $apiPassword = self::DEFAULT_API_PASSWORD;

          /**
           * @param string $apiUrl      A-Parser API endpoint (defaults to the built-in one).
           * @param string $apiPassword A-Parser API password (defaults to the built-in one).
           */
          public function __construct($apiUrl = self::DEFAULT_API_URL, $apiPassword = self::DEFAULT_API_PASSWORD)
          {
              $this->apiUrl = $apiUrl;
              $this->apiPassword = $apiPassword;
          }

          /**
           * Current Unix timestamp with microsecond precision.
           *
           * @return float
           */
          public function microtime_float()
          {
              return microtime(true);
          }

          /**
           * Sends all queries to A-Parser in one "bulkRequest" (SE::Google parser,
           * "Use AntiGate" preset) and maps the response back onto the input rows.
           *
           * Prints the elapsed time (seconds) to the output before returning,
           * exactly as the original implementation did.
           *
           * @param array $query List of rows, each with the keys 'query',
           *                     'item_id' and 'competitor_id'.
           *
           * @return array List with one entry per result returned by the API; keys:
           *               'google_link' (first "http:..." URL found in the request
           *               log, or null), 'links' (first SERP entry or ''),
           *               'item_id', 'competitor_id', 'success', 'result'.
           *               Empty array when $query is empty (no request is sent).
           *
           * @throws \Exception When the API cannot be reached, returns an empty
           *                    body, or returns something that is not JSON.
           */
          public function getParser($query)
          {
              // Nothing to look up: skip the HTTP round-trip entirely.
              if (empty($query)) {
                  return array();
              }

              $time_start = $this->microtime_float();
              $queries = $items = $competitors = $result = array();

              foreach ($query as $one) {
                  $queries[] = $one['query'];
                  $items[] = $one['item_id'];
                  $competitors[] = $one['competitor_id'];
              }

              $request = json_encode(array(
                  "action" => "bulkRequest",
                  "data" => array(
                      "parser" => "SE::Google",
                      "preset" => "Use AntiGate",
                      "threads" => 200,
                      "rawResults" => 1,
                      "queries" => $queries
                  ),
                  'password' => $this->apiPassword
              ));

              $ch = curl_init();
              curl_setopt($ch, CURLOPT_URL, $this->apiUrl);
              curl_setopt($ch, CURLOPT_POST, 1);
              curl_setopt($ch, CURLOPT_POSTFIELDS, $request);
              curl_setopt($ch, CURLOPT_RETURNTRANSFER, true);
              // Both headers go into ONE call: each CURLOPT_HTTPHEADER call
              // replaces the previous list, so setting them separately dropped
              // the Content-Length header.
              curl_setopt($ch, CURLOPT_HTTPHEADER, array(
                  'Content-Type: text/plain; charset=UTF-8',
                  'Content-Length: ' . strlen($request),
              ));

              $response = curl_exec($ch);
              // Release the handle before any exception can be thrown.
              curl_close($ch);

              if (!$response) {
                  throw new \Exception("А-Парсер не работает");
              }

              $response = json_decode($response, true);
              if (!is_array($response)) {
                  // Body was not valid JSON; there is nothing usable to map.
                  throw new \Exception("А-Парсер не работает");
              }

              $rows = (isset($response['data']['results']) && is_array($response['data']['results']))
                  ? $response['data']['results']
                  : array();
              $logs = (isset($response['data']['logs']) && is_array($response['data']['logs']))
                  ? $response['data']['logs']
                  : array();

              $total = count($rows);
              for ($i = 0; $i < $total; $i++) {
                  $row = (isset($rows[$i]) && is_array($rows[$i])) ? $rows[$i] : array();

                  // Prefer the URL from log slot 5, fall back to slot 4.
                  $google_link = $this->findHttpUrlInLog($logs, $i, 5);
                  if (empty($google_link)) {
                      $google_link = $this->findHttpUrlInLog($logs, $i, 4);
                  }

                  $result[$i]['google_link'] = $google_link;
                  $result[$i]['links'] = isset($row['serp'][0]) ? $row['serp'][0] : '';
                  $result[$i]['item_id'] = isset($items[$i]) ? $items[$i] : null;
                  $result[$i]['competitor_id'] = isset($competitors[$i]) ? $competitors[$i] : null;
                  $result[$i]['success'] = isset($row['info']['stats']['success'])
                      ? $row['info']['stats']['success']
                      : null;
                  $result[$i]['result'] = isset($row['totalcount']) ? $row['totalcount'] : null;
              }

              $time_end = $this->microtime_float();
              $time = $time_end - $time_start;
              // NOTE(review): timing is echoed to the output, kept for backward
              // compatibility; consider routing it to a logger instead.
              print($time);

              return $result;
          }

          /**
           * Re-checks a single, already known product link.
           *
           * Fetches $query['link'], waits one second, then applies the regexes.
           * Capture group 1 of each regex is used as the value.
           *
           * @param array  $query        Row with the keys 'link' and 'google_link'.
           * @param string $h1_reg       Regex whose group 1 is the product title.
           * @param string $price_reg    Regex whose group 1 is the price; every
           *                             non-digit character is stripped from it.
           * @param string $exist_regexr Optional regex whose group 1 is the
           *                             availability text; skipped when empty.
           *
           * @return array Keys 'price', 'h1', 'exist', 'link', 'google_link',
           *               'success'. When title or price is not found, 'price'
           *               and 'exist' are '' and 'h1' is the "Товар пропал" marker.
           */
          public function parseExistLink($query, $h1_reg, $price_reg, $exist_regexr)
          {
              $page = $this->fetchPage($query['link']);

              // Pause after each fetch, as the original implementation did.
              sleep(1);

              $found = $this->scrapePage($page, $h1_reg, $price_reg, $exist_regexr);
              $google_link = isset($query['google_link']) ? $query['google_link'] : null;

              $result = array();
              if ($found !== null) {
                  $result['price'] = $found['price'];
                  $result['h1'] = $found['h1'];
                  $result['exist'] = $found['exist'];
              } else {
                  $result['price'] = '';
                  $result['h1'] = 'Товар пропал';
                  $result['exist'] = '';
              }
              $result['link'] = $query['link'];
              $result['google_link'] = $google_link;
              $result['success'] = '1';

              return $result;
          }

          /**
           * Looks up new product links through A-Parser and scrapes each one.
           *
           * @param array  $query        Rows accepted by getParser().
           * @param string $h1_reg       Regex whose group 1 is the product title.
           * @param string $price_reg    Regex whose group 1 is the price; every
           *                             non-digit character is stripped from it.
           * @param string $exist_regexr Optional regex whose group 1 is the
           *                             availability text; skipped when empty.
           *
           * @return array One entry per getParser() row, always with
           *               'link_status' => 'new'. Scraped entries carry 'price',
           *               'h1', 'exist', 'link', 'google_link', 'item_id',
           *               'competitor_id', 'result'; entries without a usable
           *               page carry 'item_id', 'competitor_id', 'google_link',
           *               'link' (''), 'exist' (''), 'success', 'result'.
           *               Empty array when nothing was returned.
           *
           * @throws \Exception Propagated from getParser().
           */
          public function parseNewLink($query, $h1_reg, $price_reg, $exist_regexr)
          {
              // Initialised up front so an empty lookup returns array(), not an
              // undefined variable.
              $result = array();

              foreach ($this->getParser($query) as $link_array) {
                  $found = null;
                  if (!empty($link_array['links'])) {
                      $page = $this->fetchPage($link_array['links']);
                      $found = $this->scrapePage($page, $h1_reg, $price_reg, $exist_regexr);
                  }

                  if ($found !== null) {
                      $result[] = array(
                          'price' => $found['price'],
                          'h1' => $found['h1'],
                          'exist' => $found['exist'],
                          'link' => $link_array['links'],
                          'google_link' => $link_array['google_link'],
                          'item_id' => $link_array['item_id'],
                          'competitor_id' => $link_array['competitor_id'],
                          'result' => $link_array['result'],
                          'link_status' => 'new',
                      );
                      continue;
                  }

                  // No link, or the page did not yield both title and price.
                  $result[] = array(
                      'item_id' => $link_array['item_id'],
                      'competitor_id' => $link_array['competitor_id'],
                      'google_link' => $link_array['google_link'],
                      'link' => '',
                      'exist' => '',
                      'success' => $link_array['success'],
                      'result' => $link_array['result'],
                      'link_status' => 'new',
                  );
              }

              return $result;
          }

          /**
           * Returns the first "http:..." URL found in one slot of a request log.
           *
           * @param array $logs  The 'logs' section of the API response.
           * @param int   $index Position of the request within the bulk call.
           * @param int   $slot  Log line to inspect.
           *
           * @return string|null The matched URL, or null when the slot is
           *                     missing or holds no URL.
           */
          private function findHttpUrlInLog($logs, $index, $slot)
          {
              if (!isset($logs[$index][1][$slot][2]) || !is_string($logs[$index][1][$slot][2])) {
                  return null;
              }

              if (preg_match("/http\:(.[^\s]*)/", $logs[$index][1][$slot][2], $match)) {
                  return $match[0];
              }

              return null;
          }

          /**
           * Downloads a page with a Googlebot user agent, following redirects.
           *
           * @param string $url Page address.
           *
           * @return string Response body, or '' when the request failed.
           */
          private function fetchPage($url)
          {
              $ch = curl_init();
              curl_setopt($ch, CURLOPT_URL, $url);
              curl_setopt($ch, CURLOPT_RETURNTRANSFER, 1);
              curl_setopt($ch, CURLOPT_FOLLOWLOCATION, 1);
              curl_setopt($ch, CURLOPT_USERAGENT, "Mozilla/5.0 (compatible; Googlebot/2.1; +http://www.google.com/bot.html)");
              // Let cURL negotiate AND decode "deflate": a hand-written
              // Accept-Encoding header asks for compression without enabling
              // decoding, so a compressed body would reach the regexes undecoded.
              curl_setopt($ch, CURLOPT_ENCODING, 'deflate');
              curl_setopt($ch, CURLOPT_HTTPHEADER, array(
                  'Accept: text/html,application/xhtml+xml,application/xml;q=0.9,*;q=0.8',
                  'Accept-Language: ru,en-us;q=0.7,en;q=0.3',
                  'Accept-Charset: windows-1251,utf-8;q=0.7,*;q=0.7',
              ));

              $page = curl_exec($ch);
              curl_close($ch);

              return is_string($page) ? $page : '';
          }

          /**
           * Applies the caller's regexes to a downloaded page.
           *
           * @param string $page         Page body.
           * @param string $h1_reg       Title regex (group 1 is used).
           * @param string $price_reg    Price regex (group 1 is used).
           * @param string $exist_regexr Availability regex (group 1), may be empty.
           *
           * @return array|null Array with 'price', 'h1', 'exist', or null when
           *                    either the title or the price did not match.
           */
          private function scrapePage($page, $h1_reg, $price_reg, $exist_regexr)
          {
              $price = $h1 = $exist = array();

              preg_match($price_reg, $page, $price);
              preg_match($h1_reg, $page, $h1);
              if (!empty($exist_regexr)) {
                  preg_match($exist_regexr, $page, $exist);
              }

              if (empty($price) || empty($h1)) {
                  return null;
              }

              $existText = '';
              if (!empty($exist)) {
                  $existText = mb_convert_encoding(isset($exist[1]) ? $exist[1] : '', 'utf-8', 'windows-1251, utf-8');
              }

              return array(
                  // Keep digits only: currency signs, spaces and separators are dropped.
                  'price' => preg_replace("/[^0-9]/", '', isset($price[1]) ? $price[1] : ''),
                  'h1' => mb_convert_encoding(isset($h1[1]) ? $h1[1] : '', 'utf-8', 'windows-1251, utf-8'),
                  'exist' => !empty($existText) ? $existText : '',
              );
          }

      }
  }