ServiceExcel->getCSV($route_proxy, ';'); foreach($result_proxy as $one_proxy){ $model = new \proxy(); $model->proxy = $one_proxy[1].':'.$one_proxy[2]; $model->status = 1; $model->save(); } die('готово'); }

    /**
     * Walks every row of the price spreadsheet, looks the article up on the
     * site through a proxy and stores what was found in an existParser record.
     *
     * Columns read from each row: [0] product name, [1] article code (also
     * the record's item_id), [7] current price.
     *
     * A row whose record already has update_status set is skipped. Otherwise
     * the flag is set before fetching and reset to 0 when the page could not
     * be classified or contained no items.
     *
     * NOTE(review): several patterns below start with a bare line break or
     * with "]*>" and $link_reg has no capture group although its group 1 is
     * read as the link target. They look truncated (markup lost somewhere
     * upstream) and are reproduced exactly as found - confirm against the
     * original source before relying on them.
     */
    public function mainAction()
    {
        $time_start = $this->microtime_float();
        $final_list = [];

        $item_reg = '/class="tabletitle"/';
        $item_not_found = '/По вашему запросу ничего не найдено/';
        $day_reg = '/(.[^\/]*)\sдн\./';
        $link_name_reg = '/
(.*?)<\/a><\/div>/';
        $link_name_reg_two = '/
(.*?)<\/div>/';
        $link_reg = '/Поиск<\/a><\/td>/';
        $items_reg = '/(.*?)<\/td><\/tr>(.*?)<\/tr>/';
        $item_desc = '/
(.[^\/]*)<\/div>/';
        $item_price = '/(.[^\/]*)грн.<\/td>/';

        $this->exelphp->addFile(STORAGE_PATH . 'temp/Price_ia.xls');
        $rows = $this->exelphp->getRows();

        foreach ($rows as $row) {
            // Bound parameter instead of interpolating spreadsheet data into
            // the condition string.
            $model = \existParser::findFirst([
                'conditions' => 'item_id = :item_id:',
                'bind'       => ['item_id' => $row[1]],
            ]);

            if ($model instanceof \existParser) {
                if ($model->update_status) {
                    // Flag already set for this article: nothing to do.
                    continue;
                }
                $model->update_status = 1;
                $model->save();
            } else {
                // First time this article is seen: create a placeholder record.
                $model = new \existParser();
                $model->update_status = 1;
                $this->storeParseResult($model, $row, 0, '-----', '----', '----');
            }

            $proxy = \proxy::getInstance();
            $page = $this->getPage('price.aspx?sr=-4&pcode=' . $row[1], $proxy);
            print_r($page . " \n");

            $items = [];

            if (preg_match($item_reg, $page)) {
                // The page already lists offers.
                preg_match_all($items_reg, $page, $one_item);
                $items[] = $one_item;
            } elseif (preg_match($link_reg, $page)) {
                // The page is a list of candidate products: follow the links
                // whose title matches the spreadsheet name.
                preg_match_all($link_reg, $page, $links);
                preg_match_all($link_name_reg, $page, $links_name);
                preg_match_all($link_name_reg_two, $page, $links_name_two);
                print_r($links_name);
                print_r($links_name_two);

                $hrefs = isset($links[1]) ? $links[1] : [];
                $real_links = [];
                // At most one link is taken per title pattern.
                foreach ([$links_name[1], $links_name_two[1]] as $titles) {
                    $link = $this->pickMatchingLink($titles, $hrefs, $row[0]);
                    if ($link !== null) {
                        $real_links[] = $link;
                    }
                }

                if (!$real_links) {
                    $this->storeParseResult($model, $row, 0, 'нет в листе ссылок на товары', '-----', $row[0]);
                    continue;
                }

                foreach ($real_links as $link) {
                    $page = $this->getPage($link, $proxy);
                    preg_match_all($items_reg, $page, $one_item);
                    $items[] = $one_item;
                }
            } elseif (preg_match($item_not_found, $page)) {
                $this->storeParseResult($model, $row, 0, 'нет на сайте', '-----', $row[0]);
                continue;
            } else {
                // Unclassifiable page: release the row so it can be retried.
                $model->update_status = 0;
                $model->save();
                continue;
            }

            if (empty($items[0][0])) {
                $final_list[] = [$row[1], str_replace('.', ',', $row[7]), '----', '----', '----', '----'];
                $model->update_status = 0;
                $model->save();
                continue;
            }

            print("\n items \n");
            print($row[0]);
            print_r($items);

            foreach ($items as $item) {
                // Only the first full match of each fetched page is examined.
                $chunk = isset($item[0][0]) ? $item[0][0] : '';

                preg_match_all($link_name_reg, $chunk, $name);
                // preg_match_all always fills $name, so test the captures
                // themselves to decide whether the fallback pattern is needed.
                if (empty($name[1])) {
                    preg_match_all($link_name_reg_two, $chunk, $name);
                }
                preg_match_all($item_desc, $chunk, $desc);
                preg_match_all($item_price, $chunk, $price);
                preg_match_all($day_reg, $chunk, $days);

                if (count($price[1]) > 1) {
                    $price_sort_int = [];
                    foreach ($price[1] as $ps) {
                        $cena = str_replace(",", '.', $ps);
                        $p_int = preg_replace("/[^x\d|*\.]/", "", $cena);
                        // The int cast stops at the separator, so prices are
                        // compared as whole units.
                        $price_sort_int[] = (int)str_replace('.', ',', $p_int);
                    }
                    sort($price_sort_int);
                    echo($row[1]);
                    print_r($price_sort_int);
                    $model->lower_price = str_replace(',', '.', $price_sort_int[0]);
                }

                // Defaults keep a row without parsed delivery days from
                // inheriting the previous row's price and day.
                $real_price = '-----';
                $day_val = 0;
                if (!empty($days[1])) {
                    // Fall back to the first offer, prefer a one-day offer.
                    $day_val = $days[1][0];
                    $real_price = isset($price[1][0]) ? $price[1][0] : '-----';
                    foreach ($days[1] as $k => $day) {
                        if ($day == 1 && isset($price[1][$k])) {
                            $real_price = $price[1][$k];
                            $day_val = $day;
                            break;
                        }
                    }
                }

                $found_name = isset($name[1][0]) ? $name[1][0] : '';
                $found_desc = isset($desc[1][0]) ? $desc[1][0] : '';

                $final_list[] = [
                    $row[1],
                    str_replace('.', ',', $row[7]),
                    $found_name,
                    $found_desc,
                    $real_price,
                    $day_val,
                ];

                if ($this->namesOverlap($row[0], $found_name)) {
                    $this->storeParseResult($model, $row, $day_val, $found_desc, $real_price, $found_name);
                } else {
                    $this->storeParseResult($model, $row, 0, 'не подошло регулярное при проверки имени', '-----', $row[0]);
                }
            }
        }

        $time_end = $this->microtime_float();
        $time = $time_end - $time_start;
        print($time);
    }

    /**
     * Copies one parse outcome onto the record and saves it.
     *
     * @param object $model       existParser record to update
     * @param array  $row         spreadsheet row ([1] article code, [7] price)
     * @param mixed  $day         delivery days to store
     * @param string $description description or failure reason
     * @param string $realPrice   price found on the site or a placeholder
     * @param string $name        name to store
     */
    private function storeParseResult($model, $row, $day, $description, $realPrice, $name)
    {
        $model->day = $day;
        $model->item_id = $row[1];
        $model->description = $description;
        // The stored price uses a decimal comma.
        $model->old_price = str_replace('.', ',', $row[7]);
        $model->real_price = $realPrice;
        $model->name = $name;
        $model->save();
    }

    /**
     * Tells whether one normalised name contains the other.
     *
     * Uses a literal substring test so that characters such as "(" or "+"
     * in a product name cannot break or alter the comparison. Empty names
     * never match.
     *
     * @param string $first
     * @param string $second
     * @return bool
     */
    private function namesOverlap($first, $second)
    {
        $a = $this->validName((string)$first);
        $b = $this->validName((string)$second);
        if ($a === '' || $b === '') {
            return false;
        }
        return strpos($b, $a) !== false || strpos($a, $b) !== false;
    }

    /**
     * Returns the link belonging to the first title that matches the wanted
     * name, or null when there is none.
     *
     * @param array  $titles captured link titles, keyed like $hrefs
     * @param array  $hrefs  captured link targets
     * @param string $wanted product name from the spreadsheet
     * @return string|null
     */
    private function pickMatchingLink(array $titles, array $hrefs, $wanted)
    {
        foreach ($titles as $k => $title) {
            if (isset($hrefs[$k]) && $this->namesOverlap($wanted, $title)) {
                return $hrefs[$k];
            }
        }
        return null;
    }

    /**
     * Writes the given rows to temp/final_excel_list.xls under STORAGE_PATH.
     *
     * @param array $rows
     */
    private function exportRows(array $rows)
    {
        $this->exelphp->convert(STORAGE_PATH . 'temp/final_excel_list.xls', $rows);
    }

    /**
     * Exports the records whose update_status is 1 and terminates with "ok".
     */
    public function fileAction()
    {
        $this->exportRows(\existParser::find("update_status = 1")->toArray());
        die('ok');
    }

    /**
     * Normalises a product name for comparison: lower-cases it and removes
     * dots, whitespace, slashes and hyphens.
     *
     * The input is also echoed with print_r (debug trace kept as found).
     * NOTE(review): strtolower() is byte-wise; non-ASCII letters are probably
     * left unchanged - confirm whether that matters for the compared names.
     *
     * @param string $name
     * @return string
     */
    public function validName($name)
    {
        print_r($name);
        $lowered = strtolower($name);
        $withoutDots = str_replace(".", '', $lowered);
        return preg_replace(["/\s+/", "/\//", "/-/"], "", $withoutDots);
    }

    /**
     * Fetches a site page, switching proxies until a recognisable page arrives.
     *
     * A response is accepted when it carries the site logo and is either an
     * items page, a link list or the "nothing found" page. Anything else
     * (empty response, blocked IP, captcha, error page) makes the current
     * proxy get deleted and the request repeated with the next one.
     *
     * Retries run in a loop and the accepted page is always returned; the
     * earlier recursive form dropped the result of every retry.
     *
     * @param string $row   path and query relative to the site root
     * @param object $proxy proxy provider exposing getProxy() and deleteProxy()
     * @return string page content
     */
    public function getPage($row, $proxy)
    {
        $item_reg = '/class="tabletitle"/';
        $link_reg = '/Поиск<\/a><\/td>/';
        $site_reg = '/src="\/\/s\.exist\.ru\/img\/logo-ua\.gif"/';
        $item_not_found = '/По вашему запросу ничего не найдено/';

        while (true) {
            $page = $this->parseExistLink($row, $proxy->getProxy());

            // NOTE(review): this class matches "[", "^" and digits; possibly
            // "[^\d]" was intended. Kept as found.
            $test_page = preg_replace("/[[^\d]/", "", $page);

            $recognised = !empty($test_page)
                && !empty($page)
                && preg_match($site_reg, $page)
                && (preg_match($item_not_found, $page)
                    || preg_match($item_reg, $page)
                    || preg_match($link_reg, $page));

            if ($recognised) {
                return $page;
            }

            $proxy->deleteProxy();
        }
    }

    /**
     * Downloads a page of the site through the given proxy.
     *
     * Response headers are included in the returned text (CURLOPT_HEADER).
     * Whitespace between adjacent tags is removed from the result.
     *
     * @param string $url   path and query relative to http://www.exist.ua/
     * @param string $proxy proxy as "host:port"
     * @return string response text, or an empty string when the transfer failed
     */
    public function parseExistLink($url, $proxy)
    {
        $url = 'http://www.exist.ua/' . $url;
        $proxy = explode(':', $proxy);

        $ch = curl_init();
        curl_setopt($ch, CURLOPT_URL, $url);
        curl_setopt($ch, CURLOPT_RETURNTRANSFER, 1);
        curl_setopt($ch, CURLOPT_CONNECTTIMEOUT, 10);
        curl_setopt($ch, CURLOPT_TIMEOUT, 10);
        curl_setopt($ch, CURLOPT_PROXY, $proxy[0]);
        if (isset($proxy[1])) {
            curl_setopt($ch, CURLOPT_PROXYPORT, $proxy[1]);
        }
        curl_setopt($ch, CURLOPT_HEADER, 1);
        curl_setopt($ch, CURLOPT_USERAGENT, "Mozilla/5.0 (compatible; Googlebot/2.1; +http://www.google.com/bot.html)");
        $headers = [
            'Accept: text/html,application/xhtml+xml,application/xml;q=0.9,*;q=0.8',
            'Accept-Language: ru,en-us;q=0.7,en;q=0.3',
            'Accept-Encoding: deflate',
            'Accept-Charset: windows-1251,utf-8;q=0.7,*;q=0.7',
        ];
        curl_setopt($ch, CURLOPT_HTTPHEADER, $headers);

        $exec = curl_exec($ch);
        curl_close($ch);

        if ($exec === false) {
            // Transfer failed (dead proxy, timeout): report an empty page.
            return '';
        }

        // NOTE(review): the call as found had no closing delimiter and no
        // replacement argument; '><' is the assumed replacement - confirm.
        return preg_replace('/>\s*</', '><', $exec);
    }

    /**
     * Runs the "test" task 13 times in a row, pausing 30 seconds between runs.
     */
    public function startAction()
    {
        $comand = 'php /home/dev/www/seo/www-tasks/index.php test';
        $runs = 13;

        for ($run = 1; $run <= $runs; $run++) {
            exec($comand);
            // No pause after the last run.
            if ($run < $runs) {
                sleep(30);
            }
        }
    }

    /**
     * Exports every existParser record to the spreadsheet file.
     */
    public function downloadAction()
    {
        $this->exportRows(\existParser::find()->toArray());
    }

    /**
     * Resets update_status to 0 on all existParser records.
     */
    public function dropAction()
    {
        $phql = "UPDATE existParser SET update_status = 0";
        $this->modelsManager->executeQuery($phql);
    }

    /**
     * Placeholder: currently does nothing.
     */
    public function getItems()
    {
    }
}