(.[^\/]*)<\/div>/';
$item_price = '/
(.[^\/]*)грн.<\/td>/';
$route = STORAGE_PATH . 'temp/Price_ia.xls';
$this->exelphp->addFile($route);
$result = $this->exelphp->getRows();
foreach ($result as $row) {
/*foreach($result as $rows){
if($rows[1] == '03-214'){
$row = $rows;
}
}*/
$model = \existParser::findFirst("item_id = '$row[1]'");
if($model instanceof \existParser){
if($model->update_status){
continue;
} else {
$model->update_status = 1;
$model->save();
}
} else{
$model = new \existParser();
$model->day = 0;
$model->item_id = $row[1];
$model->description = '-----';
$model->old_price = preg_replace('/\./',',',$row[7]);
$model->real_price = '----';
$model->name = '----';
$model->update_status = 1;
$model->save();
}
//print_r($row);
$proxy = \proxy::getInstance();
$page = $this->getPage('price.aspx?sr=-4&pcode='.$row[1], $proxy);
print_r($page ." \n");
$test_exist = preg_match($item_not_found, $page);
$test_items = preg_match($item_reg, $page);
$test_links = preg_match($link_reg, $page);
$items = array();
$real_links = array();
if ($test_items) {
preg_match_all($items_reg, $page, $one_item);
//print('it\'s page with items, so we just take items'." \n");
//print_r($one_item);
$items[] = $one_item;
} elseif($test_links) {
preg_match_all($link_reg, $page, $links);
preg_match_all($link_name_reg, $page, $links_name);
preg_match_all($link_name_reg_two, $page, $links_name_two);
print_r( $links_name);
print_r($links_name_two);
foreach($links_name[1] as $k => $link_name){
if(preg_match("/{$this->validName($row[0])}/",$this->validName($link_name))||preg_match("/{$this->validName($link_name)}/",$this->validName($row[0]))){
$real_links[] = $links[1][$k];
break;
}
}
foreach($links_name_two[1] as $k => $link_name_two){
if(preg_match("/{$this->validName($row[0])}/",$this->validName($link_name_two))||preg_match("/{$this->validName($link_name_two)}/",$this->validName($row[0]))){
$real_links[] = $links[1][$k];
break;
}
}
// print('it\'s page with list, we get links'." \n");
// print_r($links[1]);
if($real_links){
foreach ($real_links as $link) {
$page = $this->getPage($link, $proxy);
//print('it\'s linls from list'." \n");
preg_match_all($items_reg, $page, $one_item);
//print_r($one_item);
$items[] = $one_item;
}
} else {
$model->day = 0;
$model->item_id = $row[1];
$model->description = 'нет в листе ссылок на товары';
$model->old_price = preg_replace('/\./',',',$row[7]);
$model->real_price = '-----';
$model->name = $row[0];
$model->save();
continue;
}
} elseif($test_exist){
$model->day = 0;
$model->item_id = $row[1];
$model->description ='нет на сайте';
$model->old_price = preg_replace('/\./',',',$row[7]);
$model->real_price = '-----';
$model->name = $row[0];
$model->save();
continue;
} else {
$model->update_status = 0;
$model->save();
continue;
}
if (!empty($items[0][0])) {
print("\n items \n");
print($row[0]);
print_r($items);
$num = count($items);
for ($i = 0; $i < $num; $i++) {
// die(print_r($items));
$sub_num = count($items[$i][1]);
/*for($x=0; $x<$sub_num; $x++){
$final_list[$i][$x] = preg_split('/\|+/', preg_replace('/<(.*?)>/', '|', $items[$i][0][$x]));
}*/
preg_match_all($link_name_reg, $items[$i][0][0], $name);
if(!$name){
preg_match_all($link_name_reg_two,$items[$i][0][0], $name);
}
preg_match_all($item_desc, $items[$i][0][0], $desc);
preg_match_all($item_price, $items[$i][0][0], $price);
preg_match_all($day_reg, $items[$i][0][0], $days);
$price_sort_int = array();
if(count($price[1]) > 1){
$price_sort = $price[1];
foreach($price_sort as $ps){
$cena=str_replace(",",'.',$ps);
$p_int=preg_replace("/[^x\d|*\.]/","",$cena);
$price_sort_int[] = (int)str_replace('.',',',$p_int);
}
sort( $price_sort_int);
echo($row[1]);
print_r($price_sort_int);
$model->lower_price = str_replace(',','.',$price_sort_int[0]);
}
foreach($days[1] as $k => $day){
if($day == 1){
$real_price = $price[1][$k];
$day_val = $day;
break;
} else {
$day_val = $days[1][0];
$real_price = $price[1][0];
}
}
$final_list[] = [$row[1], preg_replace('/\./',',',$row[7]), $name[1][0], $desc[1][0],$real_price, $day_val];
if(preg_match("/{$this->validName($row[0])}/",$this->validName($name[1][0]))|| preg_match("/{$this->validName($name[1][0])}/",$this->validName($row[0]))){
$model->day = $day_val;
$model->item_id = $row[1];
$model->description = $desc[1][0];
$model->old_price = preg_replace('/\./',',',$row[7]);
$model->real_price = $real_price;
$model->name = $name[1][0];
$model->save();
} else{
$model->day = 0;
$model->item_id = $row[1];
$model->description ='не подошло регулярное при проверки имени';
$model->old_price = preg_replace('/\./',',',$row[7]);
$model->real_price = '-----';
$model->name = $row[0];
$model->save();
continue;
}
}
} else {
$final_list[] = [$row[1], preg_replace('/\./',',',$row[7]), '----', '----','----', '----'];
$model->update_status = 0;
$model->save();
continue;
}
}
$time_end = $this->microtime_float();
$time = $time_end - $time_start;
print($time);
}
/**
 * Export the parser rows flagged as updated and stop the script.
 *
 * Reads every existParser record matching "update_status = 1", converts the
 * result set to a plain array and hands it, together with the target path
 * STORAGE_PATH . 'temp/final_excel_list.xls', to the exelphp service's
 * convert(). The script then terminates, emitting the text "ok".
 */
public function fileAction(){
    $updatedRows = \existParser::find("update_status = 1")->toArray();
    $this->exelphp->convert(STORAGE_PATH . 'temp/final_excel_list.xls', $updatedRows);
    exit('ok');
}
/**
 * Reduce a product name to a compact form used for loose name comparison.
 *
 * The raw value is first echoed with print_r() (debug trace). It is then
 * lower-cased with strtolower() and stripped of every dot, slash, hyphen and
 * run of whitespace.
 *
 * @param string $name Raw name.
 * @return string The lower-cased name without dots, slashes, hyphens or whitespace.
 */
public function validName($name){
    print_r($name);

    // One pass removes the same characters the step-by-step version removed:
    // ".", "/", "-" and any whitespace run.
    return preg_replace('/[.\/-]|\s+/', '', strtolower($name));
}
/**
 * Fetch a site page through the proxy pool, rotating proxies until a
 * recognisable page is received.
 *
 * Each attempt downloads the page with parseExistLink() using the proxy
 * currently offered by $proxy->getProxy(). A response is accepted when it is
 * non-empty, carries the site logo marker, and matches at least one of the
 * three known page kinds (item table, link list, "nothing found"). Any other
 * response causes $proxy->deleteProxy() to be called and the fetch to be
 * repeated.
 *
 * Retries run in a loop, so the accepted page is always returned to the
 * caller, however many proxies had to be discarded first.
 *
 * NOTE(review): there is no attempt limit. If the pool never yields a working
 * proxy this method does not return — confirm how an exhausted pool should be
 * handled.
 *
 * @param string $row   URL path relative to the site root; forwarded to parseExistLink().
 * @param object $proxy Proxy pool exposing getProxy() and deleteProxy().
 * @return string The first response recognised as a real result page.
 */
public function getPage($row, $proxy){
    $item_reg = '/class="tabletitle"/';
    // NOTE(review): as written this is an alternation whose first branch is a
    // single space, so it matches any page containing a space — confirm the
    // intended pattern.
    $link_reg = '/ | Поиск<\/a><\/td>/';
    $site_reg = '/src="\/\/s\.exist\.ru\/img\/logo-ua\.gif"/';
    $item_not_found = '/По вашему запросу ничего не найдено/';

    while (true) {
        $page = $this->parseExistLink($row, $proxy->getProxy());

        // Blocked-IP, captcha and error pages need no separate detection:
        // anything that is not recognised as a result page is rejected below.
        $recognised = !empty($page)
            && preg_match($site_reg, $page)
            && (
                preg_match($item_not_found, $page)
                || preg_match($item_reg, $page)
                || preg_match($link_reg, $page)
            );

        if ($recognised) {
            return $page;
        }

        // The current proxy produced nothing usable: drop it and try again.
        $proxy->deleteProxy();
    }
}
/**
 * Download one page of http://www.exist.ua/ through an HTTP proxy.
 *
 * The request uses a 10 second connect timeout and a 10 second total timeout,
 * a Googlebot user agent and a fixed set of Accept headers. Because
 * CURLOPT_HEADER is enabled, the returned text contains the response headers
 * followed by the body. Whitespace between a closing ">" and the next "<" is
 * removed so that the callers' regular expressions can match adjacent tags.
 *
 * @param string $url   Path appended to "http://www.exist.ua/".
 * @param string $proxy Proxy address; split on ":" into host and port.
 * @return string The response with inter-tag whitespace collapsed, or an empty
 *                string when the transfer fails.
 */
public function parseExistLink($url, $proxy)
{
    $url = 'http://www.exist.ua/' . $url;
    $proxyParts = explode(':', (string) $proxy);

    $ch = curl_init();
    curl_setopt($ch, CURLOPT_URL, $url);
    curl_setopt($ch, CURLOPT_RETURNTRANSFER, 1);
    curl_setopt($ch, CURLOPT_CONNECTTIMEOUT, 10);
    curl_setopt($ch, CURLOPT_TIMEOUT, 10);
    curl_setopt($ch, CURLOPT_PROXY, $proxyParts[0]);
    // A proxy given without ":port" must not trigger an undefined-offset
    // notice; in that case cURL's default port is used.
    if (isset($proxyParts[1])) {
        curl_setopt($ch, CURLOPT_PROXYPORT, (int) $proxyParts[1]);
    }
    curl_setopt($ch, CURLOPT_HEADER, 1);
    curl_setopt($ch, CURLOPT_USERAGENT, "Mozilla/5.0 (compatible; Googlebot/2.1; +http://www.google.com/bot.html)");
    $headers = array
    (
        'Accept: text/html,application/xhtml+xml,application/xml;q=0.9,*;q=0.8',
        'Accept-Language: ru,en-us;q=0.7,en;q=0.3',
        'Accept-Encoding: deflate',
        'Accept-Charset: windows-1251,utf-8;q=0.7,*;q=0.7'
    );
    curl_setopt($ch, CURLOPT_HTTPHEADER, $headers);

    $exec = curl_exec($ch);
    curl_close($ch);

    // curl_exec() yields false on timeout, refused connection and similar
    // failures; report that as an empty page.
    if (!is_string($exec)) {
        return '';
    }

    // The pattern needs its closing "/" delimiter; without it preg_replace()
    // emits a warning and returns null for every page.
    return (string) preg_replace('/>\s*</', '><', $exec);
}
/**
 * Run the "test" console task thirteen times in sequence.
 *
 * The same shell command is executed on every pass, and the method sleeps for
 * 30 seconds between consecutive passes (twelve pauses in total, none after
 * the final run). Command output and exit status are not inspected.
 */
public function startAction(){
    $taskCommand = 'php /home/dev/www/seo/www-tasks/index.php test';
    $totalRuns = 13;

    for ($run = 1; $run <= $totalRuns; $run++) {
        exec($taskCommand);

        // Pause only between runs, not after the last one.
        if ($run < $totalRuns) {
            sleep(30);
        }
    }
}
/**
 * Export every parser row, regardless of its update flag.
 *
 * Loads all existParser records as a plain array and passes them, together
 * with the target path STORAGE_PATH . 'temp/final_excel_list.xls', to the
 * exelphp service's convert().
 */
public function downloadAction(){
    $allRows = \existParser::find()->toArray();
    $this->exelphp->convert(STORAGE_PATH . 'temp/final_excel_list.xls', $allRows);
}
/**
 * Clear the update flag on all parser rows.
 *
 * Runs a single PHQL statement through the models manager that sets
 * update_status to 0 for every existParser record. The query result is not
 * used.
 */
function dropAction(){
    $this->modelsManager->executeQuery("UPDATE existParser SET update_status = 0");
}
/**
 * Unimplemented stub: the body holds only a disabled statement, so a call
 * performs no work and returns null.
 */
public function getItems(){
// Disabled: return file_get_html( $_POST['link']);
}
} |