<?php
//require '../core.php';
use Goutte\Client;
use Symfony\Component\DomCrawler\Crawler;
/**
 * Scraper for the kida.com.tr catalogue.
 *
 * Fetches category listings, paginated product listings and single product
 * pages with Goutte, and extracts fields with DomCrawler CSS/XPath selectors.
 * Price and stock are read from a product page requested after a login call
 * made with cURL (see getPriceAndStock()).
 *
 * Every selector lookup that ends in text()/attr() throws
 * \InvalidArgumentException when the page layout does not contain the
 * expected node; those exceptions are deliberately left to propagate.
 */
class Bot {
    /** Site root, prepended to the relative hrefs/srcs found in the markup. */
    private $sourceURL = 'http://www.kida.com.tr';

    /** Page listing all categories. */
    private $sourceCategoryURL = 'http://www.kida.com.tr/Istem/Urun/Kategori/Listele.aspx';

    /** URL of the category page requested most recently. */
    private $categoryURL;

    /** URL of the product page requested most recently by getPost(). */
    private $postURL;

    /** Page index passed to the most recent getPosts() call. */
    private $page;

    /**
     * List the categories found on the category index page.
     *
     * @return array[] One array per category link with keys 'title' and 'url'.
     */
    public function getCategories()
    {
        $baseURL = $this->sourceURL;
        $client = new Client();
        $crawler = $client->request('GET', $this->sourceCategoryURL);

        return $crawler->filter('section.page_content_offset')
            ->filter('.col-lg-9')->eq(0)
            ->filter('.table-responsive a.color_dark')
            ->each(function ($node) use ($baseURL) {
                return array(
                    'title' => trim($node->text()),
                    'url' => $baseURL . $node->attr('href'),
                );
            });
    }

    /**
     * Read the page count of a category from its pager link.
     *
     * The count is taken from the "index" query parameter of the fifth child
     * link of the pager container.
     *
     * @param string $categoryURL Category page URL.
     *
     * @return int The parsed index, or 1 when the pager link is absent or its
     *             href does not carry an "?index=N&" parameter.
     */
    public function getCategoryPageCount($categoryURL)
    {
        $this->categoryURL = $categoryURL;
        $client = new Client();
        $crawler = $client->request('GET', $this->categoryURL);

        $pattern = '#form1 > div.wide_layout.relative.w_xs_auto > section > div > section > div.row.clearfix.m_bottom_15.m_xs_bottom_30 > div.col-lg-5.col-md-5.col-sm-4.t_align_r.t_xs_align_l > a:nth-child(5)';

        // Evaluate the selector once and reuse the resulting node list.
        $pagerLink = $crawler->filter($pattern);
        if ($pagerLink->count() === 0) {
            return 1;
        }

        // attr() yields null for a missing attribute; cast so preg_match gets a string.
        $url = (string) $pagerLink->attr('href');

        // Guard against an href without the parameter instead of reading an
        // undefined $matches[1].
        if (preg_match('/\?index=([0-9]+)&/is', $url, $matches) !== 1) {
            return 1;
        }

        return intval($matches[1]);
    }

    /**
     * List the products shown on one page of a category.
     *
     * @param string          $categoryURL Category page URL.
     * @param int|string|null $page        Page index appended as "&index=N";
     *                                     null requests the URL unchanged.
     *
     * @return array[] One array per product with keys 'title', 'url', 'image'.
     */
    public function getPosts($categoryURL, $page = null)
    {
        $this->page = $page;
        $this->categoryURL = $categoryURL;
        if ($this->page !== null) {
            $this->categoryURL .= "&index={$this->page}";
        }

        $baseURL = $this->sourceURL;
        $client = new Client();
        $crawler = $client->request('GET', $this->categoryURL);

        return $crawler->filter('.product_item')->each(function ($node) use ($baseURL) {
            $titleLink = $node->filterXPath('//figure/figcaption/div[1]/a');

            return array(
                'title' => $titleLink->text(),
                'url' => $baseURL . $titleLink->attr('href'),
                'image' => $baseURL . $node->filterXPath('//figure/a/div[1]/div/img')->attr('src'),
            );
        });
    }

    /**
     * Scrape a single product page.
     *
     * @param string $postURL Product page URL.
     *
     * @return array Keys, in order: title, image, description, url, language,
     *               page_count, cover_type, paper_type, publication_date,
     *               publication_place, size, barcode, pusblishing_house,
     *               writer (list of strings), price, stock,
     *               categories (de-duplicated list of strings).
     */
    public function getPost($postURL)
    {
        $this->postURL = $postURL;
        $client = new Client();
        $crawler = $client->request('GET', $this->postURL);

        // Common ancestor of the title and the cover image.
        $productPane = '#form1 > div.wide_layout.relative.w_xs_auto > section > div.row.clearfix > section.col-lg-9.col-md-9.col-sm-9.m_xs_bottom_30 > div';

        $data = array();
        $data['title'] = $crawler->filter($productPane . ' > div.p_top_10.t_xs_align_l > h2 > strong')->text();
        $data['image'] = $this->sourceURL . $crawler->filter($productPane . ' > div.photoframe.type_2.shadow.r_corners.f_left.f_sm_none.d_xs_inline_b.product_single_preview.relative.m_right_30.m_bottom_5.m_sm_bottom_20.m_xs_right_0.w_mxs_full > div > div > img')->attr('src');

        // Join every description paragraph. The previous loop reused the
        // accumulator as the foreach variable and so kept only the last
        // paragraph, doubled.
        $paragraphs = $crawler->filter('#tabDetaylar > section > p')->each(function ($node) {
            return $node->text();
        });
        $data['description'] = trim(implode('', $paragraphs));

        $data['url'] = $this->postURL;

        // Rows of the details-tab table, read by position (1-based nth-child).
        $detailRows = array(
            'language' => 1,
            'page_count' => 2,
            'cover_type' => 3,
            'paper_type' => 4,
            'publication_date' => 5,
            'publication_place' => 6,
            'size' => 7,
        );
        foreach ($detailRows as $key => $row) {
            $data[$key] = $crawler->filter("#tabDetaylar > section > table > tbody > tr:nth-child({$row}) > td:nth-child(2)")->text();
        }

        $data['barcode'] = $this->descriptionCell($crawler, 3)->text();
        // Key spelling ("pusblishing") kept as-is: callers may already read it.
        $data['pusblishing_house'] = trim($this->descriptionCell($crawler, 1)->text());
        $data['writer'] = $this->descriptionCell($crawler, 0)->filter('a')->each(function ($node) {
            return $node->text();
        });

        $priceAndStock = $this->getPriceAndStock($this->postURL);
        $data['price'] = $priceAndStock['price'];
        $data['stock'] = $priceAndStock['stock'];

        $categories = $this->descriptionCell($crawler, 2)->filter('a')->each(function ($node) {
            return $node->text();
        });
        // array_unique() keeps the original keys, so the list may have gaps.
        $data['categories'] = array_unique($categories);

        return $data;
    }

    /**
     * Log in and read price and stock from a product page.
     *
     * Sends the login request, then fetches the product page on the same cURL
     * handle so the cookies received from the login are reused.
     *
     * NOTE(review): the credentials are hard-coded below; consider moving
     * them to configuration.
     *
     * @param string|null $postURL Product page URL; defaults to the URL of the
     *                             last getPost() call.
     *
     * @return array Keys 'price' and 'stock' (trimmed strings). The login
     *               credentials are no longer part of the returned array.
     *
     * @throws \InvalidArgumentException When no product URL is available, or
     *                                   when the expected nodes are missing.
     * @throws \RuntimeException         When either HTTP request fails.
     */
    public function getPriceAndStock($postURL = null)
    {
        if ($postURL === null) {
            $postURL = $this->postURL;
        }
        if ($postURL === null || $postURL === '') {
            throw new \InvalidArgumentException('No product URL given and getPost() has not been called yet.');
        }

        $credentials = array('KullaniciAd' => '[email protected]', 'Sifre' => '12345678');
        $referer = 'Referer:http://www.kida.com.tr/Default.aspx';
        $userAgent = 'User-Agent:Mozilla/5.0 (Macintosh; Intel Mac OS X 10_10_2) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/42.0.2311.90 Safari/537.36';
        $loginHeaders = array(
            'Content-Type:application/json; charset=UTF-8',
            'Accept:application/json, text/javascript, */*; q=0.01',
            $referer,
            $userAgent,
            'X-Requested-With:XMLHttpRequest'
        );
        $cookie = dirname(__FILE__) . '/kida-cookie.txt';

        // Step 1: log in. CURLOPT_COOKIEJAR also switches on the handle's
        // cookie engine, so the session cookie is kept for the next request.
        $ch = curl_init('http://www.kida.com.tr/WebMethods.aspx/KullaniciKontrolMusteri');
        curl_setopt($ch, CURLOPT_POST, true);
        curl_setopt($ch, CURLOPT_POSTFIELDS, json_encode($credentials));
        curl_setopt($ch, CURLOPT_RETURNTRANSFER, true);
        curl_setopt($ch, CURLOPT_HTTPHEADER, $loginHeaders);
        curl_setopt($ch, CURLOPT_COOKIEJAR, $cookie);
        if (curl_exec($ch) === false) {
            $error = curl_error($ch);
            curl_close($ch);
            throw new \RuntimeException('Login request failed: ' . $error);
        }

        // Step 2: fetch the product page with a plain GET. Without this reset
        // the handle would POST the login payload (with JSON headers) to the
        // product URL.
        curl_setopt($ch, CURLOPT_HTTPGET, true);
        curl_setopt($ch, CURLOPT_HTTPHEADER, array($referer, $userAgent));
        curl_setopt($ch, CURLOPT_URL, $postURL);
        $content = curl_exec($ch);
        $error = curl_error($ch);
        // Closing releases the handle and, on PHP < 8, flushes the cookie jar.
        curl_close($ch);
        if ($content === false) {
            throw new \RuntimeException('Product page request failed: ' . $error);
        }

        $cells = (new Crawler($content))->filter('.v_align_b');

        // Positions are layout-dependent: first match is the price, the
        // seventh match is the stock.
        return array(
            'price' => trim($cells->first()->text()),
            'stock' => trim($cells->eq(6)->text()),
        );
    }

    /**
     * Return the second cell of a row of the product's description table.
     *
     * @param Crawler $crawler Crawler over the product page.
     * @param int     $row     Zero-based row index within '.description_table tr'.
     *
     * @return Crawler
     */
    private function descriptionCell(Crawler $crawler, $row)
    {
        return $crawler->filter('.description_table tr')->eq($row)->filter('td')->eq(1);
    }
}
// Manual debugging harness: create the scraper instance used by the
// commented-out example calls below.
$bot = new Bot();
// Example invocations, kept commented out; uncomment one at a time to dump
// the scraped result. They use $bot, the instance created above (the earlier
// examples referred to an undefined $kida variable). getPosts() is given null
// as its page argument, which requests the category URL without "&index=".
//var_dump($bot->getCategories());
//var_dump($bot->getPosts('http://www.kida.com.tr/Istem/Urun/Kategori/Detay.aspx?Id=2234&Kategori=Eğitim', null));
//var_dump($bot->getPost('http://www.kida.com.tr/Istem/Urun/Detay.aspx?Id=699650'));
//var_dump($bot->getPost('http://www.kida.com.tr/Istem/Urun/Detay.aspx?Id=699649'));
//var_dump($bot->getCategoryPageCount('http://www.kida.com.tr/Istem/Urun/Kategori/Detay.aspx?Id=2234&Kategori=Eğitim'));
//var_dump($bot->getCategoryPageCount('http://www.kida.com.tr/Istem/Urun/Kategori/Detay.aspx?Id=2003&Kategori=Akademik'));
//var_dump($bot->getPost('http://www.kida.com.tr/Istem/Urun/Detay.aspx?Id=430980'));
// The two lines below are "#"-style PHP comments holding a CSS selector (the
// same selector twice). It is not used by the code in this file; presumably a
// note for a product-page table cell still to be scraped - TODO confirm.
#form1 > div.wide_layout.relative.w_xs_auto > section > div.row.clearfix > section.col-lg-9.col-md-9.col-sm-9.m_xs_bottom_30 > div > div.p_top_10.t_xs_align_l > table > tbody > tr:nth-child(4) > td:nth-child(2)
#form1 > div.wide_layout.relative.w_xs_auto > section > div.row.clearfix > section.col-lg-9.col-md-9.col-sm-9.m_xs_bottom_30 > div > div.p_top_10.t_xs_align_l > table > tbody > tr:nth-child(4) > td:nth-child(2)