diff --git a/app/Config/Routes.php b/app/Config/Routes.php index a33a32e..cb3f571 100644 --- a/app/Config/Routes.php +++ b/app/Config/Routes.php @@ -22,7 +22,7 @@ $routes->group('/user', function ($routes) { $routes->group('cli', ['namespace' => 'App\Controllers\CLI'], function ($routes) { $routes->cli('mangboard/level', 'Mangboard::level'); - $routes->cli('crawl/html', 'Crawl::html'); + $routes->cli('crawl/yamap', 'Crawl::yamap'); }); $routes->group('admin', ['namespace' => 'App\Controllers\Admin', 'filter' => 'authFilter:manager'], function ($routes) { diff --git a/app/Controllers/CLI/Crawl.php b/app/Controllers/CLI/Crawl.php index afeca8d..458f5af 100644 --- a/app/Controllers/CLI/Crawl.php +++ b/app/Controllers/CLI/Crawl.php @@ -2,29 +2,28 @@ namespace App\Controllers\CLI; +use App\Libraries\CrawlerLibrary; use App\Controllers\BaseController; -use GuzzleHttp\Client; -use Symfony\Component\DomCrawler\Crawler; + class Crawl extends BaseController { - public function html() + public function yamap() { try { - // $client = new Client(); -> CURL erro 60: SSL certificate problem - $client = new Client(['verify' => false]); - $response = $client->request('GET', 'https://www.yamap16.com/Board/List.aspx?id=free&ca=1'); - $html = $response->getBody()->getContents(); + $crawler = new CrawlerLibrary("https://www.yamap16.com"); + echo "Host-> " . $crawler->getHost() . "\n"; + $links = $crawler->getLinks("/Board/List.aspx?id=free&ca=1", "a.list_subject"); + echo var_export($links) . "\n"; + $node = $crawler->getNode($links[8], "div.contents p"); + echo $node->html() . "\n"; - $crawler = new Crawler($html); - - // 모든 이미지의 src 속성 가져오기 - $crawler->filter('img')->each(function (Crawler $node) { - echo $node->attr('src') . "\n"; - }); - //file_put_contents("test.txt", var_export($crawler, true)); + $images = $crawler->getImages($links[8], "img"); + echo var_export($images) . "\n"; + echo $crawler->getNode($links[8], "img")->attr("alt") . "\n"; + // file_put_contents("test.jpg", $url); } catch (\Exception $e) { echo $e->getMessage(); } diff --git a/app/Libraries/CrawlerLibrary.php b/app/Libraries/CrawlerLibrary.php new file mode 100644 index 0000000..79e0458 --- /dev/null +++ b/app/Libraries/CrawlerLibrary.php @@ -0,0 +1,58 @@ +_host = $host; + } + + final public function getHost(): string + { + return $this->_host; + } + + private function getClient(): Client + { + if (is_null($this->_client)) { + $this->_client = new Client(['verify' => false]); + } + return $this->_client; + } + + private function getCrawler(string $url): Crawler + { + $response = $this->getClient()->request('GET', $this->gethost() . $url); + return new Crawler($response->getBody()->getContents()); + } + + final public function getNode(string $url, string $tag): Crawler + { + return $this->getCrawler($url)->filter($tag); + } + + final public function getLinks(string $url, string $tag = "a"): array + { + return $this->getNode($url, $tag)->each( + function (Crawler $node) { + return $node->attr("href"); + } + ); + } + + final public function getImages(string $url, $tag = "img"): array + { + return $this->getNode($url, $tag)->each( + function (Crawler $node) { + return $node->attr("src"); + } + ); + } +}