Refactor CrawlerLibrary to operate on HTML strings instead of URLs.

Crawl::yamap() now fetches the board list page with getInnerHTML(), extracts
the "a.list_subject" hrefs from that HTML with getLinks(), fetches the
"div.contents p" HTML of $links[27], and var_dump()s the image sources that
getImages() finds in it.

CrawlerLibrary: the private getCrawler() is replaced by a public getContent()
that returns the response body as a string; getNode() is replaced by
getInnerHTML(), which returns the HTML of the first node matching $tag, or of
the whole document when $tag is falsy; getLinks() and getImages() now take an
HTML string as their first argument instead of a URL.

NOTE(review): this patch was recovered from a copy whose line breaks and
indentation had been collapsed. The line layout below is fixed by the hunk
counts, but the indentation is an assumption (4 spaces per level) -- confirm
against the target files, or apply with `git apply --ignore-whitespace`.
NOTE(review): $links[27] is a hard-coded index; nothing in this patch checks
that the list page yields that many links.

diff --git a/app/Controllers/CLI/Crawl.php b/app/Controllers/CLI/Crawl.php
index 458f5af..72fcdb3 100644
--- a/app/Controllers/CLI/Crawl.php
+++ b/app/Controllers/CLI/Crawl.php
@@ -5,24 +5,20 @@ namespace App\Controllers\CLI;
 
 use App\Libraries\CrawlerLibrary;
 use App\Controllers\BaseController;
-
-
+use Symfony\Component\DomCrawler\Crawler;
 
 class Crawl extends BaseController
 {
     public function yamap()
     {
         try {
-            $crawler = new CrawlerLibrary("https://www.yamap16.com");
-            echo "Host-> " . $crawler->getHost() . "\n";
-            $links = $crawler->getLinks("/Board/List.aspx?id=free&ca=1", "a.list_subject");
-            echo var_export($links) . "\n";
-            $node = $crawler->getNode($links[8], "div.contents p");
-            echo $node->html() . "\n";
-
-            $images = $crawler->getImages($links[8], "img");
-            echo var_export($images) . "\n";
-            echo $crawler->getNode($links[8], "img")->attr("alt") . "\n";
+            $library = new CrawlerLibrary("https://www.yamap16.com");
+            echo "Host-> " . $library->getHost() . "\n";
+            $html = $library->getInnerHTML("/Board/List.aspx?id=free&ca=1");
+            $links = $library->getLinks($html, "a.list_subject");
+            $html = $library->getInnerHTML($links[27], "div.contents p");
+            $images = $library->getImages($html);
+            var_dump($images);
             // file_put_contents("test.jpg", $url);
         } catch (\Exception $e) {
             echo $e->getMessage();
diff --git a/app/Libraries/CrawlerLibrary.php b/app/Libraries/CrawlerLibrary.php
index 79e0458..3e55623 100644
--- a/app/Libraries/CrawlerLibrary.php
+++ b/app/Libraries/CrawlerLibrary.php
@@ -27,29 +27,33 @@ class CrawlerLibrary
         return $this->_client;
     }
 
-    private function getCrawler(string $url): Crawler
+
+    final public function getContent(string $url): string
     {
         $response = $this->getClient()->request('GET', $this->gethost() . $url);
-        return new Crawler($response->getBody()->getContents());
+        return $response->getBody()->getContents();
     }
 
-    final public function getNode(string $url, string $tag): Crawler
+    final public function getInnerHTML(string $url, $tag = false)
     {
-        return $this->getCrawler($url)->filter($tag);
+        $crawler = new Crawler($this->getContent($url));
+        return $tag ? $crawler->filter($tag)->html() : $crawler->html();
     }
 
-    final public function getLinks(string $url, string $tag = "a"): array
+    final public function getLinks(string $html, string $tag = "a"): array
     {
-        return $this->getNode($url, $tag)->each(
+        $crawler = new Crawler($html);
+        return $crawler->filter($tag)->each(
             function (Crawler $node) {
                 return $node->attr("href");
             }
         );
     }
 
-    final public function getImages(string $url, $tag = "img"): array
+    final public function getImages(string $html, $tag = "img"): array
     {
-        return $this->getNode($url, $tag)->each(
+        $crawler = new Crawler($html);
+        return $crawler->filter($tag)->each(
             function (Crawler $node) {
                 return $node->attr("src");
             }