Automation init...

This commit is contained in:
최준흠 2024-08-31 22:24:28 +09:00
parent af3ccfe1b4
commit 4ee7090ffb
2 changed files with 20 additions and 20 deletions

View File

@@ -5,24 +5,20 @@ namespace App\Controllers\CLI;
use App\Libraries\CrawlerLibrary; use App\Libraries\CrawlerLibrary;
use App\Controllers\BaseController; use App\Controllers\BaseController;
use Symfony\Component\DomCrawler\Crawler;
class Crawl extends BaseController class Crawl extends BaseController
{ {
public function yamap() public function yamap()
{ {
try { try {
$crawler = new CrawlerLibrary("https://www.yamap16.com"); $library = new CrawlerLibrary("https://www.yamap16.com");
echo "Host-> " . $crawler->getHost() . "\n"; echo "Host-> " . $library->getHost() . "\n";
$links = $crawler->getLinks("/Board/List.aspx?id=free&ca=1", "a.list_subject"); $html = $library->getInnerHTML("/Board/List.aspx?id=free&ca=1");
echo var_export($links) . "\n"; $links = $library->getLinks($html, "a.list_subject");
$node = $crawler->getNode($links[8], "div.contents p"); $html = $library->getInnerHTML($links[27], "div.contents p");
echo $node->html() . "\n"; $images = $library->getImages($html);
var_dump($images);
$images = $crawler->getImages($links[8], "img");
echo var_export($images) . "\n";
echo $crawler->getNode($links[8], "img")->attr("alt") . "\n";
// file_put_contents("test.jpg", $url); // file_put_contents("test.jpg", $url);
} catch (\Exception $e) { } catch (\Exception $e) {
echo $e->getMessage(); echo $e->getMessage();

View File

@@ -27,29 +27,33 @@ class CrawlerLibrary
return $this->_client; return $this->_client;
} }
private function getCrawler(string $url): Crawler
final public function getContent(string $url): string
{ {
$response = $this->getClient()->request('GET', $this->gethost() . $url); $response = $this->getClient()->request('GET', $this->gethost() . $url);
return new Crawler($response->getBody()->getContents()); return $response->getBody()->getContents();
} }
final public function getNode(string $url, string $tag): Crawler final public function getInnerHTML(string $url, $tag = false)
{ {
return $this->getCrawler($url)->filter($tag); $crawler = new Crawler($this->getContent($url));
return $tag ? $crawler->filter($tag)->html() : $crawler->html();
} }
final public function getLinks(string $url, string $tag = "a"): array final public function getLinks(string $html, string $tag = "a"): array
{ {
return $this->getNode($url, $tag)->each( $crawler = new Crawler($html);
return $crawler->filter($tag)->each(
function (Crawler $node) { function (Crawler $node) {
return $node->attr("href"); return $node->attr("href");
} }
); );
} }
final public function getImages(string $url, $tag = "img"): array final public function getImages(string $html, $tag = "img"): array
{ {
return $this->getNode($url, $tag)->each( $crawler = new Crawler($html);
return $crawler->filter($tag)->each(
function (Crawler $node) { function (Crawler $node) {
return $node->attr("src"); return $node->attr("src");
} }