Automation init...

This commit is contained in:
최준흠 2024-08-31 02:30:49 +09:00
parent 7228aae9ac
commit af3ccfe1b4
3 changed files with 72 additions and 15 deletions

View File

@ -22,7 +22,7 @@ $routes->group('/user', function ($routes) {
// CLI route group: the paths below are registered with $routes->cli() under the
// "cli" group prefix, and their "Controller::method" handlers are looked up in
// the App\Controllers\CLI namespace given in the group options.
$routes->group('cli', ['namespace' => 'App\Controllers\CLI'], function ($routes) {
$routes->cli('mangboard/level', 'Mangboard::level');
$routes->cli('crawl/html', 'Crawl::html');
$routes->cli('crawl/yamap', 'Crawl::yamap');
});
$routes->group('admin', ['namespace' => 'App\Controllers\Admin', 'filter' => 'authFilter:manager'], function ($routes) {

View File

@ -2,29 +2,28 @@
namespace App\Controllers\CLI;
use App\Libraries\CrawlerLibrary;
use App\Controllers\BaseController;
use GuzzleHttp\Client;
use Symfony\Component\DomCrawler\Crawler;
class Crawl extends BaseController
{
public function html()
public function yamap()
{
try {
// $client = new Client(); -> cURL error 60: SSL certificate problem
$client = new Client(['verify' => false]);
$response = $client->request('GET', 'https://www.yamap16.com/Board/List.aspx?id=free&ca=1');
$html = $response->getBody()->getContents();
$crawler = new CrawlerLibrary("https://www.yamap16.com");
echo "Host-> " . $crawler->getHost() . "\n";
$links = $crawler->getLinks("/Board/List.aspx?id=free&ca=1", "a.list_subject");
echo var_export($links) . "\n";
$node = $crawler->getNode($links[8], "div.contents p");
echo $node->html() . "\n";
$crawler = new Crawler($html);
// 모든 이미지의 src 속성 가져오기
$crawler->filter('img')->each(function (Crawler $node) {
echo $node->attr('src') . "\n";
});
//file_put_contents("test.txt", var_export($crawler, true));
$images = $crawler->getImages($links[8], "img");
echo var_export($images) . "\n";
echo $crawler->getNode($links[8], "img")->attr("alt") . "\n";
// file_put_contents("test.jpg", $url);
} catch (\Exception $e) {
echo $e->getMessage();
}

View File

@ -0,0 +1,58 @@
<?php
namespace App\Libraries;
use GuzzleHttp\Client;
use Symfony\Component\DomCrawler\Crawler;
/**
 * Small scraping helper that fetches pages from a single site with Guzzle and
 * queries the returned HTML with Symfony DomCrawler CSS selectors.
 *
 * Every public method performs one HTTP GET per call; nothing is cached except
 * the Guzzle client instance itself.
 */
class CrawlerLibrary
{
    /** @var Client|null Lazily created HTTP client, reused for every request. */
    private $_client = null;

    /** @var string Base URL exactly as it was passed to the constructor. */
    private $_host = "";

    /** @var bool Value handed to Guzzle's "verify" request option. */
    private $_verify = false;

    /**
     * @param string $host   Base URL of the site, e.g. "https://www.example.com".
     * @param bool   $verify Whether Guzzle should verify the server's TLS
     *                       certificate. Defaults to false to keep the previous
     *                       behaviour; NOTE(review): disabled verification
     *                       allows man-in-the-middle tampering, so pass true
     *                       whenever the target's certificate chain is valid.
     */
    public function __construct(string $host, bool $verify = false)
    {
        $this->_host = $host;
        $this->_verify = $verify;
    }

    /**
     * @return string The base URL, unmodified.
     */
    final public function getHost(): string
    {
        return $this->_host;
    }

    /**
     * Returns the shared Guzzle client, creating it on first use.
     */
    private function getClient(): Client
    {
        if (is_null($this->_client)) {
            $this->_client = new Client(['verify' => $this->_verify]);
        }
        return $this->_client;
    }

    /**
     * Turns a link or path into the absolute URL that should be requested.
     *
     * Hrefs harvested from a page are not always root-relative paths, so blind
     * "host . url" concatenation produces malformed URLs for them:
     *  - URLs that already carry a scheme are returned untouched;
     *  - protocol-relative URLs ("//cdn/x") inherit the host's scheme;
     *  - paths are joined to the host with exactly one slash;
     *  - an empty string, "?query" and "#fragment" are appended to the host
     *    as-is, which matches the previous behaviour.
     *
     * Paths are resolved against the host, not against the page they were
     * found on.
     */
    private function resolveUrl(string $url): string
    {
        $host = $this->getHost();
        if ($url === '' || $url[0] === '?' || $url[0] === '#') {
            return $host . $url;
        }
        if (preg_match('#^[a-z][a-z0-9+.\-]*://#i', $url) === 1) {
            return $url;
        }
        if (strncmp($url, '//', 2) === 0) {
            $scheme = parse_url($host, PHP_URL_SCHEME);
            if (!is_string($scheme) || $scheme === '') {
                $scheme = 'https';
            }
            return $scheme . ':' . $url;
        }
        return rtrim($host, '/') . '/' . ltrim($url, '/');
    }

    /**
     * Downloads $url (resolved against the host) and wraps the response body
     * in a DomCrawler instance.
     *
     * Exceptions thrown by Guzzle for failed requests are not caught here.
     */
    private function getCrawler(string $url): Crawler
    {
        $response = $this->getClient()->request('GET', $this->resolveUrl($url));
        return new Crawler($response->getBody()->getContents());
    }

    /**
     * Fetches a page and returns the nodes matching a CSS selector.
     *
     * @param string $url Path or URL of the page to fetch.
     * @param string $tag CSS selector passed to Crawler::filter().
     */
    final public function getNode(string $url, string $tag): Crawler
    {
        return $this->getCrawler($url)->filter($tag);
    }

    /**
     * Collects one attribute from every node matching $tag on the page.
     *
     * @return array One entry per matched node, in document order. Entries are
     *               kept even when the attribute is absent so that positions
     *               stay aligned with the matched nodes.
     */
    private function getAttributes(string $url, string $tag, string $attribute): array
    {
        return $this->getNode($url, $tag)->each(
            static function (Crawler $node) use ($attribute) {
                return $node->attr($attribute);
            }
        );
    }

    /**
     * Returns the "href" attribute of every node matching $tag on the page.
     *
     * @param string $url Path or URL of the page to fetch.
     * @param string $tag CSS selector; defaults to all anchors.
     */
    final public function getLinks(string $url, string $tag = "a"): array
    {
        return $this->getAttributes($url, $tag, "href");
    }

    /**
     * Returns the "src" attribute of every node matching $tag on the page.
     *
     * @param string $url Path or URL of the page to fetch.
     * @param string $tag CSS selector; defaults to all images.
     */
    final public function getImages(string $url, string $tag = "img"): array
    {
        return $this->getAttributes($url, $tag, "src");
    }
}