Automation init...

This commit is contained in:
최준흠 2024-09-02 19:08:15 +09:00
parent d3b9be4ab6
commit f3f8c0c6cd
5 changed files with 133 additions and 67 deletions

View File

@ -2,24 +2,32 @@
namespace App\Controllers\CLI;
use App\Libraries\CrawlerLibrary;
use App\Controllers\BaseController;
use App\Libraries\MyCrawler\YamapLibrary;
class Crawler extends BaseController
{
public function yamap()
{
try {
$library = new CrawlerLibrary("https://www.yamap16.com");
echo "Host-> " . $library->getHost() . "\n";
// $html = $library->getInnerHTML("/Board/List.aspx?id=free&ca=1");
// $links = $library->getLinks($html, "a.list_subject");
$url = "/Board/View.aspx?id=free&ca=1&rno=192681&page=1";
$html = $library->getInnerHTML($url, "div.contents p");
$images = $library->getImages($html);
$library = new YamapLibrary("https://www.yamap16.com");
$mainPage = $library->getContent("/Board/List.aspx?id=free");
$links = $library->getLinks($mainPage);
// If the post contains Image-type or Video-type content
log_message("debug", "viewLink-> " . $links[0]["href"]);
$viewPage = $library->getContent($links[0]["href"]);
// $viewPage = $library->getContent("/Board/View.aspx?id=free&ca=&rno=193046&page=1"); //Image
// $viewPage = $library->getContent("/Board/View.aspx?id=free&ca=&rno=193055&page=1"); //Video
$images = $library->getImages($viewPage);
foreach ($images as $image) {
echo "Image-> " . $image . "\n";
$library->download($image);
log_message("debug", "Image-> " . $image['src']);
$library->download($image['src']);
}
$videos = $library->getVideos($viewPage);
foreach ($videos as $video) {
log_message("debug", "Video-> " . $video['src']);
$library->download($video['src']);
}
log_message("info", "완료되었습니다.");
return true;

View File

@ -1,45 +0,0 @@
<?php
namespace App\Libraries;
use Symfony\Component\DomCrawler\Crawler;
/**
 * Thin crawler on top of WebBaseLibrary: fetches pages with the HTTP client
 * exposed by the parent class and extracts markup / attribute values from
 * them with Symfony DomCrawler.
 */
class CrawlerLibrary extends WebBaseLibrary
{
    /**
     * @param string $host Base host; forwarded unchanged to the parent constructor.
     */
    public function __construct(string $host)
    {
        parent::__construct($host);
    }

    /**
     * Issues a GET request for host + $url and returns the response body.
     *
     * @param string $url Path (and query string) appended to the configured host.
     * @return string Contents of the response body.
     */
    final public function getContent(string $url): string
    {
        $client = $this->getClient();
        $response = $client->request('GET', $this->gethost() . $url);
        $body = $response->getBody();

        return $body->getContents();
    }

    /**
     * Fetches $url and returns the html() of the parsed page.
     *
     * @param string       $url Path appended to the configured host.
     * @param string|false $tag Optional CSS selector; when truthy the result of
     *                          html() on the filtered selection is returned,
     *                          otherwise html() of the whole parsed document.
     * @return string
     */
    final public function getInnerHTML(string $url, $tag = false)
    {
        $document = new Crawler($this->getContent($url));

        if ($tag) {
            return $document->filter($tag)->html();
        }

        return $document->html();
    }

    /**
     * Collects the "href" attribute of every node matching $tag.
     *
     * @param string $html Markup to parse.
     * @param string $tag  CSS selector for the link nodes.
     * @return array One entry per matched node, in the order each() visits them.
     */
    final public function getLinks(string $html, string $tag = "a"): array
    {
        return $this->pluckAttribute($html, $tag, "href");
    }

    /**
     * Collects the "src" attribute of every node matching $tag.
     *
     * @param string $html Markup to parse.
     * @param string $tag  CSS selector for the image nodes.
     * @return array One entry per matched node, in the order each() visits them.
     */
    final public function getImages(string $html, $tag = "img"): array
    {
        return $this->pluckAttribute($html, $tag, "src");
    }

    /**
     * Parses $html and returns attr($attribute) for each node matching $selector.
     */
    private function pluckAttribute(string $html, $selector, string $attribute): array
    {
        $matches = (new Crawler($html))->filter($selector);

        return $matches->each(
            static function (Crawler $element) use ($attribute) {
                return $element->attr($attribute);
            }
        );
    }
}

View File

@ -5,7 +5,7 @@ namespace App\Libraries;
use GuzzleHttp\Cookie\CookieJar;
use GuzzleHttp\Client;
abstract class WebBaseLibrary
abstract class MyBaseLibrary
{
private $_host = "";
private $_client = null;
@ -36,8 +36,13 @@ abstract class WebBaseLibrary
return $this->_cookieJar;
}
final public function getContent(string $url): string
{
return $this->getClient()->get($this->gethost() . $url)->getBody();
}
// 로그인 메서드
public function login($url, $username, $password)
final public function login($url, $username, $password)
{
try {
$response = $this->getClient()->post($this->gethost() . $url, [
@ -61,22 +66,23 @@ abstract class WebBaseLibrary
}
// 파일 다운로드 메서드
public function download($url, $addPath = false)
final public function download($url, $path = false)
{
try {
$fullPath = WRITEPATH . "uploads";
$fullPath .= !$addPath ? '' : DIRECTORY_SEPARATOR . $addPath;
if (!is_dir($fullPath)) {
mkdir($fullPath);
$fileNames = explode('/', $url);
if (!is_array($fileNames) || !count($fileNames)) {
throw new \Exception("Download URL Error:" . $url);
}
$temps = explode('/', $url);
if (!is_array($temps) || !count($temps)) {
throw new \Exception("URL error:" . var_dump($temps, true));
$storagePath = WRITEPATH . "uploads";
$storagePath .= !$path ? '' : DIRECTORY_SEPARATOR . $path;
if (!is_dir($storagePath)) {
if (!mkdir($storagePath)) {
throw new \Exception("Make Directory Error:" . $storagePath);
}
}
$file = $fullPath . DIRECTORY_SEPARATOR . array_pop($temps);
$response = $this->getClient()->get($this->gethost() . $url, [
'cookies' => $this->getCookieJar(),
'sink' => $file,
'sink' => $storagePath . DIRECTORY_SEPARATOR . array_pop($fileNames),
]);
if ($response->getStatusCode() == 200) {
log_message("info", "파일이 성공적으로 다운로드되었습니다!");

View File

@ -0,0 +1,42 @@
<?php
namespace App\Libraries\MyCrawler;
use App\Libraries\MyBaseLibrary;
use Symfony\Component\DomCrawler\Crawler;
/**
 * Base class for site-specific crawlers.
 *
 * Wraps Symfony DomCrawler with small helpers for pulling links and images
 * out of an HTML string. Subclasses may override getLinks() / getImages()
 * with site-specific selectors.
 */
abstract class MyCrawlerLibrary extends MyBaseLibrary
{
    /**
     * @param string $host Base host; forwarded unchanged to the parent constructor.
     */
    protected function __construct(string $host)
    {
        parent::__construct($host);
    }

    /**
     * Wraps the given markup (or DOM node) in a new DomCrawler instance.
     *
     * @param mixed $html Whatever the Crawler constructor accepts.
     * @return Crawler
     */
    final public function getCrawler($html)
    {
        return new Crawler($html);
    }

    /**
     * Returns html() of the parsed markup, optionally narrowed by a selector.
     *
     * @param string       $html Markup to parse.
     * @param string|false $tag  Optional CSS selector; when truthy, html() is
     *                           taken from the filtered selection instead of
     *                           the whole document.
     * @return string
     */
    final public function getInnerHTML(string $html, $tag = false)
    {
        $crawler = $this->getCrawler($html);

        return $tag ? $crawler->filter($tag)->html() : $crawler->html();
    }

    /**
     * Extracts links from the markup.
     *
     * @param string $html    Markup to parse.
     * @param array  $options "tag" = CSS selector of the link nodes,
     *                        "attr" = attribute holding the target.
     *                        Missing keys fall back to the defaults.
     * @return array List of ["anchor" => node text, "href" => attribute value].
     */
    public function getLinks(string $html, array $options = ["tag" => "a", "attr" => "href"]): array
    {
        // Merge instead of replace so a caller may override just one key.
        $options += ["tag" => "a", "attr" => "href"];

        return $this->getCrawler($html)->filter($options["tag"])->each(
            static function (Crawler $node) use ($options) {
                return ["anchor" => $node->text(), "href" => $node->attr($options["attr"])];
            }
        );
    }

    /**
     * Extracts images from the markup.
     *
     * @param string $html    Markup to parse.
     * @param array  $options "tag" = CSS selector of the image nodes,
     *                        "attr" = attribute holding the source.
     *                        Missing keys fall back to the defaults.
     * @return array List of ["alt" => alt attribute ("" when absent), "src" => attribute value].
     */
    public function getImages(string $html, array $options = ["tag" => "img", "attr" => "src"]): array
    {
        // Merge instead of replace so a caller may override just one key.
        $options += ["tag" => "img", "attr" => "src"];

        return $this->getCrawler($html)->filter($options["tag"])->each(
            static function (Crawler $node) use ($options) {
                // The alternative text lives in the "alt" attribute; an <img>
                // has no text content, so text() would always yield "".
                return ["alt" => $node->attr("alt") ?? "", "src" => $node->attr($options["attr"])];
            }
        );
    }
}

View File

@ -0,0 +1,55 @@
<?php
namespace App\Libraries\MyCrawler;
use Symfony\Component\DomCrawler\Crawler;
/**
 * Crawler specialised for the Yamap board markup: list rows live under
 * "div.bbs_list div.bbs_item" and post bodies under "div.contents".
 */
class YamapLibrary extends MyCrawlerLibrary
{
    /**
     * Public so the class can be instantiated with `new`; the parent
     * constructor is declared protected.
     *
     * @param string $host Base host; forwarded unchanged to the parent constructor.
     */
    public function __construct(string $host)
    {
        parent::__construct($host);
    }

    /**
     * Collects the post links of a board list page, leaving out posts whose
     * author nickname equals $options["skip"].
     *
     * @param string $html    Markup of the list page.
     * @param array  $options "skip" = nickname whose posts are excluded.
     *                        A missing key falls back to the default.
     * @return array List of ["anchor" => author nickname, "href" => post URL].
     * @throws \InvalidArgumentException Raised by DomCrawler's siblings() when
     *                                   the page has no "div.bbs_item" at all.
     */
    public function getLinks($html, array $options = ["skip" => "관리자"]): array
    {
        // Merge instead of replace so a partial options array keeps the default.
        $options += ["skip" => "관리자"];
        $skip = (string) $options["skip"];

        // siblings() of the first item does not include that first item itself.
        // NOTE(review): confirm that leaving out the first row is intended.
        $rows = $this->getCrawler($html)->filter("div.bbs_list div.bbs_item")->first()->siblings();

        $links = [];
        foreach ($rows as $row) {
            $rowCrawler = $this->getCrawler($row);

            $nicknames = $rowCrawler->filter("span.g_nickname");
            if (!$nicknames->count()) {
                continue;
            }

            // A sibling without a subject link would make attr() throw and
            // abort the whole listing; skip such rows instead.
            $subject = $rowCrawler->filter("a.list_subject");
            if (!$subject->count()) {
                continue;
            }
            $href = $subject->attr("href");

            $nicknames->each(function (Crawler $node) use ($skip, $href, &$links) {
                $nickname = $node->text();
                // Strict comparison: loose != treats numeric-looking strings
                // such as "007" and "7" as equal.
                if ($nickname !== $skip) {
                    $links[] = ["anchor" => $nickname, "href" => $href];
                }
            });
        }

        return $links;
    }

    /**
     * Extracts images from the first "div.contents" element of a post page.
     *
     * @param string $html    Markup of the post page.
     * @param array  $options "tag" = CSS selector, "attr" = attribute holding
     *                        the source. Missing keys fall back to the defaults.
     * @return array List of ["alt" => alt attribute or null, "src" => attribute value or null].
     */
    public function getImages(string $html, array $options = ["tag" => "img", "attr" => "src"]): array
    {
        return $this->collectFromContents($html, $options + ["tag" => "img", "attr" => "src"]);
    }

    /**
     * Extracts videos from the first "div.contents" element of a post page.
     *
     * @param string $html    Markup of the post page.
     * @param array  $options "tag" = CSS selector, "attr" = attribute holding
     *                        the source. Missing keys fall back to the defaults.
     * @return array List of ["alt" => alt attribute or null, "src" => attribute value or null].
     */
    public function getVideos(string $html, array $options = ["tag" => "video", "attr" => "src"]): array
    {
        return $this->collectFromContents($html, $options + ["tag" => "video", "attr" => "src"]);
    }

    /**
     * Shared implementation of getImages()/getVideos(): looks only inside the
     * first "div.contents" element and maps each node matching
     * $options["tag"] to its "alt" and $options["attr"] attribute values.
     */
    private function collectFromContents(string $html, array $options): array
    {
        $contents = $this->getCrawler($html)->filter("div.contents")->first();

        return $contents->filter($options["tag"])->each(
            static function (Crawler $node) use ($options) {
                return [
                    "alt" => $node->attr("alt"),
                    "src" => $node->attr($options["attr"]),
                ];
            }
        );
    }
}