Automation init...

This commit is contained in:
최준흠 2024-09-03 19:25:54 +09:00
parent 8e7d6793c5
commit 76cb3062a0
8 changed files with 320 additions and 187 deletions

View File

@ -23,6 +23,7 @@ $routes->group('/user', function ($routes) {
$routes->group('cli', ['namespace' => 'App\Controllers\CLI'], function ($routes) {
$routes->cli('mangboard/level', 'Mangboard::level');
$routes->cli('crawler/yamap', 'Crawler::yamap');
$routes->cli('crawler/yamap/(:any)', 'Crawler::yamap/$1');
});
$routes->group('admin', ['namespace' => 'App\Controllers\Admin', 'filter' => 'authFilter:manager'], function ($routes) {

View File

@ -3,32 +3,26 @@
namespace App\Controllers\CLI;
use App\Controllers\BaseController;
use App\Libraries\MyCrawler\YamapLibrary;
use App\Libraries\YamapLibrary;
class Crawler extends BaseController
{
public function yamap()
public function yamap(...$params)
{
try {
$library = new YamapLibrary("https://www.yamap16.com");
$mainPage = $library->getContent("/Board/List.aspx?id=free");
$links = $library->getLinks($mainPage);
//Image형식이나 , Viedeo형식의 Content를 가지고 있으면
log_message("debug", "viewLink-> " . $links[0]["href"]);
$viewPage = $library->getContent($links[0]["href"]);
// $viewPage = $library->getContent("/Board/View.aspx?id=free&ca=&rno=193046&page=1"); //Image
// $viewPage = $library->getContent("/Board/View.aspx?id=free&ca=&rno=193055&page=1"); //Video
$images = $library->getImages($viewPage);
foreach ($images as $image) {
log_message("debug", "Image-> " . $image['src']);
$library->download($image['src']);
}
$videos = $library->getVideos($viewPage);
foreach ($videos as $video) {
log_message("debug", "Video-> " . $video['src']);
$library->download($video['src']);
}
$isDebug = in_array("debug", $params);
$library = new YamapLibrary(getenv("crawler.yamap.host"));
$library->setDebug($isDebug);
//1. MainPage
$url = getenv("crawler.yamap.url.main");
$links = $library->getLinksByMainPage($url);
//2. TargetPage : div.contents 가진 객체를 찾아서 첫번쨰 요소에서만 참조
$url = !in_array("debug", $params) ? getenv("crawler.yamap.url.target") : $links[0]["href"];
$crawler = $library->getCrawlerByDetailPage($url);
//3. Image
$library->getImages($crawler);
//4. Video
$library->getVideos($crawler);
log_message("info", "완료되었습니다.");
return true;
} catch (\Exception $e) {

View File

@ -1,99 +0,0 @@
<?php
namespace App\Libraries;
use GuzzleHttp\Cookie\CookieJar;
use GuzzleHttp\Client;
/**
 * Base HTTP helper bound to a single host.
 *
 * Wraps a lazily created Guzzle client and cookie jar and offers three
 * operations: fetching a page body, posting a login form, and downloading a
 * file below WRITEPATH . "uploads".
 */
abstract class MyBaseLibrary
{
    private $_host = "";
    private $_client = null;
    private $_cookieJar = null;

    /**
     * @param string $host Prefix (scheme + host) prepended to every URL passed to this class.
     */
    protected function __construct(string $host)
    {
        $this->_host = $host;
    }

    /**
     * @return string The host prefix given to the constructor.
     */
    final public function getHost(): string
    {
        return $this->_host;
    }

    /**
     * Returns the shared Guzzle client, creating it on first use.
     */
    final protected function getClient(): Client
    {
        if ($this->_client === null) {
            // NOTE(review): 'verify' => false turns off TLS certificate checks in Guzzle;
            // confirm this is acceptable for the target host before using it in production.
            $this->_client = new Client(['verify' => false]);
        }
        return $this->_client;
    }

    /**
     * Returns the shared cookie jar, creating it on first use.
     */
    final protected function getCookieJar()
    {
        if ($this->_cookieJar === null) {
            $this->_cookieJar = new CookieJar();
        }
        return $this->_cookieJar;
    }

    /**
     * Fetches host + $url with GET and returns the response body as a string.
     *
     * The cookie jar is sent along so that a session opened by login() is also
     * used for page requests, the same way download() already does.
     *
     * @param string $url Path (and query) relative to the host.
     */
    final public function getContent(string $url): string
    {
        $response = $this->getClient()->get($this->getHost() . $url, [
            'cookies' => $this->getCookieJar(),
        ]);
        // getBody() returns a stream object; cast explicitly instead of relying on coercion.
        return (string) $response->getBody();
    }

    /**
     * Posts a username/password form to host + $url, storing cookies in the jar.
     *
     * @return bool true when the response status is 200, false otherwise or on any exception.
     */
    final public function login($url, $username, $password)
    {
        try {
            $response = $this->getClient()->post($this->getHost() . $url, [
                'form_params' => [
                    'username' => $username,
                    'password' => $password,
                ],
                'cookies' => $this->getCookieJar(),
            ]);
            if ($response->getStatusCode() == 200) {
                log_message("info", "로그인 성공!");
                return true;
            }
            log_message("info", "로그인 실패: " . $response->getStatusCode());
            return false;
        } catch (\Exception $e) {
            // Fixed: this branch used to log the file-download error text.
            log_message("error", "로그인 중 오류 발생: " . $e->getMessage());
            return false;
        }
    }

    /**
     * Downloads host + $url into WRITEPATH . "uploads" (optionally below $path).
     *
     * The local file name is the last path segment of $url; query string and
     * fragment are not part of the name.
     *
     * @param mixed        $url  Path relative to the host.
     * @param string|false $path Optional sub-directory below the uploads directory.
     *
     * @return bool true when the response status is 200, false otherwise or on any exception.
     */
    final public function download($url, $path = false)
    {
        try {
            if (!is_string($url) || trim($url) === '') {
                throw new \Exception("Download URL Error:" . (is_scalar($url) ? $url : gettype($url)));
            }
            $fileName = basename((string) parse_url($url, PHP_URL_PATH));
            if ($fileName === '') {
                throw new \Exception("Download URL Error:" . $url);
            }
            $storagePath = WRITEPATH . "uploads";
            $storagePath .= !$path ? '' : DIRECTORY_SEPARATOR . $path;
            // Recursive so a nested $path works; the trailing is_dir() tolerates a
            // directory created by someone else between the check and the mkdir().
            if (!is_dir($storagePath) && !mkdir($storagePath, 0777, true) && !is_dir($storagePath)) {
                throw new \Exception("Make Directory Error:" . $storagePath);
            }
            $response = $this->getClient()->get($this->getHost() . $url, [
                'cookies' => $this->getCookieJar(),
                'sink' => $storagePath . DIRECTORY_SEPARATOR . $fileName,
            ]);
            if ($response->getStatusCode() == 200) {
                log_message("info", "파일이 성공적으로 다운로드되었습니다!");
                return true;
            }
            log_message("info", "파일 다운로드 실패: " . $response->getStatusCode());
            return false;
        } catch (\Exception $e) {
            log_message("error", "파일 다운로드 중 오류 발생: " . $e->getMessage());
            return false;
        }
    }
}

View File

@ -2,41 +2,78 @@
namespace App\Libraries\MyCrawler;
use App\Libraries\MyBaseLibrary;
use Symfony\Component\DomCrawler\Crawler;
abstract class MyCrawlerLibrary extends MyBaseLibrary
class MyCrawlerLibrary
{
protected function __construct(string $host)
private $_debug = false;
public function __construct() {}
final public function getDebug(): bool
{
parent::__construct($host);
return $this->_debug;
}
final public function setDebug(bool $debug): void
{
$this->_debug = $debug;
}
final public function getCrawler($html)
final public function createCrawler($html)
{
return new Crawler($html);
}
final public function getInnerHTML(string $html, $tag = false)
{
return $tag ? $this->getCrawler($html)->filter($tag)->html() : $this->getCrawler($html)->html();
return $tag ? $this->createCrawler($html)->filter($tag)->html() : $this->createCrawler($html)->html();
}
public function getLinks(string $html, array $options = ["tag" => "a", "attr" => "href"]): array
public function getLinks(Crawler $crawler, array $options = ["tag" => "a", "attr" => "href"]): array
{
return $this->getCrawler($html)->filter($options["tag"])->each(
$links = $crawler->filter($options["tag"])->each(
function (Crawler $node) use (&$options) {
return ["anchor" => $node->text(), "href" => $node->attr($options["attr"])];
return [
"anchor" => $node->text(),
"href" => $node->attr($options["attr"])
];
}
);
foreach ($links as $link) {
log_message("debug", "Link-> " . $link['href']);
}
return $links;
}
public function getImages(string $html, array $options = ["tag" => "img", "attr" => "src"]): array
public function getImages(Crawler $crawler, array $options = ["tag" => "img", "attr" => "src"]): array
{
return $this->getCrawler($html)->filter($options["tag"])->each(
$images = $crawler->filter($options["tag"])->each(
function (Crawler $node) use (&$options) {
return ["alt" => $node->text(), "src" => $node->attr($options["attr"])];
return [
"alt" => $node->attr('alt'),
"src" => $node->attr($options["attr"])
];
}
);
foreach ($images as $image) {
log_message("debug", "Image-> " . $image['src']);
}
return $images;
}
public function getVideos(Crawler $crawler, array $options = ["tag" => "video", "attr" => "src"]): array
{
$videos = $crawler->filter($options["tag"])->each(
function (Crawler $node) use (&$options) {
return [
"alt" => $node->attr('alt'),
"src" => $node->attr($options["attr"])
];
}
);
foreach ($videos as $video) {
log_message("debug", "Video-> " . $video['src']);
}
return $videos;
}
}

View File

@ -1,55 +0,0 @@
<?php
namespace App\Libraries\MyCrawler;
use Symfony\Component\DomCrawler\Crawler;
/**
 * Yamap-specific crawler: narrows link, image and video extraction to the
 * board list and to the first "div.contents" element of a page.
 */
class YamapLibrary extends MyCrawlerLibrary
{
    public function __construct(string $host)
    {
        parent::__construct($host);
    }

    /**
     * Collects one link per nickname found in the siblings of the first board item.
     *
     * Entries whose nickname text equals $options["skip"] are left out. Each
     * result has "anchor" (the nickname text) and "href" (the href of the
     * item's "a.list_subject" element).
     */
    public function getLinks($html, array $options = ["skip" => "관리자"]): array
    {
        // Take the first div.bbs_item and walk over its sibling elements.
        $siblings = $this->getCrawler($html)
            ->filter("div.bbs_list div.bbs_item")
            ->first()
            ->siblings();

        $collected = [];
        foreach ($siblings as $element) {
            $item = $this->getCrawler($element);
            $item->filter("span.g_nickname")->each(
                function (Crawler $nickname) use ($options, $item, &$collected) {
                    if ($nickname->text() == $options["skip"]) {
                        return;
                    }
                    $collected[] = [
                        "anchor" => $nickname->text(),
                        "href" => $item->filter("a.list_subject")->attr("href"),
                    ];
                }
            );
        }

        return $collected;
    }

    /**
     * Returns "alt"/"src" pairs for the image tags inside the first div.contents.
     */
    public function getImages(string $html, array $options = ["tag" => "img", "attr" => "src"]): array
    {
        return $this->collectFromContents($html, $options);
    }

    /**
     * Returns "alt"/"src" pairs for the video tags inside the first div.contents.
     */
    public function getVideos(string $html, array $options = ["tag" => "video", "attr" => "src"]): array
    {
        return $this->collectFromContents($html, $options);
    }

    /**
     * Shared extraction: only the first div.contents element is searched for
     * $options["tag"]; "src" is read from the attribute named by $options["attr"].
     */
    private function collectFromContents(string $html, array $options): array
    {
        $contents = $this->getCrawler($html)->filter("div.contents")->first();
        $tagSelector = $options["tag"];
        $attribute = $options["attr"];

        return $contents->filter($tagSelector)->each(
            static function (Crawler $node) use ($attribute) {
                return [
                    "alt" => $node->attr('alt'),
                    "src" => $node->attr($attribute),
                ];
            }
        );
    }
}

View File

@ -0,0 +1,28 @@
<?php
namespace App\Libraries\MyStorage;
/**
 * Keeps track of the directory files are stored in, together with a debug flag.
 */
class MyStorageLibrary
{
    // Starts at WRITEPATH . "uploads"; every setPath() call extends it by one segment.
    private $_path = WRITEPATH . "uploads";
    private $_debug = false;

    public function __construct()
    {
    }

    /**
     * @return bool The current debug flag.
     */
    final public function getDebug(): bool
    {
        return $this->_debug;
    }

    /**
     * @param bool $debug New value of the debug flag.
     */
    final public function setDebug(bool $debug): void
    {
        $this->_debug = $debug;
    }

    /**
     * @return string The storage directory as built so far.
     */
    final public function getPath(): string
    {
        return $this->_path;
    }

    /**
     * Appends $path to the current storage directory.
     *
     * Despite the name this does not replace the stored path: each call adds
     * a directory separator followed by $path to whatever is already stored.
     */
    final public function setPath(string $path): void
    {
        $this->_path = implode(DIRECTORY_SEPARATOR, [$this->_path, $path]);
    }
}

View File

@ -0,0 +1,78 @@
<?php
namespace App\Libraries\MyWeb;
use GuzzleHttp\Cookie\CookieJar;
use GuzzleHttp\Client;
/**
 * HTTP helper bound to a single host.
 *
 * Owns a lazily created Guzzle client and cookie jar, and offers page
 * fetching and a form-based login on top of them.
 */
class MyWebLibrary
{
    private $_host = "";
    private $_client = null;
    private $_cookieJar = null;
    private $_debug = false;

    /**
     * @param string $host Prefix (scheme + host) prepended to every URL passed to this class.
     */
    public function __construct(string $host)
    {
        $this->_host = $host;
    }

    /**
     * @return string The host prefix given to the constructor.
     */
    final public function getHost(): string
    {
        return $this->_host;
    }

    /**
     * Returns the shared Guzzle client, creating it on first use.
     */
    final public function getClient(): Client
    {
        if ($this->_client === null) {
            // NOTE(review): 'verify' => false turns off TLS certificate checks in Guzzle;
            // confirm this is acceptable for the target host before using it in production.
            $this->_client = new Client(['verify' => false]);
        }
        return $this->_client;
    }

    /**
     * Returns the shared cookie jar, creating it on first use.
     */
    final public function getCookieJar()
    {
        if ($this->_cookieJar === null) {
            $this->_cookieJar = new CookieJar();
        }
        return $this->_cookieJar;
    }

    /**
     * @return bool The current debug flag.
     */
    final public function getDebug(): bool
    {
        return $this->_debug;
    }

    /**
     * @param bool $debug New value of the debug flag.
     */
    final public function setDebug(bool $debug): void
    {
        $this->_debug = $debug;
    }

    /**
     * Fetches host + $url with GET and returns the response body as a string.
     *
     * The cookie jar is sent along so that a session opened by login() is
     * also used for page requests.
     *
     * @param string $url Path (and query) relative to the host.
     */
    final public function getContent(string $url): string
    {
        $response = $this->getClient()->get($this->getHost() . $url, [
            'cookies' => $this->getCookieJar(),
        ]);
        // getBody() returns a stream object; cast explicitly instead of relying on coercion.
        return (string) $response->getBody();
    }

    /**
     * Posts a username/password form to host + $url, storing cookies in the jar.
     *
     * @return bool true when the response status is 200, false otherwise or on any exception.
     */
    final public function login($url, $username, $password)
    {
        try {
            $response = $this->getClient()->post($this->getHost() . $url, [
                'form_params' => [
                    'username' => $username,
                    'password' => $password,
                ],
                'cookies' => $this->getCookieJar(),
            ]);
            if ($response->getStatusCode() == 200) {
                log_message("info", "로그인 성공!");
                return true;
            }
            log_message("info", "로그인 실패: " . $response->getStatusCode());
            return false;
        } catch (\Exception $e) {
            log_message("error", "로그인 중 오류 발생: " . $e->getMessage());
            return false;
        }
    }
}

View File

@ -0,0 +1,149 @@
<?php
namespace App\Libraries;
use App\Libraries\MyWeb\MyWebLibrary;
use App\Libraries\MyStorage\MyStorageLibrary;
use App\Libraries\MyCrawler\MyCrawlerLibrary;
use Symfony\Component\DomCrawler\Crawler;
/**
 * Crawls a Yamap board by combining three collaborators that are created on
 * first use: MyWebLibrary (HTTP), MyStorageLibrary (target directory) and
 * MyCrawlerLibrary (DOM extraction).
 *
 * In debug mode download() only logs the target path and fetches nothing.
 */
class YamapLibrary
{
    private $_web = null;
    private $_storage = null;
    private $_crawler = null;
    private $_debug = false;
    private $_host = null;

    /**
     * @param string $host Prefix (scheme + host) prepended to relative URLs.
     */
    public function __construct(string $host)
    {
        $this->_host = $host;
    }

    /**
     * Returns the web helper, creating it with the current host and debug flag on first use.
     */
    public function getMyWeb()
    {
        if ($this->_web === null) {
            $this->_web = new MyWebLibrary($this->getHost());
            $this->_web->setDebug($this->getDebug());
        }
        return $this->_web;
    }

    /**
     * Returns the storage helper, creating it with the current debug flag on first use.
     */
    public function getMyStorage()
    {
        if ($this->_storage === null) {
            $this->_storage = new MyStorageLibrary();
            $this->_storage->setDebug($this->getDebug());
        }
        return $this->_storage;
    }

    /**
     * Returns the DOM helper, creating it with the current debug flag on first use.
     */
    public function getMyCrawler()
    {
        if ($this->_crawler === null) {
            $this->_crawler = new MyCrawlerLibrary();
            $this->_crawler->setDebug($this->getDebug());
        }
        return $this->_crawler;
    }

    /**
     * @return bool The current debug flag.
     */
    final public function getDebug(): bool
    {
        return $this->_debug;
    }

    /**
     * Sets the debug flag and forwards it to collaborators that already exist,
     * so the flag stays consistent regardless of call order.
     */
    final public function setDebug(bool $debug): void
    {
        $this->_debug = $debug;
        if ($this->_web !== null) {
            $this->_web->setDebug($debug);
        }
        if ($this->_storage !== null) {
            $this->_storage->setDebug($debug);
        }
        if ($this->_crawler !== null) {
            $this->_crawler->setDebug($debug);
        }
    }

    /**
     * @return string The host prefix given to the constructor.
     */
    final public function getHost(): string
    {
        return $this->_host;
    }

    /**
     * Collects the ".list_subject" links of board items not written by "관리자".
     *
     * Only the siblings of the first div.bbs_item are inspected. An empty
     * array is returned when $crawler contains no div.bbs_item at all.
     */
    public function getLinks(Crawler $crawler): array
    {
        $firstItem = $crawler->filter("div.bbs_item")->first();
        if ($firstItem->count() === 0) {
            // siblings() throws on an empty node list; no items simply means no links.
            return [];
        }

        $links = [];
        foreach ($firstItem->siblings() as $bbsItem) {
            $item = $this->getMyCrawler()->createCrawler($bbsItem);
            // Look at each span.g_nickname of the item and keep the item's
            // links only when the nickname is not "관리자".
            $item->filter("span.g_nickname")->each(function (Crawler $node) use (&$links, $item) {
                if ($node->text() === "관리자") {
                    return;
                }
                $found = $this->getMyCrawler()->getLinks($item, ["tag" => ".list_subject", "attr" => "href"]);
                foreach ($found as $link) {
                    $links[] = $link;
                }
            });
        }
        return $links;
    }

    /**
     * Extracts the images from $crawler and downloads each one.
     *
     * @return array The extracted entries, whether or not their download succeeded.
     */
    public function getImages(Crawler $crawler, array $options = ["tag" => "img", "attr" => "src"]): array
    {
        $images = $this->getMyCrawler()->getImages($crawler, $options);
        foreach ($images as $image) {
            $this->download($image['src']);
        }
        return $images;
    }

    /**
     * Extracts the videos from $crawler and downloads each one.
     *
     * @return array The extracted entries, whether or not their download succeeded.
     */
    public function getVideos(Crawler $crawler, array $options = ["tag" => "video", "attr" => "src"]): array
    {
        $videos = $this->getMyCrawler()->getVideos($crawler, $options);
        foreach ($videos as $video) {
            $this->download($video['src']);
        }
        return $videos;
    }

    /**
     * Downloads $url into the storage directory.
     *
     * A $url starting with http:// or https:// is requested as is; anything
     * else is treated as relative to the host. The local file name is the last
     * path segment of $url without query string or fragment. In debug mode the
     * target path is logged and no request is made.
     *
     * @param mixed $url Source address, usually the "src" of an extracted tag (may be null).
     *
     * @return bool false when the URL is unusable, the directory cannot be
     *              created, the status is not 200 or any exception occurs; true otherwise.
     */
    final public function download($url)
    {
        try {
            if (!is_string($url) || trim($url) === '') {
                throw new \Exception("Download URL Error:" . (is_scalar($url) ? $url : gettype($url)));
            }
            $fileName = basename((string) parse_url($url, PHP_URL_PATH));
            if ($fileName === '') {
                throw new \Exception("Download URL Error:" . $url);
            }

            $storagePath = $this->getMyStorage()->getPath();
            // Recursive because the storage path can be several levels deep; the
            // trailing is_dir() tolerates a directory created in the meantime.
            if (!is_dir($storagePath) && !mkdir($storagePath, 0777, true) && !is_dir($storagePath)) {
                throw new \Exception("Make Directory Error:" . $storagePath);
            }

            $fullPath = $storagePath . DIRECTORY_SEPARATOR . $fileName;
            log_message("debug", "FullPath-> " . $fullPath);
            if ($this->getDebug()) {
                return true;
            }

            $isAbsolute = preg_match('#^https?://#i', $url) === 1;
            $target = $isAbsolute ? $url : $this->getMyWeb()->getHost() . $url;
            $response = $this->getMyWeb()->getClient()->get($target, [
                'cookies' => $this->getMyWeb()->getCookieJar(),
                'sink' => $fullPath,
            ]);
            if ($response->getStatusCode() == 200) {
                log_message("info", "파일이 성공적으로 다운로드되었습니다!");
                return true;
            }
            log_message("info", "파일 다운로드 실패: " . $response->getStatusCode());
            return false;
        } catch (\Exception $e) {
            log_message("error", "파일 다운로드 중 오류 발생: " . $e->getMessage());
            return false;
        }
    }

    /**
     * Fetches the main page and returns the links found in its first div.bbs_list.
     *
     * @throws \Exception When no link is found.
     */
    public function getLinksByMainPage(string $url): array
    {
        $html = $this->getMyWeb()->getContent($url);
        $crawler = $this->getMyCrawler()->createCrawler($html)->filter("div.bbs_list")->first();
        $links = $this->getLinks($crawler);
        if (!count($links)) {
            throw new \Exception("Target Links가 없습니다.");
        }
        return $links;
    }

    /**
     * Fetches a detail page and returns a crawler narrowed to its first div.contents.
     */
    public function getCrawlerByDetailPage(string $url): Crawler
    {
        log_message("debug", "Target-> " . $url);
        $html = $this->getMyWeb()->getContent($url);
        return $this->getMyCrawler()->createCrawler($html)->filter("div.contents")->first();
    }
}