Automation init...
This commit is contained in:
parent
8e7d6793c5
commit
76cb3062a0
@ -23,6 +23,7 @@ $routes->group('/user', function ($routes) {
|
||||
// Command-line only routes; every handler lives in the App\Controllers\CLI namespace.
$routes->group('cli', ['namespace' => 'App\Controllers\CLI'], static function ($routes) {
    // CLI route pattern => "Controller::method" handler, registered in this order.
    $cliRoutes = [
        'mangboard/level'      => 'Mangboard::level',
        'crawler/yamap'        => 'Crawler::yamap',
        // Everything after "crawler/yamap/" is handed to Crawler::yamap via $1.
        'crawler/yamap/(:any)' => 'Crawler::yamap/$1',
    ];

    foreach ($cliRoutes as $pattern => $handler) {
        $routes->cli($pattern, $handler);
    }
});
|
||||
|
||||
$routes->group('admin', ['namespace' => 'App\Controllers\Admin', 'filter' => 'authFilter:manager'], function ($routes) {
|
||||
|
||||
@ -3,32 +3,26 @@
|
||||
namespace App\Controllers\CLI;
|
||||
|
||||
use App\Controllers\BaseController;
|
||||
use App\Libraries\MyCrawler\YamapLibrary;
|
||||
use App\Libraries\YamapLibrary;
|
||||
|
||||
class Crawler extends BaseController
|
||||
{
|
||||
public function yamap()
|
||||
public function yamap(...$params)
|
||||
{
|
||||
try {
|
||||
$library = new YamapLibrary("https://www.yamap16.com");
|
||||
$mainPage = $library->getContent("/Board/List.aspx?id=free");
|
||||
$links = $library->getLinks($mainPage);
|
||||
|
||||
//Image형식이나 , Viedeo형식의 Content를 가지고 있으면
|
||||
log_message("debug", "viewLink-> " . $links[0]["href"]);
|
||||
$viewPage = $library->getContent($links[0]["href"]);
|
||||
// $viewPage = $library->getContent("/Board/View.aspx?id=free&ca=&rno=193046&page=1"); //Image
|
||||
// $viewPage = $library->getContent("/Board/View.aspx?id=free&ca=&rno=193055&page=1"); //Video
|
||||
$images = $library->getImages($viewPage);
|
||||
foreach ($images as $image) {
|
||||
log_message("debug", "Image-> " . $image['src']);
|
||||
$library->download($image['src']);
|
||||
}
|
||||
$videos = $library->getVideos($viewPage);
|
||||
foreach ($videos as $video) {
|
||||
log_message("debug", "Video-> " . $video['src']);
|
||||
$library->download($video['src']);
|
||||
}
|
||||
$isDebug = in_array("debug", $params);
|
||||
$library = new YamapLibrary(getenv("crawler.yamap.host"));
|
||||
$library->setDebug($isDebug);
|
||||
//1. MainPage
|
||||
$url = getenv("crawler.yamap.url.main");
|
||||
$links = $library->getLinksByMainPage($url);
|
||||
//2. TargetPage : div.contents 가진 객체를 찾아서 첫번쨰 요소에서만 참조
|
||||
$url = !in_array("debug", $params) ? getenv("crawler.yamap.url.target") : $links[0]["href"];
|
||||
$crawler = $library->getCrawlerByDetailPage($url);
|
||||
//3. Image
|
||||
$library->getImages($crawler);
|
||||
//4. Video
|
||||
$library->getVideos($crawler);
|
||||
log_message("info", "완료되었습니다.");
|
||||
return true;
|
||||
} catch (\Exception $e) {
|
||||
|
||||
@ -1,99 +0,0 @@
|
||||
<?php
|
||||
|
||||
namespace App\Libraries;
|
||||
|
||||
use GuzzleHttp\Cookie\CookieJar;
|
||||
use GuzzleHttp\Client;
|
||||
|
||||
/**
 * Base class for libraries that talk to a single remote host over HTTP.
 *
 * It lazily creates one Guzzle client and one cookie jar per instance and
 * offers helpers to fetch a page, log in with a form post and download a file
 * below WRITEPATH . "uploads".
 */
abstract class MyBaseLibrary
{
    private $_host = "";
    private $_client = null;
    private $_cookieJar = null;

    /**
     * @param string $host Prefix that is prepended verbatim to every URL passed
     *                     to getContent(), login() and download().
     */
    protected function __construct(string $host)
    {
        $this->_host = $host;
    }

    /**
     * @return string The host prefix given to the constructor.
     */
    final public function getHost(): string
    {
        return $this->_host;
    }

    /**
     * Returns the shared HTTP client, creating it on first use.
     */
    final protected function getClient(): Client
    {
        if ($this->_client === null) {
            // NOTE(review): 'verify' => false switches off TLS certificate
            // verification for every request; confirm this is still required.
            $this->_client = new Client(['verify' => false]);
        }
        return $this->_client;
    }

    /**
     * Returns the shared cookie jar, creating it on first use.
     */
    final protected function getCookieJar()
    {
        if ($this->_cookieJar === null) {
            $this->_cookieJar = new CookieJar();
        }
        return $this->_cookieJar;
    }

    /**
     * Fetches host + $url with GET and returns the response body.
     *
     * @param string $url Path (and query) appended to the host prefix.
     * @return string The complete response body.
     */
    final public function getContent(string $url): string
    {
        // getBody() yields a stream object; cast it explicitly instead of
        // relying on implicit coercion to satisfy the string return type.
        return (string) $this->getClient()->get($this->getHost() . $url)->getBody();
    }

    /**
     * Posts the credentials as form fields "username" / "password" and keeps
     * any cookies from the response in the shared cookie jar.
     *
     * @return bool true when the response status is 200, false on any other
     *              status or when the request throws.
     */
    final public function login($url, $username, $password)
    {
        try {
            $response = $this->getClient()->post($this->getHost() . $url, [
                'form_params' => [
                    'username' => $username,
                    'password' => $password,
                ],
                'cookies' => $this->getCookieJar(),
            ]);
            if ($response->getStatusCode() == 200) {
                log_message("info", "로그인 성공!");
                return true;
            }
            log_message("info", "로그인 실패: " . $response->getStatusCode());
            return false;
        } catch (\Exception $e) {
            // Report a login failure here; the previous text was the
            // file-download message copied from download().
            log_message("error", "로그인 중 오류 발생: " . $e->getMessage());
            return false;
        }
    }

    /**
     * Downloads host + $url into WRITEPATH . "uploads" (optionally a
     * sub-directory) using the last "/"-separated URL segment as file name.
     *
     * @param mixed        $url  Path appended to the host prefix.
     * @param string|false $path Optional sub-directory below the uploads folder.
     * @return bool true when the response status is 200, false otherwise
     *              (including every caught exception).
     */
    final public function download($url, $path = false)
    {
        try {
            // explode() never returns an empty array, so validate the file
            // name itself: it is empty for "" and for URLs ending in "/".
            $segments = explode('/', (string) $url);
            $fileName = array_pop($segments);
            if ($fileName === null || $fileName === '') {
                throw new \Exception("Download URL Error:" . $url);
            }

            $storagePath = WRITEPATH . "uploads";
            $storagePath .= !$path ? '' : DIRECTORY_SEPARATOR . $path;
            // Recursive so that a nested $path works; the trailing is_dir()
            // tolerates another process creating the directory concurrently.
            if (!is_dir($storagePath) && !mkdir($storagePath, 0777, true) && !is_dir($storagePath)) {
                throw new \Exception("Make Directory Error:" . $storagePath);
            }

            $response = $this->getClient()->get($this->getHost() . $url, [
                'cookies' => $this->getCookieJar(),
                // 'sink' makes the client write the body straight to this file.
                'sink' => $storagePath . DIRECTORY_SEPARATOR . $fileName,
            ]);
            if ($response->getStatusCode() == 200) {
                log_message("info", "파일이 성공적으로 다운로드되었습니다!");
                return true;
            }
            log_message("info", "파일 다운로드 실패: " . $response->getStatusCode());
            return false;
        } catch (\Exception $e) {
            log_message("error", "파일 다운로드 중 오류 발생: " . $e->getMessage());
            return false;
        }
    }
}
|
||||
@ -2,41 +2,78 @@
|
||||
|
||||
namespace App\Libraries\MyCrawler;
|
||||
|
||||
use App\Libraries\MyBaseLibrary;
|
||||
use Symfony\Component\DomCrawler\Crawler;
|
||||
|
||||
/**
 * Thin convenience layer over Symfony's DomCrawler.
 *
 * Builds crawlers from HTML / DOM nodes and extracts links, images and videos
 * from an existing crawler as plain arrays, logging every hit at debug level.
 */
class MyCrawlerLibrary
{
    private $_debug = false;

    public function __construct() {}

    /**
     * @return bool The debug flag last stored with setDebug() (false initially).
     */
    final public function getDebug(): bool
    {
        return $this->_debug;
    }

    /**
     * Stores the debug flag; this class only keeps it for getDebug().
     */
    final public function setDebug(bool $debug): void
    {
        $this->_debug = $debug;
    }

    /**
     * Wraps HTML text or a DOM node in a new Crawler.
     *
     * @param mixed $html Anything the Crawler constructor accepts.
     */
    final public function createCrawler($html)
    {
        return new Crawler($html);
    }

    /**
     * Returns the inner HTML of the document, or of the first node matching
     * $tag when a selector is given.
     *
     * @param string       $html Markup to parse.
     * @param string|false $tag  Optional CSS selector.
     */
    final public function getInnerHTML(string $html, $tag = false)
    {
        $crawler = $this->createCrawler($html);
        return $tag ? $crawler->filter($tag)->html() : $crawler->html();
    }

    /**
     * Collects one ["anchor" => text, "href" => attribute] entry per node
     * matching $options["tag"].
     *
     * @param array $options "tag" (CSS selector) and "attr" (attribute to read);
     *                       keys that are left out fall back to "a" / "href".
     */
    public function getLinks(Crawler $crawler, array $options = ["tag" => "a", "attr" => "href"]): array
    {
        // Caller-supplied keys win; missing ones are filled from the defaults
        // so a partial array no longer triggers undefined-index warnings.
        $options += ["tag" => "a", "attr" => "href"];
        $attr = $options["attr"];
        $links = $crawler->filter($options["tag"])->each(
            static function (Crawler $node) use ($attr) {
                return [
                    "anchor" => $node->text(),
                    "href" => $node->attr($attr),
                ];
            }
        );
        foreach ($links as $link) {
            log_message("debug", "Link-> " . $link['href']);
        }
        return $links;
    }

    /**
     * Collects one ["alt" => ..., "src" => ...] entry per node matching
     * $options["tag"].
     *
     * @param array $options "tag" and "attr"; missing keys fall back to
     *                       "img" / "src".
     */
    public function getImages(Crawler $crawler, array $options = ["tag" => "img", "attr" => "src"]): array
    {
        return $this->collectMedia($crawler, $options + ["tag" => "img", "attr" => "src"], "Image-> ");
    }

    /**
     * Collects one ["alt" => ..., "src" => ...] entry per node matching
     * $options["tag"].
     *
     * @param array $options "tag" and "attr"; missing keys fall back to
     *                       "video" / "src".
     */
    public function getVideos(Crawler $crawler, array $options = ["tag" => "video", "attr" => "src"]): array
    {
        return $this->collectMedia($crawler, $options + ["tag" => "video", "attr" => "src"], "Video-> ");
    }

    /**
     * Shared extraction for getImages() / getVideos(): reads "alt" and the
     * configured source attribute of every matching node and logs each source
     * with the given prefix.
     */
    private function collectMedia(Crawler $crawler, array $options, string $logPrefix): array
    {
        $attr = $options["attr"];
        $items = $crawler->filter($options["tag"])->each(
            static function (Crawler $node) use ($attr) {
                return [
                    "alt" => $node->attr('alt'),
                    "src" => $node->attr($attr),
                ];
            }
        );
        foreach ($items as $item) {
            log_message("debug", $logPrefix . $item['src']);
        }
        return $items;
    }
}
|
||||
|
||||
@ -1,55 +0,0 @@
|
||||
<?php
|
||||
|
||||
namespace App\Libraries\MyCrawler;
|
||||
|
||||
use Symfony\Component\DomCrawler\Crawler;
|
||||
|
||||
/**
 * Yamap-specific extraction on top of MyCrawlerLibrary: board links from the
 * list page and images / videos from the content area of a post page.
 */
class YamapLibrary extends MyCrawlerLibrary
{
    /**
     * @param string $host Passed unchanged to the parent constructor.
     */
    public function __construct(string $host)
    {
        parent::__construct($host);
    }

    /**
     * Builds one link entry per author name found in the board rows, leaving
     * out rows written by $options["skip"].
     *
     * @param mixed $html    Markup of the board list page.
     * @param array $options "skip": author name whose rows are ignored.
     * @return array List of ["anchor" => author name, "href" => post link].
     */
    public function getLinks($html, array $options = ["skip" => "관리자"]): array
    {
        // Take the first div.bbs_item and iterate over its siblings.
        // NOTE(review): siblings() does not include the first item itself -
        // confirm that skipping it is intended.
        $rows = $this->getCrawler($html)->filter("div.bbs_list div.bbs_item")->first()->siblings();

        $links = [];
        foreach ($rows as $row) {
            $rowCrawler = $this->getCrawler($row);
            $authors = $rowCrawler->filter("span.g_nickname")->each(
                function (Crawler $author) {
                    return $author->text();
                }
            );
            foreach ($authors as $author) {
                if ($author == $options["skip"]) {
                    continue;
                }
                $links[] = [
                    "anchor" => $author,
                    "href" => $rowCrawler->filter("a.list_subject")->attr("href"),
                ];
            }
        }
        return $links;
    }

    /**
     * Lists the images inside the first div.contents element.
     *
     * @return array List of ["alt" => ..., "src" => ...].
     */
    public function getImages(string $html, array $options = ["tag" => "img", "attr" => "src"]): array
    {
        return $this->collectFromContents($html, $options);
    }

    /**
     * Lists the videos inside the first div.contents element.
     *
     * @return array List of ["alt" => ..., "src" => ...].
     */
    public function getVideos(string $html, array $options = ["tag" => "video", "attr" => "src"]): array
    {
        return $this->collectFromContents($html, $options);
    }

    /**
     * Shared body of getImages() / getVideos(): only the first div.contents
     * element is searched for nodes matching $options["tag"].
     */
    private function collectFromContents(string $html, array $options): array
    {
        $contents = $this->getCrawler($html)->filter("div.contents")->first();
        $matches = $contents->filter($options["tag"]);

        return $matches->each(
            function (Crawler $element) use ($options) {
                $alt = $element->attr('alt');
                $src = $element->attr($options["attr"]);
                return ["alt" => $alt, "src" => $src];
            }
        );
    }
}
|
||||
28
app/Libraries/MyStorage/MyStorageLibrary.php
Normal file
28
app/Libraries/MyStorage/MyStorageLibrary.php
Normal file
@ -0,0 +1,28 @@
|
||||
<?php
|
||||
|
||||
namespace App\Libraries\MyStorage;
|
||||
|
||||
/**
 * Keeps the directory that downloaded files are stored in, plus a debug flag.
 *
 * The directory starts as WRITEPATH . "uploads"; nothing in this class touches
 * the file system.
 */
class MyStorageLibrary
{
    private $_path = WRITEPATH . "uploads";
    private $_debug = false;

    public function __construct() {}

    /**
     * @return string The storage directory as currently configured.
     */
    final public function getPath(): string
    {
        $current = $this->_path;
        return $current;
    }

    /**
     * Appends $path as a further sub-directory of the current storage path.
     *
     * Despite the name this does not replace the path: every call adds one
     * more DIRECTORY_SEPARATOR-joined segment to what is already stored.
     */
    final public function setPath(string $path): void
    {
        $this->_path = implode(DIRECTORY_SEPARATOR, [$this->_path, $path]);
    }

    /**
     * @return bool The debug flag last stored with setDebug() (false initially).
     */
    final public function getDebug(): bool
    {
        $flag = $this->_debug;
        return $flag;
    }

    /**
     * Stores the debug flag; this class only keeps it for getDebug().
     */
    final public function setDebug(bool $debug): void
    {
        $this->_debug = $debug;
    }
}
|
||||
78
app/Libraries/MyWeb/MyWebLibrary.php
Normal file
78
app/Libraries/MyWeb/MyWebLibrary.php
Normal file
@ -0,0 +1,78 @@
|
||||
<?php
|
||||
|
||||
namespace App\Libraries\MyWeb;
|
||||
|
||||
use GuzzleHttp\Cookie\CookieJar;
|
||||
use GuzzleHttp\Client;
|
||||
|
||||
/**
 * Small HTTP helper bound to one host.
 *
 * Lazily creates a single Guzzle client and a single cookie jar per instance;
 * the jar is used by login() and getContent() so that pages are fetched with
 * the cookies obtained earlier.
 */
class MyWebLibrary
{
    private $_host = "";
    private $_client = null;
    private $_cookieJar = null;
    private $_debug = false;

    /**
     * @param string $host Prefix that is prepended verbatim to every URL passed
     *                     to getContent() and login().
     */
    public function __construct(string $host)
    {
        $this->_host = $host;
    }

    /**
     * @return string The host prefix given to the constructor.
     */
    final public function getHost(): string
    {
        return $this->_host;
    }

    /**
     * Returns the shared HTTP client, creating it on first use.
     */
    final public function getClient(): Client
    {
        if ($this->_client === null) {
            // NOTE(review): 'verify' => false switches off TLS certificate
            // verification for every request; confirm this is still required.
            $this->_client = new Client(['verify' => false]);
        }
        return $this->_client;
    }

    /**
     * Returns the shared cookie jar, creating it on first use.
     */
    final public function getCookieJar()
    {
        if ($this->_cookieJar === null) {
            $this->_cookieJar = new CookieJar();
        }
        return $this->_cookieJar;
    }

    /**
     * @return bool The debug flag last stored with setDebug() (false initially).
     */
    final public function getDebug(): bool
    {
        return $this->_debug;
    }

    /**
     * Stores the debug flag; this class only keeps it for getDebug().
     */
    final public function setDebug(bool $debug): void
    {
        $this->_debug = $debug;
    }

    /**
     * Fetches host + $url with GET and returns the response body.
     *
     * The request carries the shared cookie jar, so cookies stored by login()
     * (or by earlier page loads) are sent along and new ones are remembered.
     *
     * @param string $url Path (and query) appended to the host prefix.
     * @return string The complete response body.
     */
    final public function getContent(string $url): string
    {
        $response = $this->getClient()->get($this->getHost() . $url, [
            'cookies' => $this->getCookieJar(),
        ]);
        // getBody() yields a stream object; cast it explicitly instead of
        // relying on implicit coercion to satisfy the string return type.
        return (string) $response->getBody();
    }

    /**
     * Posts the credentials as form fields "username" / "password" and keeps
     * any cookies from the response in the shared cookie jar.
     *
     * @return bool true when the response status is 200, false on any other
     *              status or when the request throws.
     */
    final public function login($url, $username, $password)
    {
        try {
            $response = $this->getClient()->post($this->getHost() . $url, [
                'form_params' => [
                    'username' => $username,
                    'password' => $password,
                ],
                'cookies' => $this->getCookieJar(),
            ]);
            if ($response->getStatusCode() == 200) {
                log_message("info", "로그인 성공!");
                return true;
            }
            log_message("info", "로그인 실패: " . $response->getStatusCode());
            return false;
        } catch (\Exception $e) {
            log_message("error", "로그인 중 오류 발생: " . $e->getMessage());
            return false;
        }
    }
}
|
||||
149
app/Libraries/YamapLibrary.php
Normal file
149
app/Libraries/YamapLibrary.php
Normal file
@ -0,0 +1,149 @@
|
||||
<?php
|
||||
|
||||
namespace App\Libraries;
|
||||
|
||||
use App\Libraries\MyWeb\MyWebLibrary;
|
||||
use App\Libraries\MyStorage\MyStorageLibrary;
|
||||
use App\Libraries\MyCrawler\MyCrawlerLibrary;
|
||||
use Symfony\Component\DomCrawler\Crawler;
|
||||
|
||||
/**
 * Crawls a Yamap board: reads the post links from the list page, loads a post
 * page and downloads the images and videos found in its content area.
 *
 * The work is delegated to three lazily created helpers: MyWebLibrary (HTTP),
 * MyStorageLibrary (target directory) and MyCrawlerLibrary (DOM extraction).
 * In debug mode files are not downloaded; only the target path is logged.
 */
class YamapLibrary
{
    private $_web = null;
    private $_storage = null;
    private $_crawler = null;
    private $_debug = false;

    private $_host = null;

    /**
     * @param string $host Host prefix handed to the web helper.
     */
    public function __construct(string $host)
    {
        $this->_host = $host;
    }

    /**
     * Returns the web helper, creating it with the current host and debug
     * flag on first use.
     */
    public function getMyWeb()
    {
        if ($this->_web === null) {
            $this->_web = new MyWebLibrary($this->getHost());
            $this->_web->setDebug($this->getDebug());
        }
        return $this->_web;
    }

    /**
     * Returns the storage helper, creating it with the current debug flag on
     * first use.
     */
    public function getMyStorage()
    {
        if ($this->_storage === null) {
            $this->_storage = new MyStorageLibrary();
            $this->_storage->setDebug($this->getDebug());
        }
        return $this->_storage;
    }

    /**
     * Returns the crawler helper, creating it with the current debug flag on
     * first use.
     */
    public function getMyCrawler()
    {
        if ($this->_crawler === null) {
            $this->_crawler = new MyCrawlerLibrary();
            $this->_crawler->setDebug($this->getDebug());
        }
        return $this->_crawler;
    }

    /**
     * @return bool The debug flag last stored with setDebug() (false initially).
     */
    final public function getDebug(): bool
    {
        return $this->_debug;
    }

    /**
     * Stores the debug flag and forwards it to helpers that already exist, so
     * the flag stays consistent no matter when it is changed.
     */
    final public function setDebug(bool $debug): void
    {
        $this->_debug = $debug;
        foreach ([$this->_web, $this->_storage, $this->_crawler] as $helper) {
            if ($helper !== null) {
                $helper->setDebug($debug);
            }
        }
    }

    /**
     * @return string The host prefix given to the constructor.
     */
    final public function getHost(): string
    {
        return $this->_host;
    }

    /**
     * Collects the post links of all board rows not written by "관리자".
     *
     * @param Crawler $crawler Crawler positioned on the board list container.
     * @return array List of ["anchor" => ..., "href" => ...] entries.
     */
    public function getLinks(Crawler $crawler): array
    {
        $helper = $this->getMyCrawler();
        $links = [];

        // Take the first div.bbs_item and iterate over its siblings.
        // NOTE(review): siblings() does not include the first item itself -
        // confirm that skipping it is intended.
        foreach ($crawler->filter("div.bbs_item")->first()->siblings() as $bbs_item) {
            $itemCrawler = $helper->createCrawler($bbs_item);
            $authors = $itemCrawler->filter("span.g_nickname")->each(
                static function (Crawler $node) {
                    return $node->text();
                }
            );
            foreach ($authors as $author) {
                // Rows written by the administrator account are ignored.
                if ($author === "관리자") {
                    continue;
                }
                // Add every .list_subject link of this row to the result.
                foreach ($helper->getLinks($itemCrawler, ["tag" => ".list_subject", "attr" => "href"]) as $link) {
                    $links[] = $link;
                }
            }
        }
        return $links;
    }

    /**
     * Extracts the images below $crawler and passes each source to download().
     *
     * @return array The extracted ["alt" => ..., "src" => ...] entries.
     */
    public function getImages(Crawler $crawler, array $options = ["tag" => "img", "attr" => "src"]): array
    {
        $images = $this->getMyCrawler()->getImages($crawler, $options);
        foreach ($images as $image) {
            $this->download($image['src']);
        }
        return $images;
    }

    /**
     * Extracts the videos below $crawler and passes each source to download().
     *
     * @return array The extracted ["alt" => ..., "src" => ...] entries.
     */
    public function getVideos(Crawler $crawler, array $options = ["tag" => "video", "attr" => "src"]): array
    {
        $videos = $this->getMyCrawler()->getVideos($crawler, $options);
        foreach ($videos as $video) {
            $this->download($video['src']);
        }
        return $videos;
    }

    /**
     * Downloads one file into the storage directory, named after the last
     * "/"-separated segment of $url.
     *
     * @param mixed $url Source taken from the page: either a path relative to
     *                   the host or an absolute http(s) URL. Empty / null
     *                   values (e.g. an element without the attribute) are
     *                   rejected.
     * @return bool true on HTTP 200, or in debug mode once the target path has
     *              been determined; false on any failure.
     */
    final public function download($url)
    {
        try {
            $url = trim((string) $url);
            // explode() never returns an empty array, so validate the file
            // name itself: it is empty for "" and for URLs ending in "/".
            $segments = explode('/', $url);
            $fileName = array_pop($segments);
            if ($fileName === null || $fileName === '') {
                throw new \Exception("Download URL Error:" . $url);
            }

            $storagePath = $this->getMyStorage()->getPath();
            // Recursive so that sub-folders added through setPath() work; the
            // trailing is_dir() tolerates a concurrent creation.
            if (!is_dir($storagePath) && !mkdir($storagePath, 0777, true) && !is_dir($storagePath)) {
                throw new \Exception("Make Directory Error:" . $storagePath);
            }

            $fullPath = $storagePath . DIRECTORY_SEPARATOR . $fileName;
            log_message("debug", "FullPath-> " . $fullPath);

            // Debug mode stops after resolving the target path.
            if ($this->getDebug()) {
                return true;
            }

            // Absolute sources are requested as they are; only relative ones
            // get the host prefix.
            $target = preg_match('#^https?://#i', $url) === 1
                ? $url
                : $this->getMyWeb()->getHost() . $url;

            $response = $this->getMyWeb()->getClient()->get($target, [
                'cookies' => $this->getMyWeb()->getCookieJar(),
                // 'sink' makes the client write the body straight to this file.
                'sink' => $fullPath,
            ]);
            if ($response->getStatusCode() == 200) {
                log_message("info", "파일이 성공적으로 다운로드되었습니다!");
                return true;
            }
            log_message("info", "파일 다운로드 실패: " . $response->getStatusCode());
            return false;
        } catch (\Exception $e) {
            log_message("error", "파일 다운로드 중 오류 발생: " . $e->getMessage());
            return false;
        }
    }

    /**
     * Loads the board list page and returns the post links found in the first
     * div.bbs_list element.
     *
     * @throws \Exception When no link is found.
     */
    public function getLinksByMainPage(string $url): array
    {
        $html = $this->getMyWeb()->getContent($url);
        $crawler = $this->getMyCrawler()->createCrawler($html)->filter("div.bbs_list")->first();
        $links = $this->getLinks($crawler);
        if (!count($links)) {
            throw new \Exception("Target Links가 없습니다.");
        }
        return $links;
    }

    /**
     * Loads a post page and returns a crawler limited to its first
     * div.contents element.
     */
    public function getCrawlerByDetailPage(string $url): Crawler
    {
        log_message("debug", "Target-> " . $url);
        $html = $this->getMyWeb()->getContent($url);
        return $this->getMyCrawler()->createCrawler($html)->filter("div.contents")->first();
    }
}
|
||||
Loading…
Reference in New Issue
Block a user