diff --git a/app/Config/Routes.php b/app/Config/Routes.php
index b7e38a7..bb597a5 100644
--- a/app/Config/Routes.php
+++ b/app/Config/Routes.php
@@ -31,17 +31,13 @@ $routes->group('mangboard', ['namespace' => 'App\Controllers\Mangboard'], functi
$routes->cli('check_level/(:alpha)', 'UserController::check_level/$1');
});
$routes->group('crawler', function ($routes) {
- $routes->cli('yamap', 'CrawlerController::yamap');
- $routes->cli('yamap/(:alpha)', 'CrawlerController::yamap/$1');
- $routes->cli('yamap/(:alpha)/(:any)', 'CrawlerController::yamap/$1/$2');
- $routes->cli('yamoon', 'CrawlerController::yamoon');
- $routes->cli('yamoon/(:alpha)', 'CrawlerController::yamoon/$1');
- $routes->cli('yamoon/(:alpha)/(:any)', 'CrawlerController::yamoon/$1/$2');
- $routes->cli('sir', 'CrawlerController::sir');
- $routes->cli('sir/(:alpha)', 'CrawlerController::sir/$1');
- $routes->cli('sir/(:alpha)/(:any)', 'CrawlerController::sir/$1/$2');
- $routes->cli('inven', 'CrawlerController::inven');
- $routes->cli('inven/(:alpha)', 'CrawlerController::inven/$1');
- $routes->cli('inven/(:alpha)/(:any)', 'CrawlerController::inven/$1/$2');
+ $routes->cli('yamap/(:any)', 'CrawlerController::yamap/$1');
+ $routes->cli('yamap/(:any)/(:any)', 'CrawlerController::yamap/$1/$2');
+ $routes->cli('yamoon/(:any)', 'CrawlerController::yamoon/$1');
+ $routes->cli('yamoon/(:any)/(:any)', 'CrawlerController::yamoon/$1/$2');
+ $routes->cli('sir/(:any)', 'CrawlerController::sir/$1');
+ $routes->cli('sir/(:any)/(:any)', 'CrawlerController::sir/$1/$2');
+ $routes->cli('inven/(:any)', 'CrawlerController::inven/$1');
+ $routes->cli('inven/(:any)/(:any)', 'CrawlerController::inven/$1/$2');
});
});
diff --git a/app/Controllers/Mangboard/CrawlerController.php b/app/Controllers/Mangboard/CrawlerController.php
index 378b077..13b8526 100644
--- a/app/Controllers/Mangboard/CrawlerController.php
+++ b/app/Controllers/Mangboard/CrawlerController.php
@@ -47,16 +47,14 @@ class CrawlerController extends CommonController
log_message("notice", "{$id}로 로그인 성공");
return $user_entity;
}
- public function yamap(string $id = "", string $option = ""): string
+ public function yamap(string $board_name, ...$params): string
{
try {
//1. 사이트 로그인 처리
- $user_entity = $this->login($id);
+ $user_entity = $this->login((string) ($params['id'] ?? "")); // read the 'id' KEY; in_array() only inspected values, so $params['id'] could be undefined
//2. 필요한 로그인한 사용자정보,Socket,Storage 정의후 Crawler에게 전달.
- $crawler = new YamapCrawler(getenv('yamap.host.url'), getenv("yamap.host.board_name"), $user_entity);
- if ($option) {
- $crawler->setDebug($option === "debug" ? true : false);
- }
+ $crawler = new YamapCrawler(getenv('yamap.host.url'), $board_name, $user_entity);
+ $crawler->setDebug(in_array('debug', $params, true)); // strict: only the literal "debug" token enables debug mode
$crawler->execute(intval(getenv("yamap.list.max_limit")));
return "완료되었습니다.";
} catch (\Exception $e) {
@@ -64,33 +62,29 @@ class CrawlerController extends CommonController
return $e->getMessage();
}
}
- public function yamoon(string $id = "", string $option = ""): string
+ public function yamoon(string $board_name, ...$params): string
{
try {
//1. 사이트 로그인 처리
- $user_entity = $this->login($id);
+ $user_entity = $this->login((string) ($params['id'] ?? "")); // read the 'id' KEY; in_array() only inspected values, so $params['id'] could be undefined
//2. 필요한 로그인한 사용자정보,Socket,Storage 정의후 Crawler에게 전달.
- $crawler = new YamoonCrawler(getenv("yamoon.host.url"), getenv("yamoon.host.board_name"), $user_entity);
- if ($option) {
- $crawler->setDebug($option === "debug" ? true : false);
- }
- $crawler->execute(intval(getenv("yamap.list.max_limit")));
+ $crawler = new YamoonCrawler(getenv("yamoon.host.url"), $board_name, $user_entity);
+ $crawler->setDebug(in_array('debug', $params, true)); // strict: only the literal "debug" token enables debug mode
+ $crawler->execute(intval(getenv("yamoon.list.max_limit")));
return "완료되었습니다.";
} catch (\Exception $e) {
log_message("error", $e->getMessage());
return $e->getMessage();
}
}
- public function sir(string $id = "", string $option = ""): string
+ public function sir(string $board_name, ...$params): string
{
try {
//1. 사이트 로그인 처리
- $user_entity = $this->login($id);
+ $user_entity = $this->login((string) ($params['id'] ?? "")); // read the 'id' KEY; in_array() only inspected values, so $params['id'] could be undefined
//2. 필요한 로그인한 사용자정보,Socket,Storage 정의후 Crawler에게 전달.
- $crawler = new SirCrawler(getenv("sir.host.url"), getenv("sir.host.board_name"), $user_entity);
- if ($option) {
- $crawler->setDebug($option === "debug" ? true : false);
- }
+ $crawler = new SirCrawler(getenv("sir.host.url"), $board_name, $user_entity);
+ $crawler->setDebug(in_array('debug', $params, true)); // strict: only the literal "debug" token enables debug mode
$crawler->execute(intval(getenv("sir.list.max_limit")));
return "완료되었습니다.";
} catch (\Exception $e) {
@@ -98,16 +92,14 @@ class CrawlerController extends CommonController
return $e->getMessage();
}
}
- public function inven(string $id = "", string $option = ""): string
+ public function inven(string $board_name, ...$params): string
{
try {
//1. 사이트 로그인 처리
- $user_entity = $this->login($id);
+ $user_entity = $this->login((string) ($params['id'] ?? "")); // read the 'id' KEY; in_array() only inspected values, so $params['id'] could be undefined
//2. 필요한 로그인한 사용자정보,Socket,Storage 정의후 Crawler에게 전달.
- $crawler = new InvenCrawler(getenv("inven.host.url"), getenv("inven.host.board_name"), $user_entity);
- if ($option) {
- $crawler->setDebug($option === "debug" ? true : false);
- }
+ $crawler = new InvenCrawler(getenv("inven.host.url"), $board_name, $user_entity);
+ $crawler->setDebug(in_array('debug', $params, true)); // strict: only the literal "debug" token enables debug mode
$crawler->execute(intval(getenv("inven.list.max_limit")));
return "완료되었습니다.";
} catch (\Exception $e) {
diff --git a/app/Libraries/MyCrawler/Mangboard/InvenCrawler.php b/app/Libraries/MyCrawler/Mangboard/InvenCrawler.php
index 0957f43..da25e67 100644
--- a/app/Libraries/MyCrawler/Mangboard/InvenCrawler.php
+++ b/app/Libraries/MyCrawler/Mangboard/InvenCrawler.php
@@ -11,6 +11,24 @@ class InvenCrawler extends MangboardCrawler
{
parent::__construct($host, $board_name, $user_entity);
}
+ /**
+  * Resolves a node's media URL; for <video>, data-src is only a fallback when the parent lookup yields null or ''.
+  */
+ protected function getUrlByMediaType(Crawler $node, string $media_type, string $attr): null|string
+ {
+     $url = parent::getUrlByMediaType($node, $media_type, $attr);
+     if ($media_type !== 'video' || ($url !== null && $url !== '')) {
+         return $url; // non-video media, or the parent lookup already produced a usable URL
+     }
+     // extract() yields one value per matched node and '' when the attribute is missing,
+     // so only a non-empty data-src is allowed to stand in for the missing URL.
+     foreach ($node->extract(['data-src']) as $candidate) {
+         if ($candidate !== '') {
+             return $candidate;
+         }
+     }
+     return $url;
+ }
//작성내용
//
//
@@ -33,8 +51,8 @@ class InvenCrawler extends MangboardCrawler
$response = $this->getMySocket()->getContent($listInfo['detail_url']);
$tag = getenv("inven.view.content.tag");
$selector = $this->getSelector($response, $tag);
- $media_urls = $this->getUrlsByMediaType("image", $selector, ["tag" => "img", "attr" => "src"]);
- $media_urls = $this->getUrlsByMediaType("video", $selector, ["tag" => "video", "attr" => "src"], $media_urls);
+ $media_urls = $this->getUrlsByMediaType($selector, "img", "src");
+ $media_urls = $this->getUrlsByMediaType($selector, "video", "src", $media_urls);
if ($this->getDebug()) {
throw new \Exception(sprintf(
"\n--------------%s Debug--------------\n%s%s\n---------------------------------------\n",
@@ -84,28 +102,31 @@ class InvenCrawler extends MangboardCrawler
public function execute(int $max_limit): void
{
try {
- $listInfos = [];
if ($this->getDebug()) {
- $this->detail_page(1, ['detail_url' => getenv("inven.view.test.url")]);
- }
- $response = $this->getMySocket()->getContent(getenv("inven.list.url"));
- //div.bbs_item를 가진 객체를 찾아서 같은 형식의 객체(sibling)를 배열로 넘김
- $this->getSelector($response, getenv("inven.list.tag"))->each(
- function (Crawler $node) use (&$listInfos): void {
- $hit = $node->filter(getenv("inven.list.item.hit.tag"))->text();
- $date = date("Y") . "-" . $node->filter(getenv("inven.list.item.date.tag"))->text();
- $nickname = $node->filter(getenv("inven.list.item.nickname.tag"))->text();
- //작성자가 "관리자"가 아닌 게시물이면 해당 bbs_item에서 a.list_subject 객체를 찾아서
- $link_node = $node->filter(getenv("inven.list.item.link.tag"));
- $detail_url = $link_node->attr("href");
- $title = $link_node->text();
- $listInfos[] = ['title' => $title, 'nickname' => $nickname, 'detail_url' => $detail_url, 'date' => $date, 'hit' => $hit];
+ $url = getenv("inven.view.test.url.{$this->_board_name}");
+ log_message("notice", __FUNCTION__ . "=> DEBUG 게시물 {$url} 작업시작");
+ $this->detail_page(1, ['detail_url' => $url]);
+ log_message("notice", __FUNCTION__ . "=> DEBUG 게시물 {$url} 작업종료");
+ } else {
+ $listInfos = [];
+ $response = $this->getMySocket()->getContent(getenv("inven.list.url.{$this->_board_name}"));
+ $this->getSelector($response, getenv("inven.list.tag"))->each(
+ function (Crawler $node) use (&$listInfos): void {
+ $hit = $node->filter(getenv("inven.list.item.hit.tag"))->text();
+ $date = date("Y") . "-" . $node->filter(getenv("inven.list.item.date.tag"))->text();
+ $nickname = $node->filter(getenv("inven.list.item.nickname.tag"))->text();
+ //작성자가 "관리자"가 아닌 게시물이면 해당 bbs_item에서 a.list_subject 객체를 찾아서
+ $link_node = $node->filter(getenv("inven.list.item.link.tag"));
+ $detail_url = $link_node->attr("href");
+ $title = $link_node->text();
+ $listInfos[] = ['title' => $title, 'nickname' => $nickname, 'detail_url' => $detail_url, 'date' => $date, 'hit' => $hit];
+ }
+ );
+ if (!count($listInfos)) {
+ throw new \Exception("Target URL이 없습니다.");
}
- );
- if (!count($listInfos)) {
- throw new \Exception("Target URL이 없습니다.");
+ $this->main_process($max_limit, $listInfos);
}
- $this->main_process($max_limit, $listInfos);
log_message("notice", __FUNCTION__ . " 작업이 완료되었습니다.");
} catch (\Exception $e) {
log_message("warning", sprintf(
diff --git a/app/Libraries/MyCrawler/Mangboard/MangboardCrawler.php b/app/Libraries/MyCrawler/Mangboard/MangboardCrawler.php
index 5462ae5..134c129 100644
--- a/app/Libraries/MyCrawler/Mangboard/MangboardCrawler.php
+++ b/app/Libraries/MyCrawler/Mangboard/MangboardCrawler.php
@@ -11,9 +11,9 @@ use App\Models\Mangboard\BoardsModel;
abstract class MangboardCrawler extends MyCrawler
{
- protected $_mySocket = null;
- protected $_host = "";
- private $_board_name = "";
+ private $_mySocket = null;
+ private $_host = "";
+ protected $_board_name = "";
private $_user_entity = null;
protected function __construct(string $host, string $board_name, UserEntity $user_entity)
{
@@ -22,7 +22,8 @@ abstract class MangboardCrawler extends MyCrawler
$this->_board_name = $board_name;
$this->_user_entity = $user_entity;
}
- protected function getMySocket()
+ abstract public function execute(int $max_limit): void;
+ final protected function getMySocket()
{
if ($this->_mySocket === null) {
$this->_mySocket = new WebSocket($this->_host);
diff --git a/app/Libraries/MyCrawler/Mangboard/SirCrawler.php b/app/Libraries/MyCrawler/Mangboard/SirCrawler.php
index 3cf03ce..2d9de05 100644
--- a/app/Libraries/MyCrawler/Mangboard/SirCrawler.php
+++ b/app/Libraries/MyCrawler/Mangboard/SirCrawler.php
@@ -13,9 +13,9 @@ class SirCrawler extends MangboardCrawler
{
parent::__construct($host, $board_name, $user_entity);
}
- protected function changeURLByMediaType(string $url): string
+ protected function changeURLByCrawler(string $url): string
{
- return str_replace("/sir.kr/", "", parent::changeURLByMediaType($url));
+ return str_replace("/sir.kr/", "", parent::changeURLByCrawler($url));
}
//작성내용
//
@@ -91,8 +91,8 @@ class SirCrawler extends MangboardCrawler
//작성내용
$tag = getenv("sir.view.content.tag");
$selector = $this->getSelector($response, $tag);
- $media_urls = $this->getUrlsByMediaType("image", $selector, ["tag" => "img", "attr" => "src"]);
- $media_urls = $this->getUrlsByMediaType("video", $selector, ["tag" => "video", "attr" => "src"], $media_urls);
+ $media_urls = $this->getUrlsByMediaType($selector, "img", "src");
+ $media_urls = $this->getUrlsByMediaType($selector, "video", "src", $media_urls);
if ($this->getDebug()) {
throw new \Exception(sprintf(
"\n--------------%s Debug--------------\n%s%s\n---------------------------------------\n",
@@ -142,29 +142,32 @@ class SirCrawler extends MangboardCrawler
public function execute(int $max_limit): void
{
try {
- $listInfos = [];
if ($this->getDebug()) {
- $this->detail_page(1, ['detail_url' => getenv("sir.view.test.url")]);
- }
- $response = $this->getMySocket()->getContent(getenv("sir.list.url"));
- $this->getSelector($response, getenv("sir.list.tag"))->each(
- function (Crawler $node) use (&$listInfos): void {
- $link_node = $node->filter(getenv("sir.list.item.link.tag"));
- // href url의 맨 앞이 /가 두개라서 한개를 빼기위함
- $detail_url = str_replace("/sir.kr/", "", $link_node->attr("href"));
- // $detail_url = $link_node->attr("href");
- $title = $link_node->text();
- $nickname = $node->filter(getenv("sir.list.item.nickname.tag"))->text();
- $hit = $node->filter(getenv("sir.list.item.hit.tag"))->text();
- // $date = $node->filter(getenv("sir.list.item.date.tag"))->text();
- $listInfos[] = ['title' => $title, 'nickname' => $nickname, 'detail_url' => $detail_url, 'date' => "", 'hit' => $hit];
+ $url = getenv("sir.view.test.url.{$this->_board_name}");
+ log_message("notice", __FUNCTION__ . "=> DEBUG 게시물 {$url} 작업시작");
+ $this->detail_page(1, ['detail_url' => $url]);
+ log_message("notice", __FUNCTION__ . "=> DEBUG 게시물 {$url} 작업종료");
+ } else {
+ $listInfos = [];
+ $response = $this->getMySocket()->getContent(getenv("sir.list.url.{$this->_board_name}"));
+ $this->getSelector($response, getenv("sir.list.tag"))->each(
+ function (Crawler $node) use (&$listInfos): void {
+ $link_node = $node->filter(getenv("sir.list.item.link.tag"));
+ // href url의 맨 앞이 /가 두개라서 한개를 빼기위함
+ $detail_url = $this->changeURLByCrawler($link_node->attr("href"));
+ // $detail_url = $link_node->attr("href");
+ $title = $link_node->text();
+ $nickname = $node->filter(getenv("sir.list.item.nickname.tag"))->text();
+ $hit = $node->filter(getenv("sir.list.item.hit.tag"))->text();
+ // $date = $node->filter(getenv("sir.list.item.date.tag"))->text();
+ $listInfos[] = ['title' => $title, 'nickname' => $nickname, 'detail_url' => $detail_url, 'date' => "", 'hit' => $hit];
+ }
+ );
+ if (!count($listInfos)) {
+ throw new \Exception("Target URL이 없습니다.");
}
- );
- // throw new \Exception("Target URL이 없습니다." . var_export($listInfos, true));
- if (!count($listInfos)) {
- throw new \Exception("Target URL이 없습니다.");
+ $this->main_process($max_limit, $listInfos);
}
- $this->main_process($max_limit, $listInfos);
log_message("notice", __FUNCTION__ . " 작업이 완료되었습니다.");
} catch (\Exception $e) {
log_message("warning", sprintf(
diff --git a/app/Libraries/MyCrawler/Mangboard/YamapCrawler.php b/app/Libraries/MyCrawler/Mangboard/YamapCrawler.php
index 4e59bb0..42ff039 100644
--- a/app/Libraries/MyCrawler/Mangboard/YamapCrawler.php
+++ b/app/Libraries/MyCrawler/Mangboard/YamapCrawler.php
@@ -16,8 +16,8 @@ class YamapCrawler extends MangboardCrawler
$response = $this->getMySocket()->getContent($listInfo['detail_url']);
$tag = getenv("yamap.view.content.tag");
$selector = $this->getSelector($response, $tag);
- $media_urls = $this->getUrlsByMediaType("image", $selector, ["tag" => "img", "attr" => "src"]);
- $media_urls = $this->getUrlsByMediaType("video", $selector, ["tag" => "video", "attr" => "src"], $media_urls);
+ $media_urls = $this->getUrlsByMediaType($selector, "img", "src");
+ $media_urls = $this->getUrlsByMediaType($selector, "video", "src", $media_urls);
if ($this->getDebug()) {
throw new \Exception(sprintf(
"\n--------------%s Debug--------------\n%s%s\n---------------------------------------\n",
@@ -68,32 +68,36 @@ class YamapCrawler extends MangboardCrawler
public function execute(int $max_limit): void
{
try {
- $listInfos = [];
if ($this->getDebug()) {
- $this->detail_page(1, ['detail_url' => getenv("yamap.view.test.url")]);
- }
- $response = $this->getMySocket()->getContent(getenv("yamap.list.url"));
- $selector = $this->getSelector($response, getenv("yamap.list.tag"));
- //div.bbs_item를 가진 객체를 찾아서 같은 형식의 객체(sibling)를 배열로 넘김
- $selector->filter(getenv("yamap.list.item.tag"))->each(
- function (Crawler $node) use (&$listInfos): void {
- $hit = $node->filter(getenv("yamap.list.item.hit.tag"))->text();
- $date = $node->filter(getenv("yamap.list.item.date.tag"))->text();
- $nickname = $node->filter(getenv("yamap.list.item.nickname.tag"))->text();
- //bbs_item에서 span.g_nickname 객체를 찾아서 작성자가 "관리자" 아닌지 확인 후 Return Bool
- if ($nickname != getenv("yamap.list.item.nickname.except")) {
- //작성자가 "관리자"가 아닌 게시물이면 해당 bbs_item에서 a.list_subject 객체를 찾아서
- $link_node = $node->filter(getenv("yamap.list.item.link.tag"));
- $detail_url = $link_node->attr("href");
- $title = $link_node->children()->last()->text();
- $listInfos[] = ['title' => $title, 'nickname' => $nickname, 'detail_url' => $detail_url, 'date' => $date, 'hit' => $hit];
+ $url = getenv("yamap.view.test.url.{$this->_board_name}");
+ log_message("notice", __FUNCTION__ . "DEBUG 게시물 {$url} 작업시작");
+ $this->detail_page(1, ['detail_url' => $url]);
+ log_message("notice", __FUNCTION__ . "DEBUG 게시물 {$url} 작업종료");
+ } else {
+ $listInfos = [];
+ $response = $this->getMySocket()->getContent(getenv("yamap.list.url.{$this->_board_name}"));
+ $selector = $this->getSelector($response, getenv("yamap.list.tag"));
+ //div.bbs_item를 가진 객체를 찾아서 같은 형식의 객체(sibling)를 배열로 넘김
+ $selector->filter(getenv("yamap.list.item.tag"))->each(
+ function (Crawler $node) use (&$listInfos): void {
+ $hit = $node->filter(getenv("yamap.list.item.hit.tag"))->text();
+ $date = $node->filter(getenv("yamap.list.item.date.tag"))->text();
+ $nickname = $node->filter(getenv("yamap.list.item.nickname.tag"))->text();
+ //bbs_item에서 span.g_nickname 객체를 찾아서 작성자가 "관리자" 아닌지 확인 후 Return Bool
+ if ($nickname != getenv("yamap.list.item.nickname.except")) {
+ //작성자가 "관리자"가 아닌 게시물이면 해당 bbs_item에서 a.list_subject 객체를 찾아서
+ $link_node = $node->filter(getenv("yamap.list.item.link.tag"));
+ $detail_url = $link_node->attr("href");
+ $title = $link_node->children()->last()->text();
+ $listInfos[] = ['title' => $title, 'nickname' => $nickname, 'detail_url' => $detail_url, 'date' => $date, 'hit' => $hit];
+ }
}
+ );
+ if (!count($listInfos)) {
+ throw new \Exception("Target URL이 없습니다.");
}
- );
- if (!count($listInfos)) {
- throw new \Exception("Target URL이 없습니다.");
+ $this->main_process($max_limit, $listInfos);
}
- $this->main_process($max_limit, $listInfos);
log_message("notice", __FUNCTION__ . " 작업이 완료되었습니다.");
} catch (\Exception $e) {
log_message("warning", sprintf(
diff --git a/app/Libraries/MyCrawler/Mangboard/YamoonCrawler.php b/app/Libraries/MyCrawler/Mangboard/YamoonCrawler.php
index 41a760d..2e58a0c 100644
--- a/app/Libraries/MyCrawler/Mangboard/YamoonCrawler.php
+++ b/app/Libraries/MyCrawler/Mangboard/YamoonCrawler.php
@@ -61,8 +61,8 @@ class YamoonCrawler extends MangboardCrawler
//작성내용
$tag = getenv("yamoon.view.content.tag");
$selector = $this->getSelector($response, $tag);
- $media_urls = $this->getUrlsByMediaType("image", $selector, ["tag" => "img", "attr" => "src"]);
- $media_urls = $this->getUrlsByMediaType("video", $selector, ["tag" => "video", "attr" => "src"], $media_urls);
+ $media_urls = $this->getUrlsByMediaType($selector, "img", "src");
+ $media_urls = $this->getUrlsByMediaType($selector, "video", "src", $media_urls);
if ($this->getDebug()) {
throw new \Exception(sprintf(
"\n--------------%s Debug--------------\n%s%s\n---------------------------------------\n",
@@ -90,25 +90,29 @@ class YamoonCrawler extends MangboardCrawler
public function execute(int $max_limit): void
{
try {
- $listInfos = [];
if ($this->getDebug()) {
- $this->detail_page(1, ['detail_url' => getenv("yamoon.view.test.url")]);
- }
- $response = $this->getMySocket()->getContent(getenv("yamoon.list.url"));
- $this->getSelector($response, getenv("yamoon.list.tag"))->each(
- function (Crawler $node) use (&$listInfos): void {
- $link_node = $node->filter(getenv("yamoon.list.item.link.tag"));
- $detail_url = $link_node->attr("href");
- $title = $link_node->text();
- $info_node = $node->filter(getenv("yamoon.list.item.info.tag"));
- $infos = explode("|", $info_node->text());
- $listInfos[] = ['title' => $title, 'detail_url' => $detail_url, 'nickname' => trim($infos[0]), 'hit' => trim($infos[2]), 'date' => trim($infos[4])];
+ $url = getenv("yamoon.view.test.url.{$this->_board_name}");
+ log_message("notice", __FUNCTION__ . "=> DEBUG 게시물 {$url} 작업시작");
+ $this->detail_page(1, ['detail_url' => $url]);
+ log_message("notice", __FUNCTION__ . "=> DEBUG 게시물 {$url} 작업종료");
+ } else {
+ $listInfos = [];
+ $response = $this->getMySocket()->getContent(getenv("yamoon.list.url.{$this->_board_name}"));
+ $this->getSelector($response, getenv("yamoon.list.tag"))->each(
+ function (Crawler $node) use (&$listInfos): void {
+ $link_node = $node->filter(getenv("yamoon.list.item.link.tag"));
+ $detail_url = $link_node->attr("href");
+ $title = $link_node->text();
+ $info_node = $node->filter(getenv("yamoon.list.item.info.tag"));
+ $infos = explode("|", $info_node->text());
+ $listInfos[] = ['title' => $title, 'detail_url' => $detail_url, 'nickname' => trim($infos[0]), 'hit' => trim($infos[2]), 'date' => trim($infos[4])];
+ }
+ );
+ if (!count($listInfos)) {
+ throw new \Exception("Target URL이 없습니다.");
}
- );
- if (!count($listInfos)) {
- throw new \Exception("Target URL이 없습니다.");
+ $this->main_process($max_limit, $listInfos);
}
- $this->main_process($max_limit, $listInfos);
log_message("notice", __FUNCTION__ . " 작업이 완료되었습니다.");
} catch (\Exception $e) {
log_message("warning", sprintf(
diff --git a/app/Libraries/MyCrawler/MyCrawler.php b/app/Libraries/MyCrawler/MyCrawler.php
index f411d08..7930da4 100644
--- a/app/Libraries/MyCrawler/MyCrawler.php
+++ b/app/Libraries/MyCrawler/MyCrawler.php
@@ -33,28 +33,45 @@ abstract class MyCrawler extends CommonLibrary
return $crawler->filter($tag);
}
- protected function changeURLByMediaType(string $url): string
+ protected function changeURLByCrawler(string $url): string
{
- return preg_match('/^[^?]+/', $url, $matches) ? $matches[0] : null;
+ return preg_match('/^[^?]+/', $url, $matches) ? $matches[0] : ""; // nothing precedes '?': yield "" because null would violate the string return type (TypeError)
}
- protected function getUrlsByMediaType(string $media_type, Crawler $selector, array $options, array $urls = []): array
+ protected function getUrlByMediaType(Crawler $node, string $media_type, string $attr): null|string
{
- $urls[$media_type] = [];
- $selector->filter($options["tag"])->each(
- function (Crawler $node) use (&$media_type, &$options, &$urls): void {
- $url = $node->attr($options["attr"]);
- switch ($media_type) {
- case 'video':
- if ($url === null) {
- $url = $node->children()->attr("src");
- }
- break;
+ switch ($media_type) {
+ case 'video':
+ try {
+ $url = $node->attr($attr); // 또는
+ } catch (\Exception) {
+ $url = $node->children()->attr("src"); //