diff --git a/app/Controllers/Mangboard/Crawler/InvenCrawler.php b/app/Controllers/Mangboard/Crawler/InvenCrawler.php
index 8f99c19..a362bc2 100644
--- a/app/Controllers/Mangboard/Crawler/InvenCrawler.php
+++ b/app/Controllers/Mangboard/Crawler/InvenCrawler.php
@@ -24,9 +24,11 @@ class InvenCrawler extends CrawlerController
case 'video':
$url = parent::getUrlByMediaType($node, $media_type, $attr);
//그래도 null이면 data-src로 추출해본다.
- $attributes = $node->extract(['data-src']);
- if (count($attributes)) {
- $url = $attributes[0];
+ if ($url === null) {
+ $attributes = $node->extract(['data-src']);
+ if (count($attributes)) {
+ $url = $attributes[0];
+ }
}
break;
case 'img':
@@ -53,49 +55,10 @@ class InvenCrawler extends CrawlerController
//
//
//
- protected function detail_content_process(int $cnt, array $listInfo): array
+ protected function getDetailSelector(array $listInfo): array
{
- $response = $this->getMySocket()->getContent($listInfo['detail_url']);
- $selector = $this->getSelector($response, getenv("inven.view.content.tag"));
- $formDatas = [];
- $formDatas['image_path'] = "";
- $formDatas['content'] = $selector->html();
- //File DB 및 Board DB 등록작업등
- $this->getBoardModel()->createByCrawler(
- $this->getBoardsEntity(),
- $this->getUserEntity(),
- $cnt,
- $listInfo,
- [],
- $formDatas
- );
- log_message("notice", __FUNCTION__ . " 작업이 완료되었습니다.");
- return $listInfo;
- }
- protected function detail_download_process(int $cnt, array $listInfo): array
- {
- $response = $this->getMySocket()->getContent($listInfo['detail_url']);
- $selector = $this->getSelector($response, getenv("inven.view.content.tag"));
-
- $media_urls = $this->getUrlsByMediaType($selector, "img", "src");
- $media_urls = $this->getUrlsByMediaType($selector, "video", "src", $media_urls);
- if ($this->isDebug) {
- throw new \Exception(sprintf(
- "\n--------------%s Debug--------------\n%s%s\n---------------------------------------\n",
- __FUNCTION__,
- var_export($listInfo, true),
- var_export($media_urls, true)
- ));
- } else {
- // Image 나 Video 소스들의 url을 가져와서 실제 다운받는 처리
- $storages = $this->media_process($media_urls);
- if (!count($storages)) {
- throw new \Exception("등록할 자료가 없습니다.");
- }
- $this->backend_process($cnt, $listInfo, $storages);
- }
- log_message("notice", __FUNCTION__ . " 작업이 완료되었습니다.");
- return $listInfo;
+ $detailHtml = $this->getMySocket()->getContent($listInfo['detail_url']); // fetch the content at the entry's detail_url
+ return [$this->getSelector($detailHtml, getenv("inven.view.content.tag")), $listInfo]; // pair: selector for the configured content tag, unchanged list info
}
//리스트내용
//
@@ -151,7 +114,6 @@ class InvenCrawler extends CrawlerController
$hit = $node->filter(getenv("inven.list.item.hit.tag"))->text();
$date = date("Y") . "-" . $node->filter(getenv("inven.list.item.date.tag"))->text();
$nickname = $node->filter(getenv("inven.list.item.nickname.tag"))->text();
- //작성자가 "관리자"가 아닌 게시물이면 해당 bbs_item에서 a.list_subject 객체를 찾아서
$link_node = $node->filter(getenv("inven.list.item.link.tag"));
$detail_url = $link_node->attr("href");
$title = $link_node->text();
diff --git a/app/Libraries/MyCrawler/Mangboard/SirCrawler.php b/app/Controllers/Mangboard/Crawler/SirCrawler.php
similarity index 57%
rename from app/Libraries/MyCrawler/Mangboard/SirCrawler.php
rename to app/Controllers/Mangboard/Crawler/SirCrawler.php
index ff0b740..38d8035 100644
--- a/app/Libraries/MyCrawler/Mangboard/SirCrawler.php
+++ b/app/Controllers/Mangboard/Crawler/SirCrawler.php
@@ -1,17 +1,23 @@
//
//
- // ..
getMySocket()->getContent($listInfo['detail_url']);
//작성시간
@@ -78,70 +84,25 @@ class SirCrawler extends MangboardCrawler
$listInfo['date'] = trim($selector->text());
$listInfo['date'] = DateTime::createFromFormat('Y.m.d H:i:s', $listInfo['date']);
$listInfo['date'] = $listInfo['date']->format('Y-m-d H:i:s');
- // if ($this->isDebug) {
- // throw new \Exception(
- // sprintf(
- // "\n--------------%s Debug--------------\n%s\n%s\n---------------------------------------\n",
- // __FUNCTION__,
- // var_export($listInfo, true),
- // $selector->html()
- // )
- // );
- // }
- //작성내용
- $selector = $this->getSelector($response, getenv("sir.view.content.tag"));
- $media_urls = $this->getUrlsByMediaType($selector, "img", "src");
- $media_urls = $this->getUrlsByMediaType($selector, "video", "src", $media_urls);
- if ($this->isDebug) {
- throw new \Exception(sprintf(
- "\n--------------%s Debug--------------\n%s%s\n---------------------------------------\n",
- __FUNCTION__,
- var_export($listInfo, true),
- var_export($media_urls, true)
- ));
- } else {
- $storages = $this->media_process($media_urls);
- if (!count($storages)) {
- throw new \Exception("등록할 자료가 없습니다.");
- }
- $this->backend_process($cnt, $listInfo, $storages);
- }
- log_message("notice", __FUNCTION__ . " 작업이 완료되었습니다.");
- return $listInfo;
+ return array($this->getSelector($response, getenv("sir.view.content.tag")), $listInfo); // pair: selector for the configured content tag, list info with the parsed date
}
- //리스트내용
- //
- //
- //
할아버지의 마술 3
- //
21967
- //
- //
- public function execute(): void
+ //리스트 내용
+ //
+ //
+ // 졸고 있는 여군
+ // 6
+ // yeeyuu | 6 | 369 | No 89372 | 2024-09-13
+ // |
+ public function execute(string $board_name, ?string $user_id = null, ...$params): void
{
try {
+ // Optional flags passed as extra arguments
+ $this->isDebug = in_array('debug', $params, true);
+ $this->isCopy = in_array('copy', $params, true);
+ $this->setBoardName($board_name);
+ $this->login_process($user_id);
+ // Run: collect the list entries to process
+ $listInfos = [];
if ($this->isDebug) {
$listInfo = [];
$listInfo['title'] = 'test_title';
@@ -149,12 +110,10 @@ class SirCrawler extends MangboardCrawler
$listInfo['hit'] = 1;
$listInfo['date'] = date("Y-m-d H:i:s");
$listInfo['detail_url'] = getenv("sir.view.test.url.{$this->getBoardName()}");
- $this->detail_process(1, $listInfo);
- log_message("notice", __FUNCTION__ . "=> DEBUG 게시물 {$listInfo['detail_url']} 작업종료");
+ $listInfos[] = $listInfo;
} else {
- $listInfos = [];
$response = $this->getMySocket()->getContent(getenv("sir.list.url.{$this->getBoardName()}"));
- $this->getSelector($response, getenv("sir.list.tag"))->each(
+ $this->getSelector($response, getenv("sir.list.tag.{$this->getBoardName()}"))->each(
function (Crawler $node) use (&$listInfos): void {
$link_node = $node->filter(getenv("sir.list.item.link.tag"));
// href url의 맨 앞이 /가 두개라서 한개를 빼기위함
@@ -167,11 +126,11 @@ class SirCrawler extends MangboardCrawler
$listInfos[] = ['title' => $title, 'nickname' => $nickname, 'detail_url' => $detail_url, 'date' => "", 'hit' => $hit];
}
);
- if (!count($listInfos)) {
- throw new \Exception("Target URL이 없습니다.");
- }
- $this->list_process(intval(getenv("sir.list.max_limit")), $listInfos);
}
+ if (!count($listInfos)) {
+ throw new \Exception("Target URL이 없습니다.");
+ }
+ $this->list_process(intval(getenv("sir.list.max_limit.{$this->getBoardName()}")), $listInfos);
log_message("notice", __FUNCTION__ . " 작업이 완료되었습니다.");
} catch (\Exception $e) {
log_message("warning", sprintf(
diff --git a/app/Controllers/Mangboard/Crawler/YamapCrawler.php b/app/Controllers/Mangboard/Crawler/YamapCrawler.php
index d5272e7..740daf7 100644
--- a/app/Controllers/Mangboard/Crawler/YamapCrawler.php
+++ b/app/Controllers/Mangboard/Crawler/YamapCrawler.php
@@ -18,48 +18,10 @@ class YamapCrawler extends CrawlerController
{
return getenv("yamap.host.url");
}
- protected function detail_content_process(int $cnt, array $listInfo): array
+ protected function getDetailSelector(array $listInfo): array
{
- $response = $this->getMySocket()->getContent($listInfo['detail_url']);
- $selector = $this->getSelector($response, getenv("yamap.view.content.tag"));
- $formDatas = [];
- $formDatas['image_path'] = "";
- $formDatas['content'] = $selector->html();
- //File DB 및 Board DB 등록작업등
- $this->getBoardModel()->createByCrawler(
- $this->getBoardsEntity(),
- $this->getUserEntity(),
- $cnt,
- $listInfo,
- [],
- $formDatas
- );
- log_message("notice", __FUNCTION__ . " 작업이 완료되었습니다.");
- return $listInfo;
- }
- protected function detail_download_process(int $cnt, array $listInfo): array
- {
- $response = $this->getMySocket()->getContent($listInfo['detail_url']);
- $selector = $this->getSelector($response, getenv("yamap.view.content.tag"));
- $media_urls = $this->getUrlsByMediaType($selector, "img", "src");
- $media_urls = $this->getUrlsByMediaType($selector, "video", "src", $media_urls);
- if ($this->isDebug) {
- throw new \Exception(sprintf(
- "\n--------------%s Debug--------------\n%s%s\n---------------------------------------\n",
- __FUNCTION__,
- var_export($listInfo, true),
- var_export($media_urls, true)
- ));
- } else {
- // Image 나 Video 소스들의 url을 가져와서 실제 다운받는 처리
- $storages = $this->media_process($media_urls);
- if (!count($storages)) {
- throw new \Exception("등록할 자료가 없습니다.");
- }
- $this->backend_process($cnt, $listInfo, $storages);
- }
- log_message("notice", __FUNCTION__ . " 작업이 완료되었습니다.");
- return $listInfo;
+ $detailHtml = $this->getMySocket()->getContent($listInfo['detail_url']); // fetch the content at the entry's detail_url
+ return [$this->getSelector($detailHtml, getenv("yamap.view.content.tag")), $listInfo]; // pair: selector for the configured content tag, unchanged list info
}
//리스트내용
//
@@ -91,7 +53,8 @@ class YamapCrawler extends CrawlerController
//
//
//
- public function execute(string $board_name, string $user_id = null, ...$params): void
+
+ final public function execute(string $board_name, ?string $user_id = null, ...$params): void
{
try {
//추가옵션
@@ -99,8 +62,7 @@ class YamapCrawler extends CrawlerController
$this->isCopy = in_array('copy', $params);
$this->setBoardName($board_name);
$this->login_process($user_id);
- //실행
$listInfos = [];
if ($this->isDebug) {
$listInfo = [];
$listInfo['title'] = 'test_title';
@@ -111,17 +73,20 @@ class YamapCrawler extends CrawlerController
$listInfos[] = $listInfo;
} else {
$response = $this->getMySocket()->getContent(getenv("yamap.list.url.{$this->getBoardName()}"));
- $selector = $this->getSelector($response, getenv("inven.list.tag.{$this->getBoardName()}"));
- $selector->filter(getenv("yamap.list.item.tag"))->each(
+ $selector = $this->getSelector($response, getenv("yamap.list.tag.{$this->getBoardName()}"));
+ $selector->filter(getenv("yamap.list.item.tag.{$this->getBoardName()}"))->each(
function (Crawler $node) use (&$listInfos): void {
$hit = $node->filter(getenv("yamap.list.item.hit.tag"))->text();
- $date = date("Y") . "-" . $node->filter(getenv("yamap.list.item.date.tag"))->text();
+ $date = $node->filter(getenv("yamap.list.item.date.tag"))->text();
$nickname = $node->filter(getenv("yamap.list.item.nickname.tag"))->text();
- //작성자가 "관리자"가 아닌 게시물이면 해당 bbs_item에서 a.list_subject 객체를 찾아서
- $link_node = $node->filter(getenv("yamap.list.item.link.tag"));
- $detail_url = $link_node->attr("href");
- $title = $link_node->text();
- $listInfos[] = ['title' => $title, 'nickname' => $nickname, 'detail_url' => $detail_url, 'date' => $date, 'hit' => $hit];
+ // Skip posts whose nickname equals the configured exclusion value (yamap.list.item.nickname.except)
+ if ($nickname !== getenv("yamap.list.item.nickname.except")) {
+ // Otherwise read the detail URL and title from this item's configured link node
+ $link_node = $node->filter(getenv("yamap.list.item.link.tag"));
+ $detail_url = $link_node->attr("href");
+ $title = $link_node->children()->last()->text();
+ $listInfos[] = ['title' => $title, 'nickname' => $nickname, 'detail_url' => $detail_url, 'date' => $date, 'hit' => $hit];
+ }
}
);
}
diff --git a/app/Libraries/MyCrawler/Mangboard/YamoonCrawler.php b/app/Controllers/Mangboard/Crawler/YamoonCrawler.php
similarity index 67%
rename from app/Libraries/MyCrawler/Mangboard/YamoonCrawler.php
rename to app/Controllers/Mangboard/Crawler/YamoonCrawler.php
index 072cac7..be8ea82 100644
--- a/app/Libraries/MyCrawler/Mangboard/YamoonCrawler.php
+++ b/app/Controllers/Mangboard/Crawler/YamoonCrawler.php
@@ -1,16 +1,22 @@
@@ -43,43 +49,10 @@ class YamoonCrawler extends MangboardCrawler
//
//
//
- protected function detail_process(int $cnt, array $listInfo): array
+ protected function getDetailSelector(array $listInfo): array
{
$response = $this->getMySocket()->getContent("/newboard/yamoonboard/" . $listInfo['detail_url']);
- //작성시간
- // $selector = $this->getSelector($response, getenv("yamoon.view.date.tag"));
- // $listInfo['date'] = trim($selector->text());
- // if ($this->isDebug) {
- // throw new \Exception(
- // sprintf(
- // "\n--------------%s Debug--------------\n%s\n%s\n---------------------------------------\n",
- // __FUNCTION__,
- // var_export($listInfo, true),
- // $selector->html()
- // )
- // );
- // }
- //작성내용
- $selector = $this->getSelector($response, getenv("yamoon.view.content.tag"));
- $media_urls = $this->getUrlsByMediaType($selector, "img", "src");
- $media_urls = $this->getUrlsByMediaType($selector, "video", "src", $media_urls);
- if ($this->isDebug) {
- throw new \Exception(sprintf(
- "\n--------------%s Debug--------------\n%s%s\n---------------------------------------\n",
- __FUNCTION__,
- var_export($listInfo, true),
- var_export($media_urls, true)
- ));
- } else {
- // Image 나 Video 소스들의 url을 가져와서 실제 다운받는 처리
- $storages = $this->media_process($media_urls);
- if (!count($storages)) {
- throw new \Exception("등록할 자료가 없습니다.");
- }
- $this->backend_process($cnt, $listInfo, $storages);
- }
- log_message("notice", __FUNCTION__ . " 작업이 완료되었습니다.");
- return $listInfo;
+ return array($this->getSelector($response, getenv("yamoon.view.content.tag")), $listInfo);
}
//리스트 내용
//
@@ -87,10 +60,17 @@ class YamoonCrawler extends MangboardCrawler
// 졸고 있는 여군
// 6
// yeeyuu | 6 | 369 | No 89372 | 2024-09-13
- // |
- public function execute(): void
+ //
+ public function execute(string $board_name, ?string $user_id = null, ...$params): void
{
try {
+ // Optional flags passed as extra arguments
+ $this->isDebug = in_array('debug', $params, true);
+ $this->isCopy = in_array('copy', $params, true);
+ $this->setBoardName($board_name);
+ $this->login_process($user_id);
+ // Run: collect the list entries to process
+ $listInfos = [];
if ($this->isDebug) {
$listInfo = [];
$listInfo['title'] = 'test_title';
@@ -98,12 +78,10 @@ class YamoonCrawler extends MangboardCrawler
$listInfo['hit'] = 1;
$listInfo['date'] = date("Y-m-d H:i:s");
$listInfo['detail_url'] = getenv("yamoon.view.test.url.{$this->getBoardName()}");
- $this->detail_process(1, $listInfo);
- log_message("notice", __FUNCTION__ . "=> DEBUG 게시물 {$listInfo['detail_url']} 작업종료");
+ $listInfos[] = $listInfo;
} else {
- $listInfos = [];
$response = $this->getMySocket()->getContent(getenv("yamoon.list.url.{$this->getBoardName()}"));
- $this->getSelector($response, getenv("yamoon.list.tag"))->each(
+ $this->getSelector($response, getenv("yamoon.list.tag.{$this->getBoardName()}"))->each(
function (Crawler $node) use (&$listInfos): void {
$link_node = $node->filter(getenv("yamoon.list.item.link.tag"));
$detail_url = $link_node->attr("href");
@@ -113,12 +91,12 @@ class YamoonCrawler extends MangboardCrawler
$listInfos[] = ['title' => $title, 'detail_url' => $detail_url, 'nickname' => trim($infos[0]), 'hit' => trim($infos[2]), 'date' => trim($infos[4])];
}
);
- if (!count($listInfos)) {
- throw new \Exception("Target URL이 없습니다.");
- }
- $this->list_process(intval(getenv("yamoon.list.max_limit")), $listInfos);
}
- log_message("notice", __FUNCTION__ . " 작업이 완료되었습니다.");
+ if (!count($listInfos)) {
+ throw new \Exception("Target URL이 없습니다.");
+ }
+ $this->list_process(intval(getenv("yamoon.list.max_limit.{$this->getBoardName()}")), $listInfos);
+ log_message("notice", __FUNCTION__ . " 작업이 완료되었습니다.");
} catch (\Exception $e) {
log_message("warning", sprintf(
"\n---%s 오류---\n%s\n-----------------------------------------\n",
diff --git a/app/Controllers/Mangboard/CrawlerController.php b/app/Controllers/Mangboard/CrawlerController.php
index ffd7b13..5621cf4 100644
--- a/app/Controllers/Mangboard/CrawlerController.php
+++ b/app/Controllers/Mangboard/CrawlerController.php
@@ -29,9 +29,8 @@ abstract class CrawlerController extends CommonController
{
parent::initController($request, $response, $logger);
}
- abstract protected function detail_content_process(int $cnt, array $listInfo): array;
- abstract protected function detail_download_process(int $cnt, array $listInfo): array;
abstract protected function getHost(): string;
+ abstract protected function getDetailSelector(array $listInfo): array;
final protected function getBoardName(): string
{
return $this->_board_name;
@@ -78,14 +77,14 @@ abstract class CrawlerController extends CommonController
}
return $this->_board_model;
}
- public function getUserModel(): UserModel
+ final protected function getUserModel(): UserModel
{
if ($this->_user_model === null) {
return $this->_user_model = new UserModel();
}
return $this->_user_model;
}
- protected function login_process(string $user_id = null): void
+ final protected function login_process(?string $user_id = null): void
{
$user_id = $user_id ?? getenv("mangboard.login.default.id");
$password = getenv("mangboard.login.default.password");
@@ -135,9 +134,8 @@ abstract class CrawlerController extends CommonController
{
switch ($media_tag) {
case 'video':
- try {
- $url = $node->attr($attr); // 또는
- } catch (\Exception) {
+ $url = $node->attr($attr); // 또는
+ if ($url === null) {
$url = $node->children()->attr("src"); //