diff --git a/app/Controllers/Mangboard/Crawler/InvenCrawler.php b/app/Controllers/Mangboard/Crawler/InvenCrawler.php index 8f99c19..a362bc2 100644 --- a/app/Controllers/Mangboard/Crawler/InvenCrawler.php +++ b/app/Controllers/Mangboard/Crawler/InvenCrawler.php @@ -24,9 +24,11 @@ class InvenCrawler extends CrawlerController case 'video': $url = parent::getUrlByMediaType($node, $media_type, $attr); //그래도 null이면 data-src로 추출해본다. - $attributes = $node->extract(['data-src']); - if (count($attributes)) { - $url = $attributes[0]; + if ($url === null) { + $attributes = $node->extract(['data-src']); + if (count($attributes)) { + $url = $attributes[0]; + } } break; case 'img': @@ -53,49 +55,10 @@ class InvenCrawler extends CrawlerController // // // - protected function detail_content_process(int $cnt, array $listInfo): array + protected function getDetailSelector(array $listInfo): array { - $response = $this->getMySocket()->getContent($listInfo['detail_url']); - $selector = $this->getSelector($response, getenv("inven.view.content.tag")); - $formDatas = []; - $formDatas['image_path'] = ""; - $formDatas['content'] = $selector->html(); - //File DB 및 Board DB 등록작업등 - $this->getBoardModel()->createByCrawler( - $this->getBoardsEntity(), - $this->getUserEntity(), - $cnt, - $listInfo, - [], - $formDatas - ); - log_message("notice", __FUNCTION__ . " 작업이 완료되었습니다."); - return $listInfo; - } - protected function detail_download_process(int $cnt, array $listInfo): array - { - $response = $this->getMySocket()->getContent($listInfo['detail_url']); - $selector = $this->getSelector($response, getenv("inven.view.content.tag")); - - $media_urls = $this->getUrlsByMediaType($selector, "img", "src"); - $media_urls = $this->getUrlsByMediaType($selector, "video", "src", $media_urls); - if ($this->isDebug) { - throw new \Exception(sprintf( - "\n--------------%s Debug--------------\n%s%s\n---------------------------------------\n", - __FUNCTION__, - var_export($listInfo, true), - var_export($media_urls, true) - )); - } else { - // Image 나 Video 소스들의 url을 가져와서 실제 다운받는 처리 - $storages = $this->media_process($media_urls); - if (!count($storages)) { - throw new \Exception("등록할 자료가 없습니다."); - } - $this->backend_process($cnt, $listInfo, $storages); - } - log_message("notice", __FUNCTION__ . " 작업이 완료되었습니다."); - return $listInfo; + $response = $this->getMySocket()->getContent($listInfo['detail_url']); + return array($this->getSelector($response, getenv("inven.view.content.tag")), $listInfo); } //리스트내용 //
@@ -151,7 +114,6 @@ class InvenCrawler extends CrawlerController $hit = $node->filter(getenv("inven.list.item.hit.tag"))->text(); $date = date("Y") . "-" . $node->filter(getenv("inven.list.item.date.tag"))->text(); $nickname = $node->filter(getenv("inven.list.item.nickname.tag"))->text(); - //작성자가 "관리자"가 아닌 게시물이면 해당 bbs_item에서 a.list_subject 객체를 찾아서 $link_node = $node->filter(getenv("inven.list.item.link.tag")); $detail_url = $link_node->attr("href"); $title = $link_node->text(); diff --git a/app/Libraries/MyCrawler/Mangboard/SirCrawler.php b/app/Controllers/Mangboard/Crawler/SirCrawler.php similarity index 57% rename from app/Libraries/MyCrawler/Mangboard/SirCrawler.php rename to app/Controllers/Mangboard/Crawler/SirCrawler.php index ff0b740..38d8035 100644 --- a/app/Libraries/MyCrawler/Mangboard/SirCrawler.php +++ b/app/Controllers/Mangboard/Crawler/SirCrawler.php @@ -1,17 +1,23 @@ // //
- //

..

getMySocket()->getContent($listInfo['detail_url']); //작성시간 @@ -78,70 +84,25 @@ class SirCrawler extends MangboardCrawler $listInfo['date'] = trim($selector->text()); $listInfo['date'] = DateTime::createFromFormat('Y.m.d H:i:s', $listInfo['date']); $listInfo['date'] = $listInfo['date']->format('Y-m-d H:i:s'); - // if ($this->isDebug) { - // throw new \Exception( - // sprintf( - // "\n--------------%s Debug--------------\n%s\n%s\n---------------------------------------\n", - // __FUNCTION__, - // var_export($listInfo, true), - // $selector->html() - // ) - // ); - // } - //작성내용 - $selector = $this->getSelector($response, getenv("sir.view.content.tag")); - $media_urls = $this->getUrlsByMediaType($selector, "img", "src"); - $media_urls = $this->getUrlsByMediaType($selector, "video", "src", $media_urls); - if ($this->isDebug) { - throw new \Exception(sprintf( - "\n--------------%s Debug--------------\n%s%s\n---------------------------------------\n", - __FUNCTION__, - var_export($listInfo, true), - var_export($media_urls, true) - )); - } else { - $storages = $this->media_process($media_urls); - if (!count($storages)) { - throw new \Exception("등록할 자료가 없습니다."); - } - $this->backend_process($cnt, $listInfo, $storages); - } - log_message("notice", __FUNCTION__ . " 작업이 완료되었습니다."); - return $listInfo; + return array($this->getSelector($response, tag: getenv("sir.view.content.tag")), $listInfo); } - //리스트내용 - //
  • - //
    - // 할아버지의 마술 3 - //
    21967
    - //
    - // - // - // 감독님 - // - // 자기소개 - // 아이디로 검색 - // 회원게시물 - // - // - // - // - // 24.09.13 - // - // - // - // 244 - //
    - //
  • - public function execute(): void + //리스트 내용 + // + // + // 졸고 있는 여군 + // 6 + // yeeyuu | 6 | 369 | No 89372 | 2024-09-13 + // + public function execute(string $board_name, string $user_id = null, ...$params): void { try { + //추가옵션 + $this->isDebug = in_array('debug', $params); + $this->isCopy = in_array('copy', $params); + $this->setBoardName($board_name); + $this->login_process($user_id); + //실행 + $listInfos = []; if ($this->isDebug) { $listInfo = []; $listInfo['title'] = 'test_title'; @@ -149,12 +110,10 @@ class SirCrawler extends MangboardCrawler $listInfo['hit'] = 1; $listInfo['date'] = date("Y-m-d H:i:s"); $listInfo['detail_url'] = getenv("sir.view.test.url.{$this->getBoardName()}"); - $this->detail_process(1, $listInfo); - log_message("notice", __FUNCTION__ . "=> DEBUG 게시물 {$listInfo['detail_url']} 작업종료"); + $listInfos[] = $listInfo; } else { - $listInfos = []; $response = $this->getMySocket()->getContent(getenv("sir.list.url.{$this->getBoardName()}")); - $this->getSelector($response, getenv("sir.list.tag"))->each( + $this->getSelector($response, getenv("sir.list.tag.{$this->getBoardName()}"))->each( function (Crawler $node) use (&$listInfos): void { $link_node = $node->filter(getenv("sir.list.item.link.tag")); // href url의 맨 앞이 /가 두개라서 한개를 빼기위함 @@ -167,11 +126,11 @@ class SirCrawler extends MangboardCrawler $listInfos[] = ['title' => $title, 'nickname' => $nickname, 'detail_url' => $detail_url, 'date' => "", 'hit' => $hit]; } ); - if (!count($listInfos)) { - throw new \Exception("Target URL이 없습니다."); - } - $this->list_process(intval(getenv("sir.list.max_limit")), $listInfos); } + if (!count($listInfos)) { + throw new \Exception("Target URL이 없습니다."); + } + $this->list_process(intval(getenv("sir.list.max_limit.{$this->getBoardName()}")), $listInfos); log_message("notice", __FUNCTION__ . " 작업이 완료되었습니다."); } catch (\Exception $e) { log_message("warning", sprintf( diff --git a/app/Controllers/Mangboard/Crawler/YamapCrawler.php b/app/Controllers/Mangboard/Crawler/YamapCrawler.php index d5272e7..740daf7 100644 --- a/app/Controllers/Mangboard/Crawler/YamapCrawler.php +++ b/app/Controllers/Mangboard/Crawler/YamapCrawler.php @@ -18,48 +18,10 @@ class YamapCrawler extends CrawlerController { return getenv("yamap.host.url"); } - protected function detail_content_process(int $cnt, array $listInfo): array + protected function getDetailSelector(array $listInfo): array { - $response = $this->getMySocket()->getContent($listInfo['detail_url']); - $selector = $this->getSelector($response, getenv("yamap.view.content.tag")); - $formDatas = []; - $formDatas['image_path'] = ""; - $formDatas['content'] = $selector->html(); - //File DB 및 Board DB 등록작업등 - $this->getBoardModel()->createByCrawler( - $this->getBoardsEntity(), - $this->getUserEntity(), - $cnt, - $listInfo, - [], - $formDatas - ); - log_message("notice", __FUNCTION__ . " 작업이 완료되었습니다."); - return $listInfo; - } - protected function detail_download_process(int $cnt, array $listInfo): array - { - $response = $this->getMySocket()->getContent($listInfo['detail_url']); - $selector = $this->getSelector($response, getenv("yamap.view.content.tag")); - $media_urls = $this->getUrlsByMediaType($selector, "img", "src"); - $media_urls = $this->getUrlsByMediaType($selector, "video", "src", $media_urls); - if ($this->isDebug) { - throw new \Exception(sprintf( - "\n--------------%s Debug--------------\n%s%s\n---------------------------------------\n", - __FUNCTION__, - var_export($listInfo, true), - var_export($media_urls, true) - )); - } else { - // Image 나 Video 소스들의 url을 가져와서 실제 다운받는 처리 - $storages = $this->media_process($media_urls); - if (!count($storages)) { - throw new \Exception("등록할 자료가 없습니다."); - } - $this->backend_process($cnt, $listInfo, $storages); - } - log_message("notice", __FUNCTION__ . " 작업이 완료되었습니다."); - return $listInfo; + $response = $this->getMySocket()->getContent($listInfo['detail_url']); + return array($this->getSelector($response, getenv("yamap.view.content.tag")), $listInfo); } //리스트내용 //
    @@ -91,7 +53,8 @@ class YamapCrawler extends CrawlerController //
    //
    //
    - public function execute(string $board_name, string $user_id = null, ...$params): void + + final public function execute(string $board_name, string $user_id = null, ...$params): void { try { //추가옵션 @@ -99,8 +62,6 @@ class YamapCrawler extends CrawlerController $this->isCopy = in_array('copy', $params); $this->setBoardName($board_name); $this->login_process($user_id); - //실행 - $listInfos = []; if ($this->isDebug) { $listInfo = []; $listInfo['title'] = 'test_title'; @@ -111,17 +72,20 @@ class YamapCrawler extends CrawlerController $listInfos[] = $listInfo; } else { $response = $this->getMySocket()->getContent(getenv("yamap.list.url.{$this->getBoardName()}")); - $selector = $this->getSelector($response, getenv("inven.list.tag.{$this->getBoardName()}")); - $selector->filter(getenv("yamap.list.item.tag"))->each( + $selector = $this->getSelector($response, getenv("yamap.list.tag.{$this->getBoardName()}")); + $selector->filter(getenv("yamap.list.item.tag.{$this->getBoardName()}"))->each( function (Crawler $node) use (&$listInfos): void { $hit = $node->filter(getenv("yamap.list.item.hit.tag"))->text(); - $date = date("Y") . "-" . $node->filter(getenv("yamap.list.item.date.tag"))->text(); + $date = $node->filter(getenv("yamap.list.item.date.tag"))->text(); $nickname = $node->filter(getenv("yamap.list.item.nickname.tag"))->text(); - //작성자가 "관리자"가 아닌 게시물이면 해당 bbs_item에서 a.list_subject 객체를 찾아서 - $link_node = $node->filter(getenv("yamap.list.item.link.tag")); - $detail_url = $link_node->attr("href"); - $title = $link_node->text(); - $listInfos[] = ['title' => $title, 'nickname' => $nickname, 'detail_url' => $detail_url, 'date' => $date, 'hit' => $hit]; + //bbs_item에서 span.g_nickname 객체를 찾아서 작성자가 "관리자" 아닌지 확인 후 Return Bool + if ($nickname != getenv("yamap.list.item.nickname.except")) { + //작성자가 "관리자"가 아닌 게시물이면 해당 bbs_item에서 a.list_subject 객체를 찾아서 + $link_node = $node->filter(getenv("yamap.list.item.link.tag")); + $detail_url = $link_node->attr("href"); + $title = $link_node->children()->last()->text(); + $listInfos[] = ['title' => $title, 'nickname' => $nickname, 'detail_url' => $detail_url, 'date' => $date, 'hit' => $hit]; + } } ); } diff --git a/app/Libraries/MyCrawler/Mangboard/YamoonCrawler.php b/app/Controllers/Mangboard/Crawler/YamoonCrawler.php similarity index 67% rename from app/Libraries/MyCrawler/Mangboard/YamoonCrawler.php rename to app/Controllers/Mangboard/Crawler/YamoonCrawler.php index 072cac7..be8ea82 100644 --- a/app/Libraries/MyCrawler/Mangboard/YamoonCrawler.php +++ b/app/Controllers/Mangboard/Crawler/YamoonCrawler.php @@ -1,16 +1,22 @@ @@ -43,43 +49,10 @@ class YamoonCrawler extends MangboardCrawler // //
    // - protected function detail_process(int $cnt, array $listInfo): array + protected function getDetailSelector(array $listInfo): array { $response = $this->getMySocket()->getContent("/newboard/yamoonboard/" . $listInfo['detail_url']); - //작성시간 - // $selector = $this->getSelector($response, getenv("yamoon.view.date.tag")); - // $listInfo['date'] = trim($selector->text()); - // if ($this->isDebug) { - // throw new \Exception( - // sprintf( - // "\n--------------%s Debug--------------\n%s\n%s\n---------------------------------------\n", - // __FUNCTION__, - // var_export($listInfo, true), - // $selector->html() - // ) - // ); - // } - //작성내용 - $selector = $this->getSelector($response, getenv("yamoon.view.content.tag")); - $media_urls = $this->getUrlsByMediaType($selector, "img", "src"); - $media_urls = $this->getUrlsByMediaType($selector, "video", "src", $media_urls); - if ($this->isDebug) { - throw new \Exception(sprintf( - "\n--------------%s Debug--------------\n%s%s\n---------------------------------------\n", - __FUNCTION__, - var_export($listInfo, true), - var_export($media_urls, true) - )); - } else { - // Image 나 Video 소스들의 url을 가져와서 실제 다운받는 처리 - $storages = $this->media_process($media_urls); - if (!count($storages)) { - throw new \Exception("등록할 자료가 없습니다."); - } - $this->backend_process($cnt, $listInfo, $storages); - } - log_message("notice", __FUNCTION__ . " 작업이 완료되었습니다."); - return $listInfo; + return array($this->getSelector($response, getenv("yamoon.view.content.tag")), $listInfo); } //리스트 내용 // @@ -87,10 +60,17 @@ class YamoonCrawler extends MangboardCrawler // 졸고 있는 여군 // 6 // yeeyuu | 6 | 369 | No 89372 | 2024-09-13 - // - public function execute(): void + // + public function execute(string $board_name, string $user_id = null, ...$params): void { try { + //추가옵션 + $this->isDebug = in_array('debug', $params); + $this->isCopy = in_array('copy', $params); + $this->setBoardName($board_name); + $this->login_process($user_id); + //실행 + $listInfos = []; if ($this->isDebug) { $listInfo = []; $listInfo['title'] = 'test_title'; @@ -98,12 +78,10 @@ class YamoonCrawler extends MangboardCrawler $listInfo['hit'] = 1; $listInfo['date'] = date("Y-m-d H:i:s"); $listInfo['detail_url'] = getenv("yamoon.view.test.url.{$this->getBoardName()}"); - $this->detail_process(1, $listInfo); - log_message("notice", __FUNCTION__ . "=> DEBUG 게시물 {$listInfo['detail_url']} 작업종료"); + $listInfos[] = $listInfo; } else { - $listInfos = []; $response = $this->getMySocket()->getContent(getenv("yamoon.list.url.{$this->getBoardName()}")); - $this->getSelector($response, getenv("yamoon.list.tag"))->each( + $this->getSelector($response, getenv("yamoon.list.tag.{$this->getBoardName()}"))->each( function (Crawler $node) use (&$listInfos): void { $link_node = $node->filter(getenv("yamoon.list.item.link.tag")); $detail_url = $link_node->attr("href"); @@ -113,12 +91,12 @@ class YamoonCrawler extends MangboardCrawler $listInfos[] = ['title' => $title, 'detail_url' => $detail_url, 'nickname' => trim($infos[0]), 'hit' => trim($infos[2]), 'date' => trim($infos[4])]; } ); - if (!count($listInfos)) { - throw new \Exception("Target URL이 없습니다."); - } - $this->list_process(intval(getenv("yamoon.list.max_limit")), $listInfos); } - log_message("notice", __FUNCTION__ . " 작업이 완료되었습니다."); + if (!count($listInfos)) { + throw new \Exception("Target URL이 없습니다."); + } + $this->list_process(intval(getenv("yamoon.list.max_limit.{$this->getBoardName()}")), $listInfos); + log_message("notice", __FUNCTION__ . " 작업이 완료되었습니다."); } catch (\Exception $e) { log_message("warning", sprintf( "\n---%s 오류---\n%s\n-----------------------------------------\n", diff --git a/app/Controllers/Mangboard/CrawlerController.php b/app/Controllers/Mangboard/CrawlerController.php index ffd7b13..5621cf4 100644 --- a/app/Controllers/Mangboard/CrawlerController.php +++ b/app/Controllers/Mangboard/CrawlerController.php @@ -29,9 +29,8 @@ abstract class CrawlerController extends CommonController { parent::initController($request, $response, $logger); } - abstract protected function detail_content_process(int $cnt, array $listInfo): array; - abstract protected function detail_download_process(int $cnt, array $listInfo): array; abstract protected function getHost(): string; + abstract protected function getDetailSelector(array $listInfo): array; final protected function getBoardName(): string { return $this->_board_name; @@ -78,14 +77,14 @@ abstract class CrawlerController extends CommonController } return $this->_board_model; } - public function getUserModel(): UserModel + final protected function getUserModel(): UserModel { if ($this->_user_model === null) { return $this->_user_model = new UserModel(); } return $this->_user_model; } - protected function login_process(string $user_id = null): void + final protected function login_process(string $user_id = null): void { $user_id = $user_id ?? getenv("mangboard.login.default.id"); $password = getenv("mangboard.login.default.password"); @@ -135,9 +134,8 @@ abstract class CrawlerController extends CommonController { switch ($media_tag) { case 'video': - try { - $url = $node->attr($attr); // 또는 - } catch (\Exception) { + $url = $node->attr($attr); // 또는 + if ($url === null) { $url = $node->children()->attr("src"); //