From befbaafae1770908744fbc144ac26a98cbb3a2c0 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=EC=B5=9C=EC=A4=80=ED=9D=A0?= Date: Tue, 17 Sep 2024 16:57:32 +0900 Subject: [PATCH] Automation init...3 --- app/Config/Routes.php | 7 +- .../Mangboard/CrawlerController.php | 47 +++-- .../MyCrawler/Mangboard/MangboardCrawler.php | 65 +++++++ .../MyCrawler/Mangboard/SirCrawler.php | 177 ++++++++++++++++++ .../MyCrawler/Mangboard/YamapCrawler.php | 106 +++++++++++ .../MyCrawler/Mangboard/YamoonCrawler.php | 121 ++++++++++++ app/Libraries/MyCrawler/MyCrawler.php | 67 ++++--- app/Libraries/MyCrawler/YamapCrawler.php | 139 -------------- app/Libraries/MyCrawler/YamoonCrawler.php | 116 ------------ 9 files changed, 540 insertions(+), 305 deletions(-) create mode 100644 app/Libraries/MyCrawler/Mangboard/MangboardCrawler.php create mode 100644 app/Libraries/MyCrawler/Mangboard/SirCrawler.php create mode 100644 app/Libraries/MyCrawler/Mangboard/YamapCrawler.php create mode 100644 app/Libraries/MyCrawler/Mangboard/YamoonCrawler.php delete mode 100644 app/Libraries/MyCrawler/YamapCrawler.php delete mode 100644 app/Libraries/MyCrawler/YamoonCrawler.php diff --git a/app/Config/Routes.php b/app/Config/Routes.php index 0829700..6e551ce 100644 --- a/app/Config/Routes.php +++ b/app/Config/Routes.php @@ -33,9 +33,12 @@ $routes->group('mangboard', ['namespace' => 'App\Controllers\Mangboard'], functi $routes->group('crawler', function ($routes) { $routes->cli('yamap/(:alpha)', 'CrawlerController::yamap/$1'); $routes->cli('yamap/(:alpha)/(:any)', 'CrawlerController::yamap/$1/$2'); - $routes->cli('yamap/(:alpha)/(:alphanum)/(:any)', 'CrawlerController::yamap/$1/$2/$3'); + $routes->cli('yamap/(:alpha)/(:any)/(:any)', 'CrawlerController::yamap/$1/$2/$3'); $routes->cli('yamoon/(:alpha)', 'CrawlerController::yamoon/$1'); $routes->cli('yamoon/(:alpha)/(:any)', 'CrawlerController::yamoon/$1/$2'); - $routes->cli('yamoon/(:alpha)/(:alphanum)/(:any)', 'CrawlerController::yamoon/$1/$2/$3'); + $routes->cli('yamoon/(:alpha)/(:any)/(:any)', 'CrawlerController::yamoon/$1/$2/$3'); + $routes->cli('sir/(:alpha)', 'CrawlerController::sir/$1'); + $routes->cli('sir/(:alpha)/(:any)', 'CrawlerController::sir/$1/$2'); + $routes->cli('sir/(:alpha)/(:any)/(:any)', 'CrawlerController::sir/$1/$2/$3'); }); }); diff --git a/app/Controllers/Mangboard/CrawlerController.php b/app/Controllers/Mangboard/CrawlerController.php index 0b01280..c55ad09 100644 --- a/app/Controllers/Mangboard/CrawlerController.php +++ b/app/Controllers/Mangboard/CrawlerController.php @@ -4,8 +4,9 @@ namespace App\Controllers\Mangboard; use App\Controllers\CommonController; use App\Entities\Mangboard\UserEntity; -use App\Libraries\MyCrawler\YamapCrawler; -use App\Libraries\MyCrawler\YamoonCrawler; +use App\Libraries\MyCrawler\Mangboard\YamapCrawler; +use App\Libraries\MyCrawler\Mangboard\YamoonCrawler; +use App\Libraries\MyCrawler\Mangboard\SirCrawler; use App\Models\Mangboard\UserModel; class CrawlerController extends CommonController @@ -18,8 +19,11 @@ class CrawlerController extends CommonController } return $this->_user_model; } - public function login(string $host, string $id, string $password): bool|UserEntity + public function login(string $id): bool|UserEntity { + $host = getenv("mangboard.host.url"); + $id = $id == "" ? getenv("mangboard.login.default.id") : $id; + $password = getenv("mangboard.login.default.password"); $user_entity = $this->getUserModel()->getEntityByID($id); // $response = $this->getWebLibrary($host)->getResponse( // $host . getenv("mangboard.login.url"), @@ -42,16 +46,16 @@ class CrawlerController extends CommonController log_message("notice", "{$id}로 로그인 성공"); return $user_entity; } - public function yamap(string $category, string $id = "", string $debug = "false"): string + public function yamap(string $category, string $id = "", string $option = ""): string { try { - $id = $id == "" ? getenv("mangboard.login.default.id") : $id; - $password = getenv("mangboard.login.default.password"); //1. 사이트 로그인 처리 - $user_entity = $this->login(getenv("mangboard.host.url"), $id, $password); + $user_entity = $this->login($id); //2. 필요한 로그인한 사용자정보,Socket,Storage 정의후 Crawler에게 전달. $crawler = new YamapCrawler(getenv('yamap.host.url'), $category, $user_entity); - $crawler->setDebug($debug === "true" ? true : false); + if ($option) { + $crawler->setDebug($option === "debug" ? true : false); + } $crawler->execute(intval(getenv("yamap.list.max_limit"))); return "완료되었습니다."; } catch (\Exception $e) { @@ -59,16 +63,16 @@ class CrawlerController extends CommonController return $e->getMessage(); } } - public function yamoon(string $category, string $id = "", string $debug = "false"): string + public function yamoon(string $category, string $id = "", string $option = ""): string { try { - $id = $id == "" ? getenv("mangboard.login.default.id") : $id; - $password = getenv("mangboard.login.default.password"); //1. 사이트 로그인 처리 - $user_entity = $this->login(getenv("mangboard.host.url"), $id, $password); + $user_entity = $this->login($id); //2. 필요한 로그인한 사용자정보,Socket,Storage 정의후 Crawler에게 전달. $crawler = new YamoonCrawler(getenv("yamoon.host.url"), $category, $user_entity); - $crawler->setDebug($debug === "true" ? true : false); + if ($option) { + $crawler->setDebug($option === "debug" ? true : false); + } $crawler->execute(intval(getenv("yamap.list.max_limit"))); return "완료되었습니다."; } catch (\Exception $e) { @@ -76,4 +80,21 @@ class CrawlerController extends CommonController return $e->getMessage(); } } + public function sir(string $category, string $id = "", string $option = ""): string + { + try { + //1. 사이트 로그인 처리 + $user_entity = $this->login($id); + //2. 필요한 로그인한 사용자정보,Socket,Storage 정의후 Crawler에게 전달. + $crawler = new SirCrawler(getenv("sir.host.url"), $category, $user_entity); + if ($option) { + $crawler->setDebug($option === "debug" ? true : false); + } + $crawler->execute(intval(getenv("sir.list.max_limit"))); + return "완료되었습니다."; + } catch (\Exception $e) { + log_message("error", $e->getMessage()); + return $e->getMessage(); + } + } } diff --git a/app/Libraries/MyCrawler/Mangboard/MangboardCrawler.php b/app/Libraries/MyCrawler/Mangboard/MangboardCrawler.php new file mode 100644 index 0000000..3f1fe9a --- /dev/null +++ b/app/Libraries/MyCrawler/Mangboard/MangboardCrawler.php @@ -0,0 +1,65 @@ +_host = $host; + $this->_category = $category; + $this->_user_entity = $user_entity; + } + protected function getMySocket() + { + if ($this->_mySocket === null) { + $this->_mySocket = new WebSocket($this->_host); + } + return $this->_mySocket; + } + final protected function createMyStorage() + { + return new MangboardStorage($this->_category, $this->_user_entity); + } + protected function backend_process(int $cnt, array $listInfo, array $storages) + { + //File DB 및 Board DB 등록작업등 + $baord_name = "board_" . $this->_category; + $boardsModel = new BoardsModel(); + $boards_entity = $boardsModel->getEntityByID("board_" . $this->_category); + $boardModel = new BoardModel("mb_" . $baord_name); + $board_entity = $boardModel->createByCrawler( + $boards_entity, + $this->_user_entity, + $cnt, + $listInfo, + $storages + ); + foreach ($storages as $storage) { + try { + $storage->backend($boards_entity, $board_entity, $boardModel->getTable()); + } catch (\Exception $e) { + log_message("notice", sprintf( + "\n---%s -> %s 게시물의 %s번째:%s 파일 등록 오류---\n%s\n--------------------------------\n", + __FUNCTION__, + $board_entity->getTitle(), + $storage->getOriginSequence(), + $storage->getOriginName(), + $e->getMessage() + )); + } + } + } +} diff --git a/app/Libraries/MyCrawler/Mangboard/SirCrawler.php b/app/Libraries/MyCrawler/Mangboard/SirCrawler.php new file mode 100644 index 0000000..92e846f --- /dev/null +++ b/app/Libraries/MyCrawler/Mangboard/SirCrawler.php @@ -0,0 +1,177 @@ + + //
+ //

할아버지의 마술 정보

+ // + // 할아버지의 마술 + // + //
+ //
+ // + // + // + //
+ //

본문

+ //
+ //
+ // + //
+ //

..

getMySocket()->getContent($listInfo['detail_url']); + //작성시간 + $selector = $this->getSelector($response, getenv("sir.view.date.tag")); + //Date Format이 맞지않아 변경해주기위함 : 2024.09.13 00:24:04 -> 2024-09-13 00:24:04 + $listInfo['date'] = trim($selector->text()); + $listInfo['date'] = DateTime::createFromFormat('Y.m.d H:i:s', $listInfo['date']); + $listInfo['date'] = $listInfo['date']->format('Y-m-d H:i:s'); + // if ($this->getDebug()) { + // throw new \Exception( + // sprintf( + // "\n--------------%s Debug--------------\n%s\n%s\n---------------------------------------\n", + // __FUNCTION__, + // var_export($listInfo, true), + // $selector->html() + // ) + // ); + // } + //작성내용 + $tag = getenv("sir.view.content.tag"); + $selector = $this->getSelector($response, $tag, true); + $media_urls = $this->getUrlsByMediaType("image", $selector, ["tag" => "img", "attr" => "src"]); + $media_urls = $this->getUrlsByMediaType("video", $selector, ["tag" => "video", "attr" => "src"], $media_urls); + if ($this->getDebug()) { + throw new \Exception(sprintf( + "\n--------------%s Debug--------------\n%s%s\n---------------------------------------\n", + __FUNCTION__, + var_export($listInfo, true), + var_export($media_urls, true) + )); + } else { + $storages = $this->media_process($media_urls); + if (!count($storages)) { + throw new \Exception("등록할 자료가 없습니다."); + } + $this->backend_process($cnt, $listInfo, $storages); + } + return $listInfo; + } + //리스트내용 + //
  • + //
    + // 할아버지의 마술 3 + //
    21967
    + //
    + // + // + // 감독님 + // + // 자기소개 + // 아이디로 검색 + // 회원게시물 + // + // + // + // + // 24.09.13 + // + // + // + // 244 + //
    + //
  • + public function execute(int $max_limit): void + { + try { + $listInfos = []; + if ($this->getDebug()) { + $this->detail_page(1, ['detail_url' => getenv("sir.view.test.url")]); + } + $response = $this->getMySocket()->getContent(getenv("sir.list.url")); + $this->getSelector($response, getenv("sir.list.tag"))->each( + function (Crawler $node) use (&$listInfos): void { + $link_node = $node->filter(getenv("sir.list.item.link.tag")); + // href url의 맨 앞이 /가 두개라서 한개를 빼기위함 + $detail_url = $this->getChangeURL($link_node->attr("href")); + // $detail_url = $link_node->attr("href"); + $title = $link_node->text(); + $nickname = $node->filter(getenv("sir.list.item.nickname.tag"))->text(); + $hit = $node->filter(getenv("sir.list.item.hit.tag"))->text(); + // $date = $node->filter(getenv("sir.list.item.date.tag"))->text(); + $listInfos[] = ['title' => $title, 'nickname' => $nickname, 'detail_url' => $detail_url, 'date' => "", 'hit' => $hit]; + } + ); + // throw new \Exception("Target URL이 없습니다." . var_export($listInfos, true)); + if (!count($listInfos)) { + throw new \Exception("Target URL이 없습니다."); + } + $this->main_process($max_limit, $listInfos); + log_message("notice", __FUNCTION__ . " 작업이 완료되었습니다."); + } catch (\Exception $e) { + log_message("warning", sprintf( + "\n---%s 오류---\n%s\n-----------------------------------------\n", + __FUNCTION__, + $e->getMessage() + )); + } + } +} diff --git a/app/Libraries/MyCrawler/Mangboard/YamapCrawler.php b/app/Libraries/MyCrawler/Mangboard/YamapCrawler.php new file mode 100644 index 0000000..e3d2764 --- /dev/null +++ b/app/Libraries/MyCrawler/Mangboard/YamapCrawler.php @@ -0,0 +1,106 @@ +getMySocket()->getContent($listInfo['detail_url']); + $tag = getenv("yamap.view.content.tag"); + $selector = $this->getSelector($response, $tag); + $media_urls = $this->getUrlsByMediaType("image", $selector, ["tag" => "img", "attr" => "src"]); + $media_urls = $this->getUrlsByMediaType("video", $selector, ["tag" => "video", "attr" => "src"], $media_urls); + if ($this->getDebug()) { + throw new \Exception(sprintf( + "\n--------------%s Debug--------------\n%s%s\n---------------------------------------\n", + __FUNCTION__, + var_export($listInfo, true), + var_export($media_urls, true) + )); + } else { + // Image 나 Video 소스들의 url을 가져와서 실제 다운받는 처리 + $storages = $this->media_process($media_urls); + if (!count($storages)) { + throw new \Exception("등록할 자료가 없습니다."); + } + $this->backend_process($cnt, $listInfo, $storages); + } + return $listInfo; + } + //리스트내용 + //
    + //
    요즘 패션
    + //
    + // + // 괴강고귀 + // + // + // | 추천 (14) | 조회 (432) + // + //
    + //
    + // + // + // + // 2024-09-14 01:53:45 + // + //
    + //
    + //
    + //

    + //

     

    + //
    + //
    + //
    + // + // + //
    + //
    + //
    + public function execute(int $max_limit): void + { + try { + $listInfos = []; + if ($this->getDebug()) { + $this->detail_page(1, ['detail_url' => getenv("yamap.view.test.url")]); + } + $response = $this->getMySocket()->getContent(getenv("yamap.list.url")); + $selector = $this->getSelector($response, getenv("yamap.list.tag")); + //div.bbs_item를 가진 객체를 찾아서 같은 형식의 객체(sibling)를 배열로 넘김 + $selector->filter(getenv("yamap.list.item.tag"))->each( + function (Crawler $node) use (&$listInfos): void { + $hit = $node->filter(getenv("yamap.list.item.hit.tag"))->text(); + $date = $node->filter(getenv("yamap.list.item.date.tag"))->text(); + $nickname = $node->filter(getenv("yamap.list.item.nickname.tag"))->text(); + //bbs_item에서 span.g_nickname 객체를 찾아서 작성자가 "관리자" 아닌지 확인 후 Return Bool + if ($nickname != getenv("yamap.list.item.nickname.except")) { + //작성자가 "관리자"가 아닌 게시물이면 해당 bbs_item에서 a.list_subject 객체를 찾아서 + $link_node = $node->filter(getenv("yamap.list.item.link.tag")); + $detail_url = $link_node->attr("href"); + $title = $link_node->children()->last()->text(); + $listInfos[] = ['title' => $title, 'nickname' => $nickname, 'detail_url' => $detail_url, 'date' => $date, 'hit' => $hit]; + } + } + ); + if (!count($listInfos)) { + throw new \Exception("Target URL이 없습니다."); + } + $this->main_process($max_limit, $listInfos); + log_message("notice", __FUNCTION__ . " 작업이 완료되었습니다."); + } catch (\Exception $e) { + log_message("warning", sprintf( + "\n---%s 오류---\n%s\n-----------------------------------------\n", + __FUNCTION__, + $e->getMessage() + )); + } + } +} diff --git a/app/Libraries/MyCrawler/Mangboard/YamoonCrawler.php b/app/Libraries/MyCrawler/Mangboard/YamoonCrawler.php new file mode 100644 index 0000000..7dc4bfe --- /dev/null +++ b/app/Libraries/MyCrawler/Mangboard/YamoonCrawler.php @@ -0,0 +1,121 @@ + + //
    요즘 화제라는 명품 목걸이
    + //
    + // CAT7478 + // | 추천 (8) | 조회 (268) + //
    + //
    + // + // + // + // 2024-09-16 09:52:39 + //
    + //
    + //
    + //

    + //

    + //

     

    + //

     

    + //

    전화기선 짤라서 목걸이 만들어도 위화감이 전혀 없을것같은

    + //

     

    + //

    디자인이군요

    + //

     

    + //
    + //
    + //
    + // + // + //
    + //
    + //
    + protected function detail_page(int $cnt, array $listInfo): array + { + $response = $this->getMySocket()->getContent("/newboard/yamoonboard/" . $listInfo['detail_url']); + //작성시간 + // $selector = $this->getSelector($response, getenv("yamoon.view.date.tag")); + // $listInfo['date'] = trim($selector->text()); + // if ($this->getDebug()) { + // throw new \Exception( + // sprintf( + // "\n--------------%s Debug--------------\n%s\n%s\n---------------------------------------\n", + // __FUNCTION__, + // var_export($listInfo, true), + // $selector->html() + // ) + // ); + // } + //작성내용 + $tag = getenv("yamoon.view.content.tag"); + $selector = $this->getSelector($response, $tag); + $media_urls = $this->getUrlsByMediaType("image", $selector, ["tag" => "img", "attr" => "src"]); + $media_urls = $this->getUrlsByMediaType("video", $selector, ["tag" => "video", "attr" => "src"], $media_urls); + if ($this->getDebug()) { + throw new \Exception(sprintf( + "\n--------------%s Debug--------------\n%s%s\n---------------------------------------\n", + __FUNCTION__, + var_export($listInfo, true), + var_export($media_urls, true) + )); + } else { + // Image 나 Video 소스들의 url을 가져와서 실제 다운받는 처리 + $storages = $this->media_process($media_urls); + if (!count($storages)) { + throw new \Exception("등록할 자료가 없습니다."); + } + $this->backend_process($cnt, $listInfo, $storages); + } + return $listInfo; + } + //리스트 내용 + // + // + // 졸고 있는 여군 + // 6 + // yeeyuu | 6 | 369 | No 89372 | 2024-09-13 + // + public function execute(int $max_limit): void + { + try { + $listInfos = []; + if ($this->getDebug()) { + $this->detail_page(1, ['detail_url' => getenv("yamoon.view.test.url")]); + } + $response = $this->getMySocket()->getContent(getenv("yamoon.list.url")); + $this->getSelector($response, getenv("yamoon.list.tag"))->each( + function (Crawler $node) use (&$listInfos): void { + $link_node = $node->filter(getenv("yamoon.list.item.link.tag")); + $detail_url = $link_node->attr("href"); + $title = $link_node->text(); + $info_node = $node->filter(getenv("yamoon.list.item.info.tag")); + $infos = explode("|", $info_node->text()); + $listInfos[] = ['title' => $title, 'detail_url' => $detail_url, 'nickname' => trim($infos[0]), 'hit' => trim($infos[2]), 'date' => trim($infos[4])]; + } + ); + if (!count($listInfos)) { + throw new \Exception("Target URL이 없습니다."); + } + $this->main_process($max_limit, $listInfos); + log_message("notice", __FUNCTION__ . " 작업이 완료되었습니다."); + } catch (\Exception $e) { + log_message("warning", sprintf( + "\n---%s 오류---\n%s\n-----------------------------------------\n", + __FUNCTION__, + $e->getMessage() + )); + } + } +} diff --git a/app/Libraries/MyCrawler/MyCrawler.php b/app/Libraries/MyCrawler/MyCrawler.php index 56525de..130e53f 100644 --- a/app/Libraries/MyCrawler/MyCrawler.php +++ b/app/Libraries/MyCrawler/MyCrawler.php @@ -9,64 +9,58 @@ use App\Traits\FileTrait; abstract class MyCrawler extends CommonLibrary { use FileTrait; - private $_mySocket = null; - protected function __construct($mySocket) + protected function __construct() { parent::__construct(); - $this->_mySocket = $mySocket; } + abstract protected function getMySocket(); abstract protected function createMyStorage(); - abstract protected function detail_page(int $cnt, array $listInfo): void; - final protected function getMySocket() - { - if ($this->_mySocket === null) { - throw new \Exception("Socket이 지정되지 않았습니다."); - } - return $this->_mySocket; - } - final protected function getSelector(string $content, string $tag): Crawler + abstract protected function detail_page(int $cnt, array $listInfo): array; + final protected function getSelector(string $content, string $tag, $isViewHTML = false): Crawler { $crawler = new Crawler($content); if ($this->getDebug()) { + log_message("debug", __FUNCTION__ . "=> " . $tag); + } + $crawler->filter($tag); + if ($isViewHTML) { log_message("debug", sprintf( - "\n---------%s----------\ntag:%s\n%s\n-------------------\n", + "\n------------%s HTML-------------\n%s\n-----------------------------------------------------\n", __FUNCTION__, - $tag, - $content + $crawler->filter($tag)->html() )); - exit; } return $crawler->filter($tag); } - //--------미디어 URL관련------ - private function getMediaUrlsByMediaType(string $media_type, Crawler $selector, array $options, array $urls = []): array + protected function getChangeURL(string $url): string + { + return $url; + } + protected function getUrlsByMediaType(string $media_type, Crawler $selector, array $options, array $urls = []): array { $urls[$media_type] = []; $selector->filter($options["tag"])->each( function (Crawler $node) use (&$media_type, &$options, &$urls): void { $url = $node->attr($options["attr"]); - log_message("debug", __FUNCTION__ . "-> {$media_type}[{$options["attr"]}]:{$url}"); - if (!is_null($url)) { - $urls[$media_type][] = $url; + switch ($media_type) { + case 'video': + if ($url === null) { + $url = $node->children()->attr("src"); + } + break; + } + if ($url !== null) { + $urls[$media_type][] = $this->getChangeURL($url); + } else { + log_message("debug", __FUNCTION__ . "-> {$media_type}[{$options["attr"]}]\n"); + log_message("debug", $node->html()); } } ); + log_message("notice", "-----------" . __FUNCTION__ . "=> {$media_type} 작업완료--------"); return $urls; } - //detailPage의 이미지나영상데이터가 있으면 URL과MediaType을 가져온다 - final protected function getMediaUrls(string $response, string $tag, array $listInfo): array - { - $selector = $this->getSelector($response, $tag); - log_message("debug", "\n-----------detailPage Tag: {$tag}---------------\n{$selector->html()}\n---------------------------\n"); - $urls = $this->getMediaUrlsByMediaType("image", $selector, ["tag" => "img", "attr" => "src"]); - $urls = $this->getMediaUrlsByMediaType("video", $selector, ["tag" => "video", "attr" => "src"], $urls); - // log_message("debug", "\n-------------------------\n" . var_export($urls, true) . "\n-----------------------\n"); - log_message("notice", "-----------" . __FUNCTION__ . " 작업완료--------"); - return array($listInfo, $urls); - } - - //--------미디어 관련------- private function media_save(int $file_sequence, string $media_type, string $file_name, string $content): mixed { log_message("debug", __FUNCTION__ . " 원본파일 {$file_name} 작업 시작"); @@ -138,11 +132,14 @@ abstract class MyCrawler extends CommonLibrary $total = count($listInfos); $i = 1; foreach ($listInfos as $listInfo) { + if ($this->getDebug()) { + $i = $max_limit; + } if ($i <= $max_limit) { log_message("notice", "게시물 {$i}번째/총:{$total} {$listInfo["nickname"]} 작업시작"); try { //listInfo는 title,작성자,작성시간등등의 정보를 가지고 있어 detail_page 처리 안에서 바뀔 수 있으므로 다시 반환 받는다. - $this->detail_page($i, $listInfo); + $listInfo = $this->detail_page($i, $listInfo); } catch (\Exception $e) { log_message("warning", sprintf( "\n---%s {$i}번째/총:{$total} 오류---\n%s\n-----------------------------------------\n", diff --git a/app/Libraries/MyCrawler/YamapCrawler.php b/app/Libraries/MyCrawler/YamapCrawler.php deleted file mode 100644 index b26eb04..0000000 --- a/app/Libraries/MyCrawler/YamapCrawler.php +++ /dev/null @@ -1,139 +0,0 @@ -_category = $category; - $this->_user_entity = $user_entity; - } - final protected function createMyStorage() - { - return new MangboardStorage($this->_category, $this->_user_entity); - } - //작성내용 - //
    - //
    요즘 패션
    - //
    - // - // 괴강고귀 - // - // - // | 추천 (14) | 조회 (432) - // - //
    - //
    - // - // - // - // 2024-09-14 01:53:45 - // - //
    - //
    - //
    - //

    - //

     

    - //
    - //
    - //
    - // - // - //
    - //
    - //
    - protected function detail_page(int $cnt, array $listInfo): void - { - $response = $this->getMySocket()->getContent($listInfo['detail_url']); - $tag = getenv("yamap.view.content.tag"); - list($listInfo, $media_urls) = $this->getMediaUrls($response, $tag, $listInfo); - //Image 나 Video 소스들의 url을 가져와서 실제 다운받는 처리 - $storages = $this->media_process($media_urls); - if (!count($storages)) { - throw new \Exception("등록할 자료가 없습니다."); - } - //File DB 및 Board DB 등록작업등 - $baord_name = "board_" . $this->_category; - $boardsModel = new BoardsModel(); - $boards_entity = $boardsModel->getEntityByID("board_" . $this->_category); - $boardModel = new BoardModel("mb_" . $baord_name); - $board_entity = $boardModel->createByCrawler( - $boards_entity, - $this->_user_entity, - $cnt, - $listInfo, - $storages - ); - foreach ($storages as $storage) { - try { - $storage->backend($boards_entity, $board_entity, $boardModel->getTable()); - } catch (\Exception $e) { - log_message("notice", sprintf( - "\n---%s -> %s 게시물의 %s번째:%s 파일 등록 오류---\n%s\n--------------------------------\n", - __FUNCTION__, - $board_entity->getTitle(), - $storage->getOriginSequence(), - $storage->getOriginName(), - $e->getMessage() - )); - } - } - } - public function execute(int $max_limit): void - { - try { - $listInfos = []; - if ($this->getDebug()) { - $listInfos = [ - 'title' => getenv("yamap.view.test.title"), - 'nickname' => getenv("yamap.view.test.nickname"), - 'detail_url' => getenv("yamap.view.test.url"), - 'time' => date("Y-m-d H:i:s"), - 'hit' => 1, - ]; - } else { - $response = $this->getMySocket()->getContent(getenv("yamap.list.url.{$this->_category}")); - $selector = $this->getSelector($response, getenv("yamap.list.tag")); - //div.bbs_item를 가진 객체를 찾아서 같은 형식의 객체(sibling)를 배열로 넘김 - // log_message("debug", sprintf("\n-------------MainPage------------\n%s\n--------------------------\n", $selector->html())); - $selector->filter(getenv("yamap.list.item.tag"))->each( - function (Crawler $node) use (&$listInfos): void { - //bbs_item에서 span.g_nickname 객체를 찾아서 작성자가 "관리자" 아닌지 확인 후 Return Bool - $nickname = $node->filter(getenv("yamap.list.item.nickname.tag"))->text(); - $hit = $node->filter(getenv("yamap.list.item.hit.tag"))->text(); - $date = $node->filter(getenv("yamap.list.item.date.tag"))->text(); - if ($nickname != getenv("yamap.list.item.nickname.except")) { - //작성자가 "관리자"가 아니 게시물이면 해당 bbs_item에서 a.list_subject 객체를 찾아서 - $link_node = $node->filter(getenv("yamap.list.item.link.tag")); - $detail_url = $link_node->attr("href"); - $title = $link_node->children()->last()->text(); - $listInfos[] = ['title' => $title, 'nickname' => $nickname, 'detail_url' => $detail_url, 'date' => $date, 'hit' => $hit]; - } - } - ); - } - if (!count($listInfos)) { - throw new \Exception("Target URL이 없습니다."); - } - $this->main_process($max_limit, $listInfos); - log_message("notice", __FUNCTION__ . " 작업이 완료되었습니다."); - } catch (\Exception $e) { - log_message("warning", sprintf( - "\n---%s 오류---\n%s\n-----------------------------------------\n", - __FUNCTION__, - $e->getMessage() - )); - } - } -} diff --git a/app/Libraries/MyCrawler/YamoonCrawler.php b/app/Libraries/MyCrawler/YamoonCrawler.php deleted file mode 100644 index 12c3cb3..0000000 --- a/app/Libraries/MyCrawler/YamoonCrawler.php +++ /dev/null @@ -1,116 +0,0 @@ -_category = $category; - $this->_user_entity = $user_entity; - } - final protected function createMyStorage() - { - return new MangboardStorage($this->_category, $this->_user_entity); - } - - protected function detail_page(int $cnt, array $listInfo): void - { - $response = $this->getMySocket()->getContent("/newboard/yamoonboard/" . $listInfo['detail_url']); - //작성시간 - // $selector = $this->getSelector($response, getenv("yamoon.view.regdate.tag")); - // $listInfo['date'] = trim($selector->text()); - //작성내용 - $tag = getenv("yamoon.view.content.tag"); - list($listInfo, $media_urls) = $this->getMediaUrls($response, $tag, $listInfo); - //Image 나 Video 소스들의 url을 가져와서 실제 다운받는 처리 - $storages = $this->media_process($media_urls); - if (!count($storages)) { - throw new \Exception("등록할 자료가 없습니다."); - } - //File DB 및 Board DB 등록작업등 - $baord_name = "board_" . $this->_category; - $boardsModel = new BoardsModel(); - $boards_entity = $boardsModel->getEntityByID("board_" . $this->_category); - $boardModel = new BoardModel("mb_" . $baord_name); - $board_entity = $boardModel->createByCrawler( - $boards_entity, - $this->_user_entity, - $cnt, - $listInfo, - $storages - ); - foreach ($storages as $storage) { - try { - $storage->backend($boards_entity, $board_entity, $boardModel->getTable()); - } catch (\Exception $e) { - log_message("notice", sprintf( - "\n---%s -> %s 게시물의 %s번째:%s 파일 등록 오류---\n%s\n--------------------------------\n", - __FUNCTION__, - $board_entity->getTitle(), - $storage->getOriginSequence(), - $storage->getOriginName(), - $e->getMessage() - )); - } - } - } - - public function execute(int $max_limit): void - { - try { - $listInfos = []; - if ($this->getDebug()) { - $listInfos = [ - 'title' => getenv("yamoon.view.test.title"), - 'nickname' => getenv("yamoon.view.test.nickname"), - 'detail_url' => getenv("yamoon.view.test.url"), - 'time' => date("Y-m-d H:i:s"), - 'hit' => 1, - ]; - } else { - $response = $this->getMySocket()->getContent(getenv("yamoon.list.url.{$this->_category}")); - //div.bbs_item를 가진 객체를 찾아서 같은 형식의 객체(sibling)를 배열로 넘김 - // log_message("debug", sprintf("\n-------------MainPage------------\n%s\n--------------------------\n", $selector->html())); - // - // - // 졸고 있는 여군 - // 6 - // yeeyuu | 6 | 369 | No 89372 | 2024-09-13 - // - //bbs_item에서 span.g_nickname 객체를 찾아서 작성자가 "관리자" 아닌지 확인 후 Return Bool - $this->getSelector($response, getenv("yamoon.list.tag"))->each( - function (Crawler $node) use (&$listInfos): void { - $link_node = $node->filter(getenv("yamoon.list.item.link.tag")); - $detail_url = $link_node->attr("href"); - $title = $link_node->text(); - $info_node = $node->filter(getenv("yamoon.list.item.info.tag")); - $infos = explode("|", $info_node->text()); - $listInfos[] = ['title' => $title, 'detail_url' => $detail_url, 'nickname' => trim($infos[0]), 'hit' => trim($infos[2]), 'date' => trim($infos[4])]; - } - ); - } - if (!count($listInfos)) { - throw new \Exception("Target URL이 없습니다."); - } - $this->main_process($max_limit, $listInfos); - log_message("notice", __FUNCTION__ . " 작업이 완료되었습니다."); - } catch (\Exception $e) { - log_message("warning", sprintf( - "\n---%s 오류---\n%s\n-----------------------------------------\n", - __FUNCTION__, - $e->getMessage() - )); - } - } -}