From 073f80c46af5d8419fa4bb75c0d2629e0dd10d4f Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=EC=B5=9C=EC=A4=80=ED=9D=A0?= Date: Wed, 18 Sep 2024 19:00:16 +0900 Subject: [PATCH] Automation init...3 --- .../Mangboard/CrawlerController.php | 4 ++ .../MyCrawler/Mangboard/InvenCrawler.php | 65 +++++++++++++------ .../MyCrawler/Mangboard/MangboardCrawler.php | 52 ++++++++++----- .../MyCrawler/Mangboard/SirCrawler.php | 13 ++-- .../MyCrawler/Mangboard/YamapCrawler.php | 13 ++-- .../MyCrawler/Mangboard/YamoonCrawler.php | 13 ++-- 6 files changed, 113 insertions(+), 47 deletions(-) diff --git a/app/Controllers/Mangboard/CrawlerController.php b/app/Controllers/Mangboard/CrawlerController.php index d8a888c..44be22e 100644 --- a/app/Controllers/Mangboard/CrawlerController.php +++ b/app/Controllers/Mangboard/CrawlerController.php @@ -55,6 +55,7 @@ class CrawlerController extends CommonController //2. 필요한 로그인한 사용자정보,Socket,Storage 정의후 Crawler에게 전달. $crawler = new YamapCrawler(getenv('yamap.host.url'), $board_name, $user_entity); $crawler->isDebug = in_array('debug', $params); + $crawler->isCopy = in_array('copy', $params); $crawler->execute(intval(getenv("yamap.list.max_limit"))); return "완료되었습니다."; } catch (\Exception $e) { @@ -70,6 +71,7 @@ class CrawlerController extends CommonController //2. 필요한 로그인한 사용자정보,Socket,Storage 정의후 Crawler에게 전달. $crawler = new YamoonCrawler(getenv("yamoon.host.url"), $board_name, $user_entity); $crawler->isDebug = in_array('debug', $params); + $crawler->isCopy = in_array('copy', $params); $crawler->execute(intval(getenv("yamoon.list.max_limit"))); return "완료되었습니다."; } catch (\Exception $e) { @@ -85,6 +87,7 @@ class CrawlerController extends CommonController //2. 필요한 로그인한 사용자정보,Socket,Storage 정의후 Crawler에게 전달. $crawler = new SirCrawler(getenv("sir.host.url"), $board_name, $user_entity); $crawler->isDebug = in_array('debug', $params); + $crawler->isCopy = in_array('copy', $params); $crawler->execute(intval(getenv("sir.list.max_limit"))); return "완료되었습니다."; } catch (\Exception $e) { @@ -100,6 +103,7 @@ class CrawlerController extends CommonController //2. 필요한 로그인한 사용자정보,Socket,Storage 정의후 Crawler에게 전달. $crawler = new InvenCrawler(getenv("inven.host.url"), $board_name, $user_entity); $crawler->isDebug = in_array('debug', $params); + $crawler->isCopy = in_array('copy', $params); $crawler->execute(intval(getenv("inven.list.max_limit"))); return "완료되었습니다."; } catch (\Exception $e) { diff --git a/app/Libraries/MyCrawler/Mangboard/InvenCrawler.php b/app/Libraries/MyCrawler/Mangboard/InvenCrawler.php index f1deef5..b34c785 100644 --- a/app/Libraries/MyCrawler/Mangboard/InvenCrawler.php +++ b/app/Libraries/MyCrawler/Mangboard/InvenCrawler.php @@ -48,28 +48,48 @@ class InvenCrawler extends MangboardCrawler // protected function detail_process(int $cnt, array $listInfo): array { - $response = $this->getMySocket()->getContent($listInfo['detail_url']); - $selector = $this->getSelector($response, getenv("inven.view.content.tag")); - $media_urls = $this->getUrlsByMediaType($selector, "img", "src"); - $media_urls = $this->getUrlsByMediaType($selector, "video", "src", $media_urls); - if ($this->isDebug) { - throw new \Exception(sprintf( - "\n--------------%s Debug--------------\n%s%s\n---------------------------------------\n", - __FUNCTION__, - var_export($listInfo, true), - var_export($media_urls, true) - )); + $response = $this->getMySocket()->getContent($listInfo['detail_url']); + $selector = $this->getSelector($response, getenv("inven.view.content.tag")); + if ($this->isCopy) { + $formDatas = []; + $formDatas['image_path'] = ""; + $formDatas['content'] = $selector->html(); + //File DB 및 Board DB 등록작업등 + $this->getBoardModel()->createByCrawler( + $this->getBoardsEntity(), + $this->getUserEntity(), + $cnt, + $listInfo, + [], + $formDatas + ); } else { - // Image 나 Video 소스들의 url을 가져와서 실제 다운받는 처리 - $storages = $this->media_process($media_urls); - if (!count($storages)) { - throw new \Exception("등록할 자료가 없습니다."); + $media_urls = $this->getUrlsByMediaType($selector, "img", "src"); + $media_urls = $this->getUrlsByMediaType($selector, "video", "src", $media_urls); + if ($this->isDebug) { + throw new \Exception(sprintf( + "\n--------------%s Debug--------------\n%s%s\n---------------------------------------\n", + __FUNCTION__, + var_export($listInfo, true), + var_export($media_urls, true) + )); + } else { + // Image 나 Video 소스들의 url을 가져와서 실제 다운받는 처리 + $storages = $this->media_process($media_urls); + if (!count($storages)) { + throw new \Exception("등록할 자료가 없습니다."); + } + $this->backend_process($cnt, $listInfo, $storages); } - $this->backend_process($cnt, $listInfo, $storages); } log_message("notice", __FUNCTION__ . " 작업이 완료되었습니다."); return $listInfo; } + protected function copy_process(int $cnt, array $listInfo): array + { + $response = $this->getMySocket()->getContent($listInfo['detail_url']); + return $listInfo; + } //리스트내용 //
// @@ -103,12 +123,17 @@ class InvenCrawler extends MangboardCrawler { try { if ($this->isDebug) { - $url = getenv("inven.view.test.url.{$this->_board_name}"); - $this->detail_process(1, ['detail_url' => $url]); - log_message("notice", __FUNCTION__ . "=> DEBUG 게시물 {$url} 작업종료"); + $listInfo = []; + $listInfo['title'] = 'test_title'; + $listInfo['nickname'] = 'test_name'; + $listInfo['hit'] = 1; + $listInfo['date'] = date("Y-m-d H:i:s"); + $listInfo['detail_url'] = getenv("inven.view.test.url.{$this->getBoardName()}"); + $this->detail_process(1, $listInfo); + log_message("notice", __FUNCTION__ . "=> DEBUG 게시물 {$listInfo['detail_url']} 작업종료"); } else { $listInfos = []; - $response = $this->getMySocket()->getContent(getenv("inven.list.url.{$this->_board_name}")); + $response = $this->getMySocket()->getContent(getenv("inven.list.url.{$this->getBoardName()}")); $this->getSelector($response, getenv("inven.list.tag"))->each( function (Crawler $node) use (&$listInfos): void { $hit = $node->filter(getenv("inven.list.item.hit.tag"))->text(); diff --git a/app/Libraries/MyCrawler/Mangboard/MangboardCrawler.php b/app/Libraries/MyCrawler/Mangboard/MangboardCrawler.php index 19fe9dd..0f1d8f7 100644 --- a/app/Libraries/MyCrawler/Mangboard/MangboardCrawler.php +++ b/app/Libraries/MyCrawler/Mangboard/MangboardCrawler.php @@ -2,9 +2,10 @@ namespace App\Libraries\MyCrawler\Mangboard; -use App\Libraries\MySocket\WebSocket; +use App\Entities\Mangboard\BoardsEntity; use App\Entities\Mangboard\UserEntity; use App\Libraries\MyCrawler\MyCrawler; +use App\Libraries\MySocket\WebSocket; use App\Libraries\MyStorage\MangboardStorage; use App\Models\Mangboard\BoardModel; use App\Models\Mangboard\BoardsModel; @@ -13,13 +14,15 @@ abstract class MangboardCrawler extends MyCrawler { private $_mySocket = null; private $_host = ""; - protected $_board_name = ""; + private $_board_name = ""; + private $_board_model = null; + private $_boards_entity = null; private $_user_entity = null; protected function __construct(string $host, string $board_name, UserEntity $user_entity) { parent::__construct(); $this->_host = $host; - $this->_board_name = $board_name; + $this->_board_name = $board_name; $this->_user_entity = $user_entity; } abstract protected function detail_process(int $cnt, array $listInfo): array; @@ -33,28 +36,47 @@ abstract class MangboardCrawler extends MyCrawler } final protected function createMyStorage() { - return new MangboardStorage($this->_board_name, $this->_user_entity); + return new MangboardStorage($this->getBoardName(), $this->getUserEntity()); + } + final protected function getBoardModel(): BoardModel + { + if ($this->_board_model === null) { + $this->_board_model = new BoardModel("mb_" . $this->getBoardName()); + } + return $this->_board_model; + } + final protected function getBoardName(): string + { + return $this->_board_name; + } + final protected function getBoardsEntity(): BoardsEntity + { + if ($this->_boards_entity === null) { + $boardsModel = new BoardsModel(); + $this->_boards_entity = $boardsModel->getEntityByID($this->getBoardName()); + if ($this->_boards_entity === null) { + throw new \Exception(__FUNCTION__ . "=> {$this->getBoardName()}에 해당 Board 정보가 존재하지 않습니다."); + } + } + return $this->_boards_entity; + } + final protected function getUserEntity(): UserEntity + { + return $this->_user_entity; } protected function backend_process(int $cnt, array $listInfo, array $storages) { //File DB 및 Board DB 등록작업등 - $baord_name = $this->_board_name; - $boardsModel = new BoardsModel(); - $boards_entity = $boardsModel->getEntityByID($this->_board_name); - if ($boards_entity === null) { - throw new \Exception(__FUNCTION__ . "=>{$this->_board_name}에 해당 Board 정보가 존재하지 않습니다."); - } - $boardModel = new BoardModel("mb_" . $baord_name); - $board_entity = $boardModel->createByCrawler( - $boards_entity, - $this->_user_entity, + $board_entity = $this->getBoardModel()->createByCrawler( + $this->getBoardsEntity(), + $this->getUserEntity(), $cnt, $listInfo, $storages ); foreach ($storages as $storage) { try { - $storage->backend_process($boards_entity, $board_entity, $boardModel->getTable()); + $storage->backend_process($this->getBoardsEntity(), $board_entity, $this->getBoardModel()->getTable()); } catch (\Exception $e) { log_message("notice", sprintf( "\n---%s -> %s 게시물의 %s번째:%s 파일 등록 오류---\n%s\n--------------------------------\n", diff --git a/app/Libraries/MyCrawler/Mangboard/SirCrawler.php b/app/Libraries/MyCrawler/Mangboard/SirCrawler.php index 450f4df..264a753 100644 --- a/app/Libraries/MyCrawler/Mangboard/SirCrawler.php +++ b/app/Libraries/MyCrawler/Mangboard/SirCrawler.php @@ -143,12 +143,17 @@ class SirCrawler extends MangboardCrawler { try { if ($this->isDebug) { - $url = getenv("sir.view.test.url.{$this->_board_name}"); - $this->detail_process(1, ['detail_url' => $url]); - log_message("notice", __FUNCTION__ . "=> DEBUG 게시물 {$url} 작업종료"); + $listInfo = []; + $listInfo['title'] = 'test_title'; + $listInfo['nickname'] = 'test_name'; + $listInfo['hit'] = 1; + $listInfo['date'] = date("Y-m-d H:i:s"); + $listInfo['detail_url'] = getenv("sir.view.test.url.{$this->getBoardName()}"); + $this->detail_process(1, $listInfo); + log_message("notice", __FUNCTION__ . "=> DEBUG 게시물 {$listInfo['detail_url']} 작업종료"); } else { $listInfos = []; - $response = $this->getMySocket()->getContent(getenv("sir.list.url.{$this->_board_name}")); + $response = $this->getMySocket()->getContent(getenv("sir.list.url.{$this->getBoardName()}")); $this->getSelector($response, getenv("sir.list.tag"))->each( function (Crawler $node) use (&$listInfos): void { $link_node = $node->filter(getenv("sir.list.item.link.tag")); diff --git a/app/Libraries/MyCrawler/Mangboard/YamapCrawler.php b/app/Libraries/MyCrawler/Mangboard/YamapCrawler.php index 59182fe..aa4fe9e 100644 --- a/app/Libraries/MyCrawler/Mangboard/YamapCrawler.php +++ b/app/Libraries/MyCrawler/Mangboard/YamapCrawler.php @@ -69,12 +69,17 @@ class YamapCrawler extends MangboardCrawler { try { if ($this->isDebug) { - $url = getenv("yamap.view.test.url.{$this->_board_name}"); - $this->detail_process(1, ['detail_url' => $url]); - log_message("notice", __FUNCTION__ . "DEBUG 게시물 {$url} 작업종료"); + $listInfo = []; + $listInfo['title'] = 'test_title'; + $listInfo['nickname'] = 'test_name'; + $listInfo['hit'] = 1; + $listInfo['date'] = date("Y-m-d H:i:s"); + $listInfo['detail_url'] = getenv("yamap.view.test.url.{$this->getBoardName()}"); + $this->detail_process(1, $listInfo); + log_message("notice", __FUNCTION__ . "=> DEBUG 게시물 {$listInfo['detail_url']} 작업종료"); } else { $listInfos = []; - $response = $this->getMySocket()->getContent(getenv("yamap.list.url.{$this->_board_name}")); + $response = $this->getMySocket()->getContent(getenv("yamap.list.url.{$this->getBoardName()}")); $selector = $this->getSelector($response, getenv("yamap.list.tag")); //div.bbs_item를 가진 객체를 찾아서 같은 형식의 객체(sibling)를 배열로 넘김 $selector->filter(getenv("yamap.list.item.tag"))->each( diff --git a/app/Libraries/MyCrawler/Mangboard/YamoonCrawler.php b/app/Libraries/MyCrawler/Mangboard/YamoonCrawler.php index 6136f63..f1786a2 100644 --- a/app/Libraries/MyCrawler/Mangboard/YamoonCrawler.php +++ b/app/Libraries/MyCrawler/Mangboard/YamoonCrawler.php @@ -91,12 +91,17 @@ class YamoonCrawler extends MangboardCrawler { try { if ($this->isDebug) { - $url = getenv("yamoon.view.test.url.{$this->_board_name}"); - $this->detail_process(1, ['detail_url' => $url]); - log_message("notice", __FUNCTION__ . "DEBUG 게시물 {$url} 작업종료"); + $listInfo = []; + $listInfo['title'] = 'test_title'; + $listInfo['nickname'] = 'test_name'; + $listInfo['hit'] = 1; + $listInfo['date'] = date("Y-m-d H:i:s"); + $listInfo['detail_url'] = getenv("yamoon.view.test.url.{$this->getBoardName()}"); + $this->detail_process(1, $listInfo); + log_message("notice", __FUNCTION__ . "=> DEBUG 게시물 {$listInfo['detail_url']} 작업종료"); } else { $listInfos = []; - $response = $this->getMySocket()->getContent(getenv("yamoon.list.url.{$this->_board_name}")); + $response = $this->getMySocket()->getContent(getenv("yamoon.list.url.{$this->getBoardName()}")); $this->getSelector($response, getenv("yamoon.list.tag"))->each( function (Crawler $node) use (&$listInfos): void { $link_node = $node->filter(getenv("yamoon.list.item.link.tag"));