diff --git a/app/Controllers/Mangboard/CrawlerController.php b/app/Controllers/Mangboard/CrawlerController.php index 136cb5e..d87088c 100644 --- a/app/Controllers/Mangboard/CrawlerController.php +++ b/app/Controllers/Mangboard/CrawlerController.php @@ -28,7 +28,7 @@ class CrawlerController extends CommonController //2. 필요한 로그인한 사용자정보,Socket,Storage 정의후 Crawler에게 전달. $crawler = new YamapCrawler(getenv('yamap.host.url'), $category, $user_entity); $crawler->setDebug($debug === "true" ? true : false); - $crawler->execute(); + $crawler->execute(intval(getenv("yamap.list.max_limit"))); return "완료되었습니다."; } catch (\Exception $e) { log_message("error", $e->getMessage()); @@ -45,7 +45,7 @@ class CrawlerController extends CommonController //2. 필요한 로그인한 사용자정보,Socket,Storage 정의후 Crawler에게 전달. $crawler = new YamoonCrawler(getenv("yamoon.host.url"), $category, $user_entity); $crawler->setDebug($debug === "true" ? true : false); - $crawler->execute(); + $crawler->execute(intval(getenv("yamap.list.max_limit"))); return "완료되었습니다."; } catch (\Exception $e) { log_message("error", $e->getMessage()); diff --git a/app/Libraries/MyCrawler/MyCrawler.php b/app/Libraries/MyCrawler/MyCrawler.php index e4d6463..cc2eee3 100644 --- a/app/Libraries/MyCrawler/MyCrawler.php +++ b/app/Libraries/MyCrawler/MyCrawler.php @@ -17,7 +17,9 @@ abstract class MyCrawler extends CommonLibrary $this->_mySocket = $mySocket; } abstract protected function getMyStorage(); - abstract public function execute(): void; + abstract protected function list_page(): array; + abstract protected function detail_page(array $listInfo): array; + abstract protected function backend_process(int $i, array $listInfo, array $storages); final protected function getMySocket() { if ($this->_mySocket === null) { @@ -41,15 +43,15 @@ abstract class MyCrawler extends CommonLibrary } //--------미디어 URL관련------ - private function getMediaUrlsByMediaType(string $mediaType, Crawler $selector, array $options, array $urls = []): array + private function getMediaUrlsByMediaType(string $media_type, Crawler $selector, array $options, array $urls = []): array { - $urls[$mediaType] = []; + $urls[$media_type] = []; $selector->filter($options["tag"])->each( - function (Crawler $node) use (&$mediaType, &$options, &$urls): void { + function (Crawler $node) use (&$media_type, &$options, &$urls): void { $url = $node->attr($options["attr"]); - log_message("debug", __FUNCTION__ . "-> {$mediaType}[{$options["attr"]}]:{$url}"); + log_message("debug", __FUNCTION__ . "-> {$media_type}[{$options["attr"]}]:{$url}"); if (!is_null($url)) { - $urls[$mediaType][] = $url; + $urls[$media_type][] = $url; } } ); @@ -68,17 +70,17 @@ abstract class MyCrawler extends CommonLibrary } //--------미디어 관련------- - private function mediaSave(int $file_sequence, string $mediaType, string $file_name, string $content): void + private function media_save(int $file_sequence, string $media_type, string $file_name, string $content): void { log_message("debug", __FUNCTION__ . " 원본파일 {$file_name} 작업 시작"); $this->getMyStorage()->setOriginName($file_name); $this->getMyStorage()->setOriginContent($content); - $this->getMyStorage()->setOriginType($mediaType); + $this->getMyStorage()->setOriginType($media_type); $this->getMyStorage()->setOriginSequence($file_sequence); $this->_storages[] = $this->getMyStorage()->save(); } //Yamap ViewPage의 이미지나영상데이터가 있으면 Dodownload 한다. - private function mediaDownload(string $mediaType, string $url): array + private function media_download(string $media_type, string $url): array { $file_names = explode('/', $url); if (!is_array($file_names) || !count($file_names)) { @@ -87,33 +89,33 @@ abstract class MyCrawler extends CommonLibrary $file_name = array_pop($file_names); $temps = explode(".", $file_name); $file_ext = array_pop($temps); - if (!$this->isFileType_FileTrait($file_ext, $mediaType)) { - throw new \Exception("파일명 형식이 {$mediaType}가 아닙니다"); + if (!$this->isFileType_FileTrait($file_ext, $media_type)) { + throw new \Exception("파일명 형식이 {$media_type}가 아닙니다"); } $content = $this->getMySocket()->getContent($url); log_message("notice", "{$file_name} 파일이 다운로드되었습니다!"); return array($file_name, $content); } - final protected function mediaProcess(array $urls): array + final protected function media_process(array $media_urls): array { $file_sequence = 1; $this->_storages = []; //CreateBoard에서 사용을 위해 DetailPage마다 초기화 // log_message("debug", var_export($urls, true)); - foreach ($urls as $mediaType => $media_urls) { - foreach ($media_urls as $url) { + foreach ($media_urls as $media_type => $urls) { + foreach ($urls as $url) { try { if ($url === null) { continue; } - list($file_name, $content) = $this->mediaDownload($mediaType, $url); - $this->mediaSave($file_sequence, $mediaType, $file_name, $content); + list($file_name, $content) = $this->media_download($media_type, $url); + $this->media_save($file_sequence, $media_type, $file_name, $content); $file_sequence++; - log_message("notice", __FUNCTION__ . " OriginType->{$mediaType} 작업 완료"); + log_message("notice", __FUNCTION__ . " OriginType->{$media_type} 작업 완료"); } catch (\Exception $e) { log_message("warning", sprintf( - "\n---%s,OriginType->%s 오류---\n%s\n-----------------------------------------\n", + "\n---%s mediaType->%s 오류---\n%s\n-----------------------------------------\n", __FUNCTION__, - $mediaType, + $media_type, $e->getMessage() )); } @@ -124,4 +126,36 @@ abstract class MyCrawler extends CommonLibrary } return $this->_storages; } + protected function main_process(int $max_limit, array $listInfos): void + { + //Limit가 0이면 $listInfos 갯수만큼 다하고, LIMIT 갯수 혹은 item의 갯수중 작은수만큼 한다. + if ($max_limit) { + $max_limit = count($listInfos) <= $max_limit ? count($listInfos) : $max_limit; + } else { + $max_limit = count($listInfos); + } + $total = count($listInfos); + $i = 1; + foreach ($listInfos as $listInfo) { + if ($i <= $max_limit) { + try { + log_message("notice", "게시물 {$i}번째/{$total}개중 {$listInfo["nickname"]} 작업시작"); + //listInfo는 title,작성자,작성시간등등의 정보를 가지고 있어 detail_page 처리 안에서 바뀔 수 있으므로 다시 반환 받는다. + list($listInfo, $media_urls) = $this->detail_page($listInfo); + //Image 나 Video 소스들의 url을 가져와서 실제 다운받는 처리 + $this->media_process($media_urls); + //File DB 및 Board DB 등록작업등 + $this->backend_process($i, $listInfo, $this->_storages); + log_message("notice", "게시물 {$i}번째/{$total}개중 {$listInfo["nickname"]} 작업완료."); + $i++; + } catch (\Exception $e) { + log_message("warning", sprintf( + "\n---%s 오류---\n%s\n-----------------------------------------\n", + __FUNCTION__, + $e->getMessage() + )); + } + } + } + } } diff --git a/app/Libraries/MyCrawler/YamapCrawler.php b/app/Libraries/MyCrawler/YamapCrawler.php index 38b46d6..f046e5a 100644 --- a/app/Libraries/MyCrawler/YamapCrawler.php +++ b/app/Libraries/MyCrawler/YamapCrawler.php @@ -55,13 +55,13 @@ class YamapCrawler extends MyCrawler // //
// - private function detailPage(array $listInfo): array + protected function detail_page(array $listInfo): array { $response = $this->getMySocket()->getContent($listInfo['detail_url']); $tag = getenv("yamap.view.content.tag"); return $this->getMediaUrls($response, $tag, $listInfo); } - private function listPage(): array + protected function list_page(): array { if ($this->getDebug()) { return [ @@ -98,35 +98,17 @@ class YamapCrawler extends MyCrawler log_message("notice", __FUNCTION__ . " 작업 완료"); return $listInfos; } - public function execute(): void + protected function backend_process(int $i, array $listInfo, array $storages) { - $listInfos = $this->listPage(); - //Limit가 0이면 $listInfos 갯수만큼 다하고, LIMIT 갯수 혹은 item의 갯수중 작은수만큼 한다. - $max_limit = intval(getenv("yamap.list.max_limit")); - if ($max_limit) { - $max_limit = count($listInfos) <= $max_limit ? count($listInfos) : $max_limit; - } else { - $max_limit = count($listInfos); - } - $total = count($listInfos); - $i = 1; - foreach ($listInfos as $listInfo) { - if ($i <= $max_limit) { - try { - log_message("notice", "게시물 {$i}번째/{$total}개중 {$listInfo["nickname"]} 작업시작"); - list($listInfo, $urls) = $this->detailPage($listInfo); - $this->mediaProcess($urls); - //File DB 및 Board DB 등록작업 - $board_entity = $this->getMyStorage()->getBoard()->createByCrawler($i, $listInfo, $this->_storages); - $this->getMyStorage()->getFile()->createByCrawler($board_entity, $this->_storages); - $this->getMyStorage()->getImage()->createByCrawler($board_entity, $this->_storages); - log_message("notice", "게시물 {$i}번째/{$total}개중 {$listInfo["nickname"]} 작업완료."); - $i++; - } catch (\Exception $e) { - log_message("debug", $e->getMessage()); - } - } - } - log_message("notice", "Crawler->" . __FUNCTION__ . " 작업이 완료되었습니다."); + //File DB 및 Board DB 등록작업 + $board_entity = $this->getMyStorage()->getBoard()->createByCrawler($i, $listInfo, $storages); + $this->getMyStorage()->getFile()->createByCrawler($board_entity, $storages); + $this->getMyStorage()->getImage()->createByCrawler($board_entity, $storages); + } + public function execute(int $max_limit): void + { + $listInfos = $this->list_page(); + $this->main_process($max_limit, $listInfos); + log_message("notice", __FUNCTION__ . " 작업이 완료되었습니다."); } } diff --git a/app/Libraries/MyCrawler/YamoonCrawler.php b/app/Libraries/MyCrawler/YamoonCrawler.php index 4c0eb98..f68462f 100644 --- a/app/Libraries/MyCrawler/YamoonCrawler.php +++ b/app/Libraries/MyCrawler/YamoonCrawler.php @@ -25,7 +25,7 @@ class YamoonCrawler extends MyCrawler } return $this->_myStorage; } - private function detailPage(array $listInfo): array + protected function detail_page(array $listInfo): array { $response = $this->getMySocket()->getContent("/newboard/yamoonboard/" . $listInfo['detail_url']); //작성시간 @@ -35,7 +35,7 @@ class YamoonCrawler extends MyCrawler $tag = getenv("yamoon.view.content.tag"); return $this->getMediaUrls($response, $tag, $listInfo); } - private function listPage(): array + protected function list_page(): array { if ($this->getDebug()) { $listInfos = [ @@ -76,35 +76,17 @@ class YamoonCrawler extends MyCrawler log_message("notice", __FUNCTION__ . " 작업 완료"); return $listInfos; } - public function execute(): void + //File DB 및 Board DB 등록작업등 + protected function backend_process(int $i, array $listInfo, array $storages) { - $listInfos = $this->listPage(); - //Limit가 0이면 $listInfos 갯수만큼 다하고, LIMIT 갯수 혹은 item의 갯수중 작은수만큼 한다. - $max_limit = intval(getenv("yamap.list.max_limit")); - if ($max_limit) { - $max_limit = count($listInfos) <= $max_limit ? count($listInfos) : $max_limit; - } else { - $max_limit = count($listInfos); - } - $total = count($listInfos); - $i = 1; - foreach ($listInfos as $listInfo) { - if ($i <= $max_limit) { - try { - log_message("notice", "게시물 {$i}번째/{$total}개중 {$listInfo["nickname"]} 작업시작"); - list($listInfo, $urls) = $this->detailPage($listInfo); - $this->mediaProcess($urls); - //File DB 및 Board DB 등록작업 - $board_entity = $this->getMyStorage()->getBoard()->createByCrawler($i, $listInfo, $this->_storages); - $this->getMyStorage()->getFile()->createByCrawler($board_entity, $this->_storages); - $this->getMyStorage()->getImage()->createByCrawler($board_entity, $this->_storages); - log_message("notice", "게시물 {$i}번째/{$total}개중 {$listInfo["nickname"]} 작업완료."); - $i++; - } catch (\Exception $e) { - log_message("debug", $e->getMessage()); - } - } - } - log_message("notice", "Crawler->" . __FUNCTION__ . " 작업이 완료되었습니다."); + $board_entity = $this->getMyStorage()->getBoard()->createByCrawler($i, $listInfo, $storages); + $this->getMyStorage()->getFile()->createByCrawler($board_entity, $storages); + $this->getMyStorage()->getImage()->createByCrawler($board_entity, $storages); + } + public function execute(int $max_limit): void + { + $listInfos = $this->list_page(); + $this->main_process($max_limit, $listInfos); + log_message("notice", __FUNCTION__ . " 작업이 완료되었습니다."); } }