_category = $category; }

    /**
     * Returns the socket used to fetch remote pages, creating it on first use.
     *
     * The host is read from the "yamoon.host.url" environment setting.
     */
    public function getMySocket()
    {
        if ($this->_mySocket === null) {
            $this->_mySocket = new WebSocket(getenv('yamoon.host.url'));
        }

        return $this->_mySocket;
    }

    /**
     * Returns the storage bound to the current category, creating it on first use.
     *
     * @throws \Exception when no category has been set (see getCategory()).
     */
    public function getMyStorage()
    {
        if ($this->_myStorage === null) {
            $this->_myStorage = new MangboardStorage($this->getCategory());
        }

        return $this->_myStorage;
    }

    /**
     * Returns the boards library for the current category and user, creating it on first use.
     *
     * @throws \Exception when the category or the user entity is missing.
     */
    public function getBoardsLibrary(): BoardsLibrary
    {
        if ($this->_boards_library === null) {
            $this->_boards_library = new BoardsLibrary(
                $this->getCategory(),
                $this->getUserEntity()
            );
        }

        return $this->_boards_library;
    }

    /**
     * Returns the board library, creating it on first use from the boards
     * library's entity and the current user.
     *
     * @throws \Exception when the category or the user entity is missing.
     */
    public function getBoardLibrary(): BoardLibrary
    {
        if ($this->_board_library === null) {
            $this->_board_library = new BoardLibrary(
                $this->getBoardsLibrary()->getEntity(),
                $this->getUserEntity()
            );
        }

        return $this->_board_library;
    }

    /**
     * Returns the file library, creating it on first use from the boards
     * library's entity and the current user.
     *
     * @throws \Exception when the category or the user entity is missing.
     */
    public function getFileLibrary(): FileLibrary
    {
        if ($this->_file_library === null) {
            $this->_file_library = new FileLibrary(
                $this->getBoardsLibrary()->getEntity(),
                $this->getUserEntity()
            );
        }

        return $this->_file_library;
    }

    /**
     * Returns the image library, creating it on first use.
     */
    public function getImageLibrary(): ImageLibrary
    {
        if ($this->_image_library === null) {
            $this->_image_library = new ImageLibrary();
        }

        return $this->_image_library;
    }

    /**
     * Returns the user entity previously supplied through setUserEntity().
     *
     * @throws \Exception when no user entity has been set.
     */
    public function getUserEntity(): UserEntity
    {
        if ($this->_user_entity === null) {
            throw new \Exception("사용자정보가 없습니다.");
        }

        return $this->_user_entity;
    }

    /**
     * Sets the user entity handed to the board/file libraries.
     */
    public function setUserEntity(UserEntity $user_entity): void
    {
        $this->_user_entity = $user_entity;
    }

    /**
     * Returns the category this crawler stores into.
     *
     * @throws \Exception when the category compares equal to an empty string.
     */
    public function getCategory(): string
    {
        if ($this->_category == "") {
            throw new \Exception("저장할 Category가 정의되지 않았습니다.");
        }

        return $this->_category;
    }

    /**
     * Hands one downloaded file to the storage and records the result of
     * the storage's save() call in $this->_storages.
     *
     * @param int    $file_sequence Position of the file within the current detail page (starts at 1).
     * @param string $mediaType     Media type key the URL was collected under ("image" or "video").
     * @param string $file_name     Original file name taken from the URL.
     * @param string $content       Downloaded file content.
     */
    private function save(int $file_sequence, string $mediaType, string $file_name, string $content): void
    {
        log_message("debug", __FUNCTION__ . " 원본파일 {$file_name} 작업 시작");

        $storage = $this->getMyStorage();
        $storage->setOriginName($file_name);
        $storage->setOriginContent($content);
        $storage->setOriginType($mediaType);
        $storage->setOriginSequence($file_sequence);

        $this->_storages[] = $storage->save();
    }

    /**
     * Downloads one image/video referenced from a detail page.
     *
     * @param string $mediaType Media type the file extension must match.
     * @param string $url       URL of the media file.
     *
     * @return array Two-element list: [file name, downloaded content].
     *
     * @throws \Exception when the URL carries no file name or the extension
     *                    is not accepted for $mediaType.
     */
    private function download(string $mediaType, string $url): array
    {
        // explode() never returns an empty array, so the only way a URL can
        // lack a file name is an empty last segment (e.g. a trailing "/").
        $segments = explode('/', $url);
        $file_name = array_pop($segments);
        if ($file_name === null || $file_name === '') {
            throw new \Exception("URL이 파일명 형식이 아닙니다 : " . $this->getMySocket()->getHost() . $url);
        }

        // The extension is whatever follows the last dot of the file name.
        $name_parts = explode(".", $file_name);
        $file_ext = array_pop($name_parts);
        if (!$this->isFileType_FileTrait($file_ext, $mediaType)) {
            throw new \Exception("파일명 형식이 {$mediaType}가 아닙니다");
        }

        $content = $this->getMySocket()->getContent($url);
        log_message("notice", "{$file_name} 파일이 다운로드되었습니다!");

        return [$file_name, $content];
    }

    /**
     * Downloads and stores every media URL of one detail page.
     *
     * A failure on a single URL is logged as a warning and skipped, so the
     * remaining URLs are still processed.
     *
     * @param array $urls Map of media type => list of URLs.
     *
     * @throws \Exception when not a single file could be stored.
     */
    private function mediaContent(array $urls): void
    {
        $file_sequence = 1;
        // Reset per detail page; execute() passes this list on to the libraries.
        $this->_storages = [];

        foreach ($urls as $mediaType => $media_urls) {
            foreach ($media_urls as $url) {
                try {
                    [$file_name, $content] = $this->download($mediaType, $url);
                    $this->save($file_sequence, $mediaType, $file_name, $content);
                    $file_sequence++;
                    log_message("notice", __FUNCTION__ . " OriginType->{$mediaType} 작업 완료");
                } catch (\Exception $e) {
                    log_message("warning", sprintf(
                        "\n---%s,OriginType->%s 오류---\n%s\n-----------------------------------------\n",
                        __FUNCTION__,
                        $mediaType,
                        $e->getMessage()
                    ));
                }
            }
        }

        if (!count($this->_storages)) {
            throw new \Exception("Download된 Content가 없습니다.");
        }
    }

    /**
     * Collects the media URLs of one media type from a detail page.
     *
     * @param string  $mediaType Key under which the URLs are stored in the result.
     * @param Crawler $selector  Crawler positioned on the content area.
     * @param array   $options   "tag" => selector to filter by, "attr" => attribute holding the URL.
     * @param array   $urls      Result of a previous call to extend; the entry for
     *                           $mediaType is reset before collecting.
     *
     * @return array $urls with $urls[$mediaType] set to the list of non-null attribute values.
     */
    private function getUrlsByDetailPageMediaType(string $mediaType, Crawler $selector, array $options, array $urls = []): array
    {
        $urls[$mediaType] = [];

        // Only $urls is mutated by the callback, so it alone is captured by reference.
        $selector->filter($options["tag"])->each(
            function (Crawler $node) use ($mediaType, $options, &$urls): void {
                $url = $node->attr($options["attr"]);
                log_message("debug", "getUrlsByDetailPageMediaType-> {$mediaType}[{$options["attr"]}]:{$url}");
                if (!is_null($url)) {
                    $urls[$mediaType][] = $url;
                }
            }
        );

        return $urls;
    }

    /**
     * Fetches one detail page and extracts its image and video URLs.
     *
     * @param array $listInfo List entry; its "detail_url" value is appended to the board path.
     *
     * @return array Two-element list: [$listInfo (unchanged), map of media type => URLs].
     */
    private function detailPage(array $listInfo): array
    {
        $url = "/newboard/yamoonboard/" . $listInfo['detail_url'];
        $response = $this->getMySocket()->getContent($url);

        // Narrow the page down to the post content area.
        $tag = getenv("yamoon.view.content.tag");
        $selector = $this->getSelector($response, $tag);
        log_message("debug", "\n-----------detailPage Tag: {$tag}---------------\n{$selector->html()}\n---------------------------\n");

        $urls = $this->getUrlsByDetailPageMediaType("image", $selector, ["tag" => "img", "attr" => "src"]);
        $urls = $this->getUrlsByDetailPageMediaType("video", $selector, ["tag" => "video", "attr" => "src"], $urls);
        log_message("debug", "\n-------------------------\n" . var_export($urls, true) . "\n-----------------------\n");
        log_message("notice", "-----------" . __FUNCTION__ . " 작업완료--------");

        return [$listInfo, $urls];
    }

    /**
     * Fetches the list page and returns the entries dated today.
     *
     * The info node's text is split on "|"; field 0 is used as the nickname,
     * field 2 as the hit count and field 4 as the date (for example
     * "yeeyuu | 6 | 369 | No 89372 | 2024-09-13").
     *
     * @param string $url URL of the list page.
     *
     * @return array List of entries with keys title, detail_url, nickname, hit, date.
     *
     * @throws \Exception when no entry for today was found.
     */
    private function mainPage(string $url): array
    {
        $listInfos = [];
        $response = $this->getMySocket()->getContent($url);
        $today = date("Y-m-d");

        $this->getSelector($response, getenv("yamoon.list.tag"))->each(
            function (Crawler $node) use (&$listInfos, $today): void {
                $link_node = $node->filter(getenv("yamoon.list.item.link.tag"));
                $info_node = $node->filter(getenv("yamoon.list.item.info.tag"));
                // Skip items lacking a link or info node instead of letting
                // attr()/text() throw on an empty node list and abort the crawl.
                if (!$link_node->count() || !$info_node->count()) {
                    return;
                }

                $infos = explode("|", $info_node->text());
                // The date lives in the fifth field; ignore rows that are too short.
                if (count($infos) < 5) {
                    return;
                }

                if (trim($infos[4]) == $today) {
                    $listInfos[] = [
                        'title' => $link_node->text(),
                        'detail_url' => $link_node->attr("href"),
                        'nickname' => trim($infos[0]),
                        'hit' => trim($infos[2]),
                        'date' => trim($infos[4]),
                    ];
                }
            }
        );

        if (!count($listInfos)) {
            throw new \Exception("Target URL이 없습니다.");
        }
        log_message("notice", __FUNCTION__ . " 작업 완료");

        return $listInfos;
    }

    /**
     * Runs the crawl: collects the list entries, downloads each entry's media
     * and registers the board, file and image records.
     *
     * In debug mode a single test entry built from the "yamoon.view.test.*"
     * settings is processed instead of the live list page.
     *
     * A failing entry is logged and skipped. The counter only advances on
     * success, so "yamoon.list.max_limit" bounds the number of entries that
     * were registered successfully; 0 means no limit.
     */
    public function execute(): void
    {
        if ($this->getDebug()) {
            // Must be a list of entries (like mainPage() returns); a bare
            // associative array would make the loop below iterate over the
            // individual field values.
            $listInfos = [
                [
                    'title' => getenv("yamoon.view.test.title"),
                    'nickname' => getenv("yamoon.view.test.nickname"),
                    'detail_url' => getenv("yamoon.view.test.url"),
                    'time' => date("Y-m-d H:i:s"),
                    // mainPage() entries carry "date"; provide it here too.
                    'date' => date("Y-m-d"),
                    'hit' => 1,
                ],
            ];
        } else {
            $listInfos = $this->mainPage(getenv("yamoon.list.url." . $this->getCategory()));
        }

        // A limit of 0 processes every entry; otherwise the smaller of the
        // limit and the number of entries is used.
        $total = count($listInfos);
        $max_limit = intval(getenv("yamoon.list.max_limit"));
        $max_limit = $max_limit !== 0 ? min($total, $max_limit) : $total;

        $i = 1;
        foreach ($listInfos as $listInfo) {
            if ($i > $max_limit) {
                break;
            }
            try {
                log_message("notice", "게시물 {$i}번째/{$total}개중 {$listInfo["nickname"]} 작업시작");
                [$listInfo, $urls] = $this->detailPage($listInfo);
                $this->mediaContent($urls);
                // Register the board record first, then its file and image records.
                $board_entity = $this->getBoardLibrary()->createByCrawler($i, $listInfo, $this->_storages);
                $this->getFileLibrary()->createByCrawler($board_entity, $this->_storages);
                $this->getImageLibrary()->createByCrawler($board_entity, $this->_storages);
                log_message("notice", "게시물 {$i}번째/{$total}개중 {$listInfo["nickname"]} 작업완료.");
                $i++;
            } catch (\Exception $e) {
                log_message("debug", $e->getMessage());
            }
        }

        log_message("notice", "Crawler->" . __FUNCTION__ . " 작업이 완료되었습니다.");
    }
}