diff --git a/app/Controllers/CLI/Crawler.php b/app/Controllers/CLI/Crawler.php
index bd78774..76c50e4 100644
--- a/app/Controllers/CLI/Crawler.php
+++ b/app/Controllers/CLI/Crawler.php
@@ -11,28 +11,13 @@ class Crawler extends BaseController
     {
         try {
             $isDebug = in_array("debug", $params);
+            //1. Search the Yamap free board for posts whose author is not the administrator,
+            //   then fetch the image/video data found in the content of the first post in that list
             $library = new YamapLibrary(getenv("yamap.host"));
             $library->setDebug($isDebug);
-            //1. MainPage
-            $url = getenv("yamap.list.url");
-            $crawler = $library->getCrawler($url, getenv("yamap.list.tag"));
-            $urls = $library->getListURLs(
-                $crawler,
-                getenv("yamap.list.item.tag"),
-                getenv("yamap.list.item.subject.tag"),
-                getenv("yamap.list.item.nickname.tag"),
-                getenv("yamap.list.item.nickname.skip")
-            );
-            if (!count($urls)) {
-                throw new \Exception("Target URL이 없습니다.");
-            }
-            //2. TargetPage : div.contents 가진 객체를 찾아서 첫번쨰 요소에서만 참조
-            $url = $isDebug ? getenv("yamap.view.test.url") : $urls[0];
-            $crawler = $library->getCrawler($url, getenv("yamap.view.content.tag"));
-            //3. Image
-            $library->download($crawler, ["tag" => "img", "attr" => "src"]);
-            //4. Video
-            $library->download($crawler, ["tag" => "video", "attr" => "src"]);
+            $library->execute();
+            //2. WordPress login handling (not implemented in this change)
+            //3. Posting the result to the WordPress free board (not implemented in this change)
             log_message("info", "완료되었습니다.");
             return true;
         } catch (\Exception $e) {
diff --git a/app/Libraries/YamapLibrary.php b/app/Libraries/YamapLibrary.php
index 5637b00..5115287 100644
--- a/app/Libraries/YamapLibrary.php
+++ b/app/Libraries/YamapLibrary.php
@@ -26,7 +26,7 @@ class YamapLibrary
         $this->_debug = $debug;
     }
 
-    public function getCrawler(string $url, string $tag): Crawler
+    protected function getCrawler(string $url, string $tag): Crawler
     {
         $response = $this->getContentByMyWeb($url);
         if (!$response) {
@@ -35,40 +35,85 @@ class YamapLibrary
         return $this->createByMyCrawler($response)->filter($tag);
     }
 
-    public function getListURLs(
+    /**
+     * Collects every list item whose nickname differs from $item_nickname_skip.
+     *
+     * @return array[] One row per kept item: its 'nickname' text and the 'url' read from the
+     *                 href attribute of the element matched by $item_subject_tag.
+     */
+    protected function getList(
         Crawler $crawler,
         string $item_tag,
         string $item_subject_tag,
         string $item_nickname_tag,
         string $item_nickname_skip,
-        array $urls = []
+        array $results = []
     ): array {
         //div.bbs_item를 가진 객체를 찾아서 같은 형식의 객체(sibling)를 배열로 넘김
         $crawler->filter($item_tag)->each(
             function (Crawler $node) use (
-                &$urls,
                 $item_subject_tag,
                 &$item_nickname_tag,
-                &$item_nickname_skip
+                &$item_nickname_skip,
+                &$results
             ): void {
-                //bbs_item에서 span.g_nickname 객체를 찾아서 작성자거 "관리자" 아닌지 확인 후 Return Bool
+                //Read the author nickname of this item so posts by $item_nickname_skip can be left out
                 $nickname = $node->filter($item_nickname_tag)->text();
                 log_message("debug", $item_nickname_tag . ":" . $nickname);
                 if ($nickname != $item_nickname_skip) {
-                    $options = ["tag" => $item_subject_tag, "attr" => "href"];
-                    $urls = $this->getTagDatasByMyCrawler($node, $options);
+                    //Not written by the skipped author: record the href of the $item_subject_tag element
+                    $url = $node->filter($item_subject_tag)->attr("href");
+                    $results[] = ['nickname' => $nickname, 'url' => $url];
                 }
             }
         );
-        return $urls;
+        return $results;
     }
 
-    public function download(Crawler $crawler, array $options): void
+    /**
+     * Hands the $options["attr"] value of every $options["tag"] element to downloadByMyWeb().
+     */
+    protected function download(Crawler $crawler, array $options): void
     {
         log_message("debug", "download:{$options["tag"]},{$options["attr"]}");
-        $urls = $this->getTagDatasByMyCrawler($crawler, $options);
-        foreach ($urls as $url) {
-            $this->downloadByMyWeb($url, $this->getPathByMyStorage(), $this->getDebug());
+        $nodes = $this->getNodesByMyCrawler($crawler, $options);
+        foreach ($nodes as $node) {
+            $source = $node->attr($options["attr"]);
+            if ($source === null || $source === '') {
+                //Nothing to fetch when the element lacks the attribute (e.g. <img> without src)
+                continue;
+            }
+            $this->downloadByMyWeb($source, $this->getPathByMyStorage(), $this->getDebug());
         }
     }
+
+    /**
+     * Runs the crawl: reads the list page, takes the first post not written by the skipped
+     * nickname (or the configured test URL in debug mode) and downloads its images and videos.
+     *
+     * @throws \Exception When the list page yields no eligible post.
+     */
+    public function execute(): void
+    {
+        //1. MainPage
+        $url = getenv("yamap.list.url");
+        $crawler = $this->getCrawler($url, getenv("yamap.list.tag"));
+        $lists = $this->getList(
+            $crawler,
+            getenv("yamap.list.item.tag"),
+            getenv("yamap.list.item.subject.tag"),
+            getenv("yamap.list.item.nickname.tag"),
+            getenv("yamap.list.item.nickname.skip")
+        );
+        if (!count($lists)) {
+            throw new \Exception("Target URL이 없습니다.");
+        }
+        //2. TargetPage: only the first list entry is used (the test URL replaces it in debug mode)
+        $url = $this->getDebug() ? getenv("yamap.view.test.url") : $lists[0]["url"];
+        $crawler = $this->getCrawler($url, getenv("yamap.view.content.tag"));
+        //3. Image
+        $this->download($crawler, ["tag" => "img", "attr" => "src"]);
+        //4. Video
+        $this->download($crawler, ["tag" => "video", "attr" => "src"]);
+    }
 }
diff --git a/app/Traits/MyCrawlerTrait.php b/app/Traits/MyCrawlerTrait.php
index ca05b9b..3bd3e6c 100644
--- a/app/Traits/MyCrawlerTrait.php
+++ b/app/Traits/MyCrawlerTrait.php
@@ -10,14 +10,34 @@ trait MyCrawlerTrait
     {
         return new Crawler($html);
     }
-    public function getTagDatasByMyCrawler(Crawler $crawler, array $options = ["tag" => "a", "attr" => "href"], array $tagdatas = []): array
+
+    /**
+     * Returns the elements matching $options["tag"], each as its own Crawler node.
+     *
+     * Optional filters: $options["find"] keeps only nodes whose text equals the value and
+     * $options["except"] drops nodes whose text equals the value. Other keys (such as
+     * "attr") are ignored here, so every matching node is returned exactly once.
+     *
+     * @param array $options Must contain "tag"; may contain "find" and/or "except".
+     * @param array $nodes   Nodes to append to.
+     *
+     * @return array The collected Crawler nodes.
+     */
+    public function getNodesByMyCrawler(Crawler $crawler, array $options, array $nodes = []): array
     {
         $crawler->filter($options["tag"])->each(
-            function (Crawler $node) use (&$tagdatas, &$options): void {
-                log_message("debug", sprintf("getTagDatas-> %s:%s", $options["tag"], $node->attr($options["attr"])));
-                $tagdatas[] = $node->attr($options["attr"]);
+            function (Crawler $node) use ($options, &$nodes): void {
+                //Decide once per node; looping over every option key appended the node once per key
+                if (array_key_exists('find', $options) && $node->text() !== (string) $options['find']) {
+                    return;
+                }
+                if (array_key_exists('except', $options) && $node->text() === (string) $options['except']) {
+                    return;
+                }
+                log_message("debug", sprintf("getNodeByMyCrawler-> %s", $options["tag"]));
+                $nodes[] = $node;
             }
         );
-        return $tagdatas;
+        return $nodes;
     }
 }