@@ -33,8 +51,8 @@ class InvenCrawler extends MangboardCrawler $response = $this->getMySocket()->getContent($listInfo['detail_url']); $tag = getenv("inven.view.content.tag"); $selector = $this->getSelector($response, $tag); - $media_urls = $this->getUrlsByMediaType("image", $selector, ["tag" => "img", "attr" => "src"]); - $media_urls = $this->getUrlsByMediaType("video", $selector, ["tag" => "video", "attr" => "src"], $media_urls); + $media_urls = $this->getUrlsByMediaType($selector, "img", "src"); + $media_urls = $this->getUrlsByMediaType($selector, "video", "src", $media_urls); if ($this->getDebug()) { throw new \Exception(sprintf( "\n--------------%s Debug--------------\n%s%s\n---------------------------------------\n", @@ -84,28 +102,31 @@ class InvenCrawler extends MangboardCrawler public function execute(int $max_limit): void { try { - $listInfos = []; if ($this->getDebug()) { - $this->detail_page(1, ['detail_url' => getenv("inven.view.test.url")]); - } - $response = $this->getMySocket()->getContent(getenv("inven.list.url")); - //div.bbs_item를 가진 객체를 찾아서 같은 형식의 객체(sibling)를 배열로 넘김 - $this->getSelector($response, getenv("inven.list.tag"))->each( - function (Crawler $node) use (&$listInfos): void { - $hit = $node->filter(getenv("inven.list.item.hit.tag"))->text(); - $date = date("Y") . "-" . $node->filter(getenv("inven.list.item.date.tag"))->text(); - $nickname = $node->filter(getenv("inven.list.item.nickname.tag"))->text(); - //작성자가 "관리자"가 아닌 게시물이면 해당 bbs_item에서 a.list_subject 객체를 찾아서 - $link_node = $node->filter(getenv("inven.list.item.link.tag")); - $detail_url = $link_node->attr("href"); - $title = $link_node->text(); - $listInfos[] = ['title' => $title, 'nickname' => $nickname, 'detail_url' => $detail_url, 'date' => $date, 'hit' => $hit]; + $url = getenv("inven.view.test.url.{$this->_board_name}"); + log_message("notice", __FUNCTION__ . "=> DEBUG 게시물 {$url} 작업시작"); + $this->detail_page(1, ['detail_url' => $url]); + log_message("notice", __FUNCTION__ . "=> DEBUG 게시물 {$url} 작업종료"); + } else { + $listInfos = []; + $response = $this->getMySocket()->getContent(getenv("inven.list.url.{$this->_board_name}")); + $this->getSelector($response, getenv("inven.list.tag"))->each( + function (Crawler $node) use (&$listInfos): void { + $hit = $node->filter(getenv("inven.list.item.hit.tag"))->text(); + $date = date("Y") . "-" . $node->filter(getenv("inven.list.item.date.tag"))->text(); + $nickname = $node->filter(getenv("inven.list.item.nickname.tag"))->text(); + //작성자가 "관리자"가 아닌 게시물이면 해당 bbs_item에서 a.list_subject 객체를 찾아서 + $link_node = $node->filter(getenv("inven.list.item.link.tag")); + $detail_url = $link_node->attr("href"); + $title = $link_node->text(); + $listInfos[] = ['title' => $title, 'nickname' => $nickname, 'detail_url' => $detail_url, 'date' => $date, 'hit' => $hit]; + } + ); + if (!count($listInfos)) { + throw new \Exception("Target URL이 없습니다."); } - ); - if (!count($listInfos)) { - throw new \Exception("Target URL이 없습니다."); + $this->main_process($max_limit, $listInfos); } - $this->main_process($max_limit, $listInfos); log_message("notice", __FUNCTION__ . " 작업이 완료되었습니다."); } catch (\Exception $e) { log_message("warning", sprintf( diff --git a/app/Libraries/MyCrawler/Mangboard/MangboardCrawler.php b/app/Libraries/MyCrawler/Mangboard/MangboardCrawler.php index 5462ae5..134c129 100644 --- a/app/Libraries/MyCrawler/Mangboard/MangboardCrawler.php +++ b/app/Libraries/MyCrawler/Mangboard/MangboardCrawler.php @@ -11,9 +11,9 @@ use App\Models\Mangboard\BoardsModel; abstract class MangboardCrawler extends MyCrawler { - protected $_mySocket = null; - protected $_host = ""; - private $_board_name = ""; + private $_mySocket = null; + private $_host = ""; + protected $_board_name = ""; private $_user_entity = null; protected function __construct(string $host, string $board_name, UserEntity $user_entity) { @@ -22,7 +22,8 @@ abstract class MangboardCrawler extends MyCrawler $this->_board_name = $board_name; $this->_user_entity = $user_entity; } - protected function getMySocket() + abstract public function execute(int $max_limit): void; + final protected function getMySocket() { if ($this->_mySocket === null) { $this->_mySocket = new WebSocket($this->_host); diff --git a/app/Libraries/MyCrawler/Mangboard/SirCrawler.php b/app/Libraries/MyCrawler/Mangboard/SirCrawler.php index 3cf03ce..2d9de05 100644 --- a/app/Libraries/MyCrawler/Mangboard/SirCrawler.php +++ b/app/Libraries/MyCrawler/Mangboard/SirCrawler.php @@ -13,9 +13,9 @@ class SirCrawler extends MangboardCrawler { parent::__construct($host, $board_name, $user_entity); } - protected function changeURLByMediaType(string $url): string + protected function changeURLByCrawler(string $url): string { - return str_replace("/sir.kr/", "", parent::changeURLByMediaType($url)); + return str_replace("/sir.kr/", "", parent::changeURLByCrawler($url)); } //작성내용 //
@@ -91,8 +91,8 @@ class SirCrawler extends MangboardCrawler //작성내용 $tag = getenv("sir.view.content.tag"); $selector = $this->getSelector($response, $tag); - $media_urls = $this->getUrlsByMediaType("image", $selector, ["tag" => "img", "attr" => "src"]); - $media_urls = $this->getUrlsByMediaType("video", $selector, ["tag" => "video", "attr" => "src"], $media_urls); + $media_urls = $this->getUrlsByMediaType($selector, "img", "src"); + $media_urls = $this->getUrlsByMediaType($selector, "video", "src", $media_urls); if ($this->getDebug()) { throw new \Exception(sprintf( "\n--------------%s Debug--------------\n%s%s\n---------------------------------------\n", @@ -142,29 +142,32 @@ class SirCrawler extends MangboardCrawler public function execute(int $max_limit): void { try { - $listInfos = []; if ($this->getDebug()) { - $this->detail_page(1, ['detail_url' => getenv("sir.view.test.url")]); - } - $response = $this->getMySocket()->getContent(getenv("sir.list.url")); - $this->getSelector($response, getenv("sir.list.tag"))->each( - function (Crawler $node) use (&$listInfos): void { - $link_node = $node->filter(getenv("sir.list.item.link.tag")); - // href url의 맨 앞이 /가 두개라서 한개를 빼기위함 - $detail_url = str_replace("/sir.kr/", "", $link_node->attr("href")); - // $detail_url = $link_node->attr("href"); - $title = $link_node->text(); - $nickname = $node->filter(getenv("sir.list.item.nickname.tag"))->text(); - $hit = $node->filter(getenv("sir.list.item.hit.tag"))->text(); - // $date = $node->filter(getenv("sir.list.item.date.tag"))->text(); - $listInfos[] = ['title' => $title, 'nickname' => $nickname, 'detail_url' => $detail_url, 'date' => "", 'hit' => $hit]; + $url = getenv("sir.view.test.url.{$this->_board_name}"); + log_message("notice", __FUNCTION__ . "=> DEBUG 게시물 {$url} 작업시작"); + $this->detail_page(1, ['detail_url' => $url]); + log_message("notice", __FUNCTION__ . "=> DEBUG 게시물 {$url} 작업종료"); + } else { + $listInfos = []; + $response = $this->getMySocket()->getContent(getenv("sir.list.url.{$this->_board_name}")); + $this->getSelector($response, getenv("sir.list.tag"))->each( + function (Crawler $node) use (&$listInfos): void { + $link_node = $node->filter(getenv("sir.list.item.link.tag")); + // href url의 맨 앞이 /가 두개라서 한개를 빼기위함 + $detail_url = $this->changeURLByCrawler($link_node->attr("href")); + // $detail_url = $link_node->attr("href"); + $title = $link_node->text(); + $nickname = $node->filter(getenv("sir.list.item.nickname.tag"))->text(); + $hit = $node->filter(getenv("sir.list.item.hit.tag"))->text(); + // $date = $node->filter(getenv("sir.list.item.date.tag"))->text(); + $listInfos[] = ['title' => $title, 'nickname' => $nickname, 'detail_url' => $detail_url, 'date' => "", 'hit' => $hit]; + } + ); + if (!count($listInfos)) { + throw new \Exception("Target URL이 없습니다."); } - ); - // throw new \Exception("Target URL이 없습니다." . var_export($listInfos, true)); - if (!count($listInfos)) { - throw new \Exception("Target URL이 없습니다."); + $this->main_process($max_limit, $listInfos); } - $this->main_process($max_limit, $listInfos); log_message("notice", __FUNCTION__ . " 작업이 완료되었습니다."); } catch (\Exception $e) { log_message("warning", sprintf( diff --git a/app/Libraries/MyCrawler/Mangboard/YamapCrawler.php b/app/Libraries/MyCrawler/Mangboard/YamapCrawler.php index 4e59bb0..42ff039 100644 --- a/app/Libraries/MyCrawler/Mangboard/YamapCrawler.php +++ b/app/Libraries/MyCrawler/Mangboard/YamapCrawler.php @@ -16,8 +16,8 @@ class YamapCrawler extends MangboardCrawler $response = $this->getMySocket()->getContent($listInfo['detail_url']); $tag = getenv("yamap.view.content.tag"); $selector = $this->getSelector($response, $tag); - $media_urls = $this->getUrlsByMediaType("image", $selector, ["tag" => "img", "attr" => "src"]); - $media_urls = $this->getUrlsByMediaType("video", $selector, ["tag" => "video", "attr" => "src"], $media_urls); + $media_urls = $this->getUrlsByMediaType($selector, "img", "src"); + $media_urls = $this->getUrlsByMediaType($selector, "video", "src", $media_urls); if ($this->getDebug()) { throw new \Exception(sprintf( "\n--------------%s Debug--------------\n%s%s\n---------------------------------------\n", @@ -68,32 +68,36 @@ class YamapCrawler extends MangboardCrawler public function execute(int $max_limit): void { try { - $listInfos = []; if ($this->getDebug()) { - $this->detail_page(1, ['detail_url' => getenv("yamap.view.test.url")]); - } - $response = $this->getMySocket()->getContent(getenv("yamap.list.url")); - $selector = $this->getSelector($response, getenv("yamap.list.tag")); - //div.bbs_item를 가진 객체를 찾아서 같은 형식의 객체(sibling)를 배열로 넘김 - $selector->filter(getenv("yamap.list.item.tag"))->each( - function (Crawler $node) use (&$listInfos): void { - $hit = $node->filter(getenv("yamap.list.item.hit.tag"))->text(); - $date = $node->filter(getenv("yamap.list.item.date.tag"))->text(); - $nickname = $node->filter(getenv("yamap.list.item.nickname.tag"))->text(); - //bbs_item에서 span.g_nickname 객체를 찾아서 작성자가 "관리자" 아닌지 확인 후 Return Bool - if ($nickname != getenv("yamap.list.item.nickname.except")) { - //작성자가 "관리자"가 아닌 게시물이면 해당 bbs_item에서 a.list_subject 객체를 찾아서 - $link_node = $node->filter(getenv("yamap.list.item.link.tag")); - $detail_url = $link_node->attr("href"); - $title = $link_node->children()->last()->text(); - $listInfos[] = ['title' => $title, 'nickname' => $nickname, 'detail_url' => $detail_url, 'date' => $date, 'hit' => $hit]; + $url = getenv("yamap.view.test.url.{$this->_board_name}"); + log_message("notice", __FUNCTION__ . "DEBUG 게시물 {$url} 작업시작"); + $this->detail_page(1, ['detail_url' => $url]); + log_message("notice", __FUNCTION__ . "DEBUG 게시물 {$url} 작업종료"); + } else { + $listInfos = []; + $response = $this->getMySocket()->getContent(getenv("yamap.list.url.{$this->_board_name}")); + $selector = $this->getSelector($response, getenv("yamap.list.tag")); + //div.bbs_item를 가진 객체를 찾아서 같은 형식의 객체(sibling)를 배열로 넘김 + $selector->filter(getenv("yamap.list.item.tag"))->each( + function (Crawler $node) use (&$listInfos): void { + $hit = $node->filter(getenv("yamap.list.item.hit.tag"))->text(); + $date = $node->filter(getenv("yamap.list.item.date.tag"))->text(); + $nickname = $node->filter(getenv("yamap.list.item.nickname.tag"))->text(); + //bbs_item에서 span.g_nickname 객체를 찾아서 작성자가 "관리자" 아닌지 확인 후 Return Bool + if ($nickname != getenv("yamap.list.item.nickname.except")) { + //작성자가 "관리자"가 아닌 게시물이면 해당 bbs_item에서 a.list_subject 객체를 찾아서 + $link_node = $node->filter(getenv("yamap.list.item.link.tag")); + $detail_url = $link_node->attr("href"); + $title = $link_node->children()->last()->text(); + $listInfos[] = ['title' => $title, 'nickname' => $nickname, 'detail_url' => $detail_url, 'date' => $date, 'hit' => $hit]; + } } + ); + if (!count($listInfos)) { + throw new \Exception("Target URL이 없습니다."); } - ); - if (!count($listInfos)) { - throw new \Exception("Target URL이 없습니다."); + $this->main_process($max_limit, $listInfos); } - $this->main_process($max_limit, $listInfos); log_message("notice", __FUNCTION__ . " 작업이 완료되었습니다."); } catch (\Exception $e) { log_message("warning", sprintf( diff --git a/app/Libraries/MyCrawler/Mangboard/YamoonCrawler.php b/app/Libraries/MyCrawler/Mangboard/YamoonCrawler.php index 41a760d..2e58a0c 100644 --- a/app/Libraries/MyCrawler/Mangboard/YamoonCrawler.php +++ b/app/Libraries/MyCrawler/Mangboard/YamoonCrawler.php @@ -61,8 +61,8 @@ class YamoonCrawler extends MangboardCrawler //작성내용 $tag = getenv("yamoon.view.content.tag"); $selector = $this->getSelector($response, $tag); - $media_urls = $this->getUrlsByMediaType("image", $selector, ["tag" => "img", "attr" => "src"]); - $media_urls = $this->getUrlsByMediaType("video", $selector, ["tag" => "video", "attr" => "src"], $media_urls); + $media_urls = $this->getUrlsByMediaType($selector, "img", "src"); + $media_urls = $this->getUrlsByMediaType($selector, "video", "src", $media_urls); if ($this->getDebug()) { throw new \Exception(sprintf( "\n--------------%s Debug--------------\n%s%s\n---------------------------------------\n", @@ -90,25 +90,29 @@ class YamoonCrawler extends MangboardCrawler public function execute(int $max_limit): void { try { - $listInfos = []; if ($this->getDebug()) { - $this->detail_page(1, ['detail_url' => getenv("yamoon.view.test.url")]); - } - $response = $this->getMySocket()->getContent(getenv("yamoon.list.url")); - $this->getSelector($response, getenv("yamoon.list.tag"))->each( - function (Crawler $node) use (&$listInfos): void { - $link_node = $node->filter(getenv("yamoon.list.item.link.tag")); - $detail_url = $link_node->attr("href"); - $title = $link_node->text(); - $info_node = $node->filter(getenv("yamoon.list.item.info.tag")); - $infos = explode("|", $info_node->text()); - $listInfos[] = ['title' => $title, 'detail_url' => $detail_url, 'nickname' => trim($infos[0]), 'hit' => trim($infos[2]), 'date' => trim($infos[4])]; + $url = getenv("yamoon.view.test.url.{$this->_board_name}"); + log_message("notice", __FUNCTION__ . "=> DEBUG 게시물 {$url} 작업시작"); + $this->detail_page(1, ['detail_url' => $url]); + log_message("notice", __FUNCTION__ . "=> DEBUG 게시물 {$url} 작업종료"); + } else { + $listInfos = []; + $response = $this->getMySocket()->getContent(getenv("yamoon.list.url.{$this->_board_name}")); + $this->getSelector($response, getenv("yamoon.list.tag"))->each( + function (Crawler $node) use (&$listInfos): void { + $link_node = $node->filter(getenv("yamoon.list.item.link.tag")); + $detail_url = $link_node->attr("href"); + $title = $link_node->text(); + $info_node = $node->filter(getenv("yamoon.list.item.info.tag")); + $infos = explode("|", $info_node->text()); + $listInfos[] = ['title' => $title, 'detail_url' => $detail_url, 'nickname' => trim($infos[0]), 'hit' => trim($infos[2]), 'date' => trim($infos[4])]; + } + ); + if (!count($listInfos)) { + throw new \Exception("Target URL이 없습니다."); } - ); - if (!count($listInfos)) { - throw new \Exception("Target URL이 없습니다."); + $this->main_process($max_limit, $listInfos); } - $this->main_process($max_limit, $listInfos); log_message("notice", __FUNCTION__ . " 작업이 완료되었습니다."); } catch (\Exception $e) { log_message("warning", sprintf( diff --git a/app/Libraries/MyCrawler/MyCrawler.php b/app/Libraries/MyCrawler/MyCrawler.php index f411d08..7930da4 100644 --- a/app/Libraries/MyCrawler/MyCrawler.php +++ b/app/Libraries/MyCrawler/MyCrawler.php @@ -33,28 +33,45 @@ abstract class MyCrawler extends CommonLibrary return $crawler->filter($tag); } - protected function changeURLByMediaType(string $url): string + protected function changeURLByCrawler(string $url): string { return preg_match('/^[^?]+/', $url, $matches) ? $matches[0] : null; } - protected function getUrlsByMediaType(string $media_type, Crawler $selector, array $options, array $urls = []): array + protected function getUrlByMediaType(Crawler $node, string $media_type, string $attr): null|string { - $urls[$media_type] = []; - $selector->filter($options["tag"])->each( - function (Crawler $node) use (&$media_type, &$options, &$urls): void { - $url = $node->attr($options["attr"]); - switch ($media_type) { - case 'video': - if ($url === null) { - $url = $node->children()->attr("src"); - } - break; + switch ($media_type) { + case 'video': + try { + $url = $node->attr($attr); // 또는 + } catch (\Exception) { + $url = $node->children()->attr("src"); //