diff --git a/app/Controllers/Admin/UserController.php b/app/Controllers/Admin/UserController.php index f9d2197..a532da0 100644 --- a/app/Controllers/Admin/UserController.php +++ b/app/Controllers/Admin/UserController.php @@ -35,7 +35,7 @@ class UserController extends AdminController } $sign = $this->request->getPost('point') ?: "+"; - $entity = $this->setUserPointByMangboardTrait($entity, intval($point), $sign); + $entity = $this->setUserPointByMangboard($entity, intval($point), $sign); return "완료되었습니다."; } catch (\Exception $e) { log_message("error", $e->getMessage()); diff --git a/app/Controllers/CLI/Crawler.php b/app/Controllers/CLI/Crawler.php index df406a0..bd78774 100644 --- a/app/Controllers/CLI/Crawler.php +++ b/app/Controllers/CLI/Crawler.php @@ -14,19 +14,25 @@ class Crawler extends BaseController $library = new YamapLibrary(getenv("yamap.host")); $library->setDebug($isDebug); //1. MainPage - $url = getenv("yamap.url.main"); - $crawler = $library->getCrawlerByMainPage($url); - $links = $library->getLinks($crawler); - if (!count($links)) { - throw new \Exception("Target Links가 없습니다."); + $url = getenv("yamap.list.url"); + $crawler = $library->getCrawler($url, getenv("yamap.list.tag")); + $urls = $library->getListURLs( + $crawler, + getenv("yamap.list.item.tag"), + getenv("yamap.list.item.subject.tag"), + getenv("yamap.list.item.nickname.tag"), + getenv("yamap.list.item.nickname.skip") + ); + if (!count($urls)) { + throw new \Exception("Target URL이 없습니다."); } //2. TargetPage : div.contents 가진 객체를 찾아서 첫번쨰 요소에서만 참조 - $url = $isDebug ? getenv("yamap.url.test") : $links[0]["href"]; - $crawler = $library->getCrawlerByDetailPage($url); + $url = $isDebug ? getenv("yamap.view.test.url") : $urls[0]; + $crawler = $library->getCrawler($url, getenv("yamap.view.content.tag")); //3. Image - $library->getImages($crawler); + $library->download($crawler, ["tag" => "img", "attr" => "src"]); //4. 
Video - $library->getVideos($crawler); + $library->download($crawler, ["tag" => "video", "attr" => "src"]); log_message("info", "완료되었습니다."); return true; } catch (\Exception $e) { diff --git a/app/Controllers/CLI/Mangboard.php b/app/Controllers/CLI/Mangboard.php index f1d42d0..4cb3948 100644 --- a/app/Controllers/CLI/Mangboard.php +++ b/app/Controllers/CLI/Mangboard.php @@ -16,7 +16,7 @@ class Mangboard extends BaseController try { $userModel = new UserModel(); foreach ($userModel->getEntitys() as $entity) { - $entity = $this->setUserLevelByMangboardTrait($entity); + $entity = $this->setUserLevelByMangboard($entity); log_message("debug", __FUNCTION__ . "=>[{$entity}] 회원님의 Level은 {$entity->getLevel()} 입니다."); } log_message("info", "완료되었습니다."); diff --git a/app/Libraries/MyCrawler/MyCrawlerLibrary.php b/app/Libraries/MyCrawler/MyCrawlerLibrary.php deleted file mode 100644 index 881d2d5..0000000 --- a/app/Libraries/MyCrawler/MyCrawlerLibrary.php +++ /dev/null @@ -1,79 +0,0 @@ -_debug; - } - final public function setDebug(bool $debug): void - { - $this->_debug = $debug; - } - - final public function createCrawler($html) - { - return new Crawler($html); - } - - final public function getInnerHTML(string $html, $tag = false) - { - return $tag ? $this->createCrawler($html)->filter($tag)->html() : $this->createCrawler($html)->html(); - } - - public function getLinks(Crawler $crawler, array $options = ["tag" => "a", "attr" => "href"]): array - { - $links = $crawler->filter($options["tag"])->each( - function (Crawler $node) use (&$options): array { - return [ - "anchor" => $node->text(), - "href" => $node->attr($options["attr"]) - ]; - } - ); - foreach ($links as $link) { - log_message("debug", "Link-> " . 
$link['href']); - } - return $links; - } - - public function getImages(Crawler $crawler, array $options = ["tag" => "img", "attr" => "src"]): array - { - $images = $crawler->filter($options["tag"])->each( - function (Crawler $node) use (&$options): array { - return [ - "alt" => $node->attr('alt'), - "src" => $node->attr($options["attr"]) - ]; - } - ); - foreach ($images as $image) { - log_message("debug", "Image-> " . $image['src']); - } - return $images; - } - - public function getVideos(Crawler $crawler, array $options = ["tag" => "video", "attr" => "src"]): array - { - $videos = $crawler->filter($options["tag"])->each( - function (Crawler $node) use (&$options): array { - return [ - "alt" => $node->attr('alt'), - "src" => $node->attr($options["attr"]) - ]; - } - ); - foreach ($videos as $video) { - log_message("debug", "Video-> " . $video['src']); - } - return $videos; - } -} diff --git a/app/Libraries/MyStorage/MyStorageLibrary.php b/app/Libraries/MyStorage/MyStorageLibrary.php deleted file mode 100644 index 83f7892..0000000 --- a/app/Libraries/MyStorage/MyStorageLibrary.php +++ /dev/null @@ -1,28 +0,0 @@ -_path; - } - final public function setPath(string $path): void - { - $this->_path .= DIRECTORY_SEPARATOR . 
$path; - } - - final public function getDebug(): bool - { - return $this->_debug; - } - final public function setDebug(bool $debug): void - { - $this->_debug = $debug; - } -} diff --git a/app/Libraries/MyWeb/MyWebLibrary.php b/app/Libraries/MyWeb/MyWebLibrary.php deleted file mode 100644 index d41460e..0000000 --- a/app/Libraries/MyWeb/MyWebLibrary.php +++ /dev/null @@ -1,78 +0,0 @@ -_host = $host; - } - - final public function getHost(): string - { - return $this->_host; - } - - final public function getClient(): Client - { - if ($this->_client === null) { - $this->_client = new Client(['verify' => false]); - } - return $this->_client; - } - - final public function getCookieJar() - { - if ($this->_cookieJar === null) { - $this->_cookieJar = new CookieJar(); - } - return $this->_cookieJar; - } - - final public function getDebug(): bool - { - return $this->_debug; - } - final public function setDebug(bool $debug): void - { - $this->_debug = $debug; - } - - - final public function getContent(string $url): string - { - return $this->getClient()->get($this->gethost() . $url)->getBody(); - } - - // 로그인 메서드 - final public function login($url, $username, $password) - { - try { - $response = $this->getClient()->post($this->gethost() . $url, [ - 'form_params' => [ - 'username' => $username, - 'password' => $password, - ], - 'cookies' => $this->getCookieJar(), - ]); - if ($response->getStatusCode() == 200) { - log_message("info", "로그인 성공!"); - return true; - } else { - log_message("info", "로그인 실패: " . $response->getStatusCode()); - return false; - } - } catch (\Exception $e) { - log_message("error", "로그인 중 오류 발생: " . 
$e->getMessage());
-            return false;
-        }
-    }
-}
diff --git a/app/Libraries/YamapLibrary.php b/app/Libraries/YamapLibrary.php
index a9dd3b1..b9e5cd4 100644
--- a/app/Libraries/YamapLibrary.php
+++ b/app/Libraries/YamapLibrary.php
@@ -2,47 +2,19 @@

 namespace App\Libraries;

-use App\Libraries\MyWeb\MyWebLibrary;
-use App\Libraries\MyStorage\MyStorageLibrary;
-use App\Libraries\MyCrawler\MyCrawlerLibrary;
+use App\Traits\MyCrawlerTrait;
+use App\Traits\MyWebTrait;
+use App\Traits\MyStorage\MyStorageFileTrait;
 use Symfony\Component\DomCrawler\Crawler;

 class YamapLibrary
 {
-    private $_web = null;
-    private $_storage = null;
-    private $_crawler = null;
+    use MyWebTrait, MyStorageFileTrait, MyCrawlerTrait;
     private $_debug = false;
-
-    private $_host = null;
     public function __construct(string $host)
     {
-        $this->_host = $host;
-    }
-
-    public function getMyWeb(): MyWebLibrary
-    {
-        if ($this->_web === null) {
-            $this->_web = new MyWebLibrary($this->getHost());
-            $this->_web->setDebug($this->getDebug());
-        }
-        return $this->_web;
-    }
-    public function getMyStorage(): MyStorageLibrary
-    {
-        if ($this->_storage === null) {
-            $this->_storage = new MyStorageLibrary();
-            $this->_storage->setDebug($this->getDebug());
-        }
-        return $this->_storage;
-    }
-    public function getMyCrawler(): MyCrawlerLibrary
-    {
-        if ($this->_crawler === null) {
-            $this->_crawler = new MyCrawlerLibrary();
-            $this->_crawler->setDebug($this->getDebug());
-        }
-        return $this->_crawler;
+        $this->setHostByMyWeb($host);
+        $this->setPathByMyStorage(WRITEPATH . "uploads" . DIRECTORY_SEPARATOR . "Yamap");
     }

     final public function getDebug(): bool
@@ -53,92 +25,59 @@ class YamapLibrary
     {
         $this->_debug = $debug;
     }
-    final public function getHost(): string
+
+    /**
+     * Fetches $url via getContentByMyWeb() and narrows the resulting crawler to the $tag selector.
+     */
+    public function getCrawler(string $url, string $tag): Crawler
     {
-        return $this->_host;
+        log_message("debug", __FUNCTION__ . "-> " . $url . "\n");
+        $html = $this->getContentByMyWeb($url);
+        return $this->createByMyCrawler($html)->filter($tag);
     }

-    public function getLinks(Crawler $crawler): array
-    {
+    /**
+     * Collects the href of every $item_subject_tag node inside the $item_tag items whose
+     * $item_nickname_tag text differs from $item_nickname_skip; results are appended to $urls.
+     */
+    public function getListURLs(
+        Crawler $crawler,
+        string $item_tag,
+        string $item_subject_tag,
+        string $item_nickname_tag,
+        string $item_nickname_skip,
+        array $urls = []
+    ): array {
         //div.bbs_item를 가진 객체를 찾아서 같은 형식의 객체(sibling)를 배열로 넘김
-        $bbs_items = $crawler->filter("div.bbs_item")->first()->siblings();
-        $links = [];
-        foreach ($bbs_items as $bbs_item) {
-            //bbs_item에서 span.g_nickname 객체를 찾아서 "관리자"가 작성한것이 아닌것을 확인 후
-            $this->getMyCrawler()->createCrawler($bbs_item)->filter("span.g_nickname")->each(function (Crawler $node) use (&$links, &$bbs_item) {
-                if ($node->text() != "관리자") {
-                    //다시 bbs_item에서 a.list_subject 객체를 찾아서 Links에 추가한다.
-                    foreach ($this->getMyCrawler()->getLinks($this->getMyCrawler()->createCrawler($bbs_item), ["tag" => ".list_subject", "attr" => "href"]) as $link) {
-                        array_push($links, $link);
-                    }
-                }
-            });
-        }
-        return $links;
-    }
-
-    public function getImages(Crawler $crawler, array $options = ["tag" => "img", "attr" => "src"]): array
-    {
-        $images = $this->getMyCrawler()->getImages($crawler, $options);
-        foreach ($images as $image) {
-            $this->download($image['src']);
-        }
-        return $images;
-    }
-
-    public function getVideos(Crawler $crawler, array $options = ["tag" => "video", "attr" => "src"]): array
-    {
-        $videos = $this->getMyCrawler()->getVideos($crawler, $options);
-        foreach ($videos as $video) {
-            $this->download($video['src']);
-        }
-        return $videos;
-    }
-
-    // 파일 다운로드 메서드
-    final public function download($url): bool
-    {
-        try {
-            $fileNames = explode('/', $url);
-            if (!is_array($fileNames) || !count($fileNames)) {
-                throw new \Exception("Download URL Error:" . $url);
-            }
-            if (!is_dir($this->getMyStorage()->getPath())) {
-                if (!mkdir($this->getMyStorage()->getPath())) {
-                    throw new \Exception("Make Directory Error:" . $this->getMyStorage()->getPath());
+        $crawler->filter($item_tag)->each(
+            function (Crawler $node) use (
+                &$urls,
+                $item_subject_tag,
+                &$item_nickname_tag,
+                &$item_nickname_skip
+            ): void {
+                // Read the author's nickname from the item and skip posts written by $item_nickname_skip
+                $nickname = $node->filter($item_nickname_tag)->text();
+                log_message("debug", $item_nickname_tag . ":" . $nickname);
+                if ($nickname != $item_nickname_skip) {
+                    $options = ["tag" => $item_subject_tag, "attr" => "href"];
+                    // Hand the accumulated list back in so earlier matches are kept, not overwritten.
+                    $urls = $this->getTagDatasByMyCrawler($node, $options, $urls);
                 }
             }
-            $fullPath = $this->getMyStorage()->getPath() . DIRECTORY_SEPARATOR . array_pop($fileNames);
-            log_message("debug", "FullPath-> " . $fullPath);
-            if (!$this->getDebug()) {
-                $response = $this->getMyWeb()->getClient()->get($this->getMyWeb()->gethost() . $url, [
-                    'cookies' => $this->getMyWeb()->getCookieJar(),
-                    'sink' => $fullPath,
-                ]);
-                if ($response->getStatusCode() == 200) {
-                    log_message("info", "파일이 성공적으로 다운로드되었습니다!");
-                    return true;
-                } else {
-                    log_message("info", "파일 다운로드 실패: " . $response->getStatusCode());
-                    return false;
-                }
-            }
-            return true;
-        } catch (\Exception $e) {
-            log_message("error", "파일 다운로드 중 오류 발생: " . $e->getMessage());
-            return false;
-        }
+        );
+        return $urls;
     }

-    public function getCrawlerByMainPage(string $url): Crawler
+    /**
+     * Passes every $options["attr"] value of the $options["tag"] nodes to downloadByMyWeb().
+     */
+    public function download(Crawler $crawler, array $options): void
     {
-        $html = $this->getMyWeb()->getContent($url);;
-        return $this->getMyCrawler()->createCrawler($html)->filter("div.bbs_list")->first();
-    }
-    public function getCrawlerByDetailPage(string $url): Crawler
-    {
-        log_message("debug", "Target-> " . $url);
-        $html = $this->getMyWeb()->getContent($url);;
-        return $this->getMyCrawler()->createCrawler($html)->filter("div.contents")->first();
+        log_message("debug", "download:{$options["tag"]},{$options["attr"]}");
+        $urls = $this->getTagDatasByMyCrawler($crawler, $options);
+        foreach ($urls as $url) {
+            $this->downloadByMyWeb($url, $this->getPathByMyStorage(), $this->getDebug());
+        }
     }
 }
diff --git a/app/Traits/MangboardTrait.php b/app/Traits/MangboardTrait.php
index f1fd8d2..4cc17c6 100644
--- a/app/Traits/MangboardTrait.php
+++ b/app/Traits/MangboardTrait.php
@@ -8,7 +8,7 @@ use App\Models\UserModel;
 trait MangboardTrait
 {
     private $_userModel = null;
-    public function getUserModel(): UserModel
+    protected function getUserModel(): UserModel
     {
         if (is_null($this->_userModel)) {
             $this->_userModel = new UserModel();
@@ -16,7 +16,7 @@ trait MangboardTrait
         return $this->_userModel;
     }

-    public function setUserPointByMangboardTrait(UserEntity $entity, int $point, $sign = '+'): UserEntity
+    protected function setUserPointByMangboard(UserEntity $entity, int $point, $sign = '+'): UserEntity
     {
         switch ($sign) {
             case '-':
@@ -36,7 +36,7 @@ trait MangboardTrait
-        return $this->setUserLevelByMangboardTrait($entity);
+        return $this->setUserLevelByMangboard($entity);
     }

-    public function setUserLevelByMangboardTrait(UserEntity $entity): UserEntity
+    protected function setUserLevelByMangboard(UserEntity $entity): UserEntity
     {
         //Admin용 Level로는 변경불가
         if ($entity->getLevel() == MANGBOARD['admin']['level']) {
diff --git a/app/Traits/MyCrawlerTrait.php b/app/Traits/MyCrawlerTrait.php
new file mode 100644
index 0000000..3f4e8b3
--- /dev/null
+++ b/app/Traits/MyCrawlerTrait.php
@@ -0,0 +1,23 @@
+ "a", "attr" => "href"], array $tagdatas = []): array
+    {
+        $crawler->filter($options["tag"])->each(
+            function (Crawler $node) use (&$tagdatas, &$options): void {
+                log_message("debug", sprintf("getTagDatas-> %s:%s", $options["tag"], $node->attr($options["attr"])));
+                $tagdatas[] = $node->attr($options["attr"]);
+            }
+        );
+        return $tagdatas;
+    }
+}
diff --git a/app/Traits/MyStorage/MyStorageFileTrait.php b/app/Traits/MyStorage/MyStorageFileTrait.php
new file mode 100644
index 0000000..4316f39
--- /dev/null
+++ b/app/Traits/MyStorage/MyStorageFileTrait.php
@@ -0,0 +1,13 @@
+_path;
+    }
+    final protected function setPathByMyStorage(string $path): void
+    {
+        $this->_path = $path;
+    }
+}
diff --git a/app/Traits/MyWebTrait.php b/app/Traits/MyWebTrait.php
new file mode 100644
index 0000000..82000ff
--- /dev/null
+++ b/app/Traits/MyWebTrait.php
@@ -0,0 +1,131 @@
+_host;
+    }
+    final protected function setHostByMyWeb(string $host): void
+    {
+        $this->_host = $host;
+    }
+
+    final protected function getClientByMyWeb(): Client
+    {
+        if ($this->_client === null) {
+            $this->_client = new Client(['verify' => false]);
+        }
+        return $this->_client;
+    }
+
+    final protected function getCookieJarByMyWeb()
+    {
+        if ($this->_cookieJar === null) {
+            $this->_cookieJar = new CookieJar();
+        }
+        return $this->_cookieJar;
+    }
+
+    // true when $url contains "http://" or "https://"
+    final protected function isContainsHttpOrHttpsByMyWeb($url)
+    {
+        return strpos($url, 'http://') !== false || strpos($url, 'https://') !== false;
+    }
+    final protected function getContentByMyWeb(string $url, array $options = [])
+    {
+        // Prefix the configured host when $url carries no http:// or https://
+        if (!($this->isContainsHttpOrHttpsByMyWeb($url))) {
+            $url = $this->getHostByMyWeb() . $url;
+        }
+        $response = $this->getClientByMyWeb()->get($url, $options);
+        if ($response->getStatusCode() != 200) {
+            log_message("error", "{$url} 접속실패: " . $response->getStatusCode());
+            return false;
+        }
+        return $response->getBody()->getContents();
+    }
+
+    /**
+     * Posts the username/password form to host + $url, sending the shared cookie jar.
+     *
+     * @return bool true on HTTP 200; false on any other status or when an \Exception is caught.
+     */
+    final protected function loginByMyWeb($url, $username, $password)
+    {
+        try {
+            $response = $this->getClientByMyWeb()->post($this->getHostByMyWeb() . $url, [
+                'form_params' => [
+                    'username' => $username,
+                    'password' => $password,
+                ],
+                'cookies' => $this->getCookieJarByMyWeb(),
+            ]);
+            if ($response->getStatusCode() == 200) {
+                log_message("info", "로그인 성공!");
+                return true;
+            } else {
+                log_message("info", "로그인 실패: " . $response->getStatusCode());
+                return false;
+            }
+        } catch (\Exception $e) {
+            log_message("error", "로그인 중 오류 발생: " . $e->getMessage());
+            return false;
+        }
+    }
+
+    /**
+     * Downloads $url into the directory $fullPath, named after the last "/"-separated segment of $url.
+     * With $debug set, the save path is only logged and nothing is fetched or written.
+     *
+     * @return bool false when the fetch fails or an \Exception is caught; true otherwise.
+     */
+    final protected function downloadByMyWeb(string $url, string $fullPath, bool $debug = false): bool
+    {
+        try {
+            log_message("debug", "donwload:URL-> " . $url);
+            $fileNames = explode('/', $url);
+            if (!is_array($fileNames) || !count($fileNames)) {
+                throw new \Exception("Download URL Error:" . $url);
+            }
+            if (!is_dir($fullPath)) {
+                // Recursive: parent directories of the target may not exist yet.
+                if (!mkdir($fullPath, 0777, true)) {
+                    throw new \Exception("Make Directory Error:" . $fullPath);
+                }
+            }
+            $fileName = array_pop($fileNames);
+            $savePath = $fullPath . DIRECTORY_SEPARATOR . $fileName;
+            log_message("debug", "download:SavePath-> " . $savePath);
+            if (!$debug) {
+                $response = $this->getContentByMyWeb($url, [
+                    'cookies' => $this->getCookieJarByMyWeb(),
+                    // 'sink' => $savePath,
+                ]);
+                if (!$response) {
+                    log_message("info", "{$fileName} 파일 다운로드 실패");
+                    return false;
+                }
+                // saveByMyStorage() is not defined by MyStorageFileTrait, so write the body directly.
+                if (file_put_contents($savePath, $response) === false) {
+                    throw new \Exception("File Save Error:" . $savePath);
+                }
+                log_message("info", "{$fileName} 파일이 다운로드되었습니다!");
+                return true;
+            }
+            return true;
+        } catch (\Exception $e) {
+            log_message("error", "다운로드 중 오류 발생: " . $e->getMessage());
+            return false;
+        }
+    }
+}