diff --git a/app/Config/Routes.php b/app/Config/Routes.php
index 22ad419..0829700 100644
--- a/app/Config/Routes.php
+++ b/app/Config/Routes.php
@@ -34,5 +34,8 @@ $routes->group('mangboard', ['namespace' => 'App\Controllers\Mangboard'], functi
$routes->cli('yamap/(:alpha)', 'CrawlerController::yamap/$1');
$routes->cli('yamap/(:alpha)/(:any)', 'CrawlerController::yamap/$1/$2');
$routes->cli('yamap/(:alpha)/(:alphanum)/(:any)', 'CrawlerController::yamap/$1/$2/$3');
+ $routes->cli('yamoon/(:alpha)', 'CrawlerController::yamoon/$1');
+ $routes->cli('yamoon/(:alpha)/(:any)', 'CrawlerController::yamoon/$1/$2');
+ $routes->cli('yamoon/(:alpha)/(:alphanum)/(:any)', 'CrawlerController::yamoon/$1/$2/$3');
});
});
diff --git a/app/Controllers/Mangboard/CrawlerController.php b/app/Controllers/Mangboard/CrawlerController.php
index 409f7bc..1a05ece 100644
--- a/app/Controllers/Mangboard/CrawlerController.php
+++ b/app/Controllers/Mangboard/CrawlerController.php
@@ -2,9 +2,10 @@
namespace App\Controllers\Mangboard;
+use App\Libraries\MyCrawler\YamoonCrawler;
use App\Libraries\MyCrawler\YamapCrawler;
-use App\Controllers\CommonController;
use App\Libraries\Mangboard\UserLibrary;
+use App\Controllers\CommonController;
class CrawlerController extends CommonController
{
@@ -27,4 +28,23 @@ class CrawlerController extends CommonController
return $e->getMessage();
}
}
+    public function yamoon(string $category, string $id = "", string $debug = "false"): string
+    {
+        // CLI entry point: logs in to Mangboard, runs the Yamoon crawler for $category and returns a status text.
+        try {
+            // 1. Site login; an empty $id falls back to the default account from the environment.
+            $login_id = $id !== "" ? $id : getenv("mangboard.login.default.id");
+            $password = getenv("mangboard.login.default.password");
+            $user_entity = (new UserLibrary())->login(getenv("mangboard.host.url"), $login_id, $password);
+            // 2. Hand the logged-in user to the crawler and run it; only the literal "true" enables debug mode.
+            $crawler = new YamoonCrawler($category);
+            $crawler->setUserEntity($user_entity);
+            $crawler->setDebug($debug === "true");
+            $crawler->execute();
+            return "완료되었습니다.";
+        } catch (\Exception $e) {
+            log_message("error", $e->getMessage());
+            return $e->getMessage();
+        }
+    }
}
diff --git a/app/Libraries/MyCrawler/YamapCrawler.php b/app/Libraries/MyCrawler/YamapCrawler.php
index fc57ba0..70b4ba7 100644
--- a/app/Libraries/MyCrawler/YamapCrawler.php
+++ b/app/Libraries/MyCrawler/YamapCrawler.php
@@ -135,19 +135,25 @@ class YamapCrawler extends MyCrawlerLibrary
{
$file_sequence = 1;
$this->_storages = []; //CreateBoard에서 사용을 위해 DetailPage마다 초기화
- foreach ($urls as $mediaType => $url) {
- try {
- list($file_name, $content) = $this->download($mediaType, $url);
- $this->save($file_sequence, $mediaType, $file_name, $content);
- $file_sequence++;
- log_message("notice", __FUNCTION__ . " OriginType->{$mediaType} 작업 완료");
- } catch (\Exception $e) {
- log_message("warning", sprintf(
- "\n---%s,OriginType->%s 오류---\n%s\n-----------------------------------------\n",
- __FUNCTION__,
- $mediaType,
- $e->getMessage()
- ));
+ // log_message("debug", var_export($urls, true));
+ foreach ($urls as $mediaType => $media_urls) {
+ foreach ($media_urls as $url) {
+ try {
+ if ($url === null) {
+ continue;
+ }
+ list($file_name, $content) = $this->download($mediaType, $url);
+ $this->save($file_sequence, $mediaType, $file_name, $content);
+ $file_sequence++;
+ log_message("notice", __FUNCTION__ . " OriginType->{$mediaType} 작업 완료");
+ } catch (\Exception $e) {
+ log_message("warning", sprintf(
+ "\n---%s,OriginType->%s 오류---\n%s\n-----------------------------------------\n",
+ __FUNCTION__,
+ $mediaType,
+ $e->getMessage()
+ ));
+ }
}
}
if (!count($this->_storages)) {
@@ -157,26 +163,58 @@ class YamapCrawler extends MyCrawlerLibrary
//Yamap ViewPage의 이미지나영상데이터가 있으면 URL과MediaType을 가져온다
private function getUrlsByDetailPageMediaType(string $mediaType, Crawler $selector, array $options, array $urls = []): array
{
+ $urls[$mediaType] = [];
$selector->filter($options["tag"])->each(
function (Crawler $node) use (&$mediaType, &$options, &$urls): void {
- log_message("debug", sprintf(
- "getNode->%s[%s]",
- $options["tag"],
- $node->attr($options['attr'])
- ));
- $urls[$mediaType] = $node->attr($options["attr"]);
+ $url = $node->attr($options["attr"]);
+ log_message("debug", "getUrlsByDetailPageMediaType-> {$mediaType}[{$options["attr"]}]:{$url}");
+ if (!is_null($url)) {
+ $urls[$mediaType][] = $url;
+ }
}
);
return $urls;
}
private function detailPage(array $listInfo): array
{
+ // Sample detail-page text (the surrounding HTML markup is missing here):
+ //
+ // 요즘 패션
+ //
+ //
+ // 괴강고귀
+ //
+ //
+ // | 추천 (14) | 조회 (432)
+ //
+ //
+ //
+ //
+ //
+ //
+ //
+ // 2024-09-14 01:53:45
+ //
+ //
+ //
+ //
+ //
+ //
+ //
+ //
+ //
+ //
+ //
+ //
$response = $this->getMySocket()->getContent($listInfo['detail_url']);
- $selector = $this->getSelector($response, getenv("yamap.view.content.tag"));
+ $tag = getenv("yamap.view.content.tag");
+ $selector = $this->getSelector($response, $tag);
+ log_message("debug", "\n-----------detailPage Tag: {$tag}---------------\n{$selector->html()}\n---------------------------\n");
$urls = $this->getUrlsByDetailPageMediaType("image", $selector, ["tag" => "img", "attr" => "src"]);
$urls = $this->getUrlsByDetailPageMediaType("video", $selector, ["tag" => "video", "attr" => "src"], $urls);
- log_message("notice", sprintf("\n-----------%s 작업완료--------\n%s\n-----------------------\n", __FUNCTION__, var_export($urls, true)));
- return $urls;
+ log_message("debug", "\n-------------------------\n" . var_export($urls, true) . "\n-----------------------\n");
+ log_message("notice", "-----------" . __FUNCTION__ . " 작업완료--------");
+ return array($listInfo, $urls);
}
private function mainPage(string $url): array
{
@@ -226,17 +264,19 @@ class YamapCrawler extends MyCrawlerLibrary
} else {
$max_limit = count($listInfos);
}
+ $total = count($listInfos);
$i = 1;
foreach ($listInfos as $listInfo) {
if ($i <= $max_limit) {
try {
- log_message("notice", "게시물 {$i}번째 {$listInfo["nickname"]} 작업시작");
- $this->mediaContent($this->detailPage($listInfo));
+ log_message("notice", "게시물 {$i}번째/{$total}개중 {$listInfo["nickname"]} 작업시작");
+ list($listInfo, $urls) = $this->detailPage($listInfo);
+ $this->mediaContent($urls);
//File DB 및 Board DB 등록작업
$board_entity = $this->getBoardLibrary()->createByCrawler($i, $listInfo, $this->_storages);
$this->getFileLibrary()->createByCrawler($board_entity, $this->_storages);
$this->getImageLibrary()->createByCrawler($board_entity, $this->_storages);
- log_message("notice", "게시물 {$i}번째 {$listInfo["nickname"]} 작업완료.");
+ log_message("notice", "게시물 {$i}번째/{$total}개중 {$listInfo["nickname"]} 작업완료.");
$i++;
} catch (\Exception $e) {
log_message("debug", $e->getMessage());
diff --git a/app/Libraries/MyCrawler/YamoonCrawler.php b/app/Libraries/MyCrawler/YamoonCrawler.php
new file mode 100644
index 0000000..13130ad
--- /dev/null
+++ b/app/Libraries/MyCrawler/YamoonCrawler.php
@@ -0,0 +1,266 @@
+_category = $category;
+ }
+    public function getMySocket()
+    {
+        // Lazily builds the WebSocket for the host named by the yamoon.host.url env entry;
+        // every later call returns that same instance.
+        $this->_mySocket = $this->_mySocket === null ? new WebSocket(getenv('yamoon.host.url')) : $this->_mySocket;
+        return $this->_mySocket;
+    }
+    public function getMyStorage()
+    {
+        // Lazily builds the MangboardStorage for the current category and reuses it afterwards.
+        // getCategory() throws \Exception when no category is defined.
+        $this->_myStorage = $this->_myStorage === null ? new MangboardStorage($this->getCategory()) : $this->_myStorage;
+        return $this->_myStorage;
+    }
+    public function getBoardsLibrary(): BoardsLibrary
+    {
+        // Returns the BoardsLibrary bound to the current category and the logged-in user.
+        // It is constructed on the first call; the same instance is returned afterwards.
+        // getCategory() / getUserEntity() throw \Exception when their value is missing,
+        // so a half-configured crawler fails here instead of creating a broken library.
+        if ($this->_boards_library !== null) {
+            return $this->_boards_library;
+        }
+        $this->_boards_library = new BoardsLibrary(
+            $this->getCategory(),
+            $this->getUserEntity()
+        );
+        return $this->_boards_library;
+    }
+    public function getBoardLibrary(): BoardLibrary
+    {
+        // Created on first use from the boards entity and the logged-in user, then cached.
+        if ($this->_board_library !== null) {
+            return $this->_board_library;
+        }
+        $boards_entity = $this->getBoardsLibrary()->getEntity();
+        $this->_board_library = new BoardLibrary($boards_entity, $this->getUserEntity());
+        return $this->_board_library;
+    }
+    public function getFileLibrary(): FileLibrary
+    {
+        // Built on first use from the boards entity and the logged-in user; cached afterwards.
+        // getUserEntity() throws \Exception when no user has been set.
+        if (is_null($this->_file_library)) {
+            $boards_entity = $this->getBoardsLibrary()->getEntity();
+            $this->_file_library = new FileLibrary($boards_entity, $this->getUserEntity());
+        }
+        return $this->_file_library;
+    }
+    public function getImageLibrary(): ImageLibrary
+    {
+        // One ImageLibrary per crawler instance: created on the first request and
+        // handed out unchanged on every later one.
+        $this->_image_library = $this->_image_library === null ? new ImageLibrary() : $this->_image_library;
+        return $this->_image_library;
+    }
+    public function getUserEntity(): UserEntity
+    {
+        if (is_null($this->_user_entity)) { // nothing has been injected through setUserEntity()
+            throw new \Exception("사용자정보가 없습니다.");
+        }
+        return $this->_user_entity;
+    }
+ public function setUserEntity(UserEntity $user_entity): void
+ { // Injects the logged-in user; getUserEntity() throws while this property is still null.
+ $this->_user_entity = $user_entity;
+ }
+    public function getCategory(): string
+    {
+        if ("" == $this->_category) { // loose comparison kept on purpose: a null category is rejected as well
+            throw new \Exception("저장할 Category가 정의되지 않았습니다.");
+        }
+        return $this->_category;
+    }
+ private function save(int $file_sequence, string $mediaType, string $file_name, string $content): void
+ { // Feeds one downloaded file (name, content, media type, sequence) into the storage object and saves it.
+ log_message("debug", __FUNCTION__ . " 원본파일 {$file_name} 작업 시작");
+ $this->getMyStorage()->setOriginName($file_name);
+ $this->getMyStorage()->setOriginContent($content);
+ $this->getMyStorage()->setOriginType($mediaType);
+ $this->getMyStorage()->setOriginSequence($file_sequence);
+ $this->_storages[] = $this->getMyStorage()->save(); // collected so execute() can pass every stored file on
+ }
+    // Downloads one image/video URL found on a Yamoon detail page; returns [file name, content], throws \Exception on a bad name or type.
+    private function download(string $mediaType, string $url): array
+    {
+        // Name the file after the URL *path* only, so "?query" / "#fragment" never leak into the name or its extension.
+        $file_names = explode('/', (string) parse_url($url, PHP_URL_PATH));
+        $file_name = array_pop($file_names);
+        if ($file_name === "") {
+            throw new \Exception("URL이 파일명 형식이 아닙니다 : " . $this->getMySocket()->getHost() . $url);
+        }
+        $temps = explode(".", $file_name);
+        if (!$this->isFileType_FileTrait(array_pop($temps), $mediaType)) {
+            throw new \Exception("파일명 형식이 {$mediaType}가 아닙니다");
+        }
+        $content = $this->getMySocket()->getContent($url);
+        log_message("notice", "{$file_name} 파일이 다운로드되었습니다!");
+        return array($file_name, $content);
+    }
+    private function mediaContent(array $urls): void
+    {
+        // Downloads and stores every media URL collected from one detail page.
+        // $urls maps a media type to its list of URLs, as built by getUrlsByDetailPageMediaType().
+        // A URL that fails is logged as a warning and skipped; \Exception is thrown only
+        // when not a single file could be stored.
+        $this->_storages = []; // reset per detail page; execute() reads it when creating the board entry
+        // A sequence number is consumed only by a file that was saved successfully.
+        $file_sequence = 1;
+        foreach ($urls as $mediaType => $media_urls) {
+            foreach ($media_urls as $url) {
+                try {
+                    [$file_name, $content] = $this->download($mediaType, $url);
+                    $this->save($file_sequence, $mediaType, $file_name, $content);
+                    $file_sequence += 1;
+                    log_message("notice", __FUNCTION__ . " OriginType->{$mediaType} 작업 완료");
+                } catch (\Exception $e) {
+                    $format = "\n---%s,OriginType->%s 오류---\n%s\n-----------------------------------------\n";
+                    log_message("warning", sprintf($format, __FUNCTION__, $mediaType, $e->getMessage()));
+                }
+            }
+        }
+        if (count($this->_storages) === 0) {
+            throw new \Exception("Download된 Content가 없습니다.");
+        }
+    }
+    // Collects the $options["attr"] value of every $options["tag"] node below $selector into $urls[$mediaType].
+    // Nodes without that attribute are skipped; entries of other media types already in $urls are kept.
+    private function getUrlsByDetailPageMediaType(string $mediaType, Crawler $selector, array $options, array $urls = []): array
+    {
+        $urls[$mediaType] = [];
+        $collect = function (Crawler $node) use ($mediaType, $options, &$urls): void {
+            $url = $node->attr($options["attr"]);
+            log_message("debug", "getUrlsByDetailPageMediaType-> {$mediaType}[{$options["attr"]}]:{$url}");
+            if ($url !== null) {
+                $urls[$mediaType][] = $url;
+            }
+        };
+        $selector->filter($options["tag"])->each($collect);
+        return $urls;
+    }
+    private function detailPage(array $listInfo): array
+    {
+        // Fetches one post's detail page and collects the image/video URLs inside its content node.
+        // Returns [$listInfo, $urls]; $urls has an "image" list and a "video" list (either may be empty).
+        $url = "/newboard/yamoonboard/" . $listInfo['detail_url'];
+        $response = $this->getMySocket()->getContent($url);
+        // Media is searched only below the selector configured in yamoon.view.content.tag.
+        $tag = getenv("yamoon.view.content.tag");
+        $selector = $this->getSelector($response, $tag);
+        log_message("debug", "\n-----------detailPage Tag: {$tag}---------------\n{$selector->html()}\n---------------------------\n");
+        $urls = [];
+        foreach (["image" => "img", "video" => "video"] as $mediaType => $html_tag) {
+            $options = ["tag" => $html_tag, "attr" => "src"];
+            $urls = $this->getUrlsByDetailPageMediaType($mediaType, $selector, $options, $urls);
+        }
+        log_message("debug", "\n-------------------------\n" . var_export($urls, true) . "\n-----------------------\n");
+        log_message("notice", "-----------" . __FUNCTION__ . " 작업완료--------");
+        return [$listInfo, $urls];
+    }
+    private function mainPage(string $url): array
+    {
+        // Reads the list page at $url and returns the posts dated today, each as
+        // ['title', 'detail_url', 'nickname', 'hit', 'date']. Throws \Exception when there is none.
+        // An item's info text is pipe-separated, e.g. "yeeyuu | 6 | 369 | No 89372 | 2024-09-13";
+        // fields 0 (nickname), 2 (hit) and 4 (date) are the ones used.
+        $listInfos = [];
+        $today = date("Y-m-d");
+        $response = $this->getMySocket()->getContent($url);
+        $this->getSelector($response, getenv("yamoon.list.tag"))->each(
+            function (Crawler $node) use (&$listInfos, $today): void {
+                $link_node = $node->filter(getenv("yamoon.list.item.link.tag"));
+                $info_node = $node->filter(getenv("yamoon.list.item.info.tag"));
+                // attr()/text() throw on an empty node list, which would abort the whole list; skip such items.
+                if (!$link_node->count() || !$info_node->count()) {
+                    return;
+                }
+                $infos = array_map('trim', explode("|", $info_node->text()));
+                if (count($infos) < 5 || $infos[4] !== $today) {
+                    return;
+                }
+                $title = $link_node->text();
+                $detail_url = $link_node->attr("href");
+                $listInfos[] = ['title' => $title, 'detail_url' => $detail_url, 'nickname' => $infos[0], 'hit' => $infos[2], 'date' => $infos[4]];
+            }
+        );
+        if (!count($listInfos)) {
+            throw new \Exception("Target URL이 없습니다.");
+        }
+        log_message("notice", __FUNCTION__ . " 작업 완료");
+        return $listInfos;
+    }
+    public function execute(): void
+    {
+        // Crawls the category's list (or one env-defined test post in debug mode) and registers each post with its media.
+        if ($this->getDebug()) {
+            // Must be a LIST of post arrays like mainPage() returns; a flat array made the loop below iterate over scalar fields.
+            $listInfos = [[
+                'title' => getenv("yamoon.view.test.title"),
+                'nickname' => getenv("yamoon.view.test.nickname"),
+                'detail_url' => getenv("yamoon.view.test.url"),
+                'time' => date("Y-m-d H:i:s"),
+                'date' => date("Y-m-d"),
+                'hit' => 1,
+            ]];
+        } else {
+            $listInfos = $this->mainPage(getenv("yamoon.list.url." . $this->getCategory()));
+        }
+        $total = count($listInfos);
+        // A limit of 0 means "all posts"; otherwise stop after the smaller of the limit and the post count.
+        $limit = intval(getenv("yamoon.list.max_limit"));
+        $max_limit = $limit ? min($limit, $total) : $total;
+        $i = 1;
+        foreach ($listInfos as $listInfo) {
+            if ($i > $max_limit) {
+                break;
+            }
+            try {
+                log_message("notice", "게시물 {$i}번째/{$total}개중 {$listInfo["nickname"]} 작업시작");
+                [$listInfo, $urls] = $this->detailPage($listInfo);
+                $this->mediaContent($urls);
+                // Register the post in the Board DB and its stored files in the File DB.
+                $board_entity = $this->getBoardLibrary()->createByCrawler($i, $listInfo, $this->_storages);
+                $this->getFileLibrary()->createByCrawler($board_entity, $this->_storages);
+                $this->getImageLibrary()->createByCrawler($board_entity, $this->_storages);
+                log_message("notice", "게시물 {$i}번째/{$total}개중 {$listInfo["nickname"]} 작업완료.");
+                $i++; // counts successes only, so a failed post does not use up the limit
+            } catch (\Exception $e) {
+                log_message("debug", $e->getMessage());
+            }
+        }
+        log_message("notice", "Crawler->" . __FUNCTION__ . " 작업이 완료되었습니다.");
+    }
+}