diff --git a/app/Config/Routes.php b/app/Config/Routes.php
index 0829700..6e551ce 100644
--- a/app/Config/Routes.php
+++ b/app/Config/Routes.php
@@ -33,9 +33,12 @@ $routes->group('mangboard', ['namespace' => 'App\Controllers\Mangboard'], functi
$routes->group('crawler', function ($routes) {
$routes->cli('yamap/(:alpha)', 'CrawlerController::yamap/$1');
$routes->cli('yamap/(:alpha)/(:any)', 'CrawlerController::yamap/$1/$2');
- $routes->cli('yamap/(:alpha)/(:alphanum)/(:any)', 'CrawlerController::yamap/$1/$2/$3');
+ $routes->cli('yamap/(:alpha)/(:any)/(:any)', 'CrawlerController::yamap/$1/$2/$3');
$routes->cli('yamoon/(:alpha)', 'CrawlerController::yamoon/$1');
$routes->cli('yamoon/(:alpha)/(:any)', 'CrawlerController::yamoon/$1/$2');
- $routes->cli('yamoon/(:alpha)/(:alphanum)/(:any)', 'CrawlerController::yamoon/$1/$2/$3');
+ $routes->cli('yamoon/(:alpha)/(:any)/(:any)', 'CrawlerController::yamoon/$1/$2/$3');
+ $routes->cli('sir/(:alpha)', 'CrawlerController::sir/$1');
+ $routes->cli('sir/(:alpha)/(:any)', 'CrawlerController::sir/$1/$2');
+ $routes->cli('sir/(:alpha)/(:any)/(:any)', 'CrawlerController::sir/$1/$2/$3');
});
});
diff --git a/app/Controllers/Mangboard/CrawlerController.php b/app/Controllers/Mangboard/CrawlerController.php
index 0b01280..c55ad09 100644
--- a/app/Controllers/Mangboard/CrawlerController.php
+++ b/app/Controllers/Mangboard/CrawlerController.php
@@ -4,8 +4,9 @@ namespace App\Controllers\Mangboard;
use App\Controllers\CommonController;
use App\Entities\Mangboard\UserEntity;
-use App\Libraries\MyCrawler\YamapCrawler;
-use App\Libraries\MyCrawler\YamoonCrawler;
+use App\Libraries\MyCrawler\Mangboard\YamapCrawler;
+use App\Libraries\MyCrawler\Mangboard\YamoonCrawler;
+use App\Libraries\MyCrawler\Mangboard\SirCrawler;
use App\Models\Mangboard\UserModel;
class CrawlerController extends CommonController
@@ -18,8 +19,11 @@ class CrawlerController extends CommonController
}
return $this->_user_model;
}
- public function login(string $host, string $id, string $password): bool|UserEntity
+ public function login(string $id): bool|UserEntity
{
+ $host = getenv("mangboard.host.url");
+ $id = $id == "" ? getenv("mangboard.login.default.id") : $id;
+ $password = getenv("mangboard.login.default.password");
$user_entity = $this->getUserModel()->getEntityByID($id);
// $response = $this->getWebLibrary($host)->getResponse(
// $host . getenv("mangboard.login.url"),
@@ -42,16 +46,16 @@ class CrawlerController extends CommonController
log_message("notice", "{$id}로 로그인 성공");
return $user_entity;
}
- public function yamap(string $category, string $id = "", string $debug = "false"): string
+ public function yamap(string $category, string $id = "", string $option = ""): string
{
try {
- $id = $id == "" ? getenv("mangboard.login.default.id") : $id;
- $password = getenv("mangboard.login.default.password");
//1. 사이트 로그인 처리
- $user_entity = $this->login(getenv("mangboard.host.url"), $id, $password);
+ $user_entity = $this->login($id);
//2. 필요한 로그인한 사용자정보,Socket,Storage 정의후 Crawler에게 전달.
$crawler = new YamapCrawler(getenv('yamap.host.url'), $category, $user_entity);
- $crawler->setDebug($debug === "true" ? true : false);
+ if ($option) {
+ $crawler->setDebug($option === "debug" ? true : false);
+ }
$crawler->execute(intval(getenv("yamap.list.max_limit")));
return "완료되었습니다.";
} catch (\Exception $e) {
@@ -59,16 +63,16 @@ class CrawlerController extends CommonController
return $e->getMessage();
}
}
- public function yamoon(string $category, string $id = "", string $debug = "false"): string
+ public function yamoon(string $category, string $id = "", string $option = ""): string
{
try {
- $id = $id == "" ? getenv("mangboard.login.default.id") : $id;
- $password = getenv("mangboard.login.default.password");
//1. 사이트 로그인 처리
- $user_entity = $this->login(getenv("mangboard.host.url"), $id, $password);
+ $user_entity = $this->login($id);
//2. 필요한 로그인한 사용자정보,Socket,Storage 정의후 Crawler에게 전달.
$crawler = new YamoonCrawler(getenv("yamoon.host.url"), $category, $user_entity);
- $crawler->setDebug($debug === "true" ? true : false);
+ if ($option) {
+ $crawler->setDebug($option === "debug" ? true : false);
+ }
$crawler->execute(intval(getenv("yamap.list.max_limit")));
return "완료되었습니다.";
} catch (\Exception $e) {
@@ -76,4 +80,21 @@ class CrawlerController extends CommonController
return $e->getMessage();
}
}
+    /**
+     * CLI action: crawl the "sir" site and register the result on the board.
+     *
+     * @param string $category Passed to SirCrawler unchanged.
+     * @param string $id       Forwarded to login().
+     * @param string $option   When non-empty, sets crawler debug mode (on only for the exact value "debug").
+     *
+     * @return string The completion message, or the message of the caught \Exception.
+     */
+    public function sir(string $category, string $id = "", string $option = ""): string
+    {
+        try {
+            // 1. Resolve the user the crawl runs as.
+            $login_user = $this->login($id);
+            // 2. Build the crawler from the configured host, the category and that user.
+            $host = getenv("sir.host.url");
+            $crawler = new SirCrawler($host, $category, $login_user);
+            if ($option) {
+                $crawler->setDebug($option === "debug");
+            }
+            $max_limit = (int) getenv("sir.list.max_limit");
+            $crawler->execute($max_limit);
+        } catch (\Exception $e) {
+            $message = $e->getMessage();
+            log_message("error", $message);
+            return $message;
+        }
+        return "완료되었습니다.";
+    }
}
diff --git a/app/Libraries/MyCrawler/Mangboard/MangboardCrawler.php b/app/Libraries/MyCrawler/Mangboard/MangboardCrawler.php
new file mode 100644
index 0000000..3f1fe9a
--- /dev/null
+++ b/app/Libraries/MyCrawler/Mangboard/MangboardCrawler.php
@@ -0,0 +1,65 @@
+_host = $host;
+ $this->_category = $category;
+ $this->_user_entity = $user_entity;
+ }
+    /**
+     * Returns the socket used to fetch pages, creating it from $this->_host on first use.
+     *
+     * @return WebSocket The same instance on every later call.
+     */
+    protected function getMySocket()
+    {
+        if ($this->_mySocket !== null) {
+            return $this->_mySocket;
+        }
+        $this->_mySocket = new WebSocket($this->_host);
+        return $this->_mySocket;
+    }
+    /**
+     * Creates a fresh storage object for this crawler's category and user.
+     *
+     * @return MangboardStorage A new instance on every call.
+     */
+    final protected function createMyStorage()
+    {
+        $storage = new MangboardStorage($this->_category, $this->_user_entity);
+        return $storage;
+    }
+    /**
+     * Registers one crawled post: creates the board entry, then runs backend() on every storage.
+     *
+     * A storage whose backend() throws an \Exception is logged at "notice" level and skipped;
+     * the remaining storages are still processed.
+     *
+     * @param int   $cnt      Forwarded to BoardModel::createByCrawler().
+     * @param array $listInfo Post information forwarded to createByCrawler().
+     * @param array $storages Objects providing backend(), getOriginSequence() and getOriginName().
+     */
+    protected function backend_process(int $cnt, array $listInfo, array $storages)
+    {
+        // The boards entity is looked up by "board_<category>"; the post model is built with "mb_board_<category>".
+        $board_id = "board_" . $this->_category;
+        $boards_model = new BoardsModel();
+        $boards_entity = $boards_model->getEntityByID($board_id);
+        $post_model = new BoardModel("mb_" . $board_id);
+        $post_entity = $post_model->createByCrawler($boards_entity, $this->_user_entity, $cnt, $listInfo, $storages);
+        foreach ($storages as $item) {
+            try {
+                $item->backend($boards_entity, $post_entity, $post_model->getTable());
+            } catch (\Exception $e) {
+                $report = sprintf(
+                    "\n---%s -> %s 게시물의 %s번째:%s 파일 등록 오류---\n%s\n--------------------------------\n",
+                    __FUNCTION__,
+                    $post_entity->getTitle(),
+                    $item->getOriginSequence(),
+                    $item->getOriginName(),
+                    $e->getMessage()
+                );
+                log_message("notice", $report);
+            }
+        }
+    }
+}
diff --git a/app/Libraries/MyCrawler/Mangboard/SirCrawler.php b/app/Libraries/MyCrawler/Mangboard/SirCrawler.php
new file mode 100644
index 0000000..92e846f
--- /dev/null
+++ b/app/Libraries/MyCrawler/Mangboard/SirCrawler.php
@@ -0,0 +1,177 @@
+
+ //
+ // 할아버지의 마술 정보
+ //
+ // 할아버지의 마술
+ //
+ // -
+ // 감독님
+ //
+ // 자기소개
+ // 아이디로 검색
+ // 회원게시물
+ //
+ // (210.♡.♡.13)
+ //
+ // - 조회 245
+ //
+ // -
+ //
+ //
+ //
+ //
+ //
+ //
+ //
+ //
+ //
+ // 본문
+ //
+ //
+ //
+ //
+ //
..
getMySocket()->getContent($listInfo['detail_url']);
+ //작성시간
+ $selector = $this->getSelector($response, getenv("sir.view.date.tag"));
+ //Date Format이 맞지않아 변경해주기위함 : 2024.09.13 00:24:04 -> 2024-09-13 00:24:04
+ $listInfo['date'] = trim($selector->text());
+ $listInfo['date'] = DateTime::createFromFormat('Y.m.d H:i:s', $listInfo['date']);
+ $listInfo['date'] = $listInfo['date']->format('Y-m-d H:i:s');
+ // if ($this->getDebug()) {
+ // throw new \Exception(
+ // sprintf(
+ // "\n--------------%s Debug--------------\n%s\n%s\n---------------------------------------\n",
+ // __FUNCTION__,
+ // var_export($listInfo, true),
+ // $selector->html()
+ // )
+ // );
+ // }
+ //작성내용
+ $tag = getenv("sir.view.content.tag");
+ $selector = $this->getSelector($response, $tag, true);
+ $media_urls = $this->getUrlsByMediaType("image", $selector, ["tag" => "img", "attr" => "src"]);
+ $media_urls = $this->getUrlsByMediaType("video", $selector, ["tag" => "video", "attr" => "src"], $media_urls);
+ if ($this->getDebug()) {
+ throw new \Exception(sprintf(
+ "\n--------------%s Debug--------------\n%s%s\n---------------------------------------\n",
+ __FUNCTION__,
+ var_export($listInfo, true),
+ var_export($media_urls, true)
+ ));
+ } else {
+ $storages = $this->media_process($media_urls);
+ if (!count($storages)) {
+ throw new \Exception("등록할 자료가 없습니다.");
+ }
+ $this->backend_process($cnt, $listInfo, $storages);
+ }
+ return $listInfo;
+ }
+ //리스트내용
+ //
+ //
+ //
할아버지의 마술 3
+ //
21967
+ //
+ //
+    /**
+     * Reads the list page, collects one entry per list row and hands them to main_process().
+     *
+     * When debug mode is on, detail_page() is first called with the configured test URL.
+     * Any \Exception raised here is logged at "warning" level and not rethrown.
+     *
+     * @param int $max_limit Forwarded to main_process().
+     */
+    public function execute(int $max_limit): void
+    {
+        try {
+            $listInfos = [];
+            if ($this->getDebug()) {
+                $this->detail_page(1, ['detail_url' => getenv("sir.view.test.url")]);
+            }
+            $response = $this->getMySocket()->getContent(getenv("sir.list.url"));
+            $this->getSelector($response, getenv("sir.list.tag"))->each(
+                function (Crawler $node) use (&$listInfos): void {
+                    $link_node = $node->filter(getenv("sir.list.item.link.tag"));
+                    $href = $link_node->attr("href");
+                    if ($href === null) {
+                        // attr() returns null when the attribute is missing. getChangeURL() only accepts a
+                        // string, so passing null would raise a TypeError that no catch (\Exception) handles.
+                        log_message("debug", "href 속성이 없는 목록 항목을 건너뜁니다.");
+                        return;
+                    }
+                    // Per the original note, the href starts with two slashes and getChangeURL() drops one.
+                    $detail_url = $this->getChangeURL($href);
+                    $title = $link_node->text();
+                    $nickname = $node->filter(getenv("sir.list.item.nickname.tag"))->text();
+                    $hit = $node->filter(getenv("sir.list.item.hit.tag"))->text();
+                    // The date is not read from the list row; it is stored as an empty string here.
+                    $listInfos[] = ['title' => $title, 'nickname' => $nickname, 'detail_url' => $detail_url, 'date' => "", 'hit' => $hit];
+                }
+            );
+            if (!count($listInfos)) {
+                throw new \Exception("Target URL이 없습니다.");
+            }
+            $this->main_process($max_limit, $listInfos);
+            log_message("notice", __FUNCTION__ . " 작업이 완료되었습니다.");
+        } catch (\Exception $e) {
+            log_message("warning", sprintf(
+                "\n---%s 오류---\n%s\n-----------------------------------------\n",
+                __FUNCTION__,
+                $e->getMessage()
+            ));
+        }
+    }
+}
diff --git a/app/Libraries/MyCrawler/Mangboard/YamapCrawler.php b/app/Libraries/MyCrawler/Mangboard/YamapCrawler.php
new file mode 100644
index 0000000..e3d2764
--- /dev/null
+++ b/app/Libraries/MyCrawler/Mangboard/YamapCrawler.php
@@ -0,0 +1,106 @@
+getMySocket()->getContent($listInfo['detail_url']);
+ $tag = getenv("yamap.view.content.tag");
+ $selector = $this->getSelector($response, $tag);
+ $media_urls = $this->getUrlsByMediaType("image", $selector, ["tag" => "img", "attr" => "src"]);
+ $media_urls = $this->getUrlsByMediaType("video", $selector, ["tag" => "video", "attr" => "src"], $media_urls);
+ if ($this->getDebug()) {
+ throw new \Exception(sprintf(
+ "\n--------------%s Debug--------------\n%s%s\n---------------------------------------\n",
+ __FUNCTION__,
+ var_export($listInfo, true),
+ var_export($media_urls, true)
+ ));
+ } else {
+ // Image 나 Video 소스들의 url을 가져와서 실제 다운받는 처리
+ $storages = $this->media_process($media_urls);
+ if (!count($storages)) {
+ throw new \Exception("등록할 자료가 없습니다.");
+ }
+ $this->backend_process($cnt, $listInfo, $storages);
+ }
+ return $listInfo;
+ }
+ //리스트내용
+ //
+ //
요즘 패션
+ //
+ //
+ // 괴강고귀
+ //
+ //
+ // | 추천 (14) | 조회 (432)
+ //
+ //
+ //
+ //
+ //
+ //
+ //
2024-09-14 01:53:45
+ //
+ //
+ //
+ //
+ //

+ //
+ //
+ //
+ //
+ //
+ //
+    /**
+     * Reads the list page, collects every row not written by the excluded nickname,
+     * and hands the collected entries to main_process().
+     *
+     * When debug mode is on, detail_page() is first called with the configured test URL.
+     * Any \Exception raised here is logged at "warning" level and not rethrown.
+     *
+     * @param int $max_limit Forwarded to main_process().
+     */
+    public function execute(int $max_limit): void
+    {
+        try {
+            $listInfos = [];
+            if ($this->getDebug()) {
+                $this->detail_page(1, ['detail_url' => getenv("yamap.view.test.url")]);
+            }
+            $list_html = $this->getMySocket()->getContent(getenv("yamap.list.url"));
+            // Narrow to the list container, then visit each item node inside it.
+            $this->getSelector($list_html, getenv("yamap.list.tag"))
+                ->filter(getenv("yamap.list.item.tag"))
+                ->each(
+                    function (Crawler $item) use (&$listInfos): void {
+                        $hit = $item->filter(getenv("yamap.list.item.hit.tag"))->text();
+                        $date = $item->filter(getenv("yamap.list.item.date.tag"))->text();
+                        $nickname = $item->filter(getenv("yamap.list.item.nickname.tag"))->text();
+                        // Rows written by the configured "except" nickname are ignored.
+                        if ($nickname == getenv("yamap.list.item.nickname.except")) {
+                            return;
+                        }
+                        // The link node supplies the detail URL; the title is the text of its last child.
+                        $anchor = $item->filter(getenv("yamap.list.item.link.tag"));
+                        $detail_url = $anchor->attr("href");
+                        $title = $anchor->children()->last()->text();
+                        $listInfos[] = [
+                            'title' => $title,
+                            'nickname' => $nickname,
+                            'detail_url' => $detail_url,
+                            'date' => $date,
+                            'hit' => $hit,
+                        ];
+                    }
+                );
+            if (count($listInfos) === 0) {
+                throw new \Exception("Target URL이 없습니다.");
+            }
+            $this->main_process($max_limit, $listInfos);
+            log_message("notice", __FUNCTION__ . " 작업이 완료되었습니다.");
+        } catch (\Exception $e) {
+            $report = sprintf(
+                "\n---%s 오류---\n%s\n-----------------------------------------\n",
+                __FUNCTION__,
+                $e->getMessage()
+            );
+            log_message("warning", $report);
+        }
+    }
+}
diff --git a/app/Libraries/MyCrawler/Mangboard/YamoonCrawler.php b/app/Libraries/MyCrawler/Mangboard/YamoonCrawler.php
new file mode 100644
index 0000000..7dc4bfe
--- /dev/null
+++ b/app/Libraries/MyCrawler/Mangboard/YamoonCrawler.php
@@ -0,0 +1,121 @@
+
+ //
요즘 화제라는 명품 목걸이
+ //
+ //
CAT7478
+ //
| 추천 (8) | 조회 (268)
+ //
+ //
+ //
+ //
+ //
+ //
2024-09-16 09:52:39
+ //
+ //
+ //
+ //

+ //

+ //
+ //
+ //
전화기선 짤라서 목걸이 만들어도 위화감이 전혀 없을것같은
+ //
+ //
디자인이군요
+ //
+ //
+ //
+ //
+ //
+ //
+    /**
+     * Fetches one post, collects its image and video URLs, downloads them and registers the post.
+     *
+     * In debug mode nothing is downloaded: the list info and the collected URLs are dumped
+     * through an exception instead.
+     *
+     * @param int   $cnt      Forwarded to backend_process().
+     * @param array $listInfo Entry for this post; 'detail_url' is appended to "/newboard/yamoonboard/".
+     *
+     * @return array The $listInfo that was passed in.
+     *
+     * @throws \Exception In debug mode, or when media_process() yields no storages.
+     */
+    protected function detail_page(int $cnt, array $listInfo): array
+    {
+        $page = $this->getMySocket()->getContent("/newboard/yamoonboard/" . $listInfo['detail_url']);
+        // Post body: every <img src> and <video src> inside the content container.
+        $content_tag = getenv("yamoon.view.content.tag");
+        $content = $this->getSelector($page, $content_tag);
+        $media_urls = $this->getUrlsByMediaType("image", $content, ["tag" => "img", "attr" => "src"]);
+        $media_urls = $this->getUrlsByMediaType("video", $content, ["tag" => "video", "attr" => "src"], $media_urls);
+        if ($this->getDebug()) {
+            $dump = sprintf(
+                "\n--------------%s Debug--------------\n%s%s\n---------------------------------------\n",
+                __FUNCTION__,
+                var_export($listInfo, true),
+                var_export($media_urls, true)
+            );
+            throw new \Exception($dump);
+        }
+        // Download the media behind the collected URLs.
+        $storages = $this->media_process($media_urls);
+        if (count($storages) === 0) {
+            throw new \Exception("등록할 자료가 없습니다.");
+        }
+        $this->backend_process($cnt, $listInfo, $storages);
+        return $listInfo;
+    }
+ //리스트 내용
+ //
+ //
+ // 졸고 있는 여군
+ // 6
+ // yeeyuu | 6 | 369 | No 89372 | 2024-09-13
+ // |
+    /**
+     * Reads the list page, collects one entry per list row and hands them to main_process().
+     *
+     * When debug mode is on, detail_page() is first called with the configured test URL.
+     * Any \Exception raised here is logged at "warning" level and not rethrown.
+     *
+     * @param int $max_limit Forwarded to main_process().
+     */
+    public function execute(int $max_limit): void
+    {
+        try {
+            $listInfos = [];
+            if ($this->getDebug()) {
+                $this->detail_page(1, ['detail_url' => getenv("yamoon.view.test.url")]);
+            }
+            $response = $this->getMySocket()->getContent(getenv("yamoon.list.url"));
+            $this->getSelector($response, getenv("yamoon.list.tag"))->each(
+                function (Crawler $node) use (&$listInfos): void {
+                    $link_node = $node->filter(getenv("yamoon.list.item.link.tag"));
+                    $detail_url = $link_node->attr("href");
+                    $title = $link_node->text();
+                    $info_node = $node->filter(getenv("yamoon.list.item.info.tag"));
+                    // The info text is "|"-separated; fields 0, 2 and 4 are used as nickname, hit and date.
+                    // Padding to five entries keeps a shorter line from triggering undefined-offset errors;
+                    // the missing fields simply become empty strings.
+                    $infos = array_pad(explode("|", $info_node->text()), 5, "");
+                    $listInfos[] = ['title' => $title, 'detail_url' => $detail_url, 'nickname' => trim($infos[0]), 'hit' => trim($infos[2]), 'date' => trim($infos[4])];
+                }
+            );
+            if (!count($listInfos)) {
+                throw new \Exception("Target URL이 없습니다.");
+            }
+            $this->main_process($max_limit, $listInfos);
+            log_message("notice", __FUNCTION__ . " 작업이 완료되었습니다.");
+        } catch (\Exception $e) {
+            log_message("warning", sprintf(
+                "\n---%s 오류---\n%s\n-----------------------------------------\n",
+                __FUNCTION__,
+                $e->getMessage()
+            ));
+        }
+    }
+}
diff --git a/app/Libraries/MyCrawler/MyCrawler.php b/app/Libraries/MyCrawler/MyCrawler.php
index 56525de..130e53f 100644
--- a/app/Libraries/MyCrawler/MyCrawler.php
+++ b/app/Libraries/MyCrawler/MyCrawler.php
@@ -9,64 +9,58 @@ use App\Traits\FileTrait;
abstract class MyCrawler extends CommonLibrary
{
use FileTrait;
- private $_mySocket = null;
- protected function __construct($mySocket)
+ protected function __construct()
{
parent::__construct();
- $this->_mySocket = $mySocket;
}
+ abstract protected function getMySocket();
abstract protected function createMyStorage();
- abstract protected function detail_page(int $cnt, array $listInfo): void;
- final protected function getMySocket()
- {
- if ($this->_mySocket === null) {
- throw new \Exception("Socket이 지정되지 않았습니다.");
- }
- return $this->_mySocket;
- }
- final protected function getSelector(string $content, string $tag): Crawler
+ abstract protected function detail_page(int $cnt, array $listInfo): array;
+ /**
+ * Parses $content and returns the nodes matching the selector $tag.
+ *
+ * @param string $content    Markup to parse.
+ * @param string $tag        Selector passed to Crawler::filter().
+ * @param bool   $isViewHTML When true and something matched, the matched HTML is written to the debug log.
+ *
+ * @return Crawler The filtered node list (may be empty).
+ */
+ final protected function getSelector(string $content, string $tag, $isViewHTML = false): Crawler
{
$crawler = new Crawler($content);
if ($this->getDebug()) {
+ log_message("debug", __FUNCTION__ . "=> " . $tag);
+ }
+ // Filter once and reuse the result for both the optional HTML dump and the return value.
+ $selector = $crawler->filter($tag);
+ // html() throws on an empty node list, so the dump is only attempted when something matched.
+ if ($isViewHTML && $selector->count() > 0) {
log_message("debug", sprintf(
- "\n---------%s----------\ntag:%s\n%s\n-------------------\n",
+ "\n------------%s HTML-------------\n%s\n-----------------------------------------------------\n",
__FUNCTION__,
- $tag,
- $content
+ $selector->html()
));
- exit;
}
- return $crawler->filter($tag);
+ return $selector;
}
- //--------미디어 URL관련------
- private function getMediaUrlsByMediaType(string $media_type, Crawler $selector, array $options, array $urls = []): array
+ /**
+ * Hook applied to each URL before it is used; this base version returns the URL unchanged.
+ *
+ * @param string $url URL as found in the page.
+ *
+ * @return string The same URL.
+ */
+ protected function getChangeURL(string $url): string
+ {
+ $result = $url;
+ return $result;
+ }
+ /**
+ * Collects the URLs of one media type from $selector and stores them under $urls[$media_type].
+ *
+ * @param string  $media_type Key to fill in $urls; "video" additionally falls back to the first child's src.
+ * @param Crawler $selector   Node list to search in.
+ * @param array   $options    "tag" is the selector of the media elements, "attr" the attribute holding the URL.
+ * @param array   $urls       URLs collected so far; the entry for $media_type is reset before collecting.
+ *
+ * @return array $urls with $urls[$media_type] holding every URL found, each passed through getChangeURL().
+ */
+ protected function getUrlsByMediaType(string $media_type, Crawler $selector, array $options, array $urls = []): array
{
$urls[$media_type] = [];
$selector->filter($options["tag"])->each(
function (Crawler $node) use (&$media_type, &$options, &$urls): void {
$url = $node->attr($options["attr"]);
- log_message("debug", __FUNCTION__ . "-> {$media_type}[{$options["attr"]}]:{$url}");
- if (!is_null($url)) {
- $urls[$media_type][] = $url;
+ // A video element may carry its URL on a child element instead of its own attribute.
+ if ($media_type === 'video' && $url === null) {
+ $children = $node->children();
+ // attr() throws on an empty node list, so the child is only read when one exists.
+ if ($children->count() > 0) {
+ $url = $children->attr("src");
+ }
+ }
+ if ($url !== null) {
+ $urls[$media_type][] = $this->getChangeURL($url);
+ } else {
+ log_message("debug", __FUNCTION__ . "-> {$media_type}[{$options["attr"]}]\n");
+ log_message("debug", $node->html());
}
}
);
+ log_message("notice", "-----------" . __FUNCTION__ . "=> {$media_type} 작업완료--------");
return $urls;
}
- //detailPage의 이미지나영상데이터가 있으면 URL과MediaType을 가져온다
- final protected function getMediaUrls(string $response, string $tag, array $listInfo): array
- {
- $selector = $this->getSelector($response, $tag);
- log_message("debug", "\n-----------detailPage Tag: {$tag}---------------\n{$selector->html()}\n---------------------------\n");
- $urls = $this->getMediaUrlsByMediaType("image", $selector, ["tag" => "img", "attr" => "src"]);
- $urls = $this->getMediaUrlsByMediaType("video", $selector, ["tag" => "video", "attr" => "src"], $urls);
- // log_message("debug", "\n-------------------------\n" . var_export($urls, true) . "\n-----------------------\n");
- log_message("notice", "-----------" . __FUNCTION__ . " 작업완료--------");
- return array($listInfo, $urls);
- }
-
- //--------미디어 관련-------
private function media_save(int $file_sequence, string $media_type, string $file_name, string $content): mixed
{
log_message("debug", __FUNCTION__ . " 원본파일 {$file_name} 작업 시작");
@@ -138,11 +132,14 @@ abstract class MyCrawler extends CommonLibrary
$total = count($listInfos);
$i = 1;
foreach ($listInfos as $listInfo) {
+ if ($this->getDebug()) {
+ $i = $max_limit;
+ }
if ($i <= $max_limit) {
log_message("notice", "게시물 {$i}번째/총:{$total} {$listInfo["nickname"]} 작업시작");
try {
//listInfo는 title,작성자,작성시간등등의 정보를 가지고 있어 detail_page 처리 안에서 바뀔 수 있으므로 다시 반환 받는다.
- $this->detail_page($i, $listInfo);
+ $listInfo = $this->detail_page($i, $listInfo);
} catch (\Exception $e) {
log_message("warning", sprintf(
"\n---%s {$i}번째/총:{$total} 오류---\n%s\n-----------------------------------------\n",
diff --git a/app/Libraries/MyCrawler/YamapCrawler.php b/app/Libraries/MyCrawler/YamapCrawler.php
deleted file mode 100644
index b26eb04..0000000
--- a/app/Libraries/MyCrawler/YamapCrawler.php
+++ /dev/null
@@ -1,139 +0,0 @@
-_category = $category;
- $this->_user_entity = $user_entity;
- }
- final protected function createMyStorage()
- {
- return new MangboardStorage($this->_category, $this->_user_entity);
- }
- //작성내용
- //
- //
요즘 패션
- //
- //
- // 괴강고귀
- //
- //
- // | 추천 (14) | 조회 (432)
- //
- //
- //
- //
- //
- //
- //
2024-09-14 01:53:45
- //
- //
- //
- //
- //

- //
- //
- //
- //
- //
- //
- protected function detail_page(int $cnt, array $listInfo): void
- {
- $response = $this->getMySocket()->getContent($listInfo['detail_url']);
- $tag = getenv("yamap.view.content.tag");
- list($listInfo, $media_urls) = $this->getMediaUrls($response, $tag, $listInfo);
- //Image 나 Video 소스들의 url을 가져와서 실제 다운받는 처리
- $storages = $this->media_process($media_urls);
- if (!count($storages)) {
- throw new \Exception("등록할 자료가 없습니다.");
- }
- //File DB 및 Board DB 등록작업등
- $baord_name = "board_" . $this->_category;
- $boardsModel = new BoardsModel();
- $boards_entity = $boardsModel->getEntityByID("board_" . $this->_category);
- $boardModel = new BoardModel("mb_" . $baord_name);
- $board_entity = $boardModel->createByCrawler(
- $boards_entity,
- $this->_user_entity,
- $cnt,
- $listInfo,
- $storages
- );
- foreach ($storages as $storage) {
- try {
- $storage->backend($boards_entity, $board_entity, $boardModel->getTable());
- } catch (\Exception $e) {
- log_message("notice", sprintf(
- "\n---%s -> %s 게시물의 %s번째:%s 파일 등록 오류---\n%s\n--------------------------------\n",
- __FUNCTION__,
- $board_entity->getTitle(),
- $storage->getOriginSequence(),
- $storage->getOriginName(),
- $e->getMessage()
- ));
- }
- }
- }
- public function execute(int $max_limit): void
- {
- try {
- $listInfos = [];
- if ($this->getDebug()) {
- $listInfos = [
- 'title' => getenv("yamap.view.test.title"),
- 'nickname' => getenv("yamap.view.test.nickname"),
- 'detail_url' => getenv("yamap.view.test.url"),
- 'time' => date("Y-m-d H:i:s"),
- 'hit' => 1,
- ];
- } else {
- $response = $this->getMySocket()->getContent(getenv("yamap.list.url.{$this->_category}"));
- $selector = $this->getSelector($response, getenv("yamap.list.tag"));
- //div.bbs_item를 가진 객체를 찾아서 같은 형식의 객체(sibling)를 배열로 넘김
- // log_message("debug", sprintf("\n-------------MainPage------------\n%s\n--------------------------\n", $selector->html()));
- $selector->filter(getenv("yamap.list.item.tag"))->each(
- function (Crawler $node) use (&$listInfos): void {
- //bbs_item에서 span.g_nickname 객체를 찾아서 작성자가 "관리자" 아닌지 확인 후 Return Bool
- $nickname = $node->filter(getenv("yamap.list.item.nickname.tag"))->text();
- $hit = $node->filter(getenv("yamap.list.item.hit.tag"))->text();
- $date = $node->filter(getenv("yamap.list.item.date.tag"))->text();
- if ($nickname != getenv("yamap.list.item.nickname.except")) {
- //작성자가 "관리자"가 아니 게시물이면 해당 bbs_item에서 a.list_subject 객체를 찾아서
- $link_node = $node->filter(getenv("yamap.list.item.link.tag"));
- $detail_url = $link_node->attr("href");
- $title = $link_node->children()->last()->text();
- $listInfos[] = ['title' => $title, 'nickname' => $nickname, 'detail_url' => $detail_url, 'date' => $date, 'hit' => $hit];
- }
- }
- );
- }
- if (!count($listInfos)) {
- throw new \Exception("Target URL이 없습니다.");
- }
- $this->main_process($max_limit, $listInfos);
- log_message("notice", __FUNCTION__ . " 작업이 완료되었습니다.");
- } catch (\Exception $e) {
- log_message("warning", sprintf(
- "\n---%s 오류---\n%s\n-----------------------------------------\n",
- __FUNCTION__,
- $e->getMessage()
- ));
- }
- }
-}
diff --git a/app/Libraries/MyCrawler/YamoonCrawler.php b/app/Libraries/MyCrawler/YamoonCrawler.php
deleted file mode 100644
index 12c3cb3..0000000
--- a/app/Libraries/MyCrawler/YamoonCrawler.php
+++ /dev/null
@@ -1,116 +0,0 @@
-_category = $category;
- $this->_user_entity = $user_entity;
- }
- final protected function createMyStorage()
- {
- return new MangboardStorage($this->_category, $this->_user_entity);
- }
-
- protected function detail_page(int $cnt, array $listInfo): void
- {
- $response = $this->getMySocket()->getContent("/newboard/yamoonboard/" . $listInfo['detail_url']);
- //작성시간
- // $selector = $this->getSelector($response, getenv("yamoon.view.regdate.tag"));
- // $listInfo['date'] = trim($selector->text());
- //작성내용
- $tag = getenv("yamoon.view.content.tag");
- list($listInfo, $media_urls) = $this->getMediaUrls($response, $tag, $listInfo);
- //Image 나 Video 소스들의 url을 가져와서 실제 다운받는 처리
- $storages = $this->media_process($media_urls);
- if (!count($storages)) {
- throw new \Exception("등록할 자료가 없습니다.");
- }
- //File DB 및 Board DB 등록작업등
- $baord_name = "board_" . $this->_category;
- $boardsModel = new BoardsModel();
- $boards_entity = $boardsModel->getEntityByID("board_" . $this->_category);
- $boardModel = new BoardModel("mb_" . $baord_name);
- $board_entity = $boardModel->createByCrawler(
- $boards_entity,
- $this->_user_entity,
- $cnt,
- $listInfo,
- $storages
- );
- foreach ($storages as $storage) {
- try {
- $storage->backend($boards_entity, $board_entity, $boardModel->getTable());
- } catch (\Exception $e) {
- log_message("notice", sprintf(
- "\n---%s -> %s 게시물의 %s번째:%s 파일 등록 오류---\n%s\n--------------------------------\n",
- __FUNCTION__,
- $board_entity->getTitle(),
- $storage->getOriginSequence(),
- $storage->getOriginName(),
- $e->getMessage()
- ));
- }
- }
- }
-
- public function execute(int $max_limit): void
- {
- try {
- $listInfos = [];
- if ($this->getDebug()) {
- $listInfos = [
- 'title' => getenv("yamoon.view.test.title"),
- 'nickname' => getenv("yamoon.view.test.nickname"),
- 'detail_url' => getenv("yamoon.view.test.url"),
- 'time' => date("Y-m-d H:i:s"),
- 'hit' => 1,
- ];
- } else {
- $response = $this->getMySocket()->getContent(getenv("yamoon.list.url.{$this->_category}"));
- //div.bbs_item를 가진 객체를 찾아서 같은 형식의 객체(sibling)를 배열로 넘김
- // log_message("debug", sprintf("\n-------------MainPage------------\n%s\n--------------------------\n", $selector->html()));
- //
- //
- // 졸고 있는 여군
- // 6
- // yeeyuu | 6 | 369 | No 89372 | 2024-09-13
- // |
- //bbs_item에서 span.g_nickname 객체를 찾아서 작성자가 "관리자" 아닌지 확인 후 Return Bool
- $this->getSelector($response, getenv("yamoon.list.tag"))->each(
- function (Crawler $node) use (&$listInfos): void {
- $link_node = $node->filter(getenv("yamoon.list.item.link.tag"));
- $detail_url = $link_node->attr("href");
- $title = $link_node->text();
- $info_node = $node->filter(getenv("yamoon.list.item.info.tag"));
- $infos = explode("|", $info_node->text());
- $listInfos[] = ['title' => $title, 'detail_url' => $detail_url, 'nickname' => trim($infos[0]), 'hit' => trim($infos[2]), 'date' => trim($infos[4])];
- }
- );
- }
- if (!count($listInfos)) {
- throw new \Exception("Target URL이 없습니다.");
- }
- $this->main_process($max_limit, $listInfos);
- log_message("notice", __FUNCTION__ . " 작업이 완료되었습니다.");
- } catch (\Exception $e) {
- log_message("warning", sprintf(
- "\n---%s 오류---\n%s\n-----------------------------------------\n",
- __FUNCTION__,
- $e->getMessage()
- ));
- }
- }
-}