Automation init...3

This commit is contained in:
최준흠 2024-09-17 16:57:32 +09:00
parent 7a70c72a80
commit befbaafae1
9 changed files with 540 additions and 305 deletions

View File

@ -33,9 +33,12 @@ $routes->group('mangboard', ['namespace' => 'App\Controllers\Mangboard'], functi
$routes->group('crawler', function ($routes) { $routes->group('crawler', function ($routes) {
$routes->cli('yamap/(:alpha)', 'CrawlerController::yamap/$1'); $routes->cli('yamap/(:alpha)', 'CrawlerController::yamap/$1');
$routes->cli('yamap/(:alpha)/(:any)', 'CrawlerController::yamap/$1/$2'); $routes->cli('yamap/(:alpha)/(:any)', 'CrawlerController::yamap/$1/$2');
$routes->cli('yamap/(:alpha)/(:alphanum)/(:any)', 'CrawlerController::yamap/$1/$2/$3'); $routes->cli('yamap/(:alpha)/(:any)/(:any)', 'CrawlerController::yamap/$1/$2/$3');
$routes->cli('yamoon/(:alpha)', 'CrawlerController::yamoon/$1'); $routes->cli('yamoon/(:alpha)', 'CrawlerController::yamoon/$1');
$routes->cli('yamoon/(:alpha)/(:any)', 'CrawlerController::yamoon/$1/$2'); $routes->cli('yamoon/(:alpha)/(:any)', 'CrawlerController::yamoon/$1/$2');
$routes->cli('yamoon/(:alpha)/(:alphanum)/(:any)', 'CrawlerController::yamoon/$1/$2/$3'); $routes->cli('yamoon/(:alpha)/(:any)/(:any)', 'CrawlerController::yamoon/$1/$2/$3');
$routes->cli('sir/(:alpha)', 'CrawlerController::sir/$1');
$routes->cli('sir/(:alpha)/(:any)', 'CrawlerController::sir/$1/$2');
$routes->cli('sir/(:alpha)/(:any)/(:any)', 'CrawlerController::sir/$1/$2/$3');
}); });
}); });

View File

@ -4,8 +4,9 @@ namespace App\Controllers\Mangboard;
use App\Controllers\CommonController; use App\Controllers\CommonController;
use App\Entities\Mangboard\UserEntity; use App\Entities\Mangboard\UserEntity;
use App\Libraries\MyCrawler\YamapCrawler; use App\Libraries\MyCrawler\Mangboard\YamapCrawler;
use App\Libraries\MyCrawler\YamoonCrawler; use App\Libraries\MyCrawler\Mangboard\YamoonCrawler;
use App\Libraries\MyCrawler\Mangboard\SirCrawler;
use App\Models\Mangboard\UserModel; use App\Models\Mangboard\UserModel;
class CrawlerController extends CommonController class CrawlerController extends CommonController
@ -18,8 +19,11 @@ class CrawlerController extends CommonController
} }
return $this->_user_model; return $this->_user_model;
} }
public function login(string $host, string $id, string $password): bool|UserEntity public function login(string $id): bool|UserEntity
{ {
$host = getenv("mangboard.host.url");
$id = $id == "" ? getenv("mangboard.login.default.id") : $id;
$password = getenv("mangboard.login.default.password");
$user_entity = $this->getUserModel()->getEntityByID($id); $user_entity = $this->getUserModel()->getEntityByID($id);
// $response = $this->getWebLibrary($host)->getResponse( // $response = $this->getWebLibrary($host)->getResponse(
// $host . getenv("mangboard.login.url"), // $host . getenv("mangboard.login.url"),
@ -42,16 +46,16 @@ class CrawlerController extends CommonController
log_message("notice", "{$id}로 로그인 성공"); log_message("notice", "{$id}로 로그인 성공");
return $user_entity; return $user_entity;
} }
public function yamap(string $category, string $id = "", string $debug = "false"): string public function yamap(string $category, string $id = "", string $option = ""): string
{ {
try { try {
$id = $id == "" ? getenv("mangboard.login.default.id") : $id;
$password = getenv("mangboard.login.default.password");
//1. 사이트 로그인 처리 //1. 사이트 로그인 처리
$user_entity = $this->login(getenv("mangboard.host.url"), $id, $password); $user_entity = $this->login($id);
//2. 필요한 로그인한 사용자정보,Socket,Storage 정의후 Crawler에게 전달. //2. 필요한 로그인한 사용자정보,Socket,Storage 정의후 Crawler에게 전달.
$crawler = new YamapCrawler(getenv('yamap.host.url'), $category, $user_entity); $crawler = new YamapCrawler(getenv('yamap.host.url'), $category, $user_entity);
$crawler->setDebug($debug === "true" ? true : false); if ($option) {
$crawler->setDebug($option === "debug" ? true : false);
}
$crawler->execute(intval(getenv("yamap.list.max_limit"))); $crawler->execute(intval(getenv("yamap.list.max_limit")));
return "완료되었습니다."; return "완료되었습니다.";
} catch (\Exception $e) { } catch (\Exception $e) {
@ -59,16 +63,16 @@ class CrawlerController extends CommonController
return $e->getMessage(); return $e->getMessage();
} }
} }
public function yamoon(string $category, string $id = "", string $debug = "false"): string public function yamoon(string $category, string $id = "", string $option = ""): string
{ {
try { try {
$id = $id == "" ? getenv("mangboard.login.default.id") : $id;
$password = getenv("mangboard.login.default.password");
//1. 사이트 로그인 처리 //1. 사이트 로그인 처리
$user_entity = $this->login(getenv("mangboard.host.url"), $id, $password); $user_entity = $this->login($id);
//2. 필요한 로그인한 사용자정보,Socket,Storage 정의후 Crawler에게 전달. //2. 필요한 로그인한 사용자정보,Socket,Storage 정의후 Crawler에게 전달.
$crawler = new YamoonCrawler(getenv("yamoon.host.url"), $category, $user_entity); $crawler = new YamoonCrawler(getenv("yamoon.host.url"), $category, $user_entity);
$crawler->setDebug($debug === "true" ? true : false); if ($option) {
$crawler->setDebug($option === "debug" ? true : false);
}
$crawler->execute(intval(getenv("yamap.list.max_limit"))); $crawler->execute(intval(getenv("yamap.list.max_limit")));
return "완료되었습니다."; return "완료되었습니다.";
} catch (\Exception $e) { } catch (\Exception $e) {
@ -76,4 +80,21 @@ class CrawlerController extends CommonController
return $e->getMessage(); return $e->getMessage();
} }
} }
/**
 * CLI entry point that crawls a sir.kr board into the Mangboard board of the same category.
 *
 * @param string $category Board category; forwarded to SirCrawler.
 * @param string $id       Login id; forwarded to login(), which resolves the account to use.
 * @param string $option   Optional switch; the value "debug" turns the crawler's debug mode on.
 *
 * @return string A completion message, or the exception message when the run failed.
 */
public function sir(string $category, string $id = "", string $option = ""): string
{
    try {
        // 1. Log in to the site.
        $user_entity = $this->login($id);
        if (!$user_entity instanceof UserEntity) {
            // login() is declared bool|UserEntity. Handing a bool to SirCrawler's UserEntity
            // parameter would raise a TypeError, which the catch (\Exception) below cannot see.
            throw new \Exception("로그인에 실패하였습니다.");
        }
        // 2. Hand the logged-in user to the crawler, which builds its own socket and storage.
        $crawler = new SirCrawler(getenv("sir.host.url"), $category, $user_entity);
        if ($option) {
            $crawler->setDebug($option === "debug");
        }
        $crawler->execute(intval(getenv("sir.list.max_limit")));
        return "완료되었습니다.";
    } catch (\Exception $e) {
        log_message("error", $e->getMessage());
        return $e->getMessage();
    }
}
} }

View File

@ -0,0 +1,65 @@
<?php
namespace App\Libraries\MyCrawler\Mangboard;
use App\Libraries\MySocket\WebSocket;
use App\Entities\Mangboard\UserEntity;
use App\Libraries\MyCrawler\MyCrawler;
use App\Libraries\MyStorage\MangboardStorage;
use App\Models\Mangboard\BoardModel;
use App\Models\Mangboard\BoardsModel;
/**
 * Common base for crawlers whose results are registered in a Mangboard board.
 *
 * Holds the target host, the board category and the acting user, and provides the
 * socket, storage and board-registration steps shared by the site-specific crawlers.
 */
abstract class MangboardCrawler extends MyCrawler
{
    /** Socket for the target host; created on first use by getMySocket(). */
    protected $_mySocket = null;
    /** Host passed to the WebSocket constructor. */
    protected $_host = "";
    /** Board category; also used to derive the board and table names. */
    protected $_category = "";
    /** User entity under which storages and board entries are created. */
    protected $_user_entity = null;

    /**
     * @param string     $host        Host of the site to crawl.
     * @param string     $category    Board category.
     * @param UserEntity $user_entity Acting user.
     */
    protected function __construct(string $host, string $category, UserEntity $user_entity)
    {
        parent::__construct();
        $this->_user_entity = $user_entity;
        $this->_category = $category;
        $this->_host = $host;
    }

    /**
     * Returns the socket for the configured host, creating it on the first call.
     */
    protected function getMySocket()
    {
        $this->_mySocket ??= new WebSocket($this->_host);
        return $this->_mySocket;
    }

    /**
     * Creates a new storage object bound to the category and the acting user.
     */
    final protected function createMyStorage()
    {
        $storage = new MangboardStorage($this->_category, $this->_user_entity);
        return $storage;
    }

    /**
     * Registers a crawled post and its stored files (file DB and board DB registration).
     *
     * A failure while registering one storage is logged at "notice" level and does not
     * stop the remaining storages from being processed.
     *
     * @param int   $cnt      Position of the post within the current run.
     * @param array $listInfo Post data collected by the crawler.
     * @param array $storages Storage objects produced for the post's media.
     */
    protected function backend_process(int $cnt, array $listInfo, array $storages)
    {
        $boardName = "board_" . $this->_category;

        $boardsModel = new BoardsModel();
        $boardsEntity = $boardsModel->getEntityByID($boardName);

        $boardModel = new BoardModel("mb_" . $boardName);
        $boardEntity = $boardModel->createByCrawler(
            $boardsEntity,
            $this->_user_entity,
            $cnt,
            $listInfo,
            $storages
        );

        foreach ($storages as $storage) {
            try {
                $storage->backend($boardsEntity, $boardEntity, $boardModel->getTable());
            } catch (\Exception $e) {
                $report = sprintf(
                    "\n---%s -> %s 게시물의 %s번째:%s 파일 등록 오류---\n%s\n--------------------------------\n",
                    __FUNCTION__,
                    $boardEntity->getTitle(),
                    $storage->getOriginSequence(),
                    $storage->getOriginName(),
                    $e->getMessage()
                );
                log_message("notice", $report);
            }
        }
    }
}

View File

@ -0,0 +1,177 @@
<?php
namespace App\Libraries\MyCrawler\Mangboard;
use App\Entities\Mangboard\UserEntity;
use App\Libraries\MySocket\WebSocket;
use DateTime;
use Symfony\Component\DomCrawler\Crawler;
class SirCrawler extends MangboardCrawler
{
/**
 * Public constructor; forwards all arguments to the protected MangboardCrawler constructor.
 *
 * @param string     $host        Host of the site to crawl.
 * @param string     $category    Board category.
 * @param UserEntity $user_entity Acting user.
 */
public function __construct(string $host, string $category, UserEntity $user_entity)
{
parent::__construct($host, $category, $user_entity);
}
/**
 * Converts a sir.kr URL into a host-relative path.
 *
 * Links on the site are protocol-relative ("//sir.kr/cm_humor/191449"); the host part is
 * dropped so that "/cm_humor/191449" remains. Only a leading host prefix is removed:
 * a blanket replacement of "/sir.kr/" would turn "https://sir.kr/x" into "https:/x".
 *
 * @param string $url URL taken from an href/src attribute.
 *
 * @return string Host-relative path, or $url unchanged when it does not start with the sir.kr host.
 */
protected function getChangeURL(string $url): string
{
    foreach (["//sir.kr/", "https://sir.kr/", "http://sir.kr/"] as $prefix) {
        if (str_starts_with($url, $prefix)) {
            return "/" . substr($url, strlen($prefix));
        }
    }
    return $url;
}
//작성내용
// <article class="sir_vbo ">
// <header class="vbo_head">
// <h2 class="head_h2">할아버지의 마술 정보</h2>
// <strong id="head_title">
// 할아버지의 마술 </strong>
// <ul id="head_info">
// <li id="info_name"><span class="sv_wrap">
// <a href="//sir.kr/bbs/profile.php?mb_id=hadirector" class="sv_member" title="감독님 자기소개" target="_blank" rel="nofollow" onclick="return false;"><span class="sir_mb_icon"></span> <span class="member">감독님</span></a>
// <span class="sv">
// <a href="//sir.kr/bbs/profile.php?mb_id=hadirector" onclick="win_profile(this.href); return false;"><i class="fa fa-user" aria-hidden="true"></i> 자기소개</a>
// <a href="//sir.kr/cm_humor?sca=&amp;sfl=mb_id,1&amp;stx=hadirector"><i class="fa fa-search" aria-hidden="true"></i> 아이디로 검색</a>
// <a href="//sir.kr/main/member/?mb_id=hadirector"><i class="fa fa-file-text-o" aria-hidden="true"></i> 회원게시물</a>
// </span>
// <noscript class="sv_nojs"><span class="sv">
// <a href="//sir.kr/bbs/profile.php?mb_id=hadirector" onclick="win_profile(this.href); return false;"><i class="fa fa-user" aria-hidden="true"></i> 자기소개</a>
// <a href="//sir.kr/cm_humor?sca=&amp;sfl=mb_id,1&amp;stx=hadirector"><i class="fa fa-search" aria-hidden="true"></i> 아이디로 검색</a>
// <a href="//sir.kr/main/member/?mb_id=hadirector"><i class="fa fa-file-text-o" aria-hidden="true"></i> 회원게시물</a>
// </span>
// </noscript></span>&nbsp;(210.♡.♡.13)</li>
// <li id="info_date"><time datetime='2024-09-13T00:24:04+09:00'>2024.09.13 00:24:04</time></li>
// <li id="info_hit"> 조회 <span>245</span>
// </li>
// <li id="info_cmt">
// <a href="#vcmt_anchor" class="comment">댓글 <span>3</span></a>
// </li>
// </ul>
// <div id="head_img"><span class='sir_mb_img' title='회원정보에 사진을 올려주세요.'></span></div>
// </header>
// <script>
// $(".vcmt-btn").click(function() {
// $('html, body').animate({
// scrollTop: $("#vcmt_anchor").offset().top - 100
// }, 300);
// });
// </script>
// <ul class="sir_vbo_cmd link">
// <li><a href="javascript:void(0)" class="sir_b01 sir_prev"><span class="sound_only">이전 게시글</span><i class="fa fa-angle-left"></i></a></li>
// <li><a href="//sir.kr/cm_humor/191445" class="sir_b01 sir_next"><span class="sound_only">다음 게시글</span><i class="fa fa-angle-right"></i></a></li>
// </ul>
// <ul class="sir_vbo_com" >
// <li><a href="//sir.kr/cm_humor" class="sir_b01">목록</a></li>
// </ul>
// <section id="vbo_con">
// <h2 class="con_h2">본문</h2>
// <div class="con_inner">
// <div id="con_pix">
// <video autoplay="autoplay" loop="loop" preload="auto" playsinline webkit-playsinline muted>
// <source src="//sir.kr/data/file/cm_humor/3535243533_CiH6Iv9O_ee170eeec15e748d9bfcc895836c71d9829c07fb.mp4" type="video/mp4" />
// </video>
// </div>
// <p>..</p><div style=
/**
 * Fetches one post's detail page, normalises its timestamp and registers its media.
 *
 * @param int   $cnt      Position of the post within the current run; forwarded to backend_process().
 * @param array $listInfo Post data collected from the list page; 'detail_url' is required.
 *
 * @return array $listInfo with 'date' rewritten in "Y-m-d H:i:s" form.
 *
 * @throws \Exception When the timestamp cannot be parsed, when no media could be stored,
 *                    and always in debug mode (the message carries the collected data).
 */
protected function detail_page(int $cnt, array $listInfo): array
{
    $response = $this->getMySocket()->getContent($listInfo['detail_url']);

    // Posting time: the page prints "2024.09.13 00:24:04"; store it as "2024-09-13 00:24:04".
    $rawDate = trim($this->getSelector($response, getenv("sir.view.date.tag"))->text());
    $postedAt = DateTime::createFromFormat('Y.m.d H:i:s', $rawDate);
    if ($postedAt === false) {
        // createFromFormat() returns false on a mismatch. Calling format() on false raises an
        // \Error, which catch (\Exception) handlers such as the one in execute() do not intercept.
        throw new \Exception("작성시간 형식이 올바르지 않습니다: {$rawDate}");
    }
    $listInfo['date'] = $postedAt->format('Y-m-d H:i:s');

    // Post body: collect image and video URLs (third argument is getSelector()'s $isViewHTML flag).
    $selector = $this->getSelector($response, getenv("sir.view.content.tag"), true);
    $media_urls = $this->getUrlsByMediaType("image", $selector, ["tag" => "img", "attr" => "src"]);
    $media_urls = $this->getUrlsByMediaType("video", $selector, ["tag" => "video", "attr" => "src"], $media_urls);

    if ($this->getDebug()) {
        // Debug mode reports what was found instead of downloading anything.
        throw new \Exception(sprintf(
            "\n--------------%s Debug--------------\n%s%s\n---------------------------------------\n",
            __FUNCTION__,
            var_export($listInfo, true),
            var_export($media_urls, true)
        ));
    }

    $storages = $this->media_process($media_urls);
    if (!count($storages)) {
        throw new \Exception("등록할 자료가 없습니다.");
    }
    $this->backend_process($cnt, $listInfo, $storages);

    return $listInfo;
}
//리스트내용
// <li class="lbo_li li_bg0 lbo_like" style="z-index:30">
// <div class="li_title" style="margin:0 270px 0 50px;">
// <a href="//sir.kr/cm_humor/191449" class="title_link">할아버지의 마술 <i class="co-ico co-ico-small fa fa-folder-o"></i><i class="co-ico co-ico-small fa fa-play-circle"></i><span class="cnt_cmt">3</span></a>
// <div class="li_num">21967</div>
// <div class="li_info">
// <span class="info_span info_nick">
// <span class="sv_wrap">
// <a href="//sir.kr/bbs/profile.php?mb_id=hadirector" class="sv_member" title="감독님 자기소개" target="_blank" rel="nofollow" onclick="return false;"><span class="sir_mb_icon"></span> <span class="member">감독님</span></a>
// <span class="sv">
// <a href="//sir.kr/bbs/profile.php?mb_id=hadirector" onclick="win_profile(this.href); return false;"><i class="fa fa-user" aria-hidden="true"></i> 자기소개</a>
// <a href="//sir.kr/cm_humor?sca=&amp;sfl=mb_id,1&amp;stx=hadirector"><i class="fa fa-search" aria-hidden="true"></i> 아이디로 검색</a>
// <a href="//sir.kr/main/member/?mb_id=hadirector"><i class="fa fa-file-text-o" aria-hidden="true"></i> 회원게시물</a>
// </span>
// <noscript class="sv_nojs">
// <span class="sv">
// <a href="//sir.kr/bbs/profile.php?mb_id=hadirector" onclick="win_profile(this.href); return false;"><i class="fa fa-user" aria-hidden="true"></i> 자기소개</a>
// <a href="//sir.kr/cm_humor?sca=&amp;sfl=mb_id,1&amp;stx=hadirector"><i class="fa fa-search" aria-hidden="true"></i> 아이디로 검색</a>
// <a href="//sir.kr/main/member/?mb_id=hadirector"><i class="fa fa-file-text-o" aria-hidden="true"></i> 회원게시물</a>
// </span>
// </noscript>
// </span>
// </span>
// <span class="info_span info_date"> 24.09.13</span>
// <span class="info_span info_like">
// <span class="like_good1">5</span>
// </span>
// <span class="info_span info_hit">244</span>
// </div>
// </li>
/**
 * Crawls the sir.kr list page and processes the listed posts.
 *
 * In debug mode the configured test URL is inspected first; detail_page() then throws
 * its report, which ends the run in the catch block below.
 * Any \Exception is logged at "warning" level and not rethrown.
 *
 * @param int $max_limit Maximum number of posts; forwarded to main_process().
 */
public function execute(int $max_limit): void
{
    try {
        $listInfos = [];
        if ($this->getDebug()) {
            $this->detail_page(1, ['detail_url' => getenv("sir.view.test.url")]);
        }
        $response = $this->getMySocket()->getContent(getenv("sir.list.url"));
        $this->getSelector($response, getenv("sir.list.tag"))->each(
            function (Crawler $node) use (&$listInfos): void {
                $link_node = $node->filter(getenv("sir.list.item.link.tag"));
                $href = $link_node->attr("href");
                if ($href === null) {
                    // attr() yields null when the attribute is absent; passing null to
                    // getChangeURL(string) would raise a TypeError that no handler here catches.
                    log_message("debug", "execute-> href가 없는 목록 항목을 건너뜁니다.");
                    return;
                }
                // hrefs are protocol-relative ("//sir.kr/..."); reduce them to a host-relative path.
                $detail_url = $this->getChangeURL($href);
                $title = $link_node->text();
                $nickname = $node->filter(getenv("sir.list.item.nickname.tag"))->text();
                $hit = $node->filter(getenv("sir.list.item.hit.tag"))->text();
                // 'date' stays empty here; detail_page() fills it from the post itself.
                $listInfos[] = [
                    'title' => $title,
                    'nickname' => $nickname,
                    'detail_url' => $detail_url,
                    'date' => "",
                    'hit' => $hit,
                ];
            }
        );
        if (!count($listInfos)) {
            throw new \Exception("Target URL이 없습니다.");
        }
        $this->main_process($max_limit, $listInfos);
        log_message("notice", __FUNCTION__ . " 작업이 완료되었습니다.");
    } catch (\Exception $e) {
        log_message("warning", sprintf(
            "\n---%s 오류---\n%s\n-----------------------------------------\n",
            __FUNCTION__,
            $e->getMessage()
        ));
    }
}
}

View File

@ -0,0 +1,106 @@
<?php
namespace App\Libraries\MyCrawler\Mangboard;
use App\Entities\Mangboard\UserEntity;
use Symfony\Component\DomCrawler\Crawler;
class YamapCrawler extends MangboardCrawler
{
/**
 * Public constructor; forwards all arguments to the protected MangboardCrawler constructor.
 *
 * @param string     $host        Host of the site to crawl.
 * @param string     $category    Board category.
 * @param UserEntity $user_entity Acting user.
 */
public function __construct(string $host, string $category, UserEntity $user_entity)
{
parent::__construct($host, $category, $user_entity);
}
/**
 * Fetches one post's detail page and registers the images and videos found in its body.
 *
 * @param int   $cnt      Position of the post within the current run; forwarded to backend_process().
 * @param array $listInfo Post data collected from the list page; 'detail_url' is required.
 *
 * @return array The unmodified $listInfo.
 *
 * @throws \Exception When no media could be stored, and always in debug mode
 *                    (the message carries the collected data).
 */
protected function detail_page(int $cnt, array $listInfo): array
{
    $html = $this->getMySocket()->getContent($listInfo['detail_url']);
    $body = $this->getSelector($html, getenv("yamap.view.content.tag"));

    // Images first, then videos merged into the same URL map.
    $imageUrls = $this->getUrlsByMediaType("image", $body, ["tag" => "img", "attr" => "src"]);
    $media_urls = $this->getUrlsByMediaType("video", $body, ["tag" => "video", "attr" => "src"], $imageUrls);

    if ($this->getDebug()) {
        // Debug mode reports what was found instead of downloading anything.
        $dump = sprintf(
            "\n--------------%s Debug--------------\n%s%s\n---------------------------------------\n",
            __FUNCTION__,
            var_export($listInfo, true),
            var_export($media_urls, true)
        );
        throw new \Exception($dump);
    }

    // Download the image/video sources behind the collected URLs.
    $storages = $this->media_process($media_urls);
    if (count($storages) === 0) {
        throw new \Exception("등록할 자료가 없습니다.");
    }
    $this->backend_process($cnt, $listInfo, $storages);

    return $listInfo;
}
// Detail page (post body) markup sample
// <div class="panel panel-default">
// <div class="text-center panel-heading-local-title text-bold">요즘 패션</div>
// <div style="margin:5px 10px;">
// <span class="pull-left dropdown">
// 괴강고귀
// </span>
// <span class="pull-right">
// | 추천 (14) | 조회 (432)
// </span>
// <div class="clearfix"></div>
// <hr class="hr-xs-xs">
// <span>
// <a href="javascript:void(0);" id="incfont"><i class="fa fa-plus fa-fw" aria-hidden="true"></i></a><a href="javascript:void(0);" id="decfont"><i class="fa fa-minus fa-fw margin-left-5" aria-hidden="true"></i></a>
// </span>
// <span class="pull-right">2024-09-14 01:53:45
// </span>
// <div class="clearfix"></div>
// <hr class="margin-top-5 margin-bottom-20">
// <div class="fr-view margin-bottom-30" id="read-content" style="word-break:break-all;">
// <p><img title="" class="cloudzoom" data-cloudzoom="zoomImage:'/newboard/yamoonfreeboard/uploads/humor/mceu_86177012011726246415487.jpg'" class="fr-fic fr-dii" src="/newboard/yamoonfreeboard/uploads/humor/mceu_86177012011726246415487.jpg" alt=""></p>
// <p>&nbsp;</p>
// </div>
// </div>
// <div class="margin-10">
// <a href="javascript:void(0)" onclick="javascript:window.open('https://twitter.com/intent/tweet?text='+encodeURIComponent(document.title)+'%20-%20'+encodeURIComponent(document.URL), 'twittersharedialog', 'menubar=no,toolbar=no,resizable=yes,scrollbars=yes,height=300,width=600');return false;" target="_blank"> <i class="fa fa-twitter-square fa-lg ya-tooltip" title="트위터 공유하기"></i></a>
// <a href="javascript:void(0)" onclick="javascript:window.open('https://www.facebook.com/sharer/sharer.php?u='+encodeURIComponent(document.URL)+'&t='+encodeURIComponent(document.title), 'facebooksharedialog', 'menubar=no,toolbar=no,resizable=yes,scrollbars=yes,height=300,width=600');return false;" target="_blank"> <i class="fa fa-facebook-square fa-lg ya-tooltip" title="페이스북 공유하기"></i></a>
// </div>
// <div id="freesubframe"></div>
// </div>
/**
 * Crawls the yamap list page and processes the listed posts.
 *
 * Posts whose nickname equals the configured "yamap.list.item.nickname.except" value
 * are left out. Any \Exception is logged at "warning" level and not rethrown.
 *
 * @param int $max_limit Maximum number of posts; forwarded to main_process().
 */
public function execute(int $max_limit): void
{
    try {
        $listInfos = [];
        if ($this->getDebug()) {
            $this->detail_page(1, ['detail_url' => getenv("yamap.view.test.url")]);
        }

        // Collects one list item into $listInfos unless it was written by the excluded nickname.
        $collect = function (Crawler $item) use (&$listInfos): void {
            $hit = $item->filter(getenv("yamap.list.item.hit.tag"))->text();
            $date = $item->filter(getenv("yamap.list.item.date.tag"))->text();
            $nickname = $item->filter(getenv("yamap.list.item.nickname.tag"))->text();
            if ($nickname == getenv("yamap.list.item.nickname.except")) {
                return;
            }
            // The link node carries the detail URL; its last child holds the title text.
            $link = $item->filter(getenv("yamap.list.item.link.tag"));
            $detail_url = $link->attr("href");
            $title = $link->children()->last()->text();
            $listInfos[] = [
                'title' => $title,
                'nickname' => $nickname,
                'detail_url' => $detail_url,
                'date' => $date,
                'hit' => $hit,
            ];
        };

        $listHtml = $this->getMySocket()->getContent(getenv("yamap.list.url"));
        $listRoot = $this->getSelector($listHtml, getenv("yamap.list.tag"));
        // Visit every node matching the item selector inside the list container.
        $listRoot->filter(getenv("yamap.list.item.tag"))->each($collect);

        if (count($listInfos) === 0) {
            throw new \Exception("Target URL이 없습니다.");
        }
        $this->main_process($max_limit, $listInfos);
        log_message("notice", __FUNCTION__ . " 작업이 완료되었습니다.");
    } catch (\Exception $e) {
        $report = sprintf(
            "\n---%s 오류---\n%s\n-----------------------------------------\n",
            __FUNCTION__,
            $e->getMessage()
        );
        log_message("warning", $report);
    }
}
}

View File

@ -0,0 +1,121 @@
<?php
namespace App\Libraries\MyCrawler\Mangboard;
use App\Entities\Mangboard\UserEntity;
use Symfony\Component\DomCrawler\Crawler;
class YamoonCrawler extends MangboardCrawler
{
/**
 * Public constructor; forwards all arguments to the protected MangboardCrawler constructor.
 *
 * @param string     $host        Host of the site to crawl.
 * @param string     $category    Board category.
 * @param UserEntity $user_entity Acting user.
 */
public function __construct(string $host, string $category, UserEntity $user_entity)
{
parent::__construct($host, $category, $user_entity);
}
//작성내용
// <div class="panel panel-default">
// <div class="text-center panel-heading-local-title text-bold">요즘 화제라는 명품 목걸이</div>
// <div style="margin:5px 10px;">
// <span class="pull-left dropdown">CAT7478</span>
// <span class="pull-right">| 추천 (8) | 조회 (268)</span>
// <div class="clearfix"></div>
// <hr class="hr-xs-xs">
// <span>
// <a href="javascript:void(0);" id="incfont"><i class="fa fa-plus fa-fw" aria-hidden="true"></i></a><a href="javascript:void(0);" id="decfont"><i class="fa fa-minus fa-fw margin-left-5" aria-hidden="true"></i></a>
// </span>
// <span class="pull-right">2024-09-16 09:52:39</span>
// <div class="clearfix"></div>
// <hr class="margin-top-5 margin-bottom-20">
// <div class="fr-view margin-bottom-30" id="read-content" style="word-break:break-all;">
// <p><img title="" class="cloudzoom" data-cloudzoom="zoomImage:'/newboard/yamoonfreeboard/uploads/humor/mceu_18297097311726447898684.webp'" class="fr-fic fr-dii" src="/newboard/yamoonfreeboard/uploads/humor/mceu_18297097311726447898684.webp" alt=""></p>
// <p><img title="" class="cloudzoom" data-cloudzoom="zoomImage:'/newboard/yamoonfreeboard/uploads/humor/mceu_41105156321726447902977.png'" class="fr-fic fr-dii" src="/newboard/yamoonfreeboard/uploads/humor/mceu_41105156321726447902977.png" alt=""></p>
// <p>&nbsp;</p>
// <p>&nbsp;</p>
// <p>전화기선 짤라서 목걸이 만들어도 위화감이 전혀 없을것같은</p>
// <p>&nbsp;</p>
// <p>디자인이군요</p>
// <p>&nbsp;</p>
// </div>
// </div>
// <div class="margin-10">
// <a href="javascript:void(0)" onclick="javascript:window.open('https://twitter.com/intent/tweet?text='+encodeURIComponent(document.title)+'%20-%20'+encodeURIComponent(document.URL), 'twittersharedialog', 'menubar=no,toolbar=no,resizable=yes,scrollbars=yes,height=300,width=600');return false;" target="_blank"> <i class="fa fa-twitter-square fa-lg ya-tooltip" title="트위터 공유하기"></i></a>
// <a href="javascript:void(0)" onclick="javascript:window.open('https://www.facebook.com/sharer/sharer.php?u='+encodeURIComponent(document.URL)+'&t='+encodeURIComponent(document.title), 'facebooksharedialog', 'menubar=no,toolbar=no,resizable=yes,scrollbars=yes,height=300,width=600');return false;" target="_blank"> <i class="fa fa-facebook-square fa-lg ya-tooltip" title="페이스북 공유하기"></i></a>
// </div>
// <div id="freesubframe"></div>
// </div>
/**
 * Fetches one post's detail page and registers the images and videos found in its body.
 *
 * The list page supplies a relative link, so it is requested below "/newboard/yamoonboard/".
 * The post date is not re-read here; the value collected from the list is kept.
 *
 * @param int   $cnt      Position of the post within the current run; forwarded to backend_process().
 * @param array $listInfo Post data collected from the list page; 'detail_url' is required.
 *
 * @return array The unmodified $listInfo.
 *
 * @throws \Exception When no media could be stored, and always in debug mode
 *                    (the message carries the collected data).
 */
protected function detail_page(int $cnt, array $listInfo): array
{
    $detailPath = "/newboard/yamoonboard/" . $listInfo['detail_url'];
    $html = $this->getMySocket()->getContent($detailPath);

    // Post body: images first, then videos merged into the same URL map.
    $body = $this->getSelector($html, getenv("yamoon.view.content.tag"));
    $imageUrls = $this->getUrlsByMediaType("image", $body, ["tag" => "img", "attr" => "src"]);
    $media_urls = $this->getUrlsByMediaType("video", $body, ["tag" => "video", "attr" => "src"], $imageUrls);

    if ($this->getDebug()) {
        // Debug mode reports what was found instead of downloading anything.
        $dump = sprintf(
            "\n--------------%s Debug--------------\n%s%s\n---------------------------------------\n",
            __FUNCTION__,
            var_export($listInfo, true),
            var_export($media_urls, true)
        );
        throw new \Exception($dump);
    }

    // Download the image/video sources behind the collected URLs.
    $storages = $this->media_process($media_urls);
    if (count($storages) === 0) {
        throw new \Exception("등록할 자료가 없습니다.");
    }
    $this->backend_process($cnt, $listInfo, $storages);

    return $listInfo;
}
//리스트 내용
// <td class="listvisited mobile-td subject-view">
// <a href="board-read.asp?fullboardname=yamoonfreeboard&mtablename=humor&num=89372&ref=85575&page=1" class="ya-tooltip mobile-bold mobile-height" title="<p><br><br><video autoplay=&quot;autoplay&quot; loop=&quot;loop&quot; muted=&quot;&quot; controls=&quot;controls&quot; width=&quot;560&quot;&quot; height=&quot; &quot;> <source src=&quot; https://files.bepick.net/bbs/2024/09/c2a20ab5771cbb934940551859fce1c8_769966583.mp4 &quot;> </video><br><br><br></p">
// 졸고 있는 여군</a>
// <i class="fa fa-commenting-o" aria-hidden="true"></i> <span class="color-red small">6</span>
// <span class="visible-xs visible-sm small"><i class="fa fa-user-o" aria-hidden="true"></i> yeeyuu | <i class="fa fa-thumbs-o-up" aria-hidden="true"></i> 6 | <i class="fa fa-eye" aria-hidden="true"></i> 369 | No 89372 | 2024-09-13</span>
// </td>
/**
 * Crawls the yamoon list page and processes the listed posts.
 *
 * Each row's info text is a "|"-separated line such as
 * "yeeyuu | 6 | 369 | No 89372 | 2024-09-13"; fields 0, 2 and 4 are used as
 * nickname, hit count and date. Rows without an href or with fewer than five
 * fields are skipped rather than stored with missing data.
 * Any \Exception is logged at "warning" level and not rethrown.
 *
 * @param int $max_limit Maximum number of posts; forwarded to main_process().
 */
public function execute(int $max_limit): void
{
    try {
        $listInfos = [];
        if ($this->getDebug()) {
            $this->detail_page(1, ['detail_url' => getenv("yamoon.view.test.url")]);
        }
        $response = $this->getMySocket()->getContent(getenv("yamoon.list.url"));
        $this->getSelector($response, getenv("yamoon.list.tag"))->each(
            function (Crawler $node) use (&$listInfos): void {
                $link_node = $node->filter(getenv("yamoon.list.item.link.tag"));
                $detail_url = $link_node->attr("href");
                $title = $link_node->text();
                $info_node = $node->filter(getenv("yamoon.list.item.info.tag"));
                $infos = array_map('trim', explode("|", $info_node->text()));
                if ($detail_url === null || count($infos) < 5) {
                    // Reading $infos[2] / $infos[4] on a short row would hit undefined offsets,
                    // and a null href would later be requested as a bogus detail URL.
                    log_message("debug", "execute-> 형식이 맞지 않는 목록 항목을 건너뜁니다: {$title}");
                    return;
                }
                $listInfos[] = [
                    'title' => $title,
                    'detail_url' => $detail_url,
                    'nickname' => $infos[0],
                    'hit' => $infos[2],
                    'date' => $infos[4],
                ];
            }
        );
        if (!count($listInfos)) {
            throw new \Exception("Target URL이 없습니다.");
        }
        $this->main_process($max_limit, $listInfos);
        log_message("notice", __FUNCTION__ . " 작업이 완료되었습니다.");
    } catch (\Exception $e) {
        log_message("warning", sprintf(
            "\n---%s 오류---\n%s\n-----------------------------------------\n",
            __FUNCTION__,
            $e->getMessage()
        ));
    }
}
}

View File

@ -9,64 +9,58 @@ use App\Traits\FileTrait;
abstract class MyCrawler extends CommonLibrary abstract class MyCrawler extends CommonLibrary
{ {
use FileTrait; use FileTrait;
private $_mySocket = null; protected function __construct()
protected function __construct($mySocket)
{ {
parent::__construct(); parent::__construct();
$this->_mySocket = $mySocket;
} }
abstract protected function getMySocket();
abstract protected function createMyStorage(); abstract protected function createMyStorage();
abstract protected function detail_page(int $cnt, array $listInfo): void; abstract protected function detail_page(int $cnt, array $listInfo): array;
final protected function getMySocket() final protected function getSelector(string $content, string $tag, $isViewHTML = false): Crawler
{
if ($this->_mySocket === null) {
throw new \Exception("Socket이 지정되지 않았습니다.");
}
return $this->_mySocket;
}
final protected function getSelector(string $content, string $tag): Crawler
{ {
$crawler = new Crawler($content); $crawler = new Crawler($content);
if ($this->getDebug()) { if ($this->getDebug()) {
log_message("debug", __FUNCTION__ . "=> " . $tag);
}
$crawler->filter($tag);
if ($isViewHTML) {
log_message("debug", sprintf( log_message("debug", sprintf(
"\n---------%s----------\ntag:%s\n%s\n-------------------\n", "\n------------%s HTML-------------\n%s\n-----------------------------------------------------\n",
__FUNCTION__, __FUNCTION__,
$tag, $crawler->filter($tag)->html()
$content
)); ));
exit;
} }
return $crawler->filter($tag); return $crawler->filter($tag);
} }
//--------미디어 URL관련------ protected function getChangeURL(string $url): string
private function getMediaUrlsByMediaType(string $media_type, Crawler $selector, array $options, array $urls = []): array {
return $url;
}
protected function getUrlsByMediaType(string $media_type, Crawler $selector, array $options, array $urls = []): array
{ {
$urls[$media_type] = []; $urls[$media_type] = [];
$selector->filter($options["tag"])->each( $selector->filter($options["tag"])->each(
function (Crawler $node) use (&$media_type, &$options, &$urls): void { function (Crawler $node) use (&$media_type, &$options, &$urls): void {
$url = $node->attr($options["attr"]); $url = $node->attr($options["attr"]);
log_message("debug", __FUNCTION__ . "-> {$media_type}[{$options["attr"]}]:{$url}"); switch ($media_type) {
if (!is_null($url)) { case 'video':
$urls[$media_type][] = $url; if ($url === null) {
$url = $node->children()->attr("src");
}
break;
}
if ($url !== null) {
$urls[$media_type][] = $this->getChangeURL($url);
} else {
log_message("debug", __FUNCTION__ . "-> {$media_type}[{$options["attr"]}]\n");
log_message("debug", $node->html());
} }
} }
); );
log_message("notice", "-----------" . __FUNCTION__ . "=> {$media_type} 작업완료--------");
return $urls; return $urls;
} }
//detailPage의 이미지나영상데이터가 있으면 URL과MediaType을 가져온다
final protected function getMediaUrls(string $response, string $tag, array $listInfo): array
{
$selector = $this->getSelector($response, $tag);
log_message("debug", "\n-----------detailPage Tag: {$tag}---------------\n{$selector->html()}\n---------------------------\n");
$urls = $this->getMediaUrlsByMediaType("image", $selector, ["tag" => "img", "attr" => "src"]);
$urls = $this->getMediaUrlsByMediaType("video", $selector, ["tag" => "video", "attr" => "src"], $urls);
// log_message("debug", "\n-------------------------\n" . var_export($urls, true) . "\n-----------------------\n");
log_message("notice", "-----------" . __FUNCTION__ . " 작업완료--------");
return array($listInfo, $urls);
}
//--------미디어 관련-------
private function media_save(int $file_sequence, string $media_type, string $file_name, string $content): mixed private function media_save(int $file_sequence, string $media_type, string $file_name, string $content): mixed
{ {
log_message("debug", __FUNCTION__ . " 원본파일 {$file_name} 작업 시작"); log_message("debug", __FUNCTION__ . " 원본파일 {$file_name} 작업 시작");
@ -138,11 +132,14 @@ abstract class MyCrawler extends CommonLibrary
$total = count($listInfos); $total = count($listInfos);
$i = 1; $i = 1;
foreach ($listInfos as $listInfo) { foreach ($listInfos as $listInfo) {
if ($this->getDebug()) {
$i = $max_limit;
}
if ($i <= $max_limit) { if ($i <= $max_limit) {
log_message("notice", "게시물 {$i}번째/총:{$total} {$listInfo["nickname"]} 작업시작"); log_message("notice", "게시물 {$i}번째/총:{$total} {$listInfo["nickname"]} 작업시작");
try { try {
//listInfo는 title,작성자,작성시간등등의 정보를 가지고 있어 detail_page 처리 안에서 바뀔 수 있으므로 다시 반환 받는다. //listInfo는 title,작성자,작성시간등등의 정보를 가지고 있어 detail_page 처리 안에서 바뀔 수 있으므로 다시 반환 받는다.
$this->detail_page($i, $listInfo); $listInfo = $this->detail_page($i, $listInfo);
} catch (\Exception $e) { } catch (\Exception $e) {
log_message("warning", sprintf( log_message("warning", sprintf(
"\n---%s {$i}번째/총:{$total} 오류---\n%s\n-----------------------------------------\n", "\n---%s {$i}번째/총:{$total} 오류---\n%s\n-----------------------------------------\n",

View File

@ -1,139 +0,0 @@
<?php
namespace App\Libraries\MyCrawler;
use App\Entities\Mangboard\UserEntity;
use App\Libraries\MySocket\WebSocket;
use App\Libraries\MyStorage\MangboardStorage;
use App\Models\Mangboard\BoardModel;
use App\Models\Mangboard\BoardsModel;
use Symfony\Component\DomCrawler\Crawler;
/**
 * Crawler for the "yamap" site.
 *
 * Reads a board list page, collects one info record per post
 * (title, nickname, detail_url, date, hit) and hands the records to
 * MyCrawler::main_process(), which calls detail_page() for each of them.
 * All CSS selectors and URLs are taken from "yamap.*" environment variables.
 */
class YamapCrawler extends MyCrawler
{
    /** @var string Board category; used as the suffix of the "board_<category>" name. */
    private $_category = "";

    /** @var UserEntity|null User the crawled posts are registered under. */
    private $_user_entity = null;

    /**
     * @param string     $host        Host passed to the WebSocket used for all page requests.
     * @param string     $category    Board category to crawl.
     * @param UserEntity $user_entity User passed on to the storage and board model.
     */
    public function __construct(string $host, string $category, UserEntity $user_entity)
    {
        parent::__construct(new WebSocket($host));
        $this->_category = $category;
        $this->_user_entity = $user_entity;
    }

    /**
     * Creates the storage object for this crawler's category and user.
     *
     * @return MangboardStorage
     */
    final protected function createMyStorage()
    {
        return new MangboardStorage($this->_category, $this->_user_entity);
    }

    // Sample of the detail-page markup this crawler was written against (abridged):
    // <div class="panel panel-default">
    //   <div class="text-center panel-heading-local-title text-bold">요즘 패션</div>
    //   <div style="margin:5px 10px;">
    //     <span class="pull-left dropdown">괴강고귀</span>
    //     <span class="pull-right">| 추천 (14) | 조회 (432)</span>
    //     <div class="clearfix"></div>
    //     <hr class="hr-xs-xs">
    //     <span class="pull-right">2024-09-14 01:53:45</span>
    //     <div class="clearfix"></div>
    //     <hr class="margin-top-5 margin-bottom-20">
    //     <div class="fr-view margin-bottom-30" id="read-content" style="word-break:break-all;">
    //       <p><img title="" class="cloudzoom" data-cloudzoom="zoomImage:'/newboard/yamoonfreeboard/uploads/humor/mceu_86177012011726246415487.jpg'" class="fr-fic fr-dii" src="/newboard/yamoonfreeboard/uploads/humor/mceu_86177012011726246415487.jpg" alt=""></p>
    //       <p>&nbsp;</p>
    //     </div>
    //   </div>
    //   <div id="freesubframe"></div>
    // </div>

    /**
     * Processes one post: fetches its detail page, downloads the media it
     * references and registers the post and its files.
     *
     * @param int   $cnt      Sequence number of the post within the current run.
     * @param array $listInfo Info record of the post; 'detail_url' is read here.
     *
     * @throws \Exception When media_process() yields no storages for the post.
     */
    protected function detail_page(int $cnt, array $listInfo): void
    {
        $response = $this->getMySocket()->getContent($listInfo['detail_url']);
        $tag = getenv("yamap.view.content.tag");
        // getMediaUrls() hands back the (possibly updated) info record together with the media URLs.
        [$listInfo, $media_urls] = $this->getMediaUrls($response, $tag, $listInfo);

        // Download the image/video sources that were found.
        $storages = $this->media_process($media_urls);
        if (!count($storages)) {
            throw new \Exception("등록할 자료가 없습니다.");
        }

        // Register the post in the board DB, then each file in the file DB.
        $board_name = "board_" . $this->_category;
        $boardsModel = new BoardsModel();
        $boards_entity = $boardsModel->getEntityByID($board_name);
        $boardModel = new BoardModel("mb_" . $board_name);
        $board_entity = $boardModel->createByCrawler(
            $boards_entity,
            $this->_user_entity,
            $cnt,
            $listInfo,
            $storages
        );
        foreach ($storages as $storage) {
            try {
                $storage->backend($boards_entity, $board_entity, $boardModel->getTable());
            } catch (\Exception $e) {
                // A single failing file is logged and must not stop the remaining files.
                log_message("notice", sprintf(
                    "\n---%s -> %s 게시물의 %s번째:%s 파일 등록 오류---\n%s\n--------------------------------\n",
                    __FUNCTION__,
                    $board_entity->getTitle(),
                    $storage->getOriginSequence(),
                    $storage->getOriginName(),
                    $e->getMessage()
                ));
            }
        }
    }

    /**
     * Entry point: builds the list of posts and runs main_process() on it.
     *
     * In debug mode a single test post described by the "yamap.view.test.*"
     * environment variables is used instead of crawling the list page.
     * Exceptions are logged as warnings and not re-thrown.
     *
     * @param int $max_limit Limit forwarded to main_process().
     */
    public function execute(int $max_limit): void
    {
        try {
            $listInfos = [];
            if ($this->getDebug()) {
                // main_process() iterates over $listInfos and expects every element to be
                // one info record, so the single test record has to be wrapped in a list.
                $now = date("Y-m-d H:i:s");
                $listInfos[] = [
                    'title' => getenv("yamap.view.test.title"),
                    'nickname' => getenv("yamap.view.test.nickname"),
                    'detail_url' => getenv("yamap.view.test.url"),
                    // Crawled records carry 'date'; 'time' is kept in case a consumer still reads it.
                    'date' => $now,
                    'time' => $now,
                    'hit' => 1,
                ];
            } else {
                $response = $this->getMySocket()->getContent(getenv("yamap.list.url.{$this->_category}"));
                $selector = $this->getSelector($response, getenv("yamap.list.tag"));
                // Visit every list item and collect one info record per accepted post.
                $selector->filter(getenv("yamap.list.item.tag"))->each(
                    function (Crawler $node) use (&$listInfos): void {
                        $nickname = $node->filter(getenv("yamap.list.item.nickname.tag"))->text();
                        $hit = $node->filter(getenv("yamap.list.item.hit.tag"))->text();
                        $date = $node->filter(getenv("yamap.list.item.date.tag"))->text();
                        // Posts written by the excluded nickname (e.g. the administrator) are skipped.
                        if ($nickname != getenv("yamap.list.item.nickname.except")) {
                            $link_node = $node->filter(getenv("yamap.list.item.link.tag"));
                            $detail_url = $link_node->attr("href");
                            // The title is the text of the link's last child element.
                            $title = $link_node->children()->last()->text();
                            $listInfos[] = [
                                'title' => $title,
                                'nickname' => $nickname,
                                'detail_url' => $detail_url,
                                'date' => $date,
                                'hit' => $hit,
                            ];
                        }
                    }
                );
            }
            if (!count($listInfos)) {
                throw new \Exception("Target URL이 없습니다.");
            }
            $this->main_process($max_limit, $listInfos);
            log_message("notice", __FUNCTION__ . " 작업이 완료되었습니다.");
        } catch (\Exception $e) {
            log_message("warning", sprintf(
                "\n---%s 오류---\n%s\n-----------------------------------------\n",
                __FUNCTION__,
                $e->getMessage()
            ));
        }
    }
}

View File

@ -1,116 +0,0 @@
<?php
namespace App\Libraries\MyCrawler;
use App\Entities\Mangboard\UserEntity;
use App\Libraries\MySocket\WebSocket;
use App\Libraries\MyStorage\MangboardStorage;
use App\Models\Mangboard\BoardModel;
use App\Models\Mangboard\BoardsModel;
use Symfony\Component\DomCrawler\Crawler;
/**
 * Crawler for the "yamoon" site.
 *
 * Reads a board list page, collects one info record per post
 * (title, nickname, detail_url, date, hit) and hands the records to
 * MyCrawler::main_process(), which calls detail_page() for each of them.
 * All CSS selectors and list URLs are taken from "yamoon.*" environment variables.
 */
class YamoonCrawler extends MyCrawler
{
    /** @var string Board category; used as the suffix of the "board_<category>" name. */
    private $_category = "";

    /** @var UserEntity|null User the crawled posts are registered under. */
    private $_user_entity = null;

    /**
     * @param string     $host        Host passed to the WebSocket used for all page requests.
     * @param string     $category    Board category to crawl.
     * @param UserEntity $user_entity User passed on to the storage and board model.
     */
    public function __construct(string $host, string $category, UserEntity $user_entity)
    {
        parent::__construct(new WebSocket($host));
        $this->_category = $category;
        $this->_user_entity = $user_entity;
    }

    /**
     * Creates the storage object for this crawler's category and user.
     *
     * @return MangboardStorage
     */
    final protected function createMyStorage()
    {
        return new MangboardStorage($this->_category, $this->_user_entity);
    }

    /**
     * Processes one post: fetches its detail page, downloads the media it
     * references and registers the post and its files.
     *
     * @param int   $cnt      Sequence number of the post within the current run.
     * @param array $listInfo Info record of the post; 'detail_url' is read here.
     *
     * @throws \Exception When media_process() yields no storages for the post.
     */
    protected function detail_page(int $cnt, array $listInfo): void
    {
        // The list page links are relative, so the board path is prepended.
        $response = $this->getMySocket()->getContent("/newboard/yamoonboard/" . $listInfo['detail_url']);
        // NOTE(review): reading the post time from the detail page
        // ("yamoon.view.regdate.tag") was disabled in the original code.
        $tag = getenv("yamoon.view.content.tag");
        // getMediaUrls() hands back the (possibly updated) info record together with the media URLs.
        [$listInfo, $media_urls] = $this->getMediaUrls($response, $tag, $listInfo);

        // Download the image/video sources that were found.
        $storages = $this->media_process($media_urls);
        if (!count($storages)) {
            throw new \Exception("등록할 자료가 없습니다.");
        }

        // Register the post in the board DB, then each file in the file DB.
        $board_name = "board_" . $this->_category;
        $boardsModel = new BoardsModel();
        $boards_entity = $boardsModel->getEntityByID($board_name);
        $boardModel = new BoardModel("mb_" . $board_name);
        $board_entity = $boardModel->createByCrawler(
            $boards_entity,
            $this->_user_entity,
            $cnt,
            $listInfo,
            $storages
        );
        foreach ($storages as $storage) {
            try {
                $storage->backend($boards_entity, $board_entity, $boardModel->getTable());
            } catch (\Exception $e) {
                // A single failing file is logged and must not stop the remaining files.
                log_message("notice", sprintf(
                    "\n---%s -> %s 게시물의 %s번째:%s 파일 등록 오류---\n%s\n--------------------------------\n",
                    __FUNCTION__,
                    $board_entity->getTitle(),
                    $storage->getOriginSequence(),
                    $storage->getOriginName(),
                    $e->getMessage()
                ));
            }
        }
    }

    /**
     * Entry point: builds the list of posts and runs main_process() on it.
     *
     * In debug mode a single test post described by the "yamoon.view.test.*"
     * environment variables is used instead of crawling the list page.
     * Exceptions are logged as warnings and not re-thrown.
     *
     * @param int $max_limit Limit forwarded to main_process().
     */
    public function execute(int $max_limit): void
    {
        try {
            $listInfos = [];
            if ($this->getDebug()) {
                // main_process() iterates over $listInfos and expects every element to be
                // one info record, so the single test record has to be wrapped in a list.
                $now = date("Y-m-d H:i:s");
                $listInfos[] = [
                    'title' => getenv("yamoon.view.test.title"),
                    'nickname' => getenv("yamoon.view.test.nickname"),
                    'detail_url' => getenv("yamoon.view.test.url"),
                    // Crawled records carry 'date'; 'time' is kept in case a consumer still reads it.
                    'date' => $now,
                    'time' => $now,
                    'hit' => 1,
                ];
            } else {
                $response = $this->getMySocket()->getContent(getenv("yamoon.list.url.{$this->_category}"));
                // Sample of one list cell this crawler was written against (abridged):
                // <td class="listvisited mobile-td subject-view">
                //   <a href="board-read.asp?fullboardname=yamoonfreeboard&mtablename=humor&num=89372&ref=85575&page=1" class="ya-tooltip mobile-bold mobile-height" title="...">
                //     졸고 있는 여군</a>
                //   <i class="fa fa-commenting-o" aria-hidden="true"></i> <span class="color-red small">6</span>
                //   <span class="visible-xs visible-sm small"><i class="fa fa-user-o" aria-hidden="true"></i> yeeyuu | <i class="fa fa-thumbs-o-up" aria-hidden="true"></i> 6 | <i class="fa fa-eye" aria-hidden="true"></i> 369 | No 89372 | 2024-09-13</span>
                // </td>
                $this->getSelector($response, getenv("yamoon.list.tag"))->each(
                    function (Crawler $node) use (&$listInfos): void {
                        $link_node = $node->filter(getenv("yamoon.list.item.link.tag"));
                        $detail_url = $link_node->attr("href");
                        $title = $link_node->text();
                        $info_node = $node->filter(getenv("yamoon.list.item.info.tag"));
                        // Info text layout per the sample: nickname | likes | hits | No <number> | date
                        $infos = array_map('trim', explode("|", $info_node->text()));
                        if (count($infos) < 5) {
                            // A row that does not follow the expected layout is skipped
                            // instead of failing on a missing array offset.
                            log_message("notice", "목록 항목 정보 형식이 올바르지 않아 건너뜁니다: {$title}");
                            return;
                        }
                        $listInfos[] = [
                            'title' => $title,
                            'detail_url' => $detail_url,
                            'nickname' => $infos[0],
                            'hit' => $infos[2],
                            'date' => $infos[4],
                        ];
                    }
                );
            }
            if (!count($listInfos)) {
                throw new \Exception("Target URL이 없습니다.");
            }
            $this->main_process($max_limit, $listInfos);
            log_message("notice", __FUNCTION__ . " 작업이 완료되었습니다.");
        } catch (\Exception $e) {
            log_message("warning", sprintf(
                "\n---%s 오류---\n%s\n-----------------------------------------\n",
                __FUNCTION__,
                $e->getMessage()
            ));
        }
    }
}