Automation/app/Libraries/MyCrawler/Mangboard/SirCrawler.php
2024-09-17 19:35:45 +09:00

178 lines
9.9 KiB
PHP

<?php
namespace App\Libraries\MyCrawler\Mangboard;
use App\Entities\Mangboard\UserEntity;
use App\Libraries\MySocket\WebSocket;
use DateTime;
use Symfony\Component\DomCrawler\Crawler;
/**
 * Crawler for sir.kr list and detail pages, built on top of MangboardCrawler.
 *
 * All URLs and CSS selectors are read from environment keys prefixed with
 * "sir." (sir.list.url, sir.list.tag, sir.list.item.*.tag, sir.view.*.tag,
 * sir.view.test.url).
 */
class SirCrawler extends MangboardCrawler
{
    /**
     * Forwards all arguments unchanged to the parent constructor.
     */
    public function __construct(string $host, string $board_name, UserEntity $user_entity)
    {
        parent::__construct($host, $board_name, $user_entity);
    }

    /**
     * Applies the parent URL conversion and then removes every "/sir.kr/"
     * occurrence from the result (e.g. a protocol-relative
     * "//sir.kr/data/a.mp4" becomes "/data/a.mp4" if the parent leaves it
     * unchanged).
     */
    protected function changeURLByMediaType(string $url): string
    {
        return str_replace("/sir.kr/", "", parent::changeURLByMediaType($url));
    }

    // Sample markup of a detail (view) page, kept verbatim as selector reference:
    // <article class="sir_vbo ">
    // <header class="vbo_head">
    // <h2 class="head_h2">할아버지의 마술 정보</h2>
    // <strong id="head_title">
    // 할아버지의 마술 </strong>
    // <ul id="head_info">
    // <li id="info_name"><span class="sv_wrap">
    // <a href="//sir.kr/bbs/profile.php?mb_id=hadirector" class="sv_member" title="감독님 자기소개" target="_blank" rel="nofollow" onclick="return false;"><span class="sir_mb_icon"></span> <span class="member">감독님</span></a>
    // <span class="sv">
    // <a href="//sir.kr/bbs/profile.php?mb_id=hadirector" onclick="win_profile(this.href); return false;"><i class="fa fa-user" aria-hidden="true"></i> 자기소개</a>
    // <a href="//sir.kr/cm_humor?sca=&amp;sfl=mb_id,1&amp;stx=hadirector"><i class="fa fa-search" aria-hidden="true"></i> 아이디로 검색</a>
    // <a href="//sir.kr/main/member/?mb_id=hadirector"><i class="fa fa-file-text-o" aria-hidden="true"></i> 회원게시물</a>
    // </span>
    // <noscript class="sv_nojs"><span class="sv">
    // <a href="//sir.kr/bbs/profile.php?mb_id=hadirector" onclick="win_profile(this.href); return false;"><i class="fa fa-user" aria-hidden="true"></i> 자기소개</a>
    // <a href="//sir.kr/cm_humor?sca=&amp;sfl=mb_id,1&amp;stx=hadirector"><i class="fa fa-search" aria-hidden="true"></i> 아이디로 검색</a>
    // <a href="//sir.kr/main/member/?mb_id=hadirector"><i class="fa fa-file-text-o" aria-hidden="true"></i> 회원게시물</a>
    // </span>
    // </noscript></span>&nbsp;(210.♡.♡.13)</li>
    // <li id="info_date"><time datetime='2024-09-13T00:24:04+09:00'>2024.09.13 00:24:04</time></li>
    // <li id="info_hit"> 조회 <span>245</span>
    // </li>
    // <li id="info_cmt">
    // <a href="#vcmt_anchor" class="comment">댓글 <span>3</span></a>
    // </li>
    // </ul>
    // <div id="head_img"><span class='sir_mb_img' title='회원정보에 사진을 올려주세요.'></span></div>
    // </header>
    // <script>
    // $(".vcmt-btn").click(function() {
    // $('html, body').animate({
    // scrollTop: $("#vcmt_anchor").offset().top - 100
    // }, 300);
    // });
    // </script>
    // <ul class="sir_vbo_cmd link">
    // <li><a href="javascript:void(0)" class="sir_b01 sir_prev"><span class="sound_only">이전 게시글</span><i class="fa fa-angle-left"></i></a></li>
    // <li><a href="//sir.kr/cm_humor/191445" class="sir_b01 sir_next"><span class="sound_only">다음 게시글</span><i class="fa fa-angle-right"></i></a></li>
    // </ul>
    // <ul class="sir_vbo_com" >
    // <li><a href="//sir.kr/cm_humor" class="sir_b01">목록</a></li>
    // </ul>
    // <section id="vbo_con">
    // <h2 class="con_h2">본문</h2>
    // <div class="con_inner">
    // <div id="con_pix">
    // <video autoplay="autoplay" loop="loop" preload="auto" playsinline webkit-playsinline muted>
    // <source src="//sir.kr/data/file/cm_humor/3535243533_CiH6Iv9O_ee170eeec15e748d9bfcc895836c71d9829c07fb.mp4" type="video/mp4" />
    // </video>
    // </div>
    // <p>..</p><div style=

    /**
     * Fetches one detail page, normalises its posting date, collects the
     * image/video URLs found in the content area and hands them to
     * media_process() / backend_process().
     *
     * @param int   $cnt      Sequence number forwarded to backend_process().
     * @param array $listInfo List entry; 'detail_url' is read and 'date' is overwritten.
     *
     * @return array The list entry with 'date' rewritten as "Y-m-d H:i:s".
     *
     * @throws \Exception In debug mode (carries a dump of the parsed data), when the
     *                    date text cannot be parsed, or when media_process() returns
     *                    nothing to register.
     */
    protected function detail_page(int $cnt, array $listInfo): array
    {
        $response = $this->getMySocket()->getContent($listInfo['detail_url']);

        // Posting time: the page shows "2024.09.13 00:24:04", which is rewritten
        // as "2024-09-13 00:24:04".
        $selector = $this->getSelector($response, getenv("sir.view.date.tag"));
        $raw_date = trim($selector->text());
        $posted_at = DateTime::createFromFormat('Y.m.d H:i:s', $raw_date);
        if ($posted_at === false) {
            // createFromFormat() signals a mismatch with false; calling format() on it
            // would raise an Error that the catch (\Exception) in execute() misses.
            throw new \Exception(sprintf("작성시간 형식이 올바르지 않습니다. [%s]", $raw_date));
        }
        $listInfo['date'] = $posted_at->format('Y-m-d H:i:s');

        // Post content: gather media URLs from the content area.
        $tag = getenv("sir.view.content.tag");
        $selector = $this->getSelector($response, $tag, true);
        $media_urls = $this->getUrlsByMediaType("image", $selector, ["tag" => "img", "attr" => "src"]);
        // NOTE(review): the sample markup above carries the video URL on a nested
        // <source src="..."> element, not on <video> itself; confirm that
        // getUrlsByMediaType() resolves that case with these options.
        $media_urls = $this->getUrlsByMediaType("video", $selector, ["tag" => "video", "attr" => "src"], $media_urls);

        if ($this->getDebug()) {
            // Debug mode stops here and reports what was parsed instead of storing it.
            throw new \Exception(sprintf(
                "\n--------------%s Debug--------------\n%s%s\n---------------------------------------\n",
                __FUNCTION__,
                var_export($listInfo, true),
                var_export($media_urls, true)
            ));
        } else {
            $storages = $this->media_process($media_urls);
            if (!count($storages)) {
                throw new \Exception("등록할 자료가 없습니다.");
            }
            $this->backend_process($cnt, $listInfo, $storages);
        }
        return $listInfo;
    }

    // Sample markup of one list row, kept verbatim as selector reference:
    // <li class="lbo_li li_bg0 lbo_like" style="z-index:30">
    // <div class="li_title" style="margin:0 270px 0 50px;">
    // <a href="//sir.kr/cm_humor/191449" class="title_link">할아버지의 마술 <i class="co-ico co-ico-small fa fa-folder-o"></i><i class="co-ico co-ico-small fa fa-play-circle"></i><span class="cnt_cmt">3</span></a>
    // <div class="li_num">21967</div>
    // <div class="li_info">
    // <span class="info_span info_nick">
    // <span class="sv_wrap">
    // <a href="//sir.kr/bbs/profile.php?mb_id=hadirector" class="sv_member" title="감독님 자기소개" target="_blank" rel="nofollow" onclick="return false;"><span class="sir_mb_icon"></span> <span class="member">감독님</span></a>
    // <span class="sv">
    // <a href="//sir.kr/bbs/profile.php?mb_id=hadirector" onclick="win_profile(this.href); return false;"><i class="fa fa-user" aria-hidden="true"></i> 자기소개</a>
    // <a href="//sir.kr/cm_humor?sca=&amp;sfl=mb_id,1&amp;stx=hadirector"><i class="fa fa-search" aria-hidden="true"></i> 아이디로 검색</a>
    // <a href="//sir.kr/main/member/?mb_id=hadirector"><i class="fa fa-file-text-o" aria-hidden="true"></i> 회원게시물</a>
    // </span>
    // <noscript class="sv_nojs">
    // <span class="sv">
    // <a href="//sir.kr/bbs/profile.php?mb_id=hadirector" onclick="win_profile(this.href); return false;"><i class="fa fa-user" aria-hidden="true"></i> 자기소개</a>
    // <a href="//sir.kr/cm_humor?sca=&amp;sfl=mb_id,1&amp;stx=hadirector"><i class="fa fa-search" aria-hidden="true"></i> 아이디로 검색</a>
    // <a href="//sir.kr/main/member/?mb_id=hadirector"><i class="fa fa-file-text-o" aria-hidden="true"></i> 회원게시물</a>
    // </span>
    // </noscript>
    // </span>
    // </span>
    // <span class="info_span info_date"> 24.09.13</span>
    // <span class="info_span info_like">
    // <span class="like_good1">5</span>
    // </span>
    // <span class="info_span info_hit">244</span>
    // </div>
    // </li>

    /**
     * Reads the list page, builds one entry per usable row and passes the
     * entries to main_process().
     *
     * Any \Exception raised along the way is caught here and written to the
     * log at "warning" level, so this method does not rethrow it.
     *
     * @param int $max_limit Forwarded unchanged to main_process().
     */
    public function execute(int $max_limit): void
    {
        try {
            $listInfos = [];
            if ($this->getDebug()) {
                // detail_page() always throws in debug mode, so the list crawl
                // below is not reached; only the configured test URL is parsed.
                $this->detail_page(1, ['detail_url' => getenv("sir.view.test.url")]);
            }
            $response = $this->getMySocket()->getContent(getenv("sir.list.url"));
            $this->getSelector($response, getenv("sir.list.tag"))->each(
                function (Crawler $node) use (&$listInfos): void {
                    $link_node = $node->filter(getenv("sir.list.item.link.tag"));
                    $nickname_node = $node->filter(getenv("sir.list.item.nickname.tag"));
                    $hit_node = $node->filter(getenv("sir.list.item.hit.tag"));

                    // attr()/text() throw on an empty node list; without this guard a
                    // single incomplete row would abort the whole crawl.
                    if ($link_node->count() === 0 || $nickname_node->count() === 0 || $hit_node->count() === 0) {
                        log_message("debug", "SirCrawler: 필수 항목이 없는 목록 행을 건너뜁니다.");
                        return;
                    }

                    // The href is protocol-relative ("//sir.kr/cm_humor/191449");
                    // dropping "/sir.kr/" leaves the host-relative path. attr() may
                    // return null, hence the cast before str_replace().
                    $detail_url = str_replace("/sir.kr/", "", (string) $link_node->attr("href"));
                    if ($detail_url === "") {
                        log_message("debug", "SirCrawler: href가 없는 목록 행을 건너뜁니다.");
                        return;
                    }

                    // 'date' stays empty here; detail_page() fills it from the view page.
                    $listInfos[] = [
                        'title' => $link_node->text(),
                        'nickname' => $nickname_node->text(),
                        'detail_url' => $detail_url,
                        'date' => "",
                        'hit' => $hit_node->text(),
                    ];
                }
            );
            if (!count($listInfos)) {
                throw new \Exception("Target URL이 없습니다.");
            }
            $this->main_process($max_limit, $listInfos);
            log_message("notice", __FUNCTION__ . " 작업이 완료되었습니다.");
        } catch (\Exception $e) {
            log_message("warning", sprintf(
                "\n---%s 오류---\n%s\n-----------------------------------------\n",
                __FUNCTION__,
                $e->getMessage()
            ));
        }
    }
}