178 lines
9.9 KiB
PHP
178 lines
9.9 KiB
PHP
<?php
|
|
|
|
namespace App\Libraries\MyCrawler\Mangboard;
|
|
|
|
use App\Entities\Mangboard\UserEntity;
|
|
use App\Libraries\MySocket\WebSocket;
|
|
use DateTime;
|
|
use Symfony\Component\DomCrawler\Crawler;
|
|
|
|
/**
 * Crawler for a sir.kr board.
 *
 * Reads a list page and each post's detail page, extracts the post date and
 * the media URLs, and hands the results to the processing steps inherited
 * from MangboardCrawler (main_process / media_process / backend_process).
 * Every URL and CSS selector is read from `sir.*` environment variables.
 */
class SirCrawler extends MangboardCrawler
{
    /**
     * Host segment removed from URLs found in the markup.
     *
     * The site emits protocol-relative URLs such as "//sir.kr/cm_humor/191449";
     * removing this segment leaves the host-relative path "/cm_humor/191449".
     */
    private const SIR_HOST_SEGMENT = "/sir.kr/";

    /** Date format shown on the detail page, e.g. "2024.09.13 00:24:04". */
    private const DETAIL_DATE_FORMAT = 'Y.m.d H:i:s';

    /** Date format written into $listInfo['date'], e.g. "2024-09-13 00:24:04". */
    private const OUTPUT_DATE_FORMAT = 'Y-m-d H:i:s';

    /**
     * Forwards all arguments unchanged to the parent crawler.
     */
    public function __construct(string $host, string $board_name, UserEntity $user_entity)
    {
        parent::__construct($host, $board_name, $user_entity);
    }

    /**
     * Applies the parent's URL rewriting, then strips the "/sir.kr/" segment
     * from the result.
     */
    protected function changeURLByMediaType(string $url): string
    {
        return str_replace(self::SIR_HOST_SEGMENT, "", parent::changeURLByMediaType($url));
    }

    // Abridged sample of the detail-page markup (post content):
    //
    //   <article class="sir_vbo ">
    //     <header class="vbo_head">
    //       <strong id="head_title">...title...</strong>
    //       <ul id="head_info">
    //         <li id="info_name">...author popup links...</li>
    //         <li id="info_date"><time datetime='2024-09-13T00:24:04+09:00'>2024.09.13 00:24:04</time></li>
    //         <li id="info_hit">...<span>245</span></li>
    //         <li id="info_cmt"><a href="#vcmt_anchor" class="comment">...<span>3</span></a></li>
    //       </ul>
    //     </header>
    //     <section id="vbo_con">
    //       <div class="con_inner">
    //         <div id="con_pix">
    //           <video autoplay="autoplay" loop="loop" preload="auto" playsinline webkit-playsinline muted>
    //             <source src="//sir.kr/data/file/cm_humor/....mp4" type="video/mp4" />
    //           </video>
    //         </div>
    //         <p>..</p>

    /**
     * Fetches one post's detail page, normalises its date, collects its media
     * URLs and (outside debug mode) stores the media and registers the post.
     *
     * @param int   $cnt      Passed through unchanged to backend_process().
     * @param array $listInfo List entry; must contain 'detail_url'.
     *
     * @return array $listInfo with 'date' rewritten as "Y-m-d H:i:s".
     *
     * @throws \Exception When the page date cannot be parsed, when debug mode
     *                    is on (the exception message carries the dump), or
     *                    when media_process() yields nothing to register.
     */
    protected function detail_page(int $cnt, array $listInfo): array
    {
        $response = $this->getMySocket()->getContent($listInfo['detail_url']);

        // Post date: the page shows "2024.09.13 00:24:04"; store "2024-09-13 00:24:04".
        $selector = $this->getSelector($response, getenv("sir.view.date.tag"));
        $listInfo['date'] = $this->normalizeDetailDate($selector->text());

        // Post content: gather image and video URLs from the content node.
        $tag = getenv("sir.view.content.tag");
        $selector = $this->getSelector($response, $tag, true);
        $media_urls = $this->getUrlsByMediaType("image", $selector, ["tag" => "img", "attr" => "src"]);
        // NOTE(review): in the sample markup above the `src` attribute sits on the
        // child <source> element, not on <video>; confirm getUrlsByMediaType()
        // handles that case.
        $media_urls = $this->getUrlsByMediaType("video", $selector, ["tag" => "video", "attr" => "src"], $media_urls);

        // Debug mode never stores anything: it aborts with a dump of what was parsed.
        if ($this->getDebug()) {
            throw new \Exception(sprintf(
                "\n--------------%s Debug--------------\n%s%s\n---------------------------------------\n",
                __FUNCTION__,
                var_export($listInfo, true),
                var_export($media_urls, true)
            ));
        }

        $storages = $this->media_process($media_urls);
        if (!count($storages)) {
            throw new \Exception("등록할 자료가 없습니다.");
        }
        $this->backend_process($cnt, $listInfo, $storages);

        return $listInfo;
    }

    /**
     * Converts the detail page's date text to the stored date format.
     *
     * DateTime::createFromFormat() returns false on a parse failure; calling
     * format() on that would raise an \Error that the crawler's
     * `catch (\Exception)` handler does not catch, so the failure is reported
     * as an \Exception instead.
     *
     * @throws \Exception When $raw does not match DETAIL_DATE_FORMAT.
     */
    private function normalizeDetailDate(string $raw): string
    {
        $text = trim($raw);
        $parsed = DateTime::createFromFormat(self::DETAIL_DATE_FORMAT, $text);
        if ($parsed === false) {
            throw new \Exception(sprintf("작성시간 형식이 올바르지 않습니다: %s", $text));
        }

        return $parsed->format(self::OUTPUT_DATE_FORMAT);
    }

    // Abridged sample of one list-page item:
    //
    //   <li class="lbo_li li_bg0 lbo_like" style="z-index:30">
    //     <div class="li_title" style="margin:0 270px 0 50px;">
    //       <a href="//sir.kr/cm_humor/191449" class="title_link">...title...<span class="cnt_cmt">3</span></a>
    //     <div class="li_num">21967</div>
    //     <div class="li_info">
    //       <span class="info_span info_nick">
    //         <span class="sv_wrap">
    //           <a href="//sir.kr/bbs/profile.php?mb_id=hadirector" class="sv_member" ...>
    //             <span class="sir_mb_icon"></span> <span class="member">...nickname...</span></a>
    //           ...profile / search / member-post links...
    //         </span>
    //       </span>
    //       <span class="info_span info_date"> 24.09.13</span>
    //       <span class="info_span info_like"><span class="like_good1">5</span></span>
    //       <span class="info_span info_hit">244</span>
    //     </div>
    //   </li>

    /**
     * Crawls the list page and passes the collected entries to main_process().
     *
     * In debug mode the configured test detail URL is processed first; since
     * detail_page() always throws in debug mode, that ends the run in the
     * catch block below. Any \Exception is logged as a warning rather than
     * propagated to the caller.
     *
     * @param int $max_limit Passed through unchanged to main_process().
     */
    public function execute(int $max_limit): void
    {
        try {
            if ($this->getDebug()) {
                $this->detail_page(1, ['detail_url' => getenv("sir.view.test.url")]);
            }

            $response = $this->getMySocket()->getContent(getenv("sir.list.url"));

            $listInfos = [];
            $this->getSelector($response, getenv("sir.list.tag"))->each(
                function (Crawler $node) use (&$listInfos): void {
                    $link_node = $node->filter(getenv("sir.list.item.link.tag"));

                    // hrefs are protocol-relative ("//sir.kr/cm_humor/191449"); dropping
                    // the "/sir.kr/" segment leaves the path "/cm_humor/191449".
                    // attr() may return null, which str_replace() no longer accepts
                    // silently on PHP 8.1+, hence the cast.
                    $detail_url = str_replace(self::SIR_HOST_SEGMENT, "", (string) $link_node->attr("href"));

                    $listInfos[] = [
                        'title' => $link_node->text(),
                        'nickname' => $node->filter(getenv("sir.list.item.nickname.tag"))->text(),
                        'detail_url' => $detail_url,
                        // Left empty here; detail_page() fills it in from the post itself.
                        'date' => "",
                        'hit' => $node->filter(getenv("sir.list.item.hit.tag"))->text(),
                    ];
                }
            );

            if (!count($listInfos)) {
                throw new \Exception("Target URL이 없습니다.");
            }

            $this->main_process($max_limit, $listInfos);
            log_message("notice", __FUNCTION__ . " 작업이 완료되었습니다.");
        } catch (\Exception $e) {
            log_message("warning", sprintf(
                "\n---%s 오류---\n%s\n-----------------------------------------\n",
                __FUNCTION__,
                $e->getMessage()
            ));
        }
    }
}
|