Automation/app/Libraries/MyCrawler/Sir.php
2024-09-22 01:04:33 +09:00

123 lines
7.2 KiB
PHP

<?php
namespace App\Libraries\MyCrawler;
use DateTime;
use Symfony\Component\DomCrawler\Crawler;
/**
 * Crawler for sir.kr boards.
 *
 * All site-specific configuration (list URL, detail-page test URL, selector
 * expressions, per-board item limit) is read through getenv() from keys that
 * start with "sir."; keys ending in the board name are resolved with
 * getMyStorage()->getBoardName().
 */
class Sir extends MyCrawler
{
    /**
     * Forwards both collaborators to the parent crawler unchanged.
     *
     * @param mixed $mySocket  Passed to the parent constructor; presumably the object
     *                         later returned by getMySocket() (confirm in MyCrawler).
     * @param mixed $myStorage Passed to the parent constructor; presumably the object
     *                         later returned by getMyStorage() (confirm in MyCrawler).
     */
    public function __construct($mySocket, $myStorage)
    {
        parent::__construct($mySocket, $myStorage);
    }

    /**
     * Applies the parent's URL conversion, then removes the "/sir.kr/" host segment.
     *
     * Note that str_replace() removes EVERY occurrence of "/sir.kr/", not only a
     * leading one. For an href such as "//sir.kr/cm_humor/191445" (and assuming the
     * parent leaves it untouched) the result is "/cm_humor/191445".
     *
     * @param string $url Raw href taken from the list page.
     *
     * @return string The converted URL without the "/sir.kr/" segment.
     */
    protected function changeURLByCrawler(string $url): string
    {
        return str_replace("/sir.kr/", "", parent::changeURLByCrawler($url));
    }

    // Detail (view) page markup sample.
    // Korean UI text has been translated to English; tags, ids, classes and hrefs are as captured.
    // <article class="sir_vbo ">
    // <header class="vbo_head">
    // <h2 class="head_h2">Grandfather's Magic info</h2>
    // <strong id="head_title">
    // Grandfather's Magic </strong>
    // <ul id="head_info">
    // <li id="info_name"><span class="sv_wrap">
    // <a href="//sir.kr/bbs/profile.php?mb_id=hadirector" class="sv_member" title="Director's profile" target="_blank" rel="nofollow" onclick="return false;"><span class="sir_mb_icon"></span> <span class="member">Director</span></a>
    // <span class="sv">
    // <a href="//sir.kr/bbs/profile.php?mb_id=hadirector" onclick="win_profile(this.href); return false;"><i class="fa fa-user" aria-hidden="true"></i> Profile</a>
    // <a href="//sir.kr/cm_humor?sca=&amp;sfl=mb_id,1&amp;stx=hadirector"><i class="fa fa-search" aria-hidden="true"></i> Search by ID</a>
    // <a href="//sir.kr/main/member/?mb_id=hadirector"><i class="fa fa-file-text-o" aria-hidden="true"></i> Member posts</a>
    // </span>
    // <noscript class="sv_nojs"><span class="sv">
    // <a href="//sir.kr/bbs/profile.php?mb_id=hadirector" onclick="win_profile(this.href); return false;"><i class="fa fa-user" aria-hidden="true"></i> Profile</a>
    // <a href="//sir.kr/cm_humor?sca=&amp;sfl=mb_id,1&amp;stx=hadirector"><i class="fa fa-search" aria-hidden="true"></i> Search by ID</a>
    // <a href="//sir.kr/main/member/?mb_id=hadirector"><i class="fa fa-file-text-o" aria-hidden="true"></i> Member posts</a>
    // </span>
    // </noscript></span>&nbsp;(210.♡.♡.13)</li>
    // <li id="info_date"><time datetime='2024-09-13T00:24:04+09:00'>2024.09.13 00:24:04</time></li>
    // <li id="info_hit"> Views <span>245</span>
    // </li>
    // <li id="info_cmt">
    // <a href="#vcmt_anchor" class="comment">Comments <span>3</span></a>
    // </li>
    // </ul>
    // <div id="head_img"><span class='sir_mb_img' title='Please upload a photo to your member profile.'></span></div>
    // </header>
    // <script>
    // $(".vcmt-btn").click(function() {
    // $('html, body').animate({
    // scrollTop: $("#vcmt_anchor").offset().top - 100
    // }, 300);
    // });
    // </script>
    // <ul class="sir_vbo_cmd link">
    // <li><a href="javascript:void(0)" class="sir_b01 sir_prev"><span class="sound_only">Previous post</span><i class="fa fa-angle-left"></i></a></li>
    // <li><a href="//sir.kr/cm_humor/191445" class="sir_b01 sir_next"><span class="sound_only">Next post</span><i class="fa fa-angle-right"></i></a></li>
    // </ul>
    // <ul class="sir_vbo_com" >
    // <li><a href="//sir.kr/cm_humor" class="sir_b01">List</a></li>
    // </ul>
    // <section id="vbo_con">
    // <h2 class="con_h2">Body</h2>
    // <div class="con_inner">
    // <div id="con_pix">
    // <video autoplay="autoplay" loop="loop" preload="auto" playsinline webkit-playsinline muted>
    // <source src="//sir.kr/data/file/cm_humor/3535243533_CiH6Iv9O_ee170eeec15e748d9bfcc895836c71d9829c07fb.mp4" type="video/mp4" />
    // </video>
    // </div>
    //

    /**
     * Fetches a post's detail page, fills in its posted time and returns the content selector.
     *
     * The list page does not supply the date, so it is read from the detail page
     * (selector from "sir.view.date.tag") and converted from the site's
     * "Y.m.d H:i:s" form to "Y-m-d H:i:s",
     * e.g. "2024.09.13 00:24:04" -> "2024-09-13 00:24:04".
     *
     * @param array $listInfo List entry; 'detail_url' is read and 'date' is overwritten.
     *
     * @return array Two elements: the result of getSelector() for "sir.view.content.tag",
     *               and the updated $listInfo.
     *
     * @throws \RuntimeException When the date text does not match "Y.m.d H:i:s".
     */
    protected function getDetailSelector(array $listInfo): array
    {
        $response = $this->getMySocket()->getContent($listInfo['detail_url']);

        // Posted time as displayed on the detail page.
        $rawDate = trim($this->getSelector($response, getenv("sir.view.date.tag"))->text());

        // createFromFormat() returns false on a mismatch; fail with a message that
        // names the offending text and page instead of calling format() on false.
        $postedAt = DateTime::createFromFormat('Y.m.d H:i:s', $rawDate);
        if ($postedAt === false) {
            throw new \RuntimeException(
                "Unexpected date text '{$rawDate}' on '{$listInfo['detail_url']}' (expected format 'Y.m.d H:i:s')."
            );
        }
        $listInfo['date'] = $postedAt->format('Y-m-d H:i:s');

        return [$this->getSelector($response, tag: getenv("sir.view.content.tag")), $listInfo];
    }

    // List item markup sample.
    // NOTE(review): the hrefs below point at board-read.asp?fullboardname=yamoonfreeboard rather than
    // //sir.kr/..., so this sample looks copied from another board's crawler - confirm and replace
    // with a real sir.kr list row.
    // <td class="listvisited mobile-td subject-view">
    // <a href="board-read.asp?fullboardname=yamoonfreeboard&mtablename=humor&num=89372&ref=85575&page=1" class="ya-tooltip mobile-bold mobile-height" title="<p><br><br><video autoplay=&quot;autoplay&quot; loop=&quot;loop&quot; muted=&quot;&quot; controls=&quot;controls&quot; width=&quot;560&quot;&quot; height=&quot; &quot;> <source src=&quot; https://files.bepick.net/bbs/2024/09/c2a20ab5771cbb934940551859fce1c8_769966583.mp4 &quot;> </video><br><br><br></p">
    // A dozing female soldier</a>
    // <i class="fa fa-commenting-o" aria-hidden="true"></i> <span class="color-red small">6</span>
    // <span class="visible-xs visible-sm small"><i class="fa fa-user-o" aria-hidden="true"></i> yeeyuu | <i class="fa fa-thumbs-o-up" aria-hidden="true"></i> 6 | <i class="fa fa-eye" aria-hidden="true"></i> 369 | No 89372 | 2024-09-13</span>
    // </td>

    /**
     * Collects the list entries for the current board and hands them to list_process().
     *
     * In debug mode ($this->isDebug) a single fixed test entry is used, pointing at
     * the URL from "sir.view.test.url.<board>". Otherwise the list page from
     * "sir.list.url.<board>" is fetched and one entry is built per node matched by
     * "sir.list.tag.<board>".
     *
     * @throws \Exception When no list entry could be collected.
     */
    public function execute(): void
    {
        $boardName = $this->getMyStorage()->getBoardName();
        $listInfos = [];

        if ($this->isDebug) {
            $listInfos[] = [
                'title' => 'test_title',
                'nickname' => 'test_name',
                'hit' => 1,
                'date' => date("Y-m-d H:i:s"),
                'detail_url' => getenv("sir.view.test.url.{$boardName}"),
            ];
        } else {
            $response = $this->getMySocket()->getContent(getenv("sir.list.url.{$boardName}"));
            $this->getSelector($response, getenv("sir.list.tag.{$boardName}"))->each(
                function (Crawler $node) use (&$listInfos): void {
                    $nickname = $node->filter(getenv("sir.list.item.nickname.tag"))->text();
                    $hit = $node->filter(getenv("sir.list.item.hit.tag"))->text();

                    // The same anchor supplies both the title and the detail URL.
                    $linkNode = $node->filter(getenv("sir.list.item.link.tag"));
                    // Strip the sir.kr host segment from the href.
                    $detailUrl = $this->changeURLByCrawler($linkNode->attr("href"));
                    $title = $linkNode->text();

                    // 'date' stays empty here; it is filled in from the detail page.
                    $listInfos[] = [
                        'title' => $title,
                        'nickname' => $nickname,
                        'detail_url' => $detailUrl,
                        'date' => "",
                        'hit' => $hit,
                    ];
                }
            );
        }

        if (count($listInfos) === 0) {
            throw new \Exception("Target URL이 없습니다.");
        }

        $this->list_process(intval(getenv("sir.list.max_limit.{$boardName}")), $listInfos);
        log_message("notice", __FUNCTION__ . " 작업이 완료되었습니다.");
    }
}