144 lines
8.2 KiB
PHP
144 lines
8.2 KiB
PHP
<?php
|
|
|
|
namespace App\Controllers\Mangboard\Crawler;
|
|
|
|
use App\Controllers\Mangboard\MyCrawler;
|
|
use Psr\Log\LoggerInterface;
|
|
use DateTime;
|
|
use CodeIgniter\HTTP\ResponseInterface;
|
|
use CodeIgniter\HTTP\RequestInterface;
|
|
use Symfony\Component\DomCrawler\Crawler;
|
|
|
|
/**
 * Crawler for sir.kr boards.
 *
 * All selectors and URLs are read from environment variables prefixed with
 * "sir." (host, list URL/tag per board, item tags, detail-page tags), so this
 * class contains only the flow: collect list rows, then hand them to
 * list_process() of the parent crawler.
 */
class SirCrawler extends MyCrawler
{
    /**
     * Delegates controller initialisation to the parent crawler unchanged.
     */
    public function initController(RequestInterface $request, ResponseInterface $response, LoggerInterface $logger)
    {
        parent::initController($request, $response, $logger);
    }

    /**
     * Returns the host URL configured in the "sir.host.url" environment variable.
     *
     * @return string The configured value, or an empty string when the variable is not set.
     */
    final protected function getHost(): string
    {
        // getenv() yields false for an unset variable; the cast makes the
        // resulting empty-string fallback explicit instead of implicit.
        return (string) getenv("sir.host.url");
    }

    /**
     * Normalises a crawled URL via the parent implementation and then removes
     * every occurrence of "/sir.kr/" from the result.
     *
     * @param string $url URL as found in the crawled markup.
     *
     * @return string The rewritten URL.
     */
    protected function changeURLByCrawler(string $url): string
    {
        return str_replace("/sir.kr/", "", parent::changeURLByCrawler($url));
    }

    // Sample detail-page markup from sir.kr (post content, abridged):
    //   <article class="sir_vbo ">
    //     <header class="vbo_head">
    //       <strong id="head_title">...post title...</strong>
    //       <ul id="head_info">
    //         <li id="info_name"><span class="sv_wrap">
    //           <a href="//sir.kr/bbs/profile.php?mb_id=hadirector" class="sv_member" ...><span class="member">...nickname...</span></a>
    //           ...
    //         </span> (210.♡.♡.13)</li>
    //         <li id="info_date"><time datetime='2024-09-13T00:24:04+09:00'>2024.09.13 00:24:04</time></li>
    //         <li id="info_hit"> ... <span>245</span></li>
    //         <li id="info_cmt"><a href="#vcmt_anchor" class="comment">... <span>3</span></a></li>
    //       </ul>
    //     </header>
    //     <ul class="sir_vbo_cmd link">
    //       <li><a href="javascript:void(0)" class="sir_b01 sir_prev">...</a></li>
    //       <li><a href="//sir.kr/cm_humor/191445" class="sir_b01 sir_next">...</a></li>
    //     </ul>
    //     <ul class="sir_vbo_com"><li><a href="//sir.kr/cm_humor" class="sir_b01">...</a></li></ul>
    //     <section id="vbo_con">
    //       <div class="con_inner">
    //         <div id="con_pix">
    //           <video autoplay="autoplay" loop="loop" preload="auto" playsinline webkit-playsinline muted>
    //             <source src="//sir.kr/data/file/cm_humor/....mp4" type="video/mp4" />
    //           </video>
    //         </div>

    /**
     * Fetches the detail page of one list entry and extracts its posting time
     * and content node.
     *
     * The page prints the time as "2024.09.13 00:24:04"; it is stored in
     * $listInfo['date'] as "2024-09-13 00:24:04".
     *
     * @param array $listInfo List entry; must contain 'detail_url'.
     *
     * @return array Two elements: the selection returned by getSelector() for
     *               the "sir.view.content.tag" selector, and the updated $listInfo.
     *
     * @throws \RuntimeException When the date text does not match 'Y.m.d H:i:s'.
     */
    protected function getDetailSelector(array $listInfo): array
    {
        $response = $this->getMySocket()->getContent($listInfo['detail_url']);

        // Posting time as printed on the page.
        $rawDate = trim($this->getSelector($response, getenv("sir.view.date.tag"))->text());

        // createFromFormat() returns false on a mismatch; calling format() on
        // that would raise an \Error that the \Exception handler in execute()
        // does not catch, so fail with a regular exception instead.
        $postedAt = DateTime::createFromFormat('Y.m.d H:i:s', $rawDate);
        if ($postedAt === false) {
            throw new \RuntimeException(sprintf("Unexpected date format on detail page: '%s'", $rawDate));
        }
        $listInfo['date'] = $postedAt->format('Y-m-d H:i:s');

        return [$this->getSelector($response, tag: getenv("sir.view.content.tag")), $listInfo];
    }

    // Sample list-row markup (abridged):
    //   <td class="listvisited mobile-td subject-view">
    //     <a href="board-read.asp?fullboardname=yamoonfreeboard&mtablename=humor&num=89372&ref=85575&page=1"
    //        class="ya-tooltip mobile-bold mobile-height" title="...">...title...</a>
    //     <i class="fa fa-commenting-o" aria-hidden="true"></i> <span class="color-red small">6</span>
    //     <span class="visible-xs visible-sm small">... yeeyuu | ... 6 | ... 369 | No 89372 | 2024-09-13</span>
    //   </td>
    // NOTE(review): the URLs in this sample (board-read.asp, yamoonfreeboard) do not
    // look like sir.kr; it may have been copied from another crawler - confirm.

    /**
     * Crawls one board: builds the list of entries and passes it to list_process().
     *
     * Recognised values in $params:
     *  - 'debug': skip the list page and process a single test entry whose URL
     *             comes from "sir.view.test.url.<board>".
     *  - 'copy':  sets $this->isCopy (consumed outside this class).
     *
     * Any \Exception raised during the run is logged as a warning and not rethrown.
     *
     * @param string      $board_name Board identifier used in the env variable names.
     * @param string|null $user_id    Passed to login_process().
     * @param mixed       ...$params  Extra option flags, see above.
     */
    public function execute(string $board_name, ?string $user_id = null, ...$params): void
    {
        try {
            // Extra options. Strict comparison so that only the literal
            // strings enable a flag (a boolean true would otherwise match).
            $this->isDebug = in_array('debug', $params, true);
            $this->isCopy = in_array('copy', $params, true);

            $this->setBoardName($board_name);
            $this->login_process($user_id);

            // Collect the entries to process.
            $listInfos = [];
            if ($this->isDebug) {
                $listInfos[] = [
                    'title' => 'test_title',
                    'nickname' => 'test_name',
                    'hit' => 1,
                    'date' => date("Y-m-d H:i:s"),
                    'detail_url' => getenv("sir.view.test.url.{$this->getBoardName()}"),
                ];
            } else {
                $response = $this->getMySocket()->getContent(getenv("sir.list.url.{$this->getBoardName()}"));
                $this->getSelector($response, getenv("sir.list.tag.{$this->getBoardName()}"))->each(
                    function (Crawler $node) use (&$listInfos): void {
                        $link_node = $node->filter(getenv("sir.list.item.link.tag"));

                        $href = $link_node->attr("href");
                        if ($href === null) {
                            // A null href would raise a TypeError in
                            // changeURLByCrawler(); there is nothing to crawl.
                            log_message("debug", "SirCrawler: skipped a list row whose link has no href.");
                            return;
                        }

                        // The href starts with two slashes ("//sir.kr/..."),
                        // so it is rewritten before use.
                        $detail_url = $this->changeURLByCrawler($href);

                        $listInfos[] = [
                            'title' => $link_node->text(),
                            'nickname' => $node->filter(getenv("sir.list.item.nickname.tag"))->text(),
                            'detail_url' => $detail_url,
                            // Left empty here; getDetailSelector() fills it from the detail page.
                            'date' => "",
                            'hit' => $node->filter(getenv("sir.list.item.hit.tag"))->text(),
                        ];
                    }
                );
            }

            if (!count($listInfos)) {
                throw new \Exception("Target URL이 없습니다.");
            }

            $this->list_process(intval(getenv("sir.list.max_limit.{$this->getBoardName()}")), $listInfos);
            log_message("notice", __FUNCTION__ . " 작업이 완료되었습니다.");
        } catch (\Exception $e) {
            log_message("warning", sprintf(
                "\n---%s 오류---\n%s\n-----------------------------------------\n",
                __FUNCTION__,
                $e->getMessage()
            ));
        }
    }
}
|