Automation/app/Controllers/Mangboard/Crawler/InvenCrawler.php
2024-09-20 09:03:30 +09:00

137 lines
6.9 KiB
PHP

<?php
namespace App\Controllers\Mangboard\Crawler;
use Symfony\Component\DomCrawler\Crawler;
use Psr\Log\LoggerInterface;
use CodeIgniter\HTTP\ResponseInterface;
use CodeIgniter\HTTP\RequestInterface;
/**
 * Crawler for Inven boards.
 *
 * All site-specific values (host, list/detail URLs, CSS selectors, limits) are read
 * from environment settings whose names start with "inven.".
 */
class InvenCrawler extends MyCrawler
{
    /**
     * Framework initialisation hook; delegates entirely to the parent controller.
     */
    public function initController(RequestInterface $request, ResponseInterface $response, LoggerInterface $logger)
    {
        parent::initController($request, $response, $logger);
    }

    /**
     * Returns the value of the "inven.host.url" environment setting.
     */
    final protected function getHost(): string
    {
        return getenv("inven.host.url");
    }

    /**
     * Resolves the media URL of a node.
     *
     * The parent implementation is always tried first. Only for the 'video' media
     * type, and only when the parent returned null, the node's data-src attribute
     * is used as a fallback.
     *
     * @param Crawler $node       Node to read the URL from.
     * @param string  $media_type Media type; only 'video' gets the data-src fallback.
     * @param string  $attr       Attribute name, forwarded to the parent implementation.
     *
     * @return null|string The URL, or null when none could be found.
     */
    protected function getUrlByMediaType(Crawler $node, string $media_type, string $attr): null|string
    {
        $url = parent::getUrlByMediaType($node, $media_type, $attr);
        if ($media_type !== 'video' || $url !== null) {
            return $url;
        }

        // If it is still null, try extracting the URL from data-src.
        $attributes = $node->extract(['data-src']);
        $candidate = $attributes[0] ?? null;

        // A missing attribute is extracted as an empty string; report that as
        // "no URL" (null) rather than handing an empty URL back to the caller.
        return (is_string($candidate) && $candidate !== '') ? $candidate : null;
    }

    // Post content (sample detail-page markup, kept verbatim)
    // <div class="articleContent">
    //     <div id="imageCollectDiv" class="contentBody">
    //         <!-- ============== CONTENT ============== -->
    //         <div id="powerbbsContent">
    //             <div id="BBSImageHolderTop" style="text-align:center;">
    //                 <img src="https://upload3.inven.co.kr/upload/2024/09/15/bbs/i1620925350.jpg?MW=800" style="max-width: 100%; width: 800px; aspect-ratio: 1080 / 1350;" loading="lazy" />
    //                 <br><br><img src="https://upload3.inven.co.kr/upload/2024/09/15/bbs/i1587803007.jpg?MW=800" style="max-width: 100%; width: 800px; aspect-ratio: 1080 / 1350;" loading="lazy" />
    //                 <br><br><img src="https://upload3.inven.co.kr/upload/2024/09/15/bbs/i1134295360.jpg?MW=800" style="max-width: 100%; width: 800px; aspect-ratio: 1080 / 1350;" loading="lazy" />
    //                 <br><br><img src="https://upload3.inven.co.kr/upload/2024/09/15/bbs/i1481352611.jpg?MW=800" style="max-width: 100%; width: 800px; aspect-ratio: 1080 / 1350;" loading="lazy" />
    //                 <br><br><img src="https://upload3.inven.co.kr/upload/2024/09/15/bbs/i1878651605.jpg?MW=800" style="max-width: 100%; width: 800px; aspect-ratio: 850 / 1063;" loading="lazy" />
    //                 <br><br>
    //             </div>
    //             <div>^^</div>
    //         </div>
    //         <!-- ============== End CONTENT ============== -->
    //     </div>

    /**
     * Downloads the detail page of a list entry and selects its content node.
     *
     * @param array $listInfo List entry; its 'detail_url' key is the page to fetch.
     *
     * @return array Two elements: the selector obtained for the
     *               "inven.view.content.tag" setting, and the unchanged $listInfo.
     */
    protected function getDetailSelector(array $listInfo): array
    {
        $response = $this->getMySocket()->getContent($listInfo['detail_url']);
        $selector = $this->getSelector($response, getenv("inven.view.content.tag"));

        return [$selector, $listInfo];
    }

    // List content (sample list-page markup, kept verbatim)
    // <div class="board-list">
    //     <table>
    //         <tr class="lgtm">
    //             <td class="num"><span>1589</span></td>
    //             <td class="tit">
    //                 <div class="text-wrap">
    //                     <div>
    //                         <span class="user-icon">
    //                             <img src="https://upload3.inven.co.kr/upload/2024/06/12/icon/i1237935053.jpg" alt="유저 아이콘" loading="lazy">
    //                         </span>
    //                         <a class="subject-link" href="https://www.inven.co.kr/board/party/5951/1589">
    //                             <span class="board_name">[사진&움짤]</span>스테이씨 윤
    //                         </a>
    //                     </div>
    //                     <span data-opinion-bbs-comeidx="5951" data-opinion-bbs-uid="1589" data-opinion-bbs-opi="1" class="con-comment">[1]</span>
    //                     <span class="con-icon board-img photo">사진</span>
    //                 </div>
    //             </td>
    //             <td class="user">
    //                 <img src="https://static.inven.co.kr/image_2011/member/level/1202/lv32.gif" alt="레벨 아이콘">
    //                 <span class="layerNickName" onclick="layerNickName('배수민', 'pbNickNameHandler'); ">배수민</span>
    //             </td>
    //             <td class="date">09-15</td>
    //             <td class="view">1,502</td>
    //             <td class="reco">1</td>
    //         </tr>
    //     </table>
    // </div>

    /**
     * Crawls one board: collects the list entries and hands them to list_process().
     *
     * With the 'debug' option a single hard-coded test entry is used instead of
     * fetching the live list. Every \Exception raised on the way is caught and
     * written to the log as a warning; nothing is rethrown.
     *
     * @param string      $board_name Board key; it is also the suffix of the per-board settings.
     * @param string|null $user_id    Passed to login_process().
     * @param mixed       ...$params  Option flags; 'debug' and 'copy' are recognised.
     */
    public function execute(string $board_name, ?string $user_id = null, ...$params): void
    {
        try {
            // Additional options (strict match, so only the exact strings enable a flag)
            $this->isDebug = in_array('debug', $params, true);
            $this->isCopy = in_array('copy', $params, true);
            $this->setBoardName($board_name);
            $this->login_process($user_id);
            // Run
            $listInfos = [];
            if ($this->isDebug) {
                $listInfos[] = [
                    'title' => 'test_title',
                    'nickname' => 'test_name',
                    'hit' => 1,
                    'date' => date("Y-m-d H:i:s"),
                    'detail_url' => getenv("inven.view.test.url.{$this->getBoardName()}"),
                ];
            } else {
                $response = $this->getMySocket()->getContent(getenv("inven.list.url.{$this->getBoardName()}"));
                $this->getSelector($response, getenv("inven.list.tag.{$this->getBoardName()}"))->each(
                    function (Crawler $node) use (&$listInfos): void {
                        $hit = self::parseInvenHitCount($node->filter(getenv("inven.list.item.hit.tag"))->text());
                        $date = self::resolveInvenListDate($node->filter(getenv("inven.list.item.date.tag"))->text());
                        $nickname = $node->filter(getenv("inven.list.item.nickname.tag"))->text();
                        $link_node = $node->filter(getenv("inven.list.item.link.tag"));
                        $listInfos[] = [
                            'title' => $link_node->text(),
                            'nickname' => $nickname,
                            'detail_url' => $link_node->attr("href"),
                            'date' => $date,
                            'hit' => $hit,
                        ];
                    }
                );
            }
            if (!count($listInfos)) {
                throw new \Exception("Target URL이 없습니다.");
            }
            $this->list_process(intval(getenv("inven.list.max_limit.{$this->getBoardName()}")), $listInfos);
            log_message("notice", __FUNCTION__ . " 작업이 완료되었습니다.");
        } catch (\Exception $e) {
            log_message("warning", sprintf(
                "\n---%s 오류---\n%s\n-----------------------------------------\n",
                __FUNCTION__,
                $e->getMessage()
            ));
        }
    }

    /**
     * Converts a view-count cell such as "1,502" into an integer (1502).
     *
     * The debug entry in execute() already uses an int for 'hit'; this keeps the
     * live entries consistent with it.
     */
    private static function parseInvenHitCount(string $text): int
    {
        return (int) str_replace(',', '', trim($text));
    }

    /**
     * Turns a list date cell in "MM-DD" form into "YYYY-MM-DD".
     *
     * The list shows no year, so the current year is assumed. When that would put
     * the date more than one day in the future (e.g. a "12-30" entry crawled in
     * January), the entry must belong to the previous year. The one-day margin
     * avoids a wrong roll-back when the server clock and the site are in
     * different time zones.
     *
     * Text that is not in "MM-DD" form is simply prefixed with the current year,
     * exactly as before.
     */
    private static function resolveInvenListDate(string $monthDay): string
    {
        if (preg_match('/^\s*(\d{1,2})-(\d{1,2})\s*$/', $monthDay, $parts) !== 1) {
            return date("Y") . "-" . $monthDay;
        }

        $year = (int) date("Y");
        $month = (int) $parts[1];
        $day = (int) $parts[2];
        if (mktime(0, 0, 0, $month, $day, $year) > time() + 86400) {
            $year--;
        }

        return sprintf('%04d-%02d-%02d', $year, $month, $day);
    }
}