140 lines
7.3 KiB
PHP
140 lines
7.3 KiB
PHP
<?php
|
|
|
|
namespace App\Libraries\MyCrawler\Mangboard;
|
|
|
|
use App\Entities\Mangboard\UserEntity;
|
|
use Symfony\Component\DomCrawler\Crawler;
|
|
|
|
/**
 * Crawler for Inven (inven.co.kr) boards, built on top of MangboardCrawler.
 *
 * All CSS selectors and target URLs are read from environment variables
 * (the "inven.*" keys), so this class contains no hard-coded selectors.
 */
class InvenCrawler extends MangboardCrawler
{
    /**
     * Pass-through constructor: forwards every argument to the parent unchanged.
     *
     * @param string     $host        forwarded to MangboardCrawler
     * @param string     $board_name  forwarded to MangboardCrawler; this class also
     *                                reads it back as $this->_board_name to build
     *                                the per-board environment keys
     * @param UserEntity $user_entity forwarded to MangboardCrawler
     */
    public function __construct(string $host, string $board_name, UserEntity $user_entity)
    {
        parent::__construct($host, $board_name, $user_entity);
    }

    /**
     * Resolve the media URL of a single node.
     *
     * The parent implementation is always asked first. Only for "video" nodes,
     * and only when the parent produced no usable URL (null or empty string),
     * the node's "data-src" attribute is used as a fallback.
     *
     * Previously the data-src value overwrote the parent's result
     * unconditionally. Crawler::extract() yields an empty string for a node
     * without that attribute, so a valid URL was replaced by "".
     *
     * @param Crawler $node       node to inspect
     * @param string  $media_type media kind, e.g. "img" or "video"
     * @param string  $attr       attribute name handed to the parent lookup
     *
     * @return null|string the resolved URL, or the parent's (null/empty) result
     *                     when no fallback value exists
     */
    protected function getUrlByMediaType(Crawler $node, string $media_type, string $attr): null|string
    {
        $url = parent::getUrlByMediaType($node, $media_type, $attr);

        $hasUrl = $url !== null && $url !== '';
        if ($media_type !== 'video' || $hasUrl) {
            return $url;
        }

        // Still nothing usable: try the data-src attribute instead.
        foreach ($node->extract(['data-src']) as $candidate) {
            if (is_string($candidate) && $candidate !== '') {
                return $candidate;
            }
        }

        return $url;
    }

    // Post content (sample detail-page markup)
    // <div class="articleContent">
    //     <div id="imageCollectDiv" class="contentBody">
    //         <!-- ============== CONTENT ============== -->
    //         <div id="powerbbsContent">
    //             <div id="BBSImageHolderTop" style="text-align:center;">
    //                 <img src="https://upload3.inven.co.kr/upload/2024/09/15/bbs/i1620925350.jpg?MW=800" style="max-width: 100%; width: 800px; aspect-ratio: 1080 / 1350;" loading="lazy" />
    //                 <br><br><img src="https://upload3.inven.co.kr/upload/2024/09/15/bbs/i1587803007.jpg?MW=800" style="max-width: 100%; width: 800px; aspect-ratio: 1080 / 1350;" loading="lazy" />
    //                 <br><br><img src="https://upload3.inven.co.kr/upload/2024/09/15/bbs/i1134295360.jpg?MW=800" style="max-width: 100%; width: 800px; aspect-ratio: 1080 / 1350;" loading="lazy" />
    //                 <br><br><img src="https://upload3.inven.co.kr/upload/2024/09/15/bbs/i1481352611.jpg?MW=800" style="max-width: 100%; width: 800px; aspect-ratio: 1080 / 1350;" loading="lazy" />
    //                 <br><br><img src="https://upload3.inven.co.kr/upload/2024/09/15/bbs/i1878651605.jpg?MW=800" style="max-width: 100%; width: 800px; aspect-ratio: 850 / 1063;" loading="lazy" />
    //                 <br><br>
    //             </div>
    //             <div>^^</div>
    //         </div>
    //         <!-- ============== End CONTENT ============== -->
    //     </div>

    /**
     * Fetch one post's detail page, collect its image and video URLs and hand
     * them on for download and registration.
     *
     * In debug mode nothing is downloaded or registered: the collected data is
     * reported by throwing an exception whose message contains a dump of
     * $listInfo and the media URLs.
     *
     * @param int   $cnt      counter passed through to backend_process()
     * @param array $listInfo list-row data; 'detail_url' is the page to fetch
     *
     * @return array the unmodified $listInfo
     *
     * @throws \Exception in debug mode (carrying the dump), or when
     *                    media_process() returns no storages
     */
    protected function detail_page(int $cnt, array $listInfo): array
    {
        $response = $this->getMySocket()->getContent($listInfo['detail_url']);
        $selector = $this->getSelector($response, getenv("inven.view.content.tag"));

        // Images first, then videos appended to the same collection.
        $media_urls = $this->getUrlsByMediaType($selector, "img", "src");
        $media_urls = $this->getUrlsByMediaType($selector, "video", "src", $media_urls);

        if ($this->getDebug()) {
            throw new \Exception(sprintf(
                "\n--------------%s Debug--------------\n%s%s\n---------------------------------------\n",
                __FUNCTION__,
                var_export($listInfo, true),
                var_export($media_urls, true)
            ));
        }

        // Take the image/video source URLs and actually download them.
        $storages = $this->media_process($media_urls);
        if (!count($storages)) {
            throw new \Exception("등록할 자료가 없습니다.");
        }

        // NOTE(review): presumably registers the post with its downloaded
        // media in the backend; the implementation is not visible here.
        $this->backend_process($cnt, $listInfo, $storages);

        return $listInfo;
    }

    // List content (sample list-page markup)
    // <div class="board-list">
    //     <table>
    //         <tr class="lgtm">
    //             <td class="num"><span>1589</span></td>
    //             <td class="tit">
    //                 <div class="text-wrap">
    //                     <div>
    //                         <span class="user-icon">
    //                             <img src="https://upload3.inven.co.kr/upload/2024/06/12/icon/i1237935053.jpg" alt="유저 아이콘" loading="lazy">
    //                         </span>
    //                         <a class="subject-link" href="https://www.inven.co.kr/board/party/5951/1589">
    //                             <span class="board_name">[사진&움짤]</span>스테이씨 윤
    //                         </a>
    //                     </div>
    //                     <span data-opinion-bbs-comeidx="5951" data-opinion-bbs-uid="1589" data-opinion-bbs-opi="1" class="con-comment">[1]</span>
    //                     <span class="con-icon board-img photo">사진</span>
    //                 </div>
    //             </td>
    //             <td class="user">
    //                 <img src="https://static.inven.co.kr/image_2011/member/level/1202/lv32.gif" alt="레벨 아이콘">
    //                 <span class="layerNickName" onclick="layerNickName('배수민', 'pbNickNameHandler'); ">배수민</span>
    //             </td>
    //             <td class="date">09-15</td>
    //             <td class="view">1,502</td>
    //             <td class="reco">1</td>
    //         </tr>
    //     </table>
    // </div>

    /**
     * Entry point of the crawl.
     *
     * Debug mode processes only the single test post configured in
     * "inven.view.test.url.<board>". Otherwise the list page configured in
     * "inven.list.url.<board>" is fetched, one info array is built per matched
     * row, and the result is handed to main_process().
     *
     * Every \Exception raised on the way is caught and written to the log as
     * a warning; nothing is rethrown to the caller.
     *
     * @param int $max_limit limit passed through to main_process()
     */
    public function execute(int $max_limit): void
    {
        try {
            if ($this->getDebug()) {
                $url = getenv("inven.view.test.url.{$this->_board_name}");
                log_message("notice", __FUNCTION__ . "=> DEBUG 게시물 {$url} 작업시작");
                $this->detail_page(1, ['detail_url' => $url]);
                log_message("notice", __FUNCTION__ . "=> DEBUG 게시물 {$url} 작업종료");
            } else {
                $listInfos = [];
                $currentYear = (int) date("Y");
                $today = date("Y-m-d");

                $response = $this->getMySocket()->getContent(getenv("inven.list.url.{$this->_board_name}"));
                $rows = $this->getSelector($response, getenv("inven.list.tag"));
                $rows->each(
                    function (Crawler $node) use (&$listInfos, $currentYear, $today): void {
                        // NOTE(review): the sample markup shows view counts with a
                        // thousands separator ("1,502"); the raw text is passed on
                        // as-is - confirm the consumer copes with that.
                        $hit = $node->filter(getenv("inven.list.item.hit.tag"))->text();

                        // The list shows only month-day, so the year must be
                        // supplied here. When the composed date would lie in the
                        // future (e.g. a "12-31" post crawled on January 1st) it
                        // belongs to the previous year.
                        $monthDay = $node->filter(getenv("inven.list.item.date.tag"))->text();
                        $date = $currentYear . "-" . $monthDay;
                        if (preg_match('/^\d{2}-\d{2}$/', $monthDay) === 1 && strcmp($date, $today) > 0) {
                            $date = ($currentYear - 1) . "-" . $monthDay;
                        }

                        $nickname = $node->filter(getenv("inven.list.item.nickname.tag"))->text();

                        // The configured link element supplies both the detail
                        // URL (href) and the post title (text).
                        $link_node = $node->filter(getenv("inven.list.item.link.tag"));
                        $detail_url = $link_node->attr("href");
                        $title = $link_node->text();

                        $listInfos[] = [
                            'title' => $title,
                            'nickname' => $nickname,
                            'detail_url' => $detail_url,
                            'date' => $date,
                            'hit' => $hit,
                        ];
                    }
                );

                if (!count($listInfos)) {
                    throw new \Exception("Target URL이 없습니다.");
                }
                $this->main_process($max_limit, $listInfos);
            }
            log_message("notice", __FUNCTION__ . " 작업이 완료되었습니다.");
        } catch (\Exception $e) {
            log_message("warning", sprintf(
                "\n---%s 오류---\n%s\n-----------------------------------------\n",
                __FUNCTION__,
                $e->getMessage()
            ));
        }
    }
}
|