Automation/app/Controllers/Mangboard/Crawler/YamapCrawler.php
2024-09-19 19:37:28 +09:00

142 lines
7.3 KiB
PHP

<?php
namespace App\Controllers\Mangboard\Crawler;
use App\Controllers\Mangboard\CrawlerController;
use CodeIgniter\HTTP\RequestInterface;
use CodeIgniter\HTTP\ResponseInterface;
use Psr\Log\LoggerInterface;
use Symfony\Component\DomCrawler\Crawler;
/**
 * Crawler controller for the "yamap" site.
 *
 * Every URL and CSS selector used here is read from environment variables
 * (keys prefixed with "yamap."); the shared crawling workflow (socket access,
 * selector creation, media download, board registration) lives in
 * CrawlerController.
 */
class YamapCrawler extends CrawlerController
{
    /**
     * Framework initialisation hook; simply delegates to the parent controller.
     */
    public function initController(RequestInterface $request, ResponseInterface $response, LoggerInterface $logger)
    {
        parent::initController($request, $response, $logger);
    }

    /**
     * Returns the value of the "yamap.host.url" environment variable.
     *
     * NOTE(review): getenv() yields false when the variable is not set; without
     * strict_types that is coerced to an empty string by the return type.
     */
    final protected function getHost(): string
    {
        return getenv("yamap.host.url");
    }

    /**
     * Fetches a post's detail page and registers its content HTML as a board entry.
     *
     * @param int   $cnt      Counter forwarded unchanged to the board model.
     * @param array $listInfo List entry; this method reads its 'detail_url' key.
     *
     * @return array The same $listInfo that was passed in.
     */
    protected function detail_content_process(int $cnt, array $listInfo): array
    {
        $response = $this->getMySocket()->getContent($listInfo['detail_url']);
        $selector = $this->getSelector($response, getenv("yamap.view.content.tag"));

        // No media is downloaded on this path, so the image path stays empty.
        $formDatas = [
            'image_path' => "",
            'content'    => $selector->html(),
        ];

        // Register the post (File DB / Board DB bookkeeping is handled by the model).
        $this->getBoardModel()->createByCrawler(
            $this->getBoardsEntity(),
            $this->getUserEntity(),
            $cnt,
            $listInfo,
            [],
            $formDatas
        );
        log_message("notice", __FUNCTION__ . " 작업이 완료되었습니다.");

        return $listInfo;
    }

    /**
     * Fetches a post's detail page, collects its <img>/<video> "src" URLs,
     * downloads them and hands the stored media to the backend process.
     *
     * In debug mode nothing is downloaded: the collected data is reported by
     * throwing an exception carrying a dump of $listInfo and the media URLs.
     *
     * @param int   $cnt      Counter forwarded unchanged to backend_process().
     * @param array $listInfo List entry; this method reads its 'detail_url' key.
     *
     * @return array The same $listInfo that was passed in.
     *
     * @throws \Exception In debug mode (diagnostic dump) or when media_process()
     *                    returns no stored items.
     */
    protected function detail_download_process(int $cnt, array $listInfo): array
    {
        $response = $this->getMySocket()->getContent($listInfo['detail_url']);
        $selector = $this->getSelector($response, getenv("yamap.view.content.tag"));

        // Accumulate image URLs first, then append video URLs to the same list.
        $media_urls = $this->getUrlsByMediaType($selector, "img", "src");
        $media_urls = $this->getUrlsByMediaType($selector, "video", "src", $media_urls);

        if ($this->isDebug) {
            throw new \Exception(sprintf(
                "\n--------------%s Debug--------------\n%s%s\n---------------------------------------\n",
                __FUNCTION__,
                var_export($listInfo, true),
                var_export($media_urls, true)
            ));
        }

        // Actually download the image/video sources behind the collected URLs.
        $storages = $this->media_process($media_urls);
        if (!count($storages)) {
            throw new \Exception("등록할 자료가 없습니다.");
        }
        $this->backend_process($cnt, $listInfo, $storages);
        log_message("notice", __FUNCTION__ . " 작업이 완료되었습니다.");

        return $listInfo;
    }

    // Sample markup of a crawled page, kept for reference when tuning the selectors
    // (sample text translated; the content container appears to be #read-content):
    // <div class="panel panel-default">
    //   <div class="text-center panel-heading-local-title text-bold">Fashion these days</div>
    //   <div style="margin:5px 10px;">
    //     <span class="pull-left dropdown">
    //       (author nickname)
    //     </span>
    //     <span class="pull-right">
    //       | Recommend (14) | Views (432)
    //     </span>
    //     <div class="clearfix"></div>
    //     <hr class="hr-xs-xs">
    //     <span>
    //       <a href="javascript:void(0);" id="incfont"><i class="fa fa-plus fa-fw" aria-hidden="true"></i></a>
    //       <a href="javascript:void(0);" id="decfont"><i class="fa fa-minus fa-fw margin-left-5" aria-hidden="true"></i></a>
    //     </span>
    //     <span class="pull-right">2024-09-14 01:53:45
    //     </span>
    //     <div class="clearfix"></div>
    //     <hr class="margin-top-5 margin-bottom-20">
    //     <div class="fr-view margin-bottom-30" id="read-content" style="word-break:break-all;">
    //       <p><img title="" class="cloudzoom" data-cloudzoom="zoomImage:'/newboard/yamoonfreeboard/uploads/humor/mceu_86177012011726246415487.jpg'" class="fr-fic fr-dii" src="/newboard/yamoonfreeboard/uploads/humor/mceu_86177012011726246415487.jpg" alt=""></p>
    //       <p>&nbsp;</p>
    //     </div>
    //   </div>
    //   <div class="margin-10">
    //     <a href="javascript:void(0)" onclick="window.open('https://twitter.com/intent/tweet?text=...')" target="_blank"> <i class="fa fa-twitter-square fa-lg ya-tooltip" title="Share on Twitter"></i></a>
    //     <a href="javascript:void(0)" onclick="window.open('https://www.facebook.com/sharer/sharer.php?u=...')" target="_blank"> <i class="fa fa-facebook-square fa-lg ya-tooltip" title="Share on Facebook"></i></a>
    //   </div>
    //   <div id="freesubframe"></div>
    // </div>

    /**
     * Entry point: builds the list of posts to crawl for a board and hands it
     * to list_process().
     *
     * Recognised extra options in $params: 'debug' (use a single synthetic list
     * entry pointing at the configured test URL) and 'copy' (sets $this->isCopy;
     * its effect is defined outside this class).
     *
     * Any \Exception raised along the way is caught and logged as a warning;
     * nothing is rethrown to the caller.
     *
     * @param string      $board_name Board to crawl; also selects the per-board env keys.
     * @param string|null $user_id    User id forwarded to login_process().
     * @param mixed       ...$params  Optional flags ('debug', 'copy').
     */
    public function execute(string $board_name, ?string $user_id = null, ...$params): void
    {
        try {
            // Additional options. Strict comparison so that a non-string
            // argument can never be mistaken for a flag.
            $this->isDebug = in_array('debug', $params, true);
            $this->isCopy = in_array('copy', $params, true);
            $this->setBoardName($board_name);
            $this->login_process($user_id);

            // Run
            $boardName = $this->getBoardName();
            $listInfos = [];
            if ($this->isDebug) {
                // Debug mode: a single hand-made entry pointing at the test URL.
                $listInfos[] = [
                    'title'      => 'test_title',
                    'nickname'   => 'test_name',
                    'hit'        => 1,
                    'date'       => date("Y-m-d H:i:s"),
                    'detail_url' => getenv("yamap.view.test.url.{$boardName}"),
                ];
            } else {
                $response = $this->getMySocket()->getContent(getenv("yamap.list.url.{$boardName}"));

                // The list container selector used to be read from the
                // "inven." namespace, which looks like a copy-paste leftover
                // from another crawler. Prefer the "yamap." key and fall back
                // to the legacy key so existing configurations keep working.
                $listTag = getenv("yamap.list.tag.{$boardName}");
                if ($listTag === false || $listTag === '') {
                    $listTag = getenv("inven.list.tag.{$boardName}");
                }
                $selector = $this->getSelector($response, $listTag);

                $selector->filter(getenv("yamap.list.item.tag"))->each(
                    static function (Crawler $node) use (&$listInfos): void {
                        $hit = $node->filter(getenv("yamap.list.item.hit.tag"))->text();
                        // The current year is prepended to the scraped date text
                        // (presumably the list shows month-day only; verify against the site).
                        $date = date("Y") . "-" . $node->filter(getenv("yamap.list.item.date.tag"))->text();
                        $nickname = $node->filter(getenv("yamap.list.item.nickname.tag"))->text();
                        // Original note: for posts whose author is not the administrator,
                        // look up the subject link inside the list item.
                        // NOTE(review): no author filtering is actually performed here.
                        $link_node = $node->filter(getenv("yamap.list.item.link.tag"));
                        $detail_url = $link_node->attr("href");
                        $title = $link_node->text();
                        $listInfos[] = [
                            'title'      => $title,
                            'nickname'   => $nickname,
                            'detail_url' => $detail_url,
                            'date'       => $date,
                            'hit'        => $hit,
                        ];
                    }
                );
            }

            if (!count($listInfos)) {
                throw new \Exception("Target URL이 없습니다.");
            }
            $this->list_process(intval(getenv("yamap.list.max_limit.{$boardName}")), $listInfos);
            log_message("notice", __FUNCTION__ . " 작업이 완료되었습니다.");
        } catch (\Exception $e) {
            log_message("warning", sprintf(
                "\n---%s 오류---\n%s\n-----------------------------------------\n",
                __FUNCTION__,
                $e->getMessage()
            ));
        }
    }
}