135 lines
7.1 KiB
PHP
135 lines
7.1 KiB
PHP
<?php
|
|
|
|
namespace App\Libraries\MyCrawler;
|
|
|
|
|
|
use App\Libraries\MyCrawlerLibrary;
|
|
use App\Libraries\MySocket\WebSocket;
|
|
use App\Libraries\MyStorage\MangboardStorage;
|
|
use App\Entities\Mangboard\UserEntity;
|
|
use Symfony\Component\DomCrawler\Crawler;
|
|
|
|
/**
 * Crawler for the Yamap board.
 *
 * Reads the list page of one category, follows every accepted post to its
 * detail page, passes the media URLs found there to mediaProcess() and
 * registers the result through MangboardStorage.
 *
 * All URLs and CSS selectors are read from "yamap.*" environment variables.
 * getMySocket(), getSelector(), getMediaUrls(), mediaProcess(), getDebug()
 * and $_storages are not defined in this file; presumably they come from
 * MyCrawlerLibrary - confirm there.
 */
class YamapCrawler extends MyCrawlerLibrary
{
    /** @var string Category key; selects the list URL and is handed to the storage. */
    private $_category = "";

    /** @var UserEntity|null User entity handed to the storage. */
    private $_user_entity = null;

    /** @var MangboardStorage|null Storage instance, created on first use by getMyStorage(). */
    private $_myStorage = null;

    /**
     * @param string     $host        Host the WebSocket given to the parent is created for.
     * @param string     $category    Category key (used in "yamap.list.url.{category}").
     * @param UserEntity $user_entity User entity passed on to MangboardStorage.
     */
    public function __construct(string $host, string $category, UserEntity $user_entity)
    {
        parent::__construct(new WebSocket($host));
        $this->_category = $category;
        $this->_user_entity = $user_entity;
    }

    /**
     * Returns the storage for this crawler, creating it on the first call.
     *
     * @return MangboardStorage
     */
    final protected function getMyStorage()
    {
        if ($this->_myStorage === null) {
            $this->_myStorage = new MangboardStorage($this->_category, $this->_user_entity);
        }
        return $this->_myStorage;
    }

    // Sample markup of a detail page (abbreviated; page text replaced by
    // English placeholders, which are the reviewer's reading of the sample):
    // <div class="panel panel-default">
    //   <div class="text-center panel-heading-local-title text-bold">{post title}</div>
    //   <div style="margin:5px 10px;">
    //     <span class="pull-left dropdown">{author nickname}</span>
    //     <span class="pull-right">| recommend (14) | views (432)</span>
    //     <div class="clearfix"></div>
    //     <hr class="hr-xs-xs">
    //     <span>{font size links: a#incfont, a#decfont}</span>
    //     <span class="pull-right">2024-09-14 01:53:45</span>
    //     <div class="clearfix"></div>
    //     <hr class="margin-top-5 margin-bottom-20">
    //     <div class="fr-view margin-bottom-30" id="read-content" style="word-break:break-all;">
    //       <p><img title="" class="cloudzoom" data-cloudzoom="zoomImage:'/newboard/yamoonfreeboard/uploads/humor/mceu_86177012011726246415487.jpg'" class="fr-fic fr-dii" src="/newboard/yamoonfreeboard/uploads/humor/mceu_86177012011726246415487.jpg" alt=""></p>
    //       <p> </p>
    //     </div>
    //   </div>
    //   <div class="margin-10">{Twitter / Facebook share links}</div>
    //   <div id="freesubframe"></div>
    // </div>

    /**
     * Fetches the detail page of one post and extracts its media URLs.
     *
     * @param array $listInfo One row produced by listPage(); 'detail_url' is read here.
     *
     * @return array Whatever getMediaUrls() returns; execute() destructures it
     *               as [$listInfo, $urls].
     */
    private function detailPage(array $listInfo): array
    {
        $response = $this->getMySocket()->getContent($listInfo['detail_url']);
        $tag = getenv("yamap.view.content.tag");
        return $this->getMediaUrls($response, $tag, $listInfo);
    }

    /**
     * Builds the list of posts to crawl.
     *
     * Each row has the keys 'title', 'nickname', 'detail_url', 'date' and 'hit'.
     * In debug mode a single row built from the "yamap.view.test.*" variables
     * is returned instead of fetching the list page.
     *
     * @return array List of rows (never empty).
     *
     * @throws \Exception When the list page yields no usable row.
     */
    private function listPage(): array
    {
        if ($this->getDebug()) {
            // Must be a list of rows: execute() iterates the result and reads
            // $row["nickname"] from each element. The row uses the same keys
            // as the rows built from the live list page below.
            return [
                [
                    'title' => getenv("yamap.view.test.title"),
                    'nickname' => getenv("yamap.view.test.nickname"),
                    'detail_url' => getenv("yamap.view.test.url"),
                    'date' => date("Y-m-d H:i:s"),
                    'hit' => 1,
                ],
            ];
        }

        $listInfos = [];
        $response = $this->getMySocket()->getContent(getenv("yamap.list.url.{$this->_category}"));
        $selector = $this->getSelector($response, getenv("yamap.list.tag"));

        // The selectors do not change between items, so read them once.
        $nickname_tag = getenv("yamap.list.item.nickname.tag");
        $hit_tag = getenv("yamap.list.item.hit.tag");
        $date_tag = getenv("yamap.list.item.date.tag");
        $link_tag = getenv("yamap.list.item.link.tag");
        $except_nickname = getenv("yamap.list.item.nickname.except");

        // Visit every node matching the item selector (e.g. div.bbs_item).
        // log_message("debug", sprintf("\n-------------MainPage------------\n%s\n--------------------------\n", $selector->html()));
        $selector->filter(getenv("yamap.list.item.tag"))->each(
            static function (Crawler $node) use (
                &$listInfos,
                $nickname_tag,
                $hit_tag,
                $date_tag,
                $link_tag,
                $except_nickname
            ): void {
                try {
                    // Read the author nickname (e.g. span.g_nickname) first and
                    // drop posts written by the excluded author (e.g. the admin).
                    $nickname = $node->filter($nickname_tag)->text();
                    if ($nickname === $except_nickname) {
                        return;
                    }
                    $hit = $node->filter($hit_tag)->text();
                    $date = $node->filter($date_tag)->text();
                    // The link node (e.g. a.list_subject) carries the detail URL;
                    // the text of its last child is used as the title.
                    $link_node = $node->filter($link_tag);
                    $detail_url = $link_node->attr("href");
                    $title = $link_node->children()->last()->text();
                } catch (\InvalidArgumentException $e) {
                    // Crawler::text()/attr() throw on an empty node list. Skip
                    // the malformed item instead of aborting the whole list.
                    log_message("debug", "Skipped malformed list item: " . $e->getMessage());
                    return;
                }
                $listInfos[] = [
                    'title' => $title,
                    'nickname' => $nickname,
                    'detail_url' => $detail_url,
                    'date' => $date,
                    'hit' => $hit,
                ];
            }
        );

        if (!count($listInfos)) {
            throw new \Exception("Target URL이 없습니다.");
        }
        log_message("notice", __FUNCTION__ . " 작업 완료");
        return $listInfos;
    }

    /**
     * Crawls the posts returned by listPage() and stores them.
     *
     * At most "yamap.list.max_limit" posts are stored; 0 (or an unset value)
     * means every listed post. A post that fails is logged and does not count
     * towards the limit, so the next listed post is tried in its place.
     *
     * @throws \Exception When listPage() finds nothing to crawl.
     */
    public function execute(): void
    {
        $listInfos = $this->listPage();
        $total = count($listInfos);

        // Limit 0 means "all"; otherwise the smaller of limit and item count.
        $max_limit = intval(getenv("yamap.list.max_limit"));
        $max_limit = $max_limit ? min($max_limit, $total) : $total;

        // $i is the 1-based ordinal of the post being stored; it only advances
        // after a post was stored successfully.
        $i = 1;
        foreach ($listInfos as $listInfo) {
            if ($i > $max_limit) {
                // Limit reached: nothing further would be processed.
                break;
            }
            try {
                log_message("notice", "게시물 {$i}번째/{$total}개중 {$listInfo["nickname"]} 작업시작");
                [$listInfo, $urls] = $this->detailPage($listInfo);
                $this->mediaProcess($urls);
                // Register the post in the board DB, then its files and images.
                $board_entity = $this->getMyStorage()->getBoardLibrary()->createByCrawler($i, $listInfo, $this->_storages);
                $this->getMyStorage()->getFileLibrary()->createByCrawler($board_entity, $this->_storages);
                $this->getMyStorage()->getImageLibrary()->createByCrawler($board_entity, $this->_storages);
                log_message("notice", "게시물 {$i}번째/{$total}개중 {$listInfo["nickname"]} 작업완료.");
                $i++;
            } catch (\Exception $e) {
                // Best effort per post: keep crawling, but make the failure
                // visible above debug level.
                log_message("warning", $e->getMessage());
            }
        }
        log_message("notice", "Crawler->" . __FUNCTION__ . " 작업이 완료되었습니다.");
    }
}
|