Automation/app/Libraries/MyCrawler/YamapLibrary.php
2024-09-07 19:00:11 +09:00

96 lines
3.8 KiB
PHP

<?php
namespace App\Libraries\MyCrawler;
use Symfony\Component\DomCrawler\Crawler;
use App\Libraries\MySocket\WebLibrary as MySocket;
use App\Libraries\Mangboard\FileLibrary as MyStorage;
/**
 * Crawler for the Yamap board.
 *
 * Reads the board's list page, picks the first post that was not written by
 * the excluded nickname, then downloads and stores the media (images and
 * videos) found in that post's detail page.
 *
 * Every CSS selector, URL and path is read from environment variables named
 * "yamap.*", so nothing site-specific is hard-coded here.
 *
 * NOTE(review): getContent(), download(), save(), getMediaTags() and
 * getDebug() are not defined in this class; presumably they are inherited
 * from MyCrawlerLibrary - confirm against the parent.
 */
class YamapLibrary extends MyCrawlerLibrary
{
    /** @var MySocket|null Lazily created by getMySocket(). */
    private $_mySocket = null;

    /** @var MyStorage|null Lazily created by getMyStorage(). */
    private $_myStorage = null;

    public function __construct()
    {
        parent::__construct();
    }

    /**
     * Returns the socket object for the Yamap host, creating it on first use
     * from the "yamap.host.url" environment variable.
     *
     * @return MySocket
     */
    public function getMySocket()
    {
        if ($this->_mySocket === null) {
            $this->_mySocket = new MySocket(getenv('yamap.host.url'));
        }

        return $this->_mySocket;
    }

    /**
     * Returns the storage object, creating it on first use from the
     * "yamap.storage.upload.path" environment variable.
     *
     * @return MyStorage
     */
    public function getMyStorage()
    {
        if ($this->_myStorage === null) {
            $this->_myStorage = new MyStorage(getenv('yamap.storage.upload.path'));
        }

        return $this->_myStorage;
    }

    /**
     * Collects the candidate posts from the board's list page.
     *
     * @param string $url URL of the list page.
     *
     * @return array List of rows, each with the keys 'title', 'nickname',
     *               'detail_url', 'time' and 'hit'.
     *
     * @throws \Exception When no eligible post is found.
     */
    private function mainPage(string $url): array
    {
        $crawler = $this->getContent($url, getenv("yamap.list.tag"));

        // Selectors and constants do not change per row, so read them once.
        $nicknameTag = getenv("yamap.list.item.nickname.tag");
        // The env keys below previously carried a trailing space, which made
        // getenv() miss the variable and hand an invalid selector to filter().
        $timeTag = getenv("yamap.list.item.time.tag");
        $hitTag = getenv("yamap.list.item.hit.tag");
        $linkTag = getenv("yamap.list.item.link.tag");
        $exceptNickname = getenv("yamap.list.item.nickname.except");
        $today = date("Y-m-d");

        $items = [];
        // Visit every list-row node matched by the configured item selector.
        $crawler->filter(getenv("yamap.list.item.tag"))->each(
            static function (Crawler $node) use (
                &$items,
                $nicknameTag,
                $timeTag,
                $hitTag,
                $linkTag,
                $exceptNickname,
                $today
            ): void {
                // Skip posts written by the excluded nickname (the administrator,
                // per the original comment) before touching any other node, so
                // that such rows cannot abort the crawl if they lack time/hit
                // elements.
                $nickname = $node->filter($nicknameTag)->text();
                if ($nickname === $exceptNickname) {
                    return;
                }

                // The list page only supplies a time of day; prefix today's date.
                $time = $today . " " . $node->filter($timeTag)->text();
                $hit = intval($node->filter($hitTag)->text());

                // The link node carries the detail URL; its last child holds the title.
                $linkNode = $node->filter($linkTag);
                $items[] = [
                    'title' => $linkNode->children()->last()->text(),
                    'nickname' => $nickname,
                    'detail_url' => $linkNode->attr("href"),
                    'time' => $time,
                    'hit' => $hit,
                ];
            }
        );

        if (!count($items)) {
            throw new \Exception("Target URL이 없습니다.");
        }

        return $items;
    }

    /**
     * Downloads and stores the media contained in a post's detail page.
     *
     * @param string $url URL of the detail page.
     *
     * @return array Two-element list: [0] the stored file infos of all images
     *               and videos, [1] the media tags built from those file infos.
     */
    private function detailPage(string $url): array
    {
        $crawler = $this->getContent($url, getenv("yamap.view.content.tag"));

        // 3. Images
        $imageInfos = $this->save($this->download($crawler, ["tag" => "img", "attr" => "src"]));
        // 4. Videos (mp4)
        $videoInfos = $this->save($this->download($crawler, ["tag" => "video", "attr" => "src"]));

        // Previously the video results overwrote the image results, so images
        // never reached the caller. Combine both and build the tags once.
        // NOTE(review): assumes save() returns an array of file infos - confirm.
        $fileInfos = array_merge($imageInfos, $videoInfos);
        $mediaTags = $this->getMediaTags($fileInfos);

        log_message("debug", "-----mediaTags-----");
        log_message("debug", var_export($mediaTags, true));

        return [$fileInfos, $mediaTags];
    }

    /**
     * Runs the crawl: picks the target post and processes its detail page.
     *
     * In debug mode the list page is skipped and a single test post described
     * by the "yamap.view.test.*" environment variables is used instead.
     *
     * @return array Three-element list: [0] the selected post row, [1] the
     *               stored file infos, [2] the media tags.
     *
     * @throws \Exception When the list page yields no eligible post.
     */
    public function execute(): array
    {
        // 1. Main (list) page
        if ($this->getDebug()) {
            $items = [
                [
                    'title' => getenv("yamap.view.test.title"),
                    'nickname' => getenv("yamap.view.test.nickname"),
                    'detail_url' => getenv("yamap.view.test.url"),
                    'time' => date("Y-m-d H:i:s"),
                    'hit' => 1,
                ],
            ];
        } else {
            $items = $this->mainPage(getenv("yamap.list.url"));
        }

        // 2. Detail page: only the first eligible post is processed.
        $target = $items[0];
        [$fileInfos, $mediaTags] = $this->detailPage($target["detail_url"]);

        return [$target, $fileInfos, $mediaTags];
    }
}