Automation/app/Libraries/MyCrawler/YamapLibrary.php
2024-09-14 20:44:49 +09:00

212 lines
9.2 KiB
PHP

<?php
namespace App\Libraries\MyCrawler;
use App\Libraries\MyCrawlerLibrary;
use App\Libraries\MySocket\WebLibrary as MySocketLibrary;
use App\Libraries\Mangboard\FileLibrary as MyStorageLibrary;
use App\Libraries\Mangboard\BoardsLibrary;
use App\Libraries\Mangboard\BoardLibrary;
use App\Entities\Mangboard\UserEntity;
use Symfony\Component\DomCrawler\Crawler;
use App\Traits\FileTrait;
/**
 * Crawler for the Yamap board.
 *
 * Workflow (see execute()):
 *   1. mainPage()     - read the list page and collect one info array per post.
 *   2. detailPage()   - read a post's detail page and collect its media URLs.
 *   3. mediaContent() - download each media URL and hand it to the storage library.
 *   4. BoardLibrary::create() - register the post together with the saved files.
 *
 * All selectors, URLs and limits are read from environment variables
 * prefixed with "yamap.". A user entity must be injected with
 * setUserEntity() before anything that needs it is used.
 */
class YamapLibrary extends MyCrawlerLibrary
{
    use FileTrait;

    /** User on whose behalf files and posts are created; injected via setUserEntity(). */
    private ?UserEntity $_user_entity = null;

    /** Lazily created by getBoardsLibrary(). */
    private ?BoardsLibrary $_boards_library = null;

    /** Lazily created by getBoardLibrary(). */
    private ?BoardLibrary $_board_library = null;

    /** Results of the storage save() calls for the post currently being processed. */
    private array $_file_librarys = [];

    public function __construct()
    {
        parent::__construct();
    }

    /**
     * Returns the socket library bound to the Yamap host, creating it on first use.
     */
    final protected function getMySocket(): mixed
    {
        if ($this->_mySocket === null) {
            $this->_mySocket = new MySocketLibrary(getenv('yamap.host.url'));
        }

        return $this->_mySocket;
    }

    /**
     * Returns the storage library, creating and configuring it on first use.
     *
     * @throws \Exception when no user entity has been set (via getUserEntity()).
     */
    final protected function getMyStorage(): mixed
    {
        if ($this->_myStorage === null) {
            $this->_myStorage = new MyStorageLibrary(getenv('yamap.storage.upload.path'));
            $this->_myStorage->setBoardsEntity($this->getBoardsLibrary()->getEntity());
            $this->_myStorage->setUserEntity($this->getUserEntity());
        }

        return $this->_myStorage;
    }

    /**
     * Returns the boards library for the configured board name, creating it on first use.
     *
     * @throws \Exception when no user entity has been set.
     */
    public function getBoardsLibrary(): BoardsLibrary
    {
        if ($this->_boards_library === null) {
            $this->_boards_library = new BoardsLibrary(
                getenv('yamap.storage.board.name'),
                $this->getUserEntity()
            );
        }

        return $this->_boards_library;
    }

    /**
     * Returns the board library built from the boards entity, creating it on first use.
     *
     * @throws \Exception when no user entity has been set.
     */
    public function getBoardLibrary(): BoardLibrary
    {
        if ($this->_board_library === null) {
            $this->_board_library = new BoardLibrary(
                $this->getBoardsLibrary()->getEntity(),
                $this->getUserEntity()
            );
        }

        return $this->_board_library;
    }

    /**
     * Returns the injected user entity.
     *
     * @throws \Exception when setUserEntity() has not been called yet.
     */
    public function getUserEntity(): UserEntity
    {
        if ($this->_user_entity === null) {
            throw new \Exception("사용자정보가 없습니다.");
        }

        return $this->_user_entity;
    }

    /**
     * Injects the user entity used for storage and board operations.
     */
    public function setUserEntity(UserEntity $_user_entity): void
    {
        $this->_user_entity = $_user_entity;
    }

    /**
     * Passes one downloaded file to the storage library and remembers the result.
     *
     * @param int    $file_sequence 1-based position of the file within the current post.
     * @param string $mediaType     Media type key the URL was collected under ("image" or "video").
     * @param string $file_name     File name taken from the last URL segment.
     * @param string $content       Downloaded file content.
     */
    private function save(int $file_sequence, string $mediaType, string $file_name, string $content): void
    {
        $storage = $this->getMyStorage();
        $storage->setOriginName($file_name);
        $storage->setOriginContent($content);
        $storage->setOriginType($mediaType);
        $storage->setOriginSequence($file_sequence);
        $this->_file_librarys[] = $storage->save();
    }

    /**
     * Downloads one image or video referenced by a Yamap view page.
     *
     * @param string $mediaType Expected media type; checked against the file extension.
     * @param string $url       URL of the media file as found in the page.
     *
     * @return array Two-element list: [file name, content].
     *
     * @throws \Exception when the URL has no file name or its extension does not match $mediaType.
     */
    private function download(string $mediaType, string $url): array
    {
        // explode() always yields at least one element, so the only "no file
        // name" case is an empty last segment (empty URL or trailing slash).
        $segments = explode('/', $url);
        $file_name = array_pop($segments);
        if ($file_name === '') {
            throw new \Exception("URL이 파일명 형식이 아닙니다 : " . $this->getMySocket()->getHost() . $url);
        }

        $name_parts = explode(".", $file_name);
        $file_ext = array_pop($name_parts);
        if (!$this->isFileType($file_ext, $mediaType)) {
            throw new \Exception("파일명 형식이 {$mediaType}가 아닙니다");
        }

        $content = $this->getMySocket()->getContent($url);
        log_message("notice", "{$file_name} 파일이 다운로드되었습니다!");

        return [$file_name, $content];
    }

    /**
     * Downloads and stores every media URL of one post.
     *
     * A failure for a single URL is logged and skipped so the remaining
     * URLs are still processed.
     *
     * @param array $urls Map of media type => URL, as returned by detailPage().
     *
     * @throws \Exception when not a single file could be downloaded and saved.
     */
    private function mediaContent(array $urls): void
    {
        $file_sequence = 1;
        // Reset per detail page: execute() passes this list to BoardLibrary::create().
        $this->_file_librarys = [];

        foreach ($urls as $mediaType => $url) {
            try {
                [$file_name, $content] = $this->download($mediaType, $url);
                $this->save($file_sequence, $mediaType, $file_name, $content);
                $file_sequence++;
                log_message("notice", __FUNCTION__ . " OriginType->{$mediaType} 작업 완료");
            } catch (\Exception $e) {
                log_message("warning", sprintf(
                    "\n---%s,OriginType->%s 오류---\n%s\n-----------------------------------------\n",
                    __FUNCTION__,
                    $mediaType,
                    $e->getMessage()
                ));
            }
        }

        if (!count($this->_file_librarys)) {
            throw new \Exception("Download된 Content가 없습니다.");
        }
    }

    /**
     * Collects the media URL for one media type from a Yamap view page.
     *
     * NOTE(review): results are keyed by $mediaType, so when the page holds
     * several matching nodes only the last one is kept. Confirm whether
     * "one image and one video per post" is intended.
     *
     * @param string  $mediaType Key under which the URL is stored ("image", "video").
     * @param Crawler $selector  Crawler positioned on the post content.
     * @param array   $options   ["tag" => CSS selector, "attr" => attribute holding the URL].
     * @param array   $urls      URLs collected so far; returned with the new entry added.
     *
     * @return array Map of media type => URL.
     */
    private function getUrlsByDetailPageMediaType(string $mediaType, Crawler $selector, array $options, array $urls = []): array
    {
        $selector->filter($options["tag"])->each(
            function (Crawler $node) use ($mediaType, $options, &$urls): void {
                $url = $node->attr($options["attr"]);
                log_message("debug", sprintf(
                    "getNode->%s[%s]",
                    $options["tag"],
                    $url
                ));
                // attr() yields null when the attribute is absent. Passing that on
                // would raise a TypeError in download(string $url), which the
                // catch (\Exception) blocks do not handle, so skip such nodes here.
                if ($url === null || $url === '') {
                    return;
                }
                $urls[$mediaType] = $url;
            }
        );

        return $urls;
    }

    /**
     * Reads a post's detail page and returns the media URLs found in it.
     *
     * @param array $listInfo Post info; only 'detail_url' is read here.
     *
     * @return array Map of media type ("image"/"video") => URL; may be empty.
     */
    private function detailPage(array $listInfo): array
    {
        $response = $this->getMySocket()->getContent($listInfo['detail_url']);
        $selector = $this->getSelector($response, getenv("yamap.view.content.tag"));

        $urls = $this->getUrlsByDetailPageMediaType("image", $selector, ["tag" => "img", "attr" => "src"]);
        $urls = $this->getUrlsByDetailPageMediaType("video", $selector, ["tag" => "video", "attr" => "src"], $urls);

        log_message("notice", sprintf("\n-----------%s 작업완료--------\n%s\n-----------------------\n", __FUNCTION__, var_export($urls, true)));

        return $urls;
    }

    /**
     * Reads the list page and returns one info array per eligible post.
     *
     * Posts whose nickname equals the configured exception value
     * ("yamap.list.item.nickname.except") are left out.
     *
     * @param string $url URL of the list page.
     *
     * @return array List of ['title', 'nickname', 'detail_url', 'date', 'hit'] arrays.
     *
     * @throws \Exception when no eligible post was found.
     */
    private function mainPage(string $url): array
    {
        $listInfos = [];
        $response = $this->getMySocket()->getContent($url);
        $selector = $this->getSelector($response, getenv("yamap.list.tag"));

        // Selectors do not change between items, so read the environment once.
        $nickname_tag = getenv("yamap.list.item.nickname.tag");
        $hit_tag = getenv("yamap.list.item.hit.tag");
        $date_tag = getenv("yamap.list.item.date.tag");
        $link_tag = getenv("yamap.list.item.link.tag");
        $nickname_except = getenv("yamap.list.item.nickname.except");

        // Visit every list item matched by the configured item selector.
        $selector->filter(getenv("yamap.list.item.tag"))->each(
            function (Crawler $node) use (&$listInfos, $nickname_tag, $hit_tag, $date_tag, $link_tag, $nickname_except): void {
                $nickname = $node->filter($nickname_tag)->text();
                $hit = $node->filter($hit_tag)->text();
                $date = $node->filter($date_tag)->text();

                // Skip posts written by the excluded author (e.g. the administrator).
                if ($nickname !== $nickname_except) {
                    $link_node = $node->filter($link_tag);
                    $detail_url = $link_node->attr("href");
                    // The title is the text of the link's last child element.
                    $title = $link_node->children()->last()->text();
                    $listInfos[] = ['title' => $title, 'nickname' => $nickname, 'detail_url' => $detail_url, 'date' => $date, 'hit' => $hit];
                }
            }
        );

        if (!count($listInfos)) {
            throw new \Exception("Target URL이 없습니다.");
        }
        log_message("notice", __FUNCTION__ . " 작업 완료");

        return $listInfos;
    }

    /**
     * Runs the crawl: collects posts, downloads their media and registers them.
     *
     * In debug mode a single test post described by the "yamap.view.test.*"
     * environment variables is processed instead of the live list page.
     * A failure in one post is logged and the next post is tried; only
     * successfully registered posts count towards the limit.
     *
     * @throws \Exception when the list page yields no eligible post (from mainPage()).
     */
    public function execute(): void
    {
        if ($this->getDebug()) {
            $now = date("Y-m-d H:i:s");
            // Must be a list of posts (like mainPage() returns), not a bare post:
            // the loop below iterates over posts.
            $listInfos = [
                [
                    'title' => getenv("yamap.view.test.title"),
                    'nickname' => getenv("yamap.view.test.nickname"),
                    'detail_url' => getenv("yamap.view.test.url"),
                    // 'date' matches the key produced by mainPage(); 'time' is kept
                    // in case a consumer already reads it.
                    'date' => $now,
                    'time' => $now,
                    'hit' => 1,
                ],
            ];
        } else {
            $listInfos = $this->mainPage(getenv("yamap.list.url"));
        }

        // A limit of 0 means "process every post"; otherwise process the
        // smaller of the limit and the number of posts.
        $total = count($listInfos);
        $max_limit = intval(getenv("yamap.list.max_limit"));
        $max_limit = $max_limit ? min($max_limit, $total) : $total;

        $i = 1;
        foreach ($listInfos as $listInfo) {
            if ($i > $max_limit) {
                break; // limit reached, nothing left to do
            }
            try {
                log_message("notice", "게시물 {$i}번째 {$listInfo["nickname"]} 작업시작");
                $this->mediaContent($this->detailPage($listInfo));
                // Register the files and the post in the board database.
                $this->getBoardLibrary()->create($i, $listInfo, $this->_file_librarys);
                log_message("notice", "게시물 {$i}번째 {$listInfo["nickname"]} 작업완료.");
                $i++;
            } catch (\Exception $e) {
                // Same level as the per-file failures in mediaContent(), so a
                // skipped post is visible outside debug logging.
                log_message("warning", $e->getMessage());
            }
        }

        log_message("notice", "Crawler->" . __FUNCTION__ . " 작업이 완료되었습니다.");
    }
}