Automation/app/Libraries/YamapLibrary.php
2024-09-05 21:45:59 +09:00

192 lines
6.8 KiB
PHP

<?php
namespace App\Libraries;
use Symfony\Component\DomCrawler\Crawler;
//Yamap
// define('YAMAP', [
// 'host' => ['url' => getenv('yamap.host.url')],
// 'list' => [
// 'url' => getenv('yamap.list.url'),
// 'tag' => getenv('yamap.list.tag'),
// 'item' => [
// 'tag' => getenv('yamap.list.item.tag'),
// 'subject' => [
// 'tag' => getenv('yamap.list.item.subject.tag')
// ],
// 'nickname' => [
// 'tag' => getenv('yamap.list.item.nickname.tag'),
// 'except' => getenv('yamap.list.item.nickname.except'),
// ],
// ],
// ],
// 'view' => [
// 'tag' => getenv('yamap.view.tag'),
// 'content' => [
// 'tag' => getenv('yamap.view.content.tag'),
// ],
// 'test' => [
// 'url' => getenv('yamap.view.test.url'),
// ]
// ],
// ]);
/**
 * Scrapes the Yamap board: picks a target post from the list page, then
 * downloads and stores the images/videos found in that post's content area.
 *
 * All CSS selectors and URLs are read from `yamap.*` environment variables.
 * The three collaborators (web client, storage, crawler factory) must be
 * injected through their setters before build() is called.
 */
class YamapLibrary extends CommonLibrary
{
    /** @var mixed|null Web client; used here via getContent($url) and download($url). */
    private $_myWeb = null;

    /** @var mixed|null Storage backend; used here via setFileName(), save() and getPath(). */
    private $_myStorage = null;

    /** @var mixed|null Crawler helper; used here via create($html) and getNodes($crawler, $options). */
    private $_myCrawler = null;

    public function __construct()
    {
        parent::__construct();
    }

    /**
     * Returns the injected web client.
     *
     * @throws \Exception when no web client has been set.
     */
    public function getMyWeb()
    {
        if ($this->_myWeb === null) {
            throw new \Exception("MyWeb Library가 정의되지 않았습니다.");
        }
        return $this->_myWeb;
    }

    /** Injects the web client. */
    public function setMyWeb($myWeb)
    {
        $this->_myWeb = $myWeb;
    }

    /**
     * Returns the injected storage backend.
     *
     * @throws \Exception when no storage backend has been set.
     */
    public function getMyStorage()
    {
        if ($this->_myStorage === null) {
            throw new \Exception("MyStorage Library가 정의되지 않았습니다.");
        }
        return $this->_myStorage;
    }

    /** Injects the storage backend. */
    public function setMyStorage($myStorage)
    {
        $this->_myStorage = $myStorage;
    }

    /**
     * Returns the injected crawler helper.
     *
     * @throws \Exception when no crawler helper has been set.
     */
    public function getMyCrawler()
    {
        // Fixed: the guard previously tested $_myWeb, so an unset crawler
        // slipped through as null whenever the web client was configured.
        if ($this->_myCrawler === null) {
            throw new \Exception("MyCrawler Library가 정의되지 않았습니다.");
        }
        return $this->_myCrawler;
    }

    /** Injects the crawler helper. */
    public function setMyCrawler($myCrawler)
    {
        $this->_myCrawler = $myCrawler;
    }

    /**
     * Fetches $url and returns a Crawler narrowed to the nodes matching $tag.
     *
     * @throws \Exception when the web client returns a falsy response.
     */
    private function getCrawler(string $url, string $tag): Crawler
    {
        $response = $this->getMyWeb()->getContent($url);
        if (!$response) {
            throw new \Exception("getCrawler 실패:{$url}");
        }
        return $this->getMyCrawler()->create($response)->filter($tag);
    }

    /**
     * Downloads every media node selected by $options and stores it.
     *
     * Nodes whose content could not be saved are skipped silently.
     *
     * @param Crawler $crawler Scope to search in.
     * @param array   $options Requires "tag" (selector) and "attr" (attribute holding the media URL).
     *
     * @return array List of ["orignal", "path", "fileName", "content"] entries.
     *               The misspelled "orignal" key is kept because it is part of
     *               the returned data shape.
     */
    private function download_process(Crawler $crawler, array $options): array
    {
        $mediaInfos = [];
        log_message("debug", "download:{$options["tag"]},{$options["attr"]}");
        $nodes = $this->getMyCrawler()->getNodes($crawler, $options);
        foreach ($nodes as $node) {
            $original = $node->attr($options["attr"]);
            [$fileName, $content] = $this->getMyWeb()->download($original);

            // Collaborators are resolved inside the loop on purpose: with no
            // nodes, a missing storage backend must not raise.
            $storage = $this->getMyStorage();
            $storage->setFileName($fileName);
            if (!$storage->save($content)) {
                continue;
            }
            $mediaInfos[] = [
                // NOTE(review): if $node is a DomCrawler node, html() yields the
                // inner HTML, which is empty for <img>; outerHtml() may have been
                // intended - confirm against getNodes().
                "orignal" => $node->html(),
                "path" => $storage->getPath(),
                "fileName" => $fileName,
                "content" => $content
            ];
        }
        return $mediaInfos;
    }

    /**
     * Turns stored media entries into HTML tags.
     *
     * An entry whose original markup passes isContainsHttpOrHttps() (defined
     * outside this class body, presumably on the parent) is reused verbatim;
     * otherwise a new tag is rendered from the stored path and file name.
     *
     * @param array  $mediaInfos Entries as produced by download_process().
     * @param string $format     sprintf() template taking path, fileName, fileName.
     *
     * @return array List of HTML tag strings, in input order.
     */
    private function buildMediaTags(array $mediaInfos, string $format): array
    {
        $tags = [];
        foreach ($mediaInfos as $mediaInfo) {
            if ($this->isContainsHttpOrHttps($mediaInfo['orignal'])) {
                $tags[] = $mediaInfo['orignal'];
            } else {
                $tags[] = sprintf($format, $mediaInfo["path"], $mediaInfo["fileName"], $mediaInfo["fileName"]);
            }
        }
        return $tags;
    }

    /**
     * Reads the list page and returns the first post not written by the
     * excluded nickname.
     *
     * @return array [nickname, url] of the first eligible post.
     *
     * @throws \Exception when no eligible post is found.
     */
    public function mainPage(): array
    {
        $url = getenv("yamap.list.url");
        $crawler = $this->getCrawler($url, getenv("yamap.list.tag"));
        $item_tag = getenv("yamap.list.item.tag");
        $item_subject_tag = getenv("yamap.list.item.subject.tag");
        $item_nickname_tag = getenv("yamap.list.item.nickname.tag");
        $item_nickname_except = getenv("yamap.list.item.nickname.except");
        $lists = [];
        // Visit every list item matching the item selector (div.bbs_item per
        // the original note). Only $lists is mutated, so it is the only
        // by-reference capture.
        $crawler->filter($item_tag)->each(
            static function (Crawler $node) use (
                $item_subject_tag,
                $item_nickname_tag,
                $item_nickname_except,
                &$lists
            ): void {
                // Read the author nickname (span.g_nickname per the original note).
                // NOTE(review): text()/attr() raise on an empty match in DomCrawler,
                // so an item lacking these nodes aborts the whole scan - confirm
                // that is acceptable.
                $nickname = $node->filter($item_nickname_tag)->text();
                log_message("debug", $item_nickname_tag . ":" . $nickname);
                // Strict comparison: loose != would treat numeric-looking
                // nicknames ("10" vs "1e1") as equal.
                if ($nickname !== $item_nickname_except) {
                    // Not written by the excluded author: take the link from the
                    // subject element (a.list_subject per the original note).
                    $url = $node->filter($item_subject_tag)->attr("href");
                    $lists[] = ['nickname' => $nickname, 'url' => $url];
                }
            }
        );
        if (!count($lists)) {
            throw new \Exception("Target URL이 없습니다.");
        }
        return [$lists[0]["nickname"], $lists[0]["url"]];
    }

    /**
     * Downloads the images and videos of a post and builds their HTML tags.
     *
     * @param mixed $url Post URL (passed to getCrawler(), which expects a string).
     *
     * @return array [mediaInfos, mediaTags]: stored media entries (images first,
     *               then videos) and the matching HTML tags.
     */
    public function detailPage($url): array
    {
        $crawler = $this->getCrawler($url, getenv("yamap.view.content.tag"));

        // 3. Image handling
        // Fixed: tags were previously read from the whole list
        // ($images['orignal']) instead of the current item.
        $images = $this->download_process($crawler, ["tag" => "img", "attr" => "src"]);
        $mediaTags = $this->buildMediaTags($images, '<img src="%s/%s" alt="%s">');

        // 4. Video (mp4) handling - same fix as for images.
        $videos = $this->download_process($crawler, ["tag" => "video", "attr" => "src"]);
        $mediaTags = array_merge(
            $mediaTags,
            $this->buildMediaTags($videos, '<video src="%s/%s" alt="%s">')
        );

        $mediaInfos = array_merge($images, $videos);
        log_message("debug", "-----mediaInfos-----");
        foreach ($mediaInfos as $mediaInfo) {
            log_message("debug", "fileName: " . $mediaInfo["path"] . DIRECTORY_SEPARATOR . $mediaInfo['fileName']);
        }
        log_message("debug", "-----mediaTags-----");
        log_message("debug", var_export($mediaTags, true));
        return [$mediaInfos, $mediaTags];
    }

    /**
     * Runs the whole scrape: choose a post, then collect its media.
     *
     * In debug mode (getDebug(), defined outside this class body) the target is
     * taken from the `yamap.view.test.*` environment variables instead of the
     * list page.
     *
     * @return array [nickname, mediaInfos, mediaTags]
     */
    public function build(): array
    {
        // 1. Main (list) page handling
        if ($this->getDebug()) {
            $nickname = getenv("yamap.view.test.nickname");
            $detail_url = getenv("yamap.view.test.url");
        } else {
            [$nickname, $detail_url] = $this->mainPage();
        }
        // 2. Detail page handling: process the post content area
        //    (bbs_view > div.contents per the original note).
        [$mediaInfos, $mediaTags] = $this->detailPage($detail_url);
        return [$nickname, $mediaInfos, $mediaTags];
    }
}