128 lines
5.3 KiB
PHP
128 lines
5.3 KiB
PHP
<?php
|
|
|
|
namespace App\Libraries\MyCrawler;
|
|
|
|
use App\Libraries\CommonLibrary;
|
|
use Symfony\Component\DomCrawler\Crawler;
|
|
use App\Traits\FileTrait;
|
|
|
|
/**
 * Base class for crawlers.
 *
 * Holds the socket used to fetch remote content, extracts image/video URLs
 * from a page fragment, downloads each of them through the socket and hands
 * the content to the storage object supplied by the concrete subclass.
 */
abstract class MyCrawler extends CommonLibrary
{
    use FileTrait;

    /**
     * Client used for remote access. This class calls getHost() and
     * getContent($url) on it; the concrete type is not visible from here.
     */
    private $_mySocket = null;

    /**
     * Values returned by getMyStorage()->save() for the page currently being
     * processed. Reset at the start of every mediaProcess() call.
     */
    protected $_storages = [];

    /**
     * @param mixed $mySocket Client object stored for later use by getMySocket().
     */
    protected function __construct($mySocket)
    {
        parent::__construct();
        $this->_mySocket = $mySocket;
    }

    /**
     * Returns the storage object used by mediaSave(). It must provide
     * setOriginName(), setOriginContent(), setOriginType(),
     * setOriginSequence() and save().
     */
    abstract protected function getMyStorage();

    /**
     * Entry point implemented by each concrete crawler.
     */
    abstract public function execute(): void;

    /**
     * Returns the socket given to the constructor.
     *
     * @throws \Exception When no socket was supplied (the value is null).
     */
    final protected function getMySocket()
    {
        if ($this->_mySocket === null) {
            throw new \Exception("Socket이 지정되지 않았습니다.");
        }

        return $this->_mySocket;
    }

    /**
     * Parses $content and returns the nodes matching the selector $tag.
     *
     * @param string $content Markup to parse.
     * @param string $tag     Selector passed to Crawler::filter().
     */
    final protected function getSelector(string $content, string $tag): Crawler
    {
        $crawler = new Crawler($content);

        if ($this->getDebug()) {
            log_message("debug", sprintf(
                "\n---------%s----------\ntag:%s\n%s\n-------------------\n",
                __FUNCTION__,
                $tag,
                $content
            ));
            // NOTE(review): in debug mode the whole process stops here after
            // dumping the content, so nothing after the first getSelector()
            // call ever runs. Kept as-is because removing it changes
            // debug-mode behaviour; confirm whether this is still wanted.
            exit;
        }

        return $crawler->filter($tag);
    }

    //-------- Media URL helpers --------

    /**
     * Collects the value of $options["attr"] from every node matching
     * $options["tag"] below $selector and stores the list under
     * $urls[$mediaType], replacing any previous entry for that media type.
     *
     * @param string  $mediaType Key used in the result (e.g. "image", "video").
     * @param Crawler $selector  Nodes to search in.
     * @param array   $options   Must contain the keys "tag" and "attr".
     * @param array   $urls      Result of previous calls to extend.
     *
     * @return array $urls with $urls[$mediaType] set to the list of found URLs.
     */
    private function getMediaUrlsByMediaType(string $mediaType, Crawler $selector, array $options, array $urls = []): array
    {
        // Inside the callback __FUNCTION__ evaluates to "{closure}", so the
        // method name is resolved here to keep the log line meaningful.
        $caller = __FUNCTION__;
        $attribute = $options["attr"];
        $found = [];

        $selector->filter($options["tag"])->each(
            static function (Crawler $node) use ($caller, $mediaType, $attribute, &$found): void {
                $url = $node->attr($attribute);
                log_message("debug", "{$caller}-> {$mediaType}[{$attribute}]:{$url}");
                // attr() yields null when the node lacks the attribute.
                if ($url !== null) {
                    $found[] = $url;
                }
            }
        );

        $urls[$mediaType] = $found;

        return $urls;
    }

    /**
     * Extracts the image and video URLs found inside the part of a detail
     * page selected by $tag.
     *
     * @param string $response Markup of the detail page.
     * @param string $tag      Selector of the area to inspect.
     * @param array  $listInfo Passed through unchanged as the first result element.
     *
     * @return array Two elements: $listInfo and
     *               ["image" => string[], "video" => string[]].
     */
    final protected function getMediaUrls(string $response, string $tag, array $listInfo): array
    {
        $selector = $this->getSelector($response, $tag);

        // Crawler::html() throws on an empty node list; a debug log line must
        // not abort the extraction, so an empty string is logged instead and
        // the method goes on to return empty URL lists.
        $markup = $selector->count() > 0 ? $selector->html() : "";
        log_message("debug", "\n-----------detailPage Tag: {$tag}---------------\n{$markup}\n---------------------------\n");

        $urls = $this->getMediaUrlsByMediaType("image", $selector, ["tag" => "img", "attr" => "src"]);
        $urls = $this->getMediaUrlsByMediaType("video", $selector, ["tag" => "video", "attr" => "src"], $urls);

        log_message("debug", "\n-------------------------\n" . var_export($urls, true) . "\n-----------------------\n");
        log_message("notice", "-----------" . __FUNCTION__ . " 작업완료--------");

        return [$listInfo, $urls];
    }

    //-------- Media handling --------

    /**
     * Passes one downloaded file to the storage object and records the value
     * returned by its save() in $this->_storages.
     *
     * @param int    $file_sequence Position of the file within the current page.
     * @param string $mediaType     Media type key (e.g. "image", "video").
     * @param string $file_name     Original file name.
     * @param string $content       Downloaded content.
     */
    private function mediaSave(int $file_sequence, string $mediaType, string $file_name, string $content): void
    {
        log_message("debug", __FUNCTION__ . " 원본파일 {$file_name} 작업 시작");

        // Resolve the storage once; every setter and save() must act on the
        // same object for the saved result to be meaningful.
        $storage = $this->getMyStorage();
        $storage->setOriginName($file_name);
        $storage->setOriginContent($content);
        $storage->setOriginType($mediaType);
        $storage->setOriginSequence($file_sequence);

        $this->_storages[] = $storage->save();
    }

    /**
     * Downloads one image or video referenced by a view page.
     *
     * The file name is the last "/"-separated segment of $url with any
     * query string or fragment removed. Its extension is checked with
     * isFileType_FileTrait() before the content is fetched.
     *
     * @param string $mediaType Media type the extension is checked against.
     * @param string $url       URL passed unchanged to the socket's getContent().
     *
     * @return array Two elements: the file name and the downloaded content.
     *
     * @throws \Exception When the URL carries no file name or the extension
     *                    is not accepted for $mediaType.
     */
    private function mediaDownload(string $mediaType, string $url): array
    {
        $segments = explode('/', $url);
        $file_name = (string) array_pop($segments);

        // Drop a trailing "?query" / "#fragment" so "a.jpg?v=1" is stored as
        // "a.jpg" with the extension "jpg". Only the last segment is trimmed,
        // so URLs that were accepted before resolve to the same name.
        $file_name = (string) substr($file_name, 0, strcspn($file_name, "?#"));

        if ($file_name === '') {
            throw new \Exception("URL이 파일명 형식이 아닙니다 : " . $this->getMySocket()->getHost() . $url);
        }

        $name_parts = explode(".", $file_name);
        $file_ext = array_pop($name_parts);
        if (!$this->isFileType_FileTrait($file_ext, $mediaType)) {
            throw new \Exception("파일명 형식이 {$mediaType}가 아닙니다");
        }

        $content = $this->getMySocket()->getContent($url);
        log_message("notice", "{$file_name} 파일이 다운로드되었습니다!");

        return [$file_name, $content];
    }

    /**
     * Downloads and stores every URL in $urls.
     *
     * A failure for a single URL is logged as a warning and does not stop the
     * remaining downloads.
     *
     * @param array $urls Map of media type => list of URLs (null entries are skipped).
     *
     * @return array The values returned by the storage's save(), in download order.
     *
     * @throws \Exception When not a single file could be stored.
     */
    final protected function mediaProcess(array $urls): array
    {
        $file_sequence = 1;
        // Reset per detail page so the caller (board creation, per the
        // original note) only sees the files of the current page.
        $this->_storages = [];

        foreach ($urls as $mediaType => $media_urls) {
            foreach ($media_urls as $url) {
                if ($url === null) {
                    continue;
                }

                try {
                    [$file_name, $content] = $this->mediaDownload($mediaType, $url);
                    $this->mediaSave($file_sequence, $mediaType, $file_name, $content);
                    // Only successfully stored files consume a sequence number.
                    $file_sequence++;
                    log_message("notice", __FUNCTION__ . " OriginType->{$mediaType} 작업 완료");
                } catch (\Exception $e) {
                    log_message("warning", sprintf(
                        "\n---%s,OriginType->%s 오류---\n%s\n-----------------------------------------\n",
                        __FUNCTION__,
                        $mediaType,
                        $e->getMessage()
                    ));
                }
            }
        }

        if (count($this->_storages) === 0) {
            throw new \Exception("Download된 Content가 없습니다.");
        }

        return $this->_storages;
    }
}
|