145 lines
5.1 KiB
PHP
145 lines
5.1 KiB
PHP
<?php
|
|
|
|
namespace App\Libraries;
|
|
|
|
use App\Libraries\MyWeb\MyWebLibrary;
|
|
use App\Libraries\MyStorage\MyStorageLibrary;
|
|
use App\Libraries\MyCrawler\MyCrawlerLibrary;
|
|
use Symfony\Component\DomCrawler\Crawler;
|
|
|
|
/**
 * Scrapes board-style listing pages (div.bbs_list / div.bbs_item markup)
 * and downloads the media files referenced by the detail pages.
 *
 * The web, storage and crawler helpers are created lazily on first use.
 * The debug flag is copied to each helper at the moment it is created;
 * calling setDebug() afterwards does not update helpers that already exist.
 */
class YamapLibrary
{
    /** @var MyWebLibrary|null HTTP helper, created on first getMyWeb() call. */
    private $_web = null;

    /** @var MyStorageLibrary|null Storage helper, created on first getMyStorage() call. */
    private $_storage = null;

    /** @var MyCrawlerLibrary|null Crawler helper, created on first getMyCrawler() call. */
    private $_crawler = null;

    /** @var bool When true, download() skips the actual HTTP transfer. */
    private $_debug = false;

    /** @var string|null Host handed to MyWebLibrary. */
    private $_host = null;

    /**
     * @param string $host Host passed to MyWebLibrary when it is created.
     */
    public function __construct(string $host)
    {
        $this->_host = $host;
    }

    /**
     * Returns the shared MyWebLibrary instance, creating it on first use.
     */
    public function getMyWeb(): MyWebLibrary
    {
        if ($this->_web === null) {
            $this->_web = new MyWebLibrary($this->getHost());
            $this->_web->setDebug($this->getDebug());
        }

        return $this->_web;
    }

    /**
     * Returns the shared MyStorageLibrary instance, creating it on first use.
     */
    public function getMyStorage(): MyStorageLibrary
    {
        if ($this->_storage === null) {
            $this->_storage = new MyStorageLibrary();
            $this->_storage->setDebug($this->getDebug());
        }

        return $this->_storage;
    }

    /**
     * Returns the shared MyCrawlerLibrary instance, creating it on first use.
     */
    public function getMyCrawler(): MyCrawlerLibrary
    {
        if ($this->_crawler === null) {
            $this->_crawler = new MyCrawlerLibrary();
            $this->_crawler->setDebug($this->getDebug());
        }

        return $this->_crawler;
    }

    /**
     * Returns the current debug flag.
     */
    final public function getDebug(): bool
    {
        return $this->_debug;
    }

    /**
     * Sets the debug flag used by download() and by helpers created afterwards.
     */
    final public function setDebug(bool $debug): void
    {
        $this->_debug = $debug;
    }

    /**
     * Returns the host given to the constructor.
     */
    final public function getHost(): string
    {
        return $this->_host;
    }

    /**
     * Collects the ".list_subject" href links of list items whose
     * "span.g_nickname" text is not "관리자" (the administrator nickname).
     *
     * NOTE(review): Crawler::siblings() returns the siblings of the first
     * matched div.bbs_item and does NOT include that first item itself, so
     * the first item is never inspected. Confirm this is intended.
     *
     * @param Crawler $crawler Crawler positioned on the list container.
     *
     * @return array Links as returned by MyCrawlerLibrary::getLinks(); empty
     *               when the page contains no div.bbs_item element.
     */
    public function getLinks(Crawler $crawler): array
    {
        $firstItem = $crawler->filter("div.bbs_item")->first();

        // siblings() throws on an empty node list; an empty page simply has no links.
        if ($firstItem->count() === 0) {
            return [];
        }

        $links = [];

        foreach ($firstItem->siblings() as $bbs_item) {
            // Check each nickname inside the item; only non-administrator posts are collected.
            $this->getMyCrawler()->createCrawler($bbs_item)->filter("span.g_nickname")->each(
                function (Crawler $node) use (&$links, $bbs_item) {
                    if ($node->text() !== "관리자") {
                        // Look up the .list_subject anchors of the same item and append their links.
                        $itemCrawler = $this->getMyCrawler()->createCrawler($bbs_item);
                        $itemLinks = $this->getMyCrawler()->getLinks(
                            $itemCrawler,
                            ["tag" => ".list_subject", "attr" => "href"]
                        );

                        foreach ($itemLinks as $link) {
                            $links[] = $link;
                        }
                    }
                }
            );
        }

        return $links;
    }

    /**
     * Extracts images via MyCrawlerLibrary::getImages() and downloads each one.
     *
     * NOTE(review): the 'src' key is read regardless of $options['attr'];
     * presumably the helper always exposes a 'src' entry - verify against
     * MyCrawlerLibrary. The result of download() is not checked, so a failed
     * download does not affect the returned list.
     *
     * @param Crawler $crawler Crawler to search in.
     * @param array   $options Selector options forwarded to MyCrawlerLibrary::getImages().
     *
     * @return array The entries returned by MyCrawlerLibrary::getImages().
     */
    public function getImages(Crawler $crawler, array $options = ["tag" => "img", "attr" => "src"]): array
    {
        $images = $this->getMyCrawler()->getImages($crawler, $options);

        foreach ($images as $image) {
            $this->download($image['src']);
        }

        return $images;
    }

    /**
     * Extracts videos via MyCrawlerLibrary::getVideos() and downloads each one.
     *
     * NOTE(review): the 'src' key is read regardless of $options['attr'];
     * presumably the helper always exposes a 'src' entry - verify against
     * MyCrawlerLibrary. The result of download() is not checked, so a failed
     * download does not affect the returned list.
     *
     * @param Crawler $crawler Crawler to search in.
     * @param array   $options Selector options forwarded to MyCrawlerLibrary::getVideos().
     *
     * @return array The entries returned by MyCrawlerLibrary::getVideos().
     */
    public function getVideos(Crawler $crawler, array $options = ["tag" => "video", "attr" => "src"]): array
    {
        $videos = $this->getMyCrawler()->getVideos($crawler, $options);

        foreach ($videos as $video) {
            $this->download($video['src']);
        }

        return $videos;
    }

    /**
     * Downloads one file into the storage directory.
     *
     * The target file name is the text after the last "/" of $url. The storage
     * directory is created (recursively) when missing. In debug mode the
     * target path is logged but nothing is transferred, and true is returned.
     *
     * Exceptions are logged and reported as false rather than propagated.
     *
     * @param string $url Source URL; a path relative to the web host, or an
     *                    absolute http(s) URL.
     *
     * @return bool True on HTTP 200 (or in debug mode), false on any failure.
     */
    final public function download($url): bool
    {
        try {
            $segments = explode('/', (string) $url);
            $fileName = array_pop($segments);

            // An empty URL or one ending in "/" has no file name to save under.
            if ($fileName === null || $fileName === '') {
                throw new \Exception("Download URL Error:" . $url);
            }

            $directory = $this->getMyStorage()->getPath();

            // The trailing is_dir() covers a concurrent process creating the directory first.
            if (!is_dir($directory) && !mkdir($directory, 0777, true) && !is_dir($directory)) {
                throw new \Exception("Make Directory Error:" . $directory);
            }

            $fullPath = $directory . DIRECTORY_SEPARATOR . $fileName;
            log_message("debug", "FullPath-> " . $fullPath);

            if ($this->getDebug()) {
                return true;
            }

            $response = $this->getMyWeb()->getClient()->get($this->resolveDownloadUrl((string) $url), [
                'cookies' => $this->getMyWeb()->getCookieJar(),
                'sink' => $fullPath,
            ]);

            if ((int) $response->getStatusCode() === 200) {
                log_message("info", "파일이 성공적으로 다운로드되었습니다!");

                return true;
            }

            log_message("info", "파일 다운로드 실패: " . $response->getStatusCode());

            return false;
        } catch (\Exception $e) {
            log_message("error", "파일 다운로드 중 오류 발생: " . $e->getMessage());

            return false;
        }
    }

    /**
     * Builds the request URL for download(): absolute http(s) URLs are used
     * unchanged, anything else is appended to the web helper's host.
     */
    private function resolveDownloadUrl(string $url): string
    {
        if (preg_match('#^https?://#i', $url) === 1) {
            return $url;
        }

        return $this->getMyWeb()->getHost() . $url;
    }

    /**
     * Fetches $url and returns a crawler on the first "div.bbs_list" element.
     *
     * @param string $url Page address handed to MyWebLibrary::getContent().
     */
    public function getCrawlerByMainPage(string $url): Crawler
    {
        $html = $this->getMyWeb()->getContent($url);

        return $this->getMyCrawler()->createCrawler($html)->filter("div.bbs_list")->first();
    }

    /**
     * Fetches $url and returns a crawler on the first "div.contents" element.
     *
     * @param string $url Page address handed to MyWebLibrary::getContent().
     */
    public function getCrawlerByDetailPage(string $url): Crawler
    {
        log_message("debug", "Target-> " . $url);
        $html = $this->getMyWeb()->getContent($url);

        return $this->getMyCrawler()->createCrawler($html)->filter("div.contents")->first();
    }
}
|