Automation init...3
This commit is contained in:
parent
8754f35058
commit
7a0e6405c5
@ -2,24 +2,28 @@
|
||||
|
||||
namespace App\Controllers\Mangboard;
|
||||
|
||||
use App\Libraries\MyCrawler\YamoonCrawler;
|
||||
use App\Libraries\MyCrawler\YamapCrawler;
|
||||
use App\Libraries\Mangboard\UserLibrary;
|
||||
use App\Controllers\CommonController;
|
||||
use App\Libraries\Mangboard\UserLibrary;
|
||||
use App\Entities\Mangboard\UserEntity;
|
||||
use App\Libraries\MyCrawler\YamapCrawler;
|
||||
use App\Libraries\MyCrawler\YamoonCrawler;
|
||||
|
||||
class CrawlerController extends CommonController
|
||||
{
|
||||
/**
 * Log in to the Mangboard site with the given credentials.
 *
 * The target host is taken from the "mangboard.host.url" environment value.
 *
 * @param string $id       Login id.
 * @param string $password Login password.
 *
 * @return UserEntity The entity returned by UserLibrary::login().
 */
private function login_process(string $id, string $password): UserEntity
{
    return (new UserLibrary())->login(
        getenv("mangboard.host.url"),
        $id,
        $password
    );
}
|
||||
public function yamap(string $category, string $id = "", string $debug = "false"): string
|
||||
{
|
||||
try {
|
||||
$id = $id == "" ? getenv("mangboard.login.default.id") : $id;
|
||||
$password = getenv("mangboard.login.default.password");
|
||||
//1. 사이트 로그인 처리
|
||||
$user_library = new UserLibrary();
|
||||
$user_entity = $user_library->login(getenv("mangboard.host.url"), $id, $password);
|
||||
$user_entity = $this->login_process($id, $password);
|
||||
//2. 필요한 로그인한 사용자정보,Socket,Storage 정의후 Crawler에게 전달.
|
||||
$crawler = new YamapCrawler($category);
|
||||
$crawler->setUserEntity($user_entity);
|
||||
$crawler = new YamapCrawler(getenv('yamap.host.url'), $category, $user_entity);
|
||||
$crawler->setDebug($debug === "true" ? true : false);
|
||||
$crawler->execute();
|
||||
return "완료되었습니다.";
|
||||
@ -34,11 +38,9 @@ class CrawlerController extends CommonController
|
||||
$id = $id == "" ? getenv("mangboard.login.default.id") : $id;
|
||||
$password = getenv("mangboard.login.default.password");
|
||||
//1. 사이트 로그인 처리
|
||||
$user_library = new UserLibrary();
|
||||
$user_entity = $user_library->login(getenv("mangboard.host.url"), $id, $password);
|
||||
$user_entity = $this->login_process($id, $password);
|
||||
//2. 필요한 로그인한 사용자정보,Socket,Storage 정의후 Crawler에게 전달.
|
||||
$crawler = new YamoonCrawler($category);
|
||||
$crawler->setUserEntity($user_entity);
|
||||
$crawler = new YamoonCrawler(getenv("yamoon.host.url"), $category, $user_entity);
|
||||
$crawler->setDebug($debug === "true" ? true : false);
|
||||
$crawler->execute();
|
||||
return "완료되었습니다.";
|
||||
|
||||
@ -6,177 +6,28 @@ namespace App\Libraries\MyCrawler;
|
||||
use App\Libraries\MyCrawlerLibrary;
|
||||
use App\Libraries\MySocket\WebSocket;
|
||||
use App\Libraries\MyStorage\MangboardStorage;
|
||||
use App\Libraries\Mangboard\BoardsLibrary;
|
||||
use App\Libraries\Mangboard\BoardLibrary;
|
||||
use App\Libraries\Mangboard\FileLibrary;
|
||||
use App\Libraries\Mangboard\ImageLibrary;
|
||||
use App\Entities\Mangboard\UserEntity;
|
||||
use Symfony\Component\DomCrawler\Crawler;
|
||||
use App\Traits\FileTrait;
|
||||
|
||||
class YamapCrawler extends MyCrawlerLibrary
|
||||
{
|
||||
use FileTrait;
|
||||
private $_mySocket = null;
|
||||
private $_myStorage = null;
|
||||
private $_storages = [];
|
||||
private $_category = "";
|
||||
private $_user_entity = null;
|
||||
private $_boards_library = null;
|
||||
private $_board_library = null;
|
||||
private $_file_library = null;
|
||||
private $_image_library = null;
|
||||
public function __construct(string $category)
|
||||
private $_myStorage = null;
|
||||
public function __construct(string $host, string $category, UserEntity $user_entity)
|
||||
{
|
||||
parent::__construct();
|
||||
parent::__construct(new WebSocket($host));
|
||||
$this->_category = $category;
|
||||
$this->_user_entity = $user_entity;
|
||||
}
|
||||
public function getMySocket()
|
||||
{
|
||||
if ($this->_mySocket === null) {
|
||||
$this->_mySocket = new WebSocket(getenv('yamap.host.url'));
|
||||
}
|
||||
return $this->_mySocket;
|
||||
}
|
||||
public function getMyStorage()
|
||||
final protected function getMyStorage()
|
||||
{
|
||||
if ($this->_myStorage === null) {
|
||||
$this->_myStorage = new MangboardStorage($this->getCategory());
|
||||
$this->_myStorage = new MangboardStorage($this->_category, $this->_user_entity);
|
||||
}
|
||||
return $this->_myStorage;
|
||||
}
|
||||
public function getBoardsLibrary(): BoardsLibrary
|
||||
{
|
||||
// $test = $this->getBoard();
|
||||
// echo "TEST:{$test}\n";
|
||||
// $temp = getenv("mangboard.storage.{$this->getBoard()}.name");
|
||||
// echo "Temp:{$temp}\n";
|
||||
// exit;
|
||||
if ($this->_boards_library === null) {
|
||||
$this->_boards_library = new BoardsLibrary(
|
||||
$this->getCategory(),
|
||||
$this->getUserEntity()
|
||||
);
|
||||
}
|
||||
return $this->_boards_library;
|
||||
}
|
||||
public function getBoardLibrary(): BoardLibrary
|
||||
{
|
||||
if ($this->_board_library === null) {
|
||||
$this->_board_library = new BoardLibrary(
|
||||
$this->getBoardsLibrary()->getEntity(),
|
||||
$this->getUserEntity()
|
||||
);
|
||||
}
|
||||
return $this->_board_library;
|
||||
}
|
||||
public function getFileLibrary(): FileLibrary
|
||||
{
|
||||
if ($this->_file_library === null) {
|
||||
$this->_file_library = new FileLibrary(
|
||||
$this->getBoardsLibrary()->getEntity(),
|
||||
$this->getUserEntity()
|
||||
);
|
||||
}
|
||||
return $this->_file_library;
|
||||
}
|
||||
public function getImageLibrary(): ImageLibrary
|
||||
{
|
||||
if ($this->_image_library === null) {
|
||||
$this->_image_library = new ImageLibrary();
|
||||
}
|
||||
return $this->_image_library;
|
||||
}
|
||||
public function getUserEntity(): UserEntity
|
||||
{
|
||||
if ($this->_user_entity === null) {
|
||||
throw new \Exception("사용자정보가 없습니다.");
|
||||
}
|
||||
return $this->_user_entity;
|
||||
}
|
||||
public function setUserEntity(UserEntity $user_entity): void
|
||||
{
|
||||
$this->_user_entity = $user_entity;
|
||||
}
|
||||
public function getCategory(): string
|
||||
{
|
||||
if ($this->_category == "") {
|
||||
throw new \Exception("저장할 Category가 정의되지 않았습니다.");
|
||||
}
|
||||
return $this->_category;
|
||||
}
|
||||
private function save(int $file_sequence, string $mediaType, string $file_name, string $content): void
|
||||
{
|
||||
log_message("debug", __FUNCTION__ . " 원본파일 {$file_name} 작업 시작");
|
||||
$this->getMyStorage()->setOriginName($file_name);
|
||||
$this->getMyStorage()->setOriginContent($content);
|
||||
$this->getMyStorage()->setOriginType($mediaType);
|
||||
$this->getMyStorage()->setOriginSequence($file_sequence);
|
||||
$this->_storages[] = $this->getMyStorage()->save();
|
||||
}
|
||||
//Yamap ViewPage의 이미지나영상데이터가 있으면 Dodownload 한다.
|
||||
private function download(string $mediaType, string $url): array
|
||||
{
|
||||
$file_names = explode('/', $url);
|
||||
if (!is_array($file_names) || !count($file_names)) {
|
||||
throw new \Exception("URL이 파일명 형식이 아닙니다 : " . $this->getMySocket()->getHost() . $url);
|
||||
}
|
||||
$file_name = array_pop($file_names);
|
||||
$temps = explode(".", $file_name);
|
||||
$file_ext = array_pop($temps);
|
||||
if (!$this->isFileType_FileTrait($file_ext, $mediaType)) {
|
||||
throw new \Exception("파일명 형식이 {$mediaType}가 아닙니다");
|
||||
}
|
||||
$content = $this->getMySocket()->getContent($url);
|
||||
log_message("notice", "{$file_name} 파일이 다운로드되었습니다!");
|
||||
return array($file_name, $content);
|
||||
}
|
||||
private function mediaContent(array $urls): void
|
||||
{
|
||||
$file_sequence = 1;
|
||||
$this->_storages = []; //CreateBoard에서 사용을 위해 DetailPage마다 초기화
|
||||
// log_message("debug", var_export($urls, true));
|
||||
foreach ($urls as $mediaType => $media_urls) {
|
||||
foreach ($media_urls as $url) {
|
||||
try {
|
||||
if ($url === null) {
|
||||
continue;
|
||||
}
|
||||
list($file_name, $content) = $this->download($mediaType, $url);
|
||||
$this->save($file_sequence, $mediaType, $file_name, $content);
|
||||
$file_sequence++;
|
||||
log_message("notice", __FUNCTION__ . " OriginType->{$mediaType} 작업 완료");
|
||||
} catch (\Exception $e) {
|
||||
log_message("warning", sprintf(
|
||||
"\n---%s,OriginType->%s 오류---\n%s\n-----------------------------------------\n",
|
||||
__FUNCTION__,
|
||||
$mediaType,
|
||||
$e->getMessage()
|
||||
));
|
||||
}
|
||||
}
|
||||
}
|
||||
if (!count($this->_storages)) {
|
||||
throw new \Exception("Download된 Content가 없습니다.");
|
||||
}
|
||||
}
|
||||
//Yamap ViewPage의 이미지나영상데이터가 있으면 URL과MediaType을 가져온다
|
||||
private function getUrlsByDetailPageMediaType(string $mediaType, Crawler $selector, array $options, array $urls = []): array
|
||||
{
|
||||
$urls[$mediaType] = [];
|
||||
$selector->filter($options["tag"])->each(
|
||||
function (Crawler $node) use (&$mediaType, &$options, &$urls): void {
|
||||
$url = $node->attr($options["attr"]);
|
||||
log_message("debug", "getUrlsByDetailPageMediaType-> {$mediaType}[{$options["attr"]}]:{$url}");
|
||||
if (!is_null($url)) {
|
||||
$urls[$mediaType][] = $url;
|
||||
}
|
||||
}
|
||||
);
|
||||
return $urls;
|
||||
}
|
||||
private function detailPage(array $listInfo): array
|
||||
{
|
||||
//작성내용
|
||||
// <div class="panel panel-default">
|
||||
// <div class="text-center panel-heading-local-title text-bold">요즘 패션</div>
|
||||
// <div style="margin:5px 10px;">
|
||||
@ -206,20 +57,25 @@ class YamapCrawler extends MyCrawlerLibrary
|
||||
// </div>
|
||||
// <div id="freesubframe"></div>
|
||||
// </div>
|
||||
private function detailPage(array $listInfo): array
|
||||
{
|
||||
$response = $this->getMySocket()->getContent($listInfo['detail_url']);
|
||||
$tag = getenv("yamap.view.content.tag");
|
||||
$selector = $this->getSelector($response, $tag);
|
||||
log_message("debug", "\n-----------detailPage Tag: {$tag}---------------\n{$selector->html()}\n---------------------------\n");
|
||||
$urls = $this->getUrlsByDetailPageMediaType("image", $selector, ["tag" => "img", "attr" => "src"]);
|
||||
$urls = $this->getUrlsByDetailPageMediaType("video", $selector, ["tag" => "video", "attr" => "src"], $urls);
|
||||
log_message("debug", "\n-------------------------\n" . var_export($urls, true) . "\n-----------------------\n");
|
||||
log_message("notice", "-----------" . __FUNCTION__ . " 작업완료--------");
|
||||
return array($listInfo, $urls);
|
||||
return $this->getMediaUrls($response, $tag, $listInfo);
|
||||
}
|
||||
private function mainPage(string $url): array
|
||||
private function listPage(): array
|
||||
{
|
||||
if ($this->getDebug()) {
|
||||
return [
|
||||
'title' => getenv("yamap.view.test.title"),
|
||||
'nickname' => getenv("yamap.view.test.nickname"),
|
||||
'detail_url' => getenv("yamap.view.test.url"),
|
||||
'time' => date("Y-m-d H:i:s"),
|
||||
'hit' => 1,
|
||||
];
|
||||
}
|
||||
$listInfos = [];
|
||||
$response = $this->getMySocket()->getContent($url);
|
||||
$response = $this->getMySocket()->getContent(getenv("yamap.list.url.{$this->_category}"));
|
||||
$selector = $this->getSelector($response, getenv("yamap.list.tag"));
|
||||
//div.bbs_item를 가진 객체를 찾아서 같은 형식의 객체(sibling)를 배열로 넘김
|
||||
// log_message("debug", sprintf("\n-------------MainPage------------\n%s\n--------------------------\n", $selector->html()));
|
||||
@ -246,17 +102,7 @@ class YamapCrawler extends MyCrawlerLibrary
|
||||
}
|
||||
public function execute(): void
|
||||
{
|
||||
if ($this->getDebug()) {
|
||||
$listInfos = [
|
||||
'title' => getenv("yamap.view.test.title"),
|
||||
'nickname' => getenv("yamap.view.test.nickname"),
|
||||
'detail_url' => getenv("yamap.view.test.url"),
|
||||
'time' => date("Y-m-d H:i:s"),
|
||||
'hit' => 1,
|
||||
];
|
||||
} else {
|
||||
$listInfos = $this->mainPage(getenv("yamap.list.url." . $this->getCategory()));
|
||||
}
|
||||
$listInfos = $this->listPage();
|
||||
//Limit가 0이면 $listInfos 갯수만큼 다하고, LIMIT 갯수 혹은 item의 갯수중 작은수만큼 한다.
|
||||
$max_limit = intval(getenv("yamap.list.max_limit"));
|
||||
if ($max_limit) {
|
||||
@ -271,11 +117,11 @@ class YamapCrawler extends MyCrawlerLibrary
|
||||
try {
|
||||
log_message("notice", "게시물 {$i}번째/{$total}개중 {$listInfo["nickname"]} 작업시작");
|
||||
list($listInfo, $urls) = $this->detailPage($listInfo);
|
||||
$this->mediaContent($urls);
|
||||
$this->mediaProcess($urls);
|
||||
//File DB 및 Board DB 등록작업
|
||||
$board_entity = $this->getBoardLibrary()->createByCrawler($i, $listInfo, $this->_storages);
|
||||
$this->getFileLibrary()->createByCrawler($board_entity, $this->_storages);
|
||||
$this->getImageLibrary()->createByCrawler($board_entity, $this->_storages);
|
||||
$board_entity = $this->getMyStorage()->getBoardLibrary()->createByCrawler($i, $listInfo, $this->_storages);
|
||||
$this->getMyStorage()->getFileLibrary()->createByCrawler($board_entity, $this->_storages);
|
||||
$this->getMyStorage()->getImageLibrary()->createByCrawler($board_entity, $this->_storages);
|
||||
log_message("notice", "게시물 {$i}번째/{$total}개중 {$listInfo["nickname"]} 작업완료.");
|
||||
$i++;
|
||||
} catch (\Exception $e) {
|
||||
|
||||
@ -2,199 +2,54 @@
|
||||
|
||||
namespace App\Libraries\MyCrawler;
|
||||
|
||||
|
||||
use App\Entities\Mangboard\UserEntity;
|
||||
use App\Libraries\MyCrawlerLibrary;
|
||||
use App\Libraries\MySocket\WebSocket;
|
||||
use App\Libraries\MyStorage\MangboardStorage;
|
||||
use App\Libraries\Mangboard\BoardsLibrary;
|
||||
use App\Libraries\Mangboard\BoardLibrary;
|
||||
use App\Libraries\Mangboard\FileLibrary;
|
||||
use App\Libraries\Mangboard\ImageLibrary;
|
||||
use App\Entities\Mangboard\UserEntity;
|
||||
use Symfony\Component\DomCrawler\Crawler;
|
||||
use App\Traits\FileTrait;
|
||||
|
||||
class YamoonCrawler extends MyCrawlerLibrary
|
||||
{
|
||||
use FileTrait;
|
||||
private $_mySocket = null;
|
||||
private $_myStorage = null;
|
||||
private $_storages = [];
|
||||
private $_category = "";
|
||||
private $_user_entity = null;
|
||||
private $_boards_library = null;
|
||||
private $_board_library = null;
|
||||
private $_file_library = null;
|
||||
private $_image_library = null;
|
||||
public function __construct(string $category)
|
||||
private $_myStorage = null;
|
||||
public function __construct(string $host, string $category, UserEntity $user_entity)
|
||||
{
|
||||
parent::__construct();
|
||||
parent::__construct(new WebSocket($host));
|
||||
$this->_category = $category;
|
||||
$this->_user_entity = $user_entity;
|
||||
}
|
||||
public function getMySocket()
|
||||
{
|
||||
if ($this->_mySocket === null) {
|
||||
$this->_mySocket = new WebSocket(getenv('yamoon.host.url'));
|
||||
}
|
||||
return $this->_mySocket;
|
||||
}
|
||||
public function getMyStorage()
|
||||
final protected function getMyStorage()
|
||||
{
|
||||
if ($this->_myStorage === null) {
|
||||
$this->_myStorage = new MangboardStorage($this->getCategory());
|
||||
$this->_myStorage = new MangboardStorage($this->_category, $this->_user_entity);
|
||||
}
|
||||
return $this->_myStorage;
|
||||
}
|
||||
public function getBoardsLibrary(): BoardsLibrary
|
||||
{
|
||||
// $test = $this->getBoard();
|
||||
// echo "TEST:{$test}\n";
|
||||
// $temp = getenv("mangboard.storage.{$this->getBoard()}.name");
|
||||
// echo "Temp:{$temp}\n";
|
||||
// exit;
|
||||
if ($this->_boards_library === null) {
|
||||
$this->_boards_library = new BoardsLibrary(
|
||||
$this->getCategory(),
|
||||
$this->getUserEntity()
|
||||
);
|
||||
}
|
||||
return $this->_boards_library;
|
||||
}
|
||||
public function getBoardLibrary(): BoardLibrary
|
||||
{
|
||||
if ($this->_board_library === null) {
|
||||
$this->_board_library = new BoardLibrary(
|
||||
$this->getBoardsLibrary()->getEntity(),
|
||||
$this->getUserEntity()
|
||||
);
|
||||
}
|
||||
return $this->_board_library;
|
||||
}
|
||||
public function getFileLibrary(): FileLibrary
|
||||
{
|
||||
if ($this->_file_library === null) {
|
||||
$this->_file_library = new FileLibrary(
|
||||
$this->getBoardsLibrary()->getEntity(),
|
||||
$this->getUserEntity()
|
||||
);
|
||||
}
|
||||
return $this->_file_library;
|
||||
}
|
||||
public function getImageLibrary(): ImageLibrary
|
||||
{
|
||||
if ($this->_image_library === null) {
|
||||
$this->_image_library = new ImageLibrary();
|
||||
}
|
||||
return $this->_image_library;
|
||||
}
|
||||
public function getUserEntity(): UserEntity
|
||||
{
|
||||
if ($this->_user_entity === null) {
|
||||
throw new \Exception("사용자정보가 없습니다.");
|
||||
}
|
||||
return $this->_user_entity;
|
||||
}
|
||||
public function setUserEntity(UserEntity $user_entity): void
|
||||
{
|
||||
$this->_user_entity = $user_entity;
|
||||
}
|
||||
public function getCategory(): string
|
||||
{
|
||||
if ($this->_category == "") {
|
||||
throw new \Exception("저장할 Category가 정의되지 않았습니다.");
|
||||
}
|
||||
return $this->_category;
|
||||
}
|
||||
private function save(int $file_sequence, string $mediaType, string $file_name, string $content): void
|
||||
{
|
||||
log_message("debug", __FUNCTION__ . " 원본파일 {$file_name} 작업 시작");
|
||||
$this->getMyStorage()->setOriginName($file_name);
|
||||
$this->getMyStorage()->setOriginContent($content);
|
||||
$this->getMyStorage()->setOriginType($mediaType);
|
||||
$this->getMyStorage()->setOriginSequence($file_sequence);
|
||||
$this->_storages[] = $this->getMyStorage()->save();
|
||||
}
|
||||
//Yamap ViewPage의 이미지나영상데이터가 있으면 Dodownload 한다.
|
||||
private function download(string $mediaType, string $url): array
|
||||
{
|
||||
$file_names = explode('/', $url);
|
||||
if (!is_array($file_names) || !count($file_names)) {
|
||||
throw new \Exception("URL이 파일명 형식이 아닙니다 : " . $this->getMySocket()->getHost() . $url);
|
||||
}
|
||||
$file_name = array_pop($file_names);
|
||||
$temps = explode(".", $file_name);
|
||||
$file_ext = array_pop($temps);
|
||||
if (!$this->isFileType_FileTrait($file_ext, $mediaType)) {
|
||||
throw new \Exception("파일명 형식이 {$mediaType}가 아닙니다");
|
||||
}
|
||||
$content = $this->getMySocket()->getContent($url);
|
||||
log_message("notice", "{$file_name} 파일이 다운로드되었습니다!");
|
||||
return array($file_name, $content);
|
||||
}
|
||||
private function mediaContent(array $urls): void
|
||||
{
|
||||
$file_sequence = 1;
|
||||
$this->_storages = []; //CreateBoard에서 사용을 위해 DetailPage마다 초기화
|
||||
// log_message("debug", var_export($urls, true));
|
||||
foreach ($urls as $mediaType => $media_urls) {
|
||||
foreach ($media_urls as $url) {
|
||||
try {
|
||||
list($file_name, $content) = $this->download($mediaType, $url);
|
||||
$this->save($file_sequence, $mediaType, $file_name, $content);
|
||||
$file_sequence++;
|
||||
log_message("notice", __FUNCTION__ . " OriginType->{$mediaType} 작업 완료");
|
||||
} catch (\Exception $e) {
|
||||
log_message("warning", sprintf(
|
||||
"\n---%s,OriginType->%s 오류---\n%s\n-----------------------------------------\n",
|
||||
__FUNCTION__,
|
||||
$mediaType,
|
||||
$e->getMessage()
|
||||
));
|
||||
}
|
||||
}
|
||||
}
|
||||
if (!count($this->_storages)) {
|
||||
throw new \Exception("Download된 Content가 없습니다.");
|
||||
}
|
||||
}
|
||||
//Yamap ViewPage의 이미지나영상데이터가 있으면 URL과MediaType을 가져온다
|
||||
private function getUrlsByDetailPageMediaType(string $mediaType, Crawler $selector, array $options, array $urls = []): array
|
||||
{
|
||||
$urls[$mediaType] = [];
|
||||
$selector->filter($options["tag"])->each(
|
||||
function (Crawler $node) use (&$mediaType, &$options, &$urls): void {
|
||||
$url = $node->attr($options["attr"]);
|
||||
log_message("debug", "getUrlsByDetailPageMediaType-> {$mediaType}[{$options["attr"]}]:{$url}");
|
||||
if (!is_null($url)) {
|
||||
$urls[$mediaType][] = $url;
|
||||
}
|
||||
}
|
||||
);
|
||||
return $urls;
|
||||
}
|
||||
private function detailPage(array $listInfo): array
|
||||
{
|
||||
// log_message("debug", var_export($listInfo, true));
|
||||
$url = "/newboard/yamoonboard/" . $listInfo['detail_url'];
|
||||
$response = $this->getMySocket()->getContent($url);
|
||||
// log_message("debug", "\n--------------------------\n{$response}\n---------------------------\n");
|
||||
$response = $this->getMySocket()->getContent("/newboard/yamoonboard/" . $listInfo['detail_url']);
|
||||
//작성시간
|
||||
// $selector = $this->getSelector($response, getenv("yamoon.view.regdate.tag"));
|
||||
// $listInfo['date'] = trim($selector->text());
|
||||
//작성내용
|
||||
$tag = getenv("yamoon.view.content.tag");
|
||||
$selector = $this->getSelector($response, $tag);
|
||||
log_message("debug", "\n-----------detailPage Tag: {$tag}---------------\n{$selector->html()}\n---------------------------\n");
|
||||
$urls = $this->getUrlsByDetailPageMediaType("image", $selector, ["tag" => "img", "attr" => "src"]);
|
||||
$urls = $this->getUrlsByDetailPageMediaType("video", $selector, ["tag" => "video", "attr" => "src"], $urls);
|
||||
log_message("debug", "\n-------------------------\n" . var_export($urls, true) . "\n-----------------------\n");
|
||||
log_message("notice", "-----------" . __FUNCTION__ . " 작업완료--------");
|
||||
return array($listInfo, $urls);
|
||||
return $this->getMediaUrls($response, $tag, $listInfo);
|
||||
}
|
||||
private function mainPage(string $url): array
|
||||
private function listPage(): array
|
||||
{
|
||||
if ($this->getDebug()) {
|
||||
$listInfos = [
|
||||
'title' => getenv("yamoon.view.test.title"),
|
||||
'nickname' => getenv("yamoon.view.test.nickname"),
|
||||
'detail_url' => getenv("yamoon.view.test.url"),
|
||||
'time' => date("Y-m-d H:i:s"),
|
||||
'hit' => 1,
|
||||
];
|
||||
} else {
|
||||
}
|
||||
$listInfos = [];
|
||||
$response = $this->getMySocket()->getContent($url);
|
||||
$response = $this->getMySocket()->getContent(getenv("yamoon.list.url.{$this->_category}"));
|
||||
//div.bbs_item를 가진 객체를 찾아서 같은 형식의 객체(sibling)를 배열로 넘김
|
||||
// log_message("debug", sprintf("\n-------------MainPage------------\n%s\n--------------------------\n", $selector->html()));
|
||||
// <td class="listvisited mobile-td subject-view">
|
||||
@ -204,7 +59,7 @@ class YamoonCrawler extends MyCrawlerLibrary
|
||||
// <span class="visible-xs visible-sm small"><i class="fa fa-user-o" aria-hidden="true"></i> yeeyuu | <i class="fa fa-thumbs-o-up" aria-hidden="true"></i> 6 | <i class="fa fa-eye" aria-hidden="true"></i> 369 | No 89372 | 2024-09-13</span>
|
||||
// </td>
|
||||
//bbs_item에서 span.g_nickname 객체를 찾아서 작성자가 "관리자" 아닌지 확인 후 Return Bool
|
||||
$selector = $this->getSelector($response, getenv("yamoon.list.tag"))->each(
|
||||
$this->getSelector($response, getenv("yamoon.list.tag"))->each(
|
||||
function (Crawler $node) use (&$listInfos): void {
|
||||
$link_node = $node->filter(getenv("yamoon.list.item.link.tag"));
|
||||
$detail_url = $link_node->attr("href");
|
||||
@ -224,19 +79,9 @@ class YamoonCrawler extends MyCrawlerLibrary
|
||||
}
|
||||
public function execute(): void
|
||||
{
|
||||
if ($this->getDebug()) {
|
||||
$listInfos = [
|
||||
'title' => getenv("yamoon.view.test.title"),
|
||||
'nickname' => getenv("yamoon.view.test.nickname"),
|
||||
'detail_url' => getenv("yamoon.view.test.url"),
|
||||
'time' => date("Y-m-d H:i:s"),
|
||||
'hit' => 1,
|
||||
];
|
||||
} else {
|
||||
$listInfos = $this->mainPage(getenv("yamoon.list.url." . $this->getCategory()));
|
||||
}
|
||||
$listInfos = $this->listPage();
|
||||
//Limit가 0이면 $listInfos 갯수만큼 다하고, LIMIT 갯수 혹은 item의 갯수중 작은수만큼 한다.
|
||||
$max_limit = intval(getenv("yamoon.list.max_limit"));
|
||||
$max_limit = intval(getenv("yamap.list.max_limit"));
|
||||
if ($max_limit) {
|
||||
$max_limit = count($listInfos) <= $max_limit ? count($listInfos) : $max_limit;
|
||||
} else {
|
||||
@ -249,11 +94,11 @@ class YamoonCrawler extends MyCrawlerLibrary
|
||||
try {
|
||||
log_message("notice", "게시물 {$i}번째/{$total}개중 {$listInfo["nickname"]} 작업시작");
|
||||
list($listInfo, $urls) = $this->detailPage($listInfo);
|
||||
$this->mediaContent($urls);
|
||||
$this->mediaProcess($urls);
|
||||
//File DB 및 Board DB 등록작업
|
||||
$board_entity = $this->getBoardLibrary()->createByCrawler($i, $listInfo, $this->_storages);
|
||||
$this->getFileLibrary()->createByCrawler($board_entity, $this->_storages);
|
||||
$this->getImageLibrary()->createByCrawler($board_entity, $this->_storages);
|
||||
$board_entity = $this->getMyStorage()->getBoardLibrary()->createByCrawler($i, $listInfo, $this->_storages);
|
||||
$this->getMyStorage()->getFileLibrary()->createByCrawler($board_entity, $this->_storages);
|
||||
$this->getMyStorage()->getImageLibrary()->createByCrawler($board_entity, $this->_storages);
|
||||
log_message("notice", "게시물 {$i}번째/{$total}개중 {$listInfo["nickname"]} 작업완료.");
|
||||
$i++;
|
||||
} catch (\Exception $e) {
|
||||
|
||||
@ -4,16 +4,27 @@ namespace App\Libraries;
|
||||
|
||||
use App\Libraries\CommonLibrary;
|
||||
use Symfony\Component\DomCrawler\Crawler;
|
||||
use App\Traits\FileTrait;
|
||||
|
||||
abstract class MyCrawlerLibrary extends CommonLibrary
|
||||
{
|
||||
protected function __construct()
|
||||
use FileTrait;
|
||||
private $_mySocket = null;
|
||||
protected $_storages = [];
|
||||
protected function __construct($mySocket)
|
||||
{
|
||||
parent::__construct();
|
||||
$this->_mySocket = $mySocket;
|
||||
}
|
||||
abstract public function getMySocket();
|
||||
abstract public function getMyStorage();
|
||||
abstract protected function getMyStorage();
|
||||
abstract public function execute(): void;
|
||||
/**
 * Return the socket object stored on this crawler.
 *
 * @return mixed The value held in $_mySocket.
 *
 * @throws \Exception When $_mySocket is null.
 */
final protected function getMySocket()
{
    if ($this->_mySocket !== null) {
        return $this->_mySocket;
    }

    throw new \Exception("Socket이 지정되지 않았습니다.");
}
|
||||
final protected function getSelector(string $content, string $tag): Crawler
|
||||
{
|
||||
$crawler = new Crawler($content);
|
||||
@ -28,4 +39,89 @@ abstract class MyCrawlerLibrary extends CommonLibrary
|
||||
}
|
||||
return $crawler->filter($tag);
|
||||
}
|
||||
|
||||
//--------미디어 URL관련------
|
||||
/**
 * Collect one attribute value from every element under $selector that matches a tag.
 *
 * The values are stored in $urls[$mediaType]; any entry already present under
 * that key is discarded first. Elements whose attribute is missing (null) are skipped.
 *
 * @param string  $mediaType Key under which the collected values are stored (e.g. "image").
 * @param Crawler $selector  Node set to search in.
 * @param array   $options   Must contain "tag" (filter expression) and "attr" (attribute name).
 * @param array   $urls      Previously collected values, keyed by media type.
 *
 * @return array $urls with the $mediaType entry replaced by the newly collected list.
 */
private function getMediaUrlsByMediaType(string $mediaType, Crawler $selector, array $options, array $urls = []): array
{
    // __FUNCTION__ evaluated inside an anonymous function yields the closure's
    // own name, so resolve the method name here for the log line.
    $method = __FUNCTION__;
    $attribute = $options["attr"];

    $urls[$mediaType] = [];
    $selector->filter($options["tag"])->each(
        // Only $urls is mutated by the callback; everything else is captured by value.
        function (Crawler $node) use ($method, $mediaType, $attribute, &$urls): void {
            $url = $node->attr($attribute);
            log_message("debug", "{$method}-> {$mediaType}[{$attribute}]:{$url}");
            if (!is_null($url)) {
                $urls[$mediaType][] = $url;
            }
        }
    );

    return $urls;
}
|
||||
/**
 * Extract the image and video URLs found in a detail page.
 *
 * Selects $tag from $response, then gathers the "src" attribute of every
 * <img> (stored under "image") and every <video> (stored under "video").
 *
 * @param string $response Raw content of the detail page.
 * @param string $tag      Filter expression for the content area.
 * @param array  $listInfo List-row information; returned unchanged.
 *
 * @return array Two elements: $listInfo and the URL lists keyed by media type.
 */
final protected function getMediaUrls(string $response, string $tag, array $listInfo): array
{
    $contentNode = $this->getSelector($response, $tag);
    log_message("debug", "\n-----------detailPage Tag: {$tag}---------------\n{$contentNode->html()}\n---------------------------\n");

    // Media type => where its URL lives in the markup; processed in this order.
    $targets = [
        "image" => ["tag" => "img", "attr" => "src"],
        "video" => ["tag" => "video", "attr" => "src"],
    ];
    $mediaUrls = [];
    foreach ($targets as $type => $options) {
        $mediaUrls = $this->getMediaUrlsByMediaType($type, $contentNode, $options, $mediaUrls);
    }

    log_message("debug", "\n-------------------------\n" . var_export($mediaUrls, true) . "\n-----------------------\n");
    log_message("notice", "-----------" . __FUNCTION__ . " 작업완료--------");

    return [$listInfo, $mediaUrls];
}
|
||||
|
||||
//--------미디어 관련-------
|
||||
/**
 * Pass one downloaded file to the storage object and keep the result of save().
 *
 * The value returned by the storage's save() is appended to $_storages.
 *
 * @param int    $file_sequence Position of the file within the current page.
 * @param string $mediaType     Media type of the file.
 * @param string $file_name     Original file name.
 * @param string $content       Downloaded file content.
 */
private function mediaSave(int $file_sequence, string $mediaType, string $file_name, string $content): void
{
    log_message("debug", __FUNCTION__ . " 원본파일 {$file_name} 작업 시작");

    // NOTE(review): assumes getMyStorage() returns the same instance on every call,
    // as the crawlers visible in this change do.
    $storage = $this->getMyStorage();
    $storage->setOriginName($file_name);
    $storage->setOriginContent($content);
    $storage->setOriginType($mediaType);
    $storage->setOriginSequence($file_sequence);

    $this->_storages[] = $storage->save();
}
|
||||
/**
 * Download one image or video referenced by a detail page.
 *
 * The file name is the last segment of the URL's path component, so a query
 * string or fragment ("a.jpg?v=1") does not end up in the name or extension.
 *
 * @param string $mediaType Media type the file extension is checked against.
 * @param string $url       URL of the media file, as found in the page.
 *
 * @return array Two elements: the file name and the downloaded content.
 *
 * @throws \Exception When the URL has no file name, or its extension is not
 *                    accepted for $mediaType.
 */
private function mediaDownload(string $mediaType, string $url): array
{
    // parse_url() returns null/false when there is no usable path component.
    $path = parse_url($url, PHP_URL_PATH);
    $file_name = "";
    if (is_string($path)) {
        $segments = explode('/', $path);
        $file_name = array_pop($segments);
    }
    // explode() never returns an empty array, so test the name itself.
    if ($file_name === "") {
        throw new \Exception("URL이 파일명 형식이 아닙니다 : " . $this->getMySocket()->getHost() . $url);
    }

    $file_ext = pathinfo($file_name, PATHINFO_EXTENSION);
    if (!$this->isFileType_FileTrait($file_ext, $mediaType)) {
        throw new \Exception("파일명 형식이 {$mediaType}가 아닙니다");
    }

    // Fetch with the URL exactly as given; only the name derivation ignores the query.
    $content = $this->getMySocket()->getContent($url);
    log_message("notice", "{$file_name} 파일이 다운로드되었습니다!");

    return [$file_name, $content];
}
|
||||
/**
 * Download and store every media URL of one detail page.
 *
 * A failure on a single URL is logged as a warning and does not stop the
 * remaining URLs. The sequence number only advances after a successful save.
 *
 * @param array $urls URL lists keyed by media type.
 *
 * @return array The entries collected in $_storages during this call.
 *
 * @throws \Exception When nothing could be stored.
 */
final protected function mediaProcess(array $urls): array
{
    // Reset for every detail page; the list is used afterwards when the board entry is created.
    $this->_storages = [];
    $sequence = 1;

    foreach ($urls as $mediaType => $media_urls) {
        foreach ($media_urls as $url) {
            if ($url === null) {
                continue;
            }
            try {
                [$file_name, $content] = $this->mediaDownload($mediaType, $url);
                $this->mediaSave($sequence, $mediaType, $file_name, $content);
                $sequence++;
                log_message("notice", __FUNCTION__ . " OriginType->{$mediaType} 작업 완료");
            } catch (\Exception $error) {
                $report = sprintf(
                    "\n---%s,OriginType->%s 오류---\n%s\n-----------------------------------------\n",
                    __FUNCTION__,
                    $mediaType,
                    $error->getMessage()
                );
                log_message("warning", $report);
            }
        }
    }

    if (count($this->_storages) === 0) {
        throw new \Exception("Download된 Content가 없습니다.");
    }

    return $this->_storages;
}
|
||||
}
|
||||
|
||||
@ -2,34 +2,52 @@
|
||||
|
||||
namespace App\Libraries\MyStorage;
|
||||
|
||||
use App\Models\Mangboard\FileModel;
|
||||
use App\Entities\Mangboard\UserEntity;
|
||||
use App\Libraries\Mangboard\BoardLibrary;
|
||||
use App\Libraries\Mangboard\BoardsLibrary;
|
||||
use App\Libraries\Mangboard\FileLibrary;
|
||||
use App\Libraries\Mangboard\ImageLibrary;
|
||||
|
||||
class MangboardStorage extends FileStorage
|
||||
{
|
||||
private $_model = null;
|
||||
public function __construct(string $path)
|
||||
private $_boards_library = null;
|
||||
private $_board_library = null;
|
||||
private $_file_library = null;
|
||||
private $_image_library = null;
|
||||
private $_category = "";
|
||||
private $_user_entity = null;
|
||||
public function __construct(string $category, UserEntity $user_entity)
|
||||
{
|
||||
parent::__construct($path);
|
||||
parent::__construct($category);
|
||||
$this->_category = $category;
|
||||
$this->_user_entity = $user_entity;
|
||||
}
|
||||
public function getBasePath(): string
|
||||
/**
 * Return the category this storage was created for.
 *
 * @throws \Exception When the category is empty.
 */
private function getCategory(): string
{
    if ($this->_category != "") {
        return $this->_category;
    }

    throw new \Exception("저장할 Category가 정의되지 않았습니다.");
}
|
||||
/**
 * Return the user entity this storage was created with.
 *
 * @throws \Exception When no user entity is set.
 */
private function getUserEntity(): UserEntity
{
    if ($this->_user_entity !== null) {
        return $this->_user_entity;
    }

    throw new \Exception("사용자정보가 없습니다.");
}
|
||||
final public function getBasePath(): string
|
||||
{
|
||||
return getenv("mangboard.uploads.path");
|
||||
}
|
||||
public function getUploadPath(): string
|
||||
final public function getUploadPath(): string
|
||||
{
|
||||
return parent::getUploadPath() . DIRECTORY_SEPARATOR . $this->getBasePath();
|
||||
}
|
||||
public function getUploadURL(): string
|
||||
final public function getUploadURL(): string
|
||||
{
|
||||
return sprintf("/wp-content/%s/%s/%s", parent::getUploadURL(), $this->getBasePath(), $this->getBasePath());
|
||||
}
|
||||
public function getModel(): FileModel
|
||||
{
|
||||
if ($this->_model === null) {
|
||||
return $this->_model = new FileModel();
|
||||
}
|
||||
return $this->_model;
|
||||
}
|
||||
final public function getHTMLTag(string $content = ""): string
|
||||
{
|
||||
//Board 게시판 image_path , content용 데이터 배열에 추가 후 modifyBoard에서 처리
|
||||
@ -64,4 +82,39 @@ class MangboardStorage extends FileStorage
|
||||
));
|
||||
return $content;
|
||||
}
|
||||
|
||||
/**
 * Return the BoardsLibrary for this storage, creating it on first use.
 *
 * The instance is built from the category and the user entity and then reused.
 */
private function getBoardsLibrary(): BoardsLibrary
{
    if ($this->_boards_library !== null) {
        return $this->_boards_library;
    }

    $category = $this->getCategory();
    $user = $this->getUserEntity();
    $this->_boards_library = new BoardsLibrary($category, $user);

    return $this->_boards_library;
}
|
||||
/**
 * Return the BoardLibrary for this storage, creating it on first use.
 *
 * The instance is built from the BoardsLibrary entity and the user entity
 * and then reused.
 */
final public function getBoardLibrary(): BoardLibrary
{
    if ($this->_board_library !== null) {
        return $this->_board_library;
    }

    $boards_entity = $this->getBoardsLibrary()->getEntity();
    $user = $this->getUserEntity();
    $this->_board_library = new BoardLibrary($boards_entity, $user);

    return $this->_board_library;
}
|
||||
/**
 * Return the FileLibrary for this storage, creating it on first use.
 *
 * The instance is built from the BoardsLibrary entity and the user entity
 * and then reused.
 */
final public function getFileLibrary(): FileLibrary
{
    if ($this->_file_library !== null) {
        return $this->_file_library;
    }

    $boards_entity = $this->getBoardsLibrary()->getEntity();
    $user = $this->getUserEntity();
    $this->_file_library = new FileLibrary($boards_entity, $user);

    return $this->_file_library;
}
|
||||
/**
 * Return the ImageLibrary for this storage, creating it on first use.
 */
final public function getImageLibrary(): ImageLibrary
{
    if ($this->_image_library !== null) {
        return $this->_image_library;
    }

    $this->_image_library = new ImageLibrary();

    return $this->_image_library;
}
|
||||
}
|
||||
|
||||
Loading…
Reference in New Issue
Block a user